Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 147 additions & 0 deletions backend/app/services/runs_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,39 @@ def init_db():

CREATE INDEX IF NOT EXISTS idx_run_potions_potion ON run_potions(potion_id);
CREATE INDEX IF NOT EXISTS idx_run_potions_run ON run_potions(run_id);

-- Per-encounter rows for "win rate vs monster X" / "deadliest
-- encounter" / "damage taken vs Y" queries. Populated at
-- submit_run() time from map_point_history.rooms[]. Backfill
-- script: tools/backfill_run_encounters.py for runs landed
-- before this table existed.
CREATE TABLE IF NOT EXISTS run_encounters (
id INTEGER PRIMARY KEY AUTOINCREMENT,
run_id INTEGER NOT NULL REFERENCES runs(id),
encounter_id TEXT NOT NULL,
act_id TEXT,
room_type TEXT,
floor INTEGER,
damage_taken INTEGER NOT NULL DEFAULT 0,
turns_taken INTEGER NOT NULL DEFAULT 0,
won_fight INTEGER NOT NULL DEFAULT 1
);

-- monster_ids is a list per encounter (encounters can host
-- multiple monsters). Normalized into its own table so
-- "win rate vs MONSTER" doesn't need json_each() and stays
-- indexable. PRIMARY KEY collapses duplicate refs within
-- the same encounter row to a single entry.
CREATE TABLE IF NOT EXISTS run_encounter_monsters (
encounter_row_id INTEGER NOT NULL REFERENCES run_encounters(id),
monster_id TEXT NOT NULL,
PRIMARY KEY (encounter_row_id, monster_id)
);

CREATE INDEX IF NOT EXISTS idx_run_encounters_encounter ON run_encounters(encounter_id);
CREATE INDEX IF NOT EXISTS idx_run_encounters_run ON run_encounters(run_id);
CREATE INDEX IF NOT EXISTS idx_run_encounter_monsters_monster
ON run_encounter_monsters(monster_id);
""")

# Migrations — add columns to existing tables
Expand Down Expand Up @@ -257,6 +290,104 @@ def clean_id(raw_id: str) -> str:
return raw_id


def extract_run_encounters(
    data: dict,
    player_id: int,
    is_win: bool,
    is_abandoned: bool,
) -> list[dict]:
    """Walk map_point_history and return per-encounter rows.

    Each combat room (room_type "monster", "elite" or "boss") becomes one
    dict with the keys: encounter_id, monster_ids, act_id, room_type, floor,
    damage_taken, turns_taken, won_fight.

    Won-fight heuristic: every combat encounter is a win except the
    *last* combat room of a non-win, non-abandoned run whose encounter
    id matches `killed_by_encounter`. Abandoned runs leave the final
    encounter as won_fight=1 since the player quit out rather than
    losing the fight.

    Args:
        data: Raw run submission. Reads "acts", "map_point_history" and
            "killed_by_encounter"; keys that are missing or null are
            treated as empty.
        player_id: Only player_stats entries that carry this id, or carry
            no (falsy) player_id at all, contribute rooms.
        is_win: Whether the run as a whole was won.
        is_abandoned: Whether the player abandoned the run.

    Returns:
        One dict per combat room with a non-empty encounter id, in map
        order. `floor` is the 1-based position of the floor inside its
        act. `damage_taken` is read from the floor-level player_stats
        entry, so several combat rooms on one floor report the same value.

    Exported so tools/backfill_run_encounters.py can replay archived
    run JSONs through the same logic without going through submit_run.
    """
    # `x.get(key) or <empty>` instead of `x.get(key, <empty>)`: the .get
    # default only applies when the key is absent, while submitted JSON can
    # carry the key with an explicit null.
    acts = data.get("acts") or []
    map_history = data.get("map_point_history") or []
    killed_by = clean_id(data.get("killed_by_encounter") or "") or None

    combat_room_types = {"monster", "elite", "boss"}

    # Pass 1: collect every combat room scoped to this player.
    combat_rooms: list[tuple[int, int, dict, dict]] = []
    for act_idx, act_floors in enumerate(map_history):
        for floor_idx, floor in enumerate(act_floors or []):
            for ps in floor.get("player_stats") or []:
                # Entries without a player_id are kept; only entries that
                # name a *different* player are dropped.
                if ps.get("player_id") and ps["player_id"] != player_id:
                    continue
                for room in floor.get("rooms") or []:
                    if room.get("room_type") in combat_room_types:
                        combat_rooms.append((act_idx, floor_idx, room, ps))

    if not combat_rooms:
        return []

    # Pass 2: build the output rows. `last_idx` is computed over all
    # collected combat rooms, including ones skipped below for lacking an id.
    encounters: list[dict] = []
    last_idx = len(combat_rooms) - 1
    for i, (act_idx, floor_idx, room, ps) in enumerate(combat_rooms):
        encounter_id = clean_id(room.get("model_id") or "") or ""
        if not encounter_id:
            continue
        # Set comprehension de-duplicates; sorting keeps the output stable.
        monster_ids = sorted(
            {clean_id(m) for m in room.get("monster_ids") or [] if m}
        )
        act_raw = acts[act_idx] if act_idx < len(acts) else None
        lost_here = bool(
            i == last_idx
            and not is_win
            and not is_abandoned
            and killed_by
            and encounter_id == killed_by
        )
        encounters.append(
            {
                "encounter_id": encounter_id,
                "monster_ids": monster_ids,
                "act_id": clean_id(act_raw) if act_raw else None,
                "room_type": room.get("room_type"),
                "floor": floor_idx + 1,
                "damage_taken": int(ps.get("damage_taken", 0) or 0),
                "turns_taken": int(room.get("turns_taken", 0) or 0),
                "won_fight": 0 if lost_here else 1,
            }
        )
    return encounters


def _insert_run_encounters(conn, run_id: int, encounters: list[dict]) -> None:
    """Persist encounter dicts and link each one to its monsters.

    For every entry in `encounters` one row is inserted into
    `run_encounters`; the id of that new row is then used as
    `encounter_row_id` for one `run_encounter_monsters` row per entry in
    the encounter's "monster_ids". The monster insert uses INSERT OR
    IGNORE, so a repeated monster id within one encounter is stored once.

    Nothing is committed here; the caller owns the transaction.
    """
    # Order matches the column list of the INSERT below (after run_id).
    encounter_fields = (
        "encounter_id",
        "act_id",
        "room_type",
        "floor",
        "damage_taken",
        "turns_taken",
        "won_fight",
    )
    for entry in encounters:
        row_values = (run_id, *(entry[field] for field in encounter_fields))
        inserted = conn.execute(
            """INSERT INTO run_encounters
               (run_id, encounter_id, act_id, room_type, floor,
                damage_taken, turns_taken, won_fight)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
            row_values,
        )
        parent_row_id = inserted.lastrowid
        for monster in entry["monster_ids"]:
            conn.execute(
                """INSERT OR IGNORE INTO run_encounter_monsters
                   (encounter_row_id, monster_id) VALUES (?, ?)""",
                (parent_row_id, monster),
            )


def submit_run(data: dict, username: str | None = None) -> dict:
"""Parse and store a run. Returns status dict."""
# Validate structure. Errors call out the specific field so failed
Expand Down Expand Up @@ -442,6 +573,22 @@ def _submit_player_run(
(run_id, pid, int(was_picked), was_used),
)

# Per-encounter rows for /api/runs/monster-stats and the
# forthcoming Stats tab on /monsters/[id]. Failures here must
# not roll back the run row — the encounters table is a
# downstream analytics surface, not a primary record. Backfill
# script picks up anything that fails to parse here.
try:
encounters = extract_run_encounters(
data,
player_id=player_id,
is_win=bool(data.get("win", False)),
is_abandoned=bool(was_abandoned),
)
_insert_run_encounters(conn, run_id, encounters)
except Exception:
pass

return {"success": True, "run_id": run_id, "run_hash": run_hash}


Expand Down
165 changes: 165 additions & 0 deletions tools/backfill_run_encounters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
"""Backfill `run_encounters` + `run_encounter_monsters` for runs submitted
before the schema existed.

Walks `data/runs/*.json` (the archived raw run submissions), looks up
each run's `runs` row by hash, and replays `extract_run_encounters()` to
populate the two new analytics tables. Idempotent: skips any run that
already has rows in `run_encounters`.

Designed to run on the prod host. SSH in, then:

cd /var/www/spire-codex
docker exec -it spire-codex-backend python3 -m tools.backfill_run_encounters

Or locally:

DATA_DIR=$(pwd)/data python3 tools/backfill_run_encounters.py

Reports the number of rows inserted; pass --dry-run to parse and report without writing.
"""

from __future__ import annotations

import argparse
import json
import os
import sys
from pathlib import Path

# Make `backend.app.*` importable regardless of cwd.
HERE = Path(__file__).resolve().parent
sys.path.insert(0, str(HERE.parent / "backend"))

from app.services.runs_db import ( # noqa: E402
extract_run_encounters,
_insert_run_encounters,
get_conn,
)


def _data_dir() -> Path:
    """Return the data root: $DATA_DIR if set, else `data` beside this script's parent directory."""
    fallback_root = HERE.parent / "data"
    configured_root = os.environ.get("DATA_DIR", fallback_root)
    return Path(configured_root)


def _runs_dir() -> Path:
    """Return the `runs` subdirectory of the data root."""
    data_root = _data_dir()
    return data_root.joinpath("runs")


def _runs_already_backfilled(conn) -> set[int]:
    """Collect the ids of runs that own one or more `run_encounters` rows.

    Rows are read by column name, so `conn` must produce mapping-style
    rows (e.g. sqlite3.Row).
    """
    cursor = conn.execute("SELECT DISTINCT run_id FROM run_encounters")
    seen_run_ids = set()
    for record in cursor.fetchall():
        seen_run_ids.add(record["run_id"])
    return seen_run_ids


def _player_id_for_hash(conn, run_hash: str) -> tuple[int, int] | None:
    """Look up the `runs` row for one hash.

    Returns `(run_id, 0)` when a row with this `run_hash` exists and
    `None` otherwise. The second element is always the constant 0: this
    function does not read the archived JSON, so it cannot resolve the
    real player id itself.

    NOTE(review): nothing in the visible part of this script calls this
    helper; backfill() performs its own lookup and picks the player by
    matching the character name. Confirm whether it is still needed.
    """
    lookup = conn.execute(
        "SELECT id, character FROM runs WHERE run_hash = ?", (run_hash,)
    )
    match = lookup.fetchone()
    if match:
        # Placeholder player index; the caller is expected to resolve it.
        return (match["id"], 0)
    return None


def _find_player_for_character(data: dict, character: str) -> dict | None:
    """Return the first player block in *data* whose character equals *character*."""
    # `or []` also covers an explicit `"players": null` in the archive.
    for player in data.get("players") or []:
        if not isinstance(player, dict):
            continue
        name = (player.get("character", "") or "").replace("CHARACTER.", "")
        if name == character:
            return player
    return None


def backfill(dry_run: bool = False) -> dict:
    """Populate the encounter tables from archived run JSON files.

    Iterates `<runs dir>/*.json` in sorted order, treating each file stem
    as a run hash. For every hash that has a `runs` row and no existing
    `run_encounters` rows, the archived JSON is replayed through
    `extract_run_encounters()` and the result is written with
    `_insert_run_encounters()`.

    A problem with a single archive (unreadable/invalid JSON, no player
    matching the row's character, data the extractor cannot walk) is
    reported on stderr, counted under "skipped_errors", and does not stop
    the remaining files from being processed.

    Args:
        dry_run: When true, parse and count but do not insert rows.

    Returns:
        A dict of counters with the same keys on every path:
        "runs_processed", "encounters_inserted" (rows that were, or in a
        dry run would have been, inserted), "skipped_already_backfilled",
        "skipped_no_db_row" and "skipped_errors".
    """
    stats = {
        "runs_processed": 0,
        "encounters_inserted": 0,
        "skipped_already_backfilled": 0,
        "skipped_no_db_row": 0,
        "skipped_errors": 0,
    }

    runs_dir = _runs_dir()
    if not runs_dir.exists():
        print(f"runs directory missing: {runs_dir}", file=sys.stderr)
        return stats

    with get_conn() as conn:
        already = _runs_already_backfilled(conn)

        for json_path in sorted(runs_dir.glob("*.json")):
            run_hash = json_path.stem
            row = conn.execute(
                "SELECT id, character, win, was_abandoned FROM runs WHERE run_hash = ?",
                (run_hash,),
            ).fetchone()
            if not row:
                stats["skipped_no_db_row"] += 1
                continue
            run_id = row["id"]
            if run_id in already:
                stats["skipped_already_backfilled"] += 1
                continue

            # OSError: unreadable file. ValueError covers both
            # json.JSONDecodeError and UnicodeDecodeError.
            try:
                data = json.loads(json_path.read_text(encoding="utf-8"))
            except (OSError, ValueError) as exc:
                print(f"!! {run_hash}: bad JSON ({exc})", file=sys.stderr)
                stats["skipped_errors"] += 1
                continue
            if not isinstance(data, dict):
                print(
                    f"!! {run_hash}: bad JSON (expected an object, "
                    f"got {type(data).__name__})",
                    file=sys.stderr,
                )
                stats["skipped_errors"] += 1
                continue

            # Find which player in the JSON corresponds to this hash's row.
            # The row does not store the player index, so the player is
            # re-derived by matching on character; the first match wins.
            char = row["character"]
            target_player = _find_player_for_character(data, char)
            if not target_player:
                print(
                    f"!! {run_hash}: no player matched character {char}",
                    file=sys.stderr,
                )
                stats["skipped_errors"] += 1
                continue
            player_id = target_player.get("id", 1)

            # Only the pure parsing step is guarded: a malformed archive
            # should not abort the whole backfill. Insert errors still
            # propagate so a database problem is never hidden.
            try:
                encounters = extract_run_encounters(
                    data,
                    player_id=player_id,
                    is_win=bool(data.get("win", False)),
                    is_abandoned=bool(row["was_abandoned"]),
                )
            except (AttributeError, KeyError, TypeError, ValueError) as exc:
                print(
                    f"!! {run_hash}: could not extract encounters ({exc!r})",
                    file=sys.stderr,
                )
                stats["skipped_errors"] += 1
                continue

            if not encounters:
                stats["runs_processed"] += 1
                continue

            if not dry_run:
                _insert_run_encounters(conn, run_id, encounters)
            stats["runs_processed"] += 1
            stats["encounters_inserted"] += len(encounters)

        if dry_run:
            # Nothing was inserted above; the rollback is a safety net so a
            # dry run can never leave pending writes on the connection.
            conn.rollback()

    return stats


def main():
    """Command-line entry point: parse flags, run the backfill, print the summary as JSON."""
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--dry-run",
        help="Parse + report without writing rows.",
        action="store_true",
    )
    options = cli.parse_args()

    summary = backfill(dry_run=options.dry_run)
    rendered = json.dumps(summary, indent=2)
    print(rendered)


if __name__ == "__main__":
    main()
Loading