From f77dd0572bb9168bce4fcb5a775c436806985e2f Mon Sep 17 00:00:00 2001 From: Peter Lord Date: Wed, 13 May 2026 07:10:24 -0700 Subject: [PATCH] Add run_encounters + run_encounter_monsters tables + backfill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Roadmap #3 phase 1 of 2 (schema + backfill, then UI in a follow-up). Unlocks monster-level community stats (encounter rate, player win rate vs monster, avg HP lost when fought, run-killer rank) which the current schema couldn't answer — only `runs.killed_by` exists today and it captures the boss that ended a losing run, nothing else. ### Schema (init_db, both auto-created on first run) run_encounters ( id, run_id, encounter_id, act_id, room_type, floor, damage_taken, turns_taken, won_fight ) run_encounter_monsters ( encounter_row_id, monster_id -- composite PK collapses dups ) Normalized split (instead of stuffing monster_ids as JSON in run_encounters) so "win rate vs MONSTER" stays a regular indexed JOIN without json_each(). Indexes on encounter_id, run_id, monster_id. ### Submit-time ingest extract_run_encounters(data, player_id, is_win, is_abandoned) walks map_point_history and yields one row per combat room scoped to the target player. Won-fight heuristic: every combat is a win except the last combat room of a non-win, non-abandoned run whose encounter_id matches killed_by_encounter. Abandoned-early runs leave the final encounter as won=1 (quit, not died). _submit_player_run() calls the extractor and inserts rows after the existing cards/relics/potions writes. Wrapped in try/except so a parse failure doesn't roll back the primary run row — the encounter table is an analytics surface, not a record of truth, and the backfill picks up anything that misses live. ### Backfill — tools/backfill_run_encounters.py Walks data/runs/*.json, looks up the row in `runs` by hash, replays extract_run_encounters() via the same shared helper. 
Idempotent (skips runs already represented in run_encounters). Reports counts. Verified locally against 44 archived runs: - 523 encounter rows inserted (~12 / run, matches typical run length) - 696 monster join rows - Top encounters: NIBBITS_WEAK, SHRINKER_BEETLE_WEAK, CORPSE_SLUGS_WEAK - Second run with no new data: skipped 43, no-op on the 44th Run on prod after this lands: docker exec spire-codex-backend \ python3 /app/tools/backfill_run_encounters.py ### Follow-up Next PR adds GET /api/runs/monster-stats/{monster_id} + a Stats tab on /monsters/[id] that consumes it. No schema change required there — this PR carries the load-bearing piece. --- backend/app/services/runs_db.py | 147 +++++++++++++++++++++++++++ tools/backfill_run_encounters.py | 165 +++++++++++++++++++++++++++++++ 2 files changed, 312 insertions(+) create mode 100644 tools/backfill_run_encounters.py diff --git a/backend/app/services/runs_db.py b/backend/app/services/runs_db.py index 2d173a4f..4e622549 100644 --- a/backend/app/services/runs_db.py +++ b/backend/app/services/runs_db.py @@ -213,6 +213,39 @@ def init_db(): CREATE INDEX IF NOT EXISTS idx_run_potions_potion ON run_potions(potion_id); CREATE INDEX IF NOT EXISTS idx_run_potions_run ON run_potions(run_id); + + -- Per-encounter rows for "win rate vs monster X" / "deadliest + -- encounter" / "damage taken vs Y" queries. Populated at + -- submit_run() time from map_point_history.rooms[]. Backfill + -- script: tools/backfill_run_encounters.py for runs landed + -- before this table existed. + CREATE TABLE IF NOT EXISTS run_encounters ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + run_id INTEGER NOT NULL REFERENCES runs(id), + encounter_id TEXT NOT NULL, + act_id TEXT, + room_type TEXT, + floor INTEGER, + damage_taken INTEGER NOT NULL DEFAULT 0, + turns_taken INTEGER NOT NULL DEFAULT 0, + won_fight INTEGER NOT NULL DEFAULT 1 + ); + + -- monster_ids is a list per encounter (encounters can host + -- multiple monsters). 
# Room types that count as a fight for encounter analytics.
_COMBAT_ROOM_TYPES = frozenset({"monster", "elite", "boss"})


def extract_run_encounters(
    data: dict,
    player_id: int,
    is_win: bool,
    is_abandoned: bool,
) -> list[dict]:
    """Walk map_point_history and return one dict per combat room.

    Each returned dict has the keys ``encounter_id``, ``monster_ids``,
    ``act_id``, ``room_type``, ``floor``, ``damage_taken``,
    ``turns_taken`` and ``won_fight``.

    Args:
        data: Parsed run JSON. Missing or ``null`` collections are
            treated as empty.
        player_id: Player whose per-floor stats are attached to the rows.
            A ``player_stats`` entry with no (or a falsy) ``player_id``
            is accepted for any player.
        is_win: Whether the run was won.
        is_abandoned: Whether the run was abandoned.

    Returns:
        Encounter dicts in map order. Rooms without a ``model_id`` are
        dropped. Empty when the run has no combat rooms for this player.

    Won-fight heuristic: every combat encounter is a win except the
    *last* combat room of a non-win, non-abandoned run whose encounter
    id matches ``killed_by_encounter``. Abandoned runs leave the final
    encounter as won_fight=1 since the player quit out rather than
    losing the fight.

    Exported so tools/backfill_run_encounters.py can replay archived
    run JSONs through the same logic without going through submit_run.
    """
    # `or []` rather than a .get() default: the key may be present with
    # a JSON null, which a default would not replace.
    acts = data.get("acts") or []
    map_history = data.get("map_point_history") or []

    # Pass 1: collect every combat room scoped to this player.
    combat_rooms: list[tuple[int, int, dict, dict]] = []
    for act_idx, act_floors in enumerate(map_history):
        for floor_idx, floor in enumerate(act_floors or []):
            # Use the FIRST stats entry that applies to this player so a
            # floor with several matching entries cannot emit each room
            # more than once.
            stats = next(
                (
                    ps
                    for ps in floor.get("player_stats") or []
                    if not ps.get("player_id") or ps["player_id"] == player_id
                ),
                None,
            )
            if stats is None:
                continue
            combat_rooms.extend(
                (act_idx, floor_idx, room, stats)
                for room in floor.get("rooms") or []
                if room.get("room_type") in _COMBAT_ROOM_TYPES
            )

    if not combat_rooms:
        return []

    # Pass 2: shape the rows and apply the won-fight heuristic.
    killed_by = clean_id(data.get("killed_by_encounter") or "") or None
    died_in_combat = bool(killed_by) and not is_win and not is_abandoned
    last_idx = len(combat_rooms) - 1

    encounters: list[dict] = []
    for i, (act_idx, floor_idx, room, stats) in enumerate(combat_rooms):
        encounter_id = clean_id(room.get("model_id") or "")
        if not encounter_id:
            continue
        lost = died_in_combat and i == last_idx and encounter_id == killed_by
        act_raw = acts[act_idx] if act_idx < len(acts) else None
        encounters.append(
            {
                "encounter_id": encounter_id,
                # Deduplicated and sorted so the output is deterministic.
                "monster_ids": sorted(
                    {clean_id(m) for m in room.get("monster_ids") or [] if m}
                ),
                "act_id": clean_id(act_raw) if act_raw else None,
                "room_type": room.get("room_type"),
                # 1-based index of the floor within its act.
                "floor": floor_idx + 1,
                # Read from the floor-level player stats entry, so several
                # combat rooms on one floor all carry the same figure.
                "damage_taken": int(stats.get("damage_taken", 0) or 0),
                "turns_taken": int(room.get("turns_taken", 0) or 0),
                "won_fight": 0 if lost else 1,
            }
        )
    return encounters


def _insert_run_encounters(conn, run_id: int, encounters: list[dict]) -> None:
    """Write parsed encounter rows + their monster join entries, all or nothing.

    The writes run inside a SAVEPOINT. If any row fails, every row this
    call wrote is rolled back and the exception is re-raised, while work
    the caller did earlier in the same transaction is left untouched.
    This matters because the backfill treats a run with at least one
    ``run_encounters`` row as already done; a partial write would
    therefore never be repaired.

    If the connection has no open transaction when this is called, the
    savepoint is the outermost one and releasing it commits the rows.

    Args:
        conn: Open database connection (sqlite3-style ``execute`` /
            ``executemany``).
        run_id: ``runs.id`` the encounters belong to.
        encounters: Dicts as produced by ``extract_run_encounters``.

    Raises:
        Exception: Whatever the failing statement raised, after rollback.
    """
    if not encounters:
        return

    conn.execute("SAVEPOINT run_encounters_insert")
    try:
        for enc in encounters:
            cursor = conn.execute(
                """INSERT INTO run_encounters
                   (run_id, encounter_id, act_id, room_type, floor,
                    damage_taken, turns_taken, won_fight)
                   VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    run_id,
                    enc["encounter_id"],
                    enc["act_id"],
                    enc["room_type"],
                    enc["floor"],
                    enc["damage_taken"],
                    enc["turns_taken"],
                    enc["won_fight"],
                ),
            )
            enc_row_id = cursor.lastrowid
            # OR IGNORE: the composite primary key collapses duplicates.
            conn.executemany(
                """INSERT OR IGNORE INTO run_encounter_monsters
                   (encounter_row_id, monster_id) VALUES (?, ?)""",
                [(enc_row_id, monster_id) for monster_id in enc["monster_ids"]],
            )
    except Exception:
        # Undo only this call's rows, then drop the savepoint so the
        # caller's transaction is left exactly as it was handed to us.
        conn.execute("ROLLBACK TO SAVEPOINT run_encounters_insert")
        conn.execute("RELEASE SAVEPOINT run_encounters_insert")
        raise
    conn.execute("RELEASE SAVEPOINT run_encounters_insert")
def _data_dir() -> Path:
    """Return the data root: ``$DATA_DIR`` when set, else ``<repo>/data``.

    An empty ``DATA_DIR`` is treated as unset rather than as the current
    working directory.
    """
    override = os.environ.get("DATA_DIR")
    if override:
        return Path(override)
    return HERE.parent / "data"


def _runs_dir() -> Path:
    """Return the directory holding the archived raw run JSON files."""
    return _data_dir() / "runs"


def _runs_already_backfilled(conn) -> set[int]:
    """Return run_ids that already have at least one encounter row."""
    rows = conn.execute("SELECT DISTINCT run_id FROM run_encounters")
    return {r["run_id"] for r in rows}


def _player_id_for_hash(conn, run_hash: str) -> "tuple[int, int] | None":
    """Look up the ``runs`` row for one hash.

    Returns ``(run_id, 0)`` when a row exists, else ``None``. The second
    element is a constant placeholder: this helper does NOT derive the
    player id.

    NOTE(review): nothing in this module calls this helper; ``backfill``
    resolves the player itself by matching on character. Candidate for
    removal.
    """
    row = conn.execute(
        "SELECT id, character FROM runs WHERE run_hash = ?", (run_hash,)
    ).fetchone()
    if not row:
        return None
    return (row["id"], 0)


def _match_player(data: dict, character: str) -> "dict | None":
    """Return the first player block in ``data`` whose character matches.

    The JSON value has its ``CHARACTER.`` prefix removed before it is
    compared with ``character``.
    """
    for player in data.get("players") or []:
        p_char = (player.get("character", "") or "").replace("CHARACTER.", "")
        if p_char == character:
            return player
    return None


def backfill(dry_run: bool = False) -> dict:
    """Populate the encounter tables from the archived run JSON files.

    For every ``*.json`` in the runs directory, the file stem is used as
    the run hash to find the ``runs`` row. Runs that already have
    encounter rows are skipped, which makes the function safe to re-run.

    Args:
        dry_run: Parse and count without inserting anything. In this mode
            ``encounters_inserted`` is the number that WOULD be written.

    Returns:
        Counter dict with the same keys on every path:
        ``runs_processed``, ``encounters_inserted``,
        ``skipped_already_backfilled``, ``skipped_no_db_row``,
        ``skipped_bad_json`` and ``skipped_no_player_match``.
    """
    stats = {
        "runs_processed": 0,
        "encounters_inserted": 0,
        "skipped_already_backfilled": 0,
        "skipped_no_db_row": 0,
        "skipped_bad_json": 0,
        "skipped_no_player_match": 0,
    }

    runs_dir = _runs_dir()
    if not runs_dir.exists():
        print(f"runs directory missing: {runs_dir}", file=sys.stderr)
        return stats

    with get_conn() as conn:
        already = _runs_already_backfilled(conn)

        for json_path in sorted(runs_dir.glob("*.json")):
            run_hash = json_path.stem
            row = conn.execute(
                "SELECT id, character, win, was_abandoned FROM runs WHERE run_hash = ?",
                (run_hash,),
            ).fetchone()
            if not row:
                stats["skipped_no_db_row"] += 1
                continue
            run_id = row["id"]
            if run_id in already:
                stats["skipped_already_backfilled"] += 1
                continue

            try:
                data = json.loads(json_path.read_text(encoding="utf-8"))
                if not isinstance(data, dict):
                    raise ValueError("top-level JSON value is not an object")
            except (OSError, ValueError) as exc:
                # ValueError covers both JSON and UTF-8 decode errors.
                print(f"!! {run_hash}: bad JSON ({exc})", file=sys.stderr)
                stats["skipped_bad_json"] += 1
                continue

            # Find which player in the JSON corresponds to this hash's row.
            # Matching is by character; when two players share a character
            # the first one in the JSON wins.
            char = row["character"]
            target_player = _match_player(data, char)
            if not target_player:
                print(
                    f"!! {run_hash}: no player matched character {char}",
                    file=sys.stderr,
                )
                stats["skipped_no_player_match"] += 1
                continue

            # Same inputs as the live submit path, so both produce the
            # same rows for the same run.
            encounters = extract_run_encounters(
                data,
                player_id=target_player.get("id", 1),
                is_win=bool(data.get("win", False)),
                is_abandoned=bool(row["was_abandoned"]),
            )
            stats["runs_processed"] += 1
            if not encounters:
                continue

            if not dry_run:
                _insert_run_encounters(conn, run_id, encounters)
            stats["encounters_inserted"] += len(encounters)

        if dry_run:
            # Nothing was written; this only guarantees a clean exit.
            conn.rollback()

    return stats


def main():
    """Parse CLI flags, run the backfill and print its counters as JSON."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the indented command examples of the module docstring
        # intact in --help instead of re-wrapping them into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Parse + report without writing rows.",
    )
    args = parser.parse_args()

    result = backfill(dry_run=args.dry_run)
    print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()