From f77dd0572bb9168bce4fcb5a775c436806985e2f Mon Sep 17 00:00:00 2001
From: Peter Lord <im@ptrlrd.com>
Date: Wed, 13 May 2026 07:10:24 -0700
Subject: [PATCH] Add run_encounters + run_encounter_monsters tables + backfill
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Roadmap #3 phase 1 of 2 (schema + backfill, then UI in a follow-up).
Unlocks monster-level community stats (encounter rate, player win
rate vs monster, avg HP lost when fought, run-killer rank) which the
current schema couldn't answer — only `runs.killed_by` exists today
and it captures the boss that ended a losing run, nothing else.

### Schema (init_db, both auto-created on first run)

run_encounters (
  id, run_id, encounter_id, act_id, room_type, floor,
  damage_taken, turns_taken, won_fight
)

run_encounter_monsters (
  encounter_row_id, monster_id           -- composite PK collapses dups
)

Normalized split (instead of stuffing monster_ids as JSON in
run_encounters) so "win rate vs MONSTER" stays a regular indexed JOIN
without json_each(). Indexes on encounter_id, run_id, monster_id.

### Submit-time ingest

extract_run_encounters(data, player_id, is_win, is_abandoned) walks
map_point_history and returns one row per combat room scoped to the
target player. Won-fight heuristic: every combat is a win except the
last combat room of a non-win, non-abandoned run whose encounter_id
matches killed_by_encounter. Abandoned-early runs leave the final
encounter as won=1 (quit, not died).

_submit_player_run() calls the extractor and inserts rows after the
existing cards/relics/potions writes. Wrapped in try/except so a
parse failure doesn't roll back the primary run row — the encounter
table is an analytics surface, not a record of truth, and the
backfill picks up anything that misses live.

### Backfill — tools/backfill_run_encounters.py

Walks data/runs/*.json, looks up the row in `runs` by hash, and replays
the archived JSON through the same extract_run_encounters() helper the
submit path uses. Idempotent (skips runs already represented in
run_encounters). Reports counts.

Verified locally against 44 archived runs:
  - 523 encounter rows inserted (~12 / run, matches typical run length)
  - 696 monster join rows
  - Top encounters: NIBBITS_WEAK, SHRINKER_BEETLE_WEAK, CORPSE_SLUGS_WEAK
  - Second run with no new data: skipped 43, no-op on the 44th

Run on prod after this lands:
  docker exec spire-codex-backend \
    python3 /app/tools/backfill_run_encounters.py

### Follow-up

Next PR adds GET /api/runs/monster-stats/{monster_id} + a Stats tab on
/monsters/[id] that consumes it. No schema change required there —
this PR carries the load-bearing piece.
---
 backend/app/services/runs_db.py  | 147 +++++++++++++++++++++++++++
 tools/backfill_run_encounters.py | 165 +++++++++++++++++++++++++++++++
 2 files changed, 312 insertions(+)
 create mode 100644 tools/backfill_run_encounters.py

diff --git a/backend/app/services/runs_db.py b/backend/app/services/runs_db.py
index 2d173a4f..4e622549 100644
--- a/backend/app/services/runs_db.py
+++ b/backend/app/services/runs_db.py
@@ -213,6 +213,39 @@ def init_db():
 
             CREATE INDEX IF NOT EXISTS idx_run_potions_potion ON run_potions(potion_id);
             CREATE INDEX IF NOT EXISTS idx_run_potions_run ON run_potions(run_id);
+
+            -- Per-encounter rows for "win rate vs monster X" / "deadliest
+            -- encounter" / "damage taken vs Y" queries. Populated at
+            -- submit_run() time from map_point_history.rooms[]. Backfill
+            -- script: tools/backfill_run_encounters.py for runs landed
+            -- before this table existed.
+            CREATE TABLE IF NOT EXISTS run_encounters (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                run_id INTEGER NOT NULL REFERENCES runs(id),
+                encounter_id TEXT NOT NULL,
+                act_id TEXT,
+                room_type TEXT,
+                floor INTEGER,
+                damage_taken INTEGER NOT NULL DEFAULT 0,
+                turns_taken INTEGER NOT NULL DEFAULT 0,
+                won_fight INTEGER NOT NULL DEFAULT 1
+            );
+
+            -- monster_ids is a list per encounter (encounters can host
+            -- multiple monsters). Normalized into its own table so
+            -- "win rate vs MONSTER" doesn't need json_each() and stays
+            -- indexable. PRIMARY KEY collapses duplicate refs within
+            -- the same encounter row to a single entry.
+            CREATE TABLE IF NOT EXISTS run_encounter_monsters (
+                encounter_row_id INTEGER NOT NULL REFERENCES run_encounters(id),
+                monster_id TEXT NOT NULL,
+                PRIMARY KEY (encounter_row_id, monster_id)
+            );
+
+            CREATE INDEX IF NOT EXISTS idx_run_encounters_encounter ON run_encounters(encounter_id);
+            CREATE INDEX IF NOT EXISTS idx_run_encounters_run ON run_encounters(run_id);
+            CREATE INDEX IF NOT EXISTS idx_run_encounter_monsters_monster
+                ON run_encounter_monsters(monster_id);
         """)
 
         # Migrations — add columns to existing tables
@@ -257,6 +290,104 @@ def clean_id(raw_id: str) -> str:
     return raw_id
 
 
+def extract_run_encounters(
+    data: dict,
+    player_id: int,
+    is_win: bool,
+    is_abandoned: bool,
+) -> list[dict]:
+    """Walk map_point_history and return one row per combat room.
+
+    Each row is a dict with keys encounter_id, monster_ids, act_id,
+    room_type, floor, damage_taken, turns_taken and won_fight.
+
+    Won-fight heuristic: every combat encounter is a win except the
+    *last* combat room of a non-win, non-abandoned run whose encounter
+    id matches `killed_by_encounter`; abandoned runs keep won_fight=1
+    because the player quit rather than lost. Missing or JSON-null
+    containers are treated as empty instead of raising.
+
+    Exported so tools/backfill_run_encounters.py can replay archived
+    run JSONs through the same logic without going through submit_run.
+    """
+    acts = data.get("acts") or []
+    map_history = data.get("map_point_history") or []
+    killed_by = clean_id(data.get("killed_by_encounter") or "") or None
+
+    # Pass 1: combat rooms for this player (stats entries without an id match anyone).
+    combat_rooms: list[tuple[int, int, dict, dict]] = []
+    for act_idx, act_floors in enumerate(map_history):
+        for floor_idx, floor in enumerate(act_floors or []):
+            for ps in floor.get("player_stats") or []:
+                if ps.get("player_id") and ps["player_id"] != player_id:
+                    continue
+                for room in floor.get("rooms") or []:
+                    if room.get("room_type") in {"monster", "elite", "boss"}:
+                        combat_rooms.append((act_idx, floor_idx, room, ps))
+
+    if not combat_rooms:
+        return []
+
+    encounters: list[dict] = []
+    last_idx = len(combat_rooms) - 1  # index over all combat rooms, including ones skipped below
+    for i, (act_idx, floor_idx, room, ps) in enumerate(combat_rooms):
+        encounter_id = clean_id(room.get("model_id") or "") or ""
+        if not encounter_id:
+            continue
+        monster_ids = sorted({clean_id(m) for m in room.get("monster_ids") or [] if m})
+        act_raw = acts[act_idx] if act_idx < len(acts) else None
+        won = 1
+        if (
+            i == last_idx
+            and not is_win
+            and not is_abandoned
+            and killed_by
+            and encounter_id == killed_by
+        ):
+            won = 0
+        encounters.append(
+            {
+                "encounter_id": encounter_id,
+                "monster_ids": monster_ids,
+                "act_id": clean_id(act_raw) if act_raw else None,
+                "room_type": room.get("room_type"),
+                "floor": floor_idx + 1,  # 1-based position within the act
+                "damage_taken": int(ps.get("damage_taken", 0) or 0),  # from the floor's player_stats entry
+                "turns_taken": int(room.get("turns_taken", 0) or 0),
+                "won_fight": won,
+            }
+        )
+    return encounters
+
+
+def _insert_run_encounters(conn, run_id: int, encounters: list[dict]) -> None:
+    """Insert one run_encounters row per dict plus its monster join rows; no commit here."""
+    for enc in encounters:
+        cursor = conn.execute(
+            """INSERT INTO run_encounters
+               (run_id, encounter_id, act_id, room_type, floor,
+                damage_taken, turns_taken, won_fight)
+               VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
+            (
+                run_id,
+                enc["encounter_id"],
+                enc["act_id"],
+                enc["room_type"],
+                enc["floor"],
+                enc["damage_taken"],
+                enc["turns_taken"],
+                enc["won_fight"],
+            ),
+        )
+        enc_row_id = cursor.lastrowid  # id of the run_encounters row just inserted
+        for monster_id in enc["monster_ids"]:  # OR IGNORE below tolerates repeated monster ids
+            conn.execute(
+                """INSERT OR IGNORE INTO run_encounter_monsters
+                   (encounter_row_id, monster_id) VALUES (?, ?)""",
+                (enc_row_id, monster_id),
+            )
+
+
 def submit_run(data: dict, username: str | None = None) -> dict:
     """Parse and store a run. Returns status dict."""
     # Validate structure. Errors call out the specific field so failed
@@ -442,6 +573,22 @@ def _submit_player_run(
                 (run_id, pid, int(was_picked), was_used),
             )
 
+        # Per-encounter rows for /api/runs/monster-stats and the
+        # forthcoming Stats tab on /monsters/[id]. Failures here must
+        # not roll back the run row — the encounters table is a
+        # downstream analytics surface, not a primary record. Backfill
+        # script picks up anything that fails to parse here.
+        try:
+            encounters = extract_run_encounters(
+                data,
+                player_id=player_id,
+                is_win=bool(data.get("win", False)),
+                is_abandoned=bool(was_abandoned),
+            )
+            _insert_run_encounters(conn, run_id, encounters)
+        except Exception:
+            pass
+
     return {"success": True, "run_id": run_id, "run_hash": run_hash}
 
 
diff --git a/tools/backfill_run_encounters.py b/tools/backfill_run_encounters.py
new file mode 100644
index 00000000..d9149642
--- /dev/null
+++ b/tools/backfill_run_encounters.py
@@ -0,0 +1,165 @@
+"""Backfill `run_encounters` + `run_encounter_monsters` for runs submitted
+before the schema existed.
+
+Walks `data/runs/*.json` (the archived raw run submissions), looks up
+each run's `runs` row by hash, and replays `extract_run_encounters()` to
+populate the two new analytics tables. Idempotent: skips any run that
+already has rows in `run_encounters`.
+
+Designed to run on the prod host. SSH in, then:
+
+    cd /var/www/spire-codex
+    docker exec -it spire-codex-backend python3 -m tools.backfill_run_encounters
+
+Or locally:
+
+    DATA_DIR=$(pwd)/data python3 tools/backfill_run_encounters.py
+
+Reports the number of rows inserted; pass --dry-run to parse and report without writing.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+from pathlib import Path
+
+# Put the sibling backend/ directory on sys.path so `app.*` imports work regardless of cwd.
+HERE = Path(__file__).resolve().parent
+sys.path.insert(0, str(HERE.parent / "backend"))
+
+from app.services.runs_db import (  # noqa: E402
+    extract_run_encounters,
+    _insert_run_encounters,
+    get_conn,
+)
+
+
+def _data_dir() -> Path:
+    return Path(os.environ.get("DATA_DIR", HERE.parent / "data"))  # DATA_DIR env var, else <script dir>/../data
+
+
+def _runs_dir() -> Path:
+    return _data_dir() / "runs"  # directory globbed for *.json by backfill()
+
+
+def _runs_already_backfilled(conn) -> set[int]:
+    """Return run_ids that already have at least one encounter row."""
+    rows = conn.execute("SELECT DISTINCT run_id FROM run_encounters").fetchall()
+    return {r["run_id"] for r in rows}  # name-based access: assumes get_conn() yields Row-like rows — TODO confirm
+
+
+def _player_id_for_hash(conn, run_hash: str) -> tuple[int, int] | None:
+    """Look up the `runs` row for one hash and return (run_id, 0).
+
+    Returns None when no row has this run_hash. The second tuple element
+    is a hard-coded 0 placeholder: this function does not read the
+    archived JSON and does not resolve a player id or index.
+    NOTE(review): nothing in this script calls this helper; backfill()
+    does its own lookup and matches the player by character instead.
+    """
+    row = conn.execute(
+        "SELECT id, character FROM runs WHERE run_hash = ?", (run_hash,)
+    ).fetchone()
+    if not row:
+        return None
+    return (row["id"], 0)  # placeholder 0, not a resolved player id
+
+
+def backfill(dry_run: bool = False) -> dict:
+    """Replay archived run JSONs into the encounter tables.
+
+    Returns a dict of counters with the same four keys on every path,
+    including the early exit when the runs directory is missing. With
+    dry_run=True rows are parsed and counted but never written.
+    """
+    counts = {
+        "runs_processed": 0,
+        "encounters_inserted": 0,
+        "skipped_already_backfilled": 0,
+        "skipped_no_db_row": 0,
+    }
+    runs_dir = _runs_dir()
+    if not runs_dir.exists():
+        print(f"runs directory missing: {runs_dir}", file=sys.stderr)
+        return counts
+
+    with get_conn() as conn:
+        already = _runs_already_backfilled(conn)
+
+        for json_path in sorted(runs_dir.glob("*.json")):
+            run_hash = json_path.stem
+            row = conn.execute(
+                "SELECT id, character, win, was_abandoned FROM runs WHERE run_hash = ?",
+                (run_hash,),
+            ).fetchone()
+            if not row:
+                counts["skipped_no_db_row"] += 1
+                continue
+            run_id = row["id"]
+            if run_id in already:
+                counts["skipped_already_backfilled"] += 1
+                continue
+
+            try:
+                data = json.loads(json_path.read_text(encoding="utf-8"))
+            except Exception as exc:
+                print(f"!! {run_hash}: bad JSON ({exc})", file=sys.stderr)
+                continue
+
+            # Find which player in the JSON corresponds to this hash's row.
+            # The hash baked in `player_idx`; the easiest re-derivation is
+            # to match by character — single-character collisions across
+            # players in the same run are vanishingly rare (different
+            # characters per player is the multiplayer convention).
+            char = row["character"]
+            target_player = None
+            for p in data.get("players") or []:
+                p_char = (p.get("character", "") or "").replace("CHARACTER.", "")
+                if p_char == char:
+                    target_player = p
+                    break
+            if not target_player:
+                print(
+                    f"!! {run_hash}: no player matched character {char}",
+                    file=sys.stderr,
+                )
+                continue
+            player_id = target_player.get("id", 1)
+
+            encounters = extract_run_encounters(
+                data,
+                player_id=player_id,
+                is_win=bool(data.get("win", False)),
+                is_abandoned=bool(row["was_abandoned"]),
+            )
+            # A run with no combat rooms still counts as processed.
+            counts["runs_processed"] += 1
+            counts["encounters_inserted"] += len(encounters)
+            if encounters and not dry_run:
+                _insert_run_encounters(conn, run_id, encounters)
+
+        if dry_run:
+            # Inserts are skipped in dry-run mode; discard any open transaction.
+            conn.rollback()
+
+    return counts
+
+
+def main():
+    parser = argparse.ArgumentParser(description=__doc__)  # module docstring doubles as the --help text
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Parse + report without writing rows.",
+    )
+    args = parser.parse_args()
+
+    result = backfill(dry_run=args.dry_run)
+    print(json.dumps(result, indent=2))  # summary counters as JSON on stdout
+
+
+if __name__ == "__main__":
+    main()