DataHackIL · shaypal5 · Jun 16, 2026 · Jun 16, 2026 · Jun 16, 2026 · Jun 16, 2026
diff --git a/.agent-plan.md b/.agent-plan.md
@@ -6,7 +6,11 @@
 
 ## Mainline Status
 
-- Last merged PR on main: `#220` (`GUARD-PR-SECRET-SCAN`, closes #218) — the three-layer
+- Last merged PR on main: `#223` (`FIX-DISCOVERY-RUNS-SCHEMA`; refs #150/#152/#156/#184/#186, closed on deploy) —
+  add the missing `discovery_runs.skipped_query_count` column whose absence was failing every
+  Supabase run-metadata write and blocking candidate persistence on daily-review runs; a guard test
+  now asserts every `DiscoveryRun` field maps to a column. Clears the schema break on the
+  `UNIFY-PR-06` go-live path. Prior: `#220` (`GUARD-PR-SECRET-SCAN`, closes #218) — the three-layer
   [gitleaks](https://github.com/gitleaks/gitleaks) secret-scan guard (the outer defense following
   the seed-time leak incident below): a shared `.gitleaks.toml`, a `pre-commit` pre-push hook, a
   fail-closed `scripts/state-run.sh` scan before each state push, and a Claude Code
@@ -347,6 +351,15 @@
   detection (no fail-open on `--no-pager`/`-c`/`-C` forms), per-`-C` target scanning, and an
   explicit script-relative `--config`. CI installs gitleaks so the guard's blocking tests run rather
   than skip. Documented under AGENTS.md “Secret Scanning”. This satisfies gate (a) of `UNIFY-PR-06`.
+- [done] `FIX-DISCOVERY-RUNS-SCHEMA` (#223; refs #150/#152/#156/#184/#186, closed on deploy): add the missing
+  `discovery_runs.skipped_query_count` column. `DiscoveryRun` gained the field (the rolling-window
+  search backstop counts deliberately-skipped queries) and `DiscoveryStore.write_run()` posts the
+  full `model_dump()` to `discovery_runs` via PostgREST, but no migration ever added the column — so
+  every run-metadata write failed with “column skipped_query_count does not exist”, blocking
+  candidate persistence on the daily-review runs. New migration
+  `supabase/migrations/20260616_discovery_runs_skipped_query_count.sql` plus a regression test and a
+  guard test asserting every `DiscoveryRun` field maps to a `discovery_runs` column. Clears the
+  schema break that would otherwise fail the `UNIFY-PR-06` go-live dispatch.
 - [next] `UNIFY-PR-06` (operational, go-live): the state repo is **seeded** (key-scrubbed after the
   incident below) with the recovered core state from local `data/news_items` (27,568 candidates +
   queues/attempts/verdicts/budget/yield + backfill_batches/runs/metrics; excluded: prefilter models

diff --git a/supabase/migrations/20260616_discovery_runs_skipped_query_count.sql b/supabase/migrations/20260616_discovery_runs_skipped_query_count.sql
@@ -0,0 +1,17 @@
+-- Add the discovery_runs.skipped_query_count column.
+--
+-- DiscoveryRun gained a `skipped_query_count` field (the rolling-window search
+-- backstop counts queries it deliberately skips), and the Supabase-backed
+-- DiscoveryStore.write_run() posts the full model_dump() to discovery_runs via
+-- PostgREST. The column was never added to the table, so every run-metadata
+-- write failed with "column skipped_query_count does not exist", which blocked
+-- candidate persistence on the daily-review runs.
+
+alter table public.discovery_runs
+    add column if not exists skipped_query_count integer not null default 0;
+
+-- Deploy note: the runtime error is PGRST204 ("not found in the schema cache"),
+-- so this must be applied to the live project (supabase db push) AND PostgREST
+-- must reload its schema cache before the write succeeds. PostgREST reloads
+-- automatically on most managed deploys; if the 400 persists, force it with:
+--   notify pgrst, 'reload schema';
diff --git a/tests/unit/test_discovery_migration.py b/tests/unit/test_discovery_migration.py
@@ -1,8 +1,66 @@
 """Unit tests for the discovery-layer Supabase migration scaffold."""
 
+import re
 from pathlib import Path
 
+from denbust.discovery.models import (
+    BackfillBatch,
+    CandidateProvenance,
+    DiscoveryRun,
+    PersistentCandidate,
+    ScrapeAttempt,
+)
+
 MIGRATION_PATH = Path("supabase/migrations/20260410_discovery_candidacy_foundation.sql")
+SKIPPED_QUERY_COUNT_MIGRATION = Path(
+    "supabase/migrations/20260616_discovery_runs_skipped_query_count.sql"
+)
+
+_MIGRATIONS_DIR = Path("supabase/migrations")
+_ALL_MIGRATIONS = "\n".join(
+    p.read_text(encoding="utf-8") for p in sorted(_MIGRATIONS_DIR.glob("*.sql"))
+)
+
+# Tokens that begin a table-constraint line rather than a column definition.
+_NON_COLUMN_TOKENS = {"primary", "unique", "constraint", "foreign", "check", "references"}
+
+# Each persisted discovery model and the table its writer POSTs a full
+# ``model_dump()`` to. The third element lists fields that are intentionally NOT
+# stored as a same-named column (none today); adding such a field is then a
+# conscious choice rather than a silent production break.
+_MODEL_TABLES: list[tuple[type, str, set[str]]] = [
+    (DiscoveryRun, "discovery_runs", set()),
+    (PersistentCandidate, "persistent_candidates", set()),
+    (BackfillBatch, "backfill_batches", set()),
+    (ScrapeAttempt, "scrape_attempts", set()),
+    (CandidateProvenance, "candidate_provenance", set()),
+]
+
+
+def _table_columns(table: str) -> set[str]:
+    """Columns of ``public.<table>``, parsed from its CREATE TABLE block and any
+    ALTER ... ADD COLUMN statements across all migrations.
+
+    Scoped to the specific table — a substring search over all migrations is
+    unsound, because discovery models share field names (``status``, ``errors``,
+    ``dataset_name``, ...) with columns on *other* tables, so a column missing
+    from this table could be masked by a same-named column elsewhere.
+    """
+    cols: set[str] = set()
+    create = re.search(
+        rf"create table if not exists public\.{table}\s*\((.*?)\);",
+        _ALL_MIGRATIONS,
+        re.S,
+    )
+    if create:
+        for line in create.group(1).splitlines():
+            match = re.match(r"\s*([a-z_]+)\s+\S", line)
+            if match and match.group(1) not in _NON_COLUMN_TOKENS:
+                cols.add(match.group(1))
+    for alter in re.finditer(rf"alter table public\.{table}\b(.*?);", _ALL_MIGRATIONS, re.S):
+        for col in re.finditer(r"add column if not exists\s+([a-z_]+)", alter.group(1)):
+            cols.add(col.group(1))
+    return cols
 
 
 def test_discovery_migration_defines_required_tables() -> None:
@@ -25,3 +83,32 @@ def test_discovery_migration_includes_core_columns() -> None:
     assert "metadata jsonb not null default '{}'::jsonb" in sql
     assert "errors jsonb not null default '[]'::jsonb" in sql
     assert "diagnostics jsonb not null default '{}'::jsonb" in sql
+
+
+def test_every_persisted_model_field_maps_to_a_table_column() -> None:
+    """Every field of every persisted discovery model must have a matching column
+    on its target table, so a writer's ``model_dump()`` POST never references an
+    unknown column (the PGRST204 class of failure that issues #150/#152/#156/#184/
+    #186 reported for discovery_runs.skipped_query_count).
+
+    This is the real regression guard: it covers ALL five model<->table pairs and
+    is scoped per table, so drift in any of them fails CI rather than waiting for
+    the daily-review bot to file the next ticket.
+    """
+    failures: list[str] = []
+    for model, table, exceptions in _MODEL_TABLES:
+        columns = _table_columns(table)
+        # A parsing failure would yield an empty set and (correctly) fail loudly.
+        assert columns, f"could not parse any columns for public.{table}"
+        missing = set(model.model_fields) - columns - exceptions
+        if missing:
+            failures.append(f"public.{table} is missing columns for {sorted(missing)}")
+    assert not failures, "model<->schema drift detected:\n" + "\n".join(failures)
+
+
+def test_skipped_query_count_column_is_added() -> None:
+    """Regression for the missing-column persistence failure (issues #150/#152/
+    #156/#184/#186): the column DiscoveryRun.skipped_query_count writes must exist."""
+    sql = SKIPPED_QUERY_COUNT_MIGRATION.read_text(encoding="utf-8")
+    assert "add column if not exists skipped_query_count integer not null default 0" in sql
+    assert "alter table public.discovery_runs" in sql