diff --git a/.agent-plan.md b/.agent-plan.md index fca4306..11c33a4 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -6,7 +6,11 @@ ## Mainline Status -- Last merged PR on main: `#220` (`GUARD-PR-SECRET-SCAN`, closes #218) — the three-layer +- Last merged PR on main: `#223` (`FIX-DISCOVERY-RUNS-SCHEMA`; refs #150/#152/#156/#184/#186, closed on deploy) — + add the missing `discovery_runs.skipped_query_count` column whose absence was failing every + Supabase run-metadata write and blocking candidate persistence on daily-review runs; a guard test + now asserts every `DiscoveryRun` field maps to a column. Clears the schema break on the + `UNIFY-PR-06` go-live path. Prior: `#220` (`GUARD-PR-SECRET-SCAN`, closes #218) — the three-layer [gitleaks](https://github.com/gitleaks/gitleaks) secret-scan guard (the outer defense following the seed-time leak incident below): a shared `.gitleaks.toml`, a `pre-commit` pre-push hook, a fail-closed `scripts/state-run.sh` scan before each state push, and a Claude Code @@ -347,6 +351,15 @@ detection (no fail-open on `--no-pager`/`-c`/`-C` forms), per-`-C` target scanning, and an explicit script-relative `--config`. CI installs gitleaks so the guard's blocking tests run rather than skip. Documented under AGENTS.md “Secret Scanning”. This satisfies gate (a) of `UNIFY-PR-06`. +- [done] `FIX-DISCOVERY-RUNS-SCHEMA` (#223; refs #150/#152/#156/#184/#186, closed on deploy): add the missing + `discovery_runs.skipped_query_count` column. `DiscoveryRun` gained the field (the rolling-window + search backstop counts deliberately-skipped queries) and `DiscoveryStore.write_run()` posts the + full `model_dump()` to `discovery_runs` via PostgREST, but no migration ever added the column — so + every run-metadata write failed with “column skipped_query_count does not exist”, blocking + candidate persistence on the daily-review runs. New migration + `supabase/migrations/20260616_discovery_runs_skipped_query_count.sql` plus a regression test and a + guard test asserting every `DiscoveryRun` field maps to a `discovery_runs` column. Clears the + schema break that would otherwise fail the `UNIFY-PR-06` go-live dispatch. - [next] `UNIFY-PR-06` (operational, go-live): the state repo is **seeded** (key-scrubbed after the incident below) with the recovered core state from local `data/news_items` (27,568 candidates + queues/attempts/verdicts/budget/yield + backfill_batches/runs/metrics; excluded: prefilter models diff --git a/supabase/migrations/20260616_discovery_runs_skipped_query_count.sql b/supabase/migrations/20260616_discovery_runs_skipped_query_count.sql new file mode 100644 index 0000000..70aad25 --- /dev/null +++ b/supabase/migrations/20260616_discovery_runs_skipped_query_count.sql @@ -0,0 +1,17 @@ +-- Add the discovery_runs.skipped_query_count column. +-- +-- DiscoveryRun gained a `skipped_query_count` field (the rolling-window search +-- backstop counts queries it deliberately skips), and the Supabase-backed +-- DiscoveryStore.write_run() posts the full model_dump() to discovery_runs via +-- PostgREST. The column was never added to the table, so every run-metadata +-- write failed with "column skipped_query_count does not exist", which blocked +-- candidate persistence on the daily-review runs. + +alter table public.discovery_runs + add column if not exists skipped_query_count integer not null default 0; + +-- Deploy note: the runtime error is PGRST204 ("not found in the schema cache"), +-- so this must be applied to the live project (supabase db push) AND PostgREST +-- must reload its schema cache before the write succeeds. PostgREST reloads +-- automatically on most managed deploys; if the 400 persists, force it with: +-- notify pgrst, 'reload schema'; diff --git a/tests/unit/test_discovery_migration.py b/tests/unit/test_discovery_migration.py index 4a2faaa..d7b7e66 100644 --- a/tests/unit/test_discovery_migration.py +++ b/tests/unit/test_discovery_migration.py @@ -1,8 +1,66 @@ """Unit tests for the discovery-layer Supabase migration scaffold.""" +import re from pathlib import Path +from denbust.discovery.models import ( + BackfillBatch, + CandidateProvenance, + DiscoveryRun, + PersistentCandidate, + ScrapeAttempt, +) + MIGRATION_PATH = Path("supabase/migrations/20260410_discovery_candidacy_foundation.sql") +SKIPPED_QUERY_COUNT_MIGRATION = Path( + "supabase/migrations/20260616_discovery_runs_skipped_query_count.sql" +) + +_MIGRATIONS_DIR = Path("supabase/migrations") +_ALL_MIGRATIONS = "\n".join( + p.read_text(encoding="utf-8") for p in sorted(_MIGRATIONS_DIR.glob("*.sql")) +) + +# Tokens that begin a table-constraint line rather than a column definition. +_NON_COLUMN_TOKENS = {"primary", "unique", "constraint", "foreign", "check", "references"} + +# Each persisted discovery model and the table its writer POSTs a full +# ``model_dump()`` to. The third element lists fields that are intentionally NOT +# stored as a same-named column (none today); adding such a field is then a +# conscious choice rather than a silent production break. +_MODEL_TABLES: list[tuple[type, str, set[str]]] = [ + (DiscoveryRun, "discovery_runs", set()), + (PersistentCandidate, "persistent_candidates", set()), + (BackfillBatch, "backfill_batches", set()), + (ScrapeAttempt, "scrape_attempts", set()), + (CandidateProvenance, "candidate_provenance", set()), +] + + +def _table_columns(table: str) -> set[str]: + """Columns of ``public.``, parsed from its CREATE TABLE block and any + ALTER ... ADD COLUMN statements across all migrations. + + Scoped to the specific table — a substring search over all migrations is + unsound, because discovery models share field names (``status``, ``errors``, + ``dataset_name``, ...) with columns on *other* tables, so a column missing + from this table could be masked by a same-named column elsewhere. + """ + cols: set[str] = set() + create = re.search( + rf"create table if not exists public\.{table}\s*\((.*?)\);", + _ALL_MIGRATIONS, + re.S, + ) + if create: + for line in create.group(1).splitlines(): + match = re.match(r"\s*([a-z_]+)\s+\S", line) + if match and match.group(1) not in _NON_COLUMN_TOKENS: + cols.add(match.group(1)) + for alter in re.finditer(rf"alter table public\.{table}\b(.*?);", _ALL_MIGRATIONS, re.S): + for col in re.finditer(r"add column if not exists\s+([a-z_]+)", alter.group(1)): + cols.add(col.group(1)) + return cols def test_discovery_migration_defines_required_tables() -> None: @@ -25,3 +83,32 @@ def test_discovery_migration_includes_core_columns() -> None: assert "metadata jsonb not null default '{}'::jsonb" in sql assert "errors jsonb not null default '[]'::jsonb" in sql assert "diagnostics jsonb not null default '{}'::jsonb" in sql + + +def test_every_persisted_model_field_maps_to_a_table_column() -> None: + """Every field of every persisted discovery model must have a matching column + on its target table, so a writer's ``model_dump()`` POST never references an + unknown column (the PGRST204 class of failure that issues #150/#152/#156/#184/ + #186 reported for discovery_runs.skipped_query_count). + + This is the real regression guard: it covers ALL five model<->table pairs and + is scoped per table, so drift in any of them fails CI rather than waiting for + the daily-review bot to file the next ticket. + """ + failures: list[str] = [] + for model, table, exceptions in _MODEL_TABLES: + columns = _table_columns(table) + # A parsing failure would yield an empty set and (correctly) fail loudly. + assert columns, f"could not parse any columns for public.{table}" + missing = set(model.model_fields) - columns - exceptions + if missing: + failures.append(f"public.{table} is missing columns for {sorted(missing)}") + assert not failures, "model<->schema drift detected:\n" + "\n".join(failures) + + +def test_skipped_query_count_column_is_added() -> None: + """Regression for the missing-column persistence failure (issues #150/#152/ + #156/#184/#186): the column DiscoveryRun.skipped_query_count writes must exist.""" + sql = SKIPPED_QUERY_COUNT_MIGRATION.read_text(encoding="utf-8") + assert "add column if not exists skipped_query_count integer not null default 0" in sql + assert "alter table public.discovery_runs" in sql