diff --git a/.agent-plan.md b/.agent-plan.md index fca4306..11c33a4 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -6,7 +6,11 @@ ## Mainline Status -- Last merged PR on main: `#220` (`GUARD-PR-SECRET-SCAN`, closes #218) — the three-layer +- Last merged PR on main: `#223` (`FIX-DISCOVERY-RUNS-SCHEMA`; refs #150/#152/#156/#184/#186, closed on deploy) — + add the missing `discovery_runs.skipped_query_count` column whose absence was failing every + Supabase run-metadata write and blocking candidate persistence on daily-review runs; a guard test + now asserts every `DiscoveryRun` field maps to a column. Clears the schema break on the + `UNIFY-PR-06` go-live path. Prior: `#220` (`GUARD-PR-SECRET-SCAN`, closes #218) — the three-layer [gitleaks](https://github.com/gitleaks/gitleaks) secret-scan guard (the outer defense following the seed-time leak incident below): a shared `.gitleaks.toml`, a `pre-commit` pre-push hook, a fail-closed `scripts/state-run.sh` scan before each state push, and a Claude Code @@ -347,6 +351,15 @@ detection (no fail-open on `--no-pager`/`-c`/`-C` forms), per-`-C` target scanning, and an explicit script-relative `--config`. CI installs gitleaks so the guard's blocking tests run rather than skip. Documented under AGENTS.md “Secret Scanning”. This satisfies gate (a) of `UNIFY-PR-06`. +- [done] `FIX-DISCOVERY-RUNS-SCHEMA` (#223; refs #150/#152/#156/#184/#186, closed on deploy): add the missing + `discovery_runs.skipped_query_count` column. `DiscoveryRun` gained the field (the rolling-window + search backstop counts deliberately-skipped queries) and `DiscoveryStore.write_run()` posts the + full `model_dump()` to `discovery_runs` via PostgREST, but no migration ever added the column — so + every run-metadata write failed with “column skipped_query_count does not exist”, blocking + candidate persistence on the daily-review runs. New migration + `supabase/migrations/20260616_discovery_runs_skipped_query_count.sql` plus a regression test and a + guard test asserting every `DiscoveryRun` field maps to a `discovery_runs` column. Clears the + schema break that would otherwise fail the `UNIFY-PR-06` go-live dispatch. - [next] `UNIFY-PR-06` (operational, go-live): the state repo is **seeded** (key-scrubbed after the incident below) with the recovered core state from local `data/news_items` (27,568 candidates + queues/attempts/verdicts/budget/yield + backfill_batches/runs/metrics; excluded: prefilter models diff --git a/supabase/migrations/20260616_discovery_runs_skipped_query_count.sql b/supabase/migrations/20260616_discovery_runs_skipped_query_count.sql new file mode 100644 index 0000000..70aad25 --- /dev/null +++ b/supabase/migrations/20260616_discovery_runs_skipped_query_count.sql @@ -0,0 +1,17 @@ +-- Add the discovery_runs.skipped_query_count column. +-- +-- DiscoveryRun gained a `skipped_query_count` field (the rolling-window search +-- backstop counts queries it deliberately skips), and the Supabase-backed +-- DiscoveryStore.write_run() posts the full model_dump() to discovery_runs via +-- PostgREST. The column was never added to the table, so every run-metadata +-- write failed with "column skipped_query_count does not exist", which blocked +-- candidate persistence on the daily-review runs. + +alter table public.discovery_runs + add column if not exists skipped_query_count integer not null default 0; + +-- Deploy note: the runtime error is PGRST204 ("not found in the schema cache"), +-- so this must be applied to the live project (supabase db push) AND PostgREST +-- must reload its schema cache before the write succeeds. PostgREST reloads +-- automatically on most managed deploys; if the 400 persists, force it with: +-- notify pgrst, 'reload schema'; diff --git a/tests/unit/test_discovery_migration.py b/tests/unit/test_discovery_migration.py index 4a2faaa..d7b7e66 100644 --- a/tests/unit/test_discovery_migration.py +++ b/tests/unit/test_discovery_migration.py @@ -1,8 +1,66 @@ """Unit tests for the discovery-layer Supabase migration scaffold.""" +import re from pathlib import Path +from denbust.discovery.models import ( + BackfillBatch, + CandidateProvenance, + DiscoveryRun, + PersistentCandidate, + ScrapeAttempt, +) + MIGRATION_PATH = Path("supabase/migrations/20260410_discovery_candidacy_foundation.sql") +SKIPPED_QUERY_COUNT_MIGRATION = Path( + "supabase/migrations/20260616_discovery_runs_skipped_query_count.sql" +) + +_MIGRATIONS_DIR = Path("supabase/migrations") +_ALL_MIGRATIONS = "\n".join( + p.read_text(encoding="utf-8") for p in sorted(_MIGRATIONS_DIR.glob("*.sql")) +) + +# Tokens that begin a table-constraint line rather than a column definition. +_NON_COLUMN_TOKENS = {"primary", "unique", "constraint", "foreign", "check", "references"} + +# Each persisted discovery model and the table its writer POSTs a full +# ``model_dump()`` to. The third element lists fields that are intentionally NOT +# stored as a same-named column (none today); adding such a field is then a +# conscious choice rather than a silent production break. +_MODEL_TABLES: list[tuple[type, str, set[str]]] = [ + (DiscoveryRun, "discovery_runs", set()), + (PersistentCandidate, "persistent_candidates", set()), + (BackfillBatch, "backfill_batches", set()), + (ScrapeAttempt, "scrape_attempts", set()), + (CandidateProvenance, "candidate_provenance", set()), +] + + +def _table_columns(table: str) -> set[str]: + """Columns of ``public.