From b2fda8e816915b6e3090e67c83097ba7a3d78f45 Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Tue, 16 Jun 2026 10:18:58 +0300 Subject: [PATCH 1/4] fix(discovery): add missing discovery_runs.skipped_query_count column DiscoveryRun.skipped_query_count (the rolling-window search backstop's deliberately-skipped-query counter) is written by DiscoveryStore.write_run() via a full model_dump() POST to discovery_runs, but no migration ever added the column. Every run-metadata write therefore failed with "column skipped_query_count does not exist", blocking candidate persistence on the daily-review runs (issues #150/#152/#156/#184/#186). Add supabase/migrations/20260616_discovery_runs_skipped_query_count.sql, a regression test for the column, and a guard test asserting every DiscoveryRun field maps to a discovery_runs column so this class of drift is caught in CI. This also clears the schema break that would otherwise fail the UNIFY-PR-06 go-live dispatch. Co-Authored-By: Claude Opus 4.8 --- .agent-plan.md | 9 +++++++ ...616_discovery_runs_skipped_query_count.sql | 11 +++++++++ tests/unit/test_discovery_migration.py | 24 +++++++++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 supabase/migrations/20260616_discovery_runs_skipped_query_count.sql diff --git a/.agent-plan.md b/.agent-plan.md index fca4306..532a004 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -347,6 +347,15 @@ detection (no fail-open on `--no-pager`/`-c`/`-C` forms), per-`-C` target scanning, and an explicit script-relative `--config`. CI installs gitleaks so the guard's blocking tests run rather than skip. Documented under AGENTS.md “Secret Scanning”. This satisfies gate (a) of `UNIFY-PR-06`. +- [done] `FIX-DISCOVERY-RUNS-SCHEMA` (closes #150/#152/#156/#184/#186): add the missing + `discovery_runs.skipped_query_count` column. `DiscoveryRun` gained the field (the rolling-window + search backstop counts deliberately-skipped queries) and `DiscoveryStore.write_run()` posts the + full `model_dump()` to `discovery_runs` via PostgREST, but no migration ever added the column — so + every run-metadata write failed with “column skipped_query_count does not exist”, blocking + candidate persistence on the daily-review runs. New migration + `supabase/migrations/20260616_discovery_runs_skipped_query_count.sql` plus a regression test and a + guard test asserting every `DiscoveryRun` field maps to a `discovery_runs` column. Clears the + schema break that would otherwise fail the `UNIFY-PR-06` go-live dispatch. - [next] `UNIFY-PR-06` (operational, go-live): the state repo is **seeded** (key-scrubbed after the incident below) with the recovered core state from local `data/news_items` (27,568 candidates + queues/attempts/verdicts/budget/yield + backfill_batches/runs/metrics; excluded: prefilter models diff --git a/supabase/migrations/20260616_discovery_runs_skipped_query_count.sql b/supabase/migrations/20260616_discovery_runs_skipped_query_count.sql new file mode 100644 index 0000000..150d90c --- /dev/null +++ b/supabase/migrations/20260616_discovery_runs_skipped_query_count.sql @@ -0,0 +1,11 @@ +-- Add the discovery_runs.skipped_query_count column. +-- +-- DiscoveryRun gained a `skipped_query_count` field (the rolling-window search +-- backstop counts queries it deliberately skips), and the Supabase-backed +-- DiscoveryStore.write_run() posts the full model_dump() to discovery_runs via +-- PostgREST. The column was never added to the table, so every run-metadata +-- write failed with "column skipped_query_count does not exist", which blocked +-- candidate persistence on the daily-review runs. + +alter table public.discovery_runs + add column if not exists skipped_query_count integer not null default 0; diff --git a/tests/unit/test_discovery_migration.py b/tests/unit/test_discovery_migration.py index 4a2faaa..f95a0a1 100644 --- a/tests/unit/test_discovery_migration.py +++ b/tests/unit/test_discovery_migration.py @@ -2,7 +2,12 @@ from pathlib import Path +from denbust.discovery.models import DiscoveryRun + MIGRATION_PATH = Path("supabase/migrations/20260410_discovery_candidacy_foundation.sql") +SKIPPED_QUERY_COUNT_MIGRATION = Path( + "supabase/migrations/20260616_discovery_runs_skipped_query_count.sql" +) def test_discovery_migration_defines_required_tables() -> None: @@ -25,3 +30,22 @@ def test_discovery_migration_includes_core_columns() -> None: assert "metadata jsonb not null default '{}'::jsonb" in sql assert "errors jsonb not null default '[]'::jsonb" in sql assert "diagnostics jsonb not null default '{}'::jsonb" in sql + + +def test_discovery_runs_has_a_column_for_every_model_field() -> None: + """Every DiscoveryRun scalar field must map to a discovery_runs column across + the migrations, so write_run()'s model_dump() never posts an unknown column.""" + migrations = "\n".join( + p.read_text(encoding="utf-8") for p in sorted(Path("supabase/migrations").glob("*.sql")) + ) + # `errors` is the jsonb column; the rest are scalar columns on discovery_runs. + for field in DiscoveryRun.model_fields: + assert field in migrations, f"discovery_runs is missing a column for {field!r}" + + +def test_skipped_query_count_column_is_added() -> None: + """Regression for the missing-column persistence failure (issues #150/#152/ + #156/#184/#186): the column DiscoveryRun.skipped_query_count writes must exist.""" + sql = SKIPPED_QUERY_COUNT_MIGRATION.read_text(encoding="utf-8") + assert "add column if not exists skipped_query_count integer not null default 0" in sql + assert "alter table public.discovery_runs" in sql From b5e19635244b04b7f74795d34dfd948b18ccf9d4 Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Tue, 16 Jun 2026 10:20:51 +0300 Subject: [PATCH 2/4] docs(plan): record PR #223 for the discovery_runs schema fix Co-Authored-By: Claude Opus 4.8 --- .agent-plan.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.agent-plan.md b/.agent-plan.md index 532a004..6ee55c2 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -6,7 +6,11 @@ ## Mainline Status -- Last merged PR on main: `#220` (`GUARD-PR-SECRET-SCAN`, closes #218) — the three-layer +- Last merged PR on main: `#223` (`FIX-DISCOVERY-RUNS-SCHEMA`, closes #150/#152/#156/#184/#186) — + add the missing `discovery_runs.skipped_query_count` column whose absence was failing every + Supabase run-metadata write and blocking candidate persistence on daily-review runs; a guard test + now asserts every `DiscoveryRun` field maps to a column. Clears the schema break on the + `UNIFY-PR-06` go-live path. Prior: `#220` (`GUARD-PR-SECRET-SCAN`, closes #218) — the three-layer [gitleaks](https://github.com/gitleaks/gitleaks) secret-scan guard (the outer defense following the seed-time leak incident below): a shared `.gitleaks.toml`, a `pre-commit` pre-push hook, a fail-closed `scripts/state-run.sh` scan before each state push, and a Claude Code @@ -347,7 +351,7 @@ detection (no fail-open on `--no-pager`/`-c`/`-C` forms), per-`-C` target scanning, and an explicit script-relative `--config`. CI installs gitleaks so the guard's blocking tests run rather than skip. Documented under AGENTS.md “Secret Scanning”. This satisfies gate (a) of `UNIFY-PR-06`. -- [done] `FIX-DISCOVERY-RUNS-SCHEMA` (closes #150/#152/#156/#184/#186): add the missing +- [done] `FIX-DISCOVERY-RUNS-SCHEMA` (#223, closes #150/#152/#156/#184/#186): add the missing `discovery_runs.skipped_query_count` column. `DiscoveryRun` gained the field (the rolling-window search backstop counts deliberately-skipped queries) and `DiscoveryStore.write_run()` posts the full `model_dump()` to `discovery_runs` via PostgREST, but no migration ever added the column — so From 5e5748495535e21eef4261613791fb0f0126ca60 Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Tue, 16 Jun 2026 12:30:01 +0300 Subject: [PATCH 3/4] test(discovery): sound, all-model schema-parity guard + deploy note MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Self-review found the regression test was unsound: it substring-matched each DiscoveryRun field against ALL migrations concatenated, but discovery models share field names (status, errors, dataset_name, ...) with columns on other tables — so a column missing from discovery_runs could be masked by a same-named column elsewhere, and the guard would pass while production threw PGRST204. - Replace it with a per-table column extractor (scoped to each table's CREATE TABLE block + ALTER ... ADD COLUMN) and assert model_fields is a subset of the table's columns for ALL five persisted discovery models (DiscoveryRun, PersistentCandidate, BackfillBatch, ScrapeAttempt, CandidateProvenance), with an explicit (empty) per-model exception set. Drift in any of them now fails CI. Verified the other four tables are already in sync. - Add a deploy note to the migration: PGRST204 is a schema-cache miss, so after `supabase db push` PostgREST must reload its schema cache. Co-Authored-By: Claude Opus 4.8 --- ...616_discovery_runs_skipped_query_count.sql | 6 ++ tests/unit/test_discovery_migration.py | 83 ++++++++++++++++--- 2 files changed, 79 insertions(+), 10 deletions(-) diff --git a/supabase/migrations/20260616_discovery_runs_skipped_query_count.sql b/supabase/migrations/20260616_discovery_runs_skipped_query_count.sql index 150d90c..70aad25 100644 --- a/supabase/migrations/20260616_discovery_runs_skipped_query_count.sql +++ b/supabase/migrations/20260616_discovery_runs_skipped_query_count.sql @@ -9,3 +9,9 @@ alter table public.discovery_runs add column if not exists skipped_query_count integer not null default 0; + +-- Deploy note: the runtime error is PGRST204 ("not found in the schema cache"), +-- so this must be applied to the live project (supabase db push) AND PostgREST +-- must reload its schema cache before the write succeeds. PostgREST reloads +-- automatically on most managed deploys; if the 400 persists, force it with: +-- notify pgrst, 'reload schema'; diff --git a/tests/unit/test_discovery_migration.py b/tests/unit/test_discovery_migration.py index f95a0a1..d7b7e66 100644 --- a/tests/unit/test_discovery_migration.py +++ b/tests/unit/test_discovery_migration.py @@ -1,14 +1,67 @@ """Unit tests for the discovery-layer Supabase migration scaffold.""" +import re from pathlib import Path -from denbust.discovery.models import DiscoveryRun +from denbust.discovery.models import ( + BackfillBatch, + CandidateProvenance, + DiscoveryRun, + PersistentCandidate, + ScrapeAttempt, +) MIGRATION_PATH = Path("supabase/migrations/20260410_discovery_candidacy_foundation.sql") SKIPPED_QUERY_COUNT_MIGRATION = Path( "supabase/migrations/20260616_discovery_runs_skipped_query_count.sql" ) +_MIGRATIONS_DIR = Path("supabase/migrations") +_ALL_MIGRATIONS = "\n".join( + p.read_text(encoding="utf-8") for p in sorted(_MIGRATIONS_DIR.glob("*.sql")) +) + +# Tokens that begin a table-constraint line rather than a column definition. +_NON_COLUMN_TOKENS = {"primary", "unique", "constraint", "foreign", "check", "references"} + +# Each persisted discovery model and the table its writer POSTs a full +# ``model_dump()`` to. The third element lists fields that are intentionally NOT +# stored as a same-named column (none today); adding such a field is then a +# conscious choice rather than a silent production break. +_MODEL_TABLES: list[tuple[type, str, set[str]]] = [ + (DiscoveryRun, "discovery_runs", set()), + (PersistentCandidate, "persistent_candidates", set()), + (BackfillBatch, "backfill_batches", set()), + (ScrapeAttempt, "scrape_attempts", set()), + (CandidateProvenance, "candidate_provenance", set()), +] + + +def _table_columns(table: str) -> set[str]: + """Columns of ``public.``, parsed from its CREATE TABLE block and any + ALTER ... ADD COLUMN statements across all migrations. + + Scoped to the specific table — a substring search over all migrations is + unsound, because discovery models share field names (``status``, ``errors``, + ``dataset_name``, ...) with columns on *other* tables, so a column missing + from this table could be masked by a same-named column elsewhere. + """ + cols: set[str] = set() + create = re.search( + rf"create table if not exists public\.{table}\s*\((.*?)\);", + _ALL_MIGRATIONS, + re.S, + ) + if create: + for line in create.group(1).splitlines(): + match = re.match(r"\s*([a-z_]+)\s+\S", line) + if match and match.group(1) not in _NON_COLUMN_TOKENS: + cols.add(match.group(1)) + for alter in re.finditer(rf"alter table public\.{table}\b(.*?);", _ALL_MIGRATIONS, re.S): + for col in re.finditer(r"add column if not exists\s+([a-z_]+)", alter.group(1)): + cols.add(col.group(1)) + return cols + def test_discovery_migration_defines_required_tables() -> None: """The foundation migration should create the durable candidate-layer tables.""" @@ -32,15 +85,25 @@ def test_discovery_migration_includes_core_columns() -> None: assert "diagnostics jsonb not null default '{}'::jsonb" in sql -def test_discovery_runs_has_a_column_for_every_model_field() -> None: - """Every DiscoveryRun scalar field must map to a discovery_runs column across - the migrations, so write_run()'s model_dump() never posts an unknown column.""" - migrations = "\n".join( - p.read_text(encoding="utf-8") for p in sorted(Path("supabase/migrations").glob("*.sql")) - ) - # `errors` is the jsonb column; the rest are scalar columns on discovery_runs. - for field in DiscoveryRun.model_fields: - assert field in migrations, f"discovery_runs is missing a column for {field!r}" +def test_every_persisted_model_field_maps_to_a_table_column() -> None: + """Every field of every persisted discovery model must have a matching column + on its target table, so a writer's ``model_dump()`` POST never references an + unknown column (the PGRST204 class of failure that issues #150/#152/#156/#184/ + #186 reported for discovery_runs.skipped_query_count). + + This is the real regression guard: it covers ALL five model<->table pairs and + is scoped per table, so drift in any of them fails CI rather than waiting for + the daily-review bot to file the next ticket. + """ + failures: list[str] = [] + for model, table, exceptions in _MODEL_TABLES: + columns = _table_columns(table) + # A parsing failure would yield an empty set and (correctly) fail loudly. + assert columns, f"could not parse any columns for public.{table}" + missing = set(model.model_fields) - columns - exceptions + if missing: + failures.append(f"public.{table} is missing columns for {sorted(missing)}") + assert not failures, "model<->schema drift detected:\n" + "\n".join(failures) def test_skipped_query_count_column_is_added() -> None: From 18bca77c1844fe522af90c037e8e50177fa7649e Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Tue, 16 Jun 2026 12:31:59 +0300 Subject: [PATCH 4/4] docs(plan): refs (not closes) for the deploy-gated schema issues Co-Authored-By: Claude Opus 4.8 --- .agent-plan.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.agent-plan.md b/.agent-plan.md index 6ee55c2..11c33a4 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -6,7 +6,7 @@ ## Mainline Status -- Last merged PR on main: `#223` (`FIX-DISCOVERY-RUNS-SCHEMA`, closes #150/#152/#156/#184/#186) — +- Last merged PR on main: `#223` (`FIX-DISCOVERY-RUNS-SCHEMA`; refs #150/#152/#156/#184/#186, closed on deploy) — add the missing `discovery_runs.skipped_query_count` column whose absence was failing every Supabase run-metadata write and blocking candidate persistence on daily-review runs; a guard test now asserts every `DiscoveryRun` field maps to a column. Clears the schema break on the @@ -351,7 +351,7 @@ detection (no fail-open on `--no-pager`/`-c`/`-C` forms), per-`-C` target scanning, and an explicit script-relative `--config`. CI installs gitleaks so the guard's blocking tests run rather than skip. Documented under AGENTS.md “Secret Scanning”. This satisfies gate (a) of `UNIFY-PR-06`. -- [done] `FIX-DISCOVERY-RUNS-SCHEMA` (#223, closes #150/#152/#156/#184/#186): add the missing +- [done] `FIX-DISCOVERY-RUNS-SCHEMA` (#223; refs #150/#152/#156/#184/#186, closed on deploy): add the missing `discovery_runs.skipped_query_count` column. `DiscoveryRun` gained the field (the rolling-window search backstop counts deliberately-skipped queries) and `DiscoveryStore.write_run()` posts the full `model_dump()` to `discovery_runs` via PostgREST, but no migration ever added the column — so