From c7276156cf2e960f8496d91f521fdbfbb48dd2b1 Mon Sep 17 00:00:00 2001 From: krystian Date: Fri, 22 May 2026 22:01:59 -0400 Subject: [PATCH] fix: add beta 6 grammar shape pressure --- docs/governance/DECISIONS.md | 27 +++++++++++++++++++++ docs/governance/SESSION_HANDOFF.md | 9 +++++-- docs/research/BETA_6_FAIL_PRESSURE_PULSE.md | 17 +++++++++++++ docs/runtime/ARCHITECTURE.md | 10 +++++--- src/probaboracle/config.py | 7 +++++- tests/test_agent.py | 10 ++++++++ 6 files changed, 74 insertions(+), 6 deletions(-) diff --git a/docs/governance/DECISIONS.md b/docs/governance/DECISIONS.md index d1256c8..5098445 100644 --- a/docs/governance/DECISIONS.md +++ b/docs/governance/DECISIONS.md @@ -1038,3 +1038,30 @@ If a decision crosses layers, say so plainly instead of flattening the method in - Why: The first valid pulse already produced enough evidence to plan the next method slice. More live generation would spend limited prepaid credits while adding pressure before the failure family has been digested. + +## D-056: First Beta 6.0 correction adds grammatical-shape pressure + +- Date: `2026-05-22` +- Category: `runtime_engineering` +- Tags: `beta_6`, `why_prompt`, `shape_first`, `soft_drift` +- Provenance: `failed pulse planning surface with implementation decision` +- Decision: + - use failed pulse rows `4850-4863` as the correction surface + - keep the prompt surface fixed and do not add a phrase bank + - keep the correction grammatical and shape-first: + - choose one plain sentence claim + - make grammar carry the answer shape + - prefer one clear subject and finite verb + - keep imagery secondary to the sentence claim + - vary sentence openings across samples + - do not run another live pulse until the rate-limit / prepaid-credit boundary + is cleared +- Validation: + - `make lint-docs` + - `git diff --check` + - `make check` +- Why: The first valid `why` pulse did not fail because the prompt needed + content examples. It failed because the model kept replacing the answer shape + with repeated soft abstraction. The smallest correction is to make the + sentence grammar carry more of the shape while still preserving the fixed + prompt surface and non-concrete oracle contract. diff --git a/docs/governance/SESSION_HANDOFF.md b/docs/governance/SESSION_HANDOFF.md index 1f795a3..481c36e 100644 --- a/docs/governance/SESSION_HANDOFF.md +++ b/docs/governance/SESSION_HANDOFF.md @@ -141,6 +141,12 @@ Useful current reads: - excluded noise: `0` - false-start batches `4790-4804`, `4805-4819`, and `4820-4849` were discarded from the active eval surface +- first correction after the failed pulse is shape-first and grammar-led: + - choose one plain sentence claim + - make grammar carry the answer shape + - prefer one clear subject and finite verb + - keep imagery secondary to the sentence claim + - vary sentence openings across samples - Stop condition for the next session: - do not start another live pulse until rate limits and prepaid credits are confirmed healthy @@ -169,8 +175,7 @@ Choose one lane at a time: - use the one-sample-per-minute pulse default unless the method changes - label rows as pulse evidence only - treat the first valid pulse verdict as `FAIL` - - decide the smallest correction that breaks the repeated soft-drift family - without reintroducing phrase pools + - validate the first grammar-led correction before any live rerun - do not start another live pulse until the rate-limit / prepaid-credit boundary is cleared - docs: diff --git a/docs/research/BETA_6_FAIL_PRESSURE_PULSE.md b/docs/research/BETA_6_FAIL_PRESSURE_PULSE.md index 23d0777..a482c25 100644 --- a/docs/research/BETA_6_FAIL_PRESSURE_PULSE.md +++ b/docs/research/BETA_6_FAIL_PRESSURE_PULSE.md @@ -99,6 +99,23 @@ soft-drift family without returning to hard-coded phrase scaffolds. Live reruns are paused until rate limits and prepaid credits are healthy again. +## First Correction Surface + +The first correction is shape-first and grammar-led. It does not add prompt +examples or a phrase bank. + +Runtime pressure now asks each response to: + +- choose one plain sentence claim +- make grammar carry the answer shape +- prefer one clear subject and finite verb +- keep imagery secondary to the sentence claim +- vary sentence openings across samples + +That correction targets the repeated soft-drift family while keeping the +fixed-prompt pulse method unchanged. The next live pulse should wait until the +rate-limit / prepaid-credit boundary is healthy. + ## Relationship To Beta 5.1 `Research Beta 5.1` remains the closed row-level baseline: diff --git a/docs/runtime/ARCHITECTURE.md b/docs/runtime/ARCHITECTURE.md index e8c59de..76cf6cb 100644 --- a/docs/runtime/ARCHITECTURE.md +++ b/docs/runtime/ARCHITECTURE.md @@ -50,9 +50,13 @@ Explicit subcommands such as `ask`, `sample`, `eval-list`, and `judge` remain av - indecision - connective hinges - soft conclusions -5. `agent.py` runs one OpenAI Agents SDK generation node. -6. The model resolves the final sentence structure inside that node. -7. The CLI prints the final response. +5. The runtime asks grammar to carry the answer shape: + - one plain sentence claim + - one clear subject and finite verb + - imagery secondary to the sentence claim +6. `agent.py` runs one OpenAI Agents SDK generation node. +7. The model resolves the final sentence structure inside that node. +8. The CLI prints the final response. The runtime is not stitched from static fragments. The shared style signals are cues for synthesis, not a fixed word bank. diff --git a/src/probaboracle/config.py b/src/probaboracle/config.py index d705cd3..47e7812 100644 --- a/src/probaboracle/config.py +++ b/src/probaboracle/config.py @@ -47,7 +47,8 @@ PIPELINE_STEPS: tuple[str, ...] = ( "read slot", - "compose one line", + "choose one plain sentence claim", + "compose one complete line", "remove useful detail", ) @@ -59,6 +60,10 @@ OUTPUT_GUARDS: tuple[str, ...] = ( "keep the final line fully lowercase", + "make grammar carry the answer shape", + "prefer one clear subject and finite verb", + "keep imagery secondary to the sentence claim", + "vary sentence openings across samples", "keep repeated structures rare", "avoid stock openers and closers", "do not echo slot labels", diff --git a/tests/test_agent.py b/tests/test_agent.py index 87faaf5..00fda84 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -39,6 +39,11 @@ def test_build_prompt_uses_shape_contract_without_lane_examples(self) -> None: self.assertIn("Shared style signals:", prompt) self.assertIn("compact", prompt) self.assertIn("slot c", prompt) + self.assertIn("choose one plain sentence claim", prompt) + self.assertIn("make grammar carry the answer shape", prompt) + self.assertIn("prefer one clear subject and finite verb", prompt) + self.assertIn("keep imagery secondary to the sentence claim", prompt) + self.assertIn("vary sentence openings across samples", prompt) self.assertNotIn("Lane example:", prompt) self.assertNotIn("Selected prompt type:", prompt) self.assertNotIn("why", prompt.lower()) @@ -51,6 +56,11 @@ def test_build_prompt_uses_shape_contract_without_lane_examples(self) -> None: self.assertNotIn("source", prompt.lower()) self.assertNotIn("temporal", prompt.lower()) self.assertNotIn("spatial", prompt.lower()) + self.assertNotIn("drift", prompt.lower()) + self.assertNotIn("whisper", prompt.lower()) + self.assertNotIn("horizon", prompt.lower()) + self.assertNotIn("settle", prompt.lower()) + self.assertNotIn("land", prompt.lower()) self.assertNotIn("closure", prompt.lower()) self.assertNotIn("direct answer", prompt.lower()) self.assertNotIn("payoff", prompt.lower())