From 2c4d5af503fbc6fa5688099266f2954fe3f8b120 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 22 Jun 2026 19:09:21 +0000 Subject: [PATCH] Add ci-passed release admission to decouple merges from deploy time Co-authored-by: mberman84 --- .mergequeue.example.toml | 4 ++ README.md | 4 ++ docs/reference.md | 3 +- src/agent_merge_queue/cli.py | 26 ++++++++-- src/agent_merge_queue/config.py | 14 +++++ src/agent_merge_queue/pipeline.py | 14 +++++ tests/test_cli.py | 85 +++++++++++++++++++++++++++++++ tests/test_config.py | 13 +++++ tests/test_docs.py | 1 + tests/test_pipeline.py | 36 +++++++++++++ 10 files changed, 195 insertions(+), 5 deletions(-) diff --git a/.mergequeue.example.toml b/.mergequeue.example.toml index b48af47..1985104 100644 --- a/.mergequeue.example.toml +++ b/.mergequeue.example.toml @@ -52,6 +52,10 @@ merge_to_live_target_minutes = 10 auto_promote = true intent_scope = "head" # safest: a trusted source agent refreshes replacement heads pause_on_failure = true +# verified (default, safest) holds new merges until the cumulative release is +# live. ci-passed admits the next batch as soon as exact-main CI passes and lets +# deploy and health checks keep following, trading blast radius for throughput. +release_admission = "verified" # Receives best-effort events, including retryable thread-deployed messages. # webhook_url_env = "DEPLOYBOT_WEBHOOK_URL" diff --git a/README.md b/README.md index 680fcc1..7364986 100644 --- a/README.md +++ b/README.md @@ -174,6 +174,9 @@ A larger indivisible source-overlap or dependency closure is the sole exception: it ships alone, never mixed with unrelated work. After any merge, admission stays closed until the cumulative exact-main release is verified live, preventing newer merges from starving an older deployment. 
+Set `pipeline.release_admission = "ci-passed"` to reopen admission as soon as +exact-main CI is green, while deploy and health checks keep following in the +background. Use it when higher merge throughput is worth a larger failure blast radius. Draft status and incomplete checks or reviews remain waiting states; they do not create a repair latch. A conflict, failed gate, unresolved review, manual block, or stale authorized head @@ -262,6 +265,7 @@ merge_to_live_target_minutes = 10 auto_promote = true intent_scope = "head" pause_on_failure = true +release_admission = "verified" # or "ci-passed" for higher merge throughput [[pipeline.verifications]] name = "Login" diff --git a/docs/reference.md b/docs/reference.md index 6a6d078..382b34b 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -170,7 +170,8 @@ Provider fields are: | `ci_failure_grace_seconds` | Non-negative window for an exact-main CI retry to replace a failed attempt before the release fails. Default: 90. | | `promotion_workers` | Positive maximum number of deploy requests promoted concurrently. Default: 4. | | `repair_hold_minutes` | Positive maximum time that a genuine repair may hold overlapping ready work without becoming merge-eligible. Default: 60. | -| `hold_merges_while_releasing` | Default `true`; after a merge, admit no newer batch until the cumulative exact-main revision is verified live. | +| `hold_merges_while_releasing` | Default `true`; after a merge, admit no newer batch until the release reaches the `release_admission` gate. | +| `release_admission` | How far an in-flight release must progress before the next batch is admitted; allowed: `verified` (default, safest) waits for the cumulative exact-main revision to be live, `ci-passed` reopens admission once exact-main CI is green while deploy and health checks keep following in the background. 
`ci-passed` trades a larger failure blast radius for throughput, and verification and notifications for a release may be emitted by a later reaction rather than the merging one. | | `repair_branch_prefix` | Deterministic release-repair lease branch prefix; default `"deploybot/repair"`. | | `ready_to_merge_target_minutes` | Positive request-to-ready and queued-to-merge timing target; default 15. | | `merge_to_live_target_minutes` | Positive timing target; default 10. | diff --git a/src/agent_merge_queue/cli.py b/src/agent_merge_queue/cli.py index cac0e07..58626a3 100755 --- a/src/agent_merge_queue/cli.py +++ b/src/agent_merge_queue/cli.py @@ -5116,12 +5116,14 @@ def command_follow( poll_seconds: int, json_output: bool, emit: bool = True, + admit_gate: str = "verified", ) -> dict[str, Any]: try: result = follow_release( client, timeout_seconds=timeout_seconds, poll_seconds=poll_seconds, + admit_gate=admit_gate, ) except QueueError as error: if client.config.pipeline.pause_on_failure: @@ -5527,7 +5529,20 @@ def command_react( release_is_verified = release_before_merge["state"] == "verified" if release_is_verified: client.record_verified_main(current_main_sha) - if has_release_owner and not release_is_verified: + # In "verified" mode the next batch waits for the cumulative release to + # be fully live. In "ci-passed" mode admission reopens as soon as + # exact-main CI is green (deploy in flight), so merges stop waiting on + # deploy and health-check time. 
+ admission_gate = client.config.pipeline.release_admission + admitted_states = {"verified"} + if admission_gate == "ci-passed": + admitted_states |= {"awaiting-deploy", "deploying"} + release_admitted = ( + release_already_verified + or release_is_verified + or release_before_merge.get("state") in admitted_states + ) + if has_release_owner and not release_admitted: release = release_before_merge if follow: release = command_follow( @@ -5536,8 +5551,9 @@ def command_react( poll_seconds=10, json_output=False, emit=False, + admit_gate=admission_gate, ) - if release.get("state") != "verified": + if release.get("state") not in admitted_states: result = { "state": "release-held", "release": release, @@ -5551,8 +5567,9 @@ def command_react( } print(json.dumps(result, indent=2, sort_keys=True)) return result - release_completed_before_merge = True - release_before_batch = release + if release.get("state") == "verified": + release_completed_before_merge = True + release_before_batch = release def own_integration_checks( numbers: Iterable[int] | None = None, @@ -5760,6 +5777,7 @@ def own_integration_checks( poll_seconds=10, json_output=False, emit=False, + admit_gate=client.config.pipeline.release_admission, ) result = { "state": "complete", diff --git a/src/agent_merge_queue/config.py b/src/agent_merge_queue/config.py index 4934a61..168cbdd 100644 --- a/src/agent_merge_queue/config.py +++ b/src/agent_merge_queue/config.py @@ -55,6 +55,7 @@ class PipelineConfig: auto_promote: bool intent_scope: str pause_on_failure: bool + release_admission: str webhook_url_env: str | None verifications: tuple[VerificationConfig, ...] 
@@ -91,6 +92,7 @@ class QueueConfig: ALLOWED_MERGE_METHODS = {"merge", "squash", "rebase"} ALLOWED_INTEGRATION_MODES = {"manual", "overlap", "all"} ALLOWED_INTENT_SCOPES = {"head"} +ALLOWED_RELEASE_ADMISSION = {"verified", "ci-passed"} DEFAULT_CONFIG = """\ [queue] base_branch = "main" @@ -136,6 +138,11 @@ class QueueConfig: auto_promote = true intent_scope = "head" pause_on_failure = true +# verified (safest): hold new merges until the cumulative release is live. +# ci-passed: admit the next batch once exact-main CI passes; deploy and health +# checks keep following in the background, trading a larger failure blast radius +# for higher merge throughput. +release_admission = "verified" # Receives best-effort events, including retryable thread-deployed messages. # webhook_url_env = "DEPLOYBOT_WEBHOOK_URL" @@ -417,6 +424,12 @@ def parse_config(payload: dict[str, Any]) -> QueueConfig: if intent_scope not in ALLOWED_INTENT_SCOPES: allowed = ", ".join(sorted(ALLOWED_INTENT_SCOPES)) raise ConfigError(f"pipeline.intent_scope must be one of: {allowed}") + release_admission = _require_string( + pipeline.get("release_admission"), "pipeline.release_admission", "verified" + ) + if release_admission not in ALLOWED_RELEASE_ADMISSION: + allowed = ", ".join(sorted(ALLOWED_RELEASE_ADMISSION)) + raise ConfigError(f"pipeline.release_admission must be one of: {allowed}") return QueueConfig( base_branch=_require_string( @@ -530,6 +543,7 @@ def parse_config(payload: dict[str, Any]) -> QueueConfig: "pipeline.pause_on_failure", True, ), + release_admission=release_admission, webhook_url_env=webhook_url_env, verifications=_verifications(pipeline.get("verifications")), ), diff --git a/src/agent_merge_queue/pipeline.py b/src/agent_merge_queue/pipeline.py index 88b0e0d..40497cb 100644 --- a/src/agent_merge_queue/pipeline.py +++ b/src/agent_merge_queue/pipeline.py @@ -209,6 +209,7 @@ def follow_release( *, timeout_seconds: int, poll_seconds: int, + admit_gate: str = "verified", ) -> dict[str, 
Any]: deadline = time.monotonic() + timeout_seconds observed_sha = "" @@ -262,6 +263,19 @@ def follow_release( client.dispatch_deploy_workflows(ci_run=ci) ) dispatched_for.add(key) + if admit_gate == "ci-passed" and value["state"] in { + "awaiting-deploy", + "deploying", + }: + # Exact-main CI already passed and the deployment is in flight. Hand + # control back so the next batch can merge; a later reaction (or the + # scheduled reconciliation) follows this deployment through to + # verification, records the watermark, and emits notifications. + return { + **value, + "dispatched_deployments": dispatched_deployments, + "verifications": [], + } if value["state"] == "verified": checks = http_verifications(client.config.pipeline) last_verifications = checks diff --git a/tests/test_cli.py b/tests/test_cli.py index 2fef07c..e1348d5 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2299,6 +2299,90 @@ def test_reactor_requires_configured_health_before_reopening_admission(self) -> self.assertEqual(result["release"]["state"], "verify-failed") promote.assert_not_called() + def test_reactor_holds_at_awaiting_deploy_by_default(self) -> None: + sha = "a" * 40 + client = Mock() + client.config = CONFIG + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = sha + client.verified_main_sha.return_value = None + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + } + ] + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch( + "agent_merge_queue.cli.command_follow", + return_value={"state": "awaiting-deploy", "main_sha": sha}, + ) as follow, + patch("agent_merge_queue.cli.command_promote") as promote, + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=True, timeout_seconds=10) + + self.assertEqual(result["state"], "release-held") + promote.assert_not_called() + 
self.assertEqual( + follow.call_args.kwargs["admit_gate"], "verified" + ) + + def test_reactor_admits_at_ci_passed_when_configured(self) -> None: + sha = "a" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "ci-passed"}, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = sha + client.verified_main_sha.return_value = None + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + } + ] + frozen = FreezeResult(None, [], [], [], []) + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch("agent_merge_queue.cli.settle_integration_checks", return_value=[]), + patch("agent_merge_queue.cli.promote_integrations", return_value=[]), + patch( + "agent_merge_queue.cli.command_promote", + return_value={"promoted": [], "waiting": [], "blocked": []}, + ) as promote, + patch("agent_merge_queue.cli.freeze_queue", return_value=frozen), + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) + + # CI is green on the in-flight release, so admission reopens instead of + # waiting for the deploy to finish. 
+ self.assertNotEqual(result.get("state"), "release-held") + promote.assert_called_once() + client.record_verified_main.assert_not_called() + def test_release_repair_claim_creates_one_deterministic_lease(self) -> None: sha = "a" * 40 client = object.__new__(GitHub) @@ -5477,6 +5561,7 @@ def test_reactor_follows_release_without_a_new_merge(self) -> None: poll_seconds=10, json_output=False, emit=False, + admit_gate="verified", ) self.assertEqual(result["release"], release) diff --git a/tests/test_config.py b/tests/test_config.py index 61b1ff5..b24ca8a 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -115,6 +115,7 @@ def test_init_creates_a_loadable_safe_policy(self) -> None: self.assertEqual(path.name, ".mergequeue.toml") self.assertEqual(config.required_checks, ("CI",)) + self.assertEqual(config.pipeline.release_admission, "verified") self.assertEqual( config.trusted_actors, ("@repository-owner",), @@ -188,6 +189,7 @@ def test_parses_pipeline_integration_and_health_policy(self) -> None: "promotion_workers": 3, "repair_hold_minutes": 90, "hold_merges_while_releasing": False, + "release_admission": "ci-passed", "repair_branch_prefix": "repairs/main", "auto_promote": False, "verifications": [ @@ -212,6 +214,7 @@ def test_parses_pipeline_integration_and_health_policy(self) -> None: self.assertEqual(config.pipeline.promotion_workers, 3) self.assertEqual(config.pipeline.repair_hold_minutes, 90) self.assertFalse(config.pipeline.hold_merges_while_releasing) + self.assertEqual(config.pipeline.release_admission, "ci-passed") self.assertEqual(config.pipeline.repair_branch_prefix, "repairs/main") self.assertFalse(config.pipeline.auto_promote) self.assertEqual(config.pipeline.verifications[0].expected_status, 200) @@ -293,6 +296,16 @@ def test_rejects_invalid_integration_mode_and_boolean(self) -> None: "pipeline": {"intent_scope": "pull-request"}, } ) + with self.assertRaisesRegex(ConfigError, "release_admission"): + parse_config( + { + "queue": { + 
"required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "deployed"}, + } + ) with self.assertRaisesRegex(ConfigError, "batch_settle_seconds"): parse_config( { diff --git a/tests/test_docs.py b/tests/test_docs.py index 32e6938..b4df3e3 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -138,6 +138,7 @@ def test_reference_names_every_policy_field(self) -> None: "auto_promote", "intent_scope", "pause_on_failure", + "release_admission", "webhook_url_env", "verifications", "name", diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index c062175..9243871 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -172,6 +172,42 @@ def test_follow_dispatches_deploy_after_token_dispatched_ci(self) -> None: self.assertEqual(result["state"], "verified") self.assertEqual(result["dispatched_deployments"][0]["id"], 9) + def test_follow_admits_at_ci_passed_without_waiting_for_deploy(self) -> None: + sha = "a" * 40 + ci = { + "id": 1, + "name": "CI", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + "event": "workflow_dispatch", + "created_at": "2026-06-20T00:00:00Z", + } + client = Mock() + client.config = CONFIG + client.base_sha.return_value = sha + client.workflow_runs.return_value = [ci] + client.dispatch_deploy_workflows.return_value = [ + {"id": 9, "name": "Deploy", "ci_sha": sha, "ci_run_id": 1} + ] + with ( + patch("agent_merge_queue.pipeline.time.sleep") as sleep, + patch("agent_merge_queue.pipeline.time.monotonic", return_value=0), + ): + result = follow_release( + client, + timeout_seconds=10, + poll_seconds=1, + admit_gate="ci-passed", + ) + + # CI is green, so admission returns immediately even though the deploy is + # only just dispatched and never verified within this call. 
+ client.dispatch_deploy_workflows.assert_called_once() + self.assertEqual(result["state"], "awaiting-deploy") + self.assertEqual(result["dispatched_deployments"][0]["id"], 9) + sleep.assert_not_called() + def test_follow_absorbs_a_ci_rerun_during_failure_grace(self) -> None: sha = "a" * 40 failed = {