From f2438ffe7c482834b74edd494c1c382d22d8cdee Mon Sep 17 00:00:00 2001 From: Matthew Berman <748450+mberman84@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:42:41 -0700 Subject: [PATCH 01/16] Add minimum-latency merged release admission --- .mergequeue.example.toml | 9 +- README.md | 32 ++- action.yml | 2 +- .../claude-code/skills/deploybot/SKILL.md | 13 +- .../skills/manage-merge-queue/SKILL.md | 9 +- .../skills/deploybot/SKILL.md | 14 +- .../skills/manage-merge-queue/SKILL.md | 9 +- adapters/cursor/.cursor/rules/deploybot.mdc | 4 + .../.cursor/rules/manage-merge-queue.mdc | 4 +- adapters/cursor/AGENTS.md | 4 +- docs/reference.md | 6 +- examples/github-workflow.yml | 7 +- pyproject.toml | 2 +- skills/deploybot/SKILL.md | 13 +- skills/manage-merge-queue/SKILL.md | 10 +- src/agent_merge_queue/__init__.py | 2 +- src/agent_merge_queue/cli.py | 35 +-- src/agent_merge_queue/config.py | 18 +- src/agent_merge_queue/mcp_server.py | 2 +- src/agent_merge_queue/pipeline.py | 35 ++- tests/test_cli.py | 199 +++++++++++++++++- tests/test_config.py | 3 +- tests/test_pipeline.py | 73 ++++++- 23 files changed, 416 insertions(+), 89 deletions(-) diff --git a/.mergequeue.example.toml b/.mergequeue.example.toml index 1985104..5389065 100644 --- a/.mergequeue.example.toml +++ b/.mergequeue.example.toml @@ -44,6 +44,7 @@ registry_title = "DeployBot delivery registry" thread_active_hours = 72 ci_workflows = ["CI"] deploy_workflows = ["Deploy"] +batch_settle_seconds = 0 repair_hold_minutes = 60 hold_merges_while_releasing = true repair_branch_prefix = "deploybot/repair" @@ -52,10 +53,10 @@ merge_to_live_target_minutes = 10 auto_promote = true intent_scope = "head" # safest: a trusted source agent refreshes replacement heads pause_on_failure = true -# verified (default, safest) holds new merges until the cumulative release is -# live. ci-passed admits the next batch as soon as exact-main CI passes and lets -# deploy and health checks keep following, trading blast radius for throughput. -release_admission = "verified" +# merged (default) admits independent ready work immediately after the previous +# merge. ci-passed waits for exact-main CI; verified waits until production is +# live. Every mode pauses future merges when a later release failure is observed. +release_admission = "merged" # Receives best-effort events, including retryable thread-deployed messages. # webhook_url_env = "DEPLOYBOT_WEBHOOK_URL" diff --git a/README.md b/README.md index 99851d3..47b4713 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ integration PRs, follows `main` through production, and pauses after failures. ## Install -Install the reviewed `v0.2.24` source commit directly from GitHub: +Install the reviewed `v0.2.25` source commit directly from GitHub: ```bash python3 -m pip install \ @@ -90,9 +90,12 @@ completions, and completed external check suites. Keep its `workflows` list aligned with `pipeline.ci_workflows`. A five-minute scheduled reconciliation rereads all durable state in case GitHub concurrency coalesces the last pending event in a burst. The privileged worker never checks out or executes -pull-request code. The Action follows releases by default so the same serialized -worker can dispatch deployment when GitHub suppresses the `workflow_run` event -for token-dispatched CI. Pin the Action to the full reviewed release commit: +pull-request code. The Action advances releases to the configured admission +gate. In the default `merged` mode it returns after each healthy observation, +leaving completion to later release events and keeping the serialized merge +worker free. It can still dispatch deployment when GitHub suppresses the +`workflow_run` event for token-dispatched CI. Pin the Action to the full reviewed +release commit: ```yaml - uses: Forward-Future/DeployBot@73004ea7c9dcb81e7f1281c0687aea0897d1571d @@ -141,6 +144,13 @@ workflow name, base branch, head SHA, event, status, and conclusion to match the expected successful exact-main CI run. The deployment must still pull the current base branch and stop if it no longer equals `ci_sha`. +The deployment workflow must also acquire the repository's shared deployment +lock, fetch the base branch again after acquiring it, and coalesce superseded +requests onto that newest integrated SHA. It must never deploy an older SHA after +a newer one, and it keeps the lock through production health verification. These +release rules are unchanged by `release_admission = "merged"`; only merge +admission becomes asynchronous. + The workflow bot and each person allowed to request deployment must be explicitly listed: @@ -172,11 +182,11 @@ work, and creates integration PRs when configured. New batches contain at most `integration.max_batch_size` entries; later FIFO work remains in the next batch. A larger indivisible source-overlap or dependency closure is the sole exception: it ships alone, never mixed with unrelated work. -After any merge, admission stays closed until the cumulative exact-main release -is verified live, preventing newer merges from starving an older deployment. -Set `pipeline.release_admission = "ci-passed"` to reopen admission as soon as -exact-main CI is green—deploy and health checks keep following in the -background—when higher merge throughput is worth a larger failure blast radius. +By default, `pipeline.release_admission = "merged"`: after one healthy merge, +DeployBot immediately admits the next independent ready PR or batch. Exact-main +CI, deployment, and health checks keep tracking asynchronously, and a later real +failure pauses future merges. Use `ci-passed` to wait for exact-main CI before +admitting more work, or `verified` to wait until the cumulative revision is live. Draft status and incomplete checks or reviews remain waiting states; they do not create a repair latch. A conflict, failed gate, unresolved review, manual block, or stale authorized head @@ -269,7 +279,7 @@ ending the PR-opening-thread response. [pipeline] ci_workflows = ["CI"] deploy_workflows = ["Deploy"] -batch_settle_seconds = 15 +batch_settle_seconds = 0 ci_failure_grace_seconds = 90 promotion_workers = 4 hold_merges_while_releasing = true @@ -279,7 +289,7 @@ merge_to_live_target_minutes = 10 auto_promote = true intent_scope = "head" pause_on_failure = true -release_admission = "verified" # or "ci-passed" for higher merge throughput +release_admission = "merged" # or "ci-passed" / "verified" for stricter admission [[pipeline.verifications]] name = "Login" diff --git a/action.yml b/action.yml index 70341cf..8546666 100644 --- a/action.yml +++ b/action.yml @@ -6,7 +6,7 @@ inputs: required: false default: .mergequeue.toml follow: - description: Follow cumulative main through CI and deployment + description: Advance cumulative main to the configured release-admission gate required: false default: "true" dispatch_ci: diff --git a/adapters/claude-code/skills/deploybot/SKILL.md b/adapters/claude-code/skills/deploybot/SKILL.md index 4045725..707674c 100644 --- a/adapters/claude-code/skills/deploybot/SKILL.md +++ b/adapters/claude-code/skills/deploybot/SKILL.md @@ -87,8 +87,10 @@ Merge independent ready pull requests back-to-back. Route source-overlap groups through `create_integration_pull_request`; when policy mode is `all`, validate the entire frozen batch through that cumulative PR. Never invent a conflict resolution. Return the repair packet to its source thread, then call `resume` -after its new exact head passes. Finish with `follow_release`, following newer -cumulative base heads until CI, deployment, and configured health checks verify. +after its new exact head passes. Keep release tracking event-driven: in +`release_admission = "merged"` mode, admit independent ready work immediately +after a healthy merge while later events continue CI, deployment, and health +tracking. Scheduled reconciliation is a fallback, not the normal promotion path. Genuine repair blocks may hold overlapping ready work for the configured bounded repair window, but they remain merge-ineligible until the trusted source agent @@ -123,9 +125,10 @@ named thread already owns that failed SHA; wait for that repair and never create a competing PR. The owner is encoded in the atomic branch ref, so a registry write failure is recovered by calling the same tool again. -New batches are FIFO-bounded by `integration.max_batch_size`, and a merged batch -closes admission until its cumulative main revision is verified live. Do not -override either boundary for later work. Never execute merged PR code inside +New batches are FIFO-bounded by `integration.max_batch_size`. Honor the configured +release-admission gate: `merged` permits the next independent batch immediately, +while `ci-passed` and `verified` impose stricter release fences. A later observed +release failure pauses future merges in every mode. Never execute merged PR code inside the privileged coordinator; generated-artifact conflicts go to the elected repair owner for a normal reviewed rebuild. When PR-authored checks are required, use a GitHub App installation token, list its bot login in diff --git a/adapters/claude-code/skills/manage-merge-queue/SKILL.md b/adapters/claude-code/skills/manage-merge-queue/SKILL.md index d377d79..16745ec 100644 --- a/adapters/claude-code/skills/manage-merge-queue/SKILL.md +++ b/adapters/claude-code/skills/manage-merge-queue/SKILL.md @@ -26,8 +26,9 @@ poll or merge an unlabeled PR. Use `pipeline_status` and `react_to_delivery_event` for bursts. Skip blockers, honor dependencies, route overlap or cumulative validation through `create_integration_pull_request`, return repair packets to the source thread, -and use `resume_pull_request` after fresh review. Finish with `follow_release`; -a failed CI or deployment pauses the pipeline until verified recovery. +and use `resume_pull_request` after fresh review. In `release_admission = +"merged"` mode, admit independent ready work immediately after merge while +later events track CI and deployment; a later failure pauses the pipeline. A genuine repair remains merge-ineligible, but DeployBot may temporarily hold overlapping ready work for the configured bounded repair window so concurrent @@ -35,8 +36,8 @@ merges do not repeatedly invalidate the replacement head. Before creating an exact-main recovery, call `claim_release_repair`; only the returned `owned` thread may use the deterministic repair branch. Respect the -maximum batch size and keep new merges closed while an earlier release is -unfinished. +maximum batch size and the selected `merged`, `ci-passed`, or `verified` +release-admission fence. Immediately before asking the user to `unpause` or take another repair action, call `pipeline_status` again. Never show a stale pause prompt when durable state diff --git a/adapters/codex/agent-merge-queue/skills/deploybot/SKILL.md b/adapters/codex/agent-merge-queue/skills/deploybot/SKILL.md index b85760c..055a843 100644 --- a/adapters/codex/agent-merge-queue/skills/deploybot/SKILL.md +++ b/adapters/codex/agent-merge-queue/skills/deploybot/SKILL.md @@ -81,9 +81,10 @@ Merge independent ready pull requests back-to-back. Route source-overlap groups through `deploybot integrate`; when policy mode is `all`, validate the entire frozen batch through that cumulative PR. Never invent a conflict resolution. Return the repair packet to its source thread, then run `deploybot resume` after -its new exact head passes. Finish with `deploybot follow --json`, following -newer cumulative base heads until CI, deployment, and configured health checks -verify. +its new exact head passes. Keep release tracking event-driven: in +`release_admission = "merged"` mode, admit independent ready work immediately +after a healthy merge while later events continue CI, deployment, and health +tracking. Scheduled reconciliation is a fallback, not the normal promotion path. Genuine repair blocks may hold overlapping ready work for the configured bounded repair window, but they remain merge-ineligible until the trusted source agent @@ -118,9 +119,10 @@ named thread already owns that failed SHA; wait for that repair and never create a competing PR. The owner is encoded in the atomic branch ref, so a registry write failure is recovered by calling the same tool again. -New batches are FIFO-bounded by `integration.max_batch_size`, and a merged batch -closes admission until its cumulative main revision is verified live. Do not -override either boundary for later work. Never execute merged PR code inside +New batches are FIFO-bounded by `integration.max_batch_size`. Honor the configured +release-admission gate: `merged` permits the next independent batch immediately, +while `ci-passed` and `verified` impose stricter release fences. A later observed +release failure pauses future merges in every mode. Never execute merged PR code inside the privileged coordinator; generated-artifact conflicts go to the elected repair owner for a normal reviewed rebuild. When PR-authored checks are required, use a GitHub App installation token, list its bot login in diff --git a/adapters/codex/agent-merge-queue/skills/manage-merge-queue/SKILL.md b/adapters/codex/agent-merge-queue/skills/manage-merge-queue/SKILL.md index 46860b7..48b56b5 100644 --- a/adapters/codex/agent-merge-queue/skills/manage-merge-queue/SKILL.md +++ b/adapters/codex/agent-merge-queue/skills/manage-merge-queue/SKILL.md @@ -27,8 +27,9 @@ Run `deploybot status --json` before a burst and `deploybot react` to coordinate it. Merge independent ready PRs back-to-back, skip blocked work, honor explicit dependencies, and use `deploybot integrate` for overlaps or a cumulative batch gate. Return repair packets to their source thread and run `deploybot resume` -after fresh review. Finish with `deploybot follow --json`; a failed CI or -deployment pauses the pipeline until verified recovery. +after fresh review. In `release_admission = "merged"` mode, admit independent +ready work immediately after merge while later events track CI and deployment; +a later failure pauses the pipeline. A genuine repair remains merge-ineligible, but DeployBot may temporarily hold overlapping ready work for the configured bounded repair window so concurrent @@ -36,8 +37,8 @@ merges do not repeatedly invalidate the replacement head. Before creating an exact-main recovery, run `deploybot claim-release-repair`; only the returned `owned` thread may use the deterministic repair branch. Respect the -maximum batch size and keep new merges closed while an earlier release is -unfinished. +maximum batch size and the selected `merged`, `ci-passed`, or `verified` +release-admission fence. Immediately before asking the user to `unpause` or take another repair action, run `deploybot status --json` again. Never show a stale pause prompt when diff --git a/adapters/cursor/.cursor/rules/deploybot.mdc b/adapters/cursor/.cursor/rules/deploybot.mdc index b6a76da..b0e6174 100644 --- a/adapters/cursor/.cursor/rules/deploybot.mdc +++ b/adapters/cursor/.cursor/rules/deploybot.mdc @@ -15,6 +15,10 @@ the stable Cursor thread ID, never prompts or transcripts. Refresh intent only after replacement-head review. Only the coordinator may react, integrate, drain, follow, pause, or resume repaired work. +Honor `pipeline.release_admission`. In `merged` mode, admit the next independent +ready PR immediately after a healthy merge while release events continue CI, +deployment, and health tracking. A later failure pauses future merges normally. + Immediately before asking the user to unpause or take repair action, call `pipeline_status` again and suppress the request if durable state is already running or the release advanced. The original deploy instruction authorizes the diff --git a/adapters/cursor/.cursor/rules/manage-merge-queue.mdc b/adapters/cursor/.cursor/rules/manage-merge-queue.mdc index 79e56f9..a76f3af 100644 --- a/adapters/cursor/.cursor/rules/manage-merge-queue.mdc +++ b/adapters/cursor/.cursor/rules/manage-merge-queue.mdc @@ -13,4 +13,6 @@ wakes GitHub and promotes only after fresh exact-head gates; never poll or merge an unlabeled PR. Use `pipeline_status` and `react_to_delivery_event`, skip blockers, honor dependencies, route overlaps or cumulative validation through one integration PR, return repair packets to their source thread, atomically -resume after fresh review, and follow cumulative `main` through deployment. +resume after fresh review, and follow cumulative `main` through deployment. In +`release_admission = "merged"` mode, admit independent ready work immediately +after merge while later events continue release tracking. diff --git a/adapters/cursor/AGENTS.md b/adapters/cursor/AGENTS.md index 2c7dbf8..a3e98dc 100644 --- a/adapters/cursor/AGENTS.md +++ b/adapters/cursor/AGENTS.md @@ -27,7 +27,9 @@ branch is itself a merge and is forbidden outside DeployBot. Never poll, merge an unlabeled PR, or absorb unrelated work. Let the event worker promote fresh exact heads, use one integration PR for overlaps or cumulative validation, return repair packets to the source thread, atomically resume after -fresh review, and follow cumulative `main` through verified deployment. +fresh review, and follow cumulative `main` through verified deployment. When +`release_admission = "merged"`, admit independent ready work immediately after +merge while release events continue asynchronously; later failures still pause. For each verified `thread_notifications` entry, post its message back to the native PR-opening thread and only then call `acknowledge_thread_deployment`. Leave diff --git a/docs/reference.md b/docs/reference.md index ece430c..77c739a 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -1,7 +1,7 @@ # DeployBot reference This reference describes the CLI, MCP server, policy file, and GitHub Action in -DeployBot v0.2.24. GitHub labels and authenticated comments are the durable state; +DeployBot v0.2.25. GitHub labels and authenticated comments are the durable state; the CLI and MCP tools are two interfaces to the same operations. ## CLI @@ -166,7 +166,7 @@ Provider fields are: | `thread_active_hours` | Positive integer; default 72. Notification obligations and pending messages use their own non-expiring outbox. | | `ci_workflows` | Workflow names followed as exact-main CI. Default: `["CI"]`. | | `deploy_workflows` | Deployment workflow names. Default: `["Deploy"]`. | -| `batch_settle_seconds` | Non-negative window for coalescing near-ready deploy requests before freezing a batch. Default: 15. | +| `batch_settle_seconds` | Non-negative window for coalescing near-ready deploy requests before freezing a batch. Default: 0, so event-driven ready work freezes immediately; scheduled reconciliation is only a fallback. | | `ci_failure_grace_seconds` | Non-negative window for an exact-main CI retry to replace a failed attempt before the release fails. Default: 90. | | `promotion_workers` | Positive maximum number of deploy requests promoted concurrently. Default: 4. | | `repair_hold_minutes` | Positive maximum time that a genuine repair may hold overlapping ready work without becoming merge-eligible. Default: 60. | @@ -174,7 +174,7 @@ Provider fields are: | integration repair packet | Includes `source_pull_requests` and the complete `source_heads` map so the elected owner can verify every frozen source before resuming the cumulative PR. | | suppressed integration PR run | Integration `pull_request` runs, including `action_required` zero-job placeholders, are not exact CI evidence. DeployBot uses its own exact-branch `workflow_dispatch` run, whose real failures still fail closed. | | `hold_merges_while_releasing` | Default `true`; after a merge, admit no newer batch until the release reaches the `release_admission` gate. | -| `release_admission` | How far an in-flight release must progress before the next batch is admitted; allowed: `verified` (default, safest) waits for the cumulative exact-main revision to be live, `ci-passed` reopens admission once exact-main CI is green while deploy and health checks keep following in the background. `ci-passed` trades a larger failure blast radius for throughput, and verification and notifications for a release may be emitted by a later reaction rather than the merging one. | +| `release_admission` | How far an in-flight release must progress before the next independent batch is admitted. `merged` (default) reopens admission immediately after merge while CI, deployment, and health tracking continue asynchronously. `ci-passed` waits for exact-main CI; `verified` waits until the cumulative revision is live. A later observed CI, deployment, or health failure pauses future merges in every mode. | | `repair_branch_prefix` | Deterministic release-repair lease branch prefix; default `"deploybot/repair"`. | | `ready_to_merge_target_minutes` | Positive request-to-ready and queued-to-merge timing target; default 15. | | `merge_to_live_target_minutes` | Positive timing target; default 10. | diff --git a/examples/github-workflow.yml b/examples/github-workflow.yml index e1a6423..cf700ad 100644 --- a/examples/github-workflow.yml +++ b/examples/github-workflow.yml @@ -77,10 +77,11 @@ jobs: with: ref: ${{ github.event.repository.default_branch }} persist-credentials: false - # v0.2.24 implementation; keep the full commit for privileged workflows. + # v0.2.25 implementation; keep the full commit for privileged workflows. - uses: Forward-Future/DeployBot@73004ea7c9dcb81e7f1281c0687aea0897d1571d with: - # PR and review events reconcile quickly. Only release-owner events - # stay attached to cumulative main through CI and deployment. + # PR and review events reconcile immediately. Release-owner events + # advance to the configured admission gate; "merged" observations + # return quickly, while the schedule remains a fallback reconciliation. follow: ${{ github.event_name == 'workflow_run' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} timeout: ${{ (github.event_name == 'workflow_run' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && '2400' || '600' }} diff --git a/pyproject.toml b/pyproject.toml index f4e76ec..7b1c493 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "deploybot-merge-queue" -version = "0.2.24" +version = "0.2.25" description = "DeployBot: a provider-neutral GitHub merge queue for coding agents" readme = "README.md" license = "MIT" diff --git a/skills/deploybot/SKILL.md b/skills/deploybot/SKILL.md index 4045725..707674c 100644 --- a/skills/deploybot/SKILL.md +++ b/skills/deploybot/SKILL.md @@ -87,8 +87,10 @@ Merge independent ready pull requests back-to-back. Route source-overlap groups through `create_integration_pull_request`; when policy mode is `all`, validate the entire frozen batch through that cumulative PR. Never invent a conflict resolution. Return the repair packet to its source thread, then call `resume` -after its new exact head passes. Finish with `follow_release`, following newer -cumulative base heads until CI, deployment, and configured health checks verify. +after its new exact head passes. Keep release tracking event-driven: in +`release_admission = "merged"` mode, admit independent ready work immediately +after a healthy merge while later events continue CI, deployment, and health +tracking. Scheduled reconciliation is a fallback, not the normal promotion path. Genuine repair blocks may hold overlapping ready work for the configured bounded repair window, but they remain merge-ineligible until the trusted source agent @@ -123,9 +125,10 @@ named thread already owns that failed SHA; wait for that repair and never create a competing PR. The owner is encoded in the atomic branch ref, so a registry write failure is recovered by calling the same tool again. -New batches are FIFO-bounded by `integration.max_batch_size`, and a merged batch -closes admission until its cumulative main revision is verified live. Do not -override either boundary for later work. Never execute merged PR code inside +New batches are FIFO-bounded by `integration.max_batch_size`. Honor the configured +release-admission gate: `merged` permits the next independent batch immediately, +while `ci-passed` and `verified` impose stricter release fences. A later observed +release failure pauses future merges in every mode. Never execute merged PR code inside the privileged coordinator; generated-artifact conflicts go to the elected repair owner for a normal reviewed rebuild. When PR-authored checks are required, use a GitHub App installation token, list its bot login in diff --git a/skills/manage-merge-queue/SKILL.md b/skills/manage-merge-queue/SKILL.md index 6e06ac0..bea7814 100644 --- a/skills/manage-merge-queue/SKILL.md +++ b/skills/manage-merge-queue/SKILL.md @@ -54,12 +54,14 @@ A genuine repair remains merge-ineligible, but DeployBot may temporarily hold overlapping ready work for the configured bounded repair window so concurrent merges do not repeatedly invalidate the replacement head. -Finish with `follow_release`, tracking newer cumulative base heads through CI, -deployment, and configured health checks. A failure pauses further merges until +Track newer cumulative base heads through CI, deployment, and configured health +checks from release events. With `release_admission = "merged"`, immediately +admit the next independent ready batch after merge instead of occupying the merge +worker while production catches up. A later failure pauses further merges until the coordinator verifies recovery and unpauses. Before creating that recovery, call `claim_release_repair`; only the returned `owned` thread may use the -deterministic repair branch. Respect the configured maximum batch size and keep -new merges closed while an earlier exact-main release is unfinished. Record +deterministic repair branch. Respect the configured maximum batch size and the +selected `merged`, `ci-passed`, or `verified` release-admission fence. Record exact heads, review verdicts, merged commits, waiting items, repair packets, integration groups, and delivery timing. diff --git a/src/agent_merge_queue/__init__.py b/src/agent_merge_queue/__init__.py index 52ab33d..a1c3a9a 100644 --- a/src/agent_merge_queue/__init__.py +++ b/src/agent_merge_queue/__init__.py @@ -1,3 +1,3 @@ """DeployBot: a provider-neutral GitHub merge queue for coding agents.""" -__version__ = "0.2.24" +__version__ = "0.2.25" diff --git a/src/agent_merge_queue/cli.py b/src/agent_merge_queue/cli.py index 9a570ad..081e1d3 100755 --- a/src/agent_merge_queue/cli.py +++ b/src/agent_merge_queue/cli.py @@ -28,6 +28,7 @@ follow_release, http_verifications, notify, + release_admitted, release_state, seconds_between, summarize_metrics, @@ -5092,9 +5093,13 @@ def command_drain( key = tuple(int(value) for value in group["pull_requests"]) integration_by_members[key] = group next_batch = list(batch_result["next_batch"]) - if batch_result["merged"]: - # One reaction owns one bounded release batch. Leave the FIFO - # remainder for the event that runs after exact-main verification. + if ( + batch_result["merged"] + and client.config.pipeline.release_admission != "merged" + ): + # Stricter admission modes own one bounded release batch at a time. + # Minimum-latency mode keeps draining independent FIFO batches in + # this same event, before CI or deployment begins. break if not next_batch: break @@ -5850,22 +5855,21 @@ def command_react( release_is_verified = release_before_merge["state"] == "verified" if release_is_verified: client.record_verified_main(current_main_sha) - # In "verified" mode the next batch waits for the cumulative release to - # be fully live. In "ci-passed" mode admission reopens as soon as - # exact-main CI is green (deploy in flight), so merges stop waiting on - # deploy and health-check time. + # Release admission is independent from release tracking. "merged" + # reopens immediately for healthy in-flight releases, "ci-passed" + # waits for exact-main CI, and "verified" waits until production is + # live. Every mode still stops on an observed release failure. admission_gate = client.config.pipeline.release_admission - admitted_states = {"verified"} - if admission_gate == "ci-passed": - admitted_states |= {"awaiting-deploy", "deploying"} - release_admitted = ( + is_release_admitted = ( release_already_verified or release_is_verified - or release_before_merge.get("state") in admitted_states + or release_admitted( + str(release_before_merge.get("state") or ""), admission_gate + ) ) if ( has_release_owner - and not release_admitted + and not is_release_admitted and not recovering_current_main ): release = release_before_merge @@ -5878,7 +5882,9 @@ def command_react( emit=False, admit_gate=admission_gate, ) - if release.get("state") not in admitted_states: + if not release_admitted( + str(release.get("state") or ""), admission_gate + ): result = { "state": "release-held", "release": release, @@ -6528,6 +6534,7 @@ def main(argv: list[str] | None = None) -> int: timeout_seconds=arguments.timeout, poll_seconds=arguments.poll, json_output=arguments.json_output, + admit_gate=client.config.pipeline.release_admission, ) elif arguments.command == "pause": command_control(client, state="paused", reason=arguments.reason) diff --git a/src/agent_merge_queue/config.py b/src/agent_merge_queue/config.py index 168cbdd..f0860f9 100644 --- a/src/agent_merge_queue/config.py +++ b/src/agent_merge_queue/config.py @@ -92,7 +92,7 @@ class QueueConfig: ALLOWED_MERGE_METHODS = {"merge", "squash", "rebase"} ALLOWED_INTEGRATION_MODES = {"manual", "overlap", "all"} ALLOWED_INTENT_SCOPES = {"head"} -ALLOWED_RELEASE_ADMISSION = {"verified", "ci-passed"} +ALLOWED_RELEASE_ADMISSION = {"verified", "ci-passed", "merged"} DEFAULT_CONFIG = """\ [queue] base_branch = "main" @@ -127,7 +127,7 @@ class QueueConfig: thread_active_hours = 72 ci_workflows = ["CI"] deploy_workflows = ["Deploy"] -batch_settle_seconds = 15 +batch_settle_seconds = 0 ci_failure_grace_seconds = 90 promotion_workers = 4 repair_hold_minutes = 60 @@ -138,11 +138,11 @@ class QueueConfig: auto_promote = true intent_scope = "head" pause_on_failure = true -# verified (safest): hold new merges until the cumulative release is live. -# ci-passed: admit the next batch once exact-main CI passes; deploy and health -# checks keep following in the background, trading a larger failure blast radius -# for higher merge throughput. -release_admission = "verified" +# merged (default): admit independent ready work immediately after the previous +# merge while CI, deployment, and health checks continue asynchronously. +# ci-passed: wait for exact-main CI; verified: wait until the release is live. +# Any later release failure pauses future merges in every mode. +release_admission = "merged" # Receives best-effort events, including retryable thread-deployed messages. # webhook_url_env = "DEPLOYBOT_WEBHOOK_URL" @@ -425,7 +425,7 @@ def parse_config(payload: dict[str, Any]) -> QueueConfig: allowed = ", ".join(sorted(ALLOWED_INTENT_SCOPES)) raise ConfigError(f"pipeline.intent_scope must be one of: {allowed}") release_admission = _require_string( - pipeline.get("release_admission"), "pipeline.release_admission", "verified" + pipeline.get("release_admission"), "pipeline.release_admission", "merged" ) if release_admission not in ALLOWED_RELEASE_ADMISSION: allowed = ", ".join(sorted(ALLOWED_RELEASE_ADMISSION)) @@ -497,7 +497,7 @@ def parse_config(payload: dict[str, Any]) -> QueueConfig: batch_settle_seconds=_non_negative_int( pipeline.get("batch_settle_seconds"), "pipeline.batch_settle_seconds", - 15, + 0, ), ci_failure_grace_seconds=_non_negative_int( pipeline.get("ci_failure_grace_seconds"), diff --git a/src/agent_merge_queue/mcp_server.py b/src/agent_merge_queue/mcp_server.py index 29770c0..9b39e0a 100644 --- a/src/agent_merge_queue/mcp_server.py +++ b/src/agent_merge_queue/mcp_server.py @@ -175,7 +175,7 @@ def follow_release( repository: str | None = None, config: str | None = None, ) -> str: - """Follow newest exact main through CI, deployment, and health verification.""" + """Advance the newest exact-main release to its configured admission gate.""" return _run( "follow", "--timeout", diff --git a/src/agent_merge_queue/pipeline.py b/src/agent_merge_queue/pipeline.py index c466519..a111663 100644 --- a/src/agent_merge_queue/pipeline.py +++ b/src/agent_merge_queue/pipeline.py @@ -14,6 +14,16 @@ from .records import parse_time +def release_admitted(state: str, admit_gate: str) -> bool: + """Return whether a healthy release state permits another merge.""" + admitted = {"verified"} + if admit_gate == "ci-passed": + admitted.update({"awaiting-deploy", "deploying"}) + elif admit_gate == "merged": + admitted.update({"testing", "awaiting-deploy", "deploying"}) + return state in admitted + + def workflow_run(run: dict[str, Any]) -> dict[str, Any]: return { "id": run.get("id"), @@ -283,14 +293,12 @@ def follow_release( client.dispatch_deploy_workflows(ci_run=ci) ) dispatched_for.add(key) - if admit_gate == "ci-passed" and value["state"] in { - "awaiting-deploy", - "deploying", - }: - # Exact-main CI already passed and the deployment is in flight. Hand - # control back so the next batch can merge; a later reaction (or the - # scheduled reconciliation) follows this deployment through to - # verification, records the watermark, and emits notifications. + if value["state"] != "verified" and release_admitted( + str(value["state"]), admit_gate + ): + # The configured admission milestone has passed. Hand control back + # so independent ready work can merge; later release events (or the + # scheduled fallback) continue from durable GitHub state. return { **value, "dispatched_deployments": dispatched_deployments, @@ -305,6 +313,17 @@ def follow_release( "dispatched_deployments": dispatched_deployments, "verifications": checks, } + if admit_gate == "merged": + # Minimum-latency mode is event-driven rather than a long-lived + # release poller. A completed deploy with a failing configured + # health check is actionable evidence, so let command_follow + # record the pause immediately. + return { + **value, + "state": "verify-failed", + "dispatched_deployments": dispatched_deployments, + "verifications": checks, + } if (clock if clock is not None else time.monotonic()) >= deadline: state = ( "verify-failed" diff --git a/tests/test_cli.py b/tests/test_cli.py index 7c0d6c8..3732a98 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -95,7 +95,8 @@ "required_checks": ["CI"], "dependency_directive": "Queue-after", "trusted_actors": ["trusted"], - } + }, + "pipeline": {"release_admission": "verified"}, } ) @@ -2558,6 +2559,155 @@ def test_reactor_admits_at_ci_passed_when_configured(self) -> None: promote.assert_called_once() client.record_verified_main.assert_not_called() + def test_reactor_merges_next_pr_while_previous_release_is_in_ci(self) -> None: + sha = "a" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = sha + client.verified_main_sha.return_value = None + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": sha, + "status": "in_progress", + "conclusion": None, + } + ] + frozen = FreezeResult(None, [], [], [], []) + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch("agent_merge_queue.cli.settle_integration_checks", return_value=[]), + patch("agent_merge_queue.cli.promote_integrations", return_value=[]), + patch( + "agent_merge_queue.cli.command_promote", + return_value={"promoted": [2], "waiting": [], "blocked": []}, + ), + patch("agent_merge_queue.cli.freeze_queue", return_value=frozen), + patch( + "agent_merge_queue.cli.command_drain", + return_value={"merged": [{"number": 2}]}, + ), + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) + + self.assertEqual(result["state"], "complete") + self.assertEqual(result["drain"]["merged"], [{"number": 2}]) + + def test_reactor_merged_mode_does_not_wait_for_pending_deployment(self) -> None: + sha = "a" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = sha + client.verified_main_sha.return_value = None + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + }, + { + "id": 2, + "name": "Deploy", + "head_sha": sha, + "status": "in_progress", + "conclusion": None, + }, + ] + frozen = FreezeResult(None, [], [], [], []) + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch("agent_merge_queue.cli.settle_integration_checks", return_value=[]), + patch("agent_merge_queue.cli.promote_integrations", return_value=[]), + patch( + "agent_merge_queue.cli.command_promote", + return_value={"promoted": [], "waiting": [], "blocked": []}, + ) as promote, + patch("agent_merge_queue.cli.freeze_queue", return_value=frozen), + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) + + self.assertNotEqual(result.get("state"), "release-held") + promote.assert_called_once() + + def test_reactor_merged_mode_pauses_after_later_deploy_failure(self) -> None: + sha = "a" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = sha + client.verified_main_sha.return_value = None + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + }, + { + "id": 2, + "name": "Deploy", + "head_sha": sha, + "status": "completed", + "conclusion": "failure", + }, + ] + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch("agent_merge_queue.cli.command_promote") as promote, + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=True, timeout_seconds=10) + + self.assertEqual(result["state"], "release-held") + client.set_pipeline_control.assert_called_once_with( + "paused", f"deploy-failed on {sha}", main_sha=sha + ) + promote.assert_not_called() + def test_release_repair_claim_creates_one_deterministic_lease(self) -> None: sha = "a" * 40 client = object.__new__(GitHub) @@ -3291,6 +3441,53 @@ def test_drain_stops_after_one_batch_merges(self) -> None: self.assertEqual(result["merged"][0]["number"], 1) self.assertEqual(result["next_batch"], [2]) + def test_drain_merged_mode_lands_independent_batches_back_to_back(self) -> None: + first_entry = entry(1, "a.py") + later_entry = entry(2, "b.py") + first = FreezeResult( + batch={"batch_id": "first"}, + queue=[first_entry], + blocked_queue=[], + next_batch=[later_entry], + overlap_groups=[], + ) + second = FreezeResult( + batch={"batch_id": "second"}, + queue=[later_entry], + blocked_queue=[], + next_batch=[], + overlap_groups=[], + ) + client = Mock() + client.config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) + with ( + patch( + "agent_merge_queue.cli.freeze_queue", + side_effect=[first, second], + ) as freeze, + patch( + "agent_merge_queue.cli.command_merge", return_value="m" * 40 + ) as merge, + redirect_stdout(io.StringIO()), + ): + result = command_drain(client, json_output=True) + + self.assertEqual(freeze.call_count, 2) + self.assertEqual(merge.call_count, 2) + self.assertEqual(result["batch_ids"], ["first", "second"]) + self.assertEqual( + [value["number"] for value in result["merged"]], + [1, 2], + ) + def test_reenqueue_toggles_label_to_wake_event_coordinator(self) -> None: value = entry(1) old_head = "a" * 40 diff --git a/tests/test_config.py b/tests/test_config.py index b24ca8a..0326bae 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -115,7 +115,8 @@ def test_init_creates_a_loadable_safe_policy(self) -> None: self.assertEqual(path.name, ".mergequeue.toml") self.assertEqual(config.required_checks, ("CI",)) - self.assertEqual(config.pipeline.release_admission, "verified") + self.assertEqual(config.pipeline.release_admission, "merged") + self.assertEqual(config.pipeline.batch_settle_seconds, 0) self.assertEqual( config.trusted_actors, ("@repository-owner",), diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 31acf18..0e6d2fa 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -14,7 +14,10 @@ CONFIG = parse_config( - {"queue": {"required_checks": ["CI"], "trusted_actors": ["trusted"]}} + { + "queue": {"required_checks": ["CI"], "trusted_actors": ["trusted"]}, + "pipeline": {"release_admission": "verified"}, + } ) @@ -277,6 +280,74 @@ def test_follow_admits_at_ci_passed_without_waiting_for_deploy(self) -> None: self.assertEqual(result["dispatched_deployments"][0]["id"], 9) sleep.assert_not_called() + def test_follow_admits_at_merged_while_ci_is_still_running(self) -> None: + sha = "a" * 40 + client = Mock() + client.config = CONFIG + client.base_sha.return_value = sha + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": sha, + "status": "in_progress", + "conclusion": None, + } + ] + with ( + patch("agent_merge_queue.pipeline.time.sleep") as sleep, + patch("agent_merge_queue.pipeline.time.monotonic", return_value=0), + ): + result = follow_release( + client, + timeout_seconds=10, + poll_seconds=1, + admit_gate="merged", + ) + + self.assertEqual(result["state"], "testing") + sleep.assert_not_called() + + def test_merged_mode_dispatches_only_newest_main_deployment(self) -> None: + old = "a" * 40 + newest = "b" * 40 + old_ci = { + "id": 1, + "name": "CI", + "head_sha": old, + "status": "completed", + "conclusion": "success", + "event": "workflow_dispatch", + } + newest_ci = { + "id": 2, + "name": "CI", + "head_sha": newest, + "status": "completed", + "conclusion": "success", + "event": "workflow_dispatch", + } + client = Mock() + client.config = CONFIG + client.base_sha.return_value = newest + client.workflow_runs.return_value = [old_ci, newest_ci] + client.dispatch_deploy_workflows.return_value = [ + {"id": 9, "name": "Deploy", "ci_sha": newest, "ci_run_id": 2} + ] + + result = follow_release( + client, + timeout_seconds=10, + poll_seconds=1, + admit_gate="merged", + ) + + self.assertEqual(result["main_sha"], newest) + client.dispatch_deploy_workflows.assert_called_once() + dispatched_ci = client.dispatch_deploy_workflows.call_args.kwargs["ci_run"] + self.assertEqual(dispatched_ci["id"], newest_ci["id"]) + self.assertEqual(dispatched_ci["head_sha"], newest) + def test_follow_absorbs_a_ci_rerun_during_failure_grace(self) -> None: sha = "a" * 40 failed = { From 8a00930ed4b3c827cb996aa85c22a2ce6df570c4 Mon Sep 17 00:00:00 2001 From: Matthew Berman <748450+mberman84@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:43:14 -0700 Subject: [PATCH 02/16] Pin clients to merged-admission runtime --- README.md | 4 ++-- adapters/claude-code/.claude-plugin/plugin.json | 2 +- adapters/claude-code/.mcp.json | 2 +- adapters/codex/agent-merge-queue/.codex-plugin/plugin.json | 2 +- adapters/cursor/.cursor/mcp.json | 2 +- examples/github-workflow.yml | 2 +- tests/test_skill.py | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 47b4713..a894bba 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Install the reviewed `v0.2.25` source commit directly from GitHub: ```bash python3 -m pip install \ - 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@73004ea7c9dcb81e7f1281c0687aea0897d1571d' + 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@f2438ffe7c482834b74edd494c1c382d22d8cdee' deploybot init ``` @@ -98,7 +98,7 @@ worker free. It can still dispatch deployment when GitHub suppresses the release commit: ```yaml -- uses: Forward-Future/DeployBot@73004ea7c9dcb81e7f1281c0687aea0897d1571d +- uses: Forward-Future/DeployBot@f2438ffe7c482834b74edd494c1c382d22d8cdee ``` The Action uses GitHub's built-in workflow token. GitHub intentionally does not diff --git a/adapters/claude-code/.claude-plugin/plugin.json b/adapters/claude-code/.claude-plugin/plugin.json index 3197f1a..76794d5 100644 --- a/adapters/claude-code/.claude-plugin/plugin.json +++ b/adapters/claude-code/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "deploybot", - "version": "0.2.24", + "version": "0.2.25", "description": "DeployBot: a provider-neutral GitHub merge queue for coding agents", "author": { "name": "DeployBot contributors" diff --git a/adapters/claude-code/.mcp.json b/adapters/claude-code/.mcp.json index dff1ee2..86a8178 100644 --- a/adapters/claude-code/.mcp.json +++ b/adapters/claude-code/.mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@73004ea7c9dcb81e7f1281c0687aea0897d1571d", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@f2438ffe7c482834b74edd494c1c382d22d8cdee", "deploybot-mcp" ] } diff --git a/adapters/codex/agent-merge-queue/.codex-plugin/plugin.json b/adapters/codex/agent-merge-queue/.codex-plugin/plugin.json index d0ec9d3..156bb62 100644 --- a/adapters/codex/agent-merge-queue/.codex-plugin/plugin.json +++ b/adapters/codex/agent-merge-queue/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "deploybot", - "version": "0.2.24", + "version": "0.2.25", "description": "Coordinate exact-head pull requests through verified deployment and thread notification", "author": { "name": "DeployBot contributors" diff --git a/adapters/cursor/.cursor/mcp.json b/adapters/cursor/.cursor/mcp.json index dff1ee2..86a8178 100644 --- a/adapters/cursor/.cursor/mcp.json +++ b/adapters/cursor/.cursor/mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@73004ea7c9dcb81e7f1281c0687aea0897d1571d", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@f2438ffe7c482834b74edd494c1c382d22d8cdee", "deploybot-mcp" ] } diff --git a/examples/github-workflow.yml b/examples/github-workflow.yml index cf700ad..b8fe2a2 100644 --- a/examples/github-workflow.yml +++ b/examples/github-workflow.yml @@ -78,7 +78,7 @@ jobs: ref: ${{ github.event.repository.default_branch }} persist-credentials: false # v0.2.25 implementation; keep the full commit for privileged workflows. - - uses: Forward-Future/DeployBot@73004ea7c9dcb81e7f1281c0687aea0897d1571d + - uses: Forward-Future/DeployBot@f2438ffe7c482834b74edd494c1c382d22d8cdee with: # PR and review events reconcile immediately. Release-owner events # advance to the configured admission gate; "merged" observations diff --git a/tests/test_skill.py b/tests/test_skill.py index 97ad62e..1becaff 100644 --- a/tests/test_skill.py +++ b/tests/test_skill.py @@ -8,7 +8,7 @@ ROOT = Path(__file__).resolve().parents[1] CANONICAL = ROOT / "skills" / "deploybot" / "SKILL.md" -RELEASE_COMMIT = "73004ea7c9dcb81e7f1281c0687aea0897d1571d" +RELEASE_COMMIT = "f2438ffe7c482834b74edd494c1c382d22d8cdee" CHECKOUT_COMMIT = "9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0" From 2614898924999137b7a249733f57ebaf4e5ddaad Mon Sep 17 00:00:00 2001 From: Matthew Berman <748450+mberman84@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:46:10 -0700 Subject: [PATCH 03/16] Keep superseded release tracking fail-closed --- src/agent_merge_queue/cli.py | 103 +++++++++++++++++++++++++++- src/agent_merge_queue/pipeline.py | 11 --- tests/test_cli.py | 109 ++++++++++++++++++++++++++++++ tests/test_pipeline.py | 58 ++++++++++++++++ 4 files changed, 268 insertions(+), 13 deletions(-) diff --git a/src/agent_merge_queue/cli.py b/src/agent_merge_queue/cli.py index 081e1d3..2d1edf6 100755 --- a/src/agent_merge_queue/cli.py +++ b/src/agent_merge_queue/cli.py @@ -5619,6 +5619,55 @@ def release_follow_needed(client: GitHub) -> bool: return True +def superseded_release_failure( + client: GitHub, + *, + current_main_sha: str, + verified_main_sha: str | None, + workflow_runs: list[dict[str, Any]], + recovered_main_sha: str | None, +) -> dict[str, Any] | None: + """Find a failed admitted release that a newer merge moved past.""" + records = client.thread_records(include_terminal=True) + if not isinstance(records, list): + return None + candidates = { + str(record.get("merge_sha") or "") + for record in records + if record.get("phase") == "merged" and record.get("merge_sha") + } + for main_sha in sorted(candidates): + if main_sha in {current_main_sha, recovered_main_sha}: + continue + if not client.is_ancestor(main_sha, current_main_sha): + continue + if verified_main_sha and client.is_ancestor(main_sha, verified_main_sha): + continue + value = release_state( + main_sha=main_sha, + runs=workflow_runs, + config=client.config.pipeline, + ) + if value["state"] == "deploy-failed": + return value + if value["state"] != "ci-failed": + continue + grace = client.config.pipeline.ci_failure_grace_seconds + latest_ci = value.get("latest_ci") or {} + failed_at = parse_time( + str(latest_ci.get("updated_at") or latest_ci.get("created_at") or "") + ) + now = parse_time(utc_now()) + if ( + not grace + or failed_at is None + or now is None + or (now - failed_at).total_seconds() >= grace + ): + return value + return None + + def should_settle_batch(client: GitHub, entries: list[QueueEntry]) -> bool: has_ready = any( client.config.queue_label in entry.labels and entry.state == "ready" @@ -5784,12 +5833,48 @@ def command_react( and bool(control.get("recovered_main_sha")) and control.get("recovered_main_sha") == current_main_sha ) + raw_watermark = client.verified_main_sha() + admission_gate = client.config.pipeline.release_admission release_before_merge = release_state( main_sha=current_main_sha, runs=workflow_runs, config=client.config.pipeline, ) - raw_watermark = client.verified_main_sha() + if ( + admission_gate == "merged" + and release_before_merge.get("state") != "verified" + ): + failed_release = superseded_release_failure( + client, + current_main_sha=current_main_sha, + verified_main_sha=( + str(raw_watermark) if isinstance(raw_watermark, str) else None + ), + workflow_runs=workflow_runs, + recovered_main_sha=( + str(control.get("recovered_main_sha") or "") or None + ), + ) + if failed_release is not None: + if client.config.pipeline.pause_on_failure: + client.set_pipeline_control( + "paused", + f"{failed_release['state']} on {failed_release['main_sha']}", + main_sha=str(failed_release["main_sha"]), + ) + result = { + "state": "release-held", + "release": failed_release, + "promoted": {}, + "promoted_integrations": [], + "drain": {}, + "dispatched_ci": [], + "integrations": [], + "integration_checks": [], + "reconciled_merges": reconciled_merges, + } + print(json.dumps(result, indent=2, sort_keys=True)) + return result release_already_verified = raw_watermark == current_main_sha has_release_owner = ( not release_already_verified @@ -5834,6 +5919,21 @@ def command_react( # durable merged obligation owns it; historical runs for older # SHAs cannot make an unobservable release finish. client.record_verified_main(current_main_sha) + if admission_gate == "merged" and release_before_merge.get("state") == ( + "awaiting-deploy" + ): + # Token-dispatched CI may not emit its workflow_run handoff. Start + # the exact authorized deployment before another merge can + # supersede this SHA; the deployment's own freshness check then + # safely coalesces it if main advances before the lock is acquired. + release_before_merge = command_follow( + client, + timeout_seconds=timeout_seconds, + poll_seconds=10, + json_output=False, + emit=False, + admit_gate=admission_gate, + ) release_is_verified = release_already_verified or ( release_before_merge.get("state") == "verified" ) @@ -5859,7 +5959,6 @@ def command_react( # reopens immediately for healthy in-flight releases, "ci-passed" # waits for exact-main CI, and "verified" waits until production is # live. Every mode still stops on an observed release failure. - admission_gate = client.config.pipeline.release_admission is_release_admitted = ( release_already_verified or release_is_verified diff --git a/src/agent_merge_queue/pipeline.py b/src/agent_merge_queue/pipeline.py index a111663..4bc713b 100644 --- a/src/agent_merge_queue/pipeline.py +++ b/src/agent_merge_queue/pipeline.py @@ -313,17 +313,6 @@ def follow_release( "dispatched_deployments": dispatched_deployments, "verifications": checks, } - if admit_gate == "merged": - # Minimum-latency mode is event-driven rather than a long-lived - # release poller. A completed deploy with a failing configured - # health check is actionable evidence, so let command_follow - # record the pause immediately. - return { - **value, - "state": "verify-failed", - "dispatched_deployments": dispatched_deployments, - "verifications": checks, - } if (clock if clock is not None else time.monotonic()) >= deadline: state = ( "verify-failed" diff --git a/tests/test_cli.py b/tests/test_cli.py index 3732a98..40f7db4 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2708,6 +2708,115 @@ def test_reactor_merged_mode_pauses_after_later_deploy_failure(self) -> None: ) promote.assert_not_called() + def test_reactor_dispatches_pending_release_before_next_merged_mode_pr( + self, + ) -> None: + sha = "a" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = sha + client.verified_main_sha.return_value = None + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + "event": "workflow_dispatch", + } + ] + client.dispatch_deploy_workflows.return_value = [ + {"id": 9, "name": "Deploy", "ci_sha": sha, "ci_run_id": 1} + ] + frozen = FreezeResult(None, [], [], [], []) + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch("agent_merge_queue.cli.settle_integration_checks", return_value=[]), + patch("agent_merge_queue.cli.promote_integrations", return_value=[]), + patch( + "agent_merge_queue.cli.command_promote", + return_value={"promoted": [2], "waiting": [], "blocked": []}, + ), + patch("agent_merge_queue.cli.freeze_queue", return_value=frozen), + patch( + "agent_merge_queue.cli.command_drain", + return_value={"merged": [{"number": 2}]}, + ), + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) + + self.assertEqual(result["drain"]["merged"], [{"number": 2}]) + client.dispatch_deploy_workflows.assert_called_once() + + def test_reactor_pauses_for_failed_superseded_merged_mode_release(self) -> None: + old = "a" * 40 + current = "b" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = current + client.verified_main_sha.return_value = None + client.thread_records.return_value = [ + {"phase": "merged", "merge_sha": old, "pull_request": 1} + ] + client.is_ancestor.return_value = True + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": old, + "status": "completed", + "conclusion": "success", + }, + { + "id": 2, + "name": "Deploy", + "head_sha": old, + "status": "completed", + "conclusion": "failure", + }, + ] + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch("agent_merge_queue.cli.command_promote") as promote, + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) + + self.assertEqual(result["state"], "release-held") + self.assertEqual(result["release"]["main_sha"], old) + client.set_pipeline_control.assert_called_once_with( + "paused", f"deploy-failed on {old}", main_sha=old + ) + promote.assert_not_called() + def test_release_repair_claim_creates_one_deterministic_lease(self) -> None: sha = "a" * 40 client = object.__new__(GitHub) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 0e6d2fa..9d965aa 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -348,6 +348,64 @@ def test_merged_mode_dispatches_only_newest_main_deployment(self) -> None: self.assertEqual(dispatched_ci["id"], newest_ci["id"]) self.assertEqual(dispatched_ci["head_sha"], newest) + def test_merged_mode_retries_health_before_reporting_failure(self) -> None: + sha = "a" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": { + "verifications": [ + {"name": "Login", "url": "https://example.test/login"} + ] + }, + } + ) + client = Mock() + client.config = config + client.base_sha.return_value = sha + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + }, + { + "id": 2, + "name": "Deploy", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + }, + ] + with ( + patch( + "agent_merge_queue.pipeline.http_verifications", + side_effect=[ + [{"name": "Login", "passed": False}], + [{"name": "Login", "passed": True}], + ], + ), + patch("agent_merge_queue.pipeline.time.sleep") as sleep, + patch( + "agent_merge_queue.pipeline.time.monotonic", + side_effect=[0, 1, 2], + ), + ): + result = follow_release( + client, + timeout_seconds=10, + poll_seconds=1, + admit_gate="merged", + ) + + self.assertEqual(result["state"], "verified") + sleep.assert_called_once_with(1) + def test_follow_absorbs_a_ci_rerun_during_failure_grace(self) -> None: sha = "a" * 40 failed = { From 0e8d10b684dbffbffa8f83b3eb834ca4f6c49b7e Mon Sep 17 00:00:00 2001 From: Matthew Berman <748450+mberman84@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:46:17 -0700 Subject: [PATCH 04/16] Pin clients to fail-closed merged admission --- README.md | 4 ++-- adapters/claude-code/.mcp.json | 2 +- adapters/cursor/.cursor/mcp.json | 2 +- examples/github-workflow.yml | 2 +- tests/test_skill.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a894bba..2246585 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Install the reviewed `v0.2.25` source commit directly from GitHub: ```bash python3 -m pip install \ - 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@f2438ffe7c482834b74edd494c1c382d22d8cdee' + 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@2614898924999137b7a249733f57ebaf4e5ddaad' deploybot init ``` @@ -98,7 +98,7 @@ worker free. It can still dispatch deployment when GitHub suppresses the release commit: ```yaml -- uses: Forward-Future/DeployBot@f2438ffe7c482834b74edd494c1c382d22d8cdee +- uses: Forward-Future/DeployBot@2614898924999137b7a249733f57ebaf4e5ddaad ``` The Action uses GitHub's built-in workflow token. GitHub intentionally does not diff --git a/adapters/claude-code/.mcp.json b/adapters/claude-code/.mcp.json index 86a8178..dd3d1f4 100644 --- a/adapters/claude-code/.mcp.json +++ b/adapters/claude-code/.mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@f2438ffe7c482834b74edd494c1c382d22d8cdee", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@2614898924999137b7a249733f57ebaf4e5ddaad", "deploybot-mcp" ] } diff --git a/adapters/cursor/.cursor/mcp.json b/adapters/cursor/.cursor/mcp.json index 86a8178..dd3d1f4 100644 --- a/adapters/cursor/.cursor/mcp.json +++ b/adapters/cursor/.cursor/mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@f2438ffe7c482834b74edd494c1c382d22d8cdee", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@2614898924999137b7a249733f57ebaf4e5ddaad", "deploybot-mcp" ] } diff --git a/examples/github-workflow.yml b/examples/github-workflow.yml index b8fe2a2..cb26497 100644 --- a/examples/github-workflow.yml +++ b/examples/github-workflow.yml @@ -78,7 +78,7 @@ jobs: ref: ${{ github.event.repository.default_branch }} persist-credentials: false # v0.2.25 implementation; keep the full commit for privileged workflows. - - uses: Forward-Future/DeployBot@f2438ffe7c482834b74edd494c1c382d22d8cdee + - uses: Forward-Future/DeployBot@2614898924999137b7a249733f57ebaf4e5ddaad with: # PR and review events reconcile immediately. Release-owner events # advance to the configured admission gate; "merged" observations diff --git a/tests/test_skill.py b/tests/test_skill.py index 1becaff..c79f311 100644 --- a/tests/test_skill.py +++ b/tests/test_skill.py @@ -8,7 +8,7 @@ ROOT = Path(__file__).resolve().parents[1] CANONICAL = ROOT / "skills" / "deploybot" / "SKILL.md" -RELEASE_COMMIT = "f2438ffe7c482834b74edd494c1c382d22d8cdee" +RELEASE_COMMIT = "2614898924999137b7a249733f57ebaf4e5ddaad" CHECKOUT_COMMIT = "9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0" From f7a484db9f11e8eb2547a5ab08f938edc3787105 Mon Sep 17 00:00:00 2001 From: Matthew Berman <748450+mberman84@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:48:44 -0700 Subject: [PATCH 05/16] Track release failures without thread metadata --- src/agent_merge_queue/cli.py | 18 +++++++++++- tests/test_cli.py | 56 ++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/src/agent_merge_queue/cli.py b/src/agent_merge_queue/cli.py index 2d1edf6..cee6a51 100755 --- a/src/agent_merge_queue/cli.py +++ b/src/agent_merge_queue/cli.py @@ -5630,12 +5630,28 @@ def superseded_release_failure( """Find a failed admitted release that a newer merge moved past.""" records = client.thread_records(include_terminal=True) if not isinstance(records, list): - return None + records = [] candidates = { str(record.get("merge_sha") or "") for record in records if record.get("phase") == "merged" and record.get("merge_sha") } + release_workflows = { + *client.config.pipeline.ci_workflows, + *client.config.pipeline.deploy_workflows, + } + candidates.update( + str(run.get("head_sha") or "") + for run in workflow_runs + if run.get("head_sha") + and str(run.get("name") or "") in release_workflows + and str(run.get("event") or "") + in {"push", "workflow_dispatch", "workflow_run", "schedule"} + and ( + not run.get("head_branch") + or str(run.get("head_branch")) == client.config.base_branch + ) + ) for main_sha in sorted(candidates): if main_sha in {current_main_sha, recovered_main_sha}: continue diff --git a/tests/test_cli.py b/tests/test_cli.py index 40f7db4..89a11bc 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2817,6 +2817,62 @@ def test_reactor_pauses_for_failed_superseded_merged_mode_release(self) -> None: ) promote.assert_not_called() + def test_reactor_tracks_superseded_failure_without_thread_metadata(self) -> None: + old = "a" * 40 + current = "b" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = current + client.verified_main_sha.return_value = None + client.thread_records.return_value = [] + client.is_ancestor.return_value = True + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": old, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "success", + }, + { + "id": 2, + "name": "Deploy", + "head_sha": old, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "failure", + }, + ] + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch("agent_merge_queue.cli.command_promote") as promote, + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) + + self.assertEqual(result["state"], "release-held") + self.assertEqual(result["release"]["main_sha"], old) + client.set_pipeline_control.assert_called_once_with( + "paused", f"deploy-failed on {old}", main_sha=old + ) + promote.assert_not_called() + def test_release_repair_claim_creates_one_deterministic_lease(self) -> None: sha = "a" * 40 client = object.__new__(GitHub) From a14fa6c9cbf8a9e7c58541ba015943686226ec47 Mon Sep 17 00:00:00 2001 From: Matthew Berman <748450+mberman84@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:48:50 -0700 Subject: [PATCH 06/16] Pin clients to metadata-independent tracking --- README.md | 4 ++-- adapters/claude-code/.mcp.json | 2 +- adapters/cursor/.cursor/mcp.json | 2 +- examples/github-workflow.yml | 2 +- tests/test_skill.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2246585..a642c9a 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Install the reviewed `v0.2.25` source commit directly from GitHub: ```bash python3 -m pip install \ - 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@2614898924999137b7a249733f57ebaf4e5ddaad' + 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@f7a484db9f11e8eb2547a5ab08f938edc3787105' deploybot init ``` @@ -98,7 +98,7 @@ worker free. It can still dispatch deployment when GitHub suppresses the release commit: ```yaml -- uses: Forward-Future/DeployBot@2614898924999137b7a249733f57ebaf4e5ddaad +- uses: Forward-Future/DeployBot@f7a484db9f11e8eb2547a5ab08f938edc3787105 ``` The Action uses GitHub's built-in workflow token. GitHub intentionally does not diff --git a/adapters/claude-code/.mcp.json b/adapters/claude-code/.mcp.json index dd3d1f4..c96167e 100644 --- a/adapters/claude-code/.mcp.json +++ b/adapters/claude-code/.mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@2614898924999137b7a249733f57ebaf4e5ddaad", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@f7a484db9f11e8eb2547a5ab08f938edc3787105", "deploybot-mcp" ] } diff --git a/adapters/cursor/.cursor/mcp.json b/adapters/cursor/.cursor/mcp.json index dd3d1f4..c96167e 100644 --- a/adapters/cursor/.cursor/mcp.json +++ b/adapters/cursor/.cursor/mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@2614898924999137b7a249733f57ebaf4e5ddaad", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@f7a484db9f11e8eb2547a5ab08f938edc3787105", "deploybot-mcp" ] } diff --git a/examples/github-workflow.yml b/examples/github-workflow.yml index cb26497..1a84f80 100644 --- a/examples/github-workflow.yml +++ b/examples/github-workflow.yml @@ -78,7 +78,7 @@ jobs: ref: ${{ github.event.repository.default_branch }} persist-credentials: false # v0.2.25 implementation; keep the full commit for privileged workflows. - - uses: Forward-Future/DeployBot@2614898924999137b7a249733f57ebaf4e5ddaad + - uses: Forward-Future/DeployBot@f7a484db9f11e8eb2547a5ab08f938edc3787105 with: # PR and review events reconcile immediately. Release-owner events # advance to the configured admission gate; "merged" observations diff --git a/tests/test_skill.py b/tests/test_skill.py index c79f311..729e0be 100644 --- a/tests/test_skill.py +++ b/tests/test_skill.py @@ -8,7 +8,7 @@ ROOT = Path(__file__).resolve().parents[1] CANONICAL = ROOT / "skills" / "deploybot" / "SKILL.md" -RELEASE_COMMIT = "2614898924999137b7a249733f57ebaf4e5ddaad" +RELEASE_COMMIT = "f7a484db9f11e8eb2547a5ab08f938edc3787105" CHECKOUT_COMMIT = "9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0" From 57eab6540143cac9aa1243c8905cccc30004ca29 Mon Sep 17 00:00:00 2001 From: Matthew Berman <748450+mberman84@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:49:51 -0700 Subject: [PATCH 07/16] Treat superseded cancellations as release coalescing --- src/agent_merge_queue/cli.py | 12 +++++++++ tests/test_cli.py | 52 ++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/src/agent_merge_queue/cli.py b/src/agent_merge_queue/cli.py index cee6a51..e07a317 100755 --- a/src/agent_merge_queue/cli.py +++ b/src/agent_merge_queue/cli.py @@ -5665,9 +5665,21 @@ def superseded_release_failure( config=client.config.pipeline, ) if value["state"] == "deploy-failed": + if str((value.get("latest_deploy") or {}).get("conclusion") or "") == ( + "cancelled" + ): + # A newer cumulative main normally cancels a superseded deploy. + # That is release coalescing, not production failure evidence. + continue return value if value["state"] != "ci-failed": continue + if str((value.get("latest_ci") or {}).get("conclusion") or "") == ( + "cancelled" + ): + # Main moving again can cancel obsolete exact-main CI. The newest + # cumulative revision owns the release instead. + continue grace = client.config.pipeline.ci_failure_grace_seconds latest_ci = value.get("latest_ci") or {} failed_at = parse_time( diff --git a/tests/test_cli.py b/tests/test_cli.py index 89a11bc..f5a6808 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2873,6 +2873,58 @@ def test_reactor_tracks_superseded_failure_without_thread_metadata(self) -> None ) promote.assert_not_called() + def test_reactor_ignores_expected_superseded_ci_cancellation(self) -> None: + old = "a" * 40 + current = "b" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = current + client.verified_main_sha.return_value = None + client.thread_records.return_value = [] + client.deployment_notifications.return_value = [] + client.is_ancestor.return_value = True + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": old, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "cancelled", + } + ] + frozen = FreezeResult(None, [], [], [], []) + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch("agent_merge_queue.cli.settle_integration_checks", return_value=[]), + patch("agent_merge_queue.cli.promote_integrations", return_value=[]), + patch( + "agent_merge_queue.cli.command_promote", + return_value={"promoted": [], "waiting": [], "blocked": []}, + ) as promote, + patch("agent_merge_queue.cli.freeze_queue", return_value=frozen), + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) + + self.assertEqual(result["state"], "complete") + client.set_pipeline_control.assert_not_called() + promote.assert_called_once() + def test_release_repair_claim_creates_one_deterministic_lease(self) -> None: sha = "a" * 40 client = object.__new__(GitHub) From 995176306267822b9b69fe28075910649ce4b115 Mon Sep 17 00:00:00 2001 From: Matthew Berman <748450+mberman84@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:49:58 -0700 Subject: [PATCH 08/16] Pin clients to coalescing-safe runtime --- README.md | 4 ++-- adapters/claude-code/.mcp.json | 2 +- adapters/cursor/.cursor/mcp.json | 2 +- examples/github-workflow.yml | 2 +- tests/test_skill.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a642c9a..98538b1 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Install the reviewed `v0.2.25` source commit directly from GitHub: ```bash python3 -m pip install \ - 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@f7a484db9f11e8eb2547a5ab08f938edc3787105' + 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@57eab6540143cac9aa1243c8905cccc30004ca29' deploybot init ``` @@ -98,7 +98,7 @@ worker free. It can still dispatch deployment when GitHub suppresses the release commit: ```yaml -- uses: Forward-Future/DeployBot@f7a484db9f11e8eb2547a5ab08f938edc3787105 +- uses: Forward-Future/DeployBot@57eab6540143cac9aa1243c8905cccc30004ca29 ``` The Action uses GitHub's built-in workflow token. GitHub intentionally does not diff --git a/adapters/claude-code/.mcp.json b/adapters/claude-code/.mcp.json index c96167e..50316a3 100644 --- a/adapters/claude-code/.mcp.json +++ b/adapters/claude-code/.mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@f7a484db9f11e8eb2547a5ab08f938edc3787105", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@57eab6540143cac9aa1243c8905cccc30004ca29", "deploybot-mcp" ] } diff --git a/adapters/cursor/.cursor/mcp.json b/adapters/cursor/.cursor/mcp.json index c96167e..50316a3 100644 --- a/adapters/cursor/.cursor/mcp.json +++ b/adapters/cursor/.cursor/mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@f7a484db9f11e8eb2547a5ab08f938edc3787105", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@57eab6540143cac9aa1243c8905cccc30004ca29", "deploybot-mcp" ] } diff --git a/examples/github-workflow.yml b/examples/github-workflow.yml index 1a84f80..f174dc6 100644 --- a/examples/github-workflow.yml +++ b/examples/github-workflow.yml @@ -78,7 +78,7 @@ jobs: ref: ${{ github.event.repository.default_branch }} persist-credentials: false # v0.2.25 implementation; keep the full commit for privileged workflows. - - uses: Forward-Future/DeployBot@f7a484db9f11e8eb2547a5ab08f938edc3787105 + - uses: Forward-Future/DeployBot@57eab6540143cac9aa1243c8905cccc30004ca29 with: # PR and review events reconcile immediately. Release-owner events # advance to the configured admission gate; "merged" observations diff --git a/tests/test_skill.py b/tests/test_skill.py index 729e0be..22511cb 100644 --- a/tests/test_skill.py +++ b/tests/test_skill.py @@ -8,7 +8,7 @@ ROOT = Path(__file__).resolve().parents[1] CANONICAL = ROOT / "skills" / "deploybot" / "SKILL.md" -RELEASE_COMMIT = "f7a484db9f11e8eb2547a5ab08f938edc3787105" +RELEASE_COMMIT = "57eab6540143cac9aa1243c8905cccc30004ca29" CHECKOUT_COMMIT = "9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0" From 521b1a209c08a2386408b0cf3515a5107cf36c77 Mon Sep 17 00:00:00 2001 From: Matthew Berman <748450+mberman84@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:51:28 -0700 Subject: [PATCH 09/16] Verify health for superseded deployments --- src/agent_merge_queue/cli.py | 17 +++++++++ tests/test_cli.py | 68 ++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/src/agent_merge_queue/cli.py b/src/agent_merge_queue/cli.py index e07a317..8d54425 100755 --- a/src/agent_merge_queue/cli.py +++ b/src/agent_merge_queue/cli.py @@ -5626,6 +5626,8 @@ def superseded_release_failure( verified_main_sha: str | None, workflow_runs: list[dict[str, Any]], recovered_main_sha: str | None, + timeout_seconds: int, + poll_seconds: int = 10, ) -> dict[str, Any] | None: """Find a failed admitted release that a newer merge moved past.""" records = client.thread_records(include_terminal=True) @@ -5664,6 +5666,20 @@ def superseded_release_failure( runs=workflow_runs, config=client.config.pipeline, ) + if value["state"] == "verified" and client.config.pipeline.verifications: + deadline = time.monotonic() + timeout_seconds + while True: + checks = http_verifications(client.config.pipeline) + if all(item["passed"] for item in checks): + break + if time.monotonic() >= deadline: + return { + **value, + "state": "verify-failed", + "verifications": checks, + } + time.sleep(poll_seconds) + continue if value["state"] == "deploy-failed": if str((value.get("latest_deploy") or {}).get("conclusion") or "") == ( "cancelled" @@ -5882,6 +5898,7 @@ def command_react( recovered_main_sha=( str(control.get("recovered_main_sha") or "") or None ), + timeout_seconds=timeout_seconds, ) if failed_release is not None: if client.config.pipeline.pause_on_failure: diff --git a/tests/test_cli.py b/tests/test_cli.py index f5a6808..c54651f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2925,6 +2925,74 @@ def test_reactor_ignores_expected_superseded_ci_cancellation(self) -> None: client.set_pipeline_control.assert_not_called() promote.assert_called_once() + def test_reactor_verifies_superseded_deployment_health(self) -> None: + old = "a" * 40 + current = "b" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": { + "release_admission": "merged", + "verifications": [ + {"name": "Login", "url": "https://example.test/login"} + ], + }, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = current + client.verified_main_sha.return_value = None + client.thread_records.return_value = [] + client.is_ancestor.return_value = True + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": old, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "success", + }, + { + "id": 2, + "name": "Deploy", + "head_sha": old, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "success", + }, + ] + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch( + "agent_merge_queue.cli.http_verifications", + return_value=[{"name": "Login", "passed": False}], + ), + patch("agent_merge_queue.cli.time.monotonic", side_effect=[0, 1]), + patch("agent_merge_queue.cli.time.sleep") as sleep, + patch("agent_merge_queue.cli.command_promote") as promote, + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=1) + + self.assertEqual(result["state"], "release-held") + self.assertEqual(result["release"]["state"], "verify-failed") + client.set_pipeline_control.assert_called_once_with( + "paused", f"verify-failed on {old}", main_sha=old + ) + sleep.assert_not_called() + promote.assert_not_called() + def test_release_repair_claim_creates_one_deterministic_lease(self) -> None: sha = "a" * 40 client = object.__new__(GitHub) From d4f2cfe7bbc08440e4448dca58c1f3d9ea7ef506 Mon Sep 17 00:00:00 2001 From: Matthew Berman <748450+mberman84@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:51:39 -0700 Subject: [PATCH 10/16] Pin clients to health-tracking runtime --- README.md | 4 ++-- adapters/claude-code/.mcp.json | 2 +- adapters/cursor/.cursor/mcp.json | 2 +- examples/github-workflow.yml | 2 +- tests/test_skill.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 98538b1..90f93e5 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Install the reviewed `v0.2.25` source commit directly from GitHub: ```bash python3 -m pip install \ - 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@57eab6540143cac9aa1243c8905cccc30004ca29' + 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@521b1a209c08a2386408b0cf3515a5107cf36c77' deploybot init ``` @@ -98,7 +98,7 @@ worker free. It can still dispatch deployment when GitHub suppresses the release commit: ```yaml -- uses: Forward-Future/DeployBot@57eab6540143cac9aa1243c8905cccc30004ca29 +- uses: Forward-Future/DeployBot@521b1a209c08a2386408b0cf3515a5107cf36c77 ``` The Action uses GitHub's built-in workflow token. GitHub intentionally does not diff --git a/adapters/claude-code/.mcp.json b/adapters/claude-code/.mcp.json index 50316a3..e9b3d47 100644 --- a/adapters/claude-code/.mcp.json +++ b/adapters/claude-code/.mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@57eab6540143cac9aa1243c8905cccc30004ca29", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@521b1a209c08a2386408b0cf3515a5107cf36c77", "deploybot-mcp" ] } diff --git a/adapters/cursor/.cursor/mcp.json b/adapters/cursor/.cursor/mcp.json index 50316a3..e9b3d47 100644 --- a/adapters/cursor/.cursor/mcp.json +++ b/adapters/cursor/.cursor/mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@57eab6540143cac9aa1243c8905cccc30004ca29", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@521b1a209c08a2386408b0cf3515a5107cf36c77", "deploybot-mcp" ] } diff --git a/examples/github-workflow.yml b/examples/github-workflow.yml index f174dc6..decefec 100644 --- a/examples/github-workflow.yml +++ b/examples/github-workflow.yml @@ -78,7 +78,7 @@ jobs: ref: ${{ github.event.repository.default_branch }} persist-credentials: false # v0.2.25 implementation; keep the full commit for privileged workflows. - - uses: Forward-Future/DeployBot@57eab6540143cac9aa1243c8905cccc30004ca29 + - uses: Forward-Future/DeployBot@521b1a209c08a2386408b0cf3515a5107cf36c77 with: # PR and review events reconcile immediately. Release-owner events # advance to the configured admission gate; "merged" observations diff --git a/tests/test_skill.py b/tests/test_skill.py index 22511cb..43a5873 100644 --- a/tests/test_skill.py +++ b/tests/test_skill.py @@ -8,7 +8,7 @@ ROOT = Path(__file__).resolve().parents[1] CANONICAL = ROOT / "skills" / "deploybot" / "SKILL.md" -RELEASE_COMMIT = "57eab6540143cac9aa1243c8905cccc30004ca29" +RELEASE_COMMIT = "521b1a209c08a2386408b0cf3515a5107cf36c77" CHECKOUT_COMMIT = "9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0" From 17f6e9a5c4e8da491e28dc1c99bf53c77e1a2179 Mon Sep 17 00:00:00 2001 From: Matthew Berman <748450+mberman84@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:54:41 -0700 Subject: [PATCH 11/16] Finalize verified receipts before merged admission --- src/agent_merge_queue/cli.py | 21 +++++++++- tests/test_cli.py | 75 ++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 1 deletion(-) diff --git a/src/agent_merge_queue/cli.py b/src/agent_merge_queue/cli.py index 8d54425..af87256 100755 --- a/src/agent_merge_queue/cli.py +++ b/src/agent_merge_queue/cli.py @@ -5979,12 +5979,31 @@ def command_react( emit=False, admit_gate=admission_gate, ) + finalized_release = False + if ( + admission_gate == "merged" + and not release_already_verified + and release_before_merge.get("state") == "verified" + ): + # Complete durable watermark, thread-state, and notification + # bookkeeping before a new merge changes base_sha. Otherwise a busy + # merged-mode queue can keep overtaking already-live receipts. + release_before_merge = command_follow( + client, + timeout_seconds=timeout_seconds, + poll_seconds=10, + json_output=False, + emit=False, + admit_gate=admission_gate, + ) + finalized_release = release_before_merge.get("state") == "verified" release_is_verified = release_already_verified or ( release_before_merge.get("state") == "verified" ) if ( not release_already_verified and release_is_verified + and admission_gate != "merged" and client.config.pipeline.verifications ): health = http_verifications(client.config.pipeline) @@ -5998,7 +6017,7 @@ def command_react( "verifications": health, } release_is_verified = release_before_merge["state"] == "verified" - if release_is_verified: + if release_is_verified and not finalized_release: client.record_verified_main(current_main_sha) # Release admission is independent from release tracking. "merged" # reopens immediately for healthy in-flight releases, "ci-passed" diff --git a/tests/test_cli.py b/tests/test_cli.py index c54651f..526b4c3 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2993,6 +2993,80 @@ def test_reactor_verifies_superseded_deployment_health(self) -> None: sleep.assert_not_called() promote.assert_not_called() + def test_reactor_finalizes_verified_release_before_merged_mode_drain( + self, + ) -> None: + sha = "a" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = sha + client.verified_main_sha.return_value = None + client.thread_records.return_value = [ + {"phase": "merged", "merge_sha": sha, "pull_request": 1} + ] + client.is_ancestor.return_value = True + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + }, + { + "id": 2, + "name": "Deploy", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + }, + ] + frozen = FreezeResult(None, [], [], [], []) + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch( + "agent_merge_queue.cli.command_follow", + return_value={ + "state": "verified", + "main_sha": sha, + "thread_notifications": [{"notification_id": "receipt"}], + }, + ) as follow, + patch("agent_merge_queue.cli.settle_integration_checks", return_value=[]), + patch("agent_merge_queue.cli.promote_integrations", return_value=[]), + patch( + "agent_merge_queue.cli.command_promote", + return_value={"promoted": [], "waiting": [], "blocked": []}, + ), + patch("agent_merge_queue.cli.freeze_queue", return_value=frozen), + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) + + self.assertEqual(result["state"], "complete") + follow.assert_called_once_with( + client, + timeout_seconds=10, + poll_seconds=10, + json_output=False, + emit=False, + admit_gate="merged", + ) + client.record_verified_main.assert_not_called() + def test_release_repair_claim_creates_one_deterministic_lease(self) -> None: sha = "a" * 40 client = object.__new__(GitHub) @@ -6537,6 +6611,7 @@ def test_reactor_does_not_follow_conflicted_all_mode_integration_batch( "trusted_actors": ["trusted"], "coordinator_actors": ["coordinator"], }, + "pipeline": {"release_admission": "verified"}, "integration": {"mode": "all"}, } ) From 7016dfeb1aff9e3969780f9c899c51aaf6c64808 Mon Sep 17 00:00:00 2001 From: Matthew Berman <748450+mberman84@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:54:49 -0700 Subject: [PATCH 12/16] Pin clients to receipt-safe runtime --- README.md | 4 ++-- adapters/claude-code/.mcp.json | 2 +- adapters/cursor/.cursor/mcp.json | 2 +- examples/github-workflow.yml | 2 +- tests/test_skill.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 90f93e5..ce212d3 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Install the reviewed `v0.2.25` source commit directly from GitHub: ```bash python3 -m pip install \ - 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@521b1a209c08a2386408b0cf3515a5107cf36c77' + 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@17f6e9a5c4e8da491e28dc1c99bf53c77e1a2179' deploybot init ``` @@ -98,7 +98,7 @@ worker free. It can still dispatch deployment when GitHub suppresses the release commit: ```yaml -- uses: Forward-Future/DeployBot@521b1a209c08a2386408b0cf3515a5107cf36c77 +- uses: Forward-Future/DeployBot@17f6e9a5c4e8da491e28dc1c99bf53c77e1a2179 ``` The Action uses GitHub's built-in workflow token. GitHub intentionally does not diff --git a/adapters/claude-code/.mcp.json b/adapters/claude-code/.mcp.json index e9b3d47..8d2e4b1 100644 --- a/adapters/claude-code/.mcp.json +++ b/adapters/claude-code/.mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@521b1a209c08a2386408b0cf3515a5107cf36c77", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@17f6e9a5c4e8da491e28dc1c99bf53c77e1a2179", "deploybot-mcp" ] } diff --git a/adapters/cursor/.cursor/mcp.json b/adapters/cursor/.cursor/mcp.json index e9b3d47..8d2e4b1 100644 --- a/adapters/cursor/.cursor/mcp.json +++ b/adapters/cursor/.cursor/mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@521b1a209c08a2386408b0cf3515a5107cf36c77", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@17f6e9a5c4e8da491e28dc1c99bf53c77e1a2179", "deploybot-mcp" ] } diff --git a/examples/github-workflow.yml b/examples/github-workflow.yml index decefec..f1bea4f 100644 --- a/examples/github-workflow.yml +++ b/examples/github-workflow.yml @@ -78,7 +78,7 @@ jobs: ref: ${{ github.event.repository.default_branch }} persist-credentials: false # v0.2.25 implementation; keep the full commit for privileged workflows. - - uses: Forward-Future/DeployBot@521b1a209c08a2386408b0cf3515a5107cf36c77 + - uses: Forward-Future/DeployBot@17f6e9a5c4e8da491e28dc1c99bf53c77e1a2179 with: # PR and review events reconcile immediately. Release-owner events # advance to the configured admission gate; "merged" observations diff --git a/tests/test_skill.py b/tests/test_skill.py index 43a5873..0f248a1 100644 --- a/tests/test_skill.py +++ b/tests/test_skill.py @@ -8,7 +8,7 @@ ROOT = Path(__file__).resolve().parents[1] CANONICAL = ROOT / "skills" / "deploybot" / "SKILL.md" -RELEASE_COMMIT = "521b1a209c08a2386408b0cf3515a5107cf36c77" +RELEASE_COMMIT = "17f6e9a5c4e8da491e28dc1c99bf53c77e1a2179" CHECKOUT_COMMIT = "9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0" From 28b98aa7724a1ca4c8b1f7d589d39d658bf914f6 Mon Sep 17 00:00:00 2001 From: Matthew Berman <748450+mberman84@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:56:26 -0700 Subject: [PATCH 13/16] Ignore pre-install release history --- src/agent_merge_queue/cli.py | 73 ++++++++++++++++++------------------ tests/test_cli.py | 27 +++++++++---- 2 files changed, 56 insertions(+), 44 deletions(-) diff --git a/src/agent_merge_queue/cli.py b/src/agent_merge_queue/cli.py index af87256..c35b81b 100755 --- a/src/agent_merge_queue/cli.py +++ b/src/agent_merge_queue/cli.py @@ -5884,42 +5884,6 @@ def command_react( runs=workflow_runs, config=client.config.pipeline, ) - if ( - admission_gate == "merged" - and release_before_merge.get("state") != "verified" - ): - failed_release = superseded_release_failure( - client, - current_main_sha=current_main_sha, - verified_main_sha=( - str(raw_watermark) if isinstance(raw_watermark, str) else None - ), - workflow_runs=workflow_runs, - recovered_main_sha=( - str(control.get("recovered_main_sha") or "") or None - ), - timeout_seconds=timeout_seconds, - ) - if failed_release is not None: - if client.config.pipeline.pause_on_failure: - client.set_pipeline_control( - "paused", - f"{failed_release['state']} on {failed_release['main_sha']}", - main_sha=str(failed_release["main_sha"]), - ) - result = { - "state": "release-held", - "release": failed_release, - "promoted": {}, - "promoted_integrations": [], - "drain": {}, - "dispatched_ci": [], - "integrations": [], - "integration_checks": [], - "reconciled_merges": reconciled_merges, - } - print(json.dumps(result, indent=2, sort_keys=True)) - return result release_already_verified = raw_watermark == current_main_sha has_release_owner = ( not release_already_verified @@ -5964,6 +5928,43 @@ def command_react( # durable merged obligation owns it; historical runs for older # SHAs cannot make an unobservable release finish. client.record_verified_main(current_main_sha) + if ( + admission_gate == "merged" + and release_before_merge.get("state") != "verified" + and (raw_watermark is not None or has_release_owner) + ): + failed_release = superseded_release_failure( + client, + current_main_sha=current_main_sha, + verified_main_sha=( + str(raw_watermark) if isinstance(raw_watermark, str) else None + ), + workflow_runs=workflow_runs, + recovered_main_sha=( + str(control.get("recovered_main_sha") or "") or None + ), + timeout_seconds=timeout_seconds, + ) + if failed_release is not None: + if client.config.pipeline.pause_on_failure: + client.set_pipeline_control( + "paused", + f"{failed_release['state']} on {failed_release['main_sha']}", + main_sha=str(failed_release["main_sha"]), + ) + result = { + "state": "release-held", + "release": failed_release, + "promoted": {}, + "promoted_integrations": [], + "drain": {}, + "dispatched_ci": [], + "integrations": [], + "integration_checks": [], + "reconciled_merges": reconciled_merges, + } + print(json.dumps(result, indent=2, sort_keys=True)) + return result if admission_gate == "merged" and release_before_merge.get("state") == ( "awaiting-deploy" ): diff --git a/tests/test_cli.py b/tests/test_cli.py index 526b4c3..140eedd 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2367,7 +2367,15 @@ def test_reactor_holds_newly_merged_revision_before_ci_is_visible(self) -> None: def test_reactor_seeds_first_install_despite_historical_runs(self) -> None: sha = "a" * 40 client = Mock() - client.config = CONFIG + client.config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) client.pipeline_control.return_value = {"state": "running"} client.base_sha.return_value = sha client.workflow_runs.return_value = [ @@ -2375,13 +2383,16 @@ def test_reactor_seeds_first_install_despite_historical_runs(self) -> None: "id": 99, "name": "CI", "head_sha": "b" * 40, + "head_branch": "main", + "event": "workflow_dispatch", "status": "completed", - "conclusion": "success", + "conclusion": "failure", } ] client.verified_main_sha.return_value = None client.thread_records.return_value = [] client.deployment_notifications.return_value = [] + client.is_ancestor.return_value = True frozen = FreezeResult(None, [], [], [], []) with ( patch("agent_merge_queue.cli.settle_integration_checks", return_value=[]), @@ -2833,9 +2844,9 @@ def test_reactor_tracks_superseded_failure_without_thread_metadata(self) -> None client.config = config client.pipeline_control.return_value = {"state": "running"} client.base_sha.return_value = current - client.verified_main_sha.return_value = None + client.verified_main_sha.return_value = "f" * 40 client.thread_records.return_value = [] - client.is_ancestor.return_value = True + client.is_ancestor.side_effect = lambda _left, right: right == current client.workflow_runs.return_value = [ { "id": 1, @@ -2889,10 +2900,10 @@ def test_reactor_ignores_expected_superseded_ci_cancellation(self) -> None: client.config = config client.pipeline_control.return_value = {"state": "running"} client.base_sha.return_value = current - client.verified_main_sha.return_value = None + client.verified_main_sha.return_value = "f" * 40 client.thread_records.return_value = [] client.deployment_notifications.return_value = [] - client.is_ancestor.return_value = True + client.is_ancestor.side_effect = lambda _left, right: right == current client.workflow_runs.return_value = [ { "id": 1, @@ -2946,9 +2957,9 @@ def test_reactor_verifies_superseded_deployment_health(self) -> None: client.config = config client.pipeline_control.return_value = {"state": "running"} client.base_sha.return_value = current - client.verified_main_sha.return_value = None + client.verified_main_sha.return_value = "f" * 40 client.thread_records.return_value = [] - client.is_ancestor.return_value = True + client.is_ancestor.side_effect = lambda _left, right: right == current client.workflow_runs.return_value = [ { "id": 1, From 42128a6f8dd014cfd9969652ab2da23586c8fb30 Mon Sep 17 00:00:00 2001 From: Matthew Berman <748450+mberman84@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:56:35 -0700 Subject: [PATCH 14/16] Pin clients to baseline-safe runtime --- README.md | 4 ++-- adapters/claude-code/.mcp.json | 2 +- adapters/cursor/.cursor/mcp.json | 2 +- examples/github-workflow.yml | 2 +- tests/test_skill.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ce212d3..a0ee8ec 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Install the reviewed `v0.2.25` source commit directly from GitHub: ```bash python3 -m pip install \ - 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@17f6e9a5c4e8da491e28dc1c99bf53c77e1a2179' + 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@28b98aa7724a1ca4c8b1f7d589d39d658bf914f6' deploybot init ``` @@ -98,7 +98,7 @@ worker free. It can still dispatch deployment when GitHub suppresses the release commit: ```yaml -- uses: Forward-Future/DeployBot@17f6e9a5c4e8da491e28dc1c99bf53c77e1a2179 +- uses: Forward-Future/DeployBot@28b98aa7724a1ca4c8b1f7d589d39d658bf914f6 ``` The Action uses GitHub's built-in workflow token. GitHub intentionally does not diff --git a/adapters/claude-code/.mcp.json b/adapters/claude-code/.mcp.json index 8d2e4b1..c667b45 100644 --- a/adapters/claude-code/.mcp.json +++ b/adapters/claude-code/.mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@17f6e9a5c4e8da491e28dc1c99bf53c77e1a2179", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@28b98aa7724a1ca4c8b1f7d589d39d658bf914f6", "deploybot-mcp" ] } diff --git a/adapters/cursor/.cursor/mcp.json b/adapters/cursor/.cursor/mcp.json index 8d2e4b1..c667b45 100644 --- a/adapters/cursor/.cursor/mcp.json +++ b/adapters/cursor/.cursor/mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@17f6e9a5c4e8da491e28dc1c99bf53c77e1a2179", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@28b98aa7724a1ca4c8b1f7d589d39d658bf914f6", "deploybot-mcp" ] } diff --git a/examples/github-workflow.yml b/examples/github-workflow.yml index f1bea4f..35c8193 100644 --- a/examples/github-workflow.yml +++ b/examples/github-workflow.yml @@ -78,7 +78,7 @@ jobs: ref: ${{ github.event.repository.default_branch }} persist-credentials: false # v0.2.25 implementation; keep the full commit for privileged workflows. - - uses: Forward-Future/DeployBot@17f6e9a5c4e8da491e28dc1c99bf53c77e1a2179 + - uses: Forward-Future/DeployBot@28b98aa7724a1ca4c8b1f7d589d39d658bf914f6 with: # PR and review events reconcile immediately. Release-owner events # advance to the configured admission gate; "merged" observations diff --git a/tests/test_skill.py b/tests/test_skill.py index 0f248a1..b628ee3 100644 --- a/tests/test_skill.py +++ b/tests/test_skill.py @@ -8,7 +8,7 @@ ROOT = Path(__file__).resolve().parents[1] CANONICAL = ROOT / "skills" / "deploybot" / "SKILL.md" -RELEASE_COMMIT = "17f6e9a5c4e8da491e28dc1c99bf53c77e1a2179" +RELEASE_COMMIT = "28b98aa7724a1ca4c8b1f7d589d39d658bf914f6" CHECKOUT_COMMIT = "9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0" From 12c6c03aa76a553fa4068279baa29e90a30bbeb1 Mon Sep 17 00:00:00 2001 From: Matthew Berman <748450+mberman84@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:58:37 -0700 Subject: [PATCH 15/16] Finalize cumulative recovery boundaries --- src/agent_merge_queue/cli.py | 80 ++++++++++++++++++++++++------- tests/test_cli.py | 91 ++++++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+), 16 deletions(-) diff --git a/src/agent_merge_queue/cli.py b/src/agent_merge_queue/cli.py index c35b81b..6ae8219 100755 --- a/src/agent_merge_queue/cli.py +++ b/src/agent_merge_queue/cli.py @@ -15,7 +15,7 @@ import sys import time from concurrent.futures import ThreadPoolExecutor -from dataclasses import asdict, dataclass +from dataclasses import asdict, dataclass, replace from datetime import datetime, timedelta, timezone from pathlib import Path from typing import Any, Callable, Iterable @@ -5654,31 +5654,79 @@ def superseded_release_failure( or str(run.get("head_branch")) == client.config.base_branch ) ) - for main_sha in sorted(candidates): + states: dict[str, dict[str, Any]] = {} + for main_sha in candidates: if main_sha in {current_main_sha, recovered_main_sha}: continue if not client.is_ancestor(main_sha, current_main_sha): continue if verified_main_sha and client.is_ancestor(main_sha, verified_main_sha): continue - value = release_state( + states[main_sha] = release_state( main_sha=main_sha, runs=workflow_runs, config=client.config.pipeline, ) - if value["state"] == "verified" and client.config.pipeline.verifications: - deadline = time.monotonic() + timeout_seconds - while True: - checks = http_verifications(client.config.pipeline) - if all(item["passed"] for item in checks): - break - if time.monotonic() >= deadline: - return { - **value, - "state": "verify-failed", - "verifications": checks, - } - time.sleep(poll_seconds) + + verified_candidates = [ + main_sha for main_sha, value in states.items() if value["state"] == "verified" + ] + newest_verified = [ + main_sha + for main_sha in verified_candidates + if not any( + main_sha != other and client.is_ancestor(main_sha, other) + for other in verified_candidates + ) + ] + effective_verified_sha: str | None = None + if newest_verified: + effective_verified_sha = max( + newest_verified, + key=lambda main_sha: str( + (states[main_sha].get("latest_deploy") or {}).get("created_at") or "" + ), + ) + + class FixedBaseReleaseClient: + def __init__(self, wrapped: GitHub, base_sha: str) -> None: + self._wrapped = wrapped + self._base_sha = base_sha + self.config = replace( + wrapped.config, + pipeline=replace( + wrapped.config.pipeline, + pause_on_failure=False, + ), + ) + + def base_sha(self) -> str: + return self._base_sha + + def __getattr__(self, name: str) -> Any: + return getattr(self._wrapped, name) + + finalized = command_follow( + FixedBaseReleaseClient(client, effective_verified_sha), + timeout_seconds=timeout_seconds, + poll_seconds=poll_seconds, + json_output=False, + emit=False, + admit_gate="verified", + ) + if finalized.get("state") != "verified": + return finalized + + def failure_time(value: dict[str, Any]) -> str: + run = value.get("latest_deploy") or value.get("latest_ci") or {} + return str(run.get("updated_at") or run.get("created_at") or "") + + for main_sha, value in sorted( + states.items(), key=lambda item: failure_time(item[1]), reverse=True + ): + if effective_verified_sha and client.is_ancestor( + main_sha, effective_verified_sha + ): continue if value["state"] == "deploy-failed": if str((value.get("latest_deploy") or {}).get("conclusion") or "") == ( diff --git a/tests/test_cli.py b/tests/test_cli.py index 140eedd..1491fe0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -3004,6 +3004,97 @@ def test_reactor_verifies_superseded_deployment_health(self) -> None: sleep.assert_not_called() promote.assert_not_called() + def test_reactor_uses_newer_verified_release_as_recovery_boundary(self) -> None: + failed = "a" * 40 + recovered = "b" * 40 + current = "c" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = current + client.verified_main_sha.return_value = "f" * 40 + client.thread_records.return_value = [] + client.deployment_notifications.return_value = [] + client.is_ancestor.side_effect = lambda left, right: ( + right == current or (left == failed and right == recovered) + ) + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": failed, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "success", + "created_at": "2026-06-20T00:00:00Z", + }, + { + "id": 2, + "name": "Deploy", + "head_sha": failed, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "failure", + "created_at": "2026-06-20T00:01:00Z", + }, + { + "id": 3, + "name": "CI", + "head_sha": recovered, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "success", + "created_at": "2026-06-20T00:02:00Z", + }, + { + "id": 4, + "name": "Deploy", + "head_sha": recovered, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "success", + "created_at": "2026-06-20T00:03:00Z", + }, + ] + frozen = FreezeResult(None, [], [], [], []) + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch( + "agent_merge_queue.cli.command_follow", + return_value={"state": "verified", "main_sha": recovered}, + ) as follow, + patch("agent_merge_queue.cli.settle_integration_checks", return_value=[]), + patch("agent_merge_queue.cli.promote_integrations", return_value=[]), + patch( + "agent_merge_queue.cli.command_promote", + return_value={"promoted": [], "waiting": [], "blocked": []}, + ) as promote, + patch("agent_merge_queue.cli.freeze_queue", return_value=frozen), + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) + + self.assertEqual(result["state"], "complete") + self.assertEqual(follow.call_args.args[0].base_sha(), recovered) + client.set_pipeline_control.assert_not_called() + promote.assert_called_once() + def test_reactor_finalizes_verified_release_before_merged_mode_drain( self, ) -> None: From de24eb9fbd08b87b99f400e4edbbffbac40dd902 Mon Sep 17 00:00:00 2001 From: Matthew Berman <748450+mberman84@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:58:45 -0700 Subject: [PATCH 16/16] Pin clients to cumulative-recovery runtime --- README.md | 4 ++-- adapters/claude-code/.mcp.json | 2 +- adapters/cursor/.cursor/mcp.json | 2 +- examples/github-workflow.yml | 2 +- tests/test_skill.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a0ee8ec..3cd737c 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Install the reviewed `v0.2.25` source commit directly from GitHub: ```bash python3 -m pip install \ - 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@28b98aa7724a1ca4c8b1f7d589d39d658bf914f6' + 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@12c6c03aa76a553fa4068279baa29e90a30bbeb1' deploybot init ``` @@ -98,7 +98,7 @@ worker free. It can still dispatch deployment when GitHub suppresses the release commit: ```yaml -- uses: Forward-Future/DeployBot@28b98aa7724a1ca4c8b1f7d589d39d658bf914f6 +- uses: Forward-Future/DeployBot@12c6c03aa76a553fa4068279baa29e90a30bbeb1 ``` The Action uses GitHub's built-in workflow token. GitHub intentionally does not diff --git a/adapters/claude-code/.mcp.json b/adapters/claude-code/.mcp.json index c667b45..375d6e7 100644 --- a/adapters/claude-code/.mcp.json +++ b/adapters/claude-code/.mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@28b98aa7724a1ca4c8b1f7d589d39d658bf914f6", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@12c6c03aa76a553fa4068279baa29e90a30bbeb1", "deploybot-mcp" ] } diff --git a/adapters/cursor/.cursor/mcp.json b/adapters/cursor/.cursor/mcp.json index c667b45..375d6e7 100644 --- a/adapters/cursor/.cursor/mcp.json +++ b/adapters/cursor/.cursor/mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@28b98aa7724a1ca4c8b1f7d589d39d658bf914f6", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@12c6c03aa76a553fa4068279baa29e90a30bbeb1", "deploybot-mcp" ] } diff --git a/examples/github-workflow.yml b/examples/github-workflow.yml index 35c8193..062ee42 100644 --- a/examples/github-workflow.yml +++ b/examples/github-workflow.yml @@ -78,7 +78,7 @@ jobs: ref: ${{ github.event.repository.default_branch }} persist-credentials: false # v0.2.25 implementation; keep the full commit for privileged workflows. - - uses: Forward-Future/DeployBot@28b98aa7724a1ca4c8b1f7d589d39d658bf914f6 + - uses: Forward-Future/DeployBot@12c6c03aa76a553fa4068279baa29e90a30bbeb1 with: # PR and review events reconcile immediately. Release-owner events # advance to the configured admission gate; "merged" observations diff --git a/tests/test_skill.py b/tests/test_skill.py index b628ee3..f573fb6 100644 --- a/tests/test_skill.py +++ b/tests/test_skill.py @@ -8,7 +8,7 @@ ROOT = Path(__file__).resolve().parents[1] CANONICAL = ROOT / "skills" / "deploybot" / "SKILL.md" -RELEASE_COMMIT = "28b98aa7724a1ca4c8b1f7d589d39d658bf914f6" +RELEASE_COMMIT = "12c6c03aa76a553fa4068279baa29e90a30bbeb1" CHECKOUT_COMMIT = "9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0"