diff --git a/.mergequeue.example.toml b/.mergequeue.example.toml index 1985104..5389065 100644 --- a/.mergequeue.example.toml +++ b/.mergequeue.example.toml @@ -44,6 +44,7 @@ registry_title = "DeployBot delivery registry" thread_active_hours = 72 ci_workflows = ["CI"] deploy_workflows = ["Deploy"] +batch_settle_seconds = 0 repair_hold_minutes = 60 hold_merges_while_releasing = true repair_branch_prefix = "deploybot/repair" @@ -52,10 +53,10 @@ merge_to_live_target_minutes = 10 auto_promote = true intent_scope = "head" # safest: a trusted source agent refreshes replacement heads pause_on_failure = true -# verified (default, safest) holds new merges until the cumulative release is -# live. ci-passed admits the next batch as soon as exact-main CI passes and lets -# deploy and health checks keep following, trading blast radius for throughput. -release_admission = "verified" +# merged (default) admits independent ready work immediately after the previous +# merge. ci-passed waits for exact-main CI; verified waits until production is +# live. Every mode pauses future merges when a later release failure is observed. +release_admission = "merged" # Receives best-effort events, including retryable thread-deployed messages. # webhook_url_env = "DEPLOYBOT_WEBHOOK_URL" diff --git a/README.md b/README.md index 99851d3..3cd737c 100644 --- a/README.md +++ b/README.md @@ -11,11 +11,11 @@ integration PRs, follows `main` through production, and pauses after failures. ## Install -Install the reviewed `v0.2.24` source commit directly from GitHub: +Install the reviewed `v0.2.25` source commit directly from GitHub: ```bash python3 -m pip install \ - 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@73004ea7c9dcb81e7f1281c0687aea0897d1571d' + 'deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@12c6c03aa76a553fa4068279baa29e90a30bbeb1' deploybot init ``` @@ -90,12 +90,15 @@ completions, and completed external check suites. 
Keep its `workflows` list aligned with `pipeline.ci_workflows`. A five-minute scheduled reconciliation rereads all durable state in case GitHub concurrency coalesces the last pending event in a burst. The privileged worker never checks out or executes -pull-request code. The Action follows releases by default so the same serialized -worker can dispatch deployment when GitHub suppresses the `workflow_run` event -for token-dispatched CI. Pin the Action to the full reviewed release commit: +pull-request code. The Action advances releases to the configured admission +gate. In the default `merged` mode it returns after each healthy observation, +leaving completion to later release events and keeping the serialized merge +worker free. It can still dispatch deployment when GitHub suppresses the +`workflow_run` event for token-dispatched CI. Pin the Action to the full reviewed +release commit: ```yaml -- uses: Forward-Future/DeployBot@73004ea7c9dcb81e7f1281c0687aea0897d1571d +- uses: Forward-Future/DeployBot@12c6c03aa76a553fa4068279baa29e90a30bbeb1 ``` The Action uses GitHub's built-in workflow token. GitHub intentionally does not @@ -141,6 +144,13 @@ workflow name, base branch, head SHA, event, status, and conclusion to match the expected successful exact-main CI run. The deployment must still pull the current base branch and stop if it no longer equals `ci_sha`. +The deployment workflow must also acquire the repository's shared deployment +lock, fetch the base branch again after acquiring it, and coalesce superseded +requests onto that newest integrated SHA. It must never deploy an older SHA after +a newer one, and it keeps the lock through production health verification. These +release rules are unchanged by `release_admission = "merged"`; only merge +admission becomes asynchronous. + The workflow bot and each person allowed to request deployment must be explicitly listed: @@ -172,11 +182,11 @@ work, and creates integration PRs when configured. 
New batches contain at most `integration.max_batch_size` entries; later FIFO work remains in the next batch. A larger indivisible source-overlap or dependency closure is the sole exception: it ships alone, never mixed with unrelated work. -After any merge, admission stays closed until the cumulative exact-main release -is verified live, preventing newer merges from starving an older deployment. -Set `pipeline.release_admission = "ci-passed"` to reopen admission as soon as -exact-main CI is green—deploy and health checks keep following in the -background—when higher merge throughput is worth a larger failure blast radius. +By default, `pipeline.release_admission = "merged"`: after one healthy merge, +DeployBot immediately admits the next independent ready PR or batch. Exact-main +CI, deployment, and health checks keep tracking asynchronously, and a later real +failure pauses future merges. Use `ci-passed` to wait for exact-main CI before +admitting more work, or `verified` to wait until the cumulative revision is live. Draft status and incomplete checks or reviews remain waiting states; they do not create a repair latch. A conflict, failed gate, unresolved review, manual block, or stale authorized head @@ -269,7 +279,7 @@ ending the PR-opening-thread response. 
[pipeline] ci_workflows = ["CI"] deploy_workflows = ["Deploy"] -batch_settle_seconds = 15 +batch_settle_seconds = 0 ci_failure_grace_seconds = 90 promotion_workers = 4 hold_merges_while_releasing = true @@ -279,7 +289,7 @@ merge_to_live_target_minutes = 10 auto_promote = true intent_scope = "head" pause_on_failure = true -release_admission = "verified" # or "ci-passed" for higher merge throughput +release_admission = "merged" # or "ci-passed" / "verified" for stricter admission [[pipeline.verifications]] name = "Login" diff --git a/action.yml b/action.yml index 70341cf..8546666 100644 --- a/action.yml +++ b/action.yml @@ -6,7 +6,7 @@ inputs: required: false default: .mergequeue.toml follow: - description: Follow cumulative main through CI and deployment + description: Advance cumulative main to the configured release-admission gate required: false default: "true" dispatch_ci: diff --git a/adapters/claude-code/.claude-plugin/plugin.json b/adapters/claude-code/.claude-plugin/plugin.json index 3197f1a..76794d5 100644 --- a/adapters/claude-code/.claude-plugin/plugin.json +++ b/adapters/claude-code/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "deploybot", - "version": "0.2.24", + "version": "0.2.25", "description": "DeployBot: a provider-neutral GitHub merge queue for coding agents", "author": { "name": "DeployBot contributors" diff --git a/adapters/claude-code/.mcp.json b/adapters/claude-code/.mcp.json index dff1ee2..375d6e7 100644 --- a/adapters/claude-code/.mcp.json +++ b/adapters/claude-code/.mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@73004ea7c9dcb81e7f1281c0687aea0897d1571d", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@12c6c03aa76a553fa4068279baa29e90a30bbeb1", "deploybot-mcp" ] } diff --git a/adapters/claude-code/skills/deploybot/SKILL.md b/adapters/claude-code/skills/deploybot/SKILL.md index 4045725..707674c 
100644 --- a/adapters/claude-code/skills/deploybot/SKILL.md +++ b/adapters/claude-code/skills/deploybot/SKILL.md @@ -87,8 +87,10 @@ Merge independent ready pull requests back-to-back. Route source-overlap groups through `create_integration_pull_request`; when policy mode is `all`, validate the entire frozen batch through that cumulative PR. Never invent a conflict resolution. Return the repair packet to its source thread, then call `resume` -after its new exact head passes. Finish with `follow_release`, following newer -cumulative base heads until CI, deployment, and configured health checks verify. +after its new exact head passes. Keep release tracking event-driven: in +`release_admission = "merged"` mode, admit independent ready work immediately +after a healthy merge while later events continue CI, deployment, and health +tracking. Scheduled reconciliation is a fallback, not the normal promotion path. Genuine repair blocks may hold overlapping ready work for the configured bounded repair window, but they remain merge-ineligible until the trusted source agent @@ -123,9 +125,10 @@ named thread already owns that failed SHA; wait for that repair and never create a competing PR. The owner is encoded in the atomic branch ref, so a registry write failure is recovered by calling the same tool again. -New batches are FIFO-bounded by `integration.max_batch_size`, and a merged batch -closes admission until its cumulative main revision is verified live. Do not -override either boundary for later work. Never execute merged PR code inside +New batches are FIFO-bounded by `integration.max_batch_size`. Honor the configured +release-admission gate: `merged` permits the next independent batch immediately, +while `ci-passed` and `verified` impose stricter release fences. A later observed +release failure pauses future merges in every mode. 
Never execute merged PR code inside the privileged coordinator; generated-artifact conflicts go to the elected repair owner for a normal reviewed rebuild. When PR-authored checks are required, use a GitHub App installation token, list its bot login in diff --git a/adapters/claude-code/skills/manage-merge-queue/SKILL.md b/adapters/claude-code/skills/manage-merge-queue/SKILL.md index d377d79..16745ec 100644 --- a/adapters/claude-code/skills/manage-merge-queue/SKILL.md +++ b/adapters/claude-code/skills/manage-merge-queue/SKILL.md @@ -26,8 +26,9 @@ poll or merge an unlabeled PR. Use `pipeline_status` and `react_to_delivery_event` for bursts. Skip blockers, honor dependencies, route overlap or cumulative validation through `create_integration_pull_request`, return repair packets to the source thread, -and use `resume_pull_request` after fresh review. Finish with `follow_release`; -a failed CI or deployment pauses the pipeline until verified recovery. +and use `resume_pull_request` after fresh review. In `release_admission = +"merged"` mode, admit independent ready work immediately after merge while +later events track CI and deployment; a later failure pauses the pipeline until verified recovery. A genuine repair remains merge-ineligible, but DeployBot may temporarily hold overlapping ready work for the configured bounded repair window so concurrent @@ -35,8 +36,8 @@ merges do not repeatedly invalidate the replacement head. Before creating an exact-main recovery, call `claim_release_repair`; only the returned `owned` thread may use the deterministic repair branch. Respect the -maximum batch size and keep new merges closed while an earlier release is -unfinished. +maximum batch size and the selected `merged`, `ci-passed`, or `verified` +release-admission fence. Immediately before asking the user to `unpause` or take another repair action, call `pipeline_status` again.
Never show a stale pause prompt when durable state diff --git a/adapters/codex/agent-merge-queue/.codex-plugin/plugin.json b/adapters/codex/agent-merge-queue/.codex-plugin/plugin.json index d0ec9d3..156bb62 100644 --- a/adapters/codex/agent-merge-queue/.codex-plugin/plugin.json +++ b/adapters/codex/agent-merge-queue/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "deploybot", - "version": "0.2.24", + "version": "0.2.25", "description": "Coordinate exact-head pull requests through verified deployment and thread notification", "author": { "name": "DeployBot contributors" diff --git a/adapters/codex/agent-merge-queue/skills/deploybot/SKILL.md b/adapters/codex/agent-merge-queue/skills/deploybot/SKILL.md index b85760c..055a843 100644 --- a/adapters/codex/agent-merge-queue/skills/deploybot/SKILL.md +++ b/adapters/codex/agent-merge-queue/skills/deploybot/SKILL.md @@ -81,9 +81,10 @@ Merge independent ready pull requests back-to-back. Route source-overlap groups through `deploybot integrate`; when policy mode is `all`, validate the entire frozen batch through that cumulative PR. Never invent a conflict resolution. Return the repair packet to its source thread, then run `deploybot resume` after -its new exact head passes. Finish with `deploybot follow --json`, following -newer cumulative base heads until CI, deployment, and configured health checks -verify. +its new exact head passes. Keep release tracking event-driven: in +`release_admission = "merged"` mode, admit independent ready work immediately +after a healthy merge while later events continue CI, deployment, and health +tracking. Scheduled reconciliation is a fallback, not the normal promotion path. Genuine repair blocks may hold overlapping ready work for the configured bounded repair window, but they remain merge-ineligible until the trusted source agent @@ -118,9 +119,10 @@ named thread already owns that failed SHA; wait for that repair and never create a competing PR. 
The owner is encoded in the atomic branch ref, so a registry write failure is recovered by calling the same tool again. -New batches are FIFO-bounded by `integration.max_batch_size`, and a merged batch -closes admission until its cumulative main revision is verified live. Do not -override either boundary for later work. Never execute merged PR code inside +New batches are FIFO-bounded by `integration.max_batch_size`. Honor the configured +release-admission gate: `merged` permits the next independent batch immediately, +while `ci-passed` and `verified` impose stricter release fences. A later observed +release failure pauses future merges in every mode. Never execute merged PR code inside the privileged coordinator; generated-artifact conflicts go to the elected repair owner for a normal reviewed rebuild. When PR-authored checks are required, use a GitHub App installation token, list its bot login in diff --git a/adapters/codex/agent-merge-queue/skills/manage-merge-queue/SKILL.md b/adapters/codex/agent-merge-queue/skills/manage-merge-queue/SKILL.md index 46860b7..48b56b5 100644 --- a/adapters/codex/agent-merge-queue/skills/manage-merge-queue/SKILL.md +++ b/adapters/codex/agent-merge-queue/skills/manage-merge-queue/SKILL.md @@ -27,8 +27,9 @@ Run `deploybot status --json` before a burst and `deploybot react` to coordinate it. Merge independent ready PRs back-to-back, skip blocked work, honor explicit dependencies, and use `deploybot integrate` for overlaps or a cumulative batch gate. Return repair packets to their source thread and run `deploybot resume` -after fresh review. Finish with `deploybot follow --json`; a failed CI or -deployment pauses the pipeline until verified recovery. +after fresh review. In `release_admission = "merged"` mode, admit independent +ready work immediately after merge while later events track CI and deployment; +a later failure pauses the pipeline until verified recovery.
A genuine repair remains merge-ineligible, but DeployBot may temporarily hold overlapping ready work for the configured bounded repair window so concurrent @@ -36,8 +37,8 @@ merges do not repeatedly invalidate the replacement head. Before creating an exact-main recovery, run `deploybot claim-release-repair`; only the returned `owned` thread may use the deterministic repair branch. Respect the -maximum batch size and keep new merges closed while an earlier release is -unfinished. +maximum batch size and the selected `merged`, `ci-passed`, or `verified` +release-admission fence. Immediately before asking the user to `unpause` or take another repair action, run `deploybot status --json` again. Never show a stale pause prompt when diff --git a/adapters/cursor/.cursor/mcp.json b/adapters/cursor/.cursor/mcp.json index dff1ee2..375d6e7 100644 --- a/adapters/cursor/.cursor/mcp.json +++ b/adapters/cursor/.cursor/mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@73004ea7c9dcb81e7f1281c0687aea0897d1571d", + "deploybot-merge-queue[mcp] @ git+https://github.com/Forward-Future/DeployBot.git@12c6c03aa76a553fa4068279baa29e90a30bbeb1", "deploybot-mcp" ] } diff --git a/adapters/cursor/.cursor/rules/deploybot.mdc b/adapters/cursor/.cursor/rules/deploybot.mdc index b6a76da..b0e6174 100644 --- a/adapters/cursor/.cursor/rules/deploybot.mdc +++ b/adapters/cursor/.cursor/rules/deploybot.mdc @@ -15,6 +15,10 @@ the stable Cursor thread ID, never prompts or transcripts. Refresh intent only after replacement-head review. Only the coordinator may react, integrate, drain, follow, pause, or resume repaired work. +Honor `pipeline.release_admission`. In `merged` mode, admit the next independent +ready PR immediately after a healthy merge while release events continue CI, +deployment, and health tracking. A later failure pauses future merges normally. 
+ Immediately before asking the user to unpause or take repair action, call `pipeline_status` again and suppress the request if durable state is already running or the release advanced. The original deploy instruction authorizes the diff --git a/adapters/cursor/.cursor/rules/manage-merge-queue.mdc b/adapters/cursor/.cursor/rules/manage-merge-queue.mdc index 79e56f9..a76f3af 100644 --- a/adapters/cursor/.cursor/rules/manage-merge-queue.mdc +++ b/adapters/cursor/.cursor/rules/manage-merge-queue.mdc @@ -13,4 +13,6 @@ wakes GitHub and promotes only after fresh exact-head gates; never poll or merge an unlabeled PR. Use `pipeline_status` and `react_to_delivery_event`, skip blockers, honor dependencies, route overlaps or cumulative validation through one integration PR, return repair packets to their source thread, atomically -resume after fresh review, and follow cumulative `main` through deployment. +resume after fresh review, and follow cumulative `main` through deployment. In +`release_admission = "merged"` mode, admit independent ready work immediately +after merge while later events continue release tracking; a later failure pauses future merges. diff --git a/adapters/cursor/AGENTS.md b/adapters/cursor/AGENTS.md index 2c7dbf8..a3e98dc 100644 --- a/adapters/cursor/AGENTS.md +++ b/adapters/cursor/AGENTS.md @@ -27,7 +27,9 @@ branch is itself a merge and is forbidden outside DeployBot. Never poll, merge an unlabeled PR, or absorb unrelated work. Let the event worker promote fresh exact heads, use one integration PR for overlaps or cumulative validation, return repair packets to the source thread, atomically resume after -fresh review, and follow cumulative `main` through verified deployment. +fresh review, and follow cumulative `main` through verified deployment. When +`release_admission = "merged"`, admit independent ready work immediately after +merge while release events continue asynchronously; later failures still pause future merges.
For each verified `thread_notifications` entry, post its message back to the native PR-opening thread and only then call `acknowledge_thread_deployment`. Leave diff --git a/docs/reference.md b/docs/reference.md index ece430c..77c739a 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -1,7 +1,7 @@ # DeployBot reference This reference describes the CLI, MCP server, policy file, and GitHub Action in -DeployBot v0.2.24. GitHub labels and authenticated comments are the durable state; +DeployBot v0.2.25. GitHub labels and authenticated comments are the durable state; the CLI and MCP tools are two interfaces to the same operations. ## CLI @@ -166,7 +166,7 @@ Provider fields are: | `thread_active_hours` | Positive integer; default 72. Notification obligations and pending messages use their own non-expiring outbox. | | `ci_workflows` | Workflow names followed as exact-main CI. Default: `["CI"]`. | | `deploy_workflows` | Deployment workflow names. Default: `["Deploy"]`. | -| `batch_settle_seconds` | Non-negative window for coalescing near-ready deploy requests before freezing a batch. Default: 15. | +| `batch_settle_seconds` | Non-negative window for coalescing near-ready deploy requests before freezing a batch. Default: 0, so event-driven ready work freezes immediately; scheduled reconciliation is only a fallback. | | `ci_failure_grace_seconds` | Non-negative window for an exact-main CI retry to replace a failed attempt before the release fails. Default: 90. | | `promotion_workers` | Positive maximum number of deploy requests promoted concurrently. Default: 4. | | `repair_hold_minutes` | Positive maximum time that a genuine repair may hold overlapping ready work without becoming merge-eligible. Default: 60. | @@ -174,7 +174,7 @@ Provider fields are: | integration repair packet | Includes `source_pull_requests` and the complete `source_heads` map so the elected owner can verify every frozen source before resuming the cumulative PR. 
| | suppressed integration PR run | Integration `pull_request` runs, including `action_required` zero-job placeholders, are not exact CI evidence. DeployBot uses its own exact-branch `workflow_dispatch` run, whose real failures still fail closed. | | `hold_merges_while_releasing` | Default `true`; after a merge, admit no newer batch until the release reaches the `release_admission` gate. | -| `release_admission` | How far an in-flight release must progress before the next batch is admitted; allowed: `verified` (default, safest) waits for the cumulative exact-main revision to be live, `ci-passed` reopens admission once exact-main CI is green while deploy and health checks keep following in the background. `ci-passed` trades a larger failure blast radius for throughput, and verification and notifications for a release may be emitted by a later reaction rather than the merging one. | +| `release_admission` | How far an in-flight release must progress before the next independent batch is admitted. `merged` (default) reopens admission immediately after merge while CI, deployment, and health tracking continue asynchronously. `ci-passed` waits for exact-main CI; `verified` waits until the cumulative revision is live. A later observed CI, deployment, or health failure pauses future merges in every mode. | | `repair_branch_prefix` | Deterministic release-repair lease branch prefix; default `"deploybot/repair"`. | | `ready_to_merge_target_minutes` | Positive request-to-ready and queued-to-merge timing target; default 15. | | `merge_to_live_target_minutes` | Positive timing target; default 10. | diff --git a/examples/github-workflow.yml b/examples/github-workflow.yml index e1a6423..062ee42 100644 --- a/examples/github-workflow.yml +++ b/examples/github-workflow.yml @@ -77,10 +77,11 @@ jobs: with: ref: ${{ github.event.repository.default_branch }} persist-credentials: false - # v0.2.24 implementation; keep the full commit for privileged workflows. 
- - uses: Forward-Future/DeployBot@73004ea7c9dcb81e7f1281c0687aea0897d1571d + # v0.2.25 implementation; keep the full commit for privileged workflows. + - uses: Forward-Future/DeployBot@12c6c03aa76a553fa4068279baa29e90a30bbeb1 with: - # PR and review events reconcile quickly. Only release-owner events - # stay attached to cumulative main through CI and deployment. + # PR and review events reconcile immediately. Release-owner events + # advance to the configured admission gate; "merged" observations + # return quickly, while the schedule remains a fallback reconciliation. follow: ${{ github.event_name == 'workflow_run' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} timeout: ${{ (github.event_name == 'workflow_run' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && '2400' || '600' }} diff --git a/pyproject.toml b/pyproject.toml index f4e76ec..7b1c493 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "deploybot-merge-queue" -version = "0.2.24" +version = "0.2.25" description = "DeployBot: a provider-neutral GitHub merge queue for coding agents" readme = "README.md" license = "MIT" diff --git a/skills/deploybot/SKILL.md b/skills/deploybot/SKILL.md index 4045725..707674c 100644 --- a/skills/deploybot/SKILL.md +++ b/skills/deploybot/SKILL.md @@ -87,8 +87,10 @@ Merge independent ready pull requests back-to-back. Route source-overlap groups through `create_integration_pull_request`; when policy mode is `all`, validate the entire frozen batch through that cumulative PR. Never invent a conflict resolution. Return the repair packet to its source thread, then call `resume` -after its new exact head passes. Finish with `follow_release`, following newer -cumulative base heads until CI, deployment, and configured health checks verify. +after its new exact head passes. 
Keep release tracking event-driven: in +`release_admission = "merged"` mode, admit independent ready work immediately +after a healthy merge while later events continue CI, deployment, and health +tracking. Scheduled reconciliation is a fallback, not the normal promotion path. Genuine repair blocks may hold overlapping ready work for the configured bounded repair window, but they remain merge-ineligible until the trusted source agent @@ -123,9 +125,10 @@ named thread already owns that failed SHA; wait for that repair and never create a competing PR. The owner is encoded in the atomic branch ref, so a registry write failure is recovered by calling the same tool again. -New batches are FIFO-bounded by `integration.max_batch_size`, and a merged batch -closes admission until its cumulative main revision is verified live. Do not -override either boundary for later work. Never execute merged PR code inside +New batches are FIFO-bounded by `integration.max_batch_size`. Honor the configured +release-admission gate: `merged` permits the next independent batch immediately, +while `ci-passed` and `verified` impose stricter release fences. A later observed +release failure pauses future merges in every mode. Never execute merged PR code inside the privileged coordinator; generated-artifact conflicts go to the elected repair owner for a normal reviewed rebuild. When PR-authored checks are required, use a GitHub App installation token, list its bot login in diff --git a/skills/manage-merge-queue/SKILL.md b/skills/manage-merge-queue/SKILL.md index 6e06ac0..bea7814 100644 --- a/skills/manage-merge-queue/SKILL.md +++ b/skills/manage-merge-queue/SKILL.md @@ -54,12 +54,14 @@ A genuine repair remains merge-ineligible, but DeployBot may temporarily hold overlapping ready work for the configured bounded repair window so concurrent merges do not repeatedly invalidate the replacement head. 
-Finish with `follow_release`, tracking newer cumulative base heads through CI, -deployment, and configured health checks. A failure pauses further merges until +Track newer cumulative base heads through CI, deployment, and configured health +checks from release events. With `release_admission = "merged"`, immediately +admit the next independent ready batch after merge instead of occupying the merge +worker while production catches up. A later failure pauses further merges until the coordinator verifies recovery and unpauses. Before creating that recovery, call `claim_release_repair`; only the returned `owned` thread may use the -deterministic repair branch. Respect the configured maximum batch size and keep -new merges closed while an earlier exact-main release is unfinished. Record +deterministic repair branch. Respect the configured maximum batch size and the +selected `merged`, `ci-passed`, or `verified` release-admission fence. Record exact heads, review verdicts, merged commits, waiting items, repair packets, integration groups, and delivery timing. 
diff --git a/src/agent_merge_queue/__init__.py b/src/agent_merge_queue/__init__.py index 52ab33d..a1c3a9a 100644 --- a/src/agent_merge_queue/__init__.py +++ b/src/agent_merge_queue/__init__.py @@ -1,3 +1,3 @@ """DeployBot: a provider-neutral GitHub merge queue for coding agents.""" -__version__ = "0.2.24" +__version__ = "0.2.25" diff --git a/src/agent_merge_queue/cli.py b/src/agent_merge_queue/cli.py index 9a570ad..6ae8219 100755 --- a/src/agent_merge_queue/cli.py +++ b/src/agent_merge_queue/cli.py @@ -15,7 +15,7 @@ import sys import time from concurrent.futures import ThreadPoolExecutor -from dataclasses import asdict, dataclass +from dataclasses import asdict, dataclass, replace from datetime import datetime, timedelta, timezone from pathlib import Path from typing import Any, Callable, Iterable @@ -28,6 +28,7 @@ follow_release, http_verifications, notify, + release_admitted, release_state, seconds_between, summarize_metrics, @@ -5092,9 +5093,13 @@ def command_drain( key = tuple(int(value) for value in group["pull_requests"]) integration_by_members[key] = group next_batch = list(batch_result["next_batch"]) - if batch_result["merged"]: - # One reaction owns one bounded release batch. Leave the FIFO - # remainder for the event that runs after exact-main verification. + if ( + batch_result["merged"] + and client.config.pipeline.release_admission != "merged" + ): + # Stricter admission modes own one bounded release batch at a time. + # Minimum-latency mode keeps draining independent FIFO batches in + # this same event, before CI or deployment begins. 
break if not next_batch: break @@ -5614,6 +5619,147 @@ def release_follow_needed(client: GitHub) -> bool: return True +def superseded_release_failure( + client: GitHub, + *, + current_main_sha: str, + verified_main_sha: str | None, + workflow_runs: list[dict[str, Any]], + recovered_main_sha: str | None, + timeout_seconds: int, + poll_seconds: int = 10, +) -> dict[str, Any] | None: + """Find a failed admitted release that a newer merge moved past.""" + records = client.thread_records(include_terminal=True) + if not isinstance(records, list): + records = [] + candidates = { + str(record.get("merge_sha") or "") + for record in records + if record.get("phase") == "merged" and record.get("merge_sha") + } + release_workflows = { + *client.config.pipeline.ci_workflows, + *client.config.pipeline.deploy_workflows, + } + candidates.update( + str(run.get("head_sha") or "") + for run in workflow_runs + if run.get("head_sha") + and str(run.get("name") or "") in release_workflows + and str(run.get("event") or "") + in {"push", "workflow_dispatch", "workflow_run", "schedule"} + and ( + not run.get("head_branch") + or str(run.get("head_branch")) == client.config.base_branch + ) + ) + states: dict[str, dict[str, Any]] = {} + for main_sha in candidates: + if main_sha in {current_main_sha, recovered_main_sha}: + continue + if not client.is_ancestor(main_sha, current_main_sha): + continue + if verified_main_sha and client.is_ancestor(main_sha, verified_main_sha): + continue + states[main_sha] = release_state( + main_sha=main_sha, + runs=workflow_runs, + config=client.config.pipeline, + ) + + verified_candidates = [ + main_sha for main_sha, value in states.items() if value["state"] == "verified" + ] + newest_verified = [ + main_sha + for main_sha in verified_candidates + if not any( + main_sha != other and client.is_ancestor(main_sha, other) + for other in verified_candidates + ) + ] + effective_verified_sha: str | None = None + if newest_verified: + effective_verified_sha = max( + 
newest_verified, + key=lambda main_sha: str( + (states[main_sha].get("latest_deploy") or {}).get("created_at") or "" + ), + ) + + class FixedBaseReleaseClient: + def __init__(self, wrapped: GitHub, base_sha: str) -> None: + self._wrapped = wrapped + self._base_sha = base_sha + self.config = replace( + wrapped.config, + pipeline=replace( + wrapped.config.pipeline, + pause_on_failure=False, + ), + ) + + def base_sha(self) -> str: + return self._base_sha + + def __getattr__(self, name: str) -> Any: + return getattr(self._wrapped, name) + + finalized = command_follow( + FixedBaseReleaseClient(client, effective_verified_sha), + timeout_seconds=timeout_seconds, + poll_seconds=poll_seconds, + json_output=False, + emit=False, + admit_gate="verified", + ) + if finalized.get("state") != "verified": + return finalized + + def failure_time(value: dict[str, Any]) -> str: + run = value.get("latest_deploy") or value.get("latest_ci") or {} + return str(run.get("updated_at") or run.get("created_at") or "") + + for main_sha, value in sorted( + states.items(), key=lambda item: failure_time(item[1]), reverse=True + ): + if effective_verified_sha and client.is_ancestor( + main_sha, effective_verified_sha + ): + continue + if value["state"] == "deploy-failed": + if str((value.get("latest_deploy") or {}).get("conclusion") or "") == ( + "cancelled" + ): + # A newer cumulative main normally cancels a superseded deploy. + # That is release coalescing, not production failure evidence. + continue + return value + if value["state"] != "ci-failed": + continue + if str((value.get("latest_ci") or {}).get("conclusion") or "") == ( + "cancelled" + ): + # Main moving again can cancel obsolete exact-main CI. The newest + # cumulative revision owns the release instead. 
+ continue + grace = client.config.pipeline.ci_failure_grace_seconds + latest_ci = value.get("latest_ci") or {} + failed_at = parse_time( + str(latest_ci.get("updated_at") or latest_ci.get("created_at") or "") + ) + now = parse_time(utc_now()) + if ( + not grace + or failed_at is None + or now is None + or (now - failed_at).total_seconds() >= grace + ): + return value + return None + + def should_settle_batch(client: GitHub, entries: list[QueueEntry]) -> bool: has_ready = any( client.config.queue_label in entry.labels and entry.state == "ready" @@ -5779,12 +5925,13 @@ def command_react( and bool(control.get("recovered_main_sha")) and control.get("recovered_main_sha") == current_main_sha ) + raw_watermark = client.verified_main_sha() + admission_gate = client.config.pipeline.release_admission release_before_merge = release_state( main_sha=current_main_sha, runs=workflow_runs, config=client.config.pipeline, ) - raw_watermark = client.verified_main_sha() release_already_verified = raw_watermark == current_main_sha has_release_owner = ( not release_already_verified @@ -5829,12 +5976,83 @@ def command_react( # durable merged obligation owns it; historical runs for older # SHAs cannot make an unobservable release finish. 
client.record_verified_main(current_main_sha) + if ( + admission_gate == "merged" + and release_before_merge.get("state") != "verified" + and (raw_watermark is not None or has_release_owner) + ): + failed_release = superseded_release_failure( + client, + current_main_sha=current_main_sha, + verified_main_sha=( + str(raw_watermark) if isinstance(raw_watermark, str) else None + ), + workflow_runs=workflow_runs, + recovered_main_sha=( + str(control.get("recovered_main_sha") or "") or None + ), + timeout_seconds=timeout_seconds, + ) + if failed_release is not None: + if client.config.pipeline.pause_on_failure: + client.set_pipeline_control( + "paused", + f"{failed_release['state']} on {failed_release['main_sha']}", + main_sha=str(failed_release["main_sha"]), + ) + result = { + "state": "release-held", + "release": failed_release, + "promoted": {}, + "promoted_integrations": [], + "drain": {}, + "dispatched_ci": [], + "integrations": [], + "integration_checks": [], + "reconciled_merges": reconciled_merges, + } + print(json.dumps(result, indent=2, sort_keys=True)) + return result + if admission_gate == "merged" and release_before_merge.get("state") == ( + "awaiting-deploy" + ): + # Token-dispatched CI may not emit its workflow_run handoff. Start + # the exact authorized deployment before another merge can + # supersede this SHA; the deployment's own freshness check then + # safely coalesces it if main advances before the lock is acquired. + release_before_merge = command_follow( + client, + timeout_seconds=timeout_seconds, + poll_seconds=10, + json_output=False, + emit=False, + admit_gate=admission_gate, + ) + finalized_release = False + if ( + admission_gate == "merged" + and not release_already_verified + and release_before_merge.get("state") == "verified" + ): + # Complete durable watermark, thread-state, and notification + # bookkeeping before a new merge changes base_sha. Otherwise a busy + # merged-mode queue can keep overtaking already-live receipts. 
+ release_before_merge = command_follow( + client, + timeout_seconds=timeout_seconds, + poll_seconds=10, + json_output=False, + emit=False, + admit_gate=admission_gate, + ) + finalized_release = release_before_merge.get("state") == "verified" release_is_verified = release_already_verified or ( release_before_merge.get("state") == "verified" ) if ( not release_already_verified and release_is_verified + and admission_gate != "merged" and client.config.pipeline.verifications ): health = http_verifications(client.config.pipeline) @@ -5848,24 +6066,22 @@ def command_react( "verifications": health, } release_is_verified = release_before_merge["state"] == "verified" - if release_is_verified: + if release_is_verified and not finalized_release: client.record_verified_main(current_main_sha) - # In "verified" mode the next batch waits for the cumulative release to - # be fully live. In "ci-passed" mode admission reopens as soon as - # exact-main CI is green (deploy in flight), so merges stop waiting on - # deploy and health-check time. - admission_gate = client.config.pipeline.release_admission - admitted_states = {"verified"} - if admission_gate == "ci-passed": - admitted_states |= {"awaiting-deploy", "deploying"} - release_admitted = ( + # Release admission is independent from release tracking. "merged" + # reopens immediately for healthy in-flight releases, "ci-passed" + # waits for exact-main CI, and "verified" waits until production is + # live. Every mode still stops on an observed release failure. 
+ is_release_admitted = ( release_already_verified or release_is_verified - or release_before_merge.get("state") in admitted_states + or release_admitted( + str(release_before_merge.get("state") or ""), admission_gate + ) ) if ( has_release_owner - and not release_admitted + and not is_release_admitted and not recovering_current_main ): release = release_before_merge @@ -5878,7 +6094,9 @@ def command_react( emit=False, admit_gate=admission_gate, ) - if release.get("state") not in admitted_states: + if not release_admitted( + str(release.get("state") or ""), admission_gate + ): result = { "state": "release-held", "release": release, @@ -6528,6 +6746,7 @@ def main(argv: list[str] | None = None) -> int: timeout_seconds=arguments.timeout, poll_seconds=arguments.poll, json_output=arguments.json_output, + admit_gate=client.config.pipeline.release_admission, ) elif arguments.command == "pause": command_control(client, state="paused", reason=arguments.reason) diff --git a/src/agent_merge_queue/config.py b/src/agent_merge_queue/config.py index 168cbdd..f0860f9 100644 --- a/src/agent_merge_queue/config.py +++ b/src/agent_merge_queue/config.py @@ -92,7 +92,7 @@ class QueueConfig: ALLOWED_MERGE_METHODS = {"merge", "squash", "rebase"} ALLOWED_INTEGRATION_MODES = {"manual", "overlap", "all"} ALLOWED_INTENT_SCOPES = {"head"} -ALLOWED_RELEASE_ADMISSION = {"verified", "ci-passed"} +ALLOWED_RELEASE_ADMISSION = {"verified", "ci-passed", "merged"} DEFAULT_CONFIG = """\ [queue] base_branch = "main" @@ -127,7 +127,7 @@ class QueueConfig: thread_active_hours = 72 ci_workflows = ["CI"] deploy_workflows = ["Deploy"] -batch_settle_seconds = 15 +batch_settle_seconds = 0 ci_failure_grace_seconds = 90 promotion_workers = 4 repair_hold_minutes = 60 @@ -138,11 +138,11 @@ class QueueConfig: auto_promote = true intent_scope = "head" pause_on_failure = true -# verified (safest): hold new merges until the cumulative release is live. 
-# ci-passed: admit the next batch once exact-main CI passes; deploy and health -# checks keep following in the background, trading a larger failure blast radius -# for higher merge throughput. -release_admission = "verified" +# merged (default): admit independent ready work immediately after the previous +# merge while CI, deployment, and health checks continue asynchronously. +# ci-passed: wait for exact-main CI; verified: wait until the release is live. +# Any later release failure pauses future merges in every mode. +release_admission = "merged" # Receives best-effort events, including retryable thread-deployed messages. # webhook_url_env = "DEPLOYBOT_WEBHOOK_URL" @@ -425,7 +425,7 @@ def parse_config(payload: dict[str, Any]) -> QueueConfig: allowed = ", ".join(sorted(ALLOWED_INTENT_SCOPES)) raise ConfigError(f"pipeline.intent_scope must be one of: {allowed}") release_admission = _require_string( - pipeline.get("release_admission"), "pipeline.release_admission", "verified" + pipeline.get("release_admission"), "pipeline.release_admission", "merged" ) if release_admission not in ALLOWED_RELEASE_ADMISSION: allowed = ", ".join(sorted(ALLOWED_RELEASE_ADMISSION)) @@ -497,7 +497,7 @@ def parse_config(payload: dict[str, Any]) -> QueueConfig: batch_settle_seconds=_non_negative_int( pipeline.get("batch_settle_seconds"), "pipeline.batch_settle_seconds", - 15, + 0, ), ci_failure_grace_seconds=_non_negative_int( pipeline.get("ci_failure_grace_seconds"), diff --git a/src/agent_merge_queue/mcp_server.py b/src/agent_merge_queue/mcp_server.py index 29770c0..9b39e0a 100644 --- a/src/agent_merge_queue/mcp_server.py +++ b/src/agent_merge_queue/mcp_server.py @@ -175,7 +175,7 @@ def follow_release( repository: str | None = None, config: str | None = None, ) -> str: - """Follow newest exact main through CI, deployment, and health verification.""" + """Advance the newest exact-main release to its configured admission gate.""" return _run( "follow", "--timeout", diff --git 
a/src/agent_merge_queue/pipeline.py b/src/agent_merge_queue/pipeline.py index c466519..4bc713b 100644 --- a/src/agent_merge_queue/pipeline.py +++ b/src/agent_merge_queue/pipeline.py @@ -14,6 +14,16 @@ from .records import parse_time +def release_admitted(state: str, admit_gate: str) -> bool: + """Return whether a healthy release state permits another merge.""" + admitted = {"verified"} + if admit_gate == "ci-passed": + admitted.update({"awaiting-deploy", "deploying"}) + elif admit_gate == "merged": + admitted.update({"testing", "awaiting-deploy", "deploying"}) + return state in admitted + + def workflow_run(run: dict[str, Any]) -> dict[str, Any]: return { "id": run.get("id"), @@ -283,14 +293,12 @@ def follow_release( client.dispatch_deploy_workflows(ci_run=ci) ) dispatched_for.add(key) - if admit_gate == "ci-passed" and value["state"] in { - "awaiting-deploy", - "deploying", - }: - # Exact-main CI already passed and the deployment is in flight. Hand - # control back so the next batch can merge; a later reaction (or the - # scheduled reconciliation) follows this deployment through to - # verification, records the watermark, and emits notifications. + if value["state"] != "verified" and release_admitted( + str(value["state"]), admit_gate + ): + # The configured admission milestone has passed. Hand control back + # so independent ready work can merge; later release events (or the + # scheduled fallback) continue from durable GitHub state. 
return { **value, "dispatched_deployments": dispatched_deployments, diff --git a/tests/test_cli.py b/tests/test_cli.py index 7c0d6c8..1491fe0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -95,7 +95,8 @@ "required_checks": ["CI"], "dependency_directive": "Queue-after", "trusted_actors": ["trusted"], - } + }, + "pipeline": {"release_admission": "verified"}, } ) @@ -2366,7 +2367,15 @@ def test_reactor_holds_newly_merged_revision_before_ci_is_visible(self) -> None: def test_reactor_seeds_first_install_despite_historical_runs(self) -> None: sha = "a" * 40 client = Mock() - client.config = CONFIG + client.config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) client.pipeline_control.return_value = {"state": "running"} client.base_sha.return_value = sha client.workflow_runs.return_value = [ @@ -2374,13 +2383,16 @@ def test_reactor_seeds_first_install_despite_historical_runs(self) -> None: "id": 99, "name": "CI", "head_sha": "b" * 40, + "head_branch": "main", + "event": "workflow_dispatch", "status": "completed", - "conclusion": "success", + "conclusion": "failure", } ] client.verified_main_sha.return_value = None client.thread_records.return_value = [] client.deployment_notifications.return_value = [] + client.is_ancestor.return_value = True frozen = FreezeResult(None, [], [], [], []) with ( patch("agent_merge_queue.cli.settle_integration_checks", return_value=[]), @@ -2558,111 +2570,158 @@ def test_reactor_admits_at_ci_passed_when_configured(self) -> None: promote.assert_called_once() client.record_verified_main.assert_not_called() - def test_release_repair_claim_creates_one_deterministic_lease(self) -> None: + def test_reactor_merges_next_pr_while_previous_release_is_in_ci(self) -> None: sha = "a" * 40 - client = object.__new__(GitHub) - client.config = CONFIG - client.repository = "example/repo" - client.trusted_logins = {"trusted"} - 
client.coordinator_logins = {"coordinator"} - client.require_actor = Mock(return_value="trusted") - client.base_sha = Mock(return_value=sha) - client.workflow_runs = Mock( - return_value=[ - { - "id": 7, - "name": "CI", - "head_sha": sha, - "status": "completed", - "conclusion": "success", - "updated_at": "2026-06-20T00:01:00Z", - }, - { - "id": 8, - "name": "Deploy", - "head_sha": sha, - "status": "completed", - "conclusion": "failure", - "created_at": "2026-06-20T00:02:00Z", + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], }, - ] - ) - client.registry_comments = Mock(return_value=[]) - client.registry_issue_number = Mock(return_value=42) - client.issue_comment = Mock() - client._json = Mock( - side_effect=[ - {"tree": {"sha": "t" * 40}}, - {"sha": "l" * 40}, - {}, - ] + "pipeline": {"release_admission": "merged"}, + } ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = sha + client.verified_main_sha.return_value = None + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": sha, + "status": "in_progress", + "conclusion": None, + } + ] + frozen = FreezeResult(None, [], [], [], []) + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch("agent_merge_queue.cli.settle_integration_checks", return_value=[]), + patch("agent_merge_queue.cli.promote_integrations", return_value=[]), + patch( + "agent_merge_queue.cli.command_promote", + return_value={"promoted": [2], "waiting": [], "blocked": []}, + ), + patch("agent_merge_queue.cli.freeze_queue", return_value=frozen), + patch( + "agent_merge_queue.cli.command_drain", + return_value={"merged": [{"number": 2}]}, + ), + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) - result = client.claim_release_repair( - provider="codex", - 
thread_id="thread-1", + self.assertEqual(result["state"], "complete") + self.assertEqual(result["drain"]["merged"], [{"number": 2}]) + + def test_reactor_merged_mode_does_not_wait_for_pending_deployment(self) -> None: + sha = "a" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = sha + client.verified_main_sha.return_value = None + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + }, + { + "id": 2, + "name": "Deploy", + "head_sha": sha, + "status": "in_progress", + "conclusion": None, + }, + ] + frozen = FreezeResult(None, [], [], [], []) + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch("agent_merge_queue.cli.settle_integration_checks", return_value=[]), + patch("agent_merge_queue.cli.promote_integrations", return_value=[]), + patch( + "agent_merge_queue.cli.command_promote", + return_value={"promoted": [], "waiting": [], "blocked": []}, + ) as promote, + patch("agent_merge_queue.cli.freeze_queue", return_value=frozen), + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) - self.assertEqual(result["state"], "owned") - self.assertEqual(result["branch"], f"deploybot/repair/{sha[:12]}") - self.assertEqual(result["run_id"], 8) - client.issue_comment.assert_called_once() + self.assertNotEqual(result.get("state"), "release-held") + promote.assert_called_once() - def test_repair_claim_ignores_unbacked_registry_owner(self) -> None: + def test_reactor_merged_mode_pauses_after_later_deploy_failure(self) -> None: sha = "a" * 40 - owner = { - "branch": f"deploybot/repair/{sha[:12]}", - "main_sha": sha, - "provider": 
"codex", - "thread_id": "thread-1", - } - client = object.__new__(GitHub) - client.config = CONFIG - client.repository = "example/repo" - client.trusted_logins = {"trusted"} - client.coordinator_logins = {"coordinator"} - client.require_actor = Mock(return_value="trusted") - client.base_sha = Mock(return_value=sha) - client.workflow_runs = Mock( - return_value=[ - { - "id": 7, - "name": "CI", - "head_sha": sha, - "status": "completed", - "conclusion": "failure", - } - ] - ) - client.registry_comments = Mock( - return_value=[ - { - "body": release_repair_body(owner), - "created_at": "2026-06-20T00:00:00Z", - "id": 1, - "user": {"login": "coordinator"}, - } - ] - ) - client.registry_issue_number = Mock(return_value=42) - client.issue_comment = Mock() - client._json = Mock( - side_effect=[ - {"tree": {"sha": "t" * 40}}, - {"sha": "l" * 40}, - {}, - ] + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = sha + client.verified_main_sha.return_value = None + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + }, + { + "id": 2, + "name": "Deploy", + "head_sha": sha, + "status": "completed", + "conclusion": "failure", + }, + ] + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch("agent_merge_queue.cli.command_promote") as promote, + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=True, timeout_seconds=10) - result = client.claim_release_repair( - provider="claude", thread_id="thread-1" + self.assertEqual(result["state"], "release-held") + client.set_pipeline_control.assert_called_once_with( + "paused", f"deploy-failed on {sha}", main_sha=sha ) + 
promote.assert_not_called() - self.assertEqual(result["state"], "owned") - self.assertEqual(result["provider"], "claude") - client.registry_comments.assert_not_called() - - def test_release_repair_claim_accepts_failed_health_verification(self) -> None: + def test_reactor_dispatches_pending_release_before_next_merged_mode_pr( + self, + ) -> None: sha = "a" * 40 config = parse_config( { @@ -2670,28 +2729,580 @@ def test_release_repair_claim_accepts_failed_health_verification(self) -> None: "required_checks": ["CI"], "trusted_actors": ["trusted"], }, - "pipeline": { - "verifications": [ - {"name": "Login", "url": "https://example.test/login"} - ] - }, + "pipeline": {"release_admission": "merged"}, } ) - client = object.__new__(GitHub) + client = Mock() client.config = config - client.repository = "example/repo" - client.trusted_logins = {"trusted"} - client.coordinator_logins = {"coordinator"} - client.require_actor = Mock(return_value="trusted") - client.base_sha = Mock(return_value=sha) - client.workflow_runs = Mock( - return_value=[ - { - "id": 7, - "name": "CI", - "head_sha": sha, - "status": "completed", - "conclusion": "success", + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = sha + client.verified_main_sha.return_value = None + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + "event": "workflow_dispatch", + } + ] + client.dispatch_deploy_workflows.return_value = [ + {"id": 9, "name": "Deploy", "ci_sha": sha, "ci_run_id": 1} + ] + frozen = FreezeResult(None, [], [], [], []) + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch("agent_merge_queue.cli.settle_integration_checks", return_value=[]), + patch("agent_merge_queue.cli.promote_integrations", return_value=[]), + patch( + "agent_merge_queue.cli.command_promote", + return_value={"promoted": [2], "waiting": [], 
"blocked": []}, + ), + patch("agent_merge_queue.cli.freeze_queue", return_value=frozen), + patch( + "agent_merge_queue.cli.command_drain", + return_value={"merged": [{"number": 2}]}, + ), + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) + + self.assertEqual(result["drain"]["merged"], [{"number": 2}]) + client.dispatch_deploy_workflows.assert_called_once() + + def test_reactor_pauses_for_failed_superseded_merged_mode_release(self) -> None: + old = "a" * 40 + current = "b" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = current + client.verified_main_sha.return_value = None + client.thread_records.return_value = [ + {"phase": "merged", "merge_sha": old, "pull_request": 1} + ] + client.is_ancestor.return_value = True + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": old, + "status": "completed", + "conclusion": "success", + }, + { + "id": 2, + "name": "Deploy", + "head_sha": old, + "status": "completed", + "conclusion": "failure", + }, + ] + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch("agent_merge_queue.cli.command_promote") as promote, + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) + + self.assertEqual(result["state"], "release-held") + self.assertEqual(result["release"]["main_sha"], old) + client.set_pipeline_control.assert_called_once_with( + "paused", f"deploy-failed on {old}", main_sha=old + ) + promote.assert_not_called() + + def test_reactor_tracks_superseded_failure_without_thread_metadata(self) -> None: + old = "a" * 40 + current = "b" * 40 + config = parse_config( + { + "queue": { + 
"required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = current + client.verified_main_sha.return_value = "f" * 40 + client.thread_records.return_value = [] + client.is_ancestor.side_effect = lambda _left, right: right == current + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": old, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "success", + }, + { + "id": 2, + "name": "Deploy", + "head_sha": old, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "failure", + }, + ] + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch("agent_merge_queue.cli.command_promote") as promote, + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) + + self.assertEqual(result["state"], "release-held") + self.assertEqual(result["release"]["main_sha"], old) + client.set_pipeline_control.assert_called_once_with( + "paused", f"deploy-failed on {old}", main_sha=old + ) + promote.assert_not_called() + + def test_reactor_ignores_expected_superseded_ci_cancellation(self) -> None: + old = "a" * 40 + current = "b" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = current + client.verified_main_sha.return_value = "f" * 40 + client.thread_records.return_value = [] + client.deployment_notifications.return_value = [] + client.is_ancestor.side_effect = lambda _left, right: right == current + 
client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": old, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "cancelled", + } + ] + frozen = FreezeResult(None, [], [], [], []) + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch("agent_merge_queue.cli.settle_integration_checks", return_value=[]), + patch("agent_merge_queue.cli.promote_integrations", return_value=[]), + patch( + "agent_merge_queue.cli.command_promote", + return_value={"promoted": [], "waiting": [], "blocked": []}, + ) as promote, + patch("agent_merge_queue.cli.freeze_queue", return_value=frozen), + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) + + self.assertEqual(result["state"], "complete") + client.set_pipeline_control.assert_not_called() + promote.assert_called_once() + + def test_reactor_verifies_superseded_deployment_health(self) -> None: + old = "a" * 40 + current = "b" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": { + "release_admission": "merged", + "verifications": [ + {"name": "Login", "url": "https://example.test/login"} + ], + }, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = current + client.verified_main_sha.return_value = "f" * 40 + client.thread_records.return_value = [] + client.is_ancestor.side_effect = lambda _left, right: right == current + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": old, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "success", + }, + { + "id": 2, + "name": "Deploy", + "head_sha": old, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "success", + }, + ] + 
with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch( + "agent_merge_queue.cli.http_verifications", + return_value=[{"name": "Login", "passed": False}], + ), + patch("agent_merge_queue.cli.time.monotonic", side_effect=[0, 1]), + patch("agent_merge_queue.cli.time.sleep") as sleep, + patch("agent_merge_queue.cli.command_promote") as promote, + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=1) + + self.assertEqual(result["state"], "release-held") + self.assertEqual(result["release"]["state"], "verify-failed") + client.set_pipeline_control.assert_called_once_with( + "paused", f"verify-failed on {old}", main_sha=old + ) + sleep.assert_not_called() + promote.assert_not_called() + + def test_reactor_uses_newer_verified_release_as_recovery_boundary(self) -> None: + failed = "a" * 40 + recovered = "b" * 40 + current = "c" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = current + client.verified_main_sha.return_value = "f" * 40 + client.thread_records.return_value = [] + client.deployment_notifications.return_value = [] + client.is_ancestor.side_effect = lambda left, right: ( + right == current or (left == failed and right == recovered) + ) + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": failed, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "success", + "created_at": "2026-06-20T00:00:00Z", + }, + { + "id": 2, + "name": "Deploy", + "head_sha": failed, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "failure", + "created_at": "2026-06-20T00:01:00Z", + }, + { + "id": 3, + 
"name": "CI", + "head_sha": recovered, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "success", + "created_at": "2026-06-20T00:02:00Z", + }, + { + "id": 4, + "name": "Deploy", + "head_sha": recovered, + "head_branch": "main", + "event": "workflow_dispatch", + "status": "completed", + "conclusion": "success", + "created_at": "2026-06-20T00:03:00Z", + }, + ] + frozen = FreezeResult(None, [], [], [], []) + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch( + "agent_merge_queue.cli.command_follow", + return_value={"state": "verified", "main_sha": recovered}, + ) as follow, + patch("agent_merge_queue.cli.settle_integration_checks", return_value=[]), + patch("agent_merge_queue.cli.promote_integrations", return_value=[]), + patch( + "agent_merge_queue.cli.command_promote", + return_value={"promoted": [], "waiting": [], "blocked": []}, + ) as promote, + patch("agent_merge_queue.cli.freeze_queue", return_value=frozen), + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) + + self.assertEqual(result["state"], "complete") + self.assertEqual(follow.call_args.args[0].base_sha(), recovered) + client.set_pipeline_control.assert_not_called() + promote.assert_called_once() + + def test_reactor_finalizes_verified_release_before_merged_mode_drain( + self, + ) -> None: + sha = "a" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) + client = Mock() + client.config = config + client.pipeline_control.return_value = {"state": "running"} + client.base_sha.return_value = sha + client.verified_main_sha.return_value = None + client.thread_records.return_value = [ + {"phase": "merged", "merge_sha": sha, "pull_request": 1} + ] + client.is_ancestor.return_value = True + client.workflow_runs.return_value = [ + { + 
"id": 1, + "name": "CI", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + }, + { + "id": 2, + "name": "Deploy", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + }, + ] + frozen = FreezeResult(None, [], [], [], []) + with ( + patch( + "agent_merge_queue.cli.reconcile_externally_merged_threads", + return_value=[], + ), + patch( + "agent_merge_queue.cli.command_follow", + return_value={ + "state": "verified", + "main_sha": sha, + "thread_notifications": [{"notification_id": "receipt"}], + }, + ) as follow, + patch("agent_merge_queue.cli.settle_integration_checks", return_value=[]), + patch("agent_merge_queue.cli.promote_integrations", return_value=[]), + patch( + "agent_merge_queue.cli.command_promote", + return_value={"promoted": [], "waiting": [], "blocked": []}, + ), + patch("agent_merge_queue.cli.freeze_queue", return_value=frozen), + redirect_stdout(io.StringIO()), + ): + result = command_react(client, follow=False, timeout_seconds=10) + + self.assertEqual(result["state"], "complete") + follow.assert_called_once_with( + client, + timeout_seconds=10, + poll_seconds=10, + json_output=False, + emit=False, + admit_gate="merged", + ) + client.record_verified_main.assert_not_called() + + def test_release_repair_claim_creates_one_deterministic_lease(self) -> None: + sha = "a" * 40 + client = object.__new__(GitHub) + client.config = CONFIG + client.repository = "example/repo" + client.trusted_logins = {"trusted"} + client.coordinator_logins = {"coordinator"} + client.require_actor = Mock(return_value="trusted") + client.base_sha = Mock(return_value=sha) + client.workflow_runs = Mock( + return_value=[ + { + "id": 7, + "name": "CI", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + "updated_at": "2026-06-20T00:01:00Z", + }, + { + "id": 8, + "name": "Deploy", + "head_sha": sha, + "status": "completed", + "conclusion": "failure", + "created_at": "2026-06-20T00:02:00Z", + }, + ] + ) + 
client.registry_comments = Mock(return_value=[]) + client.registry_issue_number = Mock(return_value=42) + client.issue_comment = Mock() + client._json = Mock( + side_effect=[ + {"tree": {"sha": "t" * 40}}, + {"sha": "l" * 40}, + {}, + ] + ) + + result = client.claim_release_repair( + provider="codex", + thread_id="thread-1", + ) + + self.assertEqual(result["state"], "owned") + self.assertEqual(result["branch"], f"deploybot/repair/{sha[:12]}") + self.assertEqual(result["run_id"], 8) + client.issue_comment.assert_called_once() + + def test_repair_claim_ignores_unbacked_registry_owner(self) -> None: + sha = "a" * 40 + owner = { + "branch": f"deploybot/repair/{sha[:12]}", + "main_sha": sha, + "provider": "codex", + "thread_id": "thread-1", + } + client = object.__new__(GitHub) + client.config = CONFIG + client.repository = "example/repo" + client.trusted_logins = {"trusted"} + client.coordinator_logins = {"coordinator"} + client.require_actor = Mock(return_value="trusted") + client.base_sha = Mock(return_value=sha) + client.workflow_runs = Mock( + return_value=[ + { + "id": 7, + "name": "CI", + "head_sha": sha, + "status": "completed", + "conclusion": "failure", + } + ] + ) + client.registry_comments = Mock( + return_value=[ + { + "body": release_repair_body(owner), + "created_at": "2026-06-20T00:00:00Z", + "id": 1, + "user": {"login": "coordinator"}, + } + ] + ) + client.registry_issue_number = Mock(return_value=42) + client.issue_comment = Mock() + client._json = Mock( + side_effect=[ + {"tree": {"sha": "t" * 40}}, + {"sha": "l" * 40}, + {}, + ] + ) + + result = client.claim_release_repair( + provider="claude", thread_id="thread-1" + ) + + self.assertEqual(result["state"], "owned") + self.assertEqual(result["provider"], "claude") + client.registry_comments.assert_not_called() + + def test_release_repair_claim_accepts_failed_health_verification(self) -> None: + sha = "a" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": 
["trusted"], + }, + "pipeline": { + "verifications": [ + {"name": "Login", "url": "https://example.test/login"} + ] + }, + } + ) + client = object.__new__(GitHub) + client.config = config + client.repository = "example/repo" + client.trusted_logins = {"trusted"} + client.coordinator_logins = {"coordinator"} + client.require_actor = Mock(return_value="trusted") + client.base_sha = Mock(return_value=sha) + client.workflow_runs = Mock( + return_value=[ + { + "id": 7, + "name": "CI", + "head_sha": sha, + "status": "completed", + "conclusion": "success", }, { "id": 8, @@ -3291,6 +3902,53 @@ def test_drain_stops_after_one_batch_merges(self) -> None: self.assertEqual(result["merged"][0]["number"], 1) self.assertEqual(result["next_batch"], [2]) + def test_drain_merged_mode_lands_independent_batches_back_to_back(self) -> None: + first_entry = entry(1, "a.py") + later_entry = entry(2, "b.py") + first = FreezeResult( + batch={"batch_id": "first"}, + queue=[first_entry], + blocked_queue=[], + next_batch=[later_entry], + overlap_groups=[], + ) + second = FreezeResult( + batch={"batch_id": "second"}, + queue=[later_entry], + blocked_queue=[], + next_batch=[], + overlap_groups=[], + ) + client = Mock() + client.config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": {"release_admission": "merged"}, + } + ) + with ( + patch( + "agent_merge_queue.cli.freeze_queue", + side_effect=[first, second], + ) as freeze, + patch( + "agent_merge_queue.cli.command_merge", return_value="m" * 40 + ) as merge, + redirect_stdout(io.StringIO()), + ): + result = command_drain(client, json_output=True) + + self.assertEqual(freeze.call_count, 2) + self.assertEqual(merge.call_count, 2) + self.assertEqual(result["batch_ids"], ["first", "second"]) + self.assertEqual( + [value["number"] for value in result["merged"]], + [1, 2], + ) + def test_reenqueue_toggles_label_to_wake_event_coordinator(self) -> None: value = entry(1) old_head = "a" * 40 
@@ -6055,6 +6713,7 @@ def test_reactor_does_not_follow_conflicted_all_mode_integration_batch( "trusted_actors": ["trusted"], "coordinator_actors": ["coordinator"], }, + "pipeline": {"release_admission": "verified"}, "integration": {"mode": "all"}, } ) diff --git a/tests/test_config.py b/tests/test_config.py index b24ca8a..0326bae 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -115,7 +115,8 @@ def test_init_creates_a_loadable_safe_policy(self) -> None: self.assertEqual(path.name, ".mergequeue.toml") self.assertEqual(config.required_checks, ("CI",)) - self.assertEqual(config.pipeline.release_admission, "verified") + self.assertEqual(config.pipeline.release_admission, "merged") + self.assertEqual(config.pipeline.batch_settle_seconds, 0) self.assertEqual( config.trusted_actors, ("@repository-owner",), diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 31acf18..9d965aa 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -14,7 +14,10 @@ CONFIG = parse_config( - {"queue": {"required_checks": ["CI"], "trusted_actors": ["trusted"]}} + { + "queue": {"required_checks": ["CI"], "trusted_actors": ["trusted"]}, + "pipeline": {"release_admission": "verified"}, + } ) @@ -277,6 +280,132 @@ def test_follow_admits_at_ci_passed_without_waiting_for_deploy(self) -> None: self.assertEqual(result["dispatched_deployments"][0]["id"], 9) sleep.assert_not_called() + def test_follow_admits_at_merged_while_ci_is_still_running(self) -> None: + sha = "a" * 40 + client = Mock() + client.config = CONFIG + client.base_sha.return_value = sha + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": sha, + "status": "in_progress", + "conclusion": None, + } + ] + with ( + patch("agent_merge_queue.pipeline.time.sleep") as sleep, + patch("agent_merge_queue.pipeline.time.monotonic", return_value=0), + ): + result = follow_release( + client, + timeout_seconds=10, + poll_seconds=1, + admit_gate="merged", + ) + + 
self.assertEqual(result["state"], "testing") + sleep.assert_not_called() + + def test_merged_mode_dispatches_only_newest_main_deployment(self) -> None: + old = "a" * 40 + newest = "b" * 40 + old_ci = { + "id": 1, + "name": "CI", + "head_sha": old, + "status": "completed", + "conclusion": "success", + "event": "workflow_dispatch", + } + newest_ci = { + "id": 2, + "name": "CI", + "head_sha": newest, + "status": "completed", + "conclusion": "success", + "event": "workflow_dispatch", + } + client = Mock() + client.config = CONFIG + client.base_sha.return_value = newest + client.workflow_runs.return_value = [old_ci, newest_ci] + client.dispatch_deploy_workflows.return_value = [ + {"id": 9, "name": "Deploy", "ci_sha": newest, "ci_run_id": 2} + ] + + result = follow_release( + client, + timeout_seconds=10, + poll_seconds=1, + admit_gate="merged", + ) + + self.assertEqual(result["main_sha"], newest) + client.dispatch_deploy_workflows.assert_called_once() + dispatched_ci = client.dispatch_deploy_workflows.call_args.kwargs["ci_run"] + self.assertEqual(dispatched_ci["id"], newest_ci["id"]) + self.assertEqual(dispatched_ci["head_sha"], newest) + + def test_merged_mode_retries_health_before_reporting_failure(self) -> None: + sha = "a" * 40 + config = parse_config( + { + "queue": { + "required_checks": ["CI"], + "trusted_actors": ["trusted"], + }, + "pipeline": { + "verifications": [ + {"name": "Login", "url": "https://example.test/login"} + ] + }, + } + ) + client = Mock() + client.config = config + client.base_sha.return_value = sha + client.workflow_runs.return_value = [ + { + "id": 1, + "name": "CI", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + }, + { + "id": 2, + "name": "Deploy", + "head_sha": sha, + "status": "completed", + "conclusion": "success", + }, + ] + with ( + patch( + "agent_merge_queue.pipeline.http_verifications", + side_effect=[ + [{"name": "Login", "passed": False}], + [{"name": "Login", "passed": True}], + ], + ), + 
patch("agent_merge_queue.pipeline.time.sleep") as sleep, + patch( + "agent_merge_queue.pipeline.time.monotonic", + side_effect=[0, 1, 2], + ), + ): + result = follow_release( + client, + timeout_seconds=10, + poll_seconds=1, + admit_gate="merged", + ) + + self.assertEqual(result["state"], "verified") + sleep.assert_called_once_with(1) + def test_follow_absorbs_a_ci_rerun_during_failure_grace(self) -> None: sha = "a" * 40 failed = { diff --git a/tests/test_skill.py b/tests/test_skill.py index 97ad62e..f573fb6 100644 --- a/tests/test_skill.py +++ b/tests/test_skill.py @@ -8,7 +8,7 @@ ROOT = Path(__file__).resolve().parents[1] CANONICAL = ROOT / "skills" / "deploybot" / "SKILL.md" -RELEASE_COMMIT = "73004ea7c9dcb81e7f1281c0687aea0897d1571d" +RELEASE_COMMIT = "12c6c03aa76a553fa4068279baa29e90a30bbeb1" CHECKOUT_COMMIT = "9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0"