Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .mergequeue.example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ merge_to_live_target_minutes = 10
auto_promote = true
intent_scope = "head" # safest: a trusted source agent refreshes replacement heads
pause_on_failure = true
# verified (default, safest) holds new merges until the cumulative release is
# live. ci-passed admits the next batch as soon as exact-main CI passes while
# deploy and health checks keep following in the background, trading a larger
# failure blast radius for higher merge throughput.
release_admission = "verified"
# Receives best-effort events, including retryable thread-deployed messages.
# webhook_url_env = "DEPLOYBOT_WEBHOOK_URL"

Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,9 @@ A larger indivisible source-overlap or dependency closure is the sole exception:
it ships alone, never mixed with unrelated work.
After any merge, admission stays closed until the cumulative exact-main release
is verified live, preventing newer merges from starving an older deployment.
Set `pipeline.release_admission = "ci-passed"` to reopen admission as soon as
exact-main CI is green while deploy and health checks keep following in the
background; choose it when higher merge throughput is worth a larger failure
blast radius.
Draft status and incomplete
checks or reviews remain waiting states; they do not create a repair latch. A
conflict, failed gate, unresolved review, manual block, or stale authorized head
Expand Down Expand Up @@ -262,6 +265,7 @@ merge_to_live_target_minutes = 10
auto_promote = true
intent_scope = "head"
pause_on_failure = true
release_admission = "verified" # or "ci-passed" for higher merge throughput

[[pipeline.verifications]]
name = "Login"
Expand Down
3 changes: 2 additions & 1 deletion docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,8 @@ Provider fields are:
| `ci_failure_grace_seconds` | Non-negative window for an exact-main CI retry to replace a failed attempt before the release fails. Default: 90. |
| `promotion_workers` | Positive maximum number of deploy requests promoted concurrently. Default: 4. |
| `repair_hold_minutes` | Positive maximum time that a genuine repair may hold overlapping ready work without becoming merge-eligible. Default: 60. |
| `hold_merges_while_releasing` | Default `true`; after a merge, admit no newer batch until the cumulative exact-main revision is verified live. |
| `hold_merges_while_releasing` | Default `true`; after a merge, admit no newer batch until the release reaches the `release_admission` gate. |
| `release_admission` | How far an in-flight release must progress before the next batch is admitted; allowed: `verified` (default, safest) waits for the cumulative exact-main revision to be live, `ci-passed` reopens admission once exact-main CI is green while deploy and health checks keep following in the background. `ci-passed` trades a larger failure blast radius for throughput, and verification and notifications for a release may be emitted by a later reaction rather than the merging one. |
| `repair_branch_prefix` | Deterministic release-repair lease branch prefix; default `"deploybot/repair"`. |
| `ready_to_merge_target_minutes` | Positive request-to-ready and queued-to-merge timing target; default 15. |
| `merge_to_live_target_minutes` | Positive timing target; default 10. |
Expand Down
30 changes: 26 additions & 4 deletions src/agent_merge_queue/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5116,12 +5116,14 @@ def command_follow(
poll_seconds: int,
json_output: bool,
emit: bool = True,
admit_gate: str = "verified",
) -> dict[str, Any]:
try:
result = follow_release(
client,
timeout_seconds=timeout_seconds,
poll_seconds=poll_seconds,
admit_gate=admit_gate,
)
except QueueError as error:
if client.config.pipeline.pause_on_failure:
Expand Down Expand Up @@ -5537,7 +5539,24 @@ def command_react(
release_is_verified = release_before_merge["state"] == "verified"
if release_is_verified:
client.record_verified_main(current_main_sha)
if has_release_owner and not release_is_verified and not recovering_current_main:
# In "verified" mode the next batch waits for the cumulative release to
# be fully live. In "ci-passed" mode admission reopens as soon as
# exact-main CI is green (deploy in flight), so merges stop waiting on
# deploy and health-check time.
admission_gate = client.config.pipeline.release_admission
admitted_states = {"verified"}
if admission_gate == "ci-passed":
admitted_states |= {"awaiting-deploy", "deploying"}
release_admitted = (
release_already_verified
or release_is_verified
or release_before_merge.get("state") in admitted_states
)
if (
has_release_owner
and not release_admitted
and not recovering_current_main
):
release = release_before_merge
if follow:
release = command_follow(
Expand All @@ -5546,8 +5565,9 @@ def command_react(
poll_seconds=10,
json_output=False,
emit=False,
admit_gate=admission_gate,
)
if release.get("state") != "verified":
if release.get("state") not in admitted_states:
result = {
"state": "release-held",
"release": release,
Expand All @@ -5561,8 +5581,9 @@ def command_react(
}
print(json.dumps(result, indent=2, sort_keys=True))
return result
release_completed_before_merge = True
release_before_batch = release
if release.get("state") == "verified":
release_completed_before_merge = True
release_before_batch = release

def own_integration_checks(
numbers: Iterable[int] | None = None,
Expand Down Expand Up @@ -5770,6 +5791,7 @@ def own_integration_checks(
poll_seconds=10,
json_output=False,
emit=False,
admit_gate=client.config.pipeline.release_admission,
)
result = {
"state": "complete",
Expand Down
14 changes: 14 additions & 0 deletions src/agent_merge_queue/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class PipelineConfig:
auto_promote: bool
intent_scope: str
pause_on_failure: bool
release_admission: str
webhook_url_env: str | None
verifications: tuple[VerificationConfig, ...]

Expand Down Expand Up @@ -91,6 +92,7 @@ class QueueConfig:
ALLOWED_MERGE_METHODS = {"merge", "squash", "rebase"}
ALLOWED_INTEGRATION_MODES = {"manual", "overlap", "all"}
ALLOWED_INTENT_SCOPES = {"head"}
ALLOWED_RELEASE_ADMISSION = {"verified", "ci-passed"}
DEFAULT_CONFIG = """\
[queue]
base_branch = "main"
Expand Down Expand Up @@ -136,6 +138,11 @@ class QueueConfig:
auto_promote = true
intent_scope = "head"
pause_on_failure = true
# verified (safest): hold new merges until the cumulative release is live.
# ci-passed: admit the next batch once exact-main CI passes; deploy and health
# checks keep following in the background, trading a larger failure blast radius
# for higher merge throughput.
release_admission = "verified"
# Receives best-effort events, including retryable thread-deployed messages.
# webhook_url_env = "DEPLOYBOT_WEBHOOK_URL"

Expand Down Expand Up @@ -417,6 +424,12 @@ def parse_config(payload: dict[str, Any]) -> QueueConfig:
if intent_scope not in ALLOWED_INTENT_SCOPES:
allowed = ", ".join(sorted(ALLOWED_INTENT_SCOPES))
raise ConfigError(f"pipeline.intent_scope must be one of: {allowed}")
release_admission = _require_string(
pipeline.get("release_admission"), "pipeline.release_admission", "verified"
)
if release_admission not in ALLOWED_RELEASE_ADMISSION:
allowed = ", ".join(sorted(ALLOWED_RELEASE_ADMISSION))
raise ConfigError(f"pipeline.release_admission must be one of: {allowed}")

return QueueConfig(
base_branch=_require_string(
Expand Down Expand Up @@ -530,6 +543,7 @@ def parse_config(payload: dict[str, Any]) -> QueueConfig:
"pipeline.pause_on_failure",
True,
),
release_admission=release_admission,
webhook_url_env=webhook_url_env,
verifications=_verifications(pipeline.get("verifications")),
),
Expand Down
14 changes: 14 additions & 0 deletions src/agent_merge_queue/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ def follow_release(
*,
timeout_seconds: int,
poll_seconds: int,
admit_gate: str = "verified",
) -> dict[str, Any]:
deadline = time.monotonic() + timeout_seconds
observed_sha = ""
Expand Down Expand Up @@ -262,6 +263,19 @@ def follow_release(
client.dispatch_deploy_workflows(ci_run=ci)
)
dispatched_for.add(key)
if admit_gate == "ci-passed" and value["state"] in {
"awaiting-deploy",
"deploying",
}:
# Exact-main CI already passed and the deployment is in flight. Hand
# control back so the next batch can merge; a later reaction (or the
# scheduled reconciliation) follows this deployment through to
# verification, records the watermark, and emits notifications.
return {
**value,
"dispatched_deployments": dispatched_deployments,
"verifications": [],
}
if value["state"] == "verified":
checks = http_verifications(client.config.pipeline)
last_verifications = checks
Expand Down
85 changes: 85 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2388,6 +2388,90 @@ def test_reactor_requires_configured_health_before_reopening_admission(self) ->
self.assertEqual(result["release"]["state"], "verify-failed")
promote.assert_not_called()

def test_reactor_holds_at_awaiting_deploy_by_default(self) -> None:
    """Hold admission when the followed release only reaches awaiting-deploy.

    Runs the reactor against the shared ``CONFIG`` policy with
    ``command_follow`` stubbed to return an ``awaiting-deploy`` release, then
    checks that the reactor reports ``release-held``, never promotes, and
    followed the release with the ``verified`` admission gate.
    """
    main_sha = "a" * 40
    green_ci_run = {
        "id": 1,
        "name": "CI",
        "head_sha": main_sha,
        "status": "completed",
        "conclusion": "success",
    }
    followed_release = {"state": "awaiting-deploy", "main_sha": main_sha}

    client = Mock()
    client.config = CONFIG
    client.configure_mock(
        **{
            "pipeline_control.return_value": {"state": "running"},
            "base_sha.return_value": main_sha,
            "verified_main_sha.return_value": None,
            "workflow_runs.return_value": [green_ci_run],
        }
    )

    with (
        patch(
            "agent_merge_queue.cli.reconcile_externally_merged_threads",
            return_value=[],
        ),
        patch(
            "agent_merge_queue.cli.command_follow",
            return_value=followed_release,
        ) as follow,
        patch("agent_merge_queue.cli.command_promote") as promote,
        redirect_stdout(io.StringIO()),
    ):
        result = command_react(client, follow=True, timeout_seconds=10)

    self.assertEqual(result["state"], "release-held")
    promote.assert_not_called()
    # The gate handed to command_follow must be the "verified" one here.
    gate_passed_to_follow = follow.call_args.kwargs["admit_gate"]
    self.assertEqual(gate_passed_to_follow, "verified")

def test_reactor_admits_at_ci_passed_when_configured(self) -> None:
    """Reopen admission on green exact-main CI when ``ci-passed`` is set.

    Parses a policy with ``pipeline.release_admission = "ci-passed"``, runs
    the reactor without following, and checks that it does not report
    ``release-held``, promotes exactly once, and records no verified main.
    """
    main_sha = "a" * 40
    policy = parse_config(
        {
            "queue": {
                "required_checks": ["CI"],
                "trusted_actors": ["trusted"],
            },
            "pipeline": {"release_admission": "ci-passed"},
        }
    )
    green_ci_run = {
        "id": 1,
        "name": "CI",
        "head_sha": main_sha,
        "status": "completed",
        "conclusion": "success",
    }

    client = Mock()
    client.config = policy
    client.configure_mock(
        **{
            "pipeline_control.return_value": {"state": "running"},
            "base_sha.return_value": main_sha,
            "verified_main_sha.return_value": None,
            "workflow_runs.return_value": [green_ci_run],
        }
    )
    empty_promotion = {"promoted": [], "waiting": [], "blocked": []}
    frozen = FreezeResult(None, [], [], [], [])

    with (
        patch(
            "agent_merge_queue.cli.reconcile_externally_merged_threads",
            return_value=[],
        ),
        patch("agent_merge_queue.cli.settle_integration_checks", return_value=[]),
        patch("agent_merge_queue.cli.promote_integrations", return_value=[]),
        patch(
            "agent_merge_queue.cli.command_promote",
            return_value=empty_promotion,
        ) as promote,
        patch("agent_merge_queue.cli.freeze_queue", return_value=frozen),
        redirect_stdout(io.StringIO()),
    ):
        result = command_react(client, follow=False, timeout_seconds=10)

    # CI is green on the in-flight release, so admission reopens instead of
    # waiting for the deploy to finish.
    reported_state = result.get("state")
    self.assertNotEqual(reported_state, "release-held")
    promote.assert_called_once()
    client.record_verified_main.assert_not_called()

def test_release_repair_claim_creates_one_deterministic_lease(self) -> None:
sha = "a" * 40
client = object.__new__(GitHub)
Expand Down Expand Up @@ -5566,6 +5650,7 @@ def test_reactor_follows_release_without_a_new_merge(self) -> None:
poll_seconds=10,
json_output=False,
emit=False,
admit_gate="verified",
)
self.assertEqual(result["release"], release)

Expand Down
13 changes: 13 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def test_init_creates_a_loadable_safe_policy(self) -> None:

self.assertEqual(path.name, ".mergequeue.toml")
self.assertEqual(config.required_checks, ("CI",))
self.assertEqual(config.pipeline.release_admission, "verified")
self.assertEqual(
config.trusted_actors,
("@repository-owner",),
Expand Down Expand Up @@ -188,6 +189,7 @@ def test_parses_pipeline_integration_and_health_policy(self) -> None:
"promotion_workers": 3,
"repair_hold_minutes": 90,
"hold_merges_while_releasing": False,
"release_admission": "ci-passed",
"repair_branch_prefix": "repairs/main",
"auto_promote": False,
"verifications": [
Expand All @@ -212,6 +214,7 @@ def test_parses_pipeline_integration_and_health_policy(self) -> None:
self.assertEqual(config.pipeline.promotion_workers, 3)
self.assertEqual(config.pipeline.repair_hold_minutes, 90)
self.assertFalse(config.pipeline.hold_merges_while_releasing)
self.assertEqual(config.pipeline.release_admission, "ci-passed")
self.assertEqual(config.pipeline.repair_branch_prefix, "repairs/main")
self.assertFalse(config.pipeline.auto_promote)
self.assertEqual(config.pipeline.verifications[0].expected_status, 200)
Expand Down Expand Up @@ -293,6 +296,16 @@ def test_rejects_invalid_integration_mode_and_boolean(self) -> None:
"pipeline": {"intent_scope": "pull-request"},
}
)
with self.assertRaisesRegex(ConfigError, "release_admission"):
parse_config(
{
"queue": {
"required_checks": ["CI"],
"trusted_actors": ["trusted"],
},
"pipeline": {"release_admission": "deployed"},
}
)
with self.assertRaisesRegex(ConfigError, "batch_settle_seconds"):
parse_config(
{
Expand Down
1 change: 1 addition & 0 deletions tests/test_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ def test_reference_names_every_policy_field(self) -> None:
"auto_promote",
"intent_scope",
"pause_on_failure",
"release_admission",
"webhook_url_env",
"verifications",
"name",
Expand Down
36 changes: 36 additions & 0 deletions tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,42 @@ def test_follow_dispatches_deploy_after_token_dispatched_ci(self) -> None:
self.assertEqual(result["state"], "verified")
self.assertEqual(result["dispatched_deployments"][0]["id"], 9)

def test_follow_admits_at_ci_passed_without_waiting_for_deploy(self) -> None:
    """Return from ``follow_release`` at awaiting-deploy under ``ci-passed``.

    With a successful CI run on the current main SHA and one dispatched
    deploy, the call must hand back the ``awaiting-deploy`` release together
    with the dispatched deployment, without ever sleeping.
    """
    main_sha = "a" * 40
    green_ci_run = {
        "id": 1,
        "name": "CI",
        "head_sha": main_sha,
        "status": "completed",
        "conclusion": "success",
        "event": "workflow_dispatch",
        "created_at": "2026-06-20T00:00:00Z",
    }
    dispatched_deploy = {
        "id": 9,
        "name": "Deploy",
        "ci_sha": main_sha,
        "ci_run_id": 1,
    }

    client = Mock()
    client.config = CONFIG
    client.configure_mock(
        **{
            "base_sha.return_value": main_sha,
            "workflow_runs.return_value": [green_ci_run],
            "dispatch_deploy_workflows.return_value": [dispatched_deploy],
        }
    )

    with (
        patch("agent_merge_queue.pipeline.time.sleep") as sleep,
        patch("agent_merge_queue.pipeline.time.monotonic", return_value=0),
    ):
        result = follow_release(
            client,
            timeout_seconds=10,
            poll_seconds=1,
            admit_gate="ci-passed",
        )

    # CI is green, so admission returns immediately even though the deploy is
    # only just dispatched and never verified within this call.
    client.dispatch_deploy_workflows.assert_called_once()
    self.assertEqual(result["state"], "awaiting-deploy")
    first_dispatched = result["dispatched_deployments"][0]
    self.assertEqual(first_dispatched["id"], 9)
    sleep.assert_not_called()

def test_follow_absorbs_a_ci_rerun_during_failure_grace(self) -> None:
sha = "a" * 40
failed = {
Expand Down