Forward-Future · mberman84 · Jun 22, 2026
diff --git a/docs/reference.md b/docs/reference.md
@@ -26,7 +26,7 @@ DeployBot resolves the pull request for the current branch.
 | `deploybot status [--json]` | Read active thread metadata, unbound open PRs, pending native notifications, PR stages, exact-head deploy intent, pre-queue intent overlaps, request-stage timing alerts, queue state, exact-main CI, deployment, and pipeline control state. |
 | `deploybot plan [--json]` | Read the ordered queue, dependencies, blockers, and source-overlap groups. |
 | `deploybot inspect [PR] [--json]` | Evaluate one exact PR head without granting merge authority. |
-| `deploybot metrics [--limit N] [--json]` | Summarize p50, p95, and maximum delivery timings for recent merged PRs. The default limit is 25. |
+| `deploybot metrics [--limit N] [--json]` | Summarize p50, p95, and maximum delivery timings for recent merged PRs, each stage's attainment against the configured speed targets, and the first-pass merge rate (deliveries that merged without a recorded repair handoff). The default limit is 25. |
 
 `status`, `plan`, `inspect`, `doctor`, and `metrics` are read-only. In
 particular, do not use `freeze` as a status command because it writes a durable

diff --git a/src/agent_merge_queue/cli.py b/src/agent_merge_queue/cli.py
@@ -5869,12 +5869,15 @@ def delivery_metrics(client: GitHub, *, limit: int) -> dict[str, Any]:
     comments_by_number = client.comments_for_pull_requests(
         int(pull["number"]) for pull in pulls
     )
+    repair_logins = coordinator_logins(client) | set(client.trusted_logins)
 
     def sample(pull: dict[str, Any]) -> dict[str, Any]:
         number = int(pull["number"])
         comments = comments_by_number.get(number, [])
         intent = latest_intent(comments, client.trusted_logins)
         marker = queue_marker_for_client(client, comments)
+        repair = latest_payload(comments, REPAIR_MARKER, repair_logins)
+        repaired = bool(repair) and not repair_marker_is_transitional(repair)
         merged_at = str(pull.get("merged_at") or "") or None
         merge_sha = str(pull.get("merge_commit_sha") or "")
         live_at: str | None = None
@@ -5897,6 +5900,7 @@ def sample(pull: dict[str, Any]) -> dict[str, Any]:
             "queued_at": queued_at,
             "merged_at": merged_at,
             "live_at": live_at,
+            "repaired": repaired,
             "request_to_queue_seconds": seconds_between(requested_at, queued_at),
             "queue_to_merge_seconds": seconds_between(queued_at, merged_at),
             "merge_to_live_seconds": seconds_between(merged_at, live_at),
@@ -5910,7 +5914,27 @@ def sample(pull: dict[str, Any]) -> dict[str, Any]:
             max_workers=min(client.config.pipeline.promotion_workers, len(pulls))
         ) as executor:
             samples = list(executor.map(sample, pulls))
-    return summarize_metrics(samples)
+    # Compare each stage against the operator's own speed targets so the
+    # historical view answers "are PRs going out the door quickly enough?",
+    # mirroring how `status` applies these same targets to live work.
+    ready_target = client.config.pipeline.ready_to_merge_target_minutes * 60
+    live_target = client.config.pipeline.merge_to_live_target_minutes * 60
+    targets = {
+        "queue_to_merge_seconds": ready_target,
+        "merge_to_live_seconds": live_target,
+        "request_to_live_seconds": ready_target + live_target,
+    }
+    summary = summarize_metrics(samples, targets=targets)
+    repaired_merges = sum(1 for value in samples if value.get("repaired"))
+    summary["reliability"] = {
+        "merged_samples": len(samples),
+        "first_pass_merges": len(samples) - repaired_merges,
+        "repaired_merges": repaired_merges,
+        "first_pass_rate": (
+            (len(samples) - repaired_merges) / len(samples) if samples else None
+        ),
+    }
+    return summary
 
 
 def print_doctor(rows: list[dict[str, Any]], *, json_output: bool) -> None:
@@ -6164,12 +6188,32 @@ def main(argv: list[str] | None = None) -> int:
                 print(json.dumps(value, indent=2, sort_keys=True))
             else:
                 print(f"delivery samples: {value['sample_count']}")
+                reliability = value.get("reliability") or {}
+                if reliability.get("merged_samples"):
+                    rate = reliability.get("first_pass_rate")
+                    rate_text = f"{rate * 100:.0f}%" if rate is not None else "n/a"
+                    print(
+                        "first-pass merges: "
+                        f"{reliability['first_pass_merges']}/"
+                        f"{reliability['merged_samples']} ({rate_text})"
+                    )
                 for name, summary in value.items():
-                    if name.endswith("_seconds"):
-                        print(
-                            f"{name}: p50={summary['p50']}s "
-                            f"p95={summary['p95']}s max={summary['max']}s"
+                    if not name.endswith("_seconds"):
+                        continue
+                    line = (
+                        f"{name}: p50={summary['p50']}s "
+                        f"p95={summary['p95']}s max={summary['max']}s"
+                    )
+                    if summary.get("target_seconds") is not None:
+                        within = summary.get("within_target_rate")
+                        within_text = (
+                            f"{within * 100:.0f}%" if within is not None else "n/a"
+                        )
+                        line += (
+                            f" target={summary['target_seconds']}s "
+                            f"within={within_text}"
                         )
+                    print(line)
         elif arguments.command == "block":
             command_block(client, arguments.pr, arguments.reason)
         elif arguments.command == "unblock":

diff --git a/src/agent_merge_queue/pipeline.py b/src/agent_merge_queue/pipeline.py
@@ -183,24 +183,38 @@ def percentile(values: Iterable[float], fraction: float) -> float | None:
     return ordered[index]
 
 
-def summarize_metrics(samples: list[dict[str, Any]]) -> dict[str, Any]:
+def summarize_metrics(
+    samples: list[dict[str, Any]],
+    *,
+    targets: dict[str, float] | None = None,
+) -> dict[str, Any]:
     stages = (
         "request_to_queue_seconds",
         "queue_to_merge_seconds",
         "merge_to_live_seconds",
         "request_to_live_seconds",
     )
+    targets = targets or {}
     summary: dict[str, Any] = {"sample_count": len(samples), "samples": samples}
     for stage in stages:
         values = [
             float(sample[stage]) for sample in samples if sample.get(stage) is not None
         ]
-        summary[stage] = {
+        stage_summary: dict[str, Any] = {
             "count": len(values),
             "p50": percentile(values, 0.50),
             "p95": percentile(values, 0.95),
             "max": max(values) if values else None,
         }
+        target = targets.get(stage)
+        if target is not None:
+            within = sum(1 for value in values if value <= target)
+            stage_summary["target_seconds"] = target
+            stage_summary["within_target"] = within
+            stage_summary["within_target_rate"] = (
+                within / len(values) if values else None
+            )
+        summary[stage] = stage_summary
     return summary
 
 

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -3902,6 +3902,109 @@ def test_metrics_skip_deployments_that_finished_before_the_merge(self) -> None:
             datetime(2026, 6, 20, 1, tzinfo=timezone.utc),
         )
 
+    def test_metrics_report_target_attainment_and_first_pass_rate(self) -> None:
+        clean_merge = "c" * 40
+        repaired_merge = "f" * 40
+        clean_deploy = "d" * 40
+        repaired_deploy = "e" * 40
+        head_42 = "a" * 40
+        head_43 = "b" * 40
+        client = Mock()
+        client.config = CONFIG
+        client.repository = "example/repo"
+        client.trusted_logins = {"trusted"}
+        client.coordinator_logins = {"trusted"}
+        client.successful_workflow_runs.return_value = [
+            {
+                "name": "Deploy",
+                "conclusion": "success",
+                "head_sha": clean_deploy,
+                "created_at": "2026-06-20T00:09:00Z",
+                "updated_at": "2026-06-20T00:09:00Z",
+            },
+            {
+                "name": "Deploy",
+                "conclusion": "success",
+                "head_sha": repaired_deploy,
+                "created_at": "2026-06-20T00:25:00Z",
+                "updated_at": "2026-06-20T00:25:00Z",
+            },
+        ]
+        client.recent_merged_pull_requests.return_value = [
+            {
+                "number": 42,
+                "merged_at": "2026-06-20T00:05:00Z",
+                "merge_commit_sha": clean_merge,
+            },
+            {
+                "number": 43,
+                "merged_at": "2026-06-20T00:05:00Z",
+                "merge_commit_sha": repaired_merge,
+            },
+        ]
+
+        def comment(login: str, body: str, created_at: str) -> dict[str, Any]:
+            return {"user": {"login": login}, "body": body, "created_at": created_at}
+
+        def intent_comment(head: str) -> dict[str, Any]:
+            return comment(
+                "trusted",
+                intent_body(
+                    intent_id=f"intent-{head[:1]}",
+                    state="requested",
+                    requested_at="2026-06-20T00:00:00Z",
+                    requested_head=head,
+                ),
+                "2026-06-20T00:00:00Z",
+            )
+
+        def queue_comment(head: str) -> dict[str, Any]:
+            return comment(
+                "trusted",
+                queue_state_body("queued", head, queued_at="2026-06-20T00:01:00Z"),
+                "2026-06-20T00:01:00Z",
+            )
+
+        client.comments_for_pull_requests.return_value = {
+            42: [intent_comment(head_42), queue_comment(head_42)],
+            43: [
+                intent_comment(head_43),
+                queue_comment(head_43),
+                comment(
+                    "trusted",
+                    repair_body({"head_sha": head_43, "reason": "CI failed"}),
+                    "2026-06-20T00:02:00Z",
+                ),
+            ],
+        }
+
+        def is_ancestor(merge_sha: str, deployed_sha: str) -> bool:
+            return (merge_sha, deployed_sha) in {
+                (clean_merge, clean_deploy),
+                (repaired_merge, repaired_deploy),
+            }
+
+        client.is_ancestor.side_effect = is_ancestor
+
+        result = delivery_metrics(client, limit=25)
+
+        self.assertEqual(result["sample_count"], 2)
+        self.assertEqual(result["reliability"]["first_pass_merges"], 1)
+        self.assertEqual(result["reliability"]["repaired_merges"], 1)
+        self.assertEqual(result["reliability"]["first_pass_rate"], 0.5)
+        merge_to_live = result["merge_to_live_seconds"]
+        self.assertEqual(merge_to_live["target_seconds"], 600)
+        self.assertEqual(merge_to_live["within_target"], 1)
+        self.assertEqual(merge_to_live["within_target_rate"], 0.5)
+        request_to_live = result["request_to_live_seconds"]
+        self.assertEqual(request_to_live["target_seconds"], 1500)
+        self.assertEqual(request_to_live["within_target"], 2)
+        self.assertNotIn("target_seconds", result["request_to_queue_seconds"])
+        repaired = {
+            value["pull_request"]: value["repaired"] for value in result["samples"]
+        }
+        self.assertEqual(repaired, {42: False, 43: True})
+
     def test_integration_seal_round_trips_through_git_commit(self) -> None:
         parent_sha = "p" * 40
         seal_sha = "s" * 40

diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
@@ -306,6 +306,28 @@ def test_metrics_report_p50_and_p95(self) -> None:
         self.assertEqual(summary["request_to_queue_seconds"]["p50"], 1)
         self.assertEqual(summary["request_to_queue_seconds"]["p95"], 3)
 
+    def test_metrics_report_target_attainment(self) -> None:
+        summary = summarize_metrics(
+            [
+                {"merge_to_live_seconds": 100},
+                {"merge_to_live_seconds": 700},
+            ],
+            targets={"merge_to_live_seconds": 600},
+        )
+        stage = summary["merge_to_live_seconds"]
+        self.assertEqual(stage["target_seconds"], 600)
+        self.assertEqual(stage["within_target"], 1)
+        self.assertEqual(stage["within_target_rate"], 0.5)
+
+    def test_metrics_omit_attainment_without_targets(self) -> None:
+        summary = summarize_metrics([{"merge_to_live_seconds": 100}])
+        self.assertNotIn("target_seconds", summary["merge_to_live_seconds"])
+        self.assertIsNone(
+            summarize_metrics([], targets={"merge_to_live_seconds": 600})[
+                "merge_to_live_seconds"
+            ]["within_target_rate"]
+        )
+
     def test_webhook_failure_never_blocks_delivery_state(self) -> None:
         config = parse_config(
             {