diff --git a/Makefile b/Makefile index 68d5f781..2b9ae194 100644 --- a/Makefile +++ b/Makefile @@ -76,6 +76,9 @@ UPSTREAM_SRCS = $(SERVER_DIR)/upstream_connection.cc $(SERVER_DIR)/pool_partitio # Rate limit layer sources RATE_LIMIT_SRCS = $(SERVER_DIR)/token_bucket.cc $(SERVER_DIR)/rate_limit_zone.cc $(SERVER_DIR)/rate_limiter.cc +# Circuit breaker layer sources +CIRCUIT_BREAKER_SRCS = $(SERVER_DIR)/circuit_breaker_window.cc $(SERVER_DIR)/circuit_breaker_slice.cc $(SERVER_DIR)/retry_budget.cc $(SERVER_DIR)/circuit_breaker_host.cc $(SERVER_DIR)/circuit_breaker_manager.cc + # CLI layer sources CLI_SRCS = $(SERVER_DIR)/cli_parser.cc $(SERVER_DIR)/signal_handler.cc $(SERVER_DIR)/pid_file.cc $(SERVER_DIR)/daemonizer.cc @@ -122,7 +125,7 @@ NGHTTP2_SRC = $(THIRD_PARTY_DIR)/nghttp2/nghttp2_alpn.c \ NGHTTP2_OBJ = $(NGHTTP2_SRC:.c=.o) # Server library sources (shared between test and production binaries) -LIB_SRCS = $(REACTOR_SRCS) $(NETWORK_SRCS) $(SERVER_SRCS) $(THREAD_POOL_SRCS) $(FOUNDATION_SRCS) $(HTTP_SRCS) $(HTTP2_SRCS) $(WS_SRCS) $(TLS_SRCS) $(UPSTREAM_SRCS) $(RATE_LIMIT_SRCS) $(CLI_SRCS) $(UTIL_SRCS) +LIB_SRCS = $(REACTOR_SRCS) $(NETWORK_SRCS) $(SERVER_SRCS) $(THREAD_POOL_SRCS) $(FOUNDATION_SRCS) $(HTTP_SRCS) $(HTTP2_SRCS) $(WS_SRCS) $(TLS_SRCS) $(UPSTREAM_SRCS) $(RATE_LIMIT_SRCS) $(CIRCUIT_BREAKER_SRCS) $(CLI_SRCS) $(UTIL_SRCS) # Test binary sources TEST_SRCS = $(LIB_SRCS) $(TEST_DIR)/test_framework.cc $(TEST_DIR)/run_test.cc @@ -142,11 +145,12 @@ WS_HEADERS = $(LIB_DIR)/ws/websocket_connection.h $(LIB_DIR)/ws/websocket_frame. 
TLS_HEADERS = $(LIB_DIR)/tls/tls_context.h $(LIB_DIR)/tls/tls_connection.h $(LIB_DIR)/tls/tls_client_context.h UPSTREAM_HEADERS = $(LIB_DIR)/upstream/upstream_manager.h $(LIB_DIR)/upstream/upstream_host_pool.h $(LIB_DIR)/upstream/pool_partition.h $(LIB_DIR)/upstream/upstream_connection.h $(LIB_DIR)/upstream/upstream_lease.h $(LIB_DIR)/upstream/upstream_http_codec.h $(LIB_DIR)/upstream/http_request_serializer.h $(LIB_DIR)/upstream/header_rewriter.h $(LIB_DIR)/upstream/retry_policy.h $(LIB_DIR)/upstream/proxy_transaction.h $(LIB_DIR)/upstream/proxy_handler.h $(LIB_DIR)/upstream/upstream_response.h $(LIB_DIR)/upstream/upstream_callbacks.h RATE_LIMIT_HEADERS = $(LIB_DIR)/rate_limit/token_bucket.h $(LIB_DIR)/rate_limit/rate_limit_zone.h $(LIB_DIR)/rate_limit/rate_limiter.h +CIRCUIT_BREAKER_HEADERS = $(LIB_DIR)/circuit_breaker/circuit_breaker_state.h $(LIB_DIR)/circuit_breaker/circuit_breaker_window.h $(LIB_DIR)/circuit_breaker/circuit_breaker_slice.h $(LIB_DIR)/circuit_breaker/retry_budget.h $(LIB_DIR)/circuit_breaker/circuit_breaker_host.h $(LIB_DIR)/circuit_breaker/circuit_breaker_manager.h CLI_HEADERS = $(LIB_DIR)/cli/cli_parser.h $(LIB_DIR)/cli/signal_handler.h $(LIB_DIR)/cli/pid_file.h $(LIB_DIR)/cli/version.h $(LIB_DIR)/cli/daemonizer.h -TEST_HEADERS = $(TEST_DIR)/test_framework.h $(TEST_DIR)/http_test_client.h $(TEST_DIR)/basic_test.h $(TEST_DIR)/stress_test.h $(TEST_DIR)/race_condition_test.h $(TEST_DIR)/timeout_test.h $(TEST_DIR)/config_test.h $(TEST_DIR)/http_test.h $(TEST_DIR)/websocket_test.h $(TEST_DIR)/tls_test.h $(TEST_DIR)/cli_test.h $(TEST_DIR)/http2_test.h $(TEST_DIR)/route_test.h $(TEST_DIR)/upstream_pool_test.h $(TEST_DIR)/proxy_test.h +TEST_HEADERS = $(TEST_DIR)/test_framework.h $(TEST_DIR)/http_test_client.h $(TEST_DIR)/basic_test.h $(TEST_DIR)/stress_test.h $(TEST_DIR)/race_condition_test.h $(TEST_DIR)/timeout_test.h $(TEST_DIR)/config_test.h $(TEST_DIR)/http_test.h $(TEST_DIR)/websocket_test.h $(TEST_DIR)/tls_test.h $(TEST_DIR)/cli_test.h 
$(TEST_DIR)/http2_test.h $(TEST_DIR)/route_test.h $(TEST_DIR)/upstream_pool_test.h $(TEST_DIR)/proxy_test.h $(TEST_DIR)/rate_limit_test.h $(TEST_DIR)/kqueue_test.h $(TEST_DIR)/circuit_breaker_test.h $(TEST_DIR)/circuit_breaker_components_test.h $(TEST_DIR)/circuit_breaker_integration_test.h $(TEST_DIR)/circuit_breaker_retry_budget_test.h $(TEST_DIR)/circuit_breaker_wait_queue_drain_test.h $(TEST_DIR)/circuit_breaker_observability_test.h $(TEST_DIR)/circuit_breaker_reload_test.h # All headers combined -HEADERS = $(CORE_HEADERS) $(CALLBACK_HEADERS) $(REACTOR_HEADERS) $(NETWORK_HEADERS) $(SERVER_HEADERS) $(THREAD_POOL_HEADERS) $(UTIL_HEADERS) $(FOUNDATION_HEADERS) $(HTTP_HEADERS) $(HTTP2_HEADERS) $(WS_HEADERS) $(TLS_HEADERS) $(UPSTREAM_HEADERS) $(RATE_LIMIT_HEADERS) $(CLI_HEADERS) $(TEST_HEADERS) +HEADERS = $(CORE_HEADERS) $(CALLBACK_HEADERS) $(REACTOR_HEADERS) $(NETWORK_HEADERS) $(SERVER_HEADERS) $(THREAD_POOL_HEADERS) $(UTIL_HEADERS) $(FOUNDATION_HEADERS) $(HTTP_HEADERS) $(HTTP2_HEADERS) $(WS_HEADERS) $(TLS_HEADERS) $(UPSTREAM_HEADERS) $(RATE_LIMIT_HEADERS) $(CIRCUIT_BREAKER_HEADERS) $(CLI_HEADERS) $(TEST_HEADERS) # Default target .DEFAULT_GOAL := all @@ -238,6 +242,11 @@ test_rate_limit: $(TARGET) @echo "Running rate limit tests only..." ./$(TARGET) rate_limit +# Run only circuit breaker tests +test_circuit_breaker: $(TARGET) + @echo "Running circuit breaker tests only..." 
+ ./$(TARGET) circuit_breaker + # Display help information help: @echo "Reactor Server C++ - Makefile Help" @@ -318,4 +327,4 @@ help: # Build only the production server binary server: $(SERVER_TARGET) -.PHONY: all clean test server test_basic test_stress test_race test_config test_http test_ws test_tls test_cli test_http2 test_upstream test_proxy test_rate_limit help +.PHONY: all clean test server test_basic test_stress test_race test_config test_http test_ws test_tls test_cli test_http2 test_upstream test_proxy test_rate_limit test_circuit_breaker help diff --git a/docs/circuit_breaker.md b/docs/circuit_breaker.md new file mode 100644 index 00000000..ef3a5ef0 --- /dev/null +++ b/docs/circuit_breaker.md @@ -0,0 +1,149 @@ +# Circuit Breaker + +Per-upstream circuit breaking for the gateway, preventing cascading failures when a backend becomes unhealthy. Follows the resilience4j three-state machine (`CLOSED` → `OPEN` → `HALF_OPEN` → `CLOSED`), trips on either consecutive-failure or failure-rate thresholds, and short-circuits checkouts with `503 Service Unavailable` while the circuit is open. A separate **retry budget** caps the fraction of concurrent upstream work that may be retries, bounding the retry-storm amplification factor even when individual retries pass the breaker gate. + +--- + +## Overview + +- **Per-dispatcher slices.** One `CircuitBreakerSlice` per dispatcher partition for each upstream. Hot-path `TryAcquire` / `Report*` calls are lock-free — each slice is dispatcher-thread-pinned. +- **Three states.** `CLOSED` = normal traffic. `OPEN` = all requests short-circuited with 503 for the exponential-backoff open duration. `HALF_OPEN` = a bounded number of probe requests are admitted to test recovery; on success, closes; on failure, re-trips with longer backoff. +- **Dual trip paths.** Either `consecutive_failures >= N` OR `failure_rate >= P%` over a sliding window (subject to `minimum_volume`). 
+- **Retry budget.** Host-level cap: `max(retry_budget_min_concurrency, (in_flight - retries_in_flight) * retry_budget_percent / 100)`. Retries that exceed the cap receive `503` + `X-Retry-Budget-Exhausted: 1` instead of going to the upstream. +- **Wait-queue drain on trip.** On every `CLOSED → OPEN` transition, the corresponding pool partition's wait queue is drained immediately with `503 + X-Circuit-Breaker: open` — queued waiters don't have to wait out the full open window. +- **Dry-run mode.** `dry_run=true` computes decisions and logs them, but still admits traffic. Useful for staging a breaker in production without risk. +- **Hot-reload.** Breaker-field edits (thresholds, window, probe budget, retry budget tuning, enabled toggle) apply live on SIGHUP — no restart required. Topology edits (host/port/pool/proxy/tls) still require a restart. + +--- + +## Configuration + +Each `upstream` entry accepts a nested `circuit_breaker` block: + +```json +{ + "upstreams": [ + { + "name": "orders", + "host": "orders-backend", + "port": 8080, + "circuit_breaker": { + "enabled": true, + "dry_run": false, + "consecutive_failure_threshold": 5, + "failure_rate_threshold": 50, + "minimum_volume": 20, + "window_seconds": 10, + "permitted_half_open_calls": 3, + "base_open_duration_ms": 5000, + "max_open_duration_ms": 60000, + "retry_budget_percent": 20, + "retry_budget_min_concurrency": 3 + } + } + ] +} +``` + +### Fields + +| Field | Type | Default | Meaning | +|---|---|---|---| +| `enabled` | bool | `false` | Master switch. When false, the slice is a zero-overhead no-op on the hot path. | +| `dry_run` | bool | `false` | Shadow mode: log would-reject decisions but admit traffic. Both the state machine and the retry budget honor this flag. | +| `consecutive_failure_threshold` | int | `5` | Trip when N consecutive failures are observed in `CLOSED`. Upper bound 10,000. 
| +| `failure_rate_threshold` | int | `50` | Trip when `(failures / total) * 100 >= this` over the rolling window, provided `total >= minimum_volume`. 0-100. | +| `minimum_volume` | int | `20` | Minimum calls-in-window before rate-based trip is even considered. Upper bound 10,000,000. | +| `window_seconds` | int | `10` | Rolling window duration for the rate trip. >= 1. | +| `permitted_half_open_calls` | int | `3` | Probe admissions allowed per `HALF_OPEN` cycle. A single success flips to `CLOSED`; a single failure re-trips to `OPEN`. Upper bound 1,000. | +| `base_open_duration_ms` | int | `5000` | Initial open duration on first trip. Subsequent trips use `min(base << consecutive_trips, max)`. | +| `max_open_duration_ms` | int | `60000` | Ceiling for the exponential-backoff open duration. | +| `retry_budget_percent` | int | `20` | Retries capped at this % of non-retry in-flight traffic to the same host. 0-100. | +| `retry_budget_min_concurrency` | int | `3` | Floor for the retry cap — always allow at least this many concurrent retries regardless of traffic level. | + +### Defaults (when `circuit_breaker` block is absent) + +`enabled=false`. The breaker is fully opt-in. No behavioral change from a pre-breaker gateway configuration. + +--- + +## Client-facing responses + +Two distinct `503` variants, keyed off the reject source: + +**Circuit-open reject** — breaker is `OPEN` or in `HALF_OPEN`-full: +``` +HTTP/1.1 503 Service Unavailable +Retry-After: 5 +X-Circuit-Breaker: open # or half_open +X-Upstream-Host: orders-backend:8080 +Connection: close +``` + +- `Retry-After` derivation: + - `OPEN`: derived from the stored `open_until` deadline (time remaining until next probe). + - `HALF_OPEN`: derived from the *next* open duration (`base << consecutive_trips`) — reflects what the backoff would be if the in-flight probes fail. Base alone would under-report after multiple trips. + - Both paths: ceil-divide the millisecond value to seconds, capped at 3600s. 
+- `X-Circuit-Breaker` distinguishes the two reject paths so operators can tell "backoff active" from "probing, no capacity left". + +**Retry-budget reject** — every retry attempt rejected because the host's budget is exhausted: +``` +HTTP/1.1 503 Service Unavailable +X-Retry-Budget-Exhausted: 1 +Connection: close +``` + +No `Retry-After` (the budget has no recovery clock — it depends on concurrent traffic). No `X-Circuit-Breaker` header (this reject path is orthogonal to the state machine). + +Both responses are **terminal**: the retry loop never retries a circuit-open or retry-budget-exhausted outcome. + +--- + +## Hot reload + +All `circuit_breaker` fields on existing upstream services are hot-reloadable via `SIGHUP`. Reload semantics: + +| Edit | Behavior | +|---|---| +| Threshold change (failures, rate, window, probe budget, open durations) | Applied on the next `TryAcquire` / `Report*` call on each slice. Live state (`CLOSED`/`OPEN`/`HALF_OPEN`) is preserved. | +| `enabled=true → false` | Live state reset to `CLOSED`; hot path short-circuits to `ADMITTED`. No transition callback fired. | +| `enabled=false → true` | Live state reset to `CLOSED`. The transition callback (wired at startup) re-engages for future trips. | +| `window_seconds` change | Rolling window reset. In-flight reports admitted pre-reload are invalidated (by `closed_gen_` bump); `consecutive_failures_` reset so stale counts can't trip the fresh window. In-flight `HALF_OPEN` probes are NOT invalidated (separate `halfopen_gen_` counter) — probe cycles complete normally. | +| `retry_budget_percent` / `retry_budget_min_concurrency` | Applied immediately (atomic stores). In-flight counters preserved. | + +Topology edits (`host`, `port`, `pool.*`, `proxy.*`, `tls.*`) still require a restart; the gateway logs `"Reload: upstream topology changes require a restart to take effect"` and keeps the old pool alive. Breaker edits on the same reload are still applied live. 
+ +--- + +## Observability + +### Logs + +| Event | Level | Sample | +|---|---|---| +| `CLOSED → OPEN` trip | `warn` | `circuit breaker tripped service=orders host=orders-backend:8080 partition=0 trigger=consecutive consecutive_failures=5 window_total=12 window_fail_rate=41 open_for_ms=5000 consecutive_trips=1` | +| `OPEN → HALF_OPEN` | `info` | `circuit breaker half-open ... probes_allowed=3` | +| `HALF_OPEN → CLOSED` | `info` | `circuit breaker closed ... probes_succeeded=3` | +| `HALF_OPEN → OPEN` re-trip | `warn` | `circuit breaker re-tripped ... trigger=probe_fail consecutive_trips=2 open_for_ms=10000` | +| Reject (first of cycle) | `info` | `circuit breaker rejected ... state=open` | +| Reject (subsequent) | `debug` | Same, at debug. | +| Reject (dry-run) | `info` | `[dry-run] circuit breaker would reject ...` | +| Retry budget exhausted | `warn` | `retry budget exhausted service=orders in_flight=45 retries_in_flight=9 cap=9 client_fd=... attempt=1` | +| Reload applied | `info` | `circuit breaker config applied service=orders enabled=true window_s=10 fail_rate=50 consec_threshold=5` | +| Wait-queue drain on trip | `info` | `PoolPartition draining wait queue on breaker trip: orders-backend:8080 queue_size=3` | + +### Snapshot API + +`CircuitBreakerManager::SnapshotAll()` returns one `CircuitBreakerHostSnapshot` per upstream with per-slice rows (`state`, `trips`, `rejected`, `probe_successes`, `probe_failures`) plus host-level aggregates (`total_trips`, `total_rejected`, `open_partitions`, `half_open_partitions`, `retries_in_flight`, `retries_rejected`, `in_flight`). A `/admin/breakers` HTTP endpoint that JSON-serializes this snapshot is **planned but not yet exposed** — the API is ready for future wiring. + +--- + +## Design notes + +- **Dispatcher affinity.** Slices are pinned to their dispatcher thread — no CAS on the hot path. The trade-off: skewed request distribution across dispatchers can cause one partition to trip while another stays `CLOSED`. 
Uniform hashing keeps this mild in practice. +- **Lazy `HALF_OPEN`.** The transition from `OPEN` happens on the next inbound `TryAcquire` once the open deadline elapses — no background timer. Envoy and resilience4j use the same model. +- **Generation tokens.** Every admission is stamped with a per-domain generation counter (`closed_gen_` or `halfopen_gen_`, depending on state). `Report*` drops stale-generation completions so pre-transition requests can't pollute a fresh cycle. Window resizes bump only `closed_gen_` so in-flight probes aren't stranded. +- **Retry budget CAS.** `TryConsumeRetry` uses `compare_exchange_weak` to serialize concurrent retry admissions. A plain load-check-add would let N callers all observe `current < cap` and all increment past the cap. +- **Non-retry denominator.** The budget base is `in_flight - retries_in_flight`, not raw `in_flight`. Retries count in both terms but subtract out here so admitting a retry doesn't inflate its own cap. + +For the full design document (motivations, trade-offs, failure modes, revision history, test strategy), see [.claude/documents/design/CIRCUIT_BREAKER_DESIGN.md](../.claude/documents/design/CIRCUIT_BREAKER_DESIGN.md). diff --git a/include/circuit_breaker/circuit_breaker_host.h b/include/circuit_breaker/circuit_breaker_host.h new file mode 100644 index 00000000..67211667 --- /dev/null +++ b/include/circuit_breaker/circuit_breaker_host.h @@ -0,0 +1,118 @@ +#pragma once + +#include "common.h" +#include "config/server_config.h" +#include "circuit_breaker/circuit_breaker_slice.h" +#include "circuit_breaker/retry_budget.h" +// , , provided by common.h + +class Dispatcher; + +namespace circuit_breaker { + +// Observability snapshot of a single host, aggregated across all its +// partition slices. Safe to call from any thread (relaxed reads of +// atomic counters). Per-slice rows let dashboards detect skewed +// failure distribution across dispatchers. 
+struct CircuitBreakerHostSnapshot { + std::string service_name; + std::string host; + int port = 0; + + struct SliceRow { + size_t dispatcher_index = 0; + State state = State::CLOSED; + int64_t trips = 0; + int64_t rejected = 0; + int64_t probe_successes = 0; + int64_t probe_failures = 0; + }; + std::vector<SliceRow> slices; + + // Aggregates across slices. + int64_t total_trips = 0; + int64_t total_rejected = 0; + int open_partitions = 0; + int half_open_partitions = 0; + + // Retry budget state (per-host, shared across partitions). + int64_t retries_in_flight = 0; + int64_t retries_rejected = 0; + int64_t in_flight = 0; +}; + +// Per-upstream-service aggregation layer. Owns: +// - N CircuitBreakerSlice instances (one per dispatcher partition, +// each pinned to its dispatcher for lock-free hot-path access). +// - One RetryBudget (shared across partitions — retry %-of-in-flight +// is a host-level metric, not per-dispatcher). +// +// Lifetime: constructed by CircuitBreakerManager at server start, lives +// for the server's lifetime. `service_name`, `host`, `port`, and the +// slice vector are never mutated post-construction (keys are stable for +// lock-free map lookup in the manager). +class CircuitBreakerHost { +public: + // `partition_count` must equal the number of dispatcher partitions + // in the server — typically NetServer's socket worker count or + // upstream pool's partition count. One slice is created per + // partition up-front. + CircuitBreakerHost(std::string service_name, + std::string host, + int port, + size_t partition_count, + const CircuitBreakerConfig& config); + + CircuitBreakerHost(const CircuitBreakerHost&) = delete; + CircuitBreakerHost& operator=(const CircuitBreakerHost&) = delete; + + // Hot-path lookup — returns nullptr only if `dispatcher_index` is + // out of range (programming error). Caller must invoke the + // returned slice's methods on its owning dispatcher thread.
+ CircuitBreakerSlice* GetSlice(size_t dispatcher_index); + + // Owned retry budget. Never null for the host's lifetime; safe to + // cache the pointer. Shared across all partitions of this host. + RetryBudget* GetRetryBudget() { return retry_budget_.get(); } + const RetryBudget* GetRetryBudget() const { return retry_budget_.get(); } + + // Aggregate snapshot across all slices + retry budget. Reads are + // relaxed atomic — eventually consistent across threads, which is + // fine for dashboards. + CircuitBreakerHostSnapshot Snapshot() const; + + // Apply a new config to every slice. Because each slice is pinned + // to its dispatcher thread, the call is dispatched per-partition — + // the caller provides the dispatcher list in the same order used at + // construction. If `dispatchers.size() != slices_.size()`, the + // method logs an error and returns without applying. + // + // The retry-budget sub-fields (percent, min_concurrency) are + // updated immediately (atomic stores, any thread) as part of this + // call — they don't need dispatcher routing. + void Reload(const std::vector>& dispatchers, + const CircuitBreakerConfig& new_config); + + // Install a transition callback on every slice. Uniform callback + // across partitions — callers that need partition-specific behavior + // can read `slice->dispatcher_index()` inside the callback. + // Must be called before live traffic; thread-safety depends on + // slice-dispatcher affinity at the Reload layer. + void SetTransitionCallbackOnAllSlices(StateTransitionCallback cb); + + // Accessors. 
+ const std::string& service_name() const { return service_name_; } + const std::string& host() const { return host_; } + int port() const { return port_; } + size_t partition_count() const { return slices_.size(); } + +private: + std::string service_name_; + std::string host_; + int port_; + CircuitBreakerConfig config_; + std::vector<std::unique_ptr<CircuitBreakerSlice>> slices_; + std::unique_ptr<RetryBudget> retry_budget_; +}; + +} // namespace circuit_breaker diff --git a/include/circuit_breaker/circuit_breaker_manager.h b/include/circuit_breaker/circuit_breaker_manager.h new file mode 100644 index 00000000..b4b32f06 --- /dev/null +++ b/include/circuit_breaker/circuit_breaker_manager.h @@ -0,0 +1,80 @@ +#pragma once + +#include "common.h" +#include "circuit_breaker/circuit_breaker_host.h" +// <string>, <vector>, <memory>, <unordered_map>, <mutex> provided by common.h + +class Dispatcher; + +namespace circuit_breaker { + +// Top-level circuit-breaker orchestrator. Mirrors the shape of +// RateLimitManager: one instance lives on HttpServer, built once at +// MarkServerReady, survives for the server's lifetime. +// +// Ownership (per design §3.1): +// HttpServer +// ├── upstream_manager_ (declared FIRST, destructs last) +// └── circuit_breaker_manager_ (declared SECOND, destructs first) +// +// CircuitBreakerManager +// └── hosts_: unordered_map<std::string, std::unique_ptr<CircuitBreakerHost>> +// +// `hosts_` is built once in the constructor — keys are never added or +// removed at runtime (topology is restart-only per the existing +// upstream policy). This makes GetHost lock-free after construction, +// which is critical for the hot path. +// +// Hot-reload: only `circuit_breaker` sub-fields on EXISTING +// upstream services can be live-reloaded. New or removed service names +// log a warn and are skipped — the caller (HttpServer::Reload) still +// fires the "restart required" diagnostic in that case.
+class CircuitBreakerManager { +public: + // Builds one CircuitBreakerHost per upstream in `upstreams` — even + // when upstreams[i].circuit_breaker.enabled is false — so a later + // reload that flips enabled to true can take effect without + // re-wiring transition callbacks (disabled slices hold the callback + // but never invoke it). + // + // `partition_count` must match the server's dispatcher partition + // count (upstream pool / NetServer worker count). `dispatchers` + // captures the dispatcher list so Reload can route per-slice work. + CircuitBreakerManager( + const std::vector& upstreams, + size_t partition_count, + std::vector<std::shared_ptr<Dispatcher>> dispatchers); + + CircuitBreakerManager(const CircuitBreakerManager&) = delete; + CircuitBreakerManager& operator=(const CircuitBreakerManager&) = delete; + + // Hot-path lookup — returns nullptr for unknown service names. + // Thread-safe (post-construction `hosts_` is read-only). + CircuitBreakerHost* GetHost(const std::string& service_name); + const CircuitBreakerHost* GetHost(const std::string& service_name) const; + + // Apply breaker-field edits to EXISTING upstream services. Topology + // changes (new/removed service names) are logged at warn and + // skipped — HttpServer::Reload is the only layer that warns about + // topology, and this manager trusts that signal. Serialized by + // reload_mtx_ so concurrent Reload calls queue cleanly; the hot + // path does NOT take this lock. + void Reload(const std::vector& new_upstreams); + + // Observability — snapshots every host. Safe from any thread. + std::vector<CircuitBreakerHostSnapshot> SnapshotAll() const; + + // Test/admin helpers. + size_t host_count() const { return hosts_.size(); } + +private: + // Post-construction read-only — keys and unique_ptr values never + // change, so lookups don't need a lock. + std::unordered_map<std::string, std::unique_ptr<CircuitBreakerHost>> hosts_; + std::vector<std::shared_ptr<Dispatcher>> dispatchers_; + + // Serializes concurrent Reload calls. NOT taken on the hot path. 
+ mutable std::mutex reload_mtx_; +}; + +} // namespace circuit_breaker diff --git a/include/circuit_breaker/circuit_breaker_slice.h b/include/circuit_breaker/circuit_breaker_slice.h new file mode 100644 index 00000000..d6899bae --- /dev/null +++ b/include/circuit_breaker/circuit_breaker_slice.h @@ -0,0 +1,281 @@ +#pragma once + +#include "common.h" +#include "config/server_config.h" +#include "circuit_breaker/circuit_breaker_state.h" +#include "circuit_breaker/circuit_breaker_window.h" +// , , provided by common.h + +namespace circuit_breaker { + +// One per-dispatcher slice of the breaker state for a given upstream host. +// Dispatcher-thread-local for hot-path correctness: TryAcquire, ReportSuccess, +// ReportFailure must only be called on the dispatcher that owns this slice. +// +// Observability counters (`trips_`, `rejected_`, etc.) are atomic so other +// threads can snapshot them without synchronization. Everything else is +// plain (no atomics) — single-writer, single-reader. +class CircuitBreakerSlice { +public: + // `time_source` defaults to steady_clock::now. Tests inject a mock clock. + using TimeSource = std::function; + + CircuitBreakerSlice(std::string host_label, + size_t dispatcher_index, + const CircuitBreakerConfig& config, + TimeSource time_source = nullptr); + + // Non-copyable, non-movable: slices are pinned in a Host's vector and + // callbacks capture raw pointers. + CircuitBreakerSlice(const CircuitBreakerSlice&) = delete; + CircuitBreakerSlice& operator=(const CircuitBreakerSlice&) = delete; + + // Return value of TryAcquire. `generation` is a monotonically-increasing + // token identifying which state-machine cycle the admission belongs to. + // Callers MUST pass it back to Report*() unchanged so the slice can drop + // late completions that belong to a prior cycle (crossed a state + // transition or a Reload()-reset boundary). 
Without this, stale + // completions can pollute the bookkeeping of a fresh CLOSED/HALF_OPEN + // cycle (e.g., a pre-toggle failure incrementing the post-toggle + // consecutive_failures_, or a pre-CLOSED'-cycle success wiping a + // legitimate post-CLOSED' counter). + struct Admission { + Decision decision; + uint64_t generation; + }; + + // Hot-path decision. Consults state + (if applicable) advances OPEN→HALF_OPEN + // and reserves a probe slot. Increments `rejected_` on REJECTED_OPEN* + // (both enforce and dry-run). Emits reject log on dispatcher thread. + // Returned generation must be threaded to the paired Report*(). + Admission TryAcquire(); + + // Outcome reporting. `probe` is true iff the paired TryAcquire returned + // ADMITTED_PROBE. `admission_generation` is the generation returned by + // the paired TryAcquire — reports from a stale generation are silently + // dropped (observability counters still update so the outcome is not + // lost from dashboards). Report* may trigger state transitions and fire + // the transition callback. + void ReportSuccess(bool probe, uint64_t admission_generation); + void ReportFailure(FailureKind kind, bool probe, uint64_t admission_generation); + + // Neutral completion — the admission never exercised the upstream. + // Use when the request was terminated locally before reaching the + // upstream (POOL_EXHAUSTED after admission, shutdown draining, client + // disconnect, RESULT_PARSE_ERROR self-attributable). Must NOT be used + // for upstream outcomes — those go to ReportSuccess / ReportFailure. + // + // For probe=true (HALF_OPEN admission): returns the probe slot to the + // cycle — decrements `half_open_inflight_` AND `half_open_admitted_` + // so a replacement probe can still exercise the upstream within this + // cycle's budget. Without this path, a probe that dies locally leaks + // its slot forever, eventually wedging the slice in HALF_OPEN. 
+ // + // For probe=false (CLOSED admission): no-op — CLOSED admissions have + // no slot to release. The bool matches ReportSuccess/ReportFailure so + // callers can use the same dispatch pattern. + void ReportNeutral(bool probe, uint64_t admission_generation); + + // Apply a new config (called on this slice's dispatcher thread). + // Preserves live state (CLOSED/OPEN/HALF_OPEN). Resets window if + // window_seconds changed. + void Reload(const CircuitBreakerConfig& new_config); + + // Install or replace the state-transition callback. Safe to call before + // any traffic (startup wiring) OR after a hot-reload flips enabled=false→true. + // Callers must invoke on this slice's dispatcher thread. + void SetTransitionCallback(StateTransitionCallback cb); + + // Observability — safe from any thread. + State CurrentState() const { return state_.load(std::memory_order_acquire); } + int64_t Trips() const { return trips_.load(std::memory_order_relaxed); } + int64_t Rejected() const { return rejected_.load(std::memory_order_relaxed); } + int64_t ProbeSuccesses() const { return probe_successes_.load(std::memory_order_relaxed); } + int64_t ProbeFailures() const { return probe_failures_.load(std::memory_order_relaxed); } + // Rejections specifically caused by HALF_OPEN being out of probe slots + // (subset of `Rejected()`). Lets dashboards distinguish "backoff has not + // elapsed" from "probing, no capacity left". + int64_t RejectedHalfOpenFull() const { + return rejected_half_open_full_.load(std::memory_order_relaxed); + } + // Number of Report* calls silently dropped because their admission + // generation no longer matches the relevant per-domain counter + // (closed_gen_ for non-probe, halfopen_gen_ for probe). These are + // reports of requests admitted before a state transition or a + // Reload()-reset. Useful for detecting mis-threaded admission tokens. 
+ int64_t ReportsStaleGeneration() const { + return reports_stale_generation_.load(std::memory_order_relaxed); + } + + // **Test-only** accessor for the generation that the current state's + // next admission would receive. Returns `halfopen_gen_` when state is + // HALF_OPEN (probe admissions use that counter), otherwise `closed_gen_` + // (non-probe admissions use that counter). This matches what TryAcquire + // would stamp on a new admission right now. + // + // Production callers MUST use the generation returned by TryAcquire + // (racy otherwise — these getters are not atomic). Tests use it as + // ergonomic shorthand for "admission just happened in the current + // cycle", bypassing the need to thread a token per synthetic Report*. + uint64_t CurrentGenerationForTesting() const { + return (state_.load(std::memory_order_acquire) == State::HALF_OPEN) + ? halfopen_gen_ : closed_gen_; + } + // Explicit per-domain getters for tests that cross state transitions + // while holding a captured generation from a specific domain. + uint64_t CurrentClosedGenForTesting() const { return closed_gen_; } + uint64_t CurrentHalfOpenGenForTesting() const { return halfopen_gen_; } + + const std::string& host_label() const { return host_label_; } + size_t dispatcher_index() const { return dispatcher_index_; } + + // Read-only view of the live config. Dispatcher-thread-owned for + // writes (Reload only mutates here); readers on other threads get a + // potentially-torn read, which is acceptable for observability hints + // like Retry-After clamping. + const CircuitBreakerConfig& config() const { return config_; } + + // Current open_until time. Used by ProxyTransaction to compute + // Retry-After. Returns zero ns when not OPEN. + std::chrono::steady_clock::time_point OpenUntil() const; + + // Convenience predicate: whether OpenUntil() currently holds a + // non-zero deadline. Avoids callers hand-rolling the zero-epoch + // check against `time_since_epoch().count() > 0`. 
+ bool IsOpenDeadlineSet() const { + return open_until_steady_ns_.load(std::memory_order_relaxed) > 0; + } + + // Expected next open-duration in milliseconds if the slice re-trips + // from its current state. Computed from base_open_duration_ms + // shifted by the current `consecutive_trips_` count and clamped by + // max_open_duration_ms. Used by the Retry-After hint path for + // HALF_OPEN rejections, where there's no stored deadline but the + // next OPEN window (if the probe cycle fails) will follow the + // exponential-backoff curve — base alone would under-report after + // multiple trips. + // + // Safe from any thread (atomic load of consecutive_trips_ + plain + // reads of config_ fields). Config fields are dispatcher-owned but + // a slightly-torn read is fine for an observability hint. + int64_t NextOpenDurationMs() const; + +private: + // Logging label: "service=X host=Y:Z partition=N" built once. + std::string host_label_; + size_t dispatcher_index_; + CircuitBreakerConfig config_; + + TimeSource time_source_; + + // Hot-path state — state_ written on dispatcher, read by observers. + std::atomic state_{State::CLOSED}; + // Nanoseconds since steady_clock epoch — 0 when not OPEN. + std::atomic open_until_steady_ns_{0}; + // Count of consecutive trips (OPEN entries) since last CLOSED — + // drives exponential backoff of open duration. + std::atomic consecutive_trips_{0}; + + // Dispatcher-thread-only (no atomics). + int consecutive_failures_ = 0; + CircuitBreakerWindow window_; + int half_open_inflight_ = 0; + int half_open_successes_ = 0; + bool half_open_saw_failure_ = false; + // Total probes admitted in the CURRENT HALF_OPEN cycle. Never decrements + // within a cycle; resets on every cycle entry (TransitionOpenToHalfOpen) + // and cycle exit (TransitionHalfOpenToClosed / TripHalfOpenToOpen). This + // is what caps the cycle's probe budget — NOT half_open_inflight_, which + // can free slots as probes complete. 
Gating on inflight would let an
+ // early-completing probe's slot be reused, causing the cycle to admit
+ // more than permitted_half_open_calls total probes. The close check
+ // (successes >= snapshot) could then fire while a late-admitted probe
+ // is still running; its eventual failure would drop as stale (generation
+ // bumped by the transition) and the breaker would falsely mark an
+ // unhealthy host recovered.
+ int half_open_admitted_ = 0;
+ // Probe budget for the CURRENT HALF_OPEN cycle. Snapshotted from
+ // config_.permitted_half_open_calls at the moment TransitionOpenToHalfOpen
+ // fires. A live Reload() may lower (or raise) the config field mid-cycle;
+ // the snapshot ensures TryAcquire's slot gate and ReportSuccess's close
+ // check both operate against the budget that was in effect when the probes
+ // were admitted — preventing early close or indefinitely-open behaviour.
+ int half_open_permitted_snapshot_ = 0;
+
+ // Observability counters.
+ std::atomic<int64_t> trips_{0};
+ std::atomic<int64_t> rejected_{0};
+ std::atomic<int64_t> rejected_half_open_full_{0};
+ std::atomic<int64_t> probe_successes_{0};
+ std::atomic<int64_t> probe_failures_{0};
+
+ // One-shot flag: true after the slice has emitted a higher-level
+ // (info) log for the first rejection in the current OPEN/HALF_OPEN
+ // cycle. Reset on transition to CLOSED and on each fresh trip. Keeps
+ // per-request reject logs at debug while still surfacing the first
+ // post-trip reject in default-warn operator logs. Dispatcher-thread only.
+ bool first_reject_logged_for_open_ = false;
+
+ // Monotonic generation counters — one per admission domain. TryAcquire
+ // stamps the admission with the domain's current value; Report* compares
+ // against it and drops reports whose admission no longer matches a live
+ // cycle.
Split into two counters so operations that reset ONE domain
+ // (e.g., window_seconds reload wipes the CLOSED rate window) don't
+ // invalidate admissions in the OTHER domain (HALF_OPEN probes) — which
+ // would strand probe capacity and wedge the slice in HALF_OPEN.
+ //
+ // Dispatcher-thread only — plain ints (no atomics needed).
+ //
+ // closed_gen_ bumps on: TripClosedToOpen (CLOSED cycle ends),
+ // Reload enabled-toggle reset,
+ // Reload window_seconds change (rate-window wipe).
+ // halfopen_gen_ bumps on: TripHalfOpenToOpen (HALF_OPEN cycle ends),
+ // TransitionHalfOpenToClosed (HALF_OPEN cycle ends on success),
+ // Reload enabled-toggle reset.
+ //
+ // Initial value 1 (so 0 can be a "not-applicable" sentinel for
+ // admissions returned from disabled slices or the REJECTED_* paths).
+ uint64_t closed_gen_ = 1;
+ uint64_t halfopen_gen_ = 1;
+
+ // Rejections silently dropped because their admission generation no
+ // longer matches the live domain counter (closed_gen_ / halfopen_gen_).
+ // Observability only; lets dashboards see
+ // how often the generation guard fires.
+ std::atomic<int64_t> reports_stale_generation_{0};
+
+ StateTransitionCallback transition_cb_;
+
+ // Internal transitions (dispatcher-thread).
+ // `now` is threaded through from ReportFailure so the window_total /
+ // window_fail_rate fields in the trip log reflect the SAME sliding-window
+ // view that ShouldTripClosed just saw — a fresh Now() here can cross a
+ // bucket boundary (especially with window_seconds=1 or under a dispatcher
+ // stall) and trigger Window::Advance's full-reset, zeroing the bucket that
+ // holds the failure which actually tripped the breaker.
+ void TripClosedToOpen(const char* trigger,
+ std::chrono::steady_clock::time_point now);
+ void TransitionOpenToHalfOpen();
+ void TransitionHalfOpenToClosed();
+ void TripHalfOpenToOpen(const char* trigger);
+
+ // Emit the correct reject log line, bump counters, and return the matching
+ // Decision (enforce or dry-run).
Used by both the OPEN (backoff active) + // and HALF_OPEN-full paths — keeps the three loggers/counters consistent. + Decision RejectWithLog(const char* state_label, bool half_open_full); + + // Compute open duration for the current consecutive_trips_ value: + // min(base * 2^consecutive_trips, max). Always >= base_open_duration_ms. + std::chrono::nanoseconds ComputeOpenDuration() const; + + // Check whether CLOSED trip conditions are met. Called after every failure. + // Takes `now` as a parameter so the caller can record the failure and + // evaluate the trip against THE SAME timestamp — otherwise a clock tick + // between AddFailure() and ShouldTripClosed() can advance the ring and + // wipe the just-recorded failure (critical when window_seconds is small: + // with window=1, a 1-second delta triggers the full-reset path). + bool ShouldTripClosed(std::chrono::steady_clock::time_point now); + + std::chrono::steady_clock::time_point Now() const; +}; + +} // namespace circuit_breaker diff --git a/include/circuit_breaker/circuit_breaker_state.h b/include/circuit_breaker/circuit_breaker_state.h new file mode 100644 index 00000000..92872f8b --- /dev/null +++ b/include/circuit_breaker/circuit_breaker_state.h @@ -0,0 +1,70 @@ +#pragma once + +#include "common.h" +// , , provided by common.h + +// Circuit breaker state machine and classification enums. Used by +// CircuitBreakerSlice, CircuitBreakerHost, CircuitBreakerManager, and +// ProxyTransaction to talk about state, admission decisions, and +// failure kinds. +// +// Three-state resilience4j-style machine: +// +// CLOSED ──trip── OPEN ──(open_until elapsed)── HALF_OPEN ──success── CLOSED +// │ +// failure +// ▼ +// OPEN +namespace circuit_breaker { + +enum class State : uint8_t { + CLOSED = 0, + OPEN = 1, + HALF_OPEN = 2, +}; + +// Result of CircuitBreakerSlice::TryAcquire. Callers branch on this enum +// only — they never read the CircuitBreakerConfig directly. 
Dry-run policy
+// is encoded in the decision, not in a separate flag.
+enum class Decision : uint8_t {
+ ADMITTED, // CLOSED — proceed to pool
+ ADMITTED_PROBE, // HALF_OPEN probe slot consumed — proceed, tag as probe
+ REJECTED_OPEN, // OPEN (or HALF_OPEN-full); ENFORCE — drop with 503
+ REJECTED_OPEN_DRYRUN, // Shadow mode: slice would reject but operator asked
+ // for pass-through. Caller proceeds to pool. Counters
+ // and log already updated by TryAcquire.
+};
+
+// Failure classification. Only these kinds feed ReportFailure — 4xx and
+// local-capacity issues (POOL_EXHAUSTED, QUEUE_TIMEOUT, shutdown) are NOT
+// reported as failures.
+enum class FailureKind : uint8_t {
+ CONNECT_FAILURE,
+ RESPONSE_5XX,
+ RESPONSE_TIMEOUT,
+ UPSTREAM_DISCONNECT,
+};
+
+// Callback fired on every slice state transition. Runs on the slice's
+// owning dispatcher thread. Callers can compare old/new to key off a
+// specific edge (e.g. CLOSED→OPEN fires wait-queue drain).
+// `trigger` is a short static string such as "consecutive" / "rate" /
+// "probe_success" / "probe_fail" / "open_elapsed" for logging.
+//
+// TODO(post-v1): once a snapshot / admin JSON endpoint lands, convert
+// `trigger` to an `enum class TransitionTrigger` so the valid set is
+// compile-time checked rather than string-compared. See design doc §15.8.
+using StateTransitionCallback =
+ std::function<void(State old_state, State new_state, const char* trigger)>;
+
+// Convert a state to a short lowercase label for logging.
+inline const char* StateName(State s) { + switch (s) { + case State::CLOSED: return "closed"; + case State::OPEN: return "open"; + case State::HALF_OPEN: return "half_open"; + } + return "unknown"; +} + +} // namespace circuit_breaker diff --git a/include/circuit_breaker/circuit_breaker_window.h b/include/circuit_breaker/circuit_breaker_window.h new file mode 100644 index 00000000..12679bcd --- /dev/null +++ b/include/circuit_breaker/circuit_breaker_window.h @@ -0,0 +1,59 @@ +#pragma once + +#include "common.h" +// , provided by common.h + +namespace circuit_breaker { + +// Time-bucketed sliding window. One bucket per second; ring indexed by +// `epoch_sec % window_seconds`. Advances lazily on every Add* call: +// when the incoming `now` is ahead of the recorded head, all buckets +// that have aged out of the window are zeroed before the new increment. +// +// Dispatcher-thread-local by design — NO synchronization. Used from +// CircuitBreakerSlice, which is owned by a single dispatcher. +class CircuitBreakerWindow { +public: + explicit CircuitBreakerWindow(int window_seconds); + + // Record one outcome at `now`. Advances the ring if needed. + void AddSuccess(std::chrono::steady_clock::time_point now); + void AddFailure(std::chrono::steady_clock::time_point now); + + // Observed counts across the current window. `now` is used to expire + // stale buckets before reading. + int64_t TotalCount(std::chrono::steady_clock::time_point now); + int64_t FailureCount(std::chrono::steady_clock::time_point now); + + // Reset the ring to zero. Called on state transitions that should + // start a fresh observation (e.g. HALF_OPEN → CLOSED). + void Reset(); + + // Reinitialize for a new window size (config reload). Resets buckets. 
+ void Resize(int new_window_seconds);
+
+ int window_seconds() const { return window_seconds_; }
+
+private:
+ struct Bucket {
+ int64_t total = 0;
+ int64_t failures = 0;
+ };
+
+ int window_seconds_;
+ std::vector<Bucket> buckets_;
+
+ // Epoch-seconds of the most recent observation. Used to compute how
+ // many buckets need to be zeroed on advance.
+ int64_t head_epoch_sec_ = -1;
+
+ // Advance the ring if `now_sec` is newer than `head_epoch_sec_`,
+ // zeroing any buckets that aged out.
+ void Advance(int64_t now_sec);
+
+ // Convert a steady_clock time_point to epoch-seconds (we only
+ // care about relative seconds; steady_clock is monotonic).
+ static int64_t ToEpochSec(std::chrono::steady_clock::time_point now);
+};
+
+} // namespace circuit_breaker
diff --git a/include/circuit_breaker/retry_budget.h b/include/circuit_breaker/retry_budget.h
new file mode 100644
index 00000000..f8392013
--- /dev/null
+++ b/include/circuit_breaker/retry_budget.h
@@ -0,0 +1,151 @@
+#pragma once
+
+#include "common.h"
+// <atomic>, <cstdint> provided by common.h
+
+namespace circuit_breaker {
+
+// Retry budget — orthogonal to the breaker state machine.
+//
+// Problem: even when the circuit is CLOSED, a cascading failure on a
+// healthy-looking upstream can be amplified by per-request retries. If
+// 100 requests are in flight and each retries once, the upstream sees
+// 200. If each retries twice, 300. A sick-but-not-dead upstream gets
+// tipped over by the retry multiplier itself.
+//
+// Fix: cap concurrent retries as a fraction of concurrent non-retry
+// traffic plus a floor for low-volume correctness.
+//
+// allowed_retries = max(min_concurrency,
+// (in_flight - retries_in_flight) * percent / 100)
+//
+// The subtraction is load-bearing: callers hold TrackInFlight() for
+// BOTH first attempts and retries (so the guard's RAII paired with
+// ReleaseRetry doesn't need a second counter on the hot path).
+// Without subtracting retries, admitting a retry increases in_flight +// which increases the cap, and in steady state the effective ratio +// converges above the configured percent of original traffic. +// +// The retry budget is PER-HOST (one instance owned by CircuitBreakerHost, +// shared across its partitions — the percent math is about aggregate +// upstream load, not per-dispatcher slicing). All counters are atomic +// relaxed — snapshots can be slightly stale, which is fine for a +// capacity gate on a retry storm. +// +// Usage: +// 1. On every attempt (first or retry), call TrackInFlight() and keep +// the returned guard alive until the attempt completes. The guard +// decrements in_flight_ in its destructor. +// 2. Before issuing a retry attempt, call TryConsumeRetry(). Proceed +// if it returns true; reject as RETRY_BUDGET_EXHAUSTED if false. +// 3. When the retried attempt completes, call ReleaseRetry(). +class RetryBudget { +public: + // `percent` — cap retries at this % of in-flight (0-100). + // `min_concurrency` — always allow at least this many concurrent + // retries regardless of in_flight; ensures low-volume correctness + // (without it, a 20% budget allows 0 retries when in_flight < 5). + RetryBudget(int percent, int min_concurrency); + + // Non-copyable, non-movable. Lifetime-stable under its owner + // (CircuitBreakerHost). + RetryBudget(const RetryBudget&) = delete; + RetryBudget& operator=(const RetryBudget&) = delete; + + // RAII guard — decrements in_flight_ on destruction. Move-only. 
+ class InFlightGuard {
+ public:
+ InFlightGuard() = default;
+ explicit InFlightGuard(std::atomic<int64_t>* counter) : counter_(counter) {}
+ ~InFlightGuard() {
+ if (counter_) counter_->fetch_sub(1, std::memory_order_relaxed);
+ }
+ InFlightGuard(InFlightGuard&& o) noexcept : counter_(o.counter_) {
+ o.counter_ = nullptr;
+ }
+ InFlightGuard& operator=(InFlightGuard&& o) noexcept {
+ if (this != &o) {
+ if (counter_) counter_->fetch_sub(1, std::memory_order_relaxed);
+ counter_ = o.counter_;
+ o.counter_ = nullptr;
+ }
+ return *this;
+ }
+ InFlightGuard(const InFlightGuard&) = delete;
+ InFlightGuard& operator=(const InFlightGuard&) = delete;
+
+ private:
+ std::atomic<int64_t>* counter_ = nullptr;
+ };
+
+ // Call on every upstream attempt entry (first try OR retry). The
+ // returned guard MUST outlive the attempt — typically stored as a
+ // ProxyTransaction member. Never returns an empty guard.
+ InFlightGuard TrackInFlight();
+
+ // Call BEFORE issuing a retry attempt. Returns true if the retry
+ // fits under the budget (retries_in_flight < cap); caller must pair
+ // a true return with a matching ReleaseRetry when the retry
+ // completes. Returns false if over budget — caller must NOT retry
+ // and must NOT call ReleaseRetry.
+ //
+ // The cap is computed against a freshly-loaded in_flight snapshot:
+ // cap = max(min_concurrency, in_flight * percent / 100)
+ bool TryConsumeRetry();
+
+ // Call when a consumed retry attempt finishes. Must be paired with a
+ // prior successful TryConsumeRetry.
+ void ReleaseRetry();
+
+ // Apply new tuning. Thread-safe (atomics). Preserves in-flight counters
+ // — only the admission formula changes.
+ void Reload(int percent, int min_concurrency);
+
+ // Observability — safe from any thread, relaxed.
+ int64_t InFlight() const {
+ return in_flight_.load(std::memory_order_relaxed);
+ }
+ // Compute the current effective retry cap for observability / log
+ // enrichment.
Uses the same formula as TryConsumeRetry but without
+ // mutating retries_in_flight_. Returns the point-in-time cap against
+ // which a would-be retry admission would be compared. Slightly racy
+ // (separate loads of in_flight_ and retries_in_flight_ aren't atomic
+ // relative to each other), but the result is for dashboards / logs
+ // where a one-entry drift is noise.
+ int64_t ComputeCap() const {
+ int64_t in_flight = in_flight_.load(std::memory_order_relaxed);
+ int64_t retries = retries_in_flight_.load(std::memory_order_relaxed);
+ int pct = percent_.load(std::memory_order_relaxed);
+ int min_conc = min_concurrency_.load(std::memory_order_relaxed);
+ int64_t non_retry = in_flight - retries;
+ if (non_retry < 0) non_retry = 0;
+ int64_t pct_cap = (non_retry * pct) / 100;
+ return pct_cap > min_conc ? pct_cap : min_conc;
+ }
+ int64_t RetriesInFlight() const {
+ return retries_in_flight_.load(std::memory_order_relaxed);
+ }
+ int64_t RetriesRejected() const {
+ return retries_rejected_.load(std::memory_order_relaxed);
+ }
+
+ int percent() const { return percent_.load(std::memory_order_relaxed); }
+ int min_concurrency() const {
+ return min_concurrency_.load(std::memory_order_relaxed);
+ }
+
+private:
+ // Tuning — atomic so Reload() is lock-free.
+ std::atomic<int> percent_;
+ std::atomic<int> min_concurrency_;
+
+ // Counters (relaxed — admission decisions tolerate slightly stale
+ // reads; correctness depends on each guard's fetch_sub pairing with
+ // its increment, which holds under relaxed because they touch the
+ // same atomic).
+ std::atomic<int64_t> in_flight_{0};
+ std::atomic<int64_t> retries_in_flight_{0};
+ std::atomic<int64_t> retries_rejected_{0};
+};
+
+} // namespace circuit_breaker
diff --git a/include/config/config_loader.h b/include/config/config_loader.h
index ba13f62a..2a76c3b8 100644
--- a/include/config/config_loader.h
+++ b/include/config/config_loader.h
@@ -3,6 +3,7 @@
 #include "config/server_config.h"
 #include <stdexcept>
 #include <string>
+#include <unordered_set>
 class ConfigLoader {
 public:
@@ -27,6 +28,42 @@ class ConfigLoader {
 // Throws std::invalid_argument if validation fails.
 static void Validate(const ServerConfig& config);
+ // Validate ONLY the fields that are live-reloadable without a
+ // restart — today this is the per-upstream circuit_breaker block
+ // plus a duplicate-name check.
+ //
+ // Used by the SIGHUP reload path, which downgrades the full
+ // `Validate()` failure to a warn because most of its rules cover
+ // restart-only fields. That downgrade is unsafe for live-
+ // reloadable fields: an invalid breaker threshold would be
+ // pushed into live slices even though the same value would be
+ // rejected at startup. Call this BEFORE applying a reloaded
+ // config and abort the reload if it throws.
+ //
+ // Scope of CB-field validation:
+ // `live_upstream_names` lists service names CURRENTLY known to
+ // the running server. CB fields are validated only for entries
+ // whose name is in this set, because
+ // `CircuitBreakerManager::Reload` only applies CB changes to
+ // pre-existing hosts (new/removed names are restart-only and
+ // skipped with a warn). Validating CB blocks for not-yet-
+ // running entries would block otherwise-safe reloads — e.g. a
+ // reload that stages a new upstream with an intentionally
+ // placeholder breaker block would abort even though the live
+ // server would never apply it. Pass an empty set when no
+ // upstreams are running yet (only the duplicate-name check
+ // runs in that case).
+ //
+ // Duplicate-name rejection runs unconditionally on the new
+ // config's upstream list: even for new/renamed entries, the
+ // file itself is malformed if names collide.
+ //
+ // Throws std::invalid_argument with a message identifying the
+ // offending upstream and field.
+ static void ValidateHotReloadable(
+ const ServerConfig& config,
+ const std::unordered_set<std::string>& live_upstream_names);
+
 // Return a ServerConfig with all default values.
 static ServerConfig Default();
diff --git a/include/config/server_config.h b/include/config/server_config.h
index bff3ffc4..ee879f28 100644
--- a/include/config/server_config.h
+++ b/include/config/server_config.h
@@ -138,6 +138,52 @@ struct ProxyConfig {
 bool operator!=(const ProxyConfig& o) const { return !(*this == o); }
 };
+struct CircuitBreakerConfig {
+ bool enabled = false; // Opt-in; off by default
+ bool dry_run = false; // Compute + log, but do not reject
+
+ // Trip conditions (ORed). Either alone is sufficient.
+ int consecutive_failure_threshold = 5; // Trip after N consecutive failures
+ int failure_rate_threshold = 50; // Trip when fail_rate >= N percent
+ int minimum_volume = 20; // Required window volume before
+ // failure_rate is consulted
+ int window_seconds = 10; // Sliding-window duration
+
+ // HALF_OPEN admission
+ int permitted_half_open_calls = 5;
+
+ // Recovery timing. open_duration = min(base * 2^consecutive_trips, max).
+ int base_open_duration_ms = 5000;
+ int max_open_duration_ms = 60000;
+
+ // Safety valve (future-proof for load-balanced services; no-op v1).
+ int max_ejection_percent_per_host_set = 50;
+
+ // Retry budget (orthogonal to the breaker). Caps concurrent retries to
+ // max(retry_budget_min_concurrency, in_flight * retry_budget_percent/100).
+ // Wired into the request path via ProxyTransaction's retry-budget
+ // gate in MaybeRetry; also read by
+ // CircuitBreakerHost to construct its owned RetryBudget.
+ int retry_budget_percent = 20; + int retry_budget_min_concurrency = 3; + + bool operator==(const CircuitBreakerConfig& o) const { + return enabled == o.enabled && + dry_run == o.dry_run && + consecutive_failure_threshold == o.consecutive_failure_threshold && + failure_rate_threshold == o.failure_rate_threshold && + minimum_volume == o.minimum_volume && + window_seconds == o.window_seconds && + permitted_half_open_calls == o.permitted_half_open_calls && + base_open_duration_ms == o.base_open_duration_ms && + max_open_duration_ms == o.max_open_duration_ms && + max_ejection_percent_per_host_set == o.max_ejection_percent_per_host_set && + retry_budget_percent == o.retry_budget_percent && + retry_budget_min_concurrency == o.retry_budget_min_concurrency; + } + bool operator!=(const CircuitBreakerConfig& o) const { return !(*this == o); } +}; + struct UpstreamConfig { std::string name; std::string host; @@ -145,7 +191,18 @@ struct UpstreamConfig { UpstreamTlsConfig tls; UpstreamPoolConfig pool; ProxyConfig proxy; + CircuitBreakerConfig circuit_breaker; + // Excludes `circuit_breaker` — breaker fields are live-reloadable via + // `CircuitBreakerManager::Reload`, which `HttpServer::Reload` invokes on + // every reload. Topology fields (name, host, port, tls, pool, + // proxy) remain restart-only; a mismatch here triggers the + // "restart required" warning in the outer reload. + // + // Contract: a config pair that differs ONLY in circuit_breaker fields + // must compare EQUAL so the outer reload doesn't fire a spurious warn. + // Any future field whose propagation path is wired into a live + // `*Manager::Reload` should be removed from this operator symmetrically. 
bool operator==(const UpstreamConfig& o) const {
 return name == o.name && host == o.host && port == o.port &&
 tls == o.tls && pool == o.pool && proxy == o.proxy;
diff --git a/include/http/http_server.h b/include/http/http_server.h
index bffbd854..f595d1c3 100644
--- a/include/http/http_server.h
+++ b/include/http/http_server.h
@@ -22,6 +22,10 @@
 class UpstreamManager;
 class ProxyHandler;
+namespace circuit_breaker {
+class CircuitBreakerManager;
+}
+
 class HttpServer {
 public:
 // Snapshot of server runtime statistics. All values are approximate
@@ -336,6 +340,16 @@ class HttpServer {
 std::vector<UpstreamConfig> upstream_configs_;
 std::unique_ptr<UpstreamManager> upstream_manager_;
+ // Circuit breaker — declared AFTER upstream_manager_ so destruction
+ // order is breaker-FIRST, pool-SECOND (design §3.1). On shutdown the
+ // breaker's slices may still be consulted by in-flight
+ // ProxyTransactions until they drain; destroying the breaker first
+ // (before the pool) is safe because UpstreamManager's outstanding
+ // breaker_manager_ pointer is checked against null on every lookup.
+ // Destroying the pool first would leave breaker slices holding
+ // dangling references.
+ std::unique_ptr<circuit_breaker::CircuitBreakerManager> circuit_breaker_manager_;
+
 // Rate limiting
 RateLimitConfig rate_limit_config_;
 std::unique_ptr<RateLimitManager> rate_limit_manager_;
diff --git a/include/upstream/pool_partition.h b/include/upstream/pool_partition.h
index 4c33a0cd..a6d904b2 100644
--- a/include/upstream/pool_partition.h
+++ b/include/upstream/pool_partition.h
@@ -25,6 +25,11 @@ class PoolPartition {
 static constexpr int CHECKOUT_CONNECT_TIMEOUT = -3;
 static constexpr int CHECKOUT_SHUTTING_DOWN = -4;
 static constexpr int CHECKOUT_QUEUE_TIMEOUT = -5;
+ // Delivered to wait-queue waiters drained on a breaker trip by
+ // DrainWaitQueueOnTrip. ProxyTransaction::OnCheckoutError maps
+ // this to RESULT_CIRCUIT_OPEN so the queued client gets the same
+ // circuit-open response a fresh requester would get.
+ static constexpr int CHECKOUT_CIRCUIT_OPEN = -6;
 PoolPartition(std::shared_ptr<Dispatcher> dispatcher,
 const std::string& upstream_host,
 int upstream_port,
@@ -85,6 +90,28 @@ class PoolPartition {
 // completion. Same pattern as ScheduleInitiateShutdown.
 void ScheduleForceCloseActive();
+ // Drain the wait queue on a CLOSED → OPEN breaker trip.
+ //
+ // Every live waiter receives CHECKOUT_CIRCUIT_OPEN (mapped by
+ // ProxyTransaction::OnCheckoutError to RESULT_CIRCUIT_OPEN, emitting
+ // the §12.1 circuit-open response). Cancelled waiters are dropped
+ // silently — the transaction already tore its side down via the
+ // framework abort hook. Does NOT set shutting_down_ (this is a
+ // transient drain, not a shutdown); the partition keeps its
+ // connections for HALF_OPEN probing when the open window elapses.
+ //
+ // Dispatcher-thread-only. The breaker's transition callback fires
+ // on the slice's owning dispatcher thread — the SAME dispatcher
+ // that owns this partition (one slice ↔ one partition by
+ // dispatcher_index). No enqueue needed.
+ //
+ // Rationale: without this drain, a queued waiter admitted by
+ // ConsultBreaker just before the trip would wait out the full
+ // `open_duration_ms` (up to 60s by default) before the pool's
+ // queue timeout rejects it. That's a visible latency spike for
+ // clients who are about to be served 503 anyway.
+ void DrainWaitQueueOnTrip();
+
 bool IsShuttingDown() const { return shutting_down_; }
 // Stats (dispatcher-thread-only reads)
diff --git a/include/upstream/proxy_transaction.h b/include/upstream/proxy_transaction.h
index 6e25c689..ccda6d24 100644
--- a/include/upstream/proxy_transaction.h
+++ b/include/upstream/proxy_transaction.h
@@ -6,6 +6,7 @@
 #include "upstream/header_rewriter.h"
 #include "upstream/retry_policy.h"
 #include "config/server_config.h" // ProxyConfig (stored by value)
+#include "circuit_breaker/retry_budget.h" // RetryBudget::InFlightGuard (member-by-value)
 #include "http/http_callbacks.h"
 #include "http/http_response.h"
 // , , , , , provided by common.h
@@ -15,16 +16,28 @@
 class UpstreamManager;
 class ConnectionHandler;
 class Dispatcher;
+namespace circuit_breaker {
+class CircuitBreakerSlice;
+} // RetryBudget already defined via retry_budget.h
+
 class ProxyTransaction : public std::enable_shared_from_this<ProxyTransaction> {
 public:
 // Result codes for internal state tracking
- static constexpr int RESULT_SUCCESS = 0;
- static constexpr int RESULT_CHECKOUT_FAILED = -1; // Upstream connect failure → 502
- static constexpr int RESULT_SEND_FAILED = -2;
- static constexpr int RESULT_PARSE_ERROR = -3;
- static constexpr int RESULT_RESPONSE_TIMEOUT = -4;
+ static constexpr int RESULT_SUCCESS = 0;
+ static constexpr int RESULT_CHECKOUT_FAILED = -1; // Upstream connect failure → 502
+ static constexpr int RESULT_SEND_FAILED = -2;
+ static constexpr int RESULT_PARSE_ERROR = -3;
+ static constexpr int RESULT_RESPONSE_TIMEOUT = -4;
 static constexpr int RESULT_UPSTREAM_DISCONNECT = -5;
- static constexpr int RESULT_POOL_EXHAUSTED = -6; // Local capacity → 503
+ static constexpr int RESULT_POOL_EXHAUSTED = -6; // Local capacity → 503
+ // Circuit breaker rejected this attempt before it touched the upstream.
+ // Carries Retry-After + X-Circuit-Breaker headers (§12.1).
+ // Terminal — retry loop MUST NOT retry this outcome (§8).
+ static constexpr int RESULT_CIRCUIT_OPEN = -7; + // Retry budget exhausted. No Retry-After; distinct header + // X-Retry-Budget-Exhausted so operators can tell the two 503s apart + // from circuit-open rejects. + static constexpr int RESULT_RETRY_BUDGET_EXHAUSTED = -8; // Constructor copies all needed fields from client_request (method, path, // query, headers, body, params, dispatcher_index, client_ip, client_tls, @@ -145,6 +158,47 @@ class ProxyTransaction : public std::enable_shared_from_this { // Timing std::chrono::steady_clock::time_point start_time_; + // Circuit breaker integration — resolved once in Start() from + // `service_name_` + `dispatcher_index_`. Null when there's no + // CircuitBreakerManager attached (server has no upstreams, or the + // breaker is being built lazily) — the breaker is simply skipped in + // that case. Lifetime: the slice is owned by CircuitBreakerHost in + // CircuitBreakerManager on HttpServer, which outlives this transaction. + circuit_breaker::CircuitBreakerSlice* slice_ = nullptr; + + // Per-host retry budget, resolved alongside `slice_` in Start() from + // the same CircuitBreakerHost. Null when there's no breaker attached + // for this service — in that case the transaction skips budget + // tracking entirely. Lifetime: the budget is owned by the host, + // which outlives this transaction (destruction order guaranteed by + // HttpServer member declaration). + circuit_breaker::RetryBudget* retry_budget_ = nullptr; + + // Per-attempt in-flight tracker. Held for the duration of each + // attempt (first try and retries alike). Replaced on every + // AttemptCheckout — move-assignment decrements the counter for the + // prior attempt and increments for the new one, so a retrying + // transaction stays at a single in_flight unit. Default-constructed + // guard is empty (counter_ = nullptr): used when retry_budget_ is + // null or before the first ConsultBreaker admission. 
+ circuit_breaker::RetryBudget::InFlightGuard inflight_guard_; + + // Per-ATTEMPT admission state. Reset on each call to ConsultBreaker(); + // paired Report*() calls thread the `generation` back so the slice + // can drop stale completions across state transitions (see + // CircuitBreakerSlice::Admission doc). generation_==0 is a sentinel + // for "no admission held" — slice domain gens start at 1 so a 0-gen + // report always drops safely. + uint64_t admission_generation_ = 0; + bool is_probe_ = false; + + // Retry-budget token held by this transaction's current retry + // attempt (attempt_ > 0). Set true after a successful + // TryConsumeRetry in MaybeRetry; cleared by ReleaseRetryToken in + // Cleanup. Dry-run rejects proceed but the flag stays false — no + // token was consumed, so no ReleaseRetry is required. + bool retry_token_held_ = false; + // Internal methods void AttemptCheckout(); void OnCheckoutReady(UpstreamLease lease); @@ -170,6 +224,60 @@ class ProxyTransaction : public std::enable_shared_from_this { void ArmResponseTimeout(int explicit_budget_ms = 0); void ClearResponseTimeout(); - // Error response factory (maps result codes to HTTP responses) + // Error response factory (maps result codes to HTTP responses). + // Circuit-open and retry-budget responses need richer context + // (Retry-After from slice_, distinguishing header), so they have + // dedicated factories below — MakeErrorResponse falls back to a + // plain 503 for those codes if called generically. 
static HttpResponse MakeErrorResponse(int result_code); + + // Emit the circuit-open response (design §12.1): + // 503 + Retry-After (seconds until slice->OpenUntil()) + // + X-Circuit-Breaker: open + // + X-Upstream-Host: service:host:port + HttpResponse MakeCircuitOpenResponse() const; + + // Emit the retry-budget-exhausted response (design §12.2): + // 503 + X-Retry-Budget-Exhausted: 1 + static HttpResponse MakeRetryBudgetResponse(); + + // Breaker helpers — gate and outcome classification. + // + // ConsultBreaker: call at the top of AttemptCheckout. Populates + // admission_generation_ and is_probe_ on admission; delivers the + // circuit-open response and returns false on reject. Dry-run admits + // and returns true (slice already counted the would-reject). + // Returns true if the caller should proceed to CheckoutAsync. + bool ConsultBreaker(); + + // ReportBreakerOutcome: classify a result_code into + // success/failure/neutral (per design §7) and call slice->Report* + // with admission_generation_. Clears admission_generation_ so a + // double-report is impossible. + // + // failure_kind is ignored unless the outcome is a FailureKind-bearing + // result; the caller passes the appropriate kind for 5xx vs disconnect + // vs timeout since the slice treats them differently only for logs. + void ReportBreakerOutcome(int result_code); + + // ReleaseBreakerAdmissionNeutral: release the admission slot without + // counting a success or failure. Used when the transaction is aborted + // locally (Cancel() on client disconnect, cancelled_ early-return + // after checkout, etc.) before an upstream health signal was observed. + // + // Without this, a HALF_OPEN probe slot is stranded if the client + // disconnects mid-probe — the slice stays in half_open_full until an + // external reset. No-op if admission_generation_ == 0. Clears + // admission_generation_ so a following ReportBreakerOutcome is a + // no-op. 
+ void ReleaseBreakerAdmissionNeutral(); + + // Release the retry-budget token held by this attempt, if any. + // Idempotent via the retry_token_held_ flag — called from Cleanup + // between attempts (so the next retry's TryConsumeRetry sees a + // freshly-released counter) AND from the destructor / Cancel as + // safety nets. No-op when no budget was attached or no token was + // consumed (e.g. first attempt, or dry-run reject that didn't + // consume). + void ReleaseRetryToken(); }; diff --git a/include/upstream/upstream_manager.h b/include/upstream/upstream_manager.h index c308cbd3..346bc4d5 100644 --- a/include/upstream/upstream_manager.h +++ b/include/upstream/upstream_manager.h @@ -9,6 +9,10 @@ class TlsClientContext; +namespace circuit_breaker { +class CircuitBreakerManager; +} + class UpstreamManager { public: UpstreamManager(const std::vector& upstreams, @@ -59,6 +63,33 @@ class UpstreamManager { // Check if an upstream service is configured bool HasUpstream(const std::string& service_name) const; + // Look up the PoolPartition for (service_name, dispatcher_index). + // Returns nullptr if service is unknown or dispatcher_index is out + // of range. Used by the circuit-breaker transition callback (wired + // in HttpServer::MarkServerReady) to drain the wait queue on a + // CLOSED → OPEN trip. Must be called on the dispatcher thread + // identified by `dispatcher_index` — the returned partition's + // DrainWaitQueueOnTrip is dispatcher-thread-only. + PoolPartition* GetPoolPartition(const std::string& service_name, + size_t dispatcher_index); + + // Install a non-owning pointer to the server's CircuitBreakerManager. + // Called once from HttpServer::MarkServerReady after both managers are + // constructed (§3.1). Lifetime guarantee: the CircuitBreakerManager + // is declared AFTER upstream_manager_ on HttpServer, so it destructs + // FIRST — UpstreamManager never reads through a dangling pointer on + // shutdown. Passing nullptr is allowed (detaches). 
+ void AttachCircuitBreakerManager(circuit_breaker::CircuitBreakerManager* mgr) { + breaker_manager_.store(mgr, std::memory_order_release); + } + + // Returns the attached breaker manager, or nullptr if no manager is + // attached. Safe from any thread (atomic load, acquire so any + // Attach-time publication is visible). + circuit_breaker::CircuitBreakerManager* GetCircuitBreakerManager() const { + return breaker_manager_.load(std::memory_order_acquire); + } + private: // service_name → host pool. Built once at construction, never modified. std::unordered_map> pools_; @@ -73,6 +104,14 @@ class UpstreamManager { // reject new checkouts before per-partition shutdown tasks execute. std::atomic shutting_down_{false}; + // Non-owning pointer to the circuit-breaker manager, installed by + // HttpServer::MarkServerReady after both managers exist. Atomic so + // late-arriving hot-path reads in ProxyTransaction see either a + // coherent pointer or nullptr (never torn). Owned by HttpServer; + // lifetime outlives UpstreamManager (breaker destructs first — + // §3.1 ownership). Default nullptr — breaker is an opt-in layer. 
+ std::atomic breaker_manager_{nullptr}; + // Manager-owned atomic counter: total outstanding connections std::atomic outstanding_conns_{0}; diff --git a/server/circuit_breaker_host.cc b/server/circuit_breaker_host.cc new file mode 100644 index 00000000..4523d3be --- /dev/null +++ b/server/circuit_breaker_host.cc @@ -0,0 +1,142 @@ +#include "circuit_breaker/circuit_breaker_host.h" +#include "dispatcher.h" +#include "log/logger.h" + +namespace circuit_breaker { + +CircuitBreakerHost::CircuitBreakerHost(std::string service_name, + std::string host, + int port, + size_t partition_count, + const CircuitBreakerConfig& config) + : service_name_(std::move(service_name)), + host_(std::move(host)), + port_(port), + config_(config), + retry_budget_(std::make_unique( + config.retry_budget_percent, + config.retry_budget_min_concurrency)) { + // Clamp partition_count — a zero-partition host would be unusable + // (no slices to dispatch to). Tests or misuse may pass 0; log and + // clamp to 1 so the host is at least consistent. + if (partition_count == 0) { + logging::Get()->error( + "CircuitBreakerHost({}, {}:{}) constructed with 0 partitions; " + "clamping to 1", + service_name_, host_, port_); + partition_count = 1; + } + + slices_.reserve(partition_count); + for (size_t i = 0; i < partition_count; ++i) { + // Per-slice label for logs — lets operators grep logs for a + // specific host:partition pair. Key=value form matches the + // format documented in circuit_breaker_slice.h:host_label_. 
+ std::string label = "service=" + service_name_ + + " host=" + host_ + ":" + std::to_string(port_) + + " partition=" + std::to_string(i); + slices_.emplace_back(std::make_unique( + std::move(label), i, config_)); + } + logging::Get()->debug( + "CircuitBreakerHost created service={} host={}:{} partitions={} " + "enabled={} retry_budget={}%,min={}", + service_name_, host_, port_, partition_count, + config_.enabled, + config_.retry_budget_percent, + config_.retry_budget_min_concurrency); +} + +CircuitBreakerSlice* CircuitBreakerHost::GetSlice(size_t dispatcher_index) { + if (dispatcher_index >= slices_.size()) return nullptr; + return slices_[dispatcher_index].get(); +} + +CircuitBreakerHostSnapshot CircuitBreakerHost::Snapshot() const { + CircuitBreakerHostSnapshot snap; + snap.service_name = service_name_; + snap.host = host_; + snap.port = port_; + snap.slices.reserve(slices_.size()); + + for (const auto& slice : slices_) { + CircuitBreakerHostSnapshot::SliceRow row; + row.dispatcher_index = slice->dispatcher_index(); + row.state = slice->CurrentState(); + row.trips = slice->Trips(); + row.rejected = slice->Rejected(); + row.probe_successes = slice->ProbeSuccesses(); + row.probe_failures = slice->ProbeFailures(); + + snap.total_trips += row.trips; + snap.total_rejected += row.rejected; + if (row.state == State::OPEN) ++snap.open_partitions; + else if (row.state == State::HALF_OPEN) ++snap.half_open_partitions; + + snap.slices.push_back(row); + } + + // Retry budget aggregate (host-level, not per-partition). + snap.retries_in_flight = retry_budget_->RetriesInFlight(); + snap.retries_rejected = retry_budget_->RetriesRejected(); + snap.in_flight = retry_budget_->InFlight(); + + return snap; +} + +void CircuitBreakerHost::Reload( + const std::vector>& dispatchers, + const CircuitBreakerConfig& new_config) { + // Dispatcher list must match the slice count one-for-one — the + // slice at index i lives on dispatcher i. 
A size mismatch is a + // programming error (topology changed post-construction, which is + // restart-only); log and bail rather than mis-dispatching. + if (dispatchers.size() != slices_.size()) { + logging::Get()->error( + "CircuitBreakerHost::Reload({}:{}) dispatcher count mismatch: " + "got {}, expected {} — reload skipped", + service_name_, host_, dispatchers.size(), slices_.size()); + return; + } + + // Update host-level retry budget fields immediately — atomic stores, + // no dispatcher routing needed. RetryBudget::Reload clamps internally. + retry_budget_->Reload(new_config.retry_budget_percent, + new_config.retry_budget_min_concurrency); + + // Enqueue per-slice Reload on each owning dispatcher. The slice is + // dispatcher-thread-local for mutation, so the config swap must + // happen there. Passing slice as raw pointer is safe: slices_ is + // owned by `this` (the host), which outlives the manager's reload + // (enforced by CircuitBreakerManager's lifetime). + for (size_t i = 0; i < slices_.size(); ++i) { + CircuitBreakerSlice* slice = slices_[i].get(); + auto& dispatcher = dispatchers[i]; + if (!dispatcher) { + logging::Get()->error( + "CircuitBreakerHost::Reload({}:{}) null dispatcher at index {}", + service_name_, host_, i); + continue; + } + dispatcher->EnQueue([slice, new_config]() { + slice->Reload(new_config); + }); + } + + // Save the new config for future Snapshot() / construction-like + // operations. Other threads never read config_ directly. + config_ = new_config; +} + +void CircuitBreakerHost::SetTransitionCallbackOnAllSlices( + StateTransitionCallback cb) { + for (auto& slice : slices_) { + // Copy the callback so each slice owns its own std::function. + // Passing by value into SetTransitionCallback gives each slice + // an independent copy, avoiding cross-partition std::function + // data races. 
+ slice->SetTransitionCallback(cb); + } +} + +} // namespace circuit_breaker diff --git a/server/circuit_breaker_manager.cc b/server/circuit_breaker_manager.cc new file mode 100644 index 00000000..9e4934a3 --- /dev/null +++ b/server/circuit_breaker_manager.cc @@ -0,0 +1,124 @@ +#include "circuit_breaker/circuit_breaker_manager.h" +#include "log/logger.h" +#include + +namespace circuit_breaker { + +CircuitBreakerManager::CircuitBreakerManager( + const std::vector& upstreams, + size_t partition_count, + std::vector> dispatchers) + : dispatchers_(std::move(dispatchers)) { + // Invariant (production path): slices are indexed by dispatcher, + // so partition_count must match dispatcher count. Any divergence + // would cause every subsequent host->Reload() to silently skip + // (size-mismatch guard in CircuitBreakerHost::Reload) — fail + // loudly at startup instead of on reload. + // + // Exception: pure unit tests that don't exercise Reload pass an + // empty dispatcher list; skip the check in that case so those + // tests can continue to allocate slices without wiring up live + // dispatchers. + if (!dispatchers_.empty() && partition_count != dispatchers_.size()) { + logging::Get()->critical( + "CircuitBreakerManager: partition_count ({}) != dispatcher count " + "({}) — topology mismatch", + partition_count, dispatchers_.size()); + throw std::invalid_argument( + "CircuitBreakerManager: partition_count must equal dispatcher count"); + } + + // Build one Host per upstream regardless of .circuit_breaker.enabled. + // Disabled hosts still need a live Slice so a later reload can flip + // them on without re-wiring transition callbacks (design §3.1). + hosts_.reserve(upstreams.size()); + for (const auto& u : upstreams) { + if (u.name.empty()) { + // ConfigLoader::Validate rejects empty names upstream, but + // defense in depth — skip rather than insert an unreachable + // host with an empty key that would shadow future lookups. 
+ logging::Get()->error( + "CircuitBreakerManager: skipping upstream with empty name"); + continue; + } + auto [it, inserted] = hosts_.emplace( + u.name, + std::make_unique( + u.name, u.host, u.port, partition_count, u.circuit_breaker)); + if (!inserted) { + // Duplicate service name — shouldn't happen (Validate checks + // uniqueness), but log so the collision is visible rather + // than silently dropping the second entry. + logging::Get()->error( + "CircuitBreakerManager: duplicate upstream name '{}' ignored", + u.name); + } + } + logging::Get()->info( + "CircuitBreakerManager initialized hosts={} partitions={}", + hosts_.size(), partition_count); +} + +CircuitBreakerHost* CircuitBreakerManager::GetHost( + const std::string& service_name) { + auto it = hosts_.find(service_name); + return it == hosts_.end() ? nullptr : it->second.get(); +} + +const CircuitBreakerHost* CircuitBreakerManager::GetHost( + const std::string& service_name) const { + auto it = hosts_.find(service_name); + return it == hosts_.end() ? nullptr : it->second.get(); +} + +void CircuitBreakerManager::Reload( + const std::vector& new_upstreams) { + // Serialize with any other Reload calls. Hot path doesn't take this. + std::lock_guard lk(reload_mtx_); + + // Detect topology changes (added / removed service names) so we can + // log and skip — the authoritative "restart required" warning lives + // in HttpServer::Reload; we just honor the "existing hosts only" + // contract by applying breaker fields to matching names and nothing + // else. + std::unordered_set new_names; + new_names.reserve(new_upstreams.size()); + for (const auto& u : new_upstreams) new_names.insert(u.name); + + for (const auto& u : new_upstreams) { + auto* host = GetHost(u.name); + if (!host) { + // New service name — topology change, skip. The outer + // reload layer warns. 
+ logging::Get()->warn( + "CircuitBreakerManager::Reload: new upstream '{}' requires " + "restart (ignored)", + u.name); + continue; + } + host->Reload(dispatchers_, u.circuit_breaker); + } + + // Log removals without touching the hosts (their removal also + // requires a restart). + for (const auto& [name, _] : hosts_) { + if (new_names.find(name) == new_names.end()) { + logging::Get()->warn( + "CircuitBreakerManager::Reload: removed upstream '{}' requires " + "restart (ignored)", + name); + } + } +} + +std::vector +CircuitBreakerManager::SnapshotAll() const { + std::vector snapshots; + snapshots.reserve(hosts_.size()); + for (const auto& [_, host] : hosts_) { + snapshots.push_back(host->Snapshot()); + } + return snapshots; +} + +} // namespace circuit_breaker diff --git a/server/circuit_breaker_slice.cc b/server/circuit_breaker_slice.cc new file mode 100644 index 00000000..e6bd1c93 --- /dev/null +++ b/server/circuit_breaker_slice.cc @@ -0,0 +1,675 @@ +#include "circuit_breaker/circuit_breaker_slice.h" +#include "log/logger.h" + +namespace circuit_breaker { + +CircuitBreakerSlice::CircuitBreakerSlice(std::string host_label, + size_t dispatcher_index, + const CircuitBreakerConfig& config, + TimeSource time_source) + : host_label_(std::move(host_label)), + dispatcher_index_(dispatcher_index), + config_(config), + time_source_(std::move(time_source)), + window_(config.window_seconds) { +} + +std::chrono::steady_clock::time_point CircuitBreakerSlice::Now() const { + if (time_source_) return time_source_(); + return std::chrono::steady_clock::now(); +} + +std::chrono::steady_clock::time_point CircuitBreakerSlice::OpenUntil() const { + int64_t ns = open_until_steady_ns_.load(std::memory_order_acquire); + if (ns == 0) return std::chrono::steady_clock::time_point{}; + return std::chrono::steady_clock::time_point(std::chrono::nanoseconds(ns)); +} + +// Cap the left-shift exponent used to compute open duration. 
`1 << 30` already +// covers ~12.4 days of base open duration even before the `max_open_duration_ms` +// clamp — higher shift amounts would invoke undefined behavior on `int`. +static constexpr int MAX_OPEN_DURATION_SHIFT = 30; + +// Scale factor for integer percent math: `fails * PERCENT_SCALE >= threshold * total`. +static constexpr int PERCENT_SCALE = 100; + +std::chrono::nanoseconds CircuitBreakerSlice::ComputeOpenDuration() const { + // Duration = base << consecutive_trips_ (shift expresses 2^n exponential). + // `consecutive_trips_` is the number of trips observed BEFORE this one, so + // the first trip uses 2^0 = 1x base, the second trip uses 2x, etc. + // Callers must increment consecutive_trips_ AFTER calling this method. + int trips = consecutive_trips_.load(std::memory_order_relaxed); + if (trips > MAX_OPEN_DURATION_SHIFT) trips = MAX_OPEN_DURATION_SHIFT; + // Clamp base/max for programmatic callers that bypass ConfigLoader::Validate + // (same hardening as CircuitBreakerWindow's ctor and the HALF_OPEN probe + // budget snapshot). Without these clamps: + // - base_open_duration_ms <= 0: `base_ms << trips` is <= 0 → open_until + // <= now → next TryAcquire immediately drains OPEN→HALF_OPEN, + // disabling the backoff entirely. + // - max_open_duration_ms < base_open_duration_ms: the overflow/clamp + // branch (`scaled_ms > max_ms`) fires on every trip, pinning the + // duration to a value smaller than base — same "no meaningful + // backoff" effect. + // Clamp floors: base >= 1ms, max >= base. + int64_t base_ms = config_.base_open_duration_ms > 0 + ? config_.base_open_duration_ms : 1; + int64_t max_ms = config_.max_open_duration_ms >= base_ms + ? 
config_.max_open_duration_ms : base_ms; + int64_t scaled_ms = base_ms << trips; + if (scaled_ms < base_ms /* overflow */ || scaled_ms > max_ms) { + scaled_ms = max_ms; + } + return std::chrono::milliseconds(scaled_ms); +} + +int64_t CircuitBreakerSlice::NextOpenDurationMs() const { + return std::chrono::duration_cast( + ComputeOpenDuration()).count(); +} + +bool CircuitBreakerSlice::ShouldTripClosed( + std::chrono::steady_clock::time_point now) { + if (consecutive_failures_ >= config_.consecutive_failure_threshold) { + return true; + } + int64_t total = window_.TotalCount(now); + if (total < config_.minimum_volume) return false; + int64_t fails = window_.FailureCount(now); + // Integer percent math: fails * PERCENT_SCALE >= threshold_pct * total. + return (fails * PERCENT_SCALE) >= + (static_cast(config_.failure_rate_threshold) * total); +} + +void CircuitBreakerSlice::TripClosedToOpen( + const char* trigger, std::chrono::steady_clock::time_point now) { + // `now` is the same time_point the caller (ReportFailure) passed to + // AddFailure/ShouldTripClosed — reusing it keeps the trip log's + // window_total/window_fail_rate consistent with the rate check that + // fired the trip. Calling Now() fresh here would risk crossing a + // bucket boundary and logging window_total=0 for the very failure + // that tripped the breaker. + // + // Capture pre-reset observability context BEFORE mutating state. + // §11.1 log format asks for consecutive_failures + window_total + + // window_fail_rate at the trip event so operators can distinguish a + // "100 consecutive bad responses" trip from a "55% failure rate over + // a wide call window" trip — two very different operational stories + // that the `trigger` string alone doesn't fully capture. + int consec_at_trip = consecutive_failures_; + int64_t window_total = window_.TotalCount(now); + int64_t window_failures = window_.FailureCount(now); + int window_fail_rate_pct = + (window_total > 0) + ? 
static_cast((window_failures * 100) / window_total) + : 0; + + auto duration = ComputeOpenDuration(); // uses current consecutive_trips_ + consecutive_trips_.fetch_add(1, std::memory_order_relaxed); + auto open_until = now + duration; + int64_t open_until_ns = + std::chrono::duration_cast( + open_until.time_since_epoch()).count(); + + open_until_steady_ns_.store(open_until_ns, std::memory_order_release); + state_.store(State::OPEN, std::memory_order_release); + + // Reset on-trip bookkeeping. + consecutive_failures_ = 0; + half_open_inflight_ = 0; + half_open_successes_ = 0; + half_open_saw_failure_ = false; + half_open_admitted_ = 0; + first_reject_logged_for_open_ = false; + // Bump closed_gen_: non-probe admissions from the closing CLOSED cycle + // are now stale. Late Report(false, ...) calls for those requests drop. + // halfopen_gen_ is NOT bumped — OPEN holds no HALF_OPEN admissions. + ++closed_gen_; + + trips_.fetch_add(1, std::memory_order_relaxed); + + logging::Get()->warn( + "circuit breaker tripped {} trigger={} consecutive_failures={} " + "window_total={} window_fail_rate={} open_for_ms={} consecutive_trips={}", + host_label_, trigger, consec_at_trip, + window_total, window_fail_rate_pct, + std::chrono::duration_cast(duration).count(), + consecutive_trips_.load(std::memory_order_relaxed)); + + if (transition_cb_) transition_cb_(State::CLOSED, State::OPEN, trigger); +} + +void CircuitBreakerSlice::TransitionOpenToHalfOpen() { + state_.store(State::HALF_OPEN, std::memory_order_release); + // Clear open_until_steady_ns_ per the OpenUntil() contract ("zero when + // not OPEN"). Leaving a stale deadline here would cause + // ProxyTransaction::MakeCircuitOpenResponse to compute a Retry-After + // from a past time_point (negative delta → floor at 1s, misleading for + // a reject in the HALF_OPEN probe-budget-full path). Retry-After for + // HALF_OPEN rejects is computed fresh by callers when needed. 
+ open_until_steady_ns_.store(0, std::memory_order_release); + half_open_inflight_ = 0; + half_open_successes_ = 0; + half_open_saw_failure_ = false; + half_open_admitted_ = 0; + // Snapshot the probe budget for this cycle. A live Reload() during this + // HALF_OPEN episode may lower or raise config_.permitted_half_open_calls, + // but TryAcquire's slot gate (Case B) and ReportSuccess's close check must + // both operate against the budget that was in effect when probes were + // admitted. Without the snapshot: lowering the limit causes premature close + // (first success satisfies the reduced count → TransitionHalfOpenToClosed + // bumps halfopen_gen_ → remaining admitted probes become stale → their + // failures are silently dropped and the breaker falsely closes). + // + // Clamp to a minimum of 1. ConfigLoader::Validate() enforces >= 1 on the + // production path, but programmatic callers (tests, future direct users) + // that bypass validation could set permitted_half_open_calls <= 0. With + // snapshot=0, TryAcquire's Case B check (`inflight >= snapshot`) is + // immediately true for every probe → no probe ever admitted → no probe + // ever completes → half_open_inflight_ stays at 0 forever → slice is + // permanently stuck in HALF_OPEN rejecting all traffic. Matches the + // symmetric clamp in CircuitBreakerWindow's ctor. + int permitted = config_.permitted_half_open_calls; + half_open_permitted_snapshot_ = permitted > 0 ? permitted : 1; + // Reset the info-log "first reject" breadcrumb so the first rejection + // observed in the HALF_OPEN phase surfaces at info, not debug. HALF_OPEN + // rejection (recovery attempt failing or probe budget full) is + // operationally distinct from OPEN rejection (still backing off) and + // deserves its own breadcrumb in default-warn operator logs. + first_reject_logged_for_open_ = false; + // NOTE: neither closed_gen_ nor halfopen_gen_ is bumped here. 
No + // admissions are made in OPEN — the previous HALF_OPEN cycle (if any) + // already bumped halfopen_gen_ on its exit (TripHalfOpenToOpen) or on + // cycle-complete (TransitionHalfOpenToClosed), so any latent stale + // probes are already tagged. Bumping again would be redundant. + + logging::Get()->info( + "circuit breaker half-open {} probes_allowed={}", + host_label_, half_open_permitted_snapshot_); + + if (transition_cb_) { + transition_cb_(State::OPEN, State::HALF_OPEN, "open_elapsed"); + } +} + +void CircuitBreakerSlice::TransitionHalfOpenToClosed() { + // Capture actual probes-succeeded BEFORE resetting — the log then reflects + // reality instead of the configured target (the two are equal at the moment + // of transition today, but relying on that is brittle if the transition + // logic ever changes). + int probes_succeeded = half_open_successes_; + + state_.store(State::CLOSED, std::memory_order_release); + open_until_steady_ns_.store(0, std::memory_order_release); + consecutive_trips_.store(0, std::memory_order_relaxed); + consecutive_failures_ = 0; + window_.Reset(); + half_open_inflight_ = 0; + half_open_successes_ = 0; + half_open_saw_failure_ = false; + half_open_admitted_ = 0; + first_reject_logged_for_open_ = false; + // Bump halfopen_gen_: the just-completed HALF_OPEN cycle's probe + // admissions are now stale. closed_gen_ is NOT bumped — pre-trip + // CLOSED admissions were already invalidated by TripClosedToOpen + // when we left CLOSED. 
+ ++halfopen_gen_; + + logging::Get()->info( + "circuit breaker closed {} probes_succeeded={}", + host_label_, probes_succeeded); + + if (transition_cb_) { + transition_cb_(State::HALF_OPEN, State::CLOSED, "probe_success"); + } +} + +void CircuitBreakerSlice::TripHalfOpenToOpen(const char* trigger) { + auto duration = ComputeOpenDuration(); // uses current consecutive_trips_ + consecutive_trips_.fetch_add(1, std::memory_order_relaxed); + auto now = Now(); + auto open_until = now + duration; + int64_t open_until_ns = + std::chrono::duration_cast( + open_until.time_since_epoch()).count(); + + open_until_steady_ns_.store(open_until_ns, std::memory_order_release); + state_.store(State::OPEN, std::memory_order_release); + + half_open_inflight_ = 0; + half_open_successes_ = 0; + half_open_saw_failure_ = false; + half_open_admitted_ = 0; + first_reject_logged_for_open_ = false; + // Bump halfopen_gen_: probe admissions from the closing HALF_OPEN + // cycle are now stale. closed_gen_ is NOT bumped — no CLOSED + // admissions are outstanding (we came from HALF_OPEN, not CLOSED). + ++halfopen_gen_; + + trips_.fetch_add(1, std::memory_order_relaxed); + + logging::Get()->warn( + "circuit breaker re-tripped {} trigger={} open_for_ms={} consecutive_trips={}", + host_label_, trigger, + std::chrono::duration_cast(duration).count(), + consecutive_trips_.load(std::memory_order_relaxed)); + + if (transition_cb_) transition_cb_(State::HALF_OPEN, State::OPEN, trigger); +} + +CircuitBreakerSlice::Admission CircuitBreakerSlice::TryAcquire() { + // Disabled fast path — zero overhead when config.enabled=false. + // Use generation 0 (sentinel) since the slice won't consult it on report. + if (!config_.enabled) { + return Admission{Decision::ADMITTED, /*generation=*/0}; + } + + State s = state_.load(std::memory_order_acquire); + + if (s == State::OPEN) { + // Check whether the open window has elapsed. 
+ int64_t open_until_ns = + open_until_steady_ns_.load(std::memory_order_acquire); + int64_t now_ns = std::chrono::duration_cast( + Now().time_since_epoch()).count(); + if (now_ns >= open_until_ns) { + // Transition OPEN → HALF_OPEN on this thread. Because slices are + // dispatcher-thread-pinned, no CAS is needed (a plain store is + // safe under the single-writer invariant). + TransitionOpenToHalfOpen(); + s = State::HALF_OPEN; + } else { + // Rejected admissions get generation 0 — callers must not call + // Report* for a rejected admission, and 0 always compares stale + // (domain gens start at 1), so an accidental Report would drop + // safely rather than mutating state. + return Admission{RejectWithLog("open", /*half_open_full=*/false), + /*generation=*/0}; + } + } + + if (s == State::HALF_OPEN) { + // Case A: a sibling probe already failed. Short-circuit remaining + // admissions — the breaker is guaranteed to re-trip once in-flight + // probes drain. This is operationally DIFFERENT from "budget + // exhausted" (case B): probe slots may still be free, we just know + // using them can't change the outcome. Track it with its own log + // label and do NOT bump `rejected_half_open_full_` — that counter + // is specifically "probing, no capacity left" for dashboards. + if (half_open_saw_failure_) { + return Admission{RejectWithLog("half_open_recovery_failing", + /*half_open_full=*/false), + /*generation=*/0}; + } + // Case B: probe budget exhausted for this cycle. "No capacity" — bump + // the dedicated counter so dashboards can tell this apart from + // saw_failure rejects. + // + // Gate on `half_open_admitted_` (total cycle admissions, never + // decrements), NOT on `half_open_inflight_`. Inflight drops when a + // probe completes, so gating on it would reuse the freed slot and let + // the cycle admit more than `snapshot` total probes. 
Consequences of + // that bug: the close check `successes >= snapshot` could fire before + // ALL admitted probes have reported (the reused-slot probe is still + // in flight); TransitionHalfOpenToClosed would bump halfopen_gen_; + // the late probe's failure would drop as stale — falsely marking an + // unhealthy host recovered. + // + // Use the cycle snapshot so a live Reload() that lowers + // permitted_half_open_calls mid-cycle doesn't change how many probes + // were promised to this cycle. + if (half_open_admitted_ >= half_open_permitted_snapshot_) { + return Admission{RejectWithLog("half_open_full", + /*half_open_full=*/true), + /*generation=*/0}; + } + half_open_admitted_++; + half_open_inflight_++; + // Probe admission — stamp with halfopen_gen_. + return Admission{Decision::ADMITTED_PROBE, halfopen_gen_}; + } + + // CLOSED: fast path — stamp with closed_gen_. + return Admission{Decision::ADMITTED, closed_gen_}; +} + +Decision CircuitBreakerSlice::RejectWithLog(const char* state_label, + bool half_open_full) { + rejected_.fetch_add(1, std::memory_order_relaxed); + if (half_open_full) { + rejected_half_open_full_.fetch_add(1, std::memory_order_relaxed); + } + // First reject in this OPEN/HALF_OPEN cycle is info — gives operators + // looking at a flurry of 503s a single high-level breadcrumb in default- + // warn logs without flooding them. Subsequent rejects are debug. 
+ const bool first = !first_reject_logged_for_open_; + if (first) first_reject_logged_for_open_ = true; + + if (config_.dry_run) { + if (first) { + logging::Get()->info( + "[dry-run] circuit breaker would reject {} state={}", + host_label_, state_label); + } else { + logging::Get()->debug( + "[dry-run] circuit breaker would reject {} state={}", + host_label_, state_label); + } + return Decision::REJECTED_OPEN_DRYRUN; + } + if (first) { + logging::Get()->info( + "circuit breaker rejecting {} state={} (first reject this cycle)", + host_label_, state_label); + } else { + logging::Get()->debug( + "circuit breaker rejected {} state={}", host_label_, state_label); + } + return Decision::REJECTED_OPEN; +} + +void CircuitBreakerSlice::ReportSuccess(bool probe, + uint64_t admission_generation) { + if (!config_.enabled) return; + + if (probe) { + // Record the completed-probe outcome for observability regardless of + // current state — this is a signal about upstream behavior, not a + // signal about our state machine. + probe_successes_.fetch_add(1, std::memory_order_relaxed); + + // Generation guard: drop reports for probes admitted before the + // current HALF_OPEN cycle. Probes use halfopen_gen_ exclusively — + // so a window_seconds reload (bumps closed_gen_, NOT halfopen_gen_) + // does NOT invalidate in-flight probes, which would otherwise + // strand half_open_inflight_ at its pre-reload value and wedge the + // slice in HALF_OPEN/half_open_full. + if (admission_generation != halfopen_gen_) { + reports_stale_generation_.fetch_add(1, std::memory_order_relaxed); + return; + } + + // Stale probe defense: we admitted this probe in HALF_OPEN, but the + // slice may have transitioned out (e.g., `Reload()` flipped enabled, + // `TransitionHalfOpenToClosed` already fired on sibling probes, or an + // operator toggle transitioned us to CLOSED via Reload()). + // Only touch HALF_OPEN bookkeeping / fire transitions when state is + // STILL HALF_OPEN.
+ if (state_.load(std::memory_order_acquire) != State::HALF_OPEN) return; + + if (half_open_inflight_ > 0) half_open_inflight_--; + if (half_open_saw_failure_) { + // A sibling probe already failed; whichever probe finishes last + // transitions to OPEN. Handle here only if this is the last probe. + if (half_open_inflight_ == 0) { + TripHalfOpenToOpen("probe_fail"); + } + return; + } + half_open_successes_++; + // Use the cycle snapshot so a mid-cycle Reload() that lowers the + // limit doesn't close the breaker early (before all admitted probes + // have reported back), silently dropping the remaining probes' failures. + if (half_open_successes_ >= half_open_permitted_snapshot_) { + TransitionHalfOpenToClosed(); + } + return; + } + + // Non-probe success path — checked against closed_gen_. + if (admission_generation != closed_gen_) { + reports_stale_generation_.fetch_add(1, std::memory_order_relaxed); + return; + } + // Only meaningful when state is CLOSED. If the slice has since + // transitioned (e.g., other requests in this burst tripped it), this + // late outcome must NOT retroactively reset `consecutive_failures_` or + // pollute the window — a fresh CLOSED cycle after recovery would start + // with bogus success history. (Transitions bump `closed_gen_`, so the + // guard above catches this too; the state check is a direct guard for + // observability clarity.) + if (state_.load(std::memory_order_acquire) != State::CLOSED) return; + consecutive_failures_ = 0; + window_.AddSuccess(Now()); +} + +void CircuitBreakerSlice::ReportFailure(FailureKind kind, bool probe, + uint64_t admission_generation) { + (void)kind; // Kind is used by higher layers for logging; slice itself + // treats all failures the same way for trip math. + if (!config_.enabled) return; + + if (probe) { + probe_failures_.fetch_add(1, std::memory_order_relaxed); + + // Probes use halfopen_gen_ — see matching comment in ReportSuccess. 
+ if (admission_generation != halfopen_gen_) { + reports_stale_generation_.fetch_add(1, std::memory_order_relaxed); + return; + } + + // Stale probe defense — see matching comment in ReportSuccess above. + if (state_.load(std::memory_order_acquire) != State::HALF_OPEN) return; + + if (half_open_inflight_ > 0) half_open_inflight_--; + half_open_saw_failure_ = true; + // On the last probe (or if all remaining complete) transition OPEN. + if (half_open_inflight_ == 0) { + TripHalfOpenToOpen("probe_fail"); + } + return; + } + + // Non-probe failure path — checked against closed_gen_. + if (admission_generation != closed_gen_) { + reports_stale_generation_.fetch_add(1, std::memory_order_relaxed); + return; + } + // Only count when CLOSED. Late failures from requests admitted in CLOSED + // but completing after a trip must NOT re-enter `TripClosedToOpen` — + // doing so double-increments `consecutive_trips_` (inflating + // open_duration) and fires a spurious CLOSED→OPEN transition edge that + // downstream consumers (wait-queue drain, snapshot telemetry) would see + // as a ghost trip. (Again, the generation guard above catches this too; + // keep the state check for observability clarity.) + if (state_.load(std::memory_order_acquire) != State::CLOSED) return; + + consecutive_failures_++; + // Capture Now() once and reuse for both the record and the trip check. + // Separate Now() calls can cross a second boundary, letting TotalCount's + // internal Advance() zero the bucket we just wrote — with window_seconds=1, + // a 1-second delta trips the Advance full-reset path and the just-recorded + // failure disappears from the ring, missing a rate trip that should fire. + auto now = Now(); + window_.AddFailure(now); + + if (ShouldTripClosed(now)) { + const char* trigger = + (consecutive_failures_ >= config_.consecutive_failure_threshold) + ? "consecutive" : "rate"; + // Thread `now` through so the trip log's window stats reflect the + // same view ShouldTripClosed just used. 
+ TripClosedToOpen(trigger, now); + } +} + +void CircuitBreakerSlice::ReportNeutral(bool probe, + uint64_t admission_generation) { + if (!config_.enabled) return; + if (!probe) { + // CLOSED-state admission: no slot to release. The bool parameter + // exists for API symmetry with ReportSuccess/ReportFailure; a + // neutral outcome in CLOSED simply means the breaker records + // nothing (which matches pre-neutral behavior — POOL_EXHAUSTED, + // shutdown, and similar local terminations were already "ignored" + // on the CLOSED path). + return; + } + + // Probe: gate on halfopen_gen_ + current state, matching the other + // Report* paths. Stale (pre-transition or pre-reload) neutral + // completions drop silently into the stale-generation counter. + if (admission_generation != halfopen_gen_) { + reports_stale_generation_.fetch_add(1, std::memory_order_relaxed); + return; + } + if (state_.load(std::memory_order_acquire) != State::HALF_OPEN) return; + + // Return the slot to the cycle. Decrement BOTH inflight and admitted: + // - inflight so the last-probe re-trip logic below fires correctly, + // - admitted so a replacement probe can still be admitted within + // this cycle's budget (the whole point of a neutral release — + // the upstream wasn't actually exercised by this admission). + if (half_open_inflight_ > 0) half_open_inflight_--; + if (half_open_admitted_ > 0) half_open_admitted_--; + + // If an earlier sibling probe failed and this neutral release drains + // the last in-flight probe, the cycle must re-trip — otherwise the + // slice would wedge in HALF_OPEN with saw_failure=true, rejecting all + // future admissions via Case A forever. Mirrors the failure-path + // last-probe trigger. 
+ if (half_open_saw_failure_ && half_open_inflight_ == 0) { + TripHalfOpenToOpen("probe_fail"); + } +} + +void CircuitBreakerSlice::Reload(const CircuitBreakerConfig& new_config) { + const bool enabled_changed = (config_.enabled != new_config.enabled); + const bool window_changed = + (config_.window_seconds != new_config.window_seconds); + // Snapshot the OLD dry_run before config_ is overwritten — used at + // the end of Reload to detect a true→false flip and signal the + // host to drain any waiters that accumulated during shadow mode. + const bool old_dry_run = config_.dry_run; + + config_ = new_config; + if (window_changed) { + // Resize wipes the failure-rate ring buckets. Without bumping + // closed_gen_ here, late completions from pre-reload CLOSED + // admissions would pass the generation guard and repopulate the + // freshly empty window — mixing pre-reload and post-reload traffic + // in the rate-trip calc. + // + // CRUCIALLY: we bump ONLY closed_gen_, NOT halfopen_gen_. + // window_seconds affects only the CLOSED rate window. Bumping + // halfopen_gen_ too (as prior fix did) would invalidate in-flight + // probes, whose late reports could no longer decrement + // half_open_inflight_ or honor saw_failure/TripHalfOpenToOpen — + // wedging the slice in HALF_OPEN/half_open_full with full probe + // slots until another reset. Probe bookkeeping is untouched by + // Resize, so preserving halfopen_gen_ keeps probes live. + // + // Skip when enabled_changed is also true: the full-reset branch + // below bumps both generations as part of its larger reset. + window_.Resize(new_config.window_seconds); + if (!enabled_changed) { + // Reset consecutive_failures_ alongside the window wipe. + // Both are CLOSED-domain state from the same observation cycle. + // Bumping closed_gen_ drops all pre-reload CLOSED reports + // (correct — they must not seed the fresh window). 
But if + // consecutive_failures_ is NOT also reset, those dropped reports + // can no longer clear or advance the counter either, so the + // leftover count becomes an orphaned value that mis-fires future + // trip evaluations (spurious trip: pre-reload success was going + // to clear the counter but got dropped, so the next real failure + // crosses the threshold using a stale count). + consecutive_failures_ = 0; + ++closed_gen_; + } + } + + if (enabled_changed) { + // Toggling `enabled` is an operator intent to start fresh, not a + // runtime state transition. Without this reset: + // - Disabling while OPEN and re-enabling later would resume the + // OPEN state and reject requests even though the operator + // explicitly turned the breaker off and back on. + // - Disabling while HALF_OPEN with in-flight probes would leave + // inconsistent bookkeeping (inflight > 0, state=HALF_OPEN) that + // a subsequent enable would interpret as live probes. + // - Disabling mid-CLOSED-cycle and re-enabling would trip on the + // very next failure because consecutive_failures_ persisted. + // Matches design doc §10.1 (enabled→disabled / disabled→enabled + // transitions both get a clean CLOSED start). + // + // Silent reset — no transition callback. The change is operator- + // initiated configuration, not a runtime state signal; firing the + // callback would cause PoolPartition::DrainWaitQueueOnTrip-style + // consumers (the wait-queue drain transition callback) to spuriously + // drain waiters on a config edit. + state_.store(State::CLOSED, std::memory_order_release); + open_until_steady_ns_.store(0, std::memory_order_release); + consecutive_trips_.store(0, std::memory_order_relaxed); + consecutive_failures_ = 0; + window_.Reset(); + half_open_inflight_ = 0; + half_open_successes_ = 0; + half_open_saw_failure_ = false; + first_reject_logged_for_open_ = false; + // Fresh generations for BOTH domains: this is a full reset. 
+ // Both pre-toggle non-probe admissions (closed_gen) and in-flight + // probes (halfopen_gen) are invalidated — their late reports + // silently drop, preserving clean-restart semantics. + ++closed_gen_; + ++halfopen_gen_; + } + // When `enabled` is unchanged: live state preserved — operator expects + // new thresholds to apply to the next evaluation, not to reset an + // in-progress trip. + + logging::Get()->info( + "circuit breaker config applied {} enabled={} window_s={} " + "fail_rate={} consec_threshold={}{}", + host_label_, new_config.enabled, new_config.window_seconds, + new_config.failure_rate_threshold, + new_config.consecutive_failure_threshold, + enabled_changed ? " (enabled toggled — state reset to CLOSED)" : ""); + + // dry_run true→false on a slice that's STILL OPEN: enforcement is + // back on, but the OPEN→OPEN intra-state config edit doesn't fire + // any natural transition callback. The pool partition may have + // queued waiters from the shadow-mode period (the original + // CLOSED→OPEN drain was skipped because dry_run was true at the + // time). Without flushing them now, those queued requests will + // eventually dispatch to the unhealthy upstream once a pool slot + // frees, defeating the just-re-enabled enforcement. + // + // Signal the host via a synthetic OPEN→OPEN transition callback + // with trigger="dry_run_disabled". The HttpServer-installed + // callback recognizes this special trigger and drains the + // partition queue. Real state transitions never reuse the same + // old/new state with this trigger string, so there's no overlap. + // + // IMPORTANT — why this does NOT fire in HALF_OPEN: HALF_OPEN + // queues can mix two admission kinds that share a partition wait + // slot but differ on slice bookkeeping: + // (a) Valid probes admitted within permitted_half_open_calls — + // admission_generation_ = current halfopen_gen_, holding a + // real half_open_inflight_/admitted_ slot. 
These drive + // recovery on a healthy upstream and must NOT be disrupted + // by an operator config flip. + // (b) Dry-run-admitted shadow requests (half_open_full / + // half_open_recovery_failing paths) — admission_generation_ + // = 0 (RejectWithLog sentinel). Their outcomes drop as + // stale-gen on report, so they never influence the slice's + // state machine and are bounded by pool queue size. + // DrainWaitQueueOnTrip is partition-wide and can't tell (a) from + // (b); draining would 503 valid probes (delaying/preventing + // recovery) to also drop the harmless (b). We accept the small + // bounded leak of (b) as the lesser evil. + // + // State is dispatcher-thread-only here; a plain load is sufficient. + if (old_dry_run && !new_config.dry_run && + state_.load(std::memory_order_acquire) == State::OPEN && + transition_cb_) { + logging::Get()->info( + "circuit breaker dry_run disabled while OPEN {} — " + "flushing wait queue", host_label_); + transition_cb_(State::OPEN, State::OPEN, "dry_run_disabled"); + } +} + +void CircuitBreakerSlice::SetTransitionCallback(StateTransitionCallback cb) { + transition_cb_ = std::move(cb); +} + +} // namespace circuit_breaker diff --git a/server/circuit_breaker_window.cc b/server/circuit_breaker_window.cc new file mode 100644 index 00000000..776c00ec --- /dev/null +++ b/server/circuit_breaker_window.cc @@ -0,0 +1,99 @@ +#include "circuit_breaker/circuit_breaker_window.h" + +namespace circuit_breaker { + +// Map an epoch-second value into a non-negative bucket index. C++ built-in `%` +// can return a negative result when the dividend is negative — and while +// `steady_clock::time_since_epoch()` is zero-based on all mainstream +// libstdc++/libc++ implementations, the standard does not strictly guarantee a +// non-negative epoch across every implementation. 
The extra `+ w` and second +// `% w` costs a single add + mod on the slow (negative) branch, zero observable +// overhead on the common positive branch after the compiler eliminates the +// redundant math. +static inline size_t BucketIndex(int64_t epoch_sec, int window_seconds) { + const int64_t w = window_seconds; + return static_cast(((epoch_sec % w) + w) % w); +} + +CircuitBreakerWindow::CircuitBreakerWindow(int window_seconds) + // Clamp to a minimum of 1 bucket. ConfigLoader::Validate() rejects + // window_seconds <= 0 on the production path, but the constructor is a + // public API and programmatic callers (tests, future direct users) may + // bypass that validation. Without the clamp, BucketIndex() does `% 0` on + // the first Add/TotalCount and crashes; negative values violate the ring + // math. Matches Resize()'s clamp so the two entry points are symmetric. + : window_seconds_(window_seconds > 0 ? window_seconds : 1), + buckets_(static_cast(window_seconds_)) { +} + +int64_t CircuitBreakerWindow::ToEpochSec( + std::chrono::steady_clock::time_point now) { + return std::chrono::duration_cast( + now.time_since_epoch()).count(); +} + +void CircuitBreakerWindow::Advance(int64_t now_sec) { + if (head_epoch_sec_ < 0) { + head_epoch_sec_ = now_sec; + return; + } + if (now_sec <= head_epoch_sec_) return; + int64_t delta = now_sec - head_epoch_sec_; + // If delta exceeds window size, everything is stale — full reset. + if (delta >= window_seconds_) { + for (auto& b : buckets_) { b.total = 0; b.failures = 0; } + } else { + // Zero buckets from head+1..now_sec inclusive. 
+ for (int64_t s = head_epoch_sec_ + 1; s <= now_sec; ++s) { + size_t idx = BucketIndex(s, window_seconds_); + buckets_[idx].total = 0; + buckets_[idx].failures = 0; + } + } + head_epoch_sec_ = now_sec; +} + +void CircuitBreakerWindow::AddSuccess( + std::chrono::steady_clock::time_point now) { + int64_t now_sec = ToEpochSec(now); + Advance(now_sec); + buckets_[BucketIndex(now_sec, window_seconds_)].total++; +} + +void CircuitBreakerWindow::AddFailure( + std::chrono::steady_clock::time_point now) { + int64_t now_sec = ToEpochSec(now); + Advance(now_sec); + size_t idx = BucketIndex(now_sec, window_seconds_); + buckets_[idx].total++; + buckets_[idx].failures++; +} + +int64_t CircuitBreakerWindow::TotalCount( + std::chrono::steady_clock::time_point now) { + Advance(ToEpochSec(now)); + int64_t sum = 0; + for (const auto& b : buckets_) sum += b.total; + return sum; +} + +int64_t CircuitBreakerWindow::FailureCount( + std::chrono::steady_clock::time_point now) { + Advance(ToEpochSec(now)); + int64_t sum = 0; + for (const auto& b : buckets_) sum += b.failures; + return sum; +} + +void CircuitBreakerWindow::Reset() { + for (auto& b : buckets_) { b.total = 0; b.failures = 0; } + head_epoch_sec_ = -1; +} + +void CircuitBreakerWindow::Resize(int new_window_seconds) { + window_seconds_ = new_window_seconds > 0 ? 
new_window_seconds : 1; + buckets_.assign(static_cast(window_seconds_), Bucket{}); + head_epoch_sec_ = -1; +} + +} // namespace circuit_breaker diff --git a/server/config_loader.cc b/server/config_loader.cc index 80e9312f..38fb2fb4 100644 --- a/server/config_loader.cc +++ b/server/config_loader.cc @@ -267,6 +267,60 @@ ServerConfig ConfigLoader::LoadFromString(const std::string& json_str) { } } + if (item.contains("circuit_breaker")) { + if (!item["circuit_breaker"].is_object()) + throw std::runtime_error("upstream circuit_breaker must be an object"); + auto& cb = item["circuit_breaker"]; + // Strict integer accessor: rejects float/bool/string inputs + // that nlohmann's default value() would silently coerce + // (e.g., 1.9 → 1, true → 1). Without this, malformed configs + // pass Validate() and change breaker behavior in production. + auto cb_int = [&cb](const char* name, int default_val) -> int { + if (!cb.contains(name)) return default_val; + const auto& v = cb[name]; + if (!v.is_number_integer()) { + throw std::invalid_argument( + std::string("circuit_breaker.") + name + + " must be an integer"); + } + return v.get(); + }; + auto cb_bool = [&cb](const char* name, bool default_val) -> bool { + if (!cb.contains(name)) return default_val; + const auto& v = cb[name]; + if (!v.is_boolean()) { + throw std::invalid_argument( + std::string("circuit_breaker.") + name + + " must be a boolean"); + } + return v.get(); + }; + upstream.circuit_breaker.enabled = + cb_bool("enabled", false); + upstream.circuit_breaker.dry_run = + cb_bool("dry_run", false); + upstream.circuit_breaker.consecutive_failure_threshold = + cb_int("consecutive_failure_threshold", 5); + upstream.circuit_breaker.failure_rate_threshold = + cb_int("failure_rate_threshold", 50); + upstream.circuit_breaker.minimum_volume = + cb_int("minimum_volume", 20); + upstream.circuit_breaker.window_seconds = + cb_int("window_seconds", 10); + upstream.circuit_breaker.permitted_half_open_calls = + 
cb_int("permitted_half_open_calls", 5); + upstream.circuit_breaker.base_open_duration_ms = + cb_int("base_open_duration_ms", 5000); + upstream.circuit_breaker.max_open_duration_ms = + cb_int("max_open_duration_ms", 60000); + upstream.circuit_breaker.max_ejection_percent_per_host_set = + cb_int("max_ejection_percent_per_host_set", 50); + upstream.circuit_breaker.retry_budget_percent = + cb_int("retry_budget_percent", 20); + upstream.circuit_breaker.retry_budget_min_concurrency = + cb_int("retry_budget_min_concurrency", 3); + } + config.upstreams.push_back(std::move(upstream)); } } @@ -507,6 +561,115 @@ void ConfigLoader::ApplyEnvOverrides(ServerConfig& config) { if (val) config.rate_limit.status_code = EnvToInt(val, "REACTOR_RATE_LIMIT_STATUS_CODE"); } +void ConfigLoader::ValidateHotReloadable( + const ServerConfig& config, + const std::unordered_set& live_upstream_names) { + // Mirrors the circuit_breaker validation block in Validate(). + // Kept in lock-step with that block — any rule added there for a + // hot-reloadable field must be added here too, or the SIGHUP + // reload path would silently accept values the startup path + // rejects (which is exactly the regression this helper exists + // to prevent). + + // Reject duplicate upstream service names BEFORE the per-upstream + // CB validation. Even for new/renamed entries, the file is + // malformed if names collide: `CircuitBreakerManager::Reload` + // iterates the new upstream list and applies each entry's + // `circuit_breaker` block to GetHost(name); duplicates would + // silently overwrite (last-write wins). Startup's full Validate() + // rejects the file outright; the hot-reload path must match. + // This rule runs UNCONDITIONALLY on the new config — it doesn't + // depend on `live_upstream_names`. 
+ { + std::unordered_set seen; + seen.reserve(config.upstreams.size()); + for (size_t i = 0; i < config.upstreams.size(); ++i) { + const auto& name = config.upstreams[i].name; + if (!seen.insert(name).second) { + throw std::invalid_argument( + "upstreams[" + std::to_string(i) + + "] duplicate service name '" + name + + "' (upstream service names must be unique)"); + } + } + } + + for (size_t i = 0; i < config.upstreams.size(); ++i) { + const auto& u = config.upstreams[i]; + const std::string idx = "upstreams[" + std::to_string(i) + "]"; + + // CB-field validation is scoped to upstreams that are LIVE in + // the running server. CircuitBreakerManager::Reload only + // applies CB changes to pre-existing hosts — new/renamed + // entries are restart-only and skipped with a warn — so + // validating their CB blocks here would block otherwise-safe + // reloads (e.g. a reload that stages a new upstream alongside + // a log-level edit would abort even though the live server + // would never apply the new upstream's CB block). + // + // The empty-set case (no live upstreams yet) is handled by + // the same check: every entry is "new", so every entry is + // skipped — only the duplicate-name check runs. 
+ if (live_upstream_names.find(u.name) == live_upstream_names.end()) { + continue; + } + const auto& cb = u.circuit_breaker; + if (cb.consecutive_failure_threshold < 1 || + cb.consecutive_failure_threshold > 10000) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.consecutive_failure_threshold must be in [1, 10000]"); + } + if (cb.failure_rate_threshold < 0 || cb.failure_rate_threshold > 100) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.failure_rate_threshold must be in [0, 100]"); + } + if (cb.minimum_volume < 1 || cb.minimum_volume > 10000000) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.minimum_volume must be in [1, 10000000]"); + } + if (cb.window_seconds < 1 || cb.window_seconds > 3600) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.window_seconds must be in [1, 3600]"); + } + if (cb.permitted_half_open_calls < 1 || + cb.permitted_half_open_calls > 1000) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.permitted_half_open_calls must be in [1, 1000]"); + } + if (cb.base_open_duration_ms < 100) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.base_open_duration_ms must be >= 100"); + } + if (cb.max_open_duration_ms < cb.base_open_duration_ms) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.max_open_duration_ms must be >= base_open_duration_ms"); + } + if (cb.max_ejection_percent_per_host_set < 0 || + cb.max_ejection_percent_per_host_set > 100) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.max_ejection_percent_per_host_set must be in [0, 100]"); + } + if (cb.retry_budget_percent < 0 || cb.retry_budget_percent > 100) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.retry_budget_percent must be in [0, 100]"); + } + if (cb.retry_budget_min_concurrency < 0) { + throw 
std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.retry_budget_min_concurrency must be >= 0"); + } + } +} + void ConfigLoader::Validate(const ServerConfig& config) { // Validate bind_host is a strict dotted-quad IPv4 address. // Use inet_pton (not inet_addr) to reject legacy shorthand forms @@ -811,6 +974,69 @@ void ConfigLoader::Validate(const ServerConfig& config) { idx + " ('" + u.name + "'): proxy.retry.max_retries must be >= 0 and <= 10"); } + + // Circuit breaker validation. + // + // Upper bounds on counting fields are generous — they exist to + // catch pathological configs (typo like "10_000_000_000" or a + // missing unit conversion), not to constrain legitimate tuning. + // Defaults are 5 / 20 / 5; limits are 1000× to 50000× the defaults. + { + const auto& cb = u.circuit_breaker; + if (cb.consecutive_failure_threshold < 1 || + cb.consecutive_failure_threshold > 10000) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.consecutive_failure_threshold must be in [1, 10000]"); + } + if (cb.failure_rate_threshold < 0 || cb.failure_rate_threshold > 100) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.failure_rate_threshold must be in [0, 100]"); + } + if (cb.minimum_volume < 1 || cb.minimum_volume > 10000000) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.minimum_volume must be in [1, 10000000]"); + } + if (cb.window_seconds < 1 || cb.window_seconds > 3600) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.window_seconds must be in [1, 3600]"); + } + if (cb.permitted_half_open_calls < 1 || + cb.permitted_half_open_calls > 1000) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.permitted_half_open_calls must be in [1, 1000]"); + } + if (cb.base_open_duration_ms < 100) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.base_open_duration_ms must be >= 
100"); + } + if (cb.max_open_duration_ms < cb.base_open_duration_ms) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.max_open_duration_ms must be >= base_open_duration_ms"); + } + if (cb.max_ejection_percent_per_host_set < 0 || + cb.max_ejection_percent_per_host_set > 100) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.max_ejection_percent_per_host_set must be in [0, 100]"); + } + if (cb.retry_budget_percent < 0 || cb.retry_budget_percent > 100) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.retry_budget_percent must be in [0, 100]"); + } + if (cb.retry_budget_min_concurrency < 0) { + throw std::invalid_argument( + idx + " ('" + u.name + + "'): circuit_breaker.retry_budget_min_concurrency must be >= 0"); + } + } // Validate method names — reject unknowns and duplicates. // Duplicates would cause RouteAsync to throw at startup. { @@ -1073,6 +1299,31 @@ std::string ConfigLoader::ToJson(const ServerConfig& config) { uj["proxy"] = pj; } + // Always serialize circuit_breaker — same rationale as proxy block. 
+ if (u.circuit_breaker != CircuitBreakerConfig{}) { + nlohmann::json cbj; + cbj["enabled"] = u.circuit_breaker.enabled; + cbj["dry_run"] = u.circuit_breaker.dry_run; + cbj["consecutive_failure_threshold"] = + u.circuit_breaker.consecutive_failure_threshold; + cbj["failure_rate_threshold"] = + u.circuit_breaker.failure_rate_threshold; + cbj["minimum_volume"] = u.circuit_breaker.minimum_volume; + cbj["window_seconds"] = u.circuit_breaker.window_seconds; + cbj["permitted_half_open_calls"] = + u.circuit_breaker.permitted_half_open_calls; + cbj["base_open_duration_ms"] = + u.circuit_breaker.base_open_duration_ms; + cbj["max_open_duration_ms"] = + u.circuit_breaker.max_open_duration_ms; + cbj["max_ejection_percent_per_host_set"] = + u.circuit_breaker.max_ejection_percent_per_host_set; + cbj["retry_budget_percent"] = + u.circuit_breaker.retry_budget_percent; + cbj["retry_budget_min_concurrency"] = + u.circuit_breaker.retry_budget_min_concurrency; + uj["circuit_breaker"] = cbj; + } j["upstreams"].push_back(uj); } diff --git a/server/http_server.cc b/server/http_server.cc index b9edda92..67575de7 100644 --- a/server/http_server.cc +++ b/server/http_server.cc @@ -6,6 +6,10 @@ #include "http2/http2_constants.h" #include "upstream/upstream_manager.h" #include "upstream/proxy_handler.h" +#include "circuit_breaker/circuit_breaker_manager.h" +#include "circuit_breaker/circuit_breaker_host.h" +#include "circuit_breaker/circuit_breaker_slice.h" +#include "upstream/pool_partition.h" #include "log/logger.h" #include "log/log_utils.h" #include @@ -410,6 +414,140 @@ void HttpServer::MarkServerReady() { throw; } + // Circuit breaker — built alongside the pool. One host per + // configured upstream (regardless of enabled), with one slice + // per dispatcher so hot-path TryAcquire is lock-free. Attached + // to UpstreamManager via a non-owning pointer so ProxyTransaction + // can reach it on the hot path via upstream_manager_-> + // GetCircuitBreakerManager(). 
The manager is declared AFTER + // upstream_manager_ on HttpServer (see header) so teardown runs + // breaker-first, which matches the dangling-pointer safety rule + // in UpstreamManager::breaker_manager_. + try { + circuit_breaker_manager_ = + std::make_unique( + upstream_configs_, dispatchers.size(), dispatchers); + upstream_manager_->AttachCircuitBreakerManager( + circuit_breaker_manager_.get()); + + // Wire CLOSED→OPEN transition callbacks for every slice of every + // host — regardless of `enabled=false`, per design §3.1 R3-1. A + // disabled slice never fires transitions (TryAcquire short- + // circuits to ADMITTED); wiring the callback costs nothing but + // lets a live reload flip enable=false→true without re-wiring. + // + // The callback routes trip events to the corresponding + // PoolPartition's DrainWaitQueueOnTrip so queued waiters fail + // fast with CHECKOUT_CIRCUIT_OPEN instead of waiting out the + // open window. Each slice gets a distinct callback that + // captures its (service, dispatcher_index) pair — we can't use + // SetTransitionCallbackOnAllSlices because that would install a + // single callback across slices that need different partition + // lookups. + // + // Safe to capture raw `UpstreamManager*`: CircuitBreakerManager + // destructs BEFORE UpstreamManager (§3.1 ownership), and slice + // callbacks only fire on dispatcher threads which are stopped + // before either manager is destroyed. So any live callback + // invocation sees a valid UpstreamManager. 
+ UpstreamManager* um = upstream_manager_.get(); + for (const auto& u : upstream_configs_) { + auto* host = circuit_breaker_manager_->GetHost(u.name); + if (!host) continue; + std::string service = u.name; + for (size_t i = 0; i < host->partition_count(); ++i) { + auto* slice = host->GetSlice(i); + if (!slice) continue; + // Capture the slice pointer so the callback can read + // the LIVE `dry_run` flag on every fire — operators + // can toggle dry_run via SIGHUP, and the drain + // decision must reflect the current setting, not a + // snapshot from server startup. Slice lifetime is + // tied to the manager (declared after upstream + // manager → destructs first), so the raw pointer + // outlives every possible callback invocation. + auto* slice_ptr = slice; + slice->SetTransitionCallback( + [um, service, i, slice_ptr]( + circuit_breaker::State old_s, + circuit_breaker::State new_s, + const char* trigger) { + // Three drain triggers, all entering OPEN: + // CLOSED→OPEN : fresh trip; queued non- + // probe waiters need CHECKOUT_CIRCUIT_OPEN + // instead of waiting out the full open + // window. + // HALF_OPEN→OPEN : probe cycle re-tripped; + // probe admissions passed ConsultBreaker + // before CheckoutAsync, so saturated + // pools can leave them queued. Without + // draining they eventually dispatch to a + // known-bad upstream. + // OPEN→OPEN with trigger="dry_run_disabled" + // : synthetic signal from + // CircuitBreakerSlice::Reload when + // dry_run flips true→false on a slice + // that's still OPEN. The earlier trip + // skipped the drain (shadow mode); now + // enforcement is back on, queued + // waiters from that period must be + // flushed before the pool services + // them. Real transitions never use this + // trigger string with old==new==OPEN, + // so there's no overlap with normal + // state-machine signals. 
+ // (The slice intentionally does NOT + // fire this signal in HALF_OPEN — see + // CircuitBreakerSlice::Reload for why + // valid probes must not be flushed.) + const bool normal_trip = + new_s == circuit_breaker::State::OPEN && + (old_s == circuit_breaker::State::CLOSED || + old_s == circuit_breaker::State::HALF_OPEN); + const bool dry_run_disable_drain = + old_s == circuit_breaker::State::OPEN && + new_s == circuit_breaker::State::OPEN && + trigger != nullptr && + std::strcmp(trigger, + "dry_run_disabled") == 0; + if (!normal_trip && !dry_run_disable_drain) { + return; + } + // Dry-run shadow-mode contract: the slice + // log-but-admits would-reject decisions, so + // the wait-queue drain — which would + // deliver hard 503s (CHECKOUT_CIRCUIT_OPEN + // → RESULT_CIRCUIT_OPEN) to queued + // waiters — must also be a no-op while + // dry_run is true. Note: when this fires + // via the dry_run_disabled trigger, the + // slice's config_.dry_run was already + // updated to false in Reload BEFORE the + // synthetic callback, so this guard + // correctly does NOT skip the drain in + // that case. + if (slice_ptr && slice_ptr->config().dry_run) { + logging::Get()->info( + "[dry-run] circuit breaker would drain " + "wait queue on trip — skipping (shadow " + "mode) service={} partition={}", + service, i); + return; + } + if (auto* part = um->GetPoolPartition( + service, i)) { + part->DrainWaitQueueOnTrip(); + } + }); + } + } + } catch (...) { + logging::Get()->error( + "Circuit breaker init failed, stopping server"); + net_server_.Stop(); + throw; + } + // Ensure the timer cadence is fast enough for upstream connect timeouts. // SetDeadline stores a ms-precision deadline, but TimerHandler only fires // at the timer scan interval. If connect_timeout_ms < current interval, @@ -3451,8 +3589,16 @@ bool HttpServer::Reload(const ServerConfig& new_config) { // field changes (timeouts, limits, log level). 
validation_copy.http2.enabled = http2_enabled_ && new_config.http2.enabled; - // Upstream configs are restart-only — clear them so staged edits - // in the config file don't block live-safe field reloads. + // Upstream configs are RESTART-ONLY for topology fields, but the + // per-upstream `circuit_breaker` block is HOT-RELOADABLE — clearing + // upstreams entirely from validation_copy would skip CB-field + // validation here. Instead: clear the topology-restart-only + // path (the full Validate would reject those) and run a separate + // ValidateHotReloadable on the original new_config so live- + // reloadable CB rules (range checks, duplicate names) are + // enforced symmetrically with the SIGHUP path in main.cc. + // Without this, in-process callers using HttpServer::Reload + // directly would bypass the gate that the CLI path enforces. validation_copy.upstreams.clear(); // Rate limit config IS live-reloadable and MUST be validated. // Unlike upstreams (restart-only), rate_limit changes are applied @@ -3465,6 +3611,29 @@ bool HttpServer::Reload(const ServerConfig& new_config) { logging::Get()->error("Reload() rejected invalid config: {}", e.what()); return false; } + // Strict gate for hot-reloadable CB fields + duplicate names. + // Mirrors main.cc::ReloadConfig — both entry points must reject + // invalid CB tuning before it reaches live slices. + // + // CB validation is scoped to existing upstream names: only + // those entries get applied via CircuitBreakerManager::Reload, + // so validating CB blocks for new/renamed entries would + // block otherwise-safe reloads. `upstream_configs_` is the + // post-Start snapshot of running upstreams. 
+ { + std::unordered_set<std::string> live_names; + live_names.reserve(upstream_configs_.size()); + for (const auto& u : upstream_configs_) { + live_names.insert(u.name); + } + try { + ConfigLoader::ValidateHotReloadable(new_config, live_names); + } catch (const std::invalid_argument& e) { + logging::Get()->error("Reload() rejected invalid config: {}", + e.what()); + return false; + } + } } // Three-phase update to prevent mid-reload connections from seeing @@ -3645,11 +3814,51 @@ bool HttpServer::Reload(const ServerConfig& new_config) { rate_limit_manager_->Reload(new_config.rate_limit); } - // Upstream pool changes require a restart — pools are built once in Start() - // and cannot be rebuilt at runtime without a full drain cycle. + // Circuit breaker reload — live-propagates breaker-field edits on + // existing upstream services. CircuitBreakerManager::Reload is + // idempotent (atomic stores to unchanged values), so calling it + // unconditionally costs nothing when the operator didn't edit any + // breaker fields. Topology changes (added / removed service names) + // are logged as warn + skipped inside the manager; the outer + // restart-required warning still fires via the upstreams-inequality + // check below. After this call, update the breaker slices on every + // partition via per-dispatcher EnQueue — the manager handles that + // routing internally. The topology check itself now only diffs non- + // breaker fields (UpstreamConfig::operator== excludes circuit_breaker), + // so a CB-only SIGHUP is a clean hot reload with no spurious warn. + if (circuit_breaker_manager_) { + circuit_breaker_manager_->Reload(new_config.upstreams); + } + + // Upstream topology changes (host/port/pool/proxy/tls) require a + // restart — pools are built once in Start() and cannot be rebuilt + // at runtime without a full drain cycle. 
The equality operator on + // UpstreamConfig deliberately excludes `circuit_breaker` so a CB- + // only edit doesn't trigger this warning (the reload above already + // applied the new breaker settings to live slices). + // + // When topology DIFFERS, we deliberately DO NOT copy the staged + // config into `upstream_configs_`: subsequent reloads (including + // the timer-cadence recomputation above) read from this vector to + // match live pool state. Adopting staged-but-inactive topology + // values would silently widen the dispatcher timer past the active + // pool timeouts — e.g. staging `pool.connect_timeout_ms=10000` + // (restart required) then reloading any unrelated field would + // recompute cadence from 10s while the live pool still uses 3s, + // firing connect-timeouts late. The CB-field portion of the edit + // was already applied live via `circuit_breaker_manager_->Reload` + // above, so the live slices carry the new tuning regardless of + // whether `upstream_configs_` shows it. + // + // When topology MATCHES (the common case, including CB-only + // edits), adopt the new snapshot as the fresh baseline so CB- + // field edits persist for later reload diffs. if (new_config.upstreams != upstream_configs_) { - logging::Get()->warn("Reload: upstream configuration changes require a " - "restart to take effect (ignored)"); + logging::Get()->warn("Reload: upstream topology changes require a " + "restart to take effect (circuit-breaker " + "field edits, if any, were applied live)"); + } else { + upstream_configs_ = new_config.upstreams; } return true; diff --git a/server/main.cc b/server/main.cc index 06dd2551..e0fa7790 100644 --- a/server/main.cc +++ b/server/main.cc @@ -328,7 +328,41 @@ static bool ReloadConfig(const std::string& config_path, } } } + // Hot-reloadable fields (today: per-upstream `circuit_breaker.*` + // on existing services + duplicate-name uniqueness across the + // new file) are the only ones that go LIVE on a SIGHUP reload. 
+ // Validate them strictly — a bad value here would be pushed into + // running slices and keep running until an operator-driven + // restart fixes the config file. Hard-reject so operators see + // the error immediately instead of discovering drift the next + // time the startup path rejects the same file. + // + // CB validation is scoped to existing upstream names — + // CircuitBreakerManager::Reload only applies CB changes to those. + // New/renamed upstreams are restart-only; their CB blocks are + // skipped here so an intentional placeholder doesn't block other + // live-safe edits in the same reload (log/rate-limit/breaker + // edits on existing services). + { + std::unordered_set<std::string> live_names; + live_names.reserve(current_config.upstreams.size()); + for (const auto& u : current_config.upstreams) { + live_names.insert(u.name); + } + try { + ConfigLoader::ValidateHotReloadable(new_config, live_names); + } catch (const std::invalid_argument& e) { + logging::Get()->error("Config reload rejected: {}", e.what()); + reopen_existing_logs(); + return false; + } + } + // Warn about restart-required field issues (not applied during reload). + // Full Validate() includes both hot-reloadable rules (already checked + // above) and restart-only rules; by the time we reach this point the + // hot-reloadable subset is known valid, so any exception thrown here + // is from restart-only rules and is legitimately a warn, not an error. try { ConfigLoader::Validate(new_config); } catch (const std::invalid_argument& e) { @@ -427,6 +461,19 @@ static bool ReloadConfig(const std::string& config_path, auto saved_tls = current_config.tls; auto saved_workers = current_config.worker_threads; auto saved_h2_enabled = current_config.http2.enabled; + // Preserve upstreams for the same reason: HttpServer::Reload treats + // the whole upstream block as restart-required (see http_server.cc + // upstream_configs_ comparison), and that internal copy never changes + // post-startup. 
If we overwrote current_config.upstreams here, a + // breaker-only edit would stage into current_config while the live + // server keeps running the startup values — /stats and other + // current_config consumers would report phantom state, and subsequent + // identical reloads could produce inconsistent diagnostics. Pin to + // the running values until CircuitBreakerManager::Reload implements + // live `circuit_breaker` propagation (the only upstream sub-field that + // becomes hot-reloadable); at that point this save becomes a + // partial-field save excluding circuit_breaker. + auto saved_upstreams = current_config.upstreams; current_config = new_config; @@ -435,6 +482,7 @@ static bool ReloadConfig(const std::string& config_path, current_config.tls = saved_tls; current_config.worker_threads = saved_workers; current_config.http2.enabled = saved_h2_enabled; + current_config.upstreams = std::move(saved_upstreams); // Commit file-backed state only after full success — a failed reload // must not flip this flag or future reloads lose the defaults+env fallback. diff --git a/server/pool_partition.cc b/server/pool_partition.cc index 819c941d..a0ba866c 100644 --- a/server/pool_partition.cc +++ b/server/pool_partition.cc @@ -549,6 +549,41 @@ void PoolPartition::InitiateShutdown() { MaybeSignalDrain(); } +void PoolPartition::DrainWaitQueueOnTrip() { + // Hoist alive_ — a waiter's error_callback may synchronously trigger + // a request completion path that tears down the partition (e.g. the + // test harness). Same pattern used by InitiateShutdown. + auto alive = alive_; + + if (shutting_down_) { + // Already draining via InitiateShutdown — that path will send + // CHECKOUT_SHUTTING_DOWN to every waiter. Don't double-fire. 
+ return; + } + + if (wait_queue_.empty()) return; + + logging::Get()->info( + "PoolPartition draining wait queue on breaker trip: {}:{} " + "queue_size={}", + upstream_host_, upstream_port_, wait_queue_.size()); + + while (!wait_queue_.empty()) { + auto entry = std::move(wait_queue_.front()); + wait_queue_.pop_front(); + // Cancelled waiters have no callback to fire — the transaction + // already tore its side down via the framework abort hook. + if (IsEntryCancelled(entry)) { + continue; + } + // CHECKOUT_CIRCUIT_OPEN — ProxyTransaction::OnCheckoutError maps + // to RESULT_CIRCUIT_OPEN and delivers MakeCircuitOpenResponse() + // without touching the breaker (our own reject, don't feed back). + entry.error_callback(CHECKOUT_CIRCUIT_OPEN); + if (!alive->load(std::memory_order_acquire)) return; + } +} + void PoolPartition::ForceCloseActive() { // Collect transports + borrower callbacks, then move to zombie, then // close transports, then notify borrowers. This ordering ensures: diff --git a/server/proxy_transaction.cc b/server/proxy_transaction.cc index 18aa6193..d3e8bd82 100644 --- a/server/proxy_transaction.cc +++ b/server/proxy_transaction.cc @@ -2,6 +2,9 @@ #include "upstream/upstream_manager.h" #include "upstream/upstream_connection.h" #include "upstream/http_request_serializer.h" +#include "circuit_breaker/circuit_breaker_manager.h" +#include "circuit_breaker/circuit_breaker_host.h" +#include "circuit_breaker/circuit_breaker_slice.h" #include "connection_handler.h" #include "dispatcher.h" // config/server_config.h provided by proxy_transaction.h (ProxyConfig stored by value) @@ -110,12 +113,109 @@ void ProxyTransaction::Start() { upstream_host_, upstream_port_, method_, upstream_path); + // Resolve the circuit-breaker slice once. Null when no breaker is + // attached (server has no upstreams configured), or when the + // service/dispatcher pair is out of + // range. 
In any null case the breaker is simply bypassed — the + // transaction proceeds as if circuit breaking were disabled. + if (upstream_manager_ && dispatcher_index_ >= 0) { + auto* cbm = upstream_manager_->GetCircuitBreakerManager(); + if (cbm) { + auto* host = cbm->GetHost(service_name_); + if (host) { + slice_ = host->GetSlice(static_cast<size_t>(dispatcher_index_)); + // Cache the retry-budget pointer unconditionally when + // the host exists — usage at each attempt is gated by + // the live `slice_->config().enabled` flag so that + // SIGHUP toggles take effect on the next retry within + // a running transaction. Resolution-time gating would + // miss the flip in either direction. + retry_budget_ = host->GetRetryBudget(); + } + } + } + + AttemptCheckout(); } void ProxyTransaction::AttemptCheckout() { state_ = State::CHECKOUT_PENDING; + // Circuit breaker gate — consulted before every attempt (first try and + // retries both). Each attempt gets a fresh admission stamped with the + // slice's current generation. If the slice rejects with REJECTED_OPEN, + // ConsultBreaker delivers the §12.1 response and returns false; the + // retry loop treats RESULT_CIRCUIT_OPEN as terminal (§8) so a rejected + // retry produces a single 503 to the client, not a nested retry. + // Dry-run reject logs inside TryAcquire and returns ADMITTED through + // the decision enum (REJECTED_OPEN_DRYRUN), so ConsultBreaker proceeds. + if (!ConsultBreaker()) { + return; + } + + // Retry-budget gate for retry attempts (attempt_ > 0). Gating here + // rather than in MaybeRetry means a delayed retry holds no token + // during its backoff sleep — the budget's `retries_in_flight` + // reflects only retries that are actually about to reach (or are + // reaching) the upstream, matching the "aggregate upstream load" + // semantics of the %-of-in-flight cap. 
+ // + // Live-check `slice_->config().enabled` at each attempt — the + // cached `retry_budget_` pointer is resolved once in Start(), but + // the `enabled` flag is the documented live master switch. A + // SIGHUP flipping enabled=true→false mid-flight must stop + // enforcing the budget on subsequent retries; enabled=false→true + // mid-flight must start. Gating at the pointer level would miss + // both directions. + // + // The `!retry_token_held_` guard is defensive — Cleanup() between + // retry attempts always releases the prior token. + bool breaker_live_enabled = slice_ && slice_->config().enabled; + if (retry_budget_ && breaker_live_enabled && + attempt_ > 0 && !retry_token_held_) { + bool is_dry_run = slice_->config().dry_run; + if (retry_budget_->TryConsumeRetry()) { + retry_token_held_ = true; + } else if (is_dry_run) { + logging::Get()->info( + "ProxyTransaction retry budget would-reject (dry-run) " + "client_fd={} service={} attempt={}", + client_fd_, service_name_, attempt_); + } else { + logging::Get()->warn( + "retry budget exhausted service={} in_flight={} " + "retries_in_flight={} cap={} client_fd={} attempt={}", + service_name_, + retry_budget_->InFlight(), + retry_budget_->RetriesInFlight(), + retry_budget_->ComputeCap(), + client_fd_, attempt_); + // CRITICAL: release the slice admission before bailing. + // ConsultBreaker() already admitted this attempt — in + // HALF_OPEN that means a probe slot was reserved + // (half_open_inflight_ / half_open_admitted_ both + // incremented). Returning here without releasing would + // strand that slot forever, wedging the slice in + // half_open_full until an operator-driven reload/reset. + // Neutral release decrements both counters for probes; + // no-op for non-probe (CLOSED) admissions, matching the + // general "local cause, no upstream signal" semantic. 
+ ReleaseBreakerAdmissionNeutral(); + state_ = State::FAILED; + DeliverResponse(MakeRetryBudgetResponse()); + return; + } + } + + // Track this attempt against the host-level retry budget's + // in_flight counter. Gated by the live `enabled` flag so disabling + // the breaker mid-flight stops tracking immediately; enabling it + // starts tracking at the next attempt. No-op when retry_budget_ + // is null (no breaker manager / unknown host). + if (retry_budget_ && breaker_live_enabled) { + inflight_guard_ = retry_budget_->TrackInFlight(); + } + auto self = shared_from_this(); // Lazily allocate the shared cancel token so the pool can drop @@ -149,6 +249,11 @@ void ProxyTransaction::OnCheckoutReady(UpstreamLease lease) { // returns to the pool for another request to use, instead of // sitting idle attached to a torn-down transaction. lease.Release(); + // Release the breaker admission neutrally — the upstream was + // never exercised, and stranding the slot would wedge a + // HALF_OPEN probe cycle. Cancel() may already have released; + // the helper is no-op in that case. + ReleaseBreakerAdmissionNeutral(); return; } if (state_ != State::CHECKOUT_PENDING) { @@ -224,21 +329,72 @@ void ProxyTransaction::OnCheckoutError(int error_code) { // Only retry actual network connect failures. Pool saturation // (POOL_EXHAUSTED, QUEUE_TIMEOUT) and shutdown should fail fast — // retrying under backpressure amplifies load on an already-stressed - // pool and stretches client latency with no benefit. + // pool and stretches client latency with no benefit. A breaker-drain + // reject (CHECKOUT_CIRCUIT_OPEN from the wait-queue drain) is also + // terminal: the + // client gets the same circuit-open response a fresh requester + // would, and the retry loop must not retry it. + // + // Breaker reporting: connect failures (both timeout and refused) are + // upstream-health signals → ReportFailure(CONNECT_FAILURE). 
Local + // capacity (POOL_EXHAUSTED, QUEUE_TIMEOUT) and shutdown are NOT + // reported — they don't imply upstream unhealthiness (design §7). + // CHECKOUT_CIRCUIT_OPEN is also not reported to the breaker (would + // be a feedback loop — our own reject counting against the upstream). + // // Import error codes from PoolPartition: - // CHECKOUT_CONNECT_FAILED = -2 → retryable - // CHECKOUT_CONNECT_TIMEOUT = -3 → retryable - // CHECKOUT_POOL_EXHAUSTED = -1 → not retryable - // CHECKOUT_QUEUE_TIMEOUT = -5 → not retryable - // CHECKOUT_SHUTTING_DOWN = -4 → not retryable + // CHECKOUT_CONNECT_FAILED = -2 → retryable, report CONNECT_FAILURE + // CHECKOUT_CONNECT_TIMEOUT = -3 → retryable, report CONNECT_FAILURE + // CHECKOUT_POOL_EXHAUSTED = -1 → not retryable, neutral-release probe + // CHECKOUT_QUEUE_TIMEOUT = -5 → not retryable, neutral-release probe + // CHECKOUT_SHUTTING_DOWN = -4 → not retryable, neutral-release probe + // CHECKOUT_CIRCUIT_OPEN = -6 → not retryable, do NOT report static constexpr int CONNECT_FAILED = -2; static constexpr int CONNECT_TIMEOUT = -3; + static constexpr int CIRCUIT_OPEN = -6; + + if (error_code == CIRCUIT_OPEN) { + // Drain path: breaker tripped while this transaction was queued. + // Do NOT Report success/failure to the slice — our own reject + // must not feed back into the failure math. Emit the §12.1 + // circuit-open response directly. + logging::Get()->info( + "ProxyTransaction checkout drained by circuit breaker " + "client_fd={} service={}", + client_fd_, service_name_); + // Neutral-release the slice admission instead of just clearing + // admission_generation_. Three drain paths reach here: + // CLOSED→OPEN : closed_gen_ was bumped by the trip; our + // generation is now stale → ReportNeutral + // drops as stale-gen. No state mutation. Safe. 
+ // HALF_OPEN→OPEN : halfopen_gen_ was bumped by the trip AND + // half_open_inflight_/admitted_ reset to 0 by + // TransitionOpenToHalfOpen's sibling path → + // ReportNeutral drops as stale-gen. Safe. + // (Any future same-cycle drain without a generation bump): + // admission_generation_ is still current → + // ReportNeutral correctly returns the slot, + // preventing half_open_inflight_/admitted_ + // from leaking and wedging the slice in + // half_open_full until the next reset. + // ReleaseBreakerAdmissionNeutral clears admission_generation_ + // internally, so Cleanup/destructor won't double-report. + ReleaseBreakerAdmissionNeutral(); + DeliverResponse(MakeCircuitOpenResponse()); + return; + } if (error_code == CONNECT_FAILED || error_code == CONNECT_TIMEOUT) { + // Report connect failure to the breaker BEFORE retrying — + // otherwise the retry's ConsultBreaker might admit against a + // stale success count, delaying trip detection. + ReportBreakerOutcome(RESULT_CHECKOUT_FAILED); MaybeRetry(RetryPolicy::RetryCondition::CONNECT_FAILURE); } else { // Pool exhaustion, queue timeout, or shutdown — local capacity issue. // Use RESULT_POOL_EXHAUSTED → 503 (not 502 which implies upstream failure). + // Release the breaker slot neutrally — admission never reached upstream. + ReportBreakerOutcome(RESULT_POOL_EXHAUSTED); OnError(RESULT_POOL_EXHAUSTED, "Pool checkout failed (local capacity, error=" + std::to_string(error_code) + ")"); @@ -263,6 +419,13 @@ void ProxyTransaction::SendUpstreamRequest() { logging::Get()->warn("ProxyTransaction stale connection before send " "client_fd={} service={} attempt={}", client_fd_, service_name_, attempt_); + // Report to the breaker BEFORE retrying — MaybeRetry's + // AttemptCheckout will overwrite admission_generation_ on the + // next ConsultBreaker. 
Without this call, a probe in HALF_OPEN + // would leak its slot and the slice could stall in + // half_open_full; in CLOSED, the failure would be under-counted + // until the last retry ran through OnError. + ReportBreakerOutcome(RESULT_UPSTREAM_DISCONNECT); MaybeRetry(RetryPolicy::RetryCondition::UPSTREAM_DISCONNECT); return; } @@ -340,6 +503,8 @@ void ProxyTransaction::OnUpstreamData( "state={} attempt={}", client_fd_, service_name_, upstream_fd, static_cast(state_), attempt_); + // Report BEFORE retry — see stale-connection path above for why. + ReportBreakerOutcome(RESULT_UPSTREAM_DISCONNECT); MaybeRetry(RetryPolicy::RetryCondition::UPSTREAM_DISCONNECT); return; } @@ -517,10 +682,20 @@ void ProxyTransaction::OnResponseComplete() { "service={} status={} attempt={}", client_fd_, service_name_, response.status_code, attempt_); + // Report failure BEFORE MaybeRetry — the retry's fresh + // ConsultBreaker must see the just-added failure in the window + // (and potentially reject if this was the trip-causing call). + // Pass a synthetic RESULT_CHECKOUT_FAILED-like signal; the + // classifier maps 5xx → FailureKind::RESPONSE_5XX. + ReportBreakerOutcome(/* sentinel */ -1000); MaybeRetry(RetryPolicy::RetryCondition::RESPONSE_5XX); return; } + // 2xx / 3xx / 4xx: upstream is healthy (from the breaker's + // perspective — 4xx is a client-side problem). Report success. + ReportBreakerOutcome(RESULT_SUCCESS); + state_ = State::COMPLETE; auto duration = std::chrono::duration_cast( @@ -550,8 +725,19 @@ void ProxyTransaction::OnError(int result_code, client_fd_, service_name_, result_code, attempt_, duration.count(), log_message); + // Report the outcome if an admission is still held. 
Most error paths + // call ReportBreakerOutcome themselves BEFORE reaching OnError (so a + // retry's ConsultBreaker sees the fresh signal) — this is a safety + // net for error paths that skipped reporting, e.g., RESULT_SEND_FAILED + // and RESULT_RESPONSE_TIMEOUT from the on-upstream-data paths. + // ReportBreakerOutcome is idempotent: it clears admission_generation_ + // on the first call so a double-call drops harmlessly. + ReportBreakerOutcome(result_code); + state_ = State::FAILED; - HttpResponse error_response = MakeErrorResponse(result_code); + HttpResponse error_response = (result_code == RESULT_CIRCUIT_OPEN) + ? MakeCircuitOpenResponse() + : MakeErrorResponse(result_code); DeliverResponse(std::move(error_response)); } @@ -569,7 +755,14 @@ void ProxyTransaction::MaybeRetry(RetryPolicy::RetryCondition condition) { client_fd_, service_name_, attempt_, static_cast(condition)); - // Release old lease, clear callbacks, poison if tainted + // Release old lease, clear callbacks, poison if tainted. + // Cleanup also releases any retry token held by the previous + // retry attempt so the next TryConsumeRetry in AttemptCheckout + // sees a fresh counter. The retry-budget gate itself now lives + // at the top of AttemptCheckout — that way a delayed retry + // doesn't hold a token during its backoff sleep, which would + // otherwise pollute the budget's retries_in_flight with + // queued-but-sleeping work that hasn't reached the upstream. Cleanup(); codec_.Reset(); // Re-apply request method after reset — llhttp_init() zeroes @@ -734,6 +927,25 @@ void ProxyTransaction::Cancel() { if (state_ != State::INIT && state_ != State::CHECKOUT_PENDING) { poison_connection_ = true; } + // Release any held breaker admission neutrally. Cancel() is always + // a LOCAL termination — client disconnect, framework-level abort, + // H2 stream reset, etc. 
Even when we poisoned a pooled connection + // mid-request, counting that as an upstream-health failure would + // trip the breaker against a backend that may be perfectly healthy + // (browser cancels, user-initiated timeouts, etc. are all common + // causes). The reviewer guidance is explicit: client-initiated + // aborts must be neutral from the breaker's perspective. + // + // Trade-off: in HALF_OPEN, ReportNeutral on a probe decrements + // both inflight and admitted, so a cancelled probe makes the slot + // eligible for a replacement admission in the same cycle. That is + // the documented design contract of ReportNeutral ("the upstream + // wasn't actually exercised by this admission" from the breaker's + // decision-math point of view — we didn't observe a success or + // failure), and it is acceptable: probes that genuinely succeed + // or fail still close / re-trip the cycle normally, and a broken + // upstream under cancel-spam will still fail those real probes. + ReleaseBreakerAdmissionNeutral(); // Release the upstream lease back to the pool (or destroy it if // poisoned) and clear transport callbacks so any in-flight upstream // bytes land harmlessly. @@ -741,6 +953,22 @@ void ProxyTransaction::Cancel() { } void ProxyTransaction::Cleanup() { + // Release any retry-budget token held by the attempt that just + // ended. Must happen BEFORE the next TryConsumeRetry in MaybeRetry + // so the new attempt sees accurate retries_in_flight. Idempotent + // via the retry_token_held_ flag. + ReleaseRetryToken(); + + // Release the in-flight guard from the just-ended attempt. If + // MaybeRetry schedules a delayed backoff, the gap between Cleanup + // and the eventual AttemptCheckout (which would move-assign a + // fresh guard) holds the old slot in `retry_budget_->in_flight_` + // for the entire backoff sleep. That inflates the effective + // denominator of the percent-cap formula, weakening the budget + // exactly during retry storms. 
Move-assign from a default + // (empty) guard decrements the old counter immediately. + inflight_guard_ = circuit_breaker::RetryBudget::InFlightGuard{}; + if (lease_) { auto* conn = lease_.Get(); if (conn) { @@ -851,6 +1079,13 @@ void ProxyTransaction::ArmResponseTimeout(int explicit_budget_ms) { if (self->state_ == State::SENDING_REQUEST || self->state_ == State::AWAITING_RESPONSE || self->state_ == State::RECEIVING_BODY) { + // Report BEFORE retry — MaybeRetry's AttemptCheckout will + // overwrite admission_generation_ on the next + // ConsultBreaker, stranding the current attempt's + // admission (probe slot leaks in HALF_OPEN; CLOSED + // under-counts the failure until the last retry hits + // OnError). + self->ReportBreakerOutcome(RESULT_RESPONSE_TIMEOUT); self->MaybeRetry(RetryPolicy::RetryCondition::RESPONSE_TIMEOUT); } else { self->OnError(RESULT_RESPONSE_TIMEOUT, "Response timeout"); @@ -886,6 +1121,32 @@ HttpResponse ProxyTransaction::MakeErrorResponse(int result_code) { if (result_code == RESULT_POOL_EXHAUSTED) { return HttpResponse::ServiceUnavailable(); } + if (result_code == RESULT_RETRY_BUDGET_EXHAUSTED) { + return MakeRetryBudgetResponse(); + } + if (result_code == RESULT_CIRCUIT_OPEN) { + // The static factory has no `this`, so it cannot build the + // fully §12.1-compliant response (Retry-After derived from + // slice state, X-Upstream-Host). All in-class paths for + // CIRCUIT_OPEN use the non-static MakeCircuitOpenResponse() + // — reaching this branch means a future caller forgot that + // rule. Log loudly so the mistake shows up in logs instead + // of producing a stealth regression against the contract. + // + // Still emit `X-Circuit-Breaker: open` + `Connection: close` + // so the response remains self-identifying as a circuit-open + // reject. Clients inspecting that header will correctly back + // off via their own client-side logic rather than treating + // this as an anonymous 503. 
+ logging::Get()->error( + "ProxyTransaction::MakeErrorResponse(RESULT_CIRCUIT_OPEN) " + "invoked from static context — use MakeCircuitOpenResponse() " + "to emit §12.1-compliant headers"); + HttpResponse resp = HttpResponse::ServiceUnavailable(); + resp.Header("X-Circuit-Breaker", "open"); + resp.Header("Connection", "close"); + return resp; + } if (result_code == RESULT_CHECKOUT_FAILED || result_code == RESULT_SEND_FAILED || result_code == RESULT_PARSE_ERROR || @@ -894,3 +1155,225 @@ HttpResponse ProxyTransaction::MakeErrorResponse(int result_code) { } return HttpResponse::InternalError(); } + +HttpResponse ProxyTransaction::MakeCircuitOpenResponse() const { + // TryAcquire() returns REJECTED_OPEN for three distinct situations: + // * True OPEN: slice is in OPEN state, IsOpenDeadlineSet() is true, + // Retry-After reflects remaining backoff from OpenUntil(). + // * HALF_OPEN reject (half_open_full or half_open_recovery_failing): + // slice transitioned HALF_OPEN via TransitionOpenToHalfOpen, which + // clears open_until. IsOpenDeadlineSet() is false. These rejects + // wait on the in-flight probe cycle completing (success → CLOSED, + // failure → re-trip with fresh backoff). Retry-After = 1 in this + // branch would under-report the likely wait on a re-trip; ceil to + // base_open_duration_ms as a conservative hint (the worst case is + // re-trip + fresh backoff window). + // Emit a distinct X-Circuit-Breaker label for observability so + // operators can separate "true OPEN" from "HALF_OPEN recovery back- + // pressure" on dashboards. + int retry_after_secs = 1; + const char* breaker_label = "open"; + // Absolute sanity ceiling — independent of config. Protects against + // ridiculous programmatic values that might slip past validation. + static constexpr int RETRY_AFTER_ABS_MAX_SECS = 3600; // 1 hour + if (slice_) { + if (slice_->IsOpenDeadlineSet()) { + // True OPEN — Retry-After from the actual stored deadline. 
+ // The deadline is authoritative: it's what the slice will + // actually honor, regardless of any subsequent config + // reload that might lower max_open_duration_ms. Clamping + // below the stored deadline would tell well-behaved clients + // to retry early and bounce on more 503s until the original + // deadline elapses. + auto open_until = slice_->OpenUntil(); + auto now = std::chrono::steady_clock::now(); + auto ms_remaining = std::chrono::duration_cast<std::chrono::milliseconds>( + open_until - now).count(); + // Ceiling-round to seconds so we never advertise a window + // shorter than the actual remaining backoff. + int64_t diff = (ms_remaining + 999) / 1000; + if (diff < 1) diff = 1; + if (diff > RETRY_AFTER_ABS_MAX_SECS) diff = RETRY_AFTER_ABS_MAX_SECS; + retry_after_secs = static_cast<int>(diff); + breaker_label = "open"; + } else if (slice_->CurrentState() == + circuit_breaker::State::HALF_OPEN) { + // HALF_OPEN reject — no deadline to read. Hint with the + // NEXT expected open duration (base << consecutive_trips_, + // clamped by max_open_duration_ms) rather than base alone: + // after multiple trips, exponential backoff has already + // grown the OPEN window, and advertising bare base would + // tell clients to retry far earlier than the breaker will + // admit even in the worst case (probe cycle fails, slice + // re-trips into the larger backoff). + int64_t next_ms = slice_->NextOpenDurationMs(); + int hint = static_cast<int>( + std::max<int64_t>(1, (next_ms + 999) / 1000)); + retry_after_secs = std::min(hint, RETRY_AFTER_ABS_MAX_SECS); + breaker_label = "half_open"; + } + // Any other state (CLOSED): shouldn't reach here — ConsultBreaker + // only calls this on REJECTED_OPEN. Fall through with the + // conservative defaults (Retry-After=1, label="open") so a + // regression can't silently emit Retry-After=0. 
+ } + + HttpResponse resp; + resp.Status(HttpStatus::SERVICE_UNAVAILABLE); + resp.Text("Upstream circuit breaker is open; please retry later.\n"); + resp.Header("Retry-After", std::to_string(retry_after_secs)); + resp.Header("X-Circuit-Breaker", breaker_label); + // Hint operators (not clients) at which upstream tripped. Useful + // when a gateway fronts multiple backends; without this header, a + // 503 is opaque. + resp.Header("X-Upstream-Host", + upstream_host_ + ":" + std::to_string(upstream_port_)); + resp.Header("Connection", "close"); + return resp; +} + +HttpResponse ProxyTransaction::MakeRetryBudgetResponse() { + HttpResponse resp; + resp.Status(HttpStatus::SERVICE_UNAVAILABLE); + resp.Text("Upstream retry budget exhausted.\n"); + resp.Header("X-Retry-Budget-Exhausted", "1"); + resp.Header("Connection", "close"); + return resp; +} + +bool ProxyTransaction::ConsultBreaker() { + if (!slice_) { + // No breaker attached for this service. Proceed as if the + // breaker layer didn't exist. admission_generation_ stays 0 so + // any accidental ReportBreakerOutcome call is a no-op. + is_probe_ = false; + admission_generation_ = 0; + return true; + } + auto admission = slice_->TryAcquire(); + + // Stash the admission metadata for the paired Report*() call. Note + // we record this EVEN for REJECTED_OPEN (where generation_==0 is a + // sentinel) — it's harmless and keeps the branches simpler. + admission_generation_ = admission.generation; + is_probe_ = (admission.decision == + circuit_breaker::Decision::ADMITTED_PROBE); + + if (admission.decision == circuit_breaker::Decision::REJECTED_OPEN) { + // Hard reject — slice counted it, logged it, and we must not + // touch the upstream. Emit §12.1 response and DO NOT Report + // back (would create a feedback loop — our own reject counting + // as a failure against the already-OPEN slice). 
+ state_ = State::FAILED; + logging::Get()->info( + "ProxyTransaction circuit-open reject client_fd={} service={} " + "attempt={}", + client_fd_, service_name_, attempt_); + DeliverResponse(MakeCircuitOpenResponse()); + // Clear admission_generation_ — there's nothing to Report. + admission_generation_ = 0; + return false; + } + + // REJECTED_OPEN_DRYRUN: slice logged the would-reject and counted + // it; caller proceeds to the upstream. Fall through as admitted. + // ADMITTED / ADMITTED_PROBE: proceed. + return true; +} + +void ProxyTransaction::ReleaseRetryToken() { + if (retry_token_held_ && retry_budget_) { + retry_budget_->ReleaseRetry(); + } + retry_token_held_ = false; +} + +void ProxyTransaction::ReleaseBreakerAdmissionNeutral() { + if (!slice_ || admission_generation_ == 0) return; + + uint64_t gen = admission_generation_; + admission_generation_ = 0; + bool probe = is_probe_; + is_probe_ = false; + + // Neutral release — no upstream health signal. Decrements the + // per-partition inflight (CLOSED) or the HALF_OPEN probe admitted + // counter, so a cancelled probe doesn't wedge the slice in + // half_open_full. + slice_->ReportNeutral(probe, gen); +} + +void ProxyTransaction::ReportBreakerOutcome(int result_code) { + // No slice, or already reported: bail. admission_generation_==0 is + // the sentinel — slice domain generations start at 1, so a 0 gen + // would be rejected as stale anyway; the early return just avoids + // an unnecessary atomic load. The Report* methods themselves are + // idempotent against stale gens, but we also must not increment a + // probe_*/rejected_ counter for a non-event. + if (!slice_ || admission_generation_ == 0) return; + + // Capture + clear in one go so concurrent / re-entrant calls bail. 
+ uint64_t gen = admission_generation_; + admission_generation_ = 0; + bool probe = is_probe_; + is_probe_ = false; + + using circuit_breaker::FailureKind; + + // Synthetic sentinel for the OnResponseComplete 5xx path — maps to + // RESPONSE_5XX without needing a new public result code. Callers + // other than OnResponseComplete never use this value. + static constexpr int SENTINEL_5XX = -1000; + + switch (result_code) { + case RESULT_SUCCESS: + slice_->ReportSuccess(probe, gen); + return; + + case SENTINEL_5XX: + slice_->ReportFailure(FailureKind::RESPONSE_5XX, probe, gen); + return; + + case RESULT_CHECKOUT_FAILED: + slice_->ReportFailure(FailureKind::CONNECT_FAILURE, probe, gen); + return; + + case RESULT_RESPONSE_TIMEOUT: + slice_->ReportFailure(FailureKind::RESPONSE_TIMEOUT, probe, gen); + return; + + case RESULT_UPSTREAM_DISCONNECT: + case RESULT_SEND_FAILED: + slice_->ReportFailure(FailureKind::UPSTREAM_DISCONNECT, probe, gen); + return; + + case RESULT_POOL_EXHAUSTED: + case RESULT_PARSE_ERROR: + // Local outcomes — no upstream health signal. Release the + // admission slot neutrally so a probe doesn't leak the + // HALF_OPEN slot. + slice_->ReportNeutral(probe, gen); + return; + + case RESULT_CIRCUIT_OPEN: + case RESULT_RETRY_BUDGET_EXHAUSTED: + // Our own rejects — MUST NOT feed back into the slice. + // These paths should not reach ReportBreakerOutcome (both + // clear admission_generation_ before delivering), but the + // defensive branch keeps the class-wide invariant: these + // outcomes are invisible to the breaker. + return; + + default: + // Unknown result code — log and neutral-release to keep the + // probe bookkeeping consistent. A runtime log here is + // cheaper than a slice stuck in HALF_OPEN forever because a + // new result code slipped through unclassified. 
+ logging::Get()->error( + "ReportBreakerOutcome: unclassified result_code={} " + "service={} — releasing neutrally", + result_code, service_name_); + slice_->ReportNeutral(probe, gen); + return; + } +} diff --git a/server/retry_budget.cc b/server/retry_budget.cc new file mode 100644 index 00000000..9723d949 --- /dev/null +++ b/server/retry_budget.cc @@ -0,0 +1,97 @@ +#include "circuit_breaker/retry_budget.h" + +namespace circuit_breaker { + +namespace { + +// Clamp floors for direct-ctor / Reload callers that bypass +// ConfigLoader::Validate(). Mirrors the hardening elsewhere in the +// circuit-breaker code (window ctor, probe budget snapshot, +// ComputeOpenDuration) so programmatic callers can't disable the +// budget by passing pathological values. +// percent < 0 → 0 (pure min_concurrency floor, no %-based cap) +// percent > 100 → 100 (retries capped at total in_flight) +// min_concurrency < 0 → 0 (no floor) +int ClampPercent(int p) { + if (p < 0) return 0; + if (p > 100) return 100; + return p; +} +int ClampMinConcurrency(int m) { + return m < 0 ? 0 : m; +} + +} // namespace + +RetryBudget::RetryBudget(int percent, int min_concurrency) + : percent_(ClampPercent(percent)), + min_concurrency_(ClampMinConcurrency(min_concurrency)) {} + +RetryBudget::InFlightGuard RetryBudget::TrackInFlight() { + in_flight_.fetch_add(1, std::memory_order_relaxed); + return InFlightGuard(&in_flight_); +} + +bool RetryBudget::TryConsumeRetry() { + // Snapshot tuning + both in-flight counters once so the cap is + // computed against a consistent slice. Retrying the cap math inside + // the CAS loop would just churn without improving accuracy + // (in_flight is inherently a moving target). 
+ int64_t in_flight = in_flight_.load(std::memory_order_relaxed); + int64_t retries_in_flight = retries_in_flight_.load(std::memory_order_relaxed); + int pct = percent_.load(std::memory_order_relaxed); + int min_conc = min_concurrency_.load(std::memory_order_relaxed); + + // cap = max(min_concurrency, (in_flight - retries_in_flight) * percent / 100) + // + // Subtracting retries from the in_flight base prevents the budget + // from self-inflating: callers hold TrackInFlight() for BOTH first- + // attempts and retries (per the documented API), so admitting a + // retry increases in_flight_. Using the raw in_flight as the base + // would then increase the cap, which in steady state converges + // above the configured percentage of ORIGINAL traffic (e.g. a 20% + // budget with retries counted in would allow ~25% of originals to + // retry simultaneously; at higher percents the amplification grows + // faster). + // + // Floor the subtraction at 0: `retries_in_flight > in_flight` is + // transiently possible under racing increments (retry admitted and + // in_flight guard observed before first-attempt guard's pair) — + // clamp rather than letting the multiply go negative. + int64_t non_retry_in_flight = in_flight - retries_in_flight; + if (non_retry_in_flight < 0) non_retry_in_flight = 0; + int64_t pct_cap = (non_retry_in_flight * pct) / 100; + int64_t cap = pct_cap > min_conc ? pct_cap : min_conc; + + // Atomically reserve a slot: load current, verify under cap, CAS up + // by 1. Separate load + fetch_add would let N concurrent callers + // all observe current < cap and all increment past the cap — under + // the cross-dispatcher load the retry budget is meant to protect + // against, the gate would stop bounding anything. 
+ int64_t current = retries_in_flight; + while (current < cap) { + if (retries_in_flight_.compare_exchange_weak( + current, current + 1, + std::memory_order_acq_rel, + std::memory_order_relaxed)) { + return true; + } + // CAS failure — `current` was updated with the latest value; + // loop re-evaluates against cap. Spurious wakeups on weak CAS + // are also handled by the retry. + } + retries_rejected_.fetch_add(1, std::memory_order_relaxed); + return false; +} + +void RetryBudget::ReleaseRetry() { + retries_in_flight_.fetch_sub(1, std::memory_order_relaxed); +} + +void RetryBudget::Reload(int percent, int min_concurrency) { + percent_.store(ClampPercent(percent), std::memory_order_relaxed); + min_concurrency_.store(ClampMinConcurrency(min_concurrency), + std::memory_order_relaxed); +} + +} // namespace circuit_breaker diff --git a/server/upstream_manager.cc b/server/upstream_manager.cc index 9cd5a284..c4a4314f 100644 --- a/server/upstream_manager.cc +++ b/server/upstream_manager.cc @@ -296,3 +296,13 @@ Dispatcher* UpstreamManager::GetDispatcherForIndex(size_t index) const { bool UpstreamManager::HasUpstream(const std::string& service_name) const { return pools_.find(service_name) != pools_.end(); } + +PoolPartition* UpstreamManager::GetPoolPartition( + const std::string& service_name, + size_t dispatcher_index) { + auto it = pools_.find(service_name); + if (it == pools_.end()) { + return nullptr; + } + return it->second->GetPartition(dispatcher_index); +} diff --git a/test/circuit_breaker_components_test.h b/test/circuit_breaker_components_test.h new file mode 100644 index 00000000..36285b16 --- /dev/null +++ b/test/circuit_breaker_components_test.h @@ -0,0 +1,507 @@ +#pragma once + +#include "test_framework.h" +#include "config/server_config.h" +#include "circuit_breaker/circuit_breaker_state.h" +#include "circuit_breaker/circuit_breaker_slice.h" +#include "circuit_breaker/retry_budget.h" +#include "circuit_breaker/circuit_breaker_host.h" +#include 
"circuit_breaker/circuit_breaker_manager.h" +#include "dispatcher.h" + +#include +#include +#include +#include + +// Circuit-breaker component unit tests: RetryBudget, CircuitBreakerHost, +// CircuitBreakerManager. +// +// These tests exercise the standalone data structures without any +// integration into the request path (covered by the integration suite). +// Every test constructs the object under test in isolation — no live +// dispatchers, no network I/O. A minimal Dispatcher is instantiated only +// where CircuitBreakerHost::Reload needs one to enqueue per-slice Reload +// calls. +namespace CircuitBreakerComponentsTests { + +using circuit_breaker::CircuitBreakerHost; +using circuit_breaker::CircuitBreakerHostSnapshot; +using circuit_breaker::CircuitBreakerManager; +using circuit_breaker::Decision; +using circuit_breaker::FailureKind; +using circuit_breaker::RetryBudget; +using circuit_breaker::State; + +static CircuitBreakerConfig DefaultCbConfig() { + CircuitBreakerConfig cb; + cb.enabled = true; + cb.consecutive_failure_threshold = 5; + cb.failure_rate_threshold = 50; + cb.minimum_volume = 20; + cb.window_seconds = 10; + cb.permitted_half_open_calls = 3; + cb.base_open_duration_ms = 5000; + cb.max_open_duration_ms = 60000; + cb.retry_budget_percent = 20; + cb.retry_budget_min_concurrency = 3; + return cb; +} + +// ============================================================================ +// RetryBudget tests +// ============================================================================ + +// Min-concurrency floor: with tiny in_flight, min_concurrency still permits +// the configured floor of concurrent retries (otherwise a 20% budget allows 0 +// retries when in_flight < 5 — useless in low-volume services). +void TestRetryBudgetMinConcurrencyFloor() { + std::cout << "\n[TEST] RetryBudget: min_concurrency floor permits retries..." + << std::endl; + try { + // percent=20, min=3. Even with 0 in_flight, 3 retries allowed. 
+ RetryBudget rb(20, 3); + + // Without any in_flight, min floor is what gates us. + bool r1 = rb.TryConsumeRetry(); // 1/3 + bool r2 = rb.TryConsumeRetry(); // 2/3 + bool r3 = rb.TryConsumeRetry(); // 3/3 + bool r4 = rb.TryConsumeRetry(); // over → rejected + + bool pass = r1 && r2 && r3 && !r4 && + rb.RetriesInFlight() == 3 && + rb.RetriesRejected() == 1; + + rb.ReleaseRetry(); rb.ReleaseRetry(); rb.ReleaseRetry(); + pass = pass && rb.RetriesInFlight() == 0; + + TestFramework::RecordTest("RetryBudget min_concurrency floor", pass, + pass ? "" : "r1=" + std::to_string(r1) + + " r2=" + std::to_string(r2) + + " r3=" + std::to_string(r3) + + " r4=" + std::to_string(r4) + + " inflight=" + std::to_string(rb.RetriesInFlight()) + + " rejected=" + std::to_string(rb.RetriesRejected()), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("RetryBudget min_concurrency floor", false, + e.what(), TestFramework::TestCategory::OTHER); + } +} + +// Percent-based cap scales with in_flight. +// percent=20, min=0, in_flight=50 → cap = 10 retries. +void TestRetryBudgetPercentCap() { + std::cout << "\n[TEST] RetryBudget: percent cap scales with in_flight..." + << std::endl; + try { + RetryBudget rb(20, 0); // no min floor — pure percent + + // Push in_flight to 50 via guards that we intentionally keep + // alive. Per the documented API, callers hold TrackInFlight() + // for BOTH first attempts and retries — but TryConsumeRetry + // subtracts retries_in_flight from the base so the budget + // doesn't self-inflate as retries are admitted. + std::vector guards; + for (int i = 0; i < 50; ++i) guards.push_back(rb.TrackInFlight()); + + // With 50 non-retry in-flight and 20% budget the first + // admission is against cap=10, but each admission shrinks the + // non-retry base by 1. The admission count converges at r + // where r >= floor((50-r) * 20 / 100). Solving: r = 8. 
The + // pre-fix formula (cap computed from raw in_flight) would + // admit 10, drifting the effective ratio above 20% of + // originals. + int admitted = 0; + for (int i = 0; i < 20; ++i) { + if (rb.TryConsumeRetry()) ++admitted; + } + bool cap_hit = admitted == 8; + bool rejected_count = rb.RetriesRejected() == 12; + + // Release guards — in_flight drops to 0; future TryConsumeRetry with + // min=0 and in_flight=0 rejects everything. + for (auto& g : guards) (void)std::move(g); + guards.clear(); + for (int i = 0; i < admitted; ++i) rb.ReleaseRetry(); + + bool pass = cap_hit && rejected_count && rb.InFlight() == 0 && + rb.RetriesInFlight() == 0; + TestFramework::RecordTest("RetryBudget percent cap", pass, + pass ? "" : "admitted=" + std::to_string(admitted) + + " rejected=" + std::to_string(rb.RetriesRejected()) + + " inflight=" + std::to_string(rb.InFlight()), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("RetryBudget percent cap", false, + e.what(), TestFramework::TestCategory::OTHER); + } +} + +// TrackInFlight guards must be RAII-safe: destroying the guard decrements +// in_flight_; moving the guard transfers ownership; self-move safe. +void TestRetryBudgetInFlightGuardRaii() { + std::cout << "\n[TEST] RetryBudget: InFlightGuard RAII..." << std::endl; + try { + RetryBudget rb(20, 3); + + bool zero_init = rb.InFlight() == 0; + { + auto g = rb.TrackInFlight(); + bool one_after_track = rb.InFlight() == 1; + + // Move-construct: counter transfers, original is empty. + auto g2 = std::move(g); + bool still_one_after_move = rb.InFlight() == 1; + // g is now empty, destroying it decrements nothing. + (void)g; + + // g2 goes out of scope next. 
+ if (!zero_init || !one_after_track || !still_one_after_move) { + TestFramework::RecordTest("RetryBudget InFlightGuard RAII", + false, "mid-test state wrong", + TestFramework::TestCategory::OTHER); + return; + } + } + bool zero_after_drop = rb.InFlight() == 0; + TestFramework::RecordTest("RetryBudget InFlightGuard RAII", + zero_after_drop, + zero_after_drop ? "" : "in_flight not zero after guard drop", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("RetryBudget InFlightGuard RAII", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// Reload updates tuning atomically without resetting in-flight counters — +// the admission formula changes, outstanding retries keep running. +void TestRetryBudgetReloadPreservesCounters() { + std::cout << "\n[TEST] RetryBudget: Reload preserves in-flight..." + << std::endl; + try { + RetryBudget rb(20, 3); + bool r1 = rb.TryConsumeRetry(); // 1/3 + + // Tighten tuning mid-flight. + rb.Reload(10, 1); + + // Outstanding retry is still tracked. + bool inflight_preserved = rb.RetriesInFlight() == 1; + + // New tuning applies — min=1, so 1/1 retry allowed max. + // Current retries_in_flight=1 already, next attempt rejects. + bool r2 = rb.TryConsumeRetry(); + + rb.ReleaseRetry(); + bool cleanup_ok = rb.RetriesInFlight() == 0; + + bool pass = r1 && inflight_preserved && !r2 && cleanup_ok; + TestFramework::RecordTest("RetryBudget Reload preserves counters", pass, + pass ? 
"" : "r1=" + std::to_string(r1) + + " inflight_preserved=" + std::to_string(inflight_preserved) + + " r2=" + std::to_string(r2) + + " cleanup_ok=" + std::to_string(cleanup_ok), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("RetryBudget Reload preserves counters", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// Clamp guards: negative percent / negative min_concurrency are clamped at +// construction (mirrors ConfigLoader::Validate — programmatic callers that +// bypass validation get safe defaults). +void TestRetryBudgetClampsInvalidTuning() { + std::cout << "\n[TEST] RetryBudget: clamps invalid tuning..." << std::endl; + try { + RetryBudget rb(-50, -10); + bool clamped = rb.percent() == 0 && rb.min_concurrency() == 0; + + // Over-max percent clamps to 100. + RetryBudget rb2(500, 5); + bool over_clamped = rb2.percent() == 100; + + // Reload also clamps. + rb.Reload(-1, -1); + bool reload_clamped = rb.percent() == 0 && rb.min_concurrency() == 0; + + bool pass = clamped && over_clamped && reload_clamped; + TestFramework::RecordTest("RetryBudget clamps invalid tuning", pass, + pass ? "" : + "clamped=" + std::to_string(clamped) + + " over_clamped=" + std::to_string(over_clamped) + + " reload_clamped=" + std::to_string(reload_clamped), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("RetryBudget clamps invalid tuning", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// ============================================================================ +// CircuitBreakerHost tests +// ============================================================================ + +// Host creates partition_count slices, GetSlice looks up by index, out-of- +// range returns nullptr (not a crash). +void TestHostCreatesSlicesAndGetSlice() { + std::cout << "\n[TEST] CircuitBreakerHost: creates slices + GetSlice..." 
+ << std::endl; + try { + auto cb = DefaultCbConfig(); + CircuitBreakerHost host("svc", "10.0.0.1", 8080, 4, cb); + + bool count_ok = host.partition_count() == 4; + bool slice0 = host.GetSlice(0) != nullptr; + bool slice3 = host.GetSlice(3) != nullptr; + bool slice4_null = host.GetSlice(4) == nullptr; // out of range + bool slice_big_null = host.GetSlice(100) == nullptr; + + // Retry budget always present. + bool rb_present = host.GetRetryBudget() != nullptr; + + // Field getters. + bool fields_ok = host.service_name() == "svc" && + host.host() == "10.0.0.1" && + host.port() == 8080; + + bool pass = count_ok && slice0 && slice3 && slice4_null && + slice_big_null && rb_present && fields_ok; + TestFramework::RecordTest("CircuitBreakerHost GetSlice", pass, "", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CircuitBreakerHost GetSlice", false, + e.what(), TestFramework::TestCategory::OTHER); + } +} + +// Host Snapshot aggregates counters across slices and rolls up states. +void TestHostSnapshotAggregates() { + std::cout << "\n[TEST] CircuitBreakerHost: Snapshot aggregates..." + << std::endl; + try { + auto cb = DefaultCbConfig(); + cb.consecutive_failure_threshold = 2; + cb.failure_rate_threshold = 100; + cb.minimum_volume = 1000; + CircuitBreakerHost host("svc", "h", 80, 3, cb); + + // Trip slice 0 and 2 → 2 open_partitions, 1 closed. 
+ for (int p : {0, 2}) { + auto* s = host.GetSlice(p); + for (int i = 0; i < 2; ++i) { + auto a = s->TryAcquire(); + s->ReportFailure(FailureKind::RESPONSE_5XX, false, a.generation); + } + } + + auto snap = host.Snapshot(); + + bool rows_ok = snap.slices.size() == 3; + bool total_trips = snap.total_trips == 2; + bool open = snap.open_partitions == 2; + bool halfopen = snap.half_open_partitions == 0; + bool svc_ok = snap.service_name == "svc" && + snap.host == "h" && snap.port == 80; + + bool pass = rows_ok && total_trips && open && halfopen && svc_ok; + TestFramework::RecordTest("CircuitBreakerHost Snapshot aggregates", pass, + pass ? "" : + "rows=" + std::to_string(snap.slices.size()) + + " trips=" + std::to_string(snap.total_trips) + + " open=" + std::to_string(snap.open_partitions), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CircuitBreakerHost Snapshot aggregates", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// Host Reload with mismatched dispatcher count logs error and does nothing. +// Uses an empty dispatcher vector — the mismatch path must NOT dereference. +void TestHostReloadDispatcherMismatchIsSafe() { + std::cout << "\n[TEST] CircuitBreakerHost: Reload dispatcher mismatch..." + << std::endl; + try { + auto cb = DefaultCbConfig(); + CircuitBreakerHost host("svc", "h", 80, 3, cb); + + auto new_cb = cb; + new_cb.failure_rate_threshold = 80; + + // Mismatch: 0 dispatchers vs 3 slices. Must not crash, must not + // apply (retry budget atomics should stay at old values). + std::vector> empty; + host.Reload(empty, new_cb); + + // Retry budget fields should be unchanged — Reload bailed early. + bool rb_unchanged = + host.GetRetryBudget()->percent() == cb.retry_budget_percent && + host.GetRetryBudget()->min_concurrency() == + cb.retry_budget_min_concurrency; + + TestFramework::RecordTest("CircuitBreakerHost Reload mismatch is safe", + rb_unchanged, + rb_unchanged ? 
"" : "retry budget incorrectly updated on bail", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CircuitBreakerHost Reload mismatch is safe", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// ============================================================================ +// CircuitBreakerManager tests +// ============================================================================ + +// Manager builds one host per upstream (regardless of enabled). GetHost +// returns non-null for known names and null for unknown. +void TestManagerGetHostLookup() { + std::cout << "\n[TEST] CircuitBreakerManager: GetHost lookup..." + << std::endl; + try { + std::vector upstreams(2); + upstreams[0].name = "svc-a"; + upstreams[0].host = "10.0.0.1"; + upstreams[0].port = 8080; + upstreams[0].circuit_breaker = DefaultCbConfig(); + upstreams[1].name = "svc-b"; + upstreams[1].host = "10.0.0.2"; + upstreams[1].port = 9090; + upstreams[1].circuit_breaker = DefaultCbConfig(); + upstreams[1].circuit_breaker.enabled = false; // disabled still built + + CircuitBreakerManager mgr(upstreams, 4, {}); + + bool count_ok = mgr.host_count() == 2; + auto* a = mgr.GetHost("svc-a"); + auto* b = mgr.GetHost("svc-b"); + auto* unknown = mgr.GetHost("nope"); + + bool a_ok = a != nullptr && a->port() == 8080 && + a->partition_count() == 4; + bool b_ok = b != nullptr && b->port() == 9090 && + b->partition_count() == 4; + bool unknown_null = unknown == nullptr; + + bool pass = count_ok && a_ok && b_ok && unknown_null; + TestFramework::RecordTest("CircuitBreakerManager GetHost lookup", pass, + pass ? 
"" : + "count_ok=" + std::to_string(count_ok) + + " a=" + std::to_string(a_ok) + + " b=" + std::to_string(b_ok) + + " unknown_null=" + std::to_string(unknown_null), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CircuitBreakerManager GetHost lookup", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// SnapshotAll returns one entry per host; topology-preserved Reload logs and +// skips new/removed names without crashing. +void TestManagerSnapshotAllAndReloadSkipsTopologyChanges() { + std::cout << "\n[TEST] CircuitBreakerManager: SnapshotAll + Reload skips topology..." + << std::endl; + try { + std::vector upstreams(1); + upstreams[0].name = "svc-a"; + upstreams[0].host = "h"; + upstreams[0].port = 80; + upstreams[0].circuit_breaker = DefaultCbConfig(); + + CircuitBreakerManager mgr(upstreams, 2, {}); + + auto snaps = mgr.SnapshotAll(); + bool one_snapshot = snaps.size() == 1; + bool snap_name_ok = snaps[0].service_name == "svc-a"; + + // Reload with a NEW name + REMOVED existing name — both must log + // warn and do nothing (topology is restart-only). + std::vector new_upstreams(1); + new_upstreams[0].name = "svc-NEW"; + new_upstreams[0].host = "h"; + new_upstreams[0].port = 80; + new_upstreams[0].circuit_breaker = DefaultCbConfig(); + + mgr.Reload(new_upstreams); + + // Manager must still only know about svc-a (the original). + bool original_preserved = mgr.GetHost("svc-a") != nullptr; + bool new_not_added = mgr.GetHost("svc-NEW") == nullptr; + bool count_stable = mgr.host_count() == 1; + + bool pass = one_snapshot && snap_name_ok && original_preserved && + new_not_added && count_stable; + TestFramework::RecordTest( + "CircuitBreakerManager SnapshotAll + topology-skip", pass, + pass ? 
"" : + "one_snap=" + std::to_string(one_snapshot) + + " name_ok=" + std::to_string(snap_name_ok) + + " preserved=" + std::to_string(original_preserved) + + " new_not_added=" + std::to_string(new_not_added) + + " count=" + std::to_string(mgr.host_count()), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CircuitBreakerManager SnapshotAll + topology-skip", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// Empty-name upstream is skipped defensively (ConfigLoader::Validate rejects +// empty names, but manager must not blow up if something slips through). +void TestManagerSkipsEmptyNameUpstream() { + std::cout << "\n[TEST] CircuitBreakerManager: skips empty-name upstream..." + << std::endl; + try { + std::vector upstreams(2); + upstreams[0].name = ""; // defensive — should be skipped + upstreams[0].host = "h"; + upstreams[0].port = 80; + upstreams[0].circuit_breaker = DefaultCbConfig(); + upstreams[1].name = "svc-b"; + upstreams[1].host = "h"; + upstreams[1].port = 81; + upstreams[1].circuit_breaker = DefaultCbConfig(); + + CircuitBreakerManager mgr(upstreams, 2, {}); + + bool pass = mgr.host_count() == 1 && + mgr.GetHost("svc-b") != nullptr && + mgr.GetHost("") == nullptr; + TestFramework::RecordTest( + "CircuitBreakerManager skips empty-name upstream", pass, + pass ? "" : "count=" + std::to_string(mgr.host_count()), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CircuitBreakerManager skips empty-name upstream", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// Run all circuit-breaker component unit tests. 
+void RunAllTests() { + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "CIRCUIT BREAKER - COMPONENT UNIT TESTS" << std::endl; + std::cout << std::string(60, '=') << std::endl; + + TestRetryBudgetMinConcurrencyFloor(); + TestRetryBudgetPercentCap(); + TestRetryBudgetInFlightGuardRaii(); + TestRetryBudgetReloadPreservesCounters(); + TestRetryBudgetClampsInvalidTuning(); + + TestHostCreatesSlicesAndGetSlice(); + TestHostSnapshotAggregates(); + TestHostReloadDispatcherMismatchIsSafe(); + + TestManagerGetHostLookup(); + TestManagerSnapshotAllAndReloadSkipsTopologyChanges(); + TestManagerSkipsEmptyNameUpstream(); +} + +} // namespace CircuitBreakerComponentsTests diff --git a/test/circuit_breaker_integration_test.h b/test/circuit_breaker_integration_test.h new file mode 100644 index 00000000..10e72e5b --- /dev/null +++ b/test/circuit_breaker_integration_test.h @@ -0,0 +1,1213 @@ +#pragma once + +// Integration tests: circuit breaker wired into ProxyTransaction + +// UpstreamManager + HttpServer. Exercises the full request path end-to-end. +// +// Strategy: use a backend that returns 5xx on every request so repeated hits +// trip the breaker via the consecutive-failure threshold. 5xx responses are +// the cheapest way to accumulate failures (no connect timeouts to wait for). +// Low thresholds keep tests fast. + +#include "test_framework.h" +#include "test_server_runner.h" +#include "http_test_client.h" +#include "http/http_server.h" +#include "config/server_config.h" +#include "upstream/upstream_manager.h" +#include "circuit_breaker/circuit_breaker_manager.h" +#include "circuit_breaker/circuit_breaker_host.h" +#include "circuit_breaker/circuit_breaker_slice.h" + +#include +#include +#include + +namespace CircuitBreakerIntegrationTests { + +using circuit_breaker::State; + +// Shared helper: build an upstream config that proxies /echo → backend and +// has a breaker configured with low thresholds for fast trip. 
+static UpstreamConfig MakeBreakerUpstream(const std::string& name, + const std::string& host, + int port, + bool breaker_enabled, + int consecutive_threshold = 3) { + UpstreamConfig u; + u.name = name; + u.host = host; + u.port = port; + u.pool.max_connections = 8; + u.pool.max_idle_connections = 4; + u.pool.connect_timeout_ms = 3000; + u.pool.idle_timeout_sec = 30; + u.pool.max_lifetime_sec = 3600; + u.pool.max_requests_per_conn = 0; + + // Exact-match route — simpler than prefix patterns for integration tests. + u.proxy.route_prefix = "/fail"; + u.proxy.strip_prefix = false; + u.proxy.response_timeout_ms = 2000; + // No retries — keeps the test deterministic: one request = one attempt. + u.proxy.retry.max_retries = 0; + + u.circuit_breaker.enabled = breaker_enabled; + u.circuit_breaker.consecutive_failure_threshold = consecutive_threshold; + // Disable the rate-based trip path — we drive everything through + // consecutive failures to keep the test count predictable. + u.circuit_breaker.failure_rate_threshold = 100; + u.circuit_breaker.minimum_volume = 10000; + u.circuit_breaker.window_seconds = 10; + u.circuit_breaker.permitted_half_open_calls = 2; + u.circuit_breaker.base_open_duration_ms = 500; // short so recovery test is quick + u.circuit_breaker.max_open_duration_ms = 60000; + return u; +} + +// --------------------------------------------------------------------------- +// Test 1: Breaker trips on consecutive 5xx responses and emits circuit-open +// headers on the rejected request. +// --------------------------------------------------------------------------- +void TestBreakerTripsAfterConsecutiveFailures() { + std::cout << "\n[TEST] CB Integration: breaker trips after consecutive 5xx..." + << std::endl; + try { + // Backend always returns 502 — gateway classifies the response as + // FailureKind::RESPONSE_5XX and reports to the breaker on every attempt. 
+ HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [](const HttpRequest&, HttpResponse& resp) { + resp.Status(502).Body("upstream err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + // worker_threads=1 → all TCP connections land on dispatcher 0 + // (NetServer shards new connections by fd%worker_threads), so + // per-request failures accumulate deterministically on slice[0] + // instead of splitting across multiple slices. // single thread → single breaker partition exercised + gw.upstreams.push_back( + MakeBreakerUpstream("bad-svc", "127.0.0.1", backend_port, + /*enabled=*/true, /*threshold=*/3)); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + // Hit the failing backend threshold times — each 502 from backend + // propagates to the client as 502 (gateway pass-through) AND counts + // as a RESPONSE_5XX failure in the breaker. + for (int i = 0; i < 3; ++i) { + std::string r = TestHttpClient::HttpGet(gw_port, "/fail", 3000); + if (!TestHttpClient::HasStatus(r, 502)) { + TestFramework::RecordTest( + "CB Integration: trip after consecutive failures", false, + "pre-trip request " + std::to_string(i) + " expected 502, got: " + + r.substr(0, 32)); + return; + } + } + + // Next request must be rejected by the breaker (not proxied). The + // response is 503 with X-Circuit-Breaker: open and Retry-After. 
+ std::string r = TestHttpClient::HttpGet(gw_port, "/fail", 3000); + bool is_503 = TestHttpClient::HasStatus(r, 503); + bool has_breaker_header = + r.find("X-Circuit-Breaker: open") != std::string::npos || + r.find("x-circuit-breaker: open") != std::string::npos; + bool has_retry_after = + r.find("Retry-After:") != std::string::npos || + r.find("retry-after:") != std::string::npos; + bool has_upstream_host = + r.find("X-Upstream-Host:") != std::string::npos || + r.find("x-upstream-host:") != std::string::npos; + + bool pass = is_503 && has_breaker_header && has_retry_after && + has_upstream_host; + TestFramework::RecordTest( + "CB Integration: trip after consecutive failures", pass, + pass ? "" : + "is_503=" + std::to_string(is_503) + + " breaker_hdr=" + std::to_string(has_breaker_header) + + " retry_after=" + std::to_string(has_retry_after) + + " upstream_host=" + std::to_string(has_upstream_host) + + " body=" + r.substr(0, 256)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Integration: trip after consecutive failures", false, e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 2: When circuit_breaker.enabled=false, the breaker is bypassed entirely. +// The same failure pattern that would trip an enabled breaker must leave the +// pass-through path untouched — every request still reaches the backend. +// --------------------------------------------------------------------------- +void TestBreakerDisabledPassesThrough() { + std::cout << "\n[TEST] CB Integration: disabled breaker passes through..." 
+ << std::endl;
+ try {
+ std::atomic<int> backend_hits{0};
+ HttpServer backend("127.0.0.1", 0);
+ backend.Get("/fail", [&backend_hits](const HttpRequest&, HttpResponse& resp) {
+ backend_hits.fetch_add(1, std::memory_order_relaxed);
+ resp.Status(502).Body("err", "text/plain");
+ });
+ TestServerRunner backend_runner(backend);
+ int backend_port = backend_runner.GetPort();
+
+ ServerConfig gw;
+ gw.bind_host = "127.0.0.1";
+ gw.bind_port = 0;
+ gw.worker_threads = 1;
+ gw.http2.enabled = false;
+ // worker_threads=1 → all TCP connections land on dispatcher 0
+ // (NetServer shards new connections by fd%worker_threads), so
+ // per-request failures accumulate deterministically on slice[0]
+ // instead of splitting across multiple slices.
+ gw.upstreams.push_back(
+ MakeBreakerUpstream("svc", "127.0.0.1", backend_port,
+ /*enabled=*/false, /*threshold=*/3));
+
+ HttpServer gateway(gw);
+ TestServerRunner gw_runner(gateway);
+ int gw_port = gw_runner.GetPort();
+
+ // 10 requests — with breaker disabled, all 10 reach backend.
+ for (int i = 0; i < 10; ++i) {
+ std::string r = TestHttpClient::HttpGet(gw_port, "/fail", 3000);
+ if (!TestHttpClient::HasStatus(r, 502)) {
+ TestFramework::RecordTest(
+ "CB Integration: disabled breaker passes through", false,
+ "request " + std::to_string(i) + " expected 502, got: " +
+ r.substr(0, 32));
+ return;
+ }
+ }
+
+ bool all_hit = backend_hits.load() == 10;
+ TestFramework::RecordTest(
+ "CB Integration: disabled breaker passes through", all_hit,
+ all_hit ? "" :
+ "expected 10 backend hits, got " + std::to_string(backend_hits.load()));
+ } catch (const std::exception& e) {
+ TestFramework::RecordTest(
+ "CB Integration: disabled breaker passes through", false, e.what());
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Test 3: 2xx responses are reported as success — they reset the
+// consecutive-failure counter so the breaker doesn't trip on interleaved
+// success/failure traffic.
+// ---------------------------------------------------------------------------
+void TestSuccessResetsConsecutiveFailureCounter() {
+ std::cout << "\n[TEST] CB Integration: 2xx success resets consecutive-failure counter..."
+ << std::endl;
+ try {
+ std::atomic<bool> fail_mode{true};
+ HttpServer backend("127.0.0.1", 0);
+ // Backend must serve /fail — that's the exact-match route the
+ // proxy forwards (MakeBreakerUpstream sets route_prefix="/fail",
+ // strip_prefix=false). A different backend path would leave
+ // the gateway 404-ing every request without ever exercising
+ // the proxy, and the CLOSED-state assertion below would pass
+ // for the wrong reason.
+ backend.Get("/fail", [&fail_mode](const HttpRequest&, HttpResponse& resp) {
+ if (fail_mode.load()) {
+ resp.Status(502).Body("err", "text/plain");
+ } else {
+ resp.Status(200).Body("ok", "text/plain");
+ }
+ });
+ TestServerRunner backend_runner(backend);
+ int backend_port = backend_runner.GetPort();
+
+ ServerConfig gw;
+ gw.bind_host = "127.0.0.1";
+ gw.bind_port = 0;
+ gw.worker_threads = 1;
+ gw.http2.enabled = false;
+ // worker_threads=1 → all TCP connections land on dispatcher 0
+ // (NetServer shards new connections by fd%worker_threads), so
+ // per-request failures accumulate deterministically on slice[0]
+ // instead of splitting across multiple slices.
+ gw.upstreams.push_back(
+ MakeBreakerUpstream("svc", "127.0.0.1", backend_port,
+ /*enabled=*/true, /*threshold=*/3));
+
+ HttpServer gateway(gw);
+ TestServerRunner gw_runner(gateway);
+ int gw_port = gw_runner.GetPort();
+
+ // Pattern: F F S F F — 5 total: 2 fails, 1 success, 2 fails.
+ // With reset semantics, consecutive_failures_ never exceeds 2 → no trip.
+ for (int i = 0; i < 2; ++i) {
+ TestHttpClient::HttpGet(gw_port, "/fail", 3000); // FAIL
+ }
+ fail_mode.store(false);
+ TestHttpClient::HttpGet(gw_port, "/fail", 3000); // SUCCESS → reset
+ fail_mode.store(true);
+ for (int i = 0; i < 2; ++i) {
+ TestHttpClient::HttpGet(gw_port, "/fail", 3000); // FAIL
+ }
+
+ // Inspect the breaker's state directly. The slice must be CLOSED
+ // AND must have observed activity — without the second check, a
+ // gateway that 404's every request (e.g. because the proxy route
+ // doesn't match) would also pass trivially.
+ auto* cbm = gateway.GetUpstreamManager() ?
+ gateway.GetUpstreamManager()->GetCircuitBreakerManager() : nullptr;
+ auto* host = cbm ? cbm->GetHost("svc") : nullptr;
+ auto* slice = host ? host->GetSlice(0) : nullptr;
+ bool still_closed = slice && slice->CurrentState() == State::CLOSED;
+ // No trip fired: total_trips should be zero for this slice.
+ int64_t trips = slice ? slice->Trips() : -1;
+ bool no_trips = (trips == 0);
+
+ bool pass = still_closed && no_trips;
+ TestFramework::RecordTest(
+ "CB Integration: success resets consecutive counter", pass,
+ pass ? "" :
+ "state=" + std::to_string(static_cast<int>(
+ slice ? slice->CurrentState() : State::CLOSED)) +
+ " trips=" + std::to_string(trips));
+ } catch (const std::exception& e) {
+ TestFramework::RecordTest(
+ "CB Integration: success resets consecutive counter", false, e.what());
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Test 4: After the trip, the live slice state is OPEN. Verifies the
+// integration actually drives the slice state machine (not just the response).
+// ---------------------------------------------------------------------------
+void TestTripDrivesSliceState() {
+ std::cout << "\n[TEST] CB Integration: trip drives slice state to OPEN..."
+ << std::endl;
+ try {
+ HttpServer backend("127.0.0.1", 0);
+ backend.Get("/fail", [](const HttpRequest&, HttpResponse& resp) {
+ resp.Status(502).Body("err", "text/plain");
+ });
+ TestServerRunner backend_runner(backend);
+ int backend_port = backend_runner.GetPort();
+
+ ServerConfig gw;
+ gw.bind_host = "127.0.0.1";
+ gw.bind_port = 0;
+ gw.worker_threads = 1;
+ gw.http2.enabled = false;
+ // worker_threads=1 → all TCP connections land on dispatcher 0
+ // (NetServer shards new connections by fd%worker_threads), so
+ // per-request failures accumulate deterministically on slice[0]
+ // instead of splitting across multiple slices.
+ gw.upstreams.push_back(
+ MakeBreakerUpstream("svc", "127.0.0.1", backend_port,
+ /*enabled=*/true, /*threshold=*/3));
+
+ HttpServer gateway(gw);
+ TestServerRunner gw_runner(gateway);
+ int gw_port = gw_runner.GetPort();
+
+ // 3 failures → trip.
+ for (int i = 0; i < 3; ++i) {
+ TestHttpClient::HttpGet(gw_port, "/fail", 3000);
+ }
+
+ // worker_threads=1 pins all 3 failing requests to dispatcher 0;
+ // still check the aggregate snapshot for robustness — at least
+ // one partition must be OPEN with exactly one trip recorded.
+ auto* cbm = gateway.GetUpstreamManager()->GetCircuitBreakerManager();
+ auto* host = cbm->GetHost("svc");
+ auto snap = host->Snapshot();
+ bool at_least_one_open = snap.open_partitions >= 1;
+ bool one_trip = snap.total_trips == 1;
+ // Sanity: the tripped partition should be the one that saw all 3
+ // failures (consecutive trip is single-slice, not cross-slice).
+ bool single_partition_tripped = snap.open_partitions == 1;
+
+ bool pass = at_least_one_open && one_trip && single_partition_tripped;
+ TestFramework::RecordTest(
+ "CB Integration: trip drives slice state to OPEN", pass,
+ pass ?
"" :
+ "at_least_one_open=" + std::to_string(at_least_one_open) +
+ " one_trip=" + std::to_string(one_trip) +
+ " single_partition=" + std::to_string(single_partition_tripped) +
+ " (open_partitions=" + std::to_string(snap.open_partitions) +
+ ", total_trips=" + std::to_string(snap.total_trips) + ")");
+ } catch (const std::exception& e) {
+ TestFramework::RecordTest(
+ "CB Integration: trip drives slice state to OPEN", false, e.what());
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Test 5: Breaker-rejected requests do NOT hit the backend. After the trip,
+// subsequent requests must be served locally (503) without any upstream I/O.
+// Prevents regression where the gate leaked admissions to a known-bad upstream.
+// ---------------------------------------------------------------------------
+void TestOpenBreakerShortCircuitsUpstreamCall() {
+ std::cout << "\n[TEST] CB Integration: OPEN breaker short-circuits upstream call..."
+ << std::endl;
+ try {
+ std::atomic<int> backend_hits{0};
+ HttpServer backend("127.0.0.1", 0);
+ backend.Get("/fail", [&backend_hits](const HttpRequest&, HttpResponse& resp) {
+ backend_hits.fetch_add(1, std::memory_order_relaxed);
+ resp.Status(502).Body("err", "text/plain");
+ });
+ TestServerRunner backend_runner(backend);
+ int backend_port = backend_runner.GetPort();
+
+ ServerConfig gw;
+ gw.bind_host = "127.0.0.1";
+ gw.bind_port = 0;
+ gw.worker_threads = 1;
+ gw.http2.enabled = false;
+ // worker_threads=1 → all TCP connections land on dispatcher 0
+ // (NetServer shards new connections by fd%worker_threads), so
+ // per-request failures accumulate deterministically on slice[0]
+ // instead of splitting across multiple slices.
+ gw.upstreams.push_back(
+ MakeBreakerUpstream("svc", "127.0.0.1", backend_port,
+ /*enabled=*/true, /*threshold=*/3));
+
+ HttpServer gateway(gw);
+ TestServerRunner gw_runner(gateway);
+ int gw_port = gw_runner.GetPort();
+
+ // 3 failing requests to trip.
+ for (int i = 0; i < 3; ++i) { + TestHttpClient::HttpGet(gw_port, "/fail", 3000); + } + int hits_at_trip = backend_hits.load(); + + // 5 more requests — all should be rejected locally. + for (int i = 0; i < 5; ++i) { + TestHttpClient::HttpGet(gw_port, "/fail", 3000); + } + int hits_after = backend_hits.load(); + + // Backend hits must not grow during the post-trip burst. + bool no_leak = hits_after == hits_at_trip; + TestFramework::RecordTest( + "CB Integration: OPEN short-circuits upstream call", no_leak, + no_leak ? "" : + "backend hits grew from " + std::to_string(hits_at_trip) + + " to " + std::to_string(hits_after) + " after trip"); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Integration: OPEN short-circuits upstream call", false, e.what()); + } +} + +// Sanity check: verify the bare proxy setup works without the breaker +// before blaming the breaker integration. +void TestBareProxyWorks() { + std::cout << "\n[TEST] CB Integration: bare proxy (sanity)..." 
<< std::endl; + try { + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [](const HttpRequest&, HttpResponse& resp) { + resp.Status(502).Body("err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + UpstreamConfig u; + u.name = "svc"; + u.host = "127.0.0.1"; + u.port = backend_port; + u.pool.max_connections = 8; + u.pool.max_idle_connections = 4; + u.pool.connect_timeout_ms = 3000; + u.proxy.route_prefix = "/fail"; + u.proxy.response_timeout_ms = 5000; + u.circuit_breaker.enabled = true; // sanity + breaker enabled + u.circuit_breaker.consecutive_failure_threshold = 3; + u.circuit_breaker.failure_rate_threshold = 100; + u.circuit_breaker.minimum_volume = 10000; + u.circuit_breaker.window_seconds = 10; + u.circuit_breaker.permitted_half_open_calls = 2; + u.circuit_breaker.base_open_duration_ms = 500; + u.circuit_breaker.max_open_duration_ms = 60000; + gw.upstreams.push_back(u); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + std::string r = TestHttpClient::HttpGet(gw_port, "/fail", 5000); + bool pass = TestHttpClient::HasStatus(r, 502); + TestFramework::RecordTest( + "CB Integration: bare proxy sanity", pass, + pass ? "" : "expected 502, got: " + r.substr(0, 128)); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB Integration: bare proxy sanity", + false, e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 7: Retry-After header carries a sensible value — within [1, configured +// max_open_duration_ms / 1000], and in the right ballpark of OpenUntil()-now. 
+// --------------------------------------------------------------------------- +void TestRetryAfterHeaderValue() { + std::cout << "\n[TEST] CB Integration: Retry-After value correctness..." + << std::endl; + try { + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [](const HttpRequest&, HttpResponse& resp) { + resp.Status(502).Body("err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + // base_open_duration 2000ms, max 60_000ms — Retry-After should + // ceiling-round and fall inside [1, 60]. + auto u = MakeBreakerUpstream("svc", "127.0.0.1", backend_port, + /*enabled=*/true, /*threshold=*/3); + u.circuit_breaker.base_open_duration_ms = 2000; + u.circuit_breaker.max_open_duration_ms = 60000; + gw.upstreams.push_back(u); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + // Trip the breaker. + for (int i = 0; i < 3; ++i) { + TestHttpClient::HttpGet(gw_port, "/fail", 3000); + } + + // Capture the open-rejection response. + std::string r = TestHttpClient::HttpGet(gw_port, "/fail", 3000); + bool is_503 = TestHttpClient::HasStatus(r, 503); + + // Extract Retry-After integer value (case-insensitive header). + int retry_after = -1; + const char* markers[] = {"Retry-After:", "retry-after:"}; + for (const char* m : markers) { + auto pos = r.find(m); + if (pos == std::string::npos) continue; + pos += std::string(m).size(); + while (pos < r.size() && (r[pos] == ' ' || r[pos] == '\t')) ++pos; + int val = 0; + bool any = false; + while (pos < r.size() && r[pos] >= '0' && r[pos] <= '9') { + val = val * 10 + (r[pos] - '0'); + any = true; + ++pos; + } + if (any) { retry_after = val; break; } + } + + // Contract: value ≥ 1 and ≤ max_open_duration_ms / 1000 (60). 
+ // For base_open_duration 2000ms the remaining-seconds at this
+ // moment is ≤ 2 (probably 1 or 2 after ceiling), so the upper
+ // sanity bound is generous but still rules out 300/3600-class
+ // buggy fallbacks.
+ bool in_range = (retry_after >= 1 && retry_after <= 60);
+ bool reasonable = (retry_after >= 1 && retry_after <= 3);
+
+ bool pass = is_503 && in_range && reasonable;
+ TestFramework::RecordTest(
+ "CB Integration: Retry-After value in range", pass,
+ pass ? "" :
+ "is_503=" + std::to_string(is_503) +
+ " retry_after=" + std::to_string(retry_after) +
+ " body=" + r.substr(0, 256));
+ } catch (const std::exception& e) {
+ TestFramework::RecordTest(
+ "CB Integration: Retry-After value in range", false, e.what());
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Test 8: Retry loop is terminal on CIRCUIT_OPEN — even with max_retries=3,
+// a request that hits an OPEN breaker gets exactly ONE 503 (no retry-flavored
+// second 503). Ensures ReportBreakerOutcome doesn't feed the reject back into
+// the breaker and MaybeRetry stays out.
+// ---------------------------------------------------------------------------
+void TestCircuitOpenTerminalForRetry() {
+ std::cout << "\n[TEST] CB Integration: CIRCUIT_OPEN terminal for retry loop..."
+ << std::endl;
+ try {
+ std::atomic<int> backend_hits{0};
+ HttpServer backend("127.0.0.1", 0);
+ backend.Get("/fail", [&backend_hits](const HttpRequest&, HttpResponse& resp) {
+ backend_hits.fetch_add(1, std::memory_order_relaxed);
+ resp.Status(502).Body("err", "text/plain");
+ });
+ TestServerRunner backend_runner(backend);
+ int backend_port = backend_runner.GetPort();
+
+ ServerConfig gw;
+ gw.bind_host = "127.0.0.1";
+ gw.bind_port = 0;
+ gw.worker_threads = 1;
+ gw.http2.enabled = false;
+ // Retries enabled on 5xx — if the breaker reject leaked into
+ // MaybeRetry, the test would see extra backend hits after the
+ // trip.
Long open window so the breaker stays OPEN for the + // duration of the post-trip assertion (no HALF_OPEN probe + // admission racing the test). + auto u = MakeBreakerUpstream("svc", "127.0.0.1", backend_port, + /*enabled=*/true, /*threshold=*/3); + u.proxy.retry.max_retries = 3; + u.proxy.retry.retry_on_5xx = true; + u.circuit_breaker.base_open_duration_ms = 30000; + u.circuit_breaker.max_open_duration_ms = 60000; + gw.upstreams.push_back(u); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + // Trip the breaker. Each pre-trip request may retry up to 3 + // times (all failing 5xx), so backend sees up to 3*threshold=12 + // hits. That's acceptable — we just care about post-trip behavior. + for (int i = 0; i < 3; ++i) { + TestHttpClient::HttpGet(gw_port, "/fail", 5000); + } + int pre_trip_hits = backend_hits.load(); + + // Post-trip request: expect a single 503 and NO new backend hits. + std::string r = TestHttpClient::HttpGet(gw_port, "/fail", 3000); + bool is_503 = TestHttpClient::HasStatus(r, 503); + int post_trip_hits = backend_hits.load(); + bool no_new_hits = (post_trip_hits == pre_trip_hits); + + bool pass = is_503 && no_new_hits; + TestFramework::RecordTest( + "CB Integration: CIRCUIT_OPEN terminal for retry", pass, + pass ? "" : + "is_503=" + std::to_string(is_503) + + " pre=" + std::to_string(pre_trip_hits) + + " post=" + std::to_string(post_trip_hits)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Integration: CIRCUIT_OPEN terminal for retry", false, e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 9: Dry-run mode — dry_run=true forwards rejected requests to the +// upstream (pass-through) but still increments the rejected_ counter so +// operators can observe the would-reject rate without production impact. 
+// ---------------------------------------------------------------------------
+void TestDryRunPassthrough() {
+ std::cout << "\n[TEST] CB Integration: dry-run passthrough..." << std::endl;
+ try {
+ std::atomic<int> backend_hits{0};
+ HttpServer backend("127.0.0.1", 0);
+ backend.Get("/fail", [&backend_hits](const HttpRequest&, HttpResponse& resp) {
+ backend_hits.fetch_add(1, std::memory_order_relaxed);
+ resp.Status(502).Body("err", "text/plain");
+ });
+ TestServerRunner backend_runner(backend);
+ int backend_port = backend_runner.GetPort();
+
+ ServerConfig gw;
+ gw.bind_host = "127.0.0.1";
+ gw.bind_port = 0;
+ gw.worker_threads = 1;
+ gw.http2.enabled = false;
+ auto u = MakeBreakerUpstream("svc", "127.0.0.1", backend_port,
+ /*enabled=*/true, /*threshold=*/3);
+ u.circuit_breaker.dry_run = true; // would-reject, but still forward
+ gw.upstreams.push_back(u);
+
+ HttpServer gateway(gw);
+ TestServerRunner gw_runner(gateway);
+ int gw_port = gw_runner.GetPort();
+
+ // Trip thresholds with 5 requests. All should reach backend (502),
+ // not a 503 — dry-run never short-circuits.
+ for (int i = 0; i < 5; ++i) {
+ std::string r = TestHttpClient::HttpGet(gw_port, "/fail", 3000);
+ if (!TestHttpClient::HasStatus(r, 502)) {
+ TestFramework::RecordTest(
+ "CB Integration: dry-run passthrough", false,
+ "request " + std::to_string(i) +
+ " expected 502, got: " + r.substr(0, 64));
+ return;
+ }
+ }
+
+ bool all_hit = (backend_hits.load() == 5);
+
+ // Verify the slice observed trips/rejected even though traffic passed.
+ auto* mgr = gateway.GetUpstreamManager() ?
+ gateway.GetUpstreamManager()->GetCircuitBreakerManager() : + nullptr; + int64_t trips = 0, rejected = 0; + if (mgr) { + auto* host = mgr->GetHost("svc"); + if (host) { + auto snap = host->Snapshot(); + trips = snap.total_trips; + rejected = snap.total_rejected; + } + } + // At least one trip fired (consecutive_threshold=3 → slice + // transitioned at least once during the run), and the post-trip + // requests were counted as would-reject (rejected > 0). + bool observed = (trips >= 1) && (rejected >= 1); + + bool pass = all_hit && observed; + TestFramework::RecordTest( + "CB Integration: dry-run passthrough", pass, + pass ? "" : + "hits=" + std::to_string(backend_hits.load()) + + " trips=" + std::to_string(trips) + + " rejected=" + std::to_string(rejected)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Integration: dry-run passthrough", false, e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 10: HALF_OPEN → CLOSED recovery round-trip through the proxy. Trip the +// breaker, wait for the open window to elapse, then serve success responses +// and assert the slice transitions back to CLOSED (consecutive_successes +// crosses the threshold — default 2 from DefaultCbConfig / integration config). +// --------------------------------------------------------------------------- +void TestHalfOpenRecoveryRoundTrip() { + std::cout << "\n[TEST] CB Integration: HALF_OPEN → CLOSED recovery..." 
+ << std::endl;
+ try {
+ std::atomic<bool> fail_mode{true};
+ HttpServer backend("127.0.0.1", 0);
+ backend.Get("/fail", [&fail_mode](const HttpRequest&, HttpResponse& resp) {
+ if (fail_mode.load()) {
+ resp.Status(502).Body("err", "text/plain");
+ } else {
+ resp.Status(200).Body("ok", "text/plain");
+ }
+ });
+ TestServerRunner backend_runner(backend);
+ int backend_port = backend_runner.GetPort();
+
+ ServerConfig gw;
+ gw.bind_host = "127.0.0.1";
+ gw.bind_port = 0;
+ gw.worker_threads = 1;
+ gw.http2.enabled = false;
+ auto u = MakeBreakerUpstream("svc", "127.0.0.1", backend_port,
+ /*enabled=*/true, /*threshold=*/3);
+ // Short open duration so recovery path finishes quickly.
+ u.circuit_breaker.base_open_duration_ms = 300;
+ u.circuit_breaker.max_open_duration_ms = 1000;
+ // Two probes needed to close (default permitted_half_open_calls=2).
+ u.circuit_breaker.permitted_half_open_calls = 2;
+ gw.upstreams.push_back(u);
+
+ HttpServer gateway(gw);
+ TestServerRunner gw_runner(gateway);
+ int gw_port = gw_runner.GetPort();
+
+ // Trip by hitting the failing backend.
+ for (int i = 0; i < 3; ++i) {
+ TestHttpClient::HttpGet(gw_port, "/fail", 3000);
+ }
+
+ // Flip backend to success and wait for the open window to elapse.
+ fail_mode.store(false);
+ std::this_thread::sleep_for(std::chrono::milliseconds(500));
+
+ // Probe the proxy — each successful 200 advances HALF_OPEN toward
+ // CLOSED. Do more than permitted_half_open_calls; some will be
+ // rejected as half_open_full but the ones that are admitted will
+ // close the breaker.
+ bool saw_success = false;
+ for (int i = 0; i < 8; ++i) {
+ std::string r = TestHttpClient::HttpGet(gw_port, "/fail", 3000);
+ if (TestHttpClient::HasStatus(r, 200)) saw_success = true;
+ // Small gap between probes — HALF_OPEN only admits permitted
+ // probes per cycle; spacing lets subsequent probes observe a
+ // possibly-closed breaker.
+ std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } + + // Verify slice aggregate: at least one CLOSED transition observed + // (probe_successes >= 1 and total_trips == 1 — we only tripped once). + auto* mgr = gateway.GetUpstreamManager() ? + gateway.GetUpstreamManager()->GetCircuitBreakerManager() : + nullptr; + int64_t probe_succ = 0; + int open_parts = 0, half_open_parts = 0; + if (mgr) { + auto* host = mgr->GetHost("svc"); + if (host) { + auto snap = host->Snapshot(); + probe_succ = 0; + for (const auto& row : snap.slices) { + probe_succ += row.probe_successes; + } + open_parts = snap.open_partitions; + half_open_parts = snap.half_open_partitions; + } + } + + // Recovery complete: saw at least one 200 through the breaker, + // at least one probe success counted, and no partition still + // stuck in OPEN (HALF_OPEN may still linger on the unused slice, + // which is fine for a 2-partition setup). + bool pass = saw_success && (probe_succ >= 1) && (open_parts == 0); + TestFramework::RecordTest( + "CB Integration: HALF_OPEN → CLOSED recovery", pass, + pass ? "" : + "saw_success=" + std::to_string(saw_success) + + " probe_succ=" + std::to_string(probe_succ) + + " open_parts=" + std::to_string(open_parts) + + " half_open_parts=" + std::to_string(half_open_parts)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Integration: HALF_OPEN → CLOSED recovery", false, e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 11: Retry-After ceils the config cap from a non-second-aligned +// max_open_duration_ms (e.g. 1500ms → 2s, not 1s). Floor-rounding the cap +// would clamp the advertised retry window below what the breaker honors, +// causing well-behaved clients to re-hit the 503. 
+// --------------------------------------------------------------------------- +void TestRetryAfterCapCeilsNonAlignedMax() { + std::cout << "\n[TEST] CB Integration: Retry-After cap ceils non-aligned max..." + << std::endl; + try { + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [](const HttpRequest&, HttpResponse& resp) { + resp.Status(502).Body("err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + // Configure a non-second-aligned max backoff. base = 1500ms so + // the actual OpenUntil-now at trip time is ~1.5s, which ceil- + // rounds to 2s. If cfg_cap_secs floor-rounded max_open_duration + // (1500ms → 1s), the clamp would drop Retry-After to 1s even + // though the breaker would keep rejecting through the second + // half of that window. + auto u = MakeBreakerUpstream("svc", "127.0.0.1", backend_port, + /*enabled=*/true, /*threshold=*/3); + u.circuit_breaker.base_open_duration_ms = 1500; + u.circuit_breaker.max_open_duration_ms = 1500; + gw.upstreams.push_back(u); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + for (int i = 0; i < 3; ++i) { + TestHttpClient::HttpGet(gw_port, "/fail", 3000); + } + std::string r = TestHttpClient::HttpGet(gw_port, "/fail", 3000); + + int retry_after = -1; + const char* markers[] = {"Retry-After:", "retry-after:"}; + for (const char* m : markers) { + auto pos = r.find(m); + if (pos == std::string::npos) continue; + pos += std::string(m).size(); + while (pos < r.size() && (r[pos] == ' ' || r[pos] == '\t')) ++pos; + int val = 0; + bool any = false; + while (pos < r.size() && r[pos] >= '0' && r[pos] <= '9') { + val = val * 10 + (r[pos] - '0'); + any = true; + ++pos; + } + if (any) { retry_after = val; break; } + } + + // Expectation: Retry-After is in [1, 2] — cfg_cap_secs 
ceil- + // rounds 1500ms to 2s, and the remaining-time ceil-rounds to + // 2 at the moment of trip (may be 1 if enough wall-clock has + // elapsed between trip and response). Critically it must NEVER + // be zero or exceed 2 (clamped to the 2s cap). + bool in_range = (retry_after >= 1 && retry_after <= 2); + TestFramework::RecordTest( + "CB Integration: Retry-After ceils non-aligned cap", in_range, + in_range ? "" : + "retry_after=" + std::to_string(retry_after)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Integration: Retry-After ceils non-aligned cap", false, e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 12: Retried failures are reported BEFORE the retry fires. With retries +// enabled on 5xx, each attempt's outcome must be counted against the breaker; +// otherwise the slice trips only after the final retry exhausts, under- +// counting failures and potentially never tripping if retries mask enough of +// them. Verifies the trip still happens within the expected number of client +// requests once reporting is attached to the retry path. +// --------------------------------------------------------------------------- +void TestRetriedFailuresCountTowardTrip() { + std::cout << "\n[TEST] CB Integration: retried failures count toward trip..." + << std::endl; + try { + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [](const HttpRequest&, HttpResponse& resp) { + resp.Status(502).Body("err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + // Retries on 5xx enabled. threshold=3 — with retry_on_5xx, each + // client request produces 1 + max_retries=3 = 4 upstream + // attempts, each reporting RESPONSE_5XX via the ReportBreakerOutcome + // path that this fix patches in. 
The breaker must trip after + // at most 3 upstream failure reports (which the first client + // request alone produces). + auto u = MakeBreakerUpstream("svc", "127.0.0.1", backend_port, + /*enabled=*/true, /*threshold=*/3); + u.proxy.retry.max_retries = 3; + u.proxy.retry.retry_on_5xx = true; + u.circuit_breaker.base_open_duration_ms = 30000; + u.circuit_breaker.max_open_duration_ms = 60000; + gw.upstreams.push_back(u); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + // One client request → 4 upstream attempts → 4 RESPONSE_5XX + // reports. Threshold=3 should trip during this single request. + TestHttpClient::HttpGet(gw_port, "/fail", 5000); + + // Second client request must hit the OPEN breaker → 503. + std::string r = TestHttpClient::HttpGet(gw_port, "/fail", 3000); + bool is_503 = TestHttpClient::HasStatus(r, 503); + bool has_breaker_header = + r.find("X-Circuit-Breaker: open") != std::string::npos || + r.find("x-circuit-breaker: open") != std::string::npos; + + bool pass = is_503 && has_breaker_header; + TestFramework::RecordTest( + "CB Integration: retried failures count toward trip", pass, + pass ? "" : + "is_503=" + std::to_string(is_503) + + " breaker_hdr=" + std::to_string(has_breaker_header) + + " body=" + r.substr(0, 256)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Integration: retried failures count toward trip", false, e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 13: HALF_OPEN rejects emit a distinct X-Circuit-Breaker label. +// TryAcquire returns REJECTED_OPEN for three situations (true OPEN, +// half_open_full, half_open_recovery_failing). When the slice is in +// HALF_OPEN, OpenUntil is cleared and a generic MakeCircuitOpenResponse +// would fall back to Retry-After=1 + X-Circuit-Breaker:open — misleading +// clients. 
The fix emits X-Circuit-Breaker:half_open for HALF_OPEN rejects +// with a more conservative Retry-After hint. +// +// Strategy: trip the breaker, wait for the open window to elapse so the +// slice transitions HALF_OPEN on the next admission attempt, then flood +// concurrent requests so some hit half_open_full. +// --------------------------------------------------------------------------- +void TestHalfOpenRejectLabel() { + std::cout << "\n[TEST] CB Integration: HALF_OPEN reject label..." + << std::endl; + try { + // Backend hangs to keep probes in-flight so later concurrent + // requests hit half_open_full. + std::atomic hang{false}; + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [&hang](const HttpRequest&, HttpResponse& resp) { + if (hang.load()) { + std::this_thread::sleep_for(std::chrono::milliseconds(600)); + } + resp.Status(502).Body("err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + auto u = MakeBreakerUpstream("svc", "127.0.0.1", backend_port, + /*enabled=*/true, /*threshold=*/3); + u.circuit_breaker.base_open_duration_ms = 200; + u.circuit_breaker.max_open_duration_ms = 500; + u.circuit_breaker.permitted_half_open_calls = 1; // tiny budget + gw.upstreams.push_back(u); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + // Trip the breaker. + for (int i = 0; i < 3; ++i) { + TestHttpClient::HttpGet(gw_port, "/fail", 3000); + } + // Wait for the open window to elapse so the next admission + // flips the slice to HALF_OPEN. + std::this_thread::sleep_for(std::chrono::milliseconds(300)); + + // Flip backend to hang so the probe occupies the single probe + // slot while we fire sibling requests that must hit half_open_full. 
+ hang.store(true); + + std::atomic saw_half_open{false}; + std::atomic saw_open{false}; + auto probe = [&](int id) { + (void)id; + std::string r = TestHttpClient::HttpGet(gw_port, "/fail", 1500); + if (!TestHttpClient::HasStatus(r, 503)) return; + if (r.find("X-Circuit-Breaker: half_open") != std::string::npos || + r.find("x-circuit-breaker: half_open") != std::string::npos) { + saw_half_open.store(true); + } + if (r.find("X-Circuit-Breaker: open") != std::string::npos || + r.find("x-circuit-breaker: open") != std::string::npos) { + // We want to distinguish the labels; the "open" substring + // also matches "half_open". Only count true "open" if + // "half_open" didn't appear in THIS response. + if (r.find("half_open") == std::string::npos) { + saw_open.store(true); + } + } + }; + + std::vector threads; + for (int i = 0; i < 6; ++i) { + threads.emplace_back(probe, i); + std::this_thread::sleep_for(std::chrono::milliseconds(20)); + } + for (auto& t : threads) t.join(); + + // Pass if at least one HALF_OPEN-labelled reject was observed. + // saw_open may or may not be observed (some rejects could have + // hit between cycles) — the key contract is that HALF_OPEN + // rejects no longer get the plain "open" label. + bool pass = saw_half_open.load(); + TestFramework::RecordTest( + "CB Integration: HALF_OPEN reject label", pass, + pass ? "" : + "saw_half_open=" + std::to_string(saw_half_open.load()) + + " saw_open=" + std::to_string(saw_open.load())); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Integration: HALF_OPEN reject label", false, e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 14: HALF_OPEN Retry-After reflects the current exponential backoff, +// not just base_open_duration_ms. 
After multiple trips the next OPEN window +// (base << consecutive_trips_, clamped by max) can exceed 1 second; the old +// base-only hint (ceil(base/1000) = 1s for base=100ms) would under-report +// the worst-case wait, which this test must fail for. +// +// Strategy: keep the backend failing and drive MULTIPLE re-trips by letting +// the OPEN window elapse and single probe fail each cycle. Successful +// recoveries must be avoided — TransitionHalfOpenToClosed resets +// consecutive_trips_ to 0, which hides the exponential hint. +// --------------------------------------------------------------------------- +void TestHalfOpenRetryAfterScalesWithBackoff() { + std::cout << "\n[TEST] CB Integration: HALF_OPEN Retry-After exponential..." + << std::endl; + try { + // Backend fails fast by default. When `hang` is set, the + // handler blocks — used at the end to pin the probe slot so + // a concurrent request observes HALF_OPEN rejection. + std::atomic hang{false}; + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [&hang](const HttpRequest&, HttpResponse& resp) { + if (hang.load()) { + std::this_thread::sleep_for(std::chrono::milliseconds(1500)); + } + resp.Status(502).Body("err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; // pin all traffic to slice[0] + gw.http2.enabled = false; + auto u = MakeBreakerUpstream("svc", "127.0.0.1", backend_port, + /*enabled=*/true, /*threshold=*/2); + u.circuit_breaker.base_open_duration_ms = 100; // config minimum + u.circuit_breaker.max_open_duration_ms = 8000; // cap at 8s + u.circuit_breaker.permitted_half_open_calls = 1; // single probe + gw.upstreams.push_back(u); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + auto* cbm = gateway.GetUpstreamManager() ? 
+ gateway.GetUpstreamManager()->GetCircuitBreakerManager() : nullptr; + auto* host = cbm ? cbm->GetHost("svc") : nullptr; + auto* slice = host ? host->GetSlice(0) : nullptr; + if (!slice) { + TestFramework::RecordTest( + "CB Integration: HALF_OPEN Retry-After exponential-aware", + false, "slice lookup failed"); + return; + } + + // Initial trip: 2 consecutive failures with threshold=2. + for (int i = 0; i < 2; ++i) { + TestHttpClient::HttpGet(gw_port, "/fail", 3000); + } + + // Drive consecutive_trips_ up by letting successive OPEN windows + // elapse and probes fail (no recovery → no reset). Stop when + // NextOpenDurationMs crosses 1000ms, which is the threshold + // where the HALF_OPEN Retry-After hint starts exceeding the + // base-only value (ceil(100ms)=1s). + // + // The slice re-trips on each failed probe; each trip doubles + // the open duration. We run ~8 cycles with safety margin which + // is comfortably past the trip count needed for Retry-After>=2. + for (int cycle = 0; cycle < 8; ++cycle) { + // Wait past the current open window. Upper bound: max=8s, + // so 1200ms is plenty for the first few short cycles, and + // we re-check after each request anyway. + int64_t next_ms = slice->NextOpenDurationMs(); + // Current OPEN window is the one stored BEFORE the upcoming + // re-trip — we don't have that directly, so sleep past the + // NEXT duration as an over-approximation (next is always >= + // current). This ensures OPEN has elapsed. + auto sleep_ms = std::max(next_ms + 50, 200); + if (sleep_ms > 2000) sleep_ms = 2000; // cap per cycle + std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms)); + + // One request — it should admit as a probe (HALF_OPEN), + // the backend fails fast (502), probe fails → re-trip with + // consecutive_trips_++ and fresh OPEN. + TestHttpClient::HttpGet(gw_port, "/fail", 3000); + + // Bail early once the exponential hint crosses 1s → the + // subsequent HALF_OPEN reject will carry Retry-After >= 2. 
+ if (slice->NextOpenDurationMs() >= 2000) break; + } + + int64_t next_open_ms = slice->NextOpenDurationMs(); + if (next_open_ms < 2000) { + TestFramework::RecordTest( + "CB Integration: HALF_OPEN Retry-After exponential-aware", + false, + "setup failed: next_open_ms=" + std::to_string(next_open_ms) + + " (need >= 2000 to distinguish from base-only hint)"); + return; + } + + // Now trigger a HALF_OPEN reject: wait for current OPEN to + // elapse, start a hanging probe (pins the slot), then fire a + // sibling request — it must see half_open_full with the + // exponential Retry-After. + int64_t post_wait_ms = next_open_ms + 100; + if (post_wait_ms > 4000) post_wait_ms = 4000; + std::this_thread::sleep_for(std::chrono::milliseconds(post_wait_ms)); + + hang.store(true); + std::thread probe([&]() { + TestHttpClient::HttpGet(gw_port, "/fail", 3500); + }); + // Let the probe get admitted and start hanging. + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + + std::string r = TestHttpClient::HttpGet(gw_port, "/fail", 1500); + hang.store(false); + probe.join(); + + bool is_half_open = + r.find("X-Circuit-Breaker: half_open") != std::string::npos || + r.find("x-circuit-breaker: half_open") != std::string::npos; + + int retry_after = -1; + const char* markers[] = {"Retry-After:", "retry-after:"}; + for (const char* m : markers) { + auto pos = r.find(m); + if (pos == std::string::npos) continue; + pos += std::string(m).size(); + while (pos < r.size() && (r[pos] == ' ' || r[pos] == '\t')) ++pos; + int val = 0; + bool any = false; + while (pos < r.size() && r[pos] >= '0' && r[pos] <= '9') { + val = val * 10 + (r[pos] - '0'); + any = true; + ++pos; + } + if (any) { retry_after = val; break; } + } + + // Post-fix: Retry-After = ceil(next_open_ms / 1000) >= 2. + // Pre-fix (base-only): Retry-After = ceil(base/1000) = 1. + // Asserting >= 2 fails the pre-fix implementation. 
+ bool retry_after_ok = (retry_after >= 2 && retry_after <= 8); + bool pass = is_half_open && retry_after_ok; + TestFramework::RecordTest( + "CB Integration: HALF_OPEN Retry-After exponential-aware", pass, + pass ? "" : + "is_half_open=" + std::to_string(is_half_open) + + " retry_after=" + std::to_string(retry_after) + + " next_open_ms=" + std::to_string(next_open_ms)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Integration: HALF_OPEN Retry-After exponential-aware", + false, e.what()); + } +} + +void RunAllTests() { + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "CIRCUIT BREAKER - INTEGRATION TESTS" << std::endl; + std::cout << std::string(60, '=') << std::endl; + + TestBareProxyWorks(); + TestBreakerTripsAfterConsecutiveFailures(); + TestBreakerDisabledPassesThrough(); + TestSuccessResetsConsecutiveFailureCounter(); + TestTripDrivesSliceState(); + TestOpenBreakerShortCircuitsUpstreamCall(); + TestRetryAfterHeaderValue(); + TestCircuitOpenTerminalForRetry(); + TestDryRunPassthrough(); + TestHalfOpenRecoveryRoundTrip(); + TestRetryAfterCapCeilsNonAlignedMax(); + TestRetriedFailuresCountTowardTrip(); + TestHalfOpenRejectLabel(); + TestHalfOpenRetryAfterScalesWithBackoff(); +} + +} // namespace CircuitBreakerIntegrationTests diff --git a/test/circuit_breaker_observability_test.h b/test/circuit_breaker_observability_test.h new file mode 100644 index 00000000..42694a67 --- /dev/null +++ b/test/circuit_breaker_observability_test.h @@ -0,0 +1,405 @@ +#pragma once + +// Observability integration tests: observability — counter accuracy, snapshot +// API correctness, and log emission. +// +// Phases 2-6 each added counters and log lines as a side effect of their +// functional work. This suite locks those in as regressions: +// +// * Counters (§11.2): trips, rejected, probe_successes, probe_failures, +// retries_rejected surface through CircuitBreakerManager::SnapshotAll. 
+// * Snapshot API (§11.3): per-slice rows aggregate into host-level +// totals; host-level fields (retries_in_flight / retries_rejected / +// in_flight) reflect the owning RetryBudget. +// * Logs (§11.1): the CLOSED→OPEN trip emits the full-context message +// including trigger, consecutive_failures, window_total, +// window_fail_rate, open_for_ms, and consecutive_trips. +// +// The log-emission test attaches a spdlog ring-buffer sink to the logger +// for the duration of the test, triggers a trip, then asserts the +// captured messages contain the expected fields. No log file I/O. + +#include "test_framework.h" +#include "test_server_runner.h" +#include "http_test_client.h" +#include "http/http_server.h" +#include "config/server_config.h" +#include "upstream/upstream_manager.h" +#include "circuit_breaker/circuit_breaker_manager.h" +#include "circuit_breaker/circuit_breaker_host.h" +#include "circuit_breaker/circuit_breaker_slice.h" +#include "log/logger.h" +#include "spdlog/sinks/ringbuffer_sink.h" + +#include +#include +#include +#include +#include +#include + +namespace CircuitBreakerObservabilityTests { + +using circuit_breaker::State; + +static UpstreamConfig MakeObservUpstream(const std::string& name, + const std::string& host, + int port, + int consecutive_threshold = 3) { + UpstreamConfig u; + u.name = name; + u.host = host; + u.port = port; + u.pool.max_connections = 8; + u.pool.max_idle_connections = 4; + u.pool.connect_timeout_ms = 3000; + u.pool.idle_timeout_sec = 30; + u.pool.max_lifetime_sec = 3600; + u.pool.max_requests_per_conn = 0; + + u.proxy.route_prefix = "/fail"; + u.proxy.strip_prefix = false; + u.proxy.response_timeout_ms = 2000; + u.proxy.retry.max_retries = 0; + + u.circuit_breaker.enabled = true; + u.circuit_breaker.consecutive_failure_threshold = consecutive_threshold; + u.circuit_breaker.failure_rate_threshold = 100; + u.circuit_breaker.minimum_volume = 10000; + u.circuit_breaker.window_seconds = 10; + 
u.circuit_breaker.permitted_half_open_calls = 2; + // Long open duration — keep the slice OPEN so post-trip assertions + // don't race a HALF_OPEN transition. + u.circuit_breaker.base_open_duration_ms = 30000; + u.circuit_breaker.max_open_duration_ms = 60000; + return u; +} + +// --------------------------------------------------------------------------- +// Test 1: Snapshot API reflects per-slice trip/rejected counters and +// host-level aggregates. Drives N+1 requests against a backend that always +// 502s (N to trip, 1 more that the OPEN slice short-circuits) and asserts +// the snapshot shows total_trips >= 1, total_rejected >= 1, +// open_partitions >= 1. +// --------------------------------------------------------------------------- +void TestSnapshotReflectsCounters() { + std::cout << "\n[TEST] CB Observability: snapshot reflects counters..." + << std::endl; + try { + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [](const HttpRequest&, HttpResponse& resp) { + resp.Status(502).Body("upstream-err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + + auto u = MakeObservUpstream("svc", "127.0.0.1", backend_port, + /*threshold=*/3); + gw.upstreams.push_back(u); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + // Trip (3 failures), then 2 more to accumulate rejected counter. 
+ for (int i = 0; i < 3; ++i) { + TestHttpClient::HttpGet(gw_port, "/fail", 3000); + } + for (int i = 0; i < 2; ++i) { + TestHttpClient::HttpGet(gw_port, "/fail", 3000); + } + + auto* cbm = gateway.GetUpstreamManager()->GetCircuitBreakerManager(); + if (!cbm) { + TestFramework::RecordTest( + "CB Observability: snapshot reflects counters", false, + "no circuit breaker manager attached"); + return; + } + auto snaps = cbm->SnapshotAll(); + bool found = false; + int64_t trips = 0, rejected = 0, probe_s = 0, probe_f = 0; + int open_parts = 0; + for (const auto& s : snaps) { + if (s.service_name == "svc") { + trips = s.total_trips; + rejected = s.total_rejected; + open_parts = s.open_partitions; + for (const auto& row : s.slices) { + probe_s += row.probe_successes; + probe_f += row.probe_failures; + } + found = true; + break; + } + } + + bool pass = found + && trips >= 1 + && rejected >= 2 // 2 post-trip short-circuits + && open_parts >= 1 + && probe_s == 0 // never entered HALF_OPEN + && probe_f == 0; + TestFramework::RecordTest( + "CB Observability: snapshot reflects counters", pass, + pass ? "" : + "found=" + std::to_string(found) + + " trips=" + std::to_string(trips) + + " rejected=" + std::to_string(rejected) + + " open_parts=" + std::to_string(open_parts) + + " probe_s=" + std::to_string(probe_s) + + " probe_f=" + std::to_string(probe_f)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Observability: snapshot reflects counters", false, e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 2: The CLOSED→OPEN trip log emits the §11.1 full-context message. +// Attaches a spdlog ringbuffer_sink to the shared logger, triggers a trip, +// then inspects the captured messages for the key tokens. The sink is +// removed before the test returns so it doesn't affect later tests. 
+// --------------------------------------------------------------------------- +void TestTripLogEmission() { + std::cout << "\n[TEST] CB Observability: trip log emission..." << std::endl; + try { + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [](const HttpRequest&, HttpResponse& resp) { + resp.Status(502).Body("upstream-err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + + auto u = MakeObservUpstream("svc-log", "127.0.0.1", backend_port, + /*threshold=*/2); + gw.upstreams.push_back(u); + + // `HttpServer` construction calls `logging::Init()` which rebuilds + // the default logger via `spdlog::set_default_logger`. Any sink + // attached BEFORE that point lands on a stale logger. Attach the + // ringbuffer sink AFTER the last HttpServer construction so it + // captures the live logger's output. + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + auto ring = std::make_shared< + spdlog::sinks::ringbuffer_sink_mt>(1024); + auto logger = logging::Get(); + auto prev_level = logger->level(); + logger->set_level(spdlog::level::debug); + logger->sinks().push_back(ring); + + struct SinkGuard { + std::shared_ptr logger; + std::shared_ptr ring; + spdlog::level::level_enum prev_level; + ~SinkGuard() { + auto& sinks = logger->sinks(); + sinks.erase(std::remove(sinks.begin(), sinks.end(), + std::shared_ptr(ring)), + sinks.end()); + logger->set_level(prev_level); + } + } guard{logger, ring, prev_level}; + + // Drive exactly threshold=2 failures to trip. + TestHttpClient::HttpGet(gw_port, "/fail", 3000); + TestHttpClient::HttpGet(gw_port, "/fail", 3000); + + // Give the dispatcher a breath to emit + the sink to settle. 
+ std::this_thread::sleep_for(std::chrono::milliseconds(50)); + + auto messages = ring->last_formatted(); + // Scan for the trip message. Look for the static prefix plus the + // §11.1 field tokens. + bool saw_tripped = false; + bool has_trigger = false; + bool has_consec_failures = false; + bool has_window_total = false; + bool has_fail_rate = false; + bool has_open_for_ms = false; + bool has_consec_trips = false; + for (const auto& msg : messages) { + if (msg.find("circuit breaker tripped") == std::string::npos) { + continue; + } + saw_tripped = true; + if (msg.find("trigger=") != std::string::npos) has_trigger = true; + if (msg.find("consecutive_failures=") != std::string::npos) + has_consec_failures = true; + if (msg.find("window_total=") != std::string::npos) + has_window_total = true; + if (msg.find("window_fail_rate=") != std::string::npos) + has_fail_rate = true; + if (msg.find("open_for_ms=") != std::string::npos) + has_open_for_ms = true; + if (msg.find("consecutive_trips=") != std::string::npos) + has_consec_trips = true; + } + + bool pass = saw_tripped && has_trigger && has_consec_failures && + has_window_total && has_fail_rate && + has_open_for_ms && has_consec_trips; + TestFramework::RecordTest( + "CB Observability: trip log emission", pass, + pass ? 
"" : + "saw_tripped=" + std::to_string(saw_tripped) + + " trigger=" + std::to_string(has_trigger) + + " consec_failures=" + std::to_string(has_consec_failures) + + " window_total=" + std::to_string(has_window_total) + + " fail_rate=" + std::to_string(has_fail_rate) + + " open_for_ms=" + std::to_string(has_open_for_ms) + + " consec_trips=" + std::to_string(has_consec_trips)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Observability: trip log emission", false, e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 3: Retry-budget observability — the exhausted log carries the +// §11.1 fields (service, in_flight, retries_in_flight, cap), and the +// host snapshot reflects retries_rejected. +// --------------------------------------------------------------------------- +void TestRetryBudgetObservability() { + std::cout << "\n[TEST] CB Observability: retry budget observability..." + << std::endl; + try { + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [](const HttpRequest&, HttpResponse& resp) { + resp.Status(502).Body("upstream-err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + + // Budget: zero percent AND zero floor → every retry rejected. + auto u = MakeObservUpstream("svc-budget", "127.0.0.1", backend_port, + /*threshold=*/10000); + u.proxy.retry.max_retries = 2; + u.proxy.retry.retry_on_5xx = true; + u.circuit_breaker.retry_budget_percent = 0; + u.circuit_breaker.retry_budget_min_concurrency = 0; + gw.upstreams.push_back(u); + + // Attach the ringbuffer AFTER gateway construction — see + // TestTripLogEmission for rationale (HttpServer's ctor + // replaces the default logger via logging::Init, detaching + // any previously-attached sinks). 
+ HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + auto ring = std::make_shared< + spdlog::sinks::ringbuffer_sink_mt>(1024); + auto logger = logging::Get(); + auto prev_level = logger->level(); + logger->set_level(spdlog::level::debug); + logger->sinks().push_back(ring); + + struct SinkGuard { + std::shared_ptr logger; + std::shared_ptr ring; + spdlog::level::level_enum prev_level; + ~SinkGuard() { + auto& sinks = logger->sinks(); + sinks.erase(std::remove(sinks.begin(), sinks.end(), + std::shared_ptr(ring)), + sinks.end()); + logger->set_level(prev_level); + } + } guard{logger, ring, prev_level}; + + // One client request: first attempt hits backend (502), retry + // blocked by budget → 503 + X-Retry-Budget-Exhausted. + TestHttpClient::HttpGet(gw_port, "/fail", 5000); + + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + + auto messages = ring->last_formatted(); + bool saw_exhausted = false; + bool has_service = false; + bool has_inflight = false; + bool has_retries_inflight = false; + bool has_cap = false; + for (const auto& msg : messages) { + if (msg.find("retry budget exhausted") == std::string::npos) { + continue; + } + saw_exhausted = true; + if (msg.find("service=") != std::string::npos) has_service = true; + if (msg.find("in_flight=") != std::string::npos) + has_inflight = true; + if (msg.find("retries_in_flight=") != std::string::npos) + has_retries_inflight = true; + if (msg.find("cap=") != std::string::npos) has_cap = true; + } + + // Snapshot: retries_rejected must be >= 1 (every rejection increments). + int64_t retries_rejected = 0; + auto* cbm = gateway.GetUpstreamManager()->GetCircuitBreakerManager(); + if (cbm) { + for (const auto& s : cbm->SnapshotAll()) { + if (s.service_name == "svc-budget") { + // Host aggregate — single host, so the sum is the + // host's retries_rejected. The snapshot doesn't yet + // expose that directly — derive from RetryBudget + // via the host getter. 
+ auto* host = cbm->GetHost("svc-budget"); + if (host) { + retries_rejected = + host->GetRetryBudget()->RetriesRejected(); + } + break; + } + } + } + + bool pass = saw_exhausted && has_service && has_inflight && + has_retries_inflight && has_cap && + retries_rejected >= 1; + TestFramework::RecordTest( + "CB Observability: retry budget observability", pass, + pass ? "" : + "saw_exhausted=" + std::to_string(saw_exhausted) + + " service=" + std::to_string(has_service) + + " inflight=" + std::to_string(has_inflight) + + " retries_inflight=" + std::to_string(has_retries_inflight) + + " cap=" + std::to_string(has_cap) + + " retries_rejected=" + std::to_string(retries_rejected)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Observability: retry budget observability", false, e.what()); + } +} + +void RunAllTests() { + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "CIRCUIT BREAKER - OBSERVABILITY TESTS" << std::endl; + std::cout << std::string(60, '=') << std::endl; + + TestSnapshotReflectsCounters(); + TestTripLogEmission(); + TestRetryBudgetObservability(); +} + +} // namespace CircuitBreakerObservabilityTests diff --git a/test/circuit_breaker_reload_test.h b/test/circuit_breaker_reload_test.h new file mode 100644 index 00000000..5b63e6b4 --- /dev/null +++ b/test/circuit_breaker_reload_test.h @@ -0,0 +1,594 @@ +#pragma once + +// Reload integration tests: hot-reload of circuit-breaker fields. +// +// UpstreamConfig::operator== now excludes `circuit_breaker` — a CB-only +// SIGHUP is a clean reload that propagates via HttpServer::Reload → +// CircuitBreakerManager::Reload → per-host per-slice Reload enqueued on +// each owning dispatcher. +// +// Topology fields (host, port, pool, proxy, tls) remain restart-only. 
+// +// Strategy: construct a gateway with an enabled breaker, capture the +// initial slice config, call HttpServer::Reload with an edited +// CircuitBreakerConfig, and verify the slice's live config reflects the +// edit. The reload-log capture also verifies the manager-level log lines +// ("CircuitBreakerManager::Reload: new/removed upstream ...") fire for +// topology-change SIGHUPs. + +#include "test_framework.h" +#include "test_server_runner.h" +#include "http_test_client.h" +#include "http/http_server.h" +#include "config/server_config.h" +#include "upstream/upstream_manager.h" +#include "circuit_breaker/circuit_breaker_manager.h" +#include "circuit_breaker/circuit_breaker_host.h" +#include "circuit_breaker/circuit_breaker_slice.h" +#include "log/logger.h" +#include "spdlog/sinks/ringbuffer_sink.h" + +#include +#include +#include +#include +#include + +namespace CircuitBreakerReloadTests { + +static UpstreamConfig MakeReloadUpstream(const std::string& name, + const std::string& host, + int port) { + UpstreamConfig u; + u.name = name; + u.host = host; + u.port = port; + u.pool.max_connections = 8; + u.pool.max_idle_connections = 4; + u.pool.connect_timeout_ms = 3000; + u.pool.idle_timeout_sec = 30; + u.pool.max_lifetime_sec = 3600; + u.pool.max_requests_per_conn = 0; + + u.proxy.route_prefix = "/fail"; + u.proxy.strip_prefix = false; + u.proxy.response_timeout_ms = 2000; + u.proxy.retry.max_retries = 0; + + u.circuit_breaker.enabled = true; + u.circuit_breaker.consecutive_failure_threshold = 3; + u.circuit_breaker.failure_rate_threshold = 100; + u.circuit_breaker.minimum_volume = 10000; + u.circuit_breaker.window_seconds = 10; + u.circuit_breaker.permitted_half_open_calls = 2; + u.circuit_breaker.base_open_duration_ms = 5000; + u.circuit_breaker.max_open_duration_ms = 60000; + return u; +} + +// --------------------------------------------------------------------------- +// Test 1: CB-only SIGHUP propagates to live slice config. 
+// +// Build gateway with threshold=3. Reload with threshold=7. Verify the +// slice's live config().consecutive_failure_threshold flipped to 7. +// --------------------------------------------------------------------------- +void TestCbReloadPropagatesToSlice() { + std::cout << "\n[TEST] CB Reload: reload propagates to slice..." + << std::endl; + try { + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [](const HttpRequest&, HttpResponse& resp) { + resp.Status(502).Body("err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + gw.upstreams.push_back( + MakeReloadUpstream("svc", "127.0.0.1", backend_port)); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + + auto* cbm = gateway.GetUpstreamManager()->GetCircuitBreakerManager(); + auto* host = cbm->GetHost("svc"); + auto* slice = host->GetSlice(0); + int threshold_before = slice->config().consecutive_failure_threshold; + int window_before = slice->config().window_seconds; + + // Build reloaded config with modified CB fields only. + ServerConfig reloaded = gw; + reloaded.upstreams[0].circuit_breaker.consecutive_failure_threshold = 7; + reloaded.upstreams[0].circuit_breaker.window_seconds = 20; + + bool ok = gateway.Reload(reloaded); + // Reload enqueues per-slice updates on the owning dispatcher — + // brief sleep to let the dispatcher execute the queued Slice::Reload. + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + int threshold_after = slice->config().consecutive_failure_threshold; + int window_after = slice->config().window_seconds; + + bool pass = ok && threshold_before == 3 && window_before == 10 + && threshold_after == 7 && window_after == 20; + TestFramework::RecordTest( + "CB Reload: reload propagates to slice", pass, + pass ? 
"" : + "ok=" + std::to_string(ok) + + " threshold_before=" + std::to_string(threshold_before) + + " threshold_after=" + std::to_string(threshold_after) + + " window_before=" + std::to_string(window_before) + + " window_after=" + std::to_string(window_after)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Reload: reload propagates to slice", false, e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 2: CB-only reload does NOT emit the topology "restart required" +// warning. UpstreamConfig::operator== excludes circuit_breaker so a +// CB-only edit doesn't make the outer config != comparison true — the +// warning fires only on topology-field changes (host, port, pool, proxy, +// tls), which remain restart-only. +// --------------------------------------------------------------------------- +void TestCbOnlyReloadNoRestartWarn() { + std::cout << "\n[TEST] CB Reload: CB-only reload emits no restart warn..." + << std::endl; + try { + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [](const HttpRequest&, HttpResponse& resp) { + resp.Status(502).Body("err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + gw.upstreams.push_back( + MakeReloadUpstream("svc", "127.0.0.1", backend_port)); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + + // Attach ringbuffer sink AFTER gateway ctor (logging::Init + // rebuilds the default logger). See the observability test for rationale. 
+ auto ring = std::make_shared< + spdlog::sinks::ringbuffer_sink_mt>(1024); + auto logger = logging::Get(); + auto prev_level = logger->level(); + logger->set_level(spdlog::level::debug); + logger->sinks().push_back(ring); + + struct SinkGuard { + std::shared_ptr logger; + std::shared_ptr ring; + spdlog::level::level_enum prev_level; + ~SinkGuard() { + auto& sinks = logger->sinks(); + sinks.erase(std::remove(sinks.begin(), sinks.end(), + std::shared_ptr(ring)), + sinks.end()); + logger->set_level(prev_level); + } + } guard{logger, ring, prev_level}; + + ServerConfig reloaded = gw; + reloaded.upstreams[0].circuit_breaker.consecutive_failure_threshold = 9; + + gateway.Reload(reloaded); + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + + bool saw_topology_warn = false; + bool saw_cb_config_applied = false; + for (const auto& msg : ring->last_formatted()) { + if (msg.find("upstream topology changes require a restart") != + std::string::npos) { + saw_topology_warn = true; + } + if (msg.find("circuit breaker config applied") != + std::string::npos) { + saw_cb_config_applied = true; + } + } + + bool pass = !saw_topology_warn && saw_cb_config_applied; + TestFramework::RecordTest( + "CB Reload: CB-only reload emits no restart warn", pass, + pass ? "" : + "saw_topology_warn=" + std::to_string(saw_topology_warn) + + " saw_cb_config_applied=" + std::to_string(saw_cb_config_applied)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Reload: CB-only reload emits no restart warn", false, + e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 3: Topology change (pool field edit) STILL emits the restart warn +// — the exclusion of circuit_breaker from operator== must NOT compromise +// the restart-required signal for unreloadable fields. 
+// --------------------------------------------------------------------------- +void TestTopologyChangeStillEmitsRestartWarn() { + std::cout << "\n[TEST] CB Reload: topology change still warns..." + << std::endl; + try { + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [](const HttpRequest&, HttpResponse& resp) { + resp.Status(502).Body("err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + gw.upstreams.push_back( + MakeReloadUpstream("svc", "127.0.0.1", backend_port)); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + + auto ring = std::make_shared< + spdlog::sinks::ringbuffer_sink_mt>(1024); + auto logger = logging::Get(); + auto prev_level = logger->level(); + logger->set_level(spdlog::level::debug); + logger->sinks().push_back(ring); + + struct SinkGuard { + std::shared_ptr logger; + std::shared_ptr ring; + spdlog::level::level_enum prev_level; + ~SinkGuard() { + auto& sinks = logger->sinks(); + sinks.erase(std::remove(sinks.begin(), sinks.end(), + std::shared_ptr(ring)), + sinks.end()); + logger->set_level(prev_level); + } + } guard{logger, ring, prev_level}; + + ServerConfig reloaded = gw; + // Topology-level edit that operator== still detects. + reloaded.upstreams[0].pool.max_connections = 16; + // Also flip a breaker field so we verify BOTH happen on the + // same reload (live CB edit + topology warn). 
+ reloaded.upstreams[0].circuit_breaker.consecutive_failure_threshold = 5; + + gateway.Reload(reloaded); + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + + bool saw_topology_warn = false; + bool saw_cb_config_applied = false; + for (const auto& msg : ring->last_formatted()) { + if (msg.find("upstream topology changes require a restart") != + std::string::npos) { + saw_topology_warn = true; + } + if (msg.find("circuit breaker config applied") != + std::string::npos) { + saw_cb_config_applied = true; + } + } + + bool pass = saw_topology_warn && saw_cb_config_applied; + TestFramework::RecordTest( + "CB Reload: topology change still warns", pass, + pass ? "" : + "saw_topology_warn=" + std::to_string(saw_topology_warn) + + " saw_cb_config_applied=" + std::to_string(saw_cb_config_applied)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Reload: topology change still warns", false, e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 4: Disable → enable toggle via reload. A CB-only reload that sets +// `enabled=false` must make the slice short-circuit admissions; a +// subsequent reload flipping `enabled=true` must re-engage the state +// machine without requiring a restart. Verifies the "wire transition +// callbacks for ALL upstreams regardless of enabled" design (§3.1 R3-1). +// --------------------------------------------------------------------------- +void TestReloadDisableThenEnable() { + std::cout << "\n[TEST] CB Reload: reload disable→enable..." 
<< std::endl; + try { + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [](const HttpRequest&, HttpResponse& resp) { + resp.Status(502).Body("err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + gw.upstreams.push_back( + MakeReloadUpstream("svc", "127.0.0.1", backend_port)); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + + auto* cbm = gateway.GetUpstreamManager()->GetCircuitBreakerManager(); + auto* slice = cbm->GetHost("svc")->GetSlice(0); + + // Start: enabled=true. + bool enabled_before = slice->config().enabled; + + // Reload to enabled=false. + ServerConfig disabled = gw; + disabled.upstreams[0].circuit_breaker.enabled = false; + gateway.Reload(disabled); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + bool disabled_after = !slice->config().enabled; + + // Reload back to enabled=true with a new threshold. + ServerConfig reenabled = gw; + reenabled.upstreams[0].circuit_breaker.enabled = true; + reenabled.upstreams[0].circuit_breaker.consecutive_failure_threshold = 11; + gateway.Reload(reenabled); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + bool enabled_again = slice->config().enabled; + int threshold_after = slice->config().consecutive_failure_threshold; + + bool pass = enabled_before && disabled_after && + enabled_again && threshold_after == 11; + TestFramework::RecordTest( + "CB Reload: reload disable→enable", pass, + pass ? 
"" : + "enabled_before=" + std::to_string(enabled_before) + + " disabled_after=" + std::to_string(disabled_after) + + " enabled_again=" + std::to_string(enabled_again) + + " threshold_after=" + std::to_string(threshold_after)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Reload: reload disable→enable", false, e.what()); + } +} + +// Regression: a SIGHUP carrying an invalid CB threshold (e.g. +// `consecutive_failure_threshold = 0`) on an EXISTING upstream must +// be hard-rejected. The downgrade-to-warn behavior of the wider +// `Validate()` call would otherwise push the bad value into live +// slices even though startup rejects the same file. +void TestReloadRejectsInvalidCbField() { + std::cout << "\n[TEST] CB Reload: invalid CB tuning is hard-rejected..." + << std::endl; + try { + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [](const HttpRequest&, HttpResponse& resp) { + resp.Status(502).Body("err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + gw.upstreams.push_back( + MakeReloadUpstream("svc", "127.0.0.1", backend_port)); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + + // Build an invalid reload — threshold below the [1, 10000] range. + ServerConfig invalid = gw; + invalid.upstreams[0].circuit_breaker.consecutive_failure_threshold = 0; + + bool reload_returned = gateway.Reload(invalid); + // The slice's threshold must NOT have been pushed live. + auto* cbm = gateway.GetUpstreamManager()->GetCircuitBreakerManager(); + auto* slice = cbm->GetHost("svc")->GetSlice(0); + int live_threshold = slice->config().consecutive_failure_threshold; + + bool pass = reload_returned == false && live_threshold == 3; + TestFramework::RecordTest( + "CB Reload: invalid CB tuning is hard-rejected", pass, + pass ? 
"" : + "reload_returned=" + std::to_string(reload_returned) + + " live_threshold=" + std::to_string(live_threshold) + + " (expected reload=false, threshold=3 unchanged)"); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Reload: invalid CB tuning is hard-rejected", false, e.what()); + } +} + +// Regression: with `dry_run=true`, the CLOSED→OPEN transition callback +// must NOT drain the partition wait queue (shadow-mode contract: log +// would-reject decisions, admit traffic). The breaker's dry_run check +// inside the transition callback covers this; the regression we lock +// in is the log-emitted breadcrumb plus the absence of CHECKOUT_CIRCUIT_OPEN +// to queued waiters. +void TestDryRunDoesNotDrainOnTrip() { + std::cout << "\n[TEST] CB Reload: dry-run skips wait-queue drain on trip..." + << std::endl; + try { + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [](const HttpRequest&, HttpResponse& resp) { + resp.Status(502).Body("err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + UpstreamConfig u = MakeReloadUpstream("svc", "127.0.0.1", backend_port); + u.circuit_breaker.dry_run = true; + u.circuit_breaker.consecutive_failure_threshold = 2; + gw.upstreams.push_back(u); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + + auto ring = std::make_shared< + spdlog::sinks::ringbuffer_sink_mt>(1024); + auto logger = logging::Get(); + auto prev_level = logger->level(); + logger->set_level(spdlog::level::debug); + logger->sinks().push_back(ring); + struct SinkGuard { + std::shared_ptr logger; + std::shared_ptr ring; + spdlog::level::level_enum prev_level; + ~SinkGuard() { + auto& sinks = logger->sinks(); + sinks.erase(std::remove(sinks.begin(), sinks.end(), + std::shared_ptr(ring)), + sinks.end()); + logger->set_level(prev_level); + 
} + } guard{logger, ring, prev_level}; + + int gw_port = gw_runner.GetPort(); + // Trip the breaker via 2 failures. + for (int i = 0; i < 2; ++i) { + TestHttpClient::HttpGet(gw_port, "/fail", 3000); + } + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + + bool saw_dryrun_drain_skip = false; + for (const auto& msg : ring->last_formatted()) { + if (msg.find("[dry-run] circuit breaker would drain wait queue") != + std::string::npos) { + saw_dryrun_drain_skip = true; + break; + } + } + + TestFramework::RecordTest( + "CB Reload: dry-run skips wait-queue drain on trip", + saw_dryrun_drain_skip, + saw_dryrun_drain_skip ? "" : + "expected '[dry-run] circuit breaker would drain wait queue' log line"); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Reload: dry-run skips wait-queue drain on trip", false, e.what()); + } +} + +// Regression: when `dry_run` flips true→false on a slice that's +// currently OPEN, `Slice::Reload` fires a synthetic OPEN→OPEN +// transition with trigger="dry_run_disabled". The HttpServer-installed +// callback recognizes it and drains the partition queue so shadow-mode +// waiters don't leak through to the upstream once enforcement is back on. +void TestDryRunDisableOnOpenTriggersDrainSignal() { + std::cout << "\n[TEST] CB Reload: dry_run disable on OPEN triggers drain..." 
+ << std::endl; + try { + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [](const HttpRequest&, HttpResponse& resp) { + resp.Status(502).Body("err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + UpstreamConfig u = MakeReloadUpstream("svc", "127.0.0.1", backend_port); + u.circuit_breaker.dry_run = true; + u.circuit_breaker.consecutive_failure_threshold = 2; + u.circuit_breaker.base_open_duration_ms = 60000; // long open window + gw.upstreams.push_back(u); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + // Trip the breaker (dry-run still records the trip; state goes OPEN). + for (int i = 0; i < 2; ++i) { + TestHttpClient::HttpGet(gw_port, "/fail", 3000); + } + + auto* cbm = gateway.GetUpstreamManager()->GetCircuitBreakerManager(); + auto* slice = cbm->GetHost("svc")->GetSlice(0); + bool was_open = slice->CurrentState() == circuit_breaker::State::OPEN; + + auto ring = std::make_shared< + spdlog::sinks::ringbuffer_sink_mt>(1024); + auto logger = logging::Get(); + auto prev_level = logger->level(); + logger->set_level(spdlog::level::debug); + logger->sinks().push_back(ring); + struct SinkGuard { + std::shared_ptr logger; + std::shared_ptr ring; + spdlog::level::level_enum prev_level; + ~SinkGuard() { + auto& sinks = logger->sinks(); + sinks.erase(std::remove(sinks.begin(), sinks.end(), + std::shared_ptr(ring)), + sinks.end()); + logger->set_level(prev_level); + } + } guard{logger, ring, prev_level}; + + // Reload with dry_run=false, everything else same. + ServerConfig disable_dry = gw; + disable_dry.upstreams[0].circuit_breaker.dry_run = false; + gateway.Reload(disable_dry); + std::this_thread::sleep_for(std::chrono::milliseconds(150)); + + // The synthetic-callback fire path emits a slice-side log line. 
+ bool saw_flush_log = false; + for (const auto& msg : ring->last_formatted()) { + if (msg.find("dry_run disabled while OPEN") != std::string::npos && + msg.find("flushing wait queue") != std::string::npos) { + saw_flush_log = true; + break; + } + } + bool live_dry_run = slice->config().dry_run; + bool still_open = slice->CurrentState() == circuit_breaker::State::OPEN; + + bool pass = was_open && !live_dry_run && saw_flush_log && still_open; + TestFramework::RecordTest( + "CB Reload: dry_run disable on OPEN triggers drain", pass, + pass ? "" : + "was_open=" + std::to_string(was_open) + + " live_dry_run=" + std::to_string(live_dry_run) + + " saw_flush_log=" + std::to_string(saw_flush_log) + + " still_open=" + std::to_string(still_open)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Reload: dry_run disable on OPEN triggers drain", false, + e.what()); + } +} + +void RunAllTests() { + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "CIRCUIT BREAKER - HOT-RELOAD TESTS" << std::endl; + std::cout << std::string(60, '=') << std::endl; + + TestCbReloadPropagatesToSlice(); + TestCbOnlyReloadNoRestartWarn(); + TestTopologyChangeStillEmitsRestartWarn(); + TestReloadDisableThenEnable(); + TestReloadRejectsInvalidCbField(); + TestDryRunDoesNotDrainOnTrip(); + TestDryRunDisableOnOpenTriggersDrainSignal(); +} + +} // namespace CircuitBreakerReloadTests diff --git a/test/circuit_breaker_retry_budget_test.h b/test/circuit_breaker_retry_budget_test.h new file mode 100644 index 00000000..608a0602 --- /dev/null +++ b/test/circuit_breaker_retry_budget_test.h @@ -0,0 +1,367 @@ +#pragma once + +// Retry-budget integration tests: retry budget wired into ProxyTransaction. +// +// The component suite covers the RetryBudget math (CAS, non-retry +// denominator, min-concurrency floor) as unit tests against the +// RetryBudget class in isolation. 
This suite tests the INTEGRATION: +// ProxyTransaction resolves +// `retry_budget_` from the same CircuitBreakerHost as `slice_`, tracks +// every attempt's in_flight via the RAII guard, and consults +// `TryConsumeRetry` before each retry. Exhaustion emits the §12.2 +// response (503 + `X-Retry-Budget-Exhausted: 1`) and does NOT feed +// back into the slice's failure math. +// +// Strategy: backends that always 502 with `retry_on_5xx=true` drive the +// retry path. A near-zero retry-budget (`percent=0, min_concurrency=0`) +// rejects every retry deterministically without needing concurrent +// client load. The circuit-breaker consecutive-failure threshold is +// raised well above the retry count so the breaker stays CLOSED — the +// budget gate is tested in isolation from the state machine. + +#include "test_framework.h" +#include "test_server_runner.h" +#include "http_test_client.h" +#include "http/http_server.h" +#include "config/server_config.h" + +#include +#include +#include +#include + +namespace CircuitBreakerRetryBudgetTests { + +// Upstream config that always proxies /fail, with the circuit breaker +// enabled so `retry_budget_` is resolved on `slice_`'s host. Breaker +// thresholds intentionally unreachable for these tests — we want the +// retry-budget gate fired in isolation, not co-tripping the state +// machine. 
+static UpstreamConfig MakeRetryBudgetUpstream(const std::string& name, + const std::string& host, + int port, + int retry_budget_percent, + int retry_budget_min_concurrency, + bool dry_run = false) { + UpstreamConfig u; + u.name = name; + u.host = host; + u.port = port; + u.pool.max_connections = 16; + u.pool.max_idle_connections = 8; + u.pool.connect_timeout_ms = 3000; + u.pool.idle_timeout_sec = 30; + u.pool.max_lifetime_sec = 3600; + u.pool.max_requests_per_conn = 0; + + u.proxy.route_prefix = "/fail"; + u.proxy.strip_prefix = false; + u.proxy.response_timeout_ms = 2000; + + u.circuit_breaker.enabled = true; + u.circuit_breaker.dry_run = dry_run; + // Breaker thresholds unreachable — we don't want the state machine + // tripping during a retry-budget test. + u.circuit_breaker.consecutive_failure_threshold = 10000; + u.circuit_breaker.failure_rate_threshold = 100; + u.circuit_breaker.minimum_volume = 10000; + u.circuit_breaker.window_seconds = 10; + u.circuit_breaker.permitted_half_open_calls = 2; + u.circuit_breaker.base_open_duration_ms = 30000; + u.circuit_breaker.max_open_duration_ms = 60000; + + u.circuit_breaker.retry_budget_percent = retry_budget_percent; + u.circuit_breaker.retry_budget_min_concurrency = retry_budget_min_concurrency; + return u; +} + +static bool HasRetryBudgetHeader(const std::string& response) { + return response.find("X-Retry-Budget-Exhausted: 1") != std::string::npos || + response.find("x-retry-budget-exhausted: 1") != std::string::npos; +} + +// --------------------------------------------------------------------------- +// Test 1: A retry attempt rejected by the retry-budget gate delivers 503 + +// X-Retry-Budget-Exhausted instead of the upstream's 5xx. Verifies that +// `TryConsumeRetry` runs BEFORE the retry executes and that +// `MakeRetryBudgetResponse` is emitted through the standard DeliverResponse +// path. +// +// retry_budget_percent=0 + retry_budget_min_concurrency=0 → cap = 0. 
Every +// retry attempt's TryConsumeRetry returns false. First attempt is +// unaffected (budget only gates retries), so the backend is hit exactly +// once per client request; the retry is short-circuited locally. +// --------------------------------------------------------------------------- +void TestRetryBudgetRejectsRetry() { + std::cout << "\n[TEST] CB Retry Budget: retry budget rejects retry..." + << std::endl; + try { + std::atomic backend_hits{0}; + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [&backend_hits](const HttpRequest&, HttpResponse& resp) { + backend_hits.fetch_add(1, std::memory_order_relaxed); + resp.Status(502).Body("upstream-err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + + auto u = MakeRetryBudgetUpstream("svc", "127.0.0.1", backend_port, + /*percent=*/0, + /*min_concurrency=*/0); + u.proxy.retry.max_retries = 3; + u.proxy.retry.retry_on_5xx = true; + gw.upstreams.push_back(u); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + std::string r = TestHttpClient::HttpGet(gw_port, "/fail", 5000); + + bool is_503 = TestHttpClient::HasStatus(r, 503); + bool has_budget_hdr = HasRetryBudgetHeader(r); + // Backend should have been hit exactly once (the first attempt); + // every retry was short-circuited by the budget gate. + int hits = backend_hits.load(std::memory_order_relaxed); + bool single_backend_hit = (hits == 1); + + bool pass = is_503 && has_budget_hdr && single_backend_hit; + TestFramework::RecordTest( + "CB Retry Budget: retry budget rejects retry", pass, + pass ? 
"" : + "is_503=" + std::to_string(is_503) + + " budget_hdr=" + std::to_string(has_budget_hdr) + + " backend_hits=" + std::to_string(hits) + + " body=" + r.substr(0, 256)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Retry Budget: retry budget rejects retry", false, e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 2: The min-concurrency floor admits retries even when the %-based +// cap would be zero. With percent=0 + min_concurrency=5, a single sequential +// client request's retry chain (1 first + 3 retries = 4 backend hits) all +// fit under the floor and proceed normally to the upstream — no 503, no +// X-Retry-Budget-Exhausted, and the client sees the final 5xx response. +// +// This is the symmetric test to Test 1: same near-zero %-cap, but a floor +// large enough that retries aren't budget-gated. Proves the floor is +// consulted (retries admitted) instead of the %-cap (retries rejected). +// --------------------------------------------------------------------------- +void TestRetryBudgetMinConcurrencyFloor() { + std::cout << "\n[TEST] CB Retry Budget: retry budget min-concurrency floor..." + << std::endl; + try { + std::atomic backend_hits{0}; + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [&backend_hits](const HttpRequest&, HttpResponse& resp) { + backend_hits.fetch_add(1, std::memory_order_relaxed); + resp.Status(502).Body("upstream-err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + + // percent=0 → no %-based capacity. min_concurrency=5 → floor + // admits up to 5 concurrent retries, easily covering the 3 + // sequential retries from a single client request. 
+ auto u = MakeRetryBudgetUpstream("svc", "127.0.0.1", backend_port, + /*percent=*/0, + /*min_concurrency=*/5); + u.proxy.retry.max_retries = 3; + u.proxy.retry.retry_on_5xx = true; + gw.upstreams.push_back(u); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + std::string r = TestHttpClient::HttpGet(gw_port, "/fail", 10000); + + // Client sees the upstream's final 502 — no local 503, no + // X-Retry-Budget-Exhausted. + bool is_502 = TestHttpClient::HasStatus(r, 502); + bool no_budget_hdr = !HasRetryBudgetHeader(r); + // 1 first attempt + 3 retries admitted by the floor = 4 backend hits. + int hits = backend_hits.load(std::memory_order_relaxed); + bool all_retries_proceeded = (hits == 4); + + bool pass = is_502 && no_budget_hdr && all_retries_proceeded; + TestFramework::RecordTest( + "CB Retry Budget: retry budget min-concurrency floor", pass, + pass ? "" : + "is_502=" + std::to_string(is_502) + + " no_budget_hdr=" + std::to_string(no_budget_hdr) + + " backend_hits=" + std::to_string(hits) + + " body=" + r.substr(0, 256)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Retry Budget: retry budget min-concurrency floor", false, e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 3: Dry-run bypasses the retry-budget gate. +// +// With percent=0 + min_concurrency=0 (same as Test 1), TryConsumeRetry +// returns false for every retry. But `circuit_breaker.dry_run=true` +// switches the rejection path to a log-and-proceed: no token is +// consumed, retry_token_held_ stays false, and AttemptCheckout runs as +// though the budget was unlimited. +// +// Result: the client sees the upstream's 502 response (because the +// retries actually fire), NOT a 503 + X-Retry-Budget-Exhausted. 
+// --------------------------------------------------------------------------- +void TestRetryBudgetDryRunPassthrough() { + std::cout << "\n[TEST] CB Retry Budget: retry budget dry-run passthrough..." + << std::endl; + try { + std::atomic backend_hits{0}; + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [&backend_hits](const HttpRequest&, HttpResponse& resp) { + backend_hits.fetch_add(1, std::memory_order_relaxed); + resp.Status(502).Body("upstream-err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + + auto u = MakeRetryBudgetUpstream("svc", "127.0.0.1", backend_port, + /*percent=*/0, + /*min_concurrency=*/0, + /*dry_run=*/true); + u.proxy.retry.max_retries = 2; + u.proxy.retry.retry_on_5xx = true; + gw.upstreams.push_back(u); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + std::string r = TestHttpClient::HttpGet(gw_port, "/fail", 10000); + + // Retries proceeded despite would-reject decisions — the client + // sees the upstream's final 502, not our local 503. + bool is_502 = TestHttpClient::HasStatus(r, 502); + bool no_budget_hdr = !HasRetryBudgetHeader(r); + int hits = backend_hits.load(std::memory_order_relaxed); + bool all_attempts_ran = (hits == 3); // 1 first + 2 retries + + bool pass = is_502 && no_budget_hdr && all_attempts_ran; + TestFramework::RecordTest( + "CB Retry Budget: retry budget dry-run passthrough", pass, + pass ? 
"" : + "is_502=" + std::to_string(is_502) + + " no_budget_hdr=" + std::to_string(no_budget_hdr) + + " backend_hits=" + std::to_string(hits) + + " body=" + r.substr(0, 256)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Retry Budget: retry budget dry-run passthrough", false, e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 4: First attempts are NOT budget-gated. +// +// The retry-budget cap applies only to retries (attempt_ > 0). First +// attempts call TrackInFlight (which only ever increments) but skip +// TryConsumeRetry entirely. With percent=0 + min_concurrency=0 and a +// backend that always 200s, every client request must succeed — if the +// gate accidentally ran on first attempts, we'd see 503s here. +// +// Guards against a regression where TryConsumeRetry is called before +// the `attempt_ > 0` gate, or where the gate is placed in +// AttemptCheckout instead of MaybeRetry. +// --------------------------------------------------------------------------- +void TestFirstAttemptsNotGated() { + std::cout << "\n[TEST] CB Retry Budget: first attempts not gated..." + << std::endl; + try { + std::atomic backend_hits{0}; + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [&backend_hits](const HttpRequest&, HttpResponse& resp) { + backend_hits.fetch_add(1, std::memory_order_relaxed); + resp.Status(200).Body("ok", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + + auto u = MakeRetryBudgetUpstream("svc", "127.0.0.1", backend_port, + /*percent=*/0, + /*min_concurrency=*/0); + // No retries — every request is a first attempt. 
+ u.proxy.retry.max_retries = 0; + gw.upstreams.push_back(u); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + int client_count = 5; + int successes = 0; + for (int i = 0; i < client_count; ++i) { + std::string r = TestHttpClient::HttpGet(gw_port, "/fail", 3000); + if (TestHttpClient::HasStatus(r, 200)) ++successes; + if (HasRetryBudgetHeader(r)) { + // Any X-Retry-Budget-Exhausted on a first-attempt-only + // path is a bug. Record and bail. + TestFramework::RecordTest( + "CB Retry Budget: first attempts not gated", false, + "unexpected X-Retry-Budget-Exhausted on first-attempt path " + "i=" + std::to_string(i)); + return; + } + } + + int hits = backend_hits.load(std::memory_order_relaxed); + bool pass = (successes == client_count) && (hits == client_count); + TestFramework::RecordTest( + "CB Retry Budget: first attempts not gated", pass, + pass ? "" : + "successes=" + std::to_string(successes) + + "/" + std::to_string(client_count) + + " backend_hits=" + std::to_string(hits)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Retry Budget: first attempts not gated", false, e.what()); + } +} + +void RunAllTests() { + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "CIRCUIT BREAKER - RETRY BUDGET INTEGRATION TESTS" + << std::endl; + std::cout << std::string(60, '=') << std::endl; + + TestRetryBudgetRejectsRetry(); + TestRetryBudgetMinConcurrencyFloor(); + TestRetryBudgetDryRunPassthrough(); + TestFirstAttemptsNotGated(); +} + +} // namespace CircuitBreakerRetryBudgetTests diff --git a/test/circuit_breaker_test.h b/test/circuit_breaker_test.h new file mode 100644 index 00000000..bed54da0 --- /dev/null +++ b/test/circuit_breaker_test.h @@ -0,0 +1,2070 @@ +#pragma once + +#include "test_framework.h" +#include "config/server_config.h" +#include "circuit_breaker/circuit_breaker_state.h" +#include "circuit_breaker/circuit_breaker_window.h" +#include 
"circuit_breaker/circuit_breaker_slice.h" + +#include +#include +#include + +namespace CircuitBreakerTests { + +using circuit_breaker::CircuitBreakerSlice; +using circuit_breaker::CircuitBreakerWindow; +using circuit_breaker::Decision; +using circuit_breaker::FailureKind; +using circuit_breaker::State; + +// A simple mock clock that advances only when the test tells it to. +class MockClock { +public: + std::chrono::steady_clock::time_point now{ + // Choose a non-zero base so 0 is distinguishable from "not OPEN". + std::chrono::steady_clock::time_point(std::chrono::seconds(1'000'000)) + }; + void Advance(std::chrono::milliseconds ms) { now += ms; } + void AdvanceSec(int seconds) { now += std::chrono::seconds(seconds); } + std::chrono::steady_clock::time_point operator()() const { return now; } +}; + +// Build a config with default values — tests override specific fields. +static CircuitBreakerConfig DefaultEnabledConfig() { + CircuitBreakerConfig cb; + cb.enabled = true; + cb.consecutive_failure_threshold = 5; + cb.failure_rate_threshold = 50; + cb.minimum_volume = 20; + cb.window_seconds = 10; + cb.permitted_half_open_calls = 5; + cb.base_open_duration_ms = 5000; + cb.max_open_duration_ms = 60000; + return cb; +} + +// ============================================================================ +// State machine tests +// ============================================================================ + +void TestDisabledFastPath() { + std::cout << "\n[TEST] CB: Disabled fast path..." << std::endl; + try { + CircuitBreakerConfig cb; // enabled=false by default + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + bool pass = slice.TryAcquire().decision == Decision::ADMITTED && + slice.CurrentState() == State::CLOSED; + + // Reporting 100 failures must not trip. 
+ for (int i = 0; i < 100; ++i) {
+ slice.ReportFailure(FailureKind::CONNECT_FAILURE, false, slice.CurrentGenerationForTesting());
+ }
+ pass = pass && slice.CurrentState() == State::CLOSED &&
+ slice.Trips() == 0;
+
+ TestFramework::RecordTest("CB: disabled fast path", pass, "",
+ TestFramework::TestCategory::OTHER);
+ } catch (const std::exception& e) {
+ TestFramework::RecordTest("CB: disabled fast path", false, e.what(),
+ TestFramework::TestCategory::OTHER);
+ }
+}
+
+void TestClosedStaysClosedBelowConsecutiveThreshold() {
+ std::cout << "\n[TEST] CB: 4 failures below threshold..." << std::endl;
+ try {
+ auto cb = DefaultEnabledConfig();
+ auto clock = std::make_shared<MockClock>();
+ CircuitBreakerSlice slice("svc:h:p p=0", 0, cb,
+ [clock]() { return clock->now; });
+
+ for (int i = 0; i < 4; ++i) {
+ slice.ReportFailure(FailureKind::CONNECT_FAILURE, false, slice.CurrentGenerationForTesting());
+ }
+ bool pass = slice.CurrentState() == State::CLOSED &&
+ slice.TryAcquire().decision == Decision::ADMITTED &&
+ slice.Trips() == 0;
+ TestFramework::RecordTest("CB: 4 failures below threshold", pass, "",
+ TestFramework::TestCategory::OTHER);
+ } catch (const std::exception& e) {
+ TestFramework::RecordTest("CB: 4 failures below threshold", false,
+ e.what(), TestFramework::TestCategory::OTHER);
+ }
+}
+
+void TestConsecutiveFailureTrip() {
+ std::cout << "\n[TEST] CB: 5 consecutive failures trip..."
<< std::endl;
+ try {
+ auto cb = DefaultEnabledConfig();
+ auto clock = std::make_shared<MockClock>();
+ CircuitBreakerSlice slice("svc:h:p p=0", 0, cb,
+ [clock]() { return clock->now; });
+
+ for (int i = 0; i < 5; ++i) {
+ slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting());
+ }
+ bool pass = slice.CurrentState() == State::OPEN &&
+ slice.Trips() == 1 &&
+ slice.TryAcquire().decision == Decision::REJECTED_OPEN;
+ TestFramework::RecordTest("CB: 5 consecutive failures trip", pass, "",
+ TestFramework::TestCategory::OTHER);
+ } catch (const std::exception& e) {
+ TestFramework::RecordTest("CB: 5 consecutive failures trip", false,
+ e.what(), TestFramework::TestCategory::OTHER);
+ }
+}
+
+void TestFailureRateTrip() {
+ std::cout << "\n[TEST] CB: failure-rate trip (50% of 20)..." << std::endl;
+ try {
+ auto cb = DefaultEnabledConfig();
+ cb.consecutive_failure_threshold = 1000; // disable consec path
+ auto clock = std::make_shared<MockClock>();
+ CircuitBreakerSlice slice("svc:h:p p=0", 0, cb,
+ [clock]() { return clock->now; });
+
+ // Alternate 10 failures and 10 successes within the same second —
+ // ratio = 50%, total = 20 (>= minimum_volume).
+ for (int i = 0; i < 10; ++i) {
+ slice.ReportSuccess(false, slice.CurrentGenerationForTesting());
+ }
+ // A success between-failures clears consecutive_failures_, confirming
+ // only rate path can trip here.
+ for (int i = 0; i < 9; ++i) {
+ slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting());
+ }
+ // Still CLOSED — 9/19 < 50%.
+ bool pass_pre = slice.CurrentState() == State::CLOSED;
+ // 10th failure brings ratio to 10/20 = 50% exactly — tripper.
+ slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + bool pass = pass_pre && slice.CurrentState() == State::OPEN && + slice.Trips() == 1; + TestFramework::RecordTest("CB: failure-rate trip (50% of 20)", pass, "", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB: failure-rate trip (50% of 20)", false, + e.what(), TestFramework::TestCategory::OTHER); + } +} + +void TestMinimumVolumeGate() { + std::cout << "\n[TEST] CB: minimum_volume gate..." << std::endl; + try { + auto cb = DefaultEnabledConfig(); + cb.consecutive_failure_threshold = 1000; // disable consec path + cb.minimum_volume = 20; + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // 19 total calls, all failures — should NOT trip (below volume). + for (int i = 0; i < 19; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + bool pass = slice.CurrentState() == State::CLOSED && slice.Trips() == 0; + TestFramework::RecordTest("CB: minimum_volume gate", pass, "", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB: minimum_volume gate", false, e.what(), + TestFramework::TestCategory::OTHER); + } +} + +void TestOpenBeforeDurationStaysOpen() { + std::cout << "\n[TEST] CB: OPEN rejects before elapsed..." << std::endl; + try { + auto cb = DefaultEnabledConfig(); + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + // Advance less than base_open_duration_ms (5000ms). 
+ clock->Advance(std::chrono::milliseconds(2000)); + Decision d = slice.TryAcquire().decision; + bool pass = d == Decision::REJECTED_OPEN && + slice.CurrentState() == State::OPEN; + TestFramework::RecordTest("CB: OPEN rejects before elapsed", pass, "", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB: OPEN rejects before elapsed", false, + e.what(), TestFramework::TestCategory::OTHER); + } +} + +void TestOpenToHalfOpenAfterDuration() { + std::cout << "\n[TEST] CB: OPEN → HALF_OPEN after duration..." << std::endl; + try { + auto cb = DefaultEnabledConfig(); + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + clock->Advance(std::chrono::milliseconds(cb.base_open_duration_ms + 1)); + Decision d = slice.TryAcquire().decision; + bool pass = d == Decision::ADMITTED_PROBE && + slice.CurrentState() == State::HALF_OPEN; + TestFramework::RecordTest("CB: OPEN -> HALF_OPEN after duration", pass, "", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB: OPEN -> HALF_OPEN after duration", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +void TestHalfOpenAllProbesSucceed() { + std::cout << "\n[TEST] CB: HALF_OPEN 5 probe successes close..." << std::endl; + try { + auto cb = DefaultEnabledConfig(); + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + clock->Advance(std::chrono::milliseconds(cb.base_open_duration_ms + 1)); + + // Take 5 probes; report success on each. 
+ for (int i = 0; i < cb.permitted_half_open_calls; ++i) { + Decision d = slice.TryAcquire().decision; + if (d != Decision::ADMITTED_PROBE) { + TestFramework::RecordTest( + "CB: HALF_OPEN 5 probe successes close", false, + "probe " + std::to_string(i) + " not ADMITTED_PROBE", + TestFramework::TestCategory::OTHER); + return; + } + slice.ReportSuccess(true, slice.CurrentGenerationForTesting()); + } + bool pass = slice.CurrentState() == State::CLOSED && + slice.ProbeSuccesses() == 5; + TestFramework::RecordTest("CB: HALF_OPEN 5 probe successes close", + pass, "", TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB: HALF_OPEN 5 probe successes close", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +void TestHalfOpenProbeFailureReopens() { + std::cout << "\n[TEST] CB: HALF_OPEN single probe fail re-opens..." << std::endl; + try { + auto cb = DefaultEnabledConfig(); + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + clock->Advance(std::chrono::milliseconds(cb.base_open_duration_ms + 1)); + + // Take 1 probe, fail it. 
+ Decision d = slice.TryAcquire().decision; + slice.ReportFailure(FailureKind::RESPONSE_5XX, true, slice.CurrentGenerationForTesting()); + bool pass = d == Decision::ADMITTED_PROBE && + slice.CurrentState() == State::OPEN && + slice.Trips() == 2 && // initial trip + re-trip + slice.ProbeFailures() == 1; + TestFramework::RecordTest("CB: HALF_OPEN probe fail re-opens", pass, "", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB: HALF_OPEN probe fail re-opens", false, + e.what(), TestFramework::TestCategory::OTHER); + } +} + +void TestHalfOpenExhaustedSlotsRejected() { + std::cout << "\n[TEST] CB: HALF_OPEN over capacity rejects..." << std::endl; + try { + auto cb = DefaultEnabledConfig(); + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + clock->Advance(std::chrono::milliseconds(cb.base_open_duration_ms + 1)); + // Take 5 probes but DON'T report outcomes yet. + for (int i = 0; i < 5; ++i) slice.TryAcquire(); + // 6th TryAcquire must reject (all slots taken). + Decision d = slice.TryAcquire().decision; + bool pass = d == Decision::REJECTED_OPEN; + TestFramework::RecordTest("CB: HALF_OPEN over capacity rejects", + pass, "", TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB: HALF_OPEN over capacity rejects", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +void TestExponentialBackoff() { + std::cout << "\n[TEST] CB: exponential backoff progression..." 
<< std::endl; + try { + auto cb = DefaultEnabledConfig(); + cb.base_open_duration_ms = 1000; + cb.max_open_duration_ms = 8000; + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + auto trip_then_probe_fail = [&]() { + // Reach OPEN. + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + }; + auto measure_open_ms = [&]() { + // open_until - now at the instant of the trip. + auto open_until = slice.OpenUntil(); + auto remaining = open_until - clock->now; + return std::chrono::duration_cast( + remaining).count(); + }; + + // Trip 1 — expect ~1000ms. + trip_then_probe_fail(); + int64_t d1 = measure_open_ms(); + // Move to HALF_OPEN and fail the probe → trip 2. + clock->Advance(std::chrono::milliseconds(d1 + 1)); + slice.TryAcquire(); // HALF_OPEN, ADMITTED_PROBE + slice.ReportFailure(FailureKind::RESPONSE_5XX, true, slice.CurrentGenerationForTesting()); + int64_t d2 = measure_open_ms(); + clock->Advance(std::chrono::milliseconds(d2 + 1)); + slice.TryAcquire(); + slice.ReportFailure(FailureKind::RESPONSE_5XX, true, slice.CurrentGenerationForTesting()); + int64_t d3 = measure_open_ms(); + clock->Advance(std::chrono::milliseconds(d3 + 1)); + slice.TryAcquire(); + slice.ReportFailure(FailureKind::RESPONSE_5XX, true, slice.CurrentGenerationForTesting()); + int64_t d4 = measure_open_ms(); + clock->Advance(std::chrono::milliseconds(d4 + 1)); + slice.TryAcquire(); + slice.ReportFailure(FailureKind::RESPONSE_5XX, true, slice.CurrentGenerationForTesting()); + int64_t d5 = measure_open_ms(); + + // Expect 1000, 2000, 4000, 8000, 8000 (capped). 
+ bool pass = d1 == 1000 && d2 == 2000 && d3 == 4000 && + d4 == 8000 && d5 == 8000; + std::string err = "d1=" + std::to_string(d1) + " d2=" + std::to_string(d2) + + " d3=" + std::to_string(d3) + " d4=" + std::to_string(d4) + + " d5=" + std::to_string(d5); + TestFramework::RecordTest("CB: exponential backoff", + pass, pass ? "" : err, TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB: exponential backoff", false, e.what(), + TestFramework::TestCategory::OTHER); + } +} + +void TestResetOnClose() { + std::cout << "\n[TEST] CB: consecutive_trips resets on close..." << std::endl; + try { + auto cb = DefaultEnabledConfig(); + cb.base_open_duration_ms = 1000; + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // Trip 1. + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + clock->Advance(std::chrono::milliseconds(1001)); + // Move to HALF_OPEN. + for (int i = 0; i < 5; ++i) { + slice.TryAcquire(); + slice.ReportSuccess(true, slice.CurrentGenerationForTesting()); + } + // Now CLOSED. Trip again — expect base_duration again (not doubled). + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + auto open_until = slice.OpenUntil(); + auto remaining = open_until - clock->now; + int64_t d_after_close = std::chrono::duration_cast< + std::chrono::milliseconds>(remaining).count(); + bool pass = d_after_close == 1000; + TestFramework::RecordTest("CB: trips reset on close", pass, + pass ? 
"" : "expected 1000ms, got " + std::to_string(d_after_close), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB: trips reset on close", false, e.what(), + TestFramework::TestCategory::OTHER); + } +} + +// ============================================================================ +// Window tests +// ============================================================================ + +void TestWindowBucketByCurrentSecond() { + std::cout << "\n[TEST] CB Window: bucket by current second..." << std::endl; + try { + CircuitBreakerWindow w(10); + auto t0 = std::chrono::steady_clock::time_point(std::chrono::seconds(100)); + w.AddSuccess(t0); + w.AddFailure(t0); + w.AddFailure(t0); + bool pass = w.TotalCount(t0) == 3 && w.FailureCount(t0) == 2; + TestFramework::RecordTest("CB Window: bucket by current second", pass, + "", TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB Window: bucket by current second", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +void TestWindowAdvanceSkipsStale() { + std::cout << "\n[TEST] CB Window: advance skips stale..." << std::endl; + try { + CircuitBreakerWindow w(10); + auto t0 = std::chrono::steady_clock::time_point(std::chrono::seconds(100)); + w.AddFailure(t0); // bucket 100%10 = 0 + auto t1 = t0 + std::chrono::seconds(15); // beyond window + // After long idle, incoming record should see zero history. + bool pre = w.TotalCount(t1) == 0; + w.AddSuccess(t1); + bool pass = pre && w.TotalCount(t1) == 1 && w.FailureCount(t1) == 0; + TestFramework::RecordTest("CB Window: advance skips stale", pass, "", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB Window: advance skips stale", false, + e.what(), TestFramework::TestCategory::OTHER); + } +} + +void TestWindowPartialExpiry() { + std::cout << "\n[TEST] CB Window: partial expiry..." 
<< std::endl; + try { + CircuitBreakerWindow w(10); + auto t0 = std::chrono::steady_clock::time_point(std::chrono::seconds(100)); + w.AddFailure(t0); // sec 100 + auto t1 = t0 + std::chrono::seconds(5); + w.AddFailure(t1); // sec 105 + auto t2 = t0 + std::chrono::seconds(11); + // sec 100 is now out of window (100 + 10 <= 111 - 1 = 110). So: + // bucket 0 (sec 100 or sec 110) would have been zeroed when advancing + // from head=105 past sec 110. + bool pass = w.TotalCount(t2) == 1 && w.FailureCount(t2) == 1; + TestFramework::RecordTest("CB Window: partial expiry", pass, "", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB Window: partial expiry", false, + e.what(), TestFramework::TestCategory::OTHER); + } +} + +void TestWindowReset() { + std::cout << "\n[TEST] CB Window: reset clears..." << std::endl; + try { + CircuitBreakerWindow w(10); + auto t0 = std::chrono::steady_clock::time_point(std::chrono::seconds(100)); + w.AddFailure(t0); w.AddSuccess(t0); w.AddFailure(t0); + w.Reset(); + bool pass = w.TotalCount(t0) == 0 && w.FailureCount(t0) == 0; + TestFramework::RecordTest("CB Window: reset clears", pass, "", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB Window: reset clears", false, e.what(), + TestFramework::TestCategory::OTHER); + } +} + +// ============================================================================ +// Dry-run + Reload + Edge cases +// ============================================================================ + +void TestDryRunAdmits() { + std::cout << "\n[TEST] CB: dry_run admits through OPEN..." 
<< std::endl; + try { + auto cb = DefaultEnabledConfig(); + cb.dry_run = true; + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + // OPEN + dry_run → REJECTED_OPEN_DRYRUN (caller proceeds). + Decision d = slice.TryAcquire().decision; + bool pass = d == Decision::REJECTED_OPEN_DRYRUN && + slice.CurrentState() == State::OPEN && + slice.Rejected() == 1; + TestFramework::RecordTest("CB: dry_run admits through OPEN", pass, "", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB: dry_run admits through OPEN", false, + e.what(), TestFramework::TestCategory::OTHER); + } +} + +void TestReloadPreservesState() { + std::cout << "\n[TEST] CB: reload preserves live state..." << std::endl; + try { + auto cb = DefaultEnabledConfig(); + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + // OPEN at this point. + auto cb2 = cb; + cb2.consecutive_failure_threshold = 2; // tighter + cb2.window_seconds = 30; // triggers ring resize + slice.Reload(cb2); + // Still OPEN immediately after reload — live state preserved. + bool pass = slice.CurrentState() == State::OPEN; + TestFramework::RecordTest("CB: reload preserves live state", pass, "", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB: reload preserves live state", false, + e.what(), TestFramework::TestCategory::OTHER); + } +} + +void TestConsecutiveThresholdOne() { + std::cout << "\n[TEST] CB: threshold=1 single failure trips..." 
<< std::endl; + try { + auto cb = DefaultEnabledConfig(); + cb.consecutive_failure_threshold = 1; + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + bool pass = slice.CurrentState() == State::OPEN && slice.Trips() == 1; + TestFramework::RecordTest("CB: threshold=1 single failure trips", + pass, "", TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB: threshold=1 single failure trips", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +void TestSuccessClearsConsecutive() { + std::cout << "\n[TEST] CB: success clears consecutive..." << std::endl; + try { + auto cb = DefaultEnabledConfig(); + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + for (int i = 0; i < 4; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + slice.ReportSuccess(false, slice.CurrentGenerationForTesting()); // resets consecutive + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + // consecutive is back to 1, no trip. 
+ bool pass = slice.CurrentState() == State::CLOSED; + TestFramework::RecordTest("CB: success clears consecutive", pass, "", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB: success clears consecutive", false, + e.what(), TestFramework::TestCategory::OTHER); + } +} + +// ============================================================================ +// Regression tests — critical bugs caught in code review +// ============================================================================ + +// BUG: late non-probe failure after trip re-entered TripClosedToOpen, inflating +// consecutive_trips_ (→ longer backoff) and firing a spurious CLOSED→OPEN +// transition edge. Fix: guard ReportFailure(probe=false) on state_ == CLOSED. +void TestLateFailureAfterTripDoesNotInflateBackoff() { + std::cout << "\n[TEST] CB: late failure after trip does not inflate backoff..." + << std::endl; + try { + auto cb = DefaultEnabledConfig(); + cb.base_open_duration_ms = 1000; + cb.max_open_duration_ms = 60000; + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // Admit 10 requests in CLOSED. Slice state is single-threaded so + // admission + bookkeeping is serialized by the event loop — but in + // production the outcomes for those admitted requests can arrive after + // the slice has already tripped. + for (int i = 0; i < 10; ++i) { + Decision d = slice.TryAcquire().decision; + if (d != Decision::ADMITTED) { + TestFramework::RecordTest("CB: late failure after trip", + false, "admission i=" + std::to_string(i) + " not ADMITTED", + TestFramework::TestCategory::OTHER); + return; + } + } + // Report 5 failures — trip at the 5th. 
+ for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + if (slice.CurrentState() != State::OPEN) { + TestFramework::RecordTest("CB: late failure after trip", false, + "expected OPEN after 5 failures", + TestFramework::TestCategory::OTHER); + return; + } + int64_t trips_after_first_trip = slice.Trips(); + // Capture open_until immediately post-trip. + auto open_until_initial = slice.OpenUntil(); + + // Now the remaining 5 in-flight requests land with late failures. + // Before the fix, each of these would go through the CLOSED path, + // climb consecutive_failures_, and trigger another TripClosedToOpen + // even though state is already OPEN. + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + int64_t trips_after_late = slice.Trips(); + auto open_until_after_late = slice.OpenUntil(); + + bool pass = slice.CurrentState() == State::OPEN && + trips_after_late == trips_after_first_trip && // no ghost trip + open_until_after_late == open_until_initial; // backoff unchanged + TestFramework::RecordTest( + "CB: late failure after trip does not inflate backoff", + pass, pass ? "" : + "trips: " + std::to_string(trips_after_first_trip) + + " → " + std::to_string(trips_after_late), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: late failure after trip does not inflate backoff", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// BUG: late non-probe success after trip would reset consecutive_failures_ +// and pollute the sliding window (pretending a fresh CLOSED cycle observed +// successes). Fix: guard ReportSuccess(probe=false) on state_ == CLOSED. +void TestLateSuccessAfterTripIgnored() { + std::cout << "\n[TEST] CB: late success after trip ignored..." 
<< std::endl; + try { + auto cb = DefaultEnabledConfig(); + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + // Slice is OPEN now. A late success arrives — must not change state. + State pre = slice.CurrentState(); + slice.ReportSuccess(false, slice.CurrentGenerationForTesting()); + bool pass = pre == State::OPEN && slice.CurrentState() == State::OPEN; + TestFramework::RecordTest("CB: late success after trip ignored", pass, + "", TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB: late success after trip ignored", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// BUG: HALF_OPEN admission kept accepting probes after the first probe +// failure (only enforcing `inflight < permitted`), so under load a failed +// recovery cycle could keep leaking traffic indefinitely instead of re-OPENing +// after the in-flight probes drained. Fix: short-circuit on saw_failure. +void TestHalfOpenStopsAdmittingAfterFirstProbeFailure() { + std::cout << "\n[TEST] CB: HALF_OPEN stops admitting after probe fail..." + << std::endl; + try { + auto cb = DefaultEnabledConfig(); + cb.permitted_half_open_calls = 5; + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // Trip the breaker. + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + clock->Advance(std::chrono::milliseconds(cb.base_open_duration_ms + 1)); + + // Admit 2 probes. Report failure on the first (but NOT the second yet + // — leave 1 in-flight so we can observe the short-circuit). 
+ Decision d1 = slice.TryAcquire().decision; // ADMITTED_PROBE, inflight=1 + Decision d2 = slice.TryAcquire().decision; // ADMITTED_PROBE, inflight=2 + if (d1 != Decision::ADMITTED_PROBE || d2 != Decision::ADMITTED_PROBE) { + TestFramework::RecordTest( + "CB: HALF_OPEN stops admitting after probe fail", + false, "probes not admitted as expected", + TestFramework::TestCategory::OTHER); + return; + } + // Fail the first probe — inflight drops to 1, saw_failure=true. + // Last-probe trip does not yet fire (inflight is still 1). + slice.ReportFailure(FailureKind::RESPONSE_5XX, true, slice.CurrentGenerationForTesting()); + + // State must still be HALF_OPEN (final probe not yet completed). + State mid = slice.CurrentState(); + + // Subsequent TryAcquire — BEFORE fix this would succeed because + // inflight (1) < permitted (5). AFTER fix it short-circuits because + // saw_failure is set. + Decision d3 = slice.TryAcquire().decision; + + bool pass = mid == State::HALF_OPEN && + d3 == Decision::REJECTED_OPEN; + TestFramework::RecordTest( + "CB: HALF_OPEN stops admitting after probe fail", + pass, pass ? "" : "expected REJECTED_OPEN on 3rd TryAcquire", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: HALF_OPEN stops admitting after probe fail", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// Verifies the dedicated HALF_OPEN-full counter is bumped separately from the +// generic `rejected_` counter, so observability snapshots can distinguish +// "open, backoff not elapsed" from "probing, no slots left". +void TestHalfOpenFullCounterSeparate() { + std::cout << "\n[TEST] CB: HALF_OPEN_FULL counter separate..." << std::endl; + try { + auto cb = DefaultEnabledConfig(); + cb.permitted_half_open_calls = 2; + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // Trip → OPEN reject increments generic counter only. 
+ for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + slice.TryAcquire(); // REJECTED_OPEN (backoff active) + int64_t rejected_open_only = slice.Rejected(); + int64_t half_open_full_open_only = slice.RejectedHalfOpenFull(); + + // Elapse backoff → HALF_OPEN. Fill the probe budget, then a 3rd + // TryAcquire rejects with half_open_full, incrementing both counters. + clock->Advance(std::chrono::milliseconds(cb.base_open_duration_ms + 1)); + slice.TryAcquire(); // probe 1 admitted + slice.TryAcquire(); // probe 2 admitted (budget full) + slice.TryAcquire(); // REJECTED (full) + int64_t rejected_total = slice.Rejected(); + int64_t half_open_full_total = slice.RejectedHalfOpenFull(); + + bool pass = rejected_open_only == 1 && + half_open_full_open_only == 0 && + rejected_total == 2 && // 1 OPEN + 1 HALF_OPEN_FULL + half_open_full_total == 1; // only the HALF_OPEN one + TestFramework::RecordTest("CB: HALF_OPEN_FULL counter separate", + pass, pass ? "" : + "rej=" + std::to_string(rejected_total) + + " hof=" + std::to_string(half_open_full_total), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB: HALF_OPEN_FULL counter separate", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// BUG (review round 2, P2): Reload preserved stale state across enabled +// toggles. Disabling while OPEN and re-enabling later resumed the OPEN state, +// rejecting requests despite an explicit operator off→on cycle. Disabling +// after accumulated consecutive failures would re-trip on the very next +// failure. Fix: reset state to CLOSED whenever enabled toggles. +void TestReloadResetsStateOnEnabledToggleWhileOpen() { + std::cout << "\n[TEST] CB: reload resets state on enabled toggle (while OPEN)..." 
+ << std::endl; + try { + auto cb = DefaultEnabledConfig(); + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // Drive to OPEN. + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + if (slice.CurrentState() != State::OPEN) { + TestFramework::RecordTest( + "CB: reload resets state on enabled toggle (OPEN)", false, + "precondition: slice not OPEN", + TestFramework::TestCategory::OTHER); + return; + } + + // Disable via reload — state must reset to CLOSED. + auto disabled = cb; + disabled.enabled = false; + slice.Reload(disabled); + bool disabled_closed = slice.CurrentState() == State::CLOSED; + + // Re-enable via reload — state must remain CLOSED (no stale OPEN). + slice.Reload(cb); + bool reenabled_closed = slice.CurrentState() == State::CLOSED; + + // And the slice must NOT insta-trip on a single failure (pre-fix, + // consecutive_failures_ could have persisted ≥ threshold). + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + bool one_fail_no_trip = slice.CurrentState() == State::CLOSED; + + bool pass = disabled_closed && reenabled_closed && one_fail_no_trip; + TestFramework::RecordTest( + "CB: reload resets state on enabled toggle (OPEN)", pass, + pass ? "" : "disabled_closed=" + std::to_string(disabled_closed) + + " reenabled_closed=" + std::to_string(reenabled_closed) + + " one_fail_no_trip=" + std::to_string(one_fail_no_trip), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: reload resets state on enabled toggle (OPEN)", false, e.what(), + TestFramework::TestCategory::OTHER); + } +} + +// BUG (review round 2, P2, variant): if disable happens while +// consecutive_failures_ has accumulated but not yet tripped, re-enable would +// inherit that count and trip early on the next failure. 
+void TestReloadResetsConsecutiveFailuresOnEnabledToggle() { + std::cout << "\n[TEST] CB: reload clears consecutive_failures on enable toggle..." + << std::endl; + try { + auto cb = DefaultEnabledConfig(); + cb.consecutive_failure_threshold = 5; + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // 4 failures — just under threshold. State still CLOSED. + for (int i = 0; i < 4; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + if (slice.CurrentState() != State::CLOSED) { + TestFramework::RecordTest( + "CB: reload clears consecutive_failures", false, + "precondition: slice not CLOSED", + TestFramework::TestCategory::OTHER); + return; + } + + // Disable then re-enable. + auto disabled = cb; disabled.enabled = false; + slice.Reload(disabled); + slice.Reload(cb); + + // A single failure post-reenable must NOT trip — consecutive_failures_ + // should have been reset to 0, not preserved at 4. + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + bool pass = slice.CurrentState() == State::CLOSED; + TestFramework::RecordTest( + "CB: reload clears consecutive_failures on enable toggle", + pass, + pass ? "" : "expected CLOSED after 1 post-reenable failure, got " + + std::string(circuit_breaker::StateName(slice.CurrentState())), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: reload clears consecutive_failures on enable toggle", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// Threshold-change-only reload (enabled unchanged) MUST preserve live state +// per design §10. Regression guard for fix #1. +void TestReloadThresholdChangePreservesState() { + std::cout << "\n[TEST] CB: reload preserves state when only thresholds change..." 
+ << std::endl; + try { + auto cb = DefaultEnabledConfig(); + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + // OPEN. Reload with a tighter threshold but enabled unchanged. + auto tighter = cb; + tighter.consecutive_failure_threshold = 2; + slice.Reload(tighter); + // State must remain OPEN — live state preservation. + bool pass = slice.CurrentState() == State::OPEN; + TestFramework::RecordTest( + "CB: reload preserves state on threshold-only change", + pass, pass ? "" : "expected OPEN, got " + + std::string(circuit_breaker::StateName(slice.CurrentState())), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: reload preserves state on threshold-only change", false, + e.what(), TestFramework::TestCategory::OTHER); + } +} + +// BUG (review round 2, P3): saw_failure short-circuit incorrectly bumped the +// HALF_OPEN_FULL counter, polluting dashboards that need to distinguish +// "probing, no capacity left" from "recovery attempt is failing". +void TestSawFailureDoesNotBumpHalfOpenFullCounter() { + std::cout << "\n[TEST] CB: saw_failure reject does not bump HALF_OPEN_FULL..." + << std::endl; + try { + auto cb = DefaultEnabledConfig(); + cb.permitted_half_open_calls = 5; // plenty of capacity + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + clock->Advance(std::chrono::milliseconds(cb.base_open_duration_ms + 1)); + + // Admit 2 probes, fail the first — saw_failure=true, inflight=1. 
+ slice.TryAcquire(); // probe 1 admitted + slice.TryAcquire(); // probe 2 admitted + slice.ReportFailure(FailureKind::RESPONSE_5XX, true, slice.CurrentGenerationForTesting()); + + int64_t hof_before = slice.RejectedHalfOpenFull(); + // Reject via saw_failure short-circuit (capacity is NOT exhausted — + // only 1 probe actually in flight, and permitted is 5). + Decision d = slice.TryAcquire().decision; + int64_t hof_after = slice.RejectedHalfOpenFull(); + + // Still REJECTED_OPEN (same client-visible outcome), but + // RejectedHalfOpenFull must NOT be incremented — this is a + // "recovery failing" reject, not a capacity reject. + bool pass = d == Decision::REJECTED_OPEN && + hof_before == 0 && + hof_after == 0; + TestFramework::RecordTest( + "CB: saw_failure reject does not bump HALF_OPEN_FULL", + pass, pass ? "" : "hof_before=" + std::to_string(hof_before) + + " hof_after=" + std::to_string(hof_after), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: saw_failure reject does not bump HALF_OPEN_FULL", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// BUG (review round 3, P2): TransitionOpenToHalfOpen deliberately left +// `open_until_steady_ns_` populated, violating the documented OpenUntil() +// contract ("zero when not OPEN"). A consumer computing Retry-After +// from a HALF_OPEN slice would compute (stale_deadline - now), which is +// negative once HALF_OPEN begins. +void TestOpenUntilZeroWhenHalfOpen() { + std::cout << "\n[TEST] CB: OpenUntil() zero in HALF_OPEN..." << std::endl; + try { + auto cb = DefaultEnabledConfig(); + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // Trip → OPEN. OpenUntil() must be non-zero (contract: zero iff NOT OPEN). 
+ for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, + slice.CurrentGenerationForTesting()); + } + auto open_ns = slice.OpenUntil(); + bool open_nonzero = open_ns != std::chrono::steady_clock::time_point{}; + + // Elapse backoff → HALF_OPEN via TryAcquire. + clock->Advance(std::chrono::milliseconds(cb.base_open_duration_ms + 1)); + auto a = slice.TryAcquire(); + bool halfopen = slice.CurrentState() == State::HALF_OPEN && + a.decision == Decision::ADMITTED_PROBE; + + // Contract: OpenUntil() zero now that state != OPEN. + auto halfopen_ns = slice.OpenUntil(); + bool halfopen_zero = halfopen_ns == std::chrono::steady_clock::time_point{}; + + bool pass = open_nonzero && halfopen && halfopen_zero; + TestFramework::RecordTest( + "CB: OpenUntil() zero in HALF_OPEN", + pass, pass ? "" : + "open_nonzero=" + std::to_string(open_nonzero) + + " halfopen=" + std::to_string(halfopen) + + " halfopen_zero=" + std::to_string(halfopen_zero), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: OpenUntil() zero in HALF_OPEN", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// BUG (review round 3, P1): Reload reset the state on enabled toggle but +// gave Report* no way to distinguish pre-toggle admissions from post-toggle +// ones. Stale completions then polluted the fresh CLOSED cycle. Fixed with +// a generation token captured at admission and checked at report. +void TestStaleGenerationReportsDroppedAfterReloadToggle() { + std::cout << "\n[TEST] CB: stale-generation reports dropped after reload toggle..." + << std::endl; + try { + auto cb = DefaultEnabledConfig(); + cb.consecutive_failure_threshold = 3; // make insta-trip detection easy + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // Admit 3 requests in the original CLOSED cycle (generation = A). 
+ auto a1 = slice.TryAcquire(); + auto a2 = slice.TryAcquire(); + auto a3 = slice.TryAcquire(); + uint64_t gen_A = a1.generation; + bool same_gen_pre = a2.generation == gen_A && a3.generation == gen_A; + + // Operator toggles: disable then re-enable → fresh CLOSED cycle. + auto disabled = cb; disabled.enabled = false; + slice.Reload(disabled); + slice.Reload(cb); + // After toggle, state is CLOSED and generation has advanced. + uint64_t gen_B = slice.CurrentGenerationForTesting(); + bool generation_advanced = gen_B != gen_A; + + // Late failures from the pre-toggle cycle arrive. Without the fix, + // these would increment consecutive_failures_ and trip the fresh + // cycle IMMEDIATELY (threshold=3, 3 late failures). + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, gen_A); + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, gen_A); + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, gen_A); + + // Fresh cycle must be untouched. + bool state_still_closed = slice.CurrentState() == State::CLOSED; + bool stale_counter_bumped = slice.ReportsStaleGeneration() == 3; + + // A fresh post-toggle admission + 3 REAL failures should still trip — + // so the guard didn't over-drop. + auto fresh = slice.TryAcquire(); + for (int i = 0; i < 3; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, fresh.generation); + } + bool fresh_trips = slice.CurrentState() == State::OPEN; + + bool pass = same_gen_pre && generation_advanced && + state_still_closed && stale_counter_bumped && fresh_trips; + TestFramework::RecordTest( + "CB: stale-generation reports dropped after reload toggle", + pass, pass ? 
"" : + "same_gen_pre=" + std::to_string(same_gen_pre) + + " gen_advanced=" + std::to_string(generation_advanced) + + " state_closed=" + std::to_string(state_still_closed) + + " stale_cnt=" + std::to_string(slice.ReportsStaleGeneration()) + + " fresh_trips=" + std::to_string(fresh_trips), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: stale-generation reports dropped after reload toggle", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// Generation also advances across state transitions (not just Reload), so +// a report admitted in CLOSED cycle A that completes after OPEN → HALF_OPEN +// → CLOSED cycle B is dropped instead of polluting cycle B's counters. +void TestStaleGenerationReportsDroppedAcrossStateTransitions() { + std::cout << "\n[TEST] CB: stale reports dropped across CLOSED->OPEN->CLOSED..." + << std::endl; + try { + auto cb = DefaultEnabledConfig(); + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // CLOSED cycle A — admit a request, capture its generation. + auto admit_A = slice.TryAcquire(); + uint64_t gen_A = admit_A.generation; + + // Drive to OPEN, then HALF_OPEN, then CLOSED (cycle B) via probe success. + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, + slice.CurrentGenerationForTesting()); + } + clock->Advance(std::chrono::milliseconds(cb.base_open_duration_ms + 1)); + for (int i = 0; i < cb.permitted_half_open_calls; ++i) { + auto p = slice.TryAcquire(); // probe + slice.ReportSuccess(true, p.generation); + } + bool cycleB_closed = slice.CurrentState() == State::CLOSED; + uint64_t gen_B = slice.CurrentGenerationForTesting(); + bool gen_advanced = gen_B > gen_A; + + // Now the original cycle-A request finally reports a success. 
In a + // world without the generation guard, this would reset cycle B's + // (freshly-zero) consecutive_failures_ and add to cycle B's window, + // polluting fresh telemetry. + int64_t stale_before = slice.ReportsStaleGeneration(); + slice.ReportSuccess(false, gen_A); + int64_t stale_after = slice.ReportsStaleGeneration(); + bool dropped = stale_after == stale_before + 1; + + bool pass = cycleB_closed && gen_advanced && dropped; + TestFramework::RecordTest( + "CB: stale reports dropped across CLOSED->OPEN->CLOSED", + pass, pass ? "" : + "cycleB_closed=" + std::to_string(cycleB_closed) + + " gen_advanced=" + std::to_string(gen_advanced) + + " dropped=" + std::to_string(dropped), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: stale reports dropped across CLOSED->OPEN->CLOSED", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// BUG (review round 4, P2): Reload that resizes the rolling window without +// toggling enabled cleared the window buckets but left generation_ unchanged. +// Late reports from pre-reload admissions would carry the still-current +// generation, pass the guard, and re-populate the freshly empty window — +// mixing pre-reload and post-reload traffic. A pre-reload + post-reload +// failure pair could satisfy minimum_volume / failure_rate immediately on +// what should be a fresh observation cycle. +void TestWindowResizeAdvancesGeneration() { + std::cout << "\n[TEST] CB: window resize advances generation..." << std::endl; + try { + // Use rate-trip path only (high consec threshold disables that path), + // a low minimum_volume so 2 failures suffice, and a high + // failure_rate_threshold so the trip relies on the rate calc. 
+ CircuitBreakerConfig cb; + cb.enabled = true; + cb.consecutive_failure_threshold = 1000; // disable consecutive path + cb.failure_rate_threshold = 50; + cb.minimum_volume = 2; + cb.window_seconds = 10; + cb.permitted_half_open_calls = 5; + cb.base_open_duration_ms = 5000; + cb.max_open_duration_ms = 60000; + + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // Pre-reload: admit a request and capture its generation. + auto admit_pre = slice.TryAcquire(); + uint64_t gen_pre = admit_pre.generation; + + // Reload: change window_seconds but keep enabled=true. Window is + // resized (cleared) and generation MUST advance so the pre-reload + // admission's late report doesn't seed the new window. + auto resized = cb; + resized.window_seconds = 30; + slice.Reload(resized); + + uint64_t gen_post = slice.CurrentGenerationForTesting(); + bool gen_advanced = gen_post != gen_pre; + + // The pre-reload admission completes (failure). Without the fix, + // this would add one failure to the freshly-empty window. Then + // a post-reload admission's failure brings total=2 >= minimum_volume, + // failures=2/2=100% >= 50% → IMMEDIATE TRIP on a fresh window. + // With the fix, the pre-reload report is dropped (counted as stale). + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, gen_pre); + + int64_t stale_after_pre = slice.ReportsStaleGeneration(); + + // Now a real post-reload admission and failure — single failure in + // a fresh window of size 30s. total=1, below minimum_volume=2 → no trip. + auto admit_post = slice.TryAcquire(); + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, admit_post.generation); + + bool state_still_closed = slice.CurrentState() == State::CLOSED; + bool stale_dropped = stale_after_pre == 1; + + bool pass = gen_advanced && state_still_closed && stale_dropped; + TestFramework::RecordTest( + "CB: window resize advances generation", + pass, pass ? 
"" : + "gen_advanced=" + std::to_string(gen_advanced) + + " state_closed=" + std::to_string(state_still_closed) + + " stale_count=" + std::to_string(slice.ReportsStaleGeneration()), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: window resize advances generation", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// Regression guard: a reload that changes only thresholds (no window resize, +// no enabled toggle) MUST preserve generation. Operator intent is "apply new +// thresholds to existing observations" — the round-4 fix's window-resize +// generation bump must NOT trigger here. +void TestThresholdOnlyReloadDoesNotAdvanceGeneration() { + std::cout << "\n[TEST] CB: threshold-only reload preserves generation..." + << std::endl; + try { + auto cb = DefaultEnabledConfig(); + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + auto admit = slice.TryAcquire(); + uint64_t gen_pre = admit.generation; + + // Tighten thresholds; same enabled, same window_seconds. + auto tightened = cb; + tightened.consecutive_failure_threshold = 2; + tightened.failure_rate_threshold = 30; + slice.Reload(tightened); + + uint64_t gen_post = slice.CurrentGenerationForTesting(); + bool gen_preserved = gen_post == gen_pre; + + // The pre-reload admission's report should NOT be dropped — operator + // wants the new thresholds applied to existing in-flight observations. + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, gen_pre); + bool stale_zero = slice.ReportsStaleGeneration() == 0; + + bool pass = gen_preserved && stale_zero; + TestFramework::RecordTest( + "CB: threshold-only reload preserves generation", + pass, pass ? 
"" : + "gen_preserved=" + std::to_string(gen_preserved) + + " stale_zero=" + std::to_string(stale_zero), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: threshold-only reload preserves generation", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// BUG (review round 5, P1): Reload with window_seconds change while the +// slice is HALF_OPEN used to bump the single `generation_`, invalidating +// every in-flight probe. Those probes' late Report* calls then dropped +// WITHOUT decrementing half_open_inflight_, wedging the slice in HALF_OPEN +// with all probe slots stuck "in flight" forever — subsequent TryAcquires +// rejected with half_open_full indefinitely until another full reset. +// +// Fix: split generation into closed_gen_ (non-probe, CLOSED-state data) +// and halfopen_gen_ (probe, HALF_OPEN-state data). window_seconds reload +// bumps only closed_gen_ because it only resets CLOSED-state data. +void TestWindowResizeDuringHalfOpenDoesNotStrandProbes() { + std::cout << "\n[TEST] CB: window resize during HALF_OPEN preserves probes..." + << std::endl; + try { + auto cb = DefaultEnabledConfig(); + cb.permitted_half_open_calls = 3; + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // Drive to HALF_OPEN. + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, + slice.CurrentGenerationForTesting()); + } + clock->Advance(std::chrono::milliseconds(cb.base_open_duration_ms + 1)); + + // Admit all 3 probes (capture their admission tokens). + auto p1 = slice.TryAcquire(); + auto p2 = slice.TryAcquire(); + auto p3 = slice.TryAcquire(); + bool all_admitted_probe = p1.decision == Decision::ADMITTED_PROBE && + p2.decision == Decision::ADMITTED_PROBE && + p3.decision == Decision::ADMITTED_PROBE; + + // Reload window_seconds (enabled unchanged). 
PRE-fix: bumps single + // generation, invalidates p1/p2/p3 probes → stranded. POST-fix: + // bumps only closed_gen_, probe tokens still match halfopen_gen_. + auto resized = cb; + resized.window_seconds = 30; + slice.Reload(resized); + + // closed_gen advanced, halfopen_gen preserved. + bool closed_gen_advanced = slice.CurrentClosedGenForTesting() != + p1.generation; // p1 was admitted in HALF_OPEN + // but let's check against gen + // we'd have captured in CLOSED + // Actually, directly: probes tokens must still match halfopen_gen_. + bool probe_gen_preserved = + p1.generation == slice.CurrentHalfOpenGenForTesting() && + p2.generation == slice.CurrentHalfOpenGenForTesting() && + p3.generation == slice.CurrentHalfOpenGenForTesting(); + + // Probes report success — each must be accepted and advance the + // HALF_OPEN → CLOSED transition. + slice.ReportSuccess(true, p1.generation); + slice.ReportSuccess(true, p2.generation); + slice.ReportSuccess(true, p3.generation); + + // After 3 probe successes at permitted_half_open_calls=3, slice + // MUST have transitioned to CLOSED. Pre-fix: probes dropped, no + // progression, still HALF_OPEN with inflight stuck at 3. + bool closed_now = slice.CurrentState() == State::CLOSED; + // None of the probes were dropped as stale. + bool no_stale_drops = slice.ReportsStaleGeneration() == 0; + // All 3 probe successes counted. + bool all_probes_counted = slice.ProbeSuccesses() == 3; + + bool pass = all_admitted_probe && probe_gen_preserved && + closed_now && no_stale_drops && all_probes_counted; + (void)closed_gen_advanced; // (informational only) + + TestFramework::RecordTest( + "CB: window resize during HALF_OPEN preserves probes", + pass, pass ? 
"" : + "admitted=" + std::to_string(all_admitted_probe) + + " probe_gen_preserved=" + std::to_string(probe_gen_preserved) + + " closed_now=" + std::to_string(closed_now) + + " stale=" + std::to_string(slice.ReportsStaleGeneration()) + + " probe_success=" + std::to_string(slice.ProbeSuccesses()), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: window resize during HALF_OPEN preserves probes", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// Companion guard: window_seconds reload MUST still invalidate pre-reload +// CLOSED (non-probe) admissions. Ensures the split-gen didn't weaken the +// round-4 fix. +void TestWindowResizeStillInvalidatesClosedAdmissions() { + std::cout << "\n[TEST] CB: window resize invalidates CLOSED admissions..." + << std::endl; + try { + CircuitBreakerConfig cb; + cb.enabled = true; + cb.consecutive_failure_threshold = 1000; // disable consec path + cb.failure_rate_threshold = 50; + cb.minimum_volume = 2; + cb.window_seconds = 10; + cb.permitted_half_open_calls = 5; + cb.base_open_duration_ms = 5000; + cb.max_open_duration_ms = 60000; + + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + auto admit_pre = slice.TryAcquire(); + uint64_t gen_pre = admit_pre.generation; + + auto resized = cb; resized.window_seconds = 30; + slice.Reload(resized); + + // Pre-reload CLOSED admission reports — must drop as stale. + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, gen_pre); + bool dropped_stale = slice.ReportsStaleGeneration() == 1; + + // And state must remain CLOSED (pre-reload failure did NOT seed window). 
+ auto admit_post = slice.TryAcquire(); + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, admit_post.generation); + bool still_closed = slice.CurrentState() == State::CLOSED; + + bool pass = dropped_stale && still_closed; + TestFramework::RecordTest( + "CB: window resize invalidates CLOSED admissions", + pass, pass ? "" : + "dropped=" + std::to_string(dropped_stale) + + " closed=" + std::to_string(still_closed), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: window resize invalidates CLOSED admissions", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// BUG (review round 7, P2): Reload() lowering permitted_half_open_calls +// while a HALF_OPEN cycle is active could close the breaker early and +// discard failures from already-admitted probes. +// +// Scenario (5-probe cycle reloaded down to 1): +// TransitionOpenToHalfOpen: snapshot=5, admit 5 probes. +// Reload: permitted_half_open_calls → 1. +// First success arrives → half_open_successes_=1 ≥ NEW limit (1) +// → TransitionHalfOpenToClosed() fires → halfopen_gen_ bumped. +// Remaining 4 admitted probes are now stale → their failures DROPPED. +// Breaker falsely closes even though 4 probes have not reported yet. +// +// Fix: snapshot config_.permitted_half_open_calls into +// half_open_permitted_snapshot_ at TransitionOpenToHalfOpen time. +// TryAcquire (slot gate) and ReportSuccess (close check) both use the +// snapshot so the cycle budget is frozen for its lifetime. +void TestHalfOpenBudgetFrozenAcrossReload() { + std::cout << "\n[TEST] CB: HALF_OPEN budget frozen across mid-cycle reload..." 
+ << std::endl; + try { + CircuitBreakerConfig cb; + cb.enabled = true; + cb.consecutive_failure_threshold = 5; + cb.failure_rate_threshold = 100; // disable rate-trip + cb.minimum_volume = 1000; // disable rate-trip + cb.window_seconds = 10; + cb.permitted_half_open_calls = 2; // exactly 2 probes for clean drain + cb.base_open_duration_ms = 100; + cb.max_open_duration_ms = 60000; + + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // Trip the breaker. + for (int i = 0; i < 5; ++i) { + auto a = slice.TryAcquire(); + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, a.generation); + } + bool is_open = slice.CurrentState() == State::OPEN; + + // Advance past open_until → OPEN→HALF_OPEN on next TryAcquire. + clock->Advance(std::chrono::milliseconds(cb.base_open_duration_ms + 1)); + + // Admit both probes (budget=2; snapshot set to 2 at TransitionOpenToHalfOpen). + auto a0 = slice.TryAcquire(); + auto a1 = slice.TryAcquire(); + bool both_probes = (a0.decision == Decision::ADMITTED_PROBE) && + (a1.decision == Decision::ADMITTED_PROBE); + bool is_halfopen = slice.CurrentState() == State::HALF_OPEN; + + // Lower the limit to 1 mid-cycle. + auto lowered = cb; + lowered.permitted_half_open_calls = 1; + slice.Reload(lowered); + + // First probe succeeds. + // Without fix: successes(1) >= NEW config(1) → TransitionHalfOpenToClosed + // → halfopen_gen_ bumped → second probe's failure DROPPED + // → breaker falsely CLOSED. + // With fix: successes(1) >= snapshot(2) is false → stays HALF_OPEN. + slice.ReportSuccess(true, a0.generation); + bool not_closed_after_one = slice.CurrentState() == State::HALF_OPEN; + + // Second probe fails. inflight drops to 0 → TripHalfOpenToOpen fires. 
+ slice.ReportFailure(FailureKind::RESPONSE_5XX, true, a1.generation); + bool retripped = slice.CurrentState() == State::OPEN; + + bool pass = is_open && both_probes && is_halfopen && + not_closed_after_one && retripped; + TestFramework::RecordTest( + "CB: HALF_OPEN budget frozen across mid-cycle reload", + pass, pass ? "" : + "is_open=" + std::to_string(is_open) + + " both_probes=" + std::to_string(both_probes) + + " is_halfopen=" + std::to_string(is_halfopen) + + " not_closed_after_one=" + std::to_string(not_closed_after_one) + + " retripped=" + std::to_string(retripped), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: HALF_OPEN budget frozen across mid-cycle reload", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// BUG (review round 6, P2): Reload with window_seconds change preserved +// consecutive_failures_ while bumping closed_gen_. Pre-reload CLOSED +// reports are correctly blocked (stale gen), but they can no longer +// clear or advance consecutive_failures_ either. The counter becomes an +// orphaned relic from a prior observation cycle: +// +// Scenario: 4 consecutive failures (threshold=5), reload window_seconds. +// Pre-reload success arrives → stale gen → DROPPED. +// Without fix: consecutive_failures_ stays at 4. +// Next real failure: consecutive_failures_ = 5 → SPURIOUS TRIP. +// +// Fix: reset consecutive_failures_ = 0 in the same branch that clears +// the window on resize. Both are CLOSED-domain state from the same +// observation cycle; invalidating one without resetting the other leaves +// an inconsistent counter. +void TestWindowResizeResetConsecutiveFailures() { + std::cout << "\n[TEST] CB: window resize resets consecutive_failures_..." 
+ << std::endl; + try { + CircuitBreakerConfig cb; + cb.enabled = true; + cb.consecutive_failure_threshold = 5; + cb.failure_rate_threshold = 100; // rate-trip disabled (100% threshold) + cb.minimum_volume = 1000; // rate-trip disabled (high volume gate) + cb.window_seconds = 10; + cb.permitted_half_open_calls = 5; + cb.base_open_duration_ms = 5000; + cb.max_open_duration_ms = 60000; + + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // Accumulate 4 consecutive failures (one below the threshold of 5). + for (int i = 0; i < 4; ++i) { + auto a = slice.TryAcquire(); + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, a.generation); + } + bool pre_reload_closed = slice.CurrentState() == State::CLOSED; + + // Capture a pre-reload admission. + auto pre_admit = slice.TryAcquire(); + uint64_t pre_gen = pre_admit.generation; + + // Window-only reload: wipes the rate window, bumps closed_gen_, + // and (with the fix) resets consecutive_failures_ to 0. + auto resized = cb; + resized.window_seconds = 30; + slice.Reload(resized); + + // Pre-reload success arrives late — must be dropped (stale gen). + slice.ReportSuccess(false, pre_gen); + bool stale_dropped = slice.ReportsStaleGeneration() == 1; + + // Verify consecutive_failures_ was reset: one real post-reload failure + // must NOT trip the breaker (counter is 1/5, not 5/5). + auto post_admit = slice.TryAcquire(); + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, post_admit.generation); + bool no_spurious_trip = slice.CurrentState() == State::CLOSED; + + bool pass = pre_reload_closed && stale_dropped && no_spurious_trip; + TestFramework::RecordTest( + "CB: window resize resets consecutive_failures_", + pass, pass ? 
"" : + "pre_reload_closed=" + std::to_string(pre_reload_closed) + + " stale_dropped=" + std::to_string(stale_dropped) + + " no_spurious_trip=" + std::to_string(no_spurious_trip), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: window resize resets consecutive_failures_", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// BUG (review round 9, P2-1): ReportFailure captured Now() separately in +// AddFailure() and ShouldTripClosed()'s internal TotalCount/FailureCount +// calls. If a second boundary elapsed between the two calls, Advance() could +// wipe the just-recorded failure — with window_seconds=1, the 1-second delta +// hits the delta >= window_seconds full-reset path and the failure +// disappears before the trip evaluation runs. Fix: capture Now() once in +// ReportFailure and thread it through ShouldTripClosed(now), AddFailure(now). +// +// Regression test injects a time source that returns T on the first call +// and T+1s on every subsequent call, simulating the boundary crossing. +// Post-fix, ReportFailure only calls Now() once — the fix is effective. +// Pre-fix, the second Now() call inside ShouldTripClosed would advance the +// ring and wipe the failure → no trip. +void TestReportFailureUsesOneTimestampAcrossTripEval() { + std::cout << "\n[TEST] CB: ReportFailure uses single timestamp for trip eval..." + << std::endl; + try { + CircuitBreakerConfig cb; + cb.enabled = true; + cb.consecutive_failure_threshold = 1000; // disable consec path + cb.failure_rate_threshold = 100; // rate=100% to trip on fail + cb.minimum_volume = 1; // single failure suffices + cb.window_seconds = 1; // boundary-sensitive + cb.permitted_half_open_calls = 5; + cb.base_open_duration_ms = 5000; + cb.max_open_duration_ms = 60000; + + // Time source returns base on call #1 and base+1s on every call after. 
+ // This simulates a clock tick between AddFailure (call 1) and any
+ // subsequent Now() inside ShouldTripClosed (call 2+).
+ auto base = std::chrono::steady_clock::time_point(
+ std::chrono::seconds(1'000'000));
+ int call_count = 0;
+ auto time_source = [&call_count, base]() {
+ int n = call_count++;
+ return n == 0 ? base : base + std::chrono::seconds(1);
+ };
+ CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, time_source);
+
+ // Admit + fail one request.
+ // Pre-fix trace (BUGGY): AddFailure(base) records in bucket[0]. Then
+ // ShouldTripClosed()'s internal TotalCount(base+1s) calls Advance
+ // → delta=1 >= window=1 → full reset wipes the bucket → total=0 <
+ // minimum_volume=1 → NO TRIP. Rate trip missed.
+ // Post-fix: ReportFailure captures Now() once (=base), passes to
+ // AddFailure(base) AND ShouldTripClosed(base). Ring stays aligned;
+ // total=1, failures=1 → rate fires → TRIP to OPEN.
+ auto a = slice.TryAcquire();
+ slice.ReportFailure(FailureKind::RESPONSE_5XX, false, a.generation);
+
+ bool pass = slice.CurrentState() == State::OPEN;
+ TestFramework::RecordTest(
+ "CB: ReportFailure uses single timestamp for trip eval",
+ pass, pass ? "" :
+ "expected OPEN, got state=" +
+ std::to_string(static_cast<int>(slice.CurrentState())),
+ TestFramework::TestCategory::OTHER);
+ } catch (const std::exception& e) {
+ TestFramework::RecordTest(
+ "CB: ReportFailure uses single timestamp for trip eval",
+ false, e.what(), TestFramework::TestCategory::OTHER);
+ }
+}
+
+// BUG (review round 8, P2): CircuitBreakerWindow's constructor allocated
+// `max(1, window_seconds)` buckets but stored the RAW window_seconds_ value.
+// Programmatic callers bypassing ConfigLoader::Validate() (tests, future
+// direct users) that passed window_seconds <= 0 would trigger BucketIndex's
+// `% window_seconds_` on the first Add*/TotalCount call — dividing by zero
+// for 0, or violating ring math for negatives. Resize() already clamped.
+// Fix: constructor applies the same clamp so both entry points are symmetric. +void TestWindowNonPositiveWindowSizeClamp() { + std::cout << "\n[TEST] CB: window ctor clamps non-positive sizes..." + << std::endl; + try { + // Zero would have crashed on % 0 before the fix. + CircuitBreakerWindow w0(0); + auto t = std::chrono::steady_clock::time_point(std::chrono::seconds(1000)); + w0.AddSuccess(t); + w0.AddFailure(t); + bool zero_ok = (w0.TotalCount(t) == 2) && (w0.FailureCount(t) == 1); + + // Negative values would have violated the ring math. + CircuitBreakerWindow wn(-5); + wn.AddSuccess(t); + bool negative_ok = wn.TotalCount(t) == 1; + + bool pass = zero_ok && negative_ok; + TestFramework::RecordTest( + "CB: window ctor clamps non-positive sizes", + pass, pass ? "" : + "zero_ok=" + std::to_string(zero_ok) + + " negative_ok=" + std::to_string(negative_ok), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: window ctor clamps non-positive sizes", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// BUG (review round 9, P3): CircuitBreakerSlice copied permitted_half_open_calls +// into the HALF_OPEN snapshot verbatim. For programmatic callers bypassing +// ConfigLoader::Validate() (same class as the window ctor clamp), a zero or +// negative budget would permanently wedge the breaker in HALF_OPEN: +// TryAcquire (HALF_OPEN, case B): half_open_inflight_(0) >= snapshot(0) +// → every probe rejected as half_open_full → no probe ever admitted +// → no report ever fires → half_open_inflight_ stays at 0 forever. +// +// Fix: clamp the snapshot to min 1 at TransitionOpenToHalfOpen. Symmetric +// with CircuitBreakerWindow's constructor clamp from round 8. +void TestHalfOpenClampsNonPositiveProbeBudget() { + std::cout << "\n[TEST] CB: HALF_OPEN clamps non-positive probe budget..." 
+ << std::endl; + try { + CircuitBreakerConfig cb; + cb.enabled = true; + cb.consecutive_failure_threshold = 2; + cb.failure_rate_threshold = 100; + cb.minimum_volume = 1000; + cb.window_seconds = 10; + cb.permitted_half_open_calls = 0; // bypasses Validate() — direct ctor + cb.base_open_duration_ms = 100; + cb.max_open_duration_ms = 60000; + + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // Trip to OPEN. + for (int i = 0; i < 2; ++i) { + auto a = slice.TryAcquire(); + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, a.generation); + } + + // Advance past open_until → OPEN→HALF_OPEN on next TryAcquire. + clock->Advance(std::chrono::milliseconds(cb.base_open_duration_ms + 1)); + + // First TryAcquire triggers the transition. With the clamp, snapshot=1 + // and this probe is admitted. Without the clamp, snapshot=0 → rejected + // as half_open_full → breaker stuck forever. + auto a0 = slice.TryAcquire(); + bool probe_admitted = a0.decision == Decision::ADMITTED_PROBE; + + // A successful probe closes the cycle (successes(1) >= snapshot(1)). + // Without the clamp this branch would never execute. + if (probe_admitted) { + slice.ReportSuccess(true, a0.generation); + } + bool recovered = slice.CurrentState() == State::CLOSED; + + bool pass = probe_admitted && recovered; + TestFramework::RecordTest( + "CB: HALF_OPEN clamps non-positive probe budget", + pass, pass ? "" : + "probe_admitted=" + std::to_string(probe_admitted) + + " recovered=" + std::to_string(recovered), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: HALF_OPEN clamps non-positive probe budget", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// BUG (review round 10, P1): TryAcquire gated HALF_OPEN admission on +// half_open_inflight_, so a probe slot was reused once an earlier probe +// completed. 
With permitted_half_open_calls=2: +// +// admit A → inflight=1, admitted=1 +// admit B → inflight=2, admitted=2 +// Report success on A → inflight=1, successes=1 +// admit C → inflight(1) < snapshot(2) → ACCEPTED (BUG: 3rd admission) +// Report success on B → inflight=0, successes=2 +// successes(2) >= snapshot(2) → TransitionHalfOpenToClosed fires +// → halfopen_gen_ bumped → C's eventual failure DROPPED as stale +// → breaker falsely marked recovered despite the probe failing. +// +// Fix: gate on half_open_admitted_ (total cycle admissions, never +// decrements) instead of half_open_inflight_. The cycle can admit at most +// `snapshot` probes total, regardless of how quickly earlier probes drain. +void TestHalfOpenDoesNotReuseProbeSlots() { + std::cout << "\n[TEST] CB: HALF_OPEN does not reuse probe slots..." + << std::endl; + try { + CircuitBreakerConfig cb; + cb.enabled = true; + cb.consecutive_failure_threshold = 2; + cb.failure_rate_threshold = 100; + cb.minimum_volume = 1000; + cb.window_seconds = 10; + cb.permitted_half_open_calls = 2; + cb.base_open_duration_ms = 100; + cb.max_open_duration_ms = 60000; + + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // Trip to OPEN. + for (int i = 0; i < 2; ++i) { + auto a = slice.TryAcquire(); + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, a.generation); + } + clock->Advance(std::chrono::milliseconds(cb.base_open_duration_ms + 1)); + + // Admit 2 probes (budget=2). + auto a = slice.TryAcquire(); + auto b = slice.TryAcquire(); + bool both_admitted = a.decision == Decision::ADMITTED_PROBE && + b.decision == Decision::ADMITTED_PROBE; + + // Report success on A — freeing its inflight slot. + slice.ReportSuccess(true, a.generation); + bool still_halfopen = slice.CurrentState() == State::HALF_OPEN; + + // Third admission attempt. With the fix: admitted(2) >= snapshot(2) + // → REJECTED. 
Without the fix: inflight(1) < snapshot(2) → ADMITTED, + // creating a ghost probe. + auto c = slice.TryAcquire(); + bool third_rejected = c.decision == Decision::REJECTED_OPEN; + + // Close the cycle by succeeding B. + slice.ReportSuccess(true, b.generation); + bool closed = slice.CurrentState() == State::CLOSED; + + // Verify no stale-generation reports accumulated — if the 3rd admission + // had slipped through, its (dropped) report after the close would have + // bumped this counter. Since the admission is now rejected up front, + // this should stay zero. + bool no_stale_reports = slice.ReportsStaleGeneration() == 0; + + bool pass = both_admitted && still_halfopen && third_rejected && + closed && no_stale_reports; + TestFramework::RecordTest( + "CB: HALF_OPEN does not reuse probe slots", + pass, pass ? "" : + "both_admitted=" + std::to_string(both_admitted) + + " still_halfopen=" + std::to_string(still_halfopen) + + " third_rejected=" + std::to_string(third_rejected) + + " closed=" + std::to_string(closed) + + " no_stale_reports=" + std::to_string(no_stale_reports), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: HALF_OPEN does not reuse probe slots", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// BUG (review round 11, P1): Admission contract has ReportSuccess and +// ReportFailure but no path for probes that complete without touching the +// upstream (POOL_EXHAUSTED after probe admission, shutdown, client +// disconnect, PARSE_ERROR). Following the §7 "don't report these as +// failures" contract strictly, such probes would leak their inflight slot +// forever — once half_open_admitted_ reaches snapshot, all further +// admissions reject as half_open_full and nothing ever drains the cycle, +// wedging the slice in HALF_OPEN. 
+// +// Fix: ReportNeutral decrements BOTH inflight (so the last-probe re-trip +// still fires) and admitted (so a replacement probe can still exercise +// the upstream within the cycle budget). No touch to successes / fails. +void TestReportNeutralReleasesProbeSlot() { + std::cout << "\n[TEST] CB: ReportNeutral releases probe slot..." + << std::endl; + try { + CircuitBreakerConfig cb; + cb.enabled = true; + cb.consecutive_failure_threshold = 2; + cb.failure_rate_threshold = 100; + cb.minimum_volume = 1000; + cb.window_seconds = 10; + cb.permitted_half_open_calls = 2; + cb.base_open_duration_ms = 100; + cb.max_open_duration_ms = 60000; + + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // Trip to OPEN, advance past backoff, fully consume probe budget. + for (int i = 0; i < 2; ++i) { + auto a = slice.TryAcquire(); + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, a.generation); + } + clock->Advance(std::chrono::milliseconds(cb.base_open_duration_ms + 1)); + + auto a = slice.TryAcquire(); + auto b = slice.TryAcquire(); + bool both_probes = a.decision == Decision::ADMITTED_PROBE && + b.decision == Decision::ADMITTED_PROBE; + + // Budget full: 3rd admission rejected. + auto pre_release = slice.TryAcquire(); + bool budget_full_before = pre_release.decision == Decision::REJECTED_OPEN; + + // Neutral-release A: slot returns, replacement probe fits within budget. + slice.ReportNeutral(true, a.generation); + + auto c = slice.TryAcquire(); + bool replacement_admitted = c.decision == Decision::ADMITTED_PROBE; + + // Cycle completes cleanly via B + C successes → CLOSED. + slice.ReportSuccess(true, b.generation); + slice.ReportSuccess(true, c.generation); + bool closed = slice.CurrentState() == State::CLOSED; + + // Neutral release must NOT have bumped probe_failures / probe_successes. 
+ bool counters_clean = slice.ProbeSuccesses() == 2 && + slice.ProbeFailures() == 0; + + bool pass = both_probes && budget_full_before && + replacement_admitted && closed && counters_clean; + TestFramework::RecordTest( + "CB: ReportNeutral releases probe slot", + pass, pass ? "" : + "both_probes=" + std::to_string(both_probes) + + " budget_full_before=" + std::to_string(budget_full_before) + + " replacement_admitted=" + std::to_string(replacement_admitted) + + " closed=" + std::to_string(closed) + + " counters_clean=" + std::to_string(counters_clean), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: ReportNeutral releases probe slot", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// Companion: a neutral release that drains the last in-flight probe AFTER +// a sibling failure must still trigger the HALF_OPEN→OPEN re-trip. Without +// this last-probe hook in ReportNeutral, the slice would wedge in HALF_OPEN +// with saw_failure=true rejecting every admission via Case A. +void TestReportNeutralLastProbeAfterFailureReTrips() { + std::cout << "\n[TEST] CB: ReportNeutral re-trips as last probe after sibling fail..." 
+ << std::endl; + try { + CircuitBreakerConfig cb; + cb.enabled = true; + cb.consecutive_failure_threshold = 2; + cb.failure_rate_threshold = 100; + cb.minimum_volume = 1000; + cb.window_seconds = 10; + cb.permitted_half_open_calls = 2; + cb.base_open_duration_ms = 100; + cb.max_open_duration_ms = 60000; + + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + for (int i = 0; i < 2; ++i) { + auto a = slice.TryAcquire(); + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, a.generation); + } + clock->Advance(std::chrono::milliseconds(cb.base_open_duration_ms + 1)); + + auto a = slice.TryAcquire(); + auto b = slice.TryAcquire(); + + // A fails → saw_failure=true, inflight=1 (B still running), no re-trip yet. + slice.ReportFailure(FailureKind::RESPONSE_5XX, true, a.generation); + bool still_halfopen = slice.CurrentState() == State::HALF_OPEN; + + // B neutral-releases → last in-flight drains. With the fix, the + // sibling-failure + last-probe hook fires TripHalfOpenToOpen. + slice.ReportNeutral(true, b.generation); + bool retripped = slice.CurrentState() == State::OPEN; + + bool pass = still_halfopen && retripped; + TestFramework::RecordTest( + "CB: ReportNeutral re-trips as last probe after sibling fail", + pass, pass ? "" : + "still_halfopen=" + std::to_string(still_halfopen) + + " retripped=" + std::to_string(retripped), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: ReportNeutral re-trips as last probe after sibling fail", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +// BUG (review round 12, P2): ComputeOpenDuration read base/max durations +// straight from config_, so a programmatic caller bypassing +// ConfigLoader::Validate() with base_open_duration_ms <= 0 or max < base +// would compute scaled_ms <= 0. 
open_until = now + 0 → next TryAcquire +// sees now_ns >= open_until_ns → transition to HALF_OPEN immediately. +// The breaker never actually backed off. Fix: clamp base to >= 1ms and +// max to >= base at the compute site, matching the window and probe +// budget clamps. +void TestComputeOpenDurationClampsInvalidBase() { + std::cout << "\n[TEST] CB: ComputeOpenDuration clamps invalid base/max..." + << std::endl; + try { + CircuitBreakerConfig cb; + cb.enabled = true; + cb.consecutive_failure_threshold = 2; + cb.failure_rate_threshold = 100; + cb.minimum_volume = 1000; + cb.window_seconds = 10; + cb.permitted_half_open_calls = 1; + cb.base_open_duration_ms = 0; // bypass — would kill backoff + cb.max_open_duration_ms = 0; // bypass — would kill backoff + + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + // Trip to OPEN. + for (int i = 0; i < 2; ++i) { + auto a = slice.TryAcquire(); + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, a.generation); + } + bool is_open = slice.CurrentState() == State::OPEN; + + // Immediate TryAcquire: clock hasn't moved, so if the clamp holds + // (open_until >= now + 1ms), this MUST reject as "open" (not drain + // to HALF_OPEN). Without the fix, scaled_ms=0 → open_until==now → + // admission path immediately transitions to HALF_OPEN. + auto immediate = slice.TryAcquire(); + bool rejected_as_open = immediate.decision == Decision::REJECTED_OPEN; + bool still_open = slice.CurrentState() == State::OPEN; + + bool pass = is_open && rejected_as_open && still_open; + TestFramework::RecordTest( + "CB: ComputeOpenDuration clamps invalid base/max", + pass, pass ? 
"" : + "is_open=" + std::to_string(is_open) + + " rejected_as_open=" + std::to_string(rejected_as_open) + + " still_open=" + std::to_string(still_open), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB: ComputeOpenDuration clamps invalid base/max", + false, e.what(), TestFramework::TestCategory::OTHER); + } +} + +void TestTransitionCallbackInvoked() { + std::cout << "\n[TEST] CB: transition callback invoked..." << std::endl; + try { + auto cb = DefaultEnabledConfig(); + auto clock = std::make_shared(); + CircuitBreakerSlice slice("svc:h:p p=0", 0, cb, + [clock]() { return clock->now; }); + + int closed_to_open = 0; + int open_to_halfopen = 0; + int halfopen_to_closed = 0; + slice.SetTransitionCallback( + [&](State o, State n, const char*) { + if (o == State::CLOSED && n == State::OPEN) closed_to_open++; + else if (o == State::OPEN && n == State::HALF_OPEN) open_to_halfopen++; + else if (o == State::HALF_OPEN && n == State::CLOSED) halfopen_to_closed++; + }); + + // Full cycle. + for (int i = 0; i < 5; ++i) { + slice.ReportFailure(FailureKind::RESPONSE_5XX, false, slice.CurrentGenerationForTesting()); + } + clock->Advance(std::chrono::milliseconds(cb.base_open_duration_ms + 1)); + for (int i = 0; i < cb.permitted_half_open_calls; ++i) { + slice.TryAcquire(); + slice.ReportSuccess(true, slice.CurrentGenerationForTesting()); + } + bool pass = closed_to_open == 1 && open_to_halfopen == 1 && + halfopen_to_closed == 1; + TestFramework::RecordTest("CB: transition callback invoked", pass, "", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("CB: transition callback invoked", false, + e.what(), TestFramework::TestCategory::OTHER); + } +} + +// Run all circuit breaker unit tests. 
// Registry of every circuit-breaker unit test. Ordering is roughly
// chronological by review round; later entries regression-test fixes
// from rounds 8-12 described above.
void RunAllTests() {
  std::cout << "\n" << std::string(60, '=') << std::endl;
  std::cout << "CIRCUIT BREAKER - UNIT TESTS" << std::endl;
  std::cout << std::string(60, '=') << std::endl;

  TestDisabledFastPath();
  TestClosedStaysClosedBelowConsecutiveThreshold();
  TestConsecutiveFailureTrip();
  TestFailureRateTrip();
  TestMinimumVolumeGate();
  TestOpenBeforeDurationStaysOpen();
  TestOpenToHalfOpenAfterDuration();
  TestHalfOpenAllProbesSucceed();
  TestHalfOpenProbeFailureReopens();
  TestHalfOpenExhaustedSlotsRejected();
  TestExponentialBackoff();
  TestResetOnClose();
  TestWindowBucketByCurrentSecond();
  TestWindowAdvanceSkipsStale();
  TestWindowPartialExpiry();
  TestWindowReset();
  TestDryRunAdmits();
  TestReloadPreservesState();
  TestConsecutiveThresholdOne();
  TestSuccessClearsConsecutive();
  TestLateFailureAfterTripDoesNotInflateBackoff();
  TestLateSuccessAfterTripIgnored();
  TestHalfOpenStopsAdmittingAfterFirstProbeFailure();
  TestHalfOpenFullCounterSeparate();
  TestReloadResetsStateOnEnabledToggleWhileOpen();
  TestReloadResetsConsecutiveFailuresOnEnabledToggle();
  TestReloadThresholdChangePreservesState();
  TestSawFailureDoesNotBumpHalfOpenFullCounter();
  TestOpenUntilZeroWhenHalfOpen();
  TestStaleGenerationReportsDroppedAfterReloadToggle();
  TestStaleGenerationReportsDroppedAcrossStateTransitions();
  TestWindowResizeAdvancesGeneration();
  TestThresholdOnlyReloadDoesNotAdvanceGeneration();
  TestWindowResizeDuringHalfOpenDoesNotStrandProbes();
  TestWindowResizeStillInvalidatesClosedAdmissions();
  TestWindowResizeResetConsecutiveFailures();
  TestHalfOpenBudgetFrozenAcrossReload();
  TestWindowNonPositiveWindowSizeClamp();
  TestReportFailureUsesOneTimestampAcrossTripEval();
  TestHalfOpenClampsNonPositiveProbeBudget();
  TestHalfOpenDoesNotReuseProbeSlots();
  TestReportNeutralReleasesProbeSlot();
  TestReportNeutralLastProbeAfterFailureReTrips();
  TestComputeOpenDurationClampsInvalidBase();
  TestTransitionCallbackInvoked();
}

} // namespace CircuitBreakerTests
diff --git a/test/circuit_breaker_wait_queue_drain_test.h b/test/circuit_breaker_wait_queue_drain_test.h
new file mode 100644
index 00000000..d2200094
--- /dev/null
+++ b/test/circuit_breaker_wait_queue_drain_test.h
@@ -0,0 +1,261 @@
#pragma once

// Wait-queue-drain integration tests: wait-queue drain on CLOSED → OPEN trip.
//
// The integration suite covers "new requests after a trip hit
// REJECTED_OPEN". This suite covers the orthogonal case: a request that passed ConsultBreaker
// pre-trip and is waiting in the pool's bounded wait queue when the trip
// fires. Without the drain, that waiter would sit until either the pool
// frees a slot (and then re-hit the upstream — pointless traffic) or the
// queue-timeout / open-duration elapses (up to 60s latency spike).
//
// Mechanism tested: `HttpServer::MarkServerReady` installs a transition
// callback on every slice that routes CLOSED → OPEN to the corresponding
// `PoolPartition::DrainWaitQueueOnTrip()`. Each waiter receives
// `CHECKOUT_CIRCUIT_OPEN`, which `ProxyTransaction::OnCheckoutError` maps
// to the standard circuit-open response (503 + `X-Circuit-Breaker: open`).
//
// Strategy: gate concurrency via a 1-connection pool. The first request
// hangs at the backend long enough to let a second request queue behind
// it. When the first's response lands (502), the breaker trips and the
// drain fires, causing the queued request to receive 503 + circuit-open
// headers instead of the backend's 502 (which would happen if the drain
// were missing and the queued request proceeded).
+ +#include "test_framework.h" +#include "test_server_runner.h" +#include "http_test_client.h" +#include "http/http_server.h" +#include "config/server_config.h" + +#include +#include +#include +#include +#include + +namespace CircuitBreakerWaitQueueDrainTests { + +static UpstreamConfig MakeDrainTripUpstream(const std::string& name, + const std::string& host, + int port, + bool breaker_enabled) { + UpstreamConfig u; + u.name = name; + u.host = host; + u.port = port; + // Single connection per partition — forces the second concurrent + // request to queue behind the first. Since tests run with + // worker_threads=1, one partition exists and it has exactly one + // connection slot. + u.pool.max_connections = 1; + u.pool.max_idle_connections = 1; + u.pool.connect_timeout_ms = 3000; + u.pool.idle_timeout_sec = 30; + u.pool.max_lifetime_sec = 3600; + u.pool.max_requests_per_conn = 0; + + u.proxy.route_prefix = "/fail"; + u.proxy.strip_prefix = false; + u.proxy.response_timeout_ms = 5000; + u.proxy.retry.max_retries = 0; // Deterministic — no retry confounds. + + u.circuit_breaker.enabled = breaker_enabled; + u.circuit_breaker.consecutive_failure_threshold = 1; // Trip on first 5xx. + u.circuit_breaker.failure_rate_threshold = 100; + u.circuit_breaker.minimum_volume = 10000; + u.circuit_breaker.window_seconds = 10; + u.circuit_breaker.permitted_half_open_calls = 2; + // Long open duration so the drain is unambiguously the thing that + // surfaces the 503 to the queued client — not a timer-driven + // HALF_OPEN recovery admitting a subsequent attempt. + u.circuit_breaker.base_open_duration_ms = 30000; + u.circuit_breaker.max_open_duration_ms = 60000; + return u; +} + +// --------------------------------------------------------------------------- +// Test 1: CLOSED→OPEN trip drains queued waiter with 503 + X-Circuit-Breaker. +// +// Request A takes the single pool slot and hangs at the backend for ~300ms. +// Request B queues (pool exhausted). 
At t≈300ms, A's backend response +// arrives: 502 → slice trip → transition callback → DrainWaitQueueOnTrip → +// B's error_callback fires with CHECKOUT_CIRCUIT_OPEN. B's client receives +// 503 + `X-Circuit-Breaker: open`. +// +// Pre-fix (no drain): B waits ~300ms for A's slot to free, then hits the +// backend itself, gets 502, client sees 502 — NOT 503 and NOT +// X-Circuit-Breaker: open. The assertion `is_503 && has_breaker_header` +// fails without the drain wiring. +// --------------------------------------------------------------------------- +void TestWaitQueueDrainedOnTrip() { + std::cout << "\n[TEST] CB Wait-Queue Drain: wait queue drained on trip..." + << std::endl; + try { + std::atomic backend_hits{0}; + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [&backend_hits](const HttpRequest&, HttpResponse& resp) { + backend_hits.fetch_add(1, std::memory_order_relaxed); + // Delay so the gateway's pool holds the connection long + // enough for a second client request to queue on it. + std::this_thread::sleep_for(std::chrono::milliseconds(300)); + resp.Status(502).Body("upstream-err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; // Single partition → single wait queue. + gw.http2.enabled = false; + + gw.upstreams.push_back( + MakeDrainTripUpstream("svc", "127.0.0.1", backend_port, + /*breaker_enabled=*/true)); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + // Launch A first (takes the one connection), then B 50ms later + // so B is guaranteed to enter the wait queue. 
+ std::promise a_resp, b_resp; + auto a_fut = a_resp.get_future(); + auto b_fut = b_resp.get_future(); + std::thread a([&]() { + a_resp.set_value(TestHttpClient::HttpGet(gw_port, "/fail", 5000)); + }); + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + std::thread b([&]() { + b_resp.set_value(TestHttpClient::HttpGet(gw_port, "/fail", 5000)); + }); + a.join(); + b.join(); + + std::string ra = a_fut.get(); + std::string rb = b_fut.get(); + + // A unambiguously hits the backend (owns the slot) and sees 502. + bool a_is_502 = TestHttpClient::HasStatus(ra, 502); + // B must see the circuit-open short-circuit from the drain — + // NOT a 502 from the backend, which is what happens without + // the drain wiring. + bool b_is_503 = TestHttpClient::HasStatus(rb, 503); + bool b_has_breaker_hdr = + rb.find("X-Circuit-Breaker: open") != std::string::npos || + rb.find("x-circuit-breaker: open") != std::string::npos; + // Exactly one backend hit — B was drained before making it to + // the upstream. Without the drain, backend_hits would be 2. + int hits = backend_hits.load(std::memory_order_relaxed); + bool single_hit = (hits == 1); + + bool pass = a_is_502 && b_is_503 && b_has_breaker_hdr && single_hit; + TestFramework::RecordTest( + "CB Wait-Queue Drain: wait queue drained on trip", pass, + pass ? "" : + "a_is_502=" + std::to_string(a_is_502) + + " b_is_503=" + std::to_string(b_is_503) + + " b_breaker_hdr=" + std::to_string(b_has_breaker_hdr) + + " backend_hits=" + std::to_string(hits) + + " rb_head=" + rb.substr(0, 200)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Wait-Queue Drain: wait queue drained on trip", false, e.what()); + } +} + +// --------------------------------------------------------------------------- +// Test 2: With the breaker disabled, the drain does NOT fire — the queued +// waiter proceeds to the upstream as it would absent the circuit-breaker +// layer entirely. 
+// +// Same setup as Test 1 but `circuit_breaker.enabled=false`. Disabled slices +// short-circuit in TryAcquire and never invoke transition callbacks, so +// DrainWaitQueueOnTrip is never called. Request B must hit the backend +// (backend_hits == 2) and receive the upstream's 502 — NOT a 503. +// --------------------------------------------------------------------------- +void TestDisabledBreakerDoesNotDrain() { + std::cout << "\n[TEST] CB Wait-Queue Drain: disabled breaker does not drain..." + << std::endl; + try { + std::atomic backend_hits{0}; + HttpServer backend("127.0.0.1", 0); + backend.Get("/fail", [&backend_hits](const HttpRequest&, HttpResponse& resp) { + backend_hits.fetch_add(1, std::memory_order_relaxed); + std::this_thread::sleep_for(std::chrono::milliseconds(300)); + resp.Status(502).Body("upstream-err", "text/plain"); + }); + TestServerRunner backend_runner(backend); + int backend_port = backend_runner.GetPort(); + + ServerConfig gw; + gw.bind_host = "127.0.0.1"; + gw.bind_port = 0; + gw.worker_threads = 1; + gw.http2.enabled = false; + + gw.upstreams.push_back( + MakeDrainTripUpstream("svc", "127.0.0.1", backend_port, + /*breaker_enabled=*/false)); + + HttpServer gateway(gw); + TestServerRunner gw_runner(gateway); + int gw_port = gw_runner.GetPort(); + + std::promise a_resp, b_resp; + auto a_fut = a_resp.get_future(); + auto b_fut = b_resp.get_future(); + std::thread a([&]() { + a_resp.set_value(TestHttpClient::HttpGet(gw_port, "/fail", 5000)); + }); + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + std::thread b([&]() { + b_resp.set_value(TestHttpClient::HttpGet(gw_port, "/fail", 5000)); + }); + a.join(); + b.join(); + + std::string ra = a_fut.get(); + std::string rb = b_fut.get(); + + // Both reach the backend — disabled breaker = no drain. + bool a_is_502 = TestHttpClient::HasStatus(ra, 502); + bool b_is_502 = TestHttpClient::HasStatus(rb, 502); + // Neither should carry the circuit-open header. 
+ bool no_breaker_on_a = + ra.find("X-Circuit-Breaker") == std::string::npos && + ra.find("x-circuit-breaker") == std::string::npos; + bool no_breaker_on_b = + rb.find("X-Circuit-Breaker") == std::string::npos && + rb.find("x-circuit-breaker") == std::string::npos; + int hits = backend_hits.load(std::memory_order_relaxed); + bool two_hits = (hits == 2); + + bool pass = a_is_502 && b_is_502 && no_breaker_on_a && + no_breaker_on_b && two_hits; + TestFramework::RecordTest( + "CB Wait-Queue Drain: disabled breaker does not drain", pass, + pass ? "" : + "a_is_502=" + std::to_string(a_is_502) + + " b_is_502=" + std::to_string(b_is_502) + + " no_breaker_on_a=" + std::to_string(no_breaker_on_a) + + " no_breaker_on_b=" + std::to_string(no_breaker_on_b) + + " backend_hits=" + std::to_string(hits)); + } catch (const std::exception& e) { + TestFramework::RecordTest( + "CB Wait-Queue Drain: disabled breaker does not drain", false, e.what()); + } +} + +void RunAllTests() { + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "CIRCUIT BREAKER - WAIT-QUEUE DRAIN ON TRIP TESTS" + << std::endl; + std::cout << std::string(60, '=') << std::endl; + + TestWaitQueueDrainedOnTrip(); + TestDisabledBreakerDoesNotDrain(); +} + +} // namespace CircuitBreakerWaitQueueDrainTests diff --git a/test/config_test.h b/test/config_test.h index cfb90c7a..778f464b 100644 --- a/test/config_test.h +++ b/test/config_test.h @@ -348,6 +348,274 @@ namespace ConfigTests { } } + // Test 9: Circuit breaker defaults + void TestCircuitBreakerDefaults() { + std::cout << "\n[TEST] Circuit Breaker Defaults..." 
<< std::endl; + try { + CircuitBreakerConfig cb; // value-initialized defaults + bool pass = cb.enabled == false && + cb.dry_run == false && + cb.consecutive_failure_threshold == 5 && + cb.failure_rate_threshold == 50 && + cb.minimum_volume == 20 && + cb.window_seconds == 10 && + cb.permitted_half_open_calls == 5 && + cb.base_open_duration_ms == 5000 && + cb.max_open_duration_ms == 60000 && + cb.max_ejection_percent_per_host_set == 50 && + cb.retry_budget_percent == 20 && + cb.retry_budget_min_concurrency == 3; + TestFramework::RecordTest("Circuit Breaker Defaults", pass, + pass ? "" : "default value mismatch", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("Circuit Breaker Defaults", false, e.what(), + TestFramework::TestCategory::OTHER); + } + } + + // Test 10: Circuit breaker JSON parsing (populated block) + void TestCircuitBreakerJsonParse() { + std::cout << "\n[TEST] Circuit Breaker JSON Parse..." << std::endl; + try { + std::string json = R"({ + "upstreams": [{ + "name": "svc", + "host": "10.0.0.1", + "port": 8080, + "circuit_breaker": { + "enabled": true, + "dry_run": true, + "consecutive_failure_threshold": 7, + "failure_rate_threshold": 75, + "minimum_volume": 50, + "window_seconds": 30, + "permitted_half_open_calls": 3, + "base_open_duration_ms": 2000, + "max_open_duration_ms": 120000, + "max_ejection_percent_per_host_set": 33, + "retry_budget_percent": 10, + "retry_budget_min_concurrency": 5 + } + }] + })"; + ServerConfig config = ConfigLoader::LoadFromString(json); + const auto& cb = config.upstreams.at(0).circuit_breaker; + bool pass = cb.enabled == true && cb.dry_run == true && + cb.consecutive_failure_threshold == 7 && + cb.failure_rate_threshold == 75 && + cb.minimum_volume == 50 && + cb.window_seconds == 30 && + cb.permitted_half_open_calls == 3 && + cb.base_open_duration_ms == 2000 && + cb.max_open_duration_ms == 120000 && + cb.max_ejection_percent_per_host_set == 33 && + 
cb.retry_budget_percent == 10 && + cb.retry_budget_min_concurrency == 5; + TestFramework::RecordTest("Circuit Breaker JSON Parse", pass, + pass ? "" : "parsed values mismatch", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("Circuit Breaker JSON Parse", false, e.what(), + TestFramework::TestCategory::OTHER); + } + } + + // Test 11: Circuit breaker JSON partial block uses defaults for missing fields + void TestCircuitBreakerJsonPartial() { + std::cout << "\n[TEST] Circuit Breaker JSON Partial..." << std::endl; + try { + std::string json = R"({ + "upstreams": [{ + "name": "svc", "host": "10.0.0.1", "port": 8080, + "circuit_breaker": {"enabled": true} + }] + })"; + ServerConfig config = ConfigLoader::LoadFromString(json); + const auto& cb = config.upstreams.at(0).circuit_breaker; + bool pass = cb.enabled == true && + cb.consecutive_failure_threshold == 5 && + cb.window_seconds == 10; + TestFramework::RecordTest("Circuit Breaker JSON Partial", pass, + pass ? "" : "expected defaults for unset fields", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("Circuit Breaker JSON Partial", false, e.what(), + TestFramework::TestCategory::OTHER); + } + } + + // Test 12: Round-trip via ToJson() preserves circuit_breaker + void TestCircuitBreakerJsonRoundTrip() { + std::cout << "\n[TEST] Circuit Breaker JSON Round-Trip..." 
<< std::endl; + try { + ServerConfig in; + UpstreamConfig u; + u.name = "svc"; u.host = "10.0.0.1"; u.port = 8080; + u.circuit_breaker.enabled = true; + u.circuit_breaker.window_seconds = 25; + u.circuit_breaker.failure_rate_threshold = 42; + in.upstreams.push_back(u); + + std::string serialized = ConfigLoader::ToJson(in); + ServerConfig out = ConfigLoader::LoadFromString(serialized); + + const auto& cb = out.upstreams.at(0).circuit_breaker; + bool pass = cb.enabled == true && cb.window_seconds == 25 && + cb.failure_rate_threshold == 42; + TestFramework::RecordTest("Circuit Breaker JSON Round-Trip", pass, + pass ? "" : "round-trip lost fields", + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("Circuit Breaker JSON Round-Trip", false, + e.what(), TestFramework::TestCategory::OTHER); + } + } + + // Helper: assert a circuit_breaker JSON override is rejected by Validate(). + static void ExpectValidationFailure(const std::string& name, + const std::string& cb_json_override, + const std::string& expected_substr) { + std::string json = std::string(R"({ + "upstreams": [{ + "name": "svc", "host": "10.0.0.1", "port": 8080, + "circuit_breaker": )") + cb_json_override + R"( + }] + })"; + try { + ServerConfig config = ConfigLoader::LoadFromString(json); + ConfigLoader::Validate(config); + TestFramework::RecordTest(name, false, + "expected validation failure containing: " + expected_substr, + TestFramework::TestCategory::OTHER); + } catch (const std::invalid_argument& e) { + std::string msg(e.what()); + bool pass = msg.find(expected_substr) != std::string::npos; + TestFramework::RecordTest(name, pass, + pass ? 
"" : std::string("wrong error: ") + msg, + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest(name, false, + std::string("wrong exception type: ") + e.what(), + TestFramework::TestCategory::OTHER); + } + } + + // Test 13: Validation rejects bad circuit_breaker fields + void TestCircuitBreakerValidation() { + std::cout << "\n[TEST] Circuit Breaker Validation..." << std::endl; + ExpectValidationFailure("CB Validation: consecutive_failure_threshold<1", + R"({"consecutive_failure_threshold": 0})", + "consecutive_failure_threshold must be in [1, 10000]"); + ExpectValidationFailure("CB Validation: failure_rate_threshold>100", + R"({"failure_rate_threshold": 101})", + "failure_rate_threshold must be in [0, 100]"); + ExpectValidationFailure("CB Validation: minimum_volume<1", + R"({"minimum_volume": 0})", + "minimum_volume must be in [1, 10000000]"); + ExpectValidationFailure("CB Validation: window_seconds<1", + R"({"window_seconds": 0})", + "window_seconds must be in [1, 3600]"); + ExpectValidationFailure("CB Validation: window_seconds>3600", + R"({"window_seconds": 3601})", + "window_seconds must be in [1, 3600]"); + ExpectValidationFailure("CB Validation: base_open_duration_ms<100", + R"({"base_open_duration_ms": 50})", + "base_open_duration_ms must be >= 100"); + ExpectValidationFailure("CB Validation: max= base_open_duration_ms"); + ExpectValidationFailure("CB Validation: retry_budget_percent>100", + R"({"retry_budget_percent": 200})", + "retry_budget_percent must be in [0, 100]"); + ExpectValidationFailure("CB Validation: retry_budget_min_concurrency<0", + R"({"retry_budget_min_concurrency": -1})", + "retry_budget_min_concurrency must be >= 0"); + ExpectValidationFailure("CB Validation: max_ejection_percent>100", + R"({"max_ejection_percent_per_host_set": 150})", + "max_ejection_percent_per_host_set must be in [0, 100]"); + ExpectValidationFailure("CB Validation: permitted_half_open_calls<1", + 
R"({"permitted_half_open_calls": 0})", + "permitted_half_open_calls must be in [1, 1000]"); + // Upper-bound regressions — pathological configs must be rejected. + ExpectValidationFailure("CB Validation: consecutive_failure_threshold>10000", + R"({"consecutive_failure_threshold": 10001})", + "consecutive_failure_threshold must be in [1, 10000]"); + ExpectValidationFailure("CB Validation: minimum_volume>10000000", + R"({"minimum_volume": 10000001})", + "minimum_volume must be in [1, 10000000]"); + ExpectValidationFailure("CB Validation: permitted_half_open_calls>1000", + R"({"permitted_half_open_calls": 1001})", + "permitted_half_open_calls must be in [1, 1000]"); + // Type-strictness guards: nlohmann's value() silently coerces + // float/bool to int (1.9 → 1, true → 1). Rejecting at parse time is + // safer than letting malformed configs pass Validate() and change + // production breaker behavior. + ExpectValidationFailure("CB Validation: float rejected for int field", + R"({"window_seconds": 1.9})", + "circuit_breaker.window_seconds must be an integer"); + ExpectValidationFailure("CB Validation: bool rejected for int field", + R"({"consecutive_failure_threshold": true})", + "circuit_breaker.consecutive_failure_threshold must be an integer"); + ExpectValidationFailure("CB Validation: int rejected for bool field", + R"({"enabled": 1})", + "circuit_breaker.enabled must be a boolean"); + } + + // UpstreamConfig::operator== EXCLUDES circuit_breaker. + // CircuitBreakerManager::Reload is wired in HttpServer::Reload, so a + // CB-only SIGHUP is a clean hot reload. Excluding circuit_breaker from + // the equality check ensures the outer reload doesn't fire a spurious + // "restart required" warning on a pure CB-fields edit. + // Topology fields (name, host, port, tls, pool, proxy) remain + // restart-only and must still trigger inequality. 
+ void TestCircuitBreakerEquality() { + std::cout << "\n[TEST] Circuit Breaker Equality (CB excluded from UpstreamConfig::operator==)..." << std::endl; + try { + UpstreamConfig a; + a.name = "svc"; a.host = "h"; a.port = 80; + UpstreamConfig b = a; + + // Default equal. + bool equal_default = (a == b); + + // Circuit-breaker-only edit must NOT break equality — breaker + // fields are live-reloadable via CircuitBreakerManager::Reload. + b.circuit_breaker.enabled = true; + b.circuit_breaker.window_seconds = 30; + bool cb_edit_invisible = (a == b); + + // CircuitBreakerConfig::operator== still detects the field diff + // (CircuitBreakerManager::Reload relies on this inner comparison). + bool cb_fields_differ = (a.circuit_breaker != b.circuit_breaker); + + // Topology changes still make configs unequal. + UpstreamConfig c = a; + c.host = "different"; + bool topology_changed = (a != c); + + UpstreamConfig d = a; + d.port = 9999; + bool port_change_detected = (a != d); + + bool pass = equal_default && cb_edit_invisible && + cb_fields_differ && topology_changed && + port_change_detected; + TestFramework::RecordTest("Circuit Breaker Equality (CB excluded from UpstreamConfig::operator==)", + pass, + pass ? 
"" : + "equal_default=" + std::to_string(equal_default) + + " cb_edit_invisible=" + std::to_string(cb_edit_invisible) + + " cb_fields_differ=" + std::to_string(cb_fields_differ) + + " topology_changed=" + std::to_string(topology_changed) + + " port_change_detected=" + std::to_string(port_change_detected), + TestFramework::TestCategory::OTHER); + } catch (const std::exception& e) { + TestFramework::RecordTest("Circuit Breaker Equality (CB excluded from UpstreamConfig::operator==)", + false, e.what(), TestFramework::TestCategory::OTHER); + } + } + // Run all config tests void RunAllTests() { std::cout << "\n" << std::string(60, '=') << std::endl; @@ -362,6 +630,14 @@ namespace ConfigTests { TestValidationTlsNoCert(); TestEnvOverrides(); TestMissingFile(); + + // Circuit breaker config tests + TestCircuitBreakerDefaults(); + TestCircuitBreakerJsonParse(); + TestCircuitBreakerJsonPartial(); + TestCircuitBreakerJsonRoundTrip(); + TestCircuitBreakerValidation(); + TestCircuitBreakerEquality(); } } // namespace ConfigTests diff --git a/test/run_test.cc b/test/run_test.cc index 4edb0139..0419c6ee 100644 --- a/test/run_test.cc +++ b/test/run_test.cc @@ -13,6 +13,13 @@ #include "upstream_pool_test.h" #include "proxy_test.h" #include "rate_limit_test.h" +#include "circuit_breaker_test.h" +#include "circuit_breaker_components_test.h" +#include "circuit_breaker_integration_test.h" +#include "circuit_breaker_retry_budget_test.h" +#include "circuit_breaker_wait_queue_drain_test.h" +#include "circuit_breaker_observability_test.h" +#include "circuit_breaker_reload_test.h" #include "test_framework.h" #include #include @@ -77,6 +84,28 @@ void RunAllTest(){ // Run rate limit tests RateLimitTests::RunAllTests(); + // Run circuit breaker tests + CircuitBreakerTests::RunAllTests(); + + // Run circuit-breaker component unit tests (RetryBudget / Host / Manager) + CircuitBreakerComponentsTests::RunAllTests(); + + // Run circuit-breaker integration tests (end-to-end through + // 
ProxyTransaction + UpstreamManager + HttpServer) + CircuitBreakerIntegrationTests::RunAllTests(); + + // Run circuit-breaker retry-budget integration tests + CircuitBreakerRetryBudgetTests::RunAllTests(); + + // Run circuit-breaker wait-queue-drain-on-trip tests + CircuitBreakerWaitQueueDrainTests::RunAllTests(); + + // Run circuit-breaker observability tests + CircuitBreakerObservabilityTests::RunAllTests(); + + // Run circuit-breaker hot-reload tests + CircuitBreakerReloadTests::RunAllTests(); + std::cout << "====================================\n" << std::endl; } @@ -155,6 +184,15 @@ int main(int argc, char* argv[]) { // Run rate limit tests }else if(mode == "rate_limit" || mode == "-L"){ RateLimitTests::RunAllTests(); + // Run circuit-breaker tests (unit + components + integration + retry-budget + drain + observability + reload) + }else if(mode == "circuit_breaker" || mode == "-B"){ + CircuitBreakerTests::RunAllTests(); + CircuitBreakerComponentsTests::RunAllTests(); + CircuitBreakerIntegrationTests::RunAllTests(); + CircuitBreakerRetryBudgetTests::RunAllTests(); + CircuitBreakerWaitQueueDrainTests::RunAllTests(); + CircuitBreakerObservabilityTests::RunAllTests(); + CircuitBreakerReloadTests::RunAllTests(); // Show help }else if(mode == "help" || mode == "-h" || mode == "--help"){ PrintUsage(argv[0]);