Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ UPSTREAM_SRCS = $(SERVER_DIR)/upstream_connection.cc $(SERVER_DIR)/pool_partitio
# Rate limit layer sources
RATE_LIMIT_SRCS = $(SERVER_DIR)/token_bucket.cc $(SERVER_DIR)/rate_limit_zone.cc $(SERVER_DIR)/rate_limiter.cc

# Circuit breaker layer sources
CIRCUIT_BREAKER_SRCS = $(SERVER_DIR)/circuit_breaker_window.cc $(SERVER_DIR)/circuit_breaker_slice.cc $(SERVER_DIR)/retry_budget.cc $(SERVER_DIR)/circuit_breaker_host.cc $(SERVER_DIR)/circuit_breaker_manager.cc

# CLI layer sources
CLI_SRCS = $(SERVER_DIR)/cli_parser.cc $(SERVER_DIR)/signal_handler.cc $(SERVER_DIR)/pid_file.cc $(SERVER_DIR)/daemonizer.cc

Expand Down Expand Up @@ -122,7 +125,7 @@ NGHTTP2_SRC = $(THIRD_PARTY_DIR)/nghttp2/nghttp2_alpn.c \
NGHTTP2_OBJ = $(NGHTTP2_SRC:.c=.o)

# Server library sources (shared between test and production binaries)
LIB_SRCS = $(REACTOR_SRCS) $(NETWORK_SRCS) $(SERVER_SRCS) $(THREAD_POOL_SRCS) $(FOUNDATION_SRCS) $(HTTP_SRCS) $(HTTP2_SRCS) $(WS_SRCS) $(TLS_SRCS) $(UPSTREAM_SRCS) $(RATE_LIMIT_SRCS) $(CLI_SRCS) $(UTIL_SRCS)
LIB_SRCS = $(REACTOR_SRCS) $(NETWORK_SRCS) $(SERVER_SRCS) $(THREAD_POOL_SRCS) $(FOUNDATION_SRCS) $(HTTP_SRCS) $(HTTP2_SRCS) $(WS_SRCS) $(TLS_SRCS) $(UPSTREAM_SRCS) $(RATE_LIMIT_SRCS) $(CIRCUIT_BREAKER_SRCS) $(CLI_SRCS) $(UTIL_SRCS)

# Test binary sources
TEST_SRCS = $(LIB_SRCS) $(TEST_DIR)/test_framework.cc $(TEST_DIR)/run_test.cc
Expand All @@ -142,11 +145,12 @@ WS_HEADERS = $(LIB_DIR)/ws/websocket_connection.h $(LIB_DIR)/ws/websocket_frame.
TLS_HEADERS = $(LIB_DIR)/tls/tls_context.h $(LIB_DIR)/tls/tls_connection.h $(LIB_DIR)/tls/tls_client_context.h
UPSTREAM_HEADERS = $(LIB_DIR)/upstream/upstream_manager.h $(LIB_DIR)/upstream/upstream_host_pool.h $(LIB_DIR)/upstream/pool_partition.h $(LIB_DIR)/upstream/upstream_connection.h $(LIB_DIR)/upstream/upstream_lease.h $(LIB_DIR)/upstream/upstream_http_codec.h $(LIB_DIR)/upstream/http_request_serializer.h $(LIB_DIR)/upstream/header_rewriter.h $(LIB_DIR)/upstream/retry_policy.h $(LIB_DIR)/upstream/proxy_transaction.h $(LIB_DIR)/upstream/proxy_handler.h $(LIB_DIR)/upstream/upstream_response.h $(LIB_DIR)/upstream/upstream_callbacks.h
RATE_LIMIT_HEADERS = $(LIB_DIR)/rate_limit/token_bucket.h $(LIB_DIR)/rate_limit/rate_limit_zone.h $(LIB_DIR)/rate_limit/rate_limiter.h
CIRCUIT_BREAKER_HEADERS = $(LIB_DIR)/circuit_breaker/circuit_breaker_state.h $(LIB_DIR)/circuit_breaker/circuit_breaker_window.h $(LIB_DIR)/circuit_breaker/circuit_breaker_slice.h $(LIB_DIR)/circuit_breaker/retry_budget.h $(LIB_DIR)/circuit_breaker/circuit_breaker_host.h $(LIB_DIR)/circuit_breaker/circuit_breaker_manager.h
CLI_HEADERS = $(LIB_DIR)/cli/cli_parser.h $(LIB_DIR)/cli/signal_handler.h $(LIB_DIR)/cli/pid_file.h $(LIB_DIR)/cli/version.h $(LIB_DIR)/cli/daemonizer.h
TEST_HEADERS = $(TEST_DIR)/test_framework.h $(TEST_DIR)/http_test_client.h $(TEST_DIR)/basic_test.h $(TEST_DIR)/stress_test.h $(TEST_DIR)/race_condition_test.h $(TEST_DIR)/timeout_test.h $(TEST_DIR)/config_test.h $(TEST_DIR)/http_test.h $(TEST_DIR)/websocket_test.h $(TEST_DIR)/tls_test.h $(TEST_DIR)/cli_test.h $(TEST_DIR)/http2_test.h $(TEST_DIR)/route_test.h $(TEST_DIR)/upstream_pool_test.h $(TEST_DIR)/proxy_test.h
TEST_HEADERS = $(TEST_DIR)/test_framework.h $(TEST_DIR)/http_test_client.h $(TEST_DIR)/basic_test.h $(TEST_DIR)/stress_test.h $(TEST_DIR)/race_condition_test.h $(TEST_DIR)/timeout_test.h $(TEST_DIR)/config_test.h $(TEST_DIR)/http_test.h $(TEST_DIR)/websocket_test.h $(TEST_DIR)/tls_test.h $(TEST_DIR)/cli_test.h $(TEST_DIR)/http2_test.h $(TEST_DIR)/route_test.h $(TEST_DIR)/upstream_pool_test.h $(TEST_DIR)/proxy_test.h $(TEST_DIR)/rate_limit_test.h $(TEST_DIR)/kqueue_test.h $(TEST_DIR)/circuit_breaker_test.h $(TEST_DIR)/circuit_breaker_components_test.h $(TEST_DIR)/circuit_breaker_integration_test.h $(TEST_DIR)/circuit_breaker_retry_budget_test.h $(TEST_DIR)/circuit_breaker_wait_queue_drain_test.h $(TEST_DIR)/circuit_breaker_observability_test.h $(TEST_DIR)/circuit_breaker_reload_test.h

# All headers combined
HEADERS = $(CORE_HEADERS) $(CALLBACK_HEADERS) $(REACTOR_HEADERS) $(NETWORK_HEADERS) $(SERVER_HEADERS) $(THREAD_POOL_HEADERS) $(UTIL_HEADERS) $(FOUNDATION_HEADERS) $(HTTP_HEADERS) $(HTTP2_HEADERS) $(WS_HEADERS) $(TLS_HEADERS) $(UPSTREAM_HEADERS) $(RATE_LIMIT_HEADERS) $(CLI_HEADERS) $(TEST_HEADERS)
HEADERS = $(CORE_HEADERS) $(CALLBACK_HEADERS) $(REACTOR_HEADERS) $(NETWORK_HEADERS) $(SERVER_HEADERS) $(THREAD_POOL_HEADERS) $(UTIL_HEADERS) $(FOUNDATION_HEADERS) $(HTTP_HEADERS) $(HTTP2_HEADERS) $(WS_HEADERS) $(TLS_HEADERS) $(UPSTREAM_HEADERS) $(RATE_LIMIT_HEADERS) $(CIRCUIT_BREAKER_HEADERS) $(CLI_HEADERS) $(TEST_HEADERS)

# Default target
.DEFAULT_GOAL := all
Expand Down Expand Up @@ -238,6 +242,11 @@ test_rate_limit: $(TARGET)
@echo "Running rate limit tests only..."
./$(TARGET) rate_limit

# Run only circuit breaker tests
test_circuit_breaker: $(TARGET)
@echo "Running circuit breaker tests only..."
./$(TARGET) circuit_breaker

# Display help information
help:
@echo "Reactor Server C++ - Makefile Help"
Expand Down Expand Up @@ -318,4 +327,4 @@ help:
# Build only the production server binary
server: $(SERVER_TARGET)

.PHONY: all clean test server test_basic test_stress test_race test_config test_http test_ws test_tls test_cli test_http2 test_upstream test_proxy test_rate_limit help
.PHONY: all clean test server test_basic test_stress test_race test_config test_http test_ws test_tls test_cli test_http2 test_upstream test_proxy test_rate_limit test_circuit_breaker help
149 changes: 149 additions & 0 deletions docs/circuit_breaker.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# Circuit Breaker

Per-upstream circuit breaking for the gateway, preventing cascading failures when a backend becomes unhealthy. Follows the resilience4j three-state machine (`CLOSED` → `OPEN` → `HALF_OPEN` → `CLOSED`), trips on either consecutive-failure or failure-rate thresholds, and short-circuits checkouts with `503 Service Unavailable` while the circuit is open. A separate **retry budget** caps the fraction of concurrent upstream work that may be retries, bounding the retry-storm amplification factor even when individual retries pass the breaker gate.

---

## Overview

- **Per-dispatcher slices.** One `CircuitBreakerSlice` per dispatcher partition for each upstream. Hot-path `TryAcquire` / `Report*` calls are lock-free — each slice is dispatcher-thread-pinned.
- **Three states.** `CLOSED` = normal traffic. `OPEN` = all requests short-circuited with 503 for the exponential-backoff open duration. `HALF_OPEN` = a bounded number of probe requests are admitted to test recovery; on success, closes; on failure, re-trips with longer backoff.
- **Dual trip paths.** Either `consecutive_failures >= N` OR `failure_rate >= P%` over a sliding window (subject to `minimum_volume`).
- **Retry budget.** Host-level cap: `max(retry_budget_min_concurrency, (in_flight - retries_in_flight) * retry_budget_percent / 100)`. Retries that exceed the cap receive `503` + `X-Retry-Budget-Exhausted: 1` instead of going to the upstream.
- **Wait-queue drain on trip.** On every `CLOSED → OPEN` transition, the corresponding pool partition's wait queue is drained immediately with `503 + X-Circuit-Breaker: open` — queued waiters don't have to wait out the full open window.
- **Dry-run mode.** `dry_run=true` computes decisions and logs them, but still admits traffic. Useful for staging a breaker in production without risk.
- **Hot-reload.** Breaker-field edits (thresholds, window, probe budget, retry budget tuning, enabled toggle) apply live on SIGHUP — no restart required. Topology edits (host/port/pool/proxy/tls) still require a restart.

---

## Configuration

Each `upstream` entry accepts a nested `circuit_breaker` block:

```json
{
"upstreams": [
{
"name": "orders",
"host": "orders-backend",
"port": 8080,
"circuit_breaker": {
"enabled": true,
"dry_run": false,
"consecutive_failure_threshold": 5,
"failure_rate_threshold": 50,
"minimum_volume": 20,
"window_seconds": 10,
"permitted_half_open_calls": 3,
"base_open_duration_ms": 5000,
"max_open_duration_ms": 60000,
"retry_budget_percent": 20,
"retry_budget_min_concurrency": 3
}
}
]
}
```

### Fields

| Field | Type | Default | Meaning |
|---|---|---|---|
| `enabled` | bool | `false` | Master switch. When false, the slice is a zero-overhead no-op on the hot path. |
| `dry_run` | bool | `false` | Shadow mode: log would-reject decisions but admit traffic. Both the state machine and the retry budget honor this flag. |
| `consecutive_failure_threshold` | int | `5` | Trip when N consecutive failures are observed in `CLOSED`. Upper bound 10,000. |
| `failure_rate_threshold` | int | `50` | Trip when `(failures / total) * 100 >= this` over the rolling window, provided `total >= minimum_volume`. 0-100. |
| `minimum_volume` | int | `20` | Minimum calls-in-window before rate-based trip is even considered. Upper bound 10,000,000. |
| `window_seconds` | int | `10` | Rolling window duration for the rate trip. >= 1. |
| `permitted_half_open_calls` | int | `3` | Probe admissions allowed per `HALF_OPEN` cycle. A single success flips to `CLOSED`; a single failure re-trips to `OPEN`. Upper bound 1,000. |
| `base_open_duration_ms` | int | `5000` | Initial open duration on first trip. Subsequent trips use `min(base << consecutive_trips, max)`. |
| `max_open_duration_ms` | int | `60000` | Ceiling for the exponential-backoff open duration. |
| `retry_budget_percent` | int | `20` | Retries capped at this % of non-retry in-flight traffic to the same host. 0-100. |
| `retry_budget_min_concurrency` | int | `3` | Floor for the retry cap — always allow at least this many concurrent retries regardless of traffic level. |

### Defaults (when `circuit_breaker` block is absent)

`enabled=false`. The breaker is fully opt-in. No behavioral change from a pre-breaker gateway configuration.

---

## Client-facing responses

Two distinct `503` variants, keyed off the reject source:

**Circuit-open reject** — breaker is `OPEN`, or `HALF_OPEN` with its probe budget exhausted:
```
HTTP/1.1 503 Service Unavailable
Retry-After: 5
X-Circuit-Breaker: open # or half_open
X-Upstream-Host: orders-backend:8080
Connection: close
```

- `Retry-After` derivation:
- `OPEN`: derived from the stored `open_until` deadline (time remaining until next probe).
- `HALF_OPEN`: derived from the *next* open duration (`base << consecutive_trips`) — reflects what the backoff would be if the in-flight probes fail. Base alone would under-report after multiple trips.
- Both paths: ceil-divide the millisecond value to seconds, capped at 3600s.
- `X-Circuit-Breaker` distinguishes the two reject paths so operators can tell "backoff active" from "probing, no capacity left".

**Retry-budget reject** — returned for each retry attempt rejected because the host's budget is exhausted:
```
HTTP/1.1 503 Service Unavailable
X-Retry-Budget-Exhausted: 1
Connection: close
```

No `Retry-After` (the budget has no recovery clock — it depends on concurrent traffic). No `X-Circuit-Breaker` header (this reject path is orthogonal to the state machine).

Both responses are **terminal**: the retry loop never retries a circuit-open or retry-budget-exhausted outcome.

---

## Hot reload

All `circuit_breaker` fields on existing upstream services are hot-reloadable via `SIGHUP`. Reload semantics:

| Edit | Behavior |
|---|---|
| Threshold change (failures, rate, window, probe budget, open durations) | Applied on the next `TryAcquire` / `Report*` call on each slice. Live state (`CLOSED`/`OPEN`/`HALF_OPEN`) is preserved. |
| `enabled=true → false` | Live state reset to `CLOSED`; hot path short-circuits to `ADMITTED`. No transition callback fired. |
| `enabled=false → true` | Live state reset to `CLOSED`. The transition callback (wired at startup) re-engages for future trips. |
| `window_seconds` change | Rolling window reset. In-flight reports admitted pre-reload are invalidated (by `closed_gen_` bump); `consecutive_failures_` reset so stale counts can't trip the fresh window. In-flight `HALF_OPEN` probes are NOT invalidated (separate `halfopen_gen_` counter) — probe cycles complete normally. |
| `retry_budget_percent` / `retry_budget_min_concurrency` | Applied immediately (atomic stores). In-flight counters preserved. |

Topology edits (`host`, `port`, `pool.*`, `proxy.*`, `tls.*`) still require a restart; the gateway logs `"Reload: upstream topology changes require a restart to take effect"` and keeps the old pool alive. Breaker edits on the same reload are still applied live.

---

## Observability

### Logs

| Event | Level | Sample |
|---|---|---|
| `CLOSED → OPEN` trip | `warn` | `circuit breaker tripped service=orders host=orders-backend:8080 partition=0 trigger=consecutive consecutive_failures=5 window_total=12 window_fail_rate=41 open_for_ms=5000 consecutive_trips=1` |
| `OPEN → HALF_OPEN` | `info` | `circuit breaker half-open ... probes_allowed=3` |
| `HALF_OPEN → CLOSED` | `info` | `circuit breaker closed ... probes_succeeded=3` |
| `HALF_OPEN → OPEN` re-trip | `warn` | `circuit breaker re-tripped ... trigger=probe_fail consecutive_trips=2 open_for_ms=10000` |
| Reject (first of cycle) | `info` | `circuit breaker rejected ... state=open` |
| Reject (subsequent) | `debug` | Same, at debug. |
| Reject (dry-run) | `info` | `[dry-run] circuit breaker would reject ...` |
| Retry budget exhausted | `warn` | `retry budget exhausted service=orders in_flight=45 retries_in_flight=9 cap=9 client_fd=... attempt=1` |
| Reload applied | `info` | `circuit breaker config applied service=orders enabled=true window_s=10 fail_rate=50 consec_threshold=5` |
| Wait-queue drain on trip | `info` | `PoolPartition draining wait queue on breaker trip: orders-backend:8080 queue_size=3` |

### Snapshot API

`CircuitBreakerManager::SnapshotAll()` returns one `CircuitBreakerHostSnapshot` per upstream with per-slice rows (`state`, `trips`, `rejected`, `probe_successes`, `probe_failures`) plus host-level aggregates (`total_trips`, `total_rejected`, `open_partitions`, `half_open_partitions`, `retries_in_flight`, `retries_rejected`, `in_flight`). A `/admin/breakers` HTTP endpoint that JSON-serializes this snapshot is **planned but not yet exposed** — the API is ready for future wiring.

---

## Design notes

- **Dispatcher affinity.** Slices are pinned to their dispatcher thread — no CAS on the hot path. The trade-off: skewed request distribution across dispatchers can cause one partition to trip while another stays `CLOSED`. Uniform hashing keeps this mild in practice.
- **Lazy `HALF_OPEN`.** The transition from `OPEN` happens on the next inbound `TryAcquire` once the open deadline elapses — no background timer. Envoy and resilience4j use the same model.
- **Generation tokens.** Every admission is stamped with a per-domain generation counter (`closed_gen_` or `halfopen_gen_`, depending on state). `Report*` drops stale-generation completions so pre-transition requests can't pollute a fresh cycle. Window resizes bump only `closed_gen_` so in-flight probes aren't stranded.
- **Retry budget CAS.** `TryConsumeRetry` uses `compare_exchange_weak` to serialize concurrent retry admissions. A plain load-check-add would let N callers all observe `current < cap` and all increment past the cap.
- **Non-retry denominator.** The budget base is `in_flight - retries_in_flight`, not raw `in_flight`. Retries count in both terms but subtract out here so admitting a retry doesn't inflate its own cap.

For the full design document (motivations, trade-offs, failure modes, revision history, test strategy), see [.claude/documents/design/CIRCUIT_BREAKER_DESIGN.md](../.claude/documents/design/CIRCUIT_BREAKER_DESIGN.md).
Loading
Loading