getaxonflow · saurabhjain1592 · May 20, 2026 · May 20, 2026
@@ -16,10 +16,34 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  where a customer's misconfigured deployment caused a tight 401 retry
  loop against community-saas (~30 401/hour from a single source IP).
  Rust SDK was already safe — 401 falls through the early `return Err(e)`
- path in `src/client.rs:517-525` because it isn't in the
+ path in `src/client.rs:560-565` because it isn't in the
  `{429, 402, 403}` retry-allowlist — but there was no explicit test for
  the contract until now. Mutation-tested: adding `401` to the allowlist
  fails the test, confirming the assertion isn't tautological.
+- **`test_429_is_retried_allowlist_contract`** companion regression
+ test — locks in the OTHER direction of the allowlist: that HTTP 429
+ (rate limit) DOES trigger retries up to `max_attempts`. Without this,
+ a future refactor dropping `*status != 429` from `execute_with_retry`
+ would silently make all 4xx terminal — breaking the rate-limit retry
+ contract — and `test_401_not_retried_issue_2275` alone wouldn't catch
+ it (401 stays terminal either way). Together the two tests bracket
+ the allowlist boundary. Mutation-tested: deleting the `*status != 429`
+ clause fails the test (wiremock panics on Drop because the mock is
+ only called once instead of `max_attempts` times).
+
+### Documentation
+
+- **Rustdoc on `execute_with_retry`** (`src/client.rs:532`) describing
+ the retry contract — which status codes retry (5xx + 429), which are
+ terminal (401 + everything else 4xx outside the allowlist) — citing
+ issue [#2275](https://github.com/getaxonflow/axonflow-enterprise/issues/2275)
+ and CHANGELOG.md for history.
+- **Clarifying comment above the retry-allowlist** explaining that
+ 402/403 are handled as success responses in `execute_request`
+ (`src/client.rs:586`) and never propagate to `execute_with_retry` as
+ errors, so the `*status != 402` / `*status != 403` clauses in the
+ allowlist are intentional belt-and-suspenders defense for any future
+ refactor of `execute_request` that converts 402/403 back to `Err`.
 
 ## [0.3.1] - 2026-05-20 — `runtime-e2e/x-client-id/` parity with the other 4 SDKs
 

@@ -498,6 +498,37 @@ impl AxonFlowClient {
         }
     }
 
+    /// Retry the request with exponential backoff, honoring the
+    /// SDK-wide retry contract.
+    ///
+    /// **Retried status codes:**
+    /// - 5xx — server-side failures (treated as transient).
+    /// - 429 — rate-limit responses (transient by definition).
+    /// - Transport-level errors (connection refused, DNS, TLS) —
+    ///   surfaced as non-`ApiError` variants of [`AxonFlowError`];
+    ///   the `if let AxonFlowError::ApiError { .. }` guard doesn't
+    ///   match them, so they fall through to `last_err = Some(e)` and
+    ///   retry on the next iteration.
+    ///
+    /// **Terminal status codes (early `return Err(e)`):**
+    /// - 401 — auth failure. Retrying with the same invalid
+    ///   credential just compounds the storm on the agent. See
+    ///   issue [#2275](https://github.com/getaxonflow/axonflow-enterprise/issues/2275)
+    ///   for the customer-observed retry loop that motivated the
+    ///   regression-locking test `test_401_not_retried_issue_2275`.
+    /// - 400, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414,
+    ///   415, 416, 417, 418, 421, 422, 423, 424, 425, 426, 428, 431,
+    ///   451 — every other 4xx that isn't in the `{429, 402, 403}` allowlist.
+    ///
+    /// **Caveat on 402/403:** `execute_request` returns 402 + 403 as
+    /// `Ok(client_resp)` because those are SUCCESS responses carrying
+    /// policy/quota envelope data — not errors. They never reach this
+    /// function as `Err`, so the `*status != 402` and `*status != 403`
+    /// clauses below are functionally dead in current code. They're
+    /// kept as intent-preserving belt-and-suspenders for any future
+    /// refactor that converts 402/403 back to `Err`.
+    ///
+    /// See `CHANGELOG.md` for the contract's history.
     async fn execute_with_retry(
         &self,
         req: &ClientRequest,
@@ -515,6 +546,17 @@ impl AxonFlowClient {
                 Ok(resp) => return Ok(resp),
                 Err(e) => {
                     if let AxonFlowError::ApiError { status, .. } = &e {
+                        // Retry allowlist: any 4xx NOT in {429, 402, 403} is
+                        // terminal. 5xx always retries (falls through to the
+                        // `last_err = Some(e)` path below).
+                        //
+                        // 402/403 NEVER reach this branch as `Err`: see
+                        // `execute_request` in this file — those statuses
+                        // return as `Ok(client_resp)` because they carry
+                        // policy/quota envelope data. The `*status != 402`
+                        // and `*status != 403` clauses are intentional
+                        // belt-and-suspenders for a hypothetical future
+                        // refactor that errors on those statuses.
                         if *status >= 400
                             && *status < 500
                             && *status != 429

@@ -377,6 +377,51 @@ async fn test_401_not_retried_issue_2275() {
     assert!(result.is_err(), "401 must propagate as an error");
 }
 
+// Companion to `test_401_not_retried_issue_2275` — locks in the OTHER
+// direction of the retry allowlist: 429 (rate limit) MUST trigger retry
+// up to `max_attempts`. Without this, a future refactor that drops
+// `*status != 429` from `execute_with_retry` would silently make every
+// 4xx terminal, breaking the rate-limit retry contract; the 401-not-
+// retried test alone wouldn't catch that flip (401 stays terminal
+// either way). `.expect(3)` makes wiremock fail the test (panic on
+// Drop) if the SDK fails to retry the documented number of times.
+#[tokio::test]
+async fn test_429_is_retried_allowlist_contract() {
+    let server = MockServer::start().await;
+
+    Mock::given(method("POST"))
+        .and(path("/api/request"))
+        .respond_with(ResponseTemplate::new(429).set_body_string("rate limited"))
+        .expect(3) // must equal `retry.max_attempts` below; wiremock panics on Drop if the call count differs
+        .mount(&server)
+        .await;
+
+    let config = AxonFlowConfig {
+        endpoint: server.uri(),
+        mode: Mode::Sandbox, // Disable fail-open so 429 surfaces as Err after exhausting retries
+        retry: RetryConfig {
+            enabled: true,
+            max_attempts: 3, // total calls the mock should see; keep in sync with `.expect(3)` above
+            initial_delay: Duration::from_millis(1), // tiny backoff base so the retries finish quickly
+        },
+        cache: CacheConfig {
+            enabled: false, // NOTE(review): presumably so every attempt reaches the mock server — confirm
+            ..Default::default()
+        },
+        ..Default::default()
+    };
+    let client = AxonFlowClient::new(config).unwrap();
+
+    let result = client
+        .proxy_llm_call("user", "query", "chat", HashMap::new())
+        .await;
+
+    assert!(
+        result.is_err(),
+        "429 should propagate as an error after exhausting all retry attempts"
+    );
+}
+
 #[tokio::test]
 async fn test_list_connectors() {
     let server = MockServer::start().await;