diff --git a/src/agent/worker.rs b/src/agent/worker.rs
index 58c21d8e3..815838789 100644
--- a/src/agent/worker.rs
+++ b/src/agent/worker.rs
@@ -35,8 +35,9 @@ const MAX_OVERFLOW_RETRIES: usize = 2;
 /// Max consecutive transient provider error retries before giving up.
 /// Transient errors (upstream 500s, timeouts, rate limits that survived
 /// model-level retries) get a backoff-and-retry at the worker level so
-/// the worker survives temporary provider outages.
-const MAX_TRANSIENT_RETRIES: usize = 5;
+/// the worker survives temporary provider outages. Each worker-level retry
+/// triggers up to 3 model-level attempts, so 3 worker retries = at most 9 model-level attempts.
+const MAX_TRANSIENT_RETRIES: usize = 3;
 
 /// Base delay for worker-level transient error backoff (doubles each retry).
 const TRANSIENT_RETRY_BASE_DELAY: std::time::Duration = std::time::Duration::from_secs(5);
diff --git a/src/llm/anthropic/params.rs b/src/llm/anthropic/params.rs
index 11dcda8ef..6ecaf8f4b 100644
--- a/src/llm/anthropic/params.rs
+++ b/src/llm/anthropic/params.rs
@@ -97,8 +97,11 @@ pub fn build_anthropic_request(
         body["output_config"] = serde_json::json!({ "effort": effort });
     }
 
+    // Override the global 120s client timeout — large completions with
+    // extended thinking can easily take 5–10 minutes to generate.
     let builder = http_client
         .post(&url)
+        .timeout(std::time::Duration::from_secs(10 * 60))
         .header("anthropic-version", "2023-06-01")
         .header("content-type", "application/json");