Patch summary (free text ahead of the first file header; not part of any hunk):
  * src/agent/worker.rs: lowers MAX_TRANSIENT_RETRIES from 5 to 3 and extends
    its doc comment with the worker-retry / model-attempt arithmetic.
  * src/llm/anthropic/params.rs: adds a 10-minute per-request `.timeout(...)`
    to the request builder inside build_anthropic_request, plus a comment
    explaining that it overrides the client-wide timeout.

diff --git a/src/agent/worker.rs b/src/agent/worker.rs
index 58c21d8e3..815838789 100644
--- a/src/agent/worker.rs
+++ b/src/agent/worker.rs
@@ -35,8 +35,9 @@ const MAX_OVERFLOW_RETRIES: usize = 2;
 /// Max consecutive transient provider error retries before giving up.
 /// Transient errors (upstream 500s, timeouts, rate limits that survived
 /// model-level retries) get a backoff-and-retry at the worker level so
-/// the worker survives temporary provider outages.
-const MAX_TRANSIENT_RETRIES: usize = 5;
+/// the worker survives temporary provider outages. Each retry triggers
+/// up to 3 model-level attempts, so 3 worker retries = 9 total attempts.
+const MAX_TRANSIENT_RETRIES: usize = 3;
 
 /// Base delay for worker-level transient error backoff (doubles each retry).
 const TRANSIENT_RETRY_BASE_DELAY: std::time::Duration = std::time::Duration::from_secs(5);
diff --git a/src/llm/anthropic/params.rs b/src/llm/anthropic/params.rs
index 11dcda8ef..6ecaf8f4b 100644
--- a/src/llm/anthropic/params.rs
+++ b/src/llm/anthropic/params.rs
@@ -97,8 +97,11 @@ pub fn build_anthropic_request(
         body["output_config"] = serde_json::json!({ "effort": effort });
     }
 
+    // Override the global 120s client timeout — large completions with
+    // extended thinking can easily take 5–10 minutes to generate.
     let builder = http_client
         .post(&url)
+        .timeout(std::time::Duration::from_secs(10 * 60))
         .header("anthropic-version", "2023-06-01")
         .header("content-type", "application/json");
 