From 2fef894042db94cce786e790ae284c44a498233a Mon Sep 17 00:00:00 2001 From: Andy Bonventre <365204+andybons@users.noreply.github.com> Date: Sun, 26 Apr 2026 18:19:52 +0000 Subject: [PATCH] fix(proxy): increase ResponseHeaderTimeout from 30s to 5m MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LLM inference (especially extended thinking models) can take well over 30s before the response headers arrive. The 30s timeout caused read timeouts on slow-to-start completions through Bedrock and other LLM providers. 5 minutes covers extended thinking while still catching genuinely dead connections. Once the response headers arrive, Go's transport applies no further timeout to the body — streaming continues indefinitely, matching the behavior of LLM proxies like Portkey. --- CHANGELOG.md | 6 ++++++ proxy/mcp.go | 2 +- proxy/proxy.go | 4 ++-- proxy/relay.go | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index afaeb1d..03c5003 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ Gatekeeper is a standalone credential-injecting TLS-intercepting proxy. It trans Gatekeeper is pre-1.0. The configuration schema and credential source interface may change between minor versions. 
+## v0.9.1 — 2026-04-26 + +### Fixed + +- **Increased response header timeout from 30s to 5m** — LLM inference (especially extended thinking models like Claude 3.7 Sonnet) can take well over 30 seconds before the response headers arrive; the previous 30s `ResponseHeaderTimeout` caused read timeouts on slow-to-start completions; the new 5-minute default covers extended thinking while still catching genuinely dead connections; applies to all transport paths (CONNECT interception, HTTP relay, MCP relay) + ## v0.9.0 — 2026-04-22 ### Added diff --git a/proxy/mcp.go b/proxy/mcp.go index 1d3e955..d8486b7 100644 --- a/proxy/mcp.go +++ b/proxy/mcp.go @@ -39,7 +39,7 @@ var mcpRelayClient = &http.Client{ KeepAlive: 30 * time.Second, }).DialContext, TLSHandshakeTimeout: 10 * time.Second, - ResponseHeaderTimeout: 30 * time.Second, + ResponseHeaderTimeout: 5 * time.Minute, IdleConnTimeout: 90 * time.Second, }, } diff --git a/proxy/proxy.go b/proxy/proxy.go index 17c768e..7331f60 100644 --- a/proxy/proxy.go +++ b/proxy/proxy.go @@ -126,7 +126,7 @@ var httpTransport = &http.Transport{ KeepAlive: 30 * time.Second, }).DialContext, TLSHandshakeTimeout: 10 * time.Second, - ResponseHeaderTimeout: 30 * time.Second, + ResponseHeaderTimeout: 5 * time.Minute, IdleConnTimeout: 90 * time.Second, } @@ -1922,7 +1922,7 @@ func (p *Proxy) handleConnectWithInterception(w http.ResponseWriter, r *http.Req RootCAs: p.upstreamCAs, // nil means system roots }, TLSHandshakeTimeout: 10 * time.Second, - ResponseHeaderTimeout: 30 * time.Second, + ResponseHeaderTimeout: 5 * time.Minute, MaxIdleConns: 100, IdleConnTimeout: 90 * time.Second, // Note: Do NOT set ForceAttemptHTTP2 here. 
This transport forwards diff --git a/proxy/relay.go b/proxy/relay.go index ab2022c..c2140c5 100644 --- a/proxy/relay.go +++ b/proxy/relay.go @@ -21,7 +21,7 @@ var relayClient = &http.Client{ KeepAlive: 30 * time.Second, }).DialContext, TLSHandshakeTimeout: 10 * time.Second, - ResponseHeaderTimeout: 30 * time.Second, + ResponseHeaderTimeout: 5 * time.Minute, IdleConnTimeout: 90 * time.Second, }, }