From 8a5ed447e1a57ce05c7e93eebea4fda49c5577c1 Mon Sep 17 00:00:00 2001
From: billybonks <sebastienstettler@gmail.com>
Date: Fri, 3 Apr 2026 15:53:13 +0800
Subject: [PATCH 01/14] breaking: delete claude code custom path

the custom path tried to make things easy, but consumers
must manage their own token details: whenever a token is refreshed
they must update the stored values themselves.

deleting all these custom paths also makes things much simpler
on our side
---
 lib/llm_gateway.rb                            |  7 --
 lib/llm_gateway/adapters/adapter.rb           |  3 +-
 .../adapters/claude_code/input_mapper.rb      | 14 ----
 .../adapters/claude_code/messages_adapter.rb  | 28 -------
 .../adapters/claude_code/option_mapper.rb     | 22 ------
 .../adapters/claude_code/output_mapper.rb     | 12 ---
 lib/llm_gateway/client.rb                     | 11 +--
 lib/llm_gateway/clients/claude.rb             | 48 ++++++++++++
 lib/llm_gateway/clients/claude_code.rb        |  5 +-
 .../clients/claude_code/oauth_flow.rb         |  2 +-
 .../clients/claude_code/token_manager.rb      |  2 +-
 test/client_builder_test.rb                   | 13 ++--
 test/integration/clients/claude_test.rb       | 74 +++++++++++++++++++
 13 files changed, 134 insertions(+), 107 deletions(-)
 delete mode 100644 lib/llm_gateway/adapters/claude_code/input_mapper.rb
 delete mode 100644 lib/llm_gateway/adapters/claude_code/messages_adapter.rb
 delete mode 100644 lib/llm_gateway/adapters/claude_code/option_mapper.rb
 delete mode 100644 lib/llm_gateway/adapters/claude_code/output_mapper.rb

diff --git a/lib/llm_gateway.rb b/lib/llm_gateway.rb
index cda2640..7cd9223 100644
--- a/lib/llm_gateway.rb
+++ b/lib/llm_gateway.rb
@@ -22,8 +22,6 @@
 
 require_relative "llm_gateway/adapters/claude/input_mapper"
 require_relative "llm_gateway/adapters/claude/output_mapper"
-require_relative "llm_gateway/adapters/claude_code/input_mapper"
-require_relative "llm_gateway/adapters/claude_code/output_mapper"
 require_relative "llm_gateway/adapters/open_ai/file_output_mapper"
 require_relative "llm_gateway/adapters/open_ai/chat_completions/input_mapper"
 require_relative "llm_gateway/adapters/open_ai/chat_completions/output_mapper"
@@ -38,7 +36,6 @@
 # Load adapter classes
 require_relative "llm_gateway/adapters/adapter"
 require_relative "llm_gateway/adapters/claude/messages_adapter"
-require_relative "llm_gateway/adapters/claude_code/messages_adapter"
 require_relative "llm_gateway/adapters/open_ai/chat_completions_adapter"
 require_relative "llm_gateway/adapters/open_ai/responses_adapter"
 require_relative "llm_gateway/adapters/openai_codex/responses_adapter"
@@ -118,10 +115,6 @@ def self.reset_configuration!
     client: Clients::Claude,
     adapter: Adapters::Claude::MessagesAdapter)
 
-  ProviderRegistry.register("anthropic_oauth_messages",
-    client: Clients::ClaudeCode,
-    adapter: Adapters::ClaudeCode::MessagesAdapter)
-
   ProviderRegistry.register("openai_apikey_completions",
     client: Clients::OpenAi,
     adapter: Adapters::OpenAi::ChatCompletionsAdapter)
diff --git a/lib/llm_gateway/adapters/adapter.rb b/lib/llm_gateway/adapters/adapter.rb
index 6c26a0a..31ede49 100644
--- a/lib/llm_gateway/adapters/adapter.rb
+++ b/lib/llm_gateway/adapters/adapter.rb
@@ -95,8 +95,7 @@ def stream_client_method
 
       def stream_api_name
         case self
-        when LlmGateway::Adapters::Claude::MessagesAdapter,
-             LlmGateway::Adapters::ClaudeCode::MessagesAdapter
+        when LlmGateway::Adapters::Claude::MessagesAdapter
           "messages"
         when LlmGateway::Adapters::OpenAi::ChatCompletionsAdapter,
              LlmGateway::Adapters::Groq::ChatCompletionsAdapter
diff --git a/lib/llm_gateway/adapters/claude_code/input_mapper.rb b/lib/llm_gateway/adapters/claude_code/input_mapper.rb
deleted file mode 100644
index 2c6debe..0000000
--- a/lib/llm_gateway/adapters/claude_code/input_mapper.rb
+++ /dev/null
@@ -1,14 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "../claude/input_mapper"
-
-module LlmGateway
-  module Adapters
-    module ClaudeCode
-      class InputMapper < Claude::InputMapper
-        # Inherits all mapping from Claude::InputMapper
-        # The client handles OAuth-specific transformations (tool names, system prompt)
-      end
-    end
-  end
-end
diff --git a/lib/llm_gateway/adapters/claude_code/messages_adapter.rb b/lib/llm_gateway/adapters/claude_code/messages_adapter.rb
deleted file mode 100644
index 5c95cf1..0000000
--- a/lib/llm_gateway/adapters/claude_code/messages_adapter.rb
+++ /dev/null
@@ -1,28 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "../adapter"
-require_relative "input_mapper"
-require_relative "option_mapper"
-require_relative "output_mapper"
-require_relative "../claude/output_mapper"
-require_relative "../claude/stream_mapper"
-
-module LlmGateway
-  module Adapters
-    module ClaudeCode
-      class MessagesAdapter < Adapter
-        def initialize(client)
-          super(
-            client,
-            input_mapper: InputMapper,
-            output_mapper: OutputMapper,
-            file_output_mapper: Claude::FileOutputMapper,
-            option_mapper: OptionMapper,
-            client_method: :chat,
-            stream_mapper: Claude::StreamMapper
-          )
-        end
-      end
-    end
-  end
-end
diff --git a/lib/llm_gateway/adapters/claude_code/option_mapper.rb b/lib/llm_gateway/adapters/claude_code/option_mapper.rb
deleted file mode 100644
index bc8cb99..0000000
--- a/lib/llm_gateway/adapters/claude_code/option_mapper.rb
+++ /dev/null
@@ -1,22 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "../anthropic_option_mapper"
-
-module LlmGateway
-  module Adapters
-    module ClaudeCode
-      module OptionMapper
-        module_function
-
-        def map(options)
-          mapped_options = AnthropicOptionMapper.map(options)
-
-          max_completion_tokens = mapped_options.delete(:max_completion_tokens)
-          mapped_options[:max_tokens] = max_completion_tokens || mapped_options[:max_tokens] || 20480
-
-          mapped_options
-        end
-      end
-    end
-  end
-end
diff --git a/lib/llm_gateway/adapters/claude_code/output_mapper.rb b/lib/llm_gateway/adapters/claude_code/output_mapper.rb
deleted file mode 100644
index 430b623..0000000
--- a/lib/llm_gateway/adapters/claude_code/output_mapper.rb
+++ /dev/null
@@ -1,12 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "../claude/output_mapper"
-
-module LlmGateway
-  module Adapters
-    module ClaudeCode
-      class OutputMapper < Claude::OutputMapper
-      end
-    end
-  end
-end
diff --git a/lib/llm_gateway/client.rb b/lib/llm_gateway/client.rb
index 0924a53..a89cdc2 100644
--- a/lib/llm_gateway/client.rb
+++ b/lib/llm_gateway/client.rb
@@ -42,7 +42,6 @@ def self.download_file(provider, **kwargs)
     end
 
     def self.provider_from_model(model)
-      return "anthropic" if model.start_with?("claude_code/")
       return "anthropic" if model.start_with?("claude")
       return "groq" if model.start_with?("llama")
       return "openai" if model.start_with?("gpt") ||
@@ -72,15 +71,7 @@ def self.provider_id_from_client(client)
     def self.build_adapter_from_model(model, api_key: nil, refresh_token: nil, expires_at: nil, api: nil)
       provider = provider_from_model(model)
 
-      if model.start_with?("claude_code/")
-        LlmGateway.build_provider(
-          provider: "anthropic_oauth_messages",
-          model_key: model,
-          access_token: api_key,
-          refresh_token: refresh_token,
-          expires_at: expires_at
-        )
-      elsif api == "responses"
+      if api == "responses"
         config = {
           provider: "#{provider}_apikey_responses",
           model_key: model
diff --git a/lib/llm_gateway/clients/claude.rb b/lib/llm_gateway/clients/claude.rb
index 861a7f8..65a7d33 100644
--- a/lib/llm_gateway/clients/claude.rb
+++ b/lib/llm_gateway/clients/claude.rb
@@ -5,6 +5,8 @@
 module LlmGateway
   module Clients
     class Claude < BaseClient
+      CLAUDE_CODE_VERSION = "2.1.2"
+
       def initialize(model_key: "claude-3-7-sonnet-20250219", api_key: ENV["ANTHROPIC_API_KEY"])
         @base_endpoint = "https://api.anthropic.com/v1"
         super(model_key: model_key, api_key: api_key)
@@ -17,6 +19,18 @@ def chat(messages, **kwargs)
       def stream(messages, **kwargs, &block)
         post_stream("messages", build_body(messages, **kwargs), &block)
       end
+
+      def get_oauth_access_token(access_token:, refresh_token:, expires_at:, &block)
+        token_manager = LlmGateway::Clients::ClaudeCode::TokenManager.new(
+          access_token: access_token,
+          refresh_token: refresh_token,
+          expires_at: expires_at
+        )
+        token_manager.on_token_refresh = block if block_given?
+        token_manager.ensure_valid_token
+        token_manager.access_token
+      end
+
       def download_file(file_id)
         get("files/#{file_id}/content")
       end
@@ -34,12 +48,17 @@ def build_body(messages, tools: nil, system: [], **options)
         }
 
         body.merge!(tools: tools) if LlmGateway::Utils.present?(tools)
+
+        system = prepend_claude_code_identity(system) if claude_code_oauth_api_key?
+
         body.merge!(system: system) if LlmGateway::Utils.present?(system)
         body.merge!(options)
         body
       end
 
       def build_headers
+        return claude_code_oauth_headers if claude_code_oauth_api_key?
+
         {
           "anthropic-version" => "2023-06-01",
           "content-type" => "application/json",
@@ -48,6 +67,35 @@ def build_headers
         }
       end
 
+      def claude_code_oauth_api_key?
+        api_key.to_s.start_with?("sk-ant-oat")
+      end
+
+      def claude_code_oauth_headers
+        {
+          "anthropic-version" => "2023-06-01",
+          "content-type" => "application/json",
+          "Authorization" => "Bearer #{api_key}",
+          "anthropic-dangerous-direct-browser-access" => "true",
+          "anthropic-beta" => "claude-code-20250219,oauth-2025-04-20",
+          "user-agent" => "claude-cli/#{CLAUDE_CODE_VERSION} (external, cli)",
+          "x-app" => "cli"
+        }
+      end
+
+      def prepend_claude_code_identity(system)
+        identity = {
+          type: "text",
+          text: "You are Claude Code, Anthropic's official CLI for Claude."
+        }
+
+        if system.nil? || system.empty?
+          [ identity ]
+        else
+          [ identity ] + system
+        end
+      end
+
       def handle_client_specific_errors(response, error)
         case response.code.to_i
         when 400
diff --git a/lib/llm_gateway/clients/claude_code.rb b/lib/llm_gateway/clients/claude_code.rb
index c420fd1..c7e70c4 100644
--- a/lib/llm_gateway/clients/claude_code.rb
+++ b/lib/llm_gateway/clients/claude_code.rb
@@ -1,12 +1,13 @@
 # frozen_string_literal: true
 
 require_relative "claude"
-require_relative "claude_code/oauth_flow"
-require_relative "claude_code/token_manager"
 
 module LlmGateway
   module Clients
     class ClaudeCode < Claude
+      require_relative "claude_code/oauth_flow"
+      require_relative "claude_code/token_manager"
+
       CLAUDE_CODE_VERSION = "2.1.2"
       attr_reader :token_manager, :access_token
 
diff --git a/lib/llm_gateway/clients/claude_code/oauth_flow.rb b/lib/llm_gateway/clients/claude_code/oauth_flow.rb
index 8a9117d..35635ea 100644
--- a/lib/llm_gateway/clients/claude_code/oauth_flow.rb
+++ b/lib/llm_gateway/clients/claude_code/oauth_flow.rb
@@ -10,7 +10,7 @@
 
 module LlmGateway
   module Clients
-    class ClaudeCode < Claude
+    class ClaudeCode
       class OAuthFlow
         CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
         TOKEN_URL = "https://api.anthropic.com/v1/oauth/token"
diff --git a/lib/llm_gateway/clients/claude_code/token_manager.rb b/lib/llm_gateway/clients/claude_code/token_manager.rb
index a586f32..2f5e845 100644
--- a/lib/llm_gateway/clients/claude_code/token_manager.rb
+++ b/lib/llm_gateway/clients/claude_code/token_manager.rb
@@ -6,7 +6,7 @@
 
 module LlmGateway
   module Clients
-    class ClaudeCode < Claude
+    class ClaudeCode
       class TokenManager
         TOKEN_URL = "https://api.anthropic.com/v1/oauth/token"
         CLIENT_ID = OAuthFlow::CLIENT_ID
diff --git a/test/client_builder_test.rb b/test/client_builder_test.rb
index 3027e7e..ebd4361 100644
--- a/test/client_builder_test.rb
+++ b/test/client_builder_test.rb
@@ -13,16 +13,14 @@ class ClientBuilderTest < Test
     assert_instance_of LlmGateway::Clients::Claude, adapter.client
   end
 
-  test "builds claude code client with oauth messages provider" do
+  test "builds claude client with anthropic messages provider" do
     adapter = LlmGateway.build_provider({
-      provider: "anthropic_oauth_messages",
-      access_token: "test-access-token",
-      refresh_token: "test-refresh-token",
-      expires_at: Time.now + 3600
+      provider: "anthropic_apikey_messages",
+      api_key: "sk-ant-oat-test-token"
     })
 
-    assert_instance_of LlmGateway::Adapters::ClaudeCode::MessagesAdapter, adapter
-    assert_instance_of LlmGateway::Clients::ClaudeCode, adapter.client
+    assert_instance_of LlmGateway::Adapters::Claude::MessagesAdapter, adapter
+    assert_instance_of LlmGateway::Clients::Claude, adapter.client
   end
 
   test "builds openai client with default completions adapter" do
@@ -76,7 +74,6 @@ class ClientBuilderTest < Test
 
   test "provider registry exposes built in providers" do
     assert LlmGateway::ProviderRegistry.registered?("anthropic_apikey_messages")
-    assert LlmGateway::ProviderRegistry.registered?("anthropic_oauth_messages")
     assert LlmGateway::ProviderRegistry.registered?("openai_apikey_completions")
     assert LlmGateway::ProviderRegistry.registered?("openai_apikey_responses")
     assert LlmGateway::ProviderRegistry.registered?("groq_apikey_completions")
diff --git a/test/integration/clients/claude_test.rb b/test/integration/clients/claude_test.rb
index c277f3a..6864b30 100644
--- a/test/integration/clients/claude_test.rb
+++ b/test/integration/clients/claude_test.rb
@@ -144,4 +144,78 @@ def claude_client
     assert_equal "tool_use", result[:content][0][:type]
     assert_equal "get_weather", result[:content][0][:name]
   end
+
+  test "uses claude code oauth headers for sk-ant-oat api keys" do
+    stub_request(:post, "https://api.anthropic.com/v1/messages")
+      .to_return(status: 200, body: { id: "msg_1", content: [], usage: {} }.to_json,
+                 headers: { 'Content-Type': "application/json" })
+
+    LlmGateway::Clients::Claude.new(api_key: "sk-ant-oat-abc").chat([ { role: "user", content: "hello" } ])
+
+    assert_requested(:post, "https://api.anthropic.com/v1/messages",
+                     headers: {
+                       "Authorization" => "Bearer sk-ant-oat-abc",
+                       "anthropic-beta" => "claude-code-20250219,oauth-2025-04-20",
+                       "anthropic-dangerous-direct-browser-access" => "true",
+                       "x-app" => "cli"
+                     })
+  end
+
+  test "prepends claude code identity for sk-ant-oat api keys" do
+    stub_request(:post, "https://api.anthropic.com/v1/messages")
+      .with { |request|
+        body = JSON.parse(request.body)
+        system = body["system"]
+        system.is_a?(Array) &&
+          system.length == 1 &&
+          system[0]["type"] == "text" &&
+          system[0]["text"] == "You are Claude Code, Anthropic's official CLI for Claude."
+      }
+      .to_return(status: 200, body: { id: "msg_1", content: [], usage: {} }.to_json,
+                 headers: { 'Content-Type': "application/json" })
+
+    LlmGateway::Clients::Claude.new(api_key: "sk-ant-oat-abc").chat([ { role: "user", content: "hello" } ])
+  end
+
+  test "get_oauth_access_token returns existing non-expired token" do
+    token = claude_client.get_oauth_access_token(
+      access_token: "valid-token",
+      refresh_token: "refresh-token",
+      expires_at: Time.now + 3600
+    )
+
+    assert_equal "valid-token", token
+  end
+
+  test "get_oauth_access_token refreshes expired token and fires callback" do
+    callback_payload = nil
+
+    stub_request(:post, "https://api.anthropic.com/v1/oauth/token")
+      .to_return(
+        status: 200,
+        body: {
+          access_token: "new-access-token",
+          refresh_token: "new-refresh-token",
+          expires_in: 3600
+        }.to_json,
+        headers: { 'Content-Type': "application/json" }
+      )
+
+    token = claude_client.get_oauth_access_token(
+      access_token: "expired-token",
+      refresh_token: "refresh-token",
+      expires_at: Time.now - 60
+    ) do |access_token, refresh_token, expires_at|
+      callback_payload = {
+        access_token: access_token,
+        refresh_token: refresh_token,
+        expires_at: expires_at
+      }
+    end
+
+    assert_equal "new-access-token", token
+    assert_equal "new-access-token", callback_payload[:access_token]
+    assert_equal "new-refresh-token", callback_payload[:refresh_token]
+    assert callback_payload[:expires_at].is_a?(Time)
+  end
 end

From 3f0bcda03628a3966bd697d911b191ee6df37f16 Mon Sep 17 00:00:00 2001
From: billybonks <sebastienstettler@gmail.com>
Date: Fri, 3 Apr 2026 16:48:42 +0800
Subject: [PATCH 02/14] breaking: delete codex custom path

the custom path tried to make things easy, but consumers
must manage their own token details: whenever a token is refreshed
they must update the stored values themselves.

deleting all these custom paths also makes things much simpler
on our side
---
 lib/llm_gateway.rb                            |   7 +-
 .../openai_codex/responses_adapter.rb         |   4 +-
 lib/llm_gateway/clients/open_ai.rb            |  98 +++-
 lib/llm_gateway/clients/openai_codex.rb       | 207 -------
 .../clients/openai_codex/oauth_flow.rb        |   2 +-
 .../clients/openai_codex/token_manager.rb     |   2 +-
 test/integration/clients/openai_codex_test.rb | 515 ------------------
 test/integration/clients/openai_test.rb       |  69 +++
 8 files changed, 172 insertions(+), 732 deletions(-)
 delete mode 100644 lib/llm_gateway/clients/openai_codex.rb
 delete mode 100644 test/integration/clients/openai_codex_test.rb

diff --git a/lib/llm_gateway.rb b/lib/llm_gateway.rb
index 7cd9223..0a24d00 100644
--- a/lib/llm_gateway.rb
+++ b/lib/llm_gateway.rb
@@ -12,7 +12,8 @@
 require_relative "llm_gateway/clients/claude"
 require_relative "llm_gateway/clients/claude_code"
 require_relative "llm_gateway/clients/open_ai"
-require_relative "llm_gateway/clients/openai_codex"
+require_relative "llm_gateway/clients/openai_codex/oauth_flow"
+require_relative "llm_gateway/clients/openai_codex/token_manager"
 require_relative "llm_gateway/clients/groq"
 
 # Load adapters
@@ -66,7 +67,7 @@ module OpenAi
     end
 
     module OpenAiCodex
-      Client = LlmGateway::Clients::OpenAiCodex
+      Client = LlmGateway::Clients::OpenAi
     end
 
     module Groq
@@ -128,6 +129,6 @@ def self.reset_configuration!
     adapter: Adapters::Groq::ChatCompletionsAdapter)
 
   ProviderRegistry.register("openai_oauth_codex",
-    client: Clients::OpenAiCodex,
+    client: Clients::OpenAi,
     adapter: Adapters::OpenAiCodex::ResponsesAdapter)
 end
diff --git a/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb b/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb
index 67d4ad8..eb858e1 100644
--- a/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb
+++ b/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb
@@ -28,7 +28,7 @@ def initialize(client)
             output_mapper: OpenAi::Responses::OutputMapper,
             file_output_mapper: OpenAi::FileOutputMapper,
             option_mapper: OptionMapper,
-            client_method: :chat,
+            client_method: :chat_codex,
             stream_mapper: OpenAi::Responses::StreamMapper
           )
         end
@@ -36,7 +36,7 @@ def initialize(client)
         private
 
         def stream_client_method
-          :stream
+          :stream_codex
         end
 
         def stream_api_name
diff --git a/lib/llm_gateway/clients/open_ai.rb b/lib/llm_gateway/clients/open_ai.rb
index a007785..02a6fa7 100644
--- a/lib/llm_gateway/clients/open_ai.rb
+++ b/lib/llm_gateway/clients/open_ai.rb
@@ -5,8 +5,13 @@
 module LlmGateway
   module Clients
     class OpenAi < BaseClient
-      def initialize(model_key: "gpt-4o", api_key: ENV["OPENAI_API_KEY"])
+      CODEX_BASE_ENDPOINT = "https://chatgpt.com/backend-api/codex"
+
+      attr_reader :account_id
+
+      def initialize(model_key: "gpt-4o", api_key: ENV["OPENAI_API_KEY"], account_id: nil)
         @base_endpoint = "https://api.openai.com/v1"
+        @account_id = account_id
         super(model_key: model_key, api_key: api_key)
       end
 
@@ -57,6 +62,36 @@ def stream_responses(messages, tools: nil, system: [], **options, &block)
         post_stream("responses", body, &block)
       end
 
+      def get_oauth_access_token(access_token:, refresh_token:, expires_at:, account_id: nil, &block)
+        token_manager = LlmGateway::Clients::OpenAi::TokenManager.new(
+          access_token: access_token,
+          refresh_token: refresh_token,
+          expires_at: expires_at,
+          account_id: account_id
+        )
+        token_manager.on_token_refresh = block if block_given?
+        token_manager.ensure_valid_token
+        token_manager.access_token
+      end
+
+      def chat_codex(messages, tools: nil, system: [], account_id: nil, **options)
+        body = build_codex_body(messages, system, tools, **options)
+
+        completed_response = nil
+        post_codex_stream("responses", body, account_id: account_id) do |raw_sse|
+          if raw_sse[:event] == "response.completed"
+            completed_response = raw_sse.dig(:data, :response)
+          end
+        end
+
+        completed_response
+      end
+
+      def stream_codex(messages, tools: nil, system: [], account_id: nil, **options, &block)
+        body = build_codex_body(messages, system, tools, **options)
+        post_codex_stream("responses", body, account_id: account_id, &block)
+      end
+
       def download_file(file_id)
         get("files/#{file_id}/content")
       end
@@ -75,6 +110,63 @@ def upload_file(filename, content, mime_type = "application/octet-stream", purpo
 
       private
 
+      def build_codex_body(messages, system, tools, **options)
+        instructions = Array(system).filter_map { |s| s.is_a?(Hash) ? s[:content] : s }.join("\n")
+        instructions = "You are a helpful assistant." if instructions.empty?
+
+        body = {
+          model: model_key,
+          instructions: instructions,
+          input: messages,
+          store: false,
+          include: [ "reasoning.encrypted_content" ],
+          stream: true
+        }
+
+        body[:tools] = tools if tools
+        body.merge!(options)
+
+        body
+      end
+
+      def codex_headers(account_id: nil)
+        effective_account_id = account_id || @account_id
+
+        headers = {
+          "content-type" => "application/json",
+          "Authorization" => "Bearer #{api_key}",
+          "OpenAI-Beta" => "responses=experimental"
+        }
+        headers["chatgpt-account-id"] = effective_account_id if effective_account_id
+        headers
+      end
+
+      def post_codex_stream(url_part, body = nil, account_id: nil, &block)
+        endpoint = "#{CODEX_BASE_ENDPOINT}/#{url_part.sub(%r{^/}, "")}"
+        uri = URI(endpoint)
+        http = Net::HTTP.new(uri.host, uri.port)
+        http.use_ssl = true
+        http.read_timeout = 480
+        http.open_timeout = 10
+
+        body.merge!(stream: true)
+        request = Net::HTTP::Post.new(uri)
+        codex_headers(account_id: account_id).each { |key, value| request[key] = value }
+        request.body = body.to_json if body
+
+        http.request(request) do |response|
+          unless response.code.to_i == 200
+            full_body = +""
+            response.read_body { |chunk| full_body << chunk }
+            response.instance_variable_set(:@body, full_body)
+            response.instance_variable_set(:@read, true)
+            handle_error(response)
+          end
+
+          parse_sse_stream(response, &block)
+        end
+      end
+
       def build_headers
         {
           "content-type" => "application/json",
@@ -92,9 +184,9 @@ def handle_client_specific_errors(response, error)
         when 503
           raise Errors::OverloadError.new(error["message"], error_code)
         end
-
         # If we get here, we didn't handle it specifically
-        raise Errors::APIStatusError.new(error["message"], error_code)
+        message = error["message"] || "OpenAI request failed with status #{response.code}"
+        raise Errors::APIStatusError.new(message, error_code)
       end
     end
   end
diff --git a/lib/llm_gateway/clients/openai_codex.rb b/lib/llm_gateway/clients/openai_codex.rb
deleted file mode 100644
index 56aa40d..0000000
--- a/lib/llm_gateway/clients/openai_codex.rb
+++ /dev/null
@@ -1,207 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "open_ai"
-require_relative "openai_codex/oauth_flow"
-require_relative "openai_codex/token_manager"
-
-module LlmGateway
-  module Clients
-    # OpenAI Codex OAuth client.
-    #
-    # Uses the ChatGPT backend Codex endpoint with OAuth bearer tokens
-    # (ChatGPT Plus / Pro subscription) rather than standard OpenAI API keys.
-    #
-    # The Codex backend requires streaming mode for all requests; the non-block
-    # +chat+ method streams internally and returns the completed response object.
-    #
-    # Usage (direct):
-    #
-    #   client = LlmGateway::Clients::OpenAiCodex.new(
-    #     access_token:  "...",
-    #     refresh_token: "...",
-    #     expires_at:    Time.now + 3600,
-    #     account_id:    "..."
-    #   )
-    #
-    #   # Non-streaming
-    #   response = client.chat([{ role: "user", content: "Hello" }])
-    #
-    #   # Streaming
-    #   client.stream([{ role: "user", content: "Hello" }]) { |sse| puts sse.inspect }
-    #
-    # First-time OAuth login:
-    #
-    #   tokens = LlmGateway::Clients::OpenAiCodex::OAuthFlow.new.login
-    #   # => { access_token:, refresh_token:, expires_at:, account_id: }
-    #
-    class OpenAiCodex < OpenAi
-      CODEX_BASE_ENDPOINT = "https://chatgpt.com/backend-api/codex"
-
-      attr_reader :token_manager, :account_id
-      attr_accessor :prompt_cache_key
-
-      def initialize(
-        model_key: "gpt-4o",
-        access_token: nil,
-        refresh_token: nil,
-        expires_at: nil,
-        account_id: nil,
-        client_id: OAuthFlow::CLIENT_ID,
-        reasoning_effort: nil
-      )
-        @reasoning_effort = reasoning_effort
-
-        if refresh_token
-          @token_manager = TokenManager.new(
-            access_token: access_token,
-            refresh_token: refresh_token,
-            expires_at: expires_at,
-            account_id: account_id,
-            client_id: client_id
-          )
-          # Eagerly fetch a token only when none was provided
-          @token_manager.ensure_valid_token if access_token.nil?
-          access_token = @token_manager.access_token
-          @account_id  = @token_manager.account_id
-        end
-
-        @oauth_access_token = access_token
-        @account_id         = account_id || @account_id
-
-        # Pass the token as api_key to satisfy BaseClient; override the endpoint.
-        super(model_key: model_key, api_key: access_token)
-        @base_endpoint = CODEX_BASE_ENDPOINT
-      end
-
-      # Register a callback that fires whenever the access token is refreshed.
-      # The callback receives (access_token, refresh_token, expires_at).
-      def on_token_refresh=(callback)
-        @token_manager&.on_token_refresh = callback
-      end
-
-      # Send a chat request to the Codex backend.
-      #
-      # Without a block the stream is consumed internally and the completed
-      # response Hash is returned.  With a block, raw SSE event hashes are
-      # yielded as they arrive.
-      def chat(messages, tools: nil, system: [], **options)
-        ensure_valid_token
-
-        body = build_codex_body(messages, system, tools, **options)
-
-        if block_given?
-          post_stream_with_retry("responses", body) { |event| yield event }
-        else
-          # Codex requires streaming; accumulate and return the completed response.
-          completed_response = nil
-          post_stream_with_retry("responses", body) do |raw_sse|
-            if raw_sse[:event] == "response.completed"
-              completed_response = raw_sse.dig(:data, :response)
-            end
-          end
-          completed_response
-        end
-      end
-
-      # Streaming interface: yields raw SSE event hashes to the block.
-      def stream(messages, tools: nil, system: [], **options, &block)
-        ensure_valid_token
-
-        body = build_codex_body(messages, system, tools, **options)
-        post_stream_with_retry("responses", body, &block)
-      end
-
-      private
-
-      # ------------------------------------------------------------------
-      # Token helpers
-      # ------------------------------------------------------------------
-
-      def ensure_valid_token
-        return unless @token_manager
-
-        @token_manager.ensure_valid_token
-        @oauth_access_token = @token_manager.access_token
-        @account_id         = @token_manager.account_id
-      end
-
-      def post_with_retry(url_part, body = nil, extra_headers = {})
-        post(url_part, body, extra_headers)
-      rescue Errors::AuthenticationError => e
-        raise e unless @token_manager&.token_expired?
-
-        @token_manager.refresh_access_token!
-        @oauth_access_token = @token_manager.access_token
-        post(url_part, body, extra_headers)
-      end
-
-      def post_stream_with_retry(url_part, body = nil, extra_headers = {}, &block)
-        post_stream(url_part, body, extra_headers, &block)
-      rescue Errors::AuthenticationError => e
-        raise e unless @token_manager&.token_expired?
-
-        @token_manager.refresh_access_token!
-        @oauth_access_token = @token_manager.access_token
-        post_stream(url_part, body, extra_headers, &block)
-      end
-
-      # ------------------------------------------------------------------
-      # Body builder
-      # ------------------------------------------------------------------
-
-      def build_codex_body(messages, system, tools, **options)
-        instructions = Array(system).filter_map { |s|
-          s.is_a?(Hash) ? s[:content] : s
-        }.join("\n")
-        instructions = "You are a helpful assistant." if instructions.empty?
-
-        body = {
-          model: model_key,
-          instructions: instructions,
-          input: messages,
-          store: false,
-          include: [ "reasoning.encrypted_content" ],
-          stream: true
-        }
-
-        body[:prompt_cache_key]        = @prompt_cache_key     if @prompt_cache_key
-        body[:prompt_cache_retention]  = "24h"                 if @prompt_cache_key
-        body[:tools]                   = tools                 if tools
-
-        # Resolve reasoning effort: constructor-level @reasoning_effort takes
-        # precedence, then fall back to the unified per-call reasoning: param.
-        effort = @reasoning_effort || resolve_reasoning_effort(options[:reasoning])
-        body[:reasoning] = { effort: effort, summary: "detailed" } if effort
-
-        body
-      end
-
-      # Translate a reasoning value (string effort OR hash with :effort
-      # key) into a plain effort string understood by the Codex backend.
-      # Anthropic-style hashes (type: "enabled", budget_tokens: …) are ignored
-      # because the Codex backend has no equivalent concept.
-      def resolve_reasoning_effort(thinking)
-        case thinking
-        when String
-          thinking
-        when Hash
-          thinking[:effort] || thinking["effort"]
-        end
-      end
-
-      # ------------------------------------------------------------------
-      # Headers
-      # ------------------------------------------------------------------
-
-      def build_headers
-        headers = {
-          "content-type" => "application/json",
-          "Authorization" => "Bearer #{@oauth_access_token}",
-          "OpenAI-Beta" => "responses=experimental"
-        }
-        headers["chatgpt-account-id"] = @account_id if @account_id
-        headers
-      end
-    end
-  end
-end
diff --git a/lib/llm_gateway/clients/openai_codex/oauth_flow.rb b/lib/llm_gateway/clients/openai_codex/oauth_flow.rb
index e1b689d..de44f99 100644
--- a/lib/llm_gateway/clients/openai_codex/oauth_flow.rb
+++ b/lib/llm_gateway/clients/openai_codex/oauth_flow.rb
@@ -10,7 +10,7 @@
 
 module LlmGateway
   module Clients
-    class OpenAiCodex < OpenAi
+    class OpenAi
       class OAuthFlow
         CLIENT_ID    = "app_EMoamEEZ73f0CkXaXp7hrann"
         AUTHORIZE_URL = "https://auth.openai.com/oauth/authorize"
diff --git a/lib/llm_gateway/clients/openai_codex/token_manager.rb b/lib/llm_gateway/clients/openai_codex/token_manager.rb
index 25b276c..a6cd335 100644
--- a/lib/llm_gateway/clients/openai_codex/token_manager.rb
+++ b/lib/llm_gateway/clients/openai_codex/token_manager.rb
@@ -6,7 +6,7 @@
 
 module LlmGateway
   module Clients
-    class OpenAiCodex < OpenAi
+    class OpenAi
       class TokenManager
         attr_reader :access_token, :refresh_token, :expires_at, :account_id, :client_id
         attr_accessor :on_token_refresh
diff --git a/test/integration/clients/openai_codex_test.rb b/test/integration/clients/openai_codex_test.rb
deleted file mode 100644
index 5d75c80..0000000
--- a/test/integration/clients/openai_codex_test.rb
+++ /dev/null
@@ -1,515 +0,0 @@
-# frozen_string_literal: true
-
-require "test_helper"
-
-class OpenAiCodexClientTest < Test
-  CODEX_ENDPOINT = "https://chatgpt.com/backend-api/codex/responses"
-
-  teardown do
-    WebMock.reset!
-  end
-
-  # ---------------------------------------------------------------------------
-  # Helpers
-  # ---------------------------------------------------------------------------
-
-  def codex_client(access_token: "test-access-token", model_key: "gpt-4o", account_id: "acct_123")
-    LlmGateway::Clients::OpenAiCodex.new(
-      access_token: access_token,
-      model_key: model_key,
-      account_id: account_id
-    )
-  end
-
-  # Build a minimal SSE response that contains a response.completed event.
-  def completed_sse_body(response_id: "resp_123", model: "gpt-4o", text: "Hello!", tools: [])
-    output = if tools.any?
-      tools.map.with_index do |t, i|
-        {
-          type: "function_call",
-          id: "fc_#{i}",
-          call_id: t[:call_id] || "call_#{i}",
-          name: t[:name],
-          arguments: (t[:arguments] || {}).to_json
-        }
-      end
-    else
-      [
-        {
-          type: "message",
-          role: "assistant",
-          id: "msg_#{response_id}",
-          content: [ { type: "output_text", text: text } ]
-        }
-      ]
-    end
-
-    response_obj = {
-      id: response_id,
-      model: model,
-      output: output,
-      usage: { input_tokens: 10, output_tokens: 5 }
-    }
-
-    "event: response.completed\ndata: #{JSON.generate(response: response_obj)}\n\n"
-  end
-
-  def stub_stream_success(**kwargs)
-    stub_request(:post, CODEX_ENDPOINT)
-      .to_return(
-        status: 200,
-        body: completed_sse_body(**kwargs),
-        headers: { "Content-Type" => "text/event-stream" }
-      )
-  end
-
-  def stub_error_response(error_hash, status_code)
-    stub_request(:post, CODEX_ENDPOINT)
-      .to_return(
-        status: status_code,
-        body: { error: error_hash }.to_json,
-        headers: { "Content-Type" => "application/json" }
-      )
-  end
-
-  # ---------------------------------------------------------------------------
-  # Basic functionality
-  # ---------------------------------------------------------------------------
-
-  test "chat without block returns completed response hash" do
-    stub_stream_success(response_id: "resp_abc", text: "Hello!")
-
-    result = codex_client.chat([ { role: "user", content: "Hi" } ])
-
-    assert_equal "resp_abc", result[:id]
-    assert_equal "gpt-4o",   result[:model]
-    assert result[:output],  "Expected output in response"
-    assert result[:usage],   "Expected usage in response"
-  end
-
-  test "chat with block yields raw SSE events" do
-    stub_stream_success(response_id: "resp_block")
-
-    events = []
-    codex_client.chat([ { role: "user", content: "Hi" } ]) { |e| events << e }
-
-    completed = events.find { |e| e[:event] == "response.completed" }
-    assert completed, "Expected response.completed event"
-    assert_equal "resp_block", completed.dig(:data, :response, :id)
-  end
-
-  test "stream yields raw SSE events" do
-    stub_stream_success(response_id: "resp_stream")
-
-    events = []
-    codex_client.stream([ { role: "user", content: "Hi" } ]) { |e| events << e }
-
-    assert_any_event(events, "response.completed")
-  end
-
-  # ---------------------------------------------------------------------------
-  # Request body
-  # ---------------------------------------------------------------------------
-
-  test "sends required Codex body fields" do
-    stub_stream_success
-
-    codex_client.chat([ { role: "user", content: "Hi" } ])
-
-    assert_requested(:post, CODEX_ENDPOINT) do |req|
-      body = JSON.parse(req.body)
-      body["stream"]  == true &&
-        body["store"] == false &&
-        body["include"]&.include?("reasoning.encrypted_content") &&
-        body.key?("instructions") &&
-        body.key?("input")
-    end
-  end
-
-  test "passes instructions from system messages" do
-    stub_stream_success
-
-    system = [ { type: "text", content: "You are a coder." } ]
-    codex_client.chat([ { role: "user", content: "Hi" } ], system: system)
-
-    assert_requested(:post, CODEX_ENDPOINT) do |req|
-      body = JSON.parse(req.body)
-      body["instructions"] == "You are a coder."
-    end
-  end
-
-  test "defaults instructions to helpful assistant when system is empty" do
-    stub_stream_success
-
-    codex_client.chat([ { role: "user", content: "Hi" } ], system: [])
-
-    assert_requested(:post, CODEX_ENDPOINT) do |req|
-      body = JSON.parse(req.body)
-      body["instructions"] == "You are a helpful assistant."
-    end
-  end
-
-  test "includes tools when provided" do
-    stub_stream_success
-
-    tools = [ { type: "function", name: "get_weather", description: "Get weather", parameters: {} } ]
-    codex_client.chat([ { role: "user", content: "Weather?" } ], tools: tools)
-
-    assert_requested(:post, CODEX_ENDPOINT) do |req|
-      body        = JSON.parse(req.body)
-      sent_tools  = body["tools"] || []
-      sent_tools.any? { |t| t["name"] == "get_weather" }
-    end
-  end
-
-  test "includes prompt_cache_key and retention when set" do
-    stub_stream_success
-
-    client = codex_client
-    client.prompt_cache_key = "my-cache-key"
-    client.chat([ { role: "user", content: "Hi" } ])
-
-    assert_requested(:post, CODEX_ENDPOINT) do |req|
-      body = JSON.parse(req.body)
-      body["prompt_cache_key"] == "my-cache-key" &&
-        body["prompt_cache_retention"] == "24h"
-    end
-  end
-
-  test "includes reasoning when reasoning_effort is set" do
-    stub_stream_success
-
-    client = LlmGateway::Clients::OpenAiCodex.new(
-      access_token: "tok",
-      reasoning_effort: "medium"
-    )
-    client.chat([ { role: "user", content: "Hi" } ])
-
-    assert_requested(:post, CODEX_ENDPOINT) do |req|
-      body = JSON.parse(req.body)
-      body["reasoning"] == { "effort" => "medium", "summary" => "detailed" }
-    end
-  end
-
-  test "chat accepts unified reasoning option" do
-    stub_stream_success
-
-    codex_client.chat([ { role: "user", content: "Hi" } ], reasoning: { effort: "high" })
-
-    assert_requested(:post, CODEX_ENDPOINT) do |req|
-      body = JSON.parse(req.body)
-      body["reasoning"] == { "effort" => "high", "summary" => "detailed" }
-    end
-  end
-
-  test "stream accepts unified reasoning option" do
-    stub_stream_success
-
-    codex_client.stream([ { role: "user", content: "Hi" } ], reasoning: "low") { |_e| }
-
-    assert_requested(:post, CODEX_ENDPOINT) do |req|
-      body = JSON.parse(req.body)
-      body["reasoning"] == { "effort" => "low", "summary" => "detailed" }
-    end
-  end
-
-  # ---------------------------------------------------------------------------
-  # Headers
-  # ---------------------------------------------------------------------------
-
-  test "sends Bearer authorization header" do
-    stub_stream_success
-
-    codex_client(access_token: "my-oauth-token").chat([ { role: "user", content: "Hi" } ])
-
-    assert_requested(:post, CODEX_ENDPOINT,
-                     headers: { "Authorization" => "Bearer my-oauth-token" })
-  end
-
-  test "sends OpenAI-Beta responses=experimental header" do
-    stub_stream_success
-
-    codex_client.chat([ { role: "user", content: "Hi" } ])
-
-    assert_requested(:post, CODEX_ENDPOINT,
-                     headers: { "OpenAI-Beta" => "responses=experimental" })
-  end
-
-  test "sends chatgpt-account-id header when account_id present" do
-    stub_stream_success
-
-    codex_client(account_id: "acct_xyz").chat([ { role: "user", content: "Hi" } ])
-
-    assert_requested(:post, CODEX_ENDPOINT,
-                     headers: { "chatgpt-account-id" => "acct_xyz" })
-  end
-
-  test "omits chatgpt-account-id header when account_id is nil" do
-    stub_stream_success
-
-    LlmGateway::Clients::OpenAiCodex.new(access_token: "tok").chat([ { role: "user", content: "Hi" } ])
-
-    assert_not_requested(:post, CODEX_ENDPOINT,
-                         headers: { "chatgpt-account-id" => /.*/ })
-  end
-
-  # ---------------------------------------------------------------------------
-  # Error handling
-  # ---------------------------------------------------------------------------
-
-  test "raises AuthenticationError on 401" do
-    stub_error_response({ type: "authentication_error", message: "Invalid bearer token" }, 401)
-
-    error = assert_raises(LlmGateway::Errors::AuthenticationError) do
-      codex_client.chat([ { role: "user", content: "Hi" } ])
-    end
-    assert_equal "Invalid bearer token", error.message
-  end
-
-  test "raises BadRequestError on 400" do
-    stub_error_response({ type: "invalid_request_error", message: "Bad input" }, 400)
-
-    error = assert_raises(LlmGateway::Errors::BadRequestError) do
-      codex_client.chat([ { role: "user", content: "Hi" } ])
-    end
-    assert_equal "Bad input", error.message
-  end
-
-  test "raises NotFoundError on 404" do
-    stub_error_response({ type: "not_found_error", message: "model not found" }, 404)
-
-    assert_raises(LlmGateway::Errors::NotFoundError) do
-      codex_client.chat([ { role: "user", content: "Hi" } ])
-    end
-  end
-
-  test "raises RateLimitError on 429" do
-    stub_error_response({ type: "rate_limit_error", message: "rate limit exceeded" }, 429)
-
-    assert_raises(LlmGateway::Errors::RateLimitError) do
-      codex_client.chat([ { role: "user", content: "Hi" } ])
-    end
-  end
-
-  test "raises OverloadError on 503" do
-    stub_error_response({ type: "service_unavailable", message: "overloaded" }, 503)
-
-    assert_raises(LlmGateway::Errors::OverloadError) do
-      codex_client.chat([ { role: "user", content: "Hi" } ])
-    end
-  end
-
-  test "raises InternalServerError on 500" do
-    stub_error_response({ type: "server_error", message: "internal error" }, 500)
-
-    assert_raises(LlmGateway::Errors::InternalServerError) do
-      codex_client.chat([ { role: "user", content: "Hi" } ])
-    end
-  end
-
-  # ---------------------------------------------------------------------------
-  # Token manager
-  # ---------------------------------------------------------------------------
-
-  test "creates token manager when refresh_token is provided" do
-    stub_request(:post, "https://auth.openai.com/oauth/token")
-      .to_return(
-        status: 200,
-        body: {
-          access_token: "new-access-token",
-          refresh_token: "new-refresh-token",
-          expires_in: 3600
-        }.to_json,
-        headers: { "Content-Type" => "application/json" }
-      )
-
-    # No access_token → eagerly fetches
-    client = LlmGateway::Clients::OpenAiCodex.new(refresh_token: "old-refresh-token")
-
-    refute_nil client.token_manager
-    assert_equal "new-access-token", client.token_manager.access_token
-  end
-
-  test "skips eager refresh when access_token is supplied alongside refresh_token" do
-    client = LlmGateway::Clients::OpenAiCodex.new(
-      access_token: "existing-token",
-      refresh_token: "refresh-token",
-      expires_at: Time.now + 3600
-    )
-
-    refute_nil client.token_manager
-    assert_equal "existing-token", client.token_manager.access_token
-  end
-
-  test "retries request after token refresh on AuthenticationError with expired token" do
-    token_manager = mock("token_manager")
-    token_manager.stubs(:ensure_valid_token)
-    token_manager.stubs(:access_token).returns("refreshed-token")
-    token_manager.stubs(:account_id).returns(nil)
-    token_manager.stubs(:token_expired?).returns(true)
-    token_manager.expects(:refresh_access_token!).once
-
-    client = codex_client
-    client.instance_variable_set(:@token_manager, token_manager)
-
-    stub_request(:post, CODEX_ENDPOINT).to_return(
-      {
-        status: 401,
-        body: { error: { type: "authentication_error", message: "expired" } }.to_json,
-        headers: { "Content-Type" => "application/json" }
-      },
-      {
-        status: 200,
-        body: completed_sse_body(response_id: "resp_retry"),
-        headers: { "Content-Type" => "text/event-stream" }
-      }
-    )
-
-    result = client.chat([ { role: "user", content: "Hi" } ])
-    assert_equal "resp_retry", result[:id]
-  end
-
-  test "does not retry when token is not expired" do
-    token_manager = mock("token_manager")
-    token_manager.stubs(:ensure_valid_token)
-    token_manager.stubs(:access_token).returns("test-token")
-    token_manager.stubs(:account_id).returns(nil)
-    token_manager.stubs(:token_expired?).returns(false)
-
-    client = codex_client
-    client.instance_variable_set(:@token_manager, token_manager)
-
-    stub_error_response({ type: "authentication_error", message: "invalid token" }, 401)
-
-    assert_raises(LlmGateway::Errors::AuthenticationError) do
-      client.chat([ { role: "user", content: "Hi" } ])
-    end
-  end
-
-  test "on_token_refresh= delegates to token_manager" do
-    refresh_called = false
-    callback = ->(_at, _rt, _exp) { refresh_called = true }
-
-    stub_request(:post, "https://auth.openai.com/oauth/token")
-      .to_return(
-        status: 200,
-        body: {
-          access_token: "refreshed",
-          refresh_token: "new-rt",
-          expires_in: 3600
-        }.to_json,
-        headers: { "Content-Type" => "application/json" }
-      )
-
-    client = LlmGateway::Clients::OpenAiCodex.new(
-      access_token: "tok",
-      refresh_token: "rt",
-      expires_at: Time.now + 3600
-    )
-    client.on_token_refresh = callback
-    client.token_manager.refresh_access_token!
-
-    assert refresh_called, "Expected on_token_refresh callback to be called"
-  end
-
-  # ---------------------------------------------------------------------------
-  # OAuthFlow constants
-  # ---------------------------------------------------------------------------
-
-  test "OAuthFlow has the correct CLIENT_ID" do
-    assert_equal "app_EMoamEEZ73f0CkXaXp7hrann",
-                 LlmGateway::Clients::OpenAiCodex::OAuthFlow::CLIENT_ID
-  end
-
-  test "OAuthFlow start returns authorization_url, code_verifier, and state" do
-    flow   = LlmGateway::Clients::OpenAiCodex::OAuthFlow.new
-    result = flow.start
-
-    assert result[:authorization_url].start_with?("https://auth.openai.com/oauth/authorize"),
-           "Expected OpenAI authorize URL"
-    assert result[:code_verifier], "Expected code_verifier"
-    assert result[:state],         "Expected state"
-  end
-
-  test "OAuthFlow authorization_url includes required params" do
-    flow   = LlmGateway::Clients::OpenAiCodex::OAuthFlow.new
-    result = flow.start(state: "teststate")
-    uri    = URI.parse(result[:authorization_url])
-    params = URI.decode_www_form(uri.query).to_h
-
-    assert_equal "code",                         params["response_type"]
-    assert_equal "app_EMoamEEZ73f0CkXaXp7hrann", params["client_id"]
-    assert_equal "S256",                          params["code_challenge_method"]
-    assert_equal "teststate",                    params["state"]
-    assert_equal "true",                          params["codex_cli_simplified_flow"]
-  end
-
-  # ---------------------------------------------------------------------------
-  # TokenManager
-  # ---------------------------------------------------------------------------
-
-  test "TokenManager token_expired? returns true when expires_at is nil" do
-    tm = LlmGateway::Clients::OpenAiCodex::TokenManager.new(refresh_token: "rt")
-    assert tm.token_expired?
-  end
-
-  test "TokenManager token_expired? returns false for future expiry" do
-    tm = LlmGateway::Clients::OpenAiCodex::TokenManager.new(
-      refresh_token: "rt",
-      expires_at: Time.now + 3600
-    )
-    refute tm.token_expired?
-  end
-
-  test "TokenManager token_expired? returns true for past expiry" do
-    tm = LlmGateway::Clients::OpenAiCodex::TokenManager.new(
-      refresh_token: "rt",
-      expires_at: Time.now - 1
-    )
-    assert tm.token_expired?
-  end
-
-  test "TokenManager refresh_access_token! updates tokens and fires callback" do
-    received = []
-
-    stub_request(:post, "https://auth.openai.com/oauth/token")
-      .to_return(
-        status: 200,
-        body: {
-          access_token: "new-at",
-          refresh_token: "new-rt",
-          expires_in: 7200
-        }.to_json,
-        headers: { "Content-Type" => "application/json" }
-      )
-
-    tm = LlmGateway::Clients::OpenAiCodex::TokenManager.new(
-      refresh_token: "old-rt",
-      expires_at: Time.now - 1
-    )
-    tm.on_token_refresh = ->(at, rt, exp) { received << { at: at, rt: rt, exp: exp } }
-
-    tm.refresh_access_token!
-
-    assert_equal "new-at", tm.access_token
-    assert_equal "new-rt", tm.refresh_token
-    assert_equal 1,        received.size
-    assert_equal "new-at", received.first[:at]
-  end
-
-  # ---------------------------------------------------------------------------
-  # Provider registry
-  # ---------------------------------------------------------------------------
-
-  test "openai_oauth_codex is registered in ProviderRegistry" do
-    assert LlmGateway::ProviderRegistry.registered?("openai_oauth_codex"),
-           "Expected openai_oauth_codex to be registered"
-  end
-
-  private
-
-  def assert_any_event(events, event_type)
-    found = events.any? { |e| e[:event] == event_type }
-    assert found, "Expected to find event '#{event_type}' in #{events.map { |e| e[:event] }.inspect}"
-  end
-end
diff --git a/test/integration/clients/openai_test.rb b/test/integration/clients/openai_test.rb
index 5eee2bf..87db73a 100644
--- a/test/integration/clients/openai_test.rb
+++ b/test/integration/clients/openai_test.rb
@@ -113,4 +113,73 @@ def openai_client
       openai_client.chat([ { 'role': "user", 'content': "hello" } ])
     end
   end
+
+  test "get_oauth_access_token returns existing non-expired codex token" do
+    token = openai_client.get_oauth_access_token(
+      access_token: "valid-token",
+      refresh_token: "refresh-token",
+      expires_at: Time.now + 3600
+    )
+
+    assert_equal "valid-token", token
+  end
+
+  test "get_oauth_access_token refreshes expired codex token and fires callback" do
+    callback_payload = nil
+
+    stub_request(:post, "https://auth.openai.com/oauth/token")
+      .to_return(
+        status: 200,
+        body: {
+          access_token: "new-access-token",
+          refresh_token: "new-refresh-token",
+          expires_in: 3600
+        }.to_json,
+        headers: { 'Content-Type': "application/json" }
+      )
+
+    token = openai_client.get_oauth_access_token(
+      access_token: "expired-token",
+      refresh_token: "refresh-token",
+      expires_at: Time.now - 60
+    ) do |access_token, refresh_token, expires_at|
+      callback_payload = {
+        access_token: access_token,
+        refresh_token: refresh_token,
+        expires_at: expires_at
+      }
+    end
+
+    assert_equal "new-access-token", token
+    assert_equal "new-access-token", callback_payload[:access_token]
+    assert_equal "new-refresh-token", callback_payload[:refresh_token]
+    assert callback_payload[:expires_at].is_a?(Time)
+  end
+
+  test "chat_codex routes through codex endpoint" do
+    stub_request(:post, "https://chatgpt.com/backend-api/codex/responses")
+      .to_return(
+        status: 200,
+        body: "event: response.completed\ndata: #{JSON.generate(response: { id: "resp_1", model: "gpt-4o", output: [], usage: {} })}\n\n",
+        headers: { "Content-Type" => "text/event-stream" }
+      )
+
+    result = LlmGateway::Clients::OpenAi.new(api_key: "oauth-token").chat_codex([ { role: "user", content: "hello" } ])
+
+    assert_equal "resp_1", result[:id]
+  end
+
+  test "stream_codex yields codex SSE events" do
+    stub_request(:post, "https://chatgpt.com/backend-api/codex/responses")
+      .to_return(
+        status: 200,
+        body: "event: response.completed\ndata: #{JSON.generate(response: { id: "resp_stream", model: "gpt-4o", output: [], usage: {} })}\n\n",
+        headers: { "Content-Type" => "text/event-stream" }
+      )
+
+    events = []
+    LlmGateway::Clients::OpenAi.new(api_key: "oauth-token").stream_codex([ { role: "user", content: "hello" } ]) { |e| events << e }
+
+    assert events.any? { |e| e[:event] == "response.completed" }
+  end
 end

From 147bfc91082fc5f7d0caea426c0126c48b96d1b0 Mon Sep 17 00:00:00 2001
From: billybonks <sebastienstettler@gmail.com>
Date: Fri, 3 Apr 2026 17:02:43 +0800
Subject: [PATCH 03/14] update stream test

---
 test/integration/stream_test.rb | 219 ++++++++++++++++++++------------
 1 file changed, 138 insertions(+), 81 deletions(-)

diff --git a/test/integration/stream_test.rb b/test/integration/stream_test.rb
index 8637e0c..13b2eb7 100644
--- a/test/integration/stream_test.rb
+++ b/test/integration/stream_test.rb
@@ -4,6 +4,8 @@
 require "vcr"
 require "json"
 require "base64"
+require "time"
+require "fileutils"
 require_relative "../utils/calculator_tool_helper"
 
 class ProvidersJsonTest < Test
@@ -12,32 +14,106 @@ def teardown
     LlmGateway.reset_configuration!
   end
 
-  def load_provider(name)
-    providers_path = File.expand_path("../fixtures/providers.json", __dir__)
-    skip("Skipped: missing providers fixture at #{providers_path}") unless File.exist?(providers_path)
+  def load_provider(provider:, model:)
+    config = {
+      "provider" => provider,
+      "model_key" => model
+    }
+
+    case provider
+    when "openai_apikey_completions", "openai_apikey_responses"
+      api_key = ENV["OPENAI_API_KEY"].to_s
+      skip("Skipped: missing OPENAI_API_KEY") if api_key.empty?
+      config["api_key"] = api_key
+    when "anthropic_apikey_messages"
+      api_key = ENV["ANTHROPIC_API_KEY"].to_s
+      skip("Skipped: missing ANTHROPIC_API_KEY") if api_key.empty?
+      config["api_key"] = api_key
+    when "anthropic_oauth_messages"
+      config["provider"] = "anthropic_apikey_messages"
+      config["api_key"] = oauth_access_token_for("anthropic")
+    when "openai_oauth_codex"
+      creds = load_auth_credentials("openai")
+      config["api_key"] = oauth_access_token_for("openai")
+      config["account_id"] = creds["account_id"] if creds["account_id"]
+    end
 
-    providers = JSON.parse(File.read(providers_path))
-    provider = providers.find { |entry| entry["name"] == name }
-    skip("Skipped: provider not found in providers.json: #{name}") unless provider
+    LlmGateway.build_provider(config)
+  end
 
-    config = provider.fetch("config").dup
-    key_env = config.delete("key_env")
-    config["key"] = ENV.fetch(key_env) if key_env
+  def skip_on_authentication_error
+    yield
+  rescue LlmGateway::Errors::AuthenticationError,
+         LlmGateway::Errors::BadRequestError,
+         LlmGateway::Errors::RateLimitError,
+         LlmGateway::Errors::APIStatusError => e
+    skip("Skipped due to provider error: #{e.message}")
+  end
 
-    LlmGateway.configure([
-      {
-        "name" => provider.fetch("name"),
-        "config" => config
-      }
-    ])
+  def auth_file_path
+    File.expand_path(ENV.fetch("LLM_GATEWAY_AUTH_FILE", "~/.config/llm_gateway/auth.json"))
+  end
+
+  def load_auth_credentials(provider)
+    path = auth_file_path
+    skip("Skipped: missing auth file at #{path}") unless File.exist?(path)
+
+    auth = JSON.parse(File.read(path))
+    creds = auth[provider]
+    skip("Skipped: missing #{provider} credentials in #{path}") unless creds
 
-    LlmGateway.public_send(name)
+    creds
   end
 
-  def skip_on_authentication_error
-    yield
-  rescue LlmGateway::Errors::AuthenticationError => e
-    skip("Skipped due to authentication error: #{e.message}")
+  def persist_auth_credentials(provider, attributes)
+    path = auth_file_path
+    FileUtils.mkdir_p(File.dirname(path))
+
+    auth = File.exist?(path) ? JSON.parse(File.read(path)) : {}
+    auth[provider] ||= {}
+    auth[provider].merge!(attributes)
+
+    File.write(path, JSON.pretty_generate(auth) + "\n")
+  end
+
+  def oauth_access_token_for(provider)
+    creds = load_auth_credentials(provider)
+
+    case provider
+    when "anthropic"
+      token = LlmGateway::Clients::Claude.new.get_oauth_access_token(
+        access_token: creds["access_token"],
+        refresh_token: creds["refresh_token"],
+        expires_at: creds["expires_at"]
+      ) do |access_token, refresh_token, expires_at|
+        persist_auth_credentials("anthropic", {
+          "access_token" => access_token,
+          "refresh_token" => refresh_token,
+          "expires_at" => expires_at&.iso8601
+        })
+      end
+
+      persist_auth_credentials("anthropic", { "access_token" => token }) if token != creds["access_token"]
+      token
+    when "openai"
+      token = LlmGateway::Clients::OpenAi.new.get_oauth_access_token(
+        access_token: creds["access_token"],
+        refresh_token: creds["refresh_token"],
+        expires_at: creds["expires_at"],
+        account_id: creds["account_id"]
+      ) do |access_token, refresh_token, expires_at|
+        persist_auth_credentials("openai", {
+          "access_token" => access_token,
+          "refresh_token" => refresh_token,
+          "expires_at" => expires_at&.iso8601
+        })
+      end
+
+      persist_auth_credentials("openai", { "access_token" => token }) if token != creds["access_token"]
+      token
+    else
+      raise ArgumentError, "Unsupported OAuth provider: #{provider}"
+    end
   end
 
   def assert_basic_text_generation_result(message, expected_text)
@@ -297,108 +373,89 @@ def basic_image_streaming_test(adapter)
     assert_includes lower_content, "red"
     assert_includes lower_content, "circle"
   end
-  def self.provider_names
-    providers_path = File.expand_path("../fixtures/providers.json", __dir__)
-    return [] unless File.exist?(providers_path)
-
-    JSON.parse(File.read(providers_path)).map { |entry| entry["name"] }
-  end
-
-  self.provider_names.each do |provider|
-    test "#{provider} basic text generation" do
+  def self.define_stream_tests_for(name:, provider:, model:)
+    test "#{name} basic text generation" do
       skip_on_authentication_error do
         without_vcr do
-          adapter = load_provider(provider)
+          adapter = load_provider(provider:, model:)
           basic_text_generation_test(adapter)
         end
       end
     end
 
-    test "#{provider} basic tool call" do
+    test "#{name} basic tool call" do
       skip_on_authentication_error do
         without_vcr do
-          adapter = load_provider(provider)
+          adapter = load_provider(provider:, model:)
           basic_tool_call(adapter)
         end
       end
     end
 
-    test "#{provider} basic thinking" do
+    test "#{name} basic thinking" do
       skip_on_authentication_error do
         without_vcr do
-          adapter = load_provider(provider)
+          adapter = load_provider(provider:, model:)
           basic_thinking_test(adapter, reasoning: "high")
         end
       end
     end
 
-    test "#{provider} text streaming" do
+    test "#{name} text streaming" do
       skip_on_authentication_error do
         without_vcr do
-          adapter = load_provider(provider)
+          adapter = load_provider(provider:, model:)
           basic_streaming_text_test(adapter)
         end
       end
     end
 
-    test "#{provider}  multi turn tool streaming" do
+    test "#{name} multi turn tool streaming" do
       skip_on_authentication_error do
         without_vcr do
-          adapter = load_provider(provider)
+          adapter = load_provider(provider:, model:)
           multi_turn_tool_stream_test(adapter, reasoning: "high")
         end
       end
     end
 
-    test "#{provider} image streaming" do
+    test "#{name} image streaming" do
       skip_on_authentication_error do
         without_vcr do
-          adapter = load_provider(provider)
+          adapter = load_provider(provider:, model:)
           basic_image_streaming_test(adapter)
         end
       end
     end
   end
 
-  # test "loads providers json and does anthropic basic text generation" do
-  #   without_vcr do
-  #     adapter = load_provider("anthropic_oauth")
-  #     basic_text_generation_test(adapter)
-  #   end
-  # end
-
-  # test "loads providers json and does anthropic basic tool call" do
-  #   without_vcr do
-  #     adapter = load_provider("anthropic_oauth")
-  #     basic_tool_call(adapter)
-  #   end
-  # end
-
-  # test "loads providers json and does anthropic basic thinking" do
-  #   without_vcr do
-  #     adapter = load_provider("anthropic_oauth")
-  #     basic_thinking_test(adapter)
-  #   end
-  # end
-
-  # test "loads providers json and does anthropic text streaming" do
-  #   without_vcr do
-  #     adapter = load_provider("anthropic_oauth")
-  #     basic_streaming_text_test(adapter)
-  #   end
-  # end
-
-  # test "loads providers json and does anthropic multi turn tool streaming" do
-  #   without_vcr do
-  #     adapter = load_provider("anthropic_oauth")
-  #     multi_turn_tool_stream_test(adapter)
-  #   end
-  # end
-
-  # test "loads providers json and does anthropic image streaming" do
-  #   without_vcr do
-  #     adapter = load_provider("anthropic_oauth")
-  #     basic_image_streaming_test(adapter)
-  #   end
-  # end
+  define_stream_tests_for(
+    name: "openai_apikey_completions_gpt_5_1",
+    provider: "openai_apikey_completions",
+    model: "gpt-5.1"
+  )
+
+  define_stream_tests_for(
+    name: "anthropic_apikey_messages_claude_sonnet_4",
+    provider: "anthropic_apikey_messages",
+    model: "claude-sonnet-4-20250514"
+  )
+
+  define_stream_tests_for(
+    name: "openai_apikey_responses_gpt_5_4",
+    provider: "openai_apikey_responses",
+    model: "gpt-5.4"
+  )
+
+  define_stream_tests_for(
+    name: "anthropic_oauth_messages_claude_sonnet_4",
+    provider: "anthropic_oauth_messages",
+    model: "claude-sonnet-4-20250514"
+  )
+
+  define_stream_tests_for(
+    name: "openai_oauth_codex_gpt_5_4",
+    provider: "openai_oauth_codex",
+    model: "gpt-5.4"
+  )
 end

From 8241a2b624d5c58736fdd3c1246e6a4c9da06822 Mon Sep 17 00:00:00 2001
From: billybonks <sebastienstettler@gmail.com>
Date: Fri, 3 Apr 2026 17:04:15 +0800
Subject: [PATCH 04/14] update oauth scripts

---
 scripts/create_anthropic_credentials.rb    | 23 ++++++++++++----
 scripts/create_openai_codex_credentials.rb | 31 +++++++++++++++-------
 2 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/scripts/create_anthropic_credentials.rb b/scripts/create_anthropic_credentials.rb
index 701b2b6..a40a0c1 100755
--- a/scripts/create_anthropic_credentials.rb
+++ b/scripts/create_anthropic_credentials.rb
@@ -3,6 +3,7 @@
 
 require "optparse"
 require "json"
+require "fileutils"
 require_relative "../lib/llm_gateway"
 
 module Scripts
@@ -11,7 +12,7 @@ def initialize(argv)
       @options = {
         client_id: LlmGateway::Clients::ClaudeCode::OAuthFlow::CLIENT_ID,
         scopes: LlmGateway::Clients::ClaudeCode::OAuthFlow::DEFAULT_SCOPES,
-        output: nil
+        output: File.expand_path(ENV.fetch("LLM_GATEWAY_AUTH_FILE", "~/.config/llm_gateway/auth.json"))
       }
       parse_options(argv)
     end
@@ -45,10 +46,7 @@ def run
         expires_at: tokens[:expires_at]&.iso8601
       }
 
-      if @options[:output]
-        File.write(@options[:output], JSON.pretty_generate(credentials) + "\n")
-        puts "Credentials written to #{@options[:output]}"
-      end
+      persist_credentials("anthropic", credentials)
 
       puts "Credentials:"
       puts JSON.pretty_generate(credentials)
@@ -82,6 +80,21 @@ def parse_options(argv)
       end.parse!(argv)
     end
 
+    def persist_credentials(provider, credentials)
+      output_path = File.expand_path(@options[:output])
+      FileUtils.mkdir_p(File.dirname(output_path))
+
+      existing = if File.exist?(output_path)
+        JSON.parse(File.read(output_path))
+      else
+        {}
+      end
+
+      existing[provider] = credentials
+      File.write(output_path, JSON.pretty_generate(existing) + "\n")
+      puts "Credentials written to #{output_path}"
+    end
+
     def shell_escape(value)
       return "''" if value.nil? || value.empty?
 
diff --git a/scripts/create_openai_codex_credentials.rb b/scripts/create_openai_codex_credentials.rb
index 203a86b..011e86b 100755
--- a/scripts/create_openai_codex_credentials.rb
+++ b/scripts/create_openai_codex_credentials.rb
@@ -3,22 +3,23 @@
 
 require "optparse"
 require "json"
+require "fileutils"
 require_relative "../lib/llm_gateway"
 
 module Scripts
   class CreateOpenAiCodexCredentials
     def initialize(argv)
       @options = {
-        client_id: LlmGateway::Clients::OpenAiCodex::OAuthFlow::CLIENT_ID,
-        redirect_uri: LlmGateway::Clients::OpenAiCodex::OAuthFlow::REDIRECT_URI,
-        scope: LlmGateway::Clients::OpenAiCodex::OAuthFlow::SCOPE,
-        output: nil
+        client_id: LlmGateway::Clients::OpenAi::OAuthFlow::CLIENT_ID,
+        redirect_uri: LlmGateway::Clients::OpenAi::OAuthFlow::REDIRECT_URI,
+        scope: LlmGateway::Clients::OpenAi::OAuthFlow::SCOPE,
+        output: File.expand_path(ENV.fetch("LLM_GATEWAY_AUTH_FILE", "~/.config/llm_gateway/auth.json"))
       }
       parse_options(argv)
     end
 
     def run
-      flow = LlmGateway::Clients::OpenAiCodex::OAuthFlow.new(
+      flow = LlmGateway::Clients::OpenAi::OAuthFlow.new(
         client_id: @options[:client_id],
         redirect_uri: @options[:redirect_uri],
         scope: @options[:scope]
@@ -49,10 +50,7 @@ def run
         expires_at: tokens[:expires_at]&.iso8601
       }
 
-      if @options[:output]
-        File.write(@options[:output], JSON.pretty_generate(credentials) + "\n")
-        puts "Credentials written to #{@options[:output]}"
-      end
+      persist_credentials("openai", credentials)
 
       puts ""
       puts "Credentials:"
@@ -92,6 +90,21 @@ def parse_options(argv)
       end.parse!(argv)
     end
 
+    def persist_credentials(provider, credentials)
+      output_path = File.expand_path(@options[:output])
+      FileUtils.mkdir_p(File.dirname(output_path))
+
+      existing = if File.exist?(output_path)
+        JSON.parse(File.read(output_path))
+      else
+        {}
+      end
+
+      existing[provider] = credentials
+      File.write(output_path, JSON.pretty_generate(existing) + "\n")
+      puts "Credentials written to #{output_path}"
+    end
+
     def shell_escape(value)
       return "''" if value.nil? || value.empty?
 

From 1812e4dc30c20d7cfb7d575636147c3f9033840f Mon Sep 17 00:00:00 2001
From: billybonks <sebastienstettler@gmail.com>
Date: Fri, 3 Apr 2026 21:30:23 +0800
Subject: [PATCH 05/14] fixup! refactor: move option wraggling to option mapper

---
 lib/llm_gateway/adapters/openai_codex/option_mapper.rb | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/llm_gateway/adapters/openai_codex/option_mapper.rb b/lib/llm_gateway/adapters/openai_codex/option_mapper.rb
index 9694cae..1d9e149 100644
--- a/lib/llm_gateway/adapters/openai_codex/option_mapper.rb
+++ b/lib/llm_gateway/adapters/openai_codex/option_mapper.rb
@@ -10,7 +10,11 @@ module OptionMapper
 
         def map(options)
           mapped_options = OpenAi::Responses::OptionMapper.map(options)
-          mapped_options[:max_completion_tokens] ||= 20480
+
+          # Codex endpoint currently rejects token limit parameters.
+          mapped_options.delete(:max_output_tokens)
+          mapped_options.delete(:max_completion_tokens)
+
           mapped_options
         end
       end

From 27636f7e08e2765a3235519f9dba807fc7ef03af Mon Sep 17 00:00:00 2001
From: billybonks <sebastienstettler@gmail.com>
Date: Fri, 3 Apr 2026 21:31:03 +0800
Subject: [PATCH 06/14] fixup! refactor: move option wraggling to option mapper

---
 lib/llm_gateway/clients/open_ai.rb | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/lib/llm_gateway/clients/open_ai.rb b/lib/llm_gateway/clients/open_ai.rb
index 02a6fa7..059dffe 100644
--- a/lib/llm_gateway/clients/open_ai.rb
+++ b/lib/llm_gateway/clients/open_ai.rb
@@ -185,7 +185,14 @@ def handle_client_specific_errors(response, error)
           raise Errors::OverloadError.new(error["message"], error_code)
         end
         # If we get here, we didn't handle it specifically
-        message = error["message"] || "OpenAI request failed with status #{response.code}"
+        fallback_body = response.body.to_s.strip
+        fallback_message = if fallback_body.empty?
+          "OpenAI request failed with status #{response.code}"
+        else
+          "OpenAI request failed with status #{response.code}: #{fallback_body}"
+        end
+
+        message = error["message"] || fallback_message
         raise Errors::APIStatusError.new(message, error_code)
       end
     end

From f5fb0e19ef0ff528d8fb1b1ec7d46f7ba316a1ab Mon Sep 17 00:00:00 2001
From: billybonks <sebastienstettler@gmail.com>
Date: Sat, 4 Apr 2026 12:24:13 +0800
Subject: [PATCH 07/14] fix: throw prompt too long errors when streaming as
 well

---
 .../adapters/claude/stream_mapper.rb          |   4 +
 .../open_ai/chat_completions/stream_mapper.rb |   5 +
 .../open_ai/responses/stream_mapper.rb        |   5 +
 lib/llm_gateway/clients/claude.rb             |   7 +-
 lib/llm_gateway/clients/groq.rb               |  17 +-
 lib/llm_gateway/clients/open_ai.rb            |   9 +-
 lib/llm_gateway/errors.rb                     |  21 +++
 test/integration/prompt_too_long_live_test.rb | 157 ++++++++++++++++++
 8 files changed, 208 insertions(+), 17 deletions(-)
 create mode 100644 test/integration/prompt_too_long_live_test.rb

diff --git a/lib/llm_gateway/adapters/claude/stream_mapper.rb b/lib/llm_gateway/adapters/claude/stream_mapper.rb
index e9f4be8..fee3045 100644
--- a/lib/llm_gateway/adapters/claude/stream_mapper.rb
+++ b/lib/llm_gateway/adapters/claude/stream_mapper.rb
@@ -73,6 +73,10 @@ def map(chunk)
             message = error[:message] || "Stream error"
             code = error[:type]
 
+            if LlmGateway::Errors.context_overflow_message?(message)
+              raise LlmGateway::Errors::PromptTooLong.new(message, code)
+            end
+
             if code == "overloaded_error"
               raise LlmGateway::Errors::OverloadError.new(message, code)
             end
diff --git a/lib/llm_gateway/adapters/open_ai/chat_completions/stream_mapper.rb b/lib/llm_gateway/adapters/open_ai/chat_completions/stream_mapper.rb
index 31c3356..5ec83f3 100644
--- a/lib/llm_gateway/adapters/open_ai/chat_completions/stream_mapper.rb
+++ b/lib/llm_gateway/adapters/open_ai/chat_completions/stream_mapper.rb
@@ -228,6 +228,11 @@ def raise_stream_error!(data)
             error = data[:error].is_a?(Hash) ? data[:error] : data
             message = error[:message] || "Stream error"
             code = error[:code] || error[:type]
+
+            if LlmGateway::Errors.context_overflow_message?(message)
+              raise LlmGateway::Errors::PromptTooLong.new(message, code)
+            end
+
             raise LlmGateway::Errors::APIStatusError.new(message, code)
           end
         end
diff --git a/lib/llm_gateway/adapters/open_ai/responses/stream_mapper.rb b/lib/llm_gateway/adapters/open_ai/responses/stream_mapper.rb
index 9233b1a..cf58dfa 100644
--- a/lib/llm_gateway/adapters/open_ai/responses/stream_mapper.rb
+++ b/lib/llm_gateway/adapters/open_ai/responses/stream_mapper.rb
@@ -328,6 +328,11 @@ def raise_stream_error!(data)
             error = data[:error].is_a?(Hash) ? data[:error] : data
             message = error[:message] || "Stream error"
             code = error[:code] || error[:type]
+
+            if LlmGateway::Errors.context_overflow_message?(message)
+              raise LlmGateway::Errors::PromptTooLong.new(message, code)
+            end
+
             raise LlmGateway::Errors::APIStatusError.new(message, code)
           end
         end
diff --git a/lib/llm_gateway/clients/claude.rb b/lib/llm_gateway/clients/claude.rb
index 65a7d33..33192f3 100644
--- a/lib/llm_gateway/clients/claude.rb
+++ b/lib/llm_gateway/clients/claude.rb
@@ -97,11 +97,8 @@ def prepend_claude_code_identity(system)
       end
 
       def handle_client_specific_errors(response, error)
-        case response.code.to_i
-        when 400
-          if error["message"]&.start_with?("prompt is too long")
-            raise Errors::PromptTooLong.new(error["message"], error["type"])
-          end
+        if Errors.context_overflow_message?(error["message"])
+          raise Errors::PromptTooLong.new(error["message"], error["type"])
         end
 
         # If we get here, we didn't handle it specifically
diff --git a/lib/llm_gateway/clients/groq.rb b/lib/llm_gateway/clients/groq.rb
index ab3a0b9..6b148f2 100644
--- a/lib/llm_gateway/clients/groq.rb
+++ b/lib/llm_gateway/clients/groq.rb
@@ -33,24 +33,21 @@ def build_headers
       def handle_client_specific_errors(response, error)
         # Groq likely uses 'code' like OpenAI since it's OpenAI-compatible
         error_code = error["code"]
+        error_message = error["message"]
+
+        if Errors.context_overflow_message?(error_message)
+          raise Errors::PromptTooLong.new(error_message, error["type"])
+        end
 
         case response.code.to_i
-        when 400
-          if error["message"]&.match?(/reduce the length of the messages/i)
-            raise Errors::PromptTooLong.new(error["message"], error["type"])
-          end
-        when 413
-          if error["message"]&.start_with?("Request too large")
-            raise Errors::PromptTooLong.new(error["message"], error["type"])
-          end
         when 429
           raise Errors::RateLimitError.new(error["type"], error_code) if error_code == "rate_limit_exceeded"
 
-          raise Errors::OverloadError.new(error["message"], error_code)
+          raise Errors::OverloadError.new(error_message, error_code)
         end
 
         # If we get here, we didn't handle it specifically
-        raise Errors::APIStatusError.new(error["message"], error_code)
+        raise Errors::APIStatusError.new(error_message, error_code)
       end
     end
   end
diff --git a/lib/llm_gateway/clients/open_ai.rb b/lib/llm_gateway/clients/open_ai.rb
index 059dffe..71a0014 100644
--- a/lib/llm_gateway/clients/open_ai.rb
+++ b/lib/llm_gateway/clients/open_ai.rb
@@ -177,12 +177,17 @@ def build_headers
       def handle_client_specific_errors(response, error)
         # OpenAI uses 'code' instead of 'type' for error codes
         error_code = error["code"]
+        error_message = error["message"]
+
+        if Errors.context_overflow_message?(error_message)
+          raise Errors::PromptTooLong.new(error_message, error_code)
+        end
 
         case response.code.to_i
         when 429
-          raise Errors::RateLimitError.new(error["message"], error_code)
+          raise Errors::RateLimitError.new(error_message, error_code)
         when 503
-          raise Errors::OverloadError.new(error["message"], error_code)
+          raise Errors::OverloadError.new(error_message, error_code)
         end
         # If we get here, we didn't handle it specifically
         fallback_body = response.body.to_s.strip
diff --git a/lib/llm_gateway/errors.rb b/lib/llm_gateway/errors.rb
index cfdd315..1eef904 100644
--- a/lib/llm_gateway/errors.rb
+++ b/lib/llm_gateway/errors.rb
@@ -31,6 +31,27 @@ class UnsupportedModel < ClientError; end
     class UnsupportedProvider < ClientError; end
     class MissingMapperForProvider < ClientError; end
 
+    OVERFLOW_PATTERNS = [
+      /prompt is too long/i, # Anthropic
+      /exceeds the context window/i, # OpenAI
+      /reduce the length of the messages/i, # Groq
+      /maximum context length is \d+ tokens/i,
+      /context[_ ]length[_ ]exceeded/i,
+      /too many tokens/i,
+      /token limit exceeded/i,
+      /request too large.*tokens per min/i, # OpenAI TPM wording
+      /input tokens per minute/i, # Anthropic TPM wording
+      /reduce the prompt length/i,
+      /input or output tokens must be reduced/i
+    ].freeze
+
+    def self.context_overflow_message?(message)
+      text = message.to_s
+      return false if text.empty?
+
+      OVERFLOW_PATTERNS.any? { |pattern| pattern.match?(text) }
+    end
+
     class PromptError < BaseError; end
 
     class HallucinationError < PromptError; end
diff --git a/test/integration/prompt_too_long_live_test.rb b/test/integration/prompt_too_long_live_test.rb
new file mode 100644
index 0000000..be8acbb
--- /dev/null
+++ b/test/integration/prompt_too_long_live_test.rb
@@ -0,0 +1,157 @@
+# frozen_string_literal: true
+
+require "test_helper"
+require "json"
+require "time"
+require "fileutils"
+
+class PromptTooLongLiveTest < Test
+  def teardown
+    LlmGateway.reset_configuration!
+  end
+
+  def load_provider(provider:, model:)
+    config = {
+      "provider" => provider,
+      "model_key" => model
+    }
+
+    case provider
+    when "openai_apikey_completions", "openai_apikey_responses"
+      api_key = ENV["OPENAI_API_KEY"].to_s
+      skip("Skipped: missing OPENAI_API_KEY") if api_key.empty?
+      config["api_key"] = api_key
+    when "anthropic_apikey_messages"
+      api_key = ENV["ANTHROPIC_API_KEY"].to_s
+      skip("Skipped: missing ANTHROPIC_API_KEY") if api_key.empty?
+      config["api_key"] = api_key
+    when "anthropic_oauth_messages"
+      config["provider"] = "anthropic_apikey_messages"
+      config["api_key"] = oauth_access_token_for("anthropic")
+    when "openai_oauth_codex"
+      creds = load_auth_credentials("openai")
+      config["api_key"] = oauth_access_token_for("openai")
+      config["account_id"] = creds["account_id"] if creds["account_id"]
+    end
+
+    LlmGateway.build_provider(config)
+  end
+
+  def auth_file_path
+    File.expand_path(ENV.fetch("LLM_GATEWAY_AUTH_FILE", "~/.config/llm_gateway/auth.json"))
+  end
+
+  def load_auth_credentials(provider)
+    path = auth_file_path
+    skip("Skipped: missing auth file at #{path}") unless File.exist?(path)
+
+    auth = JSON.parse(File.read(path))
+    creds = auth[provider]
+    skip("Skipped: missing #{provider} credentials in #{path}") unless creds
+
+    creds
+  end
+
+  def persist_auth_credentials(provider, attributes)
+    path = auth_file_path
+    FileUtils.mkdir_p(File.dirname(path))
+
+    auth = File.exist?(path) ? JSON.parse(File.read(path)) : {}
+    auth[provider] ||= {}
+    auth[provider].merge!(attributes)
+
+    File.write(path, JSON.pretty_generate(auth) + "\n")
+  end
+
+  def oauth_access_token_for(provider)
+    creds = load_auth_credentials(provider)
+
+    case provider
+    when "anthropic"
+      token = LlmGateway::Clients::Claude.new.get_oauth_access_token(
+        access_token: creds["access_token"],
+        refresh_token: creds["refresh_token"],
+        expires_at: creds["expires_at"]
+      ) do |access_token, refresh_token, expires_at|
+        persist_auth_credentials("anthropic", {
+          "access_token" => access_token,
+          "refresh_token" => refresh_token,
+          "expires_at" => expires_at&.iso8601
+        })
+      end
+
+      persist_auth_credentials("anthropic", { "access_token" => token }) if token != creds["access_token"]
+      token
+    when "openai"
+      token = LlmGateway::Clients::OpenAi.new.get_oauth_access_token(
+        access_token: creds["access_token"],
+        refresh_token: creds["refresh_token"],
+        expires_at: creds["expires_at"],
+        account_id: creds["account_id"]
+      ) do |access_token, refresh_token, expires_at|
+        persist_auth_credentials("openai", {
+          "access_token" => access_token,
+          "refresh_token" => refresh_token,
+          "expires_at" => expires_at&.iso8601
+        })
+      end
+
+      persist_auth_credentials("openai", { "access_token" => token }) if token != creds["access_token"]
+      token
+    else
+      raise ArgumentError, "Unsupported OAuth provider: #{provider}"
+    end
+  end
+
+  def huge_prompt
+    "Please reply with one short sentence.\n\n" + ("lorem ipsum dolor sit amet " * 240_000)
+  end
+
+  def assert_prompt_too_long(adapter, name, provider)
+    error = assert_raises(LlmGateway::Errors::PromptTooLong) do
+      adapter.stream(huge_prompt)
+    end
+
+    assert LlmGateway::Errors.context_overflow_message?(error.message),
+      "Expected prompt-length related error message for #{provider}, got: #{error.message}"
+  end
+
+  def self.define_prompt_too_long_debug_test(name:, provider:, model:)
+    test "#{name} prompt too long debug" do
+      without_vcr do
+        adapter = load_provider(provider:, model:)
+        assert_prompt_too_long(adapter, name, provider)
+      end
+    end
+  end
+
+  define_prompt_too_long_debug_test(
+    name: "openai_apikey_completions_gpt_5_1",
+    provider: "openai_apikey_completions",
+    model: "gpt-5.1"
+  )
+
+  define_prompt_too_long_debug_test(
+    name: "anthropic_apikey_messages_claude_sonnet_4",
+    provider: "anthropic_apikey_messages",
+    model: "claude-sonnet-4-20250514"
+  )
+
+  define_prompt_too_long_debug_test(
+    name: "openai_apikey_responses_gpt_5_4",
+    provider: "openai_apikey_responses",
+    model: "gpt-5.4"
+  )
+
+  define_prompt_too_long_debug_test(
+    name: "anthropic_oauth_messages_claude_sonnet_4",
+    provider: "anthropic_oauth_messages",
+    model: "claude-sonnet-4-20250514"
+  )
+
+  define_prompt_too_long_debug_test(
+    name: "openai_oauth_codex_gpt_5_4",
+    provider: "openai_oauth_codex",
+    model: "gpt-5.4"
+  )
+end

From bb61205e91236a8050f20dc92e966c473f8b42d5 Mon Sep 17 00:00:00 2001
From: gruv <gruv@gruv.dev>
Date: Mon, 6 Apr 2026 13:57:28 +0800
Subject: [PATCH 08/14] refactor: move all live tests to a shared helper

---
 test/integration/prompt_too_long_live_test.rb |  99 +---------------
 test/integration/stream_test.rb               | 107 +----------------
 test/utils/live_test_helper.rb                | 109 ++++++++++++++++++
 3 files changed, 115 insertions(+), 200 deletions(-)
 create mode 100644 test/utils/live_test_helper.rb

diff --git a/test/integration/prompt_too_long_live_test.rb b/test/integration/prompt_too_long_live_test.rb
index be8acbb..36a51e3 100644
--- a/test/integration/prompt_too_long_live_test.rb
+++ b/test/integration/prompt_too_long_live_test.rb
@@ -1,108 +1,15 @@
 # frozen_string_literal: true
 
 require "test_helper"
-require "json"
-require "time"
-require "fileutils"
+require_relative "../utils/live_test_helper"
 
 class PromptTooLongLiveTest < Test
+  include LiveTestHelper
+
   def teardown
     LlmGateway.reset_configuration!
   end
 
-  def load_provider(provider:, model:)
-    config = {
-      "provider" => provider,
-      "model_key" => model
-    }
-
-    case provider
-    when "openai_apikey_completions", "openai_apikey_responses"
-      api_key = ENV["OPENAI_API_KEY"].to_s
-      skip("Skipped: missing OPENAI_API_KEY") if api_key.empty?
-      config["api_key"] = api_key
-    when "anthropic_apikey_messages"
-      api_key = ENV["ANTHROPIC_API_KEY"].to_s
-      skip("Skipped: missing ANTHROPIC_API_KEY") if api_key.empty?
-      config["api_key"] = api_key
-    when "anthropic_oauth_messages"
-      config["provider"] = "anthropic_apikey_messages"
-      config["api_key"] = oauth_access_token_for("anthropic")
-    when "openai_oauth_codex"
-      creds = load_auth_credentials("openai")
-      config["api_key"] = oauth_access_token_for("openai")
-      config["account_id"] = creds["account_id"] if creds["account_id"]
-    end
-
-    LlmGateway.build_provider(config)
-  end
-
-  def auth_file_path
-    File.expand_path(ENV.fetch("LLM_GATEWAY_AUTH_FILE", "~/.config/llm_gateway/auth.json"))
-  end
-
-  def load_auth_credentials(provider)
-    path = auth_file_path
-    skip("Skipped: missing auth file at #{path}") unless File.exist?(path)
-
-    auth = JSON.parse(File.read(path))
-    creds = auth[provider]
-    skip("Skipped: missing #{provider} credentials in #{path}") unless creds
-
-    creds
-  end
-
-  def persist_auth_credentials(provider, attributes)
-    path = auth_file_path
-    FileUtils.mkdir_p(File.dirname(path))
-
-    auth = File.exist?(path) ? JSON.parse(File.read(path)) : {}
-    auth[provider] ||= {}
-    auth[provider].merge!(attributes)
-
-    File.write(path, JSON.pretty_generate(auth) + "\n")
-  end
-
-  def oauth_access_token_for(provider)
-    creds = load_auth_credentials(provider)
-
-    case provider
-    when "anthropic"
-      token = LlmGateway::Clients::Claude.new.get_oauth_access_token(
-        access_token: creds["access_token"],
-        refresh_token: creds["refresh_token"],
-        expires_at: creds["expires_at"]
-      ) do |access_token, refresh_token, expires_at|
-        persist_auth_credentials("anthropic", {
-          "access_token" => access_token,
-          "refresh_token" => refresh_token,
-          "expires_at" => expires_at&.iso8601
-        })
-      end
-
-      persist_auth_credentials("anthropic", { "access_token" => token }) if token != creds["access_token"]
-      token
-    when "openai"
-      token = LlmGateway::Clients::OpenAi.new.get_oauth_access_token(
-        access_token: creds["access_token"],
-        refresh_token: creds["refresh_token"],
-        expires_at: creds["expires_at"],
-        account_id: creds["account_id"]
-      ) do |access_token, refresh_token, expires_at|
-        persist_auth_credentials("openai", {
-          "access_token" => access_token,
-          "refresh_token" => refresh_token,
-          "expires_at" => expires_at&.iso8601
-        })
-      end
-
-      persist_auth_credentials("openai", { "access_token" => token }) if token != creds["access_token"]
-      token
-    else
-      raise ArgumentError, "Unsupported OAuth provider: #{provider}"
-    end
-  end
-
   def huge_prompt
     "Please reply with one short sentence.\n\n" + ("lorem ipsum dolor sit amet " * 240_000)
   end
diff --git a/test/integration/stream_test.rb b/test/integration/stream_test.rb
index 13b2eb7..1ee751b 100644
--- a/test/integration/stream_test.rb
+++ b/test/integration/stream_test.rb
@@ -4,118 +4,17 @@
 require "vcr"
 require "json"
 require "base64"
-require "time"
-require "fileutils"
 require_relative "../utils/calculator_tool_helper"
+require_relative "../utils/live_test_helper"
 
 class ProvidersJsonTest < Test
   include CalculatorToolHelper
+  include LiveTestHelper
+
   def teardown
     LlmGateway.reset_configuration!
   end
 
-  def load_provider(provider:, model:)
-    config = {
-      "provider" => provider,
-      "model_key" => model
-    }
-
-    case provider
-    when "openai_apikey_completions", "openai_apikey_responses"
-      api_key = ENV["OPENAI_API_KEY"].to_s
-      skip("Skipped: missing OPENAI_API_KEY") if api_key.empty?
-      config["api_key"] = api_key
-    when "anthropic_apikey_messages"
-      api_key = ENV["ANTHROPIC_API_KEY"].to_s
-      skip("Skipped: missing ANTHROPIC_API_KEY") if api_key.empty?
-      config["api_key"] = api_key
-    when "anthropic_oauth_messages"
-      config["provider"] = "anthropic_apikey_messages"
-      config["api_key"] = oauth_access_token_for("anthropic")
-    when "openai_oauth_codex"
-      creds = load_auth_credentials("openai")
-      config["api_key"] = oauth_access_token_for("openai")
-      config["account_id"] = creds["account_id"] if creds["account_id"]
-    end
-
-    LlmGateway.build_provider(config)
-  end
-
-  def skip_on_authentication_error
-    yield
-  rescue LlmGateway::Errors::AuthenticationError,
-         LlmGateway::Errors::BadRequestError,
-         LlmGateway::Errors::RateLimitError,
-         LlmGateway::Errors::APIStatusError => e
-    skip("Skipped due to provider error: #{e.message}")
-  end
-
-  def auth_file_path
-    File.expand_path(ENV.fetch("LLM_GATEWAY_AUTH_FILE", "~/.config/llm_gateway/auth.json"))
-  end
-
-  def load_auth_credentials(provider)
-    path = auth_file_path
-    skip("Skipped: missing auth file at #{path}") unless File.exist?(path)
-
-    auth = JSON.parse(File.read(path))
-    creds = auth[provider]
-    skip("Skipped: missing #{provider} credentials in #{path}") unless creds
-
-    creds
-  end
-
-  def persist_auth_credentials(provider, attributes)
-    path = auth_file_path
-    FileUtils.mkdir_p(File.dirname(path))
-
-    auth = File.exist?(path) ? JSON.parse(File.read(path)) : {}
-    auth[provider] ||= {}
-    auth[provider].merge!(attributes)
-
-    File.write(path, JSON.pretty_generate(auth) + "\n")
-  end
-
-  def oauth_access_token_for(provider)
-    creds = load_auth_credentials(provider)
-
-    case provider
-    when "anthropic"
-      token = LlmGateway::Clients::Claude.new.get_oauth_access_token(
-        access_token: creds["access_token"],
-        refresh_token: creds["refresh_token"],
-        expires_at: creds["expires_at"]
-      ) do |access_token, refresh_token, expires_at|
-        persist_auth_credentials("anthropic", {
-          "access_token" => access_token,
-          "refresh_token" => refresh_token,
-          "expires_at" => expires_at&.iso8601
-        })
-      end
-
-      persist_auth_credentials("anthropic", { "access_token" => token }) if token != creds["access_token"]
-      token
-    when "openai"
-      token = LlmGateway::Clients::OpenAi.new.get_oauth_access_token(
-        access_token: creds["access_token"],
-        refresh_token: creds["refresh_token"],
-        expires_at: creds["expires_at"],
-        account_id: creds["account_id"]
-      ) do |access_token, refresh_token, expires_at|
-        persist_auth_credentials("openai", {
-          "access_token" => access_token,
-          "refresh_token" => refresh_token,
-          "expires_at" => expires_at&.iso8601
-        })
-      end
-
-      persist_auth_credentials("openai", { "access_token" => token }) if token != creds["access_token"]
-      token
-    else
-      raise ArgumentError, "Unsupported OAuth provider: #{provider}"
-    end
-  end
-
   def assert_basic_text_generation_result(message, expected_text)
     assert_equal "assistant", message.role
     assert_operator message.usage[:input_tokens], :>, 0
diff --git a/test/utils/live_test_helper.rb b/test/utils/live_test_helper.rb
new file mode 100644
index 0000000..42e1769
--- /dev/null
+++ b/test/utils/live_test_helper.rb
@@ -0,0 +1,109 @@
+# frozen_string_literal: true
+
+require "json"
+require "time"
+require "fileutils"
+
+module LiveTestHelper
+  def load_provider(provider:, model:)
+    config = {
+      "provider" => provider,
+      "model_key" => model
+    }
+
+    case provider
+    when "openai_apikey_completions", "openai_apikey_responses"
+      api_key = ENV["OPENAI_API_KEY"].to_s
+      skip("Skipped: missing OPENAI_API_KEY") if api_key.empty?
+      config["api_key"] = api_key
+    when "anthropic_apikey_messages"
+      api_key = ENV["ANTHROPIC_API_KEY"].to_s
+      skip("Skipped: missing ANTHROPIC_API_KEY") if api_key.empty?
+      config["api_key"] = api_key
+    when "anthropic_oauth_messages"
+      config["provider"] = "anthropic_apikey_messages"
+      config["api_key"] = oauth_access_token_for("anthropic")
+    when "openai_oauth_codex"
+      creds = load_auth_credentials("openai")
+      config["api_key"] = oauth_access_token_for("openai")
+      config["account_id"] = creds["account_id"] if creds["account_id"]
+    end
+
+    LlmGateway.build_provider(config)
+  end
+
+  def skip_on_authentication_error
+    yield
+  rescue LlmGateway::Errors::AuthenticationError,
+         LlmGateway::Errors::BadRequestError,
+         LlmGateway::Errors::RateLimitError,
+         LlmGateway::Errors::APIStatusError => e
+    skip("Skipped due to provider error: #{e.message}")
+  end
+
+  def auth_file_path
+    File.expand_path(ENV.fetch("LLM_GATEWAY_AUTH_FILE", "~/.config/llm_gateway/auth.json"))
+  end
+
+  def load_auth_credentials(provider)
+    path = auth_file_path
+    skip("Skipped: missing auth file at #{path}") unless File.exist?(path)
+
+    auth = JSON.parse(File.read(path))
+    creds = auth[provider]
+    skip("Skipped: missing #{provider} credentials in #{path}") unless creds
+
+    creds
+  end
+
+  def persist_auth_credentials(provider, attributes)
+    path = auth_file_path
+    FileUtils.mkdir_p(File.dirname(path))
+
+    auth = File.exist?(path) ? JSON.parse(File.read(path)) : {}
+    auth[provider] ||= {}
+    auth[provider].merge!(attributes)
+
+    File.write(path, JSON.pretty_generate(auth) + "\n")
+  end
+
+  def oauth_access_token_for(provider)
+    creds = load_auth_credentials(provider)
+
+    case provider
+    when "anthropic"
+      token = LlmGateway::Clients::Claude.new.get_oauth_access_token(
+        access_token: creds["access_token"],
+        refresh_token: creds["refresh_token"],
+        expires_at: creds["expires_at"]
+      ) do |access_token, refresh_token, expires_at|
+        persist_auth_credentials("anthropic", {
+          "access_token" => access_token,
+          "refresh_token" => refresh_token,
+          "expires_at" => expires_at&.iso8601
+        })
+      end
+
+      persist_auth_credentials("anthropic", { "access_token" => token }) if token != creds["access_token"]
+      token
+    when "openai"
+      token = LlmGateway::Clients::OpenAi.new.get_oauth_access_token(
+        access_token: creds["access_token"],
+        refresh_token: creds["refresh_token"],
+        expires_at: creds["expires_at"],
+        account_id: creds["account_id"]
+      ) do |access_token, refresh_token, expires_at|
+        persist_auth_credentials("openai", {
+          "access_token" => access_token,
+          "refresh_token" => refresh_token,
+          "expires_at" => expires_at&.iso8601
+        })
+      end
+
+      persist_auth_credentials("openai", { "access_token" => token }) if token != creds["access_token"]
+      token
+    else
+      raise ArgumentError, "Unsupported OAuth provider: #{provider}"
+    end
+  end
+end

From 4b9ee9ca6b479a590b3ff3e66d0d7f960ae4a180 Mon Sep 17 00:00:00 2001
From: gruv <gruv@gruv.dev>
Date: Mon, 6 Apr 2026 14:46:02 +0800
Subject: [PATCH 09/14] test: fix test asserting wrong error type for prompt
 too long

---
 test/integration/clients/openai_test.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/integration/clients/openai_test.rb b/test/integration/clients/openai_test.rb
index 87db73a..c5930a3 100644
--- a/test/integration/clients/openai_test.rb
+++ b/test/integration/clients/openai_test.rb
@@ -64,7 +64,7 @@ def openai_client
   end
 
   test "throws rate limit error" do
-    error = assert_raises(LlmGateway::Errors::RateLimitError) do
+    error = assert_raises(LlmGateway::Errors::PromptTooLong) do
       VCR.use_cassette(vcr_cassette_name) do
         openai_client.chat([ { 'role': "user", 'content': "aqklcsa," * 100_000 } ], **mapped_chat_options(max_completion_tokens: 4096))
       end

From b767fe73949de1fea5b86eee7968670151a8cb67 Mon Sep 17 00:00:00 2001
From: gruv <gruv@gruv.dev>
Date: Mon, 6 Apr 2026 14:49:20 +0800
Subject: [PATCH 10/14] feat: support prompt caching, with cache_retention
 cache_key options

For Anthropic, the cache_control tags are added automatically. I have not
tested what happens when the user has already set them, but they will
probably be overridden.
---
 lib/llm_gateway.rb                            |   1 +
 .../adapters/anthropic_option_mapper.rb       |   6 +-
 .../open_ai/chat_completions/option_mapper.rb |   5 +
 .../open_ai/prompt_cache_option_mapper.rb     |  39 +++++
 .../open_ai/responses/option_mapper.rb        |   5 +
 .../adapters/openai_codex/option_mapper.rb    |   5 +
 lib/llm_gateway/clients/claude.rb             |  68 +++++++-
 lib/llm_gateway/clients/open_ai.rb            |   5 +-
 test/integration/cache_live_test.rb           | 153 ++++++++++++++++++
 9 files changed, 282 insertions(+), 5 deletions(-)
 create mode 100644 lib/llm_gateway/adapters/open_ai/prompt_cache_option_mapper.rb
 create mode 100644 test/integration/cache_live_test.rb

diff --git a/lib/llm_gateway.rb b/lib/llm_gateway.rb
index 0a24d00..0c85e0d 100644
--- a/lib/llm_gateway.rb
+++ b/lib/llm_gateway.rb
@@ -24,6 +24,7 @@
 require_relative "llm_gateway/adapters/claude/input_mapper"
 require_relative "llm_gateway/adapters/claude/output_mapper"
 require_relative "llm_gateway/adapters/open_ai/file_output_mapper"
+require_relative "llm_gateway/adapters/open_ai/prompt_cache_option_mapper"
 require_relative "llm_gateway/adapters/open_ai/chat_completions/input_mapper"
 require_relative "llm_gateway/adapters/open_ai/chat_completions/output_mapper"
 require_relative "llm_gateway/adapters/open_ai/chat_completions/option_mapper"
diff --git a/lib/llm_gateway/adapters/anthropic_option_mapper.rb b/lib/llm_gateway/adapters/anthropic_option_mapper.rb
index fa79262..a5e5744 100644
--- a/lib/llm_gateway/adapters/anthropic_option_mapper.rb
+++ b/lib/llm_gateway/adapters/anthropic_option_mapper.rb
@@ -14,8 +14,12 @@ module AnthropicOptionMapper
       module_function
 
       def map(options)
-        mapped_options = options.reject { |key, _| %i[reasoning max_completion_tokens].include?(key) }
+        mapped_options = options.reject { |key, _| %i[reasoning max_completion_tokens prompt_cache_retention cache_key prompt_cache_key].include?(key) }
         mapped_options[:max_tokens] = options[:max_completion_tokens] || 20480
+
+        retention = options[:cache_retention]
+        mapped_options[:cache_retention] = retention unless retention.nil?
+
         reasoning = options[:reasoning]
         return mapped_options if reasoning.nil? || reasoning.to_s == "none"
 
diff --git a/lib/llm_gateway/adapters/open_ai/chat_completions/option_mapper.rb b/lib/llm_gateway/adapters/open_ai/chat_completions/option_mapper.rb
index cfbc810..4d5c430 100644
--- a/lib/llm_gateway/adapters/open_ai/chat_completions/option_mapper.rb
+++ b/lib/llm_gateway/adapters/open_ai/chat_completions/option_mapper.rb
@@ -5,6 +5,8 @@ module Adapters
     module OpenAi
       module ChatCompletions
         module OptionMapper
+          include LlmGateway::Adapters::OpenAi::PromptCacheOptionMapper
+
           VALID_REASONING_LEVELS = %w[low medium high xhigh].freeze
 
           module_function
@@ -13,6 +15,9 @@ def map(options)
             mapped_options = options.dup
             mapped_options[:max_completion_tokens] ||= 20_480
 
+            map_cache_key!(mapped_options)
+            map_prompt_cache_retention!(mapped_options)
+
             return mapped_options unless mapped_options.key?(:reasoning)
 
             reasoning = mapped_options.delete(:reasoning)
diff --git a/lib/llm_gateway/adapters/open_ai/prompt_cache_option_mapper.rb b/lib/llm_gateway/adapters/open_ai/prompt_cache_option_mapper.rb
new file mode 100644
index 0000000..99ef3e7
--- /dev/null
+++ b/lib/llm_gateway/adapters/open_ai/prompt_cache_option_mapper.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+module LlmGateway
+  module Adapters
+    module OpenAi
+      module PromptCacheOptionMapper
+        def self.included(base)
+          base.extend(self)
+        end
+
+        def map_cache_key!(mapped_options)
+          cache_key = mapped_options.delete(:cache_key)
+          mapped_options.delete(:prompt_cache_key)
+          mapped_options[:prompt_cache_key] = cache_key unless cache_key.nil?
+        end
+
+        def map_prompt_cache_retention!(mapped_options)
+          retention = mapped_options.delete(:cache_retention)
+          mapped_options.delete(:prompt_cache_retention)
+          retention ||= "short" if mapped_options.key?(:prompt_cache_key)
+
+          case retention&.to_s
+          when nil
+            nil
+          when "short"
+            mapped_options[:prompt_cache_retention] = "in_memory"
+          when "long"
+            mapped_options[:prompt_cache_retention] = "24h"
+          when "none"
+            mapped_options.delete(:prompt_cache_key)
+          else
+            raise ArgumentError,
+              "Invalid cache_retention '#{retention}'. Use 'short', 'long', or 'none'."
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/llm_gateway/adapters/open_ai/responses/option_mapper.rb b/lib/llm_gateway/adapters/open_ai/responses/option_mapper.rb
index 7ce374d..70f0bd4 100644
--- a/lib/llm_gateway/adapters/open_ai/responses/option_mapper.rb
+++ b/lib/llm_gateway/adapters/open_ai/responses/option_mapper.rb
@@ -5,6 +5,8 @@ module Adapters
     module OpenAi
       module Responses
         module OptionMapper
+          include LlmGateway::Adapters::OpenAi::PromptCacheOptionMapper
+
           VALID_REASONING_LEVELS = %w[low medium high xhigh].freeze
 
           module_function
@@ -15,6 +17,9 @@ def map(options)
             max_completion_tokens = mapped_options.delete(:max_completion_tokens)
             mapped_options[:max_output_tokens] = max_completion_tokens || mapped_options[:max_output_tokens] || 20_480
 
+            map_cache_key!(mapped_options)
+            map_prompt_cache_retention!(mapped_options)
+
             return mapped_options unless mapped_options.key?(:reasoning)
 
             reasoning = mapped_options.delete(:reasoning)
diff --git a/lib/llm_gateway/adapters/openai_codex/option_mapper.rb b/lib/llm_gateway/adapters/openai_codex/option_mapper.rb
index 1d9e149..7f13645 100644
--- a/lib/llm_gateway/adapters/openai_codex/option_mapper.rb
+++ b/lib/llm_gateway/adapters/openai_codex/option_mapper.rb
@@ -15,6 +15,11 @@ def map(options)
           mapped_options.delete(:max_output_tokens)
           mapped_options.delete(:max_completion_tokens)
 
+          # Codex transport does not use retention flags in the request body.
+          mapped_options.delete(:prompt_cache_retention)
+          mapped_options.delete(:cacheRetention)
+          mapped_options.delete(:cache_retention)
+
           mapped_options
         end
       end
diff --git a/lib/llm_gateway/clients/claude.rb b/lib/llm_gateway/clients/claude.rb
index 33192f3..de718b0 100644
--- a/lib/llm_gateway/clients/claude.rb
+++ b/lib/llm_gateway/clients/claude.rb
@@ -1,5 +1,6 @@
 # frozen_string_literal: true
 
+require "uri"
 require_relative "../base_client"
 
 module LlmGateway
@@ -41,21 +42,83 @@ def upload_file(filename, content, mime_type = "application/octet-stream")
 
       private
 
-      def build_body(messages, tools: nil, system: [], **options)
+      def build_body(messages, tools: nil, system: [], cache_retention: nil, **options)
         body = {
           model: model_key,
-          messages: messages
+          messages: apply_message_cache_control(messages, cache_retention)
         }
 
         body.merge!(tools: tools) if LlmGateway::Utils.present?(tools)
 
         system = prepend_claude_code_identity(system) if claude_code_oauth_api_key?
+        system = apply_system_cache_control(system, cache_retention)
 
         body.merge!(system: system) if LlmGateway::Utils.present?(system)
         body.merge!(options)
         body
       end
 
+      def apply_system_cache_control(system, cache_retention)
+        return system if system.nil? || system.empty? || !system.is_a?(Array)
+
+        cache_control = anthropic_cache_control_for(cache_retention)
+        return system if cache_control.nil?
+
+        last_index = system.length - 1
+        system.each_with_index.map do |block, index|
+          block = block.dup
+          if index == last_index
+            block[:cache_control] = cache_control
+          else
+            block.delete(:cache_control)
+          end
+          block
+        end
+      end
+
+      def apply_message_cache_control(messages, cache_retention)
+        return messages if messages.nil? || messages.empty? || !messages.is_a?(Array)
+
+        cache_control = anthropic_cache_control_for(cache_retention)
+        return messages if cache_control.nil?
+
+        mapped_messages = messages.map(&:dup)
+        last_user_index = mapped_messages.rindex { |message| message[:role] == "user" }
+        return mapped_messages unless last_user_index
+
+        last_user_message = mapped_messages[last_user_index]
+        original_blocks = Array(last_user_message[:content])
+        tagged_indices = [(original_blocks.length - 2), (original_blocks.length - 1)].select { |i| i >= 0 }
+
+        content_blocks = original_blocks.each_with_index.map do |block, index|
+          block = block.is_a?(Hash) ? block.dup : { type: "text", text: block.to_s }
+          if tagged_indices.include?(index)
+            block[:cache_control] = cache_control
+          else
+            block.delete(:cache_control)
+          end
+          block
+        end
+
+        mapped_messages[last_user_index] = last_user_message.merge(content: content_blocks)
+        mapped_messages
+      end
+
+      def anthropic_cache_control_for(cache_retention)
+        return nil if cache_retention.nil?
+
+        retention = cache_retention.to_s
+        return nil if retention == "none"
+
+        cache_control = { type: "ephemeral" }
+        cache_control = cache_control.merge(ttl: "1h") if retention == "long" && anthropic_official_api?
+        cache_control
+      end
+
+      def anthropic_official_api?
+        URI(base_endpoint).host == "api.anthropic.com"
+      end
+
       def build_headers
         return claude_code_oauth_headers if claude_code_oauth_api_key?
 
@@ -101,7 +164,6 @@ def handle_client_specific_errors(response, error)
           raise Errors::PromptTooLong.new(error["message"], error["type"])
         end
 
-        # If we get here, we didn't handle it specifically
         raise Errors::APIStatusError.new(error["message"], error["type"])
       end
     end
diff --git a/lib/llm_gateway/clients/open_ai.rb b/lib/llm_gateway/clients/open_ai.rb
index 71a0014..742a393 100644
--- a/lib/llm_gateway/clients/open_ai.rb
+++ b/lib/llm_gateway/clients/open_ai.rb
@@ -129,7 +129,7 @@ def build_codex_body(messages, system, tools, **options)
         body
       end
 
-      def codex_headers(account_id: nil)
+      def codex_headers(account_id: nil, **options)
         effective_account_id = account_id || @account_id
 
         headers = {
@@ -152,6 +152,9 @@ def post_codex_stream(url_part, body = nil, account_id: nil, &block)
         body.merge!(stream: true)
         request = Net::HTTP::Post.new(uri)
         codex_headers(account_id: account_id).each { |key, value| request[key] = value }
+        prompt_cache_key = body.delete(:prompt_cache_key)
+        request[:session_id] = prompt_cache_key if prompt_cache_key
+
         request.body = body.to_json if body
 
         http.request(request) do |response|
diff --git a/test/integration/cache_live_test.rb b/test/integration/cache_live_test.rb
new file mode 100644
index 0000000..200c899
--- /dev/null
+++ b/test/integration/cache_live_test.rb
@@ -0,0 +1,153 @@
+# frozen_string_literal: true
+
+require "test_helper"
+require "net/http"
+require "uri"
+require_relative "../utils/live_test_helper"
+
+class CacheLiveTest < Test
+  include LiveTestHelper
+
+  DOCUMENT_URL = "https://gist.githubusercontent.com/billybonks/f343b02cc67535475b8819d281763c21/raw/c55972e604ecc9b5b998ed44d9e9575cebaf2fc8/responses.md"
+
+  def teardown
+    LlmGateway.reset_configuration!
+  end
+
+  def fetch_document
+    uri = URI(DOCUMENT_URL)
+    response = Net::HTTP.get_response(uri)
+
+    unless response.is_a?(Net::HTTPSuccess)
+      raise "Failed to fetch document from #{DOCUMENT_URL}: HTTP #{response.code}"
+    end
+
+    response.body.encode("UTF-8", invalid: :replace, undef: :replace)
+  end
+
+  def run_two_turn_cache_probe(adapter, options: {})
+    document = fetch_document
+    first_prompt = <<~PROMPT
+      Read the following markdown document and remember it for the next question.
+
+      ---
+      #{document}
+      ---
+
+      Reply with exactly: loaded
+    PROMPT
+
+    first_response = adapter.stream(first_prompt, **options)
+
+    assert_equal "assistant", first_response.role
+    assert_nil first_response.error_message
+
+    second_transcript = [
+      { role: "user", content: first_prompt },
+      first_response.to_h,
+      { role: "user", content: "What is this file documenting? Reply in one sentence." }
+    ]
+
+    second_response = adapter.stream(second_transcript, **options)
+
+    assert_equal "assistant", second_response.role
+    assert_nil second_response.error_message
+    second_response
+  end
+
+  def assert_cache_hit_on_second_turn(adapter, options: {})
+    second_response = run_two_turn_cache_probe(adapter, options: options)
+
+    assert_operator second_response.usage[:cache_read_input_tokens], :>, 0,
+      "Expected cache_read_input_tokens > 0 with options #{options.inspect}, got #{second_response.usage.inspect}"
+  end
+
+  def assert_no_cache_hit_on_second_turn(adapter, options: {})
+    second_response = run_two_turn_cache_probe(adapter, options: options)
+
+    assert_equal 0, second_response.usage[:cache_read_input_tokens].to_i,
+      "Expected cache_read_input_tokens to be 0 with options #{options.inspect}, got #{second_response.usage.inspect}"
+  end
+
+  def self.define_cache_tests_for(name:, provider:, model:, options: {})
+    test "#{name} cache read tokens on second turn" do
+      skip_on_authentication_error do
+        without_vcr do
+          adapter = load_provider(provider:, model:)
+          if provider.start_with?("anthropic") && options[:cache_retention].to_s == "none"
+            assert_no_cache_hit_on_second_turn(adapter, options: options)
+          else
+            assert_cache_hit_on_second_turn(adapter, options: options)
+          end
+        end
+      end
+    end
+  end
+
+  define_cache_tests_for(
+    name: "openai_apikey_completions",
+    provider: "openai_apikey_completions",
+    model: "gpt-5.1",
+    options: {
+      cache_key: "openai_apikey_completions",
+      cache_retention: "short"
+    }
+  )
+
+  define_cache_tests_for(
+    name: "openai_apikey_completions_none",
+    provider: "openai_apikey_completions",
+    model: "gpt-5.1",
+    options: {
+      cache_key: "openai_apikey_completions_none",
+      cache_retention: "none"
+    }
+  )
+
+  define_cache_tests_for(
+    name: "openai_apikey_responses",
+    provider: "openai_apikey_responses",
+    model: "gpt-5.4",
+    options: {
+      cache_key: "openai_apikey_responses",
+      cache_retention: "short"
+    }
+  )
+
+  define_cache_tests_for(
+    name: "openai_apikey_responses_none",
+    provider: "openai_apikey_responses",
+    model: "gpt-5.4",
+    options: {
+      cache_key: "openai_apikey_responses_none",
+      cache_retention: "none"
+    }
+  )
+
+  define_cache_tests_for(
+    name: "openai_oauth_codex",
+    provider: "openai_oauth_codex",
+    model: "gpt-5.4",
+    options: {
+      cache_key: "openai_oauth_codex"
+    }
+  )
+
+  define_cache_tests_for(
+    name: "anthropic_apikey_messages",
+    provider: "anthropic_apikey_messages",
+    model: "claude-sonnet-4-20250514",
+    options: {
+      cache_retention: "short"
+    }
+  )
+
+  define_cache_tests_for(
+    name: "anthropic_apikey_messages_none",
+    provider: "anthropic_apikey_messages",
+    model: "claude-sonnet-4-20250514",
+    options: {
+      cache_retention: "none"
+    }
+  )
+end

From 1135629be615c49584e9bf3bda469f0b0091bfea Mon Sep 17 00:00:00 2001
From: gruv <gruv@gruv.dev>
Date: Mon, 6 Apr 2026 16:10:32 +0800
Subject: [PATCH 11/14] test: add tests for all option mappers

This just makes it easier to see what the behaviours are. The tests are
not exhaustive for all options that could be passed.
---
 .../options/anthropic_option_mapper_test.rb   | 51 +++++++++++++
 .../unit/options/claude_cache_control_test.rb | 73 ++++++++++++++++++
 test/unit/options/groq_option_mapper_test.rb  | 31 ++++++++
 ...nai_chat_completions_option_mapper_test.rb | 74 +++++++++++++++++++
 .../openai_codex_option_mapper_test.rb        | 30 ++++++++
 .../openai_responses_option_mapper_test.rb    | 62 ++++++++++++++++
 6 files changed, 321 insertions(+)
 create mode 100644 test/unit/options/anthropic_option_mapper_test.rb
 create mode 100644 test/unit/options/claude_cache_control_test.rb
 create mode 100644 test/unit/options/groq_option_mapper_test.rb
 create mode 100644 test/unit/options/openai_chat_completions_option_mapper_test.rb
 create mode 100644 test/unit/options/openai_codex_option_mapper_test.rb
 create mode 100644 test/unit/options/openai_responses_option_mapper_test.rb

diff --git a/test/unit/options/anthropic_option_mapper_test.rb b/test/unit/options/anthropic_option_mapper_test.rb
new file mode 100644
index 0000000..7172016
--- /dev/null
+++ b/test/unit/options/anthropic_option_mapper_test.rb
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+class AnthropicOptionMapperTest < Test
+  test "maps max_completion_tokens to max_tokens" do
+    mapped = LlmGateway::Adapters::AnthropicOptionMapper.map(max_completion_tokens: 321)
+
+    assert_equal 321, mapped[:max_tokens]
+    refute mapped.key?(:max_completion_tokens)
+  end
+
+  test "sets default max_tokens" do
+    mapped = LlmGateway::Adapters::AnthropicOptionMapper.map({})
+
+    assert_equal 20_480, mapped[:max_tokens]
+  end
+
+  test "forwards cache_retention as is" do
+    mapped = LlmGateway::Adapters::AnthropicOptionMapper.map(cache_retention: "long")
+
+    assert_equal "long", mapped[:cache_retention]
+    refute mapped.key?(:prompt_cache_retention)
+  end
+
+  test "forwards none cache_retention" do
+    mapped = LlmGateway::Adapters::AnthropicOptionMapper.map(cache_retention: "none")
+
+    assert_equal "none", mapped[:cache_retention]
+  end
+
+  test "maps reasoning to thinking with budget tokens" do
+    mapped = LlmGateway::Adapters::AnthropicOptionMapper.map(reasoning: "high")
+
+    assert_equal({ type: "enabled", budget_tokens: 10_240 }, mapped[:thinking])
+    refute mapped.key?(:reasoning)
+  end
+
+  test "none reasoning is removed" do
+    mapped = LlmGateway::Adapters::AnthropicOptionMapper.map(reasoning: "none")
+
+    refute mapped.key?(:thinking)
+    refute mapped.key?(:reasoning)
+  end
+
+  test "raises for invalid reasoning" do
+    assert_raises(ArgumentError) do
+      LlmGateway::Adapters::AnthropicOptionMapper.map(reasoning: "extreme")
+    end
+  end
+end
diff --git a/test/unit/options/claude_cache_control_test.rb b/test/unit/options/claude_cache_control_test.rb
new file mode 100644
index 0000000..507d011
--- /dev/null
+++ b/test/unit/options/claude_cache_control_test.rb
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+class ClaudeCacheControlTest < Test
+  test "when cache retention is passed it adds cache_control to last system block and up to two blocks in last user message" do
+    client = LlmGateway::Clients::Claude.new(model_key: "claude-3", api_key: "test")
+
+    body = client.send(
+      :build_body,
+      [
+        { role: "user", content: [ { type: "text", text: "hello" }, { type: "text", text: "world" } ] },
+        { role: "assistant", content: [ { type: "text", text: "ok" } ] },
+        { role: "user", content: [ { type: "text", text: "final" }, { type: "text", text: "question" } ] }
+      ],
+      system: [
+        { type: "text", text: "system 1" },
+        { type: "text", text: "system 2" }
+      ],
+      tools: [
+        { name: "tool_1", description: "Tool 1", input_schema: { type: "object", properties: {} } },
+        { name: "tool_2", description: "Tool 2", input_schema: { type: "object", properties: {} } }
+      ],
+      cache_retention: "short"
+    )
+
+    expected_cache_control = { type: "ephemeral" }
+
+    system_cache_control_count = body[:system].count { |block| block[:cache_control] == expected_cache_control }
+    tools_cache_control_count = body[:tools].count { |tool| tool[:cache_control] == expected_cache_control }
+
+    assert_nil body[:system][0][:cache_control]
+    assert_equal expected_cache_control, body[:system][1][:cache_control]
+
+    last_user_message = body[:messages].reverse.find { |message| message[:role] == "user" }
+    last_user_cache_control_count = Array(last_user_message[:content]).count do |block|
+      block[:cache_control] == expected_cache_control
+    end
+
+    assert_equal 1, system_cache_control_count
+    assert_equal 0, tools_cache_control_count
+    assert_equal 2, last_user_cache_control_count
+    assert_operator(system_cache_control_count + tools_cache_control_count + last_user_cache_control_count, :<=, 3)
+  end
+
+  test "uses ttl for long retention on official anthropic base url" do
+    client = LlmGateway::Clients::Claude.new(model_key: "claude-3", api_key: "test")
+
+    body = client.send(
+      :build_body,
+      [ { role: "user", content: [ { type: "text", text: "hello" } ] } ],
+      system: [ { type: "text", text: "system" } ],
+      cache_retention: "long"
+    )
+
+    assert_equal({ type: "ephemeral", ttl: "1h" }, body[:system][0][:cache_control])
+    assert_equal({ type: "ephemeral", ttl: "1h" }, body[:messages][0][:content][0][:cache_control])
+  end
+
+  test "does not mutate existing cache control when retention is none" do
+    client = LlmGateway::Clients::Claude.new(model_key: "claude-3", api_key: "test")
+
+    body = client.send(
+      :build_body,
+      [ { role: "user", content: [ { type: "text", text: "hello", cache_control: { type: "ephemeral" } } ] } ],
+      system: [ { type: "text", text: "system", cache_control: { type: "ephemeral" } } ],
+      cache_retention: "none"
+    )
+
+    assert_equal({ type: "ephemeral" }, body[:system][0][:cache_control])
+    assert_equal({ type: "ephemeral" }, body[:messages][0][:content][0][:cache_control])
+  end
+end
diff --git a/test/unit/options/groq_option_mapper_test.rb b/test/unit/options/groq_option_mapper_test.rb
new file mode 100644
index 0000000..45be414
--- /dev/null
+++ b/test/unit/options/groq_option_mapper_test.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+class GroqOptionMapperTest < Test
+  test "sets defaults for temperature max_completion_tokens and response_format" do
+    mapped = LlmGateway::Adapters::Groq::OptionMapper.map({})
+
+    assert_equal 0, mapped[:temperature]
+    assert_equal 20_480, mapped[:max_completion_tokens]
+    assert_equal({ type: "text" }, mapped[:response_format])
+  end
+
+  test "preserves explicit values" do
+    mapped = LlmGateway::Adapters::Groq::OptionMapper.map(
+      temperature: 0.3,
+      max_completion_tokens: 123,
+      response_format: { type: "json_object" }
+    )
+
+    assert_equal 0.3, mapped[:temperature]
+    assert_equal 123, mapped[:max_completion_tokens]
+    assert_equal({ type: "json_object" }, mapped[:response_format])
+  end
+
+  test "normalizes string response_format" do
+    mapped = LlmGateway::Adapters::Groq::OptionMapper.map(response_format: "json_object")
+
+    assert_equal({ type: "json_object" }, mapped[:response_format])
+  end
+end
diff --git a/test/unit/options/openai_chat_completions_option_mapper_test.rb b/test/unit/options/openai_chat_completions_option_mapper_test.rb
new file mode 100644
index 0000000..f5a68bc
--- /dev/null
+++ b/test/unit/options/openai_chat_completions_option_mapper_test.rb
@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+class OpenAiChatCompletionsOptionMapperTest < Test
+  test "sets default max_completion_tokens" do
+    mapped = LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map({})
+
+    assert_equal 20_480, mapped[:max_completion_tokens]
+  end
+
+  test "maps cache_key and short retention" do
+    mapped = LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(
+      cache_key: "abc",
+      cache_retention: "short"
+    )
+
+    assert_equal "abc", mapped[:prompt_cache_key]
+    assert_equal "in_memory", mapped[:prompt_cache_retention]
+  end
+
+  test "maps long retention" do
+    mapped = LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(
+      cache_key: "abc",
+      cache_retention: "long"
+    )
+
+    assert_equal "abc", mapped[:prompt_cache_key]
+    assert_equal "24h", mapped[:prompt_cache_retention]
+  end
+
+  test "none retention removes prompt cache key" do
+    mapped = LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(
+      cache_key: "abc",
+      cache_retention: "none"
+    )
+
+    refute mapped.key?(:prompt_cache_key)
+    refute mapped.key?(:prompt_cache_retention)
+  end
+
+  test "defaults retention to short when cache_key is present" do
+    mapped = LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(cache_key: "abc")
+
+    assert_equal "abc", mapped[:prompt_cache_key]
+    assert_equal "in_memory", mapped[:prompt_cache_retention]
+  end
+
+  test "maps reasoning to reasoning_effort" do
+    mapped = LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(reasoning: "high")
+
+    assert_equal "high", mapped[:reasoning_effort]
+    refute mapped.key?(:reasoning)
+  end
+
+  test "none reasoning is removed" do
+    mapped = LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(reasoning: "none")
+
+    refute mapped.key?(:reasoning)
+    refute mapped.key?(:reasoning_effort)
+  end
+
+  test "raises for invalid reasoning" do
+    assert_raises(ArgumentError) do
+      LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(reasoning: "extreme")
+    end
+  end
+
+  test "raises for invalid cache retention" do
+    assert_raises(ArgumentError) do
+      LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(cache_retention: "week")
+    end
+  end
+end
diff --git a/test/unit/options/openai_codex_option_mapper_test.rb b/test/unit/options/openai_codex_option_mapper_test.rb
new file mode 100644
index 0000000..393ba79
--- /dev/null
+++ b/test/unit/options/openai_codex_option_mapper_test.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+class OpenAiCodexOptionMapperTest < Test
+  test "keeps prompt_cache_key but removes retention fields" do
+    mapped = LlmGateway::Adapters::OpenAiCodex::OptionMapper.map(
+      cache_key: "abc",
+      cache_retention: "long"
+    )
+
+    assert_equal "abc", mapped[:prompt_cache_key]
+    refute mapped.key?(:prompt_cache_retention)
+    refute mapped.key?(:cacheRetention)
+    refute mapped.key?(:cache_retention)
+  end
+
+  test "removes token limit options" do
+    mapped = LlmGateway::Adapters::OpenAiCodex::OptionMapper.map(max_completion_tokens: 999)
+
+    refute mapped.key?(:max_output_tokens)
+    refute mapped.key?(:max_completion_tokens)
+  end
+
+  test "inherits reasoning mapping from openai responses" do
+    mapped = LlmGateway::Adapters::OpenAiCodex::OptionMapper.map(reasoning: "low")
+
+    assert_equal({ effort: "low", summary: "detailed" }, mapped[:reasoning])
+  end
+end
diff --git a/test/unit/options/openai_responses_option_mapper_test.rb b/test/unit/options/openai_responses_option_mapper_test.rb
new file mode 100644
index 0000000..06b3052
--- /dev/null
+++ b/test/unit/options/openai_responses_option_mapper_test.rb
@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+class OpenAiResponsesOptionMapperTest < Test
+  test "maps max_completion_tokens to max_output_tokens" do
+    mapped = LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map(max_completion_tokens: 777)
+
+    assert_equal 777, mapped[:max_output_tokens]
+    refute mapped.key?(:max_completion_tokens)
+  end
+
+  test "sets default max_output_tokens" do
+    mapped = LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map({})
+
+    assert_equal 20_480, mapped[:max_output_tokens]
+  end
+
+  test "maps cache_key and short retention" do
+    mapped = LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map(
+      cache_key: "abc",
+      cache_retention: "short"
+    )
+
+    assert_equal "abc", mapped[:prompt_cache_key]
+    assert_equal "in_memory", mapped[:prompt_cache_retention]
+  end
+
+  test "none retention removes prompt cache key" do
+    mapped = LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map(
+      cache_key: "abc",
+      cache_retention: "none"
+    )
+
+    refute mapped.key?(:prompt_cache_key)
+    refute mapped.key?(:prompt_cache_retention)
+  end
+
+  test "maps reasoning to reasoning hash" do
+    mapped = LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map(reasoning: "medium")
+
+    assert_equal({ effort: "medium", summary: "detailed" }, mapped[:reasoning])
+  end
+
+  test "none reasoning is removed" do
+    mapped = LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map(reasoning: "none")
+
+    refute mapped.key?(:reasoning)
+  end
+
+  test "raises for invalid reasoning" do
+    assert_raises(ArgumentError) do
+      LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map(reasoning: "extreme")
+    end
+  end
+
+  test "raises for invalid cache retention" do
+    assert_raises(ArgumentError) do
+      LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map(cache_retention: "week")
+    end
+  end
+end

From 3a1b1bfa0cf322586133561fd55153d5d3c3120d Mon Sep 17 00:00:00 2001
From: gruv <gruv@gruv.dev>
Date: Mon, 6 Apr 2026 16:42:22 +0800
Subject: [PATCH 12/14] fix: bug in response format mapping anthropic

Also update all the tests to cover the superset of all options we know of.
---
 .../adapters/anthropic_option_mapper.rb        | 18 ++++++++++++++++--
 .../options/anthropic_option_mapper_test.rb    | 16 ++++++++++++++++
 test/unit/options/groq_option_mapper_test.rb   | 17 +++++++++++++++++
 ...enai_chat_completions_option_mapper_test.rb | 17 +++++++++++++++++
 .../options/openai_codex_option_mapper_test.rb | 15 +++++++++++++++
 .../openai_responses_option_mapper_test.rb     | 17 +++++++++++++++++
 test/unit/options/option_mapper_fixture.rb     | 16 ++++++++++++++++
 7 files changed, 114 insertions(+), 2 deletions(-)
 create mode 100644 test/unit/options/option_mapper_fixture.rb

diff --git a/lib/llm_gateway/adapters/anthropic_option_mapper.rb b/lib/llm_gateway/adapters/anthropic_option_mapper.rb
index a5e5744..1fdaaa4 100644
--- a/lib/llm_gateway/adapters/anthropic_option_mapper.rb
+++ b/lib/llm_gateway/adapters/anthropic_option_mapper.rb
@@ -14,12 +14,15 @@ module AnthropicOptionMapper
       module_function
 
       def map(options)
-        mapped_options = options.reject { |key, _| %i[reasoning max_completion_tokens prompt_cache_retention cache_key prompt_cache_key].include?(key) }
-        mapped_options[:max_tokens] = options[:max_completion_tokens] || 20480
+        mapped_options = options.reject { |key, _| %i[reasoning max_completion_tokens response_format prompt_cache_retention cache_key prompt_cache_key].include?(key) }
+        mapped_options[:max_tokens] = options[:max_completion_tokens] || DEFAULT_MAX_TOKENS
 
         retention = options[:cache_retention]
         mapped_options[:cache_retention] = retention unless retention.nil?
 
+        response_format = options[:response_format]
+        mapped_options[:output_config] = normalize_output_config(response_format) unless response_format.nil?
+
         reasoning = options[:reasoning]
         return mapped_options if reasoning.nil? || reasoning.to_s == "none"
 
@@ -27,6 +30,17 @@ def map(options)
         mapped_options
       end
 
+      def normalize_output_config(response_format)
+        format_type = response_format.is_a?(Hash) ? response_format[:type] || response_format["type"] : response_format
+
+        case format_type.to_s
+        when "json_object", "json_schema"
+          { format: "json_schema" }
+        else
+          { format: "text" }
+        end
+      end
+
       def normalize_reasoning(reasoning)
         budget_tokens = REASONING_EFFORT_BUDGET_TOKENS[reasoning.to_s] ||
           raise(ArgumentError,
diff --git a/test/unit/options/anthropic_option_mapper_test.rb b/test/unit/options/anthropic_option_mapper_test.rb
index 7172016..dd5ba33 100644
--- a/test/unit/options/anthropic_option_mapper_test.rb
+++ b/test/unit/options/anthropic_option_mapper_test.rb
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require "test_helper"
+require_relative "option_mapper_fixture"
 
 class AnthropicOptionMapperTest < Test
   test "maps max_completion_tokens to max_tokens" do
@@ -48,4 +49,19 @@ class AnthropicOptionMapperTest < Test
       LlmGateway::Adapters::AnthropicOptionMapper.map(reasoning: "extreme")
     end
   end
+
+  test "maps all supported options into final output" do
+    mapped = LlmGateway::Adapters::AnthropicOptionMapper.map(OptionMapperFixture.superset_options)
+
+    assert_equal(
+      {
+        max_tokens: 1234,
+        cache_retention: "long",
+        thinking: { type: "enabled", budget_tokens: 10 * 1024 },
+        temperature: 0.2,
+        output_config: { format: "json_schema" }
+      },
+      mapped
+    )
+  end
 end
diff --git a/test/unit/options/groq_option_mapper_test.rb b/test/unit/options/groq_option_mapper_test.rb
index 45be414..8e0df03 100644
--- a/test/unit/options/groq_option_mapper_test.rb
+++ b/test/unit/options/groq_option_mapper_test.rb
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require "test_helper"
+require_relative "option_mapper_fixture"
 
 class GroqOptionMapperTest < Test
   test "sets defaults for temperature max_completion_tokens and response_format" do
@@ -28,4 +29,20 @@ class GroqOptionMapperTest < Test
 
     assert_equal({ type: "json_object" }, mapped[:response_format])
   end
+
+  test "maps all supported options into final output" do
+    mapped = LlmGateway::Adapters::Groq::OptionMapper.map(OptionMapperFixture.superset_options)
+
+    assert_equal(
+      {
+        max_completion_tokens: 1234,
+        cache_key: "abc",
+        cache_retention: "long",
+        reasoning: "high",
+        temperature: 0.2,
+        response_format: { type: "json_object" }
+      },
+      mapped
+    )
+  end
 end
diff --git a/test/unit/options/openai_chat_completions_option_mapper_test.rb b/test/unit/options/openai_chat_completions_option_mapper_test.rb
index f5a68bc..41bd198 100644
--- a/test/unit/options/openai_chat_completions_option_mapper_test.rb
+++ b/test/unit/options/openai_chat_completions_option_mapper_test.rb
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require "test_helper"
+require_relative "option_mapper_fixture"
 
 class OpenAiChatCompletionsOptionMapperTest < Test
   test "sets default max_completion_tokens" do
@@ -71,4 +72,20 @@ class OpenAiChatCompletionsOptionMapperTest < Test
       LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(cache_retention: "week")
     end
   end
+
+  test "maps all supported options into final output" do
+    mapped = LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(OptionMapperFixture.superset_options)
+
+    assert_equal(
+      {
+        max_completion_tokens: 1234,
+        prompt_cache_key: "abc",
+        prompt_cache_retention: "24h",
+        reasoning_effort: "high",
+        temperature: 0.2,
+        response_format: "json_object"
+      },
+      mapped
+    )
+  end
 end
diff --git a/test/unit/options/openai_codex_option_mapper_test.rb b/test/unit/options/openai_codex_option_mapper_test.rb
index 393ba79..4bb87ac 100644
--- a/test/unit/options/openai_codex_option_mapper_test.rb
+++ b/test/unit/options/openai_codex_option_mapper_test.rb
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require "test_helper"
+require_relative "option_mapper_fixture"
 
 class OpenAiCodexOptionMapperTest < Test
   test "keeps prompt_cache_key but removes retention fields" do
@@ -27,4 +28,18 @@ class OpenAiCodexOptionMapperTest < Test
 
     assert_equal({ effort: "low", summary: "detailed" }, mapped[:reasoning])
   end
+
+  test "maps all supported options into final output" do
+    mapped = LlmGateway::Adapters::OpenAiCodex::OptionMapper.map(OptionMapperFixture.superset_options)
+
+    assert_equal(
+      {
+        prompt_cache_key: "abc",
+        reasoning: { effort: "high", summary: "detailed" },
+        temperature: 0.2,
+        response_format: "json_object"
+      },
+      mapped
+    )
+  end
 end
diff --git a/test/unit/options/openai_responses_option_mapper_test.rb b/test/unit/options/openai_responses_option_mapper_test.rb
index 06b3052..3e8d2e1 100644
--- a/test/unit/options/openai_responses_option_mapper_test.rb
+++ b/test/unit/options/openai_responses_option_mapper_test.rb
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require "test_helper"
+require_relative "option_mapper_fixture"
 
 class OpenAiResponsesOptionMapperTest < Test
   test "maps max_completion_tokens to max_output_tokens" do
@@ -59,4 +60,20 @@ class OpenAiResponsesOptionMapperTest < Test
       LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map(cache_retention: "week")
     end
   end
+
+  test "maps all supported options into final output" do
+    mapped = LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map(OptionMapperFixture.superset_options)
+
+    assert_equal(
+      {
+        max_output_tokens: 1234,
+        prompt_cache_key: "abc",
+        prompt_cache_retention: "24h",
+        reasoning: { effort: "high", summary: "detailed" },
+        temperature: 0.2,
+        response_format: "json_object"
+      },
+      mapped
+    )
+  end
 end
diff --git a/test/unit/options/option_mapper_fixture.rb b/test/unit/options/option_mapper_fixture.rb
new file mode 100644
index 0000000..fb08136
--- /dev/null
+++ b/test/unit/options/option_mapper_fixture.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module OptionMapperFixture
+  module_function
+
+  def superset_options
+    {
+      max_completion_tokens: 1234,
+      cache_key: "abc",
+      cache_retention: "long",
+      reasoning: "high",
+      temperature: 0.2,
+      response_format: "json_object"
+    }
+  end
+end

From 03c43364a1659970946c602c68c6c0c54e4afdef Mon Sep 17 00:00:00 2001
From: gruv <gruv@gruv.dev>
Date: Mon, 6 Apr 2026 16:51:34 +0800
Subject: [PATCH 13/14] fixup! feat: support prompt caching, with
 cache_retention cache_key options

---
 lib/llm_gateway/clients/claude.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/llm_gateway/clients/claude.rb b/lib/llm_gateway/clients/claude.rb
index de718b0..ce9e00d 100644
--- a/lib/llm_gateway/clients/claude.rb
+++ b/lib/llm_gateway/clients/claude.rb
@@ -88,7 +88,7 @@ def apply_message_cache_control(messages, cache_retention)
 
         last_user_message = mapped_messages[last_user_index]
         original_blocks = Array(last_user_message[:content])
-        tagged_indices = [(original_blocks.length - 2), (original_blocks.length - 1)].select { |i| i >= 0 }
+        tagged_indices = [ (original_blocks.length - 2), (original_blocks.length - 1) ].select { |i| i >= 0 }
 
         content_blocks = original_blocks.each_with_index.map do |block, index|
           block = block.is_a?(Hash) ? block.dup : { type: "text", text: block.to_s }

From 479a93a139a0ebd9f9548e6ff8a1aa644a95506e Mon Sep 17 00:00:00 2001
From: gruv <gruv@gruv.dev>
Date: Mon, 6 Apr 2026 16:56:57 +0800
Subject: [PATCH 14/14] refactor: claude supports automatic caching with
 cache-control option

this adds a cache at the last message, so we don't have to do it ourselves
---
 lib/llm_gateway/clients/claude.rb             | 36 ++++++++-----------
 .../unit/options/claude_cache_control_test.rb | 21 ++++++-----
 2 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/lib/llm_gateway/clients/claude.rb b/lib/llm_gateway/clients/claude.rb
index ce9e00d..ea41c2d 100644
--- a/lib/llm_gateway/clients/claude.rb
+++ b/lib/llm_gateway/clients/claude.rb
@@ -43,17 +43,21 @@ def upload_file(filename, content, mime_type = "application/octet-stream")
       private
 
       def build_body(messages, tools: nil, system: [], cache_retention: nil, **options)
+        cache_control = anthropic_cache_control_for(cache_retention)
+
         body = {
           model: model_key,
-          messages: apply_message_cache_control(messages, cache_retention)
+          messages: messages
         }
 
+        tools = apply_tools_cache_control(tools, cache_retention)
         body.merge!(tools: tools) if LlmGateway::Utils.present?(tools)
 
         system = prepend_claude_code_identity(system) if claude_code_oauth_api_key?
         system = apply_system_cache_control(system, cache_retention)
 
         body.merge!(system: system) if LlmGateway::Utils.present?(system)
+        body.merge!(cache_control: cache_control) unless cache_control.nil?
         body.merge!(options)
         body
       end
@@ -76,32 +80,22 @@ def apply_system_cache_control(system, cache_retention)
         end
       end
 
-      def apply_message_cache_control(messages, cache_retention)
-        return messages if messages.nil? || messages.empty? || !messages.is_a?(Array)
+      def apply_tools_cache_control(tools, cache_retention)
+        return tools if tools.nil? || tools.empty? || !tools.is_a?(Array)
 
         cache_control = anthropic_cache_control_for(cache_retention)
-        return messages if cache_control.nil?
-
-        mapped_messages = messages.map(&:dup)
-        last_user_index = mapped_messages.rindex { |message| message[:role] == "user" }
-        return mapped_messages unless last_user_index
+        return tools if cache_control.nil?
 
-        last_user_message = mapped_messages[last_user_index]
-        original_blocks = Array(last_user_message[:content])
-        tagged_indices = [ (original_blocks.length - 2), (original_blocks.length - 1) ].select { |i| i >= 0 }
-
-        content_blocks = original_blocks.each_with_index.map do |block, index|
-          block = block.is_a?(Hash) ? block.dup : { type: "text", text: block.to_s }
-          if tagged_indices.include?(index)
-            block[:cache_control] = cache_control
+        last_index = tools.length - 1
+        tools.each_with_index.map do |tool, index|
+          tool = tool.dup
+          if index == last_index
+            tool[:cache_control] = cache_control
           else
-            block.delete(:cache_control)
+            tool.delete(:cache_control)
           end
-          block
+          tool
         end
-
-        mapped_messages[last_user_index] = last_user_message.merge(content: content_blocks)
-        mapped_messages
       end
 
       def anthropic_cache_control_for(cache_retention)
diff --git a/test/unit/options/claude_cache_control_test.rb b/test/unit/options/claude_cache_control_test.rb
index 507d011..9a6c2be 100644
--- a/test/unit/options/claude_cache_control_test.rb
+++ b/test/unit/options/claude_cache_control_test.rb
@@ -3,7 +3,7 @@
 require "test_helper"
 
 class ClaudeCacheControlTest < Test
-  test "when cache retention is passed it adds cache_control to last system block and up to two blocks in last user message" do
+  test "when cache retention is passed it adds cache_control to last system and tool blocks and sets top-level cache_control" do
     client = LlmGateway::Clients::Claude.new(model_key: "claude-3", api_key: "test")
 
     body = client.send(
@@ -26,21 +26,20 @@ class ClaudeCacheControlTest < Test
 
     expected_cache_control = { type: "ephemeral" }
 
-    system_cache_control_count = body[:system].count { |block| block[:cache_control] == expected_cache_control }
-    tools_cache_control_count = body[:tools].count { |tool| tool[:cache_control] == expected_cache_control }
+    assert_equal expected_cache_control, body[:cache_control]
 
     assert_nil body[:system][0][:cache_control]
     assert_equal expected_cache_control, body[:system][1][:cache_control]
 
+    assert_nil body[:tools][0][:cache_control]
+    assert_equal expected_cache_control, body[:tools][1][:cache_control]
+
     last_user_message = body[:messages].reverse.find { |message| message[:role] == "user" }
     last_user_cache_control_count = Array(last_user_message[:content]).count do |block|
       block[:cache_control] == expected_cache_control
     end
 
-    assert_equal 1, system_cache_control_count
-    assert_equal 0, tools_cache_control_count
-    assert_equal 2, last_user_cache_control_count
-    assert_operator(system_cache_control_count + tools_cache_control_count + last_user_cache_control_count, :<=, 3)
+    assert_equal 0, last_user_cache_control_count
   end
 
   test "uses ttl for long retention on official anthropic base url" do
@@ -50,11 +49,14 @@ class ClaudeCacheControlTest < Test
       :build_body,
       [ { role: "user", content: [ { type: "text", text: "hello" } ] } ],
       system: [ { type: "text", text: "system" } ],
+      tools: [ { name: "tool_1", description: "Tool 1", input_schema: { type: "object", properties: {} } } ],
       cache_retention: "long"
     )
 
+    assert_equal({ type: "ephemeral", ttl: "1h" }, body[:cache_control])
     assert_equal({ type: "ephemeral", ttl: "1h" }, body[:system][0][:cache_control])
-    assert_equal({ type: "ephemeral", ttl: "1h" }, body[:messages][0][:content][0][:cache_control])
+    assert_equal({ type: "ephemeral", ttl: "1h" }, body[:tools][0][:cache_control])
+    assert_nil body[:messages][0][:content][0][:cache_control]
   end
 
   test "does not mutate existing cache control when retention is none" do
@@ -64,10 +66,13 @@ class ClaudeCacheControlTest < Test
       :build_body,
       [ { role: "user", content: [ { type: "text", text: "hello", cache_control: { type: "ephemeral" } } ] } ],
       system: [ { type: "text", text: "system", cache_control: { type: "ephemeral" } } ],
+      tools: [ { name: "tool_1", description: "Tool 1", cache_control: { type: "ephemeral" }, input_schema: { type: "object", properties: {} } } ],
       cache_retention: "none"
     )
 
+    assert_nil body[:cache_control]
     assert_equal({ type: "ephemeral" }, body[:system][0][:cache_control])
+    assert_equal({ type: "ephemeral" }, body[:tools][0][:cache_control])
     assert_equal({ type: "ephemeral" }, body[:messages][0][:content][0][:cache_control])
   end
 end