From 8a5ed447e1a57ce05c7e93eebea4fda49c5577c1 Mon Sep 17 00:00:00 2001 From: billybonks Date: Fri, 3 Apr 2026 15:53:13 +0800 Subject: [PATCH 01/14] breaking: delete claude code custom path The custom path tried to make things easy, but consumers must manage their own token details, because when a token is refreshed they must update the values. Deleting all these custom paths also makes things much simpler on our side. --- lib/llm_gateway.rb | 7 -- lib/llm_gateway/adapters/adapter.rb | 3 +- .../adapters/claude_code/input_mapper.rb | 14 ---- .../adapters/claude_code/messages_adapter.rb | 28 ------- .../adapters/claude_code/option_mapper.rb | 22 ------ .../adapters/claude_code/output_mapper.rb | 12 --- lib/llm_gateway/client.rb | 11 +-- lib/llm_gateway/clients/claude.rb | 48 ++++++++++++ lib/llm_gateway/clients/claude_code.rb | 5 +- .../clients/claude_code/oauth_flow.rb | 2 +- .../clients/claude_code/token_manager.rb | 2 +- test/client_builder_test.rb | 13 ++-- test/integration/clients/claude_test.rb | 74 +++++++++++++++++++ 13 files changed, 134 insertions(+), 107 deletions(-) delete mode 100644 lib/llm_gateway/adapters/claude_code/input_mapper.rb delete mode 100644 lib/llm_gateway/adapters/claude_code/messages_adapter.rb delete mode 100644 lib/llm_gateway/adapters/claude_code/option_mapper.rb delete mode 100644 lib/llm_gateway/adapters/claude_code/output_mapper.rb diff --git a/lib/llm_gateway.rb b/lib/llm_gateway.rb index cda2640..7cd9223 100644 --- a/lib/llm_gateway.rb +++ b/lib/llm_gateway.rb @@ -22,8 +22,6 @@ require_relative "llm_gateway/adapters/claude/input_mapper" require_relative "llm_gateway/adapters/claude/output_mapper" -require_relative "llm_gateway/adapters/claude_code/input_mapper" -require_relative "llm_gateway/adapters/claude_code/output_mapper" require_relative "llm_gateway/adapters/open_ai/file_output_mapper" require_relative "llm_gateway/adapters/open_ai/chat_completions/input_mapper" require_relative "llm_gateway/adapters/open_ai/chat_completions/output_mapper" 
@@ -38,7 +36,6 @@ # Load adapter classes require_relative "llm_gateway/adapters/adapter" require_relative "llm_gateway/adapters/claude/messages_adapter" -require_relative "llm_gateway/adapters/claude_code/messages_adapter" require_relative "llm_gateway/adapters/open_ai/chat_completions_adapter" require_relative "llm_gateway/adapters/open_ai/responses_adapter" require_relative "llm_gateway/adapters/openai_codex/responses_adapter" @@ -118,10 +115,6 @@ def self.reset_configuration! client: Clients::Claude, adapter: Adapters::Claude::MessagesAdapter) - ProviderRegistry.register("anthropic_oauth_messages", - client: Clients::ClaudeCode, - adapter: Adapters::ClaudeCode::MessagesAdapter) - ProviderRegistry.register("openai_apikey_completions", client: Clients::OpenAi, adapter: Adapters::OpenAi::ChatCompletionsAdapter) diff --git a/lib/llm_gateway/adapters/adapter.rb b/lib/llm_gateway/adapters/adapter.rb index 6c26a0a..31ede49 100644 --- a/lib/llm_gateway/adapters/adapter.rb +++ b/lib/llm_gateway/adapters/adapter.rb @@ -95,8 +95,7 @@ def stream_client_method def stream_api_name case self - when LlmGateway::Adapters::Claude::MessagesAdapter, - LlmGateway::Adapters::ClaudeCode::MessagesAdapter + when LlmGateway::Adapters::Claude::MessagesAdapter "messages" when LlmGateway::Adapters::OpenAi::ChatCompletionsAdapter, LlmGateway::Adapters::Groq::ChatCompletionsAdapter diff --git a/lib/llm_gateway/adapters/claude_code/input_mapper.rb b/lib/llm_gateway/adapters/claude_code/input_mapper.rb deleted file mode 100644 index 2c6debe..0000000 --- a/lib/llm_gateway/adapters/claude_code/input_mapper.rb +++ /dev/null @@ -1,14 +0,0 @@ -# frozen_string_literal: true - -require_relative "../claude/input_mapper" - -module LlmGateway - module Adapters - module ClaudeCode - class InputMapper < Claude::InputMapper - # Inherits all mapping from Claude::InputMapper - # The client handles OAuth-specific transformations (tool names, system prompt) - end - end - end -end diff --git 
a/lib/llm_gateway/adapters/claude_code/messages_adapter.rb b/lib/llm_gateway/adapters/claude_code/messages_adapter.rb deleted file mode 100644 index 5c95cf1..0000000 --- a/lib/llm_gateway/adapters/claude_code/messages_adapter.rb +++ /dev/null @@ -1,28 +0,0 @@ -# frozen_string_literal: true - -require_relative "../adapter" -require_relative "input_mapper" -require_relative "option_mapper" -require_relative "output_mapper" -require_relative "../claude/output_mapper" -require_relative "../claude/stream_mapper" - -module LlmGateway - module Adapters - module ClaudeCode - class MessagesAdapter < Adapter - def initialize(client) - super( - client, - input_mapper: InputMapper, - output_mapper: OutputMapper, - file_output_mapper: Claude::FileOutputMapper, - option_mapper: OptionMapper, - client_method: :chat, - stream_mapper: Claude::StreamMapper - ) - end - end - end - end -end diff --git a/lib/llm_gateway/adapters/claude_code/option_mapper.rb b/lib/llm_gateway/adapters/claude_code/option_mapper.rb deleted file mode 100644 index bc8cb99..0000000 --- a/lib/llm_gateway/adapters/claude_code/option_mapper.rb +++ /dev/null @@ -1,22 +0,0 @@ -# frozen_string_literal: true - -require_relative "../anthropic_option_mapper" - -module LlmGateway - module Adapters - module ClaudeCode - module OptionMapper - module_function - - def map(options) - mapped_options = AnthropicOptionMapper.map(options) - - max_completion_tokens = mapped_options.delete(:max_completion_tokens) - mapped_options[:max_tokens] = max_completion_tokens || mapped_options[:max_tokens] || 20480 - - mapped_options - end - end - end - end -end diff --git a/lib/llm_gateway/adapters/claude_code/output_mapper.rb b/lib/llm_gateway/adapters/claude_code/output_mapper.rb deleted file mode 100644 index 430b623..0000000 --- a/lib/llm_gateway/adapters/claude_code/output_mapper.rb +++ /dev/null @@ -1,12 +0,0 @@ -# frozen_string_literal: true - -require_relative "../claude/output_mapper" - -module LlmGateway - module Adapters - 
module ClaudeCode - class OutputMapper < Claude::OutputMapper - end - end - end -end diff --git a/lib/llm_gateway/client.rb b/lib/llm_gateway/client.rb index 0924a53..a89cdc2 100644 --- a/lib/llm_gateway/client.rb +++ b/lib/llm_gateway/client.rb @@ -42,7 +42,6 @@ def self.download_file(provider, **kwargs) end def self.provider_from_model(model) - return "anthropic" if model.start_with?("claude_code/") return "anthropic" if model.start_with?("claude") return "groq" if model.start_with?("llama") return "openai" if model.start_with?("gpt") || @@ -72,15 +71,7 @@ def self.provider_id_from_client(client) def self.build_adapter_from_model(model, api_key: nil, refresh_token: nil, expires_at: nil, api: nil) provider = provider_from_model(model) - if model.start_with?("claude_code/") - LlmGateway.build_provider( - provider: "anthropic_oauth_messages", - model_key: model, - access_token: api_key, - refresh_token: refresh_token, - expires_at: expires_at - ) - elsif api == "responses" + if api == "responses" config = { provider: "#{provider}_apikey_responses", model_key: model diff --git a/lib/llm_gateway/clients/claude.rb b/lib/llm_gateway/clients/claude.rb index 861a7f8..65a7d33 100644 --- a/lib/llm_gateway/clients/claude.rb +++ b/lib/llm_gateway/clients/claude.rb @@ -5,6 +5,8 @@ module LlmGateway module Clients class Claude < BaseClient + CLAUDE_CODE_VERSION = "2.1.2" + def initialize(model_key: "claude-3-7-sonnet-20250219", api_key: ENV["ANTHROPIC_API_KEY"]) @base_endpoint = "https://api.anthropic.com/v1" super(model_key: model_key, api_key: api_key) @@ -17,6 +19,18 @@ def chat(messages, **kwargs) def stream(messages, **kwargs, &block) post_stream("messages", build_body(messages, **kwargs), &block) end + + def get_oauth_access_token(access_token:, refresh_token:, expires_at:, &block) + token_manager = LlmGateway::Clients::ClaudeCode::TokenManager.new( + access_token: access_token, + refresh_token: refresh_token, + expires_at: expires_at + ) + token_manager.on_token_refresh 
= block if block_given? + token_manager.ensure_valid_token + token_manager.access_token + end + def download_file(file_id) get("files/#{file_id}/content") end @@ -34,12 +48,17 @@ def build_body(messages, tools: nil, system: [], **options) } body.merge!(tools: tools) if LlmGateway::Utils.present?(tools) + + system = prepend_claude_code_identity(system) if claude_code_oauth_api_key? + body.merge!(system: system) if LlmGateway::Utils.present?(system) body.merge!(options) body end def build_headers + return claude_code_oauth_headers if claude_code_oauth_api_key? + { "anthropic-version" => "2023-06-01", "content-type" => "application/json", @@ -48,6 +67,35 @@ def build_headers } end + def claude_code_oauth_api_key? + api_key.to_s.start_with?("sk-ant-oat") + end + + def claude_code_oauth_headers + { + "anthropic-version" => "2023-06-01", + "content-type" => "application/json", + "Authorization" => "Bearer #{api_key}", + "anthropic-dangerous-direct-browser-access" => "true", + "anthropic-beta" => "claude-code-20250219,oauth-2025-04-20", + "user-agent" => "claude-cli/#{CLAUDE_CODE_VERSION} (external, cli)", + "x-app" => "cli" + } + end + + def prepend_claude_code_identity(system) + identity = { + type: "text", + text: "You are Claude Code, Anthropic's official CLI for Claude." + } + + if system.nil? || system.empty? 
+ [ identity ] + else + [ identity ] + system + end + end + def handle_client_specific_errors(response, error) case response.code.to_i when 400 diff --git a/lib/llm_gateway/clients/claude_code.rb b/lib/llm_gateway/clients/claude_code.rb index c420fd1..c7e70c4 100644 --- a/lib/llm_gateway/clients/claude_code.rb +++ b/lib/llm_gateway/clients/claude_code.rb @@ -1,12 +1,13 @@ # frozen_string_literal: true require_relative "claude" -require_relative "claude_code/oauth_flow" -require_relative "claude_code/token_manager" module LlmGateway module Clients class ClaudeCode < Claude + require_relative "claude_code/oauth_flow" + require_relative "claude_code/token_manager" + CLAUDE_CODE_VERSION = "2.1.2" attr_reader :token_manager, :access_token diff --git a/lib/llm_gateway/clients/claude_code/oauth_flow.rb b/lib/llm_gateway/clients/claude_code/oauth_flow.rb index 8a9117d..35635ea 100644 --- a/lib/llm_gateway/clients/claude_code/oauth_flow.rb +++ b/lib/llm_gateway/clients/claude_code/oauth_flow.rb @@ -10,7 +10,7 @@ module LlmGateway module Clients - class ClaudeCode < Claude + class ClaudeCode class OAuthFlow CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e" TOKEN_URL = "https://api.anthropic.com/v1/oauth/token" diff --git a/lib/llm_gateway/clients/claude_code/token_manager.rb b/lib/llm_gateway/clients/claude_code/token_manager.rb index a586f32..2f5e845 100644 --- a/lib/llm_gateway/clients/claude_code/token_manager.rb +++ b/lib/llm_gateway/clients/claude_code/token_manager.rb @@ -6,7 +6,7 @@ module LlmGateway module Clients - class ClaudeCode < Claude + class ClaudeCode class TokenManager TOKEN_URL = "https://api.anthropic.com/v1/oauth/token" CLIENT_ID = OAuthFlow::CLIENT_ID diff --git a/test/client_builder_test.rb b/test/client_builder_test.rb index 3027e7e..ebd4361 100644 --- a/test/client_builder_test.rb +++ b/test/client_builder_test.rb @@ -13,16 +13,14 @@ class ClientBuilderTest < Test assert_instance_of LlmGateway::Clients::Claude, adapter.client end - test "builds 
claude code client with oauth messages provider" do + test "builds claude client with anthropic messages provider" do adapter = LlmGateway.build_provider({ - provider: "anthropic_oauth_messages", - access_token: "test-access-token", - refresh_token: "test-refresh-token", - expires_at: Time.now + 3600 + provider: "anthropic_apikey_messages", + api_key: "sk-ant-oat-test-token" }) - assert_instance_of LlmGateway::Adapters::ClaudeCode::MessagesAdapter, adapter - assert_instance_of LlmGateway::Clients::ClaudeCode, adapter.client + assert_instance_of LlmGateway::Adapters::Claude::MessagesAdapter, adapter + assert_instance_of LlmGateway::Clients::Claude, adapter.client end test "builds openai client with default completions adapter" do @@ -76,7 +74,6 @@ class ClientBuilderTest < Test test "provider registry exposes built in providers" do assert LlmGateway::ProviderRegistry.registered?("anthropic_apikey_messages") - assert LlmGateway::ProviderRegistry.registered?("anthropic_oauth_messages") assert LlmGateway::ProviderRegistry.registered?("openai_apikey_completions") assert LlmGateway::ProviderRegistry.registered?("openai_apikey_responses") assert LlmGateway::ProviderRegistry.registered?("groq_apikey_completions") diff --git a/test/integration/clients/claude_test.rb b/test/integration/clients/claude_test.rb index c277f3a..6864b30 100644 --- a/test/integration/clients/claude_test.rb +++ b/test/integration/clients/claude_test.rb @@ -144,4 +144,78 @@ def claude_client assert_equal "tool_use", result[:content][0][:type] assert_equal "get_weather", result[:content][0][:name] end + + test "uses claude code oauth headers for sk-ant-oat api keys" do + stub_request(:post, "https://api.anthropic.com/v1/messages") + .to_return(status: 200, body: { id: "msg_1", content: [], usage: {} }.to_json, + headers: { 'Content-Type': "application/json" }) + + LlmGateway::Clients::Claude.new(api_key: "sk-ant-oat-abc").chat([ { role: "user", content: "hello" } ]) + + assert_requested(:post, 
"https://api.anthropic.com/v1/messages", + headers: { + "Authorization" => "Bearer sk-ant-oat-abc", + "anthropic-beta" => "claude-code-20250219,oauth-2025-04-20", + "anthropic-dangerous-direct-browser-access" => "true", + "x-app" => "cli" + }) + end + + test "prepends claude code identity for sk-ant-oat api keys" do + stub_request(:post, "https://api.anthropic.com/v1/messages") + .with { |request| + body = JSON.parse(request.body) + system = body["system"] + system.is_a?(Array) && + system.length == 1 && + system[0]["type"] == "text" && + system[0]["text"] == "You are Claude Code, Anthropic's official CLI for Claude." + } + .to_return(status: 200, body: { id: "msg_1", content: [], usage: {} }.to_json, + headers: { 'Content-Type': "application/json" }) + + LlmGateway::Clients::Claude.new(api_key: "sk-ant-oat-abc").chat([ { role: "user", content: "hello" } ]) + end + + test "get_oauth_access_token returns existing non-expired token" do + token = claude_client.get_oauth_access_token( + access_token: "valid-token", + refresh_token: "refresh-token", + expires_at: Time.now + 3600 + ) + + assert_equal "valid-token", token + end + + test "get_oauth_access_token refreshes expired token and fires callback" do + callback_payload = nil + + stub_request(:post, "https://api.anthropic.com/v1/oauth/token") + .to_return( + status: 200, + body: { + access_token: "new-access-token", + refresh_token: "new-refresh-token", + expires_in: 3600 + }.to_json, + headers: { 'Content-Type': "application/json" } + ) + + token = claude_client.get_oauth_access_token( + access_token: "expired-token", + refresh_token: "refresh-token", + expires_at: Time.now - 60 + ) do |access_token, refresh_token, expires_at| + callback_payload = { + access_token: access_token, + refresh_token: refresh_token, + expires_at: expires_at + } + end + + assert_equal "new-access-token", token + assert_equal "new-access-token", callback_payload[:access_token] + assert_equal "new-refresh-token", 
callback_payload[:refresh_token] + assert callback_payload[:expires_at].is_a?(Time) + end end From 3f0bcda03628a3966bd697d911b191ee6df37f16 Mon Sep 17 00:00:00 2001 From: billybonks Date: Fri, 3 Apr 2026 16:48:42 +0800 Subject: [PATCH 02/14] breaking: delete codex custom path The custom path tried to make things easy, but consumers must manage their own token details, because when a token is refreshed they must update the values. Deleting all these custom paths also makes things much simpler on our side. --- lib/llm_gateway.rb | 7 +- .../openai_codex/responses_adapter.rb | 4 +- lib/llm_gateway/clients/open_ai.rb | 98 +++- lib/llm_gateway/clients/openai_codex.rb | 207 ------- .../clients/openai_codex/oauth_flow.rb | 2 +- .../clients/openai_codex/token_manager.rb | 2 +- test/integration/clients/openai_codex_test.rb | 515 ------------------ test/integration/clients/openai_test.rb | 69 +++ 8 files changed, 172 insertions(+), 732 deletions(-) delete mode 100644 lib/llm_gateway/clients/openai_codex.rb delete mode 100644 test/integration/clients/openai_codex_test.rb diff --git a/lib/llm_gateway.rb b/lib/llm_gateway.rb index 7cd9223..0a24d00 100644 --- a/lib/llm_gateway.rb +++ b/lib/llm_gateway.rb @@ -12,7 +12,8 @@ require_relative "llm_gateway/clients/claude" require_relative "llm_gateway/clients/claude_code" require_relative "llm_gateway/clients/open_ai" -require_relative "llm_gateway/clients/openai_codex" +require_relative "llm_gateway/clients/openai_codex/oauth_flow" +require_relative "llm_gateway/clients/openai_codex/token_manager" require_relative "llm_gateway/clients/groq" # Load adapters @@ -66,7 +67,7 @@ module OpenAi end module OpenAiCodex - Client = LlmGateway::Clients::OpenAiCodex + Client = LlmGateway::Clients::OpenAi end module Groq @@ -128,6 +129,6 @@ def self.reset_configuration! 
adapter: Adapters::Groq::ChatCompletionsAdapter) ProviderRegistry.register("openai_oauth_codex", - client: Clients::OpenAiCodex, + client: Clients::OpenAi, adapter: Adapters::OpenAiCodex::ResponsesAdapter) end diff --git a/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb b/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb index 67d4ad8..eb858e1 100644 --- a/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb +++ b/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb @@ -28,7 +28,7 @@ def initialize(client) output_mapper: OpenAi::Responses::OutputMapper, file_output_mapper: OpenAi::FileOutputMapper, option_mapper: OptionMapper, - client_method: :chat, + client_method: :chat_codex, stream_mapper: OpenAi::Responses::StreamMapper ) end @@ -36,7 +36,7 @@ def initialize(client) private def stream_client_method - :stream + :stream_codex end def stream_api_name diff --git a/lib/llm_gateway/clients/open_ai.rb b/lib/llm_gateway/clients/open_ai.rb index a007785..02a6fa7 100644 --- a/lib/llm_gateway/clients/open_ai.rb +++ b/lib/llm_gateway/clients/open_ai.rb @@ -5,8 +5,13 @@ module LlmGateway module Clients class OpenAi < BaseClient - def initialize(model_key: "gpt-4o", api_key: ENV["OPENAI_API_KEY"]) + CODEX_BASE_ENDPOINT = "https://chatgpt.com/backend-api/codex" + + attr_reader :account_id + + def initialize(model_key: "gpt-4o", api_key: ENV["OPENAI_API_KEY"], account_id: nil) @base_endpoint = "https://api.openai.com/v1" + @account_id = account_id super(model_key: model_key, api_key: api_key) end @@ -57,6 +62,36 @@ def stream_responses(messages, tools: nil, system: [], **options, &block) post_stream("responses", body, &block) end + def get_oauth_access_token(access_token:, refresh_token:, expires_at:, account_id: nil, &block) + token_manager = LlmGateway::Clients::OpenAi::TokenManager.new( + access_token: access_token, + refresh_token: refresh_token, + expires_at: expires_at, + account_id: account_id + ) + token_manager.on_token_refresh = 
block if block_given? + token_manager.ensure_valid_token + token_manager.access_token + end + + def chat_codex(messages, tools: nil, system: [], account_id: nil, **options) + body = build_codex_body(messages, system, tools, **options) + + completed_response = nil + post_codex_stream("responses", body, account_id: account_id) do |raw_sse| + if raw_sse[:event] == "response.completed" + completed_response = raw_sse.dig(:data, :response) + end + end + + completed_response + end + + def stream_codex(messages, tools: nil, system: [], account_id: nil, **options, &block) + body = build_codex_body(messages, system, tools, **options) + post_codex_stream("responses", body, account_id: account_id, &block) + end + def download_file(file_id) get("files/#{file_id}/content") end @@ -75,6 +110,63 @@ def upload_file(filename, content, mime_type = "application/octet-stream", purpo private + def build_codex_body(messages, system, tools, **options) + instructions = Array(system).filter_map { |s| s.is_a?(Hash) ? s[:content] : s }.join("\n") + instructions = "You are a helpful assistant." if instructions.empty? 
+ + body = { + model: model_key, + instructions: instructions, + input: messages, + store: false, + include: [ "reasoning.encrypted_content" ], + stream: true + } + + body[:tools] = tools if tools + body.merge!(options) + + body + end + + def codex_headers(account_id: nil) + effective_account_id = account_id || @account_id + + headers = { + "content-type" => "application/json", + "Authorization" => "Bearer #{api_key}", + "OpenAI-Beta" => "responses=experimental" + } + headers["chatgpt-account-id"] = effective_account_id if effective_account_id + headers + end + + def post_codex_stream(url_part, body = nil, account_id: nil, &block) + endpoint = "#{CODEX_BASE_ENDPOINT}/#{url_part.sub(%r{^/}, "")}" + uri = URI(endpoint) + http = Net::HTTP.new(uri.host, uri.port) + http.use_ssl = true + http.read_timeout = 480 + http.open_timeout = 10 + + body.merge!(stream: true) + request = Net::HTTP::Post.new(uri) + codex_headers(account_id: account_id).each { |key, value| request[key] = value } + request.body = body.to_json if body + + http.request(request) do |response| + unless response.code.to_i == 200 + full_body = +"" + response.read_body { |chunk| full_body << chunk } + response.instance_variable_set(:@body, full_body) + response.instance_variable_set(:@read, true) + handle_error(response) + end + + parse_sse_stream(response, &block) + end + end + def build_headers { "content-type" => "application/json", @@ -92,9 +184,9 @@ def handle_client_specific_errors(response, error) when 503 raise Errors::OverloadError.new(error["message"], error_code) end - # If we get here, we didn't handle it specifically - raise Errors::APIStatusError.new(error["message"], error_code) + message = error["message"] || "OpenAI request failed with status #{response.code}" + raise Errors::APIStatusError.new(message, error_code) end end end diff --git a/lib/llm_gateway/clients/openai_codex.rb b/lib/llm_gateway/clients/openai_codex.rb deleted file mode 100644 index 56aa40d..0000000 --- 
a/lib/llm_gateway/clients/openai_codex.rb +++ /dev/null @@ -1,207 +0,0 @@ -# frozen_string_literal: true - -require_relative "open_ai" -require_relative "openai_codex/oauth_flow" -require_relative "openai_codex/token_manager" - -module LlmGateway - module Clients - # OpenAI Codex OAuth client. - # - # Uses the ChatGPT backend Codex endpoint with OAuth bearer tokens - # (ChatGPT Plus / Pro subscription) rather than standard OpenAI API keys. - # - # The Codex backend requires streaming mode for all requests; the non-block - # +chat+ method streams internally and returns the completed response object. - # - # Usage (direct): - # - # client = LlmGateway::Clients::OpenAiCodex.new( - # access_token: "...", - # refresh_token: "...", - # expires_at: Time.now + 3600, - # account_id: "..." - # ) - # - # # Non-streaming - # response = client.chat([{ role: "user", content: "Hello" }]) - # - # # Streaming - # client.stream([{ role: "user", content: "Hello" }]) { |sse| puts sse.inspect } - # - # First-time OAuth login: - # - # tokens = LlmGateway::Clients::OpenAiCodex::OAuthFlow.new.login - # # => { access_token:, refresh_token:, expires_at:, account_id: } - # - class OpenAiCodex < OpenAi - CODEX_BASE_ENDPOINT = "https://chatgpt.com/backend-api/codex" - - attr_reader :token_manager, :account_id - attr_accessor :prompt_cache_key - - def initialize( - model_key: "gpt-4o", - access_token: nil, - refresh_token: nil, - expires_at: nil, - account_id: nil, - client_id: OAuthFlow::CLIENT_ID, - reasoning_effort: nil - ) - @reasoning_effort = reasoning_effort - - if refresh_token - @token_manager = TokenManager.new( - access_token: access_token, - refresh_token: refresh_token, - expires_at: expires_at, - account_id: account_id, - client_id: client_id - ) - # Eagerly fetch a token only when none was provided - @token_manager.ensure_valid_token if access_token.nil? 
- access_token = @token_manager.access_token - @account_id = @token_manager.account_id - end - - @oauth_access_token = access_token - @account_id = account_id || @account_id - - # Pass the token as api_key to satisfy BaseClient; override the endpoint. - super(model_key: model_key, api_key: access_token) - @base_endpoint = CODEX_BASE_ENDPOINT - end - - # Register a callback that fires whenever the access token is refreshed. - # The callback receives (access_token, refresh_token, expires_at). - def on_token_refresh=(callback) - @token_manager&.on_token_refresh = callback - end - - # Send a chat request to the Codex backend. - # - # Without a block the stream is consumed internally and the completed - # response Hash is returned. With a block, raw SSE event hashes are - # yielded as they arrive. - def chat(messages, tools: nil, system: [], **options) - ensure_valid_token - - body = build_codex_body(messages, system, tools, **options) - - if block_given? - post_stream_with_retry("responses", body) { |event| yield event } - else - # Codex requires streaming; accumulate and return the completed response. - completed_response = nil - post_stream_with_retry("responses", body) do |raw_sse| - if raw_sse[:event] == "response.completed" - completed_response = raw_sse.dig(:data, :response) - end - end - completed_response - end - end - - # Streaming interface: yields raw SSE event hashes to the block. 
- def stream(messages, tools: nil, system: [], **options, &block) - ensure_valid_token - - body = build_codex_body(messages, system, tools, **options) - post_stream_with_retry("responses", body, &block) - end - - private - - # ------------------------------------------------------------------ - # Token helpers - # ------------------------------------------------------------------ - - def ensure_valid_token - return unless @token_manager - - @token_manager.ensure_valid_token - @oauth_access_token = @token_manager.access_token - @account_id = @token_manager.account_id - end - - def post_with_retry(url_part, body = nil, extra_headers = {}) - post(url_part, body, extra_headers) - rescue Errors::AuthenticationError => e - raise e unless @token_manager&.token_expired? - - @token_manager.refresh_access_token! - @oauth_access_token = @token_manager.access_token - post(url_part, body, extra_headers) - end - - def post_stream_with_retry(url_part, body = nil, extra_headers = {}, &block) - post_stream(url_part, body, extra_headers, &block) - rescue Errors::AuthenticationError => e - raise e unless @token_manager&.token_expired? - - @token_manager.refresh_access_token! - @oauth_access_token = @token_manager.access_token - post_stream(url_part, body, extra_headers, &block) - end - - # ------------------------------------------------------------------ - # Body builder - # ------------------------------------------------------------------ - - def build_codex_body(messages, system, tools, **options) - instructions = Array(system).filter_map { |s| - s.is_a?(Hash) ? s[:content] : s - }.join("\n") - instructions = "You are a helpful assistant." if instructions.empty? 
- - body = { - model: model_key, - instructions: instructions, - input: messages, - store: false, - include: [ "reasoning.encrypted_content" ], - stream: true - } - - body[:prompt_cache_key] = @prompt_cache_key if @prompt_cache_key - body[:prompt_cache_retention] = "24h" if @prompt_cache_key - body[:tools] = tools if tools - - # Resolve reasoning effort: constructor-level @reasoning_effort takes - # precedence, then fall back to the unified per-call reasoning: param. - effort = @reasoning_effort || resolve_reasoning_effort(options[:reasoning]) - body[:reasoning] = { effort: effort, summary: "detailed" } if effort - - body - end - - # Translate a reasoning value (string effort OR hash with :effort - # key) into a plain effort string understood by the Codex backend. - # Anthropic-style hashes (type: "enabled", budget_tokens: …) are ignored - # because the Codex backend has no equivalent concept. - def resolve_reasoning_effort(thinking) - case thinking - when String - thinking - when Hash - thinking[:effort] || thinking["effort"] - end - end - - # ------------------------------------------------------------------ - # Headers - # ------------------------------------------------------------------ - - def build_headers - headers = { - "content-type" => "application/json", - "Authorization" => "Bearer #{@oauth_access_token}", - "OpenAI-Beta" => "responses=experimental" - } - headers["chatgpt-account-id"] = @account_id if @account_id - headers - end - end - end -end diff --git a/lib/llm_gateway/clients/openai_codex/oauth_flow.rb b/lib/llm_gateway/clients/openai_codex/oauth_flow.rb index e1b689d..de44f99 100644 --- a/lib/llm_gateway/clients/openai_codex/oauth_flow.rb +++ b/lib/llm_gateway/clients/openai_codex/oauth_flow.rb @@ -10,7 +10,7 @@ module LlmGateway module Clients - class OpenAiCodex < OpenAi + class OpenAi class OAuthFlow CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann" AUTHORIZE_URL = "https://auth.openai.com/oauth/authorize" diff --git 
a/lib/llm_gateway/clients/openai_codex/token_manager.rb b/lib/llm_gateway/clients/openai_codex/token_manager.rb index 25b276c..a6cd335 100644 --- a/lib/llm_gateway/clients/openai_codex/token_manager.rb +++ b/lib/llm_gateway/clients/openai_codex/token_manager.rb @@ -6,7 +6,7 @@ module LlmGateway module Clients - class OpenAiCodex < OpenAi + class OpenAi class TokenManager attr_reader :access_token, :refresh_token, :expires_at, :account_id, :client_id attr_accessor :on_token_refresh diff --git a/test/integration/clients/openai_codex_test.rb b/test/integration/clients/openai_codex_test.rb deleted file mode 100644 index 5d75c80..0000000 --- a/test/integration/clients/openai_codex_test.rb +++ /dev/null @@ -1,515 +0,0 @@ -# frozen_string_literal: true - -require "test_helper" - -class OpenAiCodexClientTest < Test - CODEX_ENDPOINT = "https://chatgpt.com/backend-api/codex/responses" - - teardown do - WebMock.reset! - end - - # --------------------------------------------------------------------------- - # Helpers - # --------------------------------------------------------------------------- - - def codex_client(access_token: "test-access-token", model_key: "gpt-4o", account_id: "acct_123") - LlmGateway::Clients::OpenAiCodex.new( - access_token: access_token, - model_key: model_key, - account_id: account_id - ) - end - - # Build a minimal SSE response that contains a response.completed event. - def completed_sse_body(response_id: "resp_123", model: "gpt-4o", text: "Hello!", tools: []) - output = if tools.any? 
- tools.map.with_index do |t, i| - { - type: "function_call", - id: "fc_#{i}", - call_id: t[:call_id] || "call_#{i}", - name: t[:name], - arguments: (t[:arguments] || {}).to_json - } - end - else - [ - { - type: "message", - role: "assistant", - id: "msg_#{response_id}", - content: [ { type: "output_text", text: text } ] - } - ] - end - - response_obj = { - id: response_id, - model: model, - output: output, - usage: { input_tokens: 10, output_tokens: 5 } - } - - "event: response.completed\ndata: #{JSON.generate(response: response_obj)}\n\n" - end - - def stub_stream_success(**kwargs) - stub_request(:post, CODEX_ENDPOINT) - .to_return( - status: 200, - body: completed_sse_body(**kwargs), - headers: { "Content-Type" => "text/event-stream" } - ) - end - - def stub_error_response(error_hash, status_code) - stub_request(:post, CODEX_ENDPOINT) - .to_return( - status: status_code, - body: { error: error_hash }.to_json, - headers: { "Content-Type" => "application/json" } - ) - end - - # --------------------------------------------------------------------------- - # Basic functionality - # --------------------------------------------------------------------------- - - test "chat without block returns completed response hash" do - stub_stream_success(response_id: "resp_abc", text: "Hello!") - - result = codex_client.chat([ { role: "user", content: "Hi" } ]) - - assert_equal "resp_abc", result[:id] - assert_equal "gpt-4o", result[:model] - assert result[:output], "Expected output in response" - assert result[:usage], "Expected usage in response" - end - - test "chat with block yields raw SSE events" do - stub_stream_success(response_id: "resp_block") - - events = [] - codex_client.chat([ { role: "user", content: "Hi" } ]) { |e| events << e } - - completed = events.find { |e| e[:event] == "response.completed" } - assert completed, "Expected response.completed event" - assert_equal "resp_block", completed.dig(:data, :response, :id) - end - - test "stream yields raw SSE events" 
do - stub_stream_success(response_id: "resp_stream") - - events = [] - codex_client.stream([ { role: "user", content: "Hi" } ]) { |e| events << e } - - assert_any_event(events, "response.completed") - end - - # --------------------------------------------------------------------------- - # Request body - # --------------------------------------------------------------------------- - - test "sends required Codex body fields" do - stub_stream_success - - codex_client.chat([ { role: "user", content: "Hi" } ]) - - assert_requested(:post, CODEX_ENDPOINT) do |req| - body = JSON.parse(req.body) - body["stream"] == true && - body["store"] == false && - body["include"]&.include?("reasoning.encrypted_content") && - body.key?("instructions") && - body.key?("input") - end - end - - test "passes instructions from system messages" do - stub_stream_success - - system = [ { type: "text", content: "You are a coder." } ] - codex_client.chat([ { role: "user", content: "Hi" } ], system: system) - - assert_requested(:post, CODEX_ENDPOINT) do |req| - body = JSON.parse(req.body) - body["instructions"] == "You are a coder." - end - end - - test "defaults instructions to helpful assistant when system is empty" do - stub_stream_success - - codex_client.chat([ { role: "user", content: "Hi" } ], system: []) - - assert_requested(:post, CODEX_ENDPOINT) do |req| - body = JSON.parse(req.body) - body["instructions"] == "You are a helpful assistant." - end - end - - test "includes tools when provided" do - stub_stream_success - - tools = [ { type: "function", name: "get_weather", description: "Get weather", parameters: {} } ] - codex_client.chat([ { role: "user", content: "Weather?" } ], tools: tools) - - assert_requested(:post, CODEX_ENDPOINT) do |req| - body = JSON.parse(req.body) - sent_tools = body["tools"] || [] - sent_tools.any? 
{ |t| t["name"] == "get_weather" } - end - end - - test "includes prompt_cache_key and retention when set" do - stub_stream_success - - client = codex_client - client.prompt_cache_key = "my-cache-key" - client.chat([ { role: "user", content: "Hi" } ]) - - assert_requested(:post, CODEX_ENDPOINT) do |req| - body = JSON.parse(req.body) - body["prompt_cache_key"] == "my-cache-key" && - body["prompt_cache_retention"] == "24h" - end - end - - test "includes reasoning when reasoning_effort is set" do - stub_stream_success - - client = LlmGateway::Clients::OpenAiCodex.new( - access_token: "tok", - reasoning_effort: "medium" - ) - client.chat([ { role: "user", content: "Hi" } ]) - - assert_requested(:post, CODEX_ENDPOINT) do |req| - body = JSON.parse(req.body) - body["reasoning"] == { "effort" => "medium", "summary" => "detailed" } - end - end - - test "chat accepts unified reasoning option" do - stub_stream_success - - codex_client.chat([ { role: "user", content: "Hi" } ], reasoning: { effort: "high" }) - - assert_requested(:post, CODEX_ENDPOINT) do |req| - body = JSON.parse(req.body) - body["reasoning"] == { "effort" => "high", "summary" => "detailed" } - end - end - - test "stream accepts unified reasoning option" do - stub_stream_success - - codex_client.stream([ { role: "user", content: "Hi" } ], reasoning: "low") { |_e| } - - assert_requested(:post, CODEX_ENDPOINT) do |req| - body = JSON.parse(req.body) - body["reasoning"] == { "effort" => "low", "summary" => "detailed" } - end - end - - # --------------------------------------------------------------------------- - # Headers - # --------------------------------------------------------------------------- - - test "sends Bearer authorization header" do - stub_stream_success - - codex_client(access_token: "my-oauth-token").chat([ { role: "user", content: "Hi" } ]) - - assert_requested(:post, CODEX_ENDPOINT, - headers: { "Authorization" => "Bearer my-oauth-token" }) - end - - test "sends OpenAI-Beta 
responses=experimental header" do - stub_stream_success - - codex_client.chat([ { role: "user", content: "Hi" } ]) - - assert_requested(:post, CODEX_ENDPOINT, - headers: { "OpenAI-Beta" => "responses=experimental" }) - end - - test "sends chatgpt-account-id header when account_id present" do - stub_stream_success - - codex_client(account_id: "acct_xyz").chat([ { role: "user", content: "Hi" } ]) - - assert_requested(:post, CODEX_ENDPOINT, - headers: { "chatgpt-account-id" => "acct_xyz" }) - end - - test "omits chatgpt-account-id header when account_id is nil" do - stub_stream_success - - LlmGateway::Clients::OpenAiCodex.new(access_token: "tok").chat([ { role: "user", content: "Hi" } ]) - - assert_not_requested(:post, CODEX_ENDPOINT, - headers: { "chatgpt-account-id" => /.*/ }) - end - - # --------------------------------------------------------------------------- - # Error handling - # --------------------------------------------------------------------------- - - test "raises AuthenticationError on 401" do - stub_error_response({ type: "authentication_error", message: "Invalid bearer token" }, 401) - - error = assert_raises(LlmGateway::Errors::AuthenticationError) do - codex_client.chat([ { role: "user", content: "Hi" } ]) - end - assert_equal "Invalid bearer token", error.message - end - - test "raises BadRequestError on 400" do - stub_error_response({ type: "invalid_request_error", message: "Bad input" }, 400) - - error = assert_raises(LlmGateway::Errors::BadRequestError) do - codex_client.chat([ { role: "user", content: "Hi" } ]) - end - assert_equal "Bad input", error.message - end - - test "raises NotFoundError on 404" do - stub_error_response({ type: "not_found_error", message: "model not found" }, 404) - - assert_raises(LlmGateway::Errors::NotFoundError) do - codex_client.chat([ { role: "user", content: "Hi" } ]) - end - end - - test "raises RateLimitError on 429" do - stub_error_response({ type: "rate_limit_error", message: "rate limit exceeded" }, 429) - - 
assert_raises(LlmGateway::Errors::RateLimitError) do - codex_client.chat([ { role: "user", content: "Hi" } ]) - end - end - - test "raises OverloadError on 503" do - stub_error_response({ type: "service_unavailable", message: "overloaded" }, 503) - - assert_raises(LlmGateway::Errors::OverloadError) do - codex_client.chat([ { role: "user", content: "Hi" } ]) - end - end - - test "raises InternalServerError on 500" do - stub_error_response({ type: "server_error", message: "internal error" }, 500) - - assert_raises(LlmGateway::Errors::InternalServerError) do - codex_client.chat([ { role: "user", content: "Hi" } ]) - end - end - - # --------------------------------------------------------------------------- - # Token manager - # --------------------------------------------------------------------------- - - test "creates token manager when refresh_token is provided" do - stub_request(:post, "https://auth.openai.com/oauth/token") - .to_return( - status: 200, - body: { - access_token: "new-access-token", - refresh_token: "new-refresh-token", - expires_in: 3600 - }.to_json, - headers: { "Content-Type" => "application/json" } - ) - - # No access_token → eagerly fetches - client = LlmGateway::Clients::OpenAiCodex.new(refresh_token: "old-refresh-token") - - refute_nil client.token_manager - assert_equal "new-access-token", client.token_manager.access_token - end - - test "skips eager refresh when access_token is supplied alongside refresh_token" do - client = LlmGateway::Clients::OpenAiCodex.new( - access_token: "existing-token", - refresh_token: "refresh-token", - expires_at: Time.now + 3600 - ) - - refute_nil client.token_manager - assert_equal "existing-token", client.token_manager.access_token - end - - test "retries request after token refresh on AuthenticationError with expired token" do - token_manager = mock("token_manager") - token_manager.stubs(:ensure_valid_token) - token_manager.stubs(:access_token).returns("refreshed-token") - 
token_manager.stubs(:account_id).returns(nil) - token_manager.stubs(:token_expired?).returns(true) - token_manager.expects(:refresh_access_token!).once - - client = codex_client - client.instance_variable_set(:@token_manager, token_manager) - - stub_request(:post, CODEX_ENDPOINT).to_return( - { - status: 401, - body: { error: { type: "authentication_error", message: "expired" } }.to_json, - headers: { "Content-Type" => "application/json" } - }, - { - status: 200, - body: completed_sse_body(response_id: "resp_retry"), - headers: { "Content-Type" => "text/event-stream" } - } - ) - - result = client.chat([ { role: "user", content: "Hi" } ]) - assert_equal "resp_retry", result[:id] - end - - test "does not retry when token is not expired" do - token_manager = mock("token_manager") - token_manager.stubs(:ensure_valid_token) - token_manager.stubs(:access_token).returns("test-token") - token_manager.stubs(:account_id).returns(nil) - token_manager.stubs(:token_expired?).returns(false) - - client = codex_client - client.instance_variable_set(:@token_manager, token_manager) - - stub_error_response({ type: "authentication_error", message: "invalid token" }, 401) - - assert_raises(LlmGateway::Errors::AuthenticationError) do - client.chat([ { role: "user", content: "Hi" } ]) - end - end - - test "on_token_refresh= delegates to token_manager" do - refresh_called = false - callback = ->(_at, _rt, _exp) { refresh_called = true } - - stub_request(:post, "https://auth.openai.com/oauth/token") - .to_return( - status: 200, - body: { - access_token: "refreshed", - refresh_token: "new-rt", - expires_in: 3600 - }.to_json, - headers: { "Content-Type" => "application/json" } - ) - - client = LlmGateway::Clients::OpenAiCodex.new( - access_token: "tok", - refresh_token: "rt", - expires_at: Time.now + 3600 - ) - client.on_token_refresh = callback - client.token_manager.refresh_access_token! 
- - assert refresh_called, "Expected on_token_refresh callback to be called" - end - - # --------------------------------------------------------------------------- - # OAuthFlow constants - # --------------------------------------------------------------------------- - - test "OAuthFlow has the correct CLIENT_ID" do - assert_equal "app_EMoamEEZ73f0CkXaXp7hrann", - LlmGateway::Clients::OpenAiCodex::OAuthFlow::CLIENT_ID - end - - test "OAuthFlow start returns authorization_url, code_verifier, and state" do - flow = LlmGateway::Clients::OpenAiCodex::OAuthFlow.new - result = flow.start - - assert result[:authorization_url].start_with?("https://auth.openai.com/oauth/authorize"), - "Expected OpenAI authorize URL" - assert result[:code_verifier], "Expected code_verifier" - assert result[:state], "Expected state" - end - - test "OAuthFlow authorization_url includes required params" do - flow = LlmGateway::Clients::OpenAiCodex::OAuthFlow.new - result = flow.start(state: "teststate") - uri = URI.parse(result[:authorization_url]) - params = URI.decode_www_form(uri.query).to_h - - assert_equal "code", params["response_type"] - assert_equal "app_EMoamEEZ73f0CkXaXp7hrann", params["client_id"] - assert_equal "S256", params["code_challenge_method"] - assert_equal "teststate", params["state"] - assert_equal "true", params["codex_cli_simplified_flow"] - end - - # --------------------------------------------------------------------------- - # TokenManager - # --------------------------------------------------------------------------- - - test "TokenManager token_expired? returns true when expires_at is nil" do - tm = LlmGateway::Clients::OpenAiCodex::TokenManager.new(refresh_token: "rt") - assert tm.token_expired? - end - - test "TokenManager token_expired? returns false for future expiry" do - tm = LlmGateway::Clients::OpenAiCodex::TokenManager.new( - refresh_token: "rt", - expires_at: Time.now + 3600 - ) - refute tm.token_expired? - end - - test "TokenManager token_expired? 
returns true for past expiry" do - tm = LlmGateway::Clients::OpenAiCodex::TokenManager.new( - refresh_token: "rt", - expires_at: Time.now - 1 - ) - assert tm.token_expired? - end - - test "TokenManager refresh_access_token! updates tokens and fires callback" do - received = [] - - stub_request(:post, "https://auth.openai.com/oauth/token") - .to_return( - status: 200, - body: { - access_token: "new-at", - refresh_token: "new-rt", - expires_in: 7200 - }.to_json, - headers: { "Content-Type" => "application/json" } - ) - - tm = LlmGateway::Clients::OpenAiCodex::TokenManager.new( - refresh_token: "old-rt", - expires_at: Time.now - 1 - ) - tm.on_token_refresh = ->(at, rt, exp) { received << { at: at, rt: rt, exp: exp } } - - tm.refresh_access_token! - - assert_equal "new-at", tm.access_token - assert_equal "new-rt", tm.refresh_token - assert_equal 1, received.size - assert_equal "new-at", received.first[:at] - end - - # --------------------------------------------------------------------------- - # Provider registry - # --------------------------------------------------------------------------- - - test "openai_oauth_codex is registered in ProviderRegistry" do - assert LlmGateway::ProviderRegistry.registered?("openai_oauth_codex"), - "Expected openai_oauth_codex to be registered" - end - - private - - def assert_any_event(events, event_type) - found = events.any? 
{ |e| e[:event] == event_type } - assert found, "Expected to find event '#{event_type}' in #{events.map { |e| e[:event] }.inspect}" - end -end diff --git a/test/integration/clients/openai_test.rb b/test/integration/clients/openai_test.rb index 5eee2bf..87db73a 100644 --- a/test/integration/clients/openai_test.rb +++ b/test/integration/clients/openai_test.rb @@ -113,4 +113,73 @@ def openai_client openai_client.chat([ { 'role': "user", 'content': "hello" } ]) end end + + test "get_oauth_access_token returns existing non-expired codex token" do + token = openai_client.get_oauth_access_token( + access_token: "valid-token", + refresh_token: "refresh-token", + expires_at: Time.now + 3600 + ) + + assert_equal "valid-token", token + end + + test "get_oauth_access_token refreshes expired codex token and fires callback" do + callback_payload = nil + + stub_request(:post, "https://auth.openai.com/oauth/token") + .to_return( + status: 200, + body: { + access_token: "new-access-token", + refresh_token: "new-refresh-token", + expires_in: 3600 + }.to_json, + headers: { 'Content-Type': "application/json" } + ) + + token = openai_client.get_oauth_access_token( + access_token: "expired-token", + refresh_token: "refresh-token", + expires_at: Time.now - 60 + ) do |access_token, refresh_token, expires_at| + callback_payload = { + access_token: access_token, + refresh_token: refresh_token, + expires_at: expires_at + } + end + + assert_equal "new-access-token", token + assert_equal "new-access-token", callback_payload[:access_token] + assert_equal "new-refresh-token", callback_payload[:refresh_token] + assert callback_payload[:expires_at].is_a?(Time) + end + + test "chat_codex routes through codex endpoint" do + stub_request(:post, "https://chatgpt.com/backend-api/codex/responses") + .to_return( + status: 200, + body: "event: response.completed\ndata: #{JSON.generate(response: { id: "resp_1", model: "gpt-4o", output: [], usage: {} })}\n\n", + headers: { "Content-Type" => 
"text/event-stream" } + ) + + result = LlmGateway::Clients::OpenAi.new(api_key: "oauth-token").chat_codex([ { role: "user", content: "hello" } ]) + + assert_equal "resp_1", result[:id] + end + + test "stream_codex yields codex SSE events" do + stub_request(:post, "https://chatgpt.com/backend-api/codex/responses") + .to_return( + status: 200, + body: "event: response.completed\ndata: #{JSON.generate(response: { id: "resp_stream", model: "gpt-4o", output: [], usage: {} })}\n\n", + headers: { "Content-Type" => "text/event-stream" } + ) + + events = [] + LlmGateway::Clients::OpenAi.new(api_key: "oauth-token").stream_codex([ { role: "user", content: "hello" } ]) { |e| events << e } + + assert events.any? { |e| e[:event] == "response.completed" } + end end From 147bfc91082fc5f7d0caea426c0126c48b96d1b0 Mon Sep 17 00:00:00 2001 From: billybonks Date: Fri, 3 Apr 2026 17:02:43 +0800 Subject: [PATCH 03/14] update stream test --- test/integration/stream_test.rb | 219 ++++++++++++++++++++------------ 1 file changed, 138 insertions(+), 81 deletions(-) diff --git a/test/integration/stream_test.rb b/test/integration/stream_test.rb index 8637e0c..13b2eb7 100644 --- a/test/integration/stream_test.rb +++ b/test/integration/stream_test.rb @@ -4,6 +4,8 @@ require "vcr" require "json" require "base64" +require "time" +require "fileutils" require_relative "../utils/calculator_tool_helper" class ProvidersJsonTest < Test @@ -12,32 +14,106 @@ def teardown LlmGateway.reset_configuration! end - def load_provider(name) - providers_path = File.expand_path("../fixtures/providers.json", __dir__) - skip("Skipped: missing providers fixture at #{providers_path}") unless File.exist?(providers_path) + def load_provider(provider:, model:) + config = { + "provider" => provider, + "model_key" => model + } + + case provider + when "openai_apikey_completions", "openai_apikey_responses" + api_key = ENV["OPENAI_API_KEY"].to_s + skip("Skipped: missing OPENAI_API_KEY") if api_key.empty? 
+ config["api_key"] = api_key + when "anthropic_apikey_messages" + api_key = ENV["ANTHROPIC_API_KEY"].to_s + skip("Skipped: missing ANTHROPIC_API_KEY") if api_key.empty? + config["api_key"] = api_key + when "anthropic_oauth_messages" + config["provider"] = "anthropic_apikey_messages" + config["api_key"] = oauth_access_token_for("anthropic") + when "openai_oauth_codex" + creds = load_auth_credentials("openai") + config["api_key"] = oauth_access_token_for("openai") + config["account_id"] = creds["account_id"] if creds["account_id"] + end - providers = JSON.parse(File.read(providers_path)) - provider = providers.find { |entry| entry["name"] == name } - skip("Skipped: provider not found in providers.json: #{name}") unless provider + LlmGateway.build_provider(config) + end - config = provider.fetch("config").dup - key_env = config.delete("key_env") - config["key"] = ENV.fetch(key_env) if key_env + def skip_on_authentication_error + yield + rescue LlmGateway::Errors::AuthenticationError, + LlmGateway::Errors::BadRequestError, + LlmGateway::Errors::RateLimitError, + LlmGateway::Errors::APIStatusError => e + skip("Skipped due to provider error: #{e.message}") + end - LlmGateway.configure([ - { - "name" => provider.fetch("name"), - "config" => config - } - ]) + def auth_file_path + File.expand_path(ENV.fetch("LLM_GATEWAY_AUTH_FILE", "~/.config/llm_gateway/auth.json")) + end + + def load_auth_credentials(provider) + path = auth_file_path + skip("Skipped: missing auth file at #{path}") unless File.exist?(path) + + auth = JSON.parse(File.read(path)) + creds = auth[provider] + skip("Skipped: missing #{provider} credentials in #{path}") unless creds - LlmGateway.public_send(name) + creds end - def skip_on_authentication_error - yield - rescue LlmGateway::Errors::AuthenticationError => e - skip("Skipped due to authentication error: #{e.message}") + def persist_auth_credentials(provider, attributes) + path = auth_file_path + FileUtils.mkdir_p(File.dirname(path)) + + auth = 
File.exist?(path) ? JSON.parse(File.read(path)) : {} + auth[provider] ||= {} + auth[provider].merge!(attributes) + + File.write(path, JSON.pretty_generate(auth) + "\n") + end + + def oauth_access_token_for(provider) + creds = load_auth_credentials(provider) + + case provider + when "anthropic" + token = LlmGateway::Clients::Claude.new.get_oauth_access_token( + access_token: creds["access_token"], + refresh_token: creds["refresh_token"], + expires_at: creds["expires_at"] + ) do |access_token, refresh_token, expires_at| + persist_auth_credentials("anthropic", { + "access_token" => access_token, + "refresh_token" => refresh_token, + "expires_at" => expires_at&.iso8601 + }) + end + + persist_auth_credentials("anthropic", { "access_token" => token }) if token != creds["access_token"] + token + when "openai" + token = LlmGateway::Clients::OpenAi.new.get_oauth_access_token( + access_token: creds["access_token"], + refresh_token: creds["refresh_token"], + expires_at: creds["expires_at"], + account_id: creds["account_id"] + ) do |access_token, refresh_token, expires_at| + persist_auth_credentials("openai", { + "access_token" => access_token, + "refresh_token" => refresh_token, + "expires_at" => expires_at&.iso8601 + }) + end + + persist_auth_credentials("openai", { "access_token" => token }) if token != creds["access_token"] + token + else + raise ArgumentError, "Unsupported OAuth provider: #{provider}" + end end def assert_basic_text_generation_result(message, expected_text) @@ -297,108 +373,89 @@ def basic_image_streaming_test(adapter) assert_includes lower_content, "red" assert_includes lower_content, "circle" end - def self.provider_names - providers_path = File.expand_path("../fixtures/providers.json", __dir__) - return [] unless File.exist?(providers_path) - - JSON.parse(File.read(providers_path)).map { |entry| entry["name"] } - end - - self.provider_names.each do |provider| - test "#{provider} basic text generation" do + def self.define_stream_tests_for(name:, 
provider:, model:) + test "#{name} basic text generation" do skip_on_authentication_error do without_vcr do - adapter = load_provider(provider) + adapter = load_provider(provider:, model:) basic_text_generation_test(adapter) end end end - test "#{provider} basic tool call" do + test "#{name} basic tool call" do skip_on_authentication_error do without_vcr do - adapter = load_provider(provider) + adapter = load_provider(provider:, model:) basic_tool_call(adapter) end end end - test "#{provider} basic thinking" do + test "#{name} basic thinking" do skip_on_authentication_error do without_vcr do - adapter = load_provider(provider) + adapter = load_provider(provider:, model:) basic_thinking_test(adapter, reasoning: "high") end end end - test "#{provider} text streaming" do + test "#{name} text streaming" do skip_on_authentication_error do without_vcr do - adapter = load_provider(provider) + adapter = load_provider(provider:, model:) basic_streaming_text_test(adapter) end end end - test "#{provider} multi turn tool streaming" do + test "#{name} multi turn tool streaming" do skip_on_authentication_error do without_vcr do - adapter = load_provider(provider) + adapter = load_provider(provider:, model:) multi_turn_tool_stream_test(adapter, reasoning: "high") end end end - test "#{provider} image streaming" do + test "#{name} image streaming" do skip_on_authentication_error do without_vcr do - adapter = load_provider(provider) + adapter = load_provider(provider:, model:) basic_image_streaming_test(adapter) end end end end - # test "loads providers json and does anthropic basic text generation" do - # without_vcr do - # adapter = load_provider("anthropic_oauth") - # basic_text_generation_test(adapter) - # end - # end - - # test "loads providers json and does anthropic basic tool call" do - # without_vcr do - # adapter = load_provider("anthropic_oauth") - # basic_tool_call(adapter) - # end - # end - - # test "loads providers json and does anthropic basic thinking" do - # 
without_vcr do - # adapter = load_provider("anthropic_oauth") - # basic_thinking_test(adapter) - # end - # end - - # test "loads providers json and does anthropic text streaming" do - # without_vcr do - # adapter = load_provider("anthropic_oauth") - # basic_streaming_text_test(adapter) - # end - # end - - # test "loads providers json and does anthropic multi turn tool streaming" do - # without_vcr do - # adapter = load_provider("anthropic_oauth") - # multi_turn_tool_stream_test(adapter) - # end - # end - - # test "loads providers json and does anthropic image streaming" do - # without_vcr do - # adapter = load_provider("anthropic_oauth") - # basic_image_streaming_test(adapter) - # end - # end + define_stream_tests_for( + name: "openai_apikey_completions_gpt_5_1", + provider: "openai_apikey_completions", + model: "gpt-5.1" + ) + + define_stream_tests_for( + name: "anthropic_apikey_messages_claude_sonnet_4", + provider: "anthropic_apikey_messages", + model: "claude-sonnet-4-20250514" + ) + + define_stream_tests_for( + name: "openai_apikey_responses_gpt_5_4", + provider: "openai_apikey_responses", + model: "gpt-5.4" + ) + + define_stream_tests_for( + name: "anthropic_oauth_messages_claude_sonnet_4", + provider: "anthropic_oauth_messages", + model: "claude-sonnet-4-20250514" + ) + + define_stream_tests_for( + name: "openai_oauth_codex_gpt_5_4", + provider: "openai_oauth_codex", + model: "gpt-5.4" + ) end From 8241a2b624d5c58736fdd3c1246e6a4c9da06822 Mon Sep 17 00:00:00 2001 From: billybonks Date: Fri, 3 Apr 2026 17:04:15 +0800 Subject: [PATCH 04/14] update oauth scripts --- scripts/create_anthropic_credentials.rb | 23 ++++++++++++---- scripts/create_openai_codex_credentials.rb | 31 +++++++++++++++------- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/scripts/create_anthropic_credentials.rb b/scripts/create_anthropic_credentials.rb index 701b2b6..a40a0c1 100755 --- a/scripts/create_anthropic_credentials.rb +++ b/scripts/create_anthropic_credentials.rb 
@@ -3,6 +3,7 @@ require "optparse" require "json" +require "fileutils" require_relative "../lib/llm_gateway" module Scripts @@ -11,7 +12,7 @@ def initialize(argv) @options = { client_id: LlmGateway::Clients::ClaudeCode::OAuthFlow::CLIENT_ID, scopes: LlmGateway::Clients::ClaudeCode::OAuthFlow::DEFAULT_SCOPES, - output: nil + output: File.expand_path(ENV.fetch("LLM_GATEWAY_AUTH_FILE", "~/.config/llm_gateway/auth.json")) } parse_options(argv) end @@ -45,10 +46,7 @@ def run expires_at: tokens[:expires_at]&.iso8601 } - if @options[:output] - File.write(@options[:output], JSON.pretty_generate(credentials) + "\n") - puts "Credentials written to #{@options[:output]}" - end + persist_credentials("anthropic", credentials) puts "Credentials:" puts JSON.pretty_generate(credentials) @@ -82,6 +80,21 @@ def parse_options(argv) end.parse!(argv) end + def persist_credentials(provider, credentials) + output_path = File.expand_path(@options[:output]) + FileUtils.mkdir_p(File.dirname(output_path)) + + existing = if File.exist?(output_path) + JSON.parse(File.read(output_path)) + else + {} + end + + existing[provider] = credentials + File.write(output_path, JSON.pretty_generate(existing) + "\n") + puts "Credentials written to #{output_path}" + end + def shell_escape(value) return "''" if value.nil? || value.empty? 
diff --git a/scripts/create_openai_codex_credentials.rb b/scripts/create_openai_codex_credentials.rb index 203a86b..011e86b 100755 --- a/scripts/create_openai_codex_credentials.rb +++ b/scripts/create_openai_codex_credentials.rb @@ -3,22 +3,23 @@ require "optparse" require "json" +require "fileutils" require_relative "../lib/llm_gateway" module Scripts class CreateOpenAiCodexCredentials def initialize(argv) @options = { - client_id: LlmGateway::Clients::OpenAiCodex::OAuthFlow::CLIENT_ID, - redirect_uri: LlmGateway::Clients::OpenAiCodex::OAuthFlow::REDIRECT_URI, - scope: LlmGateway::Clients::OpenAiCodex::OAuthFlow::SCOPE, - output: nil + client_id: LlmGateway::Clients::OpenAi::OAuthFlow::CLIENT_ID, + redirect_uri: LlmGateway::Clients::OpenAi::OAuthFlow::REDIRECT_URI, + scope: LlmGateway::Clients::OpenAi::OAuthFlow::SCOPE, + output: File.expand_path(ENV.fetch("LLM_GATEWAY_AUTH_FILE", "~/.config/llm_gateway/auth.json")) } parse_options(argv) end def run - flow = LlmGateway::Clients::OpenAiCodex::OAuthFlow.new( + flow = LlmGateway::Clients::OpenAi::OAuthFlow.new( client_id: @options[:client_id], redirect_uri: @options[:redirect_uri], scope: @options[:scope] @@ -49,10 +50,7 @@ def run expires_at: tokens[:expires_at]&.iso8601 } - if @options[:output] - File.write(@options[:output], JSON.pretty_generate(credentials) + "\n") - puts "Credentials written to #{@options[:output]}" - end + persist_credentials("openai", credentials) puts "" puts "Credentials:" @@ -92,6 +90,21 @@ def parse_options(argv) end.parse!(argv) end + def persist_credentials(provider, credentials) + output_path = File.expand_path(@options[:output]) + FileUtils.mkdir_p(File.dirname(output_path)) + + existing = if File.exist?(output_path) + JSON.parse(File.read(output_path)) + else + {} + end + + existing[provider] = credentials + File.write(output_path, JSON.pretty_generate(existing) + "\n") + puts "Credentials written to #{output_path}" + end + def shell_escape(value) return "''" if value.nil? 
|| value.empty? From 1812e4dc30c20d7cfb7d575636147c3f9033840f Mon Sep 17 00:00:00 2001 From: billybonks Date: Fri, 3 Apr 2026 21:30:23 +0800 Subject: [PATCH 05/14] fixup! refactor: move option wraggling to option mapper --- lib/llm_gateway/adapters/openai_codex/option_mapper.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/llm_gateway/adapters/openai_codex/option_mapper.rb b/lib/llm_gateway/adapters/openai_codex/option_mapper.rb index 9694cae..1d9e149 100644 --- a/lib/llm_gateway/adapters/openai_codex/option_mapper.rb +++ b/lib/llm_gateway/adapters/openai_codex/option_mapper.rb @@ -10,7 +10,11 @@ module OptionMapper def map(options) mapped_options = OpenAi::Responses::OptionMapper.map(options) - mapped_options[:max_completion_tokens] ||= 20480 + + # Codex endpoint currently rejects token limit parameters. + mapped_options.delete(:max_output_tokens) + mapped_options.delete(:max_completion_tokens) + mapped_options end end From 27636f7e08e2765a3235519f9dba807fc7ef03af Mon Sep 17 00:00:00 2001 From: billybonks Date: Fri, 3 Apr 2026 21:31:03 +0800 Subject: [PATCH 06/14] fixup! refactor: move option wraggling to option mapper --- lib/llm_gateway/clients/open_ai.rb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/llm_gateway/clients/open_ai.rb b/lib/llm_gateway/clients/open_ai.rb index 02a6fa7..059dffe 100644 --- a/lib/llm_gateway/clients/open_ai.rb +++ b/lib/llm_gateway/clients/open_ai.rb @@ -185,7 +185,14 @@ def handle_client_specific_errors(response, error) raise Errors::OverloadError.new(error["message"], error_code) end # If we get here, we didn't handle it specifically - message = error["message"] || "OpenAI request failed with status #{response.code}" + fallback_body = response.body.to_s.strip + fallback_message = if fallback_body.empty? 
+ "OpenAI request failed with status #{response.code}" + else + "OpenAI request failed with status #{response.code}: #{fallback_body}" + end + + message = error["message"] || fallback_message raise Errors::APIStatusError.new(message, error_code) end end From f5fb0e19ef0ff528d8fb1b1ec7d46f7ba316a1ab Mon Sep 17 00:00:00 2001 From: billybonks Date: Sat, 4 Apr 2026 12:24:13 +0800 Subject: [PATCH 07/14] fix: throw prompt too long errors when streaming as well --- .../adapters/claude/stream_mapper.rb | 4 + .../open_ai/chat_completions/stream_mapper.rb | 5 + .../open_ai/responses/stream_mapper.rb | 5 + lib/llm_gateway/clients/claude.rb | 7 +- lib/llm_gateway/clients/groq.rb | 17 +- lib/llm_gateway/clients/open_ai.rb | 9 +- lib/llm_gateway/errors.rb | 21 +++ test/integration/prompt_too_long_live_test.rb | 157 ++++++++++++++++++ 8 files changed, 208 insertions(+), 17 deletions(-) create mode 100644 test/integration/prompt_too_long_live_test.rb diff --git a/lib/llm_gateway/adapters/claude/stream_mapper.rb b/lib/llm_gateway/adapters/claude/stream_mapper.rb index e9f4be8..fee3045 100644 --- a/lib/llm_gateway/adapters/claude/stream_mapper.rb +++ b/lib/llm_gateway/adapters/claude/stream_mapper.rb @@ -73,6 +73,10 @@ def map(chunk) message = error[:message] || "Stream error" code = error[:type] + if LlmGateway::Errors.context_overflow_message?(message) + raise LlmGateway::Errors::PromptTooLong.new(message, code) + end + if code == "overloaded_error" raise LlmGateway::Errors::OverloadError.new(message, code) end diff --git a/lib/llm_gateway/adapters/open_ai/chat_completions/stream_mapper.rb b/lib/llm_gateway/adapters/open_ai/chat_completions/stream_mapper.rb index 31c3356..5ec83f3 100644 --- a/lib/llm_gateway/adapters/open_ai/chat_completions/stream_mapper.rb +++ b/lib/llm_gateway/adapters/open_ai/chat_completions/stream_mapper.rb @@ -228,6 +228,11 @@ def raise_stream_error!(data) error = data[:error].is_a?(Hash) ? 
data[:error] : data message = error[:message] || "Stream error" code = error[:code] || error[:type] + + if LlmGateway::Errors.context_overflow_message?(message) + raise LlmGateway::Errors::PromptTooLong.new(message, code) + end + raise LlmGateway::Errors::APIStatusError.new(message, code) end end diff --git a/lib/llm_gateway/adapters/open_ai/responses/stream_mapper.rb b/lib/llm_gateway/adapters/open_ai/responses/stream_mapper.rb index 9233b1a..cf58dfa 100644 --- a/lib/llm_gateway/adapters/open_ai/responses/stream_mapper.rb +++ b/lib/llm_gateway/adapters/open_ai/responses/stream_mapper.rb @@ -328,6 +328,11 @@ def raise_stream_error!(data) error = data[:error].is_a?(Hash) ? data[:error] : data message = error[:message] || "Stream error" code = error[:code] || error[:type] + + if LlmGateway::Errors.context_overflow_message?(message) + raise LlmGateway::Errors::PromptTooLong.new(message, code) + end + raise LlmGateway::Errors::APIStatusError.new(message, code) end end diff --git a/lib/llm_gateway/clients/claude.rb b/lib/llm_gateway/clients/claude.rb index 65a7d33..33192f3 100644 --- a/lib/llm_gateway/clients/claude.rb +++ b/lib/llm_gateway/clients/claude.rb @@ -97,11 +97,8 @@ def prepend_claude_code_identity(system) end def handle_client_specific_errors(response, error) - case response.code.to_i - when 400 - if error["message"]&.start_with?("prompt is too long") - raise Errors::PromptTooLong.new(error["message"], error["type"]) - end + if Errors.context_overflow_message?(error["message"]) + raise Errors::PromptTooLong.new(error["message"], error["type"]) end # If we get here, we didn't handle it specifically diff --git a/lib/llm_gateway/clients/groq.rb b/lib/llm_gateway/clients/groq.rb index ab3a0b9..6b148f2 100644 --- a/lib/llm_gateway/clients/groq.rb +++ b/lib/llm_gateway/clients/groq.rb @@ -33,24 +33,21 @@ def build_headers def handle_client_specific_errors(response, error) # Groq likely uses 'code' like OpenAI since it's OpenAI-compatible error_code = error["code"] 
+ error_message = error["message"] + + if Errors.context_overflow_message?(error_message) + raise Errors::PromptTooLong.new(error_message, error["type"]) + end case response.code.to_i - when 400 - if error["message"]&.match?(/reduce the length of the messages/i) - raise Errors::PromptTooLong.new(error["message"], error["type"]) - end - when 413 - if error["message"]&.start_with?("Request too large") - raise Errors::PromptTooLong.new(error["message"], error["type"]) - end when 429 raise Errors::RateLimitError.new(error["type"], error_code) if error_code == "rate_limit_exceeded" - raise Errors::OverloadError.new(error["message"], error_code) + raise Errors::OverloadError.new(error_message, error_code) end # If we get here, we didn't handle it specifically - raise Errors::APIStatusError.new(error["message"], error_code) + raise Errors::APIStatusError.new(error_message, error_code) end end end diff --git a/lib/llm_gateway/clients/open_ai.rb b/lib/llm_gateway/clients/open_ai.rb index 059dffe..71a0014 100644 --- a/lib/llm_gateway/clients/open_ai.rb +++ b/lib/llm_gateway/clients/open_ai.rb @@ -177,12 +177,17 @@ def build_headers def handle_client_specific_errors(response, error) # OpenAI uses 'code' instead of 'type' for error codes error_code = error["code"] + error_message = error["message"] + + if Errors.context_overflow_message?(error_message) + raise Errors::PromptTooLong.new(error_message, error_code) + end case response.code.to_i when 429 - raise Errors::RateLimitError.new(error["message"], error_code) + raise Errors::RateLimitError.new(error_message, error_code) when 503 - raise Errors::OverloadError.new(error["message"], error_code) + raise Errors::OverloadError.new(error_message, error_code) end # If we get here, we didn't handle it specifically fallback_body = response.body.to_s.strip diff --git a/lib/llm_gateway/errors.rb b/lib/llm_gateway/errors.rb index cfdd315..1eef904 100644 --- a/lib/llm_gateway/errors.rb +++ b/lib/llm_gateway/errors.rb @@ -31,6 +31,27 @@ 
class UnsupportedModel < ClientError; end class UnsupportedProvider < ClientError; end class MissingMapperForProvider < ClientError; end + OVERFLOW_PATTERNS = [ + /prompt is too long/i, # Anthropic + /exceeds the context window/i, # OpenAI + /reduce the length of the messages/i, # Groq + /maximum context length is \d+ tokens/i, + /context[_ ]length[_ ]exceeded/i, + /too many tokens/i, + /token limit exceeded/i, + /request too large.*tokens per min/i, # OpenAI TPM wording + /input tokens per minute/i, # Anthropic TPM wording + /reduce the prompt length/i, + /input or output tokens must be reduced/i + ].freeze + + def self.context_overflow_message?(message) + text = message.to_s + return false if text.empty? + + OVERFLOW_PATTERNS.any? { |pattern| pattern.match?(text) } + end + class PromptError < BaseError; end class HallucinationError < PromptError; end diff --git a/test/integration/prompt_too_long_live_test.rb b/test/integration/prompt_too_long_live_test.rb new file mode 100644 index 0000000..be8acbb --- /dev/null +++ b/test/integration/prompt_too_long_live_test.rb @@ -0,0 +1,157 @@ +# frozen_string_literal: true + +require "test_helper" +require "json" +require "time" +require "fileutils" + +class PromptTooLongLiveTest < Test + def teardown + LlmGateway.reset_configuration! + end + + def load_provider(provider:, model:) + config = { + "provider" => provider, + "model_key" => model + } + + case provider + when "openai_apikey_completions", "openai_apikey_responses" + api_key = ENV["OPENAI_API_KEY"].to_s + skip("Skipped: missing OPENAI_API_KEY") if api_key.empty? + config["api_key"] = api_key + when "anthropic_apikey_messages" + api_key = ENV["ANTHROPIC_API_KEY"].to_s + skip("Skipped: missing ANTHROPIC_API_KEY") if api_key.empty? 
+ config["api_key"] = api_key + when "anthropic_oauth_messages" + config["provider"] = "anthropic_apikey_messages" + config["api_key"] = oauth_access_token_for("anthropic") + when "openai_oauth_codex" + creds = load_auth_credentials("openai") + config["api_key"] = oauth_access_token_for("openai") + config["account_id"] = creds["account_id"] if creds["account_id"] + end + + LlmGateway.build_provider(config) + end + + def auth_file_path + File.expand_path(ENV.fetch("LLM_GATEWAY_AUTH_FILE", "~/.config/llm_gateway/auth.json")) + end + + def load_auth_credentials(provider) + path = auth_file_path + skip("Skipped: missing auth file at #{path}") unless File.exist?(path) + + auth = JSON.parse(File.read(path)) + creds = auth[provider] + skip("Skipped: missing #{provider} credentials in #{path}") unless creds + + creds + end + + def persist_auth_credentials(provider, attributes) + path = auth_file_path + FileUtils.mkdir_p(File.dirname(path)) + + auth = File.exist?(path) ? JSON.parse(File.read(path)) : {} + auth[provider] ||= {} + auth[provider].merge!(attributes) + + File.write(path, JSON.pretty_generate(auth) + "\n") + end + + def oauth_access_token_for(provider) + creds = load_auth_credentials(provider) + + case provider + when "anthropic" + token = LlmGateway::Clients::Claude.new.get_oauth_access_token( + access_token: creds["access_token"], + refresh_token: creds["refresh_token"], + expires_at: creds["expires_at"] + ) do |access_token, refresh_token, expires_at| + persist_auth_credentials("anthropic", { + "access_token" => access_token, + "refresh_token" => refresh_token, + "expires_at" => expires_at&.iso8601 + }) + end + + persist_auth_credentials("anthropic", { "access_token" => token }) if token != creds["access_token"] + token + when "openai" + token = LlmGateway::Clients::OpenAi.new.get_oauth_access_token( + access_token: creds["access_token"], + refresh_token: creds["refresh_token"], + expires_at: creds["expires_at"], + account_id: creds["account_id"] + ) do 
|access_token, refresh_token, expires_at| + persist_auth_credentials("openai", { + "access_token" => access_token, + "refresh_token" => refresh_token, + "expires_at" => expires_at&.iso8601 + }) + end + + persist_auth_credentials("openai", { "access_token" => token }) if token != creds["access_token"] + token + else + raise ArgumentError, "Unsupported OAuth provider: #{provider}" + end + end + + def huge_prompt + "Please reply with one short sentence.\n\n" + ("lorem ipsum dolor sit amet " * 240_000) + end + + def assert_prompt_too_long(adapter, name, provider) + error = assert_raises(LlmGateway::Errors::PromptTooLong) do + adapter.stream(huge_prompt) + end + + assert LlmGateway::Errors.context_overflow_message?(error.message), + "Expected prompt-length related error message for #{provider}, got: #{error.message}" + end + + def self.define_prompt_too_long_debug_test(name:, provider:, model:) + test "#{name} prompt too long debug" do + without_vcr do + adapter = load_provider(provider:, model:) + assert_prompt_too_long(adapter, name, provider) + end + end + end + + define_prompt_too_long_debug_test( + name: "openai_apikey_completions_gpt_5_1", + provider: "openai_apikey_completions", + model: "gpt-5.1" + ) + + define_prompt_too_long_debug_test( + name: "anthropic_apikey_messages_claude_sonnet_4", + provider: "anthropic_apikey_messages", + model: "claude-sonnet-4-20250514" + ) + + define_prompt_too_long_debug_test( + name: "openai_apikey_responses_gpt_5_4", + provider: "openai_apikey_responses", + model: "gpt-5.4" + ) + + define_prompt_too_long_debug_test( + name: "anthropic_oauth_messages_claude_sonnet_4", + provider: "anthropic_oauth_messages", + model: "claude-sonnet-4-20250514" + ) + + define_prompt_too_long_debug_test( + name: "openai_oauth_codex_gpt_5_4", + provider: "openai_oauth_codex", + model: "gpt-5.4" + ) +end From bb61205e91236a8050f20dc92e966c473f8b42d5 Mon Sep 17 00:00:00 2001 From: gruv Date: Mon, 6 Apr 2026 13:57:28 +0800 Subject: [PATCH 08/14] 
refactor: move all live tests to a shared helper --- test/integration/prompt_too_long_live_test.rb | 99 +--------------- test/integration/stream_test.rb | 107 +---------------- test/utils/live_test_helper.rb | 109 ++++++++++++++++++ 3 files changed, 115 insertions(+), 200 deletions(-) create mode 100644 test/utils/live_test_helper.rb diff --git a/test/integration/prompt_too_long_live_test.rb b/test/integration/prompt_too_long_live_test.rb index be8acbb..36a51e3 100644 --- a/test/integration/prompt_too_long_live_test.rb +++ b/test/integration/prompt_too_long_live_test.rb @@ -1,108 +1,15 @@ # frozen_string_literal: true require "test_helper" -require "json" -require "time" -require "fileutils" +require_relative "../utils/live_test_helper" class PromptTooLongLiveTest < Test + include LiveTestHelper + def teardown LlmGateway.reset_configuration! end - def load_provider(provider:, model:) - config = { - "provider" => provider, - "model_key" => model - } - - case provider - when "openai_apikey_completions", "openai_apikey_responses" - api_key = ENV["OPENAI_API_KEY"].to_s - skip("Skipped: missing OPENAI_API_KEY") if api_key.empty? - config["api_key"] = api_key - when "anthropic_apikey_messages" - api_key = ENV["ANTHROPIC_API_KEY"].to_s - skip("Skipped: missing ANTHROPIC_API_KEY") if api_key.empty? 
- config["api_key"] = api_key - when "anthropic_oauth_messages" - config["provider"] = "anthropic_apikey_messages" - config["api_key"] = oauth_access_token_for("anthropic") - when "openai_oauth_codex" - creds = load_auth_credentials("openai") - config["api_key"] = oauth_access_token_for("openai") - config["account_id"] = creds["account_id"] if creds["account_id"] - end - - LlmGateway.build_provider(config) - end - - def auth_file_path - File.expand_path(ENV.fetch("LLM_GATEWAY_AUTH_FILE", "~/.config/llm_gateway/auth.json")) - end - - def load_auth_credentials(provider) - path = auth_file_path - skip("Skipped: missing auth file at #{path}") unless File.exist?(path) - - auth = JSON.parse(File.read(path)) - creds = auth[provider] - skip("Skipped: missing #{provider} credentials in #{path}") unless creds - - creds - end - - def persist_auth_credentials(provider, attributes) - path = auth_file_path - FileUtils.mkdir_p(File.dirname(path)) - - auth = File.exist?(path) ? JSON.parse(File.read(path)) : {} - auth[provider] ||= {} - auth[provider].merge!(attributes) - - File.write(path, JSON.pretty_generate(auth) + "\n") - end - - def oauth_access_token_for(provider) - creds = load_auth_credentials(provider) - - case provider - when "anthropic" - token = LlmGateway::Clients::Claude.new.get_oauth_access_token( - access_token: creds["access_token"], - refresh_token: creds["refresh_token"], - expires_at: creds["expires_at"] - ) do |access_token, refresh_token, expires_at| - persist_auth_credentials("anthropic", { - "access_token" => access_token, - "refresh_token" => refresh_token, - "expires_at" => expires_at&.iso8601 - }) - end - - persist_auth_credentials("anthropic", { "access_token" => token }) if token != creds["access_token"] - token - when "openai" - token = LlmGateway::Clients::OpenAi.new.get_oauth_access_token( - access_token: creds["access_token"], - refresh_token: creds["refresh_token"], - expires_at: creds["expires_at"], - account_id: creds["account_id"] - ) do 
|access_token, refresh_token, expires_at| - persist_auth_credentials("openai", { - "access_token" => access_token, - "refresh_token" => refresh_token, - "expires_at" => expires_at&.iso8601 - }) - end - - persist_auth_credentials("openai", { "access_token" => token }) if token != creds["access_token"] - token - else - raise ArgumentError, "Unsupported OAuth provider: #{provider}" - end - end - def huge_prompt "Please reply with one short sentence.\n\n" + ("lorem ipsum dolor sit amet " * 240_000) end diff --git a/test/integration/stream_test.rb b/test/integration/stream_test.rb index 13b2eb7..1ee751b 100644 --- a/test/integration/stream_test.rb +++ b/test/integration/stream_test.rb @@ -4,118 +4,17 @@ require "vcr" require "json" require "base64" -require "time" -require "fileutils" require_relative "../utils/calculator_tool_helper" +require_relative "../utils/live_test_helper" class ProvidersJsonTest < Test include CalculatorToolHelper + include LiveTestHelper + def teardown LlmGateway.reset_configuration! end - def load_provider(provider:, model:) - config = { - "provider" => provider, - "model_key" => model - } - - case provider - when "openai_apikey_completions", "openai_apikey_responses" - api_key = ENV["OPENAI_API_KEY"].to_s - skip("Skipped: missing OPENAI_API_KEY") if api_key.empty? - config["api_key"] = api_key - when "anthropic_apikey_messages" - api_key = ENV["ANTHROPIC_API_KEY"].to_s - skip("Skipped: missing ANTHROPIC_API_KEY") if api_key.empty? 
- config["api_key"] = api_key - when "anthropic_oauth_messages" - config["provider"] = "anthropic_apikey_messages" - config["api_key"] = oauth_access_token_for("anthropic") - when "openai_oauth_codex" - creds = load_auth_credentials("openai") - config["api_key"] = oauth_access_token_for("openai") - config["account_id"] = creds["account_id"] if creds["account_id"] - end - - LlmGateway.build_provider(config) - end - - def skip_on_authentication_error - yield - rescue LlmGateway::Errors::AuthenticationError, - LlmGateway::Errors::BadRequestError, - LlmGateway::Errors::RateLimitError, - LlmGateway::Errors::APIStatusError => e - skip("Skipped due to provider error: #{e.message}") - end - - def auth_file_path - File.expand_path(ENV.fetch("LLM_GATEWAY_AUTH_FILE", "~/.config/llm_gateway/auth.json")) - end - - def load_auth_credentials(provider) - path = auth_file_path - skip("Skipped: missing auth file at #{path}") unless File.exist?(path) - - auth = JSON.parse(File.read(path)) - creds = auth[provider] - skip("Skipped: missing #{provider} credentials in #{path}") unless creds - - creds - end - - def persist_auth_credentials(provider, attributes) - path = auth_file_path - FileUtils.mkdir_p(File.dirname(path)) - - auth = File.exist?(path) ? 
JSON.parse(File.read(path)) : {} - auth[provider] ||= {} - auth[provider].merge!(attributes) - - File.write(path, JSON.pretty_generate(auth) + "\n") - end - - def oauth_access_token_for(provider) - creds = load_auth_credentials(provider) - - case provider - when "anthropic" - token = LlmGateway::Clients::Claude.new.get_oauth_access_token( - access_token: creds["access_token"], - refresh_token: creds["refresh_token"], - expires_at: creds["expires_at"] - ) do |access_token, refresh_token, expires_at| - persist_auth_credentials("anthropic", { - "access_token" => access_token, - "refresh_token" => refresh_token, - "expires_at" => expires_at&.iso8601 - }) - end - - persist_auth_credentials("anthropic", { "access_token" => token }) if token != creds["access_token"] - token - when "openai" - token = LlmGateway::Clients::OpenAi.new.get_oauth_access_token( - access_token: creds["access_token"], - refresh_token: creds["refresh_token"], - expires_at: creds["expires_at"], - account_id: creds["account_id"] - ) do |access_token, refresh_token, expires_at| - persist_auth_credentials("openai", { - "access_token" => access_token, - "refresh_token" => refresh_token, - "expires_at" => expires_at&.iso8601 - }) - end - - persist_auth_credentials("openai", { "access_token" => token }) if token != creds["access_token"] - token - else - raise ArgumentError, "Unsupported OAuth provider: #{provider}" - end - end - def assert_basic_text_generation_result(message, expected_text) assert_equal "assistant", message.role assert_operator message.usage[:input_tokens], :>, 0 diff --git a/test/utils/live_test_helper.rb b/test/utils/live_test_helper.rb new file mode 100644 index 0000000..42e1769 --- /dev/null +++ b/test/utils/live_test_helper.rb @@ -0,0 +1,109 @@ +# frozen_string_literal: true + +require "json" +require "time" +require "fileutils" + +module LiveTestHelper + def load_provider(provider:, model:) + config = { + "provider" => provider, + "model_key" => model + } + + case provider + when 
"openai_apikey_completions", "openai_apikey_responses" + api_key = ENV["OPENAI_API_KEY"].to_s + skip("Skipped: missing OPENAI_API_KEY") if api_key.empty? + config["api_key"] = api_key + when "anthropic_apikey_messages" + api_key = ENV["ANTHROPIC_API_KEY"].to_s + skip("Skipped: missing ANTHROPIC_API_KEY") if api_key.empty? + config["api_key"] = api_key + when "anthropic_oauth_messages" + config["provider"] = "anthropic_apikey_messages" + config["api_key"] = oauth_access_token_for("anthropic") + when "openai_oauth_codex" + creds = load_auth_credentials("openai") + config["api_key"] = oauth_access_token_for("openai") + config["account_id"] = creds["account_id"] if creds["account_id"] + end + + LlmGateway.build_provider(config) + end + + def skip_on_authentication_error + yield + rescue LlmGateway::Errors::AuthenticationError, + LlmGateway::Errors::BadRequestError, + LlmGateway::Errors::RateLimitError, + LlmGateway::Errors::APIStatusError => e + skip("Skipped due to provider error: #{e.message}") + end + + def auth_file_path + File.expand_path(ENV.fetch("LLM_GATEWAY_AUTH_FILE", "~/.config/llm_gateway/auth.json")) + end + + def load_auth_credentials(provider) + path = auth_file_path + skip("Skipped: missing auth file at #{path}") unless File.exist?(path) + + auth = JSON.parse(File.read(path)) + creds = auth[provider] + skip("Skipped: missing #{provider} credentials in #{path}") unless creds + + creds + end + + def persist_auth_credentials(provider, attributes) + path = auth_file_path + FileUtils.mkdir_p(File.dirname(path)) + + auth = File.exist?(path) ? 
JSON.parse(File.read(path)) : {} + auth[provider] ||= {} + auth[provider].merge!(attributes) + + File.write(path, JSON.pretty_generate(auth) + "\n") + end + + def oauth_access_token_for(provider) + creds = load_auth_credentials(provider) + + case provider + when "anthropic" + token = LlmGateway::Clients::Claude.new.get_oauth_access_token( + access_token: creds["access_token"], + refresh_token: creds["refresh_token"], + expires_at: creds["expires_at"] + ) do |access_token, refresh_token, expires_at| + persist_auth_credentials("anthropic", { + "access_token" => access_token, + "refresh_token" => refresh_token, + "expires_at" => expires_at&.iso8601 + }) + end + + persist_auth_credentials("anthropic", { "access_token" => token }) if token != creds["access_token"] + token + when "openai" + token = LlmGateway::Clients::OpenAi.new.get_oauth_access_token( + access_token: creds["access_token"], + refresh_token: creds["refresh_token"], + expires_at: creds["expires_at"], + account_id: creds["account_id"] + ) do |access_token, refresh_token, expires_at| + persist_auth_credentials("openai", { + "access_token" => access_token, + "refresh_token" => refresh_token, + "expires_at" => expires_at&.iso8601 + }) + end + + persist_auth_credentials("openai", { "access_token" => token }) if token != creds["access_token"] + token + else + raise ArgumentError, "Unsupported OAuth provider: #{provider}" + end + end +end From 4b9ee9ca6b479a590b3ff3e66d0d7f960ae4a180 Mon Sep 17 00:00:00 2001 From: gruv Date: Mon, 6 Apr 2026 14:46:02 +0800 Subject: [PATCH 09/14] test: fix test asserting wrong error type for prompt too long --- test/integration/clients/openai_test.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/clients/openai_test.rb b/test/integration/clients/openai_test.rb index 87db73a..c5930a3 100644 --- a/test/integration/clients/openai_test.rb +++ b/test/integration/clients/openai_test.rb @@ -64,7 +64,7 @@ def openai_client end test "throws rate limit 
error" do - error = assert_raises(LlmGateway::Errors::RateLimitError) do + error = assert_raises(LlmGateway::Errors::PromptTooLong) do VCR.use_cassette(vcr_cassette_name) do openai_client.chat([ { 'role': "user", 'content': "aqklcsa," * 100_000 } ], **mapped_chat_options(max_completion_tokens: 4096)) end From b767fe73949de1fea5b86eee7968670151a8cb67 Mon Sep 17 00:00:00 2001 From: gruv Date: Mon, 6 Apr 2026 14:49:20 +0800 Subject: [PATCH 10/14] feat: support prompt caching, with cache_retention cache_key options For Anthropic it automatically adds the cache_control tags. I have not tested what happens when the caller has already set cache_control, but it will probably be overwritten. --- lib/llm_gateway.rb | 1 + .../adapters/anthropic_option_mapper.rb | 6 +- .../open_ai/chat_completions/option_mapper.rb | 5 + .../open_ai/prompt_cache_option_mapper.rb | 39 +++++ .../open_ai/responses/option_mapper.rb | 5 + .../adapters/openai_codex/option_mapper.rb | 5 + lib/llm_gateway/clients/claude.rb | 68 +++++++- lib/llm_gateway/clients/open_ai.rb | 5 +- test/integration/cache_live_test.rb | 153 ++++++++++++++++++ 9 files changed, 282 insertions(+), 5 deletions(-) create mode 100644 lib/llm_gateway/adapters/open_ai/prompt_cache_option_mapper.rb create mode 100644 test/integration/cache_live_test.rb diff --git a/lib/llm_gateway.rb b/lib/llm_gateway.rb index 0a24d00..0c85e0d 100644 --- a/lib/llm_gateway.rb +++ b/lib/llm_gateway.rb @@ -24,6 +24,7 @@ require_relative "llm_gateway/adapters/claude/input_mapper" require_relative "llm_gateway/adapters/claude/output_mapper" require_relative "llm_gateway/adapters/open_ai/file_output_mapper" +require_relative "llm_gateway/adapters/open_ai/prompt_cache_option_mapper" require_relative "llm_gateway/adapters/open_ai/chat_completions/input_mapper" require_relative "llm_gateway/adapters/open_ai/chat_completions/output_mapper" require_relative "llm_gateway/adapters/open_ai/chat_completions/option_mapper" diff --git a/lib/llm_gateway/adapters/anthropic_option_mapper.rb
b/lib/llm_gateway/adapters/anthropic_option_mapper.rb index fa79262..a5e5744 100644 --- a/lib/llm_gateway/adapters/anthropic_option_mapper.rb +++ b/lib/llm_gateway/adapters/anthropic_option_mapper.rb @@ -14,8 +14,12 @@ module AnthropicOptionMapper module_function def map(options) - mapped_options = options.reject { |key, _| %i[reasoning max_completion_tokens].include?(key) } + mapped_options = options.reject { |key, _| %i[reasoning max_completion_tokens prompt_cache_retention cache_key prompt_cache_key].include?(key) } mapped_options[:max_tokens] = options[:max_completion_tokens] || 20480 + + retention = options[:cache_retention] + mapped_options[:cache_retention] = retention unless retention.nil? + reasoning = options[:reasoning] return mapped_options if reasoning.nil? || reasoning.to_s == "none" diff --git a/lib/llm_gateway/adapters/open_ai/chat_completions/option_mapper.rb b/lib/llm_gateway/adapters/open_ai/chat_completions/option_mapper.rb index cfbc810..4d5c430 100644 --- a/lib/llm_gateway/adapters/open_ai/chat_completions/option_mapper.rb +++ b/lib/llm_gateway/adapters/open_ai/chat_completions/option_mapper.rb @@ -5,6 +5,8 @@ module Adapters module OpenAi module ChatCompletions module OptionMapper + include LlmGateway::Adapters::OpenAi::PromptCacheOptionMapper + VALID_REASONING_LEVELS = %w[low medium high xhigh].freeze module_function @@ -13,6 +15,9 @@ def map(options) mapped_options = options.dup mapped_options[:max_completion_tokens] ||= 20_480 + map_cache_key!(mapped_options) + map_prompt_cache_retention!(mapped_options) + return mapped_options unless mapped_options.key?(:reasoning) reasoning = mapped_options.delete(:reasoning) diff --git a/lib/llm_gateway/adapters/open_ai/prompt_cache_option_mapper.rb b/lib/llm_gateway/adapters/open_ai/prompt_cache_option_mapper.rb new file mode 100644 index 0000000..99ef3e7 --- /dev/null +++ b/lib/llm_gateway/adapters/open_ai/prompt_cache_option_mapper.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module 
LlmGateway + module Adapters + module OpenAi + module PromptCacheOptionMapper + def self.included(base) + base.extend(self) + end + + def map_cache_key!(mapped_options) + cache_key = mapped_options.delete(:cache_key) + mapped_options.delete(:prompt_cache_key) + mapped_options[:prompt_cache_key] = cache_key unless cache_key.nil? + end + + def map_prompt_cache_retention!(mapped_options) + retention = mapped_options.delete(:cache_retention) + mapped_options.delete(:prompt_cache_retention) + retention ||= "short" if mapped_options.key?(:prompt_cache_key) + + case retention&.to_s + when nil + nil + when "short" + mapped_options[:prompt_cache_retention] = "in_memory" + when "long" + mapped_options[:prompt_cache_retention] = "24h" + when "none" + mapped_options.delete(:prompt_cache_key) + else + raise ArgumentError, + "Invalid cache_retention '#{retention}'. Use 'short', 'long', or 'none'." + end + end + end + end + end +end diff --git a/lib/llm_gateway/adapters/open_ai/responses/option_mapper.rb b/lib/llm_gateway/adapters/open_ai/responses/option_mapper.rb index 7ce374d..70f0bd4 100644 --- a/lib/llm_gateway/adapters/open_ai/responses/option_mapper.rb +++ b/lib/llm_gateway/adapters/open_ai/responses/option_mapper.rb @@ -5,6 +5,8 @@ module Adapters module OpenAi module Responses module OptionMapper + include LlmGateway::Adapters::OpenAi::PromptCacheOptionMapper + VALID_REASONING_LEVELS = %w[low medium high xhigh].freeze module_function @@ -15,6 +17,9 @@ def map(options) max_completion_tokens = mapped_options.delete(:max_completion_tokens) mapped_options[:max_output_tokens] = max_completion_tokens || mapped_options[:max_output_tokens] || 20_480 + map_cache_key!(mapped_options) + map_prompt_cache_retention!(mapped_options) + return mapped_options unless mapped_options.key?(:reasoning) reasoning = mapped_options.delete(:reasoning) diff --git a/lib/llm_gateway/adapters/openai_codex/option_mapper.rb b/lib/llm_gateway/adapters/openai_codex/option_mapper.rb index 1d9e149..7f13645 
100644 --- a/lib/llm_gateway/adapters/openai_codex/option_mapper.rb +++ b/lib/llm_gateway/adapters/openai_codex/option_mapper.rb @@ -15,6 +15,11 @@ def map(options) mapped_options.delete(:max_output_tokens) mapped_options.delete(:max_completion_tokens) + # Codex transport does not use retention flags in the request body. + mapped_options.delete(:prompt_cache_retention) + mapped_options.delete(:cacheRetention) + mapped_options.delete(:cache_retention) + mapped_options end end diff --git a/lib/llm_gateway/clients/claude.rb b/lib/llm_gateway/clients/claude.rb index 33192f3..de718b0 100644 --- a/lib/llm_gateway/clients/claude.rb +++ b/lib/llm_gateway/clients/claude.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require "uri" require_relative "../base_client" module LlmGateway @@ -41,21 +42,83 @@ def upload_file(filename, content, mime_type = "application/octet-stream") private - def build_body(messages, tools: nil, system: [], **options) + def build_body(messages, tools: nil, system: [], cache_retention: nil, **options) body = { model: model_key, - messages: messages + messages: apply_message_cache_control(messages, cache_retention) } body.merge!(tools: tools) if LlmGateway::Utils.present?(tools) system = prepend_claude_code_identity(system) if claude_code_oauth_api_key? + system = apply_system_cache_control(system, cache_retention) body.merge!(system: system) if LlmGateway::Utils.present?(system) body.merge!(options) body end + def apply_system_cache_control(system, cache_retention) + return system if system.nil? || system.empty? || !system.is_a?(Array) + + cache_control = anthropic_cache_control_for(cache_retention) + return system if cache_control.nil? 
+ + last_index = system.length - 1 + system.each_with_index.map do |block, index| + block = block.dup + if index == last_index + block[:cache_control] = cache_control + else + block.delete(:cache_control) + end + block + end + end + + def apply_message_cache_control(messages, cache_retention) + return messages if messages.nil? || messages.empty? || !messages.is_a?(Array) + + cache_control = anthropic_cache_control_for(cache_retention) + return messages if cache_control.nil? + + mapped_messages = messages.map(&:dup) + last_user_index = mapped_messages.rindex { |message| message[:role] == "user" } + return mapped_messages unless last_user_index + + last_user_message = mapped_messages[last_user_index] + original_blocks = Array(last_user_message[:content]) + tagged_indices = [(original_blocks.length - 2), (original_blocks.length - 1)].select { |i| i >= 0 } + + content_blocks = original_blocks.each_with_index.map do |block, index| + block = block.is_a?(Hash) ? block.dup : { type: "text", text: block.to_s } + if tagged_indices.include?(index) + block[:cache_control] = cache_control + else + block.delete(:cache_control) + end + block + end + + mapped_messages[last_user_index] = last_user_message.merge(content: content_blocks) + mapped_messages + end + + def anthropic_cache_control_for(cache_retention) + return nil if cache_retention.nil? + + retention = cache_retention.to_s + return nil if retention == "none" + + cache_control = { type: "ephemeral" } + cache_control = cache_control.merge(ttl: "1h") if retention == "long" && anthropic_official_api? + cache_control + end + + def anthropic_official_api? + URI(base_endpoint).host == "api.anthropic.com" + end + def build_headers return claude_code_oauth_headers if claude_code_oauth_api_key? 
@@ -101,7 +164,6 @@ def handle_client_specific_errors(response, error) raise Errors::PromptTooLong.new(error["message"], error["type"]) end - # If we get here, we didn't handle it specifically raise Errors::APIStatusError.new(error["message"], error["type"]) end end diff --git a/lib/llm_gateway/clients/open_ai.rb b/lib/llm_gateway/clients/open_ai.rb index 71a0014..742a393 100644 --- a/lib/llm_gateway/clients/open_ai.rb +++ b/lib/llm_gateway/clients/open_ai.rb @@ -129,7 +129,7 @@ def build_codex_body(messages, system, tools, **options) body end - def codex_headers(account_id: nil) + def codex_headers(account_id: nil, **options) effective_account_id = account_id || @account_id headers = { @@ -152,6 +152,9 @@ def post_codex_stream(url_part, body = nil, account_id: nil, &block) body.merge!(stream: true) request = Net::HTTP::Post.new(uri) codex_headers(account_id: account_id).each { |key, value| request[key] = value } + prompt_cache_key = body.delete(:prompt_cache_key) + request[:session_id] = prompt_cache_key if prompt_cache_key + request.body = body.to_json if body http.request(request) do |response| diff --git a/test/integration/cache_live_test.rb b/test/integration/cache_live_test.rb new file mode 100644 index 0000000..200c899 --- /dev/null +++ b/test/integration/cache_live_test.rb @@ -0,0 +1,153 @@ +# frozen_string_literal: true + +require "test_helper" +require "net/http" +require "uri" +require_relative "../utils/live_test_helper" + +class CacheLiveTest < Test + include LiveTestHelper + + DOCUMENT_URL = "https://gist.githubusercontent.com/billybonks/f343b02cc67535475b8819d281763c21/raw/c55972e604ecc9b5b998ed44d9e9575cebaf2fc8/responses.md" + + def teardown + LlmGateway.reset_configuration! 
+ end + + def fetch_document + uri = URI(DOCUMENT_URL) + response = Net::HTTP.get_response(uri) + + unless response.is_a?(Net::HTTPSuccess) + raise "Failed to fetch document from #{DOCUMENT_URL}: HTTP #{response.code}" + end + + response.body.encode("UTF-8", invalid: :replace, undef: :replace) + end + + def run_two_turn_cache_probe(adapter, options: {}) + document = fetch_document + first_prompt = <<~PROMPT + Read the following markdown document and remember it for the next question. + + --- + #{document} + --- + + Reply with exactly: loaded + PROMPT + + first_response = adapter.stream(first_prompt, **options) + + assert_equal "assistant", first_response.role + assert_nil first_response.error_message + + second_transcript = [ + { role: "user", content: first_prompt }, + first_response.to_h, + { role: "user", content: "What is this file documenting? Reply in one sentence." } + ] + + second_response = adapter.stream(second_transcript, **options) + + assert_equal "assistant", second_response.role + assert_nil second_response.error_message + second_response + end + + def assert_cache_hit_on_second_turn(adapter, options: {}) + second_response = run_two_turn_cache_probe(adapter, options: options) + + assert_operator second_response.usage[:cache_read_input_tokens], :>, 0, + "Expected cache_read_input_tokens > 0 with options #{options.inspect}, got #{second_response.usage.inspect}" + end + + def assert_no_cache_hit_on_second_turn(adapter, options: {}) + second_response = run_two_turn_cache_probe(adapter, options: options) + + assert_equal 0, second_response.usage[:cache_read_input_tokens].to_i, + "Expected cache_read_input_tokens to be 0 with options #{options.inspect}, got #{second_response.usage.inspect}" + end + + def self.define_cache_tests_for(name:, provider:, model:, options: {}) + test "#{name} cache read tokens on second turn" do + skip_on_authentication_error do + without_vcr do + adapter = load_provider(provider:, model:) + if provider.start_with?("anthropic") 
&& options[:cache_retention].to_s == "none" + assert_no_cache_hit_on_second_turn(adapter, options: options) + else + assert_cache_hit_on_second_turn(adapter, options: options) + end + end + end + end + end + + define_cache_tests_for( + name: "openai_apikey_completions", + provider: "openai_apikey_completions", + model: "gpt-5.1", + options: { + cache_key: "openai_apikey_completions", + cache_retention: "short" + } + ) + + define_cache_tests_for( + name: "openai_apikey_completions_none", + provider: "openai_apikey_completions", + model: "gpt-5.1", + options: { + cache_key: "openai_apikey_completions_none", + cache_retention: "none" + } + ) + + define_cache_tests_for( + name: "openai_apikey_responses", + provider: "openai_apikey_responses", + model: "gpt-5.4", + options: { + cache_key: "openai_apikey_responses", + cache_retention: "short" + } + ) + + define_cache_tests_for( + name: "openai_apikey_responses_none", + provider: "openai_apikey_responses", + model: "gpt-5.4", + options: { + cache_key: "openai_apikey_responses_none", + cache_retention: "none" + } + ) + + define_cache_tests_for( + name: "openai_oauth_codex", + provider: "openai_oauth_codex", + model: "gpt-5.4", + options: { + cache_key: "openai_oauth_codex" + } + ) + + define_cache_tests_for( + name: "anthropic_apikey_messages", + provider: "anthropic_apikey_messages", + model: "claude-sonnet-4-20250514", + options: { + cache_retention: "short" + } + ) + + define_cache_tests_for( + name: "anthropic_apikey_messages_none", + provider: "anthropic_apikey_messages", + model: "claude-sonnet-4-20250514", + options: { + cache_retention: "none" + } + ) +end From 1135629be615c49584e9bf3bda469f0b0091bfea Mon Sep 17 00:00:00 2001 From: gruv Date: Mon, 6 Apr 2026 16:10:32 +0800 Subject: [PATCH 11/14] test: add tests for all option mappers just makes it easier to see what are the behaviours this test is not extensive for all options that could be passed --- .../options/anthropic_option_mapper_test.rb | 51 +++++++++++++ 
.../unit/options/claude_cache_control_test.rb | 73 ++++++++++++++++++ test/unit/options/groq_option_mapper_test.rb | 31 ++++++++ ...nai_chat_completions_option_mapper_test.rb | 74 +++++++++++++++++++ .../openai_codex_option_mapper_test.rb | 30 ++++++++ .../openai_responses_option_mapper_test.rb | 62 ++++++++++++++++ 6 files changed, 321 insertions(+) create mode 100644 test/unit/options/anthropic_option_mapper_test.rb create mode 100644 test/unit/options/claude_cache_control_test.rb create mode 100644 test/unit/options/groq_option_mapper_test.rb create mode 100644 test/unit/options/openai_chat_completions_option_mapper_test.rb create mode 100644 test/unit/options/openai_codex_option_mapper_test.rb create mode 100644 test/unit/options/openai_responses_option_mapper_test.rb diff --git a/test/unit/options/anthropic_option_mapper_test.rb b/test/unit/options/anthropic_option_mapper_test.rb new file mode 100644 index 0000000..7172016 --- /dev/null +++ b/test/unit/options/anthropic_option_mapper_test.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +require "test_helper" + +class AnthropicOptionMapperTest < Test + test "maps max_completion_tokens to max_tokens" do + mapped = LlmGateway::Adapters::AnthropicOptionMapper.map(max_completion_tokens: 321) + + assert_equal 321, mapped[:max_tokens] + refute mapped.key?(:max_completion_tokens) + end + + test "sets default max_tokens" do + mapped = LlmGateway::Adapters::AnthropicOptionMapper.map({}) + + assert_equal 20_480, mapped[:max_tokens] + end + + test "forwards cache_retention as is" do + mapped = LlmGateway::Adapters::AnthropicOptionMapper.map(cache_retention: "long") + + assert_equal "long", mapped[:cache_retention] + refute mapped.key?(:prompt_cache_retention) + end + + test "forwards none cache_retention" do + mapped = LlmGateway::Adapters::AnthropicOptionMapper.map(cache_retention: "none") + + assert_equal "none", mapped[:cache_retention] + end + + test "maps reasoning to thinking with budget tokens" do + mapped = 
LlmGateway::Adapters::AnthropicOptionMapper.map(reasoning: "high") + + assert_equal({ type: "enabled", budget_tokens: 10_240 }, mapped[:thinking]) + refute mapped.key?(:reasoning) + end + + test "none reasoning is removed" do + mapped = LlmGateway::Adapters::AnthropicOptionMapper.map(reasoning: "none") + + refute mapped.key?(:thinking) + refute mapped.key?(:reasoning) + end + + test "raises for invalid reasoning" do + assert_raises(ArgumentError) do + LlmGateway::Adapters::AnthropicOptionMapper.map(reasoning: "extreme") + end + end +end diff --git a/test/unit/options/claude_cache_control_test.rb b/test/unit/options/claude_cache_control_test.rb new file mode 100644 index 0000000..507d011 --- /dev/null +++ b/test/unit/options/claude_cache_control_test.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +require "test_helper" + +class ClaudeCacheControlTest < Test + test "when cache retention is passed it adds cache_control to last system block and up to two blocks in last user message" do + client = LlmGateway::Clients::Claude.new(model_key: "claude-3", api_key: "test") + + body = client.send( + :build_body, + [ + { role: "user", content: [ { type: "text", text: "hello" }, { type: "text", text: "world" } ] }, + { role: "assistant", content: [ { type: "text", text: "ok" } ] }, + { role: "user", content: [ { type: "text", text: "final" }, { type: "text", text: "question" } ] } + ], + system: [ + { type: "text", text: "system 1" }, + { type: "text", text: "system 2" } + ], + tools: [ + { name: "tool_1", description: "Tool 1", input_schema: { type: "object", properties: {} } }, + { name: "tool_2", description: "Tool 2", input_schema: { type: "object", properties: {} } } + ], + cache_retention: "short" + ) + + expected_cache_control = { type: "ephemeral" } + + system_cache_control_count = body[:system].count { |block| block[:cache_control] == expected_cache_control } + tools_cache_control_count = body[:tools].count { |tool| tool[:cache_control] == expected_cache_control 
} + + assert_nil body[:system][0][:cache_control] + assert_equal expected_cache_control, body[:system][1][:cache_control] + + last_user_message = body[:messages].reverse.find { |message| message[:role] == "user" } + last_user_cache_control_count = Array(last_user_message[:content]).count do |block| + block[:cache_control] == expected_cache_control + end + + assert_equal 1, system_cache_control_count + assert_equal 0, tools_cache_control_count + assert_equal 2, last_user_cache_control_count + assert_operator(system_cache_control_count + tools_cache_control_count + last_user_cache_control_count, :<=, 3) + end + + test "uses ttl for long retention on official anthropic base url" do + client = LlmGateway::Clients::Claude.new(model_key: "claude-3", api_key: "test") + + body = client.send( + :build_body, + [ { role: "user", content: [ { type: "text", text: "hello" } ] } ], + system: [ { type: "text", text: "system" } ], + cache_retention: "long" + ) + + assert_equal({ type: "ephemeral", ttl: "1h" }, body[:system][0][:cache_control]) + assert_equal({ type: "ephemeral", ttl: "1h" }, body[:messages][0][:content][0][:cache_control]) + end + + test "does not mutate existing cache control when retention is none" do + client = LlmGateway::Clients::Claude.new(model_key: "claude-3", api_key: "test") + + body = client.send( + :build_body, + [ { role: "user", content: [ { type: "text", text: "hello", cache_control: { type: "ephemeral" } } ] } ], + system: [ { type: "text", text: "system", cache_control: { type: "ephemeral" } } ], + cache_retention: "none" + ) + + assert_equal({ type: "ephemeral" }, body[:system][0][:cache_control]) + assert_equal({ type: "ephemeral" }, body[:messages][0][:content][0][:cache_control]) + end +end diff --git a/test/unit/options/groq_option_mapper_test.rb b/test/unit/options/groq_option_mapper_test.rb new file mode 100644 index 0000000..45be414 --- /dev/null +++ b/test/unit/options/groq_option_mapper_test.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: 
true + +require "test_helper" + +class GroqOptionMapperTest < Test + test "sets defaults for temperature max_completion_tokens and response_format" do + mapped = LlmGateway::Adapters::Groq::OptionMapper.map({}) + + assert_equal 0, mapped[:temperature] + assert_equal 20_480, mapped[:max_completion_tokens] + assert_equal({ type: "text" }, mapped[:response_format]) + end + + test "preserves explicit values" do + mapped = LlmGateway::Adapters::Groq::OptionMapper.map( + temperature: 0.3, + max_completion_tokens: 123, + response_format: { type: "json_object" } + ) + + assert_equal 0.3, mapped[:temperature] + assert_equal 123, mapped[:max_completion_tokens] + assert_equal({ type: "json_object" }, mapped[:response_format]) + end + + test "normalizes string response_format" do + mapped = LlmGateway::Adapters::Groq::OptionMapper.map(response_format: "json_object") + + assert_equal({ type: "json_object" }, mapped[:response_format]) + end +end diff --git a/test/unit/options/openai_chat_completions_option_mapper_test.rb b/test/unit/options/openai_chat_completions_option_mapper_test.rb new file mode 100644 index 0000000..f5a68bc --- /dev/null +++ b/test/unit/options/openai_chat_completions_option_mapper_test.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +require "test_helper" + +class OpenAiChatCompletionsOptionMapperTest < Test + test "sets default max_completion_tokens" do + mapped = LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map({}) + + assert_equal 20_480, mapped[:max_completion_tokens] + end + + test "maps cache_key and short retention" do + mapped = LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map( + cache_key: "abc", + cache_retention: "short" + ) + + assert_equal "abc", mapped[:prompt_cache_key] + assert_equal "in_memory", mapped[:prompt_cache_retention] + end + + test "maps long retention" do + mapped = LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map( + cache_key: "abc", + cache_retention: "long" + ) + + 
assert_equal "abc", mapped[:prompt_cache_key] + assert_equal "24h", mapped[:prompt_cache_retention] + end + + test "none retention removes prompt cache key" do + mapped = LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map( + cache_key: "abc", + cache_retention: "none" + ) + + refute mapped.key?(:prompt_cache_key) + refute mapped.key?(:prompt_cache_retention) + end + + test "defaults retention to short when cache_key is present" do + mapped = LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(cache_key: "abc") + + assert_equal "abc", mapped[:prompt_cache_key] + assert_equal "in_memory", mapped[:prompt_cache_retention] + end + + test "maps reasoning to reasoning_effort" do + mapped = LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(reasoning: "high") + + assert_equal "high", mapped[:reasoning_effort] + refute mapped.key?(:reasoning) + end + + test "none reasoning is removed" do + mapped = LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(reasoning: "none") + + refute mapped.key?(:reasoning) + refute mapped.key?(:reasoning_effort) + end + + test "raises for invalid reasoning" do + assert_raises(ArgumentError) do + LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(reasoning: "extreme") + end + end + + test "raises for invalid cache retention" do + assert_raises(ArgumentError) do + LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(cache_retention: "week") + end + end +end diff --git a/test/unit/options/openai_codex_option_mapper_test.rb b/test/unit/options/openai_codex_option_mapper_test.rb new file mode 100644 index 0000000..393ba79 --- /dev/null +++ b/test/unit/options/openai_codex_option_mapper_test.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +require "test_helper" + +class OpenAiCodexOptionMapperTest < Test + test "keeps prompt_cache_key but removes retention fields" do + mapped = LlmGateway::Adapters::OpenAiCodex::OptionMapper.map( + cache_key: "abc", + cache_retention: 
"long" + ) + + assert_equal "abc", mapped[:prompt_cache_key] + refute mapped.key?(:prompt_cache_retention) + refute mapped.key?(:cacheRetention) + refute mapped.key?(:cache_retention) + end + + test "removes token limit options" do + mapped = LlmGateway::Adapters::OpenAiCodex::OptionMapper.map(max_completion_tokens: 999) + + refute mapped.key?(:max_output_tokens) + refute mapped.key?(:max_completion_tokens) + end + + test "inherits reasoning mapping from openai responses" do + mapped = LlmGateway::Adapters::OpenAiCodex::OptionMapper.map(reasoning: "low") + + assert_equal({ effort: "low", summary: "detailed" }, mapped[:reasoning]) + end +end diff --git a/test/unit/options/openai_responses_option_mapper_test.rb b/test/unit/options/openai_responses_option_mapper_test.rb new file mode 100644 index 0000000..06b3052 --- /dev/null +++ b/test/unit/options/openai_responses_option_mapper_test.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +require "test_helper" + +class OpenAiResponsesOptionMapperTest < Test + test "maps max_completion_tokens to max_output_tokens" do + mapped = LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map(max_completion_tokens: 777) + + assert_equal 777, mapped[:max_output_tokens] + refute mapped.key?(:max_completion_tokens) + end + + test "sets default max_output_tokens" do + mapped = LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map({}) + + assert_equal 20_480, mapped[:max_output_tokens] + end + + test "maps cache_key and short retention" do + mapped = LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map( + cache_key: "abc", + cache_retention: "short" + ) + + assert_equal "abc", mapped[:prompt_cache_key] + assert_equal "in_memory", mapped[:prompt_cache_retention] + end + + test "none retention removes prompt cache key" do + mapped = LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map( + cache_key: "abc", + cache_retention: "none" + ) + + refute mapped.key?(:prompt_cache_key) + refute 
 mapped.key?(:prompt_cache_retention) + end + + test "maps reasoning to reasoning hash" do + mapped = LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map(reasoning: "medium") + + assert_equal({ effort: "medium", summary: "detailed" }, mapped[:reasoning]) + end + + test "none reasoning is removed" do + mapped = LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map(reasoning: "none") + + refute mapped.key?(:reasoning) + end + + test "raises for invalid reasoning" do + assert_raises(ArgumentError) do + LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map(reasoning: "extreme") + end + end + + test "raises for invalid cache retention" do + assert_raises(ArgumentError) do + LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map(cache_retention: "week") + end + end +end From 3a1b1bfa0cf322586133561fd55153d5d3c3120d Mon Sep 17 00:00:00 2001 From: gruv Date: Mon, 6 Apr 2026 16:42:22 +0800 Subject: [PATCH 12/14] fix: bug in response format mapping anthropic also update all the tests to test a superset of all options we know of --- .../adapters/anthropic_option_mapper.rb | 18 ++++++++++++++++-- .../options/anthropic_option_mapper_test.rb | 16 ++++++++++++++++ test/unit/options/groq_option_mapper_test.rb | 17 +++++++++++++++++ ...enai_chat_completions_option_mapper_test.rb | 17 +++++++++++++++++ .../options/openai_codex_option_mapper_test.rb | 15 +++++++++++++++ .../openai_responses_option_mapper_test.rb | 17 +++++++++++++++++ test/unit/options/option_mapper_fixture.rb | 16 ++++++++++++++++ 7 files changed, 114 insertions(+), 2 deletions(-) create mode 100644 test/unit/options/option_mapper_fixture.rb diff --git a/lib/llm_gateway/adapters/anthropic_option_mapper.rb b/lib/llm_gateway/adapters/anthropic_option_mapper.rb index a5e5744..1fdaaa4 100644 --- a/lib/llm_gateway/adapters/anthropic_option_mapper.rb +++ b/lib/llm_gateway/adapters/anthropic_option_mapper.rb @@ -14,12 +14,15 @@ module AnthropicOptionMapper module_function def map(options) - mapped_options = 
options.reject { |key, _| %i[reasoning max_completion_tokens prompt_cache_retention cache_key prompt_cache_key].include?(key) } - mapped_options[:max_tokens] = options[:max_completion_tokens] || 20480 + mapped_options = options.reject { |key, _| %i[reasoning max_completion_tokens response_format prompt_cache_retention cache_key prompt_cache_key].include?(key) } + mapped_options[:max_tokens] = options[:max_completion_tokens] || DEFAULT_MAX_TOKENS retention = options[:cache_retention] mapped_options[:cache_retention] = retention unless retention.nil? + response_format = options[:response_format] + mapped_options[:output_config] = normalize_output_config(response_format) unless response_format.nil? + reasoning = options[:reasoning] return mapped_options if reasoning.nil? || reasoning.to_s == "none" @@ -27,6 +30,17 @@ def map(options) mapped_options end + def normalize_output_config(response_format) + format_type = response_format.is_a?(Hash) ? response_format[:type] || response_format["type"] : response_format + + case format_type.to_s + when "json_object", "json_schema" + { format: "json_schema" } + else + { format: "text" } + end + end + def normalize_reasoning(reasoning) budget_tokens = REASONING_EFFORT_BUDGET_TOKENS[reasoning.to_s] || raise(ArgumentError, diff --git a/test/unit/options/anthropic_option_mapper_test.rb b/test/unit/options/anthropic_option_mapper_test.rb index 7172016..dd5ba33 100644 --- a/test/unit/options/anthropic_option_mapper_test.rb +++ b/test/unit/options/anthropic_option_mapper_test.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require "test_helper" +require_relative "option_mapper_fixture" class AnthropicOptionMapperTest < Test test "maps max_completion_tokens to max_tokens" do @@ -48,4 +49,19 @@ class AnthropicOptionMapperTest < Test LlmGateway::Adapters::AnthropicOptionMapper.map(reasoning: "extreme") end end + + test "maps all supported options into final output" do + mapped = 
LlmGateway::Adapters::AnthropicOptionMapper.map(OptionMapperFixture.superset_options) + + assert_equal( + { + max_tokens: 1234, + cache_retention: "long", + thinking: { type: "enabled", budget_tokens: 10 * 1024 }, + temperature: 0.2, + output_config: { format: "json_schema" } + }, + mapped + ) + end end diff --git a/test/unit/options/groq_option_mapper_test.rb b/test/unit/options/groq_option_mapper_test.rb index 45be414..8e0df03 100644 --- a/test/unit/options/groq_option_mapper_test.rb +++ b/test/unit/options/groq_option_mapper_test.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require "test_helper" +require_relative "option_mapper_fixture" class GroqOptionMapperTest < Test test "sets defaults for temperature max_completion_tokens and response_format" do @@ -28,4 +29,20 @@ class GroqOptionMapperTest < Test assert_equal({ type: "json_object" }, mapped[:response_format]) end + + test "maps all supported options into final output" do + mapped = LlmGateway::Adapters::Groq::OptionMapper.map(OptionMapperFixture.superset_options) + + assert_equal( + { + max_completion_tokens: 1234, + cache_key: "abc", + cache_retention: "long", + reasoning: "high", + temperature: 0.2, + response_format: { type: "json_object" } + }, + mapped + ) + end end diff --git a/test/unit/options/openai_chat_completions_option_mapper_test.rb b/test/unit/options/openai_chat_completions_option_mapper_test.rb index f5a68bc..41bd198 100644 --- a/test/unit/options/openai_chat_completions_option_mapper_test.rb +++ b/test/unit/options/openai_chat_completions_option_mapper_test.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require "test_helper" +require_relative "option_mapper_fixture" class OpenAiChatCompletionsOptionMapperTest < Test test "sets default max_completion_tokens" do @@ -71,4 +72,20 @@ class OpenAiChatCompletionsOptionMapperTest < Test LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(cache_retention: "week") end end + + test "maps all supported options into final output" do 
+ mapped = LlmGateway::Adapters::OpenAi::ChatCompletions::OptionMapper.map(OptionMapperFixture.superset_options) + + assert_equal( + { + max_completion_tokens: 1234, + prompt_cache_key: "abc", + prompt_cache_retention: "24h", + reasoning_effort: "high", + temperature: 0.2, + response_format: "json_object" + }, + mapped + ) + end end diff --git a/test/unit/options/openai_codex_option_mapper_test.rb b/test/unit/options/openai_codex_option_mapper_test.rb index 393ba79..4bb87ac 100644 --- a/test/unit/options/openai_codex_option_mapper_test.rb +++ b/test/unit/options/openai_codex_option_mapper_test.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require "test_helper" +require_relative "option_mapper_fixture" class OpenAiCodexOptionMapperTest < Test test "keeps prompt_cache_key but removes retention fields" do @@ -27,4 +28,18 @@ class OpenAiCodexOptionMapperTest < Test assert_equal({ effort: "low", summary: "detailed" }, mapped[:reasoning]) end + + test "maps all supported options into final output" do + mapped = LlmGateway::Adapters::OpenAiCodex::OptionMapper.map(OptionMapperFixture.superset_options) + + assert_equal( + { + prompt_cache_key: "abc", + reasoning: { effort: "high", summary: "detailed" }, + temperature: 0.2, + response_format: "json_object" + }, + mapped + ) + end end diff --git a/test/unit/options/openai_responses_option_mapper_test.rb b/test/unit/options/openai_responses_option_mapper_test.rb index 06b3052..3e8d2e1 100644 --- a/test/unit/options/openai_responses_option_mapper_test.rb +++ b/test/unit/options/openai_responses_option_mapper_test.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require "test_helper" +require_relative "option_mapper_fixture" class OpenAiResponsesOptionMapperTest < Test test "maps max_completion_tokens to max_output_tokens" do @@ -59,4 +60,20 @@ class OpenAiResponsesOptionMapperTest < Test LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map(cache_retention: "week") end end + + test "maps all supported options into final 
output" do + mapped = LlmGateway::Adapters::OpenAi::Responses::OptionMapper.map(OptionMapperFixture.superset_options) + + assert_equal( + { + max_output_tokens: 1234, + prompt_cache_key: "abc", + prompt_cache_retention: "24h", + reasoning: { effort: "high", summary: "detailed" }, + temperature: 0.2, + response_format: "json_object" + }, + mapped + ) + end end diff --git a/test/unit/options/option_mapper_fixture.rb b/test/unit/options/option_mapper_fixture.rb new file mode 100644 index 0000000..fb08136 --- /dev/null +++ b/test/unit/options/option_mapper_fixture.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +module OptionMapperFixture + module_function + + def superset_options + { + max_completion_tokens: 1234, + cache_key: "abc", + cache_retention: "long", + reasoning: "high", + temperature: 0.2, + response_format: "json_object" + } + end +end From 03c43364a1659970946c602c68c6c0c54e4afdef Mon Sep 17 00:00:00 2001 From: gruv Date: Mon, 6 Apr 2026 16:51:34 +0800 Subject: [PATCH 13/14] fixup! feat: support prompt caching, with cache_retention cache_key options --- lib/llm_gateway/clients/claude.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/llm_gateway/clients/claude.rb b/lib/llm_gateway/clients/claude.rb index de718b0..ce9e00d 100644 --- a/lib/llm_gateway/clients/claude.rb +++ b/lib/llm_gateway/clients/claude.rb @@ -88,7 +88,7 @@ def apply_message_cache_control(messages, cache_retention) last_user_message = mapped_messages[last_user_index] original_blocks = Array(last_user_message[:content]) - tagged_indices = [(original_blocks.length - 2), (original_blocks.length - 1)].select { |i| i >= 0 } + tagged_indices = [ (original_blocks.length - 2), (original_blocks.length - 1) ].select { |i| i >= 0 } content_blocks = original_blocks.each_with_index.map do |block, index| block = block.is_a?(Hash) ? 
 block.dup : { type: "text", text: block.to_s } From 479a93a139a0ebd9f9548e6ff8a1aa644a95506e Mon Sep 17 00:00:00 2001 From: gruv Date: Mon, 6 Apr 2026 16:56:57 +0800 Subject: [PATCH 14/14] refactor: claude supports automatic caching with cache-control option this adds a cache at the last message, so we don't have to do it ourselves --- lib/llm_gateway/clients/claude.rb | 36 ++++++++----------- .../unit/options/claude_cache_control_test.rb | 21 ++++++----- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/lib/llm_gateway/clients/claude.rb b/lib/llm_gateway/clients/claude.rb index ce9e00d..ea41c2d 100644 --- a/lib/llm_gateway/clients/claude.rb +++ b/lib/llm_gateway/clients/claude.rb @@ -43,17 +43,21 @@ def upload_file(filename, content, mime_type = "application/octet-stream") private def build_body(messages, tools: nil, system: [], cache_retention: nil, **options) + cache_control = anthropic_cache_control_for(cache_retention) + body = { model: model_key, - messages: apply_message_cache_control(messages, cache_retention) + messages: messages } + tools = apply_tools_cache_control(tools, cache_retention) body.merge!(tools: tools) if LlmGateway::Utils.present?(tools) system = prepend_claude_code_identity(system) if claude_code_oauth_api_key? system = apply_system_cache_control(system, cache_retention) body.merge!(system: system) if LlmGateway::Utils.present?(system) + body.merge!(cache_control: cache_control) unless cache_control.nil? body.merge!(options) body end @@ -76,32 +80,22 @@ def apply_system_cache_control(system, cache_retention) end end - def apply_message_cache_control(messages, cache_retention) - return messages if messages.nil? || messages.empty? || !messages.is_a?(Array) + def apply_tools_cache_control(tools, cache_retention) + return tools if tools.nil? || tools.empty? || !tools.is_a?(Array) cache_control = anthropic_cache_control_for(cache_retention) - return messages if cache_control.nil? 
- - mapped_messages = messages.map(&:dup) - last_user_index = mapped_messages.rindex { |message| message[:role] == "user" } - return mapped_messages unless last_user_index + return tools if cache_control.nil? - last_user_message = mapped_messages[last_user_index] - original_blocks = Array(last_user_message[:content]) - tagged_indices = [ (original_blocks.length - 2), (original_blocks.length - 1) ].select { |i| i >= 0 } - - content_blocks = original_blocks.each_with_index.map do |block, index| - block = block.is_a?(Hash) ? block.dup : { type: "text", text: block.to_s } - if tagged_indices.include?(index) - block[:cache_control] = cache_control + last_index = tools.length - 1 + tools.each_with_index.map do |tool, index| + tool = tool.dup + if index == last_index + tool[:cache_control] = cache_control else - block.delete(:cache_control) + tool.delete(:cache_control) end - block + tool end - - mapped_messages[last_user_index] = last_user_message.merge(content: content_blocks) - mapped_messages end def anthropic_cache_control_for(cache_retention) diff --git a/test/unit/options/claude_cache_control_test.rb b/test/unit/options/claude_cache_control_test.rb index 507d011..9a6c2be 100644 --- a/test/unit/options/claude_cache_control_test.rb +++ b/test/unit/options/claude_cache_control_test.rb @@ -3,7 +3,7 @@ require "test_helper" class ClaudeCacheControlTest < Test - test "when cache retention is passed it adds cache_control to last system block and up to two blocks in last user message" do + test "when cache retention is passed it adds cache_control to last system and tool blocks and sets top-level cache_control" do client = LlmGateway::Clients::Claude.new(model_key: "claude-3", api_key: "test") body = client.send( @@ -26,21 +26,20 @@ class ClaudeCacheControlTest < Test expected_cache_control = { type: "ephemeral" } - system_cache_control_count = body[:system].count { |block| block[:cache_control] == expected_cache_control } - tools_cache_control_count = body[:tools].count { 
|tool| tool[:cache_control] == expected_cache_control } + assert_equal expected_cache_control, body[:cache_control] assert_nil body[:system][0][:cache_control] assert_equal expected_cache_control, body[:system][1][:cache_control] + assert_nil body[:tools][0][:cache_control] + assert_equal expected_cache_control, body[:tools][1][:cache_control] + last_user_message = body[:messages].reverse.find { |message| message[:role] == "user" } last_user_cache_control_count = Array(last_user_message[:content]).count do |block| block[:cache_control] == expected_cache_control end - assert_equal 1, system_cache_control_count - assert_equal 0, tools_cache_control_count - assert_equal 2, last_user_cache_control_count - assert_operator(system_cache_control_count + tools_cache_control_count + last_user_cache_control_count, :<=, 3) + assert_equal 0, last_user_cache_control_count end test "uses ttl for long retention on official anthropic base url" do @@ -50,11 +49,14 @@ class ClaudeCacheControlTest < Test :build_body, [ { role: "user", content: [ { type: "text", text: "hello" } ] } ], system: [ { type: "text", text: "system" } ], + tools: [ { name: "tool_1", description: "Tool 1", input_schema: { type: "object", properties: {} } } ], cache_retention: "long" ) + assert_equal({ type: "ephemeral", ttl: "1h" }, body[:cache_control]) assert_equal({ type: "ephemeral", ttl: "1h" }, body[:system][0][:cache_control]) - assert_equal({ type: "ephemeral", ttl: "1h" }, body[:messages][0][:content][0][:cache_control]) + assert_equal({ type: "ephemeral", ttl: "1h" }, body[:tools][0][:cache_control]) + assert_nil body[:messages][0][:content][0][:cache_control] end test "does not mutate existing cache control when retention is none" do @@ -64,10 +66,13 @@ class ClaudeCacheControlTest < Test :build_body, [ { role: "user", content: [ { type: "text", text: "hello", cache_control: { type: "ephemeral" } } ] } ], system: [ { type: "text", text: "system", cache_control: { type: "ephemeral" } } ], + tools: [ { 
name: "tool_1", description: "Tool 1", cache_control: { type: "ephemeral" }, input_schema: { type: "object", properties: {} } } ], cache_retention: "none" ) + assert_nil body[:cache_control] assert_equal({ type: "ephemeral" }, body[:system][0][:cache_control]) + assert_equal({ type: "ephemeral" }, body[:tools][0][:cache_control]) assert_equal({ type: "ephemeral" }, body[:messages][0][:content][0][:cache_control]) end end