diff --git a/backend/internal/capability/capability.go b/backend/internal/capability/capability.go index 08c120e..dce948d 100644 --- a/backend/internal/capability/capability.go +++ b/backend/internal/capability/capability.go @@ -33,6 +33,7 @@ var rules = []rule{ {"o4", []core.Capability{core.CapToolCalling, core.CapReasoning, core.CapStructuredOutput}, false}, {"claude", []core.Capability{core.CapToolCalling, core.CapVision, core.CapReasoning, core.CapLongContext}, false}, {"gemini", []core.Capability{core.CapToolCalling, core.CapVision, core.CapAudioInput, core.CapLongContext}, false}, + {"deepseek-v4", []core.Capability{core.CapToolCalling, core.CapVision, core.CapReasoning, core.CapLongContext}, false}, {"deepseek", []core.Capability{core.CapToolCalling, core.CapReasoning}, false}, {"glm", []core.Capability{core.CapToolCalling, core.CapLongContext}, false}, {"minimax", []core.Capability{core.CapToolCalling, core.CapLongContext}, false}, diff --git a/backend/internal/connectors/catalog.go b/backend/internal/connectors/catalog.go index 27f076d..3693574 100644 --- a/backend/internal/connectors/catalog.go +++ b/backend/internal/connectors/catalog.go @@ -211,7 +211,7 @@ func apiKeyProviders() []ProviderSpec { {ID: "deepseek", DisplayName: "DeepSeek", Alias: "ds", Dialect: core.DialectOpenAI, BaseURL: "https://api.deepseek.com", AuthKind: "api_key", ServiceKinds: llm(), Color: "#4D6BFE", Website: "https://deepseek.com", APIKeyURL: "https://platform.deepseek.com/api_keys", - InputPerM: 0.27, OutputPerM: 1.1}, + InputPerM: 0.14, OutputPerM: 0.28}, {ID: "glm", DisplayName: "GLM Coding", Alias: "glm", Dialect: core.DialectAnthropic, BaseURL: "https://api.z.ai/api/anthropic/v1", AuthKind: "api_key", ServiceKinds: llm(), Color: "#2563EB", Website: "https://open.bigmodel.cn", APIKeyURL: "https://open.bigmodel.cn/usercenter/apikeys", diff --git a/backend/internal/connectors/catalog_test.go b/backend/internal/connectors/catalog_test.go index c61dc19..a218c06 100644 --- 
a/backend/internal/connectors/catalog_test.go +++ b/backend/internal/connectors/catalog_test.go @@ -79,11 +79,35 @@ func TestFindModel(t *testing.T) { if _, ok := FindModel("commandcode", "deepseek/deepseek-v4-pro"); !ok { t.Error("expected to find commandcode/deepseek/deepseek-v4-pro") } + if _, ok := FindModel("deepseek", "deepseek-v4-pro-max"); !ok { + t.Error("expected to find deepseek/deepseek-v4-pro-max") + } + if _, ok := FindModel("deepseek", "deepseek-v4-pro-none"); !ok { + t.Error("expected to find deepseek/deepseek-v4-pro-none") + } if _, ok := FindModel("openai", "nonexistent-model"); ok { t.Error("expected miss for nonexistent model") } } +func TestDeepSeekPricing(t *testing.T) { + flash, ok := ModelPriceByProviderModel("deepseek", "deepseek-v4-flash") + if !ok { + t.Fatal("missing deepseek-v4-flash price") + } + if flash.InputPerM != 0.14 || flash.OutputPerM != 0.28 || flash.CachedInputPerM != 0.0028 || flash.ReasoningPerM != 0.28 { + t.Fatalf("unexpected deepseek-v4-flash pricing: %+v", flash) + } + + pro, ok := ModelPriceByProviderModel("deepseek", "deepseek-v4-pro") + if !ok { + t.Fatal("missing deepseek-v4-pro price") + } + if pro.InputPerM != 0.435 || pro.OutputPerM != 0.87 || pro.CachedInputPerM != 0.003625 || pro.ReasoningPerM != 0.87 { + t.Fatalf("unexpected deepseek-v4-pro pricing: %+v", pro) + } +} + func TestCommandCodeCatalogVisible(t *testing.T) { spec, ok := SpecByID("commandcode") if !ok { diff --git a/backend/internal/connectors/model_prices.go b/backend/internal/connectors/model_prices.go index 38e28c3..2ca1abd 100644 --- a/backend/internal/connectors/model_prices.go +++ b/backend/internal/connectors/model_prices.go @@ -174,9 +174,15 @@ func anthropicModelPrices() []ModelPrice { func deepseekModelPrices() []ModelPrice { return []ModelPrice{ - {Provider: "deepseek", Model: "deepseek-chat", InputPerM: 0.27, OutputPerM: 1.1, CachedInputPerM: 0.07, CacheWritePerM: 0.27}, - {Provider: "deepseek", Model: "deepseek-coder", InputPerM: 0.27, 
OutputPerM: 1.1, CachedInputPerM: 0.07, CacheWritePerM: 0.27}, - {Provider: "deepseek", Model: "deepseek-reasoner", InputPerM: 0.55, OutputPerM: 2.19, CachedInputPerM: 0.14, CacheWritePerM: 0.55, ReasoningPerM: 2.19}, + {Provider: "deepseek", Model: "deepseek-chat", InputPerM: 0.14, OutputPerM: 0.28, CachedInputPerM: 0.0028, CacheWritePerM: 0.14, ReasoningPerM: 0.28}, + {Provider: "deepseek", Model: "deepseek-reasoner", InputPerM: 0.14, OutputPerM: 0.28, CachedInputPerM: 0.0028, CacheWritePerM: 0.14, ReasoningPerM: 0.28}, + {Provider: "deepseek", Model: "deepseek-r1", InputPerM: 0.14, OutputPerM: 0.28, CachedInputPerM: 0.0028, CacheWritePerM: 0.14, ReasoningPerM: 0.28}, + {Provider: "deepseek", Model: "deepseek-v3.2-chat", InputPerM: 0.14, OutputPerM: 0.28, CachedInputPerM: 0.0028, CacheWritePerM: 0.14, ReasoningPerM: 0.28}, + {Provider: "deepseek", Model: "deepseek-v3.2-reasoner", InputPerM: 0.14, OutputPerM: 0.28, CachedInputPerM: 0.0028, CacheWritePerM: 0.14, ReasoningPerM: 0.28}, + {Provider: "deepseek", Model: "deepseek-v4-flash", InputPerM: 0.14, OutputPerM: 0.28, CachedInputPerM: 0.0028, CacheWritePerM: 0.14, ReasoningPerM: 0.28}, + {Provider: "deepseek", Model: "deepseek-v4-pro", InputPerM: 0.435, OutputPerM: 0.87, CachedInputPerM: 0.003625, CacheWritePerM: 0.435, ReasoningPerM: 0.87}, + {Provider: "deepseek", Model: "deepseek-v4-pro-max", InputPerM: 0.435, OutputPerM: 0.87, CachedInputPerM: 0.003625, CacheWritePerM: 0.435, ReasoningPerM: 0.87}, + {Provider: "deepseek", Model: "deepseek-v4-pro-none", InputPerM: 0.435, OutputPerM: 0.87, CachedInputPerM: 0.003625, CacheWritePerM: 0.435, ReasoningPerM: 0.87}, } } diff --git a/backend/internal/connectors/models.go b/backend/internal/connectors/models.go index b0c0f97..2683d13 100644 --- a/backend/internal/connectors/models.go +++ b/backend/internal/connectors/models.go @@ -73,8 +73,9 @@ var providerModels = map[string][]ModelSpec{ m("claude-3-5-sonnet-20241022", "Claude 3.5 Sonnet"), }, "deepseek": { - 
m("deepseek-chat", "DeepSeek Chat"), m("deepseek-reasoner", "DeepSeek Reasoner"), - m("deepseek-v4-pro", "DeepSeek V4 Pro"), m("deepseek-v4-flash", "DeepSeek V4 Flash"), + m("deepseek-v4-pro", "DeepSeek V4 Pro"), m("deepseek-v4-pro-max", "DeepSeek V4 Pro Max"), + m("deepseek-v4-pro-none", "DeepSeek V4 Pro No Thinking"), m("deepseek-v4-flash", "DeepSeek V4 Flash"), + m("deepseek-chat", "DeepSeek V3.2 Chat"), m("deepseek-reasoner", "DeepSeek V3.2 Reasoner"), }, "glm": {m("glm-5.1", "GLM 5.1"), m("glm-5", "GLM 5"), m("glm-4.7", "GLM 4.7"), m("glm-4.6v", "GLM 4.6V (Vision)")}, "glm-cn": {m("glm-5.1", "GLM 5.1"), m("glm-5", "GLM 5"), m("glm-4.7", "GLM-4.7"), m("glm-4.6", "GLM-4.6"), m("glm-4.5-air", "GLM-4.5-Air")}, diff --git a/backend/internal/transform/openai.go b/backend/internal/transform/openai.go index 7075adf..c98d127 100644 --- a/backend/internal/transform/openai.go +++ b/backend/internal/transform/openai.go @@ -1,8 +1,9 @@ package transform import ( - json "github.com/mydisha/keirouter/backend/internal/fastjson" + "bytes" "fmt" + json "github.com/mydisha/keirouter/backend/internal/fastjson" "io" "strings" @@ -18,17 +19,24 @@ func (OpenAICodec) Dialect() core.Dialect { return core.DialectOpenAI } // ---- wire types ------------------------------------------------------------- type oaiRequest struct { - Model string `json:"model"` - Messages []oaiMessage `json:"messages"` - Tools []oaiTool `json:"tools,omitempty"` - ToolChoice any `json:"tool_choice,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - TopP *float64 `json:"top_p,omitempty"` - MaxTokens *int `json:"max_tokens,omitempty"` - Stop []string `json:"stop,omitempty"` - Stream bool `json:"stream,omitempty"` - StreamOpts *oaiStreamOpt `json:"stream_options,omitempty"` - ResponseFormat json.RawMessage `json:"response_format,omitempty"` + Model string `json:"model"` + Messages []oaiMessage `json:"messages"` + Tools []oaiTool `json:"tools,omitempty"` + ToolChoice any 
`json:"tool_choice,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + MaxTokens *int `json:"max_tokens,omitempty"` + Stop []string `json:"stop,omitempty"` + Stream bool `json:"stream,omitempty"` + StreamOpts *oaiStreamOpt `json:"stream_options,omitempty"` + ResponseFormat json.RawMessage `json:"response_format,omitempty"` + ReasoningEffort string `json:"reasoning_effort,omitempty"` + Thinking *oaiThinking `json:"thinking,omitempty"` + ExtraBody map[string]any `json:"extra_body,omitempty"` +} + +type oaiThinking struct { + Type string `json:"type,omitempty"` } type oaiStreamOpt struct { @@ -36,11 +44,11 @@ type oaiStreamOpt struct { } type oaiMessage struct { - Role string `json:"role"` + Role string `json:"role"` Content json.RawMessage `json:"content,omitempty"` - Name string `json:"name,omitempty"` - ToolCalls []oaiToolCall `json:"tool_calls,omitempty"` - ToolCallID string `json:"tool_call_id,omitempty"` + Name string `json:"name,omitempty"` + ToolCalls []oaiToolCall `json:"tool_calls,omitempty"` + ToolCallID string `json:"tool_call_id,omitempty"` // ReasoningContent carries thinking/reasoning text that must be echoed // back on follow-up turns for DeepSeek, MiniMax, and similar providers. // Omitted when empty to avoid 400 errors on providers that don't support it. 
@@ -51,8 +59,8 @@ type oaiToolCall struct { ID string `json:"id"` Type string `json:"type"` Function struct { - Name string `json:"name"` - Arguments string `json:"arguments"` + Name string `json:"name"` + Arguments json.RawMessage `json:"arguments"` } `json:"function"` } @@ -82,6 +90,16 @@ func (OpenAICodec) ParseRequest(body []byte) (*core.ChatRequest, error) { Stream: raw.Stream, ToolChoice: raw.ToolChoice, } + if raw.ReasoningEffort != "" { + req.Reasoning = &core.ReasoningConfig{Effort: raw.ReasoningEffort} + } else if raw.Thinking != nil && raw.Thinking.Type != "" { + switch strings.ToLower(raw.Thinking.Type) { + case "disabled": + req.Reasoning = &core.ReasoningConfig{Effort: "none"} + case "enabled": + req.Reasoning = &core.ReasoningConfig{Effort: "auto"} + } + } for _, t := range raw.Tools { if t.Type != "function" && t.Type != "" { @@ -137,7 +155,7 @@ func parseOAIMessage(m oaiMessage) (msg core.Message, isSystem bool, sysText str ToolCall: &core.ToolCall{ ID: tc.ID, Name: tc.Function.Name, - Arguments: json.RawMessage(tc.Function.Arguments), + Arguments: normalizeOpenAIToolArguments(tc.Function.Arguments), }, }) } @@ -306,26 +324,129 @@ func fallbackResponseFormat(responseFormat json.RawMessage) json.RawMessage { return out } +// reasoningPlaceholder is injected as reasoning_content on assistant messages +// that lack real reasoning when the target requires it. A single space is the +// minimal non-empty value that satisfies upstream "must be passed back" +// validation without polluting the conversation. +const reasoningPlaceholder = " " + +// reasoningScope controls how aggressively reasoning_content is injected on +// assistant messages that lack it. DeepSeek requires it on ALL assistant +// turns, while Kimi only requires it on turns that carry tool_calls. 
type reasoningScope int

const (
	// reasoningNone: never inject a placeholder reasoning_content.
	reasoningNone reasoningScope = iota
	// reasoningAll: inject on all assistant messages (DeepSeek).
	reasoningAll
	// reasoningToolCalls: inject only on assistant messages with tool_calls (Kimi).
	reasoningToolCalls
)

// reasoningEchoScope classifies a provider/model into the reasoning_content
// injection scope:
//   - reasoningAll for DeepSeek targets, as decided by isDeepSeekTarget (the
//     "deepseek" provider, or any model whose name contains "deepseek", which
//     covers models served through OpenAI-compatible aggregators);
//   - reasoningToolCalls for models whose lowercased name starts with "kimi-"
//     or "moonshot/kimi-";
//   - reasoningNone for everything else.
//
// DeepSeek detection is delegated to isDeepSeekTarget so the placeholder
// injection and the DeepSeek-specific request fixes always agree on what
// counts as a DeepSeek target.
func reasoningEchoScope(providerID, model string) reasoningScope {
	if isDeepSeekTarget(providerID, model) {
		return reasoningAll
	}
	m := strings.ToLower(model)
	if strings.HasPrefix(m, "kimi-") || strings.HasPrefix(m, "moonshot/kimi-") {
		return reasoningToolCalls
	}
	return reasoningNone
}

// requiresReasoningEcho reports whether the given provider/model needs a
// reasoning_content placeholder on at least some assistant turns, i.e. whether
// its scope is anything other than reasoningNone. It is true for Kimi models as
// well as DeepSeek ones, so use isDeepSeekTarget, not this function, to gate
// DeepSeek-only behaviour.
func requiresReasoningEcho(providerID, model string) bool {
	return reasoningEchoScope(providerID, model) != reasoningNone
}

// isDeepSeekTarget reports whether the target is a DeepSeek model: either the
// provider ID is exactly "deepseek" or the model name contains "deepseek"
// (case-insensitively). Used to gate DeepSeek-specific request fixes (thinking
// mode, tool ID sanitization, missing tool response fills) that should not run
// for Kimi or other providers.
func isDeepSeekTarget(providerID, model string) bool {
	if providerID == "deepseek" {
		return true
	}
	return strings.Contains(strings.ToLower(model), "deepseek")
}

// shouldInjectReasoning reports whether a placeholder reasoning_content should
// be injected on the given rendered message.
The scope controls the breadth: +// - reasoningAll: all assistant messages without real reasoning (DeepSeek) +// - reasoningToolCalls: only assistant messages that carry tool_calls (Kimi) +// +// Messages that already have genuine reasoning_content are never overwritten so +// the real chain-of-thought is preserved. +func shouldInjectReasoning(scope reasoningScope, msg oaiMessage) bool { + if scope == reasoningNone || msg.Role != string(core.RoleAssistant) { + return false + } + if msg.ReasoningContent != "" { + return false // genuine reasoning — never overwrite + } + if scope == reasoningToolCalls { + return len(msg.ToolCalls) > 0 + } + return true // reasoningAll +} + // RenderRequestForProvider renders an OpenAI request for a specific provider, -// applying provider-specific fallbacks (e.g. json_schema → json_object). +// applying provider-specific fallbacks (e.g. json_schema → json_object) and, +// for reasoning providers, ensuring assistant messages carry reasoning_content. func (c OpenAICodec) RenderRequestForProvider(req *core.ChatRequest, providerID string) ([]byte, error) { + scope := reasoningEchoScope(providerID, req.Model) if needsJSONSchemaFallback(providerID, req.ResponseFormat) { clone := *req clone.ResponseFormat = fallbackResponseFormat(req.ResponseFormat) - return c.RenderRequest(&clone) + return renderOAIRequestForProvider(&clone, providerID, scope) } - return c.RenderRequest(req) + return renderOAIRequestForProvider(req, providerID, scope) } func (OpenAICodec) RenderRequest(req *core.ChatRequest) ([]byte, error) { + return renderOAIRequest(req, reasoningNone) +} + +func renderOAIRequestForProvider(req *core.ChatRequest, providerID string, scope reasoningScope) ([]byte, error) { + out, err := buildOAIRequest(req, scope) + if err != nil { + return nil, err + } + if isDeepSeekTarget(providerID, req.Model) { + applyDeepSeekRequestFixes(out, req, providerID) + } + return json.Marshal(out) +} + +// renderOAIRequest renders a canonical request to the 
OpenAI wire format. When +// scope is non-none, assistant messages that carry no real reasoning get a +// placeholder reasoning_content so reasoning-mode providers (DeepSeek, Kimi, +// etc.) don't reject the follow-up turn with a 400. Messages with genuine +// reasoning are left untouched so the real chain-of-thought is preserved. +func renderOAIRequest(req *core.ChatRequest, scope reasoningScope) ([]byte, error) { + out, err := buildOAIRequest(req, scope) + if err != nil { + return nil, err + } + return json.Marshal(out) +} + +func buildOAIRequest(req *core.ChatRequest, scope reasoningScope) (*oaiRequest, error) { out := oaiRequest{ - Model: req.Model, - Temperature: req.Temperature, - TopP: req.TopP, - MaxTokens: req.MaxTokens, - Stop: req.Stop, - Stream: req.Stream, - ToolChoice: req.ToolChoice, + Model: req.Model, + Temperature: req.Temperature, + TopP: req.TopP, + MaxTokens: req.MaxTokens, + Stop: req.Stop, + Stream: req.Stream, + ToolChoice: req.ToolChoice, ResponseFormat: req.ResponseFormat, } // Note: stream_options with include_usage is intentionally omitted. 
Many @@ -338,7 +459,12 @@ func (OpenAICodec) RenderRequest(req *core.ChatRequest) ([]byte, error) { } for _, m := range req.Messages { - out.Messages = append(out.Messages, renderOAIMessage(m)) + for _, msg := range renderOAIMessages(m) { + if shouldInjectReasoning(scope, msg) { + msg.ReasoningContent = reasoningPlaceholder + } + out.Messages = append(out.Messages, msg) + } } for _, t := range req.Tools { @@ -350,7 +476,202 @@ func (OpenAICodec) RenderRequest(req *core.ChatRequest) ([]byte, error) { out.Tools = append(out.Tools, tool) } - return json.Marshal(out) + return &out, nil +} + +func applyDeepSeekRequestFixes(out *oaiRequest, req *core.ChatRequest, providerID string) { + applyDeepSeekThinking(out, req, providerID) + normalizeDeepSeekToolMessages(out.Messages) + out.Messages = fillMissingDeepSeekToolResponses(out.Messages) +} + +func applyDeepSeekThinking(out *oaiRequest, req *core.ChatRequest, providerID string) { + if req.Reasoning != nil { + effort := strings.ToLower(req.Reasoning.Effort) + if effort == "none" || effort == "off" { + out.Thinking = &oaiThinking{Type: "disabled"} + out.ReasoningEffort = "" + } else { + out.Thinking = &oaiThinking{Type: "enabled"} + if effort == "max" || effort == "xhigh" { + out.ReasoningEffort = "max" + } else { + out.ReasoningEffort = "high" + } + } + } + + if providerID != "deepseek" { + return + } + switch strings.ToLower(req.Model) { + case "deepseek-v4-pro-max": + out.Model = "deepseek-v4-pro" + out.Thinking = nil + out.ReasoningEffort = "max" + setDeepSeekExtraThinking(out, "enabled") + case "deepseek-v4-pro-none": + out.Model = "deepseek-v4-pro" + out.Thinking = nil + out.ReasoningEffort = "" + setDeepSeekExtraThinking(out, "disabled") + } +} + +func setDeepSeekExtraThinking(out *oaiRequest, typ string) { + if out.ExtraBody == nil { + out.ExtraBody = map[string]any{} + } + out.ExtraBody["thinking"] = map[string]any{"type": typ} +} + +func normalizeDeepSeekToolMessages(messages []oaiMessage) { + for i := range 
// sanitizeDeepSeekToolID returns id with every character other than ASCII
// letters, ASCII digits, '_' and '-' removed.
//
// When nothing of id survives the filter, a deterministic fallback is built
// from the message index and tool-call index, suffixed with the filtered tool
// name when that is non-empty:
//
//	call_msg<msgIndex>_tc<tcIndex>_<name>
//	call_msg<msgIndex>_tc<tcIndex>
//
// The tool name is filtered with the same non-recursive helper as id. An
// earlier version recursed into this function to clean toolName; when both id
// and toolName filtered down to "" (e.g. a tool_call_id of "::" with no tool
// name, or an empty id with a non-ASCII tool name) that recursion never
// terminated and crashed the process with a stack overflow.
func sanitizeDeepSeekToolID(id string, msgIndex, tcIndex int, toolName string) string {
	if clean := keepDeepSeekIDRunes(id); clean != "" {
		return clean
	}
	if name := keepDeepSeekIDRunes(toolName); name != "" {
		return fmt.Sprintf("call_msg%d_tc%d_%s", msgIndex, tcIndex, name)
	}
	return fmt.Sprintf("call_msg%d_tc%d", msgIndex, tcIndex)
}

// keepDeepSeekIDRunes drops every rune of s that is not an ASCII letter, an
// ASCII digit, '_' or '-'.
func keepDeepSeekIDRunes(s string) string {
	return strings.Map(func(r rune) rune {
		switch {
		case r >= 'a' && r <= 'z',
			r >= 'A' && r <= 'Z',
			r >= '0' && r <= '9',
			r == '_', r == '-':
			return r
		}
		return -1 // a negative result tells strings.Map to drop the rune
	}, s)
}

// ensureToolArgumentsJSONString returns tool-call arguments encoded as a JSON
// string value, which is the form the OpenAI wire format uses for
// function.arguments.
//
//   - Empty, whitespace-only or null input becomes the JSON string "{}".
//   - Input that already is a JSON string is re-encoded as-is, except that an
//     empty string becomes "{}".
//   - Any other input (typically a raw JSON object) has its trimmed bytes
//     wrapped in a JSON string.
func ensureToolArgumentsJSONString(raw json.RawMessage) json.RawMessage {
	trimmed := bytes.TrimSpace(raw)
	if len(trimmed) == 0 || string(trimmed) == "null" {
		trimmed = []byte("{}")
	}
	var s string
	if err := json.Unmarshal(trimmed, &s); err == nil {
		// Already a JSON string: keep its content, defaulting empty to "{}".
		if s == "" {
			s = "{}"
		}
		// Encoding a plain string is not expected to fail, so the error is
		// deliberately ignored.
		out, _ := json.Marshal(s)
		return out
	}
	// Not a JSON string: wrap the raw text itself. Same note on the ignored
	// error as above.
	out, _ := json.Marshal(string(trimmed))
	return out
}
+ responded := make(map[string]bool) + for j := i + 1; j < len(messages) && messages[j].Role == "tool"; j++ { + if messages[j].ToolCallID != "" { + responded[messages[j].ToolCallID] = true + } + } + for _, id := range deepSeekToolCallIDs(msg) { + if responded[id] { + continue + } + content, _ := json.Marshal("") + out = append(out, oaiMessage{Role: "tool", ToolCallID: id, Content: content}) + } + } + return out +} + +func deepSeekToolCallIDs(msg oaiMessage) []string { + ids := make([]string, 0, len(msg.ToolCalls)) + for _, tc := range msg.ToolCalls { + if tc.ID != "" { + ids = append(ids, tc.ID) + } + } + return ids +} + +func hasDeepSeekToolResult(msg oaiMessage, id string) bool { + return msg.Role == "tool" && msg.ToolCallID == id +} + +// renderOAIMessages splits a canonical message into one or more OpenAI wire +// messages. In the OpenAI format, each tool result must be its own message with +// role "tool" and a tool_call_id. Other content (text, images, tool calls) is +// rendered into a single message via renderOAIMessage. Tool result messages +// are emitted first, followed by any remaining content. +// +// This is critical for cross-dialect translation: Anthropic groups multiple +// tool_result blocks into a single user message, but OpenAI (and DeepSeek) +// require each tool result as a separate "tool" role message. Without this +// splitting, assistant messages with N tool_calls would be followed by only 1 +// tool message, triggering "insufficient tool messages" 400 errors. +func renderOAIMessages(m core.Message) []oaiMessage { + // Fast path: no tool results → delegate to the single-message renderer. + hasToolResult := false + for _, p := range m.Content { + if p.Type == core.PartToolResult { + hasToolResult = true + break + } + } + if !hasToolResult { + return []oaiMessage{renderOAIMessage(m)} + } + + var out []oaiMessage + // Emit each tool result as its own OpenAI tool message. 
+ for _, p := range m.Content { + if p.Type != core.PartToolResult { + continue + } + content, _ := json.Marshal(p.ToolResult.Content) + out = append(out, oaiMessage{ + Role: "tool", + ToolCallID: p.ToolResult.CallID, + Content: content, + }) + } + + // If there's remaining content (text, images, tool calls), render it + // into a separate message with the original role. + var remaining []core.ContentPart + for _, p := range m.Content { + if p.Type != core.PartToolResult { + remaining = append(remaining, p) + } + } + if len(remaining) > 0 { + trimmed := core.Message{Role: m.Role, Name: m.Name, Content: remaining} + out = append(out, renderOAIMessage(trimmed)) + } + return out } func renderOAIMessage(m core.Message) oaiMessage { @@ -383,9 +704,12 @@ func renderOAIMessage(m core.Message) oaiMessage { tc.ID = p.ToolCall.ID tc.Type = "function" tc.Function.Name = p.ToolCall.Name - tc.Function.Arguments = string(p.ToolCall.Arguments) + tc.Function.Arguments = ensureToolArgumentsJSONString(p.ToolCall.Arguments) out.ToolCalls = append(out.ToolCalls, tc) case core.PartToolResult: + // Tool results are handled by renderOAIMessages; if we reach + // here via direct renderOAIMessage call, handle the first one + // for backward compatibility. out.Role = "tool" out.ToolCallID = p.ToolResult.CallID content, _ := json.Marshal(p.ToolResult.Content) @@ -428,12 +752,12 @@ type oaiResponse struct { Model string `json:"model"` Choices []struct { Message struct { - Role string `json:"role"` - Content string `json:"content"` + Role string `json:"role"` + Content string `json:"content"` // ReasoningContent carries thinking/reasoning text from models // that expose it as a structured field (DeepSeek, some MiMo). 
ReasoningContent string `json:"reasoning_content"` - ToolCalls []oaiToolCall `json:"tool_calls"` + ToolCalls []oaiToolCall `json:"tool_calls"` } `json:"message"` FinishReason string `json:"finish_reason"` } `json:"choices"` @@ -441,10 +765,10 @@ type oaiResponse struct { } type oaiUsage struct { - PromptTokens int `json:"prompt_tokens"` - CompletionTokens int `json:"completion_tokens"` - TotalTokens int `json:"total_tokens"` - PromptTokensDetails *struct { + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` + PromptTokensDetails *struct { CachedTokens int `json:"cached_tokens"` } `json:"prompt_tokens_details,omitempty"` } @@ -502,7 +826,7 @@ func (OpenAICodec) buildResponse(raw oaiResponse, model string) (*core.ChatRespo ToolCall: &core.ToolCall{ ID: tc.ID, Name: tc.Function.Name, - Arguments: json.RawMessage(tc.Function.Arguments), + Arguments: normalizeOpenAIToolArguments(tc.Function.Arguments), }, }) } @@ -546,9 +870,12 @@ func (OpenAICodec) RenderResponse(resp *core.ChatResponse) ([]byte, error) { func renderOAIChoice(resp *core.ChatResponse) map[string]any { message := map[string]any{"role": "assistant"} var text strings.Builder + var thinking strings.Builder var toolCalls []map[string]any for _, p := range resp.Message.Content { switch p.Type { + case core.PartThinking: + thinking.WriteString(p.Text) case core.PartText: text.WriteString(p.Text) case core.PartToolCall: @@ -567,9 +894,16 @@ func renderOAIChoice(resp *core.ChatResponse) map[string]any { } else { message["content"] = nil } + // Surface structured reasoning so clients can replay it on follow-up turns + // (DeepSeek/MiniMax thinking mode require reasoning_content to be echoed + // back or the next request returns a 400). 
+ if thinking.Len() > 0 { + message["reasoning_content"] = thinking.String() + } if len(toolCalls) > 0 { message["tool_calls"] = toolCalls } + return map[string]any{ "index": 0, "message": message, diff --git a/backend/internal/transform/openai_reasoning_test.go b/backend/internal/transform/openai_reasoning_test.go new file mode 100644 index 0000000..76c8521 --- /dev/null +++ b/backend/internal/transform/openai_reasoning_test.go @@ -0,0 +1,662 @@ +package transform + +import ( + "encoding/json" + "strings" + "testing" + + "github.com/mydisha/keirouter/backend/internal/core" + "github.com/stretchr/testify/require" +) + +// TestOpenAI_RenderStreamChunk_Thinking verifies structured reasoning is +// emitted to the client as reasoning_content (issue #17: DeepSeek thinking +// mode requires it on follow-up turns). +func TestOpenAI_RenderStreamChunk_Thinking(t *testing.T) { + state := &StreamState{Model: "deepseek-reasoner", MessageID: "id1"} + events, err := OpenAICodec{}.RenderStreamChunk( + core.StreamChunk{Type: core.ChunkThinking, Delta: "let me think"}, state) + require.NoError(t, err) + require.Len(t, events, 1) + + payload := strings.TrimPrefix(string(events[0]), "data: ") + var got struct { + Choices []struct { + Delta struct { + Role string `json:"role"` + ReasoningContent string `json:"reasoning_content"` + } `json:"delta"` + } `json:"choices"` + } + require.NoError(t, json.Unmarshal([]byte(strings.TrimSpace(payload)), &got)) + require.Len(t, got.Choices, 1) + require.Equal(t, "assistant", got.Choices[0].Delta.Role) + require.Equal(t, "let me think", got.Choices[0].Delta.ReasoningContent) +} + +// TestOpenAI_StreamReasoning_RoundTrip verifies a reasoning_content delta from +// upstream is parsed and re-rendered back to the client without loss. 
+func TestOpenAI_StreamReasoning_RoundTrip(t *testing.T) { + line := []byte(`{"id":"c1","model":"deepseek-reasoner","choices":[{"delta":{"reasoning_content":"step one"}}]}`) + chunks, err := OpenAICodec{}.ParseStreamLine(line, "deepseek-reasoner") + require.NoError(t, err) + require.Len(t, chunks, 1) + require.Equal(t, core.ChunkThinking, chunks[0].Type) + require.Equal(t, "step one", chunks[0].Delta) + + state := &StreamState{Model: "deepseek-reasoner"} + events, err := OpenAICodec{}.RenderStreamChunk(chunks[0], state) + require.NoError(t, err) + require.Contains(t, string(events[0]), "reasoning_content") + require.Contains(t, string(events[0]), "step one") +} + +// TestOpenAI_RenderResponse_Reasoning verifies non-streaming responses surface +// reasoning_content for clients that replay it. +func TestOpenAI_RenderResponse_Reasoning(t *testing.T) { + resp := &core.ChatResponse{ + Model: "deepseek-reasoner", + Message: core.Message{ + Role: core.RoleAssistant, + Content: []core.ContentPart{ + {Type: core.PartThinking, Text: "internal reasoning"}, + {Type: core.PartText, Text: "the answer"}, + }, + }, + FinishReason: core.FinishStop, + } + body, err := OpenAICodec{}.RenderResponse(resp) + require.NoError(t, err) + + var got struct { + Choices []struct { + Message struct { + Content string `json:"content"` + ReasoningContent string `json:"reasoning_content"` + } `json:"message"` + } `json:"choices"` + } + require.NoError(t, json.Unmarshal(body, &got)) + require.Len(t, got.Choices, 1) + require.Equal(t, "the answer", got.Choices[0].Message.Content) + require.Equal(t, "internal reasoning", got.Choices[0].Message.ReasoningContent) +} + +// TestOpenAI_RenderRequest_InjectReasoningPlaceholder verifies the safety net: +// for DeepSeek targets, assistant messages without reasoning get a placeholder +// reasoning_content so the upstream doesn't reject the turn with a 400. 
+func TestOpenAI_RenderRequest_InjectReasoningPlaceholder(t *testing.T) { + req := &core.ChatRequest{ + Model: "deepseek-chat", + Messages: []core.Message{ + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "hi"}}}, + {Role: core.RoleAssistant, Content: []core.ContentPart{{Type: core.PartText, Text: "hello"}}}, + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "continue"}}}, + }, + } + body, err := OpenAICodec{}.RenderRequestForProvider(req, "deepseek") + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + // Assistant message (index 1) should carry the placeholder. + require.Equal(t, "assistant", got.Messages[1].Role) + require.Equal(t, reasoningPlaceholder, got.Messages[1].ReasoningContent) +} + +// TestOpenAI_RenderRequest_PreservesRealReasoning verifies genuine reasoning is +// kept intact (not overwritten by the placeholder). +func TestOpenAI_RenderRequest_PreservesRealReasoning(t *testing.T) { + req := &core.ChatRequest{ + Model: "deepseek-chat", + Messages: []core.Message{ + {Role: core.RoleAssistant, Content: []core.ContentPart{ + {Type: core.PartThinking, Text: "real chain of thought"}, + {Type: core.PartText, Text: "hello"}, + }}, + }, + } + body, err := OpenAICodec{}.RenderRequestForProvider(req, "deepseek") + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + require.Equal(t, "real chain of thought", got.Messages[0].ReasoningContent) +} + +// TestOpenAI_RenderRequest_NoInjectForNonDeepSeek verifies non-DeepSeek targets +// are untouched (avoid sending reasoning_content to providers that reject it). 
+func TestOpenAI_RenderRequest_NoInjectForNonDeepSeek(t *testing.T) { + req := &core.ChatRequest{ + Model: "gpt-4o", + Messages: []core.Message{ + {Role: core.RoleAssistant, Content: []core.ContentPart{{Type: core.PartText, Text: "hello"}}}, + }, + } + body, err := OpenAICodec{}.RenderRequestForProvider(req, "openai") + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + require.Empty(t, got.Messages[0].ReasoningContent) +} + +// TestRequiresReasoningEcho covers provider- and model-based detection. +func TestRequiresReasoningEcho(t *testing.T) { + require.True(t, requiresReasoningEcho("deepseek", "deepseek-chat")) + require.True(t, requiresReasoningEcho("openrouter", "deepseek/deepseek-chat")) + require.True(t, requiresReasoningEcho("siliconflow", "deepseek-ai/DeepSeek-V3.2")) + require.False(t, requiresReasoningEcho("openai", "gpt-4o")) + require.False(t, requiresReasoningEcho("groq", "llama-3.3-70b")) +} + +func TestOpenAI_RenderRequest_DeepSeekToolCallFixes(t *testing.T) { + req := &core.ChatRequest{ + Model: "deepseek-v4-flash", + Messages: []core.Message{ + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "hi"}}}, + {Role: core.RoleAssistant, Content: []core.ContentPart{ + {Type: core.PartToolCall, ToolCall: &core.ToolCall{Name: "get_weather", Arguments: json.RawMessage(`{"city":"Jakarta"}`)}}, + }}, + }, + } + body, err := OpenAICodec{}.RenderRequestForProvider(req, "deepseek") + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + require.Len(t, got.Messages, 3) + assistant := got.Messages[1] + require.Equal(t, reasoningPlaceholder, assistant.ReasoningContent) + require.Len(t, assistant.ToolCalls, 1) + require.Equal(t, "call_msg1_tc0_get_weather", assistant.ToolCalls[0].ID) + require.Equal(t, "function", assistant.ToolCalls[0].Type) + + var args string + require.NoError(t, json.Unmarshal(assistant.ToolCalls[0].Function.Arguments, &args)) + 
require.JSONEq(t, `{"city":"Jakarta"}`, args) + require.Equal(t, "tool", got.Messages[2].Role) + require.Equal(t, assistant.ToolCalls[0].ID, got.Messages[2].ToolCallID) +} + +func TestOpenAI_RenderRequest_NonDeepSeekToolCallsUntouched(t *testing.T) { + req := &core.ChatRequest{ + Model: "gpt-4o", + Messages: []core.Message{ + {Role: core.RoleAssistant, Content: []core.ContentPart{ + {Type: core.PartToolCall, ToolCall: &core.ToolCall{Name: "get_weather", Arguments: json.RawMessage(`{"city":"Jakarta"}`)}}, + }}, + }, + } + body, err := OpenAICodec{}.RenderRequestForProvider(req, "openai") + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + require.Len(t, got.Messages, 1) + require.Empty(t, got.Messages[0].ReasoningContent) + require.Empty(t, got.Messages[0].ToolCalls[0].ID) +} + +func TestOpenAI_RenderRequest_DeepSeekV4ProAliases(t *testing.T) { + cases := []struct { + model string + wantEffort string + wantThinking string + }{ + {model: "deepseek-v4-pro-max", wantEffort: "max", wantThinking: "enabled"}, + {model: "deepseek-v4-pro-none", wantEffort: "", wantThinking: "disabled"}, + } + for _, tc := range cases { + t.Run(tc.model, func(t *testing.T) { + req := &core.ChatRequest{Model: tc.model, Messages: []core.Message{{Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "hi"}}}}} + body, err := OpenAICodec{}.RenderRequestForProvider(req, "deepseek") + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + require.Equal(t, "deepseek-v4-pro", got.Model) + require.Equal(t, tc.wantEffort, got.ReasoningEffort) + require.NotNil(t, got.ExtraBody["thinking"]) + thinking := got.ExtraBody["thinking"].(map[string]any) + require.Equal(t, tc.wantThinking, thinking["type"]) + }) + } +} + +// TestOpenAI_RenderRequest_MultipleToolResultsSplit verifies that a canonical +// message containing multiple PartToolResult parts (as produced by Anthropic +// clients like Claude 
Code) is correctly split into separate OpenAI tool +// messages. This is the core fix for the "insufficient tool messages following +// tool_calls message" 400 error from DeepSeek. +func TestOpenAI_RenderRequest_MultipleToolResultsSplit(t *testing.T) { + req := &core.ChatRequest{ + Model: "gpt-4o", + Messages: []core.Message{ + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "check both"}}}, + {Role: core.RoleAssistant, Content: []core.ContentPart{ + {Type: core.PartToolCall, ToolCall: &core.ToolCall{ID: "call_1", Name: "read_file", Arguments: json.RawMessage(`{"path":"a.txt"}`)}}, + {Type: core.PartToolCall, ToolCall: &core.ToolCall{ID: "call_2", Name: "read_file", Arguments: json.RawMessage(`{"path":"b.txt"}`)}}, + }}, + // Anthropic groups both tool results into one user message. + {Role: core.RoleUser, Content: []core.ContentPart{ + {Type: core.PartToolResult, ToolResult: &core.ToolResult{CallID: "call_1", Content: "contents of a"}}, + {Type: core.PartToolResult, ToolResult: &core.ToolResult{CallID: "call_2", Content: "contents of b"}}, + {Type: core.PartText, Text: "now summarize"}, + }}, + }, + } + body, err := OpenAICodec{}.RenderRequest(req) + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + // Expect: user, assistant(tool_calls), tool(call_1), tool(call_2), user(text) + require.Len(t, got.Messages, 5, "expected 5 messages: user, assistant, tool, tool, user") + + // Verify both tool messages are present with correct IDs. 
+ toolMsgs := 0 + for _, m := range got.Messages { + if m.Role != "tool" { + continue + } + toolMsgs++ + switch m.ToolCallID { + case "call_1": + require.Contains(t, string(m.Content), "contents of a") + case "call_2": + require.Contains(t, string(m.Content), "contents of b") + default: + t.Errorf("unexpected tool_call_id: %s", m.ToolCallID) + } + } + require.Equal(t, 2, toolMsgs, "expected exactly 2 tool messages") + + // Verify the trailing user text is preserved. + lastMsg := got.Messages[4] + require.Equal(t, "user", lastMsg.Role) + require.Contains(t, string(lastMsg.Content), "now summarize") +} + +// TestOpenAI_RenderRequest_DeepSeekMultipleToolCallsWithResults verifies the +// full DeepSeek path: multiple tool calls with all results present. The +// fillMissingDeepSeekToolResponses safety net should NOT insert any synthetic +// messages when all results exist. +func TestOpenAI_RenderRequest_DeepSeekMultipleToolCallsWithResults(t *testing.T) { + req := &core.ChatRequest{ + Model: "deepseek-v4-flash", + Messages: []core.Message{ + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "check both"}}}, + {Role: core.RoleAssistant, Content: []core.ContentPart{ + {Type: core.PartToolCall, ToolCall: &core.ToolCall{ID: "call_1", Name: "read_file", Arguments: json.RawMessage(`{"path":"a"}`)}}, + {Type: core.PartToolCall, ToolCall: &core.ToolCall{ID: "call_2", Name: "read_file", Arguments: json.RawMessage(`{"path":"b"}`)}}, + }}, + {Role: core.RoleUser, Content: []core.ContentPart{ + {Type: core.PartToolResult, ToolResult: &core.ToolResult{CallID: "call_1", Content: "a-content"}}, + {Type: core.PartToolResult, ToolResult: &core.ToolResult{CallID: "call_2", Content: "b-content"}}, + }}, + }, + } + body, err := OpenAICodec{}.RenderRequestForProvider(req, "deepseek") + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + // Expect: user, assistant, tool(call_1), tool(call_2) — no synthetic fills. 
+ require.Len(t, got.Messages, 4, "should not insert synthetic messages when all results present") + + // Verify both tool results are present and match sanitized IDs. + assistant := got.Messages[1] + require.Len(t, assistant.ToolCalls, 2) + id1 := assistant.ToolCalls[0].ID + id2 := assistant.ToolCalls[1].ID + + responded := map[string]bool{} + for _, m := range got.Messages { + if m.Role == "tool" { + responded[m.ToolCallID] = true + } + } + require.True(t, responded[id1], "tool call 1 result missing") + require.True(t, responded[id2], "tool call 2 result missing") +} + +// TestOpenAI_RenderRequest_DeepSeekMultipleToolCallsPartialResults verifies +// that when an assistant makes multiple tool calls but only some have results, +// synthetic empty responses are inserted for the missing ones. +func TestOpenAI_RenderRequest_DeepSeekMultipleToolCallsPartialResults(t *testing.T) { + req := &core.ChatRequest{ + Model: "deepseek-v4-flash", + Messages: []core.Message{ + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "check both"}}}, + {Role: core.RoleAssistant, Content: []core.ContentPart{ + {Type: core.PartToolCall, ToolCall: &core.ToolCall{ID: "call_1", Name: "read_file", Arguments: json.RawMessage(`{}`)}}, + {Type: core.PartToolCall, ToolCall: &core.ToolCall{ID: "call_2", Name: "read_file", Arguments: json.RawMessage(`{}`)}}, + {Type: core.PartToolCall, ToolCall: &core.ToolCall{ID: "call_3", Name: "read_file", Arguments: json.RawMessage(`{}`)}}, + }}, + // Only call_2 has a result; call_1 and call_3 are missing. 
+ {Role: core.RoleUser, Content: []core.ContentPart{ + {Type: core.PartToolResult, ToolResult: &core.ToolResult{CallID: "call_2", Content: "found it"}}, + }}, + }, + } + body, err := OpenAICodec{}.RenderRequestForProvider(req, "deepseek") + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + + assistant := got.Messages[1] + require.Len(t, assistant.ToolCalls, 3) + + // Collect all tool messages and their IDs. + toolResponses := map[string]string{} + for _, m := range got.Messages { + if m.Role == "tool" { + toolResponses[m.ToolCallID] = string(m.Content) + } + } + + // All 3 tool calls should have responses (1 real + 2 synthetic). + for _, tc := range assistant.ToolCalls { + content, ok := toolResponses[tc.ID] + require.True(t, ok, "missing tool response for ID %s", tc.ID) + if tc.ID == assistant.ToolCalls[1].ID { + require.Contains(t, content, "found it") + } + } + require.Len(t, toolResponses, 3, "expected 3 total tool responses") +} + +// TestOpenAI_RenderRequest_DeepSeekMultipleToolMessagesNotDuplicated verifies +// that when multiple tool messages follow an assistant message, the +// fillMissingDeepSeekToolResponses function correctly recognizes all of them +// and does not insert duplicate synthetic messages. +func TestOpenAI_RenderRequest_DeepSeekMultipleToolMessagesNotDuplicated(t *testing.T) { + req := &core.ChatRequest{ + Model: "deepseek-v4-flash", + Messages: []core.Message{ + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "go"}}}, + {Role: core.RoleAssistant, Content: []core.ContentPart{ + {Type: core.PartToolCall, ToolCall: &core.ToolCall{ID: "call_a", Name: "search", Arguments: json.RawMessage(`{}`)}}, + {Type: core.PartToolCall, ToolCall: &core.ToolCall{ID: "call_b", Name: "search", Arguments: json.RawMessage(`{}`)}}, + }}, + // Two separate tool messages (already in OpenAI format via canonical). 
+ {Role: core.RoleTool, Content: []core.ContentPart{ + {Type: core.PartToolResult, ToolResult: &core.ToolResult{CallID: "call_a", Content: "result a"}}, + }}, + {Role: core.RoleTool, Content: []core.ContentPart{ + {Type: core.PartToolResult, ToolResult: &core.ToolResult{CallID: "call_b", Content: "result b"}}, + }}, + }, + } + body, err := OpenAICodec{}.RenderRequestForProvider(req, "deepseek") + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + // Expect: user, assistant, tool, tool — no duplicates. + require.Len(t, got.Messages, 4, "should not duplicate existing tool messages") + + toolCount := 0 + for _, m := range got.Messages { + if m.Role == "tool" { + toolCount++ + } + } + require.Equal(t, 2, toolCount, "expected exactly 2 tool messages, no duplicates") +} + +func TestOpenAI_RenderRequest_DeepSeekReasoningEffortMapping(t *testing.T) { + cases := []struct { + effort string + want string + }{ + {effort: "low", want: "high"}, + {effort: "medium", want: "high"}, + {effort: "high", want: "high"}, + {effort: "xhigh", want: "max"}, + {effort: "max", want: "max"}, + } + for _, tc := range cases { + t.Run(tc.effort, func(t *testing.T) { + req := &core.ChatRequest{Model: "deepseek-v4-flash", Reasoning: &core.ReasoningConfig{Effort: tc.effort}} + body, err := OpenAICodec{}.RenderRequestForProvider(req, "custom-openai") + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + require.Equal(t, tc.want, got.ReasoningEffort) + require.NotNil(t, got.Thinking) + require.Equal(t, "enabled", got.Thinking.Type) + }) + } +} + +// ---- Kimi scope tests ---- + +// TestRequiresReasoningEcho_Kimi verifies Kimi models are detected (toolCalls scope). 
+func TestRequiresReasoningEcho_Kimi(t *testing.T) { + require.True(t, requiresReasoningEcho("moonshot", "kimi-k2")) + require.True(t, requiresReasoningEcho("openrouter", "moonshot/kimi-k2")) + require.True(t, requiresReasoningEcho("custom", "kimi-latest")) + require.False(t, requiresReasoningEcho("openai", "gpt-4o")) +} + +// TestReasoningEchoScope_Kimi verifies scope classification: DeepSeek=all, Kimi=toolCalls. +func TestReasoningEchoScope_Kimi(t *testing.T) { + require.Equal(t, reasoningAll, reasoningEchoScope("deepseek", "deepseek-chat")) + require.Equal(t, reasoningAll, reasoningEchoScope("openrouter", "deepseek/deepseek-chat")) + require.Equal(t, reasoningToolCalls, reasoningEchoScope("moonshot", "kimi-k2")) + require.Equal(t, reasoningToolCalls, reasoningEchoScope("custom", "kimi-latest")) + require.Equal(t, reasoningNone, reasoningEchoScope("openai", "gpt-4o")) +} + +// TestOpenAI_RenderRequest_KimiInjectOnlyOnToolCalls verifies Kimi scope: +// reasoning_content is injected ONLY on assistant messages with tool_calls. 
+func TestOpenAI_RenderRequest_KimiInjectOnlyOnToolCalls(t *testing.T) { + req := &core.ChatRequest{ + Model: "kimi-k2", + Messages: []core.Message{ + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "hi"}}}, + {Role: core.RoleAssistant, Content: []core.ContentPart{{Type: core.PartText, Text: "hello"}}}, + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "call the tool"}}}, + {Role: core.RoleAssistant, Content: []core.ContentPart{ + {Type: core.PartToolCall, ToolCall: &core.ToolCall{ID: "call_1", Name: "search", Arguments: json.RawMessage(`{"q":"test"}`)}}, + }}, + }, + } + body, err := OpenAICodec{}.RenderRequestForProvider(req, "moonshot") + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + + var assistantMsgs []oaiMessage + for _, m := range got.Messages { + if m.Role == "assistant" { + assistantMsgs = append(assistantMsgs, m) + } + } + require.Len(t, assistantMsgs, 2) + require.Empty(t, assistantMsgs[0].ReasoningContent, "Kimi: plain text assistant should not get reasoning") + require.Equal(t, reasoningPlaceholder, assistantMsgs[1].ReasoningContent, "Kimi: tool_calls assistant should get reasoning") +} + +// TestOpenAI_RenderRequest_KimiPreservesRealReasoning verifies genuine reasoning +// on Kimi messages is preserved (not overwritten by placeholder). 
+func TestOpenAI_RenderRequest_KimiPreservesRealReasoning(t *testing.T) { + req := &core.ChatRequest{ + Model: "kimi-k2", + Messages: []core.Message{ + {Role: core.RoleAssistant, Content: []core.ContentPart{ + {Type: core.PartThinking, Text: "real reasoning"}, + {Type: core.PartToolCall, ToolCall: &core.ToolCall{ID: "c1", Name: "test", Arguments: json.RawMessage(`{}`)}}, + }}, + }, + } + body, err := OpenAICodec{}.RenderRequestForProvider(req, "moonshot") + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + for _, m := range got.Messages { + if m.Role == "assistant" { + require.Equal(t, "real reasoning", m.ReasoningContent) + } + } +} + +// ---- Issue #17 scenario tests: chain + streaming + multi-turn reasoning ---- + +// TestOpenAI_StreamMultiTurnReasoning simulates a multi-turn streaming +// conversation with DeepSeek thinking mode (issue #17 scenario): turn 1 +// produces reasoning, turn 2 sends it back. Verifies the full round-trip. +func TestOpenAI_StreamMultiTurnReasoning(t *testing.T) { + codec := OpenAICodec{} + + // Turn 1: upstream streams reasoning + text + streamLines := [][]byte{ + []byte(`{"id":"c1","model":"deepseek-chat","choices":[{"delta":{"reasoning_content":"Let me think about this"}}]}`), + []byte(`{"id":"c1","model":"deepseek-chat","choices":[{"delta":{"content":"The answer is 42"}}]}`), + []byte(`{"id":"c1","model":"deepseek-chat","choices":[{"finish_reason":"stop"}]}`), + } + + var allChunks []core.StreamChunk + for _, line := range streamLines { + chunks, err := codec.ParseStreamLine(line, "deepseek-chat") + require.NoError(t, err) + allChunks = append(allChunks, chunks...) 
+ } + + require.NotEmpty(t, allChunks) + hasThinking, hasText := false, false + for _, c := range allChunks { + if c.Type == core.ChunkThinking { + hasThinking = true + require.Equal(t, "Let me think about this", c.Delta) + } + if c.Type == core.ChunkText { + hasText = true + require.Equal(t, "The answer is 42", c.Delta) + } + } + require.True(t, hasThinking, "should capture reasoning_content from stream") + require.True(t, hasText, "should capture text from stream") + + // Turn 2: client sends back reasoning_content on the assistant message + turn2Req := &core.ChatRequest{ + Model: "deepseek-chat", + Messages: []core.Message{ + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "what is the answer"}}}, + {Role: core.RoleAssistant, Content: []core.ContentPart{ + {Type: core.PartThinking, Text: "Let me think about this"}, + {Type: core.PartText, Text: "The answer is 42"}, + }}, + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "explain why"}}}, + }, + } + body, err := codec.RenderRequestForProvider(turn2Req, "deepseek") + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + + for _, m := range got.Messages { + if m.Role == "assistant" { + require.Equal(t, "Let me think about this", m.ReasoningContent, + "real reasoning should be preserved, not overwritten with placeholder") + } + } +} + +// TestOpenAI_ChainFallbackReasoningInjection simulates a chain where fallback +// goes to DeepSeek. The request must have reasoning_content injected for the +// DeepSeek target even though the client originally sent it for a different +// provider. Mirrors the pipeline's cloneForAttempt + RenderRequestForProvider. 
+func TestOpenAI_ChainFallbackReasoningInjection(t *testing.T) { + req := &core.ChatRequest{ + Model: "chain:coding", + Messages: []core.Message{ + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "hi"}}}, + {Role: core.RoleAssistant, Content: []core.ContentPart{{Type: core.PartText, Text: "hello"}}}, + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "continue"}}}, + }, + } + + // Simulate cloneForAttempt with the DeepSeek fallback model + fallbackReq := *req + fallbackReq.Model = "deepseek-chat" + body, err := OpenAICodec{}.RenderRequestForProvider(&fallbackReq, "deepseek") + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + + for _, m := range got.Messages { + if m.Role == "assistant" { + require.Equal(t, reasoningPlaceholder, m.ReasoningContent, + "chain fallback to DeepSeek must inject reasoning_content") + } + } +} + +// TestOpenAI_CrossDialectReasoningRoundTrip verifies reasoning survives a +// cross-dialect round-trip: PartThinking (from Anthropic client) → +// reasoning_content (for DeepSeek upstream). Covers the "Anthropic client → +// DeepSeek upstream" path via the canonical intermediate. 
+func TestOpenAI_CrossDialectReasoningRoundTrip(t *testing.T) { + req := &core.ChatRequest{ + Model: "deepseek-chat", + Messages: []core.Message{ + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "forecast"}}}, + {Role: core.RoleAssistant, Content: []core.ContentPart{ + {Type: core.PartThinking, Text: "I should consider the weather"}, + {Type: core.PartText, Text: "It might rain today"}, + }}, + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "elaborate"}}}, + }, + } + + body, err := OpenAICodec{}.RenderRequestForProvider(req, "deepseek") + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + + for _, m := range got.Messages { + if m.Role == "assistant" { + require.Equal(t, "I should consider the weather", m.ReasoningContent, + "PartThinking must become reasoning_content for DeepSeek") + } + } +} + +// TestOpenAI_RenderRequest_StreamRawPathReasoning verifies the zero-copy +// streaming path (StreamRaw → RenderRequestForProvider) also injects +// reasoning_content. Explicitly tested to prevent regressions in the fast path. 
+func TestOpenAI_RenderRequest_StreamRawPathReasoning(t *testing.T) { + req := &core.ChatRequest{ + Model: "deepseek-chat", + Stream: true, + Messages: []core.Message{ + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "hi"}}}, + {Role: core.RoleAssistant, Content: []core.ContentPart{{Type: core.PartText, Text: "hello"}}}, + {Role: core.RoleUser, Content: []core.ContentPart{{Type: core.PartText, Text: "again"}}}, + }, + } + // This is exactly what StreamRaw does internally + body, err := OpenAICodec{}.RenderRequestForProvider(req, "deepseek") + require.NoError(t, err) + + var got oaiRequest + require.NoError(t, json.Unmarshal(body, &got)) + require.True(t, got.Stream, "stream flag should be set") + + for _, m := range got.Messages { + if m.Role == "assistant" { + require.Equal(t, reasoningPlaceholder, m.ReasoningContent, + "StreamRaw path must inject reasoning for DeepSeek") + } + } +} \ No newline at end of file diff --git a/backend/internal/transform/openai_stream.go b/backend/internal/transform/openai_stream.go index 665b4bc..e605bf8 100644 --- a/backend/internal/transform/openai_stream.go +++ b/backend/internal/transform/openai_stream.go @@ -212,12 +212,23 @@ func normalizeNestedOpenAIToolObject(raw json.RawMessage) json.RawMessage { func (OpenAICodec) RenderStreamChunk(chunk core.StreamChunk, state *StreamState) ([][]byte, error) { delta := map[string]any{} switch chunk.Type { + case core.ChunkThinking: + // Echo structured reasoning back to the client as reasoning_content so + // clients that replay it on follow-up turns (Cursor, Cline, etc.) keep + // the real reasoning. DeepSeek/MiniMax thinking mode requires this + // field on subsequent turns or it returns a 400. 
+ if !state.SentRole { + delta["role"] = "assistant" + state.SentRole = true + } + delta["reasoning_content"] = chunk.Delta case core.ChunkText: if !state.SentRole { delta["role"] = "assistant" state.SentRole = true } delta["content"] = chunk.Delta + case core.ChunkToolCall: if !state.SentRole { delta["role"] = "assistant"