Skip to content

Commit 8a25cd9

Browse files
intel352 and claude
committed
fix: handle reasoning_content for Qwen3.5 and similar models
fromOpenAIResponse now extracts reasoning_content from ExtraFields when content is empty (reasoning models put output there). Also increase default max_tokens to 8192 for reasoning models. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent e507e72 commit 8a25cd9

File tree

2 files changed

+11
-1
lines changed

2 files changed

+11
-1
lines changed

provider/llama_cpp.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ const (
1717
defaultLlamaCppPort = 8081
1818
defaultLlamaCppGPULayers = -1
1919
defaultLlamaCppContextSize = 8192
20-
defaultLlamaCppMaxTokens = 4096
20+
defaultLlamaCppMaxTokens = 8192
2121
)
2222

2323
// LlamaCppConfig holds configuration for the LlamaCpp provider.

provider/openai_convert.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,16 @@ func fromOpenAIResponse(resp *openaisdk.ChatCompletion) (*Response, error) {
8181
}
8282
msg := resp.Choices[0].Message
8383
result.Content = msg.Content
84+
// For reasoning models (Qwen3.5, etc.) that put output in reasoning_content
85+
// instead of content, extract it from the raw JSON extra fields.
86+
if result.Content == "" {
87+
if rc, ok := msg.JSON.ExtraFields["reasoning_content"]; ok && rc.Valid() {
88+
var reasoning string
89+
if err := json.Unmarshal([]byte(rc.Raw()), &reasoning); err == nil && reasoning != "" {
90+
result.Content = reasoning
91+
}
92+
}
93+
}
8494
for _, tc := range msg.ToolCalls {
8595
var args map[string]any
8696
if tc.Function.Arguments != "" {

0 commit comments

Comments (0)