Skip to content

Commit fd59276

Browse files
intel352 and claude committed
fix: disable Ollama thinking by default, use native config
Replace skipCommonCfg with customConfig that sends the Ollama-native GenerateContentConfig{Think: ThinkEnabled(false)}. This:

1. Suppresses thinking/reasoning output that was leaking as text in complex multi-tool prompts (team execution)
2. Reduces latency (no reasoning computation)
3. Uses NumPredict for max tokens (Ollama-native, not maxOutputTokens)

Verified: Chat() returns clean content with an empty Thinking field.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 99adcdf commit fd59276

File tree

2 files changed

+31
-14
lines changed

2 files changed

+31
-14
lines changed

genkit/adapter.go

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,16 @@ import (
1212

1313
// genkitProvider adapts a Genkit model to provider.Provider.
1414
type genkitProvider struct {
15-
g *gk.Genkit
16-
modelName string // "provider/model" format e.g. "anthropic/claude-sonnet-4-6"
17-
name string
18-
authInfo provider.AuthModeInfo
19-
maxTokens int // 0 means use model default
20-
skipCommonCfg bool // true for providers that reject GenerationCommonConfig (e.g. Ollama)
15+
g *gk.Genkit
16+
modelName string // "provider/model" format e.g. "anthropic/claude-sonnet-4-6"
17+
name string
18+
authInfo provider.AuthModeInfo
19+
maxTokens int // 0 means use model default
20+
21+
// customConfig, when non-nil, is sent via ai.WithConfig instead of
22+
// GenerationCommonConfig. Used for providers with their own config
23+
// schemas (e.g. Ollama's GenerateContentConfig with Think support).
24+
customConfig any
2125

2226
mu sync.Mutex
2327
definedTools map[string]bool // tracks which tool names are registered
@@ -61,10 +65,13 @@ func (p *genkitProvider) resolveToolRefs(tools []provider.ToolDef) []ai.ToolRef
6165
return refs
6266
}
6367

64-
// generationConfig returns a WithConfig option when maxTokens is configured.
65-
// Returns nil for providers that don't support GenerationCommonConfig (e.g. Ollama).
68+
// generationConfig returns a WithConfig option for the provider.
69+
// Uses customConfig if set (e.g. Ollama), otherwise GenerationCommonConfig.
6670
func (p *genkitProvider) generationConfig() ai.GenerateOption {
67-
if p.skipCommonCfg || p.maxTokens <= 0 {
71+
if p.customConfig != nil {
72+
return ai.WithConfig(p.customConfig)
73+
}
74+
if p.maxTokens <= 0 {
6875
return nil
6976
}
7077
return ai.WithConfig(&ai.GenerationCommonConfig{MaxOutputTokens: p.maxTokens})

genkit/providers.go

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,12 +123,22 @@ func NewOllamaProvider(ctx context.Context, model, serverAddress string, maxToke
123123
}
124124
p := &ollamaPlugin.Ollama{ServerAddress: serverAddress, Timeout: 300} // 5 min — model loading can be slow
125125
g := initGenkitWithPlugin(ctx, gk.WithPlugins(p))
126+
// Build Ollama-native config. Disable thinking by default — it adds
127+
// latency and leaks reasoning as text in complex multi-tool prompts.
128+
// Users who want thinking can enable it per-session later.
129+
ollamaCfg := &ollamaPlugin.GenerateContentConfig{
130+
Think: ollamaPlugin.ThinkEnabled(false),
131+
}
132+
if maxTokens > 0 {
133+
ollamaCfg.NumPredict = &maxTokens
134+
}
135+
126136
return &genkitProvider{
127-
g: g,
128-
modelName: "ollama/" + model,
129-
name: "ollama",
130-
maxTokens: maxTokens,
131-
skipCommonCfg: true, // Ollama rejects GenerationCommonConfig (maxOutputTokens)
137+
g: g,
138+
modelName: "ollama/" + model,
139+
name: "ollama",
140+
maxTokens: maxTokens,
141+
customConfig: ollamaCfg,
132142
authInfo: provider.AuthModeInfo{
133143
Mode: "none",
134144
DisplayName: "Ollama (local)",

0 commit comments

Comments (0)