Skip to content

Commit fd59276

Browse files
intel352 and claude committed
fix: disable Ollama thinking by default, use native config
Replace skipCommonCfg with customConfig that sends the Ollama-native GenerateContentConfig{Think: ThinkEnabled(false)}. This:

1. Suppresses thinking/reasoning output that was leaking as text in complex multi-tool prompts (team execution)
2. Reduces latency (no reasoning computation)
3. Uses NumPredict for max tokens (Ollama-native, not maxOutputTokens)

Verified: Chat() returns clean content with an empty Thinking field.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 99adcdf commit fd59276

File tree

2 files changed

+31
-14
lines changed

2 files changed

+31
-14
lines changed

genkit/adapter.go

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,16 @@ import (
1212

1313
// genkitProvider adapts a Genkit model to provider.Provider.
1414
type genkitProvider struct {
15-
g *gk.Genkit
16-
modelName string // "provider/model" format e.g. "anthropic/claude-sonnet-4-6"
17-
name string
18-
authInfo provider.AuthModeInfo
19-
maxTokens int // 0 means use model default
20-
skipCommonCfg bool // true for providers that reject GenerationCommonConfig (e.g. Ollama)
15+
g *gk.Genkit
16+
modelName string // "provider/model" format e.g. "anthropic/claude-sonnet-4-6"
17+
name string
18+
authInfo provider.AuthModeInfo
19+
maxTokens int // 0 means use model default
20+
21+
// customConfig, when non-nil, is sent via ai.WithConfig instead of
22+
// GenerationCommonConfig. Used for providers with their own config
23+
// schemas (e.g. Ollama's GenerateContentConfig with Think support).
24+
customConfig any
2125

2226
mu sync.Mutex
2327
definedTools map[string]bool // tracks which tool names are registered
@@ -61,10 +65,13 @@ func (p *genkitProvider) resolveToolRefs(tools []provider.ToolDef) []ai.ToolRef
6165
return refs
6266
}
6367

64-
// generationConfig returns a WithConfig option when maxTokens is configured.
65-
// Returns nil for providers that don't support GenerationCommonConfig (e.g. Ollama).
68+
// generationConfig returns a WithConfig option for the provider.
69+
// Uses customConfig if set (e.g. Ollama), otherwise GenerationCommonConfig.
6670
func (p *genkitProvider) generationConfig() ai.GenerateOption {
67-
if p.skipCommonCfg || p.maxTokens <= 0 {
71+
if p.customConfig != nil {
72+
return ai.WithConfig(p.customConfig)
73+
}
74+
if p.maxTokens <= 0 {
6875
return nil
6976
}
7077
return ai.WithConfig(&ai.GenerationCommonConfig{MaxOutputTokens: p.maxTokens})

genkit/providers.go

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,12 +123,22 @@ func NewOllamaProvider(ctx context.Context, model, serverAddress string, maxToke
123123
}
124124
p := &ollamaPlugin.Ollama{ServerAddress: serverAddress, Timeout: 300} // 5 min — model loading can be slow
125125
g := initGenkitWithPlugin(ctx, gk.WithPlugins(p))
126+
// Build Ollama-native config. Disable thinking by default — it adds
127+
// latency and leaks reasoning as text in complex multi-tool prompts.
128+
// Users who want thinking can enable it per-session later.
129+
ollamaCfg := &ollamaPlugin.GenerateContentConfig{
130+
Think: ollamaPlugin.ThinkEnabled(false),
131+
}
132+
if maxTokens > 0 {
133+
ollamaCfg.NumPredict = &maxTokens
134+
}
135+
126136
return &genkitProvider{
127-
g: g,
128-
modelName: "ollama/" + model,
129-
name: "ollama",
130-
maxTokens: maxTokens,
131-
skipCommonCfg: true, // Ollama rejects GenerationCommonConfig (maxOutputTokens)
137+
g: g,
138+
modelName: "ollama/" + model,
139+
name: "ollama",
140+
maxTokens: maxTokens,
141+
customConfig: ollamaCfg,
132142
authInfo: provider.AuthModeInfo{
133143
Mode: "none",
134144
DisplayName: "Ollama (local)",

0 commit comments

Comments (0)