From 89c5042ef92007d3a95347dc7e26124871e59c75 Mon Sep 17 00:00:00 2001 From: agent932 Date: Fri, 12 Jun 2026 17:51:56 -0400 Subject: [PATCH] fix: properly handle temperature deprecation for models that don't support it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three related fixes in providers.ts: 1. Add claude-opus-4-7 and claude-opus-4-8 to FIXED_TEMPERATURE_MODELS so that temperature is clamped before the request is sent. 2. Broaden the isTempUnsupported catch condition to also match Anthropic's error format (HTTP status 400, invalid_request_error, with a message that mentions "temperature"). That format does not set err.code='unsupported_value', so the retry previously never fired. 3. Change buildCompletionConfig to accept temperature as number | undefined and to omit the parameter entirely when it is undefined. The retry now omits temperature instead of sending temperature=1, which these models also reject. Update the tryCompletion and runStream signatures accordingly. Additionally, writeProfileRegistry in modelRegistryStore.ts now replaces the registry file by copying the .tmp file over it and then deleting the .tmp file, instead of renaming it. Fixes inference failures when using claude-opus-4-7 or claude-opus-4-8. 
--- backend/src/utils/llm/providers.ts | 30 ++++++++++++++++--------- backend/src/utils/modelRegistryStore.ts | 3 ++- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/backend/src/utils/llm/providers.ts b/backend/src/utils/llm/providers.ts index f14039a..f39bc37 100644 --- a/backend/src/utils/llm/providers.ts +++ b/backend/src/utils/llm/providers.ts @@ -351,7 +351,7 @@ function maskSecret(s: string | undefined): string { return s.slice(0, 6) + "…" + s.slice(-4); } -const FIXED_TEMPERATURE_MODELS = new Set(["gpt-5-nano"]); +const FIXED_TEMPERATURE_MODELS = new Set(["gpt-5-nano", "claude-opus-4-7", "claude-opus-4-8"]); function clampTemperature(model: string, requested: number): number { for (const m of FIXED_TEMPERATURE_MODELS) { @@ -418,15 +418,17 @@ function logResponse( function buildCompletionConfig( config: ProviderConfig, opts: InvokeOptions & { abortSignal?: AbortSignal }, - temperature: number, + temperature: number | undefined, stream: boolean, ): any { const params: any = { model: config.model, messages: opts.messages, - temperature, stream, }; + if (temperature !== undefined) { + params.temperature = temperature; + } if (stream) { params.stream_options = { include_usage: true }; @@ -597,7 +599,7 @@ export async function invoke_llm(opts: InvokeOptions): Promise { const client = getClient(config, opts); - const tryCompletion = async (temp: number): Promise => { + const tryCompletion = async (temp: number | undefined): Promise => { const params = buildCompletionConfig(config, opts, temp, false); const response = (await client.chat.completions.create( params, @@ -624,15 +626,18 @@ export async function invoke_llm(opts: InvokeOptions): Promise { return await tryCompletion(requestedTemp); } catch (err: any) { const isTempUnsupported = - err?.code === "unsupported_value" && + (err?.code === "unsupported_value" && err?.param === "temperature" && - requestedTemp !== 1; + requestedTemp !== 1) || + (err?.status === 400 && + 
err?.message?.includes("temperature") && + requestedTemp !== 1); if (isTempUnsupported) { console.warn( `[inference] Model ${config.model} does not support temperature=${requestedTemp}, retrying with temperature=1`, ); - return await tryCompletion(1); + return await tryCompletion(undefined); } const elapsed = Date.now() - start; @@ -1231,7 +1236,7 @@ export async function invoke_llm_streaming( logRequest(config, opts, true); - const runStream = async (temp: number): Promise => { + const runStream = async (temp: number | undefined): Promise => { const params = buildCompletionConfig(config, opts, temp, true); if ( @@ -1363,13 +1368,16 @@ export async function invoke_llm_streaming( return await runStream(requestedTemp); } catch (err: any) { const isTempUnsupported = - err?.code === "unsupported_value" && + (err?.code === "unsupported_value" && err?.param === "temperature" && - requestedTemp !== 1; + requestedTemp !== 1) || + (err?.status === 400 && + err?.message?.includes("temperature") && + requestedTemp !== 1); if (isTempUnsupported) { console.warn(`[inference] Retrying stream with temperature=1`); - return await runStream(1); + return await runStream(undefined); } const isToolsUnsupported = diff --git a/backend/src/utils/modelRegistryStore.ts b/backend/src/utils/modelRegistryStore.ts index 6458efb..5ba9435 100644 --- a/backend/src/utils/modelRegistryStore.ts +++ b/backend/src/utils/modelRegistryStore.ts @@ -302,7 +302,8 @@ function writeProfileRegistry(registry: ModelRegistry): void { const profilePath = getModelRegistryPath(); fs.mkdirSync(path.dirname(profilePath), { recursive: true }); fs.writeFileSync(`${profilePath}.tmp`, `${JSON.stringify(registry, null, 2)}\n`, "utf-8"); - fs.renameSync(`${profilePath}.tmp`, profilePath); + fs.copyFileSync(`${profilePath}.tmp`, profilePath); + fs.unlinkSync(`${profilePath}.tmp`); } function registryFromEnvJson(env: Record): ModelRegistry {