diff --git a/backend/src/utils/llm/providers.ts b/backend/src/utils/llm/providers.ts index f14039a..f39bc37 100644 --- a/backend/src/utils/llm/providers.ts +++ b/backend/src/utils/llm/providers.ts @@ -351,7 +351,7 @@ function maskSecret(s: string | undefined): string { return s.slice(0, 6) + "…" + s.slice(-4); } -const FIXED_TEMPERATURE_MODELS = new Set(["gpt-5-nano"]); +const FIXED_TEMPERATURE_MODELS = new Set(["gpt-5-nano", "claude-opus-4-7", "claude-opus-4-8"]); function clampTemperature(model: string, requested: number): number { for (const m of FIXED_TEMPERATURE_MODELS) { @@ -418,15 +418,17 @@ function logResponse( function buildCompletionConfig( config: ProviderConfig, opts: InvokeOptions & { abortSignal?: AbortSignal }, - temperature: number, + temperature: number | undefined, stream: boolean, ): any { const params: any = { model: config.model, messages: opts.messages, - temperature, stream, }; + if (temperature !== undefined) { + params.temperature = temperature; + } if (stream) { params.stream_options = { include_usage: true }; @@ -597,7 +599,7 @@ export async function invoke_llm(opts: InvokeOptions): Promise { const client = getClient(config, opts); - const tryCompletion = async (temp: number): Promise => { + const tryCompletion = async (temp: number | undefined): Promise => { const params = buildCompletionConfig(config, opts, temp, false); const response = (await client.chat.completions.create( params, @@ -624,15 +626,18 @@ export async function invoke_llm(opts: InvokeOptions): Promise { return await tryCompletion(requestedTemp); } catch (err: any) { const isTempUnsupported = - err?.code === "unsupported_value" && + (err?.code === "unsupported_value" && err?.param === "temperature" && - requestedTemp !== 1; + requestedTemp !== 1) || + (err?.status === 400 && + err?.message?.includes("temperature") && + requestedTemp !== 1); if (isTempUnsupported) { console.warn( `[inference] Model ${config.model} does not support temperature=${requestedTemp}, retrying with temperature=1`, ); - return await tryCompletion(1); + return await tryCompletion(undefined); } const elapsed = Date.now() - start; @@ -1231,7 +1236,7 @@ export async function invoke_llm_streaming( logRequest(config, opts, true); - const runStream = async (temp: number): Promise => { + const runStream = async (temp: number | undefined): Promise => { const params = buildCompletionConfig(config, opts, temp, true); if ( @@ -1363,13 +1368,16 @@ export async function invoke_llm_streaming( return await runStream(requestedTemp); } catch (err: any) { const isTempUnsupported = - err?.code === "unsupported_value" && + (err?.code === "unsupported_value" && err?.param === "temperature" && - requestedTemp !== 1; + requestedTemp !== 1) || + (err?.status === 400 && + err?.message?.includes("temperature") && + requestedTemp !== 1); if (isTempUnsupported) { console.warn(`[inference] Retrying stream with temperature=1`); - return await runStream(1); + return await runStream(undefined); } const isToolsUnsupported = diff --git a/backend/src/utils/modelRegistryStore.ts b/backend/src/utils/modelRegistryStore.ts index 6458efb..5ba9435 100644 --- a/backend/src/utils/modelRegistryStore.ts +++ b/backend/src/utils/modelRegistryStore.ts @@ -302,7 +302,8 @@ function writeProfileRegistry(registry: ModelRegistry): void { const profilePath = getModelRegistryPath(); fs.mkdirSync(path.dirname(profilePath), { recursive: true }); fs.writeFileSync(`${profilePath}.tmp`, `${JSON.stringify(registry, null, 2)}\n`, "utf-8"); - fs.renameSync(`${profilePath}.tmp`, profilePath); + fs.copyFileSync(`${profilePath}.tmp`, profilePath); + fs.unlinkSync(`${profilePath}.tmp`); } function registryFromEnvJson(env: Record): ModelRegistry {