Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 19 additions & 11 deletions backend/src/utils/llm/providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ function maskSecret(s: string | undefined): string {
return s.slice(0, 6) + "…" + s.slice(-4);
}

const FIXED_TEMPERATURE_MODELS = new Set(["gpt-5-nano"]);
const FIXED_TEMPERATURE_MODELS = new Set(["gpt-5-nano", "claude-opus-4-7", "claude-opus-4-8"]);

function clampTemperature(model: string, requested: number): number {
for (const m of FIXED_TEMPERATURE_MODELS) {
Expand Down Expand Up @@ -418,15 +418,17 @@ function logResponse(
function buildCompletionConfig(
config: ProviderConfig,
opts: InvokeOptions & { abortSignal?: AbortSignal },
temperature: number,
temperature: number | undefined,
stream: boolean,
): any {
const params: any = {
model: config.model,
messages: opts.messages,
temperature,
stream,
};
if (temperature !== undefined) {
params.temperature = temperature;
}

if (stream) {
params.stream_options = { include_usage: true };
Expand Down Expand Up @@ -597,7 +599,7 @@ export async function invoke_llm(opts: InvokeOptions): Promise<InvokeResult> {

const client = getClient(config, opts);

const tryCompletion = async (temp: number): Promise<InvokeResult> => {
const tryCompletion = async (temp: number | undefined): Promise<InvokeResult> => {
const params = buildCompletionConfig(config, opts, temp, false);
const response = (await client.chat.completions.create(
params,
Expand All @@ -624,15 +626,18 @@ export async function invoke_llm(opts: InvokeOptions): Promise<InvokeResult> {
return await tryCompletion(requestedTemp);
} catch (err: any) {
const isTempUnsupported =
err?.code === "unsupported_value" &&
(err?.code === "unsupported_value" &&
err?.param === "temperature" &&
requestedTemp !== 1;
requestedTemp !== 1) ||
(err?.status === 400 &&
err?.message?.includes("temperature") &&
requestedTemp !== 1);

if (isTempUnsupported) {
console.warn(
`[inference] Model ${config.model} does not support temperature=${requestedTemp}, retrying with temperature=1`,
);
return await tryCompletion(1);
return await tryCompletion(undefined);
}

const elapsed = Date.now() - start;
Expand Down Expand Up @@ -1231,7 +1236,7 @@ export async function invoke_llm_streaming(

logRequest(config, opts, true);

const runStream = async (temp: number): Promise<InvokeResult> => {
const runStream = async (temp: number | undefined): Promise<InvokeResult> => {
const params = buildCompletionConfig(config, opts, temp, true);

if (
Expand Down Expand Up @@ -1363,13 +1368,16 @@ export async function invoke_llm_streaming(
return await runStream(requestedTemp);
} catch (err: any) {
const isTempUnsupported =
err?.code === "unsupported_value" &&
(err?.code === "unsupported_value" &&
err?.param === "temperature" &&
requestedTemp !== 1;
requestedTemp !== 1) ||
(err?.status === 400 &&
err?.message?.includes("temperature") &&
requestedTemp !== 1);

if (isTempUnsupported) {
console.warn(`[inference] Retrying stream with temperature=1`);
return await runStream(1);
return await runStream(undefined);
}

const isToolsUnsupported =
Expand Down
3 changes: 2 additions & 1 deletion backend/src/utils/modelRegistryStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,8 @@ function writeProfileRegistry(registry: ModelRegistry): void {
const profilePath = getModelRegistryPath();
fs.mkdirSync(path.dirname(profilePath), { recursive: true });
fs.writeFileSync(`${profilePath}.tmp`, `${JSON.stringify(registry, null, 2)}\n`, "utf-8");
fs.renameSync(`${profilePath}.tmp`, profilePath);
fs.copyFileSync(`${profilePath}.tmp`, profilePath);
fs.unlinkSync(`${profilePath}.tmp`);
}

function registryFromEnvJson(env: Record<string, string>): ModelRegistry {
Expand Down