diff --git a/.changeset/fix-openrouter-callmodel-metrics.md b/.changeset/fix-openrouter-callmodel-metrics.md new file mode 100644 index 000000000..0cf6abd79 --- /dev/null +++ b/.changeset/fix-openrouter-callmodel-metrics.md @@ -0,0 +1,5 @@ +--- +"braintrust": patch +--- + +fix(openrouter): avoid double-counting callModel token and cost metrics diff --git a/e2e/scenarios/openrouter-instrumentation/__snapshots__/openrouter-v0123.span-events.json b/e2e/scenarios/openrouter-instrumentation/__snapshots__/openrouter-v0123.span-events.json index ddf193a91..4533e3b10 100644 --- a/e2e/scenarios/openrouter-instrumentation/__snapshots__/openrouter-v0123.span-events.json +++ b/e2e/scenarios/openrouter-instrumentation/__snapshots__/openrouter-v0123.span-events.json @@ -282,17 +282,7 @@ "model": "gemini-2.5-flash-lite", "provider": "google" }, - "metric_keys": [ - "completion_reasoning_tokens", - "completion_tokens", - "cost", - "cost_upstream_inference_cost", - "cost_upstream_inference_input_cost", - "cost_upstream_inference_output_cost", - "prompt_cached_tokens", - "prompt_tokens", - "tokens" - ], + "metric_keys": [], "name": "openrouter.callModel", "root_span_id": "", "span_id": "", diff --git a/e2e/scenarios/openrouter-instrumentation/__snapshots__/openrouter-v0911.span-events.json b/e2e/scenarios/openrouter-instrumentation/__snapshots__/openrouter-v0911.span-events.json index b3a7cc45a..35bccc17a 100644 --- a/e2e/scenarios/openrouter-instrumentation/__snapshots__/openrouter-v0911.span-events.json +++ b/e2e/scenarios/openrouter-instrumentation/__snapshots__/openrouter-v0911.span-events.json @@ -238,17 +238,7 @@ "model": "gemini-2.5-flash-lite", "provider": "google" }, - "metric_keys": [ - "completion_reasoning_tokens", - "completion_tokens", - "cost", - "cost_upstream_inference_cost", - "cost_upstream_inference_input_cost", - "cost_upstream_inference_output_cost", - "prompt_cached_tokens", - "prompt_tokens", - "tokens" - ], + "metric_keys": [], "name": "openrouter.callModel", "root_span_id": "", "span_id": "", diff --git a/js/src/instrumentation/plugins/openrouter-agent-plugin.test.ts b/js/src/instrumentation/plugins/openrouter-agent-plugin.test.ts index 137232d6a..b531ca4ab 100644 --- a/js/src/instrumentation/plugins/openrouter-agent-plugin.test.ts +++ b/js/src/instrumentation/plugins/openrouter-agent-plugin.test.ts @@ -430,6 +430,7 @@ describe("OpenRouter Agent Plugin", () => { inputTokens: 10, outputTokens: 4, totalTokens: 14, + cost: 0.01, }, }; const finalResponse = { @@ -447,6 +448,7 @@ describe("OpenRouter Agent Plugin", () => { inputTokens: 12, outputTokens: 3, totalTokens: 15, + cost: 0.02, }, }; const request = { @@ -544,11 +546,10 @@ describe("OpenRouter Agent Plugin", () => { turn_count: 2, }); expect(callModelSpan?.output).toMatchObject(finalResponse.output); - expect(callModelSpan?.metrics).toMatchObject({ - prompt_tokens: 22, - completion_tokens: 7, - tokens: 29, - }); + expect(callModelSpan?.metrics?.prompt_tokens).toBeUndefined(); + expect(callModelSpan?.metrics?.completion_tokens).toBeUndefined(); + expect(callModelSpan?.metrics?.tokens).toBeUndefined(); + expect(callModelSpan?.metrics?.cost).toBeUndefined(); expect(turnSpans).toHaveLength(2); expect(turnSpans[0]?.metadata).toMatchObject({ @@ -559,6 +560,12 @@ describe("OpenRouter Agent Plugin", () => { step: 1, step_type: "initial", }); + expect(turnSpans[0]?.metrics).toMatchObject({ + prompt_tokens: 10, + completion_tokens: 4, + tokens: 14, + cost: 0.01, + }); expect(turnSpans[1]?.metadata).toMatchObject({ provider: TEST_PROVIDER, model: TEST_MODEL, @@ -567,6 +574,24 @@ describe("OpenRouter Agent Plugin", () => { step: 2, step_type: "continue", }); + expect(turnSpans[1]?.metrics).toMatchObject({ + prompt_tokens: 12, + completion_tokens: 3, + tokens: 15, + cost: 0.02, + }); + expect( + spans.reduce( + (sum: number, span: any) => sum + (span.metrics?.prompt_tokens ?? 0), + 0, + ), + ).toBe(22); + expect( + spans.reduce( + (sum: number, span: any) => sum + (span.metrics?.cost ?? 0), + 0, + ), + ).toBeCloseTo(0.03); expect(toolSpan?.span_attributes).toMatchObject({ name: "lookup_weather", diff --git a/js/src/instrumentation/plugins/openrouter-agent-plugin.ts b/js/src/instrumentation/plugins/openrouter-agent-plugin.ts index 414d110a5..f95336371 100644 --- a/js/src/instrumentation/plugins/openrouter-agent-plugin.ts +++ b/js/src/instrumentation/plugins/openrouter-agent-plugin.ts @@ -920,10 +920,16 @@ function patchOpenRouterCallModelResult(args: { finalResponse, rounds.length + 1, ); + const metrics = + tracedTurnCount === 0 + ? aggregateOpenRouterCallModelMetrics(rounds, finalResponse) + : undefined; span.log({ output: extractOpenRouterResponseOutput(finalResponse, fallbackOutput), ...(metadata ? { metadata } : {}), - metrics: aggregateOpenRouterCallModelMetrics(rounds, finalResponse), + // Child turn spans already carry per-response usage. Duplicating those + // metrics on the parent makes trace-level token/cost totals double count. + ...(metrics && Object.keys(metrics).length > 0 ? { metrics } : {}), }); span.end(); return; diff --git a/js/src/instrumentation/plugins/openrouter-plugin.test.ts b/js/src/instrumentation/plugins/openrouter-plugin.test.ts index bc67af92a..27933d9cc 100644 --- a/js/src/instrumentation/plugins/openrouter-plugin.test.ts +++ b/js/src/instrumentation/plugins/openrouter-plugin.test.ts @@ -432,6 +432,7 @@ describe("OpenRouter Plugin", () => { inputTokens: 10, outputTokens: 4, totalTokens: 14, + cost: 0.01, }, }; const finalResponse = { @@ -449,6 +450,7 @@ describe("OpenRouter Plugin", () => { inputTokens: 12, outputTokens: 3, totalTokens: 15, + cost: 0.02, }, }; const request = { @@ -543,11 +545,10 @@ describe("OpenRouter Plugin", () => { turn_count: 2, }); expect(callModelSpan?.output).toMatchObject(finalResponse.output); - expect(callModelSpan?.metrics).toMatchObject({ - prompt_tokens: 22, - completion_tokens: 7, - tokens: 29, - }); + expect(callModelSpan?.metrics?.prompt_tokens).toBeUndefined(); + expect(callModelSpan?.metrics?.completion_tokens).toBeUndefined(); + expect(callModelSpan?.metrics?.tokens).toBeUndefined(); + expect(callModelSpan?.metrics?.cost).toBeUndefined(); expect(turnSpans).toHaveLength(2); expect(turnSpans[0]?.metadata).toMatchObject({ @@ -558,6 +559,12 @@ describe("OpenRouter Plugin", () => { step: 1, step_type: "initial", }); + expect(turnSpans[0]?.metrics).toMatchObject({ + prompt_tokens: 10, + completion_tokens: 4, + tokens: 14, + cost: 0.01, + }); expect(turnSpans[1]?.metadata).toMatchObject({ provider: TEST_PROVIDER, model: TEST_MODEL, @@ -566,6 +573,24 @@ describe("OpenRouter Plugin", () => { step: 2, step_type: "continue", }); + expect(turnSpans[1]?.metrics).toMatchObject({ + prompt_tokens: 12, + completion_tokens: 3, + tokens: 15, + cost: 0.02, + }); + expect( + spans.reduce( + (sum: number, span: any) => sum + (span.metrics?.prompt_tokens ?? 0), + 0, + ), + ).toBe(22); + expect( + spans.reduce( + (sum: number, span: any) => sum + (span.metrics?.cost ?? 0), + 0, + ), + ).toBeCloseTo(0.03); expect(toolSpan?.span_attributes).toMatchObject({ name: "lookup_weather", diff --git a/js/src/instrumentation/plugins/openrouter-plugin.ts b/js/src/instrumentation/plugins/openrouter-plugin.ts index 2edf46d3c..ec022b43f 100644 --- a/js/src/instrumentation/plugins/openrouter-plugin.ts +++ b/js/src/instrumentation/plugins/openrouter-plugin.ts @@ -1102,10 +1102,16 @@ function patchOpenRouterCallModelResult(args: { finalResponse, rounds.length + 1, ); + const metrics = + tracedTurnCount === 0 + ? aggregateOpenRouterCallModelMetrics(rounds, finalResponse) + : undefined; span.log({ output: extractOpenRouterResponseOutput(finalResponse, fallbackOutput), ...(metadata ? { metadata } : {}), - metrics: aggregateOpenRouterCallModelMetrics(rounds, finalResponse), + // Child turn spans already carry per-response usage. Duplicating those + // metrics on the parent makes trace-level token/cost totals double count. + ...(metrics && Object.keys(metrics).length > 0 ? { metrics } : {}), }); span.end(); return;