Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/fix-openrouter-callmodel-metrics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"braintrust": patch
---

fix(openrouter): avoid double-counting callModel token and cost metrics
Original file line number Diff line number Diff line change
Expand Up @@ -282,17 +282,7 @@
"model": "gemini-2.5-flash-lite",
"provider": "google"
},
"metric_keys": [
"completion_reasoning_tokens",
"completion_tokens",
"cost",
"cost_upstream_inference_cost",
"cost_upstream_inference_input_cost",
"cost_upstream_inference_output_cost",
"prompt_cached_tokens",
"prompt_tokens",
"tokens"
],
"metric_keys": [],
"name": "openrouter.callModel",
"root_span_id": "<span:1>",
"span_id": "<span:15>",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,17 +238,7 @@
"model": "gemini-2.5-flash-lite",
"provider": "google"
},
"metric_keys": [
"completion_reasoning_tokens",
"completion_tokens",
"cost",
"cost_upstream_inference_cost",
"cost_upstream_inference_input_cost",
"cost_upstream_inference_output_cost",
"prompt_cached_tokens",
"prompt_tokens",
"tokens"
],
"metric_keys": [],
"name": "openrouter.callModel",
"root_span_id": "<span:1>",
"span_id": "<span:13>",
Expand Down
35 changes: 30 additions & 5 deletions js/src/instrumentation/plugins/openrouter-agent-plugin.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,7 @@ describe("OpenRouter Agent Plugin", () => {
inputTokens: 10,
outputTokens: 4,
totalTokens: 14,
cost: 0.01,
},
};
const finalResponse = {
Expand All @@ -447,6 +448,7 @@ describe("OpenRouter Agent Plugin", () => {
inputTokens: 12,
outputTokens: 3,
totalTokens: 15,
cost: 0.02,
},
};
const request = {
Expand Down Expand Up @@ -544,11 +546,10 @@ describe("OpenRouter Agent Plugin", () => {
turn_count: 2,
});
expect(callModelSpan?.output).toMatchObject(finalResponse.output);
expect(callModelSpan?.metrics).toMatchObject({
prompt_tokens: 22,
completion_tokens: 7,
tokens: 29,
});
expect(callModelSpan?.metrics?.prompt_tokens).toBeUndefined();
expect(callModelSpan?.metrics?.completion_tokens).toBeUndefined();
expect(callModelSpan?.metrics?.tokens).toBeUndefined();
expect(callModelSpan?.metrics?.cost).toBeUndefined();

expect(turnSpans).toHaveLength(2);
expect(turnSpans[0]?.metadata).toMatchObject({
Expand All @@ -559,6 +560,12 @@ describe("OpenRouter Agent Plugin", () => {
step: 1,
step_type: "initial",
});
expect(turnSpans[0]?.metrics).toMatchObject({
prompt_tokens: 10,
completion_tokens: 4,
tokens: 14,
cost: 0.01,
});
expect(turnSpans[1]?.metadata).toMatchObject({
provider: TEST_PROVIDER,
model: TEST_MODEL,
Expand All @@ -567,6 +574,24 @@ describe("OpenRouter Agent Plugin", () => {
step: 2,
step_type: "continue",
});
expect(turnSpans[1]?.metrics).toMatchObject({
prompt_tokens: 12,
completion_tokens: 3,
tokens: 15,
cost: 0.02,
});
expect(
spans.reduce(
(sum: number, span: any) => sum + (span.metrics?.prompt_tokens ?? 0),
0,
),
).toBe(22);
expect(
spans.reduce(
(sum: number, span: any) => sum + (span.metrics?.cost ?? 0),
0,
),
).toBeCloseTo(0.03);

expect(toolSpan?.span_attributes).toMatchObject({
name: "lookup_weather",
Expand Down
8 changes: 7 additions & 1 deletion js/src/instrumentation/plugins/openrouter-agent-plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -920,10 +920,16 @@ function patchOpenRouterCallModelResult(args: {
finalResponse,
rounds.length + 1,
);
const metrics =
tracedTurnCount === 0
? aggregateOpenRouterCallModelMetrics(rounds, finalResponse)
: undefined;
span.log({
output: extractOpenRouterResponseOutput(finalResponse, fallbackOutput),
...(metadata ? { metadata } : {}),
metrics: aggregateOpenRouterCallModelMetrics(rounds, finalResponse),
// Child turn spans already carry per-response usage. Duplicating those
// metrics on the parent makes trace-level token/cost totals double count.
...(metrics && Object.keys(metrics).length > 0 ? { metrics } : {}),
});
span.end();
return;
Expand Down
35 changes: 30 additions & 5 deletions js/src/instrumentation/plugins/openrouter-plugin.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ describe("OpenRouter Plugin", () => {
inputTokens: 10,
outputTokens: 4,
totalTokens: 14,
cost: 0.01,
},
};
const finalResponse = {
Expand All @@ -449,6 +450,7 @@ describe("OpenRouter Plugin", () => {
inputTokens: 12,
outputTokens: 3,
totalTokens: 15,
cost: 0.02,
},
};
const request = {
Expand Down Expand Up @@ -543,11 +545,10 @@ describe("OpenRouter Plugin", () => {
turn_count: 2,
});
expect(callModelSpan?.output).toMatchObject(finalResponse.output);
expect(callModelSpan?.metrics).toMatchObject({
prompt_tokens: 22,
completion_tokens: 7,
tokens: 29,
});
expect(callModelSpan?.metrics?.prompt_tokens).toBeUndefined();
expect(callModelSpan?.metrics?.completion_tokens).toBeUndefined();
expect(callModelSpan?.metrics?.tokens).toBeUndefined();
expect(callModelSpan?.metrics?.cost).toBeUndefined();

expect(turnSpans).toHaveLength(2);
expect(turnSpans[0]?.metadata).toMatchObject({
Expand All @@ -558,6 +559,12 @@ describe("OpenRouter Plugin", () => {
step: 1,
step_type: "initial",
});
expect(turnSpans[0]?.metrics).toMatchObject({
prompt_tokens: 10,
completion_tokens: 4,
tokens: 14,
cost: 0.01,
});
expect(turnSpans[1]?.metadata).toMatchObject({
provider: TEST_PROVIDER,
model: TEST_MODEL,
Expand All @@ -566,6 +573,24 @@ describe("OpenRouter Plugin", () => {
step: 2,
step_type: "continue",
});
expect(turnSpans[1]?.metrics).toMatchObject({
prompt_tokens: 12,
completion_tokens: 3,
tokens: 15,
cost: 0.02,
});
expect(
spans.reduce(
(sum: number, span: any) => sum + (span.metrics?.prompt_tokens ?? 0),
0,
),
).toBe(22);
expect(
spans.reduce(
(sum: number, span: any) => sum + (span.metrics?.cost ?? 0),
0,
),
).toBeCloseTo(0.03);

expect(toolSpan?.span_attributes).toMatchObject({
name: "lookup_weather",
Expand Down
8 changes: 7 additions & 1 deletion js/src/instrumentation/plugins/openrouter-plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1102,10 +1102,16 @@ function patchOpenRouterCallModelResult(args: {
finalResponse,
rounds.length + 1,
);
const metrics =
tracedTurnCount === 0
? aggregateOpenRouterCallModelMetrics(rounds, finalResponse)
: undefined;
Comment on lines +1105 to +1108
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

**P2:** Update e2e snapshots for removed parent metrics

When `callModel` traces child turns, this branch now omits metrics from the parent span, but the checked-in OpenRouter e2e contract snapshots still expect `metric_keys` on `openrouter.callModel`. For example, `e2e/scenarios/openrouter-instrumentation/__snapshots__/openrouter-v0911.span-events.json` lists `prompt_tokens`, `tokens`, and `cost` for that span. Because the `openrouter-call-model-operation` scenario exercises the tool-turn path, `pnpm run test:e2e` will fail snapshot comparison until those snapshots and assertions are updated to reflect this intentional trace contract change.

Useful? React with 👍 / 👎.

span.log({
output: extractOpenRouterResponseOutput(finalResponse, fallbackOutput),
...(metadata ? { metadata } : {}),
metrics: aggregateOpenRouterCallModelMetrics(rounds, finalResponse),
// Child turn spans already carry per-response usage. Duplicating those
// metrics on the parent makes trace-level token/cost totals double count.
...(metrics && Object.keys(metrics).length > 0 ? { metrics } : {}),
});
span.end();
return;
Expand Down
Loading