From e4cfecfba918a4f41a5b7237746d0aeb29679b41 Mon Sep 17 00:00:00 2001 From: Tom Beckenham <34339192+tombeckenham@users.noreply.github.com> Date: Tue, 19 May 2026 21:14:34 +1000 Subject: [PATCH 1/3] fix: recognize top-level Step[] envelope in gemini-interactions input Clients that follow the live /v1beta/interactions wire contract send Array<Step> at the top level of input (e.g. { type: 'user_input', content: [...] }), but the parser only recognized string, Turn[], and Content[]. Step[] fell through to the Content[] branch, which read text only from top-level blocks and produced an empty user message, so userMessage-based fixtures never matched and requests 404'd. Add an InteractionsStep type, a STEP_TYPES discriminator (no role + known type), and a Step[] branch that maps user_input to user messages, model_output to assistant messages (text and/or function_call tool_calls), and function_result / code_execution_result / url_context_result / google_search_result / google_maps_result / mcp_server_tool_result / file_search_result to tool messages keyed by call_id. Closes #228. 
--- src/__tests__/gemini-interactions.test.ts | 164 ++++++++++++++++++++++ src/gemini-interactions.ts | 89 +++++++++++- 2 files changed, 250 insertions(+), 3 deletions(-) diff --git a/src/__tests__/gemini-interactions.test.ts b/src/__tests__/gemini-interactions.test.ts index 1bb6a01..76cdba8 100644 --- a/src/__tests__/gemini-interactions.test.ts +++ b/src/__tests__/gemini-interactions.test.ts @@ -540,6 +540,154 @@ describe("geminiInteractionsToCompletionRequest", () => { expect(result.messages).toHaveLength(1); expect(result.messages[0].content).toBe("from-content"); }); + + // ─── Step[] envelope (live API wire contract) ───────────────────────── + + it("converts Step[] user_input step to a user message", () => { + const result = geminiInteractionsToCompletionRequest({ + model: "gemini-2.5-flash", + input: [{ type: "user_input", content: [{ type: "text", text: "hi" }] }], + }); + expect(result.messages).toEqual([{ role: "user", content: "hi" }]); + }); + + it("converts Step[] user_input with multiple text parts (concatenates)", () => { + const result = geminiInteractionsToCompletionRequest({ + model: "gemini-2.5-flash", + input: [ + { + type: "user_input", + content: [ + { type: "text", text: "part one " }, + { type: "text", text: "part two" }, + ], + }, + ], + }); + expect(result.messages).toEqual([{ role: "user", content: "part one part two" }]); + }); + + it("converts Step[] function_result step to a tool message", () => { + const result = geminiInteractionsToCompletionRequest({ + model: "gemini-2.5-flash", + input: [ + { + type: "function_result", + call_id: "call_abc", + name: "get_weather", + result: { temperature: 72 }, + }, + ], + }); + expect(result.messages).toHaveLength(1); + expect(result.messages[0].role).toBe("tool"); + expect(result.messages[0].content).toBe('{"temperature":72}'); + expect(result.messages[0].tool_call_id).toBe("call_abc"); + }); + + it("passes through Step[] function_result with string result", () => { + const result = 
geminiInteractionsToCompletionRequest({ + model: "gemini-2.5-flash", + input: [{ type: "function_result", call_id: "call_x", result: "ok" }], + }); + expect(result.messages).toEqual([{ role: "tool", content: "ok", tool_call_id: "call_x" }]); + }); + + it("converts Step[] model_output text step to an assistant message", () => { + const result = geminiInteractionsToCompletionRequest({ + model: "gemini-2.5-flash", + input: [ + { type: "user_input", content: [{ type: "text", text: "hi" }] }, + { type: "model_output", content: [{ type: "text", text: "hello" }] }, + ], + }); + expect(result.messages).toEqual([ + { role: "user", content: "hi" }, + { role: "assistant", content: "hello" }, + ]); + }); + + it("converts Step[] model_output with function_call into assistant tool_calls", () => { + const result = geminiInteractionsToCompletionRequest({ + model: "gemini-2.5-flash", + input: [ + { + type: "model_output", + content: [ + { type: "text", text: "Calling tool..." }, + { + type: "function_call", + name: "search", + id: "call_x", + arguments: { query: "test" }, + }, + ], + }, + ], + }); + expect(result.messages).toHaveLength(1); + expect(result.messages[0].role).toBe("assistant"); + expect(result.messages[0].content).toBe("Calling tool..."); + expect(result.messages[0].tool_calls).toHaveLength(1); + expect(result.messages[0].tool_calls![0].function.name).toBe("search"); + expect(result.messages[0].tool_calls![0].id).toBe("call_x"); + }); + + it("converts a multi-step Step[] agent loop in order", () => { + const result = geminiInteractionsToCompletionRequest({ + model: "gemini-2.5-flash", + input: [ + { type: "user_input", content: [{ type: "text", text: "what's the weather?" 
}] }, + { + type: "model_output", + content: [ + { + type: "function_call", + name: "get_weather", + id: "call_w", + arguments: { city: "NYC" }, + }, + ], + }, + { type: "function_result", call_id: "call_w", result: { temp: 72 } }, + { type: "user_input", content: [{ type: "text", text: "thanks" }] }, + ], + }); + expect(result.messages).toHaveLength(4); + expect(result.messages[0]).toEqual({ role: "user", content: "what's the weather?" }); + expect(result.messages[1].role).toBe("assistant"); + expect(result.messages[1].tool_calls![0].function.name).toBe("get_weather"); + expect(result.messages[2]).toEqual({ + role: "tool", + content: '{"temp":72}', + tool_call_id: "call_w", + }); + expect(result.messages[3]).toEqual({ role: "user", content: "thanks" }); + }); + + it("handles other *_result Step types as tool messages", () => { + const result = geminiInteractionsToCompletionRequest({ + model: "gemini-2.5-flash", + input: [ + { + type: "code_execution_result", + call_id: "call_code", + result: "stdout: 42\n", + }, + ], + }); + expect(result.messages).toEqual([ + { role: "tool", content: "stdout: 42\n", tool_call_id: "call_code" }, + ]); + }); + + it("handles Step[] user_input with empty content", () => { + const result = geminiInteractionsToCompletionRequest({ + model: "gemini-2.5-flash", + input: [{ type: "user_input", content: [] }], + }); + expect(result.messages).toEqual([{ role: "user", content: "" }]); + }); }); // ─── Unit tests: response builders ────────────────────────────────────── @@ -809,6 +957,22 @@ describe("Gemini Interactions — non-streaming", () => { expect(body.error.code).toBe("UNAVAILABLE"); }); + it("matches userMessage fixture when input is Step[] envelope (issue #228)", async () => { + // Reproduces the live wire contract that Google's /v1beta/interactions accepts: + // a top-level Array where each step is { type: "user_input", content: [...] }. 
+ // Pre-fix this fell through to the Content[] branch and produced an empty user + // message, so userMessage-based fixtures never matched. + instance = await createServer([textFixture]); + const res = await post(`${instance.url}/v1beta/interactions`, { + model: "gemini-2.5-flash", + stream: false, + input: [{ type: "user_input", content: [{ type: "text", text: "hello" }] }], + }); + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.outputs).toEqual([{ type: "text", text: "Hi there!" }]); + }); + it("handles sequenceIndex for multi-turn", async () => { instance = await createServer([...allFixtures]); const r1 = await post(`${instance.url}/v1beta/interactions`, { diff --git a/src/gemini-interactions.ts b/src/gemini-interactions.ts index 86d7446..e5e1182 100644 --- a/src/gemini-interactions.ts +++ b/src/gemini-interactions.ts @@ -60,6 +60,38 @@ interface InteractionsTurn { parts?: InteractionsContentBlock[]; } +/** + * Top-level Step envelope accepted by the live Gemini Interactions API. + * The SDK's TypeScript union does not include Step[], but the wire contract + * does — clients following the live API send these at the top level of `input`. + * Discriminated by `type`; no `role` field (distinguishes from Turn[]). + */ +interface InteractionsStep { + type: string; + content?: InteractionsContentBlock[]; + call_id?: string; + id?: string; + name?: string; + result?: unknown; + output?: unknown; + is_error?: boolean; + signature?: string; +} + +/** Step types whose payload is a tool/agent result keyed by call_id. */ +const STEP_RESULT_TYPES = new Set([ + "function_result", + "code_execution_result", + "url_context_result", + "google_search_result", + "google_maps_result", + "mcp_server_tool_result", + "file_search_result", +]); + +/** All recognized top-level Step types (used as the Step[] discriminator). 
*/ +const STEP_TYPES = new Set(["user_input", "model_output", ...STEP_RESULT_TYPES]); + interface InteractionsFunctionTool { type: "function"; name: string; @@ -69,7 +101,7 @@ interface InteractionsFunctionTool { interface InteractionsRequest { model?: string; - input?: string | InteractionsTurn[] | InteractionsContentBlock[]; + input?: string | InteractionsTurn[] | InteractionsStep[] | InteractionsContentBlock[]; system_instruction?: string; tools?: InteractionsFunctionTool[]; generation_config?: { @@ -101,8 +133,18 @@ export function geminiInteractionsToCompletionRequest( // Simple string input → single user message messages.push({ role: "user", content: req.input }); } else if (Array.isArray(req.input)) { - // Could be Turn[] or Content[] - const firstItem = req.input[0]; + // Could be Turn[], Step[], or Content[] + const firstItem = req.input[0] as + | InteractionsTurn + | InteractionsStep + | InteractionsContentBlock + | undefined; + const isStepArray = + !!firstItem && + !("role" in firstItem) && + typeof firstItem.type === "string" && + STEP_TYPES.has(firstItem.type); + if (firstItem && "role" in firstItem) { // Turn[] format for (const turn of req.input as InteractionsTurn[]) { @@ -164,6 +206,47 @@ export function geminiInteractionsToCompletionRequest( } } } + } else if (isStepArray) { + // Step[] format — the wire contract Google's /v1beta/interactions accepts. + for (const step of req.input as InteractionsStep[]) { + if (step.type === "user_input") { + const text = (step.content ?? []) + .filter((p) => p.type === "text") + .map((p) => p.text ?? "") + .join(""); + messages.push({ role: "user", content: text }); + } else if (step.type === "model_output") { + const blocks = step.content ?? []; + const funcCallParts = blocks.filter((p) => p.type === "function_call"); + const textParts = blocks.filter((p) => p.type === "text"); + const textContent = textParts.map((p) => p.text ?? 
"").join(""); + + if (funcCallParts.length > 0) { + messages.push({ + role: "assistant", + content: textContent || null, + tool_calls: funcCallParts.map((p) => ({ + id: p.id ?? p.call_id ?? generateToolCallId(), + type: "function" as const, + function: { + name: p.name ?? "", + arguments: JSON.stringify(p.arguments ?? {}), + }, + })), + }); + } else { + messages.push({ role: "assistant", content: textContent }); + } + } else if (STEP_RESULT_TYPES.has(step.type)) { + const resultValue = step.result ?? step.output; + messages.push({ + role: "tool", + content: + typeof resultValue === "string" ? resultValue : JSON.stringify(resultValue ?? ""), + tool_call_id: step.call_id ?? step.id ?? "", + }); + } + } } else { // Content[] format — single user message with content blocks const textParts = (req.input as InteractionsContentBlock[]).filter( From 16f9a7876c28f21ba4a9651a20b36012226a9cef Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Tue, 19 May 2026 05:53:50 -0700 Subject: [PATCH 2/3] test: add Step[] coverage for drift, parameterized result types, and empty model_output Sync drift-collector PROVIDER_MAP with Gemini Interactions entry, add it.each for all 5 remaining STEP_RESULT_TYPES, test model_output with empty/undefined content, and add Step[] input variant to drift test suite. 
--- src/__tests__/drift-collector.test.ts | 10 +++ .../drift/gemini-interactions.drift.ts | 49 +++++++++++- src/__tests__/drift/providers.ts | 74 +++++++++++++++++++ src/__tests__/gemini-interactions.test.ts | 39 ++++++++++ 4 files changed, 171 insertions(+), 1 deletion(-) diff --git a/src/__tests__/drift-collector.test.ts b/src/__tests__/drift-collector.test.ts index f5f6036..0239582 100644 --- a/src/__tests__/drift-collector.test.ts +++ b/src/__tests__/drift-collector.test.ts @@ -131,6 +131,16 @@ const PROVIDER_MAP: Record = { typesFile: null, sdkShapesFile: "src/__tests__/drift/sdk-shapes.ts", }, + "Gemini Interactions": { + builderFile: "src/gemini-interactions.ts", + builderFunctions: [ + "buildInteractionsTextResponse", + "buildInteractionsToolCallResponse", + "buildInteractionsTextSSEEvents", + "buildInteractionsToolCallSSEEvents", + ], + typesFile: null, + }, }; const SDK_SHAPES_FILE = "src/__tests__/drift/sdk-shapes.ts"; diff --git a/src/__tests__/drift/gemini-interactions.drift.ts b/src/__tests__/drift/gemini-interactions.drift.ts index 17b2a5b..f29ab01 100644 --- a/src/__tests__/drift/gemini-interactions.drift.ts +++ b/src/__tests__/drift/gemini-interactions.drift.ts @@ -16,7 +16,11 @@ import { geminiInteractionsStreamEventShapes, geminiInteractionsToolCallStreamEventShapes, } from "./sdk-shapes.js"; -import { geminiInteractionsNonStreaming, geminiInteractionsStreaming } from "./providers.js"; +import { + geminiInteractionsNonStreaming, + geminiInteractionsNonStreamingSteps, + geminiInteractionsStreaming, +} from "./providers.js"; import { httpPost, parseInteractionsSSE, startDriftServer, stopDriftServer } from "./helpers.js"; // --------------------------------------------------------------------------- @@ -81,6 +85,49 @@ describe.skipIf(!GOOGLE_API_KEY)("Gemini Interactions API drift", () => { ).toEqual([]); }); + it("non-streaming text shape matches (Step[] input)", async () => { + const sdkShape = geminiInteractionsResponseShape(); + + let realRes; + 
try { + realRes = await geminiInteractionsNonStreamingSteps(config, "Say hello"); + } catch (err) { + console.warn( + "Gemini Interactions API unavailable:", + err instanceof Error ? err.message : String(err), + ); + return; + } + + if ( + !realRes.body || + (typeof realRes.body === "object" && Object.keys(realRes.body).length === 0) + ) { + console.warn("Gemini Interactions non-streaming API returned empty body — skipping"); + return; + } + + const mockRes = await httpPost(`${instance.url}/v1beta/interactions`, { + model: "gemini-2.5-flash", + input: [{ type: "user_input", content: [{ type: "text", text: "Say hello" }] }], + stream: false, + }); + + const realShape = extractShape(realRes.body); + const mockShape = extractShape(JSON.parse(mockRes.body)); + + const diffs = triangulate(sdkShape, realShape, mockShape); + const report = formatDriftReport( + "Gemini Interactions (non-streaming text, Step[] input)", + diffs, + ); + + expect( + diffs.filter((d) => d.severity === "critical"), + report, + ).toEqual([]); + }); + it("streaming text event sequence and shapes match", async () => { const sdkEvents = geminiInteractionsStreamEventShapes(); diff --git a/src/__tests__/drift/providers.ts b/src/__tests__/drift/providers.ts index cd43692..90d3738 100644 --- a/src/__tests__/drift/providers.ts +++ b/src/__tests__/drift/providers.ts @@ -452,6 +452,80 @@ export async function geminiInteractionsStreaming( }; } +export async function geminiInteractionsNonStreamingSteps( + config: ProviderConfig, + input: string, + tools?: object[], +): Promise { + const body: Record = { + model: "gemini-2.5-flash", + input: [{ type: "user_input", content: [{ type: "text", text: input }] }], + stream: false, + }; + if (tools) body.tools = tools; + + const res = await fetchWithRetry( + `https://generativelanguage.googleapis.com/v1beta/interactions`, + { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-goog-api-key": config.apiKey, + }, + body: JSON.stringify(body), + 
}, + ); + + const raw = await res.text(); + return { + status: res.status, + body: parseJsonResponse(raw, res.status, "Gemini Interactions"), + raw, + }; +} + +export async function geminiInteractionsStreamingSteps( + config: ProviderConfig, + input: string, + tools?: object[], +): Promise { + const body: Record = { + model: "gemini-2.5-flash", + input: [{ type: "user_input", content: [{ type: "text", text: input }] }], + stream: true, + }; + if (tools) body.tools = tools; + + const res = await fetchWithRetry( + `https://generativelanguage.googleapis.com/v1beta/interactions`, + { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-goog-api-key": config.apiKey, + }, + body: JSON.stringify(body), + }, + ); + + const raw = await res.text(); + assertOk(raw, res.status, "Gemini Interactions streaming"); + // Interactions uses data-only SSE (data: {...}\n\n) with event_type inside the JSON + const parsed = parseDataOnlySSE(raw); + const rawEvents = parsed.map((p) => { + const data = p.data as Record; + return { + type: (data.event_type as string) ?? 
"unknown", + data: data, + }; + }); + return { + status: res.status, + events: toSSEEventShapes(rawEvents), + rawEvents, + }; +} + // --------------------------------------------------------------------------- // OpenAI Embeddings // --------------------------------------------------------------------------- diff --git a/src/__tests__/gemini-interactions.test.ts b/src/__tests__/gemini-interactions.test.ts index 76cdba8..66d604b 100644 --- a/src/__tests__/gemini-interactions.test.ts +++ b/src/__tests__/gemini-interactions.test.ts @@ -681,6 +681,29 @@ describe("geminiInteractionsToCompletionRequest", () => { ]); }); + it.each([ + "url_context_result", + "google_search_result", + "google_maps_result", + "mcp_server_tool_result", + "file_search_result", + ])("handles Step[] %s as a tool message", (stepType) => { + const result = geminiInteractionsToCompletionRequest({ + model: "gemini-2.5-flash", + input: [ + { + type: stepType, + call_id: `call_${stepType}`, + result: { data: "test" }, + }, + ], + }); + expect(result.messages).toHaveLength(1); + expect(result.messages[0].role).toBe("tool"); + expect(result.messages[0].content).toBe('{"data":"test"}'); + expect(result.messages[0].tool_call_id).toBe(`call_${stepType}`); + }); + it("handles Step[] user_input with empty content", () => { const result = geminiInteractionsToCompletionRequest({ model: "gemini-2.5-flash", @@ -688,6 +711,22 @@ describe("geminiInteractionsToCompletionRequest", () => { }); expect(result.messages).toEqual([{ role: "user", content: "" }]); }); + + it("handles Step[] model_output with undefined content", () => { + const result = geminiInteractionsToCompletionRequest({ + model: "gemini-2.5-flash", + input: [{ type: "model_output" }], + }); + expect(result.messages).toEqual([{ role: "assistant", content: "" }]); + }); + + it("handles Step[] model_output with empty content array", () => { + const result = geminiInteractionsToCompletionRequest({ + model: "gemini-2.5-flash", + input: [{ type: 
"model_output", content: [] }], + }); + expect(result.messages).toEqual([{ role: "assistant", content: "" }]); + }); }); // ─── Unit tests: response builders ────────────────────────────────────── From c008a0bd1ac9186ac882de5901e4854c10376c2c Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Tue, 19 May 2026 06:00:30 -0700 Subject: [PATCH 3/3] test: add missing content+toolCalls builders to Gemini Interactions PROVIDER_MAP Both drift-collector.test.ts and drift-report-collector.ts were listing 4 of 6 builder functions. Added buildInteractionsContentWithToolCallsResponse and buildInteractionsContentWithToolCallsSSEEvents. --- scripts/drift-report-collector.ts | 2 ++ src/__tests__/drift-collector.test.ts | 2 ++ 2 files changed, 4 insertions(+) diff --git a/scripts/drift-report-collector.ts b/scripts/drift-report-collector.ts index 2724f92..f6614ed 100644 --- a/scripts/drift-report-collector.ts +++ b/scripts/drift-report-collector.ts @@ -137,8 +137,10 @@ const PROVIDER_MAP: Record = { builderFunctions: [ "buildInteractionsTextResponse", "buildInteractionsToolCallResponse", + "buildInteractionsContentWithToolCallsResponse", "buildInteractionsTextSSEEvents", "buildInteractionsToolCallSSEEvents", + "buildInteractionsContentWithToolCallsSSEEvents", ], typesFile: null, }, diff --git a/src/__tests__/drift-collector.test.ts b/src/__tests__/drift-collector.test.ts index 0239582..d749934 100644 --- a/src/__tests__/drift-collector.test.ts +++ b/src/__tests__/drift-collector.test.ts @@ -136,8 +136,10 @@ const PROVIDER_MAP: Record = { builderFunctions: [ "buildInteractionsTextResponse", "buildInteractionsToolCallResponse", + "buildInteractionsContentWithToolCallsResponse", "buildInteractionsTextSSEEvents", "buildInteractionsToolCallSSEEvents", + "buildInteractionsContentWithToolCallsSSEEvents", ], typesFile: null, },