diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index a918833..c323669 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -28,8 +28,12 @@ jobs:
 
       - name: Run E2E tests
         env:
-          DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
-          DASHSCOPE_API_KEY: ${{ secrets.DASHSCOPE_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
+          OPENAI_MODEL: ${{ secrets.OPENAI_MODEL }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_BASE_URL }}
+          ANTHROPIC_MODEL: ${{ secrets.ANTHROPIC_MODEL }}
         run: pnpm test -- --reporter=json --outputFile=e2e-results.json
 
       - name: Upload E2E results
diff --git a/packages/agent/__tests__/e2e/react-loop.e2e.test.ts b/packages/agent/__tests__/e2e/react-loop.e2e.test.ts
new file mode 100644
index 0000000..6c61bd9
--- /dev/null
+++ b/packages/agent/__tests__/e2e/react-loop.e2e.test.ts
@@ -0,0 +1,106 @@
+/**
+ * Agent ReAct loop E2E test — full reasoning cycle with a real LLM.
+ *
+ * Verifies that createAgent can run a complete ReAct loop: the model
+ * receives a question, invokes a tool, receives the tool result, and
+ * produces a final text answer.
+ *
+ * Skipped when neither OPENAI_API_KEY nor ANTHROPIC_API_KEY is set.
+ * Uses the OpenAI-compatible adapter when OPENAI_API_KEY is set; otherwise
+ * falls back to the Anthropic-compatible adapter.
+ *
+ * Environment variables (OpenAI-compatible):
+ *   OPENAI_API_KEY — API key
+ *   OPENAI_BASE_URL — Base URL
+ *   OPENAI_MODEL — Model name
+ *
+ * Environment variables (Anthropic-compatible):
+ *   ANTHROPIC_API_KEY — API key
+ *   ANTHROPIC_BASE_URL — Base URL
+ *   ANTHROPIC_MODEL — Model name
+ *
+ * An empty value is treated the same as an unset variable.
+ */
+
+import { describe, it, expect } from "vitest";
+import {
+  registerProvider,
+  createModel,
+  createOpenAIProvider,
+  createAnthropicProvider,
+  clearProviders,
+} from "@openlinkos/ai";
+import { createAgent } from "../../src/index.js";
+import type { ToolDefinition } from "../../src/types.js";
+
+const OPENAI_KEY = process.env.OPENAI_API_KEY;
+const ANTHROPIC_KEY = process.env.ANTHROPIC_API_KEY;
+const HAS_PROVIDER = !!(OPENAI_KEY || ANTHROPIC_KEY);
+
+/**
+ * Registers a single provider and builds the model under test.
+ *
+ * Clears previously registered providers first, then prefers the OpenAI
+ * provider when OPENAI_API_KEY is set and uses the Anthropic provider otherwise.
+ */
+function setupModel() {
+  clearProviders();
+  if (OPENAI_KEY) {
+    registerProvider(createOpenAIProvider());
+    // `||` rather than `??`: CI passes unset secrets as "", which must also
+    // fall back to the default model name.
+    const modelName = process.env.OPENAI_MODEL || "gpt-4o-mini";
+    return createModel(`openai:${modelName}`, {
+      ...(process.env.OPENAI_BASE_URL ? { baseURL: process.env.OPENAI_BASE_URL } : {}),
+    });
+  }
+  registerProvider(createAnthropicProvider());
+  const modelName = process.env.ANTHROPIC_MODEL || "claude-sonnet-4-20250514";
+  return createModel(`anthropic:${modelName}`, {
+    ...(process.env.ANTHROPIC_BASE_URL ? { baseURL: process.env.ANTHROPIC_BASE_URL } : {}),
+  });
+}
+
+describe.skipIf(!HAS_PROVIDER)("Agent ReAct loop E2E", () => {
+  it("completes a full ReAct cycle: question → tool call → answer", async () => {
+    const model = setupModel();
+
+    const tools: ToolDefinition[] = [
+      {
+        name: "get_weather",
+        description: "Get the current weather for a given city. Returns temperature in Celsius.",
+        parameters: {
+          type: "object",
+          properties: { city: { type: "string", description: "City name" } },
+          required: ["city"],
+        },
+        execute: async (params) => ({ city: params.city, temp_c: 22, condition: "sunny" }),
+      },
+    ];
+
+    const agent = createAgent({
+      name: "weather-agent",
+      model,
+      systemPrompt:
+        "You are a weather assistant. Use the get_weather tool to answer weather questions. Always use the tool before answering.",
+      tools,
+      maxIterations: 5,
+    });
+
+    const response = await agent.run("What is the weather in Tokyo?");
+
+    // The agent should have produced a final text response
+    expect(response.text).toBeTruthy();
+    expect(response.text.toLowerCase()).toMatch(/tokyo|22|sunny/);
+
+    // At least one tool call should have been made
+    expect(response.toolCalls.length).toBeGreaterThan(0);
+    expect(response.toolCalls[0].name).toBe("get_weather");
+
+    // Steps should include both the model reasoning and tool execution
+    expect(response.steps.length).toBeGreaterThan(0);
+
+    // Usage should be tracked
+    expect(response.usage.totalTokens).toBeGreaterThan(0);
+  }, 30_000);
+});
diff --git a/packages/agent/__tests__/e2e/tool-calling.e2e.test.ts b/packages/agent/__tests__/e2e/tool-calling.e2e.test.ts
new file mode 100644
index 0000000..a3fd6a0
--- /dev/null
+++ b/packages/agent/__tests__/e2e/tool-calling.e2e.test.ts
@@ -0,0 +1,180 @@
+/**
+ * Agent tool-calling E2E test — tool call + result verification.
+ *
+ * Verifies that the agent correctly invokes tools with the right parameters,
+ * receives results, and incorporates them into the final response.
+ *
+ * Skipped when neither OPENAI_API_KEY nor ANTHROPIC_API_KEY is set.
+ * Uses the OpenAI-compatible adapter when OPENAI_API_KEY is set; otherwise
+ * falls back to the Anthropic-compatible adapter.
+ *
+ * Environment variables (OpenAI-compatible):
+ *   OPENAI_API_KEY — API key
+ *   OPENAI_BASE_URL — Base URL
+ *   OPENAI_MODEL — Model name
+ *
+ * Environment variables (Anthropic-compatible):
+ *   ANTHROPIC_API_KEY — API key
+ *   ANTHROPIC_BASE_URL — Base URL
+ *   ANTHROPIC_MODEL — Model name
+ *
+ * An empty value is treated the same as an unset variable.
+ */
+
+import { describe, it, expect } from "vitest";
+import {
+  registerProvider,
+  createModel,
+  createOpenAIProvider,
+  createAnthropicProvider,
+  clearProviders,
+} from "@openlinkos/ai";
+import { createAgent } from "../../src/index.js";
+import type { ToolDefinition } from "../../src/types.js";
+
+const OPENAI_KEY = process.env.OPENAI_API_KEY;
+const ANTHROPIC_KEY = process.env.ANTHROPIC_API_KEY;
+const HAS_PROVIDER = !!(OPENAI_KEY || ANTHROPIC_KEY);
+
+/**
+ * Registers a single provider and builds the model under test.
+ *
+ * Clears previously registered providers first, then prefers the OpenAI
+ * provider when OPENAI_API_KEY is set and uses the Anthropic provider otherwise.
+ */
+function setupModel() {
+  clearProviders();
+  if (OPENAI_KEY) {
+    registerProvider(createOpenAIProvider());
+    // `||` rather than `??`: CI passes unset secrets as "", which must also
+    // fall back to the default model name.
+    const modelName = process.env.OPENAI_MODEL || "gpt-4o-mini";
+    return createModel(`openai:${modelName}`, {
+      ...(process.env.OPENAI_BASE_URL ? { baseURL: process.env.OPENAI_BASE_URL } : {}),
+    });
+  }
+  registerProvider(createAnthropicProvider());
+  const modelName = process.env.ANTHROPIC_MODEL || "claude-sonnet-4-20250514";
+  return createModel(`anthropic:${modelName}`, {
+    ...(process.env.ANTHROPIC_BASE_URL ? { baseURL: process.env.ANTHROPIC_BASE_URL } : {}),
+  });
+}
+
+describe.skipIf(!HAS_PROVIDER)("Agent tool-calling E2E", () => {
+  it("passes correct parameters to tool and uses result", async () => {
+    const model = setupModel();
+    let receivedParams: Record<string, unknown> | undefined;
+
+    const tools: ToolDefinition[] = [
+      {
+        name: "calculate",
+        description:
+          "Calculate the result of a mathematical expression. Accepts two numbers and an operator.",
+        parameters: {
+          type: "object",
+          properties: {
+            a: { type: "number", description: "First operand" },
+            b: { type: "number", description: "Second operand" },
+            operator: {
+              type: "string",
+              enum: ["add", "subtract", "multiply", "divide"],
+              description: "The arithmetic operator",
+            },
+          },
+          required: ["a", "b", "operator"],
+        },
+        execute: async (params) => {
+          receivedParams = params;
+          const a = params.a as number;
+          const b = params.b as number;
+          const op = params.operator as string;
+          switch (op) {
+            case "add":
+              return { result: a + b };
+            case "subtract":
+              return { result: a - b };
+            case "multiply":
+              return { result: a * b };
+            case "divide":
+              return { result: a / b };
+            default:
+              return { error: "Unknown operator" };
+          }
+        },
+      },
+    ];
+
+    const agent = createAgent({
+      name: "calc-agent",
+      model,
+      systemPrompt:
+        "You are a calculator assistant. Use the calculate tool to perform arithmetic. Always use the tool, do not calculate yourself.",
+      tools,
+      maxIterations: 5,
+    });
+
+    const response = await agent.run("What is 7 multiplied by 6?");
+
+    // The tool should have been called with correct parameters
+    expect(receivedParams).toBeDefined();
+    expect(receivedParams!.a).toBe(7);
+    expect(receivedParams!.b).toBe(6);
+    expect(receivedParams!.operator).toBe("multiply");
+
+    // The final response should contain the result
+    expect(response.text).toBeTruthy();
+    expect(response.text).toContain("42");
+
+    // Verify tool call metadata
+    expect(response.toolCalls.length).toBeGreaterThan(0);
+    expect(response.toolCalls[0].name).toBe("calculate");
+  }, 30_000);
+
+  it("handles multiple tool calls in sequence", async () => {
+    const model = setupModel();
+    const callLog: string[] = [];
+
+    const tools: ToolDefinition[] = [
+      {
+        name: "get_population",
+        description: "Get the population of a city.",
+        parameters: {
+          type: "object",
+          properties: { city: { type: "string", description: "City name" } },
+          required: ["city"],
+        },
+        execute: async (params) => {
+          callLog.push(`get_population:${params.city}`);
+          const data: Record<string, number> = {
+            Tokyo: 14_000_000,
+            London: 9_000_000,
+          };
+          const city = params.city as string;
+          return { city, population: data[city] ?? 0 };
+        },
+      },
+    ];
+
+    const agent = createAgent({
+      name: "population-agent",
+      model,
+      systemPrompt:
+        "You are a demographics assistant. Use the get_population tool to look up city populations. Call the tool once for each city requested.",
+      tools,
+      maxIterations: 10,
+    });
+
+    const response = await agent.run(
+      "What are the populations of Tokyo and London? Look up each city.",
+    );
+
+    // Both cities should have been looked up
+    expect(callLog).toContain("get_population:Tokyo");
+    expect(callLog).toContain("get_population:London");
+
+    // The response should mention both populations
+    expect(response.text).toBeTruthy();
+    expect(response.text).toMatch(/14[,.]?000[,.]?000/);
+    expect(response.text).toMatch(/9[,.]?000[,.]?000/);
+  }, 30_000);
+});
diff --git a/packages/ai/__tests__/e2e/anthropic-compat.e2e.test.ts b/packages/ai/__tests__/e2e/anthropic-compat.e2e.test.ts
new file mode 100644
index 0000000..c7692eb
--- /dev/null
+++ b/packages/ai/__tests__/e2e/anthropic-compat.e2e.test.ts
@@ -0,0 +1,72 @@
+/**
+ * Anthropic-compatible adapter E2E tests — generate, stream, tools.
+ *
+ * Tests the Anthropic Messages API protocol adapter with any compatible
+ * endpoint. Skipped when ANTHROPIC_API_KEY is not set.
+ *
+ * Environment variables:
+ *   ANTHROPIC_API_KEY — API key for the Anthropic-compatible endpoint
+ *   ANTHROPIC_BASE_URL — Base URL (default: https://api.anthropic.com)
+ *   ANTHROPIC_MODEL — Model name (default: claude-sonnet-4-20250514)
+ */
+
+import { describe, it, expect } from "vitest";
+import { AnthropicProvider } from "../../src/providers/anthropic.js";
+import { collectText } from "../../src/stream.js";
+import type { ToolDefinition } from "../../src/types.js";
+
+const API_KEY = process.env.ANTHROPIC_API_KEY;
+const BASE_URL = process.env.ANTHROPIC_BASE_URL;
+// `||` rather than `??`: CI passes unset secrets as "", which must also
+// fall back to the default model name.
+const MODEL = process.env.ANTHROPIC_MODEL || "claude-sonnet-4-20250514";
+
+describe.skipIf(!API_KEY)("Anthropic-compatible adapter E2E", () => {
+  const provider = new AnthropicProvider();
+  const messages = [{ role: "user" as const, content: "Say hello in one word." }];
+  const options = {
+    modelName: MODEL,
+    ...(BASE_URL ? { baseURL: BASE_URL } : {}),
+  };
+
+  it("generate returns text", async () => {
+    const response = await provider.generate(messages, options);
+    expect(response.text).toBeTruthy();
+    expect(response.finishReason).toBe("stop");
+    expect(response.usage.totalTokens).toBeGreaterThan(0);
+  }, 30_000);
+
+  it("stream returns text deltas", async () => {
+    const stream = await provider.stream(messages, options);
+    const text = await collectText(stream);
+    expect(text.length).toBeGreaterThan(0);
+  }, 30_000);
+
+  it("generateWithTools triggers tool call", async () => {
+    const tools: ToolDefinition[] = [
+      {
+        name: "get_weather",
+        description: "Get weather for a city",
+        parameters: {
+          type: "object",
+          properties: { city: { type: "string" } },
+          required: ["city"],
+        },
+      },
+    ];
+
+    const response = await provider.generateWithTools(
+      [
+        {
+          role: "user",
+          content: "What is the weather in Paris? Use the get_weather tool.",
+        },
+      ],
+      tools,
+      options,
+    );
+
+    expect(response.toolCalls.length).toBeGreaterThan(0);
+    expect(response.toolCalls[0].name).toBe("get_weather");
+  }, 30_000);
+});
diff --git a/packages/ai/__tests__/e2e/openai-compat.e2e.test.ts b/packages/ai/__tests__/e2e/openai-compat.e2e.test.ts
new file mode 100644
index 0000000..e7f9c4b
--- /dev/null
+++ b/packages/ai/__tests__/e2e/openai-compat.e2e.test.ts
@@ -0,0 +1,96 @@
+/**
+ * OpenAI-compatible adapter E2E tests — generate, stream, tools, structured output.
+ *
+ * Tests the OpenAI Chat Completions API protocol adapter with any compatible
+ * endpoint. Skipped when OPENAI_API_KEY is not set.
+ *
+ * Environment variables:
+ *   OPENAI_API_KEY — API key for the OpenAI-compatible endpoint
+ *   OPENAI_BASE_URL — Base URL (default: https://api.openai.com/v1)
+ *   OPENAI_MODEL — Model name (default: gpt-4o-mini)
+ */
+
+import { describe, it, expect } from "vitest";
+import { OpenAIProvider } from "../../src/providers/openai.js";
+import { collectText } from "../../src/stream.js";
+import type { ToolDefinition } from "../../src/types.js";
+
+const API_KEY = process.env.OPENAI_API_KEY;
+const BASE_URL = process.env.OPENAI_BASE_URL;
+// `||` rather than `??`: CI passes unset secrets as "", which must also
+// fall back to the default model name.
+const MODEL = process.env.OPENAI_MODEL || "gpt-4o-mini";
+
+describe.skipIf(!API_KEY)("OpenAI-compatible adapter E2E", () => {
+  const provider = new OpenAIProvider();
+  const messages = [{ role: "user" as const, content: "Say hello in one word." }];
+  const options = {
+    modelName: MODEL,
+    ...(BASE_URL ? { baseURL: BASE_URL } : {}),
+  };
+
+  it("generate returns text", async () => {
+    const response = await provider.generate(messages, options);
+    expect(response.text).toBeTruthy();
+    expect(response.finishReason).toBe("stop");
+    expect(response.usage.totalTokens).toBeGreaterThan(0);
+  }, 30_000);
+
+  it("stream returns text deltas", async () => {
+    const stream = await provider.stream(messages, options);
+    const text = await collectText(stream);
+    expect(text.length).toBeGreaterThan(0);
+  }, 30_000);
+
+  it("generateWithTools triggers tool call", async () => {
+    const tools: ToolDefinition[] = [
+      {
+        name: "get_weather",
+        description: "Get weather for a city",
+        parameters: {
+          type: "object",
+          properties: { city: { type: "string" } },
+          required: ["city"],
+        },
+      },
+    ];
+
+    const response = await provider.generateWithTools(
+      [{ role: "user", content: "What is the weather in Paris? Use the tool." }],
+      tools,
+      options,
+    );
+
+    expect(response.toolCalls.length).toBeGreaterThan(0);
+    expect(response.toolCalls[0].name).toBe("get_weather");
+  }, 30_000);
+
+  it("structured output via responseFormat", async () => {
+    const response = await provider.generate(
+      [
+        {
+          role: "user",
+          content:
+            "Return a JSON object with fields: name (string), age (number). Use name=Alice, age=30.",
+        },
+      ],
+      {
+        ...options,
+        responseFormat: {
+          type: "json",
+          schema: {
+            type: "object",
+            properties: { name: { type: "string" }, age: { type: "number" } },
+            required: ["name", "age"],
+          },
+        },
+      },
+    );
+
+    // Assert on the text first so a missing body fails here, not inside JSON.parse
+    expect(response.text).toBeTruthy();
+    const parsed = JSON.parse(response.text ?? "");
+    expect(parsed.name).toBe("Alice");
+    expect(parsed.age).toBe(30);
+  }, 30_000);
+});
diff --git a/packages/ai/__tests__/e2e/smoke.test.ts b/packages/ai/__tests__/e2e/smoke.test.ts
deleted file mode 100644
index b148e9b..0000000
--- a/packages/ai/__tests__/e2e/smoke.test.ts
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
- * E2E smoke tests for DeepSeek and Qwen providers.
- *
- * These tests hit real APIs and are skipped when the corresponding
- * environment variable is not set. Run manually or via CI with secrets.
- */
-
-import { describe, it, expect } from "vitest";
-import { DeepSeekProvider } from "../../src/providers/deepseek.js";
-import { QwenProvider } from "../../src/providers/qwen.js";
-import { collectText } from "../../src/stream.js";
-
-// ---------------------------------------------------------------------------
-// DeepSeek
-// ---------------------------------------------------------------------------
-
-describe.skipIf(!process.env.DEEPSEEK_API_KEY)("DeepSeek E2E", () => {
-  const provider = new DeepSeekProvider();
-  const messages = [{ role: "user" as const, content: "Say hello in one word." }];
-  const options = { modelName: "deepseek-chat" };
-
-  it("generate returns text", async () => {
-    const response = await provider.generate(messages, options);
-    expect(response.text).toBeTruthy();
-    expect(response.finishReason).toBe("stop");
-    expect(response.usage.totalTokens).toBeGreaterThan(0);
-  }, 30_000);
-
-  it("stream returns text deltas", async () => {
-    const stream = await provider.stream(messages, options);
-    const text = await collectText(stream);
-    expect(text.length).toBeGreaterThan(0);
-  }, 30_000);
-});
-
-// ---------------------------------------------------------------------------
-// Qwen
-// ---------------------------------------------------------------------------
-
-describe.skipIf(!process.env.DASHSCOPE_API_KEY)("Qwen E2E", () => {
-  const provider = new QwenProvider();
-  const messages = [{ role: "user" as const, content: "Say hello in one word." }];
-  const options = { modelName: "qwen-turbo" };
-
-  it("generate returns text", async () => {
-    const response = await provider.generate(messages, options);
-    expect(response.text).toBeTruthy();
-    expect(response.finishReason).toBe("stop");
-    expect(response.usage.totalTokens).toBeGreaterThan(0);
-  }, 30_000);
-
-  it("stream returns text deltas", async () => {
-    const stream = await provider.stream(messages, options);
-    const text = await collectText(stream);
-    expect(text.length).toBeGreaterThan(0);
-  }, 30_000);
-});