diff --git a/.changeset/coder-sdk-runners.md b/.changeset/coder-sdk-runners.md new file mode 100644 index 0000000000..31c995b513 --- /dev/null +++ b/.changeset/coder-sdk-runners.md @@ -0,0 +1,7 @@ +--- +'@electric-ax/agents': minor +--- + +feat: drive the coder entity via Claude Code and Codex SDKs instead of the `claude` / `codex` CLI binaries + +The `coder` entity now invokes `@anthropic-ai/claude-agent-sdk` and `@openai/codex-sdk` directly, so the host no longer needs `claude` or `codex` installed on PATH — both SDKs ship their own platform-specific subprocess binaries as optional dependencies. Events stream from the SDK iterators into the entity's durable event collection live, replacing the previous JSONL file-watcher and post-run discovery plumbing. diff --git a/packages/agents/package.json b/packages/agents/package.json index 5c7bf66967..f436c02b57 100644 --- a/packages/agents/package.json +++ b/packages/agents/package.json @@ -40,13 +40,15 @@ "./package.json": "./package.json" }, "dependencies": { + "@anthropic-ai/claude-agent-sdk": "^0.2.123", "@anthropic-ai/sdk": "^0.78.0", "@durable-streams/state": "npm:@electric-ax/durable-streams-state-beta@^0.3.1", "@electric-ax/agents-runtime": "workspace:*", "@mariozechner/pi-agent-core": "^0.70.2", "@mariozechner/pi-ai": "^0.70.2", + "@openai/codex-sdk": "^0.125.0", "@sinclair/typebox": "^0.34.48", - "agent-session-protocol": "^0.0.2", + "agent-session-protocol": "^0.0.8", "better-sqlite3": "^11.10.0", "nanoid": "^3.3.11", "pino": "^10.3.1", diff --git a/packages/agents/src/agents/coding-session.ts b/packages/agents/src/agents/coding-session.ts index 447e09e5fd..9249c0b9b5 100644 --- a/packages/agents/src/agents/coding-session.ts +++ b/packages/agents/src/agents/coding-session.ts @@ -1,22 +1,10 @@ -import { spawn } from 'node:child_process' -import { watch, promises as fsp } from 'node:fs' -import { homedir } from 'node:os' -import path from 'node:path' import { z } from 'zod' import { - deserializeCursor, - 
discoverSessions, importLocalSession, loadSession, - resolveSession, serializeCursor, - tailSession, -} from 'agent-session-protocol' -import type { - NormalizedEvent, - SerializedSessionCursor, - SessionCursor, } from 'agent-session-protocol' +import type { NormalizedEvent } from 'agent-session-protocol' import { CODING_SESSION_CURSOR_COLLECTION_TYPE, CODING_SESSION_EVENT_COLLECTION_TYPE, @@ -30,13 +18,22 @@ import type { WakeEvent, } from '@electric-ax/agents-runtime' +import { claudeSdkRunner } from './runners/claude-sdk.js' +import { codexSdkRunner } from './runners/codex-sdk.js' + /** - * Abstraction over the claude/codex CLI. Default implementation spawns - * the real binary; tests can inject a fake. + * Abstraction over a coding-agent runner. The default implementations + * drive `@anthropic-ai/claude-agent-sdk` and `@openai/codex-sdk` + * directly; tests can inject a fake. + * + * Runners stream `NormalizedEvent`s via `onEvent` as the agent makes + * progress, and call `onSessionId` once with the new (or resumed) + * session id so the orchestrator can persist it on the entity. * * `sessionId` is undefined for the first prompt on a fresh session — - * the runner should then let the CLI generate its own id. For every - * subsequent prompt, pass the id so the CLI resumes that conversation. + * the runner should then let the SDK generate its own id and emit it + * via `onSessionId`. For every subsequent prompt, pass the id so the + * SDK resumes that conversation. */ export interface CodingSessionCliRunner { run(opts: { @@ -44,165 +41,18 @@ export interface CodingSessionCliRunner { sessionId?: string cwd: string prompt: string + onEvent?: (ev: NormalizedEvent) => void + onSessionId?: (id: string) => void }): Promise<{ exitCode: number; stdout: string; stderr: string }> } const defaultCliRunner: CodingSessionCliRunner = { async run(opts) { - return new Promise((resolve, reject) => { - // Claude Code: prompt goes in on stdin (not argv). 
Needs - // --dangerously-skip-permissions because the session runs - // autonomously — any tool call would otherwise block on an - // interactive approval prompt and exit 1. - // Codex: prompt is an argv; stdin is ignored. Needs - // --skip-git-repo-check because `codex exec` refuses to run in - // a directory that isn't a trusted-dir and isn't a git repo, - // and we can't assume callers have configured trust for the - // cwd they pointed the entity at. - const isClaude = opts.agent === `claude` - const bin = isClaude ? `claude` : `codex` - const args = isClaude - ? opts.sessionId - ? [`-r`, opts.sessionId, `--dangerously-skip-permissions`, `-p`] - : [`--dangerously-skip-permissions`, `-p`] - : opts.sessionId - ? [ - `exec`, - `--skip-git-repo-check`, - `resume`, - opts.sessionId, - opts.prompt, - ] - : [`exec`, `--skip-git-repo-check`, opts.prompt] - const child = spawn(bin, args, { - cwd: opts.cwd, - stdio: [isClaude ? `pipe` : `ignore`, `pipe`, `pipe`], - }) - // Cap how much output we hold on the heap. Only the first ~800 - // chars of each stream show up in error messages, but a verbose - // CLI session can produce megabytes — keep just enough for a - // meaningful diagnostic and discard the rest. - const MAX_BUF_CHARS = 4096 - let stdout = `` - let stderr = `` - child.stdout?.on(`data`, (d: Buffer) => { - if (stdout.length < MAX_BUF_CHARS) { - stdout += d.toString().slice(0, MAX_BUF_CHARS - stdout.length) - } - }) - child.stderr?.on(`data`, (d: Buffer) => { - if (stderr.length < MAX_BUF_CHARS) { - stderr += d.toString().slice(0, MAX_BUF_CHARS - stderr.length) - } - }) - child.on(`error`, reject) - child.on(`exit`, (code) => { - resolve({ exitCode: code ?? -1, stdout, stderr }) - }) - if (isClaude && child.stdin) { - child.stdin.write(opts.prompt) - child.stdin.end() - } - }) + const runner = opts.agent === `claude` ? 
claudeSdkRunner : codexSdkRunner + return runner.run(opts) }, } -export async function discoverNewestSession( - agent: CodingAgentType, - cwd: string, - excludeIds: ReadonlySet -): Promise { - const all = await discoverSessions(agent) - const candidates = all.filter( - (s) => !excludeIds.has(s.sessionId) && (!s.cwd || s.cwd === cwd) - ) - if (candidates.length === 0) return null - // discoverSessions returns most-recent-first for each agent, so - // the first match is what the CLI just wrote. - return candidates[0]!.sessionId -} - -/** - * Compute the candidate directories where Claude Code stores per-cwd - * session JSONL files. Claude resolves the cwd to its realpath when - * choosing the directory name (so /tmp/foo on macOS lands under - * `-private-tmp-foo`), but the entity may have been spawned with the - * non-realpath form. Return both candidates so the caller can union - * their contents. - */ -export async function getClaudeProjectDirs( - cwd: string -): Promise> { - const home = homedir() - const make = (c: string): string => - path.join(home, `.claude`, `projects`, c.replace(/\//g, `-`)) - const dirs = [make(cwd)] - try { - const real = await fsp.realpath(cwd) - if (real !== cwd) dirs.push(make(real)) - } catch { - // cwd may not exist on disk yet — skip realpath - } - return dirs -} - -export async function listClaudeJsonlIdsByCwd( - cwd: string -): Promise> { - const ids = new Set() - for (const dir of await getClaudeProjectDirs(cwd)) { - try { - const files = await fsp.readdir(dir) - for (const f of files) { - if (f.endsWith(`.jsonl`)) ids.add(f.slice(0, -`.jsonl`.length)) - } - } catch { - // dir may not exist (no prior runs in this cwd) - } - } - return ids -} - -/** - * Deterministic-path discovery for a freshly created session. 
After the - * Claude CLI runs in `-p` mode it writes the new JSONL straight into - * `~/.claude/projects//.jsonl` *without* leaving a - * `~/.claude/sessions/.json` lock file (those are interactive-only), - * so `discoverSessions` can miss it. Compute the expected dir directly - * and diff its contents against a pre-run snapshot. Returns the newest - * fresh sessionId or null. Codex falls back to discoverNewestSession. - */ -export async function findNewSessionAfterRun( - agent: CodingAgentType, - cwd: string, - preDirectIds: ReadonlySet, - preDiscoveredIds: ReadonlySet -): Promise { - if (agent === `claude`) { - const dirs = await getClaudeProjectDirs(cwd) - let best: { id: string; mtime: number } | null = null - for (const dir of dirs) { - try { - const files = await fsp.readdir(dir) - for (const f of files) { - if (!f.endsWith(`.jsonl`)) continue - const id = f.slice(0, -`.jsonl`.length) - if (preDirectIds.has(id)) continue - const st = await fsp.stat(path.join(dir, f)).catch(() => null) - if (!st) continue - if (!best || st.mtimeMs > best.mtime) { - best = { id, mtime: st.mtimeMs } - } - } - } catch { - // dir may not exist - } - } - if (best) return best.id - } - return discoverNewestSession(agent, cwd, preDiscoveredIds) -} - const sessionMetaRowSchema = z.object({ key: z.literal(`current`), electricSessionId: z.string(), @@ -216,7 +66,13 @@ const sessionMetaRowSchema = z.object({ const cursorStateRowSchema = z.object({ key: z.literal(`current`), - /** JSON-serialized SerializedSessionCursor, or empty string if none yet. */ + /** + * JSON-serialized SerializedSessionCursor or empty string. Used as a + * "have I seeded the events collection from the JSONL yet?" marker for + * imported / attached sessions — once non-empty, we don't reseed. + * The SDK runners stream events live, so this is no longer used for + * tail/cursor state past first wake. 
+ */ cursor: z.string(), lastProcessedInboxKey: z.string().optional(), }) @@ -261,9 +117,9 @@ interface InboxRow { } export interface RegisterCodingSessionOptions { - /** Working directory the CLI runs in when `args.cwd` is not provided. Defaults to `process.cwd()`. */ + /** Working directory the runner uses when `args.cwd` is not provided. Defaults to `process.cwd()`. */ defaultWorkingDirectory?: string - /** Override the CLI runner (for tests or alternate backends). */ + /** Override the runner (for tests or alternate backends). */ cliRunner?: CodingSessionCliRunner } @@ -317,120 +173,6 @@ function appendIfNew(ctx: LiveMirrorCtx, event: NormalizedEvent): void { ctx.actions.events_insert({ row }) } -/** - * Mirror every event that lands in the JSONL file while `runWork` is - * executing (i.e. while the CLI is running). Returns the advanced cursor - * and the `runWork` result once everything has settled and every append - * has been persisted to the entity's durable stream. - * - * If setup fails (e.g. the session file can't be resolved), `runWork` - * still runs — but nothing is mirrored and `setupError` is populated so - * the caller can surface the condition. If `runWork` throws, the error - * propagates after the watcher has been cleaned up. - */ -async function runWithLiveMirror(opts: { - agent: CodingAgentType - nativeSessionId: string - serializedCursor: SerializedSessionCursor | null - ctx: LiveMirrorCtx - runWork: () => Promise -}): Promise<{ - cursor: SerializedSessionCursor | null - setupError?: unknown - result: T -}> { - let cursor: SessionCursor | null = null - let setupError: unknown = undefined - - try { - const session = await resolveSession(opts.nativeSessionId, opts.agent) - if (opts.serializedCursor) { - cursor = deserializeCursor({ - ...opts.serializedCursor, - path: session.path, - }) - } else { - // First real tail — absorb whatever's already on disk (e.g. 
the - // pre-existing user turn for an imported session, or nothing for - // a freshly-created empty file). - const initial = await loadSession({ - sessionId: opts.nativeSessionId, - agent: opts.agent, - }) - for (const ev of initial.events) appendIfNew(opts.ctx, ev) - cursor = initial.cursor - } - } catch (e) { - setupError = e - } - - if (!cursor) { - // Setup failed — just run and surface the error to the caller. - const result = await opts.runWork() - return { cursor: opts.serializedCursor, setupError, result } - } - - let activeCursor: SessionCursor = cursor - let busy = false - let pending = false - let stopped = false - - const drainOnce = async (): Promise => { - if (stopped && busy) return - if (busy) { - pending = true - return - } - busy = true - try { - const res = await tailSession({ cursor: activeCursor }) - activeCursor = res.cursor - for (const ev of res.newEvents) appendIfNew(opts.ctx, ev) - } catch { - // Transient read errors (truncation, rename during rotation) — - // the final tail after runWork settles will catch up. - } finally { - busy = false - if (pending && !stopped) { - pending = false - void drainOnce() - } - } - } - - const fileWatcher = watch(activeCursor.path, () => { - void drainOnce() - }) - const pollHandle = setInterval(() => { - void drainOnce() - }, 1500) - - let result: T - try { - result = await opts.runWork() - } finally { - stopped = true - clearInterval(pollHandle) - fileWatcher.close() - // Wait for any in-flight drain to settle before doing the final tail. - while (busy) { - await new Promise((r) => setTimeout(r, 10)) - } - // Final tail — catches anything written between the last watcher - // tick and the watcher shutdown. - try { - const final = await tailSession({ cursor: activeCursor }) - activeCursor = final.cursor - for (const ev of final.newEvents) appendIfNew(opts.ctx, ev) - } catch { - // Swallow; the caller's own post-run tail/persistence will - // surface the condition if it matters. 
- } - } - - return { cursor: serializeCursor(activeCursor), setupError, result } -} - export function registerCodingSession( registry: EntityRegistry, options: RegisterCodingSessionOptions = {} @@ -439,7 +181,7 @@ export function registerCodingSession( const defaultCwd = options.defaultWorkingDirectory ?? process.cwd() registry.define(`coder`, { - description: `Runs a Claude Code / Codex CLI session and mirrors its normalized event stream into a durable store. Prompts arrive via message_received (type: "prompt") and are executed serially.`, + description: `Runs a Claude Code / Codex SDK session and mirrors its normalized event stream into a durable store. Prompts arrive via message_received (type: "prompt") and are executed serially.`, creationSchema: creationArgsSchema, inboxSchemas: { prompt: promptMessageSchema, @@ -623,13 +365,13 @@ export function registerCodingSession( }, }) - // Record the CLI invocation as a `runs` collection event so - // observers waking on `runFinished` are notified when the turn - // ends. Without this the parent (e.g. Horton via spawn_coder) - // would never be woken because the coder bypasses useAgent. + // Record the run as a `runs` collection event so observers + // waking on `runFinished` are notified when the turn ends. + // Without this the parent (e.g. Horton via spawn_coder) would + // never be woken because the coder bypasses useAgent. const recordedRun = ctx.recordRun() // Snapshot the existing event keys so we can identify which - // events are appended during this CLI run and surface their + // events are appended during this run and surface their // assistant text as the run's response payload. const eventKeysBefore = new Set( ( @@ -649,123 +391,49 @@ export function registerCodingSession( }, } - let nextCursorJson = runningCursor.cursor - - if (!runningMeta.nativeSessionId) { - // First real prompt on a fresh session. 
Let the CLI create - // its own jsonl (writing an empty one ourselves breaks - // `claude -r ` — claude can't resume an empty file). - // After it exits, diff the on-disk sessions to find the - // new id, then load and mirror in one shot. Snapshot both - // the deterministic per-cwd directory (works for Claude - // `-p` runs that don't drop a metadata lock file) and - // discoverSessions (covers Codex + interactive Claude - // sessions) before the run so either path can spot the - // freshly written session. - const preDirectIds = - runningMeta.agent === `claude` - ? await listClaudeJsonlIdsByCwd(runningMeta.cwd) - : new Set() - const preDiscoveredIds = new Set( - (await discoverSessions(runningMeta.agent)).map( - (s) => s.sessionId - ) - ) - const cliResult = await runner.run({ - agent: runningMeta.agent, - cwd: runningMeta.cwd, - prompt, - }) - if (cliResult.exitCode !== 0) { - throw new Error( - `[coding-session] ${runningMeta.agent} CLI exited ${cliResult.exitCode}. stderr=${cliResult.stderr.slice(0, 800) || ``} stdout=${cliResult.stdout.slice(0, 800) || ``}` - ) - } - const foundId = await findNewSessionAfterRun( - runningMeta.agent, - runningMeta.cwd, - preDirectIds, - preDiscoveredIds - ) - if (!foundId) { - throw new Error( - `[coding-session] ${runningMeta.agent} CLI succeeded but no new session file was found` - ) - } - ctx.db.actions.sessionMeta_update({ - key: `current`, - updater: (d: SessionMetaRow) => { - d.nativeSessionId = foundId - }, - }) - runningMeta = { ...runningMeta, nativeSessionId: foundId } - - // Post-run full load. No live streaming on the first prompt - // since the file didn't exist when we started. 
- const initial = await loadSession({ - sessionId: foundId, - agent: runningMeta.agent, - }) - for (const ev of initial.events) appendIfNew(mirrorCtx, ev) - nextCursorJson = JSON.stringify(serializeCursor(initial.cursor)) - } else { - // Existing session: stream events into the DS while the CLI - // runs, so the UI sees the prompt turn, assistant tokens, - // and tool calls as they land. - const serializedCursor = runningCursor.cursor - ? (JSON.parse(runningCursor.cursor) as SerializedSessionCursor) - : null - - const { - cursor: nextSerialized, - setupError, - result: cliResult, - } = await runWithLiveMirror({ - agent: runningMeta.agent, - nativeSessionId: runningMeta.nativeSessionId, - serializedCursor, - ctx: mirrorCtx, - runWork: () => - runner.run({ - agent: runningMeta.agent, - sessionId: runningMeta.nativeSessionId, - cwd: runningMeta.cwd, - prompt, - }), - }) - - if (setupError) { - throw setupError instanceof Error - ? setupError - : new Error(String(setupError)) - } - if (cliResult.exitCode !== 0) { - throw new Error( - `[coding-session] ${runningMeta.agent} CLI exited ${cliResult.exitCode}. stderr=${cliResult.stderr.slice(0, 800) || ``} stdout=${cliResult.stdout.slice(0, 800) || ``}` - ) - } + const cliResult = await runner.run({ + agent: runningMeta.agent, + ...(runningMeta.nativeSessionId + ? { sessionId: runningMeta.nativeSessionId } + : {}), + cwd: runningMeta.cwd, + prompt, + onEvent: (ev) => appendIfNew(mirrorCtx, ev), + onSessionId: (id) => { + if (runningMeta.nativeSessionId === id) return + ctx.db.actions.sessionMeta_update({ + key: `current`, + updater: (d: SessionMetaRow) => { + d.nativeSessionId = id + }, + }) + runningMeta = { ...runningMeta, nativeSessionId: id } + }, + }) - const persistedCursor = nextSerialized ?? serializedCursor - nextCursorJson = persistedCursor - ? JSON.stringify(persistedCursor) - : `` + if (cliResult.exitCode !== 0) { + throw new Error( + `[coding-session] ${runningMeta.agent} runner exited ${cliResult.exitCode}. 
stderr=${cliResult.stderr.slice(0, 800) || ``} stdout=${cliResult.stdout.slice(0, 800) || ``}` + ) } ctx.db.actions.cursorState_update({ key: `current`, updater: (d: CursorStateRow) => { - d.cursor = nextCursorJson + // Cursor is now just a "have we seeded?" marker — set to + // any non-empty string after the first successful run. + if (!d.cursor) d.cursor = `sdk-stream` d.lastProcessedInboxKey = inboxMsg.key }, }) runningCursor = { ...runningCursor, - cursor: nextCursorJson, + cursor: runningCursor.cursor || `sdk-stream`, lastProcessedInboxKey: inboxMsg.key, } - // Pipe assistant_message text from this run into text_delta - // events linked to recordedRun so the runFinished wake's - // `includeResponse` payload carries the coder's reply. + // Pipe assistant_message text from this run into recordedRun + // so the runFinished wake's `includeResponse` payload carries + // the coder's reply. for (const row of ctx.db.collections.events .toArray as unknown as Array<{ key: string diff --git a/packages/agents/src/agents/runners/claude-sdk.ts b/packages/agents/src/agents/runners/claude-sdk.ts new file mode 100644 index 0000000000..d6295e0615 --- /dev/null +++ b/packages/agents/src/agents/runners/claude-sdk.ts @@ -0,0 +1,195 @@ +import { query } from '@anthropic-ai/claude-agent-sdk' +import type { SDKMessage } from '@anthropic-ai/claude-agent-sdk' +import { normalizeClaudeEvent } from 'agent-session-protocol' +import type { ClaudeEntry } from 'agent-session-protocol' + +import type { CodingSessionCliRunner } from '../coding-session.js' +import { subprocessEnvWithoutKey } from './env.js' + +/** + * SDK-backed runner for Claude. Drives `query()` from + * `@anthropic-ai/claude-agent-sdk`, iterates the resulting async + * generator, adapts each `SDKMessage` to the `ClaudeEntry` shape + * `normalizeClaudeEvent` expects, and forwards each emitted normalized + * event via the `onEvent` callback. 
+ * + * The Claude SDK ships its own subprocess binary as an optional + * platform-specific dep, so this no longer requires a globally + * installed `claude` CLI on PATH. + */ +export const claudeSdkRunner: CodingSessionCliRunner = { + async run(opts) { + const q = query({ + prompt: opts.prompt, + options: { + cwd: opts.cwd, + // Hide ANTHROPIC_API_KEY from the spawned `claude` subprocess + // so it falls back to whatever auth the user has configured + // (typically OAuth tokens written by `claude login`). Horton + // still needs the key in its own process.env to talk to the + // Anthropic API directly, so we strip it here rather than + // leaving it unset everywhere. + env: subprocessEnvWithoutKey(`ANTHROPIC_API_KEY`), + ...(opts.sessionId ? { resume: opts.sessionId } : {}), + // The Claude SDK requires *both* of these to skip approvals: + // `permissionMode: 'bypassPermissions'` selects the bypass + // policy and `allowDangerouslySkipPermissions: true` is the + // explicit acknowledgement gate (the SDK throws unless that + // boolean is set when the bypass mode is used). They are not + // redundant despite the name overlap. + permissionMode: `bypassPermissions`, + allowDangerouslySkipPermissions: true, + }, + }) + + let capturedSessionId: string | null = opts.sessionId ?? null + let resultMessage: { + is_error: boolean + result?: string + error?: string + } | null = null + + try { + for await (const msg of q) { + const sid = (msg as { session_id?: string }).session_id + if (sid && sid !== capturedSessionId) { + capturedSessionId = sid + opts.onSessionId?.(sid) + } + + if (msg.type === `result`) { + resultMessage = { + is_error: msg.is_error, + result: `result` in msg ? msg.result : undefined, + error: + `subtype` in msg && msg.subtype !== `success` + ? 
msg.subtype + : undefined, + } + } + + const entry = sdkMessageToClaudeEntry(msg) + if (!entry) continue + for (const ev of normalizeClaudeEvent(entry)) opts.onEvent?.(ev) + } + } catch (e) { + const message = e instanceof Error ? e.message : String(e) + return { exitCode: -1, stdout: ``, stderr: message } + } + + if (resultMessage?.is_error) { + return { + exitCode: 1, + stdout: resultMessage.result ?? ``, + stderr: resultMessage.error ?? `claude SDK reported is_error`, + } + } + return { exitCode: 0, stdout: resultMessage?.result ?? ``, stderr: `` } + }, +} + +/** + * Adapt one `SDKMessage` to the `ClaudeEntry` shape the JSONL + * normaliser expects. The SDK and the JSONL share *most* fields but + * differ in casing on a few keys (`session_id` vs `sessionId`, + * `claude_code_version` vs `version`, `duration_ms` vs `durationMs`). + * Everything else is structurally compatible. + * + * Returns null for SDK-only message types (status pings, retries, hook + * lifecycle, etc.) that have no JSONL counterpart. + * + * Exported for unit testing — the runner is the only production caller. + */ +export function sdkMessageToClaudeEntry(msg: SDKMessage): ClaudeEntry | null { + const ts = + (msg as { timestamp?: string }).timestamp ?? 
new Date().toISOString() + const sessionId = (msg as { session_id?: string }).session_id + + if (msg.type === `system`) { + if (`subtype` in msg && msg.subtype === `init`) { + return { + type: `system`, + subtype: `init`, + timestamp: ts, + sessionId, + cwd: msg.cwd, + version: msg.claude_code_version, + message: { model: msg.model }, + } + } + if (`subtype` in msg && msg.subtype === `compact_boundary`) { + return { + type: `system`, + subtype: `compact_boundary`, + timestamp: ts, + sessionId, + } + } + return null + } + + if (msg.type === `user`) { + const inner = msg.message as + | { role?: string; content?: unknown } + | undefined + return { + type: `user`, + timestamp: ts, + sessionId, + message: { + role: `user`, + content: inner?.content, + }, + } + } + + if (msg.type === `assistant`) { + const inner = msg.message as { + role?: string + model?: string + content?: unknown + stop_reason?: string + usage?: { + input_tokens?: number + output_tokens?: number + cache_read_input_tokens?: number + cache_creation_input_tokens?: number + } + } + return { + type: `assistant`, + timestamp: ts, + sessionId, + message: { + role: `assistant`, + model: inner.model, + content: inner.content, + stop_reason: inner.stop_reason, + usage: inner.usage, + }, + } + } + + if (msg.type === `result`) { + return { + type: `result`, + timestamp: ts, + sessionId, + subtype: msg.subtype, + durationMs: msg.duration_ms, + message: msg.usage + ? 
{ + usage: { + input_tokens: msg.usage.input_tokens, + output_tokens: msg.usage.output_tokens, + cache_read_input_tokens: msg.usage.cache_read_input_tokens, + cache_creation_input_tokens: + msg.usage.cache_creation_input_tokens, + }, + } + : undefined, + } + } + + return null +} diff --git a/packages/agents/src/agents/runners/codex-sdk.ts b/packages/agents/src/agents/runners/codex-sdk.ts new file mode 100644 index 0000000000..ca09f7a17c --- /dev/null +++ b/packages/agents/src/agents/runners/codex-sdk.ts @@ -0,0 +1,359 @@ +import { Codex } from '@openai/codex-sdk' +import type { + AgentMessageItem, + CommandExecutionItem, + ErrorItem, + FileChangeItem, + McpToolCallItem, + ReasoningItem, + ThreadItem, + WebSearchItem, +} from '@openai/codex-sdk' +import { normalizeToolName } from 'agent-session-protocol' +import type { NormalizedEvent } from 'agent-session-protocol' + +import type { CodingSessionCliRunner } from '../coding-session.js' +import { subprocessEnvWithoutKey } from './env.js' + +/** + * SDK-backed runner for Codex. Codex's SDK exposes ThreadEvents that + * wrap higher-level UI items (CommandExecutionItem, FileChangeItem, + * etc.) — these are NOT the same shape as the lower-level + * `response_item` payloads that land in the rollout JSONL, so we can't + * route them through `normalizeCodexEvent`. Instead this runner + * synthesises `NormalizedEvent`s directly from each completed + * ThreadItem. + * + * Each tool-style item is emitted as a tool_call when it starts and a + * matching tool_result when it completes, so the UI shows the same + * lifecycle it would for a CLI-driven session. + */ +export const codexSdkRunner: CodingSessionCliRunner = { + async run(opts) { + // Hide OPENAI_API_KEY from the spawned `codex` subprocess so it + // falls back to user-configured credentials (`codex login` writes + // tokens to `~/.codex/auth.json`). 
Symmetric with the Claude + // runner — neither coder runner consumes the parent process's API + // keys so a Horton+coder co-tenant can keep the keys in scope for + // direct API calls without leaking them into the CLI subprocesses. + const codex = new Codex({ env: subprocessEnvWithoutKey(`OPENAI_API_KEY`) }) + // Mirror what the CLI runner did: write access in the cwd and no + // interactive approval prompts. Without these the SDK defaults to + // `read-only` + `on-request` and the agent fails the moment it + // tries to edit a file. + const threadOptions = { + workingDirectory: opts.cwd, + skipGitRepoCheck: true, + sandboxMode: `workspace-write` as const, + approvalPolicy: `never` as const, + } + const thread = opts.sessionId + ? codex.resumeThread(opts.sessionId, threadOptions) + : codex.startThread(threadOptions) + + let turnFailed: { message: string } | null = null + let assistantText = `` + let capturedSessionId: string | null = opts.sessionId ?? null + + try { + const { events } = await thread.runStreamed(opts.prompt) + for await (const ev of events) { + if (!capturedSessionId && thread.id) { + capturedSessionId = thread.id + opts.onSessionId?.(thread.id) + } + + switch (ev.type) { + case `thread.started`: { + if (!capturedSessionId) { + capturedSessionId = ev.thread_id + opts.onSessionId?.(ev.thread_id) + } + opts.onEvent?.({ + v: 1, + ts: Date.now(), + type: `session_init`, + sessionId: ev.thread_id, + cwd: opts.cwd, + agent: `codex`, + }) + break + } + case `item.started`: { + const startEvents = threadItemStartedToEvents(ev.item) + for (const e of startEvents) opts.onEvent?.(e) + break + } + case `item.completed`: { + const completeEvents = threadItemCompletedToEvents(ev.item) + for (const e of completeEvents) opts.onEvent?.(e) + if (ev.item.type === `agent_message`) { + assistantText += (assistantText ? 
`\n` : ``) + ev.item.text + } + break + } + case `turn.completed`: { + opts.onEvent?.({ + v: 1, + ts: Date.now(), + type: `turn_complete`, + success: true, + usage: { + inputTokens: ev.usage.input_tokens, + outputTokens: ev.usage.output_tokens, + cachedInputTokens: ev.usage.cached_input_tokens, + reasoningOutputTokens: ev.usage.reasoning_output_tokens, + }, + }) + break + } + case `turn.failed`: { + turnFailed = { message: ev.error.message } + opts.onEvent?.({ + v: 1, + ts: Date.now(), + type: `turn_aborted`, + reason: ev.error.message, + }) + break + } + case `error`: { + turnFailed = { message: ev.message } + opts.onEvent?.({ + v: 1, + ts: Date.now(), + type: `error`, + message: ev.message, + }) + break + } + case `item.updated`: + case `turn.started`: + break + } + } + } catch (e) { + const message = e instanceof Error ? e.message : String(e) + return { exitCode: -1, stdout: ``, stderr: message } + } + + if (turnFailed) { + return { exitCode: 1, stdout: assistantText, stderr: turnFailed.message } + } + return { exitCode: 0, stdout: assistantText, stderr: `` } + }, +} + +/** Exported for unit testing — the runner is the only production caller. */ +export function threadItemStartedToEvents( + item: ThreadItem +): Array { + const ts = Date.now() + switch (item.type) { + case `command_execution`: + return [commandExecutionToToolCall(item, ts)] + case `mcp_tool_call`: + return [mcpToolCallToToolCall(item, ts)] + case `web_search`: + return [webSearchToToolCall(item, ts)] + case `file_change`: + case `agent_message`: + case `reasoning`: + case `todo_list`: + case `error`: + return [] + } +} + +/** Exported for unit testing — the runner is the only production caller. 
*/ +export function threadItemCompletedToEvents( + item: ThreadItem +): Array { + const ts = Date.now() + switch (item.type) { + case `agent_message`: + return [agentMessageToEvent(item, ts)] + case `reasoning`: + return [reasoningToEvent(item, ts)] + case `command_execution`: + return [commandExecutionToToolResult(item, ts)] + case `file_change`: + return fileChangeToEvents(item, ts) + case `mcp_tool_call`: + return [mcpToolCallToToolResult(item, ts)] + case `web_search`: + // Codex's WebSearchItem doesn't expose the search results to the + // SDK consumer (only `query`), so we can't produce a meaningful + // tool_result payload. Emit an empty one anyway to honor the + // tool_call→tool_result contract — without it any UI rendering + // tool lifecycles would show a perpetually-pending web search. + return [webSearchToToolResult(item, ts)] + case `todo_list`: + return [] + case `error`: + return [errorItemToEvent(item, ts)] + } +} + +function agentMessageToEvent( + item: AgentMessageItem, + ts: number +): NormalizedEvent { + return { + v: 1, + ts, + type: `assistant_message`, + text: item.text, + phase: `final`, + } +} + +function reasoningToEvent(item: ReasoningItem, ts: number): NormalizedEvent { + return { + v: 1, + ts, + type: `thinking`, + summary: item.text.slice(0, 200) || `(thinking)`, + text: item.text || null, + } +} + +function commandExecutionToToolCall( + item: CommandExecutionItem, + ts: number +): NormalizedEvent { + const mapping = normalizeToolName(`exec_command`, `codex`, { + command: item.command, + }) + return { + v: 1, + ts, + type: `tool_call`, + callId: item.id, + tool: mapping.normalized, + originalTool: mapping.originalTool, + originalAgent: `codex`, + input: { command: item.command }, + } +} + +function commandExecutionToToolResult( + item: CommandExecutionItem, + ts: number +): NormalizedEvent { + const isError = item.status === `failed` || (item.exit_code ?? 
0) !== 0 + return { + v: 1, + ts, + type: `tool_result`, + callId: item.id, + output: item.aggregated_output, + isError, + ...(item.exit_code !== undefined ? { exitCode: item.exit_code } : {}), + } +} + +/** + * Turns a completed Codex file change into a `tool_call` + `tool_result` + * pair that share the item's id. The tool is `file_write` only when the + * patch has at least one change and every change is an `add`; otherwise + * (mixed kinds, or an empty change list) it is `file_edit`. + */ +function fileChangeToEvents( + item: FileChangeItem, + ts: number +): Array<NormalizedEvent> { + const isError = item.status === `failed` + // Synthesise a tool_call + tool_result pair for the patch as a whole. + // Codex doesn't expose per-file ids, so we use the FileChangeItem's id + // for both events. + const summary = item.changes.map((c) => `${c.kind} ${c.path}`).join(`\n`) + // `every` is vacuously true on an empty array, so require at least one + // change before calling the patch a pure file creation. + const allAdds = + item.changes.length > 0 && item.changes.every((c) => c.kind === `add`) + const tool = allAdds ? `file_write` : `file_edit` + return [ + { + v: 1, + ts, + type: `tool_call`, + callId: item.id, + tool, + originalTool: `apply_patch`, + originalAgent: `codex`, + input: { changes: item.changes }, + }, + { + v: 1, + ts, + type: `tool_result`, + callId: item.id, + output: summary, + isError, + }, + ] +} + +/** Builds the `tool_call` event for an MCP tool invocation, using the MCP tool name as both the normalized and original tool. */ +function mcpToolCallToToolCall( + item: McpToolCallItem, + ts: number +): NormalizedEvent { + return { + v: 1, + ts, + type: `tool_call`, + callId: item.id, + tool: item.tool, + originalTool: item.tool, + originalAgent: `codex`, + input: (item.arguments as Record<string, unknown>) ?? {}, + } +} + +function mcpToolCallToToolResult( + item: McpToolCallItem, + ts: number +): NormalizedEvent { + const isError = item.status === `failed` + const output = item.error + ? item.error.message + : item.result + ? JSON.stringify(item.result.structured_content ??
item.result.content) + : `` + return { + v: 1, + ts, + type: `tool_result`, + callId: item.id, + output, + isError, + } +} + +/** Builds the `tool_call` event for a Codex web search; the item id is reused as the call id and the query is the only input. */ +function webSearchToToolCall(item: WebSearchItem, ts: number): NormalizedEvent { + return { + v: 1, + ts, + type: `tool_call`, + callId: item.id, + tool: `web_search`, + originalTool: `web_search`, + originalAgent: `codex`, + input: { query: item.query }, + } +} + +/** Builds the closing `tool_result` for a web search: always an empty, non-error output keyed by the item id. */ +function webSearchToToolResult( + item: WebSearchItem, + ts: number +): NormalizedEvent { + return { + v: 1, + ts, + type: `tool_result`, + callId: item.id, + output: ``, + isError: false, + } +} + +/** Wraps a Codex error item's message in a normalized `error` event. */ +function errorItemToEvent(item: ErrorItem, ts: number): NormalizedEvent { + return { + v: 1, + ts, + type: `error`, + message: item.message, + } +} diff --git a/packages/agents/src/agents/runners/env.ts b/packages/agents/src/agents/runners/env.ts new file mode 100644 index 0000000000..41a989d412 --- /dev/null +++ b/packages/agents/src/agents/runners/env.ts @@ -0,0 +1,22 @@ +/** + * Build a subprocess `env` derived from `process.env` minus a single + * variable. Used by the SDK runners to hide a parent-process API key + * (e.g. `ANTHROPIC_API_KEY`) from the spawned `claude` / `codex` + * subprocess so the binary falls back to user-configured credentials + * (`claude login` OAuth, `~/.codex/auth.json`, etc.) instead of using + * the API key. + * + * Both SDKs replace the subprocess env with this object when provided + * — they don't merge — so we have to copy the rest of `process.env` to + * preserve `HOME`, `PATH`, and everything else the binary needs.
+ */ +export function subprocessEnvWithoutKey( + keyName: string +): Record<string, string> { + // Windows treats environment variable names case-insensitively, so a + // differently-cased copy of the key would still reach the subprocess. + // Fold case there; keep the exact match everywhere else. + const foldCase = process.platform === `win32` + const hidden = foldCase ? keyName.toUpperCase() : keyName + const out: Record<string, string> = {} + for (const [k, v] of Object.entries(process.env)) { + if ((foldCase ? k.toUpperCase() : k) === hidden) continue + // Entries whose value is not a string are dropped. + if (typeof v === `string`) out[k] = v + } + return out +} diff --git a/packages/agents/test/coding-session.test.ts b/packages/agents/test/coding-session.test.ts index 42452512bd..b479acb45a 100644 --- a/packages/agents/test/coding-session.test.ts +++ b/packages/agents/test/coding-session.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it, vi } from 'vitest' import { createEntityRegistry } from '@electric-ax/agents-runtime' import { registerCodingSession } from '../src/agents/coding-session' +import type { NormalizedEvent } from 'agent-session-protocol' function makeFakeCtx(opts: { firstWake: boolean @@ -81,6 +82,7 @@ function makeFakeCtx(opts: { sessionMeta: { get: (k: string) => state.sessionMeta!.get(k) }, cursorState: { get: (k: string) => state.cursorState!.get(k) }, events: { + get: (k: string) => state.events!.get(k), get toArray() { return Array.from(state.events!.values()) }, @@ -178,16 +180,26 @@ describe(`registerCodingSession`, () => { }) it(`invokes the injected cliRunner for a queued prompt and mirrors normalized events`, async () => { - // Inject a fake runner + fake agent-session-protocol pullEvents path - // by pre-populating the cursorState (so pullNewEvents takes the tail - // branch) and attaching to an existing nativeSessionId (so the - // lazy-create path that hits the filesystem is bypassed). - // - // This still exercises the handler's queue-drain logic without - // touching ~/.claude or ~/.codex. + // Pre-populate the cursorState with a non-empty seeded marker so + // the initial-mirror path is skipped (no filesystem touch). The + // injected runner streams events and the orchestrator should + // append them to the events collection and complete cleanly.
const runner = { - run: vi.fn(async () => ({ exitCode: 0, stdout: ``, stderr: `` })), + run: vi.fn( + async (callArgs: { + onEvent?: (ev: NormalizedEvent) => void + onSessionId?: (id: string) => void + }) => { + callArgs.onEvent?.({ + v: 1, + ts: 1714000000000, + type: `assistant_message`, + text: `hi back`, + }) + return { exitCode: 0, stdout: `hi back`, stderr: `` } + } + ), } const registry = createEntityRegistry() registerCodingSession(registry, { @@ -196,7 +208,7 @@ describe(`registerCodingSession`, () => { }) const def = registry.get(`coder`)! - const { ctx, state, calls } = makeFakeCtx({ + const { ctx, state } = makeFakeCtx({ firstWake: false, args: { agent: `claude`, nativeSessionId: `existing-uuid` }, inbox: [ @@ -217,22 +229,20 @@ describe(`registerCodingSession`, () => { cwd: `/tmp/x`, status: `idle`, }, - cursorState: { key: `current`, cursor: ``, eventCounter: 0 }, + cursorState: { + key: `current`, + cursor: `sdk-stream`, + eventCounter: 0, + }, }, }) - // The handler will call resolveSession + loadSession under the hood, - // which hit the filesystem. Expect this call to throw — we're - // asserting the error surfaces cleanly as a failed prompt rather - // than a hang. - await expect( - def.definition.handler( - ctx as unknown as Parameters[0], - { type: `message_received` } as unknown as Parameters< - typeof def.definition.handler - >[1] - ) - ).rejects.toThrow() + await def.definition.handler( + ctx as unknown as Parameters[0], + { type: `message_received` } as unknown as Parameters< + typeof def.definition.handler + >[1] + ) // Runner was invoked with the prompt expect(runner.run).toHaveBeenCalledTimes(1) @@ -247,14 +257,16 @@ describe(`registerCodingSession`, () => { expect(call.prompt).toBe(`say hi`) expect(call.sessionId).toBe(`existing-uuid`) - // Meta was flipped to error with a diagnostic message + // Streamed event made it into the events collection + expect(state.events!.size).toBe(1) + const event = Array.from(state.events!.values())[0]! 
+ expect(event.type).toBe(`assistant_message`) + + // Meta is back to idle and the inbox key is marked processed const meta = state.sessionMeta!.get(`current`)! - expect(meta.status).toBe(`error`) - expect(typeof meta.error).toBe(`string`) - // The prompt is marked as processed so it won't be retried on the next wake + expect(meta.status).toBe(`idle`) const cursor = state.cursorState!.get(`current`)! expect(cursor.lastProcessedInboxKey).toBe(`m-001`) - void calls // reserved for future assertions }) it(`accepts inbox messages without message_type (bare /send from generic UI)`, async () => { @@ -289,18 +301,20 @@ describe(`registerCodingSession`, () => { cwd: `/tmp/x`, status: `idle`, }, - cursorState: { key: `current`, cursor: ``, eventCounter: 0 }, + cursorState: { + key: `current`, + cursor: `sdk-stream`, + eventCounter: 0, + }, }, }) - await expect( - def.definition.handler( - ctx as unknown as Parameters[0], - { type: `message_received` } as unknown as Parameters< - typeof def.definition.handler - >[1] - ) - ).rejects.toThrow() // resolveSession fails for a synthetic id — same as the other test + await def.definition.handler( + ctx as unknown as Parameters[0], + { type: `message_received` } as unknown as Parameters< + typeof def.definition.handler + >[1] + ) expect(runner.run).toHaveBeenCalledTimes(1) const call = ( diff --git a/packages/agents/test/find-new-session-after-run.test.ts b/packages/agents/test/find-new-session-after-run.test.ts deleted file mode 100644 index 73d6aee3ba..0000000000 --- a/packages/agents/test/find-new-session-after-run.test.ts +++ /dev/null @@ -1,165 +0,0 @@ -import * as fs from 'node:fs' -import * as fsp from 'node:fs/promises' -import * as path from 'node:path' -import { tmpdir } from 'node:os' -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' -import { - findNewSessionAfterRun, - getClaudeProjectDirs, - listClaudeJsonlIdsByCwd, -} from '../src/agents/coding-session' - -// Each test runs against a private 
fake $HOME so the real -// `~/.claude/projects/` is never touched. `homedir()` reads HOME, so -// stubbing it via vitest is enough to redirect every path-derivation -// helper inside coding-session.ts. -let fakeHome: string - -beforeEach(() => { - fakeHome = fs.mkdtempSync(path.join(tmpdir(), `coder-test-`)) - vi.stubEnv(`HOME`, fakeHome) -}) - -afterEach(() => { - vi.unstubAllEnvs() - fs.rmSync(fakeHome, { recursive: true, force: true }) -}) - -function projectsDirFor(cwd: string): string { - return path.join(fakeHome, `.claude`, `projects`, cwd.replace(/\//g, `-`)) -} - -async function writeJsonl( - cwd: string, - sessionId: string, - opts: { mtimeOffsetMs?: number } = {} -): Promise { - const dir = projectsDirFor(cwd) - await fsp.mkdir(dir, { recursive: true }) - const file = path.join(dir, `${sessionId}.jsonl`) - await fsp.writeFile(file, ``) - if (opts.mtimeOffsetMs !== undefined) { - const t = new Date(Date.now() + opts.mtimeOffsetMs) - await fsp.utimes(file, t, t) - } -} - -describe(`findNewSessionAfterRun (claude)`, () => { - it(`returns null when the per-cwd projects directory doesn't exist`, async () => { - const result = await findNewSessionAfterRun( - `claude`, - `/tmp/nope`, - new Set(), - new Set() - ) - expect(result).toBeNull() - }) - - it(`returns the sessionId of the only new jsonl in the cwd dir`, async () => { - const cwd = `/tmp/cwd-a` - await writeJsonl(cwd, `aaa-111`) - - const result = await findNewSessionAfterRun( - `claude`, - cwd, - new Set(), - new Set() - ) - expect(result).toBe(`aaa-111`) - }) - - it(`picks the newest by mtime when multiple new jsonls are present`, async () => { - const cwd = `/tmp/cwd-b` - await writeJsonl(cwd, `older`, { mtimeOffsetMs: -10_000 }) - await writeJsonl(cwd, `newest`, { mtimeOffsetMs: 0 }) - await writeJsonl(cwd, `middle`, { mtimeOffsetMs: -5_000 }) - - const result = await findNewSessionAfterRun( - `claude`, - cwd, - new Set(), - new Set() - ) - expect(result).toBe(`newest`) - }) - - it(`filters out 
sessionIds that were already present before the run`, async () => { - const cwd = `/tmp/cwd-c` - await writeJsonl(cwd, `pre-1`, { mtimeOffsetMs: 0 }) - await writeJsonl(cwd, `post-1`, { mtimeOffsetMs: -1_000 }) - - const result = await findNewSessionAfterRun( - `claude`, - cwd, - new Set([`pre-1`]), - new Set() - ) - expect(result).toBe(`post-1`) - }) - - it(`falls back to discoverNewestSession (returning null here, since no real ~/.claude/sessions lock files exist) when nothing is found in the deterministic dir`, async () => { - const result = await findNewSessionAfterRun( - `claude`, - `/tmp/cwd-empty`, - new Set(), - new Set() - ) - expect(result).toBeNull() - }) -}) - -describe(`getClaudeProjectDirs`, () => { - it(`returns the sanitized-cwd directory under fake $HOME`, async () => { - const dirs = await getClaudeProjectDirs(`/private/tmp/foo`) - // realpath resolution may produce a second candidate when the path - // exists on disk; in this test the path doesn't exist, so we get - // exactly the raw-form candidate. - expect(dirs[0]).toBe( - path.join(fakeHome, `.claude`, `projects`, `-private-tmp-foo`) - ) - }) - - it(`also returns the realpath-resolved candidate when the cwd is a symlink`, async () => { - // /tmp on macOS is a symlink to /private/tmp; we replicate that - // shape inside the fake home so the test is portable. 
- const target = path.join(fakeHome, `realdir`) - const link = path.join(fakeHome, `linkdir`) - fs.mkdirSync(target, { recursive: true }) - fs.symlinkSync(target, link) - - const dirs = await getClaudeProjectDirs(link) - expect(dirs.length).toBe(2) - expect(dirs[0]).toContain(link.replace(/\//g, `-`)) - expect(dirs[1]).toContain(target.replace(/\//g, `-`)) - }) -}) - -describe(`listClaudeJsonlIdsByCwd`, () => { - it(`unions ids across realpath and raw-form dirs and ignores non-jsonl files`, async () => { - const cwd = `/tmp/cwd-list` - await writeJsonl(cwd, `id-1`) - await writeJsonl(cwd, `id-2`) - // Drop a non-jsonl into the same dir to confirm it's ignored. - await fsp.writeFile(path.join(projectsDirFor(cwd), `notes.txt`), `x`) - - const ids = await listClaudeJsonlIdsByCwd(cwd) - expect(Array.from(ids).sort()).toEqual([`id-1`, `id-2`]) - }) - - it(`returns an empty set when the cwd has no projects directory`, async () => { - const ids = await listClaudeJsonlIdsByCwd(`/tmp/cwd-absent`) - expect(ids.size).toBe(0) - }) -}) - -describe(`findNewSessionAfterRun (codex)`, () => { - it(`falls through to discoverNewestSession (no codex sessions on the fake $HOME → null)`, async () => { - const result = await findNewSessionAfterRun( - `codex`, - `/tmp/cwd-codex`, - new Set(), - new Set() - ) - expect(result).toBeNull() - }) -}) diff --git a/packages/agents/test/runners/claude-sdk.test.ts b/packages/agents/test/runners/claude-sdk.test.ts new file mode 100644 index 0000000000..947702eada --- /dev/null +++ b/packages/agents/test/runners/claude-sdk.test.ts @@ -0,0 +1,150 @@ +import { describe, expect, it } from 'vitest' +import type { SDKMessage } from '@anthropic-ai/claude-agent-sdk' +import { sdkMessageToClaudeEntry } from '../../src/agents/runners/claude-sdk' + +describe(`sdkMessageToClaudeEntry`, () => { + it(`maps a system/init message to a ClaudeEntry init entry`, () => { + const msg = { + type: `system`, + subtype: `init`, + session_id: `s-1`, + cwd: `/tmp/x`, + 
claude_code_version: `2.1.83`, + model: `claude-sonnet-4-5`, + tools: [], + mcp_servers: [], + slash_commands: [], + output_style: ``, + skills: [], + plugins: [], + apiKeySource: `user`, + permissionMode: `default`, + uuid: `u-1`, + } as unknown as SDKMessage + const entry = sdkMessageToClaudeEntry(msg) + expect(entry).toMatchObject({ + type: `system`, + subtype: `init`, + sessionId: `s-1`, + cwd: `/tmp/x`, + version: `2.1.83`, + message: { model: `claude-sonnet-4-5` }, + }) + }) + + it(`maps a system/compact_boundary message`, () => { + const msg = { + type: `system`, + subtype: `compact_boundary`, + session_id: `s-1`, + compact_metadata: { trigger: `auto`, pre_tokens: 100 }, + uuid: `u-2`, + } as unknown as SDKMessage + const entry = sdkMessageToClaudeEntry(msg) + expect(entry).toMatchObject({ + type: `system`, + subtype: `compact_boundary`, + sessionId: `s-1`, + }) + }) + + it(`maps a user message`, () => { + const msg = { + type: `user`, + session_id: `s-1`, + message: { role: `user`, content: `hello` }, + parent_tool_use_id: null, + } as unknown as SDKMessage + const entry = sdkMessageToClaudeEntry(msg) + expect(entry).toMatchObject({ + type: `user`, + sessionId: `s-1`, + message: { role: `user`, content: `hello` }, + }) + }) + + it(`maps an assistant message and preserves usage + stop_reason`, () => { + const msg = { + type: `assistant`, + session_id: `s-1`, + message: { + id: `m-1`, + type: `message`, + role: `assistant`, + model: `claude-sonnet-4-5`, + content: [{ type: `text`, text: `hi` }], + stop_reason: `end_turn`, + stop_sequence: null, + usage: { + input_tokens: 10, + output_tokens: 5, + cache_read_input_tokens: 0, + cache_creation_input_tokens: 0, + }, + }, + parent_tool_use_id: null, + uuid: `u-3`, + } as unknown as SDKMessage + const entry = sdkMessageToClaudeEntry(msg) + expect(entry).toMatchObject({ + type: `assistant`, + sessionId: `s-1`, + message: { + role: `assistant`, + model: `claude-sonnet-4-5`, + stop_reason: `end_turn`, + usage: { 
input_tokens: 10, output_tokens: 5 }, + }, + }) + // content is forwarded through so normalizeClaudeEvent can iterate it + expect((entry!.message!.content as Array)[0]).toMatchObject({ + type: `text`, + text: `hi`, + }) + }) + + it(`maps a result message and renames duration_ms to durationMs`, () => { + const msg = { + type: `result`, + subtype: `success`, + session_id: `s-1`, + duration_ms: 1234, + duration_api_ms: 1000, + is_error: false, + num_turns: 1, + result: `done`, + stop_reason: `end_turn`, + total_cost_usd: 0.01, + usage: { + input_tokens: 10, + output_tokens: 5, + cache_read_input_tokens: 0, + cache_creation_input_tokens: 0, + }, + modelUsage: {}, + permission_denials: [], + uuid: `u-4`, + } as unknown as SDKMessage + const entry = sdkMessageToClaudeEntry(msg) + expect(entry).toMatchObject({ + type: `result`, + subtype: `success`, + sessionId: `s-1`, + durationMs: 1234, + message: { + usage: { input_tokens: 10, output_tokens: 5 }, + }, + }) + }) + + it(`returns null for SDK-only message types`, () => { + const msg = { + type: `auth_status`, + session_id: `s-1`, + isAuthenticating: false, + output: [], + uuid: `u-5`, + } as unknown as SDKMessage + expect(sdkMessageToClaudeEntry(msg)).toBeNull() + }) +}) diff --git a/packages/agents/test/runners/codex-sdk.test.ts b/packages/agents/test/runners/codex-sdk.test.ts new file mode 100644 index 0000000000..c056863c09 --- /dev/null +++ b/packages/agents/test/runners/codex-sdk.test.ts @@ -0,0 +1,270 @@ +import { describe, expect, it } from 'vitest' +import type { ThreadItem } from '@openai/codex-sdk' +import { + threadItemCompletedToEvents, + threadItemStartedToEvents, +} from '../../src/agents/runners/codex-sdk' + +describe(`threadItemStartedToEvents`, () => { + it(`maps command_execution to a tool_call (terminal for an unclassified cmd)`, () => { + const item: ThreadItem = { + id: `i-1`, + type: `command_execution`, + command: `pwd`, + aggregated_output: ``, + status: `in_progress`, + } + const events = 
threadItemStartedToEvents(item) + expect(events).toHaveLength(1) + const ev = events[0]! + expect(ev).toMatchObject({ + type: `tool_call`, + callId: `i-1`, + tool: `terminal`, + originalTool: `exec_command`, + originalAgent: `codex`, + input: { command: `pwd` }, + }) + }) + + it(`classifies cat as file_read`, () => { + const item: ThreadItem = { + id: `i-2`, + type: `command_execution`, + command: `cat /tmp/x.txt`, + aggregated_output: ``, + status: `in_progress`, + } + const ev = threadItemStartedToEvents(item)[0]! + if (ev.type !== `tool_call`) throw new Error(`unexpected`) + expect(ev.tool).toBe(`file_read`) + }) + + it(`maps mcp_tool_call to a tool_call carrying the MCP tool name`, () => { + const item: ThreadItem = { + id: `i-3`, + type: `mcp_tool_call`, + server: `my-server`, + tool: `my_tool`, + arguments: { foo: 1 }, + status: `in_progress`, + } + const ev = threadItemStartedToEvents(item)[0]! + expect(ev).toMatchObject({ + type: `tool_call`, + callId: `i-3`, + tool: `my_tool`, + originalTool: `my_tool`, + input: { foo: 1 }, + }) + }) + + it(`maps web_search to a tool_call`, () => { + const item: ThreadItem = { + id: `i-4`, + type: `web_search`, + query: `electric sql`, + } + const ev = threadItemStartedToEvents(item)[0]! 
+ expect(ev).toMatchObject({ + type: `tool_call`, + callId: `i-4`, + tool: `web_search`, + input: { query: `electric sql` }, + }) + }) + + it(`emits nothing on start for items completed in one event`, () => { + const items: Array = [ + { id: `m-1`, type: `agent_message`, text: `hello` }, + { id: `r-1`, type: `reasoning`, text: `step 1` }, + { + id: `f-1`, + type: `file_change`, + changes: [{ path: `a.txt`, kind: `add` }], + status: `completed`, + }, + ] + for (const item of items) { + expect(threadItemStartedToEvents(item)).toEqual([]) + } + }) +}) + +describe(`threadItemCompletedToEvents`, () => { + it(`maps agent_message to assistant_message with phase=final`, () => { + const item: ThreadItem = { + id: `m-1`, + type: `agent_message`, + text: `done`, + } + expect(threadItemCompletedToEvents(item)[0]).toMatchObject({ + type: `assistant_message`, + text: `done`, + phase: `final`, + }) + }) + + it(`maps reasoning to thinking`, () => { + const item: ThreadItem = { + id: `r-1`, + type: `reasoning`, + text: `the user asked for X so I'll do Y`, + } + const ev = threadItemCompletedToEvents(item)[0]! + expect(ev).toMatchObject({ + type: `thinking`, + text: `the user asked for X so I'll do Y`, + }) + if (ev.type === `thinking`) { + expect(ev.summary.length).toBeLessThanOrEqual(200) + } + }) + + it(`maps a successful command_execution to tool_result`, () => { + const item: ThreadItem = { + id: `i-1`, + type: `command_execution`, + command: `ls /`, + aggregated_output: `bin\netc`, + exit_code: 0, + status: `completed`, + } + expect(threadItemCompletedToEvents(item)[0]).toMatchObject({ + type: `tool_result`, + callId: `i-1`, + output: `bin\netc`, + isError: false, + exitCode: 0, + }) + }) + + it(`marks a non-zero exit as error`, () => { + const item: ThreadItem = { + id: `i-2`, + type: `command_execution`, + command: `false`, + aggregated_output: ``, + exit_code: 1, + status: `completed`, + } + const ev = threadItemCompletedToEvents(item)[0]! 
+ if (ev.type !== `tool_result`) throw new Error(`unexpected`) + expect(ev.isError).toBe(true) + }) + + it(`emits paired tool_call+tool_result for file_change`, () => { + const item: ThreadItem = { + id: `f-1`, + type: `file_change`, + changes: [ + { path: `a.txt`, kind: `add` }, + { path: `b.txt`, kind: `update` }, + ], + status: `completed`, + } + const events = threadItemCompletedToEvents(item) + expect(events).toHaveLength(2) + expect(events[0]).toMatchObject({ + type: `tool_call`, + callId: `f-1`, + tool: `file_edit`, // mixed adds + updates → file_edit + originalTool: `apply_patch`, + }) + expect(events[1]).toMatchObject({ + type: `tool_result`, + callId: `f-1`, + isError: false, + }) + }) + + it(`maps an all-add file_change to file_write`, () => { + const item: ThreadItem = { + id: `f-2`, + type: `file_change`, + changes: [{ path: `new.txt`, kind: `add` }], + status: `completed`, + } + const ev = threadItemCompletedToEvents(item)[0]! + if (ev.type !== `tool_call`) throw new Error(`unexpected`) + expect(ev.tool).toBe(`file_write`) + }) + + it(`maps mcp_tool_call success to tool_result with structured content`, () => { + const item: ThreadItem = { + id: `i-3`, + type: `mcp_tool_call`, + server: `s`, + tool: `t`, + arguments: {}, + result: { + content: [], + structured_content: { ok: true }, + }, + status: `completed`, + } + const ev = threadItemCompletedToEvents(item)[0]! + expect(ev).toMatchObject({ + type: `tool_result`, + callId: `i-3`, + isError: false, + }) + if (ev.type === `tool_result`) { + expect(JSON.parse(ev.output)).toEqual({ ok: true }) + } + }) + + it(`maps mcp_tool_call failure to error tool_result`, () => { + const item: ThreadItem = { + id: `i-4`, + type: `mcp_tool_call`, + server: `s`, + tool: `t`, + arguments: {}, + error: { message: `boom` }, + status: `failed`, + } + const ev = threadItemCompletedToEvents(item)[0]! 
+ expect(ev).toMatchObject({ + type: `tool_result`, + callId: `i-4`, + output: `boom`, + isError: true, + }) + }) + + it(`closes the web_search lifecycle with an empty tool_result`, () => { + const item: ThreadItem = { + id: `i-5`, + type: `web_search`, + query: `q`, + } + expect(threadItemCompletedToEvents(item)[0]).toMatchObject({ + type: `tool_result`, + callId: `i-5`, + output: ``, + isError: false, + }) + }) + + it(`maps error items to an error event`, () => { + const item: ThreadItem = { + id: `e-1`, + type: `error`, + message: `something broke`, + } + expect(threadItemCompletedToEvents(item)[0]).toMatchObject({ + type: `error`, + message: `something broke`, + }) + }) + + it(`returns empty for todo_list (no normalized counterpart yet)`, () => { + const item: ThreadItem = { + id: `t-1`, + type: `todo_list`, + items: [{ text: `do x`, completed: false }], + } + expect(threadItemCompletedToEvents(item)).toEqual([]) + }) +}) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c25aaa3b3b..1edf5f68b0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1502,6 +1502,9 @@ importers: packages/agents: dependencies: + '@anthropic-ai/claude-agent-sdk': + specifier: ^0.2.123 + version: 0.2.123(zod@4.3.6) '@anthropic-ai/sdk': specifier: ^0.78.0 version: 0.78.0(zod@4.3.6) @@ -1517,12 +1520,15 @@ importers: '@mariozechner/pi-ai': specifier: ^0.70.2 version: 0.70.2(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6) + '@openai/codex-sdk': + specifier: ^0.125.0 + version: 0.125.0 '@sinclair/typebox': specifier: ^0.34.48 version: 0.34.49 agent-session-protocol: - specifier: ^0.0.2 - version: 0.0.2 + specifier: ^0.0.8 + version: 0.0.8 better-sqlite3: specifier: ^11.10.0 version: 11.10.0 @@ -2398,6 +2404,52 @@ packages: '@antfu/install-pkg@1.1.0': resolution: {integrity: sha512-MGQsmw10ZyI+EJo45CdSER4zEb+p31LpDAFp2Z3gkSd1yqVZGi0Ebx++YTEMonJy4oChEMLsxZ64j8FH6sSqtQ==} + '@anthropic-ai/claude-agent-sdk-darwin-arm64@0.2.123': + resolution: {integrity: 
sha512-tYAXCjlXZQklsUs0J//gip3fZQRzhlH5OCgvNXV70qe7A1iiwHqO2KPGvEHV1L+deEKQoMZmTaCOrQpN6zju3w==} + cpu: [arm64] + os: [darwin] + + '@anthropic-ai/claude-agent-sdk-darwin-x64@0.2.123': + resolution: {integrity: sha512-AcUC6sTon6z6HculP87KsAOeTMRLBwpovdhcXUTjXUpo/8nplJ7lBEzWjZCHt8FF1KuN/WBy1Z4bDg/59TQDmA==} + cpu: [x64] + os: [darwin] + + '@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.2.123': + resolution: {integrity: sha512-bYgRiaf2q+yVbGAoUluuhqrEW1zexL34+3HDmK9DneKXa2K2EJpw4M6Sq4XoBD/JezGaemoAP78Xv/M/QUS1OQ==} + cpu: [arm64] + os: [linux] + + '@anthropic-ai/claude-agent-sdk-linux-arm64@0.2.123': + resolution: {integrity: sha512-7+GnbcF3/aZ8RJ1WmU/ogtPsOpknBAoUPer90MvZuFYBLPT9iI/U7f24gjrOHuYdcbDA5n7jFlhcfIO26F5DJQ==} + cpu: [arm64] + os: [linux] + + '@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.2.123': + resolution: {integrity: sha512-IX95lFKhmmndY/YPfWPsVV+C3rLYJmuuq5wCS53p6jYIkCMxH1iGfhBGF1EUWcXO4Uc8yqXFmQ3aaxMzOOPrwA==} + cpu: [x64] + os: [linux] + + '@anthropic-ai/claude-agent-sdk-linux-x64@0.2.123': + resolution: {integrity: sha512-Xi+Rwk8uP5vWEnawJOlsk179fr0ATLl5J90MlbLj+puKaX5svEq8ljS+P3zq6zHTJeKh9GKLzPf7bc5YJKwcew==} + cpu: [x64] + os: [linux] + + '@anthropic-ai/claude-agent-sdk-win32-arm64@0.2.123': + resolution: {integrity: sha512-WDZmAQG1rOiqNLZlSXaCjSWmqJvLk2io+vFQWWqSy2b5HCk9pa3PadLiaLztiihyk81wPhH9Q/44kOxdyfEGMw==} + cpu: [arm64] + os: [win32] + + '@anthropic-ai/claude-agent-sdk-win32-x64@0.2.123': + resolution: {integrity: sha512-588xrd1i6d4kXQ6FqwL+cgBiN4evRQSi5DCtPa02CZ3VEbuVQBeFlyPlD8tfWtNNeGZ4NM8kjPNNzZz5omezPA==} + cpu: [x64] + os: [win32] + + '@anthropic-ai/claude-agent-sdk@0.2.123': + resolution: {integrity: sha512-a4TysYoR9DBdkM9Uwh4J5ub7TwKmRPe5hFiWh4En+IKC+qkk5UFkxFM22c//cZjYZKynHX0ah2t6LUqb+najYA==} + engines: {node: '>=18.0.0'} + peerDependencies: + zod: ^4.0.0 + '@anthropic-ai/sdk@0.73.0': resolution: {integrity: sha512-URURVzhxXGJDGUGFunIOtBlSl7KWvZiAAKY/ttTkZAkXT9bTPqdk2eK0b8qqSxXpikh3QKPnPYpiyX98zf5ebw==} hasBin: true @@ 
-2416,6 +2468,15 @@ packages: zod: optional: true + '@anthropic-ai/sdk@0.81.0': + resolution: {integrity: sha512-D4K5PvEV6wPiRtVlVsJHIUhHAmOZ6IT/I9rKlTf84gR7GyyAurPJK7z9BOf/AZqC5d1DhYQGJNKRmV+q8dGhgw==} + hasBin: true + peerDependencies: + zod: ^3.25.0 || ^4.0.0 + peerDependenciesMeta: + zod: + optional: true + '@anthropic-ai/sdk@0.90.0': resolution: {integrity: sha512-MzZtPabJF1b0FTDl6Z6H5ljphPwACLGP13lu8MTiB8jXaW/YXlpOp+Po2cVou3MPM5+f5toyLnul9whKCy7fBg==} hasBin: true @@ -5982,6 +6043,51 @@ packages: resolution: {integrity: sha512-hAX0pT/73190NLqBPPWSdBVGtbY6VOhWYK3qqHqtXQ1gK7kS2yz4+ivsN07hpJ6I3aeMtKP6J6npsEKOAzuTLA==} engines: {node: '>=20.0'} + '@openai/codex-sdk@0.125.0': + resolution: {integrity: sha512-1xCIHdSbQVF880nJ2aVWdPIsWZbSpKODwuP9y/gvtChDYhYfYEW0DKp2H8ZlctkzIjlzS/WzYmP6ZZPHIvs2Dg==} + engines: {node: '>=18'} + + '@openai/codex@0.125.0': + resolution: {integrity: sha512-GiE9wlgL95u/5BRirY5d3EaRLU1tu7Y1R09R8lCHHVmcQdSmhS809FdPDWH3gIYHS7ZriAPqXwJ3aLA0WKl40Q==} + engines: {node: '>=16'} + hasBin: true + + '@openai/codex@0.125.0-darwin-arm64': + resolution: {integrity: sha512-Gn2fHiSO0XgyHp1OSd5DWUTm66Bv9UEuipW5pVEj1E+hWZCOrdqnYttllKFWtRGj5yiKefNX3JIxONgh/ZwlOQ==} + engines: {node: '>=16'} + cpu: [arm64] + os: [darwin] + + '@openai/codex@0.125.0-darwin-x64': + resolution: {integrity: sha512-TZ5Lek2X/UXTI9LXFxzarvQaJeuTrqVh4POc7soO/8RclVnCxADnCf15sivxLd5eiFW4t0myGoeVoM4lciRiRg==} + engines: {node: '>=16'} + cpu: [x64] + os: [darwin] + + '@openai/codex@0.125.0-linux-arm64': + resolution: {integrity: sha512-pPnJoJD6rZ2Iin0zNt/up36bO2/EOp2B+1/rPHu/lSq3PJbT3Fmnfut2kJy5LylXb7bGA2XQbtqOogZzIbnlkA==} + engines: {node: '>=16'} + cpu: [arm64] + os: [linux] + + '@openai/codex@0.125.0-linux-x64': + resolution: {integrity: sha512-K2NTTEeBpz/G+N2x17UGWfauRt3So+ir4f+U/60l5PPnYEJB/w3YZrlXo2G9og8Dm9BqtoBAjoPV74sRv9tWWQ==} + engines: {node: '>=16'} + cpu: [x64] + os: [linux] + + '@openai/codex@0.125.0-win32-arm64': + resolution: {integrity: 
sha512-zxoUakw9oIHIFrAyk400XkkLBJFA6nOym0NDq6sQ/jhdcYraKqNSRCII2nsBwZHk+/4zgUvuk52iuutgysY/rQ==} + engines: {node: '>=16'} + cpu: [arm64] + os: [win32] + + '@openai/codex@0.125.0-win32-x64': + resolution: {integrity: sha512-ofpOK+OWH5QFuUZ9pTM0d/PcXUXiIP5z5DpRcE9MlucJoyOl4Zy4Nu3NcuHF4YzCkZMQb6x3j0tjDEPHKqNQzw==} + engines: {node: '>=16'} + cpu: [x64] + os: [win32] + '@opentelemetry/api@1.9.1': resolution: {integrity: sha512-gLyJlPHPZYdAk1JENA9LeHejZe1Ti77/pTeFm/nMXmQH/HFZlcS/O2XJB+L8fkbrNSqhdtlvjBVjxwUYanNH5Q==} engines: {node: '>=8.0.0'} @@ -10162,8 +10268,8 @@ packages: resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==} engines: {node: '>= 14'} - agent-session-protocol@0.0.2: - resolution: {integrity: sha512-mGGQKUB9RrOCl8y22uF1AJlcGV+v1Z+SII/wEDgUp4p0Emx83w8s1C+4Mtjn18ksnr3ia5eI9jwwLguNlZ5RDw==} + agent-session-protocol@0.0.8: + resolution: {integrity: sha512-1LHPvWzole19D+Iv8vj6ktMy5epw5tYtq9g35si3t0YHLHPSXuWL7mSqZb9oIt2ZeXtwO7vJE9hTrTM7gckjvQ==} engines: {node: '>=18.0.0'} hasBin: true @@ -18057,6 +18163,7 @@ packages: uuid@10.0.0: resolution: {integrity: sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==} + deprecated: uuid@10 and below is no longer supported. For ESM codebases, update to uuid@latest. For CommonJS codebases, use uuid@11 (but be aware this version will likely be deprecated in 2028). hasBin: true uuid@11.1.0: @@ -18065,14 +18172,17 @@ packages: uuid@7.0.3: resolution: {integrity: sha512-DPSke0pXhTZgoF/d+WSt2QaKMCFSfx7QegxEWT+JOuHF5aWrKEn0G+ztjuJg/gG8/ItK+rbPCD/yNv8yyih6Cg==} + deprecated: uuid@10 and below is no longer supported. For ESM codebases, update to uuid@latest. For CommonJS codebases, use uuid@11 (but be aware this version will likely be deprecated in 2028). 
hasBin: true uuid@8.0.0: resolution: {integrity: sha512-jOXGuXZAWdsTH7eZLtyXMqUb9EcWMGZNbL9YcGBJl4MH4nrxHmZJhEHvyLFrkxo+28uLb/NYRcStH48fnD0Vzw==} + deprecated: uuid@10 and below is no longer supported. For ESM codebases, update to uuid@latest. For CommonJS codebases, use uuid@11 (but be aware this version will likely be deprecated in 2028). hasBin: true uuid@9.0.1: resolution: {integrity: sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==} + deprecated: uuid@10 and below is no longer supported. For ESM codebases, update to uuid@latest. For CommonJS codebases, use uuid@11 (but be aware this version will likely be deprecated in 2028). hasBin: true valibot@1.0.0: @@ -19064,6 +19174,48 @@ snapshots: package-manager-detector: 1.6.0 tinyexec: 1.0.2 + '@anthropic-ai/claude-agent-sdk-darwin-arm64@0.2.123': + optional: true + + '@anthropic-ai/claude-agent-sdk-darwin-x64@0.2.123': + optional: true + + '@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.2.123': + optional: true + + '@anthropic-ai/claude-agent-sdk-linux-arm64@0.2.123': + optional: true + + '@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.2.123': + optional: true + + '@anthropic-ai/claude-agent-sdk-linux-x64@0.2.123': + optional: true + + '@anthropic-ai/claude-agent-sdk-win32-arm64@0.2.123': + optional: true + + '@anthropic-ai/claude-agent-sdk-win32-x64@0.2.123': + optional: true + + '@anthropic-ai/claude-agent-sdk@0.2.123(zod@4.3.6)': + dependencies: + '@anthropic-ai/sdk': 0.81.0(zod@4.3.6) + '@modelcontextprotocol/sdk': 1.29.0(zod@4.3.6) + zod: 4.3.6 + optionalDependencies: + '@anthropic-ai/claude-agent-sdk-darwin-arm64': 0.2.123 + '@anthropic-ai/claude-agent-sdk-darwin-x64': 0.2.123 + '@anthropic-ai/claude-agent-sdk-linux-arm64': 0.2.123 + '@anthropic-ai/claude-agent-sdk-linux-arm64-musl': 0.2.123 + '@anthropic-ai/claude-agent-sdk-linux-x64': 0.2.123 + '@anthropic-ai/claude-agent-sdk-linux-x64-musl': 0.2.123 + '@anthropic-ai/claude-agent-sdk-win32-arm64': 0.2.123 + 
'@anthropic-ai/claude-agent-sdk-win32-x64': 0.2.123 + transitivePeerDependencies: + - '@cfworker/json-schema' + - supports-color + '@anthropic-ai/sdk@0.73.0(zod@4.3.6)': dependencies: json-schema-to-ts: 3.1.1 @@ -19076,6 +19228,12 @@ snapshots: optionalDependencies: zod: 4.3.6 + '@anthropic-ai/sdk@0.81.0(zod@4.3.6)': + dependencies: + json-schema-to-ts: 3.1.1 + optionalDependencies: + zod: 4.3.6 + '@anthropic-ai/sdk@0.90.0(zod@4.3.6)': dependencies: json-schema-to-ts: 3.1.1 @@ -23494,7 +23652,7 @@ snapshots: jose: 6.2.3 json-schema-typed: 8.0.2 pkce-challenge: 5.0.1 - raw-body: 3.0.0 + raw-body: 3.0.2 zod: 3.25.76 zod-to-json-schema: 3.25.2(zod@3.25.76) transitivePeerDependencies: @@ -23516,22 +23674,21 @@ snapshots: jose: 6.2.3 json-schema-typed: 8.0.2 pkce-challenge: 5.0.1 - raw-body: 3.0.0 + raw-body: 3.0.2 zod: 4.3.6 zod-to-json-schema: 3.25.2(zod@4.3.6) transitivePeerDependencies: - supports-color - optional: true '@modelcontextprotocol/sdk@1.6.1': dependencies: content-type: 1.0.5 cors: 2.8.5 eventsource: 3.0.7 - express: 5.1.0 - express-rate-limit: 7.5.1(express@5.1.0) + express: 5.2.1 + express-rate-limit: 7.5.1(express@5.2.1) pkce-challenge: 4.1.0 - raw-body: 3.0.0 + raw-body: 3.0.2 zod: 3.25.76 zod-to-json-schema: 3.25.2(zod@3.25.76) transitivePeerDependencies: @@ -23651,6 +23808,37 @@ snapshots: '@oozcitak/util@10.0.0': {} + '@openai/codex-sdk@0.125.0': + dependencies: + '@openai/codex': 0.125.0 + + '@openai/codex@0.125.0': + optionalDependencies: + '@openai/codex-darwin-arm64': '@openai/codex@0.125.0-darwin-arm64' + '@openai/codex-darwin-x64': '@openai/codex@0.125.0-darwin-x64' + '@openai/codex-linux-arm64': '@openai/codex@0.125.0-linux-arm64' + '@openai/codex-linux-x64': '@openai/codex@0.125.0-linux-x64' + '@openai/codex-win32-arm64': '@openai/codex@0.125.0-win32-arm64' + '@openai/codex-win32-x64': '@openai/codex@0.125.0-win32-x64' + + '@openai/codex@0.125.0-darwin-arm64': + optional: true + + '@openai/codex@0.125.0-darwin-x64': + optional: true + + 
'@openai/codex@0.125.0-linux-arm64': + optional: true + + '@openai/codex@0.125.0-linux-x64': + optional: true + + '@openai/codex@0.125.0-win32-arm64': + optional: true + + '@openai/codex@0.125.0-win32-x64': + optional: true + '@opentelemetry/api@1.9.1': {} '@oxc-minify/binding-android-arm64@0.96.0': @@ -29587,7 +29775,7 @@ snapshots: agent-base@7.1.4: {} - agent-session-protocol@0.0.2: + agent-session-protocol@0.0.8: dependencies: '@durable-streams/client': 0.2.3 '@modelcontextprotocol/sdk': 1.29.0(zod@3.25.76) @@ -30150,7 +30338,7 @@ snapshots: bytes: 3.1.2 content-type: 1.0.5 debug: 4.4.3 - http-errors: 2.0.0 + http-errors: 2.0.1 iconv-lite: 0.7.2 on-finished: 2.4.1 qs: 6.15.1 @@ -32506,9 +32694,9 @@ snapshots: exponential-backoff@3.1.2: {} - express-rate-limit@7.5.1(express@5.1.0): + express-rate-limit@7.5.1(express@5.2.1): dependencies: - express: 5.1.0 + express: 5.2.1 express-rate-limit@8.4.1(express@5.2.1): dependencies: @@ -32598,19 +32786,19 @@ snapshots: etag: 1.8.1 finalhandler: 2.1.0 fresh: 2.0.0 - http-errors: 2.0.0 + http-errors: 2.0.1 merge-descriptors: 2.0.0 mime-types: 3.0.1 on-finished: 2.4.1 once: 1.4.0 parseurl: 1.3.3 proxy-addr: 2.0.7 - qs: 6.14.0 + qs: 6.15.1 range-parser: 1.2.1 router: 2.2.0 send: 1.2.0 serve-static: 2.2.0 - statuses: 2.0.1 + statuses: 2.0.2 type-is: 2.0.1 vary: 1.1.2 transitivePeerDependencies: