From b8aea1f439876ed22eb0c33235b3046002d5f45a Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Sun, 3 May 2026 11:10:33 +0100 Subject: [PATCH] fix(agents): support configurable builtin models Allow Horton and Worker to use configured Anthropic or OpenAI models, including UI-selectable reasoning effort for compatible OpenAI reasoning models. Co-authored-by: Cursor --- .../agents-runtime/src/tools/context-tools.ts | 30 +-- .../agents-runtime/test/context-tools.test.ts | 9 + packages/agents/src/agents/horton.ts | 63 ++++- packages/agents/src/agents/worker.ts | 40 ++- packages/agents/src/bootstrap.ts | 12 +- packages/agents/src/model-catalog.ts | 234 ++++++++++++++++++ packages/agents/src/server.ts | 2 +- packages/agents/src/tools/spawn-worker.ts | 17 +- .../test/horton-model-selection.test.ts | 99 ++++++++ .../agents/test/horton-system-prompt.test.ts | 11 + packages/agents/test/model-catalog.test.ts | 108 ++++++++ .../agents/test/spawn-worker-tool.test.ts | 29 +++ .../test/worker-least-privilege.test.ts | 47 +++- 13 files changed, 668 insertions(+), 33 deletions(-) create mode 100644 packages/agents/src/model-catalog.ts create mode 100644 packages/agents/test/horton-model-selection.test.ts create mode 100644 packages/agents/test/model-catalog.test.ts diff --git a/packages/agents-runtime/src/tools/context-tools.ts b/packages/agents-runtime/src/tools/context-tools.ts index 3e1adfbfb5..c5bb823341 100644 --- a/packages/agents-runtime/src/tools/context-tools.ts +++ b/packages/agents-runtime/src/tools/context-tools.ts @@ -1,4 +1,4 @@ -import { z } from 'zod' +import { Type } from '@sinclair/typebox' import type { AgentTool } from '../types' export interface ContextToolsContext { @@ -25,10 +25,10 @@ export function createContextTools(ctx: ContextToolsContext): Array { name: `load_timeline_range`, label: `Load Timeline Range`, description: `Load the rendered messages for a dropped timeline offset range.`, - parameters: z.object({ - from: z.number(), - to: z.number(), - }) as unknown as AgentTool[`parameters`], + parameters: Type.Object({ + from: Type.Number(), + to: Type.Number(), + }) as AgentTool[`parameters`], execute: async (_toolCallId, params) => textResult( await ctx.loadTimelineRange( @@ -43,12 +43,12 @@ export function createContextTools(ctx: ContextToolsContext): Array { name: `load_source_range`, label: `Load Source Range`, description: `Load a character range from a truncated source snapshot.`, - parameters: z.object({ - name: z.string(), - from: z.number(), - to: z.number(), - snapshot: z.string(), - }) as unknown as AgentTool[`parameters`], + parameters: Type.Object({ + name: Type.String(), + from: Type.Number(), + to: Type.Number(), + snapshot: Type.String(), + }) as AgentTool[`parameters`], execute: async (_toolCallId, params) => textResult( await ctx.loadSourceRange( @@ -65,10 +65,10 @@ export function createContextTools(ctx: ContextToolsContext): Array { name: `load_context_history`, label: `Load Context History`, description: `Load a tombstoned context entry by its original offset.`, - parameters: z.object({ - id: z.string(), - offset: z.string(), - }) as unknown as AgentTool[`parameters`], + parameters: Type.Object({ + id: Type.String(), + offset: Type.String(), + }) as AgentTool[`parameters`], execute: async (_toolCallId, params) => textResult( await ctx.loadContextHistory( diff --git a/packages/agents-runtime/test/context-tools.test.ts b/packages/agents-runtime/test/context-tools.test.ts index f4f4336195..6f6ae55b9d 100644 --- a/packages/agents-runtime/test/context-tools.test.ts +++ b/packages/agents-runtime/test/context-tools.test.ts @@ -21,6 +21,15 @@ function firstText(result: { } describe(`context tools`, () => { + it(`uses OpenAI-compatible object schemas with properties`, () => { + for (const tool of createContextTools(makeCtx())) { + expect(tool.parameters).toMatchObject({ + type: `object`, + properties: expect.any(Object), + }) + } + }) + it(`load_timeline_range returns the expected payload`, async () => { const tool = createContextTools(makeCtx()).find( (candidate) => candidate.name === `load_timeline_range` diff --git a/packages/agents/src/agents/horton.ts b/packages/agents/src/agents/horton.ts index 2547b9e430..fdea14e03f 100644 --- a/packages/agents/src/agents/horton.ts +++ b/packages/agents/src/agents/horton.ts @@ -1,8 +1,15 @@ import Anthropic from '@anthropic-ai/sdk' +import { z } from 'zod' import { serverLog } from '../log' import { createHortonDocsSupport } from '../docs/knowledge-base' import { createSkillTools } from '../skills/tools' import { createSpawnWorkerTool } from '../tools/spawn-worker' +import { + modelChoiceValues, + REASONING_EFFORT_VALUES, + resolveBuiltinModelConfig, + type BuiltinModelCatalog, +} from '../model-catalog' import { createPromptCoderTool, createSpawnCoderTool, @@ -151,7 +158,13 @@ export async function generateTitle( export function buildHortonSystemPrompt( workingDirectory: string, - opts: { hasDocsSupport?: boolean; hasSkills?: boolean; docsUrl?: string } = {} + opts: { + hasDocsSupport?: boolean + hasSkills?: boolean + docsUrl?: string + modelProvider?: string + modelId?: string + } = {} ): string { const docsTools = opts.hasDocsSupport ? `\n- search_durable_agents_docs: hybrid search over the built-in Durable Agents docs index` @@ -202,6 +215,11 @@ Don't force onboarding. If someone just wants to chat or code, let them. When in - The docs site covers: Usage (entity definition, handlers, tools, state, spawning, coordination, waking, shared state, client integration, app setup), Reference (handler context, entity definitions, configurations, tools, state proxies, wake events, registries), Entities (Horton, Worker), and Patterns (Manager-Worker, Pipeline, Map-Reduce, Dispatcher, Blackboard, Reactive Observers). - For general coding questions unrelated to Electric Agents, use brave_search or your own knowledge.` : `` + const modelGuidance = + opts.modelProvider && opts.modelId + ? `\n# Runtime model +You are currently running via provider "${opts.modelProvider}" with model "${opts.modelId}". If the user asks what model or provider you are using, answer with these exact runtime values. Do not infer your model identity from training data or from the name of another coding tool.` + : `` return `You are Horton, a friendly and capable assistant. You can chat, research the web, read and edit code, run shell commands, and dispatch subagents (workers) for isolated subtasks. Be warm and engaging in conversation; be precise and concrete when working with code. # Greetings @@ -223,7 +241,7 @@ ${docsTools}${skillsTools} - Prefer edit over write when modifying existing files. - You must read a file before you can edit it. - Use absolute paths or paths relative to the current working directory. -${docsGuidance}${skillsGuidance}${onboardingGuidance}${docsUrlGuidance} +${modelGuidance}${docsGuidance}${skillsGuidance}${onboardingGuidance}${docsUrlGuidance} # Risky actions Pause and confirm with the user before: @@ -262,7 +280,10 @@ export function createHortonTools( workingDirectory: string, ctx: HandlerContext, readSet: Set, - opts: { docsSearchTool?: AgentTool } = {} + opts: { + docsSearchTool?: AgentTool + modelConfig?: ReturnType + } = {} ): Array { return [ createBashTool(workingDirectory), @@ -271,7 +292,7 @@ export function createHortonTools( createEditTool(workingDirectory, readSet), braveSearchTool, fetchUrlTool, - createSpawnWorkerTool(ctx), + createSpawnWorkerTool(ctx, opts.modelConfig), createSpawnCoderTool(ctx), createPromptCoderTool(ctx), ...(opts.docsSearchTool ? [opts.docsSearchTool] : []), @@ -302,6 +323,7 @@ function createAssistantHandler(options: { docsSupport: HortonDocsSupport | null docsSearchTool?: AgentTool skillsRegistry: SkillsRegistry | null + modelCatalog: BuiltinModelCatalog docsUrl?: string }) { const { @@ -310,6 +332,7 @@ function createAssistantHandler(options: { docsSupport, docsSearchTool, skillsRegistry, + modelCatalog, docsUrl, } = options const hasSkills = Boolean(skillsRegistry && skillsRegistry.catalog.size > 0) @@ -319,9 +342,13 @@ function createAssistantHandler(options: { wake: WakeEvent ): Promise { const readSet = new Set() + const modelConfig = resolveBuiltinModelConfig(modelCatalog, ctx.args) const tools = [ ...ctx.electricTools, - ...createHortonTools(workingDirectory, ctx, readSet, { docsSearchTool }), + ...createHortonTools(workingDirectory, ctx, readSet, { + docsSearchTool, + modelConfig, + }), ...(skillsRegistry && skillsRegistry.catalog.size > 0 ? createSkillTools(skillsRegistry, ctx) : []), @@ -383,8 +410,10 @@ function createAssistantHandler(options: { hasDocsSupport: Boolean(docsSupport), hasSkills, docsUrl, + modelProvider: modelConfig.provider, + modelId: String(modelConfig.model), }), - model: HORTON_MODEL, + ...modelConfig, tools, ...(streamFn && { streamFn }), }) @@ -422,10 +451,16 @@ export function registerHorton( workingDirectory: string streamFn?: StreamFn skillsRegistry?: SkillsRegistry | null + modelCatalog: BuiltinModelCatalog docsUrl?: string } ): Array { - const { workingDirectory, streamFn, skillsRegistry = null } = options + const { + workingDirectory, + streamFn, + skillsRegistry = null, + modelCatalog, + } = options const docsUrl = options.docsUrl ?? process.env.HORTON_DOCS_URL if (process.env.BRAVE_SEARCH_API_KEY) { @@ -451,11 +486,25 @@ export function registerHorton( docsSupport, docsSearchTool, skillsRegistry, + modelCatalog, docsUrl, }) + const hortonCreationSchema = z.object({ + model: z + .enum(modelChoiceValues(modelCatalog)) + .default(modelCatalog.defaultChoice.value), + reasoningEffort: z + .enum(REASONING_EFFORT_VALUES) + .default(`auto`) + .describe( + `Reasoning effort for compatible reasoning models. Auto uses a safe provider default.` + ), + }) + registry.define(`horton`, { description: `Friendly capable assistant — chat, code, research, dispatch`, + creationSchema: hortonCreationSchema, handler: assistantHandler, }) diff --git a/packages/agents/src/agents/worker.ts b/packages/agents/src/agents/worker.ts index 1b01fc3b72..6fc820aa39 100644 --- a/packages/agents/src/agents/worker.ts +++ b/packages/agents/src/agents/worker.ts @@ -9,7 +9,11 @@ import { createWriteTool, } from '@electric-ax/agents-runtime/tools' import { WORKER_TOOL_NAMES, createSpawnWorkerTool } from '../tools/spawn-worker' -import { HORTON_MODEL } from './horton' +import { + REASONING_EFFORT_VALUES, + resolveBuiltinModelConfig, + type BuiltinModelCatalog, +} from '../model-catalog' import type { WorkerToolName } from '../tools/spawn-worker' import type { AgentTool, StreamFn } from '@mariozechner/pi-agent-core' import type { @@ -25,6 +29,9 @@ interface WorkerArgs { tools: Array sharedDb?: { id: string; schema: SharedStateSchemaMap } sharedDbToolMode?: `full` | `write-only` + model?: string + provider?: string + reasoningEffort?: string } function isWorkerToolName(value: unknown): value is WorkerToolName { @@ -84,6 +91,23 @@ function parseWorkerArgs(value: Readonly>): WorkerArgs { throw new Error(`[worker] must provide tools and/or sharedDb`) } + if (typeof value.model === `string`) { + args.model = value.model + } + + if (typeof value.provider === `string`) { + args.provider = value.provider + } + + if ( + typeof value.reasoningEffort === `string` && + (REASONING_EFFORT_VALUES as ReadonlyArray).includes( + value.reasoningEffort + ) + ) { + args.reasoningEffort = value.reasoningEffort + } + return args } @@ -254,9 +278,13 @@ function buildSharedStateTools( export function registerWorker( registry: EntityRegistry, - options: { workingDirectory: string; streamFn?: StreamFn } + options: { + workingDirectory: string + streamFn?: StreamFn + modelCatalog: BuiltinModelCatalog + } ): void { - const { workingDirectory, streamFn } = options + const { workingDirectory, streamFn, modelCatalog } = options registry.define(`worker`, { description: `Internal — generic worker spawned by other agents. Configure via spawn args (systemPrompt + tools + optional sharedDb).`, async handler(ctx) { @@ -268,6 +296,10 @@ export function registerWorker( ctx, readSet ) + const modelConfig = resolveBuiltinModelConfig( + modelCatalog, + args as unknown as Readonly> + ) const sharedStateTools: Array = [] if (args.sharedDb) { @@ -285,7 +317,7 @@ export function registerWorker( ctx.useAgent({ systemPrompt: `${args.systemPrompt}${WORKER_PROMPT_FOOTER}`, - model: HORTON_MODEL, + ...modelConfig, tools: [...builtinTools, ...sharedStateTools], ...(streamFn && { streamFn }), }) diff --git a/packages/agents/src/bootstrap.ts b/packages/agents/src/bootstrap.ts index 5d3ec8b3c7..50623f9b72 100644 --- a/packages/agents/src/bootstrap.ts +++ b/packages/agents/src/bootstrap.ts @@ -12,6 +12,7 @@ import { serverLog } from './log' import { registerCodingSession } from './agents/coding-session' import { registerHorton } from './agents/horton' import { registerWorker } from './agents/worker' +import { createBuiltinModelCatalog } from './model-catalog' import { createSkillsRegistry } from './skills/registry' import type { AgentTool, @@ -76,9 +77,13 @@ export async function createBuiltinAgentHandler( createElectricTools, } = options - if (!streamFn && !process.env.ANTHROPIC_API_KEY) { + const modelCatalog = await createBuiltinModelCatalog({ + allowMockFallback: Boolean(streamFn), + }) + + if (!modelCatalog) { serverLog.warn( - `[builtin-agents] ANTHROPIC_API_KEY not set — skipping built-in agent registration` + `[builtin-agents] no supported model provider API key found — set ANTHROPIC_API_KEY or OPENAI_API_KEY` ) return null } @@ -111,9 +116,10 @@ export async function createBuiltinAgentHandler( workingDirectory: cwd, streamFn, skillsRegistry, + modelCatalog, }) - registerWorker(registry, { workingDirectory: cwd, streamFn }) + registerWorker(registry, { workingDirectory: cwd, streamFn, modelCatalog }) typeNames.push(`worker`) registerCodingSession(registry, { defaultWorkingDirectory: cwd }) diff --git a/packages/agents/src/model-catalog.ts b/packages/agents/src/model-catalog.ts new file mode 100644 index 0000000000..fad07ecaa7 --- /dev/null +++ b/packages/agents/src/model-catalog.ts @@ -0,0 +1,234 @@ +import { getModels } from '@mariozechner/pi-ai' +import type { AgentConfig } from '@electric-ax/agents-runtime' + +export type BuiltinModelProvider = `anthropic` | `openai` + +export interface BuiltinModelChoice { + provider: BuiltinModelProvider + id: string + label: string + value: string + reasoning: boolean +} + +export interface BuiltinModelCatalog { + choices: Array + defaultChoice: BuiltinModelChoice +} + +export const REASONING_EFFORT_VALUES = [ + `auto`, + `minimal`, + `low`, + `medium`, + `high`, +] as const + +export type BuiltinReasoningEffort = (typeof REASONING_EFFORT_VALUES)[number] +type ExplicitReasoningEffort = Exclude + +export type BuiltinAgentModelConfig = Pick< + AgentConfig, + `model` | `provider` | `onPayload` +> & { + reasoningEffort?: ExplicitReasoningEffort +} + +type PersistedModelConfig = Pick & { + reasoningEffort?: ExplicitReasoningEffort +} + +const DEFAULT_ANTHROPIC_MODEL = `claude-sonnet-4-6` +const DEFAULT_OPENAI_MODEL = `gpt-4.1` + +function hasEnv(name: string): boolean { + return (process.env[name]?.trim().length ?? 0) > 0 +} + +function modelValue(provider: BuiltinModelProvider, id: string): string { + return `${provider}:${id}` +} + +function providerLabel(provider: BuiltinModelProvider): string { + return provider === `anthropic` ? `Anthropic` : `OpenAI` +} + +function configuredProviders(): Array { + const providers: Array = [] + if (hasEnv(`ANTHROPIC_API_KEY`)) providers.push(`anthropic`) + if (hasEnv(`OPENAI_API_KEY`)) providers.push(`openai`) + return providers +} + +function mockFallbackCatalog(): BuiltinModelCatalog { + const fallback = { + provider: `anthropic` as const, + id: DEFAULT_ANTHROPIC_MODEL, + label: `Anthropic ${DEFAULT_ANTHROPIC_MODEL}`, + value: modelValue(`anthropic`, DEFAULT_ANTHROPIC_MODEL), + reasoning: true, + } + return { choices: [fallback], defaultChoice: fallback } +} + +async function fetchAvailableModelIds( + provider: BuiltinModelProvider +): Promise | null> { + try { + const res = + provider === `anthropic` + ? await fetch(`https://api.anthropic.com/v1/models`, { + headers: { + 'x-api-key': process.env.ANTHROPIC_API_KEY ?? ``, + 'anthropic-version': `2023-06-01`, + }, + signal: AbortSignal.timeout(3_000), + }) + : await fetch(`https://api.openai.com/v1/models`, { + headers: { + authorization: `Bearer ${process.env.OPENAI_API_KEY ?? ``}`, + }, + signal: AbortSignal.timeout(3_000), + }) + + if (res.status === 401 || res.status === 403) return new Set() + if (!res.ok) return null + + const body = (await res.json()) as { data?: Array<{ id?: unknown }> } + const ids = new Set( + (body.data ?? []) + .map((model) => model.id) + .filter((id): id is string => typeof id === `string`) + ) + + return ids.size > 0 ? ids : null + } catch { + return null + } +} + +async function choicesForProvider( + provider: BuiltinModelProvider +): Promise> { + const knownModels = getModels(provider) + const availableIds = await fetchAvailableModelIds(provider) + const models = + availableIds === null + ? knownModels + : knownModels.filter((model) => availableIds.has(model.id)) + + return models.map((model) => ({ + provider, + id: model.id, + label: `${providerLabel(provider)} ${model.name}`, + value: modelValue(provider, model.id), + reasoning: model.reasoning, + })) +} + +function withProviderPayloadDefaults( + config: PersistedModelConfig, + choice: BuiltinModelChoice, + reasoningEffort: ExplicitReasoningEffort | null +): BuiltinAgentModelConfig { + if (choice.provider !== `openai` || !choice.reasoning) return config + + const effort = reasoningEffort ?? `minimal` + + return { + ...config, + onPayload: (payload) => { + if (typeof payload !== `object` || payload === null) return undefined + const body = payload as Record + const existingReasoning = + typeof body.reasoning === `object` && body.reasoning !== null + ? (body.reasoning as Record) + : {} + + return { + ...body, + reasoning: { + ...existingReasoning, + effort, + }, + } + }, + } +} + +function parseReasoningEffort(value: unknown): ExplicitReasoningEffort | null { + return value === `minimal` || + value === `low` || + value === `medium` || + value === `high` + ? value + : null +} + +export async function createBuiltinModelCatalog( + options: { + allowMockFallback?: boolean + } = {} +): Promise { + const providers = configuredProviders() + + if (providers.length === 0 && options.allowMockFallback) { + return mockFallbackCatalog() + } + + const choices = ( + await Promise.all(providers.map((provider) => choicesForProvider(provider))) + ).flat() + + if (choices.length === 0) { + return options.allowMockFallback ? mockFallbackCatalog() : null + } + + const defaultChoice = + choices.find( + (choice) => + choice.provider === `anthropic` && choice.id === DEFAULT_ANTHROPIC_MODEL + ) ?? + choices.find( + (choice) => + choice.provider === `openai` && choice.id === DEFAULT_OPENAI_MODEL + ) ?? + choices[0]! + + return { choices, defaultChoice } +} + +export function resolveBuiltinModelConfig( + catalog: BuiltinModelCatalog, + args: Readonly> +): BuiltinAgentModelConfig { + const modelArg = args.model + const providerArg = args.provider + const reasoningEffort = parseReasoningEffort(args.reasoningEffort) + const selected = + typeof modelArg === `string` + ? catalog.choices.find( + (choice) => + choice.value === modelArg || + (choice.id === modelArg && choice.provider === providerArg) + ) + : undefined + + const choice = selected ?? catalog.defaultChoice + const config = { + provider: choice.provider, + model: choice.id, + ...(reasoningEffort && { reasoningEffort }), + } + + return withProviderPayloadDefaults(config, choice, reasoningEffort) +} + +export function modelChoiceValues( + catalog: BuiltinModelCatalog +): [string, ...Array] { + return catalog.choices.map((choice) => choice.value) as [ + string, + ...Array, + ] +} diff --git a/packages/agents/src/server.ts b/packages/agents/src/server.ts index 3a11c16338..cf9daf3cbd 100644 --- a/packages/agents/src/server.ts +++ b/packages/agents/src/server.ts @@ -124,7 +124,7 @@ export class BuiltinAgentsServer { }) if (!this.bootstrap) { throw new Error( - `ANTHROPIC_API_KEY must be set before starting builtin agents` + `ANTHROPIC_API_KEY or OPENAI_API_KEY must be set before starting builtin agents` ) } diff --git a/packages/agents/src/tools/spawn-worker.ts b/packages/agents/src/tools/spawn-worker.ts index c6cdb7ce35..7d9493d396 100644 --- a/packages/agents/src/tools/spawn-worker.ts +++ b/packages/agents/src/tools/spawn-worker.ts @@ -1,6 +1,7 @@ import { Type } from '@sinclair/typebox' import { nanoid } from 'nanoid' import { serverLog } from '../log' +import type { BuiltinAgentModelConfig } from '../model-catalog' import type { AgentTool } from '@mariozechner/pi-agent-core' import type { HandlerContext } from '@electric-ax/agents-runtime' @@ -16,7 +17,10 @@ export const WORKER_TOOL_NAMES = [ export type WorkerToolName = (typeof WORKER_TOOL_NAMES)[number] -export function createSpawnWorkerTool(ctx: HandlerContext): AgentTool { +export function createSpawnWorkerTool( + ctx: HandlerContext, + modelConfig?: BuiltinAgentModelConfig +): AgentTool { return { name: `spawn_worker`, label: `Spawn Worker`, @@ -65,11 +69,20 @@ export function createSpawnWorkerTool(ctx: HandlerContext): AgentTool { } const id = nanoid(10) + const workerModelArgs = modelConfig + ? { + provider: modelConfig.provider, + model: modelConfig.model, + ...(modelConfig.reasoningEffort && { + reasoningEffort: modelConfig.reasoningEffort, + }), + } + : {} try { const handle = await ctx.spawn( `worker`, id, - { systemPrompt, tools }, + { systemPrompt, tools, ...workerModelArgs }, { initialMessage, wake: { on: `runFinished`, includeResponse: true }, diff --git a/packages/agents/test/horton-model-selection.test.ts b/packages/agents/test/horton-model-selection.test.ts new file mode 100644 index 0000000000..4869599d8b --- /dev/null +++ b/packages/agents/test/horton-model-selection.test.ts @@ -0,0 +1,99 @@ +import { describe, expect, it, vi } from 'vitest' +import { createEntityRegistry } from '@electric-ax/agents-runtime' +import { registerHorton } from '../src/agents/horton' +import type { BuiltinModelCatalog } from '../src/model-catalog' + +const modelCatalog: BuiltinModelCatalog = { + defaultChoice: { + provider: `anthropic`, + id: `claude-sonnet-4-6`, + label: `Anthropic Claude Sonnet 4.6`, + value: `anthropic:claude-sonnet-4-6`, + reasoning: true, + }, + choices: [ + { + provider: `anthropic`, + id: `claude-sonnet-4-6`, + label: `Anthropic Claude Sonnet 4.6`, + value: `anthropic:claude-sonnet-4-6`, + reasoning: true, + }, + { + provider: `openai`, + id: `gpt-4.1`, + label: `OpenAI GPT-4.1`, + value: `openai:gpt-4.1`, + reasoning: false, + }, + ], +} + +describe(`horton model selection`, () => { + it(`exposes available models in its creation schema`, () => { + const registry = createEntityRegistry() + registerHorton(registry, { + workingDirectory: `/tmp`, + modelCatalog, + }) + + const def = registry.get(`horton`) + expect(def?.definition.creationSchema).toBeDefined() + const jsonSchema = ( + def!.definition.creationSchema as { + [`~standard`]?: { jsonSchema?: { input?: () => unknown } } + } + )[`~standard`]?.jsonSchema?.input?.() as { + properties?: { + model?: { enum?: Array } + reasoningEffort?: { enum?: Array; default?: string } + } + } + + expect(jsonSchema.properties?.model?.enum).toEqual([ + `anthropic:claude-sonnet-4-6`, + `openai:gpt-4.1`, + ]) + expect(jsonSchema.properties?.reasoningEffort?.enum).toEqual([ + `auto`, + `minimal`, + `low`, + `medium`, + `high`, + ]) + expect(jsonSchema.properties?.reasoningEffort?.default).toBe(`auto`) + }) + + it(`uses the selected model when running Horton`, async () => { + const registry = createEntityRegistry() + registerHorton(registry, { + workingDirectory: `/tmp`, + modelCatalog, + }) + + const def = registry.get(`horton`) + const useAgent = vi.fn() + const run = vi.fn(async () => {}) + const fakeCtx = { + args: { model: `openai:gpt-4.1` }, + electricTools: [], + events: [], + firstWake: false, + tags: {}, + db: { collections: { inbox: { toArray: [] } } }, + useContext: vi.fn(), + useAgent, + agent: { run }, + } as any + + await def!.definition.handler(fakeCtx, { type: `message_received` } as any) + + expect(useAgent).toHaveBeenCalledWith( + expect.objectContaining({ + provider: `openai`, + model: `gpt-4.1`, + }) + ) + expect(run).toHaveBeenCalledTimes(1) + }) +}) diff --git a/packages/agents/test/horton-system-prompt.test.ts b/packages/agents/test/horton-system-prompt.test.ts index b360dbc174..b1e47c62ed 100644 --- a/packages/agents/test/horton-system-prompt.test.ts +++ b/packages/agents/test/horton-system-prompt.test.ts @@ -36,4 +36,15 @@ describe(`buildHortonSystemPrompt`, () => { expect(prompt).toContain(`/quickstart`) expect(prompt).not.toContain(`/tutorial`) }) + + it(`includes runtime model identity when provided`, () => { + const prompt = buildHortonSystemPrompt(`/tmp/test`, { + modelProvider: `openai`, + modelId: `gpt-4.1`, + }) + + expect(prompt).toContain(`# Runtime model`) + expect(prompt).toContain(`provider "openai"`) + expect(prompt).toContain(`model "gpt-4.1"`) + }) }) diff --git a/packages/agents/test/model-catalog.test.ts b/packages/agents/test/model-catalog.test.ts new file mode 100644 index 0000000000..15b729d146 --- /dev/null +++ b/packages/agents/test/model-catalog.test.ts @@ -0,0 +1,108 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' +import { + createBuiltinModelCatalog, + resolveBuiltinModelConfig, +} from '../src/model-catalog' + +const originalEnv = { ...process.env } + +describe(`model catalog`, () => { + beforeEach(() => { + vi.stubGlobal( + `fetch`, + vi.fn(async () => ({ + ok: true, + json: async () => ({ + data: [{ id: `gpt-4.1` }, { id: `gpt-5` }, { id: `not-in-runtime` }], + }), + })) + ) + process.env = { ...originalEnv } + delete process.env.ANTHROPIC_API_KEY + process.env.OPENAI_API_KEY = `test-openai-key` + }) + + afterEach(() => { + process.env = { ...originalEnv } + vi.unstubAllGlobals() + }) + + it(`lists runtime-supported models available to configured providers`, async () => { + const catalog = await createBuiltinModelCatalog() + + expect(catalog).not.toBeNull() + expect(catalog!.choices.map((choice) => choice.value)).toContain( + `openai:gpt-4.1` + ) + expect(catalog!.choices.map((choice) => choice.value)).not.toContain( + `openai:not-in-runtime` + ) + }) + + it(`resolves selected model values into agent config`, async () => { + const catalog = await createBuiltinModelCatalog() + + expect( + resolveBuiltinModelConfig(catalog!, { model: `openai:gpt-4.1` }) + ).toEqual({ + provider: `openai`, + model: `gpt-4.1`, + }) + }) + + it(`sets a valid reasoning effort for OpenAI reasoning models`, async () => { + const catalog = await createBuiltinModelCatalog() + const config = resolveBuiltinModelConfig(catalog!, { + model: `openai:gpt-5`, + }) + + expect(config).toMatchObject({ + provider: `openai`, + model: `gpt-5`, + }) + expect(config.onPayload).toBeTypeOf(`function`) + + const payload = config.onPayload!( + { reasoning: { effort: `none` } }, + {} as any + ) + + expect(payload).toEqual({ + reasoning: { effort: `minimal` }, + }) + }) + + it(`uses explicit reasoning effort for OpenAI reasoning models`, async () => { + const catalog = await createBuiltinModelCatalog() + const config = resolveBuiltinModelConfig(catalog!, { + model: `openai:gpt-5`, + reasoningEffort: `high`, + }) + + expect(config.reasoningEffort).toBe(`high`) + + const payload = config.onPayload!( + { reasoning: { effort: `none` } }, + {} as any + ) + + expect(payload).toEqual({ + reasoning: { effort: `high` }, + }) + }) + + it(`does not expose providers whose keys are rejected`, async () => { + vi.stubGlobal( + `fetch`, + vi.fn(async () => ({ + ok: false, + status: 401, + json: async () => ({}), + })) + ) + + const catalog = await createBuiltinModelCatalog() + + expect(catalog).toBeNull() + }) +}) diff --git a/packages/agents/test/spawn-worker-tool.test.ts b/packages/agents/test/spawn-worker-tool.test.ts index 39a795dc4b..1d30886522 100644 --- a/packages/agents/test/spawn-worker-tool.test.ts +++ b/packages/agents/test/spawn-worker-tool.test.ts @@ -37,6 +37,35 @@ describe(`spawn_worker tool`, () => { expect(text).toMatch(/end your turn/i) }) + it(`passes the selected model config to the spawned worker`, async () => { + const spawn = vi.fn(async (type, id) => ({ + entityUrl: `/${type}/${id}`, + writeToken: `tok`, + txid: 1, + })) + const ctx = { spawn } as any + const tool = createSpawnWorkerTool(ctx, { + provider: `openai`, + model: `gpt-4.1`, + reasoningEffort: `high`, + }) + + await tool.execute(`call-model`, { + systemPrompt: `Do a thing`, + tools: [`read`], + initialMessage: `Please do it`, + }) + + const [, , args] = spawn.mock.calls[0]! as Array + expect(args).toEqual({ + systemPrompt: `Do a thing`, + tools: [`read`], + provider: `openai`, + model: `gpt-4.1`, + reasoningEffort: `high`, + }) + }) + it(`rejects when tools is empty`, async () => { const spawn = vi.fn() const ctx = { spawn } as any diff --git a/packages/agents/test/worker-least-privilege.test.ts b/packages/agents/test/worker-least-privilege.test.ts index 03c5ea1310..623ed357ad 100644 --- a/packages/agents/test/worker-least-privilege.test.ts +++ b/packages/agents/test/worker-least-privilege.test.ts @@ -2,10 +2,29 @@ import { describe, expect, it, vi } from 'vitest' import { createEntityRegistry } from '@electric-ax/agents-runtime' import { registerWorker } from '../src/agents/worker' +const modelCatalog = { + defaultChoice: { + provider: `anthropic` as const, + id: `claude-sonnet-4-6`, + label: `Anthropic Claude Sonnet 4.6`, + value: `anthropic:claude-sonnet-4-6`, + reasoning: true, + }, + choices: [ + { + provider: `anthropic` as const, + id: `claude-sonnet-4-6`, + label: `Anthropic Claude Sonnet 4.6`, + value: `anthropic:claude-sonnet-4-6`, + reasoning: true, + }, + ], +} + describe(`worker tool-list assembly`, () => { it(`grants only the tools the spawner asked for; never includes ctx.electricTools`, async () => { const registry = createEntityRegistry() - registerWorker(registry, { workingDirectory: `/tmp` }) + registerWorker(registry, { workingDirectory: `/tmp`, modelCatalog }) const def = registry.get(`worker`) expect(def).toBeDefined() @@ -44,4 +63,30 @@ describe(`worker tool-list assembly`, () => { expect(names).not.toContain(`electric_agents.scheduleCron`) expect(names).not.toContain(`electric_agents.send`) }) + + it(`uses the model selected in spawn args`, async () => { + const registry = createEntityRegistry() + registerWorker(registry, { workingDirectory: `/tmp`, modelCatalog }) + + const def = registry.get(`worker`) + const useAgent = vi.fn() + const fakeCtx = { + args: { + systemPrompt: `do a thing`, + tools: [`bash`], + model: `anthropic:claude-sonnet-4-6`, + }, + useAgent, + agent: { run: vi.fn(async () => {}) }, + } as any + + await def!.definition.handler(fakeCtx, {} as any) + + expect(useAgent).toHaveBeenCalledWith( + expect.objectContaining({ + provider: `anthropic`, + model: `claude-sonnet-4-6`, + }) + ) + }) })