diff --git a/apps/web/src/app/api/openrouter/[...path]/route.ts b/apps/web/src/app/api/openrouter/[...path]/route.ts index ad765d144..b7e944298 100644 --- a/apps/web/src/app/api/openrouter/[...path]/route.ts +++ b/apps/web/src/app/api/openrouter/[...path]/route.ts @@ -89,7 +89,11 @@ import { import { normalizeModelId } from '@/lib/ai-gateway/model-utils'; import { isForbiddenFreeModel } from '@/lib/ai-gateway/forbidden-free-models'; import { isCloudflareIP } from '@/lib/cloudflare-ip'; -import { isKiloAutoModel, KILO_AUTO_FREE_MODEL } from '@/lib/ai-gateway/auto-model'; +import { + getMorphRouterCandidates, + isKiloAutoModel, + KILO_AUTO_FREE_MODEL, +} from '@/lib/ai-gateway/auto-model'; import { applyResolvedAutoModel } from '@/lib/ai-gateway/auto-model/resolution'; import type { MicrodollarUsageContext } from '@/lib/ai-gateway/processUsage.types'; import { @@ -731,6 +735,15 @@ export async function POST(request: NextRequest): Promise m.id === model) || model === KILO_AUTO_LEGACY_MODEL; } + +// Models each kilo-auto tier may route among when a per-prompt router (the +// Morph model router, consulted by the auto-routing worker) picks the model +// instead of the static mode mapping. Membership here is a product decision: +// frontier may roam across frontier-class models from any provider, balanced +// across mid-priced models. Tiers absent from this map (free rotates by +// availability, small is balance-based) keep static resolution only. +// +// Per .specs/model-experiments.md, experimented public ids must never be +// added to these candidate sets. 
+const MORPH_ROUTER_TIER_CANDIDATES: Record<string, readonly string[]> = {
+  [KILO_AUTO_FRONTIER_MODEL.id]: [
+    CLAUDE_OPUS_CURRENT_MODEL_ID,
+    CLAUDE_SONNET_CURRENT_MODEL_ID,
+    GPT_CURRENT_MODEL_ID,
+    GEMINI_PRO_CURRENT_MODEL_ID,
+  ],
+  [KILO_AUTO_BALANCED_MODEL.id]: [
+    QWEN37_PLUS_MODEL_ID,
+    CLAUDE_HAIKU_CURRENT_MODEL_ID,
+    GEMINI_FLASH_CURRENT_MODEL_ID,
+  ],
+  [KILO_AUTO_LEGACY_MODEL]: [
+    QWEN37_PLUS_MODEL_ID,
+    CLAUDE_HAIKU_CURRENT_MODEL_ID,
+    GEMINI_FLASH_CURRENT_MODEL_ID,
+  ],
+};
+
+/**
+ * Returns the models the given kilo-auto tier may route among.
+ *
+ * @param autoModel - The kilo-auto pseudo-model id used as the map key.
+ * @returns The tier's candidate list, or an empty list when the id has no
+ *   entry of its own in the candidate map.
+ */
+export function getMorphRouterCandidates(autoModel: string): readonly string[] {
+  // Own-property check: indexing the plain object directly would hand back
+  // inherited Object.prototype members (e.g. for 'constructor' or
+  // 'toString') instead of falling through to the empty list.
+  if (!Object.prototype.hasOwnProperty.call(MORPH_ROUTER_TIER_CANDIDATES, autoModel)) {
+    return [];
+  }
+  return MORPH_ROUTER_TIER_CANDIDATES[autoModel] ?? [];
+}
diff --git a/apps/web/src/lib/ai-gateway/auto-routing-mirror.test.ts b/apps/web/src/lib/ai-gateway/auto-routing-mirror.test.ts
index 5500a1803..b9b4a7ff8 100644
--- a/apps/web/src/lib/ai-gateway/auto-routing-mirror.test.ts
+++ b/apps/web/src/lib/ai-gateway/auto-routing-mirror.test.ts
@@ -85,6 +85,7 @@ describe('scheduleAutoRoutingMirror', () => {
       mode: 'code',
       userAgent: 'Kilo-Code/1.2.3',
       bodyBytes: 512,
+      routing: null,
     });
     // TypeScript cannot see the schema's runtime refinements (.trim().min(1)
     // etc.), so round-trip the built payload through the worker's validator.
@@ -95,6 +96,24 @@ describe('scheduleAutoRoutingMirror', () => { expect(headers.get('content-type')).toBe('application/json'); }); + it('passes the kilo-auto routing context through to the worker', async () => { + const routing = { + autoModel: 'kilo-auto/frontier', + candidateModels: ['anthropic/claude-opus-4.8', 'openai/gpt-5.5'], + resolvedModel: 'anthropic/claude-opus-4.8', + }; + scheduleAutoRoutingMirror( + { ...makeParams(), routing }, + work => scheduledWork.push(work), + options + ); + await scheduledWork[0](); + + const payload = JSON.parse(mockedFetch.mock.calls[0][1]?.body as string); + expect(payload.routing).toEqual(routing); + expect(() => MirrorPayloadSchema.parse(payload)).not.toThrow(); + }); + it('skips mirroring when the body cannot be normalized, with a log for visibility', async () => { scheduleAutoRoutingMirror( { ...makeParams(), body: { stream: true } }, diff --git a/apps/web/src/lib/ai-gateway/auto-routing-mirror.ts b/apps/web/src/lib/ai-gateway/auto-routing-mirror.ts index 6192bb9be..e8415a46f 100644 --- a/apps/web/src/lib/ai-gateway/auto-routing-mirror.ts +++ b/apps/web/src/lib/ai-gateway/auto-routing-mirror.ts @@ -1,5 +1,9 @@ import { normalizeClassifierInput } from '@kilocode/auto-routing-contracts'; -import type { ClassifierApiKind, MirrorPayload } from '@kilocode/auto-routing-contracts'; +import type { + ClassifierApiKind, + MirrorPayload, + RoutingContext, +} from '@kilocode/auto-routing-contracts'; import { after } from 'next/server'; import { AUTO_ROUTING_WORKER_URL, INTERNAL_API_SECRET } from '@/lib/config.server'; import { warnExceptInTest } from '@/lib/utils.server'; @@ -19,6 +23,9 @@ type ScheduleAutoRoutingMirrorParams = { clientRequestId: string | null; mode: string | null; userAgent: string | null; + // Present only for kilo-auto requests: the tier, its router candidates, + // and the model the static resolver picked. 
+ routing?: RoutingContext | null; authContext?: Promise<{ organizationId?: string | null }>; }; @@ -62,6 +69,7 @@ async function sendAutoRoutingMirror( mode: params.mode, userAgent: params.userAgent, bodyBytes: params.bodyBytes, + routing: params.routing ?? null, }; const response = await fetch(`${workerUrl}/decide`, { diff --git a/packages/auto-routing-contracts/src/contracts.test.ts b/packages/auto-routing-contracts/src/contracts.test.ts index 56257f8f0..fe285e41d 100644 --- a/packages/auto-routing-contracts/src/contracts.test.ts +++ b/packages/auto-routing-contracts/src/contracts.test.ts @@ -55,6 +55,58 @@ describe('auto routing contracts', () => { }) ).toEqual({ cost: 0, decision: null, classifierResult: null }); + // Routing context is optional (deploys never coordinate) and validated + // when present. + const routing = { + autoModel: 'kilo-auto/frontier', + candidateModels: ['anthropic/claude-opus-4.8', 'openai/gpt-5.5'], + resolvedModel: 'anthropic/claude-opus-4.8', + }; + expect(MirrorPayloadSchema.parse({ ...mirrorPayload, routing })).toMatchObject({ routing }); + expect(MirrorPayloadSchema.parse({ ...mirrorPayload, routing: null })).toMatchObject({ + routing: null, + }); + expect(() => + MirrorPayloadSchema.parse({ ...mirrorPayload, routing: { autoModel: '' } }) + ).toThrow(); + expect(() => + MirrorPayloadSchema.parse({ + ...mirrorPayload, + routing: { ...routing, candidateModels: [''] }, + }) + ).toThrow(); + + const routerDecision = { + source: 'morph_router', + model: 'anthropic/claude-sonnet-4.6', + routerModel: 'claude-sonnet-4-6', + difficulty: 'easy', + confidence: 0.97, + ambiguity: 'low', + domain: 'coding', + }; + expect( + AutoRoutingDecisionResponseSchema.parse({ + cost: 0, + decision: routerDecision, + classifierResult: null, + }) + ).toMatchObject({ decision: { model: 'anthropic/claude-sonnet-4.6' } }); + expect( + AutoRoutingDecisionResponseSchema.parse({ + cost: 0, + decision: { ...routerDecision, difficulty: null, confidence: null }, + 
classifierResult: null,
+      })
+    ).toMatchObject({ decision: { difficulty: null } });
+    expect(() =>
+      AutoRoutingDecisionResponseSchema.parse({
+        cost: 0,
+        decision: { ...routerDecision, source: 'other_router' },
+        classifierResult: null,
+      })
+    ).toThrow();
+
     expect(
       AutoRoutingDecisionResponseSchema.parse({
         cost: 0,
diff --git a/packages/auto-routing-contracts/src/index.ts b/packages/auto-routing-contracts/src/index.ts
index ef537f600..f6660f3f9 100644
--- a/packages/auto-routing-contracts/src/index.ts
+++ b/packages/auto-routing-contracts/src/index.ts
@@ -7,6 +7,23 @@ export {
   type NormalizedClassifierInput,
 } from './input';
 
+// Routing context for kilo-auto requests: which pseudo-model the user
+// selected, which models that tier may route among, and what the gateway's
+// static resolver picked. The worker uses it to produce (and score) routing
+// decisions; non-auto requests carry no routing context.
+export const RoutingContextSchema = z.object({
+  // The kilo-auto pseudo-model from the original request, e.g.
+  // 'kilo-auto/frontier'.
+  autoModel: z.string().trim().min(1),
+  // Kilo public model ids this tier may route among. The gateway owns this
+  // set so tier membership stays a product decision, not a worker default.
+  candidateModels: z.array(z.string().trim().min(1)).max(32),
+  // The model the static resolver picked for this request; the baseline a
+  // router decision is compared against.
+  resolvedModel: z.string().trim().min(1).nullable(),
+});
+export type RoutingContext = z.infer<typeof RoutingContextSchema>;
+
 // What the gateway mirrors to the auto-routing worker per request: the
 // already-normalized classifier input plus caller identity. The gateway
 // normalizes before sending so the multi-hundred-KB request body never
@@ -25,6 +42,8 @@ export const MirrorPayloadSchema = z.object({
   // Size of the original request body, kept as an analytics dimension now
   // that the body itself is no longer mirrored.
bodyBytes: z.number().int().nonnegative(), + // Optional so gateway and worker deploys never have to coordinate. + routing: RoutingContextSchema.nullable().optional(), }); export type MirrorPayload = z.infer; @@ -96,9 +115,24 @@ export const ClassifierOutputSchema = z }); export type ClassifierOutput = z.infer; +// A routing decision produced by the Morph model router +// (https://docs.morphllm.com/sdk/components/router). `model` is the Kilo +// public id to serve; `routerModel` is the router-catalog id it mapped from. +// Classification heads below their confidence threshold come back null. +export const RouterDecisionSchema = z.object({ + source: z.literal('morph_router'), + model: z.string().trim().min(1), + routerModel: z.string().trim().min(1), + difficulty: z.string().nullable(), + confidence: z.number().nullable(), + ambiguity: z.string().nullable(), + domain: z.string().nullable(), +}); +export type RouterDecision = z.infer; + export const AutoRoutingDecisionResponseSchema = z.object({ cost: z.number(), - decision: z.null(), + decision: RouterDecisionSchema.nullable(), classifierResult: z .object({ classification: ClassifierOutputSchema, diff --git a/services/auto-routing/.dev.vars.example b/services/auto-routing/.dev.vars.example new file mode 100644 index 000000000..62dac656e --- /dev/null +++ b/services/auto-routing/.dev.vars.example @@ -0,0 +1,19 @@ +# Auto-routing worker local development secrets. +# +# Copy this file to .dev.vars to boot the worker locally: +# cp .dev.vars.example .dev.vars +# +# Wrangler serves these as the local values for the secrets-store bindings +# declared in wrangler.jsonc. +# +# Bearer token the gateway uses to call this worker; any non-empty value +# works locally as long as requests send the same one. +INTERNAL_API_SECRET_PROD=local-dev-secret + +# OpenRouter key used by the prompt classifier (https://openrouter.ai/keys). 
+OPENROUTER_API_KEY=replace-me + +# Morph key used by the model router (https://morphllm.com — dashboard > API +# keys). Only consulted when the morph_router_enabled KV flag is 'true': +# wrangler kv key put morph_router_enabled true --binding AUTO_ROUTING_CONFIG --local +MORPH_API_KEY=replace-me diff --git a/services/auto-routing/src/classifier-config.ts b/services/auto-routing/src/classifier-config.ts index 6b0687a53..9f854e9cd 100644 --- a/services/auto-routing/src/classifier-config.ts +++ b/services/auto-routing/src/classifier-config.ts @@ -4,6 +4,7 @@ import { ttlCached } from './ttl-cache'; export const CLASSIFIER_MODEL_CONFIG_KEY = 'classifier_model'; export const DECISION_LOG_SAMPLE_RATE_CONFIG_KEY = 'decision_log_sample_rate'; +export const MORPH_ROUTER_ENABLED_CONFIG_KEY = 'morph_router_enabled'; // Successful decisions are high volume (~30/s) and only needed for latency // and cache hit-rate percentiles, so they are sampled by default. The rate @@ -38,9 +39,18 @@ const decisionLogSampleRateCache = ttlCached( } ); +// Morph router decisions are off unless explicitly enabled, so the worker +// never sends prompt prefixes to a third-party router without an operator +// opting in. Same KV+TTL pattern as the classifier model. +const morphRouterEnabledCache = ttlCached(CONFIG_CACHE_TTL_MS, async (env: ClassifierConfigEnv) => { + const configured = await env.AUTO_ROUTING_CONFIG.get(MORPH_ROUTER_ENABLED_CONFIG_KEY); + return configured?.trim() === 'true'; +}); + export function clearClassifierConfigCache(): void { classifierModelCache.clear(); decisionLogSampleRateCache.clear(); + morphRouterEnabledCache.clear(); } // Config reads run before the guarded decision path. 
A transient KV failure @@ -69,6 +79,10 @@ export function getDecisionLogSampleRate(env: ClassifierConfigEnv): Promise { + return morphRouterEnabledCache.get(env).catch(failClosed(MORPH_ROUTER_ENABLED_CONFIG_KEY, false)); +} + export async function setClassifierModel( env: ClassifierConfigEnv, model: string diff --git a/services/auto-routing/src/decide.ts b/services/auto-routing/src/decide.ts index 3cc94edc5..0cc087bde 100644 --- a/services/auto-routing/src/decide.ts +++ b/services/auto-routing/src/decide.ts @@ -3,12 +3,17 @@ import type { AutoRoutingDecisionResponse, MirrorPayload, NormalizedClassifierInput, + RouterDecision, } from '@kilocode/auto-routing-contracts'; import { formatError } from '@kilocode/worker-utils'; import type { Handler } from 'hono'; import { writeClassifierMetricsDataPoint } from './classifier-analytics'; import type { ClassifierAnalyticsStatus } from './classifier-analytics'; -import { getClassifierModel, getDecisionLogSampleRate } from './classifier-config'; +import { + getClassifierModel, + getDecisionLogSampleRate, + getMorphRouterEnabled, +} from './classifier-config'; import type { ClassifierOutput } from './classifier-output'; import { computeContentHashes, @@ -17,9 +22,15 @@ import { hashIdentifierForTelemetry, } from './conversation-identity'; import type { ContentHashes } from './conversation-identity'; -import { getCachedClassification, putCachedClassification } from './decision-cache'; +import { + getCachedClassification, + getCachedRouterDecision, + putCachedClassification, + putCachedRouterDecision, +} from './decision-cache'; import { ClassifierRunError, classifyNormalizedInput } from './model-classifier'; import type { ClassifierRunResult } from './model-classifier'; +import { MorphRouterError, routeWithMorphRouter, routerConfigFingerprint } from './morph-router'; import type { HonoEnv } from './hono-env'; // Isolate-scoped request counter, used to correlate latency with isolate @@ -29,19 +40,23 @@ let isolateRequestSeq = 0; 
function decisionResponse( cost: number, classification: ClassifierOutput, - normalized: NormalizedClassifierInput + normalized: NormalizedClassifierInput, + decision: RouterDecision | null ): AutoRoutingDecisionResponse { return { cost, - decision: null, + decision, classifierResult: { classification, normalized }, }; } -function emptyDecisionResponse(cost = 0): AutoRoutingDecisionResponse { +function emptyDecisionResponse( + cost = 0, + decision: RouterDecision | null = null +): AutoRoutingDecisionResponse { return { cost, - decision: null, + decision, classifierResult: null, }; } @@ -248,6 +263,122 @@ function recordDecision( ); } +type RouterDecisionStatus = 'routed' | `skipped:${string}` | `router_error:${string}`; + +// Single sink for router telemetry, mirroring recordDecision: routed +// outcomes (including cache hits) and skips are sampled, errors always log +// at warn. Skips fire on every request of a tier with no routable +// candidates, so they share the success sample rate. +function recordRouterDecision( + ctx: DecisionContext, + durationMs: number, + status: RouterDecisionStatus, + decision: RouterDecision | null, + cacheHit: boolean, + details: Record +): void { + const isFailure = status.startsWith('router_error:'); + if (!isFailure && Math.random() >= ctx.successSampleRate) { + return; + } + const log = isFailure ? console.warn : console.log; + log( + JSON.stringify({ + event: 'auto_routing_router_decision', + status, + cacheHit, + autoModel: ctx.payload.routing?.autoModel ?? null, + resolvedModel: ctx.payload.routing?.resolvedModel ?? null, + routedModel: decision?.model ?? null, + routerModel: decision?.routerModel ?? null, + difficulty: decision?.difficulty ?? null, + confidence: decision?.confidence ?? null, + ambiguity: decision?.ambiguity ?? null, + domain: decision?.domain ?? 
null, + routerDurationMs: Math.round(durationMs), + requestedModel: ctx.payload.input.requestedModel, + apiKind: ctx.payload.input.apiKind, + sessionId: ctx.payload.sessionId, + clientRequestId: ctx.payload.clientRequestId, + userIdHash: ctx.userIdHash, + mode: ctx.payload.mode, + reqSeq: ctx.reqSeq, + colo: ctx.colo, + ...details, + }) + ); +} + +// Produces the Morph router decision for kilo-auto requests that carry +// routing context. Runs alongside classification and never throws: a router +// failure must not cost the caller a classification (or vice versa), so all +// failure modes collapse to a null decision plus telemetry. +async function resolveRouterDecision( + env: Env, + ctx: DecisionContext, + waitUntil: (promise: Promise) => void +): Promise { + const routing = ctx.payload.routing; + if (!routing) return null; + if (!(await getMorphRouterEnabled(env))) return null; + + const startedAt = performance.now(); + const fingerprint = routerConfigFingerprint(routing); + const cached = await getCachedRouterDecision( + env, + ctx.conversationKey, + ctx.hashes.exact, + fingerprint + ); + if (cached) { + recordRouterDecision(ctx, performance.now() - startedAt, 'routed', cached, true, {}); + return cached; + } + + try { + const outcome = await routeWithMorphRouter(env, routing, ctx.payload.input); + if (outcome.kind === 'skipped') { + recordRouterDecision( + ctx, + performance.now() - startedAt, + `skipped:${outcome.reason}`, + null, + false, + {} + ); + return null; + } + waitUntil( + putCachedRouterDecision( + env, + ctx.conversationKey, + ctx.hashes.exact, + fingerprint, + outcome.decision + ) + ); + recordRouterDecision(ctx, performance.now() - startedAt, 'routed', outcome.decision, false, { + policy: outcome.policy, + candidateCount: outcome.candidateCount, + }); + return outcome.decision; + } catch (error) { + const status: RouterDecisionStatus = + error instanceof MorphRouterError + ? 
`router_error:${error.failureStage}` + : 'router_error:unexpected_error'; + recordRouterDecision( + ctx, + performance.now() - startedAt, + status, + null, + false, + formatError(error) + ); + return null; + } +} + export const decideHandler: Handler = async c => { let rawBody: unknown; try { @@ -281,6 +412,12 @@ export const decideHandler: Handler = async c => { successSampleRate, }; + // The router decision runs alongside the classification flow; both + // resolve before the response so the gateway gets one combined result. + const routerDecisionPromise = resolveRouterDecision(c.env, ctx, promise => + c.executionCtx.waitUntil(promise) + ); + const cached = await getCachedClassification( c.env, ctx.conversationKey, @@ -293,7 +430,7 @@ export const decideHandler: Handler = async c => { classifierModel, classification: cached, }); - return c.json(decisionResponse(0, cached, payload.input)); + return c.json(decisionResponse(0, cached, payload.input, await routerDecisionPromise)); } try { @@ -312,14 +449,25 @@ export const decideHandler: Handler = async c => { ); } recordDecision(c.env, ctx, performance.now() - startedAt, { kind: 'model', classifier }); - // When routing decisions are implemented, include the prior decision for - // this session as an input alongside classifier output. - return c.json(decisionResponse(classifier.cost ?? 0, classifier.classification, payload.input)); + return c.json( + decisionResponse( + classifier.cost ?? 0, + classifier.classification, + payload.input, + await routerDecisionPromise + ) + ); } catch (error) { recordDecision(c.env, ctx, performance.now() - startedAt, { kind: 'error', error }); // A failed run can still have billed the first attempt (e.g. a valid-but- // invalid response followed by a throwing retry), so report that cost - // even though there is no usable classifier result. - return c.json(emptyDecisionResponse(getClassifierFailureMetadata(error).cost ?? 0)); + // even though there is no usable classifier result. 
A router decision is + // still useful without a classification, so it is returned regardless. + return c.json( + emptyDecisionResponse( + getClassifierFailureMetadata(error).cost ?? 0, + await routerDecisionPromise + ) + ); } }; diff --git a/services/auto-routing/src/decision-cache.ts b/services/auto-routing/src/decision-cache.ts index a4bd929bf..a878254e7 100644 --- a/services/auto-routing/src/decision-cache.ts +++ b/services/auto-routing/src/decision-cache.ts @@ -1,4 +1,9 @@ -import { ClassifierOutputSchema, type ClassifierOutput } from '@kilocode/auto-routing-contracts'; +import { + ClassifierOutputSchema, + RouterDecisionSchema, + type ClassifierOutput, + type RouterDecision, +} from '@kilocode/auto-routing-contracts'; import { DurableObject } from 'cloudflare:workers'; // Mirrored agent sessions classify the same prompt prefixes on every API @@ -13,13 +18,18 @@ const ENTRY_TTL_MS = 30 * 60 * 1000; // Cloudflare caps storage.delete() at 128 keys per call. const DELETE_BATCH_SIZE = 128; +// Classifications and router decisions share one object per conversation; +// entries may have been written by an older worker version, so read sites +// validate values with the matching schema before serving them. 
+type CacheableValue = ClassifierOutput | RouterDecision; + type StoredEntry = { - value: ClassifierOutput; + value: CacheableValue; storedAt: number; }; export class AutoRoutingDecisionCacheDO extends DurableObject { - async getEntry(key: string): Promise { + async getEntry(key: string): Promise { const entry = await this.ctx.storage.get(key); if (!entry) return null; if (Date.now() - entry.storedAt > ENTRY_TTL_MS) { @@ -29,7 +39,7 @@ export class AutoRoutingDecisionCacheDO extends DurableObject { return entry.value; } - async putEntry(key: string, value: ClassifierOutput): Promise { + async putEntry(key: string, value: CacheableValue): Promise { await this.ctx.storage.put(key, { value, storedAt: Date.now() } satisfies StoredEntry); // A fixed-period sweep (rather than an idle alarm pushed out on every // write) so storage stays bounded even when distinct conversations @@ -109,3 +119,44 @@ export async function putCachedClassification( // Cache writes are best effort and must not fail the decision. } } + +function routerEntryKey(contentHash: string, configFingerprint: string): string { + // The candidate-set/policy fingerprint is part of the key so tier or + // policy changes never serve a model the new config would not pick. + return `morph:${configFingerprint}:${contentHash}`; +} + +export async function getCachedRouterDecision( + env: DecisionCacheEnv, + conversationKey: string, + contentHash: string, + configFingerprint: string +): Promise { + try { + const value = await cacheStub(env, conversationKey).getEntry( + routerEntryKey(contentHash, configFingerprint) + ); + if (!value) return null; + const parsed = RouterDecisionSchema.safeParse(value); + return parsed.success ? 
parsed.data : null; + } catch { + return null; + } +} + +export async function putCachedRouterDecision( + env: DecisionCacheEnv, + conversationKey: string, + contentHash: string, + configFingerprint: string, + decision: RouterDecision +): Promise { + try { + await cacheStub(env, conversationKey).putEntry( + routerEntryKey(contentHash, configFingerprint), + decision + ); + } catch { + // Cache writes are best effort and must not fail the decision. + } +} diff --git a/services/auto-routing/src/index.test.ts b/services/auto-routing/src/index.test.ts index 89b9ba675..7c53a95a4 100644 --- a/services/auto-routing/src/index.test.ts +++ b/services/auto-routing/src/index.test.ts @@ -2,6 +2,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { clearClassifierConfigCache } from './classifier-config'; import { app } from './index'; import { ClassifierRunError } from './model-classifier'; +import { clearMorphApiKeyCache, MORPH_ROUTER_ENDPOINT } from './morph-router'; import type * as ModelClassifierModule from './model-classifier'; const classifyNormalizedInput = vi.hoisted(() => vi.fn()); @@ -25,6 +26,9 @@ const env = { INTERNAL_API_SECRET_PROD: { get: async () => 'classifier-token', }, + MORPH_API_KEY: { + get: async () => 'morph-key', + }, AUTO_ROUTING_CONFIG: { get: configGet, put: configPut, @@ -117,6 +121,7 @@ function decideRequest(payload: unknown) { describe('auto routing worker', () => { beforeEach(() => { clearClassifierConfigCache(); + clearMorphApiKeyCache(); classifyNormalizedInput.mockReset(); classifyNormalizedInput.mockResolvedValue(mockClassifierResult); writeDataPoint.mockReset(); @@ -385,6 +390,206 @@ describe('auto routing worker', () => { }); }); + describe('morph router decisions', () => { + const frontierRouting = { + autoModel: 'kilo-auto/frontier', + candidateModels: [ + 'anthropic/claude-opus-4.8', + 'anthropic/claude-sonnet-4.6', + 'openai/gpt-5.5', + 'google/gemini-3.1-pro-preview', + ], + resolvedModel: 
'anthropic/claude-opus-4.8', + }; + + const morphDecision = { + source: 'morph_router', + model: 'anthropic/claude-sonnet-4.6', + routerModel: 'claude-sonnet-4-6', + difficulty: 'easy', + confidence: 0.97, + ambiguity: 'low', + domain: 'coding', + }; + + function enableMorphRouter() { + configGet.mockImplementation(async (key: string) => + key === 'morph_router_enabled' ? 'true' : null + ); + } + + function mockMorphResponse() { + mockedFetch.mockResolvedValueOnce( + new Response( + JSON.stringify({ + model: 'claude-sonnet-4-6', + provider: 'anthropic', + difficulty: 'easy', + confidence: 0.97, + ambiguity: 'low', + domain: 'coding', + }), + { status: 200 } + ) + ); + } + + it('returns a morph decision alongside the classification when enabled', async () => { + const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + vi.spyOn(Math, 'random').mockReturnValue(0); + enableMorphRouter(); + mockMorphResponse(); + + const response = await decideRequest(mirrorPayload({ routing: frontierRouting })); + + expect(response.status).toBe(200); + await expect(response.json()).resolves.toEqual({ + cost: 0.00000123, + decision: morphDecision, + classifierResult: { + classification: mockClassification, + normalized: normalizedInput, + }, + }); + expect(mockedFetch).toHaveBeenCalledWith( + MORPH_ROUTER_ENDPOINT, + expect.objectContaining({ method: 'POST' }) + ); + // One classifier decision line and one router decision line. 
+ const routerLog = logSpy.mock.calls + .map(call => JSON.parse(String(call[0]))) + .find(line => line.event === 'auto_routing_router_decision'); + expect(routerLog).toMatchObject({ + status: 'routed', + cacheHit: false, + autoModel: 'kilo-auto/frontier', + resolvedModel: 'anthropic/claude-opus-4.8', + routedModel: 'anthropic/claude-sonnet-4.6', + routerModel: 'claude-sonnet-4-6', + difficulty: 'easy', + policy: 'capability_heavy', + candidateCount: 4, + userIdHash: expect.stringMatching(/^[0-9a-f]{16}$/), + }); + expect(JSON.stringify(routerLog)).not.toContain('user-1'); + // The fresh decision is cached for the conversation, scoped by the + // candidate-set fingerprint. + expect(cachePutEntry).toHaveBeenCalledWith( + expect.stringMatching(/^morph:capability_heavy:/), + morphDecision + ); + }); + + it('returns a null decision without calling Morph when the flag is off', async () => { + const response = await decideRequest(mirrorPayload({ routing: frontierRouting })); + + expect(response.status).toBe(200); + await expect(response.json()).resolves.toMatchObject({ decision: null }); + expect(mockedFetch).not.toHaveBeenCalled(); + }); + + it('returns a null decision for payloads without routing context', async () => { + enableMorphRouter(); + + const response = await decideRequest(mirrorPayload()); + + expect(response.status).toBe(200); + await expect(response.json()).resolves.toMatchObject({ decision: null }); + expect(mockedFetch).not.toHaveBeenCalled(); + }); + + it('serves cached router decisions without calling Morph again', async () => { + enableMorphRouter(); + cacheGetEntry.mockImplementation(async (key: string) => + key.startsWith('morph:') ? 
morphDecision : null + ); + + const response = await decideRequest(mirrorPayload({ routing: frontierRouting })); + + expect(response.status).toBe(200); + await expect(response.json()).resolves.toMatchObject({ decision: morphDecision }); + expect(mockedFetch).not.toHaveBeenCalled(); + expect(cachePutEntry).toHaveBeenCalledTimes(1); // classification only + }); + + it('still returns the classification when the router call fails', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + enableMorphRouter(); + mockedFetch.mockResolvedValueOnce(new Response('overloaded', { status: 503 })); + + const response = await decideRequest(mirrorPayload({ routing: frontierRouting })); + + expect(response.status).toBe(200); + await expect(response.json()).resolves.toEqual({ + cost: 0.00000123, + decision: null, + classifierResult: { + classification: mockClassification, + normalized: normalizedInput, + }, + }); + const routerLog = warnSpy.mock.calls + .map(call => JSON.parse(String(call[0]))) + .find(line => line.event === 'auto_routing_router_decision'); + expect(routerLog).toMatchObject({ status: 'router_error:http_503' }); + }); + + it('still returns the router decision when the classifier fails', async () => { + vi.spyOn(console, 'warn').mockImplementation(() => {}); + vi.spyOn(Math, 'random').mockReturnValue(0); + enableMorphRouter(); + mockMorphResponse(); + classifyNormalizedInput.mockRejectedValueOnce( + new ClassifierRunError('classifier exploded', { + cost: null, + classifierModel: 'google/gemini-2.5-flash-lite', + }) + ); + + const response = await decideRequest(mirrorPayload({ routing: frontierRouting })); + + expect(response.status).toBe(200); + await expect(response.json()).resolves.toEqual({ + cost: 0, + decision: morphDecision, + classifierResult: null, + }); + }); + + it('logs skipped routing for tiers without enough routable candidates', async () => { + const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + 
vi.spyOn(Math, 'random').mockReturnValue(0); + enableMorphRouter(); + + const response = await decideRequest( + mirrorPayload({ + routing: { + autoModel: 'kilo-auto/balanced', + candidateModels: ['qwen/qwen3.7-plus'], + resolvedModel: 'qwen/qwen3.7-plus', + }, + }) + ); + + expect(response.status).toBe(200); + await expect(response.json()).resolves.toMatchObject({ decision: null }); + expect(mockedFetch).not.toHaveBeenCalled(); + const routerLog = logSpy.mock.calls + .map(call => JSON.parse(String(call[0]))) + .find(line => line.event === 'auto_routing_router_decision'); + expect(routerLog).toMatchObject({ status: 'skipped:insufficient_candidates' }); + }); + + it('rejects payloads with malformed routing context', async () => { + const response = await decideRequest( + mirrorPayload({ routing: { autoModel: 'kilo-auto/frontier' } }) + ); + + expect(response.status).toBe(400); + await expect(response.json()).resolves.toEqual({ error: 'Invalid classifier payload' }); + }); + }); + it('rejects invalid JSON wrapper bodies', async () => { const response = await request('/decide', { method: 'POST', diff --git a/services/auto-routing/src/morph-router.test.ts b/services/auto-routing/src/morph-router.test.ts new file mode 100644 index 000000000..396afd0e8 --- /dev/null +++ b/services/auto-routing/src/morph-router.test.ts @@ -0,0 +1,257 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import type { NormalizedClassifierInput, RoutingContext } from '@kilocode/auto-routing-contracts'; +import { + MORPH_ROUTER_ENDPOINT, + MorphRouterError, + buildRouterInput, + clearMorphApiKeyCache, + routeWithMorphRouter, + routerConfigFingerprint, +} from './morph-router'; + +const originalFetch = globalThis.fetch; +const mockedFetch = vi.fn(); +const apiKeyGet = vi.fn(async () => 'morph-key'); + +const env = { MORPH_API_KEY: { get: apiKeyGet } } as unknown as Pick; + +const normalizedInput: NormalizedClassifierInput = { + apiKind: 'chat_completions', + 
requestedModel: 'kilo-auto/frontier', + systemPromptPrefix: 'You are Kilo Code.', + userPromptPrefix: 'Add a null check to this getter.', + latestUserPromptPrefix: null, + messageCount: 1, + hasTools: true, + stream: true, + providerHints: { provider: null, providerOptions: null }, +}; + +const frontierRouting: RoutingContext = { + autoModel: 'kilo-auto/frontier', + candidateModels: [ + 'anthropic/claude-opus-4.8', + 'anthropic/claude-sonnet-4.6', + 'openai/gpt-5.5', + 'google/gemini-3.1-pro-preview', + ], + resolvedModel: 'anthropic/claude-opus-4.8', +}; + +function morphResponse(body: Record, status = 200) { + return new Response(JSON.stringify(body), { status }); +} + +describe('routeWithMorphRouter', () => { + beforeEach(() => { + clearMorphApiKeyCache(); + apiKeyGet.mockClear(); + mockedFetch.mockReset(); + globalThis.fetch = mockedFetch; + }); + + afterEach(() => { + globalThis.fetch = originalFetch; + }); + + it('routes among mapped candidates and reverse-maps the decision to a Kilo id', async () => { + mockedFetch.mockResolvedValueOnce( + morphResponse({ + model: 'gpt-5.5', + provider: 'openai', + difficulty: 'hard', + confidence: 0.91, + ambiguity: 'low', + domain: 'coding', + }) + ); + + const outcome = await routeWithMorphRouter(env, frontierRouting, normalizedInput); + + expect(outcome).toEqual({ + kind: 'routed', + policy: 'capability_heavy', + candidateCount: 4, + decision: { + source: 'morph_router', + model: 'openai/gpt-5.5', + routerModel: 'gpt-5.5', + difficulty: 'hard', + confidence: 0.91, + ambiguity: 'low', + domain: 'coding', + }, + }); + expect(mockedFetch).toHaveBeenCalledTimes(1); + const [url, init] = mockedFetch.mock.calls[0] ?? 
[]; + expect(url).toBe(MORPH_ROUTER_ENDPOINT); + expect(init?.headers).toMatchObject({ authorization: 'Bearer morph-key' }); + expect(JSON.parse(init?.body as string)).toEqual({ + input: 'Add a null check to this getter.', + allowed_models: ['claude-opus-4-8', 'claude-sonnet-4-6', 'gpt-5.5', 'gemini-3.1-pro-preview'], + policy: 'capability_heavy', + default_model: 'claude-opus-4-8', + }); + }); + + it('uses the static resolver pick as the ambiguity fallback default', async () => { + mockedFetch.mockResolvedValueOnce(morphResponse({ model: 'claude-sonnet-4-6' })); + + await routeWithMorphRouter( + env, + { ...frontierRouting, resolvedModel: 'anthropic/claude-sonnet-4.6' }, + normalizedInput + ); + + const body = JSON.parse(mockedFetch.mock.calls[0]?.[1]?.body as string); + expect(body.default_model).toBe('claude-sonnet-4-6'); + }); + + it('falls back to the first mapped candidate when the resolved model is unroutable', async () => { + mockedFetch.mockResolvedValueOnce(morphResponse({ model: 'claude-haiku-4-5-20251001' })); + + const outcome = await routeWithMorphRouter( + env, + { + autoModel: 'kilo-auto/balanced', + candidateModels: [ + 'qwen/qwen3.7-plus', + 'anthropic/claude-haiku-4.5', + 'google/gemini-3.5-flash', + ], + resolvedModel: 'qwen/qwen3.7-plus', + }, + normalizedInput + ); + + const body = JSON.parse(mockedFetch.mock.calls[0]?.[1]?.body as string); + // qwen has no Morph catalog mapping, so it is neither an allowed model + // nor the default. 
+ expect(body.allowed_models).toEqual(['claude-haiku-4-5-20251001', 'gemini-3.5-flash']); + expect(body.default_model).toBe('claude-haiku-4-5-20251001'); + expect(body.policy).toBe('balanced'); + expect(outcome).toMatchObject({ + kind: 'routed', + decision: { model: 'anthropic/claude-haiku-4.5' }, + }); + }); + + it('classifies with the latest user prompt when the conversation has continued', async () => { + mockedFetch.mockResolvedValueOnce(morphResponse({ model: 'gpt-5.5' })); + + await routeWithMorphRouter(env, frontierRouting, { + ...normalizedInput, + latestUserPromptPrefix: 'Now refactor the entire module to use the new API.', + }); + + const body = JSON.parse(mockedFetch.mock.calls[0]?.[1]?.body as string); + expect(body.input).toBe('Now refactor the entire module to use the new API.'); + }); + + it('skips tiers with fewer than two routable candidates', async () => { + const outcome = await routeWithMorphRouter( + env, + { + autoModel: 'kilo-auto/balanced', + candidateModels: ['qwen/qwen3.7-plus', 'google/gemini-3.5-flash'], + resolvedModel: 'qwen/qwen3.7-plus', + }, + normalizedInput + ); + + expect(outcome).toEqual({ kind: 'skipped', reason: 'insufficient_candidates' }); + expect(mockedFetch).not.toHaveBeenCalled(); + }); + + it('skips tiers without a policy mapping', async () => { + const outcome = await routeWithMorphRouter( + env, + { ...frontierRouting, autoModel: 'kilo-auto/imaginary' }, + normalizedInput + ); + + expect(outcome).toEqual({ kind: 'skipped', reason: 'unknown_tier' }); + expect(mockedFetch).not.toHaveBeenCalled(); + }); + + it('skips requests without any user prompt to classify', async () => { + const outcome = await routeWithMorphRouter(env, frontierRouting, { + ...normalizedInput, + userPromptPrefix: null, + latestUserPromptPrefix: null, + }); + + expect(outcome).toEqual({ kind: 'skipped', reason: 'no_prompt' }); + expect(mockedFetch).not.toHaveBeenCalled(); + }); + + it('rejects decisions outside the allowed candidate set', async () 
=> { + mockedFetch.mockResolvedValueOnce(morphResponse({ model: 'claude-haiku-4-5-20251001' })); + + await expect(routeWithMorphRouter(env, frontierRouting, normalizedInput)).rejects.toMatchObject( + { + name: 'MorphRouterError', + failureStage: 'invalid_response', + } + ); + }); + + it('surfaces upstream HTTP failures with their status', async () => { + mockedFetch.mockResolvedValueOnce(morphResponse({ error: 'overloaded' }, 503)); + + await expect(routeWithMorphRouter(env, frontierRouting, normalizedInput)).rejects.toMatchObject( + { + name: 'MorphRouterError', + failureStage: 'http_503', + } + ); + }); + + it('maps timeouts to a timeout failure stage', async () => { + mockedFetch.mockRejectedValueOnce(new DOMException('timed out', 'TimeoutError')); + + await expect(routeWithMorphRouter(env, frontierRouting, normalizedInput)).rejects.toMatchObject( + { + name: 'MorphRouterError', + failureStage: 'timeout', + } + ); + }); + + it('rejects malformed router responses', async () => { + mockedFetch.mockResolvedValueOnce(morphResponse({ best: 'gpt-5.5' })); + + await expect( + routeWithMorphRouter(env, frontierRouting, normalizedInput) + ).rejects.toBeInstanceOf(MorphRouterError); + }); +}); + +describe('buildRouterInput', () => { + it('prefers the latest user prompt and falls back to the initial one', () => { + expect(buildRouterInput({ ...normalizedInput, latestUserPromptPrefix: 'latest' })).toBe( + 'latest' + ); + expect(buildRouterInput(normalizedInput)).toBe('Add a null check to this getter.'); + expect( + buildRouterInput({ + ...normalizedInput, + userPromptPrefix: ' ', + latestUserPromptPrefix: null, + }) + ).toBeNull(); + }); +}); + +describe('routerConfigFingerprint', () => { + it('is stable across candidate ordering and scoped by policy', () => { + const reordered = { + ...frontierRouting, + candidateModels: [...frontierRouting.candidateModels].reverse(), + }; + expect(routerConfigFingerprint(reordered)).toBe(routerConfigFingerprint(frontierRouting)); + 
expect( + routerConfigFingerprint({ ...frontierRouting, autoModel: 'kilo-auto/balanced' }) + ).not.toBe(routerConfigFingerprint(frontierRouting)); + }); +}); diff --git a/services/auto-routing/src/morph-router.ts b/services/auto-routing/src/morph-router.ts new file mode 100644 index 000000000..313ace22f --- /dev/null +++ b/services/auto-routing/src/morph-router.ts @@ -0,0 +1,226 @@ +import * as z from 'zod'; +import type { + NormalizedClassifierInput, + RouterDecision, + RoutingContext, +} from '@kilocode/auto-routing-contracts'; +import { ttlCached } from './ttl-cache'; + +// Morph's multimodel router classifies a prompt and picks the best model +// from an allowed set (https://docs.morphllm.com/sdk/components/router). +// One POST per decision, ~200ms typical. +export const MORPH_ROUTER_ENDPOINT = 'https://api.morphllm.com/v1/router/multimodel'; + +// Generous relative to Morph's ~200ms typical latency; decisions are +// shadow-mode today, so a slow call should fail the decision, not pile up +// against the gateway's background mirror budget. +const MORPH_ROUTER_TIMEOUT_MS = 5_000; + +// The router only needs enough prompt to classify; matches the prefix caps +// the classifier already applies to mirrored input. +const ROUTER_INPUT_MAX_LENGTH = 1_000; + +// Kilo public ids <-> Morph router catalog ids. Only models present in +// Morph's catalog can participate in a routed decision; unmapped candidates +// are dropped before the call (and reported via candidateCount telemetry). 
const KILO_TO_MORPH_MODEL: Record<string, string> = {
  'anthropic/claude-opus-4.8': 'claude-opus-4-8',
  'anthropic/claude-sonnet-4.6': 'claude-sonnet-4-6',
  'anthropic/claude-haiku-4.5': 'claude-haiku-4-5-20251001',
  'openai/gpt-5.5': 'gpt-5.5',
  'google/gemini-3.1-pro-preview': 'gemini-3.1-pro-preview',
  'google/gemini-3.5-flash': 'gemini-3.5-flash',
  'deepseek/deepseek-v4-pro:discounted': 'deepseek-v4-pro',
  'deepseek/deepseek-v4-flash:discounted': 'deepseek-v4-flash',
};

// Tier intent -> router policy. Frontier never trades quality for cost;
// balanced lets the router break ties on cost; small hunts for the cheapest
// qualified model.
const AUTO_MODEL_POLICY: Record<string, string> = {
  'kilo-auto/frontier': 'capability_heavy',
  'kilo-auto/balanced': 'balanced',
  'kilo/auto': 'balanced',
  'kilo-auto/small': 'cost_efficient',
  'kilo-auto/free': 'cost_efficient',
};

/**
 * Own-property lookup for the string tables above.
 *
 * The keys looked up come from the request payload, so a plain `table[key]`
 * would resolve inherited members such as `constructor` or `__proto__` to
 * truthy non-string values and slip past the "unknown" checks.
 *
 * @param table - One of the module's id/policy tables.
 * @param key - Caller-supplied key.
 * @returns The mapped string, or `undefined` when `key` is not an own entry.
 */
function ownEntry(table: Record<string, string>, key: string): string | undefined {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

// Only `model` is required; the remaining fields are copied into telemetry
// when present. `looseObject` lets fields not listed here pass validation.
const morphRouterResponseSchema = z.looseObject({
  model: z.string().trim().min(1),
  difficulty: z.string().optional(),
  confidence: z.number().optional(),
  ambiguity: z.string().optional(),
  ambiguity_confidence: z.number().optional(),
  domain: z.string().optional(),
  domain_confidence: z.number().optional(),
});

/** Where a router call failed: an HTTP status, the timeout, the transport, or response validation. */
export type MorphRouterFailureStage = `http_${number}` | 'timeout' | 'fetch' | 'invalid_response';

/** Error thrown for every failed router call; `failureStage` identifies the failing step. */
export class MorphRouterError extends Error {
  readonly failureStage: MorphRouterFailureStage;

  constructor(message: string, failureStage: MorphRouterFailureStage) {
    super(message);
    this.name = 'MorphRouterError';
    this.failureStage = failureStage;
  }
}

/** Reasons a request is not sent to the router at all. */
export type MorphRouteSkipReason = 'no_prompt' | 'insufficient_candidates' | 'unknown_tier';

/** Result of a routing attempt: either a validated decision or the reason it was skipped. */
export type MorphRouteOutcome =
  | { kind: 'routed'; decision: RouterDecision; policy: string; candidateCount: number }
  | { kind: 'skipped'; reason: MorphRouteSkipReason };

// NOTE(review): the type arguments of this alias were lost in extraction;
// `Env` is assumed to be the worker's generated environment interface —
// confirm against worker-configuration.d.ts.
type MorphRouterEnv = Pick<Env, 'MORPH_API_KEY'>;

// Same pattern as the OpenRouter key: cache the plain key string at module
// scope so each decision skips the secrets-store read, with a TTL that keeps
// rotations effective within five minutes.
const API_KEY_CACHE_TTL_MS = 300_000;

const apiKeyCache = ttlCached(API_KEY_CACHE_TTL_MS, (env: MorphRouterEnv) =>
  env.MORPH_API_KEY.get()
);

/** Drops the cached API key so the next decision reads the secret again. */
export function clearMorphApiKeyCache(): void {
  apiKeyCache.clear();
}

/**
 * Selects the prompt the router classifies.
 *
 * The latest user turn wins when it has content (it redirects the current
 * request); otherwise the opening turn is used. A latest turn that is empty
 * or whitespace-only is treated as absent, so it no longer suppresses the
 * fallback to the opening turn. System prompts are agent boilerplate and
 * would dominate the classification, so they are deliberately excluded —
 * Morph receives at most one bounded user prompt prefix, never the
 * conversation or tool results.
 *
 * @param input - Normalized classifier input for the request.
 * @returns The chosen prompt cut to `ROUTER_INPUT_MAX_LENGTH` characters, or
 *   `null` when neither prompt has non-whitespace content.
 */
export function buildRouterInput(input: NormalizedClassifierInput): string | null {
  for (const prompt of [input.latestUserPromptPrefix, input.userPromptPrefix]) {
    if (prompt && prompt.trim().length > 0) {
      return prompt.slice(0, ROUTER_INPUT_MAX_LENGTH);
    }
  }
  return null;
}

/**
 * Fingerprint that scopes cached decisions to a specific candidate set and
 * policy, so tier-membership or policy changes never serve stale models.
 *
 * @param routing - Routing context carrying the tier and its candidates.
 * @returns `<policy>:<sorted, comma-joined Morph ids>`; the policy part is
 *   `unknown` for tiers without a policy mapping. Candidate order does not
 *   affect the result.
 */
export function routerConfigFingerprint(routing: RoutingContext): string {
  const policy = ownEntry(AUTO_MODEL_POLICY, routing.autoModel) ?? 'unknown';
  const morphIds = mappedCandidates(routing)
    .map(candidate => candidate.morphId)
    .sort();
  return `${policy}:${morphIds.join(',')}`;
}

/**
 * Maps the tier's Kilo candidates to Morph catalog ids, preserving order.
 * Candidates without a catalog mapping are dropped, and so is any candidate
 * whose Morph id was already produced by an earlier one.
 */
function mappedCandidates(routing: RoutingContext): Array<{ kiloId: string; morphId: string }> {
  const seen = new Set<string>();
  const candidates: Array<{ kiloId: string; morphId: string }> = [];
  for (const kiloId of routing.candidateModels) {
    const morphId = ownEntry(KILO_TO_MORPH_MODEL, kiloId);
    if (!morphId || seen.has(morphId)) continue;
    seen.add(morphId);
    candidates.push({ kiloId, morphId });
  }
  return candidates;
}

/**
 * Asks the Morph router to pick one of the tier's candidates for a request.
 *
 * Skip checks run in this order and make no network call: tier without a
 * policy (`unknown_tier`), no classifiable prompt (`no_prompt`), fewer than
 * two candidates with a Morph mapping (`insufficient_candidates`).
 *
 * @param env - Worker bindings providing `MORPH_API_KEY`.
 * @param routing - Tier, candidate Kilo ids, and the static resolver's pick.
 * @param input - Normalized classifier input the prompt is taken from.
 * @returns A `routed` outcome whose `decision.model` is a Kilo id taken from
 *   the mapped candidates, or a `skipped` outcome with its reason.
 * @throws MorphRouterError when the call fails, the response is malformed, or
 *   the returned model is not one of the candidates that were sent.
 */
export async function routeWithMorphRouter(
  env: MorphRouterEnv,
  routing: RoutingContext,
  input: NormalizedClassifierInput
): Promise<MorphRouteOutcome> {
  const policy = ownEntry(AUTO_MODEL_POLICY, routing.autoModel);
  if (!policy) {
    return { kind: 'skipped', reason: 'unknown_tier' };
  }
  const routerInput = buildRouterInput(input);
  if (!routerInput) {
    return { kind: 'skipped', reason: 'no_prompt' };
  }
  const candidates = mappedCandidates(routing);
  const [firstCandidate] = candidates;
  // With fewer than two routable models there is no decision to make.
  if (!firstCandidate || candidates.length < 2) {
    return { kind: 'skipped', reason: 'insufficient_candidates' };
  }

  const resolvedCandidate = candidates.find(
    candidate => candidate.kiloId === routing.resolvedModel
  );
  // When the prompt is too ambiguous to size, the router returns
  // default_model as-is; the static resolver's pick keeps that case
  // behavior-identical to routing without Morph. If that pick has no Morph
  // mapping, the first mapped candidate stands in.
  const defaultModel = (resolvedCandidate ?? firstCandidate).morphId;

  const response = await morphRouterFetch(env, {
    input: routerInput,
    allowed_models: candidates.map(candidate => candidate.morphId),
    policy,
    default_model: defaultModel,
  });

  // Candidates were sent as allowed_models, so anything else back means the
  // router ignored the allow-list; never serve a model the tier doesn't own.
  // Resolving against the candidates that were sent (not a global reverse
  // map) keeps the answer tied to this tier's own Kilo id.
  const chosen = candidates.find(candidate => candidate.morphId === response.model);
  if (!chosen) {
    throw new MorphRouterError(
      'Morph router returned a model outside the allowed candidates',
      'invalid_response'
    );
  }

  return {
    kind: 'routed',
    policy,
    candidateCount: candidates.length,
    decision: {
      source: 'morph_router',
      model: chosen.kiloId,
      routerModel: response.model,
      difficulty: response.difficulty ?? null,
      confidence: response.confidence ?? null,
      ambiguity: response.ambiguity ?? null,
      domain: response.domain ?? null,
    },
  };
}

/**
 * Performs one POST to the router endpoint and validates the reply.
 *
 * A rejection from the API-key cache read happens before the guarded fetch
 * and propagates unchanged; every later failure is a MorphRouterError.
 *
 * @param env - Worker bindings providing `MORPH_API_KEY`.
 * @param body - JSON-serializable request payload.
 * @returns The schema-validated response body.
 * @throws MorphRouterError with stage `timeout` or `fetch` when the request
 *   rejects, `http_<status>` on a non-2xx reply, and `invalid_response` when
 *   the body is not JSON or fails schema validation.
 */
async function morphRouterFetch(
  env: MorphRouterEnv,
  body: Record<string, unknown>
): Promise<z.infer<typeof morphRouterResponseSchema>> {
  const apiKey = await apiKeyCache.get(env);
  let response: Response;
  try {
    response = await fetch(MORPH_ROUTER_ENDPOINT, {
      method: 'POST',
      headers: {
        authorization: `Bearer ${apiKey}`,
        'content-type': 'application/json',
      },
      body: JSON.stringify(body),
      signal: AbortSignal.timeout(MORPH_ROUTER_TIMEOUT_MS),
    });
  } catch (error) {
    const isTimeout = error instanceof DOMException && error.name === 'TimeoutError';
    throw new MorphRouterError(
      isTimeout ? 'Morph router request timed out' : 'Morph router request failed',
      isTimeout ? 'timeout' : 'fetch'
    );
  }
  if (!response.ok) {
    // The error body is never read; cancel it so the unread stream does not
    // keep the upstream connection held. Best-effort: a failed cancel must
    // not mask the HTTP failure being reported.
    void response.body?.cancel().catch(() => undefined);
    throw new MorphRouterError(
      `Morph router returned ${response.status}`,
      `http_${response.status}`
    );
  }
  let json: unknown;
  try {
    json = await response.json();
  } catch {
    throw new MorphRouterError('Morph router returned invalid JSON', 'invalid_response');
  }
  const parsed = morphRouterResponseSchema.safeParse(json);
  if (!parsed.success) {
    throw new MorphRouterError('Morph router returned an unexpected shape', 'invalid_response');
  }
  return parsed.data;
}

// ---------------------------------------------------------------------------
// Deployment wiring carried by the same change (not part of this module):
//
//   services/auto-routing/worker-configuration.d.ts
//     Generated by `wrangler types --include-runtime=false`
//     (hash 0d84c4429525cf1b432d2ffe636e1ca8 -> 74c4eacc91b14192928c9267613f0a38);
//     `__BaseEnv_Env` gains `MORPH_API_KEY: SecretsStoreSecret;`.
//
//   services/auto-routing/wrangler.jsonc
//     Adds the secrets-store binding
//       { "binding": "MORPH_API_KEY",
//         "store_id": "342a86d9e3a94da698e82d0c6e2a36f0",
//         "secret_name": "MORPH_API_KEY" }
// ---------------------------------------------------------------------------