From d37c9e63e96b4fd0abdad55813273ac21a3903dd Mon Sep 17 00:00:00 2001 From: Shrey Birmiwal Date: Fri, 12 Jun 2026 12:41:38 -0700 Subject: [PATCH] feat(auto-routing): Morph model router decisions for kilo-auto tiers The auto-routing worker can now produce real per-prompt routing decisions via the Morph model router, filling the decision field the contract has carried as null. The gateway sends tier routing context (candidates plus the static resolver's pick) with each mirror; the worker consults Morph in parallel with the existing classifier, caches decisions per conversation, and returns them in shadow mode behind the morph_router_enabled KV flag. Co-Authored-By: Claude Fable 5 --- .../src/app/api/openrouter/[...path]/route.ts | 15 +- .../src/lib/ai-gateway/auto-model/index.ts | 38 +++ .../ai-gateway/auto-routing-mirror.test.ts | 19 ++ .../src/lib/ai-gateway/auto-routing-mirror.ts | 10 +- .../src/contracts.test.ts | 52 ++++ packages/auto-routing-contracts/src/index.ts | 36 ++- services/auto-routing/.dev.vars.example | 19 ++ .../auto-routing/src/classifier-config.ts | 14 + services/auto-routing/src/decide.ts | 172 +++++++++++- services/auto-routing/src/decision-cache.ts | 59 +++- services/auto-routing/src/index.test.ts | 205 ++++++++++++++ .../auto-routing/src/morph-router.test.ts | 257 ++++++++++++++++++ services/auto-routing/src/morph-router.ts | 226 +++++++++++++++ .../auto-routing/worker-configuration.d.ts | 3 +- services/auto-routing/wrangler.jsonc | 5 + 15 files changed, 1110 insertions(+), 20 deletions(-) create mode 100644 services/auto-routing/.dev.vars.example create mode 100644 services/auto-routing/src/morph-router.test.ts create mode 100644 services/auto-routing/src/morph-router.ts diff --git a/apps/web/src/app/api/openrouter/[...path]/route.ts b/apps/web/src/app/api/openrouter/[...path]/route.ts index ad765d144c..b7e9442985 100644 --- a/apps/web/src/app/api/openrouter/[...path]/route.ts +++ b/apps/web/src/app/api/openrouter/[...path]/route.ts @@ -89,7 
+89,11 @@ import { import { normalizeModelId } from '@/lib/ai-gateway/model-utils'; import { isForbiddenFreeModel } from '@/lib/ai-gateway/forbidden-free-models'; import { isCloudflareIP } from '@/lib/cloudflare-ip'; -import { isKiloAutoModel, KILO_AUTO_FREE_MODEL } from '@/lib/ai-gateway/auto-model'; +import { + getMorphRouterCandidates, + isKiloAutoModel, + KILO_AUTO_FREE_MODEL, +} from '@/lib/ai-gateway/auto-model'; import { applyResolvedAutoModel } from '@/lib/ai-gateway/auto-model/resolution'; import type { MicrodollarUsageContext } from '@/lib/ai-gateway/processUsage.types'; import { @@ -731,6 +735,15 @@ export async function POST(request: NextRequest): Promise m.id === model) || model === KILO_AUTO_LEGACY_MODEL; } + +// Models each kilo-auto tier may route among when a per-prompt router (the +// Morph model router, consulted by the auto-routing worker) picks the model +// instead of the static mode mapping. Membership here is a product decision: +// frontier may roam across frontier-class models from any provider, balanced +// across mid-priced models. Tiers absent from this map (free rotates by +// availability, small is balance-based) keep static resolution only. +// +// Per .specs/model-experiments.md, experimented public ids must never be +// added to these candidate sets. +const MORPH_ROUTER_TIER_CANDIDATES: Record = { + [KILO_AUTO_FRONTIER_MODEL.id]: [ + CLAUDE_OPUS_CURRENT_MODEL_ID, + CLAUDE_SONNET_CURRENT_MODEL_ID, + GPT_CURRENT_MODEL_ID, + GEMINI_PRO_CURRENT_MODEL_ID, + ], + [KILO_AUTO_BALANCED_MODEL.id]: [ + QWEN37_PLUS_MODEL_ID, + CLAUDE_HAIKU_CURRENT_MODEL_ID, + GEMINI_FLASH_CURRENT_MODEL_ID, + ], + [KILO_AUTO_LEGACY_MODEL]: [ + QWEN37_PLUS_MODEL_ID, + CLAUDE_HAIKU_CURRENT_MODEL_ID, + GEMINI_FLASH_CURRENT_MODEL_ID, + ], +}; + +export function getMorphRouterCandidates(autoModel: string): readonly string[] { + return MORPH_ROUTER_TIER_CANDIDATES[autoModel] ?? 
[]; +} diff --git a/apps/web/src/lib/ai-gateway/auto-routing-mirror.test.ts b/apps/web/src/lib/ai-gateway/auto-routing-mirror.test.ts index 5500a18033..b9b4a7ff84 100644 --- a/apps/web/src/lib/ai-gateway/auto-routing-mirror.test.ts +++ b/apps/web/src/lib/ai-gateway/auto-routing-mirror.test.ts @@ -85,6 +85,7 @@ describe('scheduleAutoRoutingMirror', () => { mode: 'code', userAgent: 'Kilo-Code/1.2.3', bodyBytes: 512, + routing: null, }); // TypeScript cannot see the schema's runtime refinements (.trim().min(1) // etc.), so round-trip the built payload through the worker's validator. @@ -95,6 +96,24 @@ describe('scheduleAutoRoutingMirror', () => { expect(headers.get('content-type')).toBe('application/json'); }); + it('passes the kilo-auto routing context through to the worker', async () => { + const routing = { + autoModel: 'kilo-auto/frontier', + candidateModels: ['anthropic/claude-opus-4.8', 'openai/gpt-5.5'], + resolvedModel: 'anthropic/claude-opus-4.8', + }; + scheduleAutoRoutingMirror( + { ...makeParams(), routing }, + work => scheduledWork.push(work), + options + ); + await scheduledWork[0](); + + const payload = JSON.parse(mockedFetch.mock.calls[0][1]?.body as string); + expect(payload.routing).toEqual(routing); + expect(() => MirrorPayloadSchema.parse(payload)).not.toThrow(); + }); + it('skips mirroring when the body cannot be normalized, with a log for visibility', async () => { scheduleAutoRoutingMirror( { ...makeParams(), body: { stream: true } }, diff --git a/apps/web/src/lib/ai-gateway/auto-routing-mirror.ts b/apps/web/src/lib/ai-gateway/auto-routing-mirror.ts index 6192bb9bef..e8415a46fb 100644 --- a/apps/web/src/lib/ai-gateway/auto-routing-mirror.ts +++ b/apps/web/src/lib/ai-gateway/auto-routing-mirror.ts @@ -1,5 +1,9 @@ import { normalizeClassifierInput } from '@kilocode/auto-routing-contracts'; -import type { ClassifierApiKind, MirrorPayload } from '@kilocode/auto-routing-contracts'; +import type { + ClassifierApiKind, + MirrorPayload, + 
RoutingContext, +} from '@kilocode/auto-routing-contracts'; import { after } from 'next/server'; import { AUTO_ROUTING_WORKER_URL, INTERNAL_API_SECRET } from '@/lib/config.server'; import { warnExceptInTest } from '@/lib/utils.server'; @@ -19,6 +23,9 @@ type ScheduleAutoRoutingMirrorParams = { clientRequestId: string | null; mode: string | null; userAgent: string | null; + // Present only for kilo-auto requests: the tier, its router candidates, + // and the model the static resolver picked. + routing?: RoutingContext | null; authContext?: Promise<{ organizationId?: string | null }>; }; @@ -62,6 +69,7 @@ async function sendAutoRoutingMirror( mode: params.mode, userAgent: params.userAgent, bodyBytes: params.bodyBytes, + routing: params.routing ?? null, }; const response = await fetch(`${workerUrl}/decide`, { diff --git a/packages/auto-routing-contracts/src/contracts.test.ts b/packages/auto-routing-contracts/src/contracts.test.ts index 56257f8f05..fe285e41db 100644 --- a/packages/auto-routing-contracts/src/contracts.test.ts +++ b/packages/auto-routing-contracts/src/contracts.test.ts @@ -55,6 +55,58 @@ describe('auto routing contracts', () => { }) ).toEqual({ cost: 0, decision: null, classifierResult: null }); + // Routing context is optional (deploys never coordinate) and validated + // when present. 
+ const routing = { + autoModel: 'kilo-auto/frontier', + candidateModels: ['anthropic/claude-opus-4.8', 'openai/gpt-5.5'], + resolvedModel: 'anthropic/claude-opus-4.8', + }; + expect(MirrorPayloadSchema.parse({ ...mirrorPayload, routing })).toMatchObject({ routing }); + expect(MirrorPayloadSchema.parse({ ...mirrorPayload, routing: null })).toMatchObject({ + routing: null, + }); + expect(() => + MirrorPayloadSchema.parse({ ...mirrorPayload, routing: { autoModel: '' } }) + ).toThrow(); + expect(() => + MirrorPayloadSchema.parse({ + ...mirrorPayload, + routing: { ...routing, candidateModels: [''] }, + }) + ).toThrow(); + + const routerDecision = { + source: 'morph_router', + model: 'anthropic/claude-sonnet-4.6', + routerModel: 'claude-sonnet-4-6', + difficulty: 'easy', + confidence: 0.97, + ambiguity: 'low', + domain: 'coding', + }; + expect( + AutoRoutingDecisionResponseSchema.parse({ + cost: 0, + decision: routerDecision, + classifierResult: null, + }) + ).toMatchObject({ decision: { model: 'anthropic/claude-sonnet-4.6' } }); + expect( + AutoRoutingDecisionResponseSchema.parse({ + cost: 0, + decision: { ...routerDecision, difficulty: null, confidence: null }, + classifierResult: null, + }) + ).toMatchObject({ decision: { difficulty: null } }); + expect(() => + AutoRoutingDecisionResponseSchema.parse({ + cost: 0, + decision: { ...routerDecision, source: 'other_router' }, + classifierResult: null, + }) + ).toThrow(); + expect( AutoRoutingDecisionResponseSchema.parse({ cost: 0, diff --git a/packages/auto-routing-contracts/src/index.ts b/packages/auto-routing-contracts/src/index.ts index ef537f600e..f6660f3f92 100644 --- a/packages/auto-routing-contracts/src/index.ts +++ b/packages/auto-routing-contracts/src/index.ts @@ -7,6 +7,23 @@ export { type NormalizedClassifierInput, } from './input'; +// Routing context for kilo-auto requests: which pseudo-model the user +// selected, which models that tier may route among, and what the gateway's +// static resolver picked. 
The worker uses it to produce (and score) routing +// decisions; non-auto requests carry no routing context. +export const RoutingContextSchema = z.object({ + // The kilo-auto pseudo-model from the original request, e.g. + // 'kilo-auto/frontier'. + autoModel: z.string().trim().min(1), + // Kilo public model ids this tier may route among. The gateway owns this + // set so tier membership stays a product decision, not a worker default. + candidateModels: z.array(z.string().trim().min(1)).max(32), + // The model the static resolver picked for this request; the baseline a + // router decision is compared against. + resolvedModel: z.string().trim().min(1).nullable(), +}); +export type RoutingContext = z.infer<typeof RoutingContextSchema>; + // What the gateway mirrors to the auto-routing worker per request: the // already-normalized classifier input plus caller identity. The gateway // normalizes before sending so the multi-hundred-KB request body never @@ -25,6 +42,8 @@ export const MirrorPayloadSchema = z.object({ // Size of the original request body, kept as an analytics dimension now // that the body itself is no longer mirrored. bodyBytes: z.number().int().nonnegative(), + // Optional so gateway and worker deploys never have to coordinate. + routing: RoutingContextSchema.nullable().optional(), }); export type MirrorPayload = z.infer<typeof MirrorPayloadSchema>; @@ -96,9 +115,24 @@ export const ClassifierOutputSchema = z }); export type ClassifierOutput = z.infer<typeof ClassifierOutputSchema>; +// A routing decision produced by the Morph model router +// (https://docs.morphllm.com/sdk/components/router). `model` is the Kilo +// public id to serve; `routerModel` is the router-catalog id it mapped from. +// Classification heads below their confidence threshold come back null.
+export const RouterDecisionSchema = z.object({ + source: z.literal('morph_router'), + model: z.string().trim().min(1), + routerModel: z.string().trim().min(1), + difficulty: z.string().nullable(), + confidence: z.number().nullable(), + ambiguity: z.string().nullable(), + domain: z.string().nullable(), +}); +export type RouterDecision = z.infer<typeof RouterDecisionSchema>; + export const AutoRoutingDecisionResponseSchema = z.object({ cost: z.number(), - decision: z.null(), + decision: RouterDecisionSchema.nullable(), classifierResult: z .object({ classification: ClassifierOutputSchema, diff --git a/services/auto-routing/.dev.vars.example b/services/auto-routing/.dev.vars.example new file mode 100644 index 0000000000..62dac656eb --- /dev/null +++ b/services/auto-routing/.dev.vars.example @@ -0,0 +1,19 @@ +# Auto-routing worker local development secrets. +# +# Copy this file to .dev.vars to boot the worker locally: +# cp .dev.vars.example .dev.vars +# +# Wrangler serves these as the local values for the secrets-store bindings +# declared in wrangler.jsonc. +# +# Bearer token the gateway uses to call this worker; any non-empty value +# works locally as long as requests send the same one. +INTERNAL_API_SECRET_PROD=local-dev-secret + +# OpenRouter key used by the prompt classifier (https://openrouter.ai/keys). +OPENROUTER_API_KEY=replace-me + +# Morph key used by the model router (https://morphllm.com — dashboard > API +# keys).
Only consulted when the morph_router_enabled KV flag is 'true': +# wrangler kv key put morph_router_enabled true --binding AUTO_ROUTING_CONFIG --local +MORPH_API_KEY=replace-me diff --git a/services/auto-routing/src/classifier-config.ts b/services/auto-routing/src/classifier-config.ts index 6b0687a539..9f854e9cdc 100644 --- a/services/auto-routing/src/classifier-config.ts +++ b/services/auto-routing/src/classifier-config.ts @@ -4,6 +4,7 @@ import { ttlCached } from './ttl-cache'; export const CLASSIFIER_MODEL_CONFIG_KEY = 'classifier_model'; export const DECISION_LOG_SAMPLE_RATE_CONFIG_KEY = 'decision_log_sample_rate'; +export const MORPH_ROUTER_ENABLED_CONFIG_KEY = 'morph_router_enabled'; // Successful decisions are high volume (~30/s) and only needed for latency // and cache hit-rate percentiles, so they are sampled by default. The rate @@ -38,9 +39,18 @@ const decisionLogSampleRateCache = ttlCached( } ); +// Morph router decisions are off unless explicitly enabled, so the worker +// never sends prompt prefixes to a third-party router without an operator +// opting in. Same KV+TTL pattern as the classifier model. +const morphRouterEnabledCache = ttlCached(CONFIG_CACHE_TTL_MS, async (env: ClassifierConfigEnv) => { + const configured = await env.AUTO_ROUTING_CONFIG.get(MORPH_ROUTER_ENABLED_CONFIG_KEY); + return configured?.trim() === 'true'; +}); + export function clearClassifierConfigCache(): void { classifierModelCache.clear(); decisionLogSampleRateCache.clear(); + morphRouterEnabledCache.clear(); } // Config reads run before the guarded decision path. 
A transient KV failure @@ -69,6 +79,10 @@ export function getDecisionLogSampleRate(env: ClassifierConfigEnv): Promise { + return morphRouterEnabledCache.get(env).catch(failClosed(MORPH_ROUTER_ENABLED_CONFIG_KEY, false)); +} + export async function setClassifierModel( env: ClassifierConfigEnv, model: string diff --git a/services/auto-routing/src/decide.ts b/services/auto-routing/src/decide.ts index 3cc94edc56..0cc087bde3 100644 --- a/services/auto-routing/src/decide.ts +++ b/services/auto-routing/src/decide.ts @@ -3,12 +3,17 @@ import type { AutoRoutingDecisionResponse, MirrorPayload, NormalizedClassifierInput, + RouterDecision, } from '@kilocode/auto-routing-contracts'; import { formatError } from '@kilocode/worker-utils'; import type { Handler } from 'hono'; import { writeClassifierMetricsDataPoint } from './classifier-analytics'; import type { ClassifierAnalyticsStatus } from './classifier-analytics'; -import { getClassifierModel, getDecisionLogSampleRate } from './classifier-config'; +import { + getClassifierModel, + getDecisionLogSampleRate, + getMorphRouterEnabled, +} from './classifier-config'; import type { ClassifierOutput } from './classifier-output'; import { computeContentHashes, @@ -17,9 +22,15 @@ import { hashIdentifierForTelemetry, } from './conversation-identity'; import type { ContentHashes } from './conversation-identity'; -import { getCachedClassification, putCachedClassification } from './decision-cache'; +import { + getCachedClassification, + getCachedRouterDecision, + putCachedClassification, + putCachedRouterDecision, +} from './decision-cache'; import { ClassifierRunError, classifyNormalizedInput } from './model-classifier'; import type { ClassifierRunResult } from './model-classifier'; +import { MorphRouterError, routeWithMorphRouter, routerConfigFingerprint } from './morph-router'; import type { HonoEnv } from './hono-env'; // Isolate-scoped request counter, used to correlate latency with isolate @@ -29,19 +40,23 @@ let isolateRequestSeq = 
0; function decisionResponse( cost: number, classification: ClassifierOutput, - normalized: NormalizedClassifierInput + normalized: NormalizedClassifierInput, + decision: RouterDecision | null ): AutoRoutingDecisionResponse { return { cost, - decision: null, + decision, classifierResult: { classification, normalized }, }; } -function emptyDecisionResponse(cost = 0): AutoRoutingDecisionResponse { +function emptyDecisionResponse( + cost = 0, + decision: RouterDecision | null = null +): AutoRoutingDecisionResponse { return { cost, - decision: null, + decision, classifierResult: null, }; } @@ -248,6 +263,122 @@ function recordDecision( ); } +type RouterDecisionStatus = 'routed' | `skipped:${string}` | `router_error:${string}`; + +// Single sink for router telemetry, mirroring recordDecision: routed +// outcomes (including cache hits) and skips are sampled, errors always log +// at warn. Skips fire on every request of a tier with no routable +// candidates, so they share the success sample rate. +function recordRouterDecision( + ctx: DecisionContext, + durationMs: number, + status: RouterDecisionStatus, + decision: RouterDecision | null, + cacheHit: boolean, + details: Record +): void { + const isFailure = status.startsWith('router_error:'); + if (!isFailure && Math.random() >= ctx.successSampleRate) { + return; + } + const log = isFailure ? console.warn : console.log; + log( + JSON.stringify({ + event: 'auto_routing_router_decision', + status, + cacheHit, + autoModel: ctx.payload.routing?.autoModel ?? null, + resolvedModel: ctx.payload.routing?.resolvedModel ?? null, + routedModel: decision?.model ?? null, + routerModel: decision?.routerModel ?? null, + difficulty: decision?.difficulty ?? null, + confidence: decision?.confidence ?? null, + ambiguity: decision?.ambiguity ?? null, + domain: decision?.domain ?? 
null, + routerDurationMs: Math.round(durationMs), + requestedModel: ctx.payload.input.requestedModel, + apiKind: ctx.payload.input.apiKind, + sessionId: ctx.payload.sessionId, + clientRequestId: ctx.payload.clientRequestId, + userIdHash: ctx.userIdHash, + mode: ctx.payload.mode, + reqSeq: ctx.reqSeq, + colo: ctx.colo, + ...details, + }) + ); +} + +// Produces the Morph router decision for kilo-auto requests that carry +// routing context. Runs alongside classification and never throws: a router +// failure must not cost the caller a classification (or vice versa), so all +// failure modes collapse to a null decision plus telemetry. +async function resolveRouterDecision( + env: Env, + ctx: DecisionContext, + waitUntil: (promise: Promise<unknown>) => void +): Promise<RouterDecision | null> { + const routing = ctx.payload.routing; + if (!routing) return null; + if (!(await getMorphRouterEnabled(env))) return null; + + const startedAt = performance.now(); + const fingerprint = routerConfigFingerprint(routing); + const cached = await getCachedRouterDecision( + env, + ctx.conversationKey, + ctx.hashes.exact, + fingerprint + ); + if (cached) { + recordRouterDecision(ctx, performance.now() - startedAt, 'routed', cached, true, {}); + return cached; + } + + try { + const outcome = await routeWithMorphRouter(env, routing, ctx.payload.input); + if (outcome.kind === 'skipped') { + recordRouterDecision( + ctx, + performance.now() - startedAt, + `skipped:${outcome.reason}`, + null, + false, + {} + ); + return null; + } + waitUntil( + putCachedRouterDecision( + env, + ctx.conversationKey, + ctx.hashes.exact, + fingerprint, + outcome.decision + ) + ); + recordRouterDecision(ctx, performance.now() - startedAt, 'routed', outcome.decision, false, { + policy: outcome.policy, + candidateCount: outcome.candidateCount, + }); + return outcome.decision; + } catch (error) { + const status: RouterDecisionStatus = + error instanceof MorphRouterError + ?
`router_error:${error.failureStage}` + : 'router_error:unexpected_error'; + recordRouterDecision( + ctx, + performance.now() - startedAt, + status, + null, + false, + formatError(error) + ); + return null; + } +} + export const decideHandler: Handler = async c => { let rawBody: unknown; try { @@ -281,6 +412,12 @@ export const decideHandler: Handler = async c => { successSampleRate, }; + // The router decision runs alongside the classification flow; both + // resolve before the response so the gateway gets one combined result. + const routerDecisionPromise = resolveRouterDecision(c.env, ctx, promise => + c.executionCtx.waitUntil(promise) + ); + const cached = await getCachedClassification( c.env, ctx.conversationKey, @@ -293,7 +430,7 @@ export const decideHandler: Handler = async c => { classifierModel, classification: cached, }); - return c.json(decisionResponse(0, cached, payload.input)); + return c.json(decisionResponse(0, cached, payload.input, await routerDecisionPromise)); } try { @@ -312,14 +449,25 @@ export const decideHandler: Handler = async c => { ); } recordDecision(c.env, ctx, performance.now() - startedAt, { kind: 'model', classifier }); - // When routing decisions are implemented, include the prior decision for - // this session as an input alongside classifier output. - return c.json(decisionResponse(classifier.cost ?? 0, classifier.classification, payload.input)); + return c.json( + decisionResponse( + classifier.cost ?? 0, + classifier.classification, + payload.input, + await routerDecisionPromise + ) + ); } catch (error) { recordDecision(c.env, ctx, performance.now() - startedAt, { kind: 'error', error }); // A failed run can still have billed the first attempt (e.g. a valid-but- // invalid response followed by a throwing retry), so report that cost - // even though there is no usable classifier result. - return c.json(emptyDecisionResponse(getClassifierFailureMetadata(error).cost ?? 0)); + // even though there is no usable classifier result. 
A router decision is + // still useful without a classification, so it is returned regardless. + return c.json( + emptyDecisionResponse( + getClassifierFailureMetadata(error).cost ?? 0, + await routerDecisionPromise + ) + ); } }; diff --git a/services/auto-routing/src/decision-cache.ts b/services/auto-routing/src/decision-cache.ts index a4bd929bf8..a878254e76 100644 --- a/services/auto-routing/src/decision-cache.ts +++ b/services/auto-routing/src/decision-cache.ts @@ -1,4 +1,9 @@ -import { ClassifierOutputSchema, type ClassifierOutput } from '@kilocode/auto-routing-contracts'; +import { + ClassifierOutputSchema, + RouterDecisionSchema, + type ClassifierOutput, + type RouterDecision, +} from '@kilocode/auto-routing-contracts'; import { DurableObject } from 'cloudflare:workers'; // Mirrored agent sessions classify the same prompt prefixes on every API @@ -13,13 +18,18 @@ const ENTRY_TTL_MS = 30 * 60 * 1000; // Cloudflare caps storage.delete() at 128 keys per call. const DELETE_BATCH_SIZE = 128; +// Classifications and router decisions share one object per conversation; +// entries may have been written by an older worker version, so read sites +// validate values with the matching schema before serving them. 
+type CacheableValue = ClassifierOutput | RouterDecision; + type StoredEntry = { - value: ClassifierOutput; + value: CacheableValue; storedAt: number; }; export class AutoRoutingDecisionCacheDO extends DurableObject { - async getEntry(key: string): Promise { + async getEntry(key: string): Promise { const entry = await this.ctx.storage.get(key); if (!entry) return null; if (Date.now() - entry.storedAt > ENTRY_TTL_MS) { @@ -29,7 +39,7 @@ export class AutoRoutingDecisionCacheDO extends DurableObject { return entry.value; } - async putEntry(key: string, value: ClassifierOutput): Promise { + async putEntry(key: string, value: CacheableValue): Promise { await this.ctx.storage.put(key, { value, storedAt: Date.now() } satisfies StoredEntry); // A fixed-period sweep (rather than an idle alarm pushed out on every // write) so storage stays bounded even when distinct conversations @@ -109,3 +119,44 @@ export async function putCachedClassification( // Cache writes are best effort and must not fail the decision. } } + +function routerEntryKey(contentHash: string, configFingerprint: string): string { + // The candidate-set/policy fingerprint is part of the key so tier or + // policy changes never serve a model the new config would not pick. + return `morph:${configFingerprint}:${contentHash}`; +} + +export async function getCachedRouterDecision( + env: DecisionCacheEnv, + conversationKey: string, + contentHash: string, + configFingerprint: string +): Promise { + try { + const value = await cacheStub(env, conversationKey).getEntry( + routerEntryKey(contentHash, configFingerprint) + ); + if (!value) return null; + const parsed = RouterDecisionSchema.safeParse(value); + return parsed.success ? 
parsed.data : null; + } catch { + return null; + } +} + +export async function putCachedRouterDecision( + env: DecisionCacheEnv, + conversationKey: string, + contentHash: string, + configFingerprint: string, + decision: RouterDecision +): Promise<void> { + try { + await cacheStub(env, conversationKey).putEntry( + routerEntryKey(contentHash, configFingerprint), + decision + ); + } catch { + // Cache writes are best effort and must not fail the decision. + } +} diff --git a/services/auto-routing/src/index.test.ts b/services/auto-routing/src/index.test.ts index 89b9ba675c..7c53a95a4f 100644 --- a/services/auto-routing/src/index.test.ts +++ b/services/auto-routing/src/index.test.ts @@ -2,6 +2,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { clearClassifierConfigCache } from './classifier-config'; import { app } from './index'; import { ClassifierRunError } from './model-classifier'; +import { clearMorphApiKeyCache, MORPH_ROUTER_ENDPOINT } from './morph-router'; import type * as ModelClassifierModule from './model-classifier'; const classifyNormalizedInput = vi.hoisted(() => vi.fn()); @@ -25,6 +26,9 @@ const env = { INTERNAL_API_SECRET_PROD: { get: async () => 'classifier-token', }, + MORPH_API_KEY: { + get: async () => 'morph-key', + }, AUTO_ROUTING_CONFIG: { get: configGet, put: configPut, @@ -117,6 +121,7 @@ function decideRequest(payload: unknown) { describe('auto routing worker', () => { beforeEach(() => { clearClassifierConfigCache(); + clearMorphApiKeyCache(); classifyNormalizedInput.mockReset(); classifyNormalizedInput.mockResolvedValue(mockClassifierResult); writeDataPoint.mockReset(); @@ -385,6 +390,206 @@ describe('auto routing worker', () => { }); }); + describe('morph router decisions', () => { + const frontierRouting = { + autoModel: 'kilo-auto/frontier', + candidateModels: [ + 'anthropic/claude-opus-4.8', + 'anthropic/claude-sonnet-4.6', + 'openai/gpt-5.5', + 'google/gemini-3.1-pro-preview', + ], + resolvedModel:
'anthropic/claude-opus-4.8', + }; + + const morphDecision = { + source: 'morph_router', + model: 'anthropic/claude-sonnet-4.6', + routerModel: 'claude-sonnet-4-6', + difficulty: 'easy', + confidence: 0.97, + ambiguity: 'low', + domain: 'coding', + }; + + function enableMorphRouter() { + configGet.mockImplementation(async (key: string) => + key === 'morph_router_enabled' ? 'true' : null + ); + } + + function mockMorphResponse() { + mockedFetch.mockResolvedValueOnce( + new Response( + JSON.stringify({ + model: 'claude-sonnet-4-6', + provider: 'anthropic', + difficulty: 'easy', + confidence: 0.97, + ambiguity: 'low', + domain: 'coding', + }), + { status: 200 } + ) + ); + } + + it('returns a morph decision alongside the classification when enabled', async () => { + const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + vi.spyOn(Math, 'random').mockReturnValue(0); + enableMorphRouter(); + mockMorphResponse(); + + const response = await decideRequest(mirrorPayload({ routing: frontierRouting })); + + expect(response.status).toBe(200); + await expect(response.json()).resolves.toEqual({ + cost: 0.00000123, + decision: morphDecision, + classifierResult: { + classification: mockClassification, + normalized: normalizedInput, + }, + }); + expect(mockedFetch).toHaveBeenCalledWith( + MORPH_ROUTER_ENDPOINT, + expect.objectContaining({ method: 'POST' }) + ); + // One classifier decision line and one router decision line. 
+ const routerLog = logSpy.mock.calls + .map(call => JSON.parse(String(call[0]))) + .find(line => line.event === 'auto_routing_router_decision'); + expect(routerLog).toMatchObject({ + status: 'routed', + cacheHit: false, + autoModel: 'kilo-auto/frontier', + resolvedModel: 'anthropic/claude-opus-4.8', + routedModel: 'anthropic/claude-sonnet-4.6', + routerModel: 'claude-sonnet-4-6', + difficulty: 'easy', + policy: 'capability_heavy', + candidateCount: 4, + userIdHash: expect.stringMatching(/^[0-9a-f]{16}$/), + }); + expect(JSON.stringify(routerLog)).not.toContain('user-1'); + // The fresh decision is cached for the conversation, scoped by the + // candidate-set fingerprint. + expect(cachePutEntry).toHaveBeenCalledWith( + expect.stringMatching(/^morph:capability_heavy:/), + morphDecision + ); + }); + + it('returns a null decision without calling Morph when the flag is off', async () => { + const response = await decideRequest(mirrorPayload({ routing: frontierRouting })); + + expect(response.status).toBe(200); + await expect(response.json()).resolves.toMatchObject({ decision: null }); + expect(mockedFetch).not.toHaveBeenCalled(); + }); + + it('returns a null decision for payloads without routing context', async () => { + enableMorphRouter(); + + const response = await decideRequest(mirrorPayload()); + + expect(response.status).toBe(200); + await expect(response.json()).resolves.toMatchObject({ decision: null }); + expect(mockedFetch).not.toHaveBeenCalled(); + }); + + it('serves cached router decisions without calling Morph again', async () => { + enableMorphRouter(); + cacheGetEntry.mockImplementation(async (key: string) => + key.startsWith('morph:') ? 
morphDecision : null + ); + + const response = await decideRequest(mirrorPayload({ routing: frontierRouting })); + + expect(response.status).toBe(200); + await expect(response.json()).resolves.toMatchObject({ decision: morphDecision }); + expect(mockedFetch).not.toHaveBeenCalled(); + expect(cachePutEntry).toHaveBeenCalledTimes(1); // classification only + }); + + it('still returns the classification when the router call fails', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + enableMorphRouter(); + mockedFetch.mockResolvedValueOnce(new Response('overloaded', { status: 503 })); + + const response = await decideRequest(mirrorPayload({ routing: frontierRouting })); + + expect(response.status).toBe(200); + await expect(response.json()).resolves.toEqual({ + cost: 0.00000123, + decision: null, + classifierResult: { + classification: mockClassification, + normalized: normalizedInput, + }, + }); + const routerLog = warnSpy.mock.calls + .map(call => JSON.parse(String(call[0]))) + .find(line => line.event === 'auto_routing_router_decision'); + expect(routerLog).toMatchObject({ status: 'router_error:http_503' }); + }); + + it('still returns the router decision when the classifier fails', async () => { + vi.spyOn(console, 'warn').mockImplementation(() => {}); + vi.spyOn(Math, 'random').mockReturnValue(0); + enableMorphRouter(); + mockMorphResponse(); + classifyNormalizedInput.mockRejectedValueOnce( + new ClassifierRunError('classifier exploded', { + cost: null, + classifierModel: 'google/gemini-2.5-flash-lite', + }) + ); + + const response = await decideRequest(mirrorPayload({ routing: frontierRouting })); + + expect(response.status).toBe(200); + await expect(response.json()).resolves.toEqual({ + cost: 0, + decision: morphDecision, + classifierResult: null, + }); + }); + + it('logs skipped routing for tiers without enough routable candidates', async () => { + const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + 
vi.spyOn(Math, 'random').mockReturnValue(0); + enableMorphRouter(); + + const response = await decideRequest( + mirrorPayload({ + routing: { + autoModel: 'kilo-auto/balanced', + candidateModels: ['qwen/qwen3.7-plus'], + resolvedModel: 'qwen/qwen3.7-plus', + }, + }) + ); + + expect(response.status).toBe(200); + await expect(response.json()).resolves.toMatchObject({ decision: null }); + expect(mockedFetch).not.toHaveBeenCalled(); + const routerLog = logSpy.mock.calls + .map(call => JSON.parse(String(call[0]))) + .find(line => line.event === 'auto_routing_router_decision'); + expect(routerLog).toMatchObject({ status: 'skipped:insufficient_candidates' }); + }); + + it('rejects payloads with malformed routing context', async () => { + const response = await decideRequest( + mirrorPayload({ routing: { autoModel: 'kilo-auto/frontier' } }) + ); + + expect(response.status).toBe(400); + await expect(response.json()).resolves.toEqual({ error: 'Invalid classifier payload' }); + }); + }); + it('rejects invalid JSON wrapper bodies', async () => { const response = await request('/decide', { method: 'POST', diff --git a/services/auto-routing/src/morph-router.test.ts b/services/auto-routing/src/morph-router.test.ts new file mode 100644 index 0000000000..396afd0e85 --- /dev/null +++ b/services/auto-routing/src/morph-router.test.ts @@ -0,0 +1,257 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import type { NormalizedClassifierInput, RoutingContext } from '@kilocode/auto-routing-contracts'; +import { + MORPH_ROUTER_ENDPOINT, + MorphRouterError, + buildRouterInput, + clearMorphApiKeyCache, + routeWithMorphRouter, + routerConfigFingerprint, +} from './morph-router'; + +const originalFetch = globalThis.fetch; +const mockedFetch = vi.fn(); +const apiKeyGet = vi.fn(async () => 'morph-key'); + +const env = { MORPH_API_KEY: { get: apiKeyGet } } as unknown as Pick; + +const normalizedInput: NormalizedClassifierInput = { + apiKind: 'chat_completions', + 
requestedModel: 'kilo-auto/frontier', + systemPromptPrefix: 'You are Kilo Code.', + userPromptPrefix: 'Add a null check to this getter.', + latestUserPromptPrefix: null, + messageCount: 1, + hasTools: true, + stream: true, + providerHints: { provider: null, providerOptions: null }, +}; + +const frontierRouting: RoutingContext = { + autoModel: 'kilo-auto/frontier', + candidateModels: [ + 'anthropic/claude-opus-4.8', + 'anthropic/claude-sonnet-4.6', + 'openai/gpt-5.5', + 'google/gemini-3.1-pro-preview', + ], + resolvedModel: 'anthropic/claude-opus-4.8', +}; + +function morphResponse(body: Record, status = 200) { + return new Response(JSON.stringify(body), { status }); +} + +describe('routeWithMorphRouter', () => { + beforeEach(() => { + clearMorphApiKeyCache(); + apiKeyGet.mockClear(); + mockedFetch.mockReset(); + globalThis.fetch = mockedFetch; + }); + + afterEach(() => { + globalThis.fetch = originalFetch; + }); + + it('routes among mapped candidates and reverse-maps the decision to a Kilo id', async () => { + mockedFetch.mockResolvedValueOnce( + morphResponse({ + model: 'gpt-5.5', + provider: 'openai', + difficulty: 'hard', + confidence: 0.91, + ambiguity: 'low', + domain: 'coding', + }) + ); + + const outcome = await routeWithMorphRouter(env, frontierRouting, normalizedInput); + + expect(outcome).toEqual({ + kind: 'routed', + policy: 'capability_heavy', + candidateCount: 4, + decision: { + source: 'morph_router', + model: 'openai/gpt-5.5', + routerModel: 'gpt-5.5', + difficulty: 'hard', + confidence: 0.91, + ambiguity: 'low', + domain: 'coding', + }, + }); + expect(mockedFetch).toHaveBeenCalledTimes(1); + const [url, init] = mockedFetch.mock.calls[0] ?? 
[]; + expect(url).toBe(MORPH_ROUTER_ENDPOINT); + expect(init?.headers).toMatchObject({ authorization: 'Bearer morph-key' }); + expect(JSON.parse(init?.body as string)).toEqual({ + input: 'Add a null check to this getter.', + allowed_models: ['claude-opus-4-8', 'claude-sonnet-4-6', 'gpt-5.5', 'gemini-3.1-pro-preview'], + policy: 'capability_heavy', + default_model: 'claude-opus-4-8', + }); + }); + + it('uses the static resolver pick as the ambiguity fallback default', async () => { + mockedFetch.mockResolvedValueOnce(morphResponse({ model: 'claude-sonnet-4-6' })); + + await routeWithMorphRouter( + env, + { ...frontierRouting, resolvedModel: 'anthropic/claude-sonnet-4.6' }, + normalizedInput + ); + + const body = JSON.parse(mockedFetch.mock.calls[0]?.[1]?.body as string); + expect(body.default_model).toBe('claude-sonnet-4-6'); + }); + + it('falls back to the first mapped candidate when the resolved model is unroutable', async () => { + mockedFetch.mockResolvedValueOnce(morphResponse({ model: 'claude-haiku-4-5-20251001' })); + + const outcome = await routeWithMorphRouter( + env, + { + autoModel: 'kilo-auto/balanced', + candidateModels: [ + 'qwen/qwen3.7-plus', + 'anthropic/claude-haiku-4.5', + 'google/gemini-3.5-flash', + ], + resolvedModel: 'qwen/qwen3.7-plus', + }, + normalizedInput + ); + + const body = JSON.parse(mockedFetch.mock.calls[0]?.[1]?.body as string); + // qwen has no Morph catalog mapping, so it is neither an allowed model + // nor the default. 
+ expect(body.allowed_models).toEqual(['claude-haiku-4-5-20251001', 'gemini-3.5-flash']); + expect(body.default_model).toBe('claude-haiku-4-5-20251001'); + expect(body.policy).toBe('balanced'); + expect(outcome).toMatchObject({ + kind: 'routed', + decision: { model: 'anthropic/claude-haiku-4.5' }, + }); + }); + + it('classifies with the latest user prompt when the conversation has continued', async () => { + mockedFetch.mockResolvedValueOnce(morphResponse({ model: 'gpt-5.5' })); + + await routeWithMorphRouter(env, frontierRouting, { + ...normalizedInput, + latestUserPromptPrefix: 'Now refactor the entire module to use the new API.', + }); + + const body = JSON.parse(mockedFetch.mock.calls[0]?.[1]?.body as string); + expect(body.input).toBe('Now refactor the entire module to use the new API.'); + }); + + it('skips tiers with fewer than two routable candidates', async () => { + const outcome = await routeWithMorphRouter( + env, + { + autoModel: 'kilo-auto/balanced', + candidateModels: ['qwen/qwen3.7-plus', 'google/gemini-3.5-flash'], + resolvedModel: 'qwen/qwen3.7-plus', + }, + normalizedInput + ); + + expect(outcome).toEqual({ kind: 'skipped', reason: 'insufficient_candidates' }); + expect(mockedFetch).not.toHaveBeenCalled(); + }); + + it('skips tiers without a policy mapping', async () => { + const outcome = await routeWithMorphRouter( + env, + { ...frontierRouting, autoModel: 'kilo-auto/imaginary' }, + normalizedInput + ); + + expect(outcome).toEqual({ kind: 'skipped', reason: 'unknown_tier' }); + expect(mockedFetch).not.toHaveBeenCalled(); + }); + + it('skips requests without any user prompt to classify', async () => { + const outcome = await routeWithMorphRouter(env, frontierRouting, { + ...normalizedInput, + userPromptPrefix: null, + latestUserPromptPrefix: null, + }); + + expect(outcome).toEqual({ kind: 'skipped', reason: 'no_prompt' }); + expect(mockedFetch).not.toHaveBeenCalled(); + }); + + it('rejects decisions outside the allowed candidate set', async () 
=> { + mockedFetch.mockResolvedValueOnce(morphResponse({ model: 'claude-haiku-4-5-20251001' })); + + await expect(routeWithMorphRouter(env, frontierRouting, normalizedInput)).rejects.toMatchObject( + { + name: 'MorphRouterError', + failureStage: 'invalid_response', + } + ); + }); + + it('surfaces upstream HTTP failures with their status', async () => { + mockedFetch.mockResolvedValueOnce(morphResponse({ error: 'overloaded' }, 503)); + + await expect(routeWithMorphRouter(env, frontierRouting, normalizedInput)).rejects.toMatchObject( + { + name: 'MorphRouterError', + failureStage: 'http_503', + } + ); + }); + + it('maps timeouts to a timeout failure stage', async () => { + mockedFetch.mockRejectedValueOnce(new DOMException('timed out', 'TimeoutError')); + + await expect(routeWithMorphRouter(env, frontierRouting, normalizedInput)).rejects.toMatchObject( + { + name: 'MorphRouterError', + failureStage: 'timeout', + } + ); + }); + + it('rejects malformed router responses', async () => { + mockedFetch.mockResolvedValueOnce(morphResponse({ best: 'gpt-5.5' })); + + await expect( + routeWithMorphRouter(env, frontierRouting, normalizedInput) + ).rejects.toBeInstanceOf(MorphRouterError); + }); +}); + +describe('buildRouterInput', () => { + it('prefers the latest user prompt and falls back to the initial one', () => { + expect(buildRouterInput({ ...normalizedInput, latestUserPromptPrefix: 'latest' })).toBe( + 'latest' + ); + expect(buildRouterInput(normalizedInput)).toBe('Add a null check to this getter.'); + expect( + buildRouterInput({ + ...normalizedInput, + userPromptPrefix: ' ', + latestUserPromptPrefix: null, + }) + ).toBeNull(); + }); +}); + +describe('routerConfigFingerprint', () => { + it('is stable across candidate ordering and scoped by policy', () => { + const reordered = { + ...frontierRouting, + candidateModels: [...frontierRouting.candidateModels].reverse(), + }; + expect(routerConfigFingerprint(reordered)).toBe(routerConfigFingerprint(frontierRouting)); + 
expect( + routerConfigFingerprint({ ...frontierRouting, autoModel: 'kilo-auto/balanced' }) + ).not.toBe(routerConfigFingerprint(frontierRouting)); + }); +}); diff --git a/services/auto-routing/src/morph-router.ts b/services/auto-routing/src/morph-router.ts new file mode 100644 index 0000000000..313ace22f8 --- /dev/null +++ b/services/auto-routing/src/morph-router.ts @@ -0,0 +1,226 @@ +import * as z from 'zod'; +import type { + NormalizedClassifierInput, + RouterDecision, + RoutingContext, +} from '@kilocode/auto-routing-contracts'; +import { ttlCached } from './ttl-cache'; + +// Morph's multimodel router classifies a prompt and picks the best model +// from an allowed set (https://docs.morphllm.com/sdk/components/router). +// One POST per decision, ~200ms typical. +export const MORPH_ROUTER_ENDPOINT = 'https://api.morphllm.com/v1/router/multimodel'; + +// Generous relative to Morph's ~200ms typical latency; decisions are +// shadow-mode today, so a slow call should fail the decision, not pile up +// against the gateway's background mirror budget. +const MORPH_ROUTER_TIMEOUT_MS = 5_000; + +// The router only needs enough prompt to classify; matches the prefix caps +// the classifier already applies to mirrored input. +const ROUTER_INPUT_MAX_LENGTH = 1_000; + +// Kilo public ids <-> Morph router catalog ids. Only models present in +// Morph's catalog can participate in a routed decision; unmapped candidates +// are dropped before the call (and reported via candidateCount telemetry). 
/**
 * Builds an immutable lookup table with no prototype chain.
 *
 * The tables below are indexed with strings taken from the routing payload.
 * On an ordinary object literal a key such as "constructor" or "toString"
 * resolves to an inherited Object.prototype member (a truthy non-string),
 * which would slip past `if (!value)` guards. A null-prototype record makes
 * every unknown key read as `undefined`.
 */
function nullPrototypeRecord(entries: Record<string, string>): Record<string, string> {
  return Object.freeze(Object.assign(Object.create(null), entries));
}

// Kilo public ids -> Morph router catalog ids. Only models listed here can
// take part in a routed decision.
const KILO_TO_MORPH_MODEL: Record<string, string> = nullPrototypeRecord({
  'anthropic/claude-opus-4.8': 'claude-opus-4-8',
  'anthropic/claude-sonnet-4.6': 'claude-sonnet-4-6',
  'anthropic/claude-haiku-4.5': 'claude-haiku-4-5-20251001',
  'openai/gpt-5.5': 'gpt-5.5',
  'google/gemini-3.1-pro-preview': 'gemini-3.1-pro-preview',
  'google/gemini-3.5-flash': 'gemini-3.5-flash',
  'deepseek/deepseek-v4-pro:discounted': 'deepseek-v4-pro',
  'deepseek/deepseek-v4-flash:discounted': 'deepseek-v4-flash',
});

// Reverse direction (Morph catalog id -> Kilo public id), derived from the
// forward table so the two can never drift apart.
const MORPH_TO_KILO_MODEL = new Map<string, string>(
  Object.entries(KILO_TO_MORPH_MODEL).map(([kiloId, morphId]) => [morphId, kiloId])
);

// Tier intent -> router policy. Frontier never trades quality for cost;
// balanced lets the router break ties on cost; small hunts for the cheapest
// qualified model.
const AUTO_MODEL_POLICY: Record<string, string> = nullPrototypeRecord({
  'kilo-auto/frontier': 'capability_heavy',
  'kilo-auto/balanced': 'balanced',
  'kilo/auto': 'balanced',
  'kilo-auto/small': 'cost_efficient',
  'kilo-auto/free': 'cost_efficient',
});
/**
 * Selects the single user prompt the router classifies.
 *
 * The latest user turn wins when it carries text (it redirects the current
 * request); otherwise the opening turn is used. System prompts are agent
 * boilerplate and would dominate the classification, so they are
 * deliberately excluded: the router receives at most one bounded user prompt
 * prefix, never the conversation or tool results.
 *
 * A blank (empty or whitespace-only) latest turn is treated the same as a
 * missing one, so it no longer hides a usable opening prompt.
 *
 * @param input - Normalized classifier input; only the two user prompt
 *   prefixes are read.
 * @param maxLength - Maximum number of characters forwarded to the router.
 * @returns The prompt prefix to classify, or `null` when neither turn
 *   contains any non-whitespace text.
 */
export function buildRouterInput(
  input: NormalizedClassifierInput,
  maxLength: number = ROUTER_INPUT_MAX_LENGTH
): string | null {
  for (const candidate of [input.latestUserPromptPrefix, input.userPromptPrefix]) {
    // `??` would only skip null/undefined; skip blank strings as well.
    if (typeof candidate === 'string' && candidate.trim().length > 0) {
      return candidate.slice(0, maxLength);
    }
  }
  return null;
}
/**
 * Translates a tier's Kilo candidate ids into Morph catalog ids.
 *
 * Candidates without a catalog entry are dropped. When several Kilo ids map
 * to the same Morph id only the first occurrence is kept, so the result
 * follows `routing.candidateModels` order and never repeats a Morph id.
 *
 * @param routing - Routing context whose `candidateModels` are translated.
 * @param modelMap - Kilo id -> Morph id table; defaults to the module catalog.
 * @returns One `{ kiloId, morphId }` pair per routable candidate.
 */
function mappedCandidates(
  routing: RoutingContext,
  modelMap: Readonly<Record<string, string>> = KILO_TO_MORPH_MODEL
): Array<{ kiloId: string; morphId: string }> {
  const seenMorphIds = new Set<string>();
  const candidates: Array<{ kiloId: string; morphId: string }> = [];
  for (const kiloId of routing.candidateModels) {
    // Candidate ids arrive in the mirrored routing payload. A bare index
    // lookup would resolve inherited keys such as "constructor" to
    // Object.prototype members, so only own entries count as mapped.
    if (!Object.prototype.hasOwnProperty.call(modelMap, kiloId)) continue;
    const morphId = modelMap[kiloId];
    if (!morphId || seenMorphIds.has(morphId)) continue;
    seenMorphIds.add(morphId);
    candidates.push({ kiloId, morphId });
  }
  return candidates;
}
/**
 * Sends one routing request to the Morph router and validates the reply.
 *
 * @param env - Worker bindings; the API key is read through `apiKeyCache`.
 * @param body - JSON-serializable request payload, POSTed as-is.
 * @returns The response parsed against `morphRouterResponseSchema`.
 * @throws MorphRouterError with `failureStage`:
 *   - `timeout` when the request is aborted by the timeout signal,
 *   - `fetch` for any other rejection of `fetch`,
 *   - `http_<status>` for a non-2xx response,
 *   - `invalid_response` when the body is not JSON or fails the schema.
 */
async function morphRouterFetch(
  env: MorphRouterEnv,
  body: Record<string, unknown>
): Promise<z.infer<typeof morphRouterResponseSchema>> {
  const apiKey = await apiKeyCache.get(env);

  let response: Response;
  try {
    response = await fetch(MORPH_ROUTER_ENDPOINT, {
      method: 'POST',
      headers: {
        authorization: `Bearer ${apiKey}`,
        'content-type': 'application/json',
      },
      body: JSON.stringify(body),
      signal: AbortSignal.timeout(MORPH_ROUTER_TIMEOUT_MS),
    });
  } catch (error) {
    // AbortSignal.timeout() rejects with a DOMException named "TimeoutError";
    // every other rejection is reported as a generic fetch failure.
    const isTimeout = error instanceof DOMException && error.name === 'TimeoutError';
    throw new MorphRouterError(
      isTimeout ? 'Morph router request timed out' : 'Morph router request failed',
      isTimeout ? 'timeout' : 'fetch'
    );
  }

  if (!response.ok) {
    // The error body is never read, so release it explicitly rather than
    // leaving an unconsumed stream attached to the connection. Cancellation
    // is best-effort and must not mask the HTTP failure reported below.
    await response.body?.cancel().catch(() => undefined);
    throw new MorphRouterError(
      `Morph router returned ${response.status}`,
      `http_${response.status}`
    );
  }

  let json: unknown;
  try {
    json = await response.json();
  } catch {
    throw new MorphRouterError('Morph router returned invalid JSON', 'invalid_response');
  }

  const parsed = morphRouterResponseSchema.safeParse(json);
  if (!parsed.success) {
    throw new MorphRouterError('Morph router returned an unexpected shape', 'invalid_response');
  }
  return parsed.data;
}