Kilo-Org · shreybirmiwalmorph · Jun 12, 2026
diff --git a/apps/web/src/app/api/openrouter/[...path]/route.ts b/apps/web/src/app/api/openrouter/[...path]/route.ts
@@ -89,7 +89,11 @@ import {
 import { normalizeModelId } from '@/lib/ai-gateway/model-utils';
 import { isForbiddenFreeModel } from '@/lib/ai-gateway/forbidden-free-models';
 import { isCloudflareIP } from '@/lib/cloudflare-ip';
-import { isKiloAutoModel, KILO_AUTO_FREE_MODEL } from '@/lib/ai-gateway/auto-model';
+import {
+  getMorphRouterCandidates,
+  isKiloAutoModel,
+  KILO_AUTO_FREE_MODEL,
+} from '@/lib/ai-gateway/auto-model';
 import { applyResolvedAutoModel } from '@/lib/ai-gateway/auto-model/resolution';
 import type { MicrodollarUsageContext } from '@/lib/ai-gateway/processUsage.types';
 import {
@@ -731,6 +735,15 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
     clientRequestId,
     mode: modeHeader,
     userAgent: extractHeaderAndLimitLength(request, 'user-agent'),
+    // effectiveModelIdLowerCased is final here: static auto resolution and
+    // any rules-engine override have both been applied.
+    routing: autoModel
+      ? {
+          autoModel,
+          candidateModels: [...getMorphRouterCandidates(autoModel)],
+          resolvedModel: effectiveModelIdLowerCased,
+        }
+      : null,
     authContext: Promise.resolve({ organizationId }),
   });
 

diff --git a/apps/web/src/lib/ai-gateway/auto-model/index.ts b/apps/web/src/lib/ai-gateway/auto-model/index.ts
@@ -1,9 +1,15 @@
 import { z } from 'zod';
 import {
+  CLAUDE_HAIKU_CURRENT_MODEL_ID,
   CLAUDE_OPUS_CURRENT_MODEL_ID,
   claude_sonnet_clawsetup_model,
   CLAUDE_SONNET_CURRENT_MODEL_ID,
 } from '@/lib/ai-gateway/providers/anthropic.constants';
+import { GPT_CURRENT_MODEL_ID } from '@/lib/ai-gateway/providers/openai';
+import {
+  GEMINI_FLASH_CURRENT_MODEL_ID,
+  GEMINI_PRO_CURRENT_MODEL_ID,
+} from '@/lib/ai-gateway/providers/google';
 import type { OpenRouterReasoningConfig } from '@/lib/ai-gateway/providers/openrouter/types';
 import type { OpenCodeSettings, Verbosity } from '@kilocode/db/schema-types';
 import { QWEN37_PLUS_MODEL_ID } from '@/lib/ai-gateway/custom-pricing';
@@ -164,3 +170,43 @@ export const AUTO_MODELS = [
 export function isKiloAutoModel(model: string) {
   return AUTO_MODELS.some(m => m.id === model) || model === KILO_AUTO_LEGACY_MODEL;
 }
+
+// Models each kilo-auto tier may route among when a per-prompt router (the
+// Morph model router, consulted by the auto-routing worker) picks the model
+// instead of the static mode mapping. Membership here is a product decision:
+// frontier may roam across frontier-class models from any provider, balanced
+// across mid-priced models. Tiers absent from this map (free rotates by
+// availability, small is balance-based) keep static resolution only.
+//
+// Per .specs/model-experiments.md, experimented public ids must never be
+// added to these candidate sets.
+const MORPH_ROUTER_TIER_CANDIDATES: Record<string, readonly string[]> = {
+  [KILO_AUTO_FRONTIER_MODEL.id]: [
+    CLAUDE_OPUS_CURRENT_MODEL_ID,
+    CLAUDE_SONNET_CURRENT_MODEL_ID,
+    GPT_CURRENT_MODEL_ID,
+    GEMINI_PRO_CURRENT_MODEL_ID,
+  ],
+  [KILO_AUTO_BALANCED_MODEL.id]: [
+    QWEN37_PLUS_MODEL_ID,
+    CLAUDE_HAIKU_CURRENT_MODEL_ID,
+    GEMINI_FLASH_CURRENT_MODEL_ID,
+  ],
+  [KILO_AUTO_LEGACY_MODEL]: [
+    QWEN37_PLUS_MODEL_ID,
+    CLAUDE_HAIKU_CURRENT_MODEL_ID,
+    GEMINI_FLASH_CURRENT_MODEL_ID,
+  ],
+};
+
+// Returns the model ids a per-prompt router may pick from for `autoModel`;
+// ids with no entry in MORPH_ROUTER_TIER_CANDIDATES yield an empty list.
+// The lookup is limited to own properties: a plain object also answers to
+// inherited keys such as 'constructor' or 'toString', and handing back those
+// Object.prototype members would break callers that spread the result.
+export function getMorphRouterCandidates(autoModel: string): readonly string[] {
+  if (!Object.prototype.hasOwnProperty.call(MORPH_ROUTER_TIER_CANDIDATES, autoModel)) {
+    return [];
+  }
+  return MORPH_ROUTER_TIER_CANDIDATES[autoModel] ?? [];
+}
diff --git a/apps/web/src/lib/ai-gateway/auto-routing-mirror.test.ts b/apps/web/src/lib/ai-gateway/auto-routing-mirror.test.ts
@@ -85,6 +85,7 @@ describe('scheduleAutoRoutingMirror', () => {
       mode: 'code',
       userAgent: 'Kilo-Code/1.2.3',
       bodyBytes: 512,
+      routing: null,
     });
     // TypeScript cannot see the schema's runtime refinements (.trim().min(1)
     // etc.), so round-trip the built payload through the worker's validator.
@@ -95,6 +96,24 @@ describe('scheduleAutoRoutingMirror', () => {
     expect(headers.get('content-type')).toBe('application/json');
   });
 
+  it('passes the kilo-auto routing context through to the worker', async () => {
+    const frontierRouting = {
+      autoModel: 'kilo-auto/frontier',
+      candidateModels: ['anthropic/claude-opus-4.8', 'openai/gpt-5.5'],
+      resolvedModel: 'anthropic/claude-opus-4.8',
+    };
+    const params = { ...makeParams(), routing: frontierRouting };
+    scheduleAutoRoutingMirror(params, work => scheduledWork.push(work), options);
+    // Run the deferred send, then inspect the JSON body handed to fetch.
+    await scheduledWork[0]();
+
+    const requestInit = mockedFetch.mock.calls[0][1];
+    const sentPayload = JSON.parse(requestInit?.body as string);
+    // The routing context must arrive unchanged and satisfy the worker schema.
+    expect(sentPayload.routing).toEqual(frontierRouting);
+    expect(() => MirrorPayloadSchema.parse(sentPayload)).not.toThrow();
+  });
+
   it('skips mirroring when the body cannot be normalized, with a log for visibility', async () => {
     scheduleAutoRoutingMirror(
       { ...makeParams(), body: { stream: true } },

diff --git a/apps/web/src/lib/ai-gateway/auto-routing-mirror.ts b/apps/web/src/lib/ai-gateway/auto-routing-mirror.ts
@@ -1,5 +1,9 @@
 import { normalizeClassifierInput } from '@kilocode/auto-routing-contracts';
-import type { ClassifierApiKind, MirrorPayload } from '@kilocode/auto-routing-contracts';
+import type {
+  ClassifierApiKind,
+  MirrorPayload,
+  RoutingContext,
+} from '@kilocode/auto-routing-contracts';
 import { after } from 'next/server';
 import { AUTO_ROUTING_WORKER_URL, INTERNAL_API_SECRET } from '@/lib/config.server';
 import { warnExceptInTest } from '@/lib/utils.server';
@@ -19,6 +23,9 @@ type ScheduleAutoRoutingMirrorParams = {
   clientRequestId: string | null;
   mode: string | null;
   userAgent: string | null;
+  // Present only for kilo-auto requests: the tier, its router candidates,
+  // and the model the static resolver picked.
+  routing?: RoutingContext | null;
   authContext?: Promise<{ organizationId?: string | null }>;
 };
 
@@ -62,6 +69,7 @@ async function sendAutoRoutingMirror(
     mode: params.mode,
     userAgent: params.userAgent,
     bodyBytes: params.bodyBytes,
+    routing: params.routing ?? null,
   };
 
   const response = await fetch(`${workerUrl}/decide`, {

diff --git a/packages/auto-routing-contracts/src/contracts.test.ts b/packages/auto-routing-contracts/src/contracts.test.ts
@@ -55,6 +55,58 @@ describe('auto routing contracts', () => {
       })
     ).toEqual({ cost: 0, decision: null, classifierResult: null });
 
+    // Routing context is optional, so gateway and worker deploys never have
+    // to coordinate; when present it is still validated.
+    const routing = {
+      autoModel: 'kilo-auto/frontier',
+      candidateModels: ['anthropic/claude-opus-4.8', 'openai/gpt-5.5'],
+      resolvedModel: 'anthropic/claude-opus-4.8',
+    };
+    expect(MirrorPayloadSchema.parse({ ...mirrorPayload, routing })).toMatchObject({ routing });
+    expect(MirrorPayloadSchema.parse({ ...mirrorPayload, routing: null })).toMatchObject({
+      routing: null,
+    });
+    expect(() =>
+      MirrorPayloadSchema.parse({ ...mirrorPayload, routing: { autoModel: '' } })
+    ).toThrow();
+    expect(() =>
+      MirrorPayloadSchema.parse({
+        ...mirrorPayload,
+        routing: { ...routing, candidateModels: [''] },
+      })
+    ).toThrow();
+
+    const routerDecision = {
+      source: 'morph_router',
+      model: 'anthropic/claude-sonnet-4.6',
+      routerModel: 'claude-sonnet-4-6',
+      difficulty: 'easy',
+      confidence: 0.97,
+      ambiguity: 'low',
+      domain: 'coding',
+    };
+    expect(
+      AutoRoutingDecisionResponseSchema.parse({
+        cost: 0,
+        decision: routerDecision,
+        classifierResult: null,
+      })
+    ).toMatchObject({ decision: { model: 'anthropic/claude-sonnet-4.6' } });
+    expect(
+      AutoRoutingDecisionResponseSchema.parse({
+        cost: 0,
+        decision: { ...routerDecision, difficulty: null, confidence: null },
+        classifierResult: null,
+      })
+    ).toMatchObject({ decision: { difficulty: null } });
+    expect(() =>
+      AutoRoutingDecisionResponseSchema.parse({
+        cost: 0,
+        decision: { ...routerDecision, source: 'other_router' },
+        classifierResult: null,
+      })
+    ).toThrow();
+
     expect(
       AutoRoutingDecisionResponseSchema.parse({
         cost: 0,

diff --git a/packages/auto-routing-contracts/src/index.ts b/packages/auto-routing-contracts/src/index.ts
@@ -7,6 +7,23 @@ export {
   type NormalizedClassifierInput,
 } from './input';
 
+// Routing context for kilo-auto requests: which pseudo-model the user
+// selected, which models that tier may route among, and what the gateway's
+// static resolver picked. The worker uses it to produce (and score) routing
+// decisions; non-auto requests carry no routing context.
+export const RoutingContextSchema = z.object({
+  // The kilo-auto pseudo-model from the original request, e.g.
+  // 'kilo-auto/frontier'.
+  autoModel: z.string().trim().min(1),
+  // Kilo public model ids this tier may route among. The gateway owns this
+  // set so tier membership stays a product decision, not a worker default.
+  candidateModels: z.array(z.string().trim().min(1)).max(32),
+  // The model the static resolver picked for this request; the baseline a
+  // router decision is compared against.
+  resolvedModel: z.string().trim().min(1).nullable(),
+});
+export type RoutingContext = z.infer<typeof RoutingContextSchema>;
+
 // What the gateway mirrors to the auto-routing worker per request: the
 // already-normalized classifier input plus caller identity. The gateway
 // normalizes before sending so the multi-hundred-KB request body never
@@ -25,6 +42,8 @@ export const MirrorPayloadSchema = z.object({
   // Size of the original request body, kept as an analytics dimension now
   // that the body itself is no longer mirrored.
   bodyBytes: z.number().int().nonnegative(),
+  // Optional so gateway and worker deploys never have to coordinate.
+  routing: RoutingContextSchema.nullable().optional(),
 });
 export type MirrorPayload = z.infer<typeof MirrorPayloadSchema>;
 
@@ -96,9 +115,24 @@ export const ClassifierOutputSchema = z
   });
 export type ClassifierOutput = z.infer<typeof ClassifierOutputSchema>;
 
+// A routing decision produced by the Morph model router
+// (https://docs.morphllm.com/sdk/components/router). `model` is the Kilo
+// public id to serve; `routerModel` is the router-catalog id it mapped from.
+// Classification heads below their confidence threshold come back null.
+export const RouterDecisionSchema = z.object({
+  source: z.literal('morph_router'),
+  model: z.string().trim().min(1),
+  routerModel: z.string().trim().min(1),
+  difficulty: z.string().nullable(),
+  confidence: z.number().nullable(),
+  ambiguity: z.string().nullable(),
+  domain: z.string().nullable(),
+});
+export type RouterDecision = z.infer<typeof RouterDecisionSchema>;
+
 export const AutoRoutingDecisionResponseSchema = z.object({
   cost: z.number(),
-  decision: z.null(),
+  decision: RouterDecisionSchema.nullable(),
   classifierResult: z
     .object({
       classification: ClassifierOutputSchema,

diff --git a/services/auto-routing/.dev.vars.example b/services/auto-routing/.dev.vars.example
@@ -0,0 +1,19 @@
+# Auto-routing worker local development secrets.
+#
+# Copy this file to .dev.vars to boot the worker locally:
+#   cp .dev.vars.example .dev.vars
+#
+# Wrangler serves these as the local values for the secrets-store bindings
+# declared in wrangler.jsonc.
+#
+# Bearer token the gateway uses to call this worker; any non-empty value
+# works locally as long as requests send the same one.
+INTERNAL_API_SECRET_PROD=local-dev-secret
+
+# OpenRouter key used by the prompt classifier (https://openrouter.ai/keys).
+OPENROUTER_API_KEY=replace-me
+
+# Morph key used by the model router (https://morphllm.com — dashboard > API
+# keys). Only consulted when the morph_router_enabled KV flag is 'true':
+#   wrangler kv key put morph_router_enabled true --binding AUTO_ROUTING_CONFIG --local
+MORPH_API_KEY=replace-me
diff --git a/services/auto-routing/src/classifier-config.ts b/services/auto-routing/src/classifier-config.ts
@@ -4,6 +4,7 @@ import { ttlCached } from './ttl-cache';
 
 export const CLASSIFIER_MODEL_CONFIG_KEY = 'classifier_model';
 export const DECISION_LOG_SAMPLE_RATE_CONFIG_KEY = 'decision_log_sample_rate';
+export const MORPH_ROUTER_ENABLED_CONFIG_KEY = 'morph_router_enabled';
 
 // Successful decisions are high volume (~30/s) and only needed for latency
 // and cache hit-rate percentiles, so they are sampled by default. The rate
@@ -38,9 +39,18 @@ const decisionLogSampleRateCache = ttlCached(
   }
 );
 
+// The Morph router stays disabled until an operator sets this KV flag to
+// 'true', so prompt prefixes never reach the third-party router without an
+// explicit opt-in. Cached with the same KV+TTL pattern as the classifier model.
+const morphRouterEnabledCache = ttlCached(CONFIG_CACHE_TTL_MS, async (env: ClassifierConfigEnv) => {
+  const flag = await env.AUTO_ROUTING_CONFIG.get(MORPH_ROUTER_ENABLED_CONFIG_KEY);
+  return (flag ?? '').trim() === 'true';
+});
+
 export function clearClassifierConfigCache(): void {
   classifierModelCache.clear();
   decisionLogSampleRateCache.clear();
+  morphRouterEnabledCache.clear();
 }
 
 // Config reads run before the guarded decision path. A transient KV failure
@@ -69,6 +79,10 @@ export function getDecisionLogSampleRate(env: ClassifierConfigEnv): Promise<numb
     .catch(failClosed(DECISION_LOG_SAMPLE_RATE_CONFIG_KEY, DEFAULT_DECISION_LOG_SAMPLE_RATE));
 }
 
+export function getMorphRouterEnabled(env: ClassifierConfigEnv): Promise<boolean> {
+  return morphRouterEnabledCache.get(env).catch(failClosed(MORPH_ROUTER_ENABLED_CONFIG_KEY, false));
+}
+
 export async function setClassifierModel(
   env: ClassifierConfigEnv,
   model: string