Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion apps/web/src/app/api/openrouter/[...path]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,11 @@ import {
import { normalizeModelId } from '@/lib/ai-gateway/model-utils';
import { isForbiddenFreeModel } from '@/lib/ai-gateway/forbidden-free-models';
import { isCloudflareIP } from '@/lib/cloudflare-ip';
import { isKiloAutoModel, KILO_AUTO_FREE_MODEL } from '@/lib/ai-gateway/auto-model';
import {
getMorphRouterCandidates,
isKiloAutoModel,
KILO_AUTO_FREE_MODEL,
} from '@/lib/ai-gateway/auto-model';
import { applyResolvedAutoModel } from '@/lib/ai-gateway/auto-model/resolution';
import type { MicrodollarUsageContext } from '@/lib/ai-gateway/processUsage.types';
import {
Expand Down Expand Up @@ -731,6 +735,15 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
clientRequestId,
mode: modeHeader,
userAgent: extractHeaderAndLimitLength(request, 'user-agent'),
// effectiveModelIdLowerCased is final here: static auto resolution and
// any rules-engine override have both been applied.
routing: autoModel
? {
autoModel,
candidateModels: [...getMorphRouterCandidates(autoModel)],
resolvedModel: effectiveModelIdLowerCased,
}
: null,
authContext: Promise.resolve({ organizationId }),
});

Expand Down
38 changes: 38 additions & 0 deletions apps/web/src/lib/ai-gateway/auto-model/index.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
import { z } from 'zod';
import {
CLAUDE_HAIKU_CURRENT_MODEL_ID,
CLAUDE_OPUS_CURRENT_MODEL_ID,
claude_sonnet_clawsetup_model,
CLAUDE_SONNET_CURRENT_MODEL_ID,
} from '@/lib/ai-gateway/providers/anthropic.constants';
import { GPT_CURRENT_MODEL_ID } from '@/lib/ai-gateway/providers/openai';
import {
GEMINI_FLASH_CURRENT_MODEL_ID,
GEMINI_PRO_CURRENT_MODEL_ID,
} from '@/lib/ai-gateway/providers/google';
import type { OpenRouterReasoningConfig } from '@/lib/ai-gateway/providers/openrouter/types';
import type { OpenCodeSettings, Verbosity } from '@kilocode/db/schema-types';
import { QWEN37_PLUS_MODEL_ID } from '@/lib/ai-gateway/custom-pricing';
Expand Down Expand Up @@ -164,3 +170,35 @@ export const AUTO_MODELS = [
// True when `model` names a kilo-auto pseudo-model: either the legacy
// auto-model id or the id of any entry in AUTO_MODELS.
export function isKiloAutoModel(model: string) {
  if (model === KILO_AUTO_LEGACY_MODEL) {
    return true;
  }
  return AUTO_MODELS.some(autoModel => autoModel.id === model);
}

// Models each kilo-auto tier may route among when a per-prompt router (the
// Morph model router, consulted by the auto-routing worker) picks the model
// instead of the static mode mapping. Membership here is a product decision:
// frontier may roam across frontier-class models from any provider, and
// balanced may roam across mid-priced models. Tiers absent from this map
// (free rotates by availability, small is balance-based) keep static
// resolution only.
//
// Keys are auto-model ids; values are the candidate model ids for that tier.
// Read this map through getMorphRouterCandidates, which returns an empty
// list for ids that have no entry.
//
// Per .specs/model-experiments.md, experimented public ids must never be
// added to these candidate sets.
const MORPH_ROUTER_TIER_CANDIDATES: Record<string, readonly string[]> = {
[KILO_AUTO_FRONTIER_MODEL.id]: [
CLAUDE_OPUS_CURRENT_MODEL_ID,
CLAUDE_SONNET_CURRENT_MODEL_ID,
GPT_CURRENT_MODEL_ID,
GEMINI_PRO_CURRENT_MODEL_ID,
],
[KILO_AUTO_BALANCED_MODEL.id]: [
QWEN37_PLUS_MODEL_ID,
CLAUDE_HAIKU_CURRENT_MODEL_ID,
GEMINI_FLASH_CURRENT_MODEL_ID,
],
// The legacy auto-model id is given the same candidates as the balanced
// tier; keep the two lists in sync when editing either one.
[KILO_AUTO_LEGACY_MODEL]: [
QWEN37_PLUS_MODEL_ID,
CLAUDE_HAIKU_CURRENT_MODEL_ID,
GEMINI_FLASH_CURRENT_MODEL_ID,
],
};

// Returns the router candidate model ids for the given auto-model id, or an
// empty list when the id has no entry in MORPH_ROUTER_TIER_CANDIDATES.
//
// The map is a plain object, so a bare index lookup would also resolve keys
// inherited from Object.prototype ('constructor', 'toString', '__proto__',
// ...) and hand back a non-array value that callers then try to spread.
// Only own properties count as configured tiers.
export function getMorphRouterCandidates(autoModel: string): readonly string[] {
  const isConfiguredTier = Object.prototype.hasOwnProperty.call(
    MORPH_ROUTER_TIER_CANDIDATES,
    autoModel
  );
  if (!isConfiguredTier) {
    return [];
  }
  return MORPH_ROUTER_TIER_CANDIDATES[autoModel] ?? [];
}
19 changes: 19 additions & 0 deletions apps/web/src/lib/ai-gateway/auto-routing-mirror.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ describe('scheduleAutoRoutingMirror', () => {
mode: 'code',
userAgent: 'Kilo-Code/1.2.3',
bodyBytes: 512,
routing: null,
});
// TypeScript cannot see the schema's runtime refinements (.trim().min(1)
// etc.), so round-trip the built payload through the worker's validator.
Expand All @@ -95,6 +96,24 @@ describe('scheduleAutoRoutingMirror', () => {
expect(headers.get('content-type')).toBe('application/json');
});

// A non-null routing context supplied by the caller must reach the worker
// request body unchanged and still satisfy the shared payload schema.
it('passes the kilo-auto routing context through to the worker', async () => {
const routing = {
autoModel: 'kilo-auto/frontier',
candidateModels: ['anthropic/claude-opus-4.8', 'openai/gpt-5.5'],
resolvedModel: 'anthropic/claude-opus-4.8',
};
// The scheduler callback only records the work; nothing runs until the
// test invokes it below.
scheduleAutoRoutingMirror(
{ ...makeParams(), routing },
work => scheduledWork.push(work),
options
);
await scheduledWork[0]();

// Inspect the JSON body passed as the init argument of the first fetch call.
const payload = JSON.parse(mockedFetch.mock.calls[0][1]?.body as string);
expect(payload.routing).toEqual(routing);
// Round-trip through the schema to exercise its runtime validation.
expect(() => MirrorPayloadSchema.parse(payload)).not.toThrow();
});

it('skips mirroring when the body cannot be normalized, with a log for visibility', async () => {
scheduleAutoRoutingMirror(
{ ...makeParams(), body: { stream: true } },
Expand Down
10 changes: 9 additions & 1 deletion apps/web/src/lib/ai-gateway/auto-routing-mirror.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import { normalizeClassifierInput } from '@kilocode/auto-routing-contracts';
import type { ClassifierApiKind, MirrorPayload } from '@kilocode/auto-routing-contracts';
import type {
ClassifierApiKind,
MirrorPayload,
RoutingContext,
} from '@kilocode/auto-routing-contracts';
import { after } from 'next/server';
import { AUTO_ROUTING_WORKER_URL, INTERNAL_API_SECRET } from '@/lib/config.server';
import { warnExceptInTest } from '@/lib/utils.server';
Expand All @@ -19,6 +23,9 @@ type ScheduleAutoRoutingMirrorParams = {
clientRequestId: string | null;
mode: string | null;
userAgent: string | null;
// Present only for kilo-auto requests: the tier, its router candidates,
// and the model the static resolver picked.
routing?: RoutingContext | null;
authContext?: Promise<{ organizationId?: string | null }>;
};

Expand Down Expand Up @@ -62,6 +69,7 @@ async function sendAutoRoutingMirror(
mode: params.mode,
userAgent: params.userAgent,
bodyBytes: params.bodyBytes,
routing: params.routing ?? null,
};

const response = await fetch(`${workerUrl}/decide`, {
Expand Down
52 changes: 52 additions & 0 deletions packages/auto-routing-contracts/src/contracts.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,58 @@ describe('auto routing contracts', () => {
})
).toEqual({ cost: 0, decision: null, classifierResult: null });

// Routing context is optional (deploys never coordinate) and validated
// when present.
const routing = {
autoModel: 'kilo-auto/frontier',
candidateModels: ['anthropic/claude-opus-4.8', 'openai/gpt-5.5'],
resolvedModel: 'anthropic/claude-opus-4.8',
};
expect(MirrorPayloadSchema.parse({ ...mirrorPayload, routing })).toMatchObject({ routing });
expect(MirrorPayloadSchema.parse({ ...mirrorPayload, routing: null })).toMatchObject({
routing: null,
});
expect(() =>
MirrorPayloadSchema.parse({ ...mirrorPayload, routing: { autoModel: '' } })
).toThrow();
expect(() =>
MirrorPayloadSchema.parse({
...mirrorPayload,
routing: { ...routing, candidateModels: [''] },
})
).toThrow();

const routerDecision = {
source: 'morph_router',
model: 'anthropic/claude-sonnet-4.6',
routerModel: 'claude-sonnet-4-6',
difficulty: 'easy',
confidence: 0.97,
ambiguity: 'low',
domain: 'coding',
};
expect(
AutoRoutingDecisionResponseSchema.parse({
cost: 0,
decision: routerDecision,
classifierResult: null,
})
).toMatchObject({ decision: { model: 'anthropic/claude-sonnet-4.6' } });
expect(
AutoRoutingDecisionResponseSchema.parse({
cost: 0,
decision: { ...routerDecision, difficulty: null, confidence: null },
classifierResult: null,
})
).toMatchObject({ decision: { difficulty: null } });
expect(() =>
AutoRoutingDecisionResponseSchema.parse({
cost: 0,
decision: { ...routerDecision, source: 'other_router' },
classifierResult: null,
})
).toThrow();

expect(
AutoRoutingDecisionResponseSchema.parse({
cost: 0,
Expand Down
36 changes: 35 additions & 1 deletion packages/auto-routing-contracts/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,23 @@ export {
type NormalizedClassifierInput,
} from './input';

// Routing context for kilo-auto requests: which pseudo-model the user
// selected, which models that tier may route among, and what the gateway's
// static resolver picked. The worker uses it to produce (and score) routing
// decisions; non-auto requests carry no routing context.
//
// All string fields are trimmed during parsing, so empty and
// whitespace-only values fail validation.
export const RoutingContextSchema = z.object({
// The kilo-auto pseudo-model from the original request, e.g.
// 'kilo-auto/frontier'.
autoModel: z.string().trim().min(1),
// Kilo public model ids this tier may route among. The gateway owns this
// set so tier membership stays a product decision, not a worker default.
// Capped at 32 entries; an empty array is accepted.
candidateModels: z.array(z.string().trim().min(1)).max(32),
// The model the static resolver picked for this request; the baseline a
// router decision is compared against. The key is required but its value
// may be null.
resolvedModel: z.string().trim().min(1).nullable(),
});
// Static type of a parsed routing context, derived from the schema.
export type RoutingContext = z.infer<typeof RoutingContextSchema>;

// What the gateway mirrors to the auto-routing worker per request: the
// already-normalized classifier input plus caller identity. The gateway
// normalizes before sending so the multi-hundred-KB request body never
Expand All @@ -25,6 +42,8 @@ export const MirrorPayloadSchema = z.object({
// Size of the original request body, kept as an analytics dimension now
// that the body itself is no longer mirrored.
bodyBytes: z.number().int().nonnegative(),
// Optional so gateway and worker deploys never have to coordinate.
routing: RoutingContextSchema.nullable().optional(),
});
export type MirrorPayload = z.infer<typeof MirrorPayloadSchema>;

Expand Down Expand Up @@ -96,9 +115,24 @@ export const ClassifierOutputSchema = z
});
export type ClassifierOutput = z.infer<typeof ClassifierOutputSchema>;

// A routing decision produced by the Morph model router
// (https://docs.morphllm.com/sdk/components/router). `model` is the Kilo
// public id to serve; `routerModel` is the router-catalog id it mapped from.
// Classification heads below their confidence threshold come back null.
export const RouterDecisionSchema = z.object({
// Fixed tag identifying the producer; any other value fails validation.
source: z.literal('morph_router'),
// Both ids are trimmed and must be non-empty.
model: z.string().trim().min(1),
routerModel: z.string().trim().min(1),
// Classification outputs. Each key is required, but its value may be null.
difficulty: z.string().nullable(),
confidence: z.number().nullable(),
ambiguity: z.string().nullable(),
domain: z.string().nullable(),
});
// Static type of a parsed router decision, derived from the schema.
export type RouterDecision = z.infer<typeof RouterDecisionSchema>;

export const AutoRoutingDecisionResponseSchema = z.object({
cost: z.number(),
decision: z.null(),
decision: RouterDecisionSchema.nullable(),
classifierResult: z
.object({
classification: ClassifierOutputSchema,
Expand Down
19 changes: 19 additions & 0 deletions services/auto-routing/.dev.vars.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Auto-routing worker local development secrets.
#
# Copy this file to .dev.vars to boot the worker locally:
# cp .dev.vars.example .dev.vars
#
# Wrangler serves these as the local values for the secrets-store bindings
# declared in wrangler.jsonc.
#
# Bearer token the gateway uses to call this worker; any non-empty value
# works locally as long as requests send the same one.
INTERNAL_API_SECRET_PROD=local-dev-secret

# OpenRouter key used by the prompt classifier (https://openrouter.ai/keys).
OPENROUTER_API_KEY=replace-me

# Morph key used by the model router (https://morphllm.com — dashboard > API
# keys). Only consulted when the morph_router_enabled KV flag is 'true':
# wrangler kv key put morph_router_enabled true --binding AUTO_ROUTING_CONFIG --local
MORPH_API_KEY=replace-me
14 changes: 14 additions & 0 deletions services/auto-routing/src/classifier-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { ttlCached } from './ttl-cache';

export const CLASSIFIER_MODEL_CONFIG_KEY = 'classifier_model';
export const DECISION_LOG_SAMPLE_RATE_CONFIG_KEY = 'decision_log_sample_rate';
export const MORPH_ROUTER_ENABLED_CONFIG_KEY = 'morph_router_enabled';

// Successful decisions are high volume (~30/s) and only needed for latency
// and cache hit-rate percentiles, so they are sampled by default. The rate
Expand Down Expand Up @@ -38,9 +39,18 @@ const decisionLogSampleRateCache = ttlCached(
}
);

// The Morph router is opt-in: unless an operator has stored exactly 'true'
// (surrounding whitespace ignored) under the flag key, it stays disabled and
// the worker never sends prompt prefixes to a third-party router. Uses the
// same KV+TTL caching pattern as the classifier model.
const morphRouterEnabledCache = ttlCached(
  CONFIG_CACHE_TTL_MS,
  async (env: ClassifierConfigEnv) => {
    const rawFlag = await env.AUTO_ROUTING_CONFIG.get(MORPH_ROUTER_ENABLED_CONFIG_KEY);
    if (rawFlag == null) {
      // Missing key: treat as disabled.
      return false;
    }
    return rawFlag.trim() === 'true';
  }
);

// Clears every TTL-cached config value (classifier model, decision-log
// sample rate, Morph router flag), in that order.
export function clearClassifierConfigCache(): void {
  const configCaches = [
    classifierModelCache,
    decisionLogSampleRateCache,
    morphRouterEnabledCache,
  ];
  for (const cache of configCaches) {
    cache.clear();
  }
}

// Config reads run before the guarded decision path. A transient KV failure
Expand Down Expand Up @@ -69,6 +79,10 @@ export function getDecisionLogSampleRate(env: ClassifierConfigEnv): Promise<numb
.catch(failClosed(DECISION_LOG_SAMPLE_RATE_CONFIG_KEY, DEFAULT_DECISION_LOG_SAMPLE_RATE));
}

// Resolves the cached Morph router flag. If the cached read rejects, the
// failClosed handler for this key is applied with a fallback of `false`.
export function getMorphRouterEnabled(env: ClassifierConfigEnv): Promise<boolean> {
  const pendingFlag = morphRouterEnabledCache.get(env);
  return pendingFlag.catch(failClosed(MORPH_ROUTER_ENABLED_CONFIG_KEY, false));
}

export async function setClassifierModel(
env: ClassifierConfigEnv,
model: string
Expand Down
Loading