Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/managed-kimi-code-anthropic-beta-api.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@moonshot-ai/kimi-code": patch
---

Route managed Kimi Code models on the Anthropic-compatible protocol through the beta Messages API.
4 changes: 4 additions & 0 deletions packages/agent-core/src/config/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ export const ModelAliasSchema = z.object({
// model-name version inference. Needed for custom-named Anthropic endpoints
// whose model name does not encode a parseable Claude version.
adaptiveThinking: z.boolean().optional(),
// Route the Anthropic transport through the beta Messages API
// (`POST /v1/messages?beta=true`) instead of the standard endpoint. Used by
// managed Kimi Code models that declare `protocol: 'anthropic'`.
betaApi: z.boolean().optional(),
});

export type ModelAlias = z.infer<typeof ModelAliasSchema>;
Expand Down
10 changes: 9 additions & 1 deletion packages/agent-core/src/session/provider-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ interface ProviderManagerOptions {
readonly kimiRequestHeaders?: Record<string, string>;
readonly resolveOAuthTokenProvider?: OAuthTokenProviderResolver;
readonly promptCacheKey?: string;
// remove before commit
readonly adaptiveThinkingOverride?: () => boolean | undefined;
}

type AuthorizedRequest = <T>(
Expand Down Expand Up @@ -110,6 +112,9 @@ export class ProviderManager implements ModelProvider {
);
}

// remove before commit
const adaptiveThinkingOverride = this.options.adaptiveThinkingOverride?.();
const effectiveAdaptiveThinking = adaptiveThinkingOverride ?? alias.adaptiveThinking;
Comment on lines +115 to +117

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P3 Badge Remove the leftover adaptive-thinking override hook

When a `ProviderManager` is constructed with this new option, it replaces `alias.adaptiveThinking` for every resolved model, so a test/debug hook left enabled in any runtime path silently ignores the per-model config this code is trying to thread through. The inline `remove before commit` marker suggests this was not intended; please drop the option and pass `alias.adaptiveThinking` directly.

Useful? React with 👍 / 👎.

const provider = toKosongProviderConfig(
providerConfig,
alias.model,
Expand All @@ -118,7 +123,8 @@ export class ProviderManager implements ModelProvider {
alias.maxOutputSize,
alias.reasoningKey,
this.options.promptCacheKey,
alias.adaptiveThinking,
effectiveAdaptiveThinking,
alias.betaApi,
);

return {
Expand Down Expand Up @@ -234,6 +240,7 @@ function toKosongProviderConfig(
reasoningKey: string | undefined,
promptCacheKey: string | undefined,
adaptiveThinking: boolean | undefined,
betaApi: boolean | undefined,
): KosongProviderConfig {
const effectiveType = modelProtocol === 'anthropic' ? 'anthropic' : provider.type;
switch (effectiveType) {
Expand All @@ -249,6 +256,7 @@ function toKosongProviderConfig(
apiKey: providerApiKey(provider),
...(maxOutputSize !== undefined ? { defaultMaxTokens: maxOutputSize } : {}),
...(adaptiveThinking !== undefined ? { adaptiveThinking } : {}),
...(betaApi !== undefined ? { betaApi } : {}),
// Session affinity: Anthropic's analog of OpenAI `prompt_cache_key` is
// `metadata.user_id` on the Messages API (cache-affinity / end-user id).
...(promptCacheKey !== undefined ? { metadata: { user_id: promptCacheKey } } : {}),
Expand Down
29 changes: 29 additions & 0 deletions packages/agent-core/test/harness/runtime-provider.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,35 @@ describe('resolveRuntimeProvider maxOutputSize forwarding', () => {
});
});

it('forwards alias.betaApi to the anthropic provider config', () => {
  // Resolve an alias that targets an anthropic provider over the anthropic
  // protocol and has `betaApi` switched on. The config literal is kept inline
  // so its string literals keep their narrow types.
  const { provider } = resolveRuntimeProvider({
    model: 'kimi-alias',
    config: {
      ...BASE_CONFIG,
      providers: {
        ...BASE_CONFIG.providers,
        anthropic: { type: 'anthropic', apiKey: 'sk-anthropic' },
      },
      models: {
        ...BASE_CONFIG.models!,
        'kimi-alias': {
          provider: 'anthropic',
          model: 'kimi-for-coding',
          maxContextSize: 200000,
          protocol: 'anthropic',
          betaApi: true,
        },
      },
    },
  });

  // The flag must show up on the resolved provider config next to the
  // provider type and model id.
  expect(provider).toMatchObject({
    betaApi: true,
    model: 'kimi-for-coding',
    type: 'anthropic',
  });
});

it('omits adaptiveThinking when alias.adaptiveThinking is unset', () => {
const resolved = resolveRuntimeProvider({
config: {
Expand Down
48 changes: 38 additions & 10 deletions packages/kosong/src/providers/anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,15 @@ export interface AnthropicOptions {
* encode a parseable Claude version. Leave undefined to infer from the name.
*/
adaptiveThinking?: boolean | undefined;
/**
* Use the Anthropic **beta** Messages API (`client.beta.messages.create`,
* `POST /v1/messages?beta=true`) instead of the standard Messages API.
*
* Beta features (`betaFeatures`) are then sent via the request `betas`
* field rather than the `anthropic-beta` header. Defaults to false, which
* keeps the standard endpoint + header behavior.
*/
betaApi?: boolean | undefined;
clientFactory?: (auth: ProviderRequestAuth) => Anthropic;
}

Expand Down Expand Up @@ -908,13 +917,15 @@ export class AnthropicChatProvider implements ChatProvider {
private _defaultHeaders: Record<string, string | null> | undefined;
private _clientFactory: ((auth: ProviderRequestAuth) => Anthropic) | undefined;
private _adaptiveThinking: boolean | undefined;
private _betaApi: boolean;
private _explicitMaxTokens: boolean;

constructor(options: AnthropicOptions) {
this._model = options.model;
this._stream = options.stream ?? true;
this._metadata = options.metadata;
this._adaptiveThinking = options.adaptiveThinking;
this._betaApi = options.betaApi ?? false;
this._apiKey =
options.apiKey === undefined || options.apiKey.length === 0 ? undefined : options.apiKey;
this._baseUrl = options.baseUrl;
Expand Down Expand Up @@ -1039,10 +1050,13 @@ export class AnthropicChatProvider implements ChatProvider {
kwargs['output_config'] = this._generationKwargs.output_config;
}

// Build beta headers
// Build the beta feature list. On the standard Messages API these travel
// via the `anthropic-beta` header; on the beta Messages API (`betaApi`) the
// SDK reads them from the request `betas` field and sets the header itself,
// so we must not also set the header (that would duplicate it).
const betas = this._generationKwargs.betaFeatures ?? [];
const extraHeaders: Record<string, string> = {};
if (betas.length > 0) {
if (!this._betaApi && betas.length > 0) {
extraHeaders['anthropic-beta'] = betas.join(',');
}

Expand Down Expand Up @@ -1074,6 +1088,10 @@ export class AnthropicChatProvider implements ChatProvider {
createParams['metadata'] = this._metadata;
}

if (this._betaApi && betas.length > 0) {
createParams['betas'] = betas;
}

const requestOptions: Record<string, unknown> = {};
const headers = mergeRequestHeaders(extraHeaders, options?.auth?.headers);
if (headers !== undefined) {
Expand All @@ -1090,10 +1108,15 @@ export class AnthropicChatProvider implements ChatProvider {
// The helper reparses accumulated input_json_delta buffers on every chunk,
// which becomes synchronous O(n^2) work for large streamed tool arguments.
try {
const stream = await client.messages.create(
{ ...createParams, stream: true } as unknown as MessageCreateParamsStreaming,
finalRequestOptions,
);
const stream = this._betaApi
? await client.beta.messages.create(
{ ...createParams, stream: true } as unknown as MessageCreateParamsStreaming,
finalRequestOptions,
)
: await client.messages.create(
{ ...createParams, stream: true } as unknown as MessageCreateParamsStreaming,
finalRequestOptions,
);
return new AnthropicStreamedMessage(stream, true);
} catch (error: unknown) {
throw convertAnthropicError(error);
Expand All @@ -1102,10 +1125,15 @@ export class AnthropicChatProvider implements ChatProvider {

// Non-streaming fallback
try {
const response = await client.messages.create(
{ ...createParams, stream: false } as unknown as MessageCreateParams,
finalRequestOptions,
);
const response = this._betaApi
? await client.beta.messages.create(
{ ...createParams, stream: false } as unknown as MessageCreateParams,
finalRequestOptions,
)
: await client.messages.create(
{ ...createParams, stream: false } as unknown as MessageCreateParams,
finalRequestOptions,
);
return new AnthropicStreamedMessage(response, false);
} catch (error: unknown) {
throw convertAnthropicError(error);
Expand Down
77 changes: 77 additions & 0 deletions packages/kosong/test/anthropic.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,83 @@ const MUL_TOOL: Tool = {
const B64_PNG =
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAA' +
'DUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';

/**
 * Drive `provider.generate()` against a mocked client and return the params
 * that reached `beta.messages.create`.
 *
 * Both `beta.messages.create` and `messages.create` on the provider's client
 * are replaced with mocks; the beta mock resolves with
 * `makeAnthropicResponse()`. After the returned stream is drained the helper
 * throws if the beta mock was never invoked and asserts that the standard
 * mock was not called.
 *
 * @returns A shallow copy of the captured params. When the call options
 *   carried a `headers` entry it is attached under `_extra_headers`.
 */
async function captureBetaRequestBody(
  provider: AnthropicChatProvider,
  systemPrompt: string,
  tools: Tool[],
  history: Message[],
): Promise<Record<string, unknown>> {
  let seenParams: Record<string, unknown> | undefined;
  let seenOptions: Record<string, unknown> | undefined;

  const betaCreate = vi.fn().mockImplementation((params: unknown, options?: unknown) => {
    seenParams = params as Record<string, unknown>;
    seenOptions = options as Record<string, unknown> | undefined;
    return Promise.resolve(makeAnthropicResponse());
  });
  const standardCreate = vi.fn();

  (provider as any)._client.beta.messages.create = betaCreate;
  (provider as any)._client.messages.create = standardCreate;

  // Consume the whole stream; the individual parts are irrelevant here.
  const parts = await provider.generate(systemPrompt, tools, history);
  for await (const chunk of parts) {
    void chunk;
  }

  if (seenParams === undefined) {
    throw new Error('Expected provider.generate() to call beta.messages.create');
  }
  expect(standardCreate).not.toHaveBeenCalled();

  const requestHeaders = seenOptions?.['headers'];
  return requestHeaders === undefined
    ? { ...seenParams }
    : { ...seenParams, _extra_headers: requestHeaders };
}

describe('betaApi', () => {
  // Beta feature identifier both cases expect to see on the request.
  const INTERLEAVED_THINKING = 'interleaved-thinking-2025-05-14';

  const history: Message[] = [
    { role: 'user', content: [{ type: 'text', text: 'Hi' }], toolCalls: [] },
  ];

  // Build a non-streaming provider; `extra` carries the per-test `betaApi` flag.
  const buildProvider = (extra: { betaApi?: boolean } = {}): AnthropicChatProvider =>
    new AnthropicChatProvider({
      model: 'kimi-for-coding',
      apiKey: 'test-key',
      defaultMaxTokens: 1024,
      stream: false,
      ...extra,
    });

  // Pull the outgoing `anthropic-beta` header (if any) out of a captured request.
  const betaHeaderOf = (request: Record<string, unknown>): string | undefined => {
    const headers = request['_extra_headers'] as Record<string, string> | undefined;
    return headers?.['anthropic-beta'];
  };

  it('routes to client.beta.messages.create with betas in the body and no beta header', async () => {
    const request = await captureBetaRequestBody(
      buildProvider({ betaApi: true }),
      '',
      [],
      history,
    );

    expect(request['betas']).toEqual([INTERLEAVED_THINKING]);
    expect(betaHeaderOf(request)).toBeUndefined();
  });

  it('keeps beta features in the anthropic-beta header when betaApi is off', async () => {
    const request = await captureRequestBody(buildProvider(), '', [], history);

    expect(request['betas']).toBeUndefined();
    expect(betaHeaderOf(request)).toContain(INTERLEAVED_THINKING);
  });
});

describe('AnthropicChatProvider', () => {
it('does not read ANTHROPIC_API_KEY from process.env inside the adapter', () => {
const previousApiKey = process.env['ANTHROPIC_API_KEY'];
Expand Down
16 changes: 16 additions & 0 deletions packages/oauth/src/managed-kimi-code.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ export interface ManagedKimiModelAlias {
capabilities?: string[] | undefined;
displayName?: string | undefined;
protocol?: ManagedKimiCodeProtocol;
betaApi?: boolean;
readonly [key: string]: unknown;
}

Expand Down Expand Up @@ -477,13 +478,28 @@ export function applyManagedKimiCodeConfig(
}
for (const model of options.models) {
const capabilities = capabilitiesForModel(model);
// Kimi's Anthropic-compatible endpoint only accepts adaptive thinking
// (`thinking: { type: 'adaptive' }`); the kosong adapter otherwise infers
// budget-based thinking from the model name, which fails for Kimi model ids.
// Restrict the override to thinking-capable models: the UI treats
// `adaptiveThinking === true` as "supports a thinking toggle", so marking a
// non-thinking model would misrepresent it.
const supportsAdaptiveThinking =
model.protocol === 'anthropic' &&
(capabilities?.includes('thinking') === true ||
capabilities?.includes('always_thinking') === true);
existingModels[managedModelKey(model.id)] = {
provider: KIMI_CODE_PROVIDER_NAME,
model: model.id,
maxContextSize: model.contextLength,
capabilities,
displayName: model.displayName,
protocol: model.protocol,
// Kimi's anthropic-compatible endpoint is served behind the beta Messages
// API (`/v1/messages?beta=true`), so route anthropic-protocol models
// through `client.beta.messages.create`.
...(model.protocol === 'anthropic' ? { betaApi: true } : {}),
...(supportsAdaptiveThinking ? { adaptiveThinking: true } : {}),
};
}

Expand Down
1 change: 1 addition & 0 deletions packages/oauth/test/managed-kimi-code.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1121,6 +1121,7 @@ describe('managed protocol routing', () => {
expect(config.models?.['kimi-code/kimi-for-coding']).toMatchObject({
provider: KIMI_CODE_PROVIDER_NAME,
protocol: 'anthropic',
betaApi: true,
});
});

Expand Down
Loading