MoonshotAI · sailist · Jun 28, 2026 · Jun 29, 2026 · chatgpt-codex-connector · Jun 28, 2026
diff --git a/.changeset/managed-kimi-code-anthropic-beta-api.md b/.changeset/managed-kimi-code-anthropic-beta-api.md
@@ -0,0 +1,5 @@
+---
+"@moonshot-ai/kimi-code": patch
+---
+
+Route managed Kimi Code models on the Anthropic-compatible protocol through the beta Messages API.
diff --git a/packages/agent-core/src/config/schema.ts b/packages/agent-core/src/config/schema.ts
@@ -50,6 +50,10 @@ export const ModelAliasSchema = z.object({
   // model-name version inference. Needed for custom-named Anthropic endpoints
   // whose model name does not encode a parseable Claude version.
   adaptiveThinking: z.boolean().optional(),
+  // Route the Anthropic transport through the beta Messages API
+  // (`POST /v1/messages?beta=true`) instead of the standard endpoint. Used by
+  // managed Kimi Code models that declare `protocol: 'anthropic'`.
+  betaApi: z.boolean().optional(),
 });
 
 export type ModelAlias = z.infer<typeof ModelAliasSchema>;

diff --git a/packages/agent-core/src/session/provider-manager.ts b/packages/agent-core/src/session/provider-manager.ts
@@ -118,7 +118,8 @@ export class ProviderManager implements ModelProvider {
       alias.maxOutputSize,
       alias.reasoningKey,
       this.options.promptCacheKey,
       alias.adaptiveThinking,
+      alias.betaApi,
     );
 
     return {
@@ -234,6 +235,7 @@ function toKosongProviderConfig(
   reasoningKey: string | undefined,
   promptCacheKey: string | undefined,
   adaptiveThinking: boolean | undefined,
+  betaApi: boolean | undefined,
 ): KosongProviderConfig {
   const effectiveType = modelProtocol === 'anthropic' ? 'anthropic' : provider.type;
   switch (effectiveType) {
@@ -249,6 +251,8 @@ function toKosongProviderConfig(
         apiKey: providerApiKey(provider),
         ...(maxOutputSize !== undefined ? { defaultMaxTokens: maxOutputSize } : {}),
         ...(adaptiveThinking !== undefined ? { adaptiveThinking } : {}),
+        // Forward `betaApi` only when the alias sets it; otherwise the provider default applies.
+        ...(betaApi !== undefined ? { betaApi } : {}),
         // Session affinity: Anthropic's analog of OpenAI `prompt_cache_key` is
         // `metadata.user_id` on the Messages API (cache-affinity / end-user id).
         ...(promptCacheKey !== undefined ? { metadata: { user_id: promptCacheKey } } : {}),

diff --git a/packages/agent-core/test/harness/runtime-provider.test.ts b/packages/agent-core/test/harness/runtime-provider.test.ts
@@ -338,6 +338,35 @@ describe('resolveRuntimeProvider maxOutputSize forwarding', () => {
     });
   });
 
+  it('forwards alias.betaApi to the anthropic provider config', () => {
+    const aliasName = 'kimi-alias';
+    const modelId = 'kimi-for-coding';
+    const { provider } = resolveRuntimeProvider({
+      model: aliasName,
+      config: {
+        ...BASE_CONFIG,
+        providers: {
+          ...BASE_CONFIG.providers,
+          anthropic: { type: 'anthropic', apiKey: 'sk-anthropic' },
+        },
+        models: {
+          ...BASE_CONFIG.models!,
+          [aliasName]: {
+            provider: 'anthropic',
+            model: modelId,
+            maxContextSize: 200000,
+            protocol: 'anthropic',
+            betaApi: true,
+          },
+        },
+      },
+    });
+
+    // Only the fields relevant to beta routing are pinned here.
+    const expected = { type: 'anthropic', model: modelId, betaApi: true };
+    expect(provider).toMatchObject(expected);
+  });
+
   it('omits adaptiveThinking when alias.adaptiveThinking is unset', () => {
     const resolved = resolveRuntimeProvider({
       config: {

diff --git a/packages/kosong/src/providers/anthropic.ts b/packages/kosong/src/providers/anthropic.ts
@@ -91,6 +91,15 @@ export interface AnthropicOptions {
    * encode a parseable Claude version. Leave undefined to infer from the name.
    */
   adaptiveThinking?: boolean | undefined;
+  /**
+   * Use the Anthropic **beta** Messages API (`client.beta.messages.create`,
+   * `POST /v1/messages?beta=true`) instead of the standard Messages API.
+   *
+   * Beta features (`betaFeatures`) are then sent via the request `betas`
+   * field rather than the `anthropic-beta` header. Defaults to false, which
+   * keeps the standard endpoint + header behavior.
+   */
+  betaApi?: boolean | undefined;
   clientFactory?: (auth: ProviderRequestAuth) => Anthropic;
 }
 
@@ -908,13 +917,15 @@ export class AnthropicChatProvider implements ChatProvider {
   private _defaultHeaders: Record<string, string | null> | undefined;
   private _clientFactory: ((auth: ProviderRequestAuth) => Anthropic) | undefined;
   private _adaptiveThinking: boolean | undefined;
+  private _betaApi: boolean;
   private _explicitMaxTokens: boolean;
 
   constructor(options: AnthropicOptions) {
     this._model = options.model;
     this._stream = options.stream ?? true;
     this._metadata = options.metadata;
     this._adaptiveThinking = options.adaptiveThinking;
+    this._betaApi = options.betaApi ?? false;
     this._apiKey =
       options.apiKey === undefined || options.apiKey.length === 0 ? undefined : options.apiKey;
     this._baseUrl = options.baseUrl;
@@ -1039,10 +1050,13 @@ export class AnthropicChatProvider implements ChatProvider {
       kwargs['output_config'] = this._generationKwargs.output_config;
     }
 
-    // Build beta headers
+    // Build the beta feature list. On the standard Messages API these travel
+    // via the `anthropic-beta` header; on the beta Messages API (`betaApi`) the
+    // SDK reads them from the request `betas` field and sets the header itself,
+    // so we must not also set the header (that would duplicate it).
     const betas = this._generationKwargs.betaFeatures ?? [];
     const extraHeaders: Record<string, string> = {};
-    if (betas.length > 0) {
+    if (!this._betaApi && betas.length > 0) {
       extraHeaders['anthropic-beta'] = betas.join(',');
     }
 
@@ -1074,6 +1088,10 @@ export class AnthropicChatProvider implements ChatProvider {
       createParams['metadata'] = this._metadata;
     }
 
+    if (this._betaApi && betas.length > 0) {
+      createParams['betas'] = betas;
+    }
+
     const requestOptions: Record<string, unknown> = {};
     const headers = mergeRequestHeaders(extraHeaders, options?.auth?.headers);
     if (headers !== undefined) {
@@ -1090,10 +1108,15 @@ export class AnthropicChatProvider implements ChatProvider {
       // The helper reparses accumulated input_json_delta buffers on every chunk,
       // which becomes synchronous O(n^2) work for large streamed tool arguments.
       try {
-        const stream = await client.messages.create(
-          { ...createParams, stream: true } as unknown as MessageCreateParamsStreaming,
-          finalRequestOptions,
-        );
+        const stream = this._betaApi
+          ? await client.beta.messages.create(
+              { ...createParams, stream: true } as unknown as MessageCreateParamsStreaming,
+              finalRequestOptions,
+            )
+          : await client.messages.create(
+              { ...createParams, stream: true } as unknown as MessageCreateParamsStreaming,
+              finalRequestOptions,
+            );
         return new AnthropicStreamedMessage(stream, true);
       } catch (error: unknown) {
         throw convertAnthropicError(error);
@@ -1102,10 +1125,15 @@ export class AnthropicChatProvider implements ChatProvider {
 
     // Non-streaming fallback
     try {
-      const response = await client.messages.create(
-        { ...createParams, stream: false } as unknown as MessageCreateParams,
-        finalRequestOptions,
-      );
+      const response = this._betaApi
+        ? await client.beta.messages.create(
+            { ...createParams, stream: false } as unknown as MessageCreateParams,
+            finalRequestOptions,
+          )
+        : await client.messages.create(
+            { ...createParams, stream: false } as unknown as MessageCreateParams,
+            finalRequestOptions,
+          );
       return new AnthropicStreamedMessage(response, false);
     } catch (error: unknown) {
       throw convertAnthropicError(error);

diff --git a/packages/kosong/test/anthropic.test.ts b/packages/kosong/test/anthropic.test.ts
@@ -141,6 +141,83 @@ const MUL_TOOL: Tool = {
 const B64_PNG =
   'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAA' +
   'DUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
+
+/**
+ * Stub both Messages API entry points on the provider's client, run one
+ * generation (non-stream mode), and return the params handed to the beta
+ * endpoint. Fails if the standard endpoint is invoked instead.
+ */
+async function captureBetaRequestBody(
+  provider: AnthropicChatProvider,
+  systemPrompt: string,
+  tools: Tool[],
+  history: Message[],
+): Promise<Record<string, unknown>> {
+  let seenParams: Record<string, unknown> | undefined;
+  let seenOptions: Record<string, unknown> | undefined;
+  const recordCall = (params: unknown, options?: unknown) => {
+    seenParams = params as Record<string, unknown>;
+    seenOptions = options as Record<string, unknown> | undefined;
+    return Promise.resolve(makeAnthropicResponse());
+  };
+
+  const standardCreate = vi.fn();
+  (provider as any)._client.messages.create = standardCreate;
+  (provider as any)._client.beta.messages.create = vi.fn().mockImplementation(recordCall);
+
+  // Exhaust the returned stream; the individual parts are irrelevant here.
+  const stream = await provider.generate(systemPrompt, tools, history);
+  for await (const part of stream) {
+    void part;
+  }
+
+  if (seenParams === undefined) {
+    throw new Error('Expected provider.generate() to call beta.messages.create');
+  }
+  expect(standardCreate).not.toHaveBeenCalled();
+
+  // Surface request headers (when present) under a synthetic key.
+  const requestHeaders = seenOptions === undefined ? undefined : seenOptions['headers'];
+  return requestHeaders === undefined
+    ? { ...seenParams }
+    : { ...seenParams, _extra_headers: requestHeaders };
+}
+
+describe('betaApi', () => {
+  const INTERLEAVED_THINKING = 'interleaved-thinking-2025-05-14';
+  const history: Message[] = [
+    { role: 'user', content: [{ type: 'text', text: 'Hi' }], toolCalls: [] },
+  ];
+  // Options shared by both cases; only `betaApi` differs between them.
+  const baseOptions = {
+    model: 'kimi-for-coding',
+    apiKey: 'test-key',
+    defaultMaxTokens: 1024,
+    stream: false,
+  };
+  // Reads the `anthropic-beta` value from the captured request headers, if any.
+  const betaHeader = (body: Record<string, unknown>): string | undefined => {
+    const headers = body['_extra_headers'] as Record<string, string> | undefined;
+    return headers?.['anthropic-beta'];
+  };
+
+  it('routes to client.beta.messages.create with betas in the body and no beta header', async () => {
+    const provider = new AnthropicChatProvider({ ...baseOptions, betaApi: true });
+    const body = await captureBetaRequestBody(provider, '', [], history);
+
+    expect(body['betas']).toEqual([INTERLEAVED_THINKING]);
+    expect(betaHeader(body)).toBeUndefined();
+  });
+
+  it('keeps beta features in the anthropic-beta header when betaApi is off', async () => {
+    const provider = new AnthropicChatProvider({ ...baseOptions });
+    const body = await captureRequestBody(provider, '', [], history);
+
+    expect(body['betas']).toBeUndefined();
+    expect(betaHeader(body)).toContain(INTERLEAVED_THINKING);
+  });
+});
+
 describe('AnthropicChatProvider', () => {
   it('does not read ANTHROPIC_API_KEY from process.env inside the adapter', () => {
     const previousApiKey = process.env['ANTHROPIC_API_KEY'];

diff --git a/packages/oauth/src/managed-kimi-code.ts b/packages/oauth/src/managed-kimi-code.ts
@@ -127,6 +127,7 @@ export interface ManagedKimiModelAlias {
   capabilities?: string[] | undefined;
   displayName?: string | undefined;
   protocol?: ManagedKimiCodeProtocol;
+  betaApi?: boolean;
   readonly [key: string]: unknown;
 }
 
@@ -477,13 +478,28 @@ export function applyManagedKimiCodeConfig(
   }
   for (const model of options.models) {
     const capabilities = capabilitiesForModel(model);
+    // Kimi's Anthropic-compatible endpoint only accepts adaptive thinking
+    // (`thinking: { type: 'adaptive' }`); the kosong adapter otherwise infers
+    // budget-based thinking from the model name, which fails for Kimi model ids.
+    // Restrict the override to thinking-capable models: the UI treats
+    // `adaptiveThinking === true` as "supports a thinking toggle", so marking a
+    // non-thinking model would misrepresent it.
+    const supportsAdaptiveThinking =
+      model.protocol === 'anthropic' &&
+      (capabilities?.includes('thinking') === true ||
+        capabilities?.includes('always_thinking') === true);
     existingModels[managedModelKey(model.id)] = {
       provider: KIMI_CODE_PROVIDER_NAME,
       model: model.id,
       maxContextSize: model.contextLength,
       capabilities,
       displayName: model.displayName,
       protocol: model.protocol,
+      // Kimi's anthropic-compatible endpoint is served behind the beta Messages
+      // API (`/v1/messages?beta=true`), so route anthropic-protocol models
+      // through `client.beta.messages.create`.
+      ...(model.protocol === 'anthropic' ? { betaApi: true } : {}),
+      ...(supportsAdaptiveThinking ? { adaptiveThinking: true } : {}),
     };
   }
 

diff --git a/packages/oauth/test/managed-kimi-code.test.ts b/packages/oauth/test/managed-kimi-code.test.ts
@@ -1121,6 +1121,7 @@ describe('managed protocol routing', () => {
     expect(config.models?.['kimi-code/kimi-for-coding']).toMatchObject({
       provider: KIMI_CODE_PROVIDER_NAME,
       protocol: 'anthropic',
+      betaApi: true,
     });
   });