From 2b616ce904679a4e358ccad485f12b9a23d5b341 Mon Sep 17 00:00:00 2001
From: sumleo <29517764+sumleo@users.noreply.github.com>
Date: Wed, 17 Jun 2026 08:54:51 +0800
Subject: [PATCH] fix: gate prompt caching by provider, not model name only

---
 frontend/app/api/chat/route.ts | 29 +++++++++---------
 frontend/lib/ai-providers.ts   | 54 +++++++++++++++++++++++++++++-----
 2 files changed, 62 insertions(+), 21 deletions(-)
diff --git a/frontend/app/api/chat/route.ts b/frontend/app/api/chat/route.ts
index 101d895..144d79b 100644
--- a/frontend/app/api/chat/route.ts
+++ b/frontend/app/api/chat/route.ts
@@ -8,7 +8,11 @@ import {
     streamText,
 } from "ai"
 import { z } from "zod"
-import { getAIModel, supportsPromptCaching } from "@/lib/ai-providers"
+import {
+    getAIModel,
+    getCacheBreakpointProviderOptions,
+    supportsPromptCaching,
+} from "@/lib/ai-providers"
 import { findCachedResponse } from "@/lib/cached-responses"
 import {
     getTelemetryConfig,
@@ -230,13 +234,16 @@ async function handleChatRequest(req: Request): Promise<Response> {
     }
 
     // Get AI model with optional client overrides
-    const { model, providerOptions, headers, modelId } =
+    const { model, providerOptions, headers, modelId, provider } =
         getAIModel(clientOverrides)
 
-    // Check if model supports prompt caching
-    const shouldCache = supportsPromptCaching(modelId)
+    // Check if model supports prompt caching for the resolved provider
+    const shouldCache = supportsPromptCaching(modelId, provider)
+    // The cache marker shape differs per provider's AI SDK adapter
+    const cacheBreakpointProviderOptions =
+        getCacheBreakpointProviderOptions(provider)
     console.log(
-        `[Prompt Caching] ${shouldCache ? "ENABLED" : "DISABLED"} for model: ${modelId}`,
+        `[Prompt Caching] ${shouldCache ? "ENABLED" : "DISABLED"} for model: ${modelId} (provider: ${provider})`,
     )
 
     // Get the appropriate system prompt based on model (extended for Opus/Haiku 4.5)
@@ -313,9 +320,7 @@ ${lastMessageText}
             if (enhancedMessages[i].role === "assistant") {
                 enhancedMessages[i] = {
                     ...enhancedMessages[i],
-                    providerOptions: {
-                        bedrock: { cachePoint: { type: "default" } },
-                    },
+                    providerOptions: cacheBreakpointProviderOptions,
                 }
                 break // Only cache the last assistant message
             }
@@ -333,9 +338,7 @@ ${lastMessageText}
             role: "system" as const,
             content: systemMessage,
             ...(shouldCache && {
-                providerOptions: {
-                    bedrock: { cachePoint: { type: "default" } },
-                },
+                providerOptions: cacheBreakpointProviderOptions,
             }),
         },
         // Cache breakpoint 2: Previous and Current diagram XML context
@@ -343,9 +346,7 @@ ${lastMessageText}
             role: "system" as const,
             content: `${previousXml ? `Previous diagram XML (before user's last message):\n"""xml\n${previousXml}\n"""\n\n` : ""}Current diagram XML (AUTHORITATIVE - the source of truth):\n"""xml\n${xml || ""}\n"""\n\nIMPORTANT: The "Current diagram XML" is the SINGLE SOURCE OF TRUTH for what's on the canvas right now. The user can manually add, delete, or modify shapes directly in draw.io. Always count and describe elements based on the CURRENT XML, not on what you previously generated. If both previous and current XML are shown, compare them to understand what the user changed. When using edit_diagram, COPY search patterns exactly from the CURRENT XML - attribute order matters!`,
             ...(shouldCache && {
-                providerOptions: {
-                    bedrock: { cachePoint: { type: "default" } },
-                },
+                providerOptions: cacheBreakpointProviderOptions,
             }),
         },
     ]
diff --git a/frontend/lib/ai-providers.ts b/frontend/lib/ai-providers.ts
index 875888f..2fe6734 100644
--- a/frontend/lib/ai-providers.ts
+++ b/frontend/lib/ai-providers.ts
@@ -24,6 +24,7 @@ interface ModelConfig {
     providerOptions?: any
     headers?: Record<string, string>
     modelId: string
+    provider: ProviderName
 }
 
 export interface ClientOverrides {
@@ -673,19 +674,58 @@ export function getAIModel(overrides?: ClientOverrides): ModelConfig {
         providerOptions = customProviderOptions
     }
 
-    return { model, providerOptions, headers, modelId }
+    return { model, providerOptions, headers, modelId, provider }
 }
 
+// Providers whose AI SDK adapters understand Claude prompt-caching markers.
+const PROMPT_CACHING_PROVIDERS: ProviderName[] = [
+    "bedrock",
+    "anthropic",
+    "openrouter",
+]
+
 /**
- * Check if a model supports prompt caching.
- * Currently only Claude models on Bedrock support prompt caching.
+ * Check if prompt caching can be applied for the given model and provider.
+ *
+ * Prompt caching requires both a Claude model (a modelId containing "claude"
+ * or "anthropic") and a provider listed in PROMPT_CACHING_PROVIDERS. When the
+ * provider is omitted, only the model name is checked (backwards compatible).
  */
-export function supportsPromptCaching(modelId: string): boolean {
-    // Bedrock prompt caching is supported for Claude models
-    return (
+export function supportsPromptCaching(
+    modelId: string,
+    provider?: ProviderName,
+): boolean {
+    const isClaudeModel =
         modelId.includes("claude") ||
         modelId.includes("anthropic") ||
         modelId.startsWith("us.anthropic") ||
         modelId.startsWith("eu.anthropic")
-    )
+
+    if (!isClaudeModel) {
+        return false
+    }
+
+    // Without a provider we can only gate on the model name (legacy behaviour).
+    if (!provider) {
+        return true
+    }
+
+    return PROMPT_CACHING_PROVIDERS.includes(provider)
+}
+
+/**
+ * Build the provider-specific marker that flags a message for prompt caching.
+ *
+ * The marker's namespace depends on the provider: Bedrock gets
+ * `bedrock.cachePoint`, while every other provider (Anthropic, and OpenRouter
+ * Claude models) gets `anthropic.cacheControl` with type "ephemeral".
+ */
+export function getCacheBreakpointProviderOptions(
+    provider: ProviderName,
+): Record<string, unknown> {
+    if (provider === "bedrock") {
+        return { bedrock: { cachePoint: { type: "default" } } }
+    }
+
+    return { anthropic: { cacheControl: { type: "ephemeral" } } }
 }