Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 15 additions & 14 deletions frontend/app/api/chat/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ import {
streamText,
} from "ai"
import { z } from "zod"
import { getAIModel, supportsPromptCaching } from "@/lib/ai-providers"
import {
getAIModel,
getCacheBreakpointProviderOptions,
supportsPromptCaching,
} from "@/lib/ai-providers"
import { findCachedResponse } from "@/lib/cached-responses"
import {
getTelemetryConfig,
Expand Down Expand Up @@ -230,13 +234,16 @@ async function handleChatRequest(req: Request): Promise<Response> {
}

// Get AI model with optional client overrides
const { model, providerOptions, headers, modelId } =
const { model, providerOptions, headers, modelId, provider } =
getAIModel(clientOverrides)

// Check if model supports prompt caching
const shouldCache = supportsPromptCaching(modelId)
// Check if model supports prompt caching for the resolved provider
const shouldCache = supportsPromptCaching(modelId, provider)
// The cache marker shape differs per provider's AI SDK adapter
const cacheBreakpointProviderOptions =
getCacheBreakpointProviderOptions(provider)
console.log(
`[Prompt Caching] ${shouldCache ? "ENABLED" : "DISABLED"} for model: ${modelId}`,
`[Prompt Caching] ${shouldCache ? "ENABLED" : "DISABLED"} for model: ${modelId} (provider: ${provider})`,
)

// Get the appropriate system prompt based on model (extended for Opus/Haiku 4.5)
Expand Down Expand Up @@ -313,9 +320,7 @@ ${lastMessageText}
if (enhancedMessages[i].role === "assistant") {
enhancedMessages[i] = {
...enhancedMessages[i],
providerOptions: {
bedrock: { cachePoint: { type: "default" } },
},
providerOptions: cacheBreakpointProviderOptions,
}
break // Only cache the last assistant message
}
Expand All @@ -333,19 +338,15 @@ ${lastMessageText}
role: "system" as const,
content: systemMessage,
...(shouldCache && {
providerOptions: {
bedrock: { cachePoint: { type: "default" } },
},
providerOptions: cacheBreakpointProviderOptions,
}),
},
// Cache breakpoint 2: Previous and Current diagram XML context
{
role: "system" as const,
content: `${previousXml ? `Previous diagram XML (before user's last message):\n"""xml\n${previousXml}\n"""\n\n` : ""}Current diagram XML (AUTHORITATIVE - the source of truth):\n"""xml\n${xml || ""}\n"""\n\nIMPORTANT: The "Current diagram XML" is the SINGLE SOURCE OF TRUTH for what's on the canvas right now. The user can manually add, delete, or modify shapes directly in draw.io. Always count and describe elements based on the CURRENT XML, not on what you previously generated. If both previous and current XML are shown, compare them to understand what the user changed. When using edit_diagram, COPY search patterns exactly from the CURRENT XML - attribute order matters!`,
...(shouldCache && {
providerOptions: {
bedrock: { cachePoint: { type: "default" } },
},
providerOptions: cacheBreakpointProviderOptions,
}),
},
]
Expand Down
54 changes: 47 additions & 7 deletions frontend/lib/ai-providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ interface ModelConfig {
providerOptions?: any
headers?: Record<string, string>
modelId: string
provider: ProviderName
}

export interface ClientOverrides {
Expand Down Expand Up @@ -673,19 +674,58 @@ export function getAIModel(overrides?: ClientOverrides): ModelConfig {
providerOptions = customProviderOptions
}

return { model, providerOptions, headers, modelId }
return { model, providerOptions, headers, modelId, provider }
}

// Providers whose AI SDK adapters understand Claude prompt-caching markers.
const PROMPT_CACHING_PROVIDERS: ProviderName[] = [
    "bedrock",
    "anthropic",
    "openrouter",
]

/**
 * Check if prompt caching can be applied for the given model and provider.
 *
 * Prompt caching requires both a Claude model and a provider listed in
 * `PROMPT_CACHING_PROVIDERS`. When the provider is omitted the check falls
 * back to the model name only (backwards compatible with the previous
 * single-argument signature).
 *
 * @param modelId - Model identifier to inspect; matched by substring.
 * @param provider - Resolved provider name. Optional; when absent (or
 *   otherwise falsy) only the model name is considered.
 * @returns `true` when the model id looks like a Claude/Anthropic model and
 *   the provider, if given, is one of the caching-capable providers.
 */
export function supportsPromptCaching(
    modelId: string,
    provider?: ProviderName,
): boolean {
    // A substring match on "anthropic" also covers region-prefixed ids such
    // as "us.anthropic…" / "eu.anthropic…", so no separate prefix checks are
    // needed.
    const isClaudeModel =
        modelId.includes("claude") || modelId.includes("anthropic")

    if (!isClaudeModel) {
        return false
    }

    // Without a provider we can only gate on the model name (legacy behaviour).
    if (!provider) {
        return true
    }

    return PROMPT_CACHING_PROVIDERS.includes(provider)
}

/**
 * Produce the `providerOptions` fragment that marks a message as a
 * prompt-cache breakpoint for the given provider.
 *
 * The marker lives under a provider-specific namespace:
 * - `"bedrock"` gets `{ bedrock: { cachePoint: { type: "default" } } }`
 * - every other provider gets
 *   `{ anthropic: { cacheControl: { type: "ephemeral" } } }`
 *
 * A new object is built on every call, so callers may attach the result to a
 * message without sharing state between messages.
 *
 * @param provider - Resolved provider name.
 * @returns The cache-marker options object for that provider.
 */
export function getCacheBreakpointProviderOptions(
    provider: ProviderName,
): Record<string, unknown> {
    const isBedrock = provider === "bedrock"

    return isBedrock
        ? { bedrock: { cachePoint: { type: "default" } } }
        : { anthropic: { cacheControl: { type: "ephemeral" } } }
}