Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 25 additions & 14 deletions opencane/providers/litellm_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,31 +115,42 @@ def _apply_cache_control(
messages: list[dict[str, Any]],
tools: list[dict[str, Any]] | None,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
"""Return copies of messages/tools with ephemeral cache_control hints."""
new_messages: list[dict[str, Any]] = []
for msg in messages:
if msg.get("role") != "system":
new_messages.append(msg)
continue
"""Return copies of messages and tools with cache_control injected.

Two breakpoints are placed:
1. System message — caches the static system prompt.
2. Second-to-last message — caches conversation history prefix.
"""
cache_marker = {"type": "ephemeral"}
new_messages = list(messages)

def _mark(msg: dict[str, Any]) -> dict[str, Any]:
content = msg.get("content")
if isinstance(content, str):
new_content: Any = [{"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}]
elif isinstance(content, list) and content:
return {
**msg,
"content": [{"type": "text", "text": content, "cache_control": cache_marker}],
}
if isinstance(content, list) and content:
new_content = list(content)
last = new_content[-1]
if isinstance(last, dict):
new_content[-1] = {**last, "cache_control": {"type": "ephemeral"}}
else:
new_messages.append(msg)
continue
new_content[-1] = {**last, "cache_control": cache_marker}
return {**msg, "content": new_content}
return msg

# Breakpoint 1: system message.
if new_messages and new_messages[0].get("role") == "system":
new_messages[0] = _mark(new_messages[0])

new_messages.append({**msg, "content": new_content})
# Breakpoint 2: second-to-last message (conversation history prefix).
if len(new_messages) >= 3:
new_messages[-2] = _mark(new_messages[-2])

new_tools = tools
if tools:
new_tools = list(tools)
new_tools[-1] = {**new_tools[-1], "cache_control": {"type": "ephemeral"}}
new_tools[-1] = {**new_tools[-1], "cache_control": cache_marker}

return new_messages, new_tools

Expand Down
10 changes: 8 additions & 2 deletions tests/test_litellm_prompt_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,12 @@ def test_supports_cache_control_respects_gateway_capability_flag() -> None:
assert not aihubmix_provider._supports_cache_control("anthropic/claude-sonnet-4-5")


def test_apply_cache_control_marks_system_and_last_tool_without_mutating_inputs() -> None:
def test_apply_cache_control_marks_system_history_breakpoint_and_last_tool() -> None:
provider = LiteLLMProvider(default_model="anthropic/claude-sonnet-4-5")
messages = [
{"role": "system", "content": "system prompt"},
{"role": "user", "content": "hello"},
{"role": "assistant", "content": "answer"},
]
tools = [
{"type": "function", "function": {"name": "tool_a"}},
Expand All @@ -51,11 +52,15 @@ def test_apply_cache_control_marks_system_and_last_tool_without_mutating_inputs(

assert isinstance(new_messages[0]["content"], list)
assert new_messages[0]["content"][0]["cache_control"]["type"] == "ephemeral"
assert isinstance(new_messages[1]["content"], list)
assert new_messages[1]["content"][0]["cache_control"]["type"] == "ephemeral"
assert new_messages[2]["content"] == "answer"
assert new_tools is not None
assert new_tools[-1]["cache_control"]["type"] == "ephemeral"

assert "cache_control" not in tools[-1]
assert messages[0]["content"] == "system prompt"
assert messages[1]["content"] == "hello"


@pytest.mark.asyncio
Expand All @@ -74,11 +79,12 @@ async def _fake_acompletion(**kwargs: Any) -> Any:
messages=[
{"role": "system", "content": "system prompt"},
{"role": "user", "content": "hello"},
{"role": "assistant", "content": "answer"},
],
tools=[{"type": "function", "function": {"name": "tool_a"}}],
)

kwargs = captured["kwargs"]
assert kwargs["messages"][0]["content"][0]["cache_control"]["type"] == "ephemeral"
assert kwargs["messages"][1]["content"][0]["cache_control"]["type"] == "ephemeral"
assert kwargs["tools"][-1]["cache_control"]["type"] == "ephemeral"

Loading