From 2b86e1f41d65c8d18f7e49b65202d5b15014c106 Mon Sep 17 00:00:00 2001 From: Arbit Chen Date: Sun, 15 Mar 2026 06:36:58 -0700 Subject: [PATCH] feat: add in-memory route cache to Router --- src/tokenwise/router.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/tokenwise/router.py b/src/tokenwise/router.py index e7d4a8b..3790917 100644 --- a/src/tokenwise/router.py +++ b/src/tokenwise/router.py @@ -45,6 +45,9 @@ "complex": (2000, 1000), } +# Simple in-memory route cache: key = (query, strategy, budget) -> model_id +_route_cache: dict[tuple, str] = {} + def _detect_capabilities(query: str) -> list[str]: """Detect likely required capabilities from query text.""" @@ -127,6 +130,15 @@ def route( if isinstance(strategy, str): strategy = RoutingStrategy(strategy) + # Check cache first + cache_key = (query, str(strategy), budget) + if cache_key in _route_cache: + cached_id = _route_cache[cache_key] + models = self.registry.find_models() + for m in models: + if m.id == cached_id: + return m + # ── Stage 1: Scenario detection ────────────────────────────── primary_cap, complexity = self._detect_scenario(query, required_capability) @@ -156,11 +168,15 @@ def route( # Apply strategy preference if strategy == RoutingStrategy.CHEAPEST: - return self._route_cheapest(candidates) + result = self._route_cheapest(candidates) elif strategy == RoutingStrategy.BEST_QUALITY: - return self._route_best_quality(candidates) + result = self._route_best_quality(candidates) else: # balanced - return self._route_balanced(candidates, complexity) + result = self._route_balanced(candidates, complexity) + + # Store in cache + _route_cache[cache_key] = result.id + return result def route_with_trace( self,