
Commit 79ea09b (1 parent: 6b80f54)

ensure cache keys change when profile content changes. add e2e tests to ensure cache is being used on actual queries

19 files changed: +935 additions, −698 deletions

specs/llm_cache_design.md

Lines changed: 3 additions & 3 deletions

@@ -219,7 +219,7 @@ from pydantic import BaseModel, Field, field_validator
 
 class CacheBackend(str, Enum):
     """Cache backend implementations."""
-    SQLITE = "sqlite"
+    LOCAL = "local"
     MEMORY = "memory"
     DISABLED = "disabled"
 
@@ -228,7 +228,7 @@ class CacheConfig(BaseModel):
 
     Attributes:
         enabled: Whether caching is enabled (default: True)
-        backend: Cache backend to use (default: SQLITE)
+        backend: Cache backend to use (default: LOCAL)
        ttl: Time-to-live duration string (default: "1h")
            Examples: "30m", "2h", "7d"
        max_size_mb: Maximum cache size before LRU eviction (default: 1000)
@@ -261,7 +261,7 @@ class CacheConfig(BaseModel):
     """
 
     enabled: bool = Field(default=True)
-    backend: CacheBackend = Field(default=CacheBackend.SQLITE)
+    backend: CacheBackend = Field(default=CacheBackend.LOCAL)
     ttl: str = Field(default="1h")
     max_size_mb: int = Field(default=1000, gt=0, le=100000)
     namespace: str = Field(default="default")
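
Note: the rename means configs that previously selected backend="sqlite" must now select backend="local". A self-contained sketch of the models as defined in the spec hunk above (defaults mirror the spec; this is an illustration, not the shipped module):

from enum import Enum

from pydantic import BaseModel, Field

class CacheBackend(str, Enum):
    """Cache backend implementations."""
    LOCAL = "local"  # renamed from SQLITE = "sqlite"
    MEMORY = "memory"
    DISABLED = "disabled"

class CacheConfig(BaseModel):
    enabled: bool = Field(default=True)
    backend: CacheBackend = Field(default=CacheBackend.LOCAL)
    ttl: str = Field(default="1h")
    max_size_mb: int = Field(default=1000, gt=0, le=100000)
    namespace: str = Field(default="default")

# String values coerce through the (str, Enum) backend type:
assert CacheConfig(backend="local").backend is CacheBackend.LOCAL
# CacheConfig(backend="sqlite") now raises a pydantic ValidationError.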

src/fenic/_backends/local/session_state.py

Lines changed: 1 addition & 1 deletion

@@ -88,7 +88,7 @@ def _initialize_cache(self, config: ResolvedSessionConfig, base_path: Path):
         logger.info(
             f"Initializing LLM cache at {cache_db_path} with TTL {cache_config.ttl}"
         )
-        if cache_config.backend == CacheBackend.SQLITE:
+        if cache_config.backend == CacheBackend.LOCAL:
             return SQLiteLLMCache(
                 db_path=str(cache_db_path),
                 ttl_seconds=cache_config.ttl_seconds,

src/fenic/_inference/anthropic/anthropic_batch_chat_completions_client.py

Lines changed: 13 additions & 0 deletions

@@ -5,6 +5,9 @@
 if TYPE_CHECKING:
     from fenic._inference.cache.protocol import LLMResponseCache
 
+import json
+from dataclasses import asdict
+
 import anthropic
 from anthropic import (
     AnthropicError,
@@ -120,6 +123,16 @@ def __init__(
             default_profile_name=default_profile_name,
         )
 
+    def get_profile_hash(self, profile_name: Optional[str]) -> Optional[str]:
+        """Get hash of the resolved profile configuration."""
+        try:
+            profile = self._profile_manager.get_profile_by_name(profile_name)
+            profile_data = asdict(profile)
+            serialized = json.dumps(profile_data, sort_keys=True, default=str)
+            return str(hash(serialized))
+        except Exception:
+            return None
+
     async def make_single_request(
         self, request: FenicCompletionsRequest
     ) -> Union[None, FenicCompletionsResponse, TransientException, FatalException]:
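
The get_profile_hash method above (repeated with minor variations in the Cohere and Gemini clients later in this commit) serializes the resolved profile dataclass to deterministic JSON and hashes it, so any edit to the profile's content produces a different hash. A standalone illustration of that property, with a stand-in dataclass (Profile and its field are hypothetical, not fenic types):

import json
from dataclasses import asdict, dataclass

@dataclass
class Profile:
    # Stand-in for a resolved provider profile.
    thinking_token_budget: int = 0

def profile_hash(profile: Profile) -> str:
    # Same recipe as the method above: sorted-key JSON, then hash.
    return str(hash(json.dumps(asdict(profile), sort_keys=True, default=str)))

# Changing profile content changes the hash, and therefore the cache key downstream.
assert profile_hash(Profile(0)) != profile_hash(Profile(1024))

Since Python's built-in hash of strings is salted per interpreter process, these values are stable within a session but differ across restarts.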

src/fenic/_inference/cache/__init__.py

Lines changed: 0 additions & 2 deletions

@@ -1,13 +1,11 @@
 """LLM response caching for Fenic."""
 
-from fenic._inference.cache.key_generator import CacheKeyGenerator
 from fenic._inference.cache.protocol import CachedResponse, CacheStats, LLMResponseCache
 from fenic._inference.cache.sqlite_cache import SQLiteLLMCache
 
 __all__ = [
     "CachedResponse",
     "CacheStats",
     "LLMResponseCache",
-    "CacheKeyGenerator",
     "SQLiteLLMCache",
 ]

src/fenic/_inference/cache/key_generator.py

Lines changed: 0 additions & 89 deletions
This file was deleted.

src/fenic/_inference/cache/protocol.py

Lines changed: 26 additions & 4 deletions

@@ -2,9 +2,14 @@
 
 from dataclasses import dataclass
 from datetime import datetime
-from typing import Dict, List, Optional, Protocol
+from typing import Dict, List, Optional, Protocol, Union
 
-from fenic._inference.types import FenicCompletionsResponse, ResponseUsage
+from fenic._inference.types import (
+    FenicCompletionsRequest,
+    FenicCompletionsResponse,
+    FenicEmbeddingsRequest,
+    ResponseUsage,
+)
 
 
 @dataclass
@@ -143,9 +148,26 @@ def set(
         ```
         """
 
-    def get(self, cache_key: str) -> Optional[CachedResponse]:
-        """Retrieve cached response.
+    def compute_key(
+        self,
+        request: Union[FenicCompletionsRequest, FenicEmbeddingsRequest],
+        model: str,
+        profile_hash: Optional[str] = None,
+    ) -> str:
+        """Compute a deterministic cache key for a request.
+
+        Args:
+            request: The request object (e.g. FenicCompletionsRequest or FenicEmbeddingsRequest).
+            model: The model name.
+            profile_hash: Optional hash of the resolved model profile configuration.
 
+        Returns:
+            A unique cache key string.
+        """
+        ...
+
+    def get(self, cache_key: str) -> Optional[CachedResponse]:
+        """Retrieve a cached response.
         Args:
             cache_key: Unique key for the cached response.

src/fenic/_inference/cache/sqlite_cache.py

Lines changed: 51 additions & 2 deletions

@@ -1,16 +1,21 @@
 """SQLite-backed LLM response cache implementation."""
 
+import hashlib
 import json
 import logging
 import queue
 import sqlite3
 import threading
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
 
 from fenic._inference.cache.protocol import CachedResponse, CacheStats, LLMResponseCache
-from fenic._inference.types import FenicCompletionsResponse
+from fenic._inference.types import (
+    FenicCompletionsRequest,
+    FenicCompletionsResponse,
+    FenicEmbeddingsRequest,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -132,6 +137,50 @@ def __init__(
             f"(ttl={ttl_seconds}s, max_size={max_size_mb}MB, namespace={namespace}, pool_size={max_connections})"
         )
 
+    def compute_key(
+        self,
+        request: Union[FenicCompletionsRequest, FenicEmbeddingsRequest],
+        model: str,
+        profile_hash: Optional[str] = None,
+    ) -> str:
+        """Compute SHA-256 hash of request parameters.
+
+        Args:
+            request: The completion or embedding request to hash. Currently only FenicCompletionsRequest
+                is supported. FenicEmbeddingsRequest support will be added in a future PR.
+            model: The model name.
+            profile_hash: Optional hash of the resolved model profile configuration.
+
+        Returns:
+            64-character hexadecimal SHA-256 hash string.
+        """
+        if isinstance(request, FenicCompletionsRequest):
+            # Build key data with all relevant parameters
+            key_data = {
+                "model": model,
+                "messages": request.messages.encode().hex(),
+                "max_tokens": request.max_completion_tokens,
+                "temperature": request.temperature,
+                "model_profile": request.model_profile,
+                "profile_hash": profile_hash,
+                "top_logprobs": request.top_logprobs,
+            }
+
+            # Include structured output schema if present
+            if request.structured_output:
+                key_data["structured_output"] = request.structured_output.schema_fingerprint
+
+        elif isinstance(request, FenicEmbeddingsRequest):
+            raise NotImplementedError("Embedding requests are not yet supported for caching.")
+        else:
+            raise ValueError(f"Unsupported request type for caching: {type(request)}")
+
+        # Serialize to JSON with deterministic ordering
+        serialized = json.dumps(key_data, sort_keys=True).encode("utf-8")
+
+        # Compute SHA-256 hash
+        return hashlib.sha256(serialized).hexdigest()
+
     def _create_connection(self) -> sqlite3.Connection:
         """Create a new database connection with proper settings.
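
The property this commit's tests target, isolated as a runnable sketch: with sorted-key JSON serialization, identical key data always yields the same SHA-256 digest, while changing only profile_hash yields a different one (the key_data dict here is a simplified stand-in for the one compute_key builds above):

import hashlib
import json

def sketch_key(key_data: dict) -> str:
    # Same final two steps as compute_key above.
    serialized = json.dumps(key_data, sort_keys=True).encode("utf-8")
    return hashlib.sha256(serialized).hexdigest()

base = {"model": "m", "messages": "68692e", "profile_hash": "v1"}
assert sketch_key(base) == sketch_key(dict(reversed(list(base.items()))))  # insertion order is irrelevant
assert sketch_key({**base, "profile_hash": "v2"}) != sketch_key(base)      # profile change -> new key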

src/fenic/_inference/cohere/cohere_batch_embeddings_client.py

Lines changed: 12 additions & 0 deletions

@@ -1,5 +1,7 @@
 import hashlib
+import json
 import logging
+from dataclasses import asdict
 from typing import List, Optional, Union
 
 import cohere
@@ -72,6 +74,16 @@ def __init__(
             default_profile_name=default_profile_name,
         )
 
+    def get_profile_hash(self, profile_name: Optional[str]) -> Optional[str]:
+        """Get hash of the resolved profile configuration."""
+        try:
+            profile = self._profile_manager.get_profile_by_name(profile_name)
+            profile_data = asdict(profile)
+            serialized = json.dumps(profile_data, sort_keys=True, default=str)
+            return str(hash(serialized))
+        except Exception:
+            return None
+
     async def make_single_request(
         self, request: FenicEmbeddingsRequest
     ) -> Union[None, List[float], TransientException, FatalException]:

src/fenic/_inference/google/gemini_batch_embeddings_client.py

Lines changed: 12 additions & 0 deletions

@@ -1,4 +1,6 @@
 import hashlib
+import json
+from dataclasses import asdict
 from typing import List, Optional, Union
 
 from google.genai.errors import ClientError, ServerError
@@ -59,6 +61,16 @@ def __init__(
             default_profile_name=default_profile_name,
         )
 
+    def get_profile_hash(self, profile_name: Optional[str]) -> Optional[str]:
+        """Get hash of the resolved profile configuration."""
+        try:
+            profile = self._profile_manager.get_profile_by_name(profile_name)
+            profile_data = asdict(profile)
+            serialized = json.dumps(profile_data, sort_keys=True, default=str)
+            return str(hash(serialized))
+        except Exception:
+            return None
+
     async def make_single_request(
         self, request: FenicEmbeddingsRequest
     ) -> Union[None, List[float], TransientException, FatalException]:

src/fenic/_inference/google/gemini_native_chat_completions_client.py

Lines changed: 16 additions & 0 deletions

@@ -5,6 +5,9 @@
 if TYPE_CHECKING:
     from fenic._inference.cache.protocol import LLMResponseCache
 
+import json
+from dataclasses import asdict
+
 from google.genai.errors import ClientError, ServerError
 from google.genai.types import (
     FinishReason,
@@ -114,6 +117,19 @@ def __init__(
             default_profile_name=default_profile_name,
         )
 
+    def get_profile_hash(self, profile_name: Optional[str]) -> Optional[str]:
+        """Get hash of the resolved profile configuration."""
+        try:
+            profile = self._profile_manager.get_profile_by_name(profile_name)
+            # Serialize profile to JSON string and hash it.
+            # Using default=str to handle any non-serializable types if present.
+            profile_data = asdict(profile)
+            serialized = json.dumps(profile_data, sort_keys=True, default=str)
+            return str(hash(serialized))
+        except Exception as e:
+            logger.warning(f"Failed to hash profile {profile_name}: {e}")
+            return None
+
     def reset_metrics(self):
         """Reset metrics to initial state."""
         self._metrics = LMMetrics()
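
Taken together, the intended flow is: a client hashes its resolved profile, the cache folds that hash into the request key, and an edited profile therefore misses the cache. A runnable schematic of that call order using stub classes (everything below is a stand-in; only the method names and call pattern mirror this commit):

import asyncio
import hashlib
import json
from typing import Optional

class StubClient:
    def get_profile_hash(self, profile_name: Optional[str]) -> Optional[str]:
        return "abc123"  # real clients hash the resolved profile, as above

    async def make_single_request(self, request: dict) -> str:
        return "model response"

class StubCache:
    def __init__(self):
        self._store: dict = {}

    def compute_key(self, request: dict, model: str, profile_hash: Optional[str]) -> str:
        payload = {"request": request, "model": model, "profile_hash": profile_hash}
        return hashlib.sha256(json.dumps(payload, sort_keys=True).encode()).hexdigest()

    def get(self, key: str) -> Optional[str]:
        return self._store.get(key)

    def set(self, key: str, value: str) -> None:
        self._store[key] = value

async def answer(client: StubClient, cache: StubCache, request: dict, model: str) -> str:
    profile_hash = client.get_profile_hash(request.get("model_profile"))
    key = cache.compute_key(request, model=model, profile_hash=profile_hash)
    cached = cache.get(key)
    if cached is not None:
        return cached  # cache hit: no provider call
    response = await client.make_single_request(request)
    cache.set(key, response)  # the real protocol's set() is defined earlier in protocol.py
    return response

request = {"messages": "hi", "model_profile": "default"}
client, cache = StubClient(), StubCache()
assert asyncio.run(answer(client, cache, request, "gemini-2.0-flash")) == "model response"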
