
Commit 79ea09b (1 parent: 6b80f54)

ensure cache keys change when profile content changes. add e2e tests to ensure cache is being used on actual queries

19 files changed: +935 additions, −698 deletions

specs/llm_cache_design.md

Lines changed: 3 additions & 3 deletions

@@ -219,7 +219,7 @@ from pydantic import BaseModel, Field, field_validator
 
 class CacheBackend(str, Enum):
     """Cache backend implementations."""
-    SQLITE = "sqlite"
+    LOCAL = "local"
     MEMORY = "memory"
     DISABLED = "disabled"
 
@@ -228,7 +228,7 @@ class CacheConfig(BaseModel):
 
     Attributes:
         enabled: Whether caching is enabled (default: True)
-        backend: Cache backend to use (default: SQLITE)
+        backend: Cache backend to use (default: LOCAL)
        ttl: Time-to-live duration string (default: "1h")
            Examples: "30m", "2h", "7d"
        max_size_mb: Maximum cache size before LRU eviction (default: 1000)
@@ -261,7 +261,7 @@ class CacheConfig(BaseModel):
     """
 
     enabled: bool = Field(default=True)
-    backend: CacheBackend = Field(default=CacheBackend.SQLITE)
+    backend: CacheBackend = Field(default=CacheBackend.LOCAL)
     ttl: str = Field(default="1h")
     max_size_mb: int = Field(default=1000, gt=0, le=100000)
     namespace: str = Field(default="default")
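
Note: the rename means configs that previously selected backend="sqlite" must now select backend="local". A self-contained sketch of the models as defined in the spec hunk above (defaults mirror the spec; this is an illustration, not the shipped module):

from enum import Enum

from pydantic import BaseModel, Field

class CacheBackend(str, Enum):
    """Cache backend implementations."""
    LOCAL = "local"  # renamed from SQLITE = "sqlite"
    MEMORY = "memory"
    DISABLED = "disabled"

class CacheConfig(BaseModel):
    enabled: bool = Field(default=True)
    backend: CacheBackend = Field(default=CacheBackend.LOCAL)
    ttl: str = Field(default="1h")
    max_size_mb: int = Field(default=1000, gt=0, le=100000)
    namespace: str = Field(default="default")

# String values coerce through the (str, Enum) backend type:
assert CacheConfig(backend="local").backend is CacheBackend.LOCAL
# CacheConfig(backend="sqlite") now raises a pydantic ValidationError.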

src/fenic/_backends/local/session_state.py

Lines changed: 1 addition & 1 deletion

@@ -88,7 +88,7 @@ def _initialize_cache(self, config: ResolvedSessionConfig, base_path: Path):
         logger.info(
             f"Initializing LLM cache at {cache_db_path} with TTL {cache_config.ttl}"
         )
-        if cache_config.backend == CacheBackend.SQLITE:
+        if cache_config.backend == CacheBackend.LOCAL:
             return SQLiteLLMCache(
                 db_path=str(cache_db_path),
                 ttl_seconds=cache_config.ttl_seconds,

src/fenic/_inference/anthropic/anthropic_batch_chat_completions_client.py

Lines changed: 13 additions & 0 deletions

@@ -5,6 +5,9 @@
 if TYPE_CHECKING:
     from fenic._inference.cache.protocol import LLMResponseCache
 
+import json
+from dataclasses import asdict
+
 import anthropic
 from anthropic import (
     AnthropicError,
@@ -120,6 +123,16 @@ def __init__(
             default_profile_name=default_profile_name,
         )
 
+    def get_profile_hash(self, profile_name: Optional[str]) -> Optional[str]:
+        """Get hash of the resolved profile configuration."""
+        try:
+            profile = self._profile_manager.get_profile_by_name(profile_name)
+            profile_data = asdict(profile)
+            serialized = json.dumps(profile_data, sort_keys=True, default=str)
+            return str(hash(serialized))
+        except Exception:
+            return None
+
     async def make_single_request(
         self, request: FenicCompletionsRequest
     ) -> Union[None, FenicCompletionsResponse, TransientException, FatalException]:
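
The get_profile_hash method above (repeated with minor variations in the Cohere and Gemini clients later in this commit) serializes the resolved profile dataclass to deterministic JSON and hashes it, so any edit to the profile's content produces a different hash. A standalone illustration of that property, with a stand-in dataclass (Profile and its field are hypothetical, not fenic types):

import json
from dataclasses import asdict, dataclass

@dataclass
class Profile:
    # Stand-in for a resolved provider profile.
    thinking_token_budget: int = 0

def profile_hash(profile: Profile) -> str:
    # Same recipe as the method above: sorted-key JSON, then hash.
    return str(hash(json.dumps(asdict(profile), sort_keys=True, default=str)))

# Changing profile content changes the hash, and therefore the cache key downstream.
assert profile_hash(Profile(0)) != profile_hash(Profile(1024))

Since Python's built-in hash of strings is salted per interpreter process, these values are stable within a session but differ across restarts.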

src/fenic/_inference/cache/__init__.py

Lines changed: 0 additions & 2 deletions

@@ -1,13 +1,11 @@
 """LLM response caching for Fenic."""
 
-from fenic._inference.cache.key_generator import CacheKeyGenerator
 from fenic._inference.cache.protocol import CachedResponse, CacheStats, LLMResponseCache
 from fenic._inference.cache.sqlite_cache import SQLiteLLMCache
 
 __all__ = [
     "CachedResponse",
     "CacheStats",
     "LLMResponseCache",
-    "CacheKeyGenerator",
     "SQLiteLLMCache",
 ]

src/fenic/_inference/cache/key_generator.py

Lines changed: 0 additions & 89 deletions
This file was deleted.

src/fenic/_inference/cache/protocol.py

Lines changed: 26 additions & 4 deletions

@@ -2,9 +2,14 @@
 
 from dataclasses import dataclass
 from datetime import datetime
-from typing import Dict, List, Optional, Protocol
+from typing import Dict, List, Optional, Protocol, Union
 
-from fenic._inference.types import FenicCompletionsResponse, ResponseUsage
+from fenic._inference.types import (
+    FenicCompletionsRequest,
+    FenicCompletionsResponse,
+    FenicEmbeddingsRequest,
+    ResponseUsage,
+)
 
 
 @dataclass
@@ -143,9 +148,26 @@ def set(
         ```
         """
 
-    def get(self, cache_key: str) -> Optional[CachedResponse]:
-        """Retrieve cached response.
+    def compute_key(
+        self,
+        request: Union[FenicCompletionsRequest, FenicEmbeddingsRequest],
+        model: str,
+        profile_hash: Optional[str] = None,
+    ) -> str:
+        """Compute a deterministic cache key for a request.
+
+        Args:
+            request: The request object (e.g. FenicCompletionsRequest or FenicEmbeddingsRequest).
+            model: The model name.
+            profile_hash: Optional hash of the resolved model profile configuration.
 
+        Returns:
+            A unique cache key string.
+        """
+        ...
+
+    def get(self, cache_key: str) -> Optional[CachedResponse]:
+        """Retrieve a cached response.
         Args:
             cache_key: Unique key for the cached response.

src/fenic/_inference/cache/sqlite_cache.py

Lines changed: 51 additions & 2 deletions

@@ -1,16 +1,21 @@
 """SQLite-backed LLM response cache implementation."""
 
+import hashlib
 import json
 import logging
 import queue
 import sqlite3
 import threading
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
 
 from fenic._inference.cache.protocol import CachedResponse, CacheStats, LLMResponseCache
-from fenic._inference.types import FenicCompletionsResponse
+from fenic._inference.types import (
+    FenicCompletionsRequest,
+    FenicCompletionsResponse,
+    FenicEmbeddingsRequest,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -132,6 +137,50 @@ def __init__(
             f"(ttl={ttl_seconds}s, max_size={max_size_mb}MB, namespace={namespace}, pool_size={max_connections})"
         )
 
+    def compute_key(
+        self,
+        request: Union[FenicCompletionsRequest, FenicEmbeddingsRequest],
+        model: str,
+        profile_hash: Optional[str] = None,
+    ) -> str:
+        """Compute SHA-256 hash of request parameters.
+
+        Args:
+            request: The completion or embedding request to hash. Currently only FenicCompletionsRequest
+                is supported. FenicEmbeddingsRequest support will be added in a future PR.
+            model: The model name.
+            profile_hash: Optional hash of the resolved model profile configuration.
+
+        Returns:
+            64-character hexadecimal SHA-256 hash string.
+        """
+        if isinstance(request, FenicCompletionsRequest):
+            # Build key data with all relevant parameters
+            key_data = {
+                "model": model,
+                "messages": request.messages.encode().hex(),
+                "max_tokens": request.max_completion_tokens,
+                "temperature": request.temperature,
+                "model_profile": request.model_profile,
+                "profile_hash": profile_hash,
+                "top_logprobs": request.top_logprobs,
+            }
+
+            # Include structured output schema if present
+            if request.structured_output:
+                key_data["structured_output"] = request.structured_output.schema_fingerprint
+
+        elif isinstance(request, FenicEmbeddingsRequest):
+            raise NotImplementedError("Embedding requests are not yet supported for caching.")
+        else:
+            raise ValueError(f"Unsupported request type for caching: {type(request)}")
+
+        # Serialize to JSON with deterministic ordering
+        serialized = json.dumps(key_data, sort_keys=True).encode("utf-8")
+
+        # Compute SHA-256 hash
+        return hashlib.sha256(serialized).hexdigest()
+
     def _create_connection(self) -> sqlite3.Connection:
         """Create a new database connection with proper settings.
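
The property this commit's tests target, isolated as a runnable sketch: with sorted-key JSON serialization, identical key data always yields the same SHA-256 digest, while changing only profile_hash yields a different one (the key_data dict here is a simplified stand-in for the one compute_key builds above):

import hashlib
import json

def sketch_key(key_data: dict) -> str:
    # Same final two steps as compute_key above.
    serialized = json.dumps(key_data, sort_keys=True).encode("utf-8")
    return hashlib.sha256(serialized).hexdigest()

base = {"model": "m", "messages": "68692e", "profile_hash": "v1"}
assert sketch_key(base) == sketch_key(dict(reversed(list(base.items()))))  # insertion order is irrelevant
assert sketch_key({**base, "profile_hash": "v2"}) != sketch_key(base)      # profile change -> new key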

src/fenic/_inference/cohere/cohere_batch_embeddings_client.py

Lines changed: 12 additions & 0 deletions

@@ -1,5 +1,7 @@
 import hashlib
+import json
 import logging
+from dataclasses import asdict
 from typing import List, Optional, Union
 
 import cohere
@@ -72,6 +74,16 @@ def __init__(
             default_profile_name=default_profile_name,
         )
 
+    def get_profile_hash(self, profile_name: Optional[str]) -> Optional[str]:
+        """Get hash of the resolved profile configuration."""
+        try:
+            profile = self._profile_manager.get_profile_by_name(profile_name)
+            profile_data = asdict(profile)
+            serialized = json.dumps(profile_data, sort_keys=True, default=str)
+            return str(hash(serialized))
+        except Exception:
+            return None
+
     async def make_single_request(
         self, request: FenicEmbeddingsRequest
     ) -> Union[None, List[float], TransientException, FatalException]:

src/fenic/_inference/google/gemini_batch_embeddings_client.py

Lines changed: 12 additions & 0 deletions

@@ -1,4 +1,6 @@
 import hashlib
+import json
+from dataclasses import asdict
 from typing import List, Optional, Union
 
 from google.genai.errors import ClientError, ServerError
@@ -59,6 +61,16 @@ def __init__(
             default_profile_name=default_profile_name,
         )
 
+    def get_profile_hash(self, profile_name: Optional[str]) -> Optional[str]:
+        """Get hash of the resolved profile configuration."""
+        try:
+            profile = self._profile_manager.get_profile_by_name(profile_name)
+            profile_data = asdict(profile)
+            serialized = json.dumps(profile_data, sort_keys=True, default=str)
+            return str(hash(serialized))
+        except Exception:
+            return None
+
     async def make_single_request(
         self, request: FenicEmbeddingsRequest
     ) -> Union[None, List[float], TransientException, FatalException]:

src/fenic/_inference/google/gemini_native_chat_completions_client.py

Lines changed: 16 additions & 0 deletions

@@ -5,6 +5,9 @@
 if TYPE_CHECKING:
     from fenic._inference.cache.protocol import LLMResponseCache
 
+import json
+from dataclasses import asdict
+
 from google.genai.errors import ClientError, ServerError
 from google.genai.types import (
     FinishReason,
@@ -114,6 +117,19 @@ def __init__(
             default_profile_name=default_profile_name,
         )
 
+    def get_profile_hash(self, profile_name: Optional[str]) -> Optional[str]:
+        """Get hash of the resolved profile configuration."""
+        try:
+            profile = self._profile_manager.get_profile_by_name(profile_name)
+            # Serialize profile to JSON string and hash it.
+            # Using default=str to handle any non-serializable types if present.
+            profile_data = asdict(profile)
+            serialized = json.dumps(profile_data, sort_keys=True, default=str)
+            return str(hash(serialized))
+        except Exception as e:
+            logger.warning(f"Failed to hash profile {profile_name}: {e}")
+            return None
+
     def reset_metrics(self):
         """Reset metrics to initial state."""
         self._metrics = LMMetrics()
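
Taken together, the intended flow is: a client hashes its resolved profile, the cache folds that hash into the request key, and an edited profile therefore misses the cache. A runnable schematic of that call order using stub classes (everything below is a stand-in; only the method names and call pattern mirror this commit):

import asyncio
import hashlib
import json
from typing import Optional

class StubClient:
    def get_profile_hash(self, profile_name: Optional[str]) -> Optional[str]:
        return "abc123"  # real clients hash the resolved profile, as above

    async def make_single_request(self, request: dict) -> str:
        return "model response"

class StubCache:
    def __init__(self):
        self._store: dict = {}

    def compute_key(self, request: dict, model: str, profile_hash: Optional[str]) -> str:
        payload = {"request": request, "model": model, "profile_hash": profile_hash}
        return hashlib.sha256(json.dumps(payload, sort_keys=True).encode()).hexdigest()

    def get(self, key: str) -> Optional[str]:
        return self._store.get(key)

    def set(self, key: str, value: str) -> None:
        self._store[key] = value

async def answer(client: StubClient, cache: StubCache, request: dict, model: str) -> str:
    profile_hash = client.get_profile_hash(request.get("model_profile"))
    key = cache.compute_key(request, model=model, profile_hash=profile_hash)
    cached = cache.get(key)
    if cached is not None:
        return cached  # cache hit: no provider call
    response = await client.make_single_request(request)
    cache.set(key, response)  # the real protocol's set() is defined earlier in protocol.py
    return response

request = {"messages": "hi", "model_profile": "default"}
client, cache = StubClient(), StubCache()
assert asyncio.run(answer(client, cache, request, "gemini-2.0-flash")) == "model response"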
