Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 28 additions & 58 deletions redisvl/extensions/cache/llm/langcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""

from typing import Any, Dict, List, Literal, Optional
from urllib.parse import quote, unquote

from redisvl.extensions.cache.llm.base import BaseLLMCache
from redisvl.extensions.cache.llm.schema import CacheHit
Expand All @@ -15,37 +16,6 @@
logger = get_logger(__name__)


_LANGCACHE_ATTR_ENCODE_TRANS = str.maketrans(
{
",": ",", # U+FF0C FULLWIDTH COMMA
"/": "∕", # U+2215 DIVISION SLASH
"\\": "\", # U+FF3C FULLWIDTH REVERSE SOLIDUS (backslash)
"?": "?", # U+FF1F FULLWIDTH QUESTION MARK
}
)


_LANGCACHE_ATTR_DECODE_TRANS = str.maketrans(
{v: k for k, v in _LANGCACHE_ATTR_ENCODE_TRANS.items()}
)


def _encode_attribute_value_for_langcache(value: str) -> str:
    """Encode a string attribute value for use with the LangCache service.

    LangCache applies validation and matching rules to attribute values. In
    particular, the managed service can reject values containing commas (",")
    and may not reliably match filters on values containing slashes ("/").

    To keep attribute values round-trippable *and* usable for attribute
    filtering, every character listed in ``_LANGCACHE_ATTR_ENCODE_TRANS``
    (comma, slash, backslash and question mark) is replaced with a visually
    similar Unicode variant. A precomputed ``str.translate`` table is used so
    values are scanned only once.

    Args:
        value: The raw attribute value supplied by the caller.

    Returns:
        str: ``value`` with the listed characters replaced; characters not in
        the table are left untouched.
    """
    return value.translate(_LANGCACHE_ATTR_ENCODE_TRANS)


def _encode_attributes_for_langcache(attributes: Dict[str, Any]) -> Dict[str, Any]:
"""Return a copy of *attributes* with string values safely encoded.

Expand All @@ -61,25 +31,17 @@ def _encode_attributes_for_langcache(attributes: Dict[str, Any]) -> Dict[str, An
safe_attributes: Dict[str, Any] = dict(attributes)
for key, value in attributes.items():
if isinstance(value, str):
encoded = _encode_attribute_value_for_langcache(value)
# Percent-encode all characters (no ``safe`` set) so punctuation and
# other special characters cannot interfere with LangCache's
# underlying query/tokenization rules.
encoded = quote(value, safe="")
if encoded != value:
safe_attributes[key] = encoded
changed = True

return safe_attributes if changed else attributes


def _decode_attribute_value_from_langcache(value: str) -> str:
    """Decode a string attribute value returned from the LangCache service.

    This reverses :func:`_encode_attribute_value_for_langcache` by applying
    ``_LANGCACHE_ATTR_DECODE_TRANS``, the inverse of the encode table. The
    Unicode stand-ins for comma, slash, backslash and question mark are
    translated back to their ASCII counterparts so callers see the original
    values they stored.

    Args:
        value: An attribute value as returned by the service.

    Returns:
        str: ``value`` with every stand-in character mapped back to ASCII;
        characters not in the table are left untouched.
    """
    return value.translate(_LANGCACHE_ATTR_DECODE_TRANS)


def _decode_attributes_from_langcache(attributes: Dict[str, Any]) -> Dict[str, Any]:
"""Return a copy of *attributes* with string values safely decoded.

Expand All @@ -95,7 +57,7 @@ def _decode_attributes_from_langcache(attributes: Dict[str, Any]) -> Dict[str, A
decoded_attributes: Dict[str, Any] = dict(attributes)
for key, value in attributes.items():
if isinstance(value, str):
decoded = _decode_attribute_value_from_langcache(value)
decoded = unquote(value)
if decoded != value:
decoded_attributes[key] = decoded
changed = True
Expand Down Expand Up @@ -472,7 +434,7 @@ def store(
vector (Optional[List[float]]): Not supported by LangCache API.
metadata (Optional[Dict[str, Any]]): Optional metadata (stored as attributes).
filters (Optional[Dict[str, Any]]): Not supported.
ttl (Optional[int]): Optional TTL override (not supported by LangCache).
ttl (Optional[int]): Optional TTL override in seconds.

Returns:
str: The entry ID for the cached entry.
Expand All @@ -491,18 +453,22 @@ def store(
if filters is not None:
logger.warning("LangCache does not support filters")

if ttl is not None:
logger.warning("LangCache does not support per-entry TTL")

# Store using the LangCache client; only send attributes if provided (non-empty)
try:
ttl_millis = round(ttl * 1000) if ttl is not None else None
if metadata:
safe_metadata = _encode_attributes_for_langcache(metadata)
result = self._client.set(
prompt=prompt, response=response, attributes=safe_metadata
prompt=prompt,
response=response,
attributes=safe_metadata,
ttl_millis=ttl_millis,
)
else:
result = self._client.set(prompt=prompt, response=response)
result = self._client.set(
prompt=prompt,
response=response,
ttl_millis=ttl_millis,
)
except Exception as e: # narrow for known SDK error when possible
try:
from langcache.errors import BadRequestErrorResponseContent
Expand Down Expand Up @@ -541,7 +507,7 @@ async def astore(
vector (Optional[List[float]]): Not supported by LangCache API.
metadata (Optional[Dict[str, Any]]): Optional metadata (stored as attributes).
filters (Optional[Dict[str, Any]]): Not supported.
ttl (Optional[int]): Optional TTL override (not supported by LangCache).
ttl (Optional[int]): Optional TTL override in seconds.

Returns:
str: The entry ID for the cached entry.
Expand All @@ -560,18 +526,22 @@ async def astore(
if filters is not None:
logger.warning("LangCache does not support filters")

if ttl is not None:
logger.warning("LangCache does not support per-entry TTL")

# Store using the LangCache client (async); only send attributes if provided (non-empty)
try:
ttl_millis = round(ttl * 1000) if ttl is not None else None
if metadata:
safe_metadata = _encode_attributes_for_langcache(metadata)
result = await self._client.set_async(
prompt=prompt, response=response, attributes=safe_metadata
prompt=prompt,
response=response,
attributes=safe_metadata,
ttl_millis=ttl_millis,
)
else:
result = await self._client.set_async(prompt=prompt, response=response)
result = await self._client.set_async(
prompt=prompt,
response=response,
ttl_millis=ttl_millis,
)
except Exception as e:
try:
from langcache.errors import BadRequestErrorResponseContent
Expand Down
58 changes: 57 additions & 1 deletion tests/integration/test_langcache_semantic_cache_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,33 @@ def test_store_and_check_sync(
assert hits[0]["response"] == response
assert hits[0]["prompt"] == prompt

def test_store_with_per_entry_ttl_expires(
    self, langcache_with_attrs: LangCacheSemanticCache
) -> None:
    """An entry stored with a short ``ttl`` is found at first, then expires."""
    import time

    prompt = "Per-entry TTL test"
    response = "This entry should expire quickly."

    # Store with a two-second TTL and make sure an entry ID comes back.
    stored_id = langcache_with_attrs.store(prompt=prompt, response=response, ttl=2)
    assert stored_id

    # Right after the write the response must be among the returned hits.
    fresh_hits = langcache_with_attrs.check(prompt=prompt, num_results=5)
    assert any(candidate["response"] == response for candidate in fresh_hits)

    # Sleep past the TTL, then confirm no hit carries the response any more.
    time.sleep(3)

    stale_hits = langcache_with_attrs.check(prompt=prompt, num_results=5)
    assert all(candidate["response"] != response for candidate in stale_hits)

@pytest.mark.asyncio
async def test_store_and_check_async(
self, langcache_with_attrs: LangCacheSemanticCache
Expand All @@ -106,6 +133,35 @@ async def test_store_and_check_async(
assert hits[0]["response"] == response
assert hits[0]["prompt"] == prompt

@pytest.mark.asyncio
async def test_astore_with_per_entry_ttl_expires(
    self, langcache_with_attrs: LangCacheSemanticCache
) -> None:
    """An entry stored via ``astore`` with a short ``ttl`` is found, then expires."""
    import asyncio

    prompt = "Async per-entry TTL test"
    response = "This async entry should expire quickly."

    # Store with a two-second TTL and make sure an entry ID comes back.
    stored_id = await langcache_with_attrs.astore(
        prompt=prompt, response=response, ttl=2
    )
    assert stored_id

    # Right after the write the response must be among the returned hits.
    fresh_hits = await langcache_with_attrs.acheck(prompt=prompt, num_results=5)
    assert any(candidate["response"] == response for candidate in fresh_hits)

    # Sleep past the TTL without blocking the event loop, then re-check.
    await asyncio.sleep(3)

    stale_hits = await langcache_with_attrs.acheck(prompt=prompt, num_results=5)
    assert all(candidate["response"] != response for candidate in stale_hits)

def test_store_with_metadata_and_check_with_attributes(
self, langcache_with_attrs: LangCacheSemanticCache
) -> None:
Expand Down Expand Up @@ -321,7 +377,7 @@ def test_attribute_values_with_special_chars_round_trip_and_filter(
"""Backslash and question-mark values should round-trip via filters.

These values previously failed attribute filtering on this LangCache
instance; with client-side encoding/decoding they should now be
instance; with URL-style percent encoding they should now be
filterable and round-trip correctly.
"""

Expand Down
Loading