@@ -268,6 +268,92 @@ def test_attribute_value_with_comma_and_slash_is_encoded_for_llm_string(
268268 hit .get ("metadata" , {}).get ("llm_string" ) == raw_llm_string for hit in hits
269269 )
270270
def test_attribute_value_with_all_tokenizer_separators_round_trip_and_filter(
    self, langcache_with_attrs: LangCacheSemanticCache
) -> None:
    """Store, then filter on, an ``llm_string`` full of separator punctuation.

    A single entry is stored whose ``llm_string`` metadata embeds a run of
    punctuation characters (the set the test author associates with
    RediSearch text-field tokenization). The same raw value is then used
    as an attribute filter in ``check``; the test passes only when at
    least one returned hit carries the exact prompt, response, and
    unmodified ``llm_string`` back.
    """
    separators = ",.<>{}[]\" ':;!@#$%^&*()-+=~"
    raw_llm_string = f"tenant { separators } value"

    prompt = "Attribute encoding for all tokenizer separators"
    response = "Response for all tokenizer separators."

    def _is_stored_entry(candidate) -> bool:
        # A hit counts only if all three fields came back unchanged.
        return (
            candidate.get("prompt") == prompt
            and candidate.get("response") == response
            and candidate.get("metadata", {}).get("llm_string") == raw_llm_string
        )

    # Write the entry; a falsy id means the store call did not succeed.
    stored_id = langcache_with_attrs.store(
        prompt=prompt,
        response=response,
        metadata={"llm_string": raw_llm_string},
    )
    assert stored_id

    # Read it back, filtering on the very same raw attribute value.
    results = langcache_with_attrs.check(
        prompt=prompt,
        attributes={"llm_string": raw_llm_string},
        num_results=5,
    )

    assert results, "No hits returned for llm_string value with separators"
    assert any(map(_is_stored_entry, results))
308+
@pytest.mark.parametrize(
    "raw_value",
    [
        r"tenant\\with\\backslash",
        "tenant?with?question",
    ],
)
def test_attribute_values_with_special_chars_round_trip_and_filter(
    self,
    langcache_with_attrs: LangCacheSemanticCache,
    raw_value: str,
) -> None:
    """Check that backslash and question-mark values survive store + filter.

    For each parametrized ``raw_value`` an entry is stored with that value
    as its ``llm_string`` metadata, then looked up with the same value as
    an attribute filter. The test requires a hit whose prompt, response,
    and ``llm_string`` all equal what was stored.
    """
    prompt = f"Special chars attribute { raw_value } "
    response = f"Response for { raw_value } "

    def _is_stored_entry(candidate) -> bool:
        # Match on the full prompt/response/metadata triple, not just one field.
        return (
            candidate.get("prompt") == prompt
            and candidate.get("response") == response
            and candidate.get("metadata", {}).get("llm_string") == raw_value
        )

    stored_id = langcache_with_attrs.store(
        prompt=prompt,
        response=response,
        metadata={"llm_string": raw_value},
    )
    assert stored_id

    results = langcache_with_attrs.check(
        prompt=prompt,
        attributes={"llm_string": raw_value},
        num_results=5,
    )

    # An empty result list simply yields False here and trips the assert below.
    found = any(map(_is_stored_entry, results))

    assert found, (
        "Expected llm_string value to be filterable, but no matching "
        f"hit was found: { raw_value !r} "
    )
356+
271357
272358@pytest .mark .requires_api_keys
273359class TestLangCacheSemanticCacheIntegrationWithoutAttributes :
0 commit comments