Skip to content

Commit 94eea52

Browse files
tylerhutcherson, justin-cechmanek
authored and committed
Support client-side schema validation using Pydantic (#304)
This PR implements a layered architecture for managing and validating searchable data in Redis, with clear separation of concerns between schema definition, data validation, and storage operations. - `IndexSchema` provides the blueprint for data structure and constraints - Defines fields with specific types (TEXT, TAG, NUMERIC, GEO, VECTOR) - Supports different storage types (HASH, JSON) with appropriate configuration - `SchemaModelGenerator` dynamically creates Pydantic models from schema definitions - Implements a caching mechanism to avoid redundant model generation - Maps Redis field types to appropriate Python/Pydantic types - Provides type-specific validators: - VECTOR: validates dimensions and value ranges (e.g., INT8 range checks) - GEO: validates geographic coordinate format - NUMERIC: prevents boolean values - `BaseStorage` is the abstract class that provides the foundation for Redis operations - Specialized implementations (HashStorage, JsonStorage) for different Redis data types - Enforces schema validation during write operations when set to True - Implements optimized batch operations using Redis pipelines - Supports both synchronous and asynchronous interfaces - Handles key generation, preprocessing, and error handling. The `SearchIndex` contains the setting `validate_on_load`, which defaults to `False`. Objects are preprocessed and validated against the schema. Objects are prepared with appropriate keys. Batch writing occurs using Redis pipelines for efficiency. TTL (expiration) can be applied if specified. Keys are fetched in batches using pipelines. Data is converted from Redis format to Python objects. Bytes are automatically converted to appropriate types.
1 parent 298d055 commit 94eea52

25 files changed

+1936
-581
lines changed

.github/workflows/test.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,11 @@ jobs:
133133
with:
134134
credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
135135

136+
- name: Set HuggingFace token
137+
run: |
138+
mkdir -p ~/.huggingface
139+
echo '{"token":"${{ secrets.HF_TOKEN }}"}' > ~/.huggingface/token
140+
136141
- name: Run tests
137142
if: matrix.connection == 'plain' && matrix.redis-version == 'latest'
138143
env:
@@ -149,6 +154,7 @@ jobs:
149154
OPENAI_API_VERSION: ${{ secrets.OPENAI_API_VERSION }}
150155
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
151156
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
157+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
152158
run: |
153159
make test-all
154160
@@ -173,6 +179,7 @@ jobs:
173179
OPENAI_API_VERSION: ${{ secrets.OPENAI_API_VERSION }}
174180
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
175181
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
182+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
176183
run: |
177184
docker run -d --name redis -p 6379:6379 redis/redis-stack-server:latest
178185
if [[ "${{ matrix.python-version }}" > "3.9" ]]; then

docs/user_guide/01_getting_started.ipynb

Lines changed: 138 additions & 100 deletions
Large diffs are not rendered by default.

poetry.lock

Lines changed: 288 additions & 157 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ tabulate = "^0.9.0"
3434
ml-dtypes = "^0.4.0"
3535
python-ulid = "^3.0.0"
3636
nltk = { version = "^3.8.1", optional = true }
37+
jsonpath-ng = "^1.5.0"
3738
openai = { version = "^1.13.0", optional = true }
3839
sentence-transformers = { version = "^3.4.0", optional = true }
3940
scipy = [

redisvl/exceptions.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,32 @@
1-
class RedisVLException(Exception):
2-
"""Base RedisVL exception"""
1+
"""
2+
RedisVL Exception Classes
33
4+
This module defines all custom exceptions used throughout the RedisVL library.
5+
"""
46

5-
class RedisModuleVersionError(RedisVLException):
6-
"""Invalid module versions installed"""
77

8+
class RedisVLError(Exception):
9+
"""Base exception for all RedisVL errors."""
810

9-
class RedisSearchError(RedisVLException):
10-
"""Error while performing a search or aggregate request"""
11+
pass
12+
13+
14+
class RedisModuleVersionError(RedisVLError):
15+
"""Error raised when required Redis modules are missing or have incompatible versions."""
16+
17+
pass
18+
19+
20+
class RedisSearchError(RedisVLError):
21+
"""Error raised for Redis Search specific operations."""
22+
23+
pass
24+
25+
26+
class SchemaValidationError(RedisVLError):
27+
"""Error when validating data against a schema."""
28+
29+
def __init__(self, message, index=None):
30+
if index is not None:
31+
message = f"Validation failed for object at index {index}: {message}"
32+
super().__init__(message)

redisvl/extensions/llmcache/semantic.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,8 @@ def __init__(
9595
}
9696

9797
# Use the index name as the key prefix by default
98-
if "prefix" in kwargs:
99-
prefix = kwargs["prefix"]
100-
else:
101-
prefix = name
102-
103-
dtype = kwargs.get("dtype")
98+
prefix = kwargs.pop("prefix", name)
99+
dtype = kwargs.pop("dtype", None)
104100

105101
# Validate a provided vectorizer or set the default
106102
if vectorizer:
@@ -111,7 +107,10 @@ def __init__(
111107
f"Provided dtype {dtype} does not match vectorizer dtype {vectorizer.dtype}"
112108
)
113109
else:
114-
vectorizer_kwargs = {"dtype": dtype} if dtype else {}
110+
vectorizer_kwargs = kwargs
111+
112+
if dtype:
113+
vectorizer_kwargs.update(**{"dtype": dtype})
115114

116115
vectorizer = HFTextVectorizer(
117116
model="sentence-transformers/all-mpnet-base-v2",

redisvl/extensions/router/semantic.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def __init__(
7272
connection_kwargs (Dict[str, Any]): The connection arguments
7373
for the redis client. Defaults to empty {}.
7474
"""
75-
dtype = kwargs.get("dtype")
75+
dtype = kwargs.pop("dtype", None)
7676

7777
# Validate a provided vectorizer or set the default
7878
if vectorizer:
@@ -83,8 +83,15 @@ def __init__(
8383
f"Provided dtype {dtype} does not match vectorizer dtype {vectorizer.dtype}"
8484
)
8585
else:
86-
vectorizer_kwargs = {"dtype": dtype} if dtype else {}
87-
vectorizer = HFTextVectorizer(**vectorizer_kwargs)
86+
vectorizer_kwargs = kwargs
87+
88+
if dtype:
89+
vectorizer_kwargs.update(**{"dtype": dtype})
90+
91+
vectorizer = HFTextVectorizer(
92+
model="sentence-transformers/all-mpnet-base-v2",
93+
**vectorizer_kwargs,
94+
)
8895

8996
if routing_config is None:
9097
routing_config = RoutingConfig()

redisvl/extensions/session_manager/semantic_session.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def __init__(
7171
super().__init__(name, session_tag)
7272

7373
prefix = prefix or name
74-
dtype = kwargs.get("dtype")
74+
dtype = kwargs.pop("dtype", None)
7575

7676
# Validate a provided vectorizer or set the default
7777
if vectorizer:
@@ -82,10 +82,13 @@ def __init__(
8282
f"Provided dtype {dtype} does not match vectorizer dtype {vectorizer.dtype}"
8383
)
8484
else:
85-
vectorizer_kwargs = {"dtype": dtype} if dtype else {}
85+
vectorizer_kwargs = kwargs
86+
87+
if dtype:
88+
vectorizer_kwargs.update(**{"dtype": dtype})
8689

8790
vectorizer = HFTextVectorizer(
88-
model="sentence-transformers/msmarco-distilbert-cos-v5",
91+
model="sentence-transformers/all-mpnet-base-v2",
8992
**vectorizer_kwargs,
9093
)
9194

0 commit comments

Comments
 (0)