Skip to content

Commit 57b37a7

Browse files
committed
fix: resolve IndexSchema fields type annotation inconsistency (#361)
Fix conflicting type expectations in IndexSchema constructor and validation. The type annotation declared `fields: Dict[str, BaseField]` but the validator raised an error when fields was actually provided as a dict. Changes: - Update model validator to accept both list and dict formats for fields - Support dict with field definitions: {"field_name": {"name": "field_name", "type": "text"}} - Support dict with BaseField instances: {"field_name": BaseFieldInstance} - Maintain backward compatibility with existing list format
1 parent b32ed95 commit 57b37a7

File tree

2 files changed

+220
-12
lines changed

2 files changed

+220
-12
lines changed

redisvl/schema/schema.py

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import re
22
from enum import Enum
33
from pathlib import Path
4-
from typing import Any, Dict, List, Literal
4+
from typing import Any, Dict, List, Literal, Union
55

66
import yaml
77
from pydantic import BaseModel, model_validator
@@ -144,7 +144,10 @@ class IndexSchema(BaseModel):
144144
index: IndexInfo
145145
"""Details of the basic index configurations."""
146146
fields: Dict[str, BaseField] = {}
147-
"""Fields associated with the search index and their properties"""
147+
"""Fields associated with the search index and their properties.
148+
149+
Note: When creating from dict/YAML, provide fields as a list of field definitions.
150+
The validator will convert them to a Dict[str, BaseField] internally."""
148151
version: Literal["0.1.0"] = "0.1.0"
149152
"""Version of the underlying index schema."""
150153

@@ -181,17 +184,36 @@ def validate_and_create_fields(cls, values: Dict[str, Any]) -> Dict[str, Any]:
181184

182185
input_fields = values.get("fields", [])
183186
prepared_fields: Dict[str, BaseField] = {}
184-
# Handle old fields format temporarily
187+
188+
# Handle both list and dict formats for fields
185189
if isinstance(input_fields, dict):
186-
raise ValueError("New schema format introduced; please update schema spec.")
187-
# Process and create fields
188-
for field_input in input_fields:
189-
field = cls._make_field(index.storage_type, **field_input)
190-
if field.name in prepared_fields:
191-
raise ValueError(
192-
f"Duplicate field name: {field.name}. Field names must be unique across all fields."
193-
)
194-
prepared_fields[field.name] = field
190+
# If fields is already a dict of BaseField instances, use it directly
191+
for name, field in input_fields.items():
192+
if isinstance(field, BaseField):
193+
prepared_fields[name] = field
194+
elif isinstance(field, dict):
195+
# If it's a dict of field definitions, create fields
196+
field_obj = cls._make_field(index.storage_type, **field)
197+
if field_obj.name != name:
198+
raise ValueError(
199+
f"Field name mismatch: key '{name}' vs field name '{field_obj.name}'"
200+
)
201+
prepared_fields[name] = field_obj
202+
else:
203+
raise ValueError(
204+
f"Invalid field type for '{name}': expected BaseField or dict"
205+
)
206+
elif isinstance(input_fields, list):
207+
# Process list of field definitions (standard format)
208+
for field_input in input_fields:
209+
field = cls._make_field(index.storage_type, **field_input)
210+
if field.name in prepared_fields:
211+
raise ValueError(
212+
f"Duplicate field name: {field.name}. Field names must be unique across all fields."
213+
)
214+
prepared_fields[field.name] = field
215+
else:
216+
raise ValueError(f"Fields must be a list or dict, got {type(input_fields)}")
195217

196218
values["fields"] = prepared_fields
197219
values["index"] = index
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
"""
2+
Test for IndexSchema type annotation issue.
3+
TDD test file for issue #361.
4+
5+
This test verifies that the IndexSchema fields type annotation
6+
matches what the validator actually expects.
7+
"""
8+
9+
from typing import Dict, List
10+
11+
import pytest
12+
13+
from redisvl.schema import IndexSchema
14+
from redisvl.schema.fields import BaseField, TagField, TextField
15+
16+
17+
class TestIndexSchemaTypeIssue:
18+
"""Test IndexSchema type annotation and validation consistency."""
19+
20+
def test_fields_as_list_works(self):
21+
"""Test that fields as a list (what validator expects) works."""
22+
schema_dict = {
23+
"index": {
24+
"name": "test_index",
25+
"prefix": "test",
26+
"storage_type": "hash",
27+
},
28+
"fields": [
29+
{"name": "text_field", "type": "text"},
30+
{"name": "tag_field", "type": "tag"},
31+
],
32+
}
33+
34+
# This should work since validator expects list
35+
schema = IndexSchema.from_dict(schema_dict)
36+
assert schema is not None
37+
assert isinstance(schema.fields, dict) # After processing, it becomes a dict
38+
assert "text_field" in schema.fields
39+
assert "tag_field" in schema.fields
40+
41+
def test_fields_as_dict_should_work_per_type_annotation(self):
42+
"""Test that fields as dict (per type annotation) now works after fix."""
43+
schema_dict = {
44+
"index": {
45+
"name": "test_index",
46+
"prefix": "test",
47+
"storage_type": "hash",
48+
},
49+
"fields": {
50+
"text_field": {"name": "text_field", "type": "text"},
51+
"tag_field": {"name": "tag_field", "type": "tag"},
52+
},
53+
}
54+
55+
# According to type annotation `fields: Dict[str, BaseField] = {}`
56+
# this should work, and now it does after fixing the validator
57+
schema = IndexSchema.from_dict(schema_dict)
58+
assert schema is not None
59+
assert isinstance(schema.fields, dict)
60+
assert "text_field" in schema.fields
61+
assert "tag_field" in schema.fields
62+
63+
def test_type_annotation_matches_runtime_behavior(self):
64+
"""Test that the type annotation should match what the code actually accepts."""
65+
# Get the type annotation
66+
from typing import get_type_hints
67+
68+
hints = get_type_hints(IndexSchema)
69+
fields_type = hints.get("fields")
70+
71+
# The annotation says Dict[str, BaseField]
72+
assert fields_type == Dict[str, BaseField]
73+
74+
# But the validator expects List and converts to Dict
75+
# This is the inconsistency reported in issue #361
76+
77+
def test_fields_empty_list_works(self):
78+
"""Test that empty fields list works."""
79+
schema_dict = {
80+
"index": {
81+
"name": "test_index",
82+
"prefix": "test",
83+
"storage_type": "hash",
84+
},
85+
"fields": [], # Empty list should work
86+
}
87+
88+
schema = IndexSchema.from_dict(schema_dict)
89+
assert schema is not None
90+
assert schema.fields == {}
91+
92+
def test_fields_default_value(self):
93+
"""Test default value for fields."""
94+
schema_dict = {
95+
"index": {
96+
"name": "test_index",
97+
"prefix": "test",
98+
"storage_type": "hash",
99+
},
100+
# No fields provided
101+
}
102+
103+
schema = IndexSchema.from_dict(schema_dict)
104+
assert schema is not None
105+
assert schema.fields == {} # Default should be empty dict
106+
107+
def test_direct_instantiation_with_dict_fields(self):
108+
"""Test direct instantiation with dict fields (should work after fix)."""
109+
# After processing, fields are stored as Dict[str, BaseField]
110+
# So direct instantiation with proper dict should work
111+
112+
# Create fields
113+
text_field = TextField(name="text_field")
114+
tag_field = TagField(name="tag_field")
115+
116+
# Try to create schema with fields as dict
117+
# This is what the type annotation suggests should work
118+
schema = IndexSchema(
119+
index={
120+
"name": "test_index",
121+
"prefix": "test",
122+
"storage_type": "hash",
123+
},
124+
fields={"text_field": text_field, "tag_field": tag_field},
125+
)
126+
assert schema is not None
127+
assert isinstance(schema.fields, dict)
128+
assert "text_field" in schema.fields
129+
assert "tag_field" in schema.fields
130+
131+
def test_yaml_format_fields_as_list(self):
132+
"""Test that YAML format uses fields as list."""
133+
yaml_content = """
134+
index:
135+
name: test_index
136+
prefix: test
137+
storage_type: hash
138+
fields:
139+
- name: text_field
140+
type: text
141+
- name: tag_field
142+
type: tag
143+
"""
144+
145+
# YAML format uses list, which is what validator expects
146+
# This test documents the expected YAML format
147+
import yaml
148+
149+
schema_dict = yaml.safe_load(yaml_content)
150+
assert isinstance(schema_dict["fields"], list)
151+
152+
schema = IndexSchema.from_dict(schema_dict)
153+
assert schema is not None
154+
assert isinstance(schema.fields, dict)
155+
156+
def test_dict_fields_with_name_mismatch_fails(self):
157+
"""Test that dict fields with mismatched names fail properly."""
158+
schema_dict = {
159+
"index": {
160+
"name": "test_index",
161+
"prefix": "test",
162+
"storage_type": "hash",
163+
},
164+
"fields": {
165+
"wrong_key": {"name": "correct_name", "type": "text"}, # Key != name
166+
},
167+
}
168+
169+
with pytest.raises(ValueError, match="Field name mismatch"):
170+
IndexSchema.from_dict(schema_dict)
171+
172+
def test_dict_fields_with_invalid_field_type_fails(self):
173+
"""Test that dict fields with invalid field types fail properly."""
174+
schema_dict = {
175+
"index": {
176+
"name": "test_index",
177+
"prefix": "test",
178+
"storage_type": "hash",
179+
},
180+
"fields": {
181+
"text_field": "invalid_field_type", # Should be dict or BaseField
182+
},
183+
}
184+
185+
with pytest.raises(ValueError, match="Invalid field type"):
186+
IndexSchema.from_dict(schema_dict)

0 commit comments

Comments
 (0)