Skip to content

Commit 4e37406

Browse files
committed
tokenizer helper function
1 parent 3dc2b62 commit 4e37406

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

redisvl/query/query.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -456,7 +456,6 @@ def __init__(
456456
nltk.download('stopwords')
457457
self._stopwords = set(stopwords.words('english'))
458458

459-
460459
super().__init__(query_string)
461460

462461
# Handle query modifiers
@@ -479,6 +478,7 @@ def __init__(
479478

480479
def _tokenize_query(self, user_query: str) -> str:
481480
"""Convert a raw user query to a redis full text query joined by ORs"""
481+
482482
words = word_tokenize(user_query)
483483

484484
tokens = [token.strip().strip(",").lower() for token in user_query.split()]
@@ -499,6 +499,12 @@ def _build_query_string(self) -> str:
499499
#TODO is this method even needed? use
500500
return text_and_filter
501501

502+
# from redisvl.utils.token_escaper import TokenEscaper
503+
# escaper = TokenEscaper()
504+
# def tokenize_and_escape_query(user_query: str) -> str:
505+
# """Convert a raw user query to a redis full text query joined by ORs"""
506+
# tokens = [escaper.escape(token.strip().strip(",").replace("“", "").replace("”", "").lower()) for token in user_query.split()]
507+
# return " | ".join([token for token in tokens if token and token not in stopwords_en])
502508

503509
class HybridQuery(VectorQuery, TextQuery):
504510
def __init__():

0 commit comments

Comments
 (0)