Skip to content

Commit 504b3c2

Browse files
committed
Review comments addressed
1 parent cca18a3 commit 504b3c2

File tree

1 file changed

+29
-29
lines changed

1 file changed

+29
-29
lines changed

examples/vector_search.py

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@
3333
Coherence Vectors
3434
=================
3535
36-
Coherence Python client can handle few different types of vector,
37-
this example will use the FloatVector type
36+
Coherence Python client can handle a few different types of vector,
37+
this example will use the FloatVector type.
3838
3939
Just like any other data type in Coherence, vectors are stored in normal
4040
Coherence caches. The vector may be stored as the actual cache value,
@@ -57,7 +57,7 @@
5757
the cache. The dataset is a json file and the example will use Coherence json
5858
support to read and store the data.
5959
60-
The schema of the JSON movie data looks like this
60+
The schema of the JSON movie data looks like this:
6161
6262
+--------------------+-------------------------------------------------------+
6363
| Field Name | Description |
@@ -100,7 +100,6 @@
100100
101101
The SimilaritySearch aggregator is used to perform a Knn vector search on a
102102
cache in the same way that normal Coherence aggregators are used.
103-
104103
"""
105104

106105

@@ -115,7 +114,7 @@ class MovieRepository:
115114
"""
116115

117116
EMBEDDING_DIMENSIONS: Final[int] = 384
118-
"""Embedding dimension for all-MiniLM-L6-v2"""
117+
"""Embedding dimension for all-MiniLM-L6-v2."""
119118

120119
VECTOR_FIELD: Final[str] = "embeddings"
121120
"""The name of the field in the json containing the embeddings."""
@@ -125,7 +124,7 @@ class MovieRepository:
125124

126125
def __init__(self, movies: NamedMap) -> None:
127126
"""
128-
Creates an instance of the MovieRepository
127+
Creates an instance of the MovieRepository.
129128
130129
:param movies: The Coherence NamedMap is the cache used to store the
131130
movie data.
@@ -137,10 +136,10 @@ def __init__(self, movies: NamedMap) -> None:
137136

138137
async def load(self, filename: str) -> None:
139138
"""
140-
Loads the movie data into the NamedMao using the specified zip file
139+
Loads the movie data into the NamedMap using the specified zip file.
141140
142-
:param filename: Name of the movies json zip file
143-
:return: None
141+
:param filename: Name of the movies json zip file.
142+
:return: None.
144143
"""
145144
try:
146145
with gzip.open(filename, "rt", encoding="utf-8") as f:
@@ -155,34 +154,35 @@ async def load(self, filename: str) -> None:
155154
try:
156155
f.close()
157156
except NameError:
158-
pass # File was never opened, so nothing to close
157+
pass # File was never opened, so nothing to close.
159158
except Exception as e:
160159
print(f"An error occurred while closing the file: {e}")
161160

162161
# iterate over list of movie objects (dictionary) to load them into
163-
# Coherence cache
162+
# Coherence cache.
164163
for movie in data:
165164
# get the title of the movie
166165
title: str = movie.get("title")
167166
# get the full plot of the movie
168167
full_plot: str = movie.get("fullplot")
169168
key: str = title
170-
# text of the full_plot converted to a vector
169+
# text of the full_plot converted to a vector.
171170
vector: FloatVector = self.vectorize(full_plot)
172-
# vector is added to the movie object
171+
# vector is added to the movie object.
173172
movie[self.VECTOR_FIELD] = vector
174-
# The movie object is added to the cache using the "title" field as the cache key
173+
# The movie object is added to the cache using the "title" field
174+
# as the cache key.
175175
await self.movies.put(key, movie)
176176

177177
def vectorize(self, input_string: str) -> FloatVector:
178-
"""vectorize method takes a String value and returns a FloatVector"""
178+
"""vectorize method takes a String value and returns a FloatVector."""
179179

180180
# model used to create embeddings for the input_string
181-
# in this example model used is onnx-models/all-MiniLM-L6-v2-onnx
181+
# in this example model used is onnx-models/all-MiniLM-L6-v2-onnx.
182182
embeddings: List[float] = self.model.encode(input_string).tolist()
183183

184184
# The vector returned is normalized, which makes future operations on
185-
# the vector more efficient
185+
# the vector more efficient.
186186
return FloatVector(Vectors.normalize(embeddings))
187187

188188
async def search(self, search_text: str, count: int, filter: Filter = Filters.always()) -> List[QueryResult]:
@@ -194,23 +194,23 @@ async def search(self, search_text: str, count: int, filter: Filter = Filters.al
194194
parameter can be supplied. The filter is used to reduce the cache entries used
195195
to perform the k-nn search.
196196
197-
:param search_text: the text to nearest match on the movie full plot
198-
:param count: the count of the nearest matches to return :param
199-
filter: an optional Filter to use to further reduce the movies to be
200-
queried
201-
:return: a List of QueryResult objects
197+
:param search_text: the text to nearest match on the movie full plot.
198+
:param count: the count of the nearest matches to return.
199+
:param filter: an optional Filter to use to further reduce the movies
200+
to be queried.
201+
:return: a List of QueryResult objects.
202202
"""
203203

204204
# create a FloatVector of the search_text
205205
vector: FloatVector = self.vectorize(search_text)
206-
# create the SimilaritySearch aggregator using the above vector and count
206+
# create the SimilaritySearch aggregator using the above vector and count.
207207
search: SimilaritySearch = SimilaritySearch(self.VALUE_EXTRACTOR, vector, count)
208208
# perform the k-nn search using the above aggregator and optional filter and
209-
# returns a list of QueryResults
209+
# returns a list of QueryResults.
210210
return await self.movies.aggregate(search, filter=filter)
211211

212212

213-
# Name of the compressed gzip json file that has data for the movies
213+
# Name of the compressed gzip json file that has data for the movies.
214214
MOVIE_JSON_FILENAME: Final[str] = "movies.json.gzip"
215215

216216

@@ -219,7 +219,7 @@ async def do_run() -> None:
219219
# Create a new session to the Coherence server using the default host and
220220
# port i.e. localhost:1408
221221
session: Session = await Session.create()
222-
# Create a NamedMao called movies with key of str and value of dict
222+
# Create a NamedMap called movies with key of str and value of dict
223223
movie_db: NamedMap[str, dict] = await session.get_map("movies")
224224
try:
225225
# an instance of class MovieRepository is created, passing the above
@@ -236,7 +236,7 @@ async def do_run() -> None:
236236
# the nearest matches. The second parameter is a count of the number
237237
# of nearest neighbours to search for.
238238
#
239-
# Below a search for five movies roughly based on "star travel and space ships"
239+
# Below, a search for five movies roughly based on "star travel and space ships"
240240
# is being done
241241
results = await movies_repo.search("star travel and space ships", 5)
242242
print("Search results:")
@@ -248,8 +248,8 @@ async def do_run() -> None:
248248
# to reduce the cache entries used to perform the nearest neighbours
249249
# (k-nn) search.
250250
#
251-
# Below any movie with a plot similar to "star travel and space
252-
# ships" was searched for. In addition a Filter is used to narrow down
251+
# Below, any movie with a plot similar to "star travel and space
252+
# ships" was searched for. In addition, a Filter is used to narrow down
253253
# the search i.e. movies that starred "Harrison Ford". The filter
254254
# will be applied to the cast field of the json object.
255255
cast_extractor = Extractors.extract("cast")

0 commit comments

Comments
 (0)