Skip to content

Commit c605611

Browse files
committed
COH-32065 - Use the numpy package to improve performance of the Vectors.normalize() API
1 parent 58309f9 commit c605611

File tree

3 files changed

+12
-22
lines changed

3 files changed

+12
-22
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ pymitter = ">=0.4,<1.1"
3030
typing-extensions = ">=4.11,<4.14"
3131
types-protobuf = "5.29.1.20250403"
3232
pympler = "1.1"
33+
numpy = "2.0.2"
3334

3435
[tool.poetry.dev-dependencies]
3536
pytest = "~8.3"

src/coherence/ai.py

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1-
# Copyright (c) 2022, 2024, Oracle and/or its affiliates.
1+
# Copyright (c) 2022, 2025, Oracle and/or its affiliates.
22
# Licensed under the Universal Permissive License v 1.0 as shown at
33
# https://oss.oracle.com/licenses/upl.
44

55
from __future__ import annotations
66

77
import base64
8-
import math
98
from abc import ABC
109
from collections import OrderedDict
1110
from typing import Any, Dict, List, Optional, TypeVar, Union, cast
11+
import numpy as np
1212

1313
import jsonpickle
1414

@@ -342,19 +342,8 @@ class Vectors:
342342
EPSILON = 1e-30 # Python automatically handles float precision
343343

344344
@staticmethod
345-
def normalize(array: List[float]) -> List[float]:
    """Scale ``array`` in place to unit Euclidean length and return it.

    The divisor is the square root of the sum of squares plus
    ``Vectors.EPSILON``, so an all-zero input does not divide by zero.

    :param array: the vector components; overwritten with the scaled values
    :return: the same list object that was passed in
    """
    sum_of_squares = 0.0
    for component in array:
        sum_of_squares += component * component

    # Multiply by the reciprocal once instead of dividing every element.
    scale = 1.0 / (math.sqrt(sum_of_squares) + Vectors.EPSILON)

    for index, component in enumerate(array):
        array[index] = component * scale

    return array
345+
def normalize_numpy(array: list[float]) -> list[float]:
    """Return a unit-length copy of ``array`` computed with numpy.

    The input is converted to a ``float64`` numpy array, divided by its
    Euclidean norm plus ``Vectors.EPSILON``, and converted back to a plain
    Python list. The list passed in is not modified.

    :param array: the vector components to normalize
    :return: a new list holding the normalized components
    """
    values = np.array(array, dtype=np.float64)
    # EPSILON keeps the divisor non-zero when every component is zero.
    scale = np.linalg.norm(values) + Vectors.EPSILON
    return (values / scale).tolist()

tests/e2e/test_ai.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,12 @@ async def populate_vectors(vectors: NamedCache[int, ValueWithVector]) -> ValueWi
5454

5555
# Assign normalized vectors to the first 5 entries
5656
for i in range(5):
57-
values[i] = ValueWithVector(FloatVector(Vectors.normalize(matches[i])), str(i), i)
57+
values[i] = ValueWithVector(FloatVector(Vectors.normalize_numpy(matches[i])), str(i), i)
5858
await vectors.put(i, values[i])
5959

6060
# Fill the remaining values with random vectors
6161
for i in range(5, count):
62-
values[i] = ValueWithVector(FloatVector(Vectors.normalize(random_floats(DIMENSIONS))), str(i), i)
62+
values[i] = ValueWithVector(FloatVector(Vectors.normalize_numpy(random_floats(DIMENSIONS))), str(i), i)
6363
await vectors.put(i, values[i])
6464

6565
return cast(ValueWithVector, values[0])
@@ -79,13 +79,13 @@ async def populate_document_chunk_vectors(vectors: NamedCache[int, DocumentChunk
7979

8080
# Assign normalized vectors to the first 5 entries
8181
for i in range(5):
82-
values[i] = DocumentChunk(str(i), metadata=None, vector=FloatVector(Vectors.normalize(matches[i])))
82+
values[i] = DocumentChunk(str(i), metadata=None, vector=FloatVector(Vectors.normalize_numpy(matches[i])))
8383
await vectors.put(i, values[i])
8484

8585
# Fill the remaining values with random vectors
8686
for i in range(5, count):
8787
values[i] = DocumentChunk(
88-
str(i), metadata=None, vector=FloatVector(Vectors.normalize(random_floats(DIMENSIONS)))
88+
str(i), metadata=None, vector=FloatVector(Vectors.normalize_numpy(random_floats(DIMENSIONS)))
8989
)
9090
await vectors.put(i, values[i])
9191

0 commit comments

Comments
 (0)