=================================== FAILURES ===================================
__________________________ test_cagra_vpq_compression __________________________
def test_cagra_vpq_compression():
dim = 64
pq_len = 2
> run_cagra_build_search_test(
n_cols=dim, compression=cagra.CompressionParams(pq_dim=dim / pq_len)
)
tests/test_cagra.py:240:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
n_rows = 10000, n_cols = 64, n_queries = 100, k = 10
dtype = <class 'numpy.float32'>, metric = 'sqeuclidean'
intermediate_graph_degree = 128, graph_degree = 64, build_algo = 'ivf_pq'
array_type = 'device', compare = True, inplace = True, test_extend = False
search_params = SearchParams(type=CAGRA, max_queries=0, itopk_size=64, max_iterations=0, algo=100, team_size=0, search_width=1, min_it...ze=0, hashmap_mode=100, hashmap_min_bitlen=0, hashmap_max_fill_rate=0.5, num_random_samplings=1, rand_xor_mask=1213332)
compression = <cuvs.neighbors.cagra.cagra.CompressionParams object at 0xe9586c9654b0>
serialize = False
def run_cagra_build_search_test(
n_rows=10000,
n_cols=10,
n_queries=100,
k=10,
dtype=np.float32,
metric="sqeuclidean",
intermediate_graph_degree=128,
graph_degree=64,
build_algo="ivf_pq",
array_type="device",
compare=True,
inplace=True,
test_extend=False,
search_params={},
compression=None,
serialize=False,
):
dataset = generate_data((n_rows, n_cols), dtype)
if metric == "inner_product" or metric == "cosine":
if dtype in [np.int8, np.uint8]:
pytest.skip("skip normalization for int8/uint8 data")
dataset = normalize(dataset, norm="l2", axis=1)
dataset_device = device_ndarray(dataset)
build_params = cagra.IndexParams(
metric=metric,
intermediate_graph_degree=intermediate_graph_degree,
graph_degree=graph_degree,
build_algo=build_algo,
compression=compression,
)
if test_extend:
dataset_1 = dataset[: n_rows // 2, :]
dataset_2 = dataset[n_rows // 2 :, :]
extend_params = cagra.ExtendParams()
if array_type == "device":
dataset_1_device = device_ndarray(dataset_1)
dataset_2_device = device_ndarray(dataset_2)
index = cagra.build(build_params, dataset_1_device)
index = cagra.extend(extend_params, index, dataset_2_device)
else:
index = cagra.build(build_params, dataset_1)
index = cagra.extend(index, dataset_2)
else:
if array_type == "device":
index = cagra.build(build_params, dataset_device)
else:
index = cagra.build(build_params, dataset)
if serialize:
with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
temp_filename = f.name
cagra.save(temp_filename, index)
index = cagra.load(temp_filename)
queries = generate_data((n_queries, n_cols), dtype)
out_idx = np.zeros((n_queries, k), dtype=np.uint32)
out_dist = np.zeros((n_queries, k), dtype=np.float32)
queries_device = device_ndarray(queries)
out_idx_device = device_ndarray(out_idx) if inplace else None
out_dist_device = device_ndarray(out_dist) if inplace else None
search_params = cagra.SearchParams(**search_params)
ret_output = cagra.search(
search_params,
index,
queries_device,
k,
neighbors=out_idx_device,
distances=out_dist_device,
)
if not inplace:
out_dist_device, out_idx_device = ret_output
if not compare:
return
out_idx = out_idx_device.copy_to_host()
out_dist = out_dist_device.copy_to_host()
# Calculate reference values with sklearn
skl_metric = {
"sqeuclidean": "sqeuclidean",
"inner_product": "cosine",
"euclidean": "euclidean",
"cosine": "cosine",
}[metric]
nn_skl = NearestNeighbors(
n_neighbors=k, algorithm="brute", metric=skl_metric
)
nn_skl.fit(dataset)
skl_idx = nn_skl.kneighbors(queries, return_distance=False)
recall = calc_recall(out_idx, skl_idx)
> assert recall > 0.7
E assert 0.001 > 0.7
tests/test_cagra.py:122: AssertionError
-------- generated xml file: /__w/cuvs/cuvs/test-results/junit-cuvs.xml --------
================================ tests coverage ================================
_______________ coverage: platform linux, python 3.12.13-final-0 _______________
PR #1965 acts as a workaround to fix CI failures. The root cause is a transitive dependency on OpenBLAS (pulled in through scikit-learn): we are seeing problems with OpenBLAS versions > 0.3.30. We shouldn't plan to keep this version ceiling for too long, though, because ceilings like this will make it increasingly hard for all-of-RAPIDS environments (such as the devcontainers) to solve their dependencies as time passes and we upgrade versions of other packages.
Here is the log of the failing test that was fixed by the version ceiling: