Skip to content

[Tracker] Version Ceiling for Openblas dependency #1967

@tarang-jain

Description

@tarang-jain

PR #1965 acts as a workaround to fix CI failures. The root cause is a transitive dependency on OpenBLAS (pulled in through scikit-learn): we are seeing problems with OpenBLAS versions > 0.3.30. We shouldn't plan to keep this ceiling for too long, though, because ceilings like this will make it hard for all-of-RAPIDS environments like the devcontainers to solve as time passes and we upgrade versions of other things.

Here is the log of the failing test that was fixed by the version ceiling:

=================================== FAILURES ===================================
__________________________ test_cagra_vpq_compression __________________________

    def test_cagra_vpq_compression():
        dim = 64
        pq_len = 2
>       run_cagra_build_search_test(
            n_cols=dim, compression=cagra.CompressionParams(pq_dim=dim / pq_len)
        )

tests/test_cagra.py:240: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

n_rows = 10000, n_cols = 64, n_queries = 100, k = 10
dtype = <class 'numpy.float32'>, metric = 'sqeuclidean'
intermediate_graph_degree = 128, graph_degree = 64, build_algo = 'ivf_pq'
array_type = 'device', compare = True, inplace = True, test_extend = False
search_params = SearchParams(type=CAGRA, max_queries=0, itopk_size=64, max_iterations=0, algo=100, team_size=0, search_width=1, min_it...ze=0, hashmap_mode=100, hashmap_min_bitlen=0, hashmap_max_fill_rate=0.5, num_random_samplings=1, rand_xor_mask=1213332)
compression = <cuvs.neighbors.cagra.cagra.CompressionParams object at 0xe9586c9654b0>
serialize = False

    def run_cagra_build_search_test(
        n_rows=10000,
        n_cols=10,
        n_queries=100,
        k=10,
        dtype=np.float32,
        metric="sqeuclidean",
        intermediate_graph_degree=128,
        graph_degree=64,
        build_algo="ivf_pq",
        array_type="device",
        compare=True,
        inplace=True,
        test_extend=False,
        search_params={},
        compression=None,
        serialize=False,
    ):
        dataset = generate_data((n_rows, n_cols), dtype)
        if metric == "inner_product" or metric == "cosine":
            if dtype in [np.int8, np.uint8]:
                pytest.skip("skip normalization for int8/uint8 data")
            dataset = normalize(dataset, norm="l2", axis=1)
        dataset_device = device_ndarray(dataset)
    
        build_params = cagra.IndexParams(
            metric=metric,
            intermediate_graph_degree=intermediate_graph_degree,
            graph_degree=graph_degree,
            build_algo=build_algo,
            compression=compression,
        )
    
        if test_extend:
            dataset_1 = dataset[: n_rows // 2, :]
            dataset_2 = dataset[n_rows // 2 :, :]
            extend_params = cagra.ExtendParams()
            if array_type == "device":
                dataset_1_device = device_ndarray(dataset_1)
                dataset_2_device = device_ndarray(dataset_2)
    
                index = cagra.build(build_params, dataset_1_device)
                index = cagra.extend(extend_params, index, dataset_2_device)
            else:
                index = cagra.build(build_params, dataset_1)
                index = cagra.extend(index, dataset_2)
        else:
            if array_type == "device":
                index = cagra.build(build_params, dataset_device)
            else:
                index = cagra.build(build_params, dataset)
    
        if serialize:
            with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
                temp_filename = f.name
            cagra.save(temp_filename, index)
            index = cagra.load(temp_filename)
    
        queries = generate_data((n_queries, n_cols), dtype)
        out_idx = np.zeros((n_queries, k), dtype=np.uint32)
        out_dist = np.zeros((n_queries, k), dtype=np.float32)
    
        queries_device = device_ndarray(queries)
        out_idx_device = device_ndarray(out_idx) if inplace else None
        out_dist_device = device_ndarray(out_dist) if inplace else None
    
        search_params = cagra.SearchParams(**search_params)
    
        ret_output = cagra.search(
            search_params,
            index,
            queries_device,
            k,
            neighbors=out_idx_device,
            distances=out_dist_device,
        )
    
        if not inplace:
            out_dist_device, out_idx_device = ret_output
    
        if not compare:
            return
    
        out_idx = out_idx_device.copy_to_host()
        out_dist = out_dist_device.copy_to_host()
    
        # Calculate reference values with sklearn
        skl_metric = {
            "sqeuclidean": "sqeuclidean",
            "inner_product": "cosine",
            "euclidean": "euclidean",
            "cosine": "cosine",
        }[metric]
        nn_skl = NearestNeighbors(
            n_neighbors=k, algorithm="brute", metric=skl_metric
        )
        nn_skl.fit(dataset)
        skl_idx = nn_skl.kneighbors(queries, return_distance=False)
    
        recall = calc_recall(out_idx, skl_idx)
>       assert recall > 0.7
E       assert 0.001 > 0.7

tests/test_cagra.py:122: AssertionError
-------- generated xml file: /__w/cuvs/cuvs/test-results/junit-cuvs.xml --------
================================ tests coverage ================================
_______________ coverage: platform linux, python 3.12.13-final-0 _______________

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug — Something isn't working

    Type

    No type

    Projects

    Status

    Todo

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions