Skip to content

Commit 83af932

Browse files
authored
v0.24.3 Additions to support building (#55)
* Additions to support building * Add arrospace_conf_typed. Fix projected search * fix tests * fix some clippy warnings * modernise CI
1 parent bbf72cb commit 83af932

27 files changed

+618
-492
lines changed

.github/workflows/ci.yml

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -28,29 +28,15 @@ jobs:
2828
~/.cargo
2929
./target
3030
key: ${{ runner.os }}-cargo-${{ matrix.platform.target }}-${{ hashFiles('**/Cargo.toml') }}
31-
restore-keys: ${{ runner.os }}-cargo-${{ matrix.platform.target }}-${{ hashFiles('**/Cargo.toml') }}
31+
restore-keys: |
32+
${{ runner.os }}-cargo-${{ matrix.platform.target }}-
3233
- name: Install Rust toolchain
33-
uses: actions-rs/toolchain@v1
34+
uses: dtolnay/rust-toolchain@stable
3435
with:
35-
toolchain: stable
36-
target: ${{ matrix.platform.target }}
37-
profile: minimal
38-
default: true
39-
- name: Install test runner for wasm
40-
if: matrix.platform.target == 'wasm32-unknown-unknown'
41-
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
36+
targets: ${{ matrix.platform.target }}
4237
- name: Stable Build with all features
43-
uses: actions-rs/cargo@v1
44-
with:
45-
command: build
46-
args: --all-features --target ${{ matrix.platform.target }}
38+
run: cargo build --all-features --target ${{ matrix.platform.target }}
4739
- name: Stable Build without features
48-
uses: actions-rs/cargo@v1
49-
with:
50-
command: build
51-
args: --target ${{ matrix.platform.target }}
40+
run: cargo build --target ${{ matrix.platform.target }}
5241
- name: Tests
53-
uses: actions-rs/cargo@v1
54-
with:
55-
command: test
56-
args: --all-features
42+
run: cargo test --all-features

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "arrowspace"
3-
version = "0.24.2"
3+
version = "0.24.3"
44
edition = "2024"
55
description = "Spectral vector search with taumode (λτ) indexing"
66
authors = ["Lorenzo <[email protected]>"]

src/builder.rs

Lines changed: 33 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ impl FromStr for Pipeline {
4949

5050
#[derive(Clone, PartialEq)]
5151
pub struct ArrowSpaceBuilder {
52-
pub(crate) n_items_original: usize,
52+
pub(crate) nitems: usize,
53+
pub(crate) nfeatures: usize,
5354
pub prebuilt_spectral: bool, // true if spectral laplacian has been computed
5455

5556
// Lambda-graph parameters (the canonical path)
@@ -68,23 +69,18 @@ pub struct ArrowSpaceBuilder {
6869
// activate sampling, default false
6970
pub sampling: Option<SamplerType>,
7071

71-
// activate dimensionality reduction and projection matrix
72-
// Projection data: dims reduction data (needed to prepare the query vector)
73-
pub(crate) projection_matrix: Option<ImplicitProjection>, // F × r (if projection was used)
74-
pub(crate) reduced_dim: Option<usize>, // r (reduced dimension, None if no projection)
75-
pub(crate) extra_reduced_dim: bool, // optional extra dimensionality reduction for energymaps
76-
7772
// Synthetic index configuration (used `with_synthesis`)
7873
pub synthesis: TauMode, // (tau_mode)
7974

8075
/// Max clusters X (default: nfeatures; cap on centroids)
8176
pub(crate) cluster_max_clusters: Option<usize>,
8277
/// Squared L2 threshold for new cluster creation (default 1.0)
8378
pub(crate) cluster_radius: f64,
79+
/// used for clustering and dimensionality reduction (if active)
8480
pub(crate) clustering_seed: Option<u64>,
8581
pub(crate) deterministic_clustering: bool,
8682

87-
// dimensionality reduction with random projection (dafault false)
83+
/// dimensionality reduction with random projection (default false)
8884
pub(crate) use_dims_reduction: bool,
8985
pub(crate) extra_dims_reduction: bool,
9086
pub(crate) rp_eps: f64,
@@ -97,7 +93,8 @@ impl Default for ArrowSpaceBuilder {
9793
fn default() -> Self {
9894
debug!("Creating ArrowSpaceBuilder with default parameters");
9995
Self {
100-
n_items_original: 0,
96+
nitems: 0,
97+
nfeatures: 0,
10198
// arrows: ArrowSpace::default(),
10299
prebuilt_spectral: false,
103100

@@ -114,9 +111,6 @@ impl Default for ArrowSpaceBuilder {
114111
sparsity_check: false,
115112
// sampling default
116113
sampling: Some(SamplerType::Simple(0.6)),
117-
projection_matrix: None,
118-
reduced_dim: None,
119-
extra_reduced_dim: false,
120114
// Clustering defaults
121115
cluster_max_clusters: None, // will be set to nfeatures at build time
122116
cluster_radius: 1.0,
@@ -211,13 +205,12 @@ impl ClusteringHeuristic for ArrowSpaceBuilder {
211205
"Applying JL projection: {} features → {} dimensions (ε={:.2})",
212206
n_features, target_dim, self.rp_eps
213207
);
214-
let implicit_proj = ImplicitProjection::new(n_features, target_dim);
208+
let implicit_proj =
209+
ImplicitProjection::new(n_features, target_dim, self.clustering_seed);
215210
let projected = crate::reduction::project_matrix(&clustered_dm, &implicit_proj);
216211

217212
aspace.projection_matrix = Some(implicit_proj.clone());
218213
aspace.reduced_dim = Some(target_dim);
219-
self.projection_matrix = Some(implicit_proj);
220-
self.reduced_dim = Some(target_dim);
221214

222215
let compression = n_features as f64 / target_dim as f64;
223216
info!(
@@ -337,13 +330,12 @@ impl ClusteringHeuristic for ArrowSpaceBuilder {
337330
"Applying JL projection: {} features → {} dimensions (ε={:.2})",
338331
n_features, target_dim, builder.rp_eps
339332
);
340-
let implicit_proj = ImplicitProjection::new(n_features, target_dim);
333+
let implicit_proj =
334+
ImplicitProjection::new(n_features, target_dim, builder.clustering_seed);
341335
let projected = crate::reduction::project_matrix(&clustered_dm, &implicit_proj);
342336

343337
aspace.projection_matrix = Some(implicit_proj.clone());
344338
aspace.reduced_dim = Some(target_dim);
345-
builder.projection_matrix = Some(implicit_proj);
346-
builder.reduced_dim = Some(target_dim);
347339

348340
let compression = n_features as f64 / target_dim as f64;
349341
info!(
@@ -394,9 +386,6 @@ impl ArrowSpaceBuilder {
394386
result.normalise = self.normalise;
395387
result.sparsity_check = self.sparsity_check;
396388
result.sampling = self.sampling.clone();
397-
result.projection_matrix = self.projection_matrix.clone();
398-
result.reduced_dim = self.reduced_dim;
399-
result.extra_reduced_dim = self.extra_reduced_dim;
400389
result.use_dims_reduction = self.use_dims_reduction;
401390
result.rp_eps = self.rp_eps;
402391
result.persistence = self.persistence.clone();
@@ -495,7 +484,7 @@ impl ArrowSpaceBuilder {
495484
/// Enable extra-dimensionality reduction after clustering (energymaps only, optional)
496485
pub fn with_extra_dims_reduction(mut self, enable: bool) -> Self {
497486
assert!(
498-
self.use_dims_reduction == true,
487+
self.use_dims_reduction,
499488
"extra dims reduction needs base reduction"
500489
);
501490
self.extra_dims_reduction = enable;
@@ -566,8 +555,9 @@ impl ArrowSpaceBuilder {
566555
/// with_synthesis was called, in which case synthetic lambdas are computed on that graph.
567556
pub fn build(mut self, rows: Vec<Vec<f64>>) -> (ArrowSpace, GraphLaplacian) {
568557
let n_items = rows.len();
569-
self.n_items_original = n_items;
558+
self.nitems = n_items;
570559
let n_features = rows.first().map(|r| r.len()).unwrap_or(0);
560+
self.nfeatures = n_features;
571561
let start = std::time::Instant::now();
572562

573563
// set baseline for topk
@@ -791,8 +781,9 @@ impl ArrowSpaceBuilder {
791781
energy_params: Option<EnergyParams>,
792782
) -> (ArrowSpace, GraphLaplacian) {
793783
let n_items = rows.shape().0;
794-
self.n_items_original = n_items;
784+
self.nitems = n_items;
795785
let n_features = rows.shape().1;
786+
self.nfeatures = n_features;
796787
let start = std::time::Instant::now();
797788

798789
// set baseline for topk
@@ -870,19 +861,20 @@ impl ArrowSpaceBuilder {
870861
}
871862
Pipeline::Energy | Pipeline::Default => {
872863
assert!(
873-
self.use_dims_reduction == true,
864+
self.use_dims_reduction,
874865
"When using energy pipeline, dim reduction is needed"
875866
);
876867
assert!(
877868
energy_params.is_some(),
878869
"if using energy pipeline, energy_params should be some"
879870
);
880-
if self.prebuilt_spectral == true {
871+
if self.prebuilt_spectral {
881872
panic!(
882873
"Spectral mode not compatible with energy pipeline, please do not enable for energy search"
883874
);
884875
}
885-
self.n_items_original = rows.shape().0;
876+
self.nitems = rows.shape().0;
877+
self.nfeatures = rows.shape().1;
886878

887879
// ============================================================
888880
// Stage 1: Clustering with sampling and optional projection
@@ -928,7 +920,7 @@ impl ArrowSpaceBuilder {
928920
let sub_centroids: DenseMatrix<f64> = ArrowSpace::diffuse_and_split_subcentroids(
929921
&centroids,
930922
&l0,
931-
&energy_params.as_ref().unwrap(),
923+
energy_params.as_ref().unwrap(),
932924
);
933925

934926
assert_eq!(sub_centroids.shape().1, centroids.shape().1);
@@ -953,7 +945,11 @@ impl ArrowSpaceBuilder {
953945
current_features, target_dim, self.rp_eps
954946
);
955947

956-
let implicit_proj = ImplicitProjection::new(current_features, target_dim);
948+
let implicit_proj = ImplicitProjection::new(
949+
current_features,
950+
target_dim,
951+
self.clustering_seed,
952+
);
957953
let projected = project_matrix(&sub_centroids, &implicit_proj);
958954

959955
info!(
@@ -982,7 +978,7 @@ impl ArrowSpaceBuilder {
982978

983979
// Step 6: Build Laplacian on sub_centroids using energy dispersion
984980
let (gl_energy, _, _) =
985-
self.build_energy_laplacian(&sub_centroids, &energy_params.as_ref().unwrap());
981+
self.build_energy_laplacian(&sub_centroids, energy_params.as_ref().unwrap());
986982

987983
assert_eq!(
988984
gl_energy.shape().1,
@@ -1002,7 +998,7 @@ impl ArrowSpaceBuilder {
1002998
ArrowSpace::subcentroids_from_dense_matrix(sub_centroids.clone());
1003999
subcentroid_space.taumode = aspace.taumode;
10041000
subcentroid_space.projection_matrix = aspace.projection_matrix.clone();
1005-
subcentroid_space.reduced_dim = aspace.reduced_dim.clone();
1001+
subcentroid_space.reduced_dim = aspace.reduced_dim;
10061002
// safeguard to clear signals
10071003
subcentroid_space.signals = sprs::CsMat::empty(sprs::CSR, 0);
10081004

@@ -1218,7 +1214,7 @@ impl fmt::Display for ArrowSpaceBuilder {
12181214
/// // Parse back to HashMap
12191215
/// let config_map: HashMap<String, String> = parse_builder_config(&config_string);
12201216
/// ```
1221-
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1217+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
12221218
write!(
12231219
f,
12241220
"prebuilt_spectral={}, \
@@ -1352,6 +1348,9 @@ impl ArrowSpaceBuilder {
13521348
pub fn builder_config_typed(&self) -> HashMap<String, ConfigValue> {
13531349
let mut config = HashMap::new();
13541350

1351+
config.insert("nitems".to_string(), ConfigValue::Usize(self.nitems));
1352+
config.insert("nfeatures".to_string(), ConfigValue::Usize(self.nfeatures));
1353+
13551354
config.insert(
13561355
"prebuilt_spectral".to_string(),
13571356
ConfigValue::Bool(self.prebuilt_spectral),
@@ -1374,49 +1373,13 @@ impl ArrowSpaceBuilder {
13741373
);
13751374
config.insert(
13761375
"synthesis".to_string(),
1377-
ConfigValue::TauMode(self.synthesis.clone()),
1376+
ConfigValue::TauMode(self.synthesis),
13781377
);
13791378
config.insert(
13801379
"sampling".to_string(),
13811380
ConfigValue::OptionSamplerType(self.sampling.clone()),
13821381
);
13831382

1384-
// projection matrix
1385-
if self.projection_matrix.is_some() {
1386-
config.insert(
1387-
"pj_mtx_original_dim".to_string(),
1388-
ConfigValue::OptionUsize(Some(
1389-
self.projection_matrix.as_ref().unwrap().original_dim,
1390-
)),
1391-
);
1392-
config.insert(
1393-
"pj_mtx_reduced_dim".to_string(),
1394-
ConfigValue::OptionUsize(Some(
1395-
self.projection_matrix.as_ref().unwrap().reduced_dim,
1396-
)),
1397-
);
1398-
config.insert(
1399-
"pj_mtx_seed".to_string(),
1400-
ConfigValue::OptionU64(Some(self.projection_matrix.as_ref().unwrap().seed)),
1401-
);
1402-
1403-
config.insert(
1404-
"extra_reduced_dim".to_string(),
1405-
ConfigValue::Bool(self.extra_dims_reduction),
1406-
);
1407-
} else {
1408-
config.insert(
1409-
"pj_mtx_original_dim".to_string(),
1410-
ConfigValue::OptionUsize(None),
1411-
);
1412-
config.insert(
1413-
"pj_mtx_reduced_dim".to_string(),
1414-
ConfigValue::OptionUsize(None),
1415-
);
1416-
config.insert("pj_mtx_seed".to_string(), ConfigValue::OptionU64(None));
1417-
config.insert("extra_reduced_dim".to_string(), ConfigValue::Bool(false));
1418-
}
1419-
14201383
config.insert(
14211384
"cluster_max_clusters".to_string(),
14221385
ConfigValue::OptionUsize(self.cluster_max_clusters),
@@ -1444,7 +1407,7 @@ impl ArrowSpaceBuilder {
14441407
}
14451408

14461409
impl fmt::Display for ConfigValue {
1447-
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1410+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
14481411
match self {
14491412
// Primitive types
14501413
ConfigValue::Bool(v) => write!(f, "{}", v),

src/clustering.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
//! Calinski-Harabasz analysis, and adaptive thresholding
1010
//! - Helper methods for distance computations and pilot k-means
1111
//! - Parallel implementations for performance-critical operations
12-
//! Incremental clustering with optimal K heuristics for `ArrowSpace`.
12+
//! Incremental clustering with optimal K heuristics for `ArrowSpace`.
1313
//!
1414
//! **DETERMINISTIC**: All random operations use fixed seed 128 for reproducibility.
1515

0 commit comments

Comments
 (0)