@@ -49,7 +49,8 @@ impl FromStr for Pipeline {
4949
5050#[ derive( Clone , PartialEq ) ]
5151pub struct ArrowSpaceBuilder {
52- pub ( crate ) n_items_original : usize ,
52+ pub ( crate ) nitems : usize ,
53+ pub ( crate ) nfeatures : usize ,
5354 pub prebuilt_spectral : bool , // true if spectral laplacian has been computed
5455
5556 // Lambda-graph parameters (the canonical path)
@@ -68,23 +69,18 @@ pub struct ArrowSpaceBuilder {
6869 // activate sampling, default false
6970 pub sampling : Option < SamplerType > ,
7071
71- // activate dimensionality reduction and projection matrix
72- // Projection data: dims reduction data (needed to prepare the query vector)
73- pub ( crate ) projection_matrix : Option < ImplicitProjection > , // F × r (if projection was used)
74- pub ( crate ) reduced_dim : Option < usize > , // r (reduced dimension, None if no projection)
75- pub ( crate ) extra_reduced_dim : bool , // optional extra dimensionality reduction for energymaps
76-
7772 // Synthetic index configuration (used `with_synthesis`)
7873 pub synthesis : TauMode , // (tau_mode)
7974
8075 /// Max clusters X (default: nfeatures; cap on centroids)
8176 pub ( crate ) cluster_max_clusters : Option < usize > ,
8277 /// Squared L2 threshold for new cluster creation (default 1.0)
8378 pub ( crate ) cluster_radius : f64 ,
79+ /// used for clustering and dimensionality reduction (if active)
8480 pub ( crate ) clustering_seed : Option < u64 > ,
8581 pub ( crate ) deterministic_clustering : bool ,
8682
87- // dimensionality reduction with random projection (dafault false)
83+ /// dimensionality reduction with random projection (default false)
8884 pub ( crate ) use_dims_reduction : bool ,
8985 pub ( crate ) extra_dims_reduction : bool ,
9086 pub ( crate ) rp_eps : f64 ,
@@ -97,7 +93,8 @@ impl Default for ArrowSpaceBuilder {
9793 fn default ( ) -> Self {
9894 debug ! ( "Creating ArrowSpaceBuilder with default parameters" ) ;
9995 Self {
100- n_items_original : 0 ,
96+ nitems : 0 ,
97+ nfeatures : 0 ,
10198 // arrows: ArrowSpace::default(),
10299 prebuilt_spectral : false ,
103100
@@ -114,9 +111,6 @@ impl Default for ArrowSpaceBuilder {
114111 sparsity_check : false ,
115112 // sampling default
116113 sampling : Some ( SamplerType :: Simple ( 0.6 ) ) ,
117- projection_matrix : None ,
118- reduced_dim : None ,
119- extra_reduced_dim : false ,
120114 // Clustering defaults
121115 cluster_max_clusters : None , // will be set to nfeatures at build time
122116 cluster_radius : 1.0 ,
@@ -211,13 +205,12 @@ impl ClusteringHeuristic for ArrowSpaceBuilder {
211205 "Applying JL projection: {} features → {} dimensions (ε={:.2})" ,
212206 n_features, target_dim, self . rp_eps
213207 ) ;
214- let implicit_proj = ImplicitProjection :: new ( n_features, target_dim) ;
208+ let implicit_proj =
209+ ImplicitProjection :: new ( n_features, target_dim, self . clustering_seed ) ;
215210 let projected = crate :: reduction:: project_matrix ( & clustered_dm, & implicit_proj) ;
216211
217212 aspace. projection_matrix = Some ( implicit_proj. clone ( ) ) ;
218213 aspace. reduced_dim = Some ( target_dim) ;
219- self . projection_matrix = Some ( implicit_proj) ;
220- self . reduced_dim = Some ( target_dim) ;
221214
222215 let compression = n_features as f64 / target_dim as f64 ;
223216 info ! (
@@ -337,13 +330,12 @@ impl ClusteringHeuristic for ArrowSpaceBuilder {
337330 "Applying JL projection: {} features → {} dimensions (ε={:.2})" ,
338331 n_features, target_dim, builder. rp_eps
339332 ) ;
340- let implicit_proj = ImplicitProjection :: new ( n_features, target_dim) ;
333+ let implicit_proj =
334+ ImplicitProjection :: new ( n_features, target_dim, builder. clustering_seed ) ;
341335 let projected = crate :: reduction:: project_matrix ( & clustered_dm, & implicit_proj) ;
342336
343337 aspace. projection_matrix = Some ( implicit_proj. clone ( ) ) ;
344338 aspace. reduced_dim = Some ( target_dim) ;
345- builder. projection_matrix = Some ( implicit_proj) ;
346- builder. reduced_dim = Some ( target_dim) ;
347339
348340 let compression = n_features as f64 / target_dim as f64 ;
349341 info ! (
@@ -394,9 +386,6 @@ impl ArrowSpaceBuilder {
394386 result. normalise = self . normalise ;
395387 result. sparsity_check = self . sparsity_check ;
396388 result. sampling = self . sampling . clone ( ) ;
397- result. projection_matrix = self . projection_matrix . clone ( ) ;
398- result. reduced_dim = self . reduced_dim ;
399- result. extra_reduced_dim = self . extra_reduced_dim ;
400389 result. use_dims_reduction = self . use_dims_reduction ;
401390 result. rp_eps = self . rp_eps ;
402391 result. persistence = self . persistence . clone ( ) ;
@@ -495,7 +484,7 @@ impl ArrowSpaceBuilder {
495484 /// Enable extra-dimensionality reduction after clustering (energymaps only, optional)
496485 pub fn with_extra_dims_reduction ( mut self , enable : bool ) -> Self {
497486 assert ! (
498- self . use_dims_reduction == true ,
487+ self . use_dims_reduction,
499488 "extra dims reduction needs base reduction"
500489 ) ;
501490 self . extra_dims_reduction = enable;
@@ -566,8 +555,9 @@ impl ArrowSpaceBuilder {
566555 /// with_synthesis was called, in which case synthetic lambdas are computed on that graph.
567556 pub fn build ( mut self , rows : Vec < Vec < f64 > > ) -> ( ArrowSpace , GraphLaplacian ) {
568557 let n_items = rows. len ( ) ;
569- self . n_items_original = n_items;
558+ self . nitems = n_items;
570559 let n_features = rows. first ( ) . map ( |r| r. len ( ) ) . unwrap_or ( 0 ) ;
560+ self . nfeatures = n_features;
571561 let start = std:: time:: Instant :: now ( ) ;
572562
573563 // set baseline for topk
@@ -791,8 +781,9 @@ impl ArrowSpaceBuilder {
791781 energy_params : Option < EnergyParams > ,
792782 ) -> ( ArrowSpace , GraphLaplacian ) {
793783 let n_items = rows. shape ( ) . 0 ;
794- self . n_items_original = n_items;
784+ self . nitems = n_items;
795785 let n_features = rows. shape ( ) . 1 ;
786+ self . nfeatures = n_features;
796787 let start = std:: time:: Instant :: now ( ) ;
797788
798789 // set baseline for topk
@@ -870,19 +861,20 @@ impl ArrowSpaceBuilder {
870861 }
871862 Pipeline :: Energy | Pipeline :: Default => {
872863 assert ! (
873- self . use_dims_reduction == true ,
864+ self . use_dims_reduction,
874865 "When using energy pipeline, dim reduction is needed"
875866 ) ;
876867 assert ! (
877868 energy_params. is_some( ) ,
878869 "if using energy pipeline, energy_params should be some"
879870 ) ;
880- if self . prebuilt_spectral == true {
871+ if self . prebuilt_spectral {
881872 panic ! (
882873 "Spectral mode not compatible with energy pipeline, please do not enable for energy search"
883874 ) ;
884875 }
885- self . n_items_original = rows. shape ( ) . 0 ;
876+ self . nitems = rows. shape ( ) . 0 ;
877+ self . nfeatures = rows. shape ( ) . 1 ;
886878
887879 // ============================================================
888880 // Stage 1: Clustering with sampling and optional projection
@@ -928,7 +920,7 @@ impl ArrowSpaceBuilder {
928920 let sub_centroids: DenseMatrix < f64 > = ArrowSpace :: diffuse_and_split_subcentroids (
929921 & centroids,
930922 & l0,
931- & energy_params. as_ref ( ) . unwrap ( ) ,
923+ energy_params. as_ref ( ) . unwrap ( ) ,
932924 ) ;
933925
934926 assert_eq ! ( sub_centroids. shape( ) . 1 , centroids. shape( ) . 1 ) ;
@@ -953,7 +945,11 @@ impl ArrowSpaceBuilder {
953945 current_features, target_dim, self . rp_eps
954946 ) ;
955947
956- let implicit_proj = ImplicitProjection :: new ( current_features, target_dim) ;
948+ let implicit_proj = ImplicitProjection :: new (
949+ current_features,
950+ target_dim,
951+ self . clustering_seed ,
952+ ) ;
957953 let projected = project_matrix ( & sub_centroids, & implicit_proj) ;
958954
959955 info ! (
@@ -982,7 +978,7 @@ impl ArrowSpaceBuilder {
982978
983979 // Step 6: Build Laplacian on sub_centroids using energy dispersion
984980 let ( gl_energy, _, _) =
985- self . build_energy_laplacian ( & sub_centroids, & energy_params. as_ref ( ) . unwrap ( ) ) ;
981+ self . build_energy_laplacian ( & sub_centroids, energy_params. as_ref ( ) . unwrap ( ) ) ;
986982
987983 assert_eq ! (
988984 gl_energy. shape( ) . 1 ,
@@ -1002,7 +998,7 @@ impl ArrowSpaceBuilder {
1002998 ArrowSpace :: subcentroids_from_dense_matrix ( sub_centroids. clone ( ) ) ;
1003999 subcentroid_space. taumode = aspace. taumode ;
10041000 subcentroid_space. projection_matrix = aspace. projection_matrix . clone ( ) ;
1005- subcentroid_space. reduced_dim = aspace. reduced_dim . clone ( ) ;
1001+ subcentroid_space. reduced_dim = aspace. reduced_dim ;
10061002 // safeguard to clear signals
10071003 subcentroid_space. signals = sprs:: CsMat :: empty ( sprs:: CSR , 0 ) ;
10081004
@@ -1218,7 +1214,7 @@ impl fmt::Display for ArrowSpaceBuilder {
12181214 /// // Parse back to HashMap
12191215 /// let config_map: HashMap<String, String> = parse_builder_config(&config_string);
12201216 /// ```
1221- fn fmt ( & self , f : & mut fmt:: Formatter ) -> fmt:: Result {
1217+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
12221218 write ! (
12231219 f,
12241220 "prebuilt_spectral={}, \
@@ -1352,6 +1348,9 @@ impl ArrowSpaceBuilder {
13521348 pub fn builder_config_typed ( & self ) -> HashMap < String , ConfigValue > {
13531349 let mut config = HashMap :: new ( ) ;
13541350
1351+ config. insert ( "nitems" . to_string ( ) , ConfigValue :: Usize ( self . nitems ) ) ;
1352+ config. insert ( "nfeatures" . to_string ( ) , ConfigValue :: Usize ( self . nfeatures ) ) ;
1353+
13551354 config. insert (
13561355 "prebuilt_spectral" . to_string ( ) ,
13571356 ConfigValue :: Bool ( self . prebuilt_spectral ) ,
@@ -1374,49 +1373,13 @@ impl ArrowSpaceBuilder {
13741373 ) ;
13751374 config. insert (
13761375 "synthesis" . to_string ( ) ,
1377- ConfigValue :: TauMode ( self . synthesis . clone ( ) ) ,
1376+ ConfigValue :: TauMode ( self . synthesis ) ,
13781377 ) ;
13791378 config. insert (
13801379 "sampling" . to_string ( ) ,
13811380 ConfigValue :: OptionSamplerType ( self . sampling . clone ( ) ) ,
13821381 ) ;
13831382
1384- // projection matrix
1385- if self . projection_matrix . is_some ( ) {
1386- config. insert (
1387- "pj_mtx_original_dim" . to_string ( ) ,
1388- ConfigValue :: OptionUsize ( Some (
1389- self . projection_matrix . as_ref ( ) . unwrap ( ) . original_dim ,
1390- ) ) ,
1391- ) ;
1392- config. insert (
1393- "pj_mtx_reduced_dim" . to_string ( ) ,
1394- ConfigValue :: OptionUsize ( Some (
1395- self . projection_matrix . as_ref ( ) . unwrap ( ) . reduced_dim ,
1396- ) ) ,
1397- ) ;
1398- config. insert (
1399- "pj_mtx_seed" . to_string ( ) ,
1400- ConfigValue :: OptionU64 ( Some ( self . projection_matrix . as_ref ( ) . unwrap ( ) . seed ) ) ,
1401- ) ;
1402-
1403- config. insert (
1404- "extra_reduced_dim" . to_string ( ) ,
1405- ConfigValue :: Bool ( self . extra_dims_reduction ) ,
1406- ) ;
1407- } else {
1408- config. insert (
1409- "pj_mtx_original_dim" . to_string ( ) ,
1410- ConfigValue :: OptionUsize ( None ) ,
1411- ) ;
1412- config. insert (
1413- "pj_mtx_reduced_dim" . to_string ( ) ,
1414- ConfigValue :: OptionUsize ( None ) ,
1415- ) ;
1416- config. insert ( "pj_mtx_seed" . to_string ( ) , ConfigValue :: OptionU64 ( None ) ) ;
1417- config. insert ( "extra_reduced_dim" . to_string ( ) , ConfigValue :: Bool ( false ) ) ;
1418- }
1419-
14201383 config. insert (
14211384 "cluster_max_clusters" . to_string ( ) ,
14221385 ConfigValue :: OptionUsize ( self . cluster_max_clusters ) ,
@@ -1444,7 +1407,7 @@ impl ArrowSpaceBuilder {
14441407}
14451408
14461409impl fmt:: Display for ConfigValue {
1447- fn fmt ( & self , f : & mut fmt:: Formatter ) -> fmt:: Result {
1410+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
14481411 match self {
14491412 // Primitive types
14501413 ConfigValue :: Bool ( v) => write ! ( f, "{}" , v) ,
0 commit comments