Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{
"search_directories": [
"test_data/disk_index_search"
],
"jobs": [
{
"type": "async-index-build",
"content": {
"source": {
"index-source": "Build",
"data_type": "float32",
"data": "disk_index_siftsmall_learn_256pts_data.fbin",
"distance": "squared_l2",
"max_degree": 32,
"l_build": 50,
"alpha": 1.2,
"backedge_ratio": 1.0,
"num_threads": 1,
"num_start_points": 1,
"num_insert_attempts": 1,
"saturate_inserts": false,
"start_point_strategy": "medoid"
},
"search_phase": {
"search-type": "topk-high-selectivity-multihop-filter",
"queries": "disk_index_sample_query_10pts.fbin",
"groundtruth": "gt_small_filter.bin",
"query_predicates": "query.10.label.jsonl",
"data_labels": "data.256.label.jsonl",
"reps": 5,
"num_threads": [
1
],
"runs": [
{
"search_n": 20,
"search_l": [
20,
30,
40,
50,
100,
200
],
"recall_k": 10
}
]
}
}
}
]
}
38 changes: 38 additions & 0 deletions diskann-benchmark/src/backend/index/benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,44 @@ where
.collect(),
)?;

let search_results = search::knn::run(&multihop, &groundtruth, steps)?;
result.append(AggregatedSearchResults::Topk(search_results));
Ok(result)
}
SearchPhase::TopkHighSelectivityMultihopFilter(search_phase) => {
// Top-k multi-hop filtered search, high-selectivity variant.
// The only high-selectivity-specific step visible in this arm is the
// `as_high_selectivity_query_label_provider` adapter applied to each bitmap
// below; presumably that adapter is what reports `RejectAndNeedExpand` and
// thereby enables the exploration queue -- confirm in `utils::filters`.
let mut result = BuildResult::new_topk(build_stats);

// Save construction stats before running queries.
checkpoint.checkpoint(&result)?;

// Query vectors, wrapped in an `Arc` and handed to `MultiHop::new` below.
let queries: Arc<Matrix<T>> = Arc::new(datafiles::load_dataset(datafiles::BinFile(
&search_phase.queries,
))?);

// NOTE(review): this arm reads the ground truth with `load_range_groundtruth`,
// whereas the spherical backend's arm for this same search phase calls
// `datafiles::load_groundtruth`. Confirm which on-disk format the filtered
// ground-truth file actually uses and that both backends agree.
let groundtruth =
datafiles::load_range_groundtruth(datafiles::BinFile(&search_phase.groundtruth))?;

// Sweep description built from the repetition count, the thread counts and
// the configured runs of this phase.
let steps = search::knn::SearchSteps::new(
search_phase.reps,
&search_phase.num_threads,
&search_phase.runs,
);

// Filter bitmaps derived from the query predicate file and the data label file.
let bit_maps =
generate_bitmaps(&search_phase.query_predicates, &search_phase.data_labels)?;

// `index` and `queries` are moved into the searcher; each bitmap is converted
// into a high-selectivity query label provider before being collected.
let multihop = benchmark_core::search::graph::MultiHop::new(
index,
queries,
benchmark_core::search::graph::Strategy::broadcast(search_strategy),
bit_maps
.into_iter()
.map(utils::filters::as_high_selectivity_query_label_provider)
.collect(),
)?;

// Run the sweep against the ground truth and record it as a top-k result.
let search_results = search::knn::run(&multihop, &groundtruth, steps)?;
result.append(AggregatedSearchResults::Topk(search_results));
Ok(result)
Expand Down
50 changes: 50 additions & 0 deletions diskann-benchmark/src/backend/index/spherical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,56 @@ mod imp {
writeln!(output, "\n\n{}", result)?;
Ok(result)
}
SearchPhase::TopkHighSelectivityMultihopFilter(search_phase) => {
// Top-k multi-hop filtered search, high-selectivity variant, executed once
// per configured query layout. `result`, `index` and `output` are bound
// outside this arm.

// Save construction stats before running queries.
_checkpoint.checkpoint(&result)?;

// Query vectors are loaded as `f32` and shared across layouts via `Arc`.
let queries: Arc<Matrix<f32>> = Arc::new(datafiles::load_dataset(
datafiles::BinFile(&search_phase.queries),
)?);

// NOTE(review): the ground truth is read with `load_groundtruth` here, while
// the arm for this same search phase in benchmarks.rs uses
// `load_range_groundtruth` -- confirm the two backends expect the same format.
let groundtruth = datafiles::load_groundtruth(datafiles::BinFile(
&search_phase.groundtruth,
))?;

// Sweep description built from the repetition count, the thread counts and
// the configured runs of this phase.
let steps = search::knn::SearchSteps::new(
search_phase.reps,
&search_phase.num_threads,
&search_phase.runs,
);

// Filter bitmaps derived from the query predicate file and the data label file.
let bit_maps = generate_bitmaps(
&search_phase.query_predicates,
&search_phase.data_labels,
)?;

// Convert every bitmap into a high-selectivity query label provider once,
// up front, and keep them behind an `Arc<[_]>` so each layout iteration
// below only clones the handle instead of rebuilding the providers.
let bit_map_filters: Arc<[_]> = bit_maps
.into_iter()
.map(utils::filters::as_high_selectivity_query_label_provider)
.collect();

// One searcher and one sweep per query layout; every sweep is appended to
// `result` tagged with the layout that produced it.
for &layout in self.input.query_layouts.iter() {
let multihop = benchmark_core::search::graph::MultiHop::new(
index.clone(),
queries.clone(),
benchmark_core::search::graph::Strategy::broadcast(
inmem::spherical::Quantized::search(layout.into()),
),
bit_map_filters.clone(),
)?;

let search_results =
search::knn::run(&multihop, &groundtruth, steps)?;
result.append(SearchRun {
layout,
results: AggregatedSearchResults::Topk(search_results),
});
}
// Emit the accumulated results (preceded by two newlines) before returning them.
writeln!(output, "\n\n{}", result)?;
Ok(result)
}
}
}
}
Expand Down
42 changes: 42 additions & 0 deletions diskann-benchmark/src/inputs/async_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,43 @@ impl CheckDeserialization for MultiHopSearchPhase {
}
}

/// Multi-hop search phase with high-selectivity optimization enabled.
///
/// This search type uses `RejectAndNeedExpand` for rejected nodes, enabling
/// the exploration queue mechanism. This is beneficial when the filter has
/// high selectivity (few matching vectors), as it allows the search to
/// continue exploring through non-matching nodes even when the primary
/// queue is exhausted.
///
/// The field names double as the keys of the `search_phase` object in the job
/// configuration, since the struct derives `Serialize`/`Deserialize` without
/// any rename attribute.
#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct HighSelectivityMultiHopSearchPhase {
/// File holding the query vectors.
pub(crate) queries: InputFile,
/// File holding the per-query filter predicates; combined with
/// `data_labels` to build the filter bitmaps.
pub(crate) query_predicates: InputFile,
/// File holding the ground truth that search results are compared against.
pub(crate) groundtruth: InputFile,
/// Repetition count forwarded to the search sweep; must be non-zero.
pub(crate) reps: NonZeroUsize,
/// File holding the labels of the indexed data points; combined with
/// `query_predicates` to build the filter bitmaps.
pub(crate) data_labels: InputFile,
/// Thread counts to sweep over; each entry must be non-zero.
pub(crate) num_threads: Vec<NonZeroUsize>,
/// Search runs to execute; each one is validated individually during
/// deserialization checking.
pub(crate) runs: Vec<GraphSearch>,
}

impl CheckDeserialization for HighSelectivityMultiHopSearchPhase {
    /// Validates every input file referenced by this phase, then every search run.
    ///
    /// Files are checked in the order `queries`, `query_predicates`,
    /// `data_labels`, `groundtruth`, and the first failure is returned as-is.
    /// A failure inside a run is returned with the added context
    /// `"search run <index>"`, where `<index>` is the run's position in `runs`.
    fn check_deserialization(&mut self, checker: &mut Checker) -> Result<(), anyhow::Error> {
        let Self {
            queries,
            query_predicates,
            data_labels,
            groundtruth,
            runs,
            ..
        } = self;

        // Input files first, stopping at the first one that fails.
        queries.check_deserialization(checker)?;
        query_predicates.check_deserialization(checker)?;
        data_labels.check_deserialization(checker)?;
        groundtruth.check_deserialization(checker)?;

        // Then each run in order, labelling any error with the run's index.
        runs.iter_mut().enumerate().try_for_each(|(i, run)| {
            run.check_deserialization(checker)
                .with_context(|| format!("search run {}", i))
        })
    }
}

/// A one-to-one correspondence with [`diskann::index::config::IntraBatchCandidates`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
Expand Down Expand Up @@ -333,6 +370,8 @@ pub(crate) enum SearchPhase {
Range(RangeSearchPhase),
TopkBetaFilter(BetaSearchPhase),
TopkMultihopFilter(MultiHopSearchPhase),
/// Multi-hop search with high-selectivity optimization (exploration queue enabled).
TopkHighSelectivityMultihopFilter(HighSelectivityMultiHopSearchPhase),
}

impl CheckDeserialization for SearchPhase {
Expand All @@ -342,6 +381,9 @@ impl CheckDeserialization for SearchPhase {
SearchPhase::Range(phase) => phase.check_deserialization(checker),
SearchPhase::TopkBetaFilter(phase) => phase.check_deserialization(checker),
SearchPhase::TopkMultihopFilter(phase) => phase.check_deserialization(checker),
SearchPhase::TopkHighSelectivityMultihopFilter(phase) => {
phase.check_deserialization(checker)
}
}
}
}
Expand Down
Loading
Loading