microsoft · gopalrs · Feb 10, 2026 · Feb 10, 2026 · Feb 10, 2026 · Feb 16, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -21,7 +21,7 @@ env:
   RUST_BACKTRACE: 1
   # The features we want to explicitly test. For example, the `flatbuffers-build` feature
   # of `diskann-quantization` requires additional setup and so must not be included by default.
-  DISKANN_FEATURES: "virtual_storage,bf_tree,spherical-quantization,product-quantization,tracing,experimental_diversity_search,disk-index,flatbuffers,linalg,codegen"
+  DISKANN_FEATURES: "virtual_storage,bf_tree,spherical-quantization,product-quantization,tracing,experimental_diversity_search,disk-index,flatbuffers,linalg,codegen,document-index"
 
   # Use the Rust version specified in rust-toolchain.toml
   rust_stable: "1.92"

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/diskann-benchmark/Cargo.toml b/diskann-benchmark/Cargo.toml
@@ -63,6 +63,9 @@ scalar-quantization = []
 # Enable minmax-quantization based algorithms
 minmax-quantization = []
 
+# Enable Document Index benchmarks (sample input: example/document-filter.json)
+document-index = []
+
 # Enable Disk Index benchmarks
 disk-index = [
     "diskann-disk/perf_test",

diff --git a/diskann-benchmark/example/document-filter.json b/diskann-benchmark/example/document-filter.json
@@ -0,0 +1,39 @@
+{
+    "search_directories": [
+        "test_data/disk_index_search"
+    ],
+    "jobs": [
+        {
+            "type": "document-index-build",
+            "content": {
+                "build": {
+                    "data_type": "float32",
+                    "data": "disk_index_siftsmall_learn_256pts_data.fbin",
+                    "data_labels": "data.256.label.jsonl",
+                    "distance": "squared_l2",
+                    "max_degree": 32,
+                    "l_build": 50,
+                    "alpha": 1.2,
+                    "num_threads": 4
+                },
+                "search": {
+                    "queries": "disk_index_sample_query_10pts.fbin",
+                    "query_predicates": "query.10.label.jsonl",
+                    "groundtruth": "disk_index_10pts_idx_uint32_truth_search_filter_res.bin",
+                    "beta": 0.5,
+                    "reps": 5,
+                    "num_threads": [
+                        1
+                    ],
+                    "runs": [
+                        {
+                            "search_n": 20,
+                            "search_l": [20, 30, 40],
+                            "recall_k": 10
+                        }
+                    ]
+                }
+            }
+        }
+    ]
+}