Skip to content

Commit c89bfcd

Browse files
committed
fix indexing add test
1 parent 5b6b945 commit c89bfcd

File tree

1 file changed

+53
-2
lines changed

1 file changed

+53
-2
lines changed

kernel/src/actions/visitors.rs

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,9 +199,9 @@ impl RemoveVisitor {
199199

200200
let deletion_vector = visit_deletion_vector_at(row_index, &getters[8..])?;
201201

202-
let base_row_id: Option<i64> = getters[12].get_opt(row_index, "remove.baseRowId")?;
202+
let base_row_id: Option<i64> = getters[13].get_opt(row_index, "remove.baseRowId")?;
203203
let default_row_commit_version: Option<i64> =
204-
getters[13].get_opt(row_index, "remove.defaultRowCommitVersion")?;
204+
getters[14].get_opt(row_index, "remove.defaultRowCommitVersion")?;
205205

206206
Ok(Remove {
207207
path,
@@ -865,6 +865,57 @@ mod tests {
865865
);
866866
}
867867

868+
#[test]
869+
fn test_parse_remove_all_fields_unique() {
870+
// This test verifies that all fields in the Remove action are correctly parsed
871+
// and that each field gets a unique value, ensuring no index collisions
872+
let json_strings: StringArray = vec![
873+
r#"{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]}}"#,
874+
r#"{"metaData":{"id":"test-id","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1670892997849}}"#,
875+
r#"{"remove":{"path":"test-path.parquet","deletionTimestamp":1234567890,"dataChange":false,"extendedFileMetadata":true,"partitionValues":{"part":"value"},"size":9999,"stats":"{\"numRecords\":42}","deletionVector":{"storageType":"u","pathOrInlineDv":"vBn[lx{q8@P<9BNH/isA","offset":1,"sizeInBytes":36,"cardinality":3},"baseRowId":100,"defaultRowCommitVersion":5}}"#,
876+
]
877+
.into();
878+
let batch = parse_json_batch(json_strings);
879+
let mut remove_visitor = RemoveVisitor::default();
880+
remove_visitor.visit_rows_of(batch.as_ref()).unwrap();
881+
882+
assert_eq!(
883+
remove_visitor.removes.len(),
884+
1,
885+
"Expected exactly one remove action"
886+
);
887+
888+
let remove = &remove_visitor.removes[0];
889+
890+
// Verify each field has the expected unique value
891+
assert_eq!(remove.path, "test-path.parquet", "path mismatch");
892+
assert_eq!(remove.deletion_timestamp, Some(1234567890), "deletion_timestamp mismatch");
893+
assert_eq!(remove.data_change, false, "data_change mismatch");
894+
assert_eq!(remove.extended_file_metadata, Some(true), "extended_file_metadata mismatch");
895+
assert_eq!(
896+
remove.partition_values,
897+
Some(HashMap::from([("part".to_string(), "value".to_string())])),
898+
"partition_values mismatch"
899+
);
900+
assert_eq!(remove.size, Some(9999), "size mismatch");
901+
assert_eq!(remove.stats, Some(r#"{"numRecords":42}"#.to_string()), "stats mismatch");
902+
903+
// Verify deletion vector fields
904+
let dv = remove.deletion_vector.as_ref().expect("deletion_vector should be present");
905+
assert_eq!(dv.path_or_inline_dv, "vBn[lx{q8@P<9BNH/isA", "deletion_vector.path_or_inline_dv mismatch");
906+
assert_eq!(dv.offset, Some(1), "deletion_vector.offset mismatch");
907+
assert_eq!(dv.size_in_bytes, 36, "deletion_vector.size_in_bytes mismatch");
908+
assert_eq!(dv.cardinality, 3, "deletion_vector.cardinality mismatch");
909+
910+
// Verify row tracking fields (these would have been incorrect with the bug)
911+
assert_eq!(remove.base_row_id, Some(100), "base_row_id mismatch - check getter index");
912+
assert_eq!(
913+
remove.default_row_commit_version,
914+
Some(5),
915+
"default_row_commit_version mismatch - check getter index"
916+
);
917+
}
918+
868919
#[test]
869920
fn test_parse_txn() {
870921
let json_strings: StringArray = vec![

0 commit comments

Comments
 (0)