@@ -178,7 +178,7 @@ impl RemoveVisitor {
178178 getters : & [ & ' a dyn GetData < ' a > ] ,
179179 ) -> DeltaResult < Remove > {
180180 require ! (
181- getters. len( ) == 14 ,
181+ getters. len( ) == 15 ,
182182 Error :: InternalError ( format!(
183183 "Wrong number of RemoveVisitor getters: {}" ,
184184 getters. len( )
@@ -194,14 +194,14 @@ impl RemoveVisitor {
194194 getters[ 4 ] . get_opt ( row_index, "remove.partitionValues" ) ?;
195195
196196 let size: Option < i64 > = getters[ 5 ] . get_opt ( row_index, "remove.size" ) ?;
197+ let stats: Option < String > = getters[ 6 ] . get_opt ( row_index, "remove.stats" ) ?;
198+ // TODO(nick) tags are skipped in getters[7]
197199
198- // TODO(nick) tags are skipped in getters[6]
199-
200- let deletion_vector = visit_deletion_vector_at ( row_index, & getters[ 7 ..] ) ?;
200+ let deletion_vector = visit_deletion_vector_at ( row_index, & getters[ 8 ..] ) ?;
201201
202- let base_row_id: Option < i64 > = getters[ 12 ] . get_opt ( row_index, "remove.baseRowId" ) ?;
202+ let base_row_id: Option < i64 > = getters[ 13 ] . get_opt ( row_index, "remove.baseRowId" ) ?;
203203 let default_row_commit_version: Option < i64 > =
204- getters[ 13 ] . get_opt ( row_index, "remove.defaultRowCommitVersion" ) ?;
204+ getters[ 14 ] . get_opt ( row_index, "remove.defaultRowCommitVersion" ) ?;
205205
206206 Ok ( Remove {
207207 path,
@@ -210,6 +210,7 @@ impl RemoveVisitor {
210210 extended_file_metadata,
211211 partition_values,
212212 size,
213+ stats,
213214 tags : None ,
214215 deletion_vector,
215216 base_row_id,
@@ -834,7 +835,7 @@ mod tests {
834835 let json_strings: StringArray = vec ! [
835836 r#"{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}"# ,
836837 r#"{"metaData":{"id":"aff5cb91-8cd9-4195-aef9-446908507302","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c3\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["c1","c2"],"configuration":{},"createdTime":1670892997849}}"# ,
837- r#"{"remove":{"path":"c1=4/c2=c/part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet","deletionTimestamp":1670892998135,"dataChange":true,"partitionValues":{"c1":"4","c2":"c"},"size":452}}"# ,
838+ r#"{"remove":{"path":"c1=4/c2=c/part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet","deletionTimestamp":1670892998135,"dataChange":true,"partitionValues":{"c1":"4","c2":"c"},"size":452,"stats":"{\"numRecords\":1}" }}"# ,
838839 ]
839840 . into ( ) ;
840841 let batch = parse_json_batch ( json_strings) ;
@@ -850,6 +851,7 @@ mod tests {
850851 ( "c2" . to_string ( ) , "c" . to_string ( ) ) ,
851852 ] ) ) ,
852853 size : Some ( 452 ) ,
854+ stats : Some ( r#"{"numRecords":1}"# . to_string ( ) ) ,
853855 ..Default :: default ( )
854856 } ;
855857 assert_eq ! (
@@ -863,6 +865,82 @@ mod tests {
863865 ) ;
864866 }
865867
868+ #[ test]
869+ fn test_parse_remove_all_fields_unique ( ) {
870+ // This test verifies that all fields in the Remove action are correctly parsed
871+ // and that each field gets a unique value, ensuring no index collisions
872+ let json_strings: StringArray = vec ! [
873+ r#"{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]}}"# ,
874+ r#"{"metaData":{"id":"test-id","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1670892997849}}"# ,
875+ r#"{"remove":{"path":"test-path.parquet","deletionTimestamp":1234567890,"dataChange":false,"extendedFileMetadata":true,"partitionValues":{"part":"value"},"size":9999,"stats":"{\"numRecords\":42}","deletionVector":{"storageType":"u","pathOrInlineDv":"vBn[lx{q8@P<9BNH/isA","offset":1,"sizeInBytes":36,"cardinality":3},"baseRowId":100,"defaultRowCommitVersion":5}}"# ,
876+ ]
877+ . into ( ) ;
878+ let batch = parse_json_batch ( json_strings) ;
879+ let mut remove_visitor = RemoveVisitor :: default ( ) ;
880+ remove_visitor. visit_rows_of ( batch. as_ref ( ) ) . unwrap ( ) ;
881+
882+ assert_eq ! (
883+ remove_visitor. removes. len( ) ,
884+ 1 ,
885+ "Expected exactly one remove action"
886+ ) ;
887+
888+ let remove = & remove_visitor. removes [ 0 ] ;
889+
890+ // Verify each field has the expected unique value
891+ assert_eq ! ( remove. path, "test-path.parquet" , "path mismatch" ) ;
892+ assert_eq ! (
893+ remove. deletion_timestamp,
894+ Some ( 1234567890 ) ,
895+ "deletion_timestamp mismatch"
896+ ) ;
897+ assert ! ( !remove. data_change, "data_change mismatch" ) ;
898+ assert_eq ! (
899+ remove. extended_file_metadata,
900+ Some ( true ) ,
901+ "extended_file_metadata mismatch"
902+ ) ;
903+ assert_eq ! (
904+ remove. partition_values,
905+ Some ( HashMap :: from( [ ( "part" . to_string( ) , "value" . to_string( ) ) ] ) ) ,
906+ "partition_values mismatch"
907+ ) ;
908+ assert_eq ! ( remove. size, Some ( 9999 ) , "size mismatch" ) ;
909+ assert_eq ! (
910+ remove. stats,
911+ Some ( r#"{"numRecords":42}"# . to_string( ) ) ,
912+ "stats mismatch"
913+ ) ;
914+
915+ // Verify deletion vector fields
916+ let dv = remove
917+ . deletion_vector
918+ . as_ref ( )
919+ . expect ( "deletion_vector should be present" ) ;
920+ assert_eq ! (
921+ dv. path_or_inline_dv, "vBn[lx{q8@P<9BNH/isA" ,
922+ "deletion_vector.path_or_inline_dv mismatch"
923+ ) ;
924+ assert_eq ! ( dv. offset, Some ( 1 ) , "deletion_vector.offset mismatch" ) ;
925+ assert_eq ! (
926+ dv. size_in_bytes, 36 ,
927+ "deletion_vector.size_in_bytes mismatch"
928+ ) ;
929+ assert_eq ! ( dv. cardinality, 3 , "deletion_vector.cardinality mismatch" ) ;
930+
931+ // Verify row tracking fields (these would have been incorrect with the bug)
932+ assert_eq ! (
933+ remove. base_row_id,
934+ Some ( 100 ) ,
935+ "base_row_id mismatch - check getter index"
936+ ) ;
937+ assert_eq ! (
938+ remove. default_row_commit_version,
939+ Some ( 5 ) ,
940+ "default_row_commit_version mismatch - check getter index"
941+ ) ;
942+ }
943+
866944 #[ test]
867945 fn test_parse_txn ( ) {
868946 let json_strings: StringArray = vec ! [
0 commit comments