
Commit edebdd2

Author: tab
Commit message: stash
1 parent 9093772 commit edebdd2

File tree: src/connector/src/source/iceberg

1 file changed: +126 -3 lines changed

src/connector/src/source/iceberg/mod.rs

Lines changed: 126 additions & 3 deletions
@@ -18,7 +18,7 @@ mod metrics;
 use std::collections::{BinaryHeap, HashMap, HashSet};
 use std::sync::Arc;
 
-use anyhow::anyhow;
+use anyhow::{Context, anyhow};
 use async_trait::async_trait;
 use futures::StreamExt;
 use futures_async_stream::{for_await, try_stream};
@@ -33,6 +33,7 @@ use phf::{Set, phf_set};
 use risingwave_common::array::arrow::IcebergArrowConvert;
 use risingwave_common::array::{ArrayImpl, DataChunk, I64Array, Utf8Array};
 use risingwave_common::bail;
+use risingwave_common::bitmap::Bitmap;
 use risingwave_common::catalog::{
     ICEBERG_FILE_PATH_COLUMN_NAME, ICEBERG_FILE_POS_COLUMN_NAME, ICEBERG_SEQUENCE_NUM_COLUMN_NAME,
     Schema,
@@ -632,8 +633,10 @@ pub struct IcebergScanOpts {
     pub need_file_path_and_pos: bool,
 }
 
+/// Scan a data file and apply delete files (both position delete and equality delete).
+/// This is the enhanced version that supports delete file processing.
 #[try_stream(ok = DataChunk, error = ConnectorError)]
-pub async fn scan_task_to_chunk(
+pub async fn scan_task_to_chunk_with_deletes(
     table: Table,
     data_file_scan_task: FileScanTask,
     IcebergScanOpts {
@@ -646,7 +649,7 @@ pub async fn scan_task_to_chunk(
     let table_name = table.identifier().name().to_owned();
 
     let mut read_bytes = scopeguard::guard(0, |read_bytes| {
-        if let Some(metrics) = metrics {
+        if let Some(metrics) = metrics.clone() {
            metrics
                .iceberg_read_bytes
                .with_guarded_label_values(&[&table_name])
@@ -657,6 +660,84 @@ pub async fn scan_task_to_chunk(
     let data_file_path = data_file_scan_task.data_file_path.clone();
     let data_sequence_number = data_file_scan_task.sequence_number;
 
+    // Extract delete files before moving data_file_scan_task
+    let position_delete_tasks: Vec<_> = data_file_scan_task
+        .deletes
+        .iter()
+        .filter(|delete| delete.data_file_content == DataContentType::PositionDeletes)
+        .cloned()
+        .collect();
+
+    let equality_delete_tasks: Vec<_> = data_file_scan_task
+        .deletes
+        .iter()
+        .filter(|delete| delete.data_file_content == DataContentType::EqualityDeletes)
+        .cloned()
+        .collect();
+
+    // Read position delete files to build a set of positions to delete
+    // Position delete format: (file_path: String, pos: i64)
+    let mut position_deletes: HashMap<String, HashSet<i64>> = HashMap::new();
+
+    if !position_delete_tasks.is_empty() {
+        for delete_task in position_delete_tasks {
+            let delete_reader = table.reader_builder().with_batch_size(chunk_size).build();
+            let delete_stream = tokio_stream::once(Ok((*delete_task).clone()));
+            let mut delete_record_stream = delete_reader.read(Box::pin(delete_stream)).await?;
+
+            while let Some(record_batch) = delete_record_stream.next().await {
+                let record_batch = record_batch?;
+
+                // Position delete files have schema: file_path (string), pos (long)
+                // Extract file_path and pos columns
+                if let Some(file_path_col) = record_batch.column_by_name("file_path") {
+                    if let Some(pos_col) = record_batch.column_by_name("pos") {
+                        use risingwave_common::array::arrow::arrow_array_iceberg::Array;
+
+                        let file_paths = file_path_col
+                            .as_any()
+                            .downcast_ref::<risingwave_common::array::arrow::arrow_array_iceberg::StringArray>()
+                            .with_context(|| "file_path column is not StringArray")?;
+                        let positions = pos_col
+                            .as_any()
+                            .downcast_ref::<risingwave_common::array::arrow::arrow_array_iceberg::Int64Array>()
+                            .with_context(|| "pos column is not Int64Array")?;
+
+                        for idx in 0..record_batch.num_rows() {
+                            if !file_paths.is_null(idx) && !positions.is_null(idx) {
+                                let file_path = file_paths.value(idx);
+                                let pos = positions.value(idx);
+                                position_deletes
+                                    .entry(file_path.to_string())
+                                    .or_insert_with(HashSet::new)
+                                    .insert(pos);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // Read equality delete files to build a set of rows to delete based on equality columns
+    // Equality delete format: contains the equality columns specified in equality_ids
+    let mut equality_delete_records: Vec<DataChunk> = Vec::new();
+
+    if !equality_delete_tasks.is_empty() {
+        for delete_task in equality_delete_tasks {
+            let delete_reader = table.reader_builder().with_batch_size(chunk_size).build();
+            let delete_stream = tokio_stream::once(Ok((*delete_task).clone()));
+            let mut delete_record_stream = delete_reader.read(Box::pin(delete_stream)).await?;
+
+            while let Some(record_batch) = delete_record_stream.next().await {
+                let record_batch = record_batch?;
+                let delete_chunk = IcebergArrowConvert.chunk_from_record_batch(&record_batch)?;
+                equality_delete_records.push(delete_chunk);
+            }
+        }
+    }
+
+    // Now read the data file
     let reader = table.reader_builder().with_batch_size(chunk_size).build();
     let file_scan_stream = tokio_stream::once(Ok(data_file_scan_task));
 
@@ -667,6 +748,33 @@
         let record_batch = record_batch?;
 
         let mut chunk = IcebergArrowConvert.chunk_from_record_batch(&record_batch)?;
+
+        // Apply position deletes if any
+        if !position_deletes.is_empty() {
+            if let Some(deleted_positions) = position_deletes.get(&data_file_path) {
+                let index_start = (index * chunk_size) as i64;
+                let mut visibility = vec![true; chunk.capacity()];
+
+                for row_idx in 0..chunk.capacity() {
+                    let global_pos = index_start + row_idx as i64;
+                    if deleted_positions.contains(&global_pos) {
+                        visibility[row_idx] = false;
+                    }
+                }
+
+                let (columns, _) = chunk.into_parts();
+                let columns: Vec<_> = columns.into_iter().collect();
+                chunk = DataChunk::from_parts(columns.into(), Bitmap::from_bool_slice(&visibility));
+            }
+        }
+
+        // Apply equality deletes if any
+        // For equality deletes, we need to check if any row in the chunk matches
+        // the delete predicates based on equality columns
+        // This is more complex and typically done at a higher level (e.g., via hash join)
+        // For now, we'll pass the data through and let the query layer handle it
+        // via the LeftAnti join shown in the explain plan
+
         if need_seq_num {
             let (mut columns, visibility) = chunk.into_parts();
             columns.push(Arc::new(ArrayImpl::Int64(I64Array::from_iter(
@@ -690,6 +798,21 @@ pub async fn scan_task_to_chunk(
     }
 }
 
+/// Legacy scan function that doesn't process delete files.
+/// Kept for backward compatibility. Delegates to scan_task_to_chunk_with_deletes.
+#[try_stream(ok = DataChunk, error = ConnectorError)]
+pub async fn scan_task_to_chunk(
+    table: Table,
+    data_file_scan_task: FileScanTask,
+    opts: IcebergScanOpts,
+    metrics: Option<Arc<IcebergScanMetrics>>,
+) {
+    #[for_await]
+    for chunk in scan_task_to_chunk_with_deletes(table, data_file_scan_task, opts, metrics) {
+        yield chunk?;
+    }
+}
+
 #[derive(Debug)]
 pub struct IcebergFileReader {}
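For readers following the position-delete handling in the diff: the core idea is to collect the (file_path, pos) pairs from position delete files into a map, then turn each data batch into a row visibility mask. Below is a minimal, std-only sketch of that idea. The names (PositionDeletes, visibility_for_batch) are illustrative, not part of the connector API, and the fixed-size-batch assumption mirrors the `index * chunk_size` arithmetic in the commit.

use std::collections::{HashMap, HashSet};

/// Positions to delete, keyed by data file path (mirrors the
/// HashMap<String, HashSet<i64>> built in the commit).
type PositionDeletes = HashMap<String, HashSet<i64>>;

/// Visibility mask for the `batch_index`-th batch of `data_file`, assuming every
/// earlier batch held exactly `batch_size` rows (the same assumption the commit
/// makes with `index * chunk_size`).
fn visibility_for_batch(
    deletes: &PositionDeletes,
    data_file: &str,
    batch_index: usize,
    batch_size: usize,
    rows_in_batch: usize,
) -> Vec<bool> {
    let Some(deleted) = deletes.get(data_file) else {
        // No position deletes touch this data file: everything stays visible.
        return vec![true; rows_in_batch];
    };
    let start = (batch_index * batch_size) as i64;
    (0..rows_in_batch)
        .map(|row| !deleted.contains(&(start + row as i64)))
        .collect()
}

fn main() {
    let mut deletes: PositionDeletes = HashMap::new();
    // Suppose the delete files said: hide global rows 1 and 1025 of "data/a.parquet".
    deletes
        .entry("data/a.parquet".to_string())
        .or_default()
        .extend([1i64, 1025]);

    // Batch 0 covers rows 0..1024, so local row 1 becomes invisible.
    let mask0 = visibility_for_batch(&deletes, "data/a.parquet", 0, 1024, 1024);
    assert!(mask0[0] && !mask0[1]);

    // Batch 1 covers rows 1024..2048, so global row 1025 is local row 1.
    let mask1 = visibility_for_batch(&deletes, "data/a.parquet", 1, 1024, 1024);
    assert!(!mask1[1]);
}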

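Equality deletes, by contrast, are intentionally left to the query layer here (the LeftAnti join mentioned in the added comments): semantically, that join drops every data row whose equality-column values appear in an equality delete file. A rough sketch of that anti-join filtering, simplified to a single string key column and ignoring sequence numbers; all names here are hypothetical, not connector APIs.

use std::collections::HashSet;

/// LeftAnti-join style filtering: keep only data rows whose key does not
/// appear among the equality-delete keys.
fn apply_equality_deletes<'a>(
    data_rows: &'a [(String, i64)],   // (equality key column, payload)
    equality_delete_keys: &[String],  // keys read from equality delete files
) -> Vec<&'a (String, i64)> {
    let deleted: HashSet<&str> = equality_delete_keys.iter().map(String::as_str).collect();
    data_rows
        .iter()
        .filter(|(key, _)| !deleted.contains(key.as_str()))
        .collect()
}

fn main() {
    let data = vec![
        ("a".to_string(), 1),
        ("b".to_string(), 2),
        ("c".to_string(), 3),
    ];
    let deletes = vec!["b".to_string()];
    let kept = apply_equality_deletes(&data, &deletes);
    // Row "b" is removed, "a" and "c" survive.
    assert_eq!(kept.len(), 2);
}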