From c35969baf4244f6491197c813a3d4d1b49acf269 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Mon, 20 Oct 2025 18:25:18 -0300 Subject: [PATCH 1/9] new crate utils --- Cargo.lock | 4 + Cargo.toml | 3 + crates/rbuilder-utils/Cargo.toml | 40 + .../src/clickhouse_with_backup/backup.rs | 855 ++++++++++++++++++ .../src/clickhouse_with_backup/mod.rs | 194 ++++ .../src/clickhouse_with_backup/primitives.rs | 33 + crates/rbuilder-utils/src/lib.rs | 1 + 7 files changed, 1130 insertions(+) create mode 100644 crates/rbuilder-utils/Cargo.toml create mode 100644 crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs create mode 100644 crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs create mode 100644 crates/rbuilder-utils/src/clickhouse_with_backup/primitives.rs create mode 100644 crates/rbuilder-utils/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 4940515df..06e08c089 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9451,6 +9451,10 @@ dependencies = [ "tracing", ] +[[package]] +name = "rbuilder-utils" +version = "0.1.0" + [[package]] name = "rdrand" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index 0acf086bd..8085b02f9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ exclude = [".github/"] [workspace] members = [ "crates/rbuilder-primitives", + "crates/rbuilder-utils", "crates/rbuilder-config", "crates/rbuilder", "crates/rbuilder-operator", @@ -28,6 +29,7 @@ default-members = [ "crates/reth-rbuilder", "crates/rbuilder-rebalancer", "crates/rbuilder-primitives", + "crates/rbuilder-utils", "crates/test-relay", "crates/bid-scraper", ] @@ -197,6 +199,7 @@ eth-sparse-mpt = { path = "crates/eth-sparse-mpt" } bid-scraper = { path = "crates/bid-scraper" } rbuilder = { path = "crates/rbuilder" } rbuilder-primitives = { path = "crates/rbuilder-primitives" } +rbuilder-utils = { path = "crates/rbuilder-utils" } rbuilder-config = { path = "crates/rbuilder-config" } sysperf = { path = "crates/sysperf" } metrics_macros = { path = "crates/rbuilder/src/telemetry/metrics_macros" } diff --git a/crates/rbuilder-utils/Cargo.toml b/crates/rbuilder-utils/Cargo.toml new file mode 100644 index 000000000..9e08ce772 --- /dev/null +++ b/crates/rbuilder-utils/Cargo.toml @@ -0,0 +1,40 @@ +[package] +name = "rbuilder-utils" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +homepage.workspace = true +repository.workspace = true +exclude.workspace = true + +strum = "0.27" +strum_macros = "0.27" +tokio = { version = "1.40.0", default-features = false } +clickhouse = { git = "https://github.com/ClickHouse/clickhouse-rs", rev = "8cf3d2e138dd121367fa10e875d3f91374b075b2", features = [ + "inserter", + "time", + "uuid", + "native-tls" +] } +clickhouse-derive = { version = "0.2.0" } +redb = { version = "3.1.0" } + +# misc +derivative.workspace = true +integer-encoding = "4.0.0" +sha2 = { workspace = true, features = ["asm"] } +uuid = { version = "1.6.1", features = ["serde", "v5", "v4"] } +governor = "0.6.3" +ahash.workspace = true +reqwest = { workspace = true, features = ["blocking"] } +serde_with = { workspace = true, features = ["time_0_3"] } +toml.workspace = true +tracing.workspace = true +time.workspace = true +thiserror.workspace = true +eyre.workspace = true +serde.workspace = true +derive_more.workspace = true +serde_json.workspace = true + diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs new file mode 100644 index 000000000..e6e4f489c --- /dev/null +++ 
b/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs @@ -0,0 +1,855 @@ +use std::{ + collections::VecDeque, + marker::PhantomData, + path::PathBuf, + sync::{Arc, RwLock}, + time::{Duration, Instant, SystemTime, UNIX_EPOCH}, +}; + +use clickhouse::inserter::Inserter; +use derive_more::{Deref, DerefMut}; +use redb::{ReadableDatabase, ReadableTable, ReadableTableMetadata}; +use strum::AsRefStr; +use tokio::sync::mpsc; + +/// A default maximum size in bytes for the in-memory backup of failed commits. +pub(crate) const MAX_MEMORY_BACKUP_SIZE_BYTES: u64 = 1024 * 1024 * 1024; // 1 GiB +/// A default maximum size in bytes for the disk backup of failed commits. +pub(crate) const MAX_DISK_BACKUP_SIZE_BYTES: u64 = 10 * 1024 * 1024 * 1024; // 10 GiB + +/// The default path where the backup database is stored. For tests, a temporary file is used. +fn default_disk_backup_database_path() -> PathBuf { + #[cfg(test)] + return tempfile::NamedTempFile::new().unwrap().path().to_path_buf(); + #[cfg(not(test))] + { + let home = std::env::var("HOME").unwrap_or_else(|_| ".".to_string()); + PathBuf::from(home) + .join(".buildernet-orderflow-proxy") + .join("clickhouse_backup.db") + } +} + +/// Tracing target for the backup actor. +const TARGET: &str = "indexer::backup"; + +/// A type alias for disk backup keys. +type DiskBackupKey = u128; +/// A type alias for disk backup tables. +type Table<'a> = redb::TableDefinition<'a, DiskBackupKey, Vec>; + +/// The source of a backed-up failed commit. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum BackupSource { + Disk(DiskBackupKey), + Memory, +} + +/// Generates a new unique key for disk backup entries, based on current system time in +/// milliseconds. +fn new_disk_backup_key() -> DiskBackupKey { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("time went backwards") + .as_micros() +} + +/// Represents data we failed to commit to clickhouse, including the rows and some information +/// about the size of such data. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub(crate) struct FailedCommit { + /// The actual rows we were trying to commit. + rows: Vec, + /// The quantities related to such commit, like the total size in bytes. + quantities: Quantities, +} + +impl FailedCommit { + pub(crate) fn new(rows: Vec, quantities: Quantities) -> Self { + Self { rows, quantities } + } +} + +impl Default for FailedCommit { + fn default() -> Self { + Self { + rows: Vec::new(), + quantities: Quantities::ZERO, + } + } +} + +/// A [`FailedCommit`] along with its source (disk or memory). +struct RetrievedFailedCommit { + source: BackupSource, + commit: FailedCommit, +} + +/// A wrapper over a [`VecDeque`] of [`FailedCommit`] with added functionality. +/// +/// Newly failed commits are pushed to the front of the queue, so the oldest are at the back. +#[derive(Deref, DerefMut)] +struct FailedCommits(VecDeque>); + +impl FailedCommits { + /// Get the aggregated quantities of the failed commits; + #[inline] + fn quantities(&self) -> Quantities { + let total_size_bytes = self.iter().map(|c| c.quantities.bytes).sum::(); + let total_rows = self.iter().map(|c| c.quantities.rows).sum::(); + let total_transactions = self.iter().map(|c| c.quantities.transactions).sum::(); + + Quantities { + bytes: total_size_bytes, + rows: total_rows, + transactions: total_transactions, + } + } +} + +impl Default for FailedCommits { + fn default() -> Self { + Self(VecDeque::default()) + } +} + +/// Configuration for the [`DiskBackup`] of failed commits. 
+#[derive(Debug)] +pub(crate) struct DiskBackupConfig { + /// The path where the backup database is stored. + path: PathBuf, + /// The maximum size in bytes for holding past failed commits on disk. + max_size_bytes: u64, + /// The interval at which buffered writes are flushed to disk. + flush_interval: tokio::time::Interval, +} + +impl DiskBackupConfig { + pub(crate) fn new() -> Self { + Self { + path: default_disk_backup_database_path(), + max_size_bytes: MAX_DISK_BACKUP_SIZE_BYTES, + flush_interval: tokio::time::interval(Duration::from_secs(30)), + } + } + + pub(crate) fn with_path>(mut self, path: Option
) -> Self { + if let Some(p) = path { + self.path = p.into(); + } + self + } + + pub(crate) fn with_max_size_bytes(mut self, max_size_bytes: Option) -> Self { + if let Some(max_size_bytes) = max_size_bytes { + self.max_size_bytes = max_size_bytes; + } + self + } + + #[allow(dead_code)] + pub(crate) fn with_immediate_commit_interval(mut self, interval: Option) -> Self { + if let Some(interval) = interval { + self.flush_interval = tokio::time::interval(interval); + } + self + } +} + +impl Clone for DiskBackupConfig { + fn clone(&self) -> Self { + Self { + path: self.path.clone(), + max_size_bytes: self.max_size_bytes, + flush_interval: tokio::time::interval(self.flush_interval.period()), + } + } +} + +#[derive(Debug, Clone, Copy)] +pub(crate) struct MemoryBackupConfig { + /// The maximum size in bytes for holding past failed commits in-memory. Once we go over this + /// threshold, pressure is applied and old commits are dropped. + pub max_size_bytes: u64, +} + +impl MemoryBackupConfig { + pub(crate) fn new(max_size_bytes: u64) -> Self { + Self { max_size_bytes } + } +} + +impl Default for MemoryBackupConfig { + fn default() -> Self { + Self { + max_size_bytes: MAX_MEMORY_BACKUP_SIZE_BYTES, + } + } +} + +/// Data retrieved from disk, along with its key and some stats. +pub(crate) struct DiskRetrieval { + pub(crate) key: K, + pub(crate) value: V, + pub(crate) stats: BackupSourceStats, +} + +/// Errors that can occur during disk backup operations. Mostly wrapping redb and serde errors. +#[derive(Debug, thiserror::Error, AsRefStr)] +pub(crate) enum DiskBackupError { + #[error(transparent)] + Database(#[from] redb::DatabaseError), + #[error(transparent)] + Transactions(#[from] redb::TransactionError), + #[error(transparent)] + Table(#[from] redb::TableError), + #[error(transparent)] + Storage(#[from] redb::StorageError), + #[error(transparent)] + Commit(#[from] redb::CommitError), + #[error(transparent)] + Durability(#[from] redb::SetDurabilityError), + #[error(transparent)] + Compaction(#[from] redb::CompactionError), + #[error("serialization error: {0}")] + Serde(#[from] serde_json::Error), + #[error("backup size limit exceeded: {0} bytes")] + SizeExceeded(u64), + #[error("failed to join flushing task")] + JoinTask, +} + +/// A disk backup for failed commits. This handle to a database allows to write only to one table +/// for scoped access. If you want to write to another table, clone it using +/// [`Self::clone_with_table`]. +#[derive(Debug)] +pub(crate) struct DiskBackup { + db: Arc>, + config: DiskBackupConfig, + + _marker: PhantomData, +} + +impl DiskBackup { + pub(crate) fn new( + config: DiskBackupConfig, + task_executor: &TaskExecutor, + ) -> Result { + // Ensure all parent directories exist, so that the database can be initialized correctly. + if let Some(parent) = config.path.parent() { + std::fs::create_dir_all(parent)?; + } + + let db = redb::Database::create(&config.path)?; + + let disk_backup = Self { + db: Arc::new(RwLock::new(db)), + config, + _marker: Default::default(), + }; + + task_executor.spawn({ + let disk_backup: Self = disk_backup.clone(); + async move { + disk_backup.flush_routine().await; + } + }); + + Ok(disk_backup) + } + + /// Like `clone`, but allows to change the type parameter `U`. 
+ pub(crate) fn clone_to(&self) -> DiskBackup { + DiskBackup { + db: self.db.clone(), + config: self.config.clone(), + _marker: Default::default(), + } + } +} + +impl Clone for DiskBackup { + fn clone(&self) -> Self { + Self { + db: self.db.clone(), + config: self.config.clone(), + _marker: Default::default(), + } + } +} + +impl DiskBackup { + /// Saves a new failed commit to disk. `commit_immediately` indicates whether to force + /// durability on write. + fn save(&mut self, data: &FailedCommit) -> Result { + let table_def = Table::new(T::ORDER); + // NOTE: not efficient, but we don't expect to store a lot of data here. + let bytes = serde_json::to_vec(&data)?; + + let writer = self.db.write().expect("not poisoned").begin_write()?; + let (stored_bytes, rows) = { + let mut table = writer.open_table(table_def)?; + if table.stats()?.stored_bytes() > self.config.max_size_bytes { + return Err(DiskBackupError::SizeExceeded(self.config.max_size_bytes)); + } + + table.insert(new_disk_backup_key(), bytes)?; + + (table.stats()?.stored_bytes(), table.len()?) + }; + writer.commit()?; + + Ok(BackupSourceStats { + size_bytes: stored_bytes, + total_batches: rows as usize, + }) + } + + /// Retrieves the oldest failed commit from disk, if any. + fn retrieve_oldest( + &mut self, + ) -> Result>>, DiskBackupError> { + let table_def = Table::new(T::ORDER); + + let reader = self.db.read().expect("not poisoned").begin_read()?; + let table = match reader.open_table(table_def) { + Ok(t) => t, + Err(redb::TableError::TableDoesNotExist(_)) => { + // No table means no data. + return Ok(None); + } + Err(e) => { + return Err(e.into()); + } + }; + + let stored_bytes = table.stats()?.stored_bytes(); + let rows = table.len()? as usize; + let stats = BackupSourceStats { + size_bytes: stored_bytes, + total_batches: rows, + }; + + // Retreives in sorted order. + let Some(entry_res) = table.iter()?.next() else { + return Ok(None); + }; + let (key, rows_raw) = entry_res?; + let commit: FailedCommit = serde_json::from_slice(&rows_raw.value())?; + + Ok(Some(DiskRetrieval { + key: key.value(), + value: commit, + stats, + })) + } + + /// Deletes the failed commit with the given key from disk. + fn delete(&mut self, key: DiskBackupKey) -> Result { + let table_def = Table::new(T::ORDER); + + let mut writer = self.db.write().expect("not poisoned").begin_write()?; + writer.set_durability(redb::Durability::Immediate)?; + + let (stored_bytes, rows) = { + let mut table = writer.open_table(table_def)?; + table.remove(key)?; + (table.stats()?.stored_bytes(), table.len()?) + }; + writer.commit()?; + + Ok(BackupSourceStats { + size_bytes: stored_bytes, + total_batches: rows as usize, + }) + } + + /// Explicity flushes any pending writes to disk. This is async to avoid blocking the main + /// thread. + async fn flush(&mut self) -> Result<(), DiskBackupError> { + let db = self.db.clone(); + + // Since this can easily block by a second or two, send it to a blocking thread. + tokio::task::spawn_blocking(move || { + let mut db = db.write().expect("not poisoned"); + let mut writer = db.begin_write()?; + + // If there is no data to flush, don't do anything. + if writer.stats()?.stored_bytes() == 0 { + return Ok(()); + } + + writer.set_durability(redb::Durability::Immediate)?; + writer.commit()?; + + db.compact()?; + Ok(()) + }) + .await + .map_err(|_| DiskBackupError::JoinTask)? + } + + /// Takes an instance of self and performs a flush routine if the immediate flush interval has + /// ticked. 
+ async fn flush_routine(mut self) { + loop { + self.config.flush_interval.tick().await; + let start = Instant::now(); + match self.flush().await { + Ok(_) => { + tracing::debug!(target: TARGET, elapsed = ?start.elapsed(), "flushed backup write buffer to disk"); + } + Err(e) => { + tracing::error!(target: TARGET, ?e, "failed to flush backup write buffer to disk"); + } + } + } + } +} + +/// Statistics about the Clickhouse data stored in a certain backup source (disk or memory). +#[derive(Debug, Clone, Copy, Default)] +pub(crate) struct BackupSourceStats { + /// The total size in bytes of failed commit batches stored. + size_bytes: u64, + /// The total number of failed commit batches stored. + total_batches: usize, +} + +/// An in-memory backup for failed commits. +#[derive(Deref, DerefMut)] +struct MemoryBackup { + /// The in-memory cache of failed commits. + #[deref] + #[deref_mut] + failed_commits: FailedCommits, + /// The configuration for the in-memory backup. + config: MemoryBackupConfig, + /// The statistics about the in-memory backup. + stats: BackupSourceStats, +} + +impl MemoryBackup { + /// Updates the internal statistics and returns them. + fn update_stats(&mut self) -> BackupSourceStats { + let quantities = self.failed_commits.quantities(); + let new_len = self.failed_commits.len(); + + self.stats = BackupSourceStats { + size_bytes: quantities.bytes, + total_batches: new_len, + }; + self.stats + } + + /// Checks whether the threshold for maximum size has been exceeded. + fn threshold_exceeded(&self) -> bool { + self.stats.size_bytes > self.config.max_size_bytes && self.failed_commits.len() > 1 + } + + /// Drops the oldest failed commit if the threshold has been exceeded, returning the updated + /// stats + fn drop_excess(&mut self) -> Option<(BackupSourceStats, Quantities)> { + if self.threshold_exceeded() { + self.failed_commits.pop_back(); + Some((self.update_stats(), self.failed_commits.quantities())) + } else { + None + } + } + + /// Saves a new failed commit into memory, updating the stats. + fn save(&mut self, data: FailedCommit) -> BackupSourceStats { + self.failed_commits.push_front(data); + self.update_stats() + } + + /// Retrieves the oldest failed commit from memory, updating the stats. + fn retrieve_oldest(&mut self) -> Option> { + let oldest = self.failed_commits.pop_back(); + self.update_stats(); + oldest + } +} + +// Needed otherwise requires T: Default +impl Default for MemoryBackup { + fn default() -> Self { + Self { + failed_commits: FailedCommits::default(), + config: MemoryBackupConfig::default(), + stats: BackupSourceStats::default(), + } + } +} + +/// An backup actor for Clickhouse data. This actor receives [`FailedCommit`]s and saves them on +/// disk and in memory in case of failure of the former, and periodically tries to commit them back +/// again to Clickhouse. Since memory is finite, there is an upper bound on how much memory this +/// data structure holds. Once this has been hit, pressure applies, meaning that we try again a +/// certain failed commit for a finite number of times, and then we discard it to accomdate new +/// data. +pub(crate) struct Backup { + /// The receiver of failed commit attempts. + /// + /// Rationale for sending multiple rows instead of sending rows: the backup abstraction must + /// periodically block to write data to the inserter and try to commit it to clickhouse. Each + /// attempt results in doing the previous step. This could clog the channel which will receive + /// individual rows, leading to potential row losses. 
+ /// + /// By sending backup data less often, we give time gaps for these operation to be performed. + rx: mpsc::Receiver>, + /// The disk cache of failed commits. + disk_backup: DiskBackup, + /// The in-memory cache of failed commits. + memory_backup: MemoryBackup, + /// A clickhouse inserter for committing again the data. + inserter: Inserter, + /// The interval at which we try to backup data. + interval: BackoffInterval, + + /// A failed commit retrieved from either disk or memory, waiting to be retried. + last_cached: Option>, + + /// Whether to use only the in-memory backup (for testing purposes). + #[cfg(test)] + use_only_memory_backup: bool, +} + +impl Backup { + pub(crate) fn new( + rx: mpsc::Receiver>, + inserter: Inserter, + disk_backup: DiskBackup, + ) -> Self { + Self { + rx, + inserter, + interval: Default::default(), + memory_backup: MemoryBackup::default(), + disk_backup, + last_cached: None, + #[cfg(test)] + use_only_memory_backup: false, + } + } + + /// Override the default memory backup configuration. + pub(crate) fn with_memory_backup_config(mut self, config: MemoryBackupConfig) -> Self { + self.memory_backup.config = config; + self + } + + /// Backs up a failed commit, first trying to write to disk, then to memory. + fn backup(&mut self, failed_commit: FailedCommit) { + let quantities = failed_commit.quantities; + tracing::debug!(target: TARGET, order = T::ORDER, bytes = ?quantities.bytes, rows = ?quantities.rows, "backing up failed commit"); + + #[cfg(test)] + if self.use_only_memory_backup { + self.memory_backup.save(failed_commit); + self.last_cached = self + .last_cached + .take() + .filter(|cached| cached.source != BackupSource::Memory); + return; + } + + let start = Instant::now(); + match self.disk_backup.save(&failed_commit) { + Ok(stats) => { + tracing::debug!(target: TARGET, order = T::ORDER, total_size = stats.size_bytes.format_bytes(), elapsed = ?start.elapsed(), "saved failed commit to disk"); + IndexerMetrics::set_clickhouse_disk_backup_size( + stats.size_bytes, + stats.total_batches, + T::ORDER, + ); + + return; + } + Err(e) => { + tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to write commit, trying in-memory"); + IndexerMetrics::increment_clickhouse_backup_disk_errors(T::ORDER, e.as_ref()); + } + }; + + let stats = self.memory_backup.save(failed_commit); + IndexerMetrics::set_clickhouse_memory_backup_size( + stats.size_bytes, + stats.total_batches, + T::ORDER, + ); + tracing::debug!(target: TARGET, order = T::ORDER, bytes = ?quantities.bytes, rows = ?quantities.rows, ?stats, "saved failed commit in-memory"); + + if let Some((stats, oldest_quantities)) = self.memory_backup.drop_excess() { + tracing::warn!(target: TARGET, order = T::ORDER, ?stats, "failed commits exceeded max memory backup size, dropping oldest"); + IndexerMetrics::process_clickhouse_backup_data_lost_quantities(&oldest_quantities); + // Clear the cached last commit if it was from memory and we just dropped it. + self.last_cached = self + .last_cached + .take() + .filter(|cached| cached.source != BackupSource::Memory); + } + } + + /// Retrieves the oldest failed commit, first trying from memory, then from disk. 
+ fn retrieve_oldest(&mut self) -> Option> { + if let Some(cached) = self.last_cached.take() { + tracing::debug!(target: TARGET, order = T::ORDER, rows = cached.commit.rows.len(), "retrieved last cached failed commit"); + return Some(cached); + } + + if let Some(commit) = self.memory_backup.retrieve_oldest() { + tracing::debug!(target: TARGET, order = T::ORDER, rows = commit.rows.len(), "retrieved oldest failed commit from memory"); + return Some(RetrievedFailedCommit { + source: BackupSource::Memory, + commit, + }); + } + + match self.disk_backup.retrieve_oldest() { + Ok(maybe_commit) => { + maybe_commit.inspect(|data| { + tracing::debug!(target: TARGET, order = T::ORDER, rows = data.stats.total_batches, "retrieved oldest failed commit from disk"); + }) + .map(|data| RetrievedFailedCommit { + source: BackupSource::Disk(data.key), + commit: data.value, + }) + } + Err(e) => { + tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to retrieve oldest failed commit from disk"); + IndexerMetrics::increment_clickhouse_backup_disk_errors(T::ORDER, e.as_ref()); + None + } + } + } + + /// Populates the inserter with the rows from the given failed commit. + async fn populate_inserter(&mut self, commit: &FailedCommit) { + for row in &commit.rows { + let value_ref = T::to_row_ref(row); + + if let Err(e) = self.inserter.write(value_ref).await { + IndexerMetrics::increment_clickhouse_write_failures(e.to_string()); + tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to write to backup inserter"); + continue; + } + } + } + + /// Purges a committed failed commit from disk, if applicable. + async fn purge_commit(&mut self, retrieved: &RetrievedFailedCommit) { + if let BackupSource::Disk(key) = retrieved.source { + let start = Instant::now(); + match self.disk_backup.delete(key) { + Ok(stats) => { + tracing::debug!(target: TARGET, order = T::ORDER, total_size = stats.size_bytes.format_bytes(), elapsed = ?start.elapsed(), "deleted failed commit from disk"); + IndexerMetrics::set_clickhouse_disk_backup_size( + stats.size_bytes, + stats.total_batches, + T::ORDER, + ); + } + Err(e) => { + tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to purge failed commit from disk"); + } + } + tracing::debug!(target: TARGET, order = T::ORDER, "purged committed failed commit from disk"); + } + } + + /// Run the backup actor until it is possible to receive messages. + /// + /// If some data were stored on disk previously, they will be retried first. + pub(crate) async fn run(&mut self) { + loop { + tokio::select! { + maybe_failed_commit = self.rx.recv() => { + let Some(failed_commit) = maybe_failed_commit else { + tracing::error!(target: TARGET, order = T::ORDER, "backup channel closed"); + break; + }; + + self.backup(failed_commit); + } + _ = self.interval.tick() => { + let Some(oldest) = self.retrieve_oldest() else { + self.interval.reset(); + IndexerMetrics::set_clickhouse_backup_empty_size(T::ORDER); + continue // Nothing to do! 
+ }; + + self.populate_inserter(&oldest.commit).await; + + let start = Instant::now(); + match self.inserter.force_commit().await { + Ok(quantities) => { + tracing::info!(target: TARGET, order = T::ORDER, ?quantities, "successfully backed up"); + IndexerMetrics::process_clickhouse_backup_data_quantities(&quantities.into()); + IndexerMetrics::record_clickhouse_batch_commit_time(start.elapsed()); + self.interval.reset(); + self.purge_commit(&oldest).await; + } + Err(e) => { + tracing::error!(target: TARGET, order = T::ORDER, ?e, quantities = ?oldest.commit.quantities, "failed to commit bundle to clickhouse from backup"); + IndexerMetrics::increment_clickhouse_commit_failures(e.to_string()); + self.last_cached = Some(oldest); + continue; + } + } + } + } + } + } + + /// To call on shutdown, tries make a last-resort attempt to post back to Clickhouse all + /// in-memory data. + pub(crate) async fn end(mut self) { + for failed_commit in self.memory_backup.failed_commits.drain(..) { + for row in &failed_commit.rows { + let value_ref = T::to_row_ref(row); + + if let Err(e) = self.inserter.write(value_ref).await { + tracing::error!( target: TARGET, order = T::ORDER, ?e, "failed to write to backup inserter during shutdown"); + IndexerMetrics::increment_clickhouse_write_failures(e.to_string()); + continue; + } + } + if let Err(e) = self.inserter.force_commit().await { + tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to commit backup to CH during shutdown, trying disk"); + IndexerMetrics::increment_clickhouse_commit_failures(e.to_string()); + } + + if let Err(e) = self.disk_backup.save(&failed_commit) { + tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to write commit to disk backup during shutdown"); + IndexerMetrics::increment_clickhouse_backup_disk_errors(T::ORDER, e.as_ref()); + } + } + + if let Err(e) = self.disk_backup.flush().await { + tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to flush disk backup during shutdown"); + IndexerMetrics::increment_clickhouse_backup_disk_errors(T::ORDER, e.as_ref()); + } else { + tracing::info!(target: TARGET, order = T::ORDER, "flushed disk backup during shutdown"); + } + + if let Err(e) = self.inserter.end().await { + tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to end backup inserter during shutdown"); + } else { + tracing::info!(target: TARGET, order = T::ORDER, "successfully ended backup inserter during shutdown"); + } + } +} + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use super::*; + + use crate::{ + indexer::{ + click::{ + models::BundleRow, + tests::{create_clickhouse_bundles_table, create_test_clickhouse_client}, + }, + tests::system_bundle_example, + BUNDLE_TABLE_NAME, + }, + spawn_clickhouse_backup, + tasks::TaskManager, + }; + + // Uncomment to enable logging during tests. + // use tracing::level_filters::LevelFilter; + // use tracing_subscriber::{layer::SubscriberExt as _, util::SubscriberInitExt as _, EnvFilter}; + + impl Backup { + fn new_test( + rx: mpsc::Receiver>, + inserter: Inserter, + disk_backup: DiskBackup, + use_only_memory_backup: bool, + ) -> Self { + Self { + rx, + inserter, + interval: Default::default(), + memory_backup: MemoryBackup::default(), + disk_backup, + last_cached: None, + use_only_memory_backup, + } + } + } + + #[tokio::test(flavor = "multi_thread")] + async fn backup_e2e_works() { + // Uncomment to toggle logs. + // let registry = tracing_subscriber::registry().with( + // EnvFilter::builder().with_default_directive(LevelFilter::DEBUG.into()). 
+ // from_env_lossy(), ); + // let _ = registry.with(tracing_subscriber::fmt::layer()).try_init(); + + let memory_backup_only = [false, true]; + + let task_manager = TaskManager::new(tokio::runtime::Handle::current()); + let task_executor = task_manager.executor(); + + for use_memory_only in memory_backup_only { + println!( + "---- Running backup_memory_e2e_works with use_memory_only = {use_memory_only} ----" + ); + + // 1. Spin up Clickhouse. No validation because we're testing both receipts and bundles, + // and validation on U256 is not supported. + let (image, client, _) = create_test_clickhouse_client(false).await.unwrap(); + create_clickhouse_bundles_table(&client).await.unwrap(); + + let tempfile = tempfile::NamedTempFile::new().unwrap(); + + let disk_backup = DiskBackup::new( + DiskBackupConfig::new().with_path(tempfile.path().to_path_buf().into()), + &task_executor, + ) + .expect("could not create disk backup"); + + let (tx, rx) = mpsc::channel(128); + let mut bundle_backup = Backup::::new_test( + rx, + client + .inserter(BUNDLE_TABLE_NAME) + .with_timeouts(Some(Duration::from_secs(2)), Some(Duration::from_secs(12))), + disk_backup, + use_memory_only, + ); + + spawn_clickhouse_backup!(task_executor, bundle_backup, "bundles"); + + let quantities = Quantities { + bytes: 512, + rows: 1, + transactions: 1, + }; // approximated + let bundle_row: BundleRow = (system_bundle_example(), "buildernet".to_string()).into(); + let bundle_rows = Vec::from([bundle_row]); + let failed_commit = FailedCommit::::new(bundle_rows.clone(), quantities); + + tx.send(failed_commit).await.unwrap(); + // Wait some time to let the backup process it + tokio::time::sleep(Duration::from_millis(100)).await; + + let results = client + .query(&format!("select * from {BUNDLE_TABLE_NAME}")) + .fetch_all::() + .await + .unwrap(); + + assert_eq!(results.len(), 1); + assert_eq!(bundle_rows, results, "expected, got"); + + drop(image); + } + } +} diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs new file mode 100644 index 000000000..1e049efc0 --- /dev/null +++ b/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs @@ -0,0 +1,194 @@ +//! Indexing functionality powered by Clickhouse. + +use std::{ + fmt::Debug, + time::{Duration, Instant}, +}; + +use clickhouse::{ + error::Result as ClickhouseResult, + inserter::{Inserter, Quantities}, + Client as ClickhouseClient, Row, +}; +use tokio::sync::mpsc; + +use crate::clickhouse_with_backup::primitives::{ClickhouseIndexableOrder, ClickhouseRowExt}; + +mod backup; +pub(crate) mod primitives; + +/// An clickhouse inserter with some sane defaults. +fn default_inserter(client: &ClickhouseClient, table_name: &str) -> Inserter { + // TODO: make this configurable. + let send_timeout = Duration::from_secs(2); + let end_timeout = Duration::from_secs(3); + + client + .inserter::(table_name) + .with_period(Some(Duration::from_secs(4))) // Dump every 4s + .with_period_bias(0.1) // 4±(0.1*4) + .with_max_bytes(128 * 1024 * 1024) // 128MiB + .with_max_rows(65_536) + .with_timeouts(Some(send_timeout), Some(end_timeout)) +} + +/// A wrapper over a Clickhouse [`Inserter`] that supports a backup mechanism. +struct ClickhouseInserter { + /// The inner Clickhouse inserter client. + inner: Inserter, + /// A small in-memory backup of the current data we're trying to commit. In case this fails to + /// be inserted into Clickhouse, it is sent to the backup actor. 
+ rows_backup: Vec, + /// The channel where to send data to be backed up. + backup_tx: mpsc::Sender>, +} + +impl ClickhouseInserter { + fn new(inner: Inserter, backup_tx: mpsc::Sender>) -> Self { + let rows_backup = Vec::new(); + Self { + inner, + rows_backup, + backup_tx, + } + } + + /// Writes the provided order into the inner Clickhouse writer buffer. + async fn write(&mut self, row: T) { + let hash = row.hash(); + let value_ref = ClickhouseRowExt::to_row_ref(&row); + + if let Err(e) = self.inner.write(value_ref).await { + IndexerMetrics::increment_clickhouse_write_failures(e.to_string()); + tracing::error!(target: TARGET, order = T::ORDER, ?e, %hash, "failed to write to clickhouse inserter"); + return; + } + + // NOTE: we don't backup if writing failes. The reason is that if this fails, then the same + // writing to the backup inserter should fail. + self.rows_backup.push(row); + } + + /// Tries to commit to Clickhouse if the conditions are met. In case of failures, data is sent + /// to the backup actor for retries. + async fn commit(&mut self) { + let pending = self.inner.pending().clone().into(); // This is cheap to clone. + + let start = Instant::now(); + match self.inner.commit().await { + Ok(quantities) => { + if quantities == Quantities::ZERO.into() { + tracing::trace!(target: TARGET, order = T::ORDER, "committed to inserter"); + } else { + tracing::debug!(target: TARGET, order = T::ORDER, ?quantities, "inserted batch to clickhouse"); + IndexerMetrics::process_clickhouse_quantities(&quantities.into()); + IndexerMetrics::record_clickhouse_batch_commit_time(start.elapsed()); + // Clear the backup rows. + self.rows_backup.clear(); + } + } + Err(e) => { + IndexerMetrics::increment_clickhouse_commit_failures(e.to_string()); + tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to commit bundle to clickhouse"); + + let rows = std::mem::take(&mut self.rows_backup); + let failed_commit = FailedCommit::new(rows, pending); + + if let Err(e) = self.backup_tx.try_send(failed_commit) { + tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to send rows backup"); + } + } + } + } + + /// Ends the current `INSERT` and whole `Inserter` unconditionally. + async fn end(self) -> ClickhouseResult { + self.inner.end().await.map(Into::into) + } +} + +impl std::fmt::Debug for ClickhouseInserter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ClickhouseInserter") + .field("inserter", &T::ORDER.to_string()) + .field("rows_backup_len", &self.rows_backup.len()) + .finish() + } +} + +/// A long-lived actor to run a [`ClickhouseIndexer`] until it possible to receive new order to +/// index. +struct InserterRunner { + /// The channel from which we can receive new orders to index. + rx: mpsc::Receiver, + /// The underlying Clickhouse inserter. + inserter: ClickhouseInserter, + /// The name of the local operator to use when adding data to clickhouse. + builder_name: String, +} + +impl InserterRunner { + fn new( + rx: mpsc::Receiver, + inserter: ClickhouseInserter, + builder_name: String, + ) -> Self { + Self { + rx, + inserter, + builder_name, + } + } + + /// Run the inserter until it is possible to receive new orders. 
+ async fn run_loop(&mut self) { + let mut sampler = Sampler::default() + .with_sample_size(self.rx.capacity() / 2) + .with_interval(Duration::from_secs(4)); + + while let Some(order) = self.rx.recv().await { + tracing::trace!(target: TARGET, order = T::ORDER, hash = %order.hash(), "received data to index"); + sampler.sample(|| { + IndexerMetrics::set_clickhouse_queue_size(self.rx.len(), T::ORDER); + }); + + let row = order.to_row(self.builder_name.clone()); + self.inserter.write(row).await; + self.inserter.commit().await; + } + tracing::error!(target: TARGET, order = T::ORDER, "tx channel closed, indexer will stop running"); + } +} + +/// The configuration used in a [`ClickhouseClient`]. +#[derive(Debug, Clone)] +pub(crate) struct ClickhouseClientConfig { + host: String, + database: String, + username: String, + password: String, + validation: bool, +} + +impl ClickhouseClientConfig { + fn new(args: &ClickhouseArgs, validation: bool) -> Self { + Self { + host: args.host.clone().expect("host is set"), + database: args.database.clone().expect("database is set"), + username: args.username.clone().expect("username is set"), + password: args.password.clone().expect("password is set"), + validation, + } + } +} + +impl From for ClickhouseClient { + fn from(config: ClickhouseClientConfig) -> Self { + ClickhouseClient::default() + .with_url(config.host) + .with_database(config.database) + .with_user(config.username) + .with_password(config.password) + .with_validation(config.validation) + } +} diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/primitives.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/primitives.rs new file mode 100644 index 000000000..11bfc68e1 --- /dev/null +++ b/crates/rbuilder-utils/src/clickhouse_with_backup/primitives.rs @@ -0,0 +1,33 @@ +use alloy_primitives::B256; +use clickhouse::{Row, RowWrite}; +use serde::{de::DeserializeOwned, Serialize}; +pub(crate) trait ClickhouseRowExt: + Row + RowWrite + Serialize + DeserializeOwned + Sync + Send + 'static +{ + /// The type of such row, e.g. "bundles" or "bundle_receipts". Used as backup db table name and + /// for informational purposes. + const ORDER: &'static str; + + /// An identifier of such row. + fn hash(&self) -> B256; + + /// Internal function that takes the inner row types and extracts the reference needed for + /// Clickhouse inserter functions like `Inserter::write`. While a default implementation is not + /// provided, it should suffice to simply return `row`. + fn to_row_ref(row: &Self) -> &::Value<'_>; +} + +/// An high-level order type that can be indexed in clickhouse. +pub(crate) trait ClickhouseIndexableOrder: Sized { + /// The associated inner row type that can be serialized into Clickhouse data. + type ClickhouseRowType: ClickhouseRowExt; + + /// The type of such order, e.g. "bundles" or "transactions". For informational purposes. + const ORDER: &'static str; + + /// An identifier of such order. + fn hash(&self) -> B256; + + /// Converts such order into the associated Clickhouse row type. + fn to_row(self, builder_name: String) -> Self::ClickhouseRowType; +} diff --git a/crates/rbuilder-utils/src/lib.rs b/crates/rbuilder-utils/src/lib.rs new file mode 100644 index 000000000..b72aab9ab --- /dev/null +++ b/crates/rbuilder-utils/src/lib.rs @@ -0,0 +1 @@ +pub mod clickhouse_with_backup; From 8d0a03d52d1c8508aca0f3e1744ca95f73664f72 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Tue, 21 Oct 2025 14:58:34 -0300 Subject: [PATCH 2/9] working.... 
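The two traits added in primitives.rs above are the extension points for wiring a new order type into the indexer: `ClickhouseIndexableOrder` describes the domain object and `ClickhouseRowExt` its serialized row. A minimal sketch of the order-side implementation, assuming a hypothetical `ExampleOrder`/`ExampleRow` pair in which `ExampleRow` already derives `clickhouse::Row` and implements `ClickhouseRowExt` (none of these names are part of this series):

use alloy_primitives::B256;

// Hypothetical order type; the fields are illustrative only.
struct ExampleOrder {
    hash: B256,
}

impl ClickhouseIndexableOrder for ExampleOrder {
    // `ExampleRow` is assumed to implement `ClickhouseRowExt` elsewhere.
    type ClickhouseRowType = ExampleRow;

    const ORDER: &'static str = "examples";

    fn hash(&self) -> B256 {
        self.hash
    }

    fn to_row(self, builder_name: String) -> ExampleRow {
        // The builder name is attached at conversion time, mirroring
        // InserterRunner::run_loop, which calls order.to_row(self.builder_name.clone()).
        ExampleRow {
            hash: self.hash,
            builder_name,
        }
    }
}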
--- crates/rbuilder-utils/Cargo.toml | 28 +++++++++++-------- .../src/clickhouse_with_backup/backup.rs | 4 ++- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/crates/rbuilder-utils/Cargo.toml b/crates/rbuilder-utils/Cargo.toml index 9e08ce772..32d64435c 100644 --- a/crates/rbuilder-utils/Cargo.toml +++ b/crates/rbuilder-utils/Cargo.toml @@ -8,18 +8,7 @@ homepage.workspace = true repository.workspace = true exclude.workspace = true -strum = "0.27" -strum_macros = "0.27" -tokio = { version = "1.40.0", default-features = false } -clickhouse = { git = "https://github.com/ClickHouse/clickhouse-rs", rev = "8cf3d2e138dd121367fa10e875d3f91374b075b2", features = [ - "inserter", - "time", - "uuid", - "native-tls" -] } -clickhouse-derive = { version = "0.2.0" } -redb = { version = "3.1.0" } - +[dependencies] # misc derivative.workspace = true integer-encoding = "4.0.0" @@ -38,3 +27,18 @@ serde.workspace = true derive_more.workspace = true serde_json.workspace = true + +# alloy +alloy-primitives.workspace = true + +strum = "0.27" +strum_macros = "0.27" +tokio = { version = "1.40.0", default-features = false } +clickhouse = { git = "https://github.com/ClickHouse/clickhouse-rs", rev = "8cf3d2e138dd121367fa10e875d3f91374b075b2", features = [ + "inserter", + "time", + "uuid", + "native-tls" +] } +clickhouse-derive = { version = "0.2.0" } +redb = { version = "3.1.0" } diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs index e6e4f489c..7ed94844f 100644 --- a/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs +++ b/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs @@ -6,12 +6,14 @@ use std::{ time::{Duration, Instant, SystemTime, UNIX_EPOCH}, }; -use clickhouse::inserter::Inserter; +use clickhouse::inserter::{Inserter, Quantities}; use derive_more::{Deref, DerefMut}; use redb::{ReadableDatabase, ReadableTable, ReadableTableMetadata}; use strum::AsRefStr; use tokio::sync::mpsc; +use crate::clickhouse_with_backup::primitives::{ClickhouseIndexableOrder, ClickhouseRowExt}; + /// A default maximum size in bytes for the in-memory backup of failed commits. pub(crate) const MAX_MEMORY_BACKUP_SIZE_BYTES: u64 = 1024 * 1024 * 1024; // 1 GiB /// A default maximum size in bytes for the disk backup of failed commits. 
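The disk and memory backup limits introduced in backup.rs are configured builder-style, with `Option` arguments so callers only override what they need. A short usage sketch, assuming `with_path` accepts anything convertible into a `PathBuf`; the path and sizes below are example values, not crate defaults:

use std::path::PathBuf;

// Defaults are MAX_DISK_BACKUP_SIZE_BYTES (10 GiB) on disk and
// MAX_MEMORY_BACKUP_SIZE_BYTES (1 GiB) in memory; these override both.
let disk_config = DiskBackupConfig::new()
    .with_path(Some(PathBuf::from("/var/lib/rbuilder/clickhouse_backup.db")))
    .with_max_size_bytes(Some(5 * 1024 * 1024 * 1024));

let memory_config = MemoryBackupConfig::new(256 * 1024 * 1024);

The memory half is then applied when the actor is built, via `Backup::with_memory_backup_config(memory_config)`.
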
From 9443558c1d56a0f326b64697970eed67ac8ba0b6 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Tue, 21 Oct 2025 14:58:40 -0300 Subject: [PATCH 3/9] lock --- Cargo.lock | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 97 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 06e08c089..2106053a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2480,7 +2480,7 @@ dependencies = [ "bstr", "bytes", "cityhash-rs", - "clickhouse-derive", + "clickhouse-derive 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "futures", "futures-channel", "http-body-util", @@ -2489,7 +2489,7 @@ dependencies = [ "hyper-util", "lz4_flex", "replace_with", - "sealed", + "sealed 0.5.0", "serde", "static_assertions", "thiserror 1.0.69", @@ -2499,6 +2499,35 @@ dependencies = [ "uuid", ] +[[package]] +name = "clickhouse" +version = "0.13.3" +source = "git+https://github.com/ClickHouse/clickhouse-rs?rev=8cf3d2e138dd121367fa10e875d3f91374b075b2#8cf3d2e138dd121367fa10e875d3f91374b075b2" +dependencies = [ + "bstr", + "bytes", + "cityhash-rs", + "clickhouse-derive 0.2.0 (git+https://github.com/ClickHouse/clickhouse-rs?rev=8cf3d2e138dd121367fa10e875d3f91374b075b2)", + "clickhouse-types", + "futures-channel", + "futures-util", + "http-body-util", + "hyper 1.7.0", + "hyper-tls 0.6.0", + "hyper-util", + "lz4_flex", + "quanta", + "replace_with", + "sealed 0.6.0", + "serde", + "static_assertions", + "thiserror 2.0.17", + "time", + "tokio", + "url", + "uuid", +] + [[package]] name = "clickhouse-derive" version = "0.2.0" @@ -2511,6 +2540,26 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "clickhouse-derive" +version = "0.2.0" +source = "git+https://github.com/ClickHouse/clickhouse-rs?rev=8cf3d2e138dd121367fa10e875d3f91374b075b2#8cf3d2e138dd121367fa10e875d3f91374b075b2" +dependencies = [ + "proc-macro2 1.0.101", + "quote 1.0.41", + "serde_derive_internals", + "syn 2.0.106", +] + +[[package]] +name = "clickhouse-types" +version = "0.1.0" +source = "git+https://github.com/ClickHouse/clickhouse-rs?rev=8cf3d2e138dd121367fa10e875d3f91374b075b2#8cf3d2e138dd121367fa10e875d3f91374b075b2" +dependencies = [ + "bytes", + "thiserror 2.0.17", +] + [[package]] name = "cloudabi" version = "0.0.3" @@ -9341,7 +9390,7 @@ dependencies = [ "alloy-transport-http", "bid-scraper", "built", - "clickhouse", + "clickhouse 0.12.2", "ctor", "derivative", "exponential-backoff", @@ -9454,6 +9503,31 @@ dependencies = [ [[package]] name = "rbuilder-utils" version = "0.1.0" +dependencies = [ + "ahash", + "alloy-primitives 1.4.1", + "clickhouse 0.13.3", + "clickhouse-derive 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "derivative", + "derive_more 2.0.1", + "eyre", + "governor", + "integer-encoding", + "redb", + "reqwest 0.12.24", + "serde", + "serde_json", + "serde_with", + "sha2 0.10.9", + "strum 0.27.2", + "strum_macros 0.27.2", + "thiserror 1.0.69", + "time", + "tokio", + "toml 0.8.23", + "tracing", + "uuid", +] [[package]] name = "rdrand" @@ -9470,6 +9544,15 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3edd4d5d42c92f0a659926464d4cce56b562761267ecf0f469d85b7de384175" +[[package]] +name = "redb" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae323eb086579a3769daa2c753bb96deb95993c534711e0dbe881b5192906a06" +dependencies = [ + "libc", +] + [[package]] name = "redis" version = "0.25.4" @@ -13098,6 +13181,17 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "sealed" +version 
= "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22f968c5ea23d555e670b449c1c5e7b2fc399fdaec1d304a17cd48e288abc107" +dependencies = [ + "proc-macro2 1.0.101", + "quote 1.0.41", + "syn 2.0.106", +] + [[package]] name = "sec1" version = "0.3.0" From 320396e82efb8b2ffa5eeecfdfe10569327ec932 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Wed, 22 Oct 2025 16:52:54 -0300 Subject: [PATCH 4/9] almost there --- crates/rbuilder-utils/Cargo.toml | 21 +- crates/rbuilder-utils/src/clickhouse/mod.rs | 38 + .../src/clickhouse_with_backup/backup.rs | 185 +--- .../src/clickhouse_with_backup/macros.rs | 54 ++ .../src/clickhouse_with_backup/metrics.rs | 15 + .../src/clickhouse_with_backup/mod.rs | 77 +- .../src/clickhouse_with_backup/primitives.rs | 1 + crates/rbuilder-utils/src/format/mod.rs | 18 + crates/rbuilder-utils/src/lib.rs | 4 + crates/rbuilder-utils/src/metrics/backoff.rs | 325 +++++++ crates/rbuilder-utils/src/metrics/mod.rs | 48 + crates/rbuilder-utils/src/tokio/mod.rs | 863 ++++++++++++++++++ crates/rbuilder-utils/src/tokio/shutdown.rs | 162 ++++ 13 files changed, 1642 insertions(+), 169 deletions(-) create mode 100644 crates/rbuilder-utils/src/clickhouse/mod.rs create mode 100644 crates/rbuilder-utils/src/clickhouse_with_backup/macros.rs create mode 100644 crates/rbuilder-utils/src/clickhouse_with_backup/metrics.rs create mode 100644 crates/rbuilder-utils/src/format/mod.rs create mode 100644 crates/rbuilder-utils/src/metrics/backoff.rs create mode 100644 crates/rbuilder-utils/src/metrics/mod.rs create mode 100644 crates/rbuilder-utils/src/tokio/mod.rs create mode 100644 crates/rbuilder-utils/src/tokio/shutdown.rs diff --git a/crates/rbuilder-utils/Cargo.toml b/crates/rbuilder-utils/Cargo.toml index 32d64435c..e58904e3c 100644 --- a/crates/rbuilder-utils/Cargo.toml +++ b/crates/rbuilder-utils/Cargo.toml @@ -33,7 +33,14 @@ alloy-primitives.workspace = true strum = "0.27" strum_macros = "0.27" -tokio = { version = "1.40.0", default-features = false } +tokio = { version = "1.40.0", default-features = false, features = [ + "sync", + "time", + "rt-multi-thread", + "macros", + "test-util" +] } + clickhouse = { git = "https://github.com/ClickHouse/clickhouse-rs", rev = "8cf3d2e138dd121367fa10e875d3f91374b075b2", features = [ "inserter", "time", @@ -42,3 +49,15 @@ clickhouse = { git = "https://github.com/ClickHouse/clickhouse-rs", rev = "8cf3d ] } clickhouse-derive = { version = "0.2.0" } redb = { version = "3.1.0" } +tempfile = { version = "3.23.0" } +rand = "0.9.2" +futures = { version = "0.3" } +futures-util = { version = "0.3.31" } + +# tracing +tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } +tracing-futures = "0.2.5" + +# misc +auto_impl = "1.3.0" +dyn-clone = "1.0.20" diff --git a/crates/rbuilder-utils/src/clickhouse/mod.rs b/crates/rbuilder-utils/src/clickhouse/mod.rs new file mode 100644 index 000000000..07d049574 --- /dev/null +++ b/crates/rbuilder-utils/src/clickhouse/mod.rs @@ -0,0 +1,38 @@ +use serde::{Deserialize, Serialize}; + +/// Equilalent of `clickhouse::inserter::Quantities` with more traits derived. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +pub struct Quantities { + pub bytes: u64, + pub rows: u64, + pub transactions: u64, +} + +impl Quantities { + /// Just zero quantities, nothing special. 
+ pub const ZERO: Quantities = Quantities { + bytes: 0, + rows: 0, + transactions: 0, + }; +} + +impl From for Quantities { + fn from(value: clickhouse::inserter::Quantities) -> Self { + Self { + bytes: value.bytes, + rows: value.rows, + transactions: value.transactions, + } + } +} + +impl From for clickhouse::inserter::Quantities { + fn from(value: Quantities) -> Self { + Self { + bytes: value.bytes, + rows: value.rows, + transactions: value.transactions, + } + } +} diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs index 7ed94844f..638d18097 100644 --- a/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs +++ b/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs @@ -6,33 +6,26 @@ use std::{ time::{Duration, Instant, SystemTime, UNIX_EPOCH}, }; -use clickhouse::inserter::{Inserter, Quantities}; +use clickhouse::inserter::Inserter; use derive_more::{Deref, DerefMut}; use redb::{ReadableDatabase, ReadableTable, ReadableTableMetadata}; use strum::AsRefStr; use tokio::sync::mpsc; -use crate::clickhouse_with_backup::primitives::{ClickhouseIndexableOrder, ClickhouseRowExt}; - -/// A default maximum size in bytes for the in-memory backup of failed commits. -pub(crate) const MAX_MEMORY_BACKUP_SIZE_BYTES: u64 = 1024 * 1024 * 1024; // 1 GiB -/// A default maximum size in bytes for the disk backup of failed commits. -pub(crate) const MAX_DISK_BACKUP_SIZE_BYTES: u64 = 10 * 1024 * 1024 * 1024; // 10 GiB - -/// The default path where the backup database is stored. For tests, a temporary file is used. -fn default_disk_backup_database_path() -> PathBuf { - #[cfg(test)] - return tempfile::NamedTempFile::new().unwrap().path().to_path_buf(); - #[cfg(not(test))] - { - let home = std::env::var("HOME").unwrap_or_else(|_| ".".to_string()); - PathBuf::from(home) - .join(".buildernet-orderflow-proxy") - .join("clickhouse_backup.db") - } -} +use crate::{ + clickhouse::Quantities, + clickhouse_with_backup::{ + default_disk_backup_database_path, + metrics::Metrics, + primitives::{ClickhouseIndexableOrder, ClickhouseRowExt}, + MAX_DISK_BACKUP_SIZE_BYTES, MAX_MEMORY_BACKUP_SIZE_BYTES, + }, + format::FormatBytes, + metrics::backoff::BackoffInterval, + tokio::TaskExecutor, +}; -/// Tracing target for the backup actor. +/// @PendingDX REMOVETracing target for the backup actor. const TARGET: &str = "indexer::backup"; /// A type alias for disk backup keys. @@ -129,7 +122,7 @@ pub(crate) struct DiskBackupConfig { impl DiskBackupConfig { pub(crate) fn new() -> Self { Self { - path: default_disk_backup_database_path(), + path: default_disk_backup_database_path().into(), max_size_bytes: MAX_DISK_BACKUP_SIZE_BYTES, flush_interval: tokio::time::interval(Duration::from_secs(30)), } @@ -491,7 +484,7 @@ impl Default for MemoryBackup { /// data structure holds. Once this has been hit, pressure applies, meaning that we try again a /// certain failed commit for a finite number of times, and then we discard it to accomdate new /// data. -pub(crate) struct Backup { +pub(crate) struct Backup { /// The receiver of failed commit attempts. /// /// Rationale for sending multiple rows instead of sending rows: the backup abstraction must @@ -516,9 +509,10 @@ pub(crate) struct Backup { /// Whether to use only the in-memory backup (for testing purposes). 
#[cfg(test)] use_only_memory_backup: bool, + _metrics_phantom: std::marker::PhantomData, } -impl Backup { +impl Backup { pub(crate) fn new( rx: mpsc::Receiver>, inserter: Inserter, @@ -533,6 +527,7 @@ impl Backup { last_cached: None, #[cfg(test)] use_only_memory_backup: false, + _metrics_phantom: std::marker::PhantomData, } } @@ -561,7 +556,7 @@ impl Backup { match self.disk_backup.save(&failed_commit) { Ok(stats) => { tracing::debug!(target: TARGET, order = T::ORDER, total_size = stats.size_bytes.format_bytes(), elapsed = ?start.elapsed(), "saved failed commit to disk"); - IndexerMetrics::set_clickhouse_disk_backup_size( + MetricsType::set_clickhouse_disk_backup_size( stats.size_bytes, stats.total_batches, T::ORDER, @@ -571,12 +566,12 @@ impl Backup { } Err(e) => { tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to write commit, trying in-memory"); - IndexerMetrics::increment_clickhouse_backup_disk_errors(T::ORDER, e.as_ref()); + MetricsType::increment_clickhouse_backup_disk_errors(T::ORDER, e.as_ref()); } }; let stats = self.memory_backup.save(failed_commit); - IndexerMetrics::set_clickhouse_memory_backup_size( + MetricsType::set_clickhouse_memory_backup_size( stats.size_bytes, stats.total_batches, T::ORDER, @@ -585,7 +580,7 @@ impl Backup { if let Some((stats, oldest_quantities)) = self.memory_backup.drop_excess() { tracing::warn!(target: TARGET, order = T::ORDER, ?stats, "failed commits exceeded max memory backup size, dropping oldest"); - IndexerMetrics::process_clickhouse_backup_data_lost_quantities(&oldest_quantities); + MetricsType::process_clickhouse_backup_data_lost_quantities(&oldest_quantities); // Clear the cached last commit if it was from memory and we just dropped it. self.last_cached = self .last_cached @@ -621,7 +616,7 @@ impl Backup { } Err(e) => { tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to retrieve oldest failed commit from disk"); - IndexerMetrics::increment_clickhouse_backup_disk_errors(T::ORDER, e.as_ref()); + MetricsType::increment_clickhouse_backup_disk_errors(T::ORDER, e.as_ref()); None } } @@ -633,7 +628,7 @@ impl Backup { let value_ref = T::to_row_ref(row); if let Err(e) = self.inserter.write(value_ref).await { - IndexerMetrics::increment_clickhouse_write_failures(e.to_string()); + MetricsType::increment_clickhouse_write_failures(e.to_string()); tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to write to backup inserter"); continue; } @@ -647,7 +642,7 @@ impl Backup { match self.disk_backup.delete(key) { Ok(stats) => { tracing::debug!(target: TARGET, order = T::ORDER, total_size = stats.size_bytes.format_bytes(), elapsed = ?start.elapsed(), "deleted failed commit from disk"); - IndexerMetrics::set_clickhouse_disk_backup_size( + MetricsType::set_clickhouse_disk_backup_size( stats.size_bytes, stats.total_batches, T::ORDER, @@ -678,7 +673,7 @@ impl Backup { _ = self.interval.tick() => { let Some(oldest) = self.retrieve_oldest() else { self.interval.reset(); - IndexerMetrics::set_clickhouse_backup_empty_size(T::ORDER); + MetricsType::set_clickhouse_backup_empty_size(T::ORDER); continue // Nothing to do! 
}; @@ -688,14 +683,14 @@ impl Backup { match self.inserter.force_commit().await { Ok(quantities) => { tracing::info!(target: TARGET, order = T::ORDER, ?quantities, "successfully backed up"); - IndexerMetrics::process_clickhouse_backup_data_quantities(&quantities.into()); - IndexerMetrics::record_clickhouse_batch_commit_time(start.elapsed()); + MetricsType::process_clickhouse_backup_data_quantities(&quantities.into()); + MetricsType::record_clickhouse_batch_commit_time(start.elapsed()); self.interval.reset(); self.purge_commit(&oldest).await; } Err(e) => { tracing::error!(target: TARGET, order = T::ORDER, ?e, quantities = ?oldest.commit.quantities, "failed to commit bundle to clickhouse from backup"); - IndexerMetrics::increment_clickhouse_commit_failures(e.to_string()); + MetricsType::increment_clickhouse_commit_failures(e.to_string()); self.last_cached = Some(oldest); continue; } @@ -714,24 +709,24 @@ impl Backup { if let Err(e) = self.inserter.write(value_ref).await { tracing::error!( target: TARGET, order = T::ORDER, ?e, "failed to write to backup inserter during shutdown"); - IndexerMetrics::increment_clickhouse_write_failures(e.to_string()); + MetricsType::increment_clickhouse_write_failures(e.to_string()); continue; } } if let Err(e) = self.inserter.force_commit().await { tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to commit backup to CH during shutdown, trying disk"); - IndexerMetrics::increment_clickhouse_commit_failures(e.to_string()); + MetricsType::increment_clickhouse_commit_failures(e.to_string()); } if let Err(e) = self.disk_backup.save(&failed_commit) { tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to write commit to disk backup during shutdown"); - IndexerMetrics::increment_clickhouse_backup_disk_errors(T::ORDER, e.as_ref()); + MetricsType::increment_clickhouse_backup_disk_errors(T::ORDER, e.as_ref()); } } if let Err(e) = self.disk_backup.flush().await { tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to flush disk backup during shutdown"); - IndexerMetrics::increment_clickhouse_backup_disk_errors(T::ORDER, e.as_ref()); + MetricsType::increment_clickhouse_backup_disk_errors(T::ORDER, e.as_ref()); } else { tracing::info!(target: TARGET, order = T::ORDER, "flushed disk backup during shutdown"); } @@ -743,115 +738,3 @@ impl Backup { } } } - -#[cfg(test)] -mod tests { - use std::time::Duration; - - use super::*; - - use crate::{ - indexer::{ - click::{ - models::BundleRow, - tests::{create_clickhouse_bundles_table, create_test_clickhouse_client}, - }, - tests::system_bundle_example, - BUNDLE_TABLE_NAME, - }, - spawn_clickhouse_backup, - tasks::TaskManager, - }; - - // Uncomment to enable logging during tests. - // use tracing::level_filters::LevelFilter; - // use tracing_subscriber::{layer::SubscriberExt as _, util::SubscriberInitExt as _, EnvFilter}; - - impl Backup { - fn new_test( - rx: mpsc::Receiver>, - inserter: Inserter, - disk_backup: DiskBackup, - use_only_memory_backup: bool, - ) -> Self { - Self { - rx, - inserter, - interval: Default::default(), - memory_backup: MemoryBackup::default(), - disk_backup, - last_cached: None, - use_only_memory_backup, - } - } - } - - #[tokio::test(flavor = "multi_thread")] - async fn backup_e2e_works() { - // Uncomment to toggle logs. - // let registry = tracing_subscriber::registry().with( - // EnvFilter::builder().with_default_directive(LevelFilter::DEBUG.into()). 
- // from_env_lossy(), ); - // let _ = registry.with(tracing_subscriber::fmt::layer()).try_init(); - - let memory_backup_only = [false, true]; - - let task_manager = TaskManager::new(tokio::runtime::Handle::current()); - let task_executor = task_manager.executor(); - - for use_memory_only in memory_backup_only { - println!( - "---- Running backup_memory_e2e_works with use_memory_only = {use_memory_only} ----" - ); - - // 1. Spin up Clickhouse. No validation because we're testing both receipts and bundles, - // and validation on U256 is not supported. - let (image, client, _) = create_test_clickhouse_client(false).await.unwrap(); - create_clickhouse_bundles_table(&client).await.unwrap(); - - let tempfile = tempfile::NamedTempFile::new().unwrap(); - - let disk_backup = DiskBackup::new( - DiskBackupConfig::new().with_path(tempfile.path().to_path_buf().into()), - &task_executor, - ) - .expect("could not create disk backup"); - - let (tx, rx) = mpsc::channel(128); - let mut bundle_backup = Backup::::new_test( - rx, - client - .inserter(BUNDLE_TABLE_NAME) - .with_timeouts(Some(Duration::from_secs(2)), Some(Duration::from_secs(12))), - disk_backup, - use_memory_only, - ); - - spawn_clickhouse_backup!(task_executor, bundle_backup, "bundles"); - - let quantities = Quantities { - bytes: 512, - rows: 1, - transactions: 1, - }; // approximated - let bundle_row: BundleRow = (system_bundle_example(), "buildernet".to_string()).into(); - let bundle_rows = Vec::from([bundle_row]); - let failed_commit = FailedCommit::::new(bundle_rows.clone(), quantities); - - tx.send(failed_commit).await.unwrap(); - // Wait some time to let the backup process it - tokio::time::sleep(Duration::from_millis(100)).await; - - let results = client - .query(&format!("select * from {BUNDLE_TABLE_NAME}")) - .fetch_all::() - .await - .unwrap(); - - assert_eq!(results.len(), 1); - assert_eq!(bundle_rows, results, "expected, got"); - - drop(image); - } - } -} diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/macros.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/macros.rs new file mode 100644 index 000000000..ea51900cd --- /dev/null +++ b/crates/rbuilder-utils/src/clickhouse_with_backup/macros.rs @@ -0,0 +1,54 @@ +//! Helpful macros spawning clickhouse indexer tasks. + +// Rationale: a simple text-replacement macro was much more effective compared to fighting the +// compiler with additional trait bounds on the [`clickhouse::Row`] trait. + +#[macro_export] +macro_rules! spawn_clickhouse_inserter { + ($executor:ident, $runner:ident, $name:expr) => {{ + $executor.spawn_with_graceful_shutdown_signal(|shutdown| async move { + let mut shutdown_guard = None; + tokio::select! { + _ = $runner.run_loop() => { + tracing::info!(target: TARGET, "clickhouse {} indexer channel closed", $name); + } + guard = shutdown => { + tracing::info!(target: TARGET, "Received shutdown for {} indexer, performing cleanup", $name); + shutdown_guard = Some(guard); + }, + } + + match $runner.inserter.end().await { + Ok(quantities) => { + tracing::info!(target: TARGET, ?quantities, "finalized clickhouse {} inserter", $name); + } + Err(e) => { + tracing::error!(target: TARGET, ?e, "failed to write end insertion of {} to indexer", $name); + } + } + + drop(shutdown_guard); + }); + }}; +} + +#[macro_export] +macro_rules! spawn_clickhouse_backup { + ($executor:ident, $backup:ident, $name: expr) => {{ + $executor.spawn_with_graceful_shutdown_signal(|shutdown| async move { + let mut shutdown_guard = None; + tokio::select! 
{ + _ = $backup.run() => { + tracing::info!(target: TARGET, "clickhouse {} backup channel closed", $name); + } + guard = shutdown => { + tracing::info!(target: TARGET, "Received shutdown for {} backup, performing cleanup", $name); + shutdown_guard = Some(guard); + }, + } + + $backup.end().await; + drop(shutdown_guard); + }); + }}; +} diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/metrics.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/metrics.rs new file mode 100644 index 000000000..434175859 --- /dev/null +++ b/crates/rbuilder-utils/src/clickhouse_with_backup/metrics.rs @@ -0,0 +1,15 @@ +use crate::clickhouse::Quantities; +use std::time::Duration; + +/// Metrics updated by the clickhouse_with_backup mod. +pub trait Metrics { + fn increment_clickhouse_write_failures(err: String); + fn process_clickhouse_quantities(quantities: &Quantities); + fn record_clickhouse_batch_commit_time(duration: Duration); + fn increment_clickhouse_commit_failures(err: String); + fn set_clickhouse_queue_size(size: usize, order: &'static str); + fn set_clickhouse_disk_backup_size(size_bytes: u64, batches: usize, order: &'static str); + fn increment_clickhouse_backup_disk_errors(order: &'static str, error: &str); + fn set_clickhouse_memory_backup_size(size_bytes: u64, batches: usize, order: &'static str); + fn process_clickhouse_backup_data_lost_quantities(quantities: &Quantities); +} diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs index 1e049efc0..ba8e1cd44 100644 --- a/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs +++ b/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs @@ -5,18 +5,57 @@ use std::{ time::{Duration, Instant}, }; +/// The tracing target for this indexer crate. @PendingDX REMOVE +const TARGET: &str = "indexer"; + use clickhouse::{ - error::Result as ClickhouseResult, - inserter::{Inserter, Quantities}, - Client as ClickhouseClient, Row, + error::Result as ClickhouseResult, inserter::Inserter, Client as ClickhouseClient, Row, }; use tokio::sync::mpsc; -use crate::clickhouse_with_backup::primitives::{ClickhouseIndexableOrder, ClickhouseRowExt}; +use crate::{ + clickhouse::Quantities, + clickhouse_with_backup::{ + backup::FailedCommit, + metrics::Metrics, + primitives::{ClickhouseIndexableOrder, ClickhouseRowExt}, + }, + metrics::Sampler, +}; mod backup; +pub mod macros; +pub mod metrics; +/// mod macros; +/// mod models; pub(crate) mod primitives; +/// A default maximum size in bytes for the in-memory backup of failed commits. +pub(crate) const MAX_MEMORY_BACKUP_SIZE_BYTES: u64 = 1024 * 1024 * 1024; // 1 GiB +/// A default maximum size in bytes for the disk backup of failed commits. +pub(crate) const MAX_DISK_BACKUP_SIZE_BYTES: u64 = 10 * 1024 * 1024 * 1024; // 10 GiB + +/// The default path where the backup database is stored. For tests, a temporary file is used. +pub(crate) fn default_disk_backup_database_path() -> String { + #[cfg(test)] + return tempfile::NamedTempFile::new() + .unwrap() + .path() + .to_string_lossy() + .to_string(); + #[cfg(not(test))] + { + use std::path::PathBuf; + + let home = std::env::var("HOME").unwrap_or_else(|_| ".".to_string()); + PathBuf::from(home) + .join(".buildernet-orderflow-proxy") + .join("clickhouse_backup.db") + .to_string_lossy() + .to_string() + } +} + /// An clickhouse inserter with some sane defaults. fn default_inserter(client: &ClickhouseClient, table_name: &str) -> Inserter { // TODO: make this configurable. 
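The `Metrics` trait above is the telemetry integration point for this module. As a rough, hedged illustration (not part of the patch), a consumer that does not need metrics could plug in a no-op implementation along the following lines; `NoopMetrics` is a hypothetical name, the paths assume the crate layout from lib.rs, and later commits in this series shorten some of these hook names (e.g. `set_disk_backup_size`), so the exact trait surface may differ.

    use std::time::Duration;

    use rbuilder_utils::clickhouse::Quantities;
    use rbuilder_utils::clickhouse_with_backup::metrics::Metrics;

    // A metrics sink that drops every observation; useful for tests or plain binaries.
    struct NoopMetrics;

    impl Metrics for NoopMetrics {
        fn increment_clickhouse_write_failures(_err: String) {}
        fn process_clickhouse_quantities(_quantities: &Quantities) {}
        fn record_clickhouse_batch_commit_time(_duration: Duration) {}
        fn increment_clickhouse_commit_failures(_err: String) {}
        fn set_clickhouse_queue_size(_size: usize, _order: &'static str) {}
        fn set_clickhouse_disk_backup_size(_bytes: u64, _batches: usize, _order: &'static str) {}
        fn increment_clickhouse_backup_disk_errors(_order: &'static str, _error: &str) {}
        fn set_clickhouse_memory_backup_size(_bytes: u64, _batches: usize, _order: &'static str) {}
        fn process_clickhouse_backup_data_lost_quantities(_quantities: &Quantities) {}
    }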
@@ -33,7 +72,7 @@ fn default_inserter(client: &ClickhouseClient, table_name: &str) -> Inse } /// A wrapper over a Clickhouse [`Inserter`] that supports a backup mechanism. -struct ClickhouseInserter { +struct ClickhouseInserter { /// The inner Clickhouse inserter client. inner: Inserter, /// A small in-memory backup of the current data we're trying to commit. In case this fails to @@ -41,15 +80,17 @@ struct ClickhouseInserter { rows_backup: Vec, /// The channel where to send data to be backed up. backup_tx: mpsc::Sender>, + _metrics_phantom: std::marker::PhantomData, } -impl ClickhouseInserter { +impl ClickhouseInserter { fn new(inner: Inserter, backup_tx: mpsc::Sender>) -> Self { let rows_backup = Vec::new(); Self { inner, rows_backup, backup_tx, + _metrics_phantom: std::marker::PhantomData, } } @@ -59,7 +100,7 @@ impl ClickhouseInserter { let value_ref = ClickhouseRowExt::to_row_ref(&row); if let Err(e) = self.inner.write(value_ref).await { - IndexerMetrics::increment_clickhouse_write_failures(e.to_string()); + MetricsType::increment_clickhouse_write_failures(e.to_string()); tracing::error!(target: TARGET, order = T::ORDER, ?e, %hash, "failed to write to clickhouse inserter"); return; } @@ -81,14 +122,14 @@ impl ClickhouseInserter { tracing::trace!(target: TARGET, order = T::ORDER, "committed to inserter"); } else { tracing::debug!(target: TARGET, order = T::ORDER, ?quantities, "inserted batch to clickhouse"); - IndexerMetrics::process_clickhouse_quantities(&quantities.into()); - IndexerMetrics::record_clickhouse_batch_commit_time(start.elapsed()); + MetricsType::process_clickhouse_quantities(&quantities.into()); + MetricsType::record_clickhouse_batch_commit_time(start.elapsed()); // Clear the backup rows. self.rows_backup.clear(); } } Err(e) => { - IndexerMetrics::increment_clickhouse_commit_failures(e.to_string()); + MetricsType::increment_clickhouse_commit_failures(e.to_string()); tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to commit bundle to clickhouse"); let rows = std::mem::take(&mut self.rows_backup); @@ -107,7 +148,7 @@ impl ClickhouseInserter { } } -impl std::fmt::Debug for ClickhouseInserter { +impl std::fmt::Debug for ClickhouseInserter { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("ClickhouseInserter") .field("inserter", &T::ORDER.to_string()) @@ -118,19 +159,19 @@ impl std::fmt::Debug for ClickhouseInserter { /// A long-lived actor to run a [`ClickhouseIndexer`] until it possible to receive new order to /// index. -struct InserterRunner { +struct InserterRunner { /// The channel from which we can receive new orders to index. rx: mpsc::Receiver, /// The underlying Clickhouse inserter. - inserter: ClickhouseInserter, + inserter: ClickhouseInserter, /// The name of the local operator to use when adding data to clickhouse. 
builder_name: String, } -impl InserterRunner { +impl InserterRunner { fn new( rx: mpsc::Receiver, - inserter: ClickhouseInserter, + inserter: ClickhouseInserter, builder_name: String, ) -> Self { Self { @@ -149,7 +190,7 @@ impl InserterRunner { while let Some(order) = self.rx.recv().await { tracing::trace!(target: TARGET, order = T::ORDER, hash = %order.hash(), "received data to index"); sampler.sample(|| { - IndexerMetrics::set_clickhouse_queue_size(self.rx.len(), T::ORDER); + MetricsType::set_clickhouse_queue_size(self.rx.len(), T::ORDER); }); let row = order.to_row(self.builder_name.clone()); @@ -170,7 +211,8 @@ pub(crate) struct ClickhouseClientConfig { validation: bool, } -impl ClickhouseClientConfig { +/// @PendingDX +/*impl ClickhouseClientConfig { fn new(args: &ClickhouseArgs, validation: bool) -> Self { Self { host: args.host.clone().expect("host is set"), @@ -181,6 +223,7 @@ impl ClickhouseClientConfig { } } } +*/ impl From for ClickhouseClient { fn from(config: ClickhouseClientConfig) -> Self { diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/primitives.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/primitives.rs index 11bfc68e1..d691956a8 100644 --- a/crates/rbuilder-utils/src/clickhouse_with_backup/primitives.rs +++ b/crates/rbuilder-utils/src/clickhouse_with_backup/primitives.rs @@ -1,6 +1,7 @@ use alloy_primitives::B256; use clickhouse::{Row, RowWrite}; use serde::{de::DeserializeOwned, Serialize}; + pub(crate) trait ClickhouseRowExt: Row + RowWrite + Serialize + DeserializeOwned + Sync + Send + 'static { diff --git a/crates/rbuilder-utils/src/format/mod.rs b/crates/rbuilder-utils/src/format/mod.rs new file mode 100644 index 000000000..99fb22a52 --- /dev/null +++ b/crates/rbuilder-utils/src/format/mod.rs @@ -0,0 +1,18 @@ +/// A trait for types that can be formatted as a human-readable size in bytes. +pub trait FormatBytes { + fn format_bytes(&self) -> String; +} + +impl FormatBytes for u64 { + fn format_bytes(&self) -> String { + if *self < 1024 { + format!("{}B", self) + } else if *self < 1024 * 1024 { + format!("{}KiB", self / 1024) + } else if *self < 1024 * 1024 * 1024 { + format!("{}MiB", self / 1024 / 1024) + } else { + format!("{}GiB", self / 1024 / 1024 / 1024) + } + } +} diff --git a/crates/rbuilder-utils/src/lib.rs b/crates/rbuilder-utils/src/lib.rs index b72aab9ab..b0a52744b 100644 --- a/crates/rbuilder-utils/src/lib.rs +++ b/crates/rbuilder-utils/src/lib.rs @@ -1 +1,5 @@ +pub mod clickhouse; pub mod clickhouse_with_backup; +pub mod format; +pub mod metrics; +pub mod tokio; diff --git a/crates/rbuilder-utils/src/metrics/backoff.rs b/crates/rbuilder-utils/src/metrics/backoff.rs new file mode 100644 index 000000000..542eba7aa --- /dev/null +++ b/crates/rbuilder-utils/src/metrics/backoff.rs @@ -0,0 +1,325 @@ +//! Time-related utilies. + +use std::{ + future::{poll_fn, Future as _}, + iter::Iterator, + pin::Pin, + task::{Context, Poll}, + time::Duration, +}; + +/// A random number generator for applying jitter to [`std::time::Duration`]. +#[derive(Debug, Clone)] +pub(crate) struct Jitter; + +impl Jitter { + /// Apply jitter to provided duration, by multiplying it for a random number between 0 and 2. + pub(crate) fn apply_to(duration: Duration) -> Duration { + duration.mul_f64(rand::random::() * 2_f64) + } +} + +/// A retry strategy driven by exponential back-off. +/// +/// The power corresponds to the number of past attempts. 
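Before moving on to the back-off iterator, one note on the `FormatBytes` helper added in format/mod.rs above: it truncates with integer division rather than rounding, so fractional sizes are floored. A small worked example, assuming the `rbuilder_utils::format` path from lib.rs:

    use rbuilder_utils::format::FormatBytes;

    fn format_bytes_examples() {
        assert_eq!(512u64.format_bytes(), "512B");
        assert_eq!(1536u64.format_bytes(), "1KiB"); // 1.5 KiB is floored to 1 KiB
        assert_eq!((10 * 1024 * 1024u64).format_bytes(), "10MiB");
        assert_eq!((3 * 1024 * 1024 * 1024u64).format_bytes(), "3GiB");
    }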
+/// +/// Taken from +#[derive(Debug, Clone)] +pub(crate) struct ExponentialBackoff { + current: u64, + base: u64, + factor: u64, + max_delay: Option, +} + +#[allow(dead_code)] +impl ExponentialBackoff { + /// Constructs a new exponential back-off strategy, + /// given a base duration in milliseconds. + /// + /// The resulting duration is calculated by taking the base to the `n`-th power, + /// where `n` denotes the number of past attempts. + pub(crate) fn from_millis(base: u64) -> ExponentialBackoff { + ExponentialBackoff { + current: base, + base, + factor: 1u64, + max_delay: None, + } + } + + /// A multiplicative factor that will be applied to the retry delay. + /// + /// For example, using a factor of `1000` will make each delay in units of seconds. + /// + /// Default factor is `1`. + pub(crate) fn factor(mut self, factor: u64) -> ExponentialBackoff { + self.factor = factor; + self + } + + /// Apply a maximum delay. No retry delay will be longer than this `Duration`. + pub(crate) fn max_delay(mut self, duration: Duration) -> ExponentialBackoff { + self.max_delay = Some(duration); + self + } + + /// Reset the backoff to the initial state. + pub(crate) fn reset(&mut self) { + self.current = self.base; + } +} + +impl Iterator for ExponentialBackoff { + type Item = Duration; + + // TODO: change this logic, so that we can always multiply base by a factor. + // e.g. base = 8, factor = 2 yields to: 8ms, 16ms, 32ms, 64ms, ... + fn next(&mut self) -> Option { + // set delay duration by applying factor + let duration = if let Some(duration) = self.current.checked_mul(self.factor) { + Duration::from_millis(duration) + } else { + Duration::from_millis(u64::MAX) + }; + + // check if we reached max delay + if let Some(ref max_delay) = self.max_delay { + if duration > *max_delay { + return Some(*max_delay); + } + } + + if let Some(next) = self.current.checked_mul(self.base) { + self.current = next; + } else { + self.current = u64::MAX; + } + + Some(duration) + } +} + +/// An interval heavily inspired by [`tokio::time::Interval`], that supports exponential back-off +/// and jitter. +#[derive(Debug)] +pub(crate) struct BackoffInterval { + /// Future that completes the next time the `Interval` yields a value. + delay: Pin>, + + /// The exponential backoff configuration. + backoff: ExponentialBackoff, + + /// An optional jitter to apply to the ticks. + jitter: bool, +} + +impl BackoffInterval { + /// Creates a new interval that ticks immediately. + pub(crate) fn new(backoff: ExponentialBackoff) -> Self { + let start = tokio::time::Instant::now(); + let delay = Box::pin(tokio::time::sleep_until(start)); + Self { + delay, + backoff, + jitter: false, + } + } + + pub(crate) fn with_jitter(mut self) -> Self { + self.jitter = true; + self + } + + pub(crate) fn poll_tick(&mut self, cx: &mut Context<'_>) -> Poll { + // Wait for the delay to be done + std::task::ready!(Pin::new(&mut self.delay).poll(cx)); + + // Get the time when we were schedulued to tick + let timeout = self.delay.deadline(); + + // CHANGE: use custom logic that takes into a account backoff and jitter to calculate new + // instant. + let next = self.next(); + + // CHANGE: Unfortunately, [`tokio::time::Sleep::reset_without_reregister`] isn't + // pub(crate)lic so we have to register the waker again. + self.delay.as_mut().reset(next); + + Poll::Ready(timeout) + } + + /// Completes when the next instant in the interval has been reached. 
+ pub(crate) async fn tick(&mut self) -> tokio::time::Instant { + let instant = poll_fn(|cx| self.poll_tick(cx)); + + instant.await + } + + /// Resets backoff to the initial state, and the next tick will happen after the initial period + /// returned by [`ExponentialBackoff`]. + pub(crate) fn reset(&mut self) { + self.backoff.reset(); + let next = self.next(); + self.delay.as_mut().reset(next); + } + + /// Return the next instant at which the interval should tick. + fn next(&mut self) -> tokio::time::Instant { + let now = tokio::time::Instant::now(); + // We provide a [`tokio::time::MissedTickBehavior::Delay`] behavior but we also add backoff + // and jitter if the user configured it. + let mut period = self + .backoff + .next() + .expect("ExponentialBackoff never returns None"); + if self.jitter { + period = Jitter::apply_to(period); + } + now.checked_add(period).expect("no overflow") + } +} + +impl Default for BackoffInterval { + fn default() -> Self { + // So will return 4, 16, 64, 256, 1024, ... milliseconds with jitter. + Self::new(ExponentialBackoff::from_millis(4).max_delay(Duration::from_millis(8192))) + .with_jitter() + } +} + +#[cfg(test)] +mod tests { + use tokio::time::{Duration, Instant}; + + use super::*; + + #[test] + fn exp_backoff_returns_some_exponential_base_10() { + let mut s = ExponentialBackoff::from_millis(10); + + assert_eq!(s.next(), Some(Duration::from_millis(10))); + assert_eq!(s.next(), Some(Duration::from_millis(100))); + assert_eq!(s.next(), Some(Duration::from_millis(1000))); + } + + #[test] + fn exp_backoff_returns_some_exponential_base_2() { + let mut s = ExponentialBackoff::from_millis(2); + + assert_eq!(s.next(), Some(Duration::from_millis(2))); + assert_eq!(s.next(), Some(Duration::from_millis(4))); + assert_eq!(s.next(), Some(Duration::from_millis(8))); + } + + #[test] + fn exp_backoff_saturates_at_maximum_value() { + let mut s = ExponentialBackoff::from_millis(u64::MAX - 1); + + assert_eq!(s.next(), Some(Duration::from_millis(u64::MAX - 1))); + assert_eq!(s.next(), Some(Duration::from_millis(u64::MAX))); + assert_eq!(s.next(), Some(Duration::from_millis(u64::MAX))); + } + + #[test] + fn exp_backoff_can_use_factor_to_get_seconds() { + let factor = 1000; + let mut s = ExponentialBackoff::from_millis(2).factor(factor); + + assert_eq!(s.next(), Some(Duration::from_secs(2))); + assert_eq!(s.next(), Some(Duration::from_secs(4))); + assert_eq!(s.next(), Some(Duration::from_secs(8))); + } + + #[test] + fn exp_backoff_stops_increasing_at_max_delay() { + let mut s = ExponentialBackoff::from_millis(2).max_delay(Duration::from_millis(4)); + + assert_eq!(s.next(), Some(Duration::from_millis(2))); + assert_eq!(s.next(), Some(Duration::from_millis(4))); + assert_eq!(s.next(), Some(Duration::from_millis(4))); + } + + #[test] + fn exp_backoff_returns_max_when_max_less_than_base() { + let mut s = ExponentialBackoff::from_millis(20).max_delay(Duration::from_millis(10)); + + assert_eq!(s.next(), Some(Duration::from_millis(10))); + assert_eq!(s.next(), Some(Duration::from_millis(10))); + } + + // Tests with `start_paused = true` consists of tests with [`tokio::time::pause`] and + // require manual advancement of time with [`tokio::time::advance`] or with sleeps. 
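One property the tests below do not pin down is the jitter range: `Jitter::apply_to` scales a delay by a random factor in [0, 2), so a jittered delay can shrink towards zero or grow to roughly twice the nominal value. A small check of that bound could look like this sketch (illustrative, not part of the original patch):

    #[test]
    fn jitter_stays_within_twice_the_base() {
        let base = Duration::from_millis(100);
        for _ in 0..1_000 {
            // The random factor is drawn from [0, 2), so the result never exceeds 2 * base.
            let jittered = Jitter::apply_to(base);
            assert!(jittered <= base * 2);
        }
    }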
+ + #[tokio::test(start_paused = true)] + async fn backoff_interval_ticks_as_expected() { + let backoff = ExponentialBackoff::from_millis(2); + let mut backoff_clone = backoff.clone(); + let mut interval = BackoffInterval::new(backoff); + + let before = Instant::now(); + let t1 = interval.tick().await; + assert_eq!(t1, before); + let t2 = interval.tick().await; + assert_eq!(t2, t1 + backoff_clone.next().unwrap()); + let t3 = interval.tick().await; + assert_eq!(t3, t2 + backoff_clone.next().unwrap()); + let t4 = interval.tick().await; + assert_eq!(t4, t3 + backoff_clone.next().unwrap()); + } + + #[tokio::test(start_paused = true)] + async fn backoff_interval_resets_properly() { + let backoff = ExponentialBackoff::from_millis(2); + let mut backoff_clone = backoff.clone(); + let mut interval = BackoffInterval::new(backoff); + + interval.tick().await; + interval.tick().await; + interval.tick().await; + interval.tick().await; + + interval.reset(); + let now = Instant::now(); + let expected_delay = backoff_clone.next().unwrap(); + let actual = interval.tick().await; + + assert_eq!(now + expected_delay, actual); + } + + #[tokio::test(start_paused = true)] + async fn backoff_interval_with_jitter_works() { + // No jitter + { + let beginning = Instant::now(); + + let backoff = ExponentialBackoff::from_millis(5); + let mut backoff_clone = backoff.clone(); + let mut interval = BackoffInterval::new(backoff); + + let t1 = interval.tick().await; + assert_eq!(t1, beginning); // First tick is immediate + + let t2 = interval.tick().await; + assert_eq!(t2, t1 + backoff_clone.next().unwrap()); + + let t3 = interval.tick().await; + assert_eq!(t3, t2 + backoff_clone.next().unwrap()); + } + + // Jitter + { + let beginning = Instant::now(); + + let backoff = ExponentialBackoff::from_millis(5); + let mut backoff_clone = backoff.clone(); + let mut interval = BackoffInterval::new(backoff).with_jitter(); + let t1 = interval.tick().await; + assert_eq!(t1, beginning); // First tick is immediate + + // Next tick will be 5ms later, but jitter changes it. + let t2 = interval.tick().await; + assert_ne!(t2, t1 + backoff_clone.next().unwrap()); + } + } +} diff --git a/crates/rbuilder-utils/src/metrics/mod.rs b/crates/rbuilder-utils/src/metrics/mod.rs new file mode 100644 index 000000000..b429ce335 --- /dev/null +++ b/crates/rbuilder-utils/src/metrics/mod.rs @@ -0,0 +1,48 @@ +pub mod backoff; + +use std::time::{Duration, Instant}; + +/// A simple sampler that executes a closure every `sample_size` calls, or if a certain amount of +/// time has passed since last sampling call. +#[derive(Debug, Clone)] +pub struct Sampler { + sample_size: usize, + counter: usize, + start: Instant, + interval: Duration, +} + +impl Default for Sampler { + fn default() -> Self { + Self { + sample_size: 4096, + counter: 0, + start: Instant::now(), + interval: Duration::from_secs(10), + } + } +} + +impl Sampler { + pub fn with_sample_size(mut self, sample_size: usize) -> Self { + self.sample_size = sample_size; + self + } + + pub fn with_interval(mut self, interval: Duration) -> Self { + self.start = Instant::now() - interval; + self + } + + /// Call this function to potentially execute the sample closure if we have reached the sample + /// size, or enough time has passed. Otherwise, it increments the internal counter. 
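Usage-wise the sampler is meant to sit inside a hot loop and rate-limit gauge updates, as `InserterRunner::run_loop` does above. A standalone sketch, assuming the `rbuilder_utils::metrics` path from lib.rs and an arbitrary sample size:

    use rbuilder_utils::metrics::Sampler;

    fn process_all(items: impl Iterator<Item = u64>) {
        let mut sampler = Sampler::default().with_sample_size(1_024);
        let mut processed = 0u64;
        for _item in items {
            processed += 1;
            sampler.sample(|| {
                // Fires roughly once every 1_024 calls, or after the default 10 s interval.
                tracing::debug!(processed, "progress sample");
            });
        }
    }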
+ pub fn sample(&mut self, f: impl FnOnce()) { + if self.counter >= self.sample_size || self.start.elapsed() >= self.interval { + self.counter = 0; + self.start = Instant::now(); + f(); + } else { + self.counter += 1; + } + } +} diff --git a/crates/rbuilder-utils/src/tokio/mod.rs b/crates/rbuilder-utils/src/tokio/mod.rs new file mode 100644 index 000000000..5ac9c67c7 --- /dev/null +++ b/crates/rbuilder-utils/src/tokio/mod.rs @@ -0,0 +1,863 @@ +//! Task management utilities. +//! +//! Taken from `reth_tasks` crate (https://github.com/paradigmxyz/reth/blob/main/crates/tasks/src/lib.rs) and adapted. +//! +//! This crate exposes two main abstractions: a [`TaskManager`] and a [`TaskExecutor`]. The +//! [`TaskManager`] is a centralized entity responsible, as the name suggests, for managing tasks, +//! while the [`TaskExecutor`] is used to spawn tasks onto a Tokio runtime. +//! +//! ## Architecture +//! +//! The [`TaskManager`] holds a [`tokio`] runtime handle that is needed to create child executor +//! that actually spawn tasks. Other than that, it contains: +//! - a receiver for task events (like packing of critical tasks); +//! - a sender for task events, used by the executors to let spawned task report events; +//! - a counter which tracks how many tasks that need graceful shutdown are currently running. +//! +//! Tasks can be also spawned as "critical" and/or with "graceful shutdown" support. +//! Critical tasks when they terminate they send a message to the [`TaskManager`] which in turn +//! will terminate itself after sending a shutdown signal to all long-running tasks. It is up to +//! the application to wait enough time before closing the process to allow graceful shutdown tasks +//! to complete. +//! Graceful shutdown tasks are spawned with a [`GracefulShutdown`] signal that can be awaited, and +//! resolves when a shutdown is explicitely requested by the executor or manager. That can be +//! before a SIGINT/SIGTERM signal is received or when a critical task panics. The +//! [`GracefulShutdown`] signal resolves to a [`GracefulShutdownGuard`]. This guard is simply a +//! shared counter that when dropped, decrements. This used by tasks to notify the manager that the +//! graceful shutdown has completed. + +use dyn_clone::DynClone; +use futures_util::{ + future::{select, BoxFuture}, + Future, FutureExt, TryFutureExt, +}; +use shutdown::{signal, GracefulShutdown, GracefulShutdownGuard, Shutdown, Signal}; +use std::{ + any::Any, + fmt::{Display, Formatter}, + pin::{pin, Pin}, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, + task::{ready, Context, Poll}, +}; +use tokio::{ + runtime::Handle, + sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}, + task::JoinHandle, +}; +use tracing_futures::Instrument; + +pub mod shutdown; + +/// A type that can spawn tasks. +/// +/// The main purpose of this type is to abstract over [`TaskExecutor`] so it's more convenient to +/// provide default impls for testing. +/// +/// +/// # Examples +/// +/// Use the [`TokioTaskExecutor`] that spawns with [`tokio::task::spawn`] +/// +/// ``` +/// # async fn t() { +/// use buildernet_orderflow_proxy::tasks::{TaskSpawner, TokioTaskExecutor}; +/// let executor = TokioTaskExecutor::default(); +/// +/// let task = executor.spawn(Box::pin(async { +/// // -- snip -- +/// })); +/// task.await.unwrap(); +/// # } +/// ``` +/// +/// Use the [`TaskExecutor`] that spawns task directly onto the tokio runtime via the [Handle]. 
+/// +/// ``` +/// # use buildernet_orderflow_proxy::tasks::TaskManager; +/// fn t() { +/// use buildernet_orderflow_proxy::tasks::TaskSpawner; +/// let rt = tokio::runtime::Runtime::new().unwrap(); +/// let manager = TaskManager::new(rt.handle().clone()); +/// let executor = manager.executor(); +/// let task = TaskSpawner::spawn(&executor, Box::pin(async { +/// // -- snip -- +/// })); +/// rt.block_on(task).unwrap(); +/// # } +/// ``` +/// +/// The [`TaskSpawner`] trait is [`DynClone`] so `Box` are also `Clone`. +#[auto_impl::auto_impl(&, Arc)] +pub trait TaskSpawner: Send + Sync + Unpin + std::fmt::Debug + DynClone { + /// Spawns the task onto the runtime. + /// See also [`Handle::spawn`]. + fn spawn(&self, fut: BoxFuture<'static, ()>) -> JoinHandle<()>; + + /// This spawns a critical task onto the runtime. + fn spawn_critical(&self, name: &'static str, fut: BoxFuture<'static, ()>) -> JoinHandle<()>; + + /// Spawns a blocking task onto the runtime. + fn spawn_blocking(&self, fut: BoxFuture<'static, ()>) -> JoinHandle<()>; + + /// This spawns a critical blocking task onto the runtime. + fn spawn_critical_blocking( + &self, + name: &'static str, + fut: BoxFuture<'static, ()>, + ) -> JoinHandle<()>; +} + +dyn_clone::clone_trait_object!(TaskSpawner); + +/// An [`TaskSpawner`] that uses [`tokio::task::spawn`] to execute tasks +#[derive(Debug, Clone, Default)] +#[non_exhaustive] +pub struct TokioTaskExecutor; + +impl TokioTaskExecutor { + /// Converts the instance to a boxed [`TaskSpawner`]. + pub fn boxed(self) -> Box { + Box::new(self) + } +} + +impl TaskSpawner for TokioTaskExecutor { + fn spawn(&self, fut: BoxFuture<'static, ()>) -> JoinHandle<()> { + tokio::task::spawn(fut) + } + + fn spawn_critical(&self, _name: &'static str, fut: BoxFuture<'static, ()>) -> JoinHandle<()> { + tokio::task::spawn(fut) + } + + fn spawn_blocking(&self, fut: BoxFuture<'static, ()>) -> JoinHandle<()> { + tokio::task::spawn_blocking(move || tokio::runtime::Handle::current().block_on(fut)) + } + + fn spawn_critical_blocking( + &self, + _name: &'static str, + fut: BoxFuture<'static, ()>, + ) -> JoinHandle<()> { + tokio::task::spawn_blocking(move || tokio::runtime::Handle::current().block_on(fut)) + } +} + +/// Many reth components require to spawn tasks for long-running jobs. For example `discovery` +/// spawns tasks to handle egress and ingress of udp traffic or `network` that spawns session tasks +/// that handle the traffic to and from a peer. +/// +/// To unify how tasks are created, the [`TaskManager`] provides access to the configured Tokio +/// runtime. A [`TaskManager`] stores the [`tokio::runtime::Handle`] it is associated with. In this +/// way it is possible to configure on which runtime a task is executed. +/// +/// The main purpose of this type is to be able to monitor if a critical task panicked, for +/// diagnostic purposes, since tokio task essentially fail silently. Therefore, this type is a +/// Stream that yields the name of panicked task, See [`TaskExecutor::spawn_critical`]. In order to +/// execute Tasks use the [`TaskExecutor`] type [`TaskManager::executor`]. +#[derive(Debug)] +#[must_use = "TaskManager must be polled to monitor critical tasks"] +pub struct TaskManager { + /// Handle to the tokio runtime this task manager is associated with. + /// + /// See [`Handle`] docs. 
+ handle: Handle, + /// Sender half for sending task events to this type + task_events_tx: UnboundedSender, + /// Receiver for task events + task_events_rx: UnboundedReceiver, + /// The [Signal] to fire when all tasks should be shutdown. + /// + /// This is fired when dropped. + signal: Option, + /// Receiver of the shutdown signal. + on_shutdown: Shutdown, + /// How many [`GracefulShutdown`] tasks are currently active + graceful_tasks: Arc, +} + +// === impl TaskManager === + +impl TaskManager { + /// Returns a __new__ [`TaskManager`] over the currently running Runtime. + /// + /// This must be polled for the duration of the program. + /// + /// To obtain the current [`TaskExecutor`] see [`TaskExecutor::current`]. + /// + /// # Panics + /// + /// This will panic if called outside the context of a Tokio runtime. + pub fn current() -> Self { + let handle = Handle::current(); + Self::new(handle) + } + + /// Create a new instance connected to the given handle's tokio runtime. + /// + /// This also sets the global [`TaskExecutor`]. + pub fn new(handle: Handle) -> Self { + let (task_events_tx, task_events_rx) = unbounded_channel(); + let (signal, on_shutdown) = signal(); + Self { + handle, + task_events_tx, + task_events_rx, + signal: Some(signal), + on_shutdown, + graceful_tasks: Arc::new(AtomicUsize::new(0)), + } + } + + /// Returns a new [`TaskExecutor`] that can spawn new tasks onto the tokio runtime this type is + /// connected to. + pub fn executor(&self) -> TaskExecutor { + TaskExecutor { + handle: self.handle.clone(), + on_shutdown: self.on_shutdown.clone(), + task_events_tx: self.task_events_tx.clone(), + graceful_tasks: Arc::clone(&self.graceful_tasks), + } + } + + /// Fires the shutdown signal and awaits until all tasks are shutdown. + pub fn graceful_shutdown(self) { + let _ = self.do_graceful_shutdown(None); + } + + /// Fires the shutdown signal and awaits until all tasks are shutdown. + /// + /// Returns true if all tasks were shutdown before the timeout elapsed. + pub fn graceful_shutdown_with_timeout(self, timeout: std::time::Duration) -> bool { + self.do_graceful_shutdown(Some(timeout)) + } + + fn do_graceful_shutdown(self, timeout: Option) -> bool { + drop(self.signal); + let when = timeout.map(|t| std::time::Instant::now() + t); + while self.graceful_tasks.load(Ordering::Relaxed) > 0 { + if when.map(|when| std::time::Instant::now() > when).unwrap_or(false) { + tracing::debug!("graceful shutdown timed out"); + return false; + } + std::hint::spin_loop(); + } + + tracing::debug!("gracefully shut down"); + true + } +} + +/// An endless future that resolves if a critical task panicked. +/// +/// See [`TaskExecutor::spawn_critical`] +impl Future for TaskManager { + type Output = Result<(), PanickedTaskError>; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + match ready!(self.as_mut().get_mut().task_events_rx.poll_recv(cx)) { + Some(TaskEvent::Panic(err)) => Poll::Ready(Err(err)), + Some(TaskEvent::GracefulShutdown) | None => { + if let Some(signal) = self.get_mut().signal.take() { + signal.fire(); + } + Poll::Ready(Ok(())) + } + } + } +} + +/// Error with the name of the task that panicked and an error downcasted to string, if possible. 
+#[derive(Debug, thiserror::Error, PartialEq, Eq)] +pub struct PanickedTaskError { + task_name: &'static str, + error: Option, +} + +impl Display for PanickedTaskError { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let task_name = self.task_name; + if let Some(error) = &self.error { + write!(f, "Critical task `{task_name}` panicked: `{error}`") + } else { + write!(f, "Critical task `{task_name}` panicked") + } + } +} + +impl PanickedTaskError { + fn new(task_name: &'static str, error: Box) -> Self { + let error = match error.downcast::() { + Ok(value) => Some(*value), + Err(error) => match error.downcast::<&str>() { + Ok(value) => Some(value.to_string()), + Err(_) => None, + }, + }; + + Self { task_name, error } + } +} + +/// Represents the events that the `TaskManager`'s main future can receive. +#[derive(Debug)] +enum TaskEvent { + /// Indicates that a critical task has panicked. + Panic(PanickedTaskError), + /// A signal requesting a graceful shutdown of the `TaskManager`. + GracefulShutdown, +} + +/// A type that can spawn new tokio tasks +#[derive(Debug, Clone)] +pub struct TaskExecutor { + /// Handle to the tokio runtime this task manager is associated with. + /// + /// See [`Handle`] docs. + handle: Handle, + /// Receiver of the shutdown signal. + on_shutdown: Shutdown, + /// Sender half for sending task events to this type + task_events_tx: UnboundedSender, + /// How many [`GracefulShutdown`] tasks are currently active + graceful_tasks: Arc, +} + +// === impl TaskExecutor === + +impl TaskExecutor { + /// Returns the [Handle] to the tokio runtime. + pub const fn handle(&self) -> &Handle { + &self.handle + } + + /// Returns the receiver of the shutdown signal. + pub const fn on_shutdown_signal(&self) -> &Shutdown { + &self.on_shutdown + } + + /// Spawns a future on the tokio runtime depending on the [`TaskKind`] + fn spawn_on_rt(&self, fut: F, task_kind: TaskKind) -> JoinHandle<()> + where + F: Future + Send + 'static, + { + match task_kind { + TaskKind::Default => self.handle.spawn(fut), + TaskKind::Blocking => { + let handle = self.handle.clone(); + self.handle.spawn_blocking(move || handle.block_on(fut)) + } + } + } + + /// Spawns a regular task depending on the given [`TaskKind`] + fn spawn_task_as(&self, fut: F, task_kind: TaskKind) -> JoinHandle<()> + where + F: Future + Send + 'static, + { + let on_shutdown = self.on_shutdown.clone(); + + // Wrap the original future to increment the finished tasks counter upon completion + let task = { + async move { + let fut = pin!(fut); + let _ = select(on_shutdown, fut).await; + } + } + .in_current_span(); + + self.spawn_on_rt(task, task_kind) + } + + /// Spawns the task onto the runtime. + /// The given future resolves as soon as the [Shutdown] signal is received. + /// + /// See also [`Handle::spawn`]. + pub fn spawn(&self, fut: F) -> JoinHandle<()> + where + F: Future + Send + 'static, + { + self.spawn_task_as(fut, TaskKind::Default) + } + + /// Spawns a blocking task onto the runtime. + /// The given future resolves as soon as the [Shutdown] signal is received. + /// + /// See also [`Handle::spawn_blocking`]. + pub fn spawn_blocking(&self, fut: F) -> JoinHandle<()> + where + F: Future + Send + 'static, + { + self.spawn_task_as(fut, TaskKind::Blocking) + } + + /// Spawns the task onto the runtime. + /// The given future resolves as soon as the [Shutdown] signal is received. + /// + /// See also [`Handle::spawn`]. 
+ pub fn spawn_with_signal(&self, f: impl FnOnce(Shutdown) -> F) -> JoinHandle<()> + where + F: Future + Send + 'static, + { + let on_shutdown = self.on_shutdown.clone(); + let fut = f(on_shutdown); + + let task = fut.in_current_span(); + + self.handle.spawn(task) + } + + /// Spawns a critical task depending on the given [`TaskKind`] + fn spawn_critical_as( + &self, + name: &'static str, + fut: F, + task_kind: TaskKind, + ) -> JoinHandle<()> + where + F: Future + Send + 'static, + { + let panicked_tasks_tx = self.task_events_tx.clone(); + let on_shutdown = self.on_shutdown.clone(); + + // wrap the task in catch unwind + let task = std::panic::AssertUnwindSafe(fut) + .catch_unwind() + .map_err(move |error| { + let task_error = PanickedTaskError::new(name, error); + tracing::error!("{task_error}"); + let _ = panicked_tasks_tx.send(TaskEvent::Panic(task_error)); + }) + .in_current_span(); + + let task = async move { + let task = pin!(task); + let _ = select(on_shutdown, task).await; + }; + + self.spawn_on_rt(task, task_kind) + } + + /// This spawns a critical blocking task onto the runtime. + /// The given future resolves as soon as the [Shutdown] signal is received. + /// + /// If this task panics, the [`TaskManager`] is notified. + pub fn spawn_critical_blocking(&self, name: &'static str, fut: F) -> JoinHandle<()> + where + F: Future + Send + 'static, + { + self.spawn_critical_as(name, fut, TaskKind::Blocking) + } + + /// This spawns a critical task onto the runtime. + /// The given future resolves as soon as the [Shutdown] signal is received. + /// + /// If this task panics, the [`TaskManager`] is notified. + pub fn spawn_critical(&self, name: &'static str, fut: F) -> JoinHandle<()> + where + F: Future + Send + 'static, + { + self.spawn_critical_as(name, fut, TaskKind::Default) + } + + /// This spawns a critical task onto the runtime. + /// + /// If this task panics, the [`TaskManager`] is notified. + pub fn spawn_critical_with_shutdown_signal( + &self, + name: &'static str, + f: impl FnOnce(Shutdown) -> F, + ) -> JoinHandle<()> + where + F: Future + Send + 'static, + { + let panicked_tasks_tx = self.task_events_tx.clone(); + let on_shutdown = self.on_shutdown.clone(); + let fut = f(on_shutdown); + + // wrap the task in catch unwind + let task = std::panic::AssertUnwindSafe(fut) + .catch_unwind() + .map_err(move |error| { + let task_error = PanickedTaskError::new(name, error); + tracing::error!("{task_error}"); + let _ = panicked_tasks_tx.send(TaskEvent::Panic(task_error)); + }) + .map(drop) + .in_current_span(); + + self.handle.spawn(task) + } + + /// This spawns a critical task onto the runtime. + /// + /// If this task panics, the [`TaskManager`] is notified. + /// The [`TaskManager`] will wait until the given future has completed before shutting down. 
+ /// + /// # Example + /// + /// ```no_run + /// # async fn t(executor: buildernet_orderflow_proxy::tasks::TaskExecutor) { + /// + /// executor.spawn_critical_with_graceful_shutdown_signal("grace", |shutdown| async move { + /// // await the shutdown signal + /// let guard = shutdown.await; + /// // do work before exiting the program + /// tokio::time::sleep(std::time::Duration::from_secs(1)).await; + /// // allow graceful shutdown + /// drop(guard); + /// }); + /// # } + /// ``` + pub fn spawn_critical_with_graceful_shutdown_signal( + &self, + name: &'static str, + f: impl FnOnce(GracefulShutdown) -> F, + ) -> JoinHandle<()> + where + F: Future + Send + 'static, + { + let panicked_tasks_tx = self.task_events_tx.clone(); + let on_shutdown = GracefulShutdown::new( + self.on_shutdown.clone(), + GracefulShutdownGuard::new(Arc::clone(&self.graceful_tasks)), + ); + let fut = f(on_shutdown); + + // wrap the task in catch unwind + let task = std::panic::AssertUnwindSafe(fut) + .catch_unwind() + .map_err(move |error| { + let task_error = PanickedTaskError::new(name, error); + tracing::error!("{task_error}"); + let _ = panicked_tasks_tx.send(TaskEvent::Panic(task_error)); + }) + .map(drop) + .in_current_span(); + + self.handle.spawn(task) + } + + /// This spawns a regular task onto the runtime. + /// + /// The [`TaskManager`] will wait until the given future has completed before shutting down. + /// + /// # Example + /// + /// ```no_run + /// # async fn t(executor: buildernet_orderflow_proxy::tasks::TaskExecutor) { + /// + /// executor.spawn_with_graceful_shutdown_signal(|shutdown| async move { + /// // await the shutdown signal + /// let guard = shutdown.await; + /// // do work before exiting the program + /// tokio::time::sleep(std::time::Duration::from_secs(1)).await; + /// // allow graceful shutdown + /// drop(guard); + /// }); + /// # } + /// ``` + pub fn spawn_with_graceful_shutdown_signal( + &self, + f: impl FnOnce(GracefulShutdown) -> F, + ) -> JoinHandle<()> + where + F: Future + Send + 'static, + { + let on_shutdown = GracefulShutdown::new( + self.on_shutdown.clone(), + GracefulShutdownGuard::new(Arc::clone(&self.graceful_tasks)), + ); + let fut = f(on_shutdown); + + self.handle.spawn(fut) + } + + /// Sends a request to the `TaskManager` to initiate a graceful shutdown. + /// + /// Caution: This will terminate the entire program. + /// + /// The [`TaskManager`] upon receiving this event, will terminate and initiate the shutdown that + /// can be handled via the returned [`GracefulShutdown`]. 
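For the method that follows, a compact usage sketch (hypothetical component, mirroring what the `test_graceful_shutdown_triggered_by_executor` test below does more thoroughly):

    // Sketch: a component holding a TaskExecutor clone can itself request shutdown,
    // e.g. after a fatal error, and wait for the signal to come back around.
    async fn shutdown_on_fatal_error(executor: TaskExecutor) {
        if let Ok(shutdown) = executor.initiate_graceful_shutdown() {
            // Resolves once the TaskManager has processed the request and fired the signal.
            let _guard = shutdown.await;
            // Last-minute cleanup goes here; dropping the guard lets the manager
            // finish its graceful-shutdown accounting.
        }
    }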
+ pub fn initiate_graceful_shutdown( + &self, + ) -> Result> { + self.task_events_tx + .send(TaskEvent::GracefulShutdown) + .map_err(|_send_error_with_task_event| tokio::sync::mpsc::error::SendError(()))?; + + Ok(GracefulShutdown::new( + self.on_shutdown.clone(), + GracefulShutdownGuard::new(Arc::clone(&self.graceful_tasks)), + )) + } +} + +impl TaskSpawner for TaskExecutor { + fn spawn(&self, fut: BoxFuture<'static, ()>) -> JoinHandle<()> { + self.spawn(fut) + } + + fn spawn_critical(&self, name: &'static str, fut: BoxFuture<'static, ()>) -> JoinHandle<()> { + Self::spawn_critical(self, name, fut) + } + + fn spawn_blocking(&self, fut: BoxFuture<'static, ()>) -> JoinHandle<()> { + self.spawn_blocking(fut) + } + + fn spawn_critical_blocking( + &self, + name: &'static str, + fut: BoxFuture<'static, ()>, + ) -> JoinHandle<()> { + Self::spawn_critical_blocking(self, name, fut) + } +} + +/// `TaskSpawner` with extended behaviour +#[auto_impl::auto_impl(&, Arc)] +pub trait TaskSpawnerExt: Send + Sync + Unpin + std::fmt::Debug + DynClone { + /// This spawns a critical task onto the runtime. + /// + /// If this task panics, the [`TaskManager`] is notified. + /// The [`TaskManager`] will wait until the given future has completed before shutting down. + fn spawn_critical_with_graceful_shutdown_signal( + &self, + name: &'static str, + f: impl FnOnce(GracefulShutdown) -> F, + ) -> JoinHandle<()> + where + F: Future + Send + 'static; + + /// This spawns a regular task onto the runtime. + /// + /// The [`TaskManager`] will wait until the given future has completed before shutting down. + fn spawn_with_graceful_shutdown_signal( + &self, + f: impl FnOnce(GracefulShutdown) -> F, + ) -> JoinHandle<()> + where + F: Future + Send + 'static; +} + +impl TaskSpawnerExt for TaskExecutor { + fn spawn_critical_with_graceful_shutdown_signal( + &self, + name: &'static str, + f: impl FnOnce(GracefulShutdown) -> F, + ) -> JoinHandle<()> + where + F: Future + Send + 'static, + { + Self::spawn_critical_with_graceful_shutdown_signal(self, name, f) + } + + fn spawn_with_graceful_shutdown_signal( + &self, + f: impl FnOnce(GracefulShutdown) -> F, + ) -> JoinHandle<()> + where + F: Future + Send + 'static, + { + Self::spawn_with_graceful_shutdown_signal(self, f) + } +} + +/// Determines how a task is spawned +enum TaskKind { + /// Spawn the task to the default executor [`Handle::spawn`] + Default, + /// Spawn the task to the blocking executor [`Handle::spawn_blocking`] + Blocking, +} + +/// Error returned by `try_current` when no task executor has been configured. 
+#[derive(Debug, Default, thiserror::Error)] +#[error("No current task executor available.")] +#[non_exhaustive] +pub struct NoCurrentTaskExecutorError; + +#[cfg(test)] +mod tests { + use super::*; + use std::{sync::atomic::AtomicBool, time::Duration}; + + #[test] + fn test_cloneable() { + #[derive(Clone)] + struct ExecutorWrapper { + _e: Box, + } + + let executor: Box = Box::::default(); + let _e = dyn_clone::clone_box(&*executor); + + let e = ExecutorWrapper { _e }; + let _e2 = e; + } + + #[test] + fn test_critical() { + let runtime = tokio::runtime::Runtime::new().unwrap(); + let handle = runtime.handle().clone(); + let manager = TaskManager::new(handle); + let executor = manager.executor(); + + executor.spawn_critical("this is a critical task", async { panic!("intentionally panic") }); + + runtime.block_on(async move { + let err_result = manager.await; + assert!(err_result.is_err(), "Expected TaskManager to return an error due to panic"); + let panicked_err = err_result.unwrap_err(); + + assert_eq!(panicked_err.task_name, "this is a critical task"); + assert_eq!(panicked_err.error, Some("intentionally panic".to_string())); + }) + } + + // Tests that spawned tasks are terminated if the `TaskManager` drops + #[test] + fn test_manager_shutdown_critical() { + let runtime = tokio::runtime::Runtime::new().unwrap(); + let handle = runtime.handle().clone(); + let manager = TaskManager::new(handle.clone()); + let executor = manager.executor(); + + let (signal, shutdown) = signal(); + + executor.spawn_critical("this is a critical task", async move { + tokio::time::sleep(Duration::from_millis(200)).await; + drop(signal); + }); + + drop(manager); + + handle.block_on(shutdown); + } + + // Tests that spawned tasks are terminated if the `TaskManager` drops + #[test] + fn test_manager_shutdown() { + let runtime = tokio::runtime::Runtime::new().unwrap(); + let handle = runtime.handle().clone(); + let manager = TaskManager::new(handle.clone()); + let executor = manager.executor(); + + let (signal, shutdown) = signal(); + + executor.spawn(Box::pin(async move { + tokio::time::sleep(Duration::from_millis(200)).await; + drop(signal); + })); + + drop(manager); + + handle.block_on(shutdown); + } + + #[test] + fn test_manager_graceful_shutdown() { + let runtime = tokio::runtime::Runtime::new().unwrap(); + let handle = runtime.handle().clone(); + let manager = TaskManager::new(handle); + let executor = manager.executor(); + + let val = Arc::new(AtomicBool::new(false)); + let c = val.clone(); + executor.spawn_critical_with_graceful_shutdown_signal("grace", |shutdown| async move { + let _guard = shutdown.await; + tokio::time::sleep(Duration::from_millis(200)).await; + c.store(true, Ordering::Relaxed); + }); + + manager.graceful_shutdown(); + assert!(val.load(Ordering::Relaxed)); + } + + #[test] + fn test_manager_graceful_shutdown_many() { + let runtime = tokio::runtime::Runtime::new().unwrap(); + let handle = runtime.handle().clone(); + let manager = TaskManager::new(handle); + let executor = manager.executor(); + + let counter = Arc::new(AtomicUsize::new(0)); + let num = 10; + for _ in 0..num { + let c = counter.clone(); + executor.spawn_critical_with_graceful_shutdown_signal( + "grace", + move |shutdown| async move { + let _guard = shutdown.await; + tokio::time::sleep(Duration::from_millis(200)).await; + c.fetch_add(1, Ordering::SeqCst); + }, + ); + } + + manager.graceful_shutdown(); + assert_eq!(counter.load(Ordering::Relaxed), num); + } + + #[test] + fn test_manager_graceful_shutdown_timeout() { + let runtime 
= tokio::runtime::Runtime::new().unwrap(); + let handle = runtime.handle().clone(); + let manager = TaskManager::new(handle); + let executor = manager.executor(); + + let timeout = Duration::from_millis(500); + let val = Arc::new(AtomicBool::new(false)); + let val2 = val.clone(); + executor.spawn_critical_with_graceful_shutdown_signal("grace", |shutdown| async move { + let _guard = shutdown.await; + tokio::time::sleep(timeout * 3).await; + val2.store(true, Ordering::Relaxed); + unreachable!("should not be reached"); + }); + + manager.graceful_shutdown_with_timeout(timeout); + assert!(!val.load(Ordering::Relaxed)); + } + + #[test] + fn test_graceful_shutdown_triggered_by_executor() { + let runtime = tokio::runtime::Runtime::new().unwrap(); + let task_manager = TaskManager::new(runtime.handle().clone()); + let executor = task_manager.executor(); + + let task_did_shutdown_flag = Arc::new(AtomicBool::new(false)); + let flag_clone = task_did_shutdown_flag.clone(); + + let spawned_task_handle = executor.spawn_with_signal(|shutdown_signal| async move { + shutdown_signal.await; + flag_clone.store(true, Ordering::SeqCst); + }); + + let manager_future_handle = runtime.spawn(task_manager); + + let send_result = executor.initiate_graceful_shutdown(); + assert!(send_result.is_ok(), "Sending the graceful shutdown signal should succeed and return a GracefulShutdown future"); + + let manager_final_result = runtime.block_on(manager_future_handle); + + assert!(manager_final_result.is_ok(), "TaskManager task should not panic"); + assert_eq!( + manager_final_result.unwrap(), + Ok(()), + "TaskManager should resolve cleanly with Ok(()) after graceful shutdown request" + ); + + let task_join_result = runtime.block_on(spawned_task_handle); + assert!(task_join_result.is_ok(), "Spawned task should complete without panic"); + + assert!( + task_did_shutdown_flag.load(Ordering::Relaxed), + "Task should have received the shutdown signal and set the flag" + ); + } +} diff --git a/crates/rbuilder-utils/src/tokio/shutdown.rs b/crates/rbuilder-utils/src/tokio/shutdown.rs new file mode 100644 index 000000000..e108c31c5 --- /dev/null +++ b/crates/rbuilder-utils/src/tokio/shutdown.rs @@ -0,0 +1,162 @@ +//! Helper for shutdown signals + +use futures_util::{ + future::{FusedFuture, Shared}, + FutureExt, +}; +use std::{ + future::Future, + pin::Pin, + sync::{atomic::AtomicUsize, Arc}, + task::{ready, Context, Poll}, +}; +use tokio::sync::oneshot; + +/// A [`Future`] that resolves when the shutdown event has been fired. +/// +/// Compared to [`Shutdown`] it is "graceful", meaning that when it resolves it returns a +/// [`GracefulShutdownGuard`]. +#[derive(Debug)] +pub struct GracefulShutdown { + shutdown: Shutdown, + guard: Option, +} + +impl GracefulShutdown { + /// Creates a new instance of `Self`. To do so, it requires a [`Shutdown`] future, that will + /// drive `Self` to resolution, and the [`GracefulShutdownGuard`] used to notify the completion + /// of the graceful shutdown produre. 
+ pub(crate) const fn new(shutdown: Shutdown, guard: GracefulShutdownGuard) -> Self { + Self { shutdown, guard: Some(guard) } + } +} + +impl Future for GracefulShutdown { + type Output = GracefulShutdownGuard; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + ready!(self.shutdown.poll_unpin(cx)); + Poll::Ready(self.get_mut().guard.take().expect("Future polled after completion")) + } +} + +impl Clone for GracefulShutdown { + fn clone(&self) -> Self { + Self { + shutdown: self.shutdown.clone(), + guard: self.guard.as_ref().map(|g| GracefulShutdownGuard::new(Arc::clone(&g.0))), + } + } +} + +/// A guard that fires once dropped to signal the [`TaskManager`](crate::TaskManager) that the +/// [`GracefulShutdown`] has completed. +#[derive(Debug)] +#[must_use = "if unused the task will not be gracefully shutdown"] +pub struct GracefulShutdownGuard(Arc); + +impl GracefulShutdownGuard { + pub(crate) fn new(counter: Arc) -> Self { + counter.fetch_add(1, std::sync::atomic::Ordering::SeqCst); + Self(counter) + } +} + +impl Drop for GracefulShutdownGuard { + fn drop(&mut self) { + self.0.fetch_sub(1, std::sync::atomic::Ordering::SeqCst); + } +} + +/// A [`Future`] that resolves when a shutdown event is fired. +#[derive(Debug, Clone)] +pub struct Shutdown( + /// The internal [`oneshot`] channel receiver, wrapped in a + /// [`futures_util::FutureExt::shared`] so that it can be cloned and polled from multiple + /// tasks. + Shared>, +); + +impl Future for Shutdown { + type Output = (); + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let pin = self.get_mut(); + if pin.0.is_terminated() || pin.0.poll_unpin(cx).is_ready() { + Poll::Ready(()) + } else { + Poll::Pending + } + } +} + +/// Shutdown signal that fires either manually or on drop by closing the channel +#[derive(Debug)] +pub struct Signal(oneshot::Sender<()>); + +impl Signal { + /// Fire the signal manually. 
+ pub fn fire(self) { + let _ = self.0.send(()); + } +} + +/// Create a channel pair that's used to propagate shutdown event +pub fn signal() -> (Signal, Shutdown) { + let (sender, receiver) = oneshot::channel(); + (Signal(sender), Shutdown(receiver.shared())) +} + +#[cfg(test)] +mod tests { + use super::*; + use futures_util::future::join_all; + use std::time::Duration; + + #[tokio::test(flavor = "multi_thread")] + async fn test_shutdown() { + let (_signal, _shutdown) = signal(); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_drop_signal() { + let (signal, shutdown) = signal(); + + tokio::task::spawn(async move { + tokio::time::sleep(Duration::from_millis(500)).await; + drop(signal) + }); + + shutdown.await; + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_multi_shutdowns() { + let (signal, shutdown) = signal(); + + let mut tasks = Vec::with_capacity(100); + for _ in 0..100 { + let shutdown = shutdown.clone(); + let task = tokio::task::spawn(async move { + shutdown.await; + }); + tasks.push(task); + } + + drop(signal); + + join_all(tasks).await; + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_drop_signal_from_thread() { + let (signal, shutdown) = signal(); + + let _thread = std::thread::spawn(|| { + std::thread::sleep(Duration::from_millis(500)); + drop(signal) + }); + + shutdown.await; + } +} From 3781bd12aedae989e68ae0a16bfe070c52865ab4 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Wed, 22 Oct 2025 16:58:45 -0300 Subject: [PATCH 5/9] lock --- Cargo.lock | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 2106053a1..960b99bfe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9506,13 +9506,18 @@ version = "0.1.0" dependencies = [ "ahash", "alloy-primitives 1.4.1", + "auto_impl", "clickhouse 0.13.3", "clickhouse-derive 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "derivative", "derive_more 2.0.1", + "dyn-clone", "eyre", + "futures", + "futures-util", "governor", "integer-encoding", + "rand 0.9.2", "redb", "reqwest 0.12.24", "serde", @@ -9521,11 +9526,14 @@ dependencies = [ "sha2 0.10.9", "strum 0.27.2", "strum_macros 0.27.2", + "tempfile", "thiserror 1.0.69", "time", "tokio", "toml 0.8.23", "tracing", + "tracing-futures", + "tracing-subscriber 0.3.20", "uuid", ] From 482e75114230f557d99c3bdfdc378a25bbb3fadb Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Thu, 23 Oct 2025 09:31:37 -0300 Subject: [PATCH 6/9] final polish --- .../src/clickhouse_with_backup/backup.rs | 81 +++++++++---------- .../src/clickhouse_with_backup/metrics.rs | 20 ++--- .../src/clickhouse_with_backup/mod.rs | 48 ++++------- .../src/clickhouse_with_backup/primitives.rs | 4 +- crates/rbuilder-utils/src/tokio/mod.rs | 34 +++++--- crates/rbuilder-utils/src/tokio/shutdown.rs | 17 +++- 6 files changed, 107 insertions(+), 97 deletions(-) diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs index 638d18097..7ce25e6cf 100644 --- a/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs +++ b/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs @@ -25,8 +25,7 @@ use crate::{ tokio::TaskExecutor, }; -/// @PendingDX REMOVETracing target for the backup actor. -const TARGET: &str = "indexer::backup"; +const TARGET: &str = "clickhouse_with_backup::backup"; /// A type alias for disk backup keys. 
type DiskBackupKey = u128; @@ -52,7 +51,7 @@ fn new_disk_backup_key() -> DiskBackupKey { /// Represents data we failed to commit to clickhouse, including the rows and some information /// about the size of such data. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub(crate) struct FailedCommit { +pub struct FailedCommit { /// The actual rows we were trying to commit. rows: Vec, /// The quantities related to such commit, like the total size in bytes. @@ -60,7 +59,7 @@ pub(crate) struct FailedCommit { } impl FailedCommit { - pub(crate) fn new(rows: Vec, quantities: Quantities) -> Self { + pub fn new(rows: Vec, quantities: Quantities) -> Self { Self { rows, quantities } } } @@ -110,7 +109,7 @@ impl Default for FailedCommits { /// Configuration for the [`DiskBackup`] of failed commits. #[derive(Debug)] -pub(crate) struct DiskBackupConfig { +pub struct DiskBackupConfig { /// The path where the backup database is stored. path: PathBuf, /// The maximum size in bytes for holding past failed commits on disk. @@ -120,7 +119,7 @@ pub(crate) struct DiskBackupConfig { } impl DiskBackupConfig { - pub(crate) fn new() -> Self { + pub fn new() -> Self { Self { path: default_disk_backup_database_path().into(), max_size_bytes: MAX_DISK_BACKUP_SIZE_BYTES, @@ -128,14 +127,14 @@ impl DiskBackupConfig { } } - pub(crate) fn with_path>(mut self, path: Option

) -> Self { + pub fn with_path>(mut self, path: Option

) -> Self { if let Some(p) = path { self.path = p.into(); } self } - pub(crate) fn with_max_size_bytes(mut self, max_size_bytes: Option) -> Self { + pub fn with_max_size_bytes(mut self, max_size_bytes: Option) -> Self { if let Some(max_size_bytes) = max_size_bytes { self.max_size_bytes = max_size_bytes; } @@ -143,7 +142,7 @@ impl DiskBackupConfig { } #[allow(dead_code)] - pub(crate) fn with_immediate_commit_interval(mut self, interval: Option) -> Self { + pub fn with_immediate_commit_interval(mut self, interval: Option) -> Self { if let Some(interval) = interval { self.flush_interval = tokio::time::interval(interval); } @@ -151,6 +150,12 @@ impl DiskBackupConfig { } } +impl Default for DiskBackupConfig { + fn default() -> Self { + Self::new() + } +} + impl Clone for DiskBackupConfig { fn clone(&self) -> Self { Self { @@ -162,14 +167,14 @@ impl Clone for DiskBackupConfig { } #[derive(Debug, Clone, Copy)] -pub(crate) struct MemoryBackupConfig { +pub struct MemoryBackupConfig { /// The maximum size in bytes for holding past failed commits in-memory. Once we go over this /// threshold, pressure is applied and old commits are dropped. pub max_size_bytes: u64, } impl MemoryBackupConfig { - pub(crate) fn new(max_size_bytes: u64) -> Self { + pub fn new(max_size_bytes: u64) -> Self { Self { max_size_bytes } } } @@ -218,7 +223,7 @@ pub(crate) enum DiskBackupError { /// for scoped access. If you want to write to another table, clone it using /// [`Self::clone_with_table`]. #[derive(Debug)] -pub(crate) struct DiskBackup { +pub struct DiskBackup { db: Arc>, config: DiskBackupConfig, @@ -226,7 +231,7 @@ pub(crate) struct DiskBackup { } impl DiskBackup { - pub(crate) fn new( + pub fn new( config: DiskBackupConfig, task_executor: &TaskExecutor, ) -> Result { @@ -254,7 +259,7 @@ impl DiskBackup { } /// Like `clone`, but allows to change the type parameter `U`. - pub(crate) fn clone_to(&self) -> DiskBackup { + pub fn clone_to(&self) -> DiskBackup { DiskBackup { db: self.db.clone(), config: self.config.clone(), @@ -484,7 +489,7 @@ impl Default for MemoryBackup { /// data structure holds. Once this has been hit, pressure applies, meaning that we try again a /// certain failed commit for a finite number of times, and then we discard it to accomdate new /// data. -pub(crate) struct Backup { +pub struct Backup { /// The receiver of failed commit attempts. /// /// Rationale for sending multiple rows instead of sending rows: the backup abstraction must @@ -513,7 +518,7 @@ pub(crate) struct Backup { } impl Backup { - pub(crate) fn new( + pub fn new( rx: mpsc::Receiver>, inserter: Inserter, disk_backup: DiskBackup, @@ -532,7 +537,7 @@ impl Backup { } /// Override the default memory backup configuration. 
- pub(crate) fn with_memory_backup_config(mut self, config: MemoryBackupConfig) -> Self { + pub fn with_memory_backup_config(mut self, config: MemoryBackupConfig) -> Self { self.memory_backup.config = config; self } @@ -556,31 +561,23 @@ impl Backup { match self.disk_backup.save(&failed_commit) { Ok(stats) => { tracing::debug!(target: TARGET, order = T::ORDER, total_size = stats.size_bytes.format_bytes(), elapsed = ?start.elapsed(), "saved failed commit to disk"); - MetricsType::set_clickhouse_disk_backup_size( - stats.size_bytes, - stats.total_batches, - T::ORDER, - ); + MetricsType::set_disk_backup_size(stats.size_bytes, stats.total_batches, T::ORDER); return; } Err(e) => { tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to write commit, trying in-memory"); - MetricsType::increment_clickhouse_backup_disk_errors(T::ORDER, e.as_ref()); + MetricsType::increment_backup_disk_errors(T::ORDER, e.as_ref()); } }; let stats = self.memory_backup.save(failed_commit); - MetricsType::set_clickhouse_memory_backup_size( - stats.size_bytes, - stats.total_batches, - T::ORDER, - ); + MetricsType::set_memory_backup_size(stats.size_bytes, stats.total_batches, T::ORDER); tracing::debug!(target: TARGET, order = T::ORDER, bytes = ?quantities.bytes, rows = ?quantities.rows, ?stats, "saved failed commit in-memory"); if let Some((stats, oldest_quantities)) = self.memory_backup.drop_excess() { tracing::warn!(target: TARGET, order = T::ORDER, ?stats, "failed commits exceeded max memory backup size, dropping oldest"); - MetricsType::process_clickhouse_backup_data_lost_quantities(&oldest_quantities); + MetricsType::process_backup_data_lost_quantities(&oldest_quantities); // Clear the cached last commit if it was from memory and we just dropped it. self.last_cached = self .last_cached @@ -616,7 +613,7 @@ impl Backup { } Err(e) => { tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to retrieve oldest failed commit from disk"); - MetricsType::increment_clickhouse_backup_disk_errors(T::ORDER, e.as_ref()); + MetricsType::increment_backup_disk_errors(T::ORDER, e.as_ref()); None } } @@ -628,7 +625,7 @@ impl Backup { let value_ref = T::to_row_ref(row); if let Err(e) = self.inserter.write(value_ref).await { - MetricsType::increment_clickhouse_write_failures(e.to_string()); + MetricsType::increment_write_failures(e.to_string()); tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to write to backup inserter"); continue; } @@ -642,7 +639,7 @@ impl Backup { match self.disk_backup.delete(key) { Ok(stats) => { tracing::debug!(target: TARGET, order = T::ORDER, total_size = stats.size_bytes.format_bytes(), elapsed = ?start.elapsed(), "deleted failed commit from disk"); - MetricsType::set_clickhouse_disk_backup_size( + MetricsType::set_disk_backup_size( stats.size_bytes, stats.total_batches, T::ORDER, @@ -659,7 +656,7 @@ impl Backup { /// Run the backup actor until it is possible to receive messages. /// /// If some data were stored on disk previously, they will be retried first. - pub(crate) async fn run(&mut self) { + pub async fn run(&mut self) { loop { tokio::select! { maybe_failed_commit = self.rx.recv() => { @@ -673,7 +670,7 @@ impl Backup { _ = self.interval.tick() => { let Some(oldest) = self.retrieve_oldest() else { self.interval.reset(); - MetricsType::set_clickhouse_backup_empty_size(T::ORDER); + MetricsType::set_backup_empty_size(T::ORDER); continue // Nothing to do! 
}; @@ -683,14 +680,14 @@ impl Backup { match self.inserter.force_commit().await { Ok(quantities) => { tracing::info!(target: TARGET, order = T::ORDER, ?quantities, "successfully backed up"); - MetricsType::process_clickhouse_backup_data_quantities(&quantities.into()); - MetricsType::record_clickhouse_batch_commit_time(start.elapsed()); + MetricsType::process_backup_data_quantities(&quantities.into()); + MetricsType::record_batch_commit_time(start.elapsed()); self.interval.reset(); self.purge_commit(&oldest).await; } Err(e) => { tracing::error!(target: TARGET, order = T::ORDER, ?e, quantities = ?oldest.commit.quantities, "failed to commit bundle to clickhouse from backup"); - MetricsType::increment_clickhouse_commit_failures(e.to_string()); + MetricsType::increment_commit_failures(e.to_string()); self.last_cached = Some(oldest); continue; } @@ -702,31 +699,31 @@ impl Backup { /// To call on shutdown, tries make a last-resort attempt to post back to Clickhouse all /// in-memory data. - pub(crate) async fn end(mut self) { + pub async fn end(mut self) { for failed_commit in self.memory_backup.failed_commits.drain(..) { for row in &failed_commit.rows { let value_ref = T::to_row_ref(row); if let Err(e) = self.inserter.write(value_ref).await { tracing::error!( target: TARGET, order = T::ORDER, ?e, "failed to write to backup inserter during shutdown"); - MetricsType::increment_clickhouse_write_failures(e.to_string()); + MetricsType::increment_write_failures(e.to_string()); continue; } } if let Err(e) = self.inserter.force_commit().await { tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to commit backup to CH during shutdown, trying disk"); - MetricsType::increment_clickhouse_commit_failures(e.to_string()); + MetricsType::increment_commit_failures(e.to_string()); } if let Err(e) = self.disk_backup.save(&failed_commit) { tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to write commit to disk backup during shutdown"); - MetricsType::increment_clickhouse_backup_disk_errors(T::ORDER, e.as_ref()); + MetricsType::increment_backup_disk_errors(T::ORDER, e.as_ref()); } } if let Err(e) = self.disk_backup.flush().await { tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to flush disk backup during shutdown"); - MetricsType::increment_clickhouse_backup_disk_errors(T::ORDER, e.as_ref()); + MetricsType::increment_backup_disk_errors(T::ORDER, e.as_ref()); } else { tracing::info!(target: TARGET, order = T::ORDER, "flushed disk backup during shutdown"); } diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/metrics.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/metrics.rs index 434175859..1a6d1df39 100644 --- a/crates/rbuilder-utils/src/clickhouse_with_backup/metrics.rs +++ b/crates/rbuilder-utils/src/clickhouse_with_backup/metrics.rs @@ -3,13 +3,15 @@ use std::time::Duration; /// Metrics updated by the clickhouse_with_backup mod. 
pub trait Metrics { - fn increment_clickhouse_write_failures(err: String); - fn process_clickhouse_quantities(quantities: &Quantities); - fn record_clickhouse_batch_commit_time(duration: Duration); - fn increment_clickhouse_commit_failures(err: String); - fn set_clickhouse_queue_size(size: usize, order: &'static str); - fn set_clickhouse_disk_backup_size(size_bytes: u64, batches: usize, order: &'static str); - fn increment_clickhouse_backup_disk_errors(order: &'static str, error: &str); - fn set_clickhouse_memory_backup_size(size_bytes: u64, batches: usize, order: &'static str); - fn process_clickhouse_backup_data_lost_quantities(quantities: &Quantities); + fn increment_write_failures(err: String); + fn process_quantities(quantities: &Quantities); + fn record_batch_commit_time(duration: Duration); + fn increment_commit_failures(err: String); + fn set_queue_size(size: usize, order: &'static str); + fn set_disk_backup_size(size_bytes: u64, batches: usize, order: &'static str); + fn increment_backup_disk_errors(order: &'static str, error: &str); + fn set_memory_backup_size(size_bytes: u64, batches: usize, order: &'static str); + fn process_backup_data_lost_quantities(quantities: &Quantities); + fn process_backup_data_quantities(quantities: &Quantities); + fn set_backup_empty_size(order: &'static str); } diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs index ba8e1cd44..104c380fb 100644 --- a/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs +++ b/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs @@ -23,20 +23,20 @@ use crate::{ metrics::Sampler, }; -mod backup; +pub mod backup; pub mod macros; pub mod metrics; /// mod macros; /// mod models; -pub(crate) mod primitives; +pub mod primitives; /// A default maximum size in bytes for the in-memory backup of failed commits. -pub(crate) const MAX_MEMORY_BACKUP_SIZE_BYTES: u64 = 1024 * 1024 * 1024; // 1 GiB +pub const MAX_MEMORY_BACKUP_SIZE_BYTES: u64 = 1024 * 1024 * 1024; // 1 GiB /// A default maximum size in bytes for the disk backup of failed commits. -pub(crate) const MAX_DISK_BACKUP_SIZE_BYTES: u64 = 10 * 1024 * 1024 * 1024; // 10 GiB +pub const MAX_DISK_BACKUP_SIZE_BYTES: u64 = 10 * 1024 * 1024 * 1024; // 10 GiB /// The default path where the backup database is stored. For tests, a temporary file is used. -pub(crate) fn default_disk_backup_database_path() -> String { +pub fn default_disk_backup_database_path() -> String { #[cfg(test)] return tempfile::NamedTempFile::new() .unwrap() @@ -57,7 +57,7 @@ pub(crate) fn default_disk_backup_database_path() -> String { } /// A clickhouse inserter with some sane defaults. -fn default_inserter(client: &ClickhouseClient, table_name: &str) -> Inserter { +pub fn default_inserter(client: &ClickhouseClient, table_name: &str) -> Inserter { // TODO: make this configurable. let send_timeout = Duration::from_secs(2); let end_timeout = Duration::from_secs(3); @@ -72,7 +72,7 @@ fn default_inserter(client: &ClickhouseClient, table_name: &str) -> Inse } /// A wrapper over a Clickhouse [`Inserter`] that supports a backup mechanism. -struct ClickhouseInserter { +pub struct ClickhouseInserter { /// The inner Clickhouse inserter client. inner: Inserter, /// A small in-memory backup of the current data we're trying to commit.
In case this fails to @@ -84,7 +84,7 @@ struct ClickhouseInserter { } impl ClickhouseInserter { - fn new(inner: Inserter, backup_tx: mpsc::Sender>) -> Self { + pub fn new(inner: Inserter, backup_tx: mpsc::Sender>) -> Self { let rows_backup = Vec::new(); Self { inner, @@ -100,7 +100,7 @@ impl ClickhouseInserter ClickhouseInserter { - MetricsType::increment_clickhouse_commit_failures(e.to_string()); + MetricsType::increment_commit_failures(e.to_string()); tracing::error!(target: TARGET, order = T::ORDER, ?e, "failed to commit bundle to clickhouse"); let rows = std::mem::take(&mut self.rows_backup); @@ -143,7 +143,7 @@ impl ClickhouseInserter ClickhouseResult { + pub async fn end(self) -> ClickhouseResult { self.inner.end().await.map(Into::into) } } @@ -159,7 +159,7 @@ impl std::fmt::Debug for ClickhouseInserter { +pub struct InserterRunner { /// The channel from which we can receive new orders to index. rx: mpsc::Receiver, /// The underlying Clickhouse inserter. @@ -169,7 +169,7 @@ struct InserterRunner { } impl InserterRunner { - fn new( + pub fn new( rx: mpsc::Receiver, inserter: ClickhouseInserter, builder_name: String, @@ -182,7 +182,7 @@ impl InserterRunner InserterRunner Self { - Self { - host: args.host.clone().expect("host is set"), - database: args.database.clone().expect("database is set"), - username: args.username.clone().expect("username is set"), - password: args.password.clone().expect("password is set"), - validation, - } - } -} -*/ - impl From for ClickhouseClient { fn from(config: ClickhouseClientConfig) -> Self { ClickhouseClient::default() diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/primitives.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/primitives.rs index d691956a8..9bc53031b 100644 --- a/crates/rbuilder-utils/src/clickhouse_with_backup/primitives.rs +++ b/crates/rbuilder-utils/src/clickhouse_with_backup/primitives.rs @@ -2,7 +2,7 @@ use alloy_primitives::B256; use clickhouse::{Row, RowWrite}; use serde::{de::DeserializeOwned, Serialize}; -pub(crate) trait ClickhouseRowExt: +pub trait ClickhouseRowExt: Row + RowWrite + Serialize + DeserializeOwned + Sync + Send + 'static { /// The type of such row, e.g. "bundles" or "bundle_receipts". Used as backup db table name and @@ -19,7 +19,7 @@ pub(crate) trait ClickhouseRowExt: } /// An high-level order type that can be indexed in clickhouse. -pub(crate) trait ClickhouseIndexableOrder: Sized { +pub trait ClickhouseIndexableOrder: Sized { /// The associated inner row type that can be serialized into Clickhouse data. type ClickhouseRowType: ClickhouseRowExt; diff --git a/crates/rbuilder-utils/src/tokio/mod.rs b/crates/rbuilder-utils/src/tokio/mod.rs index 5ac9c67c7..0066bbb3a 100644 --- a/crates/rbuilder-utils/src/tokio/mod.rs +++ b/crates/rbuilder-utils/src/tokio/mod.rs @@ -63,7 +63,7 @@ pub mod shutdown; /// /// ``` /// # async fn t() { -/// use buildernet_orderflow_proxy::tasks::{TaskSpawner, TokioTaskExecutor}; +/// use rbuilder_utils::tokio::{TaskSpawner, TokioTaskExecutor}; /// let executor = TokioTaskExecutor::default(); /// /// let task = executor.spawn(Box::pin(async { @@ -76,9 +76,9 @@ pub mod shutdown; /// Use the [`TaskExecutor`] that spawns task directly onto the tokio runtime via the [Handle]. 
/// /// ``` -/// # use buildernet_orderflow_proxy::tasks::TaskManager; +/// # use rbuilder_utils::tokio::TaskManager; /// fn t() { -/// use buildernet_orderflow_proxy::tasks::TaskSpawner; +/// use rbuilder_utils::tokio::TaskSpawner; /// let rt = tokio::runtime::Runtime::new().unwrap(); /// let manager = TaskManager::new(rt.handle().clone()); /// let executor = manager.executor(); @@ -239,7 +239,10 @@ impl TaskManager { drop(self.signal); let when = timeout.map(|t| std::time::Instant::now() + t); while self.graceful_tasks.load(Ordering::Relaxed) > 0 { - if when.map(|when| std::time::Instant::now() > when).unwrap_or(false) { + if when + .map(|when| std::time::Instant::now() > when) + .unwrap_or(false) + { tracing::debug!("graceful shutdown timed out"); return false; } @@ -500,7 +503,7 @@ impl TaskExecutor { /// # Example /// /// ```no_run - /// # async fn t(executor: buildernet_orderflow_proxy::tasks::TaskExecutor) { + /// # async fn t(executor: rbuilder_utils::tokio::TaskExecutor) { /// /// executor.spawn_critical_with_graceful_shutdown_signal("grace", |shutdown| async move { /// // await the shutdown signal @@ -548,7 +551,7 @@ impl TaskExecutor { /// # Example /// /// ```no_run - /// # async fn t(executor: buildernet_orderflow_proxy::tasks::TaskExecutor) { + /// # async fn t(executor: rbuilder_utils::tokio::TaskExecutor) { /// /// executor.spawn_with_graceful_shutdown_signal(|shutdown| async move { /// // await the shutdown signal @@ -707,11 +710,16 @@ mod tests { let manager = TaskManager::new(handle); let executor = manager.executor(); - executor.spawn_critical("this is a critical task", async { panic!("intentionally panic") }); + executor.spawn_critical("this is a critical task", async { + panic!("intentionally panic") + }); runtime.block_on(async move { let err_result = manager.await; - assert!(err_result.is_err(), "Expected TaskManager to return an error due to panic"); + assert!( + err_result.is_err(), + "Expected TaskManager to return an error due to panic" + ); let panicked_err = err_result.unwrap_err(); assert_eq!(panicked_err.task_name, "this is a critical task"); @@ -845,7 +853,10 @@ mod tests { let manager_final_result = runtime.block_on(manager_future_handle); - assert!(manager_final_result.is_ok(), "TaskManager task should not panic"); + assert!( + manager_final_result.is_ok(), + "TaskManager task should not panic" + ); assert_eq!( manager_final_result.unwrap(), Ok(()), @@ -853,7 +864,10 @@ mod tests { ); let task_join_result = runtime.block_on(spawned_task_handle); - assert!(task_join_result.is_ok(), "Spawned task should complete without panic"); + assert!( + task_join_result.is_ok(), + "Spawned task should complete without panic" + ); assert!( task_did_shutdown_flag.load(Ordering::Relaxed), diff --git a/crates/rbuilder-utils/src/tokio/shutdown.rs b/crates/rbuilder-utils/src/tokio/shutdown.rs index e108c31c5..6128e759a 100644 --- a/crates/rbuilder-utils/src/tokio/shutdown.rs +++ b/crates/rbuilder-utils/src/tokio/shutdown.rs @@ -27,7 +27,10 @@ impl GracefulShutdown { /// drive `Self` to resolution, and the [`GracefulShutdownGuard`] used to notify the completion /// of the graceful shutdown produre. 
pub(crate) const fn new(shutdown: Shutdown, guard: GracefulShutdownGuard) -> Self { - Self { shutdown, guard: Some(guard) } + Self { + shutdown, + guard: Some(guard), + } } } @@ -36,7 +39,12 @@ impl Future for GracefulShutdown { fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { ready!(self.shutdown.poll_unpin(cx)); - Poll::Ready(self.get_mut().guard.take().expect("Future polled after completion")) + Poll::Ready( + self.get_mut() + .guard + .take() + .expect("Future polled after completion"), + ) } } @@ -44,7 +52,10 @@ impl Clone for GracefulShutdown { fn clone(&self) -> Self { Self { shutdown: self.shutdown.clone(), - guard: self.guard.as_ref().map(|g| GracefulShutdownGuard::new(Arc::clone(&g.0))), + guard: self + .guard + .as_ref() + .map(|g| GracefulShutdownGuard::new(Arc::clone(&g.0))), } } } From 1a9eeb6e22f764f285e4c3185d04e193608d1d3b Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Thu, 23 Oct 2025 10:59:13 -0300 Subject: [PATCH 7/9] working with orderflow proxy --- crates/rbuilder-utils/Cargo.toml | 4 +++ .../src/clickhouse_with_backup/backup.rs | 27 ++++++++++++++++--- .../src/clickhouse_with_backup/macros.rs | 18 ++++++------- .../src/clickhouse_with_backup/mod.rs | 27 ++++++++++++++----- 4 files changed, 58 insertions(+), 18 deletions(-) diff --git a/crates/rbuilder-utils/Cargo.toml b/crates/rbuilder-utils/Cargo.toml index e58904e3c..83a858a29 100644 --- a/crates/rbuilder-utils/Cargo.toml +++ b/crates/rbuilder-utils/Cargo.toml @@ -61,3 +61,7 @@ tracing-futures = "0.2.5" # misc auto_impl = "1.3.0" dyn-clone = "1.0.20" + +[features] +default = [] +test-utils = [] diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs index 7ce25e6cf..b1d86751a 100644 --- a/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs +++ b/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs @@ -512,7 +512,7 @@ pub struct Backup { last_cached: Option>, /// Whether to use only the in-memory backup (for testing purposes). 
- #[cfg(test)] + #[cfg(any(test, feature = "test-utils"))] use_only_memory_backup: bool, _metrics_phantom: std::marker::PhantomData, } @@ -530,7 +530,7 @@ impl Backup { memory_backup: MemoryBackup::default(), disk_backup, last_cached: None, - #[cfg(test)] + #[cfg(any(test, feature = "test-utils"))] use_only_memory_backup: false, _metrics_phantom: std::marker::PhantomData, } @@ -547,7 +547,7 @@ impl Backup { let quantities = failed_commit.quantities; tracing::debug!(target: TARGET, order = T::ORDER, bytes = ?quantities.bytes, rows = ?quantities.rows, "backing up failed commit"); - #[cfg(test)] + #[cfg(any(test, feature = "test-utils"))] if self.use_only_memory_backup { self.memory_backup.save(failed_commit); self.last_cached = self @@ -735,3 +735,24 @@ impl Backup { } } } + +#[cfg(any(test, feature = "test-utils"))] +impl Backup { + pub fn new_test( + rx: mpsc::Receiver>, + inserter: Inserter, + disk_backup: DiskBackup, + use_only_memory_backup: bool, + ) -> Self { + Self { + rx, + inserter, + interval: Default::default(), + memory_backup: MemoryBackup::default(), + disk_backup, + last_cached: None, + use_only_memory_backup, + _metrics_phantom: PhantomData, + } + } +} diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/macros.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/macros.rs index ea51900cd..5b08591c4 100644 --- a/crates/rbuilder-utils/src/clickhouse_with_backup/macros.rs +++ b/crates/rbuilder-utils/src/clickhouse_with_backup/macros.rs @@ -5,25 +5,25 @@ #[macro_export] macro_rules! spawn_clickhouse_inserter { - ($executor:ident, $runner:ident, $name:expr) => {{ + ($executor:ident, $runner:ident, $name:expr, $target:expr) => {{ $executor.spawn_with_graceful_shutdown_signal(|shutdown| async move { let mut shutdown_guard = None; tokio::select! { _ = $runner.run_loop() => { - tracing::info!(target: TARGET, "clickhouse {} indexer channel closed", $name); + tracing::info!(target: $target, "clickhouse {} indexer channel closed", $name); } guard = shutdown => { - tracing::info!(target: TARGET, "Received shutdown for {} indexer, performing cleanup", $name); + tracing::info!(target: $target, "Received shutdown for {} indexer, performing cleanup", $name); shutdown_guard = Some(guard); }, } - match $runner.inserter.end().await { + match $runner.end().await { Ok(quantities) => { - tracing::info!(target: TARGET, ?quantities, "finalized clickhouse {} inserter", $name); + tracing::info!(target: $target, ?quantities, "finalized clickhouse {} inserter", $name); } Err(e) => { - tracing::error!(target: TARGET, ?e, "failed to write end insertion of {} to indexer", $name); + tracing::error!(target: $target, ?e, "failed to write end insertion of {} to indexer", $name); } } @@ -34,15 +34,15 @@ macro_rules! spawn_clickhouse_inserter { #[macro_export] macro_rules! spawn_clickhouse_backup { - ($executor:ident, $backup:ident, $name: expr) => {{ + ($executor:ident, $backup:ident, $name: expr, $target:expr) => {{ $executor.spawn_with_graceful_shutdown_signal(|shutdown| async move { let mut shutdown_guard = None; tokio::select! 
{ _ = $backup.run() => { - tracing::info!(target: TARGET, "clickhouse {} backup channel closed", $name); + tracing::info!(target: $target, "clickhouse {} backup channel closed", $name); } guard = shutdown => { - tracing::info!(target: TARGET, "Received shutdown for {} backup, performing cleanup", $name); + tracing::info!(target: $target, "Received shutdown for {} backup, performing cleanup", $name); shutdown_guard = Some(guard); }, } diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs index 104c380fb..32308c1fb 100644 --- a/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs +++ b/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs @@ -168,6 +168,17 @@ pub struct InserterRunner { builder_name: String, } +impl std::fmt::Debug + for InserterRunner +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("InserterRunner") + .field("inserter", &T::ORDER.to_string()) + .field("rx", &self.rx) + .finish() + } +} + impl InserterRunner { pub fn new( rx: mpsc::Receiver, @@ -199,16 +210,20 @@ impl InserterRunner ClickhouseResult { + self.inserter.end().await + } } /// The configuration used in a [`ClickhouseClient`]. #[derive(Debug, Clone)] -pub(crate) struct ClickhouseClientConfig { - host: String, - database: String, - username: String, - password: String, - validation: bool, +pub struct ClickhouseClientConfig { + pub host: String, + pub database: String, + pub username: String, + pub password: String, + pub validation: bool, } impl From for ClickhouseClient { From 77f35b4ecd39ac547e92de2f11a7497676d3bfd4 Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Fri, 24 Oct 2025 14:13:54 -0300 Subject: [PATCH 8/9] moved stuff around. --- crates/rbuilder-utils/Cargo.toml | 2 + .../src/{metrics => }/backoff.rs | 0 .../backup}/macros.rs | 0 .../src/clickhouse/backup/metrics.rs | 66 ++ .../backup.rs => clickhouse/backup/mod.rs} | 24 +- .../backup}/primitives.rs | 0 .../mod.rs => clickhouse/indexer.rs} | 19 +- crates/rbuilder-utils/src/clickhouse/mod.rs | 2 + .../src/clickhouse_with_backup/metrics.rs | 17 - crates/rbuilder-utils/src/lib.rs | 6 +- crates/rbuilder-utils/src/metrics/mod.rs | 2 - crates/rbuilder-utils/src/tokio/mod.rs | 877 ------------------ crates/rbuilder-utils/src/tokio/shutdown.rs | 173 ---- 13 files changed, 97 insertions(+), 1091 deletions(-) rename crates/rbuilder-utils/src/{metrics => }/backoff.rs (100%) rename crates/rbuilder-utils/src/{clickhouse_with_backup => clickhouse/backup}/macros.rs (100%) create mode 100644 crates/rbuilder-utils/src/clickhouse/backup/metrics.rs rename crates/rbuilder-utils/src/{clickhouse_with_backup/backup.rs => clickhouse/backup/mod.rs} (98%) rename crates/rbuilder-utils/src/{clickhouse_with_backup => clickhouse/backup}/primitives.rs (100%) rename crates/rbuilder-utils/src/{clickhouse_with_backup/mod.rs => clickhouse/indexer.rs} (96%) delete mode 100644 crates/rbuilder-utils/src/clickhouse_with_backup/metrics.rs delete mode 100644 crates/rbuilder-utils/src/tokio/mod.rs delete mode 100644 crates/rbuilder-utils/src/tokio/shutdown.rs diff --git a/crates/rbuilder-utils/Cargo.toml b/crates/rbuilder-utils/Cargo.toml index 83a858a29..af8576010 100644 --- a/crates/rbuilder-utils/Cargo.toml +++ b/crates/rbuilder-utils/Cargo.toml @@ -9,6 +9,8 @@ repository.workspace = true exclude.workspace = true [dependencies] +reth-tasks = { git = "https://github.com/paradigmxyz/reth", rev = "9c30bf7af5e0d45deaf5917375c9922c16654b28" } + # misc derivative.workspace = 
true integer-encoding = "4.0.0" diff --git a/crates/rbuilder-utils/src/metrics/backoff.rs b/crates/rbuilder-utils/src/backoff.rs similarity index 100% rename from crates/rbuilder-utils/src/metrics/backoff.rs rename to crates/rbuilder-utils/src/backoff.rs diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/macros.rs b/crates/rbuilder-utils/src/clickhouse/backup/macros.rs similarity index 100% rename from crates/rbuilder-utils/src/clickhouse_with_backup/macros.rs rename to crates/rbuilder-utils/src/clickhouse/backup/macros.rs diff --git a/crates/rbuilder-utils/src/clickhouse/backup/metrics.rs b/crates/rbuilder-utils/src/clickhouse/backup/metrics.rs new file mode 100644 index 000000000..03293fb5a --- /dev/null +++ b/crates/rbuilder-utils/src/clickhouse/backup/metrics.rs @@ -0,0 +1,66 @@ +use crate::clickhouse::Quantities; +use std::time::Duration; + +/// Metrics updated by the clickhouse_with_backup mod. +pub trait Metrics { + fn increment_write_failures(err: String); + fn process_quantities(quantities: &Quantities); + fn record_batch_commit_time(duration: Duration); + fn increment_commit_failures(err: String); + fn set_queue_size(size: usize, order: &'static str); + fn set_disk_backup_size(size_bytes: u64, batches: usize, order: &'static str); + fn increment_backup_disk_errors(order: &'static str, error: &str); + fn set_memory_backup_size(size_bytes: u64, batches: usize, order: &'static str); + fn process_backup_data_lost_quantities(quantities: &Quantities); + fn process_backup_data_quantities(quantities: &Quantities); + fn set_backup_empty_size(order: &'static str); +} + +/// Feeling lazy? Grafana is too expensive for you? +/// Use NullMetrics! +pub struct NullMetrics {} +impl Metrics for NullMetrics { + fn increment_write_failures(_err: String) { + // No-op + } + + fn process_quantities(_quantities: &Quantities) { + // No-op + } + + fn record_batch_commit_time(_duration: Duration) { + // No-op + } + + fn increment_commit_failures(_err: String) { + // No-op + } + + fn set_queue_size(_size: usize, _order: &'static str) { + // No-op + } + + fn set_disk_backup_size(_size_bytes: u64, _batches: usize, _order: &'static str) { + // No-op + } + + fn increment_backup_disk_errors(_order: &'static str, _error: &str) { + // No-op + } + + fn set_memory_backup_size(_size_bytes: u64, _batches: usize, _order: &'static str) { + // No-op + } + + fn process_backup_data_lost_quantities(_quantities: &Quantities) { + // No-op + } + + fn process_backup_data_quantities(_quantities: &Quantities) { + // No-op + } + + fn set_backup_empty_size(_order: &'static str) { + // No-op + } +} diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs b/crates/rbuilder-utils/src/clickhouse/backup/mod.rs similarity index 98% rename from crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs rename to crates/rbuilder-utils/src/clickhouse/backup/mod.rs index b1d86751a..67d800bd3 100644 --- a/crates/rbuilder-utils/src/clickhouse_with_backup/backup.rs +++ b/crates/rbuilder-utils/src/clickhouse/backup/mod.rs @@ -1,3 +1,7 @@ +pub mod macros; +pub mod metrics; +pub mod primitives; + use std::{ collections::VecDeque, marker::PhantomData, @@ -13,16 +17,20 @@ use strum::AsRefStr; use tokio::sync::mpsc; use crate::{ - clickhouse::Quantities, - clickhouse_with_backup::{ - default_disk_backup_database_path, - metrics::Metrics, - primitives::{ClickhouseIndexableOrder, ClickhouseRowExt}, - MAX_DISK_BACKUP_SIZE_BYTES, MAX_MEMORY_BACKUP_SIZE_BYTES, + backoff::BackoffInterval, + clickhouse::{ + backup::{ + 
metrics::Metrics, + primitives::{ClickhouseIndexableOrder, ClickhouseRowExt}, + }, + indexer::{ + default_disk_backup_database_path, MAX_DISK_BACKUP_SIZE_BYTES, + MAX_MEMORY_BACKUP_SIZE_BYTES, + }, + Quantities, }, format::FormatBytes, - metrics::backoff::BackoffInterval, - tokio::TaskExecutor, + tasks::TaskExecutor, }; const TARGET: &str = "clickhouse_with_backup::backup"; diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/primitives.rs b/crates/rbuilder-utils/src/clickhouse/backup/primitives.rs similarity index 100% rename from crates/rbuilder-utils/src/clickhouse_with_backup/primitives.rs rename to crates/rbuilder-utils/src/clickhouse/backup/primitives.rs diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs b/crates/rbuilder-utils/src/clickhouse/indexer.rs similarity index 96% rename from crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs rename to crates/rbuilder-utils/src/clickhouse/indexer.rs index 32308c1fb..bfc732ed8 100644 --- a/crates/rbuilder-utils/src/clickhouse_with_backup/mod.rs +++ b/crates/rbuilder-utils/src/clickhouse/indexer.rs @@ -14,22 +14,17 @@ use clickhouse::{ use tokio::sync::mpsc; use crate::{ - clickhouse::Quantities, - clickhouse_with_backup::{ - backup::FailedCommit, - metrics::Metrics, - primitives::{ClickhouseIndexableOrder, ClickhouseRowExt}, + clickhouse::{ + backup::{ + metrics::Metrics, + primitives::{ClickhouseIndexableOrder, ClickhouseRowExt}, + FailedCommit, + }, + Quantities, }, metrics::Sampler, }; - -pub mod backup; -pub mod macros; -pub mod metrics; -/// mod macros; -/// mod models; -pub mod primitives; - /// A default maximum size in bytes for the in-memory backup of failed commits. pub const MAX_MEMORY_BACKUP_SIZE_BYTES: u64 = 1024 * 1024 * 1024; // 1 GiB /// A default maximum size in bytes for the disk backup of failed commits. diff --git a/crates/rbuilder-utils/src/clickhouse/mod.rs b/crates/rbuilder-utils/src/clickhouse/mod.rs index 07d049574..0176f1472 100644 --- a/crates/rbuilder-utils/src/clickhouse/mod.rs +++ b/crates/rbuilder-utils/src/clickhouse/mod.rs @@ -1,3 +1,5 @@ +pub mod backup; +pub mod indexer; use serde::{Deserialize, Serialize}; /// Equivalent of `clickhouse::inserter::Quantities` with more traits derived. diff --git a/crates/rbuilder-utils/src/clickhouse_with_backup/metrics.rs b/crates/rbuilder-utils/src/clickhouse_with_backup/metrics.rs deleted file mode 100644 index 1a6d1df39..000000000 --- a/crates/rbuilder-utils/src/clickhouse_with_backup/metrics.rs +++ /dev/null @@ -1,17 +0,0 @@ -use crate::clickhouse::Quantities; -use std::time::Duration; - -/// Metrics updated by the clickhouse_with_backup mod.
-pub trait Metrics { - fn increment_write_failures(err: String); - fn process_quantities(quantities: &Quantities); - fn record_batch_commit_time(duration: Duration); - fn increment_commit_failures(err: String); - fn set_queue_size(size: usize, order: &'static str); - fn set_disk_backup_size(size_bytes: u64, batches: usize, order: &'static str); - fn increment_backup_disk_errors(order: &'static str, error: &str); - fn set_memory_backup_size(size_bytes: u64, batches: usize, order: &'static str); - fn process_backup_data_lost_quantities(quantities: &Quantities); - fn process_backup_data_quantities(quantities: &Quantities); - fn set_backup_empty_size(order: &'static str); -} diff --git a/crates/rbuilder-utils/src/lib.rs b/crates/rbuilder-utils/src/lib.rs index b0a52744b..3de0c1df0 100644 --- a/crates/rbuilder-utils/src/lib.rs +++ b/crates/rbuilder-utils/src/lib.rs @@ -1,5 +1,7 @@ +pub mod backoff; pub mod clickhouse; -pub mod clickhouse_with_backup; pub mod format; pub mod metrics; -pub mod tokio; +pub mod tasks { + pub use reth_tasks::*; +} diff --git a/crates/rbuilder-utils/src/metrics/mod.rs b/crates/rbuilder-utils/src/metrics/mod.rs index b429ce335..b5c608de9 100644 --- a/crates/rbuilder-utils/src/metrics/mod.rs +++ b/crates/rbuilder-utils/src/metrics/mod.rs @@ -1,5 +1,3 @@ -pub mod backoff; - use std::time::{Duration, Instant}; /// A simple sampler that executes a closure every `sample_size` calls, or if a certain amount of diff --git a/crates/rbuilder-utils/src/tokio/mod.rs b/crates/rbuilder-utils/src/tokio/mod.rs deleted file mode 100644 index 0066bbb3a..000000000 --- a/crates/rbuilder-utils/src/tokio/mod.rs +++ /dev/null @@ -1,877 +0,0 @@ -//! Task management utilities. -//! -//! Taken from `reth_tasks` crate (https://github.com/paradigmxyz/reth/blob/main/crates/tasks/src/lib.rs) and adapted. -//! -//! This crate exposes two main abstractions: a [`TaskManager`] and a [`TaskExecutor`]. The -//! [`TaskManager`] is a centralized entity responsible, as the name suggests, for managing tasks, -//! while the [`TaskExecutor`] is used to spawn tasks onto a Tokio runtime. -//! -//! ## Architecture -//! -//! The [`TaskManager`] holds a [`tokio`] runtime handle that is needed to create child executor -//! that actually spawn tasks. Other than that, it contains: -//! - a receiver for task events (like packing of critical tasks); -//! - a sender for task events, used by the executors to let spawned task report events; -//! - a counter which tracks how many tasks that need graceful shutdown are currently running. -//! -//! Tasks can be also spawned as "critical" and/or with "graceful shutdown" support. -//! Critical tasks when they terminate they send a message to the [`TaskManager`] which in turn -//! will terminate itself after sending a shutdown signal to all long-running tasks. It is up to -//! the application to wait enough time before closing the process to allow graceful shutdown tasks -//! to complete. -//! Graceful shutdown tasks are spawned with a [`GracefulShutdown`] signal that can be awaited, and -//! resolves when a shutdown is explicitely requested by the executor or manager. That can be -//! before a SIGINT/SIGTERM signal is received or when a critical task panics. The -//! [`GracefulShutdown`] signal resolves to a [`GracefulShutdownGuard`]. This guard is simply a -//! shared counter that when dropped, decrements. This used by tasks to notify the manager that the -//! graceful shutdown has completed. 
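The module documentation above explains the TaskManager / TaskExecutor shutdown flow only in prose; the following is a minimal editorial sketch (not part of the patch) of how the pieces are typically wired together. It assumes the `rbuilder_utils::tasks` re-export of `reth_tasks` introduced by this patch series, and the task name and the five-second timeout are placeholders.

```rust
use std::time::Duration;

use rbuilder_utils::tasks::TaskManager;

fn main() {
    let rt = tokio::runtime::Runtime::new().unwrap();
    // The manager owns the shutdown signal; executors are cheap clones that spawn tasks onto it.
    let manager = TaskManager::new(rt.handle().clone());
    let executor = manager.executor();

    // A critical, long-running task that participates in graceful shutdown.
    executor.spawn_critical_with_graceful_shutdown_signal("indexer", |shutdown| async move {
        // Wait for the shutdown signal; it resolves to a guard.
        let guard = shutdown.await;
        // Perform cleanup (flush buffers, close connections, ...) before exiting.
        drop(guard); // dropping the guard tells the manager that cleanup finished
    });

    // Fire the shutdown signal and wait, up to a bound, for graceful tasks to finish.
    manager.graceful_shutdown_with_timeout(Duration::from_secs(5));
}
```

In a real program the `TaskManager` future is also awaited for the lifetime of the process so that a panicking critical task can be observed; in this sketch it is only used to drive the shutdown.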
- -use dyn_clone::DynClone; -use futures_util::{ - future::{select, BoxFuture}, - Future, FutureExt, TryFutureExt, -}; -use shutdown::{signal, GracefulShutdown, GracefulShutdownGuard, Shutdown, Signal}; -use std::{ - any::Any, - fmt::{Display, Formatter}, - pin::{pin, Pin}, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, - }, - task::{ready, Context, Poll}, -}; -use tokio::{ - runtime::Handle, - sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}, - task::JoinHandle, -}; -use tracing_futures::Instrument; - -pub mod shutdown; - -/// A type that can spawn tasks. -/// -/// The main purpose of this type is to abstract over [`TaskExecutor`] so it's more convenient to -/// provide default impls for testing. -/// -/// -/// # Examples -/// -/// Use the [`TokioTaskExecutor`] that spawns with [`tokio::task::spawn`] -/// -/// ``` -/// # async fn t() { -/// use rbuilder_utils::tokio::{TaskSpawner, TokioTaskExecutor}; -/// let executor = TokioTaskExecutor::default(); -/// -/// let task = executor.spawn(Box::pin(async { -/// // -- snip -- -/// })); -/// task.await.unwrap(); -/// # } -/// ``` -/// -/// Use the [`TaskExecutor`] that spawns task directly onto the tokio runtime via the [Handle]. -/// -/// ``` -/// # use rbuilder_utils::tokio::TaskManager; -/// fn t() { -/// use rbuilder_utils::tokio::TaskSpawner; -/// let rt = tokio::runtime::Runtime::new().unwrap(); -/// let manager = TaskManager::new(rt.handle().clone()); -/// let executor = manager.executor(); -/// let task = TaskSpawner::spawn(&executor, Box::pin(async { -/// // -- snip -- -/// })); -/// rt.block_on(task).unwrap(); -/// # } -/// ``` -/// -/// The [`TaskSpawner`] trait is [`DynClone`] so `Box` are also `Clone`. -#[auto_impl::auto_impl(&, Arc)] -pub trait TaskSpawner: Send + Sync + Unpin + std::fmt::Debug + DynClone { - /// Spawns the task onto the runtime. - /// See also [`Handle::spawn`]. - fn spawn(&self, fut: BoxFuture<'static, ()>) -> JoinHandle<()>; - - /// This spawns a critical task onto the runtime. - fn spawn_critical(&self, name: &'static str, fut: BoxFuture<'static, ()>) -> JoinHandle<()>; - - /// Spawns a blocking task onto the runtime. - fn spawn_blocking(&self, fut: BoxFuture<'static, ()>) -> JoinHandle<()>; - - /// This spawns a critical blocking task onto the runtime. - fn spawn_critical_blocking( - &self, - name: &'static str, - fut: BoxFuture<'static, ()>, - ) -> JoinHandle<()>; -} - -dyn_clone::clone_trait_object!(TaskSpawner); - -/// An [`TaskSpawner`] that uses [`tokio::task::spawn`] to execute tasks -#[derive(Debug, Clone, Default)] -#[non_exhaustive] -pub struct TokioTaskExecutor; - -impl TokioTaskExecutor { - /// Converts the instance to a boxed [`TaskSpawner`]. - pub fn boxed(self) -> Box { - Box::new(self) - } -} - -impl TaskSpawner for TokioTaskExecutor { - fn spawn(&self, fut: BoxFuture<'static, ()>) -> JoinHandle<()> { - tokio::task::spawn(fut) - } - - fn spawn_critical(&self, _name: &'static str, fut: BoxFuture<'static, ()>) -> JoinHandle<()> { - tokio::task::spawn(fut) - } - - fn spawn_blocking(&self, fut: BoxFuture<'static, ()>) -> JoinHandle<()> { - tokio::task::spawn_blocking(move || tokio::runtime::Handle::current().block_on(fut)) - } - - fn spawn_critical_blocking( - &self, - _name: &'static str, - fut: BoxFuture<'static, ()>, - ) -> JoinHandle<()> { - tokio::task::spawn_blocking(move || tokio::runtime::Handle::current().block_on(fut)) - } -} - -/// Many reth components require to spawn tasks for long-running jobs. 
For example `discovery` -/// spawns tasks to handle egress and ingress of udp traffic or `network` that spawns session tasks -/// that handle the traffic to and from a peer. -/// -/// To unify how tasks are created, the [`TaskManager`] provides access to the configured Tokio -/// runtime. A [`TaskManager`] stores the [`tokio::runtime::Handle`] it is associated with. In this -/// way it is possible to configure on which runtime a task is executed. -/// -/// The main purpose of this type is to be able to monitor if a critical task panicked, for -/// diagnostic purposes, since tokio task essentially fail silently. Therefore, this type is a -/// Stream that yields the name of panicked task, See [`TaskExecutor::spawn_critical`]. In order to -/// execute Tasks use the [`TaskExecutor`] type [`TaskManager::executor`]. -#[derive(Debug)] -#[must_use = "TaskManager must be polled to monitor critical tasks"] -pub struct TaskManager { - /// Handle to the tokio runtime this task manager is associated with. - /// - /// See [`Handle`] docs. - handle: Handle, - /// Sender half for sending task events to this type - task_events_tx: UnboundedSender, - /// Receiver for task events - task_events_rx: UnboundedReceiver, - /// The [Signal] to fire when all tasks should be shutdown. - /// - /// This is fired when dropped. - signal: Option, - /// Receiver of the shutdown signal. - on_shutdown: Shutdown, - /// How many [`GracefulShutdown`] tasks are currently active - graceful_tasks: Arc, -} - -// === impl TaskManager === - -impl TaskManager { - /// Returns a __new__ [`TaskManager`] over the currently running Runtime. - /// - /// This must be polled for the duration of the program. - /// - /// To obtain the current [`TaskExecutor`] see [`TaskExecutor::current`]. - /// - /// # Panics - /// - /// This will panic if called outside the context of a Tokio runtime. - pub fn current() -> Self { - let handle = Handle::current(); - Self::new(handle) - } - - /// Create a new instance connected to the given handle's tokio runtime. - /// - /// This also sets the global [`TaskExecutor`]. - pub fn new(handle: Handle) -> Self { - let (task_events_tx, task_events_rx) = unbounded_channel(); - let (signal, on_shutdown) = signal(); - Self { - handle, - task_events_tx, - task_events_rx, - signal: Some(signal), - on_shutdown, - graceful_tasks: Arc::new(AtomicUsize::new(0)), - } - } - - /// Returns a new [`TaskExecutor`] that can spawn new tasks onto the tokio runtime this type is - /// connected to. - pub fn executor(&self) -> TaskExecutor { - TaskExecutor { - handle: self.handle.clone(), - on_shutdown: self.on_shutdown.clone(), - task_events_tx: self.task_events_tx.clone(), - graceful_tasks: Arc::clone(&self.graceful_tasks), - } - } - - /// Fires the shutdown signal and awaits until all tasks are shutdown. - pub fn graceful_shutdown(self) { - let _ = self.do_graceful_shutdown(None); - } - - /// Fires the shutdown signal and awaits until all tasks are shutdown. - /// - /// Returns true if all tasks were shutdown before the timeout elapsed. 
- pub fn graceful_shutdown_with_timeout(self, timeout: std::time::Duration) -> bool { - self.do_graceful_shutdown(Some(timeout)) - } - - fn do_graceful_shutdown(self, timeout: Option) -> bool { - drop(self.signal); - let when = timeout.map(|t| std::time::Instant::now() + t); - while self.graceful_tasks.load(Ordering::Relaxed) > 0 { - if when - .map(|when| std::time::Instant::now() > when) - .unwrap_or(false) - { - tracing::debug!("graceful shutdown timed out"); - return false; - } - std::hint::spin_loop(); - } - - tracing::debug!("gracefully shut down"); - true - } -} - -/// An endless future that resolves if a critical task panicked. -/// -/// See [`TaskExecutor::spawn_critical`] -impl Future for TaskManager { - type Output = Result<(), PanickedTaskError>; - - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - match ready!(self.as_mut().get_mut().task_events_rx.poll_recv(cx)) { - Some(TaskEvent::Panic(err)) => Poll::Ready(Err(err)), - Some(TaskEvent::GracefulShutdown) | None => { - if let Some(signal) = self.get_mut().signal.take() { - signal.fire(); - } - Poll::Ready(Ok(())) - } - } - } -} - -/// Error with the name of the task that panicked and an error downcasted to string, if possible. -#[derive(Debug, thiserror::Error, PartialEq, Eq)] -pub struct PanickedTaskError { - task_name: &'static str, - error: Option, -} - -impl Display for PanickedTaskError { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let task_name = self.task_name; - if let Some(error) = &self.error { - write!(f, "Critical task `{task_name}` panicked: `{error}`") - } else { - write!(f, "Critical task `{task_name}` panicked") - } - } -} - -impl PanickedTaskError { - fn new(task_name: &'static str, error: Box) -> Self { - let error = match error.downcast::() { - Ok(value) => Some(*value), - Err(error) => match error.downcast::<&str>() { - Ok(value) => Some(value.to_string()), - Err(_) => None, - }, - }; - - Self { task_name, error } - } -} - -/// Represents the events that the `TaskManager`'s main future can receive. -#[derive(Debug)] -enum TaskEvent { - /// Indicates that a critical task has panicked. - Panic(PanickedTaskError), - /// A signal requesting a graceful shutdown of the `TaskManager`. - GracefulShutdown, -} - -/// A type that can spawn new tokio tasks -#[derive(Debug, Clone)] -pub struct TaskExecutor { - /// Handle to the tokio runtime this task manager is associated with. - /// - /// See [`Handle`] docs. - handle: Handle, - /// Receiver of the shutdown signal. - on_shutdown: Shutdown, - /// Sender half for sending task events to this type - task_events_tx: UnboundedSender, - /// How many [`GracefulShutdown`] tasks are currently active - graceful_tasks: Arc, -} - -// === impl TaskExecutor === - -impl TaskExecutor { - /// Returns the [Handle] to the tokio runtime. - pub const fn handle(&self) -> &Handle { - &self.handle - } - - /// Returns the receiver of the shutdown signal. 
- pub const fn on_shutdown_signal(&self) -> &Shutdown { - &self.on_shutdown - } - - /// Spawns a future on the tokio runtime depending on the [`TaskKind`] - fn spawn_on_rt(&self, fut: F, task_kind: TaskKind) -> JoinHandle<()> - where - F: Future + Send + 'static, - { - match task_kind { - TaskKind::Default => self.handle.spawn(fut), - TaskKind::Blocking => { - let handle = self.handle.clone(); - self.handle.spawn_blocking(move || handle.block_on(fut)) - } - } - } - - /// Spawns a regular task depending on the given [`TaskKind`] - fn spawn_task_as(&self, fut: F, task_kind: TaskKind) -> JoinHandle<()> - where - F: Future + Send + 'static, - { - let on_shutdown = self.on_shutdown.clone(); - - // Wrap the original future to increment the finished tasks counter upon completion - let task = { - async move { - let fut = pin!(fut); - let _ = select(on_shutdown, fut).await; - } - } - .in_current_span(); - - self.spawn_on_rt(task, task_kind) - } - - /// Spawns the task onto the runtime. - /// The given future resolves as soon as the [Shutdown] signal is received. - /// - /// See also [`Handle::spawn`]. - pub fn spawn(&self, fut: F) -> JoinHandle<()> - where - F: Future + Send + 'static, - { - self.spawn_task_as(fut, TaskKind::Default) - } - - /// Spawns a blocking task onto the runtime. - /// The given future resolves as soon as the [Shutdown] signal is received. - /// - /// See also [`Handle::spawn_blocking`]. - pub fn spawn_blocking(&self, fut: F) -> JoinHandle<()> - where - F: Future + Send + 'static, - { - self.spawn_task_as(fut, TaskKind::Blocking) - } - - /// Spawns the task onto the runtime. - /// The given future resolves as soon as the [Shutdown] signal is received. - /// - /// See also [`Handle::spawn`]. - pub fn spawn_with_signal(&self, f: impl FnOnce(Shutdown) -> F) -> JoinHandle<()> - where - F: Future + Send + 'static, - { - let on_shutdown = self.on_shutdown.clone(); - let fut = f(on_shutdown); - - let task = fut.in_current_span(); - - self.handle.spawn(task) - } - - /// Spawns a critical task depending on the given [`TaskKind`] - fn spawn_critical_as( - &self, - name: &'static str, - fut: F, - task_kind: TaskKind, - ) -> JoinHandle<()> - where - F: Future + Send + 'static, - { - let panicked_tasks_tx = self.task_events_tx.clone(); - let on_shutdown = self.on_shutdown.clone(); - - // wrap the task in catch unwind - let task = std::panic::AssertUnwindSafe(fut) - .catch_unwind() - .map_err(move |error| { - let task_error = PanickedTaskError::new(name, error); - tracing::error!("{task_error}"); - let _ = panicked_tasks_tx.send(TaskEvent::Panic(task_error)); - }) - .in_current_span(); - - let task = async move { - let task = pin!(task); - let _ = select(on_shutdown, task).await; - }; - - self.spawn_on_rt(task, task_kind) - } - - /// This spawns a critical blocking task onto the runtime. - /// The given future resolves as soon as the [Shutdown] signal is received. - /// - /// If this task panics, the [`TaskManager`] is notified. - pub fn spawn_critical_blocking(&self, name: &'static str, fut: F) -> JoinHandle<()> - where - F: Future + Send + 'static, - { - self.spawn_critical_as(name, fut, TaskKind::Blocking) - } - - /// This spawns a critical task onto the runtime. - /// The given future resolves as soon as the [Shutdown] signal is received. - /// - /// If this task panics, the [`TaskManager`] is notified. 
- pub fn spawn_critical(&self, name: &'static str, fut: F) -> JoinHandle<()> - where - F: Future + Send + 'static, - { - self.spawn_critical_as(name, fut, TaskKind::Default) - } - - /// This spawns a critical task onto the runtime. - /// - /// If this task panics, the [`TaskManager`] is notified. - pub fn spawn_critical_with_shutdown_signal( - &self, - name: &'static str, - f: impl FnOnce(Shutdown) -> F, - ) -> JoinHandle<()> - where - F: Future + Send + 'static, - { - let panicked_tasks_tx = self.task_events_tx.clone(); - let on_shutdown = self.on_shutdown.clone(); - let fut = f(on_shutdown); - - // wrap the task in catch unwind - let task = std::panic::AssertUnwindSafe(fut) - .catch_unwind() - .map_err(move |error| { - let task_error = PanickedTaskError::new(name, error); - tracing::error!("{task_error}"); - let _ = panicked_tasks_tx.send(TaskEvent::Panic(task_error)); - }) - .map(drop) - .in_current_span(); - - self.handle.spawn(task) - } - - /// This spawns a critical task onto the runtime. - /// - /// If this task panics, the [`TaskManager`] is notified. - /// The [`TaskManager`] will wait until the given future has completed before shutting down. - /// - /// # Example - /// - /// ```no_run - /// # async fn t(executor: rbuilder_utils::tokio::TaskExecutor) { - /// - /// executor.spawn_critical_with_graceful_shutdown_signal("grace", |shutdown| async move { - /// // await the shutdown signal - /// let guard = shutdown.await; - /// // do work before exiting the program - /// tokio::time::sleep(std::time::Duration::from_secs(1)).await; - /// // allow graceful shutdown - /// drop(guard); - /// }); - /// # } - /// ``` - pub fn spawn_critical_with_graceful_shutdown_signal( - &self, - name: &'static str, - f: impl FnOnce(GracefulShutdown) -> F, - ) -> JoinHandle<()> - where - F: Future + Send + 'static, - { - let panicked_tasks_tx = self.task_events_tx.clone(); - let on_shutdown = GracefulShutdown::new( - self.on_shutdown.clone(), - GracefulShutdownGuard::new(Arc::clone(&self.graceful_tasks)), - ); - let fut = f(on_shutdown); - - // wrap the task in catch unwind - let task = std::panic::AssertUnwindSafe(fut) - .catch_unwind() - .map_err(move |error| { - let task_error = PanickedTaskError::new(name, error); - tracing::error!("{task_error}"); - let _ = panicked_tasks_tx.send(TaskEvent::Panic(task_error)); - }) - .map(drop) - .in_current_span(); - - self.handle.spawn(task) - } - - /// This spawns a regular task onto the runtime. - /// - /// The [`TaskManager`] will wait until the given future has completed before shutting down. - /// - /// # Example - /// - /// ```no_run - /// # async fn t(executor: rbuilder_utils::tokio::TaskExecutor) { - /// - /// executor.spawn_with_graceful_shutdown_signal(|shutdown| async move { - /// // await the shutdown signal - /// let guard = shutdown.await; - /// // do work before exiting the program - /// tokio::time::sleep(std::time::Duration::from_secs(1)).await; - /// // allow graceful shutdown - /// drop(guard); - /// }); - /// # } - /// ``` - pub fn spawn_with_graceful_shutdown_signal( - &self, - f: impl FnOnce(GracefulShutdown) -> F, - ) -> JoinHandle<()> - where - F: Future + Send + 'static, - { - let on_shutdown = GracefulShutdown::new( - self.on_shutdown.clone(), - GracefulShutdownGuard::new(Arc::clone(&self.graceful_tasks)), - ); - let fut = f(on_shutdown); - - self.handle.spawn(fut) - } - - /// Sends a request to the `TaskManager` to initiate a graceful shutdown. - /// - /// Caution: This will terminate the entire program. 
- /// - /// The [`TaskManager`] upon receiving this event, will terminate and initiate the shutdown that - /// can be handled via the returned [`GracefulShutdown`]. - pub fn initiate_graceful_shutdown( - &self, - ) -> Result> { - self.task_events_tx - .send(TaskEvent::GracefulShutdown) - .map_err(|_send_error_with_task_event| tokio::sync::mpsc::error::SendError(()))?; - - Ok(GracefulShutdown::new( - self.on_shutdown.clone(), - GracefulShutdownGuard::new(Arc::clone(&self.graceful_tasks)), - )) - } -} - -impl TaskSpawner for TaskExecutor { - fn spawn(&self, fut: BoxFuture<'static, ()>) -> JoinHandle<()> { - self.spawn(fut) - } - - fn spawn_critical(&self, name: &'static str, fut: BoxFuture<'static, ()>) -> JoinHandle<()> { - Self::spawn_critical(self, name, fut) - } - - fn spawn_blocking(&self, fut: BoxFuture<'static, ()>) -> JoinHandle<()> { - self.spawn_blocking(fut) - } - - fn spawn_critical_blocking( - &self, - name: &'static str, - fut: BoxFuture<'static, ()>, - ) -> JoinHandle<()> { - Self::spawn_critical_blocking(self, name, fut) - } -} - -/// `TaskSpawner` with extended behaviour -#[auto_impl::auto_impl(&, Arc)] -pub trait TaskSpawnerExt: Send + Sync + Unpin + std::fmt::Debug + DynClone { - /// This spawns a critical task onto the runtime. - /// - /// If this task panics, the [`TaskManager`] is notified. - /// The [`TaskManager`] will wait until the given future has completed before shutting down. - fn spawn_critical_with_graceful_shutdown_signal( - &self, - name: &'static str, - f: impl FnOnce(GracefulShutdown) -> F, - ) -> JoinHandle<()> - where - F: Future + Send + 'static; - - /// This spawns a regular task onto the runtime. - /// - /// The [`TaskManager`] will wait until the given future has completed before shutting down. - fn spawn_with_graceful_shutdown_signal( - &self, - f: impl FnOnce(GracefulShutdown) -> F, - ) -> JoinHandle<()> - where - F: Future + Send + 'static; -} - -impl TaskSpawnerExt for TaskExecutor { - fn spawn_critical_with_graceful_shutdown_signal( - &self, - name: &'static str, - f: impl FnOnce(GracefulShutdown) -> F, - ) -> JoinHandle<()> - where - F: Future + Send + 'static, - { - Self::spawn_critical_with_graceful_shutdown_signal(self, name, f) - } - - fn spawn_with_graceful_shutdown_signal( - &self, - f: impl FnOnce(GracefulShutdown) -> F, - ) -> JoinHandle<()> - where - F: Future + Send + 'static, - { - Self::spawn_with_graceful_shutdown_signal(self, f) - } -} - -/// Determines how a task is spawned -enum TaskKind { - /// Spawn the task to the default executor [`Handle::spawn`] - Default, - /// Spawn the task to the blocking executor [`Handle::spawn_blocking`] - Blocking, -} - -/// Error returned by `try_current` when no task executor has been configured. 
-#[derive(Debug, Default, thiserror::Error)] -#[error("No current task executor available.")] -#[non_exhaustive] -pub struct NoCurrentTaskExecutorError; - -#[cfg(test)] -mod tests { - use super::*; - use std::{sync::atomic::AtomicBool, time::Duration}; - - #[test] - fn test_cloneable() { - #[derive(Clone)] - struct ExecutorWrapper { - _e: Box, - } - - let executor: Box = Box::::default(); - let _e = dyn_clone::clone_box(&*executor); - - let e = ExecutorWrapper { _e }; - let _e2 = e; - } - - #[test] - fn test_critical() { - let runtime = tokio::runtime::Runtime::new().unwrap(); - let handle = runtime.handle().clone(); - let manager = TaskManager::new(handle); - let executor = manager.executor(); - - executor.spawn_critical("this is a critical task", async { - panic!("intentionally panic") - }); - - runtime.block_on(async move { - let err_result = manager.await; - assert!( - err_result.is_err(), - "Expected TaskManager to return an error due to panic" - ); - let panicked_err = err_result.unwrap_err(); - - assert_eq!(panicked_err.task_name, "this is a critical task"); - assert_eq!(panicked_err.error, Some("intentionally panic".to_string())); - }) - } - - // Tests that spawned tasks are terminated if the `TaskManager` drops - #[test] - fn test_manager_shutdown_critical() { - let runtime = tokio::runtime::Runtime::new().unwrap(); - let handle = runtime.handle().clone(); - let manager = TaskManager::new(handle.clone()); - let executor = manager.executor(); - - let (signal, shutdown) = signal(); - - executor.spawn_critical("this is a critical task", async move { - tokio::time::sleep(Duration::from_millis(200)).await; - drop(signal); - }); - - drop(manager); - - handle.block_on(shutdown); - } - - // Tests that spawned tasks are terminated if the `TaskManager` drops - #[test] - fn test_manager_shutdown() { - let runtime = tokio::runtime::Runtime::new().unwrap(); - let handle = runtime.handle().clone(); - let manager = TaskManager::new(handle.clone()); - let executor = manager.executor(); - - let (signal, shutdown) = signal(); - - executor.spawn(Box::pin(async move { - tokio::time::sleep(Duration::from_millis(200)).await; - drop(signal); - })); - - drop(manager); - - handle.block_on(shutdown); - } - - #[test] - fn test_manager_graceful_shutdown() { - let runtime = tokio::runtime::Runtime::new().unwrap(); - let handle = runtime.handle().clone(); - let manager = TaskManager::new(handle); - let executor = manager.executor(); - - let val = Arc::new(AtomicBool::new(false)); - let c = val.clone(); - executor.spawn_critical_with_graceful_shutdown_signal("grace", |shutdown| async move { - let _guard = shutdown.await; - tokio::time::sleep(Duration::from_millis(200)).await; - c.store(true, Ordering::Relaxed); - }); - - manager.graceful_shutdown(); - assert!(val.load(Ordering::Relaxed)); - } - - #[test] - fn test_manager_graceful_shutdown_many() { - let runtime = tokio::runtime::Runtime::new().unwrap(); - let handle = runtime.handle().clone(); - let manager = TaskManager::new(handle); - let executor = manager.executor(); - - let counter = Arc::new(AtomicUsize::new(0)); - let num = 10; - for _ in 0..num { - let c = counter.clone(); - executor.spawn_critical_with_graceful_shutdown_signal( - "grace", - move |shutdown| async move { - let _guard = shutdown.await; - tokio::time::sleep(Duration::from_millis(200)).await; - c.fetch_add(1, Ordering::SeqCst); - }, - ); - } - - manager.graceful_shutdown(); - assert_eq!(counter.load(Ordering::Relaxed), num); - } - - #[test] - fn test_manager_graceful_shutdown_timeout() { - 
let runtime = tokio::runtime::Runtime::new().unwrap(); - let handle = runtime.handle().clone(); - let manager = TaskManager::new(handle); - let executor = manager.executor(); - - let timeout = Duration::from_millis(500); - let val = Arc::new(AtomicBool::new(false)); - let val2 = val.clone(); - executor.spawn_critical_with_graceful_shutdown_signal("grace", |shutdown| async move { - let _guard = shutdown.await; - tokio::time::sleep(timeout * 3).await; - val2.store(true, Ordering::Relaxed); - unreachable!("should not be reached"); - }); - - manager.graceful_shutdown_with_timeout(timeout); - assert!(!val.load(Ordering::Relaxed)); - } - - #[test] - fn test_graceful_shutdown_triggered_by_executor() { - let runtime = tokio::runtime::Runtime::new().unwrap(); - let task_manager = TaskManager::new(runtime.handle().clone()); - let executor = task_manager.executor(); - - let task_did_shutdown_flag = Arc::new(AtomicBool::new(false)); - let flag_clone = task_did_shutdown_flag.clone(); - - let spawned_task_handle = executor.spawn_with_signal(|shutdown_signal| async move { - shutdown_signal.await; - flag_clone.store(true, Ordering::SeqCst); - }); - - let manager_future_handle = runtime.spawn(task_manager); - - let send_result = executor.initiate_graceful_shutdown(); - assert!(send_result.is_ok(), "Sending the graceful shutdown signal should succeed and return a GracefulShutdown future"); - - let manager_final_result = runtime.block_on(manager_future_handle); - - assert!( - manager_final_result.is_ok(), - "TaskManager task should not panic" - ); - assert_eq!( - manager_final_result.unwrap(), - Ok(()), - "TaskManager should resolve cleanly with Ok(()) after graceful shutdown request" - ); - - let task_join_result = runtime.block_on(spawned_task_handle); - assert!( - task_join_result.is_ok(), - "Spawned task should complete without panic" - ); - - assert!( - task_did_shutdown_flag.load(Ordering::Relaxed), - "Task should have received the shutdown signal and set the flag" - ); - } -} diff --git a/crates/rbuilder-utils/src/tokio/shutdown.rs b/crates/rbuilder-utils/src/tokio/shutdown.rs deleted file mode 100644 index 6128e759a..000000000 --- a/crates/rbuilder-utils/src/tokio/shutdown.rs +++ /dev/null @@ -1,173 +0,0 @@ -//! Helper for shutdown signals - -use futures_util::{ - future::{FusedFuture, Shared}, - FutureExt, -}; -use std::{ - future::Future, - pin::Pin, - sync::{atomic::AtomicUsize, Arc}, - task::{ready, Context, Poll}, }; -use tokio::sync::oneshot; - -/// A [`Future`] that resolves when the shutdown event has been fired. -/// -/// Compared to [`Shutdown`] it is "graceful", meaning that when it resolves it returns a -/// [`GracefulShutdownGuard`]. -#[derive(Debug)] -pub struct GracefulShutdown { - shutdown: Shutdown, - guard: Option<GracefulShutdownGuard>, -} - -impl GracefulShutdown { - /// Creates a new instance of `Self`. To do so, it requires a [`Shutdown`] future, that will - /// drive `Self` to resolution, and the [`GracefulShutdownGuard`] used to notify the completion - /// of the graceful shutdown procedure. 
- pub(crate) const fn new(shutdown: Shutdown, guard: GracefulShutdownGuard) -> Self { - Self { - shutdown, - guard: Some(guard), - } - } -} - -impl Future for GracefulShutdown { - type Output = GracefulShutdownGuard; - - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> { - ready!(self.shutdown.poll_unpin(cx)); - Poll::Ready( - self.get_mut() - .guard - .take() - .expect("Future polled after completion"), - ) - } -} - -impl Clone for GracefulShutdown { - fn clone(&self) -> Self { - Self { - shutdown: self.shutdown.clone(), - guard: self - .guard - .as_ref() - .map(|g| GracefulShutdownGuard::new(Arc::clone(&g.0))), - } - } -} - -/// A guard that fires once dropped to signal the [`TaskManager`](crate::TaskManager) that the -/// [`GracefulShutdown`] has completed. -#[derive(Debug)] -#[must_use = "if unused the task will not be gracefully shutdown"] -pub struct GracefulShutdownGuard(Arc<AtomicUsize>); - -impl GracefulShutdownGuard { - pub(crate) fn new(counter: Arc<AtomicUsize>) -> Self { - counter.fetch_add(1, std::sync::atomic::Ordering::SeqCst); - Self(counter) - } -} - -impl Drop for GracefulShutdownGuard { - fn drop(&mut self) { - self.0.fetch_sub(1, std::sync::atomic::Ordering::SeqCst); - } -} - -/// A [`Future`] that resolves when a shutdown event is fired. -#[derive(Debug, Clone)] -pub struct Shutdown( - /// The internal [`oneshot`] channel receiver, wrapped in a - /// [`futures_util::FutureExt::shared`] so that it can be cloned and polled from multiple - /// tasks. - Shared<oneshot::Receiver<()>>, -); - -impl Future for Shutdown { - type Output = (); - - fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> { - let pin = self.get_mut(); - if pin.0.is_terminated() || pin.0.poll_unpin(cx).is_ready() { - Poll::Ready(()) - } else { - Poll::Pending - } - } -} - -/// Shutdown signal that fires either manually or on drop by closing the channel -#[derive(Debug)] -pub struct Signal(oneshot::Sender<()>); - -impl Signal { - /// Fire the signal manually. 
- pub fn fire(self) { - let _ = self.0.send(()); - } -} - -/// Create a channel pair that's used to propagate shutdown event -pub fn signal() -> (Signal, Shutdown) { - let (sender, receiver) = oneshot::channel(); - (Signal(sender), Shutdown(receiver.shared())) -} - -#[cfg(test)] -mod tests { - use super::*; - use futures_util::future::join_all; - use std::time::Duration; - - #[tokio::test(flavor = "multi_thread")] - async fn test_shutdown() { - let (_signal, _shutdown) = signal(); - } - - #[tokio::test(flavor = "multi_thread")] - async fn test_drop_signal() { - let (signal, shutdown) = signal(); - - tokio::task::spawn(async move { - tokio::time::sleep(Duration::from_millis(500)).await; - drop(signal) - }); - - shutdown.await; - } - - #[tokio::test(flavor = "multi_thread")] - async fn test_multi_shutdowns() { - let (signal, shutdown) = signal(); - - let mut tasks = Vec::with_capacity(100); - for _ in 0..100 { - let shutdown = shutdown.clone(); - let task = tokio::task::spawn(async move { - shutdown.await; - }); - tasks.push(task); - } - - drop(signal); - - join_all(tasks).await; - } - - #[tokio::test(flavor = "multi_thread")] - async fn test_drop_signal_from_thread() { - let (signal, shutdown) = signal(); - - let _thread = std::thread::spawn(|| { - std::thread::sleep(Duration::from_millis(500)); - drop(signal) - }); - - shutdown.await; - } -} From 00c10299eb4fad7e9952b6e27b03693856e914ff Mon Sep 17 00:00:00 2001 From: Daniel Xifra Date: Fri, 24 Oct 2025 15:44:36 -0300 Subject: [PATCH 9/9] lock --- Cargo.lock | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.lock b/Cargo.lock index 960b99bfe..c6e1d36f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9520,6 +9520,7 @@ dependencies = [ "rand 0.9.2", "redb", "reqwest 0.12.24", + "reth-tasks", "serde", "serde_json", "serde_with",