Skip to content

Commit 59f4f68

Browse files
committed
chunk-size cli args
1 parent f23b25d commit 59f4f68

File tree

5 files changed

+177
-12
lines changed

5 files changed

+177
-12
lines changed

README.md

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,4 +192,74 @@ graph TD
192192
style BlobMerkle fill:#ffe1e1
193193
style SPM0Root fill:#fff4e1
194194
style Storage fill:#e1ffe1
195+
```
196+
197+
### Smart Defaults - Automatic Chunk Size Selection
198+
199+
The automatic chunk size selection behavior is based on these key parameters:
200+
201+
1. When Chunking Kicks In
202+
203+
Chunking is automatically used when:
204+
blob_size > max_blob_size_for_n_shards(n_shards, encoding_type)
205+
206+
Where:
207+
- max_blob_size_for_n_shards = source_symbols_per_blob × max_symbol_size
208+
- max_symbol_size = 65,534 bytes (u16::MAX - 1) for RS2 encoding
209+
- source_symbols_per_blob = n_primary × n_secondary (depends on shard count)
210+
211+
Example for 1000 shards:
212+
- Primary source symbols: 334
213+
- Secondary source symbols: 667
214+
- Total source symbols: 334 × 667 = 222,778
215+
- Max single-chunk size: 222,778 × 65,534 = ~13.9 GB
216+
217+
So for a typical network with 1000 shards, chunking automatically kicks in for blobs larger than
218+
~13.9 GB.
219+
220+
2. Default Chunk Size
221+
222+
When chunking is needed, the system uses:
223+
pub const DEFAULT_CHUNK_SIZE: u64 = 10 * 1024 * 1024; // 10 MB
224+
225+
This was chosen based on several factors documented in the code:
226+
- Memory efficiency: 10 MB chunks keep memory usage reasonable during encoding/decoding
227+
- Metadata overhead: At 10 MB per chunk with 1000 shards, metadata is only 0.64% overhead (64 KB
228+
metadata per 10 MB chunk)
229+
- Streaming performance: Smaller chunks enable faster initial data delivery
230+
- Storage granularity: Reasonable balance between network round-trips and overhead
231+
232+
3. Constraints
233+
234+
The system enforces:
235+
- Minimum chunk size: 10 MB (prevents excessive metadata overhead)
236+
- Maximum chunks per blob: 1000 (bounds total metadata size to ~64 MB)
237+
238+
4. Practical Examples
239+
240+
Small blob (< 13.9 GB with 1000 shards):
241+
walrus store --epochs 5 small_file.bin # 1 GB file
242+
→ Uses standard RS2 encoding (single chunk)
243+
→ No chunking needed
244+
245+
Large blob (> 13.9 GB with 1000 shards):
246+
walrus store --epochs 5 large_file.bin # 50 GB file
247+
→ Automatically uses RS2Chunked encoding
248+
→ Chunk size: 10 MB (DEFAULT_CHUNK_SIZE)
249+
→ Number of chunks: 5120 (50 GB / 10 MB)
250+
251+
Manual override:
252+
walrus store --epochs 5 --chunk-size 20971520 large_file.bin # 50 GB with 20 MB chunks
253+
→ Forces RS2Chunked encoding
254+
→ Chunk size: 20 MB (user specified)
255+
→ Number of chunks: 2560 (50 GB / 20 MB)
256+
→ Useful for systems with more memory available
257+
258+
5. Why Manual Override is Useful
259+
260+
- Memory-constrained environments: Use smaller chunks (e.g., 5 MB) to reduce peak memory usage
261+
- Performance tuning: Larger chunks (e.g., 20-50 MB) may improve throughput when memory is abundant
262+
- Testing: Validate chunking behavior with smaller test files by forcing chunked mode
195263

264+
The smart defaults ensure that most users never need to think about chunking—it "just works" when
265+
blobs exceed single-chunk limits, while still giving advanced users control when needed.

crates/walrus-sdk/src/client.rs

Lines changed: 57 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -919,7 +919,11 @@ impl WalrusNodeClient<SuiContractClient> {
919919
let walrus_store_blobs =
920920
WalrusStoreBlob::<String>::default_unencoded_blobs_from_slice(blobs, attributes);
921921
let start = Instant::now();
922-
let encoded_blobs = self.encode_blobs(walrus_store_blobs, store_args.encoding_type)?;
922+
let encoded_blobs = self.encode_blobs_with_chunk_size(
923+
walrus_store_blobs,
924+
store_args.encoding_type,
925+
store_args.chunk_size,
926+
)?;
923927
store_args.maybe_observe_encoding_latency(start.elapsed());
924928

925929
let (failed_blobs, encoded_blobs): (Vec<_>, Vec<_>) =
@@ -965,7 +969,11 @@ impl WalrusNodeClient<SuiContractClient> {
965969
let walrus_store_blobs =
966970
WalrusStoreBlob::<String>::default_unencoded_blobs_from_slice(&blobs, &[]);
967971

968-
let encoded_blobs = self.encode_blobs(walrus_store_blobs, store_args.encoding_type)?;
972+
let encoded_blobs = self.encode_blobs_with_chunk_size(
973+
walrus_store_blobs,
974+
store_args.encoding_type,
975+
store_args.chunk_size,
976+
)?;
969977
let (failed_blobs, encoded_blobs): (Vec<_>, Vec<_>) =
970978
encoded_blobs.into_iter().partition(|blob| blob.is_failed());
971979

@@ -1011,7 +1019,11 @@ impl WalrusNodeClient<SuiContractClient> {
10111019
let walrus_store_blobs =
10121020
WalrusStoreBlob::<String>::default_unencoded_blobs_from_slice(blobs, &[]);
10131021

1014-
let encoded_blobs = self.encode_blobs(walrus_store_blobs, store_args.encoding_type)?;
1022+
let encoded_blobs = self.encode_blobs_with_chunk_size(
1023+
walrus_store_blobs,
1024+
store_args.encoding_type,
1025+
store_args.chunk_size,
1026+
)?;
10151027
let (failed_blobs, encoded_blobs): (Vec<_>, Vec<_>) =
10161028
encoded_blobs.into_iter().partition(|blob| blob.is_failed());
10171029

@@ -1041,6 +1053,17 @@ impl WalrusNodeClient<SuiContractClient> {
10411053
&self,
10421054
walrus_store_blobs: Vec<WalrusStoreBlob<'a, T>>,
10431055
encoding_type: EncodingType,
1056+
) -> ClientResult<Vec<WalrusStoreBlob<'a, T>>> {
1057+
self.encode_blobs_with_chunk_size(walrus_store_blobs, encoding_type, None)
1058+
}
1059+
1060+
/// Encodes multiple blobs with optional chunk size override.
1061+
#[tracing::instrument(skip_all, fields(count = walrus_store_blobs.len()))]
1062+
pub fn encode_blobs_with_chunk_size<'a, T: Debug + Clone + Send + Sync>(
1063+
&self,
1064+
walrus_store_blobs: Vec<WalrusStoreBlob<'a, T>>,
1065+
encoding_type: EncodingType,
1066+
chunk_size: Option<u64>,
10441067
) -> ClientResult<Vec<WalrusStoreBlob<'a, T>>> {
10451068
if walrus_store_blobs.is_empty() {
10461069
return Ok(Vec::new());
@@ -1075,10 +1098,11 @@ impl WalrusNodeClient<SuiContractClient> {
10751098

10761099
let multi_pb_clone = multi_pb.clone();
10771100
let unencoded_blob = blob.get_blob();
1078-
let encode_result = self.encode_pairs_and_metadata(
1101+
let encode_result = self.encode_pairs_and_metadata_with_chunk_size(
10791102
unencoded_blob,
10801103
encoding_type,
10811104
multi_pb_clone.as_ref(),
1105+
chunk_size,
10821106
);
10831107
blob.with_encode_result(encode_result)
10841108
})
@@ -1100,6 +1124,18 @@ impl WalrusNodeClient<SuiContractClient> {
11001124
blob: &[u8],
11011125
encoding_type: EncodingType,
11021126
multi_pb: &MultiProgress,
1127+
) -> ClientResult<(Vec<SliverPair>, VerifiedBlobMetadataWithId)> {
1128+
self.encode_pairs_and_metadata_with_chunk_size(blob, encoding_type, multi_pb, None)
1129+
}
1130+
1131+
/// Encodes a blob into sliver pairs and metadata with optional chunk size override.
1132+
#[tracing::instrument(skip_all)]
1133+
pub fn encode_pairs_and_metadata_with_chunk_size(
1134+
&self,
1135+
blob: &[u8],
1136+
encoding_type: EncodingType,
1137+
multi_pb: &MultiProgress,
1138+
chunk_size_override: Option<u64>,
11031139
) -> ClientResult<(Vec<SliverPair>, VerifiedBlobMetadataWithId)> {
11041140
use walrus_core::encoding::{
11051141
ChunkedBlobEncoder,
@@ -1116,10 +1152,23 @@ impl WalrusNodeClient<SuiContractClient> {
11161152
let n_shards = self.encoding_config.n_shards();
11171153
let max_single_chunk_size = max_blob_size_for_n_shards(n_shards, encoding_type);
11181154

1119-
let (pairs, metadata) = if blob_size > max_single_chunk_size {
1120-
// Use chunked encoding for large blobs
1121-
let (num_chunks, chunk_size) =
1122-
compute_chunk_parameters(blob_size, n_shards, encoding_type);
1155+
// If chunk_size_override is provided or blob is too large, use chunked encoding
1156+
let use_chunked = chunk_size_override.is_some() || blob_size > max_single_chunk_size;
1157+
1158+
let (pairs, metadata) = if use_chunked {
1159+
// Use chunked encoding for large blobs or when explicitly requested
1160+
let (num_chunks, chunk_size) = if let Some(override_size) = chunk_size_override {
1161+
// Validate that the override chunk size is reasonable
1162+
if override_size == 0 {
1163+
return Err(ClientError::store_blob_internal(
1164+
"chunk size must be greater than 0".to_string()
1165+
));
1166+
}
1167+
let num_chunks = (blob_size + override_size - 1) / override_size;
1168+
(num_chunks as u32, override_size)
1169+
} else {
1170+
compute_chunk_parameters(blob_size, n_shards, encoding_type)
1171+
};
11231172

11241173
spinner.set_message(format!("encoding large blob ({} chunks)", num_chunks));
11251174
tracing::info!(

crates/walrus-sdk/src/client/store_args.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ pub struct StoreArgs {
3333
pub metrics: Option<Arc<ClientMetrics>>,
3434
/// The optional upload relay client, that allows to store the blob via the relay.
3535
pub upload_relay_client: Option<UploadRelayClient>,
36+
/// Optional chunk size for chunked encoding, in bytes.
37+
pub chunk_size: Option<u64>,
3638
}
3739

3840
impl StoreArgs {
@@ -52,6 +54,7 @@ impl StoreArgs {
5254
post_store,
5355
metrics: None,
5456
upload_relay_client: None,
57+
chunk_size: None,
5558
}
5659
}
5760

@@ -69,6 +72,7 @@ impl StoreArgs {
6972
post_store: PostStoreAction::Keep,
7073
metrics: None,
7174
upload_relay_client: None,
75+
chunk_size: None,
7276
}
7377
}
7478

@@ -124,6 +128,12 @@ impl StoreArgs {
124128
self.metrics.as_ref()
125129
}
126130

131+
/// Sets the chunk size for chunked encoding.
132+
pub fn with_chunk_size(mut self, chunk_size: Option<u64>) -> Self {
133+
self.chunk_size = chunk_size;
134+
self
135+
}
136+
127137
/// Convenience method for `with_store_optimizations(StoreOptimizations::none())`.
128138
pub fn no_store_optimizations(self) -> Self {
129139
self.with_store_optimizations(StoreOptimizations::none())

crates/walrus-service/src/client/cli/args.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,6 +1163,15 @@ pub struct CommonStoreOptions {
11631163
#[arg(long, value_enum)]
11641164
#[serde(default)]
11651165
pub upload_mode: Option<UploadModeCli>,
1166+
/// The chunk size for chunked encoding, in bytes.
1167+
///
1168+
/// When specified, forces the use of chunked encoding (RS2Chunked) for the blob, splitting it
1169+
/// into chunks of the specified size. This is useful for storing very large blobs that exceed
1170+
/// memory limits. If not specified, the SDK will automatically choose between standard and
1171+
/// chunked encoding based on the blob size.
1172+
#[arg(long)]
1173+
#[serde(default)]
1174+
pub chunk_size: Option<u64>,
11661175
}
11671176

11681177
#[serde_as]
@@ -1922,6 +1931,7 @@ mod tests {
19221931
upload_relay: None,
19231932
skip_tip_confirmation: false,
19241933
upload_mode: None,
1934+
chunk_size: None,
19251935
},
19261936
})
19271937
}

crates/walrus-service/src/client/cli/runner.rs

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,7 @@ impl ClientCommandRunner {
671671
upload_relay,
672672
confirmation,
673673
upload_mode,
674+
chunk_size,
674675
}: StoreOptions,
675676
) -> Result<()> {
676677
epoch_arg.exactly_one_is_some()?;
@@ -693,7 +694,17 @@ impl ClientCommandRunner {
693694
anyhow::bail!("deletable blobs cannot be shared");
694695
}
695696

696-
let encoding_type = encoding_type.unwrap_or(DEFAULT_ENCODING);
697+
// If chunk_size is specified, force chunked encoding
698+
let encoding_type = if chunk_size.is_some() {
699+
if encoding_type.is_some() && encoding_type != Some(EncodingType::RS2Chunked) {
700+
anyhow::bail!(
701+
"chunk-size can only be used with RS2Chunked encoding; remove --encoding-type or set it to rs2-chunked"
702+
);
703+
}
704+
EncodingType::RS2Chunked
705+
} else {
706+
encoding_type.unwrap_or(DEFAULT_ENCODING)
707+
};
697708

698709
if dry_run {
699710
return Self::store_dry_run(client, files, encoding_type, epochs_ahead, self.json)
@@ -716,7 +727,8 @@ impl ClientCommandRunner {
716727
store_optimizations,
717728
persistence,
718729
post_store,
719-
);
730+
)
731+
.with_chunk_size(chunk_size);
720732

721733
if let Some(upload_relay) = upload_relay {
722734
let upload_relay_client = UploadRelayClient::new(
@@ -826,6 +838,7 @@ impl ClientCommandRunner {
826838
upload_relay,
827839
confirmation,
828840
upload_mode,
841+
chunk_size,
829842
}: StoreOptions,
830843
) -> Result<()> {
831844
epoch_arg.exactly_one_is_some()?;
@@ -838,10 +851,21 @@ impl ClientCommandRunner {
838851
anyhow::bail!("deletable blobs cannot be shared");
839852
}
840853

841-
let encoding_type = encoding_type.unwrap_or(DEFAULT_ENCODING);
842854
// Apply CLI upload preset to the in-memory config before building the client, if provided.
843855
let mut config = self.config?;
844856
config = apply_upload_mode_to_config(config, upload_mode.unwrap_or(UploadMode::Balanced));
857+
858+
// If chunk_size is specified, force chunked encoding
859+
let encoding_type = if chunk_size.is_some() {
860+
if encoding_type.is_some() && encoding_type != Some(EncodingType::RS2Chunked) {
861+
anyhow::bail!(
862+
"chunk-size can only be used with RS2Chunked encoding; remove --encoding-type or set it to rs2-chunked"
863+
);
864+
}
865+
EncodingType::RS2Chunked
866+
} else {
867+
encoding_type.unwrap_or(DEFAULT_ENCODING)
868+
};
845869
let client = get_contract_client(config, self.wallet, self.gas_budget, &None).await?;
846870

847871
let system_object = client.sui_client().read_client.get_system_object().await?;
@@ -872,7 +896,8 @@ impl ClientCommandRunner {
872896
store_optimizations,
873897
persistence,
874898
post_store,
875-
);
899+
)
900+
.with_chunk_size(chunk_size);
876901

877902
if let Some(upload_relay) = upload_relay {
878903
let upload_relay_client = UploadRelayClient::new(
@@ -1607,6 +1632,7 @@ struct StoreOptions {
16071632
upload_relay: Option<Url>,
16081633
confirmation: UserConfirmation,
16091634
upload_mode: Option<UploadMode>,
1635+
chunk_size: Option<NonZeroUsize>,
16101636
}
16111637

16121638
impl TryFrom<CommonStoreOptions> for StoreOptions {

0 commit comments

Comments
 (0)