@@ -1976,6 +1976,12 @@ message SplitBlobRequest {
1976
1976
// length of the blob digest hashes and the digest functions announced
1977
1977
// in the server's capabilities.
1978
1978
DigestFunction.Value digest_function = 3 ;
1979
+
1980
+ // The chunking function that the client prefers to use.
1981
+ //
1982
+ // The server MAY use a different chunking function. The client MUST check
1983
+ // the chunking function used in the response.
1984
+ ChunkingFunction.Value chunking_function = 4 ;
1979
1985
}
1980
1986
1981
1987
// A response message for
@@ -1988,6 +1994,9 @@ message SplitBlobResponse {
1988
1994
// The server MUST use the same digest function as the one explicitly or
1989
1995
// implicitly (through hash length) specified in the split request.
1990
1996
repeated Digest chunk_digests = 1 ;
1997
+
1998
+ // The chunking function used to split the blob.
1999
+ ChunkingFunction.Value chunking_function = 2 ;
1991
2000
}
1992
2001
1993
2002
// A request message for
@@ -2023,6 +2032,9 @@ message SpliceBlobRequest {
2023
2032
// server SHOULD infer the digest function using the length of the blob digest
2024
2033
// hashes and the digest functions announced in the server's capabilities.
2025
2034
DigestFunction.Value digest_function = 4 ;
2035
+
2036
+ // The chunking function that the client used to split the blob.
2037
+ ChunkingFunction.Value chunking_function = 5 ;
2026
2038
}
2027
2039
2028
2040
// A response message for
@@ -2158,6 +2170,49 @@ message DigestFunction {
2158
2170
}
2159
2171
}
2160
2172
2173
+ // The chunking function is used to split a blob into chunks.
2174
+ //
2175
+ // The server SHOULD advertise the chunking functions it supports via the
2176
+ // [ChunkingConfiguration.supported_chunking_algorithms][build.bazel.remote.execution.v2.ChunkingConfiguration.supported_chunking_algorithms]
2177
+ // field.
2178
+ //
2179
+ // The client SHOULD use one of the chunking functions advertised by the server.
2180
+ //
2181
+ // When blob splitting and splicing are used at the same time, the clients and
2182
+ // the server SHOULD agree out-of-band upon a chunking algorithm used by both
2183
+ // parties to benefit from each other's chunk data and avoid unnecessary data
2184
+ // duplication.
2185
+ message ChunkingFunction {
2186
+ enum Value {
2187
+ // A generic chunking function. If a server supports blob splitting/splicing
2188
+ // and advertises this value, it can be safely assumed that the original
2189
+ // blob can be recreated by concatenating the chunks. No other assumptions
2190
+ // about the chunking algorithm can be made.
2191
+ UNKNOWN = 0 ;
2192
+
2193
+ // This is a variant of the FastCDC chunking algorithm as described in the
2194
+ // 2020 paper by Wen Xia, et al.
2195
+ // See https://ieeexplore.ieee.org/document/9055082 for details.
2196
+ // A reference implementation can be found in the Rust library
2197
+ // https://docs.rs/fastcdc/3.2.1/fastcdc/v2020/index.html
2198
+ // with the gear tables available at
2199
+ // https://github.com/nlfiedler/fastcdc-rs/blob/3.2.1/src/v2020/mod.rs
2200
+ //
2201
+ // A server that supports this chunking function MUST advertise the following
2202
+ // configuration parameters through the CacheCapabilities message:
2203
+ // - normalization_level
2204
+ // - min_chunk_size_bytes
2205
+ // - avg_chunk_size_bytes
2206
+ // - max_chunk_size_bytes
2207
+ // - seed
2208
+ //
2209
+ // The client MUST use these advertised parameters to set up the FastCDC chunker.
2210
+ // The remaining parameters, such as mask_s and mask_l, can be derived from the
2211
+ // average chunk size parameter.
2212
+ FASTCDC_2020 = 1 ;
2213
+ }
2214
+ }
2215
+
2161
2216
// Describes the server/instance capabilities for updating the action cache.
2162
2217
message ActionCacheUpdateCapabilities {
2163
2218
bool update_enabled = 1 ;
@@ -2279,6 +2334,50 @@ message CacheCapabilities {
2279
2334
// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob]
2280
2335
// operation.
2281
2336
bool blob_splice_support = 10 ;
2337
+
2338
+ // The chunking configuration of the server.
2339
+ ChunkingConfiguration chunking_configuration = 11 ;
2340
+ }
2341
+
2342
+ // The chunking configuration of the server.
2343
+ message ChunkingConfiguration {
2344
+ // If any of the advertised parameters are not within the expected range,
2345
+ // the client SHOULD ignore FastCDC chunking function support.
2346
+ message FastCDCParams {
2347
+ // The normalization level for the FastCDC chunking algorithm.
2348
+ // The value MUST be between 0 and 3.
2349
+ uint32 normalization_level = 1 ;
2350
+
2351
+ // The minimum chunk size for the FastCDC chunking algorithm.
2352
+ // The value MUST be between 256 bytes and 64 KiB.
2353
+ uint64 min_chunk_size_bytes = 2 ;
2354
+
2355
+ // The average chunk size for the FastCDC chunking algorithm.
2356
+ // The value MUST be between 1 KiB and 256 KiB.
2357
+ uint64 avg_chunk_size_bytes = 3 ;
2358
+
2359
+ // The maximum chunk size for the FastCDC chunking algorithm.
2360
+ // The value MUST be between 4 KiB and 4 MiB.
2361
+ uint64 max_chunk_size_bytes = 4 ;
2362
+
2363
+ // The seed for the FastCDC chunking algorithm.
2364
+ uint32 seed = 5 ;
2365
+ }
2366
+
2367
+ // A list of chunking algorithms that the server supports for splitting and
2368
+ // splicing blobs.
2369
+ repeated ChunkingFunction.Value supported_chunking_algorithms = 1 ;
2370
+
2371
+ // The minimum blob size that should be considered for chunking.
2372
+ // Blobs smaller than this threshold SHOULD be sent as single blobs.
2373
+ // If unset, clients SHOULD use max_cas_blob_size_bytes as the
2374
+ // minimum blob size for chunking.
2375
+ // If both this field and max_cas_blob_size_bytes are unset, clients
2376
+ // MAY chunk blobs of any size.
2377
+ uint64 min_blob_size_for_chunking_bytes = 2 ;
2378
+
2379
+ // The parameters for the FastCDC chunking algorithm.
2380
+ FastCDCParams fastcdc_params = 3 ;
2282
2381
}
2283
2382
2284
2383
// Capabilities of the remote execution system.
0 commit comments