@@ -439,6 +439,115 @@ service ContentAddressableStorage {
rpc GetTree(GetTreeRequest) returns (stream GetTreeResponse) {
  option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{root_digest.hash}/{root_digest.size_bytes}:getTree" };
}
+
+ // Split a blob into chunks.
+ //
+ // This call splits a blob into chunks, stores the chunks in the CAS, and
+ // returns a list of the chunk digests. Using this list, a client can check
+ // which chunks are locally available and just fetch the missing ones. The
+ // desired blob can be assembled by concatenating the fetched chunks in the
+ // order of the digests from the list.
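+ //
+ // Example (non-normative, with hypothetical chunk names): suppose a blob B
+ // is split into three chunks c1, c2, c3. The response then lists
+ // [digest(c1), digest(c2), digest(c3)], and B is exactly the concatenation
+ // c1 + c2 + c3. A client that still holds c1 and c3 from an earlier
+ // download only needs to fetch c2, after which it can reassemble B and
+ // verify the digest of the result against the requested blob digest.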
+ //
+ // This rpc can be used to reduce the amount of data that needs to be
+ // downloaded from the CAS for a large blob if chunks from earlier downloads
+ // of a different version of this blob are locally available. For this
+ // procedure to work properly, blobs SHOULD be split in a content-defined
+ // way, rather than with fixed-size chunking.
+ //
+ // If a split request is answered successfully, a client can expect the
+ // following guarantees from the server:
+ // 1. The blob chunks are stored in CAS.
+ // 2. Concatenating the blob chunks in the order of the digest list returned
+ //    by the server results in the original blob.
+ //
+ // Servers MAY implement this functionality, but MUST declare whether they
+ // support it or not by setting the
+ // [CacheCapabilities.blob_split_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_split_support]
+ // field accordingly.
+ //
+ // Clients MAY use this functionality; it is merely an optimization to
+ // reduce download network traffic when downloading large blobs from the
+ // CAS. However, clients MUST first check the server capabilities to
+ // determine whether blob splitting is supported by the server.
+ //
+ // Clients SHOULD verify that the digest of the blob assembled from the
+ // fetched chunks matches the requested blob digest.
+ //
+ // Since the generated chunks are stored as blobs, they are subject to the
+ // same lifetime handling as other blobs. In particular, the chunk
+ // lifetimes are independent from the lifetime of the original blob:
+ // * A blob and any chunk derived from it may be evicted from the CAS at
+ //   different times.
+ // * A call to Split extends the lifetime of the original blob, and sets
+ //   the lifetimes of the resulting chunks (or extends the lifetimes of
+ //   already-existing chunks).
+ // * Touching a chunk extends its lifetime, but does not extend the
+ //   lifetime of the original blob.
+ // * Touching the original blob extends its lifetime, but does not extend
+ //   the lifetimes of chunks derived from it.
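+ //
+ // For example (non-normative), a server may evict the original blob while
+ // retaining all of its chunks; a subsequent SplitBlob call for that blob
+ // would then fail with `NOT_FOUND` even though every chunk is still
+ // present in the CAS.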
+ //
+ // When blob splitting and splicing are used at the same time, the clients
+ // and the server SHOULD agree out-of-band upon a chunking algorithm used by
+ // both parties to benefit from each other's chunk data and avoid
+ // unnecessary data duplication.
+ //
+ // Errors:
+ //
+ // * `NOT_FOUND`: The requested blob is not present in the CAS.
+ // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the blob
+ //   chunks.
+ rpc SplitBlob(SplitBlobRequest) returns (SplitBlobResponse) {
+   option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{blob_digest.hash}/{blob_digest.size_bytes}:splitBlob" };
+ }
+
+ // Splice a blob from chunks.
+ //
+ // This is the complementary operation to the
+ // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
+ // function and handles the chunked upload of large blobs to save upload
+ // traffic.
+ //
+ // If a client needs to upload a large blob and is able to split the blob
+ // into chunks in such a way that reusable chunks are obtained, e.g., by
+ // means of content-defined chunking, it can first determine which parts of
+ // the blob are already available in the remote CAS and upload the missing
+ // chunks, and then use this API to instruct the server to splice the
+ // original blob from the remotely available blob chunks.
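+ //
+ // Example (non-normative): to upload a large blob B, a client may split B
+ // locally into chunks c1, c2, c3 by means of content-defined chunking, call
+ // [ContentAddressableStorage.FindMissingBlobs][build.bazel.remote.execution.v2.ContentAddressableStorage.FindMissingBlobs]
+ // on the chunk digests, upload only the chunks reported missing, and then
+ // call SpliceBlob with chunk_digests = [digest(c1), digest(c2), digest(c3)]
+ // and blob_digest = digest(B).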
+ //
+ // Servers MAY implement this functionality, but MUST declare whether they
+ // support it or not by setting the
+ // [CacheCapabilities.blob_splice_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_splice_support]
+ // field accordingly.
+ //
+ // Clients MAY use this functionality; it is merely an optimization to
+ // reduce upload traffic when uploading large blobs to the CAS. However,
+ // clients MUST first check the server capabilities to determine whether
+ // blob splicing is supported by the server.
+ //
+ // In order to ensure data consistency of the CAS, the server MUST only add
+ // entries to the CAS under a hash the server has verified itself. In
+ // particular, it MUST NOT trust the result hash provided by the client. The
+ // server MAY accept a request as a no-op if the client-provided result hash
+ // is already in CAS; the lifetime of that blob is then extended as usual.
+ // If the client-provided result is not in CAS, the server SHOULD verify the
+ // result hash sent by the client and reject requests where a different
+ // splice result is obtained.
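+ //
+ // For example (non-normative), if the concatenation of the requested chunks
+ // hashes to a digest that differs from the blob_digest sent by the client,
+ // the server SHOULD reject the request with `INVALID_ARGUMENT` instead of
+ // storing the result under the client-provided digest.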
+ //
+ // When blob splitting and splicing are used at the same time, the clients
+ // and the server SHOULD agree out-of-band upon a chunking algorithm used by
+ // both parties to benefit from each other's chunk data and avoid
+ // unnecessary data duplication.
+ //
+ // Errors:
+ //
+ // * `NOT_FOUND`: At least one of the blob chunks is not present in the CAS.
+ // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the
+ //   spliced blob.
+ // * `INVALID_ARGUMENT`: The digest of the spliced blob is different from the
+ //   provided expected digest.
+ rpc SpliceBlob(SpliceBlobRequest) returns (SpliceBlobResponse) {
+   option (google.api.http) = { post: "/v2/{instance_name=**}/blobs:spliceBlob" body: "*" };
+ }
}

// The Capabilities service may be used by remote execution clients to query
@@ -1846,6 +1955,86 @@ message GetTreeResponse {
string next_page_token = 2;
}

+ // A request message for
+ // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+ message SplitBlobRequest {
+ // The instance of the execution system to operate against. A server may
+ // support multiple instances of the execution system (with their own workers,
+ // storage, caches, etc.). The server MAY require use of this field to select
+ // between them in an implementation-defined fashion, otherwise it can be
+ // omitted.
+ string instance_name = 1;
+
+ // The digest of the blob to be split.
+ Digest blob_digest = 2;
+
+ // The digest function of the blob to be split.
+ //
+ // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
+ // SHA384, SHA512, or VSO, the client MAY leave this field unset. In
+ // that case the server SHOULD infer the digest function using the
+ // length of the blob digest hashes and the digest functions announced
+ // in the server's capabilities.
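+ //
+ // For example (non-normative), if the hashes in the request are 64
+ // hexadecimal characters long and SHA256 is the only announced digest
+ // function producing 32-byte hashes, the server can infer that SHA256 is
+ // being used.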
+ DigestFunction.Value digest_function = 3;
+ }
+
+ // A response message for
+ // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+ message SplitBlobResponse {
+ // The ordered list of digests of the chunks into which the blob was split.
+ // The original blob is assembled by concatenating the chunk data according to
+ // the order of the digests given by this list.
+ //
+ // The server MUST use the same digest function as the one explicitly or
+ // implicitly (through hash length) specified in the split request.
+ repeated Digest chunk_digests = 1;
+ }
+
+ // A request message for
+ // [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
+ message SpliceBlobRequest {
+ // The instance of the execution system to operate against. A server may
+ // support multiple instances of the execution system (with their own workers,
+ // storage, caches, etc.). The server MAY require use of this field to select
+ // between them in an implementation-defined fashion, otherwise it can be
+ // omitted.
+ string instance_name = 1;
+
+ // Expected digest of the spliced blob. The client SHOULD set this field
+ // for the following reasons:
+ // 1. It allows the server to perform an early existence check of the blob
+ //    before spending the splicing effort, as described in the
+ //    [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob]
+ //    documentation.
+ // 2. It allows servers with different storage backends to dispatch the
+ //    request to the correct storage backend based on the size and/or the
+ //    hash of the blob.
+ Digest blob_digest = 2;
+
+ // The ordered list of digests of the chunks which need to be concatenated to
+ // assemble the original blob.
+ repeated Digest chunk_digests = 3;
+
+ // The digest function of all chunks to be concatenated and of the blob to be
+ // spliced. The server MUST use the same digest function in both cases.
+ //
+ // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256, SHA384,
+ // SHA512, or VSO, the client MAY leave this field unset. In that case the
+ // server SHOULD infer the digest function using the length of the blob digest
+ // hashes and the digest functions announced in the server's capabilities.
+ DigestFunction.Value digest_function = 4;
+ }
+
+ // A response message for
+ // [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
+ message SpliceBlobResponse {
+ // Computed digest of the spliced blob.
+ //
+ // The server MUST use the same digest function as the one explicitly or
+ // implicitly (through hash length) specified in the splice request.
+ Digest blob_digest = 1;
+ }
+

// A request message for
// [Capabilities.GetCapabilities][build.bazel.remote.execution.v2.Capabilities.GetCapabilities].
message GetCapabilitiesRequest {
@@ -2076,6 +2265,20 @@ message CacheCapabilities {
// - If the cache implementation returns a given limit, it MAY still serve
//   blobs larger than this limit.
int64 max_cas_blob_size_bytes = 8;
+
+ // Whether blob splitting is supported for the particular server/instance. If
+ // yes, the server/instance implements the specified behavior for blob
+ // splitting and a meaningful result can be expected from the
+ // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
+ // operation.
+ bool blob_split_support = 9;
+
+ // Whether blob splicing is supported for the particular server/instance. If
+ // yes, the server/instance implements the specified behavior for blob
+ // splicing and a meaningful result can be expected from the
+ // [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob]
+ // operation.
+ bool blob_splice_support = 10;
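+
+ // Example (non-normative): a client retrieves these fields via
+ // [Capabilities.GetCapabilities][build.bazel.remote.execution.v2.Capabilities.GetCapabilities]
+ // and calls SplitBlob or SpliceBlob only if the corresponding field is set
+ // to true.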
}

// Capabilities of the remote execution system.