From 1a9b2d86347a52c0aea5a4b89195a0c81bc0ffc6 Mon Sep 17 00:00:00 2001 From: Son Luong Ngoc Date: Fri, 13 Oct 2023 10:00:20 +0200 Subject: [PATCH 1/6] remote_exec.proto: support ZSTD with dictionary Closes #272 --- .../execution/v2/remote_execution.proto | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/build/bazel/remote/execution/v2/remote_execution.proto b/build/bazel/remote/execution/v2/remote_execution.proto index 0bad5d1e..ea8967e9 100644 --- a/build/bazel/remote/execution/v2/remote_execution.proto +++ b/build/bazel/remote/execution/v2/remote_execution.proto @@ -1984,7 +1984,7 @@ message Compressor { // not need to advertise it. IDENTITY = 0; - // Zstandard compression. + // Zstandard compression without dictionary. ZSTD = 1; // RFC 1951 Deflate. This format is identical to what is used by ZIP @@ -1997,9 +1997,30 @@ message Compressor { // Brotli compression. BROTLI = 3; + + // Zstandard compression with dictionary. + // + // When this is used, the server MUST advertise the dictionaries by + // including + // [ZstdDictionaryRegistry][build.bazel.remote.execution.v2.ZstdDictionaryRegistry] + // digest in CacheCapabilities. + ZSTD_DICT = 4; } } +message ZstdDictionaryRegistry { + // Each file respresent a single Zstandard dictionary + // with name being the `dictId` that was added to the header + // of the compressed file. + // + // A special file with name `default` is used to represent the + // default dictionary that is used when no `dictId` is specified. + // Clients SHOULD prefer to use the default dictionary when possible. + // + // The `default` dictionary MUST be present. + repeated FileNode dictionaries = 1; +} + // Capabilities of the remote cache system. message CacheCapabilities { // All the digest functions supported by the remote cache. @@ -2033,6 +2054,11 @@ message CacheCapabilities { // [BatchUpdateBlobs][build.bazel.remote.execution.v2.ContentAddressableStorage.BatchUpdateBlobs] // requests. repeated Compressor.Value supported_batch_update_compressors = 7; + + // The digest of the + // [ZstdDictionaryRegistry][build.bazel.remote.execution.v2.ZstdDictionaryRegistry] + // that contains all the dictionaries supported by the remote cache. + Digest zstd_dictionary_registry = 8; } // Capabilities of the remote execution system. From 2b0669a461f368465aea9e35b2ac806f42e1b532 Mon Sep 17 00:00:00 2001 From: Son Luong Ngoc Date: Mon, 22 Jan 2024 13:58:05 +0100 Subject: [PATCH 2/6] Switch to use ZstdDictionaries --- .../execution/v2/remote_execution.proto | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/build/bazel/remote/execution/v2/remote_execution.proto b/build/bazel/remote/execution/v2/remote_execution.proto index ea8967e9..a67b6625 100644 --- a/build/bazel/remote/execution/v2/remote_execution.proto +++ b/build/bazel/remote/execution/v2/remote_execution.proto @@ -2002,23 +2002,29 @@ message Compressor { // // When this is used, the server MUST advertise the dictionaries by // including - // [ZstdDictionaryRegistry][build.bazel.remote.execution.v2.ZstdDictionaryRegistry] + // [ZstdDictionaries][build.bazel.remote.execution.v2.ZstdDictionaries] // digest in CacheCapabilities. ZSTD_DICT = 4; } } -message ZstdDictionaryRegistry { - // Each file respresent a single Zstandard dictionary - // with name being the `dictId` that was added to the header - // of the compressed file. - // - // A special file with name `default` is used to represent the - // default dictionary that is used when no `dictId` is specified. - // Clients SHOULD prefer to use the default dictionary when possible. - // - // The `default` dictionary MUST be present. - repeated FileNode dictionaries = 1; +message ZstdDictionaries { + message Dictionary { + uint32 id 1; + Digest digest 2; + } + + // The dictionary ID that clients SHOULD use when uploading + // objects into the Content Addressable Storage. An entry for + // this dictionary ID MUST be present in `dictionaries`. + uint32 default_dictionary_id = 1; + + // Dictionaries that may be used by the server when returning + // objects stored in the Content Addressable Storage. The key + // corresponds to a dictionary ID, as described in RFC 8878, + // section 5, while the value refers to a dictionary + // as described in RFC 8878, chapter 6. + repeated Dictionary dictionaries = 2; } // Capabilities of the remote cache system. @@ -2056,9 +2062,9 @@ message CacheCapabilities { repeated Compressor.Value supported_batch_update_compressors = 7; // The digest of the - // [ZstdDictionaryRegistry][build.bazel.remote.execution.v2.ZstdDictionaryRegistry] + // [ZstdDictionaries][build.bazel.remote.execution.v2.ZstdDictionaries] // that contains all the dictionaries supported by the remote cache. - Digest zstd_dictionary_registry = 8; + Digest zstd_dictionaries = 8; } // Capabilities of the remote execution system. From 4cd59446516c3f3195573ce72922351681bb00ba Mon Sep 17 00:00:00 2001 From: Son Luong Ngoc Date: Mon, 22 Jan 2024 14:06:03 +0100 Subject: [PATCH 3/6] added qualified digest msg --- build/bazel/remote/execution/v2/remote_execution.proto | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/build/bazel/remote/execution/v2/remote_execution.proto b/build/bazel/remote/execution/v2/remote_execution.proto index a67b6625..d7946550 100644 --- a/build/bazel/remote/execution/v2/remote_execution.proto +++ b/build/bazel/remote/execution/v2/remote_execution.proto @@ -2029,6 +2029,11 @@ message ZstdDictionaries { // Capabilities of the remote cache system. message CacheCapabilities { + message QualifiedDigest { + Digest digest = 1; + DigestFunction.value digest_function = 2; + } + // All the digest functions supported by the remote cache. // Remote cache may support multiple digest functions simultaneously. repeated DigestFunction.Value digest_functions = 1; @@ -2064,7 +2069,7 @@ message CacheCapabilities { // The digest of the // [ZstdDictionaries][build.bazel.remote.execution.v2.ZstdDictionaries] // that contains all the dictionaries supported by the remote cache. - Digest zstd_dictionaries = 8; + QualifiedDigest zstd_dictionaries = 8; } // Capabilities of the remote execution system. From 759270241b149ee3ed0d5afc34aa8c59c7ea0abb Mon Sep 17 00:00:00 2001 From: Son Luong Ngoc Date: Mon, 22 Jan 2024 15:47:54 +0100 Subject: [PATCH 4/6] added digest functions negotiation --- .../execution/v2/remote_execution.proto | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/build/bazel/remote/execution/v2/remote_execution.proto b/build/bazel/remote/execution/v2/remote_execution.proto index d7946550..1190acf6 100644 --- a/build/bazel/remote/execution/v2/remote_execution.proto +++ b/build/bazel/remote/execution/v2/remote_execution.proto @@ -1823,6 +1823,15 @@ message GetCapabilitiesRequest { // between them in an implementation-defined fashion, otherwise it can be // omitted. string instance_name = 1; + + // All the digest functions supported by the client. + // Server should consider which digest functions the client supports to use + // the correct Digest in the + // [ServerCapabilities][build.bazel.remote.execution.v2.ServerCapabilities] + // response. + // If there is no overlapping between client's and server's supported digest + // functions, then server MAY omit the digest fields in the response. + repeated DigestFunction.Value digest_functions = 1; } // A response message for @@ -2069,6 +2078,16 @@ message CacheCapabilities { // The digest of the // [ZstdDictionaries][build.bazel.remote.execution.v2.ZstdDictionaries] // that contains all the dictionaries supported by the remote cache. + // + // Digest function used should be one of the digest functions that both + // client and server support, as specified in digest_functions field in + // [CacheCapabilities.digest_functions][build.bazel.remote.execution.v2.CacheCapabilities.digest_functions] + // and + // [GetCapabilitiesRequest.digest_functions][build.bazel.remote.execution.v2.GetCapabilitiesRequest.digest_functions]. + // + // If there is no overlapping between client's and server's supported + // dictionaries, then server MAY omit the dictionaries field in the + // response. QualifiedDigest zstd_dictionaries = 8; } From 8bf9907177e8e59fad929ffd78caf54b28d73ecc Mon Sep 17 00:00:00 2001 From: Son Luong Ngoc Date: Tue, 23 Jan 2024 16:55:25 +0100 Subject: [PATCH 5/6] switch to dedicated rpc --- .../execution/v2/remote_execution.proto | 98 +++++++++---------- 1 file changed, 46 insertions(+), 52 deletions(-) diff --git a/build/bazel/remote/execution/v2/remote_execution.proto b/build/bazel/remote/execution/v2/remote_execution.proto index 1190acf6..4f715f59 100644 --- a/build/bazel/remote/execution/v2/remote_execution.proto +++ b/build/bazel/remote/execution/v2/remote_execution.proto @@ -430,6 +430,13 @@ service ContentAddressableStorage { rpc GetTree(GetTreeRequest) returns (stream GetTreeResponse) { option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{root_digest.hash}/{root_digest.size_bytes}:getTree" }; } + + // Fetch the list of dictionaries use in Zstd compression. + // + // Servers MUST implement this RPC method iff the `ZSTD_DICT` compressor is supported. + rpc GetZstdDictionaries(GetZstdDictionariesRequest) returns (GetZstdDictionariesResponse) { + option (google.api.http) = { get: "/v2/{instance_name=**}/blobs:zstdDictionaries" }; + } } // The Capabilities service may be used by remote execution clients to query @@ -1814,6 +1821,39 @@ message GetTreeResponse { string next_page_token = 2; } +message GetZstdDictionariesRequest { + // The instance of the execution system to operate against. A server may + // support multiple instances of the execution system (with their own workers, + // storage, caches, etc.). The server MAY require use of this field to select + // between them in an implementation-defined fashion, otherwise it can be + // omitted. + string instance_name = 1; + + // The digest functions supported by the client. + // Must be one of the server's supported digest functions listed in + // [CacheCapabilities.digest_functions][build.bazel.remote.execution.v2.CacheCapabilities.digest_functions]. + DigestFunction.Value digest_function = 2; +} + +message GetZstdDictionariesResponse { + message Dictionary { + uint32 id = 1; + Digest digest = 2; + } + + // The dictionary ID that clients SHOULD use when uploading + // objects into the Content Addressable Storage. An entry for + // this dictionary ID MUST be present in `dictionaries`. + uint32 default_dictionary_id = 1; + + // Dictionaries that may be used by the server when returning + // objects stored in the Content Addressable Storage. The key + // corresponds to a dictionary ID, as described in RFC 8878, + // section 5, while the value refers to a dictionary + // as described in RFC 8878, chapter 6. + repeated Dictionary dictionaries = 2; +} + // A request message for // [Capabilities.GetCapabilities][build.bazel.remote.execution.v2.Capabilities.GetCapabilities]. message GetCapabilitiesRequest { @@ -1823,15 +1863,6 @@ message GetCapabilitiesRequest { // between them in an implementation-defined fashion, otherwise it can be // omitted. string instance_name = 1; - - // All the digest functions supported by the client. - // Server should consider which digest functions the client supports to use - // the correct Digest in the - // [ServerCapabilities][build.bazel.remote.execution.v2.ServerCapabilities] - // response. - // If there is no overlapping between client's and server's supported digest - // functions, then server MAY omit the digest fields in the response. - repeated DigestFunction.Value digest_functions = 1; } // A response message for @@ -2009,40 +2040,18 @@ message Compressor { // Zstandard compression with dictionary. // - // When this is used, the server MUST advertise the dictionaries by - // including - // [ZstdDictionaries][build.bazel.remote.execution.v2.ZstdDictionaries] - // digest in CacheCapabilities. + // Servers which support this compressor MUST implement + // [ContentAddressableStorage.GetZstdDictionaries][build.bazel.remote.execution.v2.ContentAddressableStorage.GetZstdDictionaries]. + // + // Clients which support this compressor SHOULD call `GetZstdDictionaries` + // rpc to obtain the dictionaries from the server to be used for + // ZSTD compression/decompression. ZSTD_DICT = 4; } } -message ZstdDictionaries { - message Dictionary { - uint32 id 1; - Digest digest 2; - } - - // The dictionary ID that clients SHOULD use when uploading - // objects into the Content Addressable Storage. An entry for - // this dictionary ID MUST be present in `dictionaries`. - uint32 default_dictionary_id = 1; - - // Dictionaries that may be used by the server when returning - // objects stored in the Content Addressable Storage. The key - // corresponds to a dictionary ID, as described in RFC 8878, - // section 5, while the value refers to a dictionary - // as described in RFC 8878, chapter 6. - repeated Dictionary dictionaries = 2; -} - // Capabilities of the remote cache system. message CacheCapabilities { - message QualifiedDigest { - Digest digest = 1; - DigestFunction.value digest_function = 2; - } - // All the digest functions supported by the remote cache. // Remote cache may support multiple digest functions simultaneously. repeated DigestFunction.Value digest_functions = 1; @@ -2074,21 +2083,6 @@ message CacheCapabilities { // [BatchUpdateBlobs][build.bazel.remote.execution.v2.ContentAddressableStorage.BatchUpdateBlobs] // requests. repeated Compressor.Value supported_batch_update_compressors = 7; - - // The digest of the - // [ZstdDictionaries][build.bazel.remote.execution.v2.ZstdDictionaries] - // that contains all the dictionaries supported by the remote cache. - // - // Digest function used should be one of the digest functions that both - // client and server support, as specified in digest_functions field in - // [CacheCapabilities.digest_functions][build.bazel.remote.execution.v2.CacheCapabilities.digest_functions] - // and - // [GetCapabilitiesRequest.digest_functions][build.bazel.remote.execution.v2.GetCapabilitiesRequest.digest_functions]. - // - // If there is no overlapping between client's and server's supported - // dictionaries, then server MAY omit the dictionaries field in the - // response. - QualifiedDigest zstd_dictionaries = 8; } // Capabilities of the remote execution system. From 0acc48b288833af44fbd53093a7e31448ae82159 Mon Sep 17 00:00:00 2001 From: Son Luong Ngoc Date: Wed, 31 Jan 2024 18:56:44 +0100 Subject: [PATCH 6/6] Apply suggestions from code review Co-authored-by: Brandon Duffany --- build/bazel/remote/execution/v2/remote_execution.proto | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build/bazel/remote/execution/v2/remote_execution.proto b/build/bazel/remote/execution/v2/remote_execution.proto index 4f715f59..ca1dca67 100644 --- a/build/bazel/remote/execution/v2/remote_execution.proto +++ b/build/bazel/remote/execution/v2/remote_execution.proto @@ -1829,7 +1829,8 @@ message GetZstdDictionariesRequest { // omitted. string instance_name = 1; - // The digest functions supported by the client. + // The digest function in use by the client, which determines the + // digest function for all Digests in the response. // Must be one of the server's supported digest functions listed in // [CacheCapabilities.digest_functions][build.bazel.remote.execution.v2.CacheCapabilities.digest_functions]. DigestFunction.Value digest_function = 2;