From aa2e130e8f0919b8d674ad5462797684d90aa52a Mon Sep 17 00:00:00 2001 From: Steve Yoo Date: Tue, 23 Sep 2025 10:38:19 -0400 Subject: [PATCH 1/5] Expose CRT file IO options --- awscli/customizations/s3/factory.py | 8 +++++++ awscli/customizations/s3/transferconfig.py | 25 ++++++++++++++++++++-- awscli/s3transfer/crt.py | 8 ++++++- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/awscli/customizations/s3/factory.py b/awscli/customizations/s3/factory.py index 8bcf2710dd75..9045227271dd 100644 --- a/awscli/customizations/s3/factory.py +++ b/awscli/customizations/s3/factory.py @@ -138,6 +138,14 @@ def _create_crt_client(self, params, runtime_config): create_crt_client_kwargs['crt_credentials_provider'] = ( crt_credentials_provider ) + fio_options = {} + if (val := runtime_config.get('should_stream')) is not None: + fio_options['should_stream'] = val + if (val := runtime_config.get('disk_throughput')) is not None: + fio_options['disk_throughput_gbps'] = val + if (val := runtime_config.get('direct_io')) is not None: + fio_options['direct_io'] = val + create_crt_client_kwargs['fio_options'] = fio_options return create_s3_crt_client(**create_crt_client_kwargs) diff --git a/awscli/customizations/s3/transferconfig.py b/awscli/customizations/s3/transferconfig.py index 5227217d868c..5502ea93b0cc 100644 --- a/awscli/customizations/s3/transferconfig.py +++ b/awscli/customizations/s3/transferconfig.py @@ -15,6 +15,7 @@ # commands. import logging +from botocore.utils import ensure_boolean from s3transfer.manager import TransferConfig from awscli.customizations.s3 import constants @@ -31,6 +32,9 @@ 'preferred_transfer_client': constants.AUTO_RESOLVE_TRANSFER_CLIENT, 'target_bandwidth': None, 'io_chunksize': 256 * 1024, + 'should_stream': None, + 'disk_throughput': None, + 'direct_io': None, } @@ -47,9 +51,18 @@ class RuntimeConfig: 'max_bandwidth', 'target_bandwidth', 'io_chunksize', + 'disk_throughput', + ] + HUMAN_READABLE_SIZES = [ + 'multipart_chunksize', + 'multipart_threshold', + 'io_chunksize', + ] + HUMAN_READABLE_RATES = [ + 'max_bandwidth', + 'target_bandwidth', + 'disk_throughput', ] - HUMAN_READABLE_SIZES = ['multipart_chunksize', 'multipart_threshold', 'io_chunksize'] - HUMAN_READABLE_RATES = ['max_bandwidth', 'target_bandwidth'] SUPPORTED_CHOICES = { 'preferred_transfer_client': [ constants.AUTO_RESOLVE_TRANSFER_CLIENT, @@ -62,6 +75,7 @@ class RuntimeConfig: 'default': constants.CLASSIC_TRANSFER_CLIENT } } + BOOLEANS = ['should_stream', 'direct_io'] @staticmethod def defaults(): @@ -83,6 +97,7 @@ def build_config(self, **kwargs): runtime_config.update(kwargs) self._convert_human_readable_sizes(runtime_config) self._convert_human_readable_rates(runtime_config) + self._convert_booleans(runtime_config) self._resolve_choice_aliases(runtime_config) self._validate_config(runtime_config) return runtime_config @@ -116,6 +131,12 @@ def _convert_human_readable_rates(self, runtime_config): 'second (e.g. 10Mb/s or 800Kb/s)' % value ) + def _convert_booleans(self, runtime_config): + for attr in self.BOOLEANS: + value = runtime_config.get(attr) + if value is not None: + runtime_config[attr] = ensure_boolean(value) + def _human_readable_rate_to_int(self, value): # The human_readable_to_int() utility only supports integers (e.g. 1024) # as strings and human readable sizes (e.g. 10MB, 5GB). It does not diff --git a/awscli/s3transfer/crt.py b/awscli/s3transfer/crt.py index c110e29fdcbd..09af3c4e4288 100644 --- a/awscli/s3transfer/crt.py +++ b/awscli/s3transfer/crt.py @@ -33,6 +33,7 @@ ) from awscrt.s3 import ( S3Client, + S3FileIoOptions, S3RequestTlsMode, S3RequestType, S3ResponseError, @@ -44,7 +45,7 @@ from botocore.exceptions import NoCredentialsError from botocore.useragent import register_feature_id from botocore.utils import ArnParser, InvalidArnException, is_s3express_bucket -from s3transfer.constants import FULL_OBJECT_CHECKSUM_ARGS, MB +from s3transfer.constants import FULL_OBJECT_CHECKSUM_ARGS, GB, MB from s3transfer.exceptions import TransferNotDoneError from s3transfer.futures import BaseTransferFuture, BaseTransferMeta from s3transfer.utils import CallArgs, OSUtils, get_callbacks @@ -87,6 +88,7 @@ def create_s3_crt_client( part_size=8 * MB, use_ssl=True, verify=None, + fio_options=None, ): """ :type region: str @@ -153,6 +155,9 @@ def create_s3_crt_client( target_gbps = _get_crt_throughput_target_gbps( provided_throughput_target_bytes=target_throughput ) + fio_options = fio_options or {} + if disk_throughput := fio_options.get('disk_throughput_gbps'): + fio_options['disk_throughput_gbps'] = disk_throughput * 8 / GB return S3Client( bootstrap=bootstrap, region=region, @@ -162,6 +167,7 @@ def create_s3_crt_client( tls_connection_options=tls_connection_options, throughput_target_gbps=target_gbps, enable_s3express=True, + fio_options=S3FileIoOptions(**fio_options), ) From 719fcd294f2f86fd78b2dfcae51d6afa431defaa Mon Sep 17 00:00:00 2001 From: Steve Yoo Date: Tue, 23 Sep 2025 13:35:50 -0400 Subject: [PATCH 2/5] Add tests --- awscli/customizations/s3/factory.py | 4 +- awscli/s3transfer/crt.py | 10 ++-- tests/unit/customizations/s3/test_factory.py | 23 +++++++- .../customizations/s3/test_transferconfig.py | 29 ++++++++++ tests/unit/s3transfer/test_crt.py | 53 +++++++++++++++++++ 5 files changed, 112 insertions(+), 7 deletions(-) diff --git a/awscli/customizations/s3/factory.py b/awscli/customizations/s3/factory.py index 9045227271dd..6b383ac0bc18 100644 --- a/awscli/customizations/s3/factory.py +++ b/awscli/customizations/s3/factory.py @@ -15,6 +15,7 @@ import awscrt.s3 from botocore.client import Config from botocore.httpsession import DEFAULT_CA_BUNDLE +from s3transfer.constants import GB from s3transfer.crt import ( BotocoreCRTCredentialsWrapper, BotocoreCRTRequestSerializer, @@ -142,7 +143,8 @@ def _create_crt_client(self, params, runtime_config): if (val := runtime_config.get('should_stream')) is not None: fio_options['should_stream'] = val if (val := runtime_config.get('disk_throughput')) is not None: - fio_options['disk_throughput_gbps'] = val + # Convert bytes to gigabits. + fio_options['disk_throughput_gbps'] = val * 8 / GB if (val := runtime_config.get('direct_io')) is not None: fio_options['direct_io'] = val create_crt_client_kwargs['fio_options'] = fio_options diff --git a/awscli/s3transfer/crt.py b/awscli/s3transfer/crt.py index 09af3c4e4288..2442406073dd 100644 --- a/awscli/s3transfer/crt.py +++ b/awscli/s3transfer/crt.py @@ -45,7 +45,7 @@ from botocore.exceptions import NoCredentialsError from botocore.useragent import register_feature_id from botocore.utils import ArnParser, InvalidArnException, is_s3express_bucket -from s3transfer.constants import FULL_OBJECT_CHECKSUM_ARGS, GB, MB +from s3transfer.constants import FULL_OBJECT_CHECKSUM_ARGS, MB from s3transfer.exceptions import TransferNotDoneError from s3transfer.futures import BaseTransferFuture, BaseTransferMeta from s3transfer.utils import CallArgs, OSUtils, get_callbacks @@ -155,9 +155,9 @@ def create_s3_crt_client( target_gbps = _get_crt_throughput_target_gbps( provided_throughput_target_bytes=target_throughput ) - fio_options = fio_options or {} - if disk_throughput := fio_options.get('disk_throughput_gbps'): - fio_options['disk_throughput_gbps'] = disk_throughput * 8 / GB + crt_fio_options = None + if fio_options: + crt_fio_options = S3FileIoOptions(**fio_options) return S3Client( bootstrap=bootstrap, region=region, @@ -167,7 +167,7 @@ def create_s3_crt_client( tls_connection_options=tls_connection_options, throughput_target_gbps=target_gbps, enable_s3express=True, - fio_options=S3FileIoOptions(**fio_options), + fio_options=crt_fio_options, ) diff --git a/tests/unit/customizations/s3/test_factory.py b/tests/unit/customizations/s3/test_factory.py index f153fec5de12..71a8afe7d792 100644 --- a/tests/unit/customizations/s3/test_factory.py +++ b/tests/unit/customizations/s3/test_factory.py @@ -13,7 +13,7 @@ import awscrt.s3 import pytest import s3transfer.crt -from awscrt.s3 import S3RequestTlsMode +from awscrt.s3 import S3FileIoOptions, S3RequestTlsMode from botocore.config import Config from botocore.credentials import Credentials from botocore.httpsession import DEFAULT_CA_BUNDLE @@ -483,6 +483,27 @@ def test_target_bandwidth_configure_for_crt_manager(self, mock_crt_client): self.assert_is_crt_manager(transfer_manager) self.assert_expected_throughput_target_gbps(mock_crt_client, 8) + @mock.patch('s3transfer.crt.S3Client') + def test_fio_options_configure_for_crt_manager(self, mock_crt_client): + self.runtime_config = self.get_runtime_config( + preferred_transfer_client='crt', + should_stream=True, + disk_throughput='5GB/s', + direct_io=True, + ) + transfer_manager = self.factory.create_transfer_manager( + self.params, self.runtime_config + ) + expected_fio_options = S3FileIoOptions( + should_stream=True, + disk_throughput_gbps=40.0, + direct_io=True, + ) + self.assert_is_crt_manager(transfer_manager) + self.assertEqual( + mock_crt_client.call_args[1]['fio_options'], expected_fio_options + ) + @mock.patch('s3transfer.crt.get_recommended_throughput_target_gbps') @mock.patch('s3transfer.crt.S3Client') def test_target_bandwidth_uses_crt_recommended_throughput( diff --git a/tests/unit/customizations/s3/test_transferconfig.py b/tests/unit/customizations/s3/test_transferconfig.py index 77e5ab46e9e7..0deecf860997 100644 --- a/tests/unit/customizations/s3/test_transferconfig.py +++ b/tests/unit/customizations/s3/test_transferconfig.py @@ -103,6 +103,12 @@ def test_set_preferred_transfer_client(self, provided, resolved): ('target_bandwidth', '1000', 1000), ('target_bandwidth', '1000B/s', 1000), ('target_bandwidth', '8000b/s', 1000), + # disk_throughput cases + ('disk_throughput', '1MB/s', 1024 * 1024), + ('disk_throughput', '10Mb/s', 10 * 1024 * 1024 / 8), + ('disk_throughput', '1000', 1000), + ('disk_throughput', '1000B/s', 1000), + ('disk_throughput', '8000b/s', 1000), ], ) def test_rate_conversions(self, config_name, provided, expected): @@ -127,6 +133,13 @@ def test_rate_conversions(self, config_name, provided, expected): ('target_bandwidth', '100/s'), ('target_bandwidth', ''), ('target_bandwidth', 'value-with-no-digits'), + # disk_throughput cases + ('disk_throughput', '1MB'), + ('disk_throughput', '1B'), + ('disk_throughput', '1b'), + ('disk_throughput', '100/s'), + ('disk_throughput', ''), + ('disk_throughput', 'value-with-no-digits'), ], ) def test_invalid_rate_values(self, config_name, provided): @@ -138,6 +151,22 @@ def test_validates_preferred_transfer_client_choices(self): with pytest.raises(transferconfig.InvalidConfigError): self.build_config_with(preferred_transfer_client='not-supported') + @pytest.mark.parametrize( + 'attr,val,expected', + [ + ('should_stream', 'true', True), + ('should_stream', 'false', False), + ('should_stream', None, None), + ('direct_io', 'true', True), + ('direct_io', 'false', False), + ('direct_io', None, None), + ], + ) + def test_convert_booleans(self, attr, val, expected): + params = {attr: val} + runtime_config = self.build_config_with(**params) + assert runtime_config[attr] == expected + class TestConvertToS3TransferConfig: def test_convert(self): diff --git a/tests/unit/s3transfer/test_crt.py b/tests/unit/s3transfer/test_crt.py index a6b5da1b516a..5e863d2428c3 100644 --- a/tests/unit/s3transfer/test_crt.py +++ b/tests/unit/s3transfer/test_crt.py @@ -16,6 +16,7 @@ from botocore.credentials import Credentials, ReadOnlyCredentials from botocore.exceptions import ClientError, NoCredentialsError from botocore.session import Session +from s3transfer.constants import GB from s3transfer.exceptions import TransferNotDoneError from s3transfer.utils import CallArgs @@ -365,3 +366,55 @@ def test_target_throughput( def test_always_enables_s3express(self, mock_s3_crt_client): s3transfer.crt.create_s3_crt_client('us-west-2') assert mock_s3_crt_client.call_args[1]['enable_s3express'] is True + + @pytest.mark.parametrize( + 'fio_options,should_stream,disk_throughput,direct_io', + [ + ({'should_stream': True}, True, 0.0, False), + ({'disk_throughput_gbps': 8}, False, 8, False), + ({'direct_io': True}, False, 0.0, True), + ( + {'should_stream': True, 'disk_throughput_gbps': 8}, + True, + 8, + False, + ), + ({'should_stream': True, 'direct_io': True}, True, 0.0, True), + ({'disk_throughput_gbps': 8, 'direct_io': True}, False, 8, True), + ( + { + 'should_stream': True, + 'disk_throughput_gbps': 8, + 'direct_io': True, + }, + True, + 8, + True, + ), + ], + ) + def test_fio_options( + self, + fio_options, + should_stream, + disk_throughput, + direct_io, + mock_s3_crt_client, + ): + params = {'fio_options': fio_options} + s3transfer.crt.create_s3_crt_client( + 'us-west-2', + **params, + ) + assert ( + mock_s3_crt_client.call_args[1]['fio_options'].should_stream + is should_stream + ) + assert ( + mock_s3_crt_client.call_args[1]['fio_options'].disk_throughput_gbps + == disk_throughput + ) + assert ( + mock_s3_crt_client.call_args[1]['fio_options'].direct_io + is direct_io + ) From 50772e54d783c5caffdd9993c1ab3bcb487cd92f Mon Sep 17 00:00:00 2001 From: Steve Yoo Date: Tue, 23 Sep 2025 14:22:28 -0400 Subject: [PATCH 3/5] Docs and changelog --- .../next-release/enhancement-crt-82800.json | 5 +++ awscli/s3transfer/crt.py | 3 ++ awscli/topics/s3-config.rst | 44 +++++++++++++++++++ 3 files changed, 52 insertions(+) create mode 100644 .changes/next-release/enhancement-crt-82800.json diff --git a/.changes/next-release/enhancement-crt-82800.json b/.changes/next-release/enhancement-crt-82800.json new file mode 100644 index 000000000000..050d7d00384b --- /dev/null +++ b/.changes/next-release/enhancement-crt-82800.json @@ -0,0 +1,5 @@ +{ + "type": "enhancement", + "category": "crt", + "description": "Exposes new CRT options for S3 file IO" +} diff --git a/awscli/s3transfer/crt.py b/awscli/s3transfer/crt.py index 2442406073dd..0d51b3d63a44 100644 --- a/awscli/s3transfer/crt.py +++ b/awscli/s3transfer/crt.py @@ -132,6 +132,9 @@ def create_s3_crt_client( * path/to/cert/bundle.pem - A filename of the CA cert bundle to use. Specify this argument if you want to use a custom CA cert bundle instead of the default one on your system. + + :type fio_options: Optional[dict] + :param fio_options: Kwargs to use to build an `awscrt.s3.S3FileIoOptions`. """ event_loop_group = EventLoopGroup(num_threads) diff --git a/awscli/topics/s3-config.rst b/awscli/topics/s3-config.rst index c7979d230bcc..0d1f899862b1 100644 --- a/awscli/topics/s3-config.rst +++ b/awscli/topics/s3-config.rst @@ -382,6 +382,50 @@ adjustments mid-transfer command in order to increase throughput and reach the requested bandwidth. +should_stream +------------- +.. note:: + This configuration option is only supported when the ``preferred_transfer_client`` + configuration value is set to or resolves to ``crt``. The ``classic`` transfer + client does not support this configuration option. + +**Default** - ``false`` + +If set to ``true``, the CRT client will skip buffering parts in-memory before +sending PUT requests. + + +disk_throughput +--------------- +.. note:: + This configuration option is only supported when the ``preferred_transfer_client`` + configuration value is set to or resolves to ``crt``. The ``classic`` transfer + client does not support this configuration option. + +**Default** - ``10.0`` + +The estimated target disk throughput. This value is only applied if +``should_stream`` is set to ``true``.This value can be specified using +the same semantics as ``target_throughput``, that is either as the +number of bytes per second as an integer, or using a rate suffix. + + +direct_io +--------- +.. note:: + This configuration option is only supported when the ``preferred_transfer_client`` + configuration value is set to or resolves to ``crt``. The ``classic`` transfer + client does not support this configuration option. + +.. note:: + This configuration option is only supported on Linux. + +**Default** - ``false`` + +If set to ``true``, the CRT client will enable direct IO to bypass the OS +cache when sending PUT requests. Enabling direct IO may be useful in cases +where the disk IO outperforms the kernel cache. + Experimental Configuration Values ================================= From e613fcad4c57ecb4dab82daa31ed1f9418cd4c89 Mon Sep 17 00:00:00 2001 From: Steve Yoo Date: Thu, 25 Sep 2025 13:57:13 -0400 Subject: [PATCH 4/5] Address feedback --- .changes/next-release/enhancement-crt-82800.json | 2 +- awscli/customizations/s3/factory.py | 3 +-- tests/unit/customizations/s3/test_factory.py | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.changes/next-release/enhancement-crt-82800.json b/.changes/next-release/enhancement-crt-82800.json index 050d7d00384b..e7b2ba0db52c 100644 --- a/.changes/next-release/enhancement-crt-82800.json +++ b/.changes/next-release/enhancement-crt-82800.json @@ -1,5 +1,5 @@ { "type": "enhancement", - "category": "crt", + "category": "awscrt", "description": "Exposes new CRT options for S3 file IO" } diff --git a/awscli/customizations/s3/factory.py b/awscli/customizations/s3/factory.py index 6b383ac0bc18..ca8c8937181a 100644 --- a/awscli/customizations/s3/factory.py +++ b/awscli/customizations/s3/factory.py @@ -15,7 +15,6 @@ import awscrt.s3 from botocore.client import Config from botocore.httpsession import DEFAULT_CA_BUNDLE -from s3transfer.constants import GB from s3transfer.crt import ( BotocoreCRTCredentialsWrapper, BotocoreCRTRequestSerializer, @@ -144,7 +143,7 @@ def _create_crt_client(self, params, runtime_config): fio_options['should_stream'] = val if (val := runtime_config.get('disk_throughput')) is not None: # Convert bytes to gigabits. - fio_options['disk_throughput_gbps'] = val * 8 / GB + fio_options['disk_throughput_gbps'] = val * 8 / 1_000_000_000 if (val := runtime_config.get('direct_io')) is not None: fio_options['direct_io'] = val create_crt_client_kwargs['fio_options'] = fio_options diff --git a/tests/unit/customizations/s3/test_factory.py b/tests/unit/customizations/s3/test_factory.py index 71a8afe7d792..f18b6f06724c 100644 --- a/tests/unit/customizations/s3/test_factory.py +++ b/tests/unit/customizations/s3/test_factory.py @@ -488,7 +488,7 @@ def test_fio_options_configure_for_crt_manager(self, mock_crt_client): self.runtime_config = self.get_runtime_config( preferred_transfer_client='crt', should_stream=True, - disk_throughput='5GB/s', + disk_throughput=1000**3, direct_io=True, ) transfer_manager = self.factory.create_transfer_manager( @@ -496,7 +496,7 @@ def test_fio_options_configure_for_crt_manager(self, mock_crt_client): ) expected_fio_options = S3FileIoOptions( should_stream=True, - disk_throughput_gbps=40.0, + disk_throughput_gbps=8.0, direct_io=True, ) self.assert_is_crt_manager(transfer_manager) From 4e4c216679531559a3ff5fa67c3f3211465b3ce4 Mon Sep 17 00:00:00 2001 From: Steve Yoo Date: Fri, 7 Nov 2025 13:27:05 -0500 Subject: [PATCH 5/5] Fix spacing --- awscli/topics/s3-config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/awscli/topics/s3-config.rst b/awscli/topics/s3-config.rst index 0d1f899862b1..2857691a514e 100644 --- a/awscli/topics/s3-config.rst +++ b/awscli/topics/s3-config.rst @@ -405,7 +405,7 @@ disk_throughput **Default** - ``10.0`` The estimated target disk throughput. This value is only applied if -``should_stream`` is set to ``true``.This value can be specified using +``should_stream`` is set to ``true``. This value can be specified using the same semantics as ``target_throughput``, that is either as the number of bytes per second as an integer, or using a rate suffix.