Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changes/next-release/enhancement-crt-82800.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"type": "enhancement",
"category": "awscrt",
"description": "Exposes new CRT options for S3 file IO"
}
9 changes: 9 additions & 0 deletions awscli/customizations/s3/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,15 @@ def _create_crt_client(self, params, runtime_config):
create_crt_client_kwargs['crt_credentials_provider'] = (
crt_credentials_provider
)
fio_options = {}
if (val := runtime_config.get('should_stream')) is not None:
fio_options['should_stream'] = val
if (val := runtime_config.get('disk_throughput')) is not None:
# Convert bytes to gigabits.
fio_options['disk_throughput_gbps'] = val * 8 / 1_000_000_000
if (val := runtime_config.get('direct_io')) is not None:
fio_options['direct_io'] = val
create_crt_client_kwargs['fio_options'] = fio_options

return create_s3_crt_client(**create_crt_client_kwargs)

Expand Down
25 changes: 23 additions & 2 deletions awscli/customizations/s3/transferconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# commands.
import logging

from botocore.utils import ensure_boolean
from s3transfer.manager import TransferConfig

from awscli.customizations.s3 import constants
Expand All @@ -31,6 +32,9 @@
'preferred_transfer_client': constants.AUTO_RESOLVE_TRANSFER_CLIENT,
'target_bandwidth': None,
'io_chunksize': 256 * 1024,
'should_stream': None,
'disk_throughput': None,
'direct_io': None,
}


Expand All @@ -47,9 +51,18 @@ class RuntimeConfig:
'max_bandwidth',
'target_bandwidth',
'io_chunksize',
'disk_throughput',
]
HUMAN_READABLE_SIZES = [
'multipart_chunksize',
'multipart_threshold',
'io_chunksize',
]
HUMAN_READABLE_RATES = [
'max_bandwidth',
'target_bandwidth',
'disk_throughput',
]
HUMAN_READABLE_SIZES = ['multipart_chunksize', 'multipart_threshold', 'io_chunksize']
HUMAN_READABLE_RATES = ['max_bandwidth', 'target_bandwidth']
SUPPORTED_CHOICES = {
'preferred_transfer_client': [
constants.AUTO_RESOLVE_TRANSFER_CLIENT,
Expand All @@ -62,6 +75,7 @@ class RuntimeConfig:
'default': constants.CLASSIC_TRANSFER_CLIENT
}
}
BOOLEANS = ['should_stream', 'direct_io']

@staticmethod
def defaults():
Expand All @@ -83,6 +97,7 @@ def build_config(self, **kwargs):
runtime_config.update(kwargs)
self._convert_human_readable_sizes(runtime_config)
self._convert_human_readable_rates(runtime_config)
self._convert_booleans(runtime_config)
self._resolve_choice_aliases(runtime_config)
self._validate_config(runtime_config)
return runtime_config
Expand Down Expand Up @@ -116,6 +131,12 @@ def _convert_human_readable_rates(self, runtime_config):
'second (e.g. 10Mb/s or 800Kb/s)' % value
)

def _convert_booleans(self, runtime_config):
for attr in self.BOOLEANS:
value = runtime_config.get(attr)
if value is not None:
runtime_config[attr] = ensure_boolean(value)

def _human_readable_rate_to_int(self, value):
# The human_readable_to_int() utility only supports integers (e.g. 1024)
# as strings and human readable sizes (e.g. 10MB, 5GB). It does not
Expand Down
9 changes: 9 additions & 0 deletions awscli/s3transfer/crt.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
)
from awscrt.s3 import (
S3Client,
S3FileIoOptions,
S3RequestTlsMode,
S3RequestType,
S3ResponseError,
Expand Down Expand Up @@ -87,6 +88,7 @@ def create_s3_crt_client(
part_size=8 * MB,
use_ssl=True,
verify=None,
fio_options=None,
):
"""
:type region: str
Expand Down Expand Up @@ -130,6 +132,9 @@ def create_s3_crt_client(
* path/to/cert/bundle.pem - A filename of the CA cert bundle to
use. Specify this argument if you want to use a custom CA cert
bundle instead of the default one on your system.
:type fio_options: Optional[dict]
:param fio_options: Kwargs to use to build an `awscrt.s3.S3FileIoOptions`.
"""

event_loop_group = EventLoopGroup(num_threads)
Expand All @@ -153,6 +158,9 @@ def create_s3_crt_client(
target_gbps = _get_crt_throughput_target_gbps(
provided_throughput_target_bytes=target_throughput
)
crt_fio_options = None
if fio_options:
crt_fio_options = S3FileIoOptions(**fio_options)
return S3Client(
bootstrap=bootstrap,
region=region,
Expand All @@ -162,6 +170,7 @@ def create_s3_crt_client(
tls_connection_options=tls_connection_options,
throughput_target_gbps=target_gbps,
enable_s3express=True,
fio_options=crt_fio_options,
)


Expand Down
44 changes: 44 additions & 0 deletions awscli/topics/s3-config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,50 @@ adjustments mid-transfer command in order to increase throughput and reach the
requested bandwidth.


should_stream
-------------
.. note::
This configuration option is only supported when the ``preferred_transfer_client``
configuration value is set to or resolves to ``crt``. The ``classic`` transfer
client does not support this configuration option.

**Default** - ``false``

If set to ``true``, the CRT client will skip buffering parts in memory before
sending PUT requests.


disk_throughput
---------------
.. note::
This configuration option is only supported when the ``preferred_transfer_client``
configuration value is set to or resolves to ``crt``. The ``classic`` transfer
client does not support this configuration option.

**Default** - ``10.0``

The estimated target disk throughput. This value is only applied if
``should_stream`` is set to ``true``. This value can be specified using
the same semantics as ``target_bandwidth``, that is either as the
number of bytes per second as an integer, or using a rate suffix.


direct_io
---------
.. note::
This configuration option is only supported when the ``preferred_transfer_client``
configuration value is set to or resolves to ``crt``. The ``classic`` transfer
client does not support this configuration option.

.. note::
This configuration option is only supported on Linux.

**Default** - ``false``

If set to ``true``, the CRT client will enable direct IO to bypass the OS
cache when sending PUT requests. Enabling direct IO may be useful in cases
where the disk IO outperforms the kernel cache.

Experimental Configuration Values
=================================

Expand Down
23 changes: 22 additions & 1 deletion tests/unit/customizations/s3/test_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import awscrt.s3
import pytest
import s3transfer.crt
from awscrt.s3 import S3RequestTlsMode
from awscrt.s3 import S3FileIoOptions, S3RequestTlsMode
from botocore.config import Config
from botocore.credentials import Credentials
from botocore.httpsession import DEFAULT_CA_BUNDLE
Expand Down Expand Up @@ -483,6 +483,27 @@ def test_target_bandwidth_configure_for_crt_manager(self, mock_crt_client):
self.assert_is_crt_manager(transfer_manager)
self.assert_expected_throughput_target_gbps(mock_crt_client, 8)

@mock.patch('s3transfer.crt.S3Client')
def test_fio_options_configure_for_crt_manager(self, mock_crt_client):
    """File IO settings in the runtime config reach the CRT ``S3Client``.

    ``disk_throughput`` is supplied in bytes per second (1000**3) and is
    expected to arrive as 8.0 in ``disk_throughput_gbps``.
    """
    self.runtime_config = self.get_runtime_config(
        preferred_transfer_client='crt',
        should_stream=True,
        disk_throughput=1000**3,
        direct_io=True,
    )
    transfer_manager = self.factory.create_transfer_manager(
        self.params, self.runtime_config
    )
    expected_fio_options = S3FileIoOptions(
        should_stream=True,
        disk_throughput_gbps=8.0,
        direct_io=True,
    )
    self.assert_is_crt_manager(transfer_manager)
    # Keyword arguments the mocked S3Client was constructed with.
    client_kwargs = mock_crt_client.call_args[1]
    self.assertEqual(client_kwargs['fio_options'], expected_fio_options)

@mock.patch('s3transfer.crt.get_recommended_throughput_target_gbps')
@mock.patch('s3transfer.crt.S3Client')
def test_target_bandwidth_uses_crt_recommended_throughput(
Expand Down
29 changes: 29 additions & 0 deletions tests/unit/customizations/s3/test_transferconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ def test_set_preferred_transfer_client(self, provided, resolved):
('target_bandwidth', '1000', 1000),
('target_bandwidth', '1000B/s', 1000),
('target_bandwidth', '8000b/s', 1000),
# disk_throughput cases
('disk_throughput', '1MB/s', 1024 * 1024),
('disk_throughput', '10Mb/s', 10 * 1024 * 1024 / 8),
('disk_throughput', '1000', 1000),
('disk_throughput', '1000B/s', 1000),
('disk_throughput', '8000b/s', 1000),
],
)
def test_rate_conversions(self, config_name, provided, expected):
Expand All @@ -127,6 +133,13 @@ def test_rate_conversions(self, config_name, provided, expected):
('target_bandwidth', '100/s'),
('target_bandwidth', ''),
('target_bandwidth', 'value-with-no-digits'),
# disk_throughput cases
('disk_throughput', '1MB'),
('disk_throughput', '1B'),
('disk_throughput', '1b'),
('disk_throughput', '100/s'),
('disk_throughput', ''),
('disk_throughput', 'value-with-no-digits'),
],
)
def test_invalid_rate_values(self, config_name, provided):
Expand All @@ -138,6 +151,22 @@ def test_validates_preferred_transfer_client_choices(self):
with pytest.raises(transferconfig.InvalidConfigError):
self.build_config_with(preferred_transfer_client='not-supported')

@pytest.mark.parametrize(
    'attr,val,expected',
    [
        ('should_stream', 'true', True),
        ('should_stream', 'false', False),
        ('should_stream', None, None),
        ('direct_io', 'true', True),
        ('direct_io', 'false', False),
        ('direct_io', None, None),
    ],
)
def test_convert_booleans(self, attr, val, expected):
    """String booleans are coerced to ``bool``; ``None`` is passed through."""
    runtime_config = self.build_config_with(**{attr: val})
    assert runtime_config[attr] == expected


class TestConvertToS3TransferConfig:
def test_convert(self):
Expand Down
53 changes: 53 additions & 0 deletions tests/unit/s3transfer/test_crt.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from botocore.credentials import Credentials, ReadOnlyCredentials
from botocore.exceptions import ClientError, NoCredentialsError
from botocore.session import Session
from s3transfer.constants import GB
from s3transfer.exceptions import TransferNotDoneError
from s3transfer.utils import CallArgs

Expand Down Expand Up @@ -365,3 +366,55 @@ def test_target_throughput(
def test_always_enables_s3express(self, mock_s3_crt_client):
s3transfer.crt.create_s3_crt_client('us-west-2')
assert mock_s3_crt_client.call_args[1]['enable_s3express'] is True

@pytest.mark.parametrize(
    'fio_options,should_stream,disk_throughput,direct_io',
    [
        ({'should_stream': True}, True, 0.0, False),
        ({'disk_throughput_gbps': 8}, False, 8, False),
        ({'direct_io': True}, False, 0.0, True),
        (
            {'should_stream': True, 'disk_throughput_gbps': 8},
            True,
            8,
            False,
        ),
        ({'should_stream': True, 'direct_io': True}, True, 0.0, True),
        ({'disk_throughput_gbps': 8, 'direct_io': True}, False, 8, True),
        (
            {
                'should_stream': True,
                'disk_throughput_gbps': 8,
                'direct_io': True,
            },
            True,
            8,
            True,
        ),
    ],
)
def test_fio_options(
    self,
    fio_options,
    should_stream,
    disk_throughput,
    direct_io,
    mock_s3_crt_client,
):
    """``fio_options`` kwargs are turned into the object given to ``S3Client``.

    Each case passes a subset of the supported keys and checks all three
    attributes of the resulting options object, including the values
    expected for the keys that were left out.
    """
    s3transfer.crt.create_s3_crt_client(
        'us-west-2', fio_options=fio_options
    )
    # Options object the mocked S3Client constructor received.
    passed_options = mock_s3_crt_client.call_args[1]['fio_options']
    assert passed_options.should_stream is should_stream
    assert passed_options.disk_throughput_gbps == disk_throughput
    assert passed_options.direct_io is direct_io
Loading