Skip to content

Commit 672798b

Browse files
Sameer Sharma
authored
MLCOMPUTE-36 | change default cachedExecutorIdleTimeout and expose get_dra_configs as public (#82)
* MLCOMPUTE-36 | change default cachedExecutorIdleTimeout and expose get_dra_configs as public * MLCOMPUTE-36 | bump up version Co-authored-by: Sameer Sharma <[email protected]> Co-authored-by: Sameer Sharma <[email protected]>
1 parent 5cbb07b commit 672798b

File tree

3 files changed

+15
-8
lines changed

3 files changed

+15
-8
lines changed

service_configuration_lib/spark_config.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
DEFAULT_CLUSTERMAN_OBSERVED_SCALING_TIME = 15 # minutes
4242
DEFAULT_SQL_SHUFFLE_PARTITIONS = 128
4343
DEFAULT_DRA_EXECUTOR_ALLOCATION_RATIO = 0.8
44-
DEFAULT_DRA_CACHED_EXECUTOR_IDLE_TIMEOUT = '420s'
44+
DEFAULT_DRA_CACHED_EXECUTOR_IDLE_TIMEOUT = '900s'
4545
DEFAULT_DRA_MIN_EXECUTOR_RATIO = 0.25
4646

4747

@@ -254,7 +254,7 @@ def _append_sql_shuffle_partitions_conf(spark_opts: Dict[str, str]) -> Dict[str,
254254
return spark_opts
255255

256256

257-
def _get_dra_configs(spark_opts: Dict[str, str]) -> Dict[str, str]:
257+
def get_dra_configs(spark_opts: Dict[str, str]) -> Dict[str, str]:
258258
if (
259259
'spark.dynamicAllocation.enabled' not in spark_opts or
260260
str(spark_opts['spark.dynamicAllocation.enabled']) != 'true'
@@ -271,6 +271,13 @@ def _get_dra_configs(spark_opts: Dict[str, str]) -> Dict[str, str]:
271271
spark_opts, 'spark.dynamicAllocation.cachedExecutorIdleTimeout',
272272
str(DEFAULT_DRA_CACHED_EXECUTOR_IDLE_TIMEOUT),
273273
)
274+
log.warning(
275+
f'\nSetting spark.dynamicAllocation.cachedExecutorIdleTimeout as {DEFAULT_DRA_CACHED_EXECUTOR_IDLE_TIMEOUT}. '
276+
f'Executor with cached data block will be released if it has been idle for this duration. '
277+
f'If you wish to change the value of cachedExecutorIdleTimeout, please provide the exact value of '
278+
f'spark.dynamicAllocation.cachedExecutorIdleTimeout in --spark-args. If your job is performing bad because '
279+
f'the cached data was lost, please consider increasing this value.\n',
280+
)
274281

275282
if 'spark.dynamicAllocation.minExecutors' not in spark_opts:
276283
# the ratio of total executors to be used as minExecutors
@@ -809,7 +816,7 @@ def get_spark_conf(
809816
raise ValueError('Unknown resource_manager, should be either [mesos,kubernetes]')
810817

811818
# configure dynamic resource allocation configs
812-
spark_conf = _get_dra_configs(spark_conf)
819+
spark_conf = get_dra_configs(spark_conf)
813820

814821
# configure spark_event_log
815822
spark_conf = _append_event_log_conf(spark_conf, *aws_creds)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
setup(
1919
name='service-configuration-lib',
20-
version='2.10.7',
20+
version='2.10.8',
2121
provides=['service_configuration_lib'],
2222
description='Start, stop, and inspect Yelp SOA services',
2323
url='https://github.com/Yelp/service_configuration_lib',

tests/spark_config_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,7 @@ def test_adjust_spark_requested_resources_error(
475475
'spark.dynamicAllocation.enabled': 'true',
476476
'spark.dynamicAllocation.shuffleTracking.enabled': 'true',
477477
'spark.dynamicAllocation.executorAllocationRatio': '0.8',
478-
'spark.dynamicAllocation.cachedExecutorIdleTimeout': '420s',
478+
'spark.dynamicAllocation.cachedExecutorIdleTimeout': '900s',
479479
'spark.dynamicAllocation.minExecutors': '0',
480480
'spark.dynamicAllocation.maxExecutors': '2',
481481
'spark.executor.instances': '0',
@@ -495,7 +495,7 @@ def test_adjust_spark_requested_resources_error(
495495
'spark.dynamicAllocation.initialExecutors': '128',
496496
'spark.dynamicAllocation.shuffleTracking.enabled': 'true',
497497
'spark.dynamicAllocation.executorAllocationRatio': '0.8',
498-
'spark.dynamicAllocation.cachedExecutorIdleTimeout': '420s',
498+
'spark.dynamicAllocation.cachedExecutorIdleTimeout': '900s',
499499
'spark.executor.instances': '128',
500500
},
501501
),
@@ -510,7 +510,7 @@ def test_adjust_spark_requested_resources_error(
510510
'spark.dynamicAllocation.minExecutors': '205',
511511
'spark.dynamicAllocation.shuffleTracking.enabled': 'true',
512512
'spark.dynamicAllocation.executorAllocationRatio': '0.8',
513-
'spark.dynamicAllocation.cachedExecutorIdleTimeout': '420s',
513+
'spark.dynamicAllocation.cachedExecutorIdleTimeout': '900s',
514514
'spark.executor.instances': '205',
515515
},
516516
),
@@ -541,7 +541,7 @@ def test_get_dra_configs(
541541
user_spark_opts,
542542
expected_output,
543543
):
544-
output = spark_config._get_dra_configs(user_spark_opts)
544+
output = spark_config.get_dra_configs(user_spark_opts)
545545
for key in expected_output.keys():
546546
assert output[key] == expected_output[key], f'wrong value for {key}'
547547

0 commit comments

Comments (0)