Skip to content

Commit bc7b3c9

Browse files
authored
Merge pull request #6791 from grondo/issue#6781
job-exec: only adjust timelimit for jobs when start delay exceeds a configurable percent of job duration
2 parents 2b690f1 + 6431390 commit bc7b3c9

File tree

3 files changed

+64
-9
lines changed

3 files changed

+64
-9
lines changed

doc/man5/flux-config-exec.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,17 @@ barrier-timeout
8282
nodes on which the barrier is waiting. To disable the barrier timeout,
8383
set this value to ``"0"``. (Default: ``30m``).
8484

85+
max-start-delay-percent
86+
(optional) Specify the maximum allowed delay, as a percentage of a job's
87+
duration, between when a job is allocated (i.e. the starttime recorded
88+
in _R_) and when the execution system receives the start request from
89+
the job manager. If the delay exceeds this percentage, then extend the
90+
job's effective expiration by the delay. This prevents short duration
91+
jobs from having their runtime significantly reduced, while avoiding a
92+
differential between the actual resource set expiration and the time
93+
at which a ``timeout`` exception is raised for longer running jobs,
94+
where any runtime impact will be negligible. The default is 25 percent.
95+
8596
testexec
8697
(options) A table of keys (see :ref:`testexec`) for configuring the
8798
**job-exec** test execution implementation (used mainly for testing).

src/modules/job-exec/job-exec.c

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@
107107
#include "checkpoint.h"
108108
#include "exec_config.h"
109109

110+
static double max_start_delay_percent;
110111
static double kill_timeout;
111112
static int max_kill_count;
112113
static int term_signal;
@@ -568,6 +569,28 @@ static void timelimit_cb (flux_reactor_t *r,
568569
jobinfo_killtimer_start (job, job->kill_timeout);
569570
}
570571

572+
static double jobinfo_adjust_expiration (struct jobinfo *job,
573+
double starttime,
574+
double expiration)
575+
{
576+
double duration = expiration - starttime;
577+
double delta = job->t0 - starttime;
578+
579+
/* If the difference between the job's official starttime and the
580+
* time of the start request is a large percentage of the job's total
581+
* duration, then adjust the job's actual expiration to account for this
582+
* difference. This prevents a short job from having its runtime
583+
* significantly reduced, while avoiding creating a differential between
584+
* the actual resource set expiration for longer jobs (the common case)
585+
* where it doesn't matter.
586+
*
587+
* See also: https://github.com/flux-framework/flux-core/issues/6781
588+
*/
589+
if ((delta / duration) * 100 > max_start_delay_percent)
590+
return expiration + delta;
591+
return expiration;
592+
}
593+
571594
static int jobinfo_set_expiration (struct jobinfo *job)
572595
{
573596
flux_watcher_t *w = NULL;
@@ -600,14 +623,8 @@ static int jobinfo_set_expiration (struct jobinfo *job)
600623
if (job->t0 == 0.)
601624
job->t0 = now;
602625

603-
/* Adjust expiration time based on delay between when scheduler
604-
* created R and when we received this job. O/w jobs may be
605-
* terminated due to timeouts prematurely when the system
606-
* is very busy, which can cause long delays between alloc and
607-
* start events.
608-
*/
609626
if (starttime > 0.)
610-
expiration += job->t0 - starttime;
627+
expiration = jobinfo_adjust_expiration (job, starttime, expiration);
611628

612629
offset = expiration - now;
613630
if (offset < 0.) {
@@ -1483,15 +1500,17 @@ static int job_exec_set_config_globals (flux_t *h,
14831500
*
14841501
* So we must re-initialize globals every time we reload the module.
14851502
*/
1503+
max_start_delay_percent = 25.0;
14861504
kill_timeout = 5.0;
14871505
max_kill_count = 8;
14881506
term_signal = SIGTERM;
14891507
kill_signal = SIGKILL;
14901508

14911509
if (flux_conf_unpack (conf,
14921510
&error,
1493-
"{s?{s?s s?s s?s s?i}}",
1511+
"{s?{s?F s?s s?s s?s s?i}}",
14941512
"exec",
1513+
"max-start-delay-percent", &max_start_delay_percent,
14951514
"kill-timeout", &kto,
14961515
"term-signal", &tsignal,
14971516
"kill-signal", &ksignal,
@@ -1621,12 +1640,18 @@ static json_t *running_job_stats (struct job_exec_ctx *ctx)
16211640
json_t *entry;
16221641
char *critical_ranks;
16231642
json_t *impl_stats = NULL;
1643+
double expiration = 0.;
16241644

16251645
if (!(critical_ranks = idset_encode (job->critical_ranks,
16261646
IDSET_FLAG_RANGE)))
16271647
goto error;
16281648

1629-
entry = json_pack ("{s:s s:s s:s s:i s:i s:i s:i s:i s:i s:f s:i s:i}",
1649+
if (job->expiration_timer)
1650+
expiration = flux_watcher_next_wakeup (job->expiration_timer);
1651+
1652+
1653+
entry = json_pack ("{s:s s:s s:s s:i s:i s:i s:i s:i s:i"
1654+
" s:f s:f s:i s:i}",
16301655
"implementation",
16311656
job->impl ? job->impl->name : "none",
16321657
"ns", job->ns,
@@ -1637,6 +1662,7 @@ static json_t *running_job_stats (struct job_exec_ctx *ctx)
16371662
"started", job->started,
16381663
"running", job->running,
16391664
"finalizing", job->finalizing,
1665+
"expiration", expiration,
16401666
"kill_timeout", job->kill_timeout,
16411667
"kill_count", job->kill_count,
16421668
"kill_shell_count", job->kill_shell_count);

t/t2900-job-timelimits.t

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,4 +93,22 @@ test_expect_success 'expired job can also be canceled' '
9393
expired_cancel_test $(perl -E "say $TIMEOUT*$scale") && break
9494
done
9595
'
96+
test_expect_success 'expiration can be extended via max-start-delay-percent' '
97+
flux config load <<-EOF &&
98+
[exec]
99+
max-start-delay-percent = 0
100+
EOF
101+
test_when_finished "echo | flux config load" &&
102+
flux jobtap load \
103+
${FLUX_BUILD_DIR}/t/job-manager/plugins/.libs/perilog-test.so &&
104+
test_when_finished "flux jobtap remove perilog-test.so" &&
105+
jobid=$(flux submit -t 1m sleep 120) &&
106+
flux job wait-event -Hvt 30 $jobid start &&
107+
exp=$(flux job info $jobid R | jq .execution.expiration) &&
108+
exec_exp=$(flux module stats job-exec | jq .jobs.${jobid}.expiration) &&
109+
test_debug "echo expiration=${exp} exec_expiration=${exec_exp}" &&
110+
flux module stats job-exec | jq -e ".jobs.${jobid}.expiration > $exp" &&
111+
flux cancel $jobid &&
112+
flux job wait-event -t 30 $jobid clean
113+
'
96114
test_done

0 commit comments

Comments
 (0)