Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: replace cleanup scriptlets with shutdown script #5860

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/man1/flux-config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ The following configuration keys may be printed with
The rc1 script path used by :man1:`flux-broker`, unless overridden by
the ``broker.rc1_path`` broker attribute.

**shutdown_path**
The shutdown script path used by :man1:`flux-broker`, unless overridden by
the ``broker.shutdown_path`` broker attribute.

**rc3_path**
The rc3 script path used by :man1:`flux-broker`, unless overridden by
the ``broker.rc1_path`` broker attribute.
Expand Down
4 changes: 4 additions & 0 deletions doc/man7/flux-broker-attributes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@ broker.quorum-timeout [Updates: C]
broker.rc1_path [Updates: C]
The path to the broker's rc1 script. Default: ``${prefix}/etc/flux/rc1``.

broker.shutdown_path [Updates: C]
The path to the broker's shutdown script. Default:
``${prefix}/etc/flux/shutdown``.

broker.rc3_path [Updates: C]
The path to the broker's rc3 script. Default: ``${prefix}/etc/flux/rc3``.

Expand Down
3 changes: 2 additions & 1 deletion etc/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ tmpfiles_DATA = flux.conf

dist_fluxconf_SCRIPTS = \
rc1 \
rc3
rc3 \
shutdown

dist_fluxrc1_SCRIPTS = \
rc1.d/02-cron
Expand Down
10 changes: 0 additions & 10 deletions etc/rc1
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,3 @@ if test $RANK -eq 0 -a "${FLUX_SCHED_MODULE}" != "none" \
-a -z "$(lookup_sched_module)"; then
flux module load ${FLUX_SCHED_MODULE:-sched-simple}
fi

if test $RANK -eq 0; then
if test -z "${FLUX_DISABLE_JOB_CLEANUP}"; then
flux admin cleanup-push <<-EOT
flux queue stop --quiet --all --nocheckpoint
flux cancel --user=all --quiet --states RUN
flux queue idle --quiet
EOT
fi
fi
5 changes: 5 additions & 0 deletions etc/shutdown
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/sh

flux queue stop --quiet --all --nocheckpoint
flux cancel --user=all --quiet --states RUN
flux queue idle --quiet
49 changes: 35 additions & 14 deletions src/broker/broker.c
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,11 @@
flux_conf_builtin_get ("rc1_path", FLUX_CONF_AUTO),
0) < 0)
log_err_exit ("attr_add rc1_path");

if (attr_add (attrs,
"broker.shutdown_path",
flux_conf_builtin_get ("shutdown_path", FLUX_CONF_AUTO),
0) < 0)
log_err_exit ("attr_add shutdown_path");

Check warning on line 601 in src/broker/broker.c

View check run for this annotation

Codecov / codecov/patch

src/broker/broker.c#L601

Added line #L601 was not covered by tests
if (attr_add (attrs,
"broker.rc3_path",
flux_conf_builtin_get ("rc3_path", FLUX_CONF_AUTO),
Expand Down Expand Up @@ -723,7 +727,7 @@

static int create_runat_phases (broker_ctx_t *ctx)
{
const char *rc1, *rc3, *local_uri;
const char *rc1, *rc3, *shutdown, *local_uri;
bool rc2_none = false;

if (attr_get (ctx->attrs, "local-uri", &local_uri, NULL) < 0) {
Expand All @@ -734,6 +738,10 @@
log_err ("broker.rc1_path is not set");
return -1;
}
if (attr_get (ctx->attrs, "broker.shutdown_path", &shutdown, NULL) < 0) {
log_err ("broker.shutdown_path is not set");
return -1;

Check warning on line 743 in src/broker/broker.c

View check run for this annotation

Codecov / codecov/patch

src/broker/broker.c#L742-L743

Added lines #L742 - L743 were not covered by tests
}
if (attr_get (ctx->attrs, "broker.rc3_path", &rc3, NULL) < 0) {
log_err ("broker.rc3_path is not set");
return -1;
Expand All @@ -749,33 +757,46 @@
/* rc1 - initialization
*/
if (rc1 && strlen (rc1) > 0) {
if (runat_push_shell_command (ctx->runat,
"rc1",
rc1,
RUNAT_FLAG_LOG_STDIO) < 0) {
log_err ("runat_push_shell_command rc1");
if (runat_push_command_line (ctx->runat,
"rc1",
rc1,
RUNAT_FLAG_LOG_STDIO) < 0) {
log_err ("runat_push_command_line rc1");

Check warning on line 764 in src/broker/broker.c

View check run for this annotation

Codecov / codecov/patch

src/broker/broker.c#L764

Added line #L764 was not covered by tests
return -1;
}
}

/* rc2 - initial program
*/
if (ctx->rank == 0 && !rc2_none) {
if (create_runat_rc2 (ctx->runat, ctx->init_shell_cmd,
ctx->init_shell_cmd_len) < 0) {
if (create_runat_rc2 (ctx->runat,
ctx->init_shell_cmd,
ctx->init_shell_cmd_len) < 0) {
log_err ("create_runat_rc2");
return -1;
}
}

/* shutdown - clean up in preparation for instance shutdown
*/
if (ctx->rank == 0 && shutdown && strlen (shutdown) > 0) {
if (runat_push_command_line (ctx->runat,
"shutdown",
shutdown,
RUNAT_FLAG_LOG_STDIO) < 0) {
log_err ("runat_push_command_line shutdown");
return -1;

Check warning on line 788 in src/broker/broker.c

View check run for this annotation

Codecov / codecov/patch

src/broker/broker.c#L787-L788

Added lines #L787 - L788 were not covered by tests
}
}

/* rc3 - finalization
*/
if (rc3 && strlen (rc3) > 0) {
if (runat_push_shell_command (ctx->runat,
"rc3",
rc3,
RUNAT_FLAG_LOG_STDIO) < 0) {
log_err ("runat_push_shell_command rc3");
if (runat_push_command_line (ctx->runat,
"rc3",
rc3,
RUNAT_FLAG_LOG_STDIO) < 0) {
log_err ("runat_push_command_line rc3");

Check warning on line 799 in src/broker/broker.c

View check run for this annotation

Codecov / codecov/patch

src/broker/broker.c#L799

Added line #L799 was not covered by tests
return -1;
}
}
Expand Down
30 changes: 30 additions & 0 deletions src/broker/runat.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "src/common/libczmqcontainers/czmq_containers.h"
#include "src/common/libutil/log.h"
#include "src/common/libutil/monotime.h"
#include "src/common/libutil/errno_safe.h"
#include "ccan/str/str.h"

#include "runat.h"
Expand Down Expand Up @@ -552,6 +553,29 @@
return -1;
}

int runat_push_command_line (struct runat *r,
const char *name,
const char *cmdline,
int flags)
{
int rc;
char *argz = NULL;
size_t argz_len = 0;

if (!cmdline) {
errno = EINVAL;
return -1;

Check warning on line 567 in src/broker/runat.c

View check run for this annotation

Codecov / codecov/patch

src/broker/runat.c#L566-L567

Added lines #L566 - L567 were not covered by tests
}
rc = argz_create_sep (cmdline, ' ', &argz, &argz_len);
if (rc != 0) {
errno = rc;
return -1;

Check warning on line 572 in src/broker/runat.c

View check run for this annotation

Codecov / codecov/patch

src/broker/runat.c#L571-L572

Added lines #L571 - L572 were not covered by tests
}
rc = runat_push_command (r, name, argz, argz_len, flags);
ERRNO_SAFE_WRAP (free, argz);
return rc;
}

int runat_get_exit_code (struct runat *r, const char *name, int *rc)
{
struct runat_entry *entry;
Expand Down Expand Up @@ -662,6 +686,12 @@
errstr = "commands array is empty";
goto error;
}
/* Transition: treat "cleanup" as an alias for "shutdown"
* so framework projects that are still using flux admin cleanup-push
* can add commands to "shutdown".
*/
if (streq (name, "cleanup"))
name = "shutdown";

Check warning on line 694 in src/broker/runat.c

View check run for this annotation

Codecov / codecov/patch

src/broker/runat.c#L693-L694

Added lines #L693 - L694 were not covered by tests
json_array_foreach (commands, index, el) {
const char *cmdline = json_string_value (el);
if (!cmdline || strlen (cmdline) == 0) {
Expand Down
7 changes: 7 additions & 0 deletions src/broker/runat.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ int runat_push_command (struct runat *r,
size_t argz_len,
int flags);

/* Same but with command specified by cmdline.
*/
int runat_push_command_line (struct runat *r,
const char *name,
const char *cmdline,
int flags);

/* Get exit code of completed command list.
* If multiple commands fail, the exit code is that of the first failure.
*/
Expand Down
18 changes: 12 additions & 6 deletions src/broker/state_machine.c
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,9 @@
#endif
}

/* In the cleanup state, we run the shutdown script. When the shutdown
* script is complete, we enter shutdown state.
*/
static void action_cleanup (struct state_machine *s)
{
/* Prevent new downstream clients from saying hello, but
Expand All @@ -366,9 +369,12 @@
*/
overlay_shutdown (s->ctx->overlay, false);

if (runat_is_defined (s->ctx->runat, "cleanup")) {
if (runat_start (s->ctx->runat, "cleanup", runat_completion_cb, s) < 0) {
flux_log_error (s->ctx->h, "runat_start cleanup");
if (runat_is_defined (s->ctx->runat, "shutdown")) {
if (runat_start (s->ctx->runat,
"shutdown",
runat_completion_cb,
s) < 0) {
flux_log_error (s->ctx->h, "runat_start shutdown");

Check warning on line 377 in src/broker/state_machine.c

View check run for this annotation

Codecov / codecov/patch

src/broker/state_machine.c#L377

Added line #L377 was not covered by tests
state_machine_post (s, "cleanup-fail");
}
}
Expand Down Expand Up @@ -535,8 +541,8 @@
state_machine_post (s, "shutdown");
break;
case STATE_CLEANUP:
if (runat_abort (s->ctx->runat, "cleanup") < 0)
flux_log_error (h, "runat_abort cleanup (signal %d)", signum);
if (runat_abort (s->ctx->runat, "shutdown") < 0)
flux_log_error (h, "runat_abort shutdown (signal %d)", signum);

Check warning on line 545 in src/broker/state_machine.c

View check run for this annotation

Codecov / codecov/patch

src/broker/state_machine.c#L544-L545

Added lines #L544 - L545 were not covered by tests
break;
case STATE_FINALIZE:
(void)runat_abort (s->ctx->runat, "rc3");
Expand Down Expand Up @@ -613,7 +619,7 @@
s->ctx->exit_rc = rc;
state_machine_post (s, rc == 0 ? "rc2-success" : "rc2-fail");
}
else if (streq (name, "cleanup")) {
else if (streq (name, "shutdown")) {
if (s->ctx->exit_rc == 0 && rc != 0)
s->ctx->exit_rc = rc;
state_machine_post (s, rc == 0 ? "cleanup-success" : "cleanup-fail");
Expand Down
5 changes: 5 additions & 0 deletions src/common/libflux/conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ static struct builtin builtin_tab[] = {
.val_installed = FLUXCONFDIR "/rc1",
.val_intree = ABS_TOP_SRCDIR "/etc/rc1",
},
{
.key = "shutdown_path",
.val_installed = FLUXCONFDIR "/shutdown",
.val_intree = ABS_TOP_SRCDIR "/etc/shutdown",
},
{
.key = "rc3_path",
.val_installed = FLUXCONFDIR "/rc3",
Expand Down
2 changes: 2 additions & 0 deletions t/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,8 @@ EXTRA_DIST= \
rc/rc1-job \
rc/rc3-kvs \
rc/rc3-job \
rc/shutdown-kvs \
rc/shutdown-job \
shell/input \
shell/output \
shell/initrc/tests \
Expand Down
2 changes: 1 addition & 1 deletion t/issues/t2284-initial-program-format-chars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

for s in %h %g %%h %f; do
echo "Running flux broker echo $s"
output=$(flux broker -Sbroker.rc1_path= -Sbroker.rc3_path= /bin/echo $s)
output=$(flux broker -Sbroker.rc1_path= -Sbroker.shutdown_path= -Sbroker.rc3_path= /bin/echo $s)
test "$output" = "$s"
done
exit 0
1 change: 1 addition & 0 deletions t/issues/t4482-flush-list-corruption.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,6 @@ chmod +x t4482.sh

flux start -s 1 \
-o,--setattr=broker.rc1_path= \
-o,--setattr=broker.shutdown_path= \
-o,--setattr=broker.rc3_path= \
./t4482.sh
6 changes: 0 additions & 6 deletions t/rc/rc1-job
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,3 @@ if [ $RANK -eq 0 ]
then
flux module debug --setbit 0x2 sched-simple
fi

test $RANK -ne 0 || flux admin cleanup-push <<-EOT
flux queue stop --all --nocheckpoint
flux cancel --all --states RUN
flux queue idle
EOT
5 changes: 5 additions & 0 deletions t/rc/shutdown-job
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/sh

flux queue stop --quiet --all --nocheckpoint
flux cancel --user=all --quiet --states RUN
flux queue idle --quiet
1 change: 1 addition & 0 deletions t/rc/shutdown-kvs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#!/bin/sh
5 changes: 5 additions & 0 deletions t/sharness.d/flux-sharness.sh
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ test_under_flux() {
if test "$personality" = "minimal"; then
RC1_PATH=""
RC3_PATH=""
SHUTDOWN_PATH=""
elif test "$personality" = "system"; then
# Pre-create broker rundir so we know it in advance and
# make_bootstrap_config() can use it for ipc:// socket paths.
Expand All @@ -234,11 +235,14 @@ test_under_flux() {
unset root
elif test "$personality" != "full"; then
RC1_PATH=$FLUX_SOURCE_DIR/t/rc/rc1-$personality
SHUTDOWN_PATH=$FLUX_SOURCE_DIR/t/rc/shutdown-$personality
RC3_PATH=$FLUX_SOURCE_DIR/t/rc/rc3-$personality
test -x $RC1_PATH || error "cannot execute $RC1_PATH"
test -x $SHUTDOWN_PATH || error "cannot execute $SHUTDOWN_PATH"
test -x $RC3_PATH || error "cannot execute $RC3_PATH"
else
unset RC1_PATH
unset SHUTDOWN_PATH
unset RC3_PATH
fi

Expand Down Expand Up @@ -267,6 +271,7 @@ test_under_flux() {
${BROKER_RUNDIR+--test-rundir=${BROKER_RUNDIR}} \
${BROKER_RUNDIR+--test-rundir-cleanup} \
${RC1_PATH+-o -Sbroker.rc1_path=${RC1_PATH}} \
${SHUTDOWN_PATH+-o -Sbroker.shutdown_path=${SHUTDOWN_PATH}} \
${RC3_PATH+-o -Sbroker.rc3_path=${RC3_PATH}} \
${sysopts} \
${logopts} \
Expand Down
2 changes: 1 addition & 1 deletion t/t0001-basic.t
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ test_expect_success 'flux fortune with art works' '

# Minimal is sufficient for these tests, but test_under_flux unavailable
# clear the RC paths
ARGS="-o,-Sbroker.rc1_path=,-Sbroker.rc3_path="
ARGS="-o,-Sbroker.rc1_path=,-Sbroker.rc3_path=,-Sbroker.shutdown_path="

test_expect_success 'flux-start in exec mode works' "
flux start ${ARGS} flux getattr size | grep -x 1
Expand Down
2 changes: 1 addition & 1 deletion t/t0003-module.t
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ test_expect_success 'module: remove testmod if loaded' '
'
test_expect_success 'module: load without unload causes broker failure' '
test_must_fail flux start \
-o,-Sbroker.rc1_path=,-Sbroker.rc3_path= \
-o,-Sbroker.rc1_path=,-Sbroker.rc3_path=,-Sbroker.shutdown_path= \
flux module load content 2>nounload.err
'
test_expect_success 'module: module name is called out' '
Expand Down
Loading
Loading