From ac46dacb9bd7f048891c0084fc5b7e31bade39a1 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Thu, 4 Apr 2024 09:55:49 -0700 Subject: [PATCH 1/7] libflux: add 'shutdown_path' config, broker attr Problem: the shutdown script will coexist with rc1 and rc3 as an alternative to the "cleanup" commands currently pushed into broker memory by rc1, but it has no path configuration. Add builtin 'shutdown_path' config key, like 'rc1_path' and 'rc3_path'. Add 'broker.shutdown_path' broker attribute, like 'broker.rc1_path' and 'broker.rc3_path'. Update flux-config(1) and flux-broker-attributes(7). --- doc/man1/flux-config.rst | 4 ++++ doc/man7/flux-broker-attributes.rst | 4 ++++ src/common/libflux/conf.c | 5 +++++ 3 files changed, 13 insertions(+) diff --git a/doc/man1/flux-config.rst b/doc/man1/flux-config.rst index 99158d011339..29b4e0016fbb 100644 --- a/doc/man1/flux-config.rst +++ b/doc/man1/flux-config.rst @@ -111,6 +111,10 @@ The following configuration keys may be printed with The rc1 script path used by :man1:`flux-broker`, unless overridden by the ``broker.rc1_path`` broker attribute. +**shutdown_path** + The shutdown script path used by :man1:`flux-broker`, unless overridden by + the ``broker.shutdown_path`` broker attribute. + **rc3_path** The rc3 script path used by :man1:`flux-broker`, unless overridden by the ``broker.rc1_path`` broker attribute. diff --git a/doc/man7/flux-broker-attributes.rst b/doc/man7/flux-broker-attributes.rst index 624dba590297..4a447c4dc8f3 100644 --- a/doc/man7/flux-broker-attributes.rst +++ b/doc/man7/flux-broker-attributes.rst @@ -118,6 +118,10 @@ broker.quorum-timeout [Updates: C] broker.rc1_path [Updates: C] The path to the broker's rc1 script. Default: ``${prefix}/etc/flux/rc1``. +broker.shutdown_path [Updates: C] + The path to the broker's shutdown script. Default: + ``${prefix}/etc/flux/shutdown``. + broker.rc3_path [Updates: C] The path to the broker's rc3 script. Default: ``${prefix}/etc/flux/rc3``. 
diff --git a/src/common/libflux/conf.c b/src/common/libflux/conf.c index 781766af59ec..34efee9f607b 100644 --- a/src/common/libflux/conf.c +++ b/src/common/libflux/conf.c @@ -94,6 +94,11 @@ static struct builtin builtin_tab[] = { .val_installed = FLUXCONFDIR "/rc1", .val_intree = ABS_TOP_SRCDIR "/etc/rc1", }, + { + .key = "shutdown_path", + .val_installed = FLUXCONFDIR "/shutdown", + .val_intree = ABS_TOP_SRCDIR "/etc/shutdown", + }, { .key = "rc3_path", .val_installed = FLUXCONFDIR "/rc3", From 491e06bca5ac34e98a647eb2ac384336f9d20827 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Thu, 4 Apr 2024 10:13:28 -0700 Subject: [PATCH 2/7] broker: run shutdown script not cleanup commands Problem: the cleanup commands pushed into broker memory in rc1 are not easily maintained or extended. Create a new "shutdown" script that lives next to rc1 and rc3. Run this script instead of the "cleanup" commands when the broker CLEANUP state is entered. Upon completion of the shutdown script, the broker transitions to the SHUTDOWN state. 
--- etc/Makefile.am | 3 ++- etc/rc1 | 10 ---------- etc/shutdown | 5 +++++ src/broker/broker.c | 24 ++++++++++++++++++++++-- src/broker/state_machine.c | 18 ++++++++++++------ 5 files changed, 41 insertions(+), 19 deletions(-) create mode 100755 etc/shutdown diff --git a/etc/Makefile.am b/etc/Makefile.am index 38015b321f94..656f76473911 100644 --- a/etc/Makefile.am +++ b/etc/Makefile.am @@ -10,7 +10,8 @@ tmpfiles_DATA = flux.conf dist_fluxconf_SCRIPTS = \ rc1 \ - rc3 + rc3 \ + shutdown dist_fluxrc1_SCRIPTS = \ rc1.d/02-cron diff --git a/etc/rc1 b/etc/rc1 index cddac74c2d02..1bdf31f8fa41 100755 --- a/etc/rc1 +++ b/etc/rc1 @@ -109,13 +109,3 @@ if test $RANK -eq 0 -a "${FLUX_SCHED_MODULE}" != "none" \ -a -z "$(lookup_sched_module)"; then flux module load ${FLUX_SCHED_MODULE:-sched-simple} fi - -if test $RANK -eq 0; then - if test -z "${FLUX_DISABLE_JOB_CLEANUP}"; then - flux admin cleanup-push <<-EOT - flux queue stop --quiet --all --nocheckpoint - flux cancel --user=all --quiet --states RUN - flux queue idle --quiet - EOT - fi -fi diff --git a/etc/shutdown b/etc/shutdown new file mode 100755 index 000000000000..37effe32cbd3 --- /dev/null +++ b/etc/shutdown @@ -0,0 +1,5 @@ +#!/bin/sh + +flux queue stop --quiet --all --nocheckpoint +flux cancel --user=all --quiet --states RUN +flux queue idle --quiet diff --git a/src/broker/broker.c b/src/broker/broker.c index ee7d91a02e7a..c73886c21ed8 100644 --- a/src/broker/broker.c +++ b/src/broker/broker.c @@ -594,7 +594,11 @@ static void init_attrs_rc_paths (attr_t *attrs) flux_conf_builtin_get ("rc1_path", FLUX_CONF_AUTO), 0) < 0) log_err_exit ("attr_add rc1_path"); - + if (attr_add (attrs, + "broker.shutdown_path", + flux_conf_builtin_get ("shutdown_path", FLUX_CONF_AUTO), + 0) < 0) + log_err_exit ("attr_add shutdown_path"); if (attr_add (attrs, "broker.rc3_path", flux_conf_builtin_get ("rc3_path", FLUX_CONF_AUTO), @@ -723,7 +727,7 @@ static int create_runat_rc2 (struct runat *r, const char *argz, size_t argz_len) static int 
create_runat_phases (broker_ctx_t *ctx) { - const char *rc1, *rc3, *local_uri; + const char *rc1, *rc3, *shutdown, *local_uri; bool rc2_none = false; if (attr_get (ctx->attrs, "local-uri", &local_uri, NULL) < 0) { @@ -734,6 +738,10 @@ static int create_runat_phases (broker_ctx_t *ctx) log_err ("broker.rc1_path is not set"); return -1; } + if (attr_get (ctx->attrs, "broker.shutdown_path", &shutdown, NULL) < 0) { + log_err ("broker.shutdown_path is not set"); + return -1; + } if (attr_get (ctx->attrs, "broker.rc3_path", &rc3, NULL) < 0) { log_err ("broker.rc3_path is not set"); return -1; @@ -768,6 +776,18 @@ static int create_runat_phases (broker_ctx_t *ctx) } } + /* shutdown - clean up in preparation for instance shutdown + */ + if (ctx->rank == 0 && shutdown && strlen (shutdown) > 0) { + if (runat_push_shell_command (ctx->runat, + "shutdown", + shutdown, + RUNAT_FLAG_LOG_STDIO) < 0) { + log_err ("runat_push_shell_command shutdown"); + return -1; + } + } + /* rc3 - finalization */ if (rc3 && strlen (rc3) > 0) { diff --git a/src/broker/state_machine.c b/src/broker/state_machine.c index a806ea1256eb..c11d16c0b3fd 100644 --- a/src/broker/state_machine.c +++ b/src/broker/state_machine.c @@ -358,6 +358,9 @@ static void action_run (struct state_machine *s) #endif } +/* In the cleanup state, we run the shutdown script. When the shutdown + * script is complete, we enter shutdown state. 
+ */ static void action_cleanup (struct state_machine *s) { /* Prevent new downstream clients from saying hello, but @@ -366,9 +369,12 @@ static void action_cleanup (struct state_machine *s) */ overlay_shutdown (s->ctx->overlay, false); - if (runat_is_defined (s->ctx->runat, "cleanup")) { - if (runat_start (s->ctx->runat, "cleanup", runat_completion_cb, s) < 0) { - flux_log_error (s->ctx->h, "runat_start cleanup"); + if (runat_is_defined (s->ctx->runat, "shutdown")) { + if (runat_start (s->ctx->runat, + "shutdown", + runat_completion_cb, + s) < 0) { + flux_log_error (s->ctx->h, "runat_start shutdown"); state_machine_post (s, "cleanup-fail"); } } @@ -535,8 +541,8 @@ void state_machine_kill (struct state_machine *s, int signum) state_machine_post (s, "shutdown"); break; case STATE_CLEANUP: - if (runat_abort (s->ctx->runat, "cleanup") < 0) - flux_log_error (h, "runat_abort cleanup (signal %d)", signum); + if (runat_abort (s->ctx->runat, "shutdown") < 0) + flux_log_error (h, "runat_abort shutdown (signal %d)", signum); break; case STATE_FINALIZE: (void)runat_abort (s->ctx->runat, "rc3"); @@ -613,7 +619,7 @@ static void runat_completion_cb (struct runat *r, const char *name, void *arg) s->ctx->exit_rc = rc; state_machine_post (s, rc == 0 ? "rc2-success" : "rc2-fail"); } - else if (streq (name, "cleanup")) { + else if (streq (name, "shutdown")) { if (s->ctx->exit_rc == 0 && rc != 0) s->ctx->exit_rc = rc; state_machine_post (s, rc == 0 ? "cleanup-success" : "cleanup-fail"); From dfbca80a1596e22ecf437464ec851af5caa4aa6c Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Thu, 4 Apr 2024 10:23:17 -0700 Subject: [PATCH 3/7] testsuite: add shutdown to test personalities Problem: test "personalities" like "job" and "kvs" select alternate rc1 and rc3 scripts, but not shutdown. Require that a personality define a shutdown script even if empty, and add them for "job" and "kvs". 
--- t/Makefile.am | 2 ++ t/rc/rc1-job | 6 ------ t/rc/shutdown-job | 5 +++++ t/rc/shutdown-kvs | 1 + t/sharness.d/flux-sharness.sh | 5 +++++ 5 files changed, 13 insertions(+), 6 deletions(-) create mode 100755 t/rc/shutdown-job create mode 100755 t/rc/shutdown-kvs diff --git a/t/Makefile.am b/t/Makefile.am index c0c38c27f0fc..8f5eb522cd87 100644 --- a/t/Makefile.am +++ b/t/Makefile.am @@ -303,6 +303,8 @@ EXTRA_DIST= \ rc/rc1-job \ rc/rc3-kvs \ rc/rc3-job \ + rc/shutdown-kvs \ + rc/shutdown-job \ shell/input \ shell/output \ shell/initrc/tests \ diff --git a/t/rc/rc1-job b/t/rc/rc1-job index 1a02897c874c..a3c78a470fd8 100755 --- a/t/rc/rc1-job +++ b/t/rc/rc1-job @@ -66,9 +66,3 @@ if [ $RANK -eq 0 ] then flux module debug --setbit 0x2 sched-simple fi - -test $RANK -ne 0 || flux admin cleanup-push <<-EOT - flux queue stop --all --nocheckpoint - flux cancel --all --states RUN - flux queue idle -EOT diff --git a/t/rc/shutdown-job b/t/rc/shutdown-job new file mode 100755 index 000000000000..37effe32cbd3 --- /dev/null +++ b/t/rc/shutdown-job @@ -0,0 +1,5 @@ +#!/bin/sh + +flux queue stop --quiet --all --nocheckpoint +flux cancel --user=all --quiet --states RUN +flux queue idle --quiet diff --git a/t/rc/shutdown-kvs b/t/rc/shutdown-kvs new file mode 100755 index 000000000000..1a2485251c33 --- /dev/null +++ b/t/rc/shutdown-kvs @@ -0,0 +1 @@ +#!/bin/sh diff --git a/t/sharness.d/flux-sharness.sh b/t/sharness.d/flux-sharness.sh index 9bc072e56524..6a9b74781306 100644 --- a/t/sharness.d/flux-sharness.sh +++ b/t/sharness.d/flux-sharness.sh @@ -222,6 +222,7 @@ test_under_flux() { if test "$personality" = "minimal"; then RC1_PATH="" RC3_PATH="" + SHUTDOWN_PATH="" elif test "$personality" = "system"; then # Pre-create broker rundir so we know it in advance and # make_bootstrap_config() can use it for ipc:// socket paths. 
@@ -234,11 +235,14 @@ test_under_flux() { unset root elif test "$personality" != "full"; then RC1_PATH=$FLUX_SOURCE_DIR/t/rc/rc1-$personality + SHUTDOWN_PATH=$FLUX_SOURCE_DIR/t/rc/shutdown-$personality RC3_PATH=$FLUX_SOURCE_DIR/t/rc/rc3-$personality test -x $RC1_PATH || error "cannot execute $RC1_PATH" + test -x $SHUTDOWN_PATH || error "cannot execute $SHUTDOWN_PATH" test -x $RC3_PATH || error "cannot execute $RC3_PATH" else unset RC1_PATH + unset SHUTDOWN_PATH unset RC3_PATH fi @@ -267,6 +271,7 @@ test_under_flux() { ${BROKER_RUNDIR+--test-rundir=${BROKER_RUNDIR}} \ ${BROKER_RUNDIR+--test-rundir-cleanup} \ ${RC1_PATH+-o -Sbroker.rc1_path=${RC1_PATH}} \ + ${SHUTDOWN_PATH+-o -Sbroker.shutdown_path=${SHUTDOWN_PATH}} \ ${RC3_PATH+-o -Sbroker.rc3_path=${RC3_PATH}} \ ${sysopts} \ ${logopts} \ From 3bea87c6df5f47899d86943ebd02680027d9d16f Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Thu, 4 Apr 2024 10:58:02 -0700 Subject: [PATCH 4/7] testsuite: update tests for new shutdown script Problem: some tests assume that clearing the rc1/rc3 paths is sufficient to bypass all default startup and shutdown activities, but now the shutdown script path must also be cleared. Modify test scripts to take the shutdown script into consideration when modifying rc1/rc3 paths. 
--- .../t2284-initial-program-format-chars.sh | 2 +- t/issues/t4482-flush-list-corruption.sh | 1 + t/t0001-basic.t | 2 +- t/t0003-module.t | 2 +- t/t0012-content-sqlite.t | 21 ++++- t/t0013-config-file.t | 9 +- t/t0014-runlevel.t | 91 +++++++++---------- t/t0024-content-s3.t | 3 + t/t0025-broker-state-machine.t | 12 ++- t/t2004-hydra.t | 2 +- t/t2010-kvs-snapshot-restore.t | 3 + t/t2219-job-manager-restart.t | 45 +++++---- t/t2807-dump-cmd.t | 2 + t/t3100-flux-in-flux.t | 2 +- t/t3200-instance-restart.t | 1 + t/t3202-instance-restart-testexec.t | 7 +- t/t3203-instance-recovery.t | 4 + 17 files changed, 124 insertions(+), 85 deletions(-) diff --git a/t/issues/t2284-initial-program-format-chars.sh b/t/issues/t2284-initial-program-format-chars.sh index 9be984959287..bf8b6df54eea 100755 --- a/t/issues/t2284-initial-program-format-chars.sh +++ b/t/issues/t2284-initial-program-format-chars.sh @@ -3,7 +3,7 @@ for s in %h %g %%h %f; do echo "Running flux broker echo $s" - output=$(flux broker -Sbroker.rc1_path= -Sbroker.rc3_path= /bin/echo $s) + output=$(flux broker -Sbroker.rc1_path= -Sbroker.shutdown_path= -Sbroker.rc3_path= /bin/echo $s) test "$output" = "$s" done exit 0 diff --git a/t/issues/t4482-flush-list-corruption.sh b/t/issues/t4482-flush-list-corruption.sh index 60eeff2a0e78..9d2c2d5a6064 100755 --- a/t/issues/t4482-flush-list-corruption.sh +++ b/t/issues/t4482-flush-list-corruption.sh @@ -39,5 +39,6 @@ chmod +x t4482.sh flux start -s 1 \ -o,--setattr=broker.rc1_path= \ + -o,--setattr=broker.shutdown_path= \ -o,--setattr=broker.rc3_path= \ ./t4482.sh diff --git a/t/t0001-basic.t b/t/t0001-basic.t index 7f6e9702292a..2653a039655e 100755 --- a/t/t0001-basic.t +++ b/t/t0001-basic.t @@ -132,7 +132,7 @@ test_expect_success 'flux fortune with art works' ' # Minimal is sufficient for these tests, but test_under_flux unavailable # clear the RC paths -ARGS="-o,-Sbroker.rc1_path=,-Sbroker.rc3_path=" +ARGS="-o,-Sbroker.rc1_path=,-Sbroker.rc3_path=,-Sbroker.shutdown_path=" 
test_expect_success 'flux-start in exec mode works' " flux start ${ARGS} flux getattr size | grep -x 1 diff --git a/t/t0003-module.t b/t/t0003-module.t index 05be2181aef6..b253d4acd5b3 100755 --- a/t/t0003-module.t +++ b/t/t0003-module.t @@ -286,7 +286,7 @@ test_expect_success 'module: remove testmod if loaded' ' ' test_expect_success 'module: load without unload causes broker failure' ' test_must_fail flux start \ - -o,-Sbroker.rc1_path=,-Sbroker.rc3_path= \ + -o,-Sbroker.rc1_path=,-Sbroker.rc3_path=,-Sbroker.shutdown_path= \ flux module load content 2>nounload.err ' test_expect_success 'module: module name is called out' ' diff --git a/t/t0012-content-sqlite.t b/t/t0012-content-sqlite.t index cf10529bad14..13fbd534f6dd 100755 --- a/t/t0012-content-sqlite.t +++ b/t/t0012-content-sqlite.t @@ -20,6 +20,7 @@ RPC=${FLUX_BUILD_DIR}/t/request/rpc SPAMUTIL="${FLUX_BUILD_DIR}/t/kvs/content-spam" rc1_kvs=$SHARNESS_TEST_SRCDIR/rc/rc1-kvs rc3_kvs=$SHARNESS_TEST_SRCDIR/rc/rc3-kvs +shutdown_kvs=$SHARNESS_TEST_SRCDIR/rc/shutdown-kvs test_expect_success 'load content module with lower purge/age thresholds' ' flux exec flux module load content \ @@ -333,12 +334,18 @@ test_expect_success 'reload module with no options and verify modes' ' test_expect_success 'run flux without statedir and verify modes' ' - flux start -o,-Sbroker.rc1_path=$rc1_kvs,-Sbroker.rc3_path=$rc3_kvs \ + flux start \ + -o,-Sbroker.rc1_path=$rc1_kvs \ + -o,-Sbroker.shutdown_path=$shutdown_kvs \ + -o,-Sbroker.rc3_path=$rc3_kvs \ flux dmesg >logs3 && grep "journal_mode=OFF synchronous=OFF" logs3 ' test_expect_success 'run flux with statedir and verify modes' ' - flux start -o,-Sbroker.rc1_path=$rc1_kvs,-Sbroker.rc3_path=$rc3_kvs \ + flux start \ + -o,-Sbroker.rc1_path=$rc1_kvs \ + -o,-Sbroker.shutdown_path=$shutdown_kvs \ + -o,-Sbroker.rc3_path=$rc3_kvs \ -o,-Sstatedir=$(pwd) flux dmesg >logs4 && grep "journal_mode=WAL synchronous=NORMAL" logs4 ' @@ -346,7 +353,10 @@ test_expect_success 'run flux with statedir 
and verify modes' ' # Will create in WAL mode since statedir is set recreate_database() { - flux start -o,-Sbroker.rc1_path=,-Sbroker.rc3_path= \ + flux start \ + -o,-Sbroker.rc1_path= \ + -o,-Sbroker.shutdown_path= \ + -o,-Sbroker.rc3_path= \ -o,-Sstatedir=$(pwd) bash -c \ "flux module load content && flux module load content-sqlite truncate && \ @@ -355,7 +365,10 @@ recreate_database() } load_module_xfail() { - flux start -o,-Sbroker.rc1_path=,-Sbroker.rc3_path= \ + flux start \ + -o,-Sbroker.rc1_path= \ + -o,-Sbroker.shutdown_path= \ + -o,-Sbroker.rc3_path= \ -o,-Sstatedir=$(pwd) bash -c \ "flux module load content; \ flux module load content-sqlite; \ diff --git a/t/t0013-config-file.t b/t/t0013-config-file.t index 2cb2fb8b9dbe..710117a4d58c 100755 --- a/t/t0013-config-file.t +++ b/t/t0013-config-file.t @@ -10,7 +10,7 @@ test_description='Test config file overlay bootstrap' # Avoid loading unnecessary modules in back to back broker tests -ARGS="-Sbroker.rc1_path= -Sbroker.rc3_path=" +ARGS="-Sbroker.rc1_path= -Sbroker.rc3_path= -Sbroker.shutdown_path=" # This option is compiled out of flux if zeromq is too old if flux broker ${ARGS} flux getattr tbon.tcp_user_timeout >/dev/null 2>&1; then @@ -227,6 +227,7 @@ test_expect_success 'start size=2 instance with ipc://' ' EOT flux start -s2 --test-hosts=fake[0-1] \ -o,-Sbroker.rc1_path=,-Sbroker.rc3_path= \ + -o,-Sbroker.shutdown_path= \ -o,--config-path=conf8 \ ./attrdump.sh >ipc.out && cat <<-EXP >ipc.exp && @@ -257,6 +258,7 @@ test_expect_success 'start size=3 instance with ipc:// and custom topology' ' EOT flux start --test-size=3 --test-hosts=fake[0-2] \ -o,-Sbroker.rc1_path=,-Sbroker.rc3_path= \ + -o,-Sbroker.shutdown_path= \ -o,--config-path=conf8a \ flux getattr tbon.maxlevel >conf8a.out && echo 2 >conf8a.exp && @@ -288,8 +290,8 @@ test_expect_success NO_CHAIN_LINT 'a warning is printed when upstream URI has un host = "fake1" EOT FLUX_FAKE_HOSTNAME=fake1 \ - flux broker -vv -Sbroker.rc1_path=,-Sbroker.rc3_path= 
\ - --config-path=conf8b 2>warn.err & + flux broker -vv -Sbroker.rc1_path= -Sbroker.rc3_path= \ + -Sbroker.shutdown_path= --config-path=conf8b 2>warn.err & echo $! >warn.pid && waitgrep "unable to resolve upstream peer" warn.err 30 ' @@ -325,6 +327,7 @@ test_expect_success 'start size=4 instance with tcp://' ' EOT flux start -s4 --test-hosts=fake[0-3] \ -o,-Sbroker.rc1_path=,-Sbroker.rc3_path= \ + -o,-Sbroker.shutdown_path= \ -o,--config-path=conf9 \ ./attrdump.sh >tcp.out && cat <<-EXP >tcp.exp && diff --git a/t/t0014-runlevel.t b/t/t0014-runlevel.t index 193cff972c4e..d858cebe9600 100755 --- a/t/t0014-runlevel.t +++ b/t/t0014-runlevel.t @@ -10,13 +10,17 @@ test -n "$FLUX_TESTS_LOGFILE" && set -- "$@" --logfile --debug test_expect_success 'initial program is run when rc1/rc3 are nullified' ' flux start -o,-Slog-stderr-level=6 \ - -o,-Sbroker.rc1_path=,-Sbroker.rc3_path= \ + -o,-Sbroker.rc1_path= \ + -o,-Sbroker.rc3_path= \ + -o,-Sbroker.shutdown_path= \ /bin/true 2>normal.log ' test_expect_success 'rc1 failure causes instance failure' ' test_expect_code 1 flux start \ - -o,-Sbroker.rc1_path=/bin/false,-Sbroker.rc3_path= \ + -o,-Sbroker.rc1_path=/bin/false \ + -o,-Sbroker.rc3_path= \ + -o,-Sbroker.shutdown_path= \ -o,-Slog-stderr-level=6 \ sleep 3600 2>rc1_failure.log ' @@ -25,7 +29,9 @@ test_expect_success 'rc1 bad path handled same as failure' ' ( SHELL=/bin/sh && test_expect_code 127 flux start \ - -o,-Sbroker.rc1_path=rc1-nonexist,-Sbroker.rc3_path= \ + -o,-Sbroker.rc1_path=rc1-nonexist \ + -o,-Sbroker.rc3_path= \ + -o,-Sbroker.shutdown_path= \ -o,-Slog-stderr-level=6 \ /bin/true 2>bad1.log ) @@ -35,15 +41,20 @@ test_expect_success 'default initial program is $SHELL' ' run_timeout --env=SHELL=/bin/sh 60 \ flux $SHARNESS_TEST_SRCDIR/scripts/runpty.py -i none \ flux start -o,-Slog-stderr-level=6 \ - -o,-Sbroker.rc1_path=,-Sbroker.rc3_path= \ + -o,-Sbroker.rc1_path= \ + -o,,-Sbroker.rc3_path= \ + -o,,-Sbroker.shutdown_path= \ >shell.log && grep "rc2.0: /bin/sh 
Exited" shell.log ' test_expect_success 'rc2 failure if stdin not a tty' ' test_expect_code 1 \ - flux start -o,-Slog-stderr-level=6 \ - -o,-Sbroker.rc1_path=,-Sbroker.rc3_path= \ + flux start \ + -o,-Slog-stderr-level=6 \ + -o,-Sbroker.rc1_path= \ + -o,-Sbroker.rc3_path= \ + -o,-Sbroker.shutdown_path= \ 2>shell-notty.log && grep "not a tty" shell-notty.log ' @@ -59,67 +70,46 @@ test_expect_success 'broker.rc2_none terminates by signal without error' ' for timeout in 0.5 1 2 4; do run_timeout -s ALRM $timeout flux start \ -o,-Slog-stderr-level=6 \ - -o,-Sbroker.rc1_path=,-Sbroker.rc3_path=,-Sbroker.rc2_none && + -o,-Sbroker.rc1_path= \ + -o,-Sbroker.rc3_path= \ + -o,-Sbroker.shutdown_path= \ + -o,-Sbroker.rc2_none && break done ' -test_expect_success 'flux admin cleanup-push /bin/true works' ' +test_expect_success 'shutdown=/bin/true works' ' flux start -o,-Slog-stderr-level=6 \ - -o,-Sbroker.rc1_path=,-Sbroker.rc3_path= \ - flux admin cleanup-push /bin/true -' - -test_expect_success 'flux admin cleanup-push /bin/false causes instance failure' ' - test_expect_code 1 flux start -o,-Slog-stderr-level=6 \ - -o,-Sbroker.rc1_path=,-Sbroker.rc3_path= \ - flux admin cleanup-push /bin/false -' - -test_expect_success 'cleanup does not run if rc1 fails' ' - test_expect_code 1 flux start -o,-Slog-stderr-level=6 \ - -o,-Sbroker.rc1_path=/bin/false,-Sbroker.rc3_path= \ - flux admin cleanup-push memorable-string 2>nocleanup.err && \ - test_must_fail grep memorable-string nocleanup.err + -o,-Sbroker.rc1_path= \ + -o,-Sbroker.rc3_path= \ + -o,-Sbroker.shutdown_path=/bin/true \ + /bin/true ' - -test_expect_success 'flux admin cleanup-push (empty) fails' ' +test_expect_success 'shutdown=/bin/false causes instance failure' ' test_expect_code 1 flux start \ - -o,-Sbroker.rc1_path=,-Sbroker.rc3_path= \ - flux admin cleanup-push "" 2>push.err && - grep "cannot push an empty command line" push.err + -o,-Slog-stderr-level=6 \ + -o,-Sbroker.rc1_path= \ + -o,-Sbroker.rc3_path= \ + 
-o,-Sbroker.shutdown_path=/bin/false \ + /bin/true ' -test_expect_success 'flux admin cleanup-push with no commands fails' ' +test_expect_success 'shutdown does not run if rc1 fails' ' test_expect_code 1 flux start \ - -o,-Sbroker.rc1_path=,-Sbroker.rc3_path= \ - flux admin cleanup-push push2.err && - grep "commands array is empty" push2.err -' - -test_expect_success 'flux admin cleanup-push (stdin) works' ' - echo /bin/true | flux start -o,-Slog-stderr-level=6 \ - -o,-Sbroker.rc1_path=,-Sbroker.rc3_path= \ - flux admin cleanup-push 2>push-stdin.err && - grep cleanup.0 push-stdin.err -' - -test_expect_success 'flux admin cleanup-push (stdin) retains cmd block order' ' - flux start -o,-Sbroker.rc1_path=,-Sbroker.rc3_path= \ -o,-Slog-stderr-level=6 \ - flux admin cleanup-push <<-EOT 2>hello.err && - echo Hello world - echo Hello solar system - EOT - grep "cleanup.0: Hello world" hello.err && - grep "cleanup.1: Hello solar system" hello.err + -o,-Sbroker.rc1_path=/bin/false \ + -o,-Sbroker.rc3_path= \ + -o,-Sbroker.shutdown_path=memorable-string \ + /bin/true 2>nocleanup.err && + test_must_fail grep memorable-string nocleanup.err ' -test_expect_success 'capture the environment for all three rc scripts' ' +test_expect_success 'capture the environment for all rc scripts' ' SLURM_FOO=42 flux start \ -o,-Slog-stderr-level=6 \ -o,-Sbroker.rc1_path="bash -c printenv >rc1.env" \ -o,-Sbroker.rc3_path="bash -c printenv >rc3.env" \ + -o,-Sbroker.shutdown_path="bash -c printenv >shutdown.env" \ "bash -c printenv >rc2.env" ' @@ -166,6 +156,7 @@ test_expect_success 'capture the environment for instance run as a job' ' -o,-Slog-stderr-level=6 \ -o,-Sbroker.rc1_path="bash -c printenv >rc1.env2" \ -o,-Sbroker.rc3_path="bash -c printenv >rc3.env2" \ + -o,-Sbroker.shutdown_path="bash -c printenv >shutdown.env2" \ "bash -c printenv >rc2.env2" ' diff --git a/t/t0024-content-s3.t b/t/t0024-content-s3.t index 128914661f97..a68204c20732 100755 --- a/t/t0024-content-s3.t +++ 
b/t/t0024-content-s3.t @@ -349,12 +349,14 @@ EOF test_expect_success 'run instance with content-s3 module loaded' ' flux start -o,--setattr=broker.rc1_path=$(pwd)/rc1-content-s3 \ -o,--setattr=broker.rc3_path=$(pwd)/rc3-content-s3 \ + -o,--setattr=broker.shutdown_path= \ flux kvs put testkey=43 ' test_expect_success 're-run instance with content-s3 module loaded' ' flux start -o,--setattr=broker.rc1_path=$(pwd)/rc1-content-s3 \ -o,--setattr=broker.rc3_path=$(pwd)/rc3-content-s3 \ + -o,--setattr=broker.shutdown_path= \ flux kvs get testkey >gets3.out ' @@ -366,6 +368,7 @@ test_expect_success 'content from previous instance survived (s3)' ' test_expect_success 're-run instance, verify checkpoint date saved (s3)' ' flux start -o,--setattr=broker.rc1_path=$(pwd)/rc1-content-s3 \ -o,--setattr=broker.rc3_path=$(pwd)/rc3-content-s3 \ + -o,--setattr=broker.shutdown_path= \ flux dmesg >dmesgs3.out ' diff --git a/t/t0025-broker-state-machine.t b/t/t0025-broker-state-machine.t index 5b31552318e6..f488522d31c2 100755 --- a/t/t0025-broker-state-machine.t +++ b/t/t0025-broker-state-machine.t @@ -9,7 +9,7 @@ test -n "$FLUX_TESTS_LOGFILE" && set -- "$@" --logfile RPC=${FLUX_BUILD_DIR}/t/request/rpc SRPC=${FLUX_BUILD_DIR}/t/request/rpc_stream -ARGS="-o,-Sbroker.rc1_path=,-Sbroker.rc3_path=" +ARGS="-o,-Sbroker.rc1_path=,-Sbroker.rc3_path=,-Sbroker.shutdown_path=" GROUPSCMD="flux python ${SHARNESS_TEST_SRCDIR}/scripts/groups.py" test_expect_success 'quorum reached on instance with 1 TBON level' ' @@ -83,6 +83,7 @@ test_expect_success 'instance functions with late-joiner' ' flux start -s2 \ -o,-Slog-stderr-level=6 \ -o,-Sbroker.rc1_path="$(pwd)/rc1_block" \ + -o,-Sbroker.shutdown_path= \ -o,-Sbroker.rc3_path= \ -o,-Sbroker.quorum=1 \ $(pwd)/rc2_unblock >late.out && @@ -115,6 +116,7 @@ test_expect_success 'monitor reports INIT(2) in rc1' ' echo 2 >rc1.exp && flux start \ -o,-Sbroker.rc1_path=$(pwd)/rc_getstate \ + -o,-Sbroker.shutdown_path= \ -o,-Sbroker.rc3_path= \ /bin/true && 
test_cmp rc1.exp rc.out @@ -124,17 +126,19 @@ test_expect_success 'monitor reports RUN(4) in rc2' ' echo 4 >rc2.exp && flux start \ -o,-Sbroker.rc1_path= \ + -o,-Sbroker.shutdown_path= \ -o,-Sbroker.rc3_path= \ $(pwd)/rc_getstate && test_cmp rc2.exp rc.out ' -test_expect_success 'monitor reports CLEANUP(5) in cleanup script' ' +test_expect_success 'monitor reports CLEANUP(5) in shutdown script' ' echo 5 >cleanup.exp && flux start \ -o,-Sbroker.rc1_path= \ + -o,-Sbroker.shutdown_path=$(pwd)/rc_getstate \ -o,-Sbroker.rc3_path= \ - bash -c "echo $(pwd)/rc_getstate | flux admin cleanup-push" && + /bin/true && test_cmp cleanup.exp rc.out ' @@ -142,6 +146,7 @@ test_expect_success 'monitor reports FINALIZE(7) in rc3' ' echo 7 >rc3.exp && flux start \ -o,-Sbroker.rc1_path= \ + -o,-Sbroker.shutdown_path= \ -o,-Sbroker.rc3_path=$(pwd)/rc_getstate \ /bin/true && test_cmp rc3.exp rc.out @@ -232,6 +237,7 @@ test_expect_success 'capture state transitions from instance with rc3 failure' ' test_expect_code 1 flux start \ -o,-Slog-filename=states_rc3.log \ -o,-Sbroker.rc1_path= \ + -o,-Sbroker.shutdown_path= \ -o,-Sbroker.rc3_path=/bin/false \ /bin/true ' diff --git a/t/t2004-hydra.t b/t/t2004-hydra.t index da29b1c14f47..b3cde396e0a3 100755 --- a/t/t2004-hydra.t +++ b/t/t2004-hydra.t @@ -7,7 +7,7 @@ test_description='Test that MPICH Hydra can launch Flux' test -n "$FLUX_TESTS_LOGFILE" && set -- "$@" --logfile . `dirname $0`/sharness.sh PMI_INFO=${FLUX_BUILD_DIR}/src/common/libpmi/test_pmi_info -ARGS="-o,-Sbroker.rc1_path=,-Sbroker.rc3_path=" +ARGS="-o,-Sbroker.rc1_path=,-Sbroker.rc3_path=,-Sbroker.shutdown_path=" if ! 
which mpiexec.hydra 2>/dev/null; then skip_all='skipping hydra-launching-flux tests, mpiexec.hydra unavailable' diff --git a/t/t2010-kvs-snapshot-restore.t b/t/t2010-kvs-snapshot-restore.t index f4ccab08ddf6..dea4071bc218 100755 --- a/t/t2010-kvs-snapshot-restore.t +++ b/t/t2010-kvs-snapshot-restore.t @@ -117,6 +117,7 @@ test_expect_success 'run instance with statedir set (files)' ' flux start -o,--setattr=statedir=$(pwd) \ -o,--setattr=broker.rc1_path=$(pwd)/rc1-content-files \ -o,--setattr=broker.rc3_path=$(pwd)/rc3-content-files \ + -o,--setattr=broker.shutdown_path= \ flux kvs put --sequence testkey=43 > start_sequence_files.out ' @@ -129,6 +130,7 @@ test_expect_success 're-run instance with statedir set (files)' ' flux start -o,--setattr=statedir=$(pwd) \ -o,--setattr=broker.rc1_path=$(pwd)/rc1-content-files \ -o,--setattr=broker.rc3_path=$(pwd)/rc3-content-files \ + -o,--setattr=broker.shutdown_path= \ flux kvs get testkey >getfiles.out ' @@ -167,6 +169,7 @@ test_expect_success 're-run instance, verify checkpoint date saved (files)' ' flux start -o,--setattr=statedir=$(pwd) \ -o,--setattr=broker.rc1_path=$(pwd)/rc1-content-files \ -o,--setattr=broker.rc3_path=$(pwd)/rc3-content-files \ + -o,--setattr=broker.shutdown_path= \ flux dmesg >dmesgfiles.out ' diff --git a/t/t2219-job-manager-restart.t b/t/t2219-job-manager-restart.t index c4b268e3ae75..b649a65af540 100755 --- a/t/t2219-job-manager-restart.t +++ b/t/t2219-job-manager-restart.t @@ -6,16 +6,17 @@ test_description='Test flux job manager restart' DUMPS=${SHARNESS_TEST_SRCDIR}/job-manager/dumps -export FLUX_DISABLE_JOB_CLEANUP=t +COMMON_START_OPTS=-o,--config-path=$(pwd)/conf.d,-Sbroker.shutdown_path= test_expect_success 'start instance with empty kvs, run one job, and dump' ' flux start -o,-Scontent.dump=dump.tar \ + -o,-Sbroker.shutdown_path= \ flux run --env-remove=* /bin/true && test -f $(pwd)/dump.tar ' restart_flux() { - flux start -o,-Scontent.restore=$1 \ + flux start 
-o,-Scontent.restore=$1,-Sbroker.shutdown_path= \ flux module stats job-manager } @@ -23,7 +24,9 @@ restart_flux() { # "not replayed" warnings were logged restart_with_job_warning() { local out=$(basename $1).dmesg - flux start -o,-Scontent.restore=$1 /bin/true 2>$out + flux start -o,-Scontent.restore=$1 \ + -o,-Sbroker.shutdown_path= \ + /bin/true 2>$out result=$? cat $out test $result -eq 0 && grep -q "not replayed:" $out @@ -61,23 +64,28 @@ test_expect_success 'and max_jobid is still greater than zero' ' ' test_expect_success 'purging all jobs triggers jobid checkpoint update' ' - flux start bash -c "flux run --env-remove=* /bin/true && \ + flux start -o,-Sbroker.shutdown_path= \ + bash -c "flux run --env-remove=* /bin/true && \ flux job purge -f --num-limit=0 && \ flux kvs get checkpoint.job-manager" ' test_expect_success 'verify that anon queue disable persists across restart' ' flux start -o,-Scontent.dump=dump_dis.tar \ + -o,-Sbroker.shutdown_path= \ flux queue disable disable-restart-test && flux start -o,-Scontent.restore=dump_dis.tar \ + -o,-Sbroker.shutdown_path= \ flux queue status >dump_dis.out && grep "disabled: disable-restart-test" dump_dis.out ' test_expect_success 'verify that anon queue stopped persists across restart' ' flux start -o,-Scontent.dump=dump_stopped.tar \ + -o,-Sbroker.shutdown_path= \ flux queue stop stop-restart-test && flux start -o,-Scontent.restore=dump_stopped.tar \ + -o,-Sbroker.shutdown_path= \ flux queue status >dump_stopped.out && grep "stopped: stop-restart-test" dump_stopped.out ' @@ -88,22 +96,22 @@ test_expect_success 'verify that named queue enable/disable persists across rest [queues.debug] [queues.batch] EOT - flux start -o,--config-path=$(pwd)/conf.d \ + flux start ${COMMON_START_OPTS} \ -o,-Scontent.dump=dump_queue_enable1.tar \ flux queue status >dump_queue_enable_1.out && - flux start -o,--config-path=$(pwd)/conf.d \ + flux start ${COMMON_START_OPTS} \ -o,-Scontent.restore=dump_queue_enable1.tar \ 
-o,-Scontent.dump=dump_queue_enable2.tar \ flux queue disable --queue=batch xyzzy && - flux start -o,--config-path=$(pwd)/conf.d \ + flux start ${COMMON_START_OPTS} \ -o,-Scontent.restore=dump_queue_enable2.tar \ -o,-Scontent.dump=dump_queue_enable3.tar \ flux queue status >dump_queue_enable_2.out && - flux start -o,--config-path=$(pwd)/conf.d \ + flux start ${COMMON_START_OPTS} \ -o,-Scontent.restore=dump_queue_enable3.tar \ -o,-Scontent.dump=dump_queue_enable4.tar \ flux queue enable --queue=batch && - flux start -o,--config-path=$(pwd)/conf.d \ + flux start ${COMMON_START_OPTS} \ -o,-Scontent.restore=dump_queue_enable4.tar \ -o,-Scontent.dump=dump_queue_enable5.tar \ flux queue status >dump_queue_enable_3.out && @@ -119,6 +127,7 @@ test_expect_success 'verify that named queue enable/disable persists across rest # is tested test_expect_success 'verify that instance can restart after config change' ' flux start -o,-Scontent.restore=dump_queue_enable5.tar \ + -o,-Sbroker.shutdown_path= \ flux queue status >dump_queue_reconf.out && grep "^Job submission is enabled" dump_queue_reconf.out ' @@ -129,22 +138,22 @@ test_expect_success 'verify that named queue start/stop persists across restart' [queues.debug] [queues.batch] EOT - flux start -o,--config-path=$(pwd)/conf.d \ + flux start ${COMMON_START_OPTS} \ -o,-Scontent.dump=dump_queue_start1.tar \ flux queue status >dump_queue_start_1.out && - flux start -o,--config-path=$(pwd)/conf.d \ + flux start ${COMMON_START_OPTS} \ -o,-Scontent.restore=dump_queue_start1.tar \ -o,-Scontent.dump=dump_queue_start2.tar \ flux queue start --queue=batch && - flux start -o,--config-path=$(pwd)/conf.d \ + flux start ${COMMON_START_OPTS} \ -o,-Scontent.restore=dump_queue_start2.tar \ -o,-Scontent.dump=dump_queue_start3.tar \ flux queue status >dump_queue_start_2.out && - flux start -o,--config-path=$(pwd)/conf.d \ + flux start ${COMMON_START_OPTS} \ -o,-Scontent.restore=dump_queue_start3.tar \ -o,-Scontent.dump=dump_queue_start4.tar \ 
flux queue stop --queue=batch xyzzy && - flux start -o,--config-path=$(pwd)/conf.d \ + flux start ${COMMON_START_OPTS} \ -o,-Scontent.restore=dump_queue_start4.tar \ -o,-Scontent.dump=dump_queue_start5.tar \ flux queue status >dump_queue_start_3.out && @@ -162,13 +171,13 @@ test_expect_success 'checkpointed queue no longer configured on restart is ignor [queues.debug] [queues.batch] EOT - flux start -o,--config-path=$(pwd)/conf.d \ + flux start ${COMMON_START_OPTS} \ -o,-Scontent.dump=dump_queue_missing.tar \ flux queue disable --queue batch xyzzy && cat >conf.d/queues.toml <<-EOT && [queues.debug] EOT - flux start -o,--config-path=$(pwd)/conf.d \ + flux start ${COMMON_START_OPTS} \ -o,-Scontent.restore=dump_queue_missing.tar \ flux queue status >dump_queue_missing.out && grep "^debug: Job submission is enabled" dump_queue_missing.out && @@ -182,14 +191,14 @@ test_expect_success 'new queue configured on restart uses defaults' ' [queues.debug] [queues.batch] EOT - flux start -o,--config-path=$(pwd)/conf.d \ + flux start ${COMMON_START_OPTS} \ -o,-Scontent.dump=dump_queue_ignored.tar \ flux queue disable --queue batch xyzzy && cat >conf.d/queues.toml <<-EOT && [queues.debug] [queues.newqueue] EOT - flux start -o,--config-path=$(pwd)/conf.d \ + flux start ${COMMON_START_OPTS} \ -o,-Scontent.restore=dump_queue_ignored.tar \ flux queue status >dump_queue_ignored.out && grep "^debug: Job submission is enabled" dump_queue_ignored.out && diff --git a/t/t2807-dump-cmd.t b/t/t2807-dump-cmd.t index e195d6f2f4f8..d10a91274ce9 100755 --- a/t/t2807-dump-cmd.t +++ b/t/t2807-dump-cmd.t @@ -200,6 +200,7 @@ reader() { local dbdir=$1 flux start -o,-Sbroker.rc1_path= \ -o,-Sbroker.rc3_path=\ + -o,-Sbroker.shutdown_path=\ -o,-Sstatedir=$dbdir\ bash -c "\ flux module load content && \ @@ -214,6 +215,7 @@ writer() { local dbdir=$1 flux start -o,-Sbroker.rc1_path= \ -o,-Sbroker.rc3_path= \ + -o,-Sbroker.shutdown_path= \ -o,-Sstatedir=$dbdir \ bash -c "\ flux module load content && \ diff 
--git a/t/t3100-flux-in-flux.t b/t/t3100-flux-in-flux.t index 36e146bdbdbb..789749bc2557 100755 --- a/t/t3100-flux-in-flux.t +++ b/t/t3100-flux-in-flux.t @@ -10,7 +10,7 @@ SIZE=$(test_size_large) test_under_flux ${SIZE} echo "# $0: flux session size will be ${SIZE}" -ARGS="-o,-Sbroker.rc1_path=,-Sbroker.rc3_path=" +ARGS="-o,-Sbroker.rc1_path=,-Sbroker.rc3_path=,-Sbroker.shutdown_path=" test_expect_success "flux can run flux instance as a job" ' run_timeout 60 flux run -n1 -N1 \ flux start ${ARGS} flux getattr size >size.out && diff --git a/t/t3200-instance-restart.t b/t/t3200-instance-restart.t index c0c04d577ee3..cfba30ef56aa 100755 --- a/t/t3200-instance-restart.t +++ b/t/t3200-instance-restart.t @@ -58,6 +58,7 @@ test_expect_success 'most recent period is still running' ' test_expect_success 'doctor startlog to look like a crash' ' flux start -o,--setattr=statedir=$(pwd) \ -o,-Sbroker.rc1_path=$SHARNESS_TEST_SRCDIR/rc/rc1-kvs \ + -o,-Sbroker.shutdown_path= \ -o,-Sbroker.rc3_path=$SHARNESS_TEST_SRCDIR/rc/rc3-kvs \ flux startlog --post-start-event ' diff --git a/t/t3202-instance-restart-testexec.t b/t/t3202-instance-restart-testexec.t index 8ddd9c5e2ff0..38b53c60cab7 100755 --- a/t/t3202-instance-restart-testexec.t +++ b/t/t3202-instance-restart-testexec.t @@ -6,10 +6,9 @@ test_description='Test instance restart and still running jobs with testexec' test -n "$FLUX_TESTS_LOGFILE" && set -- "$@" --logfile . 
`dirname $0`/sharness.sh -export FLUX_DISABLE_JOB_CLEANUP=t - test_expect_success 'run a testexec job in persistent instance (long run)' ' flux start -o,--setattr=statedir=$(pwd) \ + -o,--setattr=broker.shutdown_path= \ flux submit \ --flags=debug \ --setattr=system.exec.test.run_duration=100s \ @@ -18,6 +17,7 @@ test_expect_success 'run a testexec job in persistent instance (long run)' ' test_expect_success 'restart instance, reattach to running job, cancel it (long run)' ' flux start -o,--setattr=statedir=$(pwd) \ + -o,--setattr=broker.shutdown_path= \ sh -c "flux job eventlog $(cat id1.out) > eventlog_long1.out; \ flux jobs -n > jobs_long1.out; \ flux cancel $(cat id1.out)" && @@ -28,6 +28,7 @@ test_expect_success 'restart instance, reattach to running job, cancel it (long test_expect_success 'restart instance, job completed (long run)' ' flux start -o,--setattr=statedir=$(pwd) \ + -o,--setattr=broker.shutdown_path= \ sh -c "flux job eventlog $(cat id1.out) > eventlog_long2.out; \ flux jobs -n > jobs_long2.out" && grep "finish" eventlog_long2.out | grep status && @@ -39,6 +40,7 @@ test_expect_success 'restart instance, job completed (long run)' ' # instance restarted test_expect_success 'run a testexec job in persistent instance (exit run)' ' flux start -o,--setattr=statedir=$(pwd) \ + -o,--setattr=broker.shutdown_path= \ flux submit \ --flags=debug \ --setattr=system.exec.test.reattach_finish=1 \ @@ -48,6 +50,7 @@ test_expect_success 'run a testexec job in persistent instance (exit run)' ' test_expect_success 'restart instance, reattach to running job, its finished (exit run)' ' flux start -o,--setattr=statedir=$(pwd) \ + -o,--setattr=broker.shutdown_path= \ sh -c "flux job eventlog $(cat id2.out) > eventlog_exit1.out" && grep "reattach-start" eventlog_exit1.out && grep "reattach-finish" eventlog_exit1.out && diff --git a/t/t3203-instance-recovery.t b/t/t3203-instance-recovery.t index 94fc9f42fb59..c01207aff132 100755 --- a/t/t3203-instance-recovery.t +++ 
b/t/t3203-instance-recovery.t @@ -24,6 +24,7 @@ test_expect_success 'expected broker attributes are set in recovery mode' ' EOT flux start --recovery=$(pwd)/test1 \ -o,-Sbroker.rc1_path= \ + -o,-Sbroker.shutdown_path= \ -o,-Sbroker.rc3_path= \ bash -c " \ flux getattr broker.recovery-mode && \ @@ -35,6 +36,7 @@ test_expect_success 'banner message is printed in interactive recovery mode' ' run_timeout --env=SHELL=/bin/sh 120 \ $runpty -i none flux start \ -o,-Sbroker.rc1_path= \ + -o,-Sbroker.shutdown_path= \ -o,-Sbroker.rc3_path= \ --recovery=$(pwd)/test1 >banner.out && grep "Entering Flux recovery mode" banner.out @@ -42,6 +44,7 @@ test_expect_success 'banner message is printed in interactive recovery mode' ' test_expect_success 'rc1 failure is ignored in recovery mode' ' flux start --recovery=$(pwd)/test1 \ -o,-Sbroker.rc1_path=/bin/false \ + -o,-Sbroker.shutdown_path= \ -o,-Sbroker.rc3_path= \ echo "hello world" >hello.out && grep hello hello.out @@ -65,6 +68,7 @@ test_expect_success 'banner message warns changes are not persistent' ' run_timeout --env=SHELL=/bin/sh 120 \ $runpty -i none flux start \ -o,-Sbroker.rc1_path= \ + -o,-Sbroker.shutdown_path= \ -o,-Sbroker.rc3_path= \ --recovery=$(pwd)/test1.tar >banner2.out && grep "changes will not be preserved" banner2.out From 06d97b920028a9ab337dc600f673f34fd342a9e5 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Thu, 4 Apr 2024 15:59:12 -0700 Subject: [PATCH 5/7] broker: fix line breaks Problem: a function call with a long parameter list is not broken into one parameter per line, as per project norms. Fix line breaks. 
--- src/broker/broker.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/broker/broker.c b/src/broker/broker.c index c73886c21ed8..039da30f0a3c 100644 --- a/src/broker/broker.c +++ b/src/broker/broker.c @@ -769,8 +769,9 @@ static int create_runat_phases (broker_ctx_t *ctx) /* rc2 - initial program */ if (ctx->rank == 0 && !rc2_none) { - if (create_runat_rc2 (ctx->runat, ctx->init_shell_cmd, - ctx->init_shell_cmd_len) < 0) { + if (create_runat_rc2 (ctx->runat, + ctx->init_shell_cmd, + ctx->init_shell_cmd_len) < 0) { log_err ("create_runat_rc2"); return -1; } From e8004d47ed4f12b1ce7cc3b91c8db6921960af5b Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Thu, 4 Apr 2024 16:26:02 -0700 Subject: [PATCH 6/7] broker: run rc scripts directly Problem: rc1, rc3 scripts are run with shell -c