From dd77ed5a62835154ab723f7ee277960cccd9b6b9 Mon Sep 17 00:00:00 2001
From: Albert Chu
Date: Thu, 21 Nov 2024 10:02:49 -0800
Subject: [PATCH] shell: add executable name to doom exceptions

Problem: Exceptions from the doom plugin do not specify what job
failed.  It can be confusing to users which job failed amongst
multiple in a batch job, or if the broker from a batch job itself
failed.

Add the job executable name to the doom exception outputs.  If the
jobspec command has no usable first argument, fall back to "unknown"
rather than passing a NULL string to the message formatter.

Fixes #6357
---
 src/shell/doom.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/shell/doom.c b/src/shell/doom.c
index 498787fbb94b..1188ccce1899 100644
--- a/src/shell/doom.c
+++ b/src/shell/doom.c
@@ -35,6 +35,7 @@
 
 #include
 #include
+#include <libgen.h>
 
 #include "src/common/libeventlog/eventlog.h"
 #include "src/common/libutil/fsd.h"
@@ -96,6 +97,27 @@ static const char *doom_exit_host (struct shell_doom *doom)
     return hostlist_nth (doom->hl, nth);
 }
 
+/* Return the basename of the first jobspec command argument, i.e. the
+ * name of the job executable, for use in doom exception messages.
+ *
+ * The command path is copied into a static buffer because basename(3)
+ * may modify its argument.  The result points into that buffer (or into
+ * storage owned by basename), so it must not be freed and is only valid
+ * until the next call.  "unknown" is substituted when the command has no
+ * string first element, so NULL is never passed to a "%s" conversion.
+ */
+static char *get_jobspec_command_arg0 (struct shell_doom *doom)
+{
+    static char buf[PATH_MAX + 1];
+    json_t *arg0 = json_array_get (doom->shell->info->jobspec->command, 0);
+    const char *path = json_string_value (arg0);
+
+    if (!path)
+        path = "unknown";
+    snprintf (buf, sizeof (buf), "%s", path);
+    return basename (buf);
+}
+
 static void doom_check (struct shell_doom *doom,
                         int rank,
                         int exitcode,
@@ -118,7 +140,8 @@ static void doom_check (struct shell_doom *doom,
 
     if (doom->exit_on_error && doom->exit_rc != 0) {
         shell_die (doom->exit_rc,
-                   "%srank %d on host %s failed and exit-on-error is set",
+                   "%s: %srank %d on host %s failed and exit-on-error is set",
+                   get_jobspec_command_arg0 (doom),
                    doom->lost_shell ? "shell " : "",
                    doom->exit_rank,
                    doom_exit_host (doom));
@@ -206,7 +229,8 @@ static void doom_timeout (flux_reactor_t *r,
 
     fsd_format_duration (fsd, sizeof (fsd), doom->timeout);
     shell_die (doom->exit_rc,
-               "%srank %d on host %s exited and exit-timeout=%s has expired",
+               "%s: %srank %d on host %s exited and exit-timeout=%s has expired",
+               get_jobspec_command_arg0 (doom),
                doom->lost_shell ? "shell " : "",
                doom->exit_rank,
                doom_exit_host (doom),