Skip to content

Commit

Permalink
shell: add executable name to doom exceptions
Browse files Browse the repository at this point in the history
Problem: Exceptions from the doom plugin do not specify which
job failed.  When a batch job runs multiple jobs, users cannot easily
tell which of them failed, or whether the batch job's own broker
failed.

Add the job executable name to the doom exception outputs.

Fixes #6357
  • Loading branch information
chu11 committed Nov 22, 2024
1 parent 8015f88 commit dd77ed5
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions src/shell/doom.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

#include <jansson.h>
#include <assert.h>
#include <libgen.h>

#include "src/common/libeventlog/eventlog.h"
#include "src/common/libutil/fsd.h"
Expand Down Expand Up @@ -96,6 +97,17 @@ static const char *doom_exit_host (struct shell_doom *doom)
return hostlist_nth (doom->hl, nth);
}

/* Return the basename of the job's executable, i.e. the first element of
 * the jobspec command array, for inclusion in doom exception messages.
 *
 * The path is copied into a function-local static buffer before calling
 * basename(3), so the returned pointer is only valid until the next call
 * and must not be freed by the caller.  If the first command element is
 * missing or is not a string, the placeholder "unknown" is used.
 */
static char *get_jobspec_command_arg0 (struct shell_doom *doom)
{
    static char buf[PATH_MAX+1] = {0};
    json_t *s = json_array_get (doom->shell->info->jobspec->command, 0);
    const char *path = json_string_value (s);

    /* json_string_value() yields NULL for a missing or non-string
     * element; passing NULL for "%s" is undefined behavior.
     */
    if (!path)
        path = "unknown";
    /* basename(3) is allowed to modify its argument, so hand it a
     * private copy rather than the jobspec's own string.
     */
    snprintf (buf, sizeof (buf), "%s", path);
    return basename (buf);
}

static void doom_check (struct shell_doom *doom,
int rank,
int exitcode,
Expand All @@ -118,7 +130,8 @@ static void doom_check (struct shell_doom *doom,

if (doom->exit_on_error && doom->exit_rc != 0) {
shell_die (doom->exit_rc,
"%srank %d on host %s failed and exit-on-error is set",
"%s: %srank %d on host %s failed and exit-on-error is set",
get_jobspec_command_arg0 (doom),
doom->lost_shell ? "shell " : "",
doom->exit_rank,
doom_exit_host (doom));
Expand Down Expand Up @@ -206,7 +219,8 @@ static void doom_timeout (flux_reactor_t *r,

fsd_format_duration (fsd, sizeof (fsd), doom->timeout);
shell_die (doom->exit_rc,
"%srank %d on host %s exited and exit-timeout=%s has expired",
"%s: %srank %d on host %s exited and exit-timeout=%s has expired",
get_jobspec_command_arg0 (doom),
doom->lost_shell ? "shell " : "",
doom->exit_rank,
doom_exit_host (doom),
Expand Down

0 comments on commit dd77ed5

Please sign in to comment.