Skip to content

Commit

Permalink
flux-job: output jobid with exception
Browse files Browse the repository at this point in the history
Problem: In a batch job, it may be difficult to discern which
job had an exception.

When flux job attach reports a job exception, also print the jobid
of the affected job.

Update tests in t2608-job-shell-log.t and t2304-sched-simple-alloc-check.t
for the change in expected output.
  • Loading branch information
chu11 committed Nov 23, 2024
1 parent dd77ed5 commit 31800a8
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 8 deletions.
13 changes: 8 additions & 5 deletions src/cmd/job/attach.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "src/common/libsubprocess/fbuf.h"
#include "src/common/libsubprocess/fbuf_watcher.h"
#include "src/common/libtaskmap/taskmap_private.h"
#include "src/common/libjob/idf58.h"

#include "src/common/libterminus/pty.h"
#include "src/common/libdebugged/debugged.h"
Expand Down Expand Up @@ -1161,11 +1162,13 @@ void attach_event_continuation (flux_future_t *f, void *arg)

if (ctx->statusline)
fprintf (stderr, "\r\033[K");
fprintf (stderr, "%.3fs: job.exception type=%s severity=%d %s\n",
event->timestamp - ctx->timestamp_zero,
type,
severity,
note);
fprintf (stderr,
"%.3fs: job.exception %s type=%s severity=%d %s\n",
event->timestamp - ctx->timestamp_zero,
ctx->jobid,
type,
severity,
note);

ctx->fatal_exception = (severity == 0);

Expand Down
2 changes: 1 addition & 1 deletion t/t2304-sched-simple-alloc-check.t
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ test_expect_success 'submit consecutive jobs that exceed their time limit' '
done) 2>joberr
'
test_expect_success 'some jobs received timeout exception' '
grep "job.exception type=timeout" joberr
grep "job.exception" joberr | grep "type=timeout"
'
test_expect_success 'no jobs received alloc-check exception' '
test_must_fail grep "job.exception type=alloc-check" joberr
Expand Down
4 changes: 2 additions & 2 deletions t/t2608-job-shell-log.t
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,15 @@ done
test_expect_success 'flux-shell: missing command logs fatal error' '
test_expect_code 127 flux run nosuchcommand 2>missing.err &&
grep "flux-shell\[0\]: FATAL: task 0.*: start failed" missing.err &&
grep "job.exception type=exec severity=0 task 0.*: start failed" missing.err &&
grep "job.exception" missing.err | grep "type=exec severity=0 task 0.*: start failed" &&
grep "No such file or directory" missing.err
'

test_expect_success 'flux-shell: illegal command logs fatal error' '
mkdir adirectory &&
test_expect_code 126 flux run ./adirectory 2>illegal.err &&
grep "flux-shell\[0\]: FATAL: task 0.*: start failed" illegal.err &&
grep "job.exception type=exec severity=0 task 0.*: start failed" illegal.err &&
grep "job.exception" illegal.err | grep "type=exec severity=0 task 0.*: start failed" &&
grep "Permission denied" illegal.err
'

Expand Down

0 comments on commit 31800a8

Please sign in to comment.