From 31800a895fe329d09fe2c727641927f279528488 Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Fri, 22 Nov 2024 14:44:05 -0800 Subject: [PATCH] flux-job: output jobid with exception Problem: In a batch job, it may be difficult to discern which job had an exception. When flux job attach outputs an exception, also output the jobid of the job. Update tests in t2608-job-shell-log.t and t2304-sched-simple-alloc-check.t for change in expected output. --- src/cmd/job/attach.c | 13 ++++++++----- t/t2304-sched-simple-alloc-check.t | 2 +- t/t2608-job-shell-log.t | 4 ++-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/cmd/job/attach.c b/src/cmd/job/attach.c index 55455aa4e621..6123ba2b8bef 100644 --- a/src/cmd/job/attach.c +++ b/src/cmd/job/attach.c @@ -34,6 +34,7 @@ #include "src/common/libsubprocess/fbuf.h" #include "src/common/libsubprocess/fbuf_watcher.h" #include "src/common/libtaskmap/taskmap_private.h" +#include "src/common/libjob/idf58.h" #include "src/common/libterminus/pty.h" #include "src/common/libdebugged/debugged.h" @@ -1161,11 +1162,13 @@ void attach_event_continuation (flux_future_t *f, void *arg) if (ctx->statusline) fprintf (stderr, "\r\033[K"); - fprintf (stderr, "%.3fs: job.exception type=%s severity=%d %s\n", - event->timestamp - ctx->timestamp_zero, - type, - severity, - note); + fprintf (stderr, + "%.3fs: job.exception %s type=%s severity=%d %s\n", + event->timestamp - ctx->timestamp_zero, + ctx->jobid, + type, + severity, + note); ctx->fatal_exception = (severity == 0); diff --git a/t/t2304-sched-simple-alloc-check.t b/t/t2304-sched-simple-alloc-check.t index fb1045920699..3c66b2163ee4 100755 --- a/t/t2304-sched-simple-alloc-check.t +++ b/t/t2304-sched-simple-alloc-check.t @@ -71,7 +71,7 @@ test_expect_success 'submit consecutive jobs that exceed their time limit' ' done) 2>joberr ' test_expect_success 'some jobs received timeout exception' ' - grep "job.exception type=timeout" joberr + grep "job.exception" joberr | grep "type=timeout" ' test_expect_success 'no jobs received alloc-check exception' ' test_must_fail grep "job.exception type=alloc-check" joberr diff --git a/t/t2608-job-shell-log.t b/t/t2608-job-shell-log.t index 6d6af05ba2e2..bc413d680161 100755 --- a/t/t2608-job-shell-log.t +++ b/t/t2608-job-shell-log.t @@ -99,7 +99,7 @@ done test_expect_success 'flux-shell: missing command logs fatal error' ' test_expect_code 127 flux run nosuchcommand 2>missing.err && grep "flux-shell\[0\]: FATAL: task 0.*: start failed" missing.err && - grep "job.exception type=exec severity=0 task 0.*: start failed" missing.err && + grep "job.exception" missing.err | grep "type=exec severity=0 task 0.*: start failed" && grep "No such file or directory" missing.err ' @@ -107,7 +107,7 @@ test_expect_success 'flux-shell: illegal command logs fatal error' ' mkdir adirectory && test_expect_code 126 flux run ./adirectory 2>illegal.err && grep "flux-shell\[0\]: FATAL: task 0.*: start failed" illegal.err && - grep "job.exception type=exec severity=0 task 0.*: start failed" illegal.err && + grep "job.exception" illegal.err | grep "type=exec severity=0 task 0.*: start failed" && grep "Permission denied" illegal.err '