From dc518c6b43f1192f150c49fe837141f5b5a301b7 Mon Sep 17 00:00:00 2001
From: Lin Guo
Date: Wed, 11 Dec 2024 16:02:40 +0000
Subject: [PATCH] Improve on the query script

Replace the bare `squeue -j` query with a small shell script that falls
back to `sacct` when `squeue` reports nothing for the job, and that maps
Slurm state codes onto the ramble status names. `sacct` is invoked with
`-P` so the state is printed without column padding and can be used
directly as a key into the status map.

Also add the missing space before `]` in the job-id check of
`_ensure_job_id_snippet`, and fix a comment typo in the slurm workflow
test.

---
 .../slurm_workflow_manager.py                 |  2 +-
 .../slurm/workflow_manager.py                 | 22 +++++++++++++++++--
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/lib/ramble/ramble/test/workflow_manager_functionality/slurm_workflow_manager.py b/lib/ramble/ramble/test/workflow_manager_functionality/slurm_workflow_manager.py
index a9e487f77..231eb2421 100644
--- a/lib/ramble/ramble/test/workflow_manager_functionality/slurm_workflow_manager.py
+++ b/lib/ramble/ramble/test/workflow_manager_functionality/slurm_workflow_manager.py
@@ -52,7 +52,7 @@ def test_slurm_workflow():
         ws._re_read()
 
         workspace("setup", "--dry-run", global_args=["-D", ws.root])
-        # assert the batch_submit is overriden, pointing to the generated script
+        # assert the batch_submit is overridden, pointing to the generated script
         all_exec_file = os.path.join(ws.root, "all_experiments")
         with open(all_exec_file) as f:
             content = f.read()
diff --git a/var/ramble/repos/builtin/workflow_managers/slurm/workflow_manager.py b/var/ramble/repos/builtin/workflow_managers/slurm/workflow_manager.py
index 6be44ed4a..01575e8ab 100644
--- a/var/ramble/repos/builtin/workflow_managers/slurm/workflow_manager.py
+++ b/var/ramble/repos/builtin/workflow_managers/slurm/workflow_manager.py
@@ -12,7 +12,7 @@
 
 _ensure_job_id_snippet = r"""
 job_id=$(< {experiment_run_dir}/.slurm_job)
-if [ -z "${job_id:-}"]; then
+if [ -z "${job_id:-}" ]; then
     echo "No valid job_id found" 1>&2
     exit 1
 fi
@@ -133,7 +133,25 @@ def __init__(self, dry_run=False):
         )
 
     def generate_query_command(self, job_id):
-        return f"squeue -j {job_id}"
+        return rf"""
+status=$(squeue -h -o "%t" -j {job_id})
+if [ -z "$status" ]; then
+    status=$(sacct -j {job_id} -o state -X -n -P)
+fi
+if [ -n "$status" ]; then
+    # Define a mapping between sacct/squeue status to ramble counterpart
+    declare -A status_map
+    status_map["PD"]="SETUP"
+    status_map["R"]="RUNNING"
+    status_map["CF"]="SETUP"
+    status_map["CG"]="COMPLETE"
+    status_map["COMPLETED"]="COMPLETE"
+    if [ -v status_map["$status"] ]; then
+        status=${{status_map["$status"]}}
+    fi
+fi
+echo job {job_id} has status: $status
+        """
 
     def generate_cancel_command(self, job_id):
         return f"scancel {job_id}"