From 8f63230bfd7b6eb777afe015bb1dd72d2b3944c6 Mon Sep 17 00:00:00 2001
From: Henry Addison
Date: Mon, 5 Aug 2024 09:52:40 +0100
Subject: [PATCH] add helper scripts for queuing a training job on jasmin

---
Notes (this section is not part of the commit message):
  * The blob ids on the "index" lines were not recomputed for the file
    contents in this patch; regenerate the patch with git format-patch if
    exact ids are needed.

 bin/jasmin/lotus-wrapper  | 23 +++++++++++++++++++++++
 bin/jasmin/queue-training | 11 +++++++++++
 2 files changed, 34 insertions(+)
 create mode 100755 bin/jasmin/lotus-wrapper
 create mode 100755 bin/jasmin/queue-training

diff --git a/bin/jasmin/lotus-wrapper b/bin/jasmin/lotus-wrapper
new file mode 100755
index 000000000..d21588f4f
--- /dev/null
+++ b/bin/jasmin/lotus-wrapper
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Wrapper script around commands for training a model to queue on LOTUS on JASMIN
+# Submitted to the scheduler by the queue-training script in the same directory.
+
+# The shell profile is sourced and the mamba environment activated before
+# strict mode (set -euo pipefail) is enabled, so neither step is subject to it.
+source ~/.bashrc
+mamba activate mv-mlde
+
+set -euo pipefail
+
+# The relative bin/main.py and src/ paths used below are resolved against this directory.
+cd /home/users/vf20964/code/mlde
+
+# Exported so that the training process started below inherits them.
+export DERIVED_DATA=/gws/nopw/j04/bris_climdyn/henrya/bp-backups/
+export KK_SLACK_WH_URL=https://hooks.slack.com
+export WANDB_EXPERIMENT_NAME="ml-downscaling-emulator"
+
+# Alternative run using the cncsnpp config (currently disabled):
+# python bin/main.py --config src/ml_downscaling_emulator/score_sde_pytorch/configs/deterministic/ukcp_local_pr_12em_cncsnpp.py --workdir "${DERIVED_DATA}/score-sde/deterministic/ukcp_local_pr_12em_cncsnpp/bham_pSTV" --mode train
+
+python bin/main.py --config src/ml_downscaling_emulator/score_sde_pytorch/configs/deterministic/ukcp_local_pr_12em_plain_unet.py --workdir "${DERIVED_DATA}/score-sde/deterministic/ukcp_local_pr_12em_plain_unet/bham_pSTV" --mode train
diff --git a/bin/jasmin/queue-training b/bin/jasmin/queue-training
new file mode 100755
index 000000000..08d437997
--- /dev/null
+++ b/bin/jasmin/queue-training
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Script for queueing a training job on LOTUS on JASMIN via lotus-wrapper script
+
+set -euo pipefail
+
+# Absolute path of the directory holding this script, so that lotus-wrapper is
+# located next to it regardless of the directory the script is invoked from.
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+
+# Quoted so that a checkout path containing whitespace is passed to sbatch as one argument.
+sbatch --partition=high-mem --time=02:00:00 --mem=128G "${SCRIPT_DIR}/lotus-wrapper"