From ec25e393da446d4d4bf28337703983aed04ce2c5 Mon Sep 17 00:00:00 2001
From: Henry Addison
Date: Mon, 5 Aug 2024 22:00:32 +0100
Subject: [PATCH] pass args through to tell what training to queue

---
 bin/jasmin/lotus-training-wrapper | 17 +++++++++++++++++
 bin/jasmin/lotus-wrapper          | 27 ---------------------------
 bin/jasmin/queue-training         | 15 ++++++++++++++-
 3 files changed, 31 insertions(+), 28 deletions(-)
 create mode 100755 bin/jasmin/lotus-training-wrapper
 delete mode 100755 bin/jasmin/lotus-wrapper

diff --git a/bin/jasmin/lotus-training-wrapper b/bin/jasmin/lotus-training-wrapper
new file mode 100755
index 00000000..7552faa3
--- /dev/null
+++ b/bin/jasmin/lotus-training-wrapper
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Wrapper script around commands for training a model to queue on LOTUS on JASMIN
+
+module load gcc
+
+source ~/.bashrc
+mamba activate mv-mlde
+
+set -euo pipefail
+
+cd /home/users/vf20964/code/mlde
+
+export DERIVED_DATA=/gws/nopw/j04/bris_climdyn/henrya/bp-backups/
+export KK_SLACK_WH_URL=https://hooks.slack.com
+export WANDB_EXPERIMENT_NAME="ml-downscaling-emulator"
+
+python bin/main.py $@
diff --git a/bin/jasmin/lotus-wrapper b/bin/jasmin/lotus-wrapper
deleted file mode 100755
index f576a861..00000000
--- a/bin/jasmin/lotus-wrapper
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-# Wrapper script around commands for training a model to queue on LOTUS on JASMIN
-
-module load gcc
-
-source ~/.bashrc
-mamba activate mv-mlde
-
-set -euo pipefail
-
-cd /home/users/vf20964/code/mlde
-
-export DERIVED_DATA=/gws/nopw/j04/bris_climdyn/henrya/bp-backups/
-export KK_SLACK_WH_URL=https://hooks.slack.com
-export WANDB_EXPERIMENT_NAME="ml-downscaling-emulator"
-
-# 12em NCSN++
-# python bin/main.py --config src/ml_downscaling_emulator/score_sde_pytorch/configs/deterministic/ukcp_local_pr_12em_cncsnpp.py --workdir /gws/nopw/j04/bris_climdyn/henrya/workdirs/score-sde/deterministic/ukcp_local_pr_12em_cncsnpp/bham_pSTV --mode train
-
-# 1em NCSN++
-python bin/main.py --config src/ml_downscaling_emulator/score_sde_pytorch/configs/deterministic/ukcp_local_pr_1em_cncsnpp.py --workdir /gws/nopw/j04/bris_climdyn/henrya/workdirs/score-sde/deterministic/ukcp_local_pr_1em_cncsnpp/bham_pSTV --mode train
-
-# 12em Plain U-Net
-# python bin/main.py --config src/ml_downscaling_emulator/score_sde_pytorch/configs/deterministic/ukcp_local_pr_12em_plain_unet.py --workdir /gws/nopw/j04/bris_climdyn/henrya/workdirs/score-sde/deterministic/ukcp_local_pr_12em_plain_unet/bham_pSTV --mode train
-
-# 1em Plain U-Net
-# python bin/main.py --config src/ml_downscaling_emulator/score_sde_pytorch/configs/deterministic/ukcp_local_pr_12em_plain_unet.py --workdir /gws/nopw/j04/bris_climdyn/henrya/workdirs/score-sde/deterministic/ukcp_local_pr_1em_plain_unet/bham_pSTV --mode train --config.data.dataset_name bham64_ccpm-4x_1em_psl-sphum4th-temp4th-vort4th_pr
diff --git a/bin/jasmin/queue-training b/bin/jasmin/queue-training
index 789d5f25..9e20de63 100755
--- a/bin/jasmin/queue-training
+++ b/bin/jasmin/queue-training
@@ -5,4 +5,17 @@ set -euo pipefail
 
 SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 
-sbatch --gres=gpu:1 --partition=orchid --account=orchid --time=1-00:00:00 --mem=128G ${SCRIPT_DIR}/lotus-wrapper
+# 12em NCSN++
+# --config src/ml_downscaling_emulator/score_sde_pytorch/configs/deterministic/ukcp_local_pr_12em_cncsnpp.py --workdir /gws/nopw/j04/bris_climdyn/henrya/workdirs/score-sde/deterministic/ukcp_local_pr_12em_cncsnpp/bham_pSTV --mode train
+
+# 1em NCSN++
+# --config src/ml_downscaling_emulator/score_sde_pytorch/configs/deterministic/ukcp_local_pr_1em_cncsnpp.py --workdir /gws/nopw/j04/bris_climdyn/henrya/workdirs/score-sde/deterministic/ukcp_local_pr_1em_cncsnpp/bham_pSTV --mode train
+
+# 12em Plain U-Net
+# --config src/ml_downscaling_emulator/score_sde_pytorch/configs/deterministic/ukcp_local_pr_12em_plain_unet.py --workdir /gws/nopw/j04/bris_climdyn/henrya/workdirs/score-sde/deterministic/ukcp_local_pr_12em_plain_unet/bham_pSTV --mode train
+
+# 1em Plain U-Net
+# --config src/ml_downscaling_emulator/score_sde_pytorch/configs/deterministic/ukcp_local_pr_12em_plain_unet.py --workdir /gws/nopw/j04/bris_climdyn/henrya/workdirs/score-sde/deterministic/ukcp_local_pr_1em_plain_unet/bham_pSTV --mode train --config.data.dataset_name bham64_ccpm-4x_1em_psl-sphum4th-temp4th-vort4th_pr
+
+
+sbatch --gres=gpu:1 --partition=orchid --account=orchid --time=1-00:00:00 --mem=128G -- ${SCRIPT_DIR}/lotus-training-wrapper $@
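
With this change the training job is no longer hard-coded in the wrapper: whatever arguments are given to queue-training are forwarded by sbatch to lotus-training-wrapper and from there to bin/main.py. A minimal usage sketch, reusing the 1em NCSN++ arguments from the commented examples above (the config and workdir paths are specific to this JASMIN setup and would change for other runs):

# Queue a 1em NCSN++ training run on LOTUS; every argument after queue-training
# is passed through to bin/main.py inside the batch job.
bin/jasmin/queue-training \
    --config src/ml_downscaling_emulator/score_sde_pytorch/configs/deterministic/ukcp_local_pr_1em_cncsnpp.py \
    --workdir /gws/nopw/j04/bris_climdyn/henrya/workdirs/score-sde/deterministic/ukcp_local_pr_1em_cncsnpp/bham_pSTV \
    --mode train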