run_aws_c5n18x.sh
#!/bin/bash
#=============================
# Run forecast for c5n.18xlarge
# instances (need to set number
# of tasks per node); most
# comparable to GCE c2-std-60.
#=============================
# Source .bashrc in case local_setup.sh
# was run in an automated way immediately
# before this script (the changes in .bashrc
# from local_setup.sh are sourced in its
# child shell, but not the parent).
source ~/.bashrc
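# (A defensive variant, if there is any chance .bashrc does not yet exist on a
# fresh node, would be: [ -f ~/.bashrc ] && source ~/.bashrc)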
# Move data to home with local_setup.sh
# (not done automatically here in case
# the cluster is persistent).
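# A sketch of that manual step, kept commented out; the location of
# local_setup.sh is an assumption (run it from wherever the repo was cloned):
# bash ./local_setup.sh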
# Create launch script
# Modifications wrt version posted by Smith et al. (2020):
# 1. Adjust --nodes and --ntasks-per-node to match the number
# of CPUs = vCPUs/2 on the instance. For example, 2 x 16 = 4 x 8.
# The number of CPUs available should be >= --ntasks-per-node x OMP_NUM_THREADS.
# This particular model needs 2 x 16 x 6 = 4 x 8 x 6 = 192 threads.
# In the two cases above, 16x6 means two 96-CPU instances and
# 8x6 means four 48-CPU instances. (See the sizing sketch after this list.)
# 2. We do not need to module load libfabric-aws since the node
# is able to run fi_info by default.
# 3. Change the output logging from %j to %J.%t
# 4. Remove #SBATCH --exclusive to sidestep current login issues
# (note: the launch script below currently still sets --exclusive).
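# Sizing sketch for modification 1, kept commented out; nproc and the
# vCPU/2 rule above are the only assumptions (run on a compute node to
# check how many MPI tasks per node fit alongside 6 OpenMP threads):
# physical_cores=$(( $(nproc) / 2 ))   # CPUs = vCPUs/2
# omp_threads=6
# echo "max --ntasks-per-node: $(( physical_cores / omp_threads ))"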
# AV: Modified this to run from wherever the repo is launched; use the workflow to specify this location.
# cd /shared/wrf/conus_12km/
cd ${HOME}/wrf/conus_12km
export I_MPI_FABRICS=efa
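# Optional sanity check, kept commented out (assumes fi_info from the stock
# libfabric is on PATH, as noted in modification 2 above); uncomment to
# confirm the EFA provider is visible before submitting:
# fi_info -p efa | head -n 5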
cat > slurm-wrf-conus12km.sh <<EOF
#!/bin/bash
#SBATCH --job-name=WRF
#SBATCH --output=conus-%J.%t.out
#SBATCH --nodes=8
#SBATCH --ntasks-per-node=4
#SBATCH --exclusive
spack load intel-oneapi-mpi
spack load wrf
wrf_exe=$(spack location -i wrf)/run/wrf.exe
set -x                        # echo each command to the job log
ulimit -s unlimited           # WRF needs a large stack; remove the soft limit
ulimit -a                     # record the effective limits in the log
export OMP_NUM_THREADS=6      # OpenMP threads per MPI task
export I_MPI_FABRICS=efa      # Intel MPI fabric selection for the EFA interconnect
export I_MPI_PIN_DOMAIN=omp   # pin each MPI rank to a domain of OMP_NUM_THREADS cores
export KMP_AFFINITY=compact   # pack OpenMP threads onto adjacent cores in that domain
export I_MPI_DEBUG=6          # verbose Intel MPI startup/pinning diagnostics
time mpiexec.hydra -np \$SLURM_NTASKS --ppn \$SLURM_NTASKS_PER_NODE \$wrf_exe
echo \$? > wrf.exit.code      # escaped so the batch job records wrf.exe's exit status at run time
EOF
# Run it!
echo; echo "Running sbatch slurm-wrf-conus12km.sh from ${PWD}"
sbatch slurm-wrf-conus12km.sh
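# A variant sketch, kept commented out, if you would rather capture the job ID
# for monitoring (standard Slurm flags; replaces the plain sbatch call above):
# jobid=$(sbatch --parsable slurm-wrf-conus12km.sh)
# squeue -j "${jobid}"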
# Clean up
rm -f slurm-wrf-conus12km.sh