model_training.job
#!/bin/bash
#
#SBATCH --job-name=gpu_model_training
#SBATCH --output=logfiles/model_training-%J.log
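## Note (assumption based on standard Slurm behavior): Slurm will not create the
## logfiles/ directory for you; run `mkdir -p logfiles` before submitting, or the
## job output may not be written.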
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --partition=gpu
#SBATCH --time=11:00:00
#SBATCH --cpus-per-task=20              # Number of CPUs per task
#SBATCH --gpus-per-task=1
##SBATCH --mail-type=ALL
##SBATCH --mail-user=<ENTER_YOUR_USERNAME>@crimson.ua.edu
## Set OpenMP threads to match the CPUs allocated to this task
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK

## Print information about the Slurm job
echo "Name of the cluster on which the job is executing." $SLURM_CLUSTER_NAME
echo "Number of tasks to be initiated on each node." $SLURM_TASKS_PER_NODE
echo "Number of cpus requested per task." $SLURM_CPUS_PER_TASK
echo "Number of CPUS on the allocated node." $SLURM_CPUS_ON_NODE
echo "Total number of processes in the current job." $SLURM_NTASKS
echo "List of nodes allocated to the job" $SLURM_NODELIST
echo "Total number of nodes in the job's resource allocation." $SLURM_NNODES
echo "List of allocated GPUs." $CUDA_VISIBLE_DEVICES
## Load any module that you need
module load Anaconda3
module load CUDA
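## (Note: module names and versions are site-specific; `module avail` lists what is installed on your cluster.)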
## Activate the specific environment you need
source activate ar-goes
echo 'environment activated'
python train_model.py
echo 'HPRC script done!!!'
echo '------------------------------------------------------'
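
## Usage (a minimal sketch using standard Slurm client tools; <JOBID> below is a
## placeholder for the ID that sbatch prints):
##   mkdir -p logfiles
##   sbatch model_training.job
##   squeue -u $USER
##   tail -f logfiles/model_training-<JOBID>.log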