Skip to content

Commit

Permalink
Set correct nodes for icon (cpu + gpu)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mjaehn committed Oct 24, 2023
1 parent e06f5f2 commit 5cf6e1c
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 8 deletions.
3 changes: 2 additions & 1 deletion cases/icon-test/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

model: icon
constraint: gpu
run_on: gpu
compute_queue: normal
ntasks_per_node: 12
restart_step: 6
Expand Down Expand Up @@ -44,7 +45,7 @@ icon:
binary_file: ./src/icon/bin/icon
runjob_filename: icon_runjob.cfg
compute_queue: normal
walltime: '00:20:00'
walltime: '00:30:00'
np_tot: 6
np_io: 1
np_restart: 1
Expand Down
5 changes: 2 additions & 3 deletions cases/icon-test/icon_runjob.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
#SBATCH --account={cfg.compute_account}
#SBATCH --time={cfg.icon_walltime}
#SBATCH --nodes={cfg.icon_np_tot}
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node={cfg.ntasks_per_node}
#SBATCH --cpus-per-task=1
#SBATCH --partition={cfg.compute_queue}
#SBATCH --constraint={cfg.constraint}
#SBATCH --hint=nomultithread
Expand Down Expand Up @@ -96,6 +94,7 @@ cat > NAMELIST_{cfg.casename} << EOF
lart = .FALSE. ! main switch for ART
debug_check_level = 10
restart_filename = "{cfg.icon_restart_out}/{cfg.output_filename}_<rsttime>.nc"
activate_sync_timers = .TRUE.
/
! diffusion_nml: horizontal (numerical) diffusion ----------------------------
Expand Down Expand Up @@ -168,7 +167,7 @@ cat > NAMELIST_{cfg.casename} << EOF
itype_pres_msl = 5 ! method for computation of mean sea level pressure
itype_rh = 1 ! method for computation of relative humidity
lmask_boundary = .TRUE. ! mask out interpolation zone in output
restart_file_type = 5
restart_file_type = 4
/
! limarea_nml: settings for limited area mode ---------------------------------
Expand Down
18 changes: 14 additions & 4 deletions run_chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,20 @@ def set_account(self):

def set_node_info(self):
if self.constraint == 'gpu':
self.ntasks_per_node = 12
self.mpich_cuda = ('export MPICH_RDMA_ENABLED_CUDA=1\n'
'export MPICH_G2G_PIPELINE=256\n'
'export CRAY_CUDA_MPS=1\n')
if self.model.startswith('icon'):
if self.run_on == 'gpu':
self.ntasks_per_node = 1
elif self.run_on == 'cpu':
self.ntasks_per_node = 12
else:
raise ValueError(
"Invalid value for 'run_on' in the configuration."
"It should be either 'gpu' or 'cpu'.")
else:
self.ntasks_per_node = 12
self.mpich_cuda = ('export MPICH_RDMA_ENABLED_CUDA=1\n'
'export MPICH_G2G_PIPELINE=256\n'
'export CRAY_CUDA_MPS=1\n')
elif self.constraint == 'mc':
self.ntasks_per_node = 36
self.mpich_cuda = ''
Expand Down

0 comments on commit 5cf6e1c

Please sign in to comment.