Set machine as cfg variable and adapt for euler
mjaehn committed Feb 20, 2024
1 parent f4dcd63 commit 157c606
Showing 3 changed files with 95 additions and 36 deletions.
99 changes: 73 additions & 26 deletions config.py
@@ -1,6 +1,7 @@
from subprocess import run, CalledProcessError
import os
import yaml
import socket
from datetime import timedelta

from jobs import tools
@@ -61,11 +62,15 @@ def __init__(self, casename):
        self.case_root = self.work_root / self.casename
        self.log_file = self.case_root / "chain_status.log"

        # Set machine based on hostname
        self.set_machine()

        # Set workflow and async attributes and initiate job ids dict
        self.set_workflow()

        # Specific settings based on the node type ('gpu' or 'mc')
        if self.machine == 'daint':
            self.set_node_info()

    def load_config_file(self):
        """Load configuration settings from a YAML file and set them as attributes.
@@ -140,6 +145,19 @@ def set_account(self):
            # Use standard account
            self.compute_account = os.popen("id -gn").read().splitlines()[0]

    def set_machine(self):
        """Set the machine name based on the hostname of the current node."""
        hostname = socket.gethostname()
        if hostname.startswith('daint'):
            self.machine = 'daint'
        elif hostname.startswith('eu-'):
            self.machine = 'euler'
        else:
            # Fail early: downstream code branches on self.machine.
            raise ValueError(f"Unsupported hostname: {hostname}")
        print(f"You are on the {self.machine} machine.")

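For context, the detection keys on login-node hostname prefixes. A minimal sketch of how the branching resolves, assuming illustrative hostnames such as 'daint101' and 'eu-login-01' (the real node names may differ):

```python
import socket
from unittest import mock

# Illustrative check of the prefix logic; hostnames are placeholders.
for fake_host in ('daint101', 'eu-login-01', 'unknown-host'):
    with mock.patch.object(socket, 'gethostname', return_value=fake_host):
        hostname = socket.gethostname()
        if hostname.startswith('daint'):
            machine = 'daint'
        elif hostname.startswith('eu-'):
            machine = 'euler'
        else:
            machine = None  # set_machine() raises ValueError here
        print(f'{fake_host} -> {machine}')
```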
    def set_node_info(self):
        """Set node-specific information based on configuration settings.
@@ -421,21 +439,39 @@ def submit_basic_python(self, job_name):
"""
# Build job script
walltime = getattr(self, 'walltime', {}).get(job_name, "00:30:00")
        script_lines = [
            '#!/usr/bin/env bash',
            f'#SBATCH --job-name={job_name}',
            '#SBATCH --nodes=1',
            f'#SBATCH --time={walltime}',
            f'#SBATCH --output={self.logfile}',
            '#SBATCH --open-mode=append',
            f'#SBATCH --account={self.compute_account}',
            f'#SBATCH --partition={self.compute_queue}',
            f'#SBATCH --constraint={self.constraint}',
            '',
            f'cd {self.chain_src_dir}',
            f'./run_chain.py {self.casename} -j {job_name} -c {self.chunk_id} -f -s --no-logging',
            '',
        ]
        if self.machine == 'daint':
            script_lines = [
                '#!/usr/bin/env bash',
                f'#SBATCH --job-name={job_name}',
                '#SBATCH --nodes=1',
                f'#SBATCH --time={walltime}',
                f'#SBATCH --output={self.logfile}',
                '#SBATCH --open-mode=append',
                f'#SBATCH --account={self.compute_account}',
                f'#SBATCH --partition={self.compute_queue}',
                f'#SBATCH --constraint={self.constraint}',
                '',
                f'cd {self.chain_src_dir}',
                f'./run_chain.py {self.casename} -j {job_name} -c {self.chunk_id} -f -s --no-logging',
                '',
            ]
        elif self.machine == 'euler':
            script_lines = [
                '#!/usr/bin/env bash',
                f'#SBATCH --job-name={job_name}',
                '#SBATCH --ntasks=1',
                f'#SBATCH --time={walltime}',
                f'#SBATCH --output={self.logfile}',
                '#SBATCH --open-mode=append',
                f'#SBATCH --partition={self.compute_queue}',
                f'#SBATCH --constraint={self.constraint}',
                '',
                f'cd {self.chain_src_dir}',
                'eval "$(conda shell.bash hook)"',
                'conda activate proc-chain',
                f'./run_chain.py {self.casename} -j {job_name} -c {self.chunk_id} -f -s --no-logging',
                '',
            ]

        job_path = self.chain_root / 'job_scripts'
        job_path.mkdir(parents=True, exist_ok=True)
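For reference, the assembled `script_lines` are written to a file and handed to `sbatch` further down (hidden in this view). A minimal sketch of that pattern, with placeholder paths and content, might look like:

```python
from pathlib import Path
import subprocess

script_lines = ['#!/usr/bin/env bash', 'echo hello']  # stand-in content
job_path = Path('/tmp/example_job_scripts')           # placeholder path
job_path.mkdir(parents=True, exist_ok=True)
job_file = job_path / 'submit.example.slurm'
job_file.write_text('\n'.join(script_lines) + '\n')

# '--parsable' makes sbatch print only the job id (and cluster, if any).
result = subprocess.run(['sbatch', '--parsable', str(job_file)],
                        capture_output=True, text=True, check=True)
job_id = result.stdout.strip().split(';')[0]
print(job_id)
```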
@@ -459,16 +495,27 @@ def wait_for_previous(self):
        job_file = self.case_root / 'submit.wait.slurm'
        log_file = self.case_root / 'wait.log'
        dep_str = ':'.join(map(str, dep_ids))
        script_lines = [
            '#!/usr/bin/env bash', '#SBATCH --job-name="wait"',
            '#SBATCH --nodes=1', '#SBATCH --time=00:01:00',
            f'#SBATCH --output={log_file}',
            f'#SBATCH --account={self.compute_account}',
            f'#SBATCH --partition={self.compute_queue}',
            f'#SBATCH --constraint={self.constraint}',
            f'#SBATCH --dependency=afterany:{dep_str}', '', '# Do nothing',
            'exit 0'
        ]
        if self.machine == 'daint':
            script_lines = [
                '#!/usr/bin/env bash', '#SBATCH --job-name="wait"',
                '#SBATCH --nodes=1', '#SBATCH --time=00:01:00',
                f'#SBATCH --output={log_file}',
                f'#SBATCH --account={self.compute_account}',
                f'#SBATCH --partition={self.compute_queue}',
                f'#SBATCH --constraint={self.constraint}',
                f'#SBATCH --dependency=afterany:{dep_str}', '', '# Do nothing',
                'exit 0'
            ]
        elif self.machine == 'euler':
            script_lines = [
                '#!/usr/bin/env bash', '#SBATCH --job-name="wait"',
                '#SBATCH --ntasks=1', '#SBATCH --time=00:01:00',
                f'#SBATCH --output={log_file}',
                f'#SBATCH --partition={self.compute_queue}',
                f'#SBATCH --constraint={self.constraint}',
                f'#SBATCH --dependency=afterany:{dep_str}', '', '# Do nothing',
                'exit 0'
            ]
        with open(job_file, mode='w') as wait_job:
            wait_job.write('\n'.join(script_lines))

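The wait job does no work itself; its `afterany` dependency makes it runnable only once every job in `dep_str` has finished. One way to block the chain on it is sbatch's `--wait` flag, sketched here (the flag is standard SLURM; whether the chain uses exactly this call is not shown in this hunk):

```python
import subprocess

# '--wait' blocks until the submitted job completes; combined with the
# 'afterany:' dependency above, this returns only after all dep_ids end.
subprocess.run(['sbatch', '--wait', str(job_file)], check=True)
```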
30 changes: 21 additions & 9 deletions jobs/prepare_icon.py
@@ -78,15 +78,27 @@ def main(cfg):

    logging.info('Copy ICON input data (IC/BC) to working directory')
    # Copy input files to scratch
    script_lines = [
        '#!/usr/bin/env bash',
        f'#SBATCH --job-name="copy_input_{cfg.casename}_{cfg.startdate_sim_yyyymmddhh}_{cfg.enddate_sim_yyyymmddhh}"',
        f'#SBATCH --account={cfg.compute_account}', '#SBATCH --time=00:10:00',
        f'#SBATCH --partition={cfg.compute_queue}',
        f'#SBATCH --constraint={cfg.constraint}', '#SBATCH --nodes=1',
        f'#SBATCH --output={cfg.logfile}', '#SBATCH --open-mode=append',
        f'#SBATCH --chdir={cfg.icon_work}', ''
    ]
    if cfg.machine == 'daint':
        script_lines = [
            '#!/usr/bin/env bash',
            f'#SBATCH --job-name="copy_input_{cfg.casename}_{cfg.startdate_sim_yyyymmddhh}_{cfg.enddate_sim_yyyymmddhh}"',
            f'#SBATCH --account={cfg.compute_account}',
            '#SBATCH --time=00:10:00',
            f'#SBATCH --partition={cfg.compute_queue}',
            f'#SBATCH --constraint={cfg.constraint}', '#SBATCH --nodes=1',
            f'#SBATCH --output={cfg.logfile}', '#SBATCH --open-mode=append',
            f'#SBATCH --chdir={cfg.icon_work}', ''
        ]
    elif cfg.machine == 'euler':
        script_lines = [
            '#!/usr/bin/env bash',
            f'#SBATCH --job-name="copy_input_{cfg.casename}_{cfg.startdate_sim_yyyymmddhh}_{cfg.enddate_sim_yyyymmddhh}"',
            '#SBATCH --time=00:10:00',
            f'#SBATCH --partition={cfg.compute_queue}',
            f'#SBATCH --constraint={cfg.constraint}', '#SBATCH --ntasks=1',
            f'#SBATCH --output={cfg.logfile}', '#SBATCH --open-mode=append',
            f'#SBATCH --chdir={cfg.icon_work}', ''
        ]
    for target, destination in zip(cfg.input_files.values(),
                                   cfg.input_files_scratch.values()):
        script_lines.append(f'rsync -av {target} {destination}')
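One thing worth noting about this copy loop: zipping `.values()` of the two dicts assumes they share key order, which holds because dicts preserve insertion order (Python 3.7+) and both mappings are presumably built from the same keys. An illustrative shape with invented keys and paths:

```python
# Placeholder data mirroring the assumed structure of cfg.input_files
# and cfg.input_files_scratch (keys and paths are invented).
input_files = {'inicond': '/store/project/inicond.nc',
               'lbc': '/store/project/lbc.nc'}
input_files_scratch = {'inicond': '/scratch/run/inicond.nc',
                       'lbc': '/scratch/run/lbc.nc'}

for target, destination in zip(input_files.values(),
                               input_files_scratch.values()):
    print(f'rsync -av {target} {destination}')
```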
2 changes: 1 addition & 1 deletion run_chain.py
@@ -320,7 +320,7 @@ def main():
    cfg.force_sync = False

    # Check constraint
    if cfg.constraint:
    if cfg.constraint and cfg.machine == 'daint':
        assert cfg.constraint in ['gpu', 'mc'], ("Unknown constraint, use "
                                                 "gpu or mc")
