From 46f94e2fc26979e8d078d9903c1c85cf7feed2bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20J=C3=A4hn?= Date: Tue, 24 Oct 2023 15:34:53 +0200 Subject: [PATCH] Further work on icon restart --- cases/icon-test/config.yaml | 6 ++--- cases/icon-test/icon_runjob.cfg | 13 +++++++++-- jobs/icon.py | 6 +++++ jobs/prepare_data.py | 2 +- jobs/tools/__init__.py | 41 +++++++++++++++++++++++++++++++++ run_chain.py | 1 + 6 files changed, 63 insertions(+), 6 deletions(-) diff --git a/cases/icon-test/config.yaml b/cases/icon-test/config.yaml index 1a2b6915..dcc652ec 100644 --- a/cases/icon-test/config.yaml +++ b/cases/icon-test/config.yaml @@ -2,7 +2,7 @@ model: icon constraint: gpu -run_on: gpu +run_on: cpu compute_queue: normal ntasks_per_node: 12 restart_step: 6 @@ -46,8 +46,8 @@ icon: runjob_filename: icon_runjob.cfg compute_queue: normal walltime: '00:30:00' - np_tot: 6 + np_tot: 12 np_io: 1 - np_restart: 1 + np_restart: 0 np_prefetch: 1 diff --git a/cases/icon-test/icon_runjob.cfg b/cases/icon-test/icon_runjob.cfg index b8367931..5d08b5d2 100755 --- a/cases/icon-test/icon_runjob.cfg +++ b/cases/icon-test/icon_runjob.cfg @@ -43,6 +43,15 @@ cat > icon_master.namelist << EOF lrestart = {cfg.lrestart} ! .TRUE.=current experiment is resumed / +! master_time_control_nml: --------------------------------------------------- +&master_time_control_nml +calendar = 'proleptic gregorian' +restartTimeIntval = '{cfg.restart_step_iso}' +checkpointTimeIntval = '{cfg.restart_step_iso}' +experimentStartDate = '{cfg.ini_datetime_string}' +experimentStopDate = '{cfg.end_datetime_string}' +/ + ! master_model_nml: repeated for each model ---------------------------------- &master_model_nml model_type = 1 ! identifies which component to run (atmosphere,ocean,...) @@ -86,7 +95,7 @@ cat > NAMELIST_{cfg.casename} << EOF ltransport = .TRUE. ! compute large-scale tracer transport ntracer = 0 ! number of advected tracers iforcing = 3 ! 
forcing of dynamics and transport by parameterized processes - msg_level = 7 ! detailed report during integration + msg_level = 13 ! detailed report during integration ltimer = .TRUE. ! timer for monitoring the runtime of specific routines timers_level = 10 ! performance timer granularity check_uuid_gracefully = .TRUE. ! give only warnings for non-matching uuids @@ -167,7 +176,7 @@ cat > NAMELIST_{cfg.casename} << EOF itype_pres_msl = 5 ! method for computation of mean sea level pressure itype_rh = 1 ! method for computation of relative humidity lmask_boundary = .TRUE. ! mask out interpolation zone in output - restart_file_type = 4 + restart_file_type = 5 / ! limarea_nml: settings for limited area mode --------------------------------- diff --git a/jobs/icon.py b/jobs/icon.py index d24b1614..fa1e7c84 100644 --- a/jobs/icon.py +++ b/jobs/icon.py @@ -64,6 +64,12 @@ def main(starttime, hstart, hstop, cfg, model_cfg): tools.copy_file(cfg.icon_binary_file, os.path.join(cfg.icon_work, execname)) + # Symlink the restart file of the last run into the icon/run folder + if cfg.lrestart == '.TRUE.': + restart_filename = 'restart_atm_DOM01.nc' + restart_file = os.path.join(cfg.icon_restart_in, restart_filename) + tools.symlink_file(restart_file, os.path.join(cfg.icon_work, restart_filename)) + # Get name of initial file if hasattr(cfg, 'inicond_filename'): inidata_filename = os.path.join(cfg.icon_input_icbc, diff --git a/jobs/prepare_data.py b/jobs/prepare_data.py index 45035f66..4e9e27df 100644 --- a/jobs/prepare_data.py +++ b/jobs/prepare_data.py @@ -58,7 +58,7 @@ def set_cfg_variables(cfg, starttime, hstart, hstop): setattr(cfg, 'icon_restart_out', os.path.join(cfg.chain_root, 'icon', 'restart')) setattr(cfg, 'icon_restart_in', - os.path.join(cfg.chain_root_last_run, 'icon', 'restart')) + os.path.join(cfg.chain_root_last_run, 'icon', 'run')) setattr(cfg, 'icon_input_icbc_last_run', os.path.join(cfg.chain_root_last_run, 'icon', 'input', 'icbc')) diff --git 
a/jobs/tools/__init__.py b/jobs/tools/__init__.py index c300d936..bd15df94 100644 --- a/jobs/tools/__init__.py +++ b/jobs/tools/__init__.py @@ -195,6 +195,47 @@ def copy_file(source_path, dest_path, output_log=False): logging.info("Copied {} to {}".format(source_path, dest_path)) +def symlink_file(source_path, dest_path, output_log=False): + """Create a symbolic link at dest_path that points to source_path + + Use os.symlink to create the symbolic link. + dest_path must be the path of the link itself, not a directory. + If a file or link already exists at dest_path, it is removed + with os.remove before the new link is created. + + Logs an error description if output_log is True, then re-raises + + Parameters + ---------- + source_path : str + Path to the source file or directory + dest_path : str + Path of the symbolic link to create + output_log : bool, optional + Whether to log messages (default is False) + """ + + try: + if os.path.lexists(dest_path): + os.remove(dest_path) + os.symlink(source_path, dest_path) + except FileNotFoundError: + if output_log: + logging.error(f"Source file or directory not found at {source_path}") + raise + except PermissionError: + if output_log: + logging.error(f"Creating symbolic link from {source_path} to {dest_path} failed due to a permission error.") + raise + except (OSError, Exception) as e: + if output_log: + logging.error(f"Creating symbolic link from {source_path} to {dest_path} failed with {type(e).__name__}") + raise + + if output_log: + logging.info(f"Created symbolic link from {source_path} to {dest_path}") + + def rename_file(source_path, dest_path, output_log=False): """Copy a file from source_path to dest_path diff --git a/run_chain.py b/run_chain.py index 299cc880..53dfd7f1 100755 --- a/run_chain.py +++ b/run_chain.py @@ -344,6 +344,7 @@ def run_chain(work_root, model_cfg, cfg, start_time, hstart, hstop, job_names, # Restart step if 'restart' in model_cfg['models'][cfg.model]['features']: setattr(cfg, 'restart_step', hstop - hstart) + setattr(cfg, 
'restart_step_iso', f'PT{int(cfg.restart_step)}H') # If nested run: use output of mother-simulation if 'nesting' in model_cfg['models'][