diff --git a/.github/workflows/docs-latest.yml b/.github/workflows/docs-latest.yml
index 969a3f7e..0f48bfca 100644
--- a/.github/workflows/docs-latest.yml
+++ b/.github/workflows/docs-latest.yml
@@ -8,18 +8,20 @@ on:
- main
jobs:
- deploy-docs:
+ deploy-docs-latest:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
+
- name: Build docs
uses: C2SM/sphinx-action@sphinx-latest
with:
- pre-build-command: "pip install sphinx_rtd_theme && pip install sphinx-copybutton"
build-command: "sphinx-build -b html . _build"
docs-folder: "docs/"
- - uses: peaceiris/actions-gh-pages@v3
+
+ - name: Deploy on GitHub Pages
+ uses: peaceiris/actions-gh-pages@v3
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./docs/_build
diff --git a/.github/workflows/docs-pr-preview.yml b/.github/workflows/docs-pr-preview.yml
index 9d1ee4c1..d578a6d7 100644
--- a/.github/workflows/docs-pr-preview.yml
+++ b/.github/workflows/docs-pr-preview.yml
@@ -1,7 +1,9 @@
-name: Deploy PR previews
+name: Build and Deploy Documentation to PR Previews
on:
pull_request:
+ paths:
+ - 'docs/**'
types:
- opened
- reopened
@@ -16,10 +18,10 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v3
+
- name: Build docs
uses: C2SM/sphinx-action@sphinx-latest
with:
- pre-build-command: "pip install sphinx_rtd_theme && pip install sphinx-copybutton"
build-command: "sphinx-build -b html . _build"
docs-folder: "docs/"
diff --git a/.github/workflows/docs-tag.yml b/.github/workflows/docs-tag.yml
index e3a4c433..90772a63 100644
--- a/.github/workflows/docs-tag.yml
+++ b/.github/workflows/docs-tag.yml
@@ -6,23 +6,26 @@ on:
- '*'
jobs:
- deploy-docs:
+ deploy-docs-tag:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
+
- name: Get release
id: get_release
uses: bruceadams/get-release@v1.3.2
env:
GITHUB_TOKEN: ${{ github.token }}
+
- name: Build docs
uses: C2SM/sphinx-action@sphinx-latest
with:
- pre-build-command: "pip install sphinx_rtd_theme && pip install sphinx-copybutton"
build-command: "sphinx-build -b html . _build"
docs-folder: "docs/"
- - uses: peaceiris/actions-gh-pages@v3
+
+ - name: Deploy on GitHub Pages
+ uses: peaceiris/actions-gh-pages@v3
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./docs/_build
diff --git a/.gitignore b/.gitignore
index 2838e4a4..b2df1de5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
docs/build/
+docs/_build/
*__pycache__/
*.pyc
fieldextra.diagnostic
@@ -6,5 +7,6 @@ input_processing-chain.tgz
input/
output/
work/
-src/*/
+ext/*/
*.code-workspace
+.vscode/
diff --git a/README.md b/README.md
index d8f8099e..09ff995b 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Processing Chain for COSMO and ICON Simulations
+# Processing Chain
The Processing Chain is a python script that prepares necessary input
data, submits compute-jobs to the queue on Piz Daint and does
@@ -9,71 +9,19 @@ e.g., by creating your own case or adding new jobs.
## Environment Setup
-The following steps allow you to create and use your own virtual
-environment to run the Processing Chain. We recommend to use a conda
-environment for the usage of the provided scripts. Please follow the
-instruction for the installation. The following steps only need to be
-performed once.
-
-### 1\. Install Miniconda
-
-Install as user specific Miniconda, e.g. on your `$HOME` directory,
-which is the default location.
-
-> **Note**: Only conda itself should be installed in your `$HOME`.
-> All environments should be stored in your `$PROJECT` directory,
-> otherwise you risk filling up your `$HOME` directory. See below for instructions.
-
-To install the latest Miniconda, type:
-
- wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
- bash Miniconda3-latest-Linux-x86_64.sh
-
-Further deails on Miniconda can be found on the [Miniconda documentation page](https://docs.conda.io/en/latest/miniconda.html).
-
-### 2\. Create the Conda Environment
-
-Create a conda environment `proc-chain` with and install requirements:
-
- conda env create --prefix $PROJECT/envs/proc-chain -f env/environment.yml
-
-To be able to activate your conda environment by simply using `conda activate proc-chain` instead of the full path, add the following to your `.bashrc`:
-
- export CONDA_ENVS_PATH=$PROJECT/envs
-
-Activate the environment (use "source activate" in case "conda activate"
-does not work):
-
- conda activate proc-chain
-
-If you already have the environment but want to update it:
-
- conda env update --file env/environment.yml --prune
-
-### 3\. Store user-specific data
-
-To register your email address and standard project account, store them into
-these files within your home directory:
-
- echo > ~/.acct
- echo > ~/.forward
-
-These settings are optional. The Processing Chain will first check the content
-of those files. If desired, the corresponding variables can be overridden by setting
-the `compute_account` and `user_mail` variables in the `config.yaml` file.
+To setup your conda environment for the Processing Chain, please refer
+to the part in the [official documentation](https://c2sm.github.io/processing-chain/latest/environment.html).
## Run the Chain
-Once everything has been set up correctly according to the above steps,
-you just need to execute the following command to activate your
-environment (if not done already):
+To activate your conda environment, type:
conda activate proc-chain
To test if your environment has been successfully set, use the command
line help to display the available arguments for the main script:
- python run_chain.py -h
+ ./run_chain.py -h
To run the test cases with their standard jobs, please ensure
that you clone the Processing Chain to `$SCRATCH`, as input and
@@ -89,9 +37,8 @@ For these pre-defined test cases, you can use the Jenkins script
./jenkins/scripts/jenkins.sh
-This script calls other scripts that are located in `jenkins/scripts/`.
-They will
-- activate the conda environment (if not done already)
+This script calls other scripts located in `jenkins/scripts/`, which will:
+- activate the conda environment
- setup spack-c2sm
- download input data to `input/`
- build `int2lm`, `cosmo-ghg`, `icon` and `icon-art`
@@ -104,16 +51,14 @@ They will
To run the test cases manually, type:
-```bash
# replace with one of the above tests
- python run_chain.py
-```
+ ./run_chain.py
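+
+For example, to run the COSMO-GHG test case:
+
+    ./run_chain.py cosmo-ghg-test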
## Documentation
For more information about the file structure, configuration options,
-namelist templates etc., please read the official
-[documentation](https://c2sm.github.io/processing-chain/).
+namelist templates etc., please read the [official
+documentation](https://c2sm.github.io/processing-chain/latest/).
## Contributing
diff --git a/cases/cosmo-ghg-spinup-test/config.yaml b/cases/cosmo-ghg-spinup-test/config.yaml
index 2351b766..b52048ba 100644
--- a/cases/cosmo-ghg-spinup-test/config.yaml
+++ b/cases/cosmo-ghg-spinup-test/config.yaml
@@ -1,10 +1,11 @@
# Configuration file for the 'cosmo-ghg-spinup-test' case with COSMO-GHG
-model: cosmo-ghg
+workflow: cosmo-ghg-spinup
constraint: gpu
+run_on: gpu
+compute_queue: normal
ntasks_per_node: 12
restart_step: PT6H
-variant: spinup
spinup: 3
startdate: 2015-01-01T00:00:00Z
enddate: 2015-01-01T18:00:00Z
@@ -47,7 +48,7 @@ online_vprm:
int2lm:
extpar_dir: ./input/cosmo-ghg/extpar
extpar_filename: test_domain.nc
- binary_file: ./src/int2lm/test/testsuite/int2lm
+ binary_file: ./ext/int2lm/test/testsuite/int2lm
namelist_filename: int2lm_INPUT.cfg
runjob_filename: int2lm_runjob.cfg
compute_queue: normal
@@ -69,7 +70,7 @@ post_int2lm:
- CO2_A2
cosmo:
- binary_file: ./src/cosmo-ghg/cosmo/ACC/cosmo_gpu
+ binary_file: ./ext/cosmo-ghg/cosmo/ACC/cosmo_gpu
namelist_prefix: cosmo_INPUT_
runjob_filename: cosmo_runjob.cfg
compute_queue: normal
diff --git a/cases/cosmo-ghg-spinup-test/cosmo_runjob.cfg b/cases/cosmo-ghg-spinup-test/cosmo_runjob.cfg
index ca14d636..608b8d15 100644
--- a/cases/cosmo-ghg-spinup-test/cosmo_runjob.cfg
+++ b/cases/cosmo-ghg-spinup-test/cosmo_runjob.cfg
@@ -1,5 +1,5 @@
#!/bin/bash -l
-#SBATCH --job-name="cosmo_{cfg.startdate_sim_yyyymmddhh}_{cfg.forecasttime}"
+#SBATCH --job-name=cosmo
#SBATCH --account={cfg.compute_account}
#SBATCH --time={walltime}
#SBATCH --nodes={np_tot}
@@ -34,7 +34,7 @@ echo "============== StartTime: `date +%s` s"
echo "============== StartTime: `date`"
echo "====================================================="
-srun -u ./{execname} >> {logfile} 2>&1
+srun -u ./{cfg.cosmo_execname} >> {logfile} 2>&1
pid=$?
echo "====================================================="
diff --git a/cases/cosmo-ghg-spinup-test/int2lm_runjob.cfg b/cases/cosmo-ghg-spinup-test/int2lm_runjob.cfg
index c3c80e53..9a3ae2e0 100644
--- a/cases/cosmo-ghg-spinup-test/int2lm_runjob.cfg
+++ b/cases/cosmo-ghg-spinup-test/int2lm_runjob.cfg
@@ -1,5 +1,5 @@
#!/bin/bash -l
-#SBATCH --job-name=int2lm_{cfg.startdate_sim_yyyymmddhh}_{cfg.enddate_sim_yyyymmddhh}
+#SBATCH --job-name=int2lm
#SBATCH --account={cfg.compute_account}
#SBATCH --time={walltime}
#SBATCH --nodes={nodes}
diff --git a/cases/cosmo-ghg-test/config.yaml b/cases/cosmo-ghg-test/config.yaml
index dc1134e9..b954e0f2 100644
--- a/cases/cosmo-ghg-test/config.yaml
+++ b/cases/cosmo-ghg-test/config.yaml
@@ -1,7 +1,9 @@
# Configuration file for the 'cosmo-ghg-test' case with COSMO-GHG
-model: cosmo-ghg
+workflow: cosmo-ghg
constraint: gpu
+run_on: gpu
+compute_queue: normal
ntasks_per_node: 12
restart_step: PT6H
startdate: 2015-01-01T00:00:00Z
@@ -45,7 +47,7 @@ online_vprm:
int2lm:
extpar_dir: ./input/cosmo-ghg/extpar
extpar_filename: test_domain.nc
- binary_file: ./src/int2lm/test/testsuite/int2lm
+ binary_file: ./ext/int2lm/test/testsuite/int2lm
namelist_filename: int2lm_INPUT.cfg
runjob_filename: int2lm_runjob.cfg
compute_queue: normal
@@ -67,7 +69,7 @@ post_int2lm:
- CO2_A2
cosmo:
- binary_file: ./src/cosmo-ghg/cosmo/ACC/cosmo_gpu
+ binary_file: ./ext/cosmo-ghg/cosmo/ACC/cosmo_gpu
namelist_prefix: cosmo_INPUT_
runjob_filename: cosmo_runjob.cfg
compute_queue: normal
diff --git a/cases/cosmo-ghg-test/cosmo_runjob.cfg b/cases/cosmo-ghg-test/cosmo_runjob.cfg
index ca14d636..608b8d15 100644
--- a/cases/cosmo-ghg-test/cosmo_runjob.cfg
+++ b/cases/cosmo-ghg-test/cosmo_runjob.cfg
@@ -1,5 +1,5 @@
#!/bin/bash -l
-#SBATCH --job-name="cosmo_{cfg.startdate_sim_yyyymmddhh}_{cfg.forecasttime}"
+#SBATCH --job-name=cosmo
#SBATCH --account={cfg.compute_account}
#SBATCH --time={walltime}
#SBATCH --nodes={np_tot}
@@ -34,7 +34,7 @@ echo "============== StartTime: `date +%s` s"
echo "============== StartTime: `date`"
echo "====================================================="
-srun -u ./{execname} >> {logfile} 2>&1
+srun -u ./{cfg.cosmo_execname} >> {logfile} 2>&1
pid=$?
echo "====================================================="
diff --git a/cases/cosmo-ghg-test/int2lm_runjob.cfg b/cases/cosmo-ghg-test/int2lm_runjob.cfg
index c3c80e53..9a3ae2e0 100644
--- a/cases/cosmo-ghg-test/int2lm_runjob.cfg
+++ b/cases/cosmo-ghg-test/int2lm_runjob.cfg
@@ -1,5 +1,5 @@
#!/bin/bash -l
-#SBATCH --job-name=int2lm_{cfg.startdate_sim_yyyymmddhh}_{cfg.enddate_sim_yyyymmddhh}
+#SBATCH --job-name=int2lm
#SBATCH --account={cfg.compute_account}
#SBATCH --time={walltime}
#SBATCH --nodes={nodes}
diff --git a/cases/icon-art-global-test/config.py b/cases/icon-art-global-test/config.py
deleted file mode 100644
index 5c7e98e7..00000000
--- a/cases/icon-art-global-test/config.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import os
-"""
-Configuration file for the 'icon-art-global' case with ICON-ART
-"""
-
-# GENERAL SETTINGS ===========================================================
-user = os.environ['USER']
-if user == 'jenkins':
- compute_account = 'g110'
-elif os.path.exists(os.environ['HOME'] + '/.acct'):
- with open(os.environ['HOME'] + '/.acct', 'r') as file:
- compute_account = file.read().rstrip()
-else:
- compute_account = os.popen("id -gn").read().splitlines()[0]
-compute_host = 'daint'
-compute_queue = 'normal'
-constraint = 'gpu' # 'mc'
-
-model = 'icon-art-global'
-restart_step = 24 # hours
-
-# Number of tasks per node
-ntasks_per_node = 36 if constraint == 'mc' else 12
-
-# Case name = pathname in cases/
-casename = os.path.basename(os.path.dirname(os.path.realpath(__file__)))
-
-# Root directory of the sourcecode of the chain (where run_chain.py is)
-chain_src_dir = os.getcwd()
-
-# Root directory of the working space of the chain
-work_root = os.path.join(chain_src_dir, 'work')
-
-# Case directory
-case_dir = os.path.join(chain_src_dir, 'cases', casename)
-
-# -----------------------------------------------------------
-# SIMULATION
-# -----------------------------------------------------------
-
-# Executable
-icon_bin = os.path.join(chain_src_dir, 'src', 'icon-art', 'bin', 'icon')
-
-# eccodes
-eccodes_dir = os.path.join(chain_src_dir, 'input', 'eccodes_definitions')
-
-# Paths for namelists and slurm runscript templates
-# icon_runjob = os.path.join(case_dir, 'icon_runjob_withoutart.cfg')
-icon_runjob = os.path.join(case_dir, 'icon_runjob.cfg')
-icon_era5_inijob = os.path.join(case_dir, 'icon_era5_inicond.sh')
-icon_era5_nudgingjob = os.path.join(case_dir, 'icon_era5_nudging.sh')
-icon_species_inijob = os.path.join(case_dir, 'icon_species_inicond.sh')
-icon_species_nudgingjob = os.path.join(case_dir, 'icon_species_nudging.sh')
-
-# Number of hours between two output data
-output_writing_step = 12 # TO MODIFY
-
-# Initial conditios
-era5_inicond = False # TO MODIFY
-species_inicond = True
-species2restart = ['TROH']
-
-# Nudging (meteorological and tracers)
-era5_global_nudging = False
-species_global_nudging = False
-species2nudge = []
-nudging_step = 12
-
-# Walltimes and domain decomposition
-if compute_queue == "normal":
- icon_walltime = "00:30:00"
- icon_np_tot = 2
-elif compute_queue == "debug":
- icon_walltime = "00:30:00"
- icon_np_tot = 2
-
-# -----------------------------------------------------------
-# INPUT DATA
-# -----------------------------------------------------------
-# ART settings-----------------------------------------------
-input_root = os.path.join(chain_src_dir, 'input', model)
-art_input_folder = os.path.join(input_root, 'art')
-
-input_files = {
- 'inicond_filename': ['era2icon_R2B03_2022060200.nc', 'icbc'],
- 'map_file_nudging': ['map_file.nudging', 'icbc'],
- 'dynamics_grid_filename': ["iconR2B03-DOM01.nc", 'grid'],
- 'radiation_grid_filename': ["iconR2B03-DOM01.nc", 'grid'],
- 'extpar_filename': ["extpar_iconR2B03-DOM01.nc", 'grid'],
- 'cldopt_filename': ['ECHAM6_CldOptProps.nc', 'rad'],
- 'lrtm_filename': ['rrtmg_lw.nc', 'rad'],
- 'oh_molec_filename': ['oh_gcp2022_icongrid.nc', 'chemistry'],
- 'pntSrc_xml_filename': ['point-sources.xml', 'config'],
- 'chemtracer_xml_filename': ['tracers.xml', 'config'],
-}
-
-# -----------------------------------------------------------
-# Additional settings derived from constants
-# -----------------------------------------------------------
-
-# Nudge type (global or nothing)
-nudge_type = 2 if era5_global_nudging else 0
-
-# Time step for global nudging in seconds
-nudging_step_seconds = nudging_step * 3600
-
-# Prescribed initial conditions for CH4, CO and/or OH
-iart_init_gas = 4 if species_inicond else 0
diff --git a/cases/icon-art-global-test/config.yaml b/cases/icon-art-global-test/config.yaml
index 2344a8df..2fe3f096 100644
--- a/cases/icon-art-global-test/config.yaml
+++ b/cases/icon-art-global-test/config.yaml
@@ -1,6 +1,6 @@
# Configuration file for the 'icon-art-global-test' case with ICON
-model: icon-art-global
+workflow: icon-art-global
constraint: gpu
run_on: cpu
compute_queue: normal
@@ -28,6 +28,11 @@ species_global_nudging: False
species2nudge: []
nudging_step: 6
+walltime:
+ prepare_icon: '00:15:00'
+ prepare_art_global: '00:10:00'
+ icon: '00:05:00'
+
era5:
inicond: False
global_nudging: False
@@ -59,7 +64,7 @@ input_files:
pntSrc_xml_filename: ./input/icon-art-global/config/point-sources.xml
icon:
- binary_file: ./src/icon-art/bin/icon
+ binary_file: ./ext/icon-art/bin/icon
runjob_filename: icon_runjob.cfg
era5_inijob: icon_era5_inicond.sh
era5_nudgingjob: icon_era5_nudging.sh
@@ -67,7 +72,6 @@ icon:
species_nudgingjob: icon_species_nudging.sh
output_writing_step: 6
compute_queue: normal
- walltime: '00:10:00'
np_tot: 4
np_io: 1
np_restart: 1
diff --git a/cases/icon-art-global-test/icon_runjob.cfg b/cases/icon-art-global-test/icon_runjob.cfg
index 1e324a59..d241a8cf 100644
--- a/cases/icon-art-global-test/icon_runjob.cfg
+++ b/cases/icon-art-global-test/icon_runjob.cfg
@@ -1,13 +1,13 @@
#!/usr/bin/env bash
-#SBATCH --job-name="{cfg.casename}_{cfg.startdate_sim_yyyymmddhh}_{cfg.forecasttime}"
+#SBATCH --job-name=icon
#SBATCH --account={cfg.compute_account}
-#SBATCH --time={cfg.icon_walltime}
+#SBATCH --time={cfg.walltime_icon}
#SBATCH --nodes={cfg.icon_np_tot}
#SBATCH --ntasks-per-node={cfg.ntasks_per_node}
#SBATCH --partition={cfg.compute_queue}
#SBATCH --constraint={cfg.constraint}
#SBATCH --hint=nomultithread
-#SBATCH --output={logfile}
+#SBATCH --output={cfg.logfile}
#SBATCH --open-mode=append
#SBATCH --chdir={cfg.icon_work}
@@ -388,22 +388,12 @@ EOF
# ----------------------------------------------------------------------
# run the model!
# ----------------------------------------------------------------------
- srun ./icon.exe
-
-
-
-# ! output_nml: specifies an output stream --------------------------------------
-# &output_nml
-# filetype = 4 ! output format: 2=GRIB2, 4=NETCDFv2
-# dom = -1 ! write all domains
-# output_bounds = 0., 2678400., 3600. ! start, end, increment
-# steps_per_file = 1 ! number of steps per file
-# mode = 1 ! 1: forecast mode (relative t-axis), 2: climate mode (absolute t-axis)
-# include_last = .TRUE.
-# output_filename = 'ICON-ART'
-# filename_format = '{cfg.icon_output}/_latlon_' ! file name base
-# remap = 1 ! 1: remap to lat-lon grid
-# reg_lon_def = -179.,2,179
-# reg_lat_def = 90.,-1,-90.
-# ml_varlist = 'z_ifc','z_mc','pres','pres_sfc','qc','rh','rho','temp','u','v','w','group:ART_CHEMISTRY',
-# /
+handle_error(){{
+ # Check for invalid pointer error at the end of icon-art
+ if grep -q "free(): invalid pointer" {cfg.logfile} && grep -q "clean-up finished" {cfg.logfile}; then
+ exit 0
+ else
+ exit 1
+ fi
+}}
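+# Treat the known "free(): invalid pointer" abort during ICON-ART clean-up
+# as a successful run; any other failure exits non-zero.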
+srun ./{cfg.icon_execname} || handle_error
diff --git a/cases/icon-art-oem-test/config.yaml b/cases/icon-art-oem-test/config.yaml
index 5a3d8511..a50fbacc 100644
--- a/cases/icon-art-oem-test/config.yaml
+++ b/cases/icon-art-oem-test/config.yaml
@@ -1,6 +1,6 @@
# Configuration file for the 'icon-art-oem-test' case with ICON
-model: icon-art-oem
+workflow: icon-art-oem
constraint: gpu
run_on: cpu
compute_queue: normal
@@ -21,6 +21,13 @@ filename_format: _DOM_
lateral_boundary_grid_order: lateral_boundary
art_input_folder: ./input/icon-art-oem/ART
+walltime:
+ prepare_icon: '00:10:00'
+ icontools: '00:30:00'
+ prepare_art: '00:10:00'
+ prepare_art_oem: '00:10:00'
+ icon: '00:30:00'
+
meteo:
dir: ./input/meteo
prefix: ifs_
@@ -34,6 +41,8 @@ chem:
nameformat: '%Y%m%d_%H'
suffix: .grb
inc: 3
+ remap_tracers:
+ CH4_BG: TRCH4_chemtr
icontools_runjobs:
- icontools_remap_ic_runjob.cfg
@@ -60,7 +69,7 @@ input_files:
oem_monthofyear_nc: ./input/icon-art-oem/OEM/monthofyear.nc
icon:
- binary_file: ./src/icon-art/bin/icon
+ binary_file: ./ext/icon-art/bin/icon
runjob_filename: icon_runjob.cfg
compute_queue: normal
walltime: '00:10:00'
diff --git a/cases/icon-art-oem-test/icon_runjob.cfg b/cases/icon-art-oem-test/icon_runjob.cfg
index 3e69720c..883c8d86 100644
--- a/cases/icon-art-oem-test/icon_runjob.cfg
+++ b/cases/icon-art-oem-test/icon_runjob.cfg
@@ -1,13 +1,13 @@
#!/usr/bin/env bash
-#SBATCH --job-name="{cfg.casename}_{cfg.startdate_sim_yyyymmddhh}_{cfg.forecasttime}"
+#SBATCH --job-name=icon
#SBATCH --account={cfg.compute_account}
-#SBATCH --time={cfg.icon_walltime}
+#SBATCH --time={cfg.walltime_icon}
#SBATCH --nodes={cfg.icon_np_tot}
#SBATCH --ntasks-per-node={cfg.ntasks_per_node}
#SBATCH --partition={cfg.compute_queue}
#SBATCH --constraint={cfg.constraint}
#SBATCH --hint=nomultithread
-#SBATCH --output={logfile}
+#SBATCH --output={cfg.logfile}
#SBATCH --open-mode=append
#SBATCH --chdir={cfg.icon_work}
@@ -368,4 +368,12 @@ EOF
# ----------------------------------------------------------------------
# run the model!
# ----------------------------------------------------------------------
- srun ./icon.exe
+handle_error(){{
+ # Check for invalid pointer error at the end of icon-art
+ if grep -q "free(): invalid pointer" {cfg.logfile} && grep -q "clean-up finished" {cfg.logfile}; then
+ exit 0
+ else
+ exit 1
+ fi
+}}
+srun ./{cfg.icon_execname} || handle_error
diff --git a/cases/icon-art-oem-test/icontools_remap_00_lbc_runjob.cfg b/cases/icon-art-oem-test/icontools_remap_00_lbc_runjob.cfg
index f81d9bdd..1f454071 100644
--- a/cases/icon-art-oem-test/icontools_remap_00_lbc_runjob.cfg
+++ b/cases/icon-art-oem-test/icontools_remap_00_lbc_runjob.cfg
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
-#SBATCH --job-name="iconsub_{cfg.startdate_sim_yyyymmddhh}"
+#SBATCH --job-name=iconsub
#SBATCH --account={cfg.compute_account}
#SBATCH --chdir={cfg.icon_work}
#SBATCH --partition={cfg.compute_queue}
@@ -17,7 +17,7 @@ set -x
export ECCODES_DEFINITION_PATH={cfg.eccodes_dir}/definitions.edzw-2.12.5-2:{cfg.eccodes_dir}/definitions
-. {cfg.chain_src_dir}/src/spack-c2sm/setup-env.sh
+. {cfg.chain_src_dir}/ext/spack-c2sm/setup-env.sh
spack load icontools
#-----------------------------------------------------------------------------
diff --git a/cases/icon-art-oem-test/icontools_remap_ic_chem_runjob.cfg b/cases/icon-art-oem-test/icontools_remap_ic_chem_runjob.cfg
index 4d80d10c..48fb9f3a 100644
--- a/cases/icon-art-oem-test/icontools_remap_ic_chem_runjob.cfg
+++ b/cases/icon-art-oem-test/icontools_remap_ic_chem_runjob.cfg
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
-#SBATCH --job-name="{cfg.casename}_{cfg.startdate_sim_yyyymmddhh}_{cfg.forecasttime}"
+#SBATCH --job-name=iconremap_ic_chem
#SBATCH --account={cfg.compute_account}
#SBATCH --chdir={cfg.icon_work}
#SBATCH --partition={cfg.compute_queue}
@@ -17,7 +17,7 @@ set -e -x
export ECCODES_DEFINITION_PATH={cfg.eccodes_dir}/definitions.edzw-2.12.5-2:{cfg.eccodes_dir}/definitions
-. {cfg.chain_src_dir}/src/spack-c2sm/setup-env.sh
+. {cfg.chain_src_dir}/ext/spack-c2sm/setup-env.sh
spack load icontools
#-----------------------------------------------------------------------------
diff --git a/cases/icon-art-oem-test/icontools_remap_ic_runjob.cfg b/cases/icon-art-oem-test/icontools_remap_ic_runjob.cfg
index 7cfdb530..e704afb8 100644
--- a/cases/icon-art-oem-test/icontools_remap_ic_runjob.cfg
+++ b/cases/icon-art-oem-test/icontools_remap_ic_runjob.cfg
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
-#SBATCH --job-name="iconremap_{cfg.startdate_sim_yyyymmddhh}"
+#SBATCH --job-name=iconremap_ic
#SBATCH --account={cfg.compute_account}
#SBATCH --chdir={cfg.icon_work}
#SBATCH --partition={cfg.compute_queue}
@@ -17,7 +17,7 @@ set -x
export ECCODES_DEFINITION_PATH={cfg.eccodes_dir}/definitions.edzw-2.12.5-2:{cfg.eccodes_dir}/definitions
-. {cfg.chain_src_dir}/src/spack-c2sm/setup-env.sh
+. {cfg.chain_src_dir}/ext/spack-c2sm/setup-env.sh
spack load icontools
#-----------------------------------------------------------------------------
diff --git a/cases/icon-art-oem-test/icontools_remap_lbc_chem_runjob.cfg b/cases/icon-art-oem-test/icontools_remap_lbc_chem_runjob.cfg
index be948240..e629494b 100644
--- a/cases/icon-art-oem-test/icontools_remap_lbc_chem_runjob.cfg
+++ b/cases/icon-art-oem-test/icontools_remap_lbc_chem_runjob.cfg
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
-#SBATCH --job-name="{cfg.casename}_{cfg.startdate_sim_yyyymmddhh}_{cfg.forecasttime}"
+#SBATCH --job-name=iconremap_lbc
#SBATCH --account={cfg.compute_account}
#SBATCH --chdir={cfg.icon_work}
#SBATCH --partition={cfg.compute_queue}
@@ -17,7 +17,7 @@ set -e -x
export ECCODES_DEFINITION_PATH={cfg.eccodes_dir}/definitions.edzw-2.12.5-2:{cfg.eccodes_dir}/definitions
-. {cfg.chain_src_dir}/src/spack-c2sm/setup-env.sh
+. {cfg.chain_src_dir}/ext/spack-c2sm/setup-env.sh
spack load icontools
#-----------------------------------------------------------------------------
diff --git a/cases/icon-art-oem-test/icontools_remap_lbc_rest_runjob.cfg b/cases/icon-art-oem-test/icontools_remap_lbc_rest_runjob.cfg
index 3bc183c3..38dc6b07 100644
--- a/cases/icon-art-oem-test/icontools_remap_lbc_rest_runjob.cfg
+++ b/cases/icon-art-oem-test/icontools_remap_lbc_rest_runjob.cfg
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
-#SBATCH --job-name="iconremap_lbc_{cfg.startdate_sim_yyyymmddhh}"
+#SBATCH --job-name=iconremap_lbc
#SBATCH --account={cfg.compute_account}
#SBATCH --chdir={cfg.icon_work}
#SBATCH --partition={cfg.compute_queue}
@@ -17,7 +17,7 @@ set -x
export ECCODES_DEFINITION_PATH={cfg.eccodes_dir}/definitions.edzw-2.12.5-2:{cfg.eccodes_dir}/definitions
-. {cfg.chain_src_dir}/src/spack-c2sm/setup-env.sh
+. {cfg.chain_src_dir}/ext/spack-c2sm/setup-env.sh
spack load icontools
#-----------------------------------------------------------------------------
diff --git a/cases/icon-test/config.yaml b/cases/icon-test/config.yaml
index e1d58782..06065e42 100644
--- a/cases/icon-test/config.yaml
+++ b/cases/icon-test/config.yaml
@@ -1,6 +1,6 @@
-# Configuration file for the 'icon-test' case with ICON
+# Configuration file for the 'icon-async-test' case with ICON
-model: icon
+workflow: icon
constraint: gpu
run_on: cpu
compute_queue: normal
@@ -18,6 +18,11 @@ output_filename: NWP_LAM
filename_format: _DOM_
lateral_boundary_grid_order: lateral_boundary
+walltime:
+ prepare_icon: '00:10:00'
+ icontools: '00:30:00'
+ icon: '00:30:00'
+
meteo:
dir: ./input/meteo
prefix: ifs_
@@ -41,10 +46,9 @@ input_files:
map_file_ana: ./input/icon/mapping/map_file.ana
icon:
- binary_file: ./src/icon/bin/icon
+ binary_file: ./ext/icon/bin/icon
runjob_filename: icon_runjob.cfg
compute_queue: normal
- walltime: '00:10:00'
np_tot: 8
np_io: 1
np_restart: 1
diff --git a/cases/icon-test/icon_runjob.cfg b/cases/icon-test/icon_runjob.cfg
index 09ff2c3a..88c8b735 100755
--- a/cases/icon-test/icon_runjob.cfg
+++ b/cases/icon-test/icon_runjob.cfg
@@ -1,13 +1,13 @@
#!/usr/bin/env bash
-#SBATCH --job-name="{cfg.casename}_{cfg.startdate_sim_yyyymmddhh}_{cfg.enddate_sim_yyyymmddhh}"
+#SBATCH --job-name=icon
#SBATCH --account={cfg.compute_account}
-#SBATCH --time={cfg.icon_walltime}
+#SBATCH --time={cfg.walltime_icon}
#SBATCH --nodes={cfg.icon_np_tot}
#SBATCH --ntasks-per-node={cfg.ntasks_per_node}
#SBATCH --partition={cfg.compute_queue}
#SBATCH --constraint={cfg.constraint}
#SBATCH --hint=nomultithread
-#SBATCH --output={logfile}
+#SBATCH --output={cfg.logfile}
#SBATCH --open-mode=append
#SBATCH --chdir={cfg.icon_work}
@@ -342,4 +342,4 @@ EOF
# ----------------------------------------------------------------------
# run the model!
# ----------------------------------------------------------------------
- srun ./icon.exe
+srun ./{cfg.icon_execname}
diff --git a/cases/icon-test/icontools_remap_00_lbc_runjob.cfg b/cases/icon-test/icontools_remap_00_lbc_runjob.cfg
index f81d9bdd..1f454071 100755
--- a/cases/icon-test/icontools_remap_00_lbc_runjob.cfg
+++ b/cases/icon-test/icontools_remap_00_lbc_runjob.cfg
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
-#SBATCH --job-name="iconsub_{cfg.startdate_sim_yyyymmddhh}"
+#SBATCH --job-name=iconsub
#SBATCH --account={cfg.compute_account}
#SBATCH --chdir={cfg.icon_work}
#SBATCH --partition={cfg.compute_queue}
@@ -17,7 +17,7 @@ set -x
export ECCODES_DEFINITION_PATH={cfg.eccodes_dir}/definitions.edzw-2.12.5-2:{cfg.eccodes_dir}/definitions
-. {cfg.chain_src_dir}/src/spack-c2sm/setup-env.sh
+. {cfg.chain_src_dir}/ext/spack-c2sm/setup-env.sh
spack load icontools
#-----------------------------------------------------------------------------
diff --git a/cases/icon-test/icontools_remap_ic_runjob.cfg b/cases/icon-test/icontools_remap_ic_runjob.cfg
index 7cfdb530..e704afb8 100755
--- a/cases/icon-test/icontools_remap_ic_runjob.cfg
+++ b/cases/icon-test/icontools_remap_ic_runjob.cfg
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
-#SBATCH --job-name="iconremap_{cfg.startdate_sim_yyyymmddhh}"
+#SBATCH --job-name=iconremap_ic
#SBATCH --account={cfg.compute_account}
#SBATCH --chdir={cfg.icon_work}
#SBATCH --partition={cfg.compute_queue}
@@ -17,7 +17,7 @@ set -x
export ECCODES_DEFINITION_PATH={cfg.eccodes_dir}/definitions.edzw-2.12.5-2:{cfg.eccodes_dir}/definitions
-. {cfg.chain_src_dir}/src/spack-c2sm/setup-env.sh
+. {cfg.chain_src_dir}/ext/spack-c2sm/setup-env.sh
spack load icontools
#-----------------------------------------------------------------------------
diff --git a/cases/icon-test/icontools_remap_lbc_rest_runjob.cfg b/cases/icon-test/icontools_remap_lbc_rest_runjob.cfg
index 3bc183c3..38dc6b07 100755
--- a/cases/icon-test/icontools_remap_lbc_rest_runjob.cfg
+++ b/cases/icon-test/icontools_remap_lbc_rest_runjob.cfg
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
-#SBATCH --job-name="iconremap_lbc_{cfg.startdate_sim_yyyymmddhh}"
+#SBATCH --job-name=iconremap_lbc
#SBATCH --account={cfg.compute_account}
#SBATCH --chdir={cfg.icon_work}
#SBATCH --partition={cfg.compute_queue}
@@ -17,7 +17,7 @@ set -x
export ECCODES_DEFINITION_PATH={cfg.eccodes_dir}/definitions.edzw-2.12.5-2:{cfg.eccodes_dir}/definitions
-. {cfg.chain_src_dir}/src/spack-c2sm/setup-env.sh
+. {cfg.chain_src_dir}/ext/spack-c2sm/setup-env.sh
spack load icontools
#-----------------------------------------------------------------------------
diff --git a/config.py b/config.py
new file mode 100644
index 00000000..18293554
--- /dev/null
+++ b/config.py
@@ -0,0 +1,609 @@
+from subprocess import run, CalledProcessError
+import os
+import yaml
+from datetime import timedelta
+
+from jobs import tools
+from pathlib import Path
+
+
+class Config():
+
+ def __init__(self, casename):
+ """Initialize an instance of the Config class.
+
+ Initializes an instance of the Config class with user-specific
+ and default attributes. The class represents a processing chain for a
+ particular case, and its attributes are populated based on the provided
+ `casename`.
+
+ Parameters
+ ----------
+ casename : str
+ The identifier for the case, typically specifying the configuration
+ and settings to be used in the processing chain.
+
+ Attributes
+ ----------
+ user_name : str
+ The username of the current user, obtained from the 'USER' environment variable.
+ email : str
+ The user's email address, initially set to None and updated using the `set_email` method.
+ casename : str
+ The specified case name for the processing chain.
+ chain_src_dir : str
+ The source directory for the processing chain, typically the current working directory.
+ case_path : str
+ The path to the case directory under 'cases/' for the specified `casename`.
+ work_root : str
+ The root directory for processing chain execution, typically located under the source directory.
+
+ Notes
+ -----
+ The method also loads user-defined attributes from the configuration file,
+ sets specific settings based on the node type ('gpu' or 'mc'), and initializes
+ other instance-specific attributes.
+ """
+ # Global attributes (initialized with default values)
+ self.user_name = os.environ['USER']
+ self.set_email()
+ self.casename = casename
+ self.set_account()
+
+ self.chain_src_dir = Path.cwd()
+ self.case_path = self.chain_src_dir / 'cases' / self.casename
+ self.work_root = self.chain_src_dir / 'work'
+
+ # User-defined attributes from config file
+ self.load_config_file()
+
+ # Set case root
+ self.case_root = self.work_root / self.casename
+ self.log_file = self.case_root / "chain_status.log"
+
+ # Set workflow and async attributes and initiate job ids dict
+ self.set_workflow()
+
+ # Specific settings based on the node type ('gpu' or 'mc')
+ self.set_node_info()
+
+ def load_config_file(self):
+ """Load configuration settings from a YAML file and set them as attributes.
+
+ This method reads the configuration settings from a YAML file located in
+ the 'cases/casename' directory and sets them as attributes of the instance.
+
+ Raises
+ ------
+ FileNotFoundError
+ If the specified configuration file or case directory is not found.
+
+ Notes
+ -----
+ If the configuration file does not exist, the method will attempt to suggest
+ a similar case directory based on a Levenshtein distance comparison with
+ existing case directories. The method directly assigns values from the
+ configuration file to instance attributes for easy access.
+ """
+ cfg_file = Path('cases', self.casename, 'config.yaml').resolve()
+
+ if not cfg_file.is_file():
+ all_cases = [
+ path.name for path in os.scandir('cases') if path.is_dir()
+ ]
+ closest_name = min([(tools.levenshtein(self.casename, name), name)
+ for name in all_cases],
+ key=lambda x: x[0])[1]
+ raise FileNotFoundError(
+ f"Case-directory '{self.casename}' not found, did you mean '{closest_name}'?"
+ )
+
+ try:
+ with cfg_file.open('r') as yaml_file:
+ cfg_data = yaml.load(yaml_file, Loader=yaml.FullLoader)
+ except FileNotFoundError:
+ raise FileNotFoundError(
+ f"No file 'config.yaml' in {cfg_file.parent}")
+
+ # Directly assign values to instance attributes
+ for key, value in cfg_data.items():
+ setattr(self, key, value)
+
+ def set_account(self):
+ """Set the compute account based on user information.
+
+ This method determines the compute account to be used based on the user's
+ name and system configuration.
+
+ Notes
+ -----
+ - If the user name is 'jenkins', the compute account is set to 'g110' for
+ Jenkins testing.
+ - If an account is specified in the user's '~/.acct' file, it will be used
+ as the compute account.
+ - If neither of the above conditions is met, the standard account is
+ determined using the 'id -gn' command.
+ """
+ if self.user_name == 'jenkins':
+ # g110 account for Jenkins testing
+ self.compute_account = 'g110'
+ elif (p := Path.home() / '.acct').exists():
+ # Use account specified in ~/.acct file
+ with p.open('r') as file:
+ self.compute_account = file.read().rstrip()
+ else:
+ # Use standard account
+ self.compute_account = os.popen("id -gn").read().splitlines()[0]
+
+ def set_node_info(self):
+ """Set node-specific information based on configuration settings.
+
+ This method configures node-specific settings, such as the number of tasks
+ per node and CUDA-related environment variables, based on the provided
+ configuration settings in the instance.
+
+ Raises
+ ------
+ ValueError
+ If the 'constraint' or 'run_on' configuration values are invalid.
+ """
+ if self.constraint == 'gpu':
+ if hasattr(self, 'icon'):
+ if self.run_on == 'gpu':
+ self.ntasks_per_node = 1
+ elif self.run_on == 'cpu':
+ self.ntasks_per_node = 12
+ else:
+ raise ValueError(
+ "Invalid value for 'run_on' in the configuration."
+ "It should be either 'gpu' or 'cpu'.")
+ else:
+ self.ntasks_per_node = 12
+ self.mpich_cuda = ('export MPICH_RDMA_ENABLED_CUDA=1\n'
+ 'export MPICH_G2G_PIPELINE=256\n'
+ 'export CRAY_CUDA_MPS=1\n')
+ elif self.constraint == 'mc':
+ self.ntasks_per_node = 36
+ self.mpich_cuda = ''
+ else:
+ raise ValueError(
+ "Invalid value for 'constraint' in the configuration."
+ "It should be either 'gpu' or 'mc'.")
+
+ def set_workflow(self):
+        """Set the workflow and async attributes and initialize the job ids dict."""
+ # If a workflow name is specified, load from workflows.yaml
+ if isinstance(self.workflow, str):
+ self.workflow_name = self.workflow
+ with open('workflows.yaml') as file:
+ workflows = yaml.safe_load(file)
+ self.workflow = workflows[self.workflow_name]
+ # Otherwise, use custom workflow from config.yaml directly
+ elif isinstance(self.workflow, dict):
+ self.workflow_name = 'custom'
+ else:
+ raise InvalidWorkflowType(
+ "Invalid workflow type. Must be either a string or a dictionary."
+ )
+
+ assert 'dependencies' in self.workflow, "Missing 'dependencies' in workflow. Exiting."
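+
+        # Illustrative sketch of the expected workflow structure (job names
+        # are examples; the actual entries live in workflows.yaml):
+        #   features: ['restart']
+        #   jobs: ['prepare_icon', 'icon']
+        #   dependencies:
+        #     icon:
+        #       current: ['prepare_icon']
+        #       previous: ['icon']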
+
+        # Initialize the empty job ids dictionary so that it can be filled in later
+ self.job_ids = {'current': {}, 'previous': {}}
+
+ def set_restart_step_hours(self):
+ """Set the restart step in hours.
+
+ Converts the 'restart_step' attribute, which is in ISO8601 duration format,
+ to hours and stores the result in the 'restart_step_hours' attribute.
+ """
+ self.restart_step_hours = int(
+ tools.iso8601_duration_to_hours(self.restart_step))
+
+ def set_email(self):
+ """Set the user's email address based on system configuration.
+
+ This method determines the user's email address based on the user's name
+ and system configuration.
+
+ Notes
+ -----
+ - If the user name is 'jenkins', the user's email address is set to None.
+ - If an email address is specified in the user's '~/.forward' file, it will
+ be used as the user's email address.
+ - If neither of the above conditions is met, the user's email address is set
+ to None.
+ """
+ if self.user_name == 'jenkins':
+ self.user_mail = None
+ elif (p := Path.home() / '.forward').exists():
+ with p.open('r') as file:
+ self.user_mail = file.read().rstrip()
+ else:
+ self.user_mail = None
+
+ def print_config(self):
+ """Print the configuration attributes and their values.
+
+ This method displays the configuration attributes and their corresponding
+ values in a formatted manner. Lists and dictionaries within the configuration
+ are also displayed with appropriate indentation.
+
+ Notes
+ -----
+ - The maximum column width for the attribute names is automatically determined.
+ - The method prints the attribute name, its type, and its value.
+ - If an attribute is a list, it is displayed with each item indented.
+ - If an attribute is a dictionary, it is also displayed with each key-value
+ pair indented.
+ """
+ # max_col_width = max(len(key) for key in vars(self)) + 1
+ max_col_width = 27
+
+ print("\nConfiguration:")
+ print(f"{'Attribute':<{max_col_width}} Type Value")
+ print("-" * 80)
+ for key, value in vars(self).items():
+ if isinstance(value, list):
+ # If the value is a list, format it with indentation
+ print(f"{key:<{max_col_width}} list")
+ for item in value:
+ item_type = "Path" if type(
+ item).__name__ == "PosixPath" else type(item).__name__
+                    print(f"  - {str(item):<{max_col_width-4}} {item_type}")
+ elif isinstance(value, dict):
+ # If the value is a dictionary, format it as before
+ print(f"{key:<{max_col_width}} dict")
+ for sub_key, sub_value in value.items():
+ sub_value_type = "Path" if type(
+ sub_value).__name__ == "PosixPath" else type(
+ sub_value).__name__
+ print(
+ f" - {sub_key:<{max_col_width-4}} {sub_value_type:<4} {sub_value}"
+ )
+ else:
+ # Standard output
+ key_type = type(key).__name__
+ print(f"{key:<{max_col_width}} {key_type:<4} {value}")
+
+ def convert_paths_to_absolute(self, dct=None):
+ """Convert relative file paths to absolute paths in the configuration.
+
+ Recursively convert all strings starting with './' in the instance
+ attributes to absolute paths.
+ """
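+        # For example, './input/meteo' would become an absolute path such as
+        # /scratch/<user>/processing-chain/input/meteo (illustrative)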
+ if dct is None:
+ self.convert_paths_to_absolute(dct=vars(self))
+ else:
+ for k, v in dct.items():
+ if isinstance(v, dict):
+ self.convert_paths_to_absolute(dct=v)
+ elif isinstance(v, str) and v.startswith('./'):
+ dct[k] = Path(v).absolute()
+
+ def create_vars_from_dicts(self, dct=None, key=None):
+ """Create instance attributes from dictionary entries in the configuration.
+
+ This method recursively iterates through the instance's attribute dictionary
+ and checks for dictionary values. For each dictionary encountered, it creates
+ new instance attributes by concatenating the original attribute name and the
+ dictionary key, and assigns the corresponding values.
+ """
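+        # For example, a config entry like walltime: {icon: '00:30:00'}
+        # becomes the attribute self.walltime_icon, which runjob templates
+        # reference as {cfg.walltime_icon}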
+ if dct is None:
+ self.create_vars_from_dicts(dct=vars(self).copy())
+ else:
+ for k, v in dct.items():
+ subkey = k if key is None else key + '_' + k
+ if isinstance(v, dict):
+ self.create_vars_from_dicts(dct=v, key=subkey)
+ else:
+ setattr(self, subkey, v)
+
+ def get_chunk_list(self):
+ self.chunk_list = []
+ for startdate_sim in tools.iter_hours(self.startdate, self.enddate,
+ self.restart_step_hours):
+ enddate_sim = startdate_sim + timedelta(
+ hours=self.restart_step_hours)
+ if 'spinup' in self.workflow['features'] and hasattr(
+ self, 'spinup'):
+ if startdate_sim > self.startdate:
+ startdate_sim = startdate_sim - timedelta(
+ hours=self.spinup)
+
+ startdate_sim_yyyymmddhh = startdate_sim.strftime("%Y%m%d%H")
+ enddate_sim_yyyymmddhh = enddate_sim.strftime("%Y%m%d%H")
+ chunk_id = f"{startdate_sim_yyyymmddhh}_{enddate_sim_yyyymmddhh}"
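+            # e.g. '2015010100_2015010106' for the first 6-hour chunk of the
+            # cosmo-ghg test case (startdate 2015-01-01T00, restart_step PT6H)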
+
+ if enddate_sim > self.enddate:
+ continue
+
+ self.chunk_list.append(chunk_id)
+
+ def get_previous_chunk_id(self, current_chunk_id):
+ """Get the previous chunk ID based on the current `chunk_id`"""
+ index = self.chunk_list.index(current_chunk_id)
+ if index > 0:
+ self.chunk_id_prev = self.chunk_list[index - 1]
+ else:
+ self.chunk_id_prev = None
+
+ def get_dep_ids(self, job_name, add_dep=None):
+ """Get dependency job ids for `job_name`"""
+ # Initial list of dependencies
+ if add_dep is not None:
+ if isinstance(add_dep, int):
+ dep_id_list = [add_dep]
+ else:
+ try:
+ dep_id_list = list(add_dep)
+                except TypeError:
+                    print("add_dep must be an int or an iterable of ints")
+                    dep_id_list = []
+ else:
+ dep_id_list = []
+
+ # Add job dependencies
+ if deps := self.workflow['dependencies'].get(job_name):
+ for stage in 'previous', 'current':
+ if dep_stage := deps.get(stage):
+ for job in dep_stage:
+ # Could be that dep job id does not exist, e.g.,
+ # if dep job is deactivated or it's the first chunk
+ if dep_id := self.job_ids[stage].get(job):
+ dep_id_list.extend(dep_id)
+ return dep_id_list
+
+ def get_dep_cmd(self, job_name, add_dep=None):
+        """Generate the part of the sbatch command that specifies dependencies for `job_name`"""
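+        # e.g. dependency job ids [101, 102] yield the string
+        # '--dependency=afterok:101:102' (ids are illustrative)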
+ # Needed for nested run_chain.py
+ if self.force_sync:
+ return '--wait'
+
+ if dep_ids := self.get_dep_ids(job_name, add_dep=add_dep):
+ dep_str = ':'.join(map(str, dep_ids))
+ return f'--dependency=afterok:{dep_str}'
+
+ # job_name has no dependencies but still belongs to an async workflow
+ # so don't use --wait
+ return None
+
+ def submit(self, job_name, script, add_dep=None):
+ """Submit job with dependencies"""
+ script_path = Path(script)
+ sbatch_cmd = ['sbatch', '--parsable']
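+        # --parsable makes sbatch print just the job id so it can be read
+        # from stdout below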
+ if dep_cmd := self.get_dep_cmd(job_name, add_dep=add_dep):
+ sbatch_cmd.append(dep_cmd)
+ sbatch_cmd.append(script_path.name)
+
+ try:
+ result = run(sbatch_cmd,
+ cwd=script_path.parent,
+ capture_output=True,
+ check=True)
+ except CalledProcessError as e:
+            with open(self.logfile, 'a') as f:
+                f.write(str(e))
+            raise e
+
+ job_id = int(result.stdout)
+ print(f' └── Submitted batch job {job_id}')
+
+ if job_name not in self.job_ids['current']:
+ self.job_ids['current'][job_name] = [job_id]
+ else:
+ self.job_ids['current'][job_name].append(job_id)
+
+ return job_id
+
+ def submit_basic_python(self, job_name):
+ """Create an sbatch script to launch basic python jobs individually.
+ Use run_chain.py arguments to submit those jobs.
+ """
+ # Build job script
+ walltime = getattr(self, 'walltime', {}).get(job_name, "00:30:00")
+ script_lines = [
+ '#!/usr/bin/env bash',
+ f'#SBATCH --job-name={job_name}',
+ '#SBATCH --nodes=1',
+ f'#SBATCH --time={walltime}',
+ f'#SBATCH --output={self.logfile}',
+ '#SBATCH --open-mode=append',
+ f'#SBATCH --account={self.compute_account}',
+ f'#SBATCH --partition={self.compute_queue}',
+ f'#SBATCH --constraint={self.constraint}',
+ '',
+ f'cd {self.chain_src_dir}',
+ f'./run_chain.py {self.casename} -j {job_name} -c {self.chunk_id} -f -s --no-logging',
+ '',
+ ]
+
+ job_path = self.chain_root / 'job_scripts'
+ job_path.mkdir(parents=True, exist_ok=True)
+ job_file = job_path / f'{job_name}.sh'
+ with open(job_file, mode='w') as job_script:
+ job_script.write('\n'.join(script_lines))
+
+ # Submit job
+ self.submit(job_name, job_file)
+
+ def wait_for_previous(self):
+ """Wait for all jobs of the previous stage to be finished.
+
+ Do this by submitting a fake job depending on all jobs from the
+ 'previous' stage.
+ """
+ dep_ids = []
+ for ids in self.job_ids['previous'].values():
+ dep_ids.extend(ids)
+ if dep_ids:
+ job_file = self.case_root / 'submit.wait.slurm'
+ log_file = self.case_root / 'wait.log'
+ dep_str = ':'.join(map(str, dep_ids))
+ script_lines = [
+ '#!/usr/bin/env bash', '#SBATCH --job-name="wait"',
+ '#SBATCH --nodes=1', '#SBATCH --time=00:01:00',
+ f'#SBATCH --output={log_file}',
+ f'#SBATCH --account={self.compute_account}',
+ f'#SBATCH --partition={self.compute_queue}',
+ f'#SBATCH --constraint={self.constraint}',
+ f'#SBATCH --dependency=afterany:{dep_str}', '', '# Do nothing',
+ 'exit 0'
+ ]
+ with open(job_file, mode='w') as wait_job:
+ wait_job.write('\n'.join(script_lines))
+
+ run(['sbatch', '--wait', job_file], check=True)
+
+ def cycle(self):
+ """Cycle to next chunk
+
+ - Wait for previous chunk to finish
+        - Print summary of previous chunk jobs
+        - Check for success of all previous jobs
+        - Cycle job ids and chunk id"""
+
+ # - ML -
+ # - This method could do more of the cycling, like dates
+ # - The config object could host more info and cycle it instead
+ # of recomputing stuff like previous chunk dates
+
+ # Skip if very first chunk
+ if self.job_ids['previous']:
+ # Wait for previous chunk to be done
+ self.wait_for_previous()
+
+ # Get and print previous chunk Slurm summary
+ self.get_previous_slurm_summary()
+ self.print_previous_slurm_summary()
+
+ # Check for success of all previous jobs
+ self.check_previous_chunk_success()
+
+ # Cycle info
+ self.job_ids['previous'] = self.job_ids['current']
+ self.previous_chunk_id = self.chunk_id
+
+ # Monitor last chunk
+ if self.enddate_sim >= self.enddate:
+ self.wait_for_previous()
+ self.get_previous_slurm_summary()
+ self.print_previous_slurm_summary()
+ self.check_previous_chunk_success()
+
+ @staticmethod
+ def get_job_info(job_id,
+ slurm_keys=['JobName', 'Elapsed', 'ExitCode'],
+ parse=True):
+ """Retrieve slurm job info as given by sacct
+
+        If parse is True, parse the info into a dict; otherwise return the
+        raw string from sacct.
+ All possible keys are given by `sacct --helpformat`"""
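+
+        # Illustrative parsed result with the default keys (values made up):
+        #   {'JobName': 'icon', 'Elapsed': '00:05:23', 'ExitCode': '0:0'}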
+
+ # Get info from sacct
+ cmd = ["sacct", f"--format={', '.join(slurm_keys)}", "-j", str(job_id)]
+
+ if parse:
+ cmd.append("--parsable")
+
+ info_str = run(cmd, capture_output=True, check=True).stdout
+
+ if parse:
+            # Parse into a dictionary before returning.
+            # The innermost process should be the relevant one, hence index 1.
+ slurm_info = info_str.split(b'\n')[1].split(b'|')
+ return {k: v.decode() for k, v in zip(slurm_keys, slurm_info)}
+ else:
+ return info_str.decode()
+
+ def get_previous_slurm_summary(self,
+ info_keys=[
+ 'JobName', 'JobID', 'Partition',
+ 'NNodes', 'State', 'Start', 'End',
+ 'Elapsed'
+ ]):
+        """Get the Slurm info summary for all jobs of the previous chunk."""
+
+ # Store requested keys in object
+ self.info_keys = info_keys
+
+ # Get job info for all jobs
+ self.slurm_info = {}
+        for job_name in self.jobs:
+            # Initialize the list once per job name (not per job id) so the
+            # info of every job id is kept
+            self.slurm_info[job_name] = []
+            for job_id in self.job_ids['previous'][job_name]:
+                self.slurm_info[job_name].append(
+                    self.get_job_info(job_id, slurm_keys=info_keys,
+                                      parse=True))
+
+ def print_previous_slurm_summary(self):
+ # Width of printed slurm piece of information
+ info_width = {
+ 'JobName': 13,
+ 'JobID': 8,
+ 'Partition': 9,
+ 'NNodes': 3,
+ 'State': 14,
+ 'Start': 13,
+ 'End': 13,
+ 'Elapsed': 9
+ }
+
+ # Build table header and line format
+ headers = []
+ hlines = []
+ formats = []
+ for k in self.info_keys:
+ j = info_width[k]
+ kh = '#N' if k == 'NNodes' else k
+ formats.append(f"{{{k}:>{j}.{j}}}")
+ headers.append(f"{kh:>{j}.{j}}")
+ hlines.append("-" * j)
+
+ table_header = '\n'.join((' '.join(headers), ' '.join(hlines)))
+ line_format = " ".join(formats)
+
+ with self.log_file.open('a') as f:
+ f.write(f"Job summary for chunk {self.previous_chunk_id}\n")
+ f.write(table_header)
+ f.write('\n')
+ for job_name in self.jobs:
+ for info in self.slurm_info[job_name]:
+ f.write(line_format.format(**info))
+ f.write('\n')
+ f.write('\n')
+
+ def check_previous_chunk_success(self):
+ status = 0
+ failed_jobs = []
+ for job_name, info_list in self.slurm_info.items():
+ for info in info_list:
+ if info['State'] != 'COMPLETED':
+ failed_jobs.append(job_name)
+ status += 1
+
+ if status > 0:
+ raise RuntimeError(f"The following job(s) failed: {failed_jobs}")
+
+
+class InvalidWorkflowType(Exception):
+ pass
diff --git a/config/models.yaml b/config/models.yaml
deleted file mode 100644
index a19f188c..00000000
--- a/config/models.yaml
+++ /dev/null
@@ -1,24 +0,0 @@
-models:
- cosmo:
- jobs: ['prepare_data', 'int2lm', 'cosmo', 'post_cosmo']
- features: ['restart', 'spinup']
- cosmo-ghg:
- jobs: ['prepare_data', 'emissions', 'biofluxes', 'oem', 'online_vprm',
- 'int2lm', 'post_int2lm', 'cosmo', 'post_cosmo']
- features: ['restart', 'tracers', 'spinup']
- cosmo-art:
- jobs: ['prepare_data', 'emissions', 'obs_nudging', 'photo_rate',
- 'int2lm', 'cosmo', 'post_cosmo']
- features: ['nesting', 'spinup']
- icon:
- jobs: ['prepare_data', 'icon']
- features: ['restart']
- icon-art:
- jobs: ['prepare_data', 'icon']
- features: ['restart']
- icon-art-global:
- jobs: ['prepare_data', 'icon']
- features: ['restart']
- icon-art-oem:
- jobs: ['prepare_data', 'icon']
- features: ['restart']
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 00000000..8ec08ace
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,21 @@
+# Makefile for Sphinx documentation
+
+SPHINXOPTS = -c ./
+SPHINXBUILD = sphinx-build
+SOURCEDIR = .
+BUILDDIR = build
+
+.PHONY: help clean html
+
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html       to make standalone HTML files"
+
+clean:
+ rm -rf $(BUILDDIR)/*
+
+html:
+ $(SPHINXBUILD) -b html $(SPHINXOPTS) $(SOURCEDIR) $(BUILDDIR)/html
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
diff --git a/docs/_static/processing-chain-favicon.ico b/docs/_static/processing-chain-favicon.ico
new file mode 100644
index 00000000..95d31d5d
Binary files /dev/null and b/docs/_static/processing-chain-favicon.ico differ
diff --git a/docs/_static/processing-chain-logo-notext.png b/docs/_static/processing-chain-logo-notext.png
new file mode 100644
index 00000000..9f870bf7
Binary files /dev/null and b/docs/_static/processing-chain-logo-notext.png differ
diff --git a/docs/_static/processing-chain-logo-small.png b/docs/_static/processing-chain-logo-small.png
new file mode 100644
index 00000000..dc81a858
Binary files /dev/null and b/docs/_static/processing-chain-logo-small.png differ
diff --git a/docs/_static/processing-chain-logo.png b/docs/_static/processing-chain-logo.png
new file mode 100644
index 00000000..c7e2dc93
Binary files /dev/null and b/docs/_static/processing-chain-logo.png differ
diff --git a/docs/_static/processing_chain_workflow_icon_art.png b/docs/_static/processing_chain_workflow_icon_art.png
new file mode 100644
index 00000000..4a5164d0
Binary files /dev/null and b/docs/_static/processing_chain_workflow_icon_art.png differ
diff --git a/docs/code-structure.rst b/docs/code-structure.rst
new file mode 100644
index 00000000..9a7ee7ab
--- /dev/null
+++ b/docs/code-structure.rst
@@ -0,0 +1,55 @@
+.. _code-structure-section:
+
+Code Structure
+--------------
+
+The Processing Chain code is structured as follows:
+
+.. code-block:: bash
+
+ $ tree -L 3 -F --dirsfirst
+ .
+ ├── cases/ # folder where all cases are stored
+ │ ├── cosmo-ghg-spinup-test/ # COSMO-GHG test case with spinup restart
+ │ │ ├── config.yaml # case configuration file
+ │ │ ├── *.cfg # templates for namelists & batch jobs
+ │ │ └── *.csv # CSV files with tracer information
+ │ ├── cosmo-ghg-test/ # COSMO-GHG testcase with standard restart
+ │ │ ├── config.yaml
+ │ │ ├── *.cfg
+ │ │ └── *.csv
+ │ ├── icon-art-global-test/ # ICON-ART test case (global domain)
+ │ │ ├── config.yaml
+ │ │ ├── icon_runjob.cfg # template for ICON-ART runjob
+ │ │ ├── *.sh # pre-processing scripts
+ │ │ └── mypartab
+ │ ├── icon-art-oem-test/ # ICON-ART test case with online emissions
+ │ │ ├── config.yaml
+ │ │ └── *.cfg
+ │ └── icon-test/ # ICON test case
+ │ ├── config.yaml
+ │ └── *.cfg
+ ├── docs/ # folder for Sphinx documentation
+ │ ├── _static/ # folder for static assets
+ │ │ ├── custom.css # custom CSS styles
+ │ │ └── *.png|ico # additional image assets
+ │ ├── tables/ # folder for tables used in documentation
+ │ │ └── *.csv # CSV files containing table data
+ │ ├── conf.py # configuration file for the Sphinx builder
+ │ └── *.rst # documentation files (reStructuredText)
+ ├── env/
+ │ └── environment.yml # conda environment file
+ ├── ext/ # folder for other code (spack, models, etc.)
+ ├── jenkins/ # automated Jenkins testing
+ │ ├── scripts/
+ │ │ └── *.sh # individual Shell scripts for testing
+ │ └── Jenkinsfile # text file containing the Jenkins pipeline
+ ├── jobs/
+ │ ├── tools/
+ │ │ └── *.py # tool scripts
+ │ └── *.py # job scripts
+ ├── LICENSE # license file
+ ├── README.md # README file
+ ├── config.py # file containing the Config class
+ ├── run_chain.py # main script
+ └── workflows.yaml # file to store workflows with job dependencies
diff --git a/docs/conf.py b/docs/conf.py
index 98475ac5..740fc891 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -12,11 +12,17 @@
import os
import sys
+sys.path.insert(0, os.path.abspath('..')) # Root directory
+sys.path.insert(0, os.path.abspath('../jobs')) # Jobs subfolder
+sys.path.insert(0, os.path.abspath('../jobs/tools')) # Tools subfolder
+
# -- Project information ------------------------------------------------
project = 'Processing Chain'
-copyright = '2018-2023, C2SM'
+copyright = '2018-2024, C2SM'
author = 'Processing Chain Administrators'
+version = 'v3.1'
+release = 'v3.1'
# -- General configuration ------------------------------------------------
@@ -24,11 +30,15 @@
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
- 'sphinx.ext.autosectionlabel', 'sphinx.ext.todo', 'sphinx_rtd_theme',
- 'sphinx_copybutton', 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig',
- 'sphinx.ext.viewcode', 'sphinx.ext.napoleon'
+ 'sphinx.ext.autodoc', 'sphinx.ext.autosectionlabel', 'sphinx.ext.todo',
+ 'sphinx_rtd_theme', 'sphinx_copybutton', 'sphinx.ext.mathjax',
+ 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', 'sphinx.ext.napoleon'
]
+# autodoc options
+autodoc_member_order = 'bysource'
+toc_object_entries_show_parents = 'all'
+
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
@@ -55,8 +65,10 @@
# further. For a list of options available for each theme, see the
# documentation.
#
+html_logo = '_static/processing-chain-logo-small.png'
+html_favicon = '_static/processing-chain-favicon.ico'
html_theme_options = {
- 'logo_only': False,
+ 'logo_only': True,
'display_version': True,
'prev_next_buttons_location': 'bottom',
'style_external_links': False,
@@ -65,7 +77,7 @@
# Toc options
'collapse_navigation': False,
'sticky_navigation': True,
- 'navigation_depth': 4,
+ 'navigation_depth': 2,
'includehidden': True,
'titles_only': False
}
diff --git a/docs/config.rst b/docs/config.rst
index ecfdb124..f38ef329 100644
--- a/docs/config.rst
+++ b/docs/config.rst
@@ -1,333 +1,190 @@
.. _config-section:
-The processing chain uses cases to describe a simulation. A case is a
-subdirectory in ``cases/``, containing a :ref:`config.py` and several
-:ref:`namelists` (for example ``int2lm_INPUT.cfg``) which define the
-simulation.
+The Processing Chain uses cases to describe a simulation. A case is a
+subdirectory in ``cases/``, containing a ``config.yaml`` and several
+`namelist` (e.g., ``int2lm_INPUT.cfg``) and `runscript` (e.g.,
+``icon_runjob.cfg``) :ref:`templates`,
+which define the simulation.
-.. _config.py:
+.. _config.yaml:
Configuration File
------------------
-The configuration file contains most of the information that the :ref:`jobs-section` need to prepare and run the simulation, for example the location of the input data.
-This configuration-file is imported as a module in ``run_chain.py``, and therefore
-it can contain python expression which are evaluated at runtime.
-
-..
- Creating these tables by hand is a pain. Use the script/csv in the tables/ director
-
-|
-
-General Variables in ``config.py``
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+
-| **Name** | **Description** | **Used in** |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+
-| ``target`` | **COSMO** or **COSMOART**, defaults to **COSMO** if omitted | all |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+
-| ``subtarget`` | **NONE** or **SPINUP**, defaults to **NONE** if omitted | all |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+
-| ``constraint`` | Either 'gpu' or 'mc'. Controls on which nodes int2lm and cosmo run. Make sure your executables are compiled for the chosen architecure. | :func:`jobs.cosmo.main`, :func:`jobs.icon.main`, :func:`jobs.int2lm.main` |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+
-| ``mail_address`` | The processing-chain sends encountered errors to this address | :func:`jobs.tools.__init__.send_mail` |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+
-| ``compute_host`` | On which infrastructure the processing chain is run. Usually 'daint' | :func:`jobs.post_cosmo.main`, :func:`jobs.icon.main`, :func:`jobs.reduce_output.main` |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+
-| ``compute_queue`` | Either 'debug' or 'normal' | :func:`jobs.int2lm.main`, :func:`jobs.cosmo.main`, :func:`jobs.icon.main` |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+
-| ``compute_account`` | Which project the simulation belongs to | :func:`jobs.int2lm.main`, :func:`jobs.cosmo.main`, :func:`jobs.icon.main` |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+
-| ``chain_src_dir`` | Path to the root of the chain | all |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+
-| ``casename`` | Name of the simulation, the same as the directory-name the ``config.py``-file is in | all |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+
-| ``input_root`` | Path to zhe root of the input-direcetory tree | all |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+
-| ``output_root`` | Path to where the results, logs and nameslists are copied to after the simulation is done | :func:`jobs.post_cosmo.main`, :func:`jobs.icon.main` |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+
-| ``work_root`` | Path to where the processing chain copies the input files to and starts the simulation from | all |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+
-
-|
-
-Variables for :func:`jobs.biofluxes.main`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+
-| **Name** | **Description** | **Target** |
-+------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+
-| ``vprm_dir`` | Path to the directory containing bioflux-files | COSMO |
-+------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+
-| ``vprm_prefix`` | Prefix of the the bioflux-files. Filenames are assumed to be ``{vprm_prefix}YYYYMMDDHH.nc``. If multiple bioflux-datasets exists, this is a list of prefixes. All files are assumed to be in ``vprm_dir`` | COSMO |
-+------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+
-
-|
-
-Variables for :func:`jobs.cosmo.main`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| **Name** | **Description** | **Target** |
-+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``cosmo_bin`` | Path to the cosmo(art) executable | COSMO, COSMO-ART |
-+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``cosmo_namelist`` | Path to the cosmo namelist-templates, ending in ``cosmo_INPUT_``. The ending, for example ``IO`` or ``ORG``, is added by the cosmo-job | COSMO, COSMO-ART |
-+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``cosmo_runjob`` | Path to the cosmo runjob-template | COSMO, COSMO-ART |
-+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``cosmo_walltime`` | Requested time for the cosmo slurm-batchjob | COSMO, COSMO-ART |
-+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``cosmo_nodes`` | Number of nodes for the cosmo slurm-batchjob | COSMO, COSMO-ART |
-+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``ntasks_per_node`` | Number of tasks per node | COSMO, COSMO-ART |
-+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``cosmo_np_x`` | Number of processes in the x direction | COSMO, COSMO-ART |
-+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``cosmo_np_y`` | Number of processes in the y direction | COSMO, COSMO-ART |
-+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``cosmo_np_io`` | Number of processes for IO | COSMO, COSMO-ART |
-+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``cosmo_np_tot`` | Total number of processes | COSMO, COSMO-ART |
-+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-
-
-Variables for :func:`jobs.emissions.main`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+-----------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| **Name** | **Description** | **Target** |
-+-----------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``emissions_dir`` | Path to the input directory where the emissions-files are found. If there are multiple emissions-datasets, this is a list of paths to the directories of the datasets. | COSMO, COSMO-ART |
-+-----------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``emis_gridname`` | Prefix of the emissions-files. List for multiple datasets. Emission-filenames are assumed to be ``{emis_gridname}YYYYMMDD.nc`` | COSMO, COSMO-ART |
-+-----------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-
-|
-
-Variables for :func:`jobs.icon.main`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+------------------------------+--------------------------------------------------------------------------------------+--------------------+
-| **Name** | **Description** | **Target** |
-+------------------------------+--------------------------------------------------------------------------------------+--------------------+
-| ``icon_bin`` | Path to the ICON executable | ICON |
-+------------------------------+--------------------------------------------------------------------------------------+--------------------+
-| ``icon_runjob`` | Path to the ICON runjob template | ICON |
-+------------------------------+--------------------------------------------------------------------------------------+--------------------+
-| ``icon_namelist_master`` | Path to the ICON master namelist template | ICON |
-+------------------------------+--------------------------------------------------------------------------------------+--------------------+
-| ``icon_namelist_nwp`` | Path to the ICON NWP namelist template | ICON |
-+------------------------------+--------------------------------------------------------------------------------------+--------------------+
-| ``icon_walltime`` | Requested time for the ICON slurm-batchjob | ICON |
-+------------------------------+--------------------------------------------------------------------------------------+--------------------+
-| ``icon_np_tot`` | Total number of processes | ICON |
-+------------------------------+--------------------------------------------------------------------------------------+--------------------+
-| ``input_root_mapping`` | Path to the ICON dictionairy files | ICON |
-+------------------------------+--------------------------------------------------------------------------------------+--------------------+
-| ``map_file_ana`` | Name of the ICON dictionary file for analysis data | ICON |
-+------------------------------+--------------------------------------------------------------------------------------+--------------------+
-| ``latbc_filename`` | Name of the lateral boundary files (including key-words) for LAM-simulations | ICON |
-+------------------------------+--------------------------------------------------------------------------------------+--------------------+
-| ``inidata_filename`` | Name of the intial condition file | ICON |
-+------------------------------+--------------------------------------------------------------------------------------+--------------------+
-
-|
-
-Variables for :func:`jobs.int2lm.main`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| **Name** | **Description** | **Target** |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``meteo_prefix`` | Prefix of the meteo-files. Meteo-filenames are assumed to be ``{meteo_prefix}YYMMDD`` | COSMO, COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``int2lm_extpar_dir`` | Path to the directory containing the extpar-file for int2lm | COSMO, COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``int2lm_extpar_file`` | The name of the int2lm extpar-file | COSMO, COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``int2lm_bin`` | Path to the int2lm executable | COSMO, COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``int2lm_namelist`` | Path to the int2lm namelist-template | COSMO, COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``int2lm_runjob`` | Path to the int2lm runjob-template | COSMO, COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``int2lm_walltime`` | Requested time for the int2lm slurm-batchjob | COSMO, COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``int2lm_nodes`` | Number of nodes for the int2lm slurm-batchjob | COSMO, COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``int2lm_ntasks_per_node`` | Number of tasks per node | COSMO, COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``int2lm_np_x`` | Number of processes in the x direction | COSMO, COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``int2lm_np_y`` | Number of processes in the y direction | COSMO, COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``int2lm_np_tot`` | Total number of processes | COSMO, COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``int2lm_libgrib_dir`` | Path to the libgrib-directory used by the int2lm-executable | COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``int2lm_lu_dir`` | Path to the directory containing the landuse file | COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``int2lm_lu_file`` | Filename (inlcuding ending) of the landuse file | COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``int2lm_pft_dir`` | Path to the directory containing the plant functional type file | COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``int2lm_pft_dir`` | Filename (including ending) of the plant functional type file | COSMO-ART |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-| ``post_int2lm_species`` | List of variables for the post_int2lm-job | COMSO |
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+
-
-|
-
-Variables for :func:`jobs.prepare_data.main`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| **Name** | **Description** | **Target** |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``meteo_dir`` | Path to the directory where the meteo-files are found. For a nested run, this is the casename of the mother-run. In that case, ``meteo_prefix`` and ```meteo_inc`` can be omitted | COSMO, COSMO-ART, ICON |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``meteo_prefix`` | Prefix of the meteo-files. Meteo-filenames are assumed to be ``{meteo_prefix}YYMMDD`` | COSMO, COSMO-ART, ICON |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``meteo_nameformat`` | Format for meteo files | COSMO, COSMO-ART, ICON |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``meteo_inc`` | Increment in hours between meteo-files | COSMO, COSMO-ART, ICON |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``icontools_dir`` | Path to the icontools executables | ICON |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``iconremap_bin`` | Name of the iconremap executable | ICON |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``iconsub_bin`` | Name of the iconsub executable | ICON |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``icontools_parameter`` | Dictionary containing names of the icontools runscript and namelist templates | ICON |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``mozart_file_orig`` | Path to input-file for mozart-files | COSMO-ART |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``mozart_dir_proc`` | Path to the processed mozart-files. Processed mozart-files are stored here, if there are files found here then ``file_dir_orig`` is not used | COSMO-ART |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``mozart_inc`` | Increment in hours between mozart-files | COSMO-ART |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``mozart_prefix`` | Prefix of the mozart-files | COSMO-ART |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``cams_dir_orig`` | Path to input-directory for CAMS-files | COSMO |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``cams_dir_proc`` | Path to the processed CAMS-files. Processed CAMS-files are stored here, if there are files found here then ``cams_dir_orig`` is not used | COSMO |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``cams_parameters`` | Parameters for the processing of CAMS-data, more information in ``config.py`` of example case | COSMO |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``icontools_dir`` | Path to the icontools executables | ICON |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``iconremap_bin`` | Name of the iconremap executable | ICON |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``iconsub_bin`` | Name of the iconsub executable | ICON |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``icontools_parameter`` | Dictionary containing names of the icontools runscript and namelist templates | ICON |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``input_root_chem`` | Path to the chemical input files | ICON |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``chem_prefix`` | Prefix of the chemical input files | ICON |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``chem_nameformat`` | Name format of the chemical input files | ICON |
-+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-
-|
-
-Variables for :func:`jobs.oae.main`
+The case-dependent configuration file ``cases/<casename>/config.yaml``
+contains most of the information that the :ref:`jobs` need to prepare
+and run the simulation, for example the location of the input data.
+This configuration file is loaded in ``run_chain.py`` as an instance
+of the ``Config()`` class in ``config.py``.
+
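+A minimal sketch of what this loading step might look like (illustrative only;
+the actual ``Config`` class does more, e.g. deriving further attributes):
+
+.. code-block:: python
+
+    import yaml
+
+    class Config:
+        def __init__(self, casename):
+            # Attach every entry of the case's YAML file as an attribute,
+            # so that e.g. cfg.startdate mirrors the 'startdate' key.
+            with open(f"cases/{casename}/config.yaml") as f:
+                for key, value in yaml.safe_load(f).items():
+                    setattr(self, key, value)
+
+    cfg = Config('cosmo-ghg-spinup-test')
+    print(cfg.workflow)  # -> cosmo-ghg
+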
+Configuration Variables
+~~~~~~~~~~~~~~~~~~~~~~~
+
+This is a non-exhaustive list containing the most important configuration variables:
+
++------------------------+-------------------------------------------------------------------------+
+| Variable | Description |
++========================+=========================================================================+
+|| ``case_path`` || The path to the case directory under ``cases/`` for the specified |
+|| || casename. |
++------------------------+-------------------------------------------------------------------------+
+| ``casename`` | The name of the case. Derived from the folder name under ``case_path``. |
++------------------------+-------------------------------------------------------------------------+
+|| ``chain_src_dir`` || The source directory for the processing chain, typically the current |
+|| || working directory. |
++------------------------+-------------------------------------------------------------------------+
+| ``compute_account`` | The compute account to be used based on user information. |
++------------------------+-------------------------------------------------------------------------+
+| ``constraint`` | The computational constraint (``gpu`` or ``mc``). |
++------------------------+-------------------------------------------------------------------------+
+|| ``email`` || The user's email address, initially set to None and updated using the |
+|| || set_email method. |
++------------------------+-------------------------------------------------------------------------+
+|| ``enddate`` || The end date of the simulation in ISO 8601 format |
+|| || (``YYYY-MM-DDTHH:mm:ssZ``). |
++------------------------+-------------------------------------------------------------------------+
+| ``jobs`` | List of job-names to be executed. |
++------------------------+-------------------------------------------------------------------------+
+| ``log_finished_dir`` | The directory for finished log files. |
++------------------------+-------------------------------------------------------------------------+
+| ``log_working_dir`` | The directory for working log files. |
++------------------------+-------------------------------------------------------------------------+
+| ``ntasks_per_node`` | The number of tasks per node, based on the node type. |
++------------------------+-------------------------------------------------------------------------+
+| ``restart_step`` | The restart step in ISO 8601 format. |
++------------------------+-------------------------------------------------------------------------+
+| ``restart_step_hours`` | The restart step in hours, derived from the ``restart_step`` attribute. |
++------------------------+-------------------------------------------------------------------------+
+| ``run_on`` | The architecture the model runs on (``cpu`` or ``gpu``). |
++------------------------+-------------------------------------------------------------------------+
+| ``spinup`` | Spin-up duration in hours. Activates spinup behavior if set. |
++------------------------+-------------------------------------------------------------------------+
+|| ``startdate`` || The start date of the simulation in ISO 8601 format |
+|| || (``YYYY-MM-DDTHH:mm:ssZ``). |
++------------------------+-------------------------------------------------------------------------+
+| ``user_mail`` | The user's email address, determined based on system configuration. |
++------------------------+-------------------------------------------------------------------------+
+| ``user_name``          | The username, determined based on system information.                   |
++------------------------+-------------------------------------------------------------------------+
+| ``workflow`` | The name of the workflow from ``workflows.yaml`` or a self-defined one. |
++------------------------+-------------------------------------------------------------------------+
+|| ``work_root``         || The working directory where all output is stored. Should be somewhere  |
+||                       || on ``$SCRATCH``. By default, it is set to ``<chain_src_dir>/work``.    |
++------------------------+-------------------------------------------------------------------------+
+
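+For illustration, the ISO 8601 dates and durations above translate to plain
+Python as follows (using the values from the ``cosmo-ghg-spinup-test`` case
+shown below; a sketch of the conversions only, not the chain's actual code):
+
+.. code-block:: python
+
+    from datetime import datetime
+
+    # Dates follow YYYY-MM-DDTHH:mm:ssZ
+    startdate = datetime.strptime('2015-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ')
+    enddate = datetime.strptime('2015-01-01T18:00:00Z', '%Y-%m-%dT%H:%M:%SZ')
+
+    # A restart_step of PT6H corresponds to restart_step_hours = 6
+    restart_step_hours = int('PT6H'.removeprefix('PT').removesuffix('H'))
+
+    # Number of restart cycles over the simulation period
+    n_cycles = (enddate - startdate).total_seconds() / 3600 / restart_step_hours
+    print(n_cycles)  # -> 3.0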
+
+Variables to Set in ``config.yaml``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-| **Name** | **Description** | **Target** |
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-| ``oae_dir`` | Path to where the online emission datasets are found | COMSO, ICON |
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-| ``oae_gridded_emissions_nc`` | Name of the main emission netCDF file (online emissions) | COSMO, ICON |
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-| ``oae_vertical_profiles_nc`` | Name of the netCDF file for vertical profile scaling (online emissions) | COMSO, ICON |
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-| ``oae_hourofday_nc`` | Name of the netCDF file for 'hour of day' scaling (online emissions) | COSMO, ICON |
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-| ``oae_hourofyear_nc`` | Name of the netCDF file for 'hour of year' scaling (online emissions) | COSMO, ICON |
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-| ``oae_dayofweek_nc`` | Name of the netCDF file for 'day of week' scaling (online emissions) | COSMO, ICON |
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-| ``oae_monthofyear_nc`` | Name of the netCDF file for 'month of year' scaling (online emissions) | COSMO, ICON |
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-
-|
-
-Variables for :func:`jobs.obs_nudging.main`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| **Name** | **Description** | **Target** |
-+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``bs_nudging_dir`` | Path to where the nudging-datasets are found | COSMO, COSMO-ART |
-+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``obs_nudging_prefixes`` | List of prefixes of nuding-files to copy | COMSO, COSMO-ART |
-+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``obs_nudging_date_format`` | Date format of the nudging-files. If the obs-nudging-file is called ``cdfin_temp-20150204000000-20150205000000``, the dateformat is ``-%Y%m%d%H%M%S`` | COSMO, COSMO-ART |
-+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-
-|
-
-Variables for :func:`jobs.octe.main`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+------------------+-------------------------------------------------------------------------------------------------------------------------+--------------------+
-| **Name** | **Description** | **Target** |
-+------------------+-------------------------------------------------------------------------------------------------------------------------+--------------------+
-| ``octe_lambdas`` | Path to the netCDF file containing the lambda values used for generating online carbontracker ensembles | COSMO |
-+------------------+-------------------------------------------------------------------------------------------------------------------------+--------------------+
-| ``octe_maps`` | Path to the netCDF file containing the regional and pft-type maps for generating online carbontracker ensembles | COSMO |
-+------------------+-------------------------------------------------------------------------------------------------------------------------+--------------------+
-
-|
-
-Variables for :func:`jobs.online_vprm.main`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+-------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+
-| **Name** | **Description** | **Target** |
-+-------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+
-| ``online_vprm_dir`` | Path to the directory containing netCDF files with pre-processed MODIS reflectance (``modis_filename``) and vegetation class fraction (``vegetation_filename``) data | COSMO |
-+-------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+
-| ``modis_filename`` | Name of the pre-processed MODIS netCDF file | COSMO |
-+-------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+
-| ``vegetation_filename`` | Name of the pre-processed vegetation class fraction netCDF file | COSMO |
-+-------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+
-
-|
-
-Variables for :func:`jobs.photo_rate.main`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+-------------------------+------------------------------------------+--------------------+
-| **Name** | **Description** | **Target** |
-+-------------------------+------------------------------------------+--------------------+
-| ``photo_rate_file`` | Path to the photolysis-rate file | COSMO-ART |
-+-------------------------+------------------------------------------+--------------------+
-
-|
-
-Variables for :func:`jobs.post_int2lm.main`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+-----------------------------+---------------------------------------------------+--------------------+
-| **Name** | **Description** | **Target** |
-+-----------------------------+---------------------------------------------------+--------------------+
-| ``post_int2lm_species`` | List of variables for the post_int2lm-job | COSMO |
-+-----------------------------+---------------------------------------------------+--------------------+
-
-|
-
-Variables for :func:`jobs.reduce_output.main`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| **Name** | **Description** | **Target** |
-+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``output_levels`` | Number of output levels (starting from ground level) for the ``reduce_output`` job, defaults to ``-1`` if omitted, which means that all levels are kept | COSMO, COSMO-ART |
-+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``convert_gas`` | Switch to convert the unit of trace gases to ppm or ppb. This leads to a better compression rate. Defaults to ``True`` of omitted. | COSMO, COSMO-ART |
-+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-
-|
-Variables for :func:`jobs.verify_chain.main`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| **Name** | **Description** | **Target** |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``reference_dir`` | Path to the reference output | COSMO, COSMO-ART, ICON |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``output_dir`` | Path to the output of cosmo. If the :func:`jobs.post_cosmo.main` job is executed, this can be set to ``None`` and the path of the post_cosmo-job will be used | COSMO, COSMO-ART, ICON |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``values_to_check`` | Which files and variables are compared. This is a dict with a tuple of filenames as key. The first key element is the filename of the reference file, the second key element is the filename of the output-file of cosmo (usually ``lffdYYYYMMDDHH.nc`` and the value is a list of variables to compare between these two files) | COSMO, COSMO-ART, ICON |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
+Here are two examples showing which general variables should be set by the
+user in the case configuration file.
+
+Header of ``config.yaml`` for the ``cosmo-ghg-spinup-test`` case
+================================================================
+
+.. code-block:: yaml
+
+ workflow: cosmo-ghg
+ constraint: gpu
+ ntasks_per_node: 12
+ restart_step: PT6H
+ spinup: 3
+ startdate: 2015-01-01T00:00:00Z
+ enddate: 2015-01-01T18:00:00Z
+
+Header of ``config.yaml`` for the ``icon-art-oem-test`` case
+============================================================
+
+.. code-block:: yaml
+
+ workflow: icon-art-oem
+ constraint: gpu
+ run_on: cpu
+ compute_queue: normal
+ ntasks_per_node: 12
+ restart_step: PT6H
+ startdate: 2018-01-01T00:00:00Z
+ enddate: 2018-01-01T12:00:00Z
+
+ eccodes_dir: ./input/eccodes_definitions
+ iconremap_bin: iconremap
+ iconsub_bin: iconsub
+ latbc_filename: ifs__lbc.nc
+ inidata_prefix: ifs_init_
+ inidata_nameformat: '%Y%m%d%H'
+ inidata_filename_suffix: .nc
+ output_filename: icon-art-oem-test
+ filename_format: _DOM_
+ lateral_boundary_grid_order: lateral_boundary
+ art_input_folder: ./input/icon-art-oem/ART
+
+Further Variables
+=================
+
+Furthermore, there are additional variables to set that are tied to the
+individual jobs. These config variables are themselves dictionaries.
+Let's have a look at an example for the ``cfg.meteo`` variable:
+
+.. code-block:: yaml
+
+ meteo:
+ dir: ./input/cosmo-ghg/meteo
+ prefix: laf
+ nameformat: laf%Y%m%d%H
+ inc: 1
+
+These config variables can be accessed via ``cfg.meteo['dir']``, ``cfg.meteo['prefix']``, etc.
+as they are Python dictionaries.
+
+.. hint::
+    In :ref:`namelist and runscript template` files
+    (see next section), accessing dictionary entries this way does not work
+    because of how the ``.format()`` method is implemented in Python. For that
+    reason, the Processing Chain automatically creates new variables of the
+    form ``cfg.meteo_dir``, ``cfg.meteo_prefix``, etc. at the start, making
+    them accessible to namelist and runjob templates.
+
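+A sketch of how this flattening might be implemented (illustrative only; the
+chain's actual implementation may differ in details):
+
+.. code-block:: python
+
+    from types import SimpleNamespace
+
+    # Stand-in for a loaded configuration object (illustrative)
+    cfg = SimpleNamespace(meteo={'dir': './input/cosmo-ghg/meteo', 'prefix': 'laf'})
+
+    def flatten_dict_variable(cfg, name):
+        """Expose cfg.<name>['key'] additionally as cfg.<name>_<key>."""
+        for key, value in getattr(cfg, name).items():
+            setattr(cfg, f'{name}_{key}', value)
+
+    flatten_dict_variable(cfg, 'meteo')
+    # Templates can now use attribute access, which str.format() supports:
+    print('meteo_dir = {cfg.meteo_dir}'.format(cfg=cfg))
+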
+List of Dictionary Variables
+****************************
+
+The following is a list of dictionary variables that exist for the Processing Chain.
+For the individual elements of those variables, please refer to the ``config.yaml``
+files within the test cases.
+
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| Dictionary variable | Used in job |
++=======================+=====================================================================================================================================+
+| ``meteo`` | ``prepare_cosmo``, ``prepare_icon``, ``icontools``, ``int2lm``, ``icon`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``icontools_runjobs`` | ``icontools`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``input_files`` | ``prepare_icon`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``chem`` | ``prepare_icon`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``era5`` | ``prepare_icon`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``cams`` | ``prepare_cosmo`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``emissions`` | ``emissions`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``vprm`` | ``biofluxes`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``oem`` | ``oem``, ``cosmo`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``online_vprm`` | ``online_vprm`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``int2lm`` | ``prepare_cosmo``, ``emissions``, ``biofluxes``, ``octe``, ``int2lm``, ``post_int2lm``, ``cosmo``, ``post_cosmo`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``post_int2lm`` | ``post_int2lm`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``cosmo`` | ``reduce_output``, ``oem``, ``photo_rate``, ``octe``, ``check_output``, ``post_cosmo``, ``cosmo``, ``obs_nudging``, ``online_vprm`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``reduce_output`` | ``reduce_output`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``post_cosmo`` | ``post_cosmo`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``verify_chain`` | ``verify_chain`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``icon`` | ``oem``, ``prepare_icon``, ``icon`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
diff --git a/docs/environment.rst b/docs/environment.rst
new file mode 100644
index 00000000..58707f7a
--- /dev/null
+++ b/docs/environment.rst
@@ -0,0 +1,62 @@
+.. _environment-section:
+
+Conda Environment
+=================
+
+The following steps allow you to create and use your own virtual environment to run the Processing Chain. We recommend using a conda environment to run the provided scripts. Please follow the installation instructions below; they only need to be performed once.
+
+1. Install Miniconda
+~~~~~~~~~~~~~~~~~~~~
+
+Install Miniconda as a user-specific installation, e.g., in your ``$HOME`` directory, which is the default location.
+
+.. note::
+ Only conda itself should be installed in your ``$HOME``. All environments should be stored in your ``$PROJECT`` directory; otherwise, you risk filling up your ``$HOME`` directory. See below for instructions.
+
+To install the latest Miniconda, type:
+
+.. code-block:: bash
+
+ wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+ bash Miniconda3-latest-Linux-x86_64.sh
+
+Further details on Miniconda can be found on the `Miniconda documentation page <https://docs.conda.io/en/latest/miniconda.html>`_.
+
+2. Create the Conda Environment
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Create a conda environment ``proc-chain`` and install the requirements:
+
+.. code-block:: bash
+
+ conda env create --prefix $PROJECT/envs/proc-chain -f env/environment.yml
+
+To be able to activate your conda environment by simply using ``conda activate proc-chain`` instead of the full path, add the following to your ``.bashrc``:
+
+.. code-block:: bash
+
+ export CONDA_ENVS_PATH=$PROJECT/envs
+
+Activate the environment (use "source activate" in case "conda activate" does not work):
+
+.. code-block:: bash
+
+ conda activate proc-chain
+
+If you already have the environment but want to update it:
+
+.. code-block:: bash
+
+ conda env update --file env/environment.yml --prune
+
+3. Store User-Specific Data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To register your email address and standard project account, store them in these files within your home directory:
+
+.. code-block:: bash
+
+    echo <your account> > ~/.acct
+    echo <your email address> > ~/.forward
+
+These settings are optional. The Processing Chain will first check the content of those files. If desired, the corresponding variables can be overridden by setting the ``compute_account`` and ``user_mail`` variables in the ``config.yaml`` file.
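+
+As an illustration, the lookup of these files could be done like the following
+sketch (assumed file locations as above; not necessarily the chain's exact logic):
+
+.. code-block:: python
+
+    from pathlib import Path
+
+    def read_user_file(filename, default=None):
+        """Return the stripped content of ~/<filename>, or a default."""
+        path = Path.home() / filename
+        return path.read_text().strip() if path.is_file() else default
+
+    compute_account = read_user_file('.acct')
+    user_mail = read_user_file('.forward')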
diff --git a/docs/features.rst b/docs/features.rst
new file mode 100644
index 00000000..c0f14d0a
--- /dev/null
+++ b/docs/features.rst
@@ -0,0 +1,13 @@
+.. _features-section:
+
+Feature Overview
+================
+
+- Asynchronous submission of compute jobs to the HPC queue
+- Intuitive definition of job dependencies
+- Automatic cycling over time periods including folder structure creation
+- Various jobs for pre- and post-processing steps
+- Using model built-in restarts or custom spinup
+- Nested runs possible
+- Easy creation of own cases and workflows
+- Various examples for COSMO and ICON workflows available
\ No newline at end of file
diff --git a/docs/file-structure.rst b/docs/file-structure.rst
deleted file mode 100644
index 770b7e54..00000000
--- a/docs/file-structure.rst
+++ /dev/null
@@ -1,32 +0,0 @@
-.. _file-structure-section:
-
-File Structure
---------------
-
-::
-
- + README.rst
- + run_script.py # main script
- + jobs/
- | + *.py # jobs-files
- | \ tools/ # tools-files
- + cases/ # example test cases
- | + cosmo-ghg-11km-test/ # COSMO-GHG example
- | | + config.py # user-configuration
- | | \ *.cfg # templates for namelists & batch-jobs
- | + cosmo-art-mother-test/ # COSMO-ART example (mother domain)
- | | + config.py
- | | \ *.cfg
- | + cosmo-art-nested-test/ # COSMO-ART example (nested domain)
- | | + config.py
- | | \ *.cfg
- | \ icon-test # ICON example
- | | + config.py
- | | \ *.cfg
- | \ icon-oem-test # ICON-OEM example
- | | + config.py
- | | \ *.cfg
- + docs/
- + source/ # *.rst documentation files
- \ Makefile # Makefile for generating docs
-
diff --git a/docs/flowchart.md b/docs/flowchart.md
new file mode 100644
index 00000000..202d1ac2
--- /dev/null
+++ b/docs/flowchart.md
@@ -0,0 +1,5 @@
+flowchart LR
+ prepare_icon --> icontools
+ icontools --> prepare_art
+ prepare_art --> icon_1
+ icon_1 --> icon_2
diff --git a/docs/functions.rst b/docs/functions.rst
new file mode 100644
index 00000000..4305a932
--- /dev/null
+++ b/docs/functions.rst
@@ -0,0 +1,155 @@
+.. _functions-section:
+
+Jobs
+----
+
+* :func:`jobs.biofluxes.main`
+* :func:`jobs.check_output.main`
+* :func:`jobs.cosmo.main`
+* :func:`jobs.emissions.main`
+* :func:`jobs.icon.main`
+* :func:`jobs.icontools.main`
+* :func:`jobs.int2lm.main`
+* :func:`jobs.obs_nudging.main`
+* :func:`jobs.octe.main`
+* :func:`jobs.oem.main`
+* :func:`jobs.online_vprm.main`
+* :func:`jobs.photo_rate.main`
+* :func:`jobs.post_cosmo.main`
+* :func:`jobs.post_int2lm.main`
+* :func:`jobs.prepare_cosmo.main`
+* :func:`jobs.prepare_icon.main`
+* :func:`jobs.reduce_output.main`
+* :func:`jobs.verify_chain.main`
+
+-------------------------------------------
+
+.. autofunction:: jobs.biofluxes.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.check_output.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.cosmo.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.emissions.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.icon.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.icontools.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.int2lm.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.obs_nudging.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.octe.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.oem.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.online_vprm.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.photo_rate.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.post_cosmo.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.post_int2lm.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.prepare_cosmo.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.prepare_icon.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.reduce_output.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.verify_chain.main
+
+
+Tools
+-----
+
+The tools are a collection of functions used by the jobs. Most of those
+functions are well documented and listed here. For others, one may take
+a look into ``jobs/tools`` directly.
+
+* :func:`jobs.tools.cams4int2cosmo.main`
+* :func:`jobs.tools.check_model.check_model`
+* :func:`jobs.tools.comp_nc.datasets_equal`
+* :func:`jobs.tools.ctnoaa4int2cosmo.main`
+* :func:`jobs.tools.mozart2int2lm.main`
+* :func:`jobs.tools.reduce_output_start_end.main`
+* :func:`jobs.tools.string2char.main`
+* :func:`jobs.tools.vprmsplit.main`
+* :func:`jobs.tools.write_cosmo_input_ghg.main`
+* :func:`jobs.tools.write_int2lm_input_art.main`
+
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.cams4int2cosmo.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.check_model.check_model
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.comp_nc.datasets_equal
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.ctnoaa4int2cosmo.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.mozart2int2lm.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.reduce_output_start_end.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.string2char.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.vprmsplit.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.write_cosmo_input_ghg.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.write_int2lm_input_art.main
diff --git a/docs/howtorun.rst b/docs/howtorun.rst
index 65004786..9d074ee7 100644
--- a/docs/howtorun.rst
+++ b/docs/howtorun.rst
@@ -1,118 +1,245 @@
-.. _script-section:
+.. _howtorun-section:
How to Run
==========
The Python file ``run_chain.py`` in the root directory is the main script of the
Processing Chain.
-It reads the user's input from the command line and from the ``config.py`` file of the
+It reads the user's input from the command line and from the ``config.yaml`` file of the
respective case.
Then it will start the Processing Chain.
Starting the Chain
------------------
-The chain has to be run with the following command: ::
+The chain has to be run with the following command:
+
+.. code-block:: bash
+
+    $ ./run_chain.py <casename>
+
+Here, ``<casename>`` is the name of a directory in the ``cases/``-directory where
+there is a ``config.yaml``-file specifying the configuration, as well as templates
+for the necessary namelist files for **int2lm**, **COSMO** or **ICON**. It may also
+contain additional runscripts to be submitted via ``sbatch``.
+
+.. hint::
+ Technically, you can run several cases (instead of a single case) in one command,
+ which is useful for nested runs, for example. This can be achieved by running
+   ``./run_chain.py <case1> <case2>``. With that, the full chain is executed for
+ ``case1`` first, and afterwards for ``case2``.
+
+There are several optional arguments available to change the behavior of the chain:
+
+.. code-block:: bash
+
+ $ ./run_chain.py -h
+
+* ``-h``, ``--help``
+ Show this help message and exit.
+* ``-j [JOB_LIST ...]``, ``--jobs [JOB_LIST ...]``
+ List of job names to be executed.
+  A job is a ``.py`` file in ``jobs/`` with a ``main()`` function, which
+ handles one aspect of the Processing Chain, for
+ example copying ``meteo`` input data or launching a
+ job for ``int2lm``. Jobs are executed in the order
+ in which they are given here. If no jobs are
+ given, default jobs will be executed as defined
+ in ``config/models.yaml``.
+* ``-f``, ``--force``
+ Force the Processing Chain to redo all specified
+ jobs, even if they have been started already or
+ were finished previously. WARNING: Only logfiles
+ get deleted, other effects of a given job
+ (copied files etc.) are simply overwritten. This
+ may cause errors or unexpected behavior.
+* ``-r``, ``--resume``
+ Resume the Processing Chain by restarting the
+ last unfinished job. WARNING: Only the logfile
+ gets deleted, other effects of a given job
+ (copied files etc.) are simply overwritten. This
+ may cause errors or unexpected behavior.
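+
+For example, to force a rerun of only the preprocessing and ``int2lm`` jobs of
+the ``cosmo-ghg-test`` case, one could combine the flags described above
+(a hypothetical invocation):
+
+.. code-block:: bash
+
+    $ ./run_chain.py cosmo-ghg-test -j prepare_cosmo int2lm -f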
- $ python run_chain.py -j [jobs]
-
-``<casename>`` is the name of a directory in the ``cases/``-directory where
-there is a ``config.py``-file specifying the configurations, as well as templates
-for the necessary namelist files for **int2lm**, **COSMO** or **ICON**.
-
-If you don't supply a joblist, the default joblist will be executed.
-
-For **COSMO**, that is ``prepare_data`` ``emissions`` ``biofluxes`` ``int2lm``
-``post_int2lm`` ``cosmo`` ``post_cosmo``,
-
-For **COSMOART** it is ``prepare_data`` ``emissions`` ``obs_nudging``
-``photo_rate`` ``int2lm`` ``cosmo`` ``post_cosmo``.
-
-For **ICON** it is ``prepare_data`` ``icon``.
-
-For **ICONART** it is ``prepare_data`` ``icon``.
-
-For **ICONARTOEM** it is ``prepare_data`` ``oae`` ``icon``.
-
-The model type can be chosen by setting the variable ``target`` in the ``config.py``-file.
-
-To run the **COSMO-GHG** example test case, run::
-
- $ python run_chain.py cosmo-ghg-11km-test 2015-01-01 0 24 -j prepare_data emissions biofluxes int2lm post_int2lm cosmo post_cosmo
-
-To run the **COSMO-ART** example case, run::
-
- $ python run_chain.py cosmo-art-mother-test cosmo-art-nested-test 2015-06-26 0 24 -j prepare_data emissions obs_nudging photo_rate int2lm cosmo post_cosmo
-
-To run the **ICON** or **ICON-ART** example cases, run::
-
- $ python run_chain.py icon-test 2018-01-01 0 24 -j prepare_data icon
-
-or::
-
- $ python run_chain.py icon-art-test 2018-01-01 0 24 -j prepare_data icon
-
-To run the **ICON-OEM** example cases, run::
-
- $ python run_chain.py icon-oem-test 2018-01-01 0 24 -j prepare_data oae icon
-
-or::
-
- $ python run_chain.py icon-oem-ensembles-test 2018-01-01 0 24 -j prepare_data oae icon
-
What it Does
------------
-The script ``run_chain.py`` reads the command-line arguments and the config-file.
-It then calls the function :func:`run_chain.restart_runs` which divides the
-simuation time according to the specified restart steps. Then it calls
-:func:`run_chain.run_chain` for each sub-run. This function sets up the directory
-structure of the chain and then starts the specified :ref:`jobs`
-sequentially.
-
-The directory structure generated by the Processing Chain for a **COSMO** run
-looks like this:::
-
- cfg.work_root/
- + output/cfg.output_root/
- \ /cfg.chain_root/
- + cfg.int2lm_base/
- | + cfg.int2lm_input/
- | + cfg.int2lm_work/
- | \ cfg.int2lm_output/
- + cfg.cosmo_base/
- | + cfg.cosmo_work/
- | + cfg.cosmo_output/
- | \ cfg.cosmo_restart_out/
- \ checkpoints/
- + cfg.log_working_dir/
- \ cfg.log_finished_dir/
+The script ``run_chain.py`` reads the command line arguments and the config file
+from the specified case.
+It then calls the function :func:`run_chain.restart_runs`, which divides the
+simulation time according to the specified restart steps. Then it calls
+:func:`run_chain.run_chunk` for each part (chunk) of the simulation workflow.
+This function sets up the directory structure of the chain and then submits the
+specified :ref:`jobs` via ``sbatch`` to the Slurm workload manager,
+taking job dependencies into account.
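+
+Conceptually, the splitting into chunks works as sketched below. This is a
+simplified illustration rather than the actual implementation, and the names
+(``split_into_chunks``, ``restart_step_hours``) are assumptions:
+
+.. code-block:: python
+
+    from datetime import datetime, timedelta
+
+    def split_into_chunks(startdate, enddate, restart_step_hours):
+        """Yield (chunk_start, chunk_end) pairs covering the simulation."""
+        chunk_start = startdate
+        while chunk_start < enddate:
+            chunk_end = min(
+                chunk_start + timedelta(hours=restart_step_hours), enddate)
+            yield chunk_start, chunk_end
+            chunk_start = chunk_end
+
+    # Two 6-hour chunks, matching the chunk ids shown further below
+    for start, end in split_into_chunks(datetime(2015, 1, 1, 0),
+                                        datetime(2015, 1, 1, 12), 6):
+        print(f"{start:%Y%m%d%H}_{end:%Y%m%d%H}")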
+
+Test Cases
+----------
+
+The following test cases are available:
+
+* ``cosmo-ghg-spinup-test``
+* ``cosmo-ghg-test``
+* ``icon-test``
+* ``icon-art-oem-test``
+* ``icon-art-global-test``
+
+To be able to run these test cases, it is necessary to provide the input data,
+to set up spack and to compile the models and tools. All of this is automated
+via the script::
+
+ $ ./jenkins/scripts/jenkins.sh
+
+This will run all the individual scripts in ``jenkins/scripts/``, which
+can also be launched separately if desired.
+
+These cases undergo regular testing to ensure that the Processing Chain runs
+correctly. A corresponding Jenkins plan is launched on a weekly basis and
+whenever it is triggered within a GitHub pull request.
+
+Directory Structure
+-------------------
+
+The directory structure generated by the Processing Chain for a ``cosmo-ghg``
+run looks like this:
+
+.. code-block:: bash
+
+ cfg.work_root/cfg.casename/
+ └── cfg.chain_root/
+ ├── checkpoints/
+ │ ├── cfg.log_working_dir/
+ │ ├── cfg.log_finished_dir/
+ ├── cfg.cosmo_base/
+ │ ├── cfg.cosmo_work/
+ │ ├── cfg.cosmo_output/
+ │ ├── cfg.cosmo_restart_out/
+ └── cfg.int2lm_base/
+ ├── cfg.int2lm_input/
+ ├── cfg.int2lm_work/
+ └── cfg.int2lm_output/
+
+As one can see, the chain creates working directories for both the ``int2lm``
+preprocessor and ``cosmo``. In addition, the ``checkpoints`` directory, which
+is created for every workflow, holds all the job logfiles. Whenever a job has
+successfully finished, its logfile is copied from the ``working`` to the
+``finished`` sub-directory.
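+
+A minimal sketch of that bookkeeping, assuming a ``cfg`` object that exposes
+the two log directories as attributes (as the jobs in this chain do):
+
+.. code-block:: python
+
+    import shutil
+    from pathlib import Path
+
+    def mark_job_finished(cfg, job_name):
+        """Copy a job's logfile from working/ to finished/ on success."""
+        src = Path(cfg.log_working_dir) / job_name
+        dst = Path(cfg.log_finished_dir) / job_name
+        shutil.copy(src, dst)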
-Running the ``cosmo-ghg-11km-test``-case therefore produces the following directories:::
-
- $SCRATCH/processing_chain/
- + output/cosmo-ghg-11km-test
- \ cosmo-ghg-11km-test/2015010100_0_24/
- + int2lm/
- | + input/
- | | + emissions/
- | | + extpart/
- | | + prepare_data/
- | | \ vprm/
- | + run/
- | | + int2lm # executable
- | | + INPUT
- | | \ run.job
- | \ output/
- + cosmo/
- | + run/
- | | + cosmo # executable
- | | + INPUT_*
- | | \ run.job
- | + output/
- | \ restart/
- \ checkpoints
- + working/ # 1 logfile per started job
- \ finished # 1 logfile per finished job
-
-
+Running the ``cosmo-ghg-test`` case therefore produces the following
+directories and files (showing directories four levels deep):
+
+.. code-block:: bash
+
+ work/cosmo-ghg-test
+ ├── 2015010100_2015010106/
+ │ ├── checkpoints/
+ │ │ ├── finished/
+ │ │ │ ├── biofluxes
+ │ │ │ ├── cosmo
+ │ │ │ ├── emissions
+ │ │ │ ├── int2lm
+ │ │ │ ├── oem
+ │ │ │ ├── online_vprm
+ │ │ │ ├── post_cosmo
+ │ │ │ ├── post_int2lm
+ │ │ │ └── prepare_cosmo
+ │ │ └── working/
+ │ │ ├── biofluxes
+ │ │ ├── cosmo
+ │ │ ├── emissions
+ │ │ ├── int2lm
+ │ │ ├── oem
+ │ │ ├── online_vprm
+ │ │ ├── post_cosmo
+ │ │ ├── post_int2lm
+ │ │ └── prepare_cosmo
+ │ ├── cosmo/
+ │ │ ├── input/
+ │ │ │ ├── oem/
+ │ │ │ └── vprm/
+ │ │ ├── output/
+ │ │ │ └── lffd*.nc
+ │ │ ├── restart/
+ │ │ │ └── lrff00060000o.nc
+ │ │ └── run/
+ │ │ ├── cosmo-ghg
+ │ │ ├── INPUT_*
+ │ │ ├── post_cosmo.job
+ │ │ ├── run.job
+ │ │ └── YU*
+ │ └── int2lm/
+ │ ├── input/
+ │ │ ├── emissions
+ │ │ ├── extpar
+ │ │ ├── icbc
+ │ │ ├── meteo
+ │ │ └── vprm
+ │ ├── output/
+ │ │ ├── laf*.nc
+ │ │ └── lbfd*.nc
+ │ └── run/
+ │ ├── INPUT
+ │ ├── INPUT_ART
+ │ ├── int2lm
+ │ ├── OUTPUT
+ │ ├── run.job
+ │ └── YU*
+ └── 2015010106_2015010112/
+ ├── checkpoints/
+ │ ├── finished/
+ │ │ ├── biofluxes
+ │ │ ├── cosmo
+ │ │ ├── emissions
+ │ │ ├── int2lm
+ │ │ ├── oem
+ │ │ ├── online_vprm
+ │ │ ├── post_cosmo
+ │ │ ├── post_int2lm
+ │ │ └── prepare_cosmo
+ │ └── working/
+ │ ├── biofluxes
+ │ ├── cosmo
+ │ ├── emissions
+ │ ├── int2lm
+ │ ├── oem
+ │ ├── online_vprm
+ │ ├── post_cosmo
+ │ ├── post_int2lm
+ │ └── prepare_cosmo
+ ├── cosmo/
+ │ ├── input/
+ │ │ ├── oem
+ │ │ └── vprm
+ │ ├── output/
+ │ │ └── lffd*.nc
+ │ ├── restart/
+ │ │ └── lrff00060000o.nc
+ │ └── run/
+ │ ├── cosmo-ghg
+ │ ├── INPUT_*
+ │ ├── post_cosmo.job
+ │ ├── run.job
+ │ └── YU*
+ └── int2lm/
+ ├── input/
+ │ ├── emissions
+ │ ├── extpar
+ │ ├── icbc
+ │ ├── meteo
+ │ └── vprm
+ ├── output/
+ │ ├── laf*.nc
+ │ └── lbfd*.nc
+ └── run/
+ ├── INPUT
+ ├── INPUT_ART
+ ├── int2lm
+ ├── OUTPUT
+ ├── run.job
+ └── YU*
+
+-------------------------------------------
+
+.. autofunction:: run_chain.run_chunk
+
+-------------------------------------------
+
+.. autofunction:: run_chain.restart_runs
diff --git a/docs/index.rst b/docs/index.rst
index 8e691530..169bf730 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -3,25 +3,48 @@
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
+Processing Chain
+================
+
+The Processing Chain is a Python-based workflow tool designed to streamline
+weather and climate simulations.
+It facilitates the preparation of essential input data, submission of compute
+jobs to the queue on CSCS HPC systems, and the implementation of post-processing
+steps.
+In addition to supporting standard versions of the COSMO and ICON models,
+it is equipped to handle various model variants, notably COSMO-GHG
+(Greenhouse Gas Extension) and ICON-ART (Aerosols and Reactive Trace Gases)
+
+The Processing Chain can be easily customized to meet your specific requirements.
+This includes defining custom workflows, creating your own simulation cases,
+and integrating new jobs and auxiliary scripts.
+
.. toctree::
:maxdepth: 2
:caption: Getting Started
- file-structure
+ features
+ environment
howtorun
.. toctree::
- :maxdepth: 3
+ :maxdepth: 2
:caption: Configuration
+ code-structure
config
namelists
.. toctree::
- :maxdepth: 3
- :caption: Jobs
+ :maxdepth: 2
+ :caption: Jobs & Workflows
jobs
- tools
+
+.. toctree::
+ :maxdepth: 2
+ :caption: API
+
+ functions
diff --git a/docs/jobs.rst b/docs/jobs.rst
index 90358b7c..96cae42b 100644
--- a/docs/jobs.rst
+++ b/docs/jobs.rst
@@ -3,54 +3,70 @@
Overview
--------
-The jobs described here are available for use in the processing chain.
-For every target, you can choose from a list of available jobs.
-As some jobs depend on the result of others, the order indicated here
-has to be respected.
-
-**COSMO**:
-
-1. :func:`jobs.biofluxes.main` | :func:`jobs.prepare_data.main` |
- :func:`jobs.emissions.main` | :func:`jobs.online_vprm.main` |
- :func:`jobs.oae.main` | :func:`jobs.obs_nudging.main`
-2. :func:`jobs.int2lm.main`
-3. :func:`jobs.post_int2lm.main`
-4. :func:`jobs.octe.main`
-5. :func:`jobs.cosmo.main`
-6. :func:`jobs.check_output.main`
-7. :func:`jobs.reduce_output.main`
-8. :func:`jobs.post_cosmo.main` | :func:`jobs.verify_chain.main`
-
-**COSMOART**:
-
-1. :func:`jobs.emissions.main` | :func:`jobs.prepare_data.main` |
- :func:`jobs.photo_rate.main` | :func:`jobs.obs_nudging.main` |
- :func:`jobs.online_vprm.main` | :func:`jobs.oae.main`
-2. :func:`jobs.int2lm.main`
-3. :func:`jobs.cosmo.main`
-4. :func:`jobs.check_output.main`
-5. :func:`jobs.reduce_output.main`
-6. :func:`jobs.post_cosmo.main` | :func:`jobs.verify_chain.main`
-
-**ICON**:
-
-1. :func:`jobs.prepare_data.main`
-2. :func:`jobs.icon.main`
-
-**ICONART**:
-
-1. :func:`jobs.prepare_data.main`
-2. :func:`jobs.icon.main`
-
-
-**ICONARTOEM**:
-
-1. :func:`jobs.prepare_data.main`
-2. :func:`jobs.oae.main`
-3. :func:`jobs.icon.main`
-
-
-Adding new jobs
+Jobs have to be part of the respective workflow. They are submitted via ``sbatch``
+to the Slurm workload manager.
+
+The order of job submission is based on the list given in ``workflows.yaml``
+(or in ``config.yaml``, in case a custom, user-defined workflow is used).
+
+Let's have a look at the ``icon-art`` example:
+
+.. code-block:: yaml
+
+ icon-art:
+ features:
+ - restart
+ jobs:
+ - prepare_icon
+ - icontools
+ - prepare_art
+ - icon
+
+This workflow consists of four jobs: ``prepare_icon``, ``icontools``,
+``prepare_art`` and ``icon``.
+
+All of these jobs are submitted, but they do not start at the same time,
+because some of them depend on others:
+
+.. code-block:: yaml
+
+ dependencies:
+ icontools:
+ current:
+ - prepare_icon
+ prepare_art:
+ current:
+ - icontools
+ icon:
+ current:
+ - prepare_icon
+ - icontools
+ - prepare_art
+ previous:
+ - icon
+
+Since ``icontools`` depends on ``prepare_icon``, and ``prepare_art`` depends
+on ``icontools``, the order of execution is ``prepare_icon`` --> ``icontools``
+--> ``prepare_art``. Note that if we had another job in there without dependencies,
+it would run in parallel to the others.
+
+Since ``icon`` depends on all other jobs, it will be executed last. Note that
+these dependencies are all listed under the ``current`` keyword, targeting
+the current chunk. For ``icon``, there is an additional ``previous`` keyword.
+This means that an ``icon`` simulation will always wait until the simulation
+from the last chunk is finished (because the restart file has to be available).
+
+Another effect of this workflow definition is that the ``prepare_icon``,
+``icontools`` and ``prepare_art`` jobs are also launched for the next chunk
+right away, as they do not depend on jobs from the previous chunk.
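+
+Under the hood, such dependencies translate into Slurm's
+``--dependency=afterok:<jobid>`` mechanism. A minimal sketch of a submission
+helper (an illustration only; the actual logic lives in ``run_chain.py``):
+
+.. code-block:: python
+
+    import subprocess
+
+    def submit_with_deps(script, dep_ids):
+        """Submit a runscript, waiting on the given Slurm job ids."""
+        cmd = ['sbatch', '--parsable']
+        if dep_ids:
+            cmd.append('--dependency=afterok:' + ':'.join(dep_ids))
+        cmd.append(str(script))
+        # --parsable makes sbatch print only the job id
+        return subprocess.check_output(cmd, text=True).strip()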
+
+.. figure:: _static/processing_chain_workflow_icon_art.png
+ :alt: Flowchart for the ``icon-art`` workflow.
+
+ Flowchart for the ``icon-art`` workflow.
+
+
+Adding New Jobs
---------------
Adding a new job to the chain is simple:
@@ -59,26 +75,5 @@ Adding a new job to the chain is simple:
a function called ``main`` which takes the same arguments as every other job.
Make sure the function is documented with a docstring.
2. Import it in ``jobs/__init__.py`` to make it accessible to ``run_chain.py``.
-3. Add the function to the documentation. You find the file describing this page
- at ``doc/source/jobs.rst``.
-
-List of available jobs
-----------------------
-
-* :func:`jobs.biofluxes.main`
-* :func:`jobs.check_output.main`
-* :func:`jobs.cosmo.main`
-* :func:`jobs.emissions.main`
-* :func:`jobs.icon.main`
-* :func:`jobs.int2lm.main`
-* :func:`jobs.prepare_data.main`
-* :func:`jobs.oae.main`
-* :func:`jobs.obs_nudging.main`
-* :func:`jobs.octe.main`
-* :func:`jobs.online_vprm.main`
-* :func:`jobs.photo_rate.main`
-* :func:`jobs.post_cosmo.main`
-* :func:`jobs.post_int2lm.main`
-* :func:`jobs.reduce_output.main`
-* :func:`jobs.verify_chain.main`
+3. Add the job to your workflow (see the skeleton sketched below).
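+
+A minimal job skeleton might look like this (``mynewjob`` and its body are
+placeholders; the ``main(cfg)`` signature and the ``BASIC_PYTHON_JOB`` flag
+follow the existing jobs):
+
+.. code-block:: python
+
+    # jobs/mynewjob.py
+    import logging
+    from . import tools
+
+    BASIC_PYTHON_JOB = True
+
+
+    def main(cfg):
+        """One-line summary of what the job does.
+
+        Parameters
+        ----------
+        cfg : Config
+            Object holding all user-configuration parameters as attributes.
+        """
+        tools.change_logfile(cfg.logfile)
+        logging.info('Running mynewjob')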
diff --git a/docs/namelists.rst b/docs/namelists.rst
index d76cfa40..20da47ca 100644
--- a/docs/namelists.rst
+++ b/docs/namelists.rst
@@ -1,25 +1,34 @@
-.. _namelists:
+.. _namelists-section:
-Namelist Templates
-------------------
+Namelist and Runscript Templates
+--------------------------------
-Namelists for **int2lm** and **COSMO** are generated using templates which are also located in
-the cases-directory. These templates are essentially textfiles containing "normal" namelist
-parameters and python-variables in curly braces.
+The namelists and run jobs for **int2lm** and **COSMO**, as well as for
+**icontools** and **ICON**, are dynamically generated using templates located
+in the ``cases/`` directory. These templates are essentially text files
+containing "normal" namelist parameters alongside Python variables enclosed
+in curly braces.
-These files get read by their respective job.
-The resulting string is formatted using python's ``.format()``-function which replaces the
-python-variables with their value. The formatted strings are then saved as namelist-files in the
-run-directory of their respective jobs and then read by the executable. ::
+During runtime, these template files are read by their respective jobs.
+The resulting strings are formatted via Python's ``.format()`` function, which
+substitutes the Python variables with their corresponding values.
+The formatted strings are then saved as the actual namelists and runscripts in
+the run directory of their respective jobs.
- cases/example/example_namelist.cfg -> [read file] ->
- "exvar = '{cfg.prefix}{cfg.suffix}'" -> ["".format(cfg)] ->
- "exvar = 'pref_suff.nc'" -> [write to disk] ->
- int2lm/run/example_namelist
+.. code-block::
-The same procedure is done for the slurm-runscripts for **int2lm** and **COSMO**.
+ cases/example/example_namelist.cfg -> [read file] ->
+ "namelist_var = '{cfg.prefix}{cfg.suffix}'" -> ["".format(cfg)] ->
+ "namelist_var = 'pref_suff.nc'" -> [write to disk] ->
+ int2lm/run/example_namelist
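+
+In plain Python, the substitution mechanism boils down to the following sketch
+(using a ``SimpleNamespace`` as a stand-in for the configuration object):
+
+.. code-block:: python
+
+    from types import SimpleNamespace
+
+    cfg = SimpleNamespace(prefix='pref_', suffix='suff.nc')  # stand-in config
+
+    # In the chain, the template string is read from the case's .cfg file
+    template = "namelist_var = '{cfg.prefix}{cfg.suffix}'"
+    namelist = template.format(cfg=cfg)
+    print(namelist)  # namelist_var = 'pref_suff.nc'
+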
A special case is ``INPUT_ART`` for **int2lm** and ``INPUT_GHG`` for **COSMO** . These namelists are
generated by :func:`jobs.tools.write_int2lm_input_art.main` and :func:`jobs.tools.write_cosmo_input_ghg.main`
from ``.csv``-files containing all necessary information.
+----------------------------------------------------
+
+.. autofunction:: jobs.tools.write_int2lm_input_art.main
+
+----------------------------------------------------
+
+.. autofunction:: jobs.tools.write_cosmo_input_ghg.main
+
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 1d8a9189..d2894daf 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,2 +1,14 @@
-psphinxTheme
-karma-sphinx-theme
\ No newline at end of file
+cdo
+nco
+netcdf4
+numpy
+f90nml
+cartopy
+matplotlib
+scipy
+pillow
+xarray
+cdsapi
+sphinx
+sphinx_rtd_theme
+sphinx-copybutton
diff --git a/docs/tools.rst b/docs/tools.rst
deleted file mode 100644
index 9aebd600..00000000
--- a/docs/tools.rst
+++ /dev/null
@@ -1,14 +0,0 @@
-.. _tools-section:
-
-Tools
-=====
-
-The tools are a collection of functions used by the jobs. Most of those
-functions are well documented and listed here. For others, one may take
-a look into ``jobs/tools`` directly.
-
-Conversion Functions
---------------------
-
-These functions are used by the job :func:`jobs.prepare_data.main`. They convert data into
-a format usable by **int2lm**
diff --git a/env/environment.yml b/env/environment.yml
index 06795923..09381fd4 100644
--- a/env/environment.yml
+++ b/env/environment.yml
@@ -4,7 +4,6 @@ channels:
- defaults
dependencies:
- python=3.9
- - sphinx_rtd_theme
- cdo
- nco
- netcdf4
@@ -13,8 +12,10 @@ dependencies:
- cartopy
- matplotlib
- scipy
- - sphinx
- pillow
- xarray
- - dask
- cdsapi
+ - scikit-learn
+ - sphinx
+ - sphinx_rtd_theme
+ - sphinx-copybutton
diff --git a/src/.gitkeep b/ext/.gitkeep
similarity index 100%
rename from src/.gitkeep
rename to ext/.gitkeep
diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile
index 21c95360..07a8219b 100644
--- a/jenkins/Jenkinsfile
+++ b/jenkins/Jenkinsfile
@@ -117,7 +117,7 @@ pipeline {
steps {
sh '''source ${WORKSPACE}/miniconda/etc/profile.d/conda.sh
conda activate proc-chain
- . ${WORKSPACE}/src/spack-c2sm/setup-env.sh
+ . ${WORKSPACE}/ext/spack-c2sm/setup-env.sh
./jenkins/scripts/test_cosmo-ghg.sh'''
}
post {
@@ -134,7 +134,7 @@ pipeline {
steps {
sh '''source ${WORKSPACE}/miniconda/etc/profile.d/conda.sh
conda activate proc-chain
- . ${WORKSPACE}/src/spack-c2sm/setup-env.sh
+ . ${WORKSPACE}/ext/spack-c2sm/setup-env.sh
./jenkins/scripts/test_cosmo-ghg-spinup.sh'''
}
post {
@@ -151,7 +151,7 @@ pipeline {
steps {
sh '''source ${WORKSPACE}/miniconda/etc/profile.d/conda.sh
conda activate proc-chain
- . ${WORKSPACE}/src/spack-c2sm/setup-env.sh
+ . ${WORKSPACE}/ext/spack-c2sm/setup-env.sh
./jenkins/scripts/test_icon.sh'''
}
post {
@@ -168,7 +168,7 @@ pipeline {
steps {
sh '''source ${WORKSPACE}/miniconda/etc/profile.d/conda.sh
conda activate proc-chain
- . ${WORKSPACE}/src/spack-c2sm/setup-env.sh
+ . ${WORKSPACE}/ext/spack-c2sm/setup-env.sh
./jenkins/scripts/test_icon-art-oem.sh'''
}
post {
@@ -185,7 +185,7 @@ pipeline {
steps {
sh '''source ${WORKSPACE}/miniconda/etc/profile.d/conda.sh
conda activate proc-chain
- . ${WORKSPACE}/src/spack-c2sm/setup-env.sh
+ . ${WORKSPACE}/ext/spack-c2sm/setup-env.sh
./jenkins/scripts/test_icon-art-global.sh'''
}
post {
diff --git a/jenkins/scripts/build_cosmo-ghg.sh b/jenkins/scripts/build_cosmo-ghg.sh
index 027acc5a..2167fec7 100755
--- a/jenkins/scripts/build_cosmo-ghg.sh
+++ b/jenkins/scripts/build_cosmo-ghg.sh
@@ -13,7 +13,7 @@ function error {
BRANCH=c2sm
GIT_REMOTE=git@github.com:C2SM-RCM/cosmo-ghg.git
-pushd src
+pushd ext
# Activate spack
. spack-c2sm/setup-env.sh
diff --git a/jenkins/scripts/build_icon-art.sh b/jenkins/scripts/build_icon-art.sh
index 36cf15cb..8ba5225e 100755
--- a/jenkins/scripts/build_icon-art.sh
+++ b/jenkins/scripts/build_icon-art.sh
@@ -13,7 +13,7 @@ function error {
BRANCH=art
GIT_REMOTE=git@github.com:C2SM/icon.git
-pushd src
+pushd ext
# Activate spack
. spack-c2sm/setup-env.sh
diff --git a/jenkins/scripts/build_icon.sh b/jenkins/scripts/build_icon.sh
index bff225c9..15e1d82a 100755
--- a/jenkins/scripts/build_icon.sh
+++ b/jenkins/scripts/build_icon.sh
@@ -13,7 +13,7 @@ function error {
BRANCH=main
GIT_REMOTE=git@github.com:C2SM/icon.git
-pushd src
+pushd ext
# Activate spack
. spack-c2sm/setup-env.sh
diff --git a/jenkins/scripts/build_int2lm.sh b/jenkins/scripts/build_int2lm.sh
index 2af403a3..1ecca336 100755
--- a/jenkins/scripts/build_int2lm.sh
+++ b/jenkins/scripts/build_int2lm.sh
@@ -13,7 +13,7 @@ function error {
BRANCH=c2sm-features
GIT_REMOTE=git@github.com:C2SM-RCM/int2lm.git
-pushd src
+pushd ext
# Activate spack
. spack-c2sm/setup-env.sh
diff --git a/jenkins/scripts/jenkins.sh b/jenkins/scripts/jenkins.sh
index d6298e19..8eb4d26e 100755
--- a/jenkins/scripts/jenkins.sh
+++ b/jenkins/scripts/jenkins.sh
@@ -23,9 +23,9 @@ eval "$(conda shell.bash hook)"
conda activate proc-chain
# Setup spack
-if [[ -d src/spack-c2sm ]]; then
+if [[ -d ext/spack-c2sm ]]; then
echo spack folder already exists - activating spack...
- . src/spack-c2sm/setup-env.sh
+ . ext/spack-c2sm/setup-env.sh
else
echo building spack...
./jenkins/scripts/setup-spack.sh
@@ -41,7 +41,7 @@ else
fi
# Build int2lm
-if [[ -f src/int2lm/test/testsuite/int2lm ]]; then
+if [[ -f ext/int2lm/test/testsuite/int2lm ]]; then
echo int2lm executable already exists - skipping build...
else
echo building int2lm...
@@ -49,7 +49,7 @@ else
fi
# Build COSMO-GHG
-if [[ -f src/cosmo-ghg/cosmo/ACC/cosmo_gpu ]]; then
+if [[ -f ext/cosmo-ghg/cosmo/ACC/cosmo_gpu ]]; then
echo cosmo executable already exists - skipping build.
else
echo building cosmo...
@@ -57,7 +57,7 @@ else
fi
# Build ICON
-if [[ -f src/icon/bin/icon ]]; then
+if [[ -f ext/icon/bin/icon ]]; then
echo icon executable already exists - skipping build.
else
echo building icon...
@@ -65,7 +65,7 @@ else
fi
# Build ICON-ART
-if [[ -f src/icon-art/bin/icon ]]; then
+if [[ -f ext/icon-art/bin/icon ]]; then
echo icon-art executable already exists - skipping build.
else
echo building icon-art...
@@ -73,7 +73,7 @@ else
fi
# Test COSMO-GHG
-if [[ -f work/cosmo-ghg-test/2015010100_6_12/checkpoints/finished/post_cosmo && "$force_execution" == false ]]; then
+if [[ -f work/cosmo-ghg-test/2015010106_2015010112/checkpoints/finished/post_cosmo && "$force_execution" == false ]]; then
echo cosmo-ghg test case already finished - skipping test.
else
echo running cosmo-ghg test case...
@@ -81,7 +81,7 @@ else
fi
# Test COSMO-GHG (spinup)
-if [[ -f work/cosmo-ghg-spinup-test/2015010106_-3_6/checkpoints/finished/post_cosmo && "$force_execution" == false ]]; then
+if [[ -f work/cosmo-ghg-spinup-test/2015010109_2015010118/checkpoints/finished/post_cosmo && "$force_execution" == false ]]; then
echo cosmo-ghg test case already finished - skipping test.
else
echo running cosmo-ghg-spinup test case...
@@ -89,7 +89,7 @@ else
fi
# Test ICON
-if [[ -f work/icon-test/2018010100_6_12/checkpoints/finished/icon && "$force_execution" == false ]]; then
+if [[ -f work/icon-test/2018010106_2018010112/checkpoints/finished/icon && "$force_execution" == false ]]; then
echo icon test case already finished - skipping test.
else
echo running icon test case...
@@ -97,7 +97,7 @@ else
fi
# Test ICON-ART
-if [[ -f work/icon-art-oem-test/2018010100_0_24/checkpoints/finished/icon && "$force_execution" == false ]]; then
+if [[ -f work/icon-art-oem-test/2018010106_2018010112/checkpoints/finished/icon && "$force_execution" == false ]]; then
echo icon-art test case already finished - skipping test.
else
echo running icon-art-oem test case...
@@ -105,7 +105,7 @@ else
fi
# Test ICON-ART-GLOBAL
-if [[ -f work/icon-art-global-test/2018010100_0_24/checkpoints/finished/icon && "$force_execution" == false ]]; then
+if [[ -f work/icon-art-global-test/2018010106_2018010112/checkpoints/finished/icon && "$force_execution" == false ]]; then
echo icon-art-global test case already finished - skipping test.
else
echo running icon-art-global test case...
diff --git a/jenkins/scripts/setup-spack.sh b/jenkins/scripts/setup-spack.sh
index 93945c0b..13988c5f 100755
--- a/jenkins/scripts/setup-spack.sh
+++ b/jenkins/scripts/setup-spack.sh
@@ -13,9 +13,9 @@ function error {
TAG=v0.18.1.12
GIT_REMOTE=https://github.com/C2SM/spack-c2sm.git
-rm -fr src/spack-c2sm
+rm -fr ext/spack-c2sm
-pushd src
+pushd ext
git clone --depth 1 --recurse-submodules --shallow-submodules -b ${TAG} ${GIT_REMOTE}
. spack-c2sm/setup-env.sh
popd
diff --git a/jobs/__init__.py b/jobs/__init__.py
index 08cc0cdb..332f34a8 100644
--- a/jobs/__init__.py
+++ b/jobs/__init__.py
@@ -1,20 +1,24 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-#
-from . import prepare_data
-from . import emissions
-from . import oem
from . import biofluxes
-from . import int2lm
-from . import post_int2lm
+from . import check_output
from . import cosmo
+from . import emissions
+from . import icon
+from . import icontools
+from . import int2lm
+from . import obs_nudging
+from . import octe
+from . import oem
+from . import online_vprm
+from . import photo_rate
from . import post_cosmo
+from . import post_int2lm
+from . import prepare_art
+from . import prepare_art_oem
+from . import prepare_art_global
+from . import prepare_cosmo
+from . import prepare_icon
from . import reduce_output
from . import verify_chain
-from . import photo_rate
-from . import obs_nudging
-from . import online_vprm
-from . import octe
-from . import check_output
-from . import icon
diff --git a/jobs/biofluxes.py b/jobs/biofluxes.py
index 78471f09..cc9a738e 100644
--- a/jobs/biofluxes.py
+++ b/jobs/biofluxes.py
@@ -1,39 +1,27 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-#
-# Create VPRM biogenic fluxes from VPRM and put them into the input folder
-#
-# result in case of success: all VPRM input-files necessary are found in
-# ${int2lm_input}/vprm/
-#
-# Dominik Brunner, July 2013
-#
-# 2013-07-18 Initial release, based on Christoph Knotes' emissions.bash
-# In the current version, the program only checks for the presence
-# of the constant-in-time emissions file and creates a soft link in the int2lm
-# input directory (brd)
-# 2018-06-25 Translated to Python (arp)
import os
import logging
-from . import tools, prepare_data
+from . import tools, prepare_cosmo
-def main(cfg, model_cfg):
- """Prepare the biofluxes-files for the simulation.
+BASIC_PYTHON_JOB = True
- Only necessary for **COSMO** simulations.
- Copy biofluxes files from project folder (``cfg.vprm['dir']``) to int2lm input
- folder on scratch (``cfg.int2lm_input/vprm``).
+def main(cfg):
+ """Prepare biofluxes files for COSMO simulations.
+
+ Copies biofluxes files from the project folder (:attr:`cfg.vprm['dir']`)
+ to the int2lm input folder on scratch (:attr:`cfg.int2lm_input`/vprm).
Parameters
- ----------
- cfg : config-object
- Object holding all user-configuration parameters as attributes
+ ----------
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
"""
- tools.check_model(cfg, 'cosmo-ghg')
- cfg = prepare_data.set_cfg_variables(cfg, model_cfg)
+ tools.change_logfile(cfg.logfile)
+ prepare_cosmo.set_cfg_variables(cfg)
scratch_path = os.path.join(cfg.int2lm_input, 'vprm')
diff --git a/jobs/check_output.py b/jobs/check_output.py
index 133e969c..5ab99614 100644
--- a/jobs/check_output.py
+++ b/jobs/check_output.py
@@ -28,6 +28,8 @@
except ImportError:
import tools
+BASIC_PYTHON_JOB = True
+
def pkl_path(folder, pid=None):
""" Returns the path (and creates it, if necessary) to the stored
@@ -69,7 +71,7 @@ def timeseries_path(cfg):
Parameters
----------
- cfg : config-object
+ cfg : Config
Object holding all user-configuration parameters as attributes
Returns
@@ -89,7 +91,7 @@ def maps_path(cfg):
Parameters
----------
- cfg : config-object
+ cfg : Config
Object holding all user-configuration parameters as attributes
Returns
@@ -108,7 +110,7 @@ def animations_path(cfg):
Parameters
----------
- cfg : config-object
+ cfg : Config
Object holding all user-configuration parameters as attributes
Returns
@@ -211,7 +213,7 @@ def plot_timeseries(cfg, units):
Parameters
----------
- cfg : config-object
+ cfg : Config
Object holding all user-configuration parameters as attributes
units : dict
Dictionary containing units os variables
@@ -385,7 +387,7 @@ def merge_data(cfg):
Parameters
----------
- cfg : config-object
+ cfg : Config
Object holding all user-configuration parameters as attributes
Returns
@@ -647,7 +649,7 @@ def create_map_directories(cfg, data, units):
Parameters
----------
- cfg : config-object
+ cfg : Config
Object holding all user-configuration parameters as attributes
data: pandas.DataFrame
Dataframe containing diagnostic values for each variable
@@ -671,7 +673,7 @@ def create_animations(cfg):
Parameters
----------
- cfg : config-object
+ cfg : Config
Object holding all user-configuration parameters as attributes
"""
data_path = pkl_path(cfg.output_root)
@@ -699,20 +701,21 @@ def create_animations(cfg):
duration=300)
-def main(cfg, model_cfg):
- """Checks output variables whether they are in a phyiscally reasonable
- range.
+def main(cfg):
+ """Check output variables for physical reasonability and create plots.
- Stores the time series of the minimum, the maximum, the mean, and
- the std of the variables as a pandas object into a pickle file.
+ This function checks the output variables to ensure they are in a physically
+ reasonable range. It stores the time series of the minimum, maximum, mean, and
+ standard deviation of the variables as a pandas object into a pickle file.
- Creates per-variable plots from the stored time series data.
+ It also creates per-variable plots from the stored time series data.
Parameters
- ----------
- cfg : config-object
- Object holding all user-configuration parameters as attributes
+ ----------
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
"""
+ tools.change_logfile(cfg.logfile)
date = dt.datetime.today()
to_print = """check_output
@@ -722,8 +725,6 @@ def main(cfg, model_cfg):
============== StartTime: %s
=====================================================""" % date.strftime("%s")
- logfile = os.path.join(cfg.log_working_dir, "check_output")
- logging.basicConfig(filename=logfile, level=logging.INFO)
logging.info(to_print)
# if cfg.compute_host!="daint":
@@ -748,7 +749,7 @@ def main(cfg, model_cfg):
#SBATCH --time=00:30:00
#SBATCH --constraint=mc
#SBATCH --ntasks=1
-#SBATCH --output={logfile}
+#SBATCH --output={cfg.logfile}
export EASYBUILD_PREFIX=/store/empa/em05/easybuild
@@ -778,7 +779,7 @@ def main(cfg, model_cfg):
cosmo_output=cfg.cosmo_output,
output_root=cfg.output_root,
work_log=cfg.log_working_dir,
- logfile=logfile,
+ logfile=cfg.logfile,
chain=cfg.chain_src_dir,
chain_root=cfg.chain_root,
action='get_data')
@@ -819,7 +820,7 @@ def main(cfg, model_cfg):
casename=cfg.casename,
cosmo_output=cfg.cosmo_output,
output_root=cfg.output_root,
- logfile=logfile,
+ logfile=cfg.logfile,
chain=cfg.chain_src_dir,
chain_root=cfg.chain_root,
action='plot_maps')
@@ -859,9 +860,10 @@ def main(cfg, model_cfg):
logging.info(to_print)
# Check for errors
- with open(logfile) as f:
+ with open(cfg.logfile) as f:
if 'ERROR' in f.read():
- raise RuntimeError('Logfile containing errors! See %s' % logfile)
+ raise RuntimeError('Logfile containing errors! See %s' %
+ cfg.logfile)
if __name__ == '__main__':
diff --git a/jobs/cosmo.py b/jobs/cosmo.py
index 4c43e815..a110661e 100644
--- a/jobs/cosmo.py
+++ b/jobs/cosmo.py
@@ -1,101 +1,41 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-#
-# Setup the namelist for a COSMO tracer run and submit the job to the queue
-#
-# result in case of success: forecast fields found in
-# ${cosmo_output}
-#
-# Dominik Brunner, July 2013
-#
-# 2013-07-21 Initial release, adopted from Christoph Knote's cosmo.bash (brd)
-# 2018-07-10 Translated to Python (muq)
import logging
import os
import subprocess
-import csv
-from .tools import write_cosmo_input_ghg
-from . import tools
-from datetime import datetime, timedelta
-
-
-def set_cfg_variables(cfg, model_cfg):
- setattr(cfg, 'cosmo_base', os.path.join(cfg.chain_root, 'cosmo'))
- setattr(cfg, 'cosmo_input', os.path.join(cfg.chain_root, 'cosmo', 'input'))
- setattr(cfg, 'cosmo_run', os.path.join(cfg.chain_root, 'cosmo', 'run'))
- setattr(cfg, 'cosmo_output', os.path.join(cfg.chain_root, 'cosmo',
- 'output'))
- setattr(cfg, 'cosmo_output_reduced',
- os.path.join(cfg.chain_root, 'cosmo', 'output_reduced'))
-
- # Number of tracers
- if 'tracers' in model_cfg['models'][cfg.model]['features']:
- tracer_csvfile = os.path.join(cfg.chain_src_dir, 'cases', cfg.casename,
- 'cosmo_tracers.csv')
- if os.path.isfile(tracer_csvfile):
- with open(tracer_csvfile, 'r') as csv_file:
- reader = csv.DictReader(csv_file, delimiter=',')
- reader = [r for r in reader if r[''] != '#']
- setattr(cfg, 'in_tracers', len(reader))
- else:
- raise FileNotFoundError(f"File not found: {tracer_csvfile}")
-
- # tracer_start namelist paramter for spinup simulation
- if hasattr(cfg, 'spinup'):
- if cfg.first_one:
- setattr(cfg, 'tracer_start', 0)
- else:
- setattr(cfg, 'tracer_start', cfg.spinup)
- else:
- setattr(cfg, 'tracer_start', 0)
-
- # asynchronous I/O
- if hasattr(cfg, 'cfg.cosmo_np_io'):
- if cfg.cosmo_np_io == 0:
- setattr(cfg, 'lasync_io', '.FALSE.')
- setattr(cfg, 'num_iope_percomm', 0)
- else:
- setattr(cfg, 'lasync_io', '.TRUE.')
- setattr(cfg, 'num_iope_percomm', 1)
-
- return cfg
+from pathlib import Path
+from datetime import datetime
+from .tools import write_cosmo_input_ghg
+from . import tools, prepare_cosmo
-def main(cfg, model_cfg):
- """Setup the namelists for a **COSMO** tracer run and submit the job to
- the queue
+BASIC_PYTHON_JOB = True
- Necessary for both **COSMO** and **COSMOART** simulations.
- Decide if the soil model should be TERRA or TERRA multi-layer depending on
- ``startdate`` of the simulation.
+def main(cfg):
+ """Setup the namelists for a COSMO run and submit the job to the queue.
- Create necessary directory structure to run **COSMO** (run, output and
- restart directories, defined in ``cfg.cosmo_run``, ``cfg.cosmo_output``
+ Create necessary directory structure to run COSMO (run, output, and
+ restart directories, defined in ``cfg.cosmo_run``, ``cfg.cosmo_output``,
and ``cfg.cosmo_restart_out``).
- Copy the **COSMO**-executable from
- ``cfg.cosmo_bin`` to ``cfg.cosmo_run/cosmo``.
+ Copy the COSMO-executable from
+    ``cfg.cosmo['binary_file']`` to ``cfg.cosmo_run/cfg.cosmo_execname``.
- Convert the tracer-csv-file to a **COSMO**-namelist file.
+ Convert the tracer csv file to a COSMO namelist file.
- Format the **COSMO**-namelist-templates
- (**COSMO**: ``AF,ORG,IO,DYN,PHY,DIA,ASS``,
- **COSMOART**: ``ART,ASS,DIA,DYN,EPS,INI,IO,ORG,PHY``)
- using the information in ``cfg``.
-
- Format the runscript-template and submit the job.
+ Format the COSMO namelist templates using the information in ``cfg``.
+ Format the runscript template and submit the job.
Parameters
----------
- cfg : config-object
- Object holding all user-configuration parameters as attributes
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
"""
- cfg = set_cfg_variables(cfg, model_cfg)
- logfile = os.path.join(cfg.log_working_dir, "cosmo")
- logfile_finish = os.path.join(cfg.log_finished_dir, "cosmo")
+ tools.change_logfile(cfg.logfile)
+ prepare_cosmo.set_cfg_variables(cfg)
logging.info("Setup the namelist for a COSMO tracer run and "
"submit the job to the queue")
@@ -116,11 +56,10 @@ def main(cfg, model_cfg):
tools.create_dir(ini_dir, "cosmo_input_initial")
startfiletime = datetime.strptime(cfg.laf_startfile[-10:], "%Y%m%d%H")
if cfg.startdate_sim >= startfiletime:
- starttime_last = cfg.startdate_sim - timedelta(
- hours=cfg.restart_step)
work_root = os.path.dirname(os.path.dirname(cfg.chain_root))
last_output_path = os.path.join(work_root, cfg.casename,
- cfg.job_id_prev, 'cosmo', 'output')
+ cfg.chunk_id_prev, 'cosmo',
+ 'output')
laf_output_refdate = cfg.startdate_sim.strftime("%Y%m%d%H")
last_laf_filename = "laf" + laf_output_refdate
# At the beginning, use original laf_startfile
@@ -158,7 +97,7 @@ def main(cfg, model_cfg):
laf_output_refdate=laf_output_refdate,
))
# Execute fieldextra
- with open(logfile, "a+") as log:
+ with open(cfg.logfile, "a+") as log:
result = subprocess.run(
[cfg.fieldextra_bin, output_file_merge],
stdout=log)
@@ -176,23 +115,21 @@ def main(cfg, model_cfg):
# Create restart directory if feature is present and
# if there is no spinup
- if 'restart' in model_cfg['models'][cfg.model]['features'] and not \
+ if 'restart' in cfg.workflow['features'] and not \
hasattr(cfg, 'spinup'):
tools.create_dir(cfg.cosmo_restart_out, "cosmo_restart_out")
# Copy cosmo executable
- cfg.cosmo['execname'] = cfg.model.lower()
+ cfg.cosmo_execname = Path(cfg.cosmo['binary_file']).name
tools.copy_file(cfg.cosmo['binary_file'],
- os.path.join(cfg.cosmo_run, cfg.cosmo['execname']))
+ cfg.cosmo_run / cfg.cosmo_execname)
# Prepare namelist and submit job
tracer_csvfile = os.path.join(cfg.chain_src_dir, 'cases', cfg.casename,
'cosmo_tracers.csv')
- if cfg.model == 'cosmo':
- namelist_names = ['ORG', 'IO', 'DYN', 'PHY', 'DIA', 'ASS', 'SAT']
- elif cfg.model == 'cosmo-ghg':
+ if hasattr(cfg, 'cams') or hasattr(cfg, 'mozart'):
namelist_names = ['AF', 'ORG', 'IO', 'DYN', 'GHG', 'PHY', 'DIA', 'ASS']
- elif cfg.model == 'cosmo-art':
+ elif hasattr(cfg, 'photo_rate'):
namelist_names = [
'ART', 'ASS', 'DIA', 'DYN', 'EPS', 'INI', 'IO', 'ORG', 'PHY'
]
@@ -200,6 +137,8 @@ def main(cfg, model_cfg):
# When doing online emissions in COSMO-ART, an additional
# namelist is required
namelist_names += ['OAE']
+ elif hasattr(cfg, 'cosmo'):
+ namelist_names = ['ORG', 'IO', 'DYN', 'PHY', 'DIA', 'ASS', 'SAT']
for section in namelist_names:
namelist_file = os.path.join(
@@ -231,7 +170,7 @@ def main(cfg, model_cfg):
# Append INPUT_GHG namelist with tracer definitions from csv file
if os.path.isfile(tracer_csvfile):
- if cfg.model == 'cosmo-ghg':
+ if hasattr(cfg, 'cams') or hasattr(cfg, 'mozart'):
input_ghg_filename = os.path.join(cfg.cosmo_run, 'INPUT_GHG')
write_cosmo_input_ghg.main(tracer_csvfile, input_ghg_filename, cfg)
@@ -242,18 +181,15 @@ def main(cfg, model_cfg):
with open(runscript_file) as input_file:
cosmo_runscript = input_file.read()
- output_file = os.path.join(cfg.cosmo_run, "run.job")
- with open(output_file, "w") as outf:
+ Path(cfg.cosmo_run).mkdir(parents=True, exist_ok=True)
+ script = (cfg.cosmo_run / 'run_cosmo.job')
+ with open(script, "w") as outf:
outf.write(
cosmo_runscript.format(cfg=cfg,
**cfg.cosmo,
np_tot=np_tot,
- logfile=logfile,
- logfile_finish=logfile_finish))
+ logfile=cfg.logfile,
+ logfile_finish=cfg.logfile_finish))
- result = subprocess.run(
- ["sbatch", "--wait",
- os.path.join(cfg.cosmo_run, 'run.job')])
- exitcode = result.returncode
- if exitcode != 0:
- raise RuntimeError("sbatch returned exitcode {}".format(exitcode))
+ # Submit job
+ cfg.submit('cosmo', script)
diff --git a/jobs/emissions.py b/jobs/emissions.py
index cf98c92f..27516da3 100644
--- a/jobs/emissions.py
+++ b/jobs/emissions.py
@@ -1,35 +1,24 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-#
-# Create anthropogenic emissions and put them into the input folder
-#
-# Result in case of success: all emission input-files necessary are found in
-# ${int2lm_input}/emissions/
-#
-# Dominik Brunner, July 2013
-#
-# 2013-07-18 Initial release, based on Christoph Knotes' emissions.bash
-# In the current version, the program only checks for the presence
-# of the constant-in-time emissions file and creates a soft link in
-# the int2lm input directory (brd)
-# 2018-06-25 Translated to Python (arp)
import os
import logging
-from . import tools, prepare_data
+from . import tools, prepare_cosmo
+BASIC_PYTHON_JOB = True
-def main(cfg, model_cfg):
- """Copy emission files to the **int2lm** input directory.
- Necessary for both **COSMO** and **COSMOART** simulations.
+def main(cfg):
+ """Copy emission files to the int2lm input directory.
+
+ Necessary for both COSMO and COSMOART simulations.
Copy emission files from project folder (``cfg.emissions['dir']``) to
- **int2lm** input folder on scratch (``cfg.int2lm_input/emissions``).
+ int2lm input folder on scratch (``cfg.int2lm_input/emissions``).
- For **COSMO** simulations, converts the the netCDF-variable-names
- from ``string`` to ``char`` (necessary for **int2lm**).
+ For COSMO simulations, converts the netCDF-variable-names
+ from ``string`` to ``char`` (necessary for int2lm).
If there are multiple emission-datasets (cfg.emissions['dir'] is a list of
paths), they are copied as follows::
@@ -40,16 +29,11 @@ def main(cfg, model_cfg):
Parameters
----------
- starttime : datetime-object
- The starting date of the simulation
- hstart : int
- Offset (in hours) of the actual start from the starttime
- hstop : int
- Length of simulation (in hours)
- cfg : config-object
- Object holding all user-configuration parameters as attributes
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
"""
- cfg = prepare_data.set_cfg_variables(cfg, model_cfg)
+ tools.change_logfile(cfg.logfile)
+ prepare_cosmo.set_cfg_variables(cfg)
dest_prefix = "emis_"
if not isinstance(cfg.emissions['dir'], list):
@@ -84,5 +68,5 @@ def main(cfg, model_cfg):
# convert grid_mapping_name from string (NF90_STRING) to char
# (NF90_CHAR) (needed for int2lm to work)
- if cfg.model.startswith('cosmo'):
+ if hasattr(cfg, 'cosmo'):
tools.string2char.main(dest_path)
diff --git a/jobs/icon.py b/jobs/icon.py
index a63c835b..250df513 100644
--- a/jobs/icon.py
+++ b/jobs/icon.py
@@ -1,66 +1,41 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-#
-# Setup the namelist for an ICON run and submit the job to the queue
-#
-# result in case of success: forecast fields found in
-# ${icon_output}
-#
-# Michael Jähn, February 2021
-#
-# 2021-04-26 Initial release
-# 2021-11-21 Updated for ICON-ART
import logging
-import os
-import subprocess
-from . import tools, prepare_data
+from pathlib import Path
+from . import tools, prepare_icon
+BASIC_PYTHON_JOB = False
-def main(cfg, model_cfg):
- """Setup the namelists for an **ICON** tracer run and submit the job to
- the queue
- Necessary for both **ICON** and **ICONART** simulations.
+def main(cfg):
+ """Setup the namelists for an ICON run and submit the job to
+ the queue.
- Create necessary directory structure to run **ICON** (run, output and
- restart directories, defined in ``cfg.icon_work``, ``cfg.icon_output``
- and ``cfg.icon_restart_out``).
-
- Copy the **ICON**-executable from
+ Copy the ICON-executable from
``cfg.icon_binary_file`` to ``cfg.icon_work/icon.exe``.
- Use the tracer-csv-file to append **ICON**-namelist file.
-
- Format the **ICON**-namelist-templates:
+ Format the ICON-namelist-templates:
``icon_master.namelist.cfg, icon_NAMELIST_NWP.cfg``,
using the information in ``cfg``.
Format the runscript-template and submit the job.
Parameters
- ----------
- starttime : datetime-object
- The starting date of the simulation
- hstart : int
- Offset (in hours) of the actual start from the starttime
- hstop : int
- Length of simulation (in hours)
- cfg : config-object
- Object holding all user-configuration parameters as attributes
+ ----------
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
"""
- cfg = prepare_data.set_cfg_variables(cfg, model_cfg)
-
- logfile = os.path.join(cfg.log_working_dir, "icon")
- logfile_finish = os.path.join(cfg.log_finished_dir, "icon")
+ prepare_icon.set_cfg_variables(cfg)
+ tools.change_logfile(cfg.logfile)
logging.info("Setup the namelist for an ICON run and "
"submit the job to the queue")
# Copy icon executable
- execname = 'icon.exe'
- tools.copy_file(cfg.icon_binary_file, os.path.join(cfg.icon_work,
- execname))
+ cfg.icon_execname = Path(cfg.icon['binary_file']).name
+ tools.create_dir(cfg.icon_work, "icon_work")
+ tools.copy_file(cfg.icon_binary_file, cfg.icon_work / cfg.icon_execname)
# Symlink the restart file to the last run into the icon/run folder
if cfg.lrestart == '.TRUE.':
@@ -68,36 +43,17 @@ def main(cfg, model_cfg):
# Get name of initial file
if hasattr(cfg, 'inicond_filename'):
- inidata_filename = os.path.join(cfg.icon_input_icbc,
- cfg.inicond_filename)
+ inidata_filename = cfg.icon_input_icbc / cfg.inicond_filename
else:
- inidata_filename = os.path.join(
- cfg.icon_input_icbc,
+ inidata_filename = cfg.icon_input_icbc / str(
cfg.startdate_sim.strftime(cfg.meteo['prefix'] +
cfg.meteo['nameformat']) + '.nc')
# Write run script (run_icon.job)
- icon_runjob = os.path.join(cfg.case_path, cfg.icon_runjob_filename)
- with open(icon_runjob) as input_file:
- to_write = input_file.read()
- output_file = os.path.join(cfg.icon_work, "run_icon.job")
- with open(output_file, "w") as outf:
- outf.write(
- to_write.format(cfg=cfg,
- inidata_filename=inidata_filename,
- logfile=logfile,
- logfile_finish=logfile_finish))
-
- result = subprocess.run(
- ["sbatch", "--wait",
- os.path.join(cfg.icon_work, 'run_icon.job')])
- exitcode = result.returncode
-
- # In case of ICON-ART, ignore the "invalid pointer" error on successful run
- if cfg.model.startswith('icon-art'):
- if tools.grep("free(): invalid pointer", logfile)['success'] and \
- tools.grep("clean-up finished", logfile)['success']:
- exitcode = 0
+ template = (cfg.case_path / cfg.icon_runjob_filename).read_text()
+ script_str = template.format(cfg=cfg, inidata_filename=inidata_filename)
+ script = (cfg.icon_work / 'run_icon.job')
+ script.write_text(script_str)
- if exitcode != 0:
- raise RuntimeError("sbatch returned exitcode {}".format(exitcode))
+ # Submit run script
+ cfg.submit('icon', script)
diff --git a/jobs/icontools.py b/jobs/icontools.py
new file mode 100644
index 00000000..a0f367b8
--- /dev/null
+++ b/jobs/icontools.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import logging
+import os
+import xarray as xr
+import numpy as np
+from . import tools, prepare_icon
+
+BASIC_PYTHON_JOB = True
+
+
+def main(cfg):
+ """
+ - Add GEOSP to all meteo files
+ - Submit the runscript for the DWD ICON tools to remap the meteorological files.
+ - All runscripts specified in ``cfg.icontools_runjobs`` are submitted.
+ - The meteorological files are read from the original input directory
+ (``cfg.input_root_meteo``), and the remapped meteorological files are saved
+ in the input folder on scratch (``cfg.icon_input/icbc``).
+ """
+ prepare_icon.set_cfg_variables(cfg)
+ tools.change_logfile(cfg.logfile)
+
+ #-----------------------------------------------------
+ # Create LBC datafile lists (each at 00 UTC and others)
+ #-----------------------------------------------------
+ datafile_list = []
+ datafile_list_rest = []
+ datafile_list_chem = []
+ for time in tools.iter_hours(cfg.startdate_sim, cfg.enddate_sim,
+ cfg.meteo['inc']):
+ meteo_file = cfg.icon_input_icbc / (
+ cfg.meteo['prefix'] + time.strftime(cfg.meteo['nameformat']))
+ if hasattr(cfg, 'art_input_folder'):
+ chem_file = cfg.icon_input_icbc / (
+ cfg.chem['prefix'] + time.strftime(cfg.chem_nameformat))
+ datafile_list_chem.append(str(chem_file) + cfg.chem['suffix'])
+ if str(meteo_file).endswith('00'):
+ datafile_list.append(str(meteo_file) + cfg.meteo['suffix'])
+ else:
+ datafile_list_rest.append(str(meteo_file) + cfg.meteo['suffix'])
+ datafile_list = ' '.join([str(v) for v in datafile_list])
+ datafile_list_rest = ' '.join([str(v) for v in datafile_list_rest])
+ datafile_list_chem = ' '.join([str(v) for v in datafile_list_chem])
+
+ #-----------------------------------------------------
+ # Write and submit ICONTOOLS runscripts
+ #-----------------------------------------------------
+ dep_id = None
+ for runscript in cfg.icontools_runjobs:
+ with (cfg.case_path / runscript).open() as input_file:
+ to_write = input_file.read()
+ runscript_path = cfg.icon_work / f"{runscript}.job"
+ with runscript_path.open("w") as outf:
+ outf.write(
+ to_write.format(cfg=cfg,
+ meteo=cfg.meteo,
+ logfile=cfg.logfile,
+ logfile_finish=cfg.logfile_finish,
+ datafile_list=datafile_list,
+ datafile_list_rest=datafile_list_rest,
+ datafile_list_chem=datafile_list_chem))
+
+ # Submitting icontools runscripts sequentially
+ logging.info(f" Starting icontools runscript {runscript}.")
+ dep_id = cfg.submit('icontools', runscript_path, add_dep=dep_id)
+
+ logging.info("Add GEOSP to all meteo files")
+ for time in tools.iter_hours(cfg.startdate_sim, cfg.enddate_sim,
+ cfg.meteo['inc']):
+ # Specify file names
+ geosp_filename = time.replace(
+ hour=0).strftime(cfg.meteo['prefix'] +
+ cfg.meteo['nameformat']) + '_lbc.nc'
+ geosp_file = os.path.join(cfg.icon_input_icbc, geosp_filename)
+ src_filename = time.strftime(cfg.meteo['prefix'] +
+ cfg.meteo['nameformat']) + '_lbc.nc'
+ src_file = os.path.join(cfg.icon_input_icbc, src_filename)
+ merged_filename = time.strftime(cfg.meteo['prefix'] +
+ cfg.meteo['nameformat']) + '_merged.nc'
+ merged_file = os.path.join(cfg.icon_input_icbc, merged_filename)
+
+ # Copy GEOSP file from last run if not present
+ if hasattr(cfg,
+ 'icon_input_icbc_prev') and not os.path.exists(geosp_file):
+ geosp_src_file = os.path.join(cfg.icon_input_icbc_prev,
+ geosp_filename)
+ tools.copy_file(geosp_src_file,
+ cfg.icon_input_icbc,
+ output_log=True)
+
+ # Load GEOSP data array as da_geosp at time 00:
+ ds = xr.open_dataset(src_file)
+ ds_geosp = xr.open_dataset(geosp_file)
+ da_geosp = ds_geosp['GEOSP']
+
+ # Merge GEOSP-dataset with other timesteps
+ if (time.hour != 0):
+ # Change values of time dimension to current time
+ da_geosp = da_geosp.assign_coords(time=[np.datetime64(time)])
+ # Merge GEOSP into temporary file
+ ds_merged = xr.merge([ds, da_geosp])
+ ds_merged.attrs = ds.attrs
+ ds_merged.to_netcdf(merged_file)
+ # Logging info for merging GEOSP
+ logging.info("Added GEOSP to file {}".format(merged_file))
+ # Rename file to get original file name
+ tools.rename_file(merged_file, src_file)
+ logging.info('OK')
diff --git a/jobs/int2lm.py b/jobs/int2lm.py
index da94a56c..83249fa3 100644
--- a/jobs/int2lm.py
+++ b/jobs/int2lm.py
@@ -1,71 +1,50 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-#
-# Setup the namelist for int2lm and submit the job to the queue
-#
-# Dominik Brunner, July 2013
-#
-# 2013-07-20 Initial release, based on Christoph Knote's int2lm.bash (brd)
-# 2017-01-15 adapted for hypatia and project SmartCarb (brd)
-# 2018-08-03 Translated to Python (jae)
import os
import logging
import shutil
-import subprocess
import pytz
-from datetime import datetime, timedelta
-from . import tools, prepare_data
+from datetime import datetime
+from . import tools, prepare_cosmo
-def set_cfg_variables(cfg, model_cfg):
+BASIC_PYTHON_JOB = True
- setattr(cfg, 'int2lm_run', os.path.join(cfg.chain_root, 'int2lm', 'run'))
- setattr(cfg, 'int2lm_output',
- os.path.join(cfg.chain_root, 'int2lm', 'output'))
- return cfg
+def main(cfg):
+ """Setup the namelist for int2lm and submit the job to the queue.
+ Necessary for both COSMO and COSMOART simulations.
-def main(cfg, model_cfg):
- """Setup the namelist for **int2lm** and submit the job to the queue.
-
- Necessary for both **COSMO** and **COSMOART** simulations.
-
Decide if the soil model should be TERRA or TERRA multi-layer depending on
- ``startdate`` of the simulation.
+    ``startdate`` of the simulation.
- Create necessary directory structure to run **int2lm** (run and output
+ Create necessary directory structure to run int2lm (run and output
directories, defined in ``cfg.int2lm`` and ``cfg.int2lm['output']``).
- Copy the **int2lm**-executable from ``cfg.int2lm['binary_file']`` to
+ Copy the int2lm-executable from ``cfg.int2lm['binary_file']`` to
``cfg.int2lm['work']/int2lm``.
Copy the extpar-file ``cfg.int2lm['extpar_file']`` to
``cfg.int2lm_run/work``.
- **COSMOART**: Copy the ``libgrib_api`` files to
+ COSMOART: Copy the ``libgrib_api`` files to
``cfg.int2lm['work']/libgrib_api``.
- **COSMO**: Convert the tracer-csv-files into a **int2lm**-namelist file.
+ COSMO: Convert the tracer-csv-files into an int2lm-namelist file.
- Format the **int2lm**-namelist-template using the information in ``cfg``.
+ Format the int2lm-namelist-template using the information in ``cfg``.
Format the runscript-template and submit the job.
Parameters
----------
- starttime : datetime-object
- The starting date of the simulation
- hstart : int
- Offset (in hours) of the actual start from the starttime
- hstop : int
- Length of simulation (in hours)
- cfg : config-object
- Object holding all user-configuration parameters as attributes
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
"""
- cfg = prepare_data.set_cfg_variables(cfg, model_cfg)
- cfg = set_cfg_variables(cfg, model_cfg)
+ tools.change_logfile(cfg.logfile)
+ prepare_cosmo.set_cfg_variables(cfg)
# Total number of processes
np_tot = cfg.int2lm['np_x'] * cfg.int2lm['np_y']
@@ -89,7 +68,7 @@ def main(cfg, model_cfg):
extpar_dir)
# Copy landuse and plant-functional-type files
- if cfg.model == 'cosmo-art':
+ if hasattr(cfg, 'photo_rate'):
lu_file_src = cfg.int2lm['lu_file']
lu_file_dst = os.path.join(extpar_dir, 'landuse.nc')
tools.copy_file(lu_file_src, lu_file_dst)
@@ -159,12 +138,8 @@ def main(cfg, model_cfg):
cfg.int2lm['runjob_filename'])) as input_file:
int2lm_runscript = input_file.read()
- # Logfile variables
- logfile = os.path.join(cfg.log_working_dir, "int2lm")
- logfile_finish = os.path.join(cfg.log_finished_dir, "int2lm")
-
- output_file = os.path.join(int2lm_run, "run.job")
- with open(output_file, "w") as outf:
+ script = (cfg.int2lm_run / 'run_int2lm.job')
+ with open(script, "w") as outf:
outf.write(
int2lm_runscript.format(cfg=cfg,
**cfg.int2lm,
@@ -173,13 +148,8 @@ def main(cfg, model_cfg):
ini_hour=cfg.startdate_sim_yyyymmddhh[8:],
np_tot=np_tot,
hstop_int2lm=hstop_int2lm,
- logfile=logfile,
- logfile_finish=logfile_finish))
+ logfile=cfg.logfile,
+ logfile_finish=cfg.logfile_finish))
# Submit job
- result = subprocess.run(
- ["sbatch", "--wait",
- os.path.join(int2lm_run, "run.job")])
- exitcode = result.returncode
- if exitcode != 0:
- raise RuntimeError("sbatch returned exitcode {}".format(exitcode))
+ cfg.submit('int2lm', script)
diff --git a/jobs/obs_nudging.py b/jobs/obs_nudging.py
index bf2557cb..57119af1 100644
--- a/jobs/obs_nudging.py
+++ b/jobs/obs_nudging.py
@@ -3,13 +3,14 @@
import os
import logging
-import shutil
from datetime import timedelta
from . import tools
+BASIC_PYTHON_JOB = True
-def main(cfg, model_cfg):
+
+def main(cfg):
"""Copy and rename the obs_nudging files to the **COSMO** input directory.
In the folder ``cfg.obs_nudging_dir``, the files are saved in the format
@@ -23,10 +24,11 @@ def main(cfg, model_cfg):
Also copies the blacklist-file blklsttmp.
Parameters
- ----------
- cfg : config-object
- Object holding all user-configuration parameters as attributes
+ ----------
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
"""
+ tools.change_logfile(cfg.logfile)
dest_dir = os.path.join(cfg.cosmo_input, "obs_nudging")
tools.create_dir(dest_dir, "obs nudging input")
diff --git a/jobs/octe.py b/jobs/octe.py
index b52f2a03..1efbe486 100644
--- a/jobs/octe.py
+++ b/jobs/octe.py
@@ -11,6 +11,8 @@
from . import tools
+BASIC_PYTHON_JOB = True
+
def create_dir_and_copy_input(dest_dir, lambdas_src, maps_src):
"""Create a directory at dest_dir (**COSMO** input) and copy src there.
@@ -161,28 +163,23 @@ def perturb_bgs_in_dir(lambdas_nc, directory):
entry.name))
-def main(cfg, model_cfg):
+def main(cfg):
"""Copy necessary input files for **COSMO** and perturb BG.
- Copies the NetCDF-files found at cfg.octe_maps and cfg.octe_lambdas to
+ Copies the NetCDF-files found at ``cfg.octe_maps`` and ``cfg.octe_lambdas`` to
the **COSMO** input-directory.
Perturbs the background tracer field. To do that, it reads the lambda-value
- from the cfg.octe_lambdas (last value along the nparam-dimension) and
+ from the ``cfg.octe_lambdas`` (last value along the nparam-dimension) and
scales the BG-field produced by int2lm, creating a new variable for each
ensemble.
Parameters
----------
- starttime : datetime-object
- The starting date of the simulation
- hstart : int
- Offset (in hours) of the actual start from the starttime
- hstop : int
- Length of simulation (in hours)
- cfg : config-object
- Object holding all user-configuration parameters as attributes
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
"""
+ tools.change_logfile(cfg.logfile)
dest_dir = join(cfg.cosmo_input, 'octe')
create_dir_and_copy_input(dest_dir=dest_dir,
lambdas_src=cfg.octe_lambdas,
diff --git a/jobs/oem.py b/jobs/oem.py
index 358af4e9..e9cf42d7 100644
--- a/jobs/oem.py
+++ b/jobs/oem.py
@@ -1,29 +1,30 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-#
import os
import logging
-from . import tools, cosmo
+from . import tools, prepare_cosmo
+BASIC_PYTHON_JOB = True
-def main(cfg, model_cfg):
+
+def main(cfg):
"""Copy emission and profile files to the **cosmo** or **icon** input
directory.
Parameters
----------
- starttime : datetime-object
- The starting date of the simulation
- hstart : int
- Offset (in hours) of the actual start from the starttime
- hstop : int
- Length of simulation (in hours)
- cfg : config-object
- Object holding all user-configuration parameters as attributes
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
+
+ Raises
+ ------
+ RuntimeError
+        If neither the (hod/dow/moy) nor the (hoy) profile files are
+        given for the online emissions.
"""
- cfg = cosmo.set_cfg_variables(cfg, model_cfg)
+ tools.change_logfile(cfg.logfile)
+ prepare_cosmo.set_cfg_variables(cfg)
oem_dir = cfg.oem['dir']
oem_gridded_emissions_nc = os.path.join(oem_dir,
@@ -51,9 +52,9 @@ def main(cfg, model_cfg):
raise RuntimeError("At least one of (hod/dow/moy) or (hoy) netcdfs "
" have to be given for online emissions")
- if cfg.model.startswith('icon'):
+ if hasattr(cfg, 'icon'):
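+        # The target model is inferred from which section exists in the
+        # case config ('icon' or 'cosmo'), replacing the old cfg.model.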
input_dir = cfg.icon_input
- else:
+ elif hasattr(cfg, 'cosmo'):
input_dir = cfg.cosmo_input
dest_dir = os.path.join(input_dir, "oem")
tools.create_dir(dest_dir, "online emissions input")
diff --git a/jobs/online_vprm.py b/jobs/online_vprm.py
index 106457cf..a1e2312e 100644
--- a/jobs/online_vprm.py
+++ b/jobs/online_vprm.py
@@ -1,29 +1,25 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-#
import os
import logging
-from . import tools
+from . import tools, prepare_cosmo
+BASIC_PYTHON_JOB = True
-def main(cfg, model_cfg):
- """Copy MODIS surface reflectance data and vegatation class fraction file
+
+def main(cfg):
+ """Copy MODIS surface reflectance data and vegetation class fraction file
to the **cosmo** input directory.
Parameters
----------
- starttime : datetime-object
- The starting date of the simulation
- hstart : int
- Offset (in hours) of the actual start from the starttime
- hstop : int
- Length of simulation (in hours)
- cfg : config-object
- Object holding all user-configuration parameters as attributes
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
"""
-
+ tools.change_logfile(cfg.logfile)
+ prepare_cosmo.set_cfg_variables(cfg)
dest_modis = 'modis.nc'
dest_vegetation = 'vegetation.nc'
diff --git a/jobs/photo_rate.py b/jobs/photo_rate.py
index 7f8360b2..afdb4716 100644
--- a/jobs/photo_rate.py
+++ b/jobs/photo_rate.py
@@ -6,27 +6,23 @@
from . import tools
+BASIC_PYTHON_JOB = True
-def main(cfg, model_cfg):
+
+def main(cfg):
"""Copy photolysis-rate file to the **COSMOART** input directory.
Only necessary for **COSMOART** simulations.
- Copy the photolysis-rate file from the project (``cfg.photo_rate_file``) to
- the **COSMOART** input folder on scratch (``cfg.cosmo_input/art_photolysis``).
+    Copy the photolysis-rate file from the project (``cfg.photo_rate_file``) to
+    the **COSMOART** input folder on scratch (``cfg.cosmo_input/art_photolysis``).
Parameters
- ----------
- start_time : datetime-object
- The starting date of the simulation
- hstart : int
- Offset (in hours) of the actual start from the start_time
- hstop : int
- Length of simulation (in hours)
- cfg : config-object
- Object holding all user-configuration parameters as attributes
+ ----------
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
"""
- tools.check_model(cfg, 'cosmo-art')
+ tools.change_logfile(cfg.logfile)
logging.info("Copying photolysis-rate file from {} to {}".format(
cfg.photo_rate_file,
diff --git a/jobs/post_cosmo.py b/jobs/post_cosmo.py
index 56d4dc1d..0442915a 100644
--- a/jobs/post_cosmo.py
+++ b/jobs/post_cosmo.py
@@ -1,16 +1,13 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-# Copy cosmo output from scratch to store (or anywhere else)
-
-### DEVELOPMENT VERSION ###
-
import logging
import os
import datetime
-from subprocess import call
-from . import tools, int2lm, cosmo
+from . import tools, prepare_cosmo
+
+BASIC_PYTHON_JOB = False
def logfile_header_template():
@@ -49,13 +46,13 @@ def runscript_commands_template():
])
-def main(cfg, model_cfg):
+def main(cfg):
"""Copy the output of a **COSMO**-run to a user-defined position.
Write a runscript to copy all files (**COSMO** settings & output,
**int2lm** settings, logfiles) from ``cfg.cosmo_run``,
``cfg.cosmo_output``, ``cfg.int2lm_run``, ``cfg.log_finished_dir`` to
- ``cfg.output_root/...`` .
+ ``cfg.output_root/...``.
If the job ``reduce_output`` has been run before ``post_cosmo``, a
directory ``cfg.cosmo_output_reduced`` is created. In this case,
``cfg.cosmo_output_reduced`` is copied instead of ``cfg.cosmo_output``.
@@ -64,15 +61,12 @@ def main(cfg, model_cfg):
Parameters
----------
- cfg : config-object
- Object holding all user-configuration parameters as attributes
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
"""
- cfg = int2lm.set_cfg_variables(cfg, model_cfg)
- cfg = cosmo.set_cfg_variables(cfg, model_cfg)
+ tools.change_logfile(cfg.logfile)
+ prepare_cosmo.set_cfg_variables(cfg)
- logfile = os.path.join(cfg.log_working_dir, "post_cosmo")
- cosmo_run_dir = cfg.cosmo_run
- runscript_path = os.path.join(cfg.cosmo_run, "post_cosmo.job")
copy_path = os.path.join(
cfg.post_cosmo['output_root'],
cfg.startdate_sim_yyyymmddhh + "_" + cfg.enddate_sim_yyyymmddhh)
@@ -84,16 +78,16 @@ def main(cfg, model_cfg):
runscript_content = "#!/bin/bash\n"
runscript_content += runscript_header_template().format(
compute_account=cfg.compute_account,
- logfile=logfile,
+ logfile=cfg.logfile,
constraint=cfg.constraint,
cosmo_run=cfg.cosmo_run)
if os.path.isdir(cfg.cosmo_output_reduced):
- cosmo_output_src = cfg.cosmo_output_reduced.rstrip('/')
+ cosmo_output_src = str(cfg.cosmo_output_reduced).rstrip('/')
cosmo_output_dest = os.path.join(copy_path,
"cosmo_output_reduced").rstrip('/')
else:
- cosmo_output_src = cfg.cosmo_output.rstrip('/')
+ cosmo_output_src = str(cfg.cosmo_output).rstrip('/')
cosmo_output_dest = os.path.join(copy_path, "cosmo_output").rstrip('/')
# Create new directories
@@ -110,36 +104,22 @@ def main(cfg, model_cfg):
# Format the runscript
runscript_content += runscript_commands_template().format(
target_dir=copy_path.rstrip('/'),
- int2lm_run_src=cfg.int2lm_run.rstrip('/'),
+ int2lm_run_src=str(cfg.int2lm_run).rstrip('/'),
int2lm_run_dest=int2lm_run_path.rstrip('/'),
- cosmo_run_src=cfg.cosmo_run.rstrip('/'),
+ cosmo_run_src=str(cfg.cosmo_run).rstrip('/'),
cosmo_run_dest=cosmo_run_path.rstrip('/'),
cosmo_output_src=cosmo_output_src,
cosmo_output_dest=cosmo_output_dest_path,
- logs_src=cfg.log_finished_dir.rstrip('/'),
+ logs_src=str(cfg.log_finished_dir).rstrip('/'),
logs_dest=logs_path.rstrip('/'))
- # Wait for Cosmo to finish first
- tools.check_job_completion(cfg.log_finished_dir, "cosmo")
-
- with open(runscript_path, "w") as script:
- script.write(runscript_content)
+ os.makedirs(cfg.cosmo_run, exist_ok=True)
+ script = (cfg.cosmo_run / 'run_post_cosmo.job')
+ with open(script, "w") as outf:
+ outf.write(runscript_content)
logging.info("Submitting the copy job to the xfer queue")
logging.info("Make sure you have the module 'xalt' unloaded!")
- sbatch_wait = getattr(cfg, "wait", "True")
-
- if sbatch_wait:
- exitcode = call(["sbatch", "--wait", runscript_path])
- logging.info(logfile_header_template().format(
- "ENDS", str(datetime.datetime.today())))
-
- # copy own logfile aswell
- tools.copy_file(logfile, os.path.join(copy_path, "logs/"))
-
- else:
- exitcode = call(["sbatch", runscript_path])
-
- if exitcode != 0:
- raise RuntimeError("sbatch returned exitcode {}".format(exitcode))
+ # Submit job
+ cfg.submit('post_cosmo', script)
diff --git a/jobs/post_int2lm.py b/jobs/post_int2lm.py
index 787f7907..b0e78c0c 100644
--- a/jobs/post_int2lm.py
+++ b/jobs/post_int2lm.py
@@ -5,11 +5,14 @@
import os
import glob
import netCDF4 as nc
+
from datetime import datetime, timedelta
-from . import tools, int2lm
+from . import tools, prepare_cosmo
+
+BASIC_PYTHON_JOB = True
-def main(cfg, model_cfg):
+def main(cfg):
"""Combine multiple **int2lm** tracer-output files into a single one for
**COSMO**.
@@ -25,10 +28,11 @@ def main(cfg, model_cfg):
Parameters
----------
- cfg : config-object
- Object holding all user-configuration parameters as attributes
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
"""
- cfg = int2lm.set_cfg_variables(cfg, model_cfg)
+ prepare_cosmo.set_cfg_variables(cfg)
+ tools.change_logfile(cfg.logfile)
# Int2lm processing always starts at hstart=0, thus modifying inidate
inidate_int2lm_yyyymmddhh = cfg.startdate_sim_yyyymmddhh
diff --git a/jobs/prepare_art.py b/jobs/prepare_art.py
new file mode 100644
index 00000000..749fd7ce
--- /dev/null
+++ b/jobs/prepare_art.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import logging
+import xarray as xr
+import numpy as np
+from . import tools, prepare_icon
+
+BASIC_PYTHON_JOB = True
+
+
+def main(cfg):
+ """
+ Prepare ICON-ART simulations.
+
+ - Add GEOSP to all meteo files that don't contain it
+ - Add Q (copy of QV) and/or PS to initial file
+ """
+ prepare_icon.set_cfg_variables(cfg)
+ tools.change_logfile(cfg.logfile)
+
+ logging.info('Add Q (copy of QV) and/or PS to initial file')
+ meteo_file = os.path.join(
+ cfg.icon_input_icbc,
+ cfg.startdate_sim.strftime(cfg.meteo['prefix'] +
+ cfg.meteo['nameformat']) + '.nc')
+ if os.path.isfile(meteo_file):
+ merged_file = os.path.join(
+ cfg.icon_input_icbc,
+ cfg.startdate_sim.strftime(cfg.meteo['prefix'] +
+ cfg.meteo['nameformat']) + '_merged.nc')
+ ds = xr.open_dataset(meteo_file)
+ merging = False
+ if 'PS' not in ds:
+ if 'LNPS' not in ds:
+ raise KeyError(
+ f"'LNPS' must be found in the initial conditions file {meteo_file}"
+ )
+ merging = True
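+            # LNPS holds the logarithm of surface pressure on a single
+            # 'lev_2' level; exponentiate and squeeze to obtain PS in Pa.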
+ ds['PS'] = ds['LNPS']
+ ds['PS'].attrs = ds['LNPS'].attrs
+ ds['PS'] = np.exp(ds['PS'])
+ ds['PS'] = ds['PS'].squeeze(dim='lev_2')
+ ds['PS'].attrs["long_name"] = 'surface pressure'
+ ds['PS'].attrs['units'] = 'Pa'
+ logging.info(f"Added PS to file {meteo_file}")
+ if 'Q' not in ds:
+ merging = True
+ ds['Q'] = ds['QV']
+ logging.info(f"Added Q to file {meteo_file}")
+ if merging:
+ ds.to_netcdf(merged_file)
+ tools.rename_file(merged_file, meteo_file)
+ logging.info('OK')
diff --git a/jobs/prepare_art_global.py b/jobs/prepare_art_global.py
new file mode 100644
index 00000000..a425d6b9
--- /dev/null
+++ b/jobs/prepare_art_global.py
@@ -0,0 +1,180 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import logging
+import xarray as xr
+import shutil
+import subprocess
+from . import tools, prepare_icon
+from pathlib import Path # noqa: F401
+from .tools.interpolate_data import create_oh_for_restart, create_oh_for_inicond # noqa: F401
+from .tools.fetch_external_data import fetch_era5, fetch_era5_nudging
+
+BASIC_PYTHON_JOB = True
+
+
+def main(cfg):
+ """
+ Prepare global ICON-ART simulations.
+
+ Parameters
+ ----------
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
+ """
+ prepare_icon.set_cfg_variables(cfg)
+ tools.change_logfile(cfg.logfile)
+ logging.info("Prepare ICON-ART for global simulations")
+
+ # -- Download ERA5 data and create the inicond file
+ if cfg.era5_inicond and cfg.lrestart == '.FALSE.':
+ # -- Fetch ERA5 data
+ fetch_era5(cfg.startdate_sim, cfg.icon_input_icbc)
+
+        # -- Copy ERA5 processing script (icon_era5_inicond.job) to the working directory
+ with open(cfg.icon_era5_inijob) as input_file:
+ to_write = input_file.read()
+ output_file = os.path.join(cfg.icon_input_icbc, 'icon_era5_inicond.sh')
+ with open(output_file, "w") as outf:
+ outf.write(to_write.format(cfg=cfg))
+
+        # -- Copy mypartab to the working directory
+ shutil.copy(
+ os.path.join(os.path.dirname(cfg.icon_era5_inijob), 'mypartab'),
+ os.path.join(cfg.icon_input_icbc, 'mypartab'))
+
+ # -- Run ERA5 processing script
+ process = subprocess.Popen([
+ "bash",
+ os.path.join(cfg.icon_input_icbc, 'icon_era5_inicond.sh')
+ ],
+ stdout=subprocess.PIPE)
+ process.communicate()
+
+ # -----------------------------------------------------
+ # Create tracer initial conditions
+ # -----------------------------------------------------
+
+ # -- Download and add CAMS data to the inicond file if needed
+ if cfg.species_inicond:
+
+ if cfg.lrestart == '.FALSE.':
+
+ ext_restart = ''
+ filename = cfg.input_files_scratch_inicond_filename
+
+            # -- Copy the script for processing external tracer data to the working directory
+ with open(os.path.join(cfg.case_path,
+ cfg.icon_species_inijob)) as input_file:
+ to_write = input_file.read()
+ output_file = os.path.join(cfg.icon_input_icbc,
+ cfg.icon_species_inijob)
+ with open(output_file, "w") as outf:
+ outf.write(
+ to_write.format(cfg=cfg,
+ filename=filename,
+ ext_restart=ext_restart,
+ year=cfg.startdate_sim.year,
+ month=cfg.startdate_sim.month,
+ day=cfg.startdate_sim.day))
+
+            # -- Run the script processing the external tracer data
+ process = subprocess.Popen(["bash", output_file],
+ stdout=subprocess.PIPE)
+ process.communicate()
+
+ # -- Create initial conditions for OH concentrations
+ if 'TROH' in cfg.species2restart:
+ create_oh_for_inicond(cfg, cfg.startdate_sim.month)
+
+ else:
+
+ # -- Check the extension of tracer variables in the restart file
+ ds_restart = xr.open_dataset(cfg.restart_file) # noqa: F841
+ tracer_name = cfg.species2restart[0] # noqa: F841
+ # FIXME:
+ # var_restart = [
+ # IndexError: list index out of range
+ # var_restart = [
+ # var for var in ds_restart.data_vars.keys()
+ # if var.startswith(tracer_name)
+ # ][0]
+ # ext_restart = var_restart.replace(tracer_name, '')
+
+ # -- Change OH concentrations in the restart file
+ # if 'TROH' in cfg.species2restart:
+ # create_oh_for_restart(cfg, cfg.startdate_sim.month,
+ # ext_restart)
+
+ # -----------------------------------------------------
+ # Create meteorological and tracer nudging conditions
+ # -----------------------------------------------------
+
+ # -- If global nudging, download and process ERA5 and CAMS data
+ if cfg.era5_global_nudging:
+
+ for time in tools.iter_hours(cfg.startdate_sim,
+ cfg.enddate_sim,
+ step=cfg.nudging_step):
+
+ # -- Give a name to the nudging file
+ timestr = time.strftime('%Y%m%d%H')
+ filename = 'era2icon_R2B03_{timestr}_nudging.nc'.format(
+ timestr=timestr)
+
+ # -- If initial time, copy the initial conditions to be used as boundary conditions
+ if time == cfg.startdate_sim and cfg.era5_inicond:
+ shutil.copy(cfg.input_files_scratch_inicond_filename,
+ os.path.join(cfg.icon_input_icbc, filename))
+ continue
+
+ # -- Fetch ERA5 data
+ fetch_era5_nudging(time, cfg.icon_input_icbc)
+
+            # -- Copy ERA5 processing script (icon_era5_nudging.job) to the working directory
+ with open(cfg.icon_era5_nudgingjob) as input_file:
+ to_write = input_file.read()
+ output_file = os.path.join(
+ cfg.icon_input_icbc, 'icon_era5_nudging_{}.sh'.format(timestr))
+ with open(output_file, "w") as outf:
+ outf.write(to_write.format(cfg=cfg, filename=filename))
+
+            # -- Copy mypartab to the working directory
+ if not os.path.exists(os.path.join(cfg.icon_input_icbc,
+ 'mypartab')):
+ shutil.copy(
+ os.path.join(os.path.dirname(cfg.icon_era5_nudgingjob),
+ 'mypartab'),
+ os.path.join(cfg.icon_input_icbc, 'mypartab'))
+
+ # -- Run ERA5 processing script
+ process = subprocess.Popen([
+ "bash",
+ os.path.join(cfg.icon_input_icbc,
+ 'icon_era5_nudging_{}.sh'.format(timestr))
+ ],
+ stdout=subprocess.PIPE)
+ process.communicate()
+
+ if cfg.species_global_nudging:
+
+                # -- Copy CAMS processing script (icon_cams_nudging.job) to the working directory
+ with open(cfg.icon_species_nudgingjob) as input_file:
+ to_write = input_file.read()
+ output_file = os.path.join(
+ cfg.icon_input_icbc,
+ 'icon_cams_nudging_{}.sh'.format(timestr))
+ with open(output_file, "w") as outf:
+ outf.write(to_write.format(cfg=cfg, filename=filename))
+
+                # -- Run CAMS processing script
+ process = subprocess.Popen([
+ "bash",
+ os.path.join(cfg.icon_input_icbc,
+ 'icon_cams_nudging_{}.sh'.format(timestr))
+ ],
+ stdout=subprocess.PIPE)
+ process.communicate()
+
+ logging.info("OK")
diff --git a/jobs/prepare_art_oem.py b/jobs/prepare_art_oem.py
new file mode 100644
index 00000000..b421aa1d
--- /dev/null
+++ b/jobs/prepare_art_oem.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import logging
+import xarray as xr
+from . import tools, prepare_icon
+
+BASIC_PYTHON_JOB = True
+
+
+def main(cfg):
+ """
+ ICON-ART-OEM preparations
+ """
+ prepare_icon.set_cfg_variables(cfg)
+ tools.change_logfile(cfg.logfile)
+ logging.info('Merging IC and LBC')
+
+ if cfg.input_files['oem_gridded_emissions_nc']:
+ for time in tools.iter_hours(cfg.startdate_sim, cfg.enddate_sim,
+ cfg.meteo['inc']):
+ if time == cfg.startdate_sim:
+ #------------
+ # Merge IC:
+ #------------
+ meteo_file = os.path.join(
+ cfg.icon_input_icbc,
+ time.strftime(cfg.meteo['prefix'] +
+ cfg.meteo['nameformat']) + '.nc')
+ if os.path.isfile(meteo_file):
+ chem_file = os.path.join(
+ cfg.icon_input_icbc, cfg.chem['prefix'] +
+ time.strftime(cfg.chem['nameformat']) + '.nc')
+ merged_file = os.path.join(
+ cfg.icon_input_icbc,
+ time.strftime(cfg.meteo['prefix'] +
+ cfg.meteo['nameformat']) + '_merged.nc')
+ ds_meteo = xr.open_dataset(meteo_file)
+ ds_chem = xr.open_dataset(chem_file)
+ # LNPS --> PS
+ ds_chem['PS'] = ds_chem['LNPS']
+ ds_chem['PS'].attrs = ds_chem['LNPS'].attrs
+ ds_chem['PS'] = ds_chem['PS'].squeeze(dim='lev_2')
+ ds_chem['PS'].attrs["long_name"] = 'surface pressure'
+ # merge:
+ ds_merged = xr.merge([ds_meteo, ds_chem],
+ compat="override")
+ #ds_merged.attrs = ds.attrs
+ ds_merged.to_netcdf(merged_file)
+ # Rename file to get original file name
+ tools.rename_file(merged_file, meteo_file)
+ tools.remove_file(chem_file)
+ logging.info(
+ "Added chemical tracer to file {}".format(merged_file))
+
+ #------------
+ # Merge LBC:
+ #------------
+ meteo_file = os.path.join(
+ cfg.icon_input_icbc,
+ time.strftime(cfg.meteo['prefix'] + cfg.meteo['nameformat']) +
+ '_lbc.nc')
+ chem_file = os.path.join(
+ cfg.icon_input_icbc, cfg.chem['prefix'] +
+                time.strftime(cfg.chem['nameformat']) + '_lbc.nc')
+ merged_file = os.path.join(
+ cfg.icon_input_icbc,
+ time.strftime(cfg.meteo['prefix'] + cfg.meteo['nameformat']) +
+ '_merged.nc')
+ ds_meteo = xr.open_dataset(meteo_file)
+ ds_chem = xr.open_dataset(chem_file)
+ # LNPS --> PS
+ ds_chem['PS'] = ds_chem['LNPS']
+ ds_chem['PS'].attrs = ds_chem['LNPS'].attrs
+ ds_chem['PS'].attrs["long_name"] = 'surface pressure'
+ # Remapping chemical tracer names
+ if "remap_tracers" in cfg.chem:
+ for chem_in, chem_out in cfg.chem['remap_tracers'].items():
+ ds_chem[chem_out] = ds_chem[chem_in]
+ # merge:
+ ds_merged = xr.merge([ds_meteo, ds_chem], compat="override")
+ #ds_merged.attrs = ds.attrs
+ ds_merged.to_netcdf(merged_file)
+ # Rename file to get original file name
+ tools.rename_file(merged_file, meteo_file)
+ tools.remove_file(chem_file)
+ logging.info(
+ "Added chemical tracer to file {}".format(merged_file))
diff --git a/jobs/prepare_cosmo.py b/jobs/prepare_cosmo.py
new file mode 100644
index 00000000..8cea9005
--- /dev/null
+++ b/jobs/prepare_cosmo.py
@@ -0,0 +1,247 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from pathlib import Path
+import logging
+import csv
+import os
+from datetime import timedelta
+from . import tools
+
+BASIC_PYTHON_JOB = True
+
+
+def set_cfg_variables(cfg):
+ cfg.int2lm_root = cfg.chain_root / 'int2lm'
+ cfg.int2lm_input = cfg.int2lm_root / 'input'
+ cfg.int2lm_run = cfg.chain_root / 'int2lm' / 'run'
+ cfg.int2lm_output = cfg.chain_root / 'int2lm' / 'output'
+
+ cfg.cosmo_base = cfg.chain_root / 'cosmo'
+ cfg.cosmo_input = cfg.chain_root / 'cosmo' / 'input'
+ cfg.cosmo_run = cfg.chain_root / 'cosmo' / 'run'
+ cfg.cosmo_output = cfg.chain_root / 'cosmo' / 'output'
+ cfg.cosmo_output_reduced = cfg.chain_root / 'cosmo' / 'output_reduced'
+
+ # Number of tracers
+ if 'tracers' in cfg.workflow['features']:
+ tracer_csvfile = cfg.chain_src_dir / 'cases' / cfg.casename / 'cosmo_tracers.csv'
+ if tracer_csvfile.is_file():
+ with open(tracer_csvfile, 'r') as csv_file:
+ reader = csv.DictReader(csv_file, delimiter=',')
+ reader = [r for r in reader if r[''] != '#']
+ cfg.in_tracers = len(reader)
+ else:
+ raise FileNotFoundError(f"File not found: {tracer_csvfile}")
+
+ # tracer_start namelist parameter for spinup simulation
+ if hasattr(cfg, 'spinup'):
+ if cfg.first_one:
+ cfg.tracer_start = 0
+ else:
+ cfg.tracer_start = cfg.spinup
+ else:
+ cfg.tracer_start = 0
+
+ # asynchronous I/O
+    if hasattr(cfg, 'cosmo_np_io'):
+ if cfg.cosmo_np_io == 0:
+ cfg.lasync_io = '.FALSE.'
+ cfg.num_iope_percomm = 0
+ else:
+ cfg.lasync_io = '.TRUE.'
+ cfg.num_iope_percomm = 1
+
+ # If nested run: use output of mother-simulation
+    if 'nesting' in cfg.workflow['features'] and not os.path.isdir(
+            cfg.meteo['dir']):
+        # if cfg.meteo['dir'] doesn't point to a directory,
+        # it is the name of the mother run
+        mother_name = cfg.meteo['dir']
+        cfg.meteo['dir'] = cfg.work_root / mother_name / cfg.chunk_id / 'cosmo' / 'output'
+        cfg.meteo['inc'] = 1
+        cfg.meteo['prefix'] = 'lffd'
+
+
+def main(cfg):
+ """
+ **COSMO Data Preparation**
+
+ This function prepares input data for COSMO simulations by creating necessary directories,
+    copying meteorological files, and preprocessing additional IC/BC datasets
+    (CAMS, CarbonTracker, MOZART).
+
+ - Copy meteorological files to **int2lm** input.
+ - Create the necessary directory ``cfg.int2lm_input/meteo``.
+ - Copy meteorological files from the project directory (``cfg.meteo['dir']/cfg.meteo['prefix']YYYYMMDDHH``)
+ to the int2lm input folder on scratch (``cfg.int2lm_input/meteo``).
+ - For nested runs (meteorological files are COSMO output: ``cfg.meteo['prefix'] == 'lffd'``),
+ also copy the ``*c.nc``-file with constant parameters.
+
+ Parameters
+ ----------
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
+
+ Raises
+ ------
+ RuntimeError
+        If copying or preprocessing of the IC/BC input data fails.
+ """
+ set_cfg_variables(cfg)
+ tools.change_logfile(cfg.logfile)
+
+ logging.info('COSMO analysis data for IC/BC')
+
+ dest_path = cfg.int2lm_input / 'meteo'
+ tools.create_dir(dest_path, "meteo input")
+
+ source_nameformat = cfg.meteo['nameformat']
+ if cfg.meteo['prefix'] == 'lffd':
+ # nested runs use cosmoart-output as meteo data
+ # have to copy the *c.nc-file
+ src_file = (cfg.meteo['dir'] /
+ cfg.startdate_sim.strftime(source_nameformat + 'c.nc'))
+
+ tools.copy_file(src_file, dest_path, output_log=True)
+
+ logging.info("Copied constant-param file from {} to {}".format(
+ src_file, dest_path))
+
+ # extend nameformat with ending to match cosmo-output
+ source_nameformat += '.nc'
+
+ if cfg.meteo['prefix'] == 'efsf':
+ source_nameformat = cfg.meteo['prefix'] + '%y%m%d%H'
+
+ num_steps = 0
+ meteo_dir = cfg.meteo['dir']
+ subdir = meteo_dir / cfg.startdate_sim.strftime('%y%m%d%H')
+ for time in tools.iter_hours(cfg.startdate_sim, cfg.enddate_sim,
+ cfg.meteo['inc']):
+ dest_path = cfg.int2lm_input / 'meteo'
+ src_file = meteo_dir / time.strftime(source_nameformat)
+
+ if cfg.meteo['prefix'] == 'efsf':
+ if time == cfg.startdate_sim:
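+                # The first time step is taken from the 'eas' analysis
+                # file; if it is missing, fall back to the alternative
+                # input directory (dir_alt).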
+ src_file = subdir / ('eas' + time.strftime('%Y%m%d%H'))
+ if not src_file.exists() and cfg.meteo.get('dir_alt') \
+ is not None:
+ meteo_dir = cfg.meteo['dir_alt']
+ subdir = meteo_dir / cfg.startdate_sim.strftime('%y%m%d%H')
+ src_file = subdir / ('eas' + time.strftime('%Y%m%d%H'))
+ dest_path = cfg.int2lm_input / 'meteo' / (cfg.meteo['prefix'] +
+ '00000000')
+ else:
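+                # td: lead time within the current 6-hourly forecast
+                # directory; td_total: lead time since simulation start,
+                # which sets the destination file name.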
+ td = time - cfg.startdate_sim - timedelta(hours=6 * num_steps)
+ days = str(td.days).zfill(2)
+ hours = str(td.seconds // 3600).zfill(2)
+ td_total = time - cfg.startdate_sim
+ days_total = str(td_total.days).zfill(2)
+ hours_total = str(td_total.seconds // 3600).zfill(2)
+
+ src_file = subdir / (cfg.meteo['prefix'] + days + hours +
+ '0000')
+ dest_path = cfg.int2lm_input / 'meteo' / (
+ cfg.meteo['prefix'] + days_total + hours_total + '0000')
+
+ # Next time, change directory
+ checkdir = meteo_dir / time.strftime('%y%m%d%H')
+ if checkdir.is_dir():
+ num_steps += 1
+ subdir = checkdir
+ elif cfg.meteo.get('dir_alt') is not None:
+ checkdir = cfg.meteo['dir_alt'] / time.strftime('%y%m%d%H')
+ if checkdir.is_dir():
+ num_steps += 1
+ subdir = checkdir
+ meteo_dir = cfg.meteo['dir_alt']
+ logging.info(
+ "Switching to other input directory from {} to {}".
+ format(cfg.meteo['dir'], cfg.meteo['dir_alt']))
+ elif not src_file.exists():
+ # special case for MeteoSwiss COSMO-7 data
+ archive = Path('/store/mch/msopr/owm/COSMO-7')
+ yy = time.strftime("%y")
+                path = archive / ('ANA' + yy)
+ src_file = path / time.strftime(source_nameformat)
+
+ # copy meteo file from project folder to
+ tools.copy_file(src_file, dest_path, output_log=True)
+
+ logging.info("Copied file from {} to {}".format(src_file, dest_path))
+
+ # Other IC/BC data
+ inv_to_process = []
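+    # Each entry describes one external IC/BC dataset and the
+    # preprocessing executable in jobs/tools used to convert it.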
+ if hasattr(cfg, 'cams'):
+ try:
+ CAMS = dict(fullname="CAMS",
+ nickname="cams",
+ executable="cams4int2cosmo",
+ indir=cfg.cams['dir_orig'],
+ outdir=cfg.cams['dir_proc'],
+ param=[{
+ 'inc': cfg.cams['inc'],
+ 'suffix': cfg.cams['suffix']
+ }])
+ inv_to_process.append(CAMS)
+ except AttributeError:
+ pass
+ try:
+ CT = dict(fullname="CarbonTracker",
+ nickname="ct",
+ executable="ctnoaa4int2cosmo",
+ indir=cfg.ct_dir_orig,
+ outdir=cfg.ct_dir_proc,
+ param=cfg.ct_parameters)
+ inv_to_process.append(CT)
+ except AttributeError:
+ pass
+ elif hasattr(cfg, 'mozart'):
+ try:
+ MOZART = dict(fullname='MOZART',
+ nickname='mozart',
+ executable='mozart2int2lm',
+ indir=cfg.mozart_file_orig,
+ outdir=cfg.mozart_dir_proc,
+ param=[{
+ 'inc': cfg.mozart_inc,
+ 'suffix': cfg.mozart_prefix
+ }])
+ inv_to_process.append(MOZART)
+ except AttributeError:
+ pass
+
+ if inv_to_process:
+ logging.info("Processing " +
+ ", ".join([i["fullname"]
+ for i in inv_to_process]) + " data")
+
+ scratch_path = cfg.int2lm_input / 'icbc'
+ tools.create_dir(scratch_path, "icbc input")
+
+ for inv in inv_to_process:
+ logging.info(inv["fullname"] + " files")
+ tools.create_dir(inv["outdir"], "processed " + inv["fullname"])
+
+ for p in inv["param"]:
+ inc = p["inc"]
+ for time in tools.iter_hours(cfg.startdate_sim,
+ cfg.enddate_sim, inc):
+ logging.info(time)
+
+ filename = inv["outdir"] / (
+ p["suffix"] + "_" + time.strftime("%Y%m%d%H") + ".nc")
+ if not filename.exists():
+ logging.info(filename)
+ try:
+ to_call = getattr(tools, inv["executable"])
+ to_call.main(time, inv["indir"], inv["outdir"], p)
+ except:
+ logging.error("Preprocessing " + inv["fullname"] +
+ " data failed")
+ raise
+
+ # copy to (temporary) run input directory
+ tools.copy_file(filename, scratch_path, output_log=True)
+
+ logging.info("OK")
diff --git a/jobs/prepare_data.py b/jobs/prepare_data.py
deleted file mode 100644
index 894d2563..00000000
--- a/jobs/prepare_data.py
+++ /dev/null
@@ -1,704 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Prepare initial and boundary conditions
-#
-# In case of ICON:
-# Prepare input for meteorological initial and boundary conditions
-# by remapping the files onto the ICON grid (for IC) and the
-# auxillary lateral-boundary grid (for BC) with the DWD ICON tools
-# and saving them in the input folder.
-# Currently, the input files are assumed to be ifs data.
-# The files are read-in in grib2-format and the the remapped
-# files are saved in netCDF-format (currently only netCDF works
-# for ICON when then the simulation is driven by ifs-data).
-#
-# result in case of success: all meteo input-files necessary are found in
-# ${int2lm_input}/meteo/
-#
-# Dominik Brunner, July 2013
-#
-# 2013-07-16 Initial release, based on Christoph Knote script
-# 2017-01-15 Modified for hypatia and project SmartCarb
-# 2018-06-21 Translated to Python (kug)
-# 2021-02-28 Modified for ICON-simulations (stem)
-# 2021-11-12 Modified for ICON-ART-simulations (mjaehn)
-
-import os
-import logging
-import shutil
-import subprocess
-from datetime import timedelta
-import xarray as xr
-import numpy as np
-from . import tools
-from .tools.interpolate_data import create_oh_for_restart, create_oh_for_inicond
-from .tools.fetch_external_data import fetch_era5, fetch_era5_nudging
-from calendar import monthrange
-
-
-def set_cfg_variables(cfg, model_cfg):
- # TODO: Change setattr() to direct assignment
- if cfg.model.startswith('cosmo'):
- setattr(cfg, 'int2lm_root', os.path.join(cfg.chain_root, 'int2lm'))
- setattr(cfg, 'int2lm_input', os.path.join(cfg.int2lm_root, 'input'))
- elif cfg.model.startswith('icon'):
- setattr(cfg, 'icon_base', os.path.join(cfg.chain_root, 'icon'))
- setattr(cfg, 'icon_input', os.path.join(cfg.chain_root, 'icon',
- 'input'))
- setattr(cfg, 'icon_input_icbc',
- os.path.join(cfg.chain_root, 'icon', 'input', 'icbc'))
- setattr(cfg, 'icon_work', os.path.join(cfg.chain_root, 'icon', 'run'))
- setattr(cfg, 'icon_output',
- os.path.join(cfg.chain_root, 'icon', 'output'))
- setattr(cfg, 'icon_output_reduced',
- os.path.join(cfg.chain_root, 'icon', 'output_reduced'))
- setattr(cfg, 'icon_restart_out',
- os.path.join(cfg.chain_root, 'icon', 'restart'))
- setattr(cfg, 'icon_restart_in',
- os.path.join(cfg.chain_root_prev, 'icon', 'run'))
- setattr(cfg, 'icon_input_icbc_prev',
- os.path.join(cfg.chain_root_prev, 'icon', 'input', 'icbc'))
-
- cfg.input_files_scratch = {}
- for varname in cfg.input_files:
- cfg.input_files_scratch[varname] = os.path.join(
- cfg.icon_input, os.path.basename(cfg.input_files[varname]))
- cfg.create_vars_from_dicts()
-
- cfg.ini_datetime_string = cfg.startdate.strftime('%Y-%m-%dT%H:00:00Z')
- cfg.end_datetime_string = cfg.enddate.strftime('%Y-%m-%dT%H:00:00Z')
-
- if cfg.model == 'icon-art-oem':
- cfg.startdate_sim_yyyymmdd_hh = cfg.startdate_sim.strftime(
- '%Y%m%d_%H')
-
- if cfg.model == 'icon-art-global':
- # Nudge type (global or nothing)
- cfg.nudge_type = 2 if cfg.era5_global_nudging else 0
- # Time step for global nudging in seconds
- cfg.nudging_step_seconds = cfg.nudging_step * 3600
- # Prescribed initial conditions for CH4, CO and/or OH
- cfg.iart_init_gas = 4 if cfg.species_inicond else 0
-
- if cfg.lrestart == '.TRUE.':
- cfg.restart_filename = 'restart_atm_DOM01.nc'
- cfg.restart_file = os.path.join(cfg.icon_restart_in,
- cfg.restart_filename)
- cfg.restart_file_scratch = os.path.join(cfg.icon_work,
- cfg.restart_filename)
-
- return cfg
-
-
-def main(cfg, model_cfg):
- """
- **ICON**
-
- Create necessary directories ``cfg.icon_input_icbc``
- and ''cfg.icon_work''
-
- Submitting the runscript for the DWD ICON tools to remap the meteo files.
-
- All runscripts specified in ``cfg.icontools_runjobs`` are submitted.
-
- The meteo files are read-in from the original input directory
- (``cfg.input_root_meteo``) and the remapped meteo files are
- saved in the input folder on scratch (``cfg.icon_input/icbc``).
-
- The constant variable 'GEOSP' is added to the files not containing it
- using python-cdo bindings.
-
- **COSMO**
-
- Copy meteo files to **int2lm** input.
-
- Create necessary directory ``cfg.int2lm_input/meteo``. Copy meteo files
- from project directory (``cfg.meteo['dir']/cfg.meteo['prefix']YYYYMMDDHH``) to
- int2lm input folder on scratch (``cfg.int2lm_input/meteo``).
-
- For nested runs (meteo files are cosmo-output: ``cfg.meteo['prefix'] ==
- 'lffd'``), also the ``*c.nc``-file with constant parameters is copied.
-
-
- Parameters
- ----------
- startdate : datetime-object
- The start date of the simulation
- enddate : datetime-object
- The end date of the simulation
- cfg : config-object
- Object holding all user-configuration parameters as attributes
- """
-
- cfg = set_cfg_variables(cfg, model_cfg)
-
- if cfg.model.startswith('icon'):
- logging.info('ICON input data (IC/BC)')
-
- #-----------------------------------------------------
- # Create directories
- #-----------------------------------------------------
- tools.create_dir(cfg.icon_work, "icon_work")
- tools.create_dir(cfg.icon_input_icbc, "icon_input_icbc")
- tools.create_dir(cfg.icon_output, "icon_output")
- tools.create_dir(cfg.icon_restart_out, "icon_restart_out")
-
- #-----------------------------------------------------
- # Copy input files
- #-----------------------------------------------------
- for varname in cfg.input_files:
- varname_scratch = f'{varname}_scratch'
- tools.copy_file(cfg.input_files[varname],
- cfg.input_files_scratch[varname],
- output_log=True)
-
- if cfg.model == 'icon-art-global':
- # -- Download ERA5 data and create the inicond file
- if cfg.era5_inicond and cfg.lrestart == '.FALSE.':
- # -- Fetch ERA5 data
- fetch_era5(cfg.startdate_sim, cfg.icon_input_icbc)
-
- # -- Copy ERA5 processing script (icon_era5_inicond.job) in workdir
- with open(cfg.icon_era5_inijob) as input_file:
- to_write = input_file.read()
- output_file = os.path.join(cfg.icon_input_icbc,
- 'icon_era5_inicond.sh')
- with open(output_file, "w") as outf:
- outf.write(to_write.format(cfg=cfg))
-
- # -- Copy mypartab in workdir
- shutil.copy(
- os.path.join(os.path.dirname(cfg.icon_era5_inijob),
- 'mypartab'),
- os.path.join(cfg.icon_input_icbc, 'mypartab'))
-
- # -- Run ERA5 processing script
- process = subprocess.Popen([
- "bash",
- os.path.join(cfg.icon_input_icbc, 'icon_era5_inicond.sh')
- ],
- stdout=subprocess.PIPE)
- process.communicate()
-
- # -----------------------------------------------------
- # Create tracer initial conditions
- # -----------------------------------------------------
-
- # -- Download and add CAMS data to the inicond file if needed
- if cfg.species_inicond:
-
- if cfg.lrestart == '.FALSE.':
-
- ext_restart = ''
- filename = cfg.input_files_scratch_inicond_filename
-
- # -- Copy the script for processing external tracer data in workdir
- with open(
- os.path.join(
- cfg.case_path,
- cfg.icon_species_inijob)) as input_file:
- to_write = input_file.read()
- output_file = os.path.join(cfg.icon_input_icbc,
- cfg.icon_species_inijob)
- with open(output_file, "w") as outf:
- outf.write(
- to_write.format(cfg=cfg,
- filename=filename,
- ext_restart=ext_restart,
- year=cfg.startdate_sim.year,
- month=cfg.startdate_sim.month,
- day=cfg.startdate_sim.day))
-
- # -- Run ERA5 processing script
- process = subprocess.Popen(["bash", output_file],
- stdout=subprocess.PIPE)
- process.communicate()
-
- # -- Create initial conditions for OH concentrations
- if 'TROH' in cfg.species2restart:
- create_oh_for_inicond(cfg, cfg.startdate_sim.month)
-
- else:
-
- # -- Check the extension of tracer variables in the restart file
- ds_restart = xr.open_dataset(cfg.restart_file)
- tracer_name = cfg.species2restart[0]
- # FIXME:
- # var_restart = [
- # IndexError: list index out of range
- # var_restart = [
- # var for var in ds_restart.data_vars.keys()
- # if var.startswith(tracer_name)
- # ][0]
- # ext_restart = var_restart.replace(tracer_name, '')
-
- # -- Change OH concentrations in the restart file
- # if 'TROH' in cfg.species2restart:
- # create_oh_for_restart(cfg, cfg.startdate_sim.month,
- # ext_restart)
-
- # -----------------------------------------------------
- # Create meteorological and tracer nudging conditions
- # -----------------------------------------------------
-
- # -- If global nudging, download and process ERA5 and CAMS data
- if cfg.era5_global_nudging:
-
- for time in tools.iter_hours(cfg.startdate_sim,
- cfg.enddate_sim,
- step=cfg.nudging_step):
-
- # -- Give a name to the nudging file
- timestr = time.strftime('%Y%m%d%H')
- filename = 'era2icon_R2B03_{timestr}_nudging.nc'.format(
- timestr=timestr)
-
- # -- If initial time, copy the initial conditions to be used as boundary conditions
- if time == cfg.startdate_sim and cfg.era5_inicond:
- shutil.copy(
- cfg.input_files_scratch_inicond_filename,
- os.path.join(cfg.icon_input_icbc, filename))
- continue
-
- # -- Fetch ERA5 data
- fetch_era5_nudging(time, cfg.icon_input_icbc)
-
- # -- Copy ERA5 processing script (icon_era5_nudging.job) in workdir
- with open(cfg.icon_era5_nudgingjob) as input_file:
- to_write = input_file.read()
- output_file = os.path.join(
- cfg.icon_input_icbc,
- 'icon_era5_nudging_{}.sh'.format(timestr))
- with open(output_file, "w") as outf:
- outf.write(to_write.format(cfg=cfg, filename=filename))
-
- # -- Copy mypartab in workdir
- if not os.path.exists(
- os.path.join(cfg.icon_input_icbc, 'mypartab')):
- shutil.copy(
- os.path.join(
- os.path.dirname(cfg.icon_era5_nudgingjob),
- 'mypartab'),
- os.path.join(cfg.icon_input_icbc, 'mypartab'))
-
- # -- Run ERA5 processing script
- process = subprocess.Popen([
- "bash",
- os.path.join(cfg.icon_input_icbc,
- 'icon_era5_nudging_{}.sh'.format(timestr))
- ],
- stdout=subprocess.PIPE)
- process.communicate()
-
- if cfg.species_global_nudging:
-
- # -- Copy CAMS processing script (icon_cams_nudging.job) in workdir
- with open(cfg.icon_species_nudgingjob) as input_file:
- to_write = input_file.read()
- output_file = os.path.join(
- cfg.icon_input_icbc,
- 'icon_cams_nudging_{}.sh'.format(timestr))
- with open(output_file, "w") as outf:
- outf.write(
- to_write.format(cfg=cfg, filename=filename))
-
- # -- Run ERA5 processing script
- process = subprocess.Popen([
- "bash",
- os.path.join(
- cfg.icon_input_icbc,
- 'icon_cams_nudging_{}.sh'.format(timestr))
- ],
- stdout=subprocess.PIPE)
- process.communicate()
-
- else: # non-global ICON-ART
- #-----------------------------------------------------
- # Create LBC datafile lists (each at 00 UTC and others)
- #-----------------------------------------------------
- datafile_list = []
- datafile_list_rest = []
- datafile_list_chem = []
- for time in tools.iter_hours(cfg.startdate_sim, cfg.enddate_sim,
- cfg.meteo['inc']):
- meteo_file = os.path.join(
- cfg.icon_input_icbc, cfg.meteo['prefix'] +
- time.strftime(cfg.meteo['nameformat']))
- if cfg.model == 'icon-art' or cfg.model == 'icon-art-oem':
- chem_file = os.path.join(
- cfg.icon_input_icbc, cfg.chem['prefix'] +
- time.strftime(cfg.chem_nameformat))
- datafile_list_chem.append(chem_file + cfg.chem['suffix'])
- if meteo_file.endswith('00'):
- datafile_list.append(meteo_file + cfg.meteo['suffix'])
- else:
- datafile_list_rest.append(meteo_file + cfg.meteo['suffix'])
- datafile_list = ' '.join([str(v) for v in datafile_list])
- datafile_list_rest = ' '.join([str(v) for v in datafile_list_rest])
- datafile_list_chem = ' '.join([str(v) for v in datafile_list_chem])
-
- #-----------------------------------------------------
- # Write and submit runscripts
- #-----------------------------------------------------
- for runscript in cfg.icontools_runjobs:
- logfile = os.path.join(cfg.log_working_dir, 'prepare_data')
- logfile_finish = os.path.join(cfg.log_finished_dir,
- 'prepare_data')
- with open(os.path.join(cfg.case_path,
- runscript)) as input_file:
- to_write = input_file.read()
- output_run = os.path.join(cfg.icon_work, "%s.job" % runscript)
- with open(output_run, "w") as outf:
- outf.write(
- to_write.format(cfg=cfg,
- meteo=cfg.meteo,
- logfile=logfile,
- logfile_finish=logfile_finish,
- datafile_list=datafile_list,
- datafile_list_rest=datafile_list_rest,
- datafile_list_chem=datafile_list_chem))
- logging.info(f" Starting icontools runscript {runscript}.")
- result = subprocess.run([
- "sbatch", "--wait",
- os.path.join(cfg.icon_work, "%s.job" % runscript)
- ])
- exitcode = result.returncode
- if exitcode != 0:
- raise RuntimeError(
- "sbatch returned exitcode {}".format(exitcode))
- logging.info(f"{runscript} successfully executed.")
-
- #-----------------------------------------------------
- # Add GEOSP to all meteo files
- #-----------------------------------------------------
- for time in tools.iter_hours(cfg.startdate_sim, cfg.enddate_sim,
- cfg.meteo['inc']):
- # Specify file names
- geosp_filename = time.replace(
- hour=0).strftime(cfg.meteo['prefix'] +
- cfg.meteo['nameformat']) + '_lbc.nc'
- geosp_file = os.path.join(cfg.icon_input_icbc, geosp_filename)
- src_filename = time.strftime(
- cfg.meteo['prefix'] + cfg.meteo['nameformat']) + '_lbc.nc'
- src_file = os.path.join(cfg.icon_input_icbc, src_filename)
- merged_filename = time.strftime(
- cfg.meteo['prefix'] +
- cfg.meteo['nameformat']) + '_merged.nc'
- merged_file = os.path.join(cfg.icon_input_icbc,
- merged_filename)
-
- # Copy GEOSP file from last run if not present
- if not os.path.exists(geosp_file):
- geosp_src_file = os.path.join(cfg.icon_input_icbc_prev,
- geosp_filename)
- tools.copy_file(geosp_src_file,
- cfg.icon_input_icbc,
- output_log=True)
-
- # Load GEOSP data array as da_geosp at time 00:
- ds = xr.open_dataset(src_file)
- ds_geosp = xr.open_dataset(geosp_file)
- da_geosp = ds_geosp['GEOSP']
-
- # Merge GEOSP-dataset with other timesteps
- if (time.hour != 0):
- # Change values of time dimension to current time
- da_geosp = da_geosp.assign_coords(
- time=[np.datetime64(time)])
- # Merge GEOSP into temporary file
- ds_merged = xr.merge([ds, da_geosp])
- ds_merged.attrs = ds.attrs
- ds_merged.to_netcdf(merged_file)
- # Logging info for merging GEOSP
- logging.info("Added GEOSP to file {}".format(merged_file))
- # Rename file to get original file name
- tools.rename_file(merged_file, src_file)
-
- #-----------------------------------------------------
- # Add Q (copy of QV) and/or PS to initial file
- #-----------------------------------------------------
- if cfg.model.startswith('icon-art'):
- meteo_file = os.path.join(
- cfg.icon_input_icbc,
- cfg.startdate_sim.strftime(cfg.meteo['prefix'] +
- cfg.meteo['nameformat']) +
- '.nc')
- if os.path.isfile(meteo_file):
- merged_file = os.path.join(
- cfg.icon_input_icbc,
- cfg.startdate_sim.strftime(cfg.meteo['prefix'] +
- cfg.meteo['nameformat']) +
- '_merged.nc')
- ds = xr.open_dataset(meteo_file)
- merging = False
- if 'PS' not in ds:
- if 'LNPS' not in ds:
- raise KeyError(
- f"'LNPS' must be found in the initial conditions file {meteo_file}"
- )
- merging = True
- ds['PS'] = ds['LNPS']
- ds['PS'].attrs = ds['LNPS'].attrs
- ds['PS'] = np.exp(ds['PS'])
- ds['PS'] = ds['PS'].squeeze(dim='lev_2')
- ds['PS'].attrs["long_name"] = 'surface pressure'
- ds['PS'].attrs['units'] = 'Pa'
- logging.info(f"Added PS to file {meteo_file}")
- if 'Q' not in ds:
- merging = True
- ds['Q'] = ds['QV']
- logging.info(f"Added Q to file {meteo_file}")
- if merging:
- ds.to_netcdf(merged_file)
- tools.rename_file(merged_file, meteo_file)
-
- #-----------------------------------------------------
- # In case of OEM: merge chem tracers with meteo-files
- #-----------------------------------------------------
- if cfg.model == 'icon-art-oem':
- for time in tools.iter_hours(cfg.startdate_sim,
- cfg.enddate_sim,
- cfg.meteo['inc']):
- if time == cfg.startdate_sim:
- #------------
- # Merge IC:
- #------------
- meteo_file = os.path.join(
- cfg.icon_input_icbc,
- time.strftime(cfg.meteo['prefix'] +
- cfg.meteo['nameformat']) + '.nc')
- if os.path.isfile(meteo_file):
- chem_file = os.path.join(
- cfg.icon_input_icbc, cfg.chem['prefix'] +
- time.strftime(cfg.chem['nameformat']) + '.nc')
- merged_file = os.path.join(
- cfg.icon_input_icbc,
- time.strftime(cfg.meteo['prefix'] +
- cfg.meteo['nameformat']) +
- '_merged.nc')
- ds_meteo = xr.open_dataset(meteo_file)
- ds_chem = xr.open_dataset(chem_file)
- # LNPS --> PS
- ds_chem['PS'] = ds_chem['LNPS']
- ds_chem['PS'].attrs = ds_chem['LNPS'].attrs
- ds_chem['PS'] = ds_chem['PS'].squeeze(dim='lev_2')
- ds_chem['PS'].attrs[
- "long_name"] = 'surface pressure'
- # merge:
- ds_merged = xr.merge([ds_meteo, ds_chem],
- compat="override")
- #ds_merged.attrs = ds.attrs
- ds_merged.to_netcdf(merged_file)
- # Rename file to get original file name
- tools.rename_file(merged_file, meteo_file)
- tools.remove_file(chem_file)
- logging.info(
- "Added chemical tracer to file {}".format(
- merged_file))
-
- #------------
- # Merge LBC:
- #------------
- meteo_file = os.path.join(
- cfg.icon_input_icbc,
- time.strftime(cfg.meteo['prefix'] +
- cfg.meteo['nameformat']) + '_lbc.nc')
- chem_file = os.path.join(
- cfg.icon_input_icbc, cfg.chem['prefix'] +
- time.strftime(cfg.chem_nameformat) + '_lbc.nc')
- merged_file = os.path.join(
- cfg.icon_input_icbc,
- time.strftime(cfg.meteo['prefix'] +
- cfg.meteo['nameformat']) + '_merged.nc')
- ds_meteo = xr.open_dataset(meteo_file)
- ds_chem = xr.open_dataset(chem_file)
- # LNPS --> PS
- ds_chem['PS'] = ds_chem['LNPS']
- ds_chem['PS'].attrs = ds_chem['LNPS'].attrs
- ds_chem['PS'].attrs["long_name"] = 'surface pressure'
- ds_chem['TRCH4_chemtr'] = ds_chem['CH4_BG']
- # merge:
- ds_merged = xr.merge([ds_meteo, ds_chem],
- compat="override")
- #ds_merged.attrs = ds.attrs
- ds_merged.to_netcdf(merged_file)
- # Rename file to get original file name
- tools.rename_file(merged_file, meteo_file)
- tools.remove_file(chem_file)
- logging.info(
- "Added chemical tracer to file {}".format(merged_file))
-
- # If COSMO (and not ICON):
- else:
- logging.info('COSMO analysis data for IC/BC')
-
- dest_path = os.path.join(cfg.int2lm_input, 'meteo')
- tools.create_dir(dest_path, "meteo input")
-
- source_nameformat = cfg.meteo['nameformat']
- if cfg.meteo['prefix'] == 'lffd':
- # nested runs use cosmoart-output as meteo data
- # have to copy the *c.nc-file
- src_file = os.path.join(
- cfg.meteo['dir'],
- cfg.startdate_sim.strftime(source_nameformat + 'c.nc'))
-
- tools.copy_file(src_file, dest_path, output_log=True)
-
- logging.info("Copied constant-param file from {} to {}".format(
- src_file, dest_path))
-
- # extend nameformat with ending to match cosmo-output
- source_nameformat += '.nc'
-
- if cfg.meteo['prefix'] == 'efsf':
- source_nameformat = cfg.meteo['prefix'] + '%y%m%d%H'
-
- num_steps = 0
- meteo_dir = cfg.meteo['dir']
- subdir = os.path.join(meteo_dir,
- cfg.startdate_sim.strftime('%y%m%d%H'))
- for time in tools.iter_hours(cfg.startdate_sim, cfg.enddate_sim,
- cfg.meteo['inc']):
- dest_path = os.path.join(cfg.int2lm_input, 'meteo')
- src_file = os.path.join(meteo_dir,
- time.strftime(source_nameformat))
-
- if cfg.meteo['prefix'] == 'efsf':
- if time == cfg.startdate_sim:
- src_file = os.path.join(subdir,
- 'eas' + time.strftime('%Y%m%d%H'))
- if not os.path.isfile(src_file) and cfg.meteo.get('dir_alt') \
- is not None:
- meteo_dir = cfg.meteo['dir_alt']
- subdir = os.path.join(
- meteo_dir, cfg.startdate_sim.strftime('%y%m%d%H'))
- src_file = os.path.join(
- subdir, 'eas' + time.strftime('%Y%m%d%H'))
- dest_path = os.path.join(cfg.int2lm_input, 'meteo',
- cfg.meteo['prefix'] + '00000000')
- else:
- td = time - cfg.startdate_sim - timedelta(hours=6 *
- num_steps)
- days = str(td.days).zfill(2)
- hours = str(td.seconds // 3600).zfill(2)
- td_total = time - cfg.startdate_sim
- days_total = str(td_total.days).zfill(2)
- hours_total = str(td_total.seconds // 3600).zfill(2)
-
- src_file = os.path.join(
- subdir, cfg.meteo['prefix'] + days + hours + '0000')
- dest_path = os.path.join(
- cfg.int2lm_input, 'meteo', cfg.meteo['prefix'] +
- days_total + hours_total + '0000')
-
- # Next time, change directory
- checkdir = os.path.join(meteo_dir,
- time.strftime('%y%m%d%H'))
- if os.path.isdir(checkdir):
- num_steps += 1
- subdir = checkdir
- elif cfg.meteo.get('dir_alt') is not None:
- checkdir = os.path.join(cfg.meteo['dir_alt'],
- time.strftime('%y%m%d%H'))
- if os.path.isdir(checkdir):
- num_steps += 1
- subdir = checkdir
- meteo_dir = cfg.meteo['dir_alt']
- logging.info(
- "Switching to other input directory from {} to {}"
- .format(cfg.meteo['dir'],
- cfg.meteo['dir_alt']))
- elif not os.path.exists(src_file):
- # special case for MeteoSwiss COSMO-7 data
- archive = '/store/mch/msopr/owm/COSMO-7'
- yy = time.strftime("%y")
- path = '/'.join([archive, 'ANA' + yy])
- src_file = os.path.join(path, time.strftime(source_nameformat))
-
- # copy meteo file from project folder to
- tools.copy_file(src_file, dest_path, output_log=True)
-
- logging.info("Copied file from {} to {}".format(
- src_file, dest_path))
-
- # Other IC/BC data
- inv_to_process = []
- if cfg.model == 'cosmo-ghg':
- try:
- CAMS = dict(fullname="CAMS",
- nickname="cams",
- executable="cams4int2cosmo",
- indir=cfg.cams['dir_orig'],
- outdir=cfg.cams['dir_proc'],
- param=[{
- 'inc': cfg.cams['inc'],
- 'suffix': cfg.cams['suffix']
- }])
- inv_to_process.append(CAMS)
- except AttributeError:
- pass
- try:
- CT = dict(fullname="CarbonTracker",
- nickname="ct",
- executable="ctnoaa4int2cosmo",
- indir=cfg.ct_dir_orig,
- outdir=cfg.ct_dir_proc,
- param=cfg.ct_parameters)
- inv_to_process.append(CT)
- except AttributeError:
- pass
- elif cfg.model == 'cosmo-art':
- try:
- MOZART = dict(fullname='MOZART',
- nickname='mozart',
- executable='mozart2int2lm',
- indir=cfg.mozart_file_orig,
- outdir=cfg.mozart_dir_proc,
- param=[{
- 'inc': cfg.mozart_inc,
- 'suffix': cfg.mozart_prefix
- }])
- inv_to_process.append(MOZART)
- except AttributeError:
- pass
-
- if cfg.model == 'cosmo-ghg' or cfg.model == 'cosmo-art':
- logging.info("Processing " +
- ", ".join([i["fullname"]
- for i in inv_to_process]) + " data")
-
- scratch_path = os.path.join(cfg.int2lm_input, 'icbc')
- tools.create_dir(scratch_path, "icbc input")
-
- for inv in inv_to_process:
- logging.info(inv["fullname"] + " files")
- tools.create_dir(inv["outdir"], "processed " + inv["fullname"])
-
- for p in inv["param"]:
- inc = p["inc"]
- for time in tools.iter_hours(cfg.startdate_sim,
- cfg.enddate_sim, inc):
- logging.info(time)
-
- filename = os.path.join(
- inv["outdir"], p["suffix"] + "_" +
- time.strftime("%Y%m%d%H") + ".nc")
- if not os.path.exists(filename):
- logging.info(filename)
- try:
- to_call = getattr(tools, inv["executable"])
- to_call.main(time, inv["indir"], inv["outdir"],
- p)
- except:
- logging.error("Preprocessing " +
- inv["fullname"] + " data failed")
- raise
-
- # copy to (temporary) run input directory
- tools.copy_file(filename,
- scratch_path,
- output_log=True)
-
- logging.info("OK")
diff --git a/jobs/prepare_icon.py b/jobs/prepare_icon.py
new file mode 100644
index 00000000..ca3d6872
--- /dev/null
+++ b/jobs/prepare_icon.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from pathlib import Path
+import logging
+from . import tools
+
+BASIC_PYTHON_JOB = True
+
+
+def set_cfg_variables(cfg):
+ cfg.icon_base = cfg.chain_root / 'icon'
+ cfg.icon_input = cfg.icon_base / 'input'
+ cfg.icon_input_icbc = cfg.icon_input / 'icbc'
+ cfg.icon_work = cfg.icon_base / 'run'
+ cfg.icon_output = cfg.icon_base / 'output'
+ cfg.icon_output_reduced = cfg.icon_base / 'output_reduced'
+ cfg.icon_restart_out = cfg.icon_base / 'restart'
+ if cfg.chunk_id_prev:
+ cfg.icon_restart_in = cfg.chain_root_prev / 'icon' / 'run'
+ cfg.icon_input_icbc_prev = cfg.chain_root_prev / 'icon' / 'input' / 'icbc'
+
+ cfg.input_files_scratch = {}
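+    # Normalize each configured input file to a Path and register its
+    # scratch counterpart (same basename) under cfg.icon_input.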
+ for dsc, file in cfg.input_files.items():
+ cfg.input_files[dsc] = (p := Path(file))
+ cfg.input_files_scratch[dsc] = cfg.icon_input / p.name
+
+ cfg.create_vars_from_dicts()
+
+ cfg.ini_datetime_string = cfg.startdate.strftime('%Y-%m-%dT%H:00:00Z')
+ cfg.end_datetime_string = cfg.enddate.strftime('%Y-%m-%dT%H:00:00Z')
+
+ if cfg.lrestart == '.TRUE.':
+ cfg.restart_filename = 'restart_atm_DOM01.nc'
+ cfg.restart_file = cfg.icon_restart_in / cfg.restart_filename
+ cfg.restart_file_scratch = cfg.icon_work / cfg.restart_filename
+
+ # Nudge type (global or nothing)
+ cfg.nudge_type = 2 if hasattr(cfg,
+ 'era5') and cfg.era5_global_nudging else 0
+ # Time step for global nudging in seconds
+ cfg.nudging_step_seconds = cfg.nudging_step * 3600 if hasattr(
+ cfg, 'nudging_step') else None
+ # Prescribed initial conditions for CH4, CO and/or OH
+ cfg.iart_init_gas = 4 if hasattr(
+ cfg, 'species_inicond') and cfg.species_inicond else 0
+
+ cfg.startdate_sim_yyyymmdd_hh = cfg.startdate_sim.strftime('%Y%m%d_%H')
+
+
+def main(cfg):
+ """
+ **ICON Data Preparation**
+
+    This function prepares input data for ICON simulations by creating the
+    necessary directories and copying the input files to the working directory.
+
+ - Create working directories and copy input files
+
+ Parameters
+ ----------
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
+
+ Raises
+ ------
+ RuntimeError
+ If any subprocess returns a non-zero exit code during execution.
+ """
+ set_cfg_variables(cfg)
+ tools.change_logfile(cfg.logfile)
+
+ # Create directories
+ tools.create_dir(cfg.icon_work, "icon_work")
+ tools.create_dir(cfg.icon_input_icbc, "icon_input_icbc")
+ tools.create_dir(cfg.icon_output, "icon_output")
+ tools.create_dir(cfg.icon_restart_out, "icon_restart_out")
+
+ logging.info('Copy ICON input data (IC/BC) to working directory')
+ # Copy input files to scratch
+ script_lines = [
+ '#!/usr/bin/env bash',
+ f'#SBATCH --job-name="copy_input_{cfg.casename}_{cfg.startdate_sim_yyyymmddhh}_{cfg.enddate_sim_yyyymmddhh}"',
+ f'#SBATCH --account={cfg.compute_account}', '#SBATCH --time=00:10:00',
+ f'#SBATCH --partition={cfg.compute_queue}',
+ f'#SBATCH --constraint={cfg.constraint}', '#SBATCH --nodes=1',
+ f'#SBATCH --output={cfg.logfile}', '#SBATCH --open-mode=append',
+ f'#SBATCH --chdir={cfg.icon_work}', ''
+ ]
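+    # One rsync invocation per input file; rsync skips files that are
+    # already up to date, so resubmitting the job is cheap.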
+ for target, destination in zip(cfg.input_files.values(),
+ cfg.input_files_scratch.values()):
+ script_lines.append(f'rsync -av {target} {destination}')
+
+ with (script := cfg.icon_work / 'copy_input.job').open('w') as f:
+ f.write('\n'.join(script_lines))
+
+ cfg.submit('prepare_icon', script)
+ logging.info("OK")
diff --git a/jobs/reduce_output.py b/jobs/reduce_output.py
index c0ef560a..af68b2cc 100644
--- a/jobs/reduce_output.py
+++ b/jobs/reduce_output.py
@@ -15,15 +15,17 @@
from . import tools
+BASIC_PYTHON_JOB = True
-def main(startdate, enddate, cfg, model_cfg):
+
+def main(cfg):
"""
Calculates 2D column data and writes them into a new netCDF file.
Only a fixed number of levels from **COSMO** output are considered.
Those files are written into a new directory ``cosmo_output_reduced``.
The number of levels is set by the configuration variable
- ``cfg.output_levels`` (default = all levels).
+ ``cfg.reduce_output['output_levels']`` (default = all levels).
**Important**: If several ``GRIBOUT`` sections are used to split the output
data, then this code only works in case of the following:
@@ -39,16 +41,10 @@ def main(startdate, enddate, cfg, model_cfg):
Parameters
----------
- starttime : datetime-object
- The starting date of the simulation
- hstart : int
- Offset (in hours) of the actual start from the starttime
- hstop : int
- Length of simulation (in hours)
- cfg : config-object
+ cfg : Config
Object holding all user-configuration parameters as attributes
"""
-
+ tools.change_logfile(cfg.logfile)
cosmo_output = cfg.cosmo_output
output_path = cfg.cosmo_output_reduced
@@ -73,6 +69,12 @@ def main(startdate, enddate, cfg, model_cfg):
# Wait for Cosmo to finish first
tools.check_job_completion(cfg.log_finished_dir, "cosmo")
+
+    # Number of levels and switch for unit conversion for 'reduce_output' job
+    if 'output_levels' not in cfg.reduce_output:
+        cfg.reduce_output['output_levels'] = -1
+    if 'convert_gas' not in cfg.reduce_output:
+        cfg.reduce_output['convert_gas'] = True
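+    # These defaults mirror the 'reduce_output' section of the case's
+    # config.yaml, e.g. (values illustrative only):
+    #
+    #   reduce_output:
+    #     output_levels: 20
+    #     convert_gas: True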
"""Get list of constant files"""
cfiles = []
read_cfile = False
@@ -118,16 +120,15 @@ def main(startdate, enddate, cfg, model_cfg):
py_file = os.path.join(tool_path, 'reduce_output_start_end.py')
alternate_csv_file = os.path.join(cfg.chain_src_dir, 'cases', cfg.casename,
'variables.csv')
- logfile = os.path.join(cfg.log_working_dir, 'reduce_output')
logging.info('Submitting job to the queue...')
result = subprocess.run([
- "sbatch", '--output=' + logfile, '--open-mode=append', '--wait',
+ "sbatch", '--output=' + cfg.logfile, '--open-mode=append', '--wait',
bash_file, py_file, cosmo_output, output_path, str_startdate,
str_enddate,
- str(cfg.output_levels),
+ str(cfg.reduce_output['output_levels']),
str(output_step), alternate_csv_file,
- str(cfg.convert_gas)
+ str(cfg.reduce_output['convert_gas'])
])
exitcode = result.returncode
diff --git a/jobs/tools/ICON_to_point.py b/jobs/tools/ICON_to_point.py
new file mode 100644
index 00000000..65371be1
--- /dev/null
+++ b/jobs/tools/ICON_to_point.py
@@ -0,0 +1,355 @@
+import numpy as np
+import xarray as xr
+from sklearn.neighbors import BallTree
+import argparse
+
+
+def get_horizontal_distances(longitude, latitude, icon_grid_path, k=5):
+ """
+ Get horizontal distances between points and their k nearest
+ neighbours on the ICON grid using a quick BallTree algorithm
+
+ Parameters
+ ----------
+ longitude : list or 1D np.array
+ e.g., [12] or np.array([12,13,14])
+
+ latitude : list or 1D np.array
+ e.g., [52] or np.array([52,53,54])
+
+ icon_grid_path : str
+ Contains the path to the ICON grid
+
+ k : int, default is 5
+ Sets the number of nearest neighbours desired
+
+ Returns
+ -------
+ distances: 2D np.array
+ Contains the distance-on-a-sphere between the target point(s)
+ and its nearest neighbours
+
+ indices: 2D np.array
+ Contains the indices to the ICON grid cells of the corresponding
+ nearest neighbours
+ """
+ # Get ICON grid specifics
+ icon_grid = xr.open_dataset(icon_grid_path)
+ clon = icon_grid.clon.values
+ clat = icon_grid.clat.values
+
+ # Generate BallTree
+ icon_lat_lon = np.column_stack([clat, clon])
+ tree = BallTree(icon_lat_lon, metric='haversine')
+
+ # Query BallTree
+ target_lat_lon = np.column_stack(
+ [np.deg2rad(latitude), np.deg2rad(longitude)])
+ (distances, indices) = tree.query(target_lat_lon,
+ k=k,
+ return_distance=True)
+
+    if np.any(distances == 0):
+        print(
+            'A longitude/latitude pair coincides exactly with an ICON cell '
+            'centre, which breaks the inverse distance weighting.')
+        print('The distance is set to a tiny non-zero value to avoid errors.')
+        distances[distances == 0] = 1e-12
+
+    if np.any(np.isnan(distances)):
+ raise ValueError(
+ 'The distance between ICON and your lat/lon point could not be established...'
+ )
+
+ # NB: the 'distances' are in units of radians; i.e., it assumes the Earth is a unit sphere!
+ # To get realistic distances, you need to multiply 'distances' with 6371e3 meters, i.e., the
+ # radius of the earth. However, such a constant factor cancels out when we compute the
+ # horizontal interpolation weights (which are normalized!), so there is no need to apply the
+ # multiplication with 6371e3.
+
+ return distances, indices
+
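+# Illustrative usage (the grid path is a placeholder): query one station and
+# turn the returned distances into normalized inverse-distance weights, as
+# icon_to_point() below does:
+#
+#   dists, inds = get_horizontal_distances([7.99], [46.55],
+#                                          'icon_grid.nc', k=5)
+#   weights = (1 / dists) / (1 / dists).sum(axis=1, keepdims=True)
+#   # weights.sum(axis=1) == 1 for every station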
+
+def get_nearest_vertical_distances(model_topography, model_levels,
+ base_height_msl, inlet_height_agl,
+ interpolation_strategy):
+ """
+ Get the 2 nearest distances between ICON grid points and specified
+ station altitudes
+
+ Parameters
+ ----------
+ model_topography : 1D np.array
+ This is the elevation over mean sea level of the ICON grid
+
+ model_levels : 2D np.array
+ Dimensions [ICON_heights, number_of_samples]
+
+ base_height_msl : list or 1D np.array
+ e.g., [20,] or np.array([72,180,40])
+
+ inlet_height_agl : list or 1D np.array
+ e.g., [15,] or np.array([15, 21, 42])
+
+ interpolation_strategy : list of strings
+ e.g., ['ground',] or ['ground','mountain','ground']
+        Can be 'ground', 'mountain', or 'middle':
+        'ground' uses the model topography + inlet height above ground,
+        'mountain' uses the station base altitude over mean sea level + inlet height,
+        'middle' averages the two before adding the inlet height
+
+ Returns
+ -------
+ vertical_distances : 3D np.array
+ Contains the absolute (!) distance between the target point(s)
+ and its 2 nearest neighbour levels
+
+ vertical_indices: 3D np.array
+ Contains the indices to the ICON height levels of the corresponding 2
+ nearest neighbour levels
+ """
+ # Get the target sampling altitude with a list comprehension
+ target_altitude = [
+ model_topography.isel({
+ "station": i
+ }).values + inlet_height_agl[i] if strategy == 'ground' else
+ np.repeat(base_height_msl[i], model_topography.shape[1]) +
+ inlet_height_agl[i] if strategy == 'mountain' else
+ np.repeat(base_height_msl[i], model_topography.shape[1]) / 2 +
+ model_topography.isel({
+ "station": i
+ }).values / 2 + inlet_height_agl[i]
+ # if strategy=='middle'
+ for (i, strategy) in enumerate(interpolation_strategy)
+ ]
+ target_altitude = xr.DataArray(target_altitude, dims=['station', 'ncells'])
+
+ # Select 2 closest neighbouring levels
+ first_negative = (model_levels <= target_altitude).argmax(
+ dim=model_levels.dims[0]) # First index where model lies below target
+ vertical_indices = np.stack(
+ [first_negative, first_negative - 1],
+ axis=0) # Second index thus lies /above/ the target
+ vertical_indices[:, first_negative == 0] = model_levels.values.shape[
+ 0] - 1 # If no result found: sample lies below lowest model level. Set it to the lowest model level
+
+ # Sample the corresponding vertical distances between the target and the model levels
+ vertical_distances = np.take_along_axis(
+ (model_levels - target_altitude).values, vertical_indices, axis=0)
+
+ return np.abs(vertical_distances).T, vertical_indices.T
+
+
+def icon_to_point(longitude,
+ latitude,
+ inlet_height_agl,
+ base_height_msl,
+ icon_field_path,
+ icon_grid_path,
+ interpolation_strategy,
+ k=5,
+ field_name=None):
+ """
+ Function to interpolate ICON fields to point locations
+
+ Parameters
+ ----------
+ longitude : list or 1D np.array
+ e.g., [12,] or np.array([12,13,14])
+
+ latitude : list or 1D np.array
+ e.g., [52,] or np.array([52,53,54])
+
+    inlet_height_agl : list or 1D np.array
+        e.g., [15,] or np.array([15, 21, 42])
+        This is the height of the *inlet above the ground*
+        (e.g., for Cabauw: base_height_msl=0,
+                           inlet_height_agl=27)
+
+    base_height_msl : list or 1D np.array
+        e.g., [20,] or np.array([72,180,40])
+        This is the altitude of the *station base over mean sea level*
+        (e.g., for Jungfraujoch: base_height_msl=3850,
+                                 inlet_height_agl=5)
+
+ icon_field_path : str
+ Contains the path to the unstructured ICON output
+
+ icon_grid_path : str
+ Contains the path to the ICON grid
+
+ interpolation_strategy : list of strings
+ e.g., ['ground',] or ['ground','mountain','ground']
+        Can be 'ground', 'mountain', or 'middle':
+        'ground' uses the model topography + inlet height above ground,
+        'mountain' uses the station base altitude over mean sea level + inlet height,
+        'middle' averages the two before adding the inlet height
+
+ k : int, default is 5
+ Sets the number of horizontal nearest neighbours desired
+
+ field_name : str, or list of strings, optional
+ e.g. 'qv', or ['qv','temp'], or None
+ If no field_name is set, the whole dataset is interpolated
+ in the vertical and horizontal directions.
+
+ Returns
+ -------
+ xr.Dataset
+ An Xarray dataset organised by 'station', containing the original
+ input specifications, and the vertically and horizontally interpolated
+ values
+ """
+
+ # Load dataset
+ icon_field = xr.open_dataset(icon_field_path)
+ # Get dimension names
+ icon_heights = icon_field.z_mc.dims[
+ 0] # Dimension name (something like "heights_5")
+ icon_cells = icon_field.z_mc.dims[
+ 1] # Dimension name (something like "ncells")
+ icon_field[icon_cells] = icon_field[
+ icon_cells] # Explicitly assign 'ncells'
+
+ # --- Horizontal grid selection & interpolation weights
+ # Get k nearest horizontal distances (for use in inverse distance weighing)
+ horizontal_distances, icon_grid_indices = get_horizontal_distances(
+ longitude, latitude, icon_grid_path, k=k)
+
+ horizontal_interp = 1 / horizontal_distances / (
+ 1 / horizontal_distances).sum(axis=1, keepdims=True)
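+    # i.e., w_i = (1/d_i) / sum_j(1/d_j): the k weights per station are
+    # positive and sum to exactly 1, which is also why the constant
+    # Earth-radius factor in the distances cancels out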
+ weights_horizontal = xr.DataArray(horizontal_interp,
+ dims=["station", icon_cells])
+ ind_X = xr.DataArray(icon_grid_indices, dims=["station", icon_cells])
+ icon_subset = icon_field.isel({icon_cells: ind_X})
+
+ # --- Vertical level selection & interpolation weights
+ # Get 2 nearest vertical distances (for use in linear interpolation)
+ model_topography = icon_subset.z_ifc[-1]
+ model_levels = icon_subset.z_mc
+    vertical_distances, icon_level_indices = get_nearest_vertical_distances(
+        model_topography, model_levels, base_height_msl, inlet_height_agl,
+        interpolation_strategy)
+
+ vertical_interp = vertical_distances[:, :, ::-1] / (vertical_distances.sum(
+ axis=-1, keepdims=True))
+    # Say the two model layers adjacent to the point's vertical position lie
+    # at [-5, +15] m offset. Linear interpolation between them is simply
+    # [15/(15+5), 5/(15+5)] = [3/4, 1/4], which is what this computes (hence
+    # the reversed order on the last axis, and why only the absolute vertical
+    # distances are needed).
+    # (As a curiosity, linear interpolation equals inverse distance weighting
+    # with 2 points, but this formulation is more stable: it avoids possible
+    # divisions by 0.)
+
+ weights_vertical = xr.DataArray(vertical_interp,
+ dims=["ncells", "station", icon_heights])
+ ind_Z = xr.DataArray(icon_level_indices,
+ dims=["ncells", "station", icon_heights])
+
+ # --- Generate output
+ # Subset the ICON field if we want only a few fields of output
+ if field_name is not None:
+ icon_subset = icon_subset[field_name]
+ # Include the input station parameters in the output
+ ds = xr.Dataset({
+ 'longitude': (['station'], longitude),
+ 'latitude': (['station'], latitude),
+ 'inlet_height_agl': (['station'], inlet_height_agl),
+ 'base_height_msl': (['station'], base_height_msl),
+ 'interpolation_strategy': (['station'], interpolation_strategy)
+ })
+ # Perform the interpolations
+ icon_subset = icon_subset.isel({icon_heights: ind_Z})
+ icon_out = icon_subset.weighted(weights_vertical.fillna(0)).sum(
+ dim=icon_heights,
+ skipna=True).weighted(weights_horizontal).sum(dim=icon_cells)
+ icon_out = icon_out.where(
+ ~(weights_vertical.sum(dim=[icon_cells, icon_heights],
+ skipna=False)).isnull()
+ ) # Remove out of bounds values where weights_vertical has NaNs
+ return xr.merge([icon_out, ds])
+
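+# Minimal usage sketch (file names and station values are placeholders):
+#
+#   ds = icon_to_point(longitude=[7.99],
+#                      latitude=[46.55],
+#                      inlet_height_agl=[5.],
+#                      base_height_msl=[3850.],
+#                      icon_field_path='icon_output.nc',
+#                      icon_grid_path='icon_grid.nc',
+#                      interpolation_strategy=['mountain'],
+#                      field_name=['temp'])
+#
+# The result is organised by 'station'; remaining dimensions (e.g., time)
+# are preserved.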
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(
+ description='Interpolate ICON output to point locations.')
+ parser.add_argument('-lon',
+ dest='longitude',
+ default=None,
+ type=float,
+ help='Longitude of interpolation target')
+ parser.add_argument('-lat',
+ dest='latitude',
+ default=None,
+ type=float,
+ help='Latitude of interpolation target')
+ parser.add_argument(
+ '-asl',
+ dest='elevation',
+ default=None,
+ type=float,
+ help=
+ 'Station surface elevation above sea level [absolute height asl: elevation+altitude]'
+ )
+ parser.add_argument(
+ '-alt',
+ dest='altitude',
+ default=None,
+ type=float,
+ help=
+ 'Station altitude over surface [absolute height asl: elevation+altitude]'
+ )
+ parser.add_argument('-fields',
+ dest='icon_field',
+ default=None,
+ type=str,
+ help='The ICON output fields')
+ parser.add_argument('-grid',
+ dest='icon_grid',
+ default=None,
+ type=str,
+ help='The ICON grid dynamic grid file')
+ parser.add_argument(
+ '-strat',
+ dest='strategy',
+ default='ground',
+ type=str,
+ help=
+ 'The interpolation strategy (should be "mountain", "ground", or "middle")'
+ )
+ parser.add_argument(
+ '-k',
+ dest='k',
+ default=4,
+ type=int,
+ help='Number of nearest neighbours to interpolate with (e.g., 4 or 5)')
+ parser.add_argument(
+ '-field_name',
+ dest='field_name',
+ default=None,
+ type=str,
+ help='Field name to extract (if left out, all variables are extracted)'
+ )
+ parser.add_argument('-output',
+ dest='output_dest',
+ default=None,
+ type=str,
+ help='Output NetCDF destination')
+ args = parser.parse_args()
+
+    # Example run (note: the inputs are lists; passing all stations in a
+    # single call is much faster than calling the function per station)
+ output = icon_to_point(longitude=[
+ args.longitude,
+ ],
+ latitude=[
+ args.latitude,
+ ],
+                           inlet_height_agl=[
+                               args.altitude,
+                           ],
+                           base_height_msl=[
+                               args.elevation,
+                           ],
+ icon_field_path=args.icon_field,
+ icon_grid_path=args.icon_grid,
+ interpolation_strategy=[
+ args.strategy,
+ ],
+ k=args.k,
+ field_name=args.field_name)
+ output.to_netcdf(args.output_dest)
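+
+# Example invocation (all paths are placeholders):
+#   python ICON_to_point.py -lon 7.99 -lat 46.55 -asl 3850 -alt 5 \
+#       -fields icon_output.nc -grid icon_grid.nc -strat mountain \
+#       -k 5 -output point_output.nc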
diff --git a/jobs/tools/__init__.py b/jobs/tools/__init__.py
index 6df6546c..15f39ead 100644
--- a/jobs/tools/__init__.py
+++ b/jobs/tools/__init__.py
@@ -166,14 +166,18 @@ def change_logfile(filename):
"""
fileh = logging.FileHandler(filename, 'a', delay=True)
- # log_format = logging.Formatter('%(levelname)s:%(message)s')
- # fileh.setFormatter(log_format)
+ log_format = logging.Formatter('%(levelname)s: %(message)s')
+ fileh.setFormatter(log_format)
log = logging.getLogger() # root logger
+ log.setLevel(logging.INFO) # Set the desired logging level
+
if len(log.handlers) > 0:
- log.handlers = [fileh] # set the new handler
+ # If there are existing handlers, replace them with the new handler
+ log.handlers = [fileh]
else:
- logging.basicConfig(filename=filename, level=logging.INFO)
+ # If no existing handlers, add the new handler
+ log.addHandler(fileh)
def create_dir(path, readable_name):
@@ -195,6 +199,7 @@ def create_dir(path, readable_name):
"""
try:
os.makedirs(path, exist_ok=True)
+ logging.info(f"Created {readable_name} directory at path {path}")
except (OSError, Exception) as e:
logging.error("Creating {} directory at path {} failed with {}".format(
readable_name, path,
@@ -416,11 +421,11 @@ def check_job_completion(log_finished_dir, job, waittime=3000):
Parameters
----------
-    cfg : config-object
log_finished_dir : directory for logfiles of finished jobs
- job: string of job name, e.g. "meteo"
+ job: string of job name, e.g. "prepare_icon"
waittime : time to wait (factor of .1 second)
Defaults to 3000 (300 seconds)
diff --git a/jobs/tools/check_model.py b/jobs/tools/check_model.py
index 9a1b29c1..b7c9c409 100644
--- a/jobs/tools/check_model.py
+++ b/jobs/tools/check_model.py
@@ -5,18 +5,19 @@
def check_model(cfg, model='COSMO'):
"""Check that the model specified in cfg matched the prescribed model.
- Check that cfg.model == model. If not, raises a value-error.
+ Check that cfg.workflow_name == model. If not, raises a value-error.
Ignores capitalization of the strings
Parameters
----------
- cfg : config-object
+ cfg : Config
+ Object holding all user-configuration parameters as attributes.
model : str
Prescribed model
"""
#don't care about capitalization
- if not cfg.model.lower() == model.lower():
+ if not cfg.workflow_name.lower() == model.lower():
raise ValueError("The model specified in the configuration file is {}"
", but the job only applies to {}.".format(
- cfg.model, model))
+ cfg.workflow_name, model))
diff --git a/jobs/tools/write_cosmo_input_ghg.py b/jobs/tools/write_cosmo_input_ghg.py
index e0a11ef3..f17804e0 100644
--- a/jobs/tools/write_cosmo_input_ghg.py
+++ b/jobs/tools/write_cosmo_input_ghg.py
@@ -96,6 +96,8 @@ def main(csv_filename, namelist_filename, cfg=None):
Path to the source csv-file
namelist_filename : str
Path to the namelist file that will be created
+    cfg : Config, optional
+        Object holding all user-configuration parameters as attributes.
"""
with open(csv_filename, 'r') as csv_file:
diff --git a/jobs/verify_chain.py b/jobs/verify_chain.py
index 61e31365..05b537a9 100644
--- a/jobs/verify_chain.py
+++ b/jobs/verify_chain.py
@@ -1,17 +1,14 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-# Job to verify the correctness of the chain. The output of the example-case is
-# compared to a reference output.
-
-# Author: dao, david.ochsner@empa.ch
-
import os
import logging
import netCDF4 as nc
from . import tools
+BASIC_PYTHON_JOB = True
+
def comp_data(dataset1, dataset2, variables):
"""Use tools.helper.datasets_equal to compare the datasets.
@@ -19,52 +16,50 @@ def comp_data(dataset1, dataset2, variables):
tools.helper.datasets_equal(dataset1, dataset2, variables, verbose=True)
-def main(cfg, model_cfg):
+def main(cfg):
"""Compare outputs of the chain to a reference.
- Looks for the reference-file in ``cfg.reference_dir``.
+ Looks for the reference-file in ``cfg.verify_chain['reference_dir']``.
- Looks for the output file in ``cfg.output_dir`` (if not ``None``), else it
+ Looks for the output file in ``cfg.verify_chain['output_dir']`` (if not ``None``), else it
goes to the output directory created by the **COSMO**-job.
- In the dict ``cfg.values_to_check``, the user specifies the names of the
+ In the dict ``cfg.verify_chain['values_to_check']``, the user specifies the names of the
files to be compared as keys, and the variables to compare as a list.
To compare the temperatures of the last output of the example case, the
- following variables should be added to the ``config.py`` file: ::
+ following variables should be added to the ``config.yaml`` file: ::
- reference_dir = os.path.join(input_root, "reference_output")
- output_dir = None
- values_to_check = {("reference_lffd2015010200.nc","lffd2015010200.nc"):
+ verify_chain['reference_dir'] = os.path.join(input_root, "reference_output")
+ verify_chain['output_dir'] = None
+ verify_chain['values_to_check'] = {("reference_lffd2015010200.nc","lffd2015010200.nc"):
['T']}
Parameters
----------
- start_time : datetime-object
- The starting date of the simulation
- hstart : int
- Offset (in hours) of the actual start from the start_time
- hstop : int
- Length of simulation (in hours)
- cfg : config-object
+ cfg : Config
Object holding all user-configuration parameters as attributes
"""
+ tools.change_logfile(cfg.logfile)
logging.info("Started verification")
- for (ref_file, run_file), variables in cfg.values_to_check.items():
+ for (ref_file,
+ run_file), variables in cfg.verify_chain['values_to_check'].items():
logging.info("Comparing " + str(variables))
# reference file location
- ref_file_path = os.path.join(cfg.reference_dir, ref_file)
+ ref_file_path = os.path.join(cfg.verify_chain['reference_dir'],
+ ref_file)
# run data location
- if cfg.output_dir is None:
+ if cfg.verify_chain['output_dir'] is None:
# Standard output location
run_file_path = os.path.join(
cfg.output_root, cfg.startdate_sim_yyyymmddhh + "_" +
cfg.enddate_sim_yyyymmddhh, "cosmo_output", run_file)
else:
# User-provided output location
- run_file_path = os.path.join(cfg.output_dir, run_file)
+ run_file_path = os.path.join(cfg.verify_chain['output_dir'],
+ run_file)
logging.info("Output file: " + str(run_file_path))
logging.info("Reference file: " + str(ref_file_path))
diff --git a/run_chain.py b/run_chain.py
index 743b54c2..2a6a43b0 100755
--- a/run_chain.py
+++ b/run_chain.py
@@ -2,32 +2,29 @@
# -*- coding: utf-8 -*-
from datetime import datetime, timedelta
-
+import pytz
import logging
-import os
-import sys
-import time
import shutil
import argparse
-import yaml
import jobs
from jobs import tools
+from config import Config
def parse_arguments():
- """Parse command line arguments for the processing chain script.
+ """Parse command line arguments for the Processing Chain script.
Parses and retrieves command line arguments, allowing users to specify
run identifiers, jobs to execute, and various options to control the
- execution of the processing chain.
+ execution of the Processing Chain.
Returns
-------
argparse.Namespace
A namespace object containing parsed command line arguments.
"""
- parser = argparse.ArgumentParser(description="Run the processing chain.")
+ parser = argparse.ArgumentParser(description="Run the Processing Chain.")
parser.add_argument("casenames",
nargs='+',
@@ -36,15 +33,15 @@ def parse_arguments():
"to be in cases//. The runs are executed "
"sequentially in the order they're given here.")
- jobs_help = ("List of job-names to be executed. A job is a .py-"
+ jobs_help = ("List of job names to be executed. A job is a .py "
"file in jobs/ with a main()-function which "
- "handles one aspect of the processing chain, for "
+ "handles one aspect of the Processing Chain, for "
"example copying meteo-input data or launching a "
"job for int2lm. "
"Jobs are executed in the order in which they are "
"given here. "
"If no jobs are given, default jobs will be executed"
- "as defined in config/models.yaml.")
+                 " as defined in config/workflows.yaml.")
parser.add_argument("-j",
"--jobs",
nargs='*',
@@ -52,27 +49,45 @@ def parse_arguments():
help=jobs_help,
default=None)
+ chunks_help = ("List of chunks to be executed. A chunk is time"
+ "frame within the total simulation period."
+ "It has the format `YYYYMMDDHH_YYYYMMDDHH`."
+ "If no chunks are given, all chunks within the"
+ "simulation period will be executed.")
+ parser.add_argument("-c",
+ "--chunks",
+ nargs='*',
+ dest="chunk_list",
+ help=chunks_help,
+ default=None)
+
+ sync_help = ("Force synchronous execution.")
+ parser.add_argument("-s",
+ "--force-sync",
+ action='store_true',
+ help=sync_help)
+
+ no_logging_help = ("Disable logging for chain_status.log.")
+ parser.add_argument("--no-logging",
+ action='store_false',
+ dest="enable_logging",
+ default=True,
+ help=no_logging_help)
+
force_help = ("Force the processing chain to redo all specified jobs,"
" even if they have been started already or were finished"
" previously. WARNING: Only logfiles get deleted,"
" other effects of a given job (copied files etc.)"
- " are simply overwritten. This may cause errors.")
+ " are simply overwritten. This may cause errors"
+ " or unexpected behavior.")
parser.add_argument("-f", "--force", action='store_true', help=force_help)
- tries_help = ("Amount of time the cosmo job is re-tried before crashing."
- " Default is 1.")
- parser.add_argument("-t",
- "--try",
- help=tries_help,
- dest="ntry",
- type=int,
- default=1)
-
resume_help = (
- "Resume the processing chain by restarting the last unfinished job."
+ "Resume the Processing Chain by restarting the last unfinished job."
" WARNING: Only the logfile gets deleted,"
" other effects of a given job (copied files etc.)"
- " are simply overwritten. This may cause errors.")
+ " are simply overwritten. This may cause errors."
+ " or unexpected behavior.")
parser.add_argument("-r",
"--resume",
help=resume_help,
@@ -84,355 +99,16 @@ def parse_arguments():
return args
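+
+# Illustrative invocations (case name and chunk ID are placeholders):
+#   ./run_chain.py icon-test
+#   ./run_chain.py icon-test -j prepare_icon icon
+#   ./run_chain.py icon-test -c 2018010100_2018010200 -f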
-class Config():
-
- def __init__(self, casename):
- """Initialize an instance of the Config class.
-
- Initializes an instance of the Config class with user-specific
- and default attributes. The class represents a processing chain for a
- particular case, and its attributes are populated based on the provided
- `casename`.
-
- Parameters
- ----------
- casename : str
- The identifier for the case, typically specifying the configuration
- and settings to be used in the processing chain.
-
- Attributes
- ----------
- user_name : str
- The username of the current user, obtained from the 'USER' environment variable.
- email : str
- The user's email address, initially set to None and updated using the `set_email` method.
- casename : str
- The specified case name for the processing chain.
- chain_src_dir : str
- The source directory for the processing chain, typically the current working directory.
- case_path : str
- The path to the case directory under 'cases/' for the specified `casename`.
- work_root : str
- The root directory for processing chain execution, typically located under the source directory.
-
- Notes
- -----
- The method also loads user-defined attributes from the configuration file,
- sets specific settings based on the node type ('gpu' or 'mc'), and initializes
- other instance-specific attributes.
- """
- # Global attributes (initialized with default values)
- self.user_name = os.environ['USER']
- self.set_email()
- self.casename = casename
- self.set_account()
-
- self.chain_src_dir = os.getcwd()
- self.case_path = os.path.join(self.chain_src_dir, 'cases',
- self.casename)
- self.work_root = os.path.join(self.chain_src_dir, 'work')
-
- # User-defined attributes from config file
- self.load_config_file(casename)
-
- # Specific settings based on the node type ('gpu' or 'mc')
- self.set_node_info()
-
- def load_config_file(self, casename):
- """Load configuration settings from a YAML file and set them as attributes.
-
- This method reads the configuration settings from a YAML file located in
- the 'cases/casename' directory and sets them as attributes of the instance.
-
- Parameters
- ----------
- casename : str
- Name of the folder in 'cases/' where the configuration files are stored.
-
- Returns
- -------
- Config
- The same `Config` instance with configuration settings as attributes.
-
- Raises
- ------
- FileNotFoundError
- If the specified configuration file or case directory is not found.
-
- Notes
- -----
- If the configuration file does not exist, the method will attempt to suggest
- a similar case directory based on a Levenshtein distance comparison with
- existing case directories. The method directly assigns values from the
- configuration file to instance attributes for easy access.
- """
- cfg_file = os.path.join('cases', casename, 'config.yaml')
-
- if not os.path.isfile(cfg_file):
- all_cases = [
- path.name for path in os.scandir('cases') if path.is_dir()
- ]
- closest_name = min([(tools.levenshtein(casename, name), name)
- for name in all_cases],
- key=lambda x: x[0])[1]
- raise FileNotFoundError(
- f"Case-directory '{casename}' not found, did you mean '{closest_name}'?"
- )
-
- try:
- with open(cfg_file, 'r') as yaml_file:
- cfg_data = yaml.load(yaml_file, Loader=yaml.FullLoader)
- except FileNotFoundError:
- raise FileNotFoundError(
- f"No file 'config.yaml' in {os.path.dirname(cfg_file)}")
-
- # Directly assign values to instance attributes
- for key, value in cfg_data.items():
- setattr(self, key, value)
-
- return self
-
- def set_account(self):
- """Set the compute account based on user information.
-
- This method determines the compute account to be used based on the user's
- name and system configuration.
-
- Returns
- -------
- Config
- The same `Config` instance with the `compute_account` attribute set.
-
- Notes
- -----
- - If the user name is 'jenkins', the compute account is set to 'g110' for
- Jenkins testing.
- - If an account is specified in the user's '~/.acct' file, it will be used
- as the compute account.
- - If neither of the above conditions is met, the standard account is
- determined using the 'id -gn' command.
- """
- if self.user_name == 'jenkins':
- # g110 account for Jenkins testing
- self.compute_account = 'g110'
- elif os.path.exists(os.environ['HOME'] + '/.acct'):
- # Use account specified in ~/.acct file
- with open(os.environ['HOME'] + '/.acct', 'r') as file:
- self.compute_account = file.read().rstrip()
- else:
- # Use standard account
- self.compute_account = os.popen("id -gn").read().splitlines()[0]
-
- return self
-
- def set_node_info(self):
- """Set node-specific information based on configuration settings.
-
- This method configures node-specific settings, such as the number of tasks
- per node and CUDA-related environment variables, based on the provided
- configuration settings in the instance.
-
- Returns
- -------
- Config
- The same `Config` instance with updated node-specific attributes.
-
- Raises
- ------
- ValueError
- If the 'constraint' or 'run_on' configuration values are invalid.
- """
- if self.constraint == 'gpu':
- if self.model.startswith('icon'):
- if self.run_on == 'gpu':
- self.ntasks_per_node = 1
- elif self.run_on == 'cpu':
- self.ntasks_per_node = 12
- else:
- raise ValueError(
- "Invalid value for 'run_on' in the configuration."
- "It should be either 'gpu' or 'cpu'.")
- else:
- self.ntasks_per_node = 12
- self.mpich_cuda = ('export MPICH_RDMA_ENABLED_CUDA=1\n'
- 'export MPICH_G2G_PIPELINE=256\n'
- 'export CRAY_CUDA_MPS=1\n')
- elif self.constraint == 'mc':
- self.ntasks_per_node = 36
- self.mpich_cuda = ''
- else:
- raise ValueError(
- "Invalid value for 'constraint' in the configuration."
- "It should be either 'gpu' or 'mc'.")
-
- return self
-
- def set_restart_step_hours(self):
- """Set the restart step in hours.
-
- Converts the 'restart_step' attribute, which is in ISO8601 duration format,
- to hours and stores the result in the 'restart_step_hours' attribute.
-
- Returns
- -------
- Config
- The same `Config` instance with the 'restart_step_hours' attribute set.
- """
- self.restart_step_hours = int(
- tools.iso8601_duration_to_hours(self.restart_step))
-
- return self
-
- def set_email(self):
- """Set the user's email address based on system configuration.
-
- This method determines the user's email address based on the user's name
- and system configuration.
-
- Returns
- -------
- Config
- The same `Config` instance with the `user_mail` attribute set.
-
- Notes
- -----
- - If the user name is 'jenkins', the user's email address is set to None.
- - If an email address is specified in the user's '~/.forward' file, it will
- be used as the user's email address.
- - If neither of the above conditions is met, the user's email address is set
- to None.
- """
- if self.user_name == 'jenkins':
- self.user_mail = None
- elif os.path.exists(os.environ['HOME'] + '/.forward'):
- with open(os.environ['HOME'] + '/.forward', 'r') as file:
- self.user_mail = file.read().rstrip()
- else:
- self.user_mail = None
-
- return self
-
- def print_config(self):
- """Print the configuration attributes and their values.
-
- This method displays the configuration attributes and their corresponding
- values in a formatted manner. Lists and dictionaries within the configuration
- are also displayed with appropriate indentation.
-
- Notes
- -----
- - The maximum column width for the attribute names is automatically determined.
- - The method prints the attribute name, its type, and its value.
- - If an attribute is a list, it is displayed with each item indented.
- - If an attribute is a dictionary, it is also displayed with each key-value
- pair indented.
- """
- # max_col_width = max(len(key) for key in vars(self)) + 1
- max_col_width = 27
-
- print("\nConfiguration:")
- print(f"{'Attribute':<{max_col_width}} Type Value")
- print("-" * 80)
- for key, value in vars(self).items():
- if isinstance(value, list):
- # If the value is a list, format it with indentation
- print(f"{key:<{max_col_width}} list")
- for item in value:
- item_type = type(item).__name__
- print(f" - {item:<{max_col_width-4}} {item_type}")
- elif isinstance(value, dict):
- # If the value is a dictionary, format it as before
- print(f"{key:<{max_col_width}} dict")
- for sub_key, sub_value in value.items():
- sub_value_type = type(sub_value).__name__
- print(
- f" - {sub_key:<{max_col_width-4}} {sub_value_type:<4} {sub_value}"
- )
- else:
- # Standard output
- key_type = type(key).__name__
- print(f"{key:<{max_col_width}} {key_type:<4} {value}")
-
- def convert_paths_to_absolute(self):
- """Convert relative file paths to absolute paths in the configuration.
-
- This method iterates through all variables and their dictionary entries in
- the configuration and checks for string values that represent file paths.
- If a file path is relative (starts with './'), it is converted to an
- absolute path using `os.path.abspath`.
-
- Returns
- -------
- Config
- The same `Config` instance with relative file paths converted to absolute paths.
- """
- # Loop through all variables and their dictionary entries
- for attr_name, attr_value in self.__dict__.items():
- if isinstance(attr_value, str):
- if os.path.isabs(attr_value):
- # If the value is already an absolute path, continue to the next iteration
- continue
- # Convert relative paths to absolute paths
- if attr_value.startswith('./'):
- self.__dict__[attr_name] = os.path.abspath(attr_value)
- elif isinstance(attr_value, dict):
- # If the attribute is a dictionary, loop through its entries
- for key, value in attr_value.items():
- if isinstance(value, str):
- if os.path.isabs(value):
- # If the value is already an absolute path, continue to the next iteration
- continue
- # Convert relative paths to absolute paths
- if value.startswith('./'):
- self.__dict__[attr_name][key] = os.path.abspath(
- value)
-
- return self
-
- def create_vars_from_dicts(self):
- """Create instance attributes from dictionary entries in the configuration.
-
- This method iterates through the instance's attribute dictionary and checks
- for dictionary values. For each dictionary encountered, it creates new
- instance attributes by concatenating the original attribute name and the
- dictionary key, and assigns the corresponding values.
-
- Returns
- -------
- Config
- The same `Config` instance with new attributes created from dictionary entries.
- """
- # Create a copy of the object's __dict__ to avoid modifying it during iteration
- object_dict = vars(self).copy()
-
- for key, value in object_dict.items():
- if isinstance(value, dict):
- for sub_key, sub_value in value.items():
- setattr(self, key + '_' + sub_key, sub_value)
- return self
-
-
-def run_chain(work_root, model_cfg, cfg, startdate_sim, enddate_sim, job_names,
- force, resume):
- """Run the processing chain, managing job execution and logging.
-
- This function sets up and manages the execution of a processing chain, handling
+def run_chunk(cfg, force, resume):
+ """Run a chunk of the processing chain, managing job execution and logging.
+
+ This function sets up and manages the execution of a Processing Chain, handling
job execution, logging, and various configuration settings.
Parameters
----------
- work_root : str
- The path to the directory where the processing chain writes files during execution.
- model_cfg : dict
- Configuration settings for the modeling framework.
cfg : Config
Object holding user-defined configuration parameters as attributes.
- startdate_sim : datetime-object
- The start date of the simulation.
- enddate_sim : datetime-object
- The end date of the simulation.
- job_names : list of str
- List of names of jobs to execute on every timeslice.
force : bool
If True, it will force the execution of jobs regardless of their completion status.
resume : bool
@@ -449,194 +125,108 @@ def run_chain(work_root, model_cfg, cfg, startdate_sim, enddate_sim, job_names,
- It checks for job completion status and resumes or forces execution accordingly.
- Job log files are managed, and errors or timeouts are handled with notifications.
"""
- # Write current start and end dates to config variables
- cfg.startdate_sim = startdate_sim
- cfg.enddate_sim = enddate_sim
-
# Set forecast time
cfg.forecasttime = (cfg.enddate_sim -
cfg.startdate_sim).total_seconds() / 3600
- # String variables for startdate_sim
- cfg.startdate_sim_yyyymmddhh = startdate_sim.strftime('%Y%m%d%H')
- cfg.enddate_sim_yyyymmddhh = enddate_sim.strftime('%Y%m%d%H')
-
- # Folder naming and structure
- cfg.job_id = f'{cfg.startdate_sim_yyyymmddhh}_{cfg.enddate_sim_yyyymmddhh}'
- cfg.chain_root = os.path.join(work_root, cfg.casename, cfg.job_id)
-
- # Config variables for spinup runs (datetimes, job-id, etc.)
- if hasattr(cfg, 'spinup'):
- if cfg.first_one: # first run in spinup
- cfg.chain_root_prev = None
- else: # consecutive runs in spinup
- cfg.startdate_sim_yyyymmddhh = cfg.startdate_sim.strftime(
- '%Y%m%d%H')
- enddate_sim_yyyymmddhh_prev = (
- cfg.enddate_sim -
- timedelta(hours=cfg.restart_step_hours)).strftime('%Y%m%d%H')
-
- if cfg.second_one:
- startdate_sim_yyyymmddhh_prev = (cfg.enddate_sim - timedelta(
- hours=2 * cfg.restart_step_hours)).strftime('%Y%m%d%H')
- else: # all other runs (i.e., get job_id from previous run)
- startdate_sim_yyyymmddhh_prev = (
- cfg.enddate_sim -
- timedelta(hours=2 * cfg.restart_step_hours +
- cfg.spinup)).strftime('%Y%m%d%H')
-
- cfg.job_id_prev = f'{startdate_sim_yyyymmddhh_prev}_{enddate_sim_yyyymmddhh_prev}'
- cfg.chain_root_prev = os.path.join(work_root, cfg.casename,
- cfg.job_id_prev)
- cfg.last_cosmo_output = os.path.join(cfg.chain_root_prev, 'cosmo',
- 'output')
-
- # No restart for spinup simulations (= default values for no restart)
- cfg.cosmo_restart_out = ''
- cfg.cosmo_restart_in = ''
- elif 'restart' in model_cfg['models'][cfg.model]['features']:
- cfg.startdate_sim_prev = cfg.startdate_sim - timedelta(
- hours=cfg.restart_step_hours)
- cfg.enddate_sim_prev = cfg.enddate_sim - timedelta(
- hours=cfg.restart_step_hours)
- cfg.startdate_sim_prev_yyyymmddhh = cfg.startdate_sim_prev.strftime(
- '%Y%m%d%H')
- cfg.enddate_sim_prev_yyyymmddhh = cfg.enddate_sim_prev.strftime(
- '%Y%m%d%H')
-
- cfg.job_id_prev = f'{cfg.startdate_sim_prev_yyyymmddhh}_{cfg.enddate_sim_prev_yyyymmddhh}'
- cfg.chain_root_prev = os.path.join(work_root, cfg.casename,
- cfg.job_id_prev)
-
- # Set restart directories
- cfg.cosmo_restart_out = os.path.join(cfg.chain_root, 'cosmo',
- 'restart')
- cfg.cosmo_restart_in = os.path.join(cfg.chain_root_prev, 'cosmo',
- 'restart')
-
- # Check constraint
- if hasattr(cfg, 'constraint'):
- assert cfg.constraint in ['gpu', 'mc'], ("Unknown constraint, use"
- "gpu or mc")
-
- # If nested run: use output of mother-simulation
- if 'nesting' in model_cfg['models'][
- cfg.model]['features'] and not os.path.isdir(cfg.meteo.dir):
- # if ifs_hres_dir doesn't point to a directory,
- # it is the name of the mother run
- mother_name = cfg.meteo.dir
- cfg.meteo.dir = os.path.join(work_root, mother_name, cfg.job_id,
- 'cosmo', 'output')
- cfg.meteo.inc = 1
- cfg.meteo.prefix = 'lffd'
-
# Logging
- log_working_dir = os.path.join(cfg.chain_root, 'checkpoints', 'working')
- log_finished_dir = os.path.join(cfg.chain_root, 'checkpoints', 'finished')
- setattr(cfg, 'log_working_dir', log_working_dir)
- setattr(cfg, 'log_finished_dir', log_finished_dir)
+ cfg.chain_root = cfg.work_root / cfg.casename / cfg.chunk_id
+ cfg.log_working_dir = cfg.chain_root / 'checkpoints' / 'working'
+ cfg.log_finished_dir = cfg.chain_root / 'checkpoints' / 'finished'
# Create working directories
tools.create_dir(cfg.chain_root, "chain_root")
- tools.create_dir(log_working_dir, "log_working")
- tools.create_dir(log_finished_dir, "log_finished")
-
- # Number of levels and switch for unit conversion for 'reduce_output' job
- if not hasattr(cfg, 'output_levels'):
- setattr(cfg, 'output_levels', -1)
- if not hasattr(cfg, 'convert_gas'):
- setattr(cfg, 'convert_gas', True)
-
- # run jobs (if required)
- for job in job_names:
- skip = False
-
- # if exists job is currently worked on or has been finished
- if os.path.exists(os.path.join(log_working_dir, job)):
- if not force:
- while True:
- if os.path.exists(os.path.join(log_finished_dir, job)):
- print('Skip "%s" for chain "%s"' % (job, cfg.job_id))
- skip = True
- break
- elif resume:
- resume = False
- break
- else:
- print('Wait for "%s" of chain "%s"' %
- (job, cfg.job_id))
- sys.stdout.flush()
- for _ in range(3000):
- time.sleep(0.1)
+ tools.create_dir(cfg.log_working_dir, "log_working")
+ tools.create_dir(cfg.log_finished_dir, "log_finished")
+
+ # Config variables for spinup and restart runs
+ cfg.cosmo_restart_in = ''
+ cfg.cosmo_restart_out = ''
+ if hasattr(cfg, 'spinup'):
+ if cfg.chunk_id_prev:
+ cfg.chain_root_prev = cfg.work_root / cfg.casename / cfg.chunk_id_prev
+ cfg.last_cosmo_output = cfg.chain_root_prev / 'cosmo' / 'output'
+ elif 'restart' in cfg.workflow['features']:
+ if cfg.chunk_id_prev:
+ cfg.chain_root_prev = cfg.work_root / cfg.casename / cfg.chunk_id_prev
+ cfg.cosmo_restart_in = cfg.chain_root_prev / 'cosmo' / 'restart'
+ cfg.cosmo_restart_out = cfg.chain_root / 'cosmo' / 'restart'
+
+ if not cfg.force_sync:
+        # Empty current job ids
+ cfg.job_ids['current'] = {}
+
+ # Submit current chunk
+ for job_name in cfg.jobs:
+ if (cfg.log_finished_dir / job_name).exists() and not force:
+ # Skip job if already finished
+ print(f' └── Skipping "{job_name}" job')
else:
- os.remove(os.path.join(log_working_dir, job))
- try:
- os.remove(os.path.join(log_finished_dir, job))
- except FileNotFoundError:
- pass
-
- if not skip:
- print('Process "%s" for chain "%s"' % (job, cfg.job_id))
- sys.stdout.flush()
-
- try_count = 1 + (args.ntry - 1) * (job == 'cosmo')
- while try_count > 0:
- try_count -= 1
- try:
- # Change the log file
- logfile = os.path.join(cfg.log_working_dir, job)
- logfile_finish = os.path.join(cfg.log_finished_dir, job)
- tools.change_logfile(logfile)
-
- # Launch the job
- to_call = getattr(jobs, job)
- to_call.main(cfg, model_cfg)
-
- shutil.copy(logfile, logfile_finish)
-
- exitcode = 0
- try_count = 0
- except:
- subject = "ERROR or TIMEOUT in job '%s' for chain '%s'" % (
- job, cfg.job_id)
- logging.exception(subject)
- if cfg.user_mail:
- message = tools.prepare_message(
- os.path.join(log_working_dir, job))
- logging.info('Sending log file to %s' % cfg.user_mail)
- tools.send_mail(cfg.user_mail, subject, message)
- if try_count == 0:
- raise RuntimeError(subject)
-
- if exitcode != 0 or not os.path.exists(
- os.path.join(log_finished_dir, job)):
- subject = "ERROR or TIMEOUT in job '%s' for chain '%s'" % (
- job, cfg.job_id)
+ print(f' └── Submitting "{job_name}" job')
+
+ # Logfile settings
+ cfg.logfile = cfg.log_working_dir / job_name
+ cfg.logfile_finish = cfg.log_finished_dir / job_name
+
+ # Submit the job
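+                # Jobs that define BASIC_PYTHON_JOB = True are submitted
+                # through cfg.submit_basic_python(); all other jobs handle
+                # their submission themselves inside main()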
+ job = getattr(jobs, job_name)
+ if hasattr(job, 'BASIC_PYTHON_JOB') and job.BASIC_PYTHON_JOB:
+ cfg.submit_basic_python(job_name)
+ else:
+ job.main(cfg)
+
+ # Wait for previous chunk jobs, monitor them and cycle info
+ cfg.cycle()
+
+    else:  # Synchronous execution (--force-sync), e.g., for a nested run_chain.py
+ for job_name in cfg.jobs:
+ print(f' └── Process "{job_name}" for chunk "{cfg.chunk_id}"')
+ try:
+ # Change the log file
+ cfg.logfile = cfg.log_working_dir / job_name
+ cfg.logfile_finish = cfg.log_finished_dir / job_name
+
+ # Launch the job
+ to_call = getattr(jobs, job_name)
+ to_call.main(cfg)
+
+ shutil.copy(cfg.logfile, cfg.logfile_finish)
+
+ exitcode = 0
+ except Exception:
+ exitcode = 1
+ subject = "ERROR or TIMEOUT in job '%s' for chunk '%s'" % (
+ job_name, cfg.chunk_id)
+ logging.exception(subject)
+ if cfg.user_mail:
+ message = tools.prepare_message(cfg.log_working_dir /
+ job_name)
+ logging.info('Sending log file to %s' % cfg.user_mail)
+ tools.send_mail(cfg.user_mail, subject, message)
+
+ if exitcode != 0 or not (cfg.log_finished_dir / job_name).exists():
+ subject = "ERROR or TIMEOUT in job '%s' for chunk '%s'" % (
+ job_name, cfg.chunk_id)
if cfg.user_mail:
- message = tools.prepare_message(
- os.path.join(log_working_dir, job))
+ message = tools.prepare_message(cfg.log_working_dir /
+ job_name)
logging.info('Sending log file to %s' % cfg.user_mail)
tools.send_mail(cfg.user_mail, subject, message)
raise RuntimeError(subject)
-def restart_runs(work_root, model_cfg, cfg, job_names, force, resume):
+def restart_runs(cfg, force, resume):
"""Start subchains in specified intervals and manage restarts.
This function slices the total runtime of the processing chain according to the
- `cfg.restart_step_hours` configuration. It calls `run_chain()` for each
+ `cfg.restart_step_hours` configuration. It calls `run_chunk()` for each
specified interval.
Parameters
----------
- work_root : str
- The path to the directory in which the chain writes files during execution.
- model_cfg : dict
- Configuration settings for the modeling framework.
cfg : Config
Object holding all user-configuration parameters as attributes.
- job_names : list of str
- List of names of jobs to execute on every timeslice.
force : bool
If True, it will force the execution of jobs regardless of their completion status.
resume : bool
@@ -644,123 +234,49 @@ def restart_runs(work_root, model_cfg, cfg, job_names, force, resume):
Notes
-----
- - The function iterates over specified intervals, calling `run_chain()` for each.
+ - The function iterates over specified intervals, calling `run_chunk()` for each.
- It manages restart settings and logging for each subchain.
"""
- # run restarts
- for startdate_sim in tools.iter_hours(cfg.startdate, cfg.enddate,
- cfg.restart_step_hours):
- enddate_sim = startdate_sim + timedelta(hours=cfg.restart_step_hours)
- if enddate_sim > cfg.enddate:
- continue
-
- # Set restart variable (only takes effect for ICON)
- if startdate_sim == cfg.startdate:
- setattr(cfg, "lrestart", '.FALSE.')
- else:
- setattr(cfg, "lrestart", '.TRUE.')
-
- print("Starting run with startdate {}".format(startdate_sim))
-
- run_chain(work_root=work_root,
- model_cfg=model_cfg,
- cfg=cfg,
- startdate_sim=startdate_sim,
- enddate_sim=enddate_sim,
- job_names=job_names,
- force=force,
- resume=resume)
-
-
-def restart_runs_spinup(work_root, model_cfg, cfg, job_names, force, resume):
- """Start subchains in specified intervals and manage restarts with spin-up.
-
- This function slices the total runtime of the processing chain according to the
- `cfg.restart_step_hours` configuration. It calls `run_chain()` for each specified
- interval, managing restarts with spin-up.
-
- Parameters
- ----------
- work_root : str
- The path to the directory in which the chain writes files during execution.
- model_cfg : dict
- Configuration settings for the modeling framework.
- cfg : Config
- Object holding all user-configuration parameters as attributes.
- job_names : list of str
- List of names of jobs to execute on every timeslice.
- force : bool
- If True, it will force the execution of jobs regardless of their completion status.
- resume : bool
- If True, it will resume the last unfinished job.
-
- Notes
- -----
- - The function iterates over specified intervals, calling `run_chain()` for each.
- - It manages restart settings and logging for each subchain, including spin-up.
- """
- for startdate_sim in tools.iter_hours(cfg.startdate, cfg.enddate,
- cfg.restart_step_hours):
- if startdate_sim == cfg.startdate:
- setattr(cfg, "first_one", True)
- setattr(cfg, "second_one", False)
- setattr(cfg, "lrestart", '.FALSE.')
- run_time = cfg.restart_step_hours
- startdate_sim_spinup = startdate_sim
- elif startdate_sim == cfg.startdate + timedelta(
- hours=cfg.restart_step_hours):
- setattr(cfg, "first_one", False)
- setattr(cfg, "second_one", True)
- setattr(cfg, "lrestart", '.TRUE.')
- run_time = cfg.restart_step_hours + cfg.spinup
- startdate_sim_spinup = startdate_sim - timedelta(hours=cfg.spinup)
+ for chunk_id in cfg.chunks:
+ cfg.chunk_id = chunk_id
+ cfg.get_previous_chunk_id(cfg.chunk_id)
+ cfg.startdate_sim_yyyymmddhh = cfg.chunk_id[0:10]
+ cfg.enddate_sim_yyyymmddhh = cfg.chunk_id[-10:]
+ cfg.startdate_sim = datetime.strptime(
+ cfg.startdate_sim_yyyymmddhh, "%Y%m%d%H").replace(tzinfo=pytz.UTC)
+ cfg.enddate_sim = datetime.strptime(
+ cfg.enddate_sim_yyyymmddhh, "%Y%m%d%H").replace(tzinfo=pytz.UTC)
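+        # e.g., chunk_id '2015010100_2015010200' covers the simulation
+        # from 2015-01-01 00 UTC to 2015-01-02 00 UTC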
+
+ if 'spinup' in cfg.workflow['features'] and hasattr(cfg, 'spinup'):
+ if cfg.startdate_sim == cfg.startdate:
+ cfg.first_one = True
+ cfg.second_one = False
+ cfg.lrestart = '.FALSE.'
+ elif cfg.startdate_sim == cfg.startdate + timedelta(
+ hours=cfg.restart_step_hours):
+ cfg.first_one = False
+ cfg.second_one = True
+ cfg.lrestart = '.TRUE.'
+ else:
+ cfg.first_one = False
+ cfg.second_one = False
+ cfg.lrestart = '.TRUE.'
else:
- setattr(cfg, "first_one", False)
- setattr(cfg, "second_one", False)
- setattr(cfg, "lrestart", '.TRUE.')
- run_time = cfg.restart_step_hours + cfg.spinup
- startdate_sim_spinup = startdate_sim - timedelta(hours=cfg.spinup)
-
- # If current enddate is later than global enddate, skip
- enddate_sim = startdate_sim + timedelta(hours=cfg.restart_step_hours)
- if enddate_sim > cfg.enddate:
- continue
-
- print(f'Runtime of sub-simulation: {run_time} h')
-
- run_chain(work_root=work_root,
- model_cfg=model_cfg,
- cfg=cfg,
- startdate_sim=startdate_sim_spinup,
- enddate_sim=enddate_sim,
- job_names=job_names,
- force=force,
- resume=resume)
-
+ # Set restart variable (only takes effect for ICON)
+ cfg.lrestart = ".FALSE." if cfg.startdate_sim == cfg.startdate else ".TRUE."
-def load_model_config_yaml(yamlfile):
- """Load model configuration from a YAML file.
+ print(f'└── Starting chunk "{cfg.chunk_id}"')
- Parameters
- ----------
- yamlfile : str
- The path to the YAML file containing the model configuration.
-
- Returns
- -------
- dict
- A dictionary representing the model configuration loaded from the YAML file.
- """
- with open(yamlfile) as file:
- model_cfg = yaml.safe_load(file)
- return model_cfg
+ run_chunk(cfg=cfg, force=force, resume=resume)
-if __name__ == '__main__':
+def main():
"""Main script for running a processing chain.
- This script handles the execution of a processing chain for one or more specified cases. It loads model configurations, prepares the environment, and starts the chain based on the provided settings.
+ This script handles the execution of a processing chain for one or more
+ specified cases. It loads model configurations, prepares the environment,
+ and starts the chain based on the provided settings.
Parameters
----------
@@ -769,14 +285,15 @@ def load_model_config_yaml(yamlfile):
Notes
-----
- This script uses command-line arguments to specify cases and job lists.
- - It loads model configurations, converts paths to absolute, sets restart settings, and starts the chain.
- - Depending on the model's features, it may run with or without restarts or utilize spin-up restarts.
+ - It loads model configurations, converts paths to absolute, sets restart
+ settings, and starts the chain.
+ - Depending on the model's features, it may run with or without restarts
+ or utilize spin-up restarts.
"""
args = parse_arguments()
for casename in args.casenames:
# Load configs
- model_cfg = load_model_config_yaml('config/models.yaml')
cfg = Config(casename)
# Convert relative to absolute paths
@@ -785,9 +302,6 @@ def load_model_config_yaml(yamlfile):
# Set restart step in hours
cfg.set_restart_step_hours()
- # Print config before duplication of dict variables
- cfg.print_config()
-
        # Duplicate variables in the form of <dict>_<key> for better
        # access within namelist templates.
# E.g.: cfg.meteo['dir'] -> cfg.meteo_dir
@@ -795,36 +309,52 @@ def load_model_config_yaml(yamlfile):
# Check if jobs are set or if default ones are used
if args.job_list is None:
- args.job_list = model_cfg['models'][cfg.model]['jobs']
+ cfg.jobs = cfg.workflow['jobs']
+ else:
+ cfg.jobs = args.job_list
+
+    # Check whether synchronous execution is forced
+    cfg.force_sync = args.force_sync
- print(f"Starting chain for case {casename} and model {cfg.model}")
+ # Check constraint
+ if cfg.constraint:
+        assert cfg.constraint in ['gpu', 'mc'], ("Unknown constraint, use"
+                                                 " gpu or mc")
+
+ # Get complete chunk list
+ cfg.get_chunk_list()
+
+ # Print config before chain starts
+ cfg.print_config()
+
+ # Get custom chunks if specified
+ cfg.chunks = args.chunk_list if args.chunk_list else cfg.chunk_list
+
+ tools.create_dir(cfg.case_root, "case_root")
+
+ print("╔════════════════════════════════════════╗")
+ print("║ Starting Processing Chain ║")
+ print("╠════════════════════════════════════════╣")
+ print(f"║ Case: {casename: <27} ║")
+ print(f"║ Workflow: {cfg.workflow_name: <27} ║")
+ print("╚════════════════════════════════════════╝")
# Check for restart compatibility and spinup
- if 'restart' in model_cfg['models'][cfg.model]['features']:
- if hasattr(cfg, 'spinup'):
- print("Using spin-up restarts.")
- restart_runs_spinup(work_root=cfg.work_root,
- model_cfg=model_cfg,
- cfg=cfg,
- job_names=args.job_list,
- force=args.force,
- resume=args.resume)
- else:
- print("Using built-in model restarts.")
- restart_runs(work_root=cfg.work_root,
- model_cfg=model_cfg,
- cfg=cfg,
- job_names=args.job_list,
- force=args.force,
- resume=args.resume)
+ if 'restart' in cfg.workflow['features']:
+ restart_runs(cfg=cfg, force=args.force, resume=args.resume)
else:
print("No restarts are used.")
- run_chain(work_root=cfg.work_root,
- cfg=cfg,
- startdate_sim=cfg.startdate,
- enddate_sim=cfg.enddate,
- job_names=args.job_list,
- force=args.force,
- resume=args.resume)
-
- print('>>> Finished the processing chain successfully <<<')
+ cfg.startdate_sim = cfg.startdate
+ cfg.enddate_sim = cfg.enddate
+ run_chunk(cfg=cfg, force=args.force, resume=args.resume)
+
+ print("╔════════════════════════════════════════╗")
+ print("║ Processing Chain Completed ║")
+ print("╚════════════════════════════════════════╝")
+
+
+if __name__ == '__main__':
+ main()
diff --git a/workflows.yaml b/workflows.yaml
new file mode 100644
index 00000000..f87c22b5
--- /dev/null
+++ b/workflows.yaml
@@ -0,0 +1,237 @@
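+# Workflow definitions for the Processing Chain. Each top-level key names a
+# workflow; 'features' toggles chain behavior (restart, spinup, nesting,
+# tracers), 'jobs' lists the jobs run for every chunk, and 'dependencies'
+# maps a job to the jobs it waits for in the current and/or previous chunk.
+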
+cosmo:
+ features:
+ - restart
+ jobs:
+ - prepare_cosmo
+ - int2lm
+ - cosmo
+ - post_cosmo
+ dependencies:
+ int2lm:
+ current:
+ - prepare_cosmo
+ cosmo:
+ previous:
+ - cosmo
+ post_cosmo:
+ current:
+ - cosmo
+
+cosmo-ghg:
+ features:
+ - restart
+ - tracers
+ jobs:
+ - prepare_cosmo
+ - emissions
+ - biofluxes
+ - oem
+ - online_vprm
+ - int2lm
+ - post_int2lm
+ - cosmo
+ - post_cosmo
+ dependencies:
+ emissions:
+ current:
+ - prepare_cosmo
+ biofluxes:
+ current:
+ - prepare_cosmo
+ oem:
+ current:
+ - prepare_cosmo
+ online_vprm:
+ current:
+ - prepare_cosmo
+ int2lm:
+ current:
+ - prepare_cosmo
+ - emissions
+ - biofluxes
+ - oem
+ - online_vprm
+ post_int2lm:
+ current:
+ - int2lm
+ cosmo:
+ current:
+ - post_int2lm
+ previous:
+ - cosmo
+ post_cosmo:
+ current:
+ - cosmo
+
+cosmo-ghg-spinup:
+ features:
+ - restart
+ - tracers
+ - spinup
+ jobs:
+ - prepare_cosmo
+ - emissions
+ - biofluxes
+ - oem
+ - online_vprm
+ - int2lm
+ - post_int2lm
+ - cosmo
+ - post_cosmo
+ dependencies:
+ emissions:
+ current:
+ - prepare_cosmo
+ biofluxes:
+ current:
+ - prepare_cosmo
+ oem:
+ current:
+ - prepare_cosmo
+ online_vprm:
+ current:
+ - prepare_cosmo
+ int2lm:
+ current:
+ - prepare_cosmo
+ - emissions
+ - biofluxes
+ - oem
+ - online_vprm
+ post_int2lm:
+ current:
+ - int2lm
+ previous:
+ - cosmo
+ cosmo:
+ current:
+ - post_int2lm
+ previous:
+ - cosmo
+ post_cosmo:
+ current:
+ - cosmo
+
+cosmo-art:
+ features:
+ - nesting
+ - spinup
+ jobs:
+ - prepare_cosmo
+ - emissions
+ - obs_nudging
+ - photo_rate
+ - int2lm
+ - cosmo
+ - post_cosmo
+ dependencies:
+ emissions:
+ current:
+ - prepare_cosmo
+ obs_nudging:
+ current:
+ - prepare_cosmo
+ photo_rate:
+ current:
+ - prepare_cosmo
+ int2lm:
+ current:
+ - prepare_cosmo
+ - emissions
+ - obs_nudging
+ - photo_rate
+ cosmo:
+ previous:
+ - cosmo
+ post_cosmo:
+ current:
+ - cosmo
+
+icon:
+ features:
+ - restart
+ jobs:
+ - prepare_icon
+ - icontools
+ - icon
+ dependencies:
+ icontools:
+ current:
+ - prepare_icon
+ icon:
+ current:
+ - prepare_icon
+ - icontools
+ previous:
+ - icon
+
+icon-art:
+ features:
+ - restart
+ jobs:
+ - prepare_icon
+ - icontools
+ - prepare_art
+ - icon
+ dependencies:
+ icontools:
+ current:
+ - prepare_icon
+ prepare_art:
+ current:
+ - icontools
+ icon:
+ current:
+ - prepare_icon
+ - icontools
+ - prepare_art
+ previous:
+ - icon
+
+icon-art-global:
+ features:
+ - restart
+ jobs:
+ - prepare_icon
+ - prepare_art_global
+ - icon
+ dependencies:
+ prepare_art_global:
+ current:
+ - prepare_icon
+ previous:
+ - icon
+ icon:
+ current:
+ - prepare_icon
+ - prepare_art_global
+ previous:
+ - icon
+
+icon-art-oem:
+ features:
+ - restart
+ jobs:
+ - prepare_icon
+ - icontools
+ - prepare_art
+ - prepare_art_oem
+ - icon
+ dependencies:
+ icontools:
+ current:
+ - prepare_icon
+ prepare_art:
+ current:
+ - icontools
+ prepare_art_oem:
+ current:
+ - prepare_art
+ icon:
+ current:
+ - prepare_icon
+ - icontools
+ - prepare_art
+ - prepare_art_oem
+ previous:
+ - icon
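+
+# A new workflow can be added using the same schema, e.g. (illustrative):
+#
+# my-icon-workflow:
+#   features:
+#     - restart
+#   jobs:
+#     - prepare_icon
+#     - icon
+#   dependencies:
+#     icon:
+#       current:
+#         - prepare_icon
+#       previous:
+#         - icon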