diff --git a/.github/workflows/docs-latest.yml b/.github/workflows/docs-latest.yml index 969a3f7e..0f48bfca 100644 --- a/.github/workflows/docs-latest.yml +++ b/.github/workflows/docs-latest.yml @@ -8,18 +8,20 @@ on: - main jobs: - deploy-docs: + deploy-docs-latest: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v3 + - name: Build docs uses: C2SM/sphinx-action@sphinx-latest with: - pre-build-command: "pip install sphinx_rtd_theme && pip install sphinx-copybutton" build-command: "sphinx-build -b html . _build" docs-folder: "docs/" - - uses: peaceiris/actions-gh-pages@v3 + + - name: Deploy on GitHub Pages + uses: peaceiris/actions-gh-pages@v3 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ./docs/_build diff --git a/.github/workflows/docs-pr-preview.yml b/.github/workflows/docs-pr-preview.yml index 9d1ee4c1..d578a6d7 100644 --- a/.github/workflows/docs-pr-preview.yml +++ b/.github/workflows/docs-pr-preview.yml @@ -1,7 +1,9 @@ -name: Deploy PR previews +name: Build and Deploy Documentation to PR Previews on: pull_request: + paths: + - 'docs/**' types: - opened - reopened @@ -16,10 +18,10 @@ jobs: steps: - name: Checkout uses: actions/checkout@v3 + - name: Build docs uses: C2SM/sphinx-action@sphinx-latest with: - pre-build-command: "pip install sphinx_rtd_theme && pip install sphinx-copybutton" build-command: "sphinx-build -b html . _build" docs-folder: "docs/" diff --git a/.github/workflows/docs-tag.yml b/.github/workflows/docs-tag.yml index e3a4c433..90772a63 100644 --- a/.github/workflows/docs-tag.yml +++ b/.github/workflows/docs-tag.yml @@ -6,23 +6,26 @@ on: - '*' jobs: - deploy-docs: + deploy-docs-tag: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v3 + - name: Get release id: get_release uses: bruceadams/get-release@v1.3.2 env: GITHUB_TOKEN: ${{ github.token }} + - name: Build docs uses: C2SM/sphinx-action@sphinx-latest with: - pre-build-command: "pip install sphinx_rtd_theme && pip install sphinx-copybutton" build-command: "sphinx-build -b html . _build" docs-folder: "docs/" - - uses: peaceiris/actions-gh-pages@v3 + + - name: Deploy on GitHub Pages + uses: peaceiris/actions-gh-pages@v3 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ./docs/_build diff --git a/.gitignore b/.gitignore index 2838e4a4..b2df1de5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ docs/build/ +docs/_build/ *__pycache__/ *.pyc fieldextra.diagnostic @@ -6,5 +7,6 @@ input_processing-chain.tgz input/ output/ work/ -src/*/ +ext/*/ *.code-workspace +.vscode/ diff --git a/README.md b/README.md index d8f8099e..09ff995b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Processing Chain for COSMO and ICON Simulations +# Processing Chain Processing Chain The Processing Chain is a python script that prepares necessary input data, submits compute-jobs to the queue on Piz Daint and does @@ -9,71 +9,19 @@ e.g., by creating your own case or adding new jobs. ## Environment Setup -The following steps allow you to create and use your own virtual -environment to run the Processing Chain. We recommend to use a conda -environment for the usage of the provided scripts. Please follow the -instruction for the installation. The following steps only need to be -performed once. - -### 1\. Install Miniconda - -Install as user specific Miniconda, e.g. on your `$HOME` directory, -which is the default location. - -> **Note**: Only conda itself should be installed in your `$HOME`. 
-> All environments should be stored in your `$PROJECT` directory, -> otherwise you risk filling up your `$HOME` directory. See below for instructions. - -To install the latest Miniconda, type: - - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh - bash Miniconda3-latest-Linux-x86_64.sh - -Further deails on Miniconda can be found on the [Miniconda documentation page](https://docs.conda.io/en/latest/miniconda.html). - -### 2\. Create the Conda Environment - -Create a conda environment `proc-chain` with and install requirements: - - conda env create --prefix $PROJECT/envs/proc-chain -f env/environment.yml - -To be able to activate your conda environment by simply using `conda activate proc-chain` instead of the full path, add the following to your `.bashrc`: - - export CONDA_ENVS_PATH=$PROJECT/envs - -Activate the environment (use "source activate" in case "conda activate" -does not work): - - conda activate proc-chain - -If you already have the environment but want to update it: - - conda env update --file env/environment.yml --prune - -### 3\. Store user-specific data - -To register your email address and standard project account, store them into -these files within your home directory: - - echo > ~/.acct - echo > ~/.forward - -These settings are optional. The Processing Chain will first check the content -of those files. If desired, the corresponding variables can be overridden by setting -the `compute_account` and `user_mail` variables in the `config.yaml` file. +To setup your conda environment for the Processing Chain, please refer +to the part in the [official documentation](https://c2sm.github.io/processing-chain/latest/environment.html). ## Run the Chain -Once everything has been set up correctly according to the above steps, -you just need to execute the following command to activate your -environment (if not done already): +To activate your conda environment, type: conda activate proc-chain To test if your environment has been successfully set, use the command line help to display the available arguments for the main script: - python run_chain.py -h + ./run_chain.py -h To run the test cases with their standard jobs, please ensure that you clone the Processing Chain to `$SCRATCH`, as input and @@ -89,9 +37,8 @@ For these pre-defined test cases, you can use the Jenkins script ./jenkins/scripts/jenkins.sh -This script calls other scripts that are located in `jenkins/scripts/`. -They will -- activate the conda environment (if not done already) +This script calls other scripts that are located in `jenkins/scripts/` and will: +- activate the conda environment - setup spack-c2sm - download input data to `input/` - build `int2lm`, `cosmo-ghg`, `icon` and `icon-art` @@ -104,16 +51,14 @@ They will To run the test cases manually, type: -```bash # replace with one of the above tests - python run_chain.py -``` + ./run_chain.py ## Documentation For more information about the file structure, configuration options, -namelist templates etc., please read the official -[documentation](https://c2sm.github.io/processing-chain/). +namelist templates etc., please read the [official +documentation](https://c2sm.github.io/processing-chain/latest/). 
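For example, a complete test run from a clone on `$SCRATCH` then boils down to the following (using the bundled `cosmo-ghg-test` case; any other case from `cases/` works the same way):

    conda activate proc-chain
    ./run_chain.py cosmo-ghg-test
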
## Contributing diff --git a/cases/cosmo-ghg-spinup-test/config.yaml b/cases/cosmo-ghg-spinup-test/config.yaml index 2351b766..b52048ba 100644 --- a/cases/cosmo-ghg-spinup-test/config.yaml +++ b/cases/cosmo-ghg-spinup-test/config.yaml @@ -1,10 +1,11 @@ # Configuration file for the 'cosmo-ghg-spinup-test' case with COSMO-GHG -model: cosmo-ghg +workflow: cosmo-ghg-spinup constraint: gpu +run_on: gpu +compute_queue: normal ntasks_per_node: 12 restart_step: PT6H -variant: spinup spinup: 3 startdate: 2015-01-01T00:00:00Z enddate: 2015-01-01T18:00:00Z @@ -47,7 +48,7 @@ online_vprm: int2lm: extpar_dir: ./input/cosmo-ghg/extpar extpar_filename: test_domain.nc - binary_file: ./src/int2lm/test/testsuite/int2lm + binary_file: ./ext/int2lm/test/testsuite/int2lm namelist_filename: int2lm_INPUT.cfg runjob_filename: int2lm_runjob.cfg compute_queue: normal @@ -69,7 +70,7 @@ post_int2lm: - CO2_A2 cosmo: - binary_file: ./src/cosmo-ghg/cosmo/ACC/cosmo_gpu + binary_file: ./ext/cosmo-ghg/cosmo/ACC/cosmo_gpu namelist_prefix: cosmo_INPUT_ runjob_filename: cosmo_runjob.cfg compute_queue: normal diff --git a/cases/cosmo-ghg-spinup-test/cosmo_runjob.cfg b/cases/cosmo-ghg-spinup-test/cosmo_runjob.cfg index ca14d636..608b8d15 100644 --- a/cases/cosmo-ghg-spinup-test/cosmo_runjob.cfg +++ b/cases/cosmo-ghg-spinup-test/cosmo_runjob.cfg @@ -1,5 +1,5 @@ #!/bin/bash -l -#SBATCH --job-name="cosmo_{cfg.startdate_sim_yyyymmddhh}_{cfg.forecasttime}" +#SBATCH --job-name=cosmo #SBATCH --account={cfg.compute_account} #SBATCH --time={walltime} #SBATCH --nodes={np_tot} @@ -34,7 +34,7 @@ echo "============== StartTime: `date +%s` s" echo "============== StartTime: `date`" echo "=====================================================" -srun -u ./{execname} >> {logfile} 2>&1 +srun -u ./{cfg.cosmo_execname} >> {logfile} 2>&1 pid=$? 
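# Note: despite its name, "pid" captures the exit status ($?) of the srun
# command above, not a process id.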
echo "=====================================================" diff --git a/cases/cosmo-ghg-spinup-test/int2lm_runjob.cfg b/cases/cosmo-ghg-spinup-test/int2lm_runjob.cfg index c3c80e53..9a3ae2e0 100644 --- a/cases/cosmo-ghg-spinup-test/int2lm_runjob.cfg +++ b/cases/cosmo-ghg-spinup-test/int2lm_runjob.cfg @@ -1,5 +1,5 @@ #!/bin/bash -l -#SBATCH --job-name=int2lm_{cfg.startdate_sim_yyyymmddhh}_{cfg.enddate_sim_yyyymmddhh} +#SBATCH --job-name=int2lm #SBATCH --account={cfg.compute_account} #SBATCH --time={walltime} #SBATCH --nodes={nodes} diff --git a/cases/cosmo-ghg-test/config.yaml b/cases/cosmo-ghg-test/config.yaml index dc1134e9..b954e0f2 100644 --- a/cases/cosmo-ghg-test/config.yaml +++ b/cases/cosmo-ghg-test/config.yaml @@ -1,7 +1,9 @@ # Configuration file for the 'cosmo-ghg-test' case with COSMO-GHG -model: cosmo-ghg +workflow: cosmo-ghg constraint: gpu +run_on: gpu +compute_queue: normal ntasks_per_node: 12 restart_step: PT6H startdate: 2015-01-01T00:00:00Z @@ -45,7 +47,7 @@ online_vprm: int2lm: extpar_dir: ./input/cosmo-ghg/extpar extpar_filename: test_domain.nc - binary_file: ./src/int2lm/test/testsuite/int2lm + binary_file: ./ext/int2lm/test/testsuite/int2lm namelist_filename: int2lm_INPUT.cfg runjob_filename: int2lm_runjob.cfg compute_queue: normal @@ -67,7 +69,7 @@ post_int2lm: - CO2_A2 cosmo: - binary_file: ./src/cosmo-ghg/cosmo/ACC/cosmo_gpu + binary_file: ./ext/cosmo-ghg/cosmo/ACC/cosmo_gpu namelist_prefix: cosmo_INPUT_ runjob_filename: cosmo_runjob.cfg compute_queue: normal diff --git a/cases/cosmo-ghg-test/cosmo_runjob.cfg b/cases/cosmo-ghg-test/cosmo_runjob.cfg index ca14d636..608b8d15 100644 --- a/cases/cosmo-ghg-test/cosmo_runjob.cfg +++ b/cases/cosmo-ghg-test/cosmo_runjob.cfg @@ -1,5 +1,5 @@ #!/bin/bash -l -#SBATCH --job-name="cosmo_{cfg.startdate_sim_yyyymmddhh}_{cfg.forecasttime}" +#SBATCH --job-name=cosmo #SBATCH --account={cfg.compute_account} #SBATCH --time={walltime} #SBATCH --nodes={np_tot} @@ -34,7 +34,7 @@ echo "============== StartTime: `date +%s` s" echo "============== StartTime: `date`" echo "=====================================================" -srun -u ./{execname} >> {logfile} 2>&1 +srun -u ./{cfg.cosmo_execname} >> {logfile} 2>&1 pid=$? 
echo "=====================================================" diff --git a/cases/cosmo-ghg-test/int2lm_runjob.cfg b/cases/cosmo-ghg-test/int2lm_runjob.cfg index c3c80e53..9a3ae2e0 100644 --- a/cases/cosmo-ghg-test/int2lm_runjob.cfg +++ b/cases/cosmo-ghg-test/int2lm_runjob.cfg @@ -1,5 +1,5 @@ #!/bin/bash -l -#SBATCH --job-name=int2lm_{cfg.startdate_sim_yyyymmddhh}_{cfg.enddate_sim_yyyymmddhh} +#SBATCH --job-name=int2lm #SBATCH --account={cfg.compute_account} #SBATCH --time={walltime} #SBATCH --nodes={nodes} diff --git a/cases/icon-art-global-test/config.py b/cases/icon-art-global-test/config.py deleted file mode 100644 index 5c7e98e7..00000000 --- a/cases/icon-art-global-test/config.py +++ /dev/null @@ -1,108 +0,0 @@ -import os -""" -Configuration file for the 'icon-art-global' case with ICON-ART -""" - -# GENERAL SETTINGS =========================================================== -user = os.environ['USER'] -if user == 'jenkins': - compute_account = 'g110' -elif os.path.exists(os.environ['HOME'] + '/.acct'): - with open(os.environ['HOME'] + '/.acct', 'r') as file: - compute_account = file.read().rstrip() -else: - compute_account = os.popen("id -gn").read().splitlines()[0] -compute_host = 'daint' -compute_queue = 'normal' -constraint = 'gpu' # 'mc' - -model = 'icon-art-global' -restart_step = 24 # hours - -# Number of tasks per node -ntasks_per_node = 36 if constraint == 'mc' else 12 - -# Case name = pathname in cases/ -casename = os.path.basename(os.path.dirname(os.path.realpath(__file__))) - -# Root directory of the sourcecode of the chain (where run_chain.py is) -chain_src_dir = os.getcwd() - -# Root directory of the working space of the chain -work_root = os.path.join(chain_src_dir, 'work') - -# Case directory -case_dir = os.path.join(chain_src_dir, 'cases', casename) - -# ----------------------------------------------------------- -# SIMULATION -# ----------------------------------------------------------- - -# Executable -icon_bin = os.path.join(chain_src_dir, 'src', 'icon-art', 'bin', 'icon') - -# eccodes -eccodes_dir = os.path.join(chain_src_dir, 'input', 'eccodes_definitions') - -# Paths for namelists and slurm runscript templates -# icon_runjob = os.path.join(case_dir, 'icon_runjob_withoutart.cfg') -icon_runjob = os.path.join(case_dir, 'icon_runjob.cfg') -icon_era5_inijob = os.path.join(case_dir, 'icon_era5_inicond.sh') -icon_era5_nudgingjob = os.path.join(case_dir, 'icon_era5_nudging.sh') -icon_species_inijob = os.path.join(case_dir, 'icon_species_inicond.sh') -icon_species_nudgingjob = os.path.join(case_dir, 'icon_species_nudging.sh') - -# Number of hours between two output data -output_writing_step = 12 # TO MODIFY - -# Initial conditios -era5_inicond = False # TO MODIFY -species_inicond = True -species2restart = ['TROH'] - -# Nudging (meteorological and tracers) -era5_global_nudging = False -species_global_nudging = False -species2nudge = [] -nudging_step = 12 - -# Walltimes and domain decomposition -if compute_queue == "normal": - icon_walltime = "00:30:00" - icon_np_tot = 2 -elif compute_queue == "debug": - icon_walltime = "00:30:00" - icon_np_tot = 2 - -# ----------------------------------------------------------- -# INPUT DATA -# ----------------------------------------------------------- -# ART settings----------------------------------------------- -input_root = os.path.join(chain_src_dir, 'input', model) -art_input_folder = os.path.join(input_root, 'art') - -input_files = { - 'inicond_filename': ['era2icon_R2B03_2022060200.nc', 'icbc'], - 'map_file_nudging': 
['map_file.nudging', 'icbc'], - 'dynamics_grid_filename': ["iconR2B03-DOM01.nc", 'grid'], - 'radiation_grid_filename': ["iconR2B03-DOM01.nc", 'grid'], - 'extpar_filename': ["extpar_iconR2B03-DOM01.nc", 'grid'], - 'cldopt_filename': ['ECHAM6_CldOptProps.nc', 'rad'], - 'lrtm_filename': ['rrtmg_lw.nc', 'rad'], - 'oh_molec_filename': ['oh_gcp2022_icongrid.nc', 'chemistry'], - 'pntSrc_xml_filename': ['point-sources.xml', 'config'], - 'chemtracer_xml_filename': ['tracers.xml', 'config'], -} - -# ----------------------------------------------------------- -# Additional settings derived from constants -# ----------------------------------------------------------- - -# Nudge type (global or nothing) -nudge_type = 2 if era5_global_nudging else 0 - -# Time step for global nudging in seconds -nudging_step_seconds = nudging_step * 3600 - -# Prescribed initial conditions for CH4, CO and/or OH -iart_init_gas = 4 if species_inicond else 0 diff --git a/cases/icon-art-global-test/config.yaml b/cases/icon-art-global-test/config.yaml index 2344a8df..2fe3f096 100644 --- a/cases/icon-art-global-test/config.yaml +++ b/cases/icon-art-global-test/config.yaml @@ -1,6 +1,6 @@ # Configuration file for the 'icon-art-global-test' case with ICON -model: icon-art-global +workflow: icon-art-global constraint: gpu run_on: cpu compute_queue: normal @@ -28,6 +28,11 @@ species_global_nudging: False species2nudge: [] nudging_step: 6 +walltime: + prepare_icon: '00:15:00' + prepare_art_global: '00:10:00' + icon: '00:05:00' + era5: inicond: False global_nudging: False @@ -59,7 +64,7 @@ input_files: pntSrc_xml_filename: ./input/icon-art-global/config/point-sources.xml icon: - binary_file: ./src/icon-art/bin/icon + binary_file: ./ext/icon-art/bin/icon runjob_filename: icon_runjob.cfg era5_inijob: icon_era5_inicond.sh era5_nudgingjob: icon_era5_nudging.sh @@ -67,7 +72,6 @@ icon: species_nudgingjob: icon_species_nudging.sh output_writing_step: 6 compute_queue: normal - walltime: '00:10:00' np_tot: 4 np_io: 1 np_restart: 1 diff --git a/cases/icon-art-global-test/icon_runjob.cfg b/cases/icon-art-global-test/icon_runjob.cfg index 1e324a59..d241a8cf 100644 --- a/cases/icon-art-global-test/icon_runjob.cfg +++ b/cases/icon-art-global-test/icon_runjob.cfg @@ -1,13 +1,13 @@ #!/usr/bin/env bash -#SBATCH --job-name="{cfg.casename}_{cfg.startdate_sim_yyyymmddhh}_{cfg.forecasttime}" +#SBATCH --job-name=icon #SBATCH --account={cfg.compute_account} -#SBATCH --time={cfg.icon_walltime} +#SBATCH --time={cfg.walltime_icon} #SBATCH --nodes={cfg.icon_np_tot} #SBATCH --ntasks-per-node={cfg.ntasks_per_node} #SBATCH --partition={cfg.compute_queue} #SBATCH --constraint={cfg.constraint} #SBATCH --hint=nomultithread -#SBATCH --output={logfile} +#SBATCH --output={cfg.logfile} #SBATCH --open-mode=append #SBATCH --chdir={cfg.icon_work} @@ -388,22 +388,12 @@ EOF # ---------------------------------------------------------------------- # run the model! # ---------------------------------------------------------------------- - srun ./icon.exe - - - -# ! output_nml: specifies an output stream -------------------------------------- -# &output_nml -# filetype = 4 ! output format: 2=GRIB2, 4=NETCDFv2 -# dom = -1 ! write all domains -# output_bounds = 0., 2678400., 3600. ! start, end, increment -# steps_per_file = 1 ! number of steps per file -# mode = 1 ! 1: forecast mode (relative t-axis), 2: climate mode (absolute t-axis) -# include_last = .TRUE. -# output_filename = 'ICON-ART' -# filename_format = '{cfg.icon_output}/_latlon_' ! file name base -# remap = 1 ! 
1: remap to lat-lon grid -# reg_lon_def = -179.,2,179 -# reg_lat_def = 90.,-1,-90. -# ml_varlist = 'z_ifc','z_mc','pres','pres_sfc','qc','rh','rho','temp','u','v','w','group:ART_CHEMISTRY', -# / +handle_error(){{ + # Check for invalid pointer error at the end of icon-art + if grep -q "free(): invalid pointer" {cfg.logfile} && grep -q "clean-up finished" {cfg.logfile}; then + exit 0 + else + exit 1 + fi +}} +srun ./{cfg.icon_execname} || handle_error diff --git a/cases/icon-art-oem-test/config.yaml b/cases/icon-art-oem-test/config.yaml index 5a3d8511..a50fbacc 100644 --- a/cases/icon-art-oem-test/config.yaml +++ b/cases/icon-art-oem-test/config.yaml @@ -1,6 +1,6 @@ # Configuration file for the 'icon-art-oem-test' case with ICON -model: icon-art-oem +workflow: icon-art-oem constraint: gpu run_on: cpu compute_queue: normal @@ -21,6 +21,13 @@ filename_format: _DOM_ lateral_boundary_grid_order: lateral_boundary art_input_folder: ./input/icon-art-oem/ART +walltime: + prepare_icon: '00:10:00' + icontools: '00:30:00' + prepare_art: '00:10:00' + prepare_art_oem: '00:10:00' + icon: '00:30:00' + meteo: dir: ./input/meteo prefix: ifs_ @@ -34,6 +41,8 @@ chem: nameformat: '%Y%m%d_%H' suffix: .grb inc: 3 + remap_tracers: + CH4_BG: TRCH4_chemtr icontools_runjobs: - icontools_remap_ic_runjob.cfg @@ -60,7 +69,7 @@ input_files: oem_monthofyear_nc: ./input/icon-art-oem/OEM/monthofyear.nc icon: - binary_file: ./src/icon-art/bin/icon + binary_file: ./ext/icon-art/bin/icon runjob_filename: icon_runjob.cfg compute_queue: normal walltime: '00:10:00' diff --git a/cases/icon-art-oem-test/icon_runjob.cfg b/cases/icon-art-oem-test/icon_runjob.cfg index 3e69720c..883c8d86 100644 --- a/cases/icon-art-oem-test/icon_runjob.cfg +++ b/cases/icon-art-oem-test/icon_runjob.cfg @@ -1,13 +1,13 @@ #!/usr/bin/env bash -#SBATCH --job-name="{cfg.casename}_{cfg.startdate_sim_yyyymmddhh}_{cfg.forecasttime}" +#SBATCH --job-name=icon #SBATCH --account={cfg.compute_account} -#SBATCH --time={cfg.icon_walltime} +#SBATCH --time={cfg.walltime_icon} #SBATCH --nodes={cfg.icon_np_tot} #SBATCH --ntasks-per-node={cfg.ntasks_per_node} #SBATCH --partition={cfg.compute_queue} #SBATCH --constraint={cfg.constraint} #SBATCH --hint=nomultithread -#SBATCH --output={logfile} +#SBATCH --output={cfg.logfile} #SBATCH --open-mode=append #SBATCH --chdir={cfg.icon_work} @@ -368,4 +368,12 @@ EOF # ---------------------------------------------------------------------- # run the model! # ---------------------------------------------------------------------- - srun ./icon.exe +handle_error(){{ + # Check for invalid pointer error at the end of icon-art + if grep -q "free(): invalid pointer" {cfg.logfile} && grep -q "clean-up finished" {cfg.logfile}; then + exit 0 + else + exit 1 + fi +}} +srun ./{cfg.icon_execname} || handle_error diff --git a/cases/icon-art-oem-test/icontools_remap_00_lbc_runjob.cfg b/cases/icon-art-oem-test/icontools_remap_00_lbc_runjob.cfg index f81d9bdd..1f454071 100644 --- a/cases/icon-art-oem-test/icontools_remap_00_lbc_runjob.cfg +++ b/cases/icon-art-oem-test/icontools_remap_00_lbc_runjob.cfg @@ -1,5 +1,5 @@ #!/usr/bin/env bash -#SBATCH --job-name="iconsub_{cfg.startdate_sim_yyyymmddhh}" +#SBATCH --job-name=iconsub #SBATCH --account={cfg.compute_account} #SBATCH --chdir={cfg.icon_work} #SBATCH --partition={cfg.compute_queue} @@ -17,7 +17,7 @@ set -x export ECCODES_DEFINITION_PATH={cfg.eccodes_dir}/definitions.edzw-2.12.5-2:{cfg.eccodes_dir}/definitions -. {cfg.chain_src_dir}/src/spack-c2sm/setup-env.sh +. 
{cfg.chain_src_dir}/ext/spack-c2sm/setup-env.sh spack load icontools #----------------------------------------------------------------------------- diff --git a/cases/icon-art-oem-test/icontools_remap_ic_chem_runjob.cfg b/cases/icon-art-oem-test/icontools_remap_ic_chem_runjob.cfg index 4d80d10c..48fb9f3a 100644 --- a/cases/icon-art-oem-test/icontools_remap_ic_chem_runjob.cfg +++ b/cases/icon-art-oem-test/icontools_remap_ic_chem_runjob.cfg @@ -1,5 +1,5 @@ #!/usr/bin/env bash -#SBATCH --job-name="{cfg.casename}_{cfg.startdate_sim_yyyymmddhh}_{cfg.forecasttime}" +#SBATCH --job-name=iconremap_ic_chem #SBATCH --account={cfg.compute_account} #SBATCH --chdir={cfg.icon_work} #SBATCH --partition={cfg.compute_queue} @@ -17,7 +17,7 @@ set -e -x export ECCODES_DEFINITION_PATH={cfg.eccodes_dir}/definitions.edzw-2.12.5-2:{cfg.eccodes_dir}/definitions -. {cfg.chain_src_dir}/src/spack-c2sm/setup-env.sh +. {cfg.chain_src_dir}/ext/spack-c2sm/setup-env.sh spack load icontools #----------------------------------------------------------------------------- diff --git a/cases/icon-art-oem-test/icontools_remap_ic_runjob.cfg b/cases/icon-art-oem-test/icontools_remap_ic_runjob.cfg index 7cfdb530..e704afb8 100644 --- a/cases/icon-art-oem-test/icontools_remap_ic_runjob.cfg +++ b/cases/icon-art-oem-test/icontools_remap_ic_runjob.cfg @@ -1,5 +1,5 @@ #!/usr/bin/env bash -#SBATCH --job-name="iconremap_{cfg.startdate_sim_yyyymmddhh}" +#SBATCH --job-name=iconremap_ic #SBATCH --account={cfg.compute_account} #SBATCH --chdir={cfg.icon_work} #SBATCH --partition={cfg.compute_queue} @@ -17,7 +17,7 @@ set -x export ECCODES_DEFINITION_PATH={cfg.eccodes_dir}/definitions.edzw-2.12.5-2:{cfg.eccodes_dir}/definitions -. {cfg.chain_src_dir}/src/spack-c2sm/setup-env.sh +. {cfg.chain_src_dir}/ext/spack-c2sm/setup-env.sh spack load icontools #----------------------------------------------------------------------------- diff --git a/cases/icon-art-oem-test/icontools_remap_lbc_chem_runjob.cfg b/cases/icon-art-oem-test/icontools_remap_lbc_chem_runjob.cfg index be948240..e629494b 100644 --- a/cases/icon-art-oem-test/icontools_remap_lbc_chem_runjob.cfg +++ b/cases/icon-art-oem-test/icontools_remap_lbc_chem_runjob.cfg @@ -1,5 +1,5 @@ #!/usr/bin/env bash -#SBATCH --job-name="{cfg.casename}_{cfg.startdate_sim_yyyymmddhh}_{cfg.forecasttime}" +#SBATCH --job-name=iconremap_lbc #SBATCH --account={cfg.compute_account} #SBATCH --chdir={cfg.icon_work} #SBATCH --partition={cfg.compute_queue} @@ -17,7 +17,7 @@ set -e -x export ECCODES_DEFINITION_PATH={cfg.eccodes_dir}/definitions.edzw-2.12.5-2:{cfg.eccodes_dir}/definitions -. {cfg.chain_src_dir}/src/spack-c2sm/setup-env.sh +. {cfg.chain_src_dir}/ext/spack-c2sm/setup-env.sh spack load icontools #----------------------------------------------------------------------------- diff --git a/cases/icon-art-oem-test/icontools_remap_lbc_rest_runjob.cfg b/cases/icon-art-oem-test/icontools_remap_lbc_rest_runjob.cfg index 3bc183c3..38dc6b07 100644 --- a/cases/icon-art-oem-test/icontools_remap_lbc_rest_runjob.cfg +++ b/cases/icon-art-oem-test/icontools_remap_lbc_rest_runjob.cfg @@ -1,5 +1,5 @@ #!/usr/bin/env bash -#SBATCH --job-name="iconremap_lbc_{cfg.startdate_sim_yyyymmddhh}" +#SBATCH --job-name=iconremap_lbc #SBATCH --account={cfg.compute_account} #SBATCH --chdir={cfg.icon_work} #SBATCH --partition={cfg.compute_queue} @@ -17,7 +17,7 @@ set -x export ECCODES_DEFINITION_PATH={cfg.eccodes_dir}/definitions.edzw-2.12.5-2:{cfg.eccodes_dir}/definitions -. {cfg.chain_src_dir}/src/spack-c2sm/setup-env.sh +. 
{cfg.chain_src_dir}/ext/spack-c2sm/setup-env.sh spack load icontools #----------------------------------------------------------------------------- diff --git a/cases/icon-test/config.yaml b/cases/icon-test/config.yaml index e1d58782..06065e42 100644 --- a/cases/icon-test/config.yaml +++ b/cases/icon-test/config.yaml @@ -1,6 +1,6 @@ -# Configuration file for the 'icon-test' case with ICON +# Configuration file for the 'icon-async-test' case with ICON -model: icon +workflow: icon constraint: gpu run_on: cpu compute_queue: normal @@ -18,6 +18,11 @@ output_filename: NWP_LAM filename_format: _DOM_ lateral_boundary_grid_order: lateral_boundary +walltime: + prepare_icon: '00:10:00' + icontools: '00:30:00' + icon: '00:30:00' + meteo: dir: ./input/meteo prefix: ifs_ @@ -41,10 +46,9 @@ input_files: map_file_ana: ./input/icon/mapping/map_file.ana icon: - binary_file: ./src/icon/bin/icon + binary_file: ./ext/icon/bin/icon runjob_filename: icon_runjob.cfg compute_queue: normal - walltime: '00:10:00' np_tot: 8 np_io: 1 np_restart: 1 diff --git a/cases/icon-test/icon_runjob.cfg b/cases/icon-test/icon_runjob.cfg index 09ff2c3a..88c8b735 100755 --- a/cases/icon-test/icon_runjob.cfg +++ b/cases/icon-test/icon_runjob.cfg @@ -1,13 +1,13 @@ #!/usr/bin/env bash -#SBATCH --job-name="{cfg.casename}_{cfg.startdate_sim_yyyymmddhh}_{cfg.enddate_sim_yyyymmddhh}" +#SBATCH --job-name=icon #SBATCH --account={cfg.compute_account} -#SBATCH --time={cfg.icon_walltime} +#SBATCH --time={cfg.walltime_icon} #SBATCH --nodes={cfg.icon_np_tot} #SBATCH --ntasks-per-node={cfg.ntasks_per_node} #SBATCH --partition={cfg.compute_queue} #SBATCH --constraint={cfg.constraint} #SBATCH --hint=nomultithread -#SBATCH --output={logfile} +#SBATCH --output={cfg.logfile} #SBATCH --open-mode=append #SBATCH --chdir={cfg.icon_work} @@ -342,4 +342,4 @@ EOF # ---------------------------------------------------------------------- # run the model! # ---------------------------------------------------------------------- - srun ./icon.exe +srun ./{cfg.icon_execname} || handle_error diff --git a/cases/icon-test/icontools_remap_00_lbc_runjob.cfg b/cases/icon-test/icontools_remap_00_lbc_runjob.cfg index f81d9bdd..1f454071 100755 --- a/cases/icon-test/icontools_remap_00_lbc_runjob.cfg +++ b/cases/icon-test/icontools_remap_00_lbc_runjob.cfg @@ -1,5 +1,5 @@ #!/usr/bin/env bash -#SBATCH --job-name="iconsub_{cfg.startdate_sim_yyyymmddhh}" +#SBATCH --job-name=iconsub #SBATCH --account={cfg.compute_account} #SBATCH --chdir={cfg.icon_work} #SBATCH --partition={cfg.compute_queue} @@ -17,7 +17,7 @@ set -x export ECCODES_DEFINITION_PATH={cfg.eccodes_dir}/definitions.edzw-2.12.5-2:{cfg.eccodes_dir}/definitions -. {cfg.chain_src_dir}/src/spack-c2sm/setup-env.sh +. {cfg.chain_src_dir}/ext/spack-c2sm/setup-env.sh spack load icontools #----------------------------------------------------------------------------- diff --git a/cases/icon-test/icontools_remap_ic_runjob.cfg b/cases/icon-test/icontools_remap_ic_runjob.cfg index 7cfdb530..e704afb8 100755 --- a/cases/icon-test/icontools_remap_ic_runjob.cfg +++ b/cases/icon-test/icontools_remap_ic_runjob.cfg @@ -1,5 +1,5 @@ #!/usr/bin/env bash -#SBATCH --job-name="iconremap_{cfg.startdate_sim_yyyymmddhh}" +#SBATCH --job-name=iconremap_ic #SBATCH --account={cfg.compute_account} #SBATCH --chdir={cfg.icon_work} #SBATCH --partition={cfg.compute_queue} @@ -17,7 +17,7 @@ set -x export ECCODES_DEFINITION_PATH={cfg.eccodes_dir}/definitions.edzw-2.12.5-2:{cfg.eccodes_dir}/definitions -. {cfg.chain_src_dir}/src/spack-c2sm/setup-env.sh +. 
{cfg.chain_src_dir}/ext/spack-c2sm/setup-env.sh spack load icontools #----------------------------------------------------------------------------- diff --git a/cases/icon-test/icontools_remap_lbc_rest_runjob.cfg b/cases/icon-test/icontools_remap_lbc_rest_runjob.cfg index 3bc183c3..38dc6b07 100755 --- a/cases/icon-test/icontools_remap_lbc_rest_runjob.cfg +++ b/cases/icon-test/icontools_remap_lbc_rest_runjob.cfg @@ -1,5 +1,5 @@ #!/usr/bin/env bash -#SBATCH --job-name="iconremap_lbc_{cfg.startdate_sim_yyyymmddhh}" +#SBATCH --job-name=iconremap_lbc #SBATCH --account={cfg.compute_account} #SBATCH --chdir={cfg.icon_work} #SBATCH --partition={cfg.compute_queue} @@ -17,7 +17,7 @@ set -x export ECCODES_DEFINITION_PATH={cfg.eccodes_dir}/definitions.edzw-2.12.5-2:{cfg.eccodes_dir}/definitions -. {cfg.chain_src_dir}/src/spack-c2sm/setup-env.sh +. {cfg.chain_src_dir}/ext/spack-c2sm/setup-env.sh spack load icontools #----------------------------------------------------------------------------- diff --git a/config.py b/config.py new file mode 100644 index 00000000..18293554 --- /dev/null +++ b/config.py @@ -0,0 +1,609 @@ +from subprocess import run, CalledProcessError +import os +import yaml +from datetime import timedelta + +from jobs import tools +from pathlib import Path + + +class Config(): + + def __init__(self, casename): + """Initialize an instance of the Config class. + + Initializes an instance of the Config class with user-specific + and default attributes. The class represents a processing chain for a + particular case, and its attributes are populated based on the provided + `casename`. + + Parameters + ---------- + casename : str + The identifier for the case, typically specifying the configuration + and settings to be used in the processing chain. + + Attributes + ---------- + user_name : str + The username of the current user, obtained from the 'USER' environment variable. + email : str + The user's email address, initially set to None and updated using the `set_email` method. + casename : str + The specified case name for the processing chain. + chain_src_dir : str + The source directory for the processing chain, typically the current working directory. + case_path : str + The path to the case directory under 'cases/' for the specified `casename`. + work_root : str + The root directory for processing chain execution, typically located under the source directory. + + Notes + ----- + The method also loads user-defined attributes from the configuration file, + sets specific settings based on the node type ('gpu' or 'mc'), and initializes + other instance-specific attributes. + """ + # Global attributes (initialized with default values) + self.user_name = os.environ['USER'] + self.set_email() + self.casename = casename + self.set_account() + + self.chain_src_dir = Path.cwd() + self.case_path = self.chain_src_dir / 'cases' / self.casename + self.work_root = self.chain_src_dir / 'work' + + # User-defined attributes from config file + self.load_config_file() + + # Set case root + self.case_root = self.work_root / self.casename + self.log_file = self.case_root / "chain_status.log" + + # Set workflow and async attributes and initiate job ids dict + self.set_workflow() + + # Specific settings based on the node type ('gpu' or 'mc') + self.set_node_info() + + def load_config_file(self): + """Load configuration settings from a YAML file and set them as attributes. 
+ + This method reads the configuration settings from a YAML file located in + the 'cases/casename' directory and sets them as attributes of the instance. + + Returns + ------- + Config + The same `Config` instance with configuration settings as attributes. + + Raises + ------ + FileNotFoundError + If the specified configuration file or case directory is not found. + + Notes + ----- + If the configuration file does not exist, the method will attempt to suggest + a similar case directory based on a Levenshtein distance comparison with + existing case directories. The method directly assigns values from the + configuration file to instance attributes for easy access. + """ + cfg_file = Path('cases', self.casename, 'config.yaml').resolve() + + if not cfg_file.is_file(): + all_cases = [ + path.name for path in os.scandir('cases') if path.is_dir() + ] + closest_name = min([(tools.levenshtein(self.casename, name), name) + for name in all_cases], + key=lambda x: x[0])[1] + raise FileNotFoundError( + f"Case-directory '{self.casename}' not found, did you mean '{closest_name}'?" + ) + + try: + with cfg_file.open('r') as yaml_file: + cfg_data = yaml.load(yaml_file, Loader=yaml.FullLoader) + except FileNotFoundError: + raise FileNotFoundError( + f"No file 'config.yaml' in {cfg_file.parent}") + + # Directly assign values to instance attributes + for key, value in cfg_data.items(): + setattr(self, key, value) + + def set_account(self): + """Set the compute account based on user information. + + This method determines the compute account to be used based on the user's + name and system configuration. + + Notes + ----- + - If the user name is 'jenkins', the compute account is set to 'g110' for + Jenkins testing. + - If an account is specified in the user's '~/.acct' file, it will be used + as the compute account. + - If neither of the above conditions is met, the standard account is + determined using the 'id -gn' command. + """ + if self.user_name == 'jenkins': + # g110 account for Jenkins testing + self.compute_account = 'g110' + elif (p := Path.home() / '.acct').exists(): + # Use account specified in ~/.acct file + with p.open('r') as file: + self.compute_account = file.read().rstrip() + else: + # Use standard account + self.compute_account = os.popen("id -gn").read().splitlines()[0] + + def set_node_info(self): + """Set node-specific information based on configuration settings. + + This method configures node-specific settings, such as the number of tasks + per node and CUDA-related environment variables, based on the provided + configuration settings in the instance. + + Returns + ------- + Config + The same `Config` instance with updated node-specific attributes. + + Raises + ------ + ValueError + If the 'constraint' or 'run_on' configuration values are invalid. + """ + if self.constraint == 'gpu': + if hasattr(self, 'icon'): + if self.run_on == 'gpu': + self.ntasks_per_node = 1 + elif self.run_on == 'cpu': + self.ntasks_per_node = 12 + else: + raise ValueError( + "Invalid value for 'run_on' in the configuration." + "It should be either 'gpu' or 'cpu'.") + else: + self.ntasks_per_node = 12 + self.mpich_cuda = ('export MPICH_RDMA_ENABLED_CUDA=1\n' + 'export MPICH_G2G_PIPELINE=256\n' + 'export CRAY_CUDA_MPS=1\n') + elif self.constraint == 'mc': + self.ntasks_per_node = 36 + self.mpich_cuda = '' + else: + raise ValueError( + "Invalid value for 'constraint' in the configuration." 
+ "It should be either 'gpu' or 'mc'.") + + def set_workflow(self): + """set workflow and async attr, initiate job ids dict""" + # If a workflow name is specified, load from workflows.yaml + if isinstance(self.workflow, str): + self.workflow_name = self.workflow + with open('workflows.yaml') as file: + workflows = yaml.safe_load(file) + self.workflow = workflows[self.workflow_name] + # Otherwise, use custom workflow from config.yaml directly + elif isinstance(self.workflow, dict): + self.workflow_name = 'custom' + else: + raise InvalidWorkflowType( + "Invalid workflow type. Must be either a string or a dictionary." + ) + + assert 'dependencies' in self.workflow, "Missing 'dependencies' in workflow. Exiting." + + # Initiate empty job ids dictionnary so that it can be filled in later + self.job_ids = {'current': {}, 'previous': {}} + + def set_restart_step_hours(self): + """Set the restart step in hours. + + Converts the 'restart_step' attribute, which is in ISO8601 duration format, + to hours and stores the result in the 'restart_step_hours' attribute. + + Returns + ------- + Config + The same `Config` instance with the 'restart_step_hours' attribute set. + """ + self.restart_step_hours = int( + tools.iso8601_duration_to_hours(self.restart_step)) + + def set_email(self): + """Set the user's email address based on system configuration. + + This method determines the user's email address based on the user's name + and system configuration. + + Returns + ------- + Config + The same `Config` instance with the `user_mail` attribute set. + + Notes + ----- + - If the user name is 'jenkins', the user's email address is set to None. + - If an email address is specified in the user's '~/.forward' file, it will + be used as the user's email address. + - If neither of the above conditions is met, the user's email address is set + to None. + """ + if self.user_name == 'jenkins': + self.user_mail = None + elif (p := Path.home() / '.forward').exists(): + with p.open('r') as file: + self.user_mail = file.read().rstrip() + else: + self.user_mail = None + + def print_config(self): + """Print the configuration attributes and their values. + + This method displays the configuration attributes and their corresponding + values in a formatted manner. Lists and dictionaries within the configuration + are also displayed with appropriate indentation. + + Notes + ----- + - The maximum column width for the attribute names is automatically determined. + - The method prints the attribute name, its type, and its value. + - If an attribute is a list, it is displayed with each item indented. + - If an attribute is a dictionary, it is also displayed with each key-value + pair indented. 
+ """ + # max_col_width = max(len(key) for key in vars(self)) + 1 + max_col_width = 27 + + print("\nConfiguration:") + print(f"{'Attribute':<{max_col_width}} Type Value") + print("-" * 80) + for key, value in vars(self).items(): + if isinstance(value, list): + # If the value is a list, format it with indentation + print(f"{key:<{max_col_width}} list") + for item in value: + item_type = "Path" if type( + item).__name__ == "PosixPath" else type(item).__name__ + print(f" - {item:<{max_col_width-4}} {item_type}") + elif isinstance(value, dict): + # If the value is a dictionary, format it as before + print(f"{key:<{max_col_width}} dict") + for sub_key, sub_value in value.items(): + sub_value_type = "Path" if type( + sub_value).__name__ == "PosixPath" else type( + sub_value).__name__ + print( + f" - {sub_key:<{max_col_width-4}} {sub_value_type:<4} {sub_value}" + ) + else: + # Standard output + key_type = type(key).__name__ + print(f"{key:<{max_col_width}} {key_type:<4} {value}") + + def convert_paths_to_absolute(self, dct=None): + """Convert relative file paths to absolute paths in the configuration. + + Recursively convert all strings starting with './' in the instance + attributes to absolute paths. + """ + if dct is None: + self.convert_paths_to_absolute(dct=vars(self)) + else: + for k, v in dct.items(): + if isinstance(v, dict): + self.convert_paths_to_absolute(dct=v) + elif isinstance(v, str) and v.startswith('./'): + dct[k] = Path(v).absolute() + + def create_vars_from_dicts(self, dct=None, key=None): + """Create instance attributes from dictionary entries in the configuration. + + This method recursively iterates through the instance's attribute dictionary + and checks for dictionary values. For each dictionary encountered, it creates + new instance attributes by concatenating the original attribute name and the + dictionary key, and assigns the corresponding values. 
+ """ + if dct is None: + self.create_vars_from_dicts(dct=vars(self).copy()) + else: + for k, v in dct.items(): + subkey = k if key is None else key + '_' + k + if isinstance(v, dict): + self.create_vars_from_dicts(dct=v, key=subkey) + else: + setattr(self, subkey, v) + + def get_chunk_list(self): + self.chunk_list = [] + for startdate_sim in tools.iter_hours(self.startdate, self.enddate, + self.restart_step_hours): + enddate_sim = startdate_sim + timedelta( + hours=self.restart_step_hours) + if 'spinup' in self.workflow['features'] and hasattr( + self, 'spinup'): + if startdate_sim > self.startdate: + startdate_sim = startdate_sim - timedelta( + hours=self.spinup) + + startdate_sim_yyyymmddhh = startdate_sim.strftime("%Y%m%d%H") + enddate_sim_yyyymmddhh = enddate_sim.strftime("%Y%m%d%H") + chunk_id = f"{startdate_sim_yyyymmddhh}_{enddate_sim_yyyymmddhh}" + + if enddate_sim > self.enddate: + continue + + self.chunk_list.append(chunk_id) + + def get_previous_chunk_id(self, current_chunk_id): + """Get the previous chunk ID based on the current `chunk_id`""" + index = self.chunk_list.index(current_chunk_id) + if index > 0: + self.chunk_id_prev = self.chunk_list[index - 1] + else: + self.chunk_id_prev = None + + def get_dep_ids(self, job_name, add_dep=None): + """Get dependency job ids for `job_name`""" + # Initial list of dependencies + if add_dep is not None: + if isinstance(add_dep, int): + dep_id_list = [add_dep] + else: + try: + dep_id_list = list(add_dep) + except TypeError: + print("add_dep must be an iterable") + else: + dep_id_list = [] + + # Add job dependencies + if deps := self.workflow['dependencies'].get(job_name): + for stage in 'previous', 'current': + if dep_stage := deps.get(stage): + for job in dep_stage: + # Could be that dep job id does not exist, e.g., + # if dep job is deactivated or it's the first chunk + if dep_id := self.job_ids[stage].get(job): + dep_id_list.extend(dep_id) + return dep_id_list + + def get_dep_cmd(self, job_name, add_dep=None): + """Generate the part of the sbatch command that sepcifies dependencies for `job_name`""" + # Needed for nested run_chain.py + if self.force_sync: + return '--wait' + + if dep_ids := self.get_dep_ids(job_name, add_dep=add_dep): + dep_str = ':'.join(map(str, dep_ids)) + return f'--dependency=afterok:{dep_str}' + + # job_name has no dependencies but still belongs to an async workflow + # so don't use --wait + return None + + def submit(self, job_name, script, add_dep=None): + """Submit job with dependencies""" + script_path = Path(script) + sbatch_cmd = ['sbatch', '--parsable'] + if dep_cmd := self.get_dep_cmd(job_name, add_dep=add_dep): + sbatch_cmd.append(dep_cmd) + sbatch_cmd.append(script_path.name) + + try: + result = run(sbatch_cmd, + cwd=script_path.parent, + capture_output=True, + check=True) + except CalledProcessError as e: + with open(self.logfile('a')) as f: + f.write(e) + raise (e) + + job_id = int(result.stdout) + print(f' └── Submitted batch job {job_id}') + + if job_name not in self.job_ids['current']: + self.job_ids['current'][job_name] = [job_id] + else: + self.job_ids['current'][job_name].append(job_id) + + return job_id + + def submit_basic_python(self, job_name): + """Create an sbatch script to launch basic python jobs individually. + Use run_chain.py arguments to submit those jobs. 
+ """ + # Build job script + walltime = getattr(self, 'walltime', {}).get(job_name, "00:30:00") + script_lines = [ + '#!/usr/bin/env bash', + f'#SBATCH --job-name={job_name}', + '#SBATCH --nodes=1', + f'#SBATCH --time={walltime}', + f'#SBATCH --output={self.logfile}', + '#SBATCH --open-mode=append', + f'#SBATCH --account={self.compute_account}', + f'#SBATCH --partition={self.compute_queue}', + f'#SBATCH --constraint={self.constraint}', + '', + f'cd {self.chain_src_dir}', + f'./run_chain.py {self.casename} -j {job_name} -c {self.chunk_id} -f -s --no-logging', + '', + ] + + job_path = self.chain_root / 'job_scripts' + job_path.mkdir(parents=True, exist_ok=True) + job_file = job_path / f'{job_name}.sh' + with open(job_file, mode='w') as job_script: + job_script.write('\n'.join(script_lines)) + + # Submit job + self.submit(job_name, job_file) + + def wait_for_previous(self): + """Wait for all jobs of the previous stage to be finished. + + Do this by submitting a fake job depending on all jobs from the + 'previous' stage. + """ + dep_ids = [] + for ids in self.job_ids['previous'].values(): + dep_ids.extend(ids) + if dep_ids: + job_file = self.case_root / 'submit.wait.slurm' + log_file = self.case_root / 'wait.log' + dep_str = ':'.join(map(str, dep_ids)) + script_lines = [ + '#!/usr/bin/env bash', '#SBATCH --job-name="wait"', + '#SBATCH --nodes=1', '#SBATCH --time=00:01:00', + f'#SBATCH --output={log_file}', + f'#SBATCH --account={self.compute_account}', + f'#SBATCH --partition={self.compute_queue}', + f'#SBATCH --constraint={self.constraint}', + f'#SBATCH --dependency=afterany:{dep_str}', '', '# Do nothing', + 'exit 0' + ] + with open(job_file, mode='w') as wait_job: + wait_job.write('\n'.join(script_lines)) + + run(['sbatch', '--wait', job_file], check=True) + + def cycle(self): + """Cycle to next chunk + + - Wait for previous chunk to finish + - print summary of previous chunk jobs + - Check for success of all previous jobs + - cycle job ids and chunk id""" + + # - ML - + # - This method could do more of the cycling, like dates + # - The config object could host more info and cycle it instead + # of recomputing stuff like previous chunk dates + + # Skip if very first chunk + if self.job_ids['previous']: + # Wait for previous chunk to be done + self.wait_for_previous() + + # Get and print previous chunk Slurm summary + self.get_previous_slurm_summary() + self.print_previous_slurm_summary() + + # Check for success of all previous jobs + self.check_previous_chunk_success() + + # Cycle info + self.job_ids['previous'] = self.job_ids['current'] + self.previous_chunk_id = self.chunk_id + + # Monitor last chunk + if self.enddate_sim >= self.enddate: + self.wait_for_previous() + self.get_previous_slurm_summary() + self.print_previous_slurm_summary() + self.check_previous_chunk_success() + + @staticmethod + def get_job_info(job_id, + slurm_keys=['JobName', 'Elapsed', 'ExitCode'], + parse=True): + """Retrieve slurm job info as given by sacct + + if parse is True, return the raw string from sacct else parse info into a dict. 
+ All possible keys are given by `sacct --helpformat`""" + + # Get info from sacct + cmd = ["sacct", f"--format={', '.join(slurm_keys)}", "-j", str(job_id)] + + if parse: + cmd.append("--parsable") + + info_str = run(cmd, capture_output=True, check=True).stdout + + if parse: + # Parse in a dictionnary before returning + # The inner most process should be the relevant one, hence the 1 index + slurm_info = info_str.split(b'\n')[1].split(b'|') + return {k: v.decode() for k, v in zip(slurm_keys, slurm_info)} + else: + return info_str.decode() + + def get_previous_slurm_summary(self, + info_keys=[ + 'JobName', 'JobID', 'Partition', + 'NNodes', 'State', 'Start', 'End', + 'Elapsed' + ]): + """get slurm info summary or all jobs of previous chunk""" + + # Store requested keys in object + self.info_keys = info_keys + + # Get job info for all jobs + self.slurm_info = {} + for job_name in self.jobs: + for job_id in self.job_ids['previous'][job_name]: + self.slurm_info[job_name] = [] + self.slurm_info[job_name].append( + self.get_job_info(job_id, slurm_keys=info_keys, + parse=True)) + + def print_previous_slurm_summary(self): + # Width of printed slurm piece of information + info_width = { + 'JobName': 13, + 'JobID': 8, + 'Partition': 9, + 'NNodes': 3, + 'State': 14, + 'Start': 13, + 'End': 13, + 'Elapsed': 9 + } + + # Build table header and line format + headers = [] + hlines = [] + formats = [] + for k in self.info_keys: + j = info_width[k] + kh = '#N' if k == 'NNodes' else k + formats.append(f"{{{k}:>{j}.{j}}}") + headers.append(f"{kh:>{j}.{j}}") + hlines.append("-" * j) + + table_header = '\n'.join((' '.join(headers), ' '.join(hlines))) + line_format = " ".join(formats) + + with self.log_file.open('a') as f: + f.write(f"Job summary for chunk {self.previous_chunk_id}\n") + f.write(table_header) + f.write('\n') + for job_name in self.jobs: + for info in self.slurm_info[job_name]: + f.write(line_format.format(**info)) + f.write('\n') + f.write('\n') + + def check_previous_chunk_success(self): + status = 0 + failed_jobs = [] + for job_name, info_list in self.slurm_info.items(): + for info in info_list: + if info['State'] != 'COMPLETED': + failed_jobs.append(job_name) + status += 1 + + if status > 0: + raise RuntimeError(f"The following job(s) failed: {failed_jobs}") + + +class InvalidWorkflowType(Exception): + pass diff --git a/config/models.yaml b/config/models.yaml deleted file mode 100644 index a19f188c..00000000 --- a/config/models.yaml +++ /dev/null @@ -1,24 +0,0 @@ -models: - cosmo: - jobs: ['prepare_data', 'int2lm', 'cosmo', 'post_cosmo'] - features: ['restart', 'spinup'] - cosmo-ghg: - jobs: ['prepare_data', 'emissions', 'biofluxes', 'oem', 'online_vprm', - 'int2lm', 'post_int2lm', 'cosmo', 'post_cosmo'] - features: ['restart', 'tracers', 'spinup'] - cosmo-art: - jobs: ['prepare_data', 'emissions', 'obs_nudging', 'photo_rate', - 'int2lm', 'cosmo', 'post_cosmo'] - features: ['nesting', 'spinup'] - icon: - jobs: ['prepare_data', 'icon'] - features: ['restart'] - icon-art: - jobs: ['prepare_data', 'icon'] - features: ['restart'] - icon-art-global: - jobs: ['prepare_data', 'icon'] - features: ['restart'] - icon-art-oem: - jobs: ['prepare_data', 'icon'] - features: ['restart'] diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..8ec08ace --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,21 @@ +# Makefile for Sphinx documentation + +SPHINXOPTS = -c ./ +SPHINXBUILD = sphinx-build +SOURCEDIR = . 
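# Local usage: `make html` writes the pages to build/html; the CI workflows
# invoke `sphinx-build -b html . _build` directly instead.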
+BUILDDIR = build + +.PHONY: help clean html + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + +clean: + rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(SPHINXOPTS) $(SOURCEDIR) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + diff --git a/docs/_static/processing-chain-favicon.ico b/docs/_static/processing-chain-favicon.ico new file mode 100644 index 00000000..95d31d5d Binary files /dev/null and b/docs/_static/processing-chain-favicon.ico differ diff --git a/docs/_static/processing-chain-logo-notext.png b/docs/_static/processing-chain-logo-notext.png new file mode 100644 index 00000000..9f870bf7 Binary files /dev/null and b/docs/_static/processing-chain-logo-notext.png differ diff --git a/docs/_static/processing-chain-logo-small.png b/docs/_static/processing-chain-logo-small.png new file mode 100644 index 00000000..dc81a858 Binary files /dev/null and b/docs/_static/processing-chain-logo-small.png differ diff --git a/docs/_static/processing-chain-logo.png b/docs/_static/processing-chain-logo.png new file mode 100644 index 00000000..c7e2dc93 Binary files /dev/null and b/docs/_static/processing-chain-logo.png differ diff --git a/docs/_static/processing_chain_workflow_icon_art.png b/docs/_static/processing_chain_workflow_icon_art.png new file mode 100644 index 00000000..4a5164d0 Binary files /dev/null and b/docs/_static/processing_chain_workflow_icon_art.png differ diff --git a/docs/code-structure.rst b/docs/code-structure.rst new file mode 100644 index 00000000..9a7ee7ab --- /dev/null +++ b/docs/code-structure.rst @@ -0,0 +1,55 @@ +.. _code-structure-section: + +Code Structure +-------------- + +The Processing Chain code is structured as follows: + +.. code-block:: bash + + $ tree -L 3 -F --dirsfirst + . + ├── cases/ # folder where all cases are stored + │ ├── cosmo-ghg-spinup-test/ # COSMO-GHG test case with spinup restart + │ │ ├── config.yaml # case configuration file + │ │ ├── *.cfg # templates for namelists & batch jobs + │ │ └── *.csv # CSV files with tracer information + │ ├── cosmo-ghg-test/ # COSMO-GHG testcase with standard restart + │ │ ├── config.yaml + │ │ ├── *.cfg + │ │ └── *.csv + │ ├── icon-art-global-test/ # ICON-ART test case (global domain) + │ │ ├── config.yaml + │ │ ├── icon_runjob.cfg # template for ICON-ART runjob + │ │ ├── *.sh # pre-processing scripts + │ │ └── mypartab + │ ├── icon-art-oem-test/ # ICON-ART test case with online emissions + │ │ ├── config.yaml + │ │ └── *.cfg + │ └── icon-test/ # ICON test case + │ ├── config.yaml + │ └── *.cfg + ├── docs/ # folder for Sphinx documentation + │ ├── _static/ # folder for static assets + │ │ ├── custom.css # custom CSS styles + │ │ └── *.png|ico # additional image assets + │ ├── tables/ # folder for tables used in documentation + │ │ └── *.csv # CSV files containing table data + │ ├── conf.py # configuration file for the Sphinx builder + │ └── *.rst # documentation files (reStructuredText) + ├── env/ + │ └── environment.yml # conda environment file + ├── ext/ # folder for other code (spack, models, etc.) 
+ ├── jenkins/ # automated Jenkins testing + │ ├── scripts/ + │ │ └── *.sh # individual Shell scripts for testing + │ └── Jenkinsfile # text file containing the Jenkins pipeline + ├── jobs/ + │ ├── tools/ + │ │ └── *.py # tool scripts + │ └── *.py # job scripts + ├── LICENSE # license file + ├── README.md # README file + ├── config.py # file containing the Config class + ├── run_chain.py # main script + └── workflows.yaml # file to store workflows with job dependencies diff --git a/docs/conf.py b/docs/conf.py index 98475ac5..740fc891 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,11 +12,17 @@ import os import sys +sys.path.insert(0, os.path.abspath('..')) # Root directory +sys.path.insert(0, os.path.abspath('../jobs')) # Jobs subfolder +sys.path.insert(0, os.path.abspath('../jobs/tools')) # Tools subfolder + # -- Project information ------------------------------------------------ project = 'Processing Chain' -copyright = '2018-2023, C2SM' +copyright = '2018-2024, C2SM' author = 'Processing Chain Administrators' +version = 'v3.1' +release = 'v3.1' # -- General configuration ------------------------------------------------ @@ -24,11 +30,15 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autosectionlabel', 'sphinx.ext.todo', 'sphinx_rtd_theme', - 'sphinx_copybutton', 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig', - 'sphinx.ext.viewcode', 'sphinx.ext.napoleon' + 'sphinx.ext.autodoc', 'sphinx.ext.autosectionlabel', 'sphinx.ext.todo', + 'sphinx_rtd_theme', 'sphinx_copybutton', 'sphinx.ext.mathjax', + 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', 'sphinx.ext.napoleon' ] +# autodoc options +autodoc_member_order = 'bysource' +toc_object_entries_show_parents = 'all' + # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -55,8 +65,10 @@ # further. For a list of options available for each theme, see the # documentation. # +html_logo = '_static/processing-chain-logo-small.png' +html_favicon = '_static/processing-chain-favicon.ico' html_theme_options = { - 'logo_only': False, + 'logo_only': True, 'display_version': True, 'prev_next_buttons_location': 'bottom', 'style_external_links': False, @@ -65,7 +77,7 @@ # Toc options 'collapse_navigation': False, 'sticky_navigation': True, - 'navigation_depth': 4, + 'navigation_depth': 2, 'includehidden': True, 'titles_only': False } diff --git a/docs/config.rst b/docs/config.rst index ecfdb124..f38ef329 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -1,333 +1,190 @@ .. _config-section: -The processing chain uses cases to describe a simulation. A case is a -subdirectory in ``cases/``, containing a :ref:`config.py` and several -:ref:`namelists` (for example ``int2lm_INPUT.cfg``) which define the -simulation. +The Processing Chain uses cases to describe a simulation. A case is a +subdirectory in ``cases/``, containing a ``config.yaml`` and several +`namelist` (e.g., ``int2lm_INPUT.cfg``) and `runscripts` (e.g., +``icon_runjob.cfg``) :ref:`templates`, +which define the simulation. -.. _config.py: +.. _config.yaml: Configuration File ------------------ -The configuration file contains most of the information that the :ref:`jobs-section` need to prepare and run the simulation, for example the location of the input data. -This configuration-file is imported as a module in ``run_chain.py``, and therefore -it can contain python expression which are evaluated at runtime. - -.. - Creating these tables by hand is a pain. 
Use the script/csv in the tables/ director - -| - -General Variables in ``config.py`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+ -| **Name** | **Description** | **Used in** | -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+ -| ``target`` | **COSMO** or **COSMOART**, defaults to **COSMO** if omitted | all | -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+ -| ``subtarget`` | **NONE** or **SPINUP**, defaults to **NONE** if omitted | all | -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+ -| ``constraint`` | Either 'gpu' or 'mc'. Controls on which nodes int2lm and cosmo run. Make sure your executables are compiled for the chosen architecure. | :func:`jobs.cosmo.main`, :func:`jobs.icon.main`, :func:`jobs.int2lm.main` | -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+ -| ``mail_address`` | The processing-chain sends encountered errors to this address | :func:`jobs.tools.__init__.send_mail` | -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+ -| ``compute_host`` | On which infrastructure the processing chain is run. 
Usually 'daint' | :func:`jobs.post_cosmo.main`, :func:`jobs.icon.main`, :func:`jobs.reduce_output.main` | -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+ -| ``compute_queue`` | Either 'debug' or 'normal' | :func:`jobs.int2lm.main`, :func:`jobs.cosmo.main`, :func:`jobs.icon.main` | -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+ -| ``compute_account`` | Which project the simulation belongs to | :func:`jobs.int2lm.main`, :func:`jobs.cosmo.main`, :func:`jobs.icon.main` | -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+ -| ``chain_src_dir`` | Path to the root of the chain | all | -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+ -| ``casename`` | Name of the simulation, the same as the directory-name the ``config.py``-file is in | all | -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+ -| ``input_root`` | Path to zhe root of the input-direcetory tree | all | -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+ -| ``output_root`` | Path to where the results, logs and nameslists are copied to after the simulation is done | :func:`jobs.post_cosmo.main`, :func:`jobs.icon.main` | -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+ -| ``work_root`` | Path to where the processing chain copies the input files to and starts the simulation from | all | -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+ - -| - -Variables for :func:`jobs.biofluxes.main` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+ -| **Name** | **Description** | **Target** | 
-+------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+ -| ``vprm_dir`` | Path to the directory containing bioflux-files | COSMO | -+------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+ -| ``vprm_prefix`` | Prefix of the the bioflux-files. Filenames are assumed to be ``{vprm_prefix}YYYYMMDDHH.nc``. If multiple bioflux-datasets exists, this is a list of prefixes. All files are assumed to be in ``vprm_dir`` | COSMO | -+------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+ - -| - -Variables for :func:`jobs.cosmo.main` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| **Name** | **Description** | **Target** | -+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| ``cosmo_bin`` | Path to the cosmo(art) executable | COSMO, COSMO-ART | -+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| ``cosmo_namelist`` | Path to the cosmo namelist-templates, ending in ``cosmo_INPUT_``. 
The ending, for example ``IO`` or ``ORG``, is added by the cosmo-job | COSMO, COSMO-ART | -+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| ``cosmo_runjob`` | Path to the cosmo runjob-template | COSMO, COSMO-ART | -+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| ``cosmo_walltime`` | Requested time for the cosmo slurm-batchjob | COSMO, COSMO-ART | -+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| ``cosmo_nodes`` | Number of nodes for the cosmo slurm-batchjob | COSMO, COSMO-ART | -+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| ``ntasks_per_node`` | Number of tasks per node | COSMO, COSMO-ART | -+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| ``cosmo_np_x`` | Number of processes in the x direction | COSMO, COSMO-ART | -+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| ``cosmo_np_y`` | Number of processes in the y direction | COSMO, COSMO-ART | -+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| ``cosmo_np_io`` | Number of processes for IO | COSMO, COSMO-ART | -+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| ``cosmo_np_tot`` | Total number of processes | COSMO, COSMO-ART | -+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ - - -Variables for :func:`jobs.emissions.main` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+-----------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| **Name** | **Description** | **Target** | -+-----------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| ``emissions_dir`` | Path to the input directory where the emissions-files are found. If there are multiple emissions-datasets, this is a list of paths to the directories of the datasets. 
| COSMO, COSMO-ART | -+-----------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| ``emis_gridname`` | Prefix of the emissions-files. List for multiple datasets. Emission-filenames are assumed to be ``{emis_gridname}YYYYMMDD.nc`` | COSMO, COSMO-ART | -+-----------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ - -| - -Variables for :func:`jobs.icon.main` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+------------------------------+--------------------------------------------------------------------------------------+--------------------+ -| **Name** | **Description** | **Target** | -+------------------------------+--------------------------------------------------------------------------------------+--------------------+ -| ``icon_bin`` | Path to the ICON executable | ICON | -+------------------------------+--------------------------------------------------------------------------------------+--------------------+ -| ``icon_runjob`` | Path to the ICON runjob template | ICON | -+------------------------------+--------------------------------------------------------------------------------------+--------------------+ -| ``icon_namelist_master`` | Path to the ICON master namelist template | ICON | -+------------------------------+--------------------------------------------------------------------------------------+--------------------+ -| ``icon_namelist_nwp`` | Path to the ICON NWP namelist template | ICON | -+------------------------------+--------------------------------------------------------------------------------------+--------------------+ -| ``icon_walltime`` | Requested time for the ICON slurm-batchjob | ICON | -+------------------------------+--------------------------------------------------------------------------------------+--------------------+ -| ``icon_np_tot`` | Total number of processes | ICON | -+------------------------------+--------------------------------------------------------------------------------------+--------------------+ -| ``input_root_mapping`` | Path to the ICON dictionairy files | ICON | -+------------------------------+--------------------------------------------------------------------------------------+--------------------+ -| ``map_file_ana`` | Name of the ICON dictionary file for analysis data | ICON | -+------------------------------+--------------------------------------------------------------------------------------+--------------------+ -| ``latbc_filename`` | Name of the lateral boundary files (including key-words) for LAM-simulations | ICON | -+------------------------------+--------------------------------------------------------------------------------------+--------------------+ -| ``inidata_filename`` | Name of the intial condition file | ICON | -+------------------------------+--------------------------------------------------------------------------------------+--------------------+ - -| - -Variables for :func:`jobs.int2lm.main` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| **Name** | **Description** | **Target** | 
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``meteo_prefix`` | Prefix of the meteo-files. Meteo-filenames are assumed to be ``{meteo_prefix}YYMMDD`` | COSMO, COSMO-ART | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``int2lm_extpar_dir`` | Path to the directory containing the extpar-file for int2lm | COSMO, COSMO-ART | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``int2lm_extpar_file`` | The name of the int2lm extpar-file | COSMO, COSMO-ART | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``int2lm_bin`` | Path to the int2lm executable | COSMO, COSMO-ART | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``int2lm_namelist`` | Path to the int2lm namelist-template | COSMO, COSMO-ART | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``int2lm_runjob`` | Path to the int2lm runjob-template | COSMO, COSMO-ART | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``int2lm_walltime`` | Requested time for the int2lm slurm-batchjob | COSMO, COSMO-ART | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``int2lm_nodes`` | Number of nodes for the int2lm slurm-batchjob | COSMO, COSMO-ART | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``int2lm_ntasks_per_node`` | Number of tasks per node | COSMO, COSMO-ART | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``int2lm_np_x`` | Number of processes in the x direction | COSMO, COSMO-ART | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``int2lm_np_y`` | Number of processes in the y direction | COSMO, COSMO-ART | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``int2lm_np_tot`` | Total number of processes | COSMO, COSMO-ART | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``int2lm_libgrib_dir`` | Path to the libgrib-directory used by the int2lm-executable | COSMO-ART | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``int2lm_lu_dir`` | Path to the directory containing the landuse file | COSMO-ART | 
-+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``int2lm_lu_file`` | Filename (inlcuding ending) of the landuse file | COSMO-ART | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``int2lm_pft_dir`` | Path to the directory containing the plant functional type file | COSMO-ART | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``int2lm_pft_dir`` | Filename (including ending) of the plant functional type file | COSMO-ART | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ -| ``post_int2lm_species`` | List of variables for the post_int2lm-job | COMSO | -+--------------------------------+-----------------------------------------------------------------------------------------------+--------------------------+ - -| - -Variables for :func:`jobs.prepare_data.main` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| **Name** | **Description** | **Target** | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``meteo_dir`` | Path to the directory where the meteo-files are found. For a nested run, this is the casename of the mother-run. In that case, ``meteo_prefix`` and ```meteo_inc`` can be omitted | COSMO, COSMO-ART, ICON | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``meteo_prefix`` | Prefix of the meteo-files. 
Meteo-filenames are assumed to be ``{meteo_prefix}YYMMDD`` | COSMO, COSMO-ART, ICON | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``meteo_nameformat`` | Format for meteo files | COSMO, COSMO-ART, ICON | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``meteo_inc`` | Increment in hours between meteo-files | COSMO, COSMO-ART, ICON | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``icontools_dir`` | Path to the icontools executables | ICON | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``iconremap_bin`` | Name of the iconremap executable | ICON | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``iconsub_bin`` | Name of the iconsub executable | ICON | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``icontools_parameter`` | Dictionary containing names of the icontools runscript and namelist templates | ICON | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``mozart_file_orig`` | Path to input-file for mozart-files | COSMO-ART | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``mozart_dir_proc`` | Path to the processed mozart-files. 
Processed mozart-files are stored here, if there are files found here then ``file_dir_orig`` is not used | COSMO-ART | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``mozart_inc`` | Increment in hours between mozart-files | COSMO-ART | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``mozart_prefix`` | Prefix of the mozart-files | COSMO-ART | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``cams_dir_orig`` | Path to input-directory for CAMS-files | COSMO | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``cams_dir_proc`` | Path to the processed CAMS-files. Processed CAMS-files are stored here, if there are files found here then ``cams_dir_orig`` is not used | COSMO | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``cams_parameters`` | Parameters for the processing of CAMS-data, more information in ``config.py`` of example case | COSMO | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``icontools_dir`` | Path to the icontools executables | ICON | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``iconremap_bin`` | Name of the iconremap executable | ICON | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``iconsub_bin`` | Name of the iconsub executable | ICON | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``icontools_parameter`` | Dictionary containing names of the icontools runscript and namelist templates | ICON | -+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``input_root_chem`` | Path to the chemical input files | ICON | 
-+--------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``chem_prefix``          | Prefix of the chemical input files                                                                                                                                                           | ICON                           |
-+--------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``chem_nameformat``      | Name format of the chemical input files                                                                                                                                                      | ICON                           |
-+--------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-
-|
-
-Variables for :func:`jobs.oae.main`
+The case-dependent configuration file ``<casename>/config.yaml`` contains most
+of the information that the :ref:`jobs` need to prepare
+and run the simulation, for example the location of the input data.
+This configuration file is loaded in ``run_chain.py`` as an instance
+of the ``Config`` class defined in ``config.py``.
+
+Configuration Variables
+~~~~~~~~~~~~~~~~~~~~~~~
+
+This is a non-exhaustive list of the most important configuration variables:
+
++------------------------+-------------------------------------------------------------------------+
+| Variable               | Description                                                             |
++========================+=========================================================================+
+|| ``case_path``         || The path to the case directory under ``cases/`` for the specified     |
+||                       || casename.                                                              |
++------------------------+-------------------------------------------------------------------------+
+| ``casename``           | The name of the case. Derived from the folder name under ``case_path``.|
++------------------------+-------------------------------------------------------------------------+
+|| ``chain_src_dir``     || The source directory for the processing chain, typically the current  |
+||                       || working directory.                                                     |
++------------------------+-------------------------------------------------------------------------+
+| ``compute_account``    | The compute account to be used, based on user information.             |
++------------------------+-------------------------------------------------------------------------+
+| ``constraint``         | The computational constraint (``gpu`` or ``mc``).                      |
++------------------------+-------------------------------------------------------------------------+
+|| ``email``             || The user's email address, initially set to ``None`` and updated via   |
+||                       || the ``set_email`` method.                                              |
++------------------------+-------------------------------------------------------------------------+
+|| ``enddate``           || The end date of the simulation in ISO 8601 format                     |
+||                       || (``YYYY-MM-DDTHH:mm:ssZ``).                                            |
++------------------------+-------------------------------------------------------------------------+
+| ``jobs``               | List of job names to be executed.                                      |
++------------------------+-------------------------------------------------------------------------+
+| ``log_finished_dir``   | The directory for finished log files.                                  |
++------------------------+-------------------------------------------------------------------------+
+| ``log_working_dir``    | The directory for working log files.                                   |
++------------------------+-------------------------------------------------------------------------+
+| ``ntasks_per_node``    | The number of tasks per node, based on the node type.                  |
++------------------------+-------------------------------------------------------------------------+
+| ``restart_step``       | The restart step in ISO 8601 format.                                   |
++------------------------+-------------------------------------------------------------------------+
+| ``restart_step_hours`` | The restart step in hours, derived from the ``restart_step`` attribute.|
++------------------------+-------------------------------------------------------------------------+
+| ``run_on``             | The architecture the model runs on (``cpu`` or ``gpu``).               |
++------------------------+-------------------------------------------------------------------------+
+| ``spinup``             | Spin-up duration in hours. Activates spinup behavior if set.           |
++------------------------+-------------------------------------------------------------------------+
+|| ``startdate``         || The start date of the simulation in ISO 8601 format                   |
+||                       || (``YYYY-MM-DDTHH:mm:ssZ``).                                            |
++------------------------+-------------------------------------------------------------------------+
+| ``user_mail``          | The user's email address, determined based on system configuration.    |
++------------------------+-------------------------------------------------------------------------+
+|| ``user_name``         || The username of the current user, determined from the system          |
+||                       || environment.                                                           |
++------------------------+-------------------------------------------------------------------------+
+| ``workflow``           | The name of the workflow from ``workflows.yaml`` or a self-defined one.|
++------------------------+-------------------------------------------------------------------------+
+|| ``work_root``         || The working directory where all output is stored. Should be somewhere |
+||                       || on ``$SCRATCH``. By default, it is set to ``<chain_src_dir>/work``.   |
++------------------------+-------------------------------------------------------------------------+
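+
+To make the relation between ``restart_step`` and ``restart_step_hours``
+concrete: an ISO 8601 duration such as ``PT6H`` corresponds to 6 hours. The
+conversion can be pictured with the following sketch (illustrative only; the
+actual derivation in ``config.py`` may differ):
+
+.. code-block:: python
+
+    # Illustrative sketch: turn an ISO 8601 duration like "PT6H" or "P1D"
+    # into hours. The real implementation in config.py may differ.
+    import re
+
+    def iso8601_duration_to_hours(duration):
+        match = re.fullmatch(r'P(?:(\d+)D)?(?:T(?:(\d+)H)?)?', duration)
+        if match is None:
+            raise ValueError(f'Unsupported duration: {duration}')
+        days = int(match.group(1) or 0)
+        hours = int(match.group(2) or 0)
+        return 24 * days + hours
+
+    print(iso8601_duration_to_hours('PT6H'))  # 6
+    print(iso8601_duration_to_hours('P1D'))   # 24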
+
+
+Variables to Set in ``config.yaml``
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-| **Name**                         | **Description**                                                                 | **Target**         |
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-| ``oae_dir``                      | Path to where the online emission datasets are found                            | COMSO, ICON        |
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-| ``oae_gridded_emissions_nc``     | Name of the main emission netCDF file (online emissions)                        | COSMO, ICON        |
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-| ``oae_vertical_profiles_nc``     | Name of the netCDF file for vertical profile scaling (online emissions)         | COMSO, ICON        |
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-| ``oae_hourofday_nc``             | Name of the netCDF file for 'hour of day' scaling (online emissions)            | COSMO, ICON        |
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-| ``oae_hourofyear_nc``            | Name of the netCDF file for 'hour of year' scaling (online emissions)           | COSMO, ICON        |
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-| ``oae_dayofweek_nc``             | Name of the netCDF file for 'day of week' scaling (online emissions)            | COSMO, ICON        |
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-| ``oae_monthofyear_nc``           | Name of the netCDF file for 'month of year' scaling (online emissions)          | COSMO, ICON        |
-+----------------------------------+---------------------------------------------------------------------------------+--------------------+
-
-|
-
-Variables for :func:`jobs.obs_nudging.main`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| **Name**                     | **Description**                                                                                                                                               | **Target**               |
-+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``bs_nudging_dir``           | Path to where the nudging-datasets are found                                                                                                                  | COSMO, COSMO-ART         |
-+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``obs_nudging_prefixes``     | List of prefixes of nuding-files to copy                                                                                                                      | COMSO, COSMO-ART         |
-+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+
-| ``obs_nudging_date_format``  | Date format of the nudging-files.
If the obs-nudging-file is called ``cdfin_temp-20150204000000-20150205000000``, the dateformat is ``-%Y%m%d%H%M%S`` | COSMO, COSMO-ART | -+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ - -| - -Variables for :func:`jobs.octe.main` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+------------------+-------------------------------------------------------------------------------------------------------------------------+--------------------+ -| **Name** | **Description** | **Target** | -+------------------+-------------------------------------------------------------------------------------------------------------------------+--------------------+ -| ``octe_lambdas`` | Path to the netCDF file containing the lambda values used for generating online carbontracker ensembles | COSMO | -+------------------+-------------------------------------------------------------------------------------------------------------------------+--------------------+ -| ``octe_maps`` | Path to the netCDF file containing the regional and pft-type maps for generating online carbontracker ensembles | COSMO | -+------------------+-------------------------------------------------------------------------------------------------------------------------+--------------------+ - -| - -Variables for :func:`jobs.online_vprm.main` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+-------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+ -| **Name** | **Description** | **Target** | -+-------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+ -| ``online_vprm_dir`` | Path to the directory containing netCDF files with pre-processed MODIS reflectance (``modis_filename``) and vegetation class fraction (``vegetation_filename``) data | COSMO | -+-------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+ -| ``modis_filename`` | Name of the pre-processed MODIS netCDF file | COSMO | -+-------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+ -| ``vegetation_filename`` | Name of the pre-processed vegetation class fraction netCDF file | COSMO | -+-------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------+ - -| - -Variables for :func:`jobs.photo_rate.main` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+-------------------------+------------------------------------------+--------------------+ -| **Name** | **Description** | **Target** | -+-------------------------+------------------------------------------+--------------------+ -| ``photo_rate_file`` | Path to the photolysis-rate file | COSMO-ART | 
-+-------------------------+------------------------------------------+--------------------+ - -| - -Variables for :func:`jobs.post_int2lm.main` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+-----------------------------+---------------------------------------------------+--------------------+ -| **Name** | **Description** | **Target** | -+-----------------------------+---------------------------------------------------+--------------------+ -| ``post_int2lm_species`` | List of variables for the post_int2lm-job | COSMO | -+-----------------------------+---------------------------------------------------+--------------------+ - -| - -Variables for :func:`jobs.reduce_output.main` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| **Name** | **Description** | **Target** | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| ``output_levels`` | Number of output levels (starting from ground level) for the ``reduce_output`` job, defaults to ``-1`` if omitted, which means that all levels are kept | COSMO, COSMO-ART | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ -| ``convert_gas`` | Switch to convert the unit of trace gases to ppm or ppb. This leads to a better compression rate. Defaults to ``True`` of omitted. | COSMO, COSMO-ART | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------+ - -| -Variables for :func:`jobs.verify_chain.main` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| **Name** | **Description** | **Target** | -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``reference_dir`` | Path to the reference output | COSMO, COSMO-ART, ICON | -+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+ -| ``output_dir`` | Path to the output of cosmo. 
If the :func:`jobs.post_cosmo.main` job is executed, this can be set to ``None`` and the path of the post_cosmo-job will be used | COSMO, COSMO-ART, ICON |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
-| ``values_to_check``   | Which files and variables are compared. This is a dict with a tuple of filenames as key. The first key element is the filename of the reference file, the second key element is the filename of the output-file of cosmo (usually ``lffdYYYYMMDDHH.nc`` and the value is a list of variables to compare between these two files)                 | COSMO, COSMO-ART, ICON         |
-+-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------+
+Here are two examples showing which general variables should be set by the user
+in the case configuration file.
+
+Header of ``config.yaml`` for the ``cosmo-ghg-spinup-test`` case
+================================================================
+
+.. code-block:: yaml
+
+    workflow: cosmo-ghg
+    constraint: gpu
+    ntasks_per_node: 12
+    restart_step: PT6H
+    spinup: 3
+    startdate: 2015-01-01T00:00:00Z
+    enddate: 2015-01-01T18:00:00Z
+
+Header of ``config.yaml`` for the ``icon-art-oem-test`` case
+============================================================
+
+.. code-block:: yaml
+
+    workflow: icon-art-oem
+    constraint: gpu
+    run_on: cpu
+    compute_queue: normal
+    ntasks_per_node: 12
+    restart_step: PT6H
+    startdate: 2018-01-01T00:00:00Z
+    enddate: 2018-01-01T12:00:00Z
+
+    eccodes_dir: ./input/eccodes_definitions
+    iconremap_bin: iconremap
+    iconsub_bin: iconsub
+    latbc_filename: ifs__lbc.nc
+    inidata_prefix: ifs_init_
+    inidata_nameformat: '%Y%m%d%H'
+    inidata_filename_suffix: .nc
+    output_filename: icon-art-oem-test
+    filename_format: _DOM_
+    lateral_boundary_grid_order: lateral_boundary
+    art_input_folder: ./input/icon-art-oem/ART
+
+Further variables
+=================
+
+Furthermore, there are additional variables to set that are tied to the
+individual jobs. These config variables themselves are dictionaries. Let's
+have a look at an example for the ``cfg.meteo`` variable:
+
+.. code-block:: yaml
+
+    meteo:
+        dir: ./input/cosmo-ghg/meteo
+        prefix: laf
+        nameformat: laf%Y%m%d%H
+        inc: 1
+
+These config variables can be accessed via ``cfg.meteo['dir']``,
+``cfg.meteo['prefix']``, etc., as they are Python dictionaries.
+
+.. hint::
+    In :ref:`namelist and runscript template` files
+    (see next section), accessing nested entries like this does not work
+    because of how the ``.format()`` method is implemented in Python. For that
+    reason, the Processing Chain automatically creates new variables of the
+    form ``cfg.meteo_dir``, ``cfg.meteo_prefix``, etc. at the start to make
+    them accessible for namelist and runjob templates.
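+
+To make the hint above concrete, the flattening can be pictured with the
+following sketch (illustrative only; the actual code lives in ``config.py``):
+
+.. code-block:: python
+
+    # Illustrative sketch: flatten nested config dictionaries so that
+    # templates can use placeholders like {cfg.meteo_dir}. Not the actual
+    # implementation in config.py.
+    class Cfg:
+        pass
+
+    cfg = Cfg()
+    cfg.meteo = {'dir': './input/cosmo-ghg/meteo', 'prefix': 'laf', 'inc': 1}
+
+    for key, value in cfg.meteo.items():
+        setattr(cfg, f'meteo_{key}', value)
+
+    print(cfg.meteo_dir)     # ./input/cosmo-ghg/meteo
+    print(cfg.meteo_prefix)  # laf
+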
+List of dictionary variables
+****************************
+
+The following is a list of dictionary variables that exist for the Processing Chain.
+For the individual elements of those variables, please refer to the ``config.yaml``
+files within the test cases.
+
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| Dictionary variable   | Used in job                                                                                                                         |
++=======================+=====================================================================================================================================+
+| ``meteo``             | ``prepare_cosmo``, ``prepare_icon``, ``icontools``, ``int2lm``, ``icon``                                                            |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``icontools_runjobs`` | ``icontools``                                                                                                                       |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``input_files``       | ``prepare_icon``                                                                                                                    |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``chem``              | ``prepare_icon``                                                                                                                    |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``era5``              | ``prepare_icon``                                                                                                                    |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``cams``              | ``prepare_cosmo``                                                                                                                   |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``emissions``         | ``emissions``                                                                                                                       |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``vprm``              | ``biofluxes``                                                                                                                       |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``oem``               | ``oem``, ``cosmo``                                                                                                                  |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``online_vprm``       | ``online_vprm``                                                                                                                     |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``int2lm``            | ``prepare_cosmo``, ``emissions``, ``biofluxes``, ``octe``, ``int2lm``, ``post_int2lm``, ``cosmo``, ``post_cosmo``                   |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``post_int2lm``       | ``post_int2lm``                                                                                                                     |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``cosmo``             | ``reduce_output``, ``oem``, ``photo_rate``, ``octe``, ``check_output``, ``post_cosmo``, ``cosmo``, ``obs_nudging``, ``online_vprm`` |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``reduce_output``     | ``reduce_output``                                                                                                                   |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``post_cosmo``        | ``post_cosmo``                                                                                                                      |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``verify_chain``      | ``verify_chain``                                                                                                                    |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
+| ``icon``              | ``oem``, ``prepare_icon``, ``icon``                                                                                                 |
++-----------------------+-------------------------------------------------------------------------------------------------------------------------------------+
diff --git a/docs/environment.rst b/docs/environment.rst
new file mode 100644
index 00000000..58707f7a
--- /dev/null
+++ b/docs/environment.rst
@@ -0,0 +1,62 @@
+.. _environment-section:
+
+Conda Environment
+=================
+
+The following steps allow you to create and use your own virtual environment to run the Processing Chain. We recommend using a conda environment for the usage of the provided scripts. Please follow the instructions for the installation. The following steps only need to be performed once.
+
+1. Install Miniconda
+~~~~~~~~~~~~~~~~~~~~
+
+Install Miniconda as a user-specific installation, e.g., in your ``$HOME`` directory, which is the default location.
+
+.. note::
+    Only conda itself should be installed in your ``$HOME``. All environments should be stored in your ``$PROJECT`` directory; otherwise, you risk filling up your ``$HOME`` directory. See below for instructions.
+
+To install the latest Miniconda, type:
+
+.. code-block:: bash
+
+    wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+    bash Miniconda3-latest-Linux-x86_64.sh
+
+Further details on Miniconda can be found on the `Miniconda documentation page <https://docs.conda.io/en/latest/miniconda.html>`_.
+
+2. Create the Conda Environment
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Create a conda environment ``proc-chain`` and install the requirements:
+
+.. code-block:: bash
+
+    conda env create --prefix $PROJECT/envs/proc-chain -f env/environment.yml
+
+To be able to activate your conda environment by simply using ``conda activate proc-chain`` instead of the full path, add the following to your ``.bashrc``:
+
+.. code-block:: bash
+
+    export CONDA_ENVS_PATH=$PROJECT/envs
+
+Activate the environment (use ``source activate`` in case ``conda activate`` does not work):
+
+.. code-block:: bash
+
+    conda activate proc-chain
+
+If you already have the environment but want to update it:
+
+.. code-block:: bash
+
+    conda env update --file env/environment.yml --prune
+
+3. Store user-specific data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To register your email address and standard project account, store them in these files within your home directory:
+
+.. code-block:: bash
+
+    echo <your_project_account> > ~/.acct
+    echo <your_email_address> > ~/.forward
+
+These settings are optional. The Processing Chain will first check the content of those files. If desired, the corresponding variables can be overridden by setting the ``compute_account`` and ``user_mail`` variables in the ``config.yaml`` file.
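+
+As an illustration of that lookup order, the logic could look roughly like
+this (a sketch only; the actual implementation lives in ``config.py``):
+
+.. code-block:: python
+
+    # Illustrative sketch: values from config.yaml take precedence over the
+    # ~/.acct and ~/.forward dotfiles. Not the actual code in config.py.
+    from pathlib import Path
+
+    def read_dotfile(name):
+        path = Path.home() / name
+        return path.read_text().strip() if path.exists() else None
+
+    cfg = {}  # stand-in for the parsed config.yaml
+    compute_account = cfg.get('compute_account') or read_dotfile('.acct')
+    user_mail = cfg.get('user_mail') or read_dotfile('.forward')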
diff --git a/docs/features.rst b/docs/features.rst
new file mode 100644
index 00000000..c0f14d0a
--- /dev/null
+++ b/docs/features.rst
@@ -0,0 +1,13 @@
+.. _features-section:
+
+Feature Overview
+================
+
+- Asynchronous submission of compute jobs to the HPC queue
+- Intuitive definition of job dependencies
+- Automatic cycling over time periods including folder structure creation
+- Various jobs for pre- and post-processing steps
+- Using model built-in restarts or custom spinup
+- Nested runs possible
+- Easy creation of own cases and workflows
+- Various examples for COSMO and ICON workflows available
\ No newline at end of file
diff --git a/docs/file-structure.rst b/docs/file-structure.rst
deleted file mode 100644
index 770b7e54..00000000
--- a/docs/file-structure.rst
+++ /dev/null
@@ -1,32 +0,0 @@
-.. _file-structure-section:
-
-File Structure
---------------
-
-::
-
-    + README.rst
-    + run_script.py                # main script
-    + jobs/
-    |    + *.py                    # jobs-files
-    |    \ tools/                  # tools-files
-    + cases/                       # example test cases
-    |    + cosmo-ghg-11km-test/    # COSMO-GHG example
-    |    |    + config.py          # user-configuration
-    |    |    \ *.cfg              # templates for namelists & batch-jobs
-    |    + cosmo-art-mother-test/  # COSMO-ART example (mother domain)
-    |    |    + config.py
-    |    |    \ *.cfg
-    |    + cosmo-art-nested-test/  # COSMO-ART example (nested domain)
-    |    |    + config.py
-    |    |    \ *.cfg
-    |    \ icon-test               # ICON example
-    |    |    + config.py
-    |    |    \ *.cfg
-    |    \ icon-oem-test           # ICON-OEM example
-    |    |    + config.py
-    |    |    \ *.cfg
-    + docs/
-        + source/                  # *.rst documentation files
-        \ Makefile                 # Makefile for generating docs
-
diff --git a/docs/flowchart.md b/docs/flowchart.md
new file mode 100644
index 00000000..202d1ac2
--- /dev/null
+++ b/docs/flowchart.md
@@ -0,0 +1,5 @@
+flowchart LR
+    prepare_icon --> icontools
+    icontools --> prepare_art
+    prepare_art --> icon_1
+    icon_1 --> icon_2
diff --git a/docs/functions.rst b/docs/functions.rst
new file mode 100644
index 00000000..4305a932
--- /dev/null
+++ b/docs/functions.rst
@@ -0,0 +1,155 @@
+.. _functions-section:
+
+Jobs
+----
+
+* :func:`jobs.biofluxes.main`
+* :func:`jobs.check_output.main`
+* :func:`jobs.cosmo.main`
+* :func:`jobs.emissions.main`
+* :func:`jobs.icon.main`
+* :func:`jobs.icontools.main`
+* :func:`jobs.int2lm.main`
+* :func:`jobs.obs_nudging.main`
+* :func:`jobs.octe.main`
+* :func:`jobs.oem.main`
+* :func:`jobs.online_vprm.main`
+* :func:`jobs.photo_rate.main`
+* :func:`jobs.post_cosmo.main`
+* :func:`jobs.post_int2lm.main`
+* :func:`jobs.prepare_cosmo.main`
+* :func:`jobs.prepare_icon.main`
+* :func:`jobs.reduce_output.main`
+* :func:`jobs.verify_chain.main`
+
+-------------------------------------------
+
+.. autofunction:: jobs.biofluxes.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.check_output.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.cosmo.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.emissions.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.icon.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.icontools.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.int2lm.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.obs_nudging.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.octe.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.oem.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.online_vprm.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.photo_rate.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.post_cosmo.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.post_int2lm.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.prepare_cosmo.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.prepare_icon.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.reduce_output.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.verify_chain.main
+
+
+Tools
+-----
+
+The tools are a collection of functions used by the jobs. Most of those
+functions are well documented and listed here. For others, one may take
+a look into ``jobs/tools`` directly.
+
+* :func:`jobs.tools.cams4int2cosmo.main`
+* :func:`jobs.tools.check_model.check_model`
+* :func:`jobs.tools.comp_nc.datasets_equal`
+* :func:`jobs.tools.ctnoaa4int2cosmo.main`
+* :func:`jobs.tools.mozart2int2lm.main`
+* :func:`jobs.tools.reduce_output_start_end.main`
+* :func:`jobs.tools.string2char.main`
+* :func:`jobs.tools.vprmsplit.main`
+* :func:`jobs.tools.write_cosmo_input_ghg.main`
+* :func:`jobs.tools.write_int2lm_input_art.main`
+
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.cams4int2cosmo.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.check_model.check_model
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.comp_nc.datasets_equal
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.ctnoaa4int2cosmo.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.mozart2int2lm.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.reduce_output_start_end.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.string2char.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.vprmsplit.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.write_cosmo_input_ghg.main
+
+-------------------------------------------
+
+.. autofunction:: jobs.tools.write_int2lm_input_art.main
diff --git a/docs/howtorun.rst b/docs/howtorun.rst
index 65004786..9d074ee7 100644
--- a/docs/howtorun.rst
+++ b/docs/howtorun.rst
@@ -1,118 +1,245 @@
-.. _script-section:
+.. _howtorun-section:
 
 How to Run
 ==========
 
 The Python file ``run_chain.py`` in the root directory is the main script of the Processing Chain.
-It reads the user's input from the command line and from the ``config.py`` file of the
+It reads the user's input from the command line and from the ``config.yaml`` file of the
 respective case. Then it will start the Processing Chain.
 
 Starting the Chain
 ------------------
 
-The chain has to be run with the following command: ::
+The chain has to be run with the following command:
+
+.. code-block:: bash
+
+    $ ./run_chain.py <casename>
+
+Here, ``<casename>`` is the name of a directory in the ``cases/``-directory where
+there is a ``config.yaml``-file specifying the configuration, as well as templates
+for the necessary namelist files for **int2lm**, **COSMO** or **ICON**. It may also
+contain additional runscripts to be submitted via ``sbatch``.
+
+.. hint::
+    Technically, you can run several cases (instead of a single case) in one
+    command, which is useful for nested runs, for example. This can be achieved
+    by running ``./run_chain.py <case1> <case2>``. With that, the full chain is
+    executed for ``case1`` first, and afterwards for ``case2``.
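+
+Running several cases in one command works because the main script accepts one
+or more case names on the command line, roughly as in this sketch (illustrative
+only; see ``run_chain.py`` for the real argument parser):
+
+.. code-block:: python
+
+    # Illustrative sketch of the one-or-more-casenames interface.
+    import argparse
+
+    parser = argparse.ArgumentParser(description='Run the Processing Chain.')
+    parser.add_argument('casenames', nargs='+',
+                        help='name(s) of case directories under cases/')
+    args = parser.parse_args(['cosmo-ghg-test', 'icon-test'])
+    print(args.casenames)  # ['cosmo-ghg-test', 'icon-test']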
+
+There are several optional arguments available to change the behavior of the chain:
+
+.. code-block:: bash
+
+    $ ./run_chain.py -h
+
+* ``-h``, ``--help``
+  Show this help message and exit.
+* ``-j [JOB_LIST ...]``, ``--jobs [JOB_LIST ...]``
+  List of job names to be executed.
+  A job is a ``.py`` file in ``jobs/`` with a ``main()`` function, which
+  handles one aspect of the Processing Chain, for
+  example copying ``meteo`` input data or launching a
+  job for ``int2lm``. Jobs are executed in the order
+  in which they are given here. If no jobs are
+  given, default jobs will be executed as defined
+  in ``config/models.yaml``.
+* ``-f``, ``--force``
+  Force the Processing Chain to redo all specified
+  jobs, even if they have been started already or
+  were finished previously. WARNING: Only logfiles
+  get deleted, other effects of a given job
+  (copied files etc.) are simply overwritten. This
+  may cause errors or unexpected behavior.
+* ``-r``, ``--resume``
+  Resume the Processing Chain by restarting the
+  last unfinished job. WARNING: Only the logfile
+  gets deleted, other effects of a given job
+  (copied files etc.) are simply overwritten. This
+  may cause errors or unexpected behavior.
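+
+As an illustration, the following command redoes only the listed jobs of a
+case, even if they already finished (the job names here are just examples
+taken from the COSMO-GHG workflow shown further below):
+
+.. code-block:: bash
+
+    $ ./run_chain.py cosmo-ghg-test -j prepare_cosmo int2lm cosmo -f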
-
-    $ python run_chain.py <casename> <startdate> <hstart> <hstop> -j [jobs]
-
-``<casename>`` is the name of a directory in the ``cases/``-directory where
-there is a ``config.py``-file specifying the configurations, as well as templates
-for the necessary namelist files for **int2lm**, **COSMO** or **ICON**.
-
-If you don't supply a joblist, the default joblist will be executed.
-
-For **COSMO**, that is ``prepare_data`` ``emissions`` ``biofluxes`` ``int2lm``
-``post_int2lm`` ``cosmo`` ``post_cosmo``,
-
-For **COSMOART** it is ``prepare_data`` ``emissions`` ``obs_nudging``
-``photo_rate`` ``int2lm`` ``cosmo`` ``post_cosmo``.
-
-For **ICON** it is ``prepare_data`` ``icon``.
-
-For **ICONART** it is ``prepare_data`` ``icon``.
-
-For **ICONARTOEM** it is ``prepare_data`` ``oae`` ``icon``.
-
-The model type can be chosen by setting the variable ``target`` in the ``config.py``-file.
-
-To run the **COSMO-GHG** example test case, run::
-
-    $ python run_chain.py cosmo-ghg-11km-test 2015-01-01 0 24 -j prepare_data emissions biofluxes int2lm post_int2lm cosmo post_cosmo
-
-To run the **COSMO-ART** example case, run::
-
-    $ python run_chain.py cosmo-art-mother-test cosmo-art-nested-test 2015-06-26 0 24 -j prepare_data emissions obs_nudging photo_rate int2lm cosmo post_cosmo
-
-To run the **ICON** or **ICON-ART** example cases, run::
-
-    $ python run_chain.py icon-test 2018-01-01 0 24 -j prepare_data icon
-
-or::
-
-    $ python run_chain.py icon-art-test 2018-01-01 0 24 -j prepare_data icon
-
-To run the **ICON-OEM** example cases, run::
-
-    $ python run_chain.py icon-oem-test 2018-01-01 0 24 -j prepare_data oae icon
-
-or::
-
-    $ python run_chain.py icon-oem-ensembles-test 2018-01-01 0 24 -j prepare_data oae icon
-
 What it Does
 ------------
-The script ``run_chain.py`` reads the command-line arguments and the config-file.
-It then calls the function :func:`run_chain.restart_runs` which divides the
-simuation time according to the specified restart steps. Then it calls
-:func:`run_chain.run_chain` for each sub-run. This function sets up the directory
-structure of the chain and then starts the specified :ref:`jobs`
-sequentially.
-
-The directory structure generated by the Processing Chain for a **COSMO** run
-looks like this:::
-
-    cfg.work_root/
-    + output/cfg.output_root/
-    \ <casename>/cfg.chain_root/
-        + cfg.int2lm_base/
-        | + cfg.int2lm_input/
-        | + cfg.int2lm_work/
-        | \ cfg.int2lm_output/
-        + cfg.cosmo_base/
-        | + cfg.cosmo_work/
-        | + cfg.cosmo_output/
-        | \ cfg.cosmo_restart_out/
-        \ checkpoints/
-            + cfg.log_working_dir/
-            \ cfg.log_finished_dir/
+The script ``run_chain.py`` reads the command line arguments and the config file
+from the specified case.
+It then calls the function :func:`run_chain.restart_runs`, which divides the
+simulation time according to the specified restart steps. Then it calls
+:func:`run_chain.run_chunk` for each part (chunk) of the simulation workflow.
+This function sets up the directory structure of the chain and then submits the
+specified :ref:`jobs <jobs-section>` via ``sbatch`` to the Slurm workload manager,
+taking job dependencies into account.
+
+Test Cases
+----------
+
+The following test cases are available:
+
+* ``cosmo-ghg-spinup-test``
+* ``cosmo-ghg-test``
+* ``icon-test``
+* ``icon-art-oem-test``
+* ``icon-art-global-test``
+
+To be able to run these test cases, it is necessary to provide the input data,
+to set up spack and to compile the models and tools. All this is automated via
+the script::
+
+    $ ./jenkins/scripts/jenkins.sh
+
+This will run all the individual scripts in ``jenkins/scripts/``, which
+can also be launched separately if desired.
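+
+For instance, to only set up spack and rebuild a single model without running
+a test case, one could call the corresponding scripts directly (a sketch; the
+script names are the ones used in this repository):
+
+.. code-block:: bash
+
+    $ ./jenkins/scripts/setup-spack.sh
+    $ ./jenkins/scripts/build_int2lm.sh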
+
+These cases undergo regular testing to ensure that the Processing Chain runs
+correctly. A corresponding Jenkins plan is launched on a weekly basis and
+when triggered within a GitHub pull request.
+
+Directory Structure
+-------------------
+
+The directory structure generated by the Processing Chain for a ``cosmo-ghg``
+run looks like this:
+
+.. code-block:: bash
+
+    cfg.work_root/cfg.casename/
+    └── cfg.chain_root/
+        ├── checkpoints/
+        │   ├── cfg.log_working_dir/
+        │   └── cfg.log_finished_dir/
+        ├── cfg.cosmo_base/
+        │   ├── cfg.cosmo_work/
+        │   ├── cfg.cosmo_output/
+        │   └── cfg.cosmo_restart_out/
+        └── cfg.int2lm_base/
+            ├── cfg.int2lm_input/
+            ├── cfg.int2lm_work/
+            └── cfg.int2lm_output/
+
+As one can see, it creates working directories for both the ``int2lm`` preprocessor
+and ``cosmo``. Additionally (and this is always the case), the ``checkpoints``
+directory holds all the job logfiles. Whenever a job has successfully finished,
+the logfile is copied from the ``working`` to the ``finished`` sub-directory.
 
-Running the ``cosmo-ghg-11km-test``-case therefore produces the following directories:::
-
-    $SCRATCH/processing_chain/
-    + output/cosmo-ghg-11km-test
-    \ cosmo-ghg-11km-test/2015010100_0_24/
-        + int2lm/
-        | + input/
-        | | + emissions/
-        | | + extpart/
-        | | + prepare_data/
-        | | \ vprm/
-        | + run/
-        | | + int2lm # executable
-        | | + INPUT
-        | | \ run.job
-        | \ output/
-        + cosmo/
-        | + run/
-        | | + cosmo # executable
-        | | + INPUT_*
-        | | \ run.job
-        | + output/
-        | \ restart/
-        \ checkpoints
-            + working/ # 1 logfile per started job
-            \ finished # 1 logfile per finished job
-
-
+Running the ``cosmo-ghg-test`` case therefore produces the following
+directories and files (showing four levels of directories deep):
+
+.. code-block:: bash
+
+    work/cosmo-ghg-test
+    ├── 2015010100_2015010106/
+    │   ├── checkpoints/
+    │   │   ├── finished/
+    │   │   │   ├── biofluxes
+    │   │   │   ├── cosmo
+    │   │   │   ├── emissions
+    │   │   │   ├── int2lm
+    │   │   │   ├── oem
+    │   │   │   ├── online_vprm
+    │   │   │   ├── post_cosmo
+    │   │   │   ├── post_int2lm
+    │   │   │   └── prepare_cosmo
+    │   │   └── working/
+    │   │       ├── biofluxes
+    │   │       ├── cosmo
+    │   │       ├── emissions
+    │   │       ├── int2lm
+    │   │       ├── oem
+    │   │       ├── online_vprm
+    │   │       ├── post_cosmo
+    │   │       ├── post_int2lm
+    │   │       └── prepare_cosmo
+    │   ├── cosmo/
+    │   │   ├── input/
+    │   │   │   ├── oem/
+    │   │   │   └── vprm/
+    │   │   ├── output/
+    │   │   │   └── lffd*.nc
+    │   │   ├── restart/
+    │   │   │   └── lrff00060000o.nc
+    │   │   └── run/
+    │   │       ├── cosmo-ghg
+    │   │       ├── INPUT_*
+    │   │       ├── post_cosmo.job
+    │   │       ├── run.job
+    │   │       └── YU*
+    │   └── int2lm/
+    │       ├── input/
+    │       │   ├── emissions
+    │       │   ├── extpar
+    │       │   ├── icbc
+    │       │   ├── meteo
+    │       │   └── vprm
+    │       ├── output/
+    │       │   ├── laf*.nc
+    │       │   └── lbfd*.nc
+    │       └── run/
+    │           ├── INPUT
+    │           ├── INPUT_ART
+    │           ├── int2lm
+    │           ├── OUTPUT
+    │           ├── run.job
+    │           └── YU*
+    └── 2015010106_2015010112/
+        ├── checkpoints/
+        │   ├── finished/
+        │   │   ├── biofluxes
+        │   │   ├── cosmo
+        │   │   ├── emissions
+        │   │   ├── int2lm
+        │   │   ├── oem
+        │   │   ├── online_vprm
+        │   │   ├── post_cosmo
+        │   │   ├── post_int2lm
+        │   │   └── prepare_cosmo
+        │   └── working/
+        │       ├── biofluxes
+        │       ├── cosmo
+        │       ├── emissions
+        │       ├── int2lm
+        │       ├── oem
+        │       ├── online_vprm
+        │       ├── post_cosmo
+        │       ├── post_int2lm
+        │       └── prepare_cosmo
+        ├── cosmo/
+        │   ├── input/
+        │   │   ├── oem
+        │   │   └── vprm
+        │   ├── output/
+        │   │   └── lffd*.nc
+        │   ├── restart/
+        │   │   └── lrff00060000o.nc
+        │   └── run/
+        │       ├── cosmo-ghg
+        │       ├── INPUT_*
+        │       ├── post_cosmo.job
+        │       ├── run.job
+        │       └── YU*
+        └── int2lm/
+            ├── input/
+            │   ├── emissions
+            │   ├── extpar
+            │   ├── icbc
+            │   ├── meteo
+            │   └── vprm
+            ├── output/
+            │   ├── laf*.nc
+            │   └── lbfd*.nc
+            └── run/
+                ├── INPUT
+                ├── INPUT_ART
+                ├── int2lm
+                ├── OUTPUT
+                ├── run.job
+                └── YU*
+
+-------------------------------------------
+
+.. autofunction:: run_chain.run_chunk
+
+-------------------------------------------
+
+.. autofunction:: run_chain.restart_runs
diff --git a/docs/index.rst b/docs/index.rst
index 8e691530..169bf730 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -3,25 +3,48 @@ You can adapt this file completely to your liking, but it should at least
    contain the root `toctree` directive.
 
+Processing Chain
+================
+
+The Processing Chain is a Python-based workflow tool designed to streamline
+weather and climate simulations.
+It facilitates the preparation of essential input data, submission of compute
+jobs to the queue on CSCS HPC systems, and the implementation of post-processing
+steps.
+In addition to supporting standard versions of the COSMO and ICON models,
+it is equipped to handle various model variants, notably COSMO-GHG
+(Greenhouse Gas Extension) and ICON-ART (Aerosols and Reactive Trace Gases).
+
+The Processing Chain can be easily customized to meet your specific requirements.
+This includes defining custom workflows, creating your own simulation cases,
+and integrating new jobs and auxiliary scripts.
+
 .. toctree::
    :maxdepth: 2
    :caption: Getting Started
 
-   file-structure
+   features
+   environment
    howtorun
 
 .. toctree::
-   :maxdepth: 3
+   :maxdepth: 2
    :caption: Configuration
 
+   code-structure
    config
    namelists
 
 .. toctree::
-   :maxdepth: 3
-   :caption: Jobs
+   :maxdepth: 2
+   :caption: Jobs & Workflows
 
    jobs
-   tools
+
+.. toctree::
+   :maxdepth: 2
+   :caption: API
+
+   functions
diff --git a/docs/jobs.rst b/docs/jobs.rst
index 90358b7c..96cae42b 100644
--- a/docs/jobs.rst
+++ b/docs/jobs.rst
@@ -3,54 +3,70 @@
 Overview
 --------
 
-The jobs described here are available for use in the processing chain.
-For every target, you can choose from a list of available jobs.
-As some jobs depend on the result of others, the order indicated here
-has to be respected.
-
-**COSMO**:
-
-1. :func:`jobs.biofluxes.main` | :func:`jobs.prepare_data.main` |
-   :func:`jobs.emissions.main` | :func:`jobs.online_vprm.main` |
-   :func:`jobs.oae.main` | :func:`jobs.obs_nudging.main`
-2. :func:`jobs.int2lm.main`
-3. :func:`jobs.post_int2lm.main`
-4. :func:`jobs.octe.main`
-5. :func:`jobs.cosmo.main`
-6. :func:`jobs.check_output.main`
-7. :func:`jobs.reduce_output.main`
-8. :func:`jobs.post_cosmo.main` | :func:`jobs.verify_chain.main`
-
-**COSMOART**:
-
-1. :func:`jobs.emissions.main` | :func:`jobs.prepare_data.main` |
-   :func:`jobs.photo_rate.main` | :func:`jobs.obs_nudging.main` |
-   :func:`jobs.online_vprm.main` | :func:`jobs.oae.main`
-2. :func:`jobs.int2lm.main`
-3. :func:`jobs.cosmo.main`
-4. :func:`jobs.check_output.main`
-5. :func:`jobs.reduce_output.main`
-6. :func:`jobs.post_cosmo.main` | :func:`jobs.verify_chain.main`
-
-**ICON**:
-
-1. :func:`jobs.prepare_data.main`
-2. :func:`jobs.icon.main`
-
-**ICONART**:
-
-1. :func:`jobs.prepare_data.main`
-2. :func:`jobs.icon.main`
-
-
-**ICONARTOEM**:
-
-1. :func:`jobs.prepare_data.main`
-2. :func:`jobs.oae.main`
-3. :func:`jobs.icon.main`
-
-
-Adding new jobs
+Jobs have to be part of the respective workflow. They are submitted via ``sbatch``
+to the Slurm workload manager.
+
+The order of job submission is based on the list given in ``workflows.yaml``
+(or in ``config.yaml`` if a custom, user-defined workflow is used; a sketch of
+such a custom definition is given below).
+
+Let's have a look at the ``icon-art`` example:
+
+.. code-block:: yaml
+
+    icon-art:
+      features:
+        - restart
+      jobs:
+        - prepare_icon
+        - icontools
+        - prepare_art
+        - icon
+
+This workflow consists of four jobs: ``prepare_icon``, ``icontools``,
+``prepare_art`` and ``icon``.
+
+All of these jobs will be submitted; however, they do not start at the same
+time, because some of them depend on others:
+
+.. code-block:: yaml
+
+    dependencies:
+      icontools:
+        current:
+          - prepare_icon
+      prepare_art:
+        current:
+          - icontools
+      icon:
+        current:
+          - prepare_icon
+          - icontools
+          - prepare_art
+        previous:
+          - icon
+
+Since ``icontools`` depends on ``prepare_icon``, and ``prepare_art`` depends
+on ``icontools``, the order of execution is ``prepare_icon`` --> ``icontools``
+--> ``prepare_art``. Note that if we had another job in there without dependencies,
+it would run in parallel to the others.
+
+Since ``icon`` depends on all other jobs, it will be executed last. Note that
+these dependencies are all listed under the ``current`` keyword, targeting
+the current chunk. For ``icon``, there is an additional ``previous`` keyword.
+This means that an ``icon`` simulation will always wait until the simulation
+from the last chunk is finished (because the restart file has to be available).
+
+Another effect of this workflow definition is that the ``prepare_icon``,
+``icontools`` and ``prepare_art`` jobs will also be launched for the next chunk,
+as they do not depend on jobs from a previous chunk.
+
+.. figure:: _static/processing_chain_workflow_icon_art.png
+   :alt: Flowchart for the ``icon-art`` workflow.
+
+   Flowchart for the ``icon-art`` workflow.
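+
+A custom workflow can be defined analogously in the case's ``config.yaml``.
+The following is a hypothetical sketch (the job selection is a placeholder,
+and it assumes the same ``features``/``jobs``/``dependencies`` keys as in
+``workflows.yaml``, here nested under a ``workflow`` key):
+
+.. code-block:: yaml
+
+    workflow:
+      features:
+        - restart
+      jobs:
+        - prepare_icon
+        - icontools
+        - icon
+      dependencies:
+        icontools:
+          current:
+            - prepare_icon
+        icon:
+          current:
+            - prepare_icon
+            - icontools
+          previous:
+            - icon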
+
+
+Adding New Jobs
 ---------------
 
 Adding a new job to the chain is simple:
@@ -59,26 +75,5 @@ Adding a new job to the chain is simple:
    a function called ``main`` which takes the same arguments as every other
    job. Make sure the function is documented with a docstring.
 2. Import it in ``jobs/__init__.py`` to make it accessible to ``run_chain.py``.
-3. Add the function to the documentation. You find the file describing this page
-   at ``doc/source/jobs.rst``.
-
-List of available jobs
-----------------------
-
-* :func:`jobs.biofluxes.main`
-* :func:`jobs.check_output.main`
-* :func:`jobs.cosmo.main`
-* :func:`jobs.emissions.main`
-* :func:`jobs.icon.main`
-* :func:`jobs.int2lm.main`
-* :func:`jobs.prepare_data.main`
-* :func:`jobs.oae.main`
-* :func:`jobs.obs_nudging.main`
-* :func:`jobs.octe.main`
-* :func:`jobs.online_vprm.main`
-* :func:`jobs.photo_rate.main`
-* :func:`jobs.post_cosmo.main`
-* :func:`jobs.post_int2lm.main`
-* :func:`jobs.reduce_output.main`
-* :func:`jobs.verify_chain.main`
+3. Add the job to your workflow.
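+
+A minimal sketch of such a job file (the module name ``mynewjob`` and its
+contents are purely illustrative; the ``BASIC_PYTHON_JOB`` flag and the
+logfile handling follow the pattern of the existing jobs in ``jobs/``):
+
+.. code-block:: python
+
+    # jobs/mynewjob.py
+    import logging
+
+    from . import tools
+
+    BASIC_PYTHON_JOB = True
+
+
+    def main(cfg):
+        """Illustrate the job interface.
+
+        Parameters
+        ----------
+        cfg : Config
+            Object holding all user-configuration parameters as attributes.
+        """
+        # Redirect logging to this job's logfile, as the other jobs do
+        tools.change_logfile(cfg.logfile)
+        logging.info("mynewjob finished")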
diff --git a/docs/namelists.rst b/docs/namelists.rst
index d76cfa40..20da47ca 100644
--- a/docs/namelists.rst
+++ b/docs/namelists.rst
@@ -1,25 +1,34 @@
-.. _namelists:
+.. _namelists-section:
 
-Namelist Templates
-------------------
+Namelist and Runscript Templates
+--------------------------------
 
-Namelists for **int2lm** and **COSMO** are generated using templates which are also located in
-the cases-directory. These templates are essentially textfiles containing "normal" namelist
-parameters and python-variables in curly braces.
+The namelists and run jobs for **int2lm** and **COSMO**, as well as for
+**icontools** and **ICON**, are dynamically generated using templates located in
+the ``cases/`` directory. These templates are essentially text files containing
+"normal" namelist parameters alongside Python variables enclosed in curly braces.
 
-These files get read by their respective job.
-The resulting string is formatted using python's ``.format()``-function which replaces the
-python-variables with their value. The formatted strings are then saved as namelist-files in the
-run-directory of their respective jobs and then read by the executable. ::
+During runtime, these template files are read by their respective jobs.
+The resulting strings are formatted through Python's ``.format()`` function, which
+substitutes the Python variables with their corresponding values.
+The formatted strings are then saved as the actual namelists and run scripts in the
+run directory of their respective jobs.
 
-    cases/example/example_namelist.cfg -> [read file] ->
-    "exvar = '{cfg.prefix}{cfg.suffix}'" -> ["".format(cfg)] ->
-    "exvar = 'pref_suff.nc'" -> [write to disk] ->
-    int2lm/run/example_namelist
+.. code-block::
 
-The same procedure is done for the slurm-runscripts for **int2lm** and **COSMO**.
+    cases/example/example_namelist.cfg -> [read file] ->
+    "namelist_var = '{cfg.prefix}{cfg.suffix}'" -> ["".format(cfg)] ->
+    "namelist_var = 'pref_suff.nc'" -> [write to disk] ->
+    int2lm/run/example_namelist
 
 A special case is ``INPUT_ART`` for **int2lm** and ``INPUT_GHG`` for **COSMO**.
 These namelists are generated by :func:`jobs.tools.write_int2lm_input_art.main`
 and :func:`jobs.tools.write_cosmo_input_ghg.main` from ``.csv``-files containing
 all necessary information.
+
+----------------------------------------------------
+
+.. autofunction:: jobs.tools.write_int2lm_input_art.main
+
+----------------------------------------------------
+
+.. autofunction:: jobs.tools.write_cosmo_input_ghg.main
+
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 1d8a9189..d2894daf 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,2 +1,14 @@
-psphinxTheme
-karma-sphinx-theme
\ No newline at end of file
+cdo
+nco
+netcdf4
+numpy
+f90nml
+cartopy
+matplotlib
+scipy
+pillow
+xarray
+cdsapi
+sphinx
+sphinx_rtd_theme
+sphinx-copybutton
diff --git a/docs/tools.rst b/docs/tools.rst
deleted file mode 100644
index 9aebd600..00000000
--- a/docs/tools.rst
+++ /dev/null
@@ -1,14 +0,0 @@
-.. _tools-section:
-
-Tools
-=====
-
-The tools are a collection of functions used by the jobs. Most of those
-functions are well documented and listed here. For others, one may take
-a look into ``jobs/tools`` directly.
-
-Conversion Functions
---------------------
-
-These functions are used by the job :func:`jobs.prepare_data.main`. They convert data into
-a format usable by **int2lm**
diff --git a/env/environment.yml b/env/environment.yml
index 06795923..09381fd4 100644
--- a/env/environment.yml
+++ b/env/environment.yml
@@ -4,7 +4,6 @@ channels:
   - defaults
 dependencies:
   - python=3.9
-  - sphinx_rtd_theme
   - cdo
   - nco
   - netcdf4
@@ -13,8 +12,10 @@ dependencies:
   - cartopy
   - matplotlib
   - scipy
-  - sphinx
   - pillow
   - xarray
-  - dask
   - cdsapi
+  - scikit-learn
+  - sphinx
+  - sphinx_rtd_theme
+  - sphinx-copybutton
diff --git a/src/.gitkeep b/ext/.gitkeep
similarity index 100%
rename from src/.gitkeep
rename to ext/.gitkeep
diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile
index 21c95360..07a8219b 100644
--- a/jenkins/Jenkinsfile
+++ b/jenkins/Jenkinsfile
@@ -117,7 +117,7 @@ pipeline {
             steps {
                 sh '''source ${WORKSPACE}/miniconda/etc/profile.d/conda.sh
                 conda activate proc-chain
-                . ${WORKSPACE}/src/spack-c2sm/setup-env.sh
+                . ${WORKSPACE}/ext/spack-c2sm/setup-env.sh
                 ./jenkins/scripts/test_cosmo-ghg.sh'''
             }
             post {
@@ -134,7 +134,7 @@ pipeline {
             steps {
                 sh '''source ${WORKSPACE}/miniconda/etc/profile.d/conda.sh
                 conda activate proc-chain
-                . ${WORKSPACE}/src/spack-c2sm/setup-env.sh
+                . ${WORKSPACE}/ext/spack-c2sm/setup-env.sh
                 ./jenkins/scripts/test_cosmo-ghg-spinup.sh'''
             }
             post {
@@ -151,7 +151,7 @@ pipeline {
             steps {
                 sh '''source ${WORKSPACE}/miniconda/etc/profile.d/conda.sh
                 conda activate proc-chain
-                . ${WORKSPACE}/src/spack-c2sm/setup-env.sh
+                . ${WORKSPACE}/ext/spack-c2sm/setup-env.sh
                 ./jenkins/scripts/test_icon.sh'''
             }
             post {
@@ -168,7 +168,7 @@ pipeline {
             steps {
                 sh '''source ${WORKSPACE}/miniconda/etc/profile.d/conda.sh
                 conda activate proc-chain
-                . ${WORKSPACE}/src/spack-c2sm/setup-env.sh
+                . ${WORKSPACE}/ext/spack-c2sm/setup-env.sh
                 ./jenkins/scripts/test_icon-art-oem.sh'''
            }
             post {
@@ -185,7 +185,7 @@ pipeline {
             steps {
                 sh '''source ${WORKSPACE}/miniconda/etc/profile.d/conda.sh
                 conda activate proc-chain
-                . ${WORKSPACE}/src/spack-c2sm/setup-env.sh
+                . ${WORKSPACE}/ext/spack-c2sm/setup-env.sh
                 ./jenkins/scripts/test_icon-art-global.sh'''
             }
             post {
diff --git a/jenkins/scripts/build_cosmo-ghg.sh b/jenkins/scripts/build_cosmo-ghg.sh
index 027acc5a..2167fec7 100755
--- a/jenkins/scripts/build_cosmo-ghg.sh
+++ b/jenkins/scripts/build_cosmo-ghg.sh
@@ -13,7 +13,7 @@ function error {
 BRANCH=c2sm
 GIT_REMOTE=git@github.com:C2SM-RCM/cosmo-ghg.git
 
-pushd src
+pushd ext
 
 # Activate spack
 . 
spack-c2sm/setup-env.sh diff --git a/jenkins/scripts/build_icon-art.sh b/jenkins/scripts/build_icon-art.sh index 36cf15cb..8ba5225e 100755 --- a/jenkins/scripts/build_icon-art.sh +++ b/jenkins/scripts/build_icon-art.sh @@ -13,7 +13,7 @@ function error { BRANCH=art GIT_REMOTE=git@github.com:C2SM/icon.git -pushd src +pushd ext # Activate spack . spack-c2sm/setup-env.sh diff --git a/jenkins/scripts/build_icon.sh b/jenkins/scripts/build_icon.sh index bff225c9..15e1d82a 100755 --- a/jenkins/scripts/build_icon.sh +++ b/jenkins/scripts/build_icon.sh @@ -13,7 +13,7 @@ function error { BRANCH=main GIT_REMOTE=git@github.com:C2SM/icon.git -pushd src +pushd ext # Activate spack . spack-c2sm/setup-env.sh diff --git a/jenkins/scripts/build_int2lm.sh b/jenkins/scripts/build_int2lm.sh index 2af403a3..1ecca336 100755 --- a/jenkins/scripts/build_int2lm.sh +++ b/jenkins/scripts/build_int2lm.sh @@ -13,7 +13,7 @@ function error { BRANCH=c2sm-features GIT_REMOTE=git@github.com:C2SM-RCM/int2lm.git -pushd src +pushd ext # Activate spack . spack-c2sm/setup-env.sh diff --git a/jenkins/scripts/jenkins.sh b/jenkins/scripts/jenkins.sh index d6298e19..8eb4d26e 100755 --- a/jenkins/scripts/jenkins.sh +++ b/jenkins/scripts/jenkins.sh @@ -23,9 +23,9 @@ eval "$(conda shell.bash hook)" conda activate proc-chain # Setup spack -if [[ -d src/spack-c2sm ]]; then +if [[ -d ext/spack-c2sm ]]; then echo spack folder already exists - activating spack... - . src/spack-c2sm/setup-env.sh + . ext/spack-c2sm/setup-env.sh else echo building spack... ./jenkins/scripts/setup-spack.sh @@ -41,7 +41,7 @@ else fi # Build int2lm -if [[ -f src/int2lm/test/testsuite/int2lm ]]; then +if [[ -f ext/int2lm/test/testsuite/int2lm ]]; then echo int2lm executable already exists - skipping build... else echo building int2lm... @@ -49,7 +49,7 @@ else fi # Build COSMO-GHG -if [[ -f src/cosmo-ghg/cosmo/ACC/cosmo_gpu ]]; then +if [[ -f ext/cosmo-ghg/cosmo/ACC/cosmo_gpu ]]; then echo cosmo executable already exists - skipping build. else echo building cosmo... @@ -57,7 +57,7 @@ else fi # Build ICON -if [[ -f src/icon/bin/icon ]]; then +if [[ -f ext/icon/bin/icon ]]; then echo icon executable already exists - skipping build. else echo building icon... @@ -65,7 +65,7 @@ else fi # Build ICON-ART -if [[ -f src/icon-art/bin/icon ]]; then +if [[ -f ext/icon-art/bin/icon ]]; then echo icon-art executable already exists - skipping build. else echo building icon-art... @@ -73,7 +73,7 @@ else fi # Test COSMO-GHG -if [[ -f work/cosmo-ghg-test/2015010100_6_12/checkpoints/finished/post_cosmo && "$force_execution" == false ]]; then +if [[ -f work/cosmo-ghg-test/2015010106_2015010112/checkpoints/finished/post_cosmo && "$force_execution" == false ]]; then echo cosmo-ghg test case already finished - skipping test. else echo running cosmo-ghg test case... @@ -81,7 +81,7 @@ else fi # Test COSMO-GHG (spinup) -if [[ -f work/cosmo-ghg-spinup-test/2015010106_-3_6/checkpoints/finished/post_cosmo && "$force_execution" == false ]]; then +if [[ -f work/cosmo-ghg-spinup-test/2015010109_2015010118/checkpoints/finished/post_cosmo && "$force_execution" == false ]]; then echo cosmo-ghg test case already finished - skipping test. else echo running cosmo-ghg-spinup test case... @@ -89,7 +89,7 @@ else fi # Test ICON -if [[ -f work/icon-test/2018010100_6_12/checkpoints/finished/icon && "$force_execution" == false ]]; then +if [[ -f work/icon-test/2018010106_2018010112/checkpoints/finished/icon && "$force_execution" == false ]]; then echo icon test case already finished - skipping test. 
else echo running icon test case... @@ -97,7 +97,7 @@ else fi # Test ICON-ART -if [[ -f work/icon-art-oem-test/2018010100_0_24/checkpoints/finished/icon && "$force_execution" == false ]]; then +if [[ -f work/icon-art-oem-test/2018010106_2018010112/checkpoints/finished/icon && "$force_execution" == false ]]; then echo icon-art test case already finished - skipping test. else echo running icon-art-oem test case... @@ -105,7 +105,7 @@ else fi # Test ICON-ART-GLOBAL -if [[ -f work/icon-art-global-test/2018010100_0_24/checkpoints/finished/icon && "$force_execution" == false ]]; then +if [[ -f work/icon-art-global-test/2018010106_2018010112/checkpoints/finished/icon && "$force_execution" == false ]]; then echo icon-art-global test case already finished - skipping test. else echo running icon-art-global test case... diff --git a/jenkins/scripts/setup-spack.sh b/jenkins/scripts/setup-spack.sh index 93945c0b..13988c5f 100755 --- a/jenkins/scripts/setup-spack.sh +++ b/jenkins/scripts/setup-spack.sh @@ -13,9 +13,9 @@ function error { TAG=v0.18.1.12 GIT_REMOTE=https://github.com/C2SM/spack-c2sm.git -rm -fr src/spack-c2sm +rm -fr ext/spack-c2sm -pushd src +pushd ext git clone --depth 1 --recurse-submodules --shallow-submodules -b ${TAG} ${GIT_REMOTE} . spack-c2sm/setup-env.sh popd diff --git a/jobs/__init__.py b/jobs/__init__.py index 08cc0cdb..332f34a8 100644 --- a/jobs/__init__.py +++ b/jobs/__init__.py @@ -1,20 +1,24 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# -from . import prepare_data -from . import emissions -from . import oem from . import biofluxes -from . import int2lm -from . import post_int2lm +from . import check_output from . import cosmo +from . import emissions +from . import icon +from . import icontools +from . import int2lm +from . import obs_nudging +from . import octe +from . import oem +from . import online_vprm +from . import photo_rate from . import post_cosmo +from . import post_int2lm +from . import prepare_art +from . import prepare_art_oem +from . import prepare_art_global +from . import prepare_cosmo +from . import prepare_icon from . import reduce_output from . import verify_chain -from . import photo_rate -from . import obs_nudging -from . import online_vprm -from . import octe -from . import check_output -from . import icon diff --git a/jobs/biofluxes.py b/jobs/biofluxes.py index 78471f09..cc9a738e 100644 --- a/jobs/biofluxes.py +++ b/jobs/biofluxes.py @@ -1,39 +1,27 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# -# Create VPRM biogenic fluxes from VPRM and put them into the input folder -# -# result in case of success: all VPRM input-files necessary are found in -# ${int2lm_input}/vprm/ -# -# Dominik Brunner, July 2013 -# -# 2013-07-18 Initial release, based on Christoph Knotes' emissions.bash -# In the current version, the program only checks for the presence -# of the constant-in-time emissions file and creates a soft link in the int2lm -# input directory (brd) -# 2018-06-25 Translated to Python (arp) import os import logging -from . import tools, prepare_data +from . import tools, prepare_cosmo -def main(cfg, model_cfg): - """Prepare the biofluxes-files for the simulation. +BASIC_PYTHON_JOB = True - Only necessary for **COSMO** simulations. - Copy biofluxes files from project folder (``cfg.vprm['dir']``) to int2lm input - folder on scratch (``cfg.int2lm_input/vprm``). +def main(cfg): + """Prepare biofluxes files for COSMO simulations. 
+ + Copies biofluxes files from the project folder (:attr:`cfg.vprm['dir']`) + to the int2lm input folder on scratch (:attr:`cfg.int2lm_input`/vprm). Parameters - ---------- - cfg : config-object - Object holding all user-configuration parameters as attributes + ---------- + cfg : Config + Object holding all user-configuration parameters as attributes. """ - tools.check_model(cfg, 'cosmo-ghg') - cfg = prepare_data.set_cfg_variables(cfg, model_cfg) + tools.change_logfile(cfg.logfile) + prepare_cosmo.set_cfg_variables(cfg) scratch_path = os.path.join(cfg.int2lm_input, 'vprm') diff --git a/jobs/check_output.py b/jobs/check_output.py index 133e969c..5ab99614 100644 --- a/jobs/check_output.py +++ b/jobs/check_output.py @@ -28,6 +28,8 @@ except ImportError: import tools +BASIC_PYTHON_JOB = True + def pkl_path(folder, pid=None): """ Returns the path (and creates it, if necessary) to the stored @@ -69,7 +71,7 @@ def timeseries_path(cfg): Parameters ---------- - cfg : config-object + cfg : Config Object holding all user-configuration parameters as attributes Returns @@ -89,7 +91,7 @@ def maps_path(cfg): Parameters ---------- - cfg : config-object + cfg : Config Object holding all user-configuration parameters as attributes Returns @@ -108,7 +110,7 @@ def animations_path(cfg): Parameters ---------- - cfg : config-object + cfg : Config Object holding all user-configuration parameters as attributes Returns @@ -211,7 +213,7 @@ def plot_timeseries(cfg, units): Parameters ---------- - cfg : config-object + cfg : Config Object holding all user-configuration parameters as attributes units : dict Dictionary containing units os variables @@ -385,7 +387,7 @@ def merge_data(cfg): Parameters ---------- - cfg : config-object + cfg : Config Object holding all user-configuration parameters as attributes Returns @@ -647,7 +649,7 @@ def create_map_directories(cfg, data, units): Parameters ---------- - cfg : config-object + cfg : Config Object holding all user-configuration parameters as attributes data: pandas.DataFrame Dataframe containing diagnostic values for each variable @@ -671,7 +673,7 @@ def create_animations(cfg): Parameters ---------- - cfg : config-object + cfg : Config Object holding all user-configuration parameters as attributes """ data_path = pkl_path(cfg.output_root) @@ -699,20 +701,21 @@ def create_animations(cfg): duration=300) -def main(cfg, model_cfg): - """Checks output variables whether they are in a phyiscally reasonable - range. +def main(cfg): + """Check output variables for physical reasonability and create plots. - Stores the time series of the minimum, the maximum, the mean, and - the std of the variables as a pandas object into a pickle file. + This function checks the output variables to ensure they are in a physically + reasonable range. It stores the time series of the minimum, maximum, mean, and + standard deviation of the variables as a pandas object into a pickle file. - Creates per-variable plots from the stored time series data. + It also creates per-variable plots from the stored time series data. Parameters - ---------- - cfg : config-object - Object holding all user-configuration parameters as attributes + ---------- + cfg : Config + Object holding all user-configuration parameters as attributes. 
""" + tools.change_logfile(cfg.logfile) date = dt.datetime.today() to_print = """check_output @@ -722,8 +725,6 @@ def main(cfg, model_cfg): ============== StartTime: %s =====================================================""" % date.strftime("%s") - logfile = os.path.join(cfg.log_working_dir, "check_output") - logging.basicConfig(filename=logfile, level=logging.INFO) logging.info(to_print) # if cfg.compute_host!="daint": @@ -748,7 +749,7 @@ def main(cfg, model_cfg): #SBATCH --time=00:30:00 #SBATCH --constraint=mc #SBATCH --ntasks=1 -#SBATCH --output={logfile} +#SBATCH --output={cfg.logfile} export EASYBUILD_PREFIX=/store/empa/em05/easybuild @@ -778,7 +779,7 @@ def main(cfg, model_cfg): cosmo_output=cfg.cosmo_output, output_root=cfg.output_root, work_log=cfg.log_working_dir, - logfile=logfile, + logfile=cfg.logfile, chain=cfg.chain_src_dir, chain_root=cfg.chain_root, action='get_data') @@ -819,7 +820,7 @@ def main(cfg, model_cfg): casename=cfg.casename, cosmo_output=cfg.cosmo_output, output_root=cfg.output_root, - logfile=logfile, + logfile=cfg.logfile, chain=cfg.chain_src_dir, chain_root=cfg.chain_root, action='plot_maps') @@ -859,9 +860,10 @@ def main(cfg, model_cfg): logging.info(to_print) # Check for errors - with open(logfile) as f: + with open(cfg.logfile) as f: if 'ERROR' in f.read(): - raise RuntimeError('Logfile containing errors! See %s' % logfile) + raise RuntimeError('Logfile containing errors! See %s' % + cfg.logfile) if __name__ == '__main__': diff --git a/jobs/cosmo.py b/jobs/cosmo.py index 4c43e815..a110661e 100644 --- a/jobs/cosmo.py +++ b/jobs/cosmo.py @@ -1,101 +1,41 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# -# Setup the namelist for a COSMO tracer run and submit the job to the queue -# -# result in case of success: forecast fields found in -# ${cosmo_output} -# -# Dominik Brunner, July 2013 -# -# 2013-07-21 Initial release, adopted from Christoph Knote's cosmo.bash (brd) -# 2018-07-10 Translated to Python (muq) import logging import os import subprocess -import csv -from .tools import write_cosmo_input_ghg -from . 
import tools -from datetime import datetime, timedelta - - -def set_cfg_variables(cfg, model_cfg): - setattr(cfg, 'cosmo_base', os.path.join(cfg.chain_root, 'cosmo')) - setattr(cfg, 'cosmo_input', os.path.join(cfg.chain_root, 'cosmo', 'input')) - setattr(cfg, 'cosmo_run', os.path.join(cfg.chain_root, 'cosmo', 'run')) - setattr(cfg, 'cosmo_output', os.path.join(cfg.chain_root, 'cosmo', - 'output')) - setattr(cfg, 'cosmo_output_reduced', - os.path.join(cfg.chain_root, 'cosmo', 'output_reduced')) - - # Number of tracers - if 'tracers' in model_cfg['models'][cfg.model]['features']: - tracer_csvfile = os.path.join(cfg.chain_src_dir, 'cases', cfg.casename, - 'cosmo_tracers.csv') - if os.path.isfile(tracer_csvfile): - with open(tracer_csvfile, 'r') as csv_file: - reader = csv.DictReader(csv_file, delimiter=',') - reader = [r for r in reader if r[''] != '#'] - setattr(cfg, 'in_tracers', len(reader)) - else: - raise FileNotFoundError(f"File not found: {tracer_csvfile}") - - # tracer_start namelist paramter for spinup simulation - if hasattr(cfg, 'spinup'): - if cfg.first_one: - setattr(cfg, 'tracer_start', 0) - else: - setattr(cfg, 'tracer_start', cfg.spinup) - else: - setattr(cfg, 'tracer_start', 0) - - # asynchronous I/O - if hasattr(cfg, 'cfg.cosmo_np_io'): - if cfg.cosmo_np_io == 0: - setattr(cfg, 'lasync_io', '.FALSE.') - setattr(cfg, 'num_iope_percomm', 0) - else: - setattr(cfg, 'lasync_io', '.TRUE.') - setattr(cfg, 'num_iope_percomm', 1) - - return cfg +from pathlib import Path +from datetime import datetime +from .tools import write_cosmo_input_ghg +from . import tools, prepare_cosmo -def main(cfg, model_cfg): - """Setup the namelists for a **COSMO** tracer run and submit the job to - the queue +BASIC_PYTHON_JOB = True - Necessary for both **COSMO** and **COSMOART** simulations. - Decide if the soil model should be TERRA or TERRA multi-layer depending on - ``startdate`` of the simulation. +def main(cfg): + """Setup the namelists for a COSMO run and submit the job to the queue. - Create necessary directory structure to run **COSMO** (run, output and - restart directories, defined in ``cfg.cosmo_run``, ``cfg.cosmo_output`` + Create necessary directory structure to run COSMO (run, output, and + restart directories, defined in ``cfg.cosmo_run``, ``cfg.cosmo_output``, and ``cfg.cosmo_restart_out``). - Copy the **COSMO**-executable from - ``cfg.cosmo_bin`` to ``cfg.cosmo_run/cosmo``. + Copy the COSMO-executable from + ``cfg.cosmo['binary_file']`` to ``cfg.cosmo_run/cfg.cosmo['execname']``. - Convert the tracer-csv-file to a **COSMO**-namelist file. + Convert the tracer csv file to a COSMO namelist file. - Format the **COSMO**-namelist-templates - (**COSMO**: ``AF,ORG,IO,DYN,PHY,DIA,ASS``, - **COSMOART**: ``ART,ASS,DIA,DYN,EPS,INI,IO,ORG,PHY``) - using the information in ``cfg``. - - Format the runscript-template and submit the job. + Format the COSMO namelist templates using the information in ``cfg``. + Format the runscript template and submit the job. Parameters ---------- - cfg : config-object - Object holding all user-configuration parameters as attributes + cfg : Config + Object holding all user-configuration parameters as attributes. 
""" - cfg = set_cfg_variables(cfg, model_cfg) - logfile = os.path.join(cfg.log_working_dir, "cosmo") - logfile_finish = os.path.join(cfg.log_finished_dir, "cosmo") + tools.change_logfile(cfg.logfile) + prepare_cosmo.set_cfg_variables(cfg) logging.info("Setup the namelist for a COSMO tracer run and " "submit the job to the queue") @@ -116,11 +56,10 @@ def main(cfg, model_cfg): tools.create_dir(ini_dir, "cosmo_input_initial") startfiletime = datetime.strptime(cfg.laf_startfile[-10:], "%Y%m%d%H") if cfg.startdate_sim >= startfiletime: - starttime_last = cfg.startdate_sim - timedelta( - hours=cfg.restart_step) work_root = os.path.dirname(os.path.dirname(cfg.chain_root)) last_output_path = os.path.join(work_root, cfg.casename, - cfg.job_id_prev, 'cosmo', 'output') + cfg.chunk_id_prev, 'cosmo', + 'output') laf_output_refdate = cfg.startdate_sim.strftime("%Y%m%d%H") last_laf_filename = "laf" + laf_output_refdate # At the beginning, use original laf_startfile @@ -158,7 +97,7 @@ def main(cfg, model_cfg): laf_output_refdate=laf_output_refdate, )) # Execute fieldextra - with open(logfile, "a+") as log: + with open(cfg.logfile, "a+") as log: result = subprocess.run( [cfg.fieldextra_bin, output_file_merge], stdout=log) @@ -176,23 +115,21 @@ def main(cfg, model_cfg): # Create restart directory if feature is present and # if there is no spinup - if 'restart' in model_cfg['models'][cfg.model]['features'] and not \ + if 'restart' in cfg.workflow['features'] and not \ hasattr(cfg, 'spinup'): tools.create_dir(cfg.cosmo_restart_out, "cosmo_restart_out") # Copy cosmo executable - cfg.cosmo['execname'] = cfg.model.lower() + cfg.cosmo_execname = Path(cfg.cosmo['binary_file']).name tools.copy_file(cfg.cosmo['binary_file'], - os.path.join(cfg.cosmo_run, cfg.cosmo['execname'])) + cfg.cosmo_run / cfg.cosmo_execname) # Prepare namelist and submit job tracer_csvfile = os.path.join(cfg.chain_src_dir, 'cases', cfg.casename, 'cosmo_tracers.csv') - if cfg.model == 'cosmo': - namelist_names = ['ORG', 'IO', 'DYN', 'PHY', 'DIA', 'ASS', 'SAT'] - elif cfg.model == 'cosmo-ghg': + if hasattr(cfg, 'cams') or hasattr(cfg, 'mozart'): namelist_names = ['AF', 'ORG', 'IO', 'DYN', 'GHG', 'PHY', 'DIA', 'ASS'] - elif cfg.model == 'cosmo-art': + elif hasattr(cfg, 'photo_rate'): namelist_names = [ 'ART', 'ASS', 'DIA', 'DYN', 'EPS', 'INI', 'IO', 'ORG', 'PHY' ] @@ -200,6 +137,8 @@ def main(cfg, model_cfg): # When doing online emissions in COSMO-ART, an additional # namelist is required namelist_names += ['OAE'] + elif hasattr(cfg, 'cosmo'): + namelist_names = ['ORG', 'IO', 'DYN', 'PHY', 'DIA', 'ASS', 'SAT'] for section in namelist_names: namelist_file = os.path.join( @@ -231,7 +170,7 @@ def main(cfg, model_cfg): # Append INPUT_GHG namelist with tracer definitions from csv file if os.path.isfile(tracer_csvfile): - if cfg.model == 'cosmo-ghg': + if hasattr(cfg, 'cams') or hasattr(cfg, 'mozart'): input_ghg_filename = os.path.join(cfg.cosmo_run, 'INPUT_GHG') write_cosmo_input_ghg.main(tracer_csvfile, input_ghg_filename, cfg) @@ -242,18 +181,15 @@ def main(cfg, model_cfg): with open(runscript_file) as input_file: cosmo_runscript = input_file.read() - output_file = os.path.join(cfg.cosmo_run, "run.job") - with open(output_file, "w") as outf: + Path(cfg.cosmo_run).mkdir(parents=True, exist_ok=True) + script = (cfg.cosmo_run / 'run_cosmo.job') + with open(script, "w") as outf: outf.write( cosmo_runscript.format(cfg=cfg, **cfg.cosmo, np_tot=np_tot, - logfile=logfile, - logfile_finish=logfile_finish)) + logfile=cfg.logfile, + 
logfile_finish=cfg.logfile_finish)) - result = subprocess.run( - ["sbatch", "--wait", - os.path.join(cfg.cosmo_run, 'run.job')]) - exitcode = result.returncode - if exitcode != 0: - raise RuntimeError("sbatch returned exitcode {}".format(exitcode)) + # Submit job + cfg.submit('cosmo', script) diff --git a/jobs/emissions.py b/jobs/emissions.py index cf98c92f..27516da3 100644 --- a/jobs/emissions.py +++ b/jobs/emissions.py @@ -1,35 +1,24 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# -# Create anthropogenic emissions and put them into the input folder -# -# Result in case of success: all emission input-files necessary are found in -# ${int2lm_input}/emissions/ -# -# Dominik Brunner, July 2013 -# -# 2013-07-18 Initial release, based on Christoph Knotes' emissions.bash -# In the current version, the program only checks for the presence -# of the constant-in-time emissions file and creates a soft link in -# the int2lm input directory (brd) -# 2018-06-25 Translated to Python (arp) import os import logging -from . import tools, prepare_data +from . import tools, prepare_cosmo +BASIC_PYTHON_JOB = True -def main(cfg, model_cfg): - """Copy emission files to the **int2lm** input directory. - Necessary for both **COSMO** and **COSMOART** simulations. +def main(cfg): + """Copy emission files to the int2lm input directory. + + Necessary for both COSMO and COSMOART simulations. Copy emission files from project folder (``cfg.emissions['dir']``) to - **int2lm** input folder on scratch (``cfg.int2lm_input/emissions``). + int2lm input folder on scratch (``cfg.int2lm_input/emissions``). - For **COSMO** simulations, converts the the netCDF-variable-names - from ``string`` to ``char`` (necessary for **int2lm**). + For COSMO simulations, converts the netCDF-variable-names + from ``string`` to ``char`` (necessary for int2lm). If there are multiple emission-datasets (cfg.emissions['dir'] is a list of paths), they are copied as follows:: @@ -40,16 +29,11 @@ def main(cfg, model_cfg): Parameters ---------- - starttime : datetime-object - The starting date of the simulation - hstart : int - Offset (in hours) of the actual start from the starttime - hstop : int - Length of simulation (in hours) - cfg : config-object - Object holding all user-configuration parameters as attributes + cfg : Config + Object holding all user-configuration parameters as attributes. """ - cfg = prepare_data.set_cfg_variables(cfg, model_cfg) + tools.change_logfile(cfg.logfile) + prepare_cosmo.set_cfg_variables(cfg) dest_prefix = "emis_" if not isinstance(cfg.emissions['dir'], list): @@ -84,5 +68,5 @@ def main(cfg, model_cfg): # convert grid_mapping_name from string (NF90_STRING) to char # (NF90_CHAR) (needed for int2lm to work) - if cfg.model.startswith('cosmo'): + if hasattr(cfg, 'cosmo'): tools.string2char.main(dest_path) diff --git a/jobs/icon.py b/jobs/icon.py index a63c835b..250df513 100644 --- a/jobs/icon.py +++ b/jobs/icon.py @@ -1,66 +1,41 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# -# Setup the namelist for an ICON run and submit the job to the queue -# -# result in case of success: forecast fields found in -# ${icon_output} -# -# Michael Jähn, February 2021 -# -# 2021-04-26 Initial release -# 2021-11-21 Updated for ICON-ART import logging -import os -import subprocess -from . import tools, prepare_data +from pathlib import Path +from . 
import tools, prepare_icon +BASIC_PYTHON_JOB = False -def main(cfg, model_cfg): - """Setup the namelists for an **ICON** tracer run and submit the job to - the queue - Necessary for both **ICON** and **ICONART** simulations. +def main(cfg): + """Setup the namelists for an ICON run and submit the job to + the queue. - Create necessary directory structure to run **ICON** (run, output and - restart directories, defined in ``cfg.icon_work``, ``cfg.icon_output`` - and ``cfg.icon_restart_out``). - - Copy the **ICON**-executable from + Copy the ICON-executable from ``cfg.icon_binary_file`` to ``cfg.icon_work/icon.exe``. - Use the tracer-csv-file to append **ICON**-namelist file. - - Format the **ICON**-namelist-templates: + Format the ICON-namelist-templates: ``icon_master.namelist.cfg, icon_NAMELIST_NWP.cfg``, using the information in ``cfg``. Format the runscript-template and submit the job. Parameters - ---------- - starttime : datetime-object - The starting date of the simulation - hstart : int - Offset (in hours) of the actual start from the starttime - hstop : int - Length of simulation (in hours) - cfg : config-object - Object holding all user-configuration parameters as attributes + ---------- + cfg : Config + Object holding all user-configuration parameters as attributes. """ - cfg = prepare_data.set_cfg_variables(cfg, model_cfg) - - logfile = os.path.join(cfg.log_working_dir, "icon") - logfile_finish = os.path.join(cfg.log_finished_dir, "icon") + prepare_icon.set_cfg_variables(cfg) + tools.change_logfile(cfg.logfile) logging.info("Setup the namelist for an ICON run and " "submit the job to the queue") # Copy icon executable - execname = 'icon.exe' - tools.copy_file(cfg.icon_binary_file, os.path.join(cfg.icon_work, - execname)) + cfg.icon_execname = Path(cfg.icon['binary_file']).name + tools.create_dir(cfg.icon_work, "icon_work") + tools.copy_file(cfg.icon_binary_file, cfg.icon_work / cfg.icon_execname) # Symlink the restart file to the last run into the icon/run folder if cfg.lrestart == '.TRUE.': @@ -68,36 +43,17 @@ def main(cfg, model_cfg): # Get name of initial file if hasattr(cfg, 'inicond_filename'): - inidata_filename = os.path.join(cfg.icon_input_icbc, - cfg.inicond_filename) + inidata_filename = cfg.icon_input_icbc / cfg.inicond_filename else: - inidata_filename = os.path.join( - cfg.icon_input_icbc, + inidata_filename = cfg.icon_input_icbc / str( cfg.startdate_sim.strftime(cfg.meteo['prefix'] + cfg.meteo['nameformat']) + '.nc') # Write run script (run_icon.job) - icon_runjob = os.path.join(cfg.case_path, cfg.icon_runjob_filename) - with open(icon_runjob) as input_file: - to_write = input_file.read() - output_file = os.path.join(cfg.icon_work, "run_icon.job") - with open(output_file, "w") as outf: - outf.write( - to_write.format(cfg=cfg, - inidata_filename=inidata_filename, - logfile=logfile, - logfile_finish=logfile_finish)) - - result = subprocess.run( - ["sbatch", "--wait", - os.path.join(cfg.icon_work, 'run_icon.job')]) - exitcode = result.returncode - - # In case of ICON-ART, ignore the "invalid pointer" error on successful run - if cfg.model.startswith('icon-art'): - if tools.grep("free(): invalid pointer", logfile)['success'] and \ - tools.grep("clean-up finished", logfile)['success']: - exitcode = 0 + template = (cfg.case_path / cfg.icon_runjob_filename).read_text() + script_str = template.format(cfg=cfg, inidata_filename=inidata_filename) + script = (cfg.icon_work / 'run_icon.job') + script.write_text(script_str) - if exitcode != 0: - raise RuntimeError("sbatch returned 
exitcode {}".format(exitcode)) + # Submit run script + cfg.submit('icon', script) diff --git a/jobs/icontools.py b/jobs/icontools.py new file mode 100644 index 00000000..a0f367b8 --- /dev/null +++ b/jobs/icontools.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import logging +import os +import xarray as xr +import numpy as np +from . import tools, prepare_icon + +BASIC_PYTHON_JOB = True + + +def main(cfg): + """ + - Add GEOSP to all meteo files + - Submit the runscript for the DWD ICON tools to remap the meteorological files. + - All runscripts specified in ``cfg.icontools_runjobs`` are submitted. + - The meteorological files are read from the original input directory + (``cfg.input_root_meteo``), and the remapped meteorological files are saved + in the input folder on scratch (``cfg.icon_input/icbc``). + """ + prepare_icon.set_cfg_variables(cfg) + tools.change_logfile(cfg.logfile) + + #----------------------------------------------------- + # Create LBC datafile lists (each at 00 UTC and others) + #----------------------------------------------------- + datafile_list = [] + datafile_list_rest = [] + datafile_list_chem = [] + for time in tools.iter_hours(cfg.startdate_sim, cfg.enddate_sim, + cfg.meteo['inc']): + meteo_file = cfg.icon_input_icbc / ( + cfg.meteo['prefix'] + time.strftime(cfg.meteo['nameformat'])) + if hasattr(cfg, 'art_input_folder'): + chem_file = cfg.icon_input_icbc / ( + cfg.chem['prefix'] + time.strftime(cfg.chem_nameformat)) + datafile_list_chem.append(str(chem_file) + cfg.chem['suffix']) + if str(meteo_file).endswith('00'): + datafile_list.append(str(meteo_file) + cfg.meteo['suffix']) + else: + datafile_list_rest.append(str(meteo_file) + cfg.meteo['suffix']) + datafile_list = ' '.join([str(v) for v in datafile_list]) + datafile_list_rest = ' '.join([str(v) for v in datafile_list_rest]) + datafile_list_chem = ' '.join([str(v) for v in datafile_list_chem]) + + #----------------------------------------------------- + # Write and submit ICONTOOLS runscripts + #----------------------------------------------------- + dep_id = None + for runscript in cfg.icontools_runjobs: + with (cfg.case_path / runscript).open() as input_file: + to_write = input_file.read() + runscript_path = cfg.icon_work / f"{runscript}.job" + with runscript_path.open("w") as outf: + outf.write( + to_write.format(cfg=cfg, + meteo=cfg.meteo, + logfile=cfg.logfile, + logfile_finish=cfg.logfile_finish, + datafile_list=datafile_list, + datafile_list_rest=datafile_list_rest, + datafile_list_chem=datafile_list_chem)) + + # Submitting icontools runscripts sequentially + logging.info(f" Starting icontools runscript {runscript}.") + dep_id = cfg.submit('icontools', runscript_path, add_dep=dep_id) + + logging.info("Add GEOSP to all meteo files") + for time in tools.iter_hours(cfg.startdate_sim, cfg.enddate_sim, + cfg.meteo['inc']): + # Specify file names + geosp_filename = time.replace( + hour=0).strftime(cfg.meteo['prefix'] + + cfg.meteo['nameformat']) + '_lbc.nc' + geosp_file = os.path.join(cfg.icon_input_icbc, geosp_filename) + src_filename = time.strftime(cfg.meteo['prefix'] + + cfg.meteo['nameformat']) + '_lbc.nc' + src_file = os.path.join(cfg.icon_input_icbc, src_filename) + merged_filename = time.strftime(cfg.meteo['prefix'] + + cfg.meteo['nameformat']) + '_merged.nc' + merged_file = os.path.join(cfg.icon_input_icbc, merged_filename) + + # Copy GEOSP file from last run if not present + if hasattr(cfg, + 'icon_input_icbc_prev') and not os.path.exists(geosp_file): + geosp_src_file = 
os.path.join(cfg.icon_input_icbc_prev, + geosp_filename) + tools.copy_file(geosp_src_file, + cfg.icon_input_icbc, + output_log=True) + + # Load GEOSP data array as da_geosp at time 00: + ds = xr.open_dataset(src_file) + ds_geosp = xr.open_dataset(geosp_file) + da_geosp = ds_geosp['GEOSP'] + + # Merge GEOSP-dataset with other timesteps + if (time.hour != 0): + # Change values of time dimension to current time + da_geosp = da_geosp.assign_coords(time=[np.datetime64(time)]) + # Merge GEOSP into temporary file + ds_merged = xr.merge([ds, da_geosp]) + ds_merged.attrs = ds.attrs + ds_merged.to_netcdf(merged_file) + # Logging info for merging GEOSP + logging.info("Added GEOSP to file {}".format(merged_file)) + # Rename file to get original file name + tools.rename_file(merged_file, src_file) + logging.info('OK') diff --git a/jobs/int2lm.py b/jobs/int2lm.py index da94a56c..83249fa3 100644 --- a/jobs/int2lm.py +++ b/jobs/int2lm.py @@ -1,71 +1,50 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# -# Setup the namelist for int2lm and submit the job to the queue -# -# Dominik Brunner, July 2013 -# -# 2013-07-20 Initial release, based on Christoph Knote's int2lm.bash (brd) -# 2017-01-15 adapted for hypatia and project SmartCarb (brd) -# 2018-08-03 Translated to Python (jae) import os import logging import shutil -import subprocess import pytz -from datetime import datetime, timedelta -from . import tools, prepare_data +from datetime import datetime +from . import tools, prepare_cosmo -def set_cfg_variables(cfg, model_cfg): +BASIC_PYTHON_JOB = True - setattr(cfg, 'int2lm_run', os.path.join(cfg.chain_root, 'int2lm', 'run')) - setattr(cfg, 'int2lm_output', - os.path.join(cfg.chain_root, 'int2lm', 'output')) - return cfg +def main(cfg): + """Setup the namelist for int2lm and submit the job to the queue. + Necessary for both COSMO and COSMOART simulations. -def main(cfg, model_cfg): - """Setup the namelist for **int2lm** and submit the job to the queue. - - Necessary for both **COSMO** and **COSMOART** simulations. - Decide if the soil model should be TERRA or TERRA multi-layer depending on - ``startdate`` of the simulation. + `startdate` of the simulation. - Create necessary directory structure to run **int2lm** (run and output + Create necessary directory structure to run int2lm (run and output directories, defined in ``cfg.int2lm`` and ``cfg.int2lm['output']``). - Copy the **int2lm**-executable from ``cfg.int2lm['binary_file']`` to + Copy the int2lm-executable from ``cfg.int2lm['binary_file']`` to ``cfg.int2lm['work']/int2lm``. Copy the extpar-file ``cfg.int2lm['extpar_file']`` to ``cfg.int2lm_run/work``. - **COSMOART**: Copy the ``libgrib_api`` files to + COSMOART: Copy the ``libgrib_api`` files to ``cfg.int2lm['work']/libgrib_api``. - **COSMO**: Convert the tracer-csv-files into a **int2lm**-namelist file. + COSMO: Convert the tracer-csv-files into an int2lm-namelist file. - Format the **int2lm**-namelist-template using the information in ``cfg``. + Format the int2lm-namelist-template using the information in ``cfg``. Format the runscript-template and submit the job. Parameters ---------- - starttime : datetime-object - The starting date of the simulation - hstart : int - Offset (in hours) of the actual start from the starttime - hstop : int - Length of simulation (in hours) - cfg : config-object - Object holding all user-configuration parameters as attributes + cfg : Config + Object holding all user-configuration parameters as attributes. 
""" - cfg = prepare_data.set_cfg_variables(cfg, model_cfg) - cfg = set_cfg_variables(cfg, model_cfg) + tools.change_logfile(cfg.logfile) + prepare_cosmo.set_cfg_variables(cfg) # Total number of processes np_tot = cfg.int2lm['np_x'] * cfg.int2lm['np_y'] @@ -89,7 +68,7 @@ def main(cfg, model_cfg): extpar_dir) # Copy landuse and plant-functional-type files - if cfg.model == 'cosmo-art': + if hasattr(cfg, 'photo_rate'): lu_file_src = cfg.int2lm['lu_file'] lu_file_dst = os.path.join(extpar_dir, 'landuse.nc') tools.copy_file(lu_file_src, lu_file_dst) @@ -159,12 +138,8 @@ def main(cfg, model_cfg): cfg.int2lm['runjob_filename'])) as input_file: int2lm_runscript = input_file.read() - # Logfile variables - logfile = os.path.join(cfg.log_working_dir, "int2lm") - logfile_finish = os.path.join(cfg.log_finished_dir, "int2lm") - - output_file = os.path.join(int2lm_run, "run.job") - with open(output_file, "w") as outf: + script = (cfg.int2lm_run / 'run_int2lm.job') + with open(script, "w") as outf: outf.write( int2lm_runscript.format(cfg=cfg, **cfg.int2lm, @@ -173,13 +148,8 @@ def main(cfg, model_cfg): ini_hour=cfg.startdate_sim_yyyymmddhh[8:], np_tot=np_tot, hstop_int2lm=hstop_int2lm, - logfile=logfile, - logfile_finish=logfile_finish)) + logfile=cfg.logfile, + logfile_finish=cfg.logfile_finish)) # Submit job - result = subprocess.run( - ["sbatch", "--wait", - os.path.join(int2lm_run, "run.job")]) - exitcode = result.returncode - if exitcode != 0: - raise RuntimeError("sbatch returned exitcode {}".format(exitcode)) + cfg.submit('int2lm', script) diff --git a/jobs/obs_nudging.py b/jobs/obs_nudging.py index bf2557cb..57119af1 100644 --- a/jobs/obs_nudging.py +++ b/jobs/obs_nudging.py @@ -3,13 +3,14 @@ import os import logging -import shutil from datetime import timedelta from . import tools +BASIC_PYTHON_JOB = True -def main(cfg, model_cfg): + +def main(cfg): """Copy and rename the obs_nudging files to the **COSMO** input directory. In the folder ``cfg.obs_nudging_dir``, the files are saved in the format @@ -23,10 +24,11 @@ def main(cfg, model_cfg): Also copies the blacklist-file blklsttmp. Parameters - ---------- - cfg : config-object - Object holding all user-configuration parameters as attributes + ---------- + cfg : Config + Object holding all user-configuration parameters as attributes. """ + tools.change_logfile(cfg.logfile) dest_dir = os.path.join(cfg.cosmo_input, "obs_nudging") tools.create_dir(dest_dir, "obs nudging input") diff --git a/jobs/octe.py b/jobs/octe.py index b52f2a03..1efbe486 100644 --- a/jobs/octe.py +++ b/jobs/octe.py @@ -11,6 +11,8 @@ from . import tools +BASIC_PYTHON_JOB = True + def create_dir_and_copy_input(dest_dir, lambdas_src, maps_src): """Create a directory at dest_dir (**COSMO** input) and copy src there. @@ -161,28 +163,23 @@ def perturb_bgs_in_dir(lambdas_nc, directory): entry.name)) -def main(cfg, model_cfg): +def main(cfg): """Copy necessary input files for **COSMO** and perturb BG. - Copies the NetCDF-files found at cfg.octe_maps and cfg.octe_lambdas to + Copies the NetCDF-files found at ``cfg.octe_maps`` and ``cfg.octe_lambdas`` to the **COSMO** input-directory. Perturbs the background tracer field. To do that, it reads the lambda-value - from the cfg.octe_lambdas (last value along the nparam-dimension) and + from the ``cfg.octe_lambdas`` (last value along the nparam-dimension) and scales the BG-field produced by int2lm, creating a new variable for each ensemble. 
Parameters ---------- - starttime : datetime-object - The starting date of the simulation - hstart : int - Offset (in hours) of the actual start from the starttime - hstop : int - Length of simulation (in hours) - cfg : config-object - Object holding all user-configuration parameters as attributes + cfg : Config + Object holding all user-configuration parameters as attributes. """ + tools.change_logfile(cfg.logfile) dest_dir = join(cfg.cosmo_input, 'octe') create_dir_and_copy_input(dest_dir=dest_dir, lambdas_src=cfg.octe_lambdas, diff --git a/jobs/oem.py b/jobs/oem.py index 358af4e9..e9cf42d7 100644 --- a/jobs/oem.py +++ b/jobs/oem.py @@ -1,29 +1,30 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# import os import logging -from . import tools, cosmo +from . import tools, prepare_cosmo +BASIC_PYTHON_JOB = True -def main(cfg, model_cfg): + +def main(cfg): """Copy emission and profile files to the **cosmo** or **icon** input directory. Parameters ---------- - starttime : datetime-object - The starting date of the simulation - hstart : int - Offset (in hours) of the actual start from the starttime - hstop : int - Length of simulation (in hours) - cfg : config-object - Object holding all user-configuration parameters as attributes + cfg : Config + Object holding all user-configuration parameters as attributes. + + Raises + ------ + RuntimeError + If an error occurs during the process. """ - cfg = cosmo.set_cfg_variables(cfg, model_cfg) + tools.change_logfile(cfg.logfile) + prepare_cosmo.set_cfg_variables(cfg) oem_dir = cfg.oem['dir'] oem_gridded_emissions_nc = os.path.join(oem_dir, @@ -51,9 +52,9 @@ def main(cfg, model_cfg): raise RuntimeError("At least one of (hod/dow/moy) or (hoy) netcdfs " " have to be given for online emissions") - if cfg.model.startswith('icon'): + if hasattr(cfg, 'icon'): input_dir = cfg.icon_input - else: + elif hasattr(cfg, 'cosmo'): input_dir = cfg.cosmo_input dest_dir = os.path.join(input_dir, "oem") tools.create_dir(dest_dir, "online emissions input") diff --git a/jobs/online_vprm.py b/jobs/online_vprm.py index 106457cf..a1e2312e 100644 --- a/jobs/online_vprm.py +++ b/jobs/online_vprm.py @@ -1,29 +1,25 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# import os import logging -from . import tools +from . import tools, prepare_cosmo +BASIC_PYTHON_JOB = True -def main(cfg, model_cfg): - """Copy MODIS surface reflectance data and vegatation class fraction file + +def main(cfg): + """Copy MODIS surface reflectance data and vegetation class fraction file to the **cosmo** input directory. Parameters ---------- - starttime : datetime-object - The starting date of the simulation - hstart : int - Offset (in hours) of the actual start from the starttime - hstop : int - Length of simulation (in hours) - cfg : config-object - Object holding all user-configuration parameters as attributes + cfg : Config + Object holding all user-configuration parameters as attributes. """ - + tools.change_logfile(cfg.logfile) + prepare_cosmo.set_cfg_variables(cfg) dest_modis = 'modis.nc' dest_vegetation = 'vegetation.nc' diff --git a/jobs/photo_rate.py b/jobs/photo_rate.py index 7f8360b2..afdb4716 100644 --- a/jobs/photo_rate.py +++ b/jobs/photo_rate.py @@ -6,27 +6,23 @@ from . import tools +BASIC_PYTHON_JOB = True -def main(cfg, model_cfg): + +def main(cfg): """Copy photolysis-rate file to the **COSMOART** input directory. Only necessary for **COSMOART** simulations. 
     Copy the photolysis-rate file from the project (``cfg.photo_rate_file``) to
     the **COSMOART** input folder on scratch (``cfg.cosmo_input/art_photolysis``).
 
     Parameters
-    ----------
-    start_time : datetime-object
-        The starting date of the simulation
-    hstart : int
-        Offset (in hours) of the actual start from the start_time
-    hstop : int
-        Length of simulation (in hours)
-    cfg : config-object
-        Object holding all user-configuration parameters as attributes
+    ----------
+    cfg : Config
+        Object holding all user-configuration parameters as attributes.
     """
-    tools.check_model(cfg, 'cosmo-art')
+    tools.change_logfile(cfg.logfile)
 
     logging.info("Copying photolysis-rate file from {} to {}".format(
         cfg.photo_rate_file,
diff --git a/jobs/post_cosmo.py b/jobs/post_cosmo.py
index 56d4dc1d..0442915a 100644
--- a/jobs/post_cosmo.py
+++ b/jobs/post_cosmo.py
@@ -1,16 +1,13 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-# Copy cosmo output from scratch to store (or anywhere else)
-
-### DEVELOPMENT VERSION ###
-
 import logging
 import os
 import datetime
-from subprocess import call
-from . import tools, int2lm, cosmo
+from . import tools, prepare_cosmo
+
+BASIC_PYTHON_JOB = False
 
 
 def logfile_header_template():
@@ -49,13 +46,13 @@ def runscript_commands_template():
     ])
 
 
-def main(cfg, model_cfg):
+def main(cfg):
     """Copy the output of a **COSMO**-run to a user-defined position.
 
     Write a runscript to copy all files (**COSMO** settings & output,
     **int2lm** settings, logfiles) from ``cfg.cosmo_run``,
     ``cfg.cosmo_output``, ``cfg.int2lm_run``, ``cfg.log_finished_dir`` to
-    ``cfg.output_root/...`` .
+    ``cfg.output_root/...``.
     If the job ``reduce_output`` has been run before ``post_cosmo``, a
     directory ``cfg.cosmo_output_reduced`` is created. In this case,
     ``cfg.cosmo_output_reduced`` is copied instead of ``cfg.cosmo_output``.
@@ -64,15 +61,12 @@ def main(cfg, model_cfg):
 
     Parameters
     ----------
-    cfg : config-object
-        Object holding all user-configuration parameters as attributes
+    cfg : Config
+        Object holding all user-configuration parameters as attributes.
""" - cfg = int2lm.set_cfg_variables(cfg, model_cfg) - cfg = cosmo.set_cfg_variables(cfg, model_cfg) + tools.change_logfile(cfg.logfile) + prepare_cosmo.set_cfg_variables(cfg) - logfile = os.path.join(cfg.log_working_dir, "post_cosmo") - cosmo_run_dir = cfg.cosmo_run - runscript_path = os.path.join(cfg.cosmo_run, "post_cosmo.job") copy_path = os.path.join( cfg.post_cosmo['output_root'], cfg.startdate_sim_yyyymmddhh + "_" + cfg.enddate_sim_yyyymmddhh) @@ -84,16 +78,16 @@ def main(cfg, model_cfg): runscript_content = "#!/bin/bash\n" runscript_content += runscript_header_template().format( compute_account=cfg.compute_account, - logfile=logfile, + logfile=cfg.logfile, constraint=cfg.constraint, cosmo_run=cfg.cosmo_run) if os.path.isdir(cfg.cosmo_output_reduced): - cosmo_output_src = cfg.cosmo_output_reduced.rstrip('/') + cosmo_output_src = str(cfg.cosmo_output_reduced).rstrip('/') cosmo_output_dest = os.path.join(copy_path, "cosmo_output_reduced").rstrip('/') else: - cosmo_output_src = cfg.cosmo_output.rstrip('/') + cosmo_output_src = str(cfg.cosmo_output).rstrip('/') cosmo_output_dest = os.path.join(copy_path, "cosmo_output").rstrip('/') # Create new directories @@ -110,36 +104,22 @@ def main(cfg, model_cfg): # Format the runscript runscript_content += runscript_commands_template().format( target_dir=copy_path.rstrip('/'), - int2lm_run_src=cfg.int2lm_run.rstrip('/'), + int2lm_run_src=str(cfg.int2lm_run).rstrip('/'), int2lm_run_dest=int2lm_run_path.rstrip('/'), - cosmo_run_src=cfg.cosmo_run.rstrip('/'), + cosmo_run_src=str(cfg.cosmo_run).rstrip('/'), cosmo_run_dest=cosmo_run_path.rstrip('/'), cosmo_output_src=cosmo_output_src, cosmo_output_dest=cosmo_output_dest_path, - logs_src=cfg.log_finished_dir.rstrip('/'), + logs_src=str(cfg.log_finished_dir).rstrip('/'), logs_dest=logs_path.rstrip('/')) - # Wait for Cosmo to finish first - tools.check_job_completion(cfg.log_finished_dir, "cosmo") - - with open(runscript_path, "w") as script: - script.write(runscript_content) + os.makedirs(cfg.cosmo_run, exist_ok=True) + script = (cfg.cosmo_run / 'run_post_cosmo.job') + with open(script, "w") as outf: + outf.write(runscript_content) logging.info("Submitting the copy job to the xfer queue") logging.info("Make sure you have the module 'xalt' unloaded!") - sbatch_wait = getattr(cfg, "wait", "True") - - if sbatch_wait: - exitcode = call(["sbatch", "--wait", runscript_path]) - logging.info(logfile_header_template().format( - "ENDS", str(datetime.datetime.today()))) - - # copy own logfile aswell - tools.copy_file(logfile, os.path.join(copy_path, "logs/")) - - else: - exitcode = call(["sbatch", runscript_path]) - - if exitcode != 0: - raise RuntimeError("sbatch returned exitcode {}".format(exitcode)) + # Submit job + cfg.submit('post_cosmo', script) diff --git a/jobs/post_int2lm.py b/jobs/post_int2lm.py index 787f7907..b0e78c0c 100644 --- a/jobs/post_int2lm.py +++ b/jobs/post_int2lm.py @@ -5,11 +5,14 @@ import os import glob import netCDF4 as nc + from datetime import datetime, timedelta -from . import tools, int2lm +from . import tools, prepare_cosmo + +BASIC_PYTHON_JOB = True -def main(cfg, model_cfg): +def main(cfg): """Combine multiple **int2lm** tracer-output files into a single one for **COSMO**. @@ -25,10 +28,11 @@ def main(cfg, model_cfg): Parameters ---------- - cfg : config-object - Object holding all user-configuration parameters as attributes + cfg : Config + Object holding all user-configuration parameters as attributes. 
""" - cfg = int2lm.set_cfg_variables(cfg, model_cfg) + prepare_cosmo.set_cfg_variables(cfg) + tools.change_logfile(cfg.logfile) # Int2lm processing always starts at hstart=0, thus modifying inidate inidate_int2lm_yyyymmddhh = cfg.startdate_sim_yyyymmddhh diff --git a/jobs/prepare_art.py b/jobs/prepare_art.py new file mode 100644 index 00000000..749fd7ce --- /dev/null +++ b/jobs/prepare_art.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import logging +import xarray as xr +import numpy as np +from . import tools, prepare_icon + +BASIC_PYTHON_JOB = True + + +def main(cfg): + """ + Prepare ICON-ART simulations. + + - Add GEOSP to all meteo files that don't contain it + - Add Q (copy of QV) and/or PS to initial file + """ + prepare_icon.set_cfg_variables(cfg) + tools.change_logfile(cfg.logfile) + + logging.info('Add Q (copy of QV) and/or PS to initial file') + meteo_file = os.path.join( + cfg.icon_input_icbc, + cfg.startdate_sim.strftime(cfg.meteo['prefix'] + + cfg.meteo['nameformat']) + '.nc') + if os.path.isfile(meteo_file): + merged_file = os.path.join( + cfg.icon_input_icbc, + cfg.startdate_sim.strftime(cfg.meteo['prefix'] + + cfg.meteo['nameformat']) + '_merged.nc') + ds = xr.open_dataset(meteo_file) + merging = False + if 'PS' not in ds: + if 'LNPS' not in ds: + raise KeyError( + f"'LNPS' must be found in the initial conditions file {meteo_file}" + ) + merging = True + ds['PS'] = ds['LNPS'] + ds['PS'].attrs = ds['LNPS'].attrs + ds['PS'] = np.exp(ds['PS']) + ds['PS'] = ds['PS'].squeeze(dim='lev_2') + ds['PS'].attrs["long_name"] = 'surface pressure' + ds['PS'].attrs['units'] = 'Pa' + logging.info(f"Added PS to file {meteo_file}") + if 'Q' not in ds: + merging = True + ds['Q'] = ds['QV'] + logging.info(f"Added Q to file {meteo_file}") + if merging: + ds.to_netcdf(merged_file) + tools.rename_file(merged_file, meteo_file) + logging.info('OK') diff --git a/jobs/prepare_art_global.py b/jobs/prepare_art_global.py new file mode 100644 index 00000000..a425d6b9 --- /dev/null +++ b/jobs/prepare_art_global.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import logging +import xarray as xr +import shutil +import subprocess +from . import tools, prepare_icon +from pathlib import Path # noqa: F401 +from .tools.interpolate_data import create_oh_for_restart, create_oh_for_inicond # noqa: F401 +from .tools.fetch_external_data import fetch_era5, fetch_era5_nudging + +BASIC_PYTHON_JOB = True + + +def main(cfg): + """ + Prepare global ICON-ART simulations. + + Parameters + ---------- + cfg : Config + Object holding all user-configuration parameters as attributes. 
+ """ + prepare_icon.set_cfg_variables(cfg) + tools.change_logfile(cfg.logfile) + logging.info("Prepare ICON-ART for global simulations") + + # -- Download ERA5 data and create the inicond file + if cfg.era5_inicond and cfg.lrestart == '.FALSE.': + # -- Fetch ERA5 data + fetch_era5(cfg.startdate_sim, cfg.icon_input_icbc) + + # -- Copy ERA5 processing script (icon_era5_inicond.job) in workdir + with open(cfg.icon_era5_inijob) as input_file: + to_write = input_file.read() + output_file = os.path.join(cfg.icon_input_icbc, 'icon_era5_inicond.sh') + with open(output_file, "w") as outf: + outf.write(to_write.format(cfg=cfg)) + + # -- Copy mypartab in workdir + shutil.copy( + os.path.join(os.path.dirname(cfg.icon_era5_inijob), 'mypartab'), + os.path.join(cfg.icon_input_icbc, 'mypartab')) + + # -- Run ERA5 processing script + process = subprocess.Popen([ + "bash", + os.path.join(cfg.icon_input_icbc, 'icon_era5_inicond.sh') + ], + stdout=subprocess.PIPE) + process.communicate() + + # ----------------------------------------------------- + # Create tracer initial conditions + # ----------------------------------------------------- + + # -- Download and add CAMS data to the inicond file if needed + if cfg.species_inicond: + + if cfg.lrestart == '.FALSE.': + + ext_restart = '' + filename = cfg.input_files_scratch_inicond_filename + + # -- Copy the script for processing external tracer data in workdir + with open(os.path.join(cfg.case_path, + cfg.icon_species_inijob)) as input_file: + to_write = input_file.read() + output_file = os.path.join(cfg.icon_input_icbc, + cfg.icon_species_inijob) + with open(output_file, "w") as outf: + outf.write( + to_write.format(cfg=cfg, + filename=filename, + ext_restart=ext_restart, + year=cfg.startdate_sim.year, + month=cfg.startdate_sim.month, + day=cfg.startdate_sim.day)) + + # -- Run ERA5 processing script + process = subprocess.Popen(["bash", output_file], + stdout=subprocess.PIPE) + process.communicate() + + # -- Create initial conditions for OH concentrations + if 'TROH' in cfg.species2restart: + create_oh_for_inicond(cfg, cfg.startdate_sim.month) + + else: + + # -- Check the extension of tracer variables in the restart file + ds_restart = xr.open_dataset(cfg.restart_file) # noqa: F841 + tracer_name = cfg.species2restart[0] # noqa: F841 + # FIXME: + # var_restart = [ + # IndexError: list index out of range + # var_restart = [ + # var for var in ds_restart.data_vars.keys() + # if var.startswith(tracer_name) + # ][0] + # ext_restart = var_restart.replace(tracer_name, '') + + # -- Change OH concentrations in the restart file + # if 'TROH' in cfg.species2restart: + # create_oh_for_restart(cfg, cfg.startdate_sim.month, + # ext_restart) + + # ----------------------------------------------------- + # Create meteorological and tracer nudging conditions + # ----------------------------------------------------- + + # -- If global nudging, download and process ERA5 and CAMS data + if cfg.era5_global_nudging: + + for time in tools.iter_hours(cfg.startdate_sim, + cfg.enddate_sim, + step=cfg.nudging_step): + + # -- Give a name to the nudging file + timestr = time.strftime('%Y%m%d%H') + filename = 'era2icon_R2B03_{timestr}_nudging.nc'.format( + timestr=timestr) + + # -- If initial time, copy the initial conditions to be used as boundary conditions + if time == cfg.startdate_sim and cfg.era5_inicond: + shutil.copy(cfg.input_files_scratch_inicond_filename, + os.path.join(cfg.icon_input_icbc, filename)) + continue + + # -- Fetch ERA5 data + fetch_era5_nudging(time, 
cfg.icon_input_icbc) + + # -- Copy ERA5 processing script (icon_era5_nudging.job) in workdir + with open(cfg.icon_era5_nudgingjob) as input_file: + to_write = input_file.read() + output_file = os.path.join( + cfg.icon_input_icbc, 'icon_era5_nudging_{}.sh'.format(timestr)) + with open(output_file, "w") as outf: + outf.write(to_write.format(cfg=cfg, filename=filename)) + + # -- Copy mypartab in workdir + if not os.path.exists(os.path.join(cfg.icon_input_icbc, + 'mypartab')): + shutil.copy( + os.path.join(os.path.dirname(cfg.icon_era5_nudgingjob), + 'mypartab'), + os.path.join(cfg.icon_input_icbc, 'mypartab')) + + # -- Run ERA5 processing script + process = subprocess.Popen([ + "bash", + os.path.join(cfg.icon_input_icbc, + 'icon_era5_nudging_{}.sh'.format(timestr)) + ], + stdout=subprocess.PIPE) + process.communicate() + + if cfg.species_global_nudging: + + # -- Copy CAMS processing script (icon_cams_nudging.job) in workdir + with open(cfg.icon_species_nudgingjob) as input_file: + to_write = input_file.read() + output_file = os.path.join( + cfg.icon_input_icbc, + 'icon_cams_nudging_{}.sh'.format(timestr)) + with open(output_file, "w") as outf: + outf.write(to_write.format(cfg=cfg, filename=filename)) + + # -- Run ERA5 processing script + process = subprocess.Popen([ + "bash", + os.path.join(cfg.icon_input_icbc, + 'icon_cams_nudging_{}.sh'.format(timestr)) + ], + stdout=subprocess.PIPE) + process.communicate() + + logging.info("OK") diff --git a/jobs/prepare_art_oem.py b/jobs/prepare_art_oem.py new file mode 100644 index 00000000..b421aa1d --- /dev/null +++ b/jobs/prepare_art_oem.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import logging +import xarray as xr +from . import tools, prepare_icon + +BASIC_PYTHON_JOB = True + + +def main(cfg): + """ + ICON-ART-OEM preparations + """ + prepare_icon.set_cfg_variables(cfg) + tools.change_logfile(cfg.logfile) + logging.info('Merging IC and LBC') + + if cfg.input_files['oem_gridded_emissions_nc']: + for time in tools.iter_hours(cfg.startdate_sim, cfg.enddate_sim, + cfg.meteo['inc']): + if time == cfg.startdate_sim: + #------------ + # Merge IC: + #------------ + meteo_file = os.path.join( + cfg.icon_input_icbc, + time.strftime(cfg.meteo['prefix'] + + cfg.meteo['nameformat']) + '.nc') + if os.path.isfile(meteo_file): + chem_file = os.path.join( + cfg.icon_input_icbc, cfg.chem['prefix'] + + time.strftime(cfg.chem['nameformat']) + '.nc') + merged_file = os.path.join( + cfg.icon_input_icbc, + time.strftime(cfg.meteo['prefix'] + + cfg.meteo['nameformat']) + '_merged.nc') + ds_meteo = xr.open_dataset(meteo_file) + ds_chem = xr.open_dataset(chem_file) + # LNPS --> PS + ds_chem['PS'] = ds_chem['LNPS'] + ds_chem['PS'].attrs = ds_chem['LNPS'].attrs + ds_chem['PS'] = ds_chem['PS'].squeeze(dim='lev_2') + ds_chem['PS'].attrs["long_name"] = 'surface pressure' + # merge: + ds_merged = xr.merge([ds_meteo, ds_chem], + compat="override") + #ds_merged.attrs = ds.attrs + ds_merged.to_netcdf(merged_file) + # Rename file to get original file name + tools.rename_file(merged_file, meteo_file) + tools.remove_file(chem_file) + logging.info( + "Added chemical tracer to file {}".format(merged_file)) + + #------------ + # Merge LBC: + #------------ + meteo_file = os.path.join( + cfg.icon_input_icbc, + time.strftime(cfg.meteo['prefix'] + cfg.meteo['nameformat']) + + '_lbc.nc') + chem_file = os.path.join( + cfg.icon_input_icbc, cfg.chem['prefix'] + + time.strftime(cfg.chem_nameformat) + '_lbc.nc') + merged_file = os.path.join( + 
+                cfg.icon_input_icbc,
+                time.strftime(cfg.meteo['prefix'] + cfg.meteo['nameformat'])
+                + '_merged.nc')
+            ds_meteo = xr.open_dataset(meteo_file)
+            ds_chem = xr.open_dataset(chem_file)
+            # LNPS --> PS
+            ds_chem['PS'] = ds_chem['LNPS']
+            ds_chem['PS'].attrs = ds_chem['LNPS'].attrs
+            ds_chem['PS'].attrs["long_name"] = 'surface pressure'
+            # Remapping chemical tracer names
+            if "remap_tracers" in cfg.chem:
+                for chem_in, chem_out in cfg.chem['remap_tracers'].items():
+                    ds_chem[chem_out] = ds_chem[chem_in]
+            # merge:
+            ds_merged = xr.merge([ds_meteo, ds_chem], compat="override")
+            #ds_merged.attrs = ds.attrs
+            ds_merged.to_netcdf(merged_file)
+            # Rename file to get original file name
+            tools.rename_file(merged_file, meteo_file)
+            tools.remove_file(chem_file)
+            logging.info(
+                "Added chemical tracer to file {}".format(merged_file))
diff --git a/jobs/prepare_cosmo.py b/jobs/prepare_cosmo.py
new file mode 100644
index 00000000..8cea9005
--- /dev/null
+++ b/jobs/prepare_cosmo.py
@@ -0,0 +1,247 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from pathlib import Path
+import logging
+import csv
+import os
+from datetime import timedelta
+from . import tools
+
+BASIC_PYTHON_JOB = True
+
+
+def set_cfg_variables(cfg):
+    cfg.int2lm_root = cfg.chain_root / 'int2lm'
+    cfg.int2lm_input = cfg.int2lm_root / 'input'
+    cfg.int2lm_run = cfg.chain_root / 'int2lm' / 'run'
+    cfg.int2lm_output = cfg.chain_root / 'int2lm' / 'output'
+
+    cfg.cosmo_base = cfg.chain_root / 'cosmo'
+    cfg.cosmo_input = cfg.chain_root / 'cosmo' / 'input'
+    cfg.cosmo_run = cfg.chain_root / 'cosmo' / 'run'
+    cfg.cosmo_output = cfg.chain_root / 'cosmo' / 'output'
+    cfg.cosmo_output_reduced = cfg.chain_root / 'cosmo' / 'output_reduced'
+
+    # Number of tracers
+    if 'tracers' in cfg.workflow['features']:
+        tracer_csvfile = cfg.chain_src_dir / 'cases' / cfg.casename / 'cosmo_tracers.csv'
+        if tracer_csvfile.is_file():
+            with open(tracer_csvfile, 'r') as csv_file:
+                reader = csv.DictReader(csv_file, delimiter=',')
+                reader = [r for r in reader if r[''] != '#']
+                cfg.in_tracers = len(reader)
+        else:
+            raise FileNotFoundError(f"File not found: {tracer_csvfile}")
+
+    # tracer_start namelist parameter for spinup simulation
+    if hasattr(cfg, 'spinup'):
+        if cfg.first_one:
+            cfg.tracer_start = 0
+        else:
+            cfg.tracer_start = cfg.spinup
+    else:
+        cfg.tracer_start = 0
+
+    # asynchronous I/O
+    if hasattr(cfg, 'cosmo_np_io'):
+        if cfg.cosmo_np_io == 0:
+            cfg.lasync_io = '.FALSE.'
+            cfg.num_iope_percomm = 0
+        else:
+            cfg.lasync_io = '.TRUE.'
+            cfg.num_iope_percomm = 1
+
+    # If nested run: use output of mother-simulation
+    if 'nesting' in cfg.workflow['features'] and not os.path.isdir(
+            cfg.meteo.dir):
+        # if ifs_hres_dir doesn't point to a directory,
+        # it is the name of the mother run
+        mother_name = cfg.meteo.dir
+        cfg.meteo.dir = cfg.work_root / mother_name / cfg.chunk_id / 'cosmo' / 'output'
+        cfg.meteo.inc = 1
+        cfg.meteo.prefix = 'lffd'
+
+
+def main(cfg):
+    """
+    **COSMO Data Preparation**
+
+    This function prepares input data for COSMO simulations by creating necessary directories,
+    copying meteorological files, and handling specific data processing.
+
+    - Copy meteorological files to **int2lm** input.
+    - Create the necessary directory ``cfg.int2lm_input/meteo``.
+    - Copy meteorological files from the project directory (``cfg.meteo['dir']/cfg.meteo['prefix']YYYYMMDDHH``)
+      to the int2lm input folder on scratch (``cfg.int2lm_input/meteo``).
+ - For nested runs (meteorological files are COSMO output: ``cfg.meteo['prefix'] == 'lffd'``), + also copy the ``*c.nc``-file with constant parameters. + + Parameters + ---------- + cfg : Config + Object holding all user-configuration parameters as attributes. + + Raises + ------ + RuntimeError + If any subprocess returns a non-zero exit code during execution. + """ + set_cfg_variables(cfg) + tools.change_logfile(cfg.logfile) + + logging.info('COSMO analysis data for IC/BC') + + dest_path = cfg.int2lm_input / 'meteo' + tools.create_dir(dest_path, "meteo input") + + source_nameformat = cfg.meteo['nameformat'] + if cfg.meteo['prefix'] == 'lffd': + # nested runs use cosmoart-output as meteo data + # have to copy the *c.nc-file + src_file = (cfg.meteo['dir'] / + cfg.startdate_sim.strftime(source_nameformat + 'c.nc')) + + tools.copy_file(src_file, dest_path, output_log=True) + + logging.info("Copied constant-param file from {} to {}".format( + src_file, dest_path)) + + # extend nameformat with ending to match cosmo-output + source_nameformat += '.nc' + + if cfg.meteo['prefix'] == 'efsf': + source_nameformat = cfg.meteo['prefix'] + '%y%m%d%H' + + num_steps = 0 + meteo_dir = cfg.meteo['dir'] + subdir = meteo_dir / cfg.startdate_sim.strftime('%y%m%d%H') + for time in tools.iter_hours(cfg.startdate_sim, cfg.enddate_sim, + cfg.meteo['inc']): + dest_path = cfg.int2lm_input / 'meteo' + src_file = meteo_dir / time.strftime(source_nameformat) + + if cfg.meteo['prefix'] == 'efsf': + if time == cfg.startdate_sim: + src_file = subdir / ('eas' + time.strftime('%Y%m%d%H')) + if not src_file.exists() and cfg.meteo.get('dir_alt') \ + is not None: + meteo_dir = cfg.meteo['dir_alt'] + subdir = meteo_dir / cfg.startdate_sim.strftime('%y%m%d%H') + src_file = subdir / ('eas' + time.strftime('%Y%m%d%H')) + dest_path = cfg.int2lm_input / 'meteo' / (cfg.meteo['prefix'] + + '00000000') + else: + td = time - cfg.startdate_sim - timedelta(hours=6 * num_steps) + days = str(td.days).zfill(2) + hours = str(td.seconds // 3600).zfill(2) + td_total = time - cfg.startdate_sim + days_total = str(td_total.days).zfill(2) + hours_total = str(td_total.seconds // 3600).zfill(2) + + src_file = subdir / (cfg.meteo['prefix'] + days + hours + + '0000') + dest_path = cfg.int2lm_input / 'meteo' / ( + cfg.meteo['prefix'] + days_total + hours_total + '0000') + + # Next time, change directory + checkdir = meteo_dir / time.strftime('%y%m%d%H') + if checkdir.is_dir(): + num_steps += 1 + subdir = checkdir + elif cfg.meteo.get('dir_alt') is not None: + checkdir = cfg.meteo['dir_alt'] / time.strftime('%y%m%d%H') + if checkdir.is_dir(): + num_steps += 1 + subdir = checkdir + meteo_dir = cfg.meteo['dir_alt'] + logging.info( + "Switching to other input directory from {} to {}". 
+ format(cfg.meteo['dir'], cfg.meteo['dir_alt'])) + elif not src_file.exists(): + # special case for MeteoSwiss COSMO-7 data + archive = Path('/store/mch/msopr/owm/COSMO-7') + yy = time.strftime("%y") + path = archive / 'ANA' + yy + src_file = path / time.strftime(source_nameformat) + + # copy meteo file from project folder to + tools.copy_file(src_file, dest_path, output_log=True) + + logging.info("Copied file from {} to {}".format(src_file, dest_path)) + + # Other IC/BC data + inv_to_process = [] + if hasattr(cfg, 'cams'): + try: + CAMS = dict(fullname="CAMS", + nickname="cams", + executable="cams4int2cosmo", + indir=cfg.cams['dir_orig'], + outdir=cfg.cams['dir_proc'], + param=[{ + 'inc': cfg.cams['inc'], + 'suffix': cfg.cams['suffix'] + }]) + inv_to_process.append(CAMS) + except AttributeError: + pass + try: + CT = dict(fullname="CarbonTracker", + nickname="ct", + executable="ctnoaa4int2cosmo", + indir=cfg.ct_dir_orig, + outdir=cfg.ct_dir_proc, + param=cfg.ct_parameters) + inv_to_process.append(CT) + except AttributeError: + pass + elif hasattr(cfg, 'mozart'): + try: + MOZART = dict(fullname='MOZART', + nickname='mozart', + executable='mozart2int2lm', + indir=cfg.mozart_file_orig, + outdir=cfg.mozart_dir_proc, + param=[{ + 'inc': cfg.mozart_inc, + 'suffix': cfg.mozart_prefix + }]) + inv_to_process.append(MOZART) + except AttributeError: + pass + + if inv_to_process: + logging.info("Processing " + + ", ".join([i["fullname"] + for i in inv_to_process]) + " data") + + scratch_path = cfg.int2lm_input / 'icbc' + tools.create_dir(scratch_path, "icbc input") + + for inv in inv_to_process: + logging.info(inv["fullname"] + " files") + tools.create_dir(inv["outdir"], "processed " + inv["fullname"]) + + for p in inv["param"]: + inc = p["inc"] + for time in tools.iter_hours(cfg.startdate_sim, + cfg.enddate_sim, inc): + logging.info(time) + + filename = inv["outdir"] / ( + p["suffix"] + "_" + time.strftime("%Y%m%d%H") + ".nc") + if not filename.exists(): + logging.info(filename) + try: + to_call = getattr(tools, inv["executable"]) + to_call.main(time, inv["indir"], inv["outdir"], p) + except: + logging.error("Preprocessing " + inv["fullname"] + + " data failed") + raise + + # copy to (temporary) run input directory + tools.copy_file(filename, scratch_path, output_log=True) + + logging.info("OK") diff --git a/jobs/prepare_data.py b/jobs/prepare_data.py deleted file mode 100644 index 894d2563..00000000 --- a/jobs/prepare_data.py +++ /dev/null @@ -1,704 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Prepare initial and boundary conditions -# -# In case of ICON: -# Prepare input for meteorological initial and boundary conditions -# by remapping the files onto the ICON grid (for IC) and the -# auxillary lateral-boundary grid (for BC) with the DWD ICON tools -# and saving them in the input folder. -# Currently, the input files are assumed to be ifs data. -# The files are read-in in grib2-format and the the remapped -# files are saved in netCDF-format (currently only netCDF works -# for ICON when then the simulation is driven by ifs-data). 
-# -# result in case of success: all meteo input-files necessary are found in -# ${int2lm_input}/meteo/ -# -# Dominik Brunner, July 2013 -# -# 2013-07-16 Initial release, based on Christoph Knote script -# 2017-01-15 Modified for hypatia and project SmartCarb -# 2018-06-21 Translated to Python (kug) -# 2021-02-28 Modified for ICON-simulations (stem) -# 2021-11-12 Modified for ICON-ART-simulations (mjaehn) - -import os -import logging -import shutil -import subprocess -from datetime import timedelta -import xarray as xr -import numpy as np -from . import tools -from .tools.interpolate_data import create_oh_for_restart, create_oh_for_inicond -from .tools.fetch_external_data import fetch_era5, fetch_era5_nudging -from calendar import monthrange - - -def set_cfg_variables(cfg, model_cfg): - # TODO: Change setattr() to direct assignment - if cfg.model.startswith('cosmo'): - setattr(cfg, 'int2lm_root', os.path.join(cfg.chain_root, 'int2lm')) - setattr(cfg, 'int2lm_input', os.path.join(cfg.int2lm_root, 'input')) - elif cfg.model.startswith('icon'): - setattr(cfg, 'icon_base', os.path.join(cfg.chain_root, 'icon')) - setattr(cfg, 'icon_input', os.path.join(cfg.chain_root, 'icon', - 'input')) - setattr(cfg, 'icon_input_icbc', - os.path.join(cfg.chain_root, 'icon', 'input', 'icbc')) - setattr(cfg, 'icon_work', os.path.join(cfg.chain_root, 'icon', 'run')) - setattr(cfg, 'icon_output', - os.path.join(cfg.chain_root, 'icon', 'output')) - setattr(cfg, 'icon_output_reduced', - os.path.join(cfg.chain_root, 'icon', 'output_reduced')) - setattr(cfg, 'icon_restart_out', - os.path.join(cfg.chain_root, 'icon', 'restart')) - setattr(cfg, 'icon_restart_in', - os.path.join(cfg.chain_root_prev, 'icon', 'run')) - setattr(cfg, 'icon_input_icbc_prev', - os.path.join(cfg.chain_root_prev, 'icon', 'input', 'icbc')) - - cfg.input_files_scratch = {} - for varname in cfg.input_files: - cfg.input_files_scratch[varname] = os.path.join( - cfg.icon_input, os.path.basename(cfg.input_files[varname])) - cfg.create_vars_from_dicts() - - cfg.ini_datetime_string = cfg.startdate.strftime('%Y-%m-%dT%H:00:00Z') - cfg.end_datetime_string = cfg.enddate.strftime('%Y-%m-%dT%H:00:00Z') - - if cfg.model == 'icon-art-oem': - cfg.startdate_sim_yyyymmdd_hh = cfg.startdate_sim.strftime( - '%Y%m%d_%H') - - if cfg.model == 'icon-art-global': - # Nudge type (global or nothing) - cfg.nudge_type = 2 if cfg.era5_global_nudging else 0 - # Time step for global nudging in seconds - cfg.nudging_step_seconds = cfg.nudging_step * 3600 - # Prescribed initial conditions for CH4, CO and/or OH - cfg.iart_init_gas = 4 if cfg.species_inicond else 0 - - if cfg.lrestart == '.TRUE.': - cfg.restart_filename = 'restart_atm_DOM01.nc' - cfg.restart_file = os.path.join(cfg.icon_restart_in, - cfg.restart_filename) - cfg.restart_file_scratch = os.path.join(cfg.icon_work, - cfg.restart_filename) - - return cfg - - -def main(cfg, model_cfg): - """ - **ICON** - - Create necessary directories ``cfg.icon_input_icbc`` - and ''cfg.icon_work'' - - Submitting the runscript for the DWD ICON tools to remap the meteo files. - - All runscripts specified in ``cfg.icontools_runjobs`` are submitted. - - The meteo files are read-in from the original input directory - (``cfg.input_root_meteo``) and the remapped meteo files are - saved in the input folder on scratch (``cfg.icon_input/icbc``). - - The constant variable 'GEOSP' is added to the files not containing it - using python-cdo bindings. - - **COSMO** - - Copy meteo files to **int2lm** input. 
- - Create necessary directory ``cfg.int2lm_input/meteo``. Copy meteo files - from project directory (``cfg.meteo['dir']/cfg.meteo['prefix']YYYYMMDDHH``) to - int2lm input folder on scratch (``cfg.int2lm_input/meteo``). - - For nested runs (meteo files are cosmo-output: ``cfg.meteo['prefix'] == - 'lffd'``), also the ``*c.nc``-file with constant parameters is copied. - - - Parameters - ---------- - startdate : datetime-object - The start date of the simulation - enddate : datetime-object - The end date of the simulation - cfg : config-object - Object holding all user-configuration parameters as attributes - """ - - cfg = set_cfg_variables(cfg, model_cfg) - - if cfg.model.startswith('icon'): - logging.info('ICON input data (IC/BC)') - - #----------------------------------------------------- - # Create directories - #----------------------------------------------------- - tools.create_dir(cfg.icon_work, "icon_work") - tools.create_dir(cfg.icon_input_icbc, "icon_input_icbc") - tools.create_dir(cfg.icon_output, "icon_output") - tools.create_dir(cfg.icon_restart_out, "icon_restart_out") - - #----------------------------------------------------- - # Copy input files - #----------------------------------------------------- - for varname in cfg.input_files: - varname_scratch = f'{varname}_scratch' - tools.copy_file(cfg.input_files[varname], - cfg.input_files_scratch[varname], - output_log=True) - - if cfg.model == 'icon-art-global': - # -- Download ERA5 data and create the inicond file - if cfg.era5_inicond and cfg.lrestart == '.FALSE.': - # -- Fetch ERA5 data - fetch_era5(cfg.startdate_sim, cfg.icon_input_icbc) - - # -- Copy ERA5 processing script (icon_era5_inicond.job) in workdir - with open(cfg.icon_era5_inijob) as input_file: - to_write = input_file.read() - output_file = os.path.join(cfg.icon_input_icbc, - 'icon_era5_inicond.sh') - with open(output_file, "w") as outf: - outf.write(to_write.format(cfg=cfg)) - - # -- Copy mypartab in workdir - shutil.copy( - os.path.join(os.path.dirname(cfg.icon_era5_inijob), - 'mypartab'), - os.path.join(cfg.icon_input_icbc, 'mypartab')) - - # -- Run ERA5 processing script - process = subprocess.Popen([ - "bash", - os.path.join(cfg.icon_input_icbc, 'icon_era5_inicond.sh') - ], - stdout=subprocess.PIPE) - process.communicate() - - # ----------------------------------------------------- - # Create tracer initial conditions - # ----------------------------------------------------- - - # -- Download and add CAMS data to the inicond file if needed - if cfg.species_inicond: - - if cfg.lrestart == '.FALSE.': - - ext_restart = '' - filename = cfg.input_files_scratch_inicond_filename - - # -- Copy the script for processing external tracer data in workdir - with open( - os.path.join( - cfg.case_path, - cfg.icon_species_inijob)) as input_file: - to_write = input_file.read() - output_file = os.path.join(cfg.icon_input_icbc, - cfg.icon_species_inijob) - with open(output_file, "w") as outf: - outf.write( - to_write.format(cfg=cfg, - filename=filename, - ext_restart=ext_restart, - year=cfg.startdate_sim.year, - month=cfg.startdate_sim.month, - day=cfg.startdate_sim.day)) - - # -- Run ERA5 processing script - process = subprocess.Popen(["bash", output_file], - stdout=subprocess.PIPE) - process.communicate() - - # -- Create initial conditions for OH concentrations - if 'TROH' in cfg.species2restart: - create_oh_for_inicond(cfg, cfg.startdate_sim.month) - - else: - - # -- Check the extension of tracer variables in the restart file - ds_restart = 
xr.open_dataset(cfg.restart_file) - tracer_name = cfg.species2restart[0] - # FIXME: - # var_restart = [ - # IndexError: list index out of range - # var_restart = [ - # var for var in ds_restart.data_vars.keys() - # if var.startswith(tracer_name) - # ][0] - # ext_restart = var_restart.replace(tracer_name, '') - - # -- Change OH concentrations in the restart file - # if 'TROH' in cfg.species2restart: - # create_oh_for_restart(cfg, cfg.startdate_sim.month, - # ext_restart) - - # ----------------------------------------------------- - # Create meteorological and tracer nudging conditions - # ----------------------------------------------------- - - # -- If global nudging, download and process ERA5 and CAMS data - if cfg.era5_global_nudging: - - for time in tools.iter_hours(cfg.startdate_sim, - cfg.enddate_sim, - step=cfg.nudging_step): - - # -- Give a name to the nudging file - timestr = time.strftime('%Y%m%d%H') - filename = 'era2icon_R2B03_{timestr}_nudging.nc'.format( - timestr=timestr) - - # -- If initial time, copy the initial conditions to be used as boundary conditions - if time == cfg.startdate_sim and cfg.era5_inicond: - shutil.copy( - cfg.input_files_scratch_inicond_filename, - os.path.join(cfg.icon_input_icbc, filename)) - continue - - # -- Fetch ERA5 data - fetch_era5_nudging(time, cfg.icon_input_icbc) - - # -- Copy ERA5 processing script (icon_era5_nudging.job) in workdir - with open(cfg.icon_era5_nudgingjob) as input_file: - to_write = input_file.read() - output_file = os.path.join( - cfg.icon_input_icbc, - 'icon_era5_nudging_{}.sh'.format(timestr)) - with open(output_file, "w") as outf: - outf.write(to_write.format(cfg=cfg, filename=filename)) - - # -- Copy mypartab in workdir - if not os.path.exists( - os.path.join(cfg.icon_input_icbc, 'mypartab')): - shutil.copy( - os.path.join( - os.path.dirname(cfg.icon_era5_nudgingjob), - 'mypartab'), - os.path.join(cfg.icon_input_icbc, 'mypartab')) - - # -- Run ERA5 processing script - process = subprocess.Popen([ - "bash", - os.path.join(cfg.icon_input_icbc, - 'icon_era5_nudging_{}.sh'.format(timestr)) - ], - stdout=subprocess.PIPE) - process.communicate() - - if cfg.species_global_nudging: - - # -- Copy CAMS processing script (icon_cams_nudging.job) in workdir - with open(cfg.icon_species_nudgingjob) as input_file: - to_write = input_file.read() - output_file = os.path.join( - cfg.icon_input_icbc, - 'icon_cams_nudging_{}.sh'.format(timestr)) - with open(output_file, "w") as outf: - outf.write( - to_write.format(cfg=cfg, filename=filename)) - - # -- Run ERA5 processing script - process = subprocess.Popen([ - "bash", - os.path.join( - cfg.icon_input_icbc, - 'icon_cams_nudging_{}.sh'.format(timestr)) - ], - stdout=subprocess.PIPE) - process.communicate() - - else: # non-global ICON-ART - #----------------------------------------------------- - # Create LBC datafile lists (each at 00 UTC and others) - #----------------------------------------------------- - datafile_list = [] - datafile_list_rest = [] - datafile_list_chem = [] - for time in tools.iter_hours(cfg.startdate_sim, cfg.enddate_sim, - cfg.meteo['inc']): - meteo_file = os.path.join( - cfg.icon_input_icbc, cfg.meteo['prefix'] + - time.strftime(cfg.meteo['nameformat'])) - if cfg.model == 'icon-art' or cfg.model == 'icon-art-oem': - chem_file = os.path.join( - cfg.icon_input_icbc, cfg.chem['prefix'] + - time.strftime(cfg.chem_nameformat)) - datafile_list_chem.append(chem_file + cfg.chem['suffix']) - if meteo_file.endswith('00'): - datafile_list.append(meteo_file + cfg.meteo['suffix']) 
- else: - datafile_list_rest.append(meteo_file + cfg.meteo['suffix']) - datafile_list = ' '.join([str(v) for v in datafile_list]) - datafile_list_rest = ' '.join([str(v) for v in datafile_list_rest]) - datafile_list_chem = ' '.join([str(v) for v in datafile_list_chem]) - - #----------------------------------------------------- - # Write and submit runscripts - #----------------------------------------------------- - for runscript in cfg.icontools_runjobs: - logfile = os.path.join(cfg.log_working_dir, 'prepare_data') - logfile_finish = os.path.join(cfg.log_finished_dir, - 'prepare_data') - with open(os.path.join(cfg.case_path, - runscript)) as input_file: - to_write = input_file.read() - output_run = os.path.join(cfg.icon_work, "%s.job" % runscript) - with open(output_run, "w") as outf: - outf.write( - to_write.format(cfg=cfg, - meteo=cfg.meteo, - logfile=logfile, - logfile_finish=logfile_finish, - datafile_list=datafile_list, - datafile_list_rest=datafile_list_rest, - datafile_list_chem=datafile_list_chem)) - logging.info(f" Starting icontools runscript {runscript}.") - result = subprocess.run([ - "sbatch", "--wait", - os.path.join(cfg.icon_work, "%s.job" % runscript) - ]) - exitcode = result.returncode - if exitcode != 0: - raise RuntimeError( - "sbatch returned exitcode {}".format(exitcode)) - logging.info(f"{runscript} successfully executed.") - - #----------------------------------------------------- - # Add GEOSP to all meteo files - #----------------------------------------------------- - for time in tools.iter_hours(cfg.startdate_sim, cfg.enddate_sim, - cfg.meteo['inc']): - # Specify file names - geosp_filename = time.replace( - hour=0).strftime(cfg.meteo['prefix'] + - cfg.meteo['nameformat']) + '_lbc.nc' - geosp_file = os.path.join(cfg.icon_input_icbc, geosp_filename) - src_filename = time.strftime( - cfg.meteo['prefix'] + cfg.meteo['nameformat']) + '_lbc.nc' - src_file = os.path.join(cfg.icon_input_icbc, src_filename) - merged_filename = time.strftime( - cfg.meteo['prefix'] + - cfg.meteo['nameformat']) + '_merged.nc' - merged_file = os.path.join(cfg.icon_input_icbc, - merged_filename) - - # Copy GEOSP file from last run if not present - if not os.path.exists(geosp_file): - geosp_src_file = os.path.join(cfg.icon_input_icbc_prev, - geosp_filename) - tools.copy_file(geosp_src_file, - cfg.icon_input_icbc, - output_log=True) - - # Load GEOSP data array as da_geosp at time 00: - ds = xr.open_dataset(src_file) - ds_geosp = xr.open_dataset(geosp_file) - da_geosp = ds_geosp['GEOSP'] - - # Merge GEOSP-dataset with other timesteps - if (time.hour != 0): - # Change values of time dimension to current time - da_geosp = da_geosp.assign_coords( - time=[np.datetime64(time)]) - # Merge GEOSP into temporary file - ds_merged = xr.merge([ds, da_geosp]) - ds_merged.attrs = ds.attrs - ds_merged.to_netcdf(merged_file) - # Logging info for merging GEOSP - logging.info("Added GEOSP to file {}".format(merged_file)) - # Rename file to get original file name - tools.rename_file(merged_file, src_file) - - #----------------------------------------------------- - # Add Q (copy of QV) and/or PS to initial file - #----------------------------------------------------- - if cfg.model.startswith('icon-art'): - meteo_file = os.path.join( - cfg.icon_input_icbc, - cfg.startdate_sim.strftime(cfg.meteo['prefix'] + - cfg.meteo['nameformat']) + - '.nc') - if os.path.isfile(meteo_file): - merged_file = os.path.join( - cfg.icon_input_icbc, - cfg.startdate_sim.strftime(cfg.meteo['prefix'] + - cfg.meteo['nameformat']) + - 
'_merged.nc') - ds = xr.open_dataset(meteo_file) - merging = False - if 'PS' not in ds: - if 'LNPS' not in ds: - raise KeyError( - f"'LNPS' must be found in the initial conditions file {meteo_file}" - ) - merging = True - ds['PS'] = ds['LNPS'] - ds['PS'].attrs = ds['LNPS'].attrs - ds['PS'] = np.exp(ds['PS']) - ds['PS'] = ds['PS'].squeeze(dim='lev_2') - ds['PS'].attrs["long_name"] = 'surface pressure' - ds['PS'].attrs['units'] = 'Pa' - logging.info(f"Added PS to file {meteo_file}") - if 'Q' not in ds: - merging = True - ds['Q'] = ds['QV'] - logging.info(f"Added Q to file {meteo_file}") - if merging: - ds.to_netcdf(merged_file) - tools.rename_file(merged_file, meteo_file) - - #----------------------------------------------------- - # In case of OEM: merge chem tracers with meteo-files - #----------------------------------------------------- - if cfg.model == 'icon-art-oem': - for time in tools.iter_hours(cfg.startdate_sim, - cfg.enddate_sim, - cfg.meteo['inc']): - if time == cfg.startdate_sim: - #------------ - # Merge IC: - #------------ - meteo_file = os.path.join( - cfg.icon_input_icbc, - time.strftime(cfg.meteo['prefix'] + - cfg.meteo['nameformat']) + '.nc') - if os.path.isfile(meteo_file): - chem_file = os.path.join( - cfg.icon_input_icbc, cfg.chem['prefix'] + - time.strftime(cfg.chem['nameformat']) + '.nc') - merged_file = os.path.join( - cfg.icon_input_icbc, - time.strftime(cfg.meteo['prefix'] + - cfg.meteo['nameformat']) + - '_merged.nc') - ds_meteo = xr.open_dataset(meteo_file) - ds_chem = xr.open_dataset(chem_file) - # LNPS --> PS - ds_chem['PS'] = ds_chem['LNPS'] - ds_chem['PS'].attrs = ds_chem['LNPS'].attrs - ds_chem['PS'] = ds_chem['PS'].squeeze(dim='lev_2') - ds_chem['PS'].attrs[ - "long_name"] = 'surface pressure' - # merge: - ds_merged = xr.merge([ds_meteo, ds_chem], - compat="override") - #ds_merged.attrs = ds.attrs - ds_merged.to_netcdf(merged_file) - # Rename file to get original file name - tools.rename_file(merged_file, meteo_file) - tools.remove_file(chem_file) - logging.info( - "Added chemical tracer to file {}".format( - merged_file)) - - #------------ - # Merge LBC: - #------------ - meteo_file = os.path.join( - cfg.icon_input_icbc, - time.strftime(cfg.meteo['prefix'] + - cfg.meteo['nameformat']) + '_lbc.nc') - chem_file = os.path.join( - cfg.icon_input_icbc, cfg.chem['prefix'] + - time.strftime(cfg.chem_nameformat) + '_lbc.nc') - merged_file = os.path.join( - cfg.icon_input_icbc, - time.strftime(cfg.meteo['prefix'] + - cfg.meteo['nameformat']) + '_merged.nc') - ds_meteo = xr.open_dataset(meteo_file) - ds_chem = xr.open_dataset(chem_file) - # LNPS --> PS - ds_chem['PS'] = ds_chem['LNPS'] - ds_chem['PS'].attrs = ds_chem['LNPS'].attrs - ds_chem['PS'].attrs["long_name"] = 'surface pressure' - ds_chem['TRCH4_chemtr'] = ds_chem['CH4_BG'] - # merge: - ds_merged = xr.merge([ds_meteo, ds_chem], - compat="override") - #ds_merged.attrs = ds.attrs - ds_merged.to_netcdf(merged_file) - # Rename file to get original file name - tools.rename_file(merged_file, meteo_file) - tools.remove_file(chem_file) - logging.info( - "Added chemical tracer to file {}".format(merged_file)) - - # If COSMO (and not ICON): - else: - logging.info('COSMO analysis data for IC/BC') - - dest_path = os.path.join(cfg.int2lm_input, 'meteo') - tools.create_dir(dest_path, "meteo input") - - source_nameformat = cfg.meteo['nameformat'] - if cfg.meteo['prefix'] == 'lffd': - # nested runs use cosmoart-output as meteo data - # have to copy the *c.nc-file - src_file = os.path.join( - cfg.meteo['dir'], - 
cfg.startdate_sim.strftime(source_nameformat + 'c.nc')) - - tools.copy_file(src_file, dest_path, output_log=True) - - logging.info("Copied constant-param file from {} to {}".format( - src_file, dest_path)) - - # extend nameformat with ending to match cosmo-output - source_nameformat += '.nc' - - if cfg.meteo['prefix'] == 'efsf': - source_nameformat = cfg.meteo['prefix'] + '%y%m%d%H' - - num_steps = 0 - meteo_dir = cfg.meteo['dir'] - subdir = os.path.join(meteo_dir, - cfg.startdate_sim.strftime('%y%m%d%H')) - for time in tools.iter_hours(cfg.startdate_sim, cfg.enddate_sim, - cfg.meteo['inc']): - dest_path = os.path.join(cfg.int2lm_input, 'meteo') - src_file = os.path.join(meteo_dir, - time.strftime(source_nameformat)) - - if cfg.meteo['prefix'] == 'efsf': - if time == cfg.startdate_sim: - src_file = os.path.join(subdir, - 'eas' + time.strftime('%Y%m%d%H')) - if not os.path.isfile(src_file) and cfg.meteo.get('dir_alt') \ - is not None: - meteo_dir = cfg.meteo['dir_alt'] - subdir = os.path.join( - meteo_dir, cfg.startdate_sim.strftime('%y%m%d%H')) - src_file = os.path.join( - subdir, 'eas' + time.strftime('%Y%m%d%H')) - dest_path = os.path.join(cfg.int2lm_input, 'meteo', - cfg.meteo['prefix'] + '00000000') - else: - td = time - cfg.startdate_sim - timedelta(hours=6 * - num_steps) - days = str(td.days).zfill(2) - hours = str(td.seconds // 3600).zfill(2) - td_total = time - cfg.startdate_sim - days_total = str(td_total.days).zfill(2) - hours_total = str(td_total.seconds // 3600).zfill(2) - - src_file = os.path.join( - subdir, cfg.meteo['prefix'] + days + hours + '0000') - dest_path = os.path.join( - cfg.int2lm_input, 'meteo', cfg.meteo['prefix'] + - days_total + hours_total + '0000') - - # Next time, change directory - checkdir = os.path.join(meteo_dir, - time.strftime('%y%m%d%H')) - if os.path.isdir(checkdir): - num_steps += 1 - subdir = checkdir - elif cfg.meteo.get('dir_alt') is not None: - checkdir = os.path.join(cfg.meteo['dir_alt'], - time.strftime('%y%m%d%H')) - if os.path.isdir(checkdir): - num_steps += 1 - subdir = checkdir - meteo_dir = cfg.meteo['dir_alt'] - logging.info( - "Switching to other input directory from {} to {}" - .format(cfg.meteo['dir'], - cfg.meteo['dir_alt'])) - elif not os.path.exists(src_file): - # special case for MeteoSwiss COSMO-7 data - archive = '/store/mch/msopr/owm/COSMO-7' - yy = time.strftime("%y") - path = '/'.join([archive, 'ANA' + yy]) - src_file = os.path.join(path, time.strftime(source_nameformat)) - - # copy meteo file from project folder to - tools.copy_file(src_file, dest_path, output_log=True) - - logging.info("Copied file from {} to {}".format( - src_file, dest_path)) - - # Other IC/BC data - inv_to_process = [] - if cfg.model == 'cosmo-ghg': - try: - CAMS = dict(fullname="CAMS", - nickname="cams", - executable="cams4int2cosmo", - indir=cfg.cams['dir_orig'], - outdir=cfg.cams['dir_proc'], - param=[{ - 'inc': cfg.cams['inc'], - 'suffix': cfg.cams['suffix'] - }]) - inv_to_process.append(CAMS) - except AttributeError: - pass - try: - CT = dict(fullname="CarbonTracker", - nickname="ct", - executable="ctnoaa4int2cosmo", - indir=cfg.ct_dir_orig, - outdir=cfg.ct_dir_proc, - param=cfg.ct_parameters) - inv_to_process.append(CT) - except AttributeError: - pass - elif cfg.model == 'cosmo-art': - try: - MOZART = dict(fullname='MOZART', - nickname='mozart', - executable='mozart2int2lm', - indir=cfg.mozart_file_orig, - outdir=cfg.mozart_dir_proc, - param=[{ - 'inc': cfg.mozart_inc, - 'suffix': cfg.mozart_prefix - }]) - inv_to_process.append(MOZART) - except 
AttributeError: - pass - - if cfg.model == 'cosmo-ghg' or cfg.model == 'cosmo-art': - logging.info("Processing " + - ", ".join([i["fullname"] - for i in inv_to_process]) + " data") - - scratch_path = os.path.join(cfg.int2lm_input, 'icbc') - tools.create_dir(scratch_path, "icbc input") - - for inv in inv_to_process: - logging.info(inv["fullname"] + " files") - tools.create_dir(inv["outdir"], "processed " + inv["fullname"]) - - for p in inv["param"]: - inc = p["inc"] - for time in tools.iter_hours(cfg.startdate_sim, - cfg.enddate_sim, inc): - logging.info(time) - - filename = os.path.join( - inv["outdir"], p["suffix"] + "_" + - time.strftime("%Y%m%d%H") + ".nc") - if not os.path.exists(filename): - logging.info(filename) - try: - to_call = getattr(tools, inv["executable"]) - to_call.main(time, inv["indir"], inv["outdir"], - p) - except: - logging.error("Preprocessing " + - inv["fullname"] + " data failed") - raise - - # copy to (temporary) run input directory - tools.copy_file(filename, - scratch_path, - output_log=True) - - logging.info("OK") diff --git a/jobs/prepare_icon.py b/jobs/prepare_icon.py new file mode 100644 index 00000000..ca3d6872 --- /dev/null +++ b/jobs/prepare_icon.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from pathlib import Path +import logging +from . import tools + +BASIC_PYTHON_JOB = True + + +def set_cfg_variables(cfg): + cfg.icon_base = cfg.chain_root / 'icon' + cfg.icon_input = cfg.icon_base / 'input' + cfg.icon_input_icbc = cfg.icon_input / 'icbc' + cfg.icon_work = cfg.icon_base / 'run' + cfg.icon_output = cfg.icon_base / 'output' + cfg.icon_output_reduced = cfg.icon_base / 'output_reduced' + cfg.icon_restart_out = cfg.icon_base / 'restart' + if cfg.chunk_id_prev: + cfg.icon_restart_in = cfg.chain_root_prev / 'icon' / 'run' + cfg.icon_input_icbc_prev = cfg.chain_root_prev / 'icon' / 'input' / 'icbc' + + cfg.input_files_scratch = {} + for dsc, file in cfg.input_files.items(): + cfg.input_files[dsc] = (p := Path(file)) + cfg.input_files_scratch[dsc] = cfg.icon_input / p.name + + cfg.create_vars_from_dicts() + + cfg.ini_datetime_string = cfg.startdate.strftime('%Y-%m-%dT%H:00:00Z') + cfg.end_datetime_string = cfg.enddate.strftime('%Y-%m-%dT%H:00:00Z') + + if cfg.lrestart == '.TRUE.': + cfg.restart_filename = 'restart_atm_DOM01.nc' + cfg.restart_file = cfg.icon_restart_in / cfg.restart_filename + cfg.restart_file_scratch = cfg.icon_work / cfg.restart_filename + + # Nudge type (global or nothing) + cfg.nudge_type = 2 if hasattr(cfg, + 'era5') and cfg.era5_global_nudging else 0 + # Time step for global nudging in seconds + cfg.nudging_step_seconds = cfg.nudging_step * 3600 if hasattr( + cfg, 'nudging_step') else None + # Prescribed initial conditions for CH4, CO and/or OH + cfg.iart_init_gas = 4 if hasattr( + cfg, 'species_inicond') and cfg.species_inicond else 0 + + cfg.startdate_sim_yyyymmdd_hh = cfg.startdate_sim.strftime('%Y%m%d_%H') + + +def main(cfg): + """ + **ICON Data Preparation** + + This function prepares input data for ICON simulations by creating necessary directories, + copying meteorological files, and handling specific data processing. + + - Create working directories and copy input files + + Parameters + ---------- + cfg : Config + Object holding all user-configuration parameters as attributes. + + Raises + ------ + RuntimeError + If any subprocess returns a non-zero exit code during execution. 
+ """ + set_cfg_variables(cfg) + tools.change_logfile(cfg.logfile) + + # Create directories + tools.create_dir(cfg.icon_work, "icon_work") + tools.create_dir(cfg.icon_input_icbc, "icon_input_icbc") + tools.create_dir(cfg.icon_output, "icon_output") + tools.create_dir(cfg.icon_restart_out, "icon_restart_out") + + logging.info('Copy ICON input data (IC/BC) to working directory') + # Copy input files to scratch + script_lines = [ + '#!/usr/bin/env bash', + f'#SBATCH --job-name="copy_input_{cfg.casename}_{cfg.startdate_sim_yyyymmddhh}_{cfg.enddate_sim_yyyymmddhh}"', + f'#SBATCH --account={cfg.compute_account}', '#SBATCH --time=00:10:00', + f'#SBATCH --partition={cfg.compute_queue}', + f'#SBATCH --constraint={cfg.constraint}', '#SBATCH --nodes=1', + f'#SBATCH --output={cfg.logfile}', '#SBATCH --open-mode=append', + f'#SBATCH --chdir={cfg.icon_work}', '' + ] + for target, destination in zip(cfg.input_files.values(), + cfg.input_files_scratch.values()): + script_lines.append(f'rsync -av {target} {destination}') + + with (script := cfg.icon_work / 'copy_input.job').open('w') as f: + f.write('\n'.join(script_lines)) + + cfg.submit('prepare_icon', script) + logging.info("OK") diff --git a/jobs/reduce_output.py b/jobs/reduce_output.py index c0ef560a..af68b2cc 100644 --- a/jobs/reduce_output.py +++ b/jobs/reduce_output.py @@ -15,15 +15,17 @@ from . import tools +BASIC_PYTHON_JOB = True -def main(startdate, enddate, cfg, model_cfg): + +def main(cfg): """ Calculates 2D column data and writes them into a new netCDF file. Only a fixed number of levels from **COSMO** output are considered. Those files are written into a new directory ``cosmo_output_reduced``. The number of levels is set by the configuration variable - ``cfg.output_levels`` (default = all levels). + ``cfg.reduce_output['output_levels']`` (default = all levels). 
     **Important**: If several ``GRIBOUT`` sections are used to split the output
     data, then this code only works in case of the following:
@@ -39,16 +41,10 @@ def main(startdate, enddate, cfg, model_cfg):
 
     Parameters
     ----------
-    starttime : datetime-object
-        The starting date of the simulation
-    hstart : int
-        Offset (in hours) of the actual start from the starttime
-    hstop : int
-        Length of simulation (in hours)
-    cfg : config-object
+    cfg : Config
         Object holding all user-configuration parameters as attributes
     """
-
+    tools.change_logfile(cfg.logfile)
     cosmo_output = cfg.cosmo_output
     output_path = cfg.cosmo_output_reduced
 
@@ -73,6 +69,12 @@ def main(startdate, enddate, cfg, model_cfg):
 
     # Wait for Cosmo to finish first
     tools.check_job_completion(cfg.log_finished_dir, "cosmo")
+
+    # Set defaults in the 'reduce_output' dict; these entries are used below
+    if 'output_levels' not in cfg.reduce_output:
+        cfg.reduce_output['output_levels'] = -1
+    if 'convert_gas' not in cfg.reduce_output:
+        cfg.reduce_output['convert_gas'] = True
     """Get list of constant files"""
     cfiles = []
     read_cfile = False
@@ -118,16 +120,15 @@ def main(startdate, enddate, cfg, model_cfg):
     py_file = os.path.join(tool_path, 'reduce_output_start_end.py')
     alternate_csv_file = os.path.join(cfg.chain_src_dir, 'cases', cfg.casename,
                                       'variables.csv')
-    logfile = os.path.join(cfg.log_working_dir, 'reduce_output')
 
     logging.info('Submitting job to the queue...')
 
     result = subprocess.run([
-        "sbatch", '--output=' + logfile, '--open-mode=append', '--wait',
+        "sbatch", '--output=' + cfg.logfile, '--open-mode=append', '--wait',
         bash_file, py_file, cosmo_output, output_path, str_startdate,
         str_enddate,
-        str(cfg.output_levels),
+        str(cfg.reduce_output['output_levels']),
         str(output_step), alternate_csv_file,
-        str(cfg.convert_gas)
+        str(cfg.reduce_output['convert_gas'])
     ])
 
     exitcode = result.returncode
diff --git a/jobs/tools/ICON_to_point.py b/jobs/tools/ICON_to_point.py
new file mode 100644
index 00000000..65371be1
--- /dev/null
+++ b/jobs/tools/ICON_to_point.py
@@ -0,0 +1,355 @@
+import numpy as np
+import xarray as xr
+from sklearn.neighbors import BallTree
+from numpy import argmin  # (scipy no longer re-exports numpy's argmin)
+import argparse
+
+
+def get_horizontal_distances(longitude, latitude, icon_grid_path, k=5):
+    """
+    Get horizontal distances between points and their k nearest
+    neighbours on the ICON grid using a quick BallTree algorithm
+
+    Parameters
+    ----------
+    longitude : list or 1D np.array
+        e.g., [12] or np.array([12,13,14])
+
+    latitude : list or 1D np.array
+        e.g., [52] or np.array([52,53,54])
+
+    icon_grid_path : str
+        Contains the path to the ICON grid
+
+    k : int, default is 5
+        Sets the number of nearest neighbours desired
+
+    Returns
+    -------
+    distances: 2D np.array
+        Contains the distance-on-a-sphere between the target point(s)
+        and its nearest neighbours
+
+    indices: 2D np.array
+        Contains the indices to the ICON grid cells of the corresponding
+        nearest neighbours
+    """
+    # Get ICON grid specifics
+    icon_grid = xr.open_dataset(icon_grid_path)
+    clon = icon_grid.clon.values
+    clat = icon_grid.clat.values
+
+    # Generate BallTree
+    icon_lat_lon = np.column_stack([clat, clon])
+    tree = BallTree(icon_lat_lon, metric='haversine')
+
+    # Query BallTree
+    target_lat_lon = np.column_stack(
+        [np.deg2rad(latitude), np.deg2rad(longitude)])
+    (distances, indices) = tree.query(target_lat_lon,
+                                      k=k,
+                                      return_distance=True)
+
+    if np.any(distances == 0):
+        print(
+            'The longitude/latitude coincides identically with an ICON cell, which is an issue for the inverse distance weighting.'
+ ) + print('I will slightly modify this value to avoid errors.') + distances[distances == 0] = 1e-12 + + if np.any(distances is np.nan): + raise ValueError( + 'The distance between ICON and your lat/lon point could not be established...' + ) + + # NB: the 'distances' are in units of radians; i.e., it assumes the Earth is a unit sphere! + # To get realistic distances, you need to multiply 'distances' with 6371e3 meters, i.e., the + # radius of the earth. However, such a constant factor cancels out when we compute the + # horizontal interpolation weights (which are normalized!), so there is no need to apply the + # multiplication with 6371e3. + + return distances, indices + + +def get_nearest_vertical_distances(model_topography, model_levels, + base_height_msl, inlet_height_agl, + interpolation_strategy): + """ + Get the 2 nearest distances between ICON grid points and specified + station altitudes + + Parameters + ---------- + model_topography : 1D np.array + This is the elevation over mean sea level of the ICON grid + + model_levels : 2D np.array + Dimensions [ICON_heights, number_of_samples] + + base_height_msl : list or 1D np.array + e.g., [20,] or np.array([72,180,40]) + + inlet_height_agl : list or 1D np.array + e.g., [15,] or np.array([15, 21, 42]) + + interpolation_strategy : list of strings + e.g., ['ground',] or ['ground','mountain','ground'] + Can be 'ground' or 'mountain', or 'middle' (the latter is between the ground and mountain approach) + 'ground' uses the model topography + station altitude over ground + 'mountain' uses the absolute altitude over mean sea level + + Returns + ------- + vertical_distances : 3D np.array + Contains the absolute (!) distance between the target point(s) + and its 2 nearest neighbour levels + + vertical_indices: 3D np.array + Contains the indices to the ICON height levels of the corresponding 2 + nearest neighbour levels + """ + # Get the target sampling altitude with a list comprehension + target_altitude = [ + model_topography.isel({ + "station": i + }).values + inlet_height_agl[i] if strategy == 'ground' else + np.repeat(base_height_msl[i], model_topography.shape[1]) + + inlet_height_agl[i] if strategy == 'mountain' else + np.repeat(base_height_msl[i], model_topography.shape[1]) / 2 + + model_topography.isel({ + "station": i + }).values / 2 + inlet_height_agl[i] + # if strategy=='middle' + for (i, strategy) in enumerate(interpolation_strategy) + ] + target_altitude = xr.DataArray(target_altitude, dims=['station', 'ncells']) + + # Select 2 closest neighbouring levels + first_negative = (model_levels <= target_altitude).argmax( + dim=model_levels.dims[0]) # First index where model lies below target + vertical_indices = np.stack( + [first_negative, first_negative - 1], + axis=0) # Second index thus lies /above/ the target + vertical_indices[:, first_negative == 0] = model_levels.values.shape[ + 0] - 1 # If no result found: sample lies below lowest model level. 
+
+    # Sample the corresponding vertical distances between the target and the model levels
+    vertical_distances = np.take_along_axis(
+        (model_levels - target_altitude).values, vertical_indices, axis=0)
+
+    return np.abs(vertical_distances).T, vertical_indices.T
+
+
+def icon_to_point(longitude,
+                  latitude,
+                  inlet_height_agl,
+                  base_height_msl,
+                  icon_field_path,
+                  icon_grid_path,
+                  interpolation_strategy,
+                  k=5,
+                  field_name=None):
+    """
+    Function to interpolate ICON fields to point locations
+
+    Parameters
+    ----------
+    longitude : list or 1D np.array
+        e.g., [12,] or np.array([12,13,14])
+
+    latitude : list or 1D np.array
+        e.g., [52,] or np.array([52,53,54])
+
+    inlet_height_agl : list or 1D np.array
+        e.g., [15,] or np.array([15, 21, 42])
+        This is the height of the *inlet above the ground*
+        (e.g., for Cabauw: base_height_msl=0,
+                           inlet_height_agl=27)
+
+    base_height_msl : list or 1D np.array
+        e.g., [20,] or np.array([72,180,40])
+        This is the elevation of the *station base above mean sea level*
+        (e.g., for Jungfraujoch: base_height_msl=3850,
+                                 inlet_height_agl=5)
+
+    icon_field_path : str
+        Contains the path to the unstructured ICON output
+
+    icon_grid_path : str
+        Contains the path to the ICON grid
+
+    interpolation_strategy : list of strings
+        e.g., ['ground',] or ['ground','mountain','ground']
+        Can be 'ground', 'mountain', or 'middle' (the latter lies between
+        the ground and mountain approaches)
+        'ground' uses the model topography + station altitude over ground
+        'mountain' uses the absolute altitude over mean sea level
+
+    k : int, default is 5
+        Sets the number of horizontal nearest neighbours desired
+
+    field_name : str, or list of strings, optional
+        e.g. 'qv', or ['qv','temp'], or None
+        If no field_name is set, the whole dataset is interpolated
+        in the vertical and horizontal directions.
+
+    Returns
+    -------
+    xr.Dataset
+        An Xarray dataset organised by 'station', containing the original
+        input specifications, and the vertically and horizontally interpolated
+        values
+    """
+
+    # Load dataset
+    icon_field = xr.open_dataset(icon_field_path)
+    # Get dimension names
+    icon_heights = icon_field.z_mc.dims[
+        0]  # Dimension name (something like "heights_5")
+    icon_cells = icon_field.z_mc.dims[
+        1]  # Dimension name (something like "ncells")
+    icon_field[icon_cells] = icon_field[
+        icon_cells]  # Explicitly assign 'ncells'
+
+    # --- Horizontal grid selection & interpolation weights
+    # Get k nearest horizontal distances (for use in inverse distance weighting)
+    horizontal_distances, icon_grid_indices = get_horizontal_distances(
+        longitude, latitude, icon_grid_path, k=k)
+
+    horizontal_interp = 1 / horizontal_distances / (
+        1 / horizontal_distances).sum(axis=1, keepdims=True)
+    weights_horizontal = xr.DataArray(horizontal_interp,
+                                      dims=["station", icon_cells])
+    ind_X = xr.DataArray(icon_grid_indices, dims=["station", icon_cells])
+    icon_subset = icon_field.isel({icon_cells: ind_X})
+
+    # --- Vertical level selection & interpolation weights
+    # Get the 2 nearest vertical distances (for use in linear interpolation)
+    model_topography = icon_subset.z_ifc[-1]
+    model_levels = icon_subset.z_mc
+    vertical_distances, icon_level_indices = get_nearest_vertical_distances(
+        model_topography, model_levels, base_height_msl, inlet_height_agl,
+        interpolation_strategy)
+
+    vertical_interp = vertical_distances[:, :, ::-1] / (vertical_distances.sum(
+        axis=-1, keepdims=True))
+    # Say the next two model layers lie at [-5, +15] m offset from the
+    # point's vertical position. Linear interpolation between those two
+    # points is then simply [15/(15+5), 5/(15+5)] = [3/4, 1/4]. That is what
+    # the code does (and why it reverses the order on the last axis, and why
+    # only the absolute vertical distances are needed).
+    # (As a curiosity, linear interpolation with 2 points is the same as
+    # inverse distance weighting. But this formulation is more stable, as
+    # divisions by 0 may otherwise occur in the inverse distance weighting!)
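+    # Shape note (illustrative): with 3 stations, k=5 neighbours and the
+    # 2 bracketing levels, vertical_interp has shape (5, 3, 2) and
+    # horizontal_interp has shape (3, 5); each is normalized so that the
+    # weights per station sum to 1 along its interpolation axis.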
+ + weights_vertical = xr.DataArray(vertical_interp, + dims=["ncells", "station", icon_heights]) + ind_Z = xr.DataArray(icon_level_indices, + dims=["ncells", "station", icon_heights]) + + # --- Generate output + # Subset the ICON field if we want only a few fields of output + if field_name is not None: + icon_subset = icon_subset[field_name] + # Include the input station parameters in the output + ds = xr.Dataset({ + 'longitude': (['station'], longitude), + 'latitude': (['station'], latitude), + 'inlet_height_agl': (['station'], inlet_height_agl), + 'base_height_msl': (['station'], base_height_msl), + 'interpolation_strategy': (['station'], interpolation_strategy) + }) + # Perform the interpolations + icon_subset = icon_subset.isel({icon_heights: ind_Z}) + icon_out = icon_subset.weighted(weights_vertical.fillna(0)).sum( + dim=icon_heights, + skipna=True).weighted(weights_horizontal).sum(dim=icon_cells) + icon_out = icon_out.where( + ~(weights_vertical.sum(dim=[icon_cells, icon_heights], + skipna=False)).isnull() + ) # Remove out of bounds values where weights_vertical has NaNs + return xr.merge([icon_out, ds]) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='Interpolate ICON output to point locations.') + parser.add_argument('-lon', + dest='longitude', + default=None, + type=float, + help='Longitude of interpolation target') + parser.add_argument('-lat', + dest='latitude', + default=None, + type=float, + help='Latitude of interpolation target') + parser.add_argument( + '-asl', + dest='elevation', + default=None, + type=float, + help= + 'Station surface elevation above sea level [absolute height asl: elevation+altitude]' + ) + parser.add_argument( + '-alt', + dest='altitude', + default=None, + type=float, + help= + 'Station altitude over surface [absolute height asl: elevation+altitude]' + ) + parser.add_argument('-fields', + dest='icon_field', + default=None, + type=str, + help='The ICON output fields') + parser.add_argument('-grid', + dest='icon_grid', + default=None, + type=str, + help='The ICON grid dynamic grid file') + parser.add_argument( + '-strat', + dest='strategy', + default='ground', + type=str, + help= + 'The interpolation strategy (should be "mountain", "ground", or "middle")' + ) + parser.add_argument( + '-k', + dest='k', + default=4, + type=int, + help='Number of nearest neighbours to interpolate with (e.g., 4 or 5)') + parser.add_argument( + '-field_name', + dest='field_name', + default=None, + type=str, + help='Field name to extract (if left out, all variables are extracted)' + ) + parser.add_argument('-output', + dest='output_dest', + default=None, + type=str, + help='Output NetCDF destination') + args = parser.parse_args() + + # Example run (note: most inputs should be lists, and the performance is optimized for these lists!) 
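+    # A hypothetical invocation for a single station (the file names are
+    # placeholders, not files shipped with the repository):
+    #   python ICON_to_point.py -lon 7.99 -lat 46.55 -asl 3850 -alt 5 \
+    #     -fields icon_output.nc -grid icon_grid.nc -strat mountain \
+    #     -k 5 -output jungfraujoch.nc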
+    output = icon_to_point(longitude=[args.longitude],
+                           latitude=[args.latitude],
+                           inlet_height_agl=[args.altitude],
+                           base_height_msl=[args.elevation],
+                           icon_field_path=args.icon_field,
+                           icon_grid_path=args.icon_grid,
+                           interpolation_strategy=[args.strategy],
+                           k=args.k,
+                           field_name=args.field_name)
+    output.to_netcdf(args.output_dest)
diff --git a/jobs/tools/__init__.py b/jobs/tools/__init__.py
index 6df6546c..15f39ead 100644
--- a/jobs/tools/__init__.py
+++ b/jobs/tools/__init__.py
@@ -166,14 +166,18 @@ def change_logfile(filename):
     """
     fileh = logging.FileHandler(filename, 'a', delay=True)
-    # log_format = logging.Formatter('%(levelname)s:%(message)s')
-    # fileh.setFormatter(log_format)
+    log_format = logging.Formatter('%(levelname)s: %(message)s')
+    fileh.setFormatter(log_format)
 
     log = logging.getLogger()  # root logger
+    log.setLevel(logging.INFO)  # Set the desired logging level
+
     if len(log.handlers) > 0:
-        log.handlers = [fileh]  # set the new handler
+        # If there are existing handlers, replace them with the new handler
+        log.handlers = [fileh]
     else:
-        logging.basicConfig(filename=filename, level=logging.INFO)
+        # If there are no existing handlers, add the new handler
+        log.addHandler(fileh)
 
 
 def create_dir(path, readable_name):
@@ -195,6 +199,7 @@ def create_dir(path, readable_name):
     """
     try:
         os.makedirs(path, exist_ok=True)
+        logging.info(f"Created {readable_name} directory at path {path}")
     except (OSError, Exception) as e:
         logging.error("Creating {} directory at path {} failed with {}".format(
             readable_name, path,
@@ -416,11 +421,11 @@ def check_job_completion(log_finished_dir, job, waittime=3000):
 
     Parameters
     ----------
-    cfg : config-object
+    cfg : Config
 
     log_finished_dir : directory for logfiles of finished jobs
 
-    job: string of job name, e.g. "meteo"
+    job: string of job name, e.g. "prepare_icon"
 
     waittime : time to wait (factor of .1 second)
         Defaults to 3000 (300 seconds)
diff --git a/jobs/tools/check_model.py b/jobs/tools/check_model.py
index 9a1b29c1..b7c9c409 100644
--- a/jobs/tools/check_model.py
+++ b/jobs/tools/check_model.py
@@ -5,18 +5,19 @@
 def check_model(cfg, model='COSMO'):
     """Check that the model specified in cfg matches the prescribed model.
 
-    Check that cfg.model == model. If not, raises a value-error.
+    Check that cfg.workflow_name == model. If not, raises a ValueError.
     Ignores capitalization of the strings
 
     Parameters
     ----------
-    cfg : config-object
+    cfg : Config
+        Object holding all user-configuration parameters as attributes.
 
     model : str
         Prescribed model
     """
     #don't care about capitalization
-    if not cfg.model.lower() == model.lower():
+    if not cfg.workflow_name.lower() == model.lower():
         raise ValueError("The model specified in the configuration file is {}"
                          ", but the job only applies to {}.".format(
-                             cfg.model, model))
+                             cfg.workflow_name, model))
diff --git a/jobs/tools/write_cosmo_input_ghg.py b/jobs/tools/write_cosmo_input_ghg.py
index e0a11ef3..f17804e0 100644
--- a/jobs/tools/write_cosmo_input_ghg.py
+++ b/jobs/tools/write_cosmo_input_ghg.py
@@ -96,6 +96,8 @@ def main(csv_filename, namelist_filename, cfg=None):
         Path to the source csv-file
     namelist_filename : str
         Path to the namelist file that will be created
+    cfg : Config
+        Object holding all user-configuration parameters as attributes.
""" with open(csv_filename, 'r') as csv_file: diff --git a/jobs/verify_chain.py b/jobs/verify_chain.py index 61e31365..05b537a9 100644 --- a/jobs/verify_chain.py +++ b/jobs/verify_chain.py @@ -1,17 +1,14 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Job to verify the correctness of the chain. The output of the example-case is -# compared to a reference output. - -# Author: dao, david.ochsner@empa.ch - import os import logging import netCDF4 as nc from . import tools +BASIC_PYTHON_JOB = True + def comp_data(dataset1, dataset2, variables): """Use tools.helper.datasets_equal to compare the datasets. @@ -19,52 +16,50 @@ def comp_data(dataset1, dataset2, variables): tools.helper.datasets_equal(dataset1, dataset2, variables, verbose=True) -def main(cfg, model_cfg): +def main(cfg): """Compare outputs of the chain to a reference. - Looks for the reference-file in ``cfg.reference_dir``. + Looks for the reference-file in ``cfg.verify_chain['reference_dir']``. - Looks for the output file in ``cfg.output_dir`` (if not ``None``), else it + Looks for the output file in ``cfg.verify_chain['output_dir']`` (if not ``None``), else it goes to the output directory created by the **COSMO**-job. - In the dict ``cfg.values_to_check``, the user specifies the names of the + In the dict ``cfg.verify_chain['values_to_check']``, the user specifies the names of the files to be compared as keys, and the variables to compare as a list. To compare the temperatures of the last output of the example case, the - following variables should be added to the ``config.py`` file: :: + following variables should be added to the ``config.yaml`` file: :: - reference_dir = os.path.join(input_root, "reference_output") - output_dir = None - values_to_check = {("reference_lffd2015010200.nc","lffd2015010200.nc"): + verify_chain['reference_dir'] = os.path.join(input_root, "reference_output") + verify_chain['output_dir'] = None + verify_chain['values_to_check'] = {("reference_lffd2015010200.nc","lffd2015010200.nc"): ['T']} Parameters ---------- - start_time : datetime-object - The starting date of the simulation - hstart : int - Offset (in hours) of the actual start from the start_time - hstop : int - Length of simulation (in hours) - cfg : config-object + cfg : Config Object holding all user-configuration parameters as attributes """ + tools.change_logfile(cfg.logfile) logging.info("Started verification") - for (ref_file, run_file), variables in cfg.values_to_check.items(): + for (ref_file, + run_file), variables in cfg.verify_chain['values_to_check'].items(): logging.info("Comparing " + str(variables)) # reference file location - ref_file_path = os.path.join(cfg.reference_dir, ref_file) + ref_file_path = os.path.join(cfg.verify_chain['reference_dir'], + ref_file) # run data location - if cfg.output_dir is None: + if cfg.verify_chain['output_dir'] is None: # Standard output location run_file_path = os.path.join( cfg.output_root, cfg.startdate_sim_yyyymmddhh + "_" + cfg.enddate_sim_yyyymmddhh, "cosmo_output", run_file) else: # User-provided output location - run_file_path = os.path.join(cfg.output_dir, run_file) + run_file_path = os.path.join(cfg.verify_chain['output_dir'], + run_file) logging.info("Output file: " + str(run_file_path)) logging.info("Reference file: " + str(ref_file_path)) diff --git a/run_chain.py b/run_chain.py index 743b54c2..2a6a43b0 100755 --- a/run_chain.py +++ b/run_chain.py @@ -2,32 +2,29 @@ # -*- coding: utf-8 -*- from datetime import datetime, timedelta - +import pytz import logging -import os -import sys 
-import time
 import shutil
 import argparse
-import yaml
 
 import jobs
 from jobs import tools
+from config import Config
 
 
 def parse_arguments():
-    """Parse command line arguments for the processing chain script.
+    """Parse command line arguments for the Processing Chain script.
 
     Parses and retrieves command line arguments, allowing users to specify
     run identifiers, jobs to execute, and various options to control the
-    execution of the processing chain.
+    execution of the Processing Chain.
 
     Returns
     -------
     argparse.Namespace
         A namespace object containing parsed command line arguments.
     """
-    parser = argparse.ArgumentParser(description="Run the processing chain.")
+    parser = argparse.ArgumentParser(description="Run the Processing Chain.")
 
     parser.add_argument("casenames",
                         nargs='+',
@@ -36,15 +33,15 @@ def parse_arguments():
                         "to be in cases/<casename>/. The runs are executed "
                         "sequentially in the order they're given here.")
 
-    jobs_help = ("List of job-names to be executed. A job is a .py-"
+    jobs_help = ("List of job names to be executed. A job is a .py "
                  "file in jobs/ with a main()-function which "
-                 "handles one aspect of the processing chain, for "
+                 "handles one aspect of the Processing Chain, for "
                  "example copying meteo-input data or launching a "
                  "job for int2lm. "
                  "Jobs are executed in the order in which they are "
                  "given here. "
                 "If no jobs are given, default jobs will be executed "
-                 "as defined in config/models.yaml.")
+                 "as defined in config/workflows.yaml.")
     parser.add_argument("-j",
                         "--jobs",
                         nargs='*',
@@ -52,27 +49,45 @@ def parse_arguments():
                         help=jobs_help,
                         default=None)
 
+    chunks_help = ("List of chunks to be executed. A chunk is a time "
+                   "frame within the total simulation period. "
+                   "It has the format `YYYYMMDDHH_YYYYMMDDHH`. "
+                   "If no chunks are given, all chunks within the "
+                   "simulation period will be executed.")
+    parser.add_argument("-c",
+                        "--chunks",
+                        nargs='*',
+                        dest="chunk_list",
+                        help=chunks_help,
+                        default=None)
+
+    sync_help = ("Force synchronous execution.")
+    parser.add_argument("-s",
+                        "--force-sync",
+                        action='store_true',
+                        help=sync_help)
+
+    no_logging_help = ("Disable logging for chain_status.log.")
+    parser.add_argument("--no-logging",
+                        action='store_false',
+                        dest="enable_logging",
+                        default=True,
+                        help=no_logging_help)
+
     force_help = ("Force the processing chain to redo all specified jobs,"
                   " even if they have been started already or were finished"
                   " previously. WARNING: Only logfiles get deleted,"
                   " other effects of a given job (copied files etc.)"
-                  " are simply overwritten. This may cause errors.")
+                  " are simply overwritten. This may cause errors"
+                  " or unexpected behavior.")
     parser.add_argument("-f", "--force", action='store_true', help=force_help)
 
-    tries_help = ("Amount of time the cosmo job is re-tried before crashing."
-                  " Default is 1.")
-    parser.add_argument("-t",
-                        "--try",
-                        help=tries_help,
-                        dest="ntry",
-                        type=int,
-                        default=1)
-
     resume_help = (
-        "Resume the processing chain by restarting the last unfinished job."
+        "Resume the Processing Chain by restarting the last unfinished job."
         " WARNING: Only the logfile gets deleted,"
         " other effects of a given job (copied files etc.)"
-        " are simply overwritten. This may cause errors.")
+        " are simply overwritten. This may cause errors"
+        " or unexpected behavior.")
     parser.add_argument("-r",
                         "--resume",
                         help=resume_help,
@@ -84,355 +99,16 @@ def parse_arguments():
     return args
 
 
-class Config():
-
-    def __init__(self, casename):
-        """Initialize an instance of the Config class.
- - Initializes an instance of the Config class with user-specific - and default attributes. The class represents a processing chain for a - particular case, and its attributes are populated based on the provided - `casename`. - - Parameters - ---------- - casename : str - The identifier for the case, typically specifying the configuration - and settings to be used in the processing chain. - - Attributes - ---------- - user_name : str - The username of the current user, obtained from the 'USER' environment variable. - email : str - The user's email address, initially set to None and updated using the `set_email` method. - casename : str - The specified case name for the processing chain. - chain_src_dir : str - The source directory for the processing chain, typically the current working directory. - case_path : str - The path to the case directory under 'cases/' for the specified `casename`. - work_root : str - The root directory for processing chain execution, typically located under the source directory. - - Notes - ----- - The method also loads user-defined attributes from the configuration file, - sets specific settings based on the node type ('gpu' or 'mc'), and initializes - other instance-specific attributes. - """ - # Global attributes (initialized with default values) - self.user_name = os.environ['USER'] - self.set_email() - self.casename = casename - self.set_account() - - self.chain_src_dir = os.getcwd() - self.case_path = os.path.join(self.chain_src_dir, 'cases', - self.casename) - self.work_root = os.path.join(self.chain_src_dir, 'work') - - # User-defined attributes from config file - self.load_config_file(casename) - - # Specific settings based on the node type ('gpu' or 'mc') - self.set_node_info() - - def load_config_file(self, casename): - """Load configuration settings from a YAML file and set them as attributes. - - This method reads the configuration settings from a YAML file located in - the 'cases/casename' directory and sets them as attributes of the instance. - - Parameters - ---------- - casename : str - Name of the folder in 'cases/' where the configuration files are stored. - - Returns - ------- - Config - The same `Config` instance with configuration settings as attributes. - - Raises - ------ - FileNotFoundError - If the specified configuration file or case directory is not found. - - Notes - ----- - If the configuration file does not exist, the method will attempt to suggest - a similar case directory based on a Levenshtein distance comparison with - existing case directories. The method directly assigns values from the - configuration file to instance attributes for easy access. - """ - cfg_file = os.path.join('cases', casename, 'config.yaml') - - if not os.path.isfile(cfg_file): - all_cases = [ - path.name for path in os.scandir('cases') if path.is_dir() - ] - closest_name = min([(tools.levenshtein(casename, name), name) - for name in all_cases], - key=lambda x: x[0])[1] - raise FileNotFoundError( - f"Case-directory '{casename}' not found, did you mean '{closest_name}'?" - ) - - try: - with open(cfg_file, 'r') as yaml_file: - cfg_data = yaml.load(yaml_file, Loader=yaml.FullLoader) - except FileNotFoundError: - raise FileNotFoundError( - f"No file 'config.yaml' in {os.path.dirname(cfg_file)}") - - # Directly assign values to instance attributes - for key, value in cfg_data.items(): - setattr(self, key, value) - - return self - - def set_account(self): - """Set the compute account based on user information. 
- - This method determines the compute account to be used based on the user's - name and system configuration. - - Returns - ------- - Config - The same `Config` instance with the `compute_account` attribute set. - - Notes - ----- - - If the user name is 'jenkins', the compute account is set to 'g110' for - Jenkins testing. - - If an account is specified in the user's '~/.acct' file, it will be used - as the compute account. - - If neither of the above conditions is met, the standard account is - determined using the 'id -gn' command. - """ - if self.user_name == 'jenkins': - # g110 account for Jenkins testing - self.compute_account = 'g110' - elif os.path.exists(os.environ['HOME'] + '/.acct'): - # Use account specified in ~/.acct file - with open(os.environ['HOME'] + '/.acct', 'r') as file: - self.compute_account = file.read().rstrip() - else: - # Use standard account - self.compute_account = os.popen("id -gn").read().splitlines()[0] - - return self - - def set_node_info(self): - """Set node-specific information based on configuration settings. - - This method configures node-specific settings, such as the number of tasks - per node and CUDA-related environment variables, based on the provided - configuration settings in the instance. - - Returns - ------- - Config - The same `Config` instance with updated node-specific attributes. - - Raises - ------ - ValueError - If the 'constraint' or 'run_on' configuration values are invalid. - """ - if self.constraint == 'gpu': - if self.model.startswith('icon'): - if self.run_on == 'gpu': - self.ntasks_per_node = 1 - elif self.run_on == 'cpu': - self.ntasks_per_node = 12 - else: - raise ValueError( - "Invalid value for 'run_on' in the configuration." - "It should be either 'gpu' or 'cpu'.") - else: - self.ntasks_per_node = 12 - self.mpich_cuda = ('export MPICH_RDMA_ENABLED_CUDA=1\n' - 'export MPICH_G2G_PIPELINE=256\n' - 'export CRAY_CUDA_MPS=1\n') - elif self.constraint == 'mc': - self.ntasks_per_node = 36 - self.mpich_cuda = '' - else: - raise ValueError( - "Invalid value for 'constraint' in the configuration." - "It should be either 'gpu' or 'mc'.") - - return self - - def set_restart_step_hours(self): - """Set the restart step in hours. - - Converts the 'restart_step' attribute, which is in ISO8601 duration format, - to hours and stores the result in the 'restart_step_hours' attribute. - - Returns - ------- - Config - The same `Config` instance with the 'restart_step_hours' attribute set. - """ - self.restart_step_hours = int( - tools.iso8601_duration_to_hours(self.restart_step)) - - return self - - def set_email(self): - """Set the user's email address based on system configuration. - - This method determines the user's email address based on the user's name - and system configuration. - - Returns - ------- - Config - The same `Config` instance with the `user_mail` attribute set. - - Notes - ----- - - If the user name is 'jenkins', the user's email address is set to None. - - If an email address is specified in the user's '~/.forward' file, it will - be used as the user's email address. - - If neither of the above conditions is met, the user's email address is set - to None. - """ - if self.user_name == 'jenkins': - self.user_mail = None - elif os.path.exists(os.environ['HOME'] + '/.forward'): - with open(os.environ['HOME'] + '/.forward', 'r') as file: - self.user_mail = file.read().rstrip() - else: - self.user_mail = None - - return self - - def print_config(self): - """Print the configuration attributes and their values. 
- - This method displays the configuration attributes and their corresponding - values in a formatted manner. Lists and dictionaries within the configuration - are also displayed with appropriate indentation. - - Notes - ----- - - The maximum column width for the attribute names is automatically determined. - - The method prints the attribute name, its type, and its value. - - If an attribute is a list, it is displayed with each item indented. - - If an attribute is a dictionary, it is also displayed with each key-value - pair indented. - """ - # max_col_width = max(len(key) for key in vars(self)) + 1 - max_col_width = 27 - - print("\nConfiguration:") - print(f"{'Attribute':<{max_col_width}} Type Value") - print("-" * 80) - for key, value in vars(self).items(): - if isinstance(value, list): - # If the value is a list, format it with indentation - print(f"{key:<{max_col_width}} list") - for item in value: - item_type = type(item).__name__ - print(f" - {item:<{max_col_width-4}} {item_type}") - elif isinstance(value, dict): - # If the value is a dictionary, format it as before - print(f"{key:<{max_col_width}} dict") - for sub_key, sub_value in value.items(): - sub_value_type = type(sub_value).__name__ - print( - f" - {sub_key:<{max_col_width-4}} {sub_value_type:<4} {sub_value}" - ) - else: - # Standard output - key_type = type(key).__name__ - print(f"{key:<{max_col_width}} {key_type:<4} {value}") - - def convert_paths_to_absolute(self): - """Convert relative file paths to absolute paths in the configuration. - - This method iterates through all variables and their dictionary entries in - the configuration and checks for string values that represent file paths. - If a file path is relative (starts with './'), it is converted to an - absolute path using `os.path.abspath`. - - Returns - ------- - Config - The same `Config` instance with relative file paths converted to absolute paths. - """ - # Loop through all variables and their dictionary entries - for attr_name, attr_value in self.__dict__.items(): - if isinstance(attr_value, str): - if os.path.isabs(attr_value): - # If the value is already an absolute path, continue to the next iteration - continue - # Convert relative paths to absolute paths - if attr_value.startswith('./'): - self.__dict__[attr_name] = os.path.abspath(attr_value) - elif isinstance(attr_value, dict): - # If the attribute is a dictionary, loop through its entries - for key, value in attr_value.items(): - if isinstance(value, str): - if os.path.isabs(value): - # If the value is already an absolute path, continue to the next iteration - continue - # Convert relative paths to absolute paths - if value.startswith('./'): - self.__dict__[attr_name][key] = os.path.abspath( - value) - - return self - - def create_vars_from_dicts(self): - """Create instance attributes from dictionary entries in the configuration. - - This method iterates through the instance's attribute dictionary and checks - for dictionary values. For each dictionary encountered, it creates new - instance attributes by concatenating the original attribute name and the - dictionary key, and assigns the corresponding values. - - Returns - ------- - Config - The same `Config` instance with new attributes created from dictionary entries. 
- """ - # Create a copy of the object's __dict__ to avoid modifying it during iteration - object_dict = vars(self).copy() - - for key, value in object_dict.items(): - if isinstance(value, dict): - for sub_key, sub_value in value.items(): - setattr(self, key + '_' + sub_key, sub_value) - return self - - -def run_chain(work_root, model_cfg, cfg, startdate_sim, enddate_sim, job_names, - force, resume): - """Run the processing chain, managing job execution and logging. - - This function sets up and manages the execution of a processing chain, handling +def run_chunk(cfg, force, resume): + """Run a chunk of the processing chain, managing job execution and logging. + + This function sets up and manages the execution of a Processing Chain, handling job execution, logging, and various configuration settings. Parameters ---------- - work_root : str - The path to the directory where the processing chain writes files during execution. - model_cfg : dict - Configuration settings for the modeling framework. cfg : Config Object holding user-defined configuration parameters as attributes. - startdate_sim : datetime-object - The start date of the simulation. - enddate_sim : datetime-object - The end date of the simulation. - job_names : list of str - List of names of jobs to execute on every timeslice. force : bool If True, it will force the execution of jobs regardless of their completion status. resume : bool @@ -449,194 +125,108 @@ def run_chain(work_root, model_cfg, cfg, startdate_sim, enddate_sim, job_names, - It checks for job completion status and resumes or forces execution accordingly. - Job log files are managed, and errors or timeouts are handled with notifications. """ - # Write current start and end dates to config variables - cfg.startdate_sim = startdate_sim - cfg.enddate_sim = enddate_sim - # Set forecast time cfg.forecasttime = (cfg.enddate_sim - cfg.startdate_sim).total_seconds() / 3600 - # String variables for startdate_sim - cfg.startdate_sim_yyyymmddhh = startdate_sim.strftime('%Y%m%d%H') - cfg.enddate_sim_yyyymmddhh = enddate_sim.strftime('%Y%m%d%H') - - # Folder naming and structure - cfg.job_id = f'{cfg.startdate_sim_yyyymmddhh}_{cfg.enddate_sim_yyyymmddhh}' - cfg.chain_root = os.path.join(work_root, cfg.casename, cfg.job_id) - - # Config variables for spinup runs (datetimes, job-id, etc.) 
- if hasattr(cfg, 'spinup'): - if cfg.first_one: # first run in spinup - cfg.chain_root_prev = None - else: # consecutive runs in spinup - cfg.startdate_sim_yyyymmddhh = cfg.startdate_sim.strftime( - '%Y%m%d%H') - enddate_sim_yyyymmddhh_prev = ( - cfg.enddate_sim - - timedelta(hours=cfg.restart_step_hours)).strftime('%Y%m%d%H') - - if cfg.second_one: - startdate_sim_yyyymmddhh_prev = (cfg.enddate_sim - timedelta( - hours=2 * cfg.restart_step_hours)).strftime('%Y%m%d%H') - else: # all other runs (i.e., get job_id from previous run) - startdate_sim_yyyymmddhh_prev = ( - cfg.enddate_sim - - timedelta(hours=2 * cfg.restart_step_hours + - cfg.spinup)).strftime('%Y%m%d%H') - - cfg.job_id_prev = f'{startdate_sim_yyyymmddhh_prev}_{enddate_sim_yyyymmddhh_prev}' - cfg.chain_root_prev = os.path.join(work_root, cfg.casename, - cfg.job_id_prev) - cfg.last_cosmo_output = os.path.join(cfg.chain_root_prev, 'cosmo', - 'output') - - # No restart for spinup simulations (= default values for no restart) - cfg.cosmo_restart_out = '' - cfg.cosmo_restart_in = '' - elif 'restart' in model_cfg['models'][cfg.model]['features']: - cfg.startdate_sim_prev = cfg.startdate_sim - timedelta( - hours=cfg.restart_step_hours) - cfg.enddate_sim_prev = cfg.enddate_sim - timedelta( - hours=cfg.restart_step_hours) - cfg.startdate_sim_prev_yyyymmddhh = cfg.startdate_sim_prev.strftime( - '%Y%m%d%H') - cfg.enddate_sim_prev_yyyymmddhh = cfg.enddate_sim_prev.strftime( - '%Y%m%d%H') - - cfg.job_id_prev = f'{cfg.startdate_sim_prev_yyyymmddhh}_{cfg.enddate_sim_prev_yyyymmddhh}' - cfg.chain_root_prev = os.path.join(work_root, cfg.casename, - cfg.job_id_prev) - - # Set restart directories - cfg.cosmo_restart_out = os.path.join(cfg.chain_root, 'cosmo', - 'restart') - cfg.cosmo_restart_in = os.path.join(cfg.chain_root_prev, 'cosmo', - 'restart') - - # Check constraint - if hasattr(cfg, 'constraint'): - assert cfg.constraint in ['gpu', 'mc'], ("Unknown constraint, use" - "gpu or mc") - - # If nested run: use output of mother-simulation - if 'nesting' in model_cfg['models'][ - cfg.model]['features'] and not os.path.isdir(cfg.meteo.dir): - # if ifs_hres_dir doesn't point to a directory, - # it is the name of the mother run - mother_name = cfg.meteo.dir - cfg.meteo.dir = os.path.join(work_root, mother_name, cfg.job_id, - 'cosmo', 'output') - cfg.meteo.inc = 1 - cfg.meteo.prefix = 'lffd' - # Logging - log_working_dir = os.path.join(cfg.chain_root, 'checkpoints', 'working') - log_finished_dir = os.path.join(cfg.chain_root, 'checkpoints', 'finished') - setattr(cfg, 'log_working_dir', log_working_dir) - setattr(cfg, 'log_finished_dir', log_finished_dir) + cfg.chain_root = cfg.work_root / cfg.casename / cfg.chunk_id + cfg.log_working_dir = cfg.chain_root / 'checkpoints' / 'working' + cfg.log_finished_dir = cfg.chain_root / 'checkpoints' / 'finished' # Create working directories tools.create_dir(cfg.chain_root, "chain_root") - tools.create_dir(log_working_dir, "log_working") - tools.create_dir(log_finished_dir, "log_finished") - - # Number of levels and switch for unit conversion for 'reduce_output' job - if not hasattr(cfg, 'output_levels'): - setattr(cfg, 'output_levels', -1) - if not hasattr(cfg, 'convert_gas'): - setattr(cfg, 'convert_gas', True) - - # run jobs (if required) - for job in job_names: - skip = False - - # if exists job is currently worked on or has been finished - if os.path.exists(os.path.join(log_working_dir, job)): - if not force: - while True: - if os.path.exists(os.path.join(log_finished_dir, job)): - print('Skip "%s" for chain 
"%s"' % (job, cfg.job_id)) - skip = True - break - elif resume: - resume = False - break - else: - print('Wait for "%s" of chain "%s"' % - (job, cfg.job_id)) - sys.stdout.flush() - for _ in range(3000): - time.sleep(0.1) + tools.create_dir(cfg.log_working_dir, "log_working") + tools.create_dir(cfg.log_finished_dir, "log_finished") + + # Config variables for spinup and restart runs + cfg.cosmo_restart_in = '' + cfg.cosmo_restart_out = '' + if hasattr(cfg, 'spinup'): + if cfg.chunk_id_prev: + cfg.chain_root_prev = cfg.work_root / cfg.casename / cfg.chunk_id_prev + cfg.last_cosmo_output = cfg.chain_root_prev / 'cosmo' / 'output' + elif 'restart' in cfg.workflow['features']: + if cfg.chunk_id_prev: + cfg.chain_root_prev = cfg.work_root / cfg.casename / cfg.chunk_id_prev + cfg.cosmo_restart_in = cfg.chain_root_prev / 'cosmo' / 'restart' + cfg.cosmo_restart_out = cfg.chain_root / 'cosmo' / 'restart' + + if not cfg.force_sync: + # Empty curent job ids + cfg.job_ids['current'] = {} + + # Submit current chunk + for job_name in cfg.jobs: + if (cfg.log_finished_dir / job_name).exists() and not force: + # Skip job if already finished + print(f' └── Skipping "{job_name}" job') + skip = True else: - os.remove(os.path.join(log_working_dir, job)) - try: - os.remove(os.path.join(log_finished_dir, job)) - except FileNotFoundError: - pass - - if not skip: - print('Process "%s" for chain "%s"' % (job, cfg.job_id)) - sys.stdout.flush() - - try_count = 1 + (args.ntry - 1) * (job == 'cosmo') - while try_count > 0: - try_count -= 1 - try: - # Change the log file - logfile = os.path.join(cfg.log_working_dir, job) - logfile_finish = os.path.join(cfg.log_finished_dir, job) - tools.change_logfile(logfile) - - # Launch the job - to_call = getattr(jobs, job) - to_call.main(cfg, model_cfg) - - shutil.copy(logfile, logfile_finish) - - exitcode = 0 - try_count = 0 - except: - subject = "ERROR or TIMEOUT in job '%s' for chain '%s'" % ( - job, cfg.job_id) - logging.exception(subject) - if cfg.user_mail: - message = tools.prepare_message( - os.path.join(log_working_dir, job)) - logging.info('Sending log file to %s' % cfg.user_mail) - tools.send_mail(cfg.user_mail, subject, message) - if try_count == 0: - raise RuntimeError(subject) - - if exitcode != 0 or not os.path.exists( - os.path.join(log_finished_dir, job)): - subject = "ERROR or TIMEOUT in job '%s' for chain '%s'" % ( - job, cfg.job_id) + print(f' └── Submitting "{job_name}" job') + + # Logfile settings + cfg.logfile = cfg.log_working_dir / job_name + cfg.logfile_finish = cfg.log_finished_dir / job_name + + # Submit the job + job = getattr(jobs, job_name) + if hasattr(job, 'BASIC_PYTHON_JOB') and job.BASIC_PYTHON_JOB: + cfg.submit_basic_python(job_name) + else: + job.main(cfg) + + # Wait for previous chunk jobs, monitor them and cycle info + cfg.cycle() + + else: # For nested run_chain.py + for job_name in cfg.jobs: + print(f' └── Process "{job_name}" for chunk "{cfg.chunk_id}"') + try: + # Change the log file + cfg.logfile = cfg.log_working_dir / job_name + cfg.logfile_finish = cfg.log_finished_dir / job_name + + # Launch the job + to_call = getattr(jobs, job_name) + to_call.main(cfg) + + shutil.copy(cfg.logfile, cfg.logfile_finish) + + exitcode = 0 + except Exception: + exitcode = 1 + subject = "ERROR or TIMEOUT in job '%s' for chunk '%s'" % ( + job_name, cfg.chunk_id) + logging.exception(subject) + if cfg.user_mail: + message = tools.prepare_message(cfg.log_working_dir / + job_name) + logging.info('Sending log file to %s' % cfg.user_mail) + 
tools.send_mail(cfg.user_mail, subject, message) + + if exitcode != 0 or not (cfg.log_finished_dir / job_name).exists(): + subject = "ERROR or TIMEOUT in job '%s' for chunk '%s'" % ( + job_name, cfg.chunk_id) if cfg.user_mail: - message = tools.prepare_message( - os.path.join(log_working_dir, job)) + message = tools.prepare_message(cfg.log_working_dir / + job_name) logging.info('Sending log file to %s' % cfg.user_mail) tools.send_mail(cfg.user_mail, subject, message) raise RuntimeError(subject) -def restart_runs(work_root, model_cfg, cfg, job_names, force, resume): +def restart_runs(cfg, force, resume): """Start subchains in specified intervals and manage restarts. This function slices the total runtime of the processing chain according to the - `cfg.restart_step_hours` configuration. It calls `run_chain()` for each + `cfg.restart_step_hours` configuration. It calls `run_chunk()` for each specified interval. Parameters ---------- - work_root : str - The path to the directory in which the chain writes files during execution. - model_cfg : dict - Configuration settings for the modeling framework. cfg : Config Object holding all user-configuration parameters as attributes. - job_names : list of str - List of names of jobs to execute on every timeslice. force : bool If True, it will force the execution of jobs regardless of their completion status. resume : bool @@ -644,123 +234,49 @@ def restart_runs(work_root, model_cfg, cfg, job_names, force, resume): Notes ----- - - The function iterates over specified intervals, calling `run_chain()` for each. + - The function iterates over specified intervals, calling `run_chunk()` for each. - It manages restart settings and logging for each subchain. """ - # run restarts - for startdate_sim in tools.iter_hours(cfg.startdate, cfg.enddate, - cfg.restart_step_hours): - enddate_sim = startdate_sim + timedelta(hours=cfg.restart_step_hours) - if enddate_sim > cfg.enddate: - continue - - # Set restart variable (only takes effect for ICON) - if startdate_sim == cfg.startdate: - setattr(cfg, "lrestart", '.FALSE.') - else: - setattr(cfg, "lrestart", '.TRUE.') - - print("Starting run with startdate {}".format(startdate_sim)) - - run_chain(work_root=work_root, - model_cfg=model_cfg, - cfg=cfg, - startdate_sim=startdate_sim, - enddate_sim=enddate_sim, - job_names=job_names, - force=force, - resume=resume) - - -def restart_runs_spinup(work_root, model_cfg, cfg, job_names, force, resume): - """Start subchains in specified intervals and manage restarts with spin-up. - - This function slices the total runtime of the processing chain according to the - `cfg.restart_step_hours` configuration. It calls `run_chain()` for each specified - interval, managing restarts with spin-up. - - Parameters - ---------- - work_root : str - The path to the directory in which the chain writes files during execution. - model_cfg : dict - Configuration settings for the modeling framework. - cfg : Config - Object holding all user-configuration parameters as attributes. - job_names : list of str - List of names of jobs to execute on every timeslice. - force : bool - If True, it will force the execution of jobs regardless of their completion status. - resume : bool - If True, it will resume the last unfinished job. - - Notes - ----- - - The function iterates over specified intervals, calling `run_chain()` for each. - - It manages restart settings and logging for each subchain, including spin-up. 
- """ - for startdate_sim in tools.iter_hours(cfg.startdate, cfg.enddate, - cfg.restart_step_hours): - if startdate_sim == cfg.startdate: - setattr(cfg, "first_one", True) - setattr(cfg, "second_one", False) - setattr(cfg, "lrestart", '.FALSE.') - run_time = cfg.restart_step_hours - startdate_sim_spinup = startdate_sim - elif startdate_sim == cfg.startdate + timedelta( - hours=cfg.restart_step_hours): - setattr(cfg, "first_one", False) - setattr(cfg, "second_one", True) - setattr(cfg, "lrestart", '.TRUE.') - run_time = cfg.restart_step_hours + cfg.spinup - startdate_sim_spinup = startdate_sim - timedelta(hours=cfg.spinup) + for chunk_id in cfg.chunks: + cfg.chunk_id = chunk_id + cfg.get_previous_chunk_id(cfg.chunk_id) + cfg.startdate_sim_yyyymmddhh = cfg.chunk_id[0:10] + cfg.enddate_sim_yyyymmddhh = cfg.chunk_id[-10:] + cfg.startdate_sim = datetime.strptime( + cfg.startdate_sim_yyyymmddhh, "%Y%m%d%H").replace(tzinfo=pytz.UTC) + cfg.enddate_sim = datetime.strptime( + cfg.enddate_sim_yyyymmddhh, "%Y%m%d%H").replace(tzinfo=pytz.UTC) + + if 'spinup' in cfg.workflow['features'] and hasattr(cfg, 'spinup'): + if cfg.startdate_sim == cfg.startdate: + cfg.first_one = True + cfg.second_one = False + cfg.lrestart = '.FALSE.' + elif cfg.startdate_sim == cfg.startdate + timedelta( + hours=cfg.restart_step_hours): + cfg.first_one = False + cfg.second_one = True + cfg.lrestart = '.TRUE.' + else: + cfg.first_one = False + cfg.second_one = False + cfg.lrestart = '.TRUE.' else: - setattr(cfg, "first_one", False) - setattr(cfg, "second_one", False) - setattr(cfg, "lrestart", '.TRUE.') - run_time = cfg.restart_step_hours + cfg.spinup - startdate_sim_spinup = startdate_sim - timedelta(hours=cfg.spinup) - - # If current enddate is later than global enddate, skip - enddate_sim = startdate_sim + timedelta(hours=cfg.restart_step_hours) - if enddate_sim > cfg.enddate: - continue - - print(f'Runtime of sub-simulation: {run_time} h') - - run_chain(work_root=work_root, - model_cfg=model_cfg, - cfg=cfg, - startdate_sim=startdate_sim_spinup, - enddate_sim=enddate_sim, - job_names=job_names, - force=force, - resume=resume) - + # Set restart variable (only takes effect for ICON) + cfg.lrestart = ".FALSE." if cfg.startdate_sim == cfg.startdate else ".TRUE." -def load_model_config_yaml(yamlfile): - """Load model configuration from a YAML file. + print(f'└── Starting chunk "{cfg.chunk_id}"') - Parameters - ---------- - yamlfile : str - The path to the YAML file containing the model configuration. - - Returns - ------- - dict - A dictionary representing the model configuration loaded from the YAML file. - """ - with open(yamlfile) as file: - model_cfg = yaml.safe_load(file) - return model_cfg + run_chunk(cfg=cfg, force=force, resume=resume) -if __name__ == '__main__': +def main(): """Main script for running a processing chain. - This script handles the execution of a processing chain for one or more specified cases. It loads model configurations, prepares the environment, and starts the chain based on the provided settings. + This script handles the execution of a processing chain for one or more + specified cases. It loads model configurations, prepares the environment, + and starts the chain based on the provided settings. Parameters ---------- @@ -769,14 +285,15 @@ def load_model_config_yaml(yamlfile): Notes ----- - This script uses command-line arguments to specify cases and job lists. - - It loads model configurations, converts paths to absolute, sets restart settings, and starts the chain. 
- - Depending on the model's features, it may run with or without restarts or utilize spin-up restarts. + - It loads model configurations, converts paths to absolute, sets restart + settings, and starts the chain. + - Depending on the model's features, it may run with or without restarts + or utilize spin-up restarts. """ args = parse_arguments() for casename in args.casenames: # Load configs - model_cfg = load_model_config_yaml('config/models.yaml') cfg = Config(casename) # Convert relative to absolute paths @@ -785,9 +302,6 @@ def load_model_config_yaml(yamlfile): # Set restart step in hours cfg.set_restart_step_hours() - # Print config before duplication of dict variables - cfg.print_config() - # Duplicate variables in the form of _ for better # access within namelist template. # E.g.: cfg.meteo['dir'] -> cfg.meteo_dir @@ -795,36 +309,52 @@ def load_model_config_yaml(yamlfile): # Check if jobs are set or if default ones are used if args.job_list is None: - args.job_list = model_cfg['models'][cfg.model]['jobs'] + cfg.jobs = cfg.workflow['jobs'] + else: + cfg.jobs = args.job_list + + # Check sync is forced + if args.force_sync: + cfg.force_sync = True + else: + cfg.force_sync = False - print(f"Starting chain for case {casename} and model {cfg.model}") + # Check constraint + if cfg.constraint: + assert cfg.constraint in ['gpu', 'mc'], ("Unknown constraint, use" + "gpu or mc") + + # Get complete chunk list + cfg.get_chunk_list() + + # Print config before chain starts + cfg.print_config() + + # Get custom chunks if specified + cfg.chunks = args.chunk_list if args.chunk_list else cfg.chunk_list + + tools.create_dir(cfg.case_root, "case_root") + + print("╔════════════════════════════════════════╗") + print("║ Starting Processing Chain ║") + print("╠════════════════════════════════════════╣") + print(f"║ Case: {casename: <27} ║") + print(f"║ Workflow: {cfg.workflow_name: <27} ║") + print("╚════════════════════════════════════════╝") # Check for restart compatibility and spinup - if 'restart' in model_cfg['models'][cfg.model]['features']: - if hasattr(cfg, 'spinup'): - print("Using spin-up restarts.") - restart_runs_spinup(work_root=cfg.work_root, - model_cfg=model_cfg, - cfg=cfg, - job_names=args.job_list, - force=args.force, - resume=args.resume) - else: - print("Using built-in model restarts.") - restart_runs(work_root=cfg.work_root, - model_cfg=model_cfg, - cfg=cfg, - job_names=args.job_list, - force=args.force, - resume=args.resume) + if 'restart' in cfg.workflow['features']: + restart_runs(cfg=cfg, force=args.force, resume=args.resume) else: print("No restarts are used.") - run_chain(work_root=cfg.work_root, - cfg=cfg, - startdate_sim=cfg.startdate, - enddate_sim=cfg.enddate, - job_names=args.job_list, - force=args.force, - resume=args.resume) - - print('>>> Finished the processing chain successfully <<<') + cfg.startdate_sim = cfg.startdate + cfg.enddate_sim = cfg.enddate + run_chunk(cfg=cfg, force=args.force, resume=args.resume) + + print("╔════════════════════════════════════════╗") + print("║ Processing Chain Completed ║") + print("╚════════════════════════════════════════╝") + + +if __name__ == '__main__': + main() diff --git a/workflows.yaml b/workflows.yaml new file mode 100644 index 00000000..f87c22b5 --- /dev/null +++ b/workflows.yaml @@ -0,0 +1,237 @@ +cosmo: + features: + - restart + jobs: + - prepare_cosmo + - int2lm + - cosmo + - post_cosmo + dependencies: + int2lm: + current: + - prepare_cosmo + cosmo: + previous: + - cosmo + post_cosmo: + current: + - cosmo + +cosmo-ghg: + 
features: + - restart + - tracers + jobs: + - prepare_cosmo + - emissions + - biofluxes + - oem + - online_vprm + - int2lm + - post_int2lm + - cosmo + - post_cosmo + dependencies: + emissions: + current: + - prepare_cosmo + biofluxes: + current: + - prepare_cosmo + oem: + current: + - prepare_cosmo + online_vprm: + current: + - prepare_cosmo + int2lm: + current: + - prepare_cosmo + - emissions + - biofluxes + - oem + - online_vprm + post_int2lm: + current: + - int2lm + cosmo: + current: + - post_int2lm + previous: + - cosmo + post_cosmo: + current: + - cosmo + +cosmo-ghg-spinup: + features: + - restart + - tracers + - spinup + jobs: + - prepare_cosmo + - emissions + - biofluxes + - oem + - online_vprm + - int2lm + - post_int2lm + - cosmo + - post_cosmo + dependencies: + emissions: + current: + - prepare_cosmo + biofluxes: + current: + - prepare_cosmo + oem: + current: + - prepare_cosmo + online_vprm: + current: + - prepare_cosmo + int2lm: + current: + - prepare_cosmo + - emissions + - biofluxes + - oem + - online_vprm + post_int2lm: + current: + - int2lm + previous: + - cosmo + cosmo: + current: + - post_int2lm + previous: + - cosmo + post_cosmo: + current: + - cosmo + +cosmo-art: + features: + - nesting + - spinup + jobs: + - prepare_cosmo + - emissions + - obs_nudging + - photo_rate + - int2lm + - cosmo + - post_cosmo + dependencies: + emissions: + current: + - prepare_cosmo + obs_nudging: + current: + - prepare_cosmo + photo_rate: + current: + - prepare_cosmo + int2lm: + current: + - prepare_cosmo + - emissions + - obs_nudging + - photo_rate + cosmo: + previous: + - cosmo + post_cosmo: + current: + - cosmo + +icon: + features: + - restart + jobs: + - prepare_icon + - icontools + - icon + dependencies: + icontools: + current: + - prepare_icon + icon: + current: + - prepare_icon + - icontools + previous: + - icon + +icon-art: + features: + - restart + jobs: + - prepare_icon + - icontools + - prepare_art + - icon + dependencies: + icontools: + current: + - prepare_icon + prepare_art: + current: + - icontools + icon: + current: + - prepare_icon + - icontools + - prepare_art + previous: + - icon + +icon-art-global: + features: + - restart + jobs: + - prepare_icon + - prepare_art_global + - icon + dependencies: + prepare_art_global: + current: + - prepare_icon + previous: + - icon + icon: + current: + - prepare_icon + - prepare_art_global + previous: + - icon + +icon-art-oem: + features: + - restart + jobs: + - prepare_icon + - icontools + - prepare_art + - prepare_art_oem + - icon + dependencies: + icontools: + current: + - prepare_icon + prepare_art: + current: + - icontools + prepare_art_oem: + current: + - prepare_art + icon: + current: + - prepare_icon + - icontools + - prepare_art + - prepare_art_oem + previous: + - icon
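As a closing note on the new `workflows.yaml`: each workflow maps a name to its `features`, an ordered `jobs` list, and a `dependencies` table whose `current`/`previous` keys state which jobs must have finished in the same or the preceding chunk. Below is a minimal sketch of how such a definition could be loaded and sanity-checked (it assumes PyYAML is available; `load_workflow` and `check_dependencies` are illustrative helpers, not part of the Processing Chain API):

```python
import yaml


def load_workflow(path, name):
    """Return the workflow entry `name` from a workflows.yaml file."""
    with open(path) as f:
        return yaml.safe_load(f)[name]


def check_dependencies(workflow):
    """Ensure every declared dependency refers to a job of the workflow."""
    jobs = set(workflow['jobs'])
    for job, deps in workflow.get('dependencies', {}).items():
        for when in ('current', 'previous'):
            for dep in deps.get(when, []):
                if dep not in jobs:
                    raise ValueError(
                        f"Job '{job}' depends on unknown job '{dep}'")


wf = load_workflow('workflows.yaml', 'icon-art')
check_dependencies(wf)
print(wf['features'])  # ['restart']
print(wf['jobs'])      # ['prepare_icon', 'icontools', 'prepare_art', 'icon']
```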