diff --git a/src/sirocco/parsing/_yaml_data_models.py b/src/sirocco/parsing/_yaml_data_models.py
index 976f7fa..0501c3a 100644
--- a/src/sirocco/parsing/_yaml_data_models.py
+++ b/src/sirocco/parsing/_yaml_data_models.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import time
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from typing import Annotated, Any, ClassVar, Literal
@@ -82,43 +82,6 @@ def convert_datetime(cls, value) -> datetime:
         return datetime.fromisoformat(value)
 
 
-class _CliArgsBaseModel(BaseModel):
-    """Base class for cli_arguments specifications"""
-
-    # TODO: Even allow for `str`, or always require list?
-    positional: str | list[str] | None = None
-    # Field needed for child class doing pydantic parsing
-    keyword: dict[str, str] | None = Field(default_factory=dict)
-    flags: str | list[str] | None = None
-    source_file: str | list[str] | None = None
-
-    # TODO: Should we allow users to pass it without the hyphen(s), and prepend them automatically?
-    # TODO: While convenient, it could be a bad idea, if users put in wrong things. Better to be explicit.
-    @field_validator("keyword", mode="before")
-    @classmethod
-    def validate_keyword_args(cls, value):
-        """Ensure keyword arguments start with '-' or '--'."""
-        if value is not None:
-            invalid_keys = [key for key in value if not key.startswith(("-", "--"))]
-            if invalid_keys:
-                invalid_kwarg_exc = f"Invalid keyword arguments: {', '.join(invalid_keys)}"
-                raise ValueError(invalid_kwarg_exc)
-        return value
-
-    @field_validator("flags", mode="before")
-    @classmethod
-    def validate_flag_args(cls, value):
-        """Ensure positional arguments start with '-' or '--'."""
-        if value is not None:
-            if isinstance(value, str):
-                value = [value]
-            invalid_flags = [arg for arg in value if not arg.startswith(("-", "--"))]
-            if invalid_flags:
-                invalid_flags_exc = f"Invalid positional arguments: {', '.join(invalid_flags)}"
-                raise ValueError(invalid_flags_exc)
-        return value
-
-
 class TargetNodesBaseModel(_NamedBaseModel):
     """class for targeting other task or data nodes in the graph
 
@@ -306,16 +269,137 @@ class ConfigRootTask(ConfigBaseTask):
     plugin: ClassVar[Literal["_root"]] = "_root"
 
 
+# By using a frozen class we only need to validate on initialization
+@dataclass(frozen=True)
+class ShellCliArgument:
+    """A holder for a CLI argument to simplify access.
+
+    Stores CLI arguments of the form "file", "--init", "{file}" or "{--init file}". These examples translate into
+    the following instances (in the same order):
+        ShellCliArgument(name="file", references_data_item=False, cli_option_of_data_item=None),
+        ShellCliArgument(name="--init", references_data_item=False, cli_option_of_data_item=None),
+        ShellCliArgument(name="file", references_data_item=True, cli_option_of_data_item=None),
+        ShellCliArgument(name="file", references_data_item=True, cli_option_of_data_item="--init")
+
+    Attributes:
+        name: Name of the argument. For the examples it is "file", "--init", "file" and "file"
+        references_data_item: Specifies if the argument references a data item signified by enclosing it by curly
+            brackets.
+        cli_option_of_data_item: The CLI option associated to the data item, e.g. "--init" for "{--init file}".
+            Must be None if references_data_item is False.
+
+    Raises:
+        ValueError: If cli_option_of_data_item is set although references_data_item is False.
+    """
+
+    name: str
+    references_data_item: bool
+    cli_option_of_data_item: str | None = None
+
+    def __post_init__(self):
+        # an option can only accompany a data item reference, never a plain argument
+        if self.cli_option_of_data_item is not None and not self.references_data_item:
+            msg = "cli_option_of_data_item cannot be not None if references_data_item is False"
+            raise ValueError(msg)
+
+    @classmethod
+    def from_cli_argument(cls, arg: str) -> ShellCliArgument:
+        """Parses a single entity of the CLI arguments, e.g. "--init", "{file}" or "{--init file}".
+
+        Raises:
+            ValueError: If the argument does not consist of one or two parts separated by whitespace, or if it
+                consists of two parts without being enclosed by curly brackets.
+        """
+        len_arg_with_option = 2
+        len_arg_no_option = 1
+        references_data_item = arg.startswith("{") and arg.endswith("}")
+        # remove curly brackets "{--init file}" -> "--init file"
+        arg_unwrapped = arg[1:-1] if references_data_item else arg
+
+        # "--init file" -> ["--init", "file"]
+        input_arg = arg_unwrapped.split()
+        if len(input_arg) not in (len_arg_no_option, len_arg_with_option):
+            msg = f"Expected argument of format {{data}} or {{option data}} but found {arg}"
+            raise ValueError(msg)
+        # the name is always the last part, an option (if any) precedes it
+        name = input_arg[-1]
+        cli_option_of_data_item = input_arg[0] if len(input_arg) == len_arg_with_option else None
+        return cls(name, references_data_item, cli_option_of_data_item)
+
+
 @dataclass
 class ConfigShellTaskSpecs:
     plugin: ClassVar[Literal["shell"]] = "shell"
     command: str = ""
-    cli_arguments: _CliArgsBaseModel | None = None
+    cli_arguments: list[ShellCliArgument] = field(default_factory=list)
+    env_source_files: list[str] = field(default_factory=list)
     src: str | None = None
 
 
 class ConfigShellTask(ConfigBaseTask, ConfigShellTaskSpecs):
-    pass
+    command: str = ""
+    cli_arguments: list[ShellCliArgument] = Field(default_factory=list)
+    env_source_files: list[str] = Field(default_factory=list)
+
+    @field_validator("cli_arguments", mode="before")
+    @classmethod
+    def validate_cli_arguments(cls, value: str) -> list[ShellCliArgument]:
+        """Parses the CLI arguments given as a single string into a list of ShellCliArgument."""
+        return cls.parse_cli_arguments(value)
+
+    @field_validator("env_source_files", mode="before")
+    @classmethod
+    def validate_env_source_files(cls, value: str | list[str]) -> list[str]:
+        """Wraps a single source file given as a string into a list."""
+        # isinstance is required here, `value is str` would compare against the type object and never be true
+        return [value] if isinstance(value, str) else value
+
+    @staticmethod
+    def split_cli_arguments(cli_arguments: str) -> list[str]:
+        """Splits the CLI arguments into a list of separate entities.
+
+        Splits the CLI arguments by spaces except if the space is contained within curly brackets. Consecutive,
+        leading and trailing spaces do not produce empty entities. For example the string
+        "-D --CMAKE_CXX_COMPILER=${CXX_COMPILER} {--init file}"
+        will be split into the list
+        ["-D", "--CMAKE_CXX_COMPILER=${CXX_COMPILER}", "{--init file}"]
+
+        Raises:
+            ValueError: If the curly brackets are not balanced.
+        """
+
+        nb_open_curly_brackets = 0
+        last_split_idx = 0
+        splits = []
+        for i, char in enumerate(cli_arguments):
+            if char == " " and not nb_open_curly_brackets:
+                # we omit the space in the splitting therefore we only store up to i but move the last_split_idx to i+1
+                # nothing is stored if the space directly follows another split as the entity would be empty
+                if i > last_split_idx:
+                    splits.append(cli_arguments[last_split_idx:i])
+                last_split_idx = i + 1
+            elif char == "{":
+                nb_open_curly_brackets += 1
+            elif char == "}":
+                if nb_open_curly_brackets == 0:
+                    msg = (
+                        "Invalid input for cli_arguments. "
+                        f"Found a closing curly bracket before an opening in {cli_arguments!r}"
+                    )
+                    raise ValueError(msg)
+                nb_open_curly_brackets -= 1
+
+        if nb_open_curly_brackets:
+            msg = f"Invalid input for cli_arguments. Found an unclosed curly bracket in {cli_arguments!r}"
+            raise ValueError(msg)
+        if last_split_idx != len(cli_arguments):
+            splits.append(cli_arguments[last_split_idx : len(cli_arguments)])
+        return splits
+
+    @staticmethod
+    def parse_cli_arguments(cli_arguments: str) -> list[ShellCliArgument]:
+        """Splits the CLI arguments string and parses each entity into a ShellCliArgument."""
+        return [ShellCliArgument.from_cli_argument(arg) for arg in ConfigShellTask.split_cli_arguments(cli_arguments)]
 
 
 @dataclass
@@ -341,6 +425,9 @@ class ConfigBaseData(_NamedBaseModel, ConfigBaseDataSpecs):
     """
 
     parameters: list[str] = []
+    type: str | None = None
+    src: str | None = None
+    format: str | None = None
 
     @field_validator("type")
     @classmethod
diff --git a/tests/cases/large/config/test_config_large.yml b/tests/cases/large/config/test_config_large.yml
index 05c261b..266e210 100644
--- a/tests/cases/large/config/test_config_large.yml
+++ b/tests/cases/large/config/test_config_large.yml
@@ -62,11 +62,7 @@ tasks:
   - extpar:
       plugin: shell # no extpar plugin available yet
       command: $PWD/examples/files/scripts/extpar
-      cli_arguments:
-        keyword:
-          --input: obs_data
-        flags:
-          - --verbose
+      cli_arguments: "--verbose {--input obs_data}"
       uenv:
         squashfs: path/to/squashfs
         mount_point: runtime/mount/point
@@ -75,13 +71,8 @@ tasks:
   - preproc:
       plugin: shell
       command: $PWD/examples/files/scripts/cleanup.sh
-      cli_arguments:
-        positional:
-          - grid_file
-        keyword:
-          -p: extpar_file
-          -e: ERA5
-        source_file: dummy_source_file
+      cli_arguments: "{-p extpar_file} {-e ERA5} {grid_file}"
+      env_source_file: dummy_source_file # NOTE(review): model field is env_source_files; this key looks ignored (expected output shows []) - confirm
       nodes: 4
       walltime: 00:02:00
       uenv:
@@ -90,10 +81,7 @@ tasks:
   - icon:
       plugin: icon
       command: $PWD/examples/files/scripts/icon
-      cli_arguments:
-        keyword:
-          -g: grid_file
-          --input: icon_input
+      cli_arguments: "{-g grid_file} {--input icon_input}"
       nodes: 40
       walltime: 23:59:59
       namelists:
@@ -105,9 +93,7 @@ tasks:
   - postproc_1:
       plugin: shell
       command: $PWD/examples/files/scripts/main_script_ocn.sh
-      cli_arguments:
-        keyword:
-          --input:
stream_1 + cli_arguments: "{--input stream_1}" nodes: 2 walltime: 00:05:00 uenv: @@ -116,12 +102,7 @@ tasks: - postproc_2: plugin: shell command: $PWD/examples/files/scripts/main_script_atm.sh - cli_arguments: - keyword: - --input: stream_2 - # `arg_option` should be in `tasks` section instead - # How to implement this? Even needed with keyword-arguments? - # arg_option: --input + cli_arguments: "{--input stream_2}" nodes: 2 walltime: 00:05:00 src: path/to/src/dir @@ -131,19 +112,13 @@ tasks: - store_and_clean_1: plugin: shell command: $PWD/examples/files/scripts/post_clean.sh - cli_arguments: - keyword: - --input: postout_1 - --stream: stream_1 - --icon_input: icon_input + cli_arguments: "{--input postout_1} {--stream stream_1} {--icon_input icon_input}" nodes: 1 walltime: 00:01:00 - store_and_clean_2: plugin: shell command: $PWD/examples/files/scripts/post_clean.sh - cli_arguments: - keyword: - --input: postout_2 + cli_arguments: "{--input postout_2}" nodes: 1 walltime: 00:01:00 data: diff --git a/tests/cases/large/data/test_config_large.txt b/tests/cases/large/data/test_config_large.txt index 24445df..749ac46 100644 --- a/tests/cases/large/data/test_config_large.txt +++ b/tests/cases/large/data/test_config_large.txt @@ -13,7 +13,8 @@ cycles: walltime: time.struct_time(tm_year=1900, tm_mon=1, tm_mday=1, tm_hour=0, tm_min=2, tm_sec=0, tm_wday=0, tm_yday=1, tm_isdst=-1) plugin: 'shell' command: '$PWD/examples/files/scripts/extpar' - cli arguments: positional=None keyword={'--input': 'obs_data'} flags=['--verbose'] source_file=None + cli arguments: [ShellCliArgument(name='--verbose', references_data_item=False, cli_option_of_data_item=None), ShellCliArgument(name='obs_data', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] - icon_bimonthly [date: 2025-01-01 00:00:00]: tasks: - preproc [date: 2025-01-01 00:00:00]: @@ -32,7 +33,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: 
'$PWD/examples/files/scripts/cleanup.sh' - cli arguments: positional=['grid_file'] keyword={'-p': 'extpar_file', '-e': 'ERA5'} flags=None source_file='dummy_source_file' + cli arguments: [ShellCliArgument(name='extpar_file', references_data_item=True, cli_option_of_data_item='-p'), ShellCliArgument(name='ERA5', references_data_item=True, cli_option_of_data_item='-e'), ShellCliArgument(name='grid_file', references_data_item=True, cli_option_of_data_item=None)] + env source files: [] - icon [date: 2025-01-01 00:00:00]: input: - grid_file @@ -64,7 +66,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/main_script_ocn.sh' - cli arguments: positional=None keyword={'--input': 'stream_1'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] - store_and_clean_1 [date: 2025-01-01 00:00:00]: input: - postout_1 [date: 2025-01-01 00:00:00] @@ -80,7 +83,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/post_clean.sh' - cli arguments: positional=None keyword={'--input': 'postout_1', '--stream': 'stream_1', '--icon_input': 'icon_input'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='postout_1', references_data_item=True, cli_option_of_data_item='--input'), ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--stream'), ShellCliArgument(name='icon_input', references_data_item=True, cli_option_of_data_item='--icon_input')] + env source files: [] - icon_bimonthly [date: 2025-03-01 00:00:00]: tasks: - preproc [date: 2025-03-01 00:00:00]: @@ -99,7 +103,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/cleanup.sh' - cli arguments: positional=['grid_file'] keyword={'-p': 'extpar_file', '-e': 'ERA5'} flags=None source_file='dummy_source_file' + cli arguments: 
[ShellCliArgument(name='extpar_file', references_data_item=True, cli_option_of_data_item='-p'), ShellCliArgument(name='ERA5', references_data_item=True, cli_option_of_data_item='-e'), ShellCliArgument(name='grid_file', references_data_item=True, cli_option_of_data_item=None)] + env source files: [] - icon [date: 2025-03-01 00:00:00]: input: - grid_file @@ -132,7 +137,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/main_script_ocn.sh' - cli arguments: positional=None keyword={'--input': 'stream_1'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] - store_and_clean_1 [date: 2025-03-01 00:00:00]: input: - postout_1 [date: 2025-03-01 00:00:00] @@ -148,7 +154,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/post_clean.sh' - cli arguments: positional=None keyword={'--input': 'postout_1', '--stream': 'stream_1', '--icon_input': 'icon_input'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='postout_1', references_data_item=True, cli_option_of_data_item='--input'), ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--stream'), ShellCliArgument(name='icon_input', references_data_item=True, cli_option_of_data_item='--icon_input')] + env source files: [] - icon_bimonthly [date: 2025-05-01 00:00:00]: tasks: - preproc [date: 2025-05-01 00:00:00]: @@ -169,7 +176,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/cleanup.sh' - cli arguments: positional=['grid_file'] keyword={'-p': 'extpar_file', '-e': 'ERA5'} flags=None source_file='dummy_source_file' + cli arguments: [ShellCliArgument(name='extpar_file', references_data_item=True, cli_option_of_data_item='-p'), ShellCliArgument(name='ERA5', references_data_item=True, cli_option_of_data_item='-e'), 
ShellCliArgument(name='grid_file', references_data_item=True, cli_option_of_data_item=None)] + env source files: [] - icon [date: 2025-05-01 00:00:00]: input: - grid_file @@ -202,7 +210,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/main_script_ocn.sh' - cli arguments: positional=None keyword={'--input': 'stream_1'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] - store_and_clean_1 [date: 2025-05-01 00:00:00]: input: - postout_1 [date: 2025-05-01 00:00:00] @@ -218,7 +227,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/post_clean.sh' - cli arguments: positional=None keyword={'--input': 'postout_1', '--stream': 'stream_1', '--icon_input': 'icon_input'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='postout_1', references_data_item=True, cli_option_of_data_item='--input'), ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--stream'), ShellCliArgument(name='icon_input', references_data_item=True, cli_option_of_data_item='--icon_input')] + env source files: [] - icon_bimonthly [date: 2025-07-01 00:00:00]: tasks: - preproc [date: 2025-07-01 00:00:00]: @@ -239,7 +249,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/cleanup.sh' - cli arguments: positional=['grid_file'] keyword={'-p': 'extpar_file', '-e': 'ERA5'} flags=None source_file='dummy_source_file' + cli arguments: [ShellCliArgument(name='extpar_file', references_data_item=True, cli_option_of_data_item='-p'), ShellCliArgument(name='ERA5', references_data_item=True, cli_option_of_data_item='-e'), ShellCliArgument(name='grid_file', references_data_item=True, cli_option_of_data_item=None)] + env source files: [] - icon [date: 2025-07-01 00:00:00]: input: - grid_file @@ -272,7 +283,8 @@ cycles: end 
date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/main_script_ocn.sh' - cli arguments: positional=None keyword={'--input': 'stream_1'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] - store_and_clean_1 [date: 2025-07-01 00:00:00]: input: - postout_1 [date: 2025-07-01 00:00:00] @@ -288,7 +300,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/post_clean.sh' - cli arguments: positional=None keyword={'--input': 'postout_1', '--stream': 'stream_1', '--icon_input': 'icon_input'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='postout_1', references_data_item=True, cli_option_of_data_item='--input'), ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--stream'), ShellCliArgument(name='icon_input', references_data_item=True, cli_option_of_data_item='--icon_input')] + env source files: [] - icon_bimonthly [date: 2025-09-01 00:00:00]: tasks: - preproc [date: 2025-09-01 00:00:00]: @@ -309,7 +322,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/cleanup.sh' - cli arguments: positional=['grid_file'] keyword={'-p': 'extpar_file', '-e': 'ERA5'} flags=None source_file='dummy_source_file' + cli arguments: [ShellCliArgument(name='extpar_file', references_data_item=True, cli_option_of_data_item='-p'), ShellCliArgument(name='ERA5', references_data_item=True, cli_option_of_data_item='-e'), ShellCliArgument(name='grid_file', references_data_item=True, cli_option_of_data_item=None)] + env source files: [] - icon [date: 2025-09-01 00:00:00]: input: - grid_file @@ -342,7 +356,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/main_script_ocn.sh' - cli arguments: positional=None keyword={'--input': 'stream_1'} flags=None source_file=None + cli 
arguments: [ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] - store_and_clean_1 [date: 2025-09-01 00:00:00]: input: - postout_1 [date: 2025-09-01 00:00:00] @@ -358,7 +373,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/post_clean.sh' - cli arguments: positional=None keyword={'--input': 'postout_1', '--stream': 'stream_1', '--icon_input': 'icon_input'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='postout_1', references_data_item=True, cli_option_of_data_item='--input'), ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--stream'), ShellCliArgument(name='icon_input', references_data_item=True, cli_option_of_data_item='--icon_input')] + env source files: [] - icon_bimonthly [date: 2025-11-01 00:00:00]: tasks: - preproc [date: 2025-11-01 00:00:00]: @@ -379,7 +395,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/cleanup.sh' - cli arguments: positional=['grid_file'] keyword={'-p': 'extpar_file', '-e': 'ERA5'} flags=None source_file='dummy_source_file' + cli arguments: [ShellCliArgument(name='extpar_file', references_data_item=True, cli_option_of_data_item='-p'), ShellCliArgument(name='ERA5', references_data_item=True, cli_option_of_data_item='-e'), ShellCliArgument(name='grid_file', references_data_item=True, cli_option_of_data_item=None)] + env source files: [] - icon [date: 2025-11-01 00:00:00]: input: - grid_file @@ -412,7 +429,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/main_script_ocn.sh' - cli arguments: positional=None keyword={'--input': 'stream_1'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] - store_and_clean_1 [date: 2025-11-01 00:00:00]: input: - postout_1 [date: 
2025-11-01 00:00:00] @@ -428,7 +446,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/post_clean.sh' - cli arguments: positional=None keyword={'--input': 'postout_1', '--stream': 'stream_1', '--icon_input': 'icon_input'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='postout_1', references_data_item=True, cli_option_of_data_item='--input'), ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--stream'), ShellCliArgument(name='icon_input', references_data_item=True, cli_option_of_data_item='--icon_input')] + env source files: [] - icon_bimonthly [date: 2026-01-01 00:00:00]: tasks: - preproc [date: 2026-01-01 00:00:00]: @@ -449,7 +468,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/cleanup.sh' - cli arguments: positional=['grid_file'] keyword={'-p': 'extpar_file', '-e': 'ERA5'} flags=None source_file='dummy_source_file' + cli arguments: [ShellCliArgument(name='extpar_file', references_data_item=True, cli_option_of_data_item='-p'), ShellCliArgument(name='ERA5', references_data_item=True, cli_option_of_data_item='-e'), ShellCliArgument(name='grid_file', references_data_item=True, cli_option_of_data_item=None)] + env source files: [] - icon [date: 2026-01-01 00:00:00]: input: - grid_file @@ -482,7 +502,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/main_script_ocn.sh' - cli arguments: positional=None keyword={'--input': 'stream_1'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] - store_and_clean_1 [date: 2026-01-01 00:00:00]: input: - postout_1 [date: 2026-01-01 00:00:00] @@ -498,7 +519,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/post_clean.sh' - cli arguments: positional=None keyword={'--input': 
'postout_1', '--stream': 'stream_1', '--icon_input': 'icon_input'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='postout_1', references_data_item=True, cli_option_of_data_item='--input'), ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--stream'), ShellCliArgument(name='icon_input', references_data_item=True, cli_option_of_data_item='--icon_input')] + env source files: [] - icon_bimonthly [date: 2026-03-01 00:00:00]: tasks: - preproc [date: 2026-03-01 00:00:00]: @@ -519,7 +541,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/cleanup.sh' - cli arguments: positional=['grid_file'] keyword={'-p': 'extpar_file', '-e': 'ERA5'} flags=None source_file='dummy_source_file' + cli arguments: [ShellCliArgument(name='extpar_file', references_data_item=True, cli_option_of_data_item='-p'), ShellCliArgument(name='ERA5', references_data_item=True, cli_option_of_data_item='-e'), ShellCliArgument(name='grid_file', references_data_item=True, cli_option_of_data_item=None)] + env source files: [] - icon [date: 2026-03-01 00:00:00]: input: - grid_file @@ -552,7 +575,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/main_script_ocn.sh' - cli arguments: positional=None keyword={'--input': 'stream_1'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] - store_and_clean_1 [date: 2026-03-01 00:00:00]: input: - postout_1 [date: 2026-03-01 00:00:00] @@ -568,7 +592,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/post_clean.sh' - cli arguments: positional=None keyword={'--input': 'postout_1', '--stream': 'stream_1', '--icon_input': 'icon_input'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='postout_1', references_data_item=True, 
cli_option_of_data_item='--input'), ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--stream'), ShellCliArgument(name='icon_input', references_data_item=True, cli_option_of_data_item='--icon_input')] + env source files: [] - icon_bimonthly [date: 2026-05-01 00:00:00]: tasks: - preproc [date: 2026-05-01 00:00:00]: @@ -589,7 +614,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/cleanup.sh' - cli arguments: positional=['grid_file'] keyword={'-p': 'extpar_file', '-e': 'ERA5'} flags=None source_file='dummy_source_file' + cli arguments: [ShellCliArgument(name='extpar_file', references_data_item=True, cli_option_of_data_item='-p'), ShellCliArgument(name='ERA5', references_data_item=True, cli_option_of_data_item='-e'), ShellCliArgument(name='grid_file', references_data_item=True, cli_option_of_data_item=None)] + env source files: [] - icon [date: 2026-05-01 00:00:00]: input: - grid_file @@ -622,7 +648,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/main_script_ocn.sh' - cli arguments: positional=None keyword={'--input': 'stream_1'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] - store_and_clean_1 [date: 2026-05-01 00:00:00]: input: - postout_1 [date: 2026-05-01 00:00:00] @@ -638,7 +665,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/post_clean.sh' - cli arguments: positional=None keyword={'--input': 'postout_1', '--stream': 'stream_1', '--icon_input': 'icon_input'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='postout_1', references_data_item=True, cli_option_of_data_item='--input'), ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--stream'), ShellCliArgument(name='icon_input', references_data_item=True, 
cli_option_of_data_item='--icon_input')] + env source files: [] - icon_bimonthly [date: 2026-07-01 00:00:00]: tasks: - preproc [date: 2026-07-01 00:00:00]: @@ -659,7 +687,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/cleanup.sh' - cli arguments: positional=['grid_file'] keyword={'-p': 'extpar_file', '-e': 'ERA5'} flags=None source_file='dummy_source_file' + cli arguments: [ShellCliArgument(name='extpar_file', references_data_item=True, cli_option_of_data_item='-p'), ShellCliArgument(name='ERA5', references_data_item=True, cli_option_of_data_item='-e'), ShellCliArgument(name='grid_file', references_data_item=True, cli_option_of_data_item=None)] + env source files: [] - icon [date: 2026-07-01 00:00:00]: input: - grid_file @@ -692,7 +721,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/main_script_ocn.sh' - cli arguments: positional=None keyword={'--input': 'stream_1'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] - store_and_clean_1 [date: 2026-07-01 00:00:00]: input: - postout_1 [date: 2026-07-01 00:00:00] @@ -708,7 +738,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/post_clean.sh' - cli arguments: positional=None keyword={'--input': 'postout_1', '--stream': 'stream_1', '--icon_input': 'icon_input'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='postout_1', references_data_item=True, cli_option_of_data_item='--input'), ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--stream'), ShellCliArgument(name='icon_input', references_data_item=True, cli_option_of_data_item='--icon_input')] + env source files: [] - icon_bimonthly [date: 2026-09-01 00:00:00]: tasks: - preproc [date: 2026-09-01 00:00:00]: @@ -729,7 +760,8 @@ cycles: end date: 2027-01-01 
00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/cleanup.sh' - cli arguments: positional=['grid_file'] keyword={'-p': 'extpar_file', '-e': 'ERA5'} flags=None source_file='dummy_source_file' + cli arguments: [ShellCliArgument(name='extpar_file', references_data_item=True, cli_option_of_data_item='-p'), ShellCliArgument(name='ERA5', references_data_item=True, cli_option_of_data_item='-e'), ShellCliArgument(name='grid_file', references_data_item=True, cli_option_of_data_item=None)] + env source files: [] - icon [date: 2026-09-01 00:00:00]: input: - grid_file @@ -762,7 +794,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/main_script_ocn.sh' - cli arguments: positional=None keyword={'--input': 'stream_1'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] - store_and_clean_1 [date: 2026-09-01 00:00:00]: input: - postout_1 [date: 2026-09-01 00:00:00] @@ -778,7 +811,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/post_clean.sh' - cli arguments: positional=None keyword={'--input': 'postout_1', '--stream': 'stream_1', '--icon_input': 'icon_input'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='postout_1', references_data_item=True, cli_option_of_data_item='--input'), ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--stream'), ShellCliArgument(name='icon_input', references_data_item=True, cli_option_of_data_item='--icon_input')] + env source files: [] - icon_bimonthly [date: 2026-11-01 00:00:00]: tasks: - preproc [date: 2026-11-01 00:00:00]: @@ -799,7 +833,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/cleanup.sh' - cli arguments: positional=['grid_file'] keyword={'-p': 'extpar_file', '-e': 'ERA5'} flags=None source_file='dummy_source_file' + 
cli arguments: [ShellCliArgument(name='extpar_file', references_data_item=True, cli_option_of_data_item='-p'), ShellCliArgument(name='ERA5', references_data_item=True, cli_option_of_data_item='-e'), ShellCliArgument(name='grid_file', references_data_item=True, cli_option_of_data_item=None)] + env source files: [] - icon [date: 2026-11-01 00:00:00]: input: - grid_file @@ -832,7 +867,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/main_script_ocn.sh' - cli arguments: positional=None keyword={'--input': 'stream_1'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] - store_and_clean_1 [date: 2026-11-01 00:00:00]: input: - postout_1 [date: 2026-11-01 00:00:00] @@ -848,7 +884,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/post_clean.sh' - cli arguments: positional=None keyword={'--input': 'postout_1', '--stream': 'stream_1', '--icon_input': 'icon_input'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='postout_1', references_data_item=True, cli_option_of_data_item='--input'), ShellCliArgument(name='stream_1', references_data_item=True, cli_option_of_data_item='--stream'), ShellCliArgument(name='icon_input', references_data_item=True, cli_option_of_data_item='--icon_input')] + env source files: [] - yearly [date: 2025-01-01 00:00:00]: tasks: - postproc_2 [date: 2025-01-01 00:00:00]: @@ -870,7 +907,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/main_script_atm.sh' - cli arguments: positional=None keyword={'--input': 'stream_2'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='stream_2', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] src: 'path/to/src/dir' - store_and_clean_2 [date: 2025-01-01 00:00:00]: input: @@ -891,7 +929,8 @@ 
cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/post_clean.sh' - cli arguments: positional=None keyword={'--input': 'postout_2'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='postout_2', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] - yearly [date: 2026-01-01 00:00:00]: tasks: - postproc_2 [date: 2026-01-01 00:00:00]: @@ -913,7 +952,8 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/main_script_atm.sh' - cli arguments: positional=None keyword={'--input': 'stream_2'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='stream_2', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] src: 'path/to/src/dir' - store_and_clean_2 [date: 2026-01-01 00:00:00]: input: @@ -934,4 +974,5 @@ cycles: end date: 2027-01-01 00:00:00 plugin: 'shell' command: '$PWD/examples/files/scripts/post_clean.sh' - cli arguments: positional=None keyword={'--input': 'postout_2'} flags=None source_file=None \ No newline at end of file + cli arguments: [ShellCliArgument(name='postout_2', references_data_item=True, cli_option_of_data_item='--input')] + env source files: [] \ No newline at end of file diff --git a/tests/cases/parameters/config/test_config_parameters.yml b/tests/cases/parameters/config/test_config_parameters.yml index 25b22dd..c434253 100644 --- a/tests/cases/parameters/config/test_config_parameters.yml +++ b/tests/cases/parameters/config/test_config_parameters.yml @@ -46,9 +46,7 @@ tasks: - icon: plugin: shell command: $PWD/tests/files/scripts/icon.py - cli_arguments: - keyword: - --restart: icon_restart + cli_arguments: "{--restart icon_restart} {--init initial_conditions} {--forcing forcing}" parameters: [foo, bar] - statistics_foo: plugin: shell diff --git a/tests/cases/parameters/data/test_config_parameters.txt b/tests/cases/parameters/data/test_config_parameters.txt index 
a4bd223..1085bee 100644 --- a/tests/cases/parameters/data/test_config_parameters.txt +++ b/tests/cases/parameters/data/test_config_parameters.txt @@ -14,7 +14,8 @@ cycles: end date: 2026-03-01 00:00:00 plugin: 'shell' command: '$PWD/tests/files/scripts/icon.py' - cli arguments: positional=None keyword={'--restart': 'icon_restart'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='icon_restart', references_data_item=True, cli_option_of_data_item='--restart'), ShellCliArgument(name='initial_conditions', references_data_item=True, cli_option_of_data_item='--init'), ShellCliArgument(name='forcing', references_data_item=True, cli_option_of_data_item='--forcing')] + env source files: [] - icon [date: 2026-01-01 00:00:00, foo: 0, bar: 3.5]: input: - initial conditions @@ -28,7 +29,8 @@ cycles: end date: 2026-03-01 00:00:00 plugin: 'shell' command: '$PWD/tests/files/scripts/icon.py' - cli arguments: positional=None keyword={'--restart': 'icon_restart'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='icon_restart', references_data_item=True, cli_option_of_data_item='--restart'), ShellCliArgument(name='initial_conditions', references_data_item=True, cli_option_of_data_item='--init'), ShellCliArgument(name='forcing', references_data_item=True, cli_option_of_data_item='--forcing')] + env source files: [] - icon [date: 2026-01-01 00:00:00, foo: 1, bar: 3.0]: input: - initial conditions @@ -42,7 +44,8 @@ cycles: end date: 2026-03-01 00:00:00 plugin: 'shell' command: '$PWD/tests/files/scripts/icon.py' - cli arguments: positional=None keyword={'--restart': 'icon_restart'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='icon_restart', references_data_item=True, cli_option_of_data_item='--restart'), ShellCliArgument(name='initial_conditions', references_data_item=True, cli_option_of_data_item='--init'), ShellCliArgument(name='forcing', references_data_item=True, cli_option_of_data_item='--forcing')] + env source files: [] 
- icon [date: 2026-01-01 00:00:00, foo: 1, bar: 3.5]: input: - initial conditions @@ -56,7 +59,8 @@ cycles: end date: 2026-03-01 00:00:00 plugin: 'shell' command: '$PWD/tests/files/scripts/icon.py' - cli arguments: positional=None keyword={'--restart': 'icon_restart'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='icon_restart', references_data_item=True, cli_option_of_data_item='--restart'), ShellCliArgument(name='initial_conditions', references_data_item=True, cli_option_of_data_item='--init'), ShellCliArgument(name='forcing', references_data_item=True, cli_option_of_data_item='--forcing')] + env source files: [] - statistics_foo [date: 2026-01-01 00:00:00, bar: 3.0]: input: - icon_output [date: 2026-01-01 00:00:00, foo: 0, bar: 3.0] @@ -69,6 +73,8 @@ cycles: end date: 2026-03-01 00:00:00 plugin: 'shell' command: '$PWD/tests/files/scripts/statistics.py' + cli arguments: [] + env source files: [] - statistics_foo [date: 2026-01-01 00:00:00, bar: 3.5]: input: - icon_output [date: 2026-01-01 00:00:00, foo: 0, bar: 3.5] @@ -81,6 +87,8 @@ cycles: end date: 2026-03-01 00:00:00 plugin: 'shell' command: '$PWD/tests/files/scripts/statistics.py' + cli arguments: [] + env source files: [] - statistics_foo_bar [date: 2026-01-01 00:00:00]: input: - analysis_foo [date: 2026-01-01 00:00:00, bar: 3.5] @@ -93,6 +101,8 @@ cycles: end date: 2026-03-01 00:00:00 plugin: 'shell' command: '$PWD/tests/files/scripts/statistics.py' + cli arguments: [] + env source files: [] - yearly [date: 2026-01-01 00:00:00]: tasks: - merge [date: 2026-01-01 00:00:00]: @@ -105,4 +115,6 @@ cycles: start date: 2026-01-01 00:00:00 end date: 2026-03-01 00:00:00 plugin: 'shell' - command: '$PWD/tests/files/scripts/merge.py' \ No newline at end of file + command: '$PWD/tests/files/scripts/merge.py' + cli arguments: [] + env source files: [] \ No newline at end of file diff --git a/tests/cases/small/config/test_config_small.yml b/tests/cases/small/config/test_config_small.yml index 
8b163fc..900e1c7 100644 --- a/tests/cases/small/config/test_config_small.yml +++ b/tests/cases/small/config/test_config_small.yml @@ -24,9 +24,7 @@ tasks: - icon: plugin: shell command: $PWD/tests/files/scripts/icon.py - cli_arguments: - keyword: - --restart: icon_restart + cli_arguments: "{--restart icon_restart} {--init initial_conditions}" - cleanup: plugin: shell command: $PWD/tests/files/scripts/cleanup.py diff --git a/tests/cases/small/data/test_config_small.txt b/tests/cases/small/data/test_config_small.txt index 8af67b5..5e0e293 100644 --- a/tests/cases/small/data/test_config_small.txt +++ b/tests/cases/small/data/test_config_small.txt @@ -11,7 +11,8 @@ cycles: end date: 2026-06-01 00:00:00 plugin: 'shell' command: '$PWD/tests/files/scripts/icon.py' - cli arguments: positional=None keyword={'--restart': 'icon_restart'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='icon_restart', references_data_item=True, cli_option_of_data_item='--restart'), ShellCliArgument(name='initial_conditions', references_data_item=True, cli_option_of_data_item='--init')] + env source files: [] - bimonthly_tasks [date: 2026-03-01 00:00:00]: tasks: - icon [date: 2026-03-01 00:00:00]: @@ -26,7 +27,8 @@ cycles: end date: 2026-06-01 00:00:00 plugin: 'shell' command: '$PWD/tests/files/scripts/icon.py' - cli arguments: positional=None keyword={'--restart': 'icon_restart'} flags=None source_file=None + cli arguments: [ShellCliArgument(name='icon_restart', references_data_item=True, cli_option_of_data_item='--restart'), ShellCliArgument(name='initial_conditions', references_data_item=True, cli_option_of_data_item='--init')] + env source files: [] - bimonthly_tasks [date: 2026-05-01 00:00:00]: tasks: - icon [date: 2026-05-01 00:00:00]: @@ -41,7 +43,8 @@ cycles: end date: 2026-06-01 00:00:00 plugin: 'shell' command: '$PWD/tests/files/scripts/icon.py' - cli arguments: positional=None keyword={'--restart': 'icon_restart'} flags=None source_file=None + cli arguments: 
[ShellCliArgument(name='icon_restart', references_data_item=True, cli_option_of_data_item='--restart'), ShellCliArgument(name='initial_conditions', references_data_item=True, cli_option_of_data_item='--init')] + env source files: [] - lastly: tasks: - cleanup: @@ -50,4 +53,6 @@ cycles: name: 'cleanup' coordinates: {} plugin: 'shell' - command: '$PWD/tests/files/scripts/cleanup.py' \ No newline at end of file + command: '$PWD/tests/files/scripts/cleanup.py' + cli arguments: [] + env source files: [] \ No newline at end of file diff --git a/tests/test_wc_workflow.py b/tests/test_wc_workflow.py index 5c253a3..dd9f793 100644 --- a/tests/test_wc_workflow.py +++ b/tests/test_wc_workflow.py @@ -3,10 +3,27 @@ import pytest from sirocco.core import Workflow +from sirocco.parsing._yaml_data_models import ConfigShellTask, ShellCliArgument from sirocco.pretty_print import PrettyPrinter from sirocco.vizgraph import VizGraph +# configs that are tested for parsing +def test_parsing_cli_parameters(): + cli_arguments = "-D --CMAKE_CXX_COMPILER=${CXX_COMPILER} {--init file}" + assert ConfigShellTask.split_cli_arguments(cli_arguments) == [ + "-D", + "--CMAKE_CXX_COMPILER=${CXX_COMPILER}", + "{--init file}", + ] + + assert ConfigShellTask.parse_cli_arguments(cli_arguments) == [ + ShellCliArgument("-D", False, None), + ShellCliArgument("--CMAKE_CXX_COMPILER=${CXX_COMPILER}", False, None), + ShellCliArgument("file", True, "--init"), + ] + + @pytest.fixture def pprinter(): return PrettyPrinter() @@ -35,8 +52,9 @@ def test_parse_config_file(config_paths, pprinter): if test_str != reference_str: new_path = Path(config_paths["txt"]).with_suffix(".new.txt") new_path.write_text(test_str) - msg = f"Workflow graph doesn't match serialized data. New graph string dumped to {new_path}." - raise ValueError(msg) + assert ( + reference_str == test_str + ), f"Workflow graph doesn't match serialized data. New graph string dumped to {new_path}." 
@pytest.mark.skip(reason="don't run it each time, uncomment to regenerate serilaized data")