From d4a5c7dca9ab71d9230f7b393692dc0670200c95 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Mon, 4 Nov 2024 11:45:39 +0100 Subject: [PATCH 01/39] add: First version of new IR with dependency resolving --- src/sirocco/core.py | 916 ++++++---------------- src/sirocco/parsing/_yaml_data_models.py | 101 +-- tests/files/configs/test_config_large.yml | 49 +- tests/test_wc_workflow.py | 4 +- 4 files changed, 306 insertions(+), 764 deletions(-) diff --git a/src/sirocco/core.py b/src/sirocco/core.py index a620626c..df8f5bb8 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -1,689 +1,277 @@ from __future__ import annotations - +from dataclasses import dataclass +from typing import Any from datetime import datetime -from itertools import chain -from os.path import expandvars -from pathlib import Path -from typing import TYPE_CHECKING - -from isoduration import parse_duration -from isoduration.types import Duration -from sirocco.parsing._utils import TimeUtils +from sirocco.parsing._yaml_data_models import ( + _DataBaseModel, + ConfigCycleTaskDepend, + ConfigCycleTask, + ConfigCycleTaskInput, + ConfigCycleTaskOutput, + ConfigData, + ConfigTask, + ConfigWorkflow, + load_workflow_config, +) if TYPE_CHECKING: - from collections.abc import Generator - - -class _DataBase: - def __init__( - self, - name: str, - type: str, # noqa: A002 - src: str, - lag: list[Duration], - date: list[datetime], - arg_option: str | None, - *, - available: bool, - ): - self._name = name - - self._src = src - self._path = Path(expandvars(self._src)) - - self._type = type - if self._type not in ["file", "dir"]: - msg = f"Data type {self._type!r} not supported. Please use 'file' or 'dir'." - raise ValueError(msg) - - if len(lag) > 0 and len(date) > 0: - msg = "Either 'lag' or 'date' can be nonempty. Not both." - raise ValueError(msg) - - # COMMENT I think we should just disallow multiple lags, and enforce the user to write multiple lags - # I am not sure how this work with icon as it does not need positional arguments - # or rather how does it work with plugins - if arg_option is not None and (len(lag) > 1 or len(date) > 1): - msg = ( - "You cannot give an arg_option when multiple lags and dates are given. " - "They must be positional arguments, since passing them to one option is ambiguous." 
- ) - raise ValueError(msg) - - self._lag = lag - self._date = date - self._arg_option = arg_option - self._available = available - - @property - def name(self) -> str: - """The name of this data instance that is used as identifier.""" - return self._name - - @property - def type(self) -> str: - """The data type.""" - return self._type - - @property - def src(self) -> str: - return self._src - - @property - def path(self) -> Path: - return self._path - - @property - def lag(self) -> list[Duration]: - return self._lag - - @property - def date(self) -> list[datetime]: - return self._date - - @property - def arg_option(self) -> str | None: - return self._arg_option - - @property - def available(self) -> bool: - return self._available - - -class Data(_DataBase): - def __init__( - self, - name: str, - type: str, # noqa: A002 - src: str, - lag: list[Duration], - date: list[datetime], - arg_option: str | None, - *, - available: bool, - ): - super().__init__(name, type, src, lag, date, arg_option, available=available) - self._task: Task | None = None - - def unroll(self, unrolled_task: UnrolledTask) -> Generator[UnrolledData, None, None]: - if len(self._date) == 0 and len(self._lag) == 0: - yield UnrolledData.from_data(self, unrolled_task, unrolled_task.unrolled_date) - - for lag in self._lag: - lagged_date = unrolled_task.unrolled_date + lag - if ( - lagged_date >= unrolled_task.unrolled_cycle.start_date - and lagged_date <= unrolled_task.unrolled_cycle.end_date - ): - yield UnrolledData.from_data(self, unrolled_task, lagged_date) - - for date in self._date: - yield UnrolledData.from_data(self, unrolled_task, date) - - def __repr__(self) -> str: - if self.task is None: - identifier = f"{self.__class__.__name__} '{self.name}'" - else: - identifier = f"{self.__class__.__name__} '{self.name}' attached task '{self.task}'" - return super().__repr__().replace(f"{self.__class__.__name__}", identifier) - - @property - def task(self) -> Task | None: - return self._task - - @task.setter - def task(self, task: Task): - if self._task is not None: - msg = f"Data {self} was already assigned to task {self._task}. Cannot assign task to task {task}." - raise ValueError(msg) - self._task = task - - -class UnrolledData(_DataBase): - """ - Data that are created during the unrolling of a cycle. - This class should be only initiated through unrolling a cycle. - """ + from collections.abc import Generator, Iterable, Iterator + type ConfigCycleSpec = ConfigCycleTaskDepend | ConfigCycleTaskInput | ConfigCycleTaskOutput + + +class Task: + """Internal representation of a task node""" + + name: str + outputs: list[Data] + inputs: list[Data] + wait_on: list[Task] + date: datetime | None = None + # TODO This list is too long. 
We should start with the set of supported
+    # keywords and extend it as we support more
+    command: str | None = None
+    command_option: str | None = None
+    input_arg_options: dict[str, str] | None = None
+    host: str | None = None
+    account: str | None = None
+    plugin: str | None = None
+    config: str | None = None
+    uenv: dict | None = None
+    nodes: int | None = None
+    walltime: str | None = None
+    src: str | None = None
+    conda_env: str | None = None
+
+    def __init__(self,
+                 config: ConfigTask,
+                 task_ref: ConfigCycleTask,
+                 workflow: Workflow,
+                 date: datetime | None = None):
+        self.name = config.name
+        self.date = date
+        self.inputs = []
+        self.outputs = []
+        self.wait_on = []
+        self.workflow = workflow
+        # Long list of not always supported keywords
+        self.command = config.command
+        self.command_option = config.command_option
+        self.input_arg_options = config.input_arg_options
+        self.host = config.host
+        self.account = config.account
+        self.plugin = config.plugin
+        self.config = config.config
+        self.uenv = config.uenv
+        self.nodes = config.nodes
+        self.walltime = config.walltime
+        self.src = config.src
+        self.conda_env = config.conda_env
+
+        for input_spec in task_ref.inputs:
+            self.inputs.append(data for data in workflow.data.get((input_spec, self.date)))
+        for output_spec in task_ref.outputs:
+            self.outputs.append(self.workflow.data[output_spec.name, self.date])
+        # Store for actual linking in link_wait_on_tasks() once all tasks are created
+        self._wait_on_specs = task_ref.depends
+
+    def link_wait_on_tasks(self):
+        for wait_on_spec in self._wait_on_specs:
+            self.wait_on.append(task for task in self.workflow.tasks.get((wait_on_spec, self.date)))
+
+
+@dataclass(kw_only=True)
+class Data:
+    """Internal representation of a data node"""
+
+    name: str
+    type: str
+    src: str
+    available: bool
+    date: datetime | None = None
 
     @classmethod
-    def from_data(cls, data: Data, unrolled_task: UnrolledTask, unrolled_date: datetime):
+    def from_config(cls, config: _DataBaseModel, *, date: datetime | None = None):
         return cls(
-            unrolled_task,
-            unrolled_date,
-            data.name,
-            data.type,
-            data.src,
-            data.lag,
-            data.date,
-            data.arg_option,
-            available=data.available,
+            name=config.name,
+            type=config.type,
+            src=config.src,
+            available=config.available,
+            date=date,
         )
 
-    def __init__(
-        self,
-        unrolled_task: UnrolledTask,
-        unrolled_date: datetime,
-        name: str,
-        type: str,  # noqa: A002
-        src: str,
-        lag: list[Duration],
-        date: list[datetime],
-        arg_option: str | None,
-        *,
-        available: bool,
-    ):
-        super().__init__(name, type, src, lag, date, arg_option, available=available)
-        self._unrolled_task = unrolled_task
-        self._unrolled_date = unrolled_date
-
-    def __repr__(self) -> str:
-        if self.unrolled_task is None:
-            identifier = f"{self.__class__.__name__} '{self.name}' with date {self.unrolled_date}"
+
+class TimeSeries():
+    """Dictionary of objects accessed by date, checking start and end dates"""
+
+    # start_date: datetime | None = None
+    # end_date: datetime | None = None
+    # _dict: dict[datetime, Any] = {}
+
+    def __init__(self):
+        self.start_date = None
+        self.end_date = None
+        self._dict = {}
+
+    def __setitem__(self, date: datetime, data: Any) -> None:
+        if date in self._dict.keys():
+            raise KeyError(f"date {date} already used, cannot set twice")
+        self._dict[date] = data
+        if self.start_date is None:
+            self.start_date = date
+            self.end_date = date
+        elif date < self.start_date:
+            self.start_date = date
+        elif date > self.end_date:
+            self.end_date = date
+
+    def __getitem__(self, date: datetime) -> 
Any: + if date < self.start_date or date > self.end_date: + # TODO proper logging + print(f"WARNING: date {date} is out of bounds, ignoring.") + return None + if date not in self._dict: + msg = f"date {date} not found" + raise KeyError(msg) + return self._dict[date] + + +# TODO metaclass to generate stores of specific data type (avoid `Any`) +class Store: + """Container for TimeSeries or unique data""" + + def __init__(self): + self._dict: dict[str, Any] = {} + + def __setitem__(self, key: str | tuple(str, datetime|None), value: Any) -> None: + if isinstance(key, tuple): + name, date = key else: - identifier = f"{self.__class__.__name__} '{self.name}' with date {self.unrolled_date} attached to task {self.unrolled_task}" - return super().__repr__().replace(f"{self.__class__.__name__}", identifier) - - @property - def unrolled_date(self) -> datetime: - return self._unrolled_date - - @property - def unrolled_task(self) -> UnrolledTask: - return self._unrolled_task - - -class _DependencyBase: - def __init__(self, depend_on_task_name: str, lag: list[Duration], date: list[datetime], cycle_name: str | None): - self._depend_on_task_name = depend_on_task_name - if len(lag) > 0 and len(date) > 0: - msg = "Only one key 'lag' or 'date' is allowed. Not both." - raise ValueError(msg) - - self._lag = lag - self._date = date - self._cycle_name = cycle_name - - @property - def depend_on_task_name(self) -> str: - return self._depend_on_task_name - - @property - def lag(self) -> list[Duration]: - return self._lag - - @property - def date(self) -> list[datetime]: - return self._date - - @property - def cycle_name(self) -> str | None: - return self._cycle_name - - -class Dependency(_DependencyBase): - def __init__(self, depend_on_task_name: str, lag: list[Duration], date: list[datetime], cycle_name: str | None): - super().__init__(depend_on_task_name, lag, date, cycle_name) - self._task: Task | None = None - - def unroll(self, unrolled_task: UnrolledTask) -> Generator[UnrolledDependency, None, None]: - if len(self._date) == 0 and len(self._lag) == 0: - yield UnrolledDependency.from_dependency(self, unrolled_task, unrolled_task.unrolled_date) - - for lag in self._lag: - lagged_date = unrolled_task.unrolled_date + lag - if ( - lagged_date >= unrolled_task.unrolled_cycle.start_date - and lagged_date <= unrolled_task.unrolled_cycle.end_date - ): - yield UnrolledDependency.from_dependency(self, unrolled_task, lagged_date) - - for date in self._date: - yield UnrolledDependency.from_dependency(self, unrolled_task, date) - - @property - def task(self) -> Task | None: - return self._task - - @task.setter - def task(self, task: Task): - if self.task is not None: - msg = f"Dependency was already assigned to task {self.task}. Cannot assign to task {task}." - raise ValueError(msg) - self._task = task - - def __repr__(self) -> str: - if self._cycle_name is None: - identifier = f"{self.__class__.__name__} on task '{self.depend_on_task_name}' attached to task {self.task}" + name, date = key, None + if date is None: + if name in self._dict: + raise KeyError(f"single entry {name} already set") + else: + self._dict[name] = value else: - identifier = f"{self.__class__.__name__} on task '{self.depend_on_task_name}' in cycle '{self.cycle_name}' attached to task {self.task}" - return super().__repr__().replace(f"{self.__class__.__name__}", identifier) - - -class UnrolledDependency(_DependencyBase): - """ - This class should be only initiated through unrolling a cycle. 
- """ - - @classmethod - def from_dependency(cls, depend: Dependency, unrolled_task: UnrolledTask, unrolled_date: datetime): - return cls(unrolled_task, unrolled_date, depend.depend_on_task_name, depend.lag, depend.date, depend.cycle_name) - - def __init__( - self, - unrolled_task: UnrolledTask, - unrolled_date: datetime, - depend_on_task_name: str, - lag: list[Duration], - date: list[datetime], - cycle_name: str | None, - ): - super().__init__(depend_on_task_name, lag, date, cycle_name) - self._unrolled_task = unrolled_task - self._unrolled_date = unrolled_date - - def __repr__(self) -> str: - if self._cycle_name is None: - identifier = ( - f"{self.__class__.__name__} on task '{self.depend_on_task_name}' with date {self.unrolled_date}" - ) + if name in self._dict: + if isinstance(self._dict[name], TimeSeries): + self._dict[name][date] = value + else: + raise KeyError(f"entry {name} is a TimeSeries, must be accessed by date") + + def __getitem__(self, key: str | tuple(str, datetime|None)) -> Any: + if isinstance(key, tuple): + name, date = key else: - identifier = f"{self.__class__.__name__} on task '{self.depend_on_task_name}' in cycle '{self.cycle_name}' with date {self.unrolled_date}" - return super().__repr__().replace(f"{self.__class__.__name__}", identifier) - - @property - def depend_on_task(self) -> UnrolledTask: - """ - throws error if not found - """ - # for now we only support looking in the same cycle - workflow = self._unrolled_task.unrolled_cycle.workflow - if self._cycle_name is None: - tasks_to_search = [ - cycle.unrolled_tasks for cycle in workflow.unrolled_cycles if cycle.unrolled_date == self._unrolled_date - ] - potential_tasks = [ - task for task in chain.from_iterable(tasks_to_search) if task.name == self._depend_on_task_name - ] - if len(potential_tasks) > 1: - msg = ( - f"Found multiple instances of the task '{self._depend_on_task_name}' with date {self._unrolled_date}" - " for dependency of the task {self._unrolled_task}. Please specify a cycle name." - ) - raise ValueError(msg) - if len(potential_tasks) == 0: - msg = ( - f"Found no instance of the task '{self._depend_on_task_name}' with date {self._unrolled_date}" - f" for dependency attached to task {self._unrolled_task}." 
- ) - raise ValueError(msg) - return potential_tasks[0] - - cycle = workflow.unrolled_cycles_map[(self._cycle_name, self._unrolled_date)] - return cycle.unrolled_tasks_map[self._depend_on_task_name] - - @property - def unrolled_task(self) -> UnrolledTask: - return self._unrolled_task - - @property - def unrolled_date(self) -> datetime: - return self._unrolled_date - - -class _TaskBase: - """ - Common class for Task and UnrolledTask to reduce code duplications - """ - - def __init__( - self, - name: str, - command: str, - inputs: list[Data], - outputs: list[Data], - depends: list[Dependency], - command_option: str | None, - ): - self._name = name - self._command = expandvars(command) - self._inputs = inputs - self._outputs = outputs - self._depends = depends - self._command_option = command_option - - @property - def name(self) -> str: - return self._name - - @property - def command(self) -> str: - return self._command - - @property - def inputs(self) -> list[Data]: - return self._inputs - - @property - def outputs(self) -> list[Data]: - return self._outputs - - @property - def command_option(self) -> str | None: - return self._command_option - - @property - def depends(self) -> list[Dependency]: - return self._depends - - -class Task(_TaskBase): - """A task that is created during the unrolling of a cycle.""" - - def __init__( - self, - name: str, - command: str, - inputs: list[Data], - outputs: list[Data], - depends: list[Dependency], - command_option: str | None, - ): - super().__init__(name, command, inputs, outputs, depends, command_option) - for input_ in inputs: - input_.task = self - for output in outputs: - output.task = self - for depend in depends: - depend.task = self - self._cycle: Cycle | None = None - - def __repr__(self) -> str: - identifier = f"Task '{self.name}'" - if self.cycle is not None: - identifier += f" in cycle {self.cycle.name}" - return super().__repr__().replace("Task", identifier) - - def unroll(self, unrolled_cycle: UnrolledCycle) -> Generator[tuple[str, UnrolledTask], None, None]: - # an unrolled task is just one task, since the date is determined - # by the cycle, but we keep the pattern for consistency - unrolled_task = UnrolledTask.from_task(self, unrolled_cycle) - yield unrolled_task.name, unrolled_task - - @property - def cycle(self) -> Cycle | None: - return self._cycle - - @cycle.setter - def cycle(self, cycle: Cycle): - if self._cycle is not None: - msg = f"Task {self} was already assigned to cycle {self._cycle}. Cannot assign task to cycle {cycle}." - raise ValueError(msg) - self._cycle = cycle - - -class UnrolledTask(_TaskBase): - """ - This class should be only initiated through unrolling a cycle. 
- """ - - @classmethod - def from_task(cls, task: Task, unrolled_cycle: UnrolledCycle): - return cls( - unrolled_cycle, task.name, task.command, task.inputs, task.outputs, task.depends, task.command_option - ) - - def __init__( - self, - unrolled_cycle: UnrolledCycle, - name: str, - command: str, - inputs: list[Data], - outputs: list[Data], - depends: list[Dependency], - command_option: str | None, - ): - super().__init__(name, command, inputs, outputs, depends, command_option) - self._unrolled_cycle = unrolled_cycle - self._unrolled_inputs = list(self.unroll_inputs()) - self._unrolled_outputs = list(self.unroll_outputs()) - self._unrolled_depends = list(self.unroll_depends()) - - def __repr__(self) -> str: - if self.unrolled_cycle is None: - identifier = f"Task '{self.name}' with date {self.unrolled_date}" + name, date = key, None + if date is None: + if name in self._dict: + if isinstance(self._dict[name], TimeSeries): + raise KeyError(f"entry {name} is a TimeSeries, must be accessed by date") + else: + return self._dict[name] + else: + if name in self._dict: + if isinstance(self._dict[name], TimeSeries): + return self._dict[name][date] + else: + raise KeyError(f"entry {name} is not a TimeSeries, cannot be accessed must by date") + + def add(self, key: str | tuple[str, datetime|None], value: Any) -> None: + if isinstance(key, tuple): + name, date = key else: - identifier = f"Task '{self.name}' in cycle {self.unrolled_cycle.name} with date {self.unrolled_date}" - return super().__repr__().replace("Task", identifier) - - def unroll_inputs(self) -> Generator[UnrolledData, None, None]: - """ - Outputs the inputs together with a unique identifier within the task - """ - for input_ in self._inputs: - yield from input_.unroll(self) - - def unroll_outputs(self) -> Generator[UnrolledData, None, None]: - for output in self._outputs: - yield from output.unroll(self) - - def unroll_depends(self) -> Generator[UnrolledDependency, None, None]: - for depend in self._depends: - yield from depend.unroll(self) - - @property - def unrolled_inputs(self) -> list[UnrolledData]: - return self._unrolled_inputs - - @property - def unrolled_outputs(self) -> list[UnrolledData]: - return self._unrolled_outputs - - @property - def unrolled_depends(self) -> list[UnrolledDependency]: - return self._unrolled_depends - - @property - def unrolled_date(self) -> datetime: - return self._unrolled_cycle.unrolled_date - - @property - def unrolled_cycle(self) -> UnrolledCycle: - return self._unrolled_cycle - - -class _CycleBase: - def __init__( - self, - name: str, - tasks: list[Task], - start_date: str | datetime, - end_date: str | datetime, - period: str | Duration | None = None, - ): - self._name = name - self._tasks = tasks - self._start_date = start_date if isinstance(start_date, datetime) else datetime.fromisoformat(start_date) - self._end_date = end_date if isinstance(end_date, datetime) else datetime.fromisoformat(end_date) - - if self._start_date > self._end_date: - msg = "For cycle {self} the start_date {start_date} lies after given end_date {end_date}." - raise ValueError(msg) - - self._period = period if period is None or isinstance(period, Duration) else parse_duration(period) - if self._period is not None and TimeUtils.duration_is_less_equal_zero(self._period): - msg = f"For cycle {self} the period {period} is negative or zero." - raise ValueError(msg) - - task_names = set() - for task in self._tasks: - if task.name in task_names: - msg = f"List of tasks does contain tasks with duplicate names. 
The task name '{task.name}' has been found twice." - raise ValueError(msg) - task_names.add(task.name) - - @property - def name(self) -> str: - return self._name - - @property - def start_date(self) -> datetime: - return self._start_date - - @property - def end_date(self) -> datetime: - return self._end_date - - @property - def period(self) -> Duration | None: - return self._period - - @property - def tasks(self) -> list[Task]: - return self._tasks - - -class Cycle(_CycleBase): - def __init__( - self, - name: str, - tasks: list[Task], - start_date: str | datetime, - end_date: str | datetime, - period: str | Duration | None, - ): - super().__init__(name, tasks, start_date, end_date, period) - for task in self._tasks: - task.cycle = self - - self._workflow: Workflow | None = None - - def __repr__(self) -> str: - if self.workflow is None: - identifier = f"Cycle '{self.name}'" + name, date = key, None + if date is None: + if name in self._dict: + if isinstance(self._dict[name], TimeSeries): + raise ValueError(f"TimeSeries object requires a date as key") + raise ValueError(f"{name} already set, cannot set twice") + self._dict[name] = value else: - identifier = f"Cycle '{self.name}' in workflow {self.workflow.name}" - return super().__repr__().replace("Cycle", identifier) - - def unroll(self) -> Generator[tuple[str, datetime, UnrolledCycle], None, None]: - if self._workflow is None: - msg = f"Cannot unroll cycle {self} because it was not attached to a workflow before." - raise ValueError(msg) - current_date = self._start_date - while current_date <= self._end_date: - unrolled_cycle = UnrolledCycle.from_cycle(self, current_date, self._workflow) - yield unrolled_cycle.name, unrolled_cycle.unrolled_date, unrolled_cycle - if self._period is None: - break + if name not in self._dict: + self._dict[name] = TimeSeries() + self._dict[name][date] = value + + def get(self, spec: ConfigCycleSpec, ref_date: datetime|None = None) -> Iterator(Any): + name = spec.name + if isinstance(self._dict[name], TimeSeries): + if ref_date is None: + raise ValueError("TimeSeries object must be referenced by dates") else: - current_date += self._period - - @property - def workflow(self) -> Workflow | None: - return self._workflow - - @workflow.setter - def workflow(self, workflow: Workflow): - if self._workflow is not None: - msg = f"Cycle {self} was already assigned to workflow {self._workflow}. Cannot assign cycle to workflow {workflow}." - raise ValueError(msg) - self._workflow = workflow - - -class UnrolledCycle(_CycleBase): - """ - This class should be only initiated through unrolling a cycle. 
- """ - - @classmethod - def from_cycle(cls, cycle: Cycle, unrolled_date: datetime, workflow: Workflow): - return cls(unrolled_date, cycle.name, cycle.tasks, cycle.start_date, cycle.end_date, cycle.period, workflow) - - def __init__( - self, - unrolled_date: datetime, - name: str, - tasks: list[Task], - start_date: str | datetime, - end_date: str | datetime, - period: str | Duration | None, - workflow: Workflow, - ): - super().__init__(name, tasks, start_date, end_date, period) - - self._unrolled_date = unrolled_date - - self._unrolled_tasks_map = dict(self.unroll_tasks()) - self._workflow = workflow - - def __repr__(self) -> str: - if self.workflow is None: - identifier = f"UnrolledCycle '{self.name}' with date {self.unrolled_date}" + for target_date in spec.resolve_target_dates(ref_date): + yield self._dict[name][target_date] else: - identifier = f"UnrolledCycle '{self.name}' in workflow {self.workflow.name} with date {self.unrolled_date}" - return super().__repr__().replace("UnrolledCycle", identifier) + if spec.lag or spec.date: + raise ValueError(f"item {name} is not a TimeSeries, cannot be referenced vis date or lag") + else: + yield self._dict[name] - def unroll_tasks(self) -> Generator[tuple[str, UnrolledTask], None, None]: - for task in self._tasks: - yield from task.unroll(self) + if ref_date is None: + if isinstance(self._dict[name], TimeSeries): + raise ValueError(f"TimeSeries object requires a date as key") + return self._dict[name] + else: + return self._dict[name][date] - @property - def unrolled_tasks(self) -> list[UnrolledTask]: - return list(self._unrolled_tasks_map.values()) + def values(self) -> Iterator[Any]: + for item in self._dict.values(): + if isinstance(item, TimeSeries): + for subitem in item._dict.values(): + yield subitem + else: + yield item - @property - def unrolled_tasks_map(self) -> dict[str, UnrolledTask]: - return self._unrolled_tasks_map - @property - def unrolled_date(self) -> datetime: - return self._unrolled_date +@dataclass(kw_only=True) +class Cycle: + """Internal reprenstation of a cycle""" - @property - def workflow(self) -> Workflow: - return self._workflow + name: str + tasks: list[Task] + date: datetime | None = None class Workflow: - def __init__(self, name: str, cycles: list[Cycle]): - self._name = name - self._cycles = cycles - for cycle in self._cycles: - cycle.workflow = self - self._validate_cycles() - self._unrolled_cycles_map = {(name, date): cycle for name, date, cycle in self.unroll_cycles()} - - unrolled_outputs = [] - for unrolled_cycle in self.unrolled_cycles: - for unrolled_task in unrolled_cycle.unrolled_tasks: - unrolled_outputs.extend(unrolled_task.unrolled_outputs) - self._unrolled_outputs = unrolled_outputs - - def _validate_cycles(self): - """Checks if the defined workflow is correctly referencing key names.""" - cycle_names = set() - for cycle in self._cycles: - if cycle.name in cycle_names: - msg = f"List of cycles does contain cycles with duplicate names. The cycle name '{cycle.name}' has been found twice." 
- raise ValueError(msg)
-            cycle_names.add(cycle.name)
-
-    def unroll_cycles(self) -> Generator[tuple[str, datetime, UnrolledCycle], None, None]:
-        for cycle in self._cycles:
-            yield from cycle.unroll()
-
-    @property
-    def name(self) -> str:
-        return self._name
-
-    @property
-    def cycles(self) -> list[Cycle]:
-        return self._cycles
-
-    @property
-    def unrolled_cycles(self) -> list[UnrolledCycle]:
-        return list(self._unrolled_cycles_map.values())
-
-    @property
-    def unrolled_cycles_map(self) -> dict[tuple[str, datetime], UnrolledCycle]:
-        return self._unrolled_cycles_map
+    """Internal representation of a workflow"""
+
+    def __init__(self, workflow_config: ConfigWorkflow) -> None:
+
+        self.tasks = Store()
+        self.data = Store()
+        self.cycles = {}
+
+        ind = ' '
+        # 1 - create available data nodes
+        for data_config in workflow_config.data.available:
+            self.data.add(data_config.name, Data.from_config(data_config, date=None))
+
+        # 2 - create output data nodes
+        for cycle_config in workflow_config.cycles:
+            for date in cycle_config.dates():
+                for task_ref in cycle_config.tasks:
+                    for data_ref in task_ref.outputs:
+                        data_name = data_ref.name
+                        data_config = workflow_config.data_dict[data_name]
+                        self.data.add((data_name, date), Data.from_config(data_config, date=date))
+
+        # 3 - create cycles and tasks
+        for cycle_config in workflow_config.cycles:
+            cycle_name = cycle_config.name
+            for date in cycle_config.dates():
+                cycle_tasks = []
+                for task_ref in cycle_config.tasks:
+                    task_name = task_ref.name
+                    task_config = workflow_config.task_dict[task_name]
+                    self.tasks.add((task_name, date), task := Task(task_config, task_ref, workflow=self, date=date))
+                    cycle_tasks.append(task)
+                self.cycles[cycle_name] = Cycle(name=cycle_name, tasks=cycle_tasks, date=date)
+
+        # 4 - Link wait on tasks
+        for task in self.tasks.values():
+            task.link_wait_on_tasks()
+
+    @classmethod
+    def from_yaml(cls, config_path: str):
+        return cls(load_workflow_config(config_path))
diff --git a/src/sirocco/parsing/_yaml_data_models.py b/src/sirocco/parsing/_yaml_data_models.py
index 98c96cbe..cb984578 100644
--- a/src/sirocco/parsing/_yaml_data_models.py
+++ b/src/sirocco/parsing/_yaml_data_models.py
@@ -1,4 +1,5 @@
 from __future__ import annotations
+from collections.abc import Iterator
 
 import time
 from datetime import datetime
@@ -9,9 +10,6 @@
 from isoduration import parse_duration
 from isoduration.types import Duration  # pydantic needs type  # noqa: TCH002
 from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
-
-from sirocco import core
-
 from ._utils import TimeUtils
 
 
@@ -77,14 +75,27 @@ def convert_datetimes(cls, value) -> list[datetime]:
         values = value if isinstance(value, list) else [value]
         return [datetime.fromisoformat(value) for value in values]
 
+    def resolve_target_dates(self, ref_date: datetime | None) -> Iterator[datetime]:
+        if not self.lag and not self.date:
+            yield ref_date
+        if self.lag:
+            for l in self.lag:
+                yield ref_date + l
+        if self.date:
+            for d in self.date:
+                yield d
+
 
 class ConfigTask(_NamedBaseModel):
     """
     To create an instance of a task defined in a workflow file
     """
 
+    # TODO This list is too large. 
We should start with the set of supported
+    # keywords and extend it as we support more
     command: str
     command_option: str | None = None
+    input_arg_options: dict[str, str] | None = None
     host: str | None = None
     account: str | None = None
     plugin: str | None = None
@@ -146,7 +157,9 @@ class ConfigGeneratedData(_DataBaseModel):
 
 
 class ConfigData(BaseModel):
-    available: list[ConfigAvailableData]
+    """To create the container of available and generated data"""
+
+    available: list[ConfigAvailableData] | None = None
     generated: list[ConfigGeneratedData]
 
 
@@ -155,6 +168,7 @@ class ConfigCycleTaskDepend(_NamedBaseModel, _LagDateBaseModel):
     To create an instance of a input or output in a task in a cycle defined in a workflow file.
     """
 
+    # TODO Move to "wait_on" keyword in yaml instead of "depend"
     name: str  # name of the task it depends on
     cycle_name: str | None = None
 
@@ -253,6 +267,17 @@ def convert_datetime(cls, value) -> None | datetime:
     def convert_duration(cls, value):
         return None if value is None else parse_duration(value)
 
+    @model_validator(mode="before")
+    @classmethod
+    def check_start_date_end_date_period_combination(cls, data: Any) -> Any:
+        if ("start_date" in data) ^ ("end_date" in data):
+            msg = f"in cycle {data['name']}: both start_date and end_date must be provided or none of them."
+            raise ValueError(msg)
+        if 'period' in data and 'start_date' not in data:
+            msg = f"in cycle {data['name']}: period provided without start and end dates."
+            raise ValueError(msg)
+        return data
+
     @model_validator(mode="after")
     def check_start_date_before_end_date(self) -> ConfigCycle:
         if self.start_date is not None and self.end_date is not None and self.start_date > self.end_date:
@@ -267,77 +292,26 @@ def check_period_is_not_negative_or_zero(self) -> ConfigCycle:
             raise ValueError(msg)
         return self
 
+    def dates(self) -> Iterator[datetime]:
+        yield (date := self.start_date)
+        if self.period is not None:
+            while (date := date + self.period) < self.end_date:
+                yield date
+
 
 class ConfigWorkflow(BaseModel):
     name: str | None = None
-    start_date: datetime
-    end_date: datetime
     cycles: list[ConfigCycle]
     tasks: list[ConfigTask]
     data: ConfigData
     data_dict: dict = {}
     task_dict: dict = {}
 
-    @field_validator("start_date", "end_date", mode="before")
-    @classmethod
-    def convert_datetime(cls, value) -> None | datetime:
-        return None if value is None else datetime.fromisoformat(value)
-
     @model_validator(mode="after")
-    def check_start_date_before_end_date(self) -> ConfigWorkflow:
-        if self.start_date is not None and self.end_date is not None and self.start_date > self.end_date:
-            msg = "For workflow {self._name!r} the start_date {start_date!r} lies after given end_date {end_date!r}." 
- raise ValueError(msg) - return self - - def to_core_workflow(self): - self.data_dict = {data.name: data for data in self.data.available} | { - data.name: data for data in self.data.generated - } + def build_internal_dicts(self) -> ConfigWorkflow: + self.data_dict = {data.name: data for data in self.data.available} | {data.name: data for data in self.data.generated} self.task_dict = {task.name: task for task in self.tasks} - - core_cycles = [self._to_core_cycle(cycle) for cycle in self.cycles] - return core.Workflow(self.name, core_cycles) - - def _to_core_cycle(self, cycle: ConfigCycle) -> core.Cycle: - core_tasks = [self._to_core_task(task) for task in cycle.tasks] - start_date = self.start_date if cycle.start_date is None else cycle.start_date - end_date = self.end_date if cycle.end_date is None else cycle.end_date - return core.Cycle(cycle.name, core_tasks, start_date, end_date, cycle.period) - - def _to_core_task(self, cycle_task: ConfigCycleTask) -> core.Task: - inputs = [] - outputs = [] - dependencies = [] - - for input_ in cycle_task.inputs: - if (data := self.data_dict.get(input_.name)) is None: - msg = f"Task {cycle_task.name!r} has input {input_.name!r} that is not specied in the data section." - raise ValueError(msg) - core_data = core.Data( - input_.name, data.type, data.src, input_.lag, input_.date, input_.arg_option, available=data.available - ) - inputs.append(core_data) - - for output in cycle_task.outputs: - if (data := self.data_dict.get(output.name)) is None: - msg = f"Task {cycle_task.name!r} has output {output.name!r} that is not specied in the data section." - raise ValueError(msg) - core_data = core.Data(output.name, data.type, data.src, [], [], None, available=False) - outputs.append(core_data) - - for depend in cycle_task.depends: - core_dependency = core.Dependency(depend.name, depend.lag, depend.date, depend.cycle_name) - dependencies.append(core_dependency) - - return core.Task( - cycle_task.name, - self.task_dict[cycle_task.name].command, - inputs, - outputs, - dependencies, - self.task_dict[cycle_task.name].command_option, - ) + return self def load_workflow_config(workflow_config: str) -> ConfigWorkflow: diff --git a/tests/files/configs/test_config_large.yml b/tests/files/configs/test_config_large.yml index 63223394..537405c1 100644 --- a/tests/files/configs/test_config_large.yml +++ b/tests/files/configs/test_config_large.yml @@ -1,57 +1,39 @@ --- -start_date: 2026-01-01T00:00 -end_date: 2027-01-01T00:00 +start_date: &root_start_date '2025-01-01T00:00' +end_date: &root_end_date '2027-01-01T00:00' cycles: - init: tasks: - extpar: - inputs: - - obs_data: - arg_option: --input + inputs: [obs_data] outputs: [extpar_file] - icon_bimonthly: + start_date: *root_start_date + end_date: *root_end_date period: P2M tasks: - preproc: - inputs: - - grid_file: - arg_option: -g - - extpar_file: - date: 2026-01-01T00:00 - arg_option: -p - - ERA5: - arg_option: -e + inputs: [grid_file, extpar_file, ERA5] outputs: [icon_input] depends: - icon: lag: -P4M - icon: inputs: - - grid_file: - arg_option: -g - - icon_input: - arg_option: --input - #- icon_restart: - # lag: '-P2M' - outputs: - - stream_1 - - stream_2 - #- icon_restart + - grid_file + - icon_input + - icon_restart: + lag: '-P2M' + outputs: [stream_1, stream_2, icon_restart] - postproc_1: - inputs: - - stream_1: - arg_option: --input + inputs: [stream_1] outputs: [postout_1] - store_and_clean_1: - inputs: - - postout_1: - arg_option: --input - - stream_1: - arg_option: --stream - - icon_input: - arg_option: 
--icon_input + inputs: [postout_1, stream_1, icon_input] outputs: [stored_data_1] - yearly: + start_date: *root_start_date + end_date: *root_end_date period: P1Y tasks: - postproc_2: @@ -63,7 +45,6 @@ cycles: - store_and_clean_2: inputs: - postout_2: - arg_option: --input - stream_2: lag: [P0M] outputs: diff --git a/tests/test_wc_workflow.py b/tests/test_wc_workflow.py index af421bf1..48276860 100644 --- a/tests/test_wc_workflow.py +++ b/tests/test_wc_workflow.py @@ -1,7 +1,7 @@ import pytest from sirocco.parsing import load_workflow_config - +from sirocco.core import Workflow @pytest.fixture def config_file_small(): @@ -13,4 +13,4 @@ def config_file_small(): ) def test_parse_config_file(config_file): config_workflow = load_workflow_config(config_file) - config_workflow.to_core_workflow() + core_workflow = Workflow(config_workflow) From 09494652819c0596f74a74388dbba041c17e2056 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Mon, 4 Nov 2024 15:32:45 +0100 Subject: [PATCH 02/39] ref: move input_arg_options to task definition --- tests/files/configs/test_config_large.yml | 27 +++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/tests/files/configs/test_config_large.yml b/tests/files/configs/test_config_large.yml index 537405c1..0d786ef6 100644 --- a/tests/files/configs/test_config_large.yml +++ b/tests/files/configs/test_config_large.yml @@ -39,14 +39,14 @@ cycles: - postproc_2: inputs: - stream_2: - lag: [P0M] + lag: ['P0M', 'P2M', 'P4M', 'P6M', 'P8M', 'P10M'] arg_option: --input outputs: [postout_2] - store_and_clean_2: inputs: - - postout_2: + - postout_2 - stream_2: - lag: [P0M] + lag: ['P0M', 'P2M', 'P4M', 'P6M', 'P8M', 'P10M'] outputs: - stored_data_2 # Each task and piece of data (input and output of tasks) used to @@ -59,7 +59,9 @@ tasks: - extpar: plugin: extpar command: $PWD/examples/files/scripts/extpar - command_option: --verbose # todo implement support + command_option: '--verbose' # todo implement support + input_arg_options: + obs_data: '--input' config: path/to/namelists/dir uenv: squashfs: path/to/squashfs @@ -69,6 +71,10 @@ tasks: - preproc: plugin: AiiDA Shell command: $PWD/examples/files/scripts/cleanup.sh + input_arg_options: + grid_file: '-g' + extpar_file : '-p' + ERA5: '-e' nodes: 4 walltime: 00:02:00 config: path/to/config/dir @@ -78,6 +84,9 @@ tasks: - icon: plugin: icon command: $PWD/examples/files/scripts/icon + input_arg_options: + grid_file: '-g' + icon_input: '--input' nodes: 40 walltime: 23:59:59 config: path/to/namelists/dir @@ -87,6 +96,8 @@ tasks: - postproc_1: plugin: AiiDA Shell command: $PWD/examples/files/scripts/main_script_ocn.sh + input_arg_options: + stream_1: '--input' nodes: 2 walltime: 00:05:00 conda_env: path/to/yaml/env/file @@ -96,6 +107,8 @@ tasks: - postproc_2: plugin: AiiDA Shell command: $PWD/examples/files/scripts/main_script_atm.sh + input_arg_options: + stream_2: '--input' nodes: 2 walltime: 00:05:00 src: path/to/src/dir @@ -106,11 +119,17 @@ tasks: - store_and_clean_1: plugin: AiiDA Shell command: $PWD/examples/files/scripts/post_clean.sh + input_arg_options: + postout_1: '--input' + stream_1: '--stream' + icon_input: '--icon_input' nodes: 1 walltime: 00:01:00 - store_and_clean_2: plugin: AiiDA Shell command: $PWD/examples/files/scripts/post_clean.sh + input_arg_options: + postout_2: '--input' nodes: 1 walltime: 00:01:00 data: From 6cdec0faff5b64e367e41cd6b57e724e8f00b807 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Mon, 4 Nov 2024 15:33:44 +0100 Subject: [PATCH 03/39] fix: missing part in __setitem__ 
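
Before this fix, Store.__setitem__ silently dropped the value when a dated
entry was inserted under a name not yet present in the store: the dated
branch had no counterpart for new names. Illustrative sketch, with a
hypothetical date and payload:

    store = Store()
    store["icon_restart", datetime(2025, 1, 1)] = data  # previously fell through, nothing stored

The added else branch now creates the TimeSeries on first use before
indexing into it by date.
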
---
 src/sirocco/core.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/sirocco/core.py b/src/sirocco/core.py
index df8f5bb8..5eb774ee 100644
--- a/src/sirocco/core.py
+++ b/src/sirocco/core.py
@@ -159,6 +159,9 @@ def __setitem__(self, key: str | tuple(str, datetime|None), value: Any) -> None:
                     self._dict[name][date] = value
                 else:
                     raise KeyError(f"entry {name} is a TimeSeries, must be accessed by date")
+            else:
+                self._dict[name] = TimeSeries()
+                self._dict[name][date] = value
 
     def __getitem__(self, key: str | tuple(str, datetime|None)) -> Any:
         if isinstance(key, tuple):

From fb972225c97d99c4d5f23a7f4ca896a11789d74e Mon Sep 17 00:00:00 2001
From: Matthieu
Date: Mon, 4 Nov 2024 15:34:15 +0100
Subject: [PATCH 04/39] ref: some minor typing changes

---
 src/sirocco/core.py                      | 17 +++++++----------
 src/sirocco/parsing/_yaml_data_models.py |  4 +++-
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/src/sirocco/core.py b/src/sirocco/core.py
index 5eb774ee..9eb69f47 100644
--- a/src/sirocco/core.py
+++ b/src/sirocco/core.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, TYPE_CHECKING
+
 from datetime import datetime
 
 from sirocco.parsing._yaml_data_models import (
@@ -104,14 +105,10 @@ def from_config(cls, config: _DataBaseModel, *, date: datetime | None = None):
         date=date,
     )
 
 
 class TimeSeries():
     """Dictionary of objects accessed by date, checking start and end dates"""
 
-    # start_date: datetime | None = None
-    # end_date: datetime | None = None
-    # _dict: dict[datetime, Any] = {}
-
     def __init__(self):
-        self.start_date = None
-        self.end_date = None
-        self._dict = {}
+        self.start_date: datetime | None = None
+        self.end_date: datetime | None = None
+        self._dict: dict[datetime, Any] = {}
@@ -141,7 +138,7 @@ class Store:
     """Container for TimeSeries or unique data"""
 
     def __init__(self):
-        self._dict: dict[str, Any] = {}
+        self._dict: dict[str, TimeSeries | Any] = {}
 
     def __setitem__(self, key: str | tuple(str, datetime|None), value: Any) -> None:
         if isinstance(key, tuple):
diff --git a/src/sirocco/parsing/_yaml_data_models.py b/src/sirocco/parsing/_yaml_data_models.py
index cb984578..d2b378f1 100644
--- a/src/sirocco/parsing/_yaml_data_models.py
+++ b/src/sirocco/parsing/_yaml_data_models.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
-from collections.abc import Iterator
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from collections.abc import Iterator
 
 import time
 from datetime import datetime

From 536bf3dc7a3b7a2df027a8d2787f70f75e578a0a Mon Sep 17 00:00:00 2001
From: Matthieu
Date: Mon, 4 Nov 2024 16:25:27 +0100
Subject: [PATCH 05/39] ref(core): remove add method, redundant with
 __setitem__

---
 src/sirocco/core.py                       | 28 +++++------------------
 tests/files/configs/test_config_large.yml |  2 +-
 2 files changed, 7 insertions(+), 23 deletions(-)

diff --git a/src/sirocco/core.py b/src/sirocco/core.py
index 9eb69f47..0d8aaff9 100644
--- a/src/sirocco/core.py
+++ b/src/sirocco/core.py
@@ -101,7 +101,7 @@ def from_config(cls, config: _DataBaseModel, *, date: datetime | None = None):
             date=date,
         )
 
-
+# TODO metaclass to generate stores of specific data type (avoid `Any`)
 class 
TimeSeries(): """Dictionnary of objects accessed by date, checking start and end dates""" @@ -178,22 +178,6 @@ def __getitem__(self, key: str | tuple(str, datetime|None)) -> Any: else: raise KeyError(f"entry {name} is not a TimeSeries, cannot be accessed must by date") - def add(self, key: str | tuple[str, datetime|None], value: Any) -> None: - if isinstance(key, tuple): - name, date = key - else: - name, date = key, None - if date is None: - if name in self._dict: - if isinstance(self._dict[name], TimeSeries): - raise ValueError(f"TimeSeries object requires a date as key") - raise ValueError(f"{name} already set, cannot set twice") - self._dict[name] = value - else: - if name not in self._dict: - self._dict[name] = TimeSeries() - self._dict[name][date] = value - def get(self, spec: ConfigCycleSpec, ref_date: datetime|None = None) -> Iterator(Any): name = spec.name if isinstance(self._dict[name], TimeSeries): @@ -240,12 +224,12 @@ def __init__(self, workflow_config: ConfigWorkflow) -> None: self.tasks = Store() self.data = Store() - self.cycles = {} + self.cycles = Store() ind = ' ' # 1 - create availalbe data nodes for data_config in workflow_config.data.available: - self.data.add(data_config.name, Data.from_config(data_config, date=None)) + self.data[data_config.name] = Data.from_config(data_config, date=None) # 2 - create output data nodes for cycle_config in workflow_config.cycles: @@ -254,7 +238,7 @@ def __init__(self, workflow_config: ConfigWorkflow) -> None: for data_ref in task_ref.outputs: data_name = data_ref.name data_config = workflow_config.data_dict[data_name] - self.data.add((data_name, date), Data.from_config(data_config, date=date)) + self.data[data_name, date] = Data.from_config(data_config, date=date) # 3 - create cycles and tasks for cycle_config in workflow_config.cycles: @@ -264,9 +248,9 @@ def __init__(self, workflow_config: ConfigWorkflow) -> None: for task_ref in cycle_config.tasks: task_name = task_ref.name task_config = workflow_config.task_dict[task_name] - self.tasks.add((task_name, date), task := Task(task_config, task_ref, workflow=self, date=date)) + self.tasks[task_name, date] = (task := Task(task_config, task_ref, workflow=self, date=date)) cycle_tasks.append(task) - self.cycles[cycle_name] = Cycle(name=cycle_name, tasks=cycle_tasks, date=date) + self.cycles[cycle_name, date] = Cycle(name=cycle_name, tasks=cycle_tasks, date=date) # 4 - Link wait on tasks for task in self.tasks.values(): diff --git a/tests/files/configs/test_config_large.yml b/tests/files/configs/test_config_large.yml index 0d786ef6..5ff10716 100644 --- a/tests/files/configs/test_config_large.yml +++ b/tests/files/configs/test_config_large.yml @@ -72,7 +72,7 @@ tasks: plugin: AiiDA Shell command: $PWD/examples/files/scripts/cleanup.sh input_arg_options: - grid_file: '-g' + grid_file: '-g' extpar_file : '-p' ERA5: '-e' nodes: 4 From 81313911a818a39ba310e20ee980a1455a66488f Mon Sep 17 00:00:00 2001 From: Matthieu Date: Mon, 4 Nov 2024 16:33:36 +0100 Subject: [PATCH 06/39] fix: adapt small test yaml file --- tests/files/configs/test_config_small.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/files/configs/test_config_small.yml b/tests/files/configs/test_config_small.yml index fe06be9f..6832ffa4 100644 --- a/tests/files/configs/test_config_small.yml +++ b/tests/files/configs/test_config_small.yml @@ -1,18 +1,17 @@ --- -start_date: 2026-01-01T00:00 -end_date: 2026-06-01T00:00 +start_date: &root_start_date '2026-01-01T00:00' +end_date: &root_end_date 
'2026-06-01T00:00' cycles: - bimonthly_tasks: + start_date: *root_start_date + end_date: *root_end_date period: P2M tasks: - icon: inputs: - icon_restart: - arg_option: --restart lag: -P2M - outputs: - - icon_output - - icon_restart + outputs: [icon_output, icon_restart] - lastly: tasks: - cleanup: @@ -23,6 +22,8 @@ tasks: - icon: plugin: shell command: $PWD/tests/files/scripts/icon.py + input_arg_options: + icon_restart: '--restart' - cleanup: plugin: shell command: $PWD/tests/files/scripts/cleanup.py From d00c4404086b58e3a97632992760fc7ecafe961a Mon Sep 17 00:00:00 2001 From: Matthieu Date: Mon, 4 Nov 2024 17:48:30 +0100 Subject: [PATCH 07/39] add: first version of workflow string representation and some bug fixes --- src/sirocco/core.py | 119 ++++++++++++++++++++++++++++++-------------- 1 file changed, 83 insertions(+), 36 deletions(-) diff --git a/src/sirocco/core.py b/src/sirocco/core.py index 0d8aaff9..be12a15e 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -9,7 +9,6 @@ ConfigCycleTaskDepend, ConfigCycleTask, ConfigCycleTaskInput, - ConfigCycleTaskOutput, ConfigData, ConfigTask, ConfigWorkflow, @@ -70,7 +69,9 @@ def __init__(self, self.conda_env = config.conda_env for input_spec in task_ref.inputs: - self.inputs.append(data for data in workflow.data.get((input_spec, self.date))) + for data in workflow.data.get(input_spec, self.date): + if data is not None: + self.inputs.append(data) for output_spec in task_ref.outputs: self.outputs.append(self.workflow.data[output_spec.name, self.date]) # Store for actual linking in link_wait_on_tasks() once all tasks are created @@ -78,7 +79,9 @@ def __init__(self, def link_wait_on_tasks(self): for wait_on_spec in self._wait_on_specs: - self.wait_on.append(task for task in self.workflow.tasks.get((wait_on_spec, self.date))) + for task in self.workflow.tasks.get(wait_on_spec, self.date): + if task is not None: + self.wait_on.append(task) @dataclass(kw_only=True) @@ -145,17 +148,15 @@ def __setitem__(self, key: str | tuple(str, datetime|None), value: Any) -> None: name, date = key else: name, date = key, None - if date is None: - if name in self._dict: + if name in self._dict: + if not isinstance(self._dict[name], TimeSeries): raise KeyError(f"single entry {name} already set") - else: - self._dict[name] = value + if date is None: + raise KeyError(f"entry {name} is a TimeSeries, must be accessed by date") + self._dict[name][date] = value else: - if name in self._dict: - if isinstance(self._dict[name], TimeSeries): - self._dict[name][date] = value - else: - raise KeyError(f"entry {name} is a TimeSeries, must be accessed by date") + if date is None: + self._dict[name] = value else: self._dict[name] = TimeSeries() self._dict[name][date] = value @@ -165,39 +166,29 @@ def __getitem__(self, key: str | tuple(str, datetime|None)) -> Any: name, date = key else: name, date = key, None - if date is None: - if name in self._dict: - if isinstance(self._dict[name], TimeSeries): - raise KeyError(f"entry {name} is a TimeSeries, must be accessed by date") - else: - return self._dict[name] + + if name not in self._dict: + raise KeyError(f"entry {name} not found in Store") + if isinstance(self._dict[name], TimeSeries): + if date is None: + raise KeyError(f"entry {name} is a TimeSeries, must be accessed by date") + return self._dict[name][date] else: - if name in self._dict: - if isinstance(self._dict[name], TimeSeries): - return self._dict[name][date] - else: - raise KeyError(f"entry {name} is not a TimeSeries, cannot be accessed must by date") + if date 
is not None: + raise KeyError(f"entry {name} is not a TimeSeries, cannot be accessed by date") + return self._dict[name] def get(self, spec: ConfigCycleSpec, ref_date: datetime|None = None) -> Iterator(Any): name = spec.name if isinstance(self._dict[name], TimeSeries): if ref_date is None: raise ValueError("TimeSeries object must be referenced by dates") - else: - for target_date in spec.resolve_target_dates(ref_date): - yield self._dict[name][target_date] + for target_date in spec.resolve_target_dates(ref_date): + yield self._dict[name][target_date] else: if spec.lag or spec.date: - raise ValueError(f"item {name} is not a TimeSeries, cannot be referenced vis date or lag") - else: - yield self._dict[name] - - if ref_date is None: - if isinstance(self._dict[name], TimeSeries): - raise ValueError(f"TimeSeries object requires a date as key") - return self._dict[name] - else: - return self._dict[name][date] + raise ValueError(f"item {name} is not a TimeSeries, cannot be referenced via date or lag") + yield self._dict[name] def values(self) -> Iterator[Any]: for item in self._dict.values(): @@ -256,6 +247,62 @@ def __init__(self, workflow_config: ConfigWorkflow) -> None: for task in self.tasks.values(): task.link_wait_on_tasks() + def __str__(self): + light_red = '\x1b[91m' + light_green = '\x1b[92m' + light_blue = '\x1b[94m' + bold = '\x1b[1m' + reset = '\x1b[0m' + + ind = '' + lines = [] + lines.append(f"{ind}cycles:") + ind += ' ' + for cycle in self.cycles.values(): + line = f"{ind}- {light_green}{bold}{cycle.name}{reset}" + if (date := cycle.date) is not None: + line += f" {light_green}[{date}]" + lines.append(line + f"{reset}:") + ind += ' ' + lines.append(f"{ind}tasks:") + ind += ' ' + for task in cycle.tasks: + line = f"{ind}- {light_red}{bold}{task.name}{reset}" + if (date := task.date) is not None: + line += f" {light_red}[{date}]" + lines.append(line + f"{reset}:") + ind += ' ' + lines.append(f"{ind}input:") + ind += ' ' + for data in task.inputs: + line = f"{ind}- {light_blue}{bold}{data.name}{reset}" + if (date := data.date) is not None: + line += f" {light_blue}[{date}]" + lines.append(line + f"{reset}") + ind = ind[:-2] + lines.append(f"{ind}output:") + ind += ' ' + for data in task.outputs: + line = f"{ind}- {light_blue}{bold}{data.name}{reset}" + if (date := data.date) is not None: + line += f" {light_blue}[{date}]" + lines.append(line + f"{reset}") + ind = ind[:-2] + if task.wait_on: + lines.append(f"{ind}wait on:") + ind += ' ' + for task in task.wait_on: + line = f"{ind}- {light_red}{bold}{task.name}{reset}" + if (date := task.date) is not None: + line += f" {light_red}[{date}]" + lines.append(line + f"{reset}") + ind = ind[:-2] + ind = ind[:-4] + ind = ind[:-4] + ind = ind[:-2] + ind = ind[:-2] + return '\n'.join(lines) + @classmethod def from_yaml(cls, config_path: str): return cls(load_workflow_config(config_path)) From 5b294e788c174386e275f6f8e06282d183c7689d Mon Sep 17 00:00:00 2001 From: Matthieu Date: Mon, 4 Nov 2024 18:27:12 +0100 Subject: [PATCH 08/39] fix: allow referencing TimeSeries by absolute date --- src/sirocco/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sirocco/core.py b/src/sirocco/core.py index be12a15e..d88cdd78 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -181,7 +181,7 @@ def __getitem__(self, key: str | tuple(str, datetime|None)) -> Any: def get(self, spec: ConfigCycleSpec, ref_date: datetime|None = None) -> Iterator(Any): name = spec.name if isinstance(self._dict[name], TimeSeries): - if ref_date is 
None: + if ref_date is None and spec.date is []: raise ValueError("TimeSeries object must be referenced by dates") for target_date in spec.resolve_target_dates(ref_date): yield self._dict[name][target_date] From da81e2ace8e7ebd77c19bb7368f97e57e458e0d7 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Mon, 4 Nov 2024 21:45:36 +0100 Subject: [PATCH 09/39] ref:add: generic TimeSeries type and refactor __str__ methods --- src/sirocco/core.py | 122 ++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/src/sirocco/core.py b/src/sirocco/core.py index d88cdd78..122ab93c 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -1,6 +1,6 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Any, TYPE_CHECKING +from typing import Any, TYPE_CHECKING, TypeVar, Generic from datetime import datetime @@ -17,12 +17,30 @@ if TYPE_CHECKING: from collections.abc import Generator, Iterable, Iterator - type ConfigCycleSpec = ConfigCycleTaskDepend | ConfigCycleTaskInput +type ConfigCycleSpec = ConfigCycleTaskDepend | ConfigCycleTaskInput +TimeSeriesObject = TypeVar('TimeSeriesObject') +light_red = '\x1b[91m' +light_green = '\x1b[92m' +light_blue = '\x1b[94m' +bold = '\x1b[1m' +reset = '\x1b[0m' -class Task: + +class NodeStr: + + def __str__(self): + ret_str = f"{self.color}{bold}{self.name}{reset}" + if self.date is not None: + ret_str += f" {self.color}[{self.date}]" + return ret_str + f"{reset}" + + +class Task(NodeStr): """Internal representation of a task node""" + color: str = light_red + name: str outputs: list[Data] inputs: list[Data] @@ -85,9 +103,10 @@ def link_wait_on_tasks(self): @dataclass(kw_only=True) -class Data: +class Data(NodeStr): """Internal representation of a data node""" + color: str = light_blue name: str type: str src: str @@ -104,16 +123,26 @@ def from_config(cls, config: _DataBaseModel, *, date: datetime = None): date=date, ) -# TODO metaclass to generate stores of specific data type (avoid `Any`) -class TimeSeries(): + +@dataclass(kw_only=True) +class Cycle(NodeStr): + """Internal reprenstation of a cycle""" + + color: str = light_green + name: str + tasks: list[Task] + date: datetime | None = None + + +class TimeSeries(Generic[TimeSeriesObject]): """Dictionnary of objects accessed by date, checking start and end dates""" - def __init__(self): + def __init__(self) -> None: self.start_date: datetime | None = None self.end_date: datetime | None = None - self._dict: dict[str: Any] = {} + self._dict: dict[str: TimeSeriesObject] = {} - def __setitem__(self, date: datetime, data: Any) -> None: + def __setitem__(self, date: datetime, data: TimeSeriesObject) -> None: if date in self._dict.keys(): raise KeyError(f"date {date} already used, cannot set twice") self._dict[date] = data @@ -125,10 +154,11 @@ def __setitem__(self, date: datetime, data: Any) -> None: elif date > self.end_date: self.end_date = date - def __getitem__(self, date: datetime) -> Any: + def __getitem__(self, date: datetime) -> TimeSeriesObject: if date < self.start_date or date > self.end_date: # TODO proper logging - print(f"WARNING: date {date} is out of bounds, ignoring.") + item = next(iter(self._dict.values())) + print(f"WARNING: date {date} for item {item.name} is out of bounds [{self.start_date} - {self.end_date}], ignoring.") return None if date not in self._dict: msg = f"date {date} not found" @@ -137,13 +167,13 @@ def __getitem__(self, date: datetime) -> Any: # TODO metaclass to generate stores of specific data type 
(avoid `Any`)
-class Store:
+class Store(Generic[TimeSeriesObject]):
     """Container for TimeSeries or unique data"""

     def __init__(self):
-        self._dict: dict[str, TimeSeries | Any] = {}
+        self._dict: dict[str, TimeSeries | TimeSeriesObject] = {}

-    def __setitem__(self, key: str | tuple(str, datetime|None), value: Any) -> None:
+    def __setitem__(self, key: str | tuple(str, datetime|None), value: TimeSeriesObject) -> None:
         if isinstance(key, tuple):
             name, date = key
         else:
@@ -161,7 +191,7 @@ def __setitem__(self, key: str | tuple(str, datetime|None), value: Any) -> None:
             self._dict[name] = TimeSeries()
             self._dict[name][date] = value

-    def __getitem__(self, key: str | tuple(str, datetime|None)) -> Any:
+    def __getitem__(self, key: str | tuple(str, datetime|None)) -> TimeSeriesObject:
         if isinstance(key, tuple):
             name, date = key
         else:
@@ -178,7 +208,7 @@ def __getitem__(self, key: str | tuple(str, datetime|None)) -> Any:
             raise KeyError(f"entry {name} is not a TimeSeries, cannot be accessed by date")
         return self._dict[name]

-    def get(self, spec: ConfigCycleSpec, ref_date: datetime|None = None) -> Iterator(Any):
+    def get(self, spec: ConfigCycleSpec, ref_date: datetime|None = None) -> Iterator(TimeSeriesObject):
         name = spec.name
         if isinstance(self._dict[name], TimeSeries):
             if ref_date is None and spec.date is []:
@@ -199,25 +229,14 @@ def values(self) -> Iterator[Any]:
             yield item


-@dataclass(kw_only=True)
-class Cycle:
-    """Internal reprenstation of a cycle"""
-
-    name: str
-    tasks: list[Task]
-    date: datetime | None = None
-
-
 class Workflow:
     """Internal representation of a workflow"""

     def __init__(self, workflow_config: ConfigWorkflow) -> None:
-        self.tasks = Store()
         self.data = Store()
         self.cycles = Store()
-        ind = '  '

         # 1 - create available data nodes
         for data_config in workflow_config.data.available:
             self.data[data_config.name] = Data.from_config(data_config, date=None)
@@ -248,54 +267,35 @@ def __init__(self, workflow_config: ConfigWorkflow) -> None:
         task.link_wait_on_tasks()

     def __str__(self):
-        light_red = '\x1b[91m'
-        light_green = '\x1b[92m'
-        light_blue = '\x1b[94m'
-        bold = '\x1b[1m'
-        reset = '\x1b[0m'
-
         ind = ''
         lines = []
         lines.append(f"{ind}cycles:")
         ind += '  '
         for cycle in self.cycles.values():
-            line = f"{ind}- {light_green}{bold}{cycle.name}{reset}"
-            if (date := cycle.date) is not None:
-                line += f" {light_green}[{date}]"
-            lines.append(line + f"{reset}:")
+            lines.append(f"{ind}- {cycle}:")
             ind += '  '
             lines.append(f"{ind}tasks:")
             ind += '  '
             for task in cycle.tasks:
-                line = f"{ind}- {light_red}{bold}{task.name}{reset}"
-                if (date := task.date) is not None:
-                    line += f" {light_red}[{date}]"
-                lines.append(line + f"{reset}:")
+                lines.append(f"{ind}- {task}:")
                 ind += '  '
-                lines.append(f"{ind}input:")
-                ind += '  '
-                for data in task.inputs:
-                    line = f"{ind}- {light_blue}{bold}{data.name}{reset}"
-                    if (date := data.date) is not None:
-                        line += f" {light_blue}[{date}]"
-                    lines.append(line + f"{reset}")
-                ind = ind[:-2]
-                lines.append(f"{ind}output:")
-                ind += '  '
-                for data in task.outputs:
-                    line = f"{ind}- {light_blue}{bold}{data.name}{reset}"
-                    if (date := data.date) is not None:
-                        line += f" {light_blue}[{date}]"
-                    lines.append(line + f"{reset}")
-                ind = ind[:-2]
+                if task.inputs:
+                    lines.append(f"{ind}input:")
+                    ind += '  '
+                    for data in task.inputs:
+                        lines.append(f"{ind}- {data}")
+                    ind = ind[:-2]
+                if task.outputs:
+                    lines.append(f"{ind}output:")
+                    ind += '  '
+                    for data in task.outputs:
+                        lines.append(f"{ind}- {data}")
+                    ind = ind[:-2]
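
[Aside, not part of the patch: a sketch of how the Store above is meant to be driven at this point in the series. Singular data live under a plain string key, while date-dependent entries use a (name, date) tuple key that transparently creates a per-name TimeSeries on first assignment. The names grid, restart_jan and restart_mar are placeholders for Data instances, not identifiers from this repository.]

    from datetime import datetime

    store: Store[Data] = Store()
    store["grid_file"] = grid                                    # grid: placeholder Data, stored without a date
    store["icon_restart", datetime(2025, 1, 1)] = restart_jan    # first dated entry creates a TimeSeries slot
    store["icon_restart", datetime(2025, 3, 1)] = restart_mar    # further dates extend the same TimeSeries
    store["icon_restart", datetime(2025, 1, 1)]                  # date-indexed access into the TimeSeries
    store["grid_file"]                                           # plain access; passing a date here raises KeyError

                 if task.wait_on:
                     lines.append(f"{ind}wait 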
on:") ind += ' ' for task in task.wait_on: - line = f"{ind}- {light_red}{bold}{task.name}{reset}" - if (date := task.date) is not None: - line += f" {light_red}[{date}]" - lines.append(line + f"{reset}") + lines.append(f"{ind}- {task}") ind = ind[:-2] ind = ind[:-4] ind = ind[:-4] From f77d5769621d9dcd1944f7c2982d93f3857de8be Mon Sep 17 00:00:00 2001 From: Matthieu Date: Mon, 4 Nov 2024 22:01:24 +0100 Subject: [PATCH 10/39] add: serialized data TODO: add test against serialized data --- tests/files/configs/test_config_large.txt | 433 ++++++++++++++++++++++ tests/files/configs/test_config_small.txt | 28 ++ tests/test_wc_workflow.py | 1 + 3 files changed, 462 insertions(+) create mode 100644 tests/files/configs/test_config_large.txt create mode 100644 tests/files/configs/test_config_small.txt diff --git a/tests/files/configs/test_config_large.txt b/tests/files/configs/test_config_large.txt new file mode 100644 index 00000000..446ddbe7 --- /dev/null +++ b/tests/files/configs/test_config_large.txt @@ -0,0 +1,433 @@ +cycles: + - init: + tasks: + - extpar: + input: + - obs_data + output: + - extpar_file + - icon_bimonthly [2025-01-01 00:00:00]: + tasks: + - preproc [2025-01-01 00:00:00]: + input: + - grid_file + - extpar_file + - ERA5 + output: + - icon_input [2025-01-01 00:00:00] + - icon [2025-01-01 00:00:00]: + input: + - grid_file + - icon_input [2025-01-01 00:00:00] + output: + - stream_1 [2025-01-01 00:00:00] + - stream_2 [2025-01-01 00:00:00] + - icon_restart [2025-01-01 00:00:00] + - postproc_1 [2025-01-01 00:00:00]: + input: + - stream_1 [2025-01-01 00:00:00] + output: + - postout_1 [2025-01-01 00:00:00] + - store_and_clean_1 [2025-01-01 00:00:00]: + input: + - postout_1 [2025-01-01 00:00:00] + - stream_1 [2025-01-01 00:00:00] + - icon_input [2025-01-01 00:00:00] + output: + - stored_data_1 [2025-01-01 00:00:00] + - icon_bimonthly [2025-03-01 00:00:00]: + tasks: + - preproc [2025-03-01 00:00:00]: + input: + - grid_file + - extpar_file + - ERA5 + output: + - icon_input [2025-03-01 00:00:00] + - icon [2025-03-01 00:00:00]: + input: + - grid_file + - icon_input [2025-03-01 00:00:00] + - icon_restart [2025-01-01 00:00:00] + output: + - stream_1 [2025-03-01 00:00:00] + - stream_2 [2025-03-01 00:00:00] + - icon_restart [2025-03-01 00:00:00] + - postproc_1 [2025-03-01 00:00:00]: + input: + - stream_1 [2025-03-01 00:00:00] + output: + - postout_1 [2025-03-01 00:00:00] + - store_and_clean_1 [2025-03-01 00:00:00]: + input: + - postout_1 [2025-03-01 00:00:00] + - stream_1 [2025-03-01 00:00:00] + - icon_input [2025-03-01 00:00:00] + output: + - stored_data_1 [2025-03-01 00:00:00] + - icon_bimonthly [2025-05-01 00:00:00]: + tasks: + - preproc [2025-05-01 00:00:00]: + input: + - grid_file + - extpar_file + - ERA5 + output: + - icon_input [2025-05-01 00:00:00] + wait on: + - icon [2025-01-01 00:00:00] + - icon [2025-05-01 00:00:00]: + input: + - grid_file + - icon_input [2025-05-01 00:00:00] + - icon_restart [2025-03-01 00:00:00] + output: + - stream_1 [2025-05-01 00:00:00] + - stream_2 [2025-05-01 00:00:00] + - icon_restart [2025-05-01 00:00:00] + - postproc_1 [2025-05-01 00:00:00]: + input: + - stream_1 [2025-05-01 00:00:00] + output: + - postout_1 [2025-05-01 00:00:00] + - store_and_clean_1 [2025-05-01 00:00:00]: + input: + - postout_1 [2025-05-01 00:00:00] + - stream_1 [2025-05-01 00:00:00] + - icon_input [2025-05-01 00:00:00] + output: + - stored_data_1 [2025-05-01 00:00:00] + - icon_bimonthly [2025-07-01 00:00:00]: + tasks: + - preproc [2025-07-01 00:00:00]: + input: + - grid_file + - 
extpar_file + - ERA5 + output: + - icon_input [2025-07-01 00:00:00] + wait on: + - icon [2025-03-01 00:00:00] + - icon [2025-07-01 00:00:00]: + input: + - grid_file + - icon_input [2025-07-01 00:00:00] + - icon_restart [2025-05-01 00:00:00] + output: + - stream_1 [2025-07-01 00:00:00] + - stream_2 [2025-07-01 00:00:00] + - icon_restart [2025-07-01 00:00:00] + - postproc_1 [2025-07-01 00:00:00]: + input: + - stream_1 [2025-07-01 00:00:00] + output: + - postout_1 [2025-07-01 00:00:00] + - store_and_clean_1 [2025-07-01 00:00:00]: + input: + - postout_1 [2025-07-01 00:00:00] + - stream_1 [2025-07-01 00:00:00] + - icon_input [2025-07-01 00:00:00] + output: + - stored_data_1 [2025-07-01 00:00:00] + - icon_bimonthly [2025-09-01 00:00:00]: + tasks: + - preproc [2025-09-01 00:00:00]: + input: + - grid_file + - extpar_file + - ERA5 + output: + - icon_input [2025-09-01 00:00:00] + wait on: + - icon [2025-05-01 00:00:00] + - icon [2025-09-01 00:00:00]: + input: + - grid_file + - icon_input [2025-09-01 00:00:00] + - icon_restart [2025-07-01 00:00:00] + output: + - stream_1 [2025-09-01 00:00:00] + - stream_2 [2025-09-01 00:00:00] + - icon_restart [2025-09-01 00:00:00] + - postproc_1 [2025-09-01 00:00:00]: + input: + - stream_1 [2025-09-01 00:00:00] + output: + - postout_1 [2025-09-01 00:00:00] + - store_and_clean_1 [2025-09-01 00:00:00]: + input: + - postout_1 [2025-09-01 00:00:00] + - stream_1 [2025-09-01 00:00:00] + - icon_input [2025-09-01 00:00:00] + output: + - stored_data_1 [2025-09-01 00:00:00] + - icon_bimonthly [2025-11-01 00:00:00]: + tasks: + - preproc [2025-11-01 00:00:00]: + input: + - grid_file + - extpar_file + - ERA5 + output: + - icon_input [2025-11-01 00:00:00] + wait on: + - icon [2025-07-01 00:00:00] + - icon [2025-11-01 00:00:00]: + input: + - grid_file + - icon_input [2025-11-01 00:00:00] + - icon_restart [2025-09-01 00:00:00] + output: + - stream_1 [2025-11-01 00:00:00] + - stream_2 [2025-11-01 00:00:00] + - icon_restart [2025-11-01 00:00:00] + - postproc_1 [2025-11-01 00:00:00]: + input: + - stream_1 [2025-11-01 00:00:00] + output: + - postout_1 [2025-11-01 00:00:00] + - store_and_clean_1 [2025-11-01 00:00:00]: + input: + - postout_1 [2025-11-01 00:00:00] + - stream_1 [2025-11-01 00:00:00] + - icon_input [2025-11-01 00:00:00] + output: + - stored_data_1 [2025-11-01 00:00:00] + - icon_bimonthly [2026-01-01 00:00:00]: + tasks: + - preproc [2026-01-01 00:00:00]: + input: + - grid_file + - extpar_file + - ERA5 + output: + - icon_input [2026-01-01 00:00:00] + wait on: + - icon [2025-09-01 00:00:00] + - icon [2026-01-01 00:00:00]: + input: + - grid_file + - icon_input [2026-01-01 00:00:00] + - icon_restart [2025-11-01 00:00:00] + output: + - stream_1 [2026-01-01 00:00:00] + - stream_2 [2026-01-01 00:00:00] + - icon_restart [2026-01-01 00:00:00] + - postproc_1 [2026-01-01 00:00:00]: + input: + - stream_1 [2026-01-01 00:00:00] + output: + - postout_1 [2026-01-01 00:00:00] + - store_and_clean_1 [2026-01-01 00:00:00]: + input: + - postout_1 [2026-01-01 00:00:00] + - stream_1 [2026-01-01 00:00:00] + - icon_input [2026-01-01 00:00:00] + output: + - stored_data_1 [2026-01-01 00:00:00] + - icon_bimonthly [2026-03-01 00:00:00]: + tasks: + - preproc [2026-03-01 00:00:00]: + input: + - grid_file + - extpar_file + - ERA5 + output: + - icon_input [2026-03-01 00:00:00] + wait on: + - icon [2025-11-01 00:00:00] + - icon [2026-03-01 00:00:00]: + input: + - grid_file + - icon_input [2026-03-01 00:00:00] + - icon_restart [2026-01-01 00:00:00] + output: + - stream_1 [2026-03-01 00:00:00] + - stream_2 
[2026-03-01 00:00:00] + - icon_restart [2026-03-01 00:00:00] + - postproc_1 [2026-03-01 00:00:00]: + input: + - stream_1 [2026-03-01 00:00:00] + output: + - postout_1 [2026-03-01 00:00:00] + - store_and_clean_1 [2026-03-01 00:00:00]: + input: + - postout_1 [2026-03-01 00:00:00] + - stream_1 [2026-03-01 00:00:00] + - icon_input [2026-03-01 00:00:00] + output: + - stored_data_1 [2026-03-01 00:00:00] + - icon_bimonthly [2026-05-01 00:00:00]: + tasks: + - preproc [2026-05-01 00:00:00]: + input: + - grid_file + - extpar_file + - ERA5 + output: + - icon_input [2026-05-01 00:00:00] + wait on: + - icon [2026-01-01 00:00:00] + - icon [2026-05-01 00:00:00]: + input: + - grid_file + - icon_input [2026-05-01 00:00:00] + - icon_restart [2026-03-01 00:00:00] + output: + - stream_1 [2026-05-01 00:00:00] + - stream_2 [2026-05-01 00:00:00] + - icon_restart [2026-05-01 00:00:00] + - postproc_1 [2026-05-01 00:00:00]: + input: + - stream_1 [2026-05-01 00:00:00] + output: + - postout_1 [2026-05-01 00:00:00] + - store_and_clean_1 [2026-05-01 00:00:00]: + input: + - postout_1 [2026-05-01 00:00:00] + - stream_1 [2026-05-01 00:00:00] + - icon_input [2026-05-01 00:00:00] + output: + - stored_data_1 [2026-05-01 00:00:00] + - icon_bimonthly [2026-07-01 00:00:00]: + tasks: + - preproc [2026-07-01 00:00:00]: + input: + - grid_file + - extpar_file + - ERA5 + output: + - icon_input [2026-07-01 00:00:00] + wait on: + - icon [2026-03-01 00:00:00] + - icon [2026-07-01 00:00:00]: + input: + - grid_file + - icon_input [2026-07-01 00:00:00] + - icon_restart [2026-05-01 00:00:00] + output: + - stream_1 [2026-07-01 00:00:00] + - stream_2 [2026-07-01 00:00:00] + - icon_restart [2026-07-01 00:00:00] + - postproc_1 [2026-07-01 00:00:00]: + input: + - stream_1 [2026-07-01 00:00:00] + output: + - postout_1 [2026-07-01 00:00:00] + - store_and_clean_1 [2026-07-01 00:00:00]: + input: + - postout_1 [2026-07-01 00:00:00] + - stream_1 [2026-07-01 00:00:00] + - icon_input [2026-07-01 00:00:00] + output: + - stored_data_1 [2026-07-01 00:00:00] + - icon_bimonthly [2026-09-01 00:00:00]: + tasks: + - preproc [2026-09-01 00:00:00]: + input: + - grid_file + - extpar_file + - ERA5 + output: + - icon_input [2026-09-01 00:00:00] + wait on: + - icon [2026-05-01 00:00:00] + - icon [2026-09-01 00:00:00]: + input: + - grid_file + - icon_input [2026-09-01 00:00:00] + - icon_restart [2026-07-01 00:00:00] + output: + - stream_1 [2026-09-01 00:00:00] + - stream_2 [2026-09-01 00:00:00] + - icon_restart [2026-09-01 00:00:00] + - postproc_1 [2026-09-01 00:00:00]: + input: + - stream_1 [2026-09-01 00:00:00] + output: + - postout_1 [2026-09-01 00:00:00] + - store_and_clean_1 [2026-09-01 00:00:00]: + input: + - postout_1 [2026-09-01 00:00:00] + - stream_1 [2026-09-01 00:00:00] + - icon_input [2026-09-01 00:00:00] + output: + - stored_data_1 [2026-09-01 00:00:00] + - icon_bimonthly [2026-11-01 00:00:00]: + tasks: + - preproc [2026-11-01 00:00:00]: + input: + - grid_file + - extpar_file + - ERA5 + output: + - icon_input [2026-11-01 00:00:00] + wait on: + - icon [2026-07-01 00:00:00] + - icon [2026-11-01 00:00:00]: + input: + - grid_file + - icon_input [2026-11-01 00:00:00] + - icon_restart [2026-09-01 00:00:00] + output: + - stream_1 [2026-11-01 00:00:00] + - stream_2 [2026-11-01 00:00:00] + - icon_restart [2026-11-01 00:00:00] + - postproc_1 [2026-11-01 00:00:00]: + input: + - stream_1 [2026-11-01 00:00:00] + output: + - postout_1 [2026-11-01 00:00:00] + - store_and_clean_1 [2026-11-01 00:00:00]: + input: + - postout_1 [2026-11-01 00:00:00] + - stream_1 
[2026-11-01 00:00:00] + - icon_input [2026-11-01 00:00:00] + output: + - stored_data_1 [2026-11-01 00:00:00] + - yearly [2025-01-01 00:00:00]: + tasks: + - postproc_2 [2025-01-01 00:00:00]: + input: + - stream_2 [2025-01-01 00:00:00] + - stream_2 [2025-03-01 00:00:00] + - stream_2 [2025-05-01 00:00:00] + - stream_2 [2025-07-01 00:00:00] + - stream_2 [2025-09-01 00:00:00] + - stream_2 [2025-11-01 00:00:00] + output: + - postout_2 [2025-01-01 00:00:00] + - store_and_clean_2 [2025-01-01 00:00:00]: + input: + - postout_2 [2025-01-01 00:00:00] + - stream_2 [2025-01-01 00:00:00] + - stream_2 [2025-03-01 00:00:00] + - stream_2 [2025-05-01 00:00:00] + - stream_2 [2025-07-01 00:00:00] + - stream_2 [2025-09-01 00:00:00] + - stream_2 [2025-11-01 00:00:00] + output: + - stored_data_2 [2025-01-01 00:00:00] + - yearly [2026-01-01 00:00:00]: + tasks: + - postproc_2 [2026-01-01 00:00:00]: + input: + - stream_2 [2026-01-01 00:00:00] + - stream_2 [2026-03-01 00:00:00] + - stream_2 [2026-05-01 00:00:00] + - stream_2 [2026-07-01 00:00:00] + - stream_2 [2026-09-01 00:00:00] + - stream_2 [2026-11-01 00:00:00] + output: + - postout_2 [2026-01-01 00:00:00] + - store_and_clean_2 [2026-01-01 00:00:00]: + input: + - postout_2 [2026-01-01 00:00:00] + - stream_2 [2026-01-01 00:00:00] + - stream_2 [2026-03-01 00:00:00] + - stream_2 [2026-05-01 00:00:00] + - stream_2 [2026-07-01 00:00:00] + - stream_2 [2026-09-01 00:00:00] + - stream_2 [2026-11-01 00:00:00] + output: + - stored_data_2 [2026-01-01 00:00:00] \ No newline at end of file diff --git a/tests/files/configs/test_config_small.txt b/tests/files/configs/test_config_small.txt new file mode 100644 index 00000000..3559fa03 --- /dev/null +++ b/tests/files/configs/test_config_small.txt @@ -0,0 +1,28 @@ +cycles: + - bimonthly_tasks [2026-01-01 00:00:00]: + tasks: + - icon [2026-01-01 00:00:00]: + output: + - icon_output [2026-01-01 00:00:00] + - icon_restart [2026-01-01 00:00:00] + - bimonthly_tasks [2026-03-01 00:00:00]: + tasks: + - icon [2026-03-01 00:00:00]: + input: + - icon_restart [2026-01-01 00:00:00] + output: + - icon_output [2026-03-01 00:00:00] + - icon_restart [2026-03-01 00:00:00] + - bimonthly_tasks [2026-05-01 00:00:00]: + tasks: + - icon [2026-05-01 00:00:00]: + input: + - icon_restart [2026-03-01 00:00:00] + output: + - icon_output [2026-05-01 00:00:00] + - icon_restart [2026-05-01 00:00:00] + - lastly: + tasks: + - cleanup: + wait on: + - icon [2026-05-01 00:00:00] \ No newline at end of file diff --git a/tests/test_wc_workflow.py b/tests/test_wc_workflow.py index 48276860..c9692202 100644 --- a/tests/test_wc_workflow.py +++ b/tests/test_wc_workflow.py @@ -14,3 +14,4 @@ def config_file_small(): def test_parse_config_file(config_file): config_workflow = load_workflow_config(config_file) core_workflow = Workflow(config_workflow) + # TODO add test to compare str(core_workflow) against "tests/files/configs/test_config_xxx.txt" From 0a3bb69c62d2b248b8f3699c31c58eace71efdb9 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Mon, 4 Nov 2024 22:04:25 +0100 Subject: [PATCH 11/39] del: cleanup --- src/sirocco/core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sirocco/core.py b/src/sirocco/core.py index 122ab93c..06832a80 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -166,7 +166,6 @@ def __getitem__(self, date: datetime) -> TimeSeriesObject: return self._dict[date] -# TODO metaclass to generate stores of specific data type (avoid `Any`) class Store(Generic[TimeSeriesObject]): """Container for TimeSeries or unique data""" From 
3e54506b4dd1bd7a385d31b6df80215d1375e058 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Mon, 4 Nov 2024 22:11:53 +0100 Subject: [PATCH 12/39] fix: hatch fmt --- src/sirocco/core.py | 20 +++++++++----------- src/sirocco/parsing/_yaml_data_models.py | 3 +++ tests/test_wc_workflow.py | 3 ++- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/sirocco/core.py b/src/sirocco/core.py index 06832a80..36ed1d26 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -1,22 +1,21 @@ from __future__ import annotations -from dataclasses import dataclass -from typing import Any, TYPE_CHECKING, TypeVar, Generic +from dataclasses import dataclass from datetime import datetime +from typing import TYPE_CHECKING, Any, Generic, TypeVar from sirocco.parsing._yaml_data_models import ( - _DataBaseModel, - ConfigCycleTaskDepend, ConfigCycleTask, + ConfigCycleTaskDepend, ConfigCycleTaskInput, - ConfigData, ConfigTask, ConfigWorkflow, + _DataBaseModel, load_workflow_config, ) if TYPE_CHECKING: - from collections.abc import Generator, Iterable, Iterator + from collections.abc import Iterator type ConfigCycleSpec = ConfigCycleTaskDepend | ConfigCycleTaskInput TimeSeriesObject = TypeVar('TimeSeriesObject') @@ -183,12 +182,11 @@ def __setitem__(self, key: str | tuple(str, datetime|None), value: TimeSeriesObj if date is None: raise KeyError(f"entry {name} is a TimeSeries, must be accessed by date") self._dict[name][date] = value + elif date is None: + self._dict[name] = value else: - if date is None: - self._dict[name] = value - else: - self._dict[name] = TimeSeries() - self._dict[name][date] = value + self._dict[name] = TimeSeries() + self._dict[name][date] = value def __getitem__(self, key: str | tuple(str, datetime|None)) -> TimeSeriesObject: if isinstance(key, tuple): diff --git a/src/sirocco/parsing/_yaml_data_models.py b/src/sirocco/parsing/_yaml_data_models.py index d2b378f1..204c2a64 100644 --- a/src/sirocco/parsing/_yaml_data_models.py +++ b/src/sirocco/parsing/_yaml_data_models.py @@ -1,5 +1,7 @@ from __future__ import annotations + from typing import TYPE_CHECKING + if TYPE_CHECKING: from collections.abc import Iterator @@ -12,6 +14,7 @@ from isoduration import parse_duration from isoduration.types import Duration # pydantic needs type # noqa: TCH002 from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator + from ._utils import TimeUtils diff --git a/tests/test_wc_workflow.py b/tests/test_wc_workflow.py index c9692202..bf75f598 100644 --- a/tests/test_wc_workflow.py +++ b/tests/test_wc_workflow.py @@ -1,7 +1,8 @@ import pytest -from sirocco.parsing import load_workflow_config from sirocco.core import Workflow +from sirocco.parsing import load_workflow_config + @pytest.fixture def config_file_small(): From 49394c8e732a159834d7a686fe55fa6b9a8ebb46 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Mon, 4 Nov 2024 22:36:34 +0100 Subject: [PATCH 13/39] fix: hatch fmt --- src/sirocco/core.py | 63 +++++++++++++----------- src/sirocco/parsing/_yaml_data_models.py | 13 +++-- tests/test_wc_workflow.py | 2 +- 3 files changed, 42 insertions(+), 36 deletions(-) diff --git a/src/sirocco/core.py b/src/sirocco/core.py index 36ed1d26..4e97263c 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -1,8 +1,7 @@ from __future__ import annotations from dataclasses import dataclass -from datetime import datetime -from typing import TYPE_CHECKING, Any, Generic, TypeVar +from typing import TYPE_CHECKING, Generic, TypeVar from sirocco.parsing._yaml_data_models import ( 
ConfigCycleTask, @@ -16,6 +15,8 @@ if TYPE_CHECKING: from collections.abc import Iterator + from datetime import datetime + type ConfigCycleSpec = ConfigCycleTaskDepend | ConfigCycleTaskInput TimeSeriesObject = TypeVar('TimeSeriesObject') @@ -45,8 +46,8 @@ class Task(NodeStr): inputs: list[Data] wait_on: list[Task] date: datetime | None = None - # TODO This list is too long. We should start with the set of supported - # keywords and extend it as we support more + # TODO: This list is too long. We should start with the set of supported + # keywords and extend it as we support more command: str | None = None command_option: str | None = None input_arg_options: dict[str, str] | None = None @@ -113,7 +114,7 @@ class Data(NodeStr): date: datetime | None = None @classmethod - def from_config(cls, config: _DataBaseModel, *, date: datetime = None): + def from_config(cls, config: _DataBaseModel, *, date: datetime | None = None): return cls( name=config.name, type=config.type, @@ -142,8 +143,9 @@ def __init__(self) -> None: self._dict: dict[str: TimeSeriesObject] = {} def __setitem__(self, date: datetime, data: TimeSeriesObject) -> None: - if date in self._dict.keys(): - raise KeyError(f"date {date} already used, cannot set twice") + if date in self._dict: + msg = f"date {date} already used, cannot set twice" + raise KeyError(msg) self._dict[date] = data if self.start_date is None: self.start_date = date @@ -155,7 +157,7 @@ def __setitem__(self, date: datetime, data: TimeSeriesObject) -> None: def __getitem__(self, date: datetime) -> TimeSeriesObject: if date < self.start_date or date > self.end_date: - # TODO proper logging + # TODO: add proper logging for warnings, info, etc item = next(iter(self._dict.values())) print(f"WARNING: date {date} for item {item.name} is out of bounds [{self.start_date} - {self.end_date}], ignoring.") return None @@ -164,6 +166,9 @@ def __getitem__(self, date: datetime) -> TimeSeriesObject: raise KeyError(msg) return self._dict[date] + def values(self) -> Iterator[TimeSeriesObject]: + yield from self._dict.values() + class Store(Generic[TimeSeriesObject]): """Container for TimeSeries or unique data""" @@ -178,9 +183,11 @@ def __setitem__(self, key: str | tuple(str, datetime|None), value: TimeSeriesObj name, date = key, None if name in self._dict: if not isinstance(self._dict[name], TimeSeries): - raise KeyError(f"single entry {name} already set") + msg = f"single entry {name} already set" + raise KeyError(msg) if date is None: - raise KeyError(f"entry {name} is a TimeSeries, must be accessed by date") + msg = f"entry {name} is a TimeSeries, must be accessed by date" + raise KeyError(msg) self._dict[name][date] = value elif date is None: self._dict[name] = value @@ -195,33 +202,36 @@ def __getitem__(self, key: str | tuple(str, datetime|None)) -> TimeSeriesObject: name, date = key, None if name not in self._dict: - raise KeyError(f"entry {name} not found in Store") + msg = f"entry {name} not found in Store" + raise KeyError(msg) if isinstance(self._dict[name], TimeSeries): if date is None: - raise KeyError(f"entry {name} is a TimeSeries, must be accessed by date") + msg = f"entry {name} is a TimeSeries, must be accessed by date" + raise KeyError(msg) return self._dict[name][date] - else: - if date is not None: - raise KeyError(f"entry {name} is not a TimeSeries, cannot be accessed by date") - return self._dict[name] + if date is not None: + msg = f"entry {name} is not a TimeSeries, cannot be accessed by date" + raise KeyError(msg) + return self._dict[name] - def get(self, 
spec: ConfigCycleSpec, ref_date: datetime|None = None) -> Iterator(TimeSeriesObject): + def get(self, spec: ConfigCycleSpec, ref_date: datetime|None = None) -> Iterator[TimeSeriesObject]: name = spec.name if isinstance(self._dict[name], TimeSeries): if ref_date is None and spec.date is []: - raise ValueError("TimeSeries object must be referenced by dates") + msg = "TimeSeries object must be referenced by dates" + raise ValueError(msg) for target_date in spec.resolve_target_dates(ref_date): yield self._dict[name][target_date] else: if spec.lag or spec.date: - raise ValueError(f"item {name} is not a TimeSeries, cannot be referenced via date or lag") + msg = f"item {name} is not a TimeSeries, cannot be referenced via date or lag" + raise ValueError(msg) yield self._dict[name] - def values(self) -> Iterator[Any]: + def values(self) -> Iterator[TimeSeriesObject]: for item in self._dict.values(): if isinstance(item, TimeSeries): - for subitem in item._dict.values(): - yield subitem + yield from item.values() else: yield item @@ -279,20 +289,17 @@ def __str__(self): if task.inputs: lines.append(f"{ind}input:") ind += ' ' - for data in task.inputs: - lines.append(f"{ind}- {data}") + lines.extend(f"{ind}- {data}" for data in task.inputs) ind = ind[:-2] if task.outputs: lines.append(f"{ind}output:") ind += ' ' - for data in task.outputs: - lines.append(f"{ind}- {data}") + lines.extend(f"{ind}- {data}" for data in task.outputs) ind = ind[:-2] if task.wait_on: lines.append(f"{ind}wait on:") ind += ' ' - for task in task.wait_on: - lines.append(f"{ind}- {task}") + lines.extend(f"{ind}- {wait_task}" for wait_task in task.wait_on) ind = ind[:-2] ind = ind[:-4] ind = ind[:-4] diff --git a/src/sirocco/parsing/_yaml_data_models.py b/src/sirocco/parsing/_yaml_data_models.py index 204c2a64..b3a27057 100644 --- a/src/sirocco/parsing/_yaml_data_models.py +++ b/src/sirocco/parsing/_yaml_data_models.py @@ -84,11 +84,10 @@ def resolve_target_dates(self, ref_date: datetime | None) -> Iterator[datetime]: if not self.lag and not self.date: yield ref_date if self.lag: - for l in self.lag: - yield ref_date + l + for lag in self.lag: + yield ref_date + lag if self.date: - for d in self.date: - yield d + yield from self.date class ConfigTask(_NamedBaseModel): @@ -96,8 +95,8 @@ class ConfigTask(_NamedBaseModel): To create an instance of a task defined in a workflow file """ - # TODO This list is too large. We should start with the set of supported - # keywords and extend it as we support more + # TODO: This list is too large. We should start with the set of supported + # keywords and extend it as we support more command: str command_option: str | None = None input_arg_options: dict[str, str] | None = None @@ -173,7 +172,7 @@ class ConfigCycleTaskDepend(_NamedBaseModel, _LagDateBaseModel): To create an instance of a input or output in a task in a cycle defined in a workflow file. 
""" - # TODO Move to "wait_on" keyword in yaml instead of "depend" + # TODO: Move to "wait_on" keyword in yaml instead of "depend" name: str # name of the task it depends on cycle_name: str | None = None diff --git a/tests/test_wc_workflow.py b/tests/test_wc_workflow.py index bf75f598..037d87e4 100644 --- a/tests/test_wc_workflow.py +++ b/tests/test_wc_workflow.py @@ -15,4 +15,4 @@ def config_file_small(): def test_parse_config_file(config_file): config_workflow = load_workflow_config(config_file) core_workflow = Workflow(config_workflow) - # TODO add test to compare str(core_workflow) against "tests/files/configs/test_config_xxx.txt" + # TODO: add test to compare str(core_workflow) against "tests/files/configs/test_config_xxx.txt" From 3b348f6c3946e69c76ab6f047d2b9e9a30426ccc Mon Sep 17 00:00:00 2001 From: Matthieu Date: Mon, 4 Nov 2024 22:38:57 +0100 Subject: [PATCH 14/39] fix: hatch fmt --- tests/test_wc_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_wc_workflow.py b/tests/test_wc_workflow.py index 037d87e4..a6444aa3 100644 --- a/tests/test_wc_workflow.py +++ b/tests/test_wc_workflow.py @@ -14,5 +14,5 @@ def config_file_small(): ) def test_parse_config_file(config_file): config_workflow = load_workflow_config(config_file) - core_workflow = Workflow(config_workflow) + _ = Workflow(config_workflow) # TODO: add test to compare str(core_workflow) against "tests/files/configs/test_config_xxx.txt" From 1025f67c2fdb24465535471f164c357a45150b5b Mon Sep 17 00:00:00 2001 From: Matthieu Date: Tue, 5 Nov 2024 09:26:12 +0100 Subject: [PATCH 15/39] add: logging --- src/sirocco/core.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/sirocco/core.py b/src/sirocco/core.py index 4e97263c..49c524ef 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -1,4 +1,7 @@ from __future__ import annotations +import logging +logging.basicConfig() +logger = logging.getLogger(__name__) from dataclasses import dataclass from typing import TYPE_CHECKING, Generic, TypeVar @@ -29,6 +32,8 @@ class NodeStr: + color: str + def __str__(self): ret_str = f"{self.color}{bold}{self.name}{reset}" if self.date is not None: @@ -157,9 +162,8 @@ def __setitem__(self, date: datetime, data: TimeSeriesObject) -> None: def __getitem__(self, date: datetime) -> TimeSeriesObject: if date < self.start_date or date > self.end_date: - # TODO: add proper logging for warnings, info, etc item = next(iter(self._dict.values())) - print(f"WARNING: date {date} for item {item.name} is out of bounds [{self.start_date} - {self.end_date}], ignoring.") + logger.warning(f"date {date} for item '{item.name}' is out of bounds [{self.start_date} - {self.end_date}], ignoring.") return None if date not in self._dict: msg = f"date {date} not found" From 07d45ef665288e4ec90dee2707916c5c745f0d41 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Tue, 5 Nov 2024 09:30:45 +0100 Subject: [PATCH 16/39] fix: hatch fmt --- src/sirocco/core.py | 58 ++++++++++++------------ src/sirocco/parsing/_yaml_data_models.py | 6 ++- 2 files changed, 33 insertions(+), 31 deletions(-) diff --git a/src/sirocco/core.py b/src/sirocco/core.py index 49c524ef..0db0cf83 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -1,8 +1,6 @@ from __future__ import annotations -import logging -logging.basicConfig() -logger = logging.getLogger(__name__) +import logging from dataclasses import dataclass from typing import TYPE_CHECKING, Generic, TypeVar @@ -20,18 +18,21 @@ from collections.abc import Iterator from 
datetime import datetime + +logging.basicConfig() +logger = logging.getLogger(__name__) + type ConfigCycleSpec = ConfigCycleTaskDepend | ConfigCycleTaskInput -TimeSeriesObject = TypeVar('TimeSeriesObject') +TimeSeriesObject = TypeVar("TimeSeriesObject") -light_red = '\x1b[91m' -light_green = '\x1b[92m' -light_blue = '\x1b[94m' -bold = '\x1b[1m' -reset = '\x1b[0m' +light_red = "\x1b[91m" +light_green = "\x1b[92m" +light_blue = "\x1b[94m" +bold = "\x1b[1m" +reset = "\x1b[0m" class NodeStr: - color: str def __str__(self): @@ -66,11 +67,7 @@ class Task(NodeStr): src: str | None = None conda_env: str | None = None - def __init__(self, - config: ConfigTask, - task_ref: ConfigCycleTask, - workflow: Workflow, - date: datetime | None = None): + def __init__(self, config: ConfigTask, task_ref: ConfigCycleTask, workflow: Workflow, date: datetime | None = None): self.name = config.name self.date = date self.inputs = [] @@ -145,7 +142,7 @@ class TimeSeries(Generic[TimeSeriesObject]): def __init__(self) -> None: self.start_date: datetime | None = None self.end_date: datetime | None = None - self._dict: dict[str: TimeSeriesObject] = {} + self._dict: dict[str:TimeSeriesObject] = {} def __setitem__(self, date: datetime, data: TimeSeriesObject) -> None: if date in self._dict: @@ -163,7 +160,10 @@ def __setitem__(self, date: datetime, data: TimeSeriesObject) -> None: def __getitem__(self, date: datetime) -> TimeSeriesObject: if date < self.start_date or date > self.end_date: item = next(iter(self._dict.values())) - logger.warning(f"date {date} for item '{item.name}' is out of bounds [{self.start_date} - {self.end_date}], ignoring.") + msg = ( + f"date {date} for item '{item.name}' is out of bounds [{self.start_date} - {self.end_date}], ignoring." + ) + logger.warning(msg) return None if date not in self._dict: msg = f"date {date} not found" @@ -180,7 +180,7 @@ class Store(Generic[TimeSeriesObject]): def __init__(self): self._dict: dict[str, TimeSeries | TimeSeriesObject] = {} - def __setitem__(self, key: str | tuple(str, datetime|None), value: TimeSeriesObject) -> None: + def __setitem__(self, key: str | tuple(str, datetime | None), value: TimeSeriesObject) -> None: if isinstance(key, tuple): name, date = key else: @@ -199,7 +199,7 @@ def __setitem__(self, key: str | tuple(str, datetime|None), value: TimeSeriesObj self._dict[name] = TimeSeries() self._dict[name][date] = value - def __getitem__(self, key: str | tuple(str, datetime|None)) -> TimeSeriesObject: + def __getitem__(self, key: str | tuple(str, datetime | None)) -> TimeSeriesObject: if isinstance(key, tuple): name, date = key else: @@ -218,7 +218,7 @@ def __getitem__(self, key: str | tuple(str, datetime|None)) -> TimeSeriesObject: raise KeyError(msg) return self._dict[name] - def get(self, spec: ConfigCycleSpec, ref_date: datetime|None = None) -> Iterator[TimeSeriesObject]: + def get(self, spec: ConfigCycleSpec, ref_date: datetime | None = None) -> Iterator[TimeSeriesObject]: name = spec.name if isinstance(self._dict[name], TimeSeries): if ref_date is None and spec.date is []: @@ -278,38 +278,38 @@ def __init__(self, workflow_config: ConfigWorkflow) -> None: task.link_wait_on_tasks() def __str__(self): - ind = '' + ind = "" lines = [] lines.append(f"{ind}cycles:") - ind += ' ' + ind += " " for cycle in self.cycles.values(): lines.append(f"{ind}- {cycle}:") - ind += ' ' + ind += " " lines.append(f"{ind}tasks:") - ind += ' ' + ind += " " for task in cycle.tasks: lines.append(f"{ind}- {task}:") - ind += ' ' + ind += " " if task.inputs: 
lines.append(f"{ind}input:") - ind += ' ' + ind += " " lines.extend(f"{ind}- {data}" for data in task.inputs) ind = ind[:-2] if task.outputs: lines.append(f"{ind}output:") - ind += ' ' + ind += " " lines.extend(f"{ind}- {data}" for data in task.outputs) ind = ind[:-2] if task.wait_on: lines.append(f"{ind}wait on:") - ind += ' ' + ind += " " lines.extend(f"{ind}- {wait_task}" for wait_task in task.wait_on) ind = ind[:-2] ind = ind[:-4] ind = ind[:-4] ind = ind[:-2] ind = ind[:-2] - return '\n'.join(lines) + return "\n".join(lines) @classmethod def from_yaml(cls, config_path: str): diff --git a/src/sirocco/parsing/_yaml_data_models.py b/src/sirocco/parsing/_yaml_data_models.py index b3a27057..d44587d5 100644 --- a/src/sirocco/parsing/_yaml_data_models.py +++ b/src/sirocco/parsing/_yaml_data_models.py @@ -277,7 +277,7 @@ def check_start_date_end_date_period_combination(cls, data: Any) -> Any: if ("start_date" in data) ^ ("end_date" in data): msg = f"in cycle {data['name']}: both start_date and end_date must be provided or none of them." raise ValueError(msg) - if 'period' in data and 'start_date' not in data: + if "period" in data and "start_date" not in data: msg = f"in cycle {data['name']}: period provided without start and end dates." return data @@ -312,7 +312,9 @@ class ConfigWorkflow(BaseModel): @model_validator(mode="after") def build_internal_dicts(self) -> ConfigWorkflow: - self.data_dict = {data.name: data for data in self.data.available} | {data.name: data for data in self.data.generated} + self.data_dict = {data.name: data for data in self.data.available} | { + data.name: data for data in self.data.generated + } self.task_dict = {task.name: task for task in self.tasks} return self From 458aaa5ebb10676de67f40ac28b4bc59326827f9 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Thu, 7 Nov 2024 15:25:55 +0100 Subject: [PATCH 17/39] ref: string handling --- pyproject.toml | 3 +- src/sirocco/core.py | 51 +- tests/files/configs/test_config_large.txt | 618 +++++++++++----------- tests/files/configs/test_config_small.txt | 34 +- 4 files changed, 359 insertions(+), 347 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4530d4b8..c3997f1e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,8 @@ dependencies = [ "isoduration", "pydantic", "pydantic-yaml", - "aiida-core>=2.5" + "aiida-core>=2.5", + "termcolor" ] [project.urls] Repository = "https://github.com/C2SM/Sirocco.git" diff --git a/src/sirocco/core.py b/src/sirocco/core.py index 0db0cf83..8b4f73e5 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -2,7 +2,9 @@ import logging from dataclasses import dataclass -from typing import TYPE_CHECKING, Generic, TypeVar +from typing import TYPE_CHECKING, Generic, Literal, TypeVar + +from termcolor import colored from sirocco.parsing._yaml_data_models import ( ConfigCycleTask, @@ -25,27 +27,26 @@ type ConfigCycleSpec = ConfigCycleTaskDepend | ConfigCycleTaskInput TimeSeriesObject = TypeVar("TimeSeriesObject") -light_red = "\x1b[91m" -light_green = "\x1b[92m" -light_blue = "\x1b[94m" -bold = "\x1b[1m" -reset = "\x1b[0m" - class NodeStr: color: str - def __str__(self): - ret_str = f"{self.color}{bold}{self.name}{reset}" + def _str_pretty_(self) -> str: + repr_str = colored(self.name, self.color, attrs=["bold"]) if self.date is not None: - ret_str += f" {self.color}[{self.date}]" - return ret_str + f"{reset}" + repr_str += colored(f" [{self.date}]", self.color) + return repr_str + + def __str__(self) -> str: + if self.date is None: + return self.name + return 
f"{self.name} [{self.date}]" class Task(NodeStr): """Internal representation of a task node""" - color: str = light_red + color: str = "light_red" name: str outputs: list[Data] @@ -108,7 +109,7 @@ def link_wait_on_tasks(self): class Data(NodeStr): """Internal representation of a data node""" - color: str = light_blue + color: str = "light_blue" name: str type: str src: str @@ -130,7 +131,7 @@ def from_config(cls, config: _DataBaseModel, *, date: datetime | None = None): class Cycle(NodeStr): """Internal reprenstation of a cycle""" - color: str = light_green + color: str = "light_green" name: str tasks: list[Task] date: datetime | None = None @@ -277,33 +278,34 @@ def __init__(self, workflow_config: ConfigWorkflow) -> None: for task in self.tasks.values(): task.link_wait_on_tasks() - def __str__(self): + def _str_from_method(self, method_name: Literal["__str__", "_str_pretty_"]) -> str: + str_method = getattr(NodeStr, method_name) ind = "" lines = [] lines.append(f"{ind}cycles:") ind += " " for cycle in self.cycles.values(): - lines.append(f"{ind}- {cycle}:") + lines.append(f"{ind}- {str_method(cycle)}:") ind += " " lines.append(f"{ind}tasks:") ind += " " for task in cycle.tasks: - lines.append(f"{ind}- {task}:") + lines.append(f"{ind}- {str_method(task)}:") ind += " " if task.inputs: lines.append(f"{ind}input:") ind += " " - lines.extend(f"{ind}- {data}" for data in task.inputs) + lines.extend(f"{ind}- {str_method(data)}" for data in task.inputs) ind = ind[:-2] if task.outputs: lines.append(f"{ind}output:") ind += " " - lines.extend(f"{ind}- {data}" for data in task.outputs) + lines.extend(f"{ind}- {str_method(data)}" for data in task.outputs) ind = ind[:-2] if task.wait_on: lines.append(f"{ind}wait on:") ind += " " - lines.extend(f"{ind}- {wait_task}" for wait_task in task.wait_on) + lines.extend(f"{ind}- {str_method(wait_task)}" for wait_task in task.wait_on) ind = ind[:-2] ind = ind[:-4] ind = ind[:-4] @@ -311,6 +313,15 @@ def __str__(self): ind = ind[:-2] return "\n".join(lines) + def __str__(self): + return self._str_from_method("__str__") + + def _str_pretty_(self): + return self._str_from_method("_str_pretty_") + + def _repr_pretty_(self, p, cycle): + p.text(self._str_pretty_() if not cycle else "...") + @classmethod def from_yaml(cls, config_path: str): return cls(load_workflow_config(config_path)) diff --git a/tests/files/configs/test_config_large.txt b/tests/files/configs/test_config_large.txt index 446ddbe7..8f227b94 100644 --- a/tests/files/configs/test_config_large.txt +++ b/tests/files/configs/test_config_large.txt @@ -1,433 +1,433 @@ cycles: - - init: + - init: tasks: - - extpar: + - extpar: input: - - obs_data + - obs_data output: - - extpar_file - - icon_bimonthly [2025-01-01 00:00:00]: + - extpar_file + - icon_bimonthly [2025-01-01 00:00:00]: tasks: - - preproc [2025-01-01 00:00:00]: + - preproc [2025-01-01 00:00:00]: input: - - grid_file - - extpar_file - - ERA5 + - grid_file + - extpar_file + - ERA5 output: - - icon_input [2025-01-01 00:00:00] - - icon [2025-01-01 00:00:00]: + - icon_input [2025-01-01 00:00:00] + - icon [2025-01-01 00:00:00]: input: - - grid_file - - icon_input [2025-01-01 00:00:00] + - grid_file + - icon_input [2025-01-01 00:00:00] output: - - stream_1 [2025-01-01 00:00:00] - - stream_2 [2025-01-01 00:00:00] - - icon_restart [2025-01-01 00:00:00] - - postproc_1 [2025-01-01 00:00:00]: + - stream_1 [2025-01-01 00:00:00] + - stream_2 [2025-01-01 00:00:00] + - icon_restart [2025-01-01 00:00:00] + - postproc_1 [2025-01-01 00:00:00]: input: - - stream_1 
[2025-01-01 00:00:00] + - stream_1 [2025-01-01 00:00:00] output: - - postout_1 [2025-01-01 00:00:00] - - store_and_clean_1 [2025-01-01 00:00:00]: + - postout_1 [2025-01-01 00:00:00] + - store_and_clean_1 [2025-01-01 00:00:00]: input: - - postout_1 [2025-01-01 00:00:00] - - stream_1 [2025-01-01 00:00:00] - - icon_input [2025-01-01 00:00:00] + - postout_1 [2025-01-01 00:00:00] + - stream_1 [2025-01-01 00:00:00] + - icon_input [2025-01-01 00:00:00] output: - - stored_data_1 [2025-01-01 00:00:00] - - icon_bimonthly [2025-03-01 00:00:00]: + - stored_data_1 [2025-01-01 00:00:00] + - icon_bimonthly [2025-03-01 00:00:00]: tasks: - - preproc [2025-03-01 00:00:00]: + - preproc [2025-03-01 00:00:00]: input: - - grid_file - - extpar_file - - ERA5 + - grid_file + - extpar_file + - ERA5 output: - - icon_input [2025-03-01 00:00:00] - - icon [2025-03-01 00:00:00]: + - icon_input [2025-03-01 00:00:00] + - icon [2025-03-01 00:00:00]: input: - - grid_file - - icon_input [2025-03-01 00:00:00] - - icon_restart [2025-01-01 00:00:00] + - grid_file + - icon_input [2025-03-01 00:00:00] + - icon_restart [2025-01-01 00:00:00] output: - - stream_1 [2025-03-01 00:00:00] - - stream_2 [2025-03-01 00:00:00] - - icon_restart [2025-03-01 00:00:00] - - postproc_1 [2025-03-01 00:00:00]: + - stream_1 [2025-03-01 00:00:00] + - stream_2 [2025-03-01 00:00:00] + - icon_restart [2025-03-01 00:00:00] + - postproc_1 [2025-03-01 00:00:00]: input: - - stream_1 [2025-03-01 00:00:00] + - stream_1 [2025-03-01 00:00:00] output: - - postout_1 [2025-03-01 00:00:00] - - store_and_clean_1 [2025-03-01 00:00:00]: + - postout_1 [2025-03-01 00:00:00] + - store_and_clean_1 [2025-03-01 00:00:00]: input: - - postout_1 [2025-03-01 00:00:00] - - stream_1 [2025-03-01 00:00:00] - - icon_input [2025-03-01 00:00:00] + - postout_1 [2025-03-01 00:00:00] + - stream_1 [2025-03-01 00:00:00] + - icon_input [2025-03-01 00:00:00] output: - - stored_data_1 [2025-03-01 00:00:00] - - icon_bimonthly [2025-05-01 00:00:00]: + - stored_data_1 [2025-03-01 00:00:00] + - icon_bimonthly [2025-05-01 00:00:00]: tasks: - - preproc [2025-05-01 00:00:00]: + - preproc [2025-05-01 00:00:00]: input: - - grid_file - - extpar_file - - ERA5 + - grid_file + - extpar_file + - ERA5 output: - - icon_input [2025-05-01 00:00:00] + - icon_input [2025-05-01 00:00:00] wait on: - - icon [2025-01-01 00:00:00] - - icon [2025-05-01 00:00:00]: + - icon [2025-01-01 00:00:00] + - icon [2025-05-01 00:00:00]: input: - - grid_file - - icon_input [2025-05-01 00:00:00] - - icon_restart [2025-03-01 00:00:00] + - grid_file + - icon_input [2025-05-01 00:00:00] + - icon_restart [2025-03-01 00:00:00] output: - - stream_1 [2025-05-01 00:00:00] - - stream_2 [2025-05-01 00:00:00] - - icon_restart [2025-05-01 00:00:00] - - postproc_1 [2025-05-01 00:00:00]: + - stream_1 [2025-05-01 00:00:00] + - stream_2 [2025-05-01 00:00:00] + - icon_restart [2025-05-01 00:00:00] + - postproc_1 [2025-05-01 00:00:00]: input: - - stream_1 [2025-05-01 00:00:00] + - stream_1 [2025-05-01 00:00:00] output: - - postout_1 [2025-05-01 00:00:00] - - store_and_clean_1 [2025-05-01 00:00:00]: + - postout_1 [2025-05-01 00:00:00] + - store_and_clean_1 [2025-05-01 00:00:00]: input: - - postout_1 [2025-05-01 00:00:00] - - stream_1 [2025-05-01 00:00:00] - - icon_input [2025-05-01 00:00:00] + - postout_1 [2025-05-01 00:00:00] + - stream_1 [2025-05-01 00:00:00] + - icon_input [2025-05-01 00:00:00] output: - - stored_data_1 [2025-05-01 00:00:00] - - icon_bimonthly [2025-07-01 00:00:00]: + - stored_data_1 [2025-05-01 00:00:00] + - icon_bimonthly 
[2025-07-01 00:00:00]: tasks: - - preproc [2025-07-01 00:00:00]: + - preproc [2025-07-01 00:00:00]: input: - - grid_file - - extpar_file - - ERA5 + - grid_file + - extpar_file + - ERA5 output: - - icon_input [2025-07-01 00:00:00] + - icon_input [2025-07-01 00:00:00] wait on: - - icon [2025-03-01 00:00:00] - - icon [2025-07-01 00:00:00]: + - icon [2025-03-01 00:00:00] + - icon [2025-07-01 00:00:00]: input: - - grid_file - - icon_input [2025-07-01 00:00:00] - - icon_restart [2025-05-01 00:00:00] + - grid_file + - icon_input [2025-07-01 00:00:00] + - icon_restart [2025-05-01 00:00:00] output: - - stream_1 [2025-07-01 00:00:00] - - stream_2 [2025-07-01 00:00:00] - - icon_restart [2025-07-01 00:00:00] - - postproc_1 [2025-07-01 00:00:00]: + - stream_1 [2025-07-01 00:00:00] + - stream_2 [2025-07-01 00:00:00] + - icon_restart [2025-07-01 00:00:00] + - postproc_1 [2025-07-01 00:00:00]: input: - - stream_1 [2025-07-01 00:00:00] + - stream_1 [2025-07-01 00:00:00] output: - - postout_1 [2025-07-01 00:00:00] - - store_and_clean_1 [2025-07-01 00:00:00]: + - postout_1 [2025-07-01 00:00:00] + - store_and_clean_1 [2025-07-01 00:00:00]: input: - - postout_1 [2025-07-01 00:00:00] - - stream_1 [2025-07-01 00:00:00] - - icon_input [2025-07-01 00:00:00] + - postout_1 [2025-07-01 00:00:00] + - stream_1 [2025-07-01 00:00:00] + - icon_input [2025-07-01 00:00:00] output: - - stored_data_1 [2025-07-01 00:00:00] - - icon_bimonthly [2025-09-01 00:00:00]: + - stored_data_1 [2025-07-01 00:00:00] + - icon_bimonthly [2025-09-01 00:00:00]: tasks: - - preproc [2025-09-01 00:00:00]: + - preproc [2025-09-01 00:00:00]: input: - - grid_file - - extpar_file - - ERA5 + - grid_file + - extpar_file + - ERA5 output: - - icon_input [2025-09-01 00:00:00] + - icon_input [2025-09-01 00:00:00] wait on: - - icon [2025-05-01 00:00:00] - - icon [2025-09-01 00:00:00]: + - icon [2025-05-01 00:00:00] + - icon [2025-09-01 00:00:00]: input: - - grid_file - - icon_input [2025-09-01 00:00:00] - - icon_restart [2025-07-01 00:00:00] + - grid_file + - icon_input [2025-09-01 00:00:00] + - icon_restart [2025-07-01 00:00:00] output: - - stream_1 [2025-09-01 00:00:00] - - stream_2 [2025-09-01 00:00:00] - - icon_restart [2025-09-01 00:00:00] - - postproc_1 [2025-09-01 00:00:00]: + - stream_1 [2025-09-01 00:00:00] + - stream_2 [2025-09-01 00:00:00] + - icon_restart [2025-09-01 00:00:00] + - postproc_1 [2025-09-01 00:00:00]: input: - - stream_1 [2025-09-01 00:00:00] + - stream_1 [2025-09-01 00:00:00] output: - - postout_1 [2025-09-01 00:00:00] - - store_and_clean_1 [2025-09-01 00:00:00]: + - postout_1 [2025-09-01 00:00:00] + - store_and_clean_1 [2025-09-01 00:00:00]: input: - - postout_1 [2025-09-01 00:00:00] - - stream_1 [2025-09-01 00:00:00] - - icon_input [2025-09-01 00:00:00] + - postout_1 [2025-09-01 00:00:00] + - stream_1 [2025-09-01 00:00:00] + - icon_input [2025-09-01 00:00:00] output: - - stored_data_1 [2025-09-01 00:00:00] - - icon_bimonthly [2025-11-01 00:00:00]: + - stored_data_1 [2025-09-01 00:00:00] + - icon_bimonthly [2025-11-01 00:00:00]: tasks: - - preproc [2025-11-01 00:00:00]: + - preproc [2025-11-01 00:00:00]: input: - - grid_file - - extpar_file - - ERA5 + - grid_file + - extpar_file + - ERA5 output: - - icon_input [2025-11-01 00:00:00] + - icon_input [2025-11-01 00:00:00] wait on: - - icon [2025-07-01 00:00:00] - - icon [2025-11-01 00:00:00]: + - icon [2025-07-01 00:00:00] + - icon [2025-11-01 00:00:00]: input: - - grid_file - - icon_input [2025-11-01 00:00:00] - - icon_restart [2025-09-01 00:00:00] + - grid_file + - icon_input 
[2025-11-01 00:00:00] + - icon_restart [2025-09-01 00:00:00] output: - - stream_1 [2025-11-01 00:00:00] - - stream_2 [2025-11-01 00:00:00] - - icon_restart [2025-11-01 00:00:00] - - postproc_1 [2025-11-01 00:00:00]: + - stream_1 [2025-11-01 00:00:00] + - stream_2 [2025-11-01 00:00:00] + - icon_restart [2025-11-01 00:00:00] + - postproc_1 [2025-11-01 00:00:00]: input: - - stream_1 [2025-11-01 00:00:00] + - stream_1 [2025-11-01 00:00:00] output: - - postout_1 [2025-11-01 00:00:00] - - store_and_clean_1 [2025-11-01 00:00:00]: + - postout_1 [2025-11-01 00:00:00] + - store_and_clean_1 [2025-11-01 00:00:00]: input: - - postout_1 [2025-11-01 00:00:00] - - stream_1 [2025-11-01 00:00:00] - - icon_input [2025-11-01 00:00:00] + - postout_1 [2025-11-01 00:00:00] + - stream_1 [2025-11-01 00:00:00] + - icon_input [2025-11-01 00:00:00] output: - - stored_data_1 [2025-11-01 00:00:00] - - icon_bimonthly [2026-01-01 00:00:00]: + - stored_data_1 [2025-11-01 00:00:00] + - icon_bimonthly [2026-01-01 00:00:00]: tasks: - - preproc [2026-01-01 00:00:00]: + - preproc [2026-01-01 00:00:00]: input: - - grid_file - - extpar_file - - ERA5 + - grid_file + - extpar_file + - ERA5 output: - - icon_input [2026-01-01 00:00:00] + - icon_input [2026-01-01 00:00:00] wait on: - - icon [2025-09-01 00:00:00] - - icon [2026-01-01 00:00:00]: + - icon [2025-09-01 00:00:00] + - icon [2026-01-01 00:00:00]: input: - - grid_file - - icon_input [2026-01-01 00:00:00] - - icon_restart [2025-11-01 00:00:00] + - grid_file + - icon_input [2026-01-01 00:00:00] + - icon_restart [2025-11-01 00:00:00] output: - - stream_1 [2026-01-01 00:00:00] - - stream_2 [2026-01-01 00:00:00] - - icon_restart [2026-01-01 00:00:00] - - postproc_1 [2026-01-01 00:00:00]: + - stream_1 [2026-01-01 00:00:00] + - stream_2 [2026-01-01 00:00:00] + - icon_restart [2026-01-01 00:00:00] + - postproc_1 [2026-01-01 00:00:00]: input: - - stream_1 [2026-01-01 00:00:00] + - stream_1 [2026-01-01 00:00:00] output: - - postout_1 [2026-01-01 00:00:00] - - store_and_clean_1 [2026-01-01 00:00:00]: + - postout_1 [2026-01-01 00:00:00] + - store_and_clean_1 [2026-01-01 00:00:00]: input: - - postout_1 [2026-01-01 00:00:00] - - stream_1 [2026-01-01 00:00:00] - - icon_input [2026-01-01 00:00:00] + - postout_1 [2026-01-01 00:00:00] + - stream_1 [2026-01-01 00:00:00] + - icon_input [2026-01-01 00:00:00] output: - - stored_data_1 [2026-01-01 00:00:00] - - icon_bimonthly [2026-03-01 00:00:00]: + - stored_data_1 [2026-01-01 00:00:00] + - icon_bimonthly [2026-03-01 00:00:00]: tasks: - - preproc [2026-03-01 00:00:00]: + - preproc [2026-03-01 00:00:00]: input: - - grid_file - - extpar_file - - ERA5 + - grid_file + - extpar_file + - ERA5 output: - - icon_input [2026-03-01 00:00:00] + - icon_input [2026-03-01 00:00:00] wait on: - - icon [2025-11-01 00:00:00] - - icon [2026-03-01 00:00:00]: + - icon [2025-11-01 00:00:00] + - icon [2026-03-01 00:00:00]: input: - - grid_file - - icon_input [2026-03-01 00:00:00] - - icon_restart [2026-01-01 00:00:00] + - grid_file + - icon_input [2026-03-01 00:00:00] + - icon_restart [2026-01-01 00:00:00] output: - - stream_1 [2026-03-01 00:00:00] - - stream_2 [2026-03-01 00:00:00] - - icon_restart [2026-03-01 00:00:00] - - postproc_1 [2026-03-01 00:00:00]: + - stream_1 [2026-03-01 00:00:00] + - stream_2 [2026-03-01 00:00:00] + - icon_restart [2026-03-01 00:00:00] + - postproc_1 [2026-03-01 00:00:00]: input: - - stream_1 [2026-03-01 00:00:00] + - stream_1 [2026-03-01 00:00:00] output: - - postout_1 [2026-03-01 00:00:00] - - store_and_clean_1 [2026-03-01 00:00:00]: + - 
postout_1 [2026-03-01 00:00:00] + - store_and_clean_1 [2026-03-01 00:00:00]: input: - - postout_1 [2026-03-01 00:00:00] - - stream_1 [2026-03-01 00:00:00] - - icon_input [2026-03-01 00:00:00] + - postout_1 [2026-03-01 00:00:00] + - stream_1 [2026-03-01 00:00:00] + - icon_input [2026-03-01 00:00:00] output: - - stored_data_1 [2026-03-01 00:00:00] - - icon_bimonthly [2026-05-01 00:00:00]: + - stored_data_1 [2026-03-01 00:00:00] + - icon_bimonthly [2026-05-01 00:00:00]: tasks: - - preproc [2026-05-01 00:00:00]: + - preproc [2026-05-01 00:00:00]: input: - - grid_file - - extpar_file - - ERA5 + - grid_file + - extpar_file + - ERA5 output: - - icon_input [2026-05-01 00:00:00] + - icon_input [2026-05-01 00:00:00] wait on: - - icon [2026-01-01 00:00:00] - - icon [2026-05-01 00:00:00]: + - icon [2026-01-01 00:00:00] + - icon [2026-05-01 00:00:00]: input: - - grid_file - - icon_input [2026-05-01 00:00:00] - - icon_restart [2026-03-01 00:00:00] + - grid_file + - icon_input [2026-05-01 00:00:00] + - icon_restart [2026-03-01 00:00:00] output: - - stream_1 [2026-05-01 00:00:00] - - stream_2 [2026-05-01 00:00:00] - - icon_restart [2026-05-01 00:00:00] - - postproc_1 [2026-05-01 00:00:00]: + - stream_1 [2026-05-01 00:00:00] + - stream_2 [2026-05-01 00:00:00] + - icon_restart [2026-05-01 00:00:00] + - postproc_1 [2026-05-01 00:00:00]: input: - - stream_1 [2026-05-01 00:00:00] + - stream_1 [2026-05-01 00:00:00] output: - - postout_1 [2026-05-01 00:00:00] - - store_and_clean_1 [2026-05-01 00:00:00]: + - postout_1 [2026-05-01 00:00:00] + - store_and_clean_1 [2026-05-01 00:00:00]: input: - - postout_1 [2026-05-01 00:00:00] - - stream_1 [2026-05-01 00:00:00] - - icon_input [2026-05-01 00:00:00] + - postout_1 [2026-05-01 00:00:00] + - stream_1 [2026-05-01 00:00:00] + - icon_input [2026-05-01 00:00:00] output: - - stored_data_1 [2026-05-01 00:00:00] - - icon_bimonthly [2026-07-01 00:00:00]: + - stored_data_1 [2026-05-01 00:00:00] + - icon_bimonthly [2026-07-01 00:00:00]: tasks: - - preproc [2026-07-01 00:00:00]: + - preproc [2026-07-01 00:00:00]: input: - - grid_file - - extpar_file - - ERA5 + - grid_file + - extpar_file + - ERA5 output: - - icon_input [2026-07-01 00:00:00] + - icon_input [2026-07-01 00:00:00] wait on: - - icon [2026-03-01 00:00:00] - - icon [2026-07-01 00:00:00]: + - icon [2026-03-01 00:00:00] + - icon [2026-07-01 00:00:00]: input: - - grid_file - - icon_input [2026-07-01 00:00:00] - - icon_restart [2026-05-01 00:00:00] + - grid_file + - icon_input [2026-07-01 00:00:00] + - icon_restart [2026-05-01 00:00:00] output: - - stream_1 [2026-07-01 00:00:00] - - stream_2 [2026-07-01 00:00:00] - - icon_restart [2026-07-01 00:00:00] - - postproc_1 [2026-07-01 00:00:00]: + - stream_1 [2026-07-01 00:00:00] + - stream_2 [2026-07-01 00:00:00] + - icon_restart [2026-07-01 00:00:00] + - postproc_1 [2026-07-01 00:00:00]: input: - - stream_1 [2026-07-01 00:00:00] + - stream_1 [2026-07-01 00:00:00] output: - - postout_1 [2026-07-01 00:00:00] - - store_and_clean_1 [2026-07-01 00:00:00]: + - postout_1 [2026-07-01 00:00:00] + - store_and_clean_1 [2026-07-01 00:00:00]: input: - - postout_1 [2026-07-01 00:00:00] - - stream_1 [2026-07-01 00:00:00] - - icon_input [2026-07-01 00:00:00] + - postout_1 [2026-07-01 00:00:00] + - stream_1 [2026-07-01 00:00:00] + - icon_input [2026-07-01 00:00:00] output: - - stored_data_1 [2026-07-01 00:00:00] - - icon_bimonthly [2026-09-01 00:00:00]: + - stored_data_1 [2026-07-01 00:00:00] + - icon_bimonthly [2026-09-01 00:00:00]: tasks: - - preproc [2026-09-01 00:00:00]: + - preproc 
[2026-09-01 00:00:00]: input: - - grid_file - - extpar_file - - ERA5 + - grid_file + - extpar_file + - ERA5 output: - - icon_input [2026-09-01 00:00:00] + - icon_input [2026-09-01 00:00:00] wait on: - - icon [2026-05-01 00:00:00] - - icon [2026-09-01 00:00:00]: + - icon [2026-05-01 00:00:00] + - icon [2026-09-01 00:00:00]: input: - - grid_file - - icon_input [2026-09-01 00:00:00] - - icon_restart [2026-07-01 00:00:00] + - grid_file + - icon_input [2026-09-01 00:00:00] + - icon_restart [2026-07-01 00:00:00] output: - - stream_1 [2026-09-01 00:00:00] - - stream_2 [2026-09-01 00:00:00] - - icon_restart [2026-09-01 00:00:00] - - postproc_1 [2026-09-01 00:00:00]: + - stream_1 [2026-09-01 00:00:00] + - stream_2 [2026-09-01 00:00:00] + - icon_restart [2026-09-01 00:00:00] + - postproc_1 [2026-09-01 00:00:00]: input: - - stream_1 [2026-09-01 00:00:00] + - stream_1 [2026-09-01 00:00:00] output: - - postout_1 [2026-09-01 00:00:00] - - store_and_clean_1 [2026-09-01 00:00:00]: + - postout_1 [2026-09-01 00:00:00] + - store_and_clean_1 [2026-09-01 00:00:00]: input: - - postout_1 [2026-09-01 00:00:00] - - stream_1 [2026-09-01 00:00:00] - - icon_input [2026-09-01 00:00:00] + - postout_1 [2026-09-01 00:00:00] + - stream_1 [2026-09-01 00:00:00] + - icon_input [2026-09-01 00:00:00] output: - - stored_data_1 [2026-09-01 00:00:00] - - icon_bimonthly [2026-11-01 00:00:00]: + - stored_data_1 [2026-09-01 00:00:00] + - icon_bimonthly [2026-11-01 00:00:00]: tasks: - - preproc [2026-11-01 00:00:00]: + - preproc [2026-11-01 00:00:00]: input: - - grid_file - - extpar_file - - ERA5 + - grid_file + - extpar_file + - ERA5 output: - - icon_input [2026-11-01 00:00:00] + - icon_input [2026-11-01 00:00:00] wait on: - - icon [2026-07-01 00:00:00] - - icon [2026-11-01 00:00:00]: + - icon [2026-07-01 00:00:00] + - icon [2026-11-01 00:00:00]: input: - - grid_file - - icon_input [2026-11-01 00:00:00] - - icon_restart [2026-09-01 00:00:00] + - grid_file + - icon_input [2026-11-01 00:00:00] + - icon_restart [2026-09-01 00:00:00] output: - - stream_1 [2026-11-01 00:00:00] - - stream_2 [2026-11-01 00:00:00] - - icon_restart [2026-11-01 00:00:00] - - postproc_1 [2026-11-01 00:00:00]: + - stream_1 [2026-11-01 00:00:00] + - stream_2 [2026-11-01 00:00:00] + - icon_restart [2026-11-01 00:00:00] + - postproc_1 [2026-11-01 00:00:00]: input: - - stream_1 [2026-11-01 00:00:00] + - stream_1 [2026-11-01 00:00:00] output: - - postout_1 [2026-11-01 00:00:00] - - store_and_clean_1 [2026-11-01 00:00:00]: + - postout_1 [2026-11-01 00:00:00] + - store_and_clean_1 [2026-11-01 00:00:00]: input: - - postout_1 [2026-11-01 00:00:00] - - stream_1 [2026-11-01 00:00:00] - - icon_input [2026-11-01 00:00:00] + - postout_1 [2026-11-01 00:00:00] + - stream_1 [2026-11-01 00:00:00] + - icon_input [2026-11-01 00:00:00] output: - - stored_data_1 [2026-11-01 00:00:00] - - yearly [2025-01-01 00:00:00]: + - stored_data_1 [2026-11-01 00:00:00] + - yearly [2025-01-01 00:00:00]: tasks: - - postproc_2 [2025-01-01 00:00:00]: - input: - - stream_2 [2025-01-01 00:00:00] - - stream_2 [2025-03-01 00:00:00] - - stream_2 [2025-05-01 00:00:00] - - stream_2 [2025-07-01 00:00:00] - - stream_2 [2025-09-01 00:00:00] - - stream_2 [2025-11-01 00:00:00] - output: - - postout_2 [2025-01-01 00:00:00] - - store_and_clean_2 [2025-01-01 00:00:00]: - input: - - postout_2 [2025-01-01 00:00:00] - - stream_2 [2025-01-01 00:00:00] - - stream_2 [2025-03-01 00:00:00] - - stream_2 [2025-05-01 00:00:00] - - stream_2 [2025-07-01 00:00:00] - - stream_2 [2025-09-01 00:00:00] - - stream_2 [2025-11-01 
00:00:00] - output: - - stored_data_2 [2025-01-01 00:00:00] - - yearly [2026-01-01 00:00:00]: + - postproc_2 [2025-01-01 00:00:00]: + input: + - stream_2 [2025-01-01 00:00:00] + - stream_2 [2025-03-01 00:00:00] + - stream_2 [2025-05-01 00:00:00] + - stream_2 [2025-07-01 00:00:00] + - stream_2 [2025-09-01 00:00:00] + - stream_2 [2025-11-01 00:00:00] + output: + - postout_2 [2025-01-01 00:00:00] + - store_and_clean_2 [2025-01-01 00:00:00]: + input: + - postout_2 [2025-01-01 00:00:00] + - stream_2 [2025-01-01 00:00:00] + - stream_2 [2025-03-01 00:00:00] + - stream_2 [2025-05-01 00:00:00] + - stream_2 [2025-07-01 00:00:00] + - stream_2 [2025-09-01 00:00:00] + - stream_2 [2025-11-01 00:00:00] + output: + - stored_data_2 [2025-01-01 00:00:00] + - yearly [2026-01-01 00:00:00]: tasks: - - postproc_2 [2026-01-01 00:00:00]: - input: - - stream_2 [2026-01-01 00:00:00] - - stream_2 [2026-03-01 00:00:00] - - stream_2 [2026-05-01 00:00:00] - - stream_2 [2026-07-01 00:00:00] - - stream_2 [2026-09-01 00:00:00] - - stream_2 [2026-11-01 00:00:00] - output: - - postout_2 [2026-01-01 00:00:00] - - store_and_clean_2 [2026-01-01 00:00:00]: - input: - - postout_2 [2026-01-01 00:00:00] - - stream_2 [2026-01-01 00:00:00] - - stream_2 [2026-03-01 00:00:00] - - stream_2 [2026-05-01 00:00:00] - - stream_2 [2026-07-01 00:00:00] - - stream_2 [2026-09-01 00:00:00] - - stream_2 [2026-11-01 00:00:00] - output: - - stored_data_2 [2026-01-01 00:00:00] \ No newline at end of file + - postproc_2 [2026-01-01 00:00:00]: + input: + - stream_2 [2026-01-01 00:00:00] + - stream_2 [2026-03-01 00:00:00] + - stream_2 [2026-05-01 00:00:00] + - stream_2 [2026-07-01 00:00:00] + - stream_2 [2026-09-01 00:00:00] + - stream_2 [2026-11-01 00:00:00] + output: + - postout_2 [2026-01-01 00:00:00] + - store_and_clean_2 [2026-01-01 00:00:00]: + input: + - postout_2 [2026-01-01 00:00:00] + - stream_2 [2026-01-01 00:00:00] + - stream_2 [2026-03-01 00:00:00] + - stream_2 [2026-05-01 00:00:00] + - stream_2 [2026-07-01 00:00:00] + - stream_2 [2026-09-01 00:00:00] + - stream_2 [2026-11-01 00:00:00] + output: + - stored_data_2 [2026-01-01 00:00:00] \ No newline at end of file diff --git a/tests/files/configs/test_config_small.txt b/tests/files/configs/test_config_small.txt index 3559fa03..f7f2aa3d 100644 --- a/tests/files/configs/test_config_small.txt +++ b/tests/files/configs/test_config_small.txt @@ -1,28 +1,28 @@ cycles: - - bimonthly_tasks [2026-01-01 00:00:00]: + - bimonthly_tasks [2026-01-01 00:00:00]: tasks: - - icon [2026-01-01 00:00:00]: + - icon [2026-01-01 00:00:00]: output: - - icon_output [2026-01-01 00:00:00] - - icon_restart [2026-01-01 00:00:00] - - bimonthly_tasks [2026-03-01 00:00:00]: + - icon_output [2026-01-01 00:00:00] + - icon_restart [2026-01-01 00:00:00] + - bimonthly_tasks [2026-03-01 00:00:00]: tasks: - - icon [2026-03-01 00:00:00]: + - icon [2026-03-01 00:00:00]: input: - - icon_restart [2026-01-01 00:00:00] + - icon_restart [2026-01-01 00:00:00] output: - - icon_output [2026-03-01 00:00:00] - - icon_restart [2026-03-01 00:00:00] - - bimonthly_tasks [2026-05-01 00:00:00]: + - icon_output [2026-03-01 00:00:00] + - icon_restart [2026-03-01 00:00:00] + - bimonthly_tasks [2026-05-01 00:00:00]: tasks: - - icon [2026-05-01 00:00:00]: + - icon [2026-05-01 00:00:00]: input: - - icon_restart [2026-03-01 00:00:00] + - icon_restart [2026-03-01 00:00:00] output: - - icon_output [2026-05-01 00:00:00] - - icon_restart [2026-05-01 00:00:00] - - lastly: + - icon_output [2026-05-01 00:00:00] + - icon_restart [2026-05-01 00:00:00] + - lastly: 
tasks:
-      - cleanup:
+    - cleanup:
         wait on:
-          - icon [2026-05-01 00:00:00]
\ No newline at end of file
+      - icon [2026-05-01 00:00:00]
\ No newline at end of file

From f8ba4808de2e8483be060a60936a12950fb90b53 Mon Sep 17 00:00:00 2001
From: Matthieu
Date: Thu, 7 Nov 2024 21:42:18 +0100
Subject: [PATCH 18/39] fix: check if TimeSeries is initialized before testing dates

---
 src/sirocco/core.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/sirocco/core.py b/src/sirocco/core.py
index 8b4f73e5..96407b3f 100644
--- a/src/sirocco/core.py
+++ b/src/sirocco/core.py
@@ -159,6 +159,12 @@ def __setitem__(self, date: datetime, data: TimeSeriesObject) -> None:
         self.end_date = date
 
     def __getitem__(self, date: datetime) -> TimeSeriesObject:
+        if self.start_date is None:
+            msg = "TimeSeries still empty, cannot access by date"
+            raise ValueError(msg)
+        if date not in self._dict:
+            msg = f"date {date} not found"
+            raise KeyError(msg)
         if date < self.start_date or date > self.end_date:
             item = next(iter(self._dict.values()))
             msg = (
@@ -166,9 +172,6 @@ def __getitem__(self, date: datetime) -> TimeSeriesObject:
             )
             logger.warning(msg)
             return None
-        if date not in self._dict:
-            msg = f"date {date} not found"
-            raise KeyError(msg)
         return self._dict[date]
 
     def values(self) -> Iterator[TimeSeriesObject]:

From ced1dbdef8daa243d863023e867a70116e844ed9 Mon Sep 17 00:00:00 2001
From: Matthieu
Date: Thu, 7 Nov 2024 22:10:32 +0100
Subject: [PATCH 19/39] fix: revert test order again; the other way raises the KeyError we want to avoid when the date is out of bounds

---
 src/sirocco/core.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/sirocco/core.py b/src/sirocco/core.py
index 96407b3f..c54061f1 100644
--- a/src/sirocco/core.py
+++ b/src/sirocco/core.py
@@ -162,9 +162,6 @@ def __getitem__(self, date: datetime) -> TimeSeriesObject:
         if self.start_date is None:
             msg = "TimeSeries still empty, cannot access by date"
             raise ValueError(msg)
-        if date not in self._dict:
-            msg = f"date {date} not found"
-            raise KeyError(msg)
         if date < self.start_date or date > self.end_date:
             item = next(iter(self._dict.values()))
             msg = (
@@ -172,6 +169,10 @@ def __getitem__(self, date: datetime) -> TimeSeriesObject:
             )
             logger.warning(msg)
             return None
+        if date not in self._dict:
+            item = next(iter(self._dict.values()))
+            msg = f"date {date} for item '{item.name}' not found"
+            raise KeyError(msg)
         return self._dict[date]
 
     def values(self) -> Iterator[TimeSeriesObject]:

From 384a95b6415715ee92d6561e4320ed5f3c0b67f3 Mon Sep 17 00:00:00 2001
From: Matthieu
Date: Thu, 7 Nov 2024 22:11:46 +0100
Subject: [PATCH 20/39] ref: use dataclass for Task as well

replace custom __init__ by from_config classmethod
---
 src/sirocco/core.py | 61 +++++++++++++++++++++------------------------
 1 file changed, 29 insertions(+), 32 deletions(-)

diff --git a/src/sirocco/core.py b/src/sirocco/core.py
index c54061f1..7878f4f8 100644
--- a/src/sirocco/core.py
+++ b/src/sirocco/core.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import logging
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Generic, Literal, TypeVar
 
 from termcolor import colored
@@ -43,16 +43,17 @@ def __str__(self) -> str:
         return f"{self.name} [{self.date}]"
 
 
+@dataclass
 class Task(NodeStr):
-    color: str = "light_red"
 
     name: str
-    outputs: list[Data]
-    inputs: list[Data]
-    wait_on: list[Task]
+    workflow: Workflow
+    outputs: list[Data] = 
field(default_factory=list) + inputs: list[Data] = field(default_factory=list) + wait_on: list[Task] = field(default_factory=list) date: datetime | None = None + color: str = "light_red" # TODO: This list is too long. We should start with the set of supported # keywords and extend it as we support more command: str | None = None @@ -68,35 +69,31 @@ class Task(NodeStr): src: str | None = None conda_env: str | None = None - def __init__(self, config: ConfigTask, task_ref: ConfigCycleTask, workflow: Workflow, date: datetime | None = None): - self.name = config.name - self.date = date - self.inputs = [] - self.outputs = [] - self.wait_on = [] - self.workflow = workflow - # Long list of not always supported keywords - self.command = config.command - self.command_option = config.command_option - self.input_arg_options = config.input_arg_options - self.host = config.host - self.account = config.account - self.plugin = config.plugin - self.config = config.config - self.uenv = config.uenv - self.nodes = config.nodes - self.walltime = config.walltime - self.src = config.src - self.conda_env = config.conda_env - + # use classmethod instead of custom init + @classmethod + def from_config(cls, config: ConfigTask, task_ref: ConfigCycleTask, workflow: Workflow, date: datetime | None = None) -> Self: + inputs: list[Data] = [] for input_spec in task_ref.inputs: - for data in workflow.data.get(input_spec, self.date): + for data in workflow.data.get(input_spec, date): if data is not None: - self.inputs.append(data) + inputs.append(data) + + outputs: list[Data] = [] for output_spec in task_ref.outputs: - self.outputs.append(self.workflow.data[output_spec.name, self.date]) + outputs.append(workflow.data[output_spec.name, date]) + + new = cls( + date=date, + inputs=inputs, + outputs=outputs, + workflow=workflow, + **dict(config) # use the fact that pydantic models can be turned into dicts easily + ) # this works because dataclass has generated this init for us + # Store for actual linking in link_wait_on_tasks() once all tasks are created - self._wait_on_specs = task_ref.depends + new._wait_on_specs = task_ref.depends + + return new def link_wait_on_tasks(self): for wait_on_spec in self._wait_on_specs: @@ -274,7 +271,7 @@ def __init__(self, workflow_config: ConfigWorkflow) -> None: for task_ref in cycle_config.tasks: task_name = task_ref.name task_config = workflow_config.task_dict[task_name] - self.tasks[task_name, date] = (task := Task(task_config, task_ref, workflow=self, date=date)) + self.tasks[task_name, date] = (task := Task.from_config(task_config, task_ref, workflow=self, date=date)) cycle_tasks.append(task) self.cycles[cycle_name, date] = Cycle(name=cycle_name, tasks=cycle_tasks, date=date) From 444df90ef0de75f0df1282b15a531cc5373f0b1e Mon Sep 17 00:00:00 2001 From: Matthieu Date: Thu, 7 Nov 2024 22:42:52 +0100 Subject: [PATCH 21/39] hatch fmt --- src/sirocco/core.py | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/src/sirocco/core.py b/src/sirocco/core.py index 7878f4f8..73f61e6d 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -2,7 +2,7 @@ import logging from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Generic, Literal, TypeVar +from typing import TYPE_CHECKING, Generic, Literal, Self, TypeVar from termcolor import colored @@ -45,8 +45,6 @@ def __str__(self) -> str: @dataclass class Task(NodeStr): - - name: str workflow: Workflow outputs: list[Data] = field(default_factory=list) @@ -71,35 +69,31 @@ class 
Task(NodeStr): # use classmethod instead of custom init @classmethod - def from_config(cls, config: ConfigTask, task_ref: ConfigCycleTask, workflow: Workflow, date: datetime | None = None) -> Self: + def from_config( + cls, config: ConfigTask, task_ref: ConfigCycleTask, workflow: Workflow, date: datetime | None = None + ) -> Self: inputs: list[Data] = [] for input_spec in task_ref.inputs: - for data in workflow.data.get(input_spec, date): - if data is not None: - inputs.append(data) - - outputs: list[Data] = [] - for output_spec in task_ref.outputs: - outputs.append(workflow.data[output_spec.name, date]) + inputs.extend(data for data in workflow.data.get(input_spec, date) if data is not None) + outputs: list[Data] = [workflow.data[output_spec.name, date] for output_spec in task_ref.outputs] new = cls( date=date, inputs=inputs, outputs=outputs, workflow=workflow, - **dict(config) # use the fact that pydantic models can be turned into dicts easily + **dict(config), # use the fact that pydantic models can be turned into dicts easily ) # this works because dataclass has generated this init for us # Store for actual linking in link_wait_on_tasks() once all tasks are created - new._wait_on_specs = task_ref.depends + new.wait_on_specs = task_ref.depends return new def link_wait_on_tasks(self): - for wait_on_spec in self._wait_on_specs: - for task in self.workflow.tasks.get(wait_on_spec, self.date): - if task is not None: - self.wait_on.append(task) + self.wait_on: list[Task] = [] + for wait_on_spec in self.wait_on_specs: + self.wait_on.extend(task for task in self.workflow.tasks.get(wait_on_spec, self.date) if task is not None) @dataclass(kw_only=True) @@ -165,7 +159,7 @@ def __getitem__(self, date: datetime) -> TimeSeriesObject: f"date {date} for item '{item.name}' is out of bounds [{self.start_date} - {self.end_date}], ignoring." 
)
             logger.warning(msg)
-            return None
+            return
         if date not in self._dict:
             item = next(iter(self._dict.values()))
             msg = f"date {date} for item '{item.name}' not found"
@@ -271,7 +265,9 @@ def __init__(self, workflow_config: ConfigWorkflow) -> None:
             for task_ref in cycle_config.tasks:
                 task_name = task_ref.name
                 task_config = workflow_config.task_dict[task_name]
-                self.tasks[task_name, date] = (task := Task.from_config(task_config, task_ref, workflow=self, date=date))
+                self.tasks[task_name, date] = (
+                    task := Task.from_config(task_config, task_ref, workflow=self, date=date)
+                )
                 cycle_tasks.append(task)
             self.cycles[cycle_name, date] = Cycle(name=cycle_name, tasks=cycle_tasks, date=date)

From 656c455f1239cf1c198a609a5aa95d480a615541 Mon Sep 17 00:00:00 2001
From: Matthieu
Date: Thu, 7 Nov 2024 23:11:35 +0100
Subject: [PATCH 22/39] ref: move resolve_dates from parsing to core

---
 src/sirocco/core.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/sirocco/core.py b/src/sirocco/core.py
index 73f61e6d..b7f5c326 100644
--- a/src/sirocco/core.py
+++ b/src/sirocco/core.py
@@ -214,13 +214,22 @@ def __getitem__(self, key: str | tuple(str, datetime | None)) -> TimeSeriesObjec
             raise KeyError(msg)
         return self._dict[name]
 
+    def resolve_target_dates(self, spec, ref_date: datetime | None) -> Iterator[datetime]:
+        if not spec.lag and not spec.date:
+            yield ref_date
+        if spec.lag:
+            for lag in spec.lag:
+                yield ref_date + lag
+        if spec.date:
+            yield from spec.date
+
     def get(self, spec: ConfigCycleSpec, ref_date: datetime | None = None) -> Iterator[TimeSeriesObject]:
         name = spec.name
         if isinstance(self._dict[name], TimeSeries):
             if ref_date is None and spec.date is []:
                 msg = "TimeSeries object must be referenced by dates"
                 raise ValueError(msg)
-            for target_date in spec.resolve_target_dates(ref_date):
+            for target_date in self.resolve_target_dates(spec, ref_date):
                 yield self._dict[name][target_date]
         else:
             if spec.lag or spec.date:

From dc828f78ebd53cd732ad186204463ccc19316639 Mon Sep 17 00:00:00 2001
From: Matthieu
Date: Thu, 7 Nov 2024 23:12:47 +0100
Subject: [PATCH 23/39] fix: also remove it from parsing ... 
--- src/sirocco/parsing/_yaml_data_models.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/sirocco/parsing/_yaml_data_models.py b/src/sirocco/parsing/_yaml_data_models.py index d44587d5..a173d4db 100644 --- a/src/sirocco/parsing/_yaml_data_models.py +++ b/src/sirocco/parsing/_yaml_data_models.py @@ -80,15 +80,6 @@ def convert_datetimes(cls, value) -> list[datetime]: values = value if isinstance(value, list) else [value] return [datetime.fromisoformat(value) for value in values] - def resolve_target_dates(self, ref_date: datetime | None) -> Iterator[datetime]: - if not self.lag and not self.date: - yield ref_date - if self.lag: - for lag in self.lag: - yield ref_date + lag - if self.date: - yield from self.date - class ConfigTask(_NamedBaseModel): """ From dc828f78ebd53cd732ad186204463ccc19316639 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Thu, 7 Nov 2024 23:23:30 +0100 Subject: [PATCH 24/39] ref: move cycle date iterator from parsing to core --- src/sirocco/core.py | 12 +++++++++--- src/sirocco/parsing/_yaml_data_models.py | 6 ------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/sirocco/core.py b/src/sirocco/core.py index b7f5c326..8a2e0434 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -12,13 +12,13 @@ ConfigCycleTaskInput, ConfigTask, ConfigWorkflow, - _DataBaseModel, load_workflow_config, ) if TYPE_CHECKING: from collections.abc import Iterator from datetime import datetime + from sirocco.parsing._yaml_data_models import ConfigCycle, _DateBaseModel logging.basicConfig() @@ -259,7 +259,7 @@ def __init__(self, workflow_config: ConfigWorkflow) -> None: # 2 - create output data nodes for cycle_config in workflow_config.cycles: - for date in cycle_config.dates(): + for date in self.cycle_dates(cycle_config): for task_ref in cycle_config.tasks: for data_ref in task_ref.outputs: data_name = data_ref.name @@ -269,7 +269,7 @@ def __init__(self, workflow_config: ConfigWorkflow) -> None: # 3 - create cycles and tasks for cycle_config in workflow_config.cycles: cycle_name = cycle_config.name - for date in cycle_config.dates(): + for date in self.cycle_dates(cycle_config): cycle_tasks = [] for task_ref in cycle_config.tasks: task_name = task_ref.name @@ -284,6 +284,12 @@ def __init__(self, workflow_config: ConfigWorkflow) -> None: for task in self.tasks.values(): task.link_wait_on_tasks() + def cycle_dates(self, cycle_config: ConfigCycle) -> Iterator[datetime]: + yield (date := cycle_config.start_date) + if cycle_config.period is not None: + while (date := date + cycle_config.period) < cycle_config.end_date: + yield date + def _str_from_method(self, method_name: Literal["__str__", "_str_pretty_"]) -> str: str_method = getattr(NodeStr, method_name) ind = "" diff --git a/src/sirocco/parsing/_yaml_data_models.py b/src/sirocco/parsing/_yaml_data_models.py index a173d4db..fa218acf 100644 --- a/src/sirocco/parsing/_yaml_data_models.py +++ b/src/sirocco/parsing/_yaml_data_models.py @@ -286,12 +286,6 @@ def check_period_is_not_negative_or_zero(self) -> ConfigCycle: raise ValueError(msg) return self - def dates(self) -> Iterator[datetime]: - yield (date := self.start_date) - if self.period is not None: - while (date := date + self.period) < self.end_date: - yield date - class ConfigWorkflow(BaseModel): name: str | None = None From 5006bc0a876a8c0ad77bdd2716954353a94dda00 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Thu, 7 Nov 2024 23:27:03 +0100 Subject: [PATCH 25/39] fix: hatch fmt --- src/sirocco/core.py | 3 ++- src/sirocco/parsing/_yaml_data_models.py 
| 5 -----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/sirocco/core.py b/src/sirocco/core.py
index 8a2e0434..47abb2ed 100644
--- a/src/sirocco/core.py
+++ b/src/sirocco/core.py
@@ -18,7 +18,8 @@
 if TYPE_CHECKING:
     from collections.abc import Iterator
     from datetime import datetime
-    from sirocco.parsing._yaml_data_models import ConfigCycle, _DateBaseModel
+
+    from sirocco.parsing._yaml_data_models import ConfigCycle, _DataBaseModel
 
 logging.basicConfig()
diff --git a/src/sirocco/parsing/_yaml_data_models.py b/src/sirocco/parsing/_yaml_data_models.py
index fa218acf..70bee0f7 100644
--- a/src/sirocco/parsing/_yaml_data_models.py
+++ b/src/sirocco/parsing/_yaml_data_models.py
@@ -1,10 +1,5 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from collections.abc import Iterator
-
 import time
 from datetime import datetime
 from os.path import expandvars

From 988c873f1fd4effa5c82dc1e3ee51333fb9fe568 Mon Sep 17 00:00:00 2001
From: Matthieu
Date: Fri, 8 Nov 2024 10:21:15 +0100
Subject: [PATCH 26/39] ref: put underscore back even if ruff complains

---
 src/sirocco/core.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/sirocco/core.py b/src/sirocco/core.py
index 47abb2ed..57a96e2f 100644
--- a/src/sirocco/core.py
+++ b/src/sirocco/core.py
@@ -21,11 +21,12 @@
 
     from sirocco.parsing._yaml_data_models import ConfigCycle, _DataBaseModel
 
+    type ConfigCycleSpec = ConfigCycleTaskDepend | ConfigCycleTaskInput
 
 logging.basicConfig()
 logger = logging.getLogger(__name__)
 
-type ConfigCycleSpec = ConfigCycleTaskDepend | ConfigCycleTaskInput
+
 
 TimeSeriesObject = TypeVar("TimeSeriesObject")
@@ -87,13 +88,15 @@ def from_config(
         ) # this works because dataclass has generated this init for us
 
         # Store for actual linking in link_wait_on_tasks() once all tasks are created
-        new.wait_on_specs = task_ref.depends
+        new._wait_on_specs = task_ref.depends  # noqa: SLF001 we don't have access to self in a dataclass
+                                               # and setting an underscored attribute from
+                                               # the class itself raises SLF001
 
         return new
 
     def link_wait_on_tasks(self):
         self.wait_on: list[Task] = []
-        for wait_on_spec in self.wait_on_specs:
+        for wait_on_spec in self._wait_on_specs:
             self.wait_on.extend(task for task in self.workflow.tasks.get(wait_on_spec, self.date) if task is not None)

From 5f8a03429740e161bff855804cf42de32fd73bae Mon Sep 17 00:00:00 2001
From: Matthieu
Date: Fri, 8 Nov 2024 10:34:14 +0100
Subject: [PATCH 27/39] add: test for serialized workflow IR graph

It doesn't test the non-graph part, i.e. the data and task descriptions at
the bottom of the yaml file.
---
 tests/test_wc_workflow.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/test_wc_workflow.py b/tests/test_wc_workflow.py
index a6444aa3..de6806e1 100644
--- a/tests/test_wc_workflow.py
+++ b/tests/test_wc_workflow.py
@@ -2,7 +2,7 @@
 
 from sirocco.core import Workflow
 from sirocco.parsing import load_workflow_config
-
+from pathlib import Path
 
 @pytest.fixture
 def config_file_small():
@@ -13,6 +13,5 @@ def config_file_small():
     "config_file", ["tests/files/configs/test_config_small.yml", "tests/files/configs/test_config_large.yml"]
 )
 def test_parse_config_file(config_file):
-    config_workflow = load_workflow_config(config_file)
-    _ = Workflow(config_workflow)
-    # TODO: add test to compare str(core_workflow) against "tests/files/configs/test_config_xxx.txt"
+    workflow = Workflow.from_yaml(config_file)
+    assert str(workflow) == Path(config_file).with_suffix('.txt').read_text()

From 
126426c0186c9304147e413453735bee8df114a4 Mon Sep 17 00:00:00 2001
From: Matthieu
Date: Fri, 8 Nov 2024 11:18:13 +0100
Subject: [PATCH 28/39] ref: not only atmospheric science!

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index c3997f1e..b5a896b7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ classifiers = [
     'Programming Language :: Python',
     'Programming Language :: Python :: 3.12',
     'Topic :: Scientific/Engineering',
-    'Topic :: Scientific/Engineering :: Atmospheric Science',
+    'Topic :: Scientific/Engineering :: Climate Science',
 ]
 keywords = ["workflow"," icon", "aiida", "aiida-workgraph"]
 requires-python = '>=3.12'

From 1e7dae7a8c7f969252f0af032499bb66cb02b3e7 Mon Sep 17 00:00:00 2001
From: Matthieu
Date: Fri, 8 Nov 2024 11:20:09 +0100
Subject: [PATCH 29/39] add:test: test against serialized data

---
 tests/test_wc_workflow.py | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/tests/test_wc_workflow.py b/tests/test_wc_workflow.py
index de6806e1..67388b6f 100644
--- a/tests/test_wc_workflow.py
+++ b/tests/test_wc_workflow.py
@@ -1,17 +1,30 @@
+from pathlib import Path
+
 import pytest
 
 from sirocco.core import Workflow
-from sirocco.parsing import load_workflow_config
-from pathlib import Path
+
 
 @pytest.fixture
 def config_file_small():
     return "files/configs/"
 
 
-@pytest.mark.parametrize(
-    "config_file", ["tests/files/configs/test_config_small.yml", "tests/files/configs/test_config_large.yml"]
-)
+config_test_files = ["tests/files/configs/test_config_small.yml", "tests/files/configs/test_config_large.yml"]
+
+
+@pytest.mark.parametrize("config_file", config_test_files)
 def test_parse_config_file(config_file):
-    workflow = Workflow.from_yaml(config_file)
-    assert str(workflow) == Path(config_file).with_suffix('.txt').read_text()
+    reference_str = Path(config_file).with_suffix(".txt").read_text()
+    test_str = str(Workflow.from_yaml(config_file))
+    if test_str != reference_str:
+        new_path = Path(config_file).with_suffix("new.txt")
+        new_path.write_text(test_str)
+        msg = f"Workflow graph doesn't match serialized data. New graph string dumped to {new_path}."
+        raise ValueError(msg)
+
+
+@pytest.mark.skip(reason="don't run it each time, uncomment to regenerate serialized data")
+@pytest.mark.parametrize("config_file", config_test_files)
+def test_serialize_workflow(config_file):
+    Path(config_file).with_suffix(".txt").write_text(str(Workflow.from_yaml(config_file)))

From 48c77f5d8ff7342c05d4379d568bf909b83b252a Mon Sep 17 00:00:00 2001
From: Matthieu
Date: Fri, 8 Nov 2024 11:32:24 +0100
Subject: [PATCH 30/39] fix: Climate Science classifier does not exist

It's a pity! 
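
The canonical list of classifiers lives in PyPA's trove-classifiers package,
so a candidate classifier can be sanity-checked before committing. A minimal
sketch, assuming that package is installed (it is not a dependency of this
project):

    from trove_classifiers import classifiers

    # the existing topic this patch reverts to:
    assert "Topic :: Scientific/Engineering :: Atmospheric Science" in classifiers
    # the one being removed, because it does not exist in the canonical list:
    assert "Topic :: Scientific/Engineering :: Climate Science" not in classifiers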
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b5a896b7..c3997f1e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ classifiers = [ 'Programming Language :: Python', 'Programming Language :: Python :: 3.12', 'Topic :: Scientific/Engineering', - 'Topic :: Scientific/Engineering :: Climate Science', + 'Topic :: Scientific/Engineering :: Atmospheric Science', ] keywords = ["workflow"," icon", "aiida", "aiida-workgraph"] requires-python = '>=3.12' From 7c66992cf6d367a660edecdad22da018c420c0a4 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Fri, 8 Nov 2024 11:51:29 +0100 Subject: [PATCH 31/39] ref: remove _ from DataBaseModel, used outside --- src/sirocco/core.py | 6 +++--- src/sirocco/parsing/_yaml_data_models.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/sirocco/core.py b/src/sirocco/core.py index 57a96e2f..93aac62d 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -19,7 +19,7 @@ from collections.abc import Iterator from datetime import datetime - from sirocco.parsing._yaml_data_models import ConfigCycle, _DataBaseModel + from sirocco.parsing._yaml_data_models import ConfigCycle, DataBaseModel type ConfigCycleSpec = ConfigCycleTaskDepend | ConfigCycleTaskInput @@ -218,7 +218,7 @@ def __getitem__(self, key: str | tuple(str, datetime | None)) -> TimeSeriesObjec raise KeyError(msg) return self._dict[name] - def resolve_target_dates(self, spec, ref_date: datetime | None) -> Iterator[datetime]: + def _resolve_target_dates(self, spec, ref_date: datetime | None) -> Iterator[datetime]: if not spec.lag and not spec.date: yield ref_date if spec.lag: @@ -233,7 +233,7 @@ def get(self, spec: ConfigCycleSpec, ref_date: datetime | None = None) -> Iterat if ref_date is None and spec.date is []: msg = "TimeSeries object must be referenced by dates" raise ValueError(msg) - for target_date in self.resolve_target_dates(spec, ref_date): + for target_date in self._resolve_target_dates(spec, ref_date): yield self._dict[name][target_date] else: if spec.lag or spec.date: diff --git a/src/sirocco/parsing/_yaml_data_models.py b/src/sirocco/parsing/_yaml_data_models.py index 70bee0f7..9bfef60e 100644 --- a/src/sirocco/parsing/_yaml_data_models.py +++ b/src/sirocco/parsing/_yaml_data_models.py @@ -115,7 +115,7 @@ def convert_to_struct_time(cls, value: str | None) -> time.struct_time | None: return None if value is None else time.strptime(value, "%H:%M:%S") -class _DataBaseModel(_NamedBaseModel): +class DataBaseModel(_NamedBaseModel): """ To create an instance of a data defined in a workflow file. 
""" @@ -138,11 +138,11 @@ def available(self) -> bool: return isinstance(self, ConfigAvailableData) -class ConfigAvailableData(_DataBaseModel): +class ConfigAvailableData(DataBaseModel): pass -class ConfigGeneratedData(_DataBaseModel): +class ConfigGeneratedData(DataBaseModel): pass From efa4206123efdc6b54442e348a84add8fd28e836 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Fri, 8 Nov 2024 11:52:54 +0100 Subject: [PATCH 32/39] fix: hatch fmt --- src/sirocco/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sirocco/core.py b/src/sirocco/core.py index 93aac62d..a837d5c6 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -112,7 +112,7 @@ class Data(NodeStr): date: datetime | None = None @classmethod - def from_config(cls, config: _DataBaseModel, *, date: datetime | None = None): + def from_config(cls, config: DataBaseModel, *, date: datetime | None = None): return cls( name=config.name, type=config.type, From 2f1c751d6cb39b1791cd2bdfc440f9069c24ade8 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Fri, 8 Nov 2024 13:54:01 +0100 Subject: [PATCH 33/39] ref: _resolve_target_dates is a staticmethod --- src/sirocco/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sirocco/core.py b/src/sirocco/core.py index a837d5c6..fc947b54 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -218,7 +218,8 @@ def __getitem__(self, key: str | tuple(str, datetime | None)) -> TimeSeriesObjec raise KeyError(msg) return self._dict[name] - def _resolve_target_dates(self, spec, ref_date: datetime | None) -> Iterator[datetime]: + @staticmethod + def _resolve_target_dates(spec, ref_date: datetime | None) -> Iterator[datetime]: if not spec.lag and not spec.date: yield ref_date if spec.lag: From 3f7f67774f1b4f327dc138d403e8f5983e5f5be9 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Fri, 8 Nov 2024 14:04:11 +0100 Subject: [PATCH 34/39] fix:test: invalid suffix --- tests/test_wc_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_wc_workflow.py b/tests/test_wc_workflow.py index 67388b6f..e2e490c9 100644 --- a/tests/test_wc_workflow.py +++ b/tests/test_wc_workflow.py @@ -18,7 +18,7 @@ def test_parse_config_file(config_file): reference_str = Path(config_file).with_suffix(".txt").read_text() test_str = str(Workflow.from_yaml(config_file)) if test_str != reference_str: - new_path = Path(config_file).with_suffix("new.txt") + new_path = Path(config_file).with_suffix(".new.txt") new_path.write_text(test_str) msg = f"Workflow graph doesn't match serialized data. New graph string dumped to {new_path}." 
raise ValueError(msg)

From dd646b429d6ac34442171284e1462f302e22fe35 Mon Sep 17 00:00:00 2001
From: Matthieu
Date: Fri, 8 Nov 2024 14:22:58 +0100
Subject: [PATCH 35/39] ref: move testing data to its own directory

---
 tests/files/{configs => data}/test_config_large.txt | 0
 tests/files/{configs => data}/test_config_small.txt | 0
 tests/test_wc_workflow.py                           | 3 ++-
 3 files changed, 2 insertions(+), 1 deletion(-)
 rename tests/files/{configs => data}/test_config_large.txt (100%)
 rename tests/files/{configs => data}/test_config_small.txt (100%)

diff --git a/tests/files/configs/test_config_large.txt b/tests/files/data/test_config_large.txt
similarity index 100%
rename from tests/files/configs/test_config_large.txt
rename to tests/files/data/test_config_large.txt
diff --git a/tests/files/configs/test_config_small.txt b/tests/files/data/test_config_small.txt
similarity index 100%
rename from tests/files/configs/test_config_small.txt
rename to tests/files/data/test_config_small.txt
diff --git a/tests/test_wc_workflow.py b/tests/test_wc_workflow.py
index e2e490c9..fc18fa23 100644
--- a/tests/test_wc_workflow.py
+++ b/tests/test_wc_workflow.py
@@ -15,7 +15,8 @@ def config_file_small():
 
 @pytest.mark.parametrize("config_file", config_test_files)
 def test_parse_config_file(config_file):
-    reference_str = Path(config_file).with_suffix(".txt").read_text()
+    config_path = Path(config_file)
+    reference_str = (config_path.parent/".."/"data"/config_path.name).with_suffix('.txt').read_text()
     test_str = str(Workflow.from_yaml(config_file))
     if test_str != reference_str:
         new_path = Path(config_file).with_suffix(".new.txt")

From fa4d2873b5a3d32da7954703c48bc513b54d12d1 Mon Sep 17 00:00:00 2001
From: Matthieu
Date: Fri, 8 Nov 2024 14:29:55 +0100
Subject: [PATCH 36/39] fix: generate test data in the right directory

---
 tests/test_wc_workflow.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/test_wc_workflow.py b/tests/test_wc_workflow.py
index fc18fa23..8b6f203b 100644
--- a/tests/test_wc_workflow.py
+++ b/tests/test_wc_workflow.py
@@ -28,4 +28,6 @@ def test_parse_config_file(config_file):
 @pytest.mark.skip(reason="don't run it each time, uncomment to regenerate serialized data")
 @pytest.mark.parametrize("config_file", config_test_files)
 def test_serialize_workflow(config_file):
-    Path(config_file).with_suffix(".txt").write_text(str(Workflow.from_yaml(config_file)))
+    config_path = Path(config_file)
+    reference_path = (config_path.parent/".."/"data"/config_path.name).with_suffix('.txt')
+    reference_path.write_text(str(Workflow.from_yaml(config_file)))

From 8299b68fc361ba243f47f2f3ef12f6bcbb48d3c9 Mon Sep 17 00:00:00 2001
From: Matthieu
Date: Fri, 8 Nov 2024 14:49:48 +0100
Subject: [PATCH 37/39] fix: hatch fmt

---
 tests/test_wc_workflow.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_wc_workflow.py b/tests/test_wc_workflow.py
index 8b6f203b..9d20e6b1 100644
--- a/tests/test_wc_workflow.py
+++ b/tests/test_wc_workflow.py
@@ -16,7 +16,7 @@ def config_file_small():
 @pytest.mark.parametrize("config_file", config_test_files)
 def test_parse_config_file(config_file):
     config_path = Path(config_file)
-    reference_str = (config_path.parent/".."/"data"/config_path.name).with_suffix('.txt').read_text()
+    reference_str = (config_path.parent / ".." 
/ "data" / config_path.name).with_suffix(".txt").read_text() test_str = str(Workflow.from_yaml(config_file)) if test_str != reference_str: new_path = Path(config_file).with_suffix(".new.txt") @@ -29,5 +29,5 @@ def test_parse_config_file(config_file): @pytest.mark.parametrize("config_file", config_test_files) def test_serialize_workflow(config_file): config_path = Path(config_file) - reference_path = (config_path.parent/".."/"data"/config_path.name).with_suffix('.txt') + reference_path = (config_path.parent / ".." / "data" / config_path.name).with_suffix(".txt") reference_path.write_text(str(Workflow.from_yaml(config_file))) From f4316650ba3c6894e64a97bde465e6ddea25c150 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Mon, 11 Nov 2024 15:16:18 +0100 Subject: [PATCH 38/39] fix: `get` name clashing with `dict` method + typo --- src/sirocco/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/sirocco/core.py b/src/sirocco/core.py index fc947b54..7b13c49d 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -76,7 +76,7 @@ def from_config( ) -> Self: inputs: list[Data] = [] for input_spec in task_ref.inputs: - inputs.extend(data for data in workflow.data.get(input_spec, date) if data is not None) + inputs.extend(data for data in workflow.data.iter_from_cycle_spec(input_spec, date) if data is not None) outputs: list[Data] = [workflow.data[output_spec.name, date] for output_spec in task_ref.outputs] new = cls( @@ -97,7 +97,7 @@ def from_config( def link_wait_on_tasks(self): self.wait_on: list[Task] = [] for wait_on_spec in self._wait_on_specs: - self.wait_on.extend(task for task in self.workflow.tasks.get(wait_on_spec, self.date) if task is not None) + self.wait_on.extend(task for task in self.workflow.tasks.iter_from_cycle_spec(wait_on_spec, self.date) if task is not None) @dataclass(kw_only=True) @@ -228,7 +228,7 @@ def _resolve_target_dates(spec, ref_date: datetime | None) -> Iterator[datetime] if spec.date: yield from spec.date - def get(self, spec: ConfigCycleSpec, ref_date: datetime | None = None) -> Iterator[TimeSeriesObject]: + def iter_from_cycle_spec(self, spec: ConfigCycleSpec, ref_date: datetime | None = None) -> Iterator[TimeSeriesObject]: name = spec.name if isinstance(self._dict[name], TimeSeries): if ref_date is None and spec.date is []: @@ -251,7 +251,7 @@ def values(self) -> Iterator[TimeSeriesObject]: class Workflow: - """Internal reprensentation of a worflow""" + """Internal reprensentation of a workflow""" def __init__(self, workflow_config: ConfigWorkflow) -> None: self.tasks = Store() From c82f0b51f20a725c3835e99207f2afbcf3bfe387 Mon Sep 17 00:00:00 2001 From: Matthieu Date: Mon, 11 Nov 2024 15:20:57 +0100 Subject: [PATCH 39/39] fix: hatch fmt --- src/sirocco/core.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/sirocco/core.py b/src/sirocco/core.py index 7b13c49d..b7e85a19 100644 --- a/src/sirocco/core.py +++ b/src/sirocco/core.py @@ -97,7 +97,9 @@ def from_config( def link_wait_on_tasks(self): self.wait_on: list[Task] = [] for wait_on_spec in self._wait_on_specs: - self.wait_on.extend(task for task in self.workflow.tasks.iter_from_cycle_spec(wait_on_spec, self.date) if task is not None) + self.wait_on.extend( + task for task in self.workflow.tasks.iter_from_cycle_spec(wait_on_spec, self.date) if task is not None + ) @dataclass(kw_only=True) @@ -228,7 +230,9 @@ def _resolve_target_dates(spec, ref_date: datetime | None) -> Iterator[datetime] if spec.date: yield from spec.date - def 
iter_from_cycle_spec(self, spec: ConfigCycleSpec, ref_date: datetime | None = None) -> Iterator[TimeSeriesObject]: + def iter_from_cycle_spec( + self, spec: ConfigCycleSpec, ref_date: datetime | None = None + ) -> Iterator[TimeSeriesObject]: name = spec.name if isinstance(self._dict[name], TimeSeries): if ref_date is None and spec.date is []: