From ed802dcab410b970aea59abd324b2a70cc9061c3 Mon Sep 17 00:00:00 2001 From: Martijn Visser Date: Mon, 15 Jan 2024 16:17:53 +0100 Subject: [PATCH] Fix and change table sorting in Python (#903) Fixes the first two items of #601. Also adds a test. --- docs/core/usage.qmd | 46 ++++++++++++++-------------- python/ribasim/ribasim/config.py | 40 ++++++++++++++++++++---- python/ribasim/ribasim/input_base.py | 9 +++--- python/ribasim/tests/conftest.py | 5 +++ python/ribasim/tests/test_io.py | 23 ++++++++++++++ 5 files changed, 89 insertions(+), 34 deletions(-) diff --git a/docs/core/usage.qmd b/docs/core/usage.qmd index 1a2915196..92272053a 100644 --- a/docs/core/usage.qmd +++ b/docs/core/usage.qmd @@ -347,8 +347,8 @@ Lets a fraction (in [0,1]) of the incoming flow trough. column | type | unit | restriction ------------- | ------- | ------------ | ----------- node_id | Int | - | sorted +control_state | String | - | (optional) sorted per node_id fraction | Float64 | - | in the interval [0,1] -control_state | String | - | (optional) # TabulatedRatingCurve @@ -358,10 +358,10 @@ relation between the storage of a connected Basin (via the outlet level) and its column | type | unit | restriction ------------- | ------- | ------------ | ----------- node_id | Int | - | sorted +control_state | String | - | (optional) sorted per node_id active | Bool | - | (optional, default true) level | Float64 | $m$ | sorted per control_state discharge | Float64 | $m^3 s^{-1}$ | non-negative -control_state | String | - | (optional) sorted per node_id node_id | discharge | level ------- |----------- |------- @@ -381,8 +381,8 @@ Note that a `node_id` can be either in this table or in the static one, but not column | type | unit | restriction --------- | ------- | ------------ | ----------- -time | DateTime | - | sorted -node_id | Int | - | sorted per time +node_id | Int | - | sorted +time | DateTime | - | sorted per node_id level | Float64 | $m$ | - discharge | Float64 | $m^3 s^{-1}$ | non-negative @@ -397,11 +397,11 @@ When PID controlled, the pump must point away from the controlled basin in terms column | type | unit | restriction --------- | ------- | ------------ | ----------- node_id | Int | - | sorted +control_state | String | - | (optional) sorted per node_id active | Bool | - | (optional, default true) flow_rate | Float64 | $m^3 s^{-1}$ | non-negative min_flow_rate | Float64 | $m^3 s^{-1}$ | (optional, default 0.0) max_flow_rate | Float64 | $m^3 s^{-1}$ | (optional) -control_state | String | - | (optional) # Outlet @@ -411,12 +411,12 @@ When PID controlled, the outlet must point towards the controlled basin in terms column | type | unit | restriction --------- | ------- | ------------ | ----------- node_id | Int | - | sorted +control_state | String | - | (optional) sorted per node_id active | Bool | - | (optional, default true) flow_rate | Float64 | $m^3 s^{-1}$ | non-negative min_flow_rate | Float64 | $m^3 s^{-1}$ | (optional, default 0.0) max_flow_rate | Float64 | $m^3 s^{-1}$ | (optional) min_crest_level | Float64 | $m$ | (optional) -control_state | String | - | (optional) # User @@ -453,11 +453,11 @@ Note that a `node_id` can be either in this table or in the static one, but not column | type | unit | restriction ------------- | -------- | ------------ | ----------- node_id | Int | - | sorted +priority | Int | - | sorted per node id time | DateTime | - | sorted per priority per node id demand | Float64 | $m^3 s^{-1}$ | - return_factor | Float64 | - | between [0 - 1] min_level | Float64 | $m$ | (optional) -priority | Int | - | sorted per node id ## Allocation results @@ -502,8 +502,8 @@ Note that a `node_id` can be either in this table or in the static one, but not column | type | unit | restriction --------- | ------- | ------------ | ----------- -time | DateTime | - | sorted -node_id | Int | - | sorted per time +node_id | Int | - | sorted +time | DateTime | - | sorted per node_id level | Float64 | $m$ | - # FlowBoundary @@ -533,8 +533,8 @@ Note that a `node_id` can be either in this table or in the static one, but not column | type | unit | restriction --------- | ------- | ------------ | ----------- -time | DateTime | - | sorted -node_id | Int | - | sorted per time +node_id | Int | - | sorted +time | DateTime | - | sorted per node_id flow_rate | Float64 | $m^3 s^{-1}$ | non-negative # LinearResistance @@ -544,9 +544,9 @@ Flow proportional to the level difference between the connected basins. column | type | unit | restriction ------------- | ------- | ------------ | ----------- node_id | Int | - | sorted +control_state | String | - | (optional) sorted per node_id active | Bool | - | (optional, default true) resistance | Float64 | $sm^{-2}$ | - -control_state | String | - | (optional) # ManningResistance @@ -555,12 +555,12 @@ Flow through this connection is estimated by conservation of energy and the Mann column | type | unit | restriction ------------- | ------- | ------------ | ----------- node_id | Int | - | sorted +control_state | String | - | (optional) sorted per node_id active | Bool | - | (optional, default true) length | Float64 | $m$ | positive manning_n | Float64 | $s m^{-\frac{1}{3}}$ | positive profile_width | Float64 | $m$ | positive profile_slope | Float64 | - | - -control_state | String | - | (optional) # Terminal @@ -584,9 +584,9 @@ The condition schema defines conditions of the form 'the discrete_control node w column | type | unit | restriction ----------------- | -------- | ------- | ----------- node_id | Int | - | sorted -listen_feature_id | Int | - | - -variable | String | - | must be "level" or "flow_rate" -greater_than | Float64 | various | - +listen_feature_id | Int | - | sorted per node_id +variable | String | - | must be "level" or "flow_rate", sorted per listen_feature_id +greater_than | Float64 | various | sorted per variable look_ahead | Float64 | $s$ | Only on transient boundary conditions, non-negative (optional, default 0) ## DiscreteControl / logic @@ -595,8 +595,8 @@ The logic schema defines which control states are triggered based on the truth o DiscreteControl is applied in the Julia core as follows: - During the simulation it is checked whether the truth of any of the conditions changes. -- When a condition changes, the corresponding discrrete_control node id is retrieved (node_id in the condition schema above). -- The truth value of all the conditions this discrete_control node lisens to are retrieved, in the order as they are specified in the condition schema. This is then converted into a string of "T" for true and "F" for false. This string we call the truth state.* +- When a condition changes, the corresponding discrete_control node id is retrieved (node_id in the condition schema above). +- The truth value of all the conditions this discrete_control node listens to are retrieved, in the order as they are specified in the condition schema. This is then converted into a string of "T" for true and "F" for false. This string we call the truth state.* - The table below determines for the given discrete_control node ID and truth state what the corresponding control state is. - For all the nodes this discrete_control node affects (as given by the "control" edges in [Edges / static](usage.qmd#edge)), their parameters are set to those parameters in `NodeType / static` corresponding to the determined control state. @@ -605,8 +605,8 @@ DiscreteControl is applied in the Julia core as follows: column | type | unit | restriction -------------- | -------- | ---- | ----------- node_id | Int | - | sorted -truth_state | String | - | Consists of the characters "T" (true), "F" (false), "U" (upcrossing), "D" (downcrossing) and "*" (any) -control_state | String | - | +control_state | String | - | - +truth_state | String | - | Consists of the characters "T" (true), "F" (false), "U" (upcrossing), "D" (downcrossing) and "*" (any), sorted per node_id ## DiscreteControl results @@ -630,13 +630,13 @@ In the future controlling the flow on a particular edge could be supported. column | type | unit | restriction -------------- | -------- | -------- | ----------- node_id | Int | - | sorted +control_state | String | - | (optional) sorted per node_id active | Bool | - | (optional, default true) listen_node_id | Int | - | - target | Float64 | $m$ | - proportional | Float64 | $s^{-1}$ | - integral | Float64 | $s^{-2}$ | - derivative | Float64 | - | - -control_state | String | - | - ## PidControl / time @@ -650,8 +650,8 @@ Note that a `node_id` can be either in this table or in the static one, but not column | type | unit | restriction -------------- | -------- | -------- | ----------- -node_id | Int | - | sorted per time -time | DateTime | - | sorted +node_id | Int | - | sorted +time | DateTime | - | sorted per node_id listen_node_id | Int | - | - target | Float64 | $m$ | - proportional | Float64 | $s^{-1}$ | - diff --git a/python/ribasim/ribasim/config.py b/python/ribasim/ribasim/config.py index ca8983d5f..e992e811b 100644 --- a/python/ribasim/ribasim/config.py +++ b/python/ribasim/ribasim/config.py @@ -82,6 +82,8 @@ class Terminal(NodeModel): default_factory=TableModel[TerminalStaticSchema] ) + _sort_keys: dict[str, list[str]] = {"static": ["node_id"]} + class PidControl(NodeModel): static: TableModel[PidControlStaticSchema] = Field( @@ -91,7 +93,10 @@ class PidControl(NodeModel): default_factory=TableModel[PidControlTimeSchema] ) - _sort_keys: dict[str, list[str]] = {"time": ["time", "node_id"]} + _sort_keys: dict[str, list[str]] = { + "static": ["node_id", "control_state"], + "time": ["node_id", "time"], + } class LevelBoundary(NodeModel): @@ -102,7 +107,10 @@ class LevelBoundary(NodeModel): default_factory=TableModel[LevelBoundaryTimeSchema] ) - _sort_keys: dict[str, list[str]] = {"time": ["time", "node_id"]} + _sort_keys: dict[str, list[str]] = { + "static": ["node_id"], + "time": ["node_id", "time"], + } class Pump(NodeModel): @@ -110,6 +118,8 @@ class Pump(NodeModel): default_factory=TableModel[PumpStaticSchema] ) + _sort_keys: dict[str, list[str]] = {"static": ["node_id", "control_state"]} + class TabulatedRatingCurve(NodeModel): static: TableModel[TabulatedRatingCurveStaticSchema] = Field( @@ -119,8 +129,8 @@ class TabulatedRatingCurve(NodeModel): default_factory=TableModel[TabulatedRatingCurveTimeSchema] ) _sort_keys: dict[str, list[str]] = { - "static": ["node_id", "level"], - "time": ["time", "node_id", "level"], + "static": ["node_id", "control_state", "level"], + "time": ["node_id", "time", "level"], } @@ -144,7 +154,10 @@ class FlowBoundary(NodeModel): default_factory=TableModel[FlowBoundaryTimeSchema] ) - _sort_keys: dict[str, list[str]] = {"time": ["time", "node_id"]} + _sort_keys: dict[str, list[str]] = { + "static": ["node_id"], + "time": ["node_id", "time"], + } class Basin(NodeModel): @@ -165,8 +178,10 @@ class Basin(NodeModel): ) _sort_keys: dict[str, list[str]] = { + "static": ["node_id"], + "state": ["node_id"], "profile": ["node_id", "level"], - "time": ["time", "node_id"], + "time": ["node_id", "time"], "subgrid": ["subgrid_id", "basin_level"], } @@ -176,6 +191,8 @@ class ManningResistance(NodeModel): default_factory=TableModel[ManningResistanceStaticSchema] ) + _sort_keys: dict[str, list[str]] = {"static": ["node_id", "control_state"]} + class DiscreteControl(NodeModel): condition: TableModel[DiscreteControlConditionSchema] = Field( @@ -185,20 +202,31 @@ class DiscreteControl(NodeModel): default_factory=TableModel[DiscreteControlLogicSchema] ) + _sort_keys: dict[str, list[str]] = { + "condition": ["node_id", "listen_feature_id", "variable", "greater_than"], + "logic": ["node_id", "truth_state"], + } + class Outlet(NodeModel): static: TableModel[OutletStaticSchema] = Field( default_factory=TableModel[OutletStaticSchema] ) + _sort_keys: dict[str, list[str]] = {"static": ["node_id", "control_state"]} + class LinearResistance(NodeModel): static: TableModel[LinearResistanceStaticSchema] = Field( default_factory=TableModel[LinearResistanceStaticSchema] ) + _sort_keys: dict[str, list[str]] = {"static": ["node_id", "control_state"]} + class FractionalFlow(NodeModel): static: TableModel[FractionalFlowStaticSchema] = Field( default_factory=TableModel[FractionalFlowStaticSchema] ) + + _sort_keys: dict[str, list[str]] = {"static": ["node_id", "control_state"]} diff --git a/python/ribasim/ribasim/input_base.py b/python/ribasim/ribasim/input_base.py index f9aadb3b5..1538bfafa 100644 --- a/python/ribasim/ribasim/input_base.py +++ b/python/ribasim/ribasim/input_base.py @@ -289,10 +289,9 @@ def _from_arrow(cls, path: FilePath) -> pd.DataFrame: directory = context_file_loading.get().get("directory", Path(".")) return pd.read_feather(directory / path) - def sort(self, sort_keys: list[str] = ["node_id"]): - """Sort all input tables as required. + def sort(self, sort_keys: list[str]): + """Sort the table as required. - Tables are sorted by "node_id", unless otherwise specified. Sorting is done automatically before writing the table. """ if self.df is not None: @@ -375,7 +374,7 @@ def _write_table(self, path: FilePath) -> None: gdf.to_file(path, layer=self.tablename(), driver="GPKG") - def sort(self, sort_keys: list[str] = ["node_id"]): + def sort(self, sort_keys: list[str]): self.df.sort_index(inplace=True) @@ -435,7 +434,7 @@ def _save(self, directory: DirectoryPath, input_dir: DirectoryPath, **kwargs): getattr(self, field)._save( directory, input_dir, - sort_keys=self._sort_keys.get("field", ["node_id"]), + sort_keys=self._sort_keys[field], ) def _repr_content(self) -> str: diff --git a/python/ribasim/tests/conftest.py b/python/ribasim/tests/conftest.py index 4e4c1d09f..f6dbb730d 100644 --- a/python/ribasim/tests/conftest.py +++ b/python/ribasim/tests/conftest.py @@ -32,3 +32,8 @@ def backwater() -> ribasim.Model: @pytest.fixture() def discrete_control_of_pid_control() -> ribasim.Model: return ribasim_testmodels.discrete_control_of_pid_control_model() + + +@pytest.fixture() +def level_setpoint_with_minmax() -> ribasim.Model: + return ribasim_testmodels.level_setpoint_with_minmax_model() diff --git a/python/ribasim/tests/test_io.py b/python/ribasim/tests/test_io.py index 888c692f8..b1ac52056 100644 --- a/python/ribasim/tests/test_io.py +++ b/python/ribasim/tests/test_io.py @@ -102,3 +102,26 @@ def test_extra_columns(basic_transient): node = Node(df=df) assert "meta_node_id" in node.df.columns + + +def test_sort(level_setpoint_with_minmax, tmp_path): + model = level_setpoint_with_minmax + table = model.discrete_control.condition + + # apply a wrong sort, then call the sort method to restore order + table.df.sort_values("greater_than", ascending=False, inplace=True) + assert table.df.iloc[0]["greater_than"] == 15.0 + sort_keys = model.discrete_control._sort_keys["condition"] + assert sort_keys == ["node_id", "listen_feature_id", "variable", "greater_than"] + table.sort(sort_keys) + assert table.df.iloc[0]["greater_than"] == 5.0 + + # re-apply wrong sort, then check if it gets sorted on write + table.df.sort_values("greater_than", ascending=False, inplace=True) + model.write(tmp_path / "basic/ribasim.toml") + # write sorts the model in place + assert table.df.iloc[0]["greater_than"] == 5.0 + model_loaded = ribasim.Model(filepath=tmp_path / "basic/ribasim.toml") + table_loaded = model_loaded.discrete_control.condition + assert table_loaded.df.iloc[0]["greater_than"] == 5.0 + __assert_equal(table.df, table_loaded.df)