From 35dfc2c61d406e8e22bdb5eeb293562bf1e2f418 Mon Sep 17 00:00:00 2001 From: Martijn Visser Date: Tue, 9 Apr 2024 16:54:38 +0200 Subject: [PATCH] Sort only Edge fid so edge_id becomes more stable (#1363) Before we sorted the Edge table like this: ```python sort_keys = [ "from_node_type", "from_node_id", "to_node_type", "to_node_id", ] ``` This made it appear a bit more neat, though it served no other purpose. The `fid` index was mostly an implementation detail that the user did not specify, and it went from 1:n following the sorting above. However `fid` becomes `edge_id` in the flow.arrow. Therefore when a users added a new edge, usually half of all `edge_id`s changed, making post-processing unnecessarily difficult. Therefore this PR removes this sorting, such that the input order is retained, keeping the old `edge_id`s stable. With this I think we can close #1310. It is useful to have a single identifier value for an Edge, even though it is somewhat superfluous. With this PR it becomes stable, unless users start modifying `fid`s themselves. --- core/test/run_models_test.jl | 2 +- core/test/validation_test.jl | 4 ++-- python/ribasim/ribasim/geometry/edge.py | 17 ++++++++--------- python/ribasim/ribasim/input_base.py | 4 +++- python/ribasim/ribasim/model.py | 5 ++++- python/ribasim/tests/test_io.py | 19 ++++++++++++------- python/ribasim/tests/test_model.py | 4 ++-- 7 files changed, 32 insertions(+), 23 deletions(-) diff --git a/core/test/run_models_test.jl b/core/test/run_models_test.jl index 63132c4f6..5c9037cd2 100644 --- a/core/test/run_models_test.jl +++ b/core/test/run_models_test.jl @@ -127,7 +127,7 @@ @testset "Results values" begin @test flow.time[1] == DateTime(2020) - @test coalesce.(flow.edge_id[1:2], -1) == [1, 2] + @test coalesce.(flow.edge_id[1:2], -1) == [0, 1] @test flow.from_node_id[1:2] == [6, 0] @test flow.to_node_id[1:2] == [0, 922] diff --git a/core/test/validation_test.jl b/core/test/validation_test.jl index d9eaa68d9..cb4909860 100644 --- a/core/test/validation_test.jl +++ b/core/test/validation_test.jl @@ -361,10 +361,10 @@ end @test length(logger.logs) == 2 @test logger.logs[1].level == Error @test logger.logs[1].message == - "Invalid edge type 'foo' for edge #1 from node #1 to node #2." + "Invalid edge type 'foo' for edge #0 from node #1 to node #2." @test logger.logs[2].level == Error @test logger.logs[2].message == - "Invalid edge type 'bar' for edge #2 from node #2 to node #3." + "Invalid edge type 'bar' for edge #1 from node #2 to node #3." end @testitem "Subgrid validation" begin diff --git a/python/ribasim/ribasim/geometry/edge.py b/python/ribasim/ribasim/geometry/edge.py index 760055d7e..2106821f4 100644 --- a/python/ribasim/ribasim/geometry/edge.py +++ b/python/ribasim/ribasim/geometry/edge.py @@ -76,21 +76,20 @@ def add( crs=self.df.crs, ) - self.df = GeoDataFrame[EdgeSchema](pd.concat([self.df, table_to_append])) + self.df = GeoDataFrame[EdgeSchema]( + pd.concat([self.df, table_to_append], ignore_index=True) + ) + self.df.index.name = "fid" def get_where_edge_type(self, edge_type: str) -> NDArray[np.bool_]: assert self.df is not None return (self.df.edge_type == edge_type).to_numpy() def sort(self): - assert self.df is not None - sort_keys = [ - "from_node_type", - "from_node_id", - "to_node_type", - "to_node_id", - ] - self.df.sort_values(sort_keys, ignore_index=True, inplace=True) + # Only sort the index (fid / edge_id) since this needs to be sorted in a GeoPackage. + # Under most circumstances, this retains the input order, + # making the edge_id as stable as possible; useful for post-processing. + self.df.sort_index(inplace=True) def plot(self, **kwargs) -> Axes: assert self.df is not None diff --git a/python/ribasim/ribasim/input_base.py b/python/ribasim/ribasim/input_base.py index 8f57e6067..d46905bae 100644 --- a/python/ribasim/ribasim/input_base.py +++ b/python/ribasim/ribasim/input_base.py @@ -360,7 +360,9 @@ def _write_geopackage(self, path: Path) -> None: path : Path """ assert self.df is not None - self.df.to_file(path, layer=self.tablename(), driver="GPKG") + # the index name must be fid otherwise it will generate a separate fid column + self.df.index.name = "fid" + self.df.to_file(path, layer=self.tablename(), index=True, driver="GPKG") class ChildModel(BaseModel): diff --git a/python/ribasim/ribasim/model.py b/python/ribasim/ribasim/model.py index 864d17116..34c808f73 100644 --- a/python/ribasim/ribasim/model.py +++ b/python/ribasim/ribasim/model.py @@ -175,8 +175,8 @@ def _save(self, directory: DirectoryPath, input_dir: DirectoryPath): if not node.df["node_id"].is_unique: raise ValueError("node_id must be unique") node.df.set_index("node_id", drop=False, inplace=True) - node.df.sort_index(inplace=True) node.df.index.name = "fid" + node.df.sort_index(inplace=True) node._save(directory, input_dir) for sub in self._nodes(): @@ -207,6 +207,7 @@ def node_table(self) -> NodeTable: df = pd.concat(df_chunks, ignore_index=True) node_table = NodeTable(df=df) node_table.sort() + node_table.df.index.name = "fid" return node_table def _nodes(self) -> Generator[MultiNodeModel, Any, None]: @@ -413,6 +414,7 @@ def to_xugrid(self): edge_df = edge_df[edge_df.edge_type == "flow"] node_id = node_df.node_id.to_numpy() + edge_id = edge_df.index.to_numpy() from_node_id = edge_df.from_node_id.to_numpy() to_node_id = edge_df.to_node_id.to_numpy() @@ -443,6 +445,7 @@ def to_xugrid(self): uds = xugrid.UgridDataset(None, grid) uds = uds.assign_coords(node_id=(node_dim, node_id)) + uds = uds.assign_coords(edge_id=(edge_dim, edge_id)) uds = uds.assign_coords(from_node_id=(edge_dim, from_node_id)) uds = uds.assign_coords(to_node_id=(edge_dim, to_node_id)) diff --git a/python/ribasim/tests/test_io.py b/python/ribasim/tests/test_io.py index 46edcfbd6..476cf8485 100644 --- a/python/ribasim/tests/test_io.py +++ b/python/ribasim/tests/test_io.py @@ -79,7 +79,7 @@ def test_repr(): assert isinstance(pump_static._repr_html_(), str) -def test_extra_columns(basic_transient): +def test_extra_columns(): terminal_static = terminal.Static(meta_id=[-1, -2, -3]) assert "meta_id" in terminal_static.df.columns assert (terminal_static.df.meta_id == [-1, -2, -3]).all() @@ -106,14 +106,12 @@ def test_sort(level_setpoint_with_minmax, tmp_path): table.sort() assert table.df.iloc[0]["greater_than"] == 5.0 - edge.df.sort_values("from_node_type", ascending=False, inplace=True) - assert edge.df.iloc[0]["from_node_type"] != "Basin" - edge.sort() - assert edge.df.iloc[0]["from_node_type"] == "Basin" + # The edge table is not sorted + assert edge.df.iloc[1]["from_node_type"] == "Pump" + assert edge.df.iloc[1]["from_node_id"] == 3 # re-apply wrong sort, then check if it gets sorted on write table.df.sort_values("greater_than", ascending=False, inplace=True) - edge.df.sort_values("from_node_type", ascending=False, inplace=True) model.write(tmp_path / "basic/ribasim.toml") # write sorts the model in place assert table.df.iloc[0]["greater_than"] == 5.0 @@ -121,13 +119,16 @@ def test_sort(level_setpoint_with_minmax, tmp_path): table_loaded = model_loaded.discrete_control.condition edge_loaded = model_loaded.edge assert table_loaded.df.iloc[0]["greater_than"] == 5.0 - assert edge.df.iloc[0]["from_node_type"] == "Basin" + assert edge.df.iloc[1]["from_node_type"] == "Pump" + assert edge.df.iloc[1]["from_node_id"] == 3 __assert_equal(table.df, table_loaded.df) __assert_equal(edge.df, edge_loaded.df) def test_roundtrip(trivial, tmp_path): model1 = trivial + # set custom Edge index + model1.edge.df.index = [15, 12] model1dir = tmp_path / "model1" model2dir = tmp_path / "model2" # read a model and then write it to a different path @@ -142,6 +143,10 @@ def test_roundtrip(trivial, tmp_path): model2dir / "ribasim.toml" ).read_text() + # check if custom Edge indexes are retained (sorted) + assert (model1.edge.df.index == [12, 15]).all() + assert (model2.edge.df.index == [12, 15]).all() + # check if all tables are the same __assert_equal(model1.node_table().df, model2.node_table().df) __assert_equal(model1.edge.df, model2.edge.df) diff --git a/python/ribasim/tests/test_model.py b/python/ribasim/tests/test_model.py index d7311b63d..bdc60368c 100644 --- a/python/ribasim/tests/test_model.py +++ b/python/ribasim/tests/test_model.py @@ -162,8 +162,8 @@ def test_write_adds_fid_in_tables(basic, tmp_path): # for edge no index was provided, but it still needs to write it to file nrow = len(model_orig.edge.df) - assert model_orig.edge.df.index.name is None - assert model_orig.edge.df.index.equals(pd.Index(np.full(nrow, 0))) + assert model_orig.edge.df.index.name == "fid" + assert model_orig.edge.df.index.equals(pd.RangeIndex(nrow)) model_orig.write(tmp_path / "basic/ribasim.toml") with connect(tmp_path / "basic/database.gpkg") as connection: