Skip to content

Commit

Permalink
Don't require consecutive Node IDs (#885)
Browse files Browse the repository at this point in the history
Fixes #694

It basically was already fixed in the metagraphs refactor, I just had to
remove the validation check from Python.

This also adds support for 0, but not negative IDs, as I feel those
could be confusing.

I used the trivial model to add some ID variations, and directly added
some tests of the Arrow file results that we didn't really have yet.
  • Loading branch information
visr authored Dec 14, 2023
1 parent 091420b commit f20ef47
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 52 deletions.
2 changes: 1 addition & 1 deletion core/src/bmi.jl
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ function create_callbacks(
save_subgrid_level,
saved_subgrid_level;
saveat,
save_start = false,
save_start = true,
)
push!(callbacks, export_cb)
end
Expand Down
80 changes: 77 additions & 3 deletions core/test/run_models_test.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,89 @@
@testitem "trivial model" begin
using SciMLBase: successful_retcode
using Tables: Tables
using Tables.DataAPI: nrow
using Dates: DateTime
import Arrow
using Ribasim: timesteps

toml_path = normpath(@__DIR__, "../../generated_testmodels/trivial/ribasim.toml")
@test ispath(toml_path)
model = Ribasim.run(toml_path)
@test model isa Ribasim.Model
@test successful_retcode(model)
(; p) = model.integrator

# read all results as bytes first to avoid memory mapping
# which can have cleanup issues due to file locking
flow_bytes = read(normpath(dirname(toml_path), "results/flow.arrow"))
basin_bytes = read(normpath(dirname(toml_path), "results/basin.arrow"))
control_bytes = read(normpath(dirname(toml_path), "results/control.arrow"))
allocation_bytes = read(normpath(dirname(toml_path), "results/allocation.arrow"))
subgrid_bytes = read(normpath(dirname(toml_path), "results/subgrid_levels.arrow"))

flow = Arrow.Table(flow_bytes)
basin = Arrow.Table(basin_bytes)
control = Arrow.Table(control_bytes)
allocation = Arrow.Table(allocation_bytes)
subgrid = Arrow.Table(subgrid_bytes)

@testset "Schema" begin
@test Tables.schema(flow) == Tables.Schema(
(:time, :edge_id, :from_node_id, :to_node_id, :flow),
(DateTime, Union{Int, Missing}, Int, Int, Float64),
)
@test Tables.schema(basin) == Tables.Schema(
(:time, :node_id, :storage, :level),
(DateTime, Int, Float64, Float64),
)
@test Tables.schema(control) == Tables.Schema(
(:time, :control_node_id, :truth_state, :control_state),
(DateTime, Int, String, String),
)
@test Tables.schema(allocation) == Tables.Schema(
(
:time,
:allocation_network_id,
:user_node_id,
:priority,
:demand,
:allocated,
:abstracted,
),
(DateTime, Int, Int, Int, Float64, Float64, Float64),
)
@test Tables.schema(subgrid) ==
Tables.Schema((:time, :subgrid_id, :subgrid_level), (DateTime, Int, Float64))
end

@testset "Results size" begin
nsaved = length(timesteps(model))
@test nsaved > 10
# t0 has no flow, 2 flow edges and 2 boundary condition flows
@test nrow(flow) == (nsaved - 1) * 4
@test nrow(basin) == nsaved
@test nrow(control) == 0
@test nrow(allocation) == 0
@test nrow(subgrid) == nsaved * length(p.subgrid.level)
end

# The exporter interpolates 1:1 for three subgrid elements, but shifted by 1.0 meter.
subgrid = model.integrator.p.subgrid
@test all(diff(subgrid.level) .≈ 1.0)
@testset "Results values" begin
@test flow.time[1] > DateTime(2020)
@test coalesce.(flow.edge_id[1:4], -1) == [-1, -1, 9, 11]
@test flow.from_node_id[1:4] == [6, typemax(Int), 0, 6]
@test flow.to_node_id[1:4] == [6, typemax(Int), typemax(Int), 0]

@test basin.storage[1] == 1.0
@test basin.level[1] ≈ 0.044711584

# The exporter interpolates 1:1 for three subgrid elements, but shifted by 1.0 meter.
@test length(p.subgrid.level) == 3
@test diff(p.subgrid.level) ≈ [-1.0, 2.0]
# TODO The original subgrid IDs are lost and mapped to 1, 2, 3
@test subgrid.subgrid_id[1:3] == [11, 22, 33] broken = true
@test subgrid.subgrid_level[1:3] == [0.0, -1.0, 1.0]
@test subgrid.subgrid_level[(end - 2):end] == p.subgrid.level
end
end

@testitem "bucket model" begin
Expand Down
25 changes: 4 additions & 21 deletions python/ribasim/ribasim/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,43 +267,26 @@ def children(self):
def validate_model_node_field_ids(self):
"""Check whether the node IDs of the node_type fields are valid."""

n_nodes = self.network.n_nodes()

# Check node IDs of node fields
all_node_ids = set[int]()
for node in self.nodes().values():
all_node_ids.update(node.node_ids())

unique, counts = np.unique(list(all_node_ids), return_counts=True)

node_ids_positive_integers = np.greater(unique, 0) & np.equal(
unique.astype(int), unique
node_ids_negative_integers = np.less(unique, 0) | np.not_equal(
unique.astype(np.int64), unique
)

if not node_ids_positive_integers.all():
if node_ids_negative_integers.any():
raise ValueError(
f"Node IDs must be positive integers, got {unique[~node_ids_positive_integers]}."
f"Node IDs must be non-negative integers, got {unique[node_ids_negative_integers]}."
)

if (counts > 1).any():
raise ValueError(
f"These node IDs were assigned to multiple node types: {unique[(counts > 1)]}."
)

if not np.array_equal(unique, np.arange(n_nodes) + 1):
node_ids_missing = set(np.arange(n_nodes) + 1) - set(unique)
node_ids_over = set(unique) - set(np.arange(n_nodes) + 1)
msg = [
f"Expected node IDs from 1 to {n_nodes} (the number of rows in self.network.node.df)."
]
if len(node_ids_missing) > 0:
msg.append(f"These node IDs are missing: {node_ids_missing}.")

if len(node_ids_over) > 0:
msg.append(f"These node IDs are unexpected: {node_ids_over}.")

raise ValueError(" ".join(msg))

def validate_model_node_ids(self):
"""Check whether the node IDs in the data tables correspond to the node IDs in the network."""

Expand Down
12 changes: 2 additions & 10 deletions python/ribasim/tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_invalid_node_id(basic):

with pytest.raises(
ValueError,
match=re.escape("Node IDs must be positive integers, got [-1]."),
match=re.escape("Node IDs must be non-negative integers, got [-1]."),
):
model.validate_model_node_field_ids()

Expand Down Expand Up @@ -112,24 +112,16 @@ def test_node_ids_unsequential(basic):
model = basic

basin = model.basin

basin.profile = pd.DataFrame(
data={
"node_id": [1, 1, 3, 3, 6, 6, 1000, 1000],
"area": [0.01, 1000.0] * 4,
"level": [0.0, 1.0] * 4,
}
)

basin.static.df["node_id"] = [1, 3, 6, 1000]

with pytest.raises(ValueError) as excinfo:
model.validate_model_node_field_ids()

assert (
"Expected node IDs from 1 to 17 (the number of rows in self.network.node.df). These node IDs are missing: {9}. These node IDs are unexpected: {1000}."
in str(excinfo.value)
)
model.validate_model_node_field_ids()


def test_tabulated_rating_curve_model(tabulated_rating_curve, tmp_path):
Expand Down
35 changes: 18 additions & 17 deletions python/ribasim_testmodels/ribasim_testmodels/trivial.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
def trivial_model() -> ribasim.Model:
"""Trivial model with just a basin, tabulated rating curve and terminal node"""

# Set up the nodes:
# largest signed 64 bit integer, to check encoding
terminal_id = 9223372036854775807
xy = np.array(
[
(400.0, 200.0), # 1: Basin
(450.0, 200.0), # 2: TabulatedRatingCurve
(500.0, 200.0), # 3: Terminal
(400.0, 200.0), # 6: Basin
(450.0, 200.0), # 0: TabulatedRatingCurve
(500.0, 200.0), # <terminal_id>: Terminal
]
)
node_xy = gpd.points_from_xy(x=xy[:, 0], y=xy[:, 1])
Expand All @@ -21,19 +22,18 @@ def trivial_model() -> ribasim.Model:
"TabulatedRatingCurve",
"Terminal",
]
# Make sure the feature id starts at 1: explicitly give an index.
node = ribasim.Node(
df=gpd.GeoDataFrame(
data={"type": node_type},
index=pd.Index(np.arange(len(xy)) + 1, name="fid"),
index=pd.Index([6, 0, terminal_id], name="fid"),
geometry=node_xy,
crs="EPSG:28992",
)
)

# Setup the edges:
from_id = np.array([1, 2], dtype=np.int64)
to_id = np.array([2, 3], dtype=np.int64)
from_id = np.array([6, 0], dtype=np.int64)
to_id = np.array([0, terminal_id], dtype=np.int64)
lines = node.geometry_from_connectivity(from_id, to_id)
edge = ribasim.Edge(
df=gpd.GeoDataFrame(
Expand All @@ -42,6 +42,7 @@ def trivial_model() -> ribasim.Model:
"to_node_id": to_id,
"edge_type": len(from_id) * ["flow"],
},
index=pd.Index([11, 9], name="fid"),
geometry=lines,
crs="EPSG:28992",
)
Expand All @@ -50,7 +51,7 @@ def trivial_model() -> ribasim.Model:
# Setup the basins:
profile = pd.DataFrame(
data={
"node_id": [1, 1],
"node_id": [6, 6],
"area": [0.01, 1000.0],
"level": [0.0, 1.0],
}
Expand All @@ -64,7 +65,7 @@ def trivial_model() -> ribasim.Model:

static = pd.DataFrame(
data={
"node_id": [1],
"node_id": [6],
"drainage": [0.0],
"potential_evaporation": [evaporation],
"infiltration": [0.0],
Expand All @@ -75,14 +76,14 @@ def trivial_model() -> ribasim.Model:

# Create a subgrid level interpolation from one basin to three elements. Scale one to one, but:
#
# 1. start at -1.0
# 2. start at 0.0
# 3. start at 1.0
# 22. start at -1.0
# 11. start at 0.0
# 33. start at 1.0
#
subgrid = pd.DataFrame(
data={
"subgrid_id": [1, 1, 2, 2, 3, 3],
"node_id": [1, 1, 1, 1, 1, 1],
"subgrid_id": [22, 22, 11, 11, 33, 33],
"node_id": [6, 6, 6, 6, 6, 6],
"basin_level": [0.0, 1.0, 0.0, 1.0, 0.0, 1.0],
"subgrid_level": [-1.0, 0.0, 0.0, 1.0, 1.0, 2.0],
}
Expand All @@ -96,7 +97,7 @@ def trivial_model() -> ribasim.Model:
rating_curve = ribasim.TabulatedRatingCurve(
static=pd.DataFrame(
data={
"node_id": [2, 2],
"node_id": [0, 0],
"level": [0.0, 1.0],
"discharge": [0.0, q1000],
}
Expand All @@ -106,7 +107,7 @@ def trivial_model() -> ribasim.Model:
terminal = ribasim.Terminal(
static=pd.DataFrame(
data={
"node_id": [3],
"node_id": [terminal_id],
}
)
)
Expand Down

0 comments on commit f20ef47

Please sign in to comment.