Skip to content

Commit

Permalink
Don't require consecutive Node IDs (#885)
Browse files Browse the repository at this point in the history
Fixes #694

It basically was already fixed in the metagraphs refactor, I just had to
remove the validation check from Python.

This also adds support for 0, but not negative IDs, as I feel those
could be confusing.

I used the trivial model to add some ID variations, and directly added
some tests of the Arrow file results that we didn't really have yet.
  • Loading branch information
visr authored Dec 14, 2023
1 parent 091420b commit f20ef47
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 52 deletions.
2 changes: 1 addition & 1 deletion core/src/bmi.jl
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ function create_callbacks(
save_subgrid_level,
saved_subgrid_level;
saveat,
save_start = false,
save_start = true,
)
push!(callbacks, export_cb)
end
Expand Down
80 changes: 77 additions & 3 deletions core/test/run_models_test.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,89 @@
@testitem "trivial model" begin
using SciMLBase: successful_retcode
using Tables: Tables
using Tables.DataAPI: nrow
using Dates: DateTime
import Arrow
using Ribasim: timesteps

toml_path = normpath(@__DIR__, "../../generated_testmodels/trivial/ribasim.toml")
@test ispath(toml_path)
model = Ribasim.run(toml_path)
@test model isa Ribasim.Model
@test successful_retcode(model)
(; p) = model.integrator

# read all results as bytes first to avoid memory mapping
# which can have cleanup issues due to file locking
flow_bytes = read(normpath(dirname(toml_path), "results/flow.arrow"))
basin_bytes = read(normpath(dirname(toml_path), "results/basin.arrow"))
control_bytes = read(normpath(dirname(toml_path), "results/control.arrow"))
allocation_bytes = read(normpath(dirname(toml_path), "results/allocation.arrow"))
subgrid_bytes = read(normpath(dirname(toml_path), "results/subgrid_levels.arrow"))

flow = Arrow.Table(flow_bytes)
basin = Arrow.Table(basin_bytes)
control = Arrow.Table(control_bytes)
allocation = Arrow.Table(allocation_bytes)
subgrid = Arrow.Table(subgrid_bytes)

@testset "Schema" begin
@test Tables.schema(flow) == Tables.Schema(
(:time, :edge_id, :from_node_id, :to_node_id, :flow),
(DateTime, Union{Int, Missing}, Int, Int, Float64),
)
@test Tables.schema(basin) == Tables.Schema(
(:time, :node_id, :storage, :level),
(DateTime, Int, Float64, Float64),
)
@test Tables.schema(control) == Tables.Schema(
(:time, :control_node_id, :truth_state, :control_state),
(DateTime, Int, String, String),
)
@test Tables.schema(allocation) == Tables.Schema(
(
:time,
:allocation_network_id,
:user_node_id,
:priority,
:demand,
:allocated,
:abstracted,
),
(DateTime, Int, Int, Int, Float64, Float64, Float64),
)
@test Tables.schema(subgrid) ==
Tables.Schema((:time, :subgrid_id, :subgrid_level), (DateTime, Int, Float64))
end

@testset "Results size" begin
nsaved = length(timesteps(model))
@test nsaved > 10
# t0 has no flow, 2 flow edges and 2 boundary condition flows
@test nrow(flow) == (nsaved - 1) * 4
@test nrow(basin) == nsaved
@test nrow(control) == 0
@test nrow(allocation) == 0
@test nrow(subgrid) == nsaved * length(p.subgrid.level)
end

# The exporter interpolates 1:1 for three subgrid elements, but shifted by 1.0 meter.
subgrid = model.integrator.p.subgrid
@test all(diff(subgrid.level) .≈ 1.0)
@testset "Results values" begin
@test flow.time[1] > DateTime(2020)
@test coalesce.(flow.edge_id[1:4], -1) == [-1, -1, 9, 11]
@test flow.from_node_id[1:4] == [6, typemax(Int), 0, 6]
@test flow.to_node_id[1:4] == [6, typemax(Int), typemax(Int), 0]

@test basin.storage[1] == 1.0
@test basin.level[1] ≈ 0.044711584

# The exporter interpolates 1:1 for three subgrid elements, but shifted by 1.0 meter.
@test length(p.subgrid.level) == 3
@test diff(p.subgrid.level) ≈ [-1.0, 2.0]
# TODO The original subgrid IDs are lost and mapped to 1, 2, 3
@test subgrid.subgrid_id[1:3] == [11, 22, 33] broken = true
@test subgrid.subgrid_level[1:3] == [0.0, -1.0, 1.0]
@test subgrid.subgrid_level[(end - 2):end] == p.subgrid.level
end
end

@testitem "bucket model" begin
Expand Down
25 changes: 4 additions & 21 deletions python/ribasim/ribasim/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,43 +267,26 @@ def children(self):
def validate_model_node_field_ids(self):
"""Check whether the node IDs of the node_type fields are valid."""

n_nodes = self.network.n_nodes()

# Check node IDs of node fields
all_node_ids = set[int]()
for node in self.nodes().values():
all_node_ids.update(node.node_ids())

unique, counts = np.unique(list(all_node_ids), return_counts=True)

node_ids_positive_integers = np.greater(unique, 0) & np.equal(
unique.astype(int), unique
node_ids_negative_integers = np.less(unique, 0) | np.not_equal(
unique.astype(np.int64), unique
)

if not node_ids_positive_integers.all():
if node_ids_negative_integers.any():
raise ValueError(
f"Node IDs must be positive integers, got {unique[~node_ids_positive_integers]}."
f"Node IDs must be non-negative integers, got {unique[node_ids_negative_integers]}."
)

if (counts > 1).any():
raise ValueError(
f"These node IDs were assigned to multiple node types: {unique[(counts > 1)]}."
)

if not np.array_equal(unique, np.arange(n_nodes) + 1):
node_ids_missing = set(np.arange(n_nodes) + 1) - set(unique)
node_ids_over = set(unique) - set(np.arange(n_nodes) + 1)
msg = [
f"Expected node IDs from 1 to {n_nodes} (the number of rows in self.network.node.df)."
]
if len(node_ids_missing) > 0:
msg.append(f"These node IDs are missing: {node_ids_missing}.")

if len(node_ids_over) > 0:
msg.append(f"These node IDs are unexpected: {node_ids_over}.")

raise ValueError(" ".join(msg))

def validate_model_node_ids(self):
"""Check whether the node IDs in the data tables correspond to the node IDs in the network."""

Expand Down
12 changes: 2 additions & 10 deletions python/ribasim/tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_invalid_node_id(basic):

with pytest.raises(
ValueError,
match=re.escape("Node IDs must be positive integers, got [-1]."),
match=re.escape("Node IDs must be non-negative integers, got [-1]."),
):
model.validate_model_node_field_ids()

Expand Down Expand Up @@ -112,24 +112,16 @@ def test_node_ids_unsequential(basic):
model = basic

basin = model.basin

basin.profile = pd.DataFrame(
data={
"node_id": [1, 1, 3, 3, 6, 6, 1000, 1000],
"area": [0.01, 1000.0] * 4,
"level": [0.0, 1.0] * 4,
}
)

basin.static.df["node_id"] = [1, 3, 6, 1000]

with pytest.raises(ValueError) as excinfo:
model.validate_model_node_field_ids()

assert (
"Expected node IDs from 1 to 17 (the number of rows in self.network.node.df). These node IDs are missing: {9}. These node IDs are unexpected: {1000}."
in str(excinfo.value)
)
model.validate_model_node_field_ids()


def test_tabulated_rating_curve_model(tabulated_rating_curve, tmp_path):
Expand Down
35 changes: 18 additions & 17 deletions python/ribasim_testmodels/ribasim_testmodels/trivial.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
def trivial_model() -> ribasim.Model:
"""Trivial model with just a basin, tabulated rating curve and terminal node"""

# Set up the nodes:
# largest signed 64 bit integer, to check encoding
terminal_id = 9223372036854775807
xy = np.array(
[
(400.0, 200.0), # 1: Basin
(450.0, 200.0), # 2: TabulatedRatingCurve
(500.0, 200.0), # 3: Terminal
(400.0, 200.0), # 6: Basin
(450.0, 200.0), # 0: TabulatedRatingCurve
(500.0, 200.0), # <terminal_id>: Terminal
]
)
node_xy = gpd.points_from_xy(x=xy[:, 0], y=xy[:, 1])
Expand All @@ -21,19 +22,18 @@ def trivial_model() -> ribasim.Model:
"TabulatedRatingCurve",
"Terminal",
]
# Make sure the feature id starts at 1: explicitly give an index.
node = ribasim.Node(
df=gpd.GeoDataFrame(
data={"type": node_type},
index=pd.Index(np.arange(len(xy)) + 1, name="fid"),
index=pd.Index([6, 0, terminal_id], name="fid"),
geometry=node_xy,
crs="EPSG:28992",
)
)

# Setup the edges:
from_id = np.array([1, 2], dtype=np.int64)
to_id = np.array([2, 3], dtype=np.int64)
from_id = np.array([6, 0], dtype=np.int64)
to_id = np.array([0, terminal_id], dtype=np.int64)
lines = node.geometry_from_connectivity(from_id, to_id)
edge = ribasim.Edge(
df=gpd.GeoDataFrame(
Expand All @@ -42,6 +42,7 @@ def trivial_model() -> ribasim.Model:
"to_node_id": to_id,
"edge_type": len(from_id) * ["flow"],
},
index=pd.Index([11, 9], name="fid"),
geometry=lines,
crs="EPSG:28992",
)
Expand All @@ -50,7 +51,7 @@ def trivial_model() -> ribasim.Model:
# Setup the basins:
profile = pd.DataFrame(
data={
"node_id": [1, 1],
"node_id": [6, 6],
"area": [0.01, 1000.0],
"level": [0.0, 1.0],
}
Expand All @@ -64,7 +65,7 @@ def trivial_model() -> ribasim.Model:

static = pd.DataFrame(
data={
"node_id": [1],
"node_id": [6],
"drainage": [0.0],
"potential_evaporation": [evaporation],
"infiltration": [0.0],
Expand All @@ -75,14 +76,14 @@ def trivial_model() -> ribasim.Model:

# Create a subgrid level interpolation from one basin to three elements. Scale one to one, but:
#
# 1. start at -1.0
# 2. start at 0.0
# 3. start at 1.0
# 22. start at -1.0
# 11. start at 0.0
# 33. start at 1.0
#
subgrid = pd.DataFrame(
data={
"subgrid_id": [1, 1, 2, 2, 3, 3],
"node_id": [1, 1, 1, 1, 1, 1],
"subgrid_id": [22, 22, 11, 11, 33, 33],
"node_id": [6, 6, 6, 6, 6, 6],
"basin_level": [0.0, 1.0, 0.0, 1.0, 0.0, 1.0],
"subgrid_level": [-1.0, 0.0, 0.0, 1.0, 1.0, 2.0],
}
Expand All @@ -96,7 +97,7 @@ def trivial_model() -> ribasim.Model:
rating_curve = ribasim.TabulatedRatingCurve(
static=pd.DataFrame(
data={
"node_id": [2, 2],
"node_id": [0, 0],
"level": [0.0, 1.0],
"discharge": [0.0, q1000],
}
Expand All @@ -106,7 +107,7 @@ def trivial_model() -> ribasim.Model:
terminal = ribasim.Terminal(
static=pd.DataFrame(
data={
"node_id": [3],
"node_id": [terminal_id],
}
)
)
Expand Down

0 comments on commit f20ef47

Please sign in to comment.