Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python edge validation #1765

Merged
merged 32 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
b1470cd
connectivity validation function implemented
Jingru923 Aug 26, 2024
a3ec21f
raise error if when adding edge
Jingru923 Aug 26, 2024
d71736a
fix build error
Jingru923 Aug 27, 2024
8618950
fix pytest error
Jingru923 Aug 27, 2024
fed1102
fix pytest error 2
Jingru923 Aug 27, 2024
e408fd7
fix build error 2
Jingru923 Aug 27, 2024
979b993
fix build error 3
Jingru923 Aug 27, 2024
d4c97e6
add unit test for validation
Jingru923 Aug 27, 2024
a0e3214
adding neighbor limit
Jingru923 Aug 28, 2024
df50143
check minimum for flow edge
Jingru923 Aug 28, 2024
f326e39
pass pytest
Jingru923 Aug 28, 2024
26bc35d
added unit test for flow edge neighbor bound
Jingru923 Aug 28, 2024
aa0a173
add another tests for outneighbor
Jingru923 Aug 28, 2024
b330e95
unit tests for minimum in-/outneighbor
Jingru923 Aug 29, 2024
1d26183
wrote unit test for maximum control edge neighbor
Jingru923 Aug 29, 2024
103958d
delete useless print line
Jingru923 Aug 29, 2024
a997501
Merge branch 'main' into py-validation
Jingru923 Aug 29, 2024
66d5a1e
add keyword of validation in Model class
Jingru923 Aug 29, 2024
7c52bd8
fix mypy error
Jingru923 Aug 29, 2024
7688be2
fix py311 py310 test 1
Jingru923 Aug 29, 2024
59c5233
fix julia unit test
Jingru923 Aug 29, 2024
58911ea
move validation unit tests to a different file
Jingru923 Aug 29, 2024
6c9622f
adding unit test for minimum control edge amount
Jingru923 Aug 29, 2024
be13e64
move validation of edge.add to a separate function
Jingru923 Aug 30, 2024
4177665
suggest possible downstream notetype in error message
Jingru923 Aug 30, 2024
894d8c7
address all comments
Jingru923 Sep 2, 2024
d9d5394
refactor and split check_neighbor_amount function
Jingru923 Sep 2, 2024
fe203cf
Merge branch 'main' into py-validation
Jingru923 Sep 2, 2024
088b869
fix outlets after merge
Jingru923 Sep 2, 2024
bae5d6f
2nd round of refactoring
Jingru923 Sep 3, 2024
450d81c
move validation function up a bit
Jingru923 Sep 3, 2024
2761043
3rd round of refactor
Jingru923 Sep 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions python/ribasim/ribasim/geometry/edge.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@

from ribasim.input_base import SpatialTableModel
from ribasim.utils import UsedIDs
from ribasim.validation import (
can_connect,
control_edge_neighbor_amount,
flow_edge_neighbor_amount,
node_type_connectivity,
)

from .base import _GeoBaseSchema

Expand Down Expand Up @@ -82,6 +88,11 @@ def add(
the allocation algorithm, and should thus not be set for every edge in a subnetwork.
**kwargs : Dict
"""
if not can_connect(from_node.node_type, to_node.node_type):
raise ValueError(
f"Node of type {to_node.node_type} cannot be downstream of node of type {from_node.node_type}. Possible downstream node: {node_type_connectivity[from_node.node_type]}."
)

geometry_to_append = (
[LineString([from_node.geometry, to_node.geometry])]
if geometry is None
Expand Down Expand Up @@ -112,13 +123,46 @@ def add(
index=pd.Index([edge_id], name="edge_id"),
)

self._validate_edge(to_node, from_node, edge_type)
Jingru923 marked this conversation as resolved.
Show resolved Hide resolved

self.df = GeoDataFrame[EdgeSchema](pd.concat([self.df, table_to_append]))
if self.df.duplicated(subset=["from_node_id", "to_node_id"]).any():
raise ValueError(
f"Edges have to be unique, but edge with from_node_id {from_node.node_id} to_node_id {to_node.node_id} already exists."
)
self._used_edge_ids.add(edge_id)

def _validate_edge(self, to_node: NodeData, from_node: NodeData, edge_type: str):
Jingru923 marked this conversation as resolved.
Show resolved Hide resolved
assert self.df is not None
in_neighbor: int = self.df.loc[
(self.df["to_node_id"] == to_node.node_id)
& (self.df["edge_type"] == edge_type)
].shape[0]

out_neighbor: int = self.df.loc[
(self.df["from_node_id"] == from_node.node_id)
& (self.df["edge_type"] == edge_type)
].shape[0]
# validation on neighbor amount
if edge_type == "flow":
if in_neighbor >= flow_edge_neighbor_amount[to_node.node_type][1]:
Jingru923 marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError(
f"Node {to_node.node_id} can have at most {flow_edge_neighbor_amount[to_node.node_type][1]} flow edge inneighbor(s) (got {in_neighbor})"
)
if out_neighbor >= flow_edge_neighbor_amount[from_node.node_type][3]:
raise ValueError(
f"Node {from_node.node_id} can have at most {flow_edge_neighbor_amount[from_node.node_type][3]} flow edge outneighbor(s) (got {out_neighbor})"
)
elif edge_type == "control":
if in_neighbor >= control_edge_neighbor_amount[to_node.node_type][1]:
raise ValueError(
f"Node {to_node.node_id} can have at most {control_edge_neighbor_amount[to_node.node_type][1]} control edge inneighbor(s) (got {in_neighbor})"
)
if out_neighbor >= control_edge_neighbor_amount[from_node.node_type][3]:
raise ValueError(
f"Node {from_node.node_id} can have at most {control_edge_neighbor_amount[from_node.node_type][3]} control edge outneighbor(s) (got {out_neighbor})"
)

def _get_where_edge_type(self, edge_type: str) -> NDArray[np.bool_]:
assert self.df is not None
return (self.df.edge_type == edge_type).to_numpy()
Expand Down
140 changes: 138 additions & 2 deletions python/ribasim/ribasim/model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import logging
from collections.abc import Generator
from os import PathLike
from pathlib import Path
Expand Down Expand Up @@ -58,6 +59,7 @@
_node_lookup_numpy,
_time_in_ns,
)
from ribasim.validation import control_edge_neighbor_amount, flow_edge_neighbor_amount

try:
import xugrid
Expand Down Expand Up @@ -100,6 +102,7 @@ class Model(FileModel):
user_demand: UserDemand = Field(default_factory=UserDemand)

edge: EdgeTable = Field(default_factory=EdgeTable)
use_validation: bool = Field(default=True)

_used_node_ids: UsedIDs = PrivateAttr(default_factory=UsedIDs)

Expand Down Expand Up @@ -265,8 +268,10 @@ def write(self, filepath: str | PathLike[str]) -> Path:
filepath : str | PathLike[str]
A file path with .toml extension.
"""
# TODO
# self.validate_model()

if self.use_validation:
Jingru923 marked this conversation as resolved.
Show resolved Hide resolved
self._validate_model()

filepath = Path(filepath)
self.filepath = filepath
if not filepath.suffix == ".toml":
Expand All @@ -280,6 +285,137 @@ def write(self, filepath: str | PathLike[str]) -> Path:
context_file_writing.set({})
return fn

def _validate_model(self):
df_edge = self.edge.df
df_chunks = [node.node.df.set_crs(self.crs) for node in self._nodes()]
df_node = pd.concat(df_chunks)

df_graph = df_edge
# Join df_edge with df_node to get to_node_type
df_graph = df_graph.join(
df_node[["node_type"]], on="from_node_id", how="left", rsuffix="_from"
)
df_graph = df_graph.rename(columns={"node_type": "from_node_type"})

df_graph = df_graph.join(
df_node[["node_type"]], on="to_node_id", how="left", rsuffix="_to"
)
df_graph = df_graph.rename(columns={"node_type": "to_node_type"})

if not self._check_neighbor_amount(
Jingru923 marked this conversation as resolved.
Show resolved Hide resolved
df_graph, flow_edge_neighbor_amount, "flow", df_node["node_type"]
):
raise ValueError("Minimum flow inneighbor or outneighbor unsatisfied")
if not self._check_neighbor_amount(
df_graph, control_edge_neighbor_amount, "control", df_node["node_type"]
):
raise ValueError("Minimum control inneighbor or outneighbor unsatisfied")

def _check_neighbor_amount(
self,
df_graph: pd.DataFrame,
edge_amount: dict[str, list[int]],
edge_type: str,
nodes,
) -> bool:
"""Check if the neighbor amount of the two nodes connected by the given edge meet the minimum requirements."""

is_valid = True

# filter graph by edge type
df_graph = df_graph.loc[df_graph["edge_type"] == edge_type]

# count occurrence of "from_node" which reflects the number of outneighbors
from_node_count = (
df_graph.groupby("from_node_id").size().reset_index(name="from_node_count") # type: ignore
)

# append from_node_count column to from_node_id and from_node_type
from_node_info = (
df_graph[["from_node_id", "from_node_type"]]
.drop_duplicates()
.merge(from_node_count, on="from_node_id", how="left")
)
from_node_info = from_node_info[
["from_node_id", "from_node_count", "from_node_type"]
]

# add the node that is not the upstream of any other nodes
from_node_info = self._add_source_sink_node(nodes, from_node_info, "from")

# loop over all the "from_node" and check if they have enough outneighbor
for _, row in from_node_info.iterrows():
# from node's outneighbor
if row["from_node_count"] < edge_amount[row["from_node_type"]][2]:
is_valid = False
logging.error(
f"Node {row['from_node_id']} must have at least {edge_amount[row['from_node_type']][2]} outneighbor(s) (got {row['from_node_count']})"
)

# count occurrence of "to_node" which reflects the number of inneighbors
to_node_count = (
df_graph.groupby("to_node_id").size().reset_index(name="to_node_count") # type: ignore
)

# append to_node_count column to result
to_node_info = (
df_graph[["to_node_id", "to_node_type"]]
.drop_duplicates()
.merge(to_node_count, on="to_node_id", how="left")
)
to_node_info = to_node_info[["to_node_id", "to_node_count", "to_node_type"]]

# add the node that is not the downstream of any other nodes
to_node_info = self._add_source_sink_node(nodes, to_node_info, "to")

# loop over all the "to_node" and check if they have enough inneighbor
for _, row in to_node_info.iterrows():
if row["to_node_count"] < edge_amount[row["to_node_type"]][0]:
is_valid = False
logging.error(
f"Node {row['to_node_id']} must have at least {edge_amount[row['to_node_type']][0]} inneighbor(s) (got {row['to_node_count']})"
)

return is_valid

def _add_source_sink_node(
Jingru923 marked this conversation as resolved.
Show resolved Hide resolved
self, nodes, node_info: pd.DataFrame, direction: str
) -> pd.DataFrame:
"""Loop over node table.

Add the nodes whose id are missing in the from_node and to_node column in the edge table because they are not the upstream or downstrem of other nodes.

Specify that their occurrence in from_node table or to_node table is 0.
"""

if direction == "from":
Jingru923 marked this conversation as resolved.
Show resolved Hide resolved
# loop over nodes, add the one that is not the downstream of any other nodes
for index, node in enumerate(nodes):
if nodes.index[index] not in node_info["from_node_id"].to_numpy():
new_row = {
"from_node_id": nodes.index[index],
"from_node_count": 0,
"from_node_type": node,
}
node_info = pd.concat(
[node_info, pd.DataFrame([new_row])], ignore_index=True
)

elif direction == "to":
# loop over nodes, add the one that is not the downstream of any other nodes
for index, node in enumerate(nodes):
if nodes.index[index] not in node_info["to_node_id"].to_numpy():
new_row = {
"to_node_id": nodes.index[index],
"to_node_count": 0,
"to_node_type": node,
}
node_info = pd.concat(
[node_info, pd.DataFrame([new_row])], ignore_index=True
)

return node_info

@classmethod
def _load(cls, filepath: Path | None) -> dict[str, Any]:
context_file_loading.set({})
Expand Down
85 changes: 85 additions & 0 deletions python/ribasim/ribasim/validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Table for connectivity between different node types
# "Basin": ["LinearResistance"] means that the downstream of basin can be LinearResistance only
node_type_connectivity: dict[str, list[str]] = {
"Basin": [
"LinearResistance",
"ManningResistance",
"TabulatedRatingCurve",
"Pump",
"Outlet",
"UserDemand",
],
"LinearResistance": ["Basin", "LevelBoundary"],
"ManningResistance": ["Basin"],
"TabulatedRatingCurve": ["Basin", "Terminal", "LevelBoundary"],
"LevelBoundary": ["LinearResistance", "Pump", "Outlet", "TabulatedRatingCurve"],
"FlowBoundary": ["Basin", "Terminal", "LevelBoundary"],
"Pump": ["Basin", "Terminal", "LevelBoundary"],
"Outlet": ["Basin", "Terminal", "LevelBoundary"],
"Terminal": [],
"DiscreteControl": [
"Pump",
"Outlet",
"TabulatedRatingCurve",
"LinearResistance",
"ManningResistance",
"PidControl",
],
"ContinuousControl": ["Pump", "Outlet"],
"PidControl": ["Pump", "Outlet"],
"UserDemand": ["Basin", "Terminal", "LevelBoundary"],
"LevelDemand": ["Basin"],
"FlowDemand": [
"LinearResistance",
"ManningResistance",
"TabulatedRatingCurve",
"Pump",
"Outlet",
],
}


# Function to validate connectivity between two node types
def can_connect(node_type_up: str, node_type_down: str) -> bool:
if node_type_up in node_type_connectivity:
return node_type_down in node_type_connectivity[node_type_up]
return False


flow_edge_neighbor_amount: dict[str, list[int]] = {
# list[int] = [in_min, in_max, out_min, out_max]
"Basin": [0, int(1e9), 0, int(1e9)],
"LinearResistance": [1, 1, 1, 1],
"ManningResistance": [1, 1, 1, 1],
"TabulatedRatingCurve": [1, 1, 1, int(1e9)],
"LevelBoundary": [0, int(1e9), 0, int(1e9)],
"FlowBoundary": [0, 0, 1, int(1e9)],
"Pump": [1, 1, 1, int(1e9)],
"Outlet": [1, 1, 1, 1],
"Terminal": [1, int(1e9), 0, 0],
"DiscreteControl": [0, 0, 0, 0],
"ContinuousControl": [0, 0, 0, 0],
"PidControl": [0, 0, 0, 0],
"UserDemand": [1, 1, 1, 1],
"LevelDemand": [0, 0, 0, 0],
"FlowDemand": [0, 0, 0, 0],
}

control_edge_neighbor_amount: dict[str, list[int]] = {
# list[int] = [in_min, in_max, out_min, out_max]
"Basin": [0, 1, 0, 0],
"LinearResistance": [0, 1, 0, 0],
"ManningResistance": [0, 1, 0, 0],
"TabulatedRatingCurve": [0, 1, 0, 0],
"LevelBoundary": [0, 0, 0, 0],
"FlowBoundary": [0, 0, 0, 0],
"Pump": [0, 1, 0, 0],
"Outlet": [0, 1, 0, 0],
"Terminal": [0, 0, 0, 0],
"DiscreteControl": [0, 0, 1, int(1e9)],
"ContinuousControl": [0, 0, 1, int(1e9)],
"PidControl": [0, 1, 1, 1],
"UserDemand": [0, 0, 0, 0],
"LevelDemand": [0, 0, 1, int(1e9)],
"FlowDemand": [0, 0, 1, 1],
}
10 changes: 10 additions & 0 deletions python/ribasim/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,21 @@ def bucket() -> ribasim.Model:
return ribasim_testmodels.bucket_model()


@pytest.fixture()
def pid_control_equation() -> ribasim.Model:
return ribasim_testmodels.pid_control_equation_model()


@pytest.fixture()
def tabulated_rating_curve() -> ribasim.Model:
return ribasim_testmodels.tabulated_rating_curve_model()


@pytest.fixture()
def outlet() -> ribasim.Model:
return ribasim_testmodels.outlet_model()


@pytest.fixture()
def backwater() -> ribasim.Model:
return ribasim_testmodels.backwater_model()
Expand Down
10 changes: 9 additions & 1 deletion python/ribasim/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,15 @@ def test_extra_spatial_columns():
[basin.Profile(area=1000.0, level=[0.0, 1.0]), basin.State(level=[1.0])],
)
with pytest.raises(ValidationError):
model.edge.add(model.basin[1], model.user_demand[2], foo=1)
model.user_demand.add(
Node(4, Point(1, -0.5), meta_id=3),
[
user_demand.Static(
demand=[1e-4], return_factor=0.9, min_level=0.9, priority=1
)
],
)
model.edge.add(model.basin[1], model.user_demand[4], foo=1)


def test_edge_autoincrement(basic):
Expand Down
Loading
Loading