From 8ab7798873cf9396424aa4a93e5ba2cd03f7a13e Mon Sep 17 00:00:00 2001 From: Martijn Visser Date: Mon, 25 Mar 2024 22:16:45 +0100 Subject: [PATCH] Use 32 bit signed integer on the Python side as well --- python/ribasim/ribasim/config.py | 5 +- python/ribasim/ribasim/geometry/area.py | 3 +- python/ribasim/ribasim/geometry/edge.py | 12 +-- python/ribasim/ribasim/geometry/node.py | 4 +- python/ribasim/ribasim/model.py | 9 ++- python/ribasim/ribasim/schemas.py | 75 ++++++++++--------- python/ribasim/tests/test_io.py | 4 +- python/ribasim/tests/test_model.py | 2 + .../ribasim_testmodels/backwater.py | 2 +- utils/gen_python.jl | 2 +- utils/templates/schemas.py.jinja | 3 +- 11 files changed, 65 insertions(+), 56 deletions(-) diff --git a/python/ribasim/ribasim/config.py b/python/ribasim/ribasim/config.py index 4f4d45fa4..f9605111d 100644 --- a/python/ribasim/ribasim/config.py +++ b/python/ribasim/ribasim/config.py @@ -3,6 +3,7 @@ from enum import Enum from typing import Any +import numpy as np import pandas as pd import pydantic from geopandas import GeoDataFrame @@ -98,10 +99,10 @@ def __init__(self, node_id: int, geometry: Point, **kwargs) -> None: def into_geodataframe(self, node_type: str) -> GeoDataFrame: return GeoDataFrame( data={ - "node_id": pd.Series([self.node_id], dtype=int), + "node_id": pd.Series([self.node_id], dtype=np.int32), "node_type": pd.Series([node_type], dtype=str), "name": pd.Series([self.name], dtype=str), - "subnetwork_id": pd.Series([self.subnetwork_id], dtype=pd.Int64Dtype()), + "subnetwork_id": pd.Series([self.subnetwork_id], dtype=pd.Int32Dtype()), }, geometry=[self.geometry], ) diff --git a/python/ribasim/ribasim/geometry/area.py b/python/ribasim/ribasim/geometry/area.py index 950165eed..e06945475 100644 --- a/python/ribasim/ribasim/geometry/area.py +++ b/python/ribasim/ribasim/geometry/area.py @@ -1,5 +1,6 @@ from typing import Any +import numpy as np import pandera as pa from pandera.typing import Series from pandera.typing.geopandas import GeoSeries @@ -8,5 +9,5 @@ class BasinAreaSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) geometry: GeoSeries[Any] = pa.Field(default=None, nullable=True) diff --git a/python/ribasim/ribasim/geometry/edge.py b/python/ribasim/ribasim/geometry/edge.py index ba92a824a..07a4a45d1 100644 --- a/python/ribasim/ribasim/geometry/edge.py +++ b/python/ribasim/ribasim/geometry/edge.py @@ -28,11 +28,11 @@ class NodeData(NamedTuple): class EdgeSchema(pa.SchemaModel): name: Series[str] = pa.Field(default="") from_node_type: Series[str] = pa.Field(nullable=True) - from_node_id: Series[int] = pa.Field(default=0, coerce=True) + from_node_id: Series[np.int32] = pa.Field(default=0, coerce=True) to_node_type: Series[str] = pa.Field(nullable=True) - to_node_id: Series[int] = pa.Field(default=0, coerce=True) + to_node_id: Series[np.int32] = pa.Field(default=0, coerce=True) edge_type: Series[str] = pa.Field(default="flow", coerce=True) - subnetwork_id: Series[pd.Int64Dtype] = pa.Field( + subnetwork_id: Series[pd.Int32Dtype] = pa.Field( default=pd.NA, nullable=True, coerce=True ) geometry: GeoSeries[Any] = pa.Field(default=None, nullable=True) @@ -70,12 +70,12 @@ def add( table_to_append = GeoDataFrame[EdgeSchema]( data={ "from_node_type": pd.Series([from_node.node_type], dtype=str), - "from_node_id": pd.Series([from_node.node_id], dtype=int), + "from_node_id": pd.Series([from_node.node_id], dtype=np.int32), "to_node_type": pd.Series([to_node.node_type], dtype=str), - "to_node_id": pd.Series([to_node.node_id], dtype=int), + "to_node_id": pd.Series([to_node.node_id], dtype=np.int32), "edge_type": pd.Series([edge_type], dtype=str), "name": pd.Series([name], dtype=str), - "subnetwork_id": pd.Series([subnetwork_id], dtype=pd.Int64Dtype()), + "subnetwork_id": pd.Series([subnetwork_id], dtype=pd.Int32Dtype()), }, geometry=geometry_to_append, ) diff --git a/python/ribasim/ribasim/geometry/node.py b/python/ribasim/ribasim/geometry/node.py index d5916a56f..b61d83aeb 100644 --- a/python/ribasim/ribasim/geometry/node.py +++ b/python/ribasim/ribasim/geometry/node.py @@ -15,10 +15,10 @@ class NodeSchema(pa.SchemaModel): - node_id: Series[int] + node_id: Series[np.int32] name: Series[str] = pa.Field(default="") node_type: Series[str] = pa.Field(default="") - subnetwork_id: Series[pd.Int64Dtype] = pa.Field( + subnetwork_id: Series[pd.Int32Dtype] = pa.Field( default=pd.NA, nullable=True, coerce=True ) geometry: GeoSeries[Any] = pa.Field(default=None, nullable=True) diff --git a/python/ribasim/ribasim/model.py b/python/ribasim/ribasim/model.py index d0faaeb12..c22fdc398 100644 --- a/python/ribasim/ribasim/model.py +++ b/python/ribasim/ribasim/model.py @@ -3,6 +3,7 @@ from pathlib import Path from typing import Any +import numpy as np import pandas as pd import tomli import tomli_w @@ -237,10 +238,10 @@ def reset_contextvar(self) -> "Model": def plot_control_listen(self, ax): df_listen_edge = pd.DataFrame( data={ - "control_node_id": pd.Series([], dtype="int"), - "control_node_type": pd.Series([], dtype="str"), - "listen_node_id": pd.Series([], dtype="int"), - "listen_node_type": pd.Series([], dtype="str"), + "control_node_id": pd.Series([], dtype=np.int32), + "control_node_type": pd.Series([], dtype=str), + "listen_node_id": pd.Series([], dtype=np.int32), + "listen_node_type": pd.Series([], dtype=str), } ) diff --git a/python/ribasim/ribasim/schemas.py b/python/ribasim/ribasim/schemas.py index 34dc9dba1..93f5e9f3f 100644 --- a/python/ribasim/ribasim/schemas.py +++ b/python/ribasim/ribasim/schemas.py @@ -1,5 +1,6 @@ # Automatically generated file. Do not modify. +import numpy as np import pandera as pa from pandera.dtypes import Timestamp from pandera.typing import Series @@ -12,18 +13,18 @@ class Config: class BasinProfileSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) area: Series[float] = pa.Field(nullable=False) level: Series[float] = pa.Field(nullable=False) class BasinStateSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) level: Series[float] = pa.Field(nullable=False) class BasinStaticSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) drainage: Series[float] = pa.Field(nullable=True) potential_evaporation: Series[float] = pa.Field(nullable=True) infiltration: Series[float] = pa.Field(nullable=True) @@ -32,14 +33,14 @@ class BasinStaticSchema(_BaseSchema): class BasinSubgridSchema(_BaseSchema): - subgrid_id: Series[int] = pa.Field(nullable=False, default=0) - node_id: Series[int] = pa.Field(nullable=False, default=0) + subgrid_id: Series[np.int32] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) basin_level: Series[float] = pa.Field(nullable=False) subgrid_level: Series[float] = pa.Field(nullable=False) class BasinTimeSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) time: Series[Timestamp] = pa.Field(nullable=False) drainage: Series[float] = pa.Field(nullable=True) potential_evaporation: Series[float] = pa.Field(nullable=True) @@ -49,80 +50,80 @@ class BasinTimeSchema(_BaseSchema): class DiscreteControlConditionSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) listen_node_type: Series[str] = pa.Field(nullable=False) - listen_node_id: Series[int] = pa.Field(nullable=False, default=0) + listen_node_id: Series[np.int32] = pa.Field(nullable=False, default=0) variable: Series[str] = pa.Field(nullable=False) greater_than: Series[float] = pa.Field(nullable=False) look_ahead: Series[float] = pa.Field(nullable=True) class DiscreteControlLogicSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) truth_state: Series[str] = pa.Field(nullable=False) control_state: Series[str] = pa.Field(nullable=False) class FlowBoundaryStaticSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) active: Series[pa.BOOL] = pa.Field(nullable=True) flow_rate: Series[float] = pa.Field(nullable=False) class FlowBoundaryTimeSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) time: Series[Timestamp] = pa.Field(nullable=False) flow_rate: Series[float] = pa.Field(nullable=False) class FlowDemandStaticSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) demand: Series[float] = pa.Field(nullable=False) - priority: Series[int] = pa.Field(nullable=False, default=0) + priority: Series[np.int32] = pa.Field(nullable=False, default=0) class FlowDemandTimeSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) time: Series[Timestamp] = pa.Field(nullable=False) demand: Series[float] = pa.Field(nullable=False) - priority: Series[int] = pa.Field(nullable=False, default=0) + priority: Series[np.int32] = pa.Field(nullable=False, default=0) class FractionalFlowStaticSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) fraction: Series[float] = pa.Field(nullable=False) control_state: Series[str] = pa.Field(nullable=True) class LevelBoundaryStaticSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) active: Series[pa.BOOL] = pa.Field(nullable=True) level: Series[float] = pa.Field(nullable=False) class LevelBoundaryTimeSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) time: Series[Timestamp] = pa.Field(nullable=False) level: Series[float] = pa.Field(nullable=False) class LevelDemandStaticSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) min_level: Series[float] = pa.Field(nullable=False) max_level: Series[float] = pa.Field(nullable=False) - priority: Series[int] = pa.Field(nullable=False, default=0) + priority: Series[np.int32] = pa.Field(nullable=False, default=0) class LevelDemandTimeSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) time: Series[Timestamp] = pa.Field(nullable=False) min_level: Series[float] = pa.Field(nullable=False) max_level: Series[float] = pa.Field(nullable=False) - priority: Series[int] = pa.Field(nullable=False, default=0) + priority: Series[np.int32] = pa.Field(nullable=False, default=0) class LinearResistanceStaticSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) active: Series[pa.BOOL] = pa.Field(nullable=True) resistance: Series[float] = pa.Field(nullable=False) max_flow_rate: Series[float] = pa.Field(nullable=True) @@ -130,7 +131,7 @@ class LinearResistanceStaticSchema(_BaseSchema): class ManningResistanceStaticSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) active: Series[pa.BOOL] = pa.Field(nullable=True) length: Series[float] = pa.Field(nullable=False) manning_n: Series[float] = pa.Field(nullable=False) @@ -140,7 +141,7 @@ class ManningResistanceStaticSchema(_BaseSchema): class OutletStaticSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) active: Series[pa.BOOL] = pa.Field(nullable=True) flow_rate: Series[float] = pa.Field(nullable=False) min_flow_rate: Series[float] = pa.Field(nullable=True) @@ -150,10 +151,10 @@ class OutletStaticSchema(_BaseSchema): class PidControlStaticSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) active: Series[pa.BOOL] = pa.Field(nullable=True) listen_node_type: Series[str] = pa.Field(nullable=False) - listen_node_id: Series[int] = pa.Field(nullable=False, default=0) + listen_node_id: Series[np.int32] = pa.Field(nullable=False, default=0) target: Series[float] = pa.Field(nullable=False) proportional: Series[float] = pa.Field(nullable=False) integral: Series[float] = pa.Field(nullable=False) @@ -162,9 +163,9 @@ class PidControlStaticSchema(_BaseSchema): class PidControlTimeSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) listen_node_type: Series[str] = pa.Field(nullable=False) - listen_node_id: Series[int] = pa.Field(nullable=False, default=0) + listen_node_id: Series[np.int32] = pa.Field(nullable=False, default=0) time: Series[Timestamp] = pa.Field(nullable=False) target: Series[float] = pa.Field(nullable=False) proportional: Series[float] = pa.Field(nullable=False) @@ -174,7 +175,7 @@ class PidControlTimeSchema(_BaseSchema): class PumpStaticSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) active: Series[pa.BOOL] = pa.Field(nullable=True) flow_rate: Series[float] = pa.Field(nullable=False) min_flow_rate: Series[float] = pa.Field(nullable=True) @@ -183,7 +184,7 @@ class PumpStaticSchema(_BaseSchema): class TabulatedRatingCurveStaticSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) active: Series[pa.BOOL] = pa.Field(nullable=True) level: Series[float] = pa.Field(nullable=False) flow_rate: Series[float] = pa.Field(nullable=False) @@ -191,29 +192,29 @@ class TabulatedRatingCurveStaticSchema(_BaseSchema): class TabulatedRatingCurveTimeSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) time: Series[Timestamp] = pa.Field(nullable=False) level: Series[float] = pa.Field(nullable=False) flow_rate: Series[float] = pa.Field(nullable=False) class TerminalStaticSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) class UserDemandStaticSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) active: Series[pa.BOOL] = pa.Field(nullable=True) demand: Series[float] = pa.Field(nullable=False) return_factor: Series[float] = pa.Field(nullable=False) min_level: Series[float] = pa.Field(nullable=False) - priority: Series[int] = pa.Field(nullable=False, default=0) + priority: Series[np.int32] = pa.Field(nullable=False, default=0) class UserDemandTimeSchema(_BaseSchema): - node_id: Series[int] = pa.Field(nullable=False, default=0) + node_id: Series[np.int32] = pa.Field(nullable=False, default=0) time: Series[Timestamp] = pa.Field(nullable=False) demand: Series[float] = pa.Field(nullable=False) return_factor: Series[float] = pa.Field(nullable=False) min_level: Series[float] = pa.Field(nullable=False) - priority: Series[int] = pa.Field(nullable=False, default=0) + priority: Series[np.int32] = pa.Field(nullable=False, default=0) diff --git a/python/ribasim/tests/test_io.py b/python/ribasim/tests/test_io.py index 1a86f1b10..e0b316106 100644 --- a/python/ribasim/tests/test_io.py +++ b/python/ribasim/tests/test_io.py @@ -1,5 +1,6 @@ from datetime import datetime +import numpy as np import pytest import ribasim import tomli @@ -57,7 +58,8 @@ def test_basic_transient(basic_transient, tmp_path): __assert_equal(model_orig.edge.df, model_loaded.edge.df) time = model_loaded.basin.time - assert model_orig.basin.time.df.time[0] == time.df.time[0] + assert model_orig.basin.time.df.time.iloc[0] == time.df.time.iloc[0] + assert time.df.node_id.dtype == np.int32 __assert_equal(model_orig.basin.time.df, time.df) assert time.df.shape == (1468, 7) diff --git a/python/ribasim/tests/test_model.py b/python/ribasim/tests/test_model.py index d5f3b4207..3c4ded6bb 100644 --- a/python/ribasim/tests/test_model.py +++ b/python/ribasim/tests/test_model.py @@ -181,6 +181,8 @@ def test_node_table(basic): node = model.node_table() df = node.df assert df.geometry.is_unique + assert df.node_id.dtype == np.int32 + assert df.subnetwork_id.dtype == pd.Int32Dtype() assert df.node_type.iloc[0] == "Basin" assert df.node_type.iloc[-1] == "Terminal" diff --git a/python/ribasim_testmodels/ribasim_testmodels/backwater.py b/python/ribasim_testmodels/ribasim_testmodels/backwater.py index 3d60f5f08..a01d563e0 100644 --- a/python/ribasim_testmodels/ribasim_testmodels/backwater.py +++ b/python/ribasim_testmodels/ribasim_testmodels/backwater.py @@ -18,7 +18,7 @@ def backwater_model(): node_type[0] = "FlowBoundary" node_type[-1] = "LevelBoundary" - ids = np.arange(1, node_type.size + 1, dtype=int) + ids = np.arange(1, node_type.size + 1, dtype=np.int32) model = ribasim.Model( starttime="2020-01-01", diff --git a/utils/gen_python.jl b/utils/gen_python.jl index 52b689716..db4c13548 100644 --- a/utils/gen_python.jl +++ b/utils/gen_python.jl @@ -6,7 +6,7 @@ using OteraEngine using Ribasim pythontype(::Type{<:AbstractString}) = "Series[str]" -pythontype(::Type{<:Integer}) = "Series[int]" +pythontype(::Type{<:Integer}) = "Series[np.int32]" pythontype(::Type{<:AbstractFloat}) = "Series[float]" pythontype(::Type{<:Number}) = "Series[float]" pythontype(::Type{<:Bool}) = "Series[pa.BOOL]" # pa.BOOL is a nullable boolean type, bool is not nullable diff --git a/utils/templates/schemas.py.jinja b/utils/templates/schemas.py.jinja index db2a8df50..2b513110e 100644 --- a/utils/templates/schemas.py.jinja +++ b/utils/templates/schemas.py.jinja @@ -1,5 +1,6 @@ # Automatically generated file. Do not modify. +import numpy as np import pandera as pa from pandera.dtypes import Timestamp from pandera.typing import Series @@ -14,7 +15,7 @@ class _BaseSchema(pa.DataFrameModel): {% for m in models %} class {{m[:name]}}Schema(_BaseSchema): {% for f in m[:fields] %} - {% if (f[2] == "Series[int]") %} + {% if (f[2] == "Series[np.int32]") %} {{ f[1] }}: {{ f[2] }} = pa.Field(nullable={{ f[3] }}, default=0) {% else %} {{ f[1] }}: {{ f[2] }} = pa.Field(nullable={{ f[3] }})