From 8ede13d6d6bf0fa896f4ca7ecc82bda730bf2558 Mon Sep 17 00:00:00 2001 From: Daniel Tollenaar Date: Fri, 17 Jan 2025 14:25:21 +0100 Subject: [PATCH 1/9] misc parameterisatie --- notebooks/noorderzijlvest/01_fix_model.py | 26 +++ .../noorderzijlvest/02_parameterize_model.py | 154 ++++++++++++++++ src/ribasim_nl/ribasim_nl/model.py | 7 +- .../ribasim_nl/parametrization/__init__.py | 3 + .../ribasim_nl/parametrization/conversions.py | 77 ++++++++ .../parametrization/empty_static_df.py | 38 ++++ .../parametrization/target_level.py | 165 ++++++++++++++++++ 7 files changed, 468 insertions(+), 2 deletions(-) create mode 100644 notebooks/noorderzijlvest/02_parameterize_model.py create mode 100644 src/ribasim_nl/ribasim_nl/parametrization/__init__.py create mode 100644 src/ribasim_nl/ribasim_nl/parametrization/conversions.py create mode 100644 src/ribasim_nl/ribasim_nl/parametrization/empty_static_df.py create mode 100644 src/ribasim_nl/ribasim_nl/parametrization/target_level.py diff --git a/notebooks/noorderzijlvest/01_fix_model.py b/notebooks/noorderzijlvest/01_fix_model.py index 21c7d8a..0058b59 100644 --- a/notebooks/noorderzijlvest/01_fix_model.py +++ b/notebooks/noorderzijlvest/01_fix_model.py @@ -10,6 +10,7 @@ from shapely.ops import snap, split from ribasim_nl import CloudStorage, Model, Network, NetworkValidator +from ribasim_nl.case_conversions import pascal_to_snake_case from ribasim_nl.reset_static_tables import reset_static_tables cloud = CloudStorage() @@ -309,8 +310,33 @@ def get_network_node(point): model.remove_unassigned_basin_area() + +# %% TabulatedRatingCurve to Outlet + +# TabulatedRatingCurve to Outlet +for row in model.node_table().df[model.node_table().df.node_type == "TabulatedRatingCurve"].itertuples(): + node_id = row.Index + model.update_node(node_id=node_id, node_type="Outlet") + +# %% sanitize node-tables +node_columns = model.basin.node.columns() + ["meta_code_waterbeheerder", "meta_categorie"] + +# name to code +model.outlet.node.df.loc[:, 
"meta_code_waterbeheerder"] = model.outlet.node.df.name +model.pump.node.df.loc[:, "meta_code_waterbeheerder"] = model.pump.node.df.name + +# remove names and clean columns +for node_type in model.node_table().df.node_type.unique(): + table = getattr(model, pascal_to_snake_case(node_type)) + table.node.df.name = "" + columns = [col for col in table.node.df.columns if col in node_columns] + table.node.df = table.node.df[columns] + + # %% write model model.use_validation = True model.write(ribasim_toml) model.report_basin_area() model.report_internal_basins() + +# %% diff --git a/notebooks/noorderzijlvest/02_parameterize_model.py b/notebooks/noorderzijlvest/02_parameterize_model.py new file mode 100644 index 0000000..c1e8e2a --- /dev/null +++ b/notebooks/noorderzijlvest/02_parameterize_model.py @@ -0,0 +1,154 @@ +# %% +import time + +import geopandas as gpd +import numpy as np +import pandas as pd + +from ribasim_nl import CloudStorage, Model +from ribasim_nl.parametrization import empty_static_df +from ribasim_nl.parametrization.target_level import ( + downstream_target_levels, + upstream_target_levels, +) + +cloud = CloudStorage() +authority = "Noorderzijlvest" +short_name = "nzv" +waterbeheercode = 34 +kwk_names = {"KSL011": "R.J. 
Cleveringsluizen"} + +parameter_defaults = { + "Afvoergemaal": {"upstream_level_offset": 0.0, "downstream_level_offset": 0.3, "flow_rate_mm_per_day": 15}, + "Aanvoergemaal": {"upstream_level_offset": 0.2, "downstream_level_offset": 0.0, "flow_rate": 0.0}, + "Uitlaat": {"upstream_level_offset": 0.0, "downstream_level_offset": 0.3, "flow_rate_mm_per_day": 50}, + "Inlaat": {"upstream_level_offset": 0.2, "downstream_level_offset": 0.0, "flow_rate": 0.0}, +} + + +# files +gemaal_gpkg = cloud.joinpath("Basisgegevens", "GKW", "20250116", "gemaal.gpkg") +stuw_gpkg = cloud.joinpath("Basisgegevens", "GKW", "20250116", "stuw.gpkg") +sluis_gpkg = cloud.joinpath("Basisgegevens", "GKW", "20250116", "sluis.gpkg") +static_data_xlsx = cloud.joinpath( + "Noorderzijlvest", + "verwerkt", + "parameters", + "static_data.xlsx", +) + +# read files +_dfs = (gpd.read_file(i) for i in (gemaal_gpkg, stuw_gpkg, sluis_gpkg)) +_dfs = (i[i.nen3610id.str.startswith(f"NL.WBHCODE.{waterbeheercode}")] for i in _dfs) +name_series = pd.concat(_dfs, ignore_index=True).drop_duplicates("code").set_index("code")["naam"] +for k, v in kwk_names.items(): + name_series[k] = v + + +gemaal_df = gpd.read_file(gemaal_gpkg, fid_as_index=True).nen3610id.str.startswith(f"NL.WBHCODE.{waterbeheercode}") +stuw_df = gpd.read_file(stuw_gpkg, fid_as_index=True).nen3610id.str.startswith(f"NL.WBHCODE.{waterbeheercode}") +sluis_gpkg = gpd.read_file(sluis_gpkg, fid_as_index=True).nen3610id.str.startswith(f"NL.WBHCODE.{waterbeheercode}") + +gemaal_static_data = pd.read_excel(static_data_xlsx, sheet_name="Pump").set_index("code") +outlet_static_data = pd.read_excel(static_data_xlsx, sheet_name="Outlet").set_index("code") + +# read model +ribasim_toml = cloud.joinpath(authority, "modellen", f"{authority}_fix_model", f"{short_name}.toml") +model = Model.read(ribasim_toml) + +# %% +start_time = time.time() +# %% +# Outlet +# %% parameterize +# set name +mask = model.outlet.node.df.meta_code_waterbeheerder.isin(name_series.index) 
+model.outlet.node.df.loc[~mask, "name"] = "" +model.outlet.node.df.loc[mask, "name"] = model.outlet.node.df[mask]["meta_code_waterbeheerder"].apply( + lambda x: name_series[x] +) + +# construct parameters +static_df = empty_static_df(model=model, node_type="Outlet", meta_columns=["meta_code_waterbeheerder"]) + +static_data = outlet_static_data.copy() + +# update-function from static_data in Excel +static_data["node_id"] = static_df.set_index("meta_code_waterbeheerder").loc[static_data.index].node_id +static_data.columns = [i if i in static_df.columns else f"meta_{i}" for i in static_data.columns] +static_data = static_data.set_index("node_id") + +static_df.set_index("node_id", inplace=True) +for col in static_data.columns: + series = static_data[static_data[col].notna()] + static_df.loc[series.index.to_numpy(), col] = series +static_df.reset_index(inplace=True) + +# update-function from defaults + +for category, parameters in parameter_defaults.items(): + print(category) + + mask = static_df["meta_categorie"] == category + + # flow_rate; all in sub_mask == True needs to be filled + sub_mask = static_df[mask]["flow_rate"].isna() + indices = sub_mask.index.to_numpy() + if sub_mask.any(): + # fill nan with flow_rate_mm_day if provided + + if "flow_rate_mm_per_day" in parameters.keys(): + flow_rate_mm_per_day = parameters["flow_rate_mm_per_day"] + flow_rate = np.array( + [ + (model.get_upstream_basins(node_id).area.sum() * flow_rate_mm_per_day / 1000) / 86400 + for node_id in static_df[mask][sub_mask].node_id + ], + dtype=float, + ) + scale = 10 ** (3 - 1 - np.where(flow_rate > 0, np.floor(np.log10(flow_rate)), 0)) + static_df.loc[indices, "flow_rate"] = np.round(flow_rate * scale) / scale + elif "flow_rate" in parameters.keys(): + static_df.loc[indices, "flow_rate"] = parameters["flow_rate"] + else: + raise ValueError(f"Can't set flow_rate for node_ids {static_df.loc[indices, "node_id"].to_numpy()}") + + print("Elapsed Time:", time.time() - start_time, "seconds") + # 
min_upstream_level + sub_mask = static_df[mask]["min_upstream_level"].isna() + if sub_mask.any(): + # calculate upstream levels + upstream_level_offset = parameters["upstream_level_offset"] + upstream_levels = upstream_target_levels(model=model, node_ids=static_df[mask][sub_mask].node_id) + + # assign upstream levels to static_df + static_df.set_index("node_id", inplace=True) + static_df.loc[upstream_levels.index, "min_upstream_level"] = (upstream_levels - upstream_level_offset).round(2) + static_df.reset_index(inplace=True) + print("Elapsed Time:", time.time() - start_time, "seconds") + # max_downstream_level + sub_mask = static_df[mask]["max_downstream_level"].isna() + if sub_mask.any(): + # calculate downstream_levels + downstream_level_offset = parameters["downstream_level_offset"] + downstream_levels = downstream_target_levels(model=model, node_ids=static_df[mask][sub_mask].node_id) + + # assign upstream levels to static_df + static_df.set_index("node_id", inplace=True) + static_df.loc[downstream_levels.index, "max_downstream_level"] = ( + downstream_levels + downstream_level_offset + ).round(2) + static_df.reset_index(inplace=True) + +# sanitize df +static_df.drop(columns=["meta_code_waterbeheerder"], inplace=True) +model.outlet.static.df = static_df + + +# %% write + +# Write model +ribasim_toml = cloud.joinpath(authority, "modellen", f"{authority}_parameterized_model", f"{short_name}.toml") +model.write(ribasim_toml) + +print("Elapsed Time:", time.time() - start_time, "seconds") diff --git a/src/ribasim_nl/ribasim_nl/model.py b/src/ribasim_nl/ribasim_nl/model.py index 7dbe869..21e4d5d 100644 --- a/src/ribasim_nl/ribasim_nl/model.py +++ b/src/ribasim_nl/ribasim_nl/model.py @@ -501,7 +501,7 @@ def update_basin_area(self, node_id: int, geometry: Polygon | MultiPolygon, basi self.basin.area.df.loc[mask, ["node_id"]] = node_id - def add_basin_area(self, geometry: MultiPolygon, node_id: int | None = None): + def add_basin_area(self, geometry: MultiPolygon, 
node_id: int | None = None, meta_streefpeil: float | None = None): # if node_id is None, get an available node_id if pd.isna(node_id): basin_df = self.basin.node.df[self.basin.node.df.within(geometry)] @@ -524,7 +524,10 @@ def add_basin_area(self, geometry: MultiPolygon, node_id: int | None = None): raise ValueError(f"geometry-type {geometry.geom_type} is not valid. Provide (Multi)Polygon instead") # if all correct, assign - area_df = gpd.GeoDataFrame({"node_id": [node_id], "geometry": [geometry]}, crs=self.crs) + data = {"node_id": [node_id], "geometry": [geometry]} + if meta_streefpeil is not None: + data = {**data, "meta_streefpeil": [meta_streefpeil]} + area_df = gpd.GeoDataFrame(data, crs=self.crs) area_df.index.name = "fid" area_df.index += self.basin.area.df.index.max() + 1 self.basin.area.df = pd.concat([self.basin.area.df, area_df]) diff --git a/src/ribasim_nl/ribasim_nl/parametrization/__init__.py b/src/ribasim_nl/ribasim_nl/parametrization/__init__.py new file mode 100644 index 0000000..3541dac --- /dev/null +++ b/src/ribasim_nl/ribasim_nl/parametrization/__init__.py @@ -0,0 +1,3 @@ +from ribasim_nl.parametrization.empty_static_df import empty_static_df + +__all__ = ["empty_static_df"] diff --git a/src/ribasim_nl/ribasim_nl/parametrization/conversions.py b/src/ribasim_nl/ribasim_nl/parametrization/conversions.py new file mode 100644 index 0000000..a6b818d --- /dev/null +++ b/src/ribasim_nl/ribasim_nl/parametrization/conversions.py @@ -0,0 +1,77 @@ +# %% +import math +from decimal import ROUND_HALF_UP, Decimal + +import pandas as pd + + +def mm_per_day_to_m3_per_second(mm_per_day: float, area_m2: float, precision: float | int = 0.001): + """ + Convert rainfall rate from mm/day to m³/s based on the given area. + + Parameters + ---------- + mm_per_day (float): Millimeters per day (net rainfall). + area_m2 (float): Area in square meters. + precision (float): Optional rounding precision (e.g., 10, 100, 0.1). 
Defaults to 0.001
+
+    Returns
+    -------
+    float: Flow rate in cubic meters per second (m³/s).
+    """
+    # Convert mm/day to m³/day
+    volume_m3_per_day = mm_per_day * area_m2 * 1e-3
+
+    # Convert m³/day to m³/s
+    flow_rate_m3_per_s = volume_m3_per_day / 86400
+
+    return round_to_precision(flow_rate_m3_per_s, precision)
+
+
+def round_to_precision(number: float, precision: float | int):
+    """
+    Round a number to the nearest multiple of the specified precision.
+
+    Parameters
+    ----------
+    number (float): The number to round.
+    precision (float): The rounding precision (e.g., 10, 100, 0.1).
+
+    Returns
+    -------
+    float: The rounded number.
+    """
+    if pd.isna(number):  # can't round nans
+        return number
+
+    number = Decimal(str(number))
+    precision = Decimal(str(precision))
+    rounded = (number / precision).quantize(Decimal("1"), rounding=ROUND_HALF_UP) * precision
+
+    return float(rounded)
+
+
+def round_to_significant_digits(number: float, significant_digits: int = 3) -> float:
+    """
+    Rounds a number to a specified maximum number of significant digits.
+
+    Parameters
+    ----------
+    number (float): The input number.
+    significant_digits (int): The maximum number of significant digits.
+
+    Returns
+    -------
+    float: The rounded number.
+ """ + if (number == 0) | pd.isna(number): + return number # Zero or nan remains zero or nan + if significant_digits <= 0: + raise ValueError("max_significant_digits must be a positive integer.") + + # Determine the order of magnitude of the number + exponent = int(math.floor(math.log10(abs(number)))) + # Calculate the rounding precision + precision = 10 ** (exponent - significant_digits + 1) + # Round the number to the nearest significant digit + return round_to_precision(number, precision) diff --git a/src/ribasim_nl/ribasim_nl/parametrization/empty_static_df.py b/src/ribasim_nl/ribasim_nl/parametrization/empty_static_df.py new file mode 100644 index 0000000..df9ed15 --- /dev/null +++ b/src/ribasim_nl/ribasim_nl/parametrization/empty_static_df.py @@ -0,0 +1,38 @@ +import pandas as pd +from ribasim import nodes + +from ribasim_nl import Model +from ribasim_nl.case_conversions import pascal_to_snake_case + + +def empty_static_df(model: Model, node_type: str, meta_columns: list[str] = []) -> pd.DataFrame: + """Return an empty DataFrame for a Static-table. + + This allows the user to generate a DataFrame for a Static table with correct dtypes yet empty fields when not allowed by schema + + Args: + model (Model): ribasim Model + node_type (str): node_type to get DataFrame for + meta_columns (list, optional): meta-columns from Node-table to append to DataFrame. Defaults to []. 
+ + Returns + ------- + pd.DataFrame: DataFrame for static table + """ + # read node from model + node = getattr(model, pascal_to_snake_case(node_type)) + meta_columns = [i for i in meta_columns if i in node.node.df.columns] + + # get correct dtypes + dtypes = getattr(nodes, pascal_to_snake_case(node_type)).Static(**{k: [] for k in node.static.columns()}).df.dtypes + + # populate dataframe + df = node.node.df.reset_index()[["node_id"] + meta_columns] + + for column, dtype in dtypes.to_dict().items(): + if column in df.columns: + df.loc[:, column] = df[column].astype(dtype) + else: + df[column] = pd.Series(dtype=dtype) + + return df[dtypes.index.to_list() + meta_columns] diff --git a/src/ribasim_nl/ribasim_nl/parametrization/target_level.py b/src/ribasim_nl/ribasim_nl/parametrization/target_level.py new file mode 100644 index 0000000..d5c5304 --- /dev/null +++ b/src/ribasim_nl/ribasim_nl/parametrization/target_level.py @@ -0,0 +1,165 @@ +import numpy as np +import pandas as pd + +from ribasim_nl import Model +from ribasim_nl.parametrization.conversions import round_to_precision + +valid_node_types = ["Outlet", "TabulatedRatingCurve", "Pump"] + + +def upstream_target_levels( + model: Model, node_ids: list[int], target_level_column: str = "meta_streefpeil" +) -> pd.Series: + """Return upstream target_levels for a list of node_ids if these node_ids are present in model.basin.area.df + + Args: + model (Model): model (Model): Ribasim model + node_ids (list[int]): Ribasim node_ids for connector Nodes + target_level_column (str, optional): column in basin.area.df containing target-level. Defaults to "meta_streefpeil". 
+ + Returns + ------- + pd.Series: float-type series with target-levels + """ + # get upstream node_id from edge_table + df = model.edge.df[model.edge.df["to_node_id"].isin(node_ids)][["to_node_id", "from_node_id"]] + df.set_index("to_node_id", inplace=True) + df.index.name = "node_id" + df.columns = ["upstream_node_id"] + + # get in_basin_mask; we can only get target_levels if upstream_node_id is present as node_id in model.basin.area.df + df = df.reset_index().set_index("upstream_node_id") + in_basin_mask = df.index.isin(model.basin.area.df.node_id) + + # get upstream_target_level from basin.area.df + df.loc[in_basin_mask, "upstream_target_level"] = model.basin.area.df.set_index("node_id").loc[ + df[in_basin_mask].index, target_level_column + ] + + # sanitize df and only return upstream_target_level_series + series = df.reset_index().set_index("node_id").sort_index()["upstream_target_level"] + + return series + + +def downstream_target_levels( + model: Model, node_ids: list[int], target_level_column: str = "meta_streefpeil" +) -> pd.Series: + """Return downstream target_levels for a list of node_ids if these node_ids are present in model.basin.area.df + + Args: + model (Model): model (Model): Ribasim model + node_ids (list[int]): Ribasim node_ids for connector Nodes + target_level_column (str, optional): column in basin.area.df containing target-level. Defaults to "meta_streefpeil". 
+ + Returns + ------- + pd.Series: float-type series with target-levels + """ + # get downstream node_id from edge_table + df = model.edge.df[model.edge.df["from_node_id"].isin(node_ids)][["from_node_id", "to_node_id"]] + df.set_index("from_node_id", inplace=True) + df.index.name = "node_id" + df.columns = ["downstream_node_id"] + + # get in_basin_mask; we can only get target_levels if downstream_node_id is present as node_id in model.basin.area.df + df = df.reset_index().set_index("downstream_node_id") + in_basin_mask = df.index.isin(model.basin.area.df.node_id) + + # get downstream_target_level from basin.area.df + df.loc[in_basin_mask, "downstream_target_level"] = model.basin.area.df.set_index("node_id").loc[ + df[in_basin_mask].index, target_level_column + ] + + # sanitize df and only return downstream_target_level_series + series = df.reset_index().set_index("node_id").sort_index()["downstream_target_level"] + + return series + + +def upstream_target_level( + model: Model, node_id: int, target_level_column: str = "meta_streefpeil", precision: int | float = 0.01 +) -> float: + f"""Return the upstream target-level of a connector Node if it is provided in as column-value in a the basin.area + + Args: + model (Model): Ribasim model + node_id (int): node_id of connector Node + target_level_column (str, optional): column in basin.area that holds target_levels. Defaults to "meta_streefpeil". + precision (float): The rounding precision (e.g., 10, 100, 0.1). 
Default = 0.01 + + Raises: + ValueError: if node_id does not belong to a connector Node with type {valid_node_types} + + Returns: + float: upstream target-level + """ + # check if we have a valid node to get upstream_level from + node_type = model.get_node_type(node_id=node_id) + if node_type not in valid_node_types: + raise ValueError(f"node_type of node_id {node_id} not valid: {node_type} not in {valid_node_types}") + + us_node_id = model.upstream_node_id(node_id=node_id) + return get_target_level( + model=model, node_id=us_node_id, target_level_column=target_level_column, precision=precision + ) + + +def downstream_target_level( + model: Model, node_id: int, target_level_column: str = "meta_streefpeil", precision: int | float = 0.01 +) -> float: + f"""Return the downstream target-level of a connector Node if it is provided in as column-value in a the basin.area + + Args: + model (Model): Ribasim model + node_id (int): node_id of connector Node + target_level_column (str, optional): column in basin.area that holds target_levels. Defaults to "meta_streefpeil". + precision (float): The rounding precision (e.g., 10, 100, 0.1). 
Default = 0.01 + + Raises: + ValueError: if node_id does not belong to a connector Node with type {valid_node_types} + + Returns: + float: donwstream target-level + """ + # check if we have a valid node to get upstream_level from + node_type = model.get_node_type(node_id=node_id) + if node_type not in valid_node_types: + raise ValueError(f"node_type of node_id {node_id} not valid: {node_type} not in {valid_node_types}") + + ds_node_id = model.downstream_node_id(node_id=node_id) + return get_target_level( + model=model, node_id=ds_node_id, target_level_column=target_level_column, precision=precision + ) + + +def get_target_level( + model: Model, node_id: int, target_level_column: str = "meta_streefpeil", precision: int | float = 0.01 +) -> float: + """Return the target-level of a basin if it is provided in as column-value in a the basin.area + + Args: + model (Model): Ribasim model + node_id (int): node_id of basin Node + target_level_column (str, optional): _description_. Defaults to "meta_streefpeil". + precision (float): The rounding precision (e.g., 10, 100, 0.1). 
Default = 0.01 + + Returns + ------- + float: _description_ + """ + # if us_node_id is None, we return NA + if node_id is None: + return np.nan + else: # if us_node_type != Basin, we don't have a target_level + us_node_type = model.get_node_type(node_id=node_id) + if us_node_type != "Basin": + return np.nan + else: + if node_id in model.basin.area.df.node_id.to_numpy(): + return round_to_precision( + number=model.basin.area.df.set_index("node_id").at[node_id, target_level_column], + precision=precision, + ) + else: + return np.nan From 691ee412b01cdaa5728f8754849398a92867666b Mon Sep 17 00:00:00 2001 From: Daniel Tollenaar Date: Mon, 20 Jan 2025 09:10:46 +0100 Subject: [PATCH 2/9] werkend concept --- .../noorderzijlvest/02_parameterize_model.py | 193 ++++++++---------- src/ribasim_nl/ribasim_nl/model.py | 31 ++- .../parametrization/target_level.py | 92 --------- 3 files changed, 114 insertions(+), 202 deletions(-) diff --git a/notebooks/noorderzijlvest/02_parameterize_model.py b/notebooks/noorderzijlvest/02_parameterize_model.py index c1e8e2a..baab505 100644 --- a/notebooks/noorderzijlvest/02_parameterize_model.py +++ b/notebooks/noorderzijlvest/02_parameterize_model.py @@ -1,12 +1,13 @@ # %% import time -import geopandas as gpd import numpy as np import pandas as pd from ribasim_nl import CloudStorage, Model +from ribasim_nl.case_conversions import pascal_to_snake_case from ribasim_nl.parametrization import empty_static_df +from ribasim_nl.parametrization.conversions import round_to_significant_digits from ribasim_nl.parametrization.target_level import ( downstream_target_levels, upstream_target_levels, @@ -15,134 +16,110 @@ cloud = CloudStorage() authority = "Noorderzijlvest" short_name = "nzv" -waterbeheercode = 34 -kwk_names = {"KSL011": "R.J. 
Cleveringsluizen"} -parameter_defaults = { - "Afvoergemaal": {"upstream_level_offset": 0.0, "downstream_level_offset": 0.3, "flow_rate_mm_per_day": 15}, - "Aanvoergemaal": {"upstream_level_offset": 0.2, "downstream_level_offset": 0.0, "flow_rate": 0.0}, - "Uitlaat": {"upstream_level_offset": 0.0, "downstream_level_offset": 0.3, "flow_rate_mm_per_day": 50}, - "Inlaat": {"upstream_level_offset": 0.2, "downstream_level_offset": 0.0, "flow_rate": 0.0}, -} - -# files -gemaal_gpkg = cloud.joinpath("Basisgegevens", "GKW", "20250116", "gemaal.gpkg") -stuw_gpkg = cloud.joinpath("Basisgegevens", "GKW", "20250116", "stuw.gpkg") -sluis_gpkg = cloud.joinpath("Basisgegevens", "GKW", "20250116", "sluis.gpkg") static_data_xlsx = cloud.joinpath( "Noorderzijlvest", "verwerkt", "parameters", "static_data.xlsx", ) +static_data_sheets = pd.ExcelFile(static_data_xlsx).sheet_names +defaults_df = pd.read_excel(static_data_xlsx, sheet_name="defaults", index_col=0) -# read files -_dfs = (gpd.read_file(i) for i in (gemaal_gpkg, stuw_gpkg, sluis_gpkg)) -_dfs = (i[i.nen3610id.str.startswith(f"NL.WBHCODE.{waterbeheercode}")] for i in _dfs) -name_series = pd.concat(_dfs, ignore_index=True).drop_duplicates("code").set_index("code")["naam"] -for k, v in kwk_names.items(): - name_series[k] = v - - -gemaal_df = gpd.read_file(gemaal_gpkg, fid_as_index=True).nen3610id.str.startswith(f"NL.WBHCODE.{waterbeheercode}") -stuw_df = gpd.read_file(stuw_gpkg, fid_as_index=True).nen3610id.str.startswith(f"NL.WBHCODE.{waterbeheercode}") -sluis_gpkg = gpd.read_file(sluis_gpkg, fid_as_index=True).nen3610id.str.startswith(f"NL.WBHCODE.{waterbeheercode}") -gemaal_static_data = pd.read_excel(static_data_xlsx, sheet_name="Pump").set_index("code") -outlet_static_data = pd.read_excel(static_data_xlsx, sheet_name="Outlet").set_index("code") +# read model # read model ribasim_toml = cloud.joinpath(authority, "modellen", f"{authority}_fix_model", f"{short_name}.toml") model = Model.read(ribasim_toml) -# %% start_time = 
time.time() # %% -# Outlet -# %% parameterize -# set name -mask = model.outlet.node.df.meta_code_waterbeheerder.isin(name_series.index) -model.outlet.node.df.loc[~mask, "name"] = "" -model.outlet.node.df.loc[mask, "name"] = model.outlet.node.df[mask]["meta_code_waterbeheerder"].apply( - lambda x: name_series[x] -) +# parameterize + +for node_type in ["Pump", "Outlet"]: + # start with an empty static_df with the correct columns and meta_code_waterbeheerder + static_df = empty_static_df(model=model, node_type=node_type, meta_columns=["meta_code_waterbeheerder"]) + + # update on static_table + if node_type in static_data_sheets: + static_data = pd.read_excel(static_data_xlsx, sheet_name=node_type).set_index("code") + + # in case there is more defined in static_data than in the model + static_data = static_data[static_data.index.isin(static_df["meta_code_waterbeheerder"])] + + # update-function from static_data in Excel + static_data.loc[:, "node_id"] = static_df.set_index("meta_code_waterbeheerder").loc[static_data.index].node_id + static_data.columns = [i if i in static_df.columns else f"meta_{i}" for i in static_data.columns] + static_data = static_data.set_index("node_id") -# construct parameters -static_df = empty_static_df(model=model, node_type="Outlet", meta_columns=["meta_code_waterbeheerder"]) - -static_data = outlet_static_data.copy() - -# update-function from static_data in Excel -static_data["node_id"] = static_df.set_index("meta_code_waterbeheerder").loc[static_data.index].node_id -static_data.columns = [i if i in static_df.columns else f"meta_{i}" for i in static_data.columns] -static_data = static_data.set_index("node_id") - -static_df.set_index("node_id", inplace=True) -for col in static_data.columns: - series = static_data[static_data[col].notna()] - static_df.loc[series.index.to_numpy(), col] = series -static_df.reset_index(inplace=True) - -# update-function from defaults - -for category, parameters in parameter_defaults.items(): - print(category) - 
- mask = static_df["meta_categorie"] == category - - # flow_rate; all in sub_mask == True needs to be filled - sub_mask = static_df[mask]["flow_rate"].isna() - indices = sub_mask.index.to_numpy() - if sub_mask.any(): - # fill nan with flow_rate_mm_day if provided - - if "flow_rate_mm_per_day" in parameters.keys(): - flow_rate_mm_per_day = parameters["flow_rate_mm_per_day"] - flow_rate = np.array( - [ - (model.get_upstream_basins(node_id).area.sum() * flow_rate_mm_per_day / 1000) / 86400 - for node_id in static_df[mask][sub_mask].node_id - ], - dtype=float, - ) - scale = 10 ** (3 - 1 - np.where(flow_rate > 0, np.floor(np.log10(flow_rate)), 0)) - static_df.loc[indices, "flow_rate"] = np.round(flow_rate * scale) / scale - elif "flow_rate" in parameters.keys(): - static_df.loc[indices, "flow_rate"] = parameters["flow_rate"] - else: - raise ValueError(f"Can't set flow_rate for node_ids {static_df.loc[indices, "node_id"].to_numpy()}") - - print("Elapsed Time:", time.time() - start_time, "seconds") - # min_upstream_level - sub_mask = static_df[mask]["min_upstream_level"].isna() - if sub_mask.any(): - # calculate upstream levels - upstream_level_offset = parameters["upstream_level_offset"] - upstream_levels = upstream_target_levels(model=model, node_ids=static_df[mask][sub_mask].node_id) - - # assign upstream levels to static_df - static_df.set_index("node_id", inplace=True) - static_df.loc[upstream_levels.index, "min_upstream_level"] = (upstream_levels - upstream_level_offset).round(2) - static_df.reset_index(inplace=True) - print("Elapsed Time:", time.time() - start_time, "seconds") - # max_downstream_level - sub_mask = static_df[mask]["max_downstream_level"].isna() - if sub_mask.any(): - # calculate downstream_levels - downstream_level_offset = parameters["downstream_level_offset"] - downstream_levels = downstream_target_levels(model=model, node_ids=static_df[mask][sub_mask].node_id) - - # assign upstream levels to static_df static_df.set_index("node_id", inplace=True) 
- static_df.loc[downstream_levels.index, "max_downstream_level"] = ( - downstream_levels + downstream_level_offset - ).round(2) + for col in static_data.columns: + series = static_data[static_data[col].notna()][col] + if col == "flow_rate": + series = series.apply(round_to_significant_digits) + static_df.loc[series.index.to_numpy(), col] = series static_df.reset_index(inplace=True) -# sanitize df -static_df.drop(columns=["meta_code_waterbeheerder"], inplace=True) -model.outlet.static.df = static_df + # update-function from defaults + + for row in defaults_df.itertuples(): + category = row.Index + mask = static_df["meta_categorie"] == category + + # flow_rate; all in sub_mask == True needs to be filled + sub_mask = static_df[mask]["flow_rate"].isna() + indices = sub_mask.index.to_numpy() + if sub_mask.any(): + # fill nan with flow_rate_mm_day if provided + + if not pd.isna(row.flow_rate_mm_per_day): + flow_rate_mm_per_day = row.flow_rate_mm_per_day + flow_rate = np.array( + [ + round_to_significant_digits( + model.get_upstream_basins(node_id).area.sum() * flow_rate_mm_per_day / 1000 / 86400 + ) + for node_id in static_df[mask][sub_mask].node_id + ], + dtype=float, + ) + static_df.loc[indices, "flow_rate"] = flow_rate + elif not pd.isna(row.flow_rate): + static_df.loc[indices, "flow_rate"] = row.flow_rate + else: + raise ValueError(f"Can't set flow_rate for node_ids {static_df.loc[indices, "node_id"].to_numpy()}") + + # min_upstream_level + sub_mask = static_df[mask]["min_upstream_level"].isna() + if sub_mask.any(): + # calculate upstream levels + upstream_level_offset = row.upstream_level_offset + upstream_levels = upstream_target_levels(model=model, node_ids=static_df[mask][sub_mask].node_id) + + # assign upstream levels to static_df + static_df.set_index("node_id", inplace=True) + static_df.loc[upstream_levels.index, "min_upstream_level"] = ( + upstream_levels - upstream_level_offset + ).round(2) + static_df.reset_index(inplace=True) + sub_mask = 
static_df[mask]["max_downstream_level"].isna() + if sub_mask.any(): + # calculate downstream_levels + downstream_level_offset = row.downstream_level_offset + downstream_levels = downstream_target_levels(model=model, node_ids=static_df[mask][sub_mask].node_id) + + # assign upstream levels to static_df + static_df.set_index("node_id", inplace=True) + static_df.loc[downstream_levels.index, "max_downstream_level"] = ( + downstream_levels + downstream_level_offset + ).round(2) + static_df.reset_index(inplace=True) + + # sanitize df + static_df.drop(columns=["meta_code_waterbeheerder"], inplace=True) + getattr(model, pascal_to_snake_case(node_type)).static.df = static_df # %% write diff --git a/src/ribasim_nl/ribasim_nl/model.py b/src/ribasim_nl/ribasim_nl/model.py index 21e4d5d..882b87c 100644 --- a/src/ribasim_nl/ribasim_nl/model.py +++ b/src/ribasim_nl/ribasim_nl/model.py @@ -1,5 +1,6 @@ # %% import warnings +from collections import deque from pathlib import Path from typing import Literal @@ -126,9 +127,35 @@ def update_state(self, time_stamp: pd.Timestamp | None = None): self.basin.state.df = df # methods relying on networkx. 
Discuss making this all in a subclass of Model + # def _upstream_nodes(self, node_id): + # # get upstream nodes + # return list(nx.traversal.bfs_tree(self.graph, node_id, reverse=True)) + def _upstream_nodes(self, node_id): - # get upstream nodes - return list(nx.traversal.bfs_tree(self.graph, node_id, reverse=True)) + visited = set() # To keep track of visited nodes + upstream_nodes = set() # To store the result + + # BFS using a deque + queue = deque([node_id]) + + while queue: + current_node = queue.popleft() + + # Avoid re-visiting nodes + if current_node in visited: + continue + visited.add(current_node) + + # Check predecessors (upstream neighbors) + for predecessor in self.graph.predecessors(current_node): + if predecessor not in visited: + upstream_nodes.add(predecessor) + + # Stop traversal if 'uitlaat' is True + if not self.graph.nodes[predecessor].get("function", "inlet"): + queue.append(predecessor) + + return upstream_nodes def _downstream_nodes(self, node_id): # get downstream nodes diff --git a/src/ribasim_nl/ribasim_nl/parametrization/target_level.py b/src/ribasim_nl/ribasim_nl/parametrization/target_level.py index d5c5304..794aabc 100644 --- a/src/ribasim_nl/ribasim_nl/parametrization/target_level.py +++ b/src/ribasim_nl/ribasim_nl/parametrization/target_level.py @@ -1,10 +1,6 @@ -import numpy as np import pandas as pd from ribasim_nl import Model -from ribasim_nl.parametrization.conversions import round_to_precision - -valid_node_types = ["Outlet", "TabulatedRatingCurve", "Pump"] def upstream_target_levels( @@ -75,91 +71,3 @@ def downstream_target_levels( series = df.reset_index().set_index("node_id").sort_index()["downstream_target_level"] return series - - -def upstream_target_level( - model: Model, node_id: int, target_level_column: str = "meta_streefpeil", precision: int | float = 0.01 -) -> float: - f"""Return the upstream target-level of a connector Node if it is provided in as column-value in a the basin.area - - Args: - model (Model): 
Ribasim model - node_id (int): node_id of connector Node - target_level_column (str, optional): column in basin.area that holds target_levels. Defaults to "meta_streefpeil". - precision (float): The rounding precision (e.g., 10, 100, 0.1). Default = 0.01 - - Raises: - ValueError: if node_id does not belong to a connector Node with type {valid_node_types} - - Returns: - float: upstream target-level - """ - # check if we have a valid node to get upstream_level from - node_type = model.get_node_type(node_id=node_id) - if node_type not in valid_node_types: - raise ValueError(f"node_type of node_id {node_id} not valid: {node_type} not in {valid_node_types}") - - us_node_id = model.upstream_node_id(node_id=node_id) - return get_target_level( - model=model, node_id=us_node_id, target_level_column=target_level_column, precision=precision - ) - - -def downstream_target_level( - model: Model, node_id: int, target_level_column: str = "meta_streefpeil", precision: int | float = 0.01 -) -> float: - f"""Return the downstream target-level of a connector Node if it is provided in as column-value in a the basin.area - - Args: - model (Model): Ribasim model - node_id (int): node_id of connector Node - target_level_column (str, optional): column in basin.area that holds target_levels. Defaults to "meta_streefpeil". - precision (float): The rounding precision (e.g., 10, 100, 0.1). 
Default = 0.01 - - Raises: - ValueError: if node_id does not belong to a connector Node with type {valid_node_types} - - Returns: - float: donwstream target-level - """ - # check if we have a valid node to get upstream_level from - node_type = model.get_node_type(node_id=node_id) - if node_type not in valid_node_types: - raise ValueError(f"node_type of node_id {node_id} not valid: {node_type} not in {valid_node_types}") - - ds_node_id = model.downstream_node_id(node_id=node_id) - return get_target_level( - model=model, node_id=ds_node_id, target_level_column=target_level_column, precision=precision - ) - - -def get_target_level( - model: Model, node_id: int, target_level_column: str = "meta_streefpeil", precision: int | float = 0.01 -) -> float: - """Return the target-level of a basin if it is provided in as column-value in a the basin.area - - Args: - model (Model): Ribasim model - node_id (int): node_id of basin Node - target_level_column (str, optional): _description_. Defaults to "meta_streefpeil". - precision (float): The rounding precision (e.g., 10, 100, 0.1). 
Default = 0.01 - - Returns - ------- - float: _description_ - """ - # if us_node_id is None, we return NA - if node_id is None: - return np.nan - else: # if us_node_type != Basin, we don't have a target_level - us_node_type = model.get_node_type(node_id=node_id) - if us_node_type != "Basin": - return np.nan - else: - if node_id in model.basin.area.df.node_id.to_numpy(): - return round_to_precision( - number=model.basin.area.df.set_index("node_id").at[node_id, target_level_column], - precision=precision, - ) - else: - return np.nan From a43237876502fc98cd2832e1818cd742281c4cce Mon Sep 17 00:00:00 2001 From: Daniel Tollenaar Date: Mon, 20 Jan 2025 15:47:40 +0100 Subject: [PATCH 3/9] working upstream/downstream --- .../noorderzijlvest/02_parameterize_model.py | 37 ++++++++-- src/ribasim_nl/ribasim_nl/downstream.py | 43 +++++++++++ src/ribasim_nl/ribasim_nl/model.py | 72 +++++++++---------- src/ribasim_nl/ribasim_nl/upstream.py | 43 +++++++++++ 4 files changed, 150 insertions(+), 45 deletions(-) create mode 100644 src/ribasim_nl/ribasim_nl/downstream.py create mode 100644 src/ribasim_nl/ribasim_nl/upstream.py diff --git a/notebooks/noorderzijlvest/02_parameterize_model.py b/notebooks/noorderzijlvest/02_parameterize_model.py index baab505..b0a52ea 100644 --- a/notebooks/noorderzijlvest/02_parameterize_model.py +++ b/notebooks/noorderzijlvest/02_parameterize_model.py @@ -35,15 +35,38 @@ model = Model.read(ribasim_toml) start_time = time.time() -# %% -# parameterize +# %% add functions + +# functions +# update function - column for node_type in ["Pump", "Outlet"]: - # start with an empty static_df with the correct columns and meta_code_waterbeheerder - static_df = empty_static_df(model=model, node_type=node_type, meta_columns=["meta_code_waterbeheerder"]) + table = getattr(model, pascal_to_snake_case(node_type)).node + if node_type in static_data_sheets: + # add node_id to static_data + static_data = pd.read_excel(static_data_xlsx, sheet_name=node_type).set_index("code") 
+ static_data = static_data[static_data.index.isin(table.df["meta_code_waterbeheerder"])] + static_data.loc[:, "node_id"] = ( + table.df.reset_index().set_index("meta_code_waterbeheerder").loc[static_data.index].node_id + ) + static_data.set_index("node_id", inplace=True) + + # add function to node via categorie + for row in defaults_df.itertuples(): + category = row.Index + function = row.function + static_data.loc[static_data["categorie"] == category, "meta_function"] = function + table.df.loc[static_data.index, "meta_function"] = static_data["meta_function"] + + +# %% parameterize +for node_type in ["Pump", "Outlet"]: # update on static_table if node_type in static_data_sheets: + # start with an empty static_df with the correct columns and meta_code_waterbeheerder + static_df = empty_static_df(model=model, node_type=node_type, meta_columns=["meta_code_waterbeheerder"]) + static_data = pd.read_excel(static_data_xlsx, sheet_name=node_type).set_index("code") # in case there is more defined in static_data than in the model @@ -63,7 +86,6 @@ static_df.reset_index(inplace=True) # update-function from defaults - for row in defaults_df.itertuples(): category = row.Index mask = static_df["meta_categorie"] == category @@ -79,7 +101,10 @@ flow_rate = np.array( [ round_to_significant_digits( - model.get_upstream_basins(node_id).area.sum() * flow_rate_mm_per_day / 1000 / 86400 + model.get_upstream_basins(node_id, stop_at_inlet=True).area.sum() + * flow_rate_mm_per_day + / 1000 + / 86400 ) for node_id in static_df[mask][sub_mask].node_id ], diff --git a/src/ribasim_nl/ribasim_nl/downstream.py b/src/ribasim_nl/ribasim_nl/downstream.py new file mode 100644 index 0000000..c222dcb --- /dev/null +++ b/src/ribasim_nl/ribasim_nl/downstream.py @@ -0,0 +1,43 @@ +from collections import deque + +import networkx as nx + + +def downstream_nodes(graph: nx.DiGraph, node_id: int, stop_at_outlet=True): + """Efficiently find all downstream nodes in a directed graph starting from a given node, 
+ stopping traversal at nodes stopping at the next outlet. + + Parameters + ---------- + - graph (nx.DiGraph): The directed graph. + - start_node: The node to start the search from. + - stop_at_outlet (bool): To stop at the next inlet(s) + + Returns + ------- + - set: A set of all downstream nodes excluding the starting node. + """ # noqa: D205 + visited = set() # To keep track of visited nodes + node_ids = set({node_id}) # To store the result + + # BFS using a deque + queue = deque([node_id]) + + while queue: + current_node = queue.popleft() + + # Avoid re-visiting nodes + if current_node in visited: + continue + visited.add(current_node) + + # Check successors (downstream neighbors) + for successor in graph.successors(current_node): + if successor not in visited: + node_ids.add(successor) + + # Stop traversal if 'function' is 'outlet' + if (not graph.nodes[successor].get("function", "outlet")) | (not stop_at_outlet): + queue.append(successor) + + return node_ids diff --git a/src/ribasim_nl/ribasim_nl/model.py b/src/ribasim_nl/ribasim_nl/model.py index 882b87c..31f010b 100644 --- a/src/ribasim_nl/ribasim_nl/model.py +++ b/src/ribasim_nl/ribasim_nl/model.py @@ -1,6 +1,5 @@ # %% import warnings -from collections import deque from pathlib import Path from typing import Literal @@ -17,8 +16,10 @@ from shapely.geometry.base import BaseGeometry from ribasim_nl.case_conversions import pascal_to_snake_case +from ribasim_nl.downstream import downstream_nodes from ribasim_nl.geometry import split_basin from ribasim_nl.run_model import run +from ribasim_nl.upstream import upstream_nodes manning_data = manning_resistance.Static(length=[100], manning_n=[0.04], profile_width=[10], profile_slope=[1]) level_data = level_boundary.Static(level=[0]) @@ -95,14 +96,31 @@ def basin_outstate(self): def graph(self): # create a DiGraph from edge-table if self._graph is None: - self._graph = nx.from_pandas_edgelist( + graph = nx.from_pandas_edgelist( df=self.edge.df[["from_node_id", 
"to_node_id"]], source="from_node_id", target="to_node_id", create_using=nx.DiGraph, ) + if "meta_function" not in self.node_table().df.columns: + node_attributes = {node_id: {"function": ""} for node_id in self.node_table().df.index} + else: + node_attributes = ( + self.node_table() + .df.rename(columns={"meta_function": "function"})[["function"]] + .to_dict(orient="index") + ) + nx.set_node_attributes(graph, node_attributes) + + self._graph = graph + return self._graph + @property + def reset_graph(self): + self._graph = None + return self.graph + @property def next_node_id(self): return self.node_table().df.index.max() + 1 @@ -127,54 +145,30 @@ def update_state(self, time_stamp: pd.Timestamp | None = None): self.basin.state.df = df # methods relying on networkx. Discuss making this all in a subclass of Model - # def _upstream_nodes(self, node_id): - # # get upstream nodes - # return list(nx.traversal.bfs_tree(self.graph, node_id, reverse=True)) - - def _upstream_nodes(self, node_id): - visited = set() # To keep track of visited nodes - upstream_nodes = set() # To store the result - - # BFS using a deque - queue = deque([node_id]) - - while queue: - current_node = queue.popleft() - - # Avoid re-visiting nodes - if current_node in visited: - continue - visited.add(current_node) - - # Check predecessors (upstream neighbors) - for predecessor in self.graph.predecessors(current_node): - if predecessor not in visited: - upstream_nodes.add(predecessor) - - # Stop traversal if 'uitlaat' is True - if not self.graph.nodes[predecessor].get("function", "inlet"): - queue.append(predecessor) - - return upstream_nodes + def _upstream_nodes(self, node_id, **kwargs): + # get upstream nodes + # return list(nx.traversal.bfs_tree(self.graph, node_id, reverse=True)) + return upstream_nodes(graph=self.graph, node_id=node_id, **kwargs) - def _downstream_nodes(self, node_id): + def _downstream_nodes(self, node_id, **kwargs): # get downstream nodes - return 
list(nx.traversal.bfs_tree(self.graph, node_id)) + return downstream_nodes(graph=self.graph, node_id=node_id, **kwargs) + # return list(nx.traversal.bfs_tree(self.graph, node_id)) - def get_upstream_basins(self, node_id): + def get_upstream_basins(self, node_id, **kwargs): # get upstream basin area - upstream_node_ids = self._upstream_nodes(node_id) + upstream_node_ids = self._upstream_nodes(node_id, **kwargs) return self.basin.area.df[self.basin.area.df.node_id.isin(upstream_node_ids)] - def get_upstream_edges(self, node_id): + def get_upstream_edges(self, node_id, **kwargs): # get upstream edges - upstream_node_ids = self._upstream_nodes(node_id) + upstream_node_ids = self._upstream_nodes(node_id, **kwargs) mask = self.edge.df.from_node_id.isin(upstream_node_ids[1:]) & self.edge.df.to_node_id.isin(upstream_node_ids) return self.edge.df[mask] - def get_downstream_edges(self, node_id): + def get_downstream_edges(self, node_id, **kwargs): # get upstream edges - downstream_node_ids = self._downstream_nodes(node_id) + downstream_node_ids = self._downstream_nodes(node_id, **kwargs) mask = self.edge.df.from_node_id.isin(downstream_node_ids) & self.edge.df.to_node_id.isin( downstream_node_ids[1:] ) diff --git a/src/ribasim_nl/ribasim_nl/upstream.py b/src/ribasim_nl/ribasim_nl/upstream.py new file mode 100644 index 0000000..5432910 --- /dev/null +++ b/src/ribasim_nl/ribasim_nl/upstream.py @@ -0,0 +1,43 @@ +from collections import deque + +import networkx as nx + + +def upstream_nodes(graph: nx.DiGraph, node_id: int, stop_at_inlet: bool = False): + """Efficiently find all upstream nodes in a directed graph starting from a given node, + stopping traversal at nodes stopping at the next inlet. + + Parameters + ---------- + - graph (nx.DiGraph): The directed graph. + - start_node (int): The node to start the search from. + - stop_at_inlet (bool): To stop at the next inlet(s) + + Returns + ------- + - set: A set of all upstream nodes excluding the starting node. 
+ """ # noqa: D205 + visited = set() # To keep track of visited nodes + node_ids = set({node_id}) # To store the result + + # BFS using a deque + queue = deque([node_id]) + + while queue: + current_node = queue.popleft() + + # Avoid re-visiting nodes + if current_node in visited: + continue + visited.add(current_node) + + # Check predecessors (upstream neighbors) + for predecessor in graph.predecessors(current_node): + if predecessor not in visited: + node_ids.add(predecessor) + + # Stop traversal if 'function' is 'inlet' + if (not graph.nodes[predecessor].get("function", "inlet")) | (not stop_at_inlet): + queue.append(predecessor) + + return node_ids From b388e68f861dafe147ed763aab62b8ff5a713a85 Mon Sep 17 00:00:00 2001 From: Daniel Tollenaar Date: Mon, 20 Jan 2025 20:04:18 +0100 Subject: [PATCH 4/9] working version --- .../noorderzijlvest/02_parameterize_model.py | 34 ++++++++++++------- src/ribasim_nl/ribasim_nl/downstream.py | 4 +-- src/ribasim_nl/ribasim_nl/model.py | 5 +++ src/ribasim_nl/ribasim_nl/upstream.py | 3 +- 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/notebooks/noorderzijlvest/02_parameterize_model.py b/notebooks/noorderzijlvest/02_parameterize_model.py index b0a52ea..6ce495a 100644 --- a/notebooks/noorderzijlvest/02_parameterize_model.py +++ b/notebooks/noorderzijlvest/02_parameterize_model.py @@ -97,19 +97,27 @@ # fill nan with flow_rate_mm_day if provided if not pd.isna(row.flow_rate_mm_per_day): - flow_rate_mm_per_day = row.flow_rate_mm_per_day - flow_rate = np.array( - [ - round_to_significant_digits( - model.get_upstream_basins(node_id, stop_at_inlet=True).area.sum() - * flow_rate_mm_per_day - / 1000 - / 86400 - ) - for node_id in static_df[mask][sub_mask].node_id - ], - dtype=float, - ) + unit_conversion = row.flow_rate_mm_per_day / 1000 / 86400 + if row.function == "outlet": + flow_rate = np.array( + [ + round_to_significant_digits( + model.get_upstream_basins(node_id, stop_at_inlet=True).area.sum() * unit_conversion + ) + 
for node_id in static_df[mask][sub_mask].node_id + ], + dtype=float, + ) + elif row.function == "inlet": + flow_rate = np.array( + [ + round_to_significant_digits( + model.get_downstream_basins(node_id, stop_at_outlet=True).area.sum() * unit_conversion + ) + for node_id in static_df[mask][sub_mask].node_id + ], + dtype=float, + ) static_df.loc[indices, "flow_rate"] = flow_rate elif not pd.isna(row.flow_rate): static_df.loc[indices, "flow_rate"] = row.flow_rate diff --git a/src/ribasim_nl/ribasim_nl/downstream.py b/src/ribasim_nl/ribasim_nl/downstream.py index c222dcb..d7a6484 100644 --- a/src/ribasim_nl/ribasim_nl/downstream.py +++ b/src/ribasim_nl/ribasim_nl/downstream.py @@ -3,7 +3,7 @@ import networkx as nx -def downstream_nodes(graph: nx.DiGraph, node_id: int, stop_at_outlet=True): +def downstream_nodes(graph: nx.DiGraph, node_id: int, stop_at_outlet: bool = False): """Efficiently find all downstream nodes in a directed graph starting from a given node, stopping traversal at nodes stopping at the next outlet. 
@@ -37,7 +37,7 @@ def downstream_nodes(graph: nx.DiGraph, node_id: int, stop_at_outlet=True): node_ids.add(successor) # Stop traversal if 'function' is 'outlet' - if (not graph.nodes[successor].get("function", "outlet")) | (not stop_at_outlet): + if (not stop_at_outlet) | (not graph.nodes[successor].get("function") == "outlet"): queue.append(successor) return node_ids diff --git a/src/ribasim_nl/ribasim_nl/model.py b/src/ribasim_nl/ribasim_nl/model.py index 31f010b..a22ce14 100644 --- a/src/ribasim_nl/ribasim_nl/model.py +++ b/src/ribasim_nl/ribasim_nl/model.py @@ -160,6 +160,11 @@ def get_upstream_basins(self, node_id, **kwargs): upstream_node_ids = self._upstream_nodes(node_id, **kwargs) return self.basin.area.df[self.basin.area.df.node_id.isin(upstream_node_ids)] + def get_downstream_basins(self, node_id, **kwargs): + # get upstream basin area + downstream_node_ids = self._downstream_nodes(node_id, **kwargs) + return self.basin.area.df[self.basin.area.df.node_id.isin(downstream_node_ids)] + def get_upstream_edges(self, node_id, **kwargs): # get upstream edges upstream_node_ids = self._upstream_nodes(node_id, **kwargs) diff --git a/src/ribasim_nl/ribasim_nl/upstream.py b/src/ribasim_nl/ribasim_nl/upstream.py index 5432910..64ffaa5 100644 --- a/src/ribasim_nl/ribasim_nl/upstream.py +++ b/src/ribasim_nl/ribasim_nl/upstream.py @@ -1,3 +1,4 @@ +# %% from collections import deque import networkx as nx @@ -37,7 +38,7 @@ def upstream_nodes(graph: nx.DiGraph, node_id: int, stop_at_inlet: bool = False) node_ids.add(predecessor) # Stop traversal if 'function' is 'inlet' - if (not graph.nodes[predecessor].get("function", "inlet")) | (not stop_at_inlet): + if (not stop_at_inlet) | (not graph.nodes[predecessor].get("function") == "inlet"): queue.append(predecessor) return node_ids From 90152f21b9bd131c7ae270b92e9601abe404a436 Mon Sep 17 00:00:00 2001 From: Daniel Tollenaar Date: Thu, 23 Jan 2025 09:35:10 +0100 Subject: [PATCH 5/9] names and updated to main --- 
notebooks/noorderzijlvest/01_fix_model.py | 14 ++++++++++++- pixi.lock | 24 +++++++++++------------ 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/notebooks/noorderzijlvest/01_fix_model.py b/notebooks/noorderzijlvest/01_fix_model.py index 4a1d4fe..ddfe6d5 100644 --- a/notebooks/noorderzijlvest/01_fix_model.py +++ b/notebooks/noorderzijlvest/01_fix_model.py @@ -10,6 +10,7 @@ from ribasim_nl import CloudStorage, Model, Network, NetworkValidator from ribasim_nl.case_conversions import pascal_to_snake_case +from ribasim_nl.gkw import get_data_from_gkw from ribasim_nl.reset_static_tables import reset_static_tables cloud = CloudStorage() @@ -306,10 +307,21 @@ def get_network_node(point): model.outlet.node.df.loc[:, "meta_code_waterbeheerder"] = model.outlet.node.df.name model.pump.node.df.loc[:, "meta_code_waterbeheerder"] = model.pump.node.df.name +df = get_data_from_gkw(authority="Noorderzijlvest", layers=["gemaal", "stuw", "sluis"]) +df.set_index("code", inplace=True) +names = df["naam"] +names.loc["KSL011"] = "R.J. 
Cleveringensluizen" + # remove names and clean columns for node_type in model.node_table().df.node_type.unique(): table = getattr(model, pascal_to_snake_case(node_type)) - table.node.df.name = "" + + if "meta_code_waterbeheerder" in table.node.df.columns: + table.node.df.loc[:, "name"] = table.node.df["meta_code_waterbeheerder"].apply( + lambda x: names[x] if x in names.index.to_numpy() else "" + ) + else: + table.node.df.name = "" columns = [col for col in table.node.df.columns if col in node_columns] table.node.df = table.node.df[columns] diff --git a/pixi.lock b/pixi.lock index 4007b2f..1e375d3 100644 --- a/pixi.lock +++ b/pixi.lock @@ -11644,20 +11644,20 @@ packages: timestamp: 1598024297745 - pypi: ../Ribasim/python/ribasim name: ribasim - version: 2025.1.0 - sha256: bc76f0d6b28e203efbaa6ced5c6e7c542e2304e784989ec6c3152a516cd1c041 + version: 2024.11.0 + sha256: ba8a6a24955e974dd0d7ae81ad59546134417f48eabce10cb280ac5f8c3290db requires_dist: - - geopandas>=1.0 - - matplotlib>=3.7 - - numpy>=1.25 - - pandas>=2.0 + - geopandas + - matplotlib + - numpy + - pandas - pandera>=0.20 - - pyarrow>=17.0 - - pydantic>=2.0 - - pyogrio>=0.8 + - pyarrow + - pydantic~=2.0 + - pyogrio - shapely>=2.0 - - tomli-w>=1.0 - - tomli>=2.0 + - tomli + - tomli-w - jinja2 ; extra == 'all' - networkx ; extra == 'all' - pytest ; extra == 'all' @@ -11675,7 +11675,7 @@ packages: - pytest-xdist ; extra == 'tests' - ribasim-testmodels ; extra == 'tests' - teamcity-messages ; extra == 'tests' - requires_python: '>=3.11' + requires_python: '>=3.10' editable: true - conda: https://conda.anaconda.org/conda-forge/noarch/ribasim-2025.1.0-pyhd8ed1ab_0.conda sha256: 67804f3024f7d3b020a34ddbfef75892ac338bdb443bc52e9014cc10190a1f88 From 440bed7481a210d02511b1f15890e5aafdb532f0 Mon Sep 17 00:00:00 2001 From: Daniel Tollenaar Date: Thu, 23 Jan 2025 12:19:10 +0100 Subject: [PATCH 6/9] to functions --- .../noorderzijlvest/02_parameterize_model.py | 100 +++++++++++++++--- 1 file changed, 83 insertions(+), 17 
deletions(-) diff --git a/notebooks/noorderzijlvest/02_parameterize_model.py b/notebooks/noorderzijlvest/02_parameterize_model.py index 6ce495a..163f138 100644 --- a/notebooks/noorderzijlvest/02_parameterize_model.py +++ b/notebooks/noorderzijlvest/02_parameterize_model.py @@ -1,5 +1,7 @@ # %% import time +from pathlib import Path +from typing import Literal import numpy as np import pandas as pd @@ -24,8 +26,6 @@ "parameters", "static_data.xlsx", ) -static_data_sheets = pd.ExcelFile(static_data_xlsx).sheet_names -defaults_df = pd.read_excel(static_data_xlsx, sheet_name="defaults", index_col=0) # read model @@ -34,15 +34,25 @@ ribasim_toml = cloud.joinpath(authority, "modellen", f"{authority}_fix_model", f"{short_name}.toml") model = Model.read(ribasim_toml) -start_time = time.time() -# %% add functions +# %% add functions # functions - -# update function - column -for node_type in ["Pump", "Outlet"]: - table = getattr(model, pascal_to_snake_case(node_type)).node +def add_function_to_nodes(model: Model, node_type: Literal["Pump", "Outlet"], static_data_xlsx: Path) -> Model: + """Add function-column 'meta_function' to ribasim.Model node table using an Excel spreadsheet with data. + + Args: + model (Model): Ribasim model + node_type (Literal["Pump", "Outlet"]): Either "Pump" or "Outlet". 
+ static_data_xlsx (Path): Excel spreadsheet with node_types + + Returns + ------- + Model: updated model + """ + static_data_sheets = pd.ExcelFile(static_data_xlsx).sheet_names + # update function - column if node_type in static_data_sheets: + table = getattr(model, pascal_to_snake_case(node_type)).node # add node_id to static_data static_data = pd.read_excel(static_data_xlsx, sheet_name=node_type).set_index("code") static_data = static_data[static_data.index.isin(table.df["meta_code_waterbeheerder"])] @@ -52,6 +62,7 @@ static_data.set_index("node_id", inplace=True) # add function to node via categorie + defaults_df = pd.read_excel(static_data_xlsx, sheet_name="defaults", index_col=0) for row in defaults_df.itertuples(): category = row.Index function = row.function @@ -59,21 +70,41 @@ table.df.loc[static_data.index, "meta_function"] = static_data["meta_function"] + return model -# %% parameterize -for node_type in ["Pump", "Outlet"]: - # update on static_table + +def create_static_df( + model: Model, + node_type: Literal["Pump", "Outlet"], + static_data_xlsx: Path | None = None, + code_column: str = "meta_code_waterbeheerder", +) -> pd.DataFrame: + """Create static df and update from Excel spreadsheet. + + Args: + model (Model): Ribasim model + node_type (Literal["Pump", "Outlet"]): Either "Pump" or "Outlet". 
+ static_data_xlsx (Path): Excel spreadsheet with node_types + + Returns + ------- + pd.DataFrame DataFrame in format of static table (ignoring NoData) + """ + # start with an empty static_df with the correct columns and meta_code_waterbeheerder + static_df = empty_static_df(model=model, node_type=node_type, meta_columns=[code_column]) + + # update data with static data in Excel + static_data_sheets = pd.ExcelFile(static_data_xlsx).sheet_names if node_type in static_data_sheets: - # start with an empty static_df with the correct columns and meta_code_waterbeheerder - static_df = empty_static_df(model=model, node_type=node_type, meta_columns=["meta_code_waterbeheerder"]) + static_df = empty_static_df(model=model, node_type=node_type, meta_columns=[code_column]) static_data = pd.read_excel(static_data_xlsx, sheet_name=node_type).set_index("code") # in case there is more defined in static_data than in the model - static_data = static_data[static_data.index.isin(static_df["meta_code_waterbeheerder"])] + static_data = static_data[static_data.index.isin(static_df[code_column])] # update-function from static_data in Excel - static_data.loc[:, "node_id"] = static_df.set_index("meta_code_waterbeheerder").loc[static_data.index].node_id + static_data.loc[:, "node_id"] = static_df.set_index(code_column).loc[static_data.index].node_id static_data.columns = [i if i in static_df.columns else f"meta_{i}" for i in static_data.columns] static_data = static_data.set_index("node_id") @@ -85,7 +116,22 @@ static_df.loc[series.index.to_numpy(), col] = series static_df.reset_index(inplace=True) + return static_df + + +def defaults_to_static_df(static_df: pd.DataFrame, static_data_xlsx: Path) -> pd.DataFrame: + """Fill nodata in static table with defaults. 
+ + Args: + static_df (pd.DataFrame): DataFrame in format of static table (with nodata) + static_data_xlsx (Path): Excel containing defaults table + + Returns + ------- + pd.DataFrame: DataFrame in format of static table + """ # update-function from defaults + defaults_df = pd.read_excel(static_data_xlsx, sheet_name="defaults", index_col=0) for row in defaults_df.itertuples(): category = row.Index mask = static_df["meta_categorie"] == category @@ -122,7 +168,7 @@ elif not pd.isna(row.flow_rate): static_df.loc[indices, "flow_rate"] = row.flow_rate else: - raise ValueError(f"Can't set flow_rate for node_ids {static_df.loc[indices, "node_id"].to_numpy()}") + raise ValueError(f"Can't set flow_rate for node_ids {static_df.loc[indices, 'node_id'].to_numpy()}") # min_upstream_level sub_mask = static_df[mask]["min_upstream_level"].isna() @@ -150,7 +196,27 @@ ).round(2) static_df.reset_index(inplace=True) - # sanitize df + return static_df + + +start_time = time.time() + +# %% add (meta_)function to node-table +for node_type in ["Pump", "Outlet"]: + model = add_function_to_nodes(model=model, static_data_xlsx=static_data_xlsx, node_type=node_type) + +# %% add static_data to static-tables and fill nan with defaults +for node_type in ["Pump", "Outlet"]: + # create static table from Excel data + static_df = create_static_df( + model=model, + node_type=node_type, + static_data_xlsx=static_data_xlsx, + code_column="meta_code_waterbeheerder", + ) + # fill with defaults + static_df = defaults_to_static_df(static_df=static_df, static_data_xlsx=static_data_xlsx) + # sanitize df and update model static_df.drop(columns=["meta_code_waterbeheerder"], inplace=True) getattr(model, pascal_to_snake_case(node_type)).static.df = static_df From b507952d889ade41b3c386b2fc3b0f24530008ec Mon Sep 17 00:00:00 2001 From: Daniel Tollenaar Date: Fri, 24 Jan 2025 11:18:27 +0100 Subject: [PATCH 7/9] as class method --- .../noorderzijlvest/02_parameterize_model.py | 212 +----------------- 
src/ribasim_nl/ribasim_nl/model.py | 9 + .../ribasim_nl/parametrization/__init__.py | 3 - .../parametrization/basin_tables.py | 76 +++++++ .../ribasim_nl/parametrization/conversions.py | 7 +- .../{empty_static_df.py => empty_table.py} | 20 +- .../manning_resistance_table.py | 39 ++++ .../ribasim_nl/parametrization/node_table.py | 43 ++++ .../parametrization/parameterize.py | 44 ++++ .../parametrization/pump_and_outlet_tables.py | 157 +++++++++++++ .../parametrization/target_level.py | 2 +- 11 files changed, 402 insertions(+), 210 deletions(-) create mode 100644 src/ribasim_nl/ribasim_nl/parametrization/basin_tables.py rename src/ribasim_nl/ribasim_nl/parametrization/{empty_static_df.py => empty_table.py} (62%) create mode 100644 src/ribasim_nl/ribasim_nl/parametrization/manning_resistance_table.py create mode 100644 src/ribasim_nl/ribasim_nl/parametrization/node_table.py create mode 100644 src/ribasim_nl/ribasim_nl/parametrization/parameterize.py create mode 100644 src/ribasim_nl/ribasim_nl/parametrization/pump_and_outlet_tables.py diff --git a/notebooks/noorderzijlvest/02_parameterize_model.py b/notebooks/noorderzijlvest/02_parameterize_model.py index 163f138..c433206 100644 --- a/notebooks/noorderzijlvest/02_parameterize_model.py +++ b/notebooks/noorderzijlvest/02_parameterize_model.py @@ -1,19 +1,7 @@ # %% import time -from pathlib import Path -from typing import Literal - -import numpy as np -import pandas as pd from ribasim_nl import CloudStorage, Model -from ribasim_nl.case_conversions import pascal_to_snake_case -from ribasim_nl.parametrization import empty_static_df -from ribasim_nl.parametrization.conversions import round_to_significant_digits -from ribasim_nl.parametrization.target_level import ( - downstream_target_levels, - upstream_target_levels, -) cloud = CloudStorage() authority = "Noorderzijlvest" @@ -28,203 +16,25 @@ ) -# read model +# %% -# read model +# read ribasim_toml = cloud.joinpath(authority, "modellen", f"{authority}_fix_model", 
f"{short_name}.toml") model = Model.read(ribasim_toml) - -# %% add functions -# functions -def add_function_to_nodes(model: Model, node_type: Literal["Pump", "Outlet"], static_data_xlsx: Path) -> Model: - """Add function-column 'meta_function' to ribasim.Model node table using an Excel spreadsheet with data. - - Args: - model (Model): Ribasim model - node_type (Literal["Pump", "Outlet"]): Either "Pump" or "Outlet". - static_data_xlsx (Path): Excel spreadsheet with node_types - - Returns - ------- - Model: updated model - """ - static_data_sheets = pd.ExcelFile(static_data_xlsx).sheet_names - # update function - column - if node_type in static_data_sheets: - table = getattr(model, pascal_to_snake_case(node_type)).node - # add node_id to static_data - static_data = pd.read_excel(static_data_xlsx, sheet_name=node_type).set_index("code") - static_data = static_data[static_data.index.isin(table.df["meta_code_waterbeheerder"])] - static_data.loc[:, "node_id"] = ( - table.df.reset_index().set_index("meta_code_waterbeheerder").loc[static_data.index].node_id - ) - static_data.set_index("node_id", inplace=True) - - # add function to node via categorie - defaults_df = pd.read_excel(static_data_xlsx, sheet_name="defaults", index_col=0) - for row in defaults_df.itertuples(): - category = row.Index - function = row.function - static_data.loc[static_data["categorie"] == category, "meta_function"] = function - - table.df.loc[static_data.index, "meta_function"] = static_data["meta_function"] - - return model - - -def create_static_df( - model: Model, - node_type: Literal["Pump", "Outlet"], - static_data_xlsx: Path | None = None, - code_column: str = "meta_code_waterbeheerder", -) -> pd.DataFrame: - """Create static df and update from Excel spreadsheet. - - Args: - model (Model): Ribasim model - node_type (Literal["Pump", "Outlet"]): Either "Pump" or "Outlet". 
- static_data_xlsx (Path): Excel spreadsheet with node_types - - Returns - ------- - pd.DataFrame DataFrame in format of static table (ignoring NoData) - """ - # start with an empty static_df with the correct columns and meta_code_waterbeheerder - static_df = empty_static_df(model=model, node_type=node_type, meta_columns=[code_column]) - - # update data with static data in Excel - static_data_sheets = pd.ExcelFile(static_data_xlsx).sheet_names - if node_type in static_data_sheets: - static_df = empty_static_df(model=model, node_type=node_type, meta_columns=[code_column]) - - static_data = pd.read_excel(static_data_xlsx, sheet_name=node_type).set_index("code") - - # in case there is more defined in static_data than in the model - static_data = static_data[static_data.index.isin(static_df[code_column])] - - # update-function from static_data in Excel - static_data.loc[:, "node_id"] = static_df.set_index(code_column).loc[static_data.index].node_id - static_data.columns = [i if i in static_df.columns else f"meta_{i}" for i in static_data.columns] - static_data = static_data.set_index("node_id") - - static_df.set_index("node_id", inplace=True) - for col in static_data.columns: - series = static_data[static_data[col].notna()][col] - if col == "flow_rate": - series = series.apply(round_to_significant_digits) - static_df.loc[series.index.to_numpy(), col] = series - static_df.reset_index(inplace=True) - - return static_df - - -def defaults_to_static_df(static_df: pd.DataFrame, static_data_xlsx: Path) -> pd.DataFrame: - """Fill nodata in static table with defaults. 
- - Args: - static_df (pd.DataFrame): DataFrame in format of static table (with nodata) - static_data_xlsx (Path): Excel containing defaults table - - Returns - ------- - pd.DataFrame: DataFrame in format of static table - """ - # update-function from defaults - defaults_df = pd.read_excel(static_data_xlsx, sheet_name="defaults", index_col=0) - for row in defaults_df.itertuples(): - category = row.Index - mask = static_df["meta_categorie"] == category - - # flow_rate; all in sub_mask == True needs to be filled - sub_mask = static_df[mask]["flow_rate"].isna() - indices = sub_mask.index.to_numpy() - if sub_mask.any(): - # fill nan with flow_rate_mm_day if provided - - if not pd.isna(row.flow_rate_mm_per_day): - unit_conversion = row.flow_rate_mm_per_day / 1000 / 86400 - if row.function == "outlet": - flow_rate = np.array( - [ - round_to_significant_digits( - model.get_upstream_basins(node_id, stop_at_inlet=True).area.sum() * unit_conversion - ) - for node_id in static_df[mask][sub_mask].node_id - ], - dtype=float, - ) - elif row.function == "inlet": - flow_rate = np.array( - [ - round_to_significant_digits( - model.get_downstream_basins(node_id, stop_at_outlet=True).area.sum() * unit_conversion - ) - for node_id in static_df[mask][sub_mask].node_id - ], - dtype=float, - ) - static_df.loc[indices, "flow_rate"] = flow_rate - elif not pd.isna(row.flow_rate): - static_df.loc[indices, "flow_rate"] = row.flow_rate - else: - raise ValueError(f"Can't set flow_rate for node_ids {static_df.loc[indices, 'node_id'].to_numpy()}") - - # min_upstream_level - sub_mask = static_df[mask]["min_upstream_level"].isna() - if sub_mask.any(): - # calculate upstream levels - upstream_level_offset = row.upstream_level_offset - upstream_levels = upstream_target_levels(model=model, node_ids=static_df[mask][sub_mask].node_id) - - # assign upstream levels to static_df - static_df.set_index("node_id", inplace=True) - static_df.loc[upstream_levels.index, "min_upstream_level"] = ( - upstream_levels 
- upstream_level_offset - ).round(2) - static_df.reset_index(inplace=True) - sub_mask = static_df[mask]["max_downstream_level"].isna() - if sub_mask.any(): - # calculate downstream_levels - downstream_level_offset = row.downstream_level_offset - downstream_levels = downstream_target_levels(model=model, node_ids=static_df[mask][sub_mask].node_id) - - # assign upstream levels to static_df - static_df.set_index("node_id", inplace=True) - static_df.loc[downstream_levels.index, "max_downstream_level"] = ( - downstream_levels + downstream_level_offset - ).round(2) - static_df.reset_index(inplace=True) - - return static_df - - start_time = time.time() +# %% +# parameterize +model.parameterize(static_data_xlsx=static_data_xlsx, precipitation_mm_per_day=10) +print("Elapsed Time:", time.time() - start_time, "seconds") -# %% add (meta_)function to node-table -for node_type in ["Pump", "Outlet"]: - model = add_function_to_nodes(model=model, static_data_xlsx=static_data_xlsx, node_type=node_type) - -# %% add static_data to static-tables and fill nan with defaults -for node_type in ["Pump", "Outlet"]: - # create static table from Excel data - static_df = create_static_df( - model=model, - node_type=node_type, - static_data_xlsx=static_data_xlsx, - code_column="meta_code_waterbeheerder", - ) - # fill with defaults - static_df = defaults_to_static_df(static_df=static_df, static_data_xlsx=static_data_xlsx) - # sanitize df and update model - static_df.drop(columns=["meta_code_waterbeheerder"], inplace=True) - getattr(model, pascal_to_snake_case(node_type)).static.df = static_df - - -# %% write +# %% # Write model ribasim_toml = cloud.joinpath(authority, "modellen", f"{authority}_parameterized_model", f"{short_name}.toml") model.write(ribasim_toml) -print("Elapsed Time:", time.time() - start_time, "seconds") +# %% + +# test-run +assert model.run() == 0 diff --git a/src/ribasim_nl/ribasim_nl/model.py b/src/ribasim_nl/ribasim_nl/model.py index a22ce14..9789360 100644 --- 
a/src/ribasim_nl/ribasim_nl/model.py +++ b/src/ribasim_nl/ribasim_nl/model.py @@ -18,6 +18,7 @@ from ribasim_nl.case_conversions import pascal_to_snake_case from ribasim_nl.downstream import downstream_nodes from ribasim_nl.geometry import split_basin +from ribasim_nl.parametrization.parameterize import Parameterize from ribasim_nl.run_model import run from ribasim_nl.upstream import upstream_nodes @@ -77,6 +78,14 @@ class Model(Model): _basin_results: Results | None = None _basin_outstate: Results | None = None _graph: nx.Graph | None = None + _parameterize: Parameterize | None = None + + def __init__(self, **data): + super().__init__(**data) + self._parameterize = Parameterize(model=self) + + def parameterize(self, **kwargs): + self._parameterize.run(**kwargs) @property def basin_results(self): diff --git a/src/ribasim_nl/ribasim_nl/parametrization/__init__.py b/src/ribasim_nl/ribasim_nl/parametrization/__init__.py index 3541dac..e69de29 100644 --- a/src/ribasim_nl/ribasim_nl/parametrization/__init__.py +++ b/src/ribasim_nl/ribasim_nl/parametrization/__init__.py @@ -1,3 +0,0 @@ -from ribasim_nl.parametrization.empty_static_df import empty_static_df - -__all__ = ["empty_static_df"] diff --git a/src/ribasim_nl/ribasim_nl/parametrization/basin_tables.py b/src/ribasim_nl/ribasim_nl/parametrization/basin_tables.py new file mode 100644 index 0000000..89a36e6 --- /dev/null +++ b/src/ribasim_nl/ribasim_nl/parametrization/basin_tables.py @@ -0,0 +1,76 @@ +import pandas as pd + +from ribasim_nl.model import Model +from ribasim_nl.parametrization.conversions import round_to_precision +from ribasim_nl.parametrization.empty_table import empty_table_df + + +def update_basin_static( + model: Model, precipitation_mm_per_day: float | None = None, evaporation_mm_per_day: float | None = None +): + """Add precipitation and/or evaporation to the model.basin.static table from basin.area + + Args: + model (Model): Ribasim Model + precipitation_mm_per_day (float | None, optional): 
Precipitation in mm/day. Defaults to None. + evaporation_mm_per_day (float | None, optional): Evaporation in mm/day. Defaults to None. + """ + static_df = empty_table_df(model, node_type="Basin", table_type="Static", fill_value=0) + + area = model.basin.area.df.set_index("node_id").geometry.area + if precipitation_mm_per_day is not None: + precipitation = area * (precipitation_mm_per_day * 0.001 / 86400) # m3/s + static_df.loc[:, "precipitation"] = precipitation[static_df.node_id].to_numpy() + if evaporation_mm_per_day is not None: + evaporation = area * (evaporation_mm_per_day * 0.001 / 86400) # m3/s + static_df.loc[:, "evaporation"] = evaporation[static_df.node_id].to_numpy() + + +def update_basin_profile( + model: Model, + percentages_map: dict = {"hoofdwater": 90, "doorgaand": 10, "bergend": 3}, + default_percentage: int = 10, + profile_depth=2, +): + # read profile from basin-table + profile = model.basin.area.df.copy() + + # determine the profile area, which is also used for the profile + # profile["area"] = profile["geometry"].area * (default_percentage / 100) + profile = profile[["node_id", "meta_streefpeil", "geometry"]] + profile = profile.rename(columns={"meta_streefpeil": "level"}) + + # get open-water percentages per category + profile["percentage"] = default_percentage + for category, percentage in percentages_map.items(): + node_ids = model.basin.node.df.loc[model.basin.node.df.meta_categorie == category].index.to_numpy() + profile.loc[profile.node_id.isin(node_ids), "percentage"] = percentage + + # calculate area at invert from percentage + profile["area"] = profile["geometry"].area * profile["percentage"] / 100 + profile.drop(columns=["geometry", "percentage"], inplace=True) + + # define the profile-bottom + profile_bottom = profile.copy() + profile_bottom["area"] = 0.1 + profile_bottom["level"] -= profile_depth + + # define the profile slightly above the bottom of the bakje + profile_slightly_above_bottom = profile.copy() + 
profile_slightly_above_bottom["level"] -= profile_depth - 0.01 # remain one centimeter above the bottom + + # combine all profiles by concatenating them, and sort on node_id, level and area. + profile_df = pd.concat([profile_bottom, profile_slightly_above_bottom, profile]) + profile_df = profile_df.sort_values(by=["node_id", "level", "area"], ascending=True).reset_index(drop=True) + profile_df.loc[:, "area"] = profile_df["area"].apply(round_to_precision, args=(0.1,)) + + model.basin.profile.df = profile_df + + +def update_basin_state(model: Model): + """Update basin state by max profile-level + + Args: + model (Model): Ribasim Model + """ + model.basin.state.df = model.basin.profile.df.groupby("node_id").max().reset_index()[["node_id", "level"]] diff --git a/src/ribasim_nl/ribasim_nl/parametrization/conversions.py b/src/ribasim_nl/ribasim_nl/parametrization/conversions.py index a6b818d..d25e168 100644 --- a/src/ribasim_nl/ribasim_nl/parametrization/conversions.py +++ b/src/ribasim_nl/ribasim_nl/parametrization/conversions.py @@ -45,8 +45,11 @@ def round_to_precision(number: float, precision: float | int): return number number = Decimal(str(number)) - precision = Decimal(str(precision)) - rounded = (number / precision).quantize(Decimal("1"), rounding=ROUND_HALF_UP) * precision + if precision == 0: + rounded = (number).quantize(Decimal("1"), rounding=ROUND_HALF_UP) + else: + precision = Decimal(str(precision)) + rounded = (number / precision).quantize(Decimal("1"), rounding=ROUND_HALF_UP) * precision return float(rounded) diff --git a/src/ribasim_nl/ribasim_nl/parametrization/empty_static_df.py b/src/ribasim_nl/ribasim_nl/parametrization/empty_table.py similarity index 62% rename from src/ribasim_nl/ribasim_nl/parametrization/empty_static_df.py rename to src/ribasim_nl/ribasim_nl/parametrization/empty_table.py index df9ed15..45c45a5 100644 --- a/src/ribasim_nl/ribasim_nl/parametrization/empty_static_df.py +++ b/src/ribasim_nl/ribasim_nl/parametrization/empty_table.py 
@@ -1,11 +1,19 @@ +from typing import Literal + import pandas as pd from ribasim import nodes -from ribasim_nl import Model from ribasim_nl.case_conversions import pascal_to_snake_case +from ribasim_nl.model import Model -def empty_static_df(model: Model, node_type: str, meta_columns: list[str] = []) -> pd.DataFrame: +def empty_table_df( + model: Model, + node_type: str, + table_type: Literal["Static", "Profile"], + meta_columns: list[str] = [], + fill_value: float | None = None, +) -> pd.DataFrame: """Return an empty DataFrame for a Static-table. This allows the user to generate a DataFrame for a Static table with correct dtypes yet empty fields when not allowed by schema @@ -14,6 +22,7 @@ def empty_static_df(model: Model, node_type: str, meta_columns: list[str] = []) model (Model): ribasim Model node_type (str): node_type to get DataFrame for meta_columns (list, optional): meta-columns from Node-table to append to DataFrame. Defaults to []. + fill_value (float, optional): float value to fill all numeric non-meta and not node_id columns with Returns ------- @@ -24,7 +33,9 @@ def empty_static_df(model: Model, node_type: str, meta_columns: list[str] = []) meta_columns = [i for i in meta_columns if i in node.node.df.columns] # get correct dtypes - dtypes = getattr(nodes, pascal_to_snake_case(node_type)).Static(**{k: [] for k in node.static.columns()}).df.dtypes + dtypes = getattr(getattr(nodes, pascal_to_snake_case(node_type)), table_type)( + **{k: [] for k in node.static.columns()} + ).df.dtypes # populate dataframe df = node.node.df.reset_index()[["node_id"] + meta_columns] @@ -35,4 +46,7 @@ def empty_static_df(model: Model, node_type: str, meta_columns: list[str] = []) else: df[column] = pd.Series(dtype=dtype) + if (fill_value is not None) and pd.api.types.is_numeric_dtype(dtype) and (column != "node_id"): + df.loc[:, column] = fill_value + return df[dtypes.index.to_list() + meta_columns] diff --git 
a/src/ribasim_nl/ribasim_nl/parametrization/manning_resistance_table.py b/src/ribasim_nl/ribasim_nl/parametrization/manning_resistance_table.py new file mode 100644 index 0000000..d57783c --- /dev/null +++ b/src/ribasim_nl/ribasim_nl/parametrization/manning_resistance_table.py @@ -0,0 +1,39 @@ +from ribasim_nl.model import Model +from ribasim_nl.parametrization.conversions import round_to_precision +from ribasim_nl.parametrization.empty_table import empty_table_df + + +def update_manning_resistance_static( + model: Model, profile_slope: float = 1, profile_width: float = 25, manning_n: float = 0.04 +): + """Generate a default manning-table. + + Args: + model (Model): Ribasim Model + profile_slope (float, optional): Slope of the cross section talud. Defaults to 1. + profile_width (float, optional): _description_. Defaults to 25. + manning_n (float, optional): _description_. Defaults to 0.04. + + Returns + ------- + pd.DataFrame: dataframe for static Manning-table in Ribasim model + """ + # empty dataframe + static_df = empty_table_df(model=model, node_type="ManningResistance", table_type="Static") + + # length from length edges + length = [ + round_to_precision( + model.edge.df[(model.edge.df.from_node_id == node_id) | (model.edge.df.to_node_id == node_id)].length.sum(), + precision=10, + ) + for node_id in static_df.node_id + ] + + # set variables to dataframe + static_df.loc[:, "length"] = length + static_df.loc[:, "profile_slope"] = profile_slope + static_df.loc[:, "profile_width"] = profile_width + static_df.loc[:, "manning_n"] = manning_n + + model.manning_resistance.static.df = static_df diff --git a/src/ribasim_nl/ribasim_nl/parametrization/node_table.py b/src/ribasim_nl/ribasim_nl/parametrization/node_table.py new file mode 100644 index 0000000..3c1448d --- /dev/null +++ b/src/ribasim_nl/ribasim_nl/parametrization/node_table.py @@ -0,0 +1,43 @@ +from pathlib import Path +from typing import Literal + +import pandas as pd + +from ribasim_nl.case_conversions 
import pascal_to_snake_case +from ribasim_nl.model import Model + + +def populate_function_column(model: Model, node_type: Literal["Pump", "Outlet"], static_data_xlsx: Path) -> Model: + """Add function-column 'meta_function' to ribasim.Model node table using an Excel spreadsheet with data. + + Args: + model (Model): Ribasim model + node_type (Literal["Pump", "Outlet"]): Either "Pump" or "Outlet". + static_data_xlsx (Path): Excel spreadsheet with node_types + + Returns + ------- + Model: updated model + """ + static_data_sheets = pd.ExcelFile(static_data_xlsx).sheet_names + # update function - column + if node_type in static_data_sheets: + table = getattr(model, pascal_to_snake_case(node_type)).node + # add node_id to static_data + static_data = pd.read_excel(static_data_xlsx, sheet_name=node_type).set_index("code") + static_data = static_data[static_data.index.isin(table.df["meta_code_waterbeheerder"])] + static_data.loc[:, "node_id"] = ( + table.df.reset_index().set_index("meta_code_waterbeheerder").loc[static_data.index].node_id + ) + static_data.set_index("node_id", inplace=True) + + # add function to node via categorie + defaults_df = pd.read_excel(static_data_xlsx, sheet_name="defaults", index_col=0) + for row in defaults_df.itertuples(): + category = row.Index + function = row.function + static_data.loc[static_data["categorie"] == category, "meta_function"] = function + + table.df.loc[static_data.index, "meta_function"] = static_data["meta_function"] + + return model diff --git a/src/ribasim_nl/ribasim_nl/parametrization/parameterize.py b/src/ribasim_nl/ribasim_nl/parametrization/parameterize.py new file mode 100644 index 0000000..55ff7bd --- /dev/null +++ b/src/ribasim_nl/ribasim_nl/parametrization/parameterize.py @@ -0,0 +1,44 @@ +from pathlib import Path + +from pydantic import BaseModel + +from ribasim_nl.model import Model +from ribasim_nl.parametrization.basin_tables import update_basin_profile, update_basin_state, update_basin_static +from 
ribasim_nl.parametrization.manning_resistance_table import update_manning_resistance_static +from ribasim_nl.parametrization.node_table import populate_function_column +from ribasim_nl.parametrization.pump_and_outlet_tables import update_pump_outlet_static + + +class Parameterize(BaseModel): + model: Model + static_data_xlsx: Path | None = None + precipitation_mm_per_day: int | None = None + evaporation_mm_per_day: int | None = None + + def run(self, **kwargs): + # update class properties + for key, value in kwargs.items(): + if value is not None: + setattr(self, key, value) + + # add meta_function as we will need that for further parametization of Outlet and Pump + if "meta_function" not in self.model.node_table().df.columns: + for node_type in ["Pump", "Outlet"]: + populate_function_column(model=self.model, static_data_xlsx=self.static_data_xlsx, node_type=node_type) + + # parameterize Pump and Outlet nodes + for node_type in ["Pump", "Outlet"]: + update_pump_outlet_static( + self.model, + node_type=node_type, + static_data_xlsx=self.static_data_xlsx, + code_column="meta_code_waterbeheerder", + ) + + # ManningResistance + update_manning_resistance_static(self.model) + + # Basin profile and state + update_basin_profile(model=self.model) + update_basin_state(model=self.model) + update_basin_static(model=self.model, precipitation_mm_per_day=10) diff --git a/src/ribasim_nl/ribasim_nl/parametrization/pump_and_outlet_tables.py b/src/ribasim_nl/ribasim_nl/parametrization/pump_and_outlet_tables.py new file mode 100644 index 0000000..181408c --- /dev/null +++ b/src/ribasim_nl/ribasim_nl/parametrization/pump_and_outlet_tables.py @@ -0,0 +1,157 @@ +from pathlib import Path +from typing import Literal + +import numpy as np +import pandas as pd + +from ribasim_nl.case_conversions import pascal_to_snake_case +from ribasim_nl.model import Model +from ribasim_nl.parametrization.conversions import round_to_significant_digits +from ribasim_nl.parametrization.empty_table import 
empty_table_df +from ribasim_nl.parametrization.target_level import downstream_target_levels, upstream_target_levels + + +def create_static_df( + model: Model, + node_type: Literal["Pump", "Outlet"], + static_data_xlsx: Path | None = None, + code_column: str = "meta_code_waterbeheerder", +) -> pd.DataFrame: + """Create static df and update from Excel spreadsheet. + + Args: + model (Model): Ribasim model + node_type (Literal["Pump", "Outlet"]): Either "Pump" or "Outlet". + static_data_xlsx (Path): Excel spreadsheet with node_types + + Returns + ------- + pd.DataFrame DataFrame in format of static table (ignoring NoData) + """ + # start with an empty static_df with the correct columns and meta_code_waterbeheerder + static_df = empty_table_df(model=model, node_type=node_type, table_type="Static", meta_columns=[code_column]) + + # update data with static data in Excel + if static_data_xlsx is not None: + static_data_sheets = pd.ExcelFile(static_data_xlsx).sheet_names + if node_type in static_data_sheets: + static_data = pd.read_excel(static_data_xlsx, sheet_name=node_type).set_index("code") + + # in case there is more defined in static_data than in the model + static_data = static_data[static_data.index.isin(static_df[code_column])] + + # update-function from static_data in Excel + static_data.loc[:, "node_id"] = static_df.set_index(code_column).loc[static_data.index].node_id + static_data.columns = [i if i in static_df.columns else f"meta_{i}" for i in static_data.columns] + static_data = static_data.set_index("node_id") + + static_df.set_index("node_id", inplace=True) + for col in static_data.columns: + series = static_data[static_data[col].notna()][col] + if col == "flow_rate": + series = series.apply(round_to_significant_digits) + static_df.loc[series.index.to_numpy(), col] = series + static_df.reset_index(inplace=True) + + return static_df + + +def defaults_to_static_df(model: Model, static_df: pd.DataFrame, static_data_xlsx: Path) -> pd.DataFrame: + """Fill 
nodata in static table with defaults. + + Args: + model (Model): Ribasim model + static_df (pd.DataFrame): DataFrame in format of static table (with nodata) + static_data_xlsx (Path): Excel containing defaults table + + Returns + ------- + pd.DataFrame: DataFrame in format of static table + """ + # update-function from defaults + defaults_df = pd.read_excel(static_data_xlsx, sheet_name="defaults", index_col=0) + for row in defaults_df.itertuples(): + category = row.Index + mask = static_df["meta_categorie"] == category + + # flow_rate; all in sub_mask == True needs to be filled + sub_mask = static_df[mask]["flow_rate"].isna() + indices = sub_mask.index.to_numpy() + if sub_mask.any(): + # fill nan with flow_rate_mm_day if provided + + if not pd.isna(row.flow_rate_mm_per_day): + unit_conversion = row.flow_rate_mm_per_day / 1000 / 86400 + if row.function == "outlet": + flow_rate = np.array( + [ + round_to_significant_digits( + model.get_upstream_basins(node_id, stop_at_inlet=True).area.sum() * unit_conversion + ) + for node_id in static_df[mask][sub_mask].node_id + ], + dtype=float, + ) + elif row.function == "inlet": + flow_rate = np.array( + [ + round_to_significant_digits( + model.get_downstream_basins(node_id, stop_at_outlet=True).area.sum() * unit_conversion + ) + for node_id in static_df[mask][sub_mask].node_id + ], + dtype=float, + ) + static_df.loc[indices, "flow_rate"] = flow_rate + elif not pd.isna(row.flow_rate): + static_df.loc[indices, "flow_rate"] = row.flow_rate + else: + raise ValueError(f"Can't set flow_rate for node_ids {static_df.loc[indices, 'node_id'].to_numpy()}") + + # min_upstream_level + sub_mask = static_df[mask]["min_upstream_level"].isna() + if sub_mask.any(): + # calculate upstream levels + upstream_level_offset = row.upstream_level_offset + upstream_levels = upstream_target_levels(model=model, node_ids=static_df[mask][sub_mask].node_id) + + # assign upstream levels to static_df + static_df.set_index("node_id", inplace=True) + 
static_df.loc[upstream_levels.index, "min_upstream_level"] = ( + upstream_levels - upstream_level_offset + ).round(2) + static_df.reset_index(inplace=True) + sub_mask = static_df[mask]["max_downstream_level"].isna() + if sub_mask.any(): + # calculate downstream_levels + downstream_level_offset = row.downstream_level_offset + downstream_levels = downstream_target_levels(model=model, node_ids=static_df[mask][sub_mask].node_id) + + # assign upstream levels to static_df + static_df.set_index("node_id", inplace=True) + static_df.loc[downstream_levels.index, "max_downstream_level"] = ( + downstream_levels + downstream_level_offset + ).round(2) + static_df.reset_index(inplace=True) + + return static_df + + +def update_pump_outlet_static( + model: Model, + node_type: Literal["Pump", "Outlet"], + static_data_xlsx: Path | None = None, + code_column: str = "meta_code_waterbeheerder", +): + # init static_table with static_data_xlsx + static_df = create_static_df( + model=model, + node_type=node_type, + static_data_xlsx=static_data_xlsx, + code_column=code_column, + ) + # fill with defaults + static_df = defaults_to_static_df(model=model, static_df=static_df, static_data_xlsx=static_data_xlsx) + # sanitize df and update model + static_df.drop(columns=["meta_code_waterbeheerder"], inplace=True) + getattr(model, pascal_to_snake_case(node_type)).static.df = static_df diff --git a/src/ribasim_nl/ribasim_nl/parametrization/target_level.py b/src/ribasim_nl/ribasim_nl/parametrization/target_level.py index 794aabc..9e12853 100644 --- a/src/ribasim_nl/ribasim_nl/parametrization/target_level.py +++ b/src/ribasim_nl/ribasim_nl/parametrization/target_level.py @@ -1,6 +1,6 @@ import pandas as pd -from ribasim_nl import Model +from ribasim_nl.model import Model def upstream_target_levels( From 3b81ea8dd0ce5cbd853ffbb8ddeb31892dbacda5 Mon Sep 17 00:00:00 2001 From: Daniel Tollenaar Date: Fri, 24 Jan 2025 11:21:58 +0100 Subject: [PATCH 8/9] sync cloud --- 
notebooks/noorderzijlvest/02_parameterize_model.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/notebooks/noorderzijlvest/02_parameterize_model.py b/notebooks/noorderzijlvest/02_parameterize_model.py index c433206..0ad1eb4 100644 --- a/notebooks/noorderzijlvest/02_parameterize_model.py +++ b/notebooks/noorderzijlvest/02_parameterize_model.py @@ -14,12 +14,16 @@ "parameters", "static_data.xlsx", ) +ribasim_dir = cloud.joinpath(authority, "modellen", f"{authority}_fix_model") +ribasim_toml = ribasim_dir / f"{short_name}.toml" +# you need the excel, but the model should be local-only by running 01_fix_model.py +cloud.synchronize(filepaths=[static_data_xlsx]) +cloud.synchronize(filepaths=[ribasim_dir], check_on_remote=False) # %% # read -ribasim_toml = cloud.joinpath(authority, "modellen", f"{authority}_fix_model", f"{short_name}.toml") model = Model.read(ribasim_toml) start_time = time.time() @@ -33,8 +37,3 @@ # Write model ribasim_toml = cloud.joinpath(authority, "modellen", f"{authority}_parameterized_model", f"{short_name}.toml") model.write(ribasim_toml) - -# %% - -# test-run -assert model.run() == 0 From 8bded2656b0839e077e2c794c9d2845a9274b6ba Mon Sep 17 00:00:00 2001 From: Martijn Visser Date: Mon, 27 Jan 2025 10:17:46 +0100 Subject: [PATCH 9/9] Undo pixi.lock changes --- pixi.lock | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pixi.lock b/pixi.lock index 1e375d3..4007b2f 100644 --- a/pixi.lock +++ b/pixi.lock @@ -11644,20 +11644,20 @@ packages: timestamp: 1598024297745 - pypi: ../Ribasim/python/ribasim name: ribasim - version: 2024.11.0 - sha256: ba8a6a24955e974dd0d7ae81ad59546134417f48eabce10cb280ac5f8c3290db + version: 2025.1.0 + sha256: bc76f0d6b28e203efbaa6ced5c6e7c542e2304e784989ec6c3152a516cd1c041 requires_dist: - - geopandas - - matplotlib - - numpy - - pandas + - geopandas>=1.0 + - matplotlib>=3.7 + - numpy>=1.25 + - pandas>=2.0 - pandera>=0.20 - - pyarrow - - pydantic~=2.0 
- - pyogrio + - pyarrow>=17.0 + - pydantic>=2.0 + - pyogrio>=0.8 - shapely>=2.0 - - tomli - - tomli-w + - tomli-w>=1.0 + - tomli>=2.0 - jinja2 ; extra == 'all' - networkx ; extra == 'all' - pytest ; extra == 'all' @@ -11675,7 +11675,7 @@ packages: - pytest-xdist ; extra == 'tests' - ribasim-testmodels ; extra == 'tests' - teamcity-messages ; extra == 'tests' - requires_python: '>=3.10' + requires_python: '>=3.11' editable: true - conda: https://conda.anaconda.org/conda-forge/noarch/ribasim-2025.1.0-pyhd8ed1ab_0.conda sha256: 67804f3024f7d3b020a34ddbfef75892ac338bdb443bc52e9014cc10190a1f88