diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 05387e3..5d7c0bd 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -18,8 +18,6 @@ jobs: matrix: os: - ubuntu-latest - - macOS-latest - - windows-latest arch: - x86 steps: diff --git a/.typos.toml b/.typos.toml index d6baf54..71dcc56 100644 --- a/.typos.toml +++ b/.typos.toml @@ -1,2 +1,2 @@ [files] -extend-exclude = ["*.csv", "*.json"] +extend-exclude = ["*.csv", "*.json", "*.ipynb"] diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..f2bc796 --- /dev/null +++ b/environment.yml @@ -0,0 +1,36 @@ +# ribasim environment with some extra goodies to work in NL-context +name: ribasim + +channels: + - conda-forge + +dependencies: + - black + - black-jupyter + - build + - datamodel-code-generator + - geopandas + - jupyterlab + - matplotlib + - mypy + - openpyxl + - pandas!=2.1.0 + - pandas-stubs + - pandera + - pip + - pip: + - quartodoc + - pre-commit + - pyarrow + - pydantic=1 + - pyogrio + - pytest + - pytest-cov + - python>=3.9 + - ruff + - shapely>=2.0 + - tomli + - tomli-w + - twine + - xarray + - xmipy diff --git a/notebooks/nl-kunstwerken.ipynb b/notebooks/nl-kunstwerken.ipynb new file mode 100644 index 0000000..7a0137b --- /dev/null +++ b/notebooks/nl-kunstwerken.ipynb @@ -0,0 +1,199 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import geopandas as gpd\n", + "from shapely.geometry import Point\n", + "from pathlib import Path\n", + "import os\n", + "import requests" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Voorbereiding\n", + "\n", + "Globale variabelen\n", + "- `DATA_DIR`: De lokale directory waar project-data staat opgeslagen\n", + "- `EXCEL_FILE`: Het Excel-bestand dat moet worden ingelezen\n", + "- `CRS`: De projectie waarin de ruimtelijke data moet worden opgeslagen 
# %% Configuration
# Credentials and the local data directory come from the environment so that
# nothing secret or machine-specific is stored in the notebook.
DATA_DIR = os.getenv("RIBASIM_NL_DATA_DIR")
RIBASIM_NL_CLOUD_PASS = os.getenv("RIBASIM_NL_CLOUD_PASS")
assert DATA_DIR is not None, "environment variable RIBASIM_NL_DATA_DIR is not set"
assert (
    RIBASIM_NL_CLOUD_PASS is not None
), "environment variable RIBASIM_NL_CLOUD_PASS is not set"

EXCEL_FILE = r"# Overzicht kunstwerken primaire keringen waterschappen_ET.xlsx"
CRS = 28992  # EPSG code of the Dutch national grid (Rijksdriehoekstelsel)
RIBASIM_NL_CLOUD_USER = "nhi_api"
WEBDAV_URL = "https://deltares.thegood.cloud/remote.php/dav"
BASE_URL = f"{WEBDAV_URL}/files/{RIBASIM_NL_CLOUD_USER}/D-HYDRO modeldata"

# file-paths: the GeoPackage is written next to the Excel source
kunstwerken_xlsx = Path(DATA_DIR) / EXCEL_FILE
kunstwerken_gpkg = kunstwerken_xlsx.parent / "nl_kunstwerken.gpkg"


def upload_file(url, path, timeout=(10, 300)):
    """Upload a local file to ``url`` with an HTTP PUT request.

    The request is authenticated with ``RIBASIM_NL_CLOUD_USER`` and
    ``RIBASIM_NL_CLOUD_PASS``.

    Parameters
    ----------
    url : str
        Destination URL of the file.
    path : str or pathlib.Path
        Local file to upload; it is opened in binary mode and streamed.
    timeout : float or tuple, optional
        Passed to ``requests.put``; a ``(connect, read)`` tuple in seconds.
        Without a timeout ``requests`` waits indefinitely on a stalled server.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    with open(path, "rb") as f:
        r = requests.put(
            url,
            data=f,
            auth=(RIBASIM_NL_CLOUD_USER, RIBASIM_NL_CLOUD_PASS),
            timeout=timeout,
        )
    r.raise_for_status()


# %% Read the delivered Excel sheet
# One chained expression: no in-place mutation and no assignment on a filtered
# slice (which would trigger pandas' SettingWithCopyWarning).
kunstwerken_df = (
    pd.read_excel(kunstwerken_xlsx, skiprows=6)  # skip the rows above the header
    .iloc[:, 1:13]  # keep columns 1..12, drop the rest
    .rename(
        columns={
            "Unnamed: 1": "organisatie",
            "Y coördinaat RD": "y",
            "X coördinaat RD": "x",
        }
    )
    .dropna(subset=["organisatie"])  # rows without an organisation carry no data
    .assign(
        # text or missing coordinates become NaN instead of raising
        x=lambda df: pd.to_numeric(df["x"], errors="coerce"),
        y=lambda df: pd.to_numeric(df["y"], errors="coerce"),
    )
)

# %% Split on coordinate availability
# A single mask guarantees that the two layers below are exact complements.
has_xy = kunstwerken_df["x"].notna() & kunstwerken_df["y"].notna()

# %% Non-spatial layer: rows where x and/or y is missing
kunstwerken_non_spatial_df = kunstwerken_df[~has_xy]
kunstwerken_non_spatial_gdf = gpd.GeoDataFrame(
    kunstwerken_non_spatial_df, geometry=gpd.GeoSeries(), crs=CRS
)

# write to GeoPackage
kunstwerken_non_spatial_gdf.to_file(
    kunstwerken_gpkg, layer="kunstwerken (geen coordinaten)"
)

# %% Spatial layer: rows where both x and y are available
kunstwerken_spatial_df = kunstwerken_df[has_xy]
# points_from_xy is vectorised; the explicit index keeps the geometries aligned
# with the (filtered, hence non-contiguous) index of the attribute table.
geometry_series = gpd.GeoSeries(
    gpd.points_from_xy(kunstwerken_spatial_df["x"], kunstwerken_spatial_df["y"]),
    index=kunstwerken_spatial_df.index,
)
kunstwerken_spatial_gdf = gpd.GeoDataFrame(
    kunstwerken_spatial_df, geometry=geometry_series, crs=CRS
)

# write to GeoPackage
kunstwerken_spatial_gdf.to_file(kunstwerken_gpkg, layer="kunstwerken")

# %% Upload the GeoPackage
to_url = f"{BASE_URL}/Rijkswaterstaat/{kunstwerken_gpkg.name}"
upload_file(to_url, kunstwerken_gpkg)
# %% Configuration
# Credentials and the local data directory come from the environment so that
# nothing secret or machine-specific is stored in the notebook.
DATA_DIR = os.getenv("RIBASIM_NL_DATA_DIR")
RIBASIM_NL_CLOUD_PASS = os.getenv("RIBASIM_NL_CLOUD_PASS")
assert DATA_DIR is not None, "environment variable RIBASIM_NL_DATA_DIR is not set"
assert (
    RIBASIM_NL_CLOUD_PASS is not None
), "environment variable RIBASIM_NL_CLOUD_PASS is not set"

DATA_DIR = Path(DATA_DIR)
EXCEL_FILE = r"uitlaten_inlaten.xlsx"
CRS = 28992  # EPSG code of the Dutch national grid (Rijksdriehoekstelsel)
RIBASIM_NL_CLOUD_USER = "nhi_api"
WEBDAV_URL = "https://deltares.thegood.cloud/remote.php/dav"
BASE_URL = f"{WEBDAV_URL}/files/{RIBASIM_NL_CLOUD_USER}/D-HYDRO modeldata"

# file-paths: the GeoPackage gets the same stem as the Excel file
kunstwerken_xlsx = DATA_DIR / EXCEL_FILE
kunstwerken_gpkg = kunstwerken_xlsx.parent / f"{kunstwerken_xlsx.stem}.gpkg"


def upload_file(url, path, timeout=(10, 300)):
    """Upload a local file to ``url`` with an HTTP PUT request.

    The request is authenticated with ``RIBASIM_NL_CLOUD_USER`` and
    ``RIBASIM_NL_CLOUD_PASS``.

    Parameters
    ----------
    url : str
        Destination URL of the file.
    path : str or pathlib.Path
        Local file to upload; it is opened in binary mode and streamed.
    timeout : float or tuple, optional
        Passed to ``requests.put``; a ``(connect, read)`` tuple in seconds.
        Without a timeout ``requests`` waits indefinitely on a stalled server.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    with open(path, "rb") as f:
        r = requests.put(
            url,
            data=f,
            auth=(RIBASIM_NL_CLOUD_USER, RIBASIM_NL_CLOUD_PASS),
            timeout=timeout,
        )
    r.raise_for_status()


# %% Read the Excel sheet
kunstwerken_df = pd.read_excel(kunstwerken_xlsx)
# for now only structures that can be pulled from a source file are processed
files_mask = ~kunstwerken_df["damo_bestand"].isna()

# %% Collect the structures per HyDAMO layer
# one (initially empty) record list per HyDAMO object named in the sheet
data = {layer: [] for layer in kunstwerken_df["hydamo_object"].unique()}

# group by source file and check that it exists
for file, file_df in kunstwerken_df[files_mask].groupby("damo_bestand"):
    file = DATA_DIR.joinpath(file)
    assert file.exists(), f"source file not found: {file}"

    # list the layers once per file instead of once (or twice) per layer group
    file_layers = fiona.listlayers(file)

    # dropna=False: rows with an empty `damo_laag` would otherwise be dropped
    # silently by groupby, although single-layer files do not need a layer name
    for layer, layer_df in file_df.groupby("damo_laag", dropna=False):
        if len(file_layers) == 1:
            # in single-layer files the layer name in the sheet is unreliable
            # (users do not understand a layer-property), so it is ignored
            layer = file_layers[0]
        assert layer in file_layers, f"layer {layer!r} not found in {file.name}"
        print(f"reading {file.name}, layer {layer}")
        gdf = gpd.read_file(file, layer=layer)

        # cache of `gdf` indexed by identifier column, so the index is built
        # once per column rather than once per row
        indexed = {}

        # read every row in this file-layer group and get the source-info
        for kwk_row in layer_df.itertuples():
            # "code" or "naam": which of the two columns identifies the feature
            damo_index = kwk_row.damo_ident_kolom
            src_index = getattr(kwk_row, f"damo_{damo_index}_kolom").strip()
            index_value = str(kwk_row.damo_waarde)

            # look up the feature in the source layer
            # NOTE(review): assumes the identifier column holds unique string
            # values; duplicates make `.loc` return a frame - confirm per source
            if src_index not in indexed:
                indexed[src_index] = gdf.set_index(src_index)
            src_row = indexed[src_index].loc[index_value]

            # populate code and naam fields
            result = {}
            for damo_att in ["code", "naam"]:
                if damo_att == damo_index:
                    result[damo_att] = index_value
                else:
                    column = getattr(kwk_row, f"damo_{damo_att}_kolom").strip()
                    # item access instead of getattr: also works for column
                    # names that are not identifiers or that collide with
                    # Series attributes such as `name`
                    result[damo_att] = str(src_row[column])

            # take the centroid to avoid flattening all kinds of multi-features
            result["geometry"] = Point(src_row.geometry.centroid)

            # add it to our data dictionary
            data[kwk_row.hydamo_object].append(result)

# %% Write HyDAMO locally and upload to the cloud
hydamo = HyDAMO("2.2.1")
for layer, records in data.items():
    # "duikersifonhevel" is excluded as in the original notebook; the reason is
    # not visible here - TODO document it
    if layer == "duikersifonhevel":
        continue
    # an empty record list yields a frame without a geometry column, which
    # geopandas is expected to reject when a CRS is supplied - verify against
    # the installed version
    if not records:
        continue
    gdf = gpd.GeoDataFrame(records, crs=CRS)
    getattr(hydamo, layer).set_data(gdf, check_columns=False)

hydamo.to_geopackage(kunstwerken_gpkg)

for file in [kunstwerken_xlsx, kunstwerken_gpkg]:
    to_url = f"{BASE_URL}/HyDAMO_geconstrueerd/{file.name}"
    upload_file(to_url, file)
damo.stuw.naam[0] == "CASPARGOUW STUW" + assert not damo.stuw.empty + assert "CASPARGOUW STUW" in damo.stuw.naam.to_numpy() # export file to_path = path.with_suffix(".to.gpkg")