Skip to content

Commit

Permalink
Enable migrations from older database schemas. (#1764)
Browse files Browse the repository at this point in the history
Fixes Deltares/Ribasim-NL#138

This has no schema version (stored in the geopackage) yet, but this is
something we could implement in the future.

---------

Co-authored-by: Marnix <[email protected]>
  • Loading branch information
evetion and deltamarnix authored Aug 29, 2024
1 parent 23bbbbc commit 8d89e74
Show file tree
Hide file tree
Showing 16 changed files with 302 additions and 87 deletions.
1 change: 1 addition & 0 deletions .teamcity/Templates/RegressionTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ open class RegressionTest (platformOs: String) : Template() {
scriptContent = header +
"""
pixi run python utils/get_benchmark.py %MiniO_credential_token% "benchmark/"
pixi run python utils/get_benchmark.py %MiniO_credential_token% "hws_2024_7_0/"
pixi run test-ribasim-regression
""".trimIndent()
}
Expand Down
61 changes: 1 addition & 60 deletions pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ repository = "https://github.com/Deltares/Ribasim"

[tasks]
# Tests
test-ribasim-python = "pytest --numprocesses=4 python/ribasim/tests"
test-ribasim-python-cov = "pytest --numprocesses=4 --cov=ribasim --cov-report=xml python/ribasim/tests"
test-ribasim-python = "pytest --numprocesses=4 -m 'not regression' python/ribasim/tests"
test-ribasim-python-cov = "pytest --numprocesses=4 --cov=ribasim --cov-report=xml -m 'not regression' python/ribasim/tests"
test-ribasim-api = "pytest --basetemp=python/ribasim_api/tests/temp --junitxml=report.xml python/ribasim_api/tests"

[feature.dev.tasks]
Expand Down Expand Up @@ -85,11 +85,13 @@ test-ribasim-cli = "pytest --numprocesses=4 --basetemp=build/tests/temp --junitx
test-ribasim-core = { cmd = "julia --project=core --eval 'using Pkg; Pkg.test()'", depends_on = [
"generate-testmodels",
] }
test-ribasim-migration = { cmd = "pytest --numprocesses=4 -m regression python/ribasim/tests" }
test-ribasim-core-cov = { cmd = "julia --project=core --eval 'using Pkg; Pkg.test(coverage=true, julia_args=[\"--check-bounds=yes\"])'", depends_on = [
"generate-testmodels",
] }
test-ribasim-regression = { cmd = "julia --project=core --eval 'using Pkg; Pkg.test(julia_args=[\"--check-bounds=yes\"], test_args=[\"regression\"])'", depends_on = [
"generate-testmodels",
"test-ribasim-migration",
] }
generate-testmodels = { cmd = "python utils/generate-testmodels.py", inputs = [
"python/ribasim",
Expand Down
5 changes: 5 additions & 0 deletions python/ribasim/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,8 @@ path = "ribasim/__init__.py"

[tool.hatch.build.targets.sdist]
artifacts = ["delwaq/reference/*", "delwaq/template/*"]

[tool.pytest.ini_options]
markers = [
"regression: Older models that are not on the current database schema.",
]
2 changes: 1 addition & 1 deletion python/ribasim/ribasim/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
__version__ = "2024.10.0"

__schema_version__ = 1

from ribasim.config import Allocation, Logging, Node, Solver
from ribasim.geometry.edge import EdgeTable
Expand Down
65 changes: 65 additions & 0 deletions python/ribasim/ribasim/db_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from contextlib import closing
from pathlib import Path
from sqlite3 import Connection, connect


def esc_id(identifier: str) -> str:
"""Escape SQLite identifiers."""
identifier = identifier.replace('"', '""')
return f'"{identifier}"'


def exists(connection: Connection, name: str) -> bool:
"""Check if a table exists in a SQLite database."""
with closing(connection.cursor()) as cursor:
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name=?", (name,)
)
result = cursor.fetchone()
return result is not None


def _set_gpkg_attribute_table(connection: Connection, table: str) -> None:
# Set geopackage attribute table
with closing(connection.cursor()) as cursor:
sql = "INSERT OR REPLACE INTO gpkg_contents (table_name, data_type, identifier) VALUES (?, ?, ?)"
cursor.execute(sql, (table, "attributes", table))
connection.commit()


CREATE_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS ribasim_metadata (
key TEXT PRIMARY KEY,
value TEXT
);
"""


def _get_db_schema_version(db_path: Path) -> int:
"""
Get the schema version of the database.
For older models, the version is assumed to be zero,
which is smaller than the initial schema version of the database.
"""
with closing(connect(db_path)) as connection:
if not exists(connection, "ribasim_metadata"):
return 0
with closing(connection.cursor()) as cursor:
cursor.execute(
"SELECT value FROM ribasim_metadata WHERE key='schema_version'"
)
return int(cursor.fetchone()[0])


def _set_db_schema_version(db_path: Path, version: int = 1) -> None:
with closing(connect(db_path)) as connection:
if not exists(connection, "metadata"):
with closing(connection.cursor()) as cursor:
cursor.execute(CREATE_TABLE_SQL)
cursor.execute(
"INSERT OR REPLACE INTO ribasim_metadata (key, value) VALUES ('schema_version', ?)",
(version,),
)
_set_gpkg_attribute_table(connection, "ribasim_metadata")
connection.commit()
36 changes: 15 additions & 21 deletions python/ribasim/ribasim/input_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from contextlib import closing
from contextvars import ContextVar
from pathlib import Path
from sqlite3 import Connection, connect
from sqlite3 import connect
from typing import (
Any,
Generic,
Expand All @@ -31,6 +31,12 @@
)

import ribasim
from ribasim.db_utils import (
_get_db_schema_version,
_set_gpkg_attribute_table,
esc_id,
exists,
)
from ribasim.schemas import _BaseSchema

from .styles import _add_styles_to_geopackage
Expand Down Expand Up @@ -67,21 +73,6 @@
TableT = TypeVar("TableT", bound=_BaseSchema)


def esc_id(identifier: str) -> str:
"""Escape SQLite identifiers."""
return '"' + identifier.replace('"', '""') + '"'


def exists(connection: Connection, name: str) -> bool:
"""Check if a table exists in a SQLite database."""
with closing(connection.cursor()) as cursor:
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name=?", (name,)
)
result = cursor.fetchone()
return result is not None


TABLES = ["profile", "state", "static", "time", "logic", "condition"]


Expand Down Expand Up @@ -178,9 +169,15 @@ class TableModel(FileModel, Generic[TableT]):

@field_validator("df")
@classmethod
def _check_extra_columns(cls, v: DataFrame[TableT]):
def _check_schema(cls, v: DataFrame[TableT]):
"""Allow only extra columns with `meta_` prefix."""
if isinstance(v, (pd.DataFrame, gpd.GeoDataFrame)):
# On reading from geopackage, migrate the tables when necessary
db_path = context_file_loading.get().get("database")
if db_path is not None:
version = _get_db_schema_version(db_path)
if version < ribasim.__schema_version__:
v = cls.tableschema().migrate(v)
for colname in v.columns:
if colname not in cls.columns() and not colname.startswith("meta_"):
raise ValueError(
Expand Down Expand Up @@ -277,11 +274,8 @@ def _write_geopackage(self, temp_path: Path) -> None:
dtype={"fid": "INTEGER PRIMARY KEY AUTOINCREMENT"},
)

_set_gpkg_attribute_table(connection, table)
# Set geopackage attribute table
with closing(connection.cursor()) as cursor:
sql = "INSERT INTO gpkg_contents (table_name, data_type, identifier) VALUES (?, ?, ?)"
cursor.execute(sql, (table, "attributes", table))
connection.commit()

def _write_arrow(self, filepath: Path, directory: Path, input_dir: Path) -> None:
"""Write the contents of the input to a an arrow file."""
Expand Down
71 changes: 71 additions & 0 deletions python/ribasim/ribasim/migrations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import warnings

from geopandas import GeoDataFrame
from pandas import DataFrame


def nodeschema_migration(gdf: GeoDataFrame) -> GeoDataFrame:
if "node_id" in gdf.columns:
warnings.warn("Migrating outdated Node table.", UserWarning)
assert gdf["node_id"].is_unique, "Node IDs have to be unique."
gdf.set_index("node_id", inplace=True)

return gdf


def edgeschema_migration(gdf: GeoDataFrame) -> GeoDataFrame:
if "from_node_type" in gdf.columns:
warnings.warn("Migrating outdated Edge table.", UserWarning)
gdf.drop("from_node_type", inplace=True, axis=1)
if "to_node_type" in gdf.columns:
warnings.warn("Migrating outdated Edge table.", UserWarning)
gdf.drop("to_node_type", inplace=True, axis=1)
if "edge_id" in gdf.columns:
warnings.warn("Migrating outdated Edge table.", UserWarning)
gdf.set_index("edge_id", inplace=True)

return gdf


def basinstaticschema_migration(df: DataFrame) -> DataFrame:
if "urban_runoff" in df.columns:
warnings.warn("Migrating outdated Basin / Static table.", UserWarning)
df.drop("urban_runoff", inplace=True, axis=1)

return df


def basintimeschema_migration(df: DataFrame) -> DataFrame:
if "urban_runoff" in df.columns:
warnings.warn("Migrating outdated Basin / Time table.", UserWarning)
df.drop("urban_runoff", inplace=True, axis=1)

return df


def continuouscontrolvariableschema_migration(df: DataFrame) -> DataFrame:
if "listen_node_type" in df.columns:
warnings.warn(
"Migrating outdated ContinuousControl / Variable table.", UserWarning
)
df.drop("listen_node_type", inplace=True, axis=1)

return df


def discretecontrolvariableschema_migration(df: DataFrame) -> DataFrame:
if "listen_node_type" in df.columns:
warnings.warn(
"Migrating outdated DiscreteControl / Variable table.", UserWarning
)
df.drop("listen_node_type", inplace=True, axis=1)

return df


def pidcontrolstaticschema_migration(df: DataFrame) -> DataFrame:
if "listen_node_type" in df.columns:
warnings.warn("Migrating outdated PidControl / Static table.", UserWarning)
df.drop("listen_node_type", inplace=True, axis=1)

return df
5 changes: 5 additions & 0 deletions python/ribasim/ribasim/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
Terminal,
UserDemand,
)
from ribasim.db_utils import _set_db_schema_version
from ribasim.geometry.edge import EdgeSchema, EdgeTable
from ribasim.geometry.node import NodeTable
from ribasim.input_base import (
Expand Down Expand Up @@ -180,12 +181,16 @@ def _save(self, directory: DirectoryPath, input_dir: DirectoryPath):
db_path.parent.mkdir(parents=True, exist_ok=True)
db_path.unlink(missing_ok=True)
context_file_writing.get()["database"] = db_path

self.edge._save(directory, input_dir)
node = self.node_table()

assert node.df is not None
node._save(directory, input_dir)

# Run after geopackage schema has been created
_set_db_schema_version(db_path, ribasim.__schema_version__)

for sub in self._nodes():
sub._save(directory, input_dir)

Expand Down
11 changes: 11 additions & 0 deletions python/ribasim/ribasim/schemas.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
# Automatically generated file. Do not modify.

from typing import Any, Callable

import pandera as pa
from pandera.dtypes import Int32, Timestamp
from pandera.typing import Index, Series

from ribasim import migrations


class _BaseSchema(pa.DataFrameModel):
class Config:
Expand All @@ -14,6 +18,13 @@ class Config:
def _index_name(self) -> str:
return "fid"

@classmethod
def migrate(cls, df: Any) -> Any:
f: Callable[[Any], Any] = getattr(
migrations, str(cls.__name__).lower() + "_migration", lambda x: x
)
return f(df)


class BasinConcentrationExternalSchema(_BaseSchema):
fid: Index[Int32] = pa.Field(default=1, check_name=True, coerce=True)
Expand Down
Loading

0 comments on commit 8d89e74

Please sign in to comment.