Skip to content

Commit

Permalink
Models API (#190)
Browse files Browse the repository at this point in the history
First draft of #134. API dynamically generated from a pooch registry. Alongside the registry a TOML file mapping model names to the set of model input files. These can be built and distributed with the package at release time. Then model input files can be downloaded at runtime.

This approach gives us the flexibility to support input files stored anywhere on GitHub, e.g. in the test models repos. It does not provide access to models defined with flopy, since this package does not depend on flopy.
  • Loading branch information
wpbonelli authored Mar 5, 2025
1 parent 835de1c commit e64ce4c
Show file tree
Hide file tree
Showing 5 changed files with 185 additions and 3 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -148,4 +148,6 @@ data_backup
autotest/temp/

# uv lockfile
uv.lock
uv.lock

modflow_devtools/data
30 changes: 30 additions & 0 deletions docs/md/models.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Models API

The `modflow_devtools.models` module provides programmatic access to MODFLOW 6 example models via [Pooch](https://www.fatiando.org/pooch/latest/index.html).

When the module is imported, it checks for the existence of the registry and models files. If they are found, it loads the registry and dynamically generates a function for each model, attaching them to the module namespace. If either file is missing, the import raises a `FileNotFoundError`.

Each function returns a list of files. Example usage:

```python
import modflow_devtools.models as models

files = models.some_model()
```

## Developers

The `make_registry.py` script is responsible for generating a registry text file and a mapping between files and models. This script should be run in the CI pipeline at release time before the package is built. The generated registry file and model mapping are used to create a pooch instance for fetching model files, and should be distributed with the package.

The script can be executed with `python -m modflow_devtools.make_registry` and accepts the following options:

- `--path` or `-p`: Specifies the directory containing model directories. If not provided, the default path is used.
- `--append` or `-a`: If specified, the script will append to the existing registry file instead of overwriting it.
- `--base-url` or `-b`: Specifies the base URL that is prepended to each file's relative path to form its download URL in the registry. If not provided, the default base URL is used.

For example, to create a registry of models in the MF6 test models repositories, each of which is checked out in the current working directory:

```shell
python -m modflow_devtools.make_registry -p modflow6-testmodels -b https://github.com/MODFLOW-ORG/modflow6-testmodels/raw/master
python -m modflow_devtools.make_registry -p modflow6-largetestmodels -a -b https://github.com/MODFLOW-ORG/modflow6-largetestmodels/raw/master
```
90 changes: 90 additions & 0 deletions modflow_devtools/make_registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import argparse
import hashlib
from pathlib import Path

import tomli_w as tomli

from modflow_devtools.misc import get_model_paths

# GitHub coordinates of this project; combined below into the default BASE_URL.
REPO_OWNER = "MODFLOW-ORG"
REPO_NAME = "modflow-devtools"
REPO_REF = "develop"

# Directory one level above the directory containing this file.
PROJ_ROOT = Path(__file__).parents[1]

# Location of the generated registry and the model-name -> files mapping.
# NOTE(review): REPO_NAME is hyphenated, so DATA_PATH resolves to
# "<root>/modflow-devtools/data"; if the Python package directory is
# "modflow_devtools" this is not inside the package — confirm intent.
DATA_RELPATH = "data"
DATA_PATH = PROJ_ROOT.joinpath(REPO_NAME, DATA_RELPATH)
REGISTRY_PATH = DATA_PATH.joinpath("registry.txt")
MODELS_PATH = DATA_PATH.joinpath("models.toml")

# Default base URL used when none is given on the command line.
BASE_URL = f"https://github.com/{REPO_OWNER}/{REPO_NAME}/raw/{REPO_REF}/{DATA_RELPATH}/"


def _sha256(path: Path) -> str:
    """
    Return the hex-encoded SHA256 digest of the file at the given path.

    The file is opened unbuffered and read in 128 KiB chunks into a single
    reusable buffer, so the whole file is never held in memory at once.
    Reference: https://stackoverflow.com/a/44873382/6514033
    """
    digest = hashlib.sha256()
    buffer = memoryview(bytearray(128 * 1024))
    with path.open("rb", buffering=0) as stream:
        while True:
            count = stream.readinto(buffer)
            # readinto reports 0 bytes once the end of the file is reached
            if count == 0:
                break
            digest.update(buffer[:count])
    return digest.hexdigest()


def write_registry(
    path: Path, registry_path: Path, base_url: str, append: bool = False
):
    """
    Write a registry file and a models.toml mapping for the models in a directory.

    Each file found under a model directory gets one registry line of the
    form ``<name> <sha256> <url>``. The name is the file's path relative to
    ``path`` with "/" and "-" replaced by "_"; the URL is ``base_url``
    joined with that relative path. The mapping file, written next to the
    registry as ``models.toml``, maps each model key (the file's parent
    directory, transformed the same way) to the list of its file names.

    Parameters
    ----------
    path : Path
        Directory containing model directories.
    registry_path : Path
        Registry file to write. Missing parent directories are created.
    base_url : str
        Base URL prepended to each file's relative path. A trailing "/"
        is tolerated.
    append : bool
        Append to existing registry and models files instead of
        overwriting them.

    Raises
    ------
    NotADirectoryError
        If ``path`` is not a directory.
    """
    # Validate before opening the registry: opening in "w" mode truncates it,
    # so a bad path must not be allowed to destroy an existing registry.
    if not path.is_dir():
        raise NotADirectoryError(f"Path {path} is not a directory.")

    registry_path.parent.mkdir(parents=True, exist_ok=True)

    # Avoid "//" in URLs when the base URL already ends with a slash.
    base_url = base_url.rstrip("/")
    models = {}
    exclude = [".DS_Store"]
    with registry_path.open("a+" if append else "w", encoding="utf-8") as f:
        for mp in get_model_paths(path):
            for p in mp.rglob("*"):
                # Use forward slashes on every platform so that names and
                # URLs do not contain backslashes on Windows.
                relpath = p.relative_to(path).as_posix()
                # Match against the relative path only, so a "compare" in
                # the directories leading up to `path` skips nothing.
                if "compare" in relpath:
                    continue
                if not p.is_file() or any(e in p.name for e in exclude):
                    continue
                name = relpath.replace("/", "_").replace("-", "_")
                digest = _sha256(p)
                url = f"{base_url}/{relpath}"
                f.write(f"{name} {digest} {url}\n")
                parent = p.relative_to(path).parent.as_posix()
                key = parent.replace("/", "_").replace("-", "_")
                models.setdefault(key, []).append(name)

    models_path = registry_path.parent / "models.toml"
    # NOTE(review): in append mode the new mapping is concatenated onto the
    # existing file; a model key already present there would presumably be
    # duplicated, which TOML parsers reject — confirm keys never collide.
    with models_path.open("ab+" if append else "wb") as mf:
        tomli.dump(models, mf)


if __name__ == "__main__":
    # Command-line entry point: build the registry and model mapping for a
    # directory of models, writing them to REGISTRY_PATH and its sibling
    # models.toml.
    parser = argparse.ArgumentParser(
        description="Generate a pooch registry and models file for a directory "
        "of models."
    )
    parser.add_argument(
        "--path",
        "-p",
        type=str,
        help="Directory containing model directories.",
    )
    parser.add_argument(
        "--append",
        "-a",
        action="store_true",
        help="Append to the registry file instead of overwriting.",
    )
    parser.add_argument(
        "--base-url",
        "-b",
        type=str,
        help="Base URL for the registry file.",
    )
    args = parser.parse_args()
    # Fall back to the module defaults when an option is omitted.
    path = Path(args.path) if args.path else DATA_PATH
    base_url = args.base_url if args.base_url else BASE_URL

    write_registry(path, REGISTRY_PATH, base_url, args.append)
49 changes: 49 additions & 0 deletions modflow_devtools/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from pathlib import Path

import pooch
import tomli

import modflow_devtools

# GitHub coordinates of this project; combined below into BASE_URL.
REPO_OWNER = "MODFLOW-ORG"
REPO_NAME = "modflow-devtools"
REPO_REF = "develop"

# Directory one level above the directory containing this file.
PROJ_ROOT = Path(__file__).parents[1]

# Expected location of the registry and the model-name -> files mapping.
# NOTE(review): REPO_NAME is hyphenated, so DATA_PATH resolves to
# "<root>/modflow-devtools/data"; if the Python package directory is
# "modflow_devtools" this is not inside the package — confirm intent.
DATA_RELPATH = "data"
DATA_PATH = PROJ_ROOT.joinpath(REPO_NAME, DATA_RELPATH)
REGISTRY_PATH = DATA_PATH.joinpath("registry.txt")
MODELS_PATH = DATA_PATH.joinpath("models.toml")

# Base URL handed to the pooch fetcher.
BASE_URL = f"https://github.com/{REPO_OWNER}/{REPO_NAME}/raw/{REPO_REF}/{DATA_RELPATH}/"
# Package version with any ".dev" suffix removed. partition (not rpartition)
# is used so that a release version containing no ".dev" marker is kept
# whole; rpartition(...)[0] would return an empty string in that case.
VERSION = modflow_devtools.__version__.partition(".dev")[0]

# Fetcher used by the generated model functions. The registry starts empty
# and is populated from REGISTRY_PATH below.
FETCHER = pooch.create(
    path=pooch.os_cache(REPO_NAME),
    base_url=BASE_URL,
    version=VERSION,
    registry=None,
)

# Both generated files must be present; importing this module fails otherwise.
if not REGISTRY_PATH.exists():
    raise FileNotFoundError(f"Registry file {REGISTRY_PATH} not found.")

if not MODELS_PATH.exists():
    raise FileNotFoundError(f"Models file {MODELS_PATH} not found.")

FETCHER.load_registry(REGISTRY_PATH)


def _generate_function(model_name: str, files: list) -> callable:
    """
    Build a zero-argument function that fetches a model's files.

    Parameters
    ----------
    model_name : str
        Name given to the generated function.
    files : list
        Registry names of the files belonging to the model.

    Returns
    -------
    callable
        Function that calls ``FETCHER.fetch`` on each file name and returns
        the results as a list.
    """

    def model_function() -> list:
        return [FETCHER.fetch(file) for file in files]

    # Present the function under the model's name everywhere it is displayed:
    # repr() uses __qualname__ and help() uses __doc__.
    model_function.__name__ = model_name
    model_function.__qualname__ = model_name
    model_function.__doc__ = (
        f"Fetch the input files for model '{model_name}' and return them "
        "as a list."
    )
    return model_function


def _make_functions(models_path: Path, registry_path: Path):
    """
    Create one module-level function per model listed in the models file.

    The TOML file at ``models_path`` is read as a mapping of model name to
    file names; each entry becomes a function in this module's global
    namespace under the model's name. ``registry_path`` is accepted but
    not used here.
    """
    with models_path.open("rb") as stream:
        mapping = tomli.load(stream)
    generated = {
        name: _generate_function(name, names) for name, names in mapping.items()
    }
    globals().update(generated)


# Generate the per-model functions at import time.
_make_functions(models_path=MODELS_PATH, registry_path=REGISTRY_PATH)
15 changes: 13 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,12 @@ dfn = [
"tomli",
"tomli-w"
]
dev = ["modflow-devtools[lint,test,docs,dfn]"]
models = [
"pooch",
"tomli",
"tomli-w"
]
dev = ["modflow-devtools[lint,test,docs,dfn,models]"]

[dependency-groups]
build = [
Expand Down Expand Up @@ -111,12 +116,18 @@ dfn = [
"tomli",
"tomli-w"
]
models = [
"pooch",
"tomli",
"tomli-w"
]
dev = [
{include-group = "build"},
{include-group = "lint"},
{include-group = "test"},
{include-group = "docs"},
{include-group = "dfn"}
{include-group = "dfn"},
{include-group = "models"},
]

[project.urls]
Expand Down

0 comments on commit e64ce4c

Please sign in to comment.