diff --git a/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md
similarity index 100%
rename from CODE_OF_CONDUCT.md
rename to .github/CODE_OF_CONDUCT.md
diff --git a/CONTRIBUTING.md b/.github/CONTRIBUTING.md
similarity index 100%
rename from CONTRIBUTING.md
rename to .github/CONTRIBUTING.md
diff --git a/.github/ci/min-core-deps.yml b/.github/ci/min-core-deps.yml
new file mode 100644
index 000000000..ce9d2a5a2
--- /dev/null
+++ b/.github/ci/min-core-deps.yml
@@ -0,0 +1,26 @@
+name: xarray-tests
+channels:
+  - conda-forge
+  - nodefaults
+dependencies:
+  # MINIMUM VERSIONS POLICY: keep track of minimum versions
+  # for core packages. Dev and conda release builds should use this as reference.
+  # Run .github/ci/min_deps_check.py to verify that this file respects the policy.
+  - python=3.10
+  - cftime=1.6
+  - cgen=2020.1
+  - dask=2022.8
+  - matplotlib-base=3.5
+  # netcdf follows a 1.major.minor[.patch] convention
+  # (see https://github.com/Unidata/netcdf4-python/issues/1090)
+  - netcdf4=1.6
+  - numpy=1.23
+  - platformdirs=2.5
+  - psutil=5.9
+  - pymbolic=2022.1
+  - pytest=7.1
+  - scipy=1.9
+  - trajan=0.1
+  - tqdm=4.64
+  - xarray=2022.6
+  - zarr=2.12
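Note on the pin format: the checker added below requires every core dependency to be pinned exactly as `package=major.minor`. A minimal sketch of how one entry is split, mirroring `parse_requirements()` in the script that follows (the `row` value here is illustrative):

```python
# Illustrative only: how an exact conda pin is parsed, as in parse_requirements().
row = "numpy=1.23"
pkg, eq, version = row.partition("=")  # -> ("numpy", "=", "1.23")
version_tup = tuple(int(x) for x in version.split("."))  # -> (1, 23)
assert eq == "=" and len(version_tup) in (2, 3)  # exact major.minor[.patch] pins only
print(pkg, version_tup)  # numpy (1, 23)
```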
diff --git a/.github/ci/min_deps_check.py b/.github/ci/min_deps_check.py
new file mode 100644
index 000000000..9fe8eaa39
--- /dev/null
+++ b/.github/ci/min_deps_check.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python
+"""Fetch all available versions of dependencies and their publication dates
+from the conda database. Compare them against .github/ci/min-core-deps.yml to
+verify that the policy on obsolete dependencies is being followed. Print a pretty report :)
+
+Adapted from xarray:
+https://github.com/pydata/xarray/blob/a04d857a03d1fb04317d636a7f23239cb9034491/ci/min_deps_check.py
+"""
+
+from __future__ import annotations
+
+import itertools
+import sys
+from collections.abc import Iterator
+from datetime import datetime
+
+import conda.api  # type: ignore[import]
+import yaml
+from dateutil.relativedelta import relativedelta
+
+CHANNELS = ["conda-forge", "defaults"]
+IGNORE_DEPS: set[str] = set()
+
+POLICY_MONTHS = {"python": 3 * 12}
+POLICY_MONTHS_DEFAULT = 24
+POLICY_OVERRIDE: dict[str, tuple[int, int]] = {}
+errors = []
+
+
+def error(msg: str) -> None:
+    global errors
+    errors.append(msg)
+    print("ERROR:", msg)
+
+
+def warning(msg: str) -> None:
+    print("WARNING:", msg)
+
+
+def parse_requirements(fname: str) -> Iterator[tuple[str, int, int, int | None]]:
+    """Load .github/ci/min-core-deps.yml
+
+    Yield (package name, major version, minor version, [patch version])
+    """
+    global errors
+
+    with open(fname) as fh:
+        contents = yaml.safe_load(fh)
+    for row in contents["dependencies"]:
+        if isinstance(row, dict) and list(row) == ["pip"]:
+            continue
+        pkg, eq, version = row.partition("=")
+        if pkg.rstrip("<>") in IGNORE_DEPS:
+            continue
+        if pkg.endswith("<") or pkg.endswith(">") or eq != "=":
+            error("package should be pinned with exact version: " + row)
+            continue
+
+        try:
+            version_tup = tuple(int(x) for x in version.split("."))
+        except ValueError:
+            raise ValueError("non-numerical version: " + row)
+
+        if len(version_tup) == 2:
+            yield (pkg, *version_tup, None)  # type: ignore[misc]
+        elif len(version_tup) == 3:
+            yield (pkg, *version_tup)  # type: ignore[misc]
+        else:
+            raise ValueError("expected major.minor or major.minor.patch: " + row)
+
+
+def query_conda(pkg: str) -> dict[tuple[int, int], datetime]:
+    """Query the conda repository for a specific package
+
+    Return map of {(major version, minor version): publication date}
+    """
+
+    def metadata(entry):
+        version = entry.version
+
+        time = datetime.fromtimestamp(entry.timestamp)
+        major, minor = map(int, version.split(".")[:2])
+
+        return (major, minor), time
+
+    raw_data = conda.api.SubdirData.query_all(pkg, channels=CHANNELS)
+    data = sorted(metadata(entry) for entry in raw_data if entry.timestamp != 0)
+
+    release_dates = {
+        version: [time for _, time in group if time is not None]
+        for version, group in itertools.groupby(data, key=lambda x: x[0])
+    }
+    out = {version: min(dates) for version, dates in release_dates.items() if dates}
+
+    # Hardcoded fix to work around incorrect dates in conda
+    if pkg == "python":
+        out.update(
+            {
+                (2, 7): datetime(2010, 6, 3),
+                (3, 5): datetime(2015, 9, 13),
+                (3, 6): datetime(2016, 12, 23),
+                (3, 7): datetime(2018, 6, 27),
+                (3, 8): datetime(2019, 10, 14),
+                (3, 9): datetime(2020, 10, 5),
+                (3, 10): datetime(2021, 10, 4),
+                (3, 11): datetime(2022, 10, 24),
+            }
+        )
+
+    return out
+
+
+def process_pkg(pkg: str, req_major: int, req_minor: int, req_patch: int | None) -> tuple[str, str, str, str, str, str]:
+    """Compare package version from requirements file to available versions in conda.
+    Return a row for the report table:
+
+    - package name
+    - major.minor.[patch] version in requirements file
+    - publication date of version in requirements file (YYYY-MM-DD)
+    - major.minor version suggested by policy
+    - publication date of version suggested by policy (YYYY-MM-DD)
+    - status ("<", "=", "> (!)")
+    """
+    print(f"Analyzing {pkg}...")
+    versions = query_conda(pkg)
+
+    try:
+        req_published = versions[req_major, req_minor]
+    except KeyError:
+        error("not found in conda: " + pkg)
+        return pkg, fmt_version(req_major, req_minor, req_patch), "-", "-", "-", "(!)"
+
+    policy_months = POLICY_MONTHS.get(pkg, POLICY_MONTHS_DEFAULT)
+    policy_published = datetime.now() - relativedelta(months=policy_months)
+
+    filtered_versions = [version for version, published in versions.items() if published < policy_published]
+    policy_major, policy_minor = max(filtered_versions, default=(req_major, req_minor))
+
+    try:
+        policy_major, policy_minor = POLICY_OVERRIDE[pkg]
+    except KeyError:
+        pass
+    policy_published_actual = versions[policy_major, policy_minor]
+
+    if (req_major, req_minor) < (policy_major, policy_minor):
+        status = "<"
+    elif (req_major, req_minor) > (policy_major, policy_minor):
+        status = "> (!)"
+        delta = relativedelta(datetime.now(), req_published).normalized()
+        n_months = delta.years * 12 + delta.months
+        warning(
+            f"Package is too new: {pkg}={req_major}.{req_minor} was "
+            f"published on {req_published:%Y-%m-%d} "
+            f"which was {n_months} months ago (policy is {policy_months} months)"
+        )
+    else:
+        status = "="
+
+    if req_patch is not None:
+        warning("patch version should not appear in requirements file: " + pkg)
+        status += " (w)"
+
+    return (
+        pkg,
+        fmt_version(req_major, req_minor, req_patch),
+        req_published.strftime("%Y-%m-%d"),
+        fmt_version(policy_major, policy_minor),
+        policy_published_actual.strftime("%Y-%m-%d"),
+        status,
+    )
+
+
+def fmt_version(major: int, minor: int, patch: int | None = None) -> str:
+    if patch is None:
+        return f"{major}.{minor}"
+    else:
+        return f"{major}.{minor}.{patch}"
+
+
+def main() -> None:
+    fname = sys.argv[1]
+    rows = [process_pkg(pkg, major, minor, patch) for pkg, major, minor, patch in parse_requirements(fname)]
+
+    print("\nPackage           Required             Policy               Status")
+    print("----------------- -------------------- -------------------- ------")
+    fmt = "{:17} {:7} ({:10}) {:7} ({:10}) {}"
+    for row in rows:
+        print(fmt.format(*row))
+
+    if errors:
+        print("\nErrors:")
+        print("-------")
+        for i, e in enumerate(errors):
+            print(f"{i+1}. {e}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
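The subtlest step in `query_conda()` is collapsing all conda builds of a package down to the earliest publication date per `(major, minor)` version. A minimal sketch with toy data (the dates below are made up for illustration):

```python
# Toy data, illustrative only: earliest release date per minor version,
# computed as in query_conda() via itertools.groupby on sorted pairs.
import itertools
from datetime import datetime

data = sorted(
    [
        ((1, 23), datetime(2022, 9, 5)),   # later rebuild of the same minor
        ((1, 23), datetime(2022, 6, 22)),  # first build of 1.23
        ((1, 24), datetime(2022, 12, 18)),
    ]
)
release_dates = {
    version: min(time for _, time in group)
    for version, group in itertools.groupby(data, key=lambda x: x[0])
}
print(release_dates)  # {(1, 23): 2022-06-22 ..., (1, 24): 2022-12-18 ...}
```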
diff --git a/.github/workflows/check-min-deps.yml b/.github/workflows/check-min-deps.yml
new file mode 100644
index 000000000..56fb5cf7b
--- /dev/null
+++ b/.github/workflows/check-min-deps.yml
@@ -0,0 +1,29 @@
+name: CI Additional
+on:
+  workflow_dispatch: # allows you to trigger manually
+  schedule:
+    - cron: "0 0 1 */3 *" # Run every 3 months
+
+jobs:
+  min-version-policy:
+    name: Minimum Version Policy
+    runs-on: "ubuntu-latest"
+    defaults:
+      run:
+        shell: bash -l {0}
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup micromamba
+        uses: mamba-org/setup-micromamba@v1
+        with:
+          environment-name: min-deps
+          create-args: >-
+            python=3.12
+            pyyaml
+            conda
+            python-dateutil
+
+      - name: Core deps minimum versions policy
+        run: |
+          python .github/ci/min_deps_check.py .github/ci/min-core-deps.yml
diff --git a/codecov.yml b/codecov.yml
index 6294560a4..3ba50f502 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -6,7 +6,13 @@ coverage:
     patch:
       default:
         informational: true
-comment: false # Only on PR when theres a change in coverage
+comment:
+  layout: "diff, flags, files"
+  behavior: default
+  require_changes: true
+  require_base: false
+  require_head: true
+  hide_project_coverage: true
 
 # When modifying this file, please validate using
 # curl -X POST --data-binary @codecov.yml https://codecov.io/validate
diff --git a/docs/contributing.rst b/docs/contributing.rst
index bb442eaca..04fce51d7 100644
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -1,5 +1,5 @@
 Contributing to Parcels
-======================
+=======================
 
 Why contribute?
 ---------------
diff --git a/docs/documentation/index.rst b/docs/documentation/index.rst
index c58974c57..20fe5a60d 100644
--- a/docs/documentation/index.rst
+++ b/docs/documentation/index.rst
@@ -4,7 +4,7 @@ Documentation and Tutorials
 Parcels has several documentation and tutorial Jupyter notebooks and scripts which go through various aspects of Parcels. Static versions of the notebooks are available below via the gallery in the site, with the interactive notebooks being available either completely online at the following `Binder link `_. Following the gallery of notebooks is a list of scripts which provide additional examples to users. You can work with the example notebooks and scripts locally by downloading :download:`parcels_tutorials.zip ` and running with your own Parcels installation.
 
 .. warning::
-   When browsing/downloading the tutorials, it's important that you are using the documentation corresponding to the version of Parcels that you have installed. You can find which parcels version you have installed by doing ``import parcels`` followed by ``print(parcels.__version__)``. If you don't want to use the latest version of Parcels, you can browse prior versions of the documentation by using the version switcher in the bottom right of this page.
+   In v3.1.0 we updated kernels in the tutorials to use ``parcels.ParcelsRandom`` instead of ``from parcels import ParcelsRandom``. Due to our C-conversion code, using ``parcels.ParcelsRandom`` only works with v3.1.0+. When browsing/downloading the tutorials, it's important that you are using the documentation corresponding to the version of Parcels that you have installed. You can find which parcels version you have installed by doing ``import parcels`` followed by ``print(parcels.__version__)``. If you don't want to use the latest version of Parcels, you can browse prior versions of the documentation by using the version switcher in the bottom right of this page.
 
 .. nbgallery::
    :caption: Overview
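For readers of the warning above, a hypothetical sketch of the kernel style it refers to. The kernel name, step size, and field-free random walk are made up for illustration; `particle_dlon`/`particle_dlat` are the displacement variables Parcels v3 kernels write to:

```python
import parcels

def RandomWalk(particle, fieldset, time):  # hypothetical kernel, illustration only
    # v3.1.0+ style: qualify the RNG with the package namespace instead of
    # `from parcels import ParcelsRandom`. This function is not called directly;
    # Parcels' C-conversion consumes it via ParticleSet.execute().
    particle_dlon += parcels.ParcelsRandom.uniform(-1.0, 1.0) * 0.001  # noqa: F821
    particle_dlat += parcels.ParcelsRandom.uniform(-1.0, 1.0) * 0.001  # noqa: F821
```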
diff --git a/docs/examples/tutorial_nemo_curvilinear.ipynb b/docs/examples/tutorial_nemo_curvilinear.ipynb
index 065c96dd4..0b4b807e2 100644
--- a/docs/examples/tutorial_nemo_curvilinear.ipynb
+++ b/docs/examples/tutorial_nemo_curvilinear.ipynb
@@ -226,7 +226,7 @@
    "source": [
     "## Speeding up `ParticleSet` initialisation by efficiently finding particle start-locations on the `Grid`\n",
     "\n",
-    "On a Curvilinear grid, determining the location of each `Particle` on the grid is more complicated and therefore takes longer than on a Rectilinear grid. Since Parcels version 2.2.2, a function is available on the `ParticleSet` class, that speeds up the look-up. After creating the `ParticleSet`, but before running the `ParticleSet.execute()`, simply call the function `ParticleSet.populate_indices()`. Note that this only works if you have the [pykdtree](https://anaconda.org/conda-forge/pykdtree) package installed, which is only included in the Parcels dependencies in version >= 2.2.2\n"
+    "On a Curvilinear grid, determining the location of each `Particle` on the grid is more complicated and therefore takes longer than on a Rectilinear grid. Since Parcels version 2.2.2, a function has been available on the `ParticleSet` class that speeds up this look-up. After creating the `ParticleSet`, but before running `ParticleSet.execute()`, simply call `ParticleSet.populate_indices()`.\n"
   ]
  },
 {
diff --git a/docs/examples/tutorial_output.ipynb b/docs/examples/tutorial_output.ipynb
index f98e6972c..db5f62586 100644
--- a/docs/examples/tutorial_output.ipynb
+++ b/docs/examples/tutorial_output.ipynb
@@ -190,9 +190,11 @@
    ],
    "source": [
     "np.set_printoptions(linewidth=160)\n",
-    "ns_per_hour = np.timedelta64(1, \"h\")  # nanoseconds in an hour\n",
+    "one_hour = np.timedelta64(1, \"h\")  # Define timedelta object to help with conversion\n",
     "\n",
-    "print(data_xarray[\"time\"].data.compute() / ns_per_hour)  # time is stored in nanoseconds"
+    "print(\n",
+    "    data_xarray[\"time\"].data.compute() / one_hour\n",
+    ")  # timedelta / timedelta -> float number of hours"
   ]
  },
 {
@@ -227,7 +229,7 @@
     "    np.sqrt(np.square(np.diff(x)) + np.square(np.diff(y))), axis=1\n",
     ")  # d = (dx^2 + dy^2)^(1/2)\n",
     "\n",
-    "real_time = data_xarray[\"time\"] / ns_per_hour  # convert time to hours\n",
+    "real_time = data_xarray[\"time\"] / one_hour  # convert time to hours\n",
     "time_since_release = (\n",
     "    real_time.values.transpose() - real_time.values[:, 0]\n",
     ")  # substract the initial time from each timeseries"
diff --git a/docs/index.rst b/docs/index.rst
index 2e0fcc50a..0fe54efa0 100755
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -19,5 +19,6 @@ If you need more help with Parcels, try the `Discussions page on GitHub
    Tutorials & Documentation
    API reference
+   Release Notes
    Contributing
    OceanParcels website
diff --git a/environment.yml b/environment.yml
index dd1101cc6..e244acca1 100644
--- a/environment.yml
+++ b/environment.yml
@@ -22,7 +22,6 @@ dependencies:
   - dask>=2.0
   - nbval
   - scikit-learn
-  - pykdtree
   - zarr>=2.11.0,!=2.18.0
 
   # Formatting
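The `one_hour` rename above works because NumPy defines division between timedeltas, which is what the updated tutorial cell relies on. A minimal sketch with a synthetic array:

```python
# Synthetic example: timedelta64 / timedelta64 yields plain floats.
import numpy as np

one_hour = np.timedelta64(1, "h")
elapsed = np.array([0, 30, 90], dtype="timedelta64[m]")  # minutes since release
print(elapsed / one_hour)  # [0.  0.5 1.5] -> float number of hours
```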
diff --git a/parcels/particleset.py b/parcels/particleset.py
index fc4462e14..c4007cf20 100644
--- a/parcels/particleset.py
+++ b/parcels/particleset.py
@@ -6,6 +6,7 @@
 import cftime
 import numpy as np
 import xarray as xr
+from scipy.spatial import KDTree
 from tqdm import tqdm
 
 try:
@@ -13,10 +14,6 @@
 except ModuleNotFoundError:
     MPI = None
 
-try:
-    from pykdtree.kdtree import KDTree
-except ModuleNotFoundError:
-    KDTree = None
 
 from parcels.application_kernels.advection import AdvectionRK4
 from parcels.compilation.codecompiler import GNUCompiler
@@ -446,33 +443,30 @@ def neighbors_by_coor(self, coor):
         neighbor_ids = self.particledata.data["id"][neighbor_idx]
         return neighbor_ids
 
+    # TODO: This method is only tested in the tutorial notebook. Add a unit test?
     def populate_indices(self):
         """Pre-populate guesses of particle xi/yi indices using a kdtree.
 
         This is only intended for curvilinear grids, where the initial index search
         may be quite expensive.
         """
-        if KDTree is None:
-            logger.warning("KDTree is not installed, pre-populated guesses are not indexed")
-            return
-        else:
-            for i, grid in enumerate(self.fieldset.gridset.grids):
-                if not isinstance(grid, CurvilinearGrid):
-                    continue
-
-                tree_data = np.stack((grid.lon.flat, grid.lat.flat), axis=-1)
-                IN = np.all(~np.isnan(tree_data), axis=1)
-                tree = KDTree(tree_data[IN, :])
-                # stack all the particle positions for a single query
-                pts = np.stack((self.particledata.data["lon"], self.particledata.data["lat"]), axis=-1)
-                # query datatype needs to match tree datatype
-                _, idx_nan = tree.query(pts.astype(tree_data.dtype))
-
-                idx = np.where(IN)[0][idx_nan]
-                yi, xi = np.unravel_index(idx, grid.lon.shape)
-
-                self.particledata.data["xi"][:, i] = xi
-                self.particledata.data["yi"][:, i] = yi
+        for i, grid in enumerate(self.fieldset.gridset.grids):
+            if not isinstance(grid, CurvilinearGrid):
+                continue
+
+            tree_data = np.stack((grid.lon.flat, grid.lat.flat), axis=-1)
+            IN = np.all(~np.isnan(tree_data), axis=1)
+            tree = KDTree(tree_data[IN, :])
+            # stack all the particle positions for a single query
+            pts = np.stack((self.particledata.data["lon"], self.particledata.data["lat"]), axis=-1)
+            # query datatype needs to match tree datatype
+            _, idx_nan = tree.query(pts.astype(tree_data.dtype))
+
+            idx = np.where(IN)[0][idx_nan]
+            yi, xi = np.unravel_index(idx, grid.lon.shape)
+
+            self.particledata.data["xi"][:, i] = xi
+            self.particledata.data["yi"][:, i] = yi
 
     @classmethod
     def from_list(
diff --git a/pyproject.toml b/pyproject.toml
index 7a9c8d63a..d42401fbe 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -85,7 +85,8 @@ ignore = [
     "RUF005",
     # Prefer `next(iter(variable.items()))` over single element slice
     "RUF015",
-
+    # Use `X | Y` in `isinstance` (see https://github.com/home-assistant/core/issues/123850)
+    "UP038",
     # TODO: ignore for now (requires more work). Remove ignore once fixed
     # Missing docstring in public module
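For reference, the `scipy.spatial.KDTree` pattern that `populate_indices()` now uses, sketched on a synthetic curvilinear grid (grid shape and particle positions below are made up):

```python
# Synthetic example of the nearest-grid-node lookup in populate_indices().
import numpy as np
from scipy.spatial import KDTree

lat, lon = np.meshgrid(np.linspace(40, 50, 40), np.linspace(0, 10, 50), indexing="ij")
tree_data = np.stack((lon.flat, lat.flat), axis=-1)  # (n_nodes, 2) of (lon, lat)
IN = np.all(~np.isnan(tree_data), axis=1)            # mask out NaN grid nodes
tree = KDTree(tree_data[IN, :])

pts = np.array([[2.3, 41.7], [7.9, 48.2]])  # particle (lon, lat) positions
_, idx_nan = tree.query(pts)                # nearest valid node per particle
idx = np.where(IN)[0][idx_nan]              # map back to flat indices on the full grid
yi, xi = np.unravel_index(idx, lon.shape)   # row/column guesses for the index search
print(xi, yi)
```

Unlike `pykdtree`, SciPy is a hard dependency here, which is why the optional import and the warning fallback were removed.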