
Misc: Remove pykdtree dependency, codecov config, SPEC 0 tooling and other changes #1666

Merged · 11 commits · Aug 22, 2024
29 changes: 29 additions & 0 deletions .github/workflows/check-min-deps.yml
@@ -0,0 +1,29 @@
name: CI Additional
on:
workflow_dispatch: # allows you to trigger manually
schedule:
- cron: "0 0 1 */3 *" # Run every 3 months

jobs:
min-version-policy:
name: Minimum Version Policy
runs-on: "ubuntu-latest"
defaults:
run:
shell: bash -l {0}

steps:
- uses: actions/checkout@v4
- name: Setup micromamba
uses: mamba-org/setup-micromamba@v1
with:
environment-name: min-deps
create-args: >-
python=3.12
pyyaml
conda
python-dateutil

- name: Core deps minimum versions policy
run: |
python ci/min_deps_check.py ci/min-core-deps.yml
26 changes: 26 additions & 0 deletions ci/min-core-deps.yml
@@ -0,0 +1,26 @@
name: xarray-tests
channels:
- conda-forge
- nodefaults
dependencies:
# MINIMUM VERSIONS POLICY: keep track of minimum versions
# for core packages. Dev and conda release builds should use this as reference.
# Run ci/min_deps_check.py to verify that this file respects the policy.
- python=3.10
- cftime=1.6
- cgen=2020.1
- dask=2022.8
- matplotlib-base=3.5
# netcdf follows a 1.major.minor[.patch] convention
# (see https://github.com/Unidata/netcdf4-python/issues/1090)
- netcdf4=1.6
- numpy=1.23
- platformdirs=2.5
- psutil=5.9
- pymbolic=2022.1
- pytest=7.1
- scipy=1.9
- trajan=0.1
- tqdm=4.64
- xarray=2022.6
- zarr=2.12
199 changes: 199 additions & 0 deletions ci/min_deps_check.py
@@ -0,0 +1,199 @@
#!/usr/bin/env python
"""Fetch from conda database all available versions of dependencies and their
publication date. Compare it against requirements/min-core-deps.yml to verify the
policy on obsolete dependencies is being followed. Print a pretty report :)

Adapted from xarray:
https://github.com/pydata/xarray/blob/a04d857a03d1fb04317d636a7f23239cb9034491/ci/min_deps_check.py
"""

from __future__ import annotations

import itertools
import sys
from collections.abc import Iterator
from datetime import datetime

import conda.api # type: ignore[import]
import yaml
from dateutil.relativedelta import relativedelta

CHANNELS = ["conda-forge", "defaults"]
IGNORE_DEPS = {}

POLICY_MONTHS = {"python": 3 * 12}
POLICY_MONTHS_DEFAULT = 24
POLICY_OVERRIDE: dict[str, tuple[int, int]] = {}
errors = []


def error(msg: str) -> None:
global errors
errors.append(msg)
print("ERROR:", msg)


def warning(msg: str) -> None:
print("WARNING:", msg)


def parse_requirements(fname) -> Iterator[tuple[str, int, int, int | None]]:
"""Load requirements/min-all-deps.yml

Yield (package name, major version, minor version, [patch version])
"""
global errors

with open(fname) as fh:
contents = yaml.safe_load(fh)
for row in contents["dependencies"]:
if isinstance(row, dict) and list(row) == ["pip"]:
continue
pkg, eq, version = row.partition("=")
if pkg.rstrip("<>") in IGNORE_DEPS:
continue
if pkg.endswith("<") or pkg.endswith(">") or eq != "=":
error("package should be pinned with exact version: " + row)
continue

try:
version_tup = tuple(int(x) for x in version.split("."))
except ValueError:
raise ValueError("non-numerical version: " + row)

if len(version_tup) == 2:
yield (pkg, *version_tup, None) # type: ignore[misc]
elif len(version_tup) == 3:
yield (pkg, *version_tup) # type: ignore[misc]
else:
raise ValueError("expected major.minor or major.minor.patch: " + row)


def query_conda(pkg: str) -> dict[tuple[int, int], datetime]:
"""Query the conda repository for a specific package

Return map of {(major version, minor version): publication date}
"""

def metadata(entry):
version = entry.version

time = datetime.fromtimestamp(entry.timestamp)
major, minor = map(int, version.split(".")[:2])

return (major, minor), time

raw_data = conda.api.SubdirData.query_all(pkg, channels=CHANNELS)
data = sorted(metadata(entry) for entry in raw_data if entry.timestamp != 0)

release_dates = {
version: [time for _, time in group if time is not None]
for version, group in itertools.groupby(data, key=lambda x: x[0])
}
out = {version: min(dates) for version, dates in release_dates.items() if dates}

# Hardcoded fix to work around incorrect dates in conda
if pkg == "python":
out.update(
{
(2, 7): datetime(2010, 6, 3),
(3, 5): datetime(2015, 9, 13),
(3, 6): datetime(2016, 12, 23),
(3, 7): datetime(2018, 6, 27),
(3, 8): datetime(2019, 10, 14),
(3, 9): datetime(2020, 10, 5),
(3, 10): datetime(2021, 10, 4),
(3, 11): datetime(2022, 10, 24),
}
)

return out


def process_pkg(pkg: str, req_major: int, req_minor: int, req_patch: int | None) -> tuple[str, str, str, str, str, str]:
"""Compare package version from requirements file to available versions in conda.
Return row to build pandas dataframe:

- package name
- major.minor.[patch] version in requirements file
- publication date of version in requirements file (YYYY-MM-DD)
- major.minor version suggested by policy
- publication date of version suggested by policy (YYYY-MM-DD)
- status ("<", "=", "> (!)")
"""
print(f"Analyzing {pkg}...")
versions = query_conda(pkg)

try:
req_published = versions[req_major, req_minor]
except KeyError:
error("not found in conda: " + pkg)
return pkg, fmt_version(req_major, req_minor, req_patch), "-", "-", "-", "(!)"

policy_months = POLICY_MONTHS.get(pkg, POLICY_MONTHS_DEFAULT)
policy_published = datetime.now() - relativedelta(months=policy_months)

filtered_versions = [version for version, published in versions.items() if published < policy_published]
policy_major, policy_minor = max(filtered_versions, default=(req_major, req_minor))

try:
policy_major, policy_minor = POLICY_OVERRIDE[pkg]
except KeyError:
pass
policy_published_actual = versions[policy_major, policy_minor]

if (req_major, req_minor) < (policy_major, policy_minor):
status = "<"
elif (req_major, req_minor) > (policy_major, policy_minor):
status = "> (!)"
delta = relativedelta(datetime.now(), req_published).normalized()
n_months = delta.years * 12 + delta.months
warning(
f"Package is too new: {pkg}={req_major}.{req_minor} was "
f"published on {req_published:%Y-%m-%d} "
f"which was {n_months} months ago (policy is {policy_months} months)"
)
else:
status = "="

if req_patch is not None:
warning("patch version should not appear in requirements file: " + pkg)
status += " (w)"

return (
pkg,
fmt_version(req_major, req_minor, req_patch),
req_published.strftime("%Y-%m-%d"),
fmt_version(policy_major, policy_minor),
policy_published_actual.strftime("%Y-%m-%d"),
status,
)


def fmt_version(major: int, minor: int, patch: int | None = None) -> str:
if patch is None:
return f"{major}.{minor}"
else:
return f"{major}.{minor}.{patch}"


def main() -> None:
fname = sys.argv[1]
rows = [process_pkg(pkg, major, minor, patch) for pkg, major, minor, patch in parse_requirements(fname)]

print("\nPackage Required Policy Status")
print("----------------- -------------------- -------------------- ------")
fmt = "{:17} {:7} ({:10}) {:7} ({:10}) {}"
for row in rows:
print(fmt.format(*row))

if errors:
print("\nErrors:")
print("-------")
for i, e in enumerate(errors):
print(f"{i+1}. {e}")
sys.exit(1)


if __name__ == "__main__":
main()
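For a quick sanity check of the parser above, a hypothetical local run (not part of the PR); it assumes the repository root as working directory and an environment with conda, pyyaml and python-dateutil installed, matching what the workflow creates:

```python
# Hypothetical usage sketch -- run from the repository root in an environment
# that has conda, pyyaml and python-dateutil available (as in the workflow).
import sys

sys.path.insert(0, "ci")  # make ci/min_deps_check.py importable

from min_deps_check import parse_requirements

# Yields (package, major, minor, patch); patch is None because the policy file
# pins major.minor only.
for pkg, major, minor, patch in parse_requirements("ci/min-core-deps.yml"):
    print(pkg, major, minor, patch)
```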
8 changes: 7 additions & 1 deletion codecov.yml
@@ -6,7 +6,13 @@ coverage:
patch:
default:
informational: true
comment: false # Only on PR when there's a change in coverage
comment:
layout: " diff, flags, files"
behavior: default
require_changes: true
require_base: false
require_head: true
hide_project_coverage: true

# When modifying this file, please validate using
# curl -X POST --data-binary @codecov.yml https://codecov.io/validate
2 changes: 1 addition & 1 deletion docs/contributing.rst
@@ -1,5 +1,5 @@
Contributing to Parcels
======================
=======================

Why contribute?
---------------
2 changes: 1 addition & 1 deletion docs/documentation/index.rst
@@ -4,7 +4,7 @@ Documentation and Tutorials
Parcels has several documentation and tutorial Jupyter notebooks and scripts which go through various aspects of Parcels. Static versions of the notebooks are available below via the gallery on this site, and the interactive notebooks can be run online at the following `Binder link <https://mybinder.org/v2/gh/OceanParcels/parcels/master?labpath=docs%2Fexamples%2Fparcels_tutorial.ipynb>`_. Following the gallery of notebooks is a list of scripts which provide additional examples to users. You can work with the example notebooks and scripts locally by downloading :download:`parcels_tutorials.zip </_downloads/parcels_tutorials.zip>` and running them with your own Parcels installation.

.. warning::
When browsing/downloading the tutorials, it's important that you are using the documentation corresponding to the version of Parcels that you have installed. You can find which parcels version you have installed by doing ``import parcels`` followed by ``print(parcels.__version__)``. If you don't want to use the latest version of Parcels, you can browse prior versions of the documentation by using the version switcher in the bottom right of this page.
In v3.1.0 we updated kernels in the tutorials to use ``parcels.ParcelsRandom`` instead of ``from parcels import ParcelsRandom``. Due to our C-conversion code, using ``parcels.ParcelsRandom`` only works with v3.1.0+. When browsing/downloading the tutorials, it's important that you are using the documentation corresponding to the version of Parcels that you have installed. You can find which parcels version you have installed by doing ``import parcels`` followed by ``print(parcels.__version__)``. If you don't want to use the latest version of Parcels, you can browse prior versions of the documentation by using the version switcher in the bottom right of this page.
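For context (not part of this diff), a minimal sketch of the kernel style the warning refers to; the kernel name and the random-walk magnitudes are illustrative, and the displacement variables follow the usual Parcels v3 kernel conventions:

```python
import parcels

def RandomKick(particle, fieldset, time):
    # From v3.1.0 the C-conversion accepts the module-qualified name, so
    # `from parcels import ParcelsRandom` is no longer needed in tutorial kernels.
    # particle_dlon / particle_dlat are provided by the Parcels kernel machinery.
    particle_dlon += parcels.ParcelsRandom.uniform(-0.1, 0.1)  # noqa: F821
    particle_dlat += parcels.ParcelsRandom.uniform(-0.1, 0.1)  # noqa: F821
```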

.. nbgallery::
:caption: Overview
2 changes: 1 addition & 1 deletion docs/examples/tutorial_nemo_curvilinear.ipynb
@@ -226,7 +226,7 @@
"source": [
"## Speeding up `ParticleSet` initialisation by efficiently finding particle start-locations on the `Grid`\n",
"\n",
"On a Curvilinear grid, determining the location of each `Particle` on the grid is more complicated and therefore takes longer than on a Rectilinear grid. Since Parcels version 2.2.2, a function is available on the `ParticleSet` class, that speeds up the look-up. After creating the `ParticleSet`, but before running the `ParticleSet.execute()`, simply call the function `ParticleSet.populate_indices()`. Note that this only works if you have the [pykdtree](https://anaconda.org/conda-forge/pykdtree) package installed, which is only included in the Parcels dependencies in version >= 2.2.2\n"
"On a Curvilinear grid, determining the location of each `Particle` on the grid is more complicated and therefore takes longer than on a Rectilinear grid. Since Parcels version 2.2.2, a function is available on the `ParticleSet` class, that speeds up the look-up. After creating the `ParticleSet`, but before running the `ParticleSet.execute()`, simply call the function `ParticleSet.populate_indices()`.\n"
]
},
{
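As a point of reference (hypothetical usage, not part of the diff), the call order described in the notebook text looks roughly like this; `fieldset` is assumed to be a FieldSet already built on a curvilinear NEMO grid, and the release positions are arbitrary:

```python
from datetime import timedelta

import numpy as np
import parcels

# fieldset is assumed to exist and to be defined on a curvilinear grid
pset = parcels.ParticleSet(
    fieldset=fieldset,
    pclass=parcels.JITParticle,
    lon=np.linspace(-10, 10, 100),
    lat=np.full(100, 50.0),
)

pset.populate_indices()  # pre-populate xi/yi index guesses before execution
pset.execute(parcels.AdvectionRK4, runtime=timedelta(days=1), dt=timedelta(minutes=5))
```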
6 changes: 4 additions & 2 deletions docs/examples/tutorial_output.ipynb
@@ -190,9 +190,11 @@
],
"source": [
"np.set_printoptions(linewidth=160)\n",
"ns_per_hour = np.timedelta64(1, \"h\") # nanoseconds in an hour\n",
"one_hour = np.timedelta64(1, \"h\") # Define timedelta object to help with conversion\n",
"\n",
"print(data_xarray[\"time\"].data.compute() / ns_per_hour) # time is stored in nanoseconds"
"print(\n",
" data_xarray[\"time\"].data.compute() / one_hour\n",
") # timedelta / timedelta -> float number of hours"
]
},
{
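A standalone sketch of the timedelta arithmetic shown in the cell above (plain NumPy, independent of Parcels output files; the sample values are made up):

```python
import numpy as np

one_hour = np.timedelta64(1, "h")
elapsed = np.array([0, 30, 90], dtype="timedelta64[m]")  # 0, 30 and 90 minutes

# timedelta64 / timedelta64 -> float; here, elapsed time expressed in hours
print(elapsed / one_hour)  # [0.  0.5 1.5]
```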
1 change: 1 addition & 0 deletions docs/index.rst
@@ -19,5 +19,6 @@ If you need more help with Parcels, try the `Discussions page on GitHub <https:/
Installation <installation>
Tutorials & Documentation <documentation/index>
API reference <reference>
Release Notes <https://github.com/OceanParcels/Parcels/releases>
Contributing <contributing>
OceanParcels website <https://oceanparcels.org/>
1 change: 0 additions & 1 deletion environment.yml
@@ -22,7 +22,6 @@ dependencies:
- dask>=2.0
- nbval
- scikit-learn
- pykdtree
- zarr>=2.11.0,!=2.18.0

# Formatting
44 changes: 19 additions & 25 deletions parcels/particleset.py
@@ -6,17 +6,14 @@
import cftime
import numpy as np
import xarray as xr
from scipy.spatial import KDTree
from tqdm import tqdm

try:
from mpi4py import MPI
except ModuleNotFoundError:
MPI = None

try:
from pykdtree.kdtree import KDTree
except ModuleNotFoundError:
KDTree = None

from parcels.application_kernels.advection import AdvectionRK4
from parcels.compilation.codecompiler import GNUCompiler
@@ -446,33 +443,30 @@ def neighbors_by_coor(self, coor):
neighbor_ids = self.particledata.data["id"][neighbor_idx]
return neighbor_ids

# TODO: This method is only tested in tutorial notebook. Add unit test?
def populate_indices(self):
"""Pre-populate guesses of particle xi/yi indices using a kdtree.

This is only intended for curvilinear grids, where the initial index search
may be quite expensive.
"""
if KDTree is None:
logger.warning("KDTree is not installed, pre-populated guesses are not indexed")
return
else:
for i, grid in enumerate(self.fieldset.gridset.grids):
if not isinstance(grid, CurvilinearGrid):
continue

tree_data = np.stack((grid.lon.flat, grid.lat.flat), axis=-1)
IN = np.all(~np.isnan(tree_data), axis=1)
tree = KDTree(tree_data[IN, :])
# stack all the particle positions for a single query
pts = np.stack((self.particledata.data["lon"], self.particledata.data["lat"]), axis=-1)
# query datatype needs to match tree datatype
_, idx_nan = tree.query(pts.astype(tree_data.dtype))

idx = np.where(IN)[0][idx_nan]
yi, xi = np.unravel_index(idx, grid.lon.shape)

self.particledata.data["xi"][:, i] = xi
self.particledata.data["yi"][:, i] = yi
for i, grid in enumerate(self.fieldset.gridset.grids):
if not isinstance(grid, CurvilinearGrid):
continue

tree_data = np.stack((grid.lon.flat, grid.lat.flat), axis=-1)
IN = np.all(~np.isnan(tree_data), axis=1)
tree = KDTree(tree_data[IN, :])
# stack all the particle positions for a single query
pts = np.stack((self.particledata.data["lon"], self.particledata.data["lat"]), axis=-1)
# query datatype needs to match tree datatype
_, idx_nan = tree.query(pts.astype(tree_data.dtype))

idx = np.where(IN)[0][idx_nan]
yi, xi = np.unravel_index(idx, grid.lon.shape)

self.particledata.data["xi"][:, i] = xi
self.particledata.data["yi"][:, i] = yi

@classmethod
def from_list(
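To illustrate the scipy-only path that replaces the optional pykdtree import, a standalone sketch of the same lookup `populate_indices` performs, using synthetic curvilinear coordinates (the grid and particle positions here are made up):

```python
import numpy as np
from scipy.spatial import KDTree

# Synthetic 2-D curvilinear coordinates standing in for grid.lon / grid.lat
lon2d, lat2d = np.meshgrid(np.linspace(-5, 5, 50), np.linspace(40, 50, 40))

tree_data = np.stack((lon2d.flat, lat2d.flat), axis=-1)
tree = KDTree(tree_data)

# Particle release positions, looked up in a single query as in populate_indices
pts = np.array([[0.3, 44.2], [-2.1, 47.9]])
_, idx = tree.query(pts)

# Convert flat indices back into (yi, xi) grid indices
yi, xi = np.unravel_index(idx, lon2d.shape)
print(yi, xi)
```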