From 3fb4f9caa569895eb4578cdf673ba3f38b9bd47d Mon Sep 17 00:00:00 2001 From: MacPingu Date: Mon, 27 Nov 2023 07:42:59 +0100 Subject: [PATCH] Add regrid operator (#111) * add regrid operation * Updated tests to include fixture; got test_regrid running. * update conda env with clisops >= 0.12 * update requirements for clisops * added roocs-grids conda package * test with conda on github * added xesmf * fix errors in docs build * update rtd config * updated history * pep8 * enabled lint * test also on python 3.10 and 3.11 --------- Co-authored-by: ellesmith88 Co-authored-by: Ag Stephens --- .github/workflows/main.yml | 39 ++++++++------ daops/ops/regrid.py | 80 ++++++++++++++++++++++++++++ environment.yml | 10 ++-- requirements.txt | 9 ++-- tests/test_operations/test_regrid.py | 36 +++++++++++++ 5 files changed, 152 insertions(+), 22 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 79398d4..57654c6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -5,37 +5,46 @@ on: [ push, pull_request ] jobs: build: + name: Conda Build with Python${{ matrix.python-version }} runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11"] + defaults: + run: + shell: bash -l {0} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install packages run: | sudo apt-get -y install pandoc - if: matrix.python-version == 3.8 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + if: matrix.python-version == 3.9 + - name: Setup Conda (Micromamba) with Python${{ matrix.python-version }} + uses: mamba-org/setup-micromamba@v1 with: - python-version: ${{ matrix.python-version }} + cache-downloads: true + cache-environment: true + environment-file: environment.yml + create-args: >- + conda + python=${{ matrix.python-version }} + - name: Conda and Mamba versions + run: | + conda --version + echo "micromamba: 
$(micromamba --version)" - name: Install dependencies run: | - python -m pip install --upgrade pip - # pip install flake8 black pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + python -m pip install --no-user --editable "." if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi - # pip install the package, command-line unit tests work - pip install --no-deps -e . - name: Lint with flake8 run: make lint - if: matrix.python-version == 3.8 + if: matrix.python-version == 3.9 # - name: Check formatting with black -# run: black --check --target-version py38 daops tests -# if: matrix.python-version == 3.8 +# run: black --check --target-version py39 daops tests +# if: matrix.python-version == 3.9 - name: Test with pytest run: | python -m pytest -v tests - name: Build docs 🏗️ run: make docs - if: matrix.python-version == 3.8 + if: matrix.python-version == 3.9 diff --git a/daops/ops/regrid.py b/daops/ops/regrid.py index e69de29..d255ade 100644 --- a/daops/ops/regrid.py +++ b/daops/ops/regrid.py @@ -0,0 +1,80 @@ +from clisops.ops.regrid import regrid as clisops_regrid +from roocs_utils.parameter import collection_parameter +from roocs_utils.parameter import dimension_parameter + +from daops.ops.base import Operation + +__all__ = [ + "regrid", +] + + +class Regrid(Operation): + def _resolve_params(self, collection, **params): + """ + Resolve the input parameters to `self.params` and parameterise + collection parameter and set to `self.collection`. + """ + # need to verify method, grid and adaptive masking threshold are correct format - parameters? 
+ collection = collection_parameter.CollectionParameter(collection) + + self.collection = collection + self.params = { + "method": params.get("method"), + "adaptive_masking_threshold": params.get("adaptive_masking_threshold"), + "grid": params.get("grid"), + } + + def get_operation_callable(self): + return clisops_regrid + + +def regrid( + collection, + method="nn", + adaptive_masking_threshold=0.5, + grid="1deg", + output_dir=None, + output_type="netcdf", + split_method="time:auto", + file_namer="standard", + apply_fixes=True, +): + """ + Regrid input dataset according to specified method and output grid. + The adaptive masking threshold can also be specified. + + Parameters + ---------- + collection: Collection of datasets to process, sequence or string of comma separated dataset identifiers. + method: The method by which to regrid. + adaptive_masking_threshold: The threshold to use for adaptive masking when regridding. Default is 0.5. + grid: The desired output grid. + output_dir: str or path like object describing output directory for output files. + output_type: {"netcdf", "nc", "zarr", "xarray"} + split_method: {"time:auto"} + file_namer: {"standard", "simple"} + apply_fixes: Boolean. If True fixes will be applied to datasets if needed. Default is True. + + Returns + ------- + List of outputs in the selected type: a list of xarray Datasets or file paths. 
+ + + Examples + -------- + | collection: ("cmip6.ukesm1.r1.gn.tasmax.v20200101",) + | method: "nn" + | adaptive_masking_threshold: 0.5 + | grid: "1deg" + | output_type: "netcdf" + | output_dir: "/cache/wps/procs/req0111" + | split_method: "time:auto" + | file_namer: "standard" + | apply_fixes: True + + """ + + result_set = Regrid(**locals()).calculate() + + return result_set diff --git a/environment.yml b/environment.yml index 0f82041..b53dd87 100644 --- a/environment.yml +++ b/environment.yml @@ -5,15 +5,17 @@ channels: - defaults dependencies: - pip - - numpy>=1.16 - - xarray>=0.15 + - numpy >=1.16 + - xarray >=0.21,<2023.3.0 # https://github.com/pydata/xarray/issues/7794 - dask - netcdf4 - - clisops>=0.10.0 + - clisops >=0.12.0 + - roocs-grids >=0.1.2 + - xesmf >=0.8.2 - elasticsearch>=7.9.1 - roocs-utils>=0.6.2 # logging - - loguru>=0.5.3 + - loguru >=0.5.3 # catalog - intake - fsspec diff --git a/requirements.txt b/requirements.txt index 004f005..67175ef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,14 @@ numpy>=1.16 -xarray>=0.19 +xarray>=0.21 +xesmf>=0.8.2 dask[complete] cftime netcdf4 elasticsearch>=8.0.1 -clisops>=0.10.0 -roocs-utils>=0.6.2 +clisops>=0.12.0 +## clisops @ git+https://github.com/roocs/clisops.git@master#egg=clisops +roocs-utils>=0.6.4 +roocs_grids>=0.1.2 # logging loguru>=0.5.3 # catalog diff --git a/tests/test_operations/test_regrid.py b/tests/test_operations/test_regrid.py index e69de29..f48b5b5 100644 --- a/tests/test_operations/test_regrid.py +++ b/tests/test_operations/test_regrid.py @@ -0,0 +1,36 @@ +import os + +import pytest +import xarray as xr + + +# from daops.ops.regrid import regrid + +# TODO: remove when upgraded to new clisops version +# pytestmark = pytest.mark.xfail(reason="needs clisops>=0.12 with regrid operator") + +CMIP6_IDS = ["CMIP6.CMIP.MPI-M.MPI-ESM1-2-HR.historical.r1i1p1f1.Omon.tos.gn.v20190710"] + + +def _check_output_nc(result, fname="output_001.nc"): + assert fname in [os.path.basename(_) for 
_ in result.file_uris] + + +@pytest.mark.online +def test_regrid(tmpdir, load_esgf_test_data): + from daops.ops.regrid import regrid + + result = regrid( + CMIP6_IDS[0], + method="conservative", + adaptive_masking_threshold=0.5, + grid="1deg", + output_dir=tmpdir, + file_namer="simple", + apply_fixes=False, + ) + + _check_output_nc(result) + ds = xr.open_dataset(result.file_uris[0], use_cftime=True) + assert "time" in ds.dims + assert "tos" in ds