From 08a7ee42dfe7fc2e159be7da908df2f68c38515f Mon Sep 17 00:00:00 2001 From: Yang Date: Tue, 15 Aug 2023 10:53:08 +0200 Subject: [PATCH] Implement era5 land (#21) * add era5 land dataset * add demo notebook * import era5 land in init * update alma convention * move shared parser function to utils * add tests for utils * add test for era5 land dataset * remove unneccesary duplication check * Revert "remove unneccesary duplication check" This reverts commit d2242b6798987a671b8825d80cfe4214f34dc856. * era5land inherit from era5 * make a general ecmwf dataset and inherit for era5(land) * please mypy * move era5 land to era5.py * drop py3.8, fix circular import and please mypy with proper protocol usage * fix dataset import in notebooks * implement consistent zampy name for ecmwf datasets --- demo/era5-land_dataset_demo.ipynb | 1388 +++++++++++++++++ demo/era5_dataset_demo.ipynb | 96 +- demo/eth_dataset_demo.ipynb | 4 +- pyproject.toml | 6 +- src/zampy/conventions/ALMA.json | 8 + src/zampy/datasets/__init__.py | 13 +- src/zampy/datasets/catalog.py | 14 + src/zampy/datasets/dataset_protocol.py | 21 +- src/zampy/datasets/ecmwf_dataset.py | 150 ++ src/zampy/datasets/era5.py | 249 +-- src/zampy/datasets/eth_canopy_height.py | 28 +- src/zampy/datasets/utils.py | 103 +- src/zampy/datasets/validation.py | 5 +- src/zampy/utils/regrid.py | 3 +- tests/test_converter.py | 2 +- .../era5-land_air_temperature_1996-1.nc | Bin 0 -> 162496 bytes .../era5-land_dewpoint_temperature_1996-1.nc | Bin 0 -> 162496 bytes tests/test_data/era5-land/properties.json | 0 ...era5_eastward_component_of_wind_1996-1.nc} | Bin ...ra5_northward_component_of_wind_1996-1.nc} | Bin ....nc => era5_total_precipitation_1996-1.nc} | Bin tests/test_datasets/test_era5.py | 87 +- tests/test_datasets/test_era5_land.py | 143 ++ tests/test_recipes/generate_test_data.py | 4 +- tests/test_recipes/recipes/era5_recipe.yml | 2 +- tests/test_recipes/test_simple_recipe.py | 2 +- tests/test_utils.py | 116 +- 
tests/test_validation.py | 2 +- 28 files changed, 2048 insertions(+), 398 deletions(-) create mode 100644 demo/era5-land_dataset_demo.ipynb create mode 100644 src/zampy/datasets/catalog.py create mode 100644 src/zampy/datasets/ecmwf_dataset.py create mode 100644 tests/test_data/era5-land/era5-land_air_temperature_1996-1.nc create mode 100644 tests/test_data/era5-land/era5-land_dewpoint_temperature_1996-1.nc create mode 100644 tests/test_data/era5-land/properties.json rename tests/test_data/era5/{era5_10m_u_component_of_wind_1996-1.nc => era5_eastward_component_of_wind_1996-1.nc} (100%) rename tests/test_data/era5/{era5_10m_v_component_of_wind_1996-1.nc => era5_northward_component_of_wind_1996-1.nc} (100%) rename tests/test_data/era5/{era5_mean_total_precipitation_rate_1996-1.nc => era5_total_precipitation_1996-1.nc} (100%) create mode 100644 tests/test_datasets/test_era5_land.py diff --git a/demo/era5-land_dataset_demo.ipynb b/demo/era5-land_dataset_demo.ipynb new file mode 100644 index 0000000..c8d8323 --- /dev/null +++ b/demo/era5-land_dataset_demo.ipynb @@ -0,0 +1,1388 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Handle ERA5 land dataset with Zampy\n", + "Demo notebook for developers." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/yangliu/mambaforge/envs/ecoextreml/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from zampy.datasets.catalog import ERA5Land\n", + "from zampy.datasets.dataset_protocol import TimeBounds, SpatialBounds\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "work_dir = Path(\"/home/yangliu/EcoExtreML/temp\")\n", + "download_dir = work_dir / \"download\"\n", + "ingest_dir = work_dir / \"ingest\"\n", + "times = TimeBounds(np.datetime64(\"2010-01-01T00:00:00\"), np.datetime64(\"2010-01-31T23:00:00\"))\n", + "bbox_demo = SpatialBounds(54, 56, 1, 3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Download dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 50%|█████ | 1/2 [00:00<00:00, 2.30it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File 'era5-land_2m_temperature_2010-1.nc' already exists, skipping...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 2/2 [00:00<00:00, 2.77it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File 'era5-land_2m_dewpoint_temperature_2010-1.nc' already exists, skipping...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "era5_land_dataset = ERA5Land()\n", + "era5_land_dataset.download(\n", + " download_dir=download_dir,\n", + " time_bounds=times,\n", + " spatial_bounds=bbox_demo,\n", + " variable_names=[\"air_temperature\", \"dewpoint_temperature\"],\n", + ")" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "Data ingestion to the unified format in `zampy`." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "era5_land_dataset.ingest(download_dir, ingest_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "ds = era5_land_dataset.load(\n", + " ingest_dir=ingest_dir,\n", + " time_bounds=times,\n", + " spatial_bounds=bbox_demo,\n", + " variable_names=[\"air_temperature\", \"dewpoint_temperature\"],\n", + " resolution=1.0,\n", + " regrid_method=\"flox\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:               (time: 744, latitude: 54, longitude: 54)\n",
+       "Coordinates:\n",
+       "  * time                  (time) datetime64[ns] 2010-01-01 ... 2010-01-31T23:...\n",
+       "  * latitude              (latitude) float64 1.0 2.0 3.0 4.0 ... 52.0 53.0 54.0\n",
+       "  * longitude             (longitude) float64 3.0 4.0 5.0 6.0 ... 54.0 55.0 56.0\n",
+       "Data variables:\n",
+       "    air_temperature       (time, latitude, longitude) float32 dask.array<chunksize=(744, 54, 54), meta=np.ndarray>\n",
+       "    dewpoint_temperature  (time, latitude, longitude) float32 dask.array<chunksize=(744, 54, 54), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.6\n",
+       "    history:      2023-08-08 08:58:46 GMT by grib_to_netcdf-2.25.1: /opt/ecmw...
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 744, latitude: 54, longitude: 54)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2010-01-01 ... 2010-01-31T23:...\n", + " * latitude (latitude) float64 1.0 2.0 3.0 4.0 ... 52.0 53.0 54.0\n", + " * longitude (longitude) float64 3.0 4.0 5.0 6.0 ... 54.0 55.0 56.0\n", + "Data variables:\n", + " air_temperature (time, latitude, longitude) float32 dask.array\n", + " dewpoint_temperature (time, latitude, longitude) float32 dask.array\n", + "Attributes:\n", + " Conventions: CF-1.6\n", + " history: 2023-08-08 08:58:46 GMT by grib_to_netcdf-2.25.1: /opt/ecmw..." + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "air_temperature renamed to Tair.\n", + "dewpoint_temperature renamed to d2m.\n", + "Conversion of dataset 'era5-land' following ALMA convention is complete!\n" + ] + } + ], + "source": [ + "from zampy.datasets import converter\n", + "\n", + "ds_convert = converter.convert(ds, era5_land_dataset, \"ALMA\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:    (time: 744, latitude: 54, longitude: 54)\n",
+       "Coordinates:\n",
+       "  * time       (time) datetime64[ns] 2010-01-01 ... 2010-01-31T23:00:00\n",
+       "  * latitude   (latitude) float64 1.0 2.0 3.0 4.0 5.0 ... 51.0 52.0 53.0 54.0\n",
+       "  * longitude  (longitude) float64 3.0 4.0 5.0 6.0 7.0 ... 53.0 54.0 55.0 56.0\n",
+       "Data variables:\n",
+       "    Tair       (time, latitude, longitude) float32 dask.array<chunksize=(744, 54, 54), meta=np.ndarray>\n",
+       "    d2m        (time, latitude, longitude) float32 dask.array<chunksize=(744, 54, 54), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    Conventions:  ALMA\n",
+       "    history:      2023-08-08 08:58:46 GMT by grib_to_netcdf-2.25.1: /opt/ecmw...
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 744, latitude: 54, longitude: 54)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2010-01-01 ... 2010-01-31T23:00:00\n", + " * latitude (latitude) float64 1.0 2.0 3.0 4.0 5.0 ... 51.0 52.0 53.0 54.0\n", + " * longitude (longitude) float64 3.0 4.0 5.0 6.0 7.0 ... 53.0 54.0 55.0 56.0\n", + "Data variables:\n", + " Tair (time, latitude, longitude) float32 dask.array\n", + " d2m (time, latitude, longitude) float32 dask.array\n", + "Attributes:\n", + " Conventions: ALMA\n", + " history: 2023-08-08 08:58:46 GMT by grib_to_netcdf-2.25.1: /opt/ecmw..." + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_convert" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ecoextreml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/demo/era5_dataset_demo.ipynb b/demo/era5_dataset_demo.ipynb index 5232850..b068e8b 100644 --- a/demo/era5_dataset_demo.ipynb +++ b/demo/era5_dataset_demo.ipynb @@ -12,10 +12,19 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/yangliu/mambaforge/envs/ecoextreml/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "import numpy as np\n", - "from zampy.datasets import ERA5\n", + "from zampy.datasets.catalog import ERA5\n", "from zampy.datasets.dataset_protocol import TimeBounds, SpatialBounds\n", "from pathlib import Path" ] @@ -45,11 +54,25 @@ "execution_count": 3, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1/1 [00:55<00:00, 55.99s/it]" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "File 'era5_10m_v_component_of_wind_2010-1.nc' already exists, skipping...\n" + "Download era5_eastward_component_of_wind_2010-1.nc successfully.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" ] }, { @@ -69,7 +92,7 @@ " download_dir=download_dir,\n", " time_bounds=times,\n", " spatial_bounds=bbox_demo,\n", - " variable_names=[\"10m_v_component_of_wind\"], #\"surface_pressure\", \"mean_total_precipitation_rate\"\n", + " variable_names=[\"eastward_component_of_wind\"],\n", ")" ] }, @@ -89,7 +112,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "File 'era5_10m_v_component_of_wind_2010-1.nc' already exists, skipping...\n" + "File 'era5_10m_v_component_of_wind_2010-1.nc' already exists, skipping...\n", + "File 'era5_10m_u_component_of_wind_1996-1.nc' already exists, skipping...\n" ] }, { @@ -117,7 +141,7 @@ " ingest_dir=ingest_dir,\n", " time_bounds=times,\n", " spatial_bounds=bbox_demo,\n", - " variable_names=[\"10m_v_component_of_wind\"],\n", + " variable_names=[\"eastward_component_of_wind\"],\n", " resolution=1.0,\n", " regrid_method=\"flox\",\n", ")" @@ -495,25 +519,25 @@ " fill: currentColor;\n", "}\n", "
<xarray.Dataset>\n",
-       "Dimensions:                  (time: 744, latitude: 54, longitude: 54)\n",
+       "Dimensions:                     (time: 744, latitude: 54, longitude: 54)\n",
        "Coordinates:\n",
-       "  * time                     (time) datetime64[ns] 2010-01-01 ... 2010-01-31T...\n",
-       "  * latitude                 (latitude) float64 1.0 2.0 3.0 ... 52.0 53.0 54.0\n",
-       "  * longitude                (longitude) float64 3.0 4.0 5.0 ... 54.0 55.0 56.0\n",
+       "  * time                        (time) datetime64[ns] 2010-01-01 ... 2010-01-...\n",
+       "  * latitude                    (latitude) float64 1.0 2.0 3.0 ... 53.0 54.0\n",
+       "  * longitude                   (longitude) float64 3.0 4.0 5.0 ... 55.0 56.0\n",
        "Data variables:\n",
-       "    10m_v_component_of_wind  (time, latitude, longitude) float32 dask.array<chunksize=(744, 54, 54), meta=np.ndarray>\n",
+       "    eastward_component_of_wind  (time, latitude, longitude) float32 dask.array<chunksize=(744, 54, 54), meta=np.ndarray>\n",
        "Attributes:\n",
        "    Conventions:  CF-1.6\n",
-       "    history:      2023-07-11 10:17:10 GMT by grib_to_netcdf-2.25.1: /opt/ecmw...
  • Conventions :
    CF-1.6
    history :
    2023-08-15 07:24:00 GMT by grib_to_netcdf-2.25.1: /opt/ecmwf/mars-client/bin/grib_to_netcdf.bin -S param -o /cache/data7/adaptor.mars.internal-1692084238.7452817-16098-8-ce373adb-533b-4a5a-9608-8413adf74b28.nc /cache/tmp/ce373adb-533b-4a5a-9608-8413adf74b28-adaptor.mars.internal-1692084195.6905272-16098-15-tmp.grib
  • " ], "text/plain": [ "\n", - "Dimensions: (time: 744, latitude: 54, longitude: 54)\n", + "Dimensions: (time: 744, latitude: 54, longitude: 54)\n", "Coordinates:\n", - " * time (time) datetime64[ns] 2010-01-01 ... 2010-01-31T...\n", - " * latitude (latitude) float64 1.0 2.0 3.0 ... 52.0 53.0 54.0\n", - " * longitude (longitude) float64 3.0 4.0 5.0 ... 54.0 55.0 56.0\n", + " * time (time) datetime64[ns] 2010-01-01 ... 2010-01-...\n", + " * latitude (latitude) float64 1.0 2.0 3.0 ... 53.0 54.0\n", + " * longitude (longitude) float64 3.0 4.0 5.0 ... 55.0 56.0\n", "Data variables:\n", - " 10m_v_component_of_wind (time, latitude, longitude) float32 dask.array\n", + " eastward_component_of_wind (time, latitude, longitude) float32 dask.array\n", "Attributes:\n", " Conventions: CF-1.6\n", - " history: 2023-07-11 10:17:10 GMT by grib_to_netcdf-2.25.1: /opt/ecmw..." + " history: 2023-08-15 07:24:00 GMT by grib_to_netcdf-2.25.1: /opt/ecmw..." ] }, "execution_count": 6, @@ -646,7 +670,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "10m_v_component_of_wind renamed to Wind_E.\n", + "eastward_component_of_wind renamed to Wind_E.\n", "Conversion of dataset 'era5' following ALMA convention is complete!\n" ] } @@ -1038,16 +1062,16 @@ " Wind_E (time, latitude, longitude) float32 dask.array<chunksize=(744, 54, 54), meta=np.ndarray>\n", "Attributes:\n", " Conventions: ALMA\n", - " history: 2023-07-11 10:17:10 GMT by grib_to_netcdf-2.25.1: /opt/ecmw...
  • Conventions :
    ALMA
    history :
    2023-08-15 07:24:00 GMT by grib_to_netcdf-2.25.1: /opt/ecmwf/mars-client/bin/grib_to_netcdf.bin -S param -o /cache/data7/adaptor.mars.internal-1692084238.7452817-16098-8-ce373adb-533b-4a5a-9608-8413adf74b28.nc /cache/tmp/ce373adb-533b-4a5a-9608-8413adf74b28-adaptor.mars.internal-1692084195.6905272-16098-15-tmp.grib
  • " ], "text/plain": [ "\n", @@ -1159,7 +1183,7 @@ " Wind_E (time, latitude, longitude) float32 dask.array\n", "Attributes:\n", " Conventions: ALMA\n", - " history: 2023-07-11 10:17:10 GMT by grib_to_netcdf-2.25.1: /opt/ecmw..." + " history: 2023-08-15 07:24:00 GMT by grib_to_netcdf-2.25.1: /opt/ecmw..." ] }, "execution_count": 8, diff --git a/demo/eth_dataset_demo.ipynb b/demo/eth_dataset_demo.ipynb index 0922b2c..d7c1c38 100644 --- a/demo/eth_dataset_demo.ipynb +++ b/demo/eth_dataset_demo.ipynb @@ -24,7 +24,7 @@ "outputs": [], "source": [ "import numpy as np\n", - "from zampy.datasets import EthCanopyHeight\n", + "from zampy.datasets.catalog import EthCanopyHeight\n", "from zampy.datasets.dataset_protocol import TimeBounds, SpatialBounds\n", "from pathlib import Path\n", "\n", @@ -2660,7 +2660,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.0" }, "orig_nbformat": 4 }, diff --git a/pyproject.toml b/pyproject.toml index 11b5773..e787454 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ classifiers = [ "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", ] dependencies = [ "requests", @@ -135,10 +136,11 @@ testpaths = ["tests"] [tool.mypy] ignore_missing_imports = true disallow_untyped_defs = true +python_version = "3.9" [tool.black] line-length = 88 -target-version = ['py38', 'py39', 'py310'] +target-version = ['py39', 'py310', 'py311'] include = '\.pyi?$' [tool.ruff] @@ -169,7 +171,7 @@ line-length = 88 exclude = ["docs", "build"] # Allow unused variables when underscore-prefixed. 
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" -target-version = "py38" +target-version = "py39" [tool.ruff.per-file-ignores] "tests/**" = ["D"] diff --git a/src/zampy/conventions/ALMA.json b/src/zampy/conventions/ALMA.json index 8398410..03592f2 100644 --- a/src/zampy/conventions/ALMA.json +++ b/src/zampy/conventions/ALMA.json @@ -60,5 +60,13 @@ "total_precipitation": { "variable": "Rainf", "units": "millimeter/second" + }, + "air_temperature": { + "variable": "Tair", + "units": "kelvin" + }, + "dewpoint_temperature": { + "variable": "d2m", + "units": "kelvin" } } \ No newline at end of file diff --git a/src/zampy/datasets/__init__.py b/src/zampy/datasets/__init__.py index ccf538e..155a9d2 100644 --- a/src/zampy/datasets/__init__.py +++ b/src/zampy/datasets/__init__.py @@ -1,16 +1,7 @@ """Datasets implementations.""" from zampy.datasets import dataset_protocol from zampy.datasets import validation -from zampy.datasets.era5 import ERA5 -from zampy.datasets.eth_canopy_height import EthCanopyHeight +from zampy.datasets.catalog import DATASETS -__all__ = ["dataset_protocol", "validation", "EthCanopyHeight", "ERA5"] - - -# This object tracks which datasets are available. -DATASETS: dict[str, type[dataset_protocol.Dataset]] = { - # All lowercase key. - "era5": ERA5, - "eth_canopy_height": EthCanopyHeight, -} +__all__ = ["dataset_protocol", "validation", "DATASETS"] diff --git a/src/zampy/datasets/catalog.py b/src/zampy/datasets/catalog.py new file mode 100644 index 0000000..ed0a891 --- /dev/null +++ b/src/zampy/datasets/catalog.py @@ -0,0 +1,14 @@ +"""Catalog of datasets.""" +from zampy.datasets import dataset_protocol +from zampy.datasets.era5 import ERA5 +from zampy.datasets.era5 import ERA5Land +from zampy.datasets.eth_canopy_height import EthCanopyHeight + + +# This object tracks which datasets are available. +DATASETS: dict[str, type[dataset_protocol.Dataset]] = { + # All lowercase key. 
+ "era5": ERA5, + "era5_land": ERA5Land, + "eth_canopy_height": EthCanopyHeight, +} diff --git a/src/zampy/datasets/dataset_protocol.py b/src/zampy/datasets/dataset_protocol.py index 77b7042..1b1de29 100644 --- a/src/zampy/datasets/dataset_protocol.py +++ b/src/zampy/datasets/dataset_protocol.py @@ -1,14 +1,11 @@ """Outline of the dataset protocol.""" import json import shutil -from abc import abstractmethod from dataclasses import dataclass from pathlib import Path from typing import Any -from typing import List from typing import Optional from typing import Protocol -from typing import Tuple import numpy as np import xarray as xr @@ -79,21 +76,20 @@ class Dataset(Protocol): crs: str license: str bib: str - raw_variables: Tuple[Variable, ...] - variable_names: Tuple[str, ...] - variables: Tuple[Variable, ...] + raw_variables: list[Variable] + variable_names: list[str] + variables: list[Variable] def __init__(self) -> None: """Init.""" ... - @abstractmethod def download( self, download_dir: Path, time_bounds: TimeBounds, spatial_bounds: SpatialBounds, - variable_names: List[str], + variable_names: list[str], overwrite: bool = False, ) -> bool: """Download the data. @@ -111,7 +107,6 @@ def download( """ ... - @abstractmethod def ingest( self, download_dir: Path, @@ -130,7 +125,6 @@ def ingest( """ ... - @abstractmethod def load( self, ingest_dir: Path, @@ -138,7 +132,7 @@ def load( spatial_bounds: SpatialBounds, resolution: float, regrid_method: str, - variable_names: List[str], + variable_names: list[str], ) -> xr.Dataset: """Get the dataset as an xarray Dataset. @@ -160,7 +154,6 @@ def load( """ ... - @abstractmethod def convert( self, ingest_dir: Path, @@ -182,7 +175,7 @@ def write_properties_file( dataset_folder: Path, spatial_bounds: SpatialBounds, time_bounds: TimeBounds, - variable_names: List[str], + variable_names: list[str], ) -> None: """Write the (serialized) spatial and time bounds to a json file. 
@@ -211,7 +204,7 @@ def write_properties_file( def read_properties_file( dataset_folder: Path, -) -> Tuple[SpatialBounds, TimeBounds, List[str]]: +) -> tuple[SpatialBounds, TimeBounds, list[str]]: """Load the serialized spatial and time bounds from the json file. Args: diff --git a/src/zampy/datasets/ecmwf_dataset.py b/src/zampy/datasets/ecmwf_dataset.py new file mode 100644 index 0000000..e05b8e0 --- /dev/null +++ b/src/zampy/datasets/ecmwf_dataset.py @@ -0,0 +1,150 @@ +"""Base module for datasets available on CDS.""" + +from pathlib import Path +from typing import Union +import xarray as xr +from zampy.datasets import converter +from zampy.datasets import utils +from zampy.datasets import validation +from zampy.datasets.dataset_protocol import SpatialBounds +from zampy.datasets.dataset_protocol import TimeBounds +from zampy.datasets.dataset_protocol import Variable +from zampy.datasets.dataset_protocol import copy_properties_file +from zampy.datasets.dataset_protocol import write_properties_file +from zampy.utils import regrid + + +## Ignore missing class/method docstrings: they are implemented in the Dataset class. 
+# ruff: noqa: D102 + + +class ECMWFDataset: # noqa: D101 + name: str + time_bounds: TimeBounds + spatial_bounds = SpatialBounds(90, 180, -90, -180) + crs = "EPSG:4326" + + raw_variables: list[Variable] + cds_var_names: dict[str, str] + variable_names: list[str] + variables: list[Variable] + license = "cc-by-4.0" + bib = """ + @article{hersbach2020era5, + title={The ERA5 global reanalysis}, + author={Hersbach, Hans et al.}, + journal={Quarterly Journal of the Royal Meteorological Society}, + volume={146}, + number={730}, + pages={1999--2049}, + year={2020}, + publisher={Wiley Online Library} + } + """ + cds_dataset: str + + def __init__(self) -> None: + """Init.""" + pass + + def download( + self, + download_dir: Path, + time_bounds: TimeBounds, + spatial_bounds: SpatialBounds, + variable_names: list[str], + overwrite: bool = False, + ) -> bool: + validation.validate_download_request( + self, + download_dir, + time_bounds, + spatial_bounds, + variable_names, + ) + + download_folder = download_dir / self.name + download_folder.mkdir(parents=True, exist_ok=True) + + utils.cds_request( + dataset=self.cds_dataset, + variables=variable_names, + time_bounds=time_bounds, + spatial_bounds=spatial_bounds, + path=download_folder, + cds_var_names=self.cds_var_names, + overwrite=overwrite, + ) + + write_properties_file( + download_folder, spatial_bounds, time_bounds, variable_names + ) + + return True + + def ingest( + self, + download_dir: Path, + ingest_dir: Path, + overwrite: bool = False, + ) -> bool: + download_folder = download_dir / self.name + ingest_folder = ingest_dir / self.name + ingest_folder.mkdir(parents=True, exist_ok=True) + + data_file_pattern = f"{self.name}_*.nc" + data_files = list(download_folder.glob(data_file_pattern)) + + for file in data_files: + utils.convert_to_zampy( + ingest_folder, + file=file, + overwrite=overwrite, + ) + + copy_properties_file(download_folder, ingest_folder) + + return True + + def load( + self, + ingest_dir: Path, + 
time_bounds: TimeBounds, + spatial_bounds: SpatialBounds, + resolution: float, + regrid_method: str, + variable_names: list[str], + ) -> xr.Dataset: + files: list[Path] = [] + for var in self.variable_names: + if var in variable_names: + files += (ingest_dir / self.name).glob(f"{self.name}_{var}*.nc") + + ds = xr.open_mfdataset(files, chunks={"latitude": 200, "longitude": 200}) + ds = ds.sel(time=slice(time_bounds.start, time_bounds.end)) + ds = regrid.regrid_data(ds, spatial_bounds, resolution, regrid_method) + + return ds + + def convert( + self, + ingest_dir: Path, + convention: Union[str, Path], + ) -> bool: + converter.check_convention(convention) + ingest_folder = ingest_dir / self.name + + data_file_pattern = f"{self.name}_*.nc" + + data_files = list(ingest_folder.glob(data_file_pattern)) + + for file in data_files: + # start conversion process + print(f"Start processing file `{file.name}`.") + ds = xr.open_dataset(file, chunks={"x": 50, "y": 50}) + ds = converter.convert(ds, dataset=self, convention=convention) + # TODO: support derived variables + # TODO: other calculations + # call ds.compute() + + return True diff --git a/src/zampy/datasets/era5.py b/src/zampy/datasets/era5.py index 6c7c64d..ff36a0b 100644 --- a/src/zampy/datasets/era5.py +++ b/src/zampy/datasets/era5.py @@ -1,247 +1,60 @@ """ERA5 dataset.""" -from pathlib import Path -from typing import List -from typing import Union import numpy as np -import xarray as xr -from zampy.datasets import converter -from zampy.datasets import utils -from zampy.datasets import validation -from zampy.datasets.dataset_protocol import Dataset -from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds from zampy.datasets.dataset_protocol import Variable -from zampy.datasets.dataset_protocol import copy_properties_file -from zampy.datasets.dataset_protocol import write_properties_file +from zampy.datasets.ecmwf_dataset import ECMWFDataset from 
zampy.reference.variables import VARIABLE_REFERENCE_LOOKUP from zampy.reference.variables import unit_registry -from zampy.utils import regrid -## Ignore missing class/method docstrings: they are implemented in the Dataset class. -# ruff: noqa: D102 - - -class ERA5(Dataset): # noqa: D101 +class ERA5(ECMWFDataset): # noqa: D101 name = "era5" - time_bounds = TimeBounds(np.datetime64("1940-01-01"), np.datetime64("2023-06-30")) - spatial_bounds = SpatialBounds(90, 180, -90, -180) + time_bounds = TimeBounds(np.datetime64("1940-01-01"), np.datetime64("2023-07-31")) - raw_variables = ( + raw_variables = [ Variable(name="mtpr", unit=unit_registry.kilogram_per_square_meter_second), Variable(name="strd", unit=unit_registry.joule_per_square_meter), Variable(name="ssrd", unit=unit_registry.joule_per_square_meter), Variable(name="sp", unit=unit_registry.pascal), Variable(name="u10", unit=unit_registry.meter_per_second), Variable(name="v10", unit=unit_registry.meter_per_second), - ) + ] # variable names used in cdsapi downloading request - variable_names = ( - "mean_total_precipitation_rate", - "surface_thermal_radiation_downwards", - "surface_solar_radiation_downwards", - "surface_pressure", - "10m_u_component_of_wind", - "10m_v_component_of_wind", - ) - - license = "cc-by-4.0" - bib = """ - @article{hersbach2020era5, - title={The ERA5 global reanalysis}, - author={Hersbach, Hans et al.}, - journal={Quarterly Journal of the Royal Meteorological Society}, - volume={146}, - number={730}, - pages={1999--2049}, - year={2020}, - publisher={Wiley Online Library} - } - """ - - def download( - self, - download_dir: Path, - time_bounds: TimeBounds, - spatial_bounds: SpatialBounds, - variable_names: List[str], - overwrite: bool = False, - ) -> bool: - validation.validate_download_request( - self, - download_dir, - time_bounds, - spatial_bounds, - variable_names, - ) - - download_folder = download_dir / self.name - download_folder.mkdir(parents=True, exist_ok=True) - - utils.cds_request( 
- dataset="reanalysis-era5-single-levels", - variables=variable_names, - time_bounds=time_bounds, - spatial_bounds=spatial_bounds, - path=download_folder, - overwrite=overwrite, - ) - - write_properties_file( - download_folder, spatial_bounds, time_bounds, variable_names - ) - - return True - - def ingest( - self, - download_dir: Path, - ingest_dir: Path, - overwrite: bool = False, - ) -> bool: - download_folder = download_dir / self.name - ingest_folder = ingest_dir / self.name - ingest_folder.mkdir(parents=True, exist_ok=True) - - data_file_pattern = "era5_*.nc" - data_files = list(download_folder.glob(data_file_pattern)) - - for file in data_files: - convert_to_zampy( - ingest_folder, - file=file, - overwrite=overwrite, - ) - - copy_properties_file(download_folder, ingest_folder) - - return True - - def load( - self, - ingest_dir: Path, - time_bounds: TimeBounds, - spatial_bounds: SpatialBounds, - resolution: float, - regrid_method: str, - variable_names: List[str], - ) -> xr.Dataset: - files: List[Path] = [] - for var in self.variable_names: - if var in variable_names: - files += (ingest_dir / self.name).glob(f"era5_{var}*.nc") + cds_var_names = { + "total_precipitation": "mean_total_precipitation_rate", + "surface_thermal_radiation_downwards": "surface_thermal_radiation_downwards", + "surface_solar_radiation_downwards": "surface_solar_radiation_downwards", + "surface_pressure": "surface_pressure", + "eastward_component_of_wind": "10m_u_component_of_wind", + "northward_component_of_wind": "10m_v_component_of_wind", + } - ds = xr.open_mfdataset(files, chunks={"latitude": 200, "longitude": 200}) - ds = ds.sel(time=slice(time_bounds.start, time_bounds.end)) - ds = regrid.regrid_data(ds, spatial_bounds, resolution, regrid_method) + variable_names = list(cds_var_names.keys()) - return ds + variables = [VARIABLE_REFERENCE_LOOKUP[var] for var in variable_names] - def convert( - self, - ingest_dir: Path, - convention: Union[str, Path], - ) -> bool: - 
converter.check_convention(convention) - ingest_folder = ingest_dir / self.name + cds_dataset = "reanalysis-era5-single-levels" - data_file_pattern = "era5_*.nc" - data_files = list(ingest_folder.glob(data_file_pattern)) +class ERA5Land(ECMWFDataset): # noqa: D101 + name = "era5-land" + time_bounds = TimeBounds(np.datetime64("1950-01-01"), np.datetime64("2023-07-31")) - for file in data_files: - # start conversion process - print(f"Start processing file `{file.name}`.") - ds = xr.open_dataset(file, chunks={"x": 50, "y": 50}) - ds = converter.convert(ds, dataset=self, convention=convention) - # TODO: support derived variables - # TODO: other calculations - # call ds.compute() + raw_variables = [ + Variable(name="t2m", unit=unit_registry.kelvin), + Variable(name="d2m", unit=unit_registry.kelvin), + ] - return True - - -def convert_to_zampy( - ingest_folder: Path, - file: Path, - overwrite: bool = False, -) -> None: - """Convert the downloaded nc files to standard CF/Zampy netCDF files. - - The downloaded ERA5 data already follows CF1.6 convention. However, it uses - (abbreviated) variable name instead of standard name, which prohibits the format - conversion. Therefore we need to ingest the downloaded files and rename all - variables to standard names. - - Args: - ingest_folder: Folder where the files have to be written to. - file: Path to the ERA5 nc file. - overwrite: Overwrite all existing files. If False, file that already exist will - be skipped. 
- """ - ncfile = ingest_folder / file.with_suffix(".nc").name - if ncfile.exists() and not overwrite: - print(f"File '{ncfile.name}' already exists, skipping...") - else: - ds = parse_nc_file(file) - - ds.to_netcdf(path=ncfile) - - -var_reference_era5_to_zampy = { - "mtpr": "total_precipitation", - "strd": "surface_thermal_radiation_downwards", - "ssrd": "surface_solar_radiation_downwards", - "sp": "surface_pressure", - "u10": "eastward_component_of_wind", - "v10": "northward_component_of_wind", -} - -WATER_DENSITY = 997.0 # kg/m3 - - -def parse_nc_file(file: Path) -> xr.Dataset: - """Parse the downloaded ERA5 nc files, to CF/Zampy standard dataset. - - Args: - file: Path to the ERA5 nc file. - - Returns: - CF/Zampy formatted xarray Dataset - """ - # Open chunked: will be dask array -> file writing can be parallelized. - ds = xr.open_dataset(file, chunks={"x": 50, "y": 50}) - - for variable in ds.variables: - if variable in var_reference_era5_to_zampy: - var = str(variable) # Cast to string to please mypy - variable_name = var_reference_era5_to_zampy[var] - ds = ds.rename({var: variable_name}) - # convert radiation to flux J/m2 to W/m2 - # https://confluence.ecmwf.int/pages/viewpage.action?pageId=155337784 - if variable_name in ( - "surface_solar_radiation_downwards", - "surface_thermal_radiation_downwards", - ): - ds[variable_name] = ds[variable_name] / 3600 - # conversion precipitation kg/m2s to mm/s - elif variable_name == "total_precipitation": - ds[variable_name] = ds[variable_name] / WATER_DENSITY - ds[variable_name].attrs["units"] = "meter_per_second" - # convert from m/s to mm/s - ds = converter._convert_var( - ds, variable_name, VARIABLE_REFERENCE_LOOKUP[variable_name].unit - ) + # variable names used in cdsapi downloading request + cds_var_names = { + "air_temperature": "2m_temperature", + "dewpoint_temperature": "2m_dewpoint_temperature", + } - ds[variable_name].attrs["units"] = str( - VARIABLE_REFERENCE_LOOKUP[variable_name].unit - ) - 
ds[variable_name].attrs["description"] = VARIABLE_REFERENCE_LOOKUP[ - variable_name - ].desc + variable_names = list(cds_var_names.keys()) - # TODO: add dataset attributes. + variables = [VARIABLE_REFERENCE_LOOKUP[var] for var in variable_names] - return ds + cds_dataset = "reanalysis-era5-land" diff --git a/src/zampy/datasets/eth_canopy_height.py b/src/zampy/datasets/eth_canopy_height.py index 9e8a2a0..aedc5b0 100644 --- a/src/zampy/datasets/eth_canopy_height.py +++ b/src/zampy/datasets/eth_canopy_height.py @@ -1,14 +1,12 @@ """ETH canopy height dataset.""" import gzip from pathlib import Path -from typing import List from typing import Union import numpy as np import xarray as xr from zampy.datasets import converter from zampy.datasets import utils from zampy.datasets import validation -from zampy.datasets.dataset_protocol import Dataset from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds from zampy.datasets.dataset_protocol import Variable @@ -27,20 +25,18 @@ # ruff: noqa: D102 -class EthCanopyHeight(Dataset): # noqa: D101 +class EthCanopyHeight: # noqa: D101 name = "eth-canopy-height" time_bounds = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-12-31")) spatial_bounds = SpatialBounds(90, 180, -90, -180) crs = "EPSG:4326" - raw_variables = ( + raw_variables = [ Variable(name="h_canopy", unit=unit_registry.meter), Variable(name="h_canopy_SD", unit=unit_registry.meter), - ) - variable_names = ("height_of_vegetation", "height_of_vegetation_standard_deviation") - variables = ( - VARIABLE_REFERENCE_LOOKUP[var] for var in variable_names - ) # type: ignore + ] + variable_names = ["height_of_vegetation", "height_of_vegetation_standard_deviation"] + variables = [VARIABLE_REFERENCE_LOOKUP[var] for var in variable_names] license = "cc-by-4.0" bib = """ @@ -55,12 +51,16 @@ class EthCanopyHeight(Dataset): # noqa: D101 data_url = 
"https://share.phys.ethz.ch/~pf/nlangdata/ETH_GlobalCanopyHeight_10m_2020_version1/3deg_cogs/" + def __init__(self) -> None: + """Init.""" + pass + def download( self, download_dir: Path, time_bounds: TimeBounds, spatial_bounds: SpatialBounds, - variable_names: List[str], + variable_names: list[str], overwrite: bool = False, ) -> bool: validation.validate_download_request( @@ -128,9 +128,9 @@ def load( spatial_bounds: SpatialBounds, resolution: float, regrid_method: str, - variable_names: List[str], + variable_names: list[str], ) -> xr.Dataset: - files: List[Path] = [] + files: list[Path] = [] if self.variable_names[0] in variable_names: files += (ingest_dir / self.name).glob("*Map.nc") if self.variable_names[1] in variable_names: @@ -165,7 +165,7 @@ def convert( return True -def get_filenames(bounds: SpatialBounds, sd_file: bool = False) -> List[str]: +def get_filenames(bounds: SpatialBounds, sd_file: bool = False) -> list[str]: """Get all valid ETH canopy height dataset filenames within given spatial bounds. 
Args: @@ -202,7 +202,7 @@ def get_filenames(bounds: SpatialBounds, sd_file: bool = False) -> List[str]: return get_valid_filenames(fnames) -def get_valid_filenames(filenames: List[str]) -> List[str]: +def get_valid_filenames(filenames: list[str]) -> list[str]: """Returns a new list with only the valid filenames.""" valid_name_file = ( Path(__file__).parent / "assets" / "h_canopy_filenames_compressed.txt.gz" diff --git a/src/zampy/datasets/utils.py b/src/zampy/datasets/utils.py index c6a6214..e60168c 100644 --- a/src/zampy/datasets/utils.py +++ b/src/zampy/datasets/utils.py @@ -1,17 +1,18 @@ """Shared utilities from datasets.""" import urllib.request from pathlib import Path -from typing import List from typing import Optional -from typing import Tuple from typing import Union import cdsapi import pandas as pd import requests +import xarray as xr from tqdm import tqdm from tqdm.contrib.itertools import product +from zampy.datasets import converter from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds +from zampy.reference.variables import VARIABLE_REFERENCE_LOOKUP PRODUCT_FNAME = { @@ -72,10 +73,11 @@ def get_file_size(fpath: Path) -> int: def cds_request( dataset: str, - variables: List[str], + variables: list[str], time_bounds: TimeBounds, spatial_bounds: SpatialBounds, path: Path, + cds_var_names: dict[str, str], overwrite: bool, ) -> None: """Download data via CDS API. @@ -90,10 +92,11 @@ def cds_request( Args: dataset: Dataset name for retrieval via `cdsapi`. - variables: Zampy variable. + variables: Zampy variables. time_bounds: Zampy time bounds object. spatial_bounds: Zampy spatial bounds object. path: File path to which the data should be saved. + cds_var_names: Variable names from CDS server side. overwrite: If an existing file (of the same size!) should be overwritten. 
""" fname = PRODUCT_FNAME[dataset] @@ -120,7 +123,7 @@ def cds_request( dataset, { "product_type": "reanalysis", - "variable": [variable], + "variable": [cds_var_names[variable]], "year": year, "month": month, # fmt: off @@ -156,8 +159,96 @@ def cds_request( print(f"File '{fpath.name}' already exists, skipping...") -def time_bounds_to_year_month(time_bounds: TimeBounds) -> List[Tuple[str, str]]: +def time_bounds_to_year_month(time_bounds: TimeBounds) -> list[tuple[str, str]]: """Return year/month pairs.""" date_range = pd.date_range(start=time_bounds.start, end=time_bounds.end, freq="M") year_month_pairs = [(str(date.year), str(date.month)) for date in date_range] return year_month_pairs + + +def convert_to_zampy( + ingest_folder: Path, + file: Path, + overwrite: bool = False, +) -> None: + """Convert the downloaded nc files to standard CF/Zampy netCDF files. + + The downloaded ERA5/ERA5-land data already follows CF1.6 convention. However, + it uses (abbreviated) variable name instead of standard name, which prohibits + the format conversion. Therefore we need to ingest the downloaded files and + rename all variables to standard names. + + Args: + ingest_folder: Folder where the files have to be written to. + file: Path to the ERA5 nc file. + overwrite: Overwrite all existing files. If False, file that already exist will + be skipped. 
+ """ + ncfile = ingest_folder / file.with_suffix(".nc").name + if ncfile.exists() and not overwrite: + print(f"File '{ncfile.name}' already exists, skipping...") + else: + ds = parse_nc_file(file) + + ds.to_netcdf(path=ncfile) + + +var_reference_era5_to_zampy = { + # era5 variables + "mtpr": "total_precipitation", + "strd": "surface_thermal_radiation_downwards", + "ssrd": "surface_solar_radiation_downwards", + "sp": "surface_pressure", + "u10": "eastward_component_of_wind", + "v10": "northward_component_of_wind", + # era5-land variables + "t2m": "air_temperature", + "d2m": "dewpoint_temperature", +} + +WATER_DENSITY = 997.0 # kg/m3 + + +def parse_nc_file(file: Path) -> xr.Dataset: + """Parse the downloaded ERA5 nc files, to CF/Zampy standard dataset. + + Args: + file: Path to the ERA5 nc file. + + Returns: + CF/Zampy formatted xarray Dataset + """ + # Open chunked: will be dask array -> file writing can be parallelized. + ds = xr.open_dataset(file, chunks={"x": 50, "y": 50}) + + for variable in ds.variables: + if variable in var_reference_era5_to_zampy: + var = str(variable) # Cast to string to please mypy + variable_name = var_reference_era5_to_zampy[var] + ds = ds.rename({var: variable_name}) + # convert radiation to flux J/m2 to W/m2 + # https://confluence.ecmwf.int/pages/viewpage.action?pageId=155337784 + if variable_name in ( + "surface_solar_radiation_downwards", + "surface_thermal_radiation_downwards", + ): + ds[variable_name] = ds[variable_name] / 3600 + # conversion precipitation kg/m2s to mm/s + elif variable_name == "total_precipitation": + ds[variable_name] = ds[variable_name] / WATER_DENSITY + ds[variable_name].attrs["units"] = "meter_per_second" + # convert from m/s to mm/s + ds = converter._convert_var( + ds, variable_name, VARIABLE_REFERENCE_LOOKUP[variable_name].unit + ) + + ds[variable_name].attrs["units"] = str( + VARIABLE_REFERENCE_LOOKUP[variable_name].unit + ) + ds[variable_name].attrs["description"] = VARIABLE_REFERENCE_LOOKUP[ + 
variable_name + ].desc + + # TODO: add dataset attributes. + + return ds diff --git a/src/zampy/datasets/validation.py b/src/zampy/datasets/validation.py index 5da63c8..df5f9f6 100644 --- a/src/zampy/datasets/validation.py +++ b/src/zampy/datasets/validation.py @@ -1,6 +1,5 @@ """Checks for user input validation.""" from pathlib import Path -from typing import List from zampy.datasets.dataset_protocol import Dataset from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds @@ -23,7 +22,7 @@ def validate_download_request( download_dir: Path, time_bounds: TimeBounds, spatial_bounds: SpatialBounds, - variable_names: List[str], + variable_names: list[str], ) -> None: """Validate the user's download request against the dataset. @@ -42,7 +41,7 @@ def validate_download_request( def compare_variables( dataset: Dataset, - variable_names: List[str], + variable_names: list[str], ) -> None: """Compare the user's requested variables to the dataset's variables. diff --git a/src/zampy/utils/regrid.py b/src/zampy/utils/regrid.py index c52af19..9533363 100644 --- a/src/zampy/utils/regrid.py +++ b/src/zampy/utils/regrid.py @@ -1,5 +1,4 @@ """Zampy regridding functions.""" -from typing import Tuple import numpy as np import pandas as pd import xarray as xr @@ -19,7 +18,7 @@ def assert_xesmf_available() -> None: ) from e -def infer_resolution(dataset: xr.Dataset) -> Tuple[float, float]: +def infer_resolution(dataset: xr.Dataset) -> tuple[float, float]: """Infer the resolution of a dataset's latitude and longitude coordinates. 
Args: diff --git a/tests/test_converter.py b/tests/test_converter.py index 7214dd4..ca2239c 100644 --- a/tests/test_converter.py +++ b/tests/test_converter.py @@ -5,8 +5,8 @@ import pytest import xarray as xr from test_datasets import data_folder -from zampy.datasets import EthCanopyHeight from zampy.datasets import converter +from zampy.datasets.catalog import EthCanopyHeight from zampy.datasets.eth_canopy_height import parse_tiff_file diff --git a/tests/test_data/era5-land/era5-land_air_temperature_1996-1.nc b/tests/test_data/era5-land/era5-land_air_temperature_1996-1.nc new file mode 100644 index 0000000000000000000000000000000000000000..278c95ada57775a1dfd2262b8bb4dcdedfb29c44 GIT binary patch literal 162496 zcmeI$&u>&!6u|L2oida%l=52;qymESqqa~$NQjSlEtXh{Qe$++*few~V}>bCrzkFj zr6#&i7ydvD3-u2W7aC*o+!zf%B;W#n#26R2K@BkySm8bQoja5fqK)9fd~e#GzW2_% z@4oZy{mg4$?C#t$rEy+kT|a{H1K^^iZM zHa6Cogrqb#%g^O{jn`$vw$yC-nW#@nuq|0xz2}qq`1jO&iHUmMG?R+MrJ+J)bTI#0 zoM2|nH*zqUFtu0p+a|tVd>7{OoL|ds_*0f75;LWfx`h5>e|I~iyO}cLX)X8b$${yZ zY&x#kAx>zLBT0V~Huutj_`cpt*$vwsjze{1sT+srO z9xRjwD)|x9YzH6Te2W{XtD`;1N9iEFLbj78We00>!=BB3eP)I|Amx_Z?%cYqtG93K z_TK)!aC1+WNiUK8Ni!inp?CAPE|=e^`PCxZ?PDKxe;L~xO@T?W+yXWz&S#l`+1k?63yO5InXNip567vtwP z)q-Jrx~fIb*1ku~T$|QxzvtL*nXS@O?YLZzl4P5}Gj?0bJ$8GR-Il5Kk0*i6H~U^V zd1iaq{b0N#K8sh4`Cbm$Htz0>DV_S_$*Ub6mX&PVt8o}#hjMdi`!MS5E&cn4Va!jn zzkB}b(C!tKZL1D#V|*pWeza7mjJTTH#pmY5?%~n$NXtl}v@_q*zG1_*VrqVIv4 z3@qJcb8Ht)aToo);X~!&z4>ybU_Y>=k45%d`q`H+j}(SWn^vzZxlguD9j*7Zwy)eh zxNGgE_SW{c*7d2|ZJPaj7b2uJcuE(_`L?BG+SvsCtT6S0NUxuNa?C9J^ye!wtm?{q zHa_==J9;ypjVu(Y-Bi;i3uQ;wmaZLLy(;CpFR;VorZ^ z;ef9fkqS@&DnJFsRe=Y>?LA}P(LDD0o@M+W+n(Io{r>aV?_Z;d_66THzu)&${kYfT zKHq=J`}}9LWB(5Zzu)KkZ6AHUzRu}iSEf3CeV3~4_x((+8K>_jU9;bhe@e}MpWjro z-{()Q+3(+O^p&|f9^cQjntb1n``g1F^Y`ofk(VxMe!tI`cW-Hazt5L+F~ z^`>nba#6<`X}Rd+aobkrqTRCamgb^5$!p0)Z%f|XT(nNoX5^w%l9$d!Pf1=X7u8Fi z$wlu--sNm`pXB|XjZRD6`D|2_ymQ$oC22oqqa%{{O*Yynd1tcG*OGTU8 z8{^Km+i^XNnIK8Fx%;+pqT4d2LE|KijT$FwOlzE?u}R}pjm;XTX`HTchQ^s1XK9?R 
zagN5@G|tsHPvh+x=WAS`@eYj(HMVHHQ{y6yi#0CMc$dbd8kcFjTjM<%mup<1aizvp K8e28qtMO0UyUdvY literal 0 HcmV?d00001 diff --git a/tests/test_data/era5-land/era5-land_dewpoint_temperature_1996-1.nc b/tests/test_data/era5-land/era5-land_dewpoint_temperature_1996-1.nc new file mode 100644 index 0000000000000000000000000000000000000000..c7db40bbd7c28de2551d17e061353fad81e3b718 GIT binary patch literal 162496 zcmeI$Piz!b9Ki9n-LkZ0DdoQiN(BTJtSywk331qIvBc6UHAZ)hO=Xv|X4&F)tD=c; z)I<;JK@l|^)VtAx#+ZyAjD|lF@F4z+@t_xtF-D^&e7`rpx0Ds4jp)IAU)ugUGrxKB z<~Q$scG@TUdbZDOxw569smb_uPFwUygWJ`q2R?o3!0N%io?Q*;Hyf6@eaY#1$e&Ul zTN+G4Qrg<&=M8#|=VZgS)B^dLXiQ45Ety}t@14f@_tX-JiALQt&84yOXt6qxFZ>uM znE6YL984xm{Z;+8iLV#mh57RJ_aAk`pSdEDm?xbyB=i^iyW1h%&65$&ZogSi4ovq9 z(|yhkaayY!N&1_xxxXEZ@9Wi+-LUQUIMha#y5z{x+axtNfRyEx&lZnr&xN-_l6c%}M*~v0r|=CoWgtq;mPS<6|-{4^WGPS{z9c%v>#Z~$1?jZ{XAHxj2FksTi32ByHB>Q-5r}cy4LK? z?^(aKtD~#4V`FNuO|zfxLWHy?Pw7%Q-?o%YJDZ@N6{b-T8SwK@j+v$Jes@lWRa==4 z$LAh(M=$5Ik%c0)n`Js>q3r72-n*-JAnYA>3uS}7tgM0jNOfeqP(8n5@^;8&DTHbI ztB-U4-lIb|i4(tmHu>I;*5BKIHX^r#?XbSz{>s~vM-&{0t9?ilq(-U#q((YJ%;~c~ z9rX1gQUNMJ1*pK3DsW3U*gyFl&10|cS;qgd?a8g*?>~?I{&kvYU+`V?`+YyPk9$4t z^Zlp1&wofe_Wxk;`+dIO_R;6->zw{|WoqNscd6Qb-_MM?ar%DJb^HDJXV&fa`K@*P zeg3Sv{r>GnUzuy;@%_xM%lG}bzdh_Rf4{yTdFhho_xpT#_m<}O`+Ru^m*)3h_(Em> zrQvTdPU9yPpaN8Y3Qz$mKn17(6`%rCfC~IiDzHb|eGddg>cBi;9pC^C-~bNb01n^) z4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR| z-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb z01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^) z4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR| z-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb z01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^) z4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR| z-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb z01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^) 
z4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR| z-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb z01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^) z4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR| z-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb z01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^) z4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR| z-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n^)4&VR|-~bNb01n{5<#)hH3v$s5 zhi%)Oi@MiI%S9)T+qNbb?UjYMG8Z*SUVAQjQSuh$q79NZHy53fymT(QU-DA9s8RAv zE_zw=e$PgmB=6^JbXxMh&qgK5`!*Y;B<<^LbX4-rW}_{VcP1NsB6-KNQCadn$VQT8 z4rPK9vir$Q;Kq446O797*D`?{?;DvQA$d_IaN|Cn368jaGl3ibx0ztQ8%HK^_hG`| zW9cs)2J!ubK~ef!83yjY)`dY*`U}Fq-Cutgyd?eI9R}_`_k_Vl>2Ev?Bu$#TzKwDJ z#<=tCc3cl*rb&`*?!IlD?zW6+(%7uAMdJ*OX^k^AwrZTEu}$M_jdL{4)i_V%e2oh< zF4TC1#zh*h)VNsV5{*}ByjtT@jqMt*(YQ?Ga*ZoAUaN7X##I`x(|Enc8#J!gxJKiR K8ap)Jr11|YS6hn! literal 0 HcmV?d00001 diff --git a/tests/test_data/era5-land/properties.json b/tests/test_data/era5-land/properties.json new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_data/era5/era5_10m_u_component_of_wind_1996-1.nc b/tests/test_data/era5/era5_eastward_component_of_wind_1996-1.nc similarity index 100% rename from tests/test_data/era5/era5_10m_u_component_of_wind_1996-1.nc rename to tests/test_data/era5/era5_eastward_component_of_wind_1996-1.nc diff --git a/tests/test_data/era5/era5_10m_v_component_of_wind_1996-1.nc b/tests/test_data/era5/era5_northward_component_of_wind_1996-1.nc similarity index 100% rename from tests/test_data/era5/era5_10m_v_component_of_wind_1996-1.nc rename to tests/test_data/era5/era5_northward_component_of_wind_1996-1.nc diff --git a/tests/test_data/era5/era5_mean_total_precipitation_rate_1996-1.nc b/tests/test_data/era5/era5_total_precipitation_1996-1.nc similarity index 100% rename from tests/test_data/era5/era5_mean_total_precipitation_rate_1996-1.nc rename to 
tests/test_data/era5/era5_total_precipitation_1996-1.nc diff --git a/tests/test_datasets/test_era5.py b/tests/test_datasets/test_era5.py index c3908b5..2d166c2 100644 --- a/tests/test_datasets/test_era5.py +++ b/tests/test_datasets/test_era5.py @@ -37,7 +37,8 @@ def test_download(self, mock_retrieve, valid_path_cds, dummy_dir): """ times = TimeBounds(np.datetime64("2010-01-01"), np.datetime64("2010-01-31")) bbox = SpatialBounds(54, 56, 1, 3) - variable = ["10m_v_component_of_wind"] + variable = ["eastward_component_of_wind"] + cds_var_names = ["10m_u_component_of_wind"] download_dir = Path(dummy_dir, "download") era5_dataset = era5.ERA5() @@ -57,7 +58,7 @@ def test_download(self, mock_retrieve, valid_path_cds, dummy_dir): "reanalysis-era5-single-levels", { "product_type": "reanalysis", - "variable": variable, + "variable": cds_var_names, "year": "2010", "month": "1", # fmt: off @@ -100,7 +101,7 @@ def ingest_dummy_data(self, temp_dir): Path( temp_dir, "era5", - "era5_10m_v_component_of_wind_1996-1.nc", + "era5_northward_component_of_wind_1996-1.nc", ) ) @@ -109,13 +110,13 @@ def ingest_dummy_data(self, temp_dir): def test_ingest(self, dummy_dir): """Test ingest function.""" ds, _ = self.ingest_dummy_data(dummy_dir) - assert type(ds) == xr.Dataset + assert isinstance(ds, xr.Dataset) def test_load(self): """Test load function.""" times = TimeBounds(np.datetime64("1996-01-01"), np.datetime64("1996-01-02")) bbox = SpatialBounds(39, -107, 37, -109) - variable = ["10m_v_component_of_wind"] + variable = ["northward_component_of_wind"] era5_dataset = era5.ERA5() @@ -140,79 +141,3 @@ def test_convert(self, dummy_dir): _, era5_dataset = self.ingest_dummy_data(dummy_dir) era5_dataset.convert(ingest_dir=Path(dummy_dir), convention="ALMA") # TODO: finish this test when the function is complete. 
- - -def test_convert_to_zampy(dummy_dir): - """Test function for converting file to zampy format.""" - ingest_folder = Path(data_folder, "era5") - era5.convert_to_zampy( - ingest_folder=Path(dummy_dir), - file=Path(ingest_folder, "era5_10m_v_component_of_wind_1996-1.nc"), - overwrite=True, - ) - - ds = xr.load_dataset(Path(dummy_dir, "era5_10m_v_component_of_wind_1996-1.nc")) - - assert list(ds.data_vars)[0] == "northward_component_of_wind" - - -def test_parse_nc_file_10m_wind(): - """Test parsing netcdf file function with 10 meter velocity u/v component.""" - variables = { - "10m_v_component_of_wind": "northward_component_of_wind", - "10m_u_component_of_wind": "eastward_component_of_wind", - } - for variable in variables: - ds = era5.parse_nc_file(data_folder / "era5" / f"era5_{variable}_1996-1.nc") - expected_var_name = variables[variable] - assert list(ds.data_vars)[0] == expected_var_name - assert ds[expected_var_name].attrs["units"] == "meter_per_second" - - -def test_parse_nc_file_radiation(): - """Test parsing netcdf file function with surface radiation.""" - variables = { - "surface_thermal_radiation_downwards": "strd", - "surface_solar_radiation_downwards": "ssrd", - } - for variable in variables: - ds_original = xr.load_dataset( - data_folder / "era5" / f"era5_{variable}_1996-1.nc" - ) - ds = era5.parse_nc_file(data_folder / "era5" / f"era5_{variable}_1996-1.nc") - - assert list(ds.data_vars)[0] == variable - assert ds[variable].attrs["units"] == "watt_per_square_meter" - assert np.allclose( - ds_original[variables[variable]].values, - ds[variable].values * 3600, - equal_nan=True, - ) - - -def test_parse_nc_file_precipitation(): - """Test parsing netcdf file function with precipitation.""" - ds_original = xr.load_dataset( - data_folder / "era5" / "era5_mean_total_precipitation_rate_1996-1.nc" - ) - ds = era5.parse_nc_file( - data_folder / "era5" / "era5_mean_total_precipitation_rate_1996-1.nc" - ) - expected_var_name = "total_precipitation" - - assert 
list(ds.data_vars)[0] == expected_var_name
-    assert ds["total_precipitation"].attrs["units"] == "millimeter_per_second"
-    assert np.allclose(
-        ds_original["mtpr"].values,
-        ds["total_precipitation"].values * era5.WATER_DENSITY / 1000,
-        equal_nan=True,
-    )
-
-
-def test_parse_nc_file_pressure():
-    """Test parsing netcdf file function with surface pressure."""
-    ds = era5.parse_nc_file(data_folder / "era5" / "era5_surface_pressure_1996-1.nc")
-    expected_var_name = "surface_pressure"
-
-    assert list(ds.data_vars)[0] == expected_var_name
-    assert ds["surface_pressure"].attrs["units"] == "pascal"
diff --git a/tests/test_datasets/test_era5_land.py b/tests/test_datasets/test_era5_land.py
new file mode 100644
index 0000000..db00847
--- /dev/null
+++ b/tests/test_datasets/test_era5_land.py
@@ -0,0 +1,143 @@
+"""Unit test for ERA5-land dataset."""
+
+import json
+from pathlib import Path
+from unittest.mock import patch
+import numpy as np
+import pytest
+import xarray as xr
+from zampy.datasets.catalog import ERA5Land
+from zampy.datasets.dataset_protocol import SpatialBounds
+from zampy.datasets.dataset_protocol import TimeBounds
+from . import data_folder
+
+
+@pytest.fixture(scope="function")
+def valid_path_cds(tmp_path_factory):
+    """Create a dummy .cdsapirc file."""
+    fn = tmp_path_factory.mktemp("usrhome") / ".cdsapirc"
+    with open(fn, mode="w", encoding="utf-8") as f:
+        f.write("url: a\nkey: 123:abc-def")
+    return fn
+
+
+@pytest.fixture(scope="function")
+def dummy_dir(tmp_path_factory):
+    """Create a dummy directory for testing."""
+    return tmp_path_factory.mktemp("data")
+
+
+class TestERA5Land:
+    """Test the ERA5Land class."""
+
+    @patch("cdsapi.Client.retrieve")
+    def test_download(self, mock_retrieve, valid_path_cds, dummy_dir):
+        """Test download functionality.
+        Here we mock the downloading and save the property file to a fake path.
+ """ + times = TimeBounds(np.datetime64("2010-01-01"), np.datetime64("2010-01-31")) + bbox = SpatialBounds(54, 56, 1, 3) + variable = ["dewpoint_temperature"] + cds_var_names = ["2m_dewpoint_temperature"] + download_dir = Path(dummy_dir, "download") + + era5_land_dataset = ERA5Land() + # create a dummy .cdsapirc + patching = patch("zampy.datasets.utils.CDSAPI_CONFIG_PATH", valid_path_cds) + with patching: + era5_land_dataset.download( + download_dir=download_dir, + time_bounds=times, + spatial_bounds=bbox, + variable_names=variable, + overwrite=True, + ) + + # make sure that the download is called + mock_retrieve.assert_called_once_with( + "reanalysis-era5-land", + { + "product_type": "reanalysis", + "variable": cds_var_names, + "year": "2010", + "month": "1", + # fmt: off + "day": [ + "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", + "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", + "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", + "31", + ], + "time": [ + "00:00", "01:00", "02:00", "03:00", "04:00", "05:00", "06:00", + "07:00", "08:00", "09:00", "10:00", "11:00", "12:00", "13:00", + "14:00", "15:00", "16:00", "17:00", "18:00", "19:00", "20:00", + "21:00", "22:00", "23:00", + ], + # fmt: on + "area": [ + bbox.north, + bbox.west, + bbox.south, + bbox.east, + ], + "format": "netcdf", + }, + ) + + # check property file + with (download_dir / "era5-land" / "properties.json").open( + mode="r", encoding="utf-8" + ) as file: + json_dict = json.load(file) + # check property + assert json_dict["variable_names"] == variable + + def ingest_dummy_data(self, temp_dir): + """Ingest dummy tif data to nc for other tests.""" + era5_land_dataset = ERA5Land() + era5_land_dataset.ingest(download_dir=data_folder, ingest_dir=Path(temp_dir)) + ds = xr.load_dataset( + Path( + temp_dir, + "era5-land", + "era5-land_dewpoint_temperature_1996-1.nc", + ) + ) + + return ds, era5_land_dataset + + def test_ingest(self, dummy_dir): + """Test ingest 
function.""" + ds, _ = self.ingest_dummy_data(dummy_dir) + assert isinstance(ds, xr.Dataset) + + def test_load(self): + """Test load function.""" + times = TimeBounds(np.datetime64("1996-01-01"), np.datetime64("1996-01-02")) + bbox = SpatialBounds(39, -107, 37, -109) + variable = ["dewpoint_temperature"] + + era5_land_dataset = ERA5Land() + + ds = era5_land_dataset.load( + ingest_dir=Path(data_folder), + time_bounds=times, + spatial_bounds=bbox, + variable_names=variable, + resolution=1.0, + regrid_method="flox", + ) + + # we assert the regridded coordinates + expected_lat = [37.0, 38.0, 39.0] + expected_lon = [-109.0, -108.0, -107.0] + + np.testing.assert_allclose(ds.latitude.values, expected_lat) + np.testing.assert_allclose(ds.longitude.values, expected_lon) + + def test_convert(self, dummy_dir): + """Test convert function.""" + _, era5_land_dataset = self.ingest_dummy_data(dummy_dir) + era5_land_dataset.convert(ingest_dir=Path(dummy_dir), convention="ALMA") + # TODO: finish this test when the function is complete. 
diff --git a/tests/test_recipes/generate_test_data.py b/tests/test_recipes/generate_test_data.py index 46b9b96..016c6cf 100644 --- a/tests/test_recipes/generate_test_data.py +++ b/tests/test_recipes/generate_test_data.py @@ -46,8 +46,8 @@ def generate_era5_file( ERA5_LOOKUP = { # name: (unit, fname) - "10m_u_component_of_wind": ("m s**-1", "u10"), - "10m_v_component_of_wind": ("m s**-1", "v10"), + "eastward_component_of_wind": ("m s**-1", "u10"), + "northward_component_of_wind": ("m s**-1", "v10"), "surface_pressure": ("Pa", "sp"), } diff --git a/tests/test_recipes/recipes/era5_recipe.yml b/tests/test_recipes/recipes/era5_recipe.yml index 576fb15..b925921 100644 --- a/tests/test_recipes/recipes/era5_recipe.yml +++ b/tests/test_recipes/recipes/era5_recipe.yml @@ -7,7 +7,7 @@ download: datasets: era5: variables: - - 10m_v_component_of_wind + - northward_component_of_wind - surface_pressure convert: diff --git a/tests/test_recipes/test_simple_recipe.py b/tests/test_recipes/test_simple_recipe.py index 064c7b9..a709010 100644 --- a/tests/test_recipes/test_simple_recipe.py +++ b/tests/test_recipes/test_simple_recipe.py @@ -26,7 +26,7 @@ def test_recipe(tmp_path: Path, mocker): time_bounds = TimeBounds( np.datetime64("2020-01-01T00:00"), np.datetime64("2020-12-31T23:59") ) - variables = ["10m_v_component_of_wind", "surface_pressure"] + variables = ["northward_component_of_wind", "surface_pressure"] generate_test_data.generate_era5_files( directory=tmp_path / "download", diff --git a/tests/test_utils.py b/tests/test_utils.py index 5239c2c..f7c5a7c 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,6 +5,8 @@ from unittest.mock import patch import numpy as np import pytest +import xarray as xr +from test_datasets import data_folder from zampy.datasets import utils from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds @@ -85,7 +87,8 @@ def valid_path_cds(tmp_path_factory): def 
test_cds_request(mock_retrieve, valid_path_cds):
     """ "Test cds request for downloading data from CDS server."""
     product = "reanalysis-era5-single-levels"
-    variables = ["10m_v_component_of_wind"]
+    variables = ["eastward_component_of_wind"]
+    cds_var_names = {"eastward_component_of_wind": "10m_u_component_of_wind"}
     time_bounds = TimeBounds(
         np.datetime64("2010-01-01T00:00:00"), np.datetime64("2010-01-31T23:00:00")
     )
@@ -97,14 +100,20 @@ def test_cds_request(mock_retrieve, valid_path_cds):
     patching = patch("zampy.datasets.utils.CDSAPI_CONFIG_PATH", valid_path_cds)
     with patching:
         utils.cds_request(
-            product, variables, time_bounds, spatial_bounds, path, overwrite
+            product,
+            variables,
+            time_bounds,
+            spatial_bounds,
+            path,
+            cds_var_names,
+            overwrite,
         )
 
     mock_retrieve.assert_called_with(
         product,
         {
             "product_type": "reanalysis",
-            "variable": variables,
+            "variable": ["10m_u_component_of_wind"],
             "year": "2010",
             "month": "1",
             # fmt: off
@@ -138,3 +147,104 @@ def test_time_bounds_to_year_month():
     expected = [("2010", "1")]
     year_month_pairs = utils.time_bounds_to_year_month(times)
     assert expected == year_month_pairs
+
+
+@pytest.fixture(scope="function")
+def dummy_dir(tmp_path_factory):
+    """Create a dummy directory for testing."""
+    return tmp_path_factory.mktemp("data")
+
+
+def test_convert_to_zampy(dummy_dir):
+    """Test function for converting file to zampy format."""
+    ingest_folder = Path(data_folder, "era5")
+    utils.convert_to_zampy(
+        ingest_folder=Path(dummy_dir),
+        file=Path(ingest_folder, "era5_northward_component_of_wind_1996-1.nc"),
+        overwrite=True,
+    )
+
+    ds = xr.load_dataset(Path(dummy_dir, "era5_northward_component_of_wind_1996-1.nc"))
+
+    assert list(ds.data_vars)[0] == "northward_component_of_wind"
+
+
+def test_parse_nc_file_10m_wind():
+    """Test parsing netcdf file function with 10 meter velocity u/v component."""
+    variables = ["northward_component_of_wind", "eastward_component_of_wind"]
+    for variable in variables:
+        ds = 
utils.parse_nc_file(data_folder / "era5" / f"era5_{variable}_1996-1.nc") + expected_var_name = variable + assert list(ds.data_vars)[0] == expected_var_name + assert ds[expected_var_name].attrs["units"] == "meter_per_second" + + +def test_parse_nc_file_radiation(): + """Test parsing netcdf file function with surface radiation.""" + variables = { + "surface_thermal_radiation_downwards": "strd", + "surface_solar_radiation_downwards": "ssrd", + } + for variable in variables: + ds_original = xr.load_dataset( + data_folder / "era5" / f"era5_{variable}_1996-1.nc" + ) + ds = utils.parse_nc_file(data_folder / "era5" / f"era5_{variable}_1996-1.nc") + + assert list(ds.data_vars)[0] == variable + assert ds[variable].attrs["units"] == "watt_per_square_meter" + assert np.allclose( + ds_original[variables[variable]].values, + ds[variable].values * 3600, + equal_nan=True, + ) + + +def test_parse_nc_file_precipitation(): + """Test parsing netcdf file function with precipitation.""" + ds_original = xr.load_dataset( + data_folder / "era5" / "era5_total_precipitation_1996-1.nc" + ) + ds = utils.parse_nc_file( + data_folder / "era5" / "era5_total_precipitation_1996-1.nc" + ) + expected_var_name = "total_precipitation" + + assert list(ds.data_vars)[0] == expected_var_name + assert ds["total_precipitation"].attrs["units"] == "millimeter_per_second" + assert np.allclose( + ds_original["mtpr"].values, + ds["total_precipitation"].values * utils.WATER_DENSITY / 1000, + equal_nan=True, + ) + + +def test_parse_nc_file_pressure(): + """Test parsing netcdf file function with surface pressure.""" + ds = utils.parse_nc_file(data_folder / "era5" / "era5_surface_pressure_1996-1.nc") + expected_var_name = "surface_pressure" + + assert list(ds.data_vars)[0] == expected_var_name + assert ds["surface_pressure"].attrs["units"] == "pascal" + + +def test_parse_nc_file_air_temperature(): + """Test parsing netcdf file function with 2 meter temperature.""" + ds = utils.parse_nc_file( + data_folder / 
"era5-land" / "era5-land_air_temperature_1996-1.nc" + ) + expected_var_name = "air_temperature" + + assert list(ds.data_vars)[0] == expected_var_name + assert ds["air_temperature"].attrs["units"] == "kelvin" + + +def test_parse_nc_file_dew_temperature(): + """Test parsing netcdf file function with 2 meter dewpoint temperature.""" + ds = utils.parse_nc_file( + data_folder / "era5-land" / "era5-land_dewpoint_temperature_1996-1.nc" + ) + expected_var_name = "dewpoint_temperature" + + assert list(ds.data_vars)[0] == expected_var_name + assert ds["dewpoint_temperature"].attrs["units"] == "kelvin" diff --git a/tests/test_validation.py b/tests/test_validation.py index 9e35ba2..819e6f5 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -2,8 +2,8 @@ import numpy as np import pytest -from zampy.datasets import EthCanopyHeight from zampy.datasets import validation +from zampy.datasets.catalog import EthCanopyHeight from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds