
Implement era5 land (#21)
* add era5 land dataset

* add demo notebook

* import era5 land in init

* update alma convention

* move shared parser function to utils

* add tests for utils

* add test for era5 land dataset

* remove unnecessary duplication check

* Revert "remove unnecessary duplication check"

This reverts commit d2242b6.

* era5land inherit from era5

* make a general ecmwf dataset and inherit for era5(land)

* please mypy

* move era5 land to era5.py

* drop py3.8, fix circular import and please mypy with proper protocol usage

* fix dataset import in notebooks

* implement consistent zampy name for ecmwf datasets
geek-yang authored Aug 15, 2023
1 parent 73f36c2 commit 08a7ee4
Showing 28 changed files with 2,048 additions and 398 deletions.
1,388 changes: 1,388 additions & 0 deletions demo/era5-land_dataset_demo.ipynb

Large diffs are not rendered by default.

96 changes: 60 additions & 36 deletions demo/era5_dataset_demo.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions demo/eth_dataset_demo.ipynb
@@ -24,7 +24,7 @@
"outputs": [],
"source": [
"import numpy as np\n",
"from zampy.datasets import EthCanopyHeight\n",
"from zampy.datasets.catalog import EthCanopyHeight\n",
"from zampy.datasets.dataset_protocol import TimeBounds, SpatialBounds\n",
"from pathlib import Path\n",
"\n",
@@ -2660,7 +2660,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
"version": "3.10.0"
},
"orig_nbformat": 4
},
6 changes: 4 additions & 2 deletions pyproject.toml
@@ -45,6 +45,7 @@ classifiers = [
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
]
dependencies = [
"requests",
@@ -135,10 +136,11 @@ testpaths = ["tests"]
[tool.mypy]
ignore_missing_imports = true
disallow_untyped_defs = true
python_version = "3.9"

[tool.black]
line-length = 88
target-version = ['py38', 'py39', 'py310']
target-version = ['py39', 'py310', 'py311']
include = '\.pyi?$'

[tool.ruff]
@@ -169,7 +171,7 @@ line-length = 88
exclude = ["docs", "build"]
# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
target-version = "py38"
target-version = "py39"

[tool.ruff.per-file-ignores]
"tests/**" = ["D"]
8 changes: 8 additions & 0 deletions src/zampy/conventions/ALMA.json
@@ -60,5 +60,13 @@
"total_precipitation": {
"variable": "Rainf",
"units": "millimeter/second"
},
"air_temperature": {
"variable": "Tair",
"units": "kelvin"
},
"dewpoint_temperature": {
"variable": "d2m",
"units": "kelvin"
}
}
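The two new ALMA entries map zampy variable names ("air_temperature", "dewpoint_temperature") to short output names and target units. As an illustration only, a convention table like this can drive a simple rename pass over an xarray dataset; the actual logic lives in zampy.datasets.converter, and the helper below is a hypothetical sketch, not part of this commit:

import json
from pathlib import Path

import xarray as xr


def rename_to_convention(ds: xr.Dataset, convention_file: Path) -> xr.Dataset:
    """Hypothetical sketch: rename data variables using a zampy convention table."""
    mapping = json.loads(convention_file.read_text())
    for zampy_name, entry in mapping.items():
        if zampy_name in ds.data_vars:
            # Record the target units; actual unit conversion is omitted in this sketch.
            ds[zampy_name].attrs["target_units"] = entry["units"]
            ds = ds.rename({zampy_name: entry["variable"]})
    return ds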
13 changes: 2 additions & 11 deletions src/zampy/datasets/__init__.py
@@ -1,16 +1,7 @@
"""Datasets implementations."""
from zampy.datasets import dataset_protocol
from zampy.datasets import validation
from zampy.datasets.era5 import ERA5
from zampy.datasets.eth_canopy_height import EthCanopyHeight
from zampy.datasets.catalog import DATASETS


__all__ = ["dataset_protocol", "validation", "EthCanopyHeight", "ERA5"]


# This object tracks which datasets are available.
DATASETS: dict[str, type[dataset_protocol.Dataset]] = {
# All lowercase key.
"era5": ERA5,
"eth_canopy_height": EthCanopyHeight,
}
__all__ = ["dataset_protocol", "validation", "DATASETS"]
14 changes: 14 additions & 0 deletions src/zampy/datasets/catalog.py
@@ -0,0 +1,14 @@
"""Catalog of datasets."""
from zampy.datasets import dataset_protocol
from zampy.datasets.era5 import ERA5
from zampy.datasets.era5 import ERA5Land
from zampy.datasets.eth_canopy_height import EthCanopyHeight


# This object tracks which datasets are available.
DATASETS: dict[str, type[dataset_protocol.Dataset]] = {
    # All lowercase key.
    "era5": ERA5,
    "era5_land": ERA5Land,
    "eth_canopy_height": EthCanopyHeight,
}
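With the registry centralised here, callers resolve a dataset class from its lowercase key instead of importing it directly. A minimal usage sketch, assuming nothing beyond the dict above:

from zampy.datasets.catalog import DATASETS

# Look up the new ERA5-land dataset by its registry key and instantiate it.
dataset = DATASETS["era5_land"]()
print(type(dataset).__name__)  # ERA5Land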
21 changes: 7 additions & 14 deletions src/zampy/datasets/dataset_protocol.py
@@ -1,14 +1,11 @@
"""Outline of the dataset protocol."""
import json
import shutil
from abc import abstractmethod
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from typing import List
from typing import Optional
from typing import Protocol
from typing import Tuple
import numpy as np
import xarray as xr

@@ -79,21 +76,20 @@ class Dataset(Protocol):
crs: str
license: str
bib: str
raw_variables: Tuple[Variable, ...]
variable_names: Tuple[str, ...]
variables: Tuple[Variable, ...]
raw_variables: list[Variable]
variable_names: list[str]
variables: list[Variable]

def __init__(self) -> None:
"""Init."""
...

@abstractmethod
def download(
self,
download_dir: Path,
time_bounds: TimeBounds,
spatial_bounds: SpatialBounds,
variable_names: List[str],
variable_names: list[str],
overwrite: bool = False,
) -> bool:
"""Download the data.
@@ -111,7 +107,6 @@ def download(
"""
...

@abstractmethod
def ingest(
self,
download_dir: Path,
@@ -130,15 +125,14 @@ def ingest(
"""
...

@abstractmethod
def load(
self,
ingest_dir: Path,
time_bounds: TimeBounds,
spatial_bounds: SpatialBounds,
resolution: float,
regrid_method: str,
variable_names: List[str],
variable_names: list[str],
) -> xr.Dataset:
"""Get the dataset as an xarray Dataset.
@@ -160,7 +154,6 @@ def load(
"""
...

@abstractmethod
def convert(
self,
ingest_dir: Path,
@@ -182,7 +175,7 @@ def write_properties_file(
dataset_folder: Path,
spatial_bounds: SpatialBounds,
time_bounds: TimeBounds,
variable_names: List[str],
variable_names: list[str],
) -> None:
"""Write the (serialized) spatial and time bounds to a json file.
@@ -211,7 +204,7 @@ def write_properties_file(

def read_properties_file(
dataset_folder: Path,
) -> Tuple[SpatialBounds, TimeBounds, List[str]]:
) -> tuple[SpatialBounds, TimeBounds, list[str]]:
"""Load the serialized spatial and time bounds from the json file.
Args:
150 changes: 150 additions & 0 deletions src/zampy/datasets/ecmwf_dataset.py
@@ -0,0 +1,150 @@
"""Base module for datasets available on CDS."""

from pathlib import Path
from typing import Union
import xarray as xr
from zampy.datasets import converter
from zampy.datasets import utils
from zampy.datasets import validation
from zampy.datasets.dataset_protocol import SpatialBounds
from zampy.datasets.dataset_protocol import TimeBounds
from zampy.datasets.dataset_protocol import Variable
from zampy.datasets.dataset_protocol import copy_properties_file
from zampy.datasets.dataset_protocol import write_properties_file
from zampy.utils import regrid


## Ignore missing class/method docstrings: they are implemented in the Dataset class.
# ruff: noqa: D102


class ECMWFDataset:  # noqa: D101
    name: str
    time_bounds: TimeBounds
    spatial_bounds = SpatialBounds(90, 180, -90, -180)
    crs = "EPSG:4326"

    raw_variables: list[Variable]
    cds_var_names: dict[str, str]
    variable_names: list[str]
    variables: list[Variable]
    license = "cc-by-4.0"
    bib = """
    @article{hersbach2020era5,
        title={The ERA5 global reanalysis},
        author={Hersbach, Hans et al.},
        journal={Quarterly Journal of the Royal Meteorological Society},
        volume={146},
        number={730},
        pages={1999--2049},
        year={2020},
        publisher={Wiley Online Library}
    }
    """
    cds_dataset: str

    def __init__(self) -> None:
        """Init."""
        pass

    def download(
        self,
        download_dir: Path,
        time_bounds: TimeBounds,
        spatial_bounds: SpatialBounds,
        variable_names: list[str],
        overwrite: bool = False,
    ) -> bool:
        validation.validate_download_request(
            self,
            download_dir,
            time_bounds,
            spatial_bounds,
            variable_names,
        )

        download_folder = download_dir / self.name
        download_folder.mkdir(parents=True, exist_ok=True)

        utils.cds_request(
            dataset=self.cds_dataset,
            variables=variable_names,
            time_bounds=time_bounds,
            spatial_bounds=spatial_bounds,
            path=download_folder,
            cds_var_names=self.cds_var_names,
            overwrite=overwrite,
        )

        write_properties_file(
            download_folder, spatial_bounds, time_bounds, variable_names
        )

        return True

    def ingest(
        self,
        download_dir: Path,
        ingest_dir: Path,
        overwrite: bool = False,
    ) -> bool:
        download_folder = download_dir / self.name
        ingest_folder = ingest_dir / self.name
        ingest_folder.mkdir(parents=True, exist_ok=True)

        data_file_pattern = f"{self.name}_*.nc"
        data_files = list(download_folder.glob(data_file_pattern))

        for file in data_files:
            utils.convert_to_zampy(
                ingest_folder,
                file=file,
                overwrite=overwrite,
            )

        copy_properties_file(download_folder, ingest_folder)

        return True

    def load(
        self,
        ingest_dir: Path,
        time_bounds: TimeBounds,
        spatial_bounds: SpatialBounds,
        resolution: float,
        regrid_method: str,
        variable_names: list[str],
    ) -> xr.Dataset:
        files: list[Path] = []
        for var in self.variable_names:
            if var in variable_names:
                files += (ingest_dir / self.name).glob(f"{self.name}_{var}*.nc")

        ds = xr.open_mfdataset(files, chunks={"latitude": 200, "longitude": 200})
        ds = ds.sel(time=slice(time_bounds.start, time_bounds.end))
        ds = regrid.regrid_data(ds, spatial_bounds, resolution, regrid_method)

        return ds

    def convert(
        self,
        ingest_dir: Path,
        convention: Union[str, Path],
    ) -> bool:
        converter.check_convention(convention)
        ingest_folder = ingest_dir / self.name

        data_file_pattern = f"{self.name}_*.nc"

        data_files = list(ingest_folder.glob(data_file_pattern))

        for file in data_files:
            # start conversion process
            print(f"Start processing file `{file.name}`.")
            ds = xr.open_dataset(file, chunks={"x": 50, "y": 50})
            ds = converter.convert(ds, dataset=self, convention=convention)
            # TODO: support derived variables
            # TODO: other calculations
            # call ds.compute()

        return True
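Taken together, the new base class gives ERA5 and ERA5-land the same download, ingest, load, and convert flow. A rough usage sketch in the spirit of the demo notebooks follows; the paths, dates, bounds, variable list, regrid method, and convention string are placeholders, and the exact options supported by ERA5Land are defined in era5.py, which is not shown in this excerpt:

import numpy as np
from pathlib import Path

from zampy.datasets.dataset_protocol import SpatialBounds, TimeBounds
from zampy.datasets.era5 import ERA5Land

# Placeholder bounds and directories; adjust to your own setup.
time_bounds = TimeBounds(np.datetime64("2010-01-01"), np.datetime64("2010-01-31"))
spatial_bounds = SpatialBounds(54, 6, 50, 3)  # north, east, south, west
download_dir = Path("./download")
ingest_dir = Path("./ingest")

dataset = ERA5Land()
dataset.download(
    download_dir=download_dir,
    time_bounds=time_bounds,
    spatial_bounds=spatial_bounds,
    variable_names=["air_temperature"],  # assumed to be a supported variable
)
dataset.ingest(download_dir, ingest_dir)
ds = dataset.load(
    ingest_dir=ingest_dir,
    time_bounds=time_bounds,
    spatial_bounds=spatial_bounds,
    resolution=0.25,
    regrid_method="flox",  # placeholder; must match a method supported by zampy.utils.regrid
    variable_names=["air_temperature"],
)
dataset.convert(ingest_dir=ingest_dir, convention="ALMA")  # "ALMA" assumed to resolve to ALMA.json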