-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add era5 land dataset * add demo notebook * import era5 land in init * update alma convention * move shared parser function to utils * add tests for utils * add test for era5 land dataset * remove unnecessary duplication check * Revert "remove unnecessary duplication check" This reverts commit d2242b6. * era5land inherit from era5 * make a general ecmwf dataset and inherit for era5(land) * please mypy * move era5 land to era5.py * drop py3.8, fix circular import and please mypy with proper protocol usage * fix dataset import in notebooks * implement consistent zampy name for ecmwf datasets
- Loading branch information
Showing
28 changed files
with
2,048 additions
and
398 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,7 @@ | ||
"""Datasets implementations.""" | ||
from zampy.datasets import dataset_protocol | ||
from zampy.datasets import validation | ||
from zampy.datasets.era5 import ERA5 | ||
from zampy.datasets.eth_canopy_height import EthCanopyHeight | ||
from zampy.datasets.catalog import DATASETS | ||
|
||
|
||
__all__ = ["dataset_protocol", "validation", "EthCanopyHeight", "ERA5"] | ||
|
||
|
||
# This object tracks which datasets are available. | ||
DATASETS: dict[str, type[dataset_protocol.Dataset]] = { | ||
# All lowercase key. | ||
"era5": ERA5, | ||
"eth_canopy_height": EthCanopyHeight, | ||
} | ||
__all__ = ["dataset_protocol", "validation", "DATASETS"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
"""Catalog of datasets.""" | ||
from zampy.datasets import dataset_protocol | ||
from zampy.datasets.era5 import ERA5 | ||
from zampy.datasets.era5 import ERA5Land | ||
from zampy.datasets.eth_canopy_height import EthCanopyHeight | ||
|
||
|
||
# This object tracks which datasets are available.
# It maps an all-lowercase dataset name to the class implementing
# the `dataset_protocol.Dataset` protocol for that dataset.
DATASETS: dict[str, type[dataset_protocol.Dataset]] = {
    # All lowercase key.
    "era5": ERA5,
    "era5_land": ERA5Land,
    "eth_canopy_height": EthCanopyHeight,
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
"""Base module for datasets available on CDS.""" | ||
|
||
from pathlib import Path | ||
from typing import Union | ||
import xarray as xr | ||
from zampy.datasets import converter | ||
from zampy.datasets import utils | ||
from zampy.datasets import validation | ||
from zampy.datasets.dataset_protocol import SpatialBounds | ||
from zampy.datasets.dataset_protocol import TimeBounds | ||
from zampy.datasets.dataset_protocol import Variable | ||
from zampy.datasets.dataset_protocol import copy_properties_file | ||
from zampy.datasets.dataset_protocol import write_properties_file | ||
from zampy.utils import regrid | ||
|
||
|
||
## Ignore missing class/method docstrings: they are implemented in the Dataset class. | ||
# ruff: noqa: D102 | ||
|
||
|
||
class ECMWFDataset:  # noqa: D101
    """Shared base implementation for datasets hosted on the ECMWF CDS.

    Concrete subclasses (e.g. ERA5, ERA5-Land) set the class attributes
    below; the download/ingest/load/convert pipeline is identical for
    every CDS-hosted product.
    """

    name: str
    time_bounds: TimeBounds
    spatial_bounds = SpatialBounds(90, 180, -90, -180)
    crs = "EPSG:4326"

    raw_variables: list[Variable]
    cds_var_names: dict[str, str]
    variable_names: list[str]
    variables: list[Variable]
    license = "cc-by-4.0"
    bib = """
    @article{hersbach2020era5,
        title={The ERA5 global reanalysis},
        author={Hersbach, Hans et al.},
        journal={Quarterly Journal of the Royal Meteorological Society},
        volume={146},
        number={730},
        pages={1999--2049},
        year={2020},
        publisher={Wiley Online Library}
    }
    """
    cds_dataset: str

    def __init__(self) -> None:
        """Init."""
        # No per-instance state: all configuration lives on the class.
        pass

    def download(
        self,
        download_dir: Path,
        time_bounds: TimeBounds,
        spatial_bounds: SpatialBounds,
        variable_names: list[str],
        overwrite: bool = False,
    ) -> bool:
        # Reject invalid bounds/variables before touching the network.
        validation.validate_download_request(
            self,
            download_dir,
            time_bounds,
            spatial_bounds,
            variable_names,
        )

        # Raw files land in a per-dataset subfolder of the download dir.
        target_folder = download_dir / self.name
        target_folder.mkdir(parents=True, exist_ok=True)

        utils.cds_request(
            dataset=self.cds_dataset,
            variables=variable_names,
            time_bounds=time_bounds,
            spatial_bounds=spatial_bounds,
            path=target_folder,
            cds_var_names=self.cds_var_names,
            overwrite=overwrite,
        )

        # Record what was requested alongside the data for later steps.
        write_properties_file(
            target_folder, spatial_bounds, time_bounds, variable_names
        )

        return True

    def ingest(
        self,
        download_dir: Path,
        ingest_dir: Path,
        overwrite: bool = False,
    ) -> bool:
        source_folder = download_dir / self.name
        target_folder = ingest_dir / self.name
        target_folder.mkdir(parents=True, exist_ok=True)

        # Convert every raw netCDF file of this dataset to zampy format.
        for raw_file in source_folder.glob(f"{self.name}_*.nc"):
            utils.convert_to_zampy(
                target_folder,
                file=raw_file,
                overwrite=overwrite,
            )

        copy_properties_file(source_folder, target_folder)

        return True

    def load(
        self,
        ingest_dir: Path,
        time_bounds: TimeBounds,
        spatial_bounds: SpatialBounds,
        resolution: float,
        regrid_method: str,
        variable_names: list[str],
    ) -> xr.Dataset:
        ingest_folder = ingest_dir / self.name
        # Collect only the files for the requested variables, preserving
        # this dataset's own variable ordering.
        files: list[Path] = [
            path
            for var in self.variable_names
            if var in variable_names
            for path in ingest_folder.glob(f"{self.name}_{var}*.nc")
        ]

        ds = xr.open_mfdataset(files, chunks={"latitude": 200, "longitude": 200})
        ds = ds.sel(time=slice(time_bounds.start, time_bounds.end))
        return regrid.regrid_data(ds, spatial_bounds, resolution, regrid_method)

    def convert(
        self,
        ingest_dir: Path,
        convention: Union[str, Path],
    ) -> bool:
        converter.check_convention(convention)
        ingest_folder = ingest_dir / self.name

        for data_file in ingest_folder.glob(f"{self.name}_*.nc"):
            # start conversion process
            print(f"Start processing file `{data_file.name}`.")
            ds = xr.open_dataset(data_file, chunks={"x": 50, "y": 50})
            ds = converter.convert(ds, dataset=self, convention=convention)
            # TODO: support derived variables
            # TODO: other calculations
            # call ds.compute()

        return True
Oops, something went wrong.