Skip to content

Commit

Permalink
implement consistent zampy name for ecmwf datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
Yang committed Aug 15, 2023
1 parent e483b85 commit 92435ac
Show file tree
Hide file tree
Showing 16 changed files with 107 additions and 74 deletions.
4 changes: 2 additions & 2 deletions demo/era5-land_dataset_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@
" download_dir=download_dir,\n",
" time_bounds=times,\n",
" spatial_bounds=bbox_demo,\n",
" variable_names=[\"2m_temperature\", \"2m_dewpoint_temperature\"],\n",
" variable_names=[\"air_temperature\", \"dewpoint_temperature\"],\n",
")"
]
},
Expand Down Expand Up @@ -147,7 +147,7 @@
" ingest_dir=ingest_dir,\n",
" time_bounds=times,\n",
" spatial_bounds=bbox_demo,\n",
" variable_names=[\"2m_temperature\", \"2m_dewpoint_temperature\"],\n",
" variable_names=[\"air_temperature\", \"dewpoint_temperature\"],\n",
" resolution=1.0,\n",
" regrid_method=\"flox\",\n",
")"
Expand Down
94 changes: 59 additions & 35 deletions demo/era5_dataset_demo.ipynb

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions src/zampy/datasets/ecmwf_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def download(
download_dir,
time_bounds,
spatial_bounds,
[self.cds_var_names[var] for var in self.cds_var_names.keys()],
variable_names,
)

download_folder = download_dir / self.name
Expand All @@ -72,6 +72,7 @@ def download(
time_bounds=time_bounds,
spatial_bounds=spatial_bounds,
path=download_folder,
cds_var_names=self.cds_var_names,
overwrite=overwrite,
)

Expand Down Expand Up @@ -115,7 +116,7 @@ def load(
variable_names: list[str],
) -> xr.Dataset:
files: list[Path] = []
for var in self.cds_var_names.keys():
for var in self.variable_names:
if var in variable_names:
files += (ingest_dir / self.name).glob(f"{self.name}_{var}*.nc")

Expand Down
14 changes: 7 additions & 7 deletions src/zampy/datasets/era5.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ class ERA5(ECMWFDataset): # noqa: D101

# mapping from zampy variable names to the names used in the cdsapi download request
cds_var_names = {
"mean_total_precipitation_rate": "total_precipitation",
"total_precipitation": "mean_total_precipitation_rate",
"surface_thermal_radiation_downwards": "surface_thermal_radiation_downwards",
"surface_solar_radiation_downwards": "surface_solar_radiation_downwards",
"surface_pressure": "surface_pressure",
"10m_u_component_of_wind": "eastward_component_of_wind",
"10m_v_component_of_wind": "northward_component_of_wind",
"eastward_component_of_wind": "10m_u_component_of_wind",
"northward_component_of_wind": "10m_v_component_of_wind",
}

variable_names = list(cds_var_names.values())
variable_names = list(cds_var_names.keys())

variables = [VARIABLE_REFERENCE_LOOKUP[var] for var in variable_names]

Expand All @@ -49,11 +49,11 @@ class ERA5Land(ECMWFDataset): # noqa: D101

# mapping from zampy variable names to the names used in the cdsapi download request
cds_var_names = {
"2m_temperature": "air_temperature",
"2m_dewpoint_temperature": "dewpoint_temperature",
"air_temperature": "2m_temperature",
"dewpoint_temperature": "2m_dewpoint_temperature",
}

variable_names = list(cds_var_names.values())
variable_names = list(cds_var_names.keys())

variables = [VARIABLE_REFERENCE_LOOKUP[var] for var in variable_names]

Expand Down
6 changes: 4 additions & 2 deletions src/zampy/datasets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def cds_request(
time_bounds: TimeBounds,
spatial_bounds: SpatialBounds,
path: Path,
cds_var_names: dict[str, str],
overwrite: bool,
) -> None:
"""Download data via CDS API.
Expand All @@ -91,10 +92,11 @@ def cds_request(
Args:
dataset: Dataset name for retrieval via `cdsapi`.
variables: Zampy variable.
variables: Zampy variables.
time_bounds: Zampy time bounds object.
spatial_bounds: Zampy spatial bounds object.
path: File path to which the data should be saved.
cds_var_names: Mapping from zampy variable names to CDS server variable names.
overwrite: If an existing file (of the same size!) should be overwritten.
"""
fname = PRODUCT_FNAME[dataset]
Expand All @@ -121,7 +123,7 @@ def cds_request(
dataset,
{
"product_type": "reanalysis",
"variable": [variable],
"variable": [cds_var_names[variable]],
"year": year,
"month": month,
# fmt: off
Expand Down
9 changes: 5 additions & 4 deletions tests/test_datasets/test_era5.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def test_download(self, mock_retrieve, valid_path_cds, dummy_dir):
"""
times = TimeBounds(np.datetime64("2010-01-01"), np.datetime64("2010-01-31"))
bbox = SpatialBounds(54, 56, 1, 3)
variable = ["10m_v_component_of_wind"]
variable = ["eastward_component_of_wind"]
cds_var_names = ["10m_u_component_of_wind"]
download_dir = Path(dummy_dir, "download")

era5_dataset = era5.ERA5()
Expand All @@ -57,7 +58,7 @@ def test_download(self, mock_retrieve, valid_path_cds, dummy_dir):
"reanalysis-era5-single-levels",
{
"product_type": "reanalysis",
"variable": variable,
"variable": cds_var_names,
"year": "2010",
"month": "1",
# fmt: off
Expand Down Expand Up @@ -100,7 +101,7 @@ def ingest_dummy_data(self, temp_dir):
Path(
temp_dir,
"era5",
"era5_10m_v_component_of_wind_1996-1.nc",
"era5_northward_component_of_wind_1996-1.nc",
)
)

Expand All @@ -115,7 +116,7 @@ def test_load(self):
"""Test load function."""
times = TimeBounds(np.datetime64("1996-01-01"), np.datetime64("1996-01-02"))
bbox = SpatialBounds(39, -107, 37, -109)
variable = ["10m_v_component_of_wind"]
variable = ["northward_component_of_wind"]

era5_dataset = era5.ERA5()

Expand Down
9 changes: 5 additions & 4 deletions tests/test_datasets/test_era5_land.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def test_download(self, mock_retrieve, valid_path_cds, dummy_dir):
"""
times = TimeBounds(np.datetime64("2010-01-01"), np.datetime64("2010-01-31"))
bbox = SpatialBounds(54, 56, 1, 3)
variable = ["2m_dewpoint_temperature"]
variable = ["dewpoint_temperature"]
cds_var_names = ["2m_dewpoint_temperature"]
download_dir = Path(dummy_dir, "download")

era5_land_dataset = ERA5Land()
Expand All @@ -57,7 +58,7 @@ def test_download(self, mock_retrieve, valid_path_cds, dummy_dir):
"reanalysis-era5-land",
{
"product_type": "reanalysis",
"variable": variable,
"variable": cds_var_names,
"year": "2010",
"month": "1",
# fmt: off
Expand Down Expand Up @@ -100,7 +101,7 @@ def ingest_dummy_data(self, temp_dir):
Path(
temp_dir,
"era5-land",
"era5-land_2m_dewpoint_temperature_1996-1.nc",
"era5-land_dewpoint_temperature_1996-1.nc",
)
)

Expand All @@ -115,7 +116,7 @@ def test_load(self):
"""Test load function."""
times = TimeBounds(np.datetime64("1996-01-01"), np.datetime64("1996-01-02"))
bbox = SpatialBounds(39, -107, 37, -109)
variable = ["2m_dewpoint_temperature"]
variable = ["dewpoint_temperature"]

era5_land_dataset = ERA5Land()

Expand Down
4 changes: 2 additions & 2 deletions tests/test_recipes/generate_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ def generate_era5_file(


ERA5_LOOKUP = { # name: (unit, fname)
"10m_u_component_of_wind": ("m s**-1", "u10"),
"10m_v_component_of_wind": ("m s**-1", "v10"),
"eastward_component_of_wind": ("m s**-1", "u10"),
"northward_component_of_wind": ("m s**-1", "v10"),
"surface_pressure": ("Pa", "sp"),
}

Expand Down
2 changes: 1 addition & 1 deletion tests/test_recipes/recipes/era5_recipe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ download:
datasets:
era5:
variables:
- 10m_v_component_of_wind
- northward_component_of_wind
- surface_pressure

convert:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_recipes/test_simple_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_recipe(tmp_path: Path, mocker):
time_bounds = TimeBounds(
np.datetime64("2020-01-01T00:00"), np.datetime64("2020-12-31T23:59")
)
variables = ["10m_v_component_of_wind", "surface_pressure"]
variables = ["northward_component_of_wind", "surface_pressure"]

generate_test_data.generate_era5_files(
directory=tmp_path / "download",
Expand Down
32 changes: 18 additions & 14 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ def valid_path_cds(tmp_path_factory):
def test_cds_request(mock_retrieve, valid_path_cds):
"""Test cds request for downloading data from CDS server."""
product = "reanalysis-era5-single-levels"
variables = ["10m_v_component_of_wind"]
variables = ["eastward_component_of_wind"]
cds_var_names = {"eastward_component_of_wind": "10m_u_component_of_wind"}
time_bounds = TimeBounds(
np.datetime64("2010-01-01T00:00:00"), np.datetime64("2010-01-31T23:00:00")
)
Expand All @@ -99,14 +100,20 @@ def test_cds_request(mock_retrieve, valid_path_cds):
patching = patch("zampy.datasets.utils.CDSAPI_CONFIG_PATH", valid_path_cds)
with patching:
utils.cds_request(
product, variables, time_bounds, spatial_bounds, path, overwrite
product,
variables,
time_bounds,
spatial_bounds,
path,
cds_var_names,
overwrite,
)

mock_retrieve.assert_called_with(
product,
{
"product_type": "reanalysis",
"variable": variables,
"variable": ["10m_u_component_of_wind"],
"year": "2010",
"month": "1",
# fmt: off
Expand Down Expand Up @@ -153,24 +160,21 @@ def test_convert_to_zampy(dummy_dir):
ingest_folder = Path(data_folder, "era5")
utils.convert_to_zampy(
ingest_folder=Path(dummy_dir),
file=Path(ingest_folder, "era5_10m_v_component_of_wind_1996-1.nc"),
file=Path(ingest_folder, "era5_northward_component_of_wind_1996-1.nc"),
overwrite=True,
)

ds = xr.load_dataset(Path(dummy_dir, "era5_10m_v_component_of_wind_1996-1.nc"))
ds = xr.load_dataset(Path(dummy_dir, "era5_northward_component_of_wind_1996-1.nc"))

assert list(ds.data_vars)[0] == "northward_component_of_wind"


def test_parse_nc_file_10m_wind():
"""Test parsing netcdf file function with 10 meter velocity u/v component."""
variables = {
"10m_v_component_of_wind": "northward_component_of_wind",
"10m_u_component_of_wind": "eastward_component_of_wind",
}
variables = ["northward_component_of_wind", "eastward_component_of_wind"]
for variable in variables:
ds = utils.parse_nc_file(data_folder / "era5" / f"era5_{variable}_1996-1.nc")
expected_var_name = variables[variable]
expected_var_name = variable
assert list(ds.data_vars)[0] == expected_var_name
assert ds[expected_var_name].attrs["units"] == "meter_per_second"

Expand Down Expand Up @@ -199,10 +203,10 @@ def test_parse_nc_file_radiation():
def test_parse_nc_file_precipitation():
"""Test parsing netcdf file function with precipitation."""
ds_original = xr.load_dataset(
data_folder / "era5" / "era5_mean_total_precipitation_rate_1996-1.nc"
data_folder / "era5" / "era5_total_precipitation_1996-1.nc"
)
ds = utils.parse_nc_file(
data_folder / "era5" / "era5_mean_total_precipitation_rate_1996-1.nc"
data_folder / "era5" / "era5_total_precipitation_1996-1.nc"
)
expected_var_name = "total_precipitation"

Expand All @@ -227,7 +231,7 @@ def test_parse_nc_file_pressure():
def test_parse_nc_file_air_temperature():
"""Test parsing netcdf file function with 2 meter temperature."""
ds = utils.parse_nc_file(
data_folder / "era5-land" / "era5-land_2m_temperature_1996-1.nc"
data_folder / "era5-land" / "era5-land_air_temperature_1996-1.nc"
)
expected_var_name = "air_temperature"

Expand All @@ -238,7 +242,7 @@ def test_parse_nc_file_air_temperature():
def test_parse_nc_file_dew_temperature():
"""Test parsing netcdf file function with 2 meter dewpoint temperature."""
ds = utils.parse_nc_file(
data_folder / "era5-land" / "era5-land_2m_dewpoint_temperature_1996-1.nc"
data_folder / "era5-land" / "era5-land_dewpoint_temperature_1996-1.nc"
)
expected_var_name = "dewpoint_temperature"

Expand Down

0 comments on commit 92435ac

Please sign in to comment.