From cfdf9afa601bf5c1a530aaceceae0d3566d783a8 Mon Sep 17 00:00:00 2001 From: veronikasamborska1994 <32176660+veronikasamborska1994@users.noreply.github.com> Date: Wed, 7 Aug 2024 19:50:59 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=8A=20climate:=20era-5=20august=20upda?= =?UTF-8?q?te=20(#3081)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dag/climate.yml | 2 +- .../climate/2023-12-20/surface_temperature.py | 10 ++- poetry.lock | 66 ++++++++++++++---- pyproject.toml | 1 + .../2024-08-06/surface_temperature.gz.dvc | 27 ++++++++ .../climate/2024-08-06/surface_temperature.py | 69 +++++++++++++++++++ 6 files changed, 159 insertions(+), 16 deletions(-) create mode 100644 snapshots/climate/2024-08-06/surface_temperature.gz.dvc create mode 100644 snapshots/climate/2024-08-06/surface_temperature.py diff --git a/dag/climate.yml b/dag/climate.yml index 20024b04172..d4873bed521 100644 --- a/dag/climate.yml +++ b/dag/climate.yml @@ -32,7 +32,7 @@ steps: # Copernicus Climate Change Service - Surface temperature. # data://meadow/climate/2023-12-20/surface_temperature: - - snapshot://climate/2024-07-08/surface_temperature.gz + - snapshot://climate/2024-08-06/surface_temperature.gz - snapshot://countries/2023-12-27/world_bank.zip data://garden/climate/2023-12-20/surface_temperature: - data://meadow/climate/2023-12-20/surface_temperature diff --git a/etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py b/etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py index c78e623cb4e..c290d0c420b 100644 --- a/etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py +++ b/etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py @@ -1,5 +1,7 @@ """Load a snapshot and create a meadow dataset.""" + import gzip +import io import zipfile import geopandas as gpd @@ -25,8 +27,12 @@ def _load_data_array(snap: Snapshot) -> xr.DataArray: log.info("load_data_array.start") # Load data from snapshot. - with gzip.open(snap.path, "r") as _file: - ds = xr.open_dataset(_file) + with gzip.open(snap.path, "rb") as file: + file_content = file.read() + + # Create an in-memory bytes file and load the dataset + with io.BytesIO(file_content) as memfile: + ds = xr.open_dataset(memfile).load() # .load() ensures data is eagerly loaded # The latest 3 months in this dataset are made available through ERA5T, which is slightly different to ERA5. In the downloaded file, an extra dimenions ‘expver’ indicates which data is ERA5 (expver = 1) and which is ERA5T (expver = 5). # If a value is missing in the first dataset, it is filled with the value from the second dataset. diff --git a/poetry.lock b/poetry.lock index 519671500bf..f59b9dcf30d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "affine" @@ -2200,6 +2200,57 @@ files = [ {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] +[[package]] +name = "h5netcdf" +version = "1.3.0" +description = "netCDF4 via h5py" +optional = false +python-versions = ">=3.9" +files = [ + {file = "h5netcdf-1.3.0-py3-none-any.whl", hash = "sha256:f2df69dcd3665dc9c4d43eb6529dedd113b2508090d12ac973573305a8406465"}, + {file = "h5netcdf-1.3.0.tar.gz", hash = "sha256:a171c027daeb34b24c24a3b6304195b8eabbb6f10c748256ed3cfe19806383cf"}, +] + +[package.dependencies] +h5py = "*" +packaging = "*" + +[package.extras] +test = ["netCDF4", "pytest"] + +[[package]] +name = "h5py" +version = "3.11.0" +description = "Read and write HDF5 files from Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "h5py-3.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1625fd24ad6cfc9c1ccd44a66dac2396e7ee74940776792772819fc69f3a3731"}, + {file = "h5py-3.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c072655ad1d5fe9ef462445d3e77a8166cbfa5e599045f8aa3c19b75315f10e5"}, + {file = "h5py-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77b19a40788e3e362b54af4dcf9e6fde59ca016db2c61360aa30b47c7b7cef00"}, + {file = "h5py-3.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:ef4e2f338fc763f50a8113890f455e1a70acd42a4d083370ceb80c463d803972"}, + {file = "h5py-3.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bbd732a08187a9e2a6ecf9e8af713f1d68256ee0f7c8b652a32795670fb481ba"}, + {file = "h5py-3.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75bd7b3d93fbeee40860fd70cdc88df4464e06b70a5ad9ce1446f5f32eb84007"}, + {file = "h5py-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52c416f8eb0daae39dabe71415cb531f95dce2d81e1f61a74537a50c63b28ab3"}, + {file = "h5py-3.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:083e0329ae534a264940d6513f47f5ada617da536d8dccbafc3026aefc33c90e"}, + {file = "h5py-3.11.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a76cae64080210389a571c7d13c94a1a6cf8cb75153044fd1f822a962c97aeab"}, + {file = "h5py-3.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f3736fe21da2b7d8a13fe8fe415f1272d2a1ccdeff4849c1421d2fb30fd533bc"}, + {file = "h5py-3.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa6ae84a14103e8dc19266ef4c3e5d7c00b68f21d07f2966f0ca7bdb6c2761fb"}, + {file = "h5py-3.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:21dbdc5343f53b2e25404673c4f00a3335aef25521bd5fa8c707ec3833934892"}, + {file = "h5py-3.11.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:754c0c2e373d13d6309f408325343b642eb0f40f1a6ad21779cfa9502209e150"}, + {file = "h5py-3.11.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:731839240c59ba219d4cb3bc5880d438248533366f102402cfa0621b71796b62"}, + {file = "h5py-3.11.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ec9df3dd2018904c4cc06331951e274f3f3fd091e6d6cc350aaa90fa9b42a76"}, + {file = "h5py-3.11.0-cp38-cp38-win_amd64.whl", hash = "sha256:55106b04e2c83dfb73dc8732e9abad69d83a436b5b82b773481d95d17b9685e1"}, + {file = "h5py-3.11.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f4e025e852754ca833401777c25888acb96889ee2c27e7e629a19aee288833f0"}, + {file = "h5py-3.11.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6c4b760082626120031d7902cd983d8c1f424cdba2809f1067511ef283629d4b"}, + {file = "h5py-3.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67462d0669f8f5459529de179f7771bd697389fcb3faab54d63bf788599a48ea"}, + {file = "h5py-3.11.0-cp39-cp39-win_amd64.whl", hash = "sha256:d9c944d364688f827dc889cf83f1fca311caf4fa50b19f009d1f2b525edd33a3"}, + {file = "h5py-3.11.0.tar.gz", hash = "sha256:7b7e8f78072a2edec87c9836f25f34203fd492a4475709a18b417a33cfb21fa9"}, +] + +[package.dependencies] +numpy = ">=1.17.3" + [[package]] name = "htbuilder" version = "0.6.2" @@ -5421,7 +5472,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -5429,16 +5479,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -5455,7 +5497,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -5463,7 +5504,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -8176,4 +8216,4 @@ test = ["pytest", "pytest-cov"] [metadata] lock-version = "2.0" python-versions = ">=3.10, <3.12" -content-hash = "8e25475cdf1f85361bdc9d2f7ef222912486432b596fbb697e826d07167919a7" +content-hash = "bcba43573783bd547852de8cd97058af55d06df3b06ed7ab4d4e87500aac0820" diff --git a/pyproject.toml b/pyproject.toml index ce2447c657f..28a7708c560 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,7 @@ pymysql = "^1.1.1" tiktoken = "^0.7.0" earthengine-api = "^0.1.411" python-docx = "^1.1.2" +h5netcdf = "^1.3.0" [tool.poetry.group.api.dependencies] fastapi = "^0.109.0" diff --git a/snapshots/climate/2024-08-06/surface_temperature.gz.dvc b/snapshots/climate/2024-08-06/surface_temperature.gz.dvc new file mode 100644 index 00000000000..fd8137c2dea --- /dev/null +++ b/snapshots/climate/2024-08-06/surface_temperature.gz.dvc @@ -0,0 +1,27 @@ +meta: + origin: + title_snapshot: ERA5 Monthly Averaged Data on Single Levels from 1940 to Present - Monthly Averages of 2m Surface Temperature + title: ERA5 monthly averaged data on single levels from 1940 to present + description: |- + ERA5 is the latest climate reanalysis produced by ECMWF, providing hourly data on many atmospheric, land-surface and sea-state parameters together with estimates of uncertainty. + + ERA5 data are available in the Climate Data Store on regular latitude-longitude grids at 0.25° x 0.25° resolution, with atmospheric parameters on 37 pressure levels. + + ERA5 is available from 1940 and continues to be extended forward in time, with daily updates being made available 5 days behind real time + + Initial release data, i.e., data no more than three months behind real time, are called ERA5T. + producer: Contains modified Copernicus Climate Change Service information + version_producer: 2 + citation_full: |- + Hersbach, H., Bell, B., Berrisford, P., Biavati, G., Horányi, A., Muñoz Sabater, J., Nicolas, J., Peubey, C., Radu, R., Rozum, I., Schepers, D., Simmons, A., Soci, C., Dee, D., Thépaut, J-N. (2023): ERA5 monthly averaged data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS), DOI: 10.24381/cds.f17050d7 (Accessed on 08-July-2024) + url_main: https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels-monthly-means?tab=overview + date_accessed: 2024-08-06 + date_published: 2019-04-18 + license: + name: Copernicus License + url: https://cds.climate.copernicus.eu/api/v2/terms/static/licence-to-use-copernicus-products.pdf + +outs: + - md5: e6348789b23c09d9413a856b017acda0 + size: 2353412191 + path: surface_temperature.gz diff --git a/snapshots/climate/2024-08-06/surface_temperature.py b/snapshots/climate/2024-08-06/surface_temperature.py new file mode 100644 index 00000000000..8235104ab21 --- /dev/null +++ b/snapshots/climate/2024-08-06/surface_temperature.py @@ -0,0 +1,69 @@ +"""Script to create a snapshot of the monthly averaged surface temperature data from 1950 to present from the Copernicus Climate Change Service. + + The script assumes that the data is available on the CDS API. + Instructions on how to access the API on a Mac are here: https://confluence.ecmwf.int/display/CKB/How+to+install+and+use+CDS+API+on+macOS + + More information on how to access the data is here: https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels-monthly-means?tab=overview + + The data is downloaded as a NetCDF file. Tutorials for using the Copernicus API are here and work with the NETCDF format are here: https://ecmwf-projects.github.io/copernicus-training-c3s/cds-tutorial.html + """ + +import gzip +import shutil +import tempfile +from pathlib import Path + +# CDS API +import cdsapi +import click +import xarray as xr + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. +SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +def main(upload: bool) -> None: + # Create a new snapshot. + snap = Snapshot(f"climate/{SNAPSHOT_VERSION}/surface_temperature.gz") + # Save data as a compressed temporary file. + with tempfile.TemporaryDirectory() as temp_dir: + c = cdsapi.Client() + output_file = Path(temp_dir) / "era5_monthly_t2m_eur.nc" + + c.retrieve( + "reanalysis-era5-single-levels-monthly-means", + { + "product_type": "monthly_averaged_reanalysis", + "variable": "2m_temperature", + "year": [str(year) for year in range(1940, 2025)], + "month": ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"], + "time": "00:00", + "area": [90, -180, -90, 180], + "format": "netcdf", + }, + output_file, + ) + + # Convert data to float32 + with xr.open_dataset(output_file) as ds: + # Use smaller types + ds["t2m"] = ds["t2m"].astype("float32") + + ds.to_netcdf(output_file) + + # Compress the file + with open(output_file, "rb") as f_in: + with gzip.open(str(output_file) + ".gz", "wb") as f_out: + shutil.copyfileobj(f_in, f_out) + + gzip_file = str(output_file) + ".gz" + # Upload snapshot. + snap.create_snapshot(filename=gzip_file, upload=upload) + + +if __name__ == "__main__": + main()