diff --git a/demo/land_cover_dataset_demo.ipynb b/demo/land_cover_dataset_demo.ipynb index 92f152d..521266a 100644 --- a/demo/land_cover_dataset_demo.ipynb +++ b/demo/land_cover_dataset_demo.ipynb @@ -42,6 +42,13 @@ "bbox_demo = SpatialBounds(54, 56, 1, 3)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Download dataset." + ] + }, { "cell_type": "code", "execution_count": 3, @@ -51,7 +58,7 @@ "name": "stderr", "output_type": "stream", "text": [ - " 50%|█████ | 1/2 [00:00<00:00, 2.97it/s]" + " 50%|█████ | 1/2 [00:00<00:00, 1.84it/s]" ] }, { @@ -60,6 +67,37 @@ "text": [ "File 'land-cover_LCCS_MAP_300m_2010.zip' already exists, skipping...\n" ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 2/2 [00:00<00:00, 2.30it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File 'land-cover_LCCS_MAP_300m_2011.zip' already exists, skipping...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -71,6 +109,519 @@ " variable_names=[\"land_cover\"],\n", ")" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data ingestion to the unified format in `zampy`." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File 'land-cover_LCCS_MAP_300m_2011.nc' already exists, skipping...\n", + "File 'land-cover_LCCS_MAP_300m_2010.nc' already exists, skipping...\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# this step could take some time\n", + "land_cover_dataset.ingest(download_dir, ingest_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "ds = land_cover_dataset.load(\n", + " ingest_dir=ingest_dir,\n", + " time_bounds=times,\n", + " spatial_bounds=bbox_demo,\n", + " variable_names=[\"land_cover\"],\n", + " resolution=1.0,\n", + " regrid_method=\"most_common\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:     (time: 2, latitude: 54, longitude: 54)\n",
+       "Coordinates:\n",
+       "  * time        (time) datetime64[ns] 2010-01-01 2011-01-01\n",
+       "  * latitude    (latitude) float64 1.0 2.0 3.0 4.0 5.0 ... 51.0 52.0 53.0 54.0\n",
+       "  * longitude   (longitude) float64 3.0 4.0 5.0 6.0 7.0 ... 53.0 54.0 55.0 56.0\n",
+       "Data variables:\n",
+       "    land_cover  (time, latitude, longitude) float32 210.0 210.0 ... 10.0 10.0\n",
+       "Attributes: (12/38)\n",
+       "    id:                         ESACCI-LC-L4-LCCS-Map-300m-P1Y-2010-v2.0.7cds\n",
+       "    title:                      Land Cover Map of ESA CCI brokered by CDS\n",
+       "    summary:                    This dataset characterizes the land cover of ...\n",
+       "    type:                       ESACCI-LC-L4-LCCS-Map-300m-P1Y\n",
+       "    project:                    Climate Change Initiative - European Space Ag...\n",
+       "    references:                 http://www.esa-landcover-cci.org/\n",
+       "    ...                         ...\n",
+       "    geospatial_lon_max:         180\n",
+       "    spatial_resolution:         300m\n",
+       "    geospatial_lat_units:       degrees_north\n",
+       "    geospatial_lat_resolution:  0.002778\n",
+       "    geospatial_lon_units:       degrees_east\n",
+       "    geospatial_lon_resolution:  0.002778
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 2, latitude: 54, longitude: 54)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2010-01-01 2011-01-01\n", + " * latitude (latitude) float64 1.0 2.0 3.0 4.0 5.0 ... 51.0 52.0 53.0 54.0\n", + " * longitude (longitude) float64 3.0 4.0 5.0 6.0 7.0 ... 53.0 54.0 55.0 56.0\n", + "Data variables:\n", + " land_cover (time, latitude, longitude) float32 210.0 210.0 ... 10.0 10.0\n", + "Attributes: (12/38)\n", + " id: ESACCI-LC-L4-LCCS-Map-300m-P1Y-2010-v2.0.7cds\n", + " title: Land Cover Map of ESA CCI brokered by CDS\n", + " summary: This dataset characterizes the land cover of ...\n", + " type: ESACCI-LC-L4-LCCS-Map-300m-P1Y\n", + " project: Climate Change Initiative - European Space Ag...\n", + " references: http://www.esa-landcover-cci.org/\n", + " ... ...\n", + " geospatial_lon_max: 180\n", + " spatial_resolution: 300m\n", + " geospatial_lat_units: degrees_north\n", + " geospatial_lat_resolution: 0.002778\n", + " geospatial_lon_units: degrees_east\n", + " geospatial_lon_resolution: 0.002778" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds" + ] } ], "metadata": { diff --git a/src/zampy/datasets/cds_utils.py b/src/zampy/datasets/cds_utils.py index e81f583..ff03511 100644 --- a/src/zampy/datasets/cds_utils.py +++ b/src/zampy/datasets/cds_utils.py @@ -129,10 +129,10 @@ def cds_request_land_cover( r = c.retrieve( dataset, { - 'variable': "all", - 'format': "zip", - 'year': year, - 'version': version, + "variable": "all", + "format": "zip", + "year": year, + "version": version, }, ) fpath = path / f"{fname}_LCCS_MAP_300m_{year}.zip" diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py index cf7b571..2b964a8 100644 --- a/src/zampy/datasets/land_cover.py +++ b/src/zampy/datasets/land_cover.py @@ -114,6 +114,42 @@ def ingest( return True + def load( + self, + ingest_dir: Path, + time_bounds: TimeBounds, + spatial_bounds: SpatialBounds, + resolution: float, + regrid_method: str, # Unused in land-cover dataset + variable_names: list[str], + ) -> xr.Dataset: + files: list[Path] = [] + for var in variable_names: + if var not in self.variable_names: + msg = ( + "One or more variables are not in this dataset.\n" + f"Please check input. Dataset: '{self.name}'\n" + f"Variables: '{variable_names}'" + ) + raise ValueError(msg) + files = list((ingest_dir / self.name).glob(f"{self.name}_*.nc")) + + ds = xr.open_mfdataset(files, chunks={"latitude": 200, "longitude": 200}) + ds = ds.sel(time=slice(time_bounds.start, time_bounds.end)) + new_grid = xarray_regrid.Grid( + north=spatial_bounds.north, + east=spatial_bounds.east, + south=spatial_bounds.south, + west=spatial_bounds.west, + resolution_lat=resolution, + resolution_lon=resolution, + ) + target_dataset = xarray_regrid.create_regridding_dataset(new_grid) + + ds_regrid = ds.regrid.most_common(target_dataset, time_dim="time", max_mem=1e9) + + return ds_regrid + def unzip_raw_to_netcdf( ingest_folder: Path, @@ -165,7 +201,7 @@ def extract_netcdf_to_zampy(ingest_folder: Path, file: Path) -> xr.Dataset: east=180, south=-90, west=-180, - resolution_lat=0.25, + resolution_lat=0.25, # same as resolution of ERA5, must be sufficient resolution_lon=0.25, )