From e7fed01f3cabf61f32292c040d324681cb82a65d Mon Sep 17 00:00:00 2001 From: Qiusheng Wu Date: Fri, 22 Dec 2023 16:16:39 -0500 Subject: [PATCH] Add support for xee (#1859) --- docs/notebooks/140_ee_to_xarray.ipynb | 179 +++++++++++++++++++++ docs/tutorials.md | 1 + examples/README.md | 1 + examples/notebooks/140_ee_to_xarray.ipynb | 179 +++++++++++++++++++++ geemap/common.py | 180 ++++++++++++++++++++++ mkdocs.yml | 1 + requirements_all.txt | 3 +- 7 files changed, 543 insertions(+), 1 deletion(-) create mode 100644 docs/notebooks/140_ee_to_xarray.ipynb create mode 100644 examples/notebooks/140_ee_to_xarray.ipynb diff --git a/docs/notebooks/140_ee_to_xarray.ipynb b/docs/notebooks/140_ee_to_xarray.ipynb new file mode 100644 index 0000000000..4feb005cd9 --- /dev/null +++ b/docs/notebooks/140_ee_to_xarray.ipynb @@ -0,0 +1,179 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Open\n", + "\n", + "**Converting Earth Engine images to an Xarray Dataset**\n", + "\n", + "This notebook demonstrates how to convert Earth Engine images to an Xarray Dataset using [xee](https://github.com/google/Xee)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install -U geemap" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import ee\n", + "import geemap" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "geemap.ee_initialize()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Opening the [ERA5-Land hourly dataset](https://developers.google.com/earth-engine/datasets/catalog/ECMWF_ERA5_LAND_HOURLY) in Earth Engine and converting it to an Xarray Dataset. This is a huge dataset and it may take a minute or two to load. Please be patient." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = geemap.ee_to_xarray('ECMWF/ERA5_LAND/HOURLY', n_images=100)\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Open all bands in a specific projection and spatial resolution. Similarly, it may take a minute or two to load." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = geemap.ee_to_xarray('ECMWF/ERA5_LAND/HOURLY', crs='EPSG:4326', scale=0.25, n_images=100)\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Open an ImageCollection (maybe, with EE-side filtering or processing):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = ee.ImageCollection('ECMWF/ERA5_LAND/HOURLY').filterDate('1992-10-05', '1993-03-31')\n", + "ds = geemap.ee_to_xarray(dataset, crs='EPSG:4326', scale=0.25)\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Open an ImageCollection with a specific EE projection or geometry:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = ee.ImageCollection('ECMWF/ERA5_LAND/HOURLY').filterDate('1992-10-05', '1993-03-31')\n", + "geometry = ee.Geometry.Rectangle(113.33, -43.63, 153.56, -10.66)\n", + "ds = geemap.ee_to_xarray(\n", + " dataset,\n", + " projection=dataset.first().select(0).projection(),\n", + " geometry=geometry\n", + ")\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Opening a single image:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "image = ee.Image(\"LANDSAT/LC08/C02/T1_TOA/LC08_044034_20140318\")\n", + "ds = geemap.ee_to_xarray(image)\n", + "ds" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "Open multiple ImageCollections into one xarray.Dataset, all with the same projection. This one may take a few minutes to load." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = geemap.ee_to_xarray(\n", + " dataset=['ECMWF/ERA5_LAND/HOURLY', 'NASA/GDDP-CMIP6'],\n", + " n_images=100,\n", + " crs='EPSG:4326',\n", + " scale=0.25\n", + " )\n", + "ds" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/tutorials.md b/docs/tutorials.md index e9cb7f93e5..db3b5ef581 100644 --- a/docs/tutorials.md +++ b/docs/tutorials.md @@ -151,3 +151,4 @@ More video tutorials for geemap and Earth Engine are available on my [YouTube ch 137. Creating a rectangular grid covering a region of interest for computing zonal statistics ([notebook](https://geemap.org/notebooks/137_create_grid)) 138. Clipping Earth Engine images interactively with the Draw Control ([notebook](https://geemap.org/notebooks/138_draw_control)) 139. Converting an Earth Engine to an image ([notebook](https://geemap.org/notebooks/139_layer_to_image)) +140. Converting Earth Engine images to an Xarray Dataset ([notebook](https://geemap.org/notebooks/140_ee_to_xarray)) diff --git a/examples/README.md b/examples/README.md index c71f025e45..db28d90fe3 100644 --- a/examples/README.md +++ b/examples/README.md @@ -156,6 +156,7 @@ More video tutorials for geemap and Earth Engine are available on my [YouTube ch 137. 
Creating a rectangular grid covering a region of interest for computing zonal statistics ([notebook](https://geemap.org/notebooks/137_create_grid)) 138. Clipping Earth Engine images interactively with the Draw Control ([notebook](https://geemap.org/notebooks/138_draw_control)) 139. Converting an Earth Engine to an image ([notebook](https://geemap.org/notebooks/139_layer_to_image)) +140. Converting Earth Engine images to an Xarray Dataset ([notebook](https://geemap.org/notebooks/140_ee_to_xarray)) ### 1. Introducing the geemap Python package for interactive mapping with Google Earth Engine diff --git a/examples/notebooks/140_ee_to_xarray.ipynb b/examples/notebooks/140_ee_to_xarray.ipynb new file mode 100644 index 0000000000..4feb005cd9 --- /dev/null +++ b/examples/notebooks/140_ee_to_xarray.ipynb @@ -0,0 +1,179 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Open\n", + "\n", + "**Converting Earth Engine images to an Xarray Dataset**\n", + "\n", + "This notebook demonstrates how to convert Earth Engine images to an Xarray Dataset using [xee](https://github.com/google/Xee)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install -U geemap" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import ee\n", + "import geemap" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "geemap.ee_initialize()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Opening the [ERA5-Land hourly dataset](https://developers.google.com/earth-engine/datasets/catalog/ECMWF_ERA5_LAND_HOURLY) in Earth Engine and converting it to an Xarray Dataset. This is a huge dataset and it may take a minute or two to load. Please be patient." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = geemap.ee_to_xarray('ECMWF/ERA5_LAND/HOURLY', n_images=100)\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Open all bands in a specific projection and spatial resolution. Similarly, it may take a minute or two to load." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = geemap.ee_to_xarray('ECMWF/ERA5_LAND/HOURLY', crs='EPSG:4326', scale=0.25, n_images=100)\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Open an ImageCollection (maybe, with EE-side filtering or processing):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = ee.ImageCollection('ECMWF/ERA5_LAND/HOURLY').filterDate('1992-10-05', '1993-03-31')\n", + "ds = geemap.ee_to_xarray(dataset, crs='EPSG:4326', scale=0.25)\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Open an ImageCollection with a specific EE projection or geometry:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = ee.ImageCollection('ECMWF/ERA5_LAND/HOURLY').filterDate('1992-10-05', '1993-03-31')\n", + "geometry = ee.Geometry.Rectangle(113.33, -43.63, 153.56, -10.66)\n", + "ds = geemap.ee_to_xarray(\n", + " dataset,\n", + " projection=dataset.first().select(0).projection(),\n", + " geometry=geometry\n", + ")\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Opening a single image:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "image = ee.Image(\"LANDSAT/LC08/C02/T1_TOA/LC08_044034_20140318\")\n", + "ds = geemap.ee_to_xarray(image)\n", + "ds" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "Open multiple ImageCollections into one xarray.Dataset, all with the same projection. This one may take a few minutes to load." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = geemap.ee_to_xarray(\n", + " dataset=['ECMWF/ERA5_LAND/HOURLY', 'NASA/GDDP-CMIP6'],\n", + " n_images=100,\n", + " crs='EPSG:4326',\n", + " scale=0.25\n", + " )\n", + "ds" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/geemap/common.py b/geemap/common.py index 1713a1a969..baaf9188ef 100644 --- a/geemap/common.py +++ b/geemap/common.py @@ -1837,6 +1837,37 @@ def check_package(name, URL=""): ) +def install_package(package): + """Install a Python package. + + Args: + package (str | list): The package name or a GitHub URL or a list of package names or GitHub URLs. + """ + import subprocess + + if isinstance(package, str): + packages = [package] + + for package in packages: + if package.startswith("https"): + package = f"git+{package}" + + # Execute pip install command and show output in real-time + command = f"pip install {package}" + process = subprocess.Popen(command.split(), stdout=subprocess.PIPE) + + # Print output in real-time + while True: + output = process.stdout.readline() + if output == b"" and process.poll() is not None: + break + if output: + print(output.decode("utf-8").strip()) + + # Wait for process to complete + process.wait() + + def clone_repo(out_dir=".", unzip=True): """Clones the geemap GitHub repository. 
def ee_to_xarray(
    dataset,
    drop_variables=None,
    io_chunks=None,
    n_images=-1,
    mask_and_scale=True,
    decode_times=True,
    decode_timedelta=None,
    use_cftime=None,
    concat_characters=True,
    decode_coords=True,
    crs=None,
    scale=None,
    projection=None,
    geometry=None,
    primary_dim_name=None,
    primary_dim_property=None,
    ee_mask_value=None,
    ee_initialize=True,
    **kwargs,
):
    """Open Earth Engine data as an Xarray Dataset.

    This function is a wrapper for
    xee.EarthEngineBackendEntrypoint.open_dataset().
    See https://github.com/google/Xee/blob/main/xee/ext.py#L886

    Args:
        dataset: An asset ID string for an ImageCollection (with or without
            the "ee://" prefix), an ee.Image, an ee.ImageCollection, or a list
            of such items. An ee.Image is wrapped in an ee.ImageCollection; a
            list is opened with ``xarray.open_mfdataset``.
        drop_variables (optional): Variables or bands to drop before opening.
        io_chunks (optional): Specifies the chunking strategy for loading data
            from EE. By default, this automatically calculates optional chunks
            based on the `request_byte_limit`.
        n_images (optional): The max number of EE images in the collection to
            open. Useful when there are a large number of images in the
            collection since calculating collection size can be slow. -1
            indicates that all images should be included.
        mask_and_scale (optional): Lazily scale (using scale_factor and
            add_offset) and mask (using _FillValue).
        decode_times (optional): Decode cf times (e.g., integers since "hours
            since 2000-01-01") to np.datetime64.
        decode_timedelta (optional): If True, decode variables and coordinates
            with time units in {"days", "hours", "minutes", "seconds",
            "milliseconds", "microseconds"} into timedelta objects. If False,
            leave them encoded as numbers. If None (default), assume the same
            value of decode_time.
        use_cftime (optional): Only relevant if encoded dates come from a
            standard calendar (e.g. "gregorian", "proleptic_gregorian",
            "standard", or not specified). If None (default), attempt to decode
            times to ``np.datetime64[ns]`` objects; if this is not possible,
            decode times to ``cftime.datetime`` objects. If True, always decode
            times to ``cftime.datetime`` objects, regardless of whether or not
            they can be represented using ``np.datetime64[ns]`` objects. If
            False, always decode times to ``np.datetime64[ns]`` objects; if
            this is not possible raise an error.
        concat_characters (optional): Should character arrays be concatenated
            to strings, for example: ["h", "e", "l", "l", "o"] -> "hello"
        decode_coords (optional): bool or {"coordinates", "all"}, Controls
            which variables are set as coordinate variables:
            - "coordinates" or True: Set variables referred to in the
              ``'coordinates'`` attribute of the datasets or individual
              variables as coordinate variables.
            - "all": Set variables referred to in ``'grid_mapping'``,
              ``'bounds'`` and other attributes as coordinate variables.
        crs (optional): The coordinate reference system (a CRS code or WKT
            string). This defines the frame of reference to coalesce all
            variables upon opening. By default, data is opened with
            `EPSG:4326`.
        scale (optional): The scale in the `crs` or `projection`'s units of
            measure -- either meters or degrees. This defines the scale that
            all data is represented in upon opening. By default, the scale is
            1° when the CRS is in degrees or 10,000 when in meters.
        projection (optional): Specify an `ee.Projection` object to define the
            `scale` and `crs` (or other coordinate reference system) with which
            to coalesce all variables upon opening. By default, the scale and
            reference system is set by the `crs` and `scale` arguments.
        geometry (optional): Specify an `ee.Geometry` to define the regional
            bounds when opening the data. When not set, the bounds are defined
            by the CRS's `area_of_use` boundaries. If those aren't present, the
            bounds are derived from the geometry of the first image of the
            collection.
        primary_dim_name (optional): Override the name of the primary dimension
            of the output Dataset. By default, the name is 'time'.
        primary_dim_property (optional): Override the `ee.Image` property for
            which to derive the values of the primary dimension. By default,
            this is 'system:time_start'.
        ee_mask_value (optional): Value to mask to EE nodata values. By
            default, this is 'np.iinfo(np.int32).max' i.e. 2147483647.
        ee_initialize (optional): Whether to initialize ee with the high-volume
            endpoint. Defaults to True.
        **kwargs: Additional keyword arguments forwarded unchanged to
            ``xarray.open_dataset`` / ``xarray.open_mfdataset``, e.g.
            `request_byte_limit`, the max allowed bytes to request at a time
            from Earth Engine (by default, it is 48MBs).

    Returns:
        An xarray.Dataset that streams in remote data from Earth Engine.

    Raises:
        ValueError: If `dataset` is not a str, ee.Image, ee.ImageCollection,
            or list.
    """
    # xee is not referenced below; the import only verifies it is installed,
    # installing it on demand when it is missing.
    try:
        import xee  # noqa: F401
    except ImportError:
        install_package("xee")
        import xee  # noqa: F401

    import xarray as xr

    # Explicit arguments always take precedence over same-named extras, and
    # the engine is always forced to "ee".
    kwargs.update(
        drop_variables=drop_variables,
        io_chunks=io_chunks,
        n_images=n_images,
        mask_and_scale=mask_and_scale,
        decode_times=decode_times,
        decode_timedelta=decode_timedelta,
        use_cftime=use_cftime,
        concat_characters=concat_characters,
        decode_coords=decode_coords,
        crs=crs,
        scale=scale,
        projection=projection,
        geometry=geometry,
        primary_dim_name=primary_dim_name,
        primary_dim_property=primary_dim_property,
        ee_mask_value=ee_mask_value,
        engine="ee",
    )

    if ee_initialize:
        opt_url = "https://earthengine-highvolume.googleapis.com"
        ee.Initialize(opt_url=opt_url)

    uri_prefix = "ee://"

    if isinstance(dataset, str):
        if not dataset.startswith(uri_prefix):
            dataset = uri_prefix + dataset
    elif isinstance(dataset, ee.Image):
        dataset = ee.ImageCollection(dataset)
    elif isinstance(dataset, list):
        # Only string items get the prefix; other items pass through as-is.
        dataset = [
            uri_prefix + item
            if isinstance(item, str) and not item.startswith(uri_prefix)
            else item
            for item in dataset
        ]
    elif not isinstance(dataset, ee.ImageCollection):
        raise ValueError(
            "The dataset must be an asset ID string, an ee.Image, "
            "an ee.ImageCollection, or a list of these."
        )

    if isinstance(dataset, list):
        return xr.open_mfdataset(dataset, **kwargs)
    return xr.open_dataset(dataset, **kwargs)