adds align_bounds_with_dataset to adjust bounds in reader.part() me…

…thod (#670)
cogeotiff · Jan 16, 2024 · 5cc79a7 · 5cc79a7
1 parent 93b502b
commit 5cc79a7
Show file tree

Hide file tree

Showing 6 changed files with 406 additions and 90 deletions.
diff --git a/CHANGES.md b/CHANGES.md
@@ -1,8 +1,12 @@
 
-# unreleased
+# 6.3.0 (unreleased)
 
 * do not use `warpedVRT` when overwriting the dataset nodata value
 
+* add `align_bounds_with_dataset` option in `rio_tiler.reader.part` to align input bounds with the dataset resolution
+
+    <img src="https://github.com/cogeotiff/rio-tiler/assets/10407788/0e340d3d-e5d9-4558-93f7-3f307c017510" style="max-width: 500px;">
+
 # 6.2.10 (2024-01-08)
 
 * remove default Endpoint URL in AWS S3 Client for STAC Reader
@@ -88,7 +92,7 @@ This release was made while we waited on a fix for https://github.com/cogeotiff/
 
 * add `cmocean` colormaps
 
-    <img src="https://raw.githubusercontent.com/cogeotiff/rio-tiler/main/docs/src/img/colormaps_for_oceanography.png" style="max-width: 500px;"></a>
+    <img src="https://raw.githubusercontent.com/cogeotiff/rio-tiler/main/docs/src/img/colormaps_for_oceanography.png" style="max-width: 500px;">
 
 * allow uppercase in `cmap.get` method
 

diff --git a/docs/src/advanced/statistics.md b/docs/src/advanced/statistics.md
@@ -1,5 +1,5 @@
 
-### Statistics
+#### Form `Readers`
 
 `rio-tiler`'s Readers provide simple `.statistics` method to retrieve dataset global statistics
 
@@ -34,83 +34,19 @@ print(stats["1"].model_dump().keys())
 ]
 ```
 
-### Zonal Statistics
+#### ImageData
 
-You can easily extend the `statistics()` method to create a `.zonal_statistics` one which will accept input features to get statistics from.
+You can get statistics from `ImageData` objects which are returned by all rio-tiler reader methods (e.g. `.tile()`, `.preview()`, `.part()`, ...)
 
 ```python
-
-import attr
-from typing import Any, Union, Optional, List, Dict
-
-from rio_tiler import io
-from rio_tiler.models import BandStatistics
-
-from geojson_pydantic.features import Feature, FeatureCollection
-from geojson_pydantic.geometries import Polygon
-
-class Reader(io.Reader):
-    """Custom Reader with zonal_statistics method."""
-
-    def zonal_statistics(
-            self,
-            geojson: Union[FeatureCollection, Feature],
-            categorical: bool = False,
-            categories: Optional[List[float]] = None,
-            percentiles: Optional[List[int]] = None,
-            hist_options: Optional[Dict] = None,
-            max_size: int = None,
-            **kwargs: Any,
-        ) -> Union[FeatureCollection, Feature]:
-            """Return statistics from GeoJSON features.
-
-            Args:
-                geojson (Feature or FeatureCollection): a GeoJSON Feature or FeatureCollection.
-                categorical (bool): treat input data as categorical data. Defaults to False.
-                categories (list of numbers, optional): list of categories to return value for.
-                percentiles (list of numbers, optional): list of percentile values to calculate. Defaults to `[2, 98]`.
-                hist_options (dict, optional): Options to forward to numpy.histogram function.
-                max_size (int, optional): Limit the size of the longest dimension of the dataset read, respecting bounds X/Y aspect ratio. Defaults to None.
-                kwargs (optional): Options to forward to `self.preview`.
-
-            Returns:
-                Feature or FeatureCollection
-
-            """
-            kwargs = {**self.options, **kwargs}
-
-            hist_options = hist_options or {}
-
-            fc = geojson
-            # We transform the input Feature to a FeatureCollection
-            if isinstance(fc, Feature):
-                fc = FeatureCollection(type="FeatureCollection", features=[geojson])
-
-            for feature in fc:
-                # Get data overlapping with the feature (using Reader.feature method)
-                data = self.feature(
-                    feature.model_dump(exclude_none=True),
-                    max_size=max_size,
-                    **kwargs,
-                )
-                stats = data.statistics(
-                    categorical=categorical,
-                    categories=categories,
-                    percentiles=percentiles,
-                    hist_options=hist_options,
-                )
-
-                # Update input feature properties and add the statistics
-                feature.properties = feature.properties or {}
-                feature.properties.update({"statistics": stats})
-
-            return fc.features[0] if isinstance(geojson, Feature) else fc
+with Reader("cog.tif") as src:
+    image = src.preview()
+    stats = image.statistics()
 ```
 
-
 ### Area Weighted Statistics
 
-When getting statistics from `feature`, you may want to calculate values from the pixels which intersect with the geometry but also take the pixel intersection percentage into account. Starting with rio-tiler `6.2.0`, we've added a `coverage` option to the `statistics` utility which enable the user to pass an array representing the coverage percentage such as:
+When getting statistics from a `feature`, you may want to calculate values from the pixels which intersect with the geometry but also take the pixel intersection percentage into account. Starting with rio-tiler `6.2.0`, we've added a `coverage` option to the `statistics` utility which enable the user to pass an array representing the coverage percentage such as:
 
 ```python
 import numpy
@@ -133,34 +69,110 @@ assert stats[0]["mean"] == 1.125  # (1 * 0.5 + 2 * 0.0 + 3 * 1.0 + 4 * 0.25) / 4
 assert stats[0]["count"] == 1.75  # (0.5 + 0 + 1 + 0.25) sum of the coverage array
 ```
 
-When using with a `feature`, your code might look something like:
+#### Adjusting geometry `align_bounds_with_dataset=True`
 
-```python
-from rio_tiler.io import Reader
-from rio_tiler.constants import WGS84_CRS
+In rio-tiler `6.3,0` a new option has been introduced to reduce artifacts and produce more precise zonal statistics. This option is available in the low-level `reader.part()` method used in rio-tiler reader's `.feature()` and `.part()` methods.
 
-with Reader(path) as src:
-    # First get the array for the feature
-    data = src_dst.feature(
+```python
+with Reader("cog.tif") as src:
+    data = src.feature(
         shape,
         shape_crs=WGS84_CRS,
+        align_bounds_with_dataset=True,
     )
 
-    # Get the coverage % array, using ImageData.get_coverage_array method
     coverage_array = data.get_coverage_array(
-        shape, shape_crs=WGS84_CRS
+        shape,
+        shape_crs=WGS84_CRS,
     )
 
-    # Get statistics (ImageData.statistics is calling `rio_tiler.utils.get_array_statistics`)
     stats = data.statistics(coverage=coverage_array)
 ```
 
-!!! warnings
-    The coverage weights will only have influence on specific statistics:
+When passing `align_bounds_with_dataset=True` to the  `reader.part()` method (forwared from `.feature` or `.part` reader's method), rio-tiler will adjust the input geometry bounds to math the input dataset resolution/transform and avoid unnecessary resampling.
 
-    - `mean`
-    - `count`
-    - `sum`
-    - `std`
-    - `median`
+<img src="https://github.com/cogeotiff/rio-tiler/assets/10407788/0e340d3d-e5d9-4558-93f7-3f307c017510" style="max-width: 800px;">
 
+
+### Zonal Statistics method
+
+You can easily extend the rio-tiler's reader to add a `.zonal_statistics()` method as:
+
+```python
+
+import attr
+from typing import Any, Union, Optional, List, Dict
+
+from rio_tiler import io
+from rio_tiler.models import BandStatistics
+
+from geojson_pydantic.features import Feature, FeatureCollection
+from geojson_pydantic.geometries import Polygon
+
+class Reader(io.Reader):
+    """Custom Reader with zonal_statistics method."""
+
+    def zonal_statistics(
+        self,
+        geojson: Union[FeatureCollection, Feature],
+        categorical: bool = False,
+        categories: Optional[List[float]] = None,
+        percentiles: Optional[List[int]] = None,
+        hist_options: Optional[Dict] = None,
+        max_size: int = None,
+        **kwargs: Any,
+    ) -> Union[FeatureCollection, Feature]:
+        """Return statistics from GeoJSON features.
+
+        Args:
+            geojson (Feature or FeatureCollection): a GeoJSON Feature or FeatureCollection.
+            categorical (bool): treat input data as categorical data. Defaults to False.
+            categories (list of numbers, optional): list of categories to return value for.
+            percentiles (list of numbers, optional): list of percentile values to calculate. Defaults to `[2, 98]`.
+            hist_options (dict, optional): Options to forward to numpy.histogram function.
+            max_size (int, optional): Limit the size of the longest dimension of the dataset read, respecting bounds X/Y aspect ratio. Defaults to None.
+            kwargs (optional): Options to forward to `self.preview`.
+
+        Returns:
+            Feature or FeatureCollection
+
+        """
+        kwargs = {**self.options, **kwargs}
+
+        hist_options = hist_options or {}
+
+        fc = geojson
+        # We transform the input Feature to a FeatureCollection
+        if isinstance(fc, Feature):
+            fc = FeatureCollection(type="FeatureCollection", features=[geojson])
+
+        for feature in fc:
+            geom = feature.model_dump(exclude_none=True)
+
+            # Get data overlapping with the feature (using Reader.feature method)
+            data = self.feature(
+                geom,
+                shape_crs=WGS84_CRS,
+                align_bounds_with_dataset=True,
+                max_size=max_size,
+                **kwargs,
+            )
+            coverage_array = data.get_coverage_array(
+                geom,
+                shape_crs=WGS84_CRS,
+            )
+
+            stats = data.statistics(
+                categorical=categorical,
+                categories=categories,
+                percentiles=percentiles,
+                hist_options=hist_options,
+                coverage=coverage_array,
+            )
+
+            # Update input feature properties and add the statistics
+            feature.properties = feature.properties or {}
+            feature.properties.update({"statistics": stats})
+
+        return fc.features[0] if isinstance(geojson, Feature) else fc
+```
diff --git a/rio_tiler/reader.py b/rio_tiler/reader.py
@@ -12,6 +12,7 @@
 from rasterio.crs import CRS
 from rasterio.enums import ColorInterp, MaskFlags, Resampling
 from rasterio.io import DatasetReader, DatasetWriter
+from rasterio.transform import array_bounds
 from rasterio.vrt import WarpedVRT
 from rasterio.warp import transform as transform_coords
 from rasterio.warp import transform_bounds
@@ -289,6 +290,7 @@ def part(
     force_binary_mask: bool = True,
     nodata: Optional[NoData] = None,
     vrt_options: Optional[Dict] = None,
+    align_bounds_with_dataset: bool = False,
     resampling_method: RIOResampling = "nearest",
     reproject_method: WarpResampling = "nearest",
     unscale: bool = False,
@@ -312,6 +314,7 @@ def part(
         buffer (float, optional): Buffer to apply to each bbox edge. Defaults to `0.`.
         nodata (int or float, optional): Overwrite dataset internal nodata value.
         vrt_options (dict, optional): Options to be passed to the rasterio.warp.WarpedVRT class.
+        align_bounds_with_dataset (bool): Align input bounds with dataset transform. Defaults to `False`.
         resampling_method (RIOResampling, optional): RasterIO resampling algorithm. Defaults to `nearest`.
         reproject_method (WarpResampling, optional): WarpKernel resampling algorithm. Defaults to `nearest`.
         unscale (bool, optional): Apply 'scales' and 'offsets' on output data value. Defaults to `False`.
@@ -363,7 +366,9 @@ def part(
             src_dst,
             bounds,
             dst_crs=dst_crs,
+            align_bounds_with_dataset=align_bounds_with_dataset,
         )
+        bounds = array_bounds(vrt_height, vrt_width, vrt_transform)
 
         if max_size and not (width and height):
             height, width = _get_width_height(max_size, vrt_height, vrt_width)
@@ -379,7 +384,9 @@ def part(
                 src_dst,
                 bounds,
                 dst_crs=dst_crs,
+                align_bounds_with_dataset=align_bounds_with_dataset,
             )
+            bounds = array_bounds(vrt_height, vrt_width, vrt_transform)
 
         if padding > 0 and not is_aligned(src_dst, bounds, bounds_crs=dst_crs):
             vrt_transform = vrt_transform * Affine.translation(-padding, -padding)
@@ -415,6 +422,18 @@ def part(
 
     # else no re-projection needed
     window = windows.from_bounds(*bounds, transform=src_dst.transform)
+    if align_bounds_with_dataset:
+        (row_start, row_stop), (col_start, col_stop) = window.toranges()
+        row_start, row_stop = int(math.floor(row_start)), int(math.ceil(row_stop))
+        col_start, col_stop = int(math.floor(col_start)), int(math.ceil(col_stop))
+        window = windows.Window(
+            col_off=col_start,
+            row_off=row_start,
+            width=max(col_stop - col_start, 0.0),
+            height=max(row_stop - row_start, 0.0),
+        )
+        bounds = windows.bounds(window, src_dst.transform)
+
     if max_size and not (width and height):
         height, width = _get_width_height(
             max_size, round(window.height), round(window.width)

diff --git a/rio_tiler/utils.py b/rio_tiler/utils.py
@@ -1,5 +1,6 @@
 """rio_tiler.utils: utility functions."""
 
+import math
 import warnings
 from io import BytesIO
 from typing import Any, Dict, Generator, List, Optional, Sequence, Tuple, Union
@@ -229,6 +230,7 @@ def get_vrt_transform(
     width: Optional[int] = None,
     dst_crs: CRS = WEB_MERCATOR_CRS,
     window_precision: int = 6,
+    align_bounds_with_dataset: bool = False,
 ) -> Tuple[Affine, int, int]:
     """Calculate VRT transform.
 
@@ -238,6 +240,7 @@ def get_vrt_transform(
         height (int, optional): Desired output height of the array for the input bounds.
         width (int, optional): Desired output width of the array for the input bounds.
         dst_crs (rasterio.crs.CRS, optional): Target Coordinate Reference System. Defaults to `epsg:3857`.
+        align_bounds_with_dataset (bool): Align input bounds with dataset transform. Defaults to `False`.
 
     Returns:
         tuple: VRT transform (affine.Affine), width (int) and height (int)
@@ -299,6 +302,19 @@ def get_vrt_transform(
         # Get Bounds for the rounded window
         bounds = src_dst.window_bounds(w)
 
+    elif align_bounds_with_dataset:
+        window = windows.from_bounds(*bounds, transform=dst_transform)
+        (row_start, row_stop), (col_start, col_stop) = window.toranges()
+        row_start, row_stop = int(math.floor(row_start)), int(math.ceil(row_stop))
+        col_start, col_stop = int(math.floor(col_start)), int(math.ceil(col_stop))
+        window = windows.Window(
+            col_off=col_start,
+            row_off=row_start,
+            width=max(col_stop - col_start, 0.0),
+            height=max(row_stop - row_start, 0.0),
+        )
+        bounds = windows.bounds(window, dst_transform)
+
     w, s, e, n = bounds
 
     # TODO: Explain