diff --git a/CHANGES.md b/CHANGES.md
index e516132f..4e72641d 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,8 +1,12 @@
-# unreleased
+# 6.3.0 (unreleased)
* do not use `warpedVRT` when overwriting the dataset nodata value
+* add `align_bounds_with_dataset` option in `rio_tiler.reader.part` to align input bounds with the dataset resolution
+
+
+
# 6.2.10 (2024-01-08)
* remove default Endpoint URL in AWS S3 Client for STAC Reader
@@ -88,7 +92,7 @@ This release was made while we waited on a fix for https://github.com/cogeotiff/
* add `cmocean` colormaps
-
+
* allow uppercase in `cmap.get` method
diff --git a/docs/src/advanced/statistics.md b/docs/src/advanced/statistics.md
index 8209b50c..f3acc561 100644
--- a/docs/src/advanced/statistics.md
+++ b/docs/src/advanced/statistics.md
@@ -1,5 +1,5 @@
-### Statistics
+#### Form `Readers`
`rio-tiler`'s Readers provide simple `.statistics` method to retrieve dataset global statistics
@@ -34,83 +34,19 @@ print(stats["1"].model_dump().keys())
]
```
-### Zonal Statistics
+#### ImageData
-You can easily extend the `statistics()` method to create a `.zonal_statistics` one which will accept input features to get statistics from.
+You can get statistics from `ImageData` objects which are returned by all rio-tiler reader methods (e.g. `.tile()`, `.preview()`, `.part()`, ...)
```python
-
-import attr
-from typing import Any, Union, Optional, List, Dict
-
-from rio_tiler import io
-from rio_tiler.models import BandStatistics
-
-from geojson_pydantic.features import Feature, FeatureCollection
-from geojson_pydantic.geometries import Polygon
-
-class Reader(io.Reader):
- """Custom Reader with zonal_statistics method."""
-
- def zonal_statistics(
- self,
- geojson: Union[FeatureCollection, Feature],
- categorical: bool = False,
- categories: Optional[List[float]] = None,
- percentiles: Optional[List[int]] = None,
- hist_options: Optional[Dict] = None,
- max_size: int = None,
- **kwargs: Any,
- ) -> Union[FeatureCollection, Feature]:
- """Return statistics from GeoJSON features.
-
- Args:
- geojson (Feature or FeatureCollection): a GeoJSON Feature or FeatureCollection.
- categorical (bool): treat input data as categorical data. Defaults to False.
- categories (list of numbers, optional): list of categories to return value for.
- percentiles (list of numbers, optional): list of percentile values to calculate. Defaults to `[2, 98]`.
- hist_options (dict, optional): Options to forward to numpy.histogram function.
- max_size (int, optional): Limit the size of the longest dimension of the dataset read, respecting bounds X/Y aspect ratio. Defaults to None.
- kwargs (optional): Options to forward to `self.preview`.
-
- Returns:
- Feature or FeatureCollection
-
- """
- kwargs = {**self.options, **kwargs}
-
- hist_options = hist_options or {}
-
- fc = geojson
- # We transform the input Feature to a FeatureCollection
- if isinstance(fc, Feature):
- fc = FeatureCollection(type="FeatureCollection", features=[geojson])
-
- for feature in fc:
- # Get data overlapping with the feature (using Reader.feature method)
- data = self.feature(
- feature.model_dump(exclude_none=True),
- max_size=max_size,
- **kwargs,
- )
- stats = data.statistics(
- categorical=categorical,
- categories=categories,
- percentiles=percentiles,
- hist_options=hist_options,
- )
-
- # Update input feature properties and add the statistics
- feature.properties = feature.properties or {}
- feature.properties.update({"statistics": stats})
-
- return fc.features[0] if isinstance(geojson, Feature) else fc
+with Reader("cog.tif") as src:
+ image = src.preview()
+ stats = image.statistics()
```
-
### Area Weighted Statistics
-When getting statistics from `feature`, you may want to calculate values from the pixels which intersect with the geometry but also take the pixel intersection percentage into account. Starting with rio-tiler `6.2.0`, we've added a `coverage` option to the `statistics` utility which enable the user to pass an array representing the coverage percentage such as:
+When getting statistics from a `feature`, you may want to calculate values from the pixels which intersect with the geometry but also take the pixel intersection percentage into account. Starting with rio-tiler `6.2.0`, we've added a `coverage` option to the `statistics` utility which enable the user to pass an array representing the coverage percentage such as:
```python
import numpy
@@ -133,34 +69,110 @@ assert stats[0]["mean"] == 1.125 # (1 * 0.5 + 2 * 0.0 + 3 * 1.0 + 4 * 0.25) / 4
assert stats[0]["count"] == 1.75 # (0.5 + 0 + 1 + 0.25) sum of the coverage array
```
-When using with a `feature`, your code might look something like:
+#### Adjusting geometry `align_bounds_with_dataset=True`
-```python
-from rio_tiler.io import Reader
-from rio_tiler.constants import WGS84_CRS
+In rio-tiler `6.3,0` a new option has been introduced to reduce artifacts and produce more precise zonal statistics. This option is available in the low-level `reader.part()` method used in rio-tiler reader's `.feature()` and `.part()` methods.
-with Reader(path) as src:
- # First get the array for the feature
- data = src_dst.feature(
+```python
+with Reader("cog.tif") as src:
+ data = src.feature(
shape,
shape_crs=WGS84_CRS,
+ align_bounds_with_dataset=True,
)
- # Get the coverage % array, using ImageData.get_coverage_array method
coverage_array = data.get_coverage_array(
- shape, shape_crs=WGS84_CRS
+ shape,
+ shape_crs=WGS84_CRS,
)
- # Get statistics (ImageData.statistics is calling `rio_tiler.utils.get_array_statistics`)
stats = data.statistics(coverage=coverage_array)
```
-!!! warnings
- The coverage weights will only have influence on specific statistics:
+When passing `align_bounds_with_dataset=True` to the `reader.part()` method (forwared from `.feature` or `.part` reader's method), rio-tiler will adjust the input geometry bounds to math the input dataset resolution/transform and avoid unnecessary resampling.
- - `mean`
- - `count`
- - `sum`
- - `std`
- - `median`
+
+
+### Zonal Statistics method
+
+You can easily extend the rio-tiler's reader to add a `.zonal_statistics()` method as:
+
+```python
+
+import attr
+from typing import Any, Union, Optional, List, Dict
+
+from rio_tiler import io
+from rio_tiler.models import BandStatistics
+
+from geojson_pydantic.features import Feature, FeatureCollection
+from geojson_pydantic.geometries import Polygon
+
+class Reader(io.Reader):
+ """Custom Reader with zonal_statistics method."""
+
+ def zonal_statistics(
+ self,
+ geojson: Union[FeatureCollection, Feature],
+ categorical: bool = False,
+ categories: Optional[List[float]] = None,
+ percentiles: Optional[List[int]] = None,
+ hist_options: Optional[Dict] = None,
+ max_size: int = None,
+ **kwargs: Any,
+ ) -> Union[FeatureCollection, Feature]:
+ """Return statistics from GeoJSON features.
+
+ Args:
+ geojson (Feature or FeatureCollection): a GeoJSON Feature or FeatureCollection.
+ categorical (bool): treat input data as categorical data. Defaults to False.
+ categories (list of numbers, optional): list of categories to return value for.
+ percentiles (list of numbers, optional): list of percentile values to calculate. Defaults to `[2, 98]`.
+ hist_options (dict, optional): Options to forward to numpy.histogram function.
+ max_size (int, optional): Limit the size of the longest dimension of the dataset read, respecting bounds X/Y aspect ratio. Defaults to None.
+ kwargs (optional): Options to forward to `self.preview`.
+
+ Returns:
+ Feature or FeatureCollection
+
+ """
+ kwargs = {**self.options, **kwargs}
+
+ hist_options = hist_options or {}
+
+ fc = geojson
+ # We transform the input Feature to a FeatureCollection
+ if isinstance(fc, Feature):
+ fc = FeatureCollection(type="FeatureCollection", features=[geojson])
+
+ for feature in fc:
+ geom = feature.model_dump(exclude_none=True)
+
+ # Get data overlapping with the feature (using Reader.feature method)
+ data = self.feature(
+ geom,
+ shape_crs=WGS84_CRS,
+ align_bounds_with_dataset=True,
+ max_size=max_size,
+ **kwargs,
+ )
+ coverage_array = data.get_coverage_array(
+ geom,
+ shape_crs=WGS84_CRS,
+ )
+
+ stats = data.statistics(
+ categorical=categorical,
+ categories=categories,
+ percentiles=percentiles,
+ hist_options=hist_options,
+ coverage=coverage_array,
+ )
+
+ # Update input feature properties and add the statistics
+ feature.properties = feature.properties or {}
+ feature.properties.update({"statistics": stats})
+
+ return fc.features[0] if isinstance(geojson, Feature) else fc
+```
diff --git a/rio_tiler/reader.py b/rio_tiler/reader.py
index f3244bc2..8fe18dda 100644
--- a/rio_tiler/reader.py
+++ b/rio_tiler/reader.py
@@ -12,6 +12,7 @@
from rasterio.crs import CRS
from rasterio.enums import ColorInterp, MaskFlags, Resampling
from rasterio.io import DatasetReader, DatasetWriter
+from rasterio.transform import array_bounds
from rasterio.vrt import WarpedVRT
from rasterio.warp import transform as transform_coords
from rasterio.warp import transform_bounds
@@ -289,6 +290,7 @@ def part(
force_binary_mask: bool = True,
nodata: Optional[NoData] = None,
vrt_options: Optional[Dict] = None,
+ align_bounds_with_dataset: bool = False,
resampling_method: RIOResampling = "nearest",
reproject_method: WarpResampling = "nearest",
unscale: bool = False,
@@ -312,6 +314,7 @@ def part(
buffer (float, optional): Buffer to apply to each bbox edge. Defaults to `0.`.
nodata (int or float, optional): Overwrite dataset internal nodata value.
vrt_options (dict, optional): Options to be passed to the rasterio.warp.WarpedVRT class.
+ align_bounds_with_dataset (bool): Align input bounds with dataset transform. Defaults to `False`.
resampling_method (RIOResampling, optional): RasterIO resampling algorithm. Defaults to `nearest`.
reproject_method (WarpResampling, optional): WarpKernel resampling algorithm. Defaults to `nearest`.
unscale (bool, optional): Apply 'scales' and 'offsets' on output data value. Defaults to `False`.
@@ -363,7 +366,9 @@ def part(
src_dst,
bounds,
dst_crs=dst_crs,
+ align_bounds_with_dataset=align_bounds_with_dataset,
)
+ bounds = array_bounds(vrt_height, vrt_width, vrt_transform)
if max_size and not (width and height):
height, width = _get_width_height(max_size, vrt_height, vrt_width)
@@ -379,7 +384,9 @@ def part(
src_dst,
bounds,
dst_crs=dst_crs,
+ align_bounds_with_dataset=align_bounds_with_dataset,
)
+ bounds = array_bounds(vrt_height, vrt_width, vrt_transform)
if padding > 0 and not is_aligned(src_dst, bounds, bounds_crs=dst_crs):
vrt_transform = vrt_transform * Affine.translation(-padding, -padding)
@@ -415,6 +422,18 @@ def part(
# else no re-projection needed
window = windows.from_bounds(*bounds, transform=src_dst.transform)
+ if align_bounds_with_dataset:
+ (row_start, row_stop), (col_start, col_stop) = window.toranges()
+ row_start, row_stop = int(math.floor(row_start)), int(math.ceil(row_stop))
+ col_start, col_stop = int(math.floor(col_start)), int(math.ceil(col_stop))
+ window = windows.Window(
+ col_off=col_start,
+ row_off=row_start,
+ width=max(col_stop - col_start, 0.0),
+ height=max(row_stop - row_start, 0.0),
+ )
+ bounds = windows.bounds(window, src_dst.transform)
+
if max_size and not (width and height):
height, width = _get_width_height(
max_size, round(window.height), round(window.width)
diff --git a/rio_tiler/utils.py b/rio_tiler/utils.py
index 237f2329..19ef25e9 100644
--- a/rio_tiler/utils.py
+++ b/rio_tiler/utils.py
@@ -1,5 +1,6 @@
"""rio_tiler.utils: utility functions."""
+import math
import warnings
from io import BytesIO
from typing import Any, Dict, Generator, List, Optional, Sequence, Tuple, Union
@@ -229,6 +230,7 @@ def get_vrt_transform(
width: Optional[int] = None,
dst_crs: CRS = WEB_MERCATOR_CRS,
window_precision: int = 6,
+ align_bounds_with_dataset: bool = False,
) -> Tuple[Affine, int, int]:
"""Calculate VRT transform.
@@ -238,6 +240,7 @@ def get_vrt_transform(
height (int, optional): Desired output height of the array for the input bounds.
width (int, optional): Desired output width of the array for the input bounds.
dst_crs (rasterio.crs.CRS, optional): Target Coordinate Reference System. Defaults to `epsg:3857`.
+ align_bounds_with_dataset (bool): Align input bounds with dataset transform. Defaults to `False`.
Returns:
tuple: VRT transform (affine.Affine), width (int) and height (int)
@@ -299,6 +302,19 @@ def get_vrt_transform(
# Get Bounds for the rounded window
bounds = src_dst.window_bounds(w)
+ elif align_bounds_with_dataset:
+ window = windows.from_bounds(*bounds, transform=dst_transform)
+ (row_start, row_stop), (col_start, col_stop) = window.toranges()
+ row_start, row_stop = int(math.floor(row_start)), int(math.ceil(row_stop))
+ col_start, col_stop = int(math.floor(col_start)), int(math.ceil(col_stop))
+ window = windows.Window(
+ col_off=col_start,
+ row_off=row_start,
+ width=max(col_stop - col_start, 0.0),
+ height=max(row_stop - row_start, 0.0),
+ )
+ bounds = windows.bounds(window, dst_transform)
+
w, s, e, n = bounds
# TODO: Explain
diff --git a/tests/test_io_rasterio.py b/tests/test_io_rasterio.py
index 98b9c3a3..153c328e 100644
--- a/tests/test_io_rasterio.py
+++ b/tests/test_io_rasterio.py
@@ -1013,3 +1013,101 @@ def test_metadata_img():
img = src.preview()
assert img.dataset_statistics
assert img.metadata
+
+
+def test_feature_statistics():
+ """Test feature statistics method implemented in titiler."""
+ # square
+ square = {
+ "type": "Feature",
+ "properties": {},
+ "geometry": {
+ "coordinates": [
+ [
+ [-56.85853679288809, 73.6870721652219],
+ [-56.85853679288809, 73.18595963998644],
+ [-54.97274279983506, 73.18595963998644],
+ [-54.97274279983506, 73.6870721652219],
+ [-56.85853679288809, 73.6870721652219],
+ ]
+ ],
+ "type": "Polygon",
+ },
+ }
+
+ square_crs = {
+ "type": "Polygon",
+ "coordinates": [
+ [
+ [442337.0, 8175239.0],
+ [517915.0, 8175239.0],
+ [517915.0, 8134628.0],
+ [442337.0, 8134628.0],
+ [442337.0, 8175239.0],
+ ]
+ ],
+ }
+
+ # Case 1 - image should be aligned with the bounds
+ # because we reproject to the shape crs
+ with Reader(COGEO) as src:
+ image = src.feature(
+ square,
+ dst_crs=WGS84_CRS,
+ shape_crs=WGS84_CRS,
+ )
+ coverage_array = image.get_coverage_array(
+ square,
+ shape_crs=WGS84_CRS,
+ )
+ stats = image.statistics(coverage=coverage_array)
+ assert numpy.unique(coverage_array).tolist() == [1.0]
+
+ # Case 2 - image not aligned with bounds because we align the
+ # bounds to the reprojected dataset
+ with Reader(COGEO) as src:
+ image = src.feature(
+ square,
+ dst_crs=WGS84_CRS,
+ shape_crs=WGS84_CRS,
+ align_bounds_with_dataset=True,
+ )
+ coverage_array = image.get_coverage_array(
+ square,
+ shape_crs=WGS84_CRS,
+ )
+ stats_align = image.statistics(coverage=coverage_array)
+ assert not numpy.unique(coverage_array).tolist() == [1.0]
+
+ assert stats["b1"].mean != stats_align["b1"].mean
+
+ # Case 3 - square geometry in dataset CRS
+ with Reader(COGEO) as src:
+ image = src.feature(
+ square_crs,
+ dst_crs=src.crs,
+ shape_crs=src.crs,
+ )
+ coverage_array = image.get_coverage_array(
+ square_crs,
+ shape_crs=src.crs,
+ )
+ stats = image.statistics(coverage=coverage_array)
+ assert numpy.unique(coverage_array).tolist() == [1.0]
+
+ # Case 4 - square geometry in dataset CRS but aligned with dataset
+ with Reader(COGEO) as src:
+ image = src.feature(
+ square_crs,
+ dst_crs=src.crs,
+ shape_crs=src.crs,
+ align_bounds_with_dataset=True,
+ )
+ coverage_array = image.get_coverage_array(
+ square_crs,
+ shape_crs=src.crs,
+ )
+ stats_align = image.statistics(coverage=coverage_array)
+ assert not numpy.unique(coverage_array).tolist() == [1.0]
+
+ assert stats["b1"].mean != stats_align["b1"].mean
diff --git a/tests/test_reader.py b/tests/test_reader.py
index 7b99614e..ca7204f2 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -9,6 +9,7 @@
from rasterio.warp import transform_bounds
from rio_tiler import constants, reader
+from rio_tiler.constants import WGS84_CRS
from rio_tiler.errors import PointOutsideBounds, TileOutsideBounds
S3_KEY = "hro_sources/colorado/201404_13SED190110_201404_0x1500m_CL_1.tif"
@@ -669,3 +670,169 @@ def test_part_no_VRT():
assert img_small.height == 1
assert img_small.width == 1
assert_array_almost_equal(img_small.bounds, bounds_small_dst_crs)
+
+
+@pytest.mark.parametrize(
+ "bounds,crs",
+ [
+ (
+ (
+ -56.624124590533825,
+ 73.50183615350426,
+ -56.530950796449005,
+ 73.52687881825946,
+ ),
+ "epsg:32621",
+ ), # Case 1 - square bounds within dataset
+ (
+ (
+ -62.841631140841685,
+ 73.15163488990189,
+ -60.36648908847309,
+ 73.97773652099218,
+ ),
+ "epsg:32621",
+ ), # Case 2 - boundless (left)
+ (
+ (
+ -52.927554190740736,
+ 73.3960640725901,
+ -51.96837664926392,
+ 73.77350422465656,
+ ),
+ "epsg:32621",
+ ), # Case 3 - boundless (right)
+ (
+ (
+ -57.15027188947926,
+ 74.56177365126999,
+ -56.37556339673152,
+ 74.75029925196495,
+ ),
+ "epsg:32621",
+ ), # Case 4 - boundless (top)
+ (
+ (
+ -55.86202533996874,
+ 71.8988448629112,
+ -54.6335972683694,
+ 72.28789003457715,
+ ),
+ "epsg:32621",
+ ), # Case 5 - boundless (bottom)
+ (
+ (
+ -62.968685159182414,
+ 71.95907543637196,
+ -51.60091205568341,
+ 74.78461407516858,
+ ),
+ "epsg:32621",
+ ), # Case 6 - boundless whole raster
+ (
+ (
+ -66.79529480522785,
+ 74.22513769476188,
+ -65.89488418613195,
+ 74.48258818252089,
+ ),
+ "epsg:32621",
+ ), # Case 7 - outside bounds
+ # With Reprojection
+ (
+ (
+ -56.624124590533825,
+ 73.50183615350426,
+ -56.530950796449005,
+ 73.52687881825946,
+ ),
+ "epsg:4326",
+ ), # Case 1 - square bounds within dataset
+ (
+ (
+ -62.841631140841685,
+ 73.15163488990189,
+ -60.36648908847309,
+ 73.97773652099218,
+ ),
+ "epsg:4326",
+ ), # Case 2 - boundless (left)
+ (
+ (
+ -52.927554190740736,
+ 73.3960640725901,
+ -51.96837664926392,
+ 73.77350422465656,
+ ),
+ "epsg:4326",
+ ), # Case 3 - boundless (right)
+ (
+ (
+ -57.15027188947926,
+ 74.56177365126999,
+ -56.37556339673152,
+ 74.75029925196495,
+ ),
+ "epsg:4326",
+ ), # Case 4 - boundless (top)
+ (
+ (
+ -55.86202533996874,
+ 71.8988448629112,
+ -54.6335972683694,
+ 72.28789003457715,
+ ),
+ "epsg:4326",
+ ), # Case 5 - boundless (bottom)
+ (
+ (
+ -62.968685159182414,
+ 71.95907543637196,
+ -51.60091205568341,
+ 74.78461407516858,
+ ),
+ "epsg:4326",
+ ), # Case 6 - boundless whole raster
+ (
+ (
+ -66.79529480522785,
+ 74.22513769476188,
+ -65.89488418613195,
+ 74.48258818252089,
+ ),
+ "epsg:4326",
+ ), # Case 7 - outside bounds
+ ],
+)
+def test_part_align_transform(bounds, crs):
+ """test `align_bounds_with_dataset` option."""
+ with rasterio.open(COG) as src_dst:
+ img = reader.part(
+ src_dst,
+ bounds,
+ dst_crs=crs,
+ bounds_crs="epsg:4326",
+ align_bounds_with_dataset=True,
+ )
+ img_default = reader.part(
+ src_dst,
+ bounds,
+ dst_crs=crs,
+ bounds_crs="epsg:4326",
+ align_bounds_with_dataset=False,
+ )
+ assert not img.array.shape == img_default.array.shape
+ assert not img.bounds == img_default.bounds
+
+ # output image aligned with bounds should have the origin
+ # with the bounds UL
+ if crs != WGS84_CRS:
+ bounds = transform_bounds(WGS84_CRS, crs, *bounds, densify_pts=21)
+
+ assert round(img_default.transform.c, 5) == round(bounds[0], 5)
+ assert round(img_default.transform.f, 5) == round(bounds[3], 5)
+
+ # output image bounds aligned to the dataset transform should have the origin
+ # with the bounds greater than UL
+ assert img.transform.c < bounds[0]
+ assert img.transform.f > bounds[3]