From 22c80c60568e89b17cc363c6da77db08c7cb3b27 Mon Sep 17 00:00:00 2001 From: vincentsarago Date: Wed, 27 Sep 2023 19:00:17 +0200 Subject: [PATCH] update docs --- CHANGES.md | 32 ++++++ docs/mkdocs.yml | 2 +- docs/src/advanced/statistics.md | 166 +++++++++++++++++++++++++++++++ docs/src/advanced/zonal_stats.md | 82 --------------- 4 files changed, 199 insertions(+), 83 deletions(-) create mode 100644 docs/src/advanced/statistics.md delete mode 100644 docs/src/advanced/zonal_stats.md diff --git a/CHANGES.md b/CHANGES.md index 689c74b5..c0dfd730 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,36 @@ +# 6.2.0 (TBD) + +* allow area-weighted statistics by adding `coverage` option in `rio_tiler.utils.get_array_statistics` + + ```python + # Data Array + # 1, 2 + # 3, 4 + data = numpy.ma.array((1, 2, 3, 4)).reshape((1, 2, 2)) + + # Coverage Array + # 0.5, 0 + # 1, 0.25 + coverage = numpy.array((0.5, 0, 1, 0.25)).reshape((2, 2)) + + stats = utils.get_array_statistics(data, coverage=coverage) + assert len(stats) == 1 + assert stats[0]["min"] == 1 + assert stats[0]["max"] == 4 + assert stats[0]["mean"] == 1.125 # (1 * 0.5 + 2 * 0.0 + 3 * 1.0 + 4 * 0.25) / 4 + assert stats[0]["count"] == 1.75 # (0.5 + 0 + 1 + 0.25) sum of the coverage array + + stats = utils.get_array_statistics(data) + assert len(stats) == 1 + assert stats[0]["min"] == 1 + assert stats[0]["max"] == 4 + assert stats[0]["mean"] == 2.5 + assert stats[0]["count"] == 4 + ``` + +* add `rio_tiler.utils.get_coverage_array` method to create a `coverage %` array + # 6.1.0 (2023-09-15) * add `width`, `height` and `count` properties in `MosaicMethodBase` diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 42eddccf..ae1ba924 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -34,7 +34,7 @@ nav: - Advanced Topics: - Base classes and custom readers: 'advanced/custom_readers.md' - Read Polygon-shaped regions: 'advanced/feature.md' - - Zonal statistics: 'advanced/zonal_stats.md' + - Statistics: 'advanced/statistics.md' - Create a Dynamic Tiler: 'advanced/dynamic_tiler.md' - TileMatrixSet: 'advanced/tms.md' - Examples: diff --git a/docs/src/advanced/statistics.md b/docs/src/advanced/statistics.md new file mode 100644 index 00000000..8209b50c --- /dev/null +++ b/docs/src/advanced/statistics.md @@ -0,0 +1,166 @@ + +### Statistics + +`rio-tiler`'s Readers provide simple `.statistics` method to retrieve dataset global statistics + +```python +with Reader("my.tif") as src: + stats = src.statistics() + +# Statistics result is in form of Dict[str, rio_tiler.models.BandStatistics] +print(stats.keys()) +>>> ["b1", "b2", "b3"] + +# rio_tiler.models.BandStatistics is a pydantic model +print(stats["1"].model_dump().keys()) +[ + "min", + "max", + "mean", + "count", + "sum", + "std", + "median", + "majority", + "minority", + "unique", + "histogram", + "valid_percent", + "masked_pixels", + "valid_pixels", + # Percentile entries depend on user inputs + "percentile_2", + "percentile_98", +] +``` + +### Zonal Statistics + +You can easily extend the `statistics()` method to create a `.zonal_statistics` one which will accept input features to get statistics from. + +```python + +import attr +from typing import Any, Union, Optional, List, Dict + +from rio_tiler import io +from rio_tiler.models import BandStatistics + +from geojson_pydantic.features import Feature, FeatureCollection +from geojson_pydantic.geometries import Polygon + +class Reader(io.Reader): + """Custom Reader with zonal_statistics method.""" + + def zonal_statistics( + self, + geojson: Union[FeatureCollection, Feature], + categorical: bool = False, + categories: Optional[List[float]] = None, + percentiles: Optional[List[int]] = None, + hist_options: Optional[Dict] = None, + max_size: int = None, + **kwargs: Any, + ) -> Union[FeatureCollection, Feature]: + """Return statistics from GeoJSON features. + + Args: + geojson (Feature or FeatureCollection): a GeoJSON Feature or FeatureCollection. + categorical (bool): treat input data as categorical data. Defaults to False. + categories (list of numbers, optional): list of categories to return value for. + percentiles (list of numbers, optional): list of percentile values to calculate. Defaults to `[2, 98]`. + hist_options (dict, optional): Options to forward to numpy.histogram function. + max_size (int, optional): Limit the size of the longest dimension of the dataset read, respecting bounds X/Y aspect ratio. Defaults to None. + kwargs (optional): Options to forward to `self.preview`. + + Returns: + Feature or FeatureCollection + + """ + kwargs = {**self.options, **kwargs} + + hist_options = hist_options or {} + + fc = geojson + # We transform the input Feature to a FeatureCollection + if isinstance(fc, Feature): + fc = FeatureCollection(type="FeatureCollection", features=[geojson]) + + for feature in fc: + # Get data overlapping with the feature (using Reader.feature method) + data = self.feature( + feature.model_dump(exclude_none=True), + max_size=max_size, + **kwargs, + ) + stats = data.statistics( + categorical=categorical, + categories=categories, + percentiles=percentiles, + hist_options=hist_options, + ) + + # Update input feature properties and add the statistics + feature.properties = feature.properties or {} + feature.properties.update({"statistics": stats}) + + return fc.features[0] if isinstance(geojson, Feature) else fc +``` + + +### Area Weighted Statistics + +When getting statistics from `feature`, you may want to calculate values from the pixels which intersect with the geometry but also take the pixel intersection percentage into account. Starting with rio-tiler `6.2.0`, we've added a `coverage` option to the `statistics` utility which enable the user to pass an array representing the coverage percentage such as: + +```python +import numpy +from rio_tiler.utils import get_array_statistics +# Data Array +# 1, 2 +# 3, 4 +data = numpy.ma.array((1, 2, 3, 4)).reshape((1, 2, 2)) + +# Coverage Array +# 0.5, 0 +# 1, 0.25 +coverage = numpy.array((0.5, 0, 1, 0.25)).reshape((2, 2)) + +stats = get_array_statistics(data, coverage=coverage) +assert len(stats) == 1 +assert stats[0]["min"] == 1 +assert stats[0]["max"] == 4 +assert stats[0]["mean"] == 1.125 # (1 * 0.5 + 2 * 0.0 + 3 * 1.0 + 4 * 0.25) / 4 +assert stats[0]["count"] == 1.75 # (0.5 + 0 + 1 + 0.25) sum of the coverage array +``` + +When using with a `feature`, your code might look something like: + +```python +from rio_tiler.io import Reader +from rio_tiler.constants import WGS84_CRS + +with Reader(path) as src: + # First get the array for the feature + data = src_dst.feature( + shape, + shape_crs=WGS84_CRS, + ) + + # Get the coverage % array, using ImageData.get_coverage_array method + coverage_array = data.get_coverage_array( + shape, shape_crs=WGS84_CRS + ) + + # Get statistics (ImageData.statistics is calling `rio_tiler.utils.get_array_statistics`) + stats = data.statistics(coverage=coverage_array) +``` + +!!! warnings + The coverage weights will only have influence on specific statistics: + + - `mean` + - `count` + - `sum` + - `std` + - `median` + diff --git a/docs/src/advanced/zonal_stats.md b/docs/src/advanced/zonal_stats.md deleted file mode 100644 index 6824fdad..00000000 --- a/docs/src/advanced/zonal_stats.md +++ /dev/null @@ -1,82 +0,0 @@ -`rio-tiler`'s Readers provide simple `.statistics` method to retrieve dataset statistics (min, max, histogram...). We can easily extend this to create a `.zonal_statistics` method which will accept input features to get statistics from. - -```python - -import attr -from typing import Any, Union, Optional, List, Dict - -from rio_tiler import io -from rio_tiler.utils import get_array_statistics -from rio_tiler.models import BandStatistics - -from geojson_pydantic.features import Feature, FeatureCollection -from geojson_pydantic.geometries import Polygon - -class Reader(io.Reader): - """Custom Reader with zonal_statistics method.""" - - def zonal_statistics( - self, - geojson: Union[FeatureCollection, Feature], - categorical: bool = False, - categories: Optional[List[float]] = None, - percentiles: List[int] = [2, 98], - hist_options: Optional[Dict] = None, - max_size: int = None, - **kwargs: Any, - ) -> FeatureCollection: - """Return statistics from GeoJSON features. - - Args: - geojson (Feature or FeatureCollection): a GeoJSON Feature or FeatureCollection. - categorical (bool): treat input data as categorical data. Defaults to False. - categories (list of numbers, optional): list of categories to return value for. - percentiles (list of numbers, optional): list of percentile values to calculate. Defaults to `[2, 98]`. - hist_options (dict, optional): Options to forward to numpy.histogram function. - max_size (int, optional): Limit the size of the longest dimension of the dataset read, respecting bounds X/Y aspect ratio. Defaults to None. - kwargs (optional): Options to forward to `self.preview`. - - Returns: - FeatureCollection - - """ - kwargs = {**self._kwargs, **kwargs} - - hist_options = hist_options or {} - - # We transform the input Feature to a FeatureCollection - if not isinstance(geojson, FeatureCollection): - geojson = FeatureCollection(features=[geojson]) - - for feature in geojson: - # Get data overlapping with the feature (using Reader.feature method) - data = self.feature( - feature.model_dump(exclude_none=True), - max_size=max_size, - **kwargs, - ) - - # Get band statistics for the data - stats = get_array_statistics( - data.as_masked(), - categorical=categorical, - categories=categories, - percentiles=percentiles, - **hist_options, - ) - - # Update input feature properties and add the statistics - feature.properties = feature.properties or {} - feature.properties.update( - { - "statistics": { - f"{data.band_names[ix]}": BandStatistics( - **stats[ix] - ) - for ix in range(len(stats)) - } - } - ) - - return geojson -```