From 9ab10cc364a483d39f9c53973dc6e86da2691438 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Tue, 1 Aug 2023 22:01:59 +0200 Subject: [PATCH] Allow to pass a dict for the summarized fields, fix the doc rendering (#1195) * Allow to pass a dict for the summarized fields, fix the doc rendering * Apply suggestions from code review Co-authored-by: Julia Signell --------- Co-authored-by: Julia Signell --- CHANGELOG.md | 8 +++++++ pystac/summaries.py | 47 ++++++++++++++++++++++++++++------------- tests/test_summaries.py | 16 +++++++++++++- 3 files changed, 55 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e96f0785..2c3cfc7bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,10 +2,18 @@ ## [Unreleased] +### Added + +- Allow passing a Dict with field names and summary strategies to the `fields` parameter in the `Summarizer` constructor ([#1195](https://github.com/stac-utils/pystac/pull/1195)) + ### Changed - Pin jsonschema version to <4.18 until regresssions are fixed +### Fixed + +- Fix the documentation rendering of the `fields` parameter in the `Summarizer` constructor ([#1195](https://github.com/stac-utils/pystac/pull/1195)) + ## [v1.8.2] - 2023-07-12 ### Fixed diff --git a/pystac/summaries.py b/pystac/summaries.py index e2c391df5..a065d6531 100644 --- a/pystac/summaries.py +++ b/pystac/summaries.py @@ -130,36 +130,53 @@ class SummaryStrategy(Enum): class Summarizer: """The Summarizer computes summaries from values, following the definition of fields - to summarize provided in a json file. + to summarize. - For more information about the structure of the fields json file, see: + The fields to summarize can be provided as a JSON file or as a dictionary of + field names and SummaryStrategys. If nothing is provided, a default JSON file + will be used. + Only fields that are in the Item `properties` can be summarized. + Thus it is not possible to summarize the top-level fields such as `id` or `assets`. 
+ + For more information about the structure of the fields JSON file, see: https://github.com/stac-utils/stac-fields + The default JSON file used is a snapshot of the following file at the time of + the pystac release: + https://cdn.jsdelivr.net/npm/@radiantearth/stac-fields/fields-normalized.json + Args: - fields (str): the path to the json file with field descriptions. - If no file is passed, a default one will be used. + fields: A string containing the path to the JSON file with field descriptions. + Alternatively, a dict with the field names as keys and SummaryStrategys + as values. + If nothing is passed, a default file with field descriptions will be used. """ summaryfields: Dict[str, SummaryStrategy] - def __init__(self, fields: Optional[str] = None): - jsonfields = _get_fields_json(fields) - self._set_field_definitions(jsonfields) + def __init__(self, fields: Optional[Union[str, Dict[str, SummaryStrategy]]] = None): + if isinstance(fields, dict): + self._set_field_definitions(fields) + else: + jsonfields = _get_fields_json(fields) + self._set_field_definitions(jsonfields["metadata"]) def _set_field_definitions(self, fields: Dict[str, Any]) -> None: self.summaryfields = {} - for name, desc in fields["metadata"].items(): - if isinstance(desc, dict): + for name, desc in fields.items(): + strategy: SummaryStrategy = SummaryStrategy.DEFAULT + if isinstance(desc, SummaryStrategy): + strategy = desc + elif isinstance(desc, dict): strategy_value = desc.get("summary", True) try: - strategy: SummaryStrategy = SummaryStrategy(strategy_value) + strategy = SummaryStrategy(strategy_value) except ValueError: - strategy = SummaryStrategy.DEFAULT - if strategy != SummaryStrategy.DONT_SUMMARIZE: - self.summaryfields[name] = strategy - else: - self.summaryfields[name] = SummaryStrategy.DEFAULT + pass + + if strategy != SummaryStrategy.DONT_SUMMARIZE: + self.summaryfields[name] = strategy def _update_with_item(self, summaries: Summaries, item: Item) -> None: for k, v in 
item.properties.items(): diff --git a/tests/test_summaries.py b/tests/test_summaries.py index 62c77b78c..208e2632d 100644 --- a/tests/test_summaries.py +++ b/tests/test_summaries.py @@ -2,7 +2,7 @@ import unittest from typing import Any -from pystac.summaries import RangeSummary, Summaries, Summarizer +from pystac.summaries import RangeSummary, Summaries, Summarizer, SummaryStrategy from tests.utils import TestCases @@ -30,6 +30,20 @@ def test_summary_custom_fields_file(self) -> None: self.assertIsNone(summaries_dict.get("eo:bands")) self.assertEqual(len(summaries_dict["proj:epsg"]), 1) + def test_summary_custom_fields_dict(self) -> None: + coll = TestCases.case_5() + spec = { + "eo:bands": SummaryStrategy.DONT_SUMMARIZE, + "proj:epsg": SummaryStrategy.ARRAY, + } + obj = Summarizer(spec) + self.assertTrue("eo:bands" not in obj.summaryfields) + self.assertEqual(obj.summaryfields["proj:epsg"], SummaryStrategy.ARRAY) + summaries = obj.summarize(coll.get_items(recursive=True)) + summaries_dict = summaries.to_dict() + self.assertIsNone(summaries_dict.get("eo:bands")) + self.assertEqual(len(summaries_dict["proj:epsg"]), 1) + def test_summary_wrong_custom_fields_file(self) -> None: coll = TestCases.case_5() with self.assertRaises(FileNotFoundError) as context: