From ff98360ef3bfdde2ed8ada17d5792485adb1097c Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Mon, 11 Nov 2024 13:02:47 -0500 Subject: [PATCH] trovesearch pagination tests --- .../_common_trovesearch_tests.py | 43 ++++++++++++++++++- trove/trovesearch/page_cursor.py | 21 ++++++--- trove/trovesearch/search_params.py | 8 +--- trove/trovesearch/search_response.py | 26 ++--------- 4 files changed, 61 insertions(+), 37 deletions(-) diff --git a/tests/share/search/index_strategy/_common_trovesearch_tests.py b/tests/share/search/index_strategy/_common_trovesearch_tests.py index d912b961c..f3eff4813 100644 --- a/tests/share/search/index_strategy/_common_trovesearch_tests.py +++ b/tests/share/search/index_strategy/_common_trovesearch_tests.py @@ -1,6 +1,7 @@ from typing import Iterable, Iterator -from datetime import date +from datetime import date, timedelta import itertools +import math from urllib.parse import urlencode from primitive_metadata import primitive_rdf as rdf @@ -71,6 +72,46 @@ def test_cardsearch(self): _actual_result_iris = set(_actual_result_iris) self.assertEqual(_expected_result_iris, _actual_result_iris, msg=f'?{_queryparams}') + def test_cardsearch_pagination(self): + _cards: list[trove_db.Indexcard] = [] + _expected_iris = set() + _page_size = 7 + _total_count = 55 + _start_date = date(1999, 12, 31) + for _i in range(_total_count): + _card_iri = BLARG[f'i{_i}'] + _expected_iris.add(_card_iri) + _cards.append(self._create_indexcard(_card_iri, { + _card_iri: { + RDF.type: {BLARG.Thing}, + DCTERMS.title: {rdf.literal(f'card #{_i}')}, + DCTERMS.created: {rdf.literal(_start_date + timedelta(weeks=_i, days=_i))}, + }, + })) + self._index_indexcards(_cards) + # gather all pages results: + _querystring: str = f'page[size]={_page_size}' + _result_iris: set[str] = set() + _page_count = 0 + while True: + _cardsearch_response = self.current_index.pls_handle_cardsearch( + CardsearchParams.from_querystring(_querystring), + ) + _page_iris = { + self._indexcard_focus_by_uuid[_result.card_uuid] + for _result in _cardsearch_response.search_result_page + } + self.assertFalse(_result_iris.intersection(_page_iris)) + self.assertLessEqual(len(_page_iris), _page_size) + _result_iris.update(_page_iris) + _page_count += 1 + _next_cursor = _cardsearch_response.cursor.next_cursor() + if _next_cursor is None: + break + _querystring = urlencode({'page[cursor]': _next_cursor.as_queryparam_value()}) + self.assertEqual(_page_count, math.ceil(_total_count / _page_size)) + self.assertEqual(_result_iris, _expected_iris) + def test_valuesearch(self): self._fill_test_data_for_querying() _valuesearch_cases = itertools.chain( diff --git a/trove/trovesearch/page_cursor.py b/trove/trovesearch/page_cursor.py index 61c7c03b4..0428b78d5 100644 --- a/trove/trovesearch/page_cursor.py +++ b/trove/trovesearch/page_cursor.py @@ -12,6 +12,7 @@ MANY_MORE = -1 +MAX_OFFSET = 9997 @dataclasses.dataclass @@ -22,7 +23,7 @@ class PageCursor: @classmethod def from_queryparam_value(cls, cursor_value: str) -> typing.Self: try: - (_type_key, _args) = json.loads(base64.urlsafe_b64decode(cursor_value)) + (_type_key, *_args) = json.loads(base64.urlsafe_b64decode(cursor_value)) _cls = _PageCursorTypes[_type_key].value assert issubclass(_cls, cls) return _cls(*_args) @@ -46,8 +47,7 @@ def is_basic(self) -> bool: def is_valid(self) -> bool: return self.page_size > 0 and ( - self.total_count == MANY_MORE - or self.total_count >= 0 + self.total_count == MANY_MORE or self.total_count >= 0 ) def has_many_more(self) -> bool: @@ -72,20 +72,27 @@ class OffsetCursor(PageCursor): def is_valid(self) -> bool: return ( super().is_valid() - and 0 <= self.start_offset + and 0 <= self.start_offset <= MAX_OFFSET + and ( + self.total_count == MANY_MORE + or self.start_offset < self.total_count + ) ) def is_first_page(self) -> bool: return self.start_offset == 0 def next_cursor(self): - return dataclasses.replace(self, start_offset=(self.start_offset + self.page_size)) + _next = dataclasses.replace(self, start_offset=(self.start_offset + self.page_size)) + return (_next if _next.is_valid() else None) def prev_cursor(self): - return dataclasses.replace(self, start_offset=(self.start_offset - self.page_size)) + _prev = dataclasses.replace(self, start_offset=(self.start_offset - self.page_size)) + return (_prev if _prev.is_valid() else None) def first_cursor(self): - return dataclasses.replace(self, start_offset=0) + _first = dataclasses.replace(self, start_offset=0) + return (_first if _first.is_valid() else None) @dataclasses.dataclass diff --git a/trove/trovesearch/search_params.py b/trove/trovesearch/search_params.py index 8c7234222..67469e80f 100644 --- a/trove/trovesearch/search_params.py +++ b/trove/trovesearch/search_params.py @@ -51,10 +51,6 @@ DEFAULT_PAGE_SIZE = 13 MAX_PAGE_SIZE = 101 -# limits on paging -VALUESEARCH_MAX = 234 -CARDSEARCH_MAX = 9997 - # between each step in a property path "foo.bar.baz" PROPERTYPATH_DELIMITER = '.' @@ -94,11 +90,11 @@ class BaseTroveParams: accept_mediatype: str | None @classmethod - def from_querystring(cls, querystring: str) -> BaseTroveParams: # TODO py3.11: typing.Self + def from_querystring(cls, querystring: str) -> typing.Self: return cls.from_queryparams(queryparams_from_querystring(querystring)) @classmethod - def from_queryparams(cls, queryparams: QueryparamDict) -> BaseTroveParams: + def from_queryparams(cls, queryparams: QueryparamDict) -> typing.Self: return cls(**cls.parse_queryparams(queryparams)) @classmethod diff --git a/trove/trovesearch/search_response.py b/trove/trovesearch/search_response.py index d60a6d998..19bbdfe6c 100644 --- a/trove/trovesearch/search_response.py +++ b/trove/trovesearch/search_response.py @@ -7,11 +7,7 @@ PageCursor, ReproduciblyRandomSampleCursor, ) -from trove.trovesearch.search_params import ( - VALUESEARCH_MAX, - CARDSEARCH_MAX, - CardsearchParams, -) +from trove.trovesearch.search_params import CardsearchParams from trove.vocab.namespaces import TROVE from trove.vocab.trove import trove_indexcard_namespace @@ -80,10 +76,6 @@ def __post_init__(self): class PagedResponse: cursor: PageCursor - @property - def max_offset(self) -> int: - raise NotImplementedError - @property def total_result_count(self) -> BoundedCount: return ( @@ -99,8 +91,6 @@ class CardsearchResponse(PagedResponse): related_propertypath_results: list['PropertypathUsage'] cardsearch_params: CardsearchParams - max_offset = CARDSEARCH_MAX - def __post_init__(self): _cursor = self.cursor if ( @@ -114,24 +104,14 @@ def __post_init__(self): for (_i, _id) in enumerate(_cursor.first_page_ids) } self.search_result_page.sort(key=lambda _r: _ordering_by_id[_r.card_id]) - else: - _should_start_reproducible_randomness = ( - not _cursor.has_many_more() - and any( - not _filter.is_type_filter() # look for a non-default filter - for _filter in self.cardsearch_params.cardsearch_filter_set - ) - ) - if _should_start_reproducible_randomness: - _cursor.first_page_ids = [_result.card_id for _result in self.search_result_page] + elif not _cursor.has_many_more(): + _cursor.first_page_ids = [_result.card_id for _result in self.search_result_page] @dataclasses.dataclass class ValuesearchResponse(PagedResponse): search_result_page: Iterable[ValuesearchResult] - max_offset = VALUESEARCH_MAX - ### # local helpers