Skip to content

Commit

Permalink
trovesearch pagination tests
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Nov 11, 2024
1 parent 5378787 commit ff98360
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 37 deletions.
43 changes: 42 additions & 1 deletion tests/share/search/index_strategy/_common_trovesearch_tests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Iterable, Iterator
from datetime import date
from datetime import date, timedelta
import itertools
import math
from urllib.parse import urlencode

from primitive_metadata import primitive_rdf as rdf
Expand Down Expand Up @@ -71,6 +72,46 @@ def test_cardsearch(self):
_actual_result_iris = set(_actual_result_iris)
self.assertEqual(_expected_result_iris, _actual_result_iris, msg=f'?{_queryparams}')

def test_cardsearch_pagination(self):
    '''page through a cardsearch, checking that every card shows up exactly once

    creates and indexes `_total_count` cards, requests a first page of
    `_page_size`, then follows each response's next-cursor until there is
    none -- asserting along the way that pages never overlap and never
    exceed the page size, and at the end that the pages together contain
    every indexed card in exactly ceil(total / size) pages
    '''
    _cards: list[trove_db.Indexcard] = []
    _expected_iris = set()
    _page_size = 7
    _total_count = 55  # not a multiple of the page size, so the last page is partial
    _expected_page_count = math.ceil(_total_count / _page_size)
    _start_date = date(1999, 12, 31)
    for _i in range(_total_count):
        _card_iri = BLARG[f'i{_i}']
        _expected_iris.add(_card_iri)
        _cards.append(self._create_indexcard(_card_iri, {
            _card_iri: {
                RDF.type: {BLARG.Thing},
                DCTERMS.title: {rdf.literal(f'card #{_i}')},
                # a distinct creation date for each card
                DCTERMS.created: {rdf.literal(_start_date + timedelta(weeks=_i, days=_i))},
            },
        }))
    self._index_indexcards(_cards)
    # gather results from all pages:
    _querystring: str = f'page[size]={_page_size}'
    _result_iris: set[str] = set()
    _page_count = 0
    while True:
        _cardsearch_response = self.current_index.pls_handle_cardsearch(
            CardsearchParams.from_querystring(_querystring),
        )
        # look up each result's iri by its card uuid
        _page_iris = {
            self._indexcard_focus_by_uuid[_result.card_uuid]
            for _result in _cardsearch_response.search_result_page
        }
        # no card may appear on more than one page
        self.assertFalse(_result_iris.intersection(_page_iris))
        self.assertLessEqual(len(_page_iris), _page_size)
        _result_iris.update(_page_iris)
        _page_count += 1
        # fail fast (instead of paging on and on) if the cursor never runs out
        self.assertLessEqual(
            _page_count,
            _expected_page_count,
            msg='more pages than expected; next_cursor never returned None',
        )
        _next_cursor = _cardsearch_response.cursor.next_cursor()
        if _next_cursor is None:
            break
        # subsequent requests carry only the cursor from the previous response
        _querystring = urlencode({'page[cursor]': _next_cursor.as_queryparam_value()})
    self.assertEqual(_page_count, _expected_page_count)
    self.assertEqual(_result_iris, _expected_iris)

def test_valuesearch(self):
self._fill_test_data_for_querying()
_valuesearch_cases = itertools.chain(
Expand Down
21 changes: 14 additions & 7 deletions trove/trovesearch/page_cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@


MANY_MORE = -1
MAX_OFFSET = 9997


@dataclasses.dataclass
Expand All @@ -22,7 +23,7 @@ class PageCursor:
@classmethod
def from_queryparam_value(cls, cursor_value: str) -> typing.Self:
try:
(_type_key, _args) = json.loads(base64.urlsafe_b64decode(cursor_value))
(_type_key, *_args) = json.loads(base64.urlsafe_b64decode(cursor_value))
_cls = _PageCursorTypes[_type_key].value
assert issubclass(_cls, cls)
return _cls(*_args)
Expand All @@ -46,8 +47,7 @@ def is_basic(self) -> bool:

def is_valid(self) -> bool:
return self.page_size > 0 and (
self.total_count == MANY_MORE
or self.total_count >= 0
self.total_count == MANY_MORE or self.total_count >= 0
)

def has_many_more(self) -> bool:
Expand All @@ -72,20 +72,27 @@ class OffsetCursor(PageCursor):
def is_valid(self) -> bool:
return (
super().is_valid()
and 0 <= self.start_offset
and 0 <= self.start_offset <= MAX_OFFSET
and (
self.total_count == MANY_MORE
or self.start_offset < self.total_count
)
)

def is_first_page(self) -> bool:
    '''whether this cursor's `start_offset` is zero'''
    _offset = self.start_offset
    return _offset == 0

def next_cursor(self):
return dataclasses.replace(self, start_offset=(self.start_offset + self.page_size))
_next = dataclasses.replace(self, start_offset=(self.start_offset + self.page_size))
return (_next if _next.is_valid() else None)

def prev_cursor(self):
return dataclasses.replace(self, start_offset=(self.start_offset - self.page_size))
_prev = dataclasses.replace(self, start_offset=(self.start_offset - self.page_size))
return (_prev if _prev.is_valid() else None)

def first_cursor(self):
return dataclasses.replace(self, start_offset=0)
_first = dataclasses.replace(self, start_offset=0)
return (_first if _first.is_valid() else None)


@dataclasses.dataclass
Expand Down
8 changes: 2 additions & 6 deletions trove/trovesearch/search_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,6 @@
DEFAULT_PAGE_SIZE = 13
MAX_PAGE_SIZE = 101

# limits on paging
VALUESEARCH_MAX = 234
CARDSEARCH_MAX = 9997

# between each step in a property path "foo.bar.baz"
PROPERTYPATH_DELIMITER = '.'

Expand Down Expand Up @@ -94,11 +90,11 @@ class BaseTroveParams:
accept_mediatype: str | None

@classmethod
def from_querystring(cls, querystring: str) -> BaseTroveParams: # TODO py3.11: typing.Self
def from_querystring(cls, querystring: str) -> typing.Self:
return cls.from_queryparams(queryparams_from_querystring(querystring))

@classmethod
def from_queryparams(cls, queryparams: QueryparamDict) -> BaseTroveParams:
def from_queryparams(cls, queryparams: QueryparamDict) -> typing.Self:
return cls(**cls.parse_queryparams(queryparams))

@classmethod
Expand Down
26 changes: 3 additions & 23 deletions trove/trovesearch/search_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,7 @@
PageCursor,
ReproduciblyRandomSampleCursor,
)
from trove.trovesearch.search_params import (
VALUESEARCH_MAX,
CARDSEARCH_MAX,
CardsearchParams,
)
from trove.trovesearch.search_params import CardsearchParams
from trove.vocab.namespaces import TROVE
from trove.vocab.trove import trove_indexcard_namespace

Expand Down Expand Up @@ -80,10 +76,6 @@ def __post_init__(self):
class PagedResponse:
cursor: PageCursor

@property
def max_offset(self) -> int:
raise NotImplementedError

@property
def total_result_count(self) -> BoundedCount:
return (
Expand All @@ -99,8 +91,6 @@ class CardsearchResponse(PagedResponse):
related_propertypath_results: list['PropertypathUsage']
cardsearch_params: CardsearchParams

max_offset = CARDSEARCH_MAX

def __post_init__(self):
_cursor = self.cursor
if (
Expand All @@ -114,24 +104,14 @@ def __post_init__(self):
for (_i, _id) in enumerate(_cursor.first_page_ids)
}
self.search_result_page.sort(key=lambda _r: _ordering_by_id[_r.card_id])
else:
_should_start_reproducible_randomness = (
not _cursor.has_many_more()
and any(
not _filter.is_type_filter() # look for a non-default filter
for _filter in self.cardsearch_params.cardsearch_filter_set
)
)
if _should_start_reproducible_randomness:
_cursor.first_page_ids = [_result.card_id for _result in self.search_result_page]
elif not _cursor.has_many_more():
_cursor.first_page_ids = [_result.card_id for _result in self.search_result_page]


@dataclasses.dataclass
class ValuesearchResponse(PagedResponse):
search_result_page: Iterable[ValuesearchResult]

max_offset = VALUESEARCH_MAX


###
# local helpers
Expand Down

0 comments on commit ff98360

Please sign in to comment.