Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Commit

Permalink
More tests and docs
Browse files Browse the repository at this point in the history
  • Loading branch information
chouinar committed Sep 9, 2024
1 parent a62640d commit e4b9480
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 32 deletions.
38 changes: 31 additions & 7 deletions api/src/adapters/search/opensearch_query_builder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import typing
import datetime
import typing

from src.pagination.pagination_models import SortDirection

Expand All @@ -18,6 +18,7 @@ class SearchQueryBuilder:
* Sorted by relevancy score descending
* Scored on titles containing "king"
* Where the author is one of Brandon Sanderson or J R.R. Tolkien
* With a page count between 300 and 1000
* Returning aggregate counts of books by those authors in the full results
This query could either be built manually and look like:
Expand Down Expand Up @@ -53,6 +54,12 @@ class SearchQueryBuilder:
"Brandon Sanderson",
"J R.R. Tolkien"
]
}
},
{
"range": {
"page_count": {
"gte": 300,
"lte": 1000
}
}
}
]
Expand All @@ -76,6 +83,7 @@ class SearchQueryBuilder:
.sort_by([("relevancy", SortDirection.DESCENDING)])
.simple_query("king", fields=["title.keyword"])
.filter_terms("author.keyword", terms=["Brandon Sanderson", "J R.R. Tolkien"])
.filter_int_range("page_count", 300, 1000)
.aggregation_terms(aggregation_name="author", field_name="author.keyword", minimum_count=0)
.build()
"""
Expand Down Expand Up @@ -151,12 +159,20 @@ def filter_terms(self, field: str, terms: list) -> typing.Self:
self.filters.append({"terms": {field: terms}})
return self

def filter_int_range(self, field: str, min_value: int | None, max_value: int | None) -> typing.Self:
def filter_int_range(
self, field: str, min_value: int | None, max_value: int | None
) -> typing.Self:
"""
TODO - docs
For a given field, filter results to a range of integer values.
If only one of min or max is provided, the range is unbounded on the
other side. At least one of min or max must be specified.
These filters do not affect the relevancy score, they are purely
a binary filter on the overall results.
"""
if min_value is None and max_value is None:
raise Exception("TODO - can't filter when both are none")
raise ValueError("Cannot use int range filter if both min and max are None")

range_filter = {}
if min_value is not None:
Expand All @@ -167,12 +183,20 @@ def filter_int_range(self, field: str, min_value: int | None, max_value: int | N
self.filters.append({"range": {field: range_filter}})
return self

def filter_date_range(self, field: str, start_date: datetime.date | None, end_date: datetime.date | None) -> typing.Self:
def filter_date_range(
self, field: str, start_date: datetime.date | None, end_date: datetime.date | None
) -> typing.Self:
"""
TODO - docs
For a given field, filter results to a range of dates.
If only one of start or end is provided, the range is unbounded on the
other side. At least one of start or end date must be specified.
These filters do not affect the relevancy score, they are purely
a binary filter on the overall results.
"""
if start_date is None and end_date is None:
raise Exception("TODO - can't filter when both are none")
raise ValueError("Cannot use date range filter if both start and end are None")

range_filter = {}
if start_date is not None:
Expand Down
117 changes: 92 additions & 25 deletions api/tests/src/adapters/search/test_opensearch_query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,9 @@ def validate_valid_request(
f"Request generated was invalid and caused an error in search client: {json_value}"
)

assert resp.records == expected_results, f"{[record['title'] for record in resp.records]} != {[expected['title'] for expected in expected_results]}"
assert (
resp.records == expected_results
), f"{[record['title'] for record in resp.records]} != {[expected['title'] for expected in expected_results]}"

if expected_aggregations is not None:
assert resp.aggregations == expected_aggregations
Expand Down Expand Up @@ -377,24 +379,45 @@ def test_query_builder_filter_terms(

validate_valid_request(search_client, search_index, builder, expected_results)



@pytest.mark.parametrize("start_date,end_date,expected_results",[
# Date range that will include all results
(date(1900, 1, 1), date(2050, 1, 1), FULL_DATA),
# Start only date range that will get all results
(date(1950, 1, 1), None, FULL_DATA),
# End only date range that will get all results
(None, date(2025, 1, 1), FULL_DATA),
# Range that filters to just oldest
(date(1950, 1, 1), date(1960, 1, 1), [FELLOWSHIP_OF_THE_RING, TWO_TOWERS, RETURN_OF_THE_KING]),
# Unbounded range for oldest
(None, date(1990, 1, 1), [FELLOWSHIP_OF_THE_RING, TWO_TOWERS, RETURN_OF_THE_KING])
# TODO - more cases
])
def test_query_builder_filter_date_range(self, search_client, search_index, start_date, end_date, expected_results):
builder = SearchQueryBuilder().sort_by([]).filter_date_range("publication_date", start_date, end_date)
@pytest.mark.parametrize(
"start_date,end_date,expected_results",
[
# Date range that will include all results
(date(1900, 1, 1), date(2050, 1, 1), FULL_DATA),
# Start only date range that will get all results
(date(1950, 1, 1), None, FULL_DATA),
# End only date range that will get all results
(None, date(2025, 1, 1), FULL_DATA),
# Range that filters to just oldest
(
date(1950, 1, 1),
date(1960, 1, 1),
[FELLOWSHIP_OF_THE_RING, TWO_TOWERS, RETURN_OF_THE_KING],
),
# Unbounded range for oldest few
(None, date(1990, 1, 1), [FELLOWSHIP_OF_THE_RING, TWO_TOWERS, RETURN_OF_THE_KING]),
# Unbounded range for newest few
(date(2011, 8, 1), None, [WORDS_OF_RADIANCE, OATHBRINGER, RHYTHM_OF_WAR]),
# Selecting a few in the middle
(
date(2005, 1, 1),
date(2014, 1, 1),
[WAY_OF_KINGS, FEAST_FOR_CROWS, DANCE_WITH_DRAGONS],
),
# Exact date
(date(1954, 7, 29), date(1954, 7, 29), [FELLOWSHIP_OF_THE_RING]),
# None fetched in range
(date(1981, 1, 1), date(1989, 1, 1), []),
],
)
def test_query_builder_filter_date_range(
self, search_client, search_index, start_date, end_date, expected_results
):
builder = (
SearchQueryBuilder()
.sort_by([])
.filter_date_range("publication_date", start_date, end_date)
)

expected_ranges = {}
if start_date is not None:
Expand All @@ -413,12 +436,33 @@ def test_query_builder_filter_date_range(self, search_client, search_index, star

validate_valid_request(search_client, search_index, builder, expected_results)

@pytest.mark.parametrize("min_value,max_value,expected_results",[
(1, 2000, FULL_DATA),
(2000, 3000, [])
])
def test_query_builder_filter_int_range(self, search_client, search_index, min_value, max_value, expected_results):
builder = SearchQueryBuilder().sort_by([]).filter_int_range("page_count", min_value, max_value)
@pytest.mark.parametrize(
"min_value,max_value,expected_results",
[
# All fetched
(1, 2000, FULL_DATA),
# None fetched
(2000, 3000, []),
# "Short" books
(300, 700, [GAME_OF_THRONES, FELLOWSHIP_OF_THE_RING, TWO_TOWERS, RETURN_OF_THE_KING]),
# Unbounded short
(None, 416, [TWO_TOWERS, RETURN_OF_THE_KING]),
# Unbounded long
(1050, None, [WORDS_OF_RADIANCE, OATHBRINGER, RHYTHM_OF_WAR, DANCE_WITH_DRAGONS]),
# Middle length
(
500,
1010,
[WAY_OF_KINGS, GAME_OF_THRONES, CLASH_OF_KINGS, STORM_OF_SWORDS, FEAST_FOR_CROWS],
),
],
)
def test_query_builder_filter_int_range(
self, search_client, search_index, min_value, max_value, expected_results
):
builder = (
SearchQueryBuilder().sort_by([]).filter_int_range("page_count", min_value, max_value)
)

expected_ranges = {}
if min_value is not None:
Expand All @@ -437,6 +481,29 @@ def test_query_builder_filter_int_range(self, search_client, search_index, min_v

validate_valid_request(search_client, search_index, builder, expected_results)

def test_multiple_ranges(self, search_client, search_index):
    """Sanity check: an int range and a date range can be combined in one query."""
    page_count_bounds = (600, 1100)
    publication_bounds = (date(2000, 1, 1), date(2013, 1, 1))

    # Both range filters are chained onto the same builder.
    builder = (
        SearchQueryBuilder()
        .sort_by([])
        .filter_int_range("page_count", *page_count_bounds)
        .filter_date_range("publication_date", *publication_bounds)
    )

    validate_valid_request(
        search_client,
        search_index,
        builder,
        expected_results=[
            WAY_OF_KINGS,
            STORM_OF_SWORDS,
            FEAST_FOR_CROWS,
            DANCE_WITH_DRAGONS,
        ],
    )

def test_filter_int_range_both_none(self):
    """An int range filter with neither a min nor a max must raise ValueError."""
    expected_message = "Cannot use int range filter"
    with pytest.raises(ValueError, match=expected_message):
        SearchQueryBuilder().filter_int_range("test_field", min_value=None, max_value=None)

def test_filter_date_range_both_none(self):
    """A date range filter with neither a start nor an end must raise ValueError."""
    expected_message = "Cannot use date range filter"
    with pytest.raises(ValueError, match=expected_message):
        SearchQueryBuilder().filter_date_range("test_field", start_date=None, end_date=None)

@pytest.mark.parametrize(
"query,fields,expected_results,expected_aggregations",
[
Expand Down

0 comments on commit e4b9480

Please sign in to comment.