diff --git a/api/src/adapters/search/opensearch_client.py b/api/src/adapters/search/opensearch_client.py index 8f5aeb1b4..b2e5b2bea 100644 --- a/api/src/adapters/search/opensearch_client.py +++ b/api/src/adapters/search/opensearch_client.py @@ -142,9 +142,8 @@ def swap_alias_index( self.delete_index(index) def search_raw(self, index_name: str, search_query: dict) -> dict: - # TODO - add more when we build out the request/response parsing logic - # we use something like Pydantic to help reorganize the response - # object into something easier to parse. + # Simple wrapper around search if you don't want the request or response + # object handled in any special way. return self._client.search(index=index_name, body=search_query) def search( diff --git a/api/src/adapters/search/opensearch_query_builder.py b/api/src/adapters/search/opensearch_query_builder.py index 778cdbdc0..4aa4e07e5 100644 --- a/api/src/adapters/search/opensearch_query_builder.py +++ b/api/src/adapters/search/opensearch_query_builder.py @@ -4,6 +4,81 @@ class SearchQueryBuilder: + """ + Utility to help build queries to OpenSearch + + This helps with making sure everything we want in a search query goes + to the right spot in the large JSON object we're building. Note that + it still requires some understanding of OpenSearch (e.g. when to add ".keyword" to a field name) + + For example, if you wanted to build a query against a search index containing + books with the following: + * Page size of 5, page number 1 + * Sorted by relevancy score descending + * Scored on titles containing "king" + * Where the author is one of Brandon Sanderson or J R.R.
Tolkien + * Returning aggregate counts of books by those authors in the full results + + This query could either be built manually and look like: + + { + "size": 5, + "from": 0, + "track_scores": true, + "sort": [ + { + "_score": { + "order": "desc" + } + } + ], + "query": { + "bool": { + "must": [ + { + "simple_query_string": { + "query": "king", + "fields": [ + "title.keyword" + ], + "default_operator": "AND" + } + } + ], + "filter": [ + { + "terms": { + "author.keyword": [ + "Brandon Sanderson", + "J R.R. Tolkien" + ] + } + } + ] + } + }, + "aggs": { + "author": { + "terms": { + "field": "author.keyword", + "size": 25, + "min_doc_count": 0 + } + } + } + } + + Or you could use the builder and produce the same result: + + search_query = (SearchQueryBuilder() + .pagination(page_size=5, page_number=1) + .sort_by([("relevancy", SortDirection.DESCENDING)]) + .simple_query("king", fields=["title.keyword"]) + .filter_terms("author.keyword", terms=["Brandon Sanderson", "J R.R. Tolkien"]) + .aggregation_terms(aggregation_name="author", field_name="author.keyword", minimum_count=0) + .build()) + """ + def __init__(self) -> None: self.page_size = 25 self.page_number = 1