From 62ba7f1599df40acfa7de93f15fa81168b0f073e Mon Sep 17 00:00:00 2001 From: Michael Chouinard Date: Thu, 16 May 2024 16:59:21 -0400 Subject: [PATCH 01/19] [Issue #9] Setup opensearch locally --- api/Makefile | 15 +++- api/bin/wait-for-local-opensearch.sh | 31 +++++++ api/docker-compose.yml | 45 ++++++++++ api/local.env | 9 ++ api/src/adapters/opensearch/__init__.py | 0 .../adapters/opensearch/opensearch_client.py | 33 ++++++++ .../adapters/opensearch/opensearch_config.py | 34 ++++++++ api/tests/src/adapters/opensearch/__init__.py | 0 .../adapters/opensearch/test_opensearch.py | 83 +++++++++++++++++++ 9 files changed, 249 insertions(+), 1 deletion(-) create mode 100755 api/bin/wait-for-local-opensearch.sh create mode 100644 api/src/adapters/opensearch/__init__.py create mode 100644 api/src/adapters/opensearch/opensearch_client.py create mode 100644 api/src/adapters/opensearch/opensearch_config.py create mode 100644 api/tests/src/adapters/opensearch/__init__.py create mode 100644 api/tests/src/adapters/opensearch/test_opensearch.py diff --git a/api/Makefile b/api/Makefile index f2774d3a7..d5daab1d2 100644 --- a/api/Makefile +++ b/api/Makefile @@ -100,7 +100,7 @@ start-debug: run-logs: start docker-compose logs --follow --no-color $(APP_NAME) -init: build init-db +init: build init-db init-opensearch clean-volumes: ## Remove project docker volumes (which includes the DB state) docker-compose down --volumes @@ -179,6 +179,19 @@ create-erds: # Create ERD diagrams for our DB schema setup-postgres-db: ## Does any initial setup necessary for our local database to work $(PY_RUN_CMD) setup-postgres-db +################################################## +# Opensearch +################################################## + +init-opensearch: start-opensearch +# TODO - in subsequent PRs, we'll add more to this command to setup the search index locally + +start-opensearch: + docker-compose up --detach opensearch-node + docker-compose up --detach opensearch-dashboards + ./bin/wait-for-local-opensearch.sh + + ################################################## # Testing diff --git a/api/bin/wait-for-local-opensearch.sh b/api/bin/wait-for-local-opensearch.sh new file mode 100755 index 000000000..a14af8048 --- /dev/null +++ b/api/bin/wait-for-local-opensearch.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# wait-for-local-opensearch + +set -e + +# Color formatting +RED='\033[0;31m' +NO_COLOR='\033[0m' + +MAX_WAIT_TIME=30 # seconds +WAIT_TIME=0 + +# Curl the healthcheck endpoint of the local opensearch +# until it returns a success response +until curl --output /dev/null --silent http://localhost:9200/_cluster/health; +do + echo "waiting on OpenSearch to initialize..." 
+ sleep 3 + + WAIT_TIME=$(($WAIT_TIME+3)) + if [ $WAIT_TIME -gt $MAX_WAIT_TIME ] + then + echo -e "${RED}ERROR: OpenSearch appears to not be starting up, running \"docker logs opensearch-node\" to troubleshoot.${NO_COLOR}" + docker logs opensearch-node + exit 1 + fi +done + +echo "OpenSearch is ready after ~${WAIT_TIME} seconds" + + diff --git a/api/docker-compose.yml b/api/docker-compose.yml index a364c74c3..88668a89e 100644 --- a/api/docker-compose.yml +++ b/api/docker-compose.yml @@ -12,6 +12,46 @@ services: volumes: - grantsdbdata:/var/lib/postgresql/data + opensearch-node: + image: opensearchproject/opensearch:latest + container_name: opensearch-node + environment: + - cluster.name=opensearch-cluster # Name the cluster + - node.name=opensearch-node # Name the node that will run in this container + - discovery.type=single-node # Nodes to look for when discovering the cluster + - bootstrap.memory_lock=true # Disable JVM heap memory swapping + - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # Set min and max JVM heap sizes to at least 50% of system RAM + - DISABLE_INSTALL_DEMO_CONFIG=true # Prevents execution of bundled demo script which installs demo certificates and security configurations to OpenSearch + - DISABLE_SECURITY_PLUGIN=true # Disables Security plugin + ulimits: + memlock: + soft: -1 # Set memlock to unlimited (no soft or hard limit) + hard: -1 + nofile: + soft: 65536 # Maximum number of open files for the opensearch user - set to at least 65536 + hard: 65536 + volumes: + - opensearch-data:/usr/share/opensearch/data # Creates volume called opensearch-data and mounts it to the container + ports: + - 9200:9200 # REST API + - 9600:9600 # Performance Analyzer + networks: + - opensearch-net # All of the containers will join the same Docker bridge network + + opensearch-dashboards: + image: opensearchproject/opensearch-dashboards:latest + container_name: opensearch-dashboards + ports: + - 5601:5601 # Map host port 5601 to container port 5601 + expose: + - "5601" # Expose port 5601 for web access to OpenSearch Dashboards + environment: + - 'OPENSEARCH_HOSTS=["http://opensearch-node:9200"]' + - DISABLE_SECURITY_DASHBOARDS_PLUGIN=true # disables security dashboards plugin in OpenSearch Dashboards + networks: + - opensearch-net + + grants-api: build: context: . @@ -28,6 +68,11 @@ services: - .:/api depends_on: - grants-db + - opensearch-node volumes: grantsdbdata: + opensearch-data: + +networks: + opensearch-net: diff --git a/api/local.env b/api/local.env index fc1c1c1a4..f0abae62b 100644 --- a/api/local.env +++ b/api/local.env @@ -59,6 +59,15 @@ DB_SSL_MODE=allow # could contain sensitive information. 
HIDE_SQL_PARAMETER_LOGS=TRUE +############################ +# Opensearch Environment Variables +############################ + +OPENSEARCH_HOST=host.docker.internal +OPENSEARCH_PORT=9200 +OPENSEARCH_USE_SSL=FALSE +OPENSEARCH_VERIFY_CERTS=FALSE + ############################ # AWS Defaults ############################ diff --git a/api/src/adapters/opensearch/__init__.py b/api/src/adapters/opensearch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/api/src/adapters/opensearch/opensearch_client.py b/api/src/adapters/opensearch/opensearch_client.py new file mode 100644 index 000000000..734dc3672 --- /dev/null +++ b/api/src/adapters/opensearch/opensearch_client.py @@ -0,0 +1,33 @@ +from typing import Any + +import opensearchpy + +from src.adapters.opensearch.opensearch_config import OpensearchConfig, get_opensearch_config + +# More configuration/setup coming in: +# TODO - https://github.com/navapbc/simpler-grants-gov/issues/13 + + + +def get_opensearch_client( + opensearch_config: OpensearchConfig | None = None, +) -> opensearchpy.OpenSearch: + if opensearch_config is None: + opensearch_config = get_opensearch_config() + + # See: https://opensearch.org/docs/latest/clients/python-low-level/ for more details + return opensearchpy.OpenSearch(**_get_connection_parameters(opensearch_config)) + +def _get_connection_parameters(opensearch_config: OpensearchConfig) -> dict[str, Any]: + + # TODO - we'll want to add the AWS connection params here when we set that up + # See: https://opensearch.org/docs/latest/clients/python-low-level/#connecting-to-amazon-opensearch-serverless + + return dict( + hosts=[{"host": opensearch_config.host, "port":opensearch_config.port}], + http_compress=True, + use_ssl=opensearch_config.use_ssl, + verify_certs=opensearch_config.verify_certs, + ssl_assert_hostname=False, + ssl_show_warn=False, + ) \ No newline at end of file diff --git a/api/src/adapters/opensearch/opensearch_config.py b/api/src/adapters/opensearch/opensearch_config.py new file mode 100644 index 000000000..72af1b174 --- /dev/null +++ b/api/src/adapters/opensearch/opensearch_config.py @@ -0,0 +1,34 @@ +import logging + +from pydantic import Field + +from src.util.env_config import PydanticBaseEnvConfig +from pydantic_settings import SettingsConfigDict + +logger = logging.getLogger(__name__) + + +class OpensearchConfig(PydanticBaseEnvConfig): + model_config = SettingsConfigDict(env_prefix="OPENSEARCH_") + + host: str # OPENSEARCH_HOST + port: int # OPENSEARCH_PORT + use_ssl: bool = Field(default=True) # OPENSEARCH_USE_SSL + verify_certs: bool = Field(default=True) # OPENSEARCH_VERIFY_CERTS + + + +def get_opensearch_config() -> OpensearchConfig: + opensearch_config = OpensearchConfig() + + logger.info( + "Constructed opensearch configuration", + extra={ + "host": opensearch_config.host, + "port": opensearch_config.port, + "use_ssl": opensearch_config.use_ssl, + "verify_certs": opensearch_config.verify_certs + }, + ) + + return opensearch_config diff --git a/api/tests/src/adapters/opensearch/__init__.py b/api/tests/src/adapters/opensearch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/api/tests/src/adapters/opensearch/test_opensearch.py b/api/tests/src/adapters/opensearch/test_opensearch.py new file mode 100644 index 000000000..7c896704d --- /dev/null +++ b/api/tests/src/adapters/opensearch/test_opensearch.py @@ -0,0 +1,83 @@ +import uuid + +import opensearchpy +import pytest + +from src.adapters.opensearch.opensearch_client import get_opensearch_client + 
+######################################## +# This is a placeholder set of tests, +# we'll evolve / change the structure +# as we continue developing this +# +# Just wanted something simple so I can verify +# the early steps of this setup are working +# before we actually have code to use +######################################## + + +@pytest.fixture(scope="session") +def search_client() -> opensearchpy.OpenSearch: + # TODO - move this to conftest + return get_opensearch_client() + + +@pytest.fixture(scope="session") +def opportunity_index(search_client): + # TODO - will adjust this in the future to use utils we'll build + # for setting up / aliasing indexes. For now, keep it simple + + index_name = f"test_{uuid.uuid4().int}_opportunity" + + search_client.indices.create(index_name, body={}) + + try: + yield index_name + finally: + search_client.indices.delete(index_name) + + +def test_index_is_running(search_client, opportunity_index): + existing_indexes = search_client.cat.indices(format="json") + + found_opportunity_index = False + for index in existing_indexes: + if index["index"] == opportunity_index: + found_opportunity_index = True + break + + assert found_opportunity_index is True + + # Add a few records to the index + + record1 = { + "opportunity_id": 1, + "opportunity_title": "Research into how to make a search engine", + "opportunity_status": "posted", + } + record2 = { + "opportunity_id": 2, + "opportunity_title": "Research about words, and more words!", + "opportunity_status": "forecasted", + } + + search_client.index(index=opportunity_index, body=record1, id=1, refresh=True) + search_client.index(index=opportunity_index, body=record2, id=2, refresh=True) + + search_request = { + "query": { + "bool": { + "must": { + "simple_query_string": {"query": "research", "fields": ["opportunity_title"]} + } + } + } + } + response = search_client.search(index=opportunity_index, body=search_request) + assert response["hits"]["total"]["value"] == 2 + + filter_request = { + "query": {"bool": {"filter": [{"terms": {"opportunity_status": ["forecasted"]}}]}} + } + response = search_client.search(index=opportunity_index, body=filter_request) + assert response["hits"]["total"]["value"] == 1 From 1922340300be58c6c655f7c300c604afff6b7093 Mon Sep 17 00:00:00 2001 From: Michael Chouinard Date: Fri, 17 May 2024 09:52:09 -0400 Subject: [PATCH 02/19] Some rearranging of files --- api/src/adapters/opensearch/__init__.py | 0 api/src/adapters/search/__init__.py | 4 +++ .../opensearch_client.py | 13 +++++--- .../opensearch_config.py | 13 ++++---- api/tests/conftest.py | 29 +++++++++++++++++ .../adapters/opensearch/test_opensearch.py | 31 ++----------------- 6 files changed, 50 insertions(+), 40 deletions(-) delete mode 100644 api/src/adapters/opensearch/__init__.py create mode 100644 api/src/adapters/search/__init__.py rename api/src/adapters/{opensearch => search}/opensearch_client.py (73%) rename api/src/adapters/{opensearch => search}/opensearch_config.py (71%) diff --git a/api/src/adapters/opensearch/__init__.py b/api/src/adapters/opensearch/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/api/src/adapters/search/__init__.py b/api/src/adapters/search/__init__.py new file mode 100644 index 000000000..166441e1d --- /dev/null +++ b/api/src/adapters/search/__init__.py @@ -0,0 +1,4 @@ +from src.adapters.search.opensearch_client import SearchClient, get_opensearch_client +from src.adapters.search.opensearch_config import get_opensearch_config + +__all__ = ["SearchClient", 
"get_opensearch_client", "get_opensearch_config"] diff --git a/api/src/adapters/opensearch/opensearch_client.py b/api/src/adapters/search/opensearch_client.py similarity index 73% rename from api/src/adapters/opensearch/opensearch_client.py rename to api/src/adapters/search/opensearch_client.py index 734dc3672..dadcfd7c4 100644 --- a/api/src/adapters/opensearch/opensearch_client.py +++ b/api/src/adapters/search/opensearch_client.py @@ -2,32 +2,35 @@ import opensearchpy -from src.adapters.opensearch.opensearch_config import OpensearchConfig, get_opensearch_config +from src.adapters.search.opensearch_config import OpensearchConfig, get_opensearch_config # More configuration/setup coming in: # TODO - https://github.com/navapbc/simpler-grants-gov/issues/13 +# Alias the OpenSearch client so that it doesn't need to be imported everywhere +# and to make it clear it's a client +SearchClient = opensearchpy.OpenSearch def get_opensearch_client( opensearch_config: OpensearchConfig | None = None, -) -> opensearchpy.OpenSearch: +) -> SearchClient: if opensearch_config is None: opensearch_config = get_opensearch_config() # See: https://opensearch.org/docs/latest/clients/python-low-level/ for more details return opensearchpy.OpenSearch(**_get_connection_parameters(opensearch_config)) -def _get_connection_parameters(opensearch_config: OpensearchConfig) -> dict[str, Any]: +def _get_connection_parameters(opensearch_config: OpensearchConfig) -> dict[str, Any]: # TODO - we'll want to add the AWS connection params here when we set that up # See: https://opensearch.org/docs/latest/clients/python-low-level/#connecting-to-amazon-opensearch-serverless return dict( - hosts=[{"host": opensearch_config.host, "port":opensearch_config.port}], + hosts=[{"host": opensearch_config.host, "port": opensearch_config.port}], http_compress=True, use_ssl=opensearch_config.use_ssl, verify_certs=opensearch_config.verify_certs, ssl_assert_hostname=False, ssl_show_warn=False, - ) \ No newline at end of file + ) diff --git a/api/src/adapters/opensearch/opensearch_config.py b/api/src/adapters/search/opensearch_config.py similarity index 71% rename from api/src/adapters/opensearch/opensearch_config.py rename to api/src/adapters/search/opensearch_config.py index 72af1b174..4975feb3e 100644 --- a/api/src/adapters/opensearch/opensearch_config.py +++ b/api/src/adapters/search/opensearch_config.py @@ -1,9 +1,9 @@ import logging from pydantic import Field +from pydantic_settings import SettingsConfigDict from src.util.env_config import PydanticBaseEnvConfig -from pydantic_settings import SettingsConfigDict logger = logging.getLogger(__name__) @@ -11,11 +11,10 @@ class OpensearchConfig(PydanticBaseEnvConfig): model_config = SettingsConfigDict(env_prefix="OPENSEARCH_") - host: str # OPENSEARCH_HOST - port: int # OPENSEARCH_PORT - use_ssl: bool = Field(default=True) # OPENSEARCH_USE_SSL - verify_certs: bool = Field(default=True) # OPENSEARCH_VERIFY_CERTS - + host: str # OPENSEARCH_HOST + port: int # OPENSEARCH_PORT + use_ssl: bool = Field(default=True) # OPENSEARCH_USE_SSL + verify_certs: bool = Field(default=True) # OPENSEARCH_VERIFY_CERTS def get_opensearch_config() -> OpensearchConfig: @@ -27,7 +26,7 @@ def get_opensearch_config() -> OpensearchConfig: "host": opensearch_config.host, "port": opensearch_config.port, "use_ssl": opensearch_config.use_ssl, - "verify_certs": opensearch_config.verify_certs + "verify_certs": opensearch_config.verify_certs, }, ) diff --git a/api/tests/conftest.py b/api/tests/conftest.py index 928932b67..97173e9a7 
100644 --- a/api/tests/conftest.py +++ b/api/tests/conftest.py @@ -11,6 +11,7 @@ import src.adapters.db as db import src.app as app_entry import tests.src.db.models.factories as factories +from src.adapters import search from src.constants.schema import Schemas from src.db import models from src.db.models.lookup.sync_lookup_values import sync_lookup_values @@ -143,6 +144,34 @@ def test_foreign_schema(db_schema_prefix): return f"{db_schema_prefix}{Schemas.LEGACY}" +#################### +# Opensearch Fixtures +#################### + + +@pytest.fixture(scope="session") +def search_client() -> search.SearchClient: + return search.get_opensearch_client() + + +@pytest.fixture(scope="session") +def opportunity_index(search_client): + # TODO - will adjust this in the future to use utils we'll build + # for setting up / aliasing indexes. For now, keep it simple + + # create a random index name just to make sure it won't ever conflict + # with an actual one, similar to how we create schemas for database tests + index_name = f"test_{uuid.uuid4().int}_opportunity" + + search_client.indices.create(index_name, body={}) + + try: + yield index_name + finally: + # Try to clean up the index at the end + search_client.indices.delete(index_name) + + #################### # Test App & Client #################### diff --git a/api/tests/src/adapters/opensearch/test_opensearch.py b/api/tests/src/adapters/opensearch/test_opensearch.py index 7c896704d..490ffcb3b 100644 --- a/api/tests/src/adapters/opensearch/test_opensearch.py +++ b/api/tests/src/adapters/opensearch/test_opensearch.py @@ -1,10 +1,3 @@ -import uuid - -import opensearchpy -import pytest - -from src.adapters.opensearch.opensearch_client import get_opensearch_client - ######################################## # This is a placeholder set of tests, # we'll evolve / change the structure @@ -16,28 +9,10 @@ ######################################## -@pytest.fixture(scope="session") -def search_client() -> opensearchpy.OpenSearch: - # TODO - move this to conftest - return get_opensearch_client() - - -@pytest.fixture(scope="session") -def opportunity_index(search_client): - # TODO - will adjust this in the future to use utils we'll build - # for setting up / aliasing indexes. For now, keep it simple - - index_name = f"test_{uuid.uuid4().int}_opportunity" - - search_client.indices.create(index_name, body={}) - - try: - yield index_name - finally: - search_client.indices.delete(index_name) - - def test_index_is_running(search_client, opportunity_index): + # Very simple test, will rewrite / remove later once we have something + # more meaningful to test. 
+ existing_indexes = search_client.cat.indices(format="json") found_opportunity_index = False From 649339c9ea4ae9fa0a2336e6790bc5eca96f659e Mon Sep 17 00:00:00 2001 From: Michael Chouinard Date: Fri, 17 May 2024 10:13:56 -0400 Subject: [PATCH 03/19] Dependency fixes --- api/poetry.lock | 79 ++++++++++++++++++++++++++++++++++------------ api/pyproject.toml | 7 ++++ 2 files changed, 65 insertions(+), 21 deletions(-) diff --git a/api/poetry.lock b/api/poetry.lock index 2b372c21e..e24672ff5 100644 --- a/api/poetry.lock +++ b/api/poetry.lock @@ -978,7 +978,7 @@ files = [ [package.dependencies] marshmallow = [ - {version = ">=3.13.0,<4.0"}, + {version = ">=3.13.0,<4.0", optional = true, markers = "python_version < \"3.7\" or extra != \"enum\""}, {version = ">=3.18.0,<4.0", optional = true, markers = "python_version >= \"3.7\" and extra == \"enum\""}, ] typeguard = {version = ">=2.4.1,<4.0.0", optional = true, markers = "extra == \"union\""} @@ -1106,6 +1106,30 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "opensearch-py" +version = "2.5.0" +description = "Python client for OpenSearch" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,<4,>=2.7" +files = [ + {file = "opensearch-py-2.5.0.tar.gz", hash = "sha256:0dde4ac7158a717d92a8cd81964cb99705a4b80bcf9258ba195b9a9f23f5226d"}, + {file = "opensearch_py-2.5.0-py2.py3-none-any.whl", hash = "sha256:cf093a40e272b60663f20417fc1264ac724dcf1e03c1a4542a6b44835b1e6c49"}, +] + +[package.dependencies] +certifi = ">=2022.12.07" +python-dateutil = "*" +requests = ">=2.4.0,<3.0.0" +six = "*" +urllib3 = ">=1.26.18,<2" + +[package.extras] +async = ["aiohttp (>=3,<4)"] +develop = ["black", "botocore", "coverage (<8.0.0)", "jinja2", "mock", "myst-parser", "pytest (>=3.0.0)", "pytest-cov", "pytest-mock (<4.0.0)", "pytz", "pyyaml", "requests (>=2.0.0,<3.0.0)", "sphinx", "sphinx-copybutton", "sphinx-rtd-theme"] +docs = ["aiohttp (>=3,<4)", "myst-parser", "sphinx", "sphinx-copybutton", "sphinx-rtd-theme"] +kerberos = ["requests-kerberos"] + [[package]] name = "packaging" version = "24.0" @@ -1563,7 +1587,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -1571,16 +1594,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -1597,7 +1612,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -1605,7 +1619,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -1913,6 +1926,31 @@ files = [ {file = "types_PyYAML-6.0.12.20240311-py3-none-any.whl", hash = "sha256:b845b06a1c7e54b8e5b4c683043de0d9caf205e7434b3edc678ff2411979b8f6"}, ] +[[package]] +name = "types-requests" +version = "2.31.0.1" +description = "Typing stubs for requests" +optional = false +python-versions = "*" +files = [ + {file = "types-requests-2.31.0.1.tar.gz", hash = "sha256:3de667cffa123ce698591de0ad7db034a5317457a596eb0b4944e5a9d9e8d1ac"}, + {file = "types_requests-2.31.0.1-py3-none-any.whl", hash = "sha256:afb06ef8f25ba83d59a1d424bd7a5a939082f94b94e90ab5e6116bd2559deaa3"}, +] + +[package.dependencies] +types-urllib3 = "*" + +[[package]] +name = "types-urllib3" +version = "1.26.25.14" +description = "Typing stubs for urllib3" +optional = false +python-versions = "*" +files = [ + {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"}, + {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"}, +] + [[package]] name = "typing-extensions" version = "4.11.0" @@ -1952,20 +1990,19 @@ files = [ [[package]] name = "urllib3" -version = "2.2.1" +version = "1.26.18" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false -python-versions = ">=3.8" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ - {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, - {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, + {file = "urllib3-1.26.18-py2.py3-none-any.whl", hash = "sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07"}, + {file = "urllib3-1.26.18.tar.gz", hash = "sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0"}, ] [package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -h2 = ["h2 (>=4,<5)"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] +brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] name = "watchdog" @@ -2061,4 +2098,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "c53875955c1b910c3d4aa1748dce786e3cfa6f507895d7ca4111391333decb13" +content-hash = "9671a2d68d2b1bc91b8ce111a7a32d08292475e0d1c4f058c33bf650349757e0" diff --git a/api/pyproject.toml b/api/pyproject.toml index f0a06b447..0f3c2f10b 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -22,6 +22,7 @@ gunicorn = "^22.0.0" psycopg = { extras = ["binary"], version = "^3.1.10" } pydantic-settings = "^2.0.3" flask-cors = "^4.0.0" +opensearch-py = "^2.5.0" [tool.poetry.group.dev.dependencies] black = "^23.9.1" @@ -43,6 +44,12 @@ sadisplay = "0.4.9" ruff = "^0.4.0" debugpy = "^1.8.1" freezegun = "^1.5.0" +# This isn't the latest version of types-requests +# because otherwise it depends on urllib3 v2 but opensearch-py +# needs urlib3 v1. This should be temporary as opensearch-py +# has an unreleased change to switch to v2, so I'm guessing +# in the next few weeks we can just make this the latest? +types-requests = "2.31.0.1" [build-system] requires = ["poetry-core>=1.0.0"] From 2126171f0e4c61d1fb6c2d923813a7928747107e Mon Sep 17 00:00:00 2001 From: Michael Chouinard Date: Fri, 17 May 2024 10:29:50 -0400 Subject: [PATCH 04/19] Trying something else for the network setup? --- api/docker-compose.yml | 3 ++- api/tests/src/adapters/{opensearch => search}/__init__.py | 0 .../src/adapters/{opensearch => search}/test_opensearch.py | 0 3 files changed, 2 insertions(+), 1 deletion(-) rename api/tests/src/adapters/{opensearch => search}/__init__.py (100%) rename api/tests/src/adapters/{opensearch => search}/test_opensearch.py (100%) diff --git a/api/docker-compose.yml b/api/docker-compose.yml index 88668a89e..c1744c520 100644 --- a/api/docker-compose.yml +++ b/api/docker-compose.yml @@ -51,7 +51,6 @@ services: networks: - opensearch-net - grants-api: build: context: . 
@@ -69,6 +68,8 @@ services: depends_on: - grants-db - opensearch-node + extra_hosts: + - "host.docker.internal:host-gateway" volumes: grantsdbdata: diff --git a/api/tests/src/adapters/opensearch/__init__.py b/api/tests/src/adapters/search/__init__.py similarity index 100% rename from api/tests/src/adapters/opensearch/__init__.py rename to api/tests/src/adapters/search/__init__.py diff --git a/api/tests/src/adapters/opensearch/test_opensearch.py b/api/tests/src/adapters/search/test_opensearch.py similarity index 100% rename from api/tests/src/adapters/opensearch/test_opensearch.py rename to api/tests/src/adapters/search/test_opensearch.py From 8f80852601026a11ceb25d8981a57f88f9b2b176 Mon Sep 17 00:00:00 2001 From: Michael Chouinard Date: Fri, 17 May 2024 10:36:18 -0400 Subject: [PATCH 05/19] Simplify the networking/docker setup --- api/docker-compose.yml | 9 --------- api/local.env | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/api/docker-compose.yml b/api/docker-compose.yml index c1744c520..9ec206214 100644 --- a/api/docker-compose.yml +++ b/api/docker-compose.yml @@ -35,8 +35,6 @@ services: ports: - 9200:9200 # REST API - 9600:9600 # Performance Analyzer - networks: - - opensearch-net # All of the containers will join the same Docker bridge network opensearch-dashboards: image: opensearchproject/opensearch-dashboards:latest @@ -48,8 +46,6 @@ services: environment: - 'OPENSEARCH_HOSTS=["http://opensearch-node:9200"]' - DISABLE_SECURITY_DASHBOARDS_PLUGIN=true # disables security dashboards plugin in OpenSearch Dashboards - networks: - - opensearch-net grants-api: build: @@ -68,12 +64,7 @@ services: depends_on: - grants-db - opensearch-node - extra_hosts: - - "host.docker.internal:host-gateway" volumes: grantsdbdata: opensearch-data: - -networks: - opensearch-net: diff --git a/api/local.env b/api/local.env index f0abae62b..4ca4c86b5 100644 --- a/api/local.env +++ b/api/local.env @@ -63,7 +63,7 @@ HIDE_SQL_PARAMETER_LOGS=TRUE # Opensearch Environment Variables ############################ -OPENSEARCH_HOST=host.docker.internal +OPENSEARCH_HOST=opensearch-node OPENSEARCH_PORT=9200 OPENSEARCH_USE_SSL=FALSE OPENSEARCH_VERIFY_CERTS=FALSE From f02f3d39f0d95b52f512464112174a52e063e3d4 Mon Sep 17 00:00:00 2001 From: Michael Chouinard Date: Tue, 21 May 2024 15:34:26 -0400 Subject: [PATCH 06/19] [Issue #10] Populate the search index from the opportunity tables --- api/src/adapters/search/__init__.py | 4 +- api/src/adapters/search/opensearch_client.py | 115 ++++++++++++++++-- api/src/app.py | 4 + api/src/search/__init__.py | 0 api/src/search/backend/__init__.py | 2 + .../backend/load_opportunities_to_index.py | 113 +++++++++++++++++ api/src/search/backend/load_search_data.py | 15 +++ .../backend/load_search_data_blueprint.py | 5 + api/tests/conftest.py | 24 ++-- .../src/adapters/search/test_opensearch.py | 58 --------- .../adapters/search/test_opensearch_client.py | 105 ++++++++++++++++ api/tests/src/search/__init__.py | 0 api/tests/src/search/backend/__init__.py | 0 .../test_load_opportunities_to_index.py | 87 +++++++++++++ 14 files changed, 452 insertions(+), 80 deletions(-) create mode 100644 api/src/search/__init__.py create mode 100644 api/src/search/backend/__init__.py create mode 100644 api/src/search/backend/load_opportunities_to_index.py create mode 100644 api/src/search/backend/load_search_data.py create mode 100644 api/src/search/backend/load_search_data_blueprint.py delete mode 100644 api/tests/src/adapters/search/test_opensearch.py create mode 100644 
api/tests/src/adapters/search/test_opensearch_client.py create mode 100644 api/tests/src/search/__init__.py create mode 100644 api/tests/src/search/backend/__init__.py create mode 100644 api/tests/src/search/backend/test_load_opportunities_to_index.py diff --git a/api/src/adapters/search/__init__.py b/api/src/adapters/search/__init__.py index 166441e1d..6b2607a04 100644 --- a/api/src/adapters/search/__init__.py +++ b/api/src/adapters/search/__init__.py @@ -1,4 +1,4 @@ -from src.adapters.search.opensearch_client import SearchClient, get_opensearch_client +from src.adapters.search.opensearch_client import SearchClient from src.adapters.search.opensearch_config import get_opensearch_config -__all__ = ["SearchClient", "get_opensearch_client", "get_opensearch_config"] +__all__ = ["SearchClient", "get_opensearch_config"] diff --git a/api/src/adapters/search/opensearch_client.py b/api/src/adapters/search/opensearch_client.py index dadcfd7c4..b93d33917 100644 --- a/api/src/adapters/search/opensearch_client.py +++ b/api/src/adapters/search/opensearch_client.py @@ -1,25 +1,114 @@ -from typing import Any +import logging +from typing import Any, Sequence import opensearchpy from src.adapters.search.opensearch_config import OpensearchConfig, get_opensearch_config -# More configuration/setup coming in: -# TODO - https://github.com/navapbc/simpler-grants-gov/issues/13 +logger = logging.getLogger(__name__) -# Alias the OpenSearch client so that it doesn't need to be imported everywhere -# and to make it clear it's a client -SearchClient = opensearchpy.OpenSearch +class SearchClient: + def __init__(self, opensearch_config: OpensearchConfig | None = None) -> None: + if opensearch_config is None: + opensearch_config = get_opensearch_config() -def get_opensearch_client( - opensearch_config: OpensearchConfig | None = None, -) -> SearchClient: - if opensearch_config is None: - opensearch_config = get_opensearch_config() + # See: https://opensearch.org/docs/latest/clients/python-low-level/ for more details + self._client = opensearchpy.OpenSearch(**_get_connection_parameters(opensearch_config)) - # See: https://opensearch.org/docs/latest/clients/python-low-level/ for more details - return opensearchpy.OpenSearch(**_get_connection_parameters(opensearch_config)) + def create_index( + self, index_name: str, *, shard_count: int = 1, replica_count: int = 1 + ) -> None: + """ + Create an empty search index + """ + body = { + "settings": { + "index": {"number_of_shards": shard_count, "number_of_replicas": replica_count} + } + } + + logger.info("Creating search index %s", index_name, extra={"index_name": index_name}) + self._client.indices.create(index_name, body=body) + + def delete_index(self, index_name: str) -> None: + """ + Delete an index. Can also delete all indexes via a prefix. + """ + logger.info("Deleting search index %s", index_name, extra={"index_name": index_name}) + self._client.indices.delete(index=index_name) + + def bulk_upsert( + self, + index_name: str, + records: Sequence[dict[str, Any]], + primary_key_field: str, + *, + refresh: bool = True + ) -> None: + """ + Bulk upsert records to an index + + See: https://opensearch.org/docs/latest/api-reference/document-apis/bulk/ for details + In this method we only use the "index" operation which creates or updates a record + based on the id value. 
+ """ + + bulk_operations = [] + + for record in records: + # For each record, we create two entries in the bulk operation list + # which include the unique ID + the actual record on separate lines + # When this is sent to the search index, this will send two lines like: + # + # {"index": {"_id": 123}} + # {"opportunity_id": 123, "opportunity_title": "example title", ...} + bulk_operations.append({"index": {"_id": record[primary_key_field]}}) + bulk_operations.append(record) + + logger.info( + "Upserting records to %s", + index_name, + extra={"index_name": index_name, "record_count": int(len(bulk_operations) / 2)}, + ) + self._client.bulk(index=index_name, body=bulk_operations, refresh=refresh) + + def swap_alias_index( + self, index_name: str, alias_name: str, *, delete_prior_indexes: bool = False + ) -> None: + """ + For a given index, set it to the given alias. If any existing index(es) are + attached to the alias, remove them from the alias. + + This operation is done atomically. + """ + extra = {"index_name": index_name, "index_alias": alias_name} + logger.info("Swapping index that backs alias %s", alias_name, extra=extra) + + existing_index_mapping = self._client.cat.aliases(alias_name, format="json") + existing_indexes = [i["index"] for i in existing_index_mapping] + + logger.info( + "Found existing indexes", extra=extra | {"existing_indexes": ",".join(existing_indexes)} + ) + + actions = [{"add": {"index": index_name, "alias": alias_name}}] + + for index in existing_indexes: + actions.append({"remove": {"index": index, "alias": alias_name}}) + + self._client.indices.update_aliases({"actions": actions}) + + # Cleanup old indexes now that they aren't connected to the alias + if delete_prior_indexes: + for index in existing_indexes: + self.delete_index(index) + + def search(self, index_name: str, search_query: dict) -> dict: + # TODO - add more when we build out the request/response parsing logic + # we use something like Pydantic to help reorganize the response + # object into something easier to parse. + return self._client.search(index=index_name, body=search_query) def _get_connection_parameters(opensearch_config: OpensearchConfig) -> dict[str, Any]: diff --git a/api/src/app.py b/api/src/app.py index 8e617cce8..6864783b0 100644 --- a/api/src/app.py +++ b/api/src/app.py @@ -17,6 +17,7 @@ from src.api.schemas import response_schema from src.auth.api_key_auth import get_app_security_scheme from src.data_migration.data_migration_blueprint import data_migration_blueprint +from src.search.backend.load_search_data_blueprint import load_search_data_blueprint from src.task import task_blueprint logger = logging.getLogger(__name__) @@ -101,8 +102,11 @@ def register_blueprints(app: APIFlask) -> None: app.register_blueprint(healthcheck_blueprint) app.register_blueprint(opportunities_v0_blueprint) app.register_blueprint(opportunities_v0_1_blueprint) + + # Non-api blueprints app.register_blueprint(data_migration_blueprint) app.register_blueprint(task_blueprint) + app.register_blueprint(load_search_data_blueprint) def get_project_root_dir() -> str: diff --git a/api/src/search/__init__.py b/api/src/search/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/api/src/search/backend/__init__.py b/api/src/search/backend/__init__.py new file mode 100644 index 000000000..00a43e108 --- /dev/null +++ b/api/src/search/backend/__init__.py @@ -0,0 +1,2 @@ +# import all files so they get initialized and attached to the blueprint +from . 
import load_search_data # noqa: F401 diff --git a/api/src/search/backend/load_opportunities_to_index.py b/api/src/search/backend/load_opportunities_to_index.py new file mode 100644 index 000000000..bf3c97c06 --- /dev/null +++ b/api/src/search/backend/load_opportunities_to_index.py @@ -0,0 +1,113 @@ +import logging +from enum import StrEnum +from typing import Iterator, Sequence + +from pydantic import Field +from pydantic_settings import SettingsConfigDict +from sqlalchemy import select +from sqlalchemy.orm import noload, selectinload + +import src.adapters.db as db +import src.adapters.search as search +from src.api.opportunities_v0_1.opportunity_schemas import OpportunitySchema +from src.db.models.opportunity_models import CurrentOpportunitySummary, Opportunity +from src.task.task import Task +from src.util.datetime_util import get_now_us_eastern_datetime +from src.util.env_config import PydanticBaseEnvConfig + +logger = logging.getLogger(__name__) + + +class LoadOpportunitiesToIndexConfig(PydanticBaseEnvConfig): + model_config = SettingsConfigDict(env_prefix="LOAD_OPP_SEARCH_") + + shard_count: int = Field(default=1) # LOAD_OPP_SEARCH_SHARD_COUNT + replica_count: int = Field(default=1) # LOAD_OPP_SEARCH_REPLICA_COUNT + + # TODO - these might make sense to come from some sort of opportunity-search-index-config? + # look into this a bit more when we setup the search endpoint itself. + alias_name: str = Field(default="opportunity-index-alias") # LOAD_OPP_SEARCH_ALIAS_NAME + index_prefix: str = Field(default="opportunity-index") # LOAD_OPP_INDEX_PREFIX + + +class LoadOpportunitiesToIndex(Task): + class Metrics(StrEnum): + RECORDS_LOADED = "records_loaded" + + def __init__( + self, + db_session: db.Session, + search_client: search.SearchClient, + config: LoadOpportunitiesToIndexConfig | None = None, + ) -> None: + super().__init__(db_session) + + self.search_client = search_client + + if config is None: + config = LoadOpportunitiesToIndexConfig() + self.config = config + + current_timestamp = get_now_us_eastern_datetime().strftime("%Y-%m-%d_%H-%M-%S") + self.index_name = f"{self.config.index_prefix}-{current_timestamp}" + self.set_metrics({"index_name": self.index_name}) + + def run_task(self) -> None: + # create the index + self.search_client.create_index( + self.index_name, + shard_count=self.config.shard_count, + replica_count=self.config.replica_count, + ) + + # load the records + for opp_batch in self.fetch_opportunities(): + self.load_records(opp_batch) + + # handle aliasing of endpoints + self.search_client.swap_alias_index( + self.index_name, self.config.alias_name, delete_prior_indexes=True + ) + + def fetch_opportunities(self) -> Iterator[Sequence[Opportunity]]: + """ + Fetch the opportunities in batches. The iterator returned + will give you each individual batch to be processed. + + Fetches all opportunities where: + * is_draft = False + * current_opportunity_summary is not None + """ + return ( + self.db_session.execute( + select(Opportunity) + .join(CurrentOpportunitySummary) + # TODO - the join might be enough for the query? 
+ .where( + Opportunity.is_draft.is_(False), + CurrentOpportunitySummary.opportunity_status.isnot(None), + ) + .options(selectinload("*"), noload(Opportunity.all_opportunity_summaries)) + .execution_options(yield_per=5000) + ) + .scalars() + .partitions() + ) + + def load_records(self, records: Sequence[Opportunity]) -> None: + logger.info("Loading batch of opportunities...") + schema = OpportunitySchema() # TODO - switch to the v1 version when that is merged + json_records = [] + + for record in records: + logger.info( + "Preparing opportunity for upload to search index", + extra={ + "opportunity_id": record.opportunity_id, + "opportunity_status": record.opportunity_status, + }, + ) + json_records.append(schema.dump(record)) + self.increment(self.Metrics.RECORDS_LOADED) + + self.search_client.bulk_upsert(self.index_name, json_records, "opportunity_id") diff --git a/api/src/search/backend/load_search_data.py b/api/src/search/backend/load_search_data.py new file mode 100644 index 000000000..cf6f0445f --- /dev/null +++ b/api/src/search/backend/load_search_data.py @@ -0,0 +1,15 @@ +import src.adapters.db as db +import src.adapters.search as search +from src.adapters.db import flask_db +from src.search.backend.load_opportunities_to_index import LoadOpportunitiesToIndex +from src.search.backend.load_search_data_blueprint import load_search_data_blueprint + + +@load_search_data_blueprint.cli.command( + "load-opportunity-data", help="Load opportunity data from our database to the search index" +) +@flask_db.with_db_session() +def load_opportunity_data(db_session: db.Session) -> None: + search_client = search.SearchClient() + + LoadOpportunitiesToIndex(db_session, search_client).run() diff --git a/api/src/search/backend/load_search_data_blueprint.py b/api/src/search/backend/load_search_data_blueprint.py new file mode 100644 index 000000000..fffd9f915 --- /dev/null +++ b/api/src/search/backend/load_search_data_blueprint.py @@ -0,0 +1,5 @@ +from apiflask import APIBlueprint + +load_search_data_blueprint = APIBlueprint( + "load-search-data", __name__, enable_openapi=False, cli_group="load-search-data" +) diff --git a/api/tests/conftest.py b/api/tests/conftest.py index 97173e9a7..4b45c4f2c 100644 --- a/api/tests/conftest.py +++ b/api/tests/conftest.py @@ -151,25 +151,35 @@ def test_foreign_schema(db_schema_prefix): @pytest.fixture(scope="session") def search_client() -> search.SearchClient: - return search.get_opensearch_client() + client = search.SearchClient() + try: + yield client + finally: + # Just in case a test setup an index + # in a way that didn't clean it up, delete + # all indexes at the end of a run that start with test + client.delete_index("test-*") @pytest.fixture(scope="session") def opportunity_index(search_client): - # TODO - will adjust this in the future to use utils we'll build - # for setting up / aliasing indexes. 
For now, keep it simple - # create a random index name just to make sure it won't ever conflict # with an actual one, similar to how we create schemas for database tests - index_name = f"test_{uuid.uuid4().int}_opportunity" + index_name = f"test-opportunity-index-{uuid.uuid4().int}" - search_client.indices.create(index_name, body={}) + search_client.create_index(index_name) try: yield index_name finally: # Try to clean up the index at the end - search_client.indices.delete(index_name) + search_client.delete_index(index_name) + + +@pytest.fixture(scope="session") +def opportunity_index_alias(search_client): + # Note we don't actually create anything, this is just a random name + return f"test-opportunity-index-alias-{uuid.uuid4().int}" #################### diff --git a/api/tests/src/adapters/search/test_opensearch.py b/api/tests/src/adapters/search/test_opensearch.py deleted file mode 100644 index 490ffcb3b..000000000 --- a/api/tests/src/adapters/search/test_opensearch.py +++ /dev/null @@ -1,58 +0,0 @@ -######################################## -# This is a placeholder set of tests, -# we'll evolve / change the structure -# as we continue developing this -# -# Just wanted something simple so I can verify -# the early steps of this setup are working -# before we actually have code to use -######################################## - - -def test_index_is_running(search_client, opportunity_index): - # Very simple test, will rewrite / remove later once we have something - # more meaningful to test. - - existing_indexes = search_client.cat.indices(format="json") - - found_opportunity_index = False - for index in existing_indexes: - if index["index"] == opportunity_index: - found_opportunity_index = True - break - - assert found_opportunity_index is True - - # Add a few records to the index - - record1 = { - "opportunity_id": 1, - "opportunity_title": "Research into how to make a search engine", - "opportunity_status": "posted", - } - record2 = { - "opportunity_id": 2, - "opportunity_title": "Research about words, and more words!", - "opportunity_status": "forecasted", - } - - search_client.index(index=opportunity_index, body=record1, id=1, refresh=True) - search_client.index(index=opportunity_index, body=record2, id=2, refresh=True) - - search_request = { - "query": { - "bool": { - "must": { - "simple_query_string": {"query": "research", "fields": ["opportunity_title"]} - } - } - } - } - response = search_client.search(index=opportunity_index, body=search_request) - assert response["hits"]["total"]["value"] == 2 - - filter_request = { - "query": {"bool": {"filter": [{"terms": {"opportunity_status": ["forecasted"]}}]}} - } - response = search_client.search(index=opportunity_index, body=filter_request) - assert response["hits"]["total"]["value"] == 1 diff --git a/api/tests/src/adapters/search/test_opensearch_client.py b/api/tests/src/adapters/search/test_opensearch_client.py new file mode 100644 index 000000000..d9ba22194 --- /dev/null +++ b/api/tests/src/adapters/search/test_opensearch_client.py @@ -0,0 +1,105 @@ +import uuid + +import pytest + +######################################################################## +# These tests are primarily looking to validate +# that our wrappers around the OpenSearch client +# are being used correctly / account for error cases correctly. +# +# We are not validating all the intricacies of OpenSearch itself. 
+######################################################################## + + +@pytest.fixture +def generic_index(search_client): + # This is very similar to the opportunity_index fixture, but + # is reused per unit test rather than a global value + index_name = f"test-index-{uuid.uuid4().int}" + + search_client.create_index(index_name) + + try: + yield index_name + finally: + # Try to clean up the index at the end + search_client.delete_index(index_name) + + +def test_create_and_delete_index_duplicate(search_client): + index_name = f"test-index-{uuid.uuid4().int}" + + search_client.create_index(index_name) + with pytest.raises(Exception, match="already exists"): + search_client.create_index(index_name) + + # Cleanup the index + search_client.delete_index(index_name) + with pytest.raises(Exception, match="no such index"): + search_client.delete_index(index_name) + + +def test_bulk_upsert(search_client, generic_index): + records = [ + {"id": 1, "title": "Green Eggs & Ham", "notes": "why are the eggs green?"}, + {"id": 2, "title": "The Cat in the Hat", "notes": "silly cat wears a hat"}, + {"id": 3, "title": "One Fish, Two Fish, Red Fish, Blue Fish", "notes": "fish"}, + ] + + search_client.bulk_upsert(generic_index, records, primary_key_field="id") + + # Verify the records are in the index + for record in records: + assert search_client._client.get(generic_index, record["id"])["_source"] == record + + # Can update + add more + records = [ + {"id": 1, "title": "Green Eggs & Ham", "notes": "Sam, eat the eggs"}, + {"id": 2, "title": "The Cat in the Hat", "notes": "watch the movie"}, + {"id": 3, "title": "One Fish, Two Fish, Red Fish, Blue Fish", "notes": "colors & numbers"}, + {"id": 4, "title": "How the Grinch Stole Christmas", "notes": "who"}, + ] + search_client.bulk_upsert(generic_index, records, primary_key_field="id") + + for record in records: + assert search_client._client.get(generic_index, record["id"])["_source"] == record + + +def test_swap_alias_index(search_client, generic_index): + alias_name = f"tmp-alias-{uuid.uuid4().int}" + + # Populate the generic index, we won't immediately use this one + records = [ + {"id": 1, "data": "abc123"}, + {"id": 2, "data": "def456"}, + {"id": 3, "data": "xyz789"}, + ] + search_client.bulk_upsert(generic_index, records, primary_key_field="id") + + # Create a different index that we'll attach to the alias first. 
+ tmp_index = f"test-tmp-index-{uuid.uuid4().int}" + search_client.create_index(tmp_index) + # Add a few records + tmp_index_records = [ + {"id": 1, "data": "abc123"}, + {"id": 2, "data": "xyz789"}, + ] + search_client.bulk_upsert(tmp_index, tmp_index_records, primary_key_field="id") + + # Set the alias + search_client.swap_alias_index(tmp_index, alias_name, delete_prior_indexes=True) + + # Can search by this alias and get records from the tmp index + resp = search_client.search(alias_name, {}) + resp_records = [record["_source"] for record in resp["hits"]["hits"]] + assert resp_records == tmp_index_records + + # Swap the index to the generic one + delete the tmp one + search_client.swap_alias_index(generic_index, alias_name, delete_prior_indexes=True) + + resp = search_client.search(alias_name, {}) + resp_records = [record["_source"] for record in resp["hits"]["hits"]] + assert resp_records == records + + # Verify the tmp one was deleted + assert search_client._client.indices.exists(tmp_index) is False diff --git a/api/tests/src/search/__init__.py b/api/tests/src/search/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/api/tests/src/search/backend/__init__.py b/api/tests/src/search/backend/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/api/tests/src/search/backend/test_load_opportunities_to_index.py b/api/tests/src/search/backend/test_load_opportunities_to_index.py new file mode 100644 index 000000000..10b3fc075 --- /dev/null +++ b/api/tests/src/search/backend/test_load_opportunities_to_index.py @@ -0,0 +1,87 @@ +import pytest + +from src.search.backend.load_opportunities_to_index import ( + LoadOpportunitiesToIndex, + LoadOpportunitiesToIndexConfig, +) +from tests.conftest import BaseTestClass +from tests.src.db.models.factories import OpportunityFactory + + +class TestLoadOpportunitiesToIndex(BaseTestClass): + @pytest.fixture(scope="class") + def load_opportunities_to_index(self, db_session, search_client, opportunity_index_alias): + config = LoadOpportunitiesToIndexConfig( + alias_name=opportunity_index_alias, index_prefix="test-load-opps" + ) + return LoadOpportunitiesToIndex(db_session, search_client, config) + + def test_load_opportunities_to_index( + self, + truncate_opportunities, + enable_factory_create, + search_client, + opportunity_index_alias, + load_opportunities_to_index, + ): + # Create 25 opportunities we will load into the search index + opportunities = [] + opportunities.extend(OpportunityFactory.create_batch(size=6, is_posted_summary=True)) + opportunities.extend(OpportunityFactory.create_batch(size=3, is_forecasted_summary=True)) + opportunities.extend(OpportunityFactory.create_batch(size=2, is_closed_summary=True)) + opportunities.extend(OpportunityFactory.create_batch(size=8, is_archived_non_forecast_summary=True)) + opportunities.extend(OpportunityFactory.create_batch(size=6, is_archived_forecast_summary=True)) + + # Create some opportunities that won't get fetched / loaded into search + OpportunityFactory.create_batch(size=3, is_draft=True) + OpportunityFactory.create_batch(size=4, no_current_summary=True) + + load_opportunities_to_index.run() + # Verify some metrics first + assert ( + len(opportunities) + == load_opportunities_to_index.metrics[ + load_opportunities_to_index.Metrics.RECORDS_LOADED + ] + ) + + # Just do some rough validation that the data is present + resp = search_client.search(opportunity_index_alias, {"size": 100}) + + total_records = resp["hits"]["total"]["value"] + assert total_records == 
len(opportunities) + + records = [record["_source"] for record in resp["hits"]["hits"]] + assert set([opp.opportunity_id for opp in opportunities]) == set( + [record["opportunity_id"] for record in records] + ) + + # Rerunning without changing anything about the data in the DB doesn't meaningfully change anything + load_opportunities_to_index.index_name = load_opportunities_to_index.index_name + "-another" + load_opportunities_to_index.run() + resp = search_client.search(opportunity_index_alias, {"size": 100}) + + total_records = resp["hits"]["total"]["value"] + assert total_records == len(opportunities) + + records = [record["_source"] for record in resp["hits"]["hits"]] + assert set([opp.opportunity_id for opp in opportunities]) == set( + [record["opportunity_id"] for record in records] + ) + + # Rerunning but first add a few more opportunities to show up + opportunities.extend(OpportunityFactory.create_batch(size=3)) + load_opportunities_to_index.index_name = ( + load_opportunities_to_index.index_name + "-new-data" + ) + load_opportunities_to_index.run() + + resp = search_client.search(opportunity_index_alias, {"size": 100}) + + total_records = resp["hits"]["total"]["value"] + assert total_records == len(opportunities) + + records = [record["_source"] for record in resp["hits"]["hits"]] + assert set([opp.opportunity_id for opp in opportunities]) == set( + [record["opportunity_id"] for record in records] + ) From 49c2a2b8680307949788ba8794b2fa97d53d5a53 Mon Sep 17 00:00:00 2001 From: Michael Chouinard Date: Tue, 21 May 2024 15:52:19 -0400 Subject: [PATCH 07/19] Slightly tidying up --- api/src/search/backend/load_opportunities_to_index.py | 1 - .../search/backend/test_load_opportunities_to_index.py | 8 ++++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/api/src/search/backend/load_opportunities_to_index.py b/api/src/search/backend/load_opportunities_to_index.py index bf3c97c06..455e6ddfb 100644 --- a/api/src/search/backend/load_opportunities_to_index.py +++ b/api/src/search/backend/load_opportunities_to_index.py @@ -82,7 +82,6 @@ def fetch_opportunities(self) -> Iterator[Sequence[Opportunity]]: self.db_session.execute( select(Opportunity) .join(CurrentOpportunitySummary) - # TODO - the join might be enough for the query? 
.where( Opportunity.is_draft.is_(False), CurrentOpportunitySummary.opportunity_status.isnot(None), diff --git a/api/tests/src/search/backend/test_load_opportunities_to_index.py b/api/tests/src/search/backend/test_load_opportunities_to_index.py index 10b3fc075..a079b83c8 100644 --- a/api/tests/src/search/backend/test_load_opportunities_to_index.py +++ b/api/tests/src/search/backend/test_load_opportunities_to_index.py @@ -29,8 +29,12 @@ def test_load_opportunities_to_index( opportunities.extend(OpportunityFactory.create_batch(size=6, is_posted_summary=True)) opportunities.extend(OpportunityFactory.create_batch(size=3, is_forecasted_summary=True)) opportunities.extend(OpportunityFactory.create_batch(size=2, is_closed_summary=True)) - opportunities.extend(OpportunityFactory.create_batch(size=8, is_archived_non_forecast_summary=True)) - opportunities.extend(OpportunityFactory.create_batch(size=6, is_archived_forecast_summary=True)) + opportunities.extend( + OpportunityFactory.create_batch(size=8, is_archived_non_forecast_summary=True) + ) + opportunities.extend( + OpportunityFactory.create_batch(size=6, is_archived_forecast_summary=True) + ) # Create some opportunities that won't get fetched / loaded into search OpportunityFactory.create_batch(size=3, is_draft=True) From 25edfab3d6012327ef42991b94a54f098e333049 Mon Sep 17 00:00:00 2001 From: Michael Chouinard Date: Wed, 22 May 2024 13:49:59 -0400 Subject: [PATCH 08/19] [Issue #14] Setup utils for creating requests and parsing responses from search --- api/src/adapters/search/opensearch_client.py | 6 +- .../search/opensearch_query_builder.py | 104 +++++++++++++++ .../adapters/search/opensearch_response.py | 125 ++++++++++++++++++ .../adapters/search/test_opensearch_client.py | 10 +- 4 files changed, 238 insertions(+), 7 deletions(-) create mode 100644 api/src/adapters/search/opensearch_query_builder.py create mode 100644 api/src/adapters/search/opensearch_response.py diff --git a/api/src/adapters/search/opensearch_client.py b/api/src/adapters/search/opensearch_client.py index b93d33917..465afdc25 100644 --- a/api/src/adapters/search/opensearch_client.py +++ b/api/src/adapters/search/opensearch_client.py @@ -4,6 +4,7 @@ import opensearchpy from src.adapters.search.opensearch_config import OpensearchConfig, get_opensearch_config +from src.adapters.search.opensearch_response import SearchResponse logger = logging.getLogger(__name__) @@ -104,12 +105,15 @@ def swap_alias_index( for index in existing_indexes: self.delete_index(index) - def search(self, index_name: str, search_query: dict) -> dict: + def search_raw(self, index_name: str, search_query: dict) -> dict: # TODO - add more when we build out the request/response parsing logic # we use something like Pydantic to help reorganize the response # object into something easier to parse. 
return self._client.search(index=index_name, body=search_query) + def search(self, index_name: str, search_query: dict, include_scores: bool = True) -> SearchResponse: + response = self._client.search(index=index_name, body=search_query) + return SearchResponse.from_opensearch_response(response, include_scores) def _get_connection_parameters(opensearch_config: OpensearchConfig) -> dict[str, Any]: # TODO - we'll want to add the AWS connection params here when we set that up diff --git a/api/src/adapters/search/opensearch_query_builder.py b/api/src/adapters/search/opensearch_query_builder.py new file mode 100644 index 000000000..1133c951a --- /dev/null +++ b/api/src/adapters/search/opensearch_query_builder.py @@ -0,0 +1,104 @@ +import typing + +SORT_DIRECTION = typing.Literal["asc", "desc"] # TODO - use an enum? + +class SearchQueryBuilder: + + def __init__(self) -> None: + self.page_size = 25 + self.page_number = 1 + + self.sort_by = "relevancy_score" + self.sort_direction: SORT_DIRECTION = "asc" + + self.must = [] + + self.filters = [] + self.aggregations = [] + + def pagination(self, page_size: int, page_number: int) -> typing.Self: + """ + Set the pagination for the search request. + + Note that page number should be the human-readable page number + and start counting from 1. + """ + self.page_size = page_size + self.page_number = page_number + return self + + def sorting(self, sort_by: str, sort_direction: SORT_DIRECTION) -> typing.Self: + self.sort_by = sort_by + self.sort_direction = sort_direction + return self + + def simple_query(self, query: str, fields: list[str]) -> typing.Self: + """ + Adds a simple_query_string which queries against the provided fields. + + The fields must include the full path to the object, and can include optional suffixes + to adjust the weighting. For example "opportunity_title^4" would increase any scores + derived from that field by 4x. 
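+
+        For illustration (the query text and the exact field weighting here are only examples),
+        a call like:
+
+            builder.simple_query("college fellowship", ["opportunity_title^4"])
+
+        appends the following clause to the "must" section of the built request:
+
+            {"simple_query_string": {"query": "college fellowship",
+                                     "default_operator": "AND",
+                                     "fields": ["opportunity_title^4"]}}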
+ + See: https://opensearch.org/docs/latest/query-dsl/full-text/simple-query-string/ + """ + self.must.append( + { + "simple_query_string": { + "query": query, + "default_operator": "AND", + "fields": fields + } + } + ) + + return self + + def filter_terms(self, field: str, terms: list[str | int]) -> typing.Self: + self.filters.append({"terms": {field: terms}}) + return self + + def aggregation_terms(self, aggregation_name: str, field_name: str, size: int = 25) -> typing.Self: + self.aggregations.append({ + aggregation_name: { + "terms": { + "field": field_name, + "size": size + } + } + }) + return self + + def build(self) -> dict: + page_offset = self.page_size * (self.page_number - 1) + + + request = { + "size": self.page_size, + "from": page_offset, + # Always include the scores in the response objects + # even if we're sorting by non-relevancy + "track_scores": True + } + + if self.sort_by != "relevancy_score": + request["sort"] = [ + { + self.sort_by: { + "order": self.sort_direction + } + } + ] + + bool_query = {} + + if len(self.must) > 0: + bool_query["must"] = self.must + + if len(self.filters) > 0: + bool_query["filter"] = self.filters + + if len(bool_query) > 0: + request["bool"] = bool_query + + return request \ No newline at end of file diff --git a/api/src/adapters/search/opensearch_response.py b/api/src/adapters/search/opensearch_response.py new file mode 100644 index 000000000..a54659023 --- /dev/null +++ b/api/src/adapters/search/opensearch_response.py @@ -0,0 +1,125 @@ +import typing +import dataclasses + +@dataclasses.dataclass +class SearchResponse: + + total_records: int + + records: list[dict[str, typing.Any]] + + aggregations: dict[str, dict[str, int]] + + + + @classmethod + def from_opensearch_response(cls, raw_json: dict[str, typing.Any], include_scores: bool = True) -> typing.Self: + """ + Convert a raw search response into something a bit more manageable + by un-nesting and restructuring a few of they key fields. 
+ """ + + """ + The hits object looks like: + { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 22.180708, + "hits": [ + { + "_index": "opportunity-index-2024-05-21_15-49-24", + "_id": "4", + "_score": 22.180708, + "_source": { + "opportunity_id": 4, + "opportunity_number": "ABC123-XYZ", + } + } + ] + } + """ + hits = raw_json.get("hits", {}) + hits_total = hits.get("total", {}) + total_records = hits_total.get("value", 0) + + + raw_records: list[dict[str, typing.Any]] = hits.get("hits", []) + + records = [] + for raw_record in raw_records: + record = raw_record.get("_source") + + if include_scores: + score: int | None = raw_record.get("_score", None) + record["relevancy_score"] = score + + records.append(record) + + + + raw_aggs: dict[str, dict[str, typing.Any]] = raw_json.get("aggregations", {}) + aggregations = _parse_aggregations(raw_aggs) + + + + return cls(total_records, records, aggregations) + + +def _parse_aggregations(raw_aggs: dict[str, dict[str, typing.Any]] | None) -> dict[str, dict[str, int]]: + # Note that this is assuming the response from a terms aggregation + # https://opensearch.org/docs/latest/aggregations/bucket/terms/ + + if raw_aggs is None: + return {} + + """ + Terms aggregations look like: + + "aggregations": { + "applicant_types": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "for_profit_organizations_other_than_small_businesses", + "doc_count": 1 + }, + { + "key": "other", + "doc_count": 1 + }, + { + "key": "state_governments", + "doc_count": 1 + } + ] + }, + "agencies": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "USAID", + "doc_count": 3 + } + ] + } + } + """ + + aggregations: dict[str, dict[str, int]] = {} + for field, raw_agg_value in raw_aggs.items(): + buckets: list[dict[str, typing.Any]] = raw_agg_value.get("buckets", []) + + field_aggregation: dict[str, int] = {} + for bucket in buckets: + key = bucket.get("key") + count = bucket.get("doc_count") + + field_aggregation[key] = count + + aggregations[field] = field_aggregation + + return aggregations \ No newline at end of file diff --git a/api/tests/src/adapters/search/test_opensearch_client.py b/api/tests/src/adapters/search/test_opensearch_client.py index d9ba22194..916c6effd 100644 --- a/api/tests/src/adapters/search/test_opensearch_client.py +++ b/api/tests/src/adapters/search/test_opensearch_client.py @@ -90,16 +90,14 @@ def test_swap_alias_index(search_client, generic_index): search_client.swap_alias_index(tmp_index, alias_name, delete_prior_indexes=True) # Can search by this alias and get records from the tmp index - resp = search_client.search(alias_name, {}) - resp_records = [record["_source"] for record in resp["hits"]["hits"]] - assert resp_records == tmp_index_records + resp = search_client.search(alias_name, {}, include_scores=False) + assert resp.records == tmp_index_records # Swap the index to the generic one + delete the tmp one search_client.swap_alias_index(generic_index, alias_name, delete_prior_indexes=True) - resp = search_client.search(alias_name, {}) - resp_records = [record["_source"] for record in resp["hits"]["hits"]] - assert resp_records == records + resp = search_client.search(alias_name, {}, include_scores=False) + assert resp.records == records # Verify the tmp one was deleted assert search_client._client.indices.exists(tmp_index) is False From 327f2423faa3341db42c70d91a4e0914bfb52c96 Mon Sep 17 00:00:00 2001 From: Michael Chouinard Date: Tue, 28 May 2024 
11:30:09 -0400 Subject: [PATCH 09/19] A lot of tests / comments / cleanup --- api/src/adapters/search/opensearch_client.py | 42 +- .../search/opensearch_query_builder.py | 96 +++- .../adapters/search/opensearch_response.py | 7 +- api/src/pagination/pagination_models.py | 5 + .../search/test_opensearch_query_builder.py | 518 ++++++++++++++++++ 5 files changed, 643 insertions(+), 25 deletions(-) create mode 100644 api/tests/src/adapters/search/test_opensearch_query_builder.py diff --git a/api/src/adapters/search/opensearch_client.py b/api/src/adapters/search/opensearch_client.py index 74e094b50..8f5aeb1b4 100644 --- a/api/src/adapters/search/opensearch_client.py +++ b/api/src/adapters/search/opensearch_client.py @@ -8,6 +8,30 @@ logger = logging.getLogger(__name__) +# By default, we'll override the default analyzer+tokenization +# for a search index. You can provide your own when calling create_index +DEFAULT_INDEX_ANALYSIS = { + "analyzer": { + "default": { + "type": "custom", + "filter": ["lowercase", "custom_stemmer"], + # Change tokenization to whitespace as the default is very clunky + # with a lot of our IDs that have dashes in them. + # see: https://opensearch.org/docs/latest/analyzers/tokenizers/index/ + "tokenizer": "whitespace", + } + }, + # Change the default stemming to use snowball which handles plural + # queries better than the default + # TODO - there are a lot of stemmers, we should take some time to figure out + # which one works best with our particular dataset. Snowball is really + # basic and naive (literally just adjusting suffixes on words in common patterns) + # which might be fine generally, but we work with a lot of acronyms + # and should verify that doesn't cause any issues. + # see: https://opensearch.org/docs/latest/analyzers/token-filters/index/ + "filter": {"custom_stemmer": {"type": "snowball", "name": "english"}}, +} + class SearchClient: def __init__(self, opensearch_config: OpensearchConfig | None = None) -> None: @@ -18,15 +42,27 @@ def __init__(self, opensearch_config: OpensearchConfig | None = None) -> None: self._client = opensearchpy.OpenSearch(**_get_connection_parameters(opensearch_config)) def create_index( - self, index_name: str, *, shard_count: int = 1, replica_count: int = 1 + self, + index_name: str, + *, + shard_count: int = 1, + replica_count: int = 1, + analysis: dict | None = None ) -> None: """ Create an empty search index """ + + # Allow the user to adjust how the index analyzer + tokenization works + # but also have a general default. + if analysis is None: + analysis = DEFAULT_INDEX_ANALYSIS + body = { "settings": { - "index": {"number_of_shards": shard_count, "number_of_replicas": replica_count} - } + "index": {"number_of_shards": shard_count, "number_of_replicas": replica_count}, + "analysis": analysis, + }, } logger.info("Creating search index %s", index_name, extra={"index_name": index_name}) diff --git a/api/src/adapters/search/opensearch_query_builder.py b/api/src/adapters/search/opensearch_query_builder.py index 36bb630c8..778cdbdc0 100644 --- a/api/src/adapters/search/opensearch_query_builder.py +++ b/api/src/adapters/search/opensearch_query_builder.py @@ -1,6 +1,6 @@ import typing -SORT_DIRECTION = typing.Literal["asc", "desc"] # TODO - use an enum? 
+from src.pagination.pagination_models import SortDirection class SearchQueryBuilder: @@ -8,13 +8,12 @@ def __init__(self) -> None: self.page_size = 25 self.page_number = 1 - self.sort_by = "relevancy_score" - self.sort_direction: SORT_DIRECTION = "asc" + self.sort_values: list[dict[str, dict[str, str]]] = [] - self.must = [] + self.must: list[dict] = [] + self.filters: list[dict] = [] - self.filters = [] - self.aggregations = [] + self.aggregations: dict[str, dict] = {} def pagination(self, page_size: int, page_number: int) -> typing.Self: """ @@ -27,9 +26,27 @@ def pagination(self, page_size: int, page_number: int) -> typing.Self: self.page_number = page_number return self - def sorting(self, sort_by: str, sort_direction: SORT_DIRECTION) -> typing.Self: - self.sort_by = sort_by - self.sort_direction = sort_direction + def sort_by(self, sort_values: list[typing.Tuple[str, SortDirection]]) -> typing.Self: + """ + List of tuples of field name + sort direction to sort by. If you wish to sort by the relevancy + score provide a field name of "relevancy". + + The order of the tuples matters, and the earlier values will take precedence - or put another way + the first tuple is the "primary sort", the second is the "secondary sort", and so on. If + all of the primary sort values are unique, then the secondary sorts won't be relevant. + + If this method is not called, no sort info will be added to the request, and OpenSearch + will internally default to sorting by relevancy score. If there is no scores calculated, + then the order is likely the IDs of the documents in the index. + + Note that multiple calls to this method will erase any info provided in a prior call. + """ + for field, sort_direction in sort_values: + if field == "relevancy": + field = "_score" + + self.sort_values.append({field: {"order": sort_direction.short_form()}}) + return self def simple_query(self, query: str, fields: list[str]) -> typing.Self: @@ -43,25 +60,47 @@ def simple_query(self, query: str, fields: list[str]) -> typing.Self: See: https://opensearch.org/docs/latest/query-dsl/full-text/simple-query-string/ """ self.must.append( - {"simple_query_string": {"query": query, "default_operator": "AND", "fields": fields}} + {"simple_query_string": {"query": query, "fields": fields, "default_operator": "AND"}} ) return self - def filter_terms(self, field: str, terms: list[str | int]) -> typing.Self: + def filter_terms(self, field: str, terms: list) -> typing.Self: + """ + For a given field, filter to a set of values. + + These filters do not affect the relevancy score, they are purely + a binary filter on the overall results. + """ self.filters.append({"terms": {field: terms}}) return self def aggregation_terms( - self, aggregation_name: str, field_name: str, size: int = 25 + self, aggregation_name: str, field_name: str, size: int = 25, minimum_count: int = 1 ) -> typing.Self: - self.aggregations.append({aggregation_name: {"terms": {"field": field_name, "size": size}}}) + """ + Add a term aggregation to the request. Aggregations are the counts of particular fields in the + full response and are often displayed next to filters in a search UI. + + Size determines how many different values can be returned. + Minimum count determines how many occurrences need to occur to include in the response. + If you pass in 0 for this, then values that don't occur at all in the full result set will be returned. 
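+
+        For example (the aggregation and field names are only illustrative):
+
+            builder.aggregation_terms("agency", "agency.keyword", minimum_count=0)
+
+        adds an entry under "aggs" in the built request that looks like:
+
+            {"agency": {"terms": {"field": "agency.keyword", "size": 25, "min_doc_count": 0}}}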
+ + see: https://opensearch.org/docs/latest/aggregations/bucket/terms/ + """ + self.aggregations[aggregation_name] = { + "terms": {"field": field_name, "size": size, "min_doc_count": minimum_count} + } return self def build(self) -> dict: - page_offset = self.page_size * (self.page_number - 1) + """ + Build the search request + """ - request = { + # Base request + page_offset = self.page_size * (self.page_number - 1) + request: dict[str, typing.Any] = { "size": self.page_size, "from": page_offset, # Always include the scores in the response objects @@ -69,18 +108,35 @@ def build(self) -> dict: "track_scores": True, } - if self.sort_by != "relevancy_score": - request["sort"] = [{self.sort_by: {"order": self.sort_direction}}] - + # Add sorting if any was provided + if len(self.sort_values) > 0: + request["sort"] = self.sort_values + + # Add a bool query + # + # The "must" block contains anything relevant to scoring + # The "filter" block contains filters that don't affect scoring and act + # as just binary filters + # + # See: https://opensearch.org/docs/latest/query-dsl/compound/bool/ bool_query = {} - if len(self.must) > 0: bool_query["must"] = self.must if len(self.filters) > 0: bool_query["filter"] = self.filters + # Add the query object which wraps the bool query + query_obj = {} if len(bool_query) > 0: - request["bool"] = bool_query + query_obj["bool"] = bool_query + + if len(query_obj) > 0: + request["query"] = query_obj + + # Add any aggregations + # see: https://opensearch.org/docs/latest/aggregations/ + if len(self.aggregations) > 0: + request["aggs"] = self.aggregations return request diff --git a/api/src/adapters/search/opensearch_response.py b/api/src/adapters/search/opensearch_response.py index 38d418c0b..c8bb16cb6 100644 --- a/api/src/adapters/search/opensearch_response.py +++ b/api/src/adapters/search/opensearch_response.py @@ -48,7 +48,7 @@ def from_opensearch_response( records = [] for raw_record in raw_records: - record = raw_record.get("_source") + record = raw_record.get("_source", {}) if include_scores: score: int | None = raw_record.get("_score", None) @@ -113,7 +113,10 @@ def _parse_aggregations( field_aggregation: dict[str, int] = {} for bucket in buckets: key = bucket.get("key") - count = bucket.get("doc_count") + count = bucket.get("doc_count", 0) + + if key is None: + raise ValueError("Unable to parse aggregation, null key for %s" % field) field_aggregation[key] = count diff --git a/api/src/pagination/pagination_models.py b/api/src/pagination/pagination_models.py index f3197e761..e1a93e018 100644 --- a/api/src/pagination/pagination_models.py +++ b/api/src/pagination/pagination_models.py @@ -11,6 +11,11 @@ class SortDirection(StrEnum): ASCENDING = "ascending" DESCENDING = "descending" + def short_form(self) -> str: + if self == SortDirection.DESCENDING: + return "desc" + return "asc" + class SortingParamsV0(BaseModel): order_by: str diff --git a/api/tests/src/adapters/search/test_opensearch_query_builder.py b/api/tests/src/adapters/search/test_opensearch_query_builder.py new file mode 100644 index 000000000..731c0e6ca --- /dev/null +++ b/api/tests/src/adapters/search/test_opensearch_query_builder.py @@ -0,0 +1,518 @@ +import uuid + +import pytest + +from src.adapters.search.opensearch_query_builder import SearchQueryBuilder +from src.pagination.pagination_models import SortDirection +from tests.conftest import BaseTestClass + +WAY_OF_KINGS = { + "id": 1, + "title": "The Way of Kings", + "author": "Brandon Sanderson", + "in_stock": True, + "page_count": 1007, +} 
+WORDS_OF_RADIANCE = { + "id": 2, + "title": "Words of Radiance", + "author": "Brandon Sanderson", + "in_stock": False, + "page_count": 1087, +} +OATHBRINGER = { + "id": 3, + "title": "Oathbringer", + "author": "Brandon Sanderson", + "in_stock": True, + "page_count": 1248, +} +RHYTHM_OF_WAR = { + "id": 4, + "title": "Rhythm of War", + "author": "Brandon Sanderson", + "in_stock": False, + "page_count": 1232, +} +GAME_OF_THRONES = { + "id": 5, + "title": "A Game of Thrones", + "author": "George R.R. Martin", + "in_stock": True, + "page_count": 694, +} +CLASH_OF_KINGS = { + "id": 6, + "title": "A Clash of Kings", + "author": "George R.R. Martin", + "in_stock": True, + "page_count": 768, +} +STORM_OF_SWORDS = { + "id": 7, + "title": "A Storm of Swords", + "author": "George R.R. Martin", + "in_stock": True, + "page_count": 973, +} +FEAST_FOR_CROWS = { + "id": 8, + "title": "A Feast for Crows", + "author": "George R.R. Martin", + "in_stock": True, + "page_count": 753, +} +DANCE_WITH_DRAGONS = { + "id": 9, + "title": "A Dance with Dragons", + "author": "George R.R. Martin", + "in_stock": False, + "page_count": 1056, +} +FELLOWSHIP_OF_THE_RING = { + "id": 10, + "title": "The Fellowship of the Ring", + "author": "J R.R. Tolkien", + "in_stock": True, + "page_count": 423, +} +TWO_TOWERS = { + "id": 11, + "title": "The Two Towers", + "author": "J R.R. Tolkien", + "in_stock": True, + "page_count": 352, +} +RETURN_OF_THE_KING = { + "id": 12, + "title": "The Return of the King", + "author": "J R.R. Tolkien", + "in_stock": True, + "page_count": 416, +} + +FULL_DATA = [ + WAY_OF_KINGS, + WORDS_OF_RADIANCE, + OATHBRINGER, + RHYTHM_OF_WAR, + GAME_OF_THRONES, + CLASH_OF_KINGS, + STORM_OF_SWORDS, + FEAST_FOR_CROWS, + DANCE_WITH_DRAGONS, + FELLOWSHIP_OF_THE_RING, + TWO_TOWERS, + RETURN_OF_THE_KING, +] + + +def validate_valid_request( + search_client, index, request, expected_results, expected_aggregations=None +): + json_value = request.build() + try: + resp = search_client.search(index, json_value, include_scores=False) + + except Exception: + # If it errors while making the query, catch the exception just to give a message that makes it a bit clearer + pytest.fail( + f"Request generated was invalid and caused an error in search client: {json_value}" + ) + + assert resp.records == expected_results + + if expected_aggregations is not None: + assert resp.aggregations == expected_aggregations + + +class TestOpenSearchQueryBuilder(BaseTestClass): + @pytest.fixture(scope="class") + def search_index(self, search_client): + index_name = f"test-search-index-{uuid.uuid4().int}" + + search_client.create_index(index_name) + + try: + yield index_name + finally: + # Try to clean up the index at the end + search_client.delete_index(index_name) + + @pytest.fixture(scope="class", autouse=True) + def seed_data(self, search_client, search_index): + search_client.bulk_upsert(search_index, FULL_DATA, primary_key_field="id") + + def test_query_builder_empty(self, search_client, search_index): + builder = SearchQueryBuilder() + + assert builder.build() == {"size": 25, "from": 0, "track_scores": True} + + validate_valid_request(search_client, search_index, builder, FULL_DATA) + + @pytest.mark.parametrize( + "page_size,page_number,sort_values,expected_sort,expected_results", + [ + ### ID Sorting + (25, 1, [("id", SortDirection.ASCENDING)], [{"id": {"order": "asc"}}], FULL_DATA), + (3, 1, [("id", SortDirection.ASCENDING)], [{"id": {"order": "asc"}}], FULL_DATA[:3]), + ( + 15, + 1, + [("id", SortDirection.DESCENDING)], + [{"id": 
{"order": "desc"}}], + FULL_DATA[::-1], + ), + ( + 5, + 2, + [("id", SortDirection.DESCENDING)], + [{"id": {"order": "desc"}}], + FULL_DATA[-6:-11:-1], + ), + (10, 100, [("id", SortDirection.DESCENDING)], [{"id": {"order": "desc"}}], []), + ### Title sorting + ( + 2, + 1, + [("title.keyword", SortDirection.ASCENDING)], + [{"title.keyword": {"order": "asc"}}], + [CLASH_OF_KINGS, DANCE_WITH_DRAGONS], + ), + ( + 3, + 4, + [("title.keyword", SortDirection.DESCENDING)], + [{"title.keyword": {"order": "desc"}}], + [FEAST_FOR_CROWS, DANCE_WITH_DRAGONS, CLASH_OF_KINGS], + ), + ( + 10, + 2, + [("title.keyword", SortDirection.ASCENDING)], + [{"title.keyword": {"order": "asc"}}], + [WAY_OF_KINGS, WORDS_OF_RADIANCE], + ), + ### Page Count + ( + 3, + 1, + [("page_count", SortDirection.ASCENDING)], + [{"page_count": {"order": "asc"}}], + [TWO_TOWERS, RETURN_OF_THE_KING, FELLOWSHIP_OF_THE_RING], + ), + ( + 4, + 2, + [("page_count", SortDirection.DESCENDING)], + [{"page_count": {"order": "desc"}}], + [WAY_OF_KINGS, STORM_OF_SWORDS, CLASH_OF_KINGS, FEAST_FOR_CROWS], + ), + ### Multi-sorts + # Author ascending (Primary) + Page count descending (Secondary) + ( + 5, + 1, + [ + ("author.keyword", SortDirection.ASCENDING), + ("page_count", SortDirection.DESCENDING), + ], + [{"author.keyword": {"order": "asc"}}, {"page_count": {"order": "desc"}}], + [OATHBRINGER, RHYTHM_OF_WAR, WORDS_OF_RADIANCE, WAY_OF_KINGS, DANCE_WITH_DRAGONS], + ), + # Author descending (Primary) + ID descending (Secondary) + ( + 4, + 1, + [("author.keyword", SortDirection.DESCENDING), ("id", SortDirection.DESCENDING)], + [{"author.keyword": {"order": "desc"}}, {"id": {"order": "desc"}}], + [RETURN_OF_THE_KING, TWO_TOWERS, FELLOWSHIP_OF_THE_RING, DANCE_WITH_DRAGONS], + ), + ], + ) + def test_query_builder_pagination_and_sorting( + self, + search_client, + search_index, + page_size, + page_number, + sort_values, + expected_sort, + expected_results, + ): + builder = ( + SearchQueryBuilder() + .pagination(page_size=page_size, page_number=page_number) + .sort_by(sort_values) + ) + + assert builder.build() == { + "size": page_size, + "from": page_size * (page_number - 1), + "track_scores": True, + "sort": expected_sort, + } + + validate_valid_request(search_client, search_index, builder, expected_results) + + # Note that by having parametrize twice, it will run every one of the specific tests with the different + # sort by parameter to show that they behave the same + @pytest.mark.parametrize("sort_by", [[], [("relevancy", SortDirection.DESCENDING)]]) + @pytest.mark.parametrize( + "filters,expected_results", + [ + ### Author + ( + [("author.keyword", ["Brandon Sanderson"])], + [WAY_OF_KINGS, WORDS_OF_RADIANCE, OATHBRINGER, RHYTHM_OF_WAR], + ), + ( + [("author.keyword", ["George R.R. Martin", "Mark Twain"])], + [ + GAME_OF_THRONES, + CLASH_OF_KINGS, + STORM_OF_SWORDS, + FEAST_FOR_CROWS, + DANCE_WITH_DRAGONS, + ], + ), + ( + [("author.keyword", ["J R.R. Tolkien"])], + [FELLOWSHIP_OF_THE_RING, TWO_TOWERS, RETURN_OF_THE_KING], + ), + ( + [("author.keyword", ["Brandon Sanderson", "J R.R. Tolkien"])], + [ + WAY_OF_KINGS, + WORDS_OF_RADIANCE, + OATHBRINGER, + RHYTHM_OF_WAR, + FELLOWSHIP_OF_THE_RING, + TWO_TOWERS, + RETURN_OF_THE_KING, + ], + ), + ( + [("author.keyword", ["Brandon Sanderson", "George R.R. Martin", "J R.R. 
Tolkien"])], + FULL_DATA, + ), + ([("author.keyword", ["Mark Twain"])], []), + ### in stock + ([("in_stock", [False])], [WORDS_OF_RADIANCE, RHYTHM_OF_WAR, DANCE_WITH_DRAGONS]), + ([("in_stock", [True, False])], FULL_DATA), + ### page count + ([("page_count", [1007, 694, 352])], [WAY_OF_KINGS, GAME_OF_THRONES, TWO_TOWERS]), + ([("page_count", [1, 2, 3])], []), + ### Multi-filter + # Author + In Stock + ( + [("author.keyword", ["Brandon Sanderson"]), ("in_stock", [True])], + [WAY_OF_KINGS, OATHBRINGER], + ), + ( + [ + ("author.keyword", ["George R.R. Martin", "J R.R. Tolkien"]), + ("in_stock", [False]), + ], + [DANCE_WITH_DRAGONS], + ), + # Author + Title + ( + [ + ("author.keyword", ["Brandon Sanderson", "J R.R. Tolkien", "Mark Twain"]), + ( + "title.keyword", + ["A Game of Thrones", "The Way of Kings", "The Fellowship of the Ring"], + ), + ], + [WAY_OF_KINGS, FELLOWSHIP_OF_THE_RING], + ), + ( + [ + ("author.keyword", ["George R.R. Martin", "J R.R. Tolkien"]), + ( + "title.keyword", + ["A Game of Thrones", "The Way of Kings", "The Fellowship of the Ring"], + ), + ], + [GAME_OF_THRONES, FELLOWSHIP_OF_THE_RING], + ), + ], + ) + def test_query_builder_filter_terms( + self, search_client, search_index, filters, expected_results, sort_by + ): + builder = SearchQueryBuilder().sort_by(sort_by) + + expected_terms = [] + for filter in filters: + builder.filter_terms(filter[0], filter[1]) + + expected_terms.append({"terms": {filter[0]: filter[1]}}) + + expected_query = { + "size": 25, + "from": 0, + "track_scores": True, + "query": {"bool": {"filter": expected_terms}}, + } + + if len(sort_by) > 0: + expected_query["sort"] = [{"_score": {"order": "desc"}}] + + assert builder.build() == expected_query + + validate_valid_request(search_client, search_index, builder, expected_results) + + @pytest.mark.parametrize( + "query,fields,expected_results,expected_aggregations", + [ + ( + "king", + ["title"], + [WAY_OF_KINGS, CLASH_OF_KINGS, RETURN_OF_THE_KING], + { + "author": { + "Brandon Sanderson": 1, + "George R.R. Martin": 1, + "J R.R. Tolkien": 1, + }, + "in_stock": {0: 0, 1: 3}, + }, + ), + ( + "R.R.", + ["author"], + [ + GAME_OF_THRONES, + CLASH_OF_KINGS, + STORM_OF_SWORDS, + FEAST_FOR_CROWS, + DANCE_WITH_DRAGONS, + FELLOWSHIP_OF_THE_RING, + TWO_TOWERS, + RETURN_OF_THE_KING, + ], + { + "author": { + "Brandon Sanderson": 0, + "George R.R. Martin": 5, + "J R.R. Tolkien": 3, + }, + "in_stock": {0: 1, 1: 7}, + }, + ), + ( + "Martin (Crows | Storm)", + ["title", "author"], + [STORM_OF_SWORDS, FEAST_FOR_CROWS], + { + "author": { + "Brandon Sanderson": 0, + "George R.R. Martin": 2, + "J R.R. Tolkien": 0, + }, + "in_stock": {0: 0, 1: 2}, + }, + ), + ( + "(Sanderson + (Words | King)) | Tolkien | Crow", + ["title", "author"], + [ + WAY_OF_KINGS, + WORDS_OF_RADIANCE, + FEAST_FOR_CROWS, + FELLOWSHIP_OF_THE_RING, + TWO_TOWERS, + RETURN_OF_THE_KING, + ], + { + "author": { + "Brandon Sanderson": 2, + "George R.R. Martin": 1, + "J R.R. Tolkien": 3, + }, + "in_stock": {0: 1, 1: 5}, + }, + ), + ( + "-R.R. -Oathbringer", + ["title", "author"], + [WAY_OF_KINGS, WORDS_OF_RADIANCE, RHYTHM_OF_WAR], + { + "author": { + "Brandon Sanderson": 3, + "George R.R. Martin": 0, + "J R.R. Tolkien": 0, + }, + "in_stock": {0: 2, 1: 1}, + }, + ), + ( + "Brandon | George | J", + ["title", "author"], + FULL_DATA, + { + "author": { + "Brandon Sanderson": 4, + "George R.R. Martin": 5, + "J R.R. 
Tolkien": 3, + }, + "in_stock": {0: 3, 1: 9}, + }, + ), + ( + "how to make a pizza", + ["title", "author"], + [], + { + "author": { + "Brandon Sanderson": 0, + "George R.R. Martin": 0, + "J R.R. Tolkien": 0, + }, + "in_stock": {0: 0, 1: 0}, + }, + ), + ], + ) + def test_query_builder_simple_query_and_aggregations( + self, search_client, search_index, query, fields, expected_results, expected_aggregations + ): + # Add a sort by ID ascending to make it so any relevancy from this is ignored, just testing that values returned + builder = SearchQueryBuilder().sort_by([("id", SortDirection.ASCENDING)]) + + builder.simple_query(query, fields) + + # Statically add the same aggregated fields every time + builder.aggregation_terms("author", "author.keyword", minimum_count=0).aggregation_terms( + "in_stock", "in_stock", minimum_count=0 + ) + + assert builder.build() == { + "size": 25, + "from": 0, + "track_scores": True, + "query": { + "bool": { + "must": [ + { + "simple_query_string": { + "query": query, + "fields": fields, + "default_operator": "AND", + } + } + ] + } + }, + "sort": [{"id": {"order": "asc"}}], + "aggs": { + "author": {"terms": {"field": "author.keyword", "size": 25, "min_doc_count": 0}}, + "in_stock": {"terms": {"field": "in_stock", "size": 25, "min_doc_count": 0}}, + }, + } + + validate_valid_request( + search_client, search_index, builder, expected_results, expected_aggregations + ) From eaba30dc7a3b94b492f864f7479b36f532361a4d Mon Sep 17 00:00:00 2001 From: Michael Chouinard Date: Tue, 28 May 2024 11:49:20 -0400 Subject: [PATCH 10/19] Add an example --- api/src/adapters/search/opensearch_client.py | 5 +- .../search/opensearch_query_builder.py | 75 +++++++++++++++++++ 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/api/src/adapters/search/opensearch_client.py b/api/src/adapters/search/opensearch_client.py index 8f5aeb1b4..b2e5b2bea 100644 --- a/api/src/adapters/search/opensearch_client.py +++ b/api/src/adapters/search/opensearch_client.py @@ -142,9 +142,8 @@ def swap_alias_index( self.delete_index(index) def search_raw(self, index_name: str, search_query: dict) -> dict: - # TODO - add more when we build out the request/response parsing logic - # we use something like Pydantic to help reorganize the response - # object into something easier to parse. + # Simple wrapper around search if you don't want the request or response + # object handled in any special way. return self._client.search(index=index_name, body=search_query) def search( diff --git a/api/src/adapters/search/opensearch_query_builder.py b/api/src/adapters/search/opensearch_query_builder.py index 778cdbdc0..4aa4e07e5 100644 --- a/api/src/adapters/search/opensearch_query_builder.py +++ b/api/src/adapters/search/opensearch_query_builder.py @@ -4,6 +4,81 @@ class SearchQueryBuilder: + """ + Utility to help build queries to OpenSearch + + This helps with making sure everything we want in a search query goes + to the right spot in the large JSON object we're building. Note that + it still requires some understanding of OpenSearch (eg. when to add ".keyword" to a field name) + + For example, if you wanted to build a query against a search index containing + books with the following: + * Page size of 5, page number 1 + * Sorted by relevancy score descending + * Scored on titles containing "king" + * Where the author is one of Brandon Sanderson or J R.R. 
Tolkien + * Returning aggregate counts of books by those authors in the full results + + This query could either be built manually and look like: + + { + "size": 5, + "from": 0, + "track_scores": true, + "sort": [ + { + "_score": { + "order": "desc" + } + } + ], + "query": { + "bool": { + "must": [ + { + "simple_query_string": { + "query": "king", + "fields": [ + "title.keyword" + ], + "default_operator": "AND" + } + } + ], + "filter": [ + { + "terms": { + "author.keyword": [ + "Brandon Sanderson", + "J R.R. Tolkien" + ] + } + } + ] + } + }, + "aggs": { + "author": { + "terms": { + "field": "author.keyword", + "size": 25, + "min_doc_count": 0 + } + } + } + } + + Or you could use the builder and produce the same result: + + search_query = SearchQueryBuilder() + .pagination(page_size=5, page_number=1) + .sort_by([("relevancy", SortDirection.DESCENDING)]) + .simple_query("king", fields=["title.keyword"]) + .filter_terms("author.keyword", terms=["Brandon Sanderson", "J R.R. Tolkien"]) + .aggregation_terms(aggregation_name="author", field_name="author.keyword", minimum_count=0) + .build() + """ + def __init__(self) -> None: self.page_size = 25 self.page_number = 1 From 641ebd110558660d42d7ed12de30268e06efd4e5 Mon Sep 17 00:00:00 2001 From: Michael Chouinard Date: Thu, 30 May 2024 12:25:20 -0400 Subject: [PATCH 11/19] [Issue #16] Connect the API to use the search index --- api/src/adapters/search/__init__.py | 3 +- api/src/adapters/search/flask_opensearch.py | 33 + api/src/adapters/search/opensearch_client.py | 5 +- .../opportunities_v1/opportunity_routes.py | 77 +- .../opportunities_v1/opportunity_schemas.py | 70 +- .../api/schemas/extension/schema_fields.py | 22 +- api/src/api/schemas/response_schema.py | 2 + api/src/app.py | 8 + .../backend/load_opportunities_to_index.py | 4 +- api/src/search/search_config.py | 19 + .../opportunities_v1/search_opportunities.py | 131 ++- api/tests/conftest.py | 13 +- .../src/api/opportunities_v1/conftest.py | 4 +- .../test_opportunity_route_search.py | 772 +++++++++++++++++- 14 files changed, 1112 insertions(+), 51 deletions(-) create mode 100644 api/src/adapters/search/flask_opensearch.py create mode 100644 api/src/search/search_config.py diff --git a/api/src/adapters/search/__init__.py b/api/src/adapters/search/__init__.py index 6b2607a04..c44446964 100644 --- a/api/src/adapters/search/__init__.py +++ b/api/src/adapters/search/__init__.py @@ -1,4 +1,5 @@ from src.adapters.search.opensearch_client import SearchClient from src.adapters.search.opensearch_config import get_opensearch_config +from src.adapters.search.opensearch_query_builder import SearchQueryBuilder -__all__ = ["SearchClient", "get_opensearch_config"] +__all__ = ["SearchClient", "get_opensearch_config", "SearchQueryBuilder"] diff --git a/api/src/adapters/search/flask_opensearch.py b/api/src/adapters/search/flask_opensearch.py new file mode 100644 index 000000000..66ed31528 --- /dev/null +++ b/api/src/adapters/search/flask_opensearch.py @@ -0,0 +1,33 @@ +from functools import wraps +from typing import Callable, Concatenate, ParamSpec, TypeVar + +from flask import Flask, current_app + +from src.adapters.search import SearchClient + +_SEARCH_CLIENT_KEY = "search-client" + + +def register_search_client(search_client: SearchClient, app: Flask) -> None: + app.extensions[_SEARCH_CLIENT_KEY] = search_client + + +def get_search_client(app: Flask) -> SearchClient: + return app.extensions[_SEARCH_CLIENT_KEY] + + +P = ParamSpec("P") +T = TypeVar("T") + + +def with_search_client() -> 
Callable[[Callable[Concatenate[SearchClient, P], T]], Callable[P, T]]: + # TODO docs + + def decorator(f: Callable[Concatenate[SearchClient, P], T]) -> Callable[P, T]: + @wraps(f) + def wrapper(*args: P.args, **kwargs: P.kwargs) -> T: + return f(get_search_client(current_app), *args, **kwargs) + + return wrapper + + return decorator diff --git a/api/src/adapters/search/opensearch_client.py b/api/src/adapters/search/opensearch_client.py index b2e5b2bea..cb97a9c8c 100644 --- a/api/src/adapters/search/opensearch_client.py +++ b/api/src/adapters/search/opensearch_client.py @@ -15,10 +15,7 @@ "default": { "type": "custom", "filter": ["lowercase", "custom_stemmer"], - # Change tokenization to whitespace as the default is very clunky - # with a lot of our IDs that have dashes in them. - # see: https://opensearch.org/docs/latest/analyzers/tokenizers/index/ - "tokenizer": "whitespace", + "tokenizer": "standard", } }, # Change the default stemming to use snowball which handles plural diff --git a/api/src/api/opportunities_v1/opportunity_routes.py b/api/src/api/opportunities_v1/opportunity_routes.py index 0d94996b0..d4ba8cf48 100644 --- a/api/src/api/opportunities_v1/opportunity_routes.py +++ b/api/src/api/opportunities_v1/opportunity_routes.py @@ -2,6 +2,8 @@ import src.adapters.db as db import src.adapters.db.flask_db as flask_db +import src.adapters.search as search +import src.adapters.search.flask_opensearch as flask_opensearch import src.api.opportunities_v1.opportunity_schemas as opportunity_schemas import src.api.response as response from src.api.opportunities_v1.opportunity_blueprint import opportunity_blueprint @@ -24,20 +26,77 @@ See [Release Phases](https://github.com/github/roadmap?tab=readme-ov-file#release-phases) for further details. """ +examples = { + "example1": { + "summary": "No filters", + "value": { + "pagination": { + "order_by": "opportunity_id", + "page_offset": 1, + "page_size": 25, + "sort_direction": "ascending", + }, + }, + }, + "example2": { + "summary": "All filters", + "value": { + "query": "research", + "filters": { + "agency": {"one_of": ["USAID", "ARPAH"]}, + "applicant_type": { + "one_of": ["state_governments", "county_governments", "individuals"] + }, + "funding_category": {"one_of": ["recovery_act", "arts", "natural_resources"]}, + "funding_instrument": {"one_of": ["cooperative_agreement", "grant"]}, + "opportunity_status": {"one_of": ["forecasted", "posted"]}, + }, + "pagination": { + "order_by": "opportunity_id", + "page_offset": 1, + "page_size": 25, + "sort_direction": "descending", + }, + }, + }, + "example3": { + "summary": "Query & opportunity_status filters", + "value": { + "query": "research", + "filters": { + "opportunity_status": {"one_of": ["forecasted", "posted"]}, + }, + "pagination": { + "order_by": "opportunity_id", + "page_offset": 1, + "page_size": 25, + "sort_direction": "descending", + }, + }, + }, +} + @opportunity_blueprint.post("/opportunities/search") @opportunity_blueprint.input( - opportunity_schemas.OpportunitySearchRequestV1Schema, arg_name="search_params" + opportunity_schemas.OpportunitySearchRequestV1Schema, + arg_name="search_params", + examples=examples, ) # many=True allows us to return a list of opportunity objects -@opportunity_blueprint.output(opportunity_schemas.OpportunityV1Schema(many=True)) +@opportunity_blueprint.output(opportunity_schemas.OpportunitySearchResponseV1Schema()) @opportunity_blueprint.auth_required(api_key_auth) @opportunity_blueprint.doc(description=SHARED_ALPHA_DESCRIPTION) -def 
opportunity_search(search_params: dict) -> response.ApiResponse: +@flask_opensearch.with_search_client() +def opportunity_search( + search_client: search.SearchClient, search_params: dict +) -> response.ApiResponse: add_extra_data_to_current_request_logs(flatten_dict(search_params, prefix="request.body")) logger.info("POST /v1/opportunities/search") - opportunities, pagination_info = search_opportunities(search_params) + opportunities, aggregations, pagination_info = search_opportunities( + search_client, search_params + ) add_extra_data_to_current_request_logs( { @@ -48,12 +107,18 @@ def opportunity_search(search_params: dict) -> response.ApiResponse: logger.info("Successfully fetched opportunities") return response.ApiResponse( - message="Success", data=opportunities, pagination_info=pagination_info + message="Success", + data={ + "opportunities": opportunities, + "facet_counts": aggregations, + "pagination_info": pagination_info, + }, + pagination_info=pagination_info, ) @opportunity_blueprint.get("/opportunities/") -@opportunity_blueprint.output(opportunity_schemas.OpportunityV1Schema) +@opportunity_blueprint.output(opportunity_schemas.OpportunityV1Schema()) @opportunity_blueprint.auth_required(api_key_auth) @opportunity_blueprint.doc(description=SHARED_ALPHA_DESCRIPTION) @flask_db.with_db_session() diff --git a/api/src/api/opportunities_v1/opportunity_schemas.py b/api/src/api/opportunities_v1/opportunity_schemas.py index 5f72c7958..3bc10ef5b 100644 --- a/api/src/api/opportunities_v1/opportunity_schemas.py +++ b/api/src/api/opportunities_v1/opportunity_schemas.py @@ -7,7 +7,7 @@ OpportunityCategory, OpportunityStatus, ) -from src.pagination.pagination_schema import generate_pagination_schema +from src.pagination.pagination_schema import PaginationInfoSchema, generate_pagination_schema class OpportunitySummaryV1Schema(Schema): @@ -195,7 +195,6 @@ class OpportunityAssistanceListingV1Schema(Schema): class OpportunityV1Schema(Schema): opportunity_id = fields.Integer( - dump_only=True, metadata={"description": "The internal ID of the opportunity", "example": 12345}, ) @@ -266,11 +265,73 @@ class OpportunitySearchFilterV1Schema(Schema): ) agency = fields.Nested( StrSearchSchemaBuilder("AgencyFilterV1Schema") - .with_one_of(example="US-ABC", minimum_length=2) + .with_one_of(example="USAID", minimum_length=2) .build() ) +class OpportunityFacetV1Schema(Schema): + opportunity_status = fields.Dict( + keys=fields.String(), + values=fields.Integer(), + metadata={ + "description": "The counts of opportunity_status values in the full response", + "example": {"posted": 1, "forecasted": 2}, + }, + ) + applicant_type = fields.Dict( + keys=fields.String(), + values=fields.Integer(), + metadata={ + "description": "The counts of applicant_type values in the full response", + "example": { + "state_governments": 3, + "county_governments": 2, + "city_or_township_governments": 1, + }, + }, + ) + funding_instrument = fields.Dict( + keys=fields.String(), + values=fields.Integer(), + metadata={ + "description": "The counts of funding_instrument values in the full response", + "example": {"cooperative_agreement": 4, "grant": 3}, + }, + ) + funding_category = fields.Dict( + keys=fields.String(), + values=fields.Integer(), + metadata={ + "description": "The counts of funding_category values in the full response", + "example": {"recovery_act": 2, "arts": 3, "agriculture": 5}, + }, + ) + agency = fields.Dict( + keys=fields.String(), + values=fields.Integer(), + metadata={ + "description": "The counts of agency values 
in the full response", + "example": {"USAID": 4, "ARPAH": 3}, + }, + ) + + +class OpportunitySearchResponseV1Schema(Schema): + opportunities = fields.List( + fields.Nested(OpportunityV1Schema()), metadata={"description": "The opportunity records"} + ) + facet_counts = fields.Nested( + OpportunityFacetV1Schema(), + metadata={"description": "Counts of filter/facet values in the full response"}, + ) + + pagination_info = fields.Nested( + PaginationInfoSchema(), + metadata={"description": "The pagination information for the search response"}, + ) + + class OpportunitySearchRequestV1Schema(Schema): query = fields.String( metadata={ @@ -284,8 +345,9 @@ class OpportunitySearchRequestV1Schema(Schema): pagination = fields.Nested( generate_pagination_schema( - "OpportunityPaginationSchema", + "OpportunityPaginationV1Schema", [ + "relevancy", "opportunity_id", "opportunity_number", "opportunity_title", diff --git a/api/src/api/schemas/extension/schema_fields.py b/api/src/api/schemas/extension/schema_fields.py index 97b08636d..d4678f69d 100644 --- a/api/src/api/schemas/extension/schema_fields.py +++ b/api/src/api/schemas/extension/schema_fields.py @@ -39,7 +39,19 @@ class MixinField(original_fields.Field): } def __init__(self, **kwargs: typing.Any) -> None: - super().__init__(**kwargs) + # By default, make it so if a field is required, null isn't allowed + # otherwise null is allowed. You can modify this behavior (ie. required, and none allowed) + # by explicitly setting the allow_none field. + is_required = kwargs.get("required", None) + allow_none = kwargs.pop("allow_none", None) + + if allow_none is None: + if is_required: + allow_none = False + else: + allow_none = True + + super().__init__(allow_none=allow_none, **kwargs) # The actual error mapping used for a specific instance self._error_mapping: dict[str, MarshmallowErrorContainer] = {} @@ -183,6 +195,12 @@ class Raw(original_fields.Raw, MixinField): pass +class Dict(original_fields.Dict, MixinField): + error_mapping: dict[str, MarshmallowErrorContainer] = { + "invalid": MarshmallowErrorContainer(ValidationErrorType.INVALID, "Not a valid dict."), + } + + class Enum(MixinField): """ Custom field class for handling unioning together multiple Python enums into @@ -230,7 +248,7 @@ def _serialize( if value is None: return None - val = value.value + val = value return self.field._serialize(val, attr, obj, **kwargs) def _deserialize( diff --git a/api/src/api/schemas/response_schema.py b/api/src/api/schemas/response_schema.py index b6509699b..ecefa6515 100644 --- a/api/src/api/schemas/response_schema.py +++ b/api/src/api/schemas/response_schema.py @@ -19,6 +19,8 @@ class ErrorResponseSchema(BaseResponseSchema): class ResponseSchema(BaseResponseSchema): + # TODO - once we've worked with the frontend folks, this should be removed from the top-level + # schema entirely as we should only return it when the endpoint has pagination. 
pagination_info = fields.Nested( PaginationInfoSchema(), metadata={"description": "The pagination information for paginated endpoints"}, diff --git a/api/src/app.py b/api/src/app.py index e9604157b..c4c0f17fc 100644 --- a/api/src/app.py +++ b/api/src/app.py @@ -7,6 +7,8 @@ import src.adapters.db as db import src.adapters.db.flask_db as flask_db +import src.adapters.search as search +import src.adapters.search.flask_opensearch as flask_opensearch import src.api.feature_flags.feature_flag_config as feature_flag_config import src.logging import src.logging.flask_logger as flask_logger @@ -46,6 +48,7 @@ def create_app() -> APIFlask: configure_app(app) register_blueprints(app) register_index(app) + register_search_client(app) return app @@ -60,6 +63,11 @@ def register_db_client(app: APIFlask) -> None: flask_db.register_db_client(db_client, app) +def register_search_client(app: APIFlask) -> None: + search_client = search.SearchClient() + flask_opensearch.register_search_client(search_client, app) + + def configure_app(app: APIFlask) -> None: # Modify the response schema to instead use the format of our ApiResponse class # which adds additional details to the object. diff --git a/api/src/search/backend/load_opportunities_to_index.py b/api/src/search/backend/load_opportunities_to_index.py index a01357a96..630ecf616 100644 --- a/api/src/search/backend/load_opportunities_to_index.py +++ b/api/src/search/backend/load_opportunities_to_index.py @@ -9,7 +9,7 @@ import src.adapters.db as db import src.adapters.search as search -from src.api.opportunities_v0_1.opportunity_schemas import OpportunityV01Schema +from src.api.opportunities_v1.opportunity_schemas import OpportunityV1Schema from src.db.models.opportunity_models import CurrentOpportunitySummary, Opportunity from src.task.task import Task from src.util.datetime_util import get_now_us_eastern_datetime @@ -95,7 +95,7 @@ def fetch_opportunities(self) -> Iterator[Sequence[Opportunity]]: def load_records(self, records: Sequence[Opportunity]) -> None: logger.info("Loading batch of opportunities...") - schema = OpportunityV01Schema() + schema = OpportunityV1Schema() json_records = [] for record in records: diff --git a/api/src/search/search_config.py b/api/src/search/search_config.py new file mode 100644 index 000000000..8b7ea4f29 --- /dev/null +++ b/api/src/search/search_config.py @@ -0,0 +1,19 @@ +from pydantic import Field + +from src.util.env_config import PydanticBaseEnvConfig + + +class SearchConfig(PydanticBaseEnvConfig): + opportunity_search_index_alias: str = Field(default="opportunity-index-alias") + + +_search_config: SearchConfig | None = None + + +def get_search_config() -> SearchConfig: + global _search_config + + if _search_config is None: + _search_config = SearchConfig() + + return _search_config diff --git a/api/src/services/opportunities_v1/search_opportunities.py b/api/src/services/opportunities_v1/search_opportunities.py index 1823bc31d..cd94c4c2e 100644 --- a/api/src/services/opportunities_v1/search_opportunities.py +++ b/api/src/services/opportunities_v1/search_opportunities.py @@ -1,39 +1,144 @@ import logging +import math from typing import Sequence, Tuple from pydantic import BaseModel, Field -from src.db.models.opportunity_models import Opportunity -from src.pagination.pagination_models import PaginationInfo, PaginationParams +import src.adapters.search as search +from src.api.opportunities_v1.opportunity_schemas import OpportunityV1Schema +from src.pagination.pagination_models import PaginationInfo, PaginationParams, 
SortDirection +from src.search.search_config import get_search_config logger = logging.getLogger(__name__) +# To assist with mapping field names from our API requests +# to what they are called in the search index, this mapping +# can be used. Note that in many cases its just adjusting paths +# or for text based fields adding ".keyword" to the end to tell +# the query we want to use the raw value rather than the tokenized one +# See: https://opensearch.org/docs/latest/field-types/supported-field-types/keyword/ +REQUEST_FIELD_NAME_MAPPING = { + "opportunity_number": "opportunity_number.keyword", + "opportunity_title": "opportunity_title.keyword", + "post_date": "summary.post_date", + "close_date": "summary.close_date", + "agency_code": "agency.keyword", + "agency": "agency.keyword", + "opportunity_status": "opportunity_status.keyword", + "funding_instrument": "summary.funding_instruments.keyword", + "funding_category": "summary.funding_categories.keyword", + "applicant_type": "summary.applicant_types.keyword", +} -class SearchOpportunityFilters(BaseModel): - funding_instrument: dict | None = Field(default=None) - funding_category: dict | None = Field(default=None) - applicant_type: dict | None = Field(default=None) - opportunity_status: dict | None = Field(default=None) - agency: dict | None = Field(default=None) +SEARCH_FIELDS = [ + # Note that we do keyword for agency & opportunity number + # as we don't want to compare to a tokenized value which + # may have split on the dashes. + "agency.keyword^16", + "opportunity_title^2", + "opportunity_number.keyword^12", + "summary.summary_description", + "opportunity_assistance_listings.assistance_listing_number^10", + "opportunity_assistance_listings.program_title^4", +] + +SCHEMA = OpportunityV1Schema() class SearchOpportunityParams(BaseModel): pagination: PaginationParams query: str | None = Field(default=None) - filters: SearchOpportunityFilters | None = Field(default=None) + filters: dict | None = Field(default=None) + + +def _adjust_field_name(field: str) -> str: + return REQUEST_FIELD_NAME_MAPPING.get(field, field) + + +def _get_sort_by(pagination: PaginationParams) -> list[tuple[str, SortDirection]]: + sort_by: list[tuple[str, SortDirection]] = [] + + sort_by.append((_adjust_field_name(pagination.order_by), pagination.sort_direction)) + + # Add a secondary sort for relevancy to sort by post date (matching the sort direction) + if pagination.order_by == "relevancy": + sort_by.append((_adjust_field_name("post_date"), pagination.sort_direction)) + + return sort_by + + +def _add_search_filters(builder: search.SearchQueryBuilder, filters: dict | None) -> None: + if filters is None: + return + + for field, field_filters in filters.items(): + # one_of filters translate to an opensearch term filter + # see: https://opensearch.org/docs/latest/query-dsl/term/terms/ + one_of_filters = field_filters.get("one_of", None) + if one_of_filters: + builder.filter_terms(_adjust_field_name(field), one_of_filters) + + +def _add_aggregations(builder: search.SearchQueryBuilder) -> None: + # TODO - we'll likely want to adjust the total number of values returned, especially + # for agency as there could be hundreds of different agencies, and currently it's limited to 25. 
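+    #
+    # Each terms aggregation added below is parsed by SearchResponse into a {value: count}
+    # mapping and returned to API callers as facet_counts, e.g. (counts are illustrative only):
+    #   {"opportunity_status": {"posted": 1, "forecasted": 2}, "agency": {"USAID": 4, "ARPAH": 3}}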
+ builder.aggregation_terms("opportunity_status", _adjust_field_name("applicant_types")) + builder.aggregation_terms("applicant_type", _adjust_field_name("applicant_types")) + builder.aggregation_terms("funding_instrument", _adjust_field_name("funding_instruments")) + builder.aggregation_terms("funding_category", _adjust_field_name("funding_categories")) + builder.aggregation_terms("agency", _adjust_field_name("agency_code")) -def search_opportunities(raw_search_params: dict) -> Tuple[Sequence[Opportunity], PaginationInfo]: +def _get_search_request(params: SearchOpportunityParams) -> dict: + builder = search.SearchQueryBuilder() + + # Pagination + builder.pagination( + page_size=params.pagination.page_size, page_number=params.pagination.page_offset + ) + + # Sorting + builder.sort_by(_get_sort_by(params.pagination)) + + # Query + if params.query: + builder.simple_query(params.query, SEARCH_FIELDS) + + # Filters + _add_search_filters(builder, params.filters) + + # Aggregations / Facet / Filter Counts + _add_aggregations(builder) + + return builder.build() + + +def search_opportunities( + search_client: search.SearchClient, raw_search_params: dict +) -> Tuple[Sequence[dict], dict, PaginationInfo]: search_params = SearchOpportunityParams.model_validate(raw_search_params) + search_request = _get_search_request(search_params) + + response = search_client.search( + get_search_config().opportunity_search_index_alias, search_request + ) + pagination_info = PaginationInfo( page_offset=search_params.pagination.page_offset, page_size=search_params.pagination.page_size, order_by=search_params.pagination.order_by, sort_direction=search_params.pagination.sort_direction, - total_records=0, - total_pages=0, + total_records=response.total_records, + total_pages=int(math.ceil(response.total_records / search_params.pagination.page_size)), ) - return [], pagination_info + # While the data returned is already JSON/dicts like we want to return + # APIFlask will try to run whatever we return through the deserializers + # which means anything that requires conversions like timestamps end up failing + # as they don't need to be converted. So, we convert everything to those types (serialize) + # so that deserialization won't fail. + records = SCHEMA.load(response.records, many=True) + + return records, response.aggregations, pagination_info diff --git a/api/tests/conftest.py b/api/tests/conftest.py index 4b45c4f2c..6887ff0e4 100644 --- a/api/tests/conftest.py +++ b/api/tests/conftest.py @@ -173,13 +173,18 @@ def opportunity_index(search_client): yield index_name finally: # Try to clean up the index at the end - search_client.delete_index(index_name) + # Use a prefix which will delete the above (if it exists) + # and any that might not have been cleaned up due to issues + # in prior runs + search_client.delete_index("test-opportunity-index-*") @pytest.fixture(scope="session") -def opportunity_index_alias(search_client): +def opportunity_index_alias(search_client, monkeypatch_session): # Note we don't actually create anything, this is just a random name - return f"test-opportunity-index-alias-{uuid.uuid4().int}" + alias = f"test-opportunity-index-alias-{uuid.uuid4().int}" + monkeypatch_session.setenv("OPPORTUNITY_SEARCH_INDEX_ALIAS", alias) + return alias #################### @@ -190,7 +195,7 @@ def opportunity_index_alias(search_client): # Make app session scoped so the database connection pool is only created once # for the test session. This speeds up the tests. 
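+# The app fixture also depends on opportunity_index_alias so that the
+# OPPORTUNITY_SEARCH_INDEX_ALIAS environment variable is already set before the app is created.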
@pytest.fixture(scope="session") -def app(db_client) -> APIFlask: +def app(db_client, opportunity_index_alias) -> APIFlask: return app_entry.create_app() diff --git a/api/tests/src/api/opportunities_v1/conftest.py b/api/tests/src/api/opportunities_v1/conftest.py index c00490cff..c1babc7ba 100644 --- a/api/tests/src/api/opportunities_v1/conftest.py +++ b/api/tests/src/api/opportunities_v1/conftest.py @@ -13,9 +13,9 @@ def get_search_request( page_offset: int = 1, - page_size: int = 5, + page_size: int = 25, order_by: str = "opportunity_id", - sort_direction: str = "descending", + sort_direction: str = "ascending", query: str | None = None, funding_instrument_one_of: list[FundingInstrument] | None = None, funding_category_one_of: list[FundingCategory] | None = None, diff --git a/api/tests/src/api/opportunities_v1/test_opportunity_route_search.py b/api/tests/src/api/opportunities_v1/test_opportunity_route_search.py index 6e79419db..93ac56b4b 100644 --- a/api/tests/src/api/opportunities_v1/test_opportunity_route_search.py +++ b/api/tests/src/api/opportunities_v1/test_opportunity_route_search.py @@ -1,19 +1,765 @@ +from datetime import date + +import pytest + +from src.api.opportunities_v1.opportunity_schemas import OpportunityV1Schema +from src.constants.lookup_constants import ( + ApplicantType, + FundingCategory, + FundingInstrument, + OpportunityStatus, +) +from src.db.models.opportunity_models import Opportunity +from src.pagination.pagination_models import SortDirection +from src.util.dict_util import flatten_dict +from tests.conftest import BaseTestClass from tests.src.api.opportunities_v1.conftest import get_search_request +from tests.src.db.models.factories import ( + CurrentOpportunitySummaryFactory, + OpportunityAssistanceListingFactory, + OpportunityFactory, + OpportunitySummaryFactory, +) + + +def validate_search_response( + search_response, expected_results: list[Opportunity], expected_status_code: int = 200 +): + assert search_response.status_code == expected_status_code + + response_json = search_response.get_json() + data = response_json["data"] + + opportunities = data["opportunities"] + + response_ids = [opp["opportunity_id"] for opp in opportunities] + expected_ids = [exp.opportunity_id for exp in expected_results] + + assert ( + response_ids == expected_ids + ), f"Actual opportunities:\n {'\n'.join([opp['opportunity_title'] for opp in opportunities])}" + + +def build_opp( + opportunity_title: str, + opportunity_number: str, + agency: str, + summary_description: str, + opportunity_status: OpportunityStatus, + assistance_listings: list, + applicant_types: list, + funding_instruments: list, + funding_categories: list, + post_date: date, + close_date: date | None, +) -> Opportunity: + opportunity = OpportunityFactory.build( + opportunity_title=opportunity_title, + opportunity_number=opportunity_number, + agency=agency, + opportunity_assistance_listings=[], + current_opportunity_summary=None, + ) + + for assistance_listing in assistance_listings: + opportunity.opportunity_assistance_listings.append( + OpportunityAssistanceListingFactory.build( + opportunity=opportunity, + assistance_listing_number=assistance_listing[0], + program_title=assistance_listing[1], + ) + ) + + opportunity_summary = OpportunitySummaryFactory.build( + opportunity=opportunity, + summary_description=summary_description, + applicant_types=applicant_types, + funding_instruments=funding_instruments, + funding_categories=funding_categories, + post_date=post_date, + close_date=close_date, + ) + + 
opportunity.current_opportunity_summary = CurrentOpportunitySummaryFactory.build( + opportunity_status=opportunity_status, + opportunity_summary=opportunity_summary, + opportunity=opportunity, + ) + + return opportunity + + +########################################## +# Opportunity scenarios for tests +# +# These try to mimic real opportunities +########################################## + +EDUCATION_AL = ("43.008", "Office of Stem Engagement (OSTEM)") +SPACE_AL = ("43.012", "Space Technology") +AERONAUTICS_AL = ("43.002", "Aeronautics") +LOC_AL = ("42.011", "Library of Congress Grants") +AMERICAN_AL = ("19.441", "ECA - American Spaces") +ECONOMIC_AL = ("11.307", "Economic Adjustment Assistance") +MANUFACTURING_AL = ("11.611", "Manufacturing Extension Partnership") + +NASA_SPACE_FELLOWSHIP = build_opp( + opportunity_title="National Space Grant College and Fellowship Program FY 2020 - 2024", + opportunity_number="NNH123ZYX", + agency="NASA", + summary_description="This Cooperative Agreement Notice is a multi-year award that aims to contribute to NASA's mission", + opportunity_status=OpportunityStatus.POSTED, + assistance_listings=[EDUCATION_AL], + applicant_types=[ApplicantType.OTHER], + funding_instruments=[FundingInstrument.COOPERATIVE_AGREEMENT], + funding_categories=[FundingCategory.EDUCATION], + post_date=date(2020, 3, 1), + close_date=date(2027, 6, 1), +) + +NASA_INNOVATIONS = build_opp( + opportunity_title="Early Stage Innovations", + opportunity_number="NNH24-TR0N", + agency="NASA", + summary_description="The program within STMD seeks proposals from accredited U.S. universities to develop unique, disruptive, or transformational space technologies.", + opportunity_status=OpportunityStatus.FORECASTED, + assistance_listings=[SPACE_AL], + applicant_types=[ApplicantType.OTHER], + funding_instruments=[FundingInstrument.GRANT], + funding_categories=[FundingCategory.SCIENCE_TECHNOLOGY_AND_OTHER_RESEARCH_AND_DEVELOPMENT], + post_date=date(2019, 3, 1), + close_date=None, +) + +NASA_SUPERSONIC = build_opp( + opportunity_title="Commercial Supersonic Technology (CST) Project", + opportunity_number="NNH24-CST", + agency="NASA", + summary_description="Commercial Supersonic Technology seeks proposals for a fuel injector design concept and fabrication for testing at NASA Glenn Research Center", + opportunity_status=OpportunityStatus.CLOSED, + assistance_listings=[AERONAUTICS_AL], + applicant_types=[ApplicantType.UNRESTRICTED], + funding_instruments=[FundingInstrument.GRANT], + funding_categories=[FundingCategory.SCIENCE_TECHNOLOGY_AND_OTHER_RESEARCH_AND_DEVELOPMENT], + post_date=date(2021, 3, 1), + close_date=date(2030, 6, 1), +) + +NASA_K12_DIVERSITY = build_opp( + opportunity_title="Space Grant K-12 Inclusiveness and Diversity in STEM", + opportunity_number="NNH22ZHA", + agency="NASA", + summary_description="Expands the reach of individual Consortia to collaborate regionally on efforts that directly support middle and high school student participation in hands-on, NASA-aligned STEM activities", + opportunity_status=OpportunityStatus.ARCHIVED, + assistance_listings=[EDUCATION_AL], + applicant_types=[ApplicantType.OTHER], + funding_instruments=[FundingInstrument.COOPERATIVE_AGREEMENT], + funding_categories=[FundingCategory.EDUCATION], + post_date=date(2025, 3, 1), + close_date=date(2018, 6, 1), +) + +LOC_TEACHING = build_opp( + opportunity_title="Teaching with Primary Sources - New Awards for FY25-FY27", + opportunity_number="012ADV345", + agency="LOC", + summary_description="Builds student 
literacy, critical thinking skills, content knowledge and ability to conduct original research.", + opportunity_status=OpportunityStatus.POSTED, + assistance_listings=[EDUCATION_AL], + applicant_types=[ + ApplicantType.STATE_GOVERNMENTS, + ApplicantType.COUNTY_GOVERNMENTS, + ApplicantType.INDEPENDENT_SCHOOL_DISTRICTS, + ApplicantType.CITY_OR_TOWNSHIP_GOVERNMENTS, + ApplicantType.SPECIAL_DISTRICT_GOVERNMENTS, + ], + funding_instruments=[FundingInstrument.COOPERATIVE_AGREEMENT], + funding_categories=[FundingCategory.EDUCATION], + post_date=date(2031, 3, 1), + close_date=date(2010, 6, 1), +) + +LOC_HIGHER_EDUCATION = build_opp( + opportunity_title="Of the People: Widening the Path: CCDI – Higher Education", + opportunity_number="012ADV346", + agency="LOC", + summary_description="The Library of Congress will expand the connections between the Library and diverse communities and strengthen the use of Library of Congress digital collections and digital tools", + opportunity_status=OpportunityStatus.FORECASTED, + assistance_listings=[LOC_AL], + applicant_types=[ + ApplicantType.PRIVATE_INSTITUTIONS_OF_HIGHER_EDUCATION, + ApplicantType.PUBLIC_AND_STATE_INSTITUTIONS_OF_HIGHER_EDUCATION, + ], + funding_instruments=[FundingInstrument.GRANT], + funding_categories=[FundingCategory.OTHER], + post_date=date(2026, 3, 1), + close_date=None, +) + +DOS_DIGITAL_LITERACY = build_opp( + opportunity_title="American Spaces Digital Literacy and Training Program", + opportunity_number="SFOP0001234", + agency="DOS-ECA", + summary_description="An open competition to administer a new award in the field of digital and media literacy and countering disinformation", + opportunity_status=OpportunityStatus.CLOSED, + assistance_listings=[AMERICAN_AL], + applicant_types=[ + ApplicantType.OTHER, + ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITH_501C3, + ApplicantType.PRIVATE_INSTITUTIONS_OF_HIGHER_EDUCATION, + ApplicantType.PUBLIC_AND_STATE_INSTITUTIONS_OF_HIGHER_EDUCATION, + ], + funding_instruments=[FundingInstrument.COOPERATIVE_AGREEMENT], + funding_categories=[FundingCategory.OTHER], + post_date=date(2028, 3, 1), + close_date=date(2023, 6, 1), +) + +DOC_SPACE_COAST = build_opp( + opportunity_title="Space Coast RIC", + opportunity_number="SFOP0009876", + agency="DOC-EDA", + summary_description="diversification of Florida's Space Coast region", + opportunity_status=OpportunityStatus.ARCHIVED, + assistance_listings=[ECONOMIC_AL], + applicant_types=[ + ApplicantType.CITY_OR_TOWNSHIP_GOVERNMENTS, + ApplicantType.COUNTY_GOVERNMENTS, + ApplicantType.STATE_GOVERNMENTS, + ], + funding_instruments=[FundingInstrument.COOPERATIVE_AGREEMENT, FundingInstrument.GRANT], + funding_categories=[FundingCategory.OTHER, FundingCategory.REGIONAL_DEVELOPMENT], + post_date=date(2017, 3, 1), + close_date=date(2019, 6, 1), +) + +DOC_MANUFACTURING = build_opp( + opportunity_title="Advanced Manufacturing Jobs and Innovation Accelerator Challenge", + opportunity_number="JIAC1234AM", + agency="DOC-EDA", + summary_description="foster job creation, increase public and private investments, and enhance economic prosperity", + opportunity_status=OpportunityStatus.POSTED, + assistance_listings=[ECONOMIC_AL, MANUFACTURING_AL], + applicant_types=[ApplicantType.OTHER], + funding_instruments=[FundingInstrument.COOPERATIVE_AGREEMENT, FundingInstrument.GRANT], + funding_categories=[ + FundingCategory.EMPLOYMENT_LABOR_AND_TRAINING, + FundingCategory.ENERGY, + FundingCategory.SCIENCE_TECHNOLOGY_AND_OTHER_RESEARCH_AND_DEVELOPMENT, + ], + post_date=date(2013, 3, 
1), + close_date=date(2035, 6, 1), +) + +OPPORTUNITIES = [ + NASA_SPACE_FELLOWSHIP, + NASA_INNOVATIONS, + NASA_SUPERSONIC, + NASA_K12_DIVERSITY, + LOC_TEACHING, + LOC_HIGHER_EDUCATION, + DOS_DIGITAL_LITERACY, + DOC_SPACE_COAST, + DOC_MANUFACTURING, +] + + +def search_scenario_id_fnc(val): + if isinstance(val, dict): + return str(flatten_dict(val, separator="|")) + +class TestOpportunityRouteSearch(BaseTestClass): + @pytest.fixture(scope="class") + def setup_search_data(self, opportunity_index, opportunity_index_alias, search_client): + # Load into the search index + schema = OpportunityV1Schema() + json_records = [schema.dump(opportunity) for opportunity in OPPORTUNITIES] + search_client.bulk_upsert(opportunity_index, json_records, "opportunity_id") -def test_opportunity_route_search_200(client, api_auth_token): - req = get_search_request() + # Swap the search index alias + search_client.swap_alias_index(opportunity_index, opportunity_index_alias) - resp = client.post("/v1/opportunities/search", json=req, headers={"X-Auth": api_auth_token}) + @pytest.mark.parametrize( + "search_request,expected_results", + [ + # Opportunity ID + ( + get_search_request( + page_size=25, + page_offset=1, + order_by="opportunity_id", + sort_direction=SortDirection.ASCENDING, + ), + OPPORTUNITIES, + ), + ( + get_search_request( + page_size=3, + page_offset=2, + order_by="opportunity_id", + sort_direction=SortDirection.ASCENDING, + ), + OPPORTUNITIES[3:6], + ), + ( + get_search_request( + page_size=25, + page_offset=1, + order_by="opportunity_id", + sort_direction=SortDirection.DESCENDING, + ), + OPPORTUNITIES[::-1], + ), + # Opportunity Number + ( + get_search_request( + page_size=3, + page_offset=1, + order_by="opportunity_number", + sort_direction=SortDirection.ASCENDING, + ), + [LOC_TEACHING, LOC_HIGHER_EDUCATION, DOC_MANUFACTURING], + ), + ( + get_search_request( + page_size=2, + page_offset=3, + order_by="opportunity_number", + sort_direction=SortDirection.DESCENDING, + ), + [NASA_K12_DIVERSITY, NASA_SPACE_FELLOWSHIP], + ), + # Opportunity Title + ( + get_search_request( + page_size=4, + page_offset=2, + order_by="opportunity_title", + sort_direction=SortDirection.ASCENDING, + ), + [NASA_SPACE_FELLOWSHIP, LOC_HIGHER_EDUCATION, DOC_SPACE_COAST, NASA_K12_DIVERSITY], + ), + ( + get_search_request( + page_size=5, + page_offset=1, + order_by="opportunity_title", + sort_direction=SortDirection.DESCENDING, + ), + [ + LOC_TEACHING, + NASA_K12_DIVERSITY, + DOC_SPACE_COAST, + LOC_HIGHER_EDUCATION, + NASA_SPACE_FELLOWSHIP, + ], + ), + # Post Date + ( + get_search_request( + page_size=2, + page_offset=1, + order_by="post_date", + sort_direction=SortDirection.ASCENDING, + ), + [DOC_MANUFACTURING, DOC_SPACE_COAST], + ), + ( + get_search_request( + page_size=3, + page_offset=1, + order_by="post_date", + sort_direction=SortDirection.DESCENDING, + ), + [LOC_TEACHING, DOS_DIGITAL_LITERACY, LOC_HIGHER_EDUCATION], + ), + ( + get_search_request( + page_size=3, + page_offset=12, + order_by="post_date", + sort_direction=SortDirection.DESCENDING, + ), + [], + ), + # Relevancy has a secondary sort of post date so should be identical. 
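+            # (with no query in the request, every document gets the same relevancy score,
+            # so the secondary post_date sort is what determines the order here)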
+ ( + get_search_request( + page_size=2, + page_offset=1, + order_by="relevancy", + sort_direction=SortDirection.ASCENDING, + ), + [DOC_MANUFACTURING, DOC_SPACE_COAST], + ), + ( + get_search_request( + page_size=3, + page_offset=1, + order_by="relevancy", + sort_direction=SortDirection.DESCENDING, + ), + [LOC_TEACHING, DOS_DIGITAL_LITERACY, LOC_HIGHER_EDUCATION], + ), + ( + get_search_request( + page_size=3, + page_offset=12, + order_by="relevancy", + sort_direction=SortDirection.DESCENDING, + ), + [], + ), + # Close Date (note several have null values which always go to the end) + ( + get_search_request( + page_size=4, + page_offset=1, + order_by="close_date", + sort_direction=SortDirection.ASCENDING, + ), + [LOC_TEACHING, NASA_K12_DIVERSITY, DOC_SPACE_COAST, DOS_DIGITAL_LITERACY], + ), + ( + get_search_request( + page_size=3, + page_offset=1, + order_by="close_date", + sort_direction=SortDirection.DESCENDING, + ), + [DOC_MANUFACTURING, NASA_SUPERSONIC, NASA_SPACE_FELLOWSHIP], + ), + # close date - but check the end of the list to find the null values + ( + get_search_request( + page_size=5, + page_offset=2, + order_by="close_date", + sort_direction=SortDirection.ASCENDING, + ), + [NASA_SUPERSONIC, DOC_MANUFACTURING, NASA_INNOVATIONS, LOC_HIGHER_EDUCATION], + ), + # Agency + ( + get_search_request( + page_size=5, + page_offset=1, + order_by="agency_code", + sort_direction=SortDirection.ASCENDING, + ), + [ + DOC_SPACE_COAST, + DOC_MANUFACTURING, + DOS_DIGITAL_LITERACY, + LOC_TEACHING, + LOC_HIGHER_EDUCATION, + ], + ), + ( + get_search_request( + page_size=3, + page_offset=1, + order_by="agency_code", + sort_direction=SortDirection.DESCENDING, + ), + [NASA_SPACE_FELLOWSHIP, NASA_INNOVATIONS, NASA_SUPERSONIC], + ), + ], + ids=search_scenario_id_fnc, + ) + def test_sorting_and_pagination_200( + self, client, api_auth_token, setup_search_data, search_request, expected_results + ): + resp = client.post( + "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token} + ) + validate_search_response(resp, expected_results) - assert resp.status_code == 200 + @pytest.mark.parametrize( + "search_request, expected_results", + [ + # Agency + (get_search_request(agency_one_of=["not an agency"]), []), + ( + get_search_request(agency_one_of=["NASA"]), + [NASA_SPACE_FELLOWSHIP, NASA_INNOVATIONS, NASA_SUPERSONIC, NASA_K12_DIVERSITY], + ), + (get_search_request(agency_one_of=["LOC"]), [LOC_TEACHING, LOC_HIGHER_EDUCATION]), + (get_search_request(agency_one_of=["DOS-ECA"]), [DOS_DIGITAL_LITERACY]), + (get_search_request(agency_one_of=["DOC-EDA"]), [DOC_SPACE_COAST, DOC_MANUFACTURING]), + ( + get_search_request( + agency_one_of=["DOC-EDA", "NASA", "LOC", "DOS-ECA", "something else"] + ), + OPPORTUNITIES, + ), + # Opportunity Status + ( + get_search_request(opportunity_status_one_of=[OpportunityStatus.POSTED]), + [NASA_SPACE_FELLOWSHIP, LOC_TEACHING, DOC_MANUFACTURING], + ), + ( + get_search_request(opportunity_status_one_of=[OpportunityStatus.FORECASTED]), + [NASA_INNOVATIONS, LOC_HIGHER_EDUCATION], + ), + ( + get_search_request(opportunity_status_one_of=[OpportunityStatus.CLOSED]), + [NASA_SUPERSONIC, DOS_DIGITAL_LITERACY], + ), + ( + get_search_request(opportunity_status_one_of=[OpportunityStatus.ARCHIVED]), + [NASA_K12_DIVERSITY, DOC_SPACE_COAST], + ), + ( + get_search_request( + opportunity_status_one_of=[ + OpportunityStatus.POSTED, + OpportunityStatus.FORECASTED, + ] + ), + [ + NASA_SPACE_FELLOWSHIP, + NASA_INNOVATIONS, + LOC_TEACHING, + LOC_HIGHER_EDUCATION, + DOC_MANUFACTURING, + 
], + ), + ( + get_search_request( + opportunity_status_one_of=[ + OpportunityStatus.POSTED, + OpportunityStatus.FORECASTED, + OpportunityStatus.CLOSED, + OpportunityStatus.ARCHIVED, + ] + ), + OPPORTUNITIES, + ), + # Funding Instrument + ( + get_search_request( + funding_instrument_one_of=[FundingInstrument.COOPERATIVE_AGREEMENT] + ), + [ + NASA_SPACE_FELLOWSHIP, + NASA_K12_DIVERSITY, + LOC_TEACHING, + DOS_DIGITAL_LITERACY, + DOC_SPACE_COAST, + DOC_MANUFACTURING, + ], + ), + ( + get_search_request(funding_instrument_one_of=[FundingInstrument.GRANT]), + [ + NASA_INNOVATIONS, + NASA_SUPERSONIC, + LOC_HIGHER_EDUCATION, + DOC_SPACE_COAST, + DOC_MANUFACTURING, + ], + ), + ( + get_search_request( + funding_instrument_one_of=[FundingInstrument.PROCUREMENT_CONTRACT] + ), + [], + ), + (get_search_request(funding_instrument_one_of=[FundingInstrument.OTHER]), []), + ( + get_search_request( + funding_instrument_one_of=[ + FundingInstrument.COOPERATIVE_AGREEMENT, + FundingInstrument.GRANT, + ] + ), + OPPORTUNITIES, + ), + # Funding Category + ( + get_search_request(funding_category_one_of=[FundingCategory.EDUCATION]), + [NASA_SPACE_FELLOWSHIP, NASA_K12_DIVERSITY, LOC_TEACHING], + ), + ( + get_search_request( + funding_category_one_of=[ + FundingCategory.SCIENCE_TECHNOLOGY_AND_OTHER_RESEARCH_AND_DEVELOPMENT + ] + ), + [NASA_INNOVATIONS, NASA_SUPERSONIC, DOC_MANUFACTURING], + ), + ( + get_search_request(funding_category_one_of=[FundingCategory.OTHER]), + [LOC_HIGHER_EDUCATION, DOS_DIGITAL_LITERACY, DOC_SPACE_COAST], + ), + ( + get_search_request(funding_category_one_of=[FundingCategory.REGIONAL_DEVELOPMENT]), + [DOC_SPACE_COAST], + ), + ( + get_search_request( + funding_category_one_of=[FundingCategory.EMPLOYMENT_LABOR_AND_TRAINING] + ), + [DOC_MANUFACTURING], + ), + ( + get_search_request(funding_category_one_of=[FundingCategory.ENERGY]), + [DOC_MANUFACTURING], + ), + (get_search_request(funding_category_one_of=[FundingCategory.HOUSING]), []), + ( + get_search_request( + funding_category_one_of=[ + FundingCategory.SCIENCE_TECHNOLOGY_AND_OTHER_RESEARCH_AND_DEVELOPMENT, + FundingCategory.REGIONAL_DEVELOPMENT, + ] + ), + [NASA_INNOVATIONS, NASA_SUPERSONIC, DOC_SPACE_COAST, DOC_MANUFACTURING], + ), + # Applicant Type + ( + get_search_request(applicant_type_one_of=[ApplicantType.OTHER]), + [ + NASA_SPACE_FELLOWSHIP, + NASA_INNOVATIONS, + NASA_K12_DIVERSITY, + DOS_DIGITAL_LITERACY, + DOC_MANUFACTURING, + ], + ), + ( + get_search_request(applicant_type_one_of=[ApplicantType.UNRESTRICTED]), + [NASA_SUPERSONIC], + ), + ( + get_search_request(applicant_type_one_of=[ApplicantType.STATE_GOVERNMENTS]), + [LOC_TEACHING, DOC_SPACE_COAST], + ), + ( + get_search_request(applicant_type_one_of=[ApplicantType.COUNTY_GOVERNMENTS]), + [LOC_TEACHING, DOC_SPACE_COAST], + ), + ( + get_search_request( + applicant_type_one_of=[ + ApplicantType.PUBLIC_AND_STATE_INSTITUTIONS_OF_HIGHER_EDUCATION + ] + ), + [LOC_HIGHER_EDUCATION, DOS_DIGITAL_LITERACY], + ), + (get_search_request(applicant_type_one_of=[ApplicantType.INDIVIDUALS]), []), + ( + get_search_request( + applicant_type_one_of=[ + ApplicantType.STATE_GOVERNMENTS, + ApplicantType.UNRESTRICTED, + ] + ), + [NASA_SUPERSONIC, LOC_TEACHING, DOC_SPACE_COAST], + ), + # Mix + ( + get_search_request( + agency_one_of=["NASA"], applicant_type_one_of=[ApplicantType.OTHER] + ), + [NASA_SPACE_FELLOWSHIP, NASA_INNOVATIONS, NASA_K12_DIVERSITY], + ), + ( + get_search_request( + funding_instrument_one_of=[ + FundingInstrument.GRANT, + FundingInstrument.PROCUREMENT_CONTRACT, + ], + 
funding_category_one_of=[ + FundingCategory.SCIENCE_TECHNOLOGY_AND_OTHER_RESEARCH_AND_DEVELOPMENT + ], + ), + [NASA_INNOVATIONS, NASA_SUPERSONIC, DOC_MANUFACTURING], + ), + ( + get_search_request( + opportunity_status_one_of=[OpportunityStatus.POSTED], + applicant_type_one_of=[ApplicantType.OTHER], + ), + [NASA_SPACE_FELLOWSHIP, DOC_MANUFACTURING], + ), + ], + ids=search_scenario_id_fnc, + ) + def test_search_filters_200( + self, client, api_auth_token, setup_search_data, search_request, expected_results + ): + resp = client.post( + "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token} + ) + validate_search_response(resp, expected_results) - # The endpoint meaningfully only returns the pagination params back - # at the moment, so just validate that for now. - resp_body = resp.get_json() - assert resp_body["pagination_info"]["page_offset"] == req["pagination"]["page_offset"] - assert resp_body["pagination_info"]["page_size"] == req["pagination"]["page_size"] - assert resp_body["pagination_info"]["sort_direction"] == req["pagination"]["sort_direction"] - assert resp_body["pagination_info"]["order_by"] == req["pagination"]["order_by"] - assert resp_body["pagination_info"]["total_records"] == 0 - assert resp_body["pagination_info"]["total_pages"] == 0 + @pytest.mark.parametrize( + "search_request, expected_results", + [ + # Note that the sorting is not relevancy for this as we intend to update the relevancy scores a bit + # and don't want to break this every time we adjust those. + ( + get_search_request( + order_by="opportunity_id", sort_direction=SortDirection.ASCENDING, query="space" + ), + [ + NASA_SPACE_FELLOWSHIP, + NASA_INNOVATIONS, + NASA_K12_DIVERSITY, + DOS_DIGITAL_LITERACY, + DOC_SPACE_COAST, + ], + ), + ( + get_search_request( + order_by="opportunity_id", + sort_direction=SortDirection.ASCENDING, + query="43.008", + ), + [NASA_SPACE_FELLOWSHIP, NASA_K12_DIVERSITY, LOC_TEACHING], + ), + ( + get_search_request( + order_by="opportunity_id", + sort_direction=SortDirection.ASCENDING, + query="012ADV*", + ), + [LOC_TEACHING, LOC_HIGHER_EDUCATION], + ), + ( + get_search_request( + order_by="opportunity_id", sort_direction=SortDirection.ASCENDING, query="DOC*" + ), + [DOC_SPACE_COAST, DOC_MANUFACTURING], + ), + ( + get_search_request( + order_by="opportunity_id", + sort_direction=SortDirection.ASCENDING, + query="Aeronautics", + ), + [NASA_SUPERSONIC], + ), + ( + get_search_request( + order_by="opportunity_id", + sort_direction=SortDirection.ASCENDING, + query="literacy", + ), + [LOC_TEACHING, DOS_DIGITAL_LITERACY], + ), + ], + ids=search_scenario_id_fnc, + ) + def test_search_query_200( + self, client, api_auth_token, setup_search_data, search_request, expected_results + ): + # This test isn't looking to validate opensearch behavior, just that we've connected fields properly and + # results being returned are as expected. 
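+        # For example, "43.008" should match via the assistance listing number field,
+        # "Aeronautics" via the assistance listing program title, and "012ADV*" / "DOC*"
+        # rely on prefix wildcards against the opportunity number / agency keyword fields.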
+ resp = client.post( + "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token} + ) + validate_search_response(resp, expected_results) From bba9a52dd4fb63c34b093b55ecbb2f4fc5e404b4 Mon Sep 17 00:00:00 2001 From: Michael Chouinard Date: Thu, 30 May 2024 12:54:16 -0400 Subject: [PATCH 12/19] Docs and logging --- api/src/adapters/search/flask_opensearch.py | 16 +++++++++++++++- .../opportunities_v1/search_opportunities.py | 7 +++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/api/src/adapters/search/flask_opensearch.py b/api/src/adapters/search/flask_opensearch.py index 66ed31528..0fa195456 100644 --- a/api/src/adapters/search/flask_opensearch.py +++ b/api/src/adapters/search/flask_opensearch.py @@ -21,7 +21,21 @@ def get_search_client(app: Flask) -> SearchClient: def with_search_client() -> Callable[[Callable[Concatenate[SearchClient, P], T]], Callable[P, T]]: - # TODO docs + """ + Decorator for functions that need a search client. + + This decorator will return the shared search client object which + has an internal connection pool that is shared. + + Usage: + @with_search_client() + def foo(search_client: search.SearchClient): + ... + + @with_search_client() + def bar(search_client: search.SearchClient, x: int, y: int): + ... + """ def decorator(f: Callable[Concatenate[SearchClient, P], T]) -> Callable[P, T]: @wraps(f) diff --git a/api/src/services/opportunities_v1/search_opportunities.py b/api/src/services/opportunities_v1/search_opportunities.py index cd94c4c2e..92a71344c 100644 --- a/api/src/services/opportunities_v1/search_opportunities.py +++ b/api/src/services/opportunities_v1/search_opportunities.py @@ -121,10 +121,13 @@ def search_opportunities( search_request = _get_search_request(search_params) - response = search_client.search( - get_search_config().opportunity_search_index_alias, search_request + index_alias = get_search_config().opportunity_search_index_alias + logger.info( + "Querying search index alias %s", index_alias, extra={"search_index_alias": index_alias} ) + response = search_client.search(index_alias, search_request) + pagination_info = PaginationInfo( page_offset=search_params.pagination.page_offset, page_size=search_params.pagination.page_size, From 3b9fec930a8228d89d0caadd5b266e0b88f0ede0 Mon Sep 17 00:00:00 2001 From: nava-platform-bot Date: Thu, 30 May 2024 17:23:13 +0000 Subject: [PATCH 13/19] Update OpenAPI spec --- api/openapi.generated.yml | 1121 +++++++++++++++++++++++++++++-------- 1 file changed, 878 insertions(+), 243 deletions(-) diff --git a/api/openapi.generated.yml b/api/openapi.generated.yml index 7302ded44..ae109a5eb 100644 --- a/api/openapi.generated.yml +++ b/api/openapi.generated.yml @@ -37,26 +37,36 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: $ref: '#/components/schemas/Healthcheck' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: &id001 - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: &id002 - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Successful response '503': content: @@ -65,24 +75,32 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The 
message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id001 - allOf: + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: *id002 - allOf: + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Service Unavailable tags: - Health @@ -94,7 +112,9 @@ paths: name: FF-Enable-Opportunity-Log-Msg description: Whether to log a message in the opportunity endpoint schema: - type: boolean + type: + - boolean + - 'null' required: false responses: '200': @@ -104,28 +124,38 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: type: array items: $ref: '#/components/schemas/OpportunityV0' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: &id003 - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: &id004 - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Successful response '422': content: @@ -134,24 +164,32 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id003 - allOf: + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: *id004 - allOf: + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Validation error '401': content: @@ -160,24 +198,32 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id003 - allOf: + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: *id004 - allOf: + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Authentication error tags: - Opportunity v0 @@ -216,28 +262,36 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: - type: array - items: - $ref: '#/components/schemas/OpportunityV1' + $ref: '#/components/schemas/OpportunitySearchResponseV1' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: &id005 - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: &id006 - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' 
description: Successful response '422': content: @@ -246,24 +300,32 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id005 - allOf: + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: *id006 - allOf: + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Validation error '401': content: @@ -272,24 +334,32 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id005 - allOf: + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: *id006 - allOf: + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Authentication error tags: - Opportunity v1 @@ -315,6 +385,61 @@ paths: application/json: schema: $ref: '#/components/schemas/OpportunitySearchRequestV1' + examples: + example1: + summary: No filters + value: + pagination: + order_by: opportunity_id + page_offset: 1 + page_size: 25 + sort_direction: ascending + example2: + summary: All filters + value: + query: research + filters: + agency: + one_of: + - USAID + - ARPAH + applicant_type: + one_of: + - state_governments + - county_governments + - individuals + funding_category: + one_of: + - recovery_act + - arts + - natural_resources + funding_instrument: + one_of: + - cooperative_agreement + - grant + opportunity_status: + one_of: + - forecasted + - posted + pagination: + order_by: opportunity_id + page_offset: 1 + page_size: 25 + sort_direction: descending + example3: + summary: Query & opportunity_status filters + value: + query: research + filters: + opportunity_status: + one_of: + - forecasted + - posted + pagination: + order_by: opportunity_id + page_offset: 1 + page_size: 25 + sort_direction: descending security: - ApiKeyAuth: [] /v0.1/opportunities/search: @@ -328,28 +453,38 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: type: array items: $ref: '#/components/schemas/OpportunityV01' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: &id007 - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: &id008 - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Successful response '422': content: @@ -358,24 +493,32 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated 
endpoints type: *id007 - allOf: + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: *id008 - allOf: + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Validation error '401': content: @@ -384,24 +527,32 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id007 - allOf: + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: *id008 - allOf: + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Authentication error tags: - Opportunity v0.1 @@ -486,26 +637,36 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: $ref: '#/components/schemas/OpportunityV0' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: &id009 - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: &id010 - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Successful response '401': content: @@ -514,24 +675,32 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id009 - allOf: + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: *id010 - allOf: + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Authentication error '404': content: @@ -540,24 +709,32 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id009 - allOf: + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: *id010 - allOf: + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Not found tags: - Opportunity v0 @@ -596,26 +773,36 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: $ref: '#/components/schemas/OpportunityV1' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: &id011 - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: 
type: &id012 - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Successful response '401': content: @@ -624,24 +811,32 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id011 - allOf: + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: *id012 - allOf: + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Authentication error '404': content: @@ -650,24 +845,32 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id011 - allOf: + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: *id012 - allOf: + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Not found tags: - Opportunity v1 @@ -706,26 +909,36 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: $ref: '#/components/schemas/OpportunityV01' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: &id013 - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: &id014 - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Successful response '401': content: @@ -734,24 +947,32 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id013 - allOf: + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: *id014 - allOf: + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Authentication error '404': content: @@ -760,24 +981,32 @@ paths: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id013 - allOf: + anyOf: - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' warnings: - type: array + type: + - array + - 'null' items: type: *id014 - allOf: + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' description: Not found tags: - Opportunity v0.1 @@ -807,23 +1036,33 @@ 
components: type: object properties: page_offset: - type: integer + type: + - integer + - 'null' description: The page number that was fetched example: 1 page_size: - type: integer + type: + - integer + - 'null' description: The size of the page fetched example: 25 total_records: - type: integer + type: + - integer + - 'null' description: The total number of records fetchable example: 42 total_pages: - type: integer + type: + - integer + - 'null' description: The total number of pages that can be fetched example: 2 order_by: - type: string + type: + - string + - 'null' description: The field that the records were sorted by example: id sort_direction: @@ -833,41 +1072,61 @@ components: - descending type: - string + - 'null' + - 'null' ValidationIssue: type: object properties: type: - type: string + type: + - string + - 'null' description: The type of error message: - type: string + type: + - string + - 'null' description: The message to return field: - type: string + type: + - string + - 'null' description: The field that failed Healthcheck: type: object properties: message: - type: string + type: + - string + - 'null' ErrorResponse: type: object properties: message: - type: string + type: + - string + - 'null' description: The message to return data: description: The REST resource object + type: + - 'null' status_code: - type: integer + type: + - integer + - 'null' description: The HTTP status code errors: - type: array + type: + - array + - 'null' items: type: - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/ValidationIssue' + - type: 'null' OpportunitySorting: type: object properties: @@ -910,7 +1169,9 @@ components: type: object properties: opportunity_title: - type: string + type: + - string + - 'null' description: The title of the opportunity to search for example: research category: @@ -925,6 +1186,8 @@ components: - O type: - string + - 'null' + - 'null' sorting: type: - object @@ -942,20 +1205,28 @@ components: type: object properties: opportunity_id: - type: integer + type: + - integer + - 'null' readOnly: true description: The internal ID of the opportunity example: 12345 opportunity_number: - type: string + type: + - string + - 'null' description: The funding opportunity number example: ABC-123-XYZ-001 opportunity_title: - type: string + type: + - string + - 'null' description: The title of the opportunity example: Research into conservation techniques agency: - type: string + type: + - string + - 'null' description: The agency who created the opportunity example: US-ABC category: @@ -970,32 +1241,46 @@ components: - O type: - string + - 'null' + - 'null' category_explanation: - type: string + type: + - string + - 'null' description: Explanation of the category when the category is 'O' (other) example: null revision_number: - type: integer + type: + - integer + - 'null' description: The current revision number of the opportunity, counting starts at 0 example: 0 modified_comments: - type: string + type: + - string + - 'null' description: Details regarding what modification was last made example: null created_at: - type: string + type: + - string + - 'null' format: date-time readOnly: true updated_at: - type: string + type: + - string + - 'null' format: date-time readOnly: true FundingInstrumentFilterV1: type: object properties: one_of: - type: array + type: + - array + - 'null' minItems: 1 items: enum: @@ -1005,11 +1290,15 @@ components: - other type: - string + - 'null' + - 'null' FundingCategoryFilterV1: type: object properties: one_of: - type: array + type: 
+ - array + - 'null' minItems: 1 items: enum: @@ -1041,11 +1330,15 @@ components: - other type: - string + - 'null' + - 'null' ApplicantTypeFilterV1: type: object properties: one_of: - type: array + type: + - array + - 'null' minItems: 1 items: enum: @@ -1068,11 +1361,15 @@ components: - unrestricted type: - string + - 'null' + - 'null' OpportunityStatusFilterV1: type: object properties: one_of: - type: array + type: + - array + - 'null' minItems: 1 items: enum: @@ -1082,50 +1379,67 @@ components: - archived type: - string + - 'null' + - 'null' AgencyFilterV1: type: object properties: one_of: - type: array + type: + - array + - 'null' minItems: 1 items: - type: string + type: + - string + - 'null' minLength: 2 - example: US-ABC + example: USAID OpportunitySearchFilterV1: type: object properties: funding_instrument: type: - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/FundingInstrumentFilterV1' + - type: 'null' funding_category: type: - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/FundingCategoryFilterV1' + - type: 'null' applicant_type: type: - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/ApplicantTypeFilterV1' + - type: 'null' opportunity_status: type: - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/OpportunityStatusFilterV1' + - type: 'null' agency: type: - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/AgencyFilterV1' - OpportunityPagination: + - type: 'null' + OpportunityPaginationV1: type: object properties: order_by: type: string enum: + - relevancy - opportunity_id - opportunity_number - opportunity_title @@ -1159,7 +1473,9 @@ components: type: object properties: query: - type: string + type: + - string + - 'null' minLength: 1 maxLength: 100 description: Query string which searches against several text fields @@ -1167,25 +1483,31 @@ components: filters: type: - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/OpportunitySearchFilterV1' + - type: 'null' pagination: type: - object allOf: - - $ref: '#/components/schemas/OpportunityPagination' + - $ref: '#/components/schemas/OpportunityPaginationV1' required: - pagination OpportunityAssistanceListingV1: type: object properties: program_title: - type: string + type: + - string + - 'null' description: The name of the program, see https://sam.gov/content/assistance-listings for more detail example: Space Technology assistance_listing_number: - type: string + type: + - string + - 'null' description: The assistance listing number, see https://sam.gov/content/assistance-listings for more detail example: '43.012' @@ -1193,123 +1515,179 @@ components: type: object properties: summary_description: - type: string + type: + - string + - 'null' description: The summary of the opportunity example: This opportunity aims to unravel the mysteries of the universe. is_cost_sharing: - type: boolean + type: + - boolean + - 'null' description: Whether or not the opportunity has a cost sharing/matching requirement is_forecast: - type: boolean + type: + - boolean + - 'null' description: Whether the opportunity is forecasted, that is, the information is only an estimate and not yet official example: false close_date: - type: string + type: + - string + - 'null' format: date description: The date that the opportunity will close - only set if is_forecast=False close_date_description: - type: string + type: + - string + - 'null' description: Optional details regarding the close date example: Proposals are due earlier than usual. 
post_date: - type: string + type: + - string + - 'null' format: date description: The date the opportunity was posted archive_date: - type: string + type: + - string + - 'null' format: date description: When the opportunity will be archived expected_number_of_awards: - type: integer + type: + - integer + - 'null' description: The number of awards the opportunity is expected to award example: 10 estimated_total_program_funding: - type: integer + type: + - integer + - 'null' description: The total program funding of the opportunity in US Dollars example: 10000000 award_floor: - type: integer + type: + - integer + - 'null' description: The minimum amount an opportunity would award example: 10000 award_ceiling: - type: integer + type: + - integer + - 'null' description: The maximum amount an opportunity would award example: 100000 additional_info_url: - type: string + type: + - string + - 'null' description: A URL to a website that can provide additional information about the opportunity example: grants.gov additional_info_url_description: - type: string + type: + - string + - 'null' description: The text to display for the additional_info_url link example: Click me for more info forecasted_post_date: - type: string + type: + - string + - 'null' format: date description: Forecasted opportunity only. The date the opportunity is expected to be posted, and transition out of being a forecast forecasted_close_date: - type: string + type: + - string + - 'null' format: date description: Forecasted opportunity only. The date the opportunity is expected to be close once posted. forecasted_close_date_description: - type: string + type: + - string + - 'null' description: Forecasted opportunity only. Optional details regarding the forecasted closed date. example: Proposals will probably be due on this date forecasted_award_date: - type: string + type: + - string + - 'null' format: date description: Forecasted opportunity only. The date the grantor plans to award the opportunity. forecasted_project_start_date: - type: string + type: + - string + - 'null' format: date description: Forecasted opportunity only. The date the grantor expects the award recipient should start their project fiscal_year: - type: integer + type: + - integer + - 'null' description: Forecasted opportunity only. 
The fiscal year the project is expected to be funded and launched funding_category_description: - type: string + type: + - string + - 'null' description: Additional information about the funding category example: Economic Support applicant_eligibility_description: - type: string + type: + - string + - 'null' description: Additional information about the types of applicants that are eligible example: All types of domestic applicants are eligible to apply agency_code: - type: string + type: + - string + - 'null' description: The agency who owns the opportunity example: US-ABC agency_name: - type: string + type: + - string + - 'null' description: The name of the agency who owns the opportunity example: US Alphabetical Basic Corp agency_phone_number: - type: string + type: + - string + - 'null' description: The phone number of the agency who owns the opportunity example: 123-456-7890 agency_contact_description: - type: string + type: + - string + - 'null' description: Information regarding contacting the agency who owns the opportunity example: For more information, reach out to Jane Smith at agency US-ABC agency_email_address: - type: string + type: + - string + - 'null' description: The contact email of the agency who owns the opportunity example: fake_email@grants.gov agency_email_address_description: - type: string + type: + - string + - 'null' description: The text for the link to the agency email address example: Click me to email the agency funding_instruments: - type: array + type: + - array + - 'null' items: enum: - cooperative_agreement @@ -1318,8 +1696,12 @@ components: - other type: - string + - 'null' + - 'null' funding_categories: - type: array + type: + - array + - 'null' items: enum: - recovery_act @@ -1350,8 +1732,12 @@ components: - other type: - string + - 'null' + - 'null' applicant_types: - type: array + type: + - array + - 'null' items: enum: - state_governments @@ -1373,24 +1759,33 @@ components: - unrestricted type: - string + - 'null' + - 'null' OpportunityV1: type: object properties: opportunity_id: - type: integer - readOnly: true + type: + - integer + - 'null' description: The internal ID of the opportunity example: 12345 opportunity_number: - type: string + type: + - string + - 'null' description: The funding opportunity number example: ABC-123-XYZ-001 opportunity_title: - type: string + type: + - string + - 'null' description: The title of the opportunity example: Research into conservation techniques agency: - type: string + type: + - string + - 'null' description: The agency who created the opportunity example: US-ABC category: @@ -1405,22 +1800,32 @@ components: - other type: - string + - 'null' + - 'null' category_explanation: - type: string + type: + - string + - 'null' description: Explanation of the category when the category is 'O' (other) example: null opportunity_assistance_listings: - type: array + type: + - array + - 'null' items: type: - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/OpportunityAssistanceListingV1' + - type: 'null' summary: type: - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/OpportunitySummaryV1' + - type: 'null' opportunity_status: description: The current status of the opportunity example: !!python/object/apply:src.constants.lookup_constants.OpportunityStatus @@ -1432,19 +1837,123 @@ components: - archived type: - string + - 'null' + - 'null' created_at: - type: string + type: + - string + - 'null' format: date-time readOnly: true updated_at: - type: string + type: + - string + - 'null' 
format: date-time readOnly: true + OpportunityFacetV1: + type: object + properties: + opportunity_status: + type: + - object + - 'null' + description: The counts of opportunity_status values in the full response + example: + posted: 1 + forecasted: 2 + additionalProperties: + type: + - integer + - 'null' + applicant_type: + type: + - object + - 'null' + description: The counts of applicant_type values in the full response + example: + state_governments: 3 + county_governments: 2 + city_or_township_governments: 1 + additionalProperties: + type: + - integer + - 'null' + funding_instrument: + type: + - object + - 'null' + description: The counts of funding_instrument values in the full response + example: + cooperative_agreement: 4 + grant: 3 + additionalProperties: + type: + - integer + - 'null' + funding_category: + type: + - object + - 'null' + description: The counts of funding_category values in the full response + example: + recovery_act: 2 + arts: 3 + agriculture: 5 + additionalProperties: + type: + - integer + - 'null' + agency: + type: + - object + - 'null' + description: The counts of agency values in the full response + example: + USAID: 4 + ARPAH: 3 + additionalProperties: + type: + - integer + - 'null' + OpportunitySearchResponseV1: + type: object + properties: + opportunities: + type: + - array + - 'null' + description: The opportunity records + items: + type: + - object + - 'null' + anyOf: + - $ref: '#/components/schemas/OpportunityV1' + - type: 'null' + facet_counts: + description: Counts of filter/facet values in the full response + type: + - object + - 'null' + anyOf: + - $ref: '#/components/schemas/OpportunityFacetV1' + - type: 'null' + pagination_info: + description: The pagination information for the search response + type: + - object + - 'null' + anyOf: + - $ref: '#/components/schemas/PaginationInfo' + - type: 'null' FundingInstrumentFilterV01: type: object properties: one_of: - type: array + type: + - array + - 'null' minItems: 1 items: enum: @@ -1454,11 +1963,15 @@ components: - other type: - string + - 'null' + - 'null' FundingCategoryFilterV01: type: object properties: one_of: - type: array + type: + - array + - 'null' minItems: 1 items: enum: @@ -1490,11 +2003,15 @@ components: - other type: - string + - 'null' + - 'null' ApplicantTypeFilterV01: type: object properties: one_of: - type: array + type: + - array + - 'null' minItems: 1 items: enum: @@ -1517,11 +2034,15 @@ components: - unrestricted type: - string + - 'null' + - 'null' OpportunityStatusFilterV01: type: object properties: one_of: - type: array + type: + - array + - 'null' minItems: 1 items: enum: @@ -1531,14 +2052,20 @@ components: - archived type: - string + - 'null' + - 'null' AgencyFilterV01: type: object properties: one_of: - type: array + type: + - array + - 'null' minItems: 1 items: - type: string + type: + - string + - 'null' minLength: 2 example: US-ABC OpportunitySearchFilterV01: @@ -1547,29 +2074,39 @@ components: funding_instrument: type: - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/FundingInstrumentFilterV01' + - type: 'null' funding_category: type: - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/FundingCategoryFilterV01' + - type: 'null' applicant_type: type: - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/ApplicantTypeFilterV01' + - type: 'null' opportunity_status: type: - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/OpportunityStatusFilterV01' + - type: 'null' agency: type: - object - allOf: + - 'null' + 
anyOf: - $ref: '#/components/schemas/AgencyFilterV01' - OpportunityPagination1: + - type: 'null' + OpportunityPagination: type: object properties: order_by: @@ -1608,7 +2145,9 @@ components: type: object properties: query: - type: string + type: + - string + - 'null' minLength: 1 maxLength: 100 description: Query string which searches against several text fields @@ -1616,25 +2155,31 @@ components: filters: type: - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/OpportunitySearchFilterV01' + - type: 'null' pagination: type: - object allOf: - - $ref: '#/components/schemas/OpportunityPagination1' + - $ref: '#/components/schemas/OpportunityPagination' required: - pagination OpportunityAssistanceListingV01: type: object properties: program_title: - type: string + type: + - string + - 'null' description: The name of the program, see https://sam.gov/content/assistance-listings for more detail example: Space Technology assistance_listing_number: - type: string + type: + - string + - 'null' description: The assistance listing number, see https://sam.gov/content/assistance-listings for more detail example: '43.012' @@ -1642,123 +2187,179 @@ components: type: object properties: summary_description: - type: string + type: + - string + - 'null' description: The summary of the opportunity example: This opportunity aims to unravel the mysteries of the universe. is_cost_sharing: - type: boolean + type: + - boolean + - 'null' description: Whether or not the opportunity has a cost sharing/matching requirement is_forecast: - type: boolean + type: + - boolean + - 'null' description: Whether the opportunity is forecasted, that is, the information is only an estimate and not yet official example: false close_date: - type: string + type: + - string + - 'null' format: date description: The date that the opportunity will close - only set if is_forecast=False close_date_description: - type: string + type: + - string + - 'null' description: Optional details regarding the close date example: Proposals are due earlier than usual. post_date: - type: string + type: + - string + - 'null' format: date description: The date the opportunity was posted archive_date: - type: string + type: + - string + - 'null' format: date description: When the opportunity will be archived expected_number_of_awards: - type: integer + type: + - integer + - 'null' description: The number of awards the opportunity is expected to award example: 10 estimated_total_program_funding: - type: integer + type: + - integer + - 'null' description: The total program funding of the opportunity in US Dollars example: 10000000 award_floor: - type: integer + type: + - integer + - 'null' description: The minimum amount an opportunity would award example: 10000 award_ceiling: - type: integer + type: + - integer + - 'null' description: The maximum amount an opportunity would award example: 100000 additional_info_url: - type: string + type: + - string + - 'null' description: A URL to a website that can provide additional information about the opportunity example: grants.gov additional_info_url_description: - type: string + type: + - string + - 'null' description: The text to display for the additional_info_url link example: Click me for more info forecasted_post_date: - type: string + type: + - string + - 'null' format: date description: Forecasted opportunity only. 
The date the opportunity is expected to be posted, and transition out of being a forecast forecasted_close_date: - type: string + type: + - string + - 'null' format: date description: Forecasted opportunity only. The date the opportunity is expected to be close once posted. forecasted_close_date_description: - type: string + type: + - string + - 'null' description: Forecasted opportunity only. Optional details regarding the forecasted closed date. example: Proposals will probably be due on this date forecasted_award_date: - type: string + type: + - string + - 'null' format: date description: Forecasted opportunity only. The date the grantor plans to award the opportunity. forecasted_project_start_date: - type: string + type: + - string + - 'null' format: date description: Forecasted opportunity only. The date the grantor expects the award recipient should start their project fiscal_year: - type: integer + type: + - integer + - 'null' description: Forecasted opportunity only. The fiscal year the project is expected to be funded and launched funding_category_description: - type: string + type: + - string + - 'null' description: Additional information about the funding category example: Economic Support applicant_eligibility_description: - type: string + type: + - string + - 'null' description: Additional information about the types of applicants that are eligible example: All types of domestic applicants are eligible to apply agency_code: - type: string + type: + - string + - 'null' description: The agency who owns the opportunity example: US-ABC agency_name: - type: string + type: + - string + - 'null' description: The name of the agency who owns the opportunity example: US Alphabetical Basic Corp agency_phone_number: - type: string + type: + - string + - 'null' description: The phone number of the agency who owns the opportunity example: 123-456-7890 agency_contact_description: - type: string + type: + - string + - 'null' description: Information regarding contacting the agency who owns the opportunity example: For more information, reach out to Jane Smith at agency US-ABC agency_email_address: - type: string + type: + - string + - 'null' description: The contact email of the agency who owns the opportunity example: fake_email@grants.gov agency_email_address_description: - type: string + type: + - string + - 'null' description: The text for the link to the agency email address example: Click me to email the agency funding_instruments: - type: array + type: + - array + - 'null' items: enum: - cooperative_agreement @@ -1767,8 +2368,12 @@ components: - other type: - string + - 'null' + - 'null' funding_categories: - type: array + type: + - array + - 'null' items: enum: - recovery_act @@ -1799,8 +2404,12 @@ components: - other type: - string + - 'null' + - 'null' applicant_types: - type: array + type: + - array + - 'null' items: enum: - state_governments @@ -1822,24 +2431,34 @@ components: - unrestricted type: - string + - 'null' + - 'null' OpportunityV01: type: object properties: opportunity_id: - type: integer + type: + - integer + - 'null' readOnly: true description: The internal ID of the opportunity example: 12345 opportunity_number: - type: string + type: + - string + - 'null' description: The funding opportunity number example: ABC-123-XYZ-001 opportunity_title: - type: string + type: + - string + - 'null' description: The title of the opportunity example: Research into conservation techniques agency: - type: string + type: + - string + - 'null' description: The agency who created the 
opportunity example: US-ABC category: @@ -1854,22 +2473,32 @@ components: - other type: - string + - 'null' + - 'null' category_explanation: - type: string + type: + - string + - 'null' description: Explanation of the category when the category is 'O' (other) example: null opportunity_assistance_listings: - type: array + type: + - array + - 'null' items: type: - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/OpportunityAssistanceListingV01' + - type: 'null' summary: type: - object - allOf: + - 'null' + anyOf: - $ref: '#/components/schemas/OpportunitySummaryV01' + - type: 'null' opportunity_status: description: The current status of the opportunity example: !!python/object/apply:src.constants.lookup_constants.OpportunityStatus @@ -1881,12 +2510,18 @@ components: - archived type: - string + - 'null' + - 'null' created_at: - type: string + type: + - string + - 'null' format: date-time readOnly: true updated_at: - type: string + type: + - string + - 'null' format: date-time readOnly: true securitySchemes: From 01a5bc080dff48db4b4092a5abe0d20da016a66b Mon Sep 17 00:00:00 2001 From: Michael Chouinard Date: Thu, 30 May 2024 15:55:28 -0400 Subject: [PATCH 14/19] Adjust the allow_none logic --- .../opportunities_v1/opportunity_schemas.py | 97 +++++++++++++------ .../api/schemas/extension/schema_fields.py | 14 +-- 2 files changed, 66 insertions(+), 45 deletions(-) diff --git a/api/src/api/opportunities_v1/opportunity_schemas.py b/api/src/api/opportunities_v1/opportunity_schemas.py index 3bc10ef5b..db0cbd539 100644 --- a/api/src/api/opportunities_v1/opportunity_schemas.py +++ b/api/src/api/opportunities_v1/opportunity_schemas.py @@ -12,15 +12,17 @@ class OpportunitySummaryV1Schema(Schema): summary_description = fields.String( + allow_none=True, metadata={ "description": "The summary of the opportunity", "example": "This opportunity aims to unravel the mysteries of the universe.", - } + }, ) is_cost_sharing = fields.Boolean( + allow_none=True, metadata={ "description": "Whether or not the opportunity has a cost sharing/matching requirement", - } + }, ) is_forecast = fields.Boolean( metadata={ @@ -30,147 +32,171 @@ class OpportunitySummaryV1Schema(Schema): ) close_date = fields.Date( + allow_none=True, metadata={ "description": "The date that the opportunity will close - only set if is_forecast=False", - } + }, ) close_date_description = fields.String( + allow_none=True, metadata={ "description": "Optional details regarding the close date", "example": "Proposals are due earlier than usual.", - } + }, ) post_date = fields.Date( + allow_none=True, metadata={ "description": "The date the opportunity was posted", - } + }, ) archive_date = fields.Date( + allow_none=True, metadata={ "description": "When the opportunity will be archived", - } + }, ) # not including unarchive date at the moment expected_number_of_awards = fields.Integer( + allow_none=True, metadata={ "description": "The number of awards the opportunity is expected to award", "example": 10, - } + }, ) estimated_total_program_funding = fields.Integer( + allow_none=True, metadata={ "description": "The total program funding of the opportunity in US Dollars", "example": 10_000_000, - } + }, ) award_floor = fields.Integer( + allow_none=True, metadata={ "description": "The minimum amount an opportunity would award", "example": 10_000, - } + }, ) award_ceiling = fields.Integer( + allow_none=True, metadata={ "description": "The maximum amount an opportunity would award", "example": 100_000, - } + }, ) additional_info_url = 
fields.String( + allow_none=True, metadata={ "description": "A URL to a website that can provide additional information about the opportunity", "example": "grants.gov", - } + }, ) additional_info_url_description = fields.String( + allow_none=True, metadata={ "description": "The text to display for the additional_info_url link", "example": "Click me for more info", - } + }, ) forecasted_post_date = fields.Date( + allow_none=True, metadata={ "description": "Forecasted opportunity only. The date the opportunity is expected to be posted, and transition out of being a forecast" - } + }, ) forecasted_close_date = fields.Date( + allow_none=True, metadata={ "description": "Forecasted opportunity only. The date the opportunity is expected to be close once posted." - } + }, ) forecasted_close_date_description = fields.String( + allow_none=True, metadata={ "description": "Forecasted opportunity only. Optional details regarding the forecasted closed date.", "example": "Proposals will probably be due on this date", - } + }, ) forecasted_award_date = fields.Date( + allow_none=True, metadata={ "description": "Forecasted opportunity only. The date the grantor plans to award the opportunity." - } + }, ) forecasted_project_start_date = fields.Date( + allow_none=True, metadata={ "description": "Forecasted opportunity only. The date the grantor expects the award recipient should start their project" - } + }, ) fiscal_year = fields.Integer( + allow_none=True, metadata={ "description": "Forecasted opportunity only. The fiscal year the project is expected to be funded and launched" - } + }, ) funding_category_description = fields.String( + allow_none=True, metadata={ "description": "Additional information about the funding category", "example": "Economic Support", - } + }, ) applicant_eligibility_description = fields.String( + allow_none=True, metadata={ "description": "Additional information about the types of applicants that are eligible", "example": "All types of domestic applicants are eligible to apply", - } + }, ) agency_code = fields.String( + allow_none=True, metadata={ "description": "The agency who owns the opportunity", "example": "US-ABC", - } + }, ) agency_name = fields.String( + allow_none=True, metadata={ "description": "The name of the agency who owns the opportunity", "example": "US Alphabetical Basic Corp", - } + }, ) agency_phone_number = fields.String( + allow_none=True, metadata={ "description": "The phone number of the agency who owns the opportunity", "example": "123-456-7890", - } + }, ) agency_contact_description = fields.String( + allow_none=True, metadata={ "description": "Information regarding contacting the agency who owns the opportunity", "example": "For more information, reach out to Jane Smith at agency US-ABC", - } + }, ) agency_email_address = fields.String( + allow_none=True, metadata={ "description": "The contact email of the agency who owns the opportunity", "example": "fake_email@grants.gov", - } + }, ) agency_email_address_description = fields.String( + allow_none=True, metadata={ "description": "The text for the link to the agency email address", "example": "Click me to email the agency", - } + }, ) funding_instruments = fields.List(fields.Enum(FundingInstrument)) @@ -180,16 +206,18 @@ class OpportunitySummaryV1Schema(Schema): class OpportunityAssistanceListingV1Schema(Schema): program_title = fields.String( + allow_none=True, metadata={ "description": "The name of the program, see https://sam.gov/content/assistance-listings for more detail", "example": "Space 
Technology", - } + }, ) assistance_listing_number = fields.String( + allow_none=True, metadata={ "description": "The assistance listing number, see https://sam.gov/content/assistance-listings for more detail", "example": "43.012", - } + }, ) @@ -199,30 +227,35 @@ class OpportunityV1Schema(Schema): ) opportunity_number = fields.String( - metadata={"description": "The funding opportunity number", "example": "ABC-123-XYZ-001"} + allow_none=True, + metadata={"description": "The funding opportunity number", "example": "ABC-123-XYZ-001"}, ) opportunity_title = fields.String( + allow_none=True, metadata={ "description": "The title of the opportunity", "example": "Research into conservation techniques", - } + }, ) agency = fields.String( - metadata={"description": "The agency who created the opportunity", "example": "US-ABC"} + allow_none=True, + metadata={"description": "The agency who created the opportunity", "example": "US-ABC"}, ) category = fields.Enum( OpportunityCategory, + allow_none=True, metadata={ "description": "The opportunity category", "example": OpportunityCategory.DISCRETIONARY, }, ) category_explanation = fields.String( + allow_none=True, metadata={ "description": "Explanation of the category when the category is 'O' (other)", "example": None, - } + }, ) opportunity_assistance_listings = fields.List( diff --git a/api/src/api/schemas/extension/schema_fields.py b/api/src/api/schemas/extension/schema_fields.py index d4678f69d..8431c3ecd 100644 --- a/api/src/api/schemas/extension/schema_fields.py +++ b/api/src/api/schemas/extension/schema_fields.py @@ -39,19 +39,7 @@ class MixinField(original_fields.Field): } def __init__(self, **kwargs: typing.Any) -> None: - # By default, make it so if a field is required, null isn't allowed - # otherwise null is allowed. You can modify this behavior (ie. required, and none allowed) - # by explicitly setting the allow_none field. 
- is_required = kwargs.get("required", None) - allow_none = kwargs.pop("allow_none", None) - - if allow_none is None: - if is_required: - allow_none = False - else: - allow_none = True - - super().__init__(allow_none=allow_none, **kwargs) + super().__init__(**kwargs) # The actual error mapping used for a specific instance self._error_mapping: dict[str, MarshmallowErrorContainer] = {} From 3d933e867f9f6facb35d30e8659b4be5138d7602 Mon Sep 17 00:00:00 2001 From: nava-platform-bot Date: Thu, 30 May 2024 19:58:40 +0000 Subject: [PATCH 15/19] Update OpenAPI spec --- api/openapi.generated.yml | 880 ++++++++++---------------------------- 1 file changed, 216 insertions(+), 664 deletions(-) diff --git a/api/openapi.generated.yml b/api/openapi.generated.yml index ae109a5eb..5c2b505e8 100644 --- a/api/openapi.generated.yml +++ b/api/openapi.generated.yml @@ -37,36 +37,26 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/Healthcheck' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: &id001 - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: &id002 - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Successful response '503': content: @@ -75,32 +65,24 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id001 - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: *id002 - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Service Unavailable tags: - Health @@ -112,9 +94,7 @@ paths: name: FF-Enable-Opportunity-Log-Msg description: Whether to log a message in the opportunity endpoint schema: - type: - - boolean - - 'null' + type: boolean required: false responses: '200': @@ -124,38 +104,28 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: type: array items: $ref: '#/components/schemas/OpportunityV0' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: &id003 - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: &id004 - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Successful response '422': content: @@ -164,32 +134,24 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id003 - anyOf: + 
allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: *id004 - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Validation error '401': content: @@ -198,32 +160,24 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id003 - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: *id004 - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Authentication error tags: - Opportunity v0 @@ -262,36 +216,26 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/OpportunitySearchResponseV1' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: &id005 - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: &id006 - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Successful response '422': content: @@ -300,32 +244,24 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id005 - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: *id006 - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Validation error '401': content: @@ -334,32 +270,24 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id005 - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: *id006 - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Authentication error tags: - Opportunity v1 @@ -453,38 +381,28 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: type: array items: $ref: '#/components/schemas/OpportunityV01' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: &id007 - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array 
items: type: &id008 - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Successful response '422': content: @@ -493,32 +411,24 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id007 - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: *id008 - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Validation error '401': content: @@ -527,32 +437,24 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id007 - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: *id008 - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Authentication error tags: - Opportunity v0.1 @@ -637,36 +539,26 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/OpportunityV0' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: &id009 - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: &id010 - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Successful response '401': content: @@ -675,32 +567,24 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id009 - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: *id010 - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Authentication error '404': content: @@ -709,32 +593,24 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id009 - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: *id010 - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Not found tags: - Opportunity v0 @@ -773,36 
+649,26 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/OpportunityV1' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: &id011 - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: &id012 - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Successful response '401': content: @@ -811,32 +677,24 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id011 - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: *id012 - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Authentication error '404': content: @@ -845,32 +703,24 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id011 - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: *id012 - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Not found tags: - Opportunity v1 @@ -909,36 +759,26 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/OpportunityV01' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: &id013 - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: &id014 - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Successful response '401': content: @@ -947,32 +787,24 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: '#/components/schemas/ErrorResponse' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id013 - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: *id014 - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Authentication error '404': content: @@ -981,32 +813,24 @@ paths: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: $ref: 
'#/components/schemas/ErrorResponse' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code pagination_info: description: The pagination information for paginated endpoints type: *id013 - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' warnings: - type: - - array - - 'null' + type: array items: type: *id014 - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' description: Not found tags: - Opportunity v0.1 @@ -1036,33 +860,23 @@ components: type: object properties: page_offset: - type: - - integer - - 'null' + type: integer description: The page number that was fetched example: 1 page_size: - type: - - integer - - 'null' + type: integer description: The size of the page fetched example: 25 total_records: - type: - - integer - - 'null' + type: integer description: The total number of records fetchable example: 42 total_pages: - type: - - integer - - 'null' + type: integer description: The total number of pages that can be fetched example: 2 order_by: - type: - - string - - 'null' + type: string description: The field that the records were sorted by example: id sort_direction: @@ -1072,61 +886,41 @@ components: - descending type: - string - - 'null' - - 'null' ValidationIssue: type: object properties: type: - type: - - string - - 'null' + type: string description: The type of error message: - type: - - string - - 'null' + type: string description: The message to return field: - type: - - string - - 'null' + type: string description: The field that failed Healthcheck: type: object properties: message: - type: - - string - - 'null' + type: string ErrorResponse: type: object properties: message: - type: - - string - - 'null' + type: string description: The message to return data: description: The REST resource object - type: - - 'null' status_code: - type: - - integer - - 'null' + type: integer description: The HTTP status code errors: - type: - - array - - 'null' + type: array items: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/ValidationIssue' - - type: 'null' OpportunitySorting: type: object properties: @@ -1169,9 +963,7 @@ components: type: object properties: opportunity_title: - type: - - string - - 'null' + type: string description: The title of the opportunity to search for example: research category: @@ -1186,8 +978,6 @@ components: - O type: - string - - 'null' - - 'null' sorting: type: - object @@ -1205,28 +995,20 @@ components: type: object properties: opportunity_id: - type: - - integer - - 'null' + type: integer readOnly: true description: The internal ID of the opportunity example: 12345 opportunity_number: - type: - - string - - 'null' + type: string description: The funding opportunity number example: ABC-123-XYZ-001 opportunity_title: - type: - - string - - 'null' + type: string description: The title of the opportunity example: Research into conservation techniques agency: - type: - - string - - 'null' + type: string description: The agency who created the opportunity example: US-ABC category: @@ -1241,46 +1023,32 @@ components: - O type: - string - - 'null' - - 'null' category_explanation: - type: - - string - - 'null' + type: string description: Explanation of the category when the category is 'O' (other) example: null revision_number: - type: - - integer - - 'null' + type: integer description: The current revision number of the opportunity, counting starts at 0 example: 0 modified_comments: - type: - - string - - 'null' + type: string description: 
Details regarding what modification was last made example: null created_at: - type: - - string - - 'null' + type: string format: date-time readOnly: true updated_at: - type: - - string - - 'null' + type: string format: date-time readOnly: true FundingInstrumentFilterV1: type: object properties: one_of: - type: - - array - - 'null' + type: array minItems: 1 items: enum: @@ -1290,15 +1058,11 @@ components: - other type: - string - - 'null' - - 'null' FundingCategoryFilterV1: type: object properties: one_of: - type: - - array - - 'null' + type: array minItems: 1 items: enum: @@ -1330,15 +1094,11 @@ components: - other type: - string - - 'null' - - 'null' ApplicantTypeFilterV1: type: object properties: one_of: - type: - - array - - 'null' + type: array minItems: 1 items: enum: @@ -1361,15 +1121,11 @@ components: - unrestricted type: - string - - 'null' - - 'null' OpportunityStatusFilterV1: type: object properties: one_of: - type: - - array - - 'null' + type: array minItems: 1 items: enum: @@ -1379,20 +1135,14 @@ components: - archived type: - string - - 'null' - - 'null' AgencyFilterV1: type: object properties: one_of: - type: - - array - - 'null' + type: array minItems: 1 items: - type: - - string - - 'null' + type: string minLength: 2 example: USAID OpportunitySearchFilterV1: @@ -1401,38 +1151,28 @@ components: funding_instrument: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/FundingInstrumentFilterV1' - - type: 'null' funding_category: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/FundingCategoryFilterV1' - - type: 'null' applicant_type: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/ApplicantTypeFilterV1' - - type: 'null' opportunity_status: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/OpportunityStatusFilterV1' - - type: 'null' agency: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/AgencyFilterV1' - - type: 'null' OpportunityPaginationV1: type: object properties: @@ -1473,9 +1213,7 @@ components: type: object properties: query: - type: - - string - - 'null' + type: string minLength: 1 maxLength: 100 description: Query string which searches against several text fields @@ -1483,10 +1221,8 @@ components: filters: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/OpportunitySearchFilterV1' - - type: 'null' pagination: type: - object @@ -1527,9 +1263,7 @@ components: description: Whether or not the opportunity has a cost sharing/matching requirement is_forecast: - type: - - boolean - - 'null' + type: boolean description: Whether the opportunity is forecasted, that is, the information is only an estimate and not yet official example: false @@ -1685,9 +1419,7 @@ components: description: The text for the link to the agency email address example: Click me to email the agency funding_instruments: - type: - - array - - 'null' + type: array items: enum: - cooperative_agreement @@ -1696,12 +1428,8 @@ components: - other type: - string - - 'null' - - 'null' funding_categories: - type: - - array - - 'null' + type: array items: enum: - recovery_act @@ -1732,12 +1460,8 @@ components: - other type: - string - - 'null' - - 'null' applicant_types: - type: - - array - - 'null' + type: array items: enum: - state_governments @@ -1759,15 +1483,11 @@ components: - unrestricted type: - string - - 'null' - - 'null' OpportunityV1: type: object properties: opportunity_id: - type: - - integer - - 'null' + type: integer description: The internal ID of the 
opportunity example: 12345 opportunity_number: @@ -1809,23 +1529,17 @@ components: description: Explanation of the category when the category is 'O' (other) example: null opportunity_assistance_listings: - type: - - array - - 'null' + type: array items: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/OpportunityAssistanceListingV1' - - type: 'null' summary: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/OpportunitySummaryV1' - - type: 'null' opportunity_status: description: The current status of the opportunity example: !!python/object/apply:src.constants.lookup_constants.OpportunityStatus @@ -1837,123 +1551,87 @@ components: - archived type: - string - - 'null' - - 'null' created_at: - type: - - string - - 'null' + type: string format: date-time readOnly: true updated_at: - type: - - string - - 'null' + type: string format: date-time readOnly: true OpportunityFacetV1: type: object properties: opportunity_status: - type: - - object - - 'null' + type: object description: The counts of opportunity_status values in the full response example: posted: 1 forecasted: 2 additionalProperties: - type: - - integer - - 'null' + type: integer applicant_type: - type: - - object - - 'null' + type: object description: The counts of applicant_type values in the full response example: state_governments: 3 county_governments: 2 city_or_township_governments: 1 additionalProperties: - type: - - integer - - 'null' + type: integer funding_instrument: - type: - - object - - 'null' + type: object description: The counts of funding_instrument values in the full response example: cooperative_agreement: 4 grant: 3 additionalProperties: - type: - - integer - - 'null' + type: integer funding_category: - type: - - object - - 'null' + type: object description: The counts of funding_category values in the full response example: recovery_act: 2 arts: 3 agriculture: 5 additionalProperties: - type: - - integer - - 'null' + type: integer agency: - type: - - object - - 'null' + type: object description: The counts of agency values in the full response example: USAID: 4 ARPAH: 3 additionalProperties: - type: - - integer - - 'null' + type: integer OpportunitySearchResponseV1: type: object properties: opportunities: - type: - - array - - 'null' + type: array description: The opportunity records items: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/OpportunityV1' - - type: 'null' facet_counts: description: Counts of filter/facet values in the full response type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/OpportunityFacetV1' - - type: 'null' pagination_info: description: The pagination information for the search response type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/PaginationInfo' - - type: 'null' FundingInstrumentFilterV01: type: object properties: one_of: - type: - - array - - 'null' + type: array minItems: 1 items: enum: @@ -1963,15 +1641,11 @@ components: - other type: - string - - 'null' - - 'null' FundingCategoryFilterV01: type: object properties: one_of: - type: - - array - - 'null' + type: array minItems: 1 items: enum: @@ -2003,15 +1677,11 @@ components: - other type: - string - - 'null' - - 'null' ApplicantTypeFilterV01: type: object properties: one_of: - type: - - array - - 'null' + type: array minItems: 1 items: enum: @@ -2034,15 +1704,11 @@ components: - unrestricted type: - string - - 'null' - - 'null' OpportunityStatusFilterV01: type: object properties: one_of: - type: - - array - - 
'null' + type: array minItems: 1 items: enum: @@ -2052,20 +1718,14 @@ components: - archived type: - string - - 'null' - - 'null' AgencyFilterV01: type: object properties: one_of: - type: - - array - - 'null' + type: array minItems: 1 items: - type: - - string - - 'null' + type: string minLength: 2 example: US-ABC OpportunitySearchFilterV01: @@ -2074,38 +1734,28 @@ components: funding_instrument: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/FundingInstrumentFilterV01' - - type: 'null' funding_category: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/FundingCategoryFilterV01' - - type: 'null' applicant_type: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/ApplicantTypeFilterV01' - - type: 'null' opportunity_status: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/OpportunityStatusFilterV01' - - type: 'null' agency: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/AgencyFilterV01' - - type: 'null' OpportunityPagination: type: object properties: @@ -2145,9 +1795,7 @@ components: type: object properties: query: - type: - - string - - 'null' + type: string minLength: 1 maxLength: 100 description: Query string which searches against several text fields @@ -2155,10 +1803,8 @@ components: filters: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/OpportunitySearchFilterV01' - - type: 'null' pagination: type: - object @@ -2170,16 +1816,12 @@ components: type: object properties: program_title: - type: - - string - - 'null' + type: string description: The name of the program, see https://sam.gov/content/assistance-listings for more detail example: Space Technology assistance_listing_number: - type: - - string - - 'null' + type: string description: The assistance listing number, see https://sam.gov/content/assistance-listings for more detail example: '43.012' @@ -2187,179 +1829,123 @@ components: type: object properties: summary_description: - type: - - string - - 'null' + type: string description: The summary of the opportunity example: This opportunity aims to unravel the mysteries of the universe. is_cost_sharing: - type: - - boolean - - 'null' + type: boolean description: Whether or not the opportunity has a cost sharing/matching requirement is_forecast: - type: - - boolean - - 'null' + type: boolean description: Whether the opportunity is forecasted, that is, the information is only an estimate and not yet official example: false close_date: - type: - - string - - 'null' + type: string format: date description: The date that the opportunity will close - only set if is_forecast=False close_date_description: - type: - - string - - 'null' + type: string description: Optional details regarding the close date example: Proposals are due earlier than usual. 
post_date: - type: - - string - - 'null' + type: string format: date description: The date the opportunity was posted archive_date: - type: - - string - - 'null' + type: string format: date description: When the opportunity will be archived expected_number_of_awards: - type: - - integer - - 'null' + type: integer description: The number of awards the opportunity is expected to award example: 10 estimated_total_program_funding: - type: - - integer - - 'null' + type: integer description: The total program funding of the opportunity in US Dollars example: 10000000 award_floor: - type: - - integer - - 'null' + type: integer description: The minimum amount an opportunity would award example: 10000 award_ceiling: - type: - - integer - - 'null' + type: integer description: The maximum amount an opportunity would award example: 100000 additional_info_url: - type: - - string - - 'null' + type: string description: A URL to a website that can provide additional information about the opportunity example: grants.gov additional_info_url_description: - type: - - string - - 'null' + type: string description: The text to display for the additional_info_url link example: Click me for more info forecasted_post_date: - type: - - string - - 'null' + type: string format: date description: Forecasted opportunity only. The date the opportunity is expected to be posted, and transition out of being a forecast forecasted_close_date: - type: - - string - - 'null' + type: string format: date description: Forecasted opportunity only. The date the opportunity is expected to be close once posted. forecasted_close_date_description: - type: - - string - - 'null' + type: string description: Forecasted opportunity only. Optional details regarding the forecasted closed date. example: Proposals will probably be due on this date forecasted_award_date: - type: - - string - - 'null' + type: string format: date description: Forecasted opportunity only. The date the grantor plans to award the opportunity. forecasted_project_start_date: - type: - - string - - 'null' + type: string format: date description: Forecasted opportunity only. The date the grantor expects the award recipient should start their project fiscal_year: - type: - - integer - - 'null' + type: integer description: Forecasted opportunity only. 
The fiscal year the project is expected to be funded and launched funding_category_description: - type: - - string - - 'null' + type: string description: Additional information about the funding category example: Economic Support applicant_eligibility_description: - type: - - string - - 'null' + type: string description: Additional information about the types of applicants that are eligible example: All types of domestic applicants are eligible to apply agency_code: - type: - - string - - 'null' + type: string description: The agency who owns the opportunity example: US-ABC agency_name: - type: - - string - - 'null' + type: string description: The name of the agency who owns the opportunity example: US Alphabetical Basic Corp agency_phone_number: - type: - - string - - 'null' + type: string description: The phone number of the agency who owns the opportunity example: 123-456-7890 agency_contact_description: - type: - - string - - 'null' + type: string description: Information regarding contacting the agency who owns the opportunity example: For more information, reach out to Jane Smith at agency US-ABC agency_email_address: - type: - - string - - 'null' + type: string description: The contact email of the agency who owns the opportunity example: fake_email@grants.gov agency_email_address_description: - type: - - string - - 'null' + type: string description: The text for the link to the agency email address example: Click me to email the agency funding_instruments: - type: - - array - - 'null' + type: array items: enum: - cooperative_agreement @@ -2368,12 +1954,8 @@ components: - other type: - string - - 'null' - - 'null' funding_categories: - type: - - array - - 'null' + type: array items: enum: - recovery_act @@ -2404,12 +1986,8 @@ components: - other type: - string - - 'null' - - 'null' applicant_types: - type: - - array - - 'null' + type: array items: enum: - state_governments @@ -2431,34 +2009,24 @@ components: - unrestricted type: - string - - 'null' - - 'null' OpportunityV01: type: object properties: opportunity_id: - type: - - integer - - 'null' + type: integer readOnly: true description: The internal ID of the opportunity example: 12345 opportunity_number: - type: - - string - - 'null' + type: string description: The funding opportunity number example: ABC-123-XYZ-001 opportunity_title: - type: - - string - - 'null' + type: string description: The title of the opportunity example: Research into conservation techniques agency: - type: - - string - - 'null' + type: string description: The agency who created the opportunity example: US-ABC category: @@ -2473,32 +2041,22 @@ components: - other type: - string - - 'null' - - 'null' category_explanation: - type: - - string - - 'null' + type: string description: Explanation of the category when the category is 'O' (other) example: null opportunity_assistance_listings: - type: - - array - - 'null' + type: array items: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/OpportunityAssistanceListingV01' - - type: 'null' summary: type: - object - - 'null' - anyOf: + allOf: - $ref: '#/components/schemas/OpportunitySummaryV01' - - type: 'null' opportunity_status: description: The current status of the opportunity example: !!python/object/apply:src.constants.lookup_constants.OpportunityStatus @@ -2510,18 +2068,12 @@ components: - archived type: - string - - 'null' - - 'null' created_at: - type: - - string - - 'null' + type: string format: date-time readOnly: true updated_at: - type: - - string - - 'null' + type: string 
format: date-time readOnly: true securitySchemes: From 103eab501ea362cdfbe3e1881ef3d8267c290faa Mon Sep 17 00:00:00 2001 From: Michael Chouinard Date: Mon, 17 Jun 2024 11:22:32 -0400 Subject: [PATCH 16/19] [Issue #86] Download the search response as a CSV file --- .../opportunities_v1/opportunity_routes.py | 40 +++++++- .../opportunities_v1/opportunity_schemas.py | 19 ++++ .../opportunities_v1/opportunity_to_csv.py | 93 +++++++++++++++++++ .../opportunities_v1/search_opportunities.py | 8 +- .../src/api/opportunities_v1/conftest.py | 4 + .../test_opportunity_route_search.py | 46 +++++---- api/tests/src/db/models/factories.py | 9 +- 7 files changed, 194 insertions(+), 25 deletions(-) create mode 100644 api/src/services/opportunities_v1/opportunity_to_csv.py diff --git a/api/src/api/opportunities_v1/opportunity_routes.py b/api/src/api/opportunities_v1/opportunity_routes.py index c72e49133..e4c165534 100644 --- a/api/src/api/opportunities_v1/opportunity_routes.py +++ b/api/src/api/opportunities_v1/opportunity_routes.py @@ -1,15 +1,19 @@ import logging +from flask import Response + import src.adapters.db as db import src.adapters.db.flask_db as flask_db import src.adapters.search as search import src.adapters.search.flask_opensearch as flask_opensearch import src.api.opportunities_v1.opportunity_schemas as opportunity_schemas import src.api.response as response +import src.util.datetime_util as datetime_util from src.api.opportunities_v1.opportunity_blueprint import opportunity_blueprint from src.auth.api_key_auth import api_key_auth from src.logging.flask_logger import add_extra_data_to_current_request_logs from src.services.opportunities_v1.get_opportunity import get_opportunity +from src.services.opportunities_v1.opportunity_to_csv import opportunity_to_csv from src.services.opportunities_v1.search_opportunities import search_opportunities from src.util.dict_util import flatten_dict @@ -74,6 +78,21 @@ }, }, }, + "example4": { + "summary": "CSV file response", + "value": { + "format": "csv", + "filters": { + "opportunity_status": {"one_of": ["forecasted", "posted"]}, + }, + "pagination": { + "order_by": "opportunity_id", + "page_offset": 1, + "page_size": 100, + "sort_direction": "ascending", + }, + }, + }, } @@ -85,11 +104,16 @@ ) @opportunity_blueprint.output(opportunity_schemas.OpportunitySearchResponseV1Schema()) @opportunity_blueprint.auth_required(api_key_auth) -@opportunity_blueprint.doc(description=SHARED_ALPHA_DESCRIPTION) +@opportunity_blueprint.doc( + description=SHARED_ALPHA_DESCRIPTION, + # This adds a file response schema + # in addition to the one added by the output decorator + responses={200: {"content": {"application/octet-stream": {}}}}, # type: ignore +) @flask_opensearch.with_search_client() def opportunity_search( search_client: search.SearchClient, search_params: dict -) -> response.ApiResponse: +) -> response.ApiResponse | Response: add_extra_data_to_current_request_logs(flatten_dict(search_params, prefix="request.body")) logger.info("POST /v1/opportunities/search") @@ -105,6 +129,18 @@ def opportunity_search( ) logger.info("Successfully fetched opportunities") + if search_params.get("format") == opportunity_schemas.SearchResponseFormat.CSV: + # Convert the response into a CSV and return the contents + output = opportunity_to_csv(opportunities) + timestamp = datetime_util.utcnow().strftime("%Y%m%d-%H%M%S") + return Response( + output.getvalue().encode("utf-8"), + content_type="text/csv", + headers={ + "Content-Disposition": f"attachment; 
filename=opportunity_search_results_{timestamp}.csv" + }, + ) + return response.ApiResponse( message="Success", data=opportunities, diff --git a/api/src/api/opportunities_v1/opportunity_schemas.py b/api/src/api/opportunities_v1/opportunity_schemas.py index 76d1a9cb0..6bd5ad9e2 100644 --- a/api/src/api/opportunities_v1/opportunity_schemas.py +++ b/api/src/api/opportunities_v1/opportunity_schemas.py @@ -1,3 +1,5 @@ +from enum import StrEnum + from src.api.schemas.extension import Schema, fields, validators from src.api.schemas.response_schema import AbstractResponseSchema, PaginationMixinSchema from src.api.schemas.search_schema import StrSearchSchemaBuilder @@ -11,6 +13,11 @@ from src.pagination.pagination_schema import generate_pagination_schema +class SearchResponseFormat(StrEnum): + JSON = "json" + CSV = "csv" + + class OpportunitySummaryV1Schema(Schema): summary_description = fields.String( allow_none=True, @@ -204,6 +211,9 @@ class OpportunitySummaryV1Schema(Schema): funding_categories = fields.List(fields.Enum(FundingCategory)) applicant_types = fields.List(fields.Enum(ApplicantType)) + created_at = fields.DateTime(metadata={"description": "TODO"}) + updated_at = fields.DateTime(metadata={"description": "TODO"}) + class OpportunityAssistanceListingV1Schema(Schema): program_title = fields.String( @@ -378,6 +388,15 @@ class OpportunitySearchRequestV1Schema(Schema): required=True, ) + format = fields.Enum( + SearchResponseFormat, + load_default=SearchResponseFormat.JSON, + metadata={ + "description": "The format of the response", + "default": SearchResponseFormat.JSON, + }, + ) + class OpportunityGetResponseV1Schema(AbstractResponseSchema): data = fields.Nested(OpportunityV1Schema()) diff --git a/api/src/services/opportunities_v1/opportunity_to_csv.py b/api/src/services/opportunities_v1/opportunity_to_csv.py new file mode 100644 index 000000000..8be6f6c0d --- /dev/null +++ b/api/src/services/opportunities_v1/opportunity_to_csv.py @@ -0,0 +1,93 @@ +import csv +import io +from typing import Sequence + +from src.util.dict_util import flatten_dict + +CSV_FIELDS = [ + "opportunity_id", + "opportunity_number", + "opportunity_title", + "opportunity_status", + "agency", + "category", + "category_explanation", + "post_date", + "close_date", + "close_date_description", + "archive_date", + "is_cost_sharing", + "expected_number_of_awards", + "estimated_total_program_funding", + "award_floor", + "award_ceiling", + "additional_info_url", + "additional_info_url_description", + "opportunity_assistance_listings", + "funding_instruments", + "funding_categories", + "funding_category_description", + "applicant_types", + "applicant_eligibility_description", + "agency_code", + "agency_name", + "agency_phone_number", + "agency_contact_description", + "agency_email_address", + "agency_email_address_description", + "is_forecast", + "forecasted_post_date", + "forecasted_close_date", + "forecasted_close_date_description", + "forecasted_award_date", + "forecasted_project_start_date", + "fiscal_year", + "created_at", + "updated_at", + # We put the description at the end as it's the longest value + # which can help improve readability of other fields + "summary_description", +] +# Same as above, but faster lookup +CSV_FIELDS_SET = set(CSV_FIELDS) + + +def _process_assistance_listing(assistance_listings: list[dict]) -> str: + return ";".join( + [f"{a['assistance_listing_number']}|{a['program_title']}" for a in assistance_listings] + ) + + +def opportunity_to_csv(opportunities: Sequence[dict]) -> io.StringIO: + 
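+    # Flatten each opportunity dict, strip the "summary." / "assistance_listings."
+    # prefixes, drop any keys not listed in CSV_FIELDS, join list-valued fields
+    # (and the assistance listing number|title pairs) with ";", then write fully
+    # quoted rows with csv.DictWriter in the CSV_FIELDS column order.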
opportunities_to_write: list[dict] = [] + + for opportunity in opportunities: + opp = flatten_dict(opportunity) + + out_opportunity = {} + for k, v in opp.items(): + # Remove prefixes from nested data structures + k = k.removeprefix("summary.") + k = k.removeprefix("assistance_listings.") + + # Remove fields we haven't configured + if k not in CSV_FIELDS_SET: + continue + + if k == "opportunity_assistance_listings": + v = _process_assistance_listing(v) + + if k in ["funding_instruments", "funding_categories", "applicant_types"]: + v = ";".join(v) + + out_opportunity[k] = v + + opportunities_to_write.append(out_opportunity) + + output = io.StringIO() + + writer = csv.DictWriter(output, fieldnames=CSV_FIELDS, quoting=csv.QUOTE_ALL) + writer.writeheader() + writer.writerows(opportunities_to_write) + + return output diff --git a/api/src/services/opportunities_v1/search_opportunities.py b/api/src/services/opportunities_v1/search_opportunities.py index 92a71344c..e3252e90e 100644 --- a/api/src/services/opportunities_v1/search_opportunities.py +++ b/api/src/services/opportunities_v1/search_opportunities.py @@ -83,10 +83,10 @@ def _add_search_filters(builder: search.SearchQueryBuilder, filters: dict | None def _add_aggregations(builder: search.SearchQueryBuilder) -> None: # TODO - we'll likely want to adjust the total number of values returned, especially # for agency as there could be hundreds of different agencies, and currently it's limited to 25. - builder.aggregation_terms("opportunity_status", _adjust_field_name("applicant_types")) - builder.aggregation_terms("applicant_type", _adjust_field_name("applicant_types")) - builder.aggregation_terms("funding_instrument", _adjust_field_name("funding_instruments")) - builder.aggregation_terms("funding_category", _adjust_field_name("funding_categories")) + builder.aggregation_terms("opportunity_status", _adjust_field_name("applicant_type")) + builder.aggregation_terms("applicant_type", _adjust_field_name("applicant_type")) + builder.aggregation_terms("funding_instrument", _adjust_field_name("funding_instrument")) + builder.aggregation_terms("funding_category", _adjust_field_name("funding_category")) builder.aggregation_terms("agency", _adjust_field_name("agency_code")) diff --git a/api/tests/src/api/opportunities_v1/conftest.py b/api/tests/src/api/opportunities_v1/conftest.py index c1babc7ba..5d0a9eaf4 100644 --- a/api/tests/src/api/opportunities_v1/conftest.py +++ b/api/tests/src/api/opportunities_v1/conftest.py @@ -22,6 +22,7 @@ def get_search_request( applicant_type_one_of: list[ApplicantType] | None = None, opportunity_status_one_of: list[OpportunityStatus] | None = None, agency_one_of: list[str] | None = None, + format: str | None = None, ): req = { "pagination": { @@ -55,6 +56,9 @@ def get_search_request( if query is not None: req["query"] = query + if format is not None: + req["format"] = format + return req diff --git a/api/tests/src/api/opportunities_v1/test_opportunity_route_search.py b/api/tests/src/api/opportunities_v1/test_opportunity_route_search.py index e3becbfb3..f889d6390 100644 --- a/api/tests/src/api/opportunities_v1/test_opportunity_route_search.py +++ b/api/tests/src/api/opportunities_v1/test_opportunity_route_search.py @@ -1,3 +1,4 @@ +import csv from datetime import date import pytest @@ -23,22 +24,42 @@ def validate_search_response( - search_response, expected_results: list[Opportunity], expected_status_code: int = 200 + search_response, + expected_results: list[Opportunity], + expected_status_code: int = 200, + 
is_csv_response: bool = False, ): assert search_response.status_code == expected_status_code - response_json = search_response.get_json() + expected_ids = [exp.opportunity_id for exp in expected_results] - opportunities = response_json["data"] + if is_csv_response: + reader = csv.DictReader(search_response.text.split("\n")) + opportunities = [record for record in reader] + else: + response_json = search_response.get_json() + opportunities = response_json["data"] - response_ids = [opp["opportunity_id"] for opp in opportunities] - expected_ids = [exp.opportunity_id for exp in expected_results] + response_ids = [int(opp["opportunity_id"]) for opp in opportunities] assert ( response_ids == expected_ids ), f"Actual opportunities:\n {'\n'.join([opp['opportunity_title'] for opp in opportunities])}" +def call_search_and_validate(client, api_auth_token, search_request, expected_results): + resp = client.post( + "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token} + ) + validate_search_response(resp, expected_results) + + search_request["format"] = "csv" + resp = client.post( + "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token} + ) + validate_search_response(resp, expected_results, is_csv_response=True) + + def build_opp( opportunity_title: str, opportunity_number: str, @@ -470,10 +491,7 @@ def setup_search_data(self, opportunity_index, opportunity_index_alias, search_c def test_sorting_and_pagination_200( self, client, api_auth_token, setup_search_data, search_request, expected_results ): - resp = client.post( - "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token} - ) - validate_search_response(resp, expected_results) + call_search_and_validate(client, api_auth_token, search_request, expected_results) @pytest.mark.parametrize( "search_request, expected_results", @@ -690,10 +708,7 @@ def test_sorting_and_pagination_200( def test_search_filters_200( self, client, api_auth_token, setup_search_data, search_request, expected_results ): - resp = client.post( - "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token} - ) - validate_search_response(resp, expected_results) + call_search_and_validate(client, api_auth_token, search_request, expected_results) @pytest.mark.parametrize( "search_request, expected_results", @@ -758,7 +773,4 @@ def test_search_query_200( ): # This test isn't looking to validate opensearch behavior, just that we've connected fields properly and # results being returned are as expected. 
- resp = client.post( - "/v1/opportunities/search", json=search_request, headers={"X-Auth": api_auth_token} - ) - validate_search_response(resp, expected_results) + call_search_and_validate(client, api_auth_token, search_request, expected_results) diff --git a/api/tests/src/db/models/factories.py b/api/tests/src/db/models/factories.py index 624a80a5c..3fb784d43 100644 --- a/api/tests/src/db/models/factories.py +++ b/api/tests/src/db/models/factories.py @@ -94,7 +94,7 @@ class CustomProvider(BaseProvider): AGENCY_CONTACT_DESC_FORMATS = [ "{{name}}\n{{job}}\n555-###-####\n{{email}}", "{{relevant_url}} Contact Center\nHours of operation are 24 hours a day, 7 days a week.\n{{email}}", - "{{agency}} Webmaster\n{{email}}", + "Webmaster\n{{email}}", ] # Rather than generate any random URL in our data, use those @@ -367,7 +367,7 @@ class Meta: no_declaration=None, ) - agency_code = factory.Faker("agency") + agency_code = factory.LazyAttribute(lambda s: s.opportunity.agency) agency_name = factory.Faker("agency_name") agency_phone_number = Generators.PhoneNumber agency_contact_description = factory.Faker("agency_contact_description") @@ -438,6 +438,11 @@ class Meta: unique=True, ) + created_at = factory.Faker("date_time") + updated_at = factory.LazyAttribute( + lambda o: fake.date_time_between(start_date=o.created_at, end_date="now") + ) + class Params: # These are common overrides we might want for an opportunity summary. # Simply pass the in `trait_name=True` to the factory when making an object From 12be2896be037371910074d2921e90cba744c4f5 Mon Sep 17 00:00:00 2001 From: nava-platform-bot Date: Mon, 17 Jun 2024 15:33:23 +0000 Subject: [PATCH 17/19] Update OpenAPI spec --- api/openapi.generated.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/api/openapi.generated.yml b/api/openapi.generated.yml index 34d056f83..45b730053 100644 --- a/api/openapi.generated.yml +++ b/api/openapi.generated.yml @@ -108,6 +108,7 @@ paths: application/json: schema: $ref: '#/components/schemas/OpportunitySearchResponseV1' + application/octet-stream: {} description: Successful response '422': content: @@ -200,6 +201,20 @@ paths: page_offset: 1 page_size: 25 sort_direction: descending + example4: + summary: CSV file response + value: + format: csv + filters: + opportunity_status: + one_of: + - forecasted + - posted + pagination: + order_by: opportunity_id + page_offset: 1 + page_size: 100 + sort_direction: ascending security: - ApiKeyAuth: [] /v0.1/opportunities/search: @@ -846,6 +861,15 @@ components: - object allOf: - $ref: '#/components/schemas/OpportunityPaginationV1' + format: + default: !!python/object/apply:src.api.opportunities_v1.opportunity_schemas.SearchResponseFormat + - json + description: The format of the response + enum: + - json + - csv + type: + - string required: - pagination OpportunityAssistanceListingV1: @@ -1101,6 +1125,14 @@ components: - unrestricted type: - string + created_at: + type: string + format: date-time + description: TODO + updated_at: + type: string + format: date-time + description: TODO OpportunityV1: type: object properties: From a3add378a19e7f7bd3216a4854cbea430ccae7eb Mon Sep 17 00:00:00 2001 From: Michael Chouinard Date: Mon, 17 Jun 2024 11:34:06 -0400 Subject: [PATCH 18/19] Fix a TODO --- api/src/api/opportunities_v1/opportunity_schemas.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/api/src/api/opportunities_v1/opportunity_schemas.py b/api/src/api/opportunities_v1/opportunity_schemas.py index 
6bd5ad9e2..91f69d816 100644 --- a/api/src/api/opportunities_v1/opportunity_schemas.py +++ b/api/src/api/opportunities_v1/opportunity_schemas.py @@ -211,8 +211,12 @@ class OpportunitySummaryV1Schema(Schema): funding_categories = fields.List(fields.Enum(FundingCategory)) applicant_types = fields.List(fields.Enum(ApplicantType)) - created_at = fields.DateTime(metadata={"description": "TODO"}) - updated_at = fields.DateTime(metadata={"description": "TODO"}) + created_at = fields.DateTime( + metadata={"description": "When the opportunity summary was created"} + ) + updated_at = fields.DateTime( + metadata={"description": "When the opportunity summary was last updated"} + ) class OpportunityAssistanceListingV1Schema(Schema): From 2f2dc176d723fbd5d7ca2c70b5ceaf31b7587525 Mon Sep 17 00:00:00 2001 From: nava-platform-bot Date: Mon, 17 Jun 2024 15:36:35 +0000 Subject: [PATCH 19/19] Update OpenAPI spec --- api/openapi.generated.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/openapi.generated.yml b/api/openapi.generated.yml index 45b730053..4a138cdec 100644 --- a/api/openapi.generated.yml +++ b/api/openapi.generated.yml @@ -1128,11 +1128,11 @@ components: created_at: type: string format: date-time - description: TODO + description: When the opportunity summary was created updated_at: type: string format: date-time - description: TODO + description: When the opportunity summary was last updated OpportunityV1: type: object properties:
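
The CSV export introduced in this series works by flattening each nested opportunity record into a single-level dict, dropping any keys that are not in the configured column list, joining multi-value fields (assistance listings, funding instruments, funding categories, applicant types) into ";"-separated strings, and writing the rows with csv.DictWriter. Below is a minimal, self-contained sketch of that flow; the flatten() helper is a simplified stand-in for src.util.dict_util.flatten_dict (whose implementation is not part of this patch) and the column list is trimmed for brevity.

    # Simplified sketch of the CSV export flow; not the project's actual helpers.
    import csv
    import io
    from typing import Any, Sequence

    FIELDS = ["opportunity_id", "opportunity_title", "applicant_types", "summary_description"]

    def flatten(data: dict, prefix: str = "") -> dict[str, Any]:
        # Collapse nested dicts into dotted keys, e.g. {"summary": {"x": 1}} -> {"summary.x": 1}
        out: dict[str, Any] = {}
        for key, value in data.items():
            full_key = f"{prefix}{key}"
            if isinstance(value, dict):
                out.update(flatten(value, prefix=f"{full_key}."))
            else:
                out[full_key] = value
        return out

    def to_csv(opportunities: Sequence[dict]) -> io.StringIO:
        rows = []
        for opportunity in opportunities:
            flat = flatten(opportunity)
            row = {}
            for key, value in flat.items():
                key = key.removeprefix("summary.")  # strip the nested-summary prefix
                if key not in FIELDS:               # drop columns we haven't configured
                    continue
                if isinstance(value, list):         # multi-value fields become ";"-joined strings
                    value = ";".join(str(v) for v in value)
                row[key] = value
            rows.append(row)

        output = io.StringIO()
        writer = csv.DictWriter(output, fieldnames=FIELDS, quoting=csv.QUOTE_ALL)
        writer.writeheader()
        writer.writerows(rows)
        return output

    # Round-trip check, mirroring how the tests read the CSV body back with DictReader
    # and convert opportunity_id back to an int (CSV values are always strings):
    opportunities = [
        {
            "opportunity_id": 1,
            "opportunity_title": "Example opportunity",
            "applicant_types": ["state_governments"],
            "summary": {"summary_description": "Example description"},
        },
    ]
    csv_text = to_csv(opportunities).getvalue()
    parsed = list(csv.DictReader(io.StringIO(csv_text)))
    assert [int(r["opportunity_id"]) for r in parsed] == [1]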
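
When the request's format field is "csv", the route returns the generated CSV as a file download instead of the usual JSON envelope, setting a Content-Disposition attachment header with the opportunity_search_results_{timestamp}.csv filename pattern seen in the route diff; the regenerated openapi.generated.yml reflects this by also advertising an application/octet-stream content type on the 200 response. The sketch below shows the general shape of that branch using a bare flask Response purely for illustration — the real endpoint goes through the project's response/ApiResponse helpers, and the timestamp format here is an assumption.

    # Hedged sketch of the format branch; to_csv() refers to the sketch above.
    import json
    from datetime import datetime, timezone

    from flask import Response

    def build_search_response(search_request: dict, opportunities: list[dict]) -> Response:
        if search_request.get("format") == "csv":
            output = to_csv(opportunities)
            # Hypothetical timestamp format; the real one is not shown in this diff
            timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
            return Response(
                output.getvalue(),
                content_type="text/csv",
                headers={
                    # Tells clients to save the body as a file rather than render it
                    "Content-Disposition": f"attachment; filename=opportunity_search_results_{timestamp}.csv"
                },
            )

        # Default: the usual JSON envelope
        return Response(
            json.dumps({"message": "Success", "data": opportunities}),
            content_type="application/json",
        )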
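
The factories change guarantees that updated_at never precedes created_at by deriving it lazily from the created_at value already generated for the same object. A tiny illustration of the same factory_boy pattern follows, using a hypothetical dict-backed factory rather than the project's models:

    # Illustration only: RecordFactory is hypothetical; the pattern mirrors the diff.
    import factory
    from faker import Faker

    fake = Faker()

    class RecordFactory(factory.Factory):
        class Meta:
            model = dict  # build plain dicts for this illustration

        created_at = factory.Faker("date_time")
        updated_at = factory.LazyAttribute(
            lambda o: fake.date_time_between(start_date=o.created_at, end_date="now")
        )

    record = RecordFactory.build()
    assert record["created_at"] <= record["updated_at"]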