diff --git a/api/Makefile b/api/Makefile
index f2774d3a7..d5daab1d2 100644
--- a/api/Makefile
+++ b/api/Makefile
@@ -100,7 +100,7 @@ start-debug:
 run-logs: start
 	docker-compose logs --follow --no-color $(APP_NAME)
 
-init: build init-db
+init: build init-db init-opensearch
 
 clean-volumes: ## Remove project docker volumes (which includes the DB state)
 	docker-compose down --volumes
@@ -179,6 +179,19 @@ create-erds: # Create ERD diagrams for our DB schema
 setup-postgres-db: ## Does any initial setup necessary for our local database to work
 	$(PY_RUN_CMD) setup-postgres-db
 
+##################################################
+# Opensearch
+##################################################
+
+init-opensearch: start-opensearch
+# TODO - in subsequent PRs, we'll add more to this command to set up the search index locally
+
+start-opensearch:
+	docker-compose up --detach opensearch-node
+	docker-compose up --detach opensearch-dashboards
+	./bin/wait-for-local-opensearch.sh
+
+
 
 ##################################################
 # Testing
diff --git a/api/bin/wait-for-local-opensearch.sh b/api/bin/wait-for-local-opensearch.sh
new file mode 100755
index 000000000..a14af8048
--- /dev/null
+++ b/api/bin/wait-for-local-opensearch.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# wait-for-local-opensearch
+
+set -e
+
+# Color formatting
+RED='\033[0;31m'
+NO_COLOR='\033[0m'
+
+MAX_WAIT_TIME=30 # seconds
+WAIT_TIME=0
+
+# Curl the healthcheck endpoint of the local opensearch
+# until it returns a success response
+until curl --output /dev/null --silent http://localhost:9200/_cluster/health;
+do
+  echo "waiting on OpenSearch to initialize..."
+  sleep 3
+
+  WAIT_TIME=$(($WAIT_TIME+3))
+  if [ $WAIT_TIME -gt $MAX_WAIT_TIME ]
+  then
+    echo -e "${RED}ERROR: OpenSearch does not appear to be starting up; running \"docker logs opensearch-node\" to help troubleshoot.${NO_COLOR}"
+    docker logs opensearch-node
+    exit 1
+  fi
+done
+
+echo "OpenSearch is ready after ~${WAIT_TIME} seconds"
+
+
diff --git a/api/docker-compose.yml b/api/docker-compose.yml
index a364c74c3..9ec206214 100644
--- a/api/docker-compose.yml
+++ b/api/docker-compose.yml
@@ -12,6 +12,41 @@ services:
     volumes:
       - grantsdbdata:/var/lib/postgresql/data
 
+  opensearch-node:
+    image: opensearchproject/opensearch:latest
+    container_name: opensearch-node
+    environment:
+      - cluster.name=opensearch-cluster # Name the cluster
+      - node.name=opensearch-node # Name the node that will run in this container
+      - discovery.type=single-node # Run as a single-node cluster; don't look for other nodes
+      - bootstrap.memory_lock=true # Disable JVM heap memory swapping
+      - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # Set min and max JVM heap sizes to at least 50% of system RAM
+      - DISABLE_INSTALL_DEMO_CONFIG=true # Prevents execution of bundled demo script which installs demo certificates and security configurations to OpenSearch
+      - DISABLE_SECURITY_PLUGIN=true # Disables Security plugin
+    ulimits:
+      memlock:
+        soft: -1 # Set memlock to unlimited (no soft or hard limit)
+        hard: -1
+      nofile:
+        soft: 65536 # Maximum number of open files for the opensearch user - set to at least 65536
+        hard: 65536
+    volumes:
+      - opensearch-data:/usr/share/opensearch/data # Creates volume called opensearch-data and mounts it to the container
+    ports:
+      - 9200:9200 # REST API
+      - 9600:9600 # Performance Analyzer
+
+  opensearch-dashboards:
+    image: opensearchproject/opensearch-dashboards:latest
+    container_name: opensearch-dashboards
+    ports:
+      - 5601:5601 # Map host port 5601 to container port 5601
+    expose:
+      - "5601" # Expose port 5601 for web access to OpenSearch Dashboards
+    environment:
+      - 'OPENSEARCH_HOSTS=["http://opensearch-node:9200"]'
+      - DISABLE_SECURITY_DASHBOARDS_PLUGIN=true # Disables Security Dashboards plugin in OpenSearch Dashboards
+
 grants-api:
   build:
     context: .
@@ -28,6 +63,8 @@ services:
       - .:/api
     depends_on:
       - grants-db
+      - opensearch-node
 
 volumes:
   grantsdbdata:
+  opensearch-data:
diff --git a/api/local.env b/api/local.env
index fc1c1c1a4..4ca4c86b5 100644
--- a/api/local.env
+++ b/api/local.env
@@ -59,6 +59,15 @@ DB_SSL_MODE=allow
 # could contain sensitive information.
 HIDE_SQL_PARAMETER_LOGS=TRUE
 
+############################
+# Opensearch Environment Variables
+############################
+
+OPENSEARCH_HOST=opensearch-node
+OPENSEARCH_PORT=9200
+OPENSEARCH_USE_SSL=FALSE
+OPENSEARCH_VERIFY_CERTS=FALSE
+
 ############################
 # AWS Defaults
 ############################
diff --git a/api/poetry.lock b/api/poetry.lock
index 5fe4fa9e1..017f1460e 100644
--- a/api/poetry.lock
+++ b/api/poetry.lock
@@ -1106,6 +1106,30 @@ files = [
     {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
 ]
 
+[[package]]
+name = "opensearch-py"
+version = "2.5.0"
+description = "Python client for OpenSearch"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,<4,>=2.7"
+files = [
+    {file = "opensearch-py-2.5.0.tar.gz", hash = "sha256:0dde4ac7158a717d92a8cd81964cb99705a4b80bcf9258ba195b9a9f23f5226d"},
+    {file = "opensearch_py-2.5.0-py2.py3-none-any.whl", hash = "sha256:cf093a40e272b60663f20417fc1264ac724dcf1e03c1a4542a6b44835b1e6c49"},
+]
+
+[package.dependencies]
+certifi = ">=2022.12.07"
+python-dateutil = "*"
+requests = ">=2.4.0,<3.0.0"
+six = "*"
+urllib3 = ">=1.26.18,<2"
+
+[package.extras]
+async = ["aiohttp (>=3,<4)"]
+develop = ["black", "botocore", "coverage (<8.0.0)", "jinja2", "mock", "myst-parser", "pytest (>=3.0.0)", "pytest-cov", "pytest-mock (<4.0.0)", "pytz", "pyyaml", "requests (>=2.0.0,<3.0.0)", "sphinx", "sphinx-copybutton", "sphinx-rtd-theme"]
+docs = ["aiohttp (>=3,<4)", "myst-parser", "sphinx", "sphinx-copybutton", "sphinx-rtd-theme"]
+kerberos = ["requests-kerberos"]
+
 [[package]]
 name = "packaging"
 version = "24.0"
@@ -1902,6 +1926,31 @@ files = [
     {file = "types_PyYAML-6.0.12.20240311-py3-none-any.whl", hash = "sha256:b845b06a1c7e54b8e5b4c683043de0d9caf205e7434b3edc678ff2411979b8f6"},
 ]
 
+[[package]]
+name = "types-requests"
+version = "2.31.0.1"
+description = "Typing stubs for requests"
+optional = false
+python-versions = "*"
+files = [
+    {file = "types-requests-2.31.0.1.tar.gz", hash = "sha256:3de667cffa123ce698591de0ad7db034a5317457a596eb0b4944e5a9d9e8d1ac"},
+    {file = "types_requests-2.31.0.1-py3-none-any.whl", hash = "sha256:afb06ef8f25ba83d59a1d424bd7a5a939082f94b94e90ab5e6116bd2559deaa3"},
+]
+
+[package.dependencies]
+types-urllib3 = "*"
+
+[[package]]
+name = "types-urllib3"
+version = "1.26.25.14"
+description = "Typing stubs for urllib3"
+optional = false
+python-versions = "*"
+files = [
+    {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"},
+    {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"},
+]
+
 [[package]]
 name = "typing-extensions"
 version = "4.11.0"
@@ -1941,20 +1990,19 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.2.1"
+version = "1.26.18"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
-python-versions = ">=3.8"
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
 files = [
-    {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"},
-    {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"},
+    {file = "urllib3-1.26.18-py2.py3-none-any.whl", hash = "sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07"},
+    {file = "urllib3-1.26.18.tar.gz", hash = "sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0"},
 ]
 
 [package.extras]
-brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"]
-h2 = ["h2 (>=4,<5)"]
-socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
-zstd = ["zstandard (>=0.18.0)"]
+brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"]
+secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"]
+socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
 
 [[package]]
 name = "watchdog"
@@ -2050,4 +2098,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.12"
-content-hash = "c53875955c1b910c3d4aa1748dce786e3cfa6f507895d7ca4111391333decb13"
+content-hash = "9671a2d68d2b1bc91b8ce111a7a32d08292475e0d1c4f058c33bf650349757e0"
diff --git a/api/pyproject.toml b/api/pyproject.toml
index f0a06b447..0f3c2f10b 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -22,6 +22,7 @@ gunicorn = "^22.0.0"
 psycopg = { extras = ["binary"], version = "^3.1.10" }
 pydantic-settings = "^2.0.3"
 flask-cors = "^4.0.0"
+opensearch-py = "^2.5.0"
 
 [tool.poetry.group.dev.dependencies]
 black = "^23.9.1"
@@ -43,6 +44,12 @@ sadisplay = "0.4.9"
 ruff = "^0.4.0"
 debugpy = "^1.8.1"
 freezegun = "^1.5.0"
+# This isn't the latest version of types-requests
+# because the latest releases depend on urllib3 v2, while
+# opensearch-py still requires urllib3 v1. This pin should be
+# temporary: opensearch-py has an unreleased change that switches
+# to urllib3 v2, so once that ships we can upgrade to the latest.
+types-requests = "2.31.0.1"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]
diff --git a/api/src/adapters/search/__init__.py b/api/src/adapters/search/__init__.py
new file mode 100644
index 000000000..166441e1d
--- /dev/null
+++ b/api/src/adapters/search/__init__.py
@@ -0,0 +1,4 @@
+from src.adapters.search.opensearch_client import SearchClient, get_opensearch_client
+from src.adapters.search.opensearch_config import get_opensearch_config
+
+__all__ = ["SearchClient", "get_opensearch_client", "get_opensearch_config"]
diff --git a/api/src/adapters/search/opensearch_client.py b/api/src/adapters/search/opensearch_client.py
new file mode 100644
index 000000000..dadcfd7c4
--- /dev/null
+++ b/api/src/adapters/search/opensearch_client.py
@@ -0,0 +1,36 @@
+from typing import Any
+
+import opensearchpy
+
+from src.adapters.search.opensearch_config import OpensearchConfig, get_opensearch_config
+
+# More configuration/setup coming in:
+# TODO - https://github.com/navapbc/simpler-grants-gov/issues/13
+
+# Alias the OpenSearch client so that it doesn't need to be imported everywhere
+# and to make it clear it's a client
+SearchClient = opensearchpy.OpenSearch
+
+
+def get_opensearch_client(
+    opensearch_config: OpensearchConfig | None = None,
+) -> SearchClient:
+    if opensearch_config is None:
+        opensearch_config = get_opensearch_config()
+
+    # See: https://opensearch.org/docs/latest/clients/python-low-level/ for more details
+    return opensearchpy.OpenSearch(**_get_connection_parameters(opensearch_config))
+
+
+def _get_connection_parameters(opensearch_config: OpensearchConfig) -> dict[str, Any]:
+    # TODO - we'll want to add the AWS connection params here when we set that up
+    # See: https://opensearch.org/docs/latest/clients/python-low-level/#connecting-to-amazon-opensearch-serverless
+
+    return dict(
+        hosts=[{"host": opensearch_config.host, "port": opensearch_config.port}],
+        http_compress=True,
+        use_ssl=opensearch_config.use_ssl,
+        verify_certs=opensearch_config.verify_certs,
+        ssl_assert_hostname=False,
+        ssl_show_warn=False,
+    )
diff --git a/api/src/adapters/search/opensearch_config.py b/api/src/adapters/search/opensearch_config.py
new file mode 100644
index 000000000..4975feb3e
--- /dev/null
+++ b/api/src/adapters/search/opensearch_config.py
@@ -0,0 +1,33 @@
+import logging
+
+from pydantic import Field
+from pydantic_settings import SettingsConfigDict
+
+from src.util.env_config import PydanticBaseEnvConfig
+
+logger = logging.getLogger(__name__)
+
+
+class OpensearchConfig(PydanticBaseEnvConfig):
+    model_config = SettingsConfigDict(env_prefix="OPENSEARCH_")
+
+    host: str  # OPENSEARCH_HOST
+    port: int  # OPENSEARCH_PORT
+    use_ssl: bool = Field(default=True)  # OPENSEARCH_USE_SSL
+    verify_certs: bool = Field(default=True)  # OPENSEARCH_VERIFY_CERTS
+
+
+def get_opensearch_config() -> OpensearchConfig:
+    opensearch_config = OpensearchConfig()
+
+    logger.info(
+        "Constructed opensearch configuration",
+        extra={
+            "host": opensearch_config.host,
+            "port": opensearch_config.port,
+            "use_ssl": opensearch_config.use_ssl,
+            "verify_certs": opensearch_config.verify_certs,
+        },
+    )
+
+    return opensearch_config
diff --git a/api/tests/conftest.py b/api/tests/conftest.py
index 928932b67..97173e9a7 100644
--- a/api/tests/conftest.py
+++ b/api/tests/conftest.py
@@ -11,6 +11,7 @@
 import src.adapters.db as db
 import src.app as app_entry
 import tests.src.db.models.factories as factories
+from src.adapters import search
 from src.constants.schema import Schemas
 from src.db import models
 from src.db.models.lookup.sync_lookup_values import sync_lookup_values
@@ -143,6 +144,34 @@ def test_foreign_schema(db_schema_prefix):
     return f"{db_schema_prefix}{Schemas.LEGACY}"
 
 
+####################
+# Opensearch Fixtures
+####################
+
+
+@pytest.fixture(scope="session")
+def search_client() -> search.SearchClient:
+    return search.get_opensearch_client()
+
+
+@pytest.fixture(scope="session")
+def opportunity_index(search_client):
+    # TODO - will adjust this in the future to use utils we'll build
+    # for setting up / aliasing indexes. For now, keep it simple
+
+    # create a random index name just to make sure it won't ever conflict
+    # with an actual one, similar to how we create schemas for database tests
+    index_name = f"test_{uuid.uuid4().int}_opportunity"
+
+    search_client.indices.create(index_name, body={})
+
+    try:
+        yield index_name
+    finally:
+        # Try to clean up the index at the end
+        search_client.indices.delete(index_name)
+
+
 ####################
 # Test App & Client
 ####################
diff --git a/api/tests/src/adapters/search/__init__.py b/api/tests/src/adapters/search/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/api/tests/src/adapters/search/test_opensearch.py b/api/tests/src/adapters/search/test_opensearch.py
new file mode 100644
index 000000000..490ffcb3b
--- /dev/null
+++ b/api/tests/src/adapters/search/test_opensearch.py
@@ -0,0 +1,58 @@
+########################################
+# This is a placeholder set of tests,
+# we'll evolve / change the structure
+# as we continue developing this
+#
+# Just wanted something simple so I can verify
+# the early steps of this setup are working
+# before we actually have code to use
+########################################
+
+
+def test_index_is_running(search_client, opportunity_index):
+    # Very simple test, will rewrite / remove later once we have something
+    # more meaningful to test.
+
+    existing_indexes = search_client.cat.indices(format="json")
+
+    found_opportunity_index = False
+    for index in existing_indexes:
+        if index["index"] == opportunity_index:
+            found_opportunity_index = True
+            break
+
+    assert found_opportunity_index is True
+
+    # Add a few records to the index
+
+    record1 = {
+        "opportunity_id": 1,
+        "opportunity_title": "Research into how to make a search engine",
+        "opportunity_status": "posted",
+    }
+    record2 = {
+        "opportunity_id": 2,
+        "opportunity_title": "Research about words, and more words!",
+        "opportunity_status": "forecasted",
+    }
+
+    search_client.index(index=opportunity_index, body=record1, id=1, refresh=True)
+    search_client.index(index=opportunity_index, body=record2, id=2, refresh=True)
+
+    search_request = {
+        "query": {
+            "bool": {
+                "must": {
+                    "simple_query_string": {"query": "research", "fields": ["opportunity_title"]}
+                }
+            }
+        }
+    }
+    response = search_client.search(index=opportunity_index, body=search_request)
+    assert response["hits"]["total"]["value"] == 2
+
+    filter_request = {
+        "query": {"bool": {"filter": [{"terms": {"opportunity_status": ["forecasted"]}}]}}
+    }
+    response = search_client.search(index=opportunity_index, body=filter_request)
+    assert response["hits"]["total"]["value"] == 1
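
---

Reviewer notes (sketches, not part of the diff):

A minimal sketch of how the new adapter is meant to be used once the local stack is up (`make init-opensearch`). Note that `OPENSEARCH_HOST=opensearch-node` resolves inside the docker-compose network; from the host machine you would point it at `localhost` instead. The index name and document below are made up, purely for illustration.

```python
from src.adapters import search

# With no argument, get_opensearch_client() builds an OpensearchConfig
# from the OPENSEARCH_* environment variables (see local.env).
client = search.get_opensearch_client()

# Basic connectivity check against the local single-node cluster
print(client.cluster.health())

# Index a document and search it back; "demo-index" is a hypothetical name
client.index(index="demo-index", body={"opportunity_title": "example"}, id=1, refresh=True)
response = client.search(index="demo-index", body={"query": {"match_all": {}}})
print(response["hits"]["total"]["value"])  # 1
```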
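The `env_prefix="OPENSEARCH_"` model config is what maps each `OpensearchConfig` field to its environment variable. A quick sketch of that mapping, using the values this PR adds to local.env:

```python
import os

from src.adapters.search.opensearch_config import OpensearchConfig

# Mirror the values this PR adds to local.env
os.environ["OPENSEARCH_HOST"] = "opensearch-node"
os.environ["OPENSEARCH_PORT"] = "9200"
os.environ["OPENSEARCH_USE_SSL"] = "FALSE"
os.environ["OPENSEARCH_VERIFY_CERTS"] = "FALSE"

config = OpensearchConfig()
assert config.host == "opensearch-node"
assert config.port == 9200           # pydantic parses the string "9200"
assert config.use_ssl is False       # would default to True if unset
assert config.verify_certs is False  # would default to True if unset
```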
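bin/wait-for-local-opensearch.sh polls `/_cluster/health` until it answers, giving up after 30 seconds. The same readiness check could be done from Python with the new client; a rough equivalent (hypothetical helper, not part of this PR):

```python
import time

from src.adapters import search


def wait_for_local_opensearch(max_wait_seconds: int = 30) -> None:
    """Poll cluster health until OpenSearch responds, like the bash script does."""
    client = search.get_opensearch_client()
    waited = 0
    while True:
        try:
            client.cluster.health()  # same endpoint the script curls
            return
        except Exception:
            if waited > max_wait_seconds:
                raise RuntimeError("OpenSearch does not appear to be starting up")
            time.sleep(3)
            waited += 3
```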