From acc5451f56f7c5b16a1750cb425c782ddbc7eafa Mon Sep 17 00:00:00 2001 From: Michael Chouinard <46358556+chouinar@users.noreply.github.com> Date: Wed, 22 May 2024 13:58:12 -0400 Subject: [PATCH] [Issue #2089] Setup opensearch locally (navapbc/simpler-grants-gov#39) Fixes #2089. Set up a search index to run locally via Docker. Updated the Makefile to automatically initialize the index and added a script to wait for the index to start up before proceeding. Set up a very basic client for connecting to the search index (will be expanded in subsequent PRs). Added a basic test and test utils to verify it is working (these will also be expanded). This is the first step in getting the search index working locally. This actually gets it running, and the client works; we just aren't doing anything meaningful with it yet besides tests. This doesn't yet create an index that we can use, except in the test. However, if you want to try out a search index, you can go to http://localhost:5601/app/dev_tools#/console (after running `make init`) to run some queries against the (one-node) cluster. https://opensearch.org/docs/latest/getting-started/communicate/#sending-requests-in-dev-tools provides some examples of how to create and use indexes that you can follow. 
--- api/Makefile | 15 ++++- api/bin/wait-for-local-opensearch.sh | 31 +++++++++ api/docker-compose.yml | 37 +++++++++++ api/local.env | 9 +++ api/poetry.lock | 66 ++++++++++++++++--- api/pyproject.toml | 7 ++ api/src/adapters/search/__init__.py | 4 ++ api/src/adapters/search/opensearch_client.py | 36 ++++++++++ api/src/adapters/search/opensearch_config.py | 33 ++++++++++ api/tests/conftest.py | 29 ++++++++ api/tests/src/adapters/search/__init__.py | 0 .../src/adapters/search/test_opensearch.py | 58 ++++++++++++++++ 12 files changed, 315 insertions(+), 10 deletions(-) create mode 100755 api/bin/wait-for-local-opensearch.sh create mode 100644 api/src/adapters/search/__init__.py create mode 100644 api/src/adapters/search/opensearch_client.py create mode 100644 api/src/adapters/search/opensearch_config.py create mode 100644 api/tests/src/adapters/search/__init__.py create mode 100644 api/tests/src/adapters/search/test_opensearch.py diff --git a/api/Makefile b/api/Makefile index f2774d3a75..d5daab1d22 100644 --- a/api/Makefile +++ b/api/Makefile @@ -100,7 +100,7 @@ start-debug: run-logs: start docker-compose logs --follow --no-color $(APP_NAME) -init: build init-db +init: build init-db init-opensearch clean-volumes: ## Remove project docker volumes (which includes the DB state) docker-compose down --volumes @@ -179,6 +179,19 @@ create-erds: # Create ERD diagrams for our DB schema setup-postgres-db: ## Does any initial setup necessary for our local database to work $(PY_RUN_CMD) setup-postgres-db +################################################## +# Opensearch +################################################## + +init-opensearch: start-opensearch +# TODO - in subsequent PRs, we'll add more to this command to setup the search index locally + +start-opensearch: + docker-compose up --detach opensearch-node + docker-compose up --detach opensearch-dashboards + ./bin/wait-for-local-opensearch.sh + + ################################################## # Testing diff --git 
a/api/bin/wait-for-local-opensearch.sh b/api/bin/wait-for-local-opensearch.sh new file mode 100755 index 0000000000..a14af8048f --- /dev/null +++ b/api/bin/wait-for-local-opensearch.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# wait-for-local-opensearch + +set -e + +# Color formatting +RED='\033[0;31m' +NO_COLOR='\033[0m' + +MAX_WAIT_TIME=30 # seconds +WAIT_TIME=0 + +# Curl the healthcheck endpoint of the local opensearch +# until it returns a success response +until curl --output /dev/null --silent http://localhost:9200/_cluster/health; +do + echo "waiting on OpenSearch to initialize..." + sleep 3 + + WAIT_TIME=$(($WAIT_TIME+3)) + if [ $WAIT_TIME -gt $MAX_WAIT_TIME ] + then + echo -e "${RED}ERROR: OpenSearch appears to not be starting up, running \"docker logs opensearch-node\" to troubleshoot.${NO_COLOR}" + docker logs opensearch-node + exit 1 + fi +done + +echo "OpenSearch is ready after ~${WAIT_TIME} seconds" + + diff --git a/api/docker-compose.yml b/api/docker-compose.yml index a364c74c34..9ec2062147 100644 --- a/api/docker-compose.yml +++ b/api/docker-compose.yml @@ -12,6 +12,41 @@ services: volumes: - grantsdbdata:/var/lib/postgresql/data + opensearch-node: + image: opensearchproject/opensearch:latest + container_name: opensearch-node + environment: + - cluster.name=opensearch-cluster # Name the cluster + - node.name=opensearch-node # Name the node that will run in this container + - discovery.type=single-node # Nodes to look for when discovering the cluster + - bootstrap.memory_lock=true # Disable JVM heap memory swapping + - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # Set min and max JVM heap sizes to at least 50% of system RAM + - DISABLE_INSTALL_DEMO_CONFIG=true # Prevents execution of bundled demo script which installs demo certificates and security configurations to OpenSearch + - DISABLE_SECURITY_PLUGIN=true # Disables Security plugin + ulimits: + memlock: + soft: -1 # Set memlock to unlimited (no soft or hard limit) + hard: -1 + nofile: + soft: 65536 # Maximum 
number of open files for the opensearch user - set to at least 65536 + hard: 65536 + volumes: + - opensearch-data:/usr/share/opensearch/data # Creates volume called opensearch-data and mounts it to the container + ports: + - 9200:9200 # REST API + - 9600:9600 # Performance Analyzer + + opensearch-dashboards: + image: opensearchproject/opensearch-dashboards:latest + container_name: opensearch-dashboards + ports: + - 5601:5601 # Map host port 5601 to container port 5601 + expose: + - "5601" # Expose port 5601 for web access to OpenSearch Dashboards + environment: + - 'OPENSEARCH_HOSTS=["http://opensearch-node:9200"]' + - DISABLE_SECURITY_DASHBOARDS_PLUGIN=true # disables security dashboards plugin in OpenSearch Dashboards + grants-api: build: context: . @@ -28,6 +63,8 @@ services: - .:/api depends_on: - grants-db + - opensearch-node volumes: grantsdbdata: + opensearch-data: diff --git a/api/local.env b/api/local.env index fc1c1c1a42..4ca4c86b59 100644 --- a/api/local.env +++ b/api/local.env @@ -59,6 +59,15 @@ DB_SSL_MODE=allow # could contain sensitive information. 
HIDE_SQL_PARAMETER_LOGS=TRUE +############################ +# Opensearch Environment Variables +############################ + +OPENSEARCH_HOST=opensearch-node +OPENSEARCH_PORT=9200 +OPENSEARCH_USE_SSL=FALSE +OPENSEARCH_VERIFY_CERTS=FALSE + ############################ # AWS Defaults ############################ diff --git a/api/poetry.lock b/api/poetry.lock index 5fe4fa9e14..017f1460e2 100644 --- a/api/poetry.lock +++ b/api/poetry.lock @@ -1106,6 +1106,30 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "opensearch-py" +version = "2.5.0" +description = "Python client for OpenSearch" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,<4,>=2.7" +files = [ + {file = "opensearch-py-2.5.0.tar.gz", hash = "sha256:0dde4ac7158a717d92a8cd81964cb99705a4b80bcf9258ba195b9a9f23f5226d"}, + {file = "opensearch_py-2.5.0-py2.py3-none-any.whl", hash = "sha256:cf093a40e272b60663f20417fc1264ac724dcf1e03c1a4542a6b44835b1e6c49"}, +] + +[package.dependencies] +certifi = ">=2022.12.07" +python-dateutil = "*" +requests = ">=2.4.0,<3.0.0" +six = "*" +urllib3 = ">=1.26.18,<2" + +[package.extras] +async = ["aiohttp (>=3,<4)"] +develop = ["black", "botocore", "coverage (<8.0.0)", "jinja2", "mock", "myst-parser", "pytest (>=3.0.0)", "pytest-cov", "pytest-mock (<4.0.0)", "pytz", "pyyaml", "requests (>=2.0.0,<3.0.0)", "sphinx", "sphinx-copybutton", "sphinx-rtd-theme"] +docs = ["aiohttp (>=3,<4)", "myst-parser", "sphinx", "sphinx-copybutton", "sphinx-rtd-theme"] +kerberos = ["requests-kerberos"] + [[package]] name = "packaging" version = "24.0" @@ -1902,6 +1926,31 @@ files = [ {file = "types_PyYAML-6.0.12.20240311-py3-none-any.whl", hash = "sha256:b845b06a1c7e54b8e5b4c683043de0d9caf205e7434b3edc678ff2411979b8f6"}, ] +[[package]] +name = "types-requests" +version = "2.31.0.1" +description = "Typing stubs for requests" +optional = false +python-versions = "*" +files = [ + 
{file = "types-requests-2.31.0.1.tar.gz", hash = "sha256:3de667cffa123ce698591de0ad7db034a5317457a596eb0b4944e5a9d9e8d1ac"}, + {file = "types_requests-2.31.0.1-py3-none-any.whl", hash = "sha256:afb06ef8f25ba83d59a1d424bd7a5a939082f94b94e90ab5e6116bd2559deaa3"}, +] + +[package.dependencies] +types-urllib3 = "*" + +[[package]] +name = "types-urllib3" +version = "1.26.25.14" +description = "Typing stubs for urllib3" +optional = false +python-versions = "*" +files = [ + {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"}, + {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"}, +] + [[package]] name = "typing-extensions" version = "4.11.0" @@ -1941,20 +1990,19 @@ files = [ [[package]] name = "urllib3" -version = "2.2.1" +version = "1.26.18" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false -python-versions = ">=3.8" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ - {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, - {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, + {file = "urllib3-1.26.18-py2.py3-none-any.whl", hash = "sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07"}, + {file = "urllib3-1.26.18.tar.gz", hash = "sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0"}, ] [package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -h2 = ["h2 (>=4,<5)"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] +brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", 
"urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] name = "watchdog" @@ -2050,4 +2098,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "c53875955c1b910c3d4aa1748dce786e3cfa6f507895d7ca4111391333decb13" +content-hash = "9671a2d68d2b1bc91b8ce111a7a32d08292475e0d1c4f058c33bf650349757e0" diff --git a/api/pyproject.toml b/api/pyproject.toml index f0a06b447c..0f3c2f10be 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -22,6 +22,7 @@ gunicorn = "^22.0.0" psycopg = { extras = ["binary"], version = "^3.1.10" } pydantic-settings = "^2.0.3" flask-cors = "^4.0.0" +opensearch-py = "^2.5.0" [tool.poetry.group.dev.dependencies] black = "^23.9.1" @@ -43,6 +44,12 @@ sadisplay = "0.4.9" ruff = "^0.4.0" debugpy = "^1.8.1" freezegun = "^1.5.0" +# This isn't the latest version of types-requests +# because otherwise it depends on urllib3 v2 but opensearch-py +# needs urllib3 v1. This should be temporary as opensearch-py +# has an unreleased change to switch to v2, so I'm guessing +# in the next few weeks we can just make this the latest? 
+types-requests = "2.31.0.1" [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/api/src/adapters/search/__init__.py b/api/src/adapters/search/__init__.py new file mode 100644 index 0000000000..166441e1dd --- /dev/null +++ b/api/src/adapters/search/__init__.py @@ -0,0 +1,4 @@ +from src.adapters.search.opensearch_client import SearchClient, get_opensearch_client +from src.adapters.search.opensearch_config import get_opensearch_config + +__all__ = ["SearchClient", "get_opensearch_client", "get_opensearch_config"] diff --git a/api/src/adapters/search/opensearch_client.py b/api/src/adapters/search/opensearch_client.py new file mode 100644 index 0000000000..dadcfd7c48 --- /dev/null +++ b/api/src/adapters/search/opensearch_client.py @@ -0,0 +1,36 @@ +from typing import Any + +import opensearchpy + +from src.adapters.search.opensearch_config import OpensearchConfig, get_opensearch_config + +# More configuration/setup coming in: +# TODO - https://github.com/navapbc/simpler-grants-gov/issues/13 + +# Alias the OpenSearch client so that it doesn't need to be imported everywhere +# and to make it clear it's a client +SearchClient = opensearchpy.OpenSearch + + +def get_opensearch_client( + opensearch_config: OpensearchConfig | None = None, +) -> SearchClient: + if opensearch_config is None: + opensearch_config = get_opensearch_config() + + # See: https://opensearch.org/docs/latest/clients/python-low-level/ for more details + return opensearchpy.OpenSearch(**_get_connection_parameters(opensearch_config)) + + +def _get_connection_parameters(opensearch_config: OpensearchConfig) -> dict[str, Any]: + # TODO - we'll want to add the AWS connection params here when we set that up + # See: https://opensearch.org/docs/latest/clients/python-low-level/#connecting-to-amazon-opensearch-serverless + + return dict( + hosts=[{"host": opensearch_config.host, "port": opensearch_config.port}], + http_compress=True, + use_ssl=opensearch_config.use_ssl, + 
verify_certs=opensearch_config.verify_certs, + ssl_assert_hostname=False, + ssl_show_warn=False, + ) diff --git a/api/src/adapters/search/opensearch_config.py b/api/src/adapters/search/opensearch_config.py new file mode 100644 index 0000000000..4975feb3e5 --- /dev/null +++ b/api/src/adapters/search/opensearch_config.py @@ -0,0 +1,33 @@ +import logging + +from pydantic import Field +from pydantic_settings import SettingsConfigDict + +from src.util.env_config import PydanticBaseEnvConfig + +logger = logging.getLogger(__name__) + + +class OpensearchConfig(PydanticBaseEnvConfig): + model_config = SettingsConfigDict(env_prefix="OPENSEARCH_") + + host: str # OPENSEARCH_HOST + port: int # OPENSEARCH_PORT + use_ssl: bool = Field(default=True) # OPENSEARCH_USE_SSL + verify_certs: bool = Field(default=True) # OPENSEARCH_VERIFY_CERTS + + +def get_opensearch_config() -> OpensearchConfig: + opensearch_config = OpensearchConfig() + + logger.info( + "Constructed opensearch configuration", + extra={ + "host": opensearch_config.host, + "port": opensearch_config.port, + "use_ssl": opensearch_config.use_ssl, + "verify_certs": opensearch_config.verify_certs, + }, + ) + + return opensearch_config diff --git a/api/tests/conftest.py b/api/tests/conftest.py index 928932b674..97173e9a7f 100644 --- a/api/tests/conftest.py +++ b/api/tests/conftest.py @@ -11,6 +11,7 @@ import src.adapters.db as db import src.app as app_entry import tests.src.db.models.factories as factories +from src.adapters import search from src.constants.schema import Schemas from src.db import models from src.db.models.lookup.sync_lookup_values import sync_lookup_values @@ -143,6 +144,34 @@ def test_foreign_schema(db_schema_prefix): return f"{db_schema_prefix}{Schemas.LEGACY}" +#################### +# Opensearch Fixtures +#################### + + +@pytest.fixture(scope="session") +def search_client() -> search.SearchClient: + return search.get_opensearch_client() + + +@pytest.fixture(scope="session") +def 
opportunity_index(search_client): + # TODO - will adjust this in the future to use utils we'll build + # for setting up / aliasing indexes. For now, keep it simple + + # create a random index name just to make sure it won't ever conflict + # with an actual one, similar to how we create schemas for database tests + index_name = f"test_{uuid.uuid4().int}_opportunity" + + search_client.indices.create(index_name, body={}) + + try: + yield index_name + finally: + # Try to clean up the index at the end + search_client.indices.delete(index_name) + + #################### # Test App & Client #################### diff --git a/api/tests/src/adapters/search/__init__.py b/api/tests/src/adapters/search/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/src/adapters/search/test_opensearch.py b/api/tests/src/adapters/search/test_opensearch.py new file mode 100644 index 0000000000..490ffcb3ba --- /dev/null +++ b/api/tests/src/adapters/search/test_opensearch.py @@ -0,0 +1,58 @@ +######################################## +# This is a placeholder set of tests, +# we'll evolve / change the structure +# as we continue developing this +# +# Just wanted something simple so I can verify +# the early steps of this setup are working +# before we actually have code to use +######################################## + + +def test_index_is_running(search_client, opportunity_index): + # Very simple test, will rewrite / remove later once we have something + # more meaningful to test. 
+ + existing_indexes = search_client.cat.indices(format="json") + + found_opportunity_index = False + for index in existing_indexes: + if index["index"] == opportunity_index: + found_opportunity_index = True + break + + assert found_opportunity_index is True + + # Add a few records to the index + + record1 = { + "opportunity_id": 1, + "opportunity_title": "Research into how to make a search engine", + "opportunity_status": "posted", + } + record2 = { + "opportunity_id": 2, + "opportunity_title": "Research about words, and more words!", + "opportunity_status": "forecasted", + } + + search_client.index(index=opportunity_index, body=record1, id=1, refresh=True) + search_client.index(index=opportunity_index, body=record2, id=2, refresh=True) + + search_request = { + "query": { + "bool": { + "must": { + "simple_query_string": {"query": "research", "fields": ["opportunity_title"]} + } + } + } + } + response = search_client.search(index=opportunity_index, body=search_request) + assert response["hits"]["total"]["value"] == 2 + + filter_request = { + "query": {"bool": {"filter": [{"terms": {"opportunity_status": ["forecasted"]}}]}} + } + response = search_client.search(index=opportunity_index, body=filter_request) + assert response["hits"]["total"]["value"] == 1