From 118481df0829caac745b54489f9c0cab252cd68d Mon Sep 17 00:00:00 2001
From: Michael Chouinard <46358556+chouinar@users.noreply.github.com>
Date: Mon, 29 Jul 2024 10:08:58 -0400
Subject: [PATCH] [Issue #2046] Setup s3 localstack (navapbc/simpler-grants-gov#161)

Fixes #2046

Set up S3 localstack to provide a local version of S3 to use (for future work).
Script / utils for interacting with S3.

Localstack is a tool that creates a mock version of AWS locally. How well individual services can be mocked varies, but S3, being just a file storage system, is pretty simple and fully featured even when mocked. Note that localstack also has a paid version that adds more features, but all of S3's features are [supported in the free community tier](https://docs.localstack.cloud/references/coverage/coverage_s3/). We've used localstack for S3 and a few other AWS services on other projects.

The script creates the S3 bucket in localstack. You can interact with the localstack instance of S3 with the AWS CLI like so:

```sh
aws --endpoint-url http://localhost:4566 s3 ls
> 2024-07-12 13:10:24 local-opportunities
```

I created a tmp file in it successfully:

```sh
aws --endpoint-url http://localhost:4566 s3 cp tmp.txt s3://local-opportunities/path/to/tmp.txt
```

I can see the tmp file:

```sh
aws --endpoint-url http://localhost:4566 s3 ls s3://local-opportunities/path/to/
> 2024-07-12 13:23:22 15 tmp.txt
```

And I can download it:

```sh
aws --endpoint-url http://localhost:4566 s3 cp s3://local-opportunities/path/to/tmp.txt local_tmp.txt
```
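The same round trip works from Python by pointing a boto3 client at the localstack gateway -- a minimal sketch, not part of this change; the endpoint, bucket, and key simply mirror the CLI examples above:

```python
import boto3

# http://localhost:4566 is the localstack gateway when connecting from the host;
# containers on the compose network use http://localstack:4566 instead.
s3 = boto3.client("s3", endpoint_url="http://localhost:4566")

# Upload, list, and download the same tmp.txt used in the CLI examples
s3.upload_file("tmp.txt", "local-opportunities", "path/to/tmp.txt")
print(s3.list_objects_v2(Bucket="local-opportunities", Prefix="path/to/"))
s3.download_file("local-opportunities", "path/to/tmp.txt", "local_tmp.txt")
```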
---
 api/.gitignore                     |  3 ++
 api/Makefile                       | 12 +++++++-
 api/bin/setup_localstack.py        | 49 ++++++++++++++++++++++++++++++
 api/docker-compose.yml             | 17 +++++++++++
 api/local.env                      | 14 +++++++++
 api/pyproject.toml                 |  2 +-
 api/src/adapters/aws/__init__.py   |  3 ++
 api/src/adapters/aws/s3_adapter.py | 30 ++++++++++++++++++
 api/src/util/file_util.py          | 15 ---------
 9 files changed, 128 insertions(+), 17 deletions(-)
 create mode 100644 api/bin/setup_localstack.py
 create mode 100644 api/src/adapters/aws/__init__.py
 create mode 100644 api/src/adapters/aws/s3_adapter.py

diff --git a/api/.gitignore b/api/.gitignore
index 2b1784734..3db52578e 100644
--- a/api/.gitignore
+++ b/api/.gitignore
@@ -29,3 +29,6 @@ coverage.*
 
 #e2e
 /test-results/
+
+# localstack
+/volume
\ No newline at end of file
diff --git a/api/Makefile b/api/Makefile
index e6a87c819..dcdbf23c9 100644
--- a/api/Makefile
+++ b/api/Makefile
@@ -100,7 +100,7 @@ start-debug:
 run-logs: start
 	docker compose logs --follow --no-color $(APP_NAME)
 
-init: build init-db init-opensearch
+init: build init-db init-opensearch init-localstack
 
 clean-volumes: ## Remove project docker volumes (which includes the DB state)
 	docker compose down --volumes
@@ -191,7 +191,17 @@ start-opensearch:
 	docker compose up --detach opensearch-dashboards
 	./bin/wait-for-local-opensearch.sh
 
+##################################################
+# Localstack
+##################################################
+
+init-localstack: start-localstack setup-localstack
+
+start-localstack:
+	docker compose up --detach localstack
 
+setup-localstack:
+	$(PY_RUN_CMD) setup-localstack
 
 ##################################################
 # Testing
diff --git a/api/bin/setup_localstack.py b/api/bin/setup_localstack.py
new file mode 100644
index 000000000..f9107bf89
--- /dev/null
+++ b/api/bin/setup_localstack.py
@@ -0,0 +1,49 @@
+import logging
+
+import botocore.client
+import botocore.exceptions
+
+import src.logging
+from src.adapters.aws import S3Config, get_s3_client
+from src.util.local import error_if_not_local
+
+logger = logging.getLogger(__name__)
+
+
+def does_s3_bucket_exist(s3_client: botocore.client.BaseClient, bucket_name: str) -> bool:
+    try:
+        s3_client.head_bucket(Bucket=bucket_name)
+        return True
+    except botocore.exceptions.ClientError as e:
+        # We'll assume a 404 error code means the bucket
+        # could not be found and thus needs to be created,
+        # as no more specific error code is available
+        error_code = e.response.get("Error", {}).get("Code")
+        if error_code != "404":
+            raise e
+
+    return False
+
+
+def setup_s3() -> None:
+    s3_config = S3Config()
+    s3_client = get_s3_client(s3_config)
+
+    if s3_config.s3_opportunity_bucket is None:
+        raise Exception("S3_OPPORTUNITY_BUCKET env var must be set")
+
+    if not does_s3_bucket_exist(s3_client, s3_config.s3_opportunity_bucket):
+        logger.info("Creating S3 bucket %s", s3_config.s3_opportunity_bucket)
+        s3_client.create_bucket(Bucket=s3_config.s3_opportunity_bucket)
+    else:
+        logger.info("S3 bucket %s already exists - skipping", s3_config.s3_opportunity_bucket)
+
+
+def main() -> None:
+    with src.logging.init("setup_localstack"):
+        error_if_not_local()
+        setup_s3()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/api/docker-compose.yml b/api/docker-compose.yml
index 30a7df050..cbe007185 100644
--- a/api/docker-compose.yml
+++ b/api/docker-compose.yml
@@ -44,6 +44,22 @@ services:
       - 'OPENSEARCH_HOSTS=["http://opensearch-node:9200"]'
       - DISABLE_SECURITY_DASHBOARDS_PLUGIN=true # disables security dashboards plugin in OpenSearch Dashboards
 
+  localstack:
+    container_name: "${LOCALSTACK_DOCKER_NAME:-localstack-main}"
+    image: localstack/localstack
+    ports:
+      - "127.0.0.1:4566:4566"            # LocalStack Gateway
+      - "127.0.0.1:4510-4559:4510-4559"  # external services port range
+    environment:
+      # LocalStack configuration: https://docs.localstack.cloud/references/configuration/
+      - DEBUG=${DEBUG:-0}
+      # To improve startup time, only add services we use
+      - SERVICES=s3
+      - EAGER_SERVICES_LOADING=1
+    volumes:
+      - "${LOCALSTACK_VOLUME_DIR:-./volume}:/var/lib/localstack"
+      - "/var/run/docker.sock:/var/run/docker.sock"
+
   grants-api:
     build:
       context: .
@@ -61,6 +77,7 @@ services:
     depends_on:
       - grants-db
       - opensearch-node
+      - localstack
 
 volumes:
   grantsdbdata:
diff --git a/api/local.env b/api/local.env
index 498c89324..a2cb295f6 100644
--- a/api/local.env
+++ b/api/local.env
@@ -88,6 +88,20 @@ AWS_SECRET_ACCESS_KEY=DO_NOT_SET_HERE
 
 AWS_DEFAULT_REGION=us-east-1
 
+############################
+# Localstack
+############################
+
+# If you want to connect to localstack outside of docker
+# use localhost:4566 instead
+S3_ENDPOINT_URL=http://localstack:4566
+
+############################
+# S3
+############################
+
+S3_OPPORTUNITY_BUCKET=local-opportunities
+
 ############################
 # Feature Flags
 ############################
diff --git a/api/pyproject.toml b/api/pyproject.toml
index 0f3c2f10b..1fabc3afa 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -62,7 +62,7 @@ db-migrate-down-all = "src.db.migrations.run:downall"
 db-seed-local = "tests.lib.seed_local_db:seed_local_db"
 create-erds = "bin.create_erds:main"
 setup-postgres-db = "src.db.migrations.setup_local_postgres_db:setup_local_postgres_db"
-
+setup-localstack = "bin.setup_localstack:main"
 
 [tool.black]
 line-length = 100
diff --git a/api/src/adapters/aws/__init__.py b/api/src/adapters/aws/__init__.py
new file mode 100644
index 000000000..3f55ab312
--- /dev/null
+++ b/api/src/adapters/aws/__init__.py
@@ -0,0 +1,3 @@
+from .s3_adapter import S3Config, get_s3_client
+
+__all__ = ["get_s3_client", "S3Config"]
diff --git a/api/src/adapters/aws/s3_adapter.py b/api/src/adapters/aws/s3_adapter.py
new file mode 100644
index 000000000..ad43d20dc
--- /dev/null
+++ b/api/src/adapters/aws/s3_adapter.py
@@ -0,0 +1,30 @@
+import boto3
+import botocore.client
+
+from src.util.env_config import PydanticBaseEnvConfig
+
+
+class S3Config(PydanticBaseEnvConfig):
+    # We should generally not need to set this except
+    # locally to use localstack
+    s3_endpoint_url: str | None = None
+
+    ### S3 Buckets
+    # note that we default these to None
+    # so that we don't need to set all of these for every
+    # process that uses S3
+
+    # TODO - I'm not sure how we want to organize our
+    # s3 buckets so this will likely change in the future
+    s3_opportunity_bucket: str | None = None
+
+
+def get_s3_client(s3_config: S3Config | None = None) -> botocore.client.BaseClient:
+    if s3_config is None:
+        s3_config = S3Config()
+
+    params = {}
+    if s3_config.s3_endpoint_url is not None:
+        params["endpoint_url"] = s3_config.s3_endpoint_url
+
+    return boto3.client("s3", **params)
diff --git a/api/src/util/file_util.py b/api/src/util/file_util.py
index a7b604dbf..cc4c15619 100644
--- a/api/src/util/file_util.py
+++ b/api/src/util/file_util.py
@@ -3,8 +3,6 @@
 from typing import Any, Optional, Tuple
 from urllib.parse import urlparse
 
-import boto3
-import botocore
 import smart_open
 from botocore.config import Config
 
@@ -40,19 +38,6 @@ def join(*parts: str) -> str:
     return os.path.join(*parts)
 
 
-##################################
-# S3 Utilities
-##################################
-
-
-def get_s3_client(boto_session: Optional[boto3.Session] = None) -> botocore.client.BaseClient:
-    """Returns an S3 client, wrapping around boiler plate if you already have a session"""
-    if boto_session:
-        return boto_session.client("s3")
-
-    return boto3.client("s3")
-
-
 ##################################
 # File operations
 ##################################
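For reference, the same round trip can also go through the new adapter instead of a raw boto3 client -- a minimal sketch, assuming the env vars from `api/local.env` (`S3_ENDPOINT_URL`, `S3_OPPORTUNITY_BUCKET`, and dummy AWS credentials) are set, with `localhost:4566` substituted for the in-docker hostname when running on the host:

```python
from src.adapters.aws import S3Config, get_s3_client

# S3Config is a PydanticBaseEnvConfig, so s3_endpoint_url and s3_opportunity_bucket
# are read from S3_ENDPOINT_URL / S3_OPPORTUNITY_BUCKET (see api/local.env)
config = S3Config()
client = get_s3_client(config)

# Write and read back a small object in the localstack bucket
client.put_object(Bucket=config.s3_opportunity_bucket, Key="path/to/tmp.txt", Body=b"hello")
obj = client.get_object(Bucket=config.s3_opportunity_bucket, Key="path/to/tmp.txt")
print(obj["Body"].read())
```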