From d8dd736e2201d8a049e41a1dd118ec7562ad742d Mon Sep 17 00:00:00 2001 From: Michael Chouinard <46358556+chouinar@users.noreply.github.com> Date: Mon, 29 Jul 2024 10:08:58 -0400 Subject: [PATCH] [Issue #138] Setup s3 localstack (#161) ## Summary Fixes #138 ### Time to review: __5 mins__ ## Changes proposed Setup S3 localstack for having a local version of S3 to use (for future work) Script / utils for interacting with S3 ## Context for reviewers Localstack is a tool that creates a mock version of AWS locally. While the ability to mock out certain features varies, S3 being just a file storage system is pretty simple and fully featured even when mocked. Note that localstack has a paid version as well that adds more features, but all of S3's features are [supported in the free community tier](https://docs.localstack.cloud/references/coverage/coverage_s3/). We've used localstack for s3 and a few other AWS services on other projects. ## Additional information The script creates the S3 bucket in localstack. 
You can actually interact with the localstack instance of s3 with the AWS cli like so: ```sh aws --endpoint-url http://localhost:4566 s3 ls > 2024-07-12 13:10:24 local-opportunities ``` I created a tmp file in it successfully: ```sh aws --endpoint-url http://localhost:4566 s3 cp tmp.txt s3://local-opportunities/path/to/tmp.txt ``` I can see the tmp file: ```sh aws --endpoint-url http://localhost:4566 s3 ls s3://local-opportunities/path/to/ > 2024-07-12 13:23:22 15 tmp.txt ``` And I can download it: ```sh aws --endpoint-url http://localhost:4566 s3 cp s3://local-opportunities/path/to/tmp.txt local_tmp.txt ``` --- api/.gitignore | 3 ++ api/Makefile | 12 +++++++- api/bin/setup_localstack.py | 49 ++++++++++++++++++++++++++++++ api/docker-compose.yml | 17 +++++++++++ api/local.env | 14 +++++++++ api/pyproject.toml | 2 +- api/src/adapters/aws/__init__.py | 3 ++ api/src/adapters/aws/s3_adapter.py | 30 ++++++++++++++++++ api/src/util/file_util.py | 15 --------- 9 files changed, 128 insertions(+), 17 deletions(-) create mode 100644 api/bin/setup_localstack.py create mode 100644 api/src/adapters/aws/__init__.py create mode 100644 api/src/adapters/aws/s3_adapter.py diff --git a/api/.gitignore b/api/.gitignore index 2b1784734..3db52578e 100644 --- a/api/.gitignore +++ b/api/.gitignore @@ -29,3 +29,6 @@ coverage.* #e2e /test-results/ + +# localstack +/volume \ No newline at end of file diff --git a/api/Makefile b/api/Makefile index e6a87c819..dcdbf23c9 100644 --- a/api/Makefile +++ b/api/Makefile @@ -100,7 +100,7 @@ start-debug: run-logs: start docker compose logs --follow --no-color $(APP_NAME) -init: build init-db init-opensearch +init: build init-db init-opensearch init-localstack clean-volumes: ## Remove project docker volumes (which includes the DB state) docker compose down --volumes @@ -191,7 +191,17 @@ start-opensearch: docker compose up --detach opensearch-dashboards ./bin/wait-for-local-opensearch.sh +################################################## +# Localstack 
+################################################## + +init-localstack: start-localstack setup-localstack + +start-localstack: + docker compose up --detach localstack +setup-localstack: + $(PY_RUN_CMD) setup-localstack ################################################## # Testing diff --git a/api/bin/setup_localstack.py b/api/bin/setup_localstack.py new file mode 100644 index 000000000..f9107bf89 --- /dev/null +++ b/api/bin/setup_localstack.py @@ -0,0 +1,49 @@ +import logging + +import botocore.client +import botocore.exceptions + +import src.logging +from src.adapters.aws import S3Config, get_s3_client +from src.util.local import error_if_not_local + +logger = logging.getLogger(__name__) + + +def does_s3_bucket_exist(s3_client: botocore.client.BaseClient, bucket_name: str) -> bool: + try: + s3_client.head_bucket(Bucket=bucket_name) + return True + except botocore.exceptions.ClientError as e: + # We'll assume that if the error code is a 404 that means + # it could not find the bucket and thus it needs to be created + # as there are not more specific errors than this available + error_code = e.response.get("Error", {}).get("Code") + if error_code != "404": + raise e + + return False + + +def setup_s3() -> None: + s3_config = S3Config() + s3_client = get_s3_client(s3_config) + + if s3_config.s3_opportunity_bucket is None: + raise Exception("S3_OPPORTUNITY_BUCKET env var must be set") + + if not does_s3_bucket_exist(s3_client, s3_config.s3_opportunity_bucket): + logger.info("Creating S3 bucket %s", s3_config.s3_opportunity_bucket) + s3_client.create_bucket(Bucket=s3_config.s3_opportunity_bucket) + else: + logger.info("S3 bucket %s already exists - skipping", s3_config.s3_opportunity_bucket) + + +def main() -> None: + with src.logging.init("setup_localstack"): + error_if_not_local() + setup_s3() + + +if __name__ == "__main__": + main() diff --git a/api/docker-compose.yml b/api/docker-compose.yml index 30a7df050..cbe007185 100644 --- a/api/docker-compose.yml +++ 
b/api/docker-compose.yml @@ -44,6 +44,22 @@ services: - 'OPENSEARCH_HOSTS=["http://opensearch-node:9200"]' - DISABLE_SECURITY_DASHBOARDS_PLUGIN=true # disables security dashboards plugin in OpenSearch Dashboards + localstack: + container_name: "${LOCALSTACK_DOCKER_NAME:-localstack-main}" + image: localstack/localstack + ports: + - "127.0.0.1:4566:4566" # LocalStack Gateway + - "127.0.0.1:4510-4559:4510-4559" # external services port range + environment: + # LocalStack configuration: https://docs.localstack.cloud/references/configuration/ + - DEBUG=${DEBUG:-0} + # To improve startup time, only add services we use + - SERVICES=s3 + - EAGER_SERVICES_LOADING=1 + volumes: + - "${LOCALSTACK_VOLUME_DIR:-./volume}:/var/lib/localstack" + - "/var/run/docker.sock:/var/run/docker.sock" + grants-api: build: context: . @@ -61,6 +77,7 @@ services: depends_on: - grants-db - opensearch-node + - localstack volumes: grantsdbdata: diff --git a/api/local.env b/api/local.env index 498c89324..a2cb295f6 100644 --- a/api/local.env +++ b/api/local.env @@ -88,6 +88,20 @@ AWS_SECRET_ACCESS_KEY=DO_NOT_SET_HERE AWS_DEFAULT_REGION=us-east-1 +############################ +# Localstack +############################ + +# If you want to connect to localstack outside of docker +# use localhost:4566 instead +S3_ENDPOINT_URL=http://localstack:4566 + +############################ +# S3 +############################ + +S3_OPPORTUNITY_BUCKET=local-opportunities + ############################ # Feature Flags ############################ diff --git a/api/pyproject.toml b/api/pyproject.toml index 0f3c2f10b..1fabc3afa 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -62,7 +62,7 @@ db-migrate-down-all = "src.db.migrations.run:downall" db-seed-local = "tests.lib.seed_local_db:seed_local_db" create-erds = "bin.create_erds:main" setup-postgres-db = "src.db.migrations.setup_local_postgres_db:setup_local_postgres_db" - +setup-localstack = "bin.setup_localstack:main" [tool.black] line-length = 100 diff 
--git a/api/src/adapters/aws/__init__.py b/api/src/adapters/aws/__init__.py new file mode 100644 index 000000000..3f55ab312 --- /dev/null +++ b/api/src/adapters/aws/__init__.py @@ -0,0 +1,3 @@ +from .s3_adapter import S3Config, get_s3_client + +__all__ = ["get_s3_client", "S3Config"] diff --git a/api/src/adapters/aws/s3_adapter.py b/api/src/adapters/aws/s3_adapter.py new file mode 100644 index 000000000..ad43d20dc --- /dev/null +++ b/api/src/adapters/aws/s3_adapter.py @@ -0,0 +1,30 @@ +import boto3 +import botocore.client + +from src.util.env_config import PydanticBaseEnvConfig + + +class S3Config(PydanticBaseEnvConfig): + # We should generally not need to set this except + # locally to use localstack + s3_endpoint_url: str | None = None + + ### S3 Buckets + # note that we default these to None + # so that we don't need to set all of these for every + # process that uses S3 + + # TODO - I'm not sure how we want to organize our + # s3 buckets so this will likely change in the future + s3_opportunity_bucket: str | None = None + + +def get_s3_client(s3_config: S3Config | None = None) -> botocore.client.BaseClient: + if s3_config is None: + s3_config = S3Config() + + params = {} + if s3_config.s3_endpoint_url is not None: + params["endpoint_url"] = s3_config.s3_endpoint_url + + return boto3.client("s3", **params) diff --git a/api/src/util/file_util.py b/api/src/util/file_util.py index a7b604dbf..cc4c15619 100644 --- a/api/src/util/file_util.py +++ b/api/src/util/file_util.py @@ -3,8 +3,6 @@ from typing import Any, Optional, Tuple from urllib.parse import urlparse -import boto3 -import botocore import smart_open from botocore.config import Config @@ -40,19 +38,6 @@ def join(*parts: str) -> str: return os.path.join(*parts) -################################## -# S3 Utilities -################################## - - -def get_s3_client(boto_session: Optional[boto3.Session] = None) -> botocore.client.BaseClient: - """Returns an S3 client, wrapping around boiler plate if you 
already have a session""" - if boto_session: - return boto_session.client("s3") - - return boto3.client("s3") - - ################################## # File operations ##################################