Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Commit

Permalink
[Issue HHS#2046] Setup s3 localstack (#161)
Browse files Browse the repository at this point in the history
Fixes HHS#2046

Setup S3 localstack for having a local version of S3 to use (for future
work)

Script / utils for interacting with S3

Localstack is a tool that creates a mock version of AWS locally. While
the ability to mock out certain features varies, S3 being just a file
storage system is pretty simple and fully featured even when mocked.

Note that localstack has a paid version as well that adds more features,
but all of S3's features are [supported in the free community
tier](https://docs.localstack.cloud/references/coverage/coverage_s3/).
We've used localstack for s3 and a few other AWS services on other
projects.

The script creates the S3 bucket in localstack. You can actually
interact with the localstack instance of s3 with the AWS cli like so:

```sh
aws --endpoint-url http://localhost:4566 s3 ls
> 2024-07-12 13:10:24 local-opportunities
```

I created a tmp file in it successfully:
```sh
aws --endpoint-url http://localhost:4566 s3 cp tmp.txt s3://local-opportunities/path/to/tmp.txt
```

I can see the tmp file:
```sh
aws --endpoint-url http://localhost:4566 s3 ls s3://local-opportunities/path/to/
> 2024-07-12 13:23:22         15 tmp.txt
```

And I can download it:
```sh
aws --endpoint-url http://localhost:4566 s3 cp s3://local-opportunities/path/to/tmp.txt local_tmp.txt
```
  • Loading branch information
chouinar authored and acouch committed Sep 18, 2024
1 parent ebc6732 commit 8464504
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 17 deletions.
3 changes: 3 additions & 0 deletions api/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,6 @@ coverage.*

#e2e
/test-results/

# localstack
/volume
12 changes: 11 additions & 1 deletion api/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ start-debug:
run-logs: start
docker compose logs --follow --no-color $(APP_NAME)

init: build init-db init-opensearch
init: build init-db init-opensearch init-localstack

clean-volumes: ## Remove project docker volumes (which includes the DB state)
docker compose down --volumes
Expand Down Expand Up @@ -191,7 +191,17 @@ start-opensearch:
docker compose up --detach opensearch-dashboards
./bin/wait-for-local-opensearch.sh

##################################################
# Localstack
##################################################

init-localstack: start-localstack setup-localstack

start-localstack:
docker compose up --detach localstack

setup-localstack:
$(PY_RUN_CMD) setup-localstack

##################################################
# Testing
Expand Down
49 changes: 49 additions & 0 deletions api/bin/setup_localstack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import logging

import botocore.client
import botocore.exceptions

import src.logging
from src.adapters.aws import S3Config, get_s3_client
from src.util.local import error_if_not_local

logger = logging.getLogger(__name__)


def does_s3_bucket_exist(s3_client: botocore.client.BaseClient, bucket_name: str) -> bool:
    """Return True if the given S3 bucket exists, False if it does not.

    Args:
        s3_client: A boto3/botocore S3 client.
        bucket_name: Name of the bucket to check.

    Raises:
        botocore.exceptions.ClientError: For any head_bucket failure other
            than a 404 (e.g. a 403 when the bucket exists but access is denied).
    """
    try:
        s3_client.head_bucket(Bucket=bucket_name)
        return True
    except botocore.exceptions.ClientError as e:
        # head_bucket gives us no richer signal than the HTTP status code.
        # We'll assume that if the error code is a 404 that means
        # it could not find the bucket and thus it needs to be created;
        # anything else (403, 5xx, ...) is a real problem, so re-raise.
        error_code = e.response.get("Error", {}).get("Code")
        if error_code != "404":
            # bare `raise` re-raises the active exception as-is
            raise

        return False


def setup_s3() -> None:
    """Ensure the configured S3 opportunity bucket exists.

    Reads the bucket name from S3Config (backed by the S3_OPPORTUNITY_BUCKET
    env var) and creates the bucket if it does not already exist; if it does,
    logs and does nothing. Intended for pointing at a local localstack S3.

    Raises:
        ValueError: If the S3_OPPORTUNITY_BUCKET env var is not set.
    """
    s3_config = S3Config()
    s3_client = get_s3_client(s3_config)

    # Fail fast with a clear message rather than letting create_bucket
    # blow up on a None bucket name. ValueError is more specific than a
    # bare Exception while remaining compatible with any broad handler.
    if s3_config.s3_opportunity_bucket is None:
        raise ValueError("S3_OPPORTUNITY_BUCKET env var must be set")

    if not does_s3_bucket_exist(s3_client, s3_config.s3_opportunity_bucket):
        logger.info("Creating S3 bucket %s", s3_config.s3_opportunity_bucket)
        s3_client.create_bucket(Bucket=s3_config.s3_opportunity_bucket)
    else:
        logger.info("S3 bucket %s already exists - skipping", s3_config.s3_opportunity_bucket)


def main() -> None:
    """Script entry point: initialize logging, then set up local S3 resources."""
    with src.logging.init("setup_localstack"):
        # Guard: this script creates buckets and must only ever run
        # against a local environment, never a deployed one.
        error_if_not_local()
        setup_s3()


if __name__ == "__main__":
    main()
17 changes: 17 additions & 0 deletions api/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,22 @@ services:
- 'OPENSEARCH_HOSTS=["http://opensearch-node:9200"]'
- DISABLE_SECURITY_DASHBOARDS_PLUGIN=true # disables security dashboards plugin in OpenSearch Dashboards

localstack:
container_name: "${LOCALSTACK_DOCKER_NAME:-localstack-main}"
image: localstack/localstack
ports:
- "127.0.0.1:4566:4566" # LocalStack Gateway
- "127.0.0.1:4510-4559:4510-4559" # external services port range
environment:
# LocalStack configuration: https://docs.localstack.cloud/references/configuration/
- DEBUG=${DEBUG:-0}
# To improve startup time, only add services we use
- SERVICES=s3
- EAGER_SERVICES_LOADING=1
volumes:
- "${LOCALSTACK_VOLUME_DIR:-./volume}:/var/lib/localstack"
- "/var/run/docker.sock:/var/run/docker.sock"

grants-api:
build:
context: .
Expand All @@ -61,6 +77,7 @@ services:
depends_on:
- grants-db
- opensearch-node
- localstack

volumes:
grantsdbdata:
Expand Down
14 changes: 14 additions & 0 deletions api/local.env
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,20 @@ AWS_SECRET_ACCESS_KEY=DO_NOT_SET_HERE

AWS_DEFAULT_REGION=us-east-1

############################
# Localstack
############################

# If you want to connect to localstack outside of docker
# use localhost:4566 instead
S3_ENDPOINT_URL=http://localstack:4566

############################
# S3
############################

S3_OPPORTUNITY_BUCKET=local-opportunities

############################
# Feature Flags
############################
Expand Down
2 changes: 1 addition & 1 deletion api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ db-migrate-down-all = "src.db.migrations.run:downall"
db-seed-local = "tests.lib.seed_local_db:seed_local_db"
create-erds = "bin.create_erds:main"
setup-postgres-db = "src.db.migrations.setup_local_postgres_db:setup_local_postgres_db"

setup-localstack = "bin.setup_localstack:main"

[tool.black]
line-length = 100
Expand Down
3 changes: 3 additions & 0 deletions api/src/adapters/aws/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Public interface of the AWS adapters package: re-export the S3 helpers
# so callers can write `from src.adapters.aws import get_s3_client`.
from .s3_adapter import S3Config, get_s3_client

__all__ = ["get_s3_client", "S3Config"]
30 changes: 30 additions & 0 deletions api/src/adapters/aws/s3_adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import boto3
import botocore.client

from src.util.env_config import PydanticBaseEnvConfig


class S3Config(PydanticBaseEnvConfig):
    """Configuration for connecting to S3.

    Fields are presumably populated from env vars by PydanticBaseEnvConfig
    (e.g. S3_ENDPOINT_URL, S3_OPPORTUNITY_BUCKET) — confirm against that
    base class if in doubt.
    """

    # We should generally not need to set this except
    # locally to use localstack
    s3_endpoint_url: str | None = None

    ### S3 Buckets
    # note that we default these to None
    # so that we don't need to set all of these for every
    # process that uses S3

    # TODO - I'm not sure how we want to organize our
    # s3 buckets so this will likely change in the future
    s3_opportunity_bucket: str | None = None


def get_s3_client(s3_config: S3Config | None = None) -> botocore.client.BaseClient:
    """Build a boto3 S3 client from the given config.

    If no config is supplied, one is constructed from the environment.
    An endpoint override (used locally to target localstack) is passed
    through only when configured.
    """
    config = S3Config() if s3_config is None else s3_config

    client_kwargs: dict = {}
    if config.s3_endpoint_url is not None:
        client_kwargs["endpoint_url"] = config.s3_endpoint_url

    return boto3.client("s3", **client_kwargs)
15 changes: 0 additions & 15 deletions api/src/util/file_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
from typing import Any, Optional, Tuple
from urllib.parse import urlparse

import boto3
import botocore
import smart_open
from botocore.config import Config

Expand Down Expand Up @@ -40,19 +38,6 @@ def join(*parts: str) -> str:
return os.path.join(*parts)


##################################
# S3 Utilities
##################################


def get_s3_client(boto_session: Optional[boto3.Session] = None) -> botocore.client.BaseClient:
"""Returns an S3 client, wrapping around boiler plate if you already have a session"""
if boto_session:
return boto_session.client("s3")

return boto3.client("s3")


##################################
# File operations
##################################
Expand Down

0 comments on commit 8464504

Please sign in to comment.