diff --git a/.env.example b/.env.example index 849f661368..e7a20bbd2b 100644 --- a/.env.example +++ b/.env.example @@ -23,6 +23,13 @@ OSM_SCOPE=read_prefs OSM_LOGIN_REDIRECT_URI=http://127.0.0.1:8080/osmauth/ OSM_SECRET_KEY=xxx +### S3 File Storage ### +S3_ENDPOINT="http://s3:9000" +S3_ACCESS_KEY="fmtm" +S3_SECRET_KEY="somelongpassword" +S3_BUCKET_NAME_BASEMAPS="basemaps" +S3_BUCKET_NAME_OVERLAYS="overlays" + ### Database (optional) ### CENTRAL_DB_HOST=central-db CENTRAL_DB_USER=odk diff --git a/.github/workflows/build_and_deploy.yml b/.github/workflows/build_and_deploy.yml index 1bc20f936d..7584fd6446 100644 --- a/.github/workflows/build_and_deploy.yml +++ b/.github/workflows/build_and_deploy.yml @@ -102,6 +102,8 @@ jobs: # -e OSM_CLIENT_ID="test" \ # -e OSM_CLIENT_SECRET="test" \ # -e OSM_SECRET_KEY="test" \ + # -e S3_ACCESS_KEY="fmtm" \ + # -e S3_SECRET_KEY="somelongpassword" \ # "ghcr.io/hotosm/fmtm/backend:${API_VERSION}-${GIT_BRANCH}" # # First wait 10 seconds for API diff --git a/.github/workflows/r-pytest.yml b/.github/workflows/r-pytest.yml index 5a2a4c502e..bd5bf87b85 100644 --- a/.github/workflows/r-pytest.yml +++ b/.github/workflows/r-pytest.yml @@ -78,6 +78,8 @@ jobs: OSM_CLIENT_ID: "${{ secrets.OSM_CLIENT_ID }}" OSM_CLIENT_SECRET: "${{ secrets.OSM_CLIENT_SECRET }}" OSM_SECRET_KEY: "${{ secrets.OSM_SECRET_KEY }}" + S3_ACCESS_KEY: "fmtm" + S3_SECRET_KEY: "somelongpassword" run: | echo "DEBUG=${DEBUG}" >> .env echo "LOG_LEVEL=${LOG_LEVEL}" >> .env @@ -89,6 +91,8 @@ jobs: echo "OSM_CLIENT_ID=${OSM_CLIENT_ID}" >> .env echo "OSM_CLIENT_SECRET=${OSM_CLIENT_SECRET}" >> .env echo "OSM_SECRET_KEY=${OSM_SECRET_KEY}" >> .env + echo "S3_ACCESS_KEY=${S3_ACCESS_KEY}" >> .env + echo "S3_SECRET_KEY=${S3_SECRET_KEY}" >> .env - name: Run PyTest run: | diff --git a/INSTALL.md b/INSTALL.md index 1e297b356d..ddf3d760ad 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -100,6 +100,11 @@ OSM_SCOPE=read_prefs OSM_LOGIN_REDIRECT_URI=http://127.0.0.1:8080/osmauth/ OSM_SECRET_KEY= 
+### S3 File Storage ### +S3_ENDPOINT="http://s3:9000" +S3_ACCESS_KEY=`` +S3_SECRET_KEY=`` + ### Database (optional) ### CENTRAL_DB_HOST=central-db CENTRAL_DB_USER=odk diff --git a/docker-compose.noodk.yml b/docker-compose.noodk.yml index 61f2ea8260..e46355c2b5 100644 --- a/docker-compose.noodk.yml +++ b/docker-compose.noodk.yml @@ -19,6 +19,7 @@ version: "3" volumes: fmtm_db_data: + fmtm_data: fmtm_logs: fmtm_images: fmtm_tiles: @@ -103,3 +104,20 @@ services: networks: - fmtm-dev restart: "unless-stopped" + + s3: + image: "docker.io/minio/minio:RELEASE.2023-10-07T15-07-38Z" + container_name: fmtm_s3 + environment: + MINIO_ROOT_USER: ${S3_ACCESS_KEY:-fmtm} + MINIO_ROOT_PASSWORD: ${S3_SECRET_KEY:-somelongpassword} + MINIO_VOLUMES: "/mnt/data" + MINIO_BROWSER: off + volumes: + - fmtm_data:/mnt/data + ports: + - 9000:9000 + networks: + - fmtm-dev + command: minio server # --console-address ":9090" + restart: "unless-stopped" diff --git a/docker-compose.yml b/docker-compose.yml index 713aebeb5f..73309e19e6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,11 +18,12 @@ version: "3" volumes: + fmtm_data: fmtm_db_data: + central_db_data: fmtm_logs: fmtm_images: fmtm_tiles: - central_db_data: networks: fmtm-dev: @@ -67,6 +68,7 @@ services: depends_on: - fmtm-db - migrations + - s3 - central-proxy env_file: - .env @@ -180,3 +182,20 @@ services: networks: - fmtm-dev restart: "unless-stopped" + + s3: + image: "docker.io/minio/minio:RELEASE.2023-10-07T15-07-38Z" + container_name: fmtm_s3 + environment: + MINIO_ROOT_USER: ${S3_ACCESS_KEY:-fmtm} + MINIO_ROOT_PASSWORD: ${S3_SECRET_KEY:-somelongpassword} + MINIO_VOLUMES: "/mnt/data" + MINIO_BROWSER: off + volumes: + - fmtm_data:/mnt/data + ports: + - 9000:9000 + networks: + - fmtm-dev + command: minio server # --console-address ":9090" + restart: "unless-stopped" diff --git a/docs/dev/Production.md b/docs/dev/Production.md index 6fdf307019..2a8d8812e3 100644 --- a/docs/dev/Production.md +++ b/docs/dev/Production.md 
@@ -70,6 +70,11 @@ that file to contain the needful (it should look like this): OSM_LOGIN_REDIRECT_URI=``/osmauth/ OSM_SECRET_KEY=`` + ### S3 File Storage ### + S3_ENDPOINT="http://s3:9000" + S3_ACCESS_KEY=`` + S3_SECRET_KEY=`` + FMTM_DB_HOST=fmtm-db FMTM_DB_USER=fmtm FMTM_DB_PASSWORD=`` diff --git a/docs/dev/Setup.md b/docs/dev/Setup.md index 03b0faf4d6..eec20af1de 100644 --- a/docs/dev/Setup.md +++ b/docs/dev/Setup.md @@ -280,6 +280,11 @@ OSM_SCOPE=read_prefs OSM_LOGIN_REDIRECT_URI=http://127.0.0.1:8080/osmauth/ OSM_SECRET_KEY= +### S3 File Storage ### +S3_ENDPOINT="http://s3:9000" +S3_ACCESS_KEY=`` +S3_SECRET_KEY=`` + ### Database (optional) ### CENTRAL_DB_HOST=central-db CENTRAL_DB_USER=odk diff --git a/docs/dev/Troubleshooting.md b/docs/dev/Troubleshooting.md index c257922f54..290edaf7e0 100644 --- a/docs/dev/Troubleshooting.md +++ b/docs/dev/Troubleshooting.md @@ -44,5 +44,6 @@ an alternative can be to feed them into the pdm command: ```bash FRONTEND_MAIN_URL="" \ OSM_CLIENT_ID="" OSM_CLIENT_SECRET="" OSM_SECRET_KEY="" \ +S3_ACCESS_KEY="" S3_SECRET_KEY="" \ pdm run uvicorn app.main:api --host 0.0.0.0 --port 8000 ``` diff --git a/src/backend/app-entrypoint.sh b/src/backend/app-entrypoint.sh index 259e74c550..f6ecf5af64 100644 --- a/src/backend/app-entrypoint.sh +++ b/src/backend/app-entrypoint.sh @@ -19,5 +19,33 @@ wait_for_db() { exit 1 # Exit with an error code } -wait_for_db +wait_for_s3() { + max_retries=30 + retry_interval=5 + + for ((i = 0; i < max_retries; i++)); do + if curl --silent -I ${S3_ENDPOINT} >/dev/null; then + echo "S3 is available." + return 0 # S3 is available, exit successfully + fi + echo "S3 is not yet available. Retrying in ${retry_interval} seconds..." + sleep ${retry_interval} + done + + echo "Timed out waiting for S3 to become available." 
+ exit 1 # Exit with an error code +} + +create_s3_buckets() { + echo "Running s3.py script main function" + python /opt/app/s3.py +} + +# Start wait in background with tmp log files +wait_for_db & +wait_for_s3 & +# Wait until checks complete +wait + +create_s3_buckets exec "$@" diff --git a/src/backend/app/config.py b/src/backend/app/config.py index fe4b525775..7ceb234468 100644 --- a/src/backend/app/config.py +++ b/src/backend/app/config.py @@ -107,6 +107,12 @@ def assemble_db_connection(cls, v: Optional[str], info: FieldValidationInfo) -> OSM_SCOPE: str = "read_prefs" OSM_LOGIN_REDIRECT_URI: str = "http://127.0.0.1:8080/osmauth/" + S3_ENDPOINT: str = "http://s3:9000" + S3_ACCESS_KEY: str + S3_SECRET_KEY: str + S3_BUCKET_NAME_BASEMAPS: str = "basemaps" + S3_BUCKET_NAME_OVERLAYS: str = "overlays" + UNDERPASS_API_URL: str = "https://raw-data-api0.hotosm.org/v1" SENTRY_DSN: Optional[str] = None diff --git a/src/backend/app/s3.py b/src/backend/app/s3.py new file mode 100644 index 0000000000..855d53fff1 --- /dev/null +++ b/src/backend/app/s3.py @@ -0,0 +1,139 @@ +"""Initialise the S3 buckets for FMTM to function.""" + +import sys +from io import BytesIO + +from loguru import logger as log +from minio import Minio + +from app.config import settings + + +def s3_client(): + """Return the initialised S3 client with credentials.""" + minio_url, is_secure = is_connection_secure(settings.S3_ENDPOINT) + + log.debug("Connecting to Minio S3 server") + return Minio( + minio_url, + settings.S3_ACCESS_KEY, + settings.S3_SECRET_KEY, + secure=is_secure, + ) + + +def add_file_to_bucket(bucket_name: str, file_path: str, s3_path: str): + """Upload a file from the filesystem to an S3 bucket. + + Args: + bucket_name (str): The name of the S3 bucket. + file_path (str): The path to the file on the local filesystem. + s3_path (str): The path in the S3 bucket where the file will be stored. 
+ """ + client = s3_client() + client.fput_object(bucket_name, file_path, s3_path) + + +def add_obj_to_bucket(bucket_name: str, file_obj: BytesIO, s3_path: str): + """Upload a BytesIO object to an S3 bucket. + + Args: + bucket_name (str): The name of the S3 bucket. + file_obj (BytesIO): A BytesIO object containing the data to be uploaded. + s3_path (str): The path in the S3 bucket where the data will be stored. + """ + client = s3_client() + result = client.put_object(bucket_name, file_obj, s3_path) + log.debug( + f"Created {result.object_name} object; etag: {result.etag}, " + f"version-id: {result.version_id}" + ) + + +def get_file_from_bucket(bucket_name: str, s3_path: str, file_path: str): + """Download a file from an S3 bucket and save it to the local filesystem. + + Args: + bucket_name (str): The name of the S3 bucket. + s3_path (str): The path to the file in the S3 bucket. + file_path (str): The path on the local filesystem where the S3 + file will be saved. + """ + client = s3_client() + client.fget_object(bucket_name, s3_path, file_path) + + +def get_obj_from_bucket(bucket_name: str, s3_path: str) -> BytesIO: + """Download an S3 object from a bucket and return it as a BytesIO object. + + Args: + bucket_name (str): The name of the S3 bucket. + s3_path (str): The path to the S3 object in the bucket. + + Returns: + BytesIO: A BytesIO object containing the content of the downloaded S3 object. 
+ """ + client = s3_client() + try: + response = client.get_object(bucket_name, s3_path) + return BytesIO(response.read()) + finally: + response.close() + response.release_conn() + + +def create_bucket_if_not_exists(client: Minio, bucket_name: str): + """Checks if a bucket exits, else creates it.""" + if not client.bucket_exists(bucket_name): + log.info(f"Creating S3 bucket: {bucket_name}") + client.make_bucket(bucket_name) + else: + log.debug(f"S3 bucket already exists: {bucket_name}") + + +def is_connection_secure(minio_url: str): + """Determine from URL string if is http or https.""" + if minio_url.startswith("http://"): + secure = False + stripped_url = minio_url[len("http://") :] + log.warning("S3 URL is insecure (ignore if on devserver)") + + elif minio_url.startswith("https://"): + secure = True + stripped_url = minio_url[len("https://") :] + + else: + err = ( + "The S3_ENDPOINT is set incorrectly. " + "It must start with http:// or https://" + ) + log.error(err) + raise ValueError(err) + + return stripped_url, secure + + +def startup_init_buckets(): + """Wrapper to create defined buckets at startup.""" + # Logging + log.remove() + log.add( + sys.stderr, + level=settings.LOG_LEVEL, + format=( + "{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} " + "| {name}:{function}:{line} | {message}" + ), + colorize=True, + backtrace=True, # More detailed tracebacks + catch=True, # Prevent app crashes + ) + + # Init S3 Buckets + client = s3_client() + create_bucket_if_not_exists(client, settings.S3_BUCKET_NAME_BASEMAPS) + create_bucket_if_not_exists(client, settings.S3_BUCKET_NAME_OVERLAYS) + + +if __name__ == "__main__": + startup_init_buckets() diff --git a/src/backend/pdm.lock b/src/backend/pdm.lock index 4d38030760..a440777665 100644 --- a/src/backend/pdm.lock +++ b/src/backend/pdm.lock @@ -6,7 +6,7 @@ groups = ["default", "test", "dev", "docs", "debug"] cross_platform = true static_urls = false lock_version = "4.3" -content_hash = 
"sha256:58849917d219600e52b8f65aadfb900919883b3eaff54b30ba07ce6f48654a12" +content_hash = "sha256:26442935d8f062b7a5d6ceeaf544bab11d9099cd47a19c17df8f3d3415a32bfb" [[package]] name = "annotated-types" @@ -772,6 +772,19 @@ files = [ {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"}, ] +[[package]] +name = "minio" +version = "7.1.17" +summary = "MinIO Python SDK for Amazon S3 Compatible Cloud Storage" +dependencies = [ + "certifi", + "urllib3", +] +files = [ + {file = "minio-7.1.17-py3-none-any.whl", hash = "sha256:0aa525d77a3bc61378444c2400b0ba2685ad4cd6ecb3fba4141a0d0765e25f40"}, + {file = "minio-7.1.17.tar.gz", hash = "sha256:b0b687c1ec9be422a1f8b04c65fb8e43a1c090f9508178db57c434a17341c404"}, +] + [[package]] name = "mkdocs" version = "1.5.3" diff --git a/src/backend/pyproject.toml b/src/backend/pyproject.toml index aed1f320d5..a85f0f4ac4 100644 --- a/src/backend/pyproject.toml +++ b/src/backend/pyproject.toml @@ -46,6 +46,7 @@ dependencies = [ "osm-login-python==1.0.1", "osm-fieldwork==0.3.6", "osm-rawdata==0.1.3", + "minio>=7.1.17", ] requires-python = ">=3.10,<3.12" readme = "../../README.md"