From c1951251c54927ae130c9c790340a377306ffc8a Mon Sep 17 00:00:00 2001 From: KonstantAnxiety Date: Fri, 20 Dec 2024 16:25:34 +0300 Subject: [PATCH 1/9] feat: BI-5952 WIP file-uploader-api handler for processing presigned file from bucket --- .../dl_file_uploader_api_lib/schemas/files.py | 5 ++ .../dl_file_uploader_api_lib/views/files.py | 51 ++++++++++++++++++- .../db/test_files_api.py | 12 ++++- .../ext/test_update_data_gsheets.py | 2 +- .../ext/test_update_data_yadocs.py | 2 +- .../ext/test_yadocs.py | 2 +- lib/dl_s3/dl_s3/utils.py | 21 ++++++++ lib/dl_testing/dl_testing/s3_utils.py | 13 ----- 8 files changed, 90 insertions(+), 18 deletions(-) create mode 100644 lib/dl_s3/dl_s3/utils.py diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/schemas/files.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/schemas/files.py index a60f731f6..179ae198e 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/schemas/files.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/schemas/files.py @@ -76,6 +76,11 @@ class MakePresignedUrlRequestSchema(ma.Schema): content_md5 = ma.fields.String(required=True) +class DownloadPresignedUrlRequestSchema(ma.Schema): + filename = ma.fields.String(required=True) + key = ma.fields.String(required=True) + + class PresignedUrlSchema(ma.Schema): class PresignedUrlFields(ma.Schema): class Meta: diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py index eeea0c7c0..8932e50d4 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py @@ -13,6 +13,7 @@ from aiohttp import web from aiohttp.multipart import BodyPartReader +from dl_s3.utils import s3_file_exists from redis.asyncio.lock import Lock as RedisLock from dl_api_commons.aiohttp.aiohttp_wrappers import ( @@ -53,6 +54,8 @@ LOGGER = logging.getLogger(__name__) +S3_KEY_PARTS_SEPARATOR = "--" # used to separate author user_id from the rest of the s3 object key to sign it + def get_file_type_from_name( filename: Optional[str], @@ -136,7 +139,10 @@ async def post(self) -> web.StreamResponse: content_md5: str = req_data["content_md5"] s3 = self.dl_request.get_s3_service() - s3_key = "{}_{}".format(self.dl_request.rci.user_id or "unknown", str(uuid.uuid4())) + s3_key = S3_KEY_PARTS_SEPARATOR.join(( + self.dl_request.rci.user_id or "unknown", + str(uuid.uuid4()), + )) url = await s3.client.generate_presigned_post( Bucket=s3.tmp_bucket_name, @@ -154,6 +160,49 @@ async def post(self) -> web.StreamResponse: ) +class DownloadPresignedUrlView(FileUploaderBaseView): + async def post(self) -> web.StreamResponse: + req_data = await self._load_post_request_schema_data(files_schemas.DownloadPresignedUrlRequestSchema) + filename: str = req_data["filename"] + key: str = req_data["key"] + + file_type = get_file_type_from_name(filename=filename, allow_xlsx=self.request.app["ALLOW_XLSX"]) + + s3 = self.dl_request.get_s3_service() + + file_exists = await s3_file_exists(s3.client, s3.tmp_bucket_name, key) + if not file_exists: + raise exc.DocumentNotFound() + + user_id_from_key = key.split(S3_KEY_PARTS_SEPARATOR)[0] + if user_id_from_key != self.dl_request.rci.user_id: + exc.PermissionDenied() + + rmm = self.dl_request.get_redis_model_manager() + dfile = DataFile( + manager=rmm, + filename=filename, + file_type=file_type, + status=FileProcessingStatus.in_progress, + ) + LOGGER.info(f"Data file id: {dfile.id}") + + await dfile.save() + + task_processor = self.dl_request.get_task_processor() + if file_type == FileType.xlsx: + await task_processor.schedule(ProcessExcelTask(file_id=dfile.id)) + LOGGER.info(f"Scheduled ProcessExcelTask for file_id {dfile.id}") + else: + await task_processor.schedule(ParseFileTask(file_id=dfile.id)) + LOGGER.info(f"Scheduled ParseFileTask for file_id {dfile.id}") + + return web.json_response( + files_schemas.FileUploadResponseSchema().dump({"file_id": dfile.id, "title": dfile.filename}), + status=HTTPStatus.CREATED, + ) + + class LinksView(FileUploaderBaseView): REQUIRED_RESOURCES: ClassVar[frozenset[RequiredResource]] = frozenset() # Don't skip CSRF check diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py index 81b2882a6..0ee5740eb 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py @@ -4,6 +4,7 @@ import uuid import attr +from dl_file_uploader_api_lib.views.files import S3_KEY_PARTS_SEPARATOR import pytest from dl_api_commons.base_models import RequestContextInfo @@ -28,7 +29,7 @@ async def test_file_upload_cors(fu_client, s3_tmp_bucket, upload_file_req): @pytest.mark.asyncio -async def test_make_presigned_url(fu_client, s3_tmp_bucket): +async def test_make_presigned_url(fu_client, s3_tmp_bucket, rci): expected_url_fields = ("key", "x-amz-algorithm", "x-amz-credential", "x-amz-date", "policy", "x-amz-signature") resp = await fu_client.make_request(ReqBuilder.presigned_url("mymd5")) @@ -36,6 +37,15 @@ async def test_make_presigned_url(fu_client, s3_tmp_bucket): assert "url" in resp.json, resp.json assert "fields" in resp.json, resp.json assert all(field in resp.json["fields"] for field in expected_url_fields), resp.json + key = resp.json["fields"]["key"] + key_parts = key.split(S3_KEY_PARTS_SEPARATOR) + assert len(key_parts) == 2, key_parts + assert key_parts[0] == rci.user_id + + +@pytest.mark.asyncio +async def test_download_presigned_url(fu_client, s3_tmp_bucket, rci): + # TODO test @pytest.mark.asyncio diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/ext/test_update_data_gsheets.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/ext/test_update_data_gsheets.py index 7b1d799b9..e1d59880e 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/ext/test_update_data_gsheets.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/ext/test_update_data_gsheets.py @@ -15,7 +15,7 @@ from dl_core_testing.connection import make_conn_key from dl_file_uploader_api_lib_tests.req_builder import ReqBuilder from dl_file_uploader_lib import exc -from dl_testing.s3_utils import s3_file_exists +from dl_s3.utils import s3_file_exists from dl_connector_bundle_chs3.chs3_gsheets.core.constants import CONNECTION_TYPE_GSHEETS_V2 from dl_connector_bundle_chs3.chs3_gsheets.core.lifecycle import GSheetsFileS3ConnectionLifecycleManager diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/ext/test_update_data_yadocs.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/ext/test_update_data_yadocs.py index 98ea488f5..cce64cfd2 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/ext/test_update_data_yadocs.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/ext/test_update_data_yadocs.py @@ -15,7 +15,7 @@ from dl_core_testing.connection import make_conn_key from dl_file_uploader_api_lib_tests.req_builder import ReqBuilder from dl_file_uploader_lib import exc -from dl_testing.s3_utils import s3_file_exists +from dl_s3.utils import s3_file_exists from dl_connector_bundle_chs3.chs3_yadocs.core.constants import CONNECTION_TYPE_YADOCS from dl_connector_bundle_chs3.chs3_yadocs.core.lifecycle import YaDocsFileS3ConnectionLifecycleManager diff --git a/lib/dl_file_uploader_worker_lib/dl_file_uploader_worker_lib_tests/ext/test_yadocs.py b/lib/dl_file_uploader_worker_lib/dl_file_uploader_worker_lib_tests/ext/test_yadocs.py index 934e784a6..6f6ad5fcc 100644 --- a/lib/dl_file_uploader_worker_lib/dl_file_uploader_worker_lib_tests/ext/test_yadocs.py +++ b/lib/dl_file_uploader_worker_lib/dl_file_uploader_worker_lib_tests/ext/test_yadocs.py @@ -8,7 +8,7 @@ ) from dl_file_uploader_task_interface.tasks import DownloadYaDocsTask from dl_task_processor.state import wait_task -from dl_testing.s3_utils import s3_file_exists +from dl_s3.utils import s3_file_exists @pytest.mark.asyncio diff --git a/lib/dl_s3/dl_s3/utils.py b/lib/dl_s3/dl_s3/utils.py new file mode 100644 index 000000000..9f06913eb --- /dev/null +++ b/lib/dl_s3/dl_s3/utils.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import botocore.exceptions + +if TYPE_CHECKING: + from types_aiobotocore_s3 import S3Client as AsyncS3Client + + +async def s3_file_exists(s3_client: AsyncS3Client, bucket: str, key: str) -> bool: + try: + s3_resp = await s3_client.head_object( + Bucket=bucket, + Key=key, + ) + except botocore.exceptions.ClientError as ex: + if ex.response["ResponseMetadata"]["HTTPStatusCode"] == 404: + return False + raise + return s3_resp["ResponseMetadata"]["HTTPStatusCode"] == 200 diff --git a/lib/dl_testing/dl_testing/s3_utils.py b/lib/dl_testing/dl_testing/s3_utils.py index 6cac0791f..f3327ae04 100644 --- a/lib/dl_testing/dl_testing/s3_utils.py +++ b/lib/dl_testing/dl_testing/s3_utils.py @@ -83,19 +83,6 @@ async def get_lc_rules_number(s3_client: AsyncS3Client, bucket: str) -> int: return len(lc_config["Rules"]) -async def s3_file_exists(s3_client: AsyncS3Client, bucket: str, key: str) -> bool: - try: - s3_resp = await s3_client.head_object( - Bucket=bucket, - Key=key, - ) - except botocore.exceptions.ClientError as ex: - if ex.response["ResponseMetadata"]["HTTPStatusCode"] == 404: - return False - raise - return s3_resp["ResponseMetadata"]["HTTPStatusCode"] == 200 - - S3_TBL_FUNC_TEMPLATE = """s3( '{s3_endpoint}/{bucket}/{filename}', '{key_id}', From 2cccae9ccefb9d0a72c4ef49fa6b8b0c37902272 Mon Sep 17 00:00:00 2001 From: KonstantAnxiety Date: Tue, 24 Dec 2024 19:49:42 +0300 Subject: [PATCH 2/9] fixes & tests --- .../dl_file_uploader_api_lib/app.py | 1 + .../dl_file_uploader_api_lib/views/files.py | 19 +++++--- .../conftest.py | 8 +++- .../db/conftest.py | 37 +++++++++++---- .../db/test_files_api.py | 46 +++++++++++++++++-- .../req_builder.py | 12 +++++ .../docker-compose.yml | 13 +++--- .../redis_model/models/models.py | 4 +- lib/dl_s3/dl_s3/s3_service.py | 2 +- lib/dl_s3/dl_s3/utils.py | 27 ++++++++++- 10 files changed, 136 insertions(+), 33 deletions(-) diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/app.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/app.py index 06fc4723e..6a98e65ef 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/app.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/app.py @@ -137,6 +137,7 @@ def create_app(self, app_version: str) -> web.Application: app.router.add_route("post", "/api/v2/files", files_views.FilesView) app.router.add_route("post", "/api/v2/make_presigned_url", files_views.MakePresignedUrlView) + app.router.add_route("post", "/api/v2/download_presigned_url", files_views.DownloadPresignedUrlView) app.router.add_route("post", "/api/v2/links", files_views.LinksView) app.router.add_route("post", "/api/v2/documents", files_views.DocumentsView) app.router.add_route("post", "/api/v2/update_connection_data", files_views.UpdateConnectionDataView) diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py index 8932e50d4..283e15771 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py @@ -54,7 +54,7 @@ LOGGER = logging.getLogger(__name__) -S3_KEY_PARTS_SEPARATOR = "--" # used to separate author user_id from the rest of the s3 object key to sign it +S3_KEY_PARTS_SEPARATOR = "/" # used to separate author user_id from the rest of the s3 object key to sign it def get_file_type_from_name( @@ -134,6 +134,10 @@ async def _chunk_iter(chunk_size: int = 10 * 1024 * 1024) -> AsyncGenerator[byte class MakePresignedUrlView(FileUploaderBaseView): + PRESIGNED_URL_EXPIRATION_SECONDS: ClassVar[int] = 60 * 60 # 1 hour + PRESIGNED_URL_MIN_BYTES: ClassVar[int] = 1 + PRESIGNED_URL_MAX_BYTES: ClassVar[int] = 200 * 1024 ** 2 # 200 MB + async def post(self) -> web.StreamResponse: req_data = await self._load_post_request_schema_data(files_schemas.MakePresignedUrlRequestSchema) content_md5: str = req_data["content_md5"] @@ -147,9 +151,9 @@ async def post(self) -> web.StreamResponse: url = await s3.client.generate_presigned_post( Bucket=s3.tmp_bucket_name, Key=s3_key, - ExpiresIn=60 * 60, # 1 hour + ExpiresIn=self.PRESIGNED_URL_EXPIRATION_SECONDS, Conditions=[ - ["content-length-range", 1, 200 * 1024 * 1024], # 1B .. 200MB # TODO use constant from DataSink + ["content-length-range", self.PRESIGNED_URL_MIN_BYTES, self.PRESIGNED_URL_MAX_BYTES], {"Content-MD5": content_md5}, ], ) @@ -164,22 +168,23 @@ class DownloadPresignedUrlView(FileUploaderBaseView): async def post(self) -> web.StreamResponse: req_data = await self._load_post_request_schema_data(files_schemas.DownloadPresignedUrlRequestSchema) filename: str = req_data["filename"] - key: str = req_data["key"] + s3_key: str = req_data["key"] file_type = get_file_type_from_name(filename=filename, allow_xlsx=self.request.app["ALLOW_XLSX"]) s3 = self.dl_request.get_s3_service() - file_exists = await s3_file_exists(s3.client, s3.tmp_bucket_name, key) + file_exists = await s3_file_exists(s3.client, s3.tmp_bucket_name, s3_key) if not file_exists: raise exc.DocumentNotFound() - user_id_from_key = key.split(S3_KEY_PARTS_SEPARATOR)[0] - if user_id_from_key != self.dl_request.rci.user_id: + s3_key_parts = s3_key.split(S3_KEY_PARTS_SEPARATOR) + if len(s3_key_parts) != 2 or s3_key_parts[0] != self.dl_request.rci.user_id: exc.PermissionDenied() rmm = self.dl_request.get_redis_model_manager() dfile = DataFile( + s3_key=s3_key, manager=rmm, filename=filename, file_type=file_type, diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/conftest.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/conftest.py index 0c844d977..546f74c9d 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/conftest.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/conftest.py @@ -98,6 +98,10 @@ pass +_TESTS_USER_ID = "_the_tests_asyncapp_user_id_" +_TESTS_USER_NAME = "_the_tests_asyncapp_user_name_" + + def pytest_configure(config: Any) -> None: # noqa common_pytest_configure(tracing_service_name="tests_bi_file_uploader") @@ -215,8 +219,8 @@ class TestingFileUploaderApiAppFactory(FileUploaderApiAppFactory[FileUploaderAPI def get_auth_middlewares(self) -> list[Middleware]: return [ auth_trust_middleware( - fake_user_id="_the_tests_file_uploader_api_user_id_", - fake_user_name="_the_tests_file_uploader_api_user_name_", + fake_user_id=_TESTS_USER_ID, + fake_user_name=_TESTS_USER_NAME, ) ] diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/conftest.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/conftest.py index ae254b5c7..30ce34216 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/conftest.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/conftest.py @@ -1,7 +1,10 @@ import asyncio +import base64 +import hashlib import logging import os +from dl_s3.utils import upload_to_s3_by_presigned import pytest from dl_api_commons.client.common import Req @@ -64,21 +67,39 @@ def upload_file_req_12mb() -> Req: @pytest.fixture(scope="function") -async def uploaded_file_id(s3_tmp_bucket, fu_client, upload_file_req) -> str: - resp = await fu_client.make_request(upload_file_req) - assert resp.status == 201 +async def uploaded_file_id(s3_tmp_bucket, fu_client, csv_data) -> str: + content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8") + presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5)) + assert presigned_url_resp.status == 200, presigned_url_resp.json + + upload_resp = await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, csv_data) + assert upload_resp.status == 204 + + download_resp = await fu_client.make_request(ReqBuilder.presigned_url_download(presigned_url_resp.json["fields"]["key"], "csv_data.csv")) + assert download_resp.status == 201, download_resp.json + + assert download_resp.status == 201 await asyncio.sleep(3) - return resp.json["file_id"] + return download_resp.json["file_id"] @pytest.fixture(scope="function") async def uploaded_excel_id( s3_tmp_bucket, fu_client, - upload_excel_req, + excel_data, reader_app, ) -> str: - resp = await fu_client.make_request(upload_excel_req) - assert resp.status == 201 + content_md5 = base64.b64encode(hashlib.md5(excel_data).digest()).decode("utf-8") + presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5)) + assert presigned_url_resp.status == 200, presigned_url_resp.json + + upload_resp = await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, excel_data) + assert upload_resp.status == 204 + + download_resp = await fu_client.make_request(ReqBuilder.presigned_url_download(presigned_url_resp.json["fields"]["key"], "data.xlsx")) + assert download_resp.status == 201, download_resp.json + + assert download_resp.status == 201 await asyncio.sleep(3) - return resp.json["file_id"] + return download_resp.json["file_id"] diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py index 0ee5740eb..d92db464e 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py @@ -1,10 +1,16 @@ import asyncio import http +import io import json import uuid +import hashlib +from urllib.parse import urlparse +import base64 +import aiohttp import attr -from dl_file_uploader_api_lib.views.files import S3_KEY_PARTS_SEPARATOR +from dl_file_uploader_api_lib.views.files import S3_KEY_PARTS_SEPARATOR, MakePresignedUrlView +from dl_s3.utils import upload_to_s3_by_presigned import pytest from dl_api_commons.base_models import RequestContextInfo @@ -44,8 +50,42 @@ async def test_make_presigned_url(fu_client, s3_tmp_bucket, rci): @pytest.mark.asyncio -async def test_download_presigned_url(fu_client, s3_tmp_bucket, rci): - # TODO test +async def test_download_presigned_url(fu_client, s3_tmp_bucket, rci, csv_data): + content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8") + + presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5)) + assert presigned_url_resp.status == 200, presigned_url_resp.json + + upload_resp = await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, csv_data) + upload_resp_data = await upload_resp.read() + assert upload_resp.status == 204, upload_resp_data + + download_resp = await fu_client.make_request(ReqBuilder.presigned_url_download(presigned_url_resp.json["fields"]["key"], "csv_data.csv")) + assert download_resp.status == 201, download_resp.json + + +async def test_upload_presigned_too_large_file(monkeypatch, fu_client, s3_tmp_bucket, rci, csv_data): + monkeypatch.setattr(MakePresignedUrlView, "PRESIGNED_URL_MAX_BYTES", 32) + + content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8") + + presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5)) + assert presigned_url_resp.status == 200, presigned_url_resp.json + + with pytest.raises(aiohttp.ClientResponseError): + await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, csv_data) + + +async def test_upload_presigned_bad_key(monkeypatch, fu_client, s3_tmp_bucket, rci, csv_data): + content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8") + + presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5)) + assert presigned_url_resp.status == 200, presigned_url_resp.json + + presigned_url_resp.json["fields"]["key"] = "hacker/file" + + with pytest.raises(aiohttp.ClientResponseError): + await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, csv_data) @pytest.mark.asyncio diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/req_builder.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/req_builder.py index 6c24ee012..96f8ab7ac 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/req_builder.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/req_builder.py @@ -109,6 +109,18 @@ def presigned_url(cls, content_md5: str, *, require_ok: bool = True) -> Req: require_ok=require_ok, ) + @classmethod + def presigned_url_download(cls, key: str, filename: str, *, require_ok: bool = True) -> Req: + return Req( + method="post", + url="/api/v2/download_presigned_url", + data_json={ + "key": key, + "filename": filename, + }, + require_ok=require_ok, + ) + @classmethod def file_status(cls, file_id: str) -> Req: return Req( diff --git a/lib/dl_file_uploader_api_lib/docker-compose.yml b/lib/dl_file_uploader_api_lib/docker-compose.yml index 6f13199cf..ab9402f0e 100644 --- a/lib/dl_file_uploader_api_lib/docker-compose.yml +++ b/lib/dl_file_uploader_api_lib/docker-compose.yml @@ -11,15 +11,14 @@ services: - 51404:6379 s3-storage: - build: - context: ../testenv-common/images - dockerfile: Dockerfile.s3-storage - command: bash /data/entrypoint.sh + image: minio/minio:RELEASE.2024-12-18T13-15-44Z@sha256:1dce27c494a16bae114774f1cec295493f3613142713130c2d22dd5696be6ad3 environment: - S3BACKEND: "mem" - REMOTE_MANAGEMENT_DISABLE: 1 + MINIO_ROOT_USER: accessKey1 + MINIO_ROOT_PASSWORD: verySecretKey1 + MINIO_DOMAIN: local + command: server /export ports: - - 51420:8000 + - "51420:9000" init-db: depends_on: diff --git a/lib/dl_file_uploader_lib/dl_file_uploader_lib/redis_model/models/models.py b/lib/dl_file_uploader_lib/dl_file_uploader_lib/redis_model/models/models.py index 468e2ddf5..790a7f2d2 100644 --- a/lib/dl_file_uploader_lib/dl_file_uploader_lib/redis_model/models/models.py +++ b/lib/dl_file_uploader_lib/dl_file_uploader_lib/redis_model/models/models.py @@ -202,9 +202,7 @@ class DataFile(RedisModelUserIdAuth): def s3_key_old(self) -> str: # transition from s3_key generated by self.id to stored self.s3_key, to be removed in future releases # see also: DataFileSchema - if self.s3_key is not None: - return self.s3_key - return self.id + return self.s3_key or self.id def get_secret_keys(self) -> set[DataKey]: if self.user_source_properties is None: diff --git a/lib/dl_s3/dl_s3/s3_service.py b/lib/dl_s3/dl_s3/s3_service.py index eec46e4da..20a01a2ba 100644 --- a/lib/dl_s3/dl_s3/s3_service.py +++ b/lib/dl_s3/dl_s3/s3_service.py @@ -60,7 +60,7 @@ async def initialize(self) -> None: aws_access_key_id=self._access_key_id, aws_secret_access_key=self._secret_access_key, endpoint_url=self._endpoint_url, - config=AioConfig(signature_version="s3v4"), + config=AioConfig(signature_version="s3v4"), # v4 signature is required to generate presigned URLs with restriction policies ) session = get_session() diff --git a/lib/dl_s3/dl_s3/utils.py b/lib/dl_s3/dl_s3/utils.py index 9f06913eb..3a073f161 100644 --- a/lib/dl_s3/dl_s3/utils.py +++ b/lib/dl_s3/dl_s3/utils.py @@ -1,13 +1,36 @@ from __future__ import annotations -from typing import TYPE_CHECKING +import typing +import aiohttp import botocore.exceptions -if TYPE_CHECKING: +if typing.TYPE_CHECKING: from types_aiobotocore_s3 import S3Client as AsyncS3Client +async def upload_to_s3_by_presigned(presigned_url: dict[str, typing.Any], content_md5: str, data: str) -> aiohttp.ClientResponse: + upload_url = presigned_url["url"] + upload_url_fields = presigned_url["fields"] + upload_url_fields["content-md5"] = content_md5 + + async with aiohttp.ClientSession() as session: + with aiohttp.MultipartWriter("form-data") as mpwriter: + for k, v in upload_url_fields.items(): + part = mpwriter.append(v, {'Content-Type': 'text/plain', 'Content-Disposition': f'attachment; name="{k}"'}) + part.set_content_disposition("form-data", name=k) + + part = mpwriter.append(data, {'Content-Type': 'text/plain', 'Content-Disposition': f'attachment; filename="mydata"'}) + part.set_content_disposition("form-data", name="file") + + async with session.post( + url=upload_url, + data=mpwriter, + ) as resp: + resp.raise_for_status() + return resp + + async def s3_file_exists(s3_client: AsyncS3Client, bucket: str, key: str) -> bool: try: s3_resp = await s3_client.head_object( From f434238cad69dfb0b33182519113ef2ab98aa6aa Mon Sep 17 00:00:00 2001 From: KonstantAnxiety Date: Tue, 24 Dec 2024 20:00:40 +0300 Subject: [PATCH 3/9] cq --- .../dl_file_uploader_api_lib/views/files.py | 14 ++++++++------ .../db/conftest.py | 10 +++++++--- .../db/test_files_api.py | 17 ++++++++++------- .../ext/test_yadocs.py | 2 +- lib/dl_s3/dl_s3/s3_service.py | 3 ++- lib/dl_s3/dl_s3/utils.py | 13 ++++++++++--- 6 files changed, 38 insertions(+), 21 deletions(-) diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py index 283e15771..820afbfab 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py @@ -13,7 +13,6 @@ from aiohttp import web from aiohttp.multipart import BodyPartReader -from dl_s3.utils import s3_file_exists from redis.asyncio.lock import Lock as RedisLock from dl_api_commons.aiohttp.aiohttp_wrappers import ( @@ -50,6 +49,7 @@ ) from dl_s3.data_sink import S3RawFileAsyncDataSink from dl_s3.stream import RawBytesAsyncDataStream +from dl_s3.utils import s3_file_exists LOGGER = logging.getLogger(__name__) @@ -136,17 +136,19 @@ async def _chunk_iter(chunk_size: int = 10 * 1024 * 1024) -> AsyncGenerator[byte class MakePresignedUrlView(FileUploaderBaseView): PRESIGNED_URL_EXPIRATION_SECONDS: ClassVar[int] = 60 * 60 # 1 hour PRESIGNED_URL_MIN_BYTES: ClassVar[int] = 1 - PRESIGNED_URL_MAX_BYTES: ClassVar[int] = 200 * 1024 ** 2 # 200 MB + PRESIGNED_URL_MAX_BYTES: ClassVar[int] = 200 * 1024**2 # 200 MB async def post(self) -> web.StreamResponse: req_data = await self._load_post_request_schema_data(files_schemas.MakePresignedUrlRequestSchema) content_md5: str = req_data["content_md5"] s3 = self.dl_request.get_s3_service() - s3_key = S3_KEY_PARTS_SEPARATOR.join(( - self.dl_request.rci.user_id or "unknown", - str(uuid.uuid4()), - )) + s3_key = S3_KEY_PARTS_SEPARATOR.join( + ( + self.dl_request.rci.user_id or "unknown", + str(uuid.uuid4()), + ) + ) url = await s3.client.generate_presigned_post( Bucket=s3.tmp_bucket_name, diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/conftest.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/conftest.py index 30ce34216..35824164c 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/conftest.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/conftest.py @@ -4,12 +4,12 @@ import logging import os -from dl_s3.utils import upload_to_s3_by_presigned import pytest from dl_api_commons.client.common import Req from dl_file_uploader_api_lib_tests.req_builder import ReqBuilder from dl_file_uploader_lib.testing.data_gen import generate_sample_csv_data_str +from dl_s3.utils import upload_to_s3_by_presigned LOGGER = logging.getLogger(__name__) @@ -75,7 +75,9 @@ async def uploaded_file_id(s3_tmp_bucket, fu_client, csv_data) -> str: upload_resp = await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, csv_data) assert upload_resp.status == 204 - download_resp = await fu_client.make_request(ReqBuilder.presigned_url_download(presigned_url_resp.json["fields"]["key"], "csv_data.csv")) + download_resp = await fu_client.make_request( + ReqBuilder.presigned_url_download(presigned_url_resp.json["fields"]["key"], "csv_data.csv") + ) assert download_resp.status == 201, download_resp.json assert download_resp.status == 201 @@ -97,7 +99,9 @@ async def uploaded_excel_id( upload_resp = await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, excel_data) assert upload_resp.status == 204 - download_resp = await fu_client.make_request(ReqBuilder.presigned_url_download(presigned_url_resp.json["fields"]["key"], "data.xlsx")) + download_resp = await fu_client.make_request( + ReqBuilder.presigned_url_download(presigned_url_resp.json["fields"]["key"], "data.xlsx") + ) assert download_resp.status == 201, download_resp.json assert download_resp.status == 201 diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py index d92db464e..381b463dc 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py @@ -1,25 +1,26 @@ import asyncio +import base64 +import hashlib import http -import io import json import uuid -import hashlib -from urllib.parse import urlparse -import base64 import aiohttp import attr -from dl_file_uploader_api_lib.views.files import S3_KEY_PARTS_SEPARATOR, MakePresignedUrlView -from dl_s3.utils import upload_to_s3_by_presigned import pytest from dl_api_commons.base_models import RequestContextInfo from dl_configs.crypto_keys import get_dummy_crypto_keys_config from dl_constants.enums import FileProcessingStatus +from dl_file_uploader_api_lib.views.files import ( + S3_KEY_PARTS_SEPARATOR, + MakePresignedUrlView, +) from dl_file_uploader_api_lib_tests.req_builder import ReqBuilder from dl_file_uploader_lib.redis_model.base import RedisModelManager from dl_file_uploader_lib.redis_model.models import DataFile from dl_s3.data_sink import S3RawFileAsyncDataSink +from dl_s3.utils import upload_to_s3_by_presigned @pytest.mark.asyncio @@ -60,7 +61,9 @@ async def test_download_presigned_url(fu_client, s3_tmp_bucket, rci, csv_data): upload_resp_data = await upload_resp.read() assert upload_resp.status == 204, upload_resp_data - download_resp = await fu_client.make_request(ReqBuilder.presigned_url_download(presigned_url_resp.json["fields"]["key"], "csv_data.csv")) + download_resp = await fu_client.make_request( + ReqBuilder.presigned_url_download(presigned_url_resp.json["fields"]["key"], "csv_data.csv") + ) assert download_resp.status == 201, download_resp.json diff --git a/lib/dl_file_uploader_worker_lib/dl_file_uploader_worker_lib_tests/ext/test_yadocs.py b/lib/dl_file_uploader_worker_lib/dl_file_uploader_worker_lib_tests/ext/test_yadocs.py index 6f6ad5fcc..c395049f2 100644 --- a/lib/dl_file_uploader_worker_lib/dl_file_uploader_worker_lib_tests/ext/test_yadocs.py +++ b/lib/dl_file_uploader_worker_lib/dl_file_uploader_worker_lib_tests/ext/test_yadocs.py @@ -7,8 +7,8 @@ YaDocsUserSourceProperties, ) from dl_file_uploader_task_interface.tasks import DownloadYaDocsTask -from dl_task_processor.state import wait_task from dl_s3.utils import s3_file_exists +from dl_task_processor.state import wait_task @pytest.mark.asyncio diff --git a/lib/dl_s3/dl_s3/s3_service.py b/lib/dl_s3/dl_s3/s3_service.py index 20a01a2ba..f68ddeb0c 100644 --- a/lib/dl_s3/dl_s3/s3_service.py +++ b/lib/dl_s3/dl_s3/s3_service.py @@ -60,7 +60,8 @@ async def initialize(self) -> None: aws_access_key_id=self._access_key_id, aws_secret_access_key=self._secret_access_key, endpoint_url=self._endpoint_url, - config=AioConfig(signature_version="s3v4"), # v4 signature is required to generate presigned URLs with restriction policies + config=AioConfig(signature_version="s3v4"), + # ^ v4 signature is required to generate presigned URLs with restriction policies ) session = get_session() diff --git a/lib/dl_s3/dl_s3/utils.py b/lib/dl_s3/dl_s3/utils.py index 3a073f161..5aa7649d2 100644 --- a/lib/dl_s3/dl_s3/utils.py +++ b/lib/dl_s3/dl_s3/utils.py @@ -5,11 +5,14 @@ import aiohttp import botocore.exceptions + if typing.TYPE_CHECKING: from types_aiobotocore_s3 import S3Client as AsyncS3Client -async def upload_to_s3_by_presigned(presigned_url: dict[str, typing.Any], content_md5: str, data: str) -> aiohttp.ClientResponse: +async def upload_to_s3_by_presigned( + presigned_url: dict[str, typing.Any], content_md5: str, data: str +) -> aiohttp.ClientResponse: upload_url = presigned_url["url"] upload_url_fields = presigned_url["fields"] upload_url_fields["content-md5"] = content_md5 @@ -17,10 +20,14 @@ async def upload_to_s3_by_presigned(presigned_url: dict[str, typing.Any], conten async with aiohttp.ClientSession() as session: with aiohttp.MultipartWriter("form-data") as mpwriter: for k, v in upload_url_fields.items(): - part = mpwriter.append(v, {'Content-Type': 'text/plain', 'Content-Disposition': f'attachment; name="{k}"'}) + part = mpwriter.append( + v, {"Content-Type": "text/plain", "Content-Disposition": f'attachment; name="{k}"'} + ) part.set_content_disposition("form-data", name=k) - part = mpwriter.append(data, {'Content-Type': 'text/plain', 'Content-Disposition': f'attachment; filename="mydata"'}) + part = mpwriter.append( + data, {"Content-Type": "text/plain", "Content-Disposition": 'attachment; filename="mydata"'} + ) part.set_content_disposition("form-data", name="file") async with session.post( From e051b9e3cd20b5003fb8e5e8c3ac90dfbd95bfaf Mon Sep 17 00:00:00 2001 From: KonstantAnxiety Date: Tue, 24 Dec 2024 20:07:32 +0300 Subject: [PATCH 4/9] update init-db --- .../init-db/s3/docker-entrypoint-initdb.d/initialize.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dl_file_uploader_api_lib/docker-compose/init-db/s3/docker-entrypoint-initdb.d/initialize.sh b/lib/dl_file_uploader_api_lib/docker-compose/init-db/s3/docker-entrypoint-initdb.d/initialize.sh index f2f19ee70..e9c9fc695 100644 --- a/lib/dl_file_uploader_api_lib/docker-compose/init-db/s3/docker-entrypoint-initdb.d/initialize.sh +++ b/lib/dl_file_uploader_api_lib/docker-compose/init-db/s3/docker-entrypoint-initdb.d/initialize.sh @@ -1,7 +1,7 @@ #! /bin/bash echo 'Waiting for S3 to initialize...' -until curl -s s3-storage:8000 > /dev/null +until curl -s s3-storage:9000 > /dev/null do sleep 5 echo 'Waiting for S3 to initialize...' From 1cd632ecd348d54a5fee3671eceab66c65f1bf57 Mon Sep 17 00:00:00 2001 From: KonstantAnxiety Date: Tue, 24 Dec 2024 20:18:21 +0300 Subject: [PATCH 5/9] touch up s3 upload --- lib/dl_s3/dl_s3/utils.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/lib/dl_s3/dl_s3/utils.py b/lib/dl_s3/dl_s3/utils.py index 5aa7649d2..5f1f2ddb6 100644 --- a/lib/dl_s3/dl_s3/utils.py +++ b/lib/dl_s3/dl_s3/utils.py @@ -19,16 +19,10 @@ async def upload_to_s3_by_presigned( async with aiohttp.ClientSession() as session: with aiohttp.MultipartWriter("form-data") as mpwriter: - for k, v in upload_url_fields.items(): - part = mpwriter.append( - v, {"Content-Type": "text/plain", "Content-Disposition": f'attachment; name="{k}"'} - ) - part.set_content_disposition("form-data", name=k) - - part = mpwriter.append( - data, {"Content-Type": "text/plain", "Content-Disposition": 'attachment; filename="mydata"'} - ) - part.set_content_disposition("form-data", name="file") + for key, value in upload_url_fields.items(): + mpwriter.append(value, {"Content-Disposition": f'form-data; name="{key}"'}) + + mpwriter.append(data, {"Content-Disposition": 'form-data; name=file; filename="mydata"'}) async with session.post( url=upload_url, From 20ea4d68f067584f68b1d1da3598bcfd4fb09e9e Mon Sep 17 00:00:00 2001 From: KonstantAnxiety Date: Tue, 24 Dec 2024 23:06:08 +0300 Subject: [PATCH 6/9] update s3 port in tests --- .../dl_file_uploader_api_lib_tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/conftest.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/conftest.py index 546f74c9d..bd4cf2b2d 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/conftest.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/conftest.py @@ -316,7 +316,7 @@ def connectors_settings(s3_settings): ACCESS_KEY_ID=s3_settings.ACCESS_KEY_ID, SECRET_ACCESS_KEY=s3_settings.SECRET_ACCESS_KEY, BUCKET="bi-file-uploader", - S3_ENDPOINT="http://s3-storage:8000", + S3_ENDPOINT="http://s3-storage:9000", ), ) From b32829cd53bea3965110911f435f94b8da161b94 Mon Sep 17 00:00:00 2001 From: KonstantAnxiety Date: Wed, 25 Dec 2024 11:57:57 +0300 Subject: [PATCH 7/9] add test with bad file prefix download; fix permission denied exc; clean up fixtures --- .../dl_file_uploader_api_lib/views/files.py | 9 ++++---- .../db/test_files_api.py | 23 ++++++++++++++++--- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py index 820afbfab..0b01d73fc 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib/views/files.py @@ -174,16 +174,15 @@ async def post(self) -> web.StreamResponse: file_type = get_file_type_from_name(filename=filename, allow_xlsx=self.request.app["ALLOW_XLSX"]) - s3 = self.dl_request.get_s3_service() + s3_key_parts = s3_key.split(S3_KEY_PARTS_SEPARATOR) + if len(s3_key_parts) != 2 or s3_key_parts[0] != self.dl_request.rci.user_id: + raise exc.PermissionDenied() + s3 = self.dl_request.get_s3_service() file_exists = await s3_file_exists(s3.client, s3.tmp_bucket_name, s3_key) if not file_exists: raise exc.DocumentNotFound() - s3_key_parts = s3_key.split(S3_KEY_PARTS_SEPARATOR) - if len(s3_key_parts) != 2 or s3_key_parts[0] != self.dl_request.rci.user_id: - exc.PermissionDenied() - rmm = self.dl_request.get_redis_model_manager() dfile = DataFile( s3_key=s3_key, diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py index 381b463dc..cdc3ff2f9 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py @@ -51,7 +51,7 @@ async def test_make_presigned_url(fu_client, s3_tmp_bucket, rci): @pytest.mark.asyncio -async def test_download_presigned_url(fu_client, s3_tmp_bucket, rci, csv_data): +async def test_download_presigned_url(fu_client, s3_tmp_bucket, csv_data): content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8") presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5)) @@ -67,7 +67,24 @@ async def test_download_presigned_url(fu_client, s3_tmp_bucket, rci, csv_data): assert download_resp.status == 201, download_resp.json -async def test_upload_presigned_too_large_file(monkeypatch, fu_client, s3_tmp_bucket, rci, csv_data): +@pytest.mark.asyncio +async def test_download_presigned_url_bad_user(fu_client, s3_tmp_bucket, csv_data): + content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8") + + presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5)) + assert presigned_url_resp.status == 200, presigned_url_resp.json + + _, file_uuid = presigned_url_resp.json["fields"]["key"].split(S3_KEY_PARTS_SEPARATOR) + presigned_url_resp.json["fields"]["key"] = S3_KEY_PARTS_SEPARATOR.join(("hacker", file_uuid)) + + download_resp = await fu_client.make_request( + ReqBuilder.presigned_url_download(presigned_url_resp.json["fields"]["key"], "csv_data.csv", require_ok=False), + ) + assert download_resp.status != 201, download_resp.json + assert download_resp.json["code"] == "ERR.FILE.PERMISSION_DENIED" + + +async def test_upload_presigned_too_large_file(monkeypatch, fu_client, s3_tmp_bucket, csv_data): monkeypatch.setattr(MakePresignedUrlView, "PRESIGNED_URL_MAX_BYTES", 32) content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8") @@ -79,7 +96,7 @@ async def test_upload_presigned_too_large_file(monkeypatch, fu_client, s3_tmp_bu await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, csv_data) -async def test_upload_presigned_bad_key(monkeypatch, fu_client, s3_tmp_bucket, rci, csv_data): +async def test_upload_presigned_bad_key(monkeypatch, fu_client, s3_tmp_bucket, csv_data): content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8") presigned_url_resp = await fu_client.make_request(ReqBuilder.presigned_url(content_md5)) From d9a83b256fb6051542160df25190259e10a9a868 Mon Sep 17 00:00:00 2001 From: KonstantAnxiety Date: Wed, 25 Dec 2024 11:59:27 +0300 Subject: [PATCH 8/9] async --- .../dl_file_uploader_api_lib_tests/db/test_files_api.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py index cdc3ff2f9..5a2e64ccf 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/db/test_files_api.py @@ -84,6 +84,7 @@ async def test_download_presigned_url_bad_user(fu_client, s3_tmp_bucket, csv_dat assert download_resp.json["code"] == "ERR.FILE.PERMISSION_DENIED" +@pytest.mark.asyncio async def test_upload_presigned_too_large_file(monkeypatch, fu_client, s3_tmp_bucket, csv_data): monkeypatch.setattr(MakePresignedUrlView, "PRESIGNED_URL_MAX_BYTES", 32) @@ -96,6 +97,7 @@ async def test_upload_presigned_too_large_file(monkeypatch, fu_client, s3_tmp_bu await upload_to_s3_by_presigned(presigned_url_resp.json, content_md5, csv_data) +@pytest.mark.asyncio async def test_upload_presigned_bad_key(monkeypatch, fu_client, s3_tmp_bucket, csv_data): content_md5 = base64.b64encode(hashlib.md5(csv_data.encode("utf-8")).digest()).decode("utf-8") From 9c692f95452b5a1784b6f2490c18d8d05cc370b2 Mon Sep 17 00:00:00 2001 From: KonstantAnxiety Date: Wed, 25 Dec 2024 13:58:10 +0300 Subject: [PATCH 9/9] change minio server port to 8000 --- .../dl_file_uploader_api_lib_tests/conftest.py | 2 +- lib/dl_file_uploader_api_lib/docker-compose.yml | 4 ++-- .../init-db/s3/docker-entrypoint-initdb.d/initialize.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/conftest.py b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/conftest.py index bd4cf2b2d..546f74c9d 100644 --- a/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/conftest.py +++ b/lib/dl_file_uploader_api_lib/dl_file_uploader_api_lib_tests/conftest.py @@ -316,7 +316,7 @@ def connectors_settings(s3_settings): ACCESS_KEY_ID=s3_settings.ACCESS_KEY_ID, SECRET_ACCESS_KEY=s3_settings.SECRET_ACCESS_KEY, BUCKET="bi-file-uploader", - S3_ENDPOINT="http://s3-storage:9000", + S3_ENDPOINT="http://s3-storage:8000", ), ) diff --git a/lib/dl_file_uploader_api_lib/docker-compose.yml b/lib/dl_file_uploader_api_lib/docker-compose.yml index ab9402f0e..fcbf6bd12 100644 --- a/lib/dl_file_uploader_api_lib/docker-compose.yml +++ b/lib/dl_file_uploader_api_lib/docker-compose.yml @@ -16,9 +16,9 @@ services: MINIO_ROOT_USER: accessKey1 MINIO_ROOT_PASSWORD: verySecretKey1 MINIO_DOMAIN: local - command: server /export + command: server --address ":8000" /export ports: - - "51420:9000" + - 51420:8000 init-db: depends_on: diff --git a/lib/dl_file_uploader_api_lib/docker-compose/init-db/s3/docker-entrypoint-initdb.d/initialize.sh b/lib/dl_file_uploader_api_lib/docker-compose/init-db/s3/docker-entrypoint-initdb.d/initialize.sh index e9c9fc695..f2f19ee70 100644 --- a/lib/dl_file_uploader_api_lib/docker-compose/init-db/s3/docker-entrypoint-initdb.d/initialize.sh +++ b/lib/dl_file_uploader_api_lib/docker-compose/init-db/s3/docker-entrypoint-initdb.d/initialize.sh @@ -1,7 +1,7 @@ #! /bin/bash echo 'Waiting for S3 to initialize...' -until curl -s s3-storage:9000 > /dev/null +until curl -s s3-storage:8000 > /dev/null do sleep 5 echo 'Waiting for S3 to initialize...'