Skip to content

Commit

Permalink
feat: BI-5952 WIP file-uploader-api handler for processing presigned …
Browse files Browse the repository at this point in the history
…file from bucket
  • Loading branch information
KonstantAnxiety committed Dec 20, 2024
1 parent 413bacc commit c195125
Show file tree
Hide file tree
Showing 8 changed files with 90 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ class MakePresignedUrlRequestSchema(ma.Schema):
content_md5 = ma.fields.String(required=True)


class DownloadPresignedUrlRequestSchema(ma.Schema):
filename = ma.fields.String(required=True)
key = ma.fields.String(required=True)


class PresignedUrlSchema(ma.Schema):
class PresignedUrlFields(ma.Schema):
class Meta:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from aiohttp import web
from aiohttp.multipart import BodyPartReader
from dl_s3.utils import s3_file_exists
from redis.asyncio.lock import Lock as RedisLock

from dl_api_commons.aiohttp.aiohttp_wrappers import (
Expand Down Expand Up @@ -53,6 +54,8 @@

LOGGER = logging.getLogger(__name__)

S3_KEY_PARTS_SEPARATOR = "--" # used to separate author user_id from the rest of the s3 object key to sign it


def get_file_type_from_name(
filename: Optional[str],
Expand Down Expand Up @@ -136,7 +139,10 @@ async def post(self) -> web.StreamResponse:
content_md5: str = req_data["content_md5"]

s3 = self.dl_request.get_s3_service()
s3_key = "{}_{}".format(self.dl_request.rci.user_id or "unknown", str(uuid.uuid4()))
s3_key = S3_KEY_PARTS_SEPARATOR.join((
self.dl_request.rci.user_id or "unknown",
str(uuid.uuid4()),
))

url = await s3.client.generate_presigned_post(
Bucket=s3.tmp_bucket_name,
Expand All @@ -154,6 +160,49 @@ async def post(self) -> web.StreamResponse:
)


class DownloadPresignedUrlView(FileUploaderBaseView):
async def post(self) -> web.StreamResponse:
req_data = await self._load_post_request_schema_data(files_schemas.DownloadPresignedUrlRequestSchema)
filename: str = req_data["filename"]
key: str = req_data["key"]

file_type = get_file_type_from_name(filename=filename, allow_xlsx=self.request.app["ALLOW_XLSX"])

s3 = self.dl_request.get_s3_service()

file_exists = await s3_file_exists(s3.client, s3.tmp_bucket_name, key)
if not file_exists:
raise exc.DocumentNotFound()

user_id_from_key = key.split(S3_KEY_PARTS_SEPARATOR)[0]
if user_id_from_key != self.dl_request.rci.user_id:
exc.PermissionDenied()

rmm = self.dl_request.get_redis_model_manager()
dfile = DataFile(
manager=rmm,
filename=filename,
file_type=file_type,
status=FileProcessingStatus.in_progress,
)
LOGGER.info(f"Data file id: {dfile.id}")

await dfile.save()

task_processor = self.dl_request.get_task_processor()
if file_type == FileType.xlsx:
await task_processor.schedule(ProcessExcelTask(file_id=dfile.id))
LOGGER.info(f"Scheduled ProcessExcelTask for file_id {dfile.id}")
else:
await task_processor.schedule(ParseFileTask(file_id=dfile.id))
LOGGER.info(f"Scheduled ParseFileTask for file_id {dfile.id}")

return web.json_response(
files_schemas.FileUploadResponseSchema().dump({"file_id": dfile.id, "title": dfile.filename}),
status=HTTPStatus.CREATED,
)


class LinksView(FileUploaderBaseView):
REQUIRED_RESOURCES: ClassVar[frozenset[RequiredResource]] = frozenset() # Don't skip CSRF check

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import uuid

import attr
from dl_file_uploader_api_lib.views.files import S3_KEY_PARTS_SEPARATOR
import pytest

from dl_api_commons.base_models import RequestContextInfo
Expand All @@ -28,14 +29,23 @@ async def test_file_upload_cors(fu_client, s3_tmp_bucket, upload_file_req):


@pytest.mark.asyncio
async def test_make_presigned_url(fu_client, s3_tmp_bucket):
async def test_make_presigned_url(fu_client, s3_tmp_bucket, rci):
expected_url_fields = ("key", "x-amz-algorithm", "x-amz-credential", "x-amz-date", "policy", "x-amz-signature")

resp = await fu_client.make_request(ReqBuilder.presigned_url("mymd5"))
assert resp.status == 200
assert "url" in resp.json, resp.json
assert "fields" in resp.json, resp.json
assert all(field in resp.json["fields"] for field in expected_url_fields), resp.json
key = resp.json["fields"]["key"]
key_parts = key.split(S3_KEY_PARTS_SEPARATOR)
assert len(key_parts) == 2, key_parts
assert key_parts[0] == rci.user_id


@pytest.mark.asyncio
async def test_download_presigned_url(fu_client, s3_tmp_bucket, rci):
# TODO test


@pytest.mark.asyncio
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from dl_core_testing.connection import make_conn_key
from dl_file_uploader_api_lib_tests.req_builder import ReqBuilder
from dl_file_uploader_lib import exc
from dl_testing.s3_utils import s3_file_exists
from dl_s3.utils import s3_file_exists

from dl_connector_bundle_chs3.chs3_gsheets.core.constants import CONNECTION_TYPE_GSHEETS_V2
from dl_connector_bundle_chs3.chs3_gsheets.core.lifecycle import GSheetsFileS3ConnectionLifecycleManager
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from dl_core_testing.connection import make_conn_key
from dl_file_uploader_api_lib_tests.req_builder import ReqBuilder
from dl_file_uploader_lib import exc
from dl_testing.s3_utils import s3_file_exists
from dl_s3.utils import s3_file_exists

from dl_connector_bundle_chs3.chs3_yadocs.core.constants import CONNECTION_TYPE_YADOCS
from dl_connector_bundle_chs3.chs3_yadocs.core.lifecycle import YaDocsFileS3ConnectionLifecycleManager
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
)
from dl_file_uploader_task_interface.tasks import DownloadYaDocsTask
from dl_task_processor.state import wait_task
from dl_testing.s3_utils import s3_file_exists
from dl_s3.utils import s3_file_exists


@pytest.mark.asyncio
Expand Down
21 changes: 21 additions & 0 deletions lib/dl_s3/dl_s3/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import botocore.exceptions

if TYPE_CHECKING:
from types_aiobotocore_s3 import S3Client as AsyncS3Client


async def s3_file_exists(s3_client: AsyncS3Client, bucket: str, key: str) -> bool:
try:
s3_resp = await s3_client.head_object(
Bucket=bucket,
Key=key,
)
except botocore.exceptions.ClientError as ex:
if ex.response["ResponseMetadata"]["HTTPStatusCode"] == 404:
return False
raise
return s3_resp["ResponseMetadata"]["HTTPStatusCode"] == 200
13 changes: 0 additions & 13 deletions lib/dl_testing/dl_testing/s3_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,19 +83,6 @@ async def get_lc_rules_number(s3_client: AsyncS3Client, bucket: str) -> int:
return len(lc_config["Rules"])


async def s3_file_exists(s3_client: AsyncS3Client, bucket: str, key: str) -> bool:
try:
s3_resp = await s3_client.head_object(
Bucket=bucket,
Key=key,
)
except botocore.exceptions.ClientError as ex:
if ex.response["ResponseMetadata"]["HTTPStatusCode"] == 404:
return False
raise
return s3_resp["ResponseMetadata"]["HTTPStatusCode"] == 200


S3_TBL_FUNC_TEMPLATE = """s3(
'{s3_endpoint}/{bucket}/{filename}',
'{key_id}',
Expand Down

0 comments on commit c195125

Please sign in to comment.