Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: BI-5952 file-uploader-api handler for processing presigned file from bucket #762

Merged
merged 9 commits into from
Dec 25, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ class MakePresignedUrlRequestSchema(ma.Schema):
content_md5 = ma.fields.String(required=True)


class DownloadPresignedUrlRequestSchema(ma.Schema):
filename = ma.fields.String(required=True)
key = ma.fields.String(required=True)


class PresignedUrlSchema(ma.Schema):
class PresignedUrlFields(ma.Schema):
class Meta:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from aiohttp import web
from aiohttp.multipart import BodyPartReader
from dl_s3.utils import s3_file_exists
from redis.asyncio.lock import Lock as RedisLock

from dl_api_commons.aiohttp.aiohttp_wrappers import (
Expand Down Expand Up @@ -53,6 +54,8 @@

LOGGER = logging.getLogger(__name__)

S3_KEY_PARTS_SEPARATOR = "--" # used to separate author user_id from the rest of the s3 object key to sign it


def get_file_type_from_name(
filename: Optional[str],
Expand Down Expand Up @@ -136,7 +139,10 @@ async def post(self) -> web.StreamResponse:
content_md5: str = req_data["content_md5"]

s3 = self.dl_request.get_s3_service()
s3_key = "{}_{}".format(self.dl_request.rci.user_id or "unknown", str(uuid.uuid4()))
s3_key = S3_KEY_PARTS_SEPARATOR.join((
self.dl_request.rci.user_id or "unknown",
str(uuid.uuid4()),
))

url = await s3.client.generate_presigned_post(
Bucket=s3.tmp_bucket_name,
Expand All @@ -154,6 +160,49 @@ async def post(self) -> web.StreamResponse:
)


class DownloadPresignedUrlView(FileUploaderBaseView):
async def post(self) -> web.StreamResponse:
req_data = await self._load_post_request_schema_data(files_schemas.DownloadPresignedUrlRequestSchema)
filename: str = req_data["filename"]
key: str = req_data["key"]

file_type = get_file_type_from_name(filename=filename, allow_xlsx=self.request.app["ALLOW_XLSX"])

s3 = self.dl_request.get_s3_service()

file_exists = await s3_file_exists(s3.client, s3.tmp_bucket_name, key)
if not file_exists:
raise exc.DocumentNotFound()

user_id_from_key = key.split(S3_KEY_PARTS_SEPARATOR)[0]
if user_id_from_key != self.dl_request.rci.user_id:
exc.PermissionDenied()

rmm = self.dl_request.get_redis_model_manager()
dfile = DataFile(
manager=rmm,
filename=filename,
file_type=file_type,
status=FileProcessingStatus.in_progress,
)
LOGGER.info(f"Data file id: {dfile.id}")

await dfile.save()

task_processor = self.dl_request.get_task_processor()
if file_type == FileType.xlsx:
await task_processor.schedule(ProcessExcelTask(file_id=dfile.id))
LOGGER.info(f"Scheduled ProcessExcelTask for file_id {dfile.id}")
else:
await task_processor.schedule(ParseFileTask(file_id=dfile.id))
LOGGER.info(f"Scheduled ParseFileTask for file_id {dfile.id}")

return web.json_response(
files_schemas.FileUploadResponseSchema().dump({"file_id": dfile.id, "title": dfile.filename}),
status=HTTPStatus.CREATED,
)


class LinksView(FileUploaderBaseView):
REQUIRED_RESOURCES: ClassVar[frozenset[RequiredResource]] = frozenset() # Don't skip CSRF check

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import uuid

import attr
from dl_file_uploader_api_lib.views.files import S3_KEY_PARTS_SEPARATOR
import pytest

from dl_api_commons.base_models import RequestContextInfo
Expand All @@ -28,14 +29,23 @@ async def test_file_upload_cors(fu_client, s3_tmp_bucket, upload_file_req):


@pytest.mark.asyncio
async def test_make_presigned_url(fu_client, s3_tmp_bucket):
async def test_make_presigned_url(fu_client, s3_tmp_bucket, rci):
expected_url_fields = ("key", "x-amz-algorithm", "x-amz-credential", "x-amz-date", "policy", "x-amz-signature")

resp = await fu_client.make_request(ReqBuilder.presigned_url("mymd5"))
assert resp.status == 200
assert "url" in resp.json, resp.json
assert "fields" in resp.json, resp.json
assert all(field in resp.json["fields"] for field in expected_url_fields), resp.json
key = resp.json["fields"]["key"]
key_parts = key.split(S3_KEY_PARTS_SEPARATOR)
assert len(key_parts) == 2, key_parts
assert key_parts[0] == rci.user_id


@pytest.mark.asyncio
async def test_download_presigned_url(fu_client, s3_tmp_bucket, rci):
# TODO test


@pytest.mark.asyncio
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from dl_core_testing.connection import make_conn_key
from dl_file_uploader_api_lib_tests.req_builder import ReqBuilder
from dl_file_uploader_lib import exc
from dl_testing.s3_utils import s3_file_exists
from dl_s3.utils import s3_file_exists

from dl_connector_bundle_chs3.chs3_gsheets.core.constants import CONNECTION_TYPE_GSHEETS_V2
from dl_connector_bundle_chs3.chs3_gsheets.core.lifecycle import GSheetsFileS3ConnectionLifecycleManager
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from dl_core_testing.connection import make_conn_key
from dl_file_uploader_api_lib_tests.req_builder import ReqBuilder
from dl_file_uploader_lib import exc
from dl_testing.s3_utils import s3_file_exists
from dl_s3.utils import s3_file_exists

from dl_connector_bundle_chs3.chs3_yadocs.core.constants import CONNECTION_TYPE_YADOCS
from dl_connector_bundle_chs3.chs3_yadocs.core.lifecycle import YaDocsFileS3ConnectionLifecycleManager
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
)
from dl_file_uploader_task_interface.tasks import DownloadYaDocsTask
from dl_task_processor.state import wait_task
from dl_testing.s3_utils import s3_file_exists
from dl_s3.utils import s3_file_exists


@pytest.mark.asyncio
Expand Down
21 changes: 21 additions & 0 deletions lib/dl_s3/dl_s3/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import botocore.exceptions

if TYPE_CHECKING:
from types_aiobotocore_s3 import S3Client as AsyncS3Client


async def s3_file_exists(s3_client: AsyncS3Client, bucket: str, key: str) -> bool:
KonstantAnxiety marked this conversation as resolved.
Show resolved Hide resolved
try:
s3_resp = await s3_client.head_object(
Bucket=bucket,
Key=key,
)
except botocore.exceptions.ClientError as ex:
if ex.response["ResponseMetadata"]["HTTPStatusCode"] == 404:
return False
raise
return s3_resp["ResponseMetadata"]["HTTPStatusCode"] == 200
13 changes: 0 additions & 13 deletions lib/dl_testing/dl_testing/s3_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,19 +83,6 @@ async def get_lc_rules_number(s3_client: AsyncS3Client, bucket: str) -> int:
return len(lc_config["Rules"])


async def s3_file_exists(s3_client: AsyncS3Client, bucket: str, key: str) -> bool:
try:
s3_resp = await s3_client.head_object(
Bucket=bucket,
Key=key,
)
except botocore.exceptions.ClientError as ex:
if ex.response["ResponseMetadata"]["HTTPStatusCode"] == 404:
return False
raise
return s3_resp["ResponseMetadata"]["HTTPStatusCode"] == 200


S3_TBL_FUNC_TEMPLATE = """s3(
'{s3_endpoint}/{bucket}/{filename}',
'{key_id}',
Expand Down
Loading