Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AYR-1386 - File download timeout #712

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 13 additions & 33 deletions app/main/routes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import io
import uuid

import boto3
Expand All @@ -8,7 +7,6 @@
redirect,
render_template,
request,
send_file,
session,
url_for,
)
Expand Down Expand Up @@ -51,7 +49,6 @@
generate_pdf_manifest,
get_download_filename,
get_file_details,
get_file_mimetype,
)
from app.main.util.search_utils import (
build_search_results_summary_query,
Expand Down Expand Up @@ -716,12 +713,12 @@ def record(record_id: uuid.UUID):
)


@bp.route("/download/<uuid:record_id>")
@bp.route("/download/<uuid:record_id>", methods=["GET"])
@access_token_sign_in_required
@log_page_view
def download_record(record_id: uuid.UUID):
s3 = boto3.client("s3")
file = db.session.get(File, record_id)
render = request.args.get("render", False)
ayr_user = AYRUser(session.get("user_groups"))
can_download_records = ayr_user.can_download_records

Expand All @@ -733,48 +730,31 @@ def download_record(record_id: uuid.UUID):

validate_body_user_groups_or_404(file.consignment.series.body.Name)

s3 = boto3.client("s3")
bucket = current_app.config["RECORD_BUCKET_NAME"]
key = f"{file.consignment.ConsignmentReference}/{file.FileId}"

try:
s3_file_object = s3.get_object(Bucket=bucket, Key=key)
except Exception as e:
current_app.app_logger.error(f"Failed to get object from S3: {e}")
abort(404)

download_filename = file.FileName

if file.CiteableReference:
if len(file.FileName.rsplit(".", 1)) > 1:
download_filename = (
file.CiteableReference + "." + file.FileName.rsplit(".", 1)[1]
)

try:
file_content = s3_file_object["Body"].read()
file_type = download_filename.split(".")[-1].lower()
presigned_url = s3.generate_presigned_url(
"get_object",
Params={
"Bucket": bucket,
"Key": key,
"ResponseContentDisposition": f"attachment; filename={download_filename}",
},
ExpiresIn=3600,
)
except Exception as e:
current_app.app_logger.error(f"Error reading S3 file content: {e}")
current_app.app_logger.error(f"Failed to generate presigned URL: {e}")
abort(500)

content_type = s3_file_object.get("ContentType", "application/octet-stream")

if render:
return send_file(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This condition block was already redundant anyway wasn't in, since we moved render to use presigned urls directly.

io.BytesIO(file_content),
mimetype=get_file_mimetype(file_type),
as_attachment=False,
download_name=download_filename,
)
else:
response = send_file(
io.BytesIO(file_content),
mimetype=content_type,
as_attachment=True,
download_name=download_filename,
)
return response
return redirect(presigned_url)


@bp.route("/signed-out", methods=["GET"])
Expand Down
122 changes: 15 additions & 107 deletions app/tests/test_download.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from unittest.mock import MagicMock, patch
from unittest.mock import patch

import boto3
from flask.testing import FlaskClient
Expand Down Expand Up @@ -38,17 +38,10 @@ def test_invalid_id_raises_404(self, client: FlaskClient):
assert response.status_code == 404

@mock_aws
def test_download_record_standard_user_with_citable_reference_with_file_extension(
def test_download_existing_file_with_presigned_url(
Copy link
Contributor

@anthonyhashemi anthonyhashemi Jan 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we still need the test cases for citable_reference_with_file_extension and citable_reference_without_file_extension unless we move the logic for the downloaded_filename into a helper function and test that

self, app, client, mock_standard_user
):
"""
Given a File in the database with corresponding file in the s3 bucket
When a standard user with access to the file's transferring body makes a
request to download record
Then the response status code should be 200
And the file should contain the expected content
And the downloaded filename should be the File's CiteableReference with extension
"""
""" """
bucket_name = "test_bucket"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function name for this pretty clear, I still think that having a docstring here would be useful.

file = FileFactory(FileType="file", FileName="testfile.doc")

Expand All @@ -60,107 +53,35 @@ def test_download_record_standard_user_with_citable_reference_with_file_extensio
)
response = client.get(f"{self.route_url}/{file.FileId}")

assert response.status_code == 200

assert response.status_code == 302
assert (
response.headers["Content-Disposition"]
== f"attachment; filename={file.CiteableReference}.doc"
"https://s3.eu-west-2.amazonaws.com/test_bucket"
in response.headers["Location"]
)
assert response.data == b"record"

@mock_aws
def test_download_record_standard_user_with_citable_reference_without_file_extension(
def test_download_non_existing_file_with_presigned_url(
self, app, client, mock_standard_user
):
"""
Given a File in the database with corresponding file in the s3 bucket
When a standard user with access to the file's transferring body makes a
request to download record
Then the response status code should be 200
And the file should contain the expected content
And the downloaded filename should be the filename with extension
"""
""" """
bucket_name = "test_bucket"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here for the docstring.

file = FileFactory(
FileType="file",
)

create_mock_s3_bucket_with_object(bucket_name, file)
app.config["RECORD_BUCKET_NAME"] = bucket_name

mock_standard_user(
client, file.consignment.series.body.Name, can_download=True
)
response = client.get(f"{self.route_url}/{file.FileId}")

assert response.status_code == 200
file = FileFactory(FileType="file", FileName="testfile.doc")

assert (
response.headers["Content-Disposition"]
== f"attachment; filename={file.FileName}"
)
assert response.data == b"record"
s3 = boto3.resource("s3", region_name="us-east-1")
s3.create_bucket(Bucket=bucket_name)

@mock_aws
def test_download_record_standard_user_without_citable_reference(
self, app, client, mock_standard_user
):
"""
Given a File in the database with corresponding file in the s3 bucket
without a CiteableReference
When a standard user with access to the file's transferring body makes a
request to download record
Then the response status code should be 200
And the file should contain the expected content
And the downloaded filename should be fileName with extension
"""
bucket_name = "test_bucket"
file = FileFactory(
FileType="file", FileName="testfile.doc", CiteableReference=None
)
create_mock_s3_bucket_with_object(bucket_name, file)
app.config["RECORD_BUCKET_NAME"] = bucket_name

mock_standard_user(
client, file.consignment.series.body.Name, can_download=True
)
response = client.get(f"{self.route_url}/{file.FileId}")

assert response.status_code == 200

assert (
response.headers["Content-Disposition"]
== f"attachment; filename={file.FileName}"
)
assert response.data == b"record"

@mock_aws
def test_download_record_standard_user_get_file_errors(
self, app, client, mock_standard_user
):
"""
Given a file is requested from the database / S3 which doesn't exist
When a standard user tries to access the file to download
Then the response status code should be 404
"""

bucket_name = "test_bucket"
file = FileFactory(
FileType="file", FileName="testimage.png", CiteableReference=None
)
create_mock_s3_bucket_with_object(bucket_name, file)
app.config["RECORD_BUCKET_NAME"] = bucket_name

mock_standard_user(
client, file.consignment.series.body.Name, can_download=True
)
response = client.get(f"{self.route_url}/invalid_file")

assert response.status_code == 404

@mock_aws
@patch("boto3.client")
def test_download_record_standard_user_read_file_error(
@patch("app.main.routes.boto3.client")
def test_download_with_presigned_url_error(
self, mock_boto3_client, app, client, mock_standard_user, caplog
):
"""
Expand All @@ -177,26 +98,13 @@ def test_download_record_standard_user_read_file_error(
create_mock_s3_bucket_with_object(bucket_name, file)
app.config["RECORD_BUCKET_NAME"] = bucket_name

# Mock the S3 client and its get_object method
mock_s3_client = MagicMock()
mock_s3_client.get_object.return_value = {
"Body": MagicMock(
read=MagicMock(side_effect=Exception("Read error"))
)
}
mock_boto3_client.return_value = mock_s3_client

mock_standard_user(
client, file.consignment.series.body.Name, can_download=True
mock_boto3_client.generate_presigned_url.side_effect = Exception(
"Simulated error"
)

response = client.get(f"{self.route_url}/{file.FileId}")

msg = "Error reading S3 file content: Read error"

assert response.status_code == 500
assert caplog.records[1].levelname == "ERROR"
assert caplog.records[1].message == msg

@mock_aws
def test_raises_404_for_standard_user_without_access_to_files_transferring_body(
Expand Down
Loading