Skip to content

Commit

Permalink
Fix points where we were leaving a file open for the entire lifetime of the…
Browse files Browse the repository at this point in the history
… object
  • Loading branch information
jacksonj04 committed Dec 19, 2024
1 parent d8e8422 commit 10b0577
Showing 1 changed file with 23 additions and 27 deletions.
50 changes: 23 additions & 27 deletions ds-caselaw-ingester/lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,15 +393,14 @@ def __init__(self, message: Message):
print(f"Ingester Start: Consignment reference {self.consignment_reference}")
print(f"Received Message: {self.message.message}")
self.local_tar_filename = self.save_tar_file_in_s3()
with tarfile.open(self.local_tar_filename, mode="r") as tar:
self.tar = tar
self.metadata = extract_metadata(self.tar, self.consignment_reference)
self.message.update_consignment_reference(self.metadata["parameters"]["TRE"]["reference"])
self.consignment_reference = self.message.get_consignment_reference()
self.xml_file_name = self.metadata["parameters"]["TRE"]["payload"]["xml"]
self.uri = DocumentURIString("d-" + str(uuid4()))
with tarfile.open(self.local_tar_filename, mode="r") as tar:
self.metadata = extract_metadata(tar, self.consignment_reference)
self.message.update_consignment_reference(self.metadata["parameters"]["TRE"]["reference"])
self.xml_file_name = self.metadata["parameters"]["TRE"]["payload"]["xml"]
self.xml = get_best_xml(self.uri, tar, self.xml_file_name, self.consignment_reference)
print(f"Ingesting document {self.uri}")
self.xml = get_best_xml(self.uri, self.tar, self.xml_file_name, self.consignment_reference)

def save_tar_file_in_s3(self):
"""This should be mocked out for testing -- get the tar file from S3 and
Expand Down Expand Up @@ -543,18 +542,19 @@ def save_files_to_s3(self) -> None:
# Store docx and rename
# The docx_filename is None for files which have been reparsed.
if docx_filename is not None:
copy_file(
self.tar,
f"{self.consignment_reference}/{docx_filename}",
f'{self.uri.replace("/", "_")}.docx',
self.uri,
s3_client,
)
with tarfile.open(self.local_tar_filename, mode="r") as tar:
copy_file(
tar,
f"{self.consignment_reference}/{docx_filename}",
f'{self.uri.replace("/", "_")}.docx',
self.uri,
s3_client,
)

# Store parser log
with suppress(FileNotFoundException):
with suppress(FileNotFoundException), tarfile.open(self.local_tar_filename, mode="r") as tar:
copy_file(
self.tar,
tar,
f"{self.consignment_reference}/parser.log",
"parser.log",
self.uri,
Expand All @@ -565,13 +565,14 @@ def save_files_to_s3(self) -> None:
image_list = self.metadata["parameters"]["TRE"]["payload"]["images"]
if image_list:
for image_filename in image_list:
copy_file(
self.tar,
f"{self.consignment_reference}/{image_filename}",
image_filename,
self.uri,
s3_client,
)
with tarfile.open(self.local_tar_filename, mode="r") as tar:
copy_file(
tar,
f"{self.consignment_reference}/{image_filename}",
image_filename,
self.uri,
s3_client,
)

@property
def metadata_object(self) -> Metadata:
Expand Down Expand Up @@ -606,9 +607,6 @@ def send_email(self) -> None:

raise RuntimeError(f"Didn't recognise originator {originator!r}")

def close_tar(self) -> None:
self.tar.close()

def upload_xml(self) -> None:
self.updated = self.update_document_xml()
self.inserted = False if self.updated else self.insert_document_xml()
Expand Down Expand Up @@ -650,8 +648,6 @@ def process_message(message):
else:
ingest.unpublish_updated_judgment()

ingest.close_tar()

print("Ingestion complete")
return message.message

Expand Down

0 comments on commit 10b0577

Please sign in to comment.