Skip to content

Commit

Permalink
Newly ingested documents are now assigned a UUID-based URI
Browse files Browse the repository at this point in the history
  • Loading branch information
jacksonj04 committed Dec 18, 2024
1 parent cbb7731 commit ffe92c8
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 29 deletions.
15 changes: 2 additions & 13 deletions ds-caselaw-ingester/lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from notifications_python_client.notifications import NotificationsAPIClient
import logging
from caselawclient.models.documents import Document
from uuid import uuid4

logger = logging.getLogger("ingester")
logger.setLevel(logging.DEBUG)
Expand Down Expand Up @@ -238,18 +239,6 @@ def extract_metadata(tar: tarfile.TarFile, consignment_reference: str):
return decoder.decode(te_metadata_file.read().decode("utf-8"))


def extract_uri(metadata: dict, consignment_reference: str) -> str:
    """Return the document URI recorded by the parser, without the site prefix.

    The value is read from ``metadata["parameters"]["PARSER"]["uri"]``. Any
    occurrence of ``https://caselaw.nationalarchives.gov.uk/id/`` is removed
    from it. If the key is absent, or the value is falsy either before or
    after the prefix is removed, ``failures/<consignment_reference>`` is
    returned instead.
    """
    parser_uri = metadata["parameters"]["PARSER"].get("uri", "")

    # Only call .replace() on a truthy value: the parser may supply None.
    trimmed = (
        parser_uri.replace("https://caselaw.nationalarchives.gov.uk/id/", "")
        if parser_uri
        else parser_uri
    )

    # Nothing usable left: fall back to a path under failures/.
    return trimmed or f"failures/{consignment_reference}"


# called by tests
def get_consignment_reference(message):
    """Return the consignment reference of the ``Message`` built from ``message``."""
    parsed_message = Message.from_message(message)
    return parsed_message.get_consignment_reference()
Expand Down Expand Up @@ -409,7 +398,7 @@ def __init__(self, message: Message):
self.message.update_consignment_reference(self.metadata["parameters"]["TRE"]["reference"])
self.consignment_reference = self.message.get_consignment_reference()
self.xml_file_name = self.metadata["parameters"]["TRE"]["payload"]["xml"]
self.uri = DocumentURIString(extract_uri(self.metadata, self.consignment_reference))
self.uri = DocumentURIString("d-" + str(uuid4()))
print(f"Ingesting document {self.uri}")
self.xml = get_best_xml(self.uri, self.tar, self.xml_file_name, self.consignment_reference)

Expand Down
16 changes: 0 additions & 16 deletions ds-caselaw-ingester/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,22 +280,6 @@ def test_extract_metadata_not_found_tdr(self):
with pytest.raises(lambda_function.FileNotFoundException, match="Consignment Ref:"):
lambda_function.extract_metadata(tar, consignment_reference)

def test_extract_uri_success(self):
    """A full identifier URI is reduced to the path after the ``/id/`` prefix."""
    parser_output = {"uri": "https://caselaw.nationalarchives.gov.uk/id/ewca/civ/2022/111"}
    metadata = {"parameters": {"PARSER": parser_output}}
    extracted = lambda_function.extract_uri(metadata, "anything")
    assert extracted == "ewca/civ/2022/111"

def test_extract_uri_incompete(self):
    """A URI consisting only of the site prefix falls back to the failures path."""
    parser_output = {"uri": "https://caselaw.nationalarchives.gov.uk/id/"}
    metadata = {"parameters": {"PARSER": parser_output}}
    extracted = lambda_function.extract_uri(metadata, "anything")
    assert extracted == "failures/anything"

def test_extract_uri_missing_key(self):
    """Parser output with no ``uri`` key falls back to the failures path."""
    parser_output = {}
    metadata = {"parameters": {"PARSER": parser_output}}
    extracted = lambda_function.extract_uri(metadata, "anything")
    assert extracted == "failures/anything"

def test_extract_uri_none(self):
    """A ``uri`` explicitly set to ``None`` falls back to the failures path."""
    parser_output = {"uri": None}
    metadata = {"parameters": {"PARSER": parser_output}}
    extracted = lambda_function.extract_uri(metadata, "anything")
    assert extracted == "failures/anything"

def test_extract_docx_filename_success(self):
    """The docx filename is read from the TRE payload section of the metadata."""
    payload = {"filename": "judgment.docx"}
    metadata = {"parameters": {"TRE": {"payload": payload}}}
    extracted = lambda_function.extract_docx_filename(metadata, "anything")
    assert extracted == "judgment.docx"
Expand Down

0 comments on commit ffe92c8

Please sign in to comment.