Skip to content

Commit

Permalink
Newly ingested documents are now assigned a UUID-based URI
Browse files Browse the repository at this point in the history
  • Loading branch information
jacksonj04 committed Dec 18, 2024
1 parent cbb7731 commit 3ffbbbc
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 33 deletions.
8 changes: 5 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ jobs:
SKIP: no-commit-to-branch
test:
env:
MARKLOGIC_HOST: ""
MARKLOGIC_USER: ""
MARKLOGIC_PASSWORD: ""
MARKLOGIC_HOST: ml-host
MARKLOGIC_USER: ml-user
MARKLOGIC_PASSWORD: ml-password
MARKLOGIC_USE_HTTPS: 0
AWS_BUCKET_NAME: judgments-original-versions
name: Run unit tests
runs-on: ubuntu-24.04
steps:
Expand Down
17 changes: 3 additions & 14 deletions ds-caselaw-ingester/lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from notifications_python_client.notifications import NotificationsAPIClient
import logging
from caselawclient.models.documents import Document
from uuid import uuid4

logger = logging.getLogger("ingester")
logger.setLevel(logging.DEBUG)
Expand All @@ -30,7 +31,7 @@

rollbar.init(os.getenv("ROLLBAR_TOKEN"), environment=os.getenv("ROLLBAR_ENV"))

MARKLOGIC_HOST: str = os.environ["AWS_BUCKET_NAME"]
MARKLOGIC_HOST: str = os.environ["MARKLOGIC_HOST"]
MARKLOGIC_USER: str = os.environ["MARKLOGIC_USER"]
MARKLOGIC_PASSWORD: str = os.environ["MARKLOGIC_PASSWORD"]
# Environment variables are always strings, and bool() of any non-empty string
# (including "0" and "false") is True. Parse the value explicitly so that the
# usual "off" spellings disable HTTPS; any other non-empty value still enables it.
MARKLOGIC_USE_HTTPS: bool = os.environ["MARKLOGIC_USE_HTTPS"].strip().lower() not in {
    "",
    "0",
    "false",
    "no",
    "off",
}
Expand Down Expand Up @@ -238,18 +239,6 @@ def extract_metadata(tar: tarfile.TarFile, consignment_reference: str):
return decoder.decode(te_metadata_file.read().decode("utf-8"))


def extract_uri(metadata: dict, consignment_reference: str) -> str:
    """Return the document URI reported by the parser, or a failure URI.

    The parser's ``uri`` value is read from ``metadata["parameters"]["PARSER"]``
    and the leading ``https://caselaw.nationalarchives.gov.uk/id/`` prefix is
    removed. If no usable URI remains, ``failures/<consignment_reference>`` is
    returned instead.

    Args:
        metadata: Decoded metadata dictionary containing a ``parameters`` key.
        consignment_reference: Reference used to build the fallback URI.

    Returns:
        The URI with the identifier prefix removed, or the ``failures/`` fallback.

    Raises:
        KeyError: If ``metadata`` has no ``parameters`` key.
    """
    id_prefix = "https://caselaw.nationalarchives.gov.uk/id/"

    # A missing or null PARSER section is treated the same as a missing URI,
    # so the caller gets the failure URI instead of a KeyError/AttributeError.
    parser_metadata = metadata["parameters"].get("PARSER") or {}
    uri = parser_metadata.get("uri") or ""

    # Only strip the prefix where it actually leads the value; a blanket
    # replace() would also remove occurrences embedded later in the string.
    if uri.startswith(id_prefix):
        uri = uri[len(id_prefix):]

    if not uri:
        uri = f"failures/{consignment_reference}"

    return uri


# called by tests
def get_consignment_reference(message):
    """Build a Message from ``message`` and return its consignment reference."""
    parsed_message = Message.from_message(message)
    return parsed_message.get_consignment_reference()
Expand Down Expand Up @@ -409,7 +398,7 @@ def __init__(self, message: Message):
self.message.update_consignment_reference(self.metadata["parameters"]["TRE"]["reference"])
self.consignment_reference = self.message.get_consignment_reference()
self.xml_file_name = self.metadata["parameters"]["TRE"]["payload"]["xml"]
self.uri = DocumentURIString(extract_uri(self.metadata, self.consignment_reference))
self.uri = DocumentURIString("d-" + str(uuid4()))
print(f"Ingesting document {self.uri}")
self.xml = get_best_xml(self.uri, self.tar, self.xml_file_name, self.consignment_reference)

Expand Down
16 changes: 0 additions & 16 deletions ds-caselaw-ingester/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,22 +280,6 @@ def test_extract_metadata_not_found_tdr(self):
with pytest.raises(lambda_function.FileNotFoundException, match="Consignment Ref:"):
lambda_function.extract_metadata(tar, consignment_reference)

def test_extract_uri_success(self):
    """extract_uri returns the path that follows the caselaw id prefix."""
    parser_output = {"uri": "https://caselaw.nationalarchives.gov.uk/id/ewca/civ/2022/111"}
    result = lambda_function.extract_uri({"parameters": {"PARSER": parser_output}}, "anything")
    assert result == "ewca/civ/2022/111"

def test_extract_uri_incompete(self):
    """An incomplete URI (prefix only) yields the failures/ fallback."""
    parser_output = {"uri": "https://caselaw.nationalarchives.gov.uk/id/"}
    result = lambda_function.extract_uri({"parameters": {"PARSER": parser_output}}, "anything")
    assert result == "failures/anything"

def test_extract_uri_missing_key(self):
    """A PARSER section without a ``uri`` key yields the failures/ fallback."""
    parser_output = {}
    result = lambda_function.extract_uri({"parameters": {"PARSER": parser_output}}, "anything")
    assert result == "failures/anything"

def test_extract_uri_none(self):
    """A ``uri`` value of None yields the failures/ fallback."""
    parser_output = {"uri": None}
    result = lambda_function.extract_uri({"parameters": {"PARSER": parser_output}}, "anything")
    assert result == "failures/anything"

def test_extract_docx_filename_success(self):
    """extract_docx_filename returns the filename from the TRE payload."""
    tre_section = {"payload": {"filename": "judgment.docx"}}
    result = lambda_function.extract_docx_filename({"parameters": {"TRE": tre_section}}, "anything")
    assert result == "judgment.docx"
Expand Down

0 comments on commit 3ffbbbc

Please sign in to comment.