Skip to content

Commit

Permalink
Add NCN to identifiers on ingest
Browse files Browse the repository at this point in the history
  • Loading branch information
dragon-dxw committed Dec 12, 2024
1 parent 8b982c1 commit 760cf0e
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 1 deletion.
27 changes: 26 additions & 1 deletion ds-caselaw-ingester/lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
import os
import tarfile
import xml.etree.ElementTree as ET
from typing import Dict, List, Tuple
from urllib.parse import unquote_plus
from xml.sax.saxutils import escape
from caselawclient.models.identifiers.neutral_citation import NeutralCitationNumber
from caselawclient.models.documents import DocumentURIString

import boto3
import rollbar
Expand All @@ -18,6 +19,11 @@
from caselawclient.client_helpers import VersionAnnotation, VersionType
from dotenv import load_dotenv
from notifications_python_client.notifications import NotificationsAPIClient
import logging
from caselawclient.models.documents import Document

# Module-level logger registered under the name "ingester". Its level is set
# to DEBUG, so this logger itself passes records of DEBUG severity and above
# (handlers attached elsewhere may still apply their own levels).
logger = logging.getLogger("ingester")
logger.setLevel(logging.DEBUG)

load_dotenv()
rollbar.init(os.getenv("ROLLBAR_TOKEN"), environment=os.getenv("ROLLBAR_ENV"))
Expand Down Expand Up @@ -437,6 +443,24 @@ def insert_document_xml(self) -> bool:
api_client.insert_document_xml(self.uri, self.xml, annotation)
return True

def set_document_identifiers(self) -> None:
    """Store the ingested document's neutral citation number as an identifier.

    The document for ``self.uri`` is fetched through ``api_client``. If it
    already has identifiers a warning is logged, but processing carries on
    regardless. When the document exposes a truthy ``neutral_citation``, that
    value is wrapped in ``NeutralCitationNumber``, added to the document's
    identifiers, and the identifiers are saved. Otherwise only an info line
    noting the missing citation is logged.
    """
    document = api_client.get_document_by_uri(DocumentURIString(self.uri))

    if document.identifiers:
        logger.warning(f"Ingesting, but identifiers already present for {self.uri}!")

    # A document lacking the ``neutral_citation`` attribute altogether is
    # handled the same way as one whose citation is empty.
    citation = getattr(document, "neutral_citation", None)

    if not citation:
        logger.info("Ingested document had NCN (NOT FOUND)")
        return

    document.identifiers.add(NeutralCitationNumber(citation))
    document.identifiers.save(document)
    logger.info(f"Ingested document had NCN {citation}")

def send_updated_judgment_notification(self) -> None:
personalisation = personalise_email(self.uri, self.metadata)
if os.getenv("ROLLBAR_ENV") != "prod":
Expand Down Expand Up @@ -597,6 +621,7 @@ def upload_xml(self) -> None:
raise DocumentInsertionError(
f"Judgment {self.uri} failed to insert into Marklogic. Consignment Ref: {self.consignment_reference}"
)
self.set_document_identifiers()

@property
def upload_state(self) -> str:
Expand Down
9 changes: 9 additions & 0 deletions ds-caselaw-ingester/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ def test_handler_messages_v2(
capsys,
):
boto_session.return_value.client.return_value.download_file = create_fake_tdr_file
doc = apiclient.get_document_by_uri.return_value
doc.neutral_citation = None

message = v2_message_raw
event = {"Records": [{"Sns": {"Message": message}}, {"Sns": {"Message": message}}]}
Expand All @@ -161,6 +163,8 @@ def test_handler_messages_v2(
payload=ANY,
)
assert annotation.call_count == 2
doc.identifiers.add.assert_not_called()
doc.identifiers.save.assert_not_called()

@patch("lambda_function.api_client", autospec=True)
@patch("lambda_function.boto3.session.Session")
Expand All @@ -180,6 +184,8 @@ def test_handler_messages_s3(
):
"""Test that, with appropriate stubs, an S3 message passes through the parsing process"""
boto_session.return_value.client.return_value.download_file = create_fake_bulk_file
doc = apiclient.get_document_by_uri.return_value
doc.neutral_citation = "[2012] UKUT 82 (IAC)"

message = s3_message_raw
event = {"Records": [{"Sns": {"Message": message}}, {"Sns": {"Message": message}}]}
Expand All @@ -200,13 +206,16 @@ def test_handler_messages_s3(
notify_new.assert_not_called()
notify_updated.assert_not_called()
modify_filename.assert_not_called()

annotation.assert_called_with(
ANY,
automated=True,
message="Updated document uploaded by Find Case Law",
payload=ANY,
)
assert annotation.call_count == 2
assert doc.identifiers.add.call_args_list[0].args[0].value == "[2012] UKUT 82 (IAC)"
doc.identifiers.save.assert_called()


class TestLambda:
Expand Down

0 comments on commit 760cf0e

Please sign in to comment.