From f72f7a4bd82dc3ca197683c65ce36bd123a66260 Mon Sep 17 00:00:00 2001 From: David McKee Date: Fri, 1 Sep 2023 17:38:51 +0100 Subject: [PATCH 1/3] Add non-source directories to git ignore Volume is used by docker and tmp by the code itself --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 66c487f..fccedca 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ dist .DS_Store .aws-sam */__pycache__ + +tmp +volume From 9192e257906e2258039121aadf9f0e55bc46d40a Mon Sep 17 00:00:00 2001 From: David McKee Date: Mon, 4 Sep 2023 12:04:00 +0100 Subject: [PATCH 2/3] S3 upload tests should not depend on identity of bucket --- ds-caselaw-ingester/tests.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ds-caselaw-ingester/tests.py b/ds-caselaw-ingester/tests.py index 189b604..65074c1 100644 --- a/ds-caselaw-ingester/tests.py +++ b/ds-caselaw-ingester/tests.py @@ -269,7 +269,7 @@ def test_store_file_success(self, mock_print): session.upload_fileobj = MagicMock() lambda_function.store_file(None, "folder", "filename.ext", session) mock_print.assert_called_with("Upload Successful folder/filename.ext") - session.upload_fileobj.assert_called_with(None, None, "folder/filename.ext") + session.upload_fileobj.assert_called_with(None, ANY, "folder/filename.ext") @patch("builtins.print") def test_store_file_file_not_found(self, mock_print): @@ -277,7 +277,7 @@ def test_store_file_file_not_found(self, mock_print): session.upload_fileobj = MagicMock(side_effect=FileNotFoundError) lambda_function.store_file(None, "folder", "filename.ext", session) mock_print.assert_called_with("The file folder/filename.ext was not found") - session.upload_fileobj.assert_called_with(None, None, "folder/filename.ext") + session.upload_fileobj.assert_called_with(None, ANY, "folder/filename.ext") @patch("builtins.print") def test_store_file_file_no_credentials(self, mock_print): @@ -285,7 +285,7 @@ def test_store_file_file_no_credentials(self, mock_print): session.upload_fileobj = MagicMock(side_effect=NoCredentialsError) lambda_function.store_file(None, "folder", "filename.ext", session) mock_print.assert_called_with("Credentials not available") - session.upload_fileobj.assert_called_with(None, None, "folder/filename.ext") + session.upload_fileobj.assert_called_with(None, ANY, "folder/filename.ext") @patch.dict( os.environ, From 835c18ff671224d7233bc38007dc98a6e255aa00 Mon Sep 17 00:00:00 2001 From: David McKee Date: Mon, 4 Sep 2023 12:05:37 +0100 Subject: [PATCH 3/3] Call MarklogicApiClient explicitly to set user agent --- ds-caselaw-ingester/lambda_function.py | 15 ++++++++++++++- ds-caselaw-ingester/tests.py | 25 +++++++++++++++++-------- requirements/base.txt | 3 ++- scripts/test | 2 +- 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/ds-caselaw-ingester/lambda_function.py b/ds-caselaw-ingester/lambda_function.py index c203979..0d5ab49 100644 --- a/ds-caselaw-ingester/lambda_function.py +++ b/ds-caselaw-ingester/lambda_function.py @@ -13,14 +13,27 @@ from boto3.session import Session from botocore.exceptions import NoCredentialsError from caselawclient.Client import ( + DEFAULT_USER_AGENT, + MarklogicApiClient, MarklogicCommunicationError, MarklogicResourceNotFoundError, - api_client, ) +from dotenv import load_dotenv from notifications_python_client.notifications import NotificationsAPIClient +load_dotenv() + + rollbar.init(os.getenv("ROLLBAR_TOKEN"), environment=os.getenv("ROLLBAR_ENV")) +api_client = MarklogicApiClient( + host=os.getenv("MARKLOGIC_HOST", default=None), + username=os.getenv("MARKLOGIC_USER", default=None), + password=os.getenv("MARKLOGIC_PASSWORD", default=None), + use_https=os.getenv("MARKLOGIC_USE_HTTPS", default=False), + user_agent=f"ds-caselaw-ingester/unknown {DEFAULT_USER_AGENT}", +) + class Message(object): @classmethod diff --git a/ds-caselaw-ingester/tests.py b/ds-caselaw-ingester/tests.py index 65074c1..a0161bb 100644 --- a/ds-caselaw-ingester/tests.py +++ b/ds-caselaw-ingester/tests.py @@ -13,7 +13,6 @@ from caselawclient.Client import ( MarklogicCommunicationError, MarklogicResourceNotFoundError, - api_client, ) from notifications_python_client.notifications import NotificationsAPIClient @@ -239,7 +238,8 @@ def test_extract_docx_filename_failure(self): with pytest.raises(lambda_function.DocxFilenameNotFoundException): lambda_function.extract_docx_filename(metadata, "anything") - def test_store_metadata(self): + @patch("lambda_function.api_client") + def test_store_metadata(self, api_client): metadata = { "parameters": { "TDR": { @@ -580,14 +580,16 @@ def test_malformed_message(self): with pytest.raises(lambda_function.InvalidMessageException): lambda_function.get_consignment_reference(message) - def test_update_judgment_xml_success(self): + @patch("lambda_function.api_client") + def test_update_judgment_xml_success(self, api_client): xml = ET.XML("Here's some xml") api_client.get_judgment_xml = MagicMock(return_value=True) api_client.save_judgment_xml = MagicMock(return_value=True) result = lambda_function.update_judgment_xml("a/fake/uri", xml) assert result is True - def test_update_judgment_xml_judgment_does_not_exist(self): + @patch("lambda_function.api_client") + def test_update_judgment_xml_judgment_does_not_exist(self, api_client): xml = ET.XML("Here's some xml") api_client.get_judgment_xml = MagicMock( side_effect=MarklogicResourceNotFoundError("error") @@ -596,7 +598,8 @@ def test_update_judgment_xml_judgment_does_not_exist(self): result = lambda_function.update_judgment_xml("a/fake/uri", xml) assert result is False - def test_update_judgment_xml_judgment_does_not_save(self): + @patch("lambda_function.api_client") + def test_update_judgment_xml_judgment_does_not_save(self, api_client): xml = ET.XML("Here's some xml") api_client.get_judgment_xml = MagicMock(return_value=True) api_client.save_judgment_xml = MagicMock( @@ -605,13 +608,15 @@ def test_update_judgment_xml_judgment_does_not_save(self): result = lambda_function.update_judgment_xml("a/fake/uri", xml) assert result is False - def test_insert_document_xml_success(self): + @patch("lambda_function.api_client") + def test_insert_document_xml_success(self, api_client): xml = ET.XML("Here's some xml") api_client.insert_document_xml = MagicMock(return_value=True) result = lambda_function.insert_document_xml("a/fake/uri", xml) assert result is True - def test_insert_document_xml_failure(self): + @patch("lambda_function.api_client") + def test_insert_document_xml_failure(self, api_client): xml = ET.XML("Here's some xml") api_client.insert_document_xml = MagicMock( side_effect=MarklogicCommunicationError("error") @@ -695,8 +700,12 @@ def test_get_best_xml_with_no_xml_file(self): assert result.__class__ == ET.Element assert result.tag == "error" - def test_unpublish_updated_judgment(self): + @patch("lambda_function.api_client") + def test_unpublish_updated_judgment(self, api_client): uri = "a/fake/uri" api_client.set_published = MagicMock() lambda_function.unpublish_updated_judgment(uri) api_client.set_published.assert_called_with(uri, False) + + def test_user_agent(self): + assert "ingester" in lambda_function.api_client.session.headers["User-Agent"] diff --git a/requirements/base.txt b/requirements/base.txt index 5c3a0ef..6ab46fd 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,5 +1,5 @@ django-environ~=0.10 -ds-caselaw-marklogic-api-client==14.0.2 +ds-caselaw-marklogic-api-client==14.1.0 requests-toolbelt~=1.0 urllib3~=1.26 boto3 @@ -8,3 +8,4 @@ notifications-python-client~=8.0 mypy-boto3-s3 mypy-boto3-sns +python-dotenv diff --git a/scripts/test b/scripts/test index 3d03518..d2815f8 100755 --- a/scripts/test +++ b/scripts/test @@ -1 +1 @@ -MARKLOGIC_HOST= MARKLOGIC_USER= MARKLOGIC_PASSWORD= python -m pytest ds-caselaw-ingester/tests.py $* +python -m pytest ds-caselaw-ingester/tests.py $*