From 6e6991e81d0c1d0b67b9a55e5cd0a5ddb167c1cb Mon Sep 17 00:00:00 2001 From: Nick Jackson Date: Tue, 30 Jan 2024 10:46:03 +0000 Subject: [PATCH] Make auto re-enrich aware of parser versions This also introduces a fix for the case where MarkLogic returns zero results, in which case instead of returning an array of arrays (with the first being a header row) it just returns a single array and breaks our expectations. The new `get_rows_from_result` method in `judgments.views.reports` will handle this case and will always return the expected list of results, even if that list is empty. --- .../reports/awaiting_enrichment.html | 2 +- .../enrich_next_in_reenrichment_queue.py | 13 ++++- judgments/tests/test_commands.py | 57 +++++++++++++++++++ judgments/tests/test_reports.py | 9 +++ judgments/views/reports.py | 37 ++++++++++-- requirements/base.txt | 2 +- 6 files changed, 109 insertions(+), 11 deletions(-) create mode 100644 judgments/tests/test_commands.py diff --git a/ds_caselaw_editor_ui/templates/reports/awaiting_enrichment.html b/ds_caselaw_editor_ui/templates/reports/awaiting_enrichment.html index e943318bc..57fd848c6 100644 --- a/ds_caselaw_editor_ui/templates/reports/awaiting_enrichment.html +++ b/ds_caselaw_editor_ui/templates/reports/awaiting_enrichment.html @@ -4,7 +4,7 @@

Documents awaiting enrichment

- These documents have not yet been enriched with the latest version of the enrichment engine, version {{ target_enrichment_version }}, and have not recently had an enrichment attempt. + These documents have been parsed with the latest version of the parser ({{ target_parser_version }}) but have not yet been enriched with the latest version of the enrichment engine, version {{ target_enrichment_version }}, and have not recently had an enrichment attempt.

There are {{ documents|length|intcomma }} documents waiting. diff --git a/judgments/management/commands/enrich_next_in_reenrichment_queue.py b/judgments/management/commands/enrich_next_in_reenrichment_queue.py index f07397615..39cb4f186 100644 --- a/judgments/management/commands/enrich_next_in_reenrichment_queue.py +++ b/judgments/management/commands/enrich_next_in_reenrichment_queue.py @@ -1,6 +1,7 @@ from django.core.management.base import BaseCommand from judgments.utils import api_client +from judgments.views.reports import get_rows_from_result NUMBER_TO_ENRICH = 1 @@ -9,11 +10,17 @@ class Command(BaseCommand): help = "Sends the next document in the re-enrichment queue to be enriched" def handle(self, *args, **options): - document_details_to_enrich = api_client.get_pending_enrichment_for_version( - api_client.get_highest_enrichment_version(), + target_enrichment_version = api_client.get_highest_enrichment_version() + target_parser_version = api_client.get_highest_parser_version() + + document_details_to_enrich = get_rows_from_result( + api_client.get_pending_enrichment_for_version( + target_enrichment_version=target_enrichment_version, + target_parser_version=target_parser_version, + ), ) - for document_details in document_details_to_enrich[1 : NUMBER_TO_ENRICH + 1]: + for document_details in document_details_to_enrich[:NUMBER_TO_ENRICH]: document_uri = document_details[0] document = api_client.get_document_by_uri(document_uri.replace(".xml", "")) diff --git a/judgments/tests/test_commands.py b/judgments/tests/test_commands.py new file mode 100644 index 000000000..8f21049fd --- /dev/null +++ b/judgments/tests/test_commands.py @@ -0,0 +1,57 @@ +from unittest.mock import call, patch + +from django.core.management import call_command +from django.test import TestCase +from factories import DocumentFactory + + +class CommandsTestCase(TestCase): + @patch("judgments.management.commands.enrich_next_in_reenrichment_queue.api_client") + @patch( + "judgments.management.commands.enrich_next_in_reenrichment_queue.NUMBER_TO_ENRICH", + 2, + ) + def test_enrich_next_in_reenrichment_queue(self, mock_api_client): + mock_api_client.get_pending_enrichment_for_version.return_value = [ + ["uri", "enrich_version_string", "minutes_since_enrichment_request"], + ["/test/123.xml", "1.2.3", 45], + ["/test/456.xml", None, None], + ] + + document_1 = DocumentFactory.build() + document_2 = DocumentFactory.build() + + mock_api_client.get_document_by_uri.side_effect = [document_1, document_2] + + call_command("enrich_next_in_reenrichment_queue") + + mock_api_client.get_document_by_uri.assert_has_calls( + [call("/test/123"), call("/test/456")], + ) + + document_1.enrich.assert_called_once() + document_2.enrich.assert_called_once() + + @patch("judgments.management.commands.enrich_next_in_reenrichment_queue.api_client") + @patch( + "judgments.management.commands.enrich_next_in_reenrichment_queue.NUMBER_TO_ENRICH", + 1, + ) + def test_enrich_next_in_reenrichment_queue_with_limit(self, mock_api_client): + mock_api_client.get_pending_enrichment_for_version.return_value = [ + ["uri", "enrich_version_string", "minutes_since_enrichment_request"], + ["/test/123.xml", "1.2.3", 45], + ["/test/456.xml", None, None], + ] + + document_1 = DocumentFactory.build() + document_2 = DocumentFactory.build() + + mock_api_client.get_document_by_uri.side_effect = [document_1, document_2] + + call_command("enrich_next_in_reenrichment_queue") + + mock_api_client.get_document_by_uri.assert_has_calls([call("/test/123")]) + + document_1.enrich.assert_called_once() + document_2.enrich.assert_not_called() diff --git a/judgments/tests/test_reports.py b/judgments/tests/test_reports.py index 4e0310344..5b6885443 100644 --- a/judgments/tests/test_reports.py +++ b/judgments/tests/test_reports.py @@ -4,6 +4,8 @@ from django.test import TestCase from django.urls import reverse +from judgments.views.reports import get_rows_from_result + class TestReports(TestCase): def test_index_view(self): @@ -58,3 +60,10 @@ def test_awaiting_parse_view(self, mock_api_client): assert "parser_version_string" not in decoded_response assert response.status_code == 200 + + def test_get_rows_from_result(self): + assert get_rows_from_result(["header 1", "header 2"]) == [] + + assert get_rows_from_result( + [["header 1", "header 2"], ["value 1", "value 2"]], + ) == [["value 1", "value 2"]] diff --git a/judgments/views/reports.py b/judgments/views/reports.py index efba6bd65..0233e93bf 100644 --- a/judgments/views/reports.py +++ b/judgments/views/reports.py @@ -1,3 +1,5 @@ +from typing import Any + from django.views.generic import TemplateView from judgments.utils import api_client @@ -14,6 +16,18 @@ def get_context_data(self, **kwargs): return context +def get_rows_from_result(result: list | list[list[Any]]) -> list[list[Any]]: + """ + If there are results, MarkLogic returns a list of lists where the first row is column names. If there are no + results, it returns a single list of column names. + + :return: A list of results, which may be empty. + """ + if isinstance(result[0], list): + return result[1:] + return [] + + class AwaitingParse(TemplateView): template_name = "reports/awaiting_parse.html" @@ -26,9 +40,12 @@ def get_context_data(self, **kwargs): context["target_parser_version"] = ( f"{target_parser_version[0]}.{target_parser_version[1]}" ) - context["documents"] = api_client.get_pending_parse_for_version( - target_parser_version, - )[1:] + + context["documents"] = get_rows_from_result( + api_client.get_pending_parse_for_version( + target_parser_version, + ), + ) return context @@ -40,13 +57,21 @@ def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) target_enrichment_version = api_client.get_highest_enrichment_version() + target_parser_version = api_client.get_highest_parser_version() context["page_title"] = "Documents awaiting enrichment" context["target_enrichment_version"] = ( f"{target_enrichment_version[0]}.{target_enrichment_version[1]}" ) - context["documents"] = api_client.get_pending_enrichment_for_version( - target_enrichment_version, - )[1:] + context["target_parser_version"] = ( + f"{target_parser_version[0]}.{target_parser_version[1]}" + ) + + context["documents"] = get_rows_from_result( + api_client.get_pending_enrichment_for_version( + target_enrichment_version=target_enrichment_version, + target_parser_version=target_parser_version, + ), + ) return context diff --git a/requirements/base.txt b/requirements/base.txt index cd11bf28a..94771652f 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -16,7 +16,7 @@ xmltodict~=0.13.0 requests-toolbelt~=1.0.0 lxml~=5.1.0 wsgi-basic-auth~=1.1.0 -ds-caselaw-marklogic-api-client==20.0.0 +ds-caselaw-marklogic-api-client==21.0.0 ds-caselaw-utils~=1.3.3 rollbar django-stronghold==0.4.0