Skip to content

Commit

Permalink
Merge pull request #1382 from nationalarchives/feature/enrichment-bac…
Browse files Browse the repository at this point in the history
…klog-is-parser-version-aware

Make auto re-enrich aware of parser versions
  • Loading branch information
jacksonj04 authored Jan 31, 2024
2 parents 162fa0f + 6e6991e commit 4abe3ef
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<div class="standard-text-template">
<h1>Documents awaiting enrichment</h1>
<p>
These documents have not yet been enriched with the latest version of the enrichment engine, version <b>{{ target_enrichment_version }}</b>, and have not recently had an enrichment attempt.
These documents have been parsed with the latest version of the parser ({{ target_parser_version }}) but have not yet been enriched with the latest version of the enrichment engine, version <b>{{ target_enrichment_version }}</b>, and have not recently had an enrichment attempt.
</p>
<p>
There are <b>{{ documents|length|intcomma }}</b> documents waiting.
Expand Down
13 changes: 10 additions & 3 deletions judgments/management/commands/enrich_next_in_reenrichment_queue.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from django.core.management.base import BaseCommand

from judgments.utils import api_client
from judgments.views.reports import get_rows_from_result

NUMBER_TO_ENRICH = 1

Expand All @@ -9,11 +10,17 @@ class Command(BaseCommand):
help = "Sends the next document in the re-enrichment queue to be enriched"

def handle(self, *args, **options):
document_details_to_enrich = api_client.get_pending_enrichment_for_version(
api_client.get_highest_enrichment_version(),
target_enrichment_version = api_client.get_highest_enrichment_version()
target_parser_version = api_client.get_highest_parser_version()

document_details_to_enrich = get_rows_from_result(
api_client.get_pending_enrichment_for_version(
target_enrichment_version=target_enrichment_version,
target_parser_version=target_parser_version,
),
)

for document_details in document_details_to_enrich[1 : NUMBER_TO_ENRICH + 1]:
for document_details in document_details_to_enrich[:NUMBER_TO_ENRICH]:
document_uri = document_details[0]

document = api_client.get_document_by_uri(document_uri.replace(".xml", ""))
Expand Down
57 changes: 57 additions & 0 deletions judgments/tests/test_commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from unittest.mock import call, patch

from django.core.management import call_command
from django.test import TestCase
from factories import DocumentFactory


class CommandsTestCase(TestCase):
    """Tests for the ``enrich_next_in_reenrichment_queue`` management command."""

    @patch("judgments.management.commands.enrich_next_in_reenrichment_queue.api_client")
    @patch(
        "judgments.management.commands.enrich_next_in_reenrichment_queue.NUMBER_TO_ENRICH",
        2,
    )
    def test_enrich_next_in_reenrichment_queue(self, mock_api_client):
        """With a limit of two, both queued documents are looked up and enriched."""
        # The mocked API result starts with a row of column names, followed by data rows.
        column_names = ["uri", "enrich_version_string", "minutes_since_enrichment_request"]
        pending_result = [
            column_names,
            ["/test/123.xml", "1.2.3", 45],
            ["/test/456.xml", None, None],
        ]
        mock_api_client.get_pending_enrichment_for_version.return_value = pending_result

        first_document = DocumentFactory.build()
        second_document = DocumentFactory.build()
        mock_api_client.get_document_by_uri.side_effect = [
            first_document,
            second_document,
        ]

        call_command("enrich_next_in_reenrichment_queue")

        # Documents are expected to be requested without the ".xml" suffix.
        expected_lookups = [call("/test/123"), call("/test/456")]
        mock_api_client.get_document_by_uri.assert_has_calls(expected_lookups)

        first_document.enrich.assert_called_once()
        second_document.enrich.assert_called_once()

    @patch("judgments.management.commands.enrich_next_in_reenrichment_queue.api_client")
    @patch(
        "judgments.management.commands.enrich_next_in_reenrichment_queue.NUMBER_TO_ENRICH",
        1,
    )
    def test_enrich_next_in_reenrichment_queue_with_limit(self, mock_api_client):
        """With a limit of one, only the first queued document is enriched."""
        column_names = ["uri", "enrich_version_string", "minutes_since_enrichment_request"]
        pending_result = [
            column_names,
            ["/test/123.xml", "1.2.3", 45],
            ["/test/456.xml", None, None],
        ]
        mock_api_client.get_pending_enrichment_for_version.return_value = pending_result

        first_document = DocumentFactory.build()
        second_document = DocumentFactory.build()
        mock_api_client.get_document_by_uri.side_effect = [
            first_document,
            second_document,
        ]

        call_command("enrich_next_in_reenrichment_queue")

        expected_lookups = [call("/test/123")]
        mock_api_client.get_document_by_uri.assert_has_calls(expected_lookups)

        first_document.enrich.assert_called_once()
        # The second queued document must be left untouched by this run.
        second_document.enrich.assert_not_called()
9 changes: 9 additions & 0 deletions judgments/tests/test_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from django.test import TestCase
from django.urls import reverse

from judgments.views.reports import get_rows_from_result


class TestReports(TestCase):
def test_index_view(self):
Expand Down Expand Up @@ -58,3 +60,10 @@ def test_awaiting_parse_view(self, mock_api_client):
assert "parser_version_string" not in decoded_response

assert response.status_code == 200

def test_get_rows_from_result(self):
    """A header-only result yields no rows; otherwise the header row is dropped."""
    header = ["header 1", "header 2"]
    data_row = ["value 1", "value 2"]

    # A flat list of column names means there were no results.
    header_only_rows = get_rows_from_result(header)
    assert header_only_rows == []

    # A list of lists has its first (column names) row removed.
    populated_rows = get_rows_from_result([header, data_row])
    assert populated_rows == [data_row]
37 changes: 31 additions & 6 deletions judgments/views/reports.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Any

from django.views.generic import TemplateView

from judgments.utils import api_client
Expand All @@ -14,6 +16,18 @@ def get_context_data(self, **kwargs):
return context


def get_rows_from_result(result: list | list[list[Any]]) -> list[list[Any]]:
"""
If there are results, MarkLogic returns a list of lists where the first row is column names. If there are no
results, it returns a single list of column names.
:return: A list of results, which may be empty.
"""
if isinstance(result[0], list):
return result[1:]
return []


class AwaitingParse(TemplateView):
template_name = "reports/awaiting_parse.html"

Expand All @@ -26,9 +40,12 @@ def get_context_data(self, **kwargs):
context["target_parser_version"] = (
f"{target_parser_version[0]}.{target_parser_version[1]}"
)
context["documents"] = api_client.get_pending_parse_for_version(
target_parser_version,
)[1:]

context["documents"] = get_rows_from_result(
api_client.get_pending_parse_for_version(
target_parser_version,
),
)

return context

Expand All @@ -40,13 +57,21 @@ def get_context_data(self, **kwargs):
context = super().get_context_data(**kwargs)

target_enrichment_version = api_client.get_highest_enrichment_version()
target_parser_version = api_client.get_highest_parser_version()

context["page_title"] = "Documents awaiting enrichment"
context["target_enrichment_version"] = (
f"{target_enrichment_version[0]}.{target_enrichment_version[1]}"
)
context["documents"] = api_client.get_pending_enrichment_for_version(
target_enrichment_version,
)[1:]
context["target_parser_version"] = (
f"{target_parser_version[0]}.{target_parser_version[1]}"
)

context["documents"] = get_rows_from_result(
api_client.get_pending_enrichment_for_version(
target_enrichment_version=target_enrichment_version,
target_parser_version=target_parser_version,
),
)

return context
2 changes: 1 addition & 1 deletion requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ xmltodict~=0.13.0
requests-toolbelt~=1.0.0
lxml~=5.1.0
wsgi-basic-auth~=1.1.0
ds-caselaw-marklogic-api-client==20.0.0
ds-caselaw-marklogic-api-client==21.0.0
ds-caselaw-utils~=1.3.3
rollbar
django-stronghold==0.4.0
Expand Down

0 comments on commit 4abe3ef

Please sign in to comment.