From 48e5e75aabbc19185555ffa5654bcd9816995085 Mon Sep 17 00:00:00 2001 From: Nick Jackson Date: Wed, 24 Jan 2024 14:15:40 +0000 Subject: [PATCH] Re-enrichment now uses minor version, not just major version --- src/caselawclient/Client.py | 9 +++++---- .../get_pending_enrichment_for_version.xqy | 20 ++++++++++++++----- src/caselawclient/xquery_type_dicts.py | 3 ++- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/caselawclient/Client.py b/src/caselawclient/Client.py index a7a4e4c7..82672c94 100644 --- a/src/caselawclient/Client.py +++ b/src/caselawclient/Client.py @@ -952,7 +952,7 @@ def get_combined_stats_table(self) -> list[list[Any]]: return results - def get_highest_enrichment_version(self) -> int: + def get_highest_enrichment_version(self) -> tuple[int, int]: """This gets the highest enrichment version in the database, so if nothing has been enriched with the most recent version of enrichment, this won't reflect that change.""" @@ -965,14 +965,15 @@ def get_highest_enrichment_version(self) -> int: ) ) - return int(table[1][1]) + return (int(table[1][1]), int(table[1][2])) def get_pending_enrichment_for_version( - self, target_version: int + self, target_version: tuple[int, int] ) -> list[list[Any]]: """Retrieve documents which are not yet enriched with a given version.""" vars: query_dicts.GetPendingEnrichmentForVersionDict = { - "target_version": target_version + "target_major_version": target_version[0], + "target_minor_version": target_version[1], } results: list[list[Any]] = json.loads( get_single_string_from_marklogic_response( diff --git a/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy b/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy index 0ddbb0d5..6a58eacc 100644 --- a/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy +++ b/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy @@ -1,18 +1,28 @@ xquery version "1.0-ml"; -declare variable $target_version as xs:int external; 
+declare variable $target_major_version as xs:int external; +declare variable $target_minor_version as xs:int external; (: pending: enrich_version_string is NULL, or the (major, minor) enrichment version is strictly lower than the target pair :) xdmp:to-json(xdmp:sql( "SELECT process_data.uri, enrich_version_string, minutes_since_enrichment_request FROM ( - SELECT process_data.uri, enrich_version_string, enrich_major_version, DATEDIFF('minute', last_sent_to_enrichment, CURRENT_TIMESTAMP) AS minutes_since_enrichment_request + SELECT + process_data.uri, + enrich_version_string, enrich_major_version, enrich_minor_version, + DATEDIFF('minute', last_sent_to_enrichment, CURRENT_TIMESTAMP) AS minutes_since_enrichment_request FROM documents.process_data JOIN documents.process_property_data ON process_data.uri = process_property_data.uri ) - WHERE ((enrich_version_string IS NULL) OR (enrich_major_version < @target_version)) + WHERE ( + (enrich_version_string IS NULL) OR + (enrich_major_version < @target_major_version) OR (enrich_major_version = @target_major_version AND enrich_minor_version < @target_minor_version) + ) AND (minutes_since_enrichment_request > 43200 OR minutes_since_enrichment_request IS NULL) - ORDER BY enrich_major_version ASC NULLS FIRST", + ORDER BY enrich_major_version ASC NULLS FIRST, enrich_minor_version ASC", "array", - map:new(map:entry("target_version", $target_version)) + map:new(( + map:entry("target_major_version", $target_major_version), + map:entry("target_minor_version", $target_minor_version) + )) )) diff --git a/src/caselawclient/xquery_type_dicts.py b/src/caselawclient/xquery_type_dicts.py index 9d7e8f41..b4df2b15 100644 --- a/src/caselawclient/xquery_type_dicts.py +++ b/src/caselawclient/xquery_type_dicts.py @@ -80,7 +80,8 @@ class GetLastModifiedDict(MarkLogicAPIDict): # get_pending_enrichment_for_version.xqy class GetPendingEnrichmentForVersionDict(MarkLogicAPIDict): - target_version: int + target_major_version: int + target_minor_version: int # get_pending_parse_for_version.xqy