From 7f3d1f6d183f919cc7a6f083b7d2e7a79799d475 Mon Sep 17 00:00:00 2001
From: Nick Jackson
Date: Tue, 30 Jan 2024 09:44:23 +0000
Subject: [PATCH] Make pending enrichment query aware of parser version

get_pending_enrichment_for_version() now takes the target parser version
alongside the target enrichment version, and the query only returns
documents whose parser version equals that target.

Also compare enrichment versions as (major, minor) pairs, so a document on
an older major version is pending regardless of its minor version.
---
Review notes (not part of the commit message; ignored by `git am`):

* The enrichment-version predicate used to read
  `enrich_major_version <= @major AND enrich_minor_version < @minor`.
  That skips documents with an older major but an equal-or-higher minor
  (for example 1.5 against a target of 2.0). It is now
  `major < @major OR (major = @major AND minor < @minor)`.
* Line breaks, indentation and blank context lines in this copy were
  restored from a whitespace-collapsed source. Hunk headers are unchanged;
  run `git apply --check` before applying.
* The `index` blob id recorded for the .xqy file predates the predicate fix.

 src/caselawclient/Client.py                     | 10 +++++++---
 .../get_pending_enrichment_for_version.xqy      | 17 ++++++++++++-----
 src/caselawclient/xquery_type_dicts.py          |  6 ++++--
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/src/caselawclient/Client.py b/src/caselawclient/Client.py
index 82672c94..007ac6ac 100644
--- a/src/caselawclient/Client.py
+++ b/src/caselawclient/Client.py
@@ -968,12 +968,16 @@ def get_highest_enrichment_version(self) -> tuple[int, int]:
         return (int(table[1][1]), int(table[1][2]))
 
     def get_pending_enrichment_for_version(
-        self, target_version: tuple[int, int]
+        self,
+        target_enrichment_version: tuple[int, int],
+        target_parser_version: tuple[int, int],
     ) -> list[list[Any]]:
         """Retrieve documents which are not yet enriched with a given version."""
         vars: query_dicts.GetPendingEnrichmentForVersionDict = {
-            "target_major_version": target_version[0],
-            "target_minor_version": target_version[1],
+            "target_enrichment_major_version": target_enrichment_version[0],
+            "target_enrichment_minor_version": target_enrichment_version[1],
+            "target_parser_major_version": target_parser_version[0],
+            "target_parser_minor_version": target_parser_version[1],
         }
         results: list[list[Any]] = json.loads(
             get_single_string_from_marklogic_response(
diff --git a/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy b/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy
index 6a58eacc..d9ff45cc 100644
--- a/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy
+++ b/src/caselawclient/xquery/get_pending_enrichment_for_version.xqy
@@ -1,7 +1,9 @@
 xquery version "1.0-ml";
 
-declare variable $target_major_version as xs:int external;
-declare variable $target_minor_version as xs:int external;
+declare variable $target_enrichment_major_version as xs:int external;
+declare variable $target_enrichment_minor_version as xs:int external;
+declare variable $target_parser_major_version as xs:int external;
+declare variable $target_parser_minor_version as xs:int external;
 
 xdmp:to-json(xdmp:sql(
   "SELECT process_data.uri, enrich_version_string, minutes_since_enrichment_request
@@ -9,20 +11,25 @@ xdmp:to-json(xdmp:sql(
     SELECT
       process_data.uri,
       enrich_version_string, enrich_major_version, enrich_minor_version,
+      parser_major_version, parser_minor_version,
       DATEDIFF('minute', last_sent_to_enrichment, CURRENT_TIMESTAMP) AS minutes_since_enrichment_request
     FROM documents.process_data
     JOIN documents.process_property_data ON process_data.uri = process_property_data.uri
   )
   WHERE (
     (enrich_version_string IS NULL) OR
-    (enrich_major_version <= @target_major_version AND enrich_minor_version < @target_minor_version)
+    (enrich_major_version < @target_enrichment_major_version OR (enrich_major_version = @target_enrichment_major_version AND enrich_minor_version < @target_enrichment_minor_version))
+  ) AND (
+    (parser_major_version = @target_parser_major_version AND parser_minor_version = @target_parser_minor_version)
   )
   AND (minutes_since_enrichment_request > 43200 OR minutes_since_enrichment_request IS NULL)
   ORDER BY enrich_major_version ASC NULLS FIRST, enrich_minor_version ASC",
   "array",
   map:new((
-    map:entry("target_major_version", $target_major_version),
-    map:entry("target_minor_version", $target_minor_version)
+    map:entry("target_enrichment_major_version", $target_enrichment_major_version),
+    map:entry("target_enrichment_minor_version", $target_enrichment_minor_version),
+    map:entry("target_parser_major_version", $target_parser_major_version),
+    map:entry("target_parser_minor_version", $target_parser_minor_version)
   ))
 ))
 
diff --git a/src/caselawclient/xquery_type_dicts.py b/src/caselawclient/xquery_type_dicts.py
index b4df2b15..543347c2 100644
--- a/src/caselawclient/xquery_type_dicts.py
+++ b/src/caselawclient/xquery_type_dicts.py
@@ -80,8 +80,10 @@ class GetLastModifiedDict(MarkLogicAPIDict):
 
 # get_pending_enrichment_for_version.xqy
 class GetPendingEnrichmentForVersionDict(MarkLogicAPIDict):
-    target_major_version: int
-    target_minor_version: int
+    target_enrichment_major_version: int
+    target_enrichment_minor_version: int
+    target_parser_major_version: int
+    target_parser_minor_version: int
 
 
 # get_pending_parse_for_version.xqy