Skip to content

Commit

Permalink
Make pending enrichment query aware of parser version
Browse files Browse the repository at this point in the history
  • Loading branch information
jacksonj04 committed Jan 30, 2024
1 parent 9831ef5 commit 7f3d1f6
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 10 deletions.
10 changes: 7 additions & 3 deletions src/caselawclient/Client.py
Original file line number Diff line number Diff line change
Expand Up @@ -968,12 +968,16 @@ def get_highest_enrichment_version(self) -> tuple[int, int]:
return (int(table[1][1]), int(table[1][2]))

def get_pending_enrichment_for_version(
self, target_version: tuple[int, int]
self,
target_enrichment_version: tuple[int, int],
target_parser_version: tuple[int, int],
) -> list[list[Any]]:
"""Retrieve documents which are not yet enriched with a given version."""
vars: query_dicts.GetPendingEnrichmentForVersionDict = {
"target_major_version": target_version[0],
"target_minor_version": target_version[1],
"target_enrichment_major_version": target_enrichment_version[0],
"target_enrichment_minor_version": target_enrichment_version[1],
"target_parser_major_version": target_parser_version[0],
"target_parser_minor_version": target_parser_version[1],
}
results: list[list[Any]] = json.loads(
get_single_string_from_marklogic_response(
Expand Down
17 changes: 12 additions & 5 deletions src/caselawclient/xquery/get_pending_enrichment_for_version.xqy
Original file line number Diff line number Diff line change
@@ -1,28 +1,35 @@
xquery version "1.0-ml";

(:
  List the documents that are still pending enrichment for a target
  enrichment version, restricted to documents parsed with a given parser
  version. The result is returned as a JSON array of rows
  (uri, enrich_version_string, minutes_since_enrichment_request).

  A document is selected when ALL of the following hold:
    1. It has never been enriched (enrich_version_string IS NULL), or its
       enrichment version is strictly lower than the target. Versions are
       compared as (major, minor) pairs: a lower major always counts as
       older, and the minor is only consulted when the majors are equal.
       Comparing the two components independently would skip e.g. a 1.5
       document when the target is 2.0.
    2. Its parser version equals the target parser version exactly.
    3. It was last sent for enrichment more than 43200 minutes (30 days)
       ago, or has never been sent.

  Rows are ordered oldest enrichment version first, with never-enriched
  documents at the front.
:)

declare variable $target_enrichment_major_version as xs:int external;
declare variable $target_enrichment_minor_version as xs:int external;
declare variable $target_parser_major_version as xs:int external;
declare variable $target_parser_minor_version as xs:int external;

xdmp:to-json(xdmp:sql(
  "SELECT process_data.uri, enrich_version_string, minutes_since_enrichment_request
  FROM (
    SELECT
      process_data.uri,
      enrich_version_string, enrich_major_version, enrich_minor_version,
      parser_major_version, parser_minor_version,
      DATEDIFF('minute', last_sent_to_enrichment, CURRENT_TIMESTAMP) AS minutes_since_enrichment_request
    FROM documents.process_data
    JOIN documents.process_property_data ON process_data.uri = process_property_data.uri
  )
  WHERE (
    (enrich_version_string IS NULL) OR
    (enrich_major_version < @target_enrichment_major_version) OR
    (enrich_major_version = @target_enrichment_major_version AND enrich_minor_version < @target_enrichment_minor_version)
  ) AND (
    (parser_major_version = @target_parser_major_version AND parser_minor_version = @target_parser_minor_version)
  )
  AND (minutes_since_enrichment_request > 43200 OR minutes_since_enrichment_request IS NULL)
  ORDER BY enrich_major_version ASC NULLS FIRST, enrich_minor_version ASC",
  "array",
  (: Bind the external variables to the @-prefixed placeholders used in the SQL above. :)
  map:new((
    map:entry("target_enrichment_major_version", $target_enrichment_major_version),
    map:entry("target_enrichment_minor_version", $target_enrichment_minor_version),
    map:entry("target_parser_major_version", $target_parser_major_version),
    map:entry("target_parser_minor_version", $target_parser_minor_version)
  ))
))

6 changes: 4 additions & 2 deletions src/caselawclient/xquery_type_dicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ class GetLastModifiedDict(MarkLogicAPIDict):

# get_pending_enrichment_for_version.xqy
class GetPendingEnrichmentForVersionDict(MarkLogicAPIDict):
    """Variables supplied to ``get_pending_enrichment_for_version.xqy``.

    Each target version is passed as separate integer major and minor
    components; the key names match the external variables that the
    XQuery script declares.
    """

    # Enrichment version that stored documents are compared against.
    target_enrichment_major_version: int
    target_enrichment_minor_version: int
    # Parser version that stored documents must match.
    target_parser_major_version: int
    target_parser_minor_version: int


# get_pending_parse_for_version.xqy
Expand Down

0 comments on commit 7f3d1f6

Please sign in to comment.