Skip to content

Commit

Permalink
Make pending enrichment query aware of parser version
Browse files Browse the repository at this point in the history
  • Loading branch information
jacksonj04 committed Jan 30, 2024
1 parent 9831ef5 commit 929f8c7
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 10 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ The format is based on [Keep a Changelog 1.0.0].

## Unreleased

- **Breaking:** `Client.get_pending_enrichment_for_version` now requires both a target enrichment version and a target parser version, and will not include documents which have not been parsed with exactly the target parser version.

## [Release 20.0.0]

- **Feature:** New `Client.get_pending_parse_for_version` and `Client.get_highest_parser_version` methods to help find documents in need of re-parsing.
Expand Down
10 changes: 7 additions & 3 deletions src/caselawclient/Client.py
Original file line number Diff line number Diff line change
Expand Up @@ -968,12 +968,16 @@ def get_highest_enrichment_version(self) -> tuple[int, int]:
return (int(table[1][1]), int(table[1][2]))

def get_pending_enrichment_for_version(
self, target_version: tuple[int, int]
self,
target_enrichment_version: tuple[int, int],
target_parser_version: tuple[int, int],
) -> list[list[Any]]:
"""Retrieve documents which are not yet enriched with a given version."""
vars: query_dicts.GetPendingEnrichmentForVersionDict = {
"target_major_version": target_version[0],
"target_minor_version": target_version[1],
"target_enrichment_major_version": target_enrichment_version[0],
"target_enrichment_minor_version": target_enrichment_version[1],
"target_parser_major_version": target_parser_version[0],
"target_parser_minor_version": target_parser_version[1],
}
results: list[list[Any]] = json.loads(
get_single_string_from_marklogic_response(
Expand Down
17 changes: 12 additions & 5 deletions src/caselawclient/xquery/get_pending_enrichment_for_version.xqy
Original file line number Diff line number Diff line change
@@ -1,28 +1,35 @@
xquery version "1.0-ml";

declare variable $target_major_version as xs:int external;
declare variable $target_minor_version as xs:int external;
declare variable $target_enrichment_major_version as xs:int external;
declare variable $target_enrichment_minor_version as xs:int external;
declare variable $target_parser_major_version as xs:int external;
declare variable $target_parser_minor_version as xs:int external;

(:
 : Returns, as JSON, one row per document that still needs enriching:
 :   uri, enrich_version_string, minutes_since_enrichment_request
 :
 : A document is selected when ALL of the following hold:
 :   - it has no enrichment version, or its enrichment version is strictly
 :     lower than the target enrichment version (major compared first, minor
 :     only used to break a tie on major);
 :   - its parser version is exactly the target parser version;
 :   - it has never been sent to enrichment, or was last sent more than
 :     43200 minutes (30 days) ago.
 :
 : Rows are ordered by enrichment version ascending, unenriched documents first.
 :)
xdmp:to-json(xdmp:sql(
"SELECT process_data.uri, enrich_version_string, minutes_since_enrichment_request
FROM (
SELECT
process_data.uri,
enrich_version_string, enrich_major_version, enrich_minor_version,
parser_major_version, parser_minor_version,
DATEDIFF('minute', last_sent_to_enrichment, CURRENT_TIMESTAMP) AS minutes_since_enrichment_request
FROM documents.process_data
JOIN documents.process_property_data ON process_data.uri = process_property_data.uri
)
WHERE (
(enrich_version_string IS NULL) OR
(enrich_major_version <= @target_major_version AND enrich_minor_version < @target_minor_version)
(enrich_major_version < @target_enrichment_major_version OR (enrich_major_version = @target_enrichment_major_version AND enrich_minor_version < @target_enrichment_minor_version))
) AND (
(parser_major_version = @target_parser_major_version AND parser_minor_version = @target_parser_minor_version)
)
AND (minutes_since_enrichment_request > 43200 OR minutes_since_enrichment_request IS NULL)
ORDER BY enrich_major_version ASC NULLS FIRST, enrich_minor_version ASC",
"array",
map:new((
map:entry("target_major_version", $target_major_version),
map:entry("target_minor_version", $target_minor_version)
map:entry("target_enrichment_major_version", $target_enrichment_major_version),
map:entry("target_enrichment_minor_version", $target_enrichment_minor_version),
map:entry("target_parser_major_version", $target_parser_major_version),
map:entry("target_parser_minor_version", $target_parser_minor_version)
))
))

6 changes: 4 additions & 2 deletions src/caselawclient/xquery_type_dicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ class GetLastModifiedDict(MarkLogicAPIDict):

# get_pending_enrichment_for_version.xqy
class GetPendingEnrichmentForVersionDict(MarkLogicAPIDict):
target_major_version: int
target_minor_version: int
target_enrichment_major_version: int
target_enrichment_minor_version: int
target_parser_major_version: int
target_parser_minor_version: int


# get_pending_parse_for_version.xqy
Expand Down

0 comments on commit 929f8c7

Please sign in to comment.