Skip to content

Commit

Permalink
Re-enrichment now uses minor version, not just major version
Browse files Browse the repository at this point in the history
  • Loading branch information
jacksonj04 committed Jan 24, 2024
1 parent 0e1f959 commit 48e5e75
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 10 deletions.
9 changes: 5 additions & 4 deletions src/caselawclient/Client.py
Original file line number Diff line number Diff line change
Expand Up @@ -952,7 +952,7 @@ def get_combined_stats_table(self) -> list[list[Any]]:

return results

def get_highest_enrichment_version(self) -> int:
def get_highest_enrichment_version(self) -> tuple[int, int]:
"""This gets the highest enrichment version in the database,
so if nothing has been enriched with the most recent version of enrichment,
this won't reflect that change."""
Expand All @@ -965,14 +965,15 @@ def get_highest_enrichment_version(self) -> int:
)
)

return int(table[1][1])
return (int(table[1][1]), int(table[1][2]))

def get_pending_enrichment_for_version(
self, target_version: int
self, target_version: tuple[int, int]
) -> list[list[Any]]:
"""Retrieve documents which are not yet enriched with a given version."""
vars: query_dicts.GetPendingEnrichmentForVersionDict = {
"target_version": target_version
"target_major_version": target_version[0],
"target_minor_version": target_version[1],
}
results: list[list[Any]] = json.loads(
get_single_string_from_marklogic_response(
Expand Down
20 changes: 15 additions & 5 deletions src/caselawclient/xquery/get_pending_enrichment_for_version.xqy
Original file line number Diff line number Diff line change
@@ -1,18 +1,28 @@
xquery version "1.0-ml";

declare variable $target_version as xs:int external;
declare variable $target_major_version as xs:int external;
declare variable $target_minor_version as xs:int external;

xdmp:to-json(xdmp:sql(
"SELECT process_data.uri, enrich_version_string, minutes_since_enrichment_request
FROM (
SELECT process_data.uri, enrich_version_string, enrich_major_version, DATEDIFF('minute', last_sent_to_enrichment, CURRENT_TIMESTAMP) AS minutes_since_enrichment_request
SELECT
process_data.uri,
enrich_version_string, enrich_major_version, enrich_minor_version,
DATEDIFF('minute', last_sent_to_enrichment, CURRENT_TIMESTAMP) AS minutes_since_enrichment_request
FROM documents.process_data
JOIN documents.process_property_data ON process_data.uri = process_property_data.uri
)
WHERE ((enrich_version_string IS NULL) OR (enrich_major_version < @target_version))
WHERE (
(enrich_version_string IS NULL) OR
(enrich_major_version < @target_major_version) OR
(enrich_major_version = @target_major_version AND enrich_minor_version < @target_minor_version)
)
AND (minutes_since_enrichment_request > 43200 OR minutes_since_enrichment_request IS NULL)
ORDER BY enrich_major_version ASC NULLS FIRST",
ORDER BY enrich_major_version ASC NULLS FIRST, enrich_minor_version ASC",
"array",
map:new(map:entry("target_version", $target_version))
map:new((
map:entry("target_major_version", $target_major_version),
map:entry("target_minor_version", $target_minor_version)
))
))

3 changes: 2 additions & 1 deletion src/caselawclient/xquery_type_dicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ class GetLastModifiedDict(MarkLogicAPIDict):

# get_pending_enrichment_for_version.xqy
# Variable names and types passed to get_pending_enrichment_for_version.xqy.
# NOTE(review): this listing is a rendered diff ("2 additions & 1 deletion"), so
# old and new fields appear together and leading indentation has been stripped.
class GetPendingEnrichmentForVersionDict(MarkLogicAPIDict):
# Presumably the pre-change single-integer field that the two fields below replace — confirm against the real file.
target_version: int
# Major component of the target enrichment version; bound to $target_major_version (xs:int) in the query.
target_major_version: int
# Minor component of the target enrichment version; bound to $target_minor_version (xs:int) in the query.
target_minor_version: int


# get_pending_parse_for_version.xqy
Expand Down

0 comments on commit 48e5e75

Please sign in to comment.