Skip to content

Commit

Permalink
Set default limit for reparse/enrich results to 1000, customisable
Browse files (browse the repository at this point in the history)
  • Loading branch information
dragon-dxw committed Oct 30, 2024
1 parent 83406dc commit 834a938
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog 1.0.0].
### Feat

- **FCL-386**: search query can now be passed to get_document_by_uri
- **FCL-318**: Allow setting a limit on the number of enrich/reparse targets returned (defaults to 1000)

## v27.2.0 (2024-10-28)

Expand Down
4 changes: 4 additions & 0 deletions src/caselawclient/Client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1080,13 +1080,15 @@ def get_pending_enrichment_for_version(
self,
target_enrichment_version: tuple[int, int],
target_parser_version: tuple[int, int],
maximum_records: int = 1000,
) -> list[list[Any]]:
"""Retrieve documents which are not yet enriched with a given version."""
vars: query_dicts.GetPendingEnrichmentForVersionDict = {
"target_enrichment_major_version": target_enrichment_version[0],
"target_enrichment_minor_version": target_enrichment_version[1],
"target_parser_major_version": target_parser_version[0],
"target_parser_minor_version": target_parser_version[1],
"maximum_records": maximum_records,
}
results: list[list[Any]] = json.loads(
get_single_string_from_marklogic_response(
Expand Down Expand Up @@ -1115,11 +1117,13 @@ def get_highest_parser_version(self) -> tuple[int, int]:
def get_pending_parse_for_version(
self,
target_version: tuple[int, int],
maximum_records: int = 1000,
) -> list[list[Any]]:
"""Retrieve documents which are not yet parsed with a given version."""
vars: query_dicts.GetPendingParseForVersionDict = {
"target_major_version": target_version[0],
"target_minor_version": target_version[1],
"maximum_records": maximum_records,
}
results: list[list[Any]] = json.loads(
get_single_string_from_marklogic_response(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
xquery version "1.0-ml";

declare namespace xdmp="http://marklogic.com/xdmp";
declare variable $target_enrichment_major_version as xs:int external;
declare variable $target_enrichment_minor_version as xs:int external;
declare variable $target_parser_major_version as xs:int external;
declare variable $target_parser_minor_version as xs:int external;
declare variable $maximum_records as xs:int? external := 1000;

xdmp:to-json(xdmp:sql(
"SELECT process_data.uri, enrich_version_string, minutes_since_enrichment_request
Expand All @@ -23,13 +25,16 @@ xdmp:to-json(xdmp:sql(
(parser_major_version = @target_parser_major_version AND parser_minor_version = @target_parser_minor_version)
)
AND (minutes_since_enrichment_request > 43200 OR minutes_since_enrichment_request IS NULL)
ORDER BY enrich_major_version ASC NULLS FIRST, enrich_minor_version ASC",
ORDER BY enrich_major_version ASC NULLS FIRST, enrich_minor_version ASC
LIMIT @maximum_records",
"array",
map:new((
map:entry("target_enrichment_major_version", $target_enrichment_major_version),
map:entry("target_enrichment_minor_version", $target_enrichment_minor_version),
map:entry("target_parser_major_version", $target_parser_major_version),
map:entry("target_parser_minor_version", $target_parser_minor_version)
map:entry("target_parser_minor_version", $target_parser_minor_version),
map:entry("maximum_records", $maximum_records)

))
))

11 changes: 7 additions & 4 deletions src/caselawclient/xquery/get_pending_parse_for_version.xqy
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ xquery version "1.0-ml";

declare variable $target_major_version as xs:int external;
declare variable $target_minor_version as xs:int external;
declare variable $maximum_records as xs:int? external := 1000;

xdmp:to-json(xdmp:sql(
"SELECT process_data.uri, parser_version_string, minutes_since_parse_request
Expand All @@ -18,11 +19,13 @@ xdmp:to-json(xdmp:sql(
(parser_major_version <= @target_major_version AND parser_minor_version < @target_minor_version)
)
AND (minutes_since_parse_request > 43200 OR minutes_since_parse_request IS NULL)
ORDER BY parser_major_version ASC NULLS FIRST, parser_minor_version ASC",
ORDER BY parser_major_version ASC NULLS FIRST, parser_minor_version ASC
LIMIT @maximum_records",
"array",
map:new((
map:entry("target_major_version", $target_major_version),
map:entry("target_minor_version", $target_minor_version)
map:entry("target_minor_version", $target_minor_version),
map:entry("maximum_records", $maximum_records)

))
))

))
2 changes: 2 additions & 0 deletions src/caselawclient/xquery_type_dicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ class GetLastModifiedDict(MarkLogicAPIDict):

# get_pending_enrichment_for_version.xqy
class GetPendingEnrichmentForVersionDict(MarkLogicAPIDict):
maximum_records: Optional[int]
target_enrichment_major_version: int
target_enrichment_minor_version: int
target_parser_major_version: int
Expand All @@ -96,6 +97,7 @@ class GetPendingEnrichmentForVersionDict(MarkLogicAPIDict):

# get_pending_parse_for_version.xqy
class GetPendingParseForVersionDict(MarkLogicAPIDict):
maximum_records: Optional[int]
target_major_version: int
target_minor_version: int

Expand Down

0 comments on commit 834a938

Please sign in to comment.