Skip to content

Commit

Permalink
Merge pull request #743 from nationalarchives/decouple-query-parameters
Browse files Browse the repository at this point in the history
[FCL-396] Change search query highlighting behaviour
  • Loading branch information
jacksonj04 authored Oct 28, 2024
2 parents 092cc73 + 5d55064 commit e273714
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 4 deletions.
4 changes: 4 additions & 0 deletions src/caselawclient/Client.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ def get_judgment_xml_bytestring(
judgment_uri: DocumentURIString,
version_uri: Optional[DocumentURIString] = None,
show_unpublished: bool = False,
search_query: Optional[str] = None,
) -> bytes:
marklogic_document_uri = self._format_uri_for_marklogic(judgment_uri)
marklogic_document_version_uri = (
Expand All @@ -418,6 +419,7 @@ def get_judgment_xml_bytestring(
"uri": marklogic_document_uri,
"version_uri": marklogic_document_version_uri,
"show_unpublished": show_unpublished,
"search_query": search_query,
}

response = self._eval_as_bytes(vars, "get_judgment.xqy")
Expand All @@ -433,11 +435,13 @@ def get_judgment_xml(
judgment_uri: DocumentURIString,
version_uri: Optional[DocumentURIString] = None,
show_unpublished: bool = False,
search_query: Optional[str] = None,
) -> str:
return self.get_judgment_xml_bytestring(
judgment_uri,
version_uri,
show_unpublished,
search_query=search_query,
).decode(encoding="utf-8")

def set_document_name(
Expand Down
8 changes: 6 additions & 2 deletions src/caselawclient/models/documents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class Document:
Individual document classes should extend this list where necessary to validate document type-specific attributes.
"""

def __init__(self, uri: str, api_client: "MarklogicApiClient"):
def __init__(self, uri: str, api_client: "MarklogicApiClient", search_query: Optional[str] = None):
"""
:param uri: For historical reasons this accepts a pseudo-URI which may include leading or trailing slashes.
Expand All @@ -117,7 +117,11 @@ def __init__(self, uri: str, api_client: "MarklogicApiClient"):
raise DocumentNotFoundError(f"Document {self.uri} does not exist")

self.body: DocumentBody = DocumentBody(
xml_bytestring=self.api_client.get_judgment_xml_bytestring(self.uri, show_unpublished=True),
xml_bytestring=self.api_client.get_judgment_xml_bytestring(
self.uri,
show_unpublished=True,
search_query=search_query,
),
)
""" `Document.body` represents the XML of the document itself, without any information such as version tracking or properties. """

Expand Down
8 changes: 8 additions & 0 deletions src/caselawclient/models/documents/transforms/html.xsl
Original file line number Diff line number Diff line change
Expand Up @@ -973,6 +973,14 @@
</xsl:element>
</xsl:template>

<!-- search query highlighting -->
<!-- Re-emits each uk:mark element under its local name ("mark"), copying every
     attribute it carries and running its children through the other templates.
     No numbering happens here; any id attribute is simply copied through. -->

<xsl:template match="uk:mark">
<xsl:element name="{ local-name(.) }">
<xsl:copy-of select="@*"/>
<xsl:apply-templates />
</xsl:element>
</xsl:template>

<!-- text -->

Expand Down
49 changes: 47 additions & 2 deletions src/caselawclient/xquery/get_judgment.xqy
Original file line number Diff line number Diff line change
@@ -1,8 +1,38 @@
xquery version "1.0-ml";

declare namespace xdmp = "http://marklogic.com/xdmp";
declare namespace cts = "http://marklogic.com/cts";
declare namespace uk = "https://caselaw.nationalarchives.gov.uk/akn";
import module namespace helper = "https://caselaw.nationalarchives.gov.uk/helper" at "/judgments/search/helper.xqy";

declare variable $show_unpublished as xs:boolean? external;
declare variable $uri as xs:string external;
declare variable $version_uri as xs:string? external;
declare variable $search_query as xs:string? external;

(:
 : Stylesheet used to number search-highlight marks.
 : It is an identity transform (every attribute and node is copied through
 : unchanged) with one exception: each `uk:mark` element is copied with its
 : existing attributes plus an `id` attribute of the form `mark_N`, where N is
 : the number of `uk:mark` elements on its `preceding` axis, so the first mark
 : in the document is `mark_0`.
 : Note that `xsl:output method` is changed from `html` to `xml` and we've namespaced the tag
 :)
let $number_marks_xslt := (
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:uk="https://caselaw.nationalarchives.gov.uk/akn"
version="2.0">
<xsl:output method="xml" />
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
<xsl:template match="uk:mark">
<xsl:copy>
<xsl:copy-of select="@*" />
<xsl:attribute name="id">
<xsl:text>mark_</xsl:text>
<xsl:value-of select="count(preceding::uk:mark)"/>
</xsl:attribute>
<xsl:apply-templates />
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
)

let $judgment := fn:document($uri)
let $version := if ($version_uri) then fn:document($version_uri) else ()
Expand All @@ -11,11 +41,26 @@ let $is_published := $judgment_published_property/text()

let $document_to_return := if ($version_uri) then $version else $judgment

let $return_value := if ($show_unpublished) then

let $raw_xml := if ($show_unpublished) then
$document_to_return
else if (xs:boolean($is_published)) then
$document_to_return
else
()

return $return_value
(: If a search query string is present, highlight instances of it and number
   the resulting marks.
   cts:highlight needs a node to work on, so highlighting is skipped when there
   is nothing to return: $raw_xml is the empty sequence when the document is
   unpublished and $show_unpublished is not set, or when the document is absent.
   In that case the empty sequence is passed through unchanged, exactly as it
   is when no search query is supplied. :)
let $transformed := if ($search_query and fn:exists($raw_xml)) then
    xdmp:xslt-eval(
        $number_marks_xslt,
        cts:highlight(
            $raw_xml,
            helper:make-q-query($search_query),
            <uk:mark>{$cts:text}</uk:mark>
        )
    )
else
    $raw_xml


return $transformed
1 change: 1 addition & 0 deletions src/caselawclient/xquery_type_dicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class GetComponentsForDocumentDict(MarkLogicAPIDict):

# get_judgment.xqy
class GetJudgmentDict(MarkLogicAPIDict):
search_query: Optional[str]
show_unpublished: Optional[bool]
uri: MarkLogicDocumentURIString
version_uri: Optional[MarkLogicDocumentVersionURIString]
Expand Down
7 changes: 7 additions & 0 deletions tests/models/documents/test_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,13 @@ def test_validates_against_schema(self, mock_api_client):

mock_api_client.validate_document.assert_called_with(document.uri)

def test_document_initialises_with_search_query_string(self, mock_api_client):
    """A search query given to Document is forwarded when the XML body is fetched."""
    query = "test search query"
    document = Document("test/1234", mock_api_client, search_query=query)

    fetch_body = mock_api_client.get_judgment_xml_bytestring
    fetch_body.assert_called_with(
        document.uri,
        show_unpublished=True,
        search_query=query,
    )


class TestDocumentEnrichedRecently:
def test_enriched_recently_returns_false_when_never_enriched(self, mock_api_client):
Expand Down
1 change: 1 addition & 0 deletions tests/models/test_judgments.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def test_judgment_neutral_citation(self, mock_api_client):
mock_api_client.get_judgment_xml_bytestring.assert_called_once_with(
"test/1234",
show_unpublished=True,
search_query=None,
)

@pytest.mark.parametrize(
Expand Down
1 change: 1 addition & 0 deletions tests/models/test_press_summaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def test_press_summary_neutral_citation(self, mock_api_client):
mock_api_client.get_judgment_xml_bytestring.assert_called_once_with(
"test/1234",
show_unpublished=True,
search_query=None,
)

@pytest.mark.parametrize(
Expand Down

0 comments on commit e273714

Please sign in to comment.