Skip to content

Commit

Permalink
Support token lists in match info (closes #88)
Browse files Browse the repository at this point in the history
Change-Id: I086029ef01b7064d652964ec8bf62460c84ba569
  • Loading branch information
Akron committed Apr 11, 2023
1 parent 405413e commit ea44b34
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 8 deletions.
6 changes: 4 additions & 2 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
0.61.2 2023-03-28
0.61.2 2023-04-05
- [bugfix] Fix pagebreak retrieval (margaretha, diewald)
- [feature] Support token lists for match infos (solved #88,
diewald)

0.61.1 2023-02-14
- [bugfix] Fixed ensuring same documents of spans (solved #87,
margaretha)
margaretha)

0.61.0 2022-11-16
- [cleanup] Remove ehcache from dependencies (diewald)
Expand Down
28 changes: 24 additions & 4 deletions src/main/java/de/ids_mannheim/korap/KrillIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -933,6 +933,18 @@ public Match getMatchInfo (String id, String field, boolean info,
includeSpans, includeHighlights, extendToSentence);
};

/**
 * Get a match, always including snippets and never including
 * token lists.
 *
 * Convenience overload that forwards all arguments to the variant
 * accepting explicit snippet and token flags.
 *
 * @throws QueryException
 */
public Match getMatchInfo (String idString, String field, boolean info,
        List<String> foundry, List<String> layer, boolean includeSpans,
        boolean includeHighlights, boolean extendToSentence)
        throws QueryException {

    // Fixed settings applied by this overload
    final boolean includeSnippets = true;
    final boolean includeTokens = false;

    return getMatchInfo(idString, field, info, foundry, layer,
            includeSpans, includeSnippets, includeTokens,
            includeHighlights, extendToSentence);
}

/**
* Get a match.
Expand All @@ -943,7 +955,8 @@ public Match getMatchInfo (String id, String field, boolean info,
*/
public Match getMatchInfo (String idString, String field, boolean info,
List<String> foundry, List<String> layer, boolean includeSpans,
boolean includeHighlights, boolean extendToSentence)
boolean includeSnippets, boolean includeTokens,
boolean includeHighlights, boolean extendToSentence)
throws QueryException {

if (DEBUG)
Expand All @@ -960,9 +973,16 @@ public Match getMatchInfo (String idString, String field, boolean info,
if (match.getStartPos() == -1)
return match;

// For the moment, direct match retrievals will always include
// snippets. But this may change in the future.
match.hasSnippet = true;
if (includeTokens)
match.hasTokens = true;

if (includeSnippets) {
match.hasSnippet = true;
} else {
includeHighlights = false;
includeSpans = false;
info = false;
};

// Create a filter based on the corpusID and the docID
BooleanQuery bool = new BooleanQuery();
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/de/ids_mannheim/korap/response/Match.java
Original file line number Diff line number Diff line change
Expand Up @@ -1399,6 +1399,9 @@ else if (element[3] == 2) {
public ObjectNode getSnippetTokens () {
ObjectNode json = mapper.createObjectNode();

if (!this._processHighlight())
return null;

if (this.processed && this.snippetTokens != null)
return this.snippetTokens;

Expand Down
66 changes: 64 additions & 2 deletions src/test/java/de/ids_mannheim/korap/index/TestMatchIdentifier.java
Original file line number Diff line number Diff line change
Expand Up @@ -878,10 +878,72 @@ public void indexExample7Dependencies ()
"</span>"+
"<span class=\"context-right\">"+
"<span class=\"more\"></span>"+
"</span>",
km.getSnippetHTML());
"</span>",
km.getSnippetHTML());
};

/**
 * Retrieve the same match twice - once with token lists only and
 * once with token lists and snippets - and check that the token
 * list is identical in both responses.
 */
@Test
public void indexExample8Tokens ()
        throws IOException, QueryException {
    KrillIndex ki = new KrillIndex();
    ki.addDoc(createSimpleFieldDoc2());
    ki.commit();

    // Tokens only: snippets are switched off
    Match km = ki.getMatchInfo(
        "match-c1!d1-p0-4",
        "tokens",
        true,   // info
        null,   // foundry
        null,   // layer
        true,   // includeSpans
        false,  // includeSnippets
        true,   // includeTokens
        true,   // includeHighlights
        true);  // extendToSentence

    JsonNode res = mapper.readTree(km.toJsonString());
    assertEquals("c1", res.at("/corpusID").asText());
    assertEquals("d1", res.at("/docID").asText());
    assertFalse(res.at("/hasSnippet").asBoolean());
    assertTrue(res.at("/hasTokens").asBoolean());
    assertMatchTokens(res, "a", "b", "c", "a");

    // Tokens and snippets together
    km = ki.getMatchInfo(
        "match-c1!d1-p0-4",
        "tokens",
        true,   // info
        null,   // foundry
        null,   // layer
        true,   // includeSpans
        true,   // includeSnippets
        true,   // includeTokens
        true,   // includeHighlights
        true);  // extendToSentence

    res = mapper.readTree(km.toJsonString());
    assertEquals("c1", res.at("/corpusID").asText());
    assertEquals("d1", res.at("/docID").asText());
    assertTrue(res.at("/hasSnippet").asBoolean());
    assertTrue(res.at("/hasTokens").asBoolean());
    assertMatchTokens(res, "a", "b", "c", "a");
};

/**
 * Assert that "/tokens/match" of the response holds exactly the
 * expected surface forms, in order, and nothing beyond them.
 */
private void assertMatchTokens (JsonNode res, String ... expected) {
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i],
                     res.at("/tokens/match/" + i).asText());
    };

    // No further token may follow the expected ones
    assertTrue(res.at("/tokens/match/" + expected.length).isMissingNode());
};


@Test
public void indexExampleMultipleFoundries ()
Expand Down

0 comments on commit ea44b34

Please sign in to comment.