Skip to content

Commit

Permalink
Fix casefolding for case-insensitive queries
Browse files Browse the repository at this point in the history
Change-Id: I23db7454c7ab0a54fee4c9c450665b294ccc1324
  • Loading branch information
Akron authored and margaretha committed Aug 5, 2023
1 parent 2da6b4c commit 4bc5c46
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 2 deletions.
4 changes: 3 additions & 1 deletion Changes
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
0.61.3 2023-07-11
0.61.3 2023-07-17
- Add totalResources to results (diewald)
- [bugfix] Fix casefolding for case-insensitive queries
(diewald).

0.61.2 2023-04-05
- [bugfix] Fix pagebreak retrieval (margaretha, diewald)
Expand Down
17 changes: 16 additions & 1 deletion src/main/java/de/ids_mannheim/korap/KrillQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -1309,7 +1309,22 @@ else if (layer.equals("i")) {
value.setLength(offset);

// Add key to value
value.append(isCaseInsensitive ? key.toLowerCase() : key);

if (isCaseInsensitive) {

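// This supports both legacy search (plain lowercasing) and locale-dependent
// case-folding: if lowercasing the key differs from upper- then lowercasing it
// (e.g. "ß" -> "SS" -> "ss"), both variants are added as alternatives.
// This probably mimics the Perl fc behaviour better than icu4j.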
if (key.toLowerCase().equals(key.toUpperCase().toLowerCase())) {
value.append(key.toLowerCase());
} else {
value.append(key.toLowerCase());
values.push(value.toString());
value.setLength(offset);
value.append(key.toUpperCase().toLowerCase());
};
} else {
value.append(key);
};

// TODO:
// This should iterate over all values as well
Expand Down
28 changes: 28 additions & 0 deletions src/test/java/de/ids_mannheim/korap/search/TestKrill.java
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,34 @@ public void searchIndex () throws IOException {
assertTrue(res.at("/matches/0/snippet").isMissingNode());
assertEquals("dem", res.at("/matches/0/tokens/left/0").asText());
assertEquals("Buchstaben", res.at("/matches/0/tokens/match/0").asText());

// The test data is old and therefore predates the correct casefolding.
// However, we can check the correct behaviour nonetheless.
String json = "{\"query\":{\"@type\":\"koral:token\",\"wrap\":{\"@type\":\"koral:term\",\"flags\": [\"flags:caseInsensitive\"],\"key\": \"Grösstenteils\",\"layer\":\"orth\",\"match\": \"match:eq\"}}}";

ObjectMapper mapper = new ObjectMapper();

ks = new Krill(json);
kr = ks.apply(ki);
assertEquals(kr.getTotalResults(), 0);
assertEquals(kr.getItemsPerPage(), 25);
assertEquals(kr.getMatches().size(), 0);

res = mapper.readTree(kr.toJsonString());
assertEquals(res.at("/meta/serialQuery").asText(),"tokens:i:grösstenteils");

json = "{\"query\":{\"@type\":\"koral:token\",\"wrap\":{\"@type\":\"koral:term\",\"flags\": [\"flags:caseInsensitive\"],\"key\": \"Größtenteils\",\"layer\":\"orth\",\"match\": \"match:eq\"}}}";

ks = new Krill(json);
kr = ks.apply(ki);

assertEquals(kr.getTotalResults(), 2);
assertEquals(kr.getItemsPerPage(), 25);
assertEquals(kr.getMatches().size(), 2);

res = mapper.readTree(kr.toJsonString());
assertEquals(res.at("/meta/serialQuery").asText(),
"spanOr([tokens:i:grösstenteils, tokens:i:größtenteils])");
};


Expand Down

0 comments on commit 4bc5c46

Please sign in to comment.