diff --git a/Changes b/Changes index 185fd0fe..38072b59 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,7 @@ -0.61.3 2023-07-11 +0.61.3 2023-07-17 - Add totalResources to results (diewald) + - [bugfix] Fix casefolding for case-insensitive queries + (diewald). 0.61.2 2023-04-05 - [bugfix] Fix pagebreak retrieval (margaretha, diewald) diff --git a/src/main/java/de/ids_mannheim/korap/KrillQuery.java b/src/main/java/de/ids_mannheim/korap/KrillQuery.java index c10d93ce..6729521f 100644 --- a/src/main/java/de/ids_mannheim/korap/KrillQuery.java +++ b/src/main/java/de/ids_mannheim/korap/KrillQuery.java @@ -1309,7 +1309,22 @@ else if (layer.equals("i")) { value.setLength(offset); // Add key to value - value.append(isCaseInsensitive ? key.toLowerCase() : key); + + if (isCaseInsensitive) { + + // This supports both legacy search and locale-dependent case-folding. + // It mimics the Perl fc behaviour probably better than icu4j. + if (key.toLowerCase().equals(key.toUpperCase().toLowerCase())) { + value.append(key.toLowerCase()); + } else { + value.append(key.toLowerCase()); + values.push(value.toString()); + value.setLength(offset); + value.append(key.toUpperCase().toLowerCase()); + }; + } else { + value.append(key); + }; // TODO: // This should iterate over all values as well diff --git a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java index f36cc285..168b83d7 100644 --- a/src/test/java/de/ids_mannheim/korap/search/TestKrill.java +++ b/src/test/java/de/ids_mannheim/korap/search/TestKrill.java @@ -154,6 +154,34 @@ public void searchIndex () throws IOException { assertTrue(res.at("/matches/0/snippet").isMissingNode()); assertEquals("dem", res.at("/matches/0/tokens/left/0").asText()); assertEquals("Buchstaben", res.at("/matches/0/tokens/match/0").asText()); + + // The test-data is old and therefore precedes the correct casefolding. + // However, we can check the correct behaviour nonetheless. 
+ String json = "{\"query\":{\"@type\":\"koral:token\",\"wrap\":{\"@type\":\"koral:term\",\"flags\": [\"flags:caseInsensitive\"],\"key\": \"Grösstenteils\",\"layer\":\"orth\",\"match\": \"match:eq\"}}}"; + + ObjectMapper mapper = new ObjectMapper(); + + ks = new Krill(json); + kr = ks.apply(ki); + assertEquals(kr.getTotalResults(), 0); + assertEquals(kr.getItemsPerPage(), 25); + assertEquals(kr.getMatches().size(), 0); + + res = mapper.readTree(kr.toJsonString()); + assertEquals(res.at("/meta/serialQuery").asText(),"tokens:i:grösstenteils"); + + json = "{\"query\":{\"@type\":\"koral:token\",\"wrap\":{\"@type\":\"koral:term\",\"flags\": [\"flags:caseInsensitive\"],\"key\": \"Größtenteils\",\"layer\":\"orth\",\"match\": \"match:eq\"}}}"; + + ks = new Krill(json); + kr = ks.apply(ki); + + assertEquals(kr.getTotalResults(), 2); + assertEquals(kr.getItemsPerPage(), 25); + assertEquals(kr.getMatches().size(), 2); + + res = mapper.readTree(kr.toJsonString()); + assertEquals(res.at("/meta/serialQuery").asText(), + "spanOr([tokens:i:grösstenteils, tokens:i:größtenteils])"); };