From ad2166f26ab81da2785ba1612853e7e6c3907034 Mon Sep 17 00:00:00 2001 From: Tom Udding Date: Sun, 19 Nov 2023 19:29:39 +0100 Subject: [PATCH] Remove `iconv` for decision search highlighting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unfortunately, by using `iconv` there is a greater chance that the resulting (converted) text is either shorter or longer than the original. For example, by using the euro symbol (€) we artificially increase the length of the texts we are comparing: ```php iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', '€'); // 'EUR' ``` This is problematic, as it will result in incorrect alignment of the inserted highlight markers. While this can be mitigated by carefully calculating corrections for the offsets, this quickly makes it more difficult to keep maintaining this functionality, especially when there need to be more of these exceptions. Only using the transliterator with `Any-Latin; Latin-ASCII` seems to preserve the length of the compared elements and allows for searching accented/special characters. There are characters that are not part of/do not exist in `Latin-ASCII`; however, these characters are probably never used in the setting of the association. 
--- composer.json | 1 - .../view/decision/decision/search.phtml | 17 +++++------------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/composer.json b/composer.json index 5a0a673b43..bd9ba4982e 100644 --- a/composer.json +++ b/composer.json @@ -15,7 +15,6 @@ "ext-exif": "*", "ext-fileinfo": "*", "ext-gd": "*", - "ext-iconv": "*", "ext-intl": "*", "ext-imagick": "^3.5.0", "ext-mbstring": "*", diff --git a/module/Decision/view/decision/decision/search.phtml b/module/Decision/view/decision/decision/search.phtml index 50230f36e7..800fd63c23 100644 --- a/module/Decision/view/decision/decision/search.phtml +++ b/module/Decision/view/decision/decision/search.phtml @@ -23,25 +23,18 @@ function highlightSearch( string $decision, string $search, ): string { - // Convert the decision to something that is easily searchable (i.e. it MUST NOT contain any non-ASCII characters). - $transliteratedDecision = iconv( - 'UTF-8', - 'ASCII//TRANSLIT//IGNORE', - transliterator_transliterate('Any-Latin; Latin-ASCII', $decision), - ); + // Convert the decision to something that is easily searchable (i.e. it MUST contain only Latin-ASCII characters). + $transliteratedDecision = transliterator_transliterate('Any-Latin; Latin-ASCII', $decision); // Do the same for the search prompt, as otherwise searches WITH non-ASCII characters will not work. - $search = iconv( - 'UTF-8', - 'ASCII//TRANSLIT//IGNORE', - transliterator_transliterate('Any-Latin; Latin-ASCII', $search), - ); + $search = transliterator_transliterate('Any-Latin; Latin-ASCII', $search); $offset = 0; $output = ''; $length = mb_strlen($search); // There is a very important assumption here; the transliterated version of the decision MUST be exactly as long as - // the original version. Otherwise, the insertion is done with an incorrect offset. + // the original version. Otherwise, the insertion is done with an incorrect offset. As such, using `iconv` is NOT + // good as it will either extend (e.g. 
`€` becomes `EUR`) or completely remove characters (`//IGNORE` option). while (false !== ($position = mb_stripos($transliteratedDecision, $search, $offset, 'UTF-8'))) { // Progressively insert markers into the original decision. $output .= sprintf('%s%s%s%s',