diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/LanguageUtils.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/LanguageUtils.java index be64e06ae1..76b7af55c4 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/LanguageUtils.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/LanguageUtils.java @@ -4,7 +4,6 @@ import java.util.Set; import java.util.function.Predicate; import java.util.regex.Pattern; -import java.util.stream.Stream; public class LanguageUtils { // Name tags that should be eligible for finding a latin name. @@ -39,6 +38,10 @@ public static String nullIfEmpty(String a) { return (a == null || a.isEmpty()) ? null : a; } + /** + * @deprecated Use {@code OmtLanguageUtils.string()} + */ + @Deprecated(forRemoval = true) public static String string(Object obj) { return nullIfEmpty(obj == null ? null : obj.toString()); } @@ -47,6 +50,10 @@ public static boolean containsOnlyLatinCharacters(String string) { return string != null && ONLY_LATIN.test(string); } + /** + * @deprecated Use {@code Translations.transliterate(OmtLanguageUtils.string(tags.get("name")))} + */ + @Deprecated(forRemoval = true) public static String transliteratedName(Map tags) { return Translations.transliterate(string(tags.get("name"))); } @@ -73,26 +80,4 @@ public static boolean isValidOsmNameTag(String tag) { return VALID_NAME_TAGS.test(tag); } - public static String getLatinName(Map tags, boolean transliterate) { - String name = string(tags.get("name")); - if (containsOnlyLatinCharacters(name)) { - return name; - } else { - return getNameTranslations(tags) - .filter(LanguageUtils::containsOnlyLatinCharacters) - .findFirst() - .orElse(transliterate ? 
Translations.transliterate(name) : null); - } - } - - - private static Stream getNameTranslations(Map tags) { - return Stream.concat( - Stream.of("name:en", "int_name", "name:de").map(tag -> string(tags.get(tag))), - tags.entrySet().stream() - .filter(e -> !EN_DE_NAME_KEYS.contains(e.getKey()) && VALID_NAME_TAGS.test(e.getKey())) - .map(Map.Entry::getValue) - .map(LanguageUtils::string) - ); - } } diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/LanguageUtilsTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/LanguageUtilsTest.java index 341d95d00b..049dc3f76d 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/LanguageUtilsTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/LanguageUtilsTest.java @@ -1,44 +1,12 @@ package com.onthegomap.planetiler.util; -import static com.onthegomap.planetiler.util.LanguageUtils.containsOnlyLatinCharacters; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import java.util.Map; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; -import org.junit.jupiter.params.provider.ValueSource; class LanguageUtilsTest { - @ParameterizedTest - @CsvSource({ - "abc, true", - "5!, true", - "5~, true", - "é, true", - "éś, true", - "ɏə, true", - "ɐ, true", - "ᵿἀ, false", - "Ḁỿ, true", - "\u02ff\u0370, false", - "\u0030\u036f, true", - "日本, false", - "abc本123, false", - }) - void testIsLatin(String in, boolean isLatin) { - if (!isLatin) { - assertFalse(containsOnlyLatinCharacters(in)); - } else { - assertEquals(in, LanguageUtils.getLatinName(Map.of( - "name", in - ), true)); - } - } - @ParameterizedTest @CsvSource(value = { "null,null", @@ -58,102 +26,28 @@ void testRemoveNonLatin(String in, String out) { } 
@ParameterizedTest - @ValueSource(strings = { - // OSM tags that SHOULD be eligible for name:latin feature in the output - "name:en", - "name:en-US", - "name:en-010", - "int_name", - "name:fr", - "name:es", - "name:pt", - "name:de", - "name:ar", - "name:it", - "name:ko-Latn", - "name:be-tarask", - // https://wiki.openstreetmap.org/wiki/Multilingual_names#Japan - "name:ja", - "name:ja-Latn", - "name:ja_rm", - "name:ja_kana", - // https://wiki.openstreetmap.org/wiki/Multilingual_names#China - "name:zh-CN", - "name:zh-hant-CN", - "name:zh_pinyin", - "name:zh_zhuyin", - "name:zh-Latn-tongyong", - "name:zh-Latn-pinyin", - "name:zh-Latn-wadegiles", - "name:yue-Latn-jyutping", - // https://wiki.openstreetmap.org/wiki/Multilingual_names#France - "name:fr", - "name:fr-x-gallo", - "name:br", - "name:oc", - "name:vls", - "name:frp", - "name:gcf", - "name:gsw", - }) - void testLatinFallbacks(String key) { - if (key.startsWith("name:")) { - assertTrue(LanguageUtils.isValidOsmNameTag(key)); - } - assertEquals("a", LanguageUtils.getLatinName(Map.of( - key, "a" - ), true)); - assertNull(LanguageUtils.getLatinName(Map.of( - key, "ア" - ), true)); - assertNull(LanguageUtils.getLatinName(Map.of( - key, "غ" - ), true)); - } - - @ParameterizedTest - @ValueSource(strings = { - // OSM tags that should NOT be eligible for name:latin feature in the output - "name:signed", - "name:prefix", - "name:abbreviation", - "name:source", - "name:full", - "name:adjective", - "name:proposed", - "name:pronunciation", - "name:etymology", - "name:etymology:wikidata", - "name:etymology:wikipedia", - "name:etymology:right", - "name:etymology:left", - "name:genitive", - }) - void testNoLatinFallback(String key) { - assertFalse(LanguageUtils.isValidOsmNameTag(key)); - assertEquals("Branch Hill–Loveland Road", LanguageUtils.getLatinName(Map.of( - "name", "Branch Hill–Loveland Road", - key, "Q22133584;Q843993" - ), true)); - assertEquals("rì", LanguageUtils.getLatinName(Map.of( - "name", "日", - key, "other" - 
), true)); - } - - @ParameterizedTest - @CsvSource({ - "キャンパス, kyanpasu", - "Αλφαβητικός Κατάλογος, Alphabētikós Katálogos", - "биологическом, biologičeskom", - }) - void testTransliterate(String in, String out) { - assertEquals(out, LanguageUtils.getLatinName(Map.of( - "name", in - ), true)); - assertNull(LanguageUtils.getLatinName(Map.of( - "name", in - ), false)); + @CsvSource(value = { + "name:es, true", + "name:en-US, true", + "name:fr-x-gallo, true", + "name:ko-Latn, true", + "name:be-tarask, true", + "name:ja_rm, true", + "name:ja_kana, true", + "name:vls, true", + "name:zh-hant-CN, true", + "name:zh_pinyin, true", + "name:zh_zhuyin, true", + "name:zh-Latn-tongyong, true", + "name:zh-Latn-pinyin, true", + "name:zh-Latn-wadegiles, true", + "name:yue-Latn-jyutping, true", + "nombre, false", + "name:, false", + "name:xxxxx, false", + }, nullValues = "null") + void testIsValidOsmNameTag(String in, boolean out) { + assertEquals(out, LanguageUtils.isValidOsmNameTag(in)); } }