From 1112f7645f72a306c7c26d429d0761378dfb7563 Mon Sep 17 00:00:00 2001 From: Theodore Dubois Date: Sun, 24 Nov 2024 12:20:37 -0800 Subject: [PATCH] Add more flexibility to "languages" flag With this, the flag supports: - "default" to append the default set of languages, instead of overriding it entirely, e.g. --languages=default,tlh means all default languages *plus* Klingon. - "-lang" to exclude languages, e.g. --languages=default,-en means all default languages *except* English. - "*" to include all languages not specified. This can be combined with "-lang". e.g. --languages=*,-jbo means all languages *except* Lojban. --- .../com/onthegomap/planetiler/Planetiler.java | 13 ++++++-- .../planetiler/util/Translations.java | 32 +++++++++++++++---- .../planetiler/PlanetilerTests.java | 11 +++++++ .../planetiler/util/TranslationsTest.java | 27 ++++++++++++++++ 4 files changed, 74 insertions(+), 9 deletions(-) diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java index d87fe774d0..47b250879c 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java @@ -48,6 +48,7 @@ import java.util.function.Function; import java.util.regex.Pattern; import java.util.stream.IntStream; +import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -103,7 +104,7 @@ public class Planetiler { private boolean overwrite = false; private boolean ran = false; // most common OSM languages - private List languages = List.of( + private List defaultLanguages = List.of( "en", "ru", "ar", "zh", "ja", "ko", "fr", "de", "fi", "pl", "es", "be", "br", "he" ); @@ -547,7 +548,7 @@ public Planetiler addStage(String name, String description, RunnableThatThrows t * @return this runner instance for chaining */ public Planetiler setDefaultLanguages(List languages) { - this.languages = languages; + this.defaultLanguages = languages; return this; } @@ -587,7 +588,13 @@ public Planetiler fetchWikidataNameTranslations(Path defaultWikidataCache) { public Translations translations() { if (translations == null) { boolean transliterate = arguments.getBoolean("transliterate", "attempt to transliterate latin names", true); - List languages = arguments.getList("languages", "languages to use", this.languages); + List languages = arguments.getList("languages", "Languages to include labels for. \"default\" expands to the default set of languages configured by the profile. \"-lang\" excludes \"lang\". \"*\" includes every language not listed.", this.defaultLanguages); + if (languages.contains("default")) { + languages = Stream.concat( + languages.stream().filter(language -> !language.equals("default")), + this.defaultLanguages.stream() + ).toList(); + } translations = Translations.defaultProvider(languages).setShouldTransliterate(transliterate); } return translations; diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Translations.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Translations.java index 7e3c60b0f8..85c87deecf 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Translations.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Translations.java @@ -22,15 +22,31 @@ public class Translations { ThreadLocal.withInitial(() -> new ThreadLocalTransliterator().getInstance("Any-Latin")); private boolean shouldTransliterate = true; - private final Set languageSet; private final List providers = new ArrayList<>(); + private final Set includeLanguages; + private final Set excludeLanguages; + private boolean defaultInclude = false; private Translations(List languages) { - this.languageSet = new HashSet<>(); + this.includeLanguages = new HashSet<>(); + this.excludeLanguages = new HashSet<>(); + for (String language : languages) { + if (language.equals("*")) { + defaultInclude = true; + continue; + } + + boolean include = true; + if (language.startsWith("-")) { + language = language.replaceFirst("^-", ""); + include = false; + } + String withoutPrefix = language.replaceFirst("^name:", ""); - languageSet.add(withoutPrefix); - languageSet.add("name:" + withoutPrefix); + Set set = include ? this.includeLanguages : this.excludeLanguages; + set.add(withoutPrefix); + set.add("name:" + withoutPrefix); } } @@ -85,7 +101,7 @@ public void addTranslations(Map output, Map inpu if (translations != null && !translations.isEmpty()) { for (var entry : translations.entrySet()) { String key = entry.getKey(); - if (languageSet.contains(key)) { + if (careAboutLanguage(key)) { output.putIfAbsent(key.startsWith("name:") ? key : "name:" + key, entry.getValue()); } } @@ -105,7 +121,11 @@ public Translations setShouldTransliterate(boolean shouldTransliterate) { /** Returns true if {@code language} is in the set of language translations to use. */ public boolean careAboutLanguage(String language) { - return languageSet.contains(language); + if (excludeLanguages.contains(language)) + return false; + if (includeLanguages.contains(language)) + return true; + return defaultInclude; } /** A source of name translations. */ diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java index 0be94acf28..7116edccbb 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java @@ -2714,6 +2714,17 @@ void testBoundFiltersFill() throws Exception { assertTrue(polyResultz8.tiles.containsKey(TileCoord.ofXYZ(z8tiles * 3 / 4, z8tiles * 7 / 8, 8))); } + @Test + void testDefaultLanguages() { + var planetiler = Planetiler.create(Arguments.of("languages", "default,en")) + .setDefaultLanguages(List.of("jbo", "tlh")); + var translations = planetiler.translations(); + assertTrue(translations.careAboutLanguage("jbo")); + assertTrue(translations.careAboutLanguage("tlh")); + assertTrue(translations.careAboutLanguage("en")); + assertFalse(translations.careAboutLanguage("fr")); + } + @FunctionalInterface private interface ReadableTileArchiveFactory { ReadableTileArchive create(Path p) throws IOException; diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TranslationsTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TranslationsTest.java index 14291c8d8c..091efd95af 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TranslationsTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TranslationsTest.java @@ -1,10 +1,16 @@ package com.onthegomap.planetiler.util; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.List; import java.util.Map; +import java.util.stream.Stream; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; class TranslationsTest { @@ -33,4 +39,25 @@ void testTwoProvidersPrefersFirst() { void testTransliterate() { assertEquals("rì běn", Translations.transliterate("日本")); } + + @ParameterizedTest + @MethodSource("includeExcludeCases") + void testIncludeExclude(List languages, List shouldCare, List shouldNotCare) { + var translations = Translations.nullProvider(languages); + for (var lang : shouldCare) { + assertTrue(translations.careAboutLanguage(lang)); + } + for (var lang : shouldNotCare) { + assertFalse(translations.careAboutLanguage(lang)); + } + } + + private static Stream includeExcludeCases() { + return Stream.of( + Arguments.of(List.of("jbo", "tlh"), List.of("jbo", "tlh"), List.of("en", "fr")), + Arguments.of(List.of("*"), List.of("jbo", "tlh", "en", "fr"), List.of()), + Arguments.of(List.of("*", "-tlh"), List.of("jbo", "fr"), List.of("tlh")), + Arguments.of(List.of("tlh", "-tlh"), List.of(), List.of("tlh", "en")) + ); + } }