Skip to content

Commit

Permalink
Add more flexibility to "languages" flag
Browse files Browse the repository at this point in the history
With this, the flag supports:

- "default" to append the default set of languages, instead of
  overriding it entirely, e.g. --languages=default,tlh means all default
  languages *plus* Klingon.
- "-lang" to exclude languages, e.g. --languages=default,-en means all
  default languages *except* English.
- "*" to include all languages not specified. This can be combined with
  "-lang". e.g. --languages=*,-jbo means all languages *except* Lojban.
  • Loading branch information
tbodt committed Nov 24, 2024
1 parent 8a1b54f commit 1112f76
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -103,7 +104,7 @@ public class Planetiler {
private boolean overwrite = false;
private boolean ran = false;
// most common OSM languages
// NOTE(review): this is a rendered diff without +/- markers. The two declaration lines below
// appear to be the pre-commit field name followed by its renamed replacement, not two fields;
// the updated setter and translations() lines shown later read this.defaultLanguages.
private List<String> languages = List.of(
private List<String> defaultLanguages = List.of(
"en", "ru", "ar", "zh", "ja", "ko", "fr",
"de", "fi", "pl", "es", "be", "br", "he"
);
Expand Down Expand Up @@ -547,7 +548,7 @@ public Planetiler addStage(String name, String description, RunnableThatThrows t
* @return this runner instance for chaining
*/
public Planetiler setDefaultLanguages(List<String> languages) {
// NOTE(review): rendered diff without +/- markers - the first assignment looks like the
// pre-commit line and the second its replacement after the field was renamed.
this.languages = languages;
// Keeps a reference to the caller's list (no defensive copy is made here).
this.defaultLanguages = languages;
// Return the runner itself so configuration calls can be chained.
return this;
}

Expand Down Expand Up @@ -587,7 +588,13 @@ public Planetiler fetchWikidataNameTranslations(Path defaultWikidataCache) {
public Translations translations() {
if (translations == null) {
boolean transliterate = arguments.getBoolean("transliterate", "attempt to transliterate latin names", true);
List<String> languages = arguments.getList("languages", "languages to use", this.languages);
List<String> languages = arguments.getList("languages", "Languages to include labels for. \"default\" expands to the default set of languages configured by the profile. \"-lang\" excludes \"lang\". \"*\" includes every language not listed.", this.defaultLanguages);
if (languages.contains("default")) {
languages = Stream.concat(
languages.stream().filter(language -> !language.equals("default")),
this.defaultLanguages.stream()
).toList();
}
translations = Translations.defaultProvider(languages).setShouldTransliterate(transliterate);
}
return translations;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,31 @@ public class Translations {
ThreadLocal.withInitial(() -> new ThreadLocalTransliterator().getInstance("Any-Latin"));

private boolean shouldTransliterate = true;
private final Set<String> languageSet;
private final List<TranslationProvider> providers = new ArrayList<>();
private final Set<String> includeLanguages;
private final Set<String> excludeLanguages;
private boolean defaultInclude = false;

/**
 * Sorts the entries of {@code languages} into an include set and an exclude set.
 * <p>
 * Entry forms handled below: {@code "*"} switches on {@code defaultInclude}; a leading {@code "-"} marks the entry
 * as an exclusion; an optional {@code "name:"} prefix is stripped so that both the bare and the prefixed spelling
 * are stored.
 * <p>
 * NOTE(review): this is a rendered diff without +/- markers; the {@code languageSet} lines appear to be the
 * pre-commit code that the include/exclude lines replace.
 */
private Translations(List<String> languages) {
// presumably the removed pre-commit initialization
this.languageSet = new HashSet<>();
this.includeLanguages = new HashSet<>();
this.excludeLanguages = new HashSet<>();

for (String language : languages) {
// The wildcard is not stored in either set; it only changes the fallback answer.
if (language.equals("*")) {
defaultInclude = true;
continue;
}

boolean include = true;
// A leading "-" means "exclude this language"; drop the marker before normalizing the name.
if (language.startsWith("-")) {
language = language.replaceFirst("^-", "");
include = false;
}

// Normalize away an optional "name:" prefix, then record both spellings so lookups by either form match.
String withoutPrefix = language.replaceFirst("^name:", "");
// presumably the removed pre-commit insertions into the single language set
languageSet.add(withoutPrefix);
languageSet.add("name:" + withoutPrefix);
Set<String> set = include ? this.includeLanguages : this.excludeLanguages;
set.add(withoutPrefix);
set.add("name:" + withoutPrefix);
}
}

Expand Down Expand Up @@ -85,7 +101,7 @@ public void addTranslations(Map<String, Object> output, Map<String, Object> inpu
if (translations != null && !translations.isEmpty()) {
for (var entry : translations.entrySet()) {
String key = entry.getKey();
if (languageSet.contains(key)) {
if (careAboutLanguage(key)) {
output.putIfAbsent(key.startsWith("name:") ? key : "name:" + key, entry.getValue());
}
}
Expand All @@ -105,7 +121,11 @@ public Translations setShouldTransliterate(boolean shouldTransliterate) {

/**
 * Returns true if {@code language} is in the set of language translations to use.
 * <p>
 * Precedence: a language in the exclude set is always rejected, even when it is also in the include set; otherwise
 * a language in the include set is accepted; anything else gets the value of {@code defaultInclude}.
 */
public boolean careAboutLanguage(String language) {
// NOTE(review): rendered diff without +/- markers - the next line looks like the pre-commit
// body, replaced by the exclude/include/default checks that follow it.
return languageSet.contains(language);
// Exclusions are checked first so that they override an explicit inclusion of the same language.
if (excludeLanguages.contains(language))
return false;
if (includeLanguages.contains(language))
return true;
// Neither listed nor excluded: fall back to the default flag.
return defaultInclude;
}

/** A source of name translations. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2714,6 +2714,17 @@ void testBoundFiltersFill() throws Exception {
assertTrue(polyResultz8.tiles.containsKey(TileCoord.ofXYZ(z8tiles * 3 / 4, z8tiles * 7 / 8, 8)));
}

/** The "default" entry of the languages argument expands to the configured defaults and combines with explicit entries. */
@Test
void testDefaultLanguages() {
  // Configured defaults are Lojban and Klingon; the argument requests those defaults plus English.
  var args = Arguments.of("languages", "default,en");
  var runner = Planetiler.create(args).setDefaultLanguages(List.of("jbo", "tlh"));
  var resolved = runner.translations();

  for (String expected : List.of("jbo", "tlh", "en")) {
    assertTrue(resolved.careAboutLanguage(expected));
  }
  // French is neither a configured default nor explicitly requested.
  assertFalse(resolved.careAboutLanguage("fr"));
}

@FunctionalInterface
private interface ReadableTileArchiveFactory {
ReadableTileArchive create(Path p) throws IOException;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
package com.onthegomap.planetiler.util;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

class TranslationsTest {

Expand Down Expand Up @@ -33,4 +39,25 @@ void testTwoProvidersPrefersFirst() {
void testTransliterate() {
assertEquals("rì běn", Translations.transliterate("日本"));
}

/**
 * Checks that {@code careAboutLanguage} honors the include, exclude and wildcard entries of a language list.
 *
 * @param languages     entries handed to the translations instance under test
 * @param shouldCare    languages that must be accepted
 * @param shouldNotCare languages that must be rejected
 */
@ParameterizedTest
@MethodSource("includeExcludeCases")
void testIncludeExclude(List<String> languages, List<String> shouldCare, List<String> shouldNotCare) {
  var translations = Translations.nullProvider(languages);
  // Assertions run in a loop, so each one carries a message naming the language and the input list;
  // without it a failure would not say which element of the case was wrong.
  for (var lang : shouldCare) {
    assertTrue(translations.careAboutLanguage(lang),
      "expected to care about '" + lang + "' with languages=" + languages);
  }
  for (var lang : shouldNotCare) {
    assertFalse(translations.careAboutLanguage(lang),
      "expected not to care about '" + lang + "' with languages=" + languages);
  }
}

/**
 * Supplies the cases for {@code testIncludeExclude}: the language list, the languages that should be accepted,
 * and the languages that should be rejected.
 */
private static Stream<Arguments> includeExcludeCases() {
  // Only explicitly listed languages are accepted.
  Arguments explicitOnly =
    Arguments.of(List.of("jbo", "tlh"), List.of("jbo", "tlh"), List.of("en", "fr"));
  // A lone wildcard accepts everything.
  Arguments wildcardOnly =
    Arguments.of(List.of("*"), List.of("jbo", "tlh", "en", "fr"), List.of());
  // Wildcard with one exclusion accepts everything but the excluded language.
  Arguments wildcardMinusOne =
    Arguments.of(List.of("*", "-tlh"), List.of("jbo", "fr"), List.of("tlh"));
  // Listing and excluding the same language: the exclusion wins.
  Arguments exclusionWins =
    Arguments.of(List.of("tlh", "-tlh"), List.of(), List.of("tlh", "en"));

  return Stream.of(explicitOnly, wildcardOnly, wildcardMinusOne, exclusionWins);
}
}

0 comments on commit 1112f76

Please sign in to comment.