Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more flexibility to "languages" flag #1111

Merged
merged 1 commit into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -103,7 +104,7 @@ public class Planetiler {
private boolean overwrite = false;
private boolean ran = false;
// most common OSM languages
private List<String> languages = List.of(
private List<String> defaultLanguages = List.of(
"en", "ru", "ar", "zh", "ja", "ko", "fr",
"de", "fi", "pl", "es", "be", "br", "he"
);
Expand Down Expand Up @@ -547,7 +548,7 @@ public Planetiler addStage(String name, String description, RunnableThatThrows t
* @return this runner instance for chaining
*/
public Planetiler setDefaultLanguages(List<String> languages) {
  // Store only in defaultLanguages: this is the list that translations() falls back to when the
  // "languages" argument is absent, and that the "default" keyword expands to.
  this.defaultLanguages = languages;
  return this;
}

Expand Down Expand Up @@ -587,7 +588,13 @@ public Planetiler fetchWikidataNameTranslations(Path defaultWikidataCache) {
public Translations translations() {
if (translations == null) {
boolean transliterate = arguments.getBoolean("transliterate", "attempt to transliterate latin names", true);
List<String> languages = arguments.getList("languages", "languages to use", this.languages);
List<String> languages = arguments.getList("languages", "Languages to include labels for. \"default\" expands to the default set of languages configured by the profile. \"-lang\" excludes \"lang\". \"*\" includes every language not listed.", this.defaultLanguages);
if (languages.contains("default")) {
languages = Stream.concat(
languages.stream().filter(language -> !language.equals("default")),
this.defaultLanguages.stream()
).toList();
}
translations = Translations.defaultProvider(languages).setShouldTransliterate(transliterate);
}
return translations;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,31 @@ public class Translations {
ThreadLocal.withInitial(() -> new ThreadLocalTransliterator().getInstance("Any-Latin"));

private boolean shouldTransliterate = true;
private final Set<String> languageSet;
private final List<TranslationProvider> providers = new ArrayList<>();
private final Set<String> includeLanguages;
private final Set<String> excludeLanguages;
private boolean defaultInclude = false;

private Translations(List<String> languages) {
this.languageSet = new HashSet<>();
this.includeLanguages = new HashSet<>();
this.excludeLanguages = new HashSet<>();

for (String language : languages) {
if (language.equals("*")) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor, but what about supporting "all" instead of, or in addition to, "*" so that you don't need to worry about bash expanding it as a wildcard?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea, but "all" is the language code assigned to the Allar language (https://en.wikipedia.org/wiki/Allar_language). Any better ideas for a keyword?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, good catch! I can't think of any better short name than "all" so "*" seems good for now!

defaultInclude = true;
continue;
}

boolean include = true;
if (language.startsWith("-")) {
language = language.replaceFirst("^-", "");
include = false;
}

String withoutPrefix = language.replaceFirst("^name:", "");
languageSet.add(withoutPrefix);
languageSet.add("name:" + withoutPrefix);
Set<String> set = include ? this.includeLanguages : this.excludeLanguages;
set.add(withoutPrefix);
set.add("name:" + withoutPrefix);
}
}

Expand Down Expand Up @@ -85,7 +101,7 @@ public void addTranslations(Map<String, Object> output, Map<String, Object> inpu
if (translations != null && !translations.isEmpty()) {
for (var entry : translations.entrySet()) {
String key = entry.getKey();
if (languageSet.contains(key)) {
if (careAboutLanguage(key)) {
output.putIfAbsent(key.startsWith("name:") ? key : "name:" + key, entry.getValue());
}
}
Expand All @@ -105,7 +121,11 @@ public Translations setShouldTransliterate(boolean shouldTransliterate) {

/**
 * Returns true if {@code language} is in the set of language translations to use.
 * <p>
 * An explicitly excluded language is always rejected, even if it is also explicitly included. A language that is
 * neither excluded nor included falls back to {@code defaultInclude}.
 *
 * @param language the language key to check
 * @return {@code true} if translations for {@code language} should be used
 */
public boolean careAboutLanguage(String language) {
  // Exclusions take precedence over inclusions.
  if (excludeLanguages.contains(language)) {
    return false;
  }
  return includeLanguages.contains(language) || defaultInclude;
}

/** A source of name translations. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2714,6 +2714,17 @@ void testBoundFiltersFill() throws Exception {
assertTrue(polyResultz8.tiles.containsKey(TileCoord.ofXYZ(z8tiles * 3 / 4, z8tiles * 7 / 8, 8)));
}

@Test
void testDefaultLanguages() {
  // "default" in the languages argument should expand to the profile-configured defaults,
  // while explicitly listed languages ("en") are kept alongside them.
  var args = Arguments.of("languages", "default,en");
  var translations = Planetiler.create(args)
    .setDefaultLanguages(List.of("jbo", "tlh"))
    .translations();

  for (String expected : List.of("jbo", "tlh", "en")) {
    assertTrue(translations.careAboutLanguage(expected));
  }
  // neither listed explicitly nor part of the configured defaults
  assertFalse(translations.careAboutLanguage("fr"));
}

@FunctionalInterface
private interface ReadableTileArchiveFactory {
ReadableTileArchive create(Path p) throws IOException;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
package com.onthegomap.planetiler.util;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

class TranslationsTest {

Expand Down Expand Up @@ -33,4 +39,25 @@ void testTwoProvidersPrefersFirst() {
void testTransliterate() {
  // Japanese kanji input is expected to come back as latin text with tone marks.
  var latin = Translations.transliterate("日本");
  assertEquals("rì běn", latin);
}

@ParameterizedTest
@MethodSource("includeExcludeCases")
void testIncludeExclude(List<String> languages, List<String> shouldCare, List<String> shouldNotCare) {
  // Build translations from the raw language spec, then check both sides of the include/exclude decision.
  var translations = Translations.nullProvider(languages);
  shouldCare.forEach(lang -> assertTrue(translations.careAboutLanguage(lang)));
  shouldNotCare.forEach(lang -> assertFalse(translations.careAboutLanguage(lang)));
}

/** Supplies (language spec, languages that should be used, languages that should be ignored) triples. */
private static Stream<Arguments> includeExcludeCases() {
  // plain list: only the listed languages are used
  Arguments explicitOnly = Arguments.of(List.of("jbo", "tlh"), List.of("jbo", "tlh"), List.of("en", "fr"));
  // "*" alone: every language is used
  Arguments wildcard = Arguments.of(List.of("*"), List.of("jbo", "tlh", "en", "fr"), List.of());
  // "*" with an exclusion: everything except the excluded language
  Arguments wildcardMinusOne = Arguments.of(List.of("*", "-tlh"), List.of("jbo", "fr"), List.of("tlh"));
  // the same language included and excluded: the exclusion wins
  Arguments excludeBeatsInclude = Arguments.of(List.of("tlh", "-tlh"), List.of(), List.of("tlh", "en"));
  return Stream.of(explicitOnly, wildcard, wildcardMinusOne, excludeBeatsInclude);
}
}
Loading