diff --git a/Lib/gflanguages/data/languages/bdh_Latn.textproto b/Lib/gflanguages/data/languages/bdh_Latn.textproto index 6e53be3f..7e7ccf6d 100644 --- a/Lib/gflanguages/data/languages/bdh_Latn.textproto +++ b/Lib/gflanguages/data/languages/bdh_Latn.textproto @@ -1,11 +1,12 @@ id: "bdh_Latn" language: "bdh" script: "Latn" -name: "Baka" +name: "Baka (DRC/South Sudan)" +autonym: "Tara Baká" population: 60000 region: "CD" region: "SS" exemplar_chars { base: "a A b B c C d D e E f F g G h H i I ị Ị ɨ Ɨ k K l L m M n N ṇ Ṇ o O p P r R ṛ Ṛ s S t T u U ụ Ụ v V ṿ Ṿ w W y Y z Z ꞌ Ꞌ" marks: "◌̣ ◌́" -} \ No newline at end of file +} diff --git a/Lib/gflanguages/data/languages/beh_Latn.textproto b/Lib/gflanguages/data/languages/beh_Latn.textproto index 73661516..62dfbda2 100644 --- a/Lib/gflanguages/data/languages/beh_Latn.textproto +++ b/Lib/gflanguages/data/languages/beh_Latn.textproto @@ -1,10 +1,10 @@ id: "beh_Latn" language: "beh" script: "Latn" -name: "Baka" +name: "Biali" population: 100000 region: "BJ" exemplar_chars { base: "a A b B c C d D e E ə Ə f F g G h H i I k K l L m M n N o O p P r R s S t T u U w W y Y" auxiliary: "j J q Q v V x X z Z" -} \ No newline at end of file +} diff --git a/Lib/gflanguages/data/languages/bgn_Arab.textproto b/Lib/gflanguages/data/languages/bgn_Arab.textproto index 56d4cb3f..d42af6e8 100644 --- a/Lib/gflanguages/data/languages/bgn_Arab.textproto +++ b/Lib/gflanguages/data/languages/bgn_Arab.textproto @@ -2,7 +2,7 @@ id: "bgn_Arab" language: "bgn" script: "Arab" name: "Western Balochi" -preferred_name: "Balochi" +#preferred_name: "Balochi" population: 2037382 region: "AF" region: "IR" diff --git a/Lib/gflanguages/data/languages/bkc_Latn.textproto b/Lib/gflanguages/data/languages/bkc_Latn.textproto index e761d8cf..02b8bca3 100644 --- a/Lib/gflanguages/data/languages/bkc_Latn.textproto +++ b/Lib/gflanguages/data/languages/bkc_Latn.textproto @@ -1,11 +1,11 @@ id: "bkc_Latn" language: "bkc" script: "Latn" -name: "Baka" +name: "Baka 
(Cameroon/Gabon)" population: 71000 region: "CM" region: "GA" exemplar_chars { base: "a A b B ɓ Ɓ d D ɗ Ɗ e E ɛ Ɛ f F g G h H i I j J k K l L m M n N o O ɔ Ɔ s S t T u U w W y Y" auxiliary: "c C p P q Q r R v V x X z Z" -} \ No newline at end of file +} diff --git a/Lib/gflanguages/data/languages/bsc_Latn_GN.textproto b/Lib/gflanguages/data/languages/bsc_Latn_GN.textproto index 97d08bd4..1048b22f 100644 --- a/Lib/gflanguages/data/languages/bsc_Latn_GN.textproto +++ b/Lib/gflanguages/data/languages/bsc_Latn_GN.textproto @@ -1,7 +1,7 @@ id: "bsc_Latn_GN" language: "bsc" script: "Latn" -name: "Bassari" +name: "Guinean Bassari" autonym: "oneyan" population: 18000 region: "GN" diff --git a/Lib/gflanguages/data/languages/bsq_Bass.textproto b/Lib/gflanguages/data/languages/bsq_Bass.textproto index 65c182d7..8b9b529e 100644 --- a/Lib/gflanguages/data/languages/bsq_Bass.textproto +++ b/Lib/gflanguages/data/languages/bsq_Bass.textproto @@ -1,7 +1,7 @@ id: "bsq_Bass" language: "bsq" script: "Bass" -name: "Bassa" +name: "Bassa, Vah" population: 410000 region: "LR" region: "SL" diff --git a/Lib/gflanguages/data/languages/bsq_Latn.textproto b/Lib/gflanguages/data/languages/bsq_Latn.textproto index ee3a76d8..7e6d576d 100644 --- a/Lib/gflanguages/data/languages/bsq_Latn.textproto +++ b/Lib/gflanguages/data/languages/bsq_Latn.textproto @@ -1,7 +1,7 @@ id: "bsq_Latn" language: "bsq" script: "Latn" -name: "Bassa" +name: "Bassa, Latin" population: 410000 region: "LR" region: "SL" @@ -11,4 +11,4 @@ exemplar_chars { auxiliary: "l L q Q r R x X y Y z Z" } source: "“Bassa dictionary”, Christian Education Foundation in Liberia (CEFL), https://cefliberia.org/bassa/bassa-dictionary/" -source: "Ɓǎsɔ́ɔ̀ Báɓòɔ̀, Bible Society Liberia, 2002" \ No newline at end of file +source: "Ɓǎsɔ́ɔ̀ Báɓòɔ̀, Bible Society Liberia, 2002" diff --git a/Lib/gflanguages/data/languages/crh_Cyrl.textproto b/Lib/gflanguages/data/languages/crh_Cyrl.textproto index 3af416fb..ce6648b4 100644 --- 
a/Lib/gflanguages/data/languages/crh_Cyrl.textproto +++ b/Lib/gflanguages/data/languages/crh_Cyrl.textproto @@ -1,7 +1,7 @@ id: "crh_Cyrl" language: "crh" script: "Cyrl" -name: "Crimean Turkish" +name: "Crimean Turkish, Cyrillic" autonym: "Къырымтатар" population: 245968 region: "UA" diff --git a/Lib/gflanguages/data/languages/dnj_Latn_LR.textproto b/Lib/gflanguages/data/languages/dnj_Latn_LR.textproto index d665f96e..73290077 100644 --- a/Lib/gflanguages/data/languages/dnj_Latn_LR.textproto +++ b/Lib/gflanguages/data/languages/dnj_Latn_LR.textproto @@ -1,7 +1,7 @@ id: "dnj_Latn_LR" language: "dnj" script: "Latn" -name: "Dan" +name: "Liberian Dan" autonym: "Gio" population: 1099244 region: "LR" diff --git a/Lib/gflanguages/data/languages/evn_Latn.textproto b/Lib/gflanguages/data/languages/evn_Latn.textproto index 756f5dbc..937e9a9b 100644 --- a/Lib/gflanguages/data/languages/evn_Latn.textproto +++ b/Lib/gflanguages/data/languages/evn_Latn.textproto @@ -1,7 +1,7 @@ id: "evn_Latn" language: "evn" script: "Latn" -name: "Evenki" +name: "Evenki, Latin" population: 16000 region: "RU" region: "CN" diff --git a/Lib/gflanguages/data/languages/kr_Arab.textproto b/Lib/gflanguages/data/languages/kr_Arab.textproto index e051c89f..4621a632 100644 --- a/Lib/gflanguages/data/languages/kr_Arab.textproto +++ b/Lib/gflanguages/data/languages/kr_Arab.textproto @@ -1,4 +1,4 @@ id: "kr_Arab" language: "kr" script: "Arab" -name: "Kanuri" +name: "Kanuri, Arabic" diff --git a/Lib/gflanguages/data/languages/mlt_Latn.textproto b/Lib/gflanguages/data/languages/mlt_Latn.textproto deleted file mode 100644 index 21f8b767..00000000 --- a/Lib/gflanguages/data/languages/mlt_Latn.textproto +++ /dev/null @@ -1,11 +0,0 @@ -id: "mlt_Latn" -language: "mlt" -script: "Latn" -name: "Maltese" -population: 530000 -region: "MT" -exemplar_chars { - base: "a A à À b B ċ Ċ d D e E è È f F ġ Ġ g G h H ħ Ħ i I ì Ì j J k K l L m M n N o O ò Ò p P q Q r R s S t T u U ù Ù v V w W x X ż Ż z Z" - marks: "◌̀ ◌̇" - 
auxiliary: "c C y Y" -} \ No newline at end of file diff --git a/Lib/gflanguages/data/languages/sa_Nand.textproto b/Lib/gflanguages/data/languages/sa_Nand.textproto index cd2c12ca..80b4c563 100644 --- a/Lib/gflanguages/data/languages/sa_Nand.textproto +++ b/Lib/gflanguages/data/languages/sa_Nand.textproto @@ -2,7 +2,7 @@ id: "sa_Nand" language: "sa" script: "Nand" -name: "Sanskrit" +name: "Sanskrit, Nandinagari" autonym: "𑧍𑧞𑧍𑧠𑦮𑧖𑦽𑧆𑧠" region: "IN" exemplar_chars { diff --git a/Lib/gflanguages/data/languages/uma_Latn.textproto b/Lib/gflanguages/data/languages/uma_Latn.textproto index ba40ae58..eea6d698 100644 --- a/Lib/gflanguages/data/languages/uma_Latn.textproto +++ b/Lib/gflanguages/data/languages/uma_Latn.textproto @@ -1,4 +1,5 @@ id: "uma_Latn" +language: "uma" script: "Latn" name: "Umatilla" population: 25 diff --git a/Lib/gflanguages/data/languages/wal_Ethi.textproto b/Lib/gflanguages/data/languages/wal_Ethi.textproto index de9c0d25..b5c7fbe6 100644 --- a/Lib/gflanguages/data/languages/wal_Ethi.textproto +++ b/Lib/gflanguages/data/languages/wal_Ethi.textproto @@ -1,6 +1,6 @@ id: "wal_Ethi" language: "wal" script: "Ethi" -name: "Wolaytta" +name: "Wolaytta, Ethiopic" population: 1946034 region: "ET" diff --git a/Lib/gflanguages/data/languages/wal_Latn.textproto b/Lib/gflanguages/data/languages/wal_Latn.textproto index 5804b209..c6676a56 100644 --- a/Lib/gflanguages/data/languages/wal_Latn.textproto +++ b/Lib/gflanguages/data/languages/wal_Latn.textproto @@ -1,9 +1,9 @@ id: "wal_Latn" language: "wal" script: "Latn" -name: "Wolaytta" +name: "Wolaytta, Latin" population: 7000000 region: "ET" exemplar_chars { base: "a A b B c C d D e E f F g G h H i I j J k K l L m M n N o O p P q Q r R s S t T u U v V w W x X y Y z Z" -} \ No newline at end of file +} diff --git a/Lib/gflanguages/data/languages/xsm_Latn_BF.textproto b/Lib/gflanguages/data/languages/xsm_Latn_BF.textproto index ce9282dc..98332911 100644 --- a/Lib/gflanguages/data/languages/xsm_Latn_BF.textproto +++ 
b/Lib/gflanguages/data/languages/xsm_Latn_BF.textproto @@ -1,7 +1,7 @@ id: "xsm_Latn_BF" language: "xsm" script: "Latn" -name: "Kasem" +name: "Burkinabè Kasem" population: 250000 region: "BF" exemplar_chars { diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index 315275d9..25a22dd3 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -265,3 +265,18 @@ def test_exemplar_parser(): "l", "̍", } + + +def test_language_uniqueness(): + names = Counter([]) + for lang in LANGUAGES.values(): + # We check that names are unique *within a script* since + # when we display them in a menu we segment that menu by + # script and then by language + if lang.preferred_name: + names[lang.script + "/" + lang.preferred_name] += 1 + else: + names[lang.script + "/" + lang.name] += 1 + if any(count > 1 for count in names.values()): + duplicates = {name: count for name, count in names.items() if count > 1} + pytest.fail(f"Duplicate language names: {duplicates}") diff --git a/tests/test_parsable.py b/tests/test_parsable.py index 8f32a2ee..f209bc3b 100644 --- a/tests/test_parsable.py +++ b/tests/test_parsable.py @@ -7,9 +7,16 @@ languages_dir = os.path.join(DATA_DIR, "languages") -textproto_files = [os.path.basename(x) for x in glob.iglob(os.path.join(languages_dir, "*.textproto"))] +textproto_files = [ + os.path.basename(x) for x in glob.iglob(os.path.join(languages_dir, "*.textproto")) +] + @pytest.mark.parametrize("lang_code", textproto_files) def test_parsable(lang_code): with open(os.path.join(languages_dir, lang_code), "r", encoding="utf-8") as f: - text_format.Parse(f.read(), languages_public_pb2.LanguageProto()) + msg = text_format.Parse(f.read(), languages_public_pb2.LanguageProto()) + assert msg.id + assert msg.language + assert msg.script + assert msg.population is not None