Skip to content

Commit b81eebb

Browse files
authored
Merge branch 'main' into soninke-exemplar
2 parents dd37ff8 + eaa9068 commit b81eebb

17 files changed

+351
-48
lines changed

.github/workflows/test.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
strategy:
1616
fail-fast: false
1717
matrix:
18-
python-version: ["3.8", "3.9", "3.10", "3.11"]
18+
python-version: ["3.9", "3.10", "3.11", "3.12"]
1919
platform: [ubuntu-latest, windows-latest]
2020
steps:
2121
- uses: actions/checkout@v4

Cargo.toml

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
[package]
2+
name = "google-fonts-languages"
3+
version = "0.7.1"
4+
edition = "2021"
5+
description = "Google Fonts script and language support data"
6+
repository = "https://github.com/googlefonts/lang"
7+
license-file = "LICENSE.txt"
8+
9+
[dependencies]
10+
bytes = "1.7.1"
11+
prost = "0.13"
12+
serde = { version = "1.0", features = ["derive"] }
13+
serde_json = "1.0"
14+
15+
[build-dependencies]
16+
prost-build = "0.13"
17+
protobuf-support = "3.7.1"
18+
protobuf = "3.7.1"
19+
protobuf-parse = "3.7.1"
20+
glob = "0"
21+
prettyplease = "0.2"
22+
quote = "1.0"
23+
proc-macro2 = "1.0"
24+
syn = "2.0"
25+
itertools = "0.13"
26+
serde_json = "1.0"

Lib/gflanguages/__init__.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,15 @@
2222
import glob
2323
import os
2424
import unicodedata
25+
import sys
2526

2627
from gflanguages import languages_public_pb2
2728
from google.protobuf import text_format
28-
from importlib_resources import files
29+
30+
if sys.version_info < (3, 10):
31+
from importlib_resources import files
32+
else:
33+
from importlib.resources import files
2934

3035
try:
3136
from ._version import version as __version__ # type: ignore

Lib/gflanguages/data/languages/agq_Latn.textproto

+19-3
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,25 @@ autonym: "Wum"
66
population: 38843
77
region: "CM"
88
exemplar_chars {
9-
base: "a A à À â Â ǎ Ǎ ā Ā b B c C d D e E è È ê Ê ě Ě ē Ē ɛ Ɛ {ɛ̀} {Ɛ̀} {ɛ̂} {Ɛ̂} {ɛ̌} {Ɛ̌} {ɛ̄} {Ɛ̄} f F g G h H i I ì Ì î Î ǐ Ǐ ī Ī ɨ Ɨ {ɨ̀} {Ɨ̀} {ɨ̂} {Ɨ̂} {ɨ̌} {Ɨ̌} {ɨ̄} {Ɨ̄} k K l L m M n N ŋ Ŋ o O ò Ò ô Ô ǒ Ǒ ō Ō ɔ Ɔ {ɔ̀} {Ɔ̀} {ɔ̂} {Ɔ̂} {ɔ̌} {Ɔ̌} {ɔ̄} {Ɔ̄} p P s S t T u U ù Ù û Û ǔ Ǔ ū Ū ʉ Ʉ {ʉ̀} {Ʉ̀} {ʉ̂} {Ʉ̂} {ʉ̌} {Ʉ̌} {ʉ̄} {Ʉ̄} v V w W y Y z Z ʔ"
10-
auxiliary: "q Q r R x X"
9+
base: "a A à À â Â ǎ Ǎ ā Ā b B c C d D e E è È ê Ê ě Ě ē Ē ɛ Ɛ {ɛ̀} {Ɛ̀} {ɛ̂} {Ɛ̂} {ɛ̌} {Ɛ̌} {ɛ̄} {Ɛ̄} f F g G h H i I ì Ì î Î ǐ Ǐ ī Ī ɨ Ɨ {ɨ̀} {Ɨ̀} {ɨ̂} {Ɨ̂} {ɨ̌} {Ɨ̌} {ɨ̄} {Ɨ̄} k K l L m M n N ŋ Ŋ o O ò Ò ô Ô ǒ Ǒ ō Ō ɔ Ɔ {ɔ̀} {Ɔ̀} {ɔ̂} {Ɔ̂} {ɔ̌} {Ɔ̌} {ɔ̄} {Ɔ̄} p P s S t T u U ù Ù û Û ǔ Ǔ ū Ū ʉ Ʉ {ʉ̀} {Ʉ̀} {ʉ̂} {Ʉ̂} {ʉ̌} {Ʉ̌} {ʉ̄} {Ʉ̄} v V w W y Y z Z ʼ"
10+
auxiliary: "q Q r R x X"
1111
marks: "◌̀ ◌̂ ◌̄ ◌̌"
1212
numerals: "- , % + 0 1 2 3 4 5 6 7 8 9"
13-
index: "A B C D E Ɛ F G H I Ɨ K L M N Ŋ O Ɔ P S T U Ʉ V W Y Z ʔ"
13+
index: "A B C D E Ɛ F G H I Ɨ K L M N Ŋ O Ɔ P S T U Ʉ V W Y Z ʼ"
1414
}
15+
sample_text {
16+
masthead_full: "Ɛ̀ɛ̀Ɨ̄ɨ̄"
17+
masthead_partial: "KƗ̀"
18+
styles: "A fɨ̀ lo tsɨgha ko nû, tɔ̀lɔki m bwìi ùfwɨn ghèe"
19+
tester: "A fɨ̀ bugho an tì li kō, ghùw ù lì ghe fɨ̀ tɔɔŋ tsɨ̀ghà enyɨa Kɨ̀zɨ̀ŋè"
20+
poster_sm: "kɨ̀zɨ̀ŋè"
21+
poster_md: "kɨ̀zɨ̀ŋè"
22+
poster_lg: "tɔ̀lɔki"
23+
specimen_48: "Mbùʼù kɨ̀ m mgbɛɛ bùghòo bwìin kòʼò tsɨ̀ghà ko wɨ̄n."
24+
specimen_36: "Tɔ̀lɔ̀ki n dzɛ̀ â mbùʼù kò enyɨa kǎ ghee lûum, è kê tsǒo bùghò gbɨ̀là ghù, tō."
25+
specimen_32: "A fɨ̀ lo alo enaʼ ghɨa ghɨ ntsòʼ ghɨ̌ n tughuu fughu, ghe n tum tɔ̀lɔki enyɨa è ndùw zue ntsòʼ a n lo mò dzì zɨ̀ ntsoʼ e te lɔ̄ʼso dàa."
26+
specimen_21: "A fɨ̀ bugho an tì li kō, ghùw ù lì ghe fɨ̀ tɔɔŋ tsɨ̀ghà enyɨa Kɨ̀zɨ̀ŋè. Kɨ̀zɨ̀ŋè fɨ̀ lo êzɨ̀, ghe fɨ̀ tɔŋɔ wɨ̄n enyɨa kɨ̀zɨ̀ŋè bòʼ enyɨa ufʉghà ù dzɨ̀m u fɨ̀ lo àzɨ̀ŋo wɨ̄n. A fɨ̀ lo tsɨgha ko nû, tɔ̀lɔki m bwìi ùfwɨn ghèe mbùʼù kò. Mbùʼù kɨ̀ m mgbɛɛ bùghòo bwìin kòʼò tsɨ̀ghà ko wɨ̄n. Ù mo᷇, “ŋ ghûw fɨ̀n, N sô mgbɛɛ bùghoo bwìin kôʼò ko. Ghù lô mùghò bùgho bwìin kòʼò zɨn a?”"
27+
specimen_16: "A fɨ̀ bugho an tì li kō, ghùw ù lì ghe fɨ̀ tɔɔŋ tsɨ̀ghà enyɨa Kɨ̀zɨ̀ŋè. Kɨ̀zɨ̀ŋè fɨ̀ lo êzɨ̀, ghe fɨ̀ tɔŋɔ wɨ̄n enyɨa kɨ̀zɨ̀ŋè bòʼ enyɨa ufʉghà ù dzɨ̀m u fɨ̀ lo àzɨ̀ŋo wɨ̄n. A fɨ̀ lo tsɨgha ko nû, tɔ̀lɔki m bwìi ùfwɨn ghèe mbùʼù kò. Mbùʼù kɨ̀ m mgbɛɛ bùghòo bwìin kòʼò tsɨ̀ghà ko wɨ̄n. Ù mo᷇, “ŋ ghûw fɨ̀n, N sô mgbɛɛ bùghoo bwìin kôʼò ko. Ghù lô mùghò bùgho bwìin kòʼò zɨn a?” Tɔ̀lɔ̀ki n dzɛ̀ â mbùʼù kò enyɨa kǎ ghee lûum, è kê tsǒo bùghò gbɨ̀là ghù, tō. E kê bùghò gbɨ̀ɨn ghù, E ndôʼò dzàa zìi yò tsuʼ fɨla e kê bughoo a te alô ghù. A fɨ̀ lo alo enaʼ ghɨa ghɨ ntsòʼ ghɨ̌ n tughuu fughu, ghe n tum tɔ̀lɔki enyɨa è ndùw zue ntsòʼ a n lo mò dzì zɨ̀ ntsoʼ e te lɔ̄ʼso dàa."
28+
}
29+
source: "Aghem alphabet, Aghem Language Development Committee “ALDEC” Wum, North West Province Republic of Cameroon, 2010"
30+
source: "Robyn Hackett, Once upon a Time: A Collection of Five Aghem Folktales, SIL, 2013"

Lib/gflanguages/data/languages/apd_Latn.textproto

+10-4
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,14 @@ population: 33000000
66
region: "ER"
77
region: "SD"
88
region: "SS"
9+
historical: true
910
exemplar_chars {
10-
base: "a A b B d D ḍ Ḍ f F g G h H ḥ Ḥ i I j J k K x X l L m M n N r R s S ṣ Ṣ t T ṭ Ṭ u U w W y Y z Z ẓ Ẓ"
11-
marks: "◌̣ ◌͟"
12-
auxiliary: "c C e E o O p P q Q v V"
13-
}
11+
base: "a A b B d D ḍ Ḍ f F g G h H ḥ Ḥ i I j J k K ḵ Ḵ x X l L m M n N q Q r R s S ṣ Ṣ t T ṭ Ṭ u U w W y Y z Z ẓ Ẓ ‘ ’"
12+
marks: "◌̄ ◌̣ ◌̱ ◌͟"
13+
auxiliary: "c C e E o O p P q Q v V ʾ ʿ ʻ ʼ"
14+
}
15+
note: "Romanized Sudanese Arabic is mainly used in historical Bible translations or in some teaching material. In other contexts, Modern Standard Arabic or Sudanese Arabic written with the Arabic script are used. Eluzai 1993 in Hartell 1993 gives 3 Romanizations: one used in the BAM 1978 New Testament, one used by Hillelson 1925 and one used by Persson & Persson 1979. More recent editions like Persson & Persson 2017 use a different Romanization. Digitized versions of the BFBS 1927 mistakenly use two U+0320 or U+0331 instead of a single U+035F. Persson & Persson 2017 uses half rings U+02BE ʾ and U+02BF ʿ, Hillelson 1925 uses U+2018 ‘ and U+2019 ’ (as in digitized BFBS 1927) or U+02BB ʻ and U+02BC ʼ."
16+
source: "Bible Alliance Mission (BAM), Kitaab al [Vahd] aj Jadid, 1978"
17+
source: "British and Foreign Bible Society (BFBS), Ingīl Marqus, 1927"
18+
source: "Moka Yokwe Eluzai, “Soudan”, in Rhonda L. Hartell, Alphabets des langues africaines, Dakar: Unesco, SIL, 1993, pp. 265-277"
19+
source: "Sigmar Hillelson, Sudan Arabic English-Arabic Vocabulary, London: Sudan Government, 1925"

0 commit comments

Comments
 (0)