diff --git a/nisaba/scripts/natural_translit/language_params/en.py b/nisaba/scripts/natural_translit/language_params/en.py index a61a7324..0f8963c3 100644 --- a/nisaba/scripts/natural_translit/language_params/en.py +++ b/nisaba/scripts/natural_translit/language_params/en.py @@ -226,8 +226,9 @@ def _latn_inventory() -> g.Grapheme.Inventory: gr.make_iterable_suppl( 'consonant', *consonants, *(c.upper for c in consonants) ) + gr.import_graphemes(*latn.number, list_alias='number') return gr.sync_atomics( - [gr.upper, gr.lower, gr.letter, gr.vowel, gr.consonant] + [gr.upper, gr.lower, gr.letter, gr.vowel, gr.consonant, gr.number] ) diff --git a/nisaba/scripts/natural_translit/script/grapheme.py b/nisaba/scripts/natural_translit/script/grapheme.py index 0c98d9f1..84e7b063 100644 --- a/nisaba/scripts/natural_translit/script/grapheme.py +++ b/nisaba/scripts/natural_translit/script/grapheme.py @@ -73,6 +73,7 @@ def _grapheme_features() -> ft.Feature.Inventory: Script('br', 'Brahmic Parent', 801), ) ), + f.Aspect(f.equidistant('gr_class', f('letter'), f('number'))), f.Aspect(f.equidistant('case', f('upper'), f('lower'))), ) return ftr @@ -132,7 +133,7 @@ def from_char( name = unicodedata.name(character) except ValueError: name = 'GRAPHEME' - name += ' U+' + code_hex.upper()[2:] + name += ' U+' + code_hex.upper()[2:].rjust(4, '0') return cls( alias=alias, text=character, diff --git a/nisaba/scripts/natural_translit/script/grapheme_test.py b/nisaba/scripts/natural_translit/script/grapheme_test.py index f3ce5609..9d73eca5 100644 --- a/nisaba/scripts/natural_translit/script/grapheme_test.py +++ b/nisaba/scripts/natural_translit/script/grapheme_test.py @@ -25,7 +25,8 @@ def _test_inventory() -> _G.Inventory: gr_inv = _G.Inventory(_G.GR_FEATURES.script.latn) gr_inv.add_graphemes( # Raw - _G.from_char('a', 'a'), + _G.from_char('a', 'a', {_G.GR_FEATURES.gr_class.letter}), + _G.from_char('1', 'one', {_G.GR_FEATURES.gr_class.number}), # Abstract with custom text _G('nasal', '~'), # 
Abstract with no text @@ -158,7 +159,7 @@ def test_parse(self): def test_grapheme_description(self): self.assertEqual( _TEST_INVENTORY.a.description(), - 'alias: a\traw: a\tname: LATIN SMALL LETTER A U+61', + 'alias: a\traw: a\tname: LATIN SMALL LETTER A U+0061', ) self.assertEqual( _TEST_INVENTORY.nasal.description(), @@ -169,6 +170,11 @@ def test_grapheme_description(self): 'alias: ch_1\ttext: ch_1\tname: ch_1', ) + def test_grapheme_class(self): + self.AssertHasFeature(_TEST_INVENTORY.a, _G.GR_FEATURES.gr_class.letter) + self.AssertHasFeature(_TEST_INVENTORY.one, _G.GR_FEATURES.gr_class.number) + self.AssertHasFeature(_TEST_INVENTORY.nasal, _G.GR_FEATURES.gr_class.any) + def test_import_graphemes(self): new_inv = _G.Inventory( _G.GR_FEATURES.script.latn, _G.LANGUAGE.en diff --git a/nisaba/scripts/natural_translit/script/inventories/docs/en_latn.md b/nisaba/scripts/natural_translit/script/inventories/docs/en_latn.md index 66f8514d..091e7f17 100644 --- a/nisaba/scripts/natural_translit/script/inventories/docs/en_latn.md +++ b/nisaba/scripts/natural_translit/script/inventories/docs/en_latn.md @@ -10,7 +10,297 @@ correspondences. 
## Inventory: en_latn -### alias: a_upper raw: A name: LATIN CAPITAL LETTER A U+41 +### alias: zero raw: 0 name: DIGIT ZERO U+0030 + zero features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: one raw: 1 name: DIGIT ONE U+0031 + one features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: two raw: 2 name: DIGIT TWO U+0032 + two features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| 
articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: three raw: 3 name: DIGIT THREE U+0033 + three features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: four raw: 4 name: DIGIT FOUR U+0034 + four features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable 
| +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: five raw: 5 name: DIGIT FIVE U+0035 + five features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: six raw: 6 name: DIGIT SIX U+0036 + six features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: seven raw: 7 name: DIGIT SEVEN U+0037 + seven features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| 
articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: eight raw: 8 name: DIGIT EIGHT U+0038 + eight features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: nine raw: 9 name: DIGIT NINE U+0039 + nine features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable 
| +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: a_upper raw: A name: LATIN CAPITAL LETTER A U+0041 a_upper features: | aspects | values | @@ -36,9 +326,10 @@ correspondences. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: b_upper raw: B name: LATIN CAPITAL LETTER B U+42 +### alias: b_upper raw: B name: LATIN CAPITAL LETTER B U+0042 b_upper features: | aspects | values | @@ -64,9 +355,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: c_upper raw: C name: LATIN CAPITAL LETTER C U+43 +### alias: c_upper raw: C name: LATIN CAPITAL LETTER C U+0043 c_upper features: | aspects | values | @@ -92,9 +384,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: d_upper raw: D name: LATIN CAPITAL LETTER D U+44 +### alias: d_upper raw: D name: LATIN CAPITAL LETTER D U+0044 d_upper features: | aspects | values | @@ -120,9 +413,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: e_upper raw: E name: LATIN CAPITAL LETTER E U+45 +### alias: e_upper raw: E name: LATIN CAPITAL LETTER E U+0045 e_upper features: | aspects | values | @@ -148,9 +442,10 @@ correspondences. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: f_upper raw: F name: LATIN CAPITAL LETTER F U+46 +### alias: f_upper raw: F name: LATIN CAPITAL LETTER F U+0046 f_upper features: | aspects | values | @@ -176,9 +471,10 @@ correspondences. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: g_upper raw: G name: LATIN CAPITAL LETTER G U+47 +### alias: g_upper raw: G name: LATIN CAPITAL LETTER G U+0047 g_upper features: | aspects | values | @@ -204,9 +500,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: h_upper raw: H name: LATIN CAPITAL LETTER H U+48 +### alias: h_upper raw: H name: LATIN CAPITAL LETTER H U+0048 h_upper features: | aspects | values | @@ -232,9 +529,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: i_upper raw: I name: LATIN CAPITAL LETTER I U+49 +### alias: i_upper raw: I name: LATIN CAPITAL LETTER I U+0049 i_upper features: | aspects | values | @@ -260,9 +558,10 @@ correspondences. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: j_upper raw: J name: LATIN CAPITAL LETTER J U+4A +### alias: j_upper raw: J name: LATIN CAPITAL LETTER J U+004A j_upper features: | aspects | values | @@ -288,9 +587,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: k_upper raw: K name: LATIN CAPITAL LETTER K U+4B +### alias: k_upper raw: K name: LATIN CAPITAL LETTER K U+004B k_upper features: | aspects | values | @@ -316,9 +616,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: l_upper raw: L name: LATIN CAPITAL LETTER L U+4C +### alias: l_upper raw: L name: LATIN CAPITAL LETTER L U+004C l_upper features: | aspects | values | @@ -344,9 +645,10 @@ correspondences. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: m_upper raw: M name: LATIN CAPITAL LETTER M U+4D +### alias: m_upper raw: M name: LATIN CAPITAL LETTER M U+004D m_upper features: | aspects | values | @@ -372,9 +674,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: n_upper raw: N name: LATIN CAPITAL LETTER N U+4E +### alias: n_upper raw: N name: LATIN CAPITAL LETTER N U+004E n_upper features: | aspects | values | @@ -400,9 +703,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: o_upper raw: O name: LATIN CAPITAL LETTER O U+4F +### alias: o_upper raw: O name: LATIN CAPITAL LETTER O U+004F o_upper features: | aspects | values | @@ -428,9 +732,10 @@ correspondences. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: p_upper raw: P name: LATIN CAPITAL LETTER P U+50 +### alias: p_upper raw: P name: LATIN CAPITAL LETTER P U+0050 p_upper features: | aspects | values | @@ -456,9 +761,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: q_upper raw: Q name: LATIN CAPITAL LETTER Q U+51 +### alias: q_upper raw: Q name: LATIN CAPITAL LETTER Q U+0051 q_upper features: | aspects | values | @@ -484,9 +790,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: r_upper raw: R name: LATIN CAPITAL LETTER R U+52 +### alias: r_upper raw: R name: LATIN CAPITAL LETTER R U+0052 r_upper features: | aspects | values | @@ -512,9 +819,10 @@ correspondences. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: s_upper raw: S name: LATIN CAPITAL LETTER S U+53 +### alias: s_upper raw: S name: LATIN CAPITAL LETTER S U+0053 s_upper features: | aspects | values | @@ -540,9 +848,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: t_upper raw: T name: LATIN CAPITAL LETTER T U+54 +### alias: t_upper raw: T name: LATIN CAPITAL LETTER T U+0054 t_upper features: | aspects | values | @@ -568,9 +877,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: u_upper raw: U name: LATIN CAPITAL LETTER U U+55 +### alias: u_upper raw: U name: LATIN CAPITAL LETTER U U+0055 u_upper features: | aspects | values | @@ -596,9 +906,10 @@ correspondences. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: v_upper raw: V name: LATIN CAPITAL LETTER V U+56 +### alias: v_upper raw: V name: LATIN CAPITAL LETTER V U+0056 v_upper features: | aspects | values | @@ -624,9 +935,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: w_upper raw: W name: LATIN CAPITAL LETTER W U+57 +### alias: w_upper raw: W name: LATIN CAPITAL LETTER W U+0057 w_upper features: | aspects | values | @@ -652,9 +964,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: x_upper raw: X name: LATIN CAPITAL LETTER X U+58 +### alias: x_upper raw: X name: LATIN CAPITAL LETTER X U+0058 x_upper features: | aspects | values | @@ -680,9 +993,10 @@ correspondences. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: y_upper raw: Y name: LATIN CAPITAL LETTER Y U+59 +### alias: y_upper raw: Y name: LATIN CAPITAL LETTER Y U+0059 y_upper features: | aspects | values | @@ -708,9 +1022,10 @@ correspondences. | syllabicity | none, syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: z_upper raw: Z name: LATIN CAPITAL LETTER Z U+5A +### alias: z_upper raw: Z name: LATIN CAPITAL LETTER Z U+005A z_upper features: | aspects | values | @@ -736,9 +1051,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: a raw: a name: LATIN SMALL LETTER A U+61 +### alias: a raw: a name: LATIN SMALL LETTER A U+0061 a features: | aspects | values | @@ -764,9 +1080,10 @@ correspondences. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: b raw: b name: LATIN SMALL LETTER B U+62 +### alias: b raw: b name: LATIN SMALL LETTER B U+0062 b features: | aspects | values | @@ -792,9 +1109,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: c raw: c name: LATIN SMALL LETTER C U+63 +### alias: c raw: c name: LATIN SMALL LETTER C U+0063 c features: | aspects | values | @@ -820,9 +1138,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: d raw: d name: LATIN SMALL LETTER D U+64 +### alias: d raw: d name: LATIN SMALL LETTER D U+0064 d features: | aspects | values | @@ -848,9 +1167,10 @@ correspondences. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: e raw: e name: LATIN SMALL LETTER E U+65 +### alias: e raw: e name: LATIN SMALL LETTER E U+0065 e features: | aspects | values | @@ -876,9 +1196,10 @@ correspondences. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: f raw: f name: LATIN SMALL LETTER F U+66 +### alias: f raw: f name: LATIN SMALL LETTER F U+0066 f features: | aspects | values | @@ -904,9 +1225,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: g raw: g name: LATIN SMALL LETTER G U+67 +### alias: g raw: g name: LATIN SMALL LETTER G U+0067 g features: | aspects | values | @@ -932,9 +1254,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: h raw: h name: LATIN SMALL LETTER H U+68 +### alias: h raw: h name: LATIN SMALL LETTER H U+0068 h features: | aspects | values | @@ -960,9 +1283,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: i raw: i name: LATIN SMALL LETTER I U+69 +### alias: i raw: i name: LATIN SMALL LETTER I U+0069 i features: | aspects | values | @@ -988,9 +1312,10 @@ correspondences. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: j raw: j name: LATIN SMALL LETTER J U+6A +### alias: j raw: j name: LATIN SMALL LETTER J U+006A j features: | aspects | values | @@ -1016,9 +1341,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: k raw: k name: LATIN SMALL LETTER K U+6B +### alias: k raw: k name: LATIN SMALL LETTER K U+006B k features: | aspects | values | @@ -1044,9 +1370,10 @@ correspondences. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: l raw: l name: LATIN SMALL LETTER L U+6C +### alias: l raw: l name: LATIN SMALL LETTER L U+006C l features: | aspects | values | @@ -1072,9 +1399,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: m raw: m name: LATIN SMALL LETTER M U+6D +### alias: m raw: m name: LATIN SMALL LETTER M U+006D m features: | aspects | values | @@ -1100,9 +1428,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: n raw: n name: LATIN SMALL LETTER N U+6E +### alias: n raw: n name: LATIN SMALL LETTER N U+006E n features: | aspects | values | @@ -1128,9 +1457,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: o raw: o name: LATIN SMALL LETTER O U+6F +### alias: o raw: o name: LATIN SMALL LETTER O U+006F o features: | aspects | values | @@ -1156,9 +1486,10 @@ correspondences. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: p raw: p name: LATIN SMALL LETTER P U+70 +### alias: p raw: p name: LATIN SMALL LETTER P U+0070 p features: | aspects | values | @@ -1184,9 +1515,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: q raw: q name: LATIN SMALL LETTER Q U+71 +### alias: q raw: q name: LATIN SMALL LETTER Q U+0071 q features: | aspects | values | @@ -1212,9 +1544,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: r raw: r name: LATIN SMALL LETTER R U+72 +### alias: r raw: r name: LATIN SMALL LETTER R U+0072 r features: | aspects | values | @@ -1240,9 +1573,10 @@ correspondences. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: s raw: s name: LATIN SMALL LETTER S U+73 +### alias: s raw: s name: LATIN SMALL LETTER S U+0073 s features: | aspects | values | @@ -1268,9 +1602,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: t raw: t name: LATIN SMALL LETTER T U+74 +### alias: t raw: t name: LATIN SMALL LETTER T U+0074 t features: | aspects | values | @@ -1296,9 +1631,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: u raw: u name: LATIN SMALL LETTER U U+75 +### alias: u raw: u name: LATIN SMALL LETTER U U+0075 u features: | aspects | values | @@ -1324,9 +1660,10 @@ correspondences. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: v raw: v name: LATIN SMALL LETTER V U+76 +### alias: v raw: v name: LATIN SMALL LETTER V U+0076 v features: | aspects | values | @@ -1352,9 +1689,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: w raw: w name: LATIN SMALL LETTER W U+77 +### alias: w raw: w name: LATIN SMALL LETTER W U+0077 w features: | aspects | values | @@ -1380,9 +1718,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: x raw: x name: LATIN SMALL LETTER X U+78 +### alias: x raw: x name: LATIN SMALL LETTER X U+0078 x features: | aspects | values | @@ -1408,9 +1747,10 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: y raw: y name: LATIN SMALL LETTER Y U+79 +### alias: y raw: y name: LATIN SMALL LETTER Y U+0079 y features: | aspects | values | @@ -1436,9 +1776,10 @@ correspondences. 
| syllabicity | none, syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: z raw: z name: LATIN SMALL LETTER Z U+7A +### alias: z raw: z name: LATIN SMALL LETTER Z U+007A z features: | aspects | values | @@ -1464,4 +1805,5 @@ correspondences. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | diff --git a/nisaba/scripts/natural_translit/script/inventories/docs/latn.md b/nisaba/scripts/natural_translit/script/inventories/docs/latn.md index 5c84aaae..13957eff 100644 --- a/nisaba/scripts/natural_translit/script/inventories/docs/latn.md +++ b/nisaba/scripts/natural_translit/script/inventories/docs/latn.md @@ -9,7 +9,297 @@ descriptive phonological features are set from rows and columns of IPA charts. ## Inventory: latn -### alias: a_upper raw: A name: LATIN CAPITAL LETTER A U+41 +### alias: zero raw: 0 name: DIGIT ZERO U+0030 + zero features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: one raw: 1 name: DIGIT ONE U+0031 + one features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| 
articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: two raw: 2 name: DIGIT TWO U+0032 + two features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: three raw: 3 name: DIGIT THREE U+0033 + three features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | 
+| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: four raw: 4 name: DIGIT FOUR U+0034 + four features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: five raw: 5 name: DIGIT FIVE U+0035 + five features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: six raw: 6 name: DIGIT SIX U+0036 + six features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| 
articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: seven raw: 7 name: DIGIT SEVEN U+0037 + seven features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: eight raw: 8 name: DIGIT EIGHT U+0038 + eight features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | 
not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: nine raw: 9 name: DIGIT NINE U+0039 + nine features: + +| aspects | values | +|------------------|----------------| +| **sym_features** | | +| type | raw | +| **descriptive** | | +| ph_class | not_applicable | +| airstream | not_applicable | +| manner | not_applicable | +| place | not_applicable | +| articulator | not_applicable | +| height | not_applicable | +| backness | not_applicable | +| breathiness | not_applicable | +| voicing | not_applicable | +| labialization | not_applicable | +| lateralization | not_applicable | +| nasalization | not_applicable | +| palatalization | not_applicable | +| rhoticization | not_applicable | +| duration | not_applicable | +| syllabicity | not_applicable | +| **gr_features** | | +| script | Latin | +| gr_class | number | +| case | any | + +### alias: a_upper raw: A name: LATIN CAPITAL LETTER A U+0041 a_upper features: | aspects | values | @@ -35,9 +325,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: b_upper raw: B name: LATIN CAPITAL LETTER B U+42 +### alias: b_upper raw: B name: LATIN CAPITAL LETTER B U+0042 b_upper features: | aspects | values | @@ -63,9 +354,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: c_upper raw: C name: LATIN CAPITAL LETTER C U+43 +### alias: c_upper raw: C name: LATIN CAPITAL LETTER C U+0043 c_upper features: | aspects | values | @@ -91,9 +383,10 @@ descriptive phonological features are set from rows and columns of IPA charts. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: d_upper raw: D name: LATIN CAPITAL LETTER D U+44 +### alias: d_upper raw: D name: LATIN CAPITAL LETTER D U+0044 d_upper features: | aspects | values | @@ -119,9 +412,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: e_upper raw: E name: LATIN CAPITAL LETTER E U+45 +### alias: e_upper raw: E name: LATIN CAPITAL LETTER E U+0045 e_upper features: | aspects | values | @@ -147,9 +441,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: f_upper raw: F name: LATIN CAPITAL LETTER F U+46 +### alias: f_upper raw: F name: LATIN CAPITAL LETTER F U+0046 f_upper features: | aspects | values | @@ -175,9 +470,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: g_upper raw: G name: LATIN CAPITAL LETTER G U+47 +### alias: g_upper raw: G name: LATIN CAPITAL LETTER G U+0047 g_upper features: | aspects | values | @@ -203,9 +499,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: h_upper raw: H name: LATIN CAPITAL LETTER H U+48 +### alias: h_upper raw: H name: LATIN CAPITAL LETTER H U+0048 h_upper features: | aspects | values | @@ -231,9 +528,10 @@ descriptive phonological features are set from rows and columns of IPA charts. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: i_upper raw: I name: LATIN CAPITAL LETTER I U+49 +### alias: i_upper raw: I name: LATIN CAPITAL LETTER I U+0049 i_upper features: | aspects | values | @@ -259,9 +557,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: j_upper raw: J name: LATIN CAPITAL LETTER J U+4A +### alias: j_upper raw: J name: LATIN CAPITAL LETTER J U+004A j_upper features: | aspects | values | @@ -287,9 +586,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: k_upper raw: K name: LATIN CAPITAL LETTER K U+4B +### alias: k_upper raw: K name: LATIN CAPITAL LETTER K U+004B k_upper features: | aspects | values | @@ -315,9 +615,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: l_upper raw: L name: LATIN CAPITAL LETTER L U+4C +### alias: l_upper raw: L name: LATIN CAPITAL LETTER L U+004C l_upper features: | aspects | values | @@ -343,9 +644,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: m_upper raw: M name: LATIN CAPITAL LETTER M U+4D +### alias: m_upper raw: M name: LATIN CAPITAL LETTER M U+004D m_upper features: | aspects | values | @@ -371,9 +673,10 @@ descriptive phonological features are set from rows and columns of IPA charts. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: n_upper raw: N name: LATIN CAPITAL LETTER N U+4E +### alias: n_upper raw: N name: LATIN CAPITAL LETTER N U+004E n_upper features: | aspects | values | @@ -399,9 +702,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: o_upper raw: O name: LATIN CAPITAL LETTER O U+4F +### alias: o_upper raw: O name: LATIN CAPITAL LETTER O U+004F o_upper features: | aspects | values | @@ -427,9 +731,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: p_upper raw: P name: LATIN CAPITAL LETTER P U+50 +### alias: p_upper raw: P name: LATIN CAPITAL LETTER P U+0050 p_upper features: | aspects | values | @@ -455,9 +760,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: q_upper raw: Q name: LATIN CAPITAL LETTER Q U+51 +### alias: q_upper raw: Q name: LATIN CAPITAL LETTER Q U+0051 q_upper features: | aspects | values | @@ -483,9 +789,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: r_upper raw: R name: LATIN CAPITAL LETTER R U+52 +### alias: r_upper raw: R name: LATIN CAPITAL LETTER R U+0052 r_upper features: | aspects | values | @@ -511,9 +818,10 @@ descriptive phonological features are set from rows and columns of IPA charts. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: s_upper raw: S name: LATIN CAPITAL LETTER S U+53 +### alias: s_upper raw: S name: LATIN CAPITAL LETTER S U+0053 s_upper features: | aspects | values | @@ -539,9 +847,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: t_upper raw: T name: LATIN CAPITAL LETTER T U+54 +### alias: t_upper raw: T name: LATIN CAPITAL LETTER T U+0054 t_upper features: | aspects | values | @@ -567,9 +876,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: u_upper raw: U name: LATIN CAPITAL LETTER U U+55 +### alias: u_upper raw: U name: LATIN CAPITAL LETTER U U+0055 u_upper features: | aspects | values | @@ -595,9 +905,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: v_upper raw: V name: LATIN CAPITAL LETTER V U+56 +### alias: v_upper raw: V name: LATIN CAPITAL LETTER V U+0056 v_upper features: | aspects | values | @@ -623,9 +934,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: w_upper raw: W name: LATIN CAPITAL LETTER W U+57 +### alias: w_upper raw: W name: LATIN CAPITAL LETTER W U+0057 w_upper features: | aspects | values | @@ -651,9 +963,10 @@ descriptive phonological features are set from rows and columns of IPA charts. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: x_upper raw: X name: LATIN CAPITAL LETTER X U+58 +### alias: x_upper raw: X name: LATIN CAPITAL LETTER X U+0058 x_upper features: | aspects | values | @@ -679,9 +992,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: y_upper raw: Y name: LATIN CAPITAL LETTER Y U+59 +### alias: y_upper raw: Y name: LATIN CAPITAL LETTER Y U+0059 y_upper features: | aspects | values | @@ -707,9 +1021,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: z_upper raw: Z name: LATIN CAPITAL LETTER Z U+5A +### alias: z_upper raw: Z name: LATIN CAPITAL LETTER Z U+005A z_upper features: | aspects | values | @@ -735,9 +1050,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | upper | -### alias: a raw: a name: LATIN SMALL LETTER A U+61 +### alias: a raw: a name: LATIN SMALL LETTER A U+0061 a features: | aspects | values | @@ -763,9 +1079,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: b raw: b name: LATIN SMALL LETTER B U+62 +### alias: b raw: b name: LATIN SMALL LETTER B U+0062 b features: | aspects | values | @@ -791,9 +1108,10 @@ descriptive phonological features are set from rows and columns of IPA charts. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: c raw: c name: LATIN SMALL LETTER C U+63 +### alias: c raw: c name: LATIN SMALL LETTER C U+0063 c features: | aspects | values | @@ -819,9 +1137,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: d raw: d name: LATIN SMALL LETTER D U+64 +### alias: d raw: d name: LATIN SMALL LETTER D U+0064 d features: | aspects | values | @@ -847,9 +1166,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: e raw: e name: LATIN SMALL LETTER E U+65 +### alias: e raw: e name: LATIN SMALL LETTER E U+0065 e features: | aspects | values | @@ -875,9 +1195,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: f raw: f name: LATIN SMALL LETTER F U+66 +### alias: f raw: f name: LATIN SMALL LETTER F U+0066 f features: | aspects | values | @@ -903,9 +1224,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: g raw: g name: LATIN SMALL LETTER G U+67 +### alias: g raw: g name: LATIN SMALL LETTER G U+0067 g features: | aspects | values | @@ -931,9 +1253,10 @@ descriptive phonological features are set from rows and columns of IPA charts. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: h raw: h name: LATIN SMALL LETTER H U+68 +### alias: h raw: h name: LATIN SMALL LETTER H U+0068 h features: | aspects | values | @@ -959,9 +1282,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: i raw: i name: LATIN SMALL LETTER I U+69 +### alias: i raw: i name: LATIN SMALL LETTER I U+0069 i features: | aspects | values | @@ -987,9 +1311,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: j raw: j name: LATIN SMALL LETTER J U+6A +### alias: j raw: j name: LATIN SMALL LETTER J U+006A j features: | aspects | values | @@ -1015,9 +1340,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: k raw: k name: LATIN SMALL LETTER K U+6B +### alias: k raw: k name: LATIN SMALL LETTER K U+006B k features: | aspects | values | @@ -1043,9 +1369,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: l raw: l name: LATIN SMALL LETTER L U+6C +### alias: l raw: l name: LATIN SMALL LETTER L U+006C l features: | aspects | values | @@ -1071,9 +1398,10 @@ descriptive phonological features are set from rows and columns of IPA charts. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: m raw: m name: LATIN SMALL LETTER M U+6D +### alias: m raw: m name: LATIN SMALL LETTER M U+006D m features: | aspects | values | @@ -1099,9 +1427,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: n raw: n name: LATIN SMALL LETTER N U+6E +### alias: n raw: n name: LATIN SMALL LETTER N U+006E n features: | aspects | values | @@ -1127,9 +1456,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: o raw: o name: LATIN SMALL LETTER O U+6F +### alias: o raw: o name: LATIN SMALL LETTER O U+006F o features: | aspects | values | @@ -1155,9 +1485,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: p raw: p name: LATIN SMALL LETTER P U+70 +### alias: p raw: p name: LATIN SMALL LETTER P U+0070 p features: | aspects | values | @@ -1183,9 +1514,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: q raw: q name: LATIN SMALL LETTER Q U+71 +### alias: q raw: q name: LATIN SMALL LETTER Q U+0071 q features: | aspects | values | @@ -1211,9 +1543,10 @@ descriptive phonological features are set from rows and columns of IPA charts. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: r raw: r name: LATIN SMALL LETTER R U+72 +### alias: r raw: r name: LATIN SMALL LETTER R U+0072 r features: | aspects | values | @@ -1239,9 +1572,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: s raw: s name: LATIN SMALL LETTER S U+73 +### alias: s raw: s name: LATIN SMALL LETTER S U+0073 s features: | aspects | values | @@ -1267,9 +1601,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: t raw: t name: LATIN SMALL LETTER T U+74 +### alias: t raw: t name: LATIN SMALL LETTER T U+0074 t features: | aspects | values | @@ -1295,9 +1630,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: u raw: u name: LATIN SMALL LETTER U U+75 +### alias: u raw: u name: LATIN SMALL LETTER U U+0075 u features: | aspects | values | @@ -1323,9 +1659,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | syllabic | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: v raw: v name: LATIN SMALL LETTER V U+76 +### alias: v raw: v name: LATIN SMALL LETTER V U+0076 v features: | aspects | values | @@ -1351,9 +1688,10 @@ descriptive phonological features are set from rows and columns of IPA charts. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: w raw: w name: LATIN SMALL LETTER W U+77 +### alias: w raw: w name: LATIN SMALL LETTER W U+0077 w features: | aspects | values | @@ -1379,9 +1717,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: x raw: x name: LATIN SMALL LETTER X U+78 +### alias: x raw: x name: LATIN SMALL LETTER X U+0078 x features: | aspects | values | @@ -1407,9 +1746,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: y raw: y name: LATIN SMALL LETTER Y U+79 +### alias: y raw: y name: LATIN SMALL LETTER Y U+0079 y features: | aspects | values | @@ -1435,9 +1775,10 @@ descriptive phonological features are set from rows and columns of IPA charts. | syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | -### alias: z raw: z name: LATIN SMALL LETTER Z U+7A +### alias: z raw: z name: LATIN SMALL LETTER Z U+007A z features: | aspects | values | @@ -1463,4 +1804,5 @@ descriptive phonological features are set from rows and columns of IPA charts. 
| syllabicity | none | | **gr_features** | | | script | Latin | +| gr_class | letter | | case | lower | diff --git a/nisaba/scripts/natural_translit/script/inventories/latn.py b/nisaba/scripts/natural_translit/script/inventories/latn.py index d1499232..e4f41562 100644 --- a/nisaba/scripts/natural_translit/script/inventories/latn.py +++ b/nisaba/scripts/natural_translit/script/inventories/latn.py @@ -26,7 +26,9 @@ def _build_inventory() -> grapheme.Grapheme.Inventory: lowercase_vowels = ['a', 'e', 'i', 'o', 'u'] latn.add_graphemes( *( - g.from_char(char, char, {grf.script.latn, phf.vowel}) + g.from_char( + char, char, {grf.script.latn, grf.gr_class.letter, phf.vowel} + ) for char in lowercase_vowels ), list_alias='vowel', @@ -82,7 +84,9 @@ def _build_inventory() -> grapheme.Grapheme.Inventory: ] latn.add_graphemes( *( - g.from_char(char, char, {grf.script.latn, phf.consonant}) + g.from_char( + char, char, {grf.script.latn, grf.gr_class.letter, phf.consonant} + ) for char in lowercase_consonants ), list_alias='consonant', @@ -150,7 +154,12 @@ def _build_inventory() -> grapheme.Grapheme.Inventory: uppercase = g.from_char( char.text.upper(), char.alias + '_upper', - {grf.script.latn, grf.case.upper, char.descriptives()}, + { + grf.script.latn, + grf.gr_class.letter, + grf.case.upper, + char.descriptives(), + }, ) latn.add_graphemes(uppercase) latn.upper.add(uppercase) @@ -160,9 +169,37 @@ def _build_inventory() -> grapheme.Grapheme.Inventory: if uppercase.has_feature(phf.ph_class.consonant): latn.consonant.add(uppercase) latn.make_iterable_suppl('letter', *latn.lower, *latn.upper) - return latn.sync_atomics( - [latn.lower, latn.upper, latn.letter, latn.vowel, latn.consonant] + numbers = [ + (0, 'zero'), + (1, 'one'), + (2, 'two'), + (3, 'three'), + (4, 'four'), + (5, 'five'), + (6, 'six'), + (7, 'seven'), + (8, 'eight'), + (9, 'nine'), + ] + latn.add_graphemes( + *( + g.from_char( + str(number), + alias, + {grf.script.latn, grf.gr_class.number, phf.not_applicable}, + ) 
+ for number, alias in numbers + ), + list_alias='number', ) + return latn.sync_atomics([ + latn.lower, + latn.upper, + latn.letter, + latn.vowel, + latn.consonant, + latn.number, + ]) LATN = _build_inventory() diff --git a/nisaba/scripts/natural_translit/script/inventories/latn_test.py b/nisaba/scripts/natural_translit/script/inventories/latn_test.py index 30f61589..573c7a9b 100644 --- a/nisaba/scripts/natural_translit/script/inventories/latn_test.py +++ b/nisaba/scripts/natural_translit/script/inventories/latn_test.py @@ -44,6 +44,8 @@ def test_latn(self): self.assertIs(_LATN.a_upper.lower, _LATN.a) self.assertIs(_LATN.a.lower, _LATN.a) self.assertIs(_LATN.a_upper.upper, _LATN.a_upper) + self.AssertHasFeature(_LATN.one, _G.GR_FEATURES.gr_class.number) + self.assertIn(_LATN.one, _LATN.number) def test_en_latn(self): self.assertNotEqual(_LATN.a, _EN.a) diff --git a/nisaba/scripts/natural_translit/utils/feature.py b/nisaba/scripts/natural_translit/utils/feature.py index 5ecbba03..fda50bd3 100644 --- a/nisaba/scripts/natural_translit/utils/feature.py +++ b/nisaba/scripts/natural_translit/utils/feature.py @@ -533,6 +533,21 @@ class Aspect(inventory.Inventory): purple: 0.50 } + Every aspect has the following supplements: + - 'all' is a set of all possible values for this aspect. + - 'any' is a supplemental feature that returns 0 distance to all features + of this aspect. This is used to skip looping over the 'all' set for + faster distance calculation. + - 'n_a' (not_applicable) returns max_dist to all features. It is + semantically distinct from values that have 'not' semantics for an + applicable aspect such as 'none', 'zero', 'minus', etc. For example, + given a linear aspect such as life_span: [short, medium, long] for + animals, an animal shaped ornament will have a life_span of + 'not_applicable', since it cannot be compared to a live animal in this + aspect. 
Placing a 'none' value at either end would falsely indicate that + the ornament has a similar life_span to animals with the shortest or + longest life_span. + TODO: Add bidict method to Inventory and convert distance_dict to bidict. """ @@ -999,6 +1014,10 @@ def copy(self, alias: str = '') -> Feature.MultiProfile: new.new_profile(profile.copy(profile.alias)) return new + # The following class methods are shortcuts for creating ValueLists + # of different types. See Feature.Aspect.ValueList docstring for more details. + # TODO: Move these to the ValueList class. + @classmethod def equidistant( cls,