Skip to content

Commit

Permalink
Add name and features to symbol.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 609754318
  • Loading branch information
isingoo authored and copybara-github committed Feb 24, 2024
1 parent 50575c9 commit 091901b
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 11 deletions.
1 change: 1 addition & 0 deletions nisaba/scripts/natural_translit/utils/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ py_library(
name = "expression",
srcs = ["expression.py"],
deps = [
":feature",
":log_op",
":type_op",
],
Expand Down
69 changes: 60 additions & 9 deletions nisaba/scripts/natural_translit/utils/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,24 @@
# limitations under the License.

"""Interfaces for generating fsts from objects."""
from typing import Union
from nisaba.scripts.natural_translit.utils import feature
from nisaba.scripts.natural_translit.utils import log_op as log
from nisaba.scripts.natural_translit.utils import type_op as ty
f = feature.Feature


def _symbol_features() -> f.Inventory:
  """Builds the feature inventory used to classify symbols.

  The inventory is named 'sym_features' and holds one aspect built from
  `f.equidistant` under the alias 'type', with two features: 'abst' and 'raw'.

  Returns:
    The newly constructed `f.Inventory`.
  """
  # NOTE(review): the second argument 'abstract' is presumably the descriptive
  # text of the 'abst' feature - confirm against feature.Feature.
  abstract_type = f('abst', 'abstract')
  raw_type = f('raw')
  type_aspect = f.Aspect(f.equidistant('type', abstract_type, raw_type))
  return f.Inventory('sym_features', type_aspect)


class Symbol(ty.Thing):
Expand All @@ -39,23 +54,53 @@ class Symbol(ty.Thing):
representation, like schwa which doesn't have a corresponding
Devanagari grapheme, this field will be an empty string, i.e., `raw=''`.
index: An int that will be used as the index of this symbol in fsts.
name: A conventional or descriptive name for the symbol, eg. the Unicode
name of the raw grapheme 'अ' `name='DEVANAGARI LETTER A'`, a descriptive
name for the abstract grapheme schwa `name='BRAHMIC SCHWA'`, or the
conventional description of the phoneme /a/
`name='OPEN FRONT UNROUNDED VOWEL'`.
features: Features to be added to the symbol.
Eg. `features=SYM_FEATURES.type.raw` for 'अ'
"""

def __init__(
self, alias: str = '', text: str = '', raw: str = '',
self,
alias: str = '',
text: str = '',
raw: str = '',
index: ty.IntOrNothing = ty.UNSPECIFIED,
name: str = '',
features: f.ITERABLE = ty.UNSPECIFIED,
):
# TODO: Move alias to Thing's init() and remove set_alias() from child
# classes.
super().__init__(alias=alias)
self.text = text if text else self.alias
self.raw = raw
self.index = index if ty.is_specified(index) else hash(self)
self.name = name if name else self.alias
self.features = f.Set(features)
if self.raw:
self.features.add(self.SYM_FEATURES.type.raw)
else:
self.features.add(self.SYM_FEATURES.type.abst)

def __str__(self) -> str:
  """Returns the `text` attribute of this symbol."""
  return self.text

def description(self, show_features: bool = False) -> str:
  """Returns a one-line, space-separated summary of the symbol.

  The alias and index are always reported. `raw` and `text` are included only
  when non-empty, and `name` only when it differs from the alias.

  Args:
    show_features: If True, appends the aliases of the symbol's features as a
      comma-separated list wrapped in braces.

  Returns:
    The summary string.
  """
  fields = ['alias: %s index: %s' % (self.alias, self.index)]
  if self.raw:
    fields.append('raw: %s' % self.raw)
  if self.text:
    fields.append('text: %s' % self.text)
  if self.name != self.alias:
    fields.append('name: %s' % self.name)
  if show_features:
    aliases = ', '.join(ftr.alias for ftr in self.features)
    fields.append('features: {%s}' % aliases)
  return ' '.join(fields)

SYM_FEATURES = _symbol_features()


class Expression(ty.IterableThing):
"""Parent class for Expressions."""
Expand All @@ -72,11 +117,17 @@ def __str__(self) -> str:
class Atomic(Expression, Symbol):
"""An instance of a single symbol."""

OR_SYMBOL = Union[Symbol, 'Atomic']

def __init__(self, symbol: 'Atomic.OR_SYMBOL', alias: str = ''):
def __init__(self, symbol: Symbol, alias: str = ''):
Expression.__init__(self, symbol.alias)
Symbol.__init__(self, symbol.alias, symbol.text, symbol.raw, symbol.index)
Symbol.__init__(
self,
symbol.alias,
symbol.text,
symbol.raw,
symbol.index,
symbol.name,
symbol.features.copy(),
)
self._item_type = Atomic
self._items = [self]
self.symbol = symbol.symbol if isinstance(symbol, Atomic) else symbol
Expand Down
30 changes: 28 additions & 2 deletions nisaba/scripts/natural_translit/utils/expression_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,48 @@
from absl.testing import absltest
from nisaba.scripts.natural_translit.utils import expression as exp

# TODO: move global variables into the inventory building function.
str_schwa = 'schwa'
str_salt = '🜔' # Alchemical symbol for salt.
str_a_ind = 'a_ind'
str_a_letter = 'अ'

sym_schwa = exp.Symbol(str_schwa, text=str_salt)
sym_schwa = exp.Symbol(str_schwa, text=str_salt, index=1, name='SCHWA')
sym_a_ind = exp.Symbol(
str_a_ind, text=str_a_letter, raw=str_a_letter, index=2, name='A LETTER'
)
atm_schwa = exp.Atomic(sym_schwa)
atm_schwa2 = exp.Atomic(atm_schwa)


class ExpressionTest(absltest.TestCase):

def test_symbol_str(self):
def test_symbol_abstract(self):
self.assertEqual(str(sym_schwa), str_salt)
self.assertEmpty(sym_schwa.raw)
self.assertEqual(
sym_schwa.description(show_features=True),
'alias: schwa index: 1 text: 🜔 name: SCHWA features: {abst}',
)

def test_symbol_raw(self):
  """Checks the full description of a symbol that has a raw string."""
  expected = (
      'alias: a_ind index: 2 raw: अ text: अ '
      'name: A LETTER features: {raw}'
  )
  actual = sym_a_ind.description(show_features=True)
  self.assertEqual(actual, expected)

def test_atomic_from_symbol(self):
  """Checks that an Atomic built from a Symbol mirrors that symbol."""
  # String form and raw value match the source symbol.
  atomic_text = str(atm_schwa)
  self.assertEqual(atomic_text, str_salt)
  self.assertEmpty(atm_schwa.raw)
  # The atomic contains itself and points back at the source symbol.
  self.assertIn(atm_schwa, atm_schwa)
  self.assertIs(atm_schwa.symbol, sym_schwa)
  self.assertEqual(atm_schwa.index, sym_schwa.index)
  # The 'abst' type feature is present on the atomic.
  abstract_type = exp.Symbol.SYM_FEATURES.type.abst
  self.assertIn(abstract_type, atm_schwa.features)
  described = atm_schwa.description(show_features=True)
  self.assertEqual(
      described,
      'alias: schwa index: 1 text: 🜔 name: SCHWA features: {abst}',
  )

def test_atomic_from_atomic(self):
self.assertEqual(str(atm_schwa2), str_salt)
Expand All @@ -43,6 +65,10 @@ def test_atomic_from_atomic(self):
self.assertNotIn(atm_schwa, atm_schwa2)
self.assertIs(atm_schwa2.symbol, sym_schwa)
self.assertEqual(atm_schwa2.index, sym_schwa.index)
self.assertEqual(
atm_schwa.description(),
'alias: schwa index: 1 text: 🜔 name: SCHWA',
)

def test_atomic_add(self):
atm_schwa.add(atm_schwa2)
Expand Down

0 comments on commit 091901b

Please sign in to comment.