Skip to content

Commit

Permalink
feat: generate Jamo conversion lookup table
Browse files Browse the repository at this point in the history
  • Loading branch information
WieeRd committed Feb 12, 2024
1 parent 20919bb commit c8a36dc
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 294 deletions.
43 changes: 14 additions & 29 deletions mklookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
from collections.abc import Callable
from typing import TypeVar

# FIX: moum to jungseong can be 1:1 mapped; it does not need a lookup table


def jamo_to_compat_jamo(jamo: str, /) -> str | None:
"""Maps a Jamo character to a Compatibility Jamo character."""
Expand All @@ -25,12 +23,6 @@ def compat_jaum_to_choseong(compat_jaum: str, /) -> str | None:
return None


def compat_moum_to_jungseong(compat_moum: str, /) -> str:
    """Map a Compatibility Moum character to a Jamo Jungseong character.

    The conversion goes through character names rather than code point
    arithmetic: the last space-separated word of the input's name (``"A"``
    from ``"HANGUL LETTER A"``) is appended to ``"HANGUL JUNGSEONG "`` and
    that name is looked up again.

    Args:
        compat_moum: The character to convert (positional-only).

    Returns:
        The character named ``"HANGUL JUNGSEONG <last word of input name>"``.

    Raises:
        Whatever ``ud.name`` / ``ud.lookup`` raise for a character without a
        name or a name without a character. NOTE(review): ``ud`` is
        presumably ``unicodedata`` (its import is outside this view), in
        which case that is ``ValueError`` and ``KeyError`` respectively.
    """
    # Only the trailing word identifies the vowel: "HANGUL LETTER A" -> "A".
    _, _, name = ud.name(compat_moum).rpartition(" ")
    return ud.lookup(f"HANGUL JUNGSEONG {name}")


def compat_jaum_to_jongseong(compat_jaum: str, /) -> str:
"""Maps a Compatibility Jaum character to a Jamo Jongseong character."""
name = ud.name(compat_jaum).split(" ")[-1] # HANGUL LETTER "KIYEOK"
Expand Down Expand Up @@ -67,39 +59,32 @@ def decompose_jongseong(jongseong: str, /) -> tuple[str, str] | None:

T = TypeVar("T")

def _create_lookup_table(
convert: Callable[[str], T], base: int, end: int
) -> list[T]:
def _mklookup(convert: Callable[[str], T], base: int, end: int) -> list[T]:
    """Build a lookup table by converting every code point in a range.

    Args:
        convert: Called once per code point with the one-character string
            ``chr(code)``; its return value becomes the table entry.
        base: First code point of the range (inclusive).
        end: Last code point of the range (inclusive).

    Returns:
        The converted values in ascending code point order, so the entry at
        index ``i`` belongs to ``chr(base + i)``. Empty when ``end < base``.
    """
    # ``end`` is inclusive, hence the ``+ 1`` on the exclusive range bound.
    return [convert(chr(code)) for code in range(base, end + 1)]

JAMO_TO_COMPAT_JAMO = _create_lookup_table(
CHOSEONG_TO_COMPAT_JAUM = _mklookup(
jamo_to_compat_jamo,
hg.JAMO_BASE,
hg.JAMO_END,
hg.MODERN_CHOSEONG_BASE,
hg.MODERN_CHOSEONG_END,
)
JONGSEONG_TO_COMPAT_JAUM = _mklookup(
jamo_to_compat_jamo,
hg.MODERN_JONGSEONG_BASE,
hg.MODERN_JONGSEONG_END,
)
COMPAT_JAUM_TO_CHOSEONG = _create_lookup_table(

COMPAT_JAUM_TO_CHOSEONG = _mklookup(
compat_jaum_to_choseong,
hg.MODERN_COMPAT_JAUM_BASE,
hg.MODERN_COMPAT_JAUM_END,
)
COMPAT_MOUM_TO_JUNGSEONG = _create_lookup_table(
compat_moum_to_jungseong,
hg.MODERN_COMPAT_MOUM_BASE,
hg.MODERN_COMPAT_MOUM_END,
)
COMPAT_JAUM_TO_JONGSEONG = _create_lookup_table(
COMPAT_JAUM_TO_JONGSEONG = _mklookup(
compat_jaum_to_jongseong,
hg.MODERN_COMPAT_JAUM_BASE,
hg.MODERN_COMPAT_JAUM_END,
)
DECOMPOSE_JONGSEONG = _create_lookup_table(
decompose_jongseong,
hg.MODERN_JONGSEONG_BASE,
hg.MODERN_JONGSEONG_END,
)

print(f"JAMO_TO_COMPAT_JAMO = {JAMO_TO_COMPAT_JAMO}\n")
print(f"CHOSEONG_TO_COMPAT_JAUM = {CHOSEONG_TO_COMPAT_JAUM}\n")
print(f"JONGSEONG_TO_COMPAT_JAUM = {JONGSEONG_TO_COMPAT_JAUM}\n")
print(f"COMPAT_JAUM_TO_CHOSEONG = {COMPAT_JAUM_TO_CHOSEONG}\n")
print(f"COMPAT_MOUM_TO_JUNGSEONG = {COMPAT_MOUM_TO_JUNGSEONG}\n")
print(f"COMPAT_JAUM_TO_JONGSEONG = {COMPAT_JAUM_TO_JONGSEONG}\n")
print(f"DECOMPOSE_JONGSEONG = {DECOMPOSE_JONGSEONG}\n")
269 changes: 4 additions & 265 deletions ricecake/hangul/_lookup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
JAMO_TO_COMPAT_JAMO = [
CHOSEONG_TO_COMPAT_JAUM = [
"ㄱ",
"ㄲ",
"ㄴ",
Expand All @@ -18,155 +18,9 @@
"ㅌ",
"ㅍ",
"ㅎ",
None,
"ㅥ",
"ㅦ",
None,
None,
None,
None,
"ㅀ",
None,
"ㅮ",
"ㅱ",
"ㅲ",
None,
"ㅳ",
"ㅄ",
"ㅴ",
"ㅵ",
None,
None,
None,
"ㅶ",
None,
"ㅷ",
None,
"ㅸ",
"ㅹ",
"ㅺ",
"ㅻ",
"ㅼ",
None,
None,
"ㅽ",
None,
None,
None,
"ㅾ",
None,
None,
None,
None,
None,
None,
None,
None,
None,
"ㅿ",
None,
None,
None,
None,
None,
None,
"ㆀ",
None,
None,
None,
None,
"ㆁ",
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
"ㆄ",
"ㆅ",
"ㆆ",
None,
"ㅧ",
"ㄵ",
"ㄶ",
None,
None,
None,
"ㅏ",
"ㅐ",
"ㅑ",
"ㅒ",
"ㅓ",
"ㅔ",
"ㅕ",
"ㅖ",
"ㅗ",
"ㅘ",
"ㅙ",
"ㅚ",
"ㅛ",
"ㅜ",
"ㅝ",
"ㅞ",
"ㅟ",
"ㅠ",
"ㅡ",
"ㅢ",
"ㅣ",
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
"ㆇ",
"ㆈ",
None,
None,
"ㆉ",
None,
None,
None,
None,
None,
None,
None,
None,
"ㆊ",
"ㆋ",
None,
"ㆌ",
None,
None,
None,
None,
None,
None,
None,
None,
None,
"ㆍ",
None,
None,
None,
None,
None,
None,
None,
None,
None,
]

JONGSEONG_TO_COMPAT_JAUM = [
"ㄱ",
"ㄲ",
"ㄳ",
Expand Down Expand Up @@ -194,67 +48,6 @@
"ㅌ",
"ㅍ",
"ㅎ",
None,
None,
None,
"ㅦ",
"ㅧ",
"ㅨ",
None,
None,
None,
"ㅩ",
None,
"ㅪ",
None,
None,
None,
None,
"ㅫ",
None,
None,
None,
"ㅬ",
None,
"ㅭ",
None,
None,
"ㅮ",
"ㅯ",
None,
"ㅰ",
None,
None,
"ㅱ",
None,
None,
None,
"ㅸ",
"ㅺ",
"ㅼ",
None,
"ㅽ",
"ㅿ",
None,
None,
"ㆀ",
None,
"ㆁ",
"ㆂ",
"ㆃ",
None,
"ㆄ",
None,
None,
None,
None,
"ㆆ",
None,
None,
None,
None,
None,
"ㅥ",
]

COMPAT_JAUM_TO_CHOSEONG = [
Expand Down Expand Up @@ -290,30 +83,6 @@
"ᄒ",
]

COMPAT_MOUM_TO_JUNGSEONG = [
"ᅡ",
"ᅢ",
"ᅣ",
"ᅤ",
"ᅥ",
"ᅦ",
"ᅧ",
"ᅨ",
"ᅩ",
"ᅪ",
"ᅫ",
"ᅬ",
"ᅭ",
"ᅮ",
"ᅯ",
"ᅰ",
"ᅱ",
"ᅲ",
"ᅳ",
"ᅴ",
"ᅵ",
]

COMPAT_JAUM_TO_JONGSEONG = [
"ᆨ",
"ᆩ",
Expand Down Expand Up @@ -346,33 +115,3 @@
"ᇁ",
"ᇂ",
]

DECOMPOSE_JONGSEONG = [
None,
("ᆨ", "ᆨ"),
("ᆨ", "ᆺ"),
None,
("ᆫ", "ᆽ"),
("ᆫ", "ᇂ"),
None,
None,
("ᆯ", "ᆨ"),
("ᆯ", "ᆷ"),
("ᆯ", "ᆸ"),
("ᆯ", "ᆺ"),
("ᆯ", "ᇀ"),
("ᆯ", "ᇁ"),
("ᆯ", "ᇂ"),
None,
None,
("ᆸ", "ᆺ"),
None,
("ᆺ", "ᆺ"),
None,
None,
None,
None,
None,
None,
None,
]

0 comments on commit c8a36dc

Please sign in to comment.