-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtransliterate.py
52 lines (43 loc) · 2.12 KB
/
transliterate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import json
import sys
from urllib.parse import quote

import requests

from .numerals import transliterate_numerals
# URL template for the Google Input Tools request endpoint.
# Placeholders, in order: text to transliterate, language code (used as the
# prefix of the `itc` parameter), and the number of suggestions (`num`).
G_API_DEFAULT = 'https://inputtools.google.com/request?text=%s&itc=%s-t-i0&num=%d'
# Variant of the template used for the language codes in CHINESE_LANGS.
# It takes one extra placeholder after `-t-i0-`, which transliterate_word
# fills with the input (romanization) scheme, e.g. 'pinyin'.
G_API_CHINESE = 'https://inputtools.google.com/request?text=%s&itc=%s-t-i0-%s&num=%d'
# Language codes for which transliterate_word builds its URL from G_API_CHINESE.
CHINESE_LANGS = {'yue-hant', 'zh', 'zh-hant'}
def transliterate_word(word: str, lang_code: str, max_suggestions: int = 6, input_scheme='pinyin') -> list:
    """Transliterate a given word to the required language.

    Args:
        word (str): The word to transliterate from Latin/Roman (English) script
        lang_code (str): The target language's ISO639 code
        max_suggestions (int, optional): Maximum number of suggestions to fetch. Defaults to 6.
        input_scheme(str, optional): Romanization scheme (Only for Chinese)

    Returns:
        list: List of suggested transliterations. An empty list is returned
        (and a message is written to stderr) when the response status is not
        200, the body is not valid JSON, the payload does not report SUCCESS,
        or the payload does not have the expected shape.

    Note:
        Network-level errors raised by ``requests.get`` (e.g. timeouts) are
        not caught here and propagate to the caller.
    """
    # Percent-encode the word so reserved characters (&, #, +, %, ...) cannot
    # break or truncate the query string.
    encoded_word = quote(word.lower(), safe='')

    if lang_code in CHINESE_LANGS:
        api_url = G_API_CHINESE % (encoded_word, lang_code, input_scheme, max_suggestions)
    else:
        api_url = G_API_DEFAULT % (encoded_word, lang_code, max_suggestions)

    response = requests.get(api_url, allow_redirects=False, timeout=5)

    suggestions = None
    # Check the status before parsing: an error or redirect response may carry
    # a non-JSON body, which must end up in the failure path below rather than
    # raising from json.loads.
    if response.status_code == 200:
        try:
            payload = json.loads(response.text)
            if 'SUCCESS' in payload[0]:
                suggestions = payload[1][0][1]
        except (ValueError, LookupError, TypeError):
            # Invalid JSON or an unexpectedly shaped payload counts as a failure.
            suggestions = None

    if suggestions is None:
        print('Request failed with status code: %d\nERROR: %s' % (response.status_code, response.text), file=sys.stderr)
        return []
    return suggestions
def transliterate_text(text: str, lang_code: str, convert_numerals: bool = False) -> str:
    """[Experimental] Transliterate a given sentence or text to the required language.

    The text is split on whitespace, each word is transliterated on its own
    using the top suggestion, and the results are joined with single spaces.

    Args:
        text (str): The text to transliterate from Latin/Roman (English) script.
        lang_code (str): The target language's ISO639 code
        convert_numerals (bool): Transliterate numerals. Defaults to False.

    Returns:
        str: Transliterated text. Words for which no suggestion could be
        obtained are kept unchanged.
    """
    converted = []
    for word in text.split():
        suggestions = transliterate_word(word, lang_code, 1)
        # transliterate_word returns [] on failure; keep the original word in
        # that case instead of raising IndexError on the whole text.
        converted.append(suggestions[0] if suggestions else word)

    result = ' '.join(converted)
    if convert_numerals:
        result = transliterate_numerals(result, lang_code)
    return result