-
Notifications
You must be signed in to change notification settings - Fork 38
/
han2one_rev.py
58 lines (50 loc) · 1.8 KB
/
han2one_rev.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import numpy as np
from hgtk.letter import decompose as decom
# Jamo vocabularies for the one-hot encoders in this module. A jamo's position
# in its list is the index of the slot that gets set to 1.

# Vocabulary for the first decomposition component (initial consonants).
choseng = ['ㄱ','ㄴ','ㄷ','ㄹ','ㅁ','ㅂ','ㅅ','ㅇ','ㅈ','ㅊ','ㅋ','ㅌ','ㅍ','ㅎ','ㄲ','ㄸ','ㅃ','ㅆ','ㅉ']
# Vocabulary for the second decomposition component (vowels).
cwungseng = ['ㅏ','ㅑ','ㅓ','ㅕ','ㅗ','ㅛ','ㅜ','ㅠ','ㅡ','ㅣ','ㅐ','ㅒ','ㅔ','ㅖ','ㅘ','ㅙ','ㅚ','ㅝ','ㅞ','ㅟ','ㅢ']
# Vocabulary for the third decomposition component (final consonants). The
# trailing '' gives an empty third component its own slot
# (presumably "no final consonant" -- confirm against the decomposer).
congseng = ['ㄱ','ㄴ','ㄷ','ㄹ','ㅁ','ㅂ','ㅅ','ㅇ','ㅈ','ㅊ','ㅋ','ㅌ','ㅍ','ㅎ','ㄲ','ㅆ','ㄳ','ㄵ','ㄶ','ㄺ','ㄻ','ㄼ','ㄽ','ㄾ','ㄿ','ㅀ','ㅄ','']
# The three vocabularies laid end to end (duplicates kept).
alp = choseng + cwungseng + congseng
# Every distinct entry of ``alp`` in first-appearance order. A plain ``set``
# must not be used here: string hashing is randomized per process, so the set's
# iteration order -- and with it every one-hot index derived from this list --
# would change from one run to the next.
uniquealp = list(dict.fromkeys(alp))
def cho2onehot(s):
    """Encode ``s`` as a one-hot vector over the ``choseng`` list.

    Returns a float array of length ``len(choseng)``. The slot at the
    position of ``s`` in ``choseng`` is 1; if ``s`` is not in the list the
    vector is all zeros.
    """
    onehot = np.zeros(len(choseng))
    if s not in choseng:
        # Unknown symbol: nothing to mark.
        return onehot
    onehot[choseng.index(s)] = 1
    return onehot
def cwu2onehot(s):
    """Encode ``s`` as a one-hot vector over the ``cwungseng`` list.

    Returns a float array of length ``len(cwungseng)`` with a 1 at the
    position of ``s``, or all zeros when ``s`` is not in the list.
    """
    vec = np.zeros(len(cwungseng))
    # ``None`` marks "not in the vocabulary"; a real slot is always an int.
    slot = cwungseng.index(s) if s in cwungseng else None
    if slot is not None:
        vec[slot] = 1
    return vec
def con2onehot(s):
    """Encode ``s`` as a one-hot vector over the ``congseng`` list.

    Returns a float array of length ``len(congseng)`` with a 1 at the
    position of ``s``, or all zeros when ``s`` is not in the list. Because
    ``congseng`` contains ``''``, an empty string is a known symbol and gets
    its own slot.
    """
    width = len(congseng)
    known = s in congseng
    encoded = np.zeros(width)
    if known:
        position = congseng.index(s)
        encoded[position] = 1
    return encoded
def uni2onehot(s):
    """Encode ``s`` as a one-hot vector over the ``uniquealp`` list.

    Returns a float array of length ``len(uniquealp)`` with a 1 at the
    position of ``s``, or all zeros when ``s`` is not in the list.
    """
    size = len(uniquealp)
    blank = np.zeros(size)
    if not (s in uniquealp):
        return blank
    hot = uniquealp.index(s)
    blank[hot] = 1
    return blank
def shin_onehot(s):
    """Encode the decomposition of ``s`` as a ``(len(alp), 3)`` array.

    ``s`` is passed to ``decom``; the first three elements of its result are
    encoded with ``cho2onehot``, ``cwu2onehot`` and ``con2onehot``
    respectively. Each encoding occupies its own row band and its own column:

    * column 0, rows ``[0, len(choseng))``: first component
    * column 1, rows ``[len(choseng), len(choseng) + len(cwungseng))``:
      second component
    * column 2, rows ``[len(choseng) + len(cwungseng), len(alp))``:
      third component

    Every other entry is zero.
    """
    parts = decom(s)
    grid = np.zeros((len(alp), 3))

    n_cho = len(choseng)
    # Row boundaries of the three bands; band ``k`` spans
    # ``bounds[k]:bounds[k + 1]`` and is written into column ``k``.
    bounds = (0, n_cho, n_cho + len(cwungseng), len(alp))
    encoders = (cho2onehot, cwu2onehot, con2onehot)

    for col, encode in enumerate(encoders):
        grid[bounds[col]:bounds[col + 1], col] = encode(parts[col])
    return grid
def cho_onehot(s):
    """Encode ``s`` as a ``(len(alp) + len(uniquealp), 3)`` array.

    ``s`` is decomposed with ``decom`` and handled in one of two ways:

    * If the first three components together contain more than one
      character, rows ``[0, len(alp))`` receive ``shin_onehot(s)`` and the
      remaining rows stay zero.
    * Otherwise rows ``[len(alp), end)`` of a single column receive
      ``uni2onehot(s)``: column 0 if the first component is non-empty,
      column 1 if the second is, and column 2 in every other case.
    """
    parts = decom(s)
    split = len(alp)
    out = np.zeros((split + len(uniquealp), 3))

    if len(parts[0] + parts[1] + parts[2]) > 1:
        # More than one jamo in total: use the per-position encoding.
        out[:split, :] = shin_onehot(s)
        return out

    # At most one jamo: pick the column from the first non-empty component.
    if len(parts[0]) > 0:
        column = 0
    elif len(parts[1]) > 0:
        column = 1
    else:
        column = 2
    out[split:, column] = uni2onehot(s)
    return out
def char2onehot(s):
    """Encode ``s`` as one flat vector of three concatenated one-hots.

    ``s`` is decomposed with ``decom``; its first three components are
    encoded with ``cho2onehot``, ``cwu2onehot`` and ``con2onehot`` and the
    results are joined in that order. The result has length
    ``len(choseng) + len(cwungseng) + len(congseng)``.
    """
    parts = decom(s)
    first = cho2onehot(parts[0])
    second = cwu2onehot(parts[1])
    third = con2onehot(parts[2])
    return np.concatenate([first, second, third])