This repository was archived by the owner on Apr 6, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanu_to_unicode.py
353 lines (325 loc) · 17.3 KB
/
anu_to_unicode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
# -*- coding: utf-8 -*-
from string import punctuation, whitespace, digits
# Ordered (glyph sequence, Unicode Tamil) pairs used by convertTounicode().
#
# The pairs are applied one after another with str.replace(), so ORDER MATTERS:
# an entry must appear before any entry whose pattern is a part of it
# (e.g. "ØïV" before "Øï" before "ï").  Within each consonant group the order
# is: pulli form, au, oo, o, ai, ee, e, uu, u, ii, i, aa, bare consonant.
_ANU_TO_UNICODE_TABLE = (
    # Backward compatibility: earlier versions of this table spelled the glyph
    # pairs "Ø¦", "Øª" and "Ø³" as the single Arabic letters U+0626, U+062A
    # and U+0633 (apparently those two Latin-1 bytes re-read as UTF-8), which
    # meant the real two-glyph sequences were never matched.  The entries in
    # the groups below now use the real glyphs; input that already contains
    # the Arabic letters is first expanded here so it still converts as before.
    (u"\u0626", u"Ø¦"),
    (u"\u062a", u"Øª"),
    (u"\u0633", u"Ø³"),

    # Independent vowels and signs ("Î"/"Îe" are handled at the very end).
    (u"¶", u"அ"),
    (u"g", u"ஆ"),
    (u"Ö", u"இ"),
    (u"~", u"ஈ"),
    (u"c", u"உ"),
    (u"»", u"ஊ"),
    (u"¨", u"எ"),
    (u"°", u"ஏ"),
    (u"n", u"ஐ"),
    (u"{", u"ஓ"),
    (u"à", u"ஃ"),
    # NOTE(review): this entry can never match because "{" is consumed by the
    # entry above, so "{D" comes out as "ஓம்".  It is kept in its original
    # position on purpose: moving it up would also rewrite every word that
    # merely starts with "ஓ" + "ம்".  Confirm the intended behaviour first.
    (u"{D", u"ௐ"),
    (u"p", u"ஸ்ரீ"),

    # க
    (u"Â", u"க்"),
    (u"Øïe", u"கௌ"),
    (u"¼ïV", u"கோ"),
    (u"ØïV", u"கொ"),
    (u"çï", u"கை"),
    (u"¼ï", u"கே"),
    (u"Øï", u"கெ"),
    (u"í", u"கூ"),
    (u"z", u"கு"),
    (u"ÿ", u"கீ"),
    (u"þ", u"கி"),
    (u"ïV", u"கா"),
    (u"ï", u"க"),

    # ங (no glyphs are mapped for ஙி / ஙீ / ஙு / ஙூ)
    (u"º", u"ங்"),
    (u"Øôe", u"ஙௌ"),
    (u"¼ôV", u"ஙோ"),
    (u"ØôV", u"ஙொ"),
    (u"çô", u"ஙை"),
    (u"¼ô", u"ஙே"),
    (u"Øô", u"ஙெ"),
    (u"ôV", u"ஙா"),
    (u"ô", u"ங"),

    # ச
    (u"ß", u"ச்"),
    (u"ØÄe", u"சௌ"),
    (u"¼ÄV", u"சோ"),
    (u"ØÄV", u"சொ"),
    (u"çÄ", u"சை"),
    (u"¼Ä", u"சே"),
    (u"ØÄ", u"செ"),
    (u"ó", u"சூ"),
    (u"·", u"சு"),
    (u"æ", u"சீ"),
    (u"E", u"சி"),
    (u"ÄV", u"சா"),
    (u"Ä", u"ச"),

    # ஜ
    (u"ë", u"ஜ்"),
    (u"ØÛe", u"ஜௌ"),
    (u"¼ÛV", u"ஜோ"),
    (u"ØÛV", u"ஜொ"),
    (u"çÛ", u"ஜை"),
    (u"¼Û", u"ஜே"),
    (u"ØÛ", u"ஜெ"),
    (u"É", u"ஜூ"),
    (u"h", u"ஜு"),
    (u"ý", u"ஜீ"),
    (u"÷", u"ஜி"),
    (u"ÛV", u"ஜா"),
    (u"Û", u"ஜ"),

    # ஞ (no glyphs are mapped for ஞி / ஞீ / ஞு / ஞூ)
    (u"Þ", u"ஞ்"),
    (u"ØQe", u"ஞௌ"),
    (u"¼QV", u"ஞோ"),
    (u"ØQV", u"ஞொ"),
    (u"çQ", u"ஞை"),
    (u"¼Q", u"ஞே"),
    (u"ØQ", u"ஞெ"),
    (u"QV", u"ஞா"),
    (u"Q", u"ஞ"),

    # ட
    (u"â", u"ட்"),
    (u"Ø¦e", u"டௌ"),
    (u"¼¦V", u"டோ"),
    (u"Ø¦V", u"டொ"),
    (u"ç¦", u"டை"),
    (u"¼¦", u"டே"),
    (u"Ø¦", u"டெ"),
    (u"ù", u"டூ"),
    (u"|", u"டு"),
    (u"Ï", u"டீ"),
    (u"½", u"டி"),
    (u"¦V", u"டா"),
    (u"¦", u"ட"),

    # ண
    (u"õ", u"ண்"),
    (u"Øðe", u"ணௌ"),
    (u"¼ðV", u"ணோ"),
    (u"ØðV", u"ணொ"),
    (u"çð", u"ணை"),
    (u"¼ð", u"ணே"),
    (u"Øð", u"ணெ"),
    # NOTE(review): "dd" looks like a placeholder rather than a real glyph
    # sequence; behaviour kept as-is -- confirm the actual glyph for ணூ.
    (u"dd", u"ணூ"),
    (u"b", u"ணு"),
    (u"§", u"ணீ"),
    (u"è", u"ணி"),
    (u"ðV", u"ணா"),
    (u"ð", u"ண"),

    # த
    (u"Ý", u"த்"),
    (u"Ø>e", u"தௌ"),
    (u"¼>V", u"தோ"),
    (u"Ø>V", u"தொ"),
    (u"ç>", u"தை"),
    (u"¼>", u"தே"),
    (u"Ø>", u"தெ"),
    (u"#", u"தூ"),
    (u"m", u"து"),
    (u"y", u"தீ"),
    (u"]", u"தி"),
    (u">V", u"தா"),
    (u">", u"த"),

    # ந
    (u"Í", u"ந்"),
    (u"Øåe", u"நௌ"),
    (u"¼åV", u"நோ"),
    (u"ØåV", u"நொ"),
    (u"çå", u"நை"),
    (u"¼å", u"நே"),
    (u"Øå", u"நெ"),
    (u"±", u"நூ"),
    (u"O", u"நு"),
    (u"À", u"நீ"),
    (u"W", u"நி"),
    (u"åV", u"நா"),
    (u"å", u"ந"),

    # ன
    (u"[", u"ன்"),
    (u"Øªe", u"னௌ"),
    (u"¼ªV", u"னோ"),
    (u"ØªV", u"னொ"),
    (u"çª", u"னை"),
    (u"¼ª", u"னே"),
    (u"Øª", u"னெ"),
    (u"û", u"னூ"),
    (u"Ð", u"னு"),
    (u"Ì", u"னீ"),
    (u"M", u"னி"),
    (u"ªV", u"னா"),
    (u"ª", u"ன"),

    # ப
    (u"©", u"ப்"),
    (u"ØÃe", u"பௌ"),
    (u"¼ÃV", u"போ"),
    (u"ØÃV", u"பொ"),
    (u"çÃ", u"பை"),
    (u"¼Ã", u"பே"),
    (u"ØÃ", u"பெ"),
    (u"¯", u"பூ"),
    (u"A", u"பு"),
    (u"¬", u"பீ"),
    (u"¸", u"பி"),
    (u"ÃV", u"பா"),
    (u"Ã", u"ப"),

    # ம (the consonant glyph is a literal backslash, escaped as "\\")
    (u"D", u"ம்"),
    (u"Ø\\e", u"மௌ"),
    (u"¼\\V", u"மோ"),
    (u"Ø\\V", u"மொ"),
    (u"ç\\", u"மை"),
    (u"¼\\", u"மே"),
    (u"Ø\\", u"மெ"),
    (u"J", u"மூ"),
    (u"x", u"மு"),
    (u"*", u"மீ"),
    (u"t", u"மி"),
    (u"\\V", u"மா"),
    (u"\\", u"ம"),

    # ய
    (u"F", u"ய்"),
    (u"ØBe", u"யௌ"),
    (u"¼BV", u"யோ"),
    (u"ØBV", u"யொ"),
    (u"çB", u"யை"),
    (u"¼B", u"யே"),
    (u"ØB", u"யெ"),
    (u"R", u"யூ"),
    (u"¥", u"யு"),
    (u"X", u"யீ"),
    (u"l", u"யி"),
    (u"BV", u"யா"),
    (u"B", u"ய"),

    # ர
    (u"ì", u"ர்"),
    (u"Ø«e", u"ரௌ"),
    (u"¼«V", u"ரோ"),
    (u"Ø«V", u"ரொ"),
    (u"ç«", u"ரை"),
    (u"¼«", u"ரே"),
    (u"Ø«", u"ரெ"),
    (u"Ô", u"ரூ"),
    (u"ò", u"ரு"),
    (u"Z", u"ரீ"),
    (u"ö", u"ரி"),
    (u"«V", u"ரா"),
    (u"«", u"ர"),

    # ற
    (u"u", u"ற்"),
    (u"ØÅe", u"றௌ"),
    (u"¼ÅV", u"றோ"),
    (u"ØÅV", u"றொ"),
    (u"çÅ", u"றை"),
    (u"¼Å", u"றே"),
    # Was a copy of the ர entry ("Ø«" -> "ரெ"), which left றெ unconverted.
    (u"ØÅ", u"றெ"),
    (u"G", u"றூ"),
    (u"®", u"று"),
    (u"S", u"றீ"),
    (u"¤", u"றி"),
    (u"ÅV", u"றா"),
    (u"Å", u"ற"),

    # ல
    (u"_", u"ல்"),
    (u"Øée", u"லௌ"),
    (u"¼éV", u"லோ"),
    (u"ØéV", u"லொ"),
    (u"çé", u"லை"),
    (u"¼é", u"லே"),
    (u"Øé", u"லெ"),
    (u"Ù", u"லூ"),
    (u"K", u"லு"),
    (u"Ü", u"லீ"),
    (u"o", u"லி"),
    (u"éV", u"லா"),
    (u"é", u"ல"),

    # ள (the bare consonant "e" is handled at the very end)
    (u"^", u"ள்"),
    (u"Øee", u"ளௌ"),
    (u"¼eV", u"ளோ"),
    (u"ØeV", u"ளொ"),
    (u"çe", u"ளை"),
    (u"¼e", u"ளே"),
    (u"Øe", u"ளெ"),
    (u"j", u"ளூ"),
    (u"Ó", u"ளு"),
    (u"C", u"ளீ"),
    (u"¹", u"ளி"),
    (u"eV", u"ளா"),

    # ழ
    (u"µ", u"ழ்"),
    (u"Øwe", u"ழௌ"),
    (u"¼wV", u"ழோ"),
    (u"ØwV", u"ழொ"),
    (u"çw", u"ழை"),
    (u"¼w", u"ழே"),
    (u"Øw", u"ழெ"),
    (u"ñ", u"ழூ"),
    (u"¿", u"ழு"),
    (u"Ñ", u"ழீ"),
    (u"a", u"ழி"),
    (u"wV", u"ழா"),
    (u"w", u"ழ"),

    # வ
    (u"Ë", u"வ்"),
    (u"Øke", u"வௌ"),
    (u"¼kV", u"வோ"),
    (u"ØkV", u"வொ"),
    (u"çk", u"வை"),
    (u"¼k", u"வே"),
    (u"Øk", u"வெ"),
    (u"Æ", u"வூ"),
    (u"¡", u"வு"),
    (u"T", u"வீ"),
    (u"s", u"வி"),
    (u"kV", u"வா"),
    (u"k", u"வ"),

    # ஷ
    (u"i", u"ஷ்"),
    (u"Øre", u"ஷௌ"),
    (u"¼rV", u"ஷோ"),
    (u"ØrV", u"ஷொ"),
    (u"çr", u"ஷை"),
    (u"¼r", u"ஷே"),
    (u"Ør", u"ஷெ"),
    (u"£", u"ஷூ"),
    (u"×", u"ஷு"),
    (u"U", u"ஷீ"),
    (u"´", u"ஷி"),
    (u"rV", u"ஷா"),
    (u"r", u"ஷ"),

    # ஸ
    (u"ü", u"ஸ்"),
    (u"Øve", u"ஸௌ"),
    (u"¼vV", u"ஸோ"),
    (u"ØvV", u"ஸொ"),
    (u"çv", u"ஸை"),
    (u"¼v", u"ஸே"),
    (u"Øv", u"ஸெ"),
    (u"`", u"ஸூ"),
    (u"q", u"ஸு"),
    (u"¢", u"ஸீ"),
    (u"L", u"ஸி"),
    (u"vV", u"ஸா"),
    (u"v", u"ஸ"),

    # ஹ
    (u"ã", u"ஹ்"),
    (u"ØÇe", u"ஹௌ"),
    (u"¼ÇV", u"ஹோ"),
    (u"ØÇV", u"ஹொ"),
    (u"çÇ", u"ஹை"),
    (u"¼Ç", u"ஹே"),
    (u"ØÇ", u"ஹெ"),
    (u"ê", u"ஹீ"),
    (u"N", u"ஹி"),
    (u"ÇV", u"ஹா"),
    (u"Ç", u"ஹ"),

    # க்ஷ
    (u"Õ", u"க்ஷ்"),
    (u"Ø³e", u"க்ஷௌ"),
    (u"¼³V", u"க்ஷோ"),
    (u"Ø³V", u"க்ஷொ"),
    (u"ç³", u"க்ஷை"),
    (u"¼³", u"க்ஷே"),
    (u"Ø³", u"க்ஷெ"),
    (u"f", u"க்ஷூ"),
    (u"Y", u"க்ஷு"),
    (u"È", u"க்ஷீ"),
    (u"H", u"க்ஷி"),
    (u"³V", u"க்ஷா"),
    (u"³", u"க்ஷ"),

    # Applied last.  The glyph "e" doubles as the trailing part of every
    # "au" form ("Ø" + consonant + "e") and of "Îe"; replacing it earlier
    # (as the original order did) turned the "au" entries of the ழ, வ, ஷ, ஸ,
    # ஹ and க்ஷ groups, and the "Îe" entry, into dead code.
    (u"Îe", u"ஔ"),
    (u"Î", u"ஒ"),
    (u"e", u"ள"),
)


def convertTounicode(source_text, filtered=False, debug=False):
    """Convert Anu-font-encoded text (see the module name) to Unicode Tamil.

    Every glyph sequence listed in ``_ANU_TO_UNICODE_TABLE`` is replaced, in
    table order, by its Unicode Tamil equivalent.  Characters that have no
    table entry are left in place.

    A character is reported as *unconverted* when it is present in the
    converted text, also occurs somewhere in ``source_text``, and is not an
    ASCII punctuation mark, whitespace character or digit.

    Args:
        source_text: The text to convert.
        filtered: If true (and ``debug`` is false), return the converted text
            with every unconverted character removed.
        debug: If true, return the unconverted characters instead of the
            text.  Takes precedence over ``filtered``.

    Returns:
        The converted string; or, with ``filtered``, the converted string
        without unconverted characters; or, with ``debug``, a list of the
        unconverted characters in order of appearance (duplicates included).
    """
    translated = source_text
    for glyphs, tamil in _ANU_TO_UNICODE_TABLE:
        translated = translated.replace(glyphs, tamil)

    # Build the lookup sets once instead of re-scanning strings per character.
    source_chars = set(source_text)
    ignorable = set(punctuation + whitespace + digits)
    unconverted = [ch for ch in translated
                   if ch in source_chars and ch not in ignorable]

    if debug:
        return unconverted
    if filtered:
        leftovers = set(unconverted)
        return ''.join(ch for ch in translated if ch not in leftovers)
    return translated