This repository was archived by the owner on Apr 6, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathshreelipi_to_unicode.py
353 lines (325 loc) · 17.5 KB
/
shreelipi_to_unicode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
# -*- coding: utf-8 -*-
from string import punctuation, whitespace, digits
# Replacement rules (legacy glyph sequence -> Unicode Tamil), grouped by
# vowel / consonant family.  Within a family the combined forms use the
# prefix glyphs "ö" (ெ), "@" (ே), "ø" (ை) and the suffix glyphs "õ" (ா) and
# "Í" (the ௌ length mark, which is also the base glyph for ள).
#
# Identity rules present in the original table (ஃ and the க்ஷ family mapped
# to themselves) were no-ops and are intentionally not listed.
_SHREELIPI_RULES = (
    # Independent vowels and special signs.
    (u"A", u"அ"), (u"B", u"ஆ"), (u"C", u"இ"), (u"D", u"ஈ"),
    (u"E", u"உ"), (u"F", u"ஊ"), (u"G", u"எ"), (u"H", u"ஏ"),
    (u"I", u"ஐ"), (u"J", u"ஒ"), (u"K", u"ஓ"),
    (u"JÍ", u"ஔ"), (u"K®", u"ௐ"), (u"ÿ", u"ஸ்ரீ"),
    # க
    (u"U", u"க்"), (u"öPÍ", u"கௌ"), (u"@Põ", u"கோ"), (u"öPõ", u"கொ"),
    (u"øP", u"கை"), (u"@P", u"கே"), (u"öP", u"கெ"),
    (u"T", u"கூ"), (u"S", u"கு"), (u"R", u"கீ"), (u"Q", u"கி"),
    (u"Põ", u"கா"), (u"P", u"க"),
    # ங
    (u"[", u"ங்"), (u"öVÍ", u"ஙௌ"), (u"@Võ", u"ஙோ"), (u"öVõ", u"ஙொ"),
    (u"øV", u"ஙை"), (u"@V", u"ஙே"), (u"öV", u"ஙெ"),
    (u"Z", u"ஙூ"), (u"Y", u"ஙு"), (u"X", u"ஙீ"), (u"W", u"ஙி"),
    (u"Võ", u"ஙா"), (u"V", u"ங"),
    # ச
    (u"a", u"ச்"), (u"öŒÍ", u"சௌ"), (u"@Œõ", u"சோ"), (u"öŒõ", u"சொ"),
    (u"øŒ", u"சை"), (u"@Œ", u"சே"), (u"öŒ", u"செ"),
    (u"‹", u"சூ"), (u"”", u"சு"), (u"^", u"சீ"), (u"]", u"சி"),
    (u"Œõ", u"சா"), (u"Œ", u"ச"),
    # ஜ
    (u"ä", u"ஜ்"), (u"öáÍ", u"ஜௌ"), (u"@áõ", u"ஜோ"), (u"öáõ", u"ஜொ"),
    (u"øá", u"ஜை"), (u"@á", u"ஜே"), (u"öá", u"ஜெ"),
    (u"á„", u"ஜூ"), (u"áú", u"ஜு"), (u"ã", u"ஜீ"), (u"â", u"ஜி"),
    (u"áõ", u"ஜா"), (u"á", u"ஜ"),
    # ஞ
    (u"g", u"ஞ்"), (u"öbÍ", u"ஞௌ"), (u"@bõ", u"ஞோ"), (u"öbõ", u"ஞொ"),
    (u"øb", u"ஞை"), (u"@b", u"ஞே"), (u"öb", u"ஞெ"),
    (u"f", u"ஞூ"), (u"e", u"ஞு"), (u"d", u"ஞீ"), (u"c", u"ஞி"),
    (u"bõ", u"ஞா"), (u"b", u"ஞ"),
    # ட
    (u"m", u"ட்"), (u"öhÍ", u"டௌ"), (u"@hõ", u"டோ"), (u"öhõ", u"டொ"),
    (u"øh", u"டை"), (u"@h", u"டே"), (u"öh", u"டெ"),
    (u"l", u"டூ"), (u"k", u"டு"), (u"j", u"டீ"), (u"i", u"டி"),
    (u"hõ", u"டா"), (u"h", u"ட"),
    # ண
    (u"s", u"ண்"), (u"önÍ", u"ணௌ"), (u"@nõ", u"ணோ"), (u"önõ", u"ணொ"),
    (u"øn", u"ணை"), (u"@n", u"ணே"), (u"ön", u"ணெ"),
    (u"r", u"ணூ"), (u"q", u"ணு"), (u"p", u"ணீ"), (u"o", u"ணி"),
    (u"nõ", u"ணா"), (u"n", u"ண"),
    # த
    (u"z", u"த்"), (u"öuÍ", u"தௌ"), (u"@uõ", u"தோ"), (u"öuõ", u"தொ"),
    (u"øu", u"தை"), (u"@u", u"தே"), (u"öu", u"தெ"),
    (u"y", u"தூ"), (u"x", u"து"), (u"w", u"தீ"), (u"v", u"தி"),
    (u"uõ", u"தா"), (u"u", u"த"),
    # ந
    (u"¢", u"ந்"), (u"ö|Í", u"நௌ"), (u"@|õ", u"நோ"), (u"ö|õ", u"நொ"),
    (u"ø|", u"நை"), (u"@|", u"நே"), (u"ö|", u"நெ"),
    (u"¡", u"நூ"), (u"~", u"நு"), (u"}", u"நீ"), (u"{", u"நி"),
    (u"|õ", u"நா"), (u"|", u"ந"),
    # ன
    (u"ß", u"ன்"), (u"öÚÍ", u"னௌ"), (u"@Úõ", u"னோ"), (u"öÚõ", u"னொ"),
    (u"øÚ", u"னை"), (u"@Ú", u"னே"), (u"öÚ", u"னெ"),
    (u"Ù", u"னூ"), (u"ˆ", u"னு"), (u"Ü", u"னீ"), (u"Û", u"னி"),
    (u"Úõ", u"னா"), (u"Ú", u"ன"),
    # ப
    (u"¨", u"ப்"), (u"ö£Í", u"பௌ"), (u"@£õ", u"போ"), (u"ö£õ", u"பொ"),
    (u"ø£", u"பை"), (u"@£", u"பே"), (u"ö£", u"பெ"),
    (u"§", u"பூ"), (u">", u"பு"), (u"¥", u"பீ"), (u"$", u"பி"),
    (u"£õ", u"பா"), (u"£", u"ப"),
    # ம
    (u"®", u"ம்"), (u"ö©Í", u"மௌ"), (u"@©õ", u"மோ"), (u"ö©õ", u"மொ"),
    (u"ø©", u"மை"), (u"@©", u"மே"), (u"ö©", u"மெ"),
    (u"‰", u"மூ"), (u"•", u"மு"), (u"«", u"மீ"), (u"ª", u"மி"),
    (u"©õ", u"மா"), (u"©", u"ம"),
    # ய
    (u"#", u"ய்"), (u"ö¯Í", u"யௌ"), (u"@¯õ", u"யோ"), (u"ö¯õ", u"யொ"),
    (u"ø¯", u"யை"), (u"@¯", u"யே"), (u"ö¯", u"யெ"),
    (u"N", u"யூ"), (u"M", u"யு"), (u"±", u"யீ"), (u"°", u"யி"),
    (u"¯õ", u"யா"), (u"¯", u"ய"),
    # ர
    (u"º", u"ர்"), (u"öµÍ", u"ரௌ"), (u"@µõ", u"ரோ"), (u"öµõ", u"ரொ"),
    (u"øµ", u"ரை"), (u"@µ", u"ரே"), (u"öµ", u"ரெ"),
    (u"O", u"ரூ"), (u"¸", u"ரு"), (u"Ÿ", u"ரீ"), (u"›", u"ரி"),
    (u"µõ", u"ரா"), (u"µ", u"ர"),
    # ற  (the ௌ and ெ rows were copy-paste slips in the original table:
    #     "öÓõ" was listed for றௌ and "öµ" was repeated instead of "öÓ").
    (u"Ø", u"ற்"), (u"öÓÍ", u"றௌ"), (u"@Óõ", u"றோ"), (u"öÓõ", u"றொ"),
    (u"øÓ", u"றை"), (u"@Ó", u"றே"), (u"öÓ", u"றெ"),
    (u"–", u"றூ"), (u"Ö", u"று"), (u"Õ", u"றீ"), (u"Ô", u"றி"),
    (u"Óõ", u"றா"), (u"Ó", u"ற"),
    # ல
    (u"À", u"ல்"), (u"ö»Í", u"லௌ"), (u"@»õ", u"லோ"), (u"ö»õ", u"லொ"),
    (u"ø»", u"லை"), (u"@»", u"லே"), (u"ö»", u"லெ"),
    (u"¿", u"லூ"), (u"˜", u"லு"), (u"—", u"லீ"), (u"t", u"லி"),
    (u"»õ", u"லா"), (u"»", u"ல"),
    # ள
    (u"Ò", u"ள்"), (u"öÍÍ", u"ளௌ"), (u"@Íõ", u"ளோ"), (u"öÍõ", u"ளொ"),
    (u"øÍ", u"ளை"), (u"@Í", u"ளே"), (u"öÍ", u"ளெ"),
    (u"Ñ", u"ளூ"), (u"™", u"ளு"), (u"Ï", u"ளீ"), (u"Î", u"ளி"),
    (u"Íõ", u"ளா"), (u"Í", u"ள"),
    # ழ
    (u"Ì", u"ழ்"), (u"öÇÍ", u"ழௌ"), (u"@Çõ", u"ழோ"), (u"öÇõ", u"ழொ"),
    (u"øÇ", u"ழை"), (u"@Ç", u"ழே"), (u"öÇ", u"ழெ"),
    (u"Ë", u"ழூ"), (u"Ê", u"ழு"), (u"É", u"ழீ"), (u"È", u"ழி"),
    (u"Çõ", u"ழா"), (u"Ç", u"ழ"),
    # வ
    (u"Æ", u"வ்"), (u"öÁÍ", u"வௌ"), (u"@Áõ", u"வோ"), (u"öÁõ", u"வொ"),
    (u"øÁ", u"வை"), (u"@Á", u"வே"), (u"öÁ", u"வெ"),
    (u"Å", u"வூ"), (u"Ä", u"வு"), (u"Ã", u"வீ"), (u"Â", u"வி"),
    (u"Áõ", u"வா"), (u"Á", u"வ"),
    # ஷ
    (u"è", u"ஷ்"), (u"öåÍ", u"ஷௌ"), (u"@åõ", u"ஷோ"), (u"öåõ", u"ஷொ"),
    (u"øå", u"ஷை"), (u"@å", u"ஷே"), (u"öå", u"ஷெ"),
    (u"å„", u"ஷூ"), (u"åû", u"ஷு"), (u"ç", u"ஷீ"), (u"æ", u"ஷி"),
    (u"åõ", u"ஷா"), (u"å", u"ஷ"),
    # ஸ
    (u"ì", u"ஸ்"), (u"öéÍ", u"ஸௌ"), (u"@éõ", u"ஸோ"), (u"öéõ", u"ஸொ"),
    (u"øé", u"ஸை"), (u"@é", u"ஸே"), (u"öé", u"ஸெ"),
    (u"é„", u"ஸூ"), (u"éú", u"ஸு"), (u"ë", u"ஸீ"), (u"ê", u"ஸி"),
    (u"éõ", u"ஸா"), (u"é", u"ஸ"),
    # ஹ  (the original table mapped ஹா/ஹ to themselves, leaving the base
    #     glyph "í" unconverted; "í" is the base used by every other ஹ
    #     rule.  The table has never had entries for ஹு / ஹூ.)
    (u"à", u"ஹ்"), (u"öíÍ", u"ஹௌ"), (u"@íõ", u"ஹோ"), (u"öíõ", u"ஹொ"),
    (u"øí", u"ஹை"), (u"@í", u"ஹே"), (u"öí", u"ஹெ"),
    (u"ï", u"ஹீ"), (u"î", u"ஹி"),
    (u"íõ", u"ஹா"), (u"í", u"ஹ"),
)

# Apply longer glyph sequences before shorter ones so that a short rule
# (e.g. "J", "K" or "Í") can never consume part of a longer sequence
# ("JÍ", "K®", "öÇÍ", ...).  sorted() is stable, so rules of equal length
# keep the table order above.
_ORDERED_RULES = tuple(
    sorted(_SHREELIPI_RULES, key=lambda rule: len(rule[0]), reverse=True)
)

# Characters that are never reported as "unconverted".
_IGNORED_CHARS = frozenset(punctuation + whitespace + digits)


def convertTounicode(source_text, filtered=False, debug=False):
    """Convert legacy font-encoded Tamil text to Unicode Tamil.

    The input is expected to use the glyph encoding handled by the rule
    table above (Shreelipi, going by the file name).  Every rule is applied
    with ``str.replace``, longest glyph sequence first.

    A character counts as *unconverted* when it appears in the result, also
    appears somewhere in ``source_text``, and is not ASCII punctuation,
    whitespace or a digit.

    Args:
        source_text: Text in the legacy glyph encoding.
        filtered: If true, return the converted text with all unconverted
            characters removed.
        debug: If true, return only the unconverted characters, in the
            order (and with the repetitions) in which they occur in the
            converted text.  Takes precedence over ``filtered``.

    Returns:
        The converted text, or one of the variants described above.
    """
    translated = source_text
    for glyphs, tamil in _ORDERED_RULES:
        translated = translated.replace(glyphs, tamil)

    # Single-character membership: a set gives the same answer as
    # ``ch in source_text`` without rescanning the input for every character.
    source_chars = set(source_text)
    unconverted = [
        ch for ch in translated
        if ch in source_chars and ch not in _IGNORED_CHARS
    ]

    if debug:
        return ''.join(unconverted)
    if filtered:
        leftover = set(unconverted)
        return ''.join([ch for ch in translated if ch not in leftover])
    return translated