forked from ddnet/ddnet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunicode.py
35 lines (30 loc) · 852 Bytes
/
unicode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import csv
def confusables():
    """Parse ``confusables.txt`` into a list of row dicts.

    The file is read from the current working directory. Everything from
    the first ``#`` on a line is treated as a comment and dropped; lines
    that end up empty (pure comments, blank lines) produce no row.

    Returns:
        A list with one dict per data line, keyed by ``'Value'``,
        ``'Target'`` and ``'Category'``. Field text is returned exactly as
        it appears between the ``;`` delimiters, i.e. surrounding spaces
        and tabs are NOT stripped.

    Raises:
        OSError: if ``confusables.txt`` cannot be opened.
    """
    # 'utf-8-sig' transparently drops a leading byte-order mark if present;
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires for file objects.
    with open('confusables.txt', encoding='utf-8-sig', newline='') as f:
        # Keep only the text before the first '#'. For a commented line this
        # also discards the trailing newline, which the csv reader tolerates.
        uncommented = (line.partition('#')[0] for line in f)
        reader = csv.DictReader(
            uncommented,
            fieldnames=['Value', 'Target', 'Category'],
            delimiter=';',
        )
        # Materialise the rows while the file is still open.
        return list(reader)
# Column names applied, in order, to the ';'-separated fields of each
# UnicodeData.txt record (see data()). The names are used as dict keys,
# so they must not be renamed or reordered.
# NOTE(review): UAX #44 appears to describe field 5 as the combined
# decomposition type/mapping and fields 6-8 as three numeric columns, so
# the names at indexes 5-8 may not match the column contents - confirm
# before relying on those four keys.
UNICODEDATA_FIELDS = (
    "Value",
    "Name",
    "General_Category",
    "Canonical_Combining_Class",
    "Bidi_Class",
    "Decomposition_Type",
    "Decomposition_Mapping",
    "Numeric_Type",
    "Numeric_Mapping",
    "Bidi_Mirrored",
    "Unicode_1_Name",
    "ISO_Comment",
    "Simple_Uppercase_Mapping",
    "Simple_Lowercase_Mapping",
    "Simple_Titlecase_Mapping",
)
def data():
    """Parse ``UnicodeData.txt`` into a list of row dicts.

    The file is read from the current working directory as UTF-8 and split
    on ``;``. Each record becomes a dict whose keys are the names in
    ``UNICODEDATA_FIELDS``, assigned to the fields in order.

    Returns:
        A list with one dict per record, all values being strings as found
        in the file.

    Raises:
        OSError: if ``UnicodeData.txt`` cannot be opened.
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires for file objects.
    with open('UnicodeData.txt', encoding='utf-8', newline='') as f:
        reader = csv.DictReader(
            f,
            fieldnames=UNICODEDATA_FIELDS,
            delimiter=';',
        )
        # Materialise the rows while the file is still open.
        return list(reader)
def unhex(s):
    """Convert a hexadecimal string such as ``'0041'`` to an int.

    Parsing is delegated to ``int(s, 16)``, so surrounding whitespace is
    accepted.

    Raises:
        ValueError: if ``s`` is not a valid base-16 number (e.g. empty).
    """
    return int(s, 16)
def unhex_sequence(s):
    """Convert whitespace-separated hex values to a list of ints.

    Returns:
        ``None`` if ``s`` contains a ``'<'`` anywhere; otherwise a list with
        one int per whitespace-separated token (an empty or all-whitespace
        string gives ``[]``).

    Raises:
        ValueError: if a token is not a valid base-16 number.
    """
    # Presumably '<' marks a tagged entry (e.g. '<compat> 0020') that the
    # callers want to skip - verify against the callers.
    if '<' in s:
        return None
    return [unhex(token) for token in s.split()]