-
Notifications
You must be signed in to change notification settings - Fork 37
/
Copy pathsymbols.py
87 lines (81 loc) · 2.21 KB
/
symbols.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
"""
symbols: convert HTML character references in text to literal characters.
"""
import re
import html
# Resolve symbols and unescape any numeric character references
def resolveSymbols(text):
# h = html.parser.HTMLParser()
textSplit = re.split("(&\#x?[0-9a-f]{2,6};)", text, flags=re.IGNORECASE)
text2 = ""
for t in textSplit:
if t == "":
text2 = text2 + t
elif (t[0:2] == "&#") & (t[-1] == ";"):
text2 = text2 + html.unescape(t)
else:
text2 = text2 + t
# Replace certain entity references with actual characters
replacementRules = [
("=", "="),
("<", chr(236)),
(">", chr(237)),
("≤", "≤"),
("≥", "≥"),
("≈", "≈"),
("Δ", "Δ"),
("δ", "δ"),
("∼", "∼"),
(" ", chr(160)),
(";", ";"),
(":", ":"),
(",", ","),
("&", "&"),
("←", "←"),
("→", "→"),
("↑", "↑"),
("↓", "↓"),
("↔", "↔"),
("↕", "↕"),
("↖", "↖"),
("↗", "↗"),
("↙", "↙"),
("↘", "↘"),
("[", r"\["),
("]", r"\]"),
("∞", "∞"),
("ä", "ä"),
("Ä", "Ä"),
("ü", "ü"),
("Ü", "Ü"),
("ö", "ö"),
("Ö", "Ö"),
("ß", "ß"),
("€", "€"),
("✓", "✓"),
("…", "…"),
("×", "×"),
("%", "%"),
("÷", "÷"),
("∀", "∀"),
("∃", "∃"),
("λ", "λ"),
("μ", "μ"),
("ν", "ν"),
("π", "π"),
("ρ", "ρ"),
("‐", "-"),
("\`", chr(235)),
("`", chr(235)),
(""", "\""),
("“", u"\u201C"),
("”", u"\u201D"),
("'", "'"),
("‘", u"\u2018"),
("’", u"\u2019"),
("Ø",u"\u00D8"),
("ø",u"\u00F8"),
]
for term, replacement in replacementRules:
text2 = text2.replace(term, replacement)
return text2