-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdebug_encoding.py
executable file
·139 lines (102 loc) · 3.38 KB
/
debug_encoding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/env python3
import argparse
import curses
import re
import sys
from _builtinencodings import encodings
from _curses_helpers import KEY_SPACEBAR, Window, re_nonascii
# Program description; main() hands this string to argparse.ArgumentParser as
# its `description`, so it is what the user sees for -h/--help.
HELP = """
View the given file in all encodings supported by python, while highlighting all
non-ascii characters. This tool can be used to manually figure out, which
encoding should be used to decode a file. Pass the original file, which has not
been touched by recode_language.py and friends.
Displays 3 encodings side-by-side. If two encodings produce the same output,
only the first one is shown. Navigate with LEFT/RIGHT, remove the middle
candidate using SPACE.
Quit by pressing 'q'.
"""
class Navigator:
    """Three-pane browser over a list of ``(title, text)`` entries.

    The ``now`` pane shows the entry at ``pointer``; the ``last`` and ``next``
    panes show its neighbours, or a blank placeholder at either end of the
    list. An empty entry list is tolerated: all three panes are then blank.

    Args:
        items: Iterable of sequences whose first element is used as the pane
            title and whose second element is used as the pane text.
    """

    # Shown in a pane that has no entry to display. Only indices 0 (title)
    # and 1 (text) are read by refresh().
    _BLANK = ("", "", None)

    def __init__(self, items):
        # Each pane is placed using the widths of the panes before it plus a
        # 2-unit gap. NOTE(review): the third Window argument is presumably
        # the horizontal offset -- confirm against _curses_helpers.Window.
        self.last = Window(1, 15, 1, 55)
        self.now = Window(1, 15, 1 + self.last.width + 2, 55)
        self.next = Window(1, 15, 1 + self.last.width + self.now.width + 4, 55)
        self.pointer = 0
        # Copy into fresh two-element lists so the caller's data is not shared.
        self.items = [[i[0], i[1]] for i in items]

    def _clamp(self, index):
        """Return *index* limited to the valid item range (0 if there are no items)."""
        return max(0, min(index, len(self.items) - 1))

    def go_left(self):
        """Move the selection one entry towards the start and redraw."""
        self.pointer = self._clamp(self.pointer - 1)
        self.refresh()

    def go_right(self):
        """Move the selection one entry towards the end and redraw."""
        self.pointer = self._clamp(self.pointer + 1)
        self.refresh()

    def remove(self):
        """Drop the selected entry, unless it is the only one left.

        Does not redraw; the caller is expected to call refresh() afterwards.
        """
        if len(self.items) > 1:
            del self.items[self.pointer]
            self.pointer = self._clamp(self.pointer)

    @property
    def item_last(self):
        """Entry before the selected one, or the blank placeholder at the start."""
        if self.pointer == 0:
            return self._BLANK
        return self.items[self.pointer - 1]

    @property
    def item_now(self):
        """Selected entry, or the blank placeholder when there are no entries."""
        if not self.items:
            return self._BLANK
        return self.items[self.pointer]

    @property
    def item_next(self):
        """Entry after the selected one, or the blank placeholder at the end."""
        # ">=" (rather than "==") also covers the empty list, where
        # len(self.items) - 1 is -1.
        if self.pointer >= len(self.items) - 1:
            return self._BLANK
        return self.items[self.pointer + 1]

    def refresh(self):
        """Load the previous/selected/next entries into their panes and redraw them."""
        panes = (
            (self.last, self.item_last),
            (self.now, self.item_now),
            (self.next, self.item_next),
        )
        for pane, entry in panes:
            pane.set_title(entry[0])
            pane.set_text(entry[1])
            pane.refresh()
def variants(content):
    """Yield ``(encoding, preview)`` for each distinct decoding of *content*.

    Every name in ``encodings`` is tried in order. An encoding is skipped when
    it cannot decode *content*, when its decoded text equals that of an
    earlier encoding, or when the decoded text contains no ``#`` character.

    Args:
        content: Raw bytes of the file under inspection.

    Yields:
        Tuples of the encoding name and a preview string made of only those
        decoded lines that match ``re_nonascii``, joined with newlines.
    """
    seen = set()
    for enc in encodings:
        try:
            text = content.decode(enc)
        except (LookupError, ValueError):
            # LookupError: unknown codec or not a text encoding.
            # ValueError: includes UnicodeError for bytes the codec rejects.
            # Deliberately not a bare except, so Ctrl-C is not swallowed.
            continue
        if text in seen:
            continue
        # NOTE(review): presumably a plausibility check that the decoded file
        # still contains '#' characters -- confirm the intent.
        if "#" not in text:
            continue
        seen.add(text)
        text = text.replace("\0", "")  # null bytes confuse ncurses
        demo_lines = [line for line in text.splitlines() if re_nonascii.search(line)]
        yield enc, "\n".join(demo_lines)
def run(stdscr, path):
    """Interactive loop: show the decodings of *path* and react to key presses.

    Intended to be called through ``curses.wrapper``.

    Args:
        stdscr: The curses screen object used for reading keys.
        path: Path of the file whose bytes are decoded and displayed.
    """
    curses.start_color()
    curses.use_default_colors()
    curses.init_pair(3, curses.COLOR_BLUE, -1)  # non-ascii char
    curses.curs_set(0)
    stdscr.refresh()

    with open(path, "rb") as source:
        raw = source.read()

    navigator = Navigator(list(variants(raw)))
    navigator.refresh()

    quit_key = ord("q")
    # Listed in reverse priority: should two key codes ever coincide, the
    # later dict entry wins, which keeps LEFT > RIGHT > SPACE precedence.
    key_actions = {
        KEY_SPACEBAR: navigator.remove,
        curses.KEY_RIGHT: navigator.go_right,
        curses.KEY_LEFT: navigator.go_left,
    }

    while True:
        key = stdscr.getch()
        action = key_actions.get(key)
        if action is not None:
            action()
        elif key == quit_key:
            break
        # Redraw after every key, including ones that are not bound.
        navigator.refresh()
def main(argv):
    """Parse the command line and start the curses viewer.

    Args:
        argv: Command-line arguments without the program name; exactly one
            positional argument, the file to inspect, is expected.
    """
    parser = argparse.ArgumentParser(
        description=HELP,
        # HELP is hand-wrapped text; the default formatter would reflow it
        # into a single paragraph, so keep its line breaks as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("file")
    args = parser.parse_args(argv)
    # curses.wrapper sets up the screen, calls run(stdscr, args.file) and
    # restores the terminal afterwards, even if run() raises.
    curses.wrapper(run, args.file)
# Script entry point: pass the command-line arguments, minus the program
# name, to main() when this file is executed directly.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)