-
Notifications
You must be signed in to change notification settings - Fork 0
/
jsonfix.py
142 lines (139 loc) · 5.61 KB
/
jsonfix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/python
def _extractString(data):
if data[0] not in "'\"": # This isn't a string
return "", 0
idx = 1
output = "\""
is_hex = False
prev_char = None
escaped_num = None
cur_char = data[0]
quote_char = data[0]
# While current character isn't a quote or
# at beginning or previous character is an escape slash
while 1:
if idx == len(data): # Truncated?
raise ValueError("Input ended early")
prev_char = cur_char
cur_char = data[idx]
if cur_char == quote_char and not prev_char == "\\": # We're at the end quote
output += "\""
break
if escaped_num is not None:
if cur_char.isdigit():
idx += 1
escaped_num += cur_char
else: # We're at the end of the escape, put it on the end of the output
output += "u" + ("0" * 4 + hex(int(escaped_num, 16 if is_hex else 8))[2:])[-4:]
escaped_num = None
continue
else:
if prev_char == "\\":
if cur_char in "xu": # Start of \x or \u escape
idx += 1
is_hex = True
escaped_num = ""
continue
elif cur_char.isdigit(): # Start of octal escape
escaped_num = ""
is_hex = False
continue
elif quote_char == "'" and cur_char == "'": # We don't need to escape ' now
output = output[:-1] # Get rid of the previous slash
if quote_char == "'" and cur_char == "\"": # Need to escape "s now
output += "\\"
output += cur_char
idx += 1
return output, idx
def _convertNumber(token):
    """Return the JSON (decimal) spelling of a JavaScript numeric literal.

    Handles ``0x`` hex literals, legacy octal literals (a leading zero
    followed only by the digits 0-7) and decimal integers/floats, including
    exponents.  Raises ValueError("Not a number") for anything else.
    """
    lowered = token.lower()
    try:
        if lowered[:2] == "0x":  # We're hex
            return str(int(lowered[2:], 16))
        if lowered.isdigit():
            # A leading zero means octal, unless an 8 or 9 shows the literal
            # can only be decimal.
            is_octal = (len(lowered) > 1 and lowered[0] == "0"
                        and not set(lowered) & set("89"))
            return str(int(lowered, 8 if is_octal else 10))
        value = float(lowered)
        if value == float("inf"):  # Overflow: "inf" is not valid JSON
            raise ValueError(token)
        # repr() keeps full precision (str() truncates floats on Python 2).
        return repr(value)
    except ValueError:
        raise ValueError("Not a number")


def fixJSON(js):
    """Rewrite a JavaScript-style object/array literal as strict JSON text.

    The following relaxations are repaired:
      * single-quoted strings and JavaScript-only escapes (handled by
        ``_extractString``);
      * unquoted identifier keys and numeric keys, which get quoted;
      * hex and legacy octal number literals, which are converted to decimal;
      * array holes (``[,1]`` / ``[1,,2]``), which become ``null``;
      * trailing commas before ``]`` or ``}``, which are dropped.

    Whitespace and any character not recognised as one of the above are
    copied through unchanged.

    Returns the repaired text.  Raises ValueError on mismatched or missing
    braces, a comma outside any container, a malformed key or number, or
    truncated input.
    """
    closers = {"[": "]", "{": "}"}
    number_chars = "0123456789abcdefABCDEFxX.+-"
    length = len(js)
    pieces = []  # Joined once at the end instead of repeated +=
    brace_stack = []
    might_be_a_key = False
    i = 0
    while i < length:
        char = js[i]
        if char in ("'", '"'):  # Found a string
            fixed, end = _extractString(js[i:])
            pieces.append(fixed)
            i += end + 1  # end is the index of the closing quote
        elif char in "{[":
            brace_stack.append(char)
            # Whatever follows "{" might be a key; nothing after "[" can be.
            might_be_a_key = char == "{"
            pieces.append(char)
            i += 1
        elif char in "}]":
            if not brace_stack or closers[brace_stack.pop()] != char:
                raise ValueError("Brace mismatch (char %d)" % i)
            # A value just ended, so the next token cannot be a key until a
            # comma inside an object says otherwise.
            might_be_a_key = False
            pieces.append(char)
            i += 1
        elif might_be_a_key and (char.isalpha() or char == "_"):
            # Key without quotes: collect it up to the colon.
            keystr = ""
            while js[i] != ":":
                if js[i].isalnum() or js[i] == "_":
                    if keystr and js[i - 1] == " ":  # No spaces inside keys
                        raise ValueError(
                            "Key has a space (char %d)" % (i - 1))
                    keystr += js[i]
                elif js[i] != " ":  # Some invalid character it seems
                    raise ValueError("Invalid character (char %d)" % i)
                i += 1
                if i == length:  # Truncated?
                    raise ValueError("Input ended early")
            pieces.append("\"%s\":" % keystr)
            i += 1  # Skip the colon, it was emitted with the key
            might_be_a_key = False
        elif js.startswith(("null", "true"), i):  # These are valid
            pieces.append(js[i:i + 4])
            i += 4
        elif js.startswith("false", i):  # And this
            pieces.append("false")
            i += 5
        elif char.isdigit():  # We're a number.
            start = i
            # A number runs until a structural character, any whitespace
            # (so CRLF and tabs work), or the end of the input.
            while (i < length and js[i] not in "],:}"
                   and not js[i].isspace()):
                if js[i] not in number_chars:
                    raise ValueError("Not a number")
                i += 1
            numstr = _convertNumber(js[start:i])
            if might_be_a_key:  # Numeric object key: JSON keys are strings
                pieces.append("\"%s\"" % numstr)
            else:
                pieces.append(numstr)
        elif char == ",":  # End of a value
            if not brace_stack:
                raise ValueError("Unexpected comma (char %d)" % i)
            opener = brace_stack[-1]
            if opener == "[":
                # A comma directly after "[" or another comma is an array
                # hole, which JSON has to spell out as null.
                j = i - 1
                while js[j].isspace():
                    j -= 1
                if js[j] in "[,":
                    pieces.append("null")
            k = i + 1
            while k < length and js[k].isspace():
                k += 1
            if k == length:  # Truncated?
                raise ValueError("Input ended early")
            if js[k] == closers[opener]:
                # Trailing comma: drop it and jump straight to the closer.
                i = k
            else:
                pieces.append(",")
                if opener == "{":  # Next value might be a key
                    might_be_a_key = True
                i += 1
        else:  # Only some : and spaces should get here
            if char == ":":
                might_be_a_key = False
            pieces.append(char)
            i += 1
    if brace_stack:
        raise ValueError("Missing brace")
    return "".join(pieces)