-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathextract_forthel.py
69 lines (56 loc) · 1.63 KB
/
extract_forthel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import sys
import json
# Read the input from stdin and collect one segment per line.
#
# Every non-blank line that is not a LaTeX comment (leading '%') becomes a
# segment of its own; blank lines and comment lines are skipped.  Lines are
# stripped of surrounding whitespace before they are tested or stored.
#
# NOTE(review): the if/elif chain used to continue after a catch-all
# `elif line:` branch with handling for \begin{forthel} / \end{forthel},
# '[timelimit' lines and other \begin{...} / \end{...} lines.  None of those
# branches could ever run: each of them needs a non-empty line, which the
# catch-all had already consumed, and `in_forthel` was never set to True.
# They are therefore left out here, which does not change what the script
# produces.  If only the text inside forthel environments is wanted, that
# handling has to be tested *before* the catch-all -- confirm the intended
# behaviour with the author.
forthel_segments = []
for line in sys.stdin:
    line = line.strip()
    if line and not line.startswith('%'):
        forthel_segments.append(line)
def indexed(i):
    r"""Return the inline-math placeholder ``$\INDEXEDTERM{i}$`` for index *i*.

    *i* is converted with ``str``, so any printable value is accepted.
    """
    return '$\\INDEXEDTERM{%s}$' % (i,)
# Replace every math span by a numbered placeholder, split the result into
# sentences, print the sentences and store the placeholder table as JSON.
math_dict = {}          # placeholder number -> math source without the '$'s
counter = 1             # number given to the next math span
analysed_segments = []  # sentences, each one ending in '.'
for segment in forthel_segments:
    # Splitting on '$$' first and then on '$' treats display math and inline
    # math alike.  With balanced delimiters the text pieces land on even
    # indices and the math pieces on odd ones.  A segment that starts with
    # '$' simply gets an empty first text piece, so the parity is the same
    # and no special case is needed for it.
    parts = [s for part in segment.strip().split('$$')
             for s in part.split('$')]
    asegment = []
    for i, part in enumerate(parts):
        if i % 2 == 1:
            asegment.append(indexed(counter))
            math_dict[counter] = part
            counter += 1
        else:
            asegment.append(part)
    # Sentences are cut at '.' only after the math has been replaced, so a
    # period inside a formula does not end a sentence.  Empty pieces (e.g.
    # the one after the final '.') are dropped.
    sentences = ' '.join(asegment).split('.')
    analysed_segments.extend(s + '.' for s in sentences if s)

for a in analysed_segments:
    print(a)

# ensure_ascii=False writes non-ASCII characters verbatim, so the file
# encoding is fixed to UTF-8 instead of depending on the locale default.
# json.dump turns the integer keys into strings.
with open('TERMINDEX.json', 'w', encoding='utf-8') as file:
    json.dump(math_dict, file, ensure_ascii=False, indent=2)