-
Notifications
You must be signed in to change notification settings - Fork 0
/
WikiPage.py
260 lines (248 loc) · 7.27 KB
/
WikiPage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
from Formatting import TEMPLATES
# Section names treated as languages; WikiPage.getHtml renders sections in
# this order and skips any that the page does not contain.
languages = (
    'English',
    'German',
    'French',
    'Dutch',
    'Russian',
)
# Flag image codes, positionally aligned with `languages`
# (Flagged picks flags[languages.index(lang)]).
flags = (
    'EN',
    'DE',
    'FR',
    'NL',
    'RU',
)
# Opening markers that MDText searches for to find where plain text ends.
code = (
    '**',
    '[[',
    '`',
)
class Bunch:
    """Plain attribute container: every keyword argument becomes an attribute."""

    def __init__(self, **kwds):
        # Copy the keyword arguments straight into the instance namespace.
        vars(self).update(kwds)
class WikiPage:
    """In-memory model of one wiki page read from a Markdown file.

    The parser understands three kinds of lines:

    * ``## Name`` starts a section (sections are languages in practice);
    * a line whose third character is ``_`` lists the section's terms,
      separated by ``'; '`` and each optionally wrapped in underscores;
    * ``* Key: value`` adds a value under ``Key`` in the current section.

    Attributes:
        main: the file name the page was read from.
        text: the raw lines of the file.
        order: section names in the order they first received a value.
        sections: ``{section: {key: [values]}}``.
        orders: ``{section: [keys in first-seen order]}``.
    """

    def __init__(self, fname):
        """Read and parse the UTF-8 encoded file `fname`."""
        self.main = fname
        self.order = []
        self.sections = {}
        self.orders = {}
        # The context manager closes the file even if reading fails.
        with open(fname, 'r', encoding="utf-8") as f:
            self.text = f.readlines()
        cur = ''
        for line in self.text:
            if line.startswith('## '):
                cur = line[3:].strip()
                # setdefault keeps the recorded key order if a header repeats.
                self.orders.setdefault(cur, [])
                continue
            if not cur or not line.strip():
                # skip blank lines and lines before the first section
                continue
            # TODO: fragile condition -- a terms line is recognised only by
            # its third character. Slicing (not indexing) keeps lines shorter
            # than three characters from raising IndexError.
            if line[2:3] == '_':
                for s in line[2:].split('; '):
                    z = s.strip()
                    if z.startswith('_') and z.endswith('_'):
                        z = z[1:-1]
                    self.addValue(cur, 'Terms', z)
                continue
            head, sep, rhs = line.partition(': ')
            bullet = head.split('* ')
            if not sep or len(bullet) < 2:
                # No "key: value" shape, or no "* " bullet before the key.
                print('Strange line:', line)
                continue
            lhs = bullet[1]
            rhs = rhs.strip()
            # TODO: decide whether this is a temporary fix or a robustness invariant
            if rhs.startswith('_') and rhs.endswith('_'):
                rhs = rhs[1:-1]
            if lhs == 'Publication':
                e = Publication(rhs)
            elif lhs == 'Definition':
                e = MDText(rhs)
            else:
                e = Entry(rhs)
            self.addValue(cur, lhs, e)

    def getValues(self, key1, key2):
        """Return the values stored under section `key1`, key `key2` ([] if absent)."""
        return self.sections.get(key1, {}).get(key2, [])

    def getKeys(self, key):
        """Return the keys of section `key` in first-seen order ([] if absent)."""
        if key not in self.sections:
            return []
        return self.orders.get(key, [])

    def addValue(self, key1, key2, v):
        """Append `v` under section `key1`, key `key2`, creating both on demand."""
        if key1 not in self.sections:
            self.sections[key1] = {}
            self.order.append(key1)
        bucket = self.sections[key1]
        if key2 not in bucket:
            bucket[key2] = [v]
            # The section may not have been announced by a "## " header, so
            # its key-order list is created here when missing.
            self.orders.setdefault(key1, []).append(key2)
        else:
            bucket[key2].append(v)

    def who(self):
        """Return the name of this object's class."""
        return self.__class__.__name__

    def validate(self):
        """Print every difference between the file's lines and str(self).

        Both sides are compared line by line, ignoring blank lines and, for
        the original file, surrounding whitespace.
        """
        lines = [x.strip() for x in self.text if x.strip()]
        for line in str(self).split('\n'):
            if not line:
                continue
            if line in lines:
                lines.remove(line)
            else:
                print(' * The original is expected to have line "%s"' % line)
        for line in lines:
            print(' * The original has unmatched line "%s"' % line)

    def getLanguages(self):
        """Return the names of all sections holding at least one value, sorted."""
        return sorted(self.sections.keys())

    def getNames(self, lang):
        """Return the 'Terms' values followed by the 'Short' values of `lang`."""
        return self.getValues(lang, 'Terms') + self.getValues(lang, 'Short')

    def getKeywords(self):
        """Return each section name (sorted) followed by that section's terms."""
        kws = []
        for lang in sorted(self.orders.keys()):
            kws.append(lang)
            kws.extend(self.getValues(lang, 'Terms'))
        return kws

    def getHtml(self, main):
        """Render the page as HTML.

        `main` is the term the rendered page is about: a term equal to it is
        emphasised instead of linked, and it is passed to the 'pagehead' and
        'figure' templates. Only sections named in `languages` are rendered,
        in that order.
        """
        page_id = self.main.split('.md')[0].replace(' ', '-')
        parts = [TEMPLATES['pagehead'].format('; '.join(self.getKeywords()),
                                              main,
                                              page_id)]
        for lang in languages:
            if lang not in self.sections:
                continue
            parts.append('<h2>{0}</h2>\n<ul><li>'.format(Flagged(lang)))
            terms = []
            for t in self.getValues(lang, 'Terms'):
                if t == main:
                    terms.append('<strong>{}</strong>'.format(t))
                else:
                    terms.append(TEMPLATES['bilink'].format(t))
            parts.append('; '.join(terms))
            if 'Short' in self.getKeys(lang):
                shorts = []
                for short in self.getValues(lang, 'Short'):
                    # Compare by text: an Entry object never equals a plain
                    # string, so comparing the object itself would always
                    # hyperlink a self-reference.
                    if str(short) == main or not short.text.isalnum():
                        shorts.append(short.getHtml())
                    else:
                        shorts.append(TEMPLATES['ilink'].format(short, short.getHtml()))
                parts.append(' (%s)' % '; '.join(shorts))
            parts.append('</li>\n')
            for k in self.getKeys(lang):
                if k in ('Short', 'Terms'):
                    # already rendered in the first list item
                    continue
                for rhs in self.getValues(lang, k):
                    if k == 'Figure':
                        parts.append(TEMPLATES['figure'].format(rhs, main))
                    elif k == 'Definition':
                        parts.append('<li class="def">{}</li>\n'.format(rhs.getHtml()))
                    else:
                        parts.append('<li>{0}: {1}</li>'.format(k, rhs.getHtml()))
            parts.append('</ul>')
        parts.append(TEMPLATES['footer'])
        return ''.join(parts)

    def __str__(self):
        """Serialise the page back to the Markdown layout that __init__ parses."""
        parts = []
        for lang in self.order:
            terms = '; '.join('_%s_' % t for t in self.getValues(lang, 'Terms'))
            parts.append('\n## %s\n* %s\n' % (lang, terms))
            for k in self.getKeys(lang):
                if k == 'Terms':
                    continue
                for v in self.getValues(lang, k):
                    parts.append('* %s: %s\n' % (k, v))
        return ''.join(parts).strip() + '\n'
# Publication: [*Generalized multitext grammars*](http://dx.doi.org/10.3115/1218955.1219039)
class Publication:
    """A Markdown link of the form ``[*title*](url)`` describing a publication."""

    def __init__(self, s):
        """Split `s` into the link text (emphasis markers removed) and the URL."""
        label = s.split('[')[1].split(']')[0]
        # Peel one layer of '*' emphasis, then one layer of '_' emphasis.
        for marker in ('*', '_'):
            if label.startswith(marker) and label.endswith(marker):
                label = label[1:-1]
        self.title = label
        after_bracket = s.split('](')[1]
        # Drop the final character (the closing parenthesis of the link).
        self.link = after_bracket[:-1]

    def who(self):
        """Return the name of this object's class."""
        return self.__class__.__name__

    def getHtml(self):
        """Render through the 'pub' template, passing the link and then the title."""
        return TEMPLATES['pub'].format(self.link, self.title)

    def __str__(self):
        """Return the Markdown form, with the title always emphasised by '*'."""
        return '[*%s*](%s)' % (self.title, self.link)
# English: Wikipedia: http://en.wikipedia.org/wiki/Algebraic_data_type
class Entry:
    """A generic right-hand-side value of a wiki line, kept as raw text.

    Entries order themselves by their text, so lists of them can be sorted.
    """

    # URL schemes that getHtml renders through the 'entry' template.
    _URL_PREFIXES = ('http://', 'https://')

    def __init__(self, s):
        """Store the raw text `s`."""
        self.text = s

    def who(self):
        """Return the name of this object's class."""
        return self.__class__.__name__

    def getHtml(self):
        """Render the entry as HTML.

        Text starting with ``http://`` or ``https://`` goes through the
        'entry' template; text starting with a backtick has the content up to
        the next backtick passed to the 'code' template; anything else is
        returned unchanged.
        """
        if self.text.startswith(self._URL_PREFIXES):
            return TEMPLATES['entry'].format(self.text)
        if self.text.startswith('`'):
            return TEMPLATES['code'].format(self.text.split('`')[1])
        return self.text

    def __str__(self):
        """Return the raw text."""
        return self.text

    def __lt__(self, other):
        """Order by string representation."""
        return str(self) < str(other)

    def __gt__(self, other):
        """Order by string representation."""
        return str(self) > str(other)
class MDText:
    """A line of lightweight Markdown split into typed chunks.

    Recognised spans are ``**bold**``, ``[[link]]`` and `` `code` ``;
    everything else becomes plain-text chunks. Spans are not nested: the
    content of a span is handed to its chunk class unparsed.
    """

    def __init__(self, s):
        """Parse `s` into self.chunks (MDBold, MDLink, MDCode, MDBare objects)."""
        self.chunks = []
        while s:
            if s.startswith('**'):
                opener, closer, chunk_type = '**', '**', MDBold
            elif s.startswith('[['):
                opener, closer, chunk_type = '[[', ']]', MDLink
            elif s.startswith('`'):
                opener, closer, chunk_type = '`', '`', MDCode
            else:
                opener, closer, chunk_type = '', '', None
            # Look for the closing marker only after the opening one.
            end = s.find(closer, len(opener)) if opener else -1
            if end != -1:
                self.chunks.append(chunk_type(s[len(opener):end]))
                s = s[end + len(closer):]
                continue
            # Plain text, or an opening marker that is never closed. An
            # unclosed marker is kept verbatim as plain text so no characters
            # are lost. The chunk runs up to the next marker found after it.
            hits = [pos for pos in (s.find(m, len(opener)) for m in code) if pos != -1]
            cut = min(hits) if hits else len(s)
            self.chunks.append(MDBare(s[:cut]))
            s = s[cut:]

    def getHtml(self):
        """Return the concatenated HTML of all chunks."""
        return ''.join(chunk.getHtml() for chunk in self.chunks)

    def __str__(self):
        """Return the concatenated Markdown of all chunks."""
        return ''.join(str(chunk) for chunk in self.chunks)
# not good with nesting
class MDBold:
    """Bold text chunk: ``**text**`` in Markdown, ``<strong>`` in HTML."""

    def __init__(self, s):
        """Keep the text found between the bold markers."""
        self.text = s

    def getHtml(self):
        """Return the text wrapped in a <strong> element."""
        return '<strong>{}</strong>'.format(self.text)

    def __str__(self):
        """Return the text wrapped in Markdown bold markers."""
        return '**{}**'.format(self.text)
# Wiki link: [[target]] or, with a bar, [[text|target]].
class MDLink:
    """Internal wiki link chunk.

    ``[[name]]`` links to ``name`` and shows ``name``; ``[[text|goal]]``
    shows ``text`` and links to ``goal``.
    """

    def __init__(self, s):
        """Parse the content found between ``[[`` and ``]]``.

        Only the first ``|`` separates text from goal, so any further bars
        stay in the goal instead of raising an error.
        """
        text, bar, goal = s.partition('|')
        self.text = text
        # Without a bar the link points at its own text.
        self.goal = goal if bar else text

    def getHtml(self):
        """Return an anchor pointing at '<goal>.html' with the link text."""
        return '<a href="%s.html">%s</a>' % (self.goal, self.text)  # .capitalize()?

    def __str__(self):
        """Return the Markdown form, using the short form when text and goal agree."""
        if self.goal == self.text:
            return '[[%s]]' % self.text
        return '[[%s|%s]]' % (self.text, self.goal)
class MDCode:
    """Inline code chunk: backticks in Markdown, ``<code>`` in HTML."""

    def __init__(self, s):
        """Keep the text found between the backticks."""
        self.text = s

    def getHtml(self):
        """Return the text wrapped in a <code> element."""
        return '<code>{}</code>'.format(self.text)

    def __str__(self):
        """Return the text wrapped in backticks."""
        return '`{}`'.format(self.text)
class MDBare:
    """Plain text chunk, rendered unchanged in both Markdown and HTML."""

    def __init__(self, s):
        """Keep the plain text as given."""
        self.text = s

    def getHtml(self):
        """Return the text as-is; no markup is added."""
        html = self.text
        return html

    def __str__(self):
        """Return the text as-is."""
        markdown = self.text
        return markdown
class Flagged:
    """A language name that renders with its flag image in front of it."""

    def __init__(self, lang):
        """Look up the flag code for `lang`.

        The code is the element of `flags` at the position `lang` has in
        `languages`; a name missing from `languages` raises ValueError.
        """
        self.lang = lang
        position = languages.index(lang)
        self.flag = flags[position]

    def __str__(self):
        """Return an <img> tag for the flag followed by the language name."""
        return '<img src="../www/{0}.png" alt="{1}"/> {1}'.format(self.flag, self.lang)