-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmd2html.py
175 lines (161 loc) · 5.6 KB
/
md2html.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import mistune, os
# Module-level Markdown converter: md2dsl() calls `markdown(text)` to turn the
# preprocessed Markdown source into markup.
# NOTE(review): mistune.Renderer / mistune.Markdown look like the mistune 0.x
# API -- confirm that the installed mistune version still provides them.
renderer = mistune.Renderer(use_xhtml=True)
markdown = mistune.Markdown(renderer=renderer)
def _split_at_folder(ifile, folder):
    """Split *ifile* into ``(root, name)`` around the path component *folder*.

    The last occurrence of *folder* that is a complete path component
    (preceded by a separator or the start of the path, and followed by a
    separator) is preferred, so a root directory or a file name that merely
    contains the folder name does not break the split.  If no such component
    exists, fall back to a plain ``str.split`` and let the two-way unpacking
    raise ``ValueError`` exactly as the unguarded split would.
    """
    separators = ('/', '\\')
    if folder:
        stop = len(ifile)
        while True:
            pos = ifile.rfind(folder, 0, stop)
            if pos < 0:
                break
            end = pos + len(folder)
            starts_component = pos == 0 or ifile[pos - 1] in separators
            ends_component = end < len(ifile) and ifile[end] in separators
            if starts_component and ends_component:
                return ifile[:pos], ifile[end:]
            # Continue with the next occurrence further to the left.
            stop = end - 1
    root, name = ifile.split(folder)
    return root, name


def md2dsl(ifile, ofile, folder):
    """Convert the Markdown page *ifile* into a page written to *ofile*.

    Parameters:
        ifile:  path of the Markdown source; it must contain *folder*.
        ofile:  path of the output file (overwritten, UTF-8).
        folder: name of the folder the page lives in; it is used to split
                *ifile* into the SLEBoK root and the file name, and it is
                inserted into the header and the repository links.

    Returns the page title: the first line of the source without its leading
    '#' characters and surrounding whitespace.

    Raises ValueError when *ifile* cannot be split at *folder*.
    """
    slebokroot, purename = _split_at_folder(ifile, folder)
    purename = cleanup_name(purename)
    with open(ifile, 'r', encoding='utf-8') as myinput:
        data = myinput.read()
    # The title is taken from the raw first line, before any expansion.
    title = data.split('\n', 1)[0].strip().lstrip('#')
    data = expand_before(data, slebokroot)
    with open(ofile, 'w', encoding='utf-8') as myoutput:
        # Page header with the title, a link back to the index and an edit link.
        myoutput.write('''<?xml version="1.0" encoding="UTF-8"?>
<path css="../www" img="../www"/>
<html doctype>
<head viewport title="SLEBoK: Software Language Engineering Body of Knowledge - {}">
<body>
<div style="text-align:center;"><a href="http://slebok.github.io">Software Language Engineering Body of Knowledge</a>: <strong><a href="index.html">{}</a></strong></div>
<hr/>
<div class="flr edit">
<a href="https://github.com/slebok/slebok/edit/master/{}/{}">Edit @ the SLEBoK repo</a>
</div>'''.format(title, folder, folder, purename))
        # Page body: rendered Markdown, post-processed by expand_after().
        myoutput.write(expand_after(markdown(data), slebokroot))
        # Page footer; '#LASTMOD#' is written literally
        # (presumably substituted by a later build step -- TODO confirm).
        myoutput.write('''
<div class="last">
<br/><hr/>
The page is generated from <a href="https://github.com/slebok/slebok/blob/master/{}/{}">{}</a>.<br/>
The website is maintained by <a href="http://grammarware.github.io/">Dr. Vadim Zaytsev</a> a.k.a. @<a href="http://grammarware.net/">grammarware</a>.
Last updated: #LASTMOD#.
<valid/>
</div>
</body>
</html>'''.format(folder, purename, purename))
    return title.strip()
def expand_before(txt, slebokroot):
    """Expand SLEBoK link shorthands in Markdown text before it is rendered.

    Two forms are rewritten into ordinary Markdown links:
    ``$[text](target)$`` keeps its text and has its target normalised, and
    ``$[target]$`` derives both the text and the link from the target.
    Anything else that starts with ``$[`` is left untouched.  Finally the
    ``$@...@$`` term/tool references are resolved against *slebokroot*.
    """
    # $[Guy Steele](bibtex:person/Guy L Steele Jr)$
    head, *candidates = txt.split('$[')
    if candidates:
        pieces = [head]
        for chunk in candidates:
            # Everything up to the first '$' is the candidate link body.
            inner, _, tail = chunk.partition('$')
            if inner.endswith(')') and '](' in inner:
                label = inner.split('](')[0]
                url = normalise_link(inner.split('](')[-1].split(')')[0])
            elif inner.endswith(']'):
                label = normalise_text(inner[:-1])
                url = normalise_link(inner[:-1])
            else:
                # not slebokode anyway
                pieces.append('$[' + chunk)
                continue
            pieces.append('[{}]({})'.format(label, url) + tail)
        txt = ''.join(pieces)
    # terms/tools/... linking
    return link_terms_tools_etc(txt, slebokroot)
def expand_after(txt, slebokroot):
    """Post-process rendered markup.

    Applies, in this order: local-link marking, code colouring and the
    SLEBoK inline markers.  *slebokroot* is accepted but not used here.
    """
    for step in (identify_local_links, colour_code, slebokode_mark):
        txt = step(txt)
    return txt
def normalise_link(link):
    """Turn a ``bibtex:`` pseudo-link into a bibtex.github.io URL.

    Links without the ``bibtex:`` prefix are returned unchanged.
    """
    scheme = 'bibtex:'
    if not link.startswith(scheme):
        # ... (no other schemes are handled)
        return link
    return 'http://bibtex.github.io/' + link[len(scheme):] + '.html'
# Clean up links like 'bibtex:person/Guy_L_Steele_Jr'
def normalise_text(link):
    """Derive a readable link text from a link target.

    Keeps what follows the last '/', then what follows the last ':' of that,
    and replaces underscores with spaces.
    """
    last_segment = link.rpartition('/')[2]
    bare_name = last_segment.rpartition(':')[2]
    return bare_name.replace('_', ' ')
# Colour up local (= SLEBoK-friendly) links
def identify_local_links(txt):
    """Add ``class="local"`` to links that count as local.

    A link is local when its href does not start with http://, https:// or
    ftp://, or when it points to bibtex.github.io or slebok.github.io.
    """
    # implicitly local links
    external = ('http://', 'https://', 'ftp://')
    first, *targets = txt.split('href="')
    rebuilt = [first]
    for target in targets:
        if target.startswith(external):
            prefix = 'href="'
        else:
            prefix = 'class="local" href="'
        rebuilt.append(prefix + target)
    txt = ''.join(rebuilt)
    # explicit links to SLEBoK projects
    for scheme in ('http', 'https'):
        for host in ('bibtex.github.io', 'slebok.github.io'):
            anchor = 'href="{}://{}'.format(scheme, host)
            txt = txt.replace('<a ' + anchor, '<a class="local" ' + anchor)
    return txt
# Simple SLEBoK-specific markdown extensions
def slebokode_mark(txt):
    """Replace the SLEBoK inline markers and ASCII arrows in *txt*.

    The substitutions are applied one after another in the listed order.
    """
    substitutions = (
        ('$/', '<em>'),
        ('/$', '</em>'),
        ('$*', '<strong>'),
        ('*$', '</strong>'),
        ('$_', '<sub>'),
        ('_$', '</sub>'),
        ('$~', '<span class="over">'),
        ('~$', '</span>'),
        ('-->', '→'),
        ('<--', '←'),
        ('|->', ':→'),  # TODO: mapsto!
        ('$$', '$'),
    )
    for marker, markup in substitutions:
        txt = txt.replace(marker, markup)
    return txt
def link_terms_tools_etc(txt, slebokroot):
    """Resolve ``$@text@$`` and ``$@text@target$`` references.

    A reference becomes a Markdown link to ``../terms/<target>.md`` or
    ``../tools/<target>.md`` when that file exists under *slebokroot*
    (terms are checked first); otherwise the text is wrapped in a
    ``<span class="miss">``.  In the short form the text doubles as the
    target.  The root is printed on every call.
    """
    print('SLEBoK ROOT is ' + slebokroot)
    head, *references = txt.split('$@')
    if not references:
        return txt
    pieces = [head]
    for chunk in references:
        label, _, rest = chunk.partition('@')
        if rest.startswith('$'):
            # just $@...@$
            target = label
            rest = rest[1:]
        else:
            target, _, rest = rest.partition('$')
        for kind in ('terms', 'tools'):
            candidate = os.path.join(slebokroot, kind + '/' + target + '.md')
            if os.path.exists(candidate):
                pieces.append('[{}](../{}/{}.md)'.format(label, kind, target) + rest)
                break
        else:
            # Neither a term nor a tool page exists for this target.
            pieces.append('<span class="miss">{}</span>'.format(label) + rest)
    return ''.join(pieces)
def cleanup_name(name):
    """Remove at most one leading and one trailing slash or backslash."""
    slashes = ('\\', '/')
    start = 1 if name.startswith(slashes) else 0
    trimmed = name[start:]
    return trimmed[:-1] if trimmed.endswith(slashes) else trimmed
def colour_code(txt):
    """Colour the contents of ``<code class="lang-...">`` elements.

    Each element whose language has a colouring function (see
    determine_language) gets that function applied to every line of its
    code; elements in other languages are left unchanged.
    """
    CCL = '<code class="lang-'
    head, *blocks = txt.split(CCL)
    if not blocks:
        return txt
    pieces = [head]
    for block in blocks:
        language, _, after_quote = block.partition('"')
        colourise = determine_language(language)
        if colourise is None:
            pieces.append(CCL + block)
            continue
        # Drop the character after the closing quote (the '>' of the tag).
        body = after_quote[1:]
        code, closing, tail = body.partition('</code>')
        coloured = '\n'.join(colourise(line) for line in code.split('\n'))
        pieces.append(CCL + language + '">' + coloured + closing + tail)
    return ''.join(pieces)
def determine_language(language):
    """Return the line-colouring function for *language*, or None.

    Only 'prolog' has a colouring function.
    """
    return colour_prolog if language == 'prolog' else None
def colour_prolog(line):
    """Add highlighting spans to one line of Prolog code.

    Everything from the first '%' to the end of the line is wrapped in a
    ``code-cmt`` span.  The keywords '!' and ':-' are wrapped in ``code-kw``
    spans, but only in the code before the comment, so that comment text is
    never marked up as keywords.

    Returns the line with the spans inserted.
    """
    # comments
    pos = line.find('%')
    if pos > -1:
        code, comment = line[:pos], line[pos:]
    else:
        code, comment = line, ''
    # define, cut, etc
    for kw in ('!', ':-'):
        code = code.replace(kw, '<span class="code-kw">' + kw + '</span>')
    if comment:
        code += '<span class="code-cmt">' + comment + '</span>'
    return code