-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild.py
executable file
·331 lines (263 loc) · 10.6 KB
/
build.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
#!/usr/bin/env python3
"""Tool to build the HTML source for the statgen.org website.
The following dependencies are required:
* yaml (to parse the website's YAML configuration file)
* bibtexparser (to parse the BibTeX files for Dubé's publications)
* markdown (to convert Markdown files to the HTML format)
* jinja2 (to help with the HTML templating)
To execute the script, perform the following command (where `site.yaml` is the
website's YAML configuration file):
python3 build.py site.yaml
Note
----
This script only works on Python version 3.3 and higher.
"""
import os
import re
import sys
import shutil
import argparse
from glob import glob
import yaml
import bibtexparser
from markdown import Markdown
from jinja2 import Environment, FileSystemLoader
class Site(object):
    """A bilingual static website built from Markdown sources.

    Pages are registered with `add_page` or `discover`; `generate` then
    renders each registered page once per language and writes the result
    under the build directory.
    """

    # Every page is rendered once per language listed here.
    LANGUAGES = ("en", "fr")

    # Suffix of the English Markdown sources looked up by `discover`.
    _DISCOVER_SUFFIX = "_en.mkd"

    # Separator between two author names in a BibTeX "author" field.
    _AND_RE = re.compile(r"\s+and\s+")

    # Runs of dashes inside a BibTeX "pages" field (e.g. "12--34").
    _DASH_RE = re.compile(r"-+")

    # Author lists longer than this are truncated and end with "et al.".
    _MAX_AUTHORS = 30

    def __init__(self, site_root="/", static_dir="static", source_dir="source",
                 build_dir="build", template_dir="templates", bibtex_dir=None):
        """Initiate a new website.

        If the build directory already exists, the user is asked to confirm
        its deletion; any answer other than "y"/"yes" (case-insensitive)
        stops the program with exit status 0.

        Args:
            site_root (str): the website root
            static_dir (str): the directory that will contain the static
                files.
            source_dir (str): the name of the directory containing the
                source.
            build_dir (str): the name of the build directory.
            template_dir (str): the name of the templates directory.
            bibtex_dir (str): the name of the BibTeX directory.

        """
        # The directories (trailing slashes are dropped from the root so
        # that URLs can be built as root + "/" + something)
        self.site_root = site_root.rstrip("/")
        self.static_dir = static_dir
        self.source_dir = source_dir
        self.build_dir = build_dir
        self.template_dir = template_dir
        self.bibtex_dir = bibtex_dir

        # Overwrite the build directory.
        if os.path.isdir(self.build_dir):
            r = input("Delete entire directory '{}'? ".format(self.build_dir))
            if r.upper() not in {"Y", "YES"}:
                print("Stopped")
                sys.exit(0)
            shutil.rmtree(self.build_dir)
        os.mkdir(self.build_dir)

        # Creating the language subdirectories
        for language in self.LANGUAGES:
            os.mkdir(os.path.join(self.build_dir, language))

        # Copying the directories found inside the template directory into
        # the static directory of the build
        for path in os.listdir(self.template_dir):
            full_path = os.path.join(self.template_dir, path)
            if os.path.isdir(full_path):
                shutil.copytree(
                    full_path,
                    os.path.join(self.build_dir, self.static_dir, path),
                )

        # Prepare the jinja2 environment for the templates
        self.template_env = Environment(
            loader=FileSystemLoader(self.template_dir)
        )

        # Prepare the jinja2 environment for the Markdown source files
        self.markdown_env = Environment(
            loader=FileSystemLoader(self.source_dir),
        )

        # The pages
        self.pages = []
        self.navigation = []

    def _build_page(self, title, file_prefix, template_name):
        """Render the Markdown sources of one page in every language.

        Args:
            title (dict): the title of the page.
            file_prefix (str): the prefix of the markdown file; `_en.mkd`
                and `_fr.mkd` are appended to find the sources.
            template_name (str): the name of the HTML template to use.

        Returns:
            dict: the page information (title, URL, output file name
            pattern, template and one `content_<language>` HTML string per
            language).

        """
        page_info = dict(
            title=title,
            curr_url=file_prefix + ".html",
            # "{language}" is a placeholder filled in by `generate`.
            filename=os.path.join(self.build_dir, "{language}",
                                  file_prefix + ".html"),
            template=self.template_env.get_template(template_name),
        )

        for language in self.LANGUAGES:
            # The Markdown source is itself a jinja2 template
            source = self.markdown_env.get_template(
                file_prefix + "_{}.mkd".format(language),
            )
            text = source.render(
                static_url=self.site_root + "/" + self.static_dir,
                lang_root=self.site_root + "/" + language,
            )

            # Converting to HTML
            page_info["content_" + language] = Markdown().convert(text)

        return page_info

    def add_page(self, title, file_prefix, **kwargs):
        """Add a page to the website.

        Args:
            title (dict): the title of the page.
            file_prefix (str): the prefix of the markdown file.
            **kwargs: optional settings: `template` (template file name,
                `default.html` if absent), `bibtex` (truthy to attach the
                parsed publications) and `navigation` (truthy to list the
                page in the navigation).

        Note
        ----
            Site should be bilingual, hence the file_prefix is only the
            prefix, and both `_en.mkd` and `_fr.mkd` will be appended to it
            to get the input markdown file. The generated file will be put
            in both `en` and `fr` directory of the build directory,
            respectively.

        """
        page_info = self._build_page(
            title, file_prefix, kwargs.get("template", "default.html"),
        )

        # If publications, we need something fancy
        if kwargs.get("bibtex", False) and self.bibtex_dir:
            page_info["bibtex"] = self.parse_bibtex()

        # Adding to the web site
        self.pages.append(page_info)

        # Adding the navigation if required
        if kwargs.get("navigation", False):
            self.navigation.append(dict(
                title=title,
                url=file_prefix + ".html",
            ))

    def discover(self, directory, title, **kwargs):
        """Automatically discover Markdown files and add them.

        Every `*_en.mkd` file found in the directory becomes a page; the
        matching `_fr.mkd` file is loaded alongside it.

        Args:
            directory (str): the name of the directory to find Markdown
                files.
            title (dict): the title of the pages to add.
            **kwargs: optional settings: `template` (template file name,
                `default.html` if absent).

        """
        suffix = self._DISCOVER_SUFFIX
        pattern = os.path.join(self.source_dir, directory, "*" + suffix)
        template_name = kwargs.get("template", "default.html")

        # Sorted so that pages are registered in a reproducible order
        for filename in sorted(glob(pattern)):
            # Only the trailing suffix is removed, never an occurrence in
            # the middle of the name
            stem = os.path.basename(filename)[:-len(suffix)]

            # Template names always use "/" as the separator
            self.pages.append(
                self._build_page(title, directory + "/" + stem,
                                 template_name),
            )

    def generate(self):
        """Generates the website.

        Each registered page is rendered with its template once per
        language and written to its output file (parent directories are
        created as needed).
        """
        for page in self.pages:
            template = page["template"]
            for language in self.LANGUAGES:
                content = template.render(
                    language=language,
                    title=page["title"][language],
                    content=page["content_" + language],
                    static_url=self.site_root + "/" + self.static_dir,
                    site_root=self.site_root,
                    lang_root=self.site_root + "/" + language,
                    navigation=self.navigation,
                    bibtex=page.get("bibtex", None),
                    curr_url=page["curr_url"],
                )

                # Saving the content
                fn = page["filename"].format(language=language)
                os.makedirs(os.path.dirname(fn), exist_ok=True)

                # Explicit UTF-8 so accented characters are written the
                # same way whatever the locale of the build machine
                with open(fn, "w", encoding="utf-8") as o_file:
                    o_file.write(content)

    @classmethod
    def _format_authors(cls, author_field):
        """Turn a BibTeX author field into a readable list of names.

        Args:
            author_field (str): the names, separated by " and ".

        Returns:
            str: the names separated by commas, with " and " before the
            last one; a single name is returned unchanged, and lists longer
            than `_MAX_AUTHORS` are cut and terminated by " et al.".

        """
        authors = cls._AND_RE.split(author_field)

        if len(authors) > cls._MAX_AUTHORS:
            return ", ".join(authors[:cls._MAX_AUTHORS]) + " et al."

        # A lone author must not be prefixed with " and "
        if len(authors) == 1:
            return authors[0]

        return ", ".join(authors[:-1]) + " and " + authors[-1]

    def parse_bibtex(self):
        """Parses all *.bib files inside a directory.

        Returns:
            list: `(year, entries)` tuples sorted from the most recent year
            to the oldest, where `entries` is the list of entry
            dictionaries read from the file of that year. Each entry gets a
            final dot on its title and authors, reformatted authors, single
            dashes in `pages` and an integer `pmid`.

        Warning
        -------
            The names of the file should always be `YEAR.bib`, and the ID
            of each entry should be `pmid` followed by the number.

        """
        bib = []

        # Getting the BibTeX files
        for fn in glob(os.path.join(self.bibtex_dir, "*.bib")):
            year = int(os.path.basename(fn).split(".")[0])

            with open(fn, "r") as i_file:
                pubs = list(bibtexparser.load(i_file).entries)

            # Some formatting
            for pub in pubs:
                # Adding a dot to the title, if required
                if not pub["title"].endswith("."):
                    pub["title"] += "."

                # Adding a dot to the authors, if required
                if not pub["author"].endswith("."):
                    pub["author"] += "."

                # Replacing the in between author "and"
                pub["author"] = self._format_authors(pub["author"])

                # Replacing '--' with '-'
                pub["pages"] = self._DASH_RE.sub("-", pub.get("pages", ""))

                # Adding the pubmed identification number
                pub["pmid"] = int(pub["ID"].replace("pmid", ""))

            # Saving
            bib.append((year, pubs))

        # Sorting (most recent year first)
        bib.sort(reverse=True, key=lambda pub: pub[0])

        return bib
def main():
    """Build the website described by the YAML file given as argument.

    The configuration must contain a `configuration` mapping (the keyword
    arguments of `Site`) and a `pages` list (the keyword arguments of each
    `Site.add_page` call). The `autodiscover` list (the keyword arguments
    of each `Site.discover` call) is optional.
    """
    # Parsing the arguments and YAML configuration file
    args = parse_args()
    conf = parse_yaml(args.yaml_conf)

    # Creating the new site
    website = Site(**conf["configuration"])

    # Adding the pages
    for page in conf["pages"]:
        website.add_page(**page)

    # Discovering the pages (a missing or empty section adds nothing
    # instead of aborting the build)
    for discover in conf.get("autodiscover") or ():
        website.discover(**discover)

    # Generating the website
    website.generate()
def parse_yaml(fn):
    """Parses YAML configuration from file.

    Args:
        fn (str): the name of the YAML configuration file.

    Returns:
        dict: the dictionary containing the configuration (returned by YAML).

    """
    # The file is read as UTF-8 so that accented titles do not depend on
    # the locale of the build machine.
    with open(fn, "r", encoding="utf-8") as i_file:
        # safe_load only builds plain Python objects (no arbitrary object
        # construction) and, unlike a bare yaml.load(stream), does not
        # require an explicit Loader argument.
        return yaml.safe_load(i_file)
def parse_args(args=None):
    """Parses the command line arguments.

    Args:
        args (list): the arguments to parse; when None (the default), the
            arguments of the running program are used.

    Returns:
        argparse.Namespace: the parsed arguments, with the name of the YAML
        configuration file in `yaml_conf`.

    """
    parser = argparse.ArgumentParser(
        description="Parses the configuration file and generates the website",
    )

    parser.add_argument(
        "yaml_conf",
        metavar="YAML",
        help="YAML configuration file.",
    )

    return parser.parse_args(args)
# Run the build only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()