#! /usr/bin/env python
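"""Build the static demo site.

Renders the Jinja templates at the repo root into _build/ and generates one
skrub TableReport page per example dataset.
"""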

import argparse
import re
import secrets
import shutil
import time
from pathlib import Path

import jinja2
import pandas as pd
from sklearn import datasets as sklearn_data
from skrub import TableReport
from skrub import datasets as skrub_data
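

# (label, href) pairs for the top-navigation menu rendered on every page.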
NAV_LINKS = [("Demo", "index.html"), ("Examples", "examples/index.html")]
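

# Write text to `path`, refreshing the cache-busting token on the way out.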
def write(text, path):
    path.write_text(bust(text), encoding="utf-8")
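

# Jinja filter: make a root-relative path resolve from the current page,
# e.g. "style.css" becomes "../style.css" on pages under examples/.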
@jinja2.pass_context
def relative(context, path):
    current_page = context["current_page"]
    depth = len(current_page.split("/")) - 1
    parts = [".."] * depth + path.split("/")
    return "/".join(parts)
def get_jinja_env():
    env = jinja2.Environment(
        loader=jinja2.FileSystemLoader(
            [REPO, REPO / "_includes"],
            encoding="UTF-8",
        ),
        autoescape=True,
    )
    env.filters["relative"] = relative
    env.globals = {
        "nav_links": NAV_LINKS,
    }
    return env
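

# Replace any "?__skrub_<token>__" query string with this build's token.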
def bust(text):
    return re.sub(r"\?__skrub_[a-zA-Z0-9]+__", f"?__skrub_{VERSION}__", text)
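

# Datasets are (name, fetcher) pairs; fetchers stay lazy so that
# --no_reports can skip the downloads entirely.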
def get_datasets():
    AMES_HOUSING_CSV = (
        "https://www.openml.org/data/get_csv/20649135/file2ed11cebe25.arff"
    )
    datasets = [("AMES Housing", lambda: pd.read_csv(AMES_HOUSING_CSV))]
    skrub_dataset_names = [
        "employee_salaries",
        "medical_charge",
        "traffic_violations",
        "drug_directory",
    ]
    for name in skrub_dataset_names:
        # Bind `name` as a default argument so each fetch() keeps its own
        # dataset name instead of the loop's final value.
        def fetch(name=name):
            return getattr(skrub_data, f"fetch_{name}")().X

        datasets.append((name, fetch))
    sklearn_dataset_names = ["titanic"]
    for name in sklearn_dataset_names:

        def fetch(name=name):
            return sklearn_data.fetch_openml(
                name, as_frame=True, parser="auto", version=1
            ).frame

        datasets.append((name, fetch))
    return datasets
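

# Render one example page: build the TableReport (or a placeholder when
# --no_reports is set) and wrap it in the example-report.html template.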
def add_report(name, fetcher):
    if ARGS.no_reports:
        html = '<div class="report-placeholder">report</div>'
        elapsed = 0
    else:
        df = fetcher()
        print(f"making report for {name}")
        pretty_name = name.replace("_", " ").capitalize()
        start = time.time()
        html = TableReport(df, title=pretty_name).html_snippet()
        elapsed = time.time() - start
        print(f"{name} took {elapsed:.2f}s")
    report_template = ENV.get_template("example-report.html")
    current_page = f"examples/{name}.html"
    html = report_template.render(
        nav_links=NAV_LINKS + [(name, current_page)],
        current_page=current_page,
        report=html,
        time=elapsed,
    )
    write(html, EXAMPLES_DIR / f"{name}.html")
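

# Build every example report, then the examples/index.html listing.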
def build_examples():
    datasets = get_datasets()
    for name, fetcher in datasets:
        add_report(name, fetcher)
    examples_index = ENV.get_template("examples-index.html")
    html = examples_index.render(
        report_names=[name for name, _ in datasets],
        current_page="examples/index.html",
    )
    write(html, EXAMPLES_DIR / "index.html")
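

# Render the top-level HTML templates and copy static assets to _build/.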
def build_pages():
    all_pages = REPO.glob("*.html")
    for page in all_pages:
        template = ENV.get_template(page.name)
        rendered = template.render(current_page=page.name)
        write(rendered, BUILD_DIR / page.name)
    for ext in ["css", "js", "svg"]:
        for file_path in REPO.glob(f"*.{ext}"):
            text = file_path.read_text("utf-8")
            write(text, BUILD_DIR / file_path.name)
    # Wheels are binary: copy them verbatim rather than passing them
    # through write()/bust().
    for asset in REPO.glob("*.whl"):
        shutil.copyfile(asset, BUILD_DIR / asset.name)
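

# Entry point: parse flags, reset _build/, then render everything.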
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--no_reports", help="skip building example reports", action="store_true"
    )
    ARGS = parser.parse_args()

    # Start from a clean output tree on every build.
    REPO = Path(__file__).parent.resolve()
    BUILD_DIR = REPO / "_build"
    if BUILD_DIR.is_dir():
        shutil.rmtree(BUILD_DIR)
    BUILD_DIR.mkdir()
    EXAMPLES_DIR = BUILD_DIR / "examples"
    EXAMPLES_DIR.mkdir()

    # Random token substituted into asset URLs by bust() for cache busting.
    VERSION = secrets.token_hex()[:4]
    ENV = get_jinja_env()
    build_pages()
    build_examples()