-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathcreate_notebook_table.py
76 lines (62 loc) · 2.8 KB
/
create_notebook_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import pandas as pd
# Path fragment placed in front of every notebook file name when the launch
# links are built in `create_table`.
GITHUB_PATH_PREFIX = "lewtun/dl4phys/blob/main/"

# Lecture title (shown in the "Lecture" column) -> notebook file stem.
# `create_table` appends ".ipynb" to the stem; rows are emitted in the order
# the entries are listed here.
CHAPTER_TO_NB = {
    "1 - Jet tagging with neural networks": "lecture01",
    "2 - Gradient descent": "lecture02",
    "3 - Neural network deep dive": "lecture03",
    "4 - Jet images and transfer learning with CNNs": "lecture04",
    "5 - Convolutional neural networks": "lecture05",
    "6 - Generating hep-ph titles with Transformers": "lecture06",
}
def _find_text_in_file(filename, start_prompt, end_prompt):
"""
Find the text in `filename` between a line beginning with `start_prompt` and before `end_prompt`, removing empty
lines.
Copied from: https://github.com/huggingface/transformers/blob/16f0b7d72c6d4e122957392c342b074aa2c5c519/utils/check_table.py#L30
"""
with open(filename, "r", encoding="utf-8", newline="\n") as f:
lines = f.readlines()
# Find the start prompt.
start_index = 0
while not lines[start_index].startswith(start_prompt):
start_index += 1
start_index += 1
end_index = start_index
while not lines[end_index].startswith(end_prompt):
end_index += 1
end_index -= 1
while len(lines[start_index]) <= 1:
start_index += 1
while len(lines[end_index]) <= 1:
end_index -= 1
end_index += 1
return "".join(lines[start_index:end_index]), start_index, end_index, lines
def create_table():
    """
    Build the Markdown table of lectures and their notebook launch links.

    One row is produced per entry of `CHAPTER_TO_NB`, in dictionary order. The columns are "Lecture", "Colab",
    "Kaggle", "Gradient" and "Studio Lab"; every link points at `GITHUB_PATH_PREFIX` + notebook stem + ".ipynb".

    Returns:
        The table rendered by `DataFrame.to_markdown(index=False)`, followed by a newline.
    """
    notebook_paths = [f"{GITHUB_PATH_PREFIX}{nb}.ipynb" for nb in CHAPTER_TO_NB.values()]

    # NOTE(review): every link below has empty link text (`[]`); presumably a
    # badge image was meant to go inside the brackets - confirm.
    columns = {
        "Lecture": list(CHAPTER_TO_NB),
        "Colab": [
            f"[](https://colab.research.google.com/github/{nb_path})"
            for nb_path in notebook_paths
        ],
        "Kaggle": [
            f"[](https://kaggle.com/kernels/welcome?src=https://github.com/{nb_path})"
            for nb_path in notebook_paths
        ],
        "Gradient": [
            f"[](https://console.paperspace.com/github/{nb_path})"
            for nb_path in notebook_paths
        ],
        "Studio Lab": [
            f"[](https://studiolab.sagemaker.aws/import/github/{nb_path})"
            for nb_path in notebook_paths
        ],
    }

    frame = pd.DataFrame(columns)
    return frame.to_markdown(index=False) + "\n"
def main():
    """
    Regenerate the notebook table inside `intro.md`, in place.

    The lines between the "automatically generated" marker and the "End of table" marker are replaced by the
    output of `create_table`; everything outside that region is written back unchanged.
    """
    target = "intro.md"

    # Render the table before touching the file.
    rendered_table = create_table()

    # The region's current text is not needed, only where it sits in the file.
    _, region_start, region_end, doc_lines = _find_text_in_file(
        filename=target,
        start_prompt="<!--This table is automatically generated, do not fill manually!-->",
        end_prompt="<!--End of table-->",
    )

    updated_lines = [*doc_lines[:region_start], rendered_table, *doc_lines[region_end:]]
    with open(target, "w", encoding="utf-8", newline="\n") as out:
        out.writelines(updated_lines)


# Run the regeneration only when executed as a script, not on import.
if __name__ == "__main__":
    main()