-
-
Notifications
You must be signed in to change notification settings - Fork 41
/
archive.py
executable file
·222 lines (162 loc) · 5.62 KB
/
archive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#!/usr/bin/env python3
"""
This is the main program for the Zulip archive system. For help:
python archive.py -h
Note that this actual file mostly does the following:
parse command line arguments
check some settings from settings.py
complain if you haven't made certain directories
The actual work is done in two main libraries:
lib/html.py
lib/populate.py
"""
# The workflow (timing for the leanprover Zulip chat, on my slow laptop):
# - populate_all() builds a json file in `settings.json_directory` for each topic,
# containing message data and an index json file mapping streams to their topics.
# This uses the Zulip API and takes ~10 minutes to crawl the whole chat.
# - populate_incremental() assumes there is already a json cache and collects only new messages.
# - build_website() builds the website
# - See hosting.md for suggestions on hosting.
#
import sys
# Refuse to run on interpreters older than 3.6.  The message is built with
# str.format() rather than an f-string on purpose: this file has to parse
# on the old interpreter for the friendly error below to be reached at all.
if sys.version_info < (3, 6):
    version_error = (
        " Python version must be 3.6 or higher\n"
        "Your current version of python is {}.{}\n"
        "Please try again with python3."
    ).format(sys.version_info.major, sys.version_info.minor)
    raise Exception(version_error)
import argparse
import configparser
import os
import zulip
from lib.common import stream_validator, exit_immediately
# Most of the heavy lifting is done by the following modules:
from lib.populate import populate_all, populate_incremental
from lib.website import build_website
from lib.sitemap import build_sitemap
# settings.py is expected to sit next to this script; when it cannot be
# imported, hand a how-to-fix message to exit_immediately (from lib.common).
try:
    import settings
except ModuleNotFoundError:
    # TODO: Add better instructions.
    exit_immediately(
        "\n"
        "We can't find settings.py.\n"
        "Please copy default_settings.py to settings.py\n"
        "and then edit the settings.py file to fit your use case.\n"
        "For testing, you can often leave the default settings,\n"
        "but you will still want to review them first.\n"
    )
# Message templates shown when a required directory is missing.  Each one
# contains a single "{}" placeholder that callers fill, via str.format(),
# with the POSIX form of the missing directory's path.

# JSON cache directory is missing and we are about to write a fresh cache.
NO_JSON_DIR_ERROR_WRITE = (
    "\n"
    "We cannot find a place to write JSON files.\n"
    "Please run the below command:\n"
    "mkdir {}"
)

# JSON cache directory is missing and we expected to read an existing cache.
NO_JSON_DIR_ERROR_READ = (
    "\n"
    "We cannot find a place to read JSON files.\n"
    "Please run the below command:\n"
    "mkdir {}\n"
    "And then fetch the JSON:\n"
    "python archive.py -t"
)

# Output directory for the generated site is missing.
NO_HTML_DIR_ERROR = (
    "\n"
    "We cannot find a place to write HTML files.\n"
    "Please run the below command:\n"
    "mkdir {}"
)
def get_json_directory(for_writing):
    """Return settings.json_directory after checking that it is usable.

    If the path does not exist, or exists but is not a directory, an
    explanatory message is passed to exit_immediately (imported from
    lib.common; presumably it terminates the program -- confirm there).

    for_writing only selects which "missing directory" message is shown:
    the write-oriented one when true, otherwise the read-oriented one that
    also tells the user to fetch the JSON with `-t`.
    """
    target = settings.json_directory

    if not target.exists():
        # I use posix paths here, since even on Windows folks will
        # probably be using some kinda Unix-y shell to run mkdir.
        template = (
            NO_JSON_DIR_ERROR_WRITE if for_writing else NO_JSON_DIR_ERROR_READ
        )
        exit_immediately(template.format(target.as_posix()))

    if not target.is_dir():
        exit_immediately("{} needs to be a directory".format(target))

    return target
def get_html_directory():
    """Return settings.html_directory after checking that it is usable.

    If the path does not exist, or exists but is not a directory, an
    explanatory message is passed to exit_immediately (imported from
    lib.common; presumably it terminates the program -- confirm there).
    """
    target = settings.html_directory

    if not target.exists():
        # The hint embeds the POSIX form of the path in a `mkdir` command.
        exit_immediately(NO_HTML_DIR_ERROR.format(target.as_posix()))

    if not target.is_dir():
        exit_immediately("{} needs to be a directory".format(target))

    return target
def get_client_info(config_file="./zuliprc"):
    """Create a Zulip API client and read the server URL from its config.

    Args:
        config_file: Path to the zuliprc file.  The default, "./zuliprc",
            is resolved against the current working directory and matches
            the path this function previously had hard-coded.

    Returns:
        A ``(client, zulip_url)`` tuple: the ``zulip.Client`` built from
        ``config_file`` and the value of the ``site`` option in that file's
        ``[api]`` section.

    Raises:
        configparser.NoSectionError / configparser.NoOptionError: if the
            file has no ``[api]`` section or no ``site`` option.
    """
    client = zulip.Client(config_file=config_file)

    # It would be convenient if the Zulip client object
    # had a `site` field, but instead I just re-read the file
    # directly to get it.
    config = configparser.RawConfigParser()
    config.read(config_file)
    zulip_url = config.get("api", "site")

    return client, zulip_url
def run():
    """Parse the command line and run the requested archive steps.

    Flags:
        -t            rebuild the JSON cache from scratch (populate_all)
        -i            fetch only new messages into the cache
                      (populate_incremental)
        -b            build the site from the JSON cache (build_website)
        --no-sitemap  with -b, skip build_sitemap

    -t and -i are mutually exclusive, and at least one of -t/-i/-b must be
    given; otherwise a message is printed and the program exits with
    status 1.
    """
    parser = argparse.ArgumentParser(
        description="Build an html archive of the Zulip chat."
    )
    parser.add_argument(
        "-b", action="store_true", default=False, help="Build .md files"
    )
    parser.add_argument(
        "--no-sitemap",
        action="store_true",
        default=False,
        help="Don't build sitemap files",
    )
    parser.add_argument(
        "-t", action="store_true", default=False, help="Make a clean json archive"
    )
    parser.add_argument(
        "-i",
        action="store_true",
        default=False,
        help="Incrementally update the json archive",
    )

    results = parser.parse_args()

    # sys.exit() is used instead of the bare exit() helper: exit() is
    # installed by the `site` module for interactive use and is not
    # guaranteed to exist in every run mode.
    if results.t and results.i:
        print("Cannot perform both a total and incremental update. Use -t or -i.")
        sys.exit(1)

    if not (results.t or results.i or results.b):
        print("\nERROR!\n\nYou have not specified any work to do.\n")
        parser.print_help()
        sys.exit(1)

    # Only a full rebuild (-t) is treated as "writing" when choosing the
    # missing-directory hint; -i and -b expect an existing cache.
    json_root = get_json_directory(for_writing=results.t)

    # The directory where this archive.py is located
    repo_root = os.path.dirname(os.path.realpath(__file__))

    # Validate the output directory up front so a bad path is reported
    # before any fetching starts.
    if results.b:
        md_root = get_html_directory()

    if results.t or results.i:
        is_valid_stream_name = stream_validator(settings)

        client, zulip_url = get_client_info()

        if results.t:
            populate_all(
                client,
                json_root,
                is_valid_stream_name,
            )
        else:
            populate_incremental(
                client,
                json_root,
                is_valid_stream_name,
            )
    else:
        # -b on its own: no fetching happens, but build_website() still
        # needs the server URL, so it must be looked up on this path too.
        # Without this, zulip_url would be unbound below.
        _, zulip_url = get_client_info()

    if results.b:
        build_website(
            json_root,
            md_root,
            settings.site_url,
            settings.html_root,
            settings.title,
            zulip_url,
            settings.zulip_icon_url,
            repo_root,
            settings.page_head_html,
            settings.page_footer_html,
        )
        if not results.no_sitemap:
            build_sitemap(settings.site_url, md_root.as_posix(), md_root.as_posix())
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    run()