forked from icpc-jag/atcoder-clar2slack
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread_clar.py
46 lines (38 loc) · 1.73 KB
/
read_clar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import json
import re
import sys
# Scraper for a saved clarification-list HTML page.
#
# Usage: python read_clar.py INPUT_HTML OUTPUT_JSON
#
# Reads the single <tbody> of INPUT_HTML, turns every <tr> into a dict
# (title, user_name, question, response, public, update_url) and writes the
# list of dicts to OUTPUT_JSON.  If the page has no <tbody>, a message is
# printed and an empty list is written.

# Site-relative hrefs found in the page are made absolute with this prefix.
ATCODER_ORIGIN = "https://atcoder.jp"

# Title used when the first cell is empty; the text means "(not specified)".
UNSPECIFIED_TITLE = "(指定なし)"

# The question/response cells are unwrapped by fixed character offsets.
# NOTE(review): this assumes each cell starts with 55 characters of wrapper
# markup and ends with 11 - confirm against the current page layout.
BODY_PREFIX_LEN = 55
BODY_SUFFIX_LEN = 11


def _slack_link(path, label):
    """Return ``<absolute-url|label>`` for a site-relative *path*.

    Presumably Slack link markup (the project is named clar2slack).
    """
    return "<%s|%s>" % (ATCODER_ORIGIN + path, label)


def _require_match(pattern, text, what):
    """Return ``re.search(pattern, text)`` or raise ``ValueError`` naming *what*."""
    found = re.search(pattern, text)
    if found is None:
        raise ValueError("could not find %s in %r" % (what, text))
    return found


def _body_text(cell, what):
    """Return the plain text of a question/response ``<td>`` cell.

    Raises:
        ValueError: if *cell* is not a bare ``<td>...</td>`` element.
    """
    if re.fullmatch(r"<td>.*</td>", cell, re.S) is None:
        raise ValueError("unexpected markup in %s cell: %r" % (what, cell))
    inner = cell[BODY_PREFIX_LEN:-BODY_SUFFIX_LEN]
    # Only the apostrophe entity is decoded; &amp;/&lt;/&gt; are deliberately
    # left escaped.  NOTE(review): the original replace() had identical
    # arguments (a no-op), most likely an entity lost in transit - confirm.
    return re.sub(r"<.*?>", "", inner).replace("&#39;", "'")


def parse_row(row):
    """Convert one ``<tr>...</tr>`` string into a clarification record.

    Cells are read by position: 0 = task title, 1 = user, 2 = question,
    3 = response, 4 = public flag, 7 = cell holding the update link.

    Returns:
        dict with keys ``title``, ``user_name``, ``question``, ``response``,
        ``public`` and ``update_url`` (all ``str``).

    Raises:
        ValueError: if the row has fewer than 8 cells or a cell does not
            contain the expected markup.
    """
    columns = re.findall(r"<td.*?</td>", row, re.S)
    if len(columns) < 8:
        raise ValueError(
            "expected at least 8 <td> cells in a row, found %d" % len(columns)
        )

    # [4:-5] drops the literal "<td>" and "</td>" around the cell content.
    title = re.sub(r"\s+", " ", columns[0])[4:-5].strip()
    if title == "":
        title = UNSPECIFIED_TITLE
    else:
        m = _require_match(r"<a href=\"(.*)\">(.*)</a>", title, "task link")
        title = _slack_link(m.group(1), m.group(2).strip())

    m = _require_match(
        r"<a href=\"(.*)\" .*\"><span.*>(.*)</span></a>.*<a.*",
        re.sub(r"\s+", " ", columns[1]),
        "user link",
    )
    user_name = _slack_link(m.group(1), m.group(2))

    question = _body_text(columns[2], "question")
    response = _body_text(columns[3], "response")
    public = re.sub(r"<.*?>", "", columns[4][4:-5])

    m = _require_match(r"<a href=\"(.*?)\">", columns[7], "update link")
    update_url = ATCODER_ORIGIN + m.group(1)

    return {
        "title": title,
        "user_name": user_name,
        "question": question,
        "response": response,
        "public": public,
        "update_url": update_url,
    }


def parse_clarifications(page):
    """Return the list of clarification records found in *page* (HTML text).

    Prints ``<tbody> not found`` and returns ``[]`` when the page contains no
    ``<tbody>`` element.

    Raises:
        ValueError: if the page contains more than one ``<tbody>`` or a row
            cannot be parsed.
    """
    tbodies = re.findall(r"<tbody>.*?</tbody>", page, re.S)
    if not tbodies:
        print("<tbody> not found")
        return []
    if len(tbodies) != 1:
        raise ValueError("expected exactly one <tbody>, found %d" % len(tbodies))
    return [parse_row(row) for row in re.findall(r"<tr.*?</tr>", tbodies[0], re.S)]


if __name__ == "__main__":
    # Read the output path first so a missing argument fails before any I/O.
    SAVE_PATH = sys.argv[2]
    with open(sys.argv[1], encoding="utf-8") as f:
        data = parse_clarifications(f.read())
    with open(SAVE_PATH, "w", encoding="utf8") as f:
        f.write(json.dumps(data, ensure_ascii=False))