-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmassgethtml.py
29 lines (27 loc) · 1.45 KB
/
massgethtml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import os
from html import escape
from urllib.parse import quote

from mw_api_client import Wiki
from requests import get

from configuration import WIKI_ARTICLE_PATH, SCRAP_DIR, API_URL, USERAGENT, INDEX_HEAD, INDEX_BODY
# Mirror every page listed by the wiki API as a static HTML file under
# SCRAP_DIR, then write an index.html table linking to each mirrored page.

# Markup inserted into each saved page so that snapblocks renders the
# elements matching `.blocks`.
_SNAPBLOCKS_SNIPPET = (
    '<script src="https://snap-blocks.github.io/js/snapblocks.min.js"></script>'
    '<script>snapblocks.renderMatching(`.blocks`, {});</script>'
)

# One index table row: the page title in the first cell, a "Visit" link to the
# mirrored file in the second.
_INDEX_ROW_TEMPLATE = (
    '<tr><td class="px-6 py-4 whitespace-nowrap text-left text-sm font-medium">{label}</td>'
    '<td class="px-6 py-4 whitespace-nowrap text-left text-sm font-medium">'
    '<a class="text-indigo-500 hover:text-indigo-600" href="{href}.html">Visit</a></td></tr>'
)

# Seconds to wait on a single page download before giving up on that page.
_REQUEST_TIMEOUT = 30


def _index_row(title):
    """Return the index table row for the page called *title*.

    The visible label is HTML-escaped and the link target is percent-encoded,
    so titles containing characters such as ``&``, ``<``, ``?``, ``#`` or ``:``
    neither break the markup nor change the meaning of the relative link.
    """
    return _INDEX_ROW_TEMPLATE.format(label=escape(title), href=quote(title))


def _inject_snapblocks(markup):
    """Return *markup* with the snapblocks snippet placed before its last ``</div>``.

    The snippet is inserted once, not before every closing div, so the script
    is loaded and run a single time per page. Markup that contains no
    ``</div>`` is returned unchanged.
    """
    head, closing, tail = markup.rpartition('</div>')
    if not closing:
        return markup
    return head + _SNAPBLOCKS_SNIPPET + closing + tail


wiki = Wiki(API_URL, USERAGENT)
pages = wiki.allpages()
index_rows = []
for page in pages:
    try:
        target = SCRAP_DIR + page.title + '.html'
        # Titles may contain '/', so create the whole parent chain first.
        parent = os.path.dirname(target)
        if parent:
            os.makedirs(parent, exist_ok=True)
        # Keep '/' and ':' literal in the URL path; encode everything else
        # that would otherwise be read as a query string or fragment.
        articleurl = WIKI_ARTICLE_PATH + quote(page.title, safe='/:') + '?action=render'
        response = get(articleurl, timeout=_REQUEST_TIMEOUT)
        # Do not save an HTTP error page as if it were the article.
        response.raise_for_status()
        with open(target, 'w', encoding='utf-8') as output:
            output.write(_inject_snapblocks(response.text))
    except Exception as err:
        # Best effort: report the failing page and carry on with the rest.
        print("An exception occurred for {!r}: {}".format(page.title, err))
    else:
        # Only pages that were actually written get a link in the index.
        index_rows.append(_index_row(page.title))
indexslot = ''.join(index_rows)
with open(SCRAP_DIR + '/index.html', 'w', encoding='utf-8') as index:
    index.write(INDEX_HEAD + indexslot + INDEX_BODY)