forked from sheepzh/poetry
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjintian_net.py
44 lines (29 loc) · 1.06 KB
/
jintian_net.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
"""
Crawl the specified article pages from jintian.net (今天) and save them.
"""
# Site-relative article paths to crawl; each entry is passed to parse() by the
# loop at the bottom of this file.
items = []
from bs4 import BeautifulSoup as soup
from util import Profile, write_poem
# Scheme and host that every article path is appended to when building a URL.
base_url = 'http://www.jintian.net'
import requests
def parse(item, page=1, timeout=10):
    """Fetch one article, follow its pagination, and save the text.

    Args:
        item: Site-relative article path that is appended to ``base_url``,
            e.g. ``'/today/?action-viewnews-itemid-2469'``.
        page: Page number to fetch; appended to the URL as ``-page-<page>``.
        timeout: Seconds to wait for the server on each page request before
            ``requests`` gives up.

    Returns:
        The text of the requested page followed by the text of every later
        page, each separated by five CRLF pairs.

    Raises:
        AttributeError: If the title, author or body element is missing
            from the downloaded page.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    url = base_url + item + '-page-' + str(page)
    # Without a timeout a stalled connection would block the crawl forever.
    response = requests.get(url, timeout=timeout)
    response.encoding = 'utf-8'

    # Turn line-break markup into real newlines before parsing so the
    # extracted text keeps its line structure.
    html_text = (
        response.text
        .replace(u'<br>', '\n')
        .replace('<BR>', '\n')
        .replace(u'</P>', '\n</P>')
        .replace(u'</p>', '\n</p>')
    )
    html = soup(html_text, 'lxml')

    title = html.find('h1', id='articletitle').text
    # The first three characters of the byline are dropped (presumably a
    # label prefix in front of the author's name -- confirm against the site).
    author = html.find('font', color='#996666').text[3:]
    article = html.find('div', id='articlebody').text

    # The counter span is only looked up, not required; when it is absent
    # no further pages are fetched.
    total = html.find('span', class_='xspace-totlerecord')
    if total is not None and int(total.text) > page:
        article += '\r\n\r\n\r\n\r\n\r\n' + parse(item, page + 1, timeout)

    # NOTE(review): this runs at every recursion level, so later pages are
    # written with their partial text before page 1 writes the full article.
    # Confirm that write_poem overwrites rather than appends.
    write_poem(Profile(title=title, author=author, href=item), article)
    if page == 1:
        print(article)
    return article
# Example of crawling a single article by its site-relative path:
# parse('/today/?action-viewnews-itemid-2469')
# Crawl every article path listed in ``items``, in order.
for item in items:
    parse(item)