-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawlerTest3.py
47 lines (41 loc) · 1.51 KB
/
crawlerTest3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import json
from urllib.parse import urlencode
import requests
from lxml import etree
import ssl
import urllib3
# Base URL of the Douban search_subjects endpoint; the query string is
# appended separately.
url = 'http://movie.douban.com/j/search_subjects'

# Browser-like headers sent with each request.
# - 'User-Agent' used to appear twice in this literal (the first copy had a
#   truncated 'ozilla/5.0' value and was silently overwritten by the second);
#   only the effective value is kept, in its original leading position.
# - 'br' is deliberately not advertised in 'Accept-Encoding': requests and
#   urllib3 only decode Brotli when an optional brotli package is installed,
#   otherwise the body arrives still compressed and decoding it as UTF-8 fails.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Cookie': 'bid=JBee6dlJHQw',
    'Host': 'movie.douban.com',
    'Upgrade-Insecure-Requests': '1',
}
# Query-string fields for the request, in the order they are encoded.
parameters = dict(
    type='movie',
    tag='热门',
    page_limit=10,
    start=0,
)

# Percent-encode the fields and append them to the base URL after a '?'.
encodedParameters = urlencode(parameters)
newUrl = url + '?' + encodedParameters
print(newUrl)
# Fetch the URL with requests. An explicit timeout is passed because requests
# otherwise waits indefinitely when the server never responds.
r1 = requests.get(newUrl, headers=header, timeout=10)
# Decode the raw response bytes as UTF-8 and print the text.
r1_decoded = r1.content.decode('utf-8')
print('get via requets:', r1_decoded)
# Fetch the same URL through urllib3 and parse the body as JSON.
# The PoolManager is used as a context manager so its connection pools are
# released when the block exits.
with urllib3.PoolManager() as http:
    # Explicit timeout so a silent server cannot block the script forever.
    response = http.urlopen('GET', url=newUrl, headers=header, timeout=10.0)
    print(type(response))
    print('status:', response.status)
    # Decode the body once and reuse the text for printing and JSON parsing.
    body_text = response.data.decode('utf-8')
    print(body_text)
    jsonO = json.loads(body_text)
    # Raises KeyError if the reply carries no 'subjects' entry.
    print(jsonO['subjects'])
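# SSL: skipping certificate verification (local testing only).
# context = ssl._create_unverified_context()
# NOTE: requests.get() has no `context` argument; with requests, disable
# verification via `verify=False` instead:
# requests.get(url, headers=header, verify=False)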