-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSpider_baidu_picture.py
48 lines (38 loc) · 1.36 KB
/
Spider_baidu_picture.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
import requests
import os
import datetime
# Absolute path of the working directory, captured once at import time;
# getPic builds its per-keyword download directory beneath it.
cur_path = os.path.abspath(os.path.curdir)
def getPic(html, keyword, timeout=10):
    """Download every image referenced by a Baidu image-search result page.

    Image URLs are taken from the ``"objURL":"..."`` entries embedded in
    *html*.  Each image that downloads successfully is written to
    ``<cur_path>/<keyword>/<keyword>_<timestamp>.jpg``; the directory is
    created if it does not exist yet.  Images that cannot be fetched are
    reported on stdout and skipped, so one bad URL never aborts the batch.

    Args:
        html: Text of the search result page.
        keyword: Search keyword; used as directory name and file prefix.
        timeout: Seconds to wait on each image request before giving up.

    Returns:
        None.
    """
    picture_urls = re.findall('"objURL":"(.*?)",', html, re.S)
    target_dir = cur_path + '/' + keyword + '/'
    if not os.path.isdir(target_dir):
        os.mkdir(target_dir)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Start downloading images now...')
    saved = 0
    for pic_url in picture_urls:
        # %-formatting avoids str(url), which raises on non-ASCII URLs
        # under Python 2.
        print('downloading%d,The url of this picture:%s' % (saved + 1, pic_url))
        try:
            # requests waits forever unless a timeout is given, so a stalled
            # host would otherwise hang the whole crawl.
            response = requests.get(pic_url, timeout=timeout)
            # Reject 4xx/5xx answers so error pages are not saved as .jpg.
            response.raise_for_status()
        except requests.exceptions.Timeout:
            print('【error】timeout')
            continue
        except requests.exceptions.RequestException:
            # Connection failures, malformed URLs and HTTP error statuses.
            print('【error】Current image cannot be downloaded')
            continue

        # Colon- and space-free timestamp: str(datetime.now()) contains ':'
        # which is not a legal filename character on Windows.
        stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S_%f')
        file_path = target_dir + keyword + '_' + stamp + '.jpg'
        with open(file_path, 'wb') as fp:
            fp.write(response.content)
        saved += 1
if __name__ == '__main__':
    # raw_input only exists on Python 2; fall back to input() on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    input_word = read_line("Input key word: ")

    search_url = "http://image.baidu.com/search/flip"
    # Result pages are addressed by offset: 0, 20, 40, 60, 80 (five pages).
    for offset in range(0, 81, 20):
        # Passing the query as params lets requests percent-encode the
        # keyword, so '&', '#', spaces, etc. cannot corrupt the URL.
        # A list of pairs keeps the original parameter order.
        query = [
            ("tn", "baiduimage"),
            ("ie", "utf-8"),
            ("word", input_word),
            ("pn", str(offset)),
        ]
        try:
            re_txt = requests.get(search_url, params=query, timeout=10)
        except requests.exceptions.RequestException:
            # Skip a page that cannot be fetched instead of aborting the run.
            print('Search page request failed, skipping offset %d' % offset)
            continue
        getPic(re_txt.text, input_word)