-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
77 lines (66 loc) · 2.96 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# -*- coding: gbk -*-
import os
from BeautifulSoup import BeautifulSoup
from pprint import pprint
import requests
from urlparse import urljoin #网址分割
import time
# Directory tree holding the saved HTML pages to analyse.
dirname = r'F:\20130107\1\1月合集'
# Directory where the downloaded .torrent files are stored.
save_dir = r'./1/'

# Seconds to wait for a response on either HTTP request before giving up.
REQUEST_TIMEOUT = 200
# Pause (seconds) before every HTTP request so the remote site is not hammered.
REQUEST_DELAY = 1
# Path separators plus the characters Windows refuses in file names.
UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'


def _safe_filename(name):
    """Return *name* with every unsafe file-name character replaced by ``_``.

    The name comes from a remote page, so path separators must not be
    allowed to reach the file system (they would let the page choose where
    the file is written).
    """
    return ''.join('_' if ch in UNSAFE_FILENAME_CHARS else ch for ch in name)


def _fetch_torrent(page_url):
    """Download the torrent offered on *page_url* into ``save_dir``.

    The page is expected to contain ``<input id="name">`` and
    ``<input id="id">`` elements; their values are posted to ``../down.php``
    (resolved relative to *page_url*) and the response body is written to
    ``<save_dir>/<name>.torrent``.

    Every failure (network error, page without the form, file already
    present) is reported on stdout and the link is skipped; nothing is
    raised for those cases and nothing is returned.
    """
    try:
        time.sleep(REQUEST_DELAY)
        page = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        # ConnectionError, Timeout, bad URL, ... are all RequestException
        # subclasses, so a single clause covers them.
        return

    print(len(page.text))
    if len(page.text) < 100:
        # A very short body means the site has no such torrent.
        print("没这个种子\r\n\r\n")
        return

    page_soup = BeautifulSoup(page.content)
    name_input = page_soup.find("input", id='name')
    id_input = page_soup.find("input", id='id')
    # find() returns None when the element is missing; treat a page without
    # the download form like a missing torrent instead of crashing.
    if name_input is None or id_input is None:
        print("没这个种子\r\n\r\n")
        return
    torrent_name = name_input.get('value')
    torrent_id = id_input.get('value')
    if torrent_name is None or torrent_id is None:
        print("没这个种子\r\n\r\n")
        return
    print('%s %s' % (torrent_id, torrent_name))

    torrent_headers = {
        'referer': page_url,
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; FunWebProducts; .NET CLR 1.1.4322; PeoplePal 6.2)',
    }
    post_data = {'type': 'torrent', 'id': torrent_id, 'name': torrent_name}
    # Build the download endpoint from the page URL.
    torrent_post_url = urljoin(page_url, '../down.php')
    torrent_filename = os.path.join(
        save_dir, _safe_filename(torrent_name) + '.torrent')
    if os.path.isfile(torrent_filename):
        print('文件已存在\r\n')
        return
    print(torrent_filename)

    try:
        time.sleep(REQUEST_DELAY)
        torrent_data = requests.post(torrent_post_url,
                                     data=post_data,
                                     headers=torrent_headers,
                                     timeout=REQUEST_TIMEOUT)
    except Exception as err:
        # Deliberately broad: one failed download must not stop the crawl.
        # The error is shown so that failures are not silent.
        print('有错误。2')
        print(repr(err))
        return

    # Create the target directory on first use so the write cannot fail
    # just because it does not exist yet.
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    with open(torrent_filename, 'wb') as torrent_f:
        torrent_f.write(torrent_data.content)


# Walk every saved HTML page, find the links to torrent pages
# (hrefs containing '/file.php') and download each torrent.
for root, dirs, files in os.walk(dirname):
    for fn in files:
        filepath = os.path.join(root, fn)
        print(filepath)
        # The parser consumes the whole file while the soup is built, so
        # the handle can be released right away (and is closed on errors).
        with open(filepath) as file_handler:
            soup = BeautifulSoup(file_handler, fromEncoding="gb18030")
        for anchor in soup.findAll('a'):
            href = anchor.get('href')
            # Anchors without an href yield None; skip them.
            if href and '/file.php' in href:
                print(href)
                _fetch_torrent(href)