forked from Mocha-Pudding/DPspider
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdecrypt.py
68 lines (63 loc) · 2.4 KB
/
decrypt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#coding:utf-8
from settings import *
from bs4.element import Tag
from util.tools import from_pattern
from util.decrypt import _clean,_find_css,\
_find_head,_get_str_svg,_get_num_svg
class Decrypter(object):
    """Turn CSS/SVG-obfuscated tags back into plain text.

    SVG lookup tables are fetched lazily and cached on the instance so that
    each table is downloaded/parsed at most once per ``Decrypter``:

    * ``svg``      -- table used when decrypting review-comment tags.
    * ``_str_svg`` -- table used for ``<e>`` tags outside comments.
    * ``_num_svg`` -- table used for ``<d>`` tags outside comments.
    """

    # Tag name -> name of the instance attribute caching that tag's SVG table.
    _SVG_CACHE_ATTRS = {
        'e': '_str_svg',
        'd': '_num_svg',
    }

    def __init__(self, shopId=None):
        """Create a decrypter; ``shopId`` is stored but not used in this class."""
        self.shopId = shopId
        self.svg = None
        self._str_svg = None
        self._num_svg = None

    def decrypt(self, soup, cls_dict, css_dict, pattern='.*', comment=False):
        """Decrypt the children of ``soup`` and return the matching text.

        soup: the encrypted tag; its ``contents`` are walked in document
            order and are NOT modified.
        cls_dict, css_dict: decryption dictionaries obtained by parsing the
            CSS file.
        pattern: regular expression handed to ``from_pattern`` together with
            the decrypted text; the content matching it is returned.
        comment: whether the current tag belongs to a review comment. Tags
            that are not part of review content do not need to set it True.

        Returns whatever ``from_pattern(pattern, text)`` returns.
        """
        # Traverse a private copy: popping from ``soup.contents`` itself would
        # empty the caller's tag. The copy is reversed so it can be used as a
        # stack (next node in document order is at the end -> O(1) pop/push).
        pending = list(reversed(soup.contents))
        pieces = []
        while pending:
            node = pending.pop()
            if isinstance(node, Tag):
                if node.name in DECRYPT_TAGS:
                    first_class = node['class'][0]
                    if first_class in IGNORED_SPAN_CLASS:
                        continue
                    if first_class == 'item':
                        # Container tag: splice its children in place of it,
                        # keeping their original order.
                        pending.extend(reversed(node.contents))
                        continue
                    node = self._get_decrypted(node, cls_dict, css_dict, comment)
                # Tags whose name is not in DECRYPT_TAGS are passed through
                # unchanged to ``_clean``.
            elif not isinstance(node, str):
                continue
            pieces.append(node)
        return from_pattern(pattern, _clean(pieces))

    def _get_decrypted(self, element, tag_dict, css_dict, comment=False):
        """Return the character that the obfuscated ``element`` stands for.

        element: tag whose first CSS class identifies the glyph.
        tag_dict, css_dict: lookup tables passed to ``_find_head`` and
            ``_find_css`` respectively.
        comment: when True the shared comment table ``self.svg`` is used;
            otherwise the table is chosen by tag name (``e`` or ``d``).

        Returns the decoded character, or None when the class cannot be
        resolved or no SVG row lies at/below the glyph's ``y`` offset.
        Raises KeyError when ``comment`` is False and the tag name is
        neither ``e`` nor ``d``.
        """
        cls = element['class'][0]
        f, url = _find_head(cls, tag_dict)
        _css = _find_css(cls, css_dict)
        if not (f and _css):
            return None

        if comment:
            if self.svg is None:
                self.svg = _get_str_svg(url)
            svg = self.svg
        else:
            cache_attr = self._SVG_CACHE_ATTRS[element.name]
            svg = getattr(self, cache_attr)
            if svg is None:
                # NOTE(review): DECRYPT_TAGS values are evaluated as Python
                # expressions; keep that setting under the project's control.
                svg = eval(DECRYPT_TAGS[element.name.strip()])(url)
                # Store on the instance. The previous code assigned into a
                # temporary dict literal, so the table was never cached and
                # was rebuilt for every glyph.
                setattr(self, cache_attr, svg)

        # Pick the first row whose y is not above the glyph's y offset; this
        # relies on ``svg`` iterating its rows in the intended order.
        for y, row in svg.items():
            if _css['y'] > int(y):
                continue
            x = int(_css['x'] / f)
            return row[x]
        return None