Skip to content

Commit

Permalink
Merge pull request #25 from wuyue92tree/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
wuyue92tree authored Oct 18, 2018
2 parents e7addf1 + a0aab1d commit 6346270
Show file tree
Hide file tree
Showing 9 changed files with 299 additions and 28 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Crwy

[![PyPI Version](https://img.shields.io/pypi/v/Crwy.svg)](https://pypi.python.org/pypi/Crwy)
[![Build Status](https://travis-ci.org/wuyue92tree/crwy.svg?branch=1.1.2)](https://travis-ci.org/wuyue92tree/crwy)
[![Build Status](https://travis-ci.org/wuyue92tree/crwy.svg?branch=1.1.3)](https://travis-ci.org/wuyue92tree/crwy)

# 简介

Expand Down Expand Up @@ -34,7 +34,7 @@ pip install crwy
```

or
前往下载: https://pypi.python.org/pypi/Crwy/1.1.2/
前往下载: https://pypi.python.org/pypi/Crwy/1.1.3/

# 使用手册

Expand Down
2 changes: 1 addition & 1 deletion crwy/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.1.2
1.1.3
12 changes: 4 additions & 8 deletions crwy/utils/extend/xunma.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"""


from __future__ import print_function
from __future__ import print_function, unicode_literals

from crwy.spider import Spider
from crwy.exceptions import CrwyExtendException
Expand All @@ -38,7 +38,7 @@ def login(self):
password=self.password)
res = self.html_downloader.download(url)

return res.content.strip().split("&")[0]
return res.text.strip().split("&")[0]
except Exception as e:
raise CrwyExtendException(e)

Expand All @@ -58,11 +58,7 @@ def get_phone(self, token, phone_type='', phone=''):
phone_type=phone_type, phone=phone)

res = self.html_downloader.download(url)
# if 'success' not in res.content:
# raise MfExtendException("[XunMa] get phone failed.")
#
# # print(res.content)
return res.content.strip().split(';')[0]
return res.text.strip().split(';')[0]

except Exception as e:
raise CrwyExtendException(e)
Expand All @@ -82,7 +78,7 @@ def get_message(self, token, phone):
item_id=self.item_id, phone=phone)
res = self.html_downloader.download(url)

return res.content.strip().split('&')[-1]
return res.text.strip().split('&')[-1]

except Exception as e:
raise CrwyExtendException(e)
Expand Down
20 changes: 10 additions & 10 deletions crwy/utils/extend/yima.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
这一行开始写关于本文件的说明与解释
"""

from __future__ import print_function
from __future__ import print_function, unicode_literals

from crwy.spider import Spider
from crwy.exceptions import CrwyException
Expand Down Expand Up @@ -39,10 +39,10 @@ def login(self):
password=self.password)
res = self.html_downloader.download(url)

if 'success' not in res.content:
if 'success' not in res.text:
raise CrwyException("[YiMa] Login failed.")

return res.content.strip().split("|")[-1]
return res.text.strip().split("|")[-1]
except Exception as e:
raise CrwyException(e)

Expand All @@ -65,11 +65,11 @@ def get_phone(self, token, phone_type='',
phone_type=phone_type, phone=phone)

res = self.html_downloader.download(url)
if 'success' not in res.content:
if 'success' not in res.text:
raise CrwyException("[YiMa] get phone failed.")

# print(res.content)
return res.content.strip().split('|')[-1]
# print(res.text)
return res.text.strip().split('|')[-1]

except Exception as e:
raise CrwyException(e)
Expand All @@ -88,11 +88,11 @@ def get_message(self, token, phone):
phone=phone)
res = self.html_downloader.download(url)

if 'success' not in res.content:
if 'success' not in res.text:
raise CrwyException("[YiMa] get message failed.")

else:
return res.content.strip().split('|')[-1]
return res.text.strip().split('|')[-1]

except Exception as e:
raise CrwyException(e)
Expand All @@ -105,7 +105,7 @@ def release_phone(self, token, phone):
phone=phone)
res = self.html_downloader.download(url)

if 'success' not in res.content:
if 'success' not in res.text:
raise CrwyException("[YiMa] release phone failed.")

except Exception as e:
Expand All @@ -119,7 +119,7 @@ def add_black(self, token, phone):
phone=phone)
res = self.html_downloader.download(url)

if 'success' not in res.content:
if 'success' not in res.text:
raise CrwyException("[YiMa] black phone failed.")

except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion crwy/utils/html/font_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def analysis(self, is_clean=True, debug=False):

self.save_font()
self.get_font_xml()
with open(self.xml_path, 'r') as xml:
with open(self.xml_path, 'rb') as xml:
soups = self.html_parser.parser(xml.read())
ttglyph_lst = soups.find('glyf').find_all('ttglyph')[1:]
analysis_res = []
Expand Down
5 changes: 2 additions & 3 deletions crwy/utils/no_sql/redis_m.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,8 @@
class RedisDb(object):
def __init__(self, **kwargs):
if 'url' in kwargs.keys():
url = kwargs.get('url')
db = kwargs.get('db', 0)
self.pool = redis.ConnectionPool.from_url(url, db=db, **kwargs)
url = kwargs.pop('url')
self.pool = redis.ConnectionPool.from_url(url, **kwargs)
else:
self.pool = redis.ConnectionPool(**kwargs)
self.db = redis.StrictRedis(connection_pool=self.pool)
Expand Down
4 changes: 4 additions & 0 deletions crwy/utils/scrapy_plugs/dupefilters.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ def request_seen(self, request):
if not request.meta.get('dupefilter_key', None):
return False

if len(request.meta.get('redirect_urls', [])) > 0:
# skip url from redirect
return False

dupefilter_key = request.meta.get('dupefilter_key')
dupefilter_key = hashlib.sha1(dupefilter_key).hexdigest() if \
self.do_hash else dupefilter_key
Expand Down
9 changes: 6 additions & 3 deletions crwy/utils/scrapy_plugs/middlewares.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ def process_request(self, request, spider):
'no user in cookie_pool:{}'.format(self.site))
cookie = self.h.hget(user)
if cookie:
request.cookies = json.loads(cookie)
spider.logger.debug('get_cookie_success: {}'.format(user))
# 字典存入redis,取出时为string,通过eval进行还原
request.cookies = eval(cookie)
spider.logger.debug('get_cookie_success: {}'.format(
user.decode('utf-8')))
else:
spider.logger.warning('get_cookie_failed: {}'.format(user))
spider.logger.warning('get_cookie_failed: {}'.format(
user.decode('utf-8')))
Loading

0 comments on commit 6346270

Please sign in to comment.