Commit

🎉 v0.4.6
howie6879 committed Feb 9, 2019
1 parent c40bc9c commit ddf6ddd
Showing 7 changed files with 127 additions and 53 deletions.
4 changes: 4 additions & 0 deletions ruia/exceptions.py
@@ -13,5 +13,9 @@ class InvalidRequestMethod(Exception):
pass


class NotImplementedParseError(Exception):
pass


class NothingMatchedError(Exception):
pass
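
The two new exception types defined above back the error handling added to ruia/spider.py further down in this commit. A minimal usage sketch (hypothetical, not part of the commit) of how calling code can tell the two failure modes apart:

    # Hypothetical sketch: catching the new ruia exceptions.
    from ruia.exceptions import NotImplementedParseError, NothingMatchedError

    async def safe_extract(spider, item_cls, response):
        try:
            # The base Spider.parse now raises NotImplementedParseError when a
            # subclass never overrides it.
            await spider.parse(response)
        except NotImplementedParseError as e:
            print(f'no parse() implemented: {e}')
        try:
            # Item extraction raises NothingMatchedError when a selector matches nothing.
            return await item_cls.get_item(html=response.html)
        except NothingMatchedError as e:
            print(f'extraction failed: {e}')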
19 changes: 10 additions & 9 deletions ruia/request.py
@@ -111,17 +111,18 @@ async def fetch_callback(self, sem: Semaphore = None) -> Tuple[AsyncGeneratorTyp
try:
async with sem:
response = await self.fetch()
if self.callback is not None:
if iscoroutinefunction(self.callback):
callback_result = await self.callback(response)
response.callback_result = callback_result
else:
callback_result = self.callback(response)
else:
callback_result = None
except Exception as e:
response = None
self.logger.error(f"<Error: {self.url} {e}>")
callback_result, response = None, None

if self.callback is not None:
if iscoroutinefunction(self.callback):
callback_result = await self.callback(response)
response.callback_result = callback_result
else:
callback_result = self.callback(response)
else:
callback_result = None

return callback_result, response
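
The hunk above moves the callback invocation out of the try/except that guards the network fetch: a failed request is logged once and leaves response as None, while an exception raised inside a user callback now propagates to the caller (Spider.handle_request, patched below) instead of being reported as a request error. A rough illustration of the new behaviour (hypothetical callback, not part of this commit):

    # Hypothetical example: an exception raised inside a callback is no longer
    # swallowed by the fetch try/except; it bubbles up to Spider.handle_request,
    # which logs it under the callback's name (see ruia/spider.py below).
    from ruia import Request

    async def broken_callback(response):
        raise ValueError('boom')

    request = Request('http://www.httpbin.org/get', callback=broken_callback)
    # await request.fetch_callback(sem) now lets the ValueError reach the spider,
    # which logs it as a callback error rather than as "<Error: ...>".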

31 changes: 23 additions & 8 deletions ruia/spider.py
@@ -10,7 +10,7 @@
from signal import SIGINT, SIGTERM
from types import AsyncGeneratorType

from ruia.exceptions import InvalidParseType, NothingMatchedError
from ruia.exceptions import InvalidParseType, NotImplementedParseError, NothingMatchedError
from ruia.item import Item
from ruia.middleware import Middleware
from ruia.request import Request
@@ -97,7 +97,7 @@ async def parse(self, response):
:param response: Response
:return:
"""
raise NotImplementedError
raise NotImplementedParseError('<!!! parse function is expected !!!>')

@classmethod
async def async_start(cls,
@@ -149,17 +149,34 @@ def start(cls,
spider_ins.loop.close()

async def handle_callback(self, aws_callback: typing.Coroutine, response):
callback_result = await aws_callback
"""Process coroutine callback function"""
callback_result = None

try:
callback_result = await aws_callback
except NothingMatchedError as e:
self.logger.error(f'<Item: {str(e).lower()}>')
except Exception as e:
self.logger.error(f'<Callback[{aws_callback.__name__}]: {e}>')
return callback_result, response

async def handle_request(self, request: Request) -> typing.Tuple[AsyncGeneratorType, Response]:
"""
Wrap request with middlewares.
Wrap request with middleware.
:param request:
:return:
"""
callback_result, response = None, None

await self._run_request_middleware(request)
callback_result, response = await request.fetch_callback(self.sem)
try:
callback_result, response = await request.fetch_callback(self.sem)
except NotImplementedParseError as e:
self.logger.error(e)
except NothingMatchedError as e:
self.logger.error(f'<Item: {str(e).lower()}>')
except Exception as e:
self.logger.error(f'<Callback[{request.callback.__name__}]: {e}>')
await self._run_response_middleware(request, response)
await self._process_response(request=request, response=response)
return callback_result, response
@@ -263,9 +280,7 @@ async def _process_async_callback(self, callback_result: AsyncGeneratorType, res
if process_item:
await process_item(each)
else:
raise InvalidParseType(f'Invalid parse type: {type(each)}')
except NothingMatchedError as e:
self.logger.error(f'Field: {e}')
raise InvalidParseType(f'<Parse Invalid parse type: {type(each)}>')
except Exception as e:
self.logger.error(e)
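
Taken together, the spider changes above mean that a missing parse() or a failed extraction is now logged and skipped rather than aborting the crawl. A small sketch mirroring the new tests added below (hypothetical, assumes the URL is reachable):

    # Hypothetical sketch: a Spider subclass without parse() no longer dies with
    # a bare NotImplementedError; handle_request catches NotImplementedParseError
    # and logs "<!!! parse function is expected !!!>" while the crawl keeps running.
    from ruia import Spider

    class NoParseSpider(Spider):
        start_urls = ['http://www.httpbin.org/get']
        # parse() deliberately not implemented

    NoParseSpider.start()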

2 changes: 1 addition & 1 deletion tests/__init__.py
@@ -1 +1 @@
#!/usr/bin/env python
# !/usr/bin/env python
8 changes: 4 additions & 4 deletions tests/test_item.py
@@ -70,18 +70,18 @@ def test_item():
assert item.title == 'Title: 豆瓣电影TOP250'

try:
item = asyncio.get_event_loop().run_until_complete(DoubanCleanMethodErrorItem.get_item(html=HTML))
asyncio.get_event_loop().run_until_complete(DoubanCleanMethodErrorItem.get_item(html=HTML))
except Exception as e:
assert isinstance(e, InvalidFuncType)

try:

item = asyncio.get_event_loop().run_until_complete(DoubanItem.get_item(html=''))
asyncio.get_event_loop().run_until_complete(DoubanItem.get_item(html=''))
except Exception as e:
assert isinstance(e, ValueError)

try:
item = asyncio.get_event_loop().run_until_complete(DoubanItem.get_item(html_etree='test'))
asyncio.get_event_loop().run_until_complete(DoubanItem.get_item(html_etree='test'))
except Exception as e:
assert isinstance(e, AttributeError)

@@ -91,7 +91,7 @@ def test_items():
assert items[0].abstract == '希望让人自由。'

try:
items = asyncio.get_event_loop().run_until_complete(error_parse_item(html=HTML))
asyncio.get_event_loop().run_until_complete(error_parse_item(html=HTML))
except Exception as e:
assert isinstance(e, ValueError)

51 changes: 34 additions & 17 deletions tests/test_request.py
@@ -12,6 +12,10 @@ async def hello(response):
return 'hello ruia'


def hi(response):
yield 'hi ruia'


async def retry_func(request):
request.request_config['TIMEOUT'] = 10

@@ -21,22 +25,6 @@ async def retry_func(request):
}


async def timeout_request(sem):
request_config = {
'RETRIES': 1,
'DELAY': 1,
'TIMEOUT': 0.1,
}
request = Request('http://www.httpbin.org/get',
method='GET',
metadata={'hello': 'ruia'},
encoding='utf-8',
request_config=request_config,
params=params,
callback=hello)
return await request.fetch_callback(sem)


async def make_get_request(sem, callback=None):
request_config = {
'RETRIES': 3,
@@ -66,6 +54,7 @@ async def make_post_request(sem, callback):


def test_request_config():
assert str(Request('https://www.httpbin.org/')) == '<GET https://www.httpbin.org/>'
_, response = asyncio.get_event_loop().run_until_complete(make_get_request(sem=sem, callback=hello))
assert response.callback_result == 'hello ruia'
assert response.metadata == {'hello': 'ruia'}
@@ -76,6 +65,8 @@ def test_request_config():
json_result = asyncio.get_event_loop().run_until_complete(response.json())
assert json_result['data'] == "name=ruia"


def test_method_error_request():
try:
request = Request('https://www.httpbin.org/', method='PUT')
response = asyncio.get_event_loop().run_until_complete(request.fetch())
@@ -84,6 +75,32 @@ def test_request_config():
assert isinstance(e, InvalidRequestMethod)


def test_sem_error_request():
_, response = asyncio.get_event_loop().run_until_complete(make_get_request(sem=None, callback=None))
assert response == None


def test_retry_request():
request = Request('http://www.httpbin.org/404')
_, response = asyncio.get_event_loop().run_until_complete(request.fetch_callback(sem=sem))
assert response.url == 'http://www.httpbin.org/404'


def test_timeout_request():
callback_result, response = asyncio.get_event_loop().run_until_complete(timeout_request(sem=sem))
async def timeout_request(sem):
request_config = {
'RETRIES': 1,
'DELAY': 1,
'TIMEOUT': 0.1,
}
request = Request('http://www.httpbin.org/get',
method='GET',
metadata={'hello': 'ruia'},
encoding='utf-8',
request_config=request_config,
params=params,
callback=hi)
return await request.fetch_callback(sem)

_, response = asyncio.get_event_loop().run_until_complete(timeout_request(sem=sem))
assert response.url == 'http://www.httpbin.org/get'
65 changes: 51 additions & 14 deletions tests/test_spider.py
@@ -1,6 +1,5 @@
#!/usr/bin/env python


import asyncio
import os

@@ -62,7 +61,7 @@ async def parse(self, response):
async def parse_item(self, response):
pages = [f'https://www.httpbin.org/get?p={i}' for i in range(1, 2)]
async for resp in self.multiple_request(pages):
yield self.parse_next(resp, any_param='hello')
yield self.parse_next(response=resp, any_param='hello')

async def parse_next(self, response, any_param):
assert any_param == 'hello'
@@ -81,14 +80,6 @@ async def process_item(self, item: ItemDemo):
async def count_nums(self):
SpiderDemo.call_nums += 1

# def test_invalid_parse_type_spider():
# class InvalidParseTypeSpider(Spider):
# start_urls = ['http://www.httpbin.org/get']
#
# async def parse(self, response):
# yield {}
#
# InvalidParseTypeSpider.start(loop=asyncio.new_event_loop())

def test_spider_with_middleware():
loop = asyncio.new_event_loop()
@@ -187,9 +178,12 @@ class MultipleRequestSpider(Spider):
async def parse(self, response: Response):
urls = [f'https://httpbin.org/get?p={page}' for page in range(1, 2)]
async for response in self.multiple_request(urls, is_gather=True):
json_result = await response.json()
page = json_result['args']['p']
result.append(int(page))
yield self.parse_next(response=response)

async def parse_next(self, response):
json_result = await response.json()
page = json_result['args']['p']
result.append(int(page))

MultipleRequestSpider.start()
assert result == [1]
@@ -205,14 +199,57 @@ class NoStartUrlSpider(Spider):
assert isinstance(e, ValueError)


def test_no_parse_spider():
def test_callback_error():
class NoParseSpider(Spider):
start_urls = ['http://www.httpbin.org/get']

NoParseSpider.start()

class CallbackError(Spider):
start_urls = ['http://www.httpbin.org/get']

async def parse(self, response):
raise ValueError('error')

CallbackError.start()


def test_coroutine_callback_error():
class CoroutineItemErrorSpider(Spider):
start_urls = ['http://www.httpbin.org/get']

async def parse(self, response):
pages = ['http://www.httpbin.org/get?p=1']
async for resp in self.multiple_request(pages):
yield self.parse_item(response=resp)

async def parse_item(self, response):
await ItemDemo.get_item(html=response.html)

CoroutineItemErrorSpider.start()

class CoroutineErrorSpider(Spider):
start_urls = ['http://www.httpbin.org/get']

async def parse(self, response):
pages = ['http://www.httpbin.org/get?p=1']
async for resp in self.multiple_request(pages):
yield self.parse_item(response=resp)

async def parse_item(self, response):
raise ValueError("error")

CoroutineErrorSpider.start()


def test_nothing_marched_spider():
class NothingMatchedErrorSpider(Spider):
start_urls = ['http://www.httpbin.org/get']

async def parse(self, response):
await ItemDemo.get_item(html=response.html)

NothingMatchedErrorSpider.start()


def test_multiple_spider():