From d25e13b86e23cc2b45b12769f176b9f9754acb3f Mon Sep 17 00:00:00 2001 From: conlin <995018884@qq.com> Date: Wed, 27 Sep 2023 18:04:04 +0800 Subject: [PATCH] fix bug --- aioscrapy/core/engine.py | 1 - aioscrapy/core/scheduler.py | 3 +- aioscrapy/libs/extensions/metric.py | 14 ++++++-- aioscrapy/utils/log.py | 40 +++++++---------------- example/singlespider/demo_queue_memory.py | 3 +- 5 files changed, 26 insertions(+), 35 deletions(-) diff --git a/aioscrapy/core/engine.py b/aioscrapy/core/engine.py index de704e6..184a34a 100644 --- a/aioscrapy/core/engine.py +++ b/aioscrapy/core/engine.py @@ -14,7 +14,6 @@ from aioscrapy.exceptions import DontCloseSpider from aioscrapy.http import Response from aioscrapy.http.request import Request -from aioscrapy.utils.log import logformatter_adapter from aioscrapy.utils.misc import load_instance from aioscrapy.utils.tools import call_helper, create_task diff --git a/aioscrapy/core/scheduler.py b/aioscrapy/core/scheduler.py index a047945..4655d0d 100644 --- a/aioscrapy/core/scheduler.py +++ b/aioscrapy/core/scheduler.py @@ -6,6 +6,7 @@ from aioscrapy.statscollectors import StatsCollector from aioscrapy.utils.misc import load_instance from aioscrapy.utils.tools import call_helper +from aioscrapy.utils.log import logger class BaseSchedulerMeta(type): @@ -122,7 +123,7 @@ async def from_crawler(cls: Type[SchedulerTV], crawler: "aioscrapy.Crawler") -> await instance.flush() count = await call_helper(instance.queue.len) - count and crawler.spider.log("Resuming crawl (%d requests scheduled)" % count) + count and logger.info("Resuming crawl (%d requests scheduled)" % count) return instance diff --git a/aioscrapy/libs/extensions/metric.py b/aioscrapy/libs/extensions/metric.py index b72d80a..898556e 100644 --- a/aioscrapy/libs/extensions/metric.py +++ b/aioscrapy/libs/extensions/metric.py @@ -30,6 +30,7 @@ def __init__(self, spider_name: str, settings: Settings): influxdb_url = settings.get('METRIC_INFLUXDB_URL') token = settings.get('METRIC_INFLUXDB_TOKEN') location = settings.get('METRIC_LOCATION') + self.retry_times = settings.getint('METRIC_RETRY_TIMES', 5) self.location = location or f"{platform.node()}_{os.getpid()}" self.spider_name = spider_name self.session = ClientSession(headers={ @@ -54,11 +55,18 @@ async def record(self, obj: "Metric"): continue cnt = current_cnt - obj.prev.get(metric_name, 0) if cnt: - data += self.format_metric(metric_name.replace('/', '-'), cnt, self.spider_name, - self.location) + '\n' + data += self.format_metric( + metric_name.replace('/', '-'), cnt, self.spider_name, self.location + ) + '\n' obj.prev[metric_name] = current_cnt if data: - await self.emit(data) + for _ in range(self.retry_times): + try: + await self.emit(data) + return + except: + continue + logger.warning(f"emit metric failed:\n{data}") async def close(self): if self.session is not None: diff --git a/aioscrapy/utils/log.py b/aioscrapy/utils/log.py index bf25e20..3afd6ff 100644 --- a/aioscrapy/utils/log.py +++ b/aioscrapy/utils/log.py @@ -6,7 +6,6 @@ from loguru import logger as _logger from aioscrapy import Settings, Spider -from aioscrapy.exceptions import AioScrapyDeprecationWarning _logger.remove(0) @@ -32,34 +31,19 @@ def configure_logging(spider: Type[Spider], settings: Settings): ) -def logformatter_adapter(logkws): - """ - Helper that takes the dictionary output from the methods in LogFormatter - and adapts it into a tuple of positional arguments for logger.log calls, - handling backward compatibility as well. - """ - if not {'level', 'msg', 'args'} <= set(logkws): - warnings.warn('Missing keys in LogFormatter method', - AioScrapyDeprecationWarning) - - if 'format' in logkws: - warnings.warn('`format` key in LogFormatter methods has been ' - 'deprecated, use `msg` instead', - AioScrapyDeprecationWarning) - - level = logkws.get('level', "INFO") - message = logkws.get('format', logkws.get('msg')) - # NOTE: This also handles 'args' being an empty dict, that case doesn't - # play well in logger.log calls - args = logkws if not logkws.get('args') else logkws['args'] - return level, message, args - - class AioScrapyLogger: + __slots__ = ( + 'catch', 'complete', 'critical', 'debug', 'error', 'exception', + 'info', 'log', 'patch', 'success', 'trace', 'warning' + ) - def __getattr__(self, item): - spider_name = asyncio.current_task().get_name() - return getattr(_logger.bind(spidername=spider_name), item) + def __getattr__(self, method): + try: + spider_name = asyncio.current_task().get_name() + return getattr(_logger.bind(spidername=spider_name), method) + except Exception as e: + warnings.warn(f'Error on get logger: {e}') + return getattr(_logger, method) -logger: Type[_logger] = AioScrapyLogger() +logger = AioScrapyLogger() diff --git a/example/singlespider/demo_queue_memory.py b/example/singlespider/demo_queue_memory.py index 2a06d9d..fa34356 100644 --- a/example/singlespider/demo_queue_memory.py +++ b/example/singlespider/demo_queue_memory.py @@ -13,7 +13,7 @@ class DemoMemorySpider(Spider): # 'DOWNLOAD_DELAY': 3, # 'RANDOMIZE_DOWNLOAD_DELAY': True, # 'CONCURRENT_REQUESTS': 1, - 'LOG_LEVEL': 'INFO', + 'LOG_LEVEL': 'DEBUG', "CLOSE_SPIDER_ON_IDLE": True, } @@ -40,7 +40,6 @@ async def parse(self, response): 'author': quote.xpath('span/small/text()').get(), 'text': quote.css('span.text::text').get(), } - raise Exception(111) next_page = response.css('li.next a::attr("href")').get() if next_page is not None: