From 62120f625141564cf7f42d5324aabf2f4323c10a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bompard?= Date: Fri, 15 Mar 2024 13:55:06 +0100 Subject: [PATCH] Don't pollute the main logger with thread-specific loggers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Aurélien Bompard --- mirrormanager2/crawler/crawler.py | 117 +++++++++++++++--------------- mirrormanager2/crawler/log.py | 10 ++- 2 files changed, 67 insertions(+), 60 deletions(-) diff --git a/mirrormanager2/crawler/crawler.py b/mirrormanager2/crawler/crawler.py index 829b24418..f02b1e677 100755 --- a/mirrormanager2/crawler/crawler.py +++ b/mirrormanager2/crawler/crawler.py @@ -492,63 +492,66 @@ def crawl_and_report(options, crawler): host = crawler.host # Set the host-specific log file - thread_file_logger(crawler.config, host.id, options["debug"]) - - details = None - stats = None - try: - stats = crawler.crawl() - except AllCategoriesFailed: - status = CrawlStatus.FAILURE - if options["canary"]: - # If running in canary mode do not auto disable mirrors - # if they have failed. - # Let's mark the complete mirror as not being up to date. - details = "Canary mode failed for all categories. Marking host as not up to date." - logger.info("All categories failed.") - except HostTimeoutError: - status = CrawlStatus.TIMEOUT - details = "Crawler timed out before completing. Host is likely overloaded." - logger.info(details) - except GlobalTimeoutError: - status = CrawlStatus.UNKNOWN - details = "Crawler reached its maximum execution time, could not complete this host's scan." - logger.info(details) - except WrongContinent: - logger.info("Skipping host %s (%s); wrong continent", host.id, host.name) - status = CrawlStatus.UNKNOWN - except BrokenBaseUrl: - logger.info("Skipping host %s (%s); broken base URL", host.id, host.name) - status = CrawlStatus.UNKNOWN - except EmbargoedCountry as e: - logger.info("Host %s (%s) is from an embargoed country: %s", host.id, host.name, e.country) - status = CrawlStatus.DISABLE - details = f"Embargoed country: {e.country}" - except NoCategory: - # no category to crawl found. This is to make sure, - # that host.crawl_failures is not reset to zero for crawling - # non existing categories on this host - logger.info("No categories to crawl on host %s (%s)", host.id, host.name) - status = CrawlStatus.UNKNOWN - except KeyboardInterrupt: - status = CrawlStatus.UNKNOWN - except Exception: - logger.exception("Unhandled exception raised, this is a bug in the MM crawler.") - # Don't disable the host, it's not their fault. - # status = CrawlStatus.FAILURE - status = CrawlStatus.UNKNOWN - else: - status = CrawlStatus.OK - - result = CrawlResult( - host_id=host.id, - host_name=host.name, - status=status.value, - details=details, - finished_at=datetime.datetime.now(tz=datetime.timezone.utc), - duration=crawler.timeout.elapsed(), - stats=stats, - ) + with thread_file_logger(crawler.config, host.id, options["debug"]): + details = None + stats = None + try: + stats = crawler.crawl() + except AllCategoriesFailed: + status = CrawlStatus.FAILURE + if options["canary"]: + # If running in canary mode do not auto disable mirrors + # if they have failed. + # Let's mark the complete mirror as not being up to date. + details = "Canary mode failed for all categories. Marking host as not up to date." + logger.info("All categories failed.") + except HostTimeoutError: + status = CrawlStatus.TIMEOUT + details = "Crawler timed out before completing. Host is likely overloaded." + logger.info(details) + except GlobalTimeoutError: + status = CrawlStatus.UNKNOWN + details = ( + "Crawler reached its maximum execution time, could not complete this host's scan." + ) + logger.info(details) + except WrongContinent: + logger.info("Skipping host %s (%s); wrong continent", host.id, host.name) + status = CrawlStatus.UNKNOWN + except BrokenBaseUrl: + logger.info("Skipping host %s (%s); broken base URL", host.id, host.name) + status = CrawlStatus.UNKNOWN + except EmbargoedCountry as e: + logger.info( + "Host %s (%s) is from an embargoed country: %s", host.id, host.name, e.country + ) + status = CrawlStatus.DISABLE + details = f"Embargoed country: {e.country}" + except NoCategory: + # no category to crawl found. This is to make sure, + # that host.crawl_failures is not reset to zero for crawling + # non existing categories on this host + logger.info("No categories to crawl on host %s (%s)", host.id, host.name) + status = CrawlStatus.UNKNOWN + except KeyboardInterrupt: + status = CrawlStatus.UNKNOWN + except Exception: + logger.exception("Unhandled exception raised, this is a bug in the MM crawler.") + # Don't disable the host, it's not their fault. + # status = CrawlStatus.FAILURE + status = CrawlStatus.UNKNOWN + else: + status = CrawlStatus.OK + + result = CrawlResult( + host_id=host.id, + host_name=host.name, + status=status.value, + details=details, + finished_at=datetime.datetime.now(tz=datetime.timezone.utc), + duration=crawler.timeout.elapsed(), + stats=stats, + ) return result diff --git a/mirrormanager2/crawler/log.py b/mirrormanager2/crawler/log.py index 887ce49db..e41266014 100755 --- a/mirrormanager2/crawler/log.py +++ b/mirrormanager2/crawler/log.py @@ -1,11 +1,12 @@ import logging import os +from contextlib import contextmanager from .threads import threadlocal from .ui import get_logging_handler logger = logging.getLogger(__name__) -formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") +thread_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") master_formatter = ( # "%(levelname)s:%(name)s:%(hosts)s:%(threads)s:%(hostid)s:%(hostname)s:%(message)s" # "%(levelname)s:%(name)s:%(hostid)s:%(hostname)s:%(message)s" @@ -62,6 +63,7 @@ def setup_logging(debug, console): logging.getLogger(logger_name).setLevel(logging.INFO) +@contextmanager def thread_file_logger(config, host_id, debug): log_dir = config.get("MM_LOG_DIR", None) if log_dir is None or log_dir == "-": @@ -76,8 +78,10 @@ def thread_file_logger(config, host_id, debug): handler.addFilter(f) handler.setLevel(logging.DEBUG if debug else logging.INFO) - handler.setFormatter(formatter) + handler.setFormatter(thread_formatter) root_logger = logging.getLogger() root_logger.addHandler(handler) - return log_file + yield log_file + + root_logger.removeHandler(handler)