Don't pollute the main logger with thread-specific loggers
Signed-off-by: Aurélien Bompard <[email protected]>
abompard committed Mar 15, 2024
1 parent 6cd0761 commit 62120f6
Showing 2 changed files with 67 additions and 60 deletions.
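The change turns thread_file_logger into a context manager: the host-specific file handler is attached to the root logger only while that host is being crawled and is detached when the block exits, instead of piling up on the main logger. The snippet below is a minimal, self-contained sketch of that pattern, not code from this commit; the helper name per_host_log, the log-directory handling, and the try/finally guard are illustrative assumptions.

import logging
import os
from contextlib import contextmanager


@contextmanager
def per_host_log(log_dir, host_id, debug=False):
    """Attach a host-specific file handler to the root logger, detach it on exit."""
    os.makedirs(log_dir, exist_ok=True)
    log_file = os.path.join(log_dir, f"{host_id}.log")
    handler = logging.FileHandler(log_file)
    handler.setLevel(logging.DEBUG if debug else logging.INFO)
    handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
    root_logger = logging.getLogger()
    root_logger.addHandler(handler)
    try:
        yield log_file
    finally:
        # Detach the per-host handler even if the body raised, so it cannot
        # linger on the root logger (the commit removes it after the yield).
        root_logger.removeHandler(handler)
        handler.close()


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    with per_host_log("./host-logs", host_id=42) as log_file:
        logging.info("this record also goes to %s", log_file)
    logging.info("this record goes only to the main log")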
117 changes: 60 additions & 57 deletions mirrormanager2/crawler/crawler.py
@@ -492,63 +492,66 @@ def crawl_and_report(options, crawler):
     host = crawler.host
 
     # Set the host-specific log file
-    thread_file_logger(crawler.config, host.id, options["debug"])
-
-    details = None
-    stats = None
-    try:
-        stats = crawler.crawl()
-    except AllCategoriesFailed:
-        status = CrawlStatus.FAILURE
-        if options["canary"]:
-            # If running in canary mode do not auto disable mirrors
-            # if they have failed.
-            # Let's mark the complete mirror as not being up to date.
-            details = "Canary mode failed for all categories. Marking host as not up to date."
-        logger.info("All categories failed.")
-    except HostTimeoutError:
-        status = CrawlStatus.TIMEOUT
-        details = "Crawler timed out before completing. Host is likely overloaded."
-        logger.info(details)
-    except GlobalTimeoutError:
-        status = CrawlStatus.UNKNOWN
-        details = "Crawler reached its maximum execution time, could not complete this host's scan."
-        logger.info(details)
-    except WrongContinent:
-        logger.info("Skipping host %s (%s); wrong continent", host.id, host.name)
-        status = CrawlStatus.UNKNOWN
-    except BrokenBaseUrl:
-        logger.info("Skipping host %s (%s); broken base URL", host.id, host.name)
-        status = CrawlStatus.UNKNOWN
-    except EmbargoedCountry as e:
-        logger.info("Host %s (%s) is from an embargoed country: %s", host.id, host.name, e.country)
-        status = CrawlStatus.DISABLE
-        details = f"Embargoed country: {e.country}"
-    except NoCategory:
-        # no category to crawl found. This is to make sure,
-        # that host.crawl_failures is not reset to zero for crawling
-        # non existing categories on this host
-        logger.info("No categories to crawl on host %s (%s)", host.id, host.name)
-        status = CrawlStatus.UNKNOWN
-    except KeyboardInterrupt:
-        status = CrawlStatus.UNKNOWN
-    except Exception:
-        logger.exception("Unhandled exception raised, this is a bug in the MM crawler.")
-        # Don't disable the host, it's not their fault.
-        # status = CrawlStatus.FAILURE
-        status = CrawlStatus.UNKNOWN
-    else:
-        status = CrawlStatus.OK
-
-    result = CrawlResult(
-        host_id=host.id,
-        host_name=host.name,
-        status=status.value,
-        details=details,
-        finished_at=datetime.datetime.now(tz=datetime.timezone.utc),
-        duration=crawler.timeout.elapsed(),
-        stats=stats,
-    )
+    with thread_file_logger(crawler.config, host.id, options["debug"]):
+        details = None
+        stats = None
+        try:
+            stats = crawler.crawl()
+        except AllCategoriesFailed:
+            status = CrawlStatus.FAILURE
+            if options["canary"]:
+                # If running in canary mode do not auto disable mirrors
+                # if they have failed.
+                # Let's mark the complete mirror as not being up to date.
+                details = "Canary mode failed for all categories. Marking host as not up to date."
+            logger.info("All categories failed.")
+        except HostTimeoutError:
+            status = CrawlStatus.TIMEOUT
+            details = "Crawler timed out before completing. Host is likely overloaded."
+            logger.info(details)
+        except GlobalTimeoutError:
+            status = CrawlStatus.UNKNOWN
+            details = (
+                "Crawler reached its maximum execution time, could not complete this host's scan."
+            )
+            logger.info(details)
+        except WrongContinent:
+            logger.info("Skipping host %s (%s); wrong continent", host.id, host.name)
+            status = CrawlStatus.UNKNOWN
+        except BrokenBaseUrl:
+            logger.info("Skipping host %s (%s); broken base URL", host.id, host.name)
+            status = CrawlStatus.UNKNOWN
+        except EmbargoedCountry as e:
+            logger.info(
+                "Host %s (%s) is from an embargoed country: %s", host.id, host.name, e.country
+            )
+            status = CrawlStatus.DISABLE
+            details = f"Embargoed country: {e.country}"
+        except NoCategory:
+            # no category to crawl found. This is to make sure,
+            # that host.crawl_failures is not reset to zero for crawling
+            # non existing categories on this host
+            logger.info("No categories to crawl on host %s (%s)", host.id, host.name)
+            status = CrawlStatus.UNKNOWN
+        except KeyboardInterrupt:
+            status = CrawlStatus.UNKNOWN
+        except Exception:
+            logger.exception("Unhandled exception raised, this is a bug in the MM crawler.")
+            # Don't disable the host, it's not their fault.
+            # status = CrawlStatus.FAILURE
+            status = CrawlStatus.UNKNOWN
+        else:
+            status = CrawlStatus.OK
+
+        result = CrawlResult(
+            host_id=host.id,
+            host_name=host.name,
+            status=status.value,
+            details=details,
+            finished_at=datetime.datetime.now(tz=datetime.timezone.utc),
+            duration=crawler.timeout.elapsed(),
+            stats=stats,
+        )

     return result

10 changes: 7 additions & 3 deletions mirrormanager2/crawler/log.py
@@ -1,11 +1,12 @@
 import logging
 import os
+from contextlib import contextmanager
 
 from .threads import threadlocal
 from .ui import get_logging_handler
 
 logger = logging.getLogger(__name__)
-formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+thread_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
 master_formatter = (
     # "%(levelname)s:%(name)s:%(hosts)s:%(threads)s:%(hostid)s:%(hostname)s:%(message)s"
     # "%(levelname)s:%(name)s:%(hostid)s:%(hostname)s:%(message)s"
@@ -62,6 +63,7 @@ def setup_logging(debug, console):
         logging.getLogger(logger_name).setLevel(logging.INFO)
 
 
+@contextmanager
 def thread_file_logger(config, host_id, debug):
     log_dir = config.get("MM_LOG_DIR", None)
     if log_dir is None or log_dir == "-":
@@ -76,8 +78,10 @@ def thread_file_logger(config, host_id, debug):
     handler.addFilter(f)
 
     handler.setLevel(logging.DEBUG if debug else logging.INFO)
-    handler.setFormatter(formatter)
+    handler.setFormatter(thread_formatter)
     root_logger = logging.getLogger()
     root_logger.addHandler(handler)
 
-    return log_file
+    yield log_file
+
+    root_logger.removeHandler(handler)
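With the @contextmanager decorator in place, the call site in crawl_and_report (first hunk above) wraps the whole per-host crawl in a with block:

    with thread_file_logger(crawler.config, host.id, options["debug"]):
        stats = crawler.crawl()
        ...

The file handler is added to the root logger on entry and removed on exit, so one host's records stop reaching the main logger as soon as that host's crawl finishes. The generator yields the per-host log file path, so a caller could also bind it with "as log_file". Note that the removal happens after the yield rather than in a finally block, so it only runs when the with body completes; that is the case on this path, since the crawl itself is wrapped in a broad try/except.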
