Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Kedro logging broadcasting markup log message to all handlers. #4496

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions kedro/io/data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
generate_timestamp,
)
from kedro.io.memory_dataset import MemoryDataset
from kedro.utils import _format_rich, _has_rich_handler
from kedro.utils import _has_rich_handler

CATALOG_KEY = "catalog" # Kept to avoid the breaking change
WORDS_REGEX_PATTERN = re.compile(r"\W+")
Expand Down Expand Up @@ -389,9 +389,9 @@ def load(self, name: str, version: str | None = None) -> Any:

self._logger.info(
"Loading data from %s (%s)...",
_format_rich(name, "dark_orange") if self._use_rich_markup else name,
name,
type(dataset).__name__,
extra={"markup": True},
extra={"rich_format": ["dark_orange"]},
)

result = dataset.load()
Expand Down Expand Up @@ -431,9 +431,9 @@ def save(self, name: str, data: Any) -> None:

self._logger.info(
"Saving data to %s (%s)...",
_format_rich(name, "dark_orange") if self._use_rich_markup else name,
name,
type(dataset).__name__,
extra={"markup": True},
extra={"rich_format": ["dark_orange"]},
)

dataset.save(data)
Expand Down
3 changes: 2 additions & 1 deletion kedro/io/kedro_data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
parse_dataset_definition,
)
from kedro.io.memory_dataset import MemoryDataset, _is_memory_dataset
from kedro.utils import _format_rich, _has_rich_handler
from kedro.logging import _format_rich
from kedro.utils import _has_rich_handler


class _LazyDataset:
Expand Down
23 changes: 23 additions & 0 deletions kedro/logging.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import copy
import logging
import sys
from logging import LogRecord
from pathlib import Path
from typing import Any

Expand All @@ -26,6 +28,8 @@ class RichHandler(rich.logging.RichHandler):
"""

def __init__(self, *args: Any, **kwargs: Any):
if "markup" not in kwargs:
kwargs.update({"markup": True}) # Set markup as default
super().__init__(*args, **kwargs)
logging.captureWarnings(True)
rich.pretty.install()
Expand Down Expand Up @@ -54,3 +58,22 @@ def __init__(self, *args: Any, **kwargs: Any):
# fixed on their side at some point, but until then we disable it.
# See https://github.com/Textualize/rich/issues/2455
rich.traceback.install(**traceback_install_kwargs) # type: ignore[arg-type]

def emit(self, record: LogRecord):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The trick here is to keep the raw message without markup language. The handler will be responsible to add the [brackets] (I believe this is how rich.RichHandler functions anyway)

args = record.args
new_args = list(args)
# LogRecord is shared among all handler, created a new instance so it does not affect the rest.
updated_record = copy.copy(record)
if hasattr(record, "rich_format"):
# Add markup to the message
# if the list is shorter than the arg list, keep it unchanged
for i, color in enumerate(record.rich_format):
new_args[i] = _format_rich(args[i], color)

updated_record.args = tuple(new_args)
super().emit(updated_record)


def _format_rich(value: str, markup: str) -> str:
"""Format string with rich markup"""
return f"[{markup}]{value}[/{markup}]"
5 changes: 0 additions & 5 deletions kedro/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,3 @@ def _has_rich_handler(logger: Optional[logging.Logger] = None) -> bool:
except ImportError:
return False
return any(isinstance(handler, RichHandler) for handler in logger.handlers)


def _format_rich(value: str, markup: str) -> str:
"""Format string with rich markup"""
return f"[{markup}]{value}[/{markup}]"
30 changes: 29 additions & 1 deletion tests/framework/project/test_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import yaml

from kedro.framework.project import LOGGING, configure_logging, configure_project
from kedro.utils import _format_rich, _has_rich_handler
from kedro.io.data_catalog import DataCatalog
from kedro.logging import _format_rich
from kedro.utils import _has_rich_handler


@pytest.fixture
Expand Down Expand Up @@ -187,3 +189,29 @@ def test_default_logging_info_emission(monkeypatch, tmp_path, caplog):
assert logging.getLogger("kedro").level == logging.DEBUG
expected_message = "You can change this by setting the KEDRO_LOGGING_CONFIG environment variable accordingly."
assert expected_message in "".join(caplog.messages).strip("\n")


def test_logger_without_rich_markup():
class CustomHandler(logging.Handler):
def __init__(self):
super().__init__()
self.records = []

def emit(self, record):
self.records.append(record)

data = ("dummy",)
catalog = DataCatalog.from_config({"dummy": {"type": "MemoryDataset"}})

# Add a custom handler
custom_handler = CustomHandler()
root_logger = logging.getLogger()
root_logger.addHandler(custom_handler)

# Emit some logs
assert not custom_handler.records
catalog.save("dummy", data)
assert custom_handler.records

for record in custom_handler.records:
assert "[dark_orange]" not in record.message
Loading