From 3793e84795f459d827b9b6f1b6a3a0478c6d5222 Mon Sep 17 00:00:00 2001
From: Eduardo Apolinario
Date: Mon, 11 Nov 2024 20:59:20 -0500
Subject: [PATCH] Expose all settings in ydata-profiling ProfileReport

Signed-off-by: Eduardo Apolinario
---
Review notes (text between "---" and the first "diff --git" line is ignored
by git am):
 * Line structure restored; the patch had been flattened onto three lines.
   Blank context lines were reconstructed from the hunk line counts.
 * renderer.py: dropped the "if kwargs is None" guard. A **kwargs parameter
   always binds a dict, so that branch could never run.
 * test_renderer.py: removed a duplicated parametrize case (it differed from
   its neighbour only in whitespace) and compare the constructor title
   against None explicitly, matching the check done by the renderer.
 * The "index" blob ids and the commit id on the first line are those of the
   original commit and do not describe the post-image of this revision.
 * NOTE(review): the From and Signed-off-by lines carry no e-mail address in
   this copy; restore them before running git am.

 .../dev-requirements.in                            |  1 +
 .../flytekitplugins/deck/renderer.py               | 13 ++++++++++---
 plugins/flytekit-deck-standard/setup.py            |  4 +--
 .../tests/test_renderer.py                         | 25 ++++++++++++++++++++++---
 4 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/plugins/flytekit-deck-standard/dev-requirements.in b/plugins/flytekit-deck-standard/dev-requirements.in
index 970e7776f0..fc0d99c8cf 100644
--- a/plugins/flytekit-deck-standard/dev-requirements.in
+++ b/plugins/flytekit-deck-standard/dev-requirements.in
@@ -3,3 +3,4 @@ pandas
 plotly
 pygments
 ydata-profiling
+lxml
diff --git a/plugins/flytekit-deck-standard/flytekitplugins/deck/renderer.py b/plugins/flytekit-deck-standard/flytekitplugins/deck/renderer.py
index 708e941d88..e9ae312ede 100644
--- a/plugins/flytekit-deck-standard/flytekitplugins/deck/renderer.py
+++ b/plugins/flytekit-deck-standard/flytekitplugins/deck/renderer.py
@@ -53,12 +53,19 @@ class FrameProfilingRenderer:
     Generate a ProfileReport based on a pandas DataFrame
     """
 
-    def __init__(self, title: str = "Pandas Profiling Report"):
+    def __init__(self, title: Optional[str] = None):
         self._title = title
 
-    def to_html(self, df: "pd.DataFrame") -> str:
+    def to_html(self, df: "pd.DataFrame", **kwargs) -> str:
+        """
+        Generate a ydata_profiling.ProfileReport based on a pandas DataFrame
+        """
         assert isinstance(df, pd.DataFrame)
-        profile = ydata_profiling.ProfileReport(df, title=self._title)
+        # For backwards compatibility, if title is None, it will be set to "Pandas Profiling Report".
+        # Also, if title is in kwargs, it will overwrite the title in the constructor.
+        if "title" not in kwargs:
+            kwargs["title"] = "Pandas Profiling Report" if self._title is None else self._title
+        profile = ydata_profiling.ProfileReport(df, **kwargs)
         return profile.to_html()
 
 
diff --git a/plugins/flytekit-deck-standard/setup.py b/plugins/flytekit-deck-standard/setup.py
index adfe422bc9..fb0f7a9aac 100644
--- a/plugins/flytekit-deck-standard/setup.py
+++ b/plugins/flytekit-deck-standard/setup.py
@@ -9,11 +9,11 @@
 extras = {
     "pandas": ["pandas"],
     "pillow": ["pillow"],
-    "ydata-profiling": ["ydata-profiling"],
+    "ydata-profiling": ["ydata-profiling>=2.4.0"],
     "markdown": ["markdown"],
     "plotly": ["plotly"],
     "pygments": ["pygments"],
-    "all": ["pandas", "pillow", "ydata-profiling", "markdown", "plotly", "pygments"],
+    "all": ["pandas", "pillow", "ydata-profiling>=2.4.0", "markdown", "plotly", "pygments"],
 }
 
 __version__ = "0.0.0+develop"
diff --git a/plugins/flytekit-deck-standard/tests/test_renderer.py b/plugins/flytekit-deck-standard/tests/test_renderer.py
index dbe157cefc..8e79b97fde 100644
--- a/plugins/flytekit-deck-standard/tests/test_renderer.py
+++ b/plugins/flytekit-deck-standard/tests/test_renderer.py
@@ -14,6 +14,7 @@
     TableRenderer,
 )
 from PIL import Image
+from lxml import html
 
 from flytekit.types.file import FlyteFile, JPEGImageFile, PNGImageFile
 
@@ -31,10 +32,28 @@
 )
 
 
-def test_frame_profiling_renderer():
-    renderer = FrameProfilingRenderer()
-    assert "Pandas Profiling Report" in renderer.to_html(df).title()
+@pytest.mark.parametrize(
+    "title, kwargs, expected_title",
+    [
+        (None, {}, "Pandas Profiling Report"),
+        ("Custom Title", {}, "Custom Title"),
+        (None, {"title": "from-kwargs"}, "from-kwargs"),
+        (None, {"minimal": False}, "Pandas Profiling Report"),
+        ("Custom Title", {"minimal": False}, "Custom Title"),
+        (None, {"minimal": True}, "Pandas Profiling Report"),
+        ("Custom Title", {"minimal": True}, "Custom Title"),
+        ("Custom Title", {"missing_diagrams": {"heatmap": False}}, "Custom Title"),
+        # Test that title in kwargs takes precedence over title in constructor
+        ("title in constructor", {"title": "title in kwargs"}, "title in kwargs"),
+    ],
+)
+def test_frame_profiling_renderer(title, kwargs, expected_title):
+    fpr_kwargs = {"title": title} if title is not None else {}
+    renderer = FrameProfilingRenderer(**fpr_kwargs)
+    profile_report = renderer.to_html(df, **kwargs)
+    tree = html.fromstring(profile_report)
+    assert expected_title == tree.xpath('//title/text()')[0]
 
 
 def test_markdown_renderer():
     md_text = "#Hello Flyte\n##Hello Flyte\n###Hello Flyte"