Skip to content

Commit

Permalink
Fix Whylogs example (#1669)
Browse files Browse the repository at this point in the history
Signed-off-by: Kevin Su <[email protected]>
  • Loading branch information
pingsutw authored Apr 30, 2024
1 parent 7a11248 commit e89c659
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 43 deletions.
36 changes: 0 additions & 36 deletions examples/whylogs_plugin/Dockerfile

This file was deleted.

17 changes: 10 additions & 7 deletions examples/whylogs_plugin/whylogs_plugin/whylogs_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import numpy as np
import pandas as pd
import whylogs as why
from flytekit import conditional, task, workflow
from flytekit import ImageSpec, conditional, task, workflow
from flytekitplugins.whylogs.renderer import WhylogsConstraintsRenderer, WhylogsSummaryDriftRenderer
from flytekitplugins.whylogs.schema import WhylogsDatasetProfileTransformer # noqa
from sklearn.datasets import load_diabetes
Expand All @@ -27,12 +27,15 @@
smaller_than_number,
)

image_spec = ImageSpec(
packages=["flytekitplugins-whylogs", "whylogs[whylabs]", "scikit-learn", "mlflow"], registry="ghcr.io/flyteorg"
)

# %% [markdown]
# Next, we define a task that reads our reference dataset.
# For this, we will use scikit-learn's entire example Diabetes dataset.
# %%
@task
@task(container_image=image_spec)
def get_reference_data() -> pd.DataFrame:
diabetes = load_diabetes()
df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
Expand All @@ -45,7 +48,7 @@ def get_reference_data() -> pd.DataFrame:
# so, in order to mimic some of how real-life data behaves,
# we will take an arbitrary subset of the reference dataset
# %%
@task
@task(container_image=image_spec)
def get_target_data() -> pd.DataFrame:
diabetes = load_diabetes()
df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
Expand All @@ -59,7 +62,7 @@ def get_target_data() -> pd.DataFrame:
# With it, users can either visualize and check overall statistics
# or even run a constraint suite on top of it.
# %%
@task
@task(container_image=image_spec)
def create_profile_view(df: pd.DataFrame) -> DatasetProfileView:
    """Profile ``df`` with whylogs and return the resulting profile view.

    The dataframe is passed to ``why.log`` and the ``view()`` of the
    logged result is handed back to the caller.
    """
    return why.log(df).view()
Expand All @@ -69,7 +72,7 @@ def create_profile_view(df: pd.DataFrame) -> DatasetProfileView:
# And we will also define a constraints report task
# that will run some checks on our existing profile.
# %%
@task
@task(container_image=image_spec)
def constraints_report(profile_view: DatasetProfileView) -> bool:
builder = ConstraintsBuilder(dataset_profile_view=profile_view)
builder.add_constraint(greater_than_number(column_name="age", number=-11.0))
Expand All @@ -92,7 +95,7 @@ def constraints_report(profile_view: DatasetProfileView) -> bool:
# random numbers with numpy. This task runs if our
# constraints suite passes.
# %%
@task
@task(container_image=image_spec)
def make_predictions(input_data: pd.DataFrame, output_path: str) -> str:
input_data["predictions"] = np.random.random(size=len(input_data))
if not os.path.exists(output_path):
Expand All @@ -106,7 +109,7 @@ def make_predictions(input_data: pd.DataFrame, output_path: str) -> str:
# with the Summary Drift Report, which can provide further intuition into
# whether data drift contributed to the failed constraint checks.
# %%
@task
@task(container_image=image_spec)
def summary_drift_report(new_data: pd.DataFrame, reference_data: pd.DataFrame) -> str:
renderer = WhylogsSummaryDriftRenderer()
report = renderer.to_html(target_data=new_data, reference_data=reference_data)
Expand Down

0 comments on commit e89c659

Please sign in to comment.