Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DON'T MERGE] PoC of recording stats during kedro run #1465

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion demo-project/src/demo_project/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,49 @@

from pathlib import Path

from traitlets import default

# Define where to store data from a KedroSession. Defaults to BaseSessionStore.
# from kedro.framework.session.store import ShelveStore
from kedro_viz.integrations.kedro.sqlite_store import SQLiteStore

from kedro.framework.hooks import hook_impl
from collections import defaultdict


class DatasetStatsHook:
    """Kedro hook that records basic shape statistics for loaded datasets.

    For every dataset that is loaded as a pandas ``DataFrame`` the hook
    stores its element count, column count and row count. When the pipeline
    run finishes, the collected statistics are written to ``stats.json``
    (a relative path, i.e. resolved against the current working directory).
    """

    def __init__(self):
        # Maps dataset name -> {"filesize": int, "columns": int, "rows": int}.
        self._stats = defaultdict(dict)
        # Filled in by ``after_context_created``; initialised here so the
        # attribute exists even before that hook has fired.
        self._catalog = None

    @hook_impl
    def after_context_created(self, context):
        """Keep a reference to the catalog of the newly created context.

        Args:
            context: Object exposing a ``catalog`` attribute.
        """
        self._catalog = context.catalog

    @hook_impl
    def after_dataset_loaded(self, dataset_name, data):
        """Record statistics for ``data`` when it is a pandas ``DataFrame``.

        Args:
            dataset_name: Name under which the statistics are stored.
            data: The loaded object; anything that is not a ``DataFrame``
                is ignored.
        """
        # Imported inside the hook so pandas is only required once a dataset
        # has actually been loaded.
        import pandas as pd

        if not isinstance(data, pd.DataFrame):
            return

        row_count, column_count = data.shape
        self._stats[dataset_name] = {
            # NOTE: ``DataFrame.size`` is the number of elements
            # (rows * columns), not a size in bytes. The key name is kept
            # unchanged so existing readers of stats.json keep working.
            "filesize": int(data.size),
            "columns": int(column_count),
            "rows": int(row_count),
        }

    @hook_impl
    def after_pipeline_run(self):
        """Write all collected statistics to ``stats.json`` as JSON."""
        import json

        # Explicit encoding keeps the output independent of the platform's
        # default locale.
        with open("stats.json", "w", encoding="utf-8") as stats_file:
            json.dump(self._stats, stats_file)



# Instantiate the dataset statistics hook and register it via HOOKS.
dataset_stats_hook = DatasetStatsHook()
HOOKS = (dataset_stats_hook,)
# Store session data with the SQLite-backed store imported from Kedro-Viz.
SESSION_STORE_CLASS = SQLiteStore
# Path(__file__).parents[2] is two directories above the one containing this
# file; the store is pointed at the "data" directory found there.
SESSION_STORE_ARGS = {"path": str(Path(__file__).parents[2] / "data")}

# Setup for collaborative experiment tracking.
# SESSION_STORE_ARGS = {"path": str(Path(__file__).parents[2] / "data"),
# "remote_path": "s3://{path-to-session_store}" }

Expand Down