From e77c8849573db71056c63422ad0f5daae0ea897c Mon Sep 17 00:00:00 2001
From: Nok Chan
Date: Thu, 27 Jul 2023 16:05:23 +0100
Subject: [PATCH] poc

---
Reviewer notes (free text in this section is not part of the diff):

  * NOTE(review): line breaks, blank lines and indentation were rebuilt from a
    whitespace-collapsed copy of this patch. Confirm the context lines against
    demo-project/src/demo_project/settings.py before applying.
  * NOTE(review): the post-image blob id on the "index" line comes from the
    original patch and does not describe the revised content below.
  * Revised payload: dropped the unused "from traitlets import default" import
    and the debug "print(data)"; replaced defaultdict(dict) with a plain dict;
    initialised "_catalog" in __init__; write stats.json with an explicit
    UTF-8 encoding; added docstrings.

 demo-project/src/demo_project/settings.py | 51 ++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/demo-project/src/demo_project/settings.py b/demo-project/src/demo_project/settings.py
index b8a7cf33d4..4a99620e4f 100644
--- a/demo-project/src/demo_project/settings.py
+++ b/demo-project/src/demo_project/settings.py
@@ -4,13 +4,62 @@
 
 from pathlib import Path
 
 # Define where to store data from a KedroSession. Defaults to BaseSessionStore.
 # from kedro.framework.session.store import ShelveStore
 from kedro_viz.integrations.kedro.sqlite_store import SQLiteStore
+from kedro.framework.hooks import hook_impl
+
+
+class DatasetStatsHook:
+    """Collect basic shape statistics for each pandas DataFrame loaded during a run.
+
+    The statistics are kept in memory while the pipeline runs and written to
+    ``stats.json`` in the current working directory once the run finishes.
+    """
+
+    def __init__(self):
+        # Maps dataset name -> {"filesize": int, "columns": int, "rows": int}.
+        self._stats = {}
+        # Populated by ``after_context_created``; not read by the other hooks yet.
+        self._catalog = None
+
+    @hook_impl
+    def after_context_created(self, context):
+        """Keep a reference to the catalog of the newly created context."""
+        self._catalog = context.catalog
+
+    @hook_impl
+    def after_dataset_loaded(self, dataset_name, data):
+        """Record element, column and row counts when ``data`` is a DataFrame."""
+        # Imported lazily so that loading the settings module does not import pandas.
+        import pandas as pd
+
+        if not isinstance(data, pd.DataFrame):
+            return
+
+        self._stats[dataset_name] = {
+            # NOTE(review): ``DataFrame.size`` is rows * columns, not a size in
+            # bytes; the key name is kept so the stats.json layout is unchanged.
+            "filesize": int(data.size),
+            "columns": int(data.shape[1]),
+            "rows": int(data.shape[0]),
+        }
+
+    @hook_impl
+    def after_pipeline_run(self):
+        """Write the collected statistics to ``stats.json``."""
+        import json
+
+        with open("stats.json", "w", encoding="utf-8") as f:
+            json.dump(self._stats, f)
+
+
+dataset_stats_hook = DatasetStatsHook()
+HOOKS = (dataset_stats_hook,)
 
 SESSION_STORE_CLASS = SQLiteStore
 SESSION_STORE_ARGS = {"path": str(Path(__file__).parents[2] / "data")}
 
-#Setup for collaborative experiment tracking.
+# Setup for collaborative experiment tracking.
 # SESSION_STORE_ARGS = {"path": str(Path(__file__).parents[2] / "data"),
 #                       "remote_path": "s3://{path-to-session_store}" }