From ebad47e093d806ed2b59191345a9cd12045c7441 Mon Sep 17 00:00:00 2001
From: Stefan Krawczyk
Date: Sat, 16 Mar 2024 16:15:36 -0700
Subject: [PATCH] WIP: creating file to help debug an error

Proof of concept.

TODOs:
- python dependency logging
- creating a notebook version of this
- tests
- extensions would be to serialize things more effectively
---
 examples/notebook_debug/example_error.py      |  11 ++
 .../notebook_debugger_plugin.py               | 135 ++++++++++++++++++
 examples/notebook_debug/run.py                |   8 ++
 hamilton/lifecycle/api.py                     |  15 ++-
 4 files changed, 168 insertions(+), 1 deletion(-)
 create mode 100644 examples/notebook_debug/example_error.py
 create mode 100644 examples/notebook_debug/notebook_debugger_plugin.py
 create mode 100644 examples/notebook_debug/run.py

diff --git a/examples/notebook_debug/example_error.py b/examples/notebook_debug/example_error.py
new file mode 100644
index 000000000..f910e9420
--- /dev/null
+++ b/examples/notebook_debug/example_error.py
@@ -0,0 +1,11 @@
+def input_function() -> int:
+    return 2
+
+
+def output_function(input_function: int) -> int:
+    return input_function + 1
+
+
+def error_function(input_function: int, output_function: int, input: int) -> int:
+    raise ValueError("This is an error")
+    return input_function + output_function + input
diff --git a/examples/notebook_debug/notebook_debugger_plugin.py b/examples/notebook_debug/notebook_debugger_plugin.py
new file mode 100644
index 000000000..d311a69d2
--- /dev/null
+++ b/examples/notebook_debug/notebook_debugger_plugin.py
@@ -0,0 +1,135 @@
+import inspect
+import logging
+import textwrap
+from typing import Any, Callable, Dict, Optional
+
+from hamilton import lifecycle
+
+try:
+    import cloudpickle as pickle
+except ImportError:
+    import pickle
+
+logger = logging.getLogger(__name__)
+
+
+template = """
+try:
+    import cloudpickle as pickle
+except ImportError:
+    import pickle
+
+{import_statement}
+
+# let's load the inputs
+# NOTE: pickle can run arbitrary code on load -- only load files you created yourself
+with open({inputs_path!r}, 'rb') as f:
+    inputs = pickle.load(f)
+
+# the function that errored
+{function_to_debug}
+
+
+# run the function
+{func_name}(**inputs)
+"""
+
+
+class NotebookErrorDebugger(lifecycle.NodeExecutionHook):
+    """Hook that, when a node fails, saves its inputs and source so it can be re-run alone."""
+
+    def run_before_node_execution(
+        self,
+        *,
+        node_name: str,
+        node_tags: Dict[str, Any],
+        node_kwargs: Dict[str, Any],
+        node_return_type: type,
+        task_id: Optional[str],
+        run_id: str,
+        node_input_types: Dict[str, Any],
+        **future_kwargs: Any,
+    ):
+        """No-op: this hook only acts after a node has run."""
+        pass
+
+    def run_after_node_execution(
+        self,
+        *,
+        node_name: str,
+        node_tags: Dict[str, Any],
+        node_kwargs: Dict[str, Any],
+        node_return_type: type,
+        result: Any,
+        error: Optional[Exception],
+        success: bool,
+        task_id: Optional[str],
+        run_id: str,
+        originating_function: Optional[Callable] = None,
+        **future_kwargs: Any,
+    ):
+        """Write debugging artifacts to the working directory when a node fails.
+
+        In the case of a failure this will:
+
+        1. Pickle the inputs to the function into ``{node_name}_inputs.pkl``.
+        2. Create ``{node_name}_debug.py``, which:
+            a. imports the module the function is from -- to cover imports that need to exist.
+            b. loads the pickled inputs.
+            c. has the code of the function that errored so you can debug it.
+        3. Log where this data has been saved so the user can then debug.
+
+        Nothing is written when the node succeeded or when no originating function is given.
+        Problems while writing the artifacts are logged instead of raised, so this hook does
+        not raise a second exception on top of the node's own failure.
+
+        TODO: also list python version, hamilton version, and any other relevant package
+        versions for the user to install / have.
+
+        :param node_name: Name of the node; used as the prefix of the files written.
+        :param node_tags: Tags of the node; the ``module`` tag names the module to import.
+        :param node_kwargs: Inputs the node was called with; this is what gets pickled.
+        :param success: Whether the node ran without error; nothing happens if True.
+        :param originating_function: Function whose source is copied into the debug script.
+        """
+        if success:
+            return
+        if originating_function is None:
+            logger.warning("No originating function for %s; not writing debug files.", node_name)
+            return
+        inputs_path = f"{node_name}_inputs.pkl"
+        debug_path = f"{node_name}_debug.py"
+        # fall back to the function's module so that we never emit `import None`
+        module_name = node_tags.get("module") or getattr(originating_function, "__module__", None)
+        if module_name:
+            import_statement = f"import {module_name}  # we load this for imports"
+        else:
+            import_statement = "# module unknown -- add the imports the function needs here"
+        try:
+            # dedent so a function defined in an indented scope still yields a valid script
+            function_source = textwrap.dedent(inspect.getsource(originating_function))
+            # pickle the inputs
+            with open(inputs_path, "wb") as f:
+                pickle.dump(node_kwargs, f)
+            # create a file with the function and the inputs
+            with open(debug_path, "w", encoding="utf-8") as f:
+                f.write(
+                    template.format(
+                        import_statement=import_statement,
+                        inputs_path=inputs_path,
+                        function_to_debug=function_source,
+                        func_name=originating_function.__name__,
+                    )
+                )
+        except Exception:
+            # e.g. inputs that cannot be pickled, or source that cannot be retrieved
+            logger.exception("Could not write debug files for %s", node_name)
+            return
+        # print out where the data has been saved
+        message = (
+            f"Inputs to {node_name} have been saved to {node_name}_inputs.pkl\n"
+            f"The function that errored has been saved to {node_name}_debug.py\n"
+            f"Please run the function in {node_name}_debug.py to debug the error."
+        )
+        logger.warning(message)
+        # TODO: create file with python requirements for pickle to work...
diff --git a/examples/notebook_debug/run.py b/examples/notebook_debug/run.py
new file mode 100644
index 000000000..052520e52
--- /dev/null
+++ b/examples/notebook_debug/run.py
@@ -0,0 +1,8 @@
+if __name__ == "__main__":
+    import example_error
+    from notebook_debugger_plugin import NotebookErrorDebugger
+
+    from hamilton import driver
+
+    dr = driver.Builder().with_modules(example_error).with_adapters(NotebookErrorDebugger()).build()
+    dr.execute(["error_function"], inputs={"input": 4})
diff --git a/hamilton/lifecycle/api.py b/hamilton/lifecycle/api.py
index f3fb137f7..adc0a6740 100644
--- a/hamilton/lifecycle/api.py
+++ b/hamilton/lifecycle/api.py
@@ -1,7 +1,18 @@
 import abc
 from abc import ABC
 from types import ModuleType
-from typing import TYPE_CHECKING, Any, Collection, Dict, List, Optional, Tuple, Type, final
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Collection,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Type,
+    final,
+)
 
 from hamilton import graph_types, node
 
@@ -224,6 +235,7 @@ def run_after_node_execution(
         success: bool,
         task_id: Optional[str],
         run_id: str,
+        originating_function: Optional[Callable],
         **future_kwargs: Any,
     ):
         """Hook that is executed post node execution.
@@ -265,6 +277,7 @@ def post_node_execute(
             task_id=task_id,
             success=success,
             run_id=run_id,
+            originating_function=(node_.originating_functions or (None,))[0],
         )
 
 