From af5acd6b23cbcae45853cb5efc0ebc48e5fe9a95 Mon Sep 17 00:00:00 2001 From: tonykao8080 <36019416+tonykao8080@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:22:41 -0400 Subject: [PATCH] torchx - add exception_type, exception_message, and exception_source_location to torchx event (#966) Summary: Add the exception type, exception message, and exception source location to the torchx event. This allows for better logging of exception details for further analysis. Differential Revision: D64406552 Co-authored-by: Tony Kao --- torchx/runner/events/__init__.py | 16 ++++++++++++++++ torchx/runner/events/api.py | 3 +++ 2 files changed, 19 insertions(+) diff --git a/torchx/runner/events/__init__.py b/torchx/runner/events/__init__.py index c8eb89d96..8fab92a10 100644 --- a/torchx/runner/events/__init__.py +++ b/torchx/runner/events/__init__.py @@ -20,7 +20,9 @@ """ +import json import logging +import sys import time import traceback from types import TracebackType @@ -123,6 +125,20 @@ def __exit__( ) // 1000 if traceback_type: self._torchx_event.raw_exception = traceback.format_exc() + typ, value, tb = sys.exc_info() + if tb: + last_frame = traceback.extract_tb(tb)[-1] + self._torchx_event.exception_source_location = json.dumps( + { + "filename": last_frame.filename, + "lineno": last_frame.lineno, + "name": last_frame.name, + } + ) + if exec_type: + self._torchx_event.exception_type = exec_type.__name__ + if exec_value: + self._torchx_event.exception_message = str(exec_value) record(self._torchx_event) def _generate_torchx_event( diff --git a/torchx/runner/events/api.py b/torchx/runner/events/api.py index 355c03f6c..f03815e75 100644 --- a/torchx/runner/events/api.py +++ b/torchx/runner/events/api.py @@ -52,6 +52,9 @@ class TorchxEvent: wall_time_usec: Optional[int] = None start_epoch_time_usec: Optional[int] = None workspace: Optional[str] = None + exception_type: Optional[str] = None + exception_message: Optional[str] = None + exception_source_location: Optional[str] = None 
def __str__(self) -> str: return self.serialize()