From df6e14cb89839b144b44516ef387f76c10397420 Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Tue, 17 Dec 2024 11:00:03 +0000 Subject: [PATCH] misc: Support more types of errors --- devito/operator/operator.py | 12 ++++++++++++ devito/passes/iet/errors.py | 2 ++ 2 files changed, 14 insertions(+) diff --git a/devito/operator/operator.py b/devito/operator/operator.py index edf737a79a..703353826b 100644 --- a/devito/operator/operator.py +++ b/devito/operator/operator.py @@ -696,6 +696,18 @@ def _postprocess_errors(self, retval): raise ExecutionError("Detected nan/inf in some output Functions") elif retval == error_mapper['KernelLaunch']: raise ExecutionError("Kernel launch failed") + elif retval == error_mapper['KernelLaunchOutOfResources']: + raise ExecutionError( + "Kernel launch failed due to insufficient resources. This may be " + "due to excessive register pressure in one of the Operator " + "kernels. Try supplying a smaller `par-tile` value." + ) + elif retval == error_mapper['KernelLaunchUnknown']: + raise ExecutionError( + "Kernel launch failed due to an unknown error. This might " + "simply indicate memory corruption, but also, in a more unlikely " + "case, a hardware issue. Please report this issue to the " + "Devito team.") else: raise ExecutionError("An error occurred during execution") diff --git a/devito/passes/iet/errors.py b/devito/passes/iet/errors.py index 6d8f60526a..13f1101a3f 100644 --- a/devito/passes/iet/errors.py +++ b/devito/passes/iet/errors.py @@ -108,4 +108,6 @@ class Retval(LocalObject, Expr): error_mapper = { 'Stability': 100, 'KernelLaunch': 200, + 'KernelLaunchOutOfResources': 201, + 'KernelLaunchUnknown': 202, }