From 481f184af34f833404b8dbac09bb7bdb848b3efc Mon Sep 17 00:00:00 2001
From: Xin Yang <105740670+xyang16@users.noreply.github.com>
Date: Mon, 4 Nov 2024 09:26:48 -0800
Subject: [PATCH] [python] Add more OOM handling (#2515)

---
 engines/python/setup/djl_python_engine.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/engines/python/setup/djl_python_engine.py b/engines/python/setup/djl_python_engine.py
index e08a68a89..f11c9fa2d 100644
--- a/engines/python/setup/djl_python_engine.py
+++ b/engines/python/setup/djl_python_engine.py
@@ -166,8 +166,10 @@ def run_server(self):
                         f"Invalid output type: {type(outputs)}")
             except Exception as e:
                 logging.exception("Failed invoke service.invoke_handler()")
-                if type(e).__name__ == "OutOfMemoryError" or type(
-                        e).__name__ == "MemoryError":
+                if (type(e).__name__ == "OutOfMemoryError"
+                        or type(e).__name__ == "MemoryError"
+                        or "No available memory for the cache blocks" in str(e)
+                        or "CUDA error: out of memory" in str(e)):
                     outputs = Output(code=507, message=str(e))
                 else:
                     outputs = Output().error(str(e))