From 8b857f0dcbd13c165fc161461caf9bb6f5f3c60b Mon Sep 17 00:00:00 2001 From: Nikolai Petukhov Date: Thu, 16 May 2024 11:39:51 -0300 Subject: [PATCH] cp1252 decode logs support --- agent/worker/agent.py | 2 +- agent/worker/task_app.py | 23 +++++++++++++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/agent/worker/agent.py b/agent/worker/agent.py index 7fdde64..b162f40 100644 --- a/agent/worker/agent.py +++ b/agent/worker/agent.py @@ -41,7 +41,7 @@ ) from worker.app_file_streamer import AppFileStreamer from worker.telemetry_reporter import TelemetryReporter -from supervisely_lib._utils import _remove_sensitive_information # pylint: disable=import-error, no-name-in-module +from supervisely_lib._utils import _remove_sensitive_information # pylint: disable=import-error, no-name-in-module class Agent: diff --git a/agent/worker/task_app.py b/agent/worker/task_app.py index 82ae356..3f6ca8e 100644 --- a/agent/worker/task_app.py +++ b/agent/worker/task_app.py @@ -308,8 +308,7 @@ def clean_task_dir(self): super().clean_task_dir() tmp_data_dir = os.path.join( - constants.SUPERVISELY_AGENT_FILES_CONTAINER(), - "app_tmp_data", str(self.info["task_id"]) + constants.SUPERVISELY_AGENT_FILES_CONTAINER(), "app_tmp_data", str(self.info["task_id"]) ) if sly.fs.dir_exists(tmp_data_dir): @@ -409,9 +408,7 @@ def _get_task_volumes(self): useTmpFromFiles = self.info.get("useTmpFromFiles", False) if useTmpFromFiles is True: - relative_app_tmp_data_dir = os.path.join( - "app_tmp_data", str(self.info["task_id"]) - ) + relative_app_tmp_data_dir = os.path.join("app_tmp_data", str(self.info["task_id"])) host_tmp_data_dir = os.path.join( constants.SUPERVISELY_AGENT_FILES(), @@ -806,11 +803,25 @@ def _process_line(log_line): if lvl_int != -1: self.logger.log(lvl_int, msg, extra=res_log) + def _decode(bytes: bytes): + decode_args = [ + ("utf-8", "strict"), + ("cp1252", "strict"), + ("utf-8", "replace"), + ] + for args in decode_args: + try: + return bytes.decode(*args) + except UnicodeDecodeError: + continue + # if all decodings failed, return the first one to raise error + return bytes.decode(*decode_args[0]) + # @TODO: parse multiline logs correctly (including exceptions) log_line = "" for log_line_arr in self._logs_output: - for log_part in log_line_arr.decode("utf-8").splitlines(): + for log_part in _decode(log_line_arr).splitlines(): logs_found = True _process_line(log_part)