From c9aa59d87c654d72267c0295cf46faed683e0446 Mon Sep 17 00:00:00 2001 From: Mathias Kuhring Date: Mon, 3 May 2021 11:02:40 +0200 Subject: [PATCH 1/2] Add exception for invalid encodings --- altamisa/isatab/parse_assay_study.py | 8 ++++++++ altamisa/isatab/parse_investigation.py | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/altamisa/isatab/parse_assay_study.py b/altamisa/isatab/parse_assay_study.py index bae41b8..e777375 100644 --- a/altamisa/isatab/parse_assay_study.py +++ b/altamisa/isatab/parse_assay_study.py @@ -768,6 +768,10 @@ def _read_next_line(self): self.unique_rows.add("\t".join(self._line)) except StopIteration: self._line = None + except UnicodeDecodeError as e: # pragma: no cover + tpl = "Invalid encoding after line {} of study file '{}' (use Unicode/UTF-8)." + msg = tpl.format(self._reader.line_num, self.filename) + raise ParseIsatabException(msg) from e return prev_line def read(self): @@ -888,6 +892,10 @@ def _read_next_line(self): self.unique_rows.add("\t".join(self._line)) except StopIteration: self._line = None + except UnicodeDecodeError as e: # pragma: no cover + tpl = "Invalid encoding after line {} of assay file '{}' (use Unicode/UTF-8)." + msg = tpl.format(self._reader.line_num, self.filename) + raise ParseIsatabException(msg) from e return prev_line def read(self): diff --git a/altamisa/isatab/parse_investigation.py b/altamisa/isatab/parse_investigation.py index 8ddba3f..3d9a8ab 100644 --- a/altamisa/isatab/parse_investigation.py +++ b/altamisa/isatab/parse_investigation.py @@ -132,6 +132,10 @@ def _read_next_line(self): self._line = list_strip(next(self._reader)) except StopIteration: self._line = None + except UnicodeDecodeError as e: # pragma: no cover + tpl = "Invalid encoding after line {} of investigation file '{}' (use Unicode/UTF-8)." 
+ msg = tpl.format(self._reader.line_num, self._filename) + raise ParseIsatabException(msg) from e return prev_line def _next_line_startswith_comment(self): From 811d254a440bfceb2b7101b6d06a366379fc3bc1 Mon Sep 17 00:00:00 2001 From: Thomas Sell Date: Thu, 2 Nov 2023 11:56:45 +0100 Subject: [PATCH 2/2] do not include line no in the error msg - it doesn't work - 'invalid encoding after line x' makes no sense - line no with invalid chars is already reported by the csv reader exception --- altamisa/isatab/parse_assay_study.py | 6 ++---- altamisa/isatab/parse_investigation.py | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/altamisa/isatab/parse_assay_study.py b/altamisa/isatab/parse_assay_study.py index 0a9c446..a49ac97 100644 --- a/altamisa/isatab/parse_assay_study.py +++ b/altamisa/isatab/parse_assay_study.py @@ -769,8 +769,7 @@ def _read_next_line(self): except StopIteration: self._line = None except UnicodeDecodeError as e: # pragma: no cover - tpl = "Invalid encoding after line {} of study file '{}' (use Unicode/UTF-8)." - msg = tpl.format(self._reader.line_num, self.filename) + msg = f"Invalid encoding of study file '{self.filename}' (use Unicode/UTF-8)." raise ParseIsatabException(msg) from e return prev_line @@ -893,8 +892,7 @@ def _read_next_line(self): except StopIteration: self._line = None except UnicodeDecodeError as e: # pragma: no cover - tpl = "Invalid encoding after line {} of assay file '{}' (use Unicode/UTF-8)." - msg = tpl.format(self._reader.line_num, self.filename) + msg = f"Invalid encoding of assay file '{self.filename}' (use Unicode/UTF-8)." 
 raise ParseIsatabException(msg) from e return prev_line diff --git a/altamisa/isatab/parse_investigation.py b/altamisa/isatab/parse_investigation.py index 2db728e..a365cbf 100644 --- a/altamisa/isatab/parse_investigation.py +++ b/altamisa/isatab/parse_investigation.py @@ -133,8 +133,7 @@ def _read_next_line(self): except StopIteration: self._line = None except UnicodeDecodeError as e: # pragma: no cover - tpl = "Invalid encoding after line {} of investigation file '{}' (use Unicode/UTF-8)." - msg = tpl.format(self._reader.line_num, self._filename) + msg = f"Invalid encoding of investigation file '{self._filename}' (use Unicode/UTF-8)." raise ParseIsatabException(msg) from e return prev_line