From d17bf4f6fae6f2c2cfa638447d0a0f97810d789f Mon Sep 17 00:00:00 2001 From: SanchoSamba <urh.prosenc@gmail.com> Date: Tue, 17 Dec 2024 11:25:26 +0100 Subject: [PATCH 1/3] Regex exception handling --- orangecontrib/text/widgets/owcorpusviewer.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/orangecontrib/text/widgets/owcorpusviewer.py b/orangecontrib/text/widgets/owcorpusviewer.py index ef18fdaef..7c172f4a3 100644 --- a/orangecontrib/text/widgets/owcorpusviewer.py +++ b/orangecontrib/text/widgets/owcorpusviewer.py @@ -23,6 +23,7 @@ QSizePolicy, QSplitter, QTableView, + QMessageBox, ) from Orange.data import Variable from Orange.data.domain import Domain, filter_visible @@ -35,6 +36,8 @@ from orangecanvas.gui.utils import disconnected from orangewidget.utils.listview import ListViewSearch +from PyQt5.QtWidgets import QMessageBox + from orangecontrib.text.corpus import Corpus HTML = """ @@ -133,7 +136,14 @@ def _count_matches(content: List[str], search_string: str, state: TaskState) -> """ matches = 0 if search_string: - regex = re.compile(search_string.strip("|"), re.IGNORECASE) + try: + regex = re.compile(search_string.strip("|"), re.IGNORECASE) + except re.error: + msg = QMessageBox() + msg.setIcon(QMessageBox.Critical) + msg.setText("Invalid regex") + msg.exec_() + return 0 for i, text in enumerate(content): matches += len(regex.findall(text)) state.set_progress_value((i + 1) / len(content) * 100) @@ -186,6 +196,10 @@ class DocumentsFilterProxyModel(QSortFilterProxyModel): __regex = None def set_filter_string(self, filter_string: str): + try: + re.compile(filter_string.strip("|"), re.IGNORECASE) + except re.error: + return self.__regex = re.compile(filter_string.strip("|"), re.IGNORECASE) self.invalidateFilter() From c24c4a5a84efb4bdaac9bcfdd8f2e33235a3ef9b Mon Sep 17 00:00:00 2001 From: SanchoSamba <urh.prosenc@gmail.com> Date: Mon, 20 Jan 2025 12:12:40 +0100 Subject: [PATCH 2/3] Error handled with widget --- orangecontrib/text/widgets/owcorpusviewer.py | 32 +++++++++---------- .../text/widgets/tests/test_owcorpusviewer.py | 14 ++++++++ 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/orangecontrib/text/widgets/owcorpusviewer.py b/orangecontrib/text/widgets/owcorpusviewer.py index 7c172f4a3..e0f38d166 100644 --- a/orangecontrib/text/widgets/owcorpusviewer.py +++ b/orangecontrib/text/widgets/owcorpusviewer.py @@ -36,8 +36,6 @@ from orangecanvas.gui.utils import disconnected from orangewidget.utils.listview import ListViewSearch -from PyQt5.QtWidgets import QMessageBox - from orangecontrib.text.corpus import Corpus HTML = """ @@ -118,7 +116,7 @@ ) -def _count_matches(content: List[str], search_string: str, state: TaskState) -> int: +def _count_matches(content: List[str], regex: re.Pattern, state: TaskState) -> int: """ Count number of appears of any terms in search_string in content texts. @@ -135,18 +133,11 @@ def _count_matches(content: List[str], search_string: str, state: TaskState) -> Number of all matches of search_string in all texts in content list """ matches = 0 - if search_string: - try: - regex = re.compile(search_string.strip("|"), re.IGNORECASE) - except re.error: - msg = QMessageBox() - msg.setIcon(QMessageBox.Critical) - msg.setText("Invalid regex") - msg.exec_() - return 0 - for i, text in enumerate(content): - matches += len(regex.findall(text)) - state.set_progress_value((i + 1) / len(content) * 100) + if regex: + if regex.pattern: + for i, text in enumerate(content): + matches += len(regex.findall(text)) + state.set_progress_value((i + 1) / len(content) * 100) return matches @@ -327,6 +318,9 @@ class Outputs: class Warning(OWWidget.Warning): no_feats_search = Msg("No features included in search.") no_feats_display = Msg("No features selected for display.") + + class Error(OWWidget.Error): + invalid_regex = Msg("Invalid regular expression.") def __init__(self): super().__init__() @@ -601,6 +595,7 @@ def regenerate_docs(self) -> List[str]: return self.corpus.documents_from_features(self.search_features) def refresh_search(self): + self.Error.invalid_regex.clear() if self.corpus is not None: self.doc_list.model().set_filter_string(self.regexp_filter) if not self.selected_documents: @@ -608,10 +603,15 @@ def refresh_search(self): # select first element in the view in that case self.doc_list.setCurrentIndex(self.doc_list.model().index(0, 0)) self.update_info() + try: + self.compiled_regex = re.compile(self.regexp_filter.strip("|"), re.IGNORECASE) + except re.error: + self.Error.invalid_regex() + self.compiled_regex = None self.start( _count_matches, self.doc_list_model.get_filter_content(), - self.regexp_filter, + self.compiled_regex, ) self.show_docs() self.commit.deferred() diff --git a/orangecontrib/text/widgets/tests/test_owcorpusviewer.py b/orangecontrib/text/widgets/tests/test_owcorpusviewer.py index 0abdcbbdc..68889ca75 100644 --- a/orangecontrib/text/widgets/tests/test_owcorpusviewer.py +++ b/orangecontrib/text/widgets/tests/test_owcorpusviewer.py @@ -140,6 +140,20 @@ def test_search(self): self.wait_until_finished() self.assertEqual(self.widget.n_matches, 0) + def test_invalid_regex(self): + # Error is shown when invalid regex is entered + self.send_signal(self.widget.Inputs.corpus, self.corpus) + self.widget.regexp_filter = "*" + self.widget.refresh_search() + self.process_events() + self.assertEqual(self.widget.n_matches, 0) + self.assertTrue(self.widget.Error.invalid_regex.is_shown()) + # Error is hidden when valid regex is entered + self.widget.regexp_filter = "graph" + self.widget.refresh_search() + self.process_events() + self.assertFalse(self.widget.Error.invalid_regex.is_shown()) + def test_highlighting(self): self.send_signal(self.widget.Inputs.corpus, self.corpus) # no intersection between filter and selection From 7d0629c76c5032ce5db5507987534351695695ec Mon Sep 17 00:00:00 2001 From: SanchoSamba <urh.prosenc@gmail.com> Date: Mon, 20 Jan 2025 13:39:03 +0100 Subject: [PATCH 3/3] restructure error handling --- orangecontrib/text/widgets/owcorpusviewer.py | 26 +++++++++++-------- .../text/widgets/tests/test_owcorpusviewer.py | 2 +- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/orangecontrib/text/widgets/owcorpusviewer.py b/orangecontrib/text/widgets/owcorpusviewer.py index e0f38d166..775ddba67 100644 --- a/orangecontrib/text/widgets/owcorpusviewer.py +++ b/orangecontrib/text/widgets/owcorpusviewer.py @@ -133,11 +133,10 @@ def _count_matches(content: List[str], regex: re.Pattern, state: TaskState) -> i Number of all matches of search_string in all texts in content list """ matches = 0 - if regex: - if regex.pattern: - for i, text in enumerate(content): - matches += len(regex.findall(text)) - state.set_progress_value((i + 1) / len(content) * 100) + if regex.pattern: + for i, text in enumerate(content): + matches += len(regex.findall(text)) + state.set_progress_value((i + 1) / len(content) * 100) return matches @@ -605,14 +604,19 @@ def refresh_search(self): self.update_info() try: self.compiled_regex = re.compile(self.regexp_filter.strip("|"), re.IGNORECASE) + self.start( + _count_matches, + self.doc_list_model.get_filter_content(), + self.compiled_regex, + ) except re.error: self.Error.invalid_regex() - self.compiled_regex = None - self.start( - _count_matches, - self.doc_list_model.get_filter_content(), - self.compiled_regex, - ) + self.compiled_regex = None + self.n_matching = "n/a" + self.n_matches = "n/a" + self.n_tokens = "n/a" + self.n_types = "n/a" + self.show_docs() self.commit.deferred() diff --git a/orangecontrib/text/widgets/tests/test_owcorpusviewer.py b/orangecontrib/text/widgets/tests/test_owcorpusviewer.py index 68889ca75..4b22c6365 100644 --- a/orangecontrib/text/widgets/tests/test_owcorpusviewer.py +++ b/orangecontrib/text/widgets/tests/test_owcorpusviewer.py @@ -146,7 +146,7 @@ def test_invalid_regex(self): self.widget.regexp_filter = "*" self.widget.refresh_search() self.process_events() - self.assertEqual(self.widget.n_matches, 0) + self.assertEqual(self.widget.n_matches, "n/a") self.assertTrue(self.widget.Error.invalid_regex.is_shown()) # Error is hidden when valid regex is entered self.widget.regexp_filter = "graph"