diff --git a/orangecontrib/text/widgets/owcorpusviewer.py b/orangecontrib/text/widgets/owcorpusviewer.py index ef18fdaef..775ddba67 100644 --- a/orangecontrib/text/widgets/owcorpusviewer.py +++ b/orangecontrib/text/widgets/owcorpusviewer.py @@ -23,6 +23,7 @@ QSizePolicy, QSplitter, QTableView, + QMessageBox, ) from Orange.data import Variable from Orange.data.domain import Domain, filter_visible @@ -115,7 +116,7 @@ ) -def _count_matches(content: List[str], search_string: str, state: TaskState) -> int: +def _count_matches(content: List[str], regex: re.Pattern, state: TaskState) -> int: """ Count number of appears of any terms in search_string in content texts. @@ -132,8 +133,7 @@ def _count_matches(content: List[str], search_string: str, state: TaskState) -> Number of all matches of search_string in all texts in content list """ matches = 0 - if search_string: - regex = re.compile(search_string.strip("|"), re.IGNORECASE) + if regex.pattern: for i, text in enumerate(content): matches += len(regex.findall(text)) state.set_progress_value((i + 1) / len(content) * 100) @@ -186,6 +186,10 @@ class DocumentsFilterProxyModel(QSortFilterProxyModel): __regex = None def set_filter_string(self, filter_string: str): + try: + re.compile(filter_string.strip("|"), re.IGNORECASE) + except re.error: + return self.__regex = re.compile(filter_string.strip("|"), re.IGNORECASE) self.invalidateFilter() @@ -313,6 +317,9 @@ class Outputs: class Warning(OWWidget.Warning): no_feats_search = Msg("No features included in search.") no_feats_display = Msg("No features selected for display.") + + class Error(OWWidget.Error): + invalid_regex = Msg("Invalid regular expression.") def __init__(self): super().__init__() @@ -587,6 +594,7 @@ def regenerate_docs(self) -> List[str]: return self.corpus.documents_from_features(self.search_features) def refresh_search(self): + self.Error.invalid_regex.clear() if self.corpus is not None: self.doc_list.model().set_filter_string(self.regexp_filter) if not self.selected_documents: @@ -594,11 +602,21 @@ def refresh_search(self): # select first element in the view in that case self.doc_list.setCurrentIndex(self.doc_list.model().index(0, 0)) self.update_info() - self.start( - _count_matches, - self.doc_list_model.get_filter_content(), - self.regexp_filter, - ) + try: + self.compiled_regex = re.compile(self.regexp_filter.strip("|"), re.IGNORECASE) + self.start( + _count_matches, + self.doc_list_model.get_filter_content(), + self.compiled_regex, + ) + except re.error: + self.Error.invalid_regex() + self.compiled_regex = None + self.n_matching = "n/a" + self.n_matches = "n/a" + self.n_tokens = "n/a" + self.n_types = "n/a" + self.show_docs() self.commit.deferred() diff --git a/orangecontrib/text/widgets/tests/test_owcorpusviewer.py b/orangecontrib/text/widgets/tests/test_owcorpusviewer.py index 0abdcbbdc..4b22c6365 100644 --- a/orangecontrib/text/widgets/tests/test_owcorpusviewer.py +++ b/orangecontrib/text/widgets/tests/test_owcorpusviewer.py @@ -140,6 +140,20 @@ def test_search(self): self.wait_until_finished() self.assertEqual(self.widget.n_matches, 0) + def test_invalid_regex(self): + # Error is shown when invalid regex is entered + self.send_signal(self.widget.Inputs.corpus, self.corpus) + self.widget.regexp_filter = "*" + self.widget.refresh_search() + self.process_events() + self.assertEqual(self.widget.n_matches, "n/a") + self.assertTrue(self.widget.Error.invalid_regex.is_shown()) + # Error is hidden when valid regex is entered + self.widget.regexp_filter = "graph" + self.widget.refresh_search() + self.process_events() + self.assertFalse(self.widget.Error.invalid_regex.is_shown()) + def test_highlighting(self): self.send_signal(self.widget.Inputs.corpus, self.corpus) # no intersection between filter and selection