From d17bf4f6fae6f2c2cfa638447d0a0f97810d789f Mon Sep 17 00:00:00 2001
From: SanchoSamba <urh.prosenc@gmail.com>
Date: Tue, 17 Dec 2024 11:25:26 +0100
Subject: [PATCH 1/3] Regex exception handling

---
 orangecontrib/text/widgets/owcorpusviewer.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/orangecontrib/text/widgets/owcorpusviewer.py b/orangecontrib/text/widgets/owcorpusviewer.py
index ef18fdaef..7c172f4a3 100644
--- a/orangecontrib/text/widgets/owcorpusviewer.py
+++ b/orangecontrib/text/widgets/owcorpusviewer.py
@@ -23,6 +23,7 @@
     QSizePolicy,
     QSplitter,
     QTableView,
+    QMessageBox,
 )
 from Orange.data import Variable
 from Orange.data.domain import Domain, filter_visible
@@ -35,6 +36,8 @@
 from orangecanvas.gui.utils import disconnected
 from orangewidget.utils.listview import ListViewSearch
 
+from PyQt5.QtWidgets import QMessageBox
+
 from orangecontrib.text.corpus import Corpus
 
 HTML = """
@@ -133,7 +136,14 @@ def _count_matches(content: List[str], search_string: str, state: TaskState) ->
     """
     matches = 0
     if search_string:
-        regex = re.compile(search_string.strip("|"), re.IGNORECASE)
+        try:
+            regex = re.compile(search_string.strip("|"), re.IGNORECASE)
+        except re.error:
+            msg = QMessageBox()
+            msg.setIcon(QMessageBox.Critical)
+            msg.setText("Invalid regex")
+            msg.exec_()
+            return 0
         for i, text in enumerate(content):
             matches += len(regex.findall(text))
             state.set_progress_value((i + 1) / len(content) * 100)
@@ -186,6 +196,10 @@ class DocumentsFilterProxyModel(QSortFilterProxyModel):
     __regex = None
 
     def set_filter_string(self, filter_string: str):
+        try:
+            re.compile(filter_string.strip("|"), re.IGNORECASE)
+        except re.error:
+            return
         self.__regex = re.compile(filter_string.strip("|"), re.IGNORECASE)
         self.invalidateFilter()
 

From c24c4a5a84efb4bdaac9bcfdd8f2e33235a3ef9b Mon Sep 17 00:00:00 2001
From: SanchoSamba <urh.prosenc@gmail.com>
Date: Mon, 20 Jan 2025 12:12:40 +0100
Subject: [PATCH 2/3] Report invalid regex as a widget error

---
 orangecontrib/text/widgets/owcorpusviewer.py  | 32 +++++++++----------
 .../text/widgets/tests/test_owcorpusviewer.py | 14 ++++++++
 2 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/orangecontrib/text/widgets/owcorpusviewer.py b/orangecontrib/text/widgets/owcorpusviewer.py
index 7c172f4a3..e0f38d166 100644
--- a/orangecontrib/text/widgets/owcorpusviewer.py
+++ b/orangecontrib/text/widgets/owcorpusviewer.py
@@ -36,8 +36,6 @@
 from orangecanvas.gui.utils import disconnected
 from orangewidget.utils.listview import ListViewSearch
 
-from PyQt5.QtWidgets import QMessageBox
-
 from orangecontrib.text.corpus import Corpus
 
 HTML = """
@@ -118,7 +116,7 @@
 )
 
 
-def _count_matches(content: List[str], search_string: str, state: TaskState) -> int:
+def _count_matches(content: List[str], regex: re.Pattern, state: TaskState) -> int:
     """
     Count number of appears of any terms in search_string in content texts.
 
@@ -135,18 +133,11 @@ def _count_matches(content: List[str], search_string: str, state: TaskState) ->
     Number of all matches of search_string in all texts in content list
     """
     matches = 0
-    if search_string:
-        try:
-            regex = re.compile(search_string.strip("|"), re.IGNORECASE)
-        except re.error:
-            msg = QMessageBox()
-            msg.setIcon(QMessageBox.Critical)
-            msg.setText("Invalid regex")
-            msg.exec_()
-            return 0
-        for i, text in enumerate(content):
-            matches += len(regex.findall(text))
-            state.set_progress_value((i + 1) / len(content) * 100)
+    if regex:
+        if regex.pattern:
+            for i, text in enumerate(content):
+                matches += len(regex.findall(text))
+                state.set_progress_value((i + 1) / len(content) * 100)
     return matches
 
 
@@ -327,6 +318,9 @@ class Outputs:
     class Warning(OWWidget.Warning):
         no_feats_search = Msg("No features included in search.")
         no_feats_display = Msg("No features selected for display.")
+        
+    class Error(OWWidget.Error):
+        invalid_regex = Msg("Invalid regular expression.")
 
     def __init__(self):
         super().__init__()
@@ -601,6 +595,7 @@ def regenerate_docs(self) -> List[str]:
         return self.corpus.documents_from_features(self.search_features)
 
     def refresh_search(self):
+        self.Error.invalid_regex.clear()
         if self.corpus is not None:
             self.doc_list.model().set_filter_string(self.regexp_filter)
             if not self.selected_documents:
@@ -608,10 +603,15 @@ def refresh_search(self):
                 # select first element in the view in that case
                 self.doc_list.setCurrentIndex(self.doc_list.model().index(0, 0))
             self.update_info()
+            try:
+                self.compiled_regex = re.compile(self.regexp_filter.strip("|"), re.IGNORECASE)
+            except re.error:
+                self.Error.invalid_regex()
+                self.compiled_regex = None
             self.start(
                 _count_matches,
                 self.doc_list_model.get_filter_content(),
-                self.regexp_filter,
+                self.compiled_regex,
             )
             self.show_docs()
             self.commit.deferred()
diff --git a/orangecontrib/text/widgets/tests/test_owcorpusviewer.py b/orangecontrib/text/widgets/tests/test_owcorpusviewer.py
index 0abdcbbdc..68889ca75 100644
--- a/orangecontrib/text/widgets/tests/test_owcorpusviewer.py
+++ b/orangecontrib/text/widgets/tests/test_owcorpusviewer.py
@@ -140,6 +140,20 @@ def test_search(self):
         self.wait_until_finished()
         self.assertEqual(self.widget.n_matches, 0)
 
+    def test_invalid_regex(self):
+        # Error is shown when invalid regex is entered
+        self.send_signal(self.widget.Inputs.corpus, self.corpus)
+        self.widget.regexp_filter = "*"
+        self.widget.refresh_search()
+        self.process_events()
+        self.assertEqual(self.widget.n_matches, 0)
+        self.assertTrue(self.widget.Error.invalid_regex.is_shown())
+        # Error is hidden when valid regex is entered
+        self.widget.regexp_filter = "graph"
+        self.widget.refresh_search()
+        self.process_events()
+        self.assertFalse(self.widget.Error.invalid_regex.is_shown())      
+   
     def test_highlighting(self):
         self.send_signal(self.widget.Inputs.corpus, self.corpus)
         # no intersection between filter and selection

From 7d0629c76c5032ce5db5507987534351695695ec Mon Sep 17 00:00:00 2001
From: SanchoSamba <urh.prosenc@gmail.com>
Date: Mon, 20 Jan 2025 13:39:03 +0100
Subject: [PATCH 3/3] Restructure error handling

---
 orangecontrib/text/widgets/owcorpusviewer.py  | 26 +++++++++++--------
 .../text/widgets/tests/test_owcorpusviewer.py |  2 +-
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/orangecontrib/text/widgets/owcorpusviewer.py b/orangecontrib/text/widgets/owcorpusviewer.py
index e0f38d166..775ddba67 100644
--- a/orangecontrib/text/widgets/owcorpusviewer.py
+++ b/orangecontrib/text/widgets/owcorpusviewer.py
@@ -133,11 +133,10 @@ def _count_matches(content: List[str], regex: re.Pattern, state: TaskState) -> i
     Number of all matches of search_string in all texts in content list
     """
     matches = 0
-    if regex:
-        if regex.pattern:
-            for i, text in enumerate(content):
-                matches += len(regex.findall(text))
-                state.set_progress_value((i + 1) / len(content) * 100)
+    if regex.pattern:
+        for i, text in enumerate(content):
+            matches += len(regex.findall(text))
+            state.set_progress_value((i + 1) / len(content) * 100)
     return matches
 
 
@@ -605,14 +604,19 @@ def refresh_search(self):
             self.update_info()
             try:
                 self.compiled_regex = re.compile(self.regexp_filter.strip("|"), re.IGNORECASE)
+                self.start(
+                    _count_matches,
+                    self.doc_list_model.get_filter_content(),
+                    self.compiled_regex,
+                )
             except re.error:
                 self.Error.invalid_regex()
-                self.compiled_regex = None
-            self.start(
-                _count_matches,
-                self.doc_list_model.get_filter_content(),
-                self.compiled_regex,
-            )
+                self.compiled_regex = None 
+                self.n_matching = "n/a"
+                self.n_matches = "n/a"
+                self.n_tokens = "n/a"
+                self.n_types = "n/a"
+                
             self.show_docs()
             self.commit.deferred()
 
diff --git a/orangecontrib/text/widgets/tests/test_owcorpusviewer.py b/orangecontrib/text/widgets/tests/test_owcorpusviewer.py
index 68889ca75..4b22c6365 100644
--- a/orangecontrib/text/widgets/tests/test_owcorpusviewer.py
+++ b/orangecontrib/text/widgets/tests/test_owcorpusviewer.py
@@ -146,7 +146,7 @@ def test_invalid_regex(self):
         self.widget.regexp_filter = "*"
         self.widget.refresh_search()
         self.process_events()
-        self.assertEqual(self.widget.n_matches, 0)
+        self.assertEqual(self.widget.n_matches, "n/a")
         self.assertTrue(self.widget.Error.invalid_regex.is_shown())
         # Error is hidden when valid regex is entered
         self.widget.regexp_filter = "graph"