From 1e2eb6e3383b9c9329d81cdf99c9b7f6460f13d7 Mon Sep 17 00:00:00 2001
From: Kristoffer Andersson
Date: Tue, 22 Oct 2024 14:42:23 +0200
Subject: [PATCH] refactor: fix type errors

---
 src/parallel_corpus/graph.py        | 34 ++++++-------
 src/parallel_corpus/shared/dicts.py |  2 +-
 .../{token.py => text_token.py}     |  0
 tests/test_graph.py                 | 50 +++++++++----------
 tests/test_shared/test_lists.py     |  5 +-
 tests/test_token.py                 |  2 +-
 6 files changed, 47 insertions(+), 46 deletions(-)
 rename src/parallel_corpus/{token.py => text_token.py} (100%)

diff --git a/src/parallel_corpus/graph.py b/src/parallel_corpus/graph.py
index f083907..bbdd326 100644
--- a/src/parallel_corpus/graph.py
+++ b/src/parallel_corpus/graph.py
@@ -9,11 +9,11 @@
 import parallel_corpus.shared.ranges
 import parallel_corpus.shared.str_map
 import parallel_corpus.shared.union_find
-from parallel_corpus import shared, token
+from parallel_corpus import shared, text_token
 from parallel_corpus.shared import dicts, diffs, ids, lists
 from parallel_corpus.shared.unique_check import UniqueCheck
 from parallel_corpus.source_target import Side, SourceTarget, map_sides
-from parallel_corpus.token import Token
+from parallel_corpus.text_token import Token
 
 A = TypeVar("A")
 B = TypeVar("B")
@@ -84,20 +84,20 @@ def edge_record(es: Iterable[Edge]) -> Dict[str, Edge]:  # noqa: D103
 
 
 def init(s: str, *, manual: bool = False) -> Graph:  # noqa: D103
-    return init_from(token.tokenize(s), manual=manual)
+    return init_from(text_token.tokenize(s), manual=manual)
 
 
 def init_with_source_and_target(source: str, target: str, *, manual: bool = False) -> Graph:  # noqa: D103
     return init_from_source_and_target(
-        source=token.tokenize(source), target=token.tokenize(target), manual=manual
+        source=text_token.tokenize(source), target=text_token.tokenize(target), manual=manual
     )
 
 
 def init_from(tokens: List[str], *, manual: bool = False) -> Graph:  # noqa: D103
     return align(
         Graph(
-            source=token.identify(tokens, "s"),
-            target=token.identify(tokens, "t"),
+            source=text_token.identify(tokens, "s"),
+            target=text_token.identify(tokens, "t"),
             edges=edge_record(
                 (edge([f"s{i}", f"t{i}"], [], manual=manual) for i, _ in enumerate(tokens))
             ),
@@ -108,8 +108,8 @@ def init_from(tokens: List[str], *, manual: bool = False) -> Graph:  # noqa: D10
 def init_from_source_and_target(  # noqa: D103
     source: List[str], target: List[str], *, manual: bool = False
 ) -> Graph:
-    source_tokens = token.identify(source, "s")
-    target_tokens = token.identify(target, "t")
+    source_tokens = text_token.identify(source, "s")
+    target_tokens = text_token.identify(target, "t")
     return align(
         Graph(
             source=source_tokens,
@@ -204,7 +204,7 @@ def align(g: Graph) -> Graph:  # noqa: D103
             c.a is not None and c.b is not None and c.a.id is not None and c.b.id is not None
         ):
             uf.union(c.a.id, c.b.id)
-    proto_edges = {k: e for k, e in g.edges.items() if e.manual}
+    proto_edges: Dict[str, Edge] = {k: e for k, e in g.edges.items() if e.manual}
     first: UniqueCheck[str] = UniqueCheck()
 
     def update_edges(tokens, _side) -> None:  # noqa: ANN001
@@ -214,7 +214,7 @@ def update_edges(tokens, _side) -> None:  # noqa: ANN001
                 labels = e_repr.labels if first(e_repr.id) else []
                 e_token = edge([tok.id], labels, manual=False, comment=e_repr.comment)
                 dicts.modify(
-                    proto_edges,
+                    proto_edges,  # type: ignore[misc]
                     uf.find(tok.id),
                     zero_edge,
                     lambda e: merge_edges(e, e_token),  # noqa: B023
@@ -229,8 +229,8 @@ def rearrange(g: Graph, begin: int, end: int, dest: int) -> Graph:  # noqa: D103
     return align(unaligned_rearrange(g, begin, end, dest))
 
 
-def target_text(g: SourceTarget[List[token.Text]]) -> str:  # noqa: D103
-    return token.text(g.target)
+def target_text(g: SourceTarget[List[text_token.Text]]) -> str:  # noqa: D103
+    return text_token.text(g.target)
 
 
 @dataclass
@@ -314,23 +314,23 @@ def unaligned_modify(
     Indexes are character offsets (use CodeMirror's doc.posFromIndex and doc.indexFromPos to convert)
     """  # noqa: E501
     tokens = get_side_texts(g, side)
-    token_at = token.token_at(tokens, from_)
+    token_at = text_token.token_at(tokens, from_)
     from_token, from_ix = token_at["token"], token_at["offset"]
     pre = (tokens[from_token] if from_token < len(tokens) else "")[:from_ix]
     if to == len(get_side_text(g, side)):
         return unaligned_modify_tokens(g, from_token, len(g.get_side(side)), pre + text, side)
-    to_token_at = token.token_at(tokens, to)
+    to_token_at = text_token.token_at(tokens, to)
     to_token, to_ix = to_token_at["token"], to_token_at["offset"]
     post = (tokens[to_token] or "")[to_ix:]
     return unaligned_modify_tokens(g, from_token, to_token + 1, pre + text + post, side)
 
 
 def get_side_text(g: Graph, side: Side) -> str:  # noqa: D103
-    return token.text(g.get_side(side))
+    return text_token.text(g.get_side(side))
 
 
 def get_side_texts(g: Graph, side: Side) -> List[str]:  # noqa: D103
-    return token.texts(g.get_side(side))
+    return text_token.texts(g.get_side(side))
 
 
 def unaligned_modify_tokens(
@@ -402,7 +402,7 @@ def unaligned_modify_tokens(
     id_offset = next_id(g)
 
     tokens = [
-        Token(t, f"{side[0]}{(id_offset + i)}") for i, t in enumerate(token.tokenize(text))
+        Token(t, f"{side[0]}{(id_offset + i)}") for i, t in enumerate(text_token.tokenize(text))
     ]
 
     new_tokens, removed = lists.splice(g.get_side(side), from_, to - from_, *tokens)
diff --git a/src/parallel_corpus/shared/dicts.py b/src/parallel_corpus/shared/dicts.py
index 7c2c07b..7041264 100644
--- a/src/parallel_corpus/shared/dicts.py
+++ b/src/parallel_corpus/shared/dicts.py
@@ -5,7 +5,7 @@
 if TYPE_CHECKING:
     from _typeshed import SupportsRichComparison
 
-    K = TypeVar("K", bound=SupportsRichComparison)
+    K = TypeVar("K", bound=SupportsRichComparison)  # type: ignore [syntax]
 else:
     K = TypeVar("K")
 
diff --git a/src/parallel_corpus/token.py b/src/parallel_corpus/text_token.py
similarity index 100%
rename from src/parallel_corpus/token.py
rename to src/parallel_corpus/text_token.py
diff --git a/tests/test_graph.py b/tests/test_graph.py
index 06a8b81..900b8c0 100644
--- a/tests/test_graph.py
+++ b/tests/test_graph.py
@@ -2,14 +2,14 @@
 
 import pytest
 
-from parallel_corpus import graph, token
+from parallel_corpus import graph, text_token
 from parallel_corpus.source_target import Side, SourceTarget
 
 
 def test_graph_init() -> None:
     g = graph.init("w1 w2")
-    source = [token.Token(text="w1 ", id="s0"), token.Token(text="w2 ", id="s1")]
-    target = [token.Token(text="w1 ", id="t0"), token.Token(text="w2 ", id="t1")]
+    source = [text_token.Token(text="w1 ", id="s0"), text_token.Token(text="w2 ", id="s1")]
+    target = [text_token.Token(text="w1 ", id="t0"), text_token.Token(text="w2 ", id="t1")]
     edges = graph.edge_record([graph.edge(["s0", "t0"], []), graph.edge(["s1", "t1"], [])])
 
     assert g.source == source
@@ -24,8 +24,8 @@ def test_init_from_source_and_target_1() -> None:
 
 def test_init_from_source_and_target_2() -> None:
     g = graph.init_with_source_and_target(source="apa bepa", target="apa")
-    expected_source = token.identify(token.tokenize("apa bepa"), "s")
-    expected_target = token.identify(token.tokenize("apa"), "t")
+    expected_source = text_token.identify(text_token.tokenize("apa bepa"), "s")
+    expected_target = text_token.identify(text_token.tokenize("apa"), "t")
     g_expected = graph.Graph(
         source=expected_source,
         target=expected_target,
@@ -36,8 +36,8 @@ def test_init_from_source_and_target_2() -> None:
 
 def test_init_from_source_and_target_3() -> None:
     g = graph.init_with_source_and_target(source="apa", target="bepa apa")
-    expected_source = token.identify(token.tokenize("apa"), "s")
-    expected_target = token.identify(token.tokenize("bepa apa"), "t")
+    expected_source = text_token.identify(text_token.tokenize("apa"), "s")
+    expected_target = text_token.identify(text_token.tokenize("bepa apa"), "t")
     g_expected = graph.Graph(
         source=expected_source,
         target=expected_target,
@@ -95,19 +95,19 @@ def test_unaligned_set_side() -> None:
     print("<<< test_unaligned_set_side")
 
     expected_source = [
-        token.Token(id="s0", text="a "),
-        token.Token(id="s1", text="bc "),
-        token.Token(id="s2", text="d "),
+        text_token.Token(id="s0", text="a "),
+        text_token.Token(id="s1", text="bc "),
+        text_token.Token(id="s2", text="d "),
     ]
     expected_g0_target = [
-        token.Token(id="t0", text="a "),
-        token.Token(id="t1", text="bc "),
-        token.Token(id="t2", text="d "),
+        text_token.Token(id="t0", text="a "),
+        text_token.Token(id="t1", text="bc "),
+        text_token.Token(id="t2", text="d "),
     ]
     expected_g_target = [
-        token.Token(id="t3", text="ab "),
-        token.Token(id="t4", text="c "),
-        token.Token(id="t5", text="d "),
+        text_token.Token(id="t3", text="ab "),
+        text_token.Token(id="t4", text="c "),
+        text_token.Token(id="t5", text="d "),
     ]
     expected_g_edges = {
         "e-s0-s1-s2-t3-t4-t5": graph.Edge(
@@ -131,19 +131,19 @@ def test_graph_align() -> None:
     g = graph.unaligned_set_side(g0, Side.target, "ab c d")
 
     expected_source = [
-        token.Token(id="s0", text="a "),
-        token.Token(id="s1", text="bc "),
-        token.Token(id="s2", text="d "),
+        text_token.Token(id="s0", text="a "),
+        text_token.Token(id="s1", text="bc "),
+        text_token.Token(id="s2", text="d "),
     ]
     expected_g0_target = [
-        token.Token(id="t0", text="a "),
-        token.Token(id="t1", text="bc "),
-        token.Token(id="t2", text="d "),
+        text_token.Token(id="t0", text="a "),
+        text_token.Token(id="t1", text="bc "),
+        text_token.Token(id="t2", text="d "),
    ]
     expected_g_target = [
-        token.Token(id="t3", text="ab "),
-        token.Token(id="t4", text="c "),
-        token.Token(id="t5", text="d "),
+        text_token.Token(id="t3", text="ab "),
+        text_token.Token(id="t4", text="c "),
+        text_token.Token(id="t5", text="d "),
     ]
     expected_g_edges = {
         "e-s0-s1-s2-t3-t4-t5": graph.Edge(
diff --git a/tests/test_shared/test_lists.py b/tests/test_shared/test_lists.py
index 797c2f4..1720aa6 100644
--- a/tests/test_shared/test_lists.py
+++ b/tests/test_shared/test_lists.py
@@ -2,14 +2,15 @@
 
 
 def test_splice_1() -> None:
-    (*s_chars,) = "abcdef"
+    s_chars = ["a", "b", "c", "d", "e", "f"]
     ex, rm = lists.splice(s_chars, 3, 1, " ", "_")
     assert "".join(ex) == "abc _ef"
     assert "".join(rm) == "d"
 
 
 def test_splice_2() -> None:
-    (*s_chars,) = "abcdef"
+    s_chars = ["a", "b", "c", "d", "e", "f"]
+
     (ex, rm) = lists.splice(s_chars, 3, 2, " ", "_")
     assert "".join(ex) == "abc _f"
     assert "".join(rm) == "de"
diff --git a/tests/test_token.py b/tests/test_token.py
index 1d9dd72..2479ffa 100644
--- a/tests/test_token.py
+++ b/tests/test_token.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from parallel_corpus.token import Token, identify, tokenize
+from parallel_corpus.text_token import Token, identify, tokenize
 
 
 def test_can_create_token() -> None: