
Commit

refactor: fix type errors
kod-kristoff committed Oct 22, 2024
1 parent 056b7d6 · commit 1e2eb6e
Showing 6 changed files with 47 additions and 46 deletions.
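In substance, the commit renames the parallel_corpus.token module to parallel_corpus.text_token, updates every import and call site to match, and adds a few annotations and type-ignore comments to satisfy the type checker. A minimal before/after sketch of a call site (illustrative only; the expected texts come from test_graph_init below):

# before this commit:  from parallel_corpus import graph, token
# after this commit:
from parallel_corpus import graph, text_token

g = graph.init("w1 w2")                # build a source/target graph from a string
print(text_token.texts(g.source))      # ["w1 ", "w2 "]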
34 changes: 17 additions & 17 deletions src/parallel_corpus/graph.py
@@ -9,11 +9,11 @@
import parallel_corpus.shared.ranges
import parallel_corpus.shared.str_map
import parallel_corpus.shared.union_find
from parallel_corpus import shared, token
from parallel_corpus import shared, text_token
from parallel_corpus.shared import dicts, diffs, ids, lists
from parallel_corpus.shared.unique_check import UniqueCheck
from parallel_corpus.source_target import Side, SourceTarget, map_sides
from parallel_corpus.token import Token
from parallel_corpus.text_token import Token

A = TypeVar("A")
B = TypeVar("B")
@@ -84,20 +84,20 @@ def edge_record(es: Iterable[Edge]) -> Dict[str, Edge]: # noqa: D103


def init(s: str, *, manual: bool = False) -> Graph: # noqa: D103
return init_from(token.tokenize(s), manual=manual)
return init_from(text_token.tokenize(s), manual=manual)


def init_with_source_and_target(source: str, target: str, *, manual: bool = False) -> Graph: # noqa: D103
return init_from_source_and_target(
source=token.tokenize(source), target=token.tokenize(target), manual=manual
source=text_token.tokenize(source), target=text_token.tokenize(target), manual=manual
)


def init_from(tokens: List[str], *, manual: bool = False) -> Graph: # noqa: D103
return align(
Graph(
source=token.identify(tokens, "s"),
target=token.identify(tokens, "t"),
source=text_token.identify(tokens, "s"),
target=text_token.identify(tokens, "t"),
edges=edge_record(
(edge([f"s{i}", f"t{i}"], [], manual=manual) for i, _ in enumerate(tokens))
),
@@ -108,8 +108,8 @@ def init_from(tokens: List[str], *, manual: bool = False) -> Graph: # noqa: D10
def init_from_source_and_target( # noqa: D103
source: List[str], target: List[str], *, manual: bool = False
) -> Graph:
source_tokens = token.identify(source, "s")
target_tokens = token.identify(target, "t")
source_tokens = text_token.identify(source, "s")
target_tokens = text_token.identify(target, "t")
return align(
Graph(
source=source_tokens,
@@ -204,7 +204,7 @@ def align(g: Graph) -> Graph: # noqa: D103
c.a is not None and c.b is not None and c.a.id is not None and c.b.id is not None
):
uf.union(c.a.id, c.b.id)
proto_edges = {k: e for k, e in g.edges.items() if e.manual}
proto_edges: Dict[str, Edge] = {k: e for k, e in g.edges.items() if e.manual}
first: UniqueCheck[str] = UniqueCheck()

def update_edges(tokens, _side) -> None: # noqa: ANN001
@@ -214,7 +214,7 @@ def update_edges(tokens, _side) -> None: # noqa: ANN001
labels = e_repr.labels if first(e_repr.id) else []
e_token = edge([tok.id], labels, manual=False, comment=e_repr.comment)
dicts.modify(
proto_edges,
proto_edges, # type: ignore[misc]
uf.find(tok.id),
zero_edge,
lambda e: merge_edges(e, e_token), # noqa: B023
@@ -229,8 +229,8 @@ def rearrange(g: Graph, begin: int, end: int, dest: int) -> Graph: # noqa: D103
return align(unaligned_rearrange(g, begin, end, dest))


def target_text(g: SourceTarget[List[token.Text]]) -> str: # noqa: D103
return token.text(g.target)
def target_text(g: SourceTarget[List[text_token.Text]]) -> str: # noqa: D103
return text_token.text(g.target)


@dataclass
@@ -314,23 +314,23 @@ def unaligned_modify(
Indexes are character offsets (use CodeMirror's doc.posFromIndex and doc.indexFromPos to convert)
""" # noqa: E501
tokens = get_side_texts(g, side)
token_at = token.token_at(tokens, from_)
token_at = text_token.token_at(tokens, from_)
from_token, from_ix = token_at["token"], token_at["offset"]
pre = (tokens[from_token] if from_token < len(tokens) else "")[:from_ix]
if to == len(get_side_text(g, side)):
return unaligned_modify_tokens(g, from_token, len(g.get_side(side)), pre + text, side)
to_token_at = token.token_at(tokens, to)
to_token_at = text_token.token_at(tokens, to)
to_token, to_ix = to_token_at["token"], to_token_at["offset"]
post = (tokens[to_token] or "")[to_ix:]
return unaligned_modify_tokens(g, from_token, to_token + 1, pre + text + post, side)


def get_side_text(g: Graph, side: Side) -> str: # noqa: D103
return token.text(g.get_side(side))
return text_token.text(g.get_side(side))


def get_side_texts(g: Graph, side: Side) -> List[str]: # noqa: D103
return token.texts(g.get_side(side))
return text_token.texts(g.get_side(side))


def unaligned_modify_tokens(
@@ -402,7 +402,7 @@ def unaligned_modify_tokens(
id_offset = next_id(g)

tokens = [
Token(t, f"{side[0]}{(id_offset + i)}") for i, t in enumerate(token.tokenize(text))
Token(t, f"{side[0]}{(id_offset + i)}") for i, t in enumerate(text_token.tokenize(text))
]

new_tokens, removed = lists.splice(g.get_side(side), from_, to - from_, *tokens)
2 changes: 1 addition & 1 deletion src/parallel_corpus/shared/dicts.py
@@ -5,7 +5,7 @@
if TYPE_CHECKING:
from _typeshed import SupportsRichComparison

K = TypeVar("K", bound=SupportsRichComparison)
K = TypeVar("K", bound=SupportsRichComparison) # type: ignore [syntax]
else:
K = TypeVar("K")

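For context on the dicts.py change: SupportsRichComparison lives in _typeshed, which is importable only while type checking, so the TypeVar bound applies for the checker and is dropped at runtime. A self-contained sketch of that pattern (sorted_copy is a hypothetical helper added here only for illustration):

from typing import TYPE_CHECKING, List, TypeVar

if TYPE_CHECKING:
    from _typeshed import SupportsRichComparison

    K = TypeVar("K", bound=SupportsRichComparison)  # type: ignore [syntax]
else:
    # _typeshed does not exist at runtime, so the runtime TypeVar is unbounded.
    K = TypeVar("K")


def sorted_copy(xs: List[K]) -> List[K]:  # hypothetical, for illustration only
    return sorted(xs)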
File renamed without changes.
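Judging from the imports elsewhere in this diff, the renamed file is the module that provides Token, tokenize, and identify. Based on the expectations in the tests below, they behave roughly like this (a sketch, not guaranteed output):

from parallel_corpus.text_token import identify, tokenize

texts = tokenize("apa bepa")   # ["apa ", "bepa "] -- each token keeps a trailing space
tokens = identify(texts, "s")  # [Token(text="apa ", id="s0"), Token(text="bepa ", id="s1")]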
50 changes: 25 additions & 25 deletions tests/test_graph.py
@@ -2,14 +2,14 @@

import pytest

from parallel_corpus import graph, token
from parallel_corpus import graph, text_token
from parallel_corpus.source_target import Side, SourceTarget


def test_graph_init() -> None:
g = graph.init("w1 w2")
source = [token.Token(text="w1 ", id="s0"), token.Token(text="w2 ", id="s1")]
target = [token.Token(text="w1 ", id="t0"), token.Token(text="w2 ", id="t1")]
source = [text_token.Token(text="w1 ", id="s0"), text_token.Token(text="w2 ", id="s1")]
target = [text_token.Token(text="w1 ", id="t0"), text_token.Token(text="w2 ", id="t1")]
edges = graph.edge_record([graph.edge(["s0", "t0"], []), graph.edge(["s1", "t1"], [])])

assert g.source == source
@@ -24,8 +24,8 @@ def test_init_from_source_and_target_1() -> None:

def test_init_from_source_and_target_2() -> None:
g = graph.init_with_source_and_target(source="apa bepa", target="apa")
expected_source = token.identify(token.tokenize("apa bepa"), "s")
expected_target = token.identify(token.tokenize("apa"), "t")
expected_source = text_token.identify(text_token.tokenize("apa bepa"), "s")
expected_target = text_token.identify(text_token.tokenize("apa"), "t")
g_expected = graph.Graph(
source=expected_source,
target=expected_target,
@@ -36,8 +36,8 @@ def test_init_from_source_and_target_2() -> None:

def test_init_from_source_and_target_3() -> None:
g = graph.init_with_source_and_target(source="apa", target="bepa apa")
expected_source = token.identify(token.tokenize("apa"), "s")
expected_target = token.identify(token.tokenize("bepa apa"), "t")
expected_source = text_token.identify(text_token.tokenize("apa"), "s")
expected_target = text_token.identify(text_token.tokenize("bepa apa"), "t")
g_expected = graph.Graph(
source=expected_source,
target=expected_target,
@@ -95,19 +95,19 @@ def test_unaligned_set_side() -> None:
print("<<< test_unaligned_set_side")

expected_source = [
token.Token(id="s0", text="a "),
token.Token(id="s1", text="bc "),
token.Token(id="s2", text="d "),
text_token.Token(id="s0", text="a "),
text_token.Token(id="s1", text="bc "),
text_token.Token(id="s2", text="d "),
]
expected_g0_target = [
token.Token(id="t0", text="a "),
token.Token(id="t1", text="bc "),
token.Token(id="t2", text="d "),
text_token.Token(id="t0", text="a "),
text_token.Token(id="t1", text="bc "),
text_token.Token(id="t2", text="d "),
]
expected_g_target = [
token.Token(id="t3", text="ab "),
token.Token(id="t4", text="c "),
token.Token(id="t5", text="d "),
text_token.Token(id="t3", text="ab "),
text_token.Token(id="t4", text="c "),
text_token.Token(id="t5", text="d "),
]
expected_g_edges = {
"e-s0-s1-s2-t3-t4-t5": graph.Edge(
@@ -131,19 +131,19 @@ def test_graph_align() -> None:
g = graph.unaligned_set_side(g0, Side.target, "ab c d")

expected_source = [
token.Token(id="s0", text="a "),
token.Token(id="s1", text="bc "),
token.Token(id="s2", text="d "),
text_token.Token(id="s0", text="a "),
text_token.Token(id="s1", text="bc "),
text_token.Token(id="s2", text="d "),
]
expected_g0_target = [
token.Token(id="t0", text="a "),
token.Token(id="t1", text="bc "),
token.Token(id="t2", text="d "),
text_token.Token(id="t0", text="a "),
text_token.Token(id="t1", text="bc "),
text_token.Token(id="t2", text="d "),
]
expected_g_target = [
token.Token(id="t3", text="ab "),
token.Token(id="t4", text="c "),
token.Token(id="t5", text="d "),
text_token.Token(id="t3", text="ab "),
text_token.Token(id="t4", text="c "),
text_token.Token(id="t5", text="d "),
]
expected_g_edges = {
"e-s0-s1-s2-t3-t4-t5": graph.Edge(
5 changes: 3 additions & 2 deletions tests/test_shared/test_lists.py
@@ -2,14 +2,15 @@


def test_splice_1() -> None:
(*s_chars,) = "abcdef"
s_chars = ["a", "b", "c", "d", "e", "f"]
ex, rm = lists.splice(s_chars, 3, 1, " ", "_")
assert "".join(ex) == "abc _ef"
assert "".join(rm) == "d"


def test_splice_2() -> None:
(*s_chars,) = "abcdef"
s_chars = ["a", "b", "c", "d", "e", "f"]

(ex, rm) = lists.splice(s_chars, 3, 2, " ", "_")
assert "".join(ex) == "abc _f"
assert "".join(rm) == "de"
2 changes: 1 addition & 1 deletion tests/test_token.py
@@ -2,7 +2,7 @@

import pytest

from parallel_corpus.token import Token, identify, tokenize
from parallel_corpus.text_token import Token, identify, tokenize


def test_can_create_token() -> None:
