
Commit

chore: use Python 3.9 syntax
kod-kristoff committed Nov 18, 2024
1 parent 8219057 commit 558887f
Showing 15 changed files with 76 additions and 74 deletions.
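The diff applies the typing syntax that Python 3.9 enables: PEP 585 built-in generics (list[str], dict[str, Edge], tuple[int, str]) replace typing.List, typing.Dict and typing.Tuple, and abstract container types such as Iterable, Sequence and Generator are imported from collections.abc instead of typing. A minimal sketch of the pattern, using a hypothetical function rather than code from this repository:

# Python 3.8 style: generic aliases come from typing
#   from typing import Dict, Iterable, List, Optional
#   def group(items: Iterable[str], *, prefix: Optional[str] = None) -> Dict[str, List[str]]: ...
#
# Python 3.9 style (what this commit switches to): built-in generics plus collections.abc
from collections.abc import Iterable
from typing import Optional

def group(items: Iterable[str], *, prefix: Optional[str] = None) -> dict[str, list[str]]:
    """Group strings by their first character (illustrative example only)."""
    out: dict[str, list[str]] = {}
    for item in items:
        out.setdefault(item[:1], []).append(f"{prefix}{item}" if prefix else item)
    return out

print(group(["alpha", "beta", "apple"]))  # -> {'a': ['alpha', 'apple'], 'b': ['beta']}

Note that Optional and Union stay imported from typing throughout the commit, since the X | None union syntax only becomes available in Python 3.10 (PEP 604).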
2 changes: 1 addition & 1 deletion mypy.ini
@@ -4,5 +4,5 @@ namespace_packages = True
explicit_package_bases = True
show_error_codes = True
ignore_missing_imports = True
python_version = 3.8
python_version = 3.9
; plugins = adt.mypy_plugin
2 changes: 2 additions & 0 deletions ruff.toml
@@ -1,5 +1,7 @@
line-length = 97

target-version = "py39"

[lint]
select = [
"A", # flake8-builtins
43 changes: 22 additions & 21 deletions src/parallel_corpus/graph.py
@@ -4,8 +4,9 @@
import itertools
import logging
import re
from collections.abc import Iterable
from dataclasses import dataclass
from typing import Dict, Iterable, List, Optional, TypedDict, TypeVar
from typing import Optional, TypedDict, TypeVar

import parallel_corpus.shared.ranges
import parallel_corpus.shared.str_map
@@ -33,24 +34,24 @@ class Edge:
# a copy of the identifier used in the edges object of the graph
id: str
# these are ids to source and target tokens
ids: List[str]
ids: list[str]
# labels on this edge
labels: List[str]
labels: list[str]
# is this manually or automatically aligned
manual: bool
comment: Optional[str] = None


Edges = Dict[str, Edge]
Edges = dict[str, Edge]


@dataclass
class Graph(SourceTarget[List[Token]]): # noqa: D101
class Graph(SourceTarget[list[Token]]): # noqa: D101
edges: Edges
comment: Optional[str] = None

def copy_with_updated_side_and_edges( # noqa: D102
self, side: Side, new_tokens: List[Token], edges: Edges
self, side: Side, new_tokens: list[Token], edges: Edges
) -> "Graph":
source = self.source if side == Side.target else new_tokens
target = new_tokens if side == Side.target else self.target
@@ -65,8 +66,8 @@ def next_id(g: Graph) -> int:


def edge(
ids: List[str],
labels: List[str],
ids: list[str],
labels: list[str],
*,
comment: Optional[str] = None,
manual: bool = False,
@@ -82,7 +83,7 @@ def edge(
)


def edge_record(es: Iterable[Edge]) -> Dict[str, Edge]:
def edge_record(es: Iterable[Edge]) -> dict[str, Edge]:
return {e.id: e for e in es}


@@ -96,7 +97,7 @@ def init_with_source_and_target(source: str, target: str, *, manual: bool = Fals
)


def init_from(tokens: List[str], *, manual: bool = False) -> Graph:
def init_from(tokens: list[str], *, manual: bool = False) -> Graph:
return align(
Graph(
source=text_token.identify(tokens, "s"),
@@ -109,7 +110,7 @@ def init_from(tokens: List[str], *, manual: bool = False) -> Graph:


def init_from_source_and_target(
source: List[str], target: List[str], *, manual: bool = False
source: list[str], target: list[str], *, manual: bool = False
) -> Graph:
source_tokens = text_token.identify(source, "s")
target_tokens = text_token.identify(target, "t")
@@ -129,20 +130,20 @@ def init_from_source_and_target(

class TextLabels(TypedDict):
text: str
labels: List[str]
labels: list[str]


def from_unaligned(st: SourceTarget[List[TextLabels]]) -> Graph:
def from_unaligned(st: SourceTarget[list[TextLabels]]) -> Graph:
"""Initialize a graph from unaligned tokens."""
edges: Dict[str, Edge] = {}
edges: dict[str, Edge] = {}

def proto_token_to_token(tok: TextLabels, i: int, prefix: str) -> Token:
id_ = f"{prefix}{i}"
e = edge([id_], tok["labels"], manual=False)
edges[id_] = e
return Token(tok["text"], id_)

def proto_tokens_to_tokens(toks: List[TextLabels], side: Side) -> List[Token]:
def proto_tokens_to_tokens(toks: list[TextLabels], side: Side) -> list[Token]:
return [
proto_token_to_token(tok, i, "s" if side == Side.source else "t")
for i, tok in enumerate(toks)
@@ -207,7 +208,7 @@ def align(g: Graph) -> Graph:
c.a is not None and c.b is not None and c.a.id is not None and c.b.id is not None
):
uf.union(c.a.id, c.b.id)
proto_edges: Dict[str, Edge] = {k: e for k, e in g.edges.items() if e.manual}
proto_edges: dict[str, Edge] = {k: e for k, e in g.edges.items() if e.manual}
first: UniqueCheck[str] = UniqueCheck()

def update_edges(tokens, _side) -> None: # noqa: ANN001
@@ -232,7 +233,7 @@ def rearrange(g: Graph, begin: int, end: int, dest: int) -> Graph:
return align(unaligned_rearrange(g, begin, end, dest))


def target_text(g: SourceTarget[List[text_token.Text]]) -> str:
def target_text(g: SourceTarget[list[text_token.Text]]) -> str:
return text_token.text(g.target)


@@ -263,21 +264,21 @@ class CharIdPair:
id: Optional[str] = None


def to_char_ids(token: Token) -> List[CharIdPair]:
def to_char_ids(token: Token) -> list[CharIdPair]:
return parallel_corpus.shared.str_map.str_map(
token.text,
lambda char, _i: CharIdPair(char=char, id=None if char == " " else token.id),
)


def edge_map(g: Graph) -> Dict[str, Edge]:
def edge_map(g: Graph) -> dict[str, Edge]:
"""Map from token ids to edges.
Args:
g (Graph): the Graph to build the edge map from.
Returns:
Dict[str, Edge]: a map from token ids to edges
dict[str, Edge]: a map from token ids to edges
"""
edges = {}
for e in g.edges.values():
@@ -353,7 +354,7 @@ def get_side_text(g: Graph, side: Side) -> str:
return text_token.text(g.get_side(side))


def get_side_texts(g: Graph, side: Side) -> List[str]:
def get_side_texts(g: Graph, side: Side) -> list[str]:
return text_token.texts(g.get_side(side))


6 changes: 3 additions & 3 deletions src/parallel_corpus/shared/__init__.py
@@ -1,7 +1,7 @@
"""Utilities."""

import re
from typing import List, TypeVar
from typing import TypeVar

from . import diffs

@@ -17,8 +17,8 @@ def end_with_space(s: str) -> str:
return f"{s} " if (ENDING_WHITESPACE.fullmatch(s[-1]) is None) else s


def uniq(xs: List[str]) -> List[str]:
used = set()
def uniq(xs: list[str]) -> list[str]:
used: set[str] = set()
return [x for x in xs if x not in used and (used.add(x) or True)] # type: ignore [func-returns-value]


10 changes: 5 additions & 5 deletions src/parallel_corpus/shared/dicts.py
@@ -1,6 +1,6 @@
"""Dicts."""
"""dicts."""

from typing import TYPE_CHECKING, Callable, Dict, List, TypeVar
from typing import TYPE_CHECKING, Callable, TypeVar

if TYPE_CHECKING:
from _typeshed import SupportsRichComparison
@@ -14,15 +14,15 @@
V = TypeVar("V")


def modify(x: Dict[K, V], k: K, default: V, f: Callable[[V], V]) -> V: # noqa: D103
def modify(x: dict[K, V], k: K, default: V, f: Callable[[V], V]) -> V: # noqa: D103
x[k] = f(x.get(k) or default)
return x[k]


def traverse(x: Dict[K, A], k: Callable[[A, K], B], *, sort_keys: bool = False) -> List[B]: # noqa: D103
def traverse(x: dict[K, A], k: Callable[[A, K], B], *, sort_keys: bool = False) -> list[B]: # noqa: D103
ks = sorted(x.keys()) if sort_keys else x.keys()
return [k(x[i], i) for i in ks]


def filter_dict(x: Dict[K, A], k: Callable[[A, K], bool]) -> Dict[K, A]: # noqa: D103
def filter_dict(x: dict[K, A], k: Callable[[A, K], bool]) -> dict[K, A]: # noqa: D103
return {id_: a for id_, a in x.items() if k(a, id_)}
23 changes: 12 additions & 11 deletions src/parallel_corpus/shared/diffs.py
@@ -1,8 +1,9 @@
"""Diffs."""

import enum
from collections.abc import Generator
from itertools import starmap
from typing import Any, Callable, Dict, Generator, Generic, List, Optional, Tuple, TypeVar, Union
from typing import Any, Callable, Generic, Optional, TypeVar, Union

import diff_match_patch as dmp_module
from typing_extensions import Self
@@ -46,8 +47,8 @@ def deleted(cls, a: A) -> Self: # noqa: D102
def inserted(cls, b: B) -> Self: # noqa: D102
return cls(ChangeType.INSERTED, b=b)

def model_dump(self) -> Dict[str, Union[int, A, B]]: # noqa: D102
out: Dict[str, Union[int, A, B]] = {
def model_dump(self) -> dict[str, Union[int, A, B]]: # noqa: D102
out: dict[str, Union[int, A, B]] = {
"change": int(self.change),
}
if self.a is not None:
@@ -94,17 +95,17 @@ def char_stream() -> Generator[str, None, None]:


def hdiff( # noqa: D103
xs: List[A],
ys: List[B],
xs: list[A],
ys: list[B],
a_cmp: Callable[[A], str] = str,
b_cmp: Callable[[B], str] = str,
) -> List[Change[A, B]]:
to: Dict[str, str] = {}
a_from: Dict[str, List[A]] = {}
b_from: Dict[str, List[B]] = {}
) -> list[Change[A, B]]:
to: dict[str, str] = {}
a_from: dict[str, list[A]] = {}
b_from: dict[str, list[B]] = {}
chars = char_stream()

def assign(c: C, c_cmp: Callable[[C], str], c_from: Dict[str, List[C]]) -> str:
def assign(c: C, c_cmp: Callable[[C], str], c_from: dict[str, list[C]]) -> str:
s = c_cmp(c)
u = to.get(s)
if u is None:
@@ -147,7 +148,7 @@ def map_change(change: int, cs): # noqa: ANN001, ANN202
return out


def token_diff(s1: str, s2: str) -> List[Tuple[int, str]]: # noqa: D103
def token_diff(s1: str, s2: str) -> list[tuple[int, str]]: # noqa: D103
d = dmp.diff_main(s1, s2)
dmp.diff_cleanupSemantic(d)
return d
3 changes: 2 additions & 1 deletion src/parallel_corpus/shared/functional.py
@@ -1,6 +1,7 @@
"""Functional utilities."""

from typing import Callable, Sequence, TypeVar
from collections.abc import Sequence
from typing import Callable, TypeVar

A = TypeVar("A")

2 changes: 1 addition & 1 deletion src/parallel_corpus/shared/ids.py
@@ -1,7 +1,7 @@
"""Ids."""

import re
from typing import Iterable
from collections.abc import Iterable

DIGITS = re.compile(r"\d+")

12 changes: 6 additions & 6 deletions src/parallel_corpus/shared/lists.py
@@ -1,12 +1,12 @@
"""List."""
"""list."""

import copy
from typing import List, Tuple, TypeVar
from typing import TypeVar

A = TypeVar("A")


def rearrange(xs: List[A], begin: int, end: int, dest: int) -> List[A]:
def rearrange(xs: list[A], begin: int, end: int, dest: int) -> list[A]:
"""Move a slice of the items and puts back them at some destination.
rearrange([0, 1, 2, 3], 1, 2, 0) // => [1, 2, 0, 3]
@@ -23,14 +23,14 @@ def rearrange(xs: List[A], begin: int, end: int, dest: int) -> List[A]:
return pre + mid + post


def splice(xs: List[A], start: int, count: int, *insert) -> Tuple[List[A], List[A]]: # noqa: ANN002, D103
def splice(xs: list[A], start: int, count: int, *insert) -> tuple[list[A], list[A]]: # noqa: ANN002, D103
ys = copy.deepcopy(xs)
zs = ys[start : (start + count)]
ys[start : (start + count)] = insert
return ys, zs


def split_at_3(xs: List[A], start: int, end: int) -> Tuple[List[A], List[A], List[A]]:
def split_at_3(xs: list[A], start: int, end: int) -> tuple[list[A], list[A], list[A]]:
"""Split an array into three pieces.
splitAt3('0123456'.split(''), 2, 4).map(xs => xs.join('')) // => ['01', '23', '456']
@@ -43,5 +43,5 @@ def split_at_3(xs: List[A], start: int, end: int) -> Tuple[List[A], List[A], Lis
return a, b, c


def split_at(xs: List[A], index: int) -> Tuple[List[A], List[A]]: # noqa: D103
def split_at(xs: list[A], index: int) -> tuple[list[A], list[A]]: # noqa: D103
return xs[:index], xs[index:]
4 changes: 2 additions & 2 deletions src/parallel_corpus/shared/str_map.py
@@ -1,9 +1,9 @@
"""str_map."""

from typing import Callable, List, TypeVar
from typing import Callable, TypeVar

A = TypeVar("A")


def str_map(s: str, f: Callable[[str, int], A]) -> List[A]: # noqa: D103
def str_map(s: str, f: Callable[[str, int], A]) -> list[A]: # noqa: D103
return [f(s[i], i) for i in range(len(s))]
