Skip to content

Commit

Permalink
Update infrastructure & tests (WiP)
Browse files Browse the repository at this point in the history
  • Loading branch information
khoidt committed Dec 3, 2024
1 parent eedec78 commit 78e71d4
Show file tree
Hide file tree
Showing 15 changed files with 70 additions and 49 deletions.
2 changes: 1 addition & 1 deletion ebl/atf_importer/domain/atf_conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def control_line(self, tree: Tree) -> str:
class GetLineNumber(Visitor):
number: str = ""

def ebl_atf_text_line__single_line_number(self, tree: Tree) -> str:
def ebl_atf_text_line__ebl_atf_common__single_line_number(self, tree: Tree) -> str:
result = DepthFirstSearch().visit_topdown(tree, "")
self.number += result
return result
Expand Down
26 changes: 13 additions & 13 deletions ebl/corpus/domain/chapter_transformer.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
from typing import Iterable

from lark.visitors import v_args
from ebl.common.domain.period import Period

from ebl.corpus.domain.line import Line
from ebl.corpus.domain.manuscript_line import ManuscriptLine
from ebl.corpus.domain.line_variant import LineVariant
from ebl.corpus.domain.manuscript import (
Manuscript,
Siglum,
)
from ebl.common.domain.manuscript_type import ManuscriptType
Expand All @@ -16,23 +13,28 @@


class ChapterTransformer(LineTransformer):
def __init__(self, manuscripts: Iterable[Manuscript]):
self._manuscripts = {
manuscript.siglum: manuscript.id for manuscript in manuscripts
}

def manuscript_label(self, children):
return children

@v_args(inline=True)
def siglum(self, provenance, period, type_, disambiquator):
def get_siglum(self, provenance, period, type_, disambiquator):
    """Build a ``Siglum`` from its four abbreviation parts.

    ``provenance``, ``type_`` and ``disambiquator`` are replaced by the
    empty string when falsy; ``period`` is handed to
    ``Period.from_abbreviation`` unchanged.
    """
    provenance_value = Provenance.from_abbreviation(provenance or "")
    period_value = Period.from_abbreviation(period)
    type_value = ManuscriptType.from_abbreviation(type_ or "")
    return Siglum(provenance_value, period_value, type_value, disambiquator or "")

@v_args(inline=True)
def siglum(self, provenance=None, period=None, type_=None, disambiquator=None):
    """Transform a ``siglum`` node into a ``Siglum``.

    A node that carries a period is a regular siglum and is built by
    ``get_siglum``; a node without a period is treated as a standard text
    ("Std") siglum and is built by ``standard_text_siglum``.
    """
    if period:
        return self.get_siglum(provenance, period, type_, disambiquator)
    # With inline=True the node's children are bound positionally. The "Std"
    # alternative has a single child (its optional disambiguator), so that
    # child arrives in the first parameter, not in `disambiquator`. Take it
    # from there instead of silently dropping it.
    # NOTE(review): assumes the parser yields exactly one child for the
    # "Std" alternative (maybe_placeholders enabled) -- confirm against the
    # Lark options used to build the parser.
    if disambiquator is None:
        disambiquator = provenance
    return self.standard_text_siglum(disambiquator)

@v_args(inline=True)
def ebl_atf_manuscript_line__siglum(self, provenance, period, type_, disambiquator):
    """Build a ``Siglum`` for this rule by delegating to ``get_siglum``."""
    return self.get_siglum(
        provenance=provenance,
        period=period,
        type_=type_,
        disambiquator=disambiquator,
    )

@v_args(inline=True)
def standard_text_siglum(self, disambiquator):
return Siglum(
Expand All @@ -44,9 +46,7 @@ def standard_text_siglum(self, disambiquator):

@v_args(inline=True)
def manuscript_line(self, siglum, labels, line, *paratext):
return ManuscriptLine(
self._manuscripts[siglum], labels or (), line, tuple(paratext)
)
return ManuscriptLine(siglum, labels or (), line, tuple(paratext))

@v_args(inline=True)
def reconstruction(self, line, note, *parallels):
Expand Down
18 changes: 16 additions & 2 deletions ebl/corpus/domain/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@
from ebl.corpus.domain.manuscript import Manuscript
from ebl.errors import DataError
from ebl.transliteration.domain.dollar_line import DollarLine
from ebl.transliteration.domain.atf_parsers.lark_parser import CHAPTER_PARSER, PARATEXT_PARSER
from ebl.transliteration.domain.atf_parsers.lark_parser import (
CHAPTER_PARSER,
PARATEXT_PARSER,
MANUSCRIPT_PARSER,
)
from ebl.transliteration.domain.atf_parsers.lark_parser_errors import PARSE_ERRORS
from ebl.transliteration.domain.note_line import NoteLine

Expand All @@ -14,7 +18,17 @@ def parse_chapter(
atf: str, manuscripts: Iterable[Manuscript], start: Optional[str] = None
) -> Sequence[Line]:
try:
tree = CHAPTER_PARSER.parse(atf)
tree = CHAPTER_PARSER.parse(atf, start=start)
return ChapterTransformer(manuscripts).transform(tree)
except PARSE_ERRORS as error:
raise DataError(error) from error


def parse_manuscript(
    atf: str, manuscripts: Iterable[Manuscript], start: Optional[str] = None
) -> Sequence[Line]:
    """Parse ``atf`` with ``MANUSCRIPT_PARSER`` and transform the tree.

    ``start`` is forwarded to the parser as the start rule. The resulting
    tree is transformed by a ``ChapterTransformer`` constructed with
    ``manuscripts``.

    Raises:
        DataError: if any of ``PARSE_ERRORS`` is raised while parsing or
            transforming; the original error is chained as the cause.
    """
    try:
        parsed_tree = MANUSCRIPT_PARSER.parse(atf, start=start)
        transformer = ChapterTransformer(manuscripts)
        return transformer.transform(parsed_tree)
    except PARSE_ERRORS as parse_error:
        raise DataError(parse_error) from parse_error
Expand Down
22 changes: 13 additions & 9 deletions ebl/tests/corpus/test_parse_chapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@
)
from ebl.common.domain.manuscript_type import ManuscriptType
from ebl.common.domain.provenance import Provenance
from ebl.corpus.domain.parser import parse_chapter, parse_paratext
from ebl.corpus.domain.parser import (
parse_chapter,
parse_paratext,
parse_manuscript as _parse_manuscript,
)
from ebl.errors import DataError
from ebl.tests.factories.corpus import ManuscriptFactory
from ebl.transliteration.domain.labels import parse_labels
Expand All @@ -34,7 +38,7 @@


def parse_siglum(siglum):
return parse_chapter(siglum, MANUSCRIPTS, "siglum")
return _parse_manuscript(siglum, MANUSCRIPTS, "siglum")


@pytest.mark.parametrize("period", [Period.NEO_ASSYRIAN])
Expand All @@ -61,7 +65,7 @@ def test_parse_siglum_standard_text(disambiquator: str) -> None:


def parse_manuscript(atf):
return parse_chapter(atf, MANUSCRIPTS, "manuscript_line")
return _parse_manuscript(atf, MANUSCRIPTS, "manuscript_line")


@pytest.mark.parametrize(
Expand Down Expand Up @@ -224,21 +228,21 @@ def parse_chapter_line(atf):
@pytest.mark.parametrize(
"lines,expected",
[
(["1. kur"], Line(LineNumber(1), (parse_line_variant("1. kur")[1],))),
(["1. kur"], Line(LineNumber(1), (parse_line_variant("1. kur")[0],))),
(
["1. kur", "1. ra"],
Line(
LineNumber(1),
(parse_line_variant("1. kur")[1], parse_line_variant("1. ra")[1]),
(parse_line_variant("1. kur")[0], parse_line_variant("1. ra")[0]),
),
),
(
[f"1. kur\n{MANUSCRIPTS[0].siglum} 1. kur", "1. ra"],
Line(
LineNumber(1),
(
parse_line_variant(f"1. kur\n{MANUSCRIPTS[0].siglum} 1. kur")[1],
parse_line_variant("1. ra")[1],
parse_line_variant(f"1. kur\n{MANUSCRIPTS[0].siglum} 1. kur")[0],
parse_line_variant("1. ra")[0],
),
),
),
Expand All @@ -256,15 +260,15 @@ def test_parse_chapter_line(lines, expected) -> None:
["#tr.en: translation", "1. kur"],
Line(
LineNumber(1),
(parse_line_variant("1. kur")[1],),
(parse_line_variant("1. kur")[0],),
translation=(parse_translation_line("#tr.en: translation"),),
),
),
(
["#tr.en: translation", "#tr.de: translation", "1. kur"],
Line(
LineNumber(1),
(parse_line_variant("1. kur")[1],),
(parse_line_variant("1. kur")[0],),
translation=(
parse_translation_line("#tr.en: translation"),
parse_translation_line("#tr.de: translation"),
Expand Down
7 changes: 5 additions & 2 deletions ebl/transliteration/domain/atf_parsers/lark_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,11 @@
)
PARATEXT_PARSER = Lark.open(ATF_GRAMMAR_PATH, **kwargs_lark, start="paratext")
CHAPTER_PARSER = Lark.open(
ATF_GRAMMAR_PATH, **kwargs_lark, start="chapter", debug=True
) # ToDo: Remove debug
"lark_parser/ebl_atf_chapter.lark", **kwargs_lark, start="chapter"
)
MANUSCRIPT_PARSER = Lark.open(
"lark_parser/ebl_atf_manuscript_line.lark", **kwargs_lark, start="manuscript_line"
)
LINE_PARSER = Lark.open(ATF_GRAMMAR_PATH, **kwargs_lark)


Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
%import .ebl_atf_text_line (text_line, any_word)
%import .ebl_atf_parallel_line (parallel_line)
%import .ebl_atf_manuscript_line (manuscript_line, paratext)
%import .ebl_atf_manuscript_line (manuscript_line, paratext, siglum)
%import .ebl_atf_translation_line (translation_line)
%import .ebl_atf_note_line (note_line, markup)
%import .ebl_atf_dollar_line (dollar_line)
%import .ebl_atf_at_line (at_line)
%import .ebl_atf_chapter (chapter)
%import .ebl_atf_empty_line (empty_line)
%import .ebl_atf_common (labels)
%import .ebl_atf_control_line (control_line)
%import .oracc_atf_lem_line (lem_line)

?start: line
Expand All @@ -20,6 +21,4 @@
| parallel_line
| translation_line
| control_line
| lem_line

!control_line.-2: ("=:" | "&" | "#") /.+/?
| lem_line
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
%import .ebl_atf_common (INT)
%import common.INT
%import .ebl_atf_common (status)
%import .ebl_atf_common (object, OBJECT, generic_object, fragment)
%import .ebl_atf_common (surface, SURFACE, generic_surface, face, edge)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
%import common.CR
%import common.LF

%import .ebl_atf_manuscript_line (manuscript_line)
%import .ebl_atf_text_line (text_line)
%import .ebl_atf_manuscript_line.manuscript_line -> manuscript_line
%import .ebl_atf_text_line.text_line -> text_line
%import .ebl_atf_note_line (note_line)
%import .ebl_atf_parallel_line (parallel_line)
%import .ebl_atf_translation_line (translation_line)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
!control_line.-2: ("=:" | "&" | "#") /.+/?
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
%import .ebl_atf_common (DIGIT, INT, LCASE_LETTER)
%import common.DIGIT -> DIGIT
%import common.INT -> INT
%import common.LCASE_LETTER -> LCASE_LETTER
%import .ebl_atf_common (free_text)
%import .ebl_atf_common (object, OBJECT, generic_object, fragment)
%import .ebl_atf_common (surface, SURFACE, generic_surface, face, edge)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
%import common.LF
%import common.WS_INLINE

%import .ebl_atf_common (labels)
%import .ebl_atf_text_line (text_line)
%import .ebl_atf_note_line (note_line)
%import .ebl_atf_empty_line (empty_line)
%import .ebl_atf_dollar_line (dollar_line)
%import .ebl_atf_common.labels -> labels
%import .ebl_atf_text_line.text_line -> text_line
%import .ebl_atf_note_line.note_line -> note_line
%import .ebl_atf_empty_line.empty_line -> empty_line
%import .ebl_atf_dollar_line.dollar_line -> dollar_line

%import .ebl_atf_abbreviations (PROVENANCE, PERIOD, TYPE)

Expand All @@ -15,7 +15,7 @@ manuscript_line: _WHITE_SPACE? siglum [" " labels] manuscript_text paratext_lin
?manuscript_text: " " text_line | empty_line
?paratext_line: _NEWLINE _WHITE_SPACE? paratext
siglum: [PROVENANCE] PERIOD [TYPE] [DISAMBIQUATOR]
| "Std" [DISAMBIQUATOR] -> standard_text_siglum
| "Std" [DISAMBIQUATOR]
DISAMBIQUATOR: /[\S]+/

_WHITE_SPACE: WS_INLINE
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
%import .ebl_atf_common (AKKADIAN_ALPHABET, GREEK_ALPHABET)
%import .ebl_atf_common (VALUE_CHARACTER, LOGOGRAM_CHARACTER)
%import .ebl_atf_common (UNCERTAIN, CORRECTION, COLLATION, NO_LONGER_VISIBLE)
%import .ebl_atf_common (line_number)
%import .ebl_atf_common.line_number -> line_number
%import .legacy_atf (LEGACY_OPEN_HALF_BRACKET, LEGACY_CLOSE_HALF_BRACKET)
%import .legacy_atf (LEGACY_ORACC_JOINER, LEGACY_ORACC_DISH_DIVIDER)
%import .legacy_atf (legacy_uncertain_sign_prefix)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
%import common.LCASE_LETTER

%import .ebl_atf_common (line_number, labels)
%import .ebl_atf_common.line_number -> line_number
%import .ebl_atf_common.labels -> labels
%import .ebl_atf_note_line (_markup)

translation_line: "#tr" [ "." TRANSLATION_LANGUAGE ] [".(" translation_extent ")"] ": " _markup
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,4 @@ legacy_single_ruling: " "* "ruling" [" "? ("!?" | "*" | "?" | "!")]


//# ToDo:
//# Implement here the following:
//# oracc_atf_text_line__uncertain_sign
//# etc. (check `atf_conversions.py`).
//# check `atf_conversions.py` and implement all missing features.
4 changes: 2 additions & 2 deletions ebl/transliteration/domain/text_line_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,11 @@ def text_line(self, line_number, content):
return TextLine.of_iterable(line_number, content)

@v_args(inline=True)
def ebl_atf_text_line__line_number_range(self, start, end):
def ebl_atf_text_line__ebl_atf_common__line_number_range(self, start, end):
return LineNumberRange(start, end)

@v_args(inline=True)
def ebl_atf_text_line__single_line_number(
def ebl_atf_text_line__ebl_atf_common__single_line_number(
self, prefix_modifier, number, prime, suffix_modifier
):
return LineNumber(
Expand Down

0 comments on commit 78e71d4

Please sign in to comment.