Skip to content

Commit

Permalink
tests: add actual assertions
Browse files Browse the repository at this point in the history
  • Loading branch information
bertsky committed Aug 30, 2024
1 parent ae6445b commit fd15e2a
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 6 deletions.
22 changes: 20 additions & 2 deletions tests/test_binarize.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,31 @@
# pylint: disable=import-error

import json
import os

from ocrd import run_processor
from ocrd_utils import MIMETYPE_PAGE
from ocrd_models.constants import NAMESPACES
from ocrd_modelfactory import page_from_file

from ocrd_kraken.binarize import KrakenBinarize

from .assets import assets


PARAM_JSON = assets.url_of('param-binarize.json')

def analyse_result(ws, level: str) -> None:
    """Assert that binarization wrote usable results into ``OCR-D-BIN-KRAKEN``.

    Checks, in order: the output directory exists below the workspace
    directory, the file group lists at least one PAGE file and at least one
    image file, the first PAGE file can be parsed, and it contains at least
    one ``AlternativeImage`` whose ``comments`` attribute contains
    ``binarized`` directly under an element of the requested level.

    Args:
        ws: workspace the processor ran on; must provide ``directory`` and
            ``find_files``.
        level: PAGE element name (without namespace prefix) expected to carry
            the binarized image; the tests in this module pass ``'Page'``,
            ``'TextRegion'`` or ``'TextLine'``.

    Raises:
        AssertionError: if any of the checks fails.
    """
    file_grp = "OCR-D-BIN-KRAKEN"
    assert os.path.isdir(os.path.join(ws.directory, file_grp))

    # Use the ``file_grp`` keyword (as the recognition tests do) rather than
    # the older ``fileGrp`` spelling.
    page_files = list(ws.find_files(file_grp=file_grp, mimetype=MIMETYPE_PAGE))
    assert len(page_files) > 0, "found no output PAGE file"

    # NOTE(review): the leading ``//`` is understood to switch OCR-D's
    # find_files into regular-expression matching -- confirm against the
    # installed ocrd version.
    image_files = list(ws.find_files(file_grp=file_grp, mimetype="//^image/.*"))
    assert len(image_files) > 0, "found no output image file"

    out_pcgts = page_from_file(page_files[0])
    assert out_pcgts is not None

    # Kept under a separate name from ``image_files``: these are XML elements
    # referenced from the PAGE document, not workspace file entries.
    alternative_images = out_pcgts.etree.xpath(
        f'//page:{level}/page:AlternativeImage[contains(@comments,"binarized")]',
        namespaces=NAMESPACES,
    )
    assert len(alternative_images) > 0, "found no binarized AlternativeImages in output PAGE file"

def test_param_json(workspace_sbb):
run_processor(KrakenBinarize,
input_file_grp="OCR-D-IMG",
Expand All @@ -19,6 +35,7 @@ def test_param_json(workspace_sbb):
)
ws = workspace_sbb['workspace']
ws.save_mets()
analyse_result(ws, 'Page')

def test_binarize_regions(workspace_aufklaerung):
run_processor(KrakenBinarize,
Expand All @@ -29,7 +46,7 @@ def test_binarize_regions(workspace_aufklaerung):
)
ws = workspace_aufklaerung['workspace']
ws.save_mets()
# FIXME: add result assertions (find_files, parsing PAGE etc)
analyse_result(ws, 'TextRegion')

def test_binarize_lines(workspace_aufklaerung):
run_processor(KrakenBinarize,
Expand All @@ -40,4 +57,5 @@ def test_binarize_lines(workspace_aufklaerung):
)
ws = workspace_aufklaerung['workspace']
ws.save_mets()
# FIXME: add result assertions (find_files, parsing PAGE etc)
analyse_result(ws, 'TextLine')

14 changes: 13 additions & 1 deletion tests/test_recognize.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# pylint: disable=import-error

import os

from ocrd import run_processor
from ocrd_utils import MIMETYPE_PAGE
from ocrd_models.constants import NAMESPACES
from ocrd_modelfactory import page_from_file

from ocrd_kraken.recognize import KrakenRecognize
from ocrd_kraken.binarize import KrakenBinarize

Expand All @@ -21,4 +27,10 @@ def test_recognize(workspace_aufklaerung):
)
ws = workspace_aufklaerung['workspace']
ws.save_mets()
# FIXME: add result assertions (find_files, parsing PAGE etc)
assert os.path.isdir(os.path.join(ws.directory, 'OCR-D-OCR-KRAKEN'))
results = ws.find_files(file_grp='OCR-D-OCR-KRAKEN', mimetype=MIMETYPE_PAGE)
result0 = next(results, False)
assert result0, "found no output PAGE file"
result0 = page_from_file(result0)
text0 = result0.etree.xpath('//page:Glyph/page:TextEquiv/page:Unicode', namespaces=NAMESPACES)
assert len(text0) > 0, "found no glyph text in output PAGE file"
23 changes: 20 additions & 3 deletions tests/test_segment.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,27 @@
# pylint: disable=import-error

import os

from ocrd import run_processor
from ocrd_utils import MIMETYPE_PAGE
from ocrd_models.constants import NAMESPACES
from ocrd_modelfactory import page_from_file

from ocrd_kraken.segment import KrakenSegment
from ocrd_kraken.binarize import KrakenBinarize


def analyse_result(ws) -> None:
    """Assert that segmentation wrote usable results into ``OCR-D-SEG-LINE-KRAKEN``.

    Checks, in order: the output directory exists below the workspace
    directory, the file group lists at least one PAGE file, the first PAGE
    file can be parsed, it contains at least one ``TextRegion`` with
    ``Coords``, and its page reports at least one text line.

    Args:
        ws: workspace the processor ran on; must provide ``directory`` and
            ``find_files``.

    Raises:
        AssertionError: if any of the checks fails.
    """
    file_grp = "OCR-D-SEG-LINE-KRAKEN"
    assert os.path.isdir(os.path.join(ws.directory, file_grp))

    # Use the ``file_grp`` keyword (as the recognition tests do) rather than
    # the older ``fileGrp`` spelling.
    page_files = list(ws.find_files(file_grp=file_grp, mimetype=MIMETYPE_PAGE))
    assert len(page_files) > 0, "found no output PAGE file"

    out_pcgts = page_from_file(page_files[0])
    assert out_pcgts is not None

    region_coords = out_pcgts.etree.xpath(
        '//page:TextRegion/page:Coords',
        namespaces=NAMESPACES,
    )
    assert len(region_coords) > 0, "found no text regions in output PAGE file"

    text_lines = out_pcgts.get_Page().get_AllTextLines()
    assert len(text_lines) > 0, "found no text lines in output PAGE file"

def test_run_blla(workspace_aufklaerung):
run_processor(KrakenSegment,
input_file_grp="OCR-D-IMG",
Expand All @@ -14,7 +31,7 @@ def test_run_blla(workspace_aufklaerung):
)
ws = workspace_aufklaerung['workspace']
ws.save_mets()
# FIXME: add result assertions (find_files, parsing PAGE etc)
analyse_result(ws)

def test_run_blla_regionlevel(workspace_aufklaerung_region):
run_processor(KrakenSegment,
Expand All @@ -27,7 +44,7 @@ def test_run_blla_regionlevel(workspace_aufklaerung_region):
)
ws = workspace_aufklaerung_region['workspace']
ws.save_mets()
# FIXME: add result assertions (find_files, parsing PAGE etc)
analyse_result(ws)

def test_run_legacy(workspace_aufklaerung):
# legacy segmentation requires binarized images
Expand All @@ -45,4 +62,4 @@ def test_run_legacy(workspace_aufklaerung):
)
ws = workspace_aufklaerung['workspace']
ws.save_mets()
# FIXME: add result assertions (find_files, parsing PAGE etc)
analyse_result(ws)

0 comments on commit fd15e2a

Please sign in to comment.