Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate annotations (old) #317

Open
wants to merge 26 commits into
base: master
Choose a base branch
from
Open
4 changes: 4 additions & 0 deletions ebl/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
from ebl.ebl_ai_client import EblAiClient
from ebl.files.infrastructure.grid_fs_file_repository import GridFsFileRepository
from ebl.files.web.bootstrap import create_files_route
from ebl.fragmentarium.infrastructure.cropped_sign_images_repository import (
MongoCroppedSignImagesRepository,
)
from ebl.fragmentarium.infrastructure.mongo_annotations_repository import (
MongoAnnotationsRepository,
)
Expand Down Expand Up @@ -67,6 +70,7 @@ def create_context():
return Context(
ebl_ai_client=ebl_ai_client,
auth_backend=auth_backend,
cropped_sign_images_repository=MongoCroppedSignImagesRepository(database),
word_repository=MongoWordRepository(database),
sign_repository=MongoSignRepository(database),
public_file_repository=GridFsFileRepository(database, "fs"),
Expand Down
4 changes: 4 additions & 0 deletions ebl/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
from ebl.fragmentarium.application.transliteration_update_factory import (
TransliterationUpdateFactory,
)
from ebl.fragmentarium.infrastructure.cropped_sign_images_repository import (
MongoCroppedSignImagesRepository,
)
from ebl.lemmatization.application.suggestion_finder import LemmaRepository
from ebl.transliteration.application.parallel_line_injector import ParallelLineInjector
from ebl.transliteration.application.sign_repository import SignRepository
Expand All @@ -27,6 +30,7 @@
class Context:
ebl_ai_client: EblAiClient
auth_backend: AuthBackend
cropped_sign_images_repository: MongoCroppedSignImagesRepository
word_repository: WordRepository
sign_repository: SignRepository
public_file_repository: FileRepository
Expand Down
137 changes: 0 additions & 137 deletions ebl/fragmentarium/application/annotations_image_extractor.py
Original file line number Diff line number Diff line change
@@ -1,137 +0,0 @@
import base64
from functools import singledispatchmethod
import io
from typing import Sequence, NewType

import attr
from PIL import Image

from ebl.files.application.file_repository import FileRepository
from ebl.fragmentarium.application.annotations_repository import AnnotationsRepository
from ebl.fragmentarium.application.fragment_repository import FragmentRepository
from ebl.fragmentarium.domain.annotation import BoundingBox, Annotation
from ebl.transliteration.domain.museum_number import MuseumNumber
from ebl.transliteration.domain.line_label import LineLabel
from ebl.transliteration.domain.line_number import (
AbstractLineNumber,
LineNumber,
LineNumberRange,
)

Base64 = NewType("Base64", str)


@attr.s(frozen=True, auto_attribs=True)
class CroppedAnnotation:
    """Immutable record pairing one cropped sign image with its provenance.

    Field order is significant: instances are built positionally as
    ``CroppedAnnotation(image, fragment_number, script, label)``.
    """

    # Base64 text of the cropped image.
    image: Base64
    # Museum number of the fragment the crop was taken from.
    fragment_number: MuseumNumber
    # Script value copied from the fragment.
    script: str
    # Formatted line label, or "" when no label matched the annotation.
    label: str


class AnnotationImageExtractor:
    """Crop the regions of fragment photos that are covered by annotations.

    For a given sign, every stored annotation is turned into a
    ``CroppedAnnotation`` holding the Base64-encoded PNG crop, the fragment
    number, the fragment script and the formatted label of the matching line.
    """

    def __init__(
        self,
        fragment_repository: FragmentRepository,
        annotations_repository: AnnotationsRepository,
        photos_repository: FileRepository,
    ):
        self._fragments_repository = fragment_repository
        self._annotations_repository = annotations_repository
        self._photos_repository = photos_repository

    def _format_label(self, label: LineLabel) -> str:
        """Return the non-empty parts of *label* joined by single spaces.

        The parts are, in order: column, surface and object abbreviations and
        the line number ATF with every "." removed. Missing parts are skipped.
        """
        line_number = label.line_number
        column = label.column
        surface = label.surface
        # Trailing underscore so the ``object`` builtin is not shadowed.
        object_ = label.object
        line_atf = line_number.atf if line_number else ""
        column_abbr = column.abbreviation if column else ""
        surface_abbr = surface.abbreviation if surface else ""
        object_abbr = object_.abbreviation if object_ else ""
        parts = [column_abbr, surface_abbr, object_abbr, line_atf.replace(".", "")]
        return " ".join(filter(bool, parts))

    def _load_fragment_image(self, fragment_number: MuseumNumber) -> Image.Image:
        """Fetch ``<fragment_number>.jpg`` from the photo repository and open it."""
        fragment_image = self._photos_repository.query_by_file_name(
            f"{fragment_number}.jpg"
        )
        image_bytes = fragment_image.read()
        return Image.open(io.BytesIO(image_bytes), mode="r")

    @staticmethod
    def _crop_to_base64(image: Image.Image, annotation: Annotation) -> Base64:
        """Crop *image* to the bounding box of *annotation*; return a Base64 PNG."""
        bounding_box = BoundingBox.from_annotations(
            image.size[0], image.size[1], [annotation]
        )[0]
        area = (
            bounding_box.top_left_x,
            bounding_box.top_left_y,
            bounding_box.top_left_x + bounding_box.width,
            bounding_box.top_left_y + bounding_box.height,
        )
        buf = io.BytesIO()
        image.crop(area).save(buf, format="PNG")
        return Base64(base64.b64encode(buf.getvalue()).decode("utf-8"))

    def _cropped_image_from_annotation(
        self, annotation: Annotation, fragment_number: MuseumNumber
    ) -> Base64:
        """Load the fragment photo and return the Base64 PNG crop of *annotation*."""
        return self._crop_to_base64(
            self._load_fragment_image(fragment_number), annotation
        )

    @singledispatchmethod
    def _is_matching_number(self, line_number: AbstractLineNumber, number: int) -> bool:
        """Tell whether *number* is covered by *line_number*.

        Raises ``ValueError`` for line number types without a registered
        overload.
        """
        raise ValueError("No default for overloading")

    @_is_matching_number.register(LineNumber)
    def _(self, line_number: LineNumber, number: int):
        return number == line_number.number

    @_is_matching_number.register(LineNumberRange)
    def _(self, line_number: LineNumberRange, number: int):
        return line_number.start.number <= number <= line_number.end.number

    def _find_label(self, labels: Sequence[LineLabel], number: int):
        """Return the first label whose line number covers *number*, else None."""
        return next(
            (
                label
                for label in labels
                # Labels may carry no line number (``_format_label`` treats it
                # as optional); skip those instead of hitting the dispatch
                # default, which raises ValueError.
                if label.line_number is not None
                and self._is_matching_number(label.line_number, number)
            ),
            None,
        )

    def _cropped_image_from_annotations(
        self, fragment_number: MuseumNumber, annotations: Sequence[Annotation]
    ) -> Sequence[CroppedAnnotation]:
        """Build one ``CroppedAnnotation`` per annotation of a single fragment.

        The fragment and its photo are fetched once for the whole batch; they
        do not depend on the individual annotation.
        """
        if not annotations:
            # Nothing to crop: avoid touching the repositories at all.
            return []

        fragment = self._fragments_repository.query_by_museum_number(fragment_number)
        script = fragment.script
        labels = fragment.text.labels
        image = self._load_fragment_image(fragment_number)

        cropped_annotations = []
        for annotation in annotations:
            label = self._find_label(labels, annotation.data.path[0])
            cropped_annotations.append(
                CroppedAnnotation(
                    self._crop_to_base64(image, annotation),
                    fragment_number,
                    script,
                    self._format_label(label) if label else "",
                )
            )
        return cropped_annotations

    def cropped_images_from_sign(self, sign: str) -> Sequence[CroppedAnnotation]:
        """Return the cropped annotations of every fragment annotated with *sign*."""
        cropped_annotations = []
        for single_annotation in self._annotations_repository.find_by_sign(sign):
            cropped_annotations.extend(
                self._cropped_image_from_annotations(
                    single_annotation.fragment_number, single_annotation.annotations
                )
            )
        return cropped_annotations
17 changes: 11 additions & 6 deletions ebl/fragmentarium/application/annotations_schema.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from marshmallow import Schema, fields, post_load, EXCLUDE

from marshmallow import Schema, fields, post_load, post_dump, EXCLUDE
import pydash
from ebl.fragmentarium.application.cropped_sign_image import CroppedSignSchema
from ebl.fragmentarium.domain.annotation import (
Geometry,
AnnotationData,
Expand Down Expand Up @@ -40,18 +41,22 @@ class Meta:

geometry = fields.Nested(GeometrySchema(), required=True)
data = fields.Nested(AnnotationDataSchema(), required=True)
cropped_sign = fields.Nested(
CroppedSignSchema(), load_default=None, data_key="croppedSign"
)

@post_load
def make_annotation(self, data, **kwargs):
return Annotation(**data)

@post_dump
def filter_none(self, data, **kwargs):
return pydash.omit_by(data, pydash.is_none)

class AnnotationsSchema(Schema):
class Meta:
unknown = EXCLUDE

class AnnotationsSchema(Schema):
fragment_number = fields.String(required=True, data_key="fragmentNumber")
annotations = fields.Nested(AnnotationSchema, many=True, required=True)
annotations = fields.List(fields.Nested(AnnotationSchema(), required=True))

@post_load
def make_annotation(self, data, **kwargs):
Expand Down
109 changes: 104 additions & 5 deletions ebl/fragmentarium/application/annotations_service.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,29 @@
from io import BytesIO
from typing import Tuple, Sequence

import attr
from PIL import Image

from ebl.changelog import Changelog
from ebl.ebl_ai_client import EblAiClient
import attr
from ebl.files.application.file_repository import FileRepository
from ebl.fragmentarium.application.annotations_repository import AnnotationsRepository
from ebl.fragmentarium.application.annotations_schema import AnnotationsSchema
from ebl.fragmentarium.domain.annotation import Annotations
from ebl.fragmentarium.application.cropped_sign_image import CroppedSign
from ebl.fragmentarium.application.cropped_sign_images_repository import (
CroppedSignImage,
CroppedSignImagesRepository,
)
from ebl.fragmentarium.application.fragment_repository import FragmentRepository
from ebl.fragmentarium.domain.annotation import (
Annotations,
AnnotationValueType,
Annotation,
BoundingBox,
AnnotationValueType,
)

from ebl.transliteration.domain.line_label import LineLabel
from ebl.transliteration.domain.museum_number import MuseumNumber
from ebl.users.domain.user import User

Expand All @@ -15,6 +34,9 @@ class AnnotationsService:
_annotations_repository: AnnotationsRepository
_photo_repository: FileRepository
_changelog: Changelog
_fragments_repository: FragmentRepository
_photos_repository: FileRepository
_cropped_sign_images_repository: CroppedSignImagesRepository

def generate_annotations(
self, number: MuseumNumber, threshold: float = 0.3
Expand All @@ -27,17 +49,94 @@ def generate_annotations(
def find(self, number: MuseumNumber) -> Annotations:
return self._annotations_repository.query_by_museum_number(number)

def _label_by_line_number(
    self, line_number_to_match: int, labels: Sequence[LineLabel]
) -> str:
    """Return the formatted label of the line matching *line_number_to_match*.

    Labels without a line number are ignored. When several labels match, the
    last one in *labels* wins. Returns "" when nothing matches.
    """
    candidates = [
        label
        for label in labels
        if label.line_number
        and label.line_number.is_matching_number(line_number_to_match)
    ]
    if not candidates:
        return ""
    return candidates[-1].formatted_label

def _cropped_image_from_annotations_helper(
    self,
    annotations: Annotations,
    image: Image.Image,
    script: str,
    labels: Sequence[LineLabel],
) -> Tuple[Annotations, Sequence[CroppedSignImage]]:
    """Crop every annotation out of *image* and link it to the stored crop.

    Returns a pair: a copy of *annotations* in which each annotation carries a
    ``CroppedSign`` (image id, *script*, label), and the list of the
    ``CroppedSignImage`` objects created, in annotation order.
    """
    sign_images = []
    linked_annotations = []

    for annotation in annotations.annotations:
        # Blank annotations get an empty label; their path is never read.
        if annotation.data.type == AnnotationValueType.BLANK:
            label = ""
        else:
            label = self._label_by_line_number(annotation.data.path[0], labels)

        sign_image = CroppedSignImage.create(annotation.crop_image(image))
        sign_images.append(sign_image)

        cropped_sign = CroppedSign(sign_image.image_id, script, label)
        linked_annotations.append(attr.evolve(annotation, cropped_sign=cropped_sign))

    updated = attr.evolve(annotations, annotations=linked_annotations)
    return updated, sign_images

def _cropped_image_from_annotations(
    self, annotations: Annotations
) -> Tuple[Annotations, Sequence[CroppedSignImage]]:
    """Load the fragment and its ``<number>.jpg`` photo, then crop all annotations.

    Delegates the cropping to ``_cropped_image_from_annotations_helper`` with
    the opened image, the fragment script and the fragment text labels.
    """
    number = annotations.fragment_number
    fragment = self._fragments_repository.query_by_museum_number(number)
    photo = self._photos_repository.query_by_file_name(f"{number}.jpg")
    image = Image.open(BytesIO(photo.read()), mode="r")
    return self._cropped_image_from_annotations_helper(
        annotations, image, fragment.script, fragment.text.labels
    )

# Stores the annotations together with their cropped sign images and records
# the change (old vs. new dump) in the changelog under "annotations".
# NOTE(review): this span appears to interleave the removed and the added
# lines of a diff (two changelog "new" entries, two create_or_update calls,
# two returns) — confirm which side is current before relying on it.
def update(self, annotations: Annotations, user: User) -> Annotations:
# Snapshot of what is currently stored, used as the "old" changelog entry.
old_annotations = self._annotations_repository.query_by_museum_number(
annotations.fragment_number
)
_id = str(annotations.fragment_number)
schema = AnnotationsSchema()
# Crop every annotation and get back the annotations linked to the image ids.
(
annotations_with_image_ids,
cropped_sign_images,
) = self._cropped_image_from_annotations(annotations)

self._annotations_repository.create_or_update(annotations_with_image_ids)
self._cropped_sign_images_repository.create_many(cropped_sign_images)

self._changelog.create(
"annotations",
user.profile,
{"_id": _id, **schema.dump(old_annotations)},
# NOTE(review): the next two lines look like the old and new form of the same
# argument — presumably only the annotations_with_image_ids one should remain.
{"_id": _id, **schema.dump(annotations)},
{"_id": _id, **schema.dump(annotations_with_image_ids)},
)
# NOTE(review): the following call and the first return look like the
# pre-change tail of the method; the second return would be unreachable.
self._annotations_repository.create_or_update(annotations)
return annotations
return annotations_with_image_ids

def migrate(self, annotations: Annotations) -> Annotations:
    """Crop all annotations, persist them and their images, return the result.

    Does the same storage steps as a regular update but writes no changelog
    entry. The annotations are saved before the cropped images.
    """
    migrated, sign_images = self._cropped_image_from_annotations(annotations)
    self._annotations_repository.create_or_update(migrated)
    self._cropped_sign_images_repository.create_many(sign_images)
    return migrated
38 changes: 38 additions & 0 deletions ebl/fragmentarium/application/cropped_annotations_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from typing import Sequence

from ebl.fragmentarium.application.annotations_repository import AnnotationsRepository
from ebl.fragmentarium.application.cropped_sign_image import CroppedAnnotation
from ebl.fragmentarium.application.cropped_sign_images_repository import (
CroppedSignImagesRepository,
)


class CroppedAnnotationService:
    """Look up the stored cropped sign images that belong to a sign."""

    def __init__(
        self,
        annotations_repository: AnnotationsRepository,
        cropped_sign_images_repository: CroppedSignImagesRepository,
    ):
        self._annotations_repository = annotations_repository
        self._cropped_sign_image_repository = cropped_sign_images_repository

    def find_annotations_by_sign(self, sign: str) -> Sequence[CroppedAnnotation]:
        """Return a ``CroppedAnnotation`` for every annotation of *sign* with a crop.

        Annotations without a ``cropped_sign`` are skipped. For the others the
        image is fetched from the cropped sign image repository by its id.
        """
        found = []
        for fragment_annotations in self._annotations_repository.find_by_sign(sign):
            for element in fragment_annotations.annotations:
                cropped_sign = element.cropped_sign
                if not cropped_sign:
                    continue
                stored_image = self._cropped_sign_image_repository.query_by_id(
                    cropped_sign.image_id
                )
                found.append(
                    CroppedAnnotation.from_cropped_sign(
                        fragment_annotations.fragment_number,
                        stored_image.image,
                        cropped_sign,
                    )
                )
        return found
Loading