From 5b1796c3d06062ea53db900270572f06c6c52ec3 Mon Sep 17 00:00:00 2001 From: fsimonjetz Date: Tue, 29 Aug 2023 14:29:09 +0000 Subject: [PATCH 1/3] add variant index injection --- ebl/corpus/application/display_schemas.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/ebl/corpus/application/display_schemas.py b/ebl/corpus/application/display_schemas.py index ee967b44a..de9b7dbf9 100644 --- a/ebl/corpus/application/display_schemas.py +++ b/ebl/corpus/application/display_schemas.py @@ -1,4 +1,4 @@ -from marshmallow import Schema, fields, post_load +from marshmallow import Schema, fields, post_load, post_dump from ebl.corpus.application.id_schemas import ChapterIdSchema from ebl.corpus.application.record_schemas import RecordSchema @@ -44,6 +44,15 @@ def make_line(self, data: dict, **kwargs) -> LineDisplay: tuple(data["translation"] or []), ) + @post_dump + def add_variant_indexes(self, data: dict, **kwargs) -> dict: + data["variants"] = [ + {**variant, "index": index} + for index, variant in enumerate(data["variants"]) + ] + + return data + class ChapterDisplaySchema(Schema): id_ = fields.Nested(ChapterIdSchema, required=True, data_key="id") From 707e8d7918097fb3941ff8023180160dc730cffd Mon Sep 17 00:00:00 2001 From: fsimonjetz Date: Tue, 29 Aug 2023 14:29:26 +0000 Subject: [PATCH 2/3] use _id instead of cache_key --- .../infrastructure/mongo_cache_repository.py | 13 +++++----- .../cache/test_mongo_cache_repository.py | 24 +++++++++---------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/ebl/cache/infrastructure/mongo_cache_repository.py b/ebl/cache/infrastructure/mongo_cache_repository.py index 67decac6e..b4bdef4c8 100644 --- a/ebl/cache/infrastructure/mongo_cache_repository.py +++ b/ebl/cache/infrastructure/mongo_cache_repository.py @@ -13,21 +13,20 @@ def __init__(self, database: Database) -> None: def has(self, cache_key: str, regex=False) -> bool: return self._collection.exists( - {"cache_key": {"$regex": cache_key} if regex else cache_key} + {"_id": {"$regex": cache_key} if regex else cache_key} ) def get(self, cache_key: str) -> dict: - return self._collection.find_one( - {"cache_key": cache_key}, projection={"cache_key": 0, "_id": 0} - ) + return self._collection.find_one({"_id": cache_key}, projection={"_id": 0}) def set(self, cache_key: str, item: dict) -> None: - self._collection.insert_one({"cache_key": cache_key, **item}) + self.delete(cache_key) + self._collection.insert_one({"_id": cache_key, **item}) def delete(self, cache_key: str) -> None: if self.has(cache_key): - self._collection.delete_one({"cache_key": cache_key}) + self._collection.delete_one({"_id": cache_key}) def delete_all(self, pattern: str) -> None: if self.has(pattern, regex=True): - self._collection.delete_many({"cache_key": {"$regex": pattern}}) + self._collection.delete_many({"_id": {"$regex": pattern}}) diff --git a/ebl/tests/cache/test_mongo_cache_repository.py b/ebl/tests/cache/test_mongo_cache_repository.py index 38e9a39d7..091f3a733 100644 --- a/ebl/tests/cache/test_mongo_cache_repository.py +++ b/ebl/tests/cache/test_mongo_cache_repository.py @@ -7,25 +7,25 @@ def test_set(database, mongo_cache_repository) -> None: mongo_cache_repository.set("test", {"data": "data"}) inserted_text = database[CACHE_COLLECTION].find_one( - {"cache_key": "test"}, projection={"_id": False, "cache_key": False} + {"_id": "test"}, projection={"_id": False, "_id": False} ) assert inserted_text == {"data": "data"} def test_has(database, mongo_cache_repository) -> None: - database[CACHE_COLLECTION].insert_one({"cache_key": "test", "data": "data"}) + database[CACHE_COLLECTION].insert_one({"_id": "test", "data": "data"}) assert mongo_cache_repository.has("test") is True def test_get(database, mongo_cache_repository) -> None: - database[CACHE_COLLECTION].insert_one({"cache_key": "test", "data": "data"}) + database[CACHE_COLLECTION].insert_one({"_id": "test", "data": "data"}) assert mongo_cache_repository.get("test") == {"data": "data"} def test_delete(database, mongo_cache_repository) -> None: - database[CACHE_COLLECTION].insert_one({"cache_key": "test", "data": "data"}) + database[CACHE_COLLECTION].insert_one({"_id": "test", "data": "data"}) mongo_cache_repository.delete("test") - assert database[CACHE_COLLECTION].find_one({"cache_key": "test"}) is None + assert database[CACHE_COLLECTION].find_one({"_id": "test"}) is None @pytest.mark.parametrize( @@ -33,7 +33,7 @@ def test_delete(database, mongo_cache_repository) -> None: ) def test_exists_with_regex(database, mongo_cache_repository, key, expected) -> None: database[CACHE_COLLECTION].insert_one( - {"cache_key": key, "data": "data"}, + {"_id": key, "data": "data"}, ) assert mongo_cache_repository.has(r"^test(\sline-\d+)?$", regex=True) == expected @@ -41,12 +41,12 @@ def test_exists_with_regex(database, mongo_cache_repository, key, expected) -> N def test_delete_all(database, mongo_cache_repository) -> None: database[CACHE_COLLECTION].insert_many( [ - {"cache_key": "test", "data": "data"}, - {"cache_key": "test line-42", "data": "data"}, - {"cache_key": "foobar", "data": "data"}, + {"_id": "test", "data": "data"}, + {"_id": "test line-42", "data": "data"}, + {"_id": "foobar", "data": "data"}, ] ) mongo_cache_repository.delete_all(pattern=r"^test(\sline-\d+)?$") - assert database[CACHE_COLLECTION].find_one({"cache_key": "test"}) is None - assert database[CACHE_COLLECTION].find_one({"cache_key": "test line-42"}) is None - assert database[CACHE_COLLECTION].find_one({"cache_key": "foobar"}) is not None + assert database[CACHE_COLLECTION].find_one({"_id": "test"}) is None + assert database[CACHE_COLLECTION].find_one({"_id": "test line-42"}) is None + assert database[CACHE_COLLECTION].find_one({"_id": "foobar"}) is not None From 7ca612a1b271a3da98d3304cd8e9d2e57d354ae7 Mon Sep 17 00:00:00 2001 From: fsimonjetz Date: Tue, 29 Aug 2023 14:56:00 +0000 Subject: [PATCH 3/3] update schemas --- ebl/corpus/application/display_schemas.py | 16 +++++++++++++++- ebl/corpus/application/schemas.py | 4 ++++ ebl/tests/corpus/test_chapter_display_schema.py | 6 ++++-- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/ebl/corpus/application/display_schemas.py b/ebl/corpus/application/display_schemas.py index de9b7dbf9..c24dadec8 100644 --- a/ebl/corpus/application/display_schemas.py +++ b/ebl/corpus/application/display_schemas.py @@ -1,4 +1,4 @@ -from marshmallow import Schema, fields, post_load, post_dump +from marshmallow import Schema, fields, post_load, post_dump, EXCLUDE from ebl.corpus.application.id_schemas import ChapterIdSchema from ebl.corpus.application.record_schemas import RecordSchema @@ -18,6 +18,9 @@ class LineDisplaySchema(Schema): + class Meta: + unknown = EXCLUDE + number = fields.Nested(OneOfLineNumberSchema, required=True) old_line_numbers = fields.Nested( OldLineNumberSchema, many=True, data_key="oldLineNumbers", load_default=tuple() @@ -55,6 +58,9 @@ def add_variant_indexes(self, data: dict, **kwargs) -> dict: class ChapterDisplaySchema(Schema): + class Meta: + unknown = EXCLUDE + id_ = fields.Nested(ChapterIdSchema, required=True, data_key="id") text_name = fields.String(required=True, data_key="textName") text_has_doi = fields.Boolean(data_key="textHasDoi", load_default=False) @@ -76,3 +82,11 @@ def make_chapter(self, data: dict, **kwargs) -> ChapterDisplay: data["record"], tuple(data["manuscripts"]), ) + + @post_dump + def add_line_indexes(self, data: dict, **kwargs) -> dict: + data["lines"] = [ + {**line, "index": index} for index, line in enumerate(data["lines"]) + ] + + return data diff --git a/ebl/corpus/application/schemas.py b/ebl/corpus/application/schemas.py index d0a5cf24f..c15ef2cc4 100644 --- a/ebl/corpus/application/schemas.py +++ b/ebl/corpus/application/schemas.py @@ -5,6 +5,7 @@ post_load, validate, validates_schema, + EXCLUDE, ) from ebl.bibliography.application.reference_schema import ReferenceSchema @@ -176,6 +177,9 @@ def make_manuscript_line(self, data: dict, **kwargs) -> ManuscriptLine: class LineVariantSchema(Schema): + class Meta: + unknown = EXCLUDE + reconstruction: fields.Field = fields.Nested( OneOfTokenSchema, required=True, many=True ) diff --git a/ebl/tests/corpus/test_chapter_display_schema.py b/ebl/tests/corpus/test_chapter_display_schema.py index 9d6e1df94..20d864ea9 100644 --- a/ebl/tests/corpus/test_chapter_display_schema.py +++ b/ebl/tests/corpus/test_chapter_display_schema.py @@ -47,6 +47,7 @@ def to_dict( "isSingleStage": chapter.is_single_stage, "lines": [ { + "index": index, "number": OneOfLineNumberSchema().dump(line.number), "oldLineNumbers": OldLineNumberSchema().dump( line.old_line_numbers, many=True @@ -55,6 +56,7 @@ def to_dict( "isBeginningOfSection": line.is_beginning_of_section, "variants": [ { + "index": index, "intertext": OneOfNoteLinePartSchema().dump( variant.intertext, many=True ), @@ -69,13 +71,13 @@ def to_dict( variant.parallel_lines, many=True ), } - for variant in line.variants + for index, variant in enumerate(line.variants) ], "translation": [] if missing_translation else TranslationLineSchema().dump(line.translation, many=True), } - for line in chapter.lines + for index, line in enumerate(chapter.lines) ], "record": RecordSchema().dump(chapter.record), "manuscripts": ManuscriptSchema().dump(chapter.manuscripts, many=True),