Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add indexes #453

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions ebl/cache/infrastructure/mongo_cache_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,20 @@ def __init__(self, database: Database) -> None:

def has(self, cache_key: str, regex=False) -> bool:
return self._collection.exists(
{"cache_key": {"$regex": cache_key} if regex else cache_key}
{"_id": {"$regex": cache_key} if regex else cache_key}
)

def get(self, cache_key: str) -> dict:
return self._collection.find_one(
{"cache_key": cache_key}, projection={"cache_key": 0, "_id": 0}
)
return self._collection.find_one({"_id": cache_key}, projection={"_id": 0})

def set(self, cache_key: str, item: dict) -> None:
self._collection.insert_one({"cache_key": cache_key, **item})
self.delete(cache_key)
self._collection.insert_one({"_id": cache_key, **item})

def delete(self, cache_key: str) -> None:
if self.has(cache_key):
self._collection.delete_one({"cache_key": cache_key})
self._collection.delete_one({"_id": cache_key})

def delete_all(self, pattern: str) -> None:
if self.has(pattern, regex=True):
self._collection.delete_many({"cache_key": {"$regex": pattern}})
self._collection.delete_many({"_id": {"$regex": pattern}})
25 changes: 24 additions & 1 deletion ebl/corpus/application/display_schemas.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from marshmallow import Schema, fields, post_load
from marshmallow import Schema, fields, post_load, post_dump, EXCLUDE

from ebl.corpus.application.id_schemas import ChapterIdSchema
from ebl.corpus.application.record_schemas import RecordSchema
Expand All @@ -18,6 +18,9 @@


class LineDisplaySchema(Schema):
class Meta:
unknown = EXCLUDE

number = fields.Nested(OneOfLineNumberSchema, required=True)
old_line_numbers = fields.Nested(
OldLineNumberSchema, many=True, data_key="oldLineNumbers", load_default=tuple()
Expand All @@ -44,8 +47,20 @@ def make_line(self, data: dict, **kwargs) -> LineDisplay:
tuple(data["translation"] or []),
)

@post_dump
def add_variant_indexes(self, data: dict, **kwargs) -> dict:
    """Tag every entry of ``data["variants"]`` with its list position.

    Registered as a marshmallow ``post_dump`` hook. Each variant mapping is
    copied and given an ``"index"`` key holding its zero-based position;
    the resulting list replaces ``data["variants"]`` and ``data`` itself
    is returned.
    """
    indexed_variants = []
    for position, entry in enumerate(data["variants"]):
        # Copy instead of mutating so the original variant dicts stay intact.
        indexed_variants.append({**entry, "index": position})

    data["variants"] = indexed_variants
    return data


class ChapterDisplaySchema(Schema):
class Meta:
unknown = EXCLUDE

id_ = fields.Nested(ChapterIdSchema, required=True, data_key="id")
text_name = fields.String(required=True, data_key="textName")
text_has_doi = fields.Boolean(data_key="textHasDoi", load_default=False)
Expand All @@ -67,3 +82,11 @@ def make_chapter(self, data: dict, **kwargs) -> ChapterDisplay:
data["record"],
tuple(data["manuscripts"]),
)

@post_dump
def add_line_indexes(self, data: dict, **kwargs) -> dict:
    """Tag every entry of ``data["lines"]`` with its list position.

    Registered as a marshmallow ``post_dump`` hook. Each line mapping is
    copied and given an ``"index"`` key holding its zero-based position;
    the resulting list replaces ``data["lines"]`` and ``data`` itself is
    returned.
    """
    numbered_lines = []
    for position, line in enumerate(data["lines"]):
        # Shallow copy first, then set the index on the copy only.
        numbered = {**line}
        numbered["index"] = position
        numbered_lines.append(numbered)

    data["lines"] = numbered_lines
    return data
4 changes: 4 additions & 0 deletions ebl/corpus/application/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
post_load,
validate,
validates_schema,
EXCLUDE,
)

from ebl.bibliography.application.reference_schema import ReferenceSchema
Expand Down Expand Up @@ -176,6 +177,9 @@ def make_manuscript_line(self, data: dict, **kwargs) -> ManuscriptLine:


class LineVariantSchema(Schema):
class Meta:
unknown = EXCLUDE

reconstruction: fields.Field = fields.Nested(
OneOfTokenSchema, required=True, many=True
)
Expand Down
24 changes: 12 additions & 12 deletions ebl/tests/cache/test_mongo_cache_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,46 +7,46 @@ def test_set(database, mongo_cache_repository) -> None:
mongo_cache_repository.set("test", {"data": "data"})

inserted_text = database[CACHE_COLLECTION].find_one(
{"cache_key": "test"}, projection={"_id": False, "cache_key": False}
{"_id": "test"}, projection={"_id": False, "_id": False}

Check warning

Code scanning / CodeQL

Duplicate key in dict literal

Dictionary key '_id' is subsequently overwritten.
)
assert inserted_text == {"data": "data"}


def test_has(database, mongo_cache_repository) -> None:
database[CACHE_COLLECTION].insert_one({"cache_key": "test", "data": "data"})
database[CACHE_COLLECTION].insert_one({"_id": "test", "data": "data"})
assert mongo_cache_repository.has("test") is True


def test_get(database, mongo_cache_repository) -> None:
database[CACHE_COLLECTION].insert_one({"cache_key": "test", "data": "data"})
database[CACHE_COLLECTION].insert_one({"_id": "test", "data": "data"})
assert mongo_cache_repository.get("test") == {"data": "data"}


def test_delete(database, mongo_cache_repository) -> None:
database[CACHE_COLLECTION].insert_one({"cache_key": "test", "data": "data"})
database[CACHE_COLLECTION].insert_one({"_id": "test", "data": "data"})
mongo_cache_repository.delete("test")
assert database[CACHE_COLLECTION].find_one({"cache_key": "test"}) is None
assert database[CACHE_COLLECTION].find_one({"_id": "test"}) is None


@pytest.mark.parametrize(
"key,expected", [("test", True), ("test line-42", True), ("foobar", False)]
)
def test_exists_with_regex(database, mongo_cache_repository, key, expected) -> None:
database[CACHE_COLLECTION].insert_one(
{"cache_key": key, "data": "data"},
{"_id": key, "data": "data"},
)
assert mongo_cache_repository.has(r"^test(\sline-\d+)?$", regex=True) == expected


def test_delete_all(database, mongo_cache_repository) -> None:
database[CACHE_COLLECTION].insert_many(
[
{"cache_key": "test", "data": "data"},
{"cache_key": "test line-42", "data": "data"},
{"cache_key": "foobar", "data": "data"},
{"_id": "test", "data": "data"},
{"_id": "test line-42", "data": "data"},
{"_id": "foobar", "data": "data"},
]
)
mongo_cache_repository.delete_all(pattern=r"^test(\sline-\d+)?$")
assert database[CACHE_COLLECTION].find_one({"cache_key": "test"}) is None
assert database[CACHE_COLLECTION].find_one({"cache_key": "test line-42"}) is None
assert database[CACHE_COLLECTION].find_one({"cache_key": "foobar"}) is not None
assert database[CACHE_COLLECTION].find_one({"_id": "test"}) is None
assert database[CACHE_COLLECTION].find_one({"_id": "test line-42"}) is None
assert database[CACHE_COLLECTION].find_one({"_id": "foobar"}) is not None
6 changes: 4 additions & 2 deletions ebl/tests/corpus/test_chapter_display_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def to_dict(
"isSingleStage": chapter.is_single_stage,
"lines": [
{
"index": index,
"number": OneOfLineNumberSchema().dump(line.number),
"oldLineNumbers": OldLineNumberSchema().dump(
line.old_line_numbers, many=True
Expand All @@ -55,6 +56,7 @@ def to_dict(
"isBeginningOfSection": line.is_beginning_of_section,
"variants": [
{
"index": index,
"intertext": OneOfNoteLinePartSchema().dump(
variant.intertext, many=True
),
Expand All @@ -69,13 +71,13 @@ def to_dict(
variant.parallel_lines, many=True
),
}
for variant in line.variants
for index, variant in enumerate(line.variants)
],
"translation": []
if missing_translation
else TranslationLineSchema().dump(line.translation, many=True),
}
for line in chapter.lines
for index, line in enumerate(chapter.lines)
],
"record": RecordSchema().dump(chapter.record),
"manuscripts": ManuscriptSchema().dump(chapter.manuscripts, many=True),
Expand Down