Skip to content

Commit

Permalink
Implement reference fetching & injection
Browse files Browse the repository at this point in the history
  • Loading branch information
khoidt committed Jan 14, 2025
1 parent 51ad64d commit dc34d86
Showing 1 changed file with 60 additions and 15 deletions.
75 changes: 60 additions & 15 deletions ebl/dossiers/infrastructure/mongo_dossiers_repository.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Sequence
import attr
from typing import Sequence, List, Dict
from marshmallow import Schema, fields, post_load, EXCLUDE
from pymongo.database import Database
from ebl.mongo_collection import MongoCollection
Expand All @@ -9,14 +10,10 @@
from ebl.common.domain.provenance import Provenance
from ebl.fragmentarium.application.fragment_fields_schemas import ScriptSchema
from ebl.bibliography.application.reference_schema import ReferenceSchema
from ebl.bibliography.domain.reference import BibliographyId

# Name of the database collection that holds dossier records.
COLLECTION = "dossiers"

# Reusable marshmallow field for a record's provenance.
# Dump: emits the provenance's `long_name`, or None when that attribute is
# absent (including when the provenance itself is None).
# Load: turns a non-empty name back into a Provenance via
# Provenance.from_name; falsy input is loaded as None.
provenance_field = fields.Function(
lambda object_: getattr(object_.provenance, "long_name", None),
lambda value: Provenance.from_name(value) if value else None,
allow_none=True,
)
# Collection names used by the repository: one for the dossier records and
# one for the bibliography entries that dossier references are resolved
# against.
DOSSIERS_COLLECTION = "dossiers"
BIBLIOGRAPHY_COLLECTION = "bibliography"


class DossierRecordSchema(Schema):
Expand All @@ -37,7 +34,11 @@ class Meta:
related_kings = fields.List(
fields.Float(), data_key="relatedKings", load_default=list
)
provenance = provenance_field
provenance = fields.Function(
lambda object_: getattr(object_.provenance, "long_name", None),
lambda value: Provenance.from_name(value) if value else None,
allow_none=True,
)
script = fields.Nested(ScriptSchema, allow_none=True, load_default=None)
references = fields.Nested(
ReferenceSchema, allow_none=True, many=True, load_default=()
Expand All @@ -51,12 +52,56 @@ def make_record(self, data, **kwargs):

class MongoDossiersRepository(DossiersRepository):
    """Dossier repository backed by two database collections.

    Dossier records are written to and read from the dossiers collection.
    When records are queried, every reference they carry is additionally
    enriched with the matching entry from the bibliography collection.
    """

    def __init__(self, database: Database):
        """Bind the repository to the collections it needs in *database*."""
        self._dossiers_collection = MongoCollection(database, DOSSIERS_COLLECTION)
        self._bibliography_collection = MongoCollection(
            database, BIBLIOGRAPHY_COLLECTION
        )

    def create(self, dossier_record: DossierRecord) -> str:
        """Serialize *dossier_record* and insert it into the dossiers collection.

        Returns:
            Whatever the collection's ``insert_one`` returns for the new
            document.
        """
        return self._dossiers_collection.insert_one(
            DossierRecordSchema().dump(dossier_record)
        )

    def query_by_ids(self, ids: Sequence[str]) -> Sequence[DossierRecord]:
        """Return the dossiers whose ``_id`` is in *ids*, with references resolved.

        The records are loaded first, then the bibliography entries for all
        of their references are fetched in a single query and injected into
        the references as their ``document``.
        """
        dossiers = self._fetch_dossiers(ids)
        reference_ids = self._extract_reference_ids(dossiers)
        bibliography_entries = self._fetch_bibliography_entries(reference_ids)
        self._inject_dossiers_with_bibliography(dossiers, bibliography_entries)
        return dossiers

    def _fetch_dossiers(self, ids: Sequence[str]) -> List[DossierRecord]:
        """Load the dossier records whose ``_id`` is one of *ids*."""
        cursor = self._dossiers_collection.find_many({"_id": {"$in": ids}})
        return DossierRecordSchema(many=True).load(cursor)

    def _extract_reference_ids(
        self, dossiers: List[DossierRecord]
    ) -> List[BibliographyId]:
        """Collect the distinct reference ids used across *dossiers*.

        Duplicates are removed through a set, so the order of the returned
        list is unspecified.
        """
        return list(
            {reference.id for dossier in dossiers for reference in dossier.references}
        )

    def _fetch_bibliography_entries(
        self, reference_ids: List[BibliographyId]
    ) -> Dict[str, dict]:
        """Fetch bibliography documents for *reference_ids*, keyed by ``_id``.

        Ids without a matching document are simply absent from the result.
        """
        if not reference_ids:
            # An empty ``$in`` can never match; skip the database round-trip.
            return {}
        entries = self._bibliography_collection.find_many(
            {"_id": {"$in": reference_ids}}
        )
        return {entry["_id"]: entry for entry in entries}

    def _inject_dossiers_with_bibliography(
        self, dossiers: List[DossierRecord], bibliography_entries: Dict[str, dict]
    ) -> None:
        """Replace each dossier in *dossiers* with a copy whose references
        carry their bibliography document.

        The list is updated in place; the records themselves are not mutated
        but rebuilt with ``attr.evolve``. A reference whose id is missing
        from *bibliography_entries* receives an empty dict as its document.
        """
        # The schemas do not depend on the dossier, so build them once.
        dump_schema = ReferenceSchema()
        load_schema = ReferenceSchema(unknown=EXCLUDE, many=True)
        for index, dossier in enumerate(dossiers):
            injected_references = [
                {
                    **dump_schema.dump(reference),
                    "document": bibliography_entries.get(reference.id, {}),
                }
                for reference in dossier.references
            ]
            # Round-trip through the schema so the injected documents end up
            # as proper reference objects rather than raw dicts.
            dossiers[index] = attr.evolve(
                dossier, references=load_schema.load(injected_references)
            )

0 comments on commit dc34d86

Please sign in to comment.