diff --git a/app/records/schemas/details.py b/app/records/schemas/details.py index 0d0321e..4b45306 100644 --- a/app/records/schemas/details.py +++ b/app/records/schemas/details.py @@ -20,6 +20,7 @@ class Record(Details): title: str = "" date: str = "" is_digitised: bool | None = None + held_by: dict | None = None def __init__(self, id: str): super().__init__(id) @@ -30,7 +31,7 @@ class ExternalRecord(Details): ref: str | None = None title: str = "" covering_date: str | None = None - held_by: str | None = None + held_by: dict | None = None def __init__(self, id: str): super().__init__(id) diff --git a/app/records/schemas/search.py b/app/records/schemas/search.py index 2929747..15e7366 100644 --- a/app/records/schemas/search.py +++ b/app/records/schemas/search.py @@ -4,7 +4,7 @@ class RecordSearchResult(APISearchResult): ref: str | None = None - covering_date: str | None = None + date: str | None = None held_by: str | None = None def __str__(self): diff --git a/app/sources/rosetta/api.py b/app/sources/rosetta/api.py index 5fa4193..089af96 100644 --- a/app/sources/rosetta/api.py +++ b/app/sources/rosetta/api.py @@ -10,7 +10,7 @@ ) from config import Config -from .lib import RosettaResponseParser +from .lib import RosettaResponseParser, RosettaSourceParser class RosettaRecords(GetAPI): @@ -32,6 +32,7 @@ def get_result( offset = (page - 1) * self.results_per_page self.add_parameter("size", self.results_per_page) self.add_parameter("from", offset) + self.add_parameter("includeSource", True) url = self.build_url() print(url) raw_results = self.execute(url) @@ -40,24 +41,17 @@ def get_result( def parse_results(self, raw_results, page): response = RecordSearchResults() for r in raw_results["metadata"]: + parsed_data = RosettaSourceParser(r["_source"]) record = RecordSearchResult() - record.id = r["id"] + record.id = parsed_data.id() details = r["detail"]["@template"]["details"] - record.ref = ( - details["referenceNumber"] - if "referenceNumber" in details - else None - ) + record.ref = parsed_data.reference_number() record.title = ( details["summaryTitle"] if "summaryTitle" in details else None ) - record.description = ( - details["description"][0] if "description" in details else None - ) - record.covering_date = ( - details["dateCovering"] if "dateCovering" in details else None - ) - record.held_by = details["heldBy"] if "heldBy" in details else None + record.description = parsed_data.description() + record.date = parsed_data.date() + record.held_by = parsed_data.held_by() # if highlight and "highLight" in r: # if "@template.details.summaryTitle" in r["highLight"]: # record.title = r["highLight"]["@template.details.summaryTitle"][0] @@ -89,47 +83,25 @@ def get_result(self, id: str) -> dict: def parse_results(self, raw_results): parsed_data = RosettaResponseParser(raw_results) - # dump = { - # "actual_type": parsed_data.actual_type(), - # "type": parsed_data.type(), - # "title": parsed_data.title(), - # "name": parsed_data.name(), - # "names": parsed_data.names(), - # "date": parsed_data.date(), - # "lifespan": parsed_data.lifespan(), - # "date_range": parsed_data.date_range(), - # "places": parsed_data.places(), - # "gender": parsed_data.gender(), - # "contact_info": parsed_data.contact_info(), - # "description": parsed_data.description(), - # "functions": parsed_data.functions(), - # "history": parsed_data.history(), - # "biography": parsed_data.biography(), - # "identifier": parsed_data.identifier(), - # "reference_number": parsed_data.reference_number(), - # # 'agents': parsed_data.agents() - # } if parsed_data.type() == "record": # TODO: ExternalRecord record = Record(parsed_data.id()) - record.ref = "" + record.ref = parsed_data.identifier() record.title = parsed_data.title() record.date = parsed_data.date_range() record.is_digitised = parsed_data.is_digitised() - # record.dump = dump + record.held_by = parsed_data.held_by() return record.toJSON() if ( parsed_data.type() == "archive" or parsed_data.type() == "repository" ): - # return raw_results record = RecordArchive(parsed_data.id()) record.name = parsed_data.title() record.archon = parsed_data.reference_number() record.places = parsed_data.places() record.contact_info = parsed_data.contact_info() record.agents = parsed_data.agents() - # record.dump = dump return record.toJSON() if parsed_data.type() == "agent": if parsed_data.actual_type() == "person": @@ -149,6 +121,5 @@ def parse_results(self, raw_results): record.places = parsed_data.places() record.identifier = parsed_data.identifier() record.history = parsed_data.functions() - # record.dump = dump return record.toJSON() return {} diff --git a/app/sources/rosetta/lib/__init__.py b/app/sources/rosetta/lib/__init__.py index 211d6b8..689636e 100644 --- a/app/sources/rosetta/lib/__init__.py +++ b/app/sources/rosetta/lib/__init__.py @@ -1 +1 @@ -from .response_parser import RosettaResponseParser +from .response_parser import RosettaResponseParser, RosettaSourceParser diff --git a/app/sources/rosetta/lib/response_parser.py b/app/sources/rosetta/lib/response_parser.py index 35f0893..aedf633 100644 --- a/app/sources/rosetta/lib/response_parser.py +++ b/app/sources/rosetta/lib/response_parser.py @@ -2,9 +2,14 @@ class RosettaResponseParser: - def __init__(self, rosetta_data): - self.data = rosetta_data - self.source = self.data["metadata"][0]["_source"] + def __new__(cls, rosetta_data: dict, source_item: int = 0): + rosetta_data_source = rosetta_data["metadata"][source_item]["_source"] + return RosettaSourceParser(rosetta_data_source) + + +class RosettaSourceParser: + def __init__(self, rosetta_data_source): + self.source = rosetta_data_source def strip_outside_tags(self, markup, query): document = PyQuery(markup) @@ -51,12 +56,19 @@ def title(self) -> str: None, ): return primary_title + if summary_title := self.summary_title(): + return summary_title if name := self.name(): return name if description := self.description(): return description return "" + def summary_title(self) -> str: + if "summary" in self.source and "title" in self.source["summary"]: + return self.source["summary"]["title"] + return "" + def name(self) -> str: names = self.names() if "name" in names: @@ -64,8 +76,8 @@ def name(self) -> str: return "" def names(self) -> dict: - names = {} if "name" in self.source: + names = {} if name_data := next( ( item @@ -96,8 +108,8 @@ def names(self) -> dict: ), None, ): - names["Alternative name(s)"] = aka - return names + names["alternative_names"] = aka + return {} def date(self) -> str: return self.lifespan() or self.date_range() or "" @@ -146,6 +158,24 @@ def date_range(self) -> str: if "end" in self.source and "date" in self.source["end"] else "" ) + if date_from or date_to: + return f"{date_from}–{date_to}" + if ( + "origination" in self.source + and "date" in self.source["origination"] + ): + if value := self.source["origination"]["date"]["value"]: + return value + date_from = ( + self.source["origination"]["date"]["from"] + if "from" in self.source["origination"]["date"] + else "" + ) + date_to = ( + self.source["origination"]["date"]["to"] + if "to" in self.source["origination"]["date"] + else "" + ) if date_from or date_to: return f"{date_from}–{date_to}" return "" @@ -325,6 +355,16 @@ def biography(self) -> str: def identifier(self) -> str: if "identifier" in self.source: + if identifier := next( + ( + item["value"] + for item in self.source["identifier"] + if "primary" in item and item["primary"] and "value" in item + ), + None, + ): + return identifier + primary_identifier = next( ( item["value"] @@ -415,3 +455,21 @@ def agents(self) -> dict: if archon_number == "P": agents["persons"].append(agent_data) return agents + + def held_by(self) -> dict: + if "repository" in self.source: + id = "" + name = "" + if ( + "name" in self.source["repository"] + and "value" in self.source["repository"]["name"] + ): + name = self.source["repository"]["name"]["value"] + if ( + "@admin" in self.source["repository"] + and "id" in self.source["repository"]["@admin"] + ): + id = self.source["repository"]["@admin"]["id"] + if id and name: + return {"id": id, "name": name} + return {}