Skip to content
This repository has been archived by the owner on Oct 28, 2024. It is now read-only.

Commit

Permalink
Use RosettaResponseParser for searches
Browse files Browse the repository at this point in the history
  • Loading branch information
ahosgood committed Jan 12, 2024
1 parent f845057 commit e93c002
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 48 deletions.
3 changes: 2 additions & 1 deletion app/records/schemas/details.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class Record(Details):
title: str = ""
date: str = ""
is_digitised: bool | None = None
held_by: dict | None = None

def __init__(self, id: str):
super().__init__(id)
Expand All @@ -30,7 +31,7 @@ class ExternalRecord(Details):
ref: str | None = None
title: str = ""
covering_date: str | None = None
held_by: str | None = None
held_by: dict | None = None

def __init__(self, id: str):
super().__init__(id)
Expand Down
2 changes: 1 addition & 1 deletion app/records/schemas/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

class RecordSearchResult(APISearchResult):
ref: str | None = None
covering_date: str | None = None
date: str | None = None
held_by: str | None = None

def __str__(self):
Expand Down
49 changes: 10 additions & 39 deletions app/sources/rosetta/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
)
from config import Config

from .lib import RosettaResponseParser
from .lib import RosettaResponseParser, RosettaSourceParser


class RosettaRecords(GetAPI):
Expand All @@ -32,6 +32,7 @@ def get_result(
offset = (page - 1) * self.results_per_page
self.add_parameter("size", self.results_per_page)
self.add_parameter("from", offset)
self.add_parameter("includeSource", True)
url = self.build_url()
print(url)
raw_results = self.execute(url)
Expand All @@ -40,24 +41,17 @@ def get_result(
def parse_results(self, raw_results, page):
response = RecordSearchResults()
for r in raw_results["metadata"]:
parsed_data = RosettaSourceParser(r["_source"])
record = RecordSearchResult()
record.id = r["id"]
record.id = parsed_data.id()
details = r["detail"]["@template"]["details"]
record.ref = (
details["referenceNumber"]
if "referenceNumber" in details
else None
)
record.ref = parsed_data.reference_number()
record.title = (
details["summaryTitle"] if "summaryTitle" in details else None
)
record.description = (
details["description"][0] if "description" in details else None
)
record.covering_date = (
details["dateCovering"] if "dateCovering" in details else None
)
record.held_by = details["heldBy"] if "heldBy" in details else None
record.description = parsed_data.description()
record.date = parsed_data.date()
record.held_by = parsed_data.held_by()
# if highlight and "highLight" in r:
# if "@template.details.summaryTitle" in r["highLight"]:
# record.title = r["highLight"]["@template.details.summaryTitle"][0]
Expand Down Expand Up @@ -89,47 +83,25 @@ def get_result(self, id: str) -> dict:

def parse_results(self, raw_results):
parsed_data = RosettaResponseParser(raw_results)
# dump = {
# "actual_type": parsed_data.actual_type(),
# "type": parsed_data.type(),
# "title": parsed_data.title(),
# "name": parsed_data.name(),
# "names": parsed_data.names(),
# "date": parsed_data.date(),
# "lifespan": parsed_data.lifespan(),
# "date_range": parsed_data.date_range(),
# "places": parsed_data.places(),
# "gender": parsed_data.gender(),
# "contact_info": parsed_data.contact_info(),
# "description": parsed_data.description(),
# "functions": parsed_data.functions(),
# "history": parsed_data.history(),
# "biography": parsed_data.biography(),
# "identifier": parsed_data.identifier(),
# "reference_number": parsed_data.reference_number(),
# # 'agents': parsed_data.agents()
# }
if parsed_data.type() == "record":
# TODO: ExternalRecord
record = Record(parsed_data.id())
record.ref = ""
record.ref = parsed_data.identifier()
record.title = parsed_data.title()
record.date = parsed_data.date_range()
record.is_digitised = parsed_data.is_digitised()
# record.dump = dump
record.held_by = parsed_data.held_by()
return record.toJSON()
if (
parsed_data.type() == "archive"
or parsed_data.type() == "repository"
):
# return raw_results
record = RecordArchive(parsed_data.id())
record.name = parsed_data.title()
record.archon = parsed_data.reference_number()
record.places = parsed_data.places()
record.contact_info = parsed_data.contact_info()
record.agents = parsed_data.agents()
# record.dump = dump
return record.toJSON()
if parsed_data.type() == "agent":
if parsed_data.actual_type() == "person":
Expand All @@ -149,6 +121,5 @@ def parse_results(self, raw_results):
record.places = parsed_data.places()
record.identifier = parsed_data.identifier()
record.history = parsed_data.functions()
# record.dump = dump
return record.toJSON()
return {}
2 changes: 1 addition & 1 deletion app/sources/rosetta/lib/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .response_parser import RosettaResponseParser
from .response_parser import RosettaResponseParser, RosettaSourceParser
70 changes: 64 additions & 6 deletions app/sources/rosetta/lib/response_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,14 @@


class RosettaResponseParser:
def __init__(self, rosetta_data):
self.data = rosetta_data
self.source = self.data["metadata"][0]["_source"]
def __new__(cls, rosetta_data: dict, source_item: int = 0):
rosetta_data_source = rosetta_data["metadata"][source_item]["_source"]
return RosettaSourceParser(rosetta_data_source)


class RosettaSourceParser:
def __init__(self, rosetta_data_source):
self.source = rosetta_data_source

def strip_outside_tags(self, markup, query):
document = PyQuery(markup)
Expand Down Expand Up @@ -51,21 +56,28 @@ def title(self) -> str:
None,
):
return primary_title
if summary_title := self.summary_title():
return summary_title
if name := self.name():
return name
if description := self.description():
return description
return ""

def summary_title(self) -> str:
    """Return the title stored under ``source["summary"]["title"]``.

    Falls back to an empty string when either the ``summary`` entry or
    its ``title`` key is absent.
    """
    record = self.source
    if "summary" not in record:
        return ""
    summary = record["summary"]
    if "title" not in summary:
        return ""
    return summary["title"]

def name(self) -> str:
    """Return the ``"name"`` entry of ``self.names()``, or ``""`` if absent."""
    known_names = self.names()
    return known_names["name"] if "name" in known_names else ""

def names(self) -> dict:
names = {}
if "name" in self.source:
names = {}
if name_data := next(
(
item
Expand Down Expand Up @@ -96,8 +108,8 @@ def names(self) -> dict:
),
None,
):
names["Alternative name(s)"] = aka
return names
names["alternative_names"] = aka
return {}

def date(self) -> str:
    """Return the first non-empty value of lifespan, then date range.

    ``date_range()`` is only consulted when ``lifespan()`` is falsy; an
    empty string is returned when both are falsy.
    """
    lifespan = self.lifespan()
    if lifespan:
        return lifespan
    covering_range = self.date_range()
    if covering_range:
        return covering_range
    return ""
Expand Down Expand Up @@ -146,6 +158,24 @@ def date_range(self) -> str:
if "end" in self.source and "date" in self.source["end"]
else ""
)
if date_from or date_to:
return f"{date_from}{date_to}"
if (
"origination" in self.source
and "date" in self.source["origination"]
):
if value := self.source["origination"]["date"]["value"]:
return value
date_from = (
self.source["origination"]["date"]["from"]
if "from" in self.source["origination"]["date"]
else ""
)
date_to = (
self.source["origination"]["date"]["to"]
if "to" in self.source["origination"]["date"]
else ""
)
if date_from or date_to:
return f"{date_from}{date_to}"
return ""
Expand Down Expand Up @@ -325,6 +355,16 @@ def biography(self) -> str:

def identifier(self) -> str:
if "identifier" in self.source:
if identifier := next(
(
item["value"]
for item in self.source["identifier"]
if "primary" in item and item["primary"] and "value" in item
),
None,
):
return identifier

primary_identifier = next(
(
item["value"]
Expand Down Expand Up @@ -415,3 +455,21 @@ def agents(self) -> dict:
if archon_number == "P":
agents["persons"].append(agent_data)
return agents

def held_by(self) -> dict:
    """Return the holding repository as ``{"id": ..., "name": ...}``.

    The name is read from ``source["repository"]["name"]["value"]`` and
    the id from ``source["repository"]["@admin"]["id"]``.

    Returns:
        A dict with ``id`` and ``name`` when both are present and truthy,
        otherwise an empty dict.
    """
    repository = self.source.get("repository")
    # A missing, null or non-dict repository yields the empty result
    # rather than raising on the membership checks below.
    if not isinstance(repository, dict):
        return {}

    name_data = repository.get("name")
    admin_data = repository.get("@admin")
    repository_name = (
        name_data.get("value", "") if isinstance(name_data, dict) else ""
    )
    # Named repository_id so the builtin id() is not shadowed.
    repository_id = (
        admin_data.get("id", "") if isinstance(admin_data, dict) else ""
    )

    # A half-populated reference is not returned: both parts are required.
    if repository_id and repository_name:
        return {"id": repository_id, "name": repository_name}
    return {}

0 comments on commit e93c002

Please sign in to comment.