diff --git a/app/records/schemas/details.py b/app/records/schemas/details.py
index 9bef444..db71265 100644
--- a/app/records/schemas/details.py
+++ b/app/records/schemas/details.py
@@ -30,6 +30,7 @@ class Record(Details):
closure_status: str | None = None
access_condition: str | None = None
languages: list[str] = []
+ related_materials: list[dict] = []
hierarchy: list[dict] = []
def __init__(self, id: str):
@@ -56,6 +57,7 @@ class Aggregation(Details):
closure_status: str | None = None
access_condition: str | None = None
languages: list[str] = []
+ related_materials: list[dict] = []
hierarchy: list[dict] = []
def __init__(self, id: str):
diff --git a/app/sources/rosetta/api.py b/app/sources/rosetta/api.py
index bdac17b..e34b08a 100644
--- a/app/sources/rosetta/api.py
+++ b/app/sources/rosetta/api.py
@@ -101,6 +101,7 @@ def parse_results(self, raw_results, source_url):
record.closure_status = parsed_data.closure_status()
record.access_condition = parsed_data.access_condition()
record.languages = parsed_data.languages()
+ record.related_materials = parsed_data.related_materials()
record.hierarchy = (
parsed_data.hierarchies()[0]
if len(parsed_data.hierarchies())
@@ -132,6 +133,7 @@ def parse_results(self, raw_results, source_url):
record.closure_status = parsed_data.closure_status()
record.access_condition = parsed_data.access_condition()
record.languages = parsed_data.languages()
+ record.related_materials = parsed_data.related_materials()
record.hierarchy = (
parsed_data.hierarchies()[0]
if len(parsed_data.hierarchies())
diff --git a/app/sources/rosetta/lib/response_parser.py b/app/sources/rosetta/lib/response_parser.py
index 47526ea..e9c3cfe 100644
--- a/app/sources/rosetta/lib/response_parser.py
+++ b/app/sources/rosetta/lib/response_parser.py
@@ -1,5 +1,7 @@
+import re
from enum import Enum
+from pydash import objects
from pyquery import PyQuery
hierarchy_level_names = {
@@ -50,20 +52,13 @@ def strip_wrapper_and_split_span(self, markup):
return "\n".join(contents)
def type(self) -> str:
- if "@datatype" in self.source and "base" in self.source["@datatype"]:
- return self.source["@datatype"]["base"]
- return ""
+ return objects.get(self.source, "@datatype.base") or ""
def actual_type(self) -> str:
- if "@datatype" in self.source and "actual" in self.source["@datatype"]:
- return self.source["@datatype"]["actual"]
- return ""
+ return objects.get(self.source, "@datatype.actual") or ""
def id(self) -> str:
- if "@admin" in self.source:
- if "id" in self.source["@admin"]:
- return self.source["@admin"]["id"]
- return ""
+ return objects.get(self.source, "@admin.id") or ""
def iaid(self) -> str | None:
if "identifier" in self.source:
@@ -80,15 +75,10 @@ def iaid(self) -> str | None:
return None
def uuid(self) -> str | None:
- if "@admin" in self.source:
- if "uuid" in self.source["@admin"]:
- return self.source["@admin"]["uuid"]
- return None
+ return objects.get(self.source, "@admin.uuid")
def is_digitised(self) -> bool:
- if "digitised" in self.source:
- return self.source["digitised"]
- return False
+ return objects.get(self.source, "digitised") or False
def is_tna(self) -> bool:
if "@datatype" in self.source and "group" in self.source["@datatype"]:
@@ -132,11 +122,7 @@ def title(self) -> str:
return ""
def summary_title(self) -> str | None:
- return (
- self.source["summary"]["title"]
- if "summary" in self.source and "title" in self.source["summary"]
- else None
- )
+ return objects.get(self.source, "summary.title")
def name(self) -> str | None:
names = self.names()
@@ -184,20 +170,8 @@ def date(self) -> str | None:
def lifespan(self) -> str | None:
if "birth" in self.source or "death" in self.source:
- date_from = (
- self.source["birth"]["date"]["value"]
- if "birth" in self.source
- and "date" in self.source["birth"]
- and "value" in self.source["birth"]["date"]
- else ""
- )
- date_to = (
- self.source["death"]["date"]["value"]
- if "death" in self.source
- and "date" in self.source["death"]
- and "value" in self.source["death"]["date"]
- else ""
- )
+ date_from = objects.get(self.source, "birth.date.value") or ""
+ date_to = objects.get(self.source, "death.date.value") or ""
return f"{date_from}–{date_to}" if date_from or date_to else None
return None
@@ -234,16 +208,8 @@ def date_range(self) -> str | None:
):
if value := self.source["origination"]["date"]["value"]:
return value
- date_from = (
- self.source["origination"]["date"]["from"]
- if "from" in self.source["origination"]["date"]
- else ""
- )
- date_to = (
- self.source["origination"]["date"]["to"]
- if "to" in self.source["origination"]["date"]
- else ""
- )
+ date_from = objects.get(self.source, "origination.date.from") or ""
+ date_to = objects.get(self.source, "origination.date.to") or ""
if date_from or date_to:
return f"{date_from}–{date_to}"
return None
@@ -307,8 +273,8 @@ def place_descriptions(self) -> list[str]:
def place_opening_times(self) -> str | None:
if "place" in self.source:
for place in self.source["place"]:
- if "description" in place and "value" in place["description"]:
- document = PyQuery(place["description"]["value"])
+ if place_details := objects.get(place, "description.value"):
+ document = PyQuery(place_details)
if opening_times := document("span.openinghours").text():
return opening_times
return None
@@ -316,8 +282,8 @@ def place_opening_times(self) -> str | None:
def place_disabled_access(self) -> str | None:
if "place" in self.source:
for place in self.source["place"]:
- if "description" in place and "value" in place["description"]:
- document = PyQuery(place["description"]["value"])
+ if place_details := objects.get(place, "description.value"):
+ document = PyQuery(place_details)
if disabled_access := document(
"span.disabledaccess"
).text():
@@ -327,8 +293,8 @@ def place_disabled_access(self) -> str | None:
def place_comments(self) -> str | None:
if "place" in self.source:
for place in self.source["place"]:
- if "description" in place and "value" in place["description"]:
- document = PyQuery(place["description"]["value"])
+ if place_details := objects.get(place, "description.value"):
+ document = PyQuery(place_details)
if comments := document("span.comments").text():
return comments
return None
@@ -336,8 +302,8 @@ def place_comments(self) -> str | None:
def place_fee(self) -> str | None:
if "place" in self.source:
for place in self.source["place"]:
- if "description" in place and "value" in place["description"]:
- document = PyQuery(place["description"]["value"])
+ if place_details := objects.get(place, "description.value"):
+ document = PyQuery(place_details)
if fee := document("span.fee").text():
return fee
return None
@@ -345,8 +311,8 @@ def place_fee(self) -> str | None:
def place_appointment(self) -> str | None:
if "place" in self.source:
for place in self.source["place"]:
- if "description" in place and "value" in place["description"]:
- document = PyQuery(place["description"]["value"])
+ if place_details := objects.get(place, "description.value"):
+ document = PyQuery(place_details)
if appointment := document("span.appointment").text():
return appointment
return None
@@ -401,7 +367,6 @@ def description(self) -> str | None:
None,
):
if "value" in description:
- # TODO: Breaks on C17371160
return (
self.strip_scope_and_content(description["value"])
or self.strip_wrapper_and_split_span(
@@ -468,25 +433,22 @@ def functions(self) -> str | None:
return None
def physical_description(self) -> str | None:
- return (
- self.source["measurements"]["display"]
- if "measurements" in self.source
- and "display" in self.source["measurements"]
- else None
- )
+ return objects.get(self.source, "measurements.display")
def epithet(self) -> str | None:
if "description" in self.source:
epithet = next(
(
- item
+ item["value"]
for item in self.source["description"]
- if "type" in item and item["type"] == "epithet"
+ if "value" in item
+ and "type" in item
+ and item["type"] == "epithet"
),
None,
)
- if epithet and "value" in epithet:
- return epithet["value"]
+ if epithet:
+ return epithet
return None
def history(self) -> str | None:
@@ -520,7 +482,7 @@ def biography(self) -> str | None:
if biography and "value" in biography and "url" in biography:
url = biography["url"]
text = biography["value"]
- url = f'{text}'
+ url = f'{text}'
return url
return None
@@ -606,16 +568,9 @@ def agents(self) -> dict:
),
None,
):
- id = (
- agent["@admin"]["id"]
- if "@admin" in agent and "id" in agent["@admin"]
- else ""
- )
- name = (
- agent["name"]["value"]
- if "name" in agent and "value" in agent["name"]
- else ""
- )
+ # Look inside the matched agent entry, not the record root.
+ id = objects.get(agent, "@admin.id")
+ name = objects.get(agent, "name.value")
if id and name:
places = (
[
@@ -641,57 +595,26 @@ def agents(self) -> dict:
def held_by(self) -> dict:
if "repository" in self.source:
- id = ""
- name = ""
- if (
- "name" in self.source["repository"]
- and "value" in self.source["repository"]["name"]
- ):
- name = self.source["repository"]["name"]["value"]
- if (
- "@admin" in self.source["repository"]
- and "id" in self.source["repository"]["@admin"]
- ):
- id = self.source["repository"]["@admin"]["id"]
+ id = objects.get(self.source, "repository.@admin.id")
+ name = objects.get(self.source, "repository.name.value")
if id and name:
return {"id": id, "name": name}
return {}
def legal_status(self) -> str | None:
- return (
- self.source["legal"]["status"]
- if "legal" in self.source and "status" in self.source["legal"]
- else None
- )
+ return objects.get(self.source, "legal.status")
def arrangement(self) -> str | None:
- if (
- "arrangement" in self.source
- and "value" in self.source["arrangement"]
- ):
- document = PyQuery(self.source["arrangement"]["value"])
+ if arrangement := objects.get(self.source, "arrangement.value"):
+ document = PyQuery(arrangement)
return str(document("span.arrangement").contents())
return None
def closure_status(self) -> str | None:
- return (
- self.source["availability"]["closure"]["label"]["value"]
- if "availability" in self.source
- and "closure" in self.source["availability"]
- and "label" in self.source["availability"]["closure"]
- and "value" in self.source["availability"]["closure"]["label"]
- else None
- )
+ return objects.get(self.source, "availability.closure.label.value")
def access_condition(self) -> str | None:
- return (
- self.source["availability"]["access"]["condition"]["value"]
- if "availability" in self.source
- and "access" in self.source["availability"]
- and "condition" in self.source["availability"]["access"]
- and "value" in self.source["availability"]["access"]["condition"]
- else None
- )
+ return objects.get(self.source, "availability.access.condition.value")
def creators(self) -> list[str]:
creators = []
@@ -700,43 +623,16 @@ def creators(self) -> list[str]:
and "creator" in self.source["origination"]
):
for creator in self.source["origination"]["creator"]:
- name_details = (
- creator["name"][0]
- if "name" in creator and len(creator["name"])
- else None
- )
- first_names = (
- "".join(name_details["first"])
- if name_details and "first" in name_details
- else ""
- )
- last_name = (
- name_details["last"]
- if name_details and "last" in name_details
- else ""
- )
- name = f"{first_names} {last_name}".strip()
+ first_names = objects.get(creator, "name[0].first") or []
+ last_name = objects.get(creator, "name[0].last") or ""
+ # Inner quotes must differ from the outer ones: quote reuse inside
+ # an f-string needs Python 3.12+ (PEP 701); this project allows 3.11.
+ name = f"{' '.join(first_names)} {last_name}".strip()
if not name:
- name = (
- name_details["value"]
- if name_details and "value" in name_details
- else ""
- )
- title = (
- name_details["title"]
- if name_details and "title" in name_details
- else None
- )
- date_from = (
- creator["date"]["from"]
- if "date" in creator and "from" in creator["date"]
- else ""
- )
- date_to = (
- creator["date"]["to"]
- if "date" in creator and "to" in creator["date"]
- else ""
- )
+ name = objects.get(creator, "name[0].value") or ""
+ title = objects.get(creator, "name[0].title")
+ date_from = objects.get(creator, "date.from") or ""
+ date_to = objects.get(creator, "date.to") or ""
creators.append(
{
"name": name,
@@ -752,33 +646,12 @@ def acquisition(self) -> list[str]:
acquisition = []
if "acquisition" in self.source:
for acquisitor in self.source["acquisition"]:
- title = (
- acquisitor["agent"]["name"][0]["value"]
- if "agent" in acquisitor
- and "name" in acquisitor["agent"]
- and len(acquisitor["agent"]["name"])
- and "value" in acquisitor["agent"]["name"][0]
- else (
- acquisitor["description"]["value"]
- if "description" in acquisitor
- and "value" in acquisitor["description"]
- else None
- )
- )
- date_from = (
- acquisitor["agent"]["date"]["from"]
- if "agent" in acquisitor
- and "date" in acquisitor["agent"]
- and "from" in acquisitor["agent"]["date"]
- else None
- )
- date_to = (
- acquisitor["agent"]["date"]["to"]
- if "agent" in acquisitor
- and "date" in acquisitor["agent"]
- and "to" in acquisitor["agent"]["date"]
- else None
- )
+ title = objects.get(
+ acquisitor, "agent.name[0].value"
+ ) or objects.get(acquisitor, "description.value")
+ # Missing dates stay None here, exactly as before the refactor.
+ date_from = objects.get(acquisitor, "agent.date.from")
+ date_to = objects.get(acquisitor, "agent.date.to")
acquisition.append(
{
"title": title,
@@ -800,11 +672,8 @@ def languages(self) -> list[str]:
def accumulation_dates(self) -> list[str]:
if "accruals" in self.source:
- if (
- "date" in self.source["accruals"]
- and "value" in self.source["accruals"]["date"]
- ):
- document = PyQuery(self.source["accruals"]["date"]["value"])
+ if accruals := objects.get(self.source, "accruals.date.value"):
+ document = PyQuery(accruals)
spans = document("span.accessionyears").find(
"span.accessionyear"
)
@@ -848,25 +717,14 @@ def hierarchies(self) -> list[dict]:
for hierarchy in self.source["@hierarchy"]:
hierarchy_levels = []
for level in hierarchy:
- id = (
- level["@admin"]["id"]
- if "@admin" in level and "id" in level["@admin"]
- else ""
- )
- title = (
- level["summary"]["title"]
- if "summary" in level and "title" in level["summary"]
- else ""
- )
- level_code = (
- level["level"]["code"]
- if "level" in level and "code" in level["level"]
- else ""
- )
+ # Keep the pre-refactor contract: missing fields are "" rather than None.
+ id = objects.get(level, "@admin.id") or ""
+ title = objects.get(level, "summary.title") or ""
+ level_code = objects.get(level, "level.code") or ""
hierarchy_level = {
"id": id,
"title": title,
- "level_code": level_code or "",
+ "level_code": level_code,
}
if level_code:
level_names = (
@@ -895,6 +752,37 @@ def hierarchies(self) -> list[dict]:
hierarchies.append(hierarchy_levels)
return hierarchies
+ def related_materials(self) -> list[dict]:
+ """Return one dict per entry under "related" in the source.
+
+ Each dict has "id", "title", "ref" and "note"; a value is None when
+ the source does not provide it. "ref" is the value of the first
+ identifier whose type is "reference number".
+ """
+ related_materials = []
+ # `or []` covers both a missing key and an explicit null value.
+ for item in objects.get(self.source, "related") or []:
+ reference_number = next(
+ (
+ identifier["value"]
+ for identifier in objects.get(item, "identifier") or []
+ if "value" in identifier
+ and "type" in identifier
+ and identifier["type"] == "reference number"
+ ),
+ None,
+ )
+ related_materials.append(
+ {
+ "id": objects.get(item, "@admin.id"),
+ "title": objects.get(item, "summary.title"),
+ # An empty reference number is reported as None.
+ "ref": reference_number or None,
+ "note": objects.get(item, "@link.note.value"),
+ }
+ )
+ return related_materials
+
def unpublished_finding_aids(self) -> str | None:
if "note" in self.source:
return next(
diff --git a/poetry.lock b/poetry.lock
index 272e99e..383e15c 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -580,6 +580,23 @@ files = [
[package.dependencies]
typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
+[[package]]
+name = "pydash"
+version = "7.0.6"
+description = "The kitchen sink of Python utility libraries for doing \"stuff\" in a functional way. Based on the Lo-Dash Javascript library."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "pydash-7.0.6-py3-none-any.whl", hash = "sha256:10e506935953fde4b0d6fe21a88e17783cd1479256ae96f285b5f89063b4efd6"},
+ {file = "pydash-7.0.6.tar.gz", hash = "sha256:7d9df7e9f36f2bbb08316b609480e7c6468185473a21bdd8e65dda7915565a26"},
+]
+
+[package.dependencies]
+typing-extensions = ">=3.10,<4.6.0 || >4.6.0"
+
+[package.extras]
+dev = ["Sphinx", "black", "build", "coverage", "docformatter", "flake8", "flake8-black", "flake8-bugbear", "flake8-isort", "furo", "importlib-metadata (<5)", "invoke", "isort", "mypy", "pylint", "pytest", "pytest-cov", "pytest-mypy-testing", "sphinx-autodoc-typehints", "tox", "twine", "wheel"]
+
[[package]]
name = "pyflakes"
version = "3.1.0"
@@ -688,4 +705,4 @@ zstd = ["zstandard (>=0.18.0)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
-content-hash = "09eca8816abb57a7e5f16e71bff23fc6fb9830884fe7ab868ffad03e621eb959"
+content-hash = "a61906add154d5e767f518d8af5baa3eee87cf0b4a5e815b38b64771ed294ade"
diff --git a/pyproject.toml b/pyproject.toml
index 241da9a..993dd99 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,6 +14,7 @@ isort = "^5.12.0"
requests = "^2.31.0"
fastapi = "^0.108.0"
pyquery = "^2.0.0"
+pydash = "^7.0.6"
[build-system]
requires = ["poetry-core"]