Skip to content
This repository has been archived by the owner on Oct 28, 2024. It is now read-only.

Commit

Permalink
Add former ref
Browse files Browse the repository at this point in the history
  • Loading branch information
ahosgood committed Jan 15, 2024
1 parent 31adaa9 commit 2019f90
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 7 deletions.
1 change: 1 addition & 0 deletions app/records/schemas/details.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def toJSON(self):
class Record(Details):
type: str = "record"
ref: str | None = None
former_ref: str | None = None
title: str = ""
description: str = ""
date: str = ""
Expand Down
1 change: 1 addition & 0 deletions app/sources/rosetta/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def parse_results(self, raw_results, source_url):
if parsed_data.type() == "record":
record = Record(parsed_data.id())
record.ref = parsed_data.identifier()
record.former_ref = parsed_data.former_identifier()
record.title = parsed_data.title()
record.description = parsed_data.description()
record.date = parsed_data.date_range()
Expand Down
37 changes: 30 additions & 7 deletions app/sources/rosetta/lib/response_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,15 @@ class RosettaSourceParser:
# Keep the supplied Rosetta data on self.source; the parser's accessor methods (e.g. type, former_identifier) read their values from it.
def __init__(self, rosetta_data_source):
self.source = rosetta_data_source

def strip_outside_tags(self, markup, query):
def strip_scope_and_content(self, markup):
document = PyQuery(markup)
return str(document(query).contents().eq(0))
return str(document("span.scopecontent").contents().eq(0))

def strip_scope_and_content(self, markup):
return self.strip_outside_tags(markup, "span.scopecontent")
def strip_wrapper_and_split_span(self, markup):
    """Join the text of each ``span.emph`` inside ``span.wrapper`` with ``<br>``.

    The markup is parsed with PyQuery; every ``span.emph`` found beneath a
    ``span.wrapper`` contributes its ``.text``, and spans whose ``.text`` is
    ``None`` are left out. With no qualifying spans the result is ``""``.
    """
    emphasised = PyQuery(markup)("span.wrapper").find("span.emph")
    return "<br>".join(
        element.text for element in emphasised if element.text is not None
    )

def type(self) -> str:
if "@datatype" in self.source and "base" in self.source["@datatype"]:
Expand Down Expand Up @@ -275,9 +278,14 @@ def description(self) -> str:
None,
):
if "value" in description:
return self.strip_scope_and_content(
description["value"]
) # TODO: Breaks on C17371160
# TODO: Breaks on C17371160
return (
self.strip_scope_and_content(description["value"])
or self.strip_wrapper_and_split_span(
description["value"]
)
or description["value"]
)
elif (
"ephemera" in description
and "value" in description["ephemera"]
Expand Down Expand Up @@ -406,6 +414,21 @@ def identifier(self) -> str:
)
return ""

def former_identifier(self) -> str:
    """Return the record's former departmental reference, or "" if absent.

    Scans ``self.source["identifier"]`` for the first entry whose ``type``
    is ``"former reference (Department)"`` and which has a ``value`` key.

    Returns:
        That entry's ``value`` when it is truthy; otherwise ``""``. ``""``
        is also returned when there is no ``identifier`` key, when it is
        empty or ``None``, or when no entry matches.
    """
    if "identifier" not in self.source:
        return ""
    # ``or ()`` tolerates an explicit null/None identifier list instead of
    # raising TypeError while iterating.
    for item in self.source["identifier"] or ():
        if (
            "type" in item
            and item["type"] == "former reference (Department)"
            and "value" in item
        ):
            # Only the first matching entry is considered; a falsy value
            # collapses to "" so the declared ``str`` return type holds.
            return item["value"] or ""
    return ""

def reference_number(self) -> str:
if "identifier" in self.source:
if reference_number := next(
Expand Down

0 comments on commit 2019f90

Please sign in to comment.