Skip to content

Commit

Permalink
Parsers: Elsevier, OUP, Springer ORCID parsing
Browse files Browse the repository at this point in the history
* Adapted tests.
* ref: cern-sis/issues-scoap3#345
  • Loading branch information
ErnestaP committed Jul 17, 2024
1 parent 7bcdcf5 commit f01ecf1
Show file tree
Hide file tree
Showing 8 changed files with 4,762 additions and 203 deletions.
3 changes: 3 additions & 0 deletions dags/elsevier/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def _get_authors(self, article):
def _get_authors_details(self, author_group):
authors = []
for author in author_group.findall("author"):
+ orcid = author.get("orcid")
surname = extract_text(
article=author, path="surname", field_name="surname", dois=self.dois
)
Expand All @@ -146,6 +147,8 @@ def _get_authors_details(self, author_group):
auth_dict["affiliations"] = affiliations
if emails:
auth_dict["email"] = emails
+ if orcid:
+ auth_dict["orcid"] = orcid
authors.append(auth_dict)

if not authors:
Expand Down
19 changes: 10 additions & 9 deletions dags/oup/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def _get_authors(self, article):
)
authors = []
for contribution in contributions:
+ orcid = get_text_value(contribution.find("contrib-id"))
surname = get_text_value(contribution.find("name/surname"))
given_names = get_text_value(contribution.find("name/given-names"))
email = get_text_value(contribution.find("email"))
Expand All @@ -183,20 +184,20 @@ def _get_authors(self, article):
if country:
country = country.capitalize()
_aff["country"] = country

full_affiliation.append(_aff)

if not all([surname, given_names, email]) and not full_affiliation:
pass
else:
- authors.append(
- {
- "surname": surname,
- "given_names": given_names,
- "email": email,
- "affiliations": full_affiliation,
- }
- )
+ author = {
+ "surname": surname,
+ "given_names": given_names,
+ "email": email,
+ "affiliations": full_affiliation,
+ }
+ if orcid:
+ author.update({"orcid": orcid})
+ authors.append(author)
return authors

def _get_date(self, article):
Expand Down
4 changes: 2 additions & 2 deletions dags/springer/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
CustomExtractor,
TextExtractor,
)
- from common.utils import construct_license
+ from common.utils import construct_license, clean_text
from structlog import get_logger


Expand Down Expand Up @@ -211,7 +211,7 @@ def _get_affiliations(self, author_group, contrib):
affiliations.append(cleaned_aff)

mapped_affiliations = [
- {"value": aff, "organization": org, **({"country": country} if country else {})}
+ {"value": clean_text(aff), "organization": clean_text(org), **({"country": country} if country else {})}
for aff, org, country, in affiliations
]

Expand Down
Loading

0 comments on commit f01ecf1

Please sign in to comment.