Skip to content

Commit

Permalink
common: fix no surname parsing issue
Browse files Browse the repository at this point in the history
Signed-off-by: pamfilos <[email protected]>
  • Loading branch information
pamfilos committed Dec 11, 2024
1 parent 9980e05 commit 4557afc
Show file tree
Hide file tree
Showing 3 changed files with 1,591 additions and 2 deletions.
4 changes: 2 additions & 2 deletions dags/common/parsing/generic_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ def parse_author(author):
if "given_names" in author and author["given_names"]:
author["given_names"] = collapse_initials(author["given_names"])
author["full_name"] = "{0}, {1}".format(
author["surname"], author["given_names"]
author.get("surname", ""), author["given_names"]
)
else:
author["full_name"] = author["surname"]
author["full_name"] = author.get("surname")

return author

Expand Down
1,576 changes: 1,576 additions & 0 deletions tests/units/iop/data/no_author_surname.xml

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions tests/units/iop/test_iop_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,19 @@ def test_date_published(shared_datadir, parser):
assert parsed_article["journal_year"] == 2022


def test_authors_no_surname(shared_datadir, parser):
    """Check that authors lacking a surname survive the parsing pipeline.

    Reads the ``no_author_surname.xml`` fixture, runs the publisher-specific
    parsing step followed by the generic parsing step, and asserts that at
    least one parsed author has a missing or empty ``surname`` entry.
    """
    xml_text = (shared_datadir / "no_author_surname.xml").read_text()
    root = ET.fromstring(xml_text)
    parsed_article = parser._generic_parsing(
        parser._publisher_specific_parsing(root)
    )

    # Keep every author whose surname is absent or falsy.
    authors_without_surname = [
        author
        for author in parsed_article["authors"]
        if not author.get("surname")
    ]
    assert len(authors_without_surname) > 0


def test_authors(shared_datadir, parser):
content = (shared_datadir / "all_fields.xml").read_text()
article = ET.fromstring(content)
Expand Down

0 comments on commit 4557afc

Please sign in to comment.