Skip to content

Commit

Permalink
common: add journal mapping
Browse files Browse the repository at this point in the history
* Adds `journal_title` mappings.

* ref: cern-sis/issues-scoap3#308
  • Loading branch information
drjova committed Feb 27, 2024
1 parent 0ac80fc commit 5c811c1
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 1 deletion.
2 changes: 2 additions & 0 deletions dags/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@
# One or more newline/tab characters immediately followed by at least two
# whitespace characters.
WHITE_SPACES = re.compile(r"[\n\t]{1,}" + r"\s{2,}")
# "<?CDATA ... ?>" wrapper; group 1 captures everything between the markers
# (greedy, and "." does not cross newlines since no DOTALL flag is set).
CDATA_PATTERN = re.compile(r"<\?CDATA(.*)\?>")
# Greedy span from the first "<fn" to the last "</fn>" on a single line.
FN_REGEX = re.compile(r"<fn.*<\/fn>")

# Journal abbreviation -> full journal title. Abbreviations that are not
# listed here are expected to be handled by the caller (e.g. left unchanged).
JOURNAL_MAPPING = {
    "PLB": "Physics Letters B",
    "NUPHB": "Nuclear Physics B",
}
8 changes: 7 additions & 1 deletion dags/common/parsing/generic_parsing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import re
from datetime import date

from dags.common.constants import JOURNAL_MAPPING


def take_first(arr):
try:
Expand Down Expand Up @@ -82,9 +84,13 @@ def parse_thesis_supervisors(value):
def publication_info(article):
if "publication_info" in article:
return article["publication_info"]

journal_title = article.get("journal_title", "")
journal_title = JOURNAL_MAPPING.get(journal_title, journal_title)

return [
{
"journal_title": article.get("journal_title", ""),
"journal_title": journal_title,
"journal_volume": article.get("journal_volume", ""),
"year": int(article.get("journal_year", 0)) or "",
"journal_issue": article.get("journal_issue", ""),
Expand Down
56 changes: 56 additions & 0 deletions tests/units/common/parsing/test_generic_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,62 @@ def test_parse_thesis_supervisors(test_input, expected):
],
id="Some values populated",
),
param(
{
"journal_title": "NUPHB",
"journal_volume": "Test Value",
"journal_year": "2022",
"journal_issue": "Test Value",
"journal_artid": "",
"journal_fpage": "",
"journal_lpage": "",
"journal_doctype": "",
"pubinfo_freetext": "",
"another_field": "Test Another Field",
},
[
{
"journal_title": "Nuclear Physics B",
"journal_volume": "Test Value",
"year": 2022,
"journal_issue": "Test Value",
"artid": "",
"page_start": "",
"page_end": "",
"material": "",
"pubinfo_freetext": "",
}
],
id="Test journal title NUPHB to Nuclear Physics B",
),
param(
{
"journal_title": "PLB",
"journal_volume": "Test Value",
"journal_year": "2022",
"journal_issue": "Test Value",
"journal_artid": "",
"journal_fpage": "",
"journal_lpage": "",
"journal_doctype": "",
"pubinfo_freetext": "",
"another_field": "Test Another Field",
},
[
{
"journal_title": "Physics Letters B",
"journal_volume": "Test Value",
"year": 2022,
"journal_issue": "Test Value",
"artid": "",
"page_start": "",
"page_end": "",
"material": "",
"pubinfo_freetext": "",
}
],
id="Test journal title PLB to Physics Letters B",
),
param(
{
"journal_title": "Test Value",
Expand Down

0 comments on commit 5c811c1

Please sign in to comment.