From e3e2e7c456f7d423968dbf5d2a3588640268eebf Mon Sep 17 00:00:00 2001 From: pamfilos Date: Tue, 26 Mar 2024 10:42:48 +0100 Subject: [PATCH] dags: country updates Signed-off-by: pamfilos --- dags/aps/parser.py | 3 +- dags/common/constants.py | 217 ++++++++ dags/common/countries_mapping.py | 177 ------ dags/common/enhancer.py | 18 +- dags/common/utils.py | 11 +- dags/elsevier/parser.py | 2 - dags/hindawi/parser.py | 2 - dags/oup/parser.py | 7 +- dags/springer/parser.py | 4 +- .../iop/test_iop_dag_process_file.py | 7 + .../oup/test_oup_dag_process_file.py | 32 ++ tests/units/aps/test_aps_parser.py | 12 +- tests/units/elsevier/test_elsevier_parser.py | 512 +++++++++--------- tests/units/hindawi/test_hindawi_enhance.py | 148 +++++ tests/units/hindawi/test_hindawi_parser.py | 16 +- tests/units/springer/test_parser.py | 12 +- 16 files changed, 730 insertions(+), 450 deletions(-) delete mode 100644 dags/common/countries_mapping.py create mode 100644 tests/units/hindawi/test_hindawi_enhance.py diff --git a/dags/aps/parser.py b/dags/aps/parser.py index 8675313b..6d10b10a 100644 --- a/dags/aps/parser.py +++ b/dags/aps/parser.py @@ -2,7 +2,7 @@ from common.parsing.json_extractors import CustomExtractor, NestedValueExtractor from common.parsing.parser import IParser -from common.utils import construct_license, parse_country_from_value +from common.utils import construct_license from inspire_utils.record import get_value from structlog import get_logger @@ -102,7 +102,6 @@ def _get_affiliations(self, article, affiliationIds): { "value": affiliation["name"], "organization": (",").join(affiliation["name"].split(",")[:-1]), - "country": parse_country_from_value(affiliation["name"]), } for affiliation in article["affiliations"] if affiliation["id"] in affiliationIds diff --git a/dags/common/constants.py b/dags/common/constants.py index c4a8dc4e..4b297d65 100644 --- a/dags/common/constants.py +++ b/dags/common/constants.py @@ -1,4 +1,5 @@ import re +from collections import 
OrderedDict ARXIV_EXTRACTION_PATTERN = re.compile(r"(arxiv:|v[0-9]$)", flags=re.I) NODE_ATTRIBUTE_NOT_FOUND_ERRORS = (AttributeError, TypeError) @@ -14,3 +15,219 @@ FN_REGEX = re.compile(r"") JOURNAL_MAPPING = {"PLB": "Physics Letters B", "NUPHB": "Nuclear Physics B"} + +PARTNERS = [ + "Australia", + "Austria", + "Belgium", + "Canada", + "CERN", + "China", + "Czech Republic", + "Denmark", + "Finland", + "France", + "Germany", + "Greece", + "Hong-Kong", + "Hungary", + "Iceland", + "Israel", + "Italy", + "Japan", + "JINR", + "Mexico", + "Netherlands", + "Norway", + "Poland", + "Portugal", + "Slovak Republic", + "South Africa", + "South Korea", + "Spain", + "Sweden", + "Switzerland", + "Taiwan", + "Turkey", + "United Kingdom", + "United States", +] + +COUNTRIES_DEFAULT_MAPPING = OrderedDict( + [ + ("INFN", "Italy"), + ("Democratic People's Republic of Korea", "North Korea"), + ("DPR Korea", "North Korea"), + ("DPR. Korea", "North Korea"), + ("CERN", "CERN"), + ("European Organization for Nuclear Research", "CERN"), + ("KEK", "Japan"), + ("DESY", "Germany"), + ("FERMILAB", "USA"), + ("FNAL", "USA"), + ("SLACK", "USA"), + ("Stanford Linear Accelerator Center", "USA"), + ("Joint Institute for Nuclear Research", "JINR"), + ("JINR", "JINR"), + ("Northern Cyprus", "Turkey"), + ("North Cyprus", "Turkey"), + ("New Mexico", "USA"), + ("South China Normal University", "China"), + ("Hong Kong China", "Hong Kong"), + ("Hong-Kong China", "Hong Kong"), + ("Hong Kong, China", "Hong Kong"), + ("Hong Kong", "Hong Kong"), + ("Hong-Kong", "Hong Kong"), + ("Algeria", "Algeria"), + ("Argentina", "Argentina"), + ("Armenia", "Armenia"), + ("Australia", "Australia"), + ("Austria", "Austria"), + ("Azerbaijan", "Azerbaijan"), + ("Belarus", "Belarus"), + ("Belgium", "Belgium"), + ("Belgique", "Belgium"), + ("Bangladesh", "Bangladesh"), + ("Brazil", "Brazil"), + ("Brasil", "Brazil"), + ("Benin", "Benin"), + (u"Bénin", "Benin"), + ("Bulgaria", "Bulgaria"), + ("Bosnia and Herzegovina", "Bosnia and 
Herzegovina"), + ("Canada", "Canada"), + ("Chile", "Chile"), + ("ROC", "Taiwan"), + ("R.O.C", "Taiwan"), + ("Republic of China", "Taiwan"), + ("China (PRC)", "China"), + ("PR China", "China"), + ("China", "China"), + ("People's Republic of China", "China"), + ("Republic of China", "China"), + ("Colombia", "Colombia"), + ("Costa Rica", "Costa Rica"), + ("Cuba", "Cuba"), + ("Croatia", "Croatia"), + ("Cyprus", "Cyprus"), + ("Czech Republic", "Czech Republic"), + ("Czech", "Czech Republic"), + ("Czechia", "Czech Republic"), + ("Denmark", "Denmark"), + ("Egypt", "Egypt"), + ("Estonia", "Estonia"), + ("Ecuador", "Ecuador"), + ("Finland", "Finland"), + ("France", "France"), + ("Germany", "Germany"), + ("Deutschland", "Germany"), + ("Greece", "Greece"), + ("Hungary", "Hungary"), + ("Iceland", "Iceland"), + ("India", "India"), + ("Indonesia", "Indonesia"), + ("Iran", "Iran"), + ("Ireland", "Ireland"), + ("Israel", "Israel"), + ("Italy", "Italy"), + ("Italia", "Italy"), + ("Japan", "Japan"), + ("Jamaica", "Jamaica"), + ("Korea", "South Korea"), + ("Republic of Korea", "South Korea"), + ("South Korea", "South Korea"), + ("Latvia", "Latvia"), + ("Lebanon", "Lebanon"), + ("Lithuania", "Lithuania"), + ("Luxembourg", "Luxembourg"), + ("Macedonia", "Macedonia"), + ("Mexico", "Mexico"), + (u"México", "Mexico"), + ("Monaco", "Monaco"), + ("Montenegro", "Montenegro"), + ("Morocco", "Morocco"), + ("Niger", "Niger"), + ("Nigeria", "Nigeria"), + ("Netherlands", "Netherlands"), + ("The Netherlands", "Netherlands"), + ("New Zealand", "New Zealand"), + ("Zealand", "New Zealand"), + ("Norway", "Norway"), + ("Oman", "Oman"), + ("Sultanate of Oman", "Oman"), + ("Pakistan", "Pakistan"), + ("Panama", "Panama"), + ("Philipines", "Philipines"), + ("Poland", "Poland"), + ("Portugalo", "Portugal"), + ("Portugal", "Portugal"), + ("P.R.China", "China"), + (u"People’s Republic of China", "China"), + ("Republic of Belarus", "Belarus"), + ("Republic of Benin", "Benin"), + ("Republic of Korea", "South 
Korea"), + ("Republic of San Marino", "San Marino"), + ("Republic of South Africa", "South Africa"), + ("Romania", "Romania"), + ("Russia", "Russia"), + ("Russian Federation", "Russia"), + ("Saudi Arabia", "Saudi Arabia"), + ("Kingdom of Saudi Arabia", "Saudi Arabia"), + ("Arabia", "Saudi Arabia"), + ("Serbia", "Serbia"), + ("Singapore", "Singapore"), + ("Slovak Republic", "Slovakia"), + ("Slovak", "Slovakia"), + ("Slovakia", "Slovakia"), + ("Slovenia", "Slovenia"), + ("South Africa", "South Africa"), + ("Africa", "South Africa"), + (u"España", "Spain"), + ("Spain", "Spain"), + ("Sudan", "Sudan"), + ("Sweden", "Sweden"), + ("Switzerland", "Switzerland"), + ("Syria", "Syria"), + ("Taiwan", "Taiwan"), + ("Thailand", "Thailand"), + ("Tunisia", "Tunisia"), + ("Turkey", "Turkey"), + ("Ukraine", "Ukraine"), + ("United Kingdom", "UK"), + ("Kingdom", "UK"), + ("United Kingdom of Great Britain and Northern Ireland", "UK"), + ("UK", "UK"), + ("England", "UK"), + ("Scotland", "UK"), + ("Wales", "UK"), + ("New South Wales", "Australia"), + ("U.K", "UK"), + ("United States of America", "USA"), + ("United States", "USA"), + ("USA", "USA"), + ("U.S.A", "USA"), + ("America", "USA"), + ("Uruguay", "Uruguay"), + ("Uzbekistan", "Uzbekistan"), + ("Venezuela", "Venezuela"), + ("Vietnam", "Vietnam"), + ("Viet Nam", "Vietnam"), + ("Yemen", "Yemen"), + ("Peru", "Peru"), + ("Kuwait", "Kuwait"), + ("Sri Lanka", "Sri Lanka"), + ("Lanka", "Sri Lanka"), + ("Kazakhstan", "Kazakhstan"), + ("Mongolia", "Mongolia"), + ("United Arab Emirates", "United Arab Emirates"), + ("Emirates", "United Arab Emirates"), + ("Malaysia", "Malaysia"), + ("Qatar", "Qatar"), + ("Kyrgyz Republic", "Kyrgyz Republic"), + ("Jordan", "Jordan"), + ("Belgrade", "Serbia"), + ("Istanbul", "Turkey"), + ("Ankara", "Turkey"), + ("Rome", "Italy"), + ("Georgia", "Georgia"), + ] +) diff --git a/dags/common/countries_mapping.py b/dags/common/countries_mapping.py deleted file mode 100644 index 8c54f02f..00000000 --- 
a/dags/common/countries_mapping.py +++ /dev/null @@ -1,177 +0,0 @@ -COUNTRIES_DEFAULT_MAPPING = { - "INFN": "Italy", - "Democratic People's Republic of Korea": "North Korea", - "Korea, Democratic People's Republic of": "North Korea", - "Korea, Republic of": "South Korea", - "DPR Korea": "North Korea", - "DPR. Korea": "North Korea", - "CERN": "CERN", - "European Organization for Nuclear Research": "CERN", - "Conseil Européen pour la Recherche Nucléaire": "CERN", - "KEK": "Japan", - "DESY": "Germany", - "FERMILAB": "USA", - "FNAL": "USA", - "SLACK": "USA", - "Stanford Linear Accelerator Center": "USA", - "Joint Institute for Nuclear Research": "JINR", - "JINR": "JINR", - "Northern Cyprus": "Turkey", - "North Cyprus": "Turkey", - "New Mexico": "USA", - "South China Normal University": "China", - "Hong Kong China": "Hong Kong", - "Hong-Kong China": "Hong Kong", - "Hong Kong, China": "Hong Kong", - "Hong Kong": "Hong Kong", - "Hong-Kong": "Hong Kong", - "Algeria": "Algeria", - "Argentina": "Argentina", - "Armenia": "Armenia", - "Australia": "Australia", - "Austria": "Austria", - "Azerbaijan": "Azerbaijan", - "Belarus": "Belarus", - "Belgium": "Belgium", - "Belgique": "Belgium", - "Bangladesh": "Bangladesh", - "Brazil": "Brazil", - "Brasil": "Brazil", - "Benin": "Benin", - "Bulgaria": "Bulgaria", - "Bosnia and Herzegovina": "Bosnia and Herzegovina", - "Canada": "Canada", - "Chile": "Chile", - "ROC": "Taiwan", - "R.O.C": "Taiwan", - "Republic of China": "Taiwan", - "China (PRC)": "China", - "PR China": "China", - "China": "China", - "People's Republic of China": "China", - "Republic of China": "China", - "Colombia": "Colombia", - "Costa Rica": "Costa Rica", - "Cuba": "Cuba", - "Croatia": "Croatia", - "Cyprus": "Cyprus", - "Czech Republic": "Czech Republic", - "Czech": "Czech Republic", - "Czechia": "Czech Republic", - "Denmark": "Denmark", - "Egypt": "Egypt", - "Estonia": "Estonia", - "Ecuador": "Ecuador", - "Finland": "Finland", - "France": "France", - "Germany": 
"Germany", - "Deutschland": "Germany", - "Greece": "Greece", - "Hungary": "Hungary", - "Iceland": "Iceland", - "India": "India", - "Indonesia": "Indonesia", - "Iran": "Iran", - "Ireland": "Ireland", - "Israel": "Israel", - "Italy": "Italy", - "Italia": "Italy", - "Japan": "Japan", - "Jamaica": "Jamaica", - "Korea": "South Korea", - "Republic of Korea": "South Korea", - "South Korea": "South Korea", - "Latvia": "Latvia", - "Lebanon": "Lebanon", - "Lithuania": "Lithuania", - "Luxembourg": "Luxembourg", - "Macedonia": "Macedonia", - "Mexico": "Mexico", - "Monaco": "Monaco", - "Montenegro": "Montenegro", - "Morocco": "Morocco", - "Niger": "Niger", - "Nigeria": "Nigeria", - "Netherlands": "Netherlands", - "The Netherlands": "Netherlands", - "New Zealand": "New Zealand", - "Zealand": "New Zealand", - "Norway": "Norway", - "Oman": "Oman", - "Sultanate of Oman": "Oman", - "Pakistan": "Pakistan", - "Panama": "Panama", - "Philipines": "Philipines", - "Poland": "Poland", - "Portugalo": "Portugal", - "Portugal": "Portugal", - "P.R.China": "China", - "People’s Republic of China": "China", - "Republic of Belarus": "Belarus", - "Republic of Benin": "Benin", - "Republic of Korea": "South Korea", - "Republic of San Marino": "San Marino", - "Republic of South Africa": "South Africa", - "Romania": "Romania", - "Russia": "Russia", - "Russian Federation": "Russia", - "Saudi Arabia": "Saudi Arabia", - "Kingdom of Saudi Arabia": "Saudi Arabia", - "Arabia": "Saudi Arabia", - "Serbia": "Serbia", - "Singapore": "Singapore", - "Slovak Republic": "Slovakia", - "Slovak": "Slovakia", - "Slovakia": "Slovakia", - "Slovenia": "Slovenia", - "South Africa": "South Africa", - "Africa": "South Africa", - "España": "Spain", - "Spain": "Spain", - "Sudan": "Sudan", - "Sweden": "Sweden", - "Switzerland": "Switzerland", - "Syria": "Syria", - "Taiwan": "Taiwan", - "Thailand": "Thailand", - "Tunisia": "Tunisia", - "Turkey": "Turkey", - "Ukraine": "Ukraine", - "United Kingdom": "UK", - "Kingdom": "UK", - 
"United Kingdom of Great Britain and Northern Ireland": "UK", - "UK": "UK", - "England": "UK", - "Scotland": "UK", - "Wales": "UK", - "New South Wales": "Australia", - "U.K": "UK", - "United States of America": "USA", - "United States": "USA", - "USA": "USA", - "U.S.A": "USA", - "America": "USA", - "Uruguay": "Uruguay", - "Uzbekistan": "Uzbekistan", - "Venezuela": "Venezuela", - "Vietnam": "Vietnam", - "Viet Nam": "Vietnam", - "Yemen": "Yemen", - "Peru": "Peru", - "Kuwait": "Kuwait", - "Sri Lanka": "Sri Lanka", - "Lanka": "Sri Lanka", - "Kazakhstan": "Kazakhstan", - "Mongolia": "Mongolia", - "United Arab Emirates": "United Arab Emirates", - "Emirates": "United Arab Emirates", - "Malaysia": "Malaysia", - "Qatar": "Qatar", - "Kyrgyz Republic": "Kyrgyz Republic", - "Jordan": "Jordan", - "Belgrade": "Serbia", - "Istanbul": "Turkey", - "Ankara": "Turkey", - "Rome": "Italy", - "Georgia": "Georgia", -} diff --git a/dags/common/enhancer.py b/dags/common/enhancer.py index 3b58f138..2c2979af 100644 --- a/dags/common/enhancer.py +++ b/dags/common/enhancer.py @@ -2,6 +2,7 @@ import re from common.constants import FN_REGEX +from common.utils import parse_country_from_value, get_country_ISO_name class Enhancer: @@ -43,17 +44,28 @@ def __construct_titles(self, item, publisher): } ] - def __remove_country(self, item): + def __construct_authors(self, item): + # add_nations(item) pattern_for_cern_cooperation_agreement = re.compile( - r"cooperation agreement with cern", re.IGNORECASE + r'cooperation agreement with cern', re.IGNORECASE ) for author in item.get("authors", []): for affiliation in author.get("affiliations", []): + # Remove country, on special string 'cooperation agreement with cern' match_pattern = pattern_for_cern_cooperation_agreement.search( affiliation.get("value", "") ) if match_pattern: affiliation.pop("country", None) + continue + + if not affiliation.get("country"): + affiliation["country"] = parse_country_from_value(affiliation.get("value")) + + 
affiliation["country"] = get_country_ISO_name(affiliation["country"]) + + return item + + def __call__(self, publisher, item): creation_date = datetime.datetime.now().isoformat() @@ -64,5 +76,5 @@ def __call__(self, publisher, item): self.__construct_imprints(item_copy, publisher) self.__construct_record_creation_date(item_copy, creation_date) self.__construct_titles(item_copy, publisher) - self.__remove_country(item) + self.__construct_authors(item_copy) return item_copy diff --git a/dags/common/utils.py b/dags/common/utils.py index 0d7fee5d..ada54514 100644 --- a/dags/common/utils.py +++ b/dags/common/utils.py @@ -21,7 +21,7 @@ CREATIVE_COMMONS_PATTERN, LICENSE_PATTERN, ) -from common.countries_mapping import COUNTRIES_DEFAULT_MAPPING +from common.constants import COUNTRIES_DEFAULT_MAPPING from common.exceptions import ( FoundMoreThanOneMatchOrNone, UnknownFileExtension, @@ -288,3 +288,12 @@ def find_country_match_from_mapping(affiliation_value): for key in COUNTRIES_DEFAULT_MAPPING: if re.search(r"\b%s\b" % key, affiliation_value, flags=re.IGNORECASE): return COUNTRIES_DEFAULT_MAPPING[key] + +def get_country_ISO_name(country): + if not country or country in COUNTRIES_DEFAULT_MAPPING: + return COUNTRIES_DEFAULT_MAPPING.get(country, country) + try: + countries = pycountry.countries.search_fuzzy(country) + except LookupError: + return country + return countries[0].name if len(countries) == 1 else country \ No newline at end of file diff --git a/dags/elsevier/parser.py b/dags/elsevier/parser.py index b946584c..6a2850b1 100644 --- a/dags/elsevier/parser.py +++ b/dags/elsevier/parser.py @@ -183,7 +183,6 @@ def _get_affiliation(self, article, ref_id="", affiliations=[]): field_name="country", dois=self.dois, ) - country = country and parse_country_from_value(country) if affiliation_value and organization and country: affiliations.append( { @@ -203,7 +202,6 @@ def _get_affiliation(self, article, ref_id="", affiliations=[]): affiliations.append( { "value": affiliation_value, - "country":
parse_country_from_value(affiliation_value), } ) diff --git a/dags/hindawi/parser.py b/dags/hindawi/parser.py index 768ba749..ccd29bfb 100644 --- a/dags/hindawi/parser.py +++ b/dags/hindawi/parser.py @@ -3,7 +3,6 @@ from common.constants import ORGANIZATION_PARSING_PATTERN from common.parsing.parser import IParser from common.parsing.xml_extractors import ConstantExtractor, CustomExtractor -from common.utils import parse_country_from_value from hindawi.xml_extractors import HindawiTextExtractor as TextExtractor from structlog import get_logger @@ -121,7 +120,6 @@ def _get_affiliations(self, author): { "value": affiliation.text, "organization": ORGANIZATION_PARSING_PATTERN.sub("", affiliation.text), - "country": parse_country_from_value(affiliation.text), } for affiliation in affiliations ] diff --git a/dags/oup/parser.py b/dags/oup/parser.py index a79e4036..d477b827 100644 --- a/dags/oup/parser.py +++ b/dags/oup/parser.py @@ -179,11 +179,12 @@ def _get_authors(self, article): "institution", ) ) + _aff = {"institution": institution} if country: country = country.capitalize() - full_affiliation.append( - {"institution": institution, "country": country} - ) + _aff["country"] = country + + full_affiliation.append(_aff) if not all([surname, given_names, email]) and not full_affiliation: pass diff --git a/dags/springer/parser.py b/dags/springer/parser.py index a548f8bb..fc013dab 100644 --- a/dags/springer/parser.py +++ b/dags/springer/parser.py @@ -179,7 +179,7 @@ def _clean_aff(self, article): ] if node is not None ] - country = parse_country_from_value(country_node.text) + country = country_node.text result.append(country) return ", ".join(result), org_name_node.text, country @@ -211,7 +211,7 @@ def _get_affiliations(self, author_group, contrib): affiliations.append(cleaned_aff) mapped_affiliations = [ - {"value": aff, "organization": org, "country": country} + {"value": aff, "organization": org, **({"country": country} if country else {})} for aff, org, country, in 
affiliations ] diff --git a/tests/integration/iop/test_iop_dag_process_file.py b/tests/integration/iop/test_iop_dag_process_file.py index d6fe1911..6b28be43 100644 --- a/tests/integration/iop/test_iop_dag_process_file.py +++ b/tests/integration/iop/test_iop_dag_process_file.py @@ -50,6 +50,13 @@ def test_dag_loaded(dag): assert len(dag.tasks) == 5 +def test_affiliation_countries_in_enriched(article): + authors = article.get("authors", []) + for author in authors: + for aff in author.get("affiliations"): + assert aff.get("country") is not None + + publisher = "IOP" generic_pseudo_parser_output = { diff --git a/tests/integration/oup/test_oup_dag_process_file.py b/tests/integration/oup/test_oup_dag_process_file.py index 6c642f23..5405f702 100644 --- a/tests/integration/oup/test_oup_dag_process_file.py +++ b/tests/integration/oup/test_oup_dag_process_file.py @@ -23,6 +23,27 @@ def parser(): return OUPParser() +@fixture +def articles(parser): + data_dir = "./data/oup/" + test_file = "2022-09-22_00:30:02_ptep_iss_2022_9.xml.zip" + + def extract_zip_to_article(zip_filename): + with ZipFile(zip_filename, "r") as zip_file: + xmls = [ + file.filename for file in zip_file.filelist if ".xml" in file.filename + ] + xmls_content = [ + parse_without_names_spaces(zip_file.read(xml).decode("utf-8")) + for xml in xmls + ] + return xmls_content + + articles = extract_zip_to_article(data_dir + test_file) + + return articles + + @fixture def article(parser): data_dir = "./data/oup/" @@ -47,6 +68,17 @@ def extract_zip_to_article(zip_filename): return enriched_file +def test_affiliation_countries_in_enriched(parser, articles): + for article in articles: + parsed_file = parser.parse(article) + enhanced_file = oup_enhance_file(parsed_file) + enriched_file = oup_enrich_file(enhanced_file) + + authors = enriched_file.get("authors", []) + for author in authors: + for aff in author.get("affiliations"): + assert aff.get("country") is not None + def test_dag_loaded(dag): assert dag assert 
len(dag.tasks) == 5 diff --git a/tests/units/aps/test_aps_parser.py b/tests/units/aps/test_aps_parser.py index 53d066cc..ffdb265c 100644 --- a/tests/units/aps/test_aps_parser.py +++ b/tests/units/aps/test_aps_parser.py @@ -63,7 +63,7 @@ def parsed_articles(parser, articles): { "value": "Department of Physics, University of Oregon, Eugene, Oregon 97403, USA", "organization": "Department of Physics, University of Oregon, Eugene, Oregon 97403", - "country": "USA", + # "country": "USA", } ], }, @@ -75,7 +75,7 @@ def parsed_articles(parser, articles): { "value": "Department of Physics, University of Oregon, Eugene, Oregon 97403, USA", "organization": "Department of Physics, University of Oregon, Eugene, Oregon 97403", - "country": "USA", + # "country": "USA", } ], }, @@ -87,7 +87,7 @@ def parsed_articles(parser, articles): { "value": "Department of Physics, University of Oregon, Eugene, Oregon 97403, USA", "organization": "Department of Physics, University of Oregon, Eugene, Oregon 97403", - "country": "USA", + # "country": "USA", } ], }, @@ -101,7 +101,7 @@ def parsed_articles(parser, articles): { "value": "Department of Physics, University of Toronto, Toronto, Ontario, Canada M5S1A7", "organization": "Department of Physics, University of Toronto, Toronto, Ontario", - "country": "Canada", + # "country": "Canada", } ], }, @@ -113,7 +113,7 @@ def parsed_articles(parser, articles): { "value": "Department of Physics, University of Toronto, Toronto, Ontario, Canada M5S1A7", "organization": "Department of Physics, University of Toronto, Toronto, Ontario", - "country": "Canada", + # "country": "Canada", } ], }, @@ -125,7 +125,7 @@ def parsed_articles(parser, articles): { "value": "Department of Physics, University of Toronto, Toronto, Ontario, Canada M5S1A7", "organization": "Department of Physics, University of Toronto, Toronto, Ontario", - "country": "Canada", + # "country": "Canada", } ], }, diff --git a/tests/units/elsevier/test_elsevier_parser.py 
b/tests/units/elsevier/test_elsevier_parser.py index 0eab6547..3f3ad8b7 100644 --- a/tests/units/elsevier/test_elsevier_parser.py +++ b/tests/units/elsevier/test_elsevier_parser.py @@ -1,6 +1,7 @@ from common.utils import parse_without_names_spaces from elsevier.parser import ElsevierParser from pytest import fixture, mark, param +from common.enhancer import Enhancer @fixture(scope="module") @@ -27,6 +28,10 @@ def articles(shared_datadir): def parsed_articles(parser, articles): return [parser._publisher_specific_parsing(article) for article in articles] +@fixture() +def enhanced_articles(parser, parsed_articles): + return [Enhancer()("Elsevier", article) for article in parsed_articles] + @mark.parametrize( "expected, key", @@ -231,7 +236,7 @@ def parsed_articles(parser, articles): { "value": "Korea Institute of Science and Technology Information, Daejeon, Republic of Korea", "organization": "Korea Institute of Science and Technology Information", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -242,7 +247,7 @@ def parsed_articles(parser, articles): { "value": "Faculty of Science, P.J. 
Šafárik University, Košice, Slovak Republic", "organization": "Faculty of Science", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -252,7 +257,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -273,7 +278,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -350,7 +355,7 @@ def parsed_articles(parser, articles): { "value": "University of Houston, Houston, TX, United States", "organization": "University of Houston", - "country": "USA", + "country": "United States", } ], }, @@ -404,7 +409,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -492,7 +497,7 @@ def parsed_articles(parser, articles): { "value": "Lawrence Berkeley National Laboratory, Berkeley, CA, United States", "organization": "Lawrence Berkeley National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -558,7 +563,7 @@ def parsed_articles(parser, articles): { "value": "Yale University, New Haven, CT, United States", "organization": "Yale University", - "country": "USA", + "country": "United States", } ], }, @@ -624,7 +629,7 @@ def parsed_articles(parser, articles): { "value": "Gangneung-Wonju National University, Gangneung, Republic of Korea", "organization": "Gangneung-Wonju National University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -778,7 +783,7 @@ def parsed_articles(parser, articles): { "value": "Nuclear Physics Group, STFC Daresbury Laboratory, Daresbury, United Kingdom", "organization": "Nuclear Physics Group", - "country": "UK", + "country": "United Kingdom", } ], 
}, @@ -811,7 +816,7 @@ def parsed_articles(parser, articles): { "value": "University of Liverpool, Liverpool, United Kingdom", "organization": "University of Liverpool", - "country": "UK", + "country": "United Kingdom", } ], }, @@ -898,7 +903,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an international laboratory covered by a cooperation agreement with CERN", - "country": "CERN" + # "country": "CERN" } ], }, @@ -942,7 +947,7 @@ def parsed_articles(parser, articles): { "value": "Yale University, New Haven, CT, United States", "organization": "Yale University", - "country": "USA", + "country": "United States", } ], }, @@ -1008,7 +1013,7 @@ def parsed_articles(parser, articles): { "value": "University of Houston, Houston, TX, United States", "organization": "University of Houston", - "country": "USA", + "country": "United States", } ], }, @@ -1018,7 +1023,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -1028,7 +1033,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -1076,7 +1081,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -1274,7 +1279,7 @@ def parsed_articles(parser, articles): { "value": "The University of Texas at Austin, Austin, TX, United States", "organization": "The University of Texas at Austin", - "country": "USA", + "country": "United States", } ], }, @@ -1284,7 +1289,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # 
"country": "CERN", } ], }, @@ -1344,7 +1349,7 @@ def parsed_articles(parser, articles): { "value": "Oak Ridge National Laboratory, Oak Ridge, TN, United States", "organization": "Oak Ridge National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -1365,7 +1370,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -1387,7 +1392,7 @@ def parsed_articles(parser, articles): { "value": "Inha University, Incheon, Republic of Korea", "organization": "Inha University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -1408,7 +1413,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -1419,7 +1424,7 @@ def parsed_articles(parser, articles): { "value": "Faculty of Science, P.J. 
Šafárik University, Košice, Slovak Republic", "organization": "Faculty of Science", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -1467,7 +1472,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -1478,7 +1483,7 @@ def parsed_articles(parser, articles): { "value": "Yale University, New Haven, CT, United States", "organization": "Yale University", - "country": "USA", + "country": "United States", } ], }, @@ -1560,7 +1565,7 @@ def parsed_articles(parser, articles): { "value": "University of Liverpool, Liverpool, United Kingdom", "organization": "University of Liverpool", - "country": "UK", + "country": "United Kingdom", } ], }, @@ -1570,7 +1575,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -1652,7 +1657,7 @@ def parsed_articles(parser, articles): { "value": "University of Kansas, Lawrence, KS, United States", "organization": "University of Kansas", - "country": "USA", + "country": "United States", } ], }, @@ -1690,7 +1695,7 @@ def parsed_articles(parser, articles): { "value": "Yale University, New Haven, CT, United States", "organization": "Yale University", - "country": "USA", + "country": "United States", } ], }, @@ -1826,7 +1831,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an international laboratory covered by a cooperation agreement with CERN", - "country": "CERN" + # "country": "CERN" } ], }, @@ -1837,7 +1842,7 @@ def parsed_articles(parser, articles): { "value": "Lawrence Berkeley National Laboratory, Berkeley, CA, United States", "organization": "Lawrence Berkeley National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -1881,7 +1886,7 @@ def 
parsed_articles(parser, articles): { "value": "University of Liverpool, Liverpool, United Kingdom", "organization": "University of Liverpool", - "country": "UK", + "country": "United Kingdom", } ], }, @@ -1991,7 +1996,7 @@ def parsed_articles(parser, articles): { "value": "Inha University, Incheon, Republic of Korea", "organization": "Inha University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -2002,7 +2007,7 @@ def parsed_articles(parser, articles): { "value": "Inha University, Incheon, Republic of Korea", "organization": "Inha University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -2145,7 +2150,7 @@ def parsed_articles(parser, articles): { "value": "School of Physics and Astronomy, University of Birmingham, Birmingham, United Kingdom", "organization": "School of Physics and Astronomy", - "country": "UK", + "country": "United Kingdom", } ], }, @@ -2172,7 +2177,7 @@ def parsed_articles(parser, articles): { "value": "Lawrence Berkeley National Laboratory, Berkeley, CA, United States", "organization": "Lawrence Berkeley National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -2260,7 +2265,7 @@ def parsed_articles(parser, articles): { "value": "Oak Ridge National Laboratory, Oak Ridge, TN, United States", "organization": "Oak Ridge National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -2336,7 +2341,7 @@ def parsed_articles(parser, articles): { "value": "Lawrence Berkeley National Laboratory, Berkeley, CA, United States", "organization": "Lawrence Berkeley National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -2369,7 +2374,7 @@ def parsed_articles(parser, articles): { "value": "Oak Ridge National Laboratory, Oak Ridge, TN, United States", "organization": "Oak Ridge National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -2633,7 +2638,7 @@ def parsed_articles(parser, articles): { "value": 
"Department of Physics, University of California, Berkeley, CA, United States", "organization": "Department of Physics", - "country": "USA", + "country": "United States", } ], }, @@ -2665,7 +2670,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an international laboratory covered by a cooperation agreement with CERN", - "country": "CERN" + # "country": "CERN" }, { "value": "Centro de Aplicaciones Tecnológicas y Desarrollo Nuclear (CEADEN), Havana, Cuba", @@ -2719,7 +2724,7 @@ def parsed_articles(parser, articles): { "value": "Department of Physics, University of California, Berkeley, CA, United States", "organization": "Department of Physics", - "country": "USA", + "country": "United States", } ], }, @@ -2740,7 +2745,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -2828,7 +2833,7 @@ def parsed_articles(parser, articles): { "value": "Technical University of Košice, Košice, Slovak Republic", "organization": "Technical University of Košice", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -2839,7 +2844,7 @@ def parsed_articles(parser, articles): { "value": "Comenius University Bratislava, Faculty of Mathematics, Physics and Informatics, Bratislava, Slovak Republic", "organization": "Comenius University Bratislava", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -2861,7 +2866,7 @@ def parsed_articles(parser, articles): { "value": "Oak Ridge National Laboratory, Oak Ridge, TN, United States", "organization": "Oak Ridge National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -2971,7 +2976,7 @@ def parsed_articles(parser, articles): { "value": "School of Physics and Astronomy, University of Birmingham, Birmingham, United Kingdom", "organization": "School of Physics and Astronomy", - "country": "UK", + 
"country": "United Kingdom", } ], }, @@ -2981,7 +2986,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -3036,7 +3041,7 @@ def parsed_articles(parser, articles): { "value": "Lawrence Berkeley National Laboratory, Berkeley, CA, United States", "organization": "Lawrence Berkeley National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -3058,7 +3063,7 @@ def parsed_articles(parser, articles): { "value": "Oak Ridge National Laboratory, Oak Ridge, TN, United States", "organization": "Oak Ridge National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -3090,7 +3095,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -3177,7 +3182,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -3210,7 +3215,7 @@ def parsed_articles(parser, articles): { "value": "University of Houston, Houston, TX, United States", "organization": "University of Houston", - "country": "USA", + "country": "United States", } ], }, @@ -3221,7 +3226,7 @@ def parsed_articles(parser, articles): { "value": "The University of Texas at Austin, Austin, TX, United States", "organization": "The University of Texas at Austin", - "country": "USA", + "country": "United States", } ], }, @@ -3253,7 +3258,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -3318,7 +3323,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an 
institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -3417,7 +3422,7 @@ def parsed_articles(parser, articles): { "value": "Chicago State University, Chicago, IL, United States", "organization": "Chicago State University", - "country": "USA", + "country": "United States", } ], }, @@ -3472,7 +3477,7 @@ def parsed_articles(parser, articles): { "value": "The University of Texas at Austin, Austin, TX, United States", "organization": "The University of Texas at Austin", - "country": "USA", + "country": "United States", } ], }, @@ -3483,7 +3488,7 @@ def parsed_articles(parser, articles): { "value": "University of Kansas, Lawrence, KS, United States", "organization": "University of Kansas", - "country": "USA", + "country": "United States", } ], }, @@ -3598,7 +3603,7 @@ def parsed_articles(parser, articles): { "value": "University of Tennessee, Knoxville, TN, United States", "organization": "University of Tennessee", - "country": "USA", + "country": "United States", } ], }, @@ -3620,7 +3625,7 @@ def parsed_articles(parser, articles): { "value": "Wayne State University, Detroit, MI, United States", "organization": "Wayne State University", - "country": "USA", + "country": "United States", } ], }, @@ -3719,7 +3724,7 @@ def parsed_articles(parser, articles): { "value": "Lawrence Berkeley National Laboratory, Berkeley, CA, United States", "organization": "Lawrence Berkeley National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -3751,7 +3756,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -3761,7 +3766,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an international laboratory covered by a cooperation agreement with CERN", - "country": "CERN" + # "country": "CERN" }, { "value": "A.I. 
Alikhanyan National Science Laboratory (Yerevan Physics Institute) Foundation, Yerevan, Armenia", @@ -3975,7 +3980,7 @@ def parsed_articles(parser, articles): { "value": "Yonsei University, Seoul, Republic of Korea", "organization": "Yonsei University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -3986,7 +3991,7 @@ def parsed_articles(parser, articles): { "value": "The University of Texas at Austin, Austin, TX, United States", "organization": "The University of Texas at Austin", - "country": "USA", + "country": "United States", } ], }, @@ -4019,7 +4024,7 @@ def parsed_articles(parser, articles): { "value": "Yale University, New Haven, CT, United States", "organization": "Yale University", - "country": "USA", + "country": "United States", } ], }, @@ -4030,7 +4035,7 @@ def parsed_articles(parser, articles): { "value": "Chicago State University, Chicago, IL, United States", "organization": "Chicago State University", - "country": "USA", + "country": "United States", } ], }, @@ -4052,7 +4057,7 @@ def parsed_articles(parser, articles): { "value": "Oak Ridge National Laboratory, Oak Ridge, TN, United States", "organization": "Oak Ridge National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -4085,7 +4090,7 @@ def parsed_articles(parser, articles): { "value": "Yale University, New Haven, CT, United States", "organization": "Yale University", - "country": "USA", + "country": "United States", } ], }, @@ -4195,7 +4200,7 @@ def parsed_articles(parser, articles): { "value": "University of Liverpool, Liverpool, United Kingdom", "organization": "University of Liverpool", - "country": "UK", + "country": "United Kingdom", } ], }, @@ -4250,7 +4255,7 @@ def parsed_articles(parser, articles): { "value": "Yonsei University, Seoul, Republic of Korea", "organization": "Yonsei University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -4283,7 +4288,7 @@ def parsed_articles(parser, articles): { "value": 
"Creighton University, Omaha, NE, United States", "organization": "Creighton University", - "country": "USA", + "country": "United States", } ], }, @@ -4316,7 +4321,7 @@ def parsed_articles(parser, articles): { "value": "University of Tennessee, Knoxville, TN, United States", "organization": "University of Tennessee", - "country": "USA", + "country": "United States", } ], }, @@ -4360,7 +4365,7 @@ def parsed_articles(parser, articles): { "value": "Ohio State University, Columbus, OH, United States", "organization": "Ohio State University", - "country": "USA", + "country": "United States", } ], }, @@ -4382,7 +4387,7 @@ def parsed_articles(parser, articles): { "value": "University of Houston, Houston, TX, United States", "organization": "University of Houston", - "country": "USA", + "country": "United States", } ], }, @@ -4404,7 +4409,7 @@ def parsed_articles(parser, articles): { "value": "University of Liverpool, Liverpool, United Kingdom", "organization": "University of Liverpool", - "country": "UK", + "country": "United Kingdom", } ], }, @@ -4414,7 +4419,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -4457,7 +4462,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -4479,7 +4484,7 @@ def parsed_articles(parser, articles): { "value": "University of Kansas, Lawrence, KS, United States", "organization": "University of Kansas", - "country": "USA", + "country": "United States", } ], }, @@ -4511,7 +4516,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -4521,7 +4526,7 @@ def parsed_articles(parser, articles): 
"affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -4543,7 +4548,7 @@ def parsed_articles(parser, articles): { "value": "Lawrence Berkeley National Laboratory, Berkeley, CA, United States", "organization": "Lawrence Berkeley National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -4565,7 +4570,7 @@ def parsed_articles(parser, articles): { "value": "Lawrence Berkeley National Laboratory, Berkeley, CA, United States", "organization": "Lawrence Berkeley National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -4576,7 +4581,7 @@ def parsed_articles(parser, articles): { "value": "Technical University of Košice, Košice, Slovak Republic", "organization": "Technical University of Košice", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -4587,7 +4592,7 @@ def parsed_articles(parser, articles): { "value": "Technical University of Košice, Košice, Slovak Republic", "organization": "Technical University of Košice", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -4653,7 +4658,7 @@ def parsed_articles(parser, articles): { "value": "School of Physics and Astronomy, University of Birmingham, Birmingham, United Kingdom", "organization": "School of Physics and Astronomy", - "country": "UK", + "country": "United Kingdom", } ], }, @@ -4675,7 +4680,7 @@ def parsed_articles(parser, articles): { "value": "Oak Ridge National Laboratory, Oak Ridge, TN, United States", "organization": "Oak Ridge National Laboratory", - "country": "USA", + "country": "United States", }, { "value": "Westfälische Wilhelms-Universität Münster, Institut für Kernphysik, Münster, Germany", @@ -4691,7 +4696,7 @@ def parsed_articles(parser, articles): { "value": "School of Physics and Astronomy, University of Birmingham, Birmingham, United Kingdom", "organization": "School of Physics and Astronomy", - "country": 
"UK", + "country": "United Kingdom", } ], }, @@ -4751,7 +4756,7 @@ def parsed_articles(parser, articles): { "value": "School of Physics and Astronomy, University of Birmingham, Birmingham, United Kingdom", "organization": "School of Physics and Astronomy", - "country": "UK", + "country": "United Kingdom", } ], }, @@ -4789,7 +4794,7 @@ def parsed_articles(parser, articles): { "value": "Institute of Experimental Physics, Slovak Academy of Sciences, Košice, Slovak Republic", "organization": "Institute of Experimental Physics", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -4821,7 +4826,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -4853,7 +4858,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -4863,7 +4868,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -4884,7 +4889,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -4905,7 +4910,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -4992,7 +4997,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -5002,7 +5007,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": 
"Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -5046,7 +5051,7 @@ def parsed_articles(parser, articles): { "value": "Department of Physics, Pusan National University, Pusan, Republic of Korea", "organization": "Department of Physics", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -5057,7 +5062,7 @@ def parsed_articles(parser, articles): { "value": "Department of Physics, Pusan National University, Pusan, Republic of Korea", "organization": "Department of Physics", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -5079,7 +5084,7 @@ def parsed_articles(parser, articles): { "value": "Jeonbuk National University, Jeonju, Republic of Korea", "organization": "Jeonbuk National University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -5090,7 +5095,7 @@ def parsed_articles(parser, articles): { "value": "Yonsei University, Seoul, Republic of Korea", "organization": "Yonsei University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -5101,7 +5106,7 @@ def parsed_articles(parser, articles): { "value": "Gangneung-Wonju National University, Gangneung, Republic of Korea", "organization": "Gangneung-Wonju National University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -5123,7 +5128,7 @@ def parsed_articles(parser, articles): { "value": "Jeonbuk National University, Jeonju, Republic of Korea", "organization": "Jeonbuk National University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -5145,7 +5150,7 @@ def parsed_articles(parser, articles): { "value": "Department of Physics, Sejong University, Seoul, Republic of Korea", "organization": "Department of Physics", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -5156,7 +5161,7 @@ def parsed_articles(parser, articles): { "value": "Yonsei University, Seoul, Republic 
of Korea", "organization": "Yonsei University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -5188,7 +5193,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -5221,7 +5226,7 @@ def parsed_articles(parser, articles): { "value": "California Polytechnic State University, San Luis Obispo, CA, United States", "organization": "California Polytechnic State University", - "country": "USA", + "country": "United States", } ], }, @@ -5243,7 +5248,7 @@ def parsed_articles(parser, articles): { "value": "Lawrence Berkeley National Laboratory, Berkeley, CA, United States", "organization": "Lawrence Berkeley National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -5298,7 +5303,7 @@ def parsed_articles(parser, articles): { "value": "University of Houston, Houston, TX, United States", "organization": "University of Houston", - "country": "USA", + "country": "United States", } ], }, @@ -5330,7 +5335,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an international laboratory covered by a cooperation agreement with CERN", - "country": "CERN" + # "country": "CERN" } ], }, @@ -5340,7 +5345,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -5350,7 +5355,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -5437,7 +5442,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -5459,7 +5464,7 @@ 
def parsed_articles(parser, articles): { "value": "Institute of Experimental Physics, Slovak Academy of Sciences, Košice, Slovak Republic", "organization": "Institute of Experimental Physics", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -5470,7 +5475,7 @@ def parsed_articles(parser, articles): { "value": "Faculty of Science, P.J. Šafárik University, Košice, Slovak Republic", "organization": "Faculty of Science", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -5492,12 +5497,12 @@ def parsed_articles(parser, articles): { "value": "School of Physics and Astronomy, University of Birmingham, Birmingham, United Kingdom", "organization": "School of Physics and Astronomy", - "country": "UK", + "country": "United Kingdom", }, { "value": "Institute of Experimental Physics, Slovak Academy of Sciences, Košice, Slovak Republic", "organization": "Institute of Experimental Physics", - "country": "Slovakia", + "country": "Slovak Republic", }, ], }, @@ -5562,7 +5567,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -5682,7 +5687,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -5692,7 +5697,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -5714,7 +5719,7 @@ def parsed_articles(parser, articles): { "value": "School of Physics and Astronomy, University of Birmingham, Birmingham, United Kingdom", "organization": "School of Physics and Astronomy", - "country": "UK", + "country": "United Kingdom", } ], }, @@ -5725,7 +5730,7 @@ def parsed_articles(parser, articles): { "value": "Inha 
University, Incheon, Republic of Korea", "organization": "Inha University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -5736,7 +5741,7 @@ def parsed_articles(parser, articles): { "value": "Inha University, Incheon, Republic of Korea", "organization": "Inha University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -5747,7 +5752,7 @@ def parsed_articles(parser, articles): { "value": "Yonsei University, Seoul, Republic of Korea", "organization": "Yonsei University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -5780,7 +5785,7 @@ def parsed_articles(parser, articles): { "value": "Lawrence Berkeley National Laboratory, Berkeley, CA, United States", "organization": "Lawrence Berkeley National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -5872,7 +5877,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -5910,7 +5915,7 @@ def parsed_articles(parser, articles): { "value": "Nuclear Physics Group, STFC Daresbury Laboratory, Daresbury, United Kingdom", "organization": "Nuclear Physics Group", - "country": "UK", + "country": "United Kingdom", } ], }, @@ -5943,7 +5948,7 @@ def parsed_articles(parser, articles): { "value": "Department of Physics, University of California, Berkeley, CA, United States", "organization": "Department of Physics", - "country": "USA", + "country": "United States", } ], }, @@ -6009,7 +6014,7 @@ def parsed_articles(parser, articles): { "value": "School of Physics and Astronomy, University of Birmingham, Birmingham, United Kingdom", "organization": "School of Physics and Astronomy", - "country": "UK", + "country": "United Kingdom", } ], }, @@ -6020,7 +6025,7 @@ def parsed_articles(parser, articles): { "value": "Department of Physics, Pusan National University, Pusan, Republic of Korea", 
"organization": "Department of Physics", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -6031,7 +6036,7 @@ def parsed_articles(parser, articles): { "value": "Department of Physics, Pusan National University, Pusan, Republic of Korea", "organization": "Department of Physics", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -6075,7 +6080,7 @@ def parsed_articles(parser, articles): { "value": "Department of Physics, University of California, Berkeley, CA, United States", "organization": "Department of Physics", - "country": "USA", + "country": "United States", } ], }, @@ -6097,7 +6102,7 @@ def parsed_articles(parser, articles): { "value": "University of Liverpool, Liverpool, United Kingdom", "organization": "University of Liverpool", - "country": "UK", + "country": "United Kingdom", } ], }, @@ -6118,7 +6123,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -6129,7 +6134,7 @@ def parsed_articles(parser, articles): { "value": "Oak Ridge National Laboratory, Oak Ridge, TN, United States", "organization": "Oak Ridge National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -6243,7 +6248,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -6286,7 +6291,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -6329,7 +6334,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an international laboratory covered by a cooperation agreement with CERN", - "country": "CERN" + # "country": "CERN" } ], }, @@ -6339,7 +6344,7 
@@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -6387,7 +6392,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -6464,7 +6469,7 @@ def parsed_articles(parser, articles): { "value": "The University of Texas at Austin, Austin, TX, United States", "organization": "The University of Texas at Austin", - "country": "USA", + "country": "United States", } ], }, @@ -6508,7 +6513,7 @@ def parsed_articles(parser, articles): { "value": "University of Houston, Houston, TX, United States", "organization": "University of Houston", - "country": "USA", + "country": "United States", } ], }, @@ -6710,7 +6715,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -6748,7 +6753,7 @@ def parsed_articles(parser, articles): { "value": "University of Houston, Houston, TX, United States", "organization": "University of Houston", - "country": "USA", + "country": "United States", } ], }, @@ -6759,7 +6764,7 @@ def parsed_articles(parser, articles): { "value": "Comenius University Bratislava, Faculty of Mathematics, Physics and Informatics, Bratislava, Slovak Republic", "organization": "Comenius University Bratislava", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -6829,11 +6834,11 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an international laboratory covered by a cooperation agreement with CERN", - "country": "CERN" + # "country": "CERN" }, { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", }, ], 
}, @@ -6953,7 +6958,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -7019,7 +7024,7 @@ def parsed_articles(parser, articles): { "value": "Lawrence Berkeley National Laboratory, Berkeley, CA, United States", "organization": "Lawrence Berkeley National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -7096,7 +7101,7 @@ def parsed_articles(parser, articles): { "value": "Institute of Experimental Physics, Slovak Academy of Sciences, Košice, Slovak Republic", "organization": "Institute of Experimental Physics", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -7195,7 +7200,7 @@ def parsed_articles(parser, articles): { "value": "University of Tennessee, Knoxville, TN, United States", "organization": "University of Tennessee", - "country": "USA", + "country": "United States", } ], }, @@ -7260,7 +7265,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -7292,7 +7297,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -7302,7 +7307,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -7312,7 +7317,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -7334,7 +7339,7 @@ def parsed_articles(parser, articles): { "value": "Chungbuk National University, Cheongju, Republic of Korea", 
"organization": "Chungbuk National University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -7344,7 +7349,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an international laboratory covered by a cooperation agreement with CERN", - "country": "CERN" + # "country": "CERN" } ], }, @@ -7355,7 +7360,7 @@ def parsed_articles(parser, articles): { "value": "University of Liverpool, Liverpool, United Kingdom", "organization": "University of Liverpool", - "country": "UK", + "country": "United Kingdom", } ], }, @@ -7387,7 +7392,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -7430,7 +7435,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -7452,7 +7457,7 @@ def parsed_articles(parser, articles): { "value": "University of Tennessee, Knoxville, TN, United States", "organization": "University of Tennessee", - "country": "USA", + "country": "United States", } ], }, @@ -7463,7 +7468,7 @@ def parsed_articles(parser, articles): { "value": "Yale University, New Haven, CT, United States", "organization": "Yale University", - "country": "USA", + "country": "United States", } ], }, @@ -7622,7 +7627,7 @@ def parsed_articles(parser, articles): { "value": "Inha University, Incheon, Republic of Korea", "organization": "Inha University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -7649,7 +7654,7 @@ def parsed_articles(parser, articles): { "value": "University of Houston, Houston, TX, United States", "organization": "University of Houston", - "country": "USA", + "country": "United States", } ], }, @@ -7736,7 +7741,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": 
"Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -7768,7 +7773,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -7789,7 +7794,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -7838,7 +7843,7 @@ def parsed_articles(parser, articles): { "value": "Comenius University Bratislava, Faculty of Mathematics, Physics and Informatics, Bratislava, Slovak Republic", "organization": "Comenius University Bratislava", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -7876,7 +7881,7 @@ def parsed_articles(parser, articles): { "value": "University of Houston, Houston, TX, United States", "organization": "University of Houston", - "country": "USA", + "country": "United States", } ], }, @@ -7914,7 +7919,7 @@ def parsed_articles(parser, articles): { "value": "Lawrence Berkeley National Laboratory, Berkeley, CA, United States", "organization": "Lawrence Berkeley National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -7947,7 +7952,7 @@ def parsed_articles(parser, articles): { "value": "Oak Ridge National Laboratory, Oak Ridge, TN, United States", "organization": "Oak Ridge National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -8002,7 +8007,7 @@ def parsed_articles(parser, articles): { "value": "Lawrence Berkeley National Laboratory, Berkeley, CA, United States", "organization": "Lawrence Berkeley National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -8012,7 +8017,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an international laboratory covered by a 
cooperation agreement with CERN", - "country": "CERN" + # "country": "CERN" } ], }, @@ -8067,7 +8072,7 @@ def parsed_articles(parser, articles): { "value": "Wayne State University, Detroit, MI, United States", "organization": "Wayne State University", - "country": "USA", + "country": "United States", } ], }, @@ -8077,7 +8082,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -8121,7 +8126,7 @@ def parsed_articles(parser, articles): { "value": "University of Houston, Houston, TX, United States", "organization": "University of Houston", - "country": "USA", + "country": "United States", } ], }, @@ -8132,7 +8137,7 @@ def parsed_articles(parser, articles): { "value": "School of Physics and Astronomy, University of Birmingham, Birmingham, United Kingdom", "organization": "School of Physics and Astronomy", - "country": "UK", + "country": "United Kingdom", } ], }, @@ -8258,12 +8263,12 @@ def parsed_articles(parser, articles): { "value": "Oak Ridge National Laboratory, Oak Ridge, TN, United States", "organization": "Oak Ridge National Laboratory", - "country": "USA", + "country": "United States", }, { "value": "University of Tennessee, Knoxville, TN, United States", "organization": "University of Tennessee", - "country": "USA", + "country": "United States", }, ], }, @@ -8340,7 +8345,7 @@ def parsed_articles(parser, articles): { "value": "Faculty of Science, P.J. 
Šafárik University, Košice, Slovak Republic", "organization": "Faculty of Science", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -8361,7 +8366,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -8371,7 +8376,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -8469,7 +8474,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -8479,7 +8484,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an international laboratory covered by a cooperation agreement with CERN", - "country": "CERN" + # "country": "CERN" } ], }, @@ -8681,7 +8686,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an international laboratory covered by a cooperation agreement with CERN", - "country": "CERN" + # "country": "CERN" } ], }, @@ -8724,7 +8729,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -8734,7 +8739,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -8799,7 +8804,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -8931,7 +8936,7 @@ def parsed_articles(parser, articles): { "value": 
"Faculty of Science, P.J. Šafárik University, Košice, Slovak Republic", "organization": "Faculty of Science", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -8986,7 +8991,7 @@ def parsed_articles(parser, articles): { "value": "Wayne State University, Detroit, MI, United States", "organization": "Wayne State University", - "country": "USA", + "country": "United States", } ], }, @@ -9041,7 +9046,7 @@ def parsed_articles(parser, articles): { "value": "Yale University, New Haven, CT, United States", "organization": "Yale University", - "country": "USA", + "country": "United States", } ], }, @@ -9052,7 +9057,7 @@ def parsed_articles(parser, articles): { "value": "Oak Ridge National Laboratory, Oak Ridge, TN, United States", "organization": "Oak Ridge National Laboratory", - "country": "USA", + "country": "United States", } ], }, @@ -9151,7 +9156,7 @@ def parsed_articles(parser, articles): { "value": "Oak Ridge National Laboratory, Oak Ridge, TN, United States", "organization": "Oak Ridge National Laboratory", - "country": "USA", + "country": "United States", }, { "value": "Institut für Kernphysik, Johann Wolfgang Goethe-Universität Frankfurt, Frankfurt, Germany", @@ -9167,7 +9172,7 @@ def parsed_articles(parser, articles): { "value": "University of Tennessee, Knoxville, TN, United States", "organization": "University of Tennessee", - "country": "USA", + "country": "United States", } ], }, @@ -9244,7 +9249,7 @@ def parsed_articles(parser, articles): { "value": "Creighton University, Omaha, NE, United States", "organization": "Creighton University", - "country": "USA", + "country": "United States", } ], }, @@ -9281,7 +9286,7 @@ def parsed_articles(parser, articles): }, { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", }, ], }, @@ -9303,7 +9308,7 @@ def parsed_articles(parser, articles): { "value": "Inha University, Incheon, Republic of Korea", "organization": "Inha 
University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -9313,7 +9318,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -9356,7 +9361,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -9399,7 +9404,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -9498,7 +9503,7 @@ def parsed_articles(parser, articles): { "value": "University of Houston, Houston, TX, United States", "organization": "University of Houston", - "country": "USA", + "country": "United States", } ], }, @@ -9530,7 +9535,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -9551,7 +9556,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -9727,7 +9732,7 @@ def parsed_articles(parser, articles): { "value": "Comenius University Bratislava, Faculty of Mathematics, Physics and Informatics, Bratislava, Slovak Republic", "organization": "Comenius University Bratislava", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -9787,7 +9792,7 @@ def parsed_articles(parser, articles): { "value": "Yale University, New Haven, CT, United States", "organization": "Yale University", - "country": "USA", + "country": "United States", } ], }, @@ -9831,7 +9836,7 @@ def parsed_articles(parser, articles): { "value": "University 
of Houston, Houston, TX, United States", "organization": "University of Houston", - "country": "USA", + "country": "United States", } ], }, @@ -9864,7 +9869,7 @@ def parsed_articles(parser, articles): { "value": "University of Tennessee, Knoxville, TN, United States", "organization": "University of Tennessee", - "country": "USA", + "country": "United States", } ], }, @@ -9930,7 +9935,7 @@ def parsed_articles(parser, articles): { "value": "University of Tennessee, Knoxville, TN, United States", "organization": "University of Tennessee", - "country": "USA", + "country": "United States", } ], }, @@ -10039,7 +10044,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -10094,7 +10099,7 @@ def parsed_articles(parser, articles): { "value": "Comenius University Bratislava, Faculty of Mathematics, Physics and Informatics, Bratislava, Slovak Republic", "organization": "Comenius University Bratislava", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -10105,7 +10110,7 @@ def parsed_articles(parser, articles): { "value": "Comenius University Bratislava, Faculty of Mathematics, Physics and Informatics, Bratislava, Slovak Republic", "organization": "Comenius University Bratislava", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -10203,7 +10208,7 @@ def parsed_articles(parser, articles): { "value": "University of Kansas, Lawrence, KS, United States", "organization": "University of Kansas", - "country": "USA", + "country": "United States", } ], }, @@ -10280,7 +10285,7 @@ def parsed_articles(parser, articles): { "value": "University of Houston, Houston, TX, United States", "organization": "University of Houston", - "country": "USA", + "country": "United States", } ], }, @@ -10313,7 +10318,7 @@ def parsed_articles(parser, articles): { "value": "The University of Texas at Austin, Austin, TX, 
United States", "organization": "The University of Texas at Austin", - "country": "USA", + "country": "United States", } ], }, @@ -10334,7 +10339,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -10345,7 +10350,7 @@ def parsed_articles(parser, articles): { "value": "University of Houston, Houston, TX, United States", "organization": "University of Houston", - "country": "USA", + "country": "United States", } ], }, @@ -10356,7 +10361,7 @@ def parsed_articles(parser, articles): { "value": "Technical University of Košice, Košice, Slovak Republic", "organization": "Technical University of Košice", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -10367,7 +10372,7 @@ def parsed_articles(parser, articles): { "value": "Technical University of Košice, Košice, Slovak Republic", "organization": "Technical University of Košice", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -10388,7 +10393,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -10410,7 +10415,7 @@ def parsed_articles(parser, articles): { "value": "Department of Physics, University of California, Berkeley, CA, United States", "organization": "Department of Physics", - "country": "USA", + "country": "United States", } ], }, @@ -10623,7 +10628,7 @@ def parsed_articles(parser, articles): { "value": "Faculty of Science, P.J. 
Šafárik University, Košice, Slovak Republic", "organization": "Faculty of Science", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -10754,7 +10759,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -10775,7 +10780,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -10863,7 +10868,7 @@ def parsed_articles(parser, articles): { "value": "School of Physics and Astronomy, University of Birmingham, Birmingham, United Kingdom", "organization": "School of Physics and Astronomy", - "country": "UK", + "country": "United Kingdom", } ], }, @@ -10884,7 +10889,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -10916,7 +10921,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an international laboratory covered by a cooperation agreement with CERN", - "country": "CERN" + # "country": "CERN" } ], }, @@ -10948,7 +10953,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -10959,7 +10964,7 @@ def parsed_articles(parser, articles): { "value": "Wayne State University, Detroit, MI, United States", "organization": "Wayne State University", - "country": "USA", + "country": "United States", } ], }, @@ -11002,7 +11007,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -11013,7 
+11018,7 @@ def parsed_articles(parser, articles): { "value": "Faculty of Science, P.J. Šafárik University, Košice, Slovak Republic", "organization": "Faculty of Science", - "country": "Slovakia", + "country": "Slovak Republic", } ], }, @@ -11112,7 +11117,7 @@ def parsed_articles(parser, articles): { "value": "Yale University, New Haven, CT, United States", "organization": "Yale University", - "country": "USA", + "country": "United States", } ], }, @@ -11200,7 +11205,7 @@ def parsed_articles(parser, articles): { "value": "The University of Texas at Austin, Austin, TX, United States", "organization": "The University of Texas at Austin", - "country": "USA", + "country": "United States", } ], }, @@ -11321,7 +11326,7 @@ def parsed_articles(parser, articles): { "value": "Department of Physics, Pusan National University, Pusan, Republic of Korea", "organization": "Department of Physics", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -11332,7 +11337,7 @@ def parsed_articles(parser, articles): { "value": "Inha University, Incheon, Republic of Korea", "organization": "Inha University", - "country": "South Korea", + "country": "Republic of Korea", } ], }, @@ -11414,7 +11419,7 @@ def parsed_articles(parser, articles): { "value": "School of Physics and Astronomy, University of Birmingham, Birmingham, United Kingdom", "organization": "School of Physics and Astronomy", - "country": "UK", + "country": "United Kingdom", }, ], }, @@ -11424,7 +11429,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -11445,7 +11450,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -11455,7 +11460,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": 
"Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -11520,7 +11525,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -11541,7 +11546,7 @@ def parsed_articles(parser, articles): "affiliations": [ { "value": "Affiliated with an institute covered by a cooperation agreement with CERN", - "country": "CERN", + # "country": "CERN", } ], }, @@ -11692,3 +11697,26 @@ def parsed_articles(parser, articles): def test_elsevier_parsing(parsed_articles, expected, key): for (expected_value, article) in zip(expected, parsed_articles): assert article[key] == expected_value + + SKIP_ENHANCE_FOR = [ + "abstract", + "title", + "copyright_holder", + "copyright_year", + "copyright_statement", + ] + + if key not in SKIP_ENHANCE_FOR: + if key == "authors": + for author in expected_value: + for aff in author.get("affiliations", []): + if aff.get("country") == "Republic of Korea": + aff["country"] = "South Korea" + if aff.get("country") == "Slovak Republic": + aff["country"] = "Slovakia" + if aff.get("country") == "United States": + aff["country"] = "USA" + if aff.get("country") == "United Kingdom": + aff["country"] = "UK" + + assert Enhancer()("Elsevier", article)[key] == expected_value diff --git a/tests/units/hindawi/test_hindawi_enhance.py b/tests/units/hindawi/test_hindawi_enhance.py new file mode 100644 index 00000000..623925a5 --- /dev/null +++ b/tests/units/hindawi/test_hindawi_enhance.py @@ -0,0 +1,148 @@ +import xml.etree.ElementTree as ET + +import pytest +from common.parsing.xml_extractors import RequiredFieldNotFoundExtractionError +from hindawi.hindawi_file_processing import ( + enhance_hindawi, +) +from hindawi.parser import HindawiParser + + +@pytest.fixture(scope="module") +def hindawi_parser(): + return HindawiParser() + + +@pytest.fixture
+def articles(shared_datadir): + articles = [] + files = ["example1.xml", "example2.xml", "example4.xml"] + + for filename in files: + with open(shared_datadir / filename) as xml_file: + articles.append(ET.fromstring(xml_file.read())) + return articles + + +@pytest.fixture +def parsed_articles(hindawi_parser, articles): + return [hindawi_parser._publisher_specific_parsing(article) for article in articles] + + +@pytest.mark.parametrize( + "expected, key", + [ + pytest.param( + [ + ["10.1155/2019/3465159"], + ["10.1155/2022/5287693"], + ["10.1155/2022/2755821"], + ], + "dois", + id="test_dois", + ), + pytest.param( + [ + [ + { + "raw_name": "Entem, David R.", + "affiliations": [ + { + "value": "Grupo de Física Nuclear and Instituto Universitario de Física Fundamental y Matemáticas (IUFFyM), Universidad de Salamanca, E-37008 Salamanca, Spain", + "organization": "Grupo de Física Nuclear and Instituto Universitario de Física Fundamental y Matemáticas (IUFFyM), Universidad de Salamanca, E-37008 Salamanca", + "country": "Spain", + } + ], + "orcid": "ORCID-0000-0003-2376-6255", + }, + { + "raw_name": "Ortega, Pablo G.", + "affiliations": [ + { + "value": "Grupo de Física Nuclear and Instituto Universitario de Física Fundamental y Matemáticas (IUFFyM), Universidad de Salamanca, E-37008 Salamanca, Spain", + "organization": "Grupo de Física Nuclear and Instituto Universitario de Física Fundamental y Matemáticas (IUFFyM), Universidad de Salamanca, E-37008 Salamanca", + "country": "Spain", + } + ], + }, + { + "raw_name": "Fernández, Francisco", + "affiliations": [ + { + "value": "Grupo de Física Nuclear and Instituto Universitario de Física Fundamental y Matemáticas (IUFFyM), Universidad de Salamanca, E-37008 Salamanca, Spain", + "organization": "Grupo de Física Nuclear and Instituto Universitario de Física Fundamental y Matemáticas (IUFFyM), Universidad de Salamanca, E-37008 Salamanca", + "country": "Spain", + } + ], + }, + ], + [ + { + "raw_name": "Li, Ying", + "affiliations": [ + { + 
"value": "Department of Physics, Yantai University, Yantai 264005, China", + "organization": "Department of Physics, Yantai University, Yantai 264005", + "country": "China", + } + ], + "orcid": "https://orcid.org/0000-0002-1337-7662", + }, + { + "raw_name": "Liu, Wen-Feng", + "affiliations": [ + { + "value": "Department of Physics, Yantai University, Yantai 264005, China", + "organization": "Department of Physics, Yantai University, Yantai 264005", + "country": "China", + } + ], + "orcid": "https://orcid.org/0000-0002-9549-1863", + }, + { + "raw_name": "Zou, Zhi-Tian", + "affiliations": [ + { + "value": "Department of Physics, Yantai University, Yantai 264005, China", + "organization": "Department of Physics, Yantai University, Yantai 264005", + "country": "China", + } + ], + "orcid": "https://orcid.org/0000-0002-6985-8174", + }, + ], + [ + { + "raw_name": "Wei, Yan-Bing", + "affiliations": [ + { + "value": "Physik Department T31, James-Franck-Straße 1, Technische Universität München, D85748 Garching, Germany", + "organization": "Physik Department T31, James-Franck-Straße 1, Technische Universität München, D85748 Garching", + "country": "Germany", + } + ], + "orcid": "https://orcid.org/0000-0001-5917-5786", + }, + { + "raw_name": "Shen, Yue-Long", + "affiliations": [ + { + "value": "College of Physics and Photoelectric Engineering, Ocean University of China, Qingdao 266100, China", + "organization": "College of Physics and Photoelectric Engineering, Ocean University of China, Qingdao 266100", + "country": "China", + } + ], + }, + ], + ], + "authors", + id="test_authors", + ), + ], +) +def test_hindawi_parsing(parsed_articles, expected, key): + for ( + expected_value, + article, + ) in zip(expected, parsed_articles): + assert enhance_hindawi(article).get(key) == expected_value diff --git a/tests/units/hindawi/test_hindawi_parser.py b/tests/units/hindawi/test_hindawi_parser.py index 5037d044..8fa26ea4 100644 --- a/tests/units/hindawi/test_hindawi_parser.py +++ 
b/tests/units/hindawi/test_hindawi_parser.py @@ -47,7 +47,7 @@ def parsed_articles(hindawi_parser, articles): { "value": "Grupo de Física Nuclear and Instituto Universitario de Física Fundamental y Matemáticas (IUFFyM), Universidad de Salamanca, E-37008 Salamanca, Spain", "organization": "Grupo de Física Nuclear and Instituto Universitario de Física Fundamental y Matemáticas (IUFFyM), Universidad de Salamanca, E-37008 Salamanca", - "country": "Spain", + # "country": "Spain", } ], "orcid": "ORCID-0000-0003-2376-6255", @@ -58,7 +58,7 @@ def parsed_articles(hindawi_parser, articles): { "value": "Grupo de Física Nuclear and Instituto Universitario de Física Fundamental y Matemáticas (IUFFyM), Universidad de Salamanca, E-37008 Salamanca, Spain", "organization": "Grupo de Física Nuclear and Instituto Universitario de Física Fundamental y Matemáticas (IUFFyM), Universidad de Salamanca, E-37008 Salamanca", - "country": "Spain", + # "country": "Spain", } ], }, @@ -68,7 +68,7 @@ def parsed_articles(hindawi_parser, articles): { "value": "Grupo de Física Nuclear and Instituto Universitario de Física Fundamental y Matemáticas (IUFFyM), Universidad de Salamanca, E-37008 Salamanca, Spain", "organization": "Grupo de Física Nuclear and Instituto Universitario de Física Fundamental y Matemáticas (IUFFyM), Universidad de Salamanca, E-37008 Salamanca", - "country": "Spain", + # "country": "Spain", } ], }, @@ -80,7 +80,7 @@ def parsed_articles(hindawi_parser, articles): { "value": "Department of Physics, Yantai University, Yantai 264005, China", "organization": "Department of Physics, Yantai University, Yantai 264005", - "country": "China", + # "country": "China", } ], "orcid": "https://orcid.org/0000-0002-1337-7662", @@ -91,7 +91,7 @@ def parsed_articles(hindawi_parser, articles): { "value": "Department of Physics, Yantai University, Yantai 264005, China", "organization": "Department of Physics, Yantai University, Yantai 264005", - "country": "China", + # "country": "China", } ], 
"orcid": "https://orcid.org/0000-0002-9549-1863", @@ -102,7 +102,7 @@ def parsed_articles(hindawi_parser, articles): { "value": "Department of Physics, Yantai University, Yantai 264005, China", "organization": "Department of Physics, Yantai University, Yantai 264005", - "country": "China", + # "country": "China", } ], "orcid": "https://orcid.org/0000-0002-6985-8174", @@ -115,7 +115,7 @@ def parsed_articles(hindawi_parser, articles): { "value": "Physik Department T31, James-Franck-Straße 1, Technische Universität München, D85748 Garching, Germany", "organization": "Physik Department T31, James-Franck-Straße 1, Technische Universität München, D85748 Garching", - "country": "Germany", + # "country": "Germany", } ], "orcid": "https://orcid.org/0000-0001-5917-5786", @@ -126,7 +126,7 @@ def parsed_articles(hindawi_parser, articles): { "value": "College of Physics and Photoelectric Engineering, Ocean University of China, Qingdao 266100, China", "organization": "College of Physics and Photoelectric Engineering, Ocean University of China, Qingdao 266100", - "country": "China", + # "country": "China", } ], }, diff --git a/tests/units/springer/test_parser.py b/tests/units/springer/test_parser.py index 75deea36..257b84b1 100644 --- a/tests/units/springer/test_parser.py +++ b/tests/units/springer/test_parser.py @@ -3,6 +3,7 @@ from pytest import fixture from springer.parser import SpringerParser +from common.enhancer import Enhancer @fixture(scope="module") @@ -72,8 +73,8 @@ def test_authors(parsed_articles): "affiliations": [ { "organization": "School of Physics, Korea Institute for Advanced Study", - "value": "School of Physics, Korea Institute for Advanced Study, Dongdaemun-gu, Seoul, 02455, South Korea", - "country": "South Korea", + "value": "School of Physics, Korea Institute for Advanced Study, Dongdaemun-gu, Seoul, 02455, Korea", + "country": "Korea", } ], "surname": "Nosaka", @@ -129,6 +130,13 @@ def test_authors(parsed_articles): for authors, parsed_article in 
zip(expected_results, parsed_articles): assert authors == parsed_article["authors"] + for author in authors: + for aff in author.get("affiliations", []): + if aff.get("country") == "Korea": + aff["country"] = "South Korea" + + assert Enhancer()("Springer", parsed_article)["authors"] == authors + def test_title(parsed_articles): titles = (