Skip to content

Commit

Permalink
utils: take the institution or the last match of the country
Browse files Browse the repository at this point in the history
  • Loading branch information
ErnestaP committed Apr 17, 2024
1 parent f370ad5 commit afa5a9d
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 2 deletions.
16 changes: 16 additions & 0 deletions scoap3/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,22 @@ def _(x):
"United States",
]

# Institution names and acronyms that decide the "country" of an affiliation
# on their own. scoap3.utils.nations consults this mapping before the plain
# country names and returns the value of the first key found in the
# affiliation (whole-word, case-insensitive search), so the order of the
# entries matters.
# CERN and JINR map to themselves: they are reported under their own name
# instead of a country.
INSTITUTIONS_AND_COUNTRIES_MAPPING = OrderedDict([
    ("INFN", "Italy"),
    ("European Organization for Nuclear Research", "CERN"),
    ("Conseil Européen pour la Recherche Nucléaire", "CERN"),
    ("CERN", "CERN"),
    ("KEK", "Japan"),
    ("DESY", "Germany"),
    ("FERMILAB", "USA"),
    ("FNAL", "USA"),
    # Acronym of the Stanford Linear Accelerator Center listed right below
    # (previously misspelled "SLACK", which never matched "SLAC").
    ("SLAC", "USA"),
    ("Stanford Linear Accelerator Center", "USA"),
    ("Joint Institute for Nuclear Research", "JINR"),
    ("JINR", "JINR"),
    ("ROC", "Taiwan"),
    ("R.O.C", "Taiwan"),
])

COUNTRIES_DEFAULT_MAPPING = OrderedDict(
[
Expand Down
10 changes: 8 additions & 2 deletions scoap3/utils/nations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import re

from scoap3.config import COUNTRIES_DEFAULT_MAPPING
from scoap3.config import COUNTRIES_DEFAULT_MAPPING, INSTITUTIONS_AND_COUNTRIES_MAPPING
from scoap3.utils.google_maps import get_country


Expand All @@ -18,9 +18,15 @@ def find_country(affiliation):


def _find_country_no_cache(affiliation):
    """Work out the country of an affiliation string.

    The lookup goes through three stages and stops at the first one that
    produces a result:

    1. Institutions: the value of the first key of
       INSTITUTIONS_AND_COUNTRIES_MAPPING found in the affiliation.
    2. Countries: every key of COUNTRIES_DEFAULT_MAPPING is tried and the
       value of the last matching key (in mapping order) is kept.
    3. ``get_country(affiliation)``; if that returns nothing the string
       "HUMAN CHECK" is returned.

    Keys are searched as whole words, ignoring case, and are matched
    literally (regex metacharacters such as the dots in "R.O.C" are escaped).

    :param affiliation: affiliation text to inspect.
    :return: the mapped country/institution label, the result of
        ``get_country`` or "HUMAN CHECK".
    """
    # A known institution decides the result outright.
    for key, val in INSTITUTIONS_AND_COUNTRIES_MAPPING.items():
        if re.search(r'\b%s\b' % re.escape(key), affiliation, flags=re.IGNORECASE):
            return val

    # No early return here on purpose: a later matching key overrides an
    # earlier one, so the last match of the mapping wins.
    country = None
    for key, val in COUNTRIES_DEFAULT_MAPPING.items():
        if re.search(r'\b%s\b' % re.escape(key), affiliation, flags=re.IGNORECASE):
            country = val
    if country:
        return country

    # if we can't figure out the country use the cache and Google API if needed
    return get_country(affiliation) or "HUMAN CHECK"
27 changes: 27 additions & 0 deletions tests/unit/test_nations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from scoap3.utils.nations import _find_country_no_cache

# Affiliation string -> label expected from _find_country_no_cache.
affiliations = {
    # Spelling and ordering variants of one Chilean address. Each of them
    # contains the street name "Brasil" ahead of the country name
    # (presumably the reason these cases exist -- confirm against
    # COUNTRIES_DEFAULT_MAPPING).
    "Instituto de Física, Pontificia Universidad Católica de Valparaíso, Avenida Brasil 2950, Casilla, Valparaíso, 4059, Chile": "Chile",
    "Instituto de Física, Pontificia Universidad Católica de Valparaíso, Avenida Brasil 2950, Casilla, Valparaiso, 4059, Chile": "Chile",
    "Instituto de Física, Facultad de Ciencias, Pontificia Universidad Católica de Valparaíso, Av. Brasil, Valparaiso, 2950, Chile": "Chile",
    "Instituto de Física, Pontificia Universidad Católica de Valparaíso, Av. Brasil 2950, Valparaíso, Chile": "Chile",
    "Instituto de Física, Pontificia Universidad Católica de Valparaíso, Av. Brasil, Valparaíso, 2950, Chile": "Chile",
    "Pontificia Universidad Católica de Valparaíso, Instituto de Física, Avenida Brasil 2950, Valparaíso, Chile": "Chile",
    "Facultad de Ciencias, Instituto de Física, Pontificia Universidad Católica de Valparaíso, Av. Brasil 2950, Valparaiso, Chile": "Chile",
    "Instituto de Física, Pontificia Universidad Católica de Valparaíso, Avda. Brasil 2950, Valparaiso, Chile": "Chile",
    "Instituto de Física, Pontificia Universidad Católica de Valparaíso, Avenida Brasil 2950, Valparaíso, Chile": "Chile",
    "Instituto de Física, Facultad de Ciencias, Pontificia Universidad Católica de Valparaíso, Av. Brasil 2950, Valparaiso, Chile": "Chile",
    "Instituto de Física, Pontificia Universidad de Católica de Valparaíso, Avenida Brasil 2950, Valparaíso, Chile": "Chile",
    "Instituto de Física, Pontificia Universidad Católica de Valparaíso, Avenida Brasil 2950, Casilla 4059, Valparaiso, Chile": "Chile",
    "Instituto de Física, Pontificia Universidad Católica de Valparaíso, Avenida Brasil 2950, Casilla 4059, Valparaíso, Chile": "Chile",
    "Instituto de Física, Facultad de Ciencias, Pontificia Universidad Católica de Valparaíso, Av. Brasil 2950, Valparaíso, Chile": "Chile",
    "Instituto de Física, Pontificia Universidad Católica de Valparaíso, Avenida Brasil 2950, Valparaiso, Chile": "Chile",
    "Pontificia Universidad Católica de Valparaíso, Instituto de Física, Av. Brasil 2950, Valparaíso, Chile": "Chile",
    "Pontificia Universidad Católica de Valparaíso, Instituto de Física, Av. Brasil, Valparaíso, 2950, Chile": "Chile",
    # Addresses that also name a country, but are expected to resolve to the
    # institution label instead.
    "Joint Institute for Nuclear Research, 141980 Dubna, Moscow region, Russia": "JINR",
    "Theoretical Physics Department, CERN, Esplande des Particules, 1211 Geneva 23, Switzerland": "CERN",
}


def test_find_country_no_cache():
    """Check every sample affiliation resolves to its expected label.

    All samples are evaluated before asserting, so a failure lists every
    affiliation that resolved wrongly (with the expected and the returned
    value) instead of stopping at the first one.
    """
    mismatches = []
    for affiliation, expected in affiliations.items():
        found = _find_country_no_cache(affiliation)
        if found != expected:
            mismatches.append((affiliation, expected, found))
    assert not mismatches, "wrong country for: %r" % (mismatches,)

0 comments on commit afa5a9d

Please sign in to comment.