From 8cd3844ccfc9334cd9889960b88784d87688cbe8 Mon Sep 17 00:00:00 2001 From: PascalEgn Date: Tue, 6 Aug 2024 13:53:03 +0200 Subject: [PATCH] global: add pre-commit with ruff --- .pre-commit-config.yaml | 17 + inspire_dojson/__init__.py | 6 +- inspire_dojson/api.py | 48 +- inspire_dojson/cds/__init__.py | 4 +- inspire_dojson/cds/model.py | 31 +- inspire_dojson/cds/rules.py | 137 ++++-- inspire_dojson/common/__init__.py | 2 +- inspire_dojson/common/rules.py | 96 ++-- inspire_dojson/conferences/__init__.py | 4 +- inspire_dojson/conferences/model.py | 7 +- inspire_dojson/conferences/rules.py | 39 +- inspire_dojson/data/__init__.py | 4 +- inspire_dojson/data/model.py | 8 +- inspire_dojson/data/rules.py | 4 +- inspire_dojson/errors.py | 5 +- inspire_dojson/experiments/__init__.py | 4 +- inspire_dojson/experiments/model.py | 7 +- inspire_dojson/experiments/rules.py | 143 +++--- inspire_dojson/hep/__init__.py | 4 +- inspire_dojson/hep/model.py | 37 +- inspire_dojson/hep/rules/bd0xx.py | 177 +++++--- inspire_dojson/hep/rules/bd1xx.py | 111 +++-- inspire_dojson/hep/rules/bd2xx.py | 34 +- inspire_dojson/hep/rules/bd3xx.py | 4 +- inspire_dojson/hep/rules/bd4xx.py | 2 +- inspire_dojson/hep/rules/bd5xx.py | 102 +++-- inspire_dojson/hep/rules/bd6xx.py | 101 +++-- inspire_dojson/hep/rules/bd7xx.py | 40 +- inspire_dojson/hep/rules/bd9xx.py | 42 +- inspire_dojson/hep/rules/bdFFT.py | 49 +- inspire_dojson/hepnames/__init__.py | 4 +- inspire_dojson/hepnames/model.py | 9 +- inspire_dojson/hepnames/rules.py | 166 +++---- inspire_dojson/institutions/__init__.py | 4 +- inspire_dojson/institutions/model.py | 7 +- inspire_dojson/institutions/rules.py | 69 +-- inspire_dojson/journals/__init__.py | 4 +- inspire_dojson/journals/model.py | 8 +- inspire_dojson/journals/rules.py | 8 +- inspire_dojson/model.py | 24 +- inspire_dojson/utils/__init__.py | 22 +- inspire_dojson/utils/geo.py | 78 +++- ruff.toml | 29 ++ run-tests.sh | 1 - setup.py | 10 +- tests/conftest.py | 7 +- tests/test_api.py | 47 +- tests/test_cds.py | 487 ++++++++++++++------ tests/test_common.py | 174 ++++---- tests/test_conferences.py | 428 +++++++++--------- tests/test_data.py | 10 +- tests/test_experiments.py | 177 +++++--- tests/test_experiments_model.py | 2 +- tests/test_hep_bd0xx.py | 168 ++++--- tests/test_hep_bd1xx.py | 565 +++++++++++------------ tests/test_hep_bd2xx.py | 109 ++--- tests/test_hep_bd3xx.py | 14 +- tests/test_hep_bd4xx.py | 6 +- tests/test_hep_bd5xx.py | 525 +++++++++++++++------- tests/test_hep_bd6xx.py | 66 +-- tests/test_hep_bd7xx.py | 151 +++---- tests/test_hep_bd9xx.py | 478 ++++++++++---------- tests/test_hep_bdFFT.py | 488 ++++++++++---------- tests/test_hep_model.py | 27 +- tests/test_hepnames.py | 568 ++++++++++-------------- tests/test_institutions.py | 271 ++++++----- tests/test_journals.py | 161 ++++--- tests/test_model.py | 6 +- tests/test_utils.py | 37 +- tests/test_utils_geo.py | 1 - 70 files changed, 3767 insertions(+), 2918 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 ruff.toml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..2c56732d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,17 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + - id: fix-byte-order-marker + - id: mixed-line-ending + - id: name-tests-test + args: [ --pytest-test-first ] + exclude: '^(?!factories/)' + - repo: 
https://github.com/astral-sh/ruff-pre-commit + rev: v0.5.6 + hooks: + - id: ruff + args: [ --fix ] diff --git a/inspire_dojson/__init__.py b/inspire_dojson/__init__.py index c69c7cc4..d6f976e0 100644 --- a/inspire_dojson/__init__.py +++ b/inspire_dojson/__init__.py @@ -24,8 +24,8 @@ from __future__ import absolute_import, division, print_function -from . import common # noqa: F401 -from .api import marcxml2record, record2marcxml # noqa: F401 -from .errors import DoJsonError # noqa: F401 +from inspire_dojson import common # noqa: F401 +from inspire_dojson.api import marcxml2record, record2marcxml # noqa: F401 +from inspire_dojson.errors import DoJsonError # noqa: F401 __version__ = "63.2.22" diff --git a/inspire_dojson/api.py b/inspire_dojson/api.py index 522a3a13..cea95d9a 100644 --- a/inspire_dojson/api.py +++ b/inspire_dojson/api.py @@ -28,34 +28,34 @@ import re from itertools import chain +from dojson.contrib.marc21.utils import create_record +from inspire_utils.helpers import force_list +from inspire_utils.record import get_value from lxml.builder import E from lxml.etree import tostring from six import iteritems, text_type, unichr from six.moves import urllib -from dojson.contrib.marc21.utils import create_record - +from inspire_dojson.cds import cds2hep_marc +from inspire_dojson.conferences import conferences +from inspire_dojson.data import data +from inspire_dojson.errors import NotSupportedError +from inspire_dojson.experiments import experiments +from inspire_dojson.hep import hep, hep2marc +from inspire_dojson.hepnames import hepnames, hepnames2marc +from inspire_dojson.institutions import institutions +from inspire_dojson.journals import journals from inspire_dojson.utils import create_record_from_dict, force_single_element -from inspire_utils.helpers import force_list -from inspire_utils.record import get_value - -from .cds import cds2hep_marc -from .conferences import conferences -from .data import data -from .errors import NotSupportedError -from .experiments import experiments -from .hep import hep, hep2marc -from .hepnames import hepnames, hepnames2marc -from .institutions import institutions -from .journals import journals try: unichr(0x100000) RE_INVALID_CHARS_FOR_XML = re.compile( - u'[^\U00000009\U0000000A\U0000000D\U00000020-\U0000D7FF\U0000E000-\U0000FFFD\U00010000-\U0010FFFF]+') + u'[^\U00000009\U0000000A\U0000000D\U00000020-\U0000D7FF\U0000E000-\U0000FFFD\U00010000-\U0010FFFF]+' + ) except ValueError: # pragma: no cover RE_INVALID_CHARS_FOR_XML = re.compile( - u'[^\U00000009\U0000000A\U0000000D\U00000020-\U0000D7FF\U0000E000-\U0000FFFD]+') + u'[^\U00000009\U0000000A\U0000000D\U00000020-\U0000D7FF\U0000E000-\U0000FFFD]+' + ) RECORD = E.record CONTROLFIELD = E.controlfield @@ -107,7 +107,9 @@ def record2marcxml_etree(record): elif schema_name == 'authors': marcjson = hepnames2marc.do(record) else: - raise NotSupportedError(u'JSON -> MARC rules missing for "{}"'.format(schema_name)) + raise NotSupportedError( + u'JSON -> MARC rules missing for "{}"'.format(schema_name) + ) record = RECORD() @@ -117,7 +119,9 @@ def record2marcxml_etree(record): value = force_single_element(values) if not isinstance(value, text_type): value = text_type(value) - record.append(CONTROLFIELD(_strip_invalid_chars_for_xml(value), {'tag': tag})) + record.append( + CONTROLFIELD(_strip_invalid_chars_for_xml(value), {'tag': tag}) + ) else: for value in force_list(values): datafield = DATAFIELD({'tag': tag, 'ind1': ind1, 'ind2': ind2}) @@ -125,7 +129,9 @@ def record2marcxml_etree(record): for el 
in force_list(els): if not isinstance(el, text_type): el = text_type(el) - datafield.append(SUBFIELD(_strip_invalid_chars_for_xml(el), {'code': code})) + datafield.append( + SUBFIELD(_strip_invalid_chars_for_xml(el), {'code': code}) + ) record.append(datafield) return record @@ -155,7 +161,9 @@ def cds_marcxml2record(marcxml): def _get_collections(marcjson): - collections = chain.from_iterable([force_list(el) for el in force_list(get_value(marcjson, '980__.a'))]) + collections = chain.from_iterable( + [force_list(el) for el in force_list(get_value(marcjson, '980__.a'))] + ) normalized_collections = [el.lower() for el in collections] return normalized_collections diff --git a/inspire_dojson/cds/__init__.py b/inspire_dojson/cds/__init__.py index 892b9c1e..04958dc4 100644 --- a/inspire_dojson/cds/__init__.py +++ b/inspire_dojson/cds/__init__.py @@ -24,5 +24,5 @@ from __future__ import absolute_import, division, print_function -from . import rules # noqa: F401 -from .model import cds2hep_marc # noqa: F401 +from inspire_dojson.cds import rules # noqa: F401 +from inspire_dojson.cds.model import cds2hep_marc # noqa: F401 diff --git a/inspire_dojson/cds/model.py b/inspire_dojson/cds/model.py index 4a2ccb19..97e3da25 100644 --- a/inspire_dojson/cds/model.py +++ b/inspire_dojson/cds/model.py @@ -25,28 +25,35 @@ from __future__ import absolute_import, division, print_function from itertools import chain -from inspire_utils.record import get_value + from inspire_utils.helpers import force_list +from inspire_utils.record import get_value -from ..model import FilterOverdo, clean_record +from inspire_dojson.model import FilterOverdo, clean_record def add_control_number(record, blob): if '001' not in blob: return record - collections = (value.lower() for value in chain(force_list(get_value(blob, '980__.a', default=[])), - force_list(get_value(blob, '980__.c', default=[])))) + collections = ( + value.lower() + for value in chain( + force_list(get_value(blob, '980__.a', default=[])), + force_list(get_value(blob, '980__.c', default=[])), + ) + ) if 'hidden' in collections: - record.setdefault('595__', []).append({ - '9': 'CDS', - 'a': u'CDS-{}'.format(blob['001']) - }) + record.setdefault('595__', []).append( + {'9': 'CDS', 'a': u'CDS-{}'.format(blob['001'])} + ) else: - record.setdefault('035__', []).append({ - '9': 'CDS', - 'a': blob['001'], - }) + record.setdefault('035__', []).append( + { + '9': 'CDS', + 'a': blob['001'], + } + ) return record diff --git a/inspire_dojson/cds/rules.py b/inspire_dojson/cds/rules.py index ad82d511..0e5a10eb 100644 --- a/inspire_dojson/cds/rules.py +++ b/inspire_dojson/cds/rules.py @@ -24,6 +24,7 @@ from __future__ import absolute_import, division, print_function +import contextlib import os import re from itertools import chain @@ -31,17 +32,14 @@ import pycountry import rfc3987 import six - -from idutils import is_arxiv from dojson import utils - -from six.moves import urllib - +from idutils import is_arxiv from inspire_utils.helpers import force_list from inspire_utils.name import normalize_name +from six.moves import urllib -from .model import cds2hep_marc -from ..utils import force_single_element, quote_url +from inspire_dojson.cds.model import cds2hep_marc +from inspire_dojson.utils import force_single_element, quote_url CATEGORIES = { 'Accelerators and Storage Rings': 'Accelerators', @@ -55,7 +53,6 @@ 'Engineering': 'Instrumentation', 'General Relativity and Cosmology': 'Gravitation and Cosmology', 'General Theoretical Physics': 'General Physics', - 'General 
Theoretical Physics': 'General Physics', 'Information Transfer and Management': 'Other', 'Mathematical Physics and Mathematics': 'Math and Math Physics', 'Nuclear Physics - Experiment': 'Experiment-Nucl', @@ -123,7 +120,7 @@ def escape_url(url): else: scheme = '' - url = quote_url(url[len(scheme):]) + url = quote_url(url[len(scheme) :]) return scheme + url @@ -138,8 +135,19 @@ def persistent_identifiers(self, key, value): @cds2hep_marc.over('035__', '^035..') @utils.for_each_value def external_sytem_identifiers(self, key, value): - ignored = {'cercer', 'inspire', 'xx', 'cern annual report', 'cmscms', 'wai01', 'spires'} - if any(val.lower() in ignored for val in chain(force_list(value.get('9')), force_list(value.get('a')))): + ignored = { + 'cercer', + 'inspire', + 'xx', + 'cern annual report', + 'cmscms', + 'wai01', + 'spires', + } + if any( + val.lower() in ignored + for val in chain(force_list(value.get('9')), force_list(value.get('a'))) + ): return if any(val.lower().endswith('cercer') for val in force_list(value.get('a'))): return @@ -153,8 +161,21 @@ def secondary_report_numbers(self, key, value): Also populates the ``500``, ``595`` and ``980`` MARC field through side effects. """ - preliminary_results_prefixes = ['ATLAS-CONF-', 'CMS-PAS-', 'CMS-DP-', 'LHCB-CONF-'] - note_prefixes = ['ALICE-INT-', 'ATL-', 'ATLAS-CONF-', 'CMS-DP-', 'CMS-PAS-', 'LHCB-CONF-', 'LHCB-PUB-'] + preliminary_results_prefixes = [ + 'ATLAS-CONF-', + 'CMS-PAS-', + 'CMS-DP-', + 'LHCB-CONF-', + ] + note_prefixes = [ + 'ALICE-INT-', + 'ATL-', + 'ATLAS-CONF-', + 'CMS-DP-', + 'CMS-PAS-', + 'LHCB-CONF-', + 'LHCB-PUB-', + ] result_037 = self.get('037__', []) result_500 = self.get('500__', []) @@ -168,17 +189,21 @@ def secondary_report_numbers(self, key, value): if any(report.upper().startswith(prefix) for prefix in note_prefixes): result_980.append({'a': 'NOTE'}) - if any(report.upper().startswith(prefix) for prefix in preliminary_results_prefixes): + if any( + report.upper().startswith(prefix) for prefix in preliminary_results_prefixes + ): result_500.append({'9': 'CDS', 'a': 'Preliminary results'}) is_barcode = hidden_report.startswith('P0') or hidden_report.startswith('CM-P0') if not report.startswith('SIS-') and not is_barcode: - result_037.append({ - '9': source, - 'a': report, - 'c': value.get('c'), - 'z': hidden_report if source == 'CDS' else None, - }) + result_037.append( + { + '9': source, + 'a': report, + 'c': value.get('c'), + 'z': hidden_report if source == 'CDS' else None, + } + ) self['500__'] = result_500 self['595__'] = result_595 @@ -198,10 +223,10 @@ def languages(self, key, value): try: languages.append({'a': pycountry.languages.get(alpha_3=alpha_3).name}) except KeyError: - try: - languages.append({'a': pycountry.languages.get(bibliographic=alpha_3).name}) - except KeyError: - pass + with contextlib.suppress(KeyError): + languages.append( + {'a': pycountry.languages.get(bibliographic=alpha_3).name} + ) return languages @@ -267,7 +292,9 @@ def nonfirst_authors(self, key, value): field_700 = self.get('700__', []) field_701 = self.get('701__', []) - is_supervisor = any(el.lower().startswith('dir') for el in force_list(value.get('e', ''))) + is_supervisor = any( + el.lower().startswith('dir') for el in force_list(value.get('e', '')) + ) if is_supervisor: field_701.append(_converted_author(value)) else: @@ -351,7 +378,7 @@ def categories(self, key, value): result = { '2': 'INSPIRE', # XXX: will fail validation and be logged if invalid category - 'a': CATEGORIES.get(value.get('a'), value.get('a')) + 'a': 
CATEGORIES.get(value.get('a'), value.get('a')), } else: result = vanilla_dict(value) @@ -410,20 +437,28 @@ def urls(self, key, value): Also populate the ``FFT`` field through side effects. """ + def _is_preprint(value): return value.get('y', '').lower() == 'preprint' def _is_fulltext(value): - return value['u'].endswith('.pdf') and value['u'].startswith('http://cds.cern.ch') + return value['u'].endswith('.pdf') and value['u'].startswith( + 'http://cds.cern.ch' + ) def _is_local_copy(value): return 'local copy' in value.get('y', '') def _is_ignored_domain(value): - ignored_domains = ['http://cdsweb.cern.ch', 'http://cms.cern.ch', - 'http://cmsdoc.cern.ch', 'http://documents.cern.ch', - 'http://preprints.cern.ch', 'http://cds.cern.ch', - 'http://arxiv.org'] + ignored_domains = [ + 'http://cdsweb.cern.ch', + 'http://cms.cern.ch', + 'http://cmsdoc.cern.ch', + 'http://documents.cern.ch', + 'http://preprints.cern.ch', + 'http://cds.cern.ch', + 'http://arxiv.org', + ] return any(value['u'].startswith(domain) for domain in ignored_domains) field_8564 = self.get('8564_', []) @@ -436,26 +471,34 @@ def _is_ignored_domain(value): if _is_fulltext(value) and not _is_preprint(value): if _is_local_copy(value): - description = value.get('y', '').replace('local copy', 'on CERN Document Server') - field_8564.append({ - 'u': url, - 'y': description, - }) + description = value.get('y', '').replace( + 'local copy', 'on CERN Document Server' + ) + field_8564.append( + { + 'u': url, + 'y': description, + } + ) else: _, file_name = os.path.split(urllib.parse.urlparse(value['u']).path) _, extension = os.path.splitext(file_name) - field_FFT.append({ - 't': 'CDS', - 'a': url, - 'd': value.get('y', ''), - 'n': file_name, - 'f': extension, - }) + field_FFT.append( + { + 't': 'CDS', + 'a': url, + 'd': value.get('y', ''), + 'n': file_name, + 'f': extension, + } + ) elif not _is_ignored_domain(value): - field_8564.append({ - 'u': url, - 'y': value.get('y'), - }) + field_8564.append( + { + 'u': url, + 'y': value.get('y'), + } + ) self['FFT__'] = field_FFT return field_8564 diff --git a/inspire_dojson/common/__init__.py b/inspire_dojson/common/__init__.py index 7e89ca52..d715f5a6 100644 --- a/inspire_dojson/common/__init__.py +++ b/inspire_dojson/common/__init__.py @@ -24,4 +24,4 @@ from __future__ import absolute_import, division, print_function -from . 
import rules # noqa: F401 +from inspire_dojson.common import rules # noqa: F401 diff --git a/inspire_dojson/common/rules.py b/inspire_dojson/common/rules.py index b2b190bb..53868e74 100644 --- a/inspire_dojson/common/rules.py +++ b/inspire_dojson/common/rules.py @@ -27,30 +27,27 @@ import re from datetime import datetime -from flask import current_app -from six.moves import urllib - from dojson import utils - +from flask import current_app from inspire_schemas.api import load_schema from inspire_schemas.utils import classify_field from inspire_utils.date import PartialDate, earliest_date from inspire_utils.helpers import force_list, maybe_int +from six.moves import urllib -from ..conferences.model import conferences -from ..data.model import data -from ..experiments.model import experiments -from ..hep.model import hep, hep2marc -from ..hepnames.model import hepnames, hepnames2marc -from ..institutions.model import institutions -from ..journals.model import journals -from ..utils import ( +from inspire_dojson.conferences.model import conferences +from inspire_dojson.data.model import data +from inspire_dojson.experiments.model import experiments +from inspire_dojson.hep.model import hep, hep2marc +from inspire_dojson.hepnames.model import hepnames, hepnames2marc +from inspire_dojson.institutions.model import institutions +from inspire_dojson.journals.model import journals +from inspire_dojson.utils import ( force_single_element, get_recid_from_ref, get_record_ref, ) - IS_INTERNAL_UID = re.compile(r'^(inspire:uid:)?\d{5}$') IS_ORCID = re.compile(r'^(orcid:)?\d{4}-\d{4}-\d{4}-\d{3}[0-9X]$') @@ -346,7 +343,9 @@ 'HEPPDF': 'PDF Server', 'HLTPA': 'Health Physics Server', 'HSERVER': 'HTML_Version from a server', - 'HTTP://POS.SISSA.IT/ARCHIVE/CONFERENCES/045/026/LHC07_026.PDF': 'HTTP://WWW-BD.FNAL.GOV/ICFABD/NEWSLETTER45.PDF', + 'HTTP://POS.SISSA.IT/ARCHIVE/CONFERENCES/045/026/LHC07_026.PDF': ( + 'HTTP://WWW-BD.FNAL.GOV/ICFABD/NEWSLETTER45.PDF' + ), 'ICTP': 'ICTP Trieste Preprint Server', 'ICTP-LNS': 'ICTP Lecture Notes Server', 'IEEE': 'IEEExplore Server', @@ -587,6 +586,7 @@ def control_number(endpoint): Also populates the ``self`` key through side effects. 
""" + def _control_number(self, key, value): self['self'] = get_record_ref(int(value), endpoint) return int(value) @@ -630,6 +630,7 @@ def legacy_version2marc(self, key, value): @hepnames.over('acquisition_source', '^541..') def acquisition_source(self, key, value): """Populate the ``acquisition_source`` key.""" + def _get_datetime(value): d_value = force_single_element(value.get('d', '')) if d_value: @@ -646,15 +647,13 @@ def _get_datetime(value): a_values = force_list(value.get('a')) for a_value in a_values: if IS_INTERNAL_UID.match(a_value): - if a_value.startswith('inspire:uid:'): - internal_uid = int(a_value[12:]) - else: - internal_uid = int(a_value) + internal_uid = ( + int(a_value[12:]) + if a_value.startswith('inspire:uid:') + else int(a_value) + ) elif IS_ORCID.match(a_value): - if a_value.startswith('orcid:'): - orcid = a_value[6:] - else: - orcid = a_value + orcid = a_value[6:] if a_value.startswith('orcid:') else a_value else: source = a_value @@ -718,7 +717,8 @@ def public_notes_500(self, key, value): { 'source': value.get('9'), 'value': public_note, - } for public_note in force_list(value.get('a')) + } + for public_note in force_list(value.get('a')) ] @@ -745,7 +745,8 @@ def _private_notes_595(self, key, value): { 'source': value.get('9'), 'value': _private_note, - } for _private_note in force_list(value.get('a')) + } + for _private_note in force_list(value.get('a')) ] @@ -771,7 +772,7 @@ def inspire_categories(self, key, value): inspire_categories = self.get('inspire_categories', []) scheme = force_single_element(value.get('2')) - if scheme == 'arXiv': # XXX: we skip arXiv categories here because + if scheme == 'arXiv': # XXX: we skip arXiv categories here because return inspire_categories # we're going to add them later in a filter. source = force_single_element(value.get('9', '')).lower() @@ -787,10 +788,12 @@ def inspire_categories(self, key, value): for _term in terms: term = classify_field(_term) if term: - inspire_categories.append({ - 'term': term, - 'source': source, - }) + inspire_categories.append( + { + 'term': term, + 'source': source, + } + ) return inspire_categories @@ -850,10 +853,12 @@ def _is_internal_url(url): description = WEBLINKS.get(description, description) for url in force_list(value.get('u')): if not _is_internal_url(url): - urls.append({ - 'description': description, - 'value': url, - }) + urls.append( + { + 'description': description, + 'value': url, + } + ) return urls @@ -894,6 +899,7 @@ def external_system_identifiers(endpoint): Also populates the ``new_record`` key through side effects. 
""" + @utils.flatten @utils.for_each_value def _external_system_identifiers(self, key, value): @@ -905,17 +911,28 @@ def _external_system_identifiers(self, key, value): { 'schema': 'SPIRES', 'value': ext_sys_id, - } for ext_sys_id in force_list(value.get('a')) + } + for ext_sys_id in force_list(value.get('a')) ] return _external_system_identifiers -conferences.over('external_system_identifiers', '^970..')(external_system_identifiers('conferences')) -experiments.over('external_system_identifiers', '^970..')(external_system_identifiers('experiments')) -hep.over('external_system_identifiers', '^970..')(external_system_identifiers('literature')) -institutions.over('external_system_identifiers', '^970..')(external_system_identifiers('institutions')) -journals.over('external_system_identifiers', '^970..')(external_system_identifiers('journals')) +conferences.over('external_system_identifiers', '^970..')( + external_system_identifiers('conferences') +) +experiments.over('external_system_identifiers', '^970..')( + external_system_identifiers('experiments') +) +hep.over('external_system_identifiers', '^970..')( + external_system_identifiers('literature') +) +institutions.over('external_system_identifiers', '^970..')( + external_system_identifiers('institutions') +) +journals.over('external_system_identifiers', '^970..')( + external_system_identifiers('journals') +) @hep2marc.over('970', '^new_record$') @@ -931,6 +948,7 @@ def deleted(self, key, value): def deleted_records(endpoint): """Populate the ``deleted_records`` key.""" + @utils.for_each_value def _deleted_records(self, key, value): deleted_recid = maybe_int(value.get('a')) diff --git a/inspire_dojson/conferences/__init__.py b/inspire_dojson/conferences/__init__.py index 4eb22221..f85e2691 100644 --- a/inspire_dojson/conferences/__init__.py +++ b/inspire_dojson/conferences/__init__.py @@ -24,5 +24,5 @@ from __future__ import absolute_import, division, print_function -from . import rules # noqa: F401 -from .model import conferences # noqa: F401 +from inspire_dojson.conferences import rules # noqa: F401 +from inspire_dojson.conferences.model import conferences # noqa: F401 diff --git a/inspire_dojson/conferences/model.py b/inspire_dojson/conferences/model.py index 6f6a68fc..f5e13b9d 100644 --- a/inspire_dojson/conferences/model.py +++ b/inspire_dojson/conferences/model.py @@ -24,7 +24,12 @@ from __future__ import absolute_import, division, print_function -from ..model import FilterOverdo, add_schema, add_collection, clean_record +from inspire_dojson.model import ( + FilterOverdo, + add_collection, + add_schema, + clean_record, +) def remove_lone_series_number(record, blob): diff --git a/inspire_dojson/conferences/rules.py b/inspire_dojson/conferences/rules.py index c7be443d..b23dce60 100644 --- a/inspire_dojson/conferences/rules.py +++ b/inspire_dojson/conferences/rules.py @@ -25,12 +25,11 @@ from __future__ import absolute_import, division, print_function from dojson import utils +from inspire_utils.helpers import force_list, maybe_float, maybe_int -from inspire_utils.helpers import force_list, maybe_int, maybe_float - -from .model import conferences -from ..utils import force_single_element -from ..utils.geo import parse_conference_address +from inspire_dojson.conferences.model import conferences +from inspire_dojson.utils import force_single_element +from inspire_dojson.utils.geo import parse_conference_address def _trim_date(date): @@ -108,10 +107,12 @@ def contact_details(self, key, value): # we might match an email with the wrong name. 
if len(m_values) == len(p_values): for m_value, p_value in zip(m_values, p_values): - result.append({ - 'email': m_value, - 'name': p_value, - }) + result.append( + { + 'email': m_value, + 'name': p_value, + } + ) else: for m_value in m_values: result.append({'email': m_value}) @@ -140,10 +141,12 @@ def _last_is_incomplete(series, key): elif number and name is None and _last_is_incomplete(series, 'number'): series[-1]['number'] = number else: - series.append({ - 'name': name, - 'number': number, - }) + series.append( + { + 'name': name, + 'number': number, + } + ) return series @@ -209,8 +212,10 @@ def keywords(self, key, values): a_values = force_list(value.get('a')) for a_value in a_values: - keywords.append({ - 'source': force_single_element(sources), - 'value': a_value, - }) + keywords.append( + { + 'source': force_single_element(sources), + 'value': a_value, + } + ) return keywords diff --git a/inspire_dojson/data/__init__.py b/inspire_dojson/data/__init__.py index dccbe47e..be3a99f1 100644 --- a/inspire_dojson/data/__init__.py +++ b/inspire_dojson/data/__init__.py @@ -24,5 +24,5 @@ from __future__ import absolute_import, division, print_function -from . import rules # noqa: F401 -from .model import data # noqa: F401 +from inspire_dojson.data import rules # noqa: F401 +from inspire_dojson.data.model import data # noqa: F401 diff --git a/inspire_dojson/data/model.py b/inspire_dojson/data/model.py index 9c50ee02..6c7ce9b1 100644 --- a/inspire_dojson/data/model.py +++ b/inspire_dojson/data/model.py @@ -24,8 +24,12 @@ from __future__ import absolute_import, division, print_function -from ..model import FilterOverdo, add_schema, add_collection, clean_record - +from inspire_dojson.model import ( + FilterOverdo, + add_collection, + add_schema, + clean_record, +) filters = [ add_schema('data.json'), diff --git a/inspire_dojson/data/rules.py b/inspire_dojson/data/rules.py index d92bb589..3f067dd7 100644 --- a/inspire_dojson/data/rules.py +++ b/inspire_dojson/data/rules.py @@ -27,8 +27,8 @@ from dojson import utils from idutils import normalize_doi -from .model import data -from ..utils import force_single_element, get_record_ref +from inspire_dojson.data.model import data +from inspire_dojson.utils import force_single_element, get_record_ref @data.over('dois', '^0247.') diff --git a/inspire_dojson/errors.py b/inspire_dojson/errors.py index 8266652a..01c10085 100644 --- a/inspire_dojson/errors.py +++ b/inspire_dojson/errors.py @@ -30,12 +30,15 @@ @python_2_unicode_compatible class DoJsonError(Exception): """Error during DoJSON processing.""" + def __str__(self): message = self.args[0] exc = u' '.join(text_type(arg) for arg in self.args[1]) try: subfields = [(k, v) for (k, v) in self.args[2].items() if k != '__order__'] - except AttributeError: # when not dealing with MARC, the value doesn't have to be a dict + except ( + AttributeError + ): # when not dealing with MARC, the value doesn't have to be a dict subfields = self.args[2] return u'{message}\n\n{exc}\n\nSubfields: {subfields}'.format( message=message, exc=exc, subfields=subfields diff --git a/inspire_dojson/experiments/__init__.py b/inspire_dojson/experiments/__init__.py index a03f4b16..04b98eb2 100644 --- a/inspire_dojson/experiments/__init__.py +++ b/inspire_dojson/experiments/__init__.py @@ -24,5 +24,5 @@ from __future__ import absolute_import, division, print_function -from . 
import rules # noqa: F401 -from .model import experiments # noqa: F401 +from inspire_dojson.experiments import rules # noqa: F401 +from inspire_dojson.experiments.model import experiments # noqa: F401 diff --git a/inspire_dojson/experiments/model.py b/inspire_dojson/experiments/model.py index d9768601..6ac44e3d 100644 --- a/inspire_dojson/experiments/model.py +++ b/inspire_dojson/experiments/model.py @@ -24,7 +24,12 @@ from __future__ import absolute_import, division, print_function -from ..model import FilterOverdo, add_collection, add_schema, clean_record +from inspire_dojson.model import ( + FilterOverdo, + add_collection, + add_schema, + clean_record, +) def add_project_type(record, blob): diff --git a/inspire_dojson/experiments/rules.py b/inspire_dojson/experiments/rules.py index 3f6938d4..ab00d6f5 100644 --- a/inspire_dojson/experiments/rules.py +++ b/inspire_dojson/experiments/rules.py @@ -26,78 +26,77 @@ from dojson import utils from dojson.errors import IgnoreKey - from inspire_utils.date import normalize_date from inspire_utils.helpers import force_list, maybe_int -from .model import experiments -from ..utils import force_single_element, get_record_ref - - -EXPERIMENT_CATEGORIES_MAP = \ - {'1': 'Collider Experiments', - '1.1': 'Collider Experiments|Hadrons', - '1.1.1': 'Collider Experiments|Hadrons|p anti-p', - '1.1.2': 'Collider Experiments|Hadrons|p p', - '1.2': 'Collider Experiments|e+ e-', - '1.3': 'Collider Experiments|e p', - '1.4': 'Collider Experiments|Heavy Flavor Factory', - '1.5': 'Collider Experiments|Heavy ion', - '1.6': 'Collider Experiments|Detector development', - '2': 'Fixed Target Experiments', - '2.1': 'Fixed Target Experiments|High-momentum transfer', - '2.2': 'Fixed Target Experiments|Hadron Spectroscopy', - '2.3': 'Fixed Target Experiments|Deep inelastic scattering', - '2.4': 'Fixed Target Experiments|Drell-Yan/Dilepton production', - '2.5': 'Fixed Target Experiments|Flavor physics', - '2.6': 'Fixed Target Experiments|Lepton precision experiments', - '2.7': 'Fixed Target Experiments|Neutron/proton precision experiments', - '3': 'Neutrino (flavor) experiments', - '3.1': 'Neutrino (flavor) experiments|Accelerator', - '3.1.1': 'Neutrino (flavor) experiments|Accelerator|short-baseline', - '3.1.2': 'Neutrino (flavor) experiments|Accelerator|long-baseline', - '3.2': 'Neutrino (flavor) experiments|Reactor', - '3.2.1': 'Neutrino (flavor) experiments|Reactor|ultra-short-baseline', - '3.2.2': 'Neutrino (flavor) experiments|Reactor|longer baselines', - '3.3': 'Neutrino (flavor) experiments|Non terrestrial', - '3.3.1': 'Neutrino (flavor) experiments|Non terrestrial|Atmospheric', - '3.3.2': 'Neutrino (flavor) experiments|Non terrestrial|Solar', - '3.3.3': 'Neutrino (flavor) experiments|Non terrestrial|Cosmic', - '3.4': 'Neutrino (flavor) experiments|Neutrinoless double beta decay', - '3.5': 'Neutrino (flavor) experiments|Neutrino mass', - '4': 'Dark matter search experiments', - '4.1': 'Dark matter search experiments|Non-accelerator', - '4.2': 'Dark matter search experiments|Axion search experiments', - '4.3': 'Dark matter search experiments|Dark Forces', - '5': 'Cosmic ray/Gamma ray experiments', - '5.1': 'Cosmic ray/Gamma ray experiments|Ground array', - '5.2': 'Cosmic ray/Gamma ray experiments|Cerenkov array', - '5.3': 'Cosmic ray/Gamma ray experiments|Satellite', - '5.4': 'Cosmic ray/Gamma ray experiments|Balloon', - '6': 'Other Rare-process/exotic experiments', - '6.1': 'Other Rare-process/exotic experiments|Proton decay', - '6.2': 'Other Rare-process/exotic 
experiments|Modified gravity and space-time', - '6.3': 'Other Rare-process/exotic experiments|Magnetic monopoles', - '6.4': 'Other Rare-process/exotic experiments|Fractionally charged particles', - '7': 'Accelerator Test Facility Experiments', - '7.1': 'Accelerator Test Facility Experiments|Electron and positron beams', - '7.2': 'Accelerator Test Facility Experiments|Muon beams', - '7.3': 'Accelerator Test Facility Experiments|Proton beams', - '7.4': 'Accelerator Test Facility Experiments|Neutrino beams', - '8': 'Astronomy experiments', - '8.1': 'Astronomy experiments|CMB', - '8.2': 'Astronomy experiments|Survey', - '8.3': 'Astronomy experiments|Supernovae', - '8.4': 'Astronomy experiments|Gravitational waves', - '8.5': 'Astronomy experiments|Gravitational lensing/Dark matter', - '9': 'Non-experimental', - '9.1': 'Non-experimental|Data Analysis', - '9.2': 'Non-experimental|Simulation tools', - '9.2.1': 'Non-experimental|Simulation tools|Detector Simulation', - '9.2.2': 'Non-experimental|Simulation tools|Event Simulation', - '9.3': 'Non-experimental|Parton Distribution Fits', - '9.4': 'Non-experimental|Lattice Gauge Theory', - '9.5': 'Non-experimental|Neutrino Physics'} +from inspire_dojson.experiments.model import experiments +from inspire_dojson.utils import force_single_element, get_record_ref + +EXPERIMENT_CATEGORIES_MAP = { + '1': 'Collider Experiments', + '1.1': 'Collider Experiments|Hadrons', + '1.1.1': 'Collider Experiments|Hadrons|p anti-p', + '1.1.2': 'Collider Experiments|Hadrons|p p', + '1.2': 'Collider Experiments|e+ e-', + '1.3': 'Collider Experiments|e p', + '1.4': 'Collider Experiments|Heavy Flavor Factory', + '1.5': 'Collider Experiments|Heavy ion', + '1.6': 'Collider Experiments|Detector development', + '2': 'Fixed Target Experiments', + '2.1': 'Fixed Target Experiments|High-momentum transfer', + '2.2': 'Fixed Target Experiments|Hadron Spectroscopy', + '2.3': 'Fixed Target Experiments|Deep inelastic scattering', + '2.4': 'Fixed Target Experiments|Drell-Yan/Dilepton production', + '2.5': 'Fixed Target Experiments|Flavor physics', + '2.6': 'Fixed Target Experiments|Lepton precision experiments', + '2.7': 'Fixed Target Experiments|Neutron/proton precision experiments', + '3': 'Neutrino (flavor) experiments', + '3.1': 'Neutrino (flavor) experiments|Accelerator', + '3.1.1': 'Neutrino (flavor) experiments|Accelerator|short-baseline', + '3.1.2': 'Neutrino (flavor) experiments|Accelerator|long-baseline', + '3.2': 'Neutrino (flavor) experiments|Reactor', + '3.2.1': 'Neutrino (flavor) experiments|Reactor|ultra-short-baseline', + '3.2.2': 'Neutrino (flavor) experiments|Reactor|longer baselines', + '3.3': 'Neutrino (flavor) experiments|Non terrestrial', + '3.3.1': 'Neutrino (flavor) experiments|Non terrestrial|Atmospheric', + '3.3.2': 'Neutrino (flavor) experiments|Non terrestrial|Solar', + '3.3.3': 'Neutrino (flavor) experiments|Non terrestrial|Cosmic', + '3.4': 'Neutrino (flavor) experiments|Neutrinoless double beta decay', + '3.5': 'Neutrino (flavor) experiments|Neutrino mass', + '4': 'Dark matter search experiments', + '4.1': 'Dark matter search experiments|Non-accelerator', + '4.2': 'Dark matter search experiments|Axion search experiments', + '4.3': 'Dark matter search experiments|Dark Forces', + '5': 'Cosmic ray/Gamma ray experiments', + '5.1': 'Cosmic ray/Gamma ray experiments|Ground array', + '5.2': 'Cosmic ray/Gamma ray experiments|Cerenkov array', + '5.3': 'Cosmic ray/Gamma ray experiments|Satellite', + '5.4': 'Cosmic ray/Gamma ray experiments|Balloon', + '6': 'Other 
Rare-process/exotic experiments', + '6.1': 'Other Rare-process/exotic experiments|Proton decay', + '6.2': 'Other Rare-process/exotic experiments|Modified gravity and space-time', + '6.3': 'Other Rare-process/exotic experiments|Magnetic monopoles', + '6.4': 'Other Rare-process/exotic experiments|Fractionally charged particles', + '7': 'Accelerator Test Facility Experiments', + '7.1': 'Accelerator Test Facility Experiments|Electron and positron beams', + '7.2': 'Accelerator Test Facility Experiments|Muon beams', + '7.3': 'Accelerator Test Facility Experiments|Proton beams', + '7.4': 'Accelerator Test Facility Experiments|Neutrino beams', + '8': 'Astronomy experiments', + '8.1': 'Astronomy experiments|CMB', + '8.2': 'Astronomy experiments|Survey', + '8.3': 'Astronomy experiments|Supernovae', + '8.4': 'Astronomy experiments|Gravitational waves', + '8.5': 'Astronomy experiments|Gravitational lensing/Dark matter', + '9': 'Non-experimental', + '9.1': 'Non-experimental|Data Analysis', + '9.2': 'Non-experimental|Simulation tools', + '9.2.1': 'Non-experimental|Simulation tools|Detector Simulation', + '9.2.2': 'Non-experimental|Simulation tools|Event Simulation', + '9.3': 'Non-experimental|Parton Distribution Fits', + '9.4': 'Non-experimental|Lattice Gauge Theory', + '9.5': 'Non-experimental|Neutrino Physics', +} @experiments.over('_dates', '^046..') @@ -173,6 +172,7 @@ def long_name(self, key, value): def inspire_classification(self, key, value): def _get_category(value): return EXPERIMENT_CATEGORIES_MAP.get(value.get('a')) + return _get_category(value) @@ -186,10 +186,7 @@ def name_variants(self, key, value): @utils.for_each_value def related_records(self, key, value): def _get_relation(value): - RELATIONS_MAP = { - 'a': 'predecessor', - 'b': 'successor' - } + RELATIONS_MAP = {'a': 'predecessor', 'b': 'successor'} return RELATIONS_MAP.get(value.get('w')) diff --git a/inspire_dojson/hep/__init__.py b/inspire_dojson/hep/__init__.py index 0f103c6f..51e21fe5 100644 --- a/inspire_dojson/hep/__init__.py +++ b/inspire_dojson/hep/__init__.py @@ -24,7 +24,8 @@ from __future__ import absolute_import, division, print_function -from .rules import ( # noqa: F401 +from inspire_dojson.hep.model import hep, hep2marc # noqa: F401 +from inspire_dojson.hep.rules import ( # noqa: F401 bd0xx, bd1xx, bd2xx, @@ -36,4 +37,3 @@ bd9xx, bdFFT, ) -from .model import hep, hep2marc # noqa: F401 diff --git a/inspire_dojson/hep/model.py b/inspire_dojson/hep/model.py index 42771c2c..039a7d0b 100644 --- a/inspire_dojson/hep/model.py +++ b/inspire_dojson/hep/model.py @@ -27,16 +27,20 @@ import itertools import six - +from inspire_schemas.builders.literature import is_citeable from inspire_schemas.utils import ( convert_old_publication_info_to_new, normalize_arxiv_category, ) -from inspire_schemas.builders.literature import is_citeable from inspire_utils.helpers import force_list from inspire_utils.record import get_value -from ..model import FilterOverdo, add_schema, clean_marc, clean_record +from inspire_dojson.model import ( + FilterOverdo, + add_schema, + clean_marc, + clean_record, +) def add_arxiv_categories(record, blob): @@ -56,7 +60,9 @@ def convert_publication_infos(record, blob): if not record.get('publication_info'): return record - record['publication_info'] = convert_old_publication_info_to_new(record['publication_info']) + record['publication_info'] = convert_old_publication_info_to_new( + record['publication_info'] + ) return record @@ -69,7 +75,9 @@ def move_incomplete_publication_infos(record, blob): if not 
non_empty_keys: continue if non_empty_keys.issubset({'journal_record', 'journal_title'}): - public_note = {'value': u'Submitted to {}'.format(publication_info['journal_title'])} + public_note = { + 'value': u'Submitted to {}'.format(publication_info['journal_title']) + } record.setdefault('public_notes', []).append(public_note) else: publication_infos.append(publication_info) @@ -112,7 +120,9 @@ def ensure_ordered_figures(record, blob): else: unordered_figures_list.append(figure) - record['figures'] = [value for key, value in sorted(six.iteritems(ordered_figures_dict))] + record['figures'] = [ + value for key, value in sorted(six.iteritems(ordered_figures_dict)) + ] record['figures'].extend(unordered_figures_list) return record @@ -127,7 +137,10 @@ def duplicates(elements): else: duplicate_keys_list.append(element['key']) - for index, attachment in itertools.chain(duplicates(record.get('documents', [])), duplicates(record.get('figures', []))): + for index, attachment in itertools.chain( + duplicates(record.get('documents', [])), + duplicates(record.get('figures', [])), + ): attachment['key'] = u'{}_{}'.format(index, attachment['key']) return record @@ -140,15 +153,9 @@ def write_ids(record, blob): for schema, values in six.iteritems(id_dict): z_values = iter(values) a_value = next(z_values) - result_035.append({ - '9': schema, - 'a': a_value - }) + result_035.append({'9': schema, 'a': a_value}) for z_value in z_values: - result_035.append({ - '9': schema, - 'z': z_value - }) + result_035.append({'9': schema, 'z': z_value}) if 'id_dict' in record: del record['id_dict'] diff --git a/inspire_dojson/hep/rules/bd0xx.py b/inspire_dojson/hep/rules/bd0xx.py index 66724136..81412361 100644 --- a/inspire_dojson/hep/rules/bd0xx.py +++ b/inspire_dojson/hep/rules/bd0xx.py @@ -28,17 +28,14 @@ from collections import defaultdict import pycountry - from dojson import utils from idutils import is_arxiv_post_2007, is_doi, is_handle, normalize_doi - from inspire_schemas.api import load_schema from inspire_schemas.utils import normalize_arxiv_category from inspire_utils.helpers import force_list -from ..model import hep, hep2marc -from ...utils import force_single_element, normalize_isbn - +from inspire_dojson.hep.model import hep, hep2marc +from inspire_dojson.utils import force_single_element, normalize_isbn RE_LANGUAGE = re.compile(r'\/| or | and |,|=|\s+') @@ -47,10 +44,13 @@ @utils.for_each_value def isbns(self, key, value): """Populate the ``isbns`` key.""" + def _get_medium(value): def _normalize(medium): schema = load_schema('hep') - valid_media = schema['properties']['isbns']['items']['properties']['medium']['enum'] + valid_media = schema['properties']['isbns']['items']['properties'][ + 'medium' + ]['enum'] medium = medium.lower().replace('-', '').replace(' ', '') if medium in valid_media: @@ -95,6 +95,7 @@ def dois(self, key, value): Also populates the ``persistent_identifiers`` key through side effects. 
""" + def _get_first_non_curator_source(sources): sources_without_curator = [el for el in sources if el.upper() != 'CURATOR'] return force_single_element(sources_without_curator) @@ -128,19 +129,23 @@ def _is_handle(id_, type_): source = _get_first_non_curator_source(sources) if _is_doi(id_, schema): - dois.append({ - 'material': material, - 'source': source, - 'value': normalize_doi(id_), - }) + dois.append( + { + 'material': material, + 'source': source, + 'value': normalize_doi(id_), + } + ) else: schema = 'HDL' if _is_handle(id_, schema) else schema - persistent_identifiers.append({ - 'material': material, - 'schema': schema, - 'source': source, - 'value': id_, - }) + persistent_identifiers.append( + { + 'material': material, + 'schema': schema, + 'source': source, + 'value': id_, + } + ) self['persistent_identifiers'] = persistent_identifiers return dois @@ -174,8 +179,10 @@ def persistent_identifiers2marc(self, key, value): def texkeys(self, key, value): """Populate the ``texkeys`` key. - Also populates the ``external_system_identifiers`` and ``_desy_bookkeeping`` keys through side effects. + Also populates the ``external_system_identifiers`` and + ``_desy_bookkeeping`` keys through side effects. """ + def _is_oai(id_, schema): return id_.startswith('oai:') @@ -207,10 +214,13 @@ def _is_texkey(id_, schema): elif _is_desy(id_, schema): _desy_bookkeeping.append({'identifier': id_}) else: - external_system_identifiers.insert(0, { - 'schema': schema, - 'value': id_, - }) + external_system_identifiers.insert( + 0, + { + 'schema': schema, + 'value': id_, + }, + ) for id_ in other_ids: id_ = id_.strip() @@ -224,10 +234,12 @@ def _is_texkey(id_, schema): elif _is_desy(id_, schema): _desy_bookkeeping.append({'identifier': id_}) else: - external_system_identifiers.append({ - 'schema': schema, - 'value': id_, - }) + external_system_identifiers.append( + { + 'schema': schema, + 'value': id_, + } + ) self['external_system_identifiers'] = external_system_identifiers self['_desy_bookkeeping'] = _desy_bookkeeping @@ -242,16 +254,20 @@ def texkeys2marc(self, key, value): values = force_list(value) if values: value = values[0] - result.append({ - '9': 'INSPIRETeX', - 'a': value, - }) + result.append( + { + '9': 'INSPIRETeX', + 'a': value, + } + ) for value in values[1:]: - result.append({ - '9': 'INSPIRETeX', - 'z': value, - }) + result.append( + { + '9': 'INSPIRETeX', + 'z': value, + } + ) return result @@ -264,6 +280,7 @@ def external_system_identifiers2marc(self, key, value): ``id_dict`` dictionary that holds potentially duplicate IDs that are post-processed in a filter. """ + def _is_scheme_cernkey(id_, schema): return schema == 'CERNKEY' @@ -280,14 +297,18 @@ def _is_scheme_spires(id_, schema): schema = value.get('schema') if _is_scheme_spires(id_, schema): - result_970.append({ - 'a': id_, - }) + result_970.append( + { + 'a': id_, + } + ) elif _is_scheme_cernkey(id_, schema): - result_035.append({ - '9': 'CERNKEY', - 'z': id_, - }) + result_035.append( + { + '9': 'CERNKEY', + 'z': id_, + } + ) else: id_dict[schema].append(id_) @@ -302,6 +323,7 @@ def arxiv_eprints(self, key, value): Also populates the ``report_numbers`` key through side effects. 
""" + def _get_clean_arxiv_eprint(id_): return id_.split(':')[-1] @@ -323,26 +345,34 @@ def _get_clean_source(source): for value in values: id_ = force_single_element(value.get('a', '')) other_id = force_single_element(value.get('z', '')) - categories = [normalize_arxiv_category(category) for category - in force_list(value.get('c'))] + categories = [ + normalize_arxiv_category(category) + for category in force_list(value.get('c')) + ] source = force_single_element(value.get('9', '')) if _is_arxiv_eprint(id_, source): - arxiv_eprints.append({ - 'categories': categories, - 'value': _get_clean_arxiv_eprint(id_), - }) + arxiv_eprints.append( + { + 'categories': categories, + 'value': _get_clean_arxiv_eprint(id_), + } + ) elif _is_hidden_report_number(other_id, source): - report_numbers.append({ - 'hidden': True, - 'source': _get_clean_source(source), - 'value': other_id, - }) + report_numbers.append( + { + 'hidden': True, + 'source': _get_clean_source(source), + 'value': other_id, + } + ) else: - report_numbers.append({ - 'source': _get_clean_source(source), - 'value': id_, - }) + report_numbers.append( + { + 'source': _get_clean_source(source), + 'value': id_, + } + ) self['report_numbers'] = report_numbers return arxiv_eprints @@ -361,23 +391,29 @@ def arxiv_eprints2marc(self, key, values): for value in values: arxiv_id = value.get('value') arxiv_id = 'arXiv:' + arxiv_id if is_arxiv_post_2007(arxiv_id) else arxiv_id - result_037.append({ - '9': 'arXiv', - 'a': arxiv_id, - 'c': force_single_element(value.get('categories')), - }) - - result_035.append({ - '9': 'arXiv', - 'a': 'oai:arXiv.org:' + value.get('value'), - }) + result_037.append( + { + '9': 'arXiv', + 'a': arxiv_id, + 'c': force_single_element(value.get('categories')), + } + ) + + result_035.append( + { + '9': 'arXiv', + 'a': 'oai:arXiv.org:' + value.get('value'), + } + ) categories = force_list(value.get('categories')) for category in categories: - result_65017.append({ - '2': 'arXiv', - 'a': category, - }) + result_65017.append( + { + '2': 'arXiv', + 'a': category, + } + ) self['65017'] = result_65017 self['035'] = result_035 @@ -388,6 +424,7 @@ def arxiv_eprints2marc(self, key, values): @utils.for_each_value def report_numbers2marc(self, key, value): """Populate the ``037`` MARC field.""" + def _get_mangled_source(source): if source == 'arXiv': return 'arXiv:reportnumber' diff --git a/inspire_dojson/hep/rules/bd1xx.py b/inspire_dojson/hep/rules/bd1xx.py index 9c98e21c..54b3ebe3 100644 --- a/inspire_dojson/hep/rules/bd1xx.py +++ b/inspire_dojson/hep/rules/bd1xx.py @@ -27,16 +27,11 @@ import re from dojson import utils - from inspire_utils.dedupers import dedupe_list from inspire_utils.helpers import force_list, maybe_int -from ..model import hep, hep2marc -from ...utils import ( - force_single_element, - get_record_ref, -) - +from inspire_dojson.hep.model import hep, hep2marc +from inspire_dojson.utils import force_single_element, get_record_ref ORCID = re.compile(r'\d{4}-\d{4}-\d{4}-\d{3}[0-9Xx]') @@ -52,10 +47,12 @@ def _get_affiliations(value): # we might match a value with the wrong recid. 
if len(u_values) == len(z_values): for u_value, z_value in zip(u_values, z_values): - result.append({ - 'record': get_record_ref(z_value, 'institutions'), - 'value': u_value, - }) + result.append( + { + 'record': get_record_ref(z_value, 'institutions'), + 'value': u_value, + } + ) else: for u_value in u_values: result.append({'value': u_value}) @@ -63,15 +60,23 @@ def _get_affiliations(value): return dedupe_list(result) def _get_affiliations_identifiers(value): - t_values = (t_value.split(':', 1) for t_value in dedupe_list(force_list(value.get('t')))) + t_values = ( + t_value.split(':', 1) for t_value in dedupe_list(force_list(value.get('t'))) + ) - return [{'schema': schema.upper(), 'value': identifier} for schema, identifier in t_values] + return [ + {'schema': schema.upper(), 'value': identifier} + for schema, identifier in t_values + ] def _get_curated_relation(value): return value.get('y') == '1' or None def _get_emails(value): - return [el[6:] if el.startswith('email:') else el for el in force_list(value.get('m'))] + return [ + el[6:] if el.startswith('email:') else el + for el in force_list(value.get('m')) + ] def _get_full_names(value): return [full_name.strip(', ') for full_name in force_list(value.get('a'))] @@ -93,40 +98,52 @@ def _is_cern(j_value): i_values = force_list(value.get('i')) for i_value in i_values: - result.append({ - 'schema': 'INSPIRE ID', - 'value': i_value, - }) + result.append( + { + 'schema': 'INSPIRE ID', + 'value': i_value, + } + ) j_values = force_list(value.get('j')) for j_value in j_values: if _is_jacow(j_value): - result.append({ - 'schema': 'JACOW', - 'value': 'JACoW-' + j_value[6:], - }) + result.append( + { + 'schema': 'JACOW', + 'value': 'JACoW-' + j_value[6:], + } + ) elif _is_orcid(j_value): - result.append({ - 'schema': 'ORCID', - 'value': j_value[6:].replace('.', ''), - }) + result.append( + { + 'schema': 'ORCID', + 'value': j_value[6:].replace('.', ''), + } + ) elif _is_naked_orcid(j_value): - result.append({ - 'schema': 'ORCID', - 'value': j_value, - }) + result.append( + { + 'schema': 'ORCID', + 'value': j_value, + } + ) elif _is_cern(j_value): - result.append({ - 'schema': 'CERN', - 'value': 'CERN-' + j_value[5:], - }) + result.append( + { + 'schema': 'CERN', + 'value': 'CERN-' + j_value[5:], + } + ) w_values = force_list(value.get('w')) for w_value in w_values: - result.append({ - 'schema': 'INSPIRE BAI', - 'value': w_value, - }) + result.append( + { + 'schema': 'INSPIRE BAI', + 'value': w_value, + } + ) return dedupe_list(result) @@ -146,7 +163,9 @@ def _get_raw_affiliations(value): return dedupe_list([{'value': el} for el in force_list(value.get('v'))]) def _get_record(value): - return get_record_ref(maybe_int(force_single_element(value.get('x'))), 'authors') + return get_record_ref( + maybe_int(force_single_element(value.get('x'))), 'authors' + ) full_names = _get_full_names(value) if len(full_names) == 1: @@ -172,7 +191,8 @@ def _get_record(value): 'full_name': full_name, 'inspire_roles': _get_inspire_roles(value), 'raw_affiliations': _get_raw_affiliations(value), - } for full_name in full_names + } + for full_name in full_names ] @@ -218,13 +238,12 @@ def _get_ids(value): return ids def _get_affiliations(value): - return [ - aff.get('value') for aff in value.get('affiliations', []) - ] + return [aff.get('value') for aff in value.get('affiliations', [])] def _get_affiliations_identifiers(value): return [ - u'{}:{}'.format(aff.get('schema'), aff.get('value')) for aff in value.get('affiliations_identifiers', []) + 
u'{}:{}'.format(aff.get('schema'), aff.get('value')) + for aff in value.get('affiliations_identifiers', []) ] def _get_inspire_roles(value): @@ -232,9 +251,7 @@ def _get_inspire_roles(value): return ['ed.' for role in values if role == 'editor'] def _get_raw_affiliations(value): - return [ - aff.get('value') for aff in value.get('raw_affiliations', []) - ] + return [aff.get('value') for aff in value.get('raw_affiliations', [])] def get_value_100_700(value): ids = _get_ids(value) diff --git a/inspire_dojson/hep/rules/bd2xx.py b/inspire_dojson/hep/rules/bd2xx.py index e2df2513..cca61a0c 100644 --- a/inspire_dojson/hep/rules/bd2xx.py +++ b/inspire_dojson/hep/rules/bd2xx.py @@ -25,13 +25,11 @@ from __future__ import absolute_import, division, print_function import langdetect - from dojson import utils - from inspire_utils.helpers import force_list -from ..model import hep, hep2marc -from ...utils import normalize_date_aggressively +from inspire_dojson.hep.model import hep, hep2marc +from inspire_dojson.utils import normalize_date_aggressively @hep.over('titles', '^(210|245|246|247)..') @@ -53,11 +51,14 @@ def titles(self, key, value): 'title': value.get('a'), } - self.setdefault('titles', []).insert(0, { - 'source': value.get('9'), - 'subtitle': value.get('b'), - 'title': value.get('a'), - }) + self.setdefault('titles', []).insert( + 0, + { + 'source': value.get('9'), + 'subtitle': value.get('b'), + 'title': value.get('a'), + }, + ) @hep.over('title_translations', '^242..') @@ -88,18 +89,21 @@ def titles2marc(self, key, values): """ first, rest = values[0], values[1:] - self.setdefault('245', []).append({ - 'a': first.get('title'), - 'b': first.get('subtitle'), - '9': first.get('source'), - }) + self.setdefault('245', []).append( + { + 'a': first.get('title'), + 'b': first.get('subtitle'), + '9': first.get('source'), + } + ) return [ { 'a': value.get('title'), 'b': value.get('subtitle'), '9': value.get('source'), - } for value in rest + } + for value in rest ] diff --git a/inspire_dojson/hep/rules/bd3xx.py b/inspire_dojson/hep/rules/bd3xx.py index 9913e12b..d3b00e93 100644 --- a/inspire_dojson/hep/rules/bd3xx.py +++ b/inspire_dojson/hep/rules/bd3xx.py @@ -26,8 +26,8 @@ from inspire_utils.helpers import maybe_int -from ..model import hep, hep2marc -from ...utils import force_single_element +from inspire_dojson.hep.model import hep, hep2marc +from inspire_dojson.utils import force_single_element @hep.over('number_of_pages', '^300..') diff --git a/inspire_dojson/hep/rules/bd4xx.py b/inspire_dojson/hep/rules/bd4xx.py index d2999aba..c05479cc 100644 --- a/inspire_dojson/hep/rules/bd4xx.py +++ b/inspire_dojson/hep/rules/bd4xx.py @@ -26,7 +26,7 @@ from dojson import utils -from ..model import hep, hep2marc +from inspire_dojson.hep.model import hep, hep2marc @hep.over('book_series', '^490..') diff --git a/inspire_dojson/hep/rules/bd5xx.py b/inspire_dojson/hep/rules/bd5xx.py index a52cf25b..292aaf1a 100644 --- a/inspire_dojson/hep/rules/bd5xx.py +++ b/inspire_dojson/hep/rules/bd5xx.py @@ -27,13 +27,11 @@ import re from dojson import utils - from inspire_utils.date import normalize_date from inspire_utils.helpers import force_list, maybe_int -from ..model import hep, hep2marc -from ...utils import force_single_element, get_record_ref - +from inspire_dojson.hep.model import hep, hep2marc +from inspire_dojson.utils import force_single_element, get_record_ref IS_DEFENSE_DATE = re.compile('Presented (on )?(?P.*)', re.IGNORECASE) @@ -44,6 +42,7 @@ def public_notes(self, key, value): Also populates the 
``curated`` and ``thesis_info`` keys through side effects. """ + def _means_not_curated(public_note): return public_note in [ '*Brief entry*', @@ -58,24 +57,30 @@ def _means_not_curated(public_note): thesis_info = self.get('thesis_info', {}) source = force_single_element(value.get('9', '')) - for value in force_list(value): - for public_note in force_list(value.get('a')): + for current_value in force_list(value): + for public_note in force_list(current_value.get('a')): match = IS_DEFENSE_DATE.match(public_note) if match: try: - thesis_info['defense_date'] = normalize_date(match.group('defense_date')) + thesis_info['defense_date'] = normalize_date( + match.group('defense_date') + ) except ValueError: - public_notes.append({ - 'source': source, - 'value': public_note, - }) + public_notes.append( + { + 'source': source, + 'value': public_note, + } + ) elif _means_not_curated(public_note): self['curated'] = False else: - public_notes.append({ - 'source': source, - 'value': public_note, - }) + public_notes.append( + { + 'source': source, + 'value': public_note, + } + ) self['thesis_info'] = thesis_info return public_notes @@ -84,6 +89,7 @@ def _means_not_curated(public_note): @hep.over('thesis_info', '^502..') def thesis_info(self, key, value): """Populate the ``thesis_info`` key.""" + def _get_degree_type(value): DEGREE_TYPES_MAP = { 'RAPPORT DE STAGE': 'other', @@ -112,11 +118,14 @@ def _get_institutions(value): if len(c_values) != len(z_values): return [{'name': c_value} for c_value in c_values] else: - return [{ - 'curated_relation': True, - 'name': c_value, - 'record': get_record_ref(z_value, 'institutions'), - } for c_value, z_value in zip(c_values, z_values)] + return [ + { + 'curated_relation': True, + 'name': c_value, + 'record': get_record_ref(z_value, 'institutions'), + } + for c_value, z_value in zip(c_values, z_values) + ] thesis_info = self.get('thesis_info', {}) @@ -133,6 +142,7 @@ def thesis_info2marc(self, key, value): Also populates the ``500`` MARC field through side effects. """ + def _get_b_value(value): DEGREE_TYPES_MAP = { 'bachelor': 'Bachelor', @@ -152,9 +162,11 @@ def _get_b_value(value): result_502 = self.get('502', {}) if value.get('defense_date'): - result_500.append({ - 'a': u'Presented on {}'.format(value.get('defense_date')), - }) + result_500.append( + { + 'a': u'Presented on {}'.format(value.get('defense_date')), + } + ) result_502 = { 'b': _get_b_value(value), @@ -176,10 +188,12 @@ def abstracts(self, key, value): source = force_single_element(value.get('9')) for a_value in force_list(value.get('a')): - result.append({ - 'source': source, - 'value': a_value, - }) + result.append( + { + 'source': source, + 'value': a_value, + } + ) return result @@ -220,6 +234,7 @@ def funding_info2marc(self, key, value): @utils.for_each_value def license(self, key, value): """Populate the ``license`` key.""" + def _get_license(value): a_values = force_list(value.get('a')) @@ -301,6 +316,7 @@ def _private_notes(self, key, value): Also populates the ``_export_to`` key through side effects. 
""" + def _is_for_cds(value): normalized_c_values = [el.upper() for el in force_list(value.get('c'))] return 'CDS' in normalized_c_values @@ -316,21 +332,23 @@ def _is_not_for_hal(value): _private_notes = self.get('_private_notes', []) _export_to = self.get('_export_to', {}) - for value in force_list(value): - if _is_for_cds(value): + for current_value in force_list(value): + if _is_for_cds(current_value): _export_to['CDS'] = True - if _is_for_hal(value): + if _is_for_hal(current_value): _export_to['HAL'] = True - elif _is_not_for_hal(value): + elif _is_not_for_hal(current_value): _export_to['HAL'] = False - source = force_single_element(value.get('9')) - for _private_note in force_list(value.get('a')): - _private_notes.append({ - 'source': source, - 'value': _private_note, - }) + source = force_single_element(current_value.get('9')) + for _private_note in force_list(current_value.get('a')): + _private_notes.append( + { + 'source': source, + 'value': _private_note, + } + ) self['_export_to'] = _export_to return _private_notes @@ -343,6 +361,7 @@ def _private_notes2marc(self, key, value): Also populates the `595_H` MARC key through side effects. """ + def _is_from_hal(value): return value.get('source') == 'HAL' @@ -358,6 +377,7 @@ def _is_from_hal(value): @hep2marc.over('595', '^_export_to$') def _export_to2marc(self, key, value): """Populate the ``595`` MARC field.""" + def _is_for_cds(value): return 'CDS' in value @@ -405,10 +425,7 @@ def _desy_bookkeeping2marc(self, key, value): 's': value.get('status'), } - self.setdefault('035', []).append({ - '9': 'DESY', - 'z': value['identifier'] - }) + self.setdefault('035', []).append({'9': 'DESY', 'z': value['identifier']}) @hep.over('_private_notes', '^595.H') @@ -420,5 +437,6 @@ def _private_notes_hal(self, key, value): { 'source': 'HAL', 'value': _private_note, - } for _private_note in force_list(value.get('a')) + } + for _private_note in force_list(value.get('a')) ] diff --git a/inspire_dojson/hep/rules/bd6xx.py b/inspire_dojson/hep/rules/bd6xx.py index f58aebbc..3ef3871f 100644 --- a/inspire_dojson/hep/rules/bd6xx.py +++ b/inspire_dojson/hep/rules/bd6xx.py @@ -25,13 +25,11 @@ from __future__ import absolute_import, division, print_function import six - from dojson import utils - from inspire_utils.helpers import force_list -from ..model import hep, hep2marc -from ...utils import force_single_element, get_record_ref +from inspire_dojson.hep.model import hep, hep2marc +from inspire_dojson.utils import force_single_element, get_record_ref ENERGY_RANGES_MAP = { '1': '0-3 GeV', @@ -65,10 +63,12 @@ def accelerator_experiments(self, key, value): # we might match a value with the wrong recid. if len(e_values) == len(zero_values): for e_value, zero_value in zip(e_values, zero_values): - result.append({ - 'legacy_name': e_value, - 'record': get_record_ref(zero_value, 'experiments'), - }) + result.append( + { + 'legacy_name': e_value, + 'record': get_record_ref(zero_value, 'experiments'), + } + ) else: for e_value in e_values: result.append({'legacy_name': e_value}) @@ -92,6 +92,7 @@ def keywords(self, key, values): Also populates the ``energy_ranges`` key through side effects. 
""" + def _get_source(value): sources = force_list(value.get('9')) if 'conference' in sources: @@ -105,7 +106,8 @@ def _get_source(value): values = force_list(values) automatic_keywords = any( a_value.lower() == '* automatic keywords *' - for value in values for a_value in force_list(value.get('a')) + for value in values + for a_value in force_list(value.get('a')) ) for value in values: @@ -120,11 +122,13 @@ def _get_source(value): for a_value in a_values: if a_value.lower() == '* automatic keywords *': continue - keywords.append({ - 'schema': schema, - 'source': source, - 'value': a_value, - }) + keywords.append( + { + 'schema': schema, + 'source': source, + 'value': a_value, + } + ) if value.get('e'): energy_ranges.append(ENERGY_RANGES_MAP.get(value.get('e'))) @@ -164,40 +168,53 @@ def keywords2marc(self, key, values): keyword = value.get('value') if schema == 'PACS' or schema == 'PDG': - result_084.append({ - '2': schema, - '9': source, - 'a': keyword, - }) + result_084.append( + { + '2': schema, + '9': source, + 'a': keyword, + } + ) elif schema == 'JACOW': - result_6531.append({ - '2': 'JACoW', - '9': source, - 'a': keyword, - }) + result_6531.append( + { + '2': 'JACoW', + '9': source, + 'a': keyword, + } + ) elif schema == 'INSPIRE': - result_695.append({ - '2': 'INSPIRE', - '9': source, - 'a': keyword, - }) + result_695.append( + { + '2': 'INSPIRE', + '9': source, + 'a': keyword, + } + ) elif schema == 'INIS': - result_695.append({ - '2': 'INIS', - '9': source, - 'a': keyword, - }) + result_695.append( + { + '2': 'INIS', + '9': source, + 'a': keyword, + } + ) elif source != 'magpie': - result_6531.append({ - '9': source, - 'a': keyword, - }) + result_6531.append( + { + '9': source, + 'a': keyword, + } + ) if automatic_keywords: - result_695.insert(0, { - '2': 'INSPIRE', - 'a': '* Automatic Keywords *', - }) + result_695.insert( + 0, + { + '2': 'INSPIRE', + 'a': '* Automatic Keywords *', + }, + ) self['6531'] = result_6531 self['084'] = result_084 diff --git a/inspire_dojson/hep/rules/bd7xx.py b/inspire_dojson/hep/rules/bd7xx.py index 5a85c24b..7b01b2d7 100644 --- a/inspire_dojson/hep/rules/bd7xx.py +++ b/inspire_dojson/hep/rules/bd7xx.py @@ -25,7 +25,6 @@ from __future__ import absolute_import, division, print_function from dojson import utils - from inspire_schemas.api import load_schema from inspire_schemas.utils import ( convert_new_publication_info_to_old, @@ -34,8 +33,8 @@ ) from inspire_utils.helpers import force_list, maybe_int -from ..model import hep, hep2marc -from ...utils import ( +from inspire_dojson.hep.model import hep, hep2marc +from inspire_dojson.utils import ( force_single_element, get_recid_from_ref, get_record_ref, @@ -53,10 +52,12 @@ def collaborations(self, key, value): for g_value in force_list(value.get('g')): collaborations = normalize_collaboration(g_value) if len(collaborations) == 1: - result.append({ - 'record': get_record_ref(maybe_int(value.get('0')), 'experiments'), - 'value': collaborations[0], - }) + result.append( + { + 'record': get_record_ref(maybe_int(value.get('0')), 'experiments'), + 'value': collaborations[0], + } + ) else: result.extend({'value': collaboration} for collaboration in collaborations) @@ -74,6 +75,7 @@ def collaborations2marc(self, key, value): @utils.for_each_value def publication_info(self, key, value): """Populate the ``publication_info`` key.""" + def _get_cnum(value): w_value = force_single_element(value.get('w', '')) normalized_w_value = w_value.replace('/', '-').upper() @@ -226,14 +228,20 @@ def 
related_records2marc(self, key, value): 'w': get_recid_from_ref(value.get('record')), } elif value.get('relation') == 'successor': - self.setdefault('78502', []).append({ - 'i': 'superseded by', - 'w': get_recid_from_ref(value.get('record')), - }) + self.setdefault('78502', []).append( + { + 'i': 'superseded by', + 'w': get_recid_from_ref(value.get('record')), + } + ) elif value.get('relation') == 'predecessor': - self.setdefault('78002', []).append({ - 'i': 'supersedes', - 'w': get_recid_from_ref(value.get('record')), - }) + self.setdefault('78002', []).append( + { + 'i': 'supersedes', + 'w': get_recid_from_ref(value.get('record')), + } + ) else: - raise NotImplementedError(u"Unhandled relation in related_records: {}".format(value.get('relation'))) + raise NotImplementedError( + u"Unhandled relation in related_records: {}".format(value.get('relation')) + ) diff --git a/inspire_dojson/hep/rules/bd9xx.py b/inspire_dojson/hep/rules/bd9xx.py index 8ef04f31..1c358818 100644 --- a/inspire_dojson/hep/rules/bd9xx.py +++ b/inspire_dojson/hep/rules/bd9xx.py @@ -28,7 +28,6 @@ from dojson import utils from idutils import is_arxiv_post_2007 - from inspire_schemas.api import ReferenceBuilder, load_schema from inspire_schemas.utils import ( build_pubnote, @@ -38,8 +37,12 @@ from inspire_utils.helpers import force_list, maybe_int from inspire_utils.record import get_value -from ..model import hep, hep2marc -from ...utils import force_single_element, get_recid_from_ref, get_record_ref +from inspire_dojson.hep.model import hep, hep2marc +from inspire_dojson.utils import ( + force_single_element, + get_recid_from_ref, + get_record_ref, +) COLLECTIONS_MAP = { 'babar-analysisdocument': 'BABAR Analysis Documents', @@ -170,7 +173,9 @@ def document_type(self, key, value): elif normalized_a_value == 'deleted': self['deleted'] = True elif normalized_a_value in COLLECTIONS_MAP: - self.setdefault('_collections', []).append(COLLECTIONS_MAP[normalized_a_value]) + self.setdefault('_collections', []).append( + COLLECTIONS_MAP[normalized_a_value] + ) elif normalized_a_value in DOCUMENT_TYPE_MAP: document_type.append(DOCUMENT_TYPE_MAP[normalized_a_value]) elif normalized_a_value in valid_publication_types: @@ -255,14 +260,15 @@ def publication_type2marc(self, key, value): @utils.for_each_value def references(self, key, value): """Populate the ``references`` key.""" + def _has_curator_flag(value): normalized_nine_values = [el.upper() for el in force_list(value.get('9'))] return 'CURATOR' in normalized_nine_values def _is_curated(value): - is_explicitly_curated = ( - force_single_element(value.get('z')) == '1' and _has_curator_flag(value) - ) + is_explicitly_curated = force_single_element( + value.get('z') + ) == '1' and _has_curator_flag(value) has_only_0_and_z = set(value.keys()) == {'0', 'z'} return is_explicitly_curated or has_only_0_and_z @@ -323,8 +329,16 @@ def references2marc(self, key, value): external_ids = force_list(reference.get('external_system_identifiers')) u_values = force_list(get_value(reference, 'urls.value')) - u_values.extend(CDS_RECORD_FORMAT.format(el['value']) for el in external_ids if el.get('schema') == 'CDS') - u_values.extend(ADS_RECORD_FORMAT.format(el['value']) for el in external_ids if el.get('schema') == 'ADS') + u_values.extend( + CDS_RECORD_FORMAT.format(el['value']) + for el in external_ids + if el.get('schema') == 'CDS' + ) + u_values.extend( + ADS_RECORD_FORMAT.format(el['value']) + for el in external_ids + if el.get('schema') == 'ADS' + ) authors = force_list(reference.get('authors')) 
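# Editor's sketch (not part of the original patch): at this point the external
# system identifiers have already been folded into the 'u' (URL) subfields, so
# a hypothetical id such as {'schema': 'CDS', 'value': '2270264'} is rendered
# through CDS_RECORD_FORMAT (and an ADS id through ADS_RECORD_FORMAT) and
# appended to u_values alongside any explicit reference URLs; the editor names
# collected next are emitted separately from plain author names.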
e_values = [el['full_name'] for el in authors if el.get('inspire_role') == 'editor'] @@ -333,10 +347,16 @@ def references2marc(self, key, value): r_values = force_list(reference.get('report_numbers')) if reference.get('arxiv_eprint'): arxiv_eprint = reference['arxiv_eprint'] - r_values.append('arXiv:' + arxiv_eprint if is_arxiv_post_2007(arxiv_eprint) else arxiv_eprint) + r_values.append( + 'arXiv:' + arxiv_eprint + if is_arxiv_post_2007(arxiv_eprint) + else arxiv_eprint + ) if reference.get('publication_info'): - reference['publication_info'] = convert_new_publication_info_to_old([reference['publication_info']])[0] + reference['publication_info'] = convert_new_publication_info_to_old( + [reference['publication_info']] + )[0] journal_title = get_value(reference, 'publication_info.journal_title') journal_volume = get_value(reference, 'publication_info.journal_volume') page_start = get_value(reference, 'publication_info.page_start') diff --git a/inspire_dojson/hep/rules/bdFFT.py b/inspire_dojson/hep/rules/bdFFT.py index bded0913..2be81aa5 100644 --- a/inspire_dojson/hep/rules/bdFFT.py +++ b/inspire_dojson/hep/rules/bdFFT.py @@ -28,11 +28,10 @@ import re from dojson import utils - from inspire_utils.helpers import force_list -from ..model import hep, hep2marc -from ...utils import absolute_url, afs_url, afs_url_to_path +from inspire_dojson.hep.model import hep, hep2marc +from inspire_dojson.utils import absolute_url, afs_url, afs_url_to_path @hep.over('documents', '^FFT[^%][^%]') @@ -42,8 +41,13 @@ def documents(self, key, value): Also populates the ``figures`` key through side effects. """ + def _is_hidden(value): - return 'HIDDEN' in [val.upper() for val in force_list(value.get('o'))] or _get_source(value) == 'arxiv' or None + return ( + 'HIDDEN' in [val.upper() for val in force_list(value.get('o'))] + or _get_source(value) == 'arxiv' + or None + ) def _is_figure(value): return value.get('f', "").endswith(".png") @@ -81,13 +85,15 @@ def _get_source(value): if _is_figure(value): index, caption = _get_index_and_caption(value.get('d', '')) - figures.append({ - 'key': _get_key(value), - 'caption': caption, - 'url': afs_url(value.get('a')), - 'order': index, - 'source': 'arxiv', # XXX: we don't have any other figures on legacy - }) + figures.append( + { + 'key': _get_key(value), + 'caption': caption, + 'url': afs_url(value.get('a')), + 'order': index, + 'source': 'arxiv', # XXX: we don't have any other figures on legacy + } + ) self['figures'] = figures else: return { @@ -124,7 +130,10 @@ def _get_hidden(value): def _get_filename_and_extension(value): file_name, extension = os.path.splitext(value.get('filename', value['key'])) - if file_name == "document" and value.get("material", "publication") != "publication": + if ( + file_name == "document" + and value.get("material", "publication") != "publication" + ): file_name = value["material"] return file_name, extension @@ -145,12 +154,14 @@ def figures2marc(self, key, values): fft = self.setdefault('FFT', []) for index, value in enumerate(values): file_name, extension = os.path.splitext(value.get('filename', value['key'])) - fft.append({ - 'd': u'{:05d} {}'.format(index, value.get('caption')), - 'a': afs_url_to_path(absolute_url(value.get('url'))), - 't': 'Plot', - 'n': file_name, - 'f': extension, - }) + fft.append( + { + 'd': u'{:05d} {}'.format(index, value.get('caption')), + 'a': afs_url_to_path(absolute_url(value.get('url'))), + 't': 'Plot', + 'n': file_name, + 'f': extension, + } + ) return fft diff --git a/inspire_dojson/hepnames/__init__.py 
b/inspire_dojson/hepnames/__init__.py index af6754d7..5292f163 100644 --- a/inspire_dojson/hepnames/__init__.py +++ b/inspire_dojson/hepnames/__init__.py @@ -24,5 +24,5 @@ from __future__ import absolute_import, division, print_function -from . import rules # noqa: F401 -from .model import hepnames, hepnames2marc # noqa: F401 +from inspire_dojson.hepnames import rules # noqa: F401 +from inspire_dojson.hepnames.model import hepnames, hepnames2marc # noqa: F401 diff --git a/inspire_dojson/hepnames/model.py b/inspire_dojson/hepnames/model.py index 07de915c..2a5f1ead 100644 --- a/inspire_dojson/hepnames/model.py +++ b/inspire_dojson/hepnames/model.py @@ -24,8 +24,13 @@ from __future__ import absolute_import, division, print_function -from ..model import FilterOverdo, add_schema, add_collection, clean_marc, clean_record - +from inspire_dojson.model import ( + FilterOverdo, + add_collection, + add_schema, + clean_marc, + clean_record, +) hepnames_filters = [ add_schema('authors.json'), diff --git a/inspire_dojson/hepnames/rules.py b/inspire_dojson/hepnames/rules.py index 2ac13b8a..b62a8cf9 100644 --- a/inspire_dojson/hepnames/rules.py +++ b/inspire_dojson/hepnames/rules.py @@ -27,7 +27,6 @@ import re from dojson import utils - from inspire_schemas.api import load_schema from inspire_schemas.utils import ( normalize_arxiv_category, @@ -37,22 +36,23 @@ from inspire_utils.helpers import force_list, maybe_int from inspire_utils.name import normalize_name -from .model import hepnames, hepnames2marc -from ..utils import ( +from inspire_dojson.hepnames.model import hepnames, hepnames2marc +from inspire_dojson.utils import ( force_single_element, - get_record_ref, get_recid_from_ref, + get_record_ref, normalize_rank, quote_url, - unquote_url + unquote_url, ) - AWARD_YEAR = re.compile(r'\(?(?P<year>\d{4})\)?') INSPIRE_BAI = re.compile(r'(\w+\.)+\d+') LOOKS_LIKE_CERN = re.compile(r'^\d+$|^CER[MN]?-|^CNER-|^CVERN-', re.I) NON_DIGIT = re.compile(r'[^\d]+') -LINKEDIN_URL = re.compile(r'https?://(\w+\.)?linkedin\.com/in/(?P<page>[\w%-]+)', re.UNICODE) +LINKEDIN_URL = re.compile( + r'https?://(\w+\.)?linkedin\.com/in/(?P<page>[\w%-]+)', re.UNICODE +) TWITTER_URL = re.compile(r'https?://(www\.)?twitter\.com/(?P<handle>\w+)') WIKIPEDIA_URL = re.compile(r'https?://(?P<lang>\w+)\.wikipedia\.org/wiki/(?P<page>.*)') @@ -111,15 +111,20 @@ def _try_to_correct_value(schema, value): z_value = _try_to_correct_value(schema, z_value) if schema and a_value: - ids.insert(0, { - 'schema': schema, - 'value': a_value, - }) + ids.insert( + 0, + { + 'schema': schema, + 'value': a_value, + }, + ) if schema and z_value: - ids.append({ - 'schema': schema, - 'value': z_value, - }) + ids.append( + { + 'schema': schema, + 'value': z_value, + } + ) return ids @@ -130,11 +135,9 @@ def ids2marc(self, key, values): Also populates the ``8564`` and ``970`` MARC field through side effects. """ + def _convert_schema(schema): - conversion = { - 'INSPIRE BAI': 'BAI', - 'INSPIRE ID': 'INSPIRE' - } + conversion = {'INSPIRE BAI': 'BAI', 'INSPIRE ID': 'INSPIRE'} return conversion.get(schema, schema) def _is_schema_spires(id_, schema): @@ -175,10 +178,12 @@ def _is_schema_twitter(id, schema): field = 'a' else: field = 'z' - result.append({ - field: id_, - '9': _convert_schema(schema), - }) + result.append( + { + field: id_, + '9': _convert_schema(schema), + } + ) return result @@ -187,8 +192,10 @@ def _is_schema_twitter(id, schema): def name(self, key, value): """Populate the ``name`` key. - Also populates the ``status``, ``birth_date`` and ``death_date`` keys through side effects.
+ Also populates the ``status``, ``birth_date`` and ``death_date`` + keys through side effects. """ + def _get_title(value): c_value = force_single_element(value.get('c', '')) if c_value != 'title (e.g. Sir)': @@ -280,14 +287,20 @@ def positions(self, key, value): current_email_addresses = force_list(value.get('m')) non_current_email_addresses = force_list(value.get('o')) - email_addresses.extend({ - 'value': address, - 'current': True, - } for address in current_email_addresses) - email_addresses.extend({ - 'value': address, - 'current': False, - } for address in non_current_email_addresses) + email_addresses.extend( + { + 'value': address, + 'current': True, + } + for address in current_email_addresses + ) + email_addresses.extend( + { + 'value': address, + 'current': False, + } + for address in non_current_email_addresses + ) self['email_addresses'] = email_addresses @@ -345,9 +358,7 @@ def email_addresses2marc(self, key, value): Also populates the 371 field as a side effect. """ m_or_o = 'm' if value.get('current') else 'o' - element = { - m_or_o: value.get('value') - } + element = {m_or_o: value.get('value')} if value.get('hidden'): return element @@ -365,25 +376,30 @@ def email_addresses595(self, key, value): emails = self.get('email_addresses', []) if value.get('o'): - emails.append({ - 'value': value.get('o'), - 'current': False, - 'hidden': True, - }) + emails.append( + { + 'value': value.get('o'), + 'current': False, + 'hidden': True, + } + ) if value.get('m'): - emails.append({ - 'value': value.get('m'), - 'current': True, - 'hidden': True, - }) + emails.append( + { + 'value': value.get('m'), + 'current': True, + 'hidden': True, + } + ) notes = self.get('_private_notes', []) new_note = ( { 'source': value.get('9'), 'value': _private_note, - } for _private_note in force_list(value.get('a')) + } + for _private_note in force_list(value.get('a')) ) notes.extend(new_note) self['_private_notes'] = notes @@ -393,7 +409,6 @@ def email_addresses595(self, key, value): @hepnames.over('name', '^400..') def name_variants(self, key, value): - name_item = self.get('name', {}) name_variants_list = name_item.get('name_variants', []) @@ -409,6 +424,7 @@ def arxiv_categories(self, key, value): Also populates the ``inspire_categories`` key through side effects. 
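For example (an editor's sketch; subfield values are hypothetical), an ``a`` subfield that normalizes to a valid arXiv category is kept, while a value that is not an arXiv category is routed to ``inspire_categories`` instead::

    {'a': 'hep-ph'}  -->  arxiv_categories == ['hep-ph']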
""" + def _is_arxiv(category): return category in valid_arxiv_categories() @@ -452,8 +468,8 @@ def _normalize(a_value): arxiv_categories = self.get('arxiv_categories', []) inspire_categories = self.get('inspire_categories', []) - for value in force_list(value): - for a_value in force_list(value.get('a')): + for current_value in force_list(value): + for a_value in force_list(current_value.get('a')): normalized_a_value = _normalize(a_value) if _is_arxiv(normalized_a_value): @@ -535,10 +551,7 @@ def birth_and_death_date2marc(self, key, value): def awards(self, key, value): award = AWARD_YEAR.sub('', value.get('a')).strip() year_match = AWARD_YEAR.search(value.get('a')) - if year_match: - year = int(year_match.group('year')) - else: - year = None + year = int(year_match.group('year')) if year_match else None return { 'name': award, @@ -552,7 +565,7 @@ def awards(self, key, value): def awards2marc(self, key, value): return { 'a': ' '.join([value.get('name', ''), str(value.get('year', ''))]).strip(), - 'u': value.get('url') + 'u': value.get('url'), } @@ -571,10 +584,7 @@ def _get_json_experiments(marc_dict): record = get_record_ref(recid, 'experiments') yield { 'curated_relation': record is not None, - 'current': ( - True if marc_dict.get('z', '').lower() == 'current' - else False - ), + 'current': (marc_dict.get('z', '').lower() == 'current'), 'end_date': end_year, 'name': name, 'record': record, @@ -643,10 +653,13 @@ def _get_id_schema(id_): recid = force_single_element(value.get('x')) record = get_record_ref(recid, 'authors') - ids = [{ - 'schema': _get_id_schema(id_), - 'value': id_, - } for id_ in force_list(value.get('i'))] + ids = [ + { + 'schema': _get_id_schema(id_), + 'value': id_, + } + for id_ in force_list(value.get('i')) + ] hidden = value.get('h') == 'HIDDEN' or None @@ -656,7 +669,7 @@ def _get_id_schema(id_): 'ids': ids, 'record': record, 'hidden': hidden, - 'curated_relation': value.get('y') == '1' if record else None + 'curated_relation': value.get('y') == '1' if record else None, } @@ -670,7 +683,7 @@ def advisors2marc(self, key, value): 'a': value.get('name'), 'g': value.get('degree_type'), 'i': ids, - 'h': 'HIDDEN' if value.get('hidden') else None + 'h': 'HIDDEN' if value.get('hidden') else None, } @@ -735,14 +748,16 @@ def new_record(self, key, value): new_record = self.get('new_record', {}) ids = self.get('ids', []) - for value in force_list(value): - for id_ in force_list(value.get('a')): - ids.append({ - 'schema': 'SPIRES', - 'value': id_, - }) + for current_value in force_list(value): + for id_ in force_list(current_value.get('a')): + ids.append( + { + 'schema': 'SPIRES', + 'value': id_, + } + ) - new_recid = force_single_element(value.get('d', '')) + new_recid = force_single_element(current_value.get('d', '')) if new_recid: new_record = get_record_ref(new_recid, 'authors') @@ -756,18 +771,19 @@ def deleted(self, key, value): Also populates the ``stub`` key through side effects. 
""" + def _is_deleted(value): return force_single_element(value.get('c', '')).upper() == 'DELETED' def _is_stub(value): - return not (force_single_element(value.get('a', '')).upper() == 'USEFUL') + return force_single_element(value.get('a', '')).upper() != 'USEFUL' deleted = self.get('deleted') stub = self.get('stub') - for value in force_list(value): - deleted = not deleted and _is_deleted(value) - stub = not stub and _is_stub(value) + for current_value in force_list(value): + deleted = not deleted and _is_deleted(current_value) + stub = not stub and _is_stub(current_value) self['stub'] = stub return deleted diff --git a/inspire_dojson/institutions/__init__.py b/inspire_dojson/institutions/__init__.py index 783e9d42..70024b7a 100644 --- a/inspire_dojson/institutions/__init__.py +++ b/inspire_dojson/institutions/__init__.py @@ -24,5 +24,5 @@ from __future__ import absolute_import, division, print_function -from . import rules # noqa: F401 -from .model import institutions # noqa: F401 +from inspire_dojson.institutions import rules # noqa: F401 +from inspire_dojson.institutions.model import institutions # noqa: F401 diff --git a/inspire_dojson/institutions/model.py b/inspire_dojson/institutions/model.py index 287e5bfe..58472249 100644 --- a/inspire_dojson/institutions/model.py +++ b/inspire_dojson/institutions/model.py @@ -24,7 +24,12 @@ from __future__ import absolute_import, division, print_function -from ..model import FilterOverdo, add_schema, add_collection, clean_record +from inspire_dojson.model import ( + FilterOverdo, + add_collection, + add_schema, + clean_record, +) def combine_addresses_and_location(record, blob): diff --git a/inspire_dojson/institutions/rules.py b/inspire_dojson/institutions/rules.py index f9d618d0..b13820bd 100644 --- a/inspire_dojson/institutions/rules.py +++ b/inspire_dojson/institutions/rules.py @@ -27,13 +27,11 @@ import re from dojson import utils - from inspire_utils.helpers import force_list, maybe_float, maybe_int -from .model import institutions -from ..utils import force_single_element, get_record_ref -from ..utils.geo import parse_institution_address - +from inspire_dojson.institutions.model import institutions +from inspire_dojson.utils import force_single_element, get_record_ref +from inspire_dojson.utils.geo import parse_institution_address ACRONYM = re.compile(r'\s*\((.*)\)\s*$') @@ -78,38 +76,44 @@ def _split_acronym(value): institution_hierarchy = self.get('institution_hierarchy', []) related_records = self.get('related_records', []) - for value in force_list(value): + for current_value in force_list(value): ICN.extend(force_list(value.get('t'))) if not legacy_ICN: - legacy_ICN = force_single_element(value.get('u')) + legacy_ICN = force_single_element(current_value.get('u')) - for b_value in force_list(value.get('b')): + for b_value in force_list(current_value.get('b')): department_name, department_acronym = _split_acronym(b_value) - institution_hierarchy.append({ - 'acronym': department_acronym, - 'name': department_name, - }) - - for a_value in force_list(value.get('a')): + institution_hierarchy.append( + { + 'acronym': department_acronym, + 'name': department_name, + } + ) + + for a_value in force_list(current_value.get('a')): institution_name, institution_acronym = _split_acronym(a_value) - institution_hierarchy.append({ - 'acronym': institution_acronym, - 'name': institution_name, - }) + institution_hierarchy.append( + { + 'acronym': institution_acronym, + 'name': institution_name, + } + ) - x_values = force_list(value.get('x')) - z_values 
= force_list(value.get('z')) + x_values = force_list(current_value.get('x')) + z_values = force_list(current_value.get('z')) # XXX: we zip only when they have the same length, otherwise # we might match a relation with the wrong recid. if len(x_values) == len(z_values): for _, recid in zip(x_values, z_values): - related_records.append({ - 'curated_relation': True, - 'record': get_record_ref(recid, 'institutions'), - 'relation_freetext': 'obsolete', - }) + related_records.append( + { + 'curated_relation': True, + 'record': get_record_ref(recid, 'institutions'), + 'relation_freetext': 'obsolete', + } + ) self['related_records'] = related_records self['institution_hierarchy'] = institution_hierarchy @@ -159,10 +163,7 @@ def institution_type(self, key, value): @institutions.over('name_variants', '^410..') def name_variants(self, key, value): - valid_sources = [ - 'ADS', - 'INSPIRE' - ] + valid_sources = ['ADS', 'INSPIRE'] if value.get('9') and value.get('9') not in valid_sources: return self.get('name_variants', []) @@ -175,10 +176,12 @@ def name_variants(self, key, value): source = force_single_element(value.get('9')) for name_variant in force_list(value.get('a')): - name_variants.append({ - 'source': source, - 'value': name_variant, - }) + name_variants.append( + { + 'source': source, + 'value': name_variant, + } + ) return name_variants diff --git a/inspire_dojson/journals/__init__.py b/inspire_dojson/journals/__init__.py index 1add2ab8..babc2907 100644 --- a/inspire_dojson/journals/__init__.py +++ b/inspire_dojson/journals/__init__.py @@ -24,5 +24,5 @@ from __future__ import absolute_import, division, print_function -from . import rules # noqa: F401 -from .model import journals # noqa: F401 +from inspire_dojson.journals import rules # noqa: F401 +from inspire_dojson.journals.model import journals # noqa: F401 diff --git a/inspire_dojson/journals/model.py b/inspire_dojson/journals/model.py index 4b8ef804..1467f64f 100644 --- a/inspire_dojson/journals/model.py +++ b/inspire_dojson/journals/model.py @@ -24,8 +24,12 @@ from __future__ import absolute_import, division, print_function -from ..model import FilterOverdo, add_collection, add_schema, clean_record - +from inspire_dojson.model import ( + FilterOverdo, + add_collection, + add_schema, + clean_record, +) filters = [ add_schema('journals.json'), diff --git a/inspire_dojson/journals/rules.py b/inspire_dojson/journals/rules.py index a9470c8c..7f8ad78e 100644 --- a/inspire_dojson/journals/rules.py +++ b/inspire_dojson/journals/rules.py @@ -26,12 +26,11 @@ from dojson import utils from idutils import normalize_issn - from inspire_utils.date import normalize_date from inspire_utils.helpers import force_list, maybe_int -from .model import journals -from ..utils import get_record_ref +from inspire_dojson.journals.model import journals +from inspire_dojson.utils import get_record_ref @journals.over('issns', '^022..') @@ -152,7 +151,8 @@ def _private_notes(self, key, value): { 'source': value.get('9'), 'value': _private_note, - } for _private_note in force_list(value.get('x')) + } + for _private_note in force_list(value.get('x')) ] diff --git a/inspire_dojson/model.py b/inspire_dojson/model.py index b19d01d0..2a7a593c 100644 --- a/inspire_dojson/model.py +++ b/inspire_dojson/model.py @@ -32,15 +32,13 @@ from dojson import Overdo from dojson.errors import IgnoreKey - from six import raise_from -from .errors import DoJsonError -from .utils import dedupe_all_lists, strip_empty_values +from inspire_dojson.errors import DoJsonError +from 
inspire_dojson.utils import dedupe_all_lists, strip_empty_values class FilterOverdo(Overdo): - def __init__(self, filters=None, *args, **kwargs): super(FilterOverdo, self).__init__(*args, **kwargs) self.filters = filters or [] @@ -55,7 +53,9 @@ def do(self, blob, **kwargs): def over(self, name, *source_tags): def decorator(creator): - return super(FilterOverdo, self).over(name, *source_tags)(self._wrap_exception(creator, name)) + return super(FilterOverdo, self).over(name, *source_tags)( + self._wrap_exception(creator, name) + ) return decorator @@ -68,9 +68,16 @@ def func(self, key, value): except Exception as exc: if type(exc) is IgnoreKey: raise exc - raise_from(DoJsonError( - u'Error in rule "{name}" for field "{key}"'.format(name=name, key=key), exc.args, value - ), exc) + raise_from( + DoJsonError( + u'Error in rule "{name}" for field "{key}"'.format( + name=name, key=key + ), + exc.args, + value, + ), + exc, + ) return func @@ -98,4 +105,5 @@ def clean_marc(record, blob): def clean_record(exclude_keys=()): def _clean_record(record, blob): return dedupe_all_lists(strip_empty_values(record), exclude_keys=exclude_keys) + return _clean_record diff --git a/inspire_dojson/utils/__init__.py b/inspire_dojson/utils/__init__.py index de4138b4..71a7f58a 100644 --- a/inspire_dojson/utils/__init__.py +++ b/inspire_dojson/utils/__init__.py @@ -27,16 +27,14 @@ import os import re -from flask import current_app -from isbn import ISBN -from six import binary_type, iteritems, text_type -from six.moves import urllib - from dojson.utils import GroupableOrderedDict - +from flask import current_app from inspire_utils.date import normalize_date from inspire_utils.dedupers import dedupe_list, dedupe_list_of_dicts from inspire_utils.helpers import force_list, maybe_int +from isbn import ISBN +from six import binary_type, iteritems, text_type +from six.moves import urllib DEFAULT_AFS_PATH = '/afs/cern.ch/project/inspire/PROD' @@ -136,9 +134,14 @@ def afs_url(file_path): if process_path: if afs_service: - return os.path.join(afs_service, urllib.request.pathname2url(file_path.encode('utf-8'))) + return os.path.join( + afs_service, + urllib.request.pathname2url(file_path.encode('utf-8')), + ) file_path = os.path.join(afs_path, file_path) - return urllib.parse.urljoin('file://', urllib.request.pathname2url(file_path.encode('utf-8'))) + return urllib.parse.urljoin( + 'file://', urllib.request.pathname2url(file_path.encode('utf-8')) + ) return file_path @@ -161,7 +164,7 @@ def afs_url_to_path(url): if not afs_service or not url.startswith(afs_service): return url - path = url[len(afs_service):].lstrip('/') + path = url[len(afs_service) :].lstrip('/') return urllib.parse.urljoin('file://', os.path.join(afs_path, path)) @@ -227,6 +230,7 @@ def dedupe_all_lists(obj, exclude_keys=()): def normalize_date_aggressively(date): """Normalize date, stripping date parts until a valid date is obtained.""" + def _strip_last_part(date): parts = date.split('-') return '-'.join(parts[:-1]) diff --git a/inspire_dojson/utils/geo.py b/inspire_dojson/utils/geo.py index 71739b46..596b079e 100644 --- a/inspire_dojson/utils/geo.py +++ b/inspire_dojson/utils/geo.py @@ -25,10 +25,8 @@ from __future__ import absolute_import, division, print_function import six - from inspire_utils.helpers import force_list - country_to_iso_code = { 'AFGHANISTAN': 'AF', 'ÅLAND ISLANDS': 'AX', @@ -295,7 +293,7 @@ 'FR': ['FX'], 'GB': ['UK'], 'TL': ['TP'], - 'CD': ['ZR'] + 'CD': ['ZR'], } countries_alternative_spellings = { @@ -311,13 +309,26 @@ 'CN': ['PR 
CHINA'], 'CS': ['CZECHSOLVAKIA'], 'CZ': ['PRAGUE'], - 'DE': ['DEUTSCHLAND', 'WEST GERMANY', 'EAST GERMANY', 'BAVARIA', - 'GERMANY (DESY)'], + 'DE': [ + 'DEUTSCHLAND', + 'WEST GERMANY', + 'EAST GERMANY', + 'BAVARIA', + 'GERMANY (DESY)', + ], 'ES': ['CANARY ISLANDS', 'MADRID'], 'FR': ['CORSICA'], 'GR': ['CRETE'], - 'GB': ['UK', 'ENGLAND', 'ENG', 'SCOTLAND', 'WALES', 'SCOTLAND/UK', - 'NORTHERN IRELAND', 'LONDON'], + 'GB': [ + 'UK', + 'ENGLAND', + 'ENG', + 'SCOTLAND', + 'WALES', + 'SCOTLAND/UK', + 'NORTHERN IRELAND', + 'LONDON', + ], 'ID': ['BALI'], 'IL': ['JERUSALEM'], 'IR': ['IRAN'], @@ -333,9 +344,13 @@ 'VE': ['VENEZUELA'], 'VN': ['VIETNAM'], 'US': ['UNITED STATES OF AMERICA', 'UNITED STATES', 'US', 'USA'], - 'ZA': ['SAFRICA'] + 'ZA': ['SAFRICA'], +} +countries_from_alternative_spellings = { + spelling: code + for (code, spellings) in countries_alternative_spellings.items() + for spelling in spellings } -countries_from_alternative_spellings = {spelling: code for (code, spellings) in countries_alternative_spellings.items() for spelling in spellings} us_state_to_iso_code = { @@ -389,7 +404,7 @@ 'WASHINGTON': 'WA', 'WEST VIRGINIA': 'WV', 'WISCONSIN': 'WI', - 'WYOMING': 'WY' + 'WYOMING': 'WY', } us_states_alternative_spellings = { @@ -445,12 +460,29 @@ 'WI': ['WI', 'WIS', 'WISC'], 'WY': ['WY'], } -us_states_from_alternative_spellings = {spelling: state for (state, spellings) in us_states_alternative_spellings.items() for spelling in spellings} +us_states_from_alternative_spellings = { + spelling: state + for (state, spellings) in us_states_alternative_spellings.items() + for spelling in spellings +} -south_korean_cities = ['SEOUL', 'DAEJON', 'DAEJEON', 'MT SORAK', 'POHANG', - 'JEJU ISLAND', 'CHEJU ISLAND', 'GYEONGJU', 'BUSAN', - 'DAEGU', 'GYEONGIU', 'PUSAN', 'YONGPYONG', - 'PHOENIX PARK', 'CHEJU ISLAND'] +south_korean_cities = [ + 'SEOUL', + 'DAEJON', + 'DAEJEON', + 'MT SORAK', + 'POHANG', + 'JEJU ISLAND', + 'CHEJU ISLAND', + 'GYEONGJU', + 'BUSAN', + 'DAEGU', + 'GYEONGIU', + 'PUSAN', + 'YONGPYONG', + 'PHOENIX PARK', + 'CHEJU ISLAND', +] def match_country_code(original_code): @@ -459,7 +491,10 @@ def match_country_code(original_code): if iso_code_to_country_name.get(original_code): return original_code else: - for country_code, alternatives in countries_alternative_codes.items(): + for ( + country_code, + alternatives, + ) in countries_alternative_codes.items(): for alternative in alternatives: if original_code == alternative: return country_code @@ -541,8 +576,9 @@ def parse_conference_address(address_string): } -def parse_institution_address(address, city, state_province, - country, postal_code, country_code): +def parse_institution_address( + address, city, state_province, country, postal_code, country_code +): """Parse an institution address.""" address_list = force_list(address) state_province = match_us_state(state_province) or state_province @@ -560,7 +596,11 @@ def parse_institution_address(address, city, state_province, if not country_code and country: country_code = match_country_name_to_its_code(country) - if not country_code and state_province and state_province in us_state_to_iso_code.values(): + if ( + not country_code + and state_province + and state_province in us_state_to_iso_code.values() + ): country_code = 'US' return { diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 00000000..06202d58 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,29 @@ +target-version = "py311" +[lint.flake8-tidy-imports] +ban-relative-imports = "all" + +[lint] +select = [ + # pycodestyle + "E", + # 
Pyflakes + "F", + # flake8-bugbear + "B", + # flake8-simplify + "SIM", + # isort + "I", + # flake8-tidy-imports + "TID", + # flake8-pytest-style + "PT", +] +ignore = ["B904", "B905"] + + +[lint.pycodestyle] +ignore-overlong-task-comments = true + +[lint.pydocstyle] +convention = "google" diff --git a/run-tests.sh b/run-tests.sh index ac5a34cd..02585227 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -22,5 +22,4 @@ set -e -flake8 inspire_dojson tests py.test tests diff --git a/setup.py b/setup.py index 2cd0e60b..acd5291e 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,8 @@ URL = "https://github.com/inspirehep/inspire-dojson" -readme = open("README.rst").read() +with open("README.rst") as f: + readme = f.read() install_requires = [ @@ -54,13 +55,18 @@ "pytest-cov~=2.0,>=2.6.1", ] +dev_require = [ + "pre-commit==3.5.0", +] + extras_require = { "docs": docs_require, "tests": tests_require, + "dev": dev_require, } extras_require["all"] = [] -for name, reqs in extras_require.items(): +for _name, reqs in extras_require.items(): extras_require["all"].extend(reqs) packages = find_packages(exclude=["docs"]) diff --git a/tests/conftest.py b/tests/conftest.py index 2521c61d..c9ab5491 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,9 +22,8 @@ from __future__ import absolute_import, division, print_function import pytest -from langdetect import DetectorFactory from flask import Flask - +from langdetect import DetectorFactory CONFIG = { 'SERVER_NAME': 'localhost:5000', @@ -40,8 +39,8 @@ def app(): yield app -@pytest.fixture(scope='function') -def stable_langdetect(app): +@pytest.fixture() +def _stable_langdetect(app): """Ensure that ``langdetect`` always returns the same thing. See: https://github.com/Mimino666/langdetect#basic-usage. diff --git a/tests/test_api.py b/tests/test_api.py index d0d786a0..980c3262 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -24,7 +24,11 @@ import pytest -from inspire_dojson.api import marcxml2record, record2marcxml, cds_marcxml2record +from inspire_dojson.api import ( + cds_marcxml2record, + marcxml2record, + record2marcxml, +) from inspire_dojson.errors import NotSupportedError @@ -142,7 +146,7 @@ def test_marcxml2record_handles_journalsnew(): def test_marcxml2record_handles_multiple_as_in_the_same_980(): - snippet = ( + snippet = ( # record/1247377 '' ' ' ' Published' @@ -155,7 +159,7 @@ def test_marcxml2record_handles_multiple_as_in_the_same_980(): ' NONCORE' ' ' '' - ) # record/1247377 + ) expected = 'hep.json' result = marcxml2record(snippet) @@ -177,12 +181,12 @@ def test_marcxml2record_falls_back_to_hep(): def test_cds_marcxml2record_handles_cds(): - snippet = ( + snippet = ( # cds.cern.ch/record/2270264 '' ' 2270264' ' SzGeCERN' '' - ) # cds.cern.ch/record/2270264 + ) expected = [ { @@ -201,11 +205,7 @@ def test_record2marcxml_generates_controlfields(): 'control_number': 4328, } - expected = ( - b'\n' - b' 4328\n' - b'\n' - ) + expected = b'\n 4328\n\n' result = record2marcxml(record) assert expected == result @@ -259,9 +259,7 @@ def test_record2marcxml_supports_authors(): } expected = ( - b'\n' - b' 1010819\n' - b'\n' + b'\n 1010819\n\n' ) result = record2marcxml(record) @@ -274,11 +272,7 @@ def test_record2marcxml_supports_relative_urls(): 'control_number': 4328, } - expected = ( - b'\n' - b' 4328\n' - b'\n' - ) + expected = b'\n 4328\n\n' result = record2marcxml(record) assert expected == result @@ -382,18 +376,21 @@ def test_record2marcxml_strips_control_characters(): 'abstracts': [ { 'source': 'submitter', - 'value': u'A common 
feature shared by many quantum gravity models is modi\u001Ccations of two-point functions at energy scales around the Planck scale.', + 'value': ( + u'A common feature shared by many quantum gravity models is' + u' modi\u001Ccations of two-point functions at energy' + u' scales around the Planck scale.' + ), }, ], } # holdingpen/812647 expected = ( - b'\n' - b' \n' - b' submitter\n' - b' A common feature shared by many quantum gravity models is modications of two-point functions at energy scales around the Planck scale.\n' - b' \n' - b'\n' + b'\n \n submitter\n A common' + b' feature shared by many quantum gravity models is modications of' + b' two-point functions at energy scales around the Planck' + b' scale.\n \n\n' ) result = record2marcxml(record) diff --git a/tests/test_cds.py b/tests/test_cds.py index 1bfe07b8..6e732a3c 100644 --- a/tests/test_cds.py +++ b/tests/test_cds.py @@ -23,20 +23,20 @@ from __future__ import absolute_import, division, print_function from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from inspire_dojson.cds import cds2hep_marc from inspire_dojson.hep import hep from inspire_dojson.utils import create_record_from_dict -from inspire_schemas.api import load_schema, validate def test_external_system_identifiers_from_001(): schema = load_schema('hep') subschema = schema['properties']['external_system_identifiers'] - snippet = ( + snippet = ( # cds.cern.ch/record/2270264 '2270264' - ) # cds.cern.ch/record/2270264 + ) expected = [ { @@ -64,14 +64,14 @@ def test_private_notes_from_001_and_980__c_hidden(): schema = load_schema('hep') subschema = schema['properties']['_private_notes'] - snippet = ( + snippet = ( # cds.cern.ch/record/1355275 '' ' 1355275' ' ' ' Hidden' ' ' '' - ) # cds.cern.ch/record/1355275 + ) expected = [ { @@ -99,12 +99,12 @@ def test_dois_from_0247_a_2(): schema = load_schema('hep') subschema = schema['properties']['dois'] - snippet = ( + snippet = ( # cds.cern.ch/record/2297288 '' ' DOI' ' 10.1016/j.nima.2017.11.093' '' - ) # cds.cern.ch/record/2297288 + ) expected = [ { @@ -133,13 +133,13 @@ def test_dois_from_0247_a_2_9(): schema = load_schema('hep') subschema = schema['properties']['dois'] - snippet = ( + snippet = ( # cds.cern.ch/record/2295116 '' ' DOI' ' submitter' ' 10.1098/rsta.2014.0044' '' - ) # cds.cern.ch/record/2295116 + ) expected = [ { @@ -168,14 +168,14 @@ def test_external_sytem_identifiers_from_035__a_9(): schema = load_schema('hep') subschema = schema['properties']['external_system_identifiers'] - snippet = ( + snippet = ( # cds.cern.ch/record/2295073 '' ' ' ' OSTI' ' 1358095' ' ' '' - ) # cds.cern.ch/record/2295073 + ) expected = [ { @@ -200,14 +200,14 @@ def test_external_sytem_identifiers_from_035__a_9(): def test_external_sytem_identifiers_from_035__a_9_ignores_inspire(): - snippet = ( + snippet = ( # cds.cern.ch/record/2295116 '' ' ' ' Inspire' ' 1640199' ' ' '' - ) # cds.cern.ch/record/2295116 + ) result = cds2hep_marc.do(create_record(snippet)) @@ -215,13 +215,13 @@ def test_external_sytem_identifiers_from_035__a_9_ignores_inspire(): def test_external_sytem_identifiers_from_035__a_ignores_cercer(): - snippet = ( + snippet = ( # cds.cern.ch/record/2307509 '' ' ' ' 0148182CERCER' ' ' '' - ) # cds.cern.ch/record/2307509 + ) result = cds2hep_marc.do(create_record(snippet)) @@ -232,11 +232,11 @@ def test_report_numbers_from_037__a(): schema = load_schema('hep') subschema = schema['properties']['report_numbers'] - snippet = ( + snippet = ( # cds.cern.ch/record/2270264 '' 
' CLICDP-PUB-2017-002' ' ' - ) # cds.cern.ch/record/2270264 + ) expected = [ { @@ -264,11 +264,11 @@ def test_report_numbers_from_037__z(): schema = load_schema('hep') subschema = schema['properties']['report_numbers'] - snippet = ( + snippet = ( # cds.cern.ch/record/2299967 '' ' CERN-THESIS-2018-004' ' ' - ) # cds.cern.ch/record/2299967 + ) expected = [ { @@ -297,11 +297,11 @@ def test_report_numbers_from_088__9(): schema = load_schema('hep') subschema = schema['properties']['report_numbers'] - snippet = ( + snippet = ( # cds.cern.ch/record/2255823 '' ' ATL-COM-PHYS-2017-030' '' - ) # cds.cern.ch/record/2255823 + ) expected = [ { @@ -332,7 +332,7 @@ def test_report_numbers_and_document_type_from_multiple_088__a(): subschema_report_numbers = schema['properties']['report_numbers'] subschema_document_type = schema['properties']['document_type'] - snippet = ( + snippet = ( # cds.cern.ch/record/2275456 '' ' ' ' ATL-PHYS-CONF-2008-015' @@ -341,7 +341,7 @@ def test_report_numbers_and_document_type_from_multiple_088__a(): ' ATL-COM-PHYS-2008-052' ' ' '' - ) # cds.cern.ch/record/2275456 + ) expected = { '037__': [ @@ -406,11 +406,11 @@ def test_report_numbers_and_document_type_and_publicate_notes_from_037__a(): subschema_document_type = schema['properties']['document_type'] subschema_public_notes = schema['properties']['public_notes'] - snippet = ( + snippet = ( # cds.cern.ch/record/2202807 '' ' CMS-PAS-SMP-15-001' '' - ) # cds.cern.ch/record/2202807 + ) expected = { '037__': [ @@ -474,7 +474,7 @@ def test_languages_from_multiple_041__a(): schema = load_schema('hep') subschema = schema['properties']['languages'] - snippet = ( + snippet = ( # cds.cern.ch/record/2258299 '' ' ' ' eng' @@ -483,7 +483,7 @@ def test_languages_from_multiple_041__a(): ' fre' ' ' '' - ) # cds.cern.ch/record/2258299 + ) expected = [ {'a': 'English'}, @@ -501,11 +501,11 @@ def test_languages_from_multiple_041__a(): def test_languages_from_041__a_ignores_english(): - snippet = ( + snippet = ( # cds.cern.ch/record/2295270 '' ' eng' '' - ) # cds.cern.ch/record/2295270 + ) result = cds2hep_marc.do(create_record(snippet)) @@ -516,7 +516,7 @@ def test_authors_from_100__a_0_u_m_and_700__a_0_u_m(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/2295263 '' ' ' ' Joram, Christian' @@ -534,7 +534,7 @@ def test_authors_from_100__a_0_u_m_and_700__a_0_u_m(): ' Xavier.Pons@cern.ch' ' ' '' - ) # record/2295263 + ) expected = { '100__': [ @@ -598,7 +598,7 @@ def test_authors_from_100_a_i_j_u_0_9_ignores_beard(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # cds.cern.ch/record/2285529 '' ' AUTHOR|(CDS)2077287' ' #BEARD#' @@ -607,7 +607,7 @@ def test_authors_from_100_a_i_j_u_0_9_ignores_beard(): ' CCID-695565' ' Aachen, Tech. Hochsch.' '' - ) # cds.cern.ch/record/2285529 + ) expected = [ { @@ -650,7 +650,7 @@ def test_authors_from_100__a_u_and_multiple_700__a_u_e(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/2295265 '' ' ' ' Aichinger, Ida' @@ -667,7 +667,7 @@ def test_authors_from_100__a_u_and_multiple_700__a_u_e(): ' dir.' 
' ' '' - ) # record/2295265 + ) expected = { '100__': [ @@ -686,8 +686,8 @@ def test_authors_from_100__a_u_and_multiple_700__a_u_e(): 'a': 'Kersevan, Roberto', 'e': 'dir.', 'u': 'Linz U.', - } - ] + }, + ], } result = cds2hep_marc.do(create_record(snippet)) @@ -720,11 +720,11 @@ def test_authors_from_100__a_normalizes_name(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # cds.cern.ch/record/1099557 '' ' Tagliente, G' '' - ) # cds.cern.ch/record/1099557 + ) expected = [ { @@ -750,11 +750,11 @@ def test_corporate_author_from_110__a(): schema = load_schema('hep') subschema = schema['properties']['corporate_author'] - snippet = ( + snippet = ( # cds.cern.ch/record/2292626 '' ' CERN. Geneva. Research Board Committee' '' - ) # cds.cern.ch/record/2292626 + ) expected = [ {'a': 'CERN. Geneva. Research Board Committee'}, @@ -774,15 +774,19 @@ def test_title_translations_from_242__a(): schema = load_schema('hep') subschema = schema['properties']['title_translations'] - snippet = ( - '' - ' Reconstruction of the invariant masses of bosons of the Standard Model using public data from ATLAS Open Data' - '' - ) # cds.cern.ch/record/2293251 + snippet = ( # cds.cern.ch/record/2293251 + ' Reconstruction of the invariant masses of bosons of the' + ' Standard Model using public data from ATLAS Open' + ' Data' + ) expected = { '9': 'CDS', - 'a': 'Reconstruction of the invariant masses of bosons of the Standard Model using public data from ATLAS Open Data', + 'a': ( + 'Reconstruction of the invariant masses of bosons of the Standard' + ' Model using public data from ATLAS Open Data' + ), } result = cds2hep_marc.do(create_record(snippet)) @@ -792,7 +796,10 @@ def test_title_translations_from_242__a(): { 'source': 'CDS', 'language': 'en', - 'title': 'Reconstruction of the invariant masses of bosons of the Standard Model using public data from ATLAS Open Data', + 'title': ( + 'Reconstruction of the invariant masses of bosons of the' + ' Standard Model using public data from ATLAS Open Data' + ), }, ] result = hep.do(create_record_from_dict(result)) @@ -805,15 +812,19 @@ def test_titles_from_245__a(): schema = load_schema('hep') subschema = schema['properties']['titles'] - snippet = ( - '' - ' Reconstrucción de masas invariantes de bosones del Modelo Estándar usando datos públicos de ATLAS Open Data' - '' - ) # cds.cern.ch/record/2293251 + snippet = ( # cds.cern.ch/record/2293251 + ' Reconstrucción de masas invariantes de bosones del Modelo' + ' Estándar usando datos públicos de ATLAS Open' + ' Data' + ) expected = { '9': 'CDS', - 'a': u'Reconstrucción de masas invariantes de bosones del Modelo Estándar usando datos públicos de ATLAS Open Data', + 'a': ( + u'Reconstrucción de masas invariantes de bosones del Modelo' + u' Estándar usando datos públicos de ATLAS Open Data' + ), } result = cds2hep_marc.do(create_record(snippet)) @@ -822,7 +833,10 @@ def test_titles_from_245__a(): expected = [ { 'source': 'CDS', - 'title': u'Reconstrucción de masas invariantes de bosones del Modelo Estándar usando datos públicos de ATLAS Open Data', + 'title': ( + u'Reconstrucción de masas invariantes de bosones del Modelo' + u' Estándar usando datos públicos de ATLAS Open Data' + ), }, ] result = hep.do(create_record_from_dict(result)) @@ -835,12 +849,11 @@ def test_titles_from_246__a_b(): schema = load_schema('hep') subschema = schema['properties']['titles'] - snippet = ( - '' - ' v.2' - ' Advances and applications the deterministic case' - '' - ) # cds.cern.ch/record/1999859 + 
snippet = ( # cds.cern.ch/record/1999859 + ' v.2 Advances and applications' + ' the deterministic case' + ) expected = [ { @@ -870,13 +883,13 @@ def test_imprints_from_260__a_b_c(): schema = load_schema('hep') subschema = schema['properties']['imprints'] - snippet = ( + snippet = ( # cds.cern.ch/record/1999859 '' ' Hoboken, NJ' ' Wiley' ' 2015' '' - ) # cds.cern.ch/record/1999859 + ) expected = { 'a': 'Hoboken, NJ', @@ -904,11 +917,11 @@ def test_number_of_pages_from_300__a(): schema = load_schema('hep') subschema = schema['properties']['number_of_pages'] - snippet = ( + snippet = ( # cds.cern.ch/record/2292558 '' ' 20 p' '' - ) # cds.cern.ch/record/2292558 + ) expected = { 'a': '20', @@ -928,7 +941,7 @@ def test_thesis_info_from_502__a_b_c_and_500__a(): schema = load_schema('hep') subschema = schema['properties']['thesis_info'] - snippet = ( + snippet = ( # cds.cern.ch/record/2295265 '' ' ' ' Presented 2017' @@ -939,7 +952,7 @@ def test_thesis_info_from_502__a_b_c_and_500__a(): ' 2017' ' ' '' - ) # cds.cern.ch/record/2295265 + ) expected = { '500__': [ @@ -952,7 +965,7 @@ def test_thesis_info_from_502__a_b_c_and_500__a(): 'b': 'PhD', 'c': 'Linz U.', 'd': '2017', - } + }, } result = cds2hep_marc.do(create_record(snippet)) @@ -977,16 +990,137 @@ def test_abstracts_from_520__a(): schema = load_schema('hep') subschema = schema['properties']['abstracts'] - snippet = ( - '' - ' The underlying thesis on mathematical simulation methods in application and theory is structured into three parts. The first part sets up a mathematical model capable of predicting the performance and operation of an accelerator’s vacuum system based on analytical methods. A coupled species-balance equation system describes the distribution of the gas dynamics in an ultra-high vacuum system considering impacts of conductance limitations, beam induced effects (ion-, electron-, and photon-induced de- sorption), thermal outgassing and sticking probabilities of the chamber materials. A new solving algorithm based on sparse matrix representations, is introduced and presents a closed form solution of the equation system. The model is implemented in a Python environment, named PyVasco, and is supported by a graphical user interface to make it easy available for everyone. A sensitivity analysis, a cross-check with the Test-Particle Monte Carlo simulation program Molflow+ and a comparison of the simulation results to readings of the Large Hadron Colliders (LHC) pressure gauges validate the code. The computation of density profiles considering several effects (as men- tioned above) is performed within a short computation time for indefinitely long vacuum systems. This is in particular interesting for the design of a stable vacuum system for new large accelerat- ors like the Future Circular Colliders (FCC) with 100 km in circumference. A simulation of the FCC is shown at the end of this part. Additionally, PyVasco was presented twice at international conferences in Rome and Berlin and has been submitted in July with the title “Analytical vacuum simulations in high energy accelerators for future machines based on the LHC performance” to the Journal “Physical Review Accelerator and Beams”. The second and third part of the thesis study properties of quasi-Monte Carlo (QMC) methods in the scope of the special research project “Quasi-Monte Carlo methods: Theory and Applications”. Instead of solving a complex integral analytically, its value is approximated by function evaluation at specific points. 
The choice of a good point set is critical for a good result. It turned out that continuous curves provide a good tool to define these point sets. So called “bounded remainder sets” (BRS) define a measure for the quality of the uniform distribution of a curve in the unit- square. The trajectory of a billiard path with an irrational slope is especially well distributed. Certain criteria to the BRS are defined and analysed in regard to the distribution error. The idea of the proofs is based on Diophantine approximations of irrational numbers and on the unfolding technique of the billiard path to a straight line in the plane. New results of the BRS for the billiard path are reported to the “Journal of Uniform Distribution”. The third part analyses the distribution of the energy levels of quantum systems. It was stated that the eigenvalues of the energy spectra for almost all integrable quantum systems are uncor- related and Poisson distributed. The harmonic oscillator presents already one counter example to this assertion. The particle in a box on the other hand obtains these properties. This thesis formulates a general statement that describes under which conditions the eigenvalues do not follow the poissonian property. The concept of the proofs is based on the analysis of the pair correlations of sequences. The former particle physicist Ian Sloan also exposed this topic and he became spe- cialized as a skilled mathematician in this field. To honour his achievements a Festschrift for his 80th birthday is written and the results of the work of this thesis are published there. The book will appear in 2018.' - '' - ) # cds.cern.ch/record/2295265 + snippet = ( # cds.cern.ch/record/2295265 + ' The' + ' underlying thesis on mathematical simulation methods in application' + ' and theory is structured into three parts. The first part sets up a' + ' mathematical model capable of predicting the performance and' + ' operation of an accelerator’s vacuum system based on analytical' + ' methods. A coupled species-balance equation system describes the' + ' distribution of the gas dynamics in an ultra-high vacuum system' + ' considering impacts of conductance limitations, beam induced effects' + ' (ion-, electron-, and photon-induced de- sorption), thermal' + ' outgassing and sticking probabilities of the chamber materials. A new' + ' solving algorithm based on sparse matrix representations, is' + ' introduced and presents a closed form solution of the equation' + ' system. The model is implemented in a Python environment, named' + ' PyVasco, and is supported by a graphical user interface to make it' + ' easy available for everyone. A sensitivity analysis, a cross-check' + ' with the Test-Particle Monte Carlo simulation program Molflow+ and a' + ' comparison of the simulation results to readings of the Large Hadron' + ' Colliders (LHC) pressure gauges validate the code. The computation of' + ' density profiles considering several effects (as men- tioned above)' + ' is performed within a short computation time for indefinitely long' + ' vacuum systems. This is in particular interesting for the design of a' + ' stable vacuum system for new large accelerat- ors like the Future' + ' Circular Colliders (FCC) with 100 km in circumference. A simulation' + ' of the FCC is shown at the end of this part. 
Additionally, PyVasco' + ' was presented twice at international conferences in Rome and Berlin' + ' and has been submitted in July with the title “Analytical vacuum' + ' simulations in high energy accelerators for future machines based on' + ' the LHC performance” to the Journal “Physical Review Accelerator and' + ' Beams”. The second and third part of the thesis study properties of' + ' quasi-Monte Carlo (QMC) methods in the scope of the special research' + ' project “Quasi-Monte Carlo methods: Theory and Applications”. Instead' + ' of solving a complex integral analytically, its value is approximated' + ' by function evaluation at specific points. The choice of a good point' + ' set is critical for a good result. It turned out that continuous' + ' curves provide a good tool to define these point sets. So called' + ' “bounded remainder sets” (BRS) define a measure for the quality of' + ' the uniform distribution of a curve in the unit- square. The' + ' trajectory of a billiard path with an irrational slope is especially' + ' well distributed. Certain criteria to the BRS are defined and' + ' analysed in regard to the distribution error. The idea of the proofs' + ' is based on Diophantine approximations of irrational numbers and on' + ' the unfolding technique of the billiard path to a straight line in' + ' the plane. New results of the BRS for the billiard path are reported' + ' to the “Journal of Uniform Distribution”. The third part analyses the' + ' distribution of the energy levels of quantum systems. It was stated' + ' that the eigenvalues of the energy spectra for almost all integrable' + ' quantum systems are uncor- related and Poisson distributed. The' + ' harmonic oscillator presents already one counter example to this' + ' assertion. The particle in a box on the other hand obtains these' + ' properties. This thesis formulates a general statement that describes' + ' under which conditions the eigenvalues do not follow the poissonian' + ' property. The concept of the proofs is based on the analysis of the' + ' pair correlations of sequences. The former particle physicist Ian' + ' Sloan also exposed this topic and he became spe- cialized as a' + ' skilled mathematician in this field. To honour his achievements a' + ' Festschrift for his 80th birthday is written and the results of the' + ' work of this thesis are published there. The book will appear in' + ' 2018.' + ) expected = [ { '9': 'CDS', - 'a': u'The underlying thesis on mathematical simulation methods in application and theory is structured into three parts. The first part sets up a mathematical model capable of predicting the performance and operation of an accelerator’s vacuum system based on analytical methods. A coupled species-balance equation system describes the distribution of the gas dynamics in an ultra-high vacuum system considering impacts of conductance limitations, beam induced effects (ion-, electron-, and photon-induced de- sorption), thermal outgassing and sticking probabilities of the chamber materials. A new solving algorithm based on sparse matrix representations, is introduced and presents a closed form solution of the equation system. The model is implemented in a Python environment, named PyVasco, and is supported by a graphical user interface to make it easy available for everyone. 
A sensitivity analysis, a cross-check with the Test-Particle Monte Carlo simulation program Molflow+ and a comparison of the simulation results to readings of the Large Hadron Colliders (LHC) pressure gauges validate the code. The computation of density profiles considering several effects (as men- tioned above) is performed within a short computation time for indefinitely long vacuum systems. This is in particular interesting for the design of a stable vacuum system for new large accelerat- ors like the Future Circular Colliders (FCC) with 100 km in circumference. A simulation of the FCC is shown at the end of this part. Additionally, PyVasco was presented twice at international conferences in Rome and Berlin and has been submitted in July with the title “Analytical vacuum simulations in high energy accelerators for future machines based on the LHC performance” to the Journal “Physical Review Accelerator and Beams”. The second and third part of the thesis study properties of quasi-Monte Carlo (QMC) methods in the scope of the special research project “Quasi-Monte Carlo methods: Theory and Applications”. Instead of solving a complex integral analytically, its value is approximated by function evaluation at specific points. The choice of a good point set is critical for a good result. It turned out that continuous curves provide a good tool to define these point sets. So called “bounded remainder sets” (BRS) define a measure for the quality of the uniform distribution of a curve in the unit- square. The trajectory of a billiard path with an irrational slope is especially well distributed. Certain criteria to the BRS are defined and analysed in regard to the distribution error. The idea of the proofs is based on Diophantine approximations of irrational numbers and on the unfolding technique of the billiard path to a straight line in the plane. New results of the BRS for the billiard path are reported to the “Journal of Uniform Distribution”. The third part analyses the distribution of the energy levels of quantum systems. It was stated that the eigenvalues of the energy spectra for almost all integrable quantum systems are uncor- related and Poisson distributed. The harmonic oscillator presents already one counter example to this assertion. The particle in a box on the other hand obtains these properties. This thesis formulates a general statement that describes under which conditions the eigenvalues do not follow the poissonian property. The concept of the proofs is based on the analysis of the pair correlations of sequences. The former particle physicist Ian Sloan also exposed this topic and he became spe- cialized as a skilled mathematician in this field. To honour his achievements a Festschrift for his 80th birthday is written and the results of the work of this thesis are published there. The book will appear in 2018.' + 'a': ( + u'The underlying thesis on mathematical simulation methods in' + u' application and theory is structured into three parts. The' + u' first part sets up a mathematical model capable of' + u' predicting the performance and operation of an accelerator’s' + u' vacuum system based on analytical methods. A coupled' + u' species-balance equation system describes the distribution' + u' of the gas dynamics in an ultra-high vacuum system' + u' considering impacts of conductance limitations, beam induced' + u' effects (ion-, electron-, and photon-induced de- sorption),' + u' thermal outgassing and sticking probabilities of the chamber' + u' materials. 
A new solving algorithm based on sparse matrix' + u' representations, is introduced and presents a closed form' + u' solution of the equation system. The model is implemented in' + u' a Python environment, named PyVasco, and is supported by a' + u' graphical user interface to make it easy available for' + u' everyone. A sensitivity analysis, a cross-check with the' + u' Test-Particle Monte Carlo simulation program Molflow+ and a' + u' comparison of the simulation results to readings of the' + u' Large Hadron Colliders (LHC) pressure gauges validate the' + u' code. The computation of density profiles considering' + u' several effects (as men- tioned above) is performed within a' + u' short computation time for indefinitely long vacuum systems.' + u' This is in particular interesting for the design of a stable' + u' vacuum system for new large accelerat- ors like the Future' + u' Circular Colliders (FCC) with 100 km in circumference. A' + u' simulation of the FCC is shown at the end of this part.' + u' Additionally, PyVasco was presented twice at international' + u' conferences in Rome and Berlin and has been submitted in' + u' July with the title “Analytical vacuum simulations in high' + u' energy accelerators for future machines based on the LHC' + u' performance” to the Journal “Physical Review Accelerator and' + u' Beams”. The second and third part of the thesis study' + u' properties of quasi-Monte Carlo (QMC) methods in the scope' + u' of the special research project “Quasi-Monte Carlo methods:' + u' Theory and Applications”. Instead of solving a complex' + u' integral analytically, its value is approximated by function' + u' evaluation at specific points. The choice of a good point' + u' set is critical for a good result. It turned out that' + u' continuous curves provide a good tool to define these point' + u' sets. So called “bounded remainder sets” (BRS) define a' + u' measure for the quality of the uniform distribution of a' + u' curve in the unit- square. The trajectory of a billiard path' + u' with an irrational slope is especially well distributed.' + u' Certain criteria to the BRS are defined and analysed in' + u' regard to the distribution error. The idea of the proofs is' + u' based on Diophantine approximations of irrational numbers' + u' and on the unfolding technique of the billiard path to a' + u' straight line in the plane. New results of the BRS for the' + u' billiard path are reported to the “Journal of Uniform' + u' Distribution”. The third part analyses the distribution of' + u' the energy levels of quantum systems. It was stated that the' + u' eigenvalues of the energy spectra for almost all integrable' + u' quantum systems are uncor- related and Poisson distributed.' + u' The harmonic oscillator presents already one counter example' + u' to this assertion. The particle in a box on the other hand' + u' obtains these properties. This thesis formulates a general' + u' statement that describes under which conditions the' + u' eigenvalues do not follow the poissonian property. The' + u' concept of the proofs is based on the analysis of the pair' + u' correlations of sequences. The former particle physicist Ian' + u' Sloan also exposed this topic and he became spe- cialized as' + u' a skilled mathematician in this field. To honour his' + u' achievements a Festschrift for his 80th birthday is written' + u' and the results of the work of this thesis are published' + u' there. The book will appear in 2018.' 
+ ), }, ] result = cds2hep_marc.do(create_record(snippet)) @@ -996,7 +1130,73 @@ def test_abstracts_from_520__a(): expected = [ { 'source': 'CDS', - 'value': u'The underlying thesis on mathematical simulation methods in application and theory is structured into three parts. The first part sets up a mathematical model capable of predicting the performance and operation of an accelerator’s vacuum system based on analytical methods. A coupled species-balance equation system describes the distribution of the gas dynamics in an ultra-high vacuum system considering impacts of conductance limitations, beam induced effects (ion-, electron-, and photon-induced de- sorption), thermal outgassing and sticking probabilities of the chamber materials. A new solving algorithm based on sparse matrix representations, is introduced and presents a closed form solution of the equation system. The model is implemented in a Python environment, named PyVasco, and is supported by a graphical user interface to make it easy available for everyone. A sensitivity analysis, a cross-check with the Test-Particle Monte Carlo simulation program Molflow+ and a comparison of the simulation results to readings of the Large Hadron Colliders (LHC) pressure gauges validate the code. The computation of density profiles considering several effects (as men- tioned above) is performed within a short computation time for indefinitely long vacuum systems. This is in particular interesting for the design of a stable vacuum system for new large accelerat- ors like the Future Circular Colliders (FCC) with 100 km in circumference. A simulation of the FCC is shown at the end of this part. Additionally, PyVasco was presented twice at international conferences in Rome and Berlin and has been submitted in July with the title “Analytical vacuum simulations in high energy accelerators for future machines based on the LHC performance” to the Journal “Physical Review Accelerator and Beams”. The second and third part of the thesis study properties of quasi-Monte Carlo (QMC) methods in the scope of the special research project “Quasi-Monte Carlo methods: Theory and Applications”. Instead of solving a complex integral analytically, its value is approximated by function evaluation at specific points. The choice of a good point set is critical for a good result. It turned out that continuous curves provide a good tool to define these point sets. So called “bounded remainder sets” (BRS) define a measure for the quality of the uniform distribution of a curve in the unit- square. The trajectory of a billiard path with an irrational slope is especially well distributed. Certain criteria to the BRS are defined and analysed in regard to the distribution error. The idea of the proofs is based on Diophantine approximations of irrational numbers and on the unfolding technique of the billiard path to a straight line in the plane. New results of the BRS for the billiard path are reported to the “Journal of Uniform Distribution”. The third part analyses the distribution of the energy levels of quantum systems. It was stated that the eigenvalues of the energy spectra for almost all integrable quantum systems are uncor- related and Poisson distributed. The harmonic oscillator presents already one counter example to this assertion. The particle in a box on the other hand obtains these properties. This thesis formulates a general statement that describes under which conditions the eigenvalues do not follow the poissonian property. 
The concept of the proofs is based on the analysis of the pair correlations of sequences. The former particle physicist Ian Sloan also exposed this topic and he became spe- cialized as a skilled mathematician in this field. To honour his achievements a Festschrift for his 80th birthday is written and the results of the work of this thesis are published there. The book will appear in 2018.' + 'value': ( + u'The underlying thesis on mathematical simulation methods in' + u' application and theory is structured into three parts. The' + u' first part sets up a mathematical model capable of' + u' predicting the performance and operation of an accelerator’s' + u' vacuum system based on analytical methods. A coupled' + u' species-balance equation system describes the distribution' + u' of the gas dynamics in an ultra-high vacuum system' + u' considering impacts of conductance limitations, beam induced' + u' effects (ion-, electron-, and photon-induced de- sorption),' + u' thermal outgassing and sticking probabilities of the chamber' + u' materials. A new solving algorithm based on sparse matrix' + u' representations, is introduced and presents a closed form' + u' solution of the equation system. The model is implemented in' + u' a Python environment, named PyVasco, and is supported by a' + u' graphical user interface to make it easy available for' + u' everyone. A sensitivity analysis, a cross-check with the' + u' Test-Particle Monte Carlo simulation program Molflow+ and a' + u' comparison of the simulation results to readings of the' + u' Large Hadron Colliders (LHC) pressure gauges validate the' + u' code. The computation of density profiles considering' + u' several effects (as men- tioned above) is performed within a' + u' short computation time for indefinitely long vacuum systems.' + u' This is in particular interesting for the design of a stable' + u' vacuum system for new large accelerat- ors like the Future' + u' Circular Colliders (FCC) with 100 km in circumference. A' + u' simulation of the FCC is shown at the end of this part.' + u' Additionally, PyVasco was presented twice at international' + u' conferences in Rome and Berlin and has been submitted in' + u' July with the title “Analytical vacuum simulations in high' + u' energy accelerators for future machines based on the LHC' + u' performance” to the Journal “Physical Review Accelerator and' + u' Beams”. The second and third part of the thesis study' + u' properties of quasi-Monte Carlo (QMC) methods in the scope' + u' of the special research project “Quasi-Monte Carlo methods:' + u' Theory and Applications”. Instead of solving a complex' + u' integral analytically, its value is approximated by function' + u' evaluation at specific points. The choice of a good point' + u' set is critical for a good result. It turned out that' + u' continuous curves provide a good tool to define these point' + u' sets. So called “bounded remainder sets” (BRS) define a' + u' measure for the quality of the uniform distribution of a' + u' curve in the unit- square. The trajectory of a billiard path' + u' with an irrational slope is especially well distributed.' + u' Certain criteria to the BRS are defined and analysed in' + u' regard to the distribution error. The idea of the proofs is' + u' based on Diophantine approximations of irrational numbers' + u' and on the unfolding technique of the billiard path to a' + u' straight line in the plane. New results of the BRS for the' + u' billiard path are reported to the “Journal of Uniform' + u' Distribution”. 
The third part analyses the distribution of' + u' the energy levels of quantum systems. It was stated that the' + u' eigenvalues of the energy spectra for almost all integrable' + u' quantum systems are uncor- related and Poisson distributed.' + u' The harmonic oscillator presents already one counter example' + u' to this assertion. The particle in a box on the other hand' + u' obtains these properties. This thesis formulates a general' + u' statement that describes under which conditions the' + u' eigenvalues do not follow the poissonian property. The' + u' concept of the proofs is based on the analysis of the pair' + u' correlations of sequences. The former particle physicist Ian' + u' Sloan also exposed this topic and he became spe- cialized as' + u' a skilled mathematician in this field. To honour his' + u' achievements a Festschrift for his 80th birthday is written' + u' and the results of the work of this thesis are published' + u' there. The book will appear in 2018.' + ), }, ] result = hep.do(create_record_from_dict(result)) @@ -1009,12 +1209,12 @@ def test_inspire_categories_from_65017a_2(): schema = load_schema('hep') subschema = schema['properties']['inspire_categories'] - snippet = ( + snippet = ( # cds.cern.ch/record/2276097 '' ' SzGeCERN' ' Engineering' '' - ) # cds.cern.ch/record/2276097 + ) expected = [ { @@ -1043,12 +1243,12 @@ def test_keywords_from_6531_a_9(): schema = load_schema('hep') subschema = schema['properties']['keywords'] - snippet = ( + snippet = ( # cds.cern.ch/record/1123149 '' ' CERN' ' QCD' '' - ) # cds.cern.ch/record/1123149 + ) expected = [ { @@ -1076,11 +1276,11 @@ def test_accelerator_experiments_from_693__a(): schema = load_schema('hep') subschema = schema['properties']['accelerator_experiments'] - snippet = ( + snippet = ( # regression test, unknown record '' ' CERN LHC' '' - ) # regression test, unknown record + ) expected = [ { @@ -1107,12 +1307,12 @@ def test_accelerator_experiments_from_693__a_e(): schema = load_schema('hep') subschema = schema['properties']['accelerator_experiments'] - snippet = ( + snippet = ( # cds.cern.ch/record/2295080 '' ' CERN LHC' ' ALICE' '' - ) # cds.cern.ch/record/2295080 + ) expected = [ { @@ -1136,19 +1336,19 @@ def test_accelerator_experiments_from_693__a_e(): def test_accelerator_experiments_from_693__a_e_ignores_not_applicable(): - snippet = ( + snippet = ( # cds.cern.ch/record/329074 '' ' Not applicable' ' Not applicable' '' - ) # cds.cern.ch/record/329074 + ) result = cds2hep_marc.do(create_record(snippet)) assert '693__' not in result -def test_accelerator_experiments_from_693__a_e_ignores_not_applicable_if_only_one_field_has_it(): +def test_accelerator_experiments_from_693__a_e_ignores_not_applicable_only_one_field(): schema = load_schema('hep') subschema = schema['properties']['accelerator_experiments'] @@ -1157,7 +1357,7 @@ def test_accelerator_experiments_from_693__a_e_ignores_not_applicable_if_only_on ' CERN SPS' ' Not applicable' '' - ) # cds.cern.ch/record/2320495 + ) expected = [ { @@ -1184,7 +1384,7 @@ def test_arxiv_eprints_from_037__a_b_9_and_695__a_9(): schema = load_schema('hep') subschema = schema['properties']['arxiv_eprints'] - snippet = ( + snippet = ( # cds.cern.ch/record/2270264 '' ' ' ' arXiv' @@ -1200,7 +1400,7 @@ def test_arxiv_eprints_from_037__a_b_9_and_695__a_9(): ' hep-ph' ' ' '' - ) # cds.cern.ch/record/2270264 + ) expected = { '037__': [ @@ -1242,22 +1442,18 @@ def test_collaboration_from_710__g(): schema = load_schema('hep') subschema = schema['properties']['collaborations'] - 
snippet = (
+    snippet = (  # cds.cern.ch/record/2295739
         ''
         ' ATLAS Collaboration'
         ''
-    )  # cds.cern.ch/2295739
+    )
 
-    expected = [
-        {'g': 'ATLAS Collaboration'}
-    ]
+    expected = [{'g': 'ATLAS Collaboration'}]
     result = cds2hep_marc.do(create_record(snippet))
 
     assert expected == result['710__']
 
-    expected = [
-        {'value': 'ATLAS'}
-    ]
+    expected = [{'value': 'ATLAS'}]
     result = hep.do(create_record_from_dict(result))
 
     assert validate(result['collaborations'], subschema) is None
@@ -1268,13 +1464,13 @@ def test_publication_info_from_773__c_w_0():
     schema = load_schema('hep')
     subschema = schema['properties']['publication_info']
 
-    snippet = (
+    snippet = (  # cds.cern.ch/record/2294664
         ''
         ' 1217633'
         ' 3-6'
         ' C07-03-17'
         ''
-    )  # cds.cern.ch/record/2294664
+    )
 
     expected = [
         {
@@ -1307,14 +1503,13 @@ def test_documents_from_8564_s_u_y_8():
     schema = load_schema('hep')
     subschema = schema['properties']['documents']
 
-    snippet = (
-        ''
-        ' 1369908'
-        ' 76482'
-        ' http://cds.cern.ch/record/2294664/files/James.pdf'
-        ' Fulltext'
-        ''
-    )  # cds.cern.ch/record/2294664
+    snippet = (  # cds.cern.ch/record/2294664
+        ' 1369908 76482 '
+        ' http://cds.cern.ch/record/2294664/files/James.pdf'
+        ' Fulltext'
+    )
 
     expected = [
         {
@@ -1347,14 +1542,13 @@ def test_documents_from_8564_s_u_y_8_escapes_spaces():
     schema = load_schema('hep')
     subschema = schema['properties']['documents']
 
-    snippet = (
-        ''
-        ' 1427610'
-        ' 8265196'
-        ' http://cds.cern.ch/record/2636102/files/Thesis Fiorendi.pdf'
-        ' Fulltext'
-        ''
-    )  # cds.cern.ch/record/2636102
+    snippet = (  # cds.cern.ch/record/2636102
+        ' 1427610 8265196 '
+        ' http://cds.cern.ch/record/2636102/files/Thesis'
+        ' Fiorendi.pdf Fulltext'
+    )
 
     expected = [
         {
@@ -1387,13 +1581,14 @@ def test_documents_from_8564_s_u_8_escapes_encoded_characters():
     schema = load_schema('hep')
     subschema = schema['properties']['documents']
 
-    snippet = (
+    snippet = (  # cds.cern.ch/record/148555
         ''
         ' 200773'
         ' 5978977'
-        ' http://cds.cern.ch/record/148555/files/Rückl.pdf'
+        ' http://cds.cern.ch/record/148555/files/Rückl.pdf'
         ''
-    )  # cds.cern.ch/record/148555
+    )
 
     expected = [
         {
@@ -1421,14 +1616,12 @@ def test_documents_from_8564_s_u_y_8_ignores_preprint():
-    snippet = (
-        ''
-        ' 1371451'
-        ' 4446886'
-        ' http://cds.cern.ch/record/2295716/files/arXiv:1711.07494.pdf'
-        ' Preprint'
-        ''
-    )  # cds.cern.ch/record/2295716
+    snippet = (  # cds.cern.ch/record/2295716
+        ' 1371451'
+        ' 4446886 http://cds.cern.ch/record/2295716/files/arXiv:1711.07494.pdf'
+        ' Preprint'
+    )
 
     result = cds2hep_marc.do(create_record(snippet))
 
@@ -1439,14 +1632,13 @@ def test_urls_from_8564_s_u_y_8_local_copy():
     schema = load_schema('hep')
     subschema = schema['properties']['urls']
 
-    snippet = (
-        ''
-        ' 1119425'
-        ' http://cds.cern.ch/record/1979225/files/1748-0221_10_01_C01003.pdf'
-        ' Published version from IOP, local copy'
-        ' 1053236'
-        ''
-    )  # cds.cern.ch/record/2159118
+    snippet = (  # cds.cern.ch/record/2159118
+        ' 1119425'
+        ' http://cds.cern.ch/record/1979225/files/1748-0221_10_01_C01003.pdf'
+        ' Published version from IOP, local copy '
+        ' 1053236'
+    )
 
     expected = [
         {
@@ -1460,7 +1652,9 @@
 
     expected = [
         {
-            'value': 'http://cds.cern.ch/record/1979225/files/1748-0221_10_01_C01003.pdf',
+            'value': (
+                'http://cds.cern.ch/record/1979225/files/1748-0221_10_01_C01003.pdf'
+            ),
             'description': 'Published version from IOP, on CERN Document Server',
         },
     ]
@@ -1474,12 +1668,11 @@ def test_urls_from_8564_u_y():
     schema = load_schema('hep')
subschema = schema['properties']['urls'] - snippet = ( - '' - ' http://pos.sissa.it/archive/conferences/209/007/Charged2014_007.pdf' - ' Published version from PoS' - '' - ) # cds.cern.ch/record/2159118 + snippet = ( # cds.cern.ch/record/2159118 + ' http://pos.sissa.it/archive/conferences/209/007/Charged2014_007.pdf' + ' Published version from PoS' + ) expected = [ { @@ -1493,7 +1686,9 @@ def test_urls_from_8564_u_y(): expected = [ { - 'value': 'http://pos.sissa.it/archive/conferences/209/007/Charged2014_007.pdf', + 'value': ( + 'http://pos.sissa.it/archive/conferences/209/007/Charged2014_007.pdf' + ), 'description': 'Published version from PoS', }, ] @@ -1507,13 +1702,13 @@ def test_document_type_from_962__b_k_n(): schema = load_schema('hep') subschema = schema['properties']['document_type'] - snippet = ( + snippet = ( # cds.cern.ch/record/2275456 '' ' 1075481' ' lathuile20080301' ' 79-84' '' - ) # cds.cern.ch/record/2275456 + ) expected = [ {'a': 'ConferencePaper'}, @@ -1535,7 +1730,7 @@ def test_document_type_from_multiple_980_a(): schema = load_schema('hep') subschema = schema['properties']['document_type'] - snippet = ( + snippet = ( # cds.cern.ch/record/1979225 '' ' ' ' ARTICLE' @@ -1544,7 +1739,7 @@ def test_document_type_from_multiple_980_a(): ' ConferencePaper' ' ' '' - ) # cds.cern.ch/record/1979225 + ) expected = [ {'a': 'ConferencePaper'}, diff --git a/tests/test_common.py b/tests/test_common.py index dd44a888..d37404c6 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -23,23 +23,23 @@ from __future__ import absolute_import, division, print_function from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from inspire_dojson.conferences import conferences from inspire_dojson.hep import hep, hep2marc from inspire_dojson.hepnames import hepnames, hepnames2marc -from inspire_schemas.api import load_schema, validate def test_acquisition_source_from_541__a_c(): schema = load_schema('hep') subschema = schema['properties']['acquisition_source'] - snippet = ( + snippet = ( # record/1487640 '' ' IOP' ' batchupload' '' - ) # record/1487640 + ) expected = { 'source': 'IOP', @@ -63,7 +63,7 @@ def test_acquisition_source_from_541__double_a_b_c_e(): schema = load_schema('hep') subschema = schema['properties']['acquisition_source'] - snippet = ( + snippet = ( # record/1416571 '' ' inspire:uid:52524' ' orcid:0000-0002-1048-661X' @@ -71,7 +71,7 @@ def test_acquisition_source_from_541__double_a_b_c_e(): ' submission' ' 504296' '' - ) # record/1416571 + ) expected = { 'email': 'oliver.schlotterer@web.de', @@ -100,7 +100,7 @@ def test_acquisition_source_from_541__a_b_c_d_e_converts_dates_to_datetimes(): schema = load_schema('authors') subschema = schema['properties']['acquisition_source'] - snippet = ( + snippet = ( # record/982806 '' ' inspire:uid:51852' ' jmyang@itp.ac.cn' @@ -108,7 +108,7 @@ def test_acquisition_source_from_541__a_b_c_d_e_converts_dates_to_datetimes(): ' 2016-05-24' ' 805819' '' - ) # record/982806 + ) expected = { 'datetime': '2016-05-24T00:00:00', @@ -137,7 +137,7 @@ def test_acquisition_source_from_541__a_b_c_d_e_handles_datetime(): schema = load_schema('hep') subschema = schema['properties']['acquisition_source'] - snippet = ( + snippet = ( # record/1644748 '' ' orcid:0000-0002-7307-0726' ' ratra@phys.ksu.edu' @@ -145,7 +145,7 @@ def test_acquisition_source_from_541__a_b_c_d_e_handles_datetime(): ' 2017-12-23T18:39:38.751244' ' 832953' '' - ) # record/1644748 + ) expected = { 'datetime': 
'2017-12-23T18:39:38.751244', @@ -175,9 +175,7 @@ def test_self_from_001(): schema = load_schema('hep') subschema = schema['properties']['self'] - snippet = ( - '1508668' - ) # record/1508668 + snippet = '1508668' # record/1508668 expected = {'$ref': 'http://localhost:5000/api/literature/1508668'} result = hep.do(create_record(snippet)) @@ -190,9 +188,7 @@ def test_control_number_from_001(): schema = load_schema('hep') subschema = schema['properties']['control_number'] - snippet = ( - '1508668' - ) # record/1508668 + snippet = '1508668' # record/1508668 expected = 1508668 result = hep.do(create_record(snippet)) @@ -210,7 +206,7 @@ def test_legacy_creation_date_from_961__x_and_961__c(): schema = load_schema('hep') subschema = schema['properties']['legacy_creation_date'] - snippet = ( + snippet = ( # record/1124236 '' ' ' ' 2012-07-30' @@ -219,7 +215,7 @@ def test_legacy_creation_date_from_961__x_and_961__c(): ' 2012-11-20' ' ' '' - ) # record/1124236 + ) expected = '2012-07-30' result = hep.do(create_record(snippet)) @@ -237,7 +233,7 @@ def test_legacy_creation_date_from_961__c_and_961__x(): schema = load_schema('hep') subschema = schema['properties']['legacy_creation_date'] - snippet = ( + snippet = ( # synthetic data '' ' ' ' 2012-11-20' @@ -246,7 +242,7 @@ def test_legacy_creation_date_from_961__c_and_961__x(): ' 2012-07-30' ' ' '' - ) # synthetic data + ) expected = '2012-07-30' result = hep.do(create_record(snippet)) @@ -261,11 +257,11 @@ def test_legacy_creation_date_from_961__c_and_961__x(): def test_legacy_creation_date_from_961__c_does_not_raise(): - snippet = ( + snippet = ( # record/1501611 '' ' 2009-07-12' '' - ) # record/1501611 + ) assert 'legacy_creation_date' not in hep.do(create_record(snippet)) @@ -274,12 +270,12 @@ def test_legacy_creation_date_from_961__double_x_does_not_raise(): schema = load_schema('authors') subschema = schema['properties']['legacy_creation_date'] - snippet = ( + snippet = ( # record/982164 '' ' 2006-04-21' ' 1996-09-01' '' - ) # record/982164 + ) expected = '1996-09-01' result = hepnames.do(create_record(snippet)) @@ -297,11 +293,11 @@ def test_external_system_identifiers_from_970__a(): schema = load_schema('hep') subschema = schema['properties']['external_system_identifiers'] - snippet = ( + snippet = ( # record/1297176 '' ' SPIRES-10325093' '' - ) # record/1297176 + ) expected = [ { @@ -326,12 +322,12 @@ def test_external_system_identifiers_from_970__double_a(): schema = load_schema('hep') subschema = schema['properties']['external_system_identifiers'] - snippet = ( + snippet = ( # record/1217763 '' ' SPIRES-9663061' ' SPIRES-9949933' '' - ) # record/1217763 + ) expected = [ { @@ -361,11 +357,11 @@ def test_external_system_identifiers_from_970__a_conferences(): schema = load_schema('conferences') subschema = schema['properties']['external_system_identifiers'] - snippet = ( + snippet = ( # record/972464 '' ' CONF-461733' '' - ) # record/972464 + ) expected = [ { @@ -383,11 +379,11 @@ def test_new_record_from_970__d(): schema = load_schema('hep') subschema = schema['properties']['new_record'] - snippet = ( + snippet = ( # record/37545 '' ' 361769' '' - ) # record/37545 + ) expected = {'$ref': 'http://localhost:5000/api/literature/361769'} result = hep.do(create_record(snippet)) @@ -405,11 +401,11 @@ def test_deleted_records_from_981__a(): schema = load_schema('hep') subschema = schema['properties']['deleted_records'] - snippet = ( + snippet = ( # record/1508886 '' ' 1508668' '' - ) # record/1508886 + ) expected = [{'$ref': 
'http://localhost:5000/api/literature/1508668'}] result = hep.do(create_record(snippet)) @@ -429,12 +425,12 @@ def test_inspire_categories_from_65017a_2(): schema = load_schema('hep') subschema = schema['properties']['inspire_categories'] - snippet = ( + snippet = ( # record/1426196 '' ' Inspire' ' Experiment-HEP' '' - ) # record/1426196 + ) expected = [ { @@ -461,13 +457,13 @@ def test_inspire_categories_from_65017a_2_9_discards_conference(): schema = load_schema('hep') subschema = schema['properties']['inspire_categories'] - snippet = ( + snippet = ( # record/1479228 '' ' INSPIRE' ' conference' ' Accelerators' '' - ) # record/1479228 + ) expected = [ { @@ -494,13 +490,12 @@ def test_inspire_categories_from_65017a_2_9_converts_automatically_added(): schema = load_schema('hep') subschema = schema['properties']['inspire_categories'] - snippet = ( - '' - ' INSPIRE' - ' Instrumentation' - ' automatically added based on DCC, PPF, DK' - '' - ) # record/669400 + snippet = ( # record/669400 + ' INSPIRE Instrumentation automatically' + ' added based on DCC, PPF, DK' + ) expected = [ { @@ -529,13 +524,13 @@ def test_inspire_categories_from_65017a_2_9_converts_submitter(): schema = load_schema('hep') subschema = schema['properties']['inspire_categories'] - snippet = ( + snippet = ( # record/1511089 '' ' Math and Math Physics' ' submitter' ' INSPIRE' '' - ) # record/1511089 + ) expected = [ { @@ -549,11 +544,7 @@ def test_inspire_categories_from_65017a_2_9_converts_submitter(): assert expected == result['inspire_categories'] expected = [ - { - '2': 'INSPIRE', - '9': 'user', - 'a': 'Math and Math Physics' - }, + {'2': 'INSPIRE', '9': 'user', 'a': 'Math and Math Physics'}, ] result = hep2marc.do(result) @@ -561,12 +552,12 @@ def test_inspire_categories_from_65017a_2_9_converts_submitter(): def test_inspire_categories_from_65017a_2_discards_arxiv(): - snippet = ( + snippet = ( # record/1511862 '' ' math-ph' ' arXiv' '' - ) # record/1511862 + ) result = hep.do(create_record(snippet)) @@ -577,12 +568,12 @@ def test_urls_from_8564_u_y(): schema = load_schema('hep') subschema = schema['properties']['urls'] - snippet = ( + snippet = ( # record/1405358 '' ' http://www-lib.kek.jp/ar/ar.html' ' KEK' '' - ) # record/1405358 + ) expected = [ { @@ -607,12 +598,13 @@ def test_urls_from_8564_u_y(): def test_urls_from_8564_ignores_internal_links(): - snippet = ( + snippet = ( # record/1610503 '' ' 1506142' - ' http://inspirehep.net/record/1610503/files/arXiv:1707.05770.pdf' + ' http://inspirehep.net/record/1610503/files/arXiv:1707.05770.pdf' '' - ) # record/1610503 + ) result = hep.do(create_record(snippet)) @@ -620,12 +612,13 @@ def test_urls_from_8564_ignores_internal_links(): def test_urls_from_8564_ignores_internal_links_with_subdomain(): - snippet = ( + snippet = ( # record/1610503 '' ' 1506142' - ' http://old.inspirehep.net/record/1610503/files/arXiv:1707.05770.pdf' + ' http://old.inspirehep.net/record/1610503/files/arXiv:1707.05770.pdf' '' - ) # record/1610503 + ) result = hep.do(create_record(snippet)) @@ -633,13 +626,12 @@ def test_urls_from_8564_ignores_internal_links_with_subdomain(): def test_urls_from_8564_ignores_internal_links_https(): - snippet = ( - '' - ' 2392681' - ' https://inspirehep.net/record/1508108/files/fermilab-pub-16-617-cms.pdf' - ' Fulltext' - '' - ) # record/1508036 + snippet = ( # record/1508036 + ' 2392681' + ' https://inspirehep.net/record/1508108/files/fermilab-pub-16-617-cms.pdf' + ' Fulltext' + ) result = hep.do(create_record(snippet)) @@ -650,12 +642,13 @@ def 
test_urls_from_8564_s_u_ignores_s(): schema = load_schema('hep') subschema = schema['properties']['urls'] - snippet = ( + snippet = ( # record/1511347 '' ' 443981' - ' http://localhost:5000/record/1511347/files/HIG-16-034-pas.pdf' + ' http://localhost:5000/record/1511347/files/HIG-16-034-pas.pdf' '' - ) # record/1511347 + ) expected = [ {'value': 'http://localhost:5000/record/1511347/files/HIG-16-034-pas.pdf'}, @@ -677,13 +670,14 @@ def test_urls_from_8564_u_w_y_ignores_w_and_translates_weblinks(): schema = load_schema('hep') subschema = schema['properties']['urls'] - snippet = ( + snippet = ( # record/1120360 '' ' 12-316' ' FERMILABPUB' - ' http://lss.fnal.gov/cgi-bin/find_paper.pl?pub-12-316' + ' http://lss.fnal.gov/cgi-bin/find_paper.pl?pub-12-316' '' - ) # record/1120360 + ) expected = [ { @@ -711,18 +705,21 @@ def test_urls_from_8564_u_w_y_ignores_w_and_translates_weblinks_with_apostrophes schema = load_schema('hep') subschema = schema['properties']['urls'] - snippet = ( + snippet = ( # record/417789 '' ' Abstracts_2/Stanek.html' ' C95-10-29' - ' http://www-bd.fnal.gov/icalepcs/abstracts/Abstracts_2/Stanek.html' + ' http://www-bd.fnal.gov/icalepcs/abstracts/Abstracts_2/Stanek.html' '' - ) # record/417789 + ) expected = [ { 'description': 'ICALEPCS\'95 Server', - 'value': 'http://www-bd.fnal.gov/icalepcs/abstracts/Abstracts_2/Stanek.html', + 'value': ( + 'http://www-bd.fnal.gov/icalepcs/abstracts/Abstracts_2/Stanek.html' + ), }, ] result = hep.do(create_record(snippet)) # no roundtrip @@ -745,13 +742,12 @@ def test_urls_from_8564_u_double_y_selects_the_first_y(): schema = load_schema('hep') subschema = schema['properties']['urls'] - snippet = ( - '' - ' http://link.springer.com/journal/10909/176/5/page/1' - ' Part II' - ' Springer' - '' - ) # record/1312672 + snippet = ( # record/1312672 + ' http://link.springer.com/journal/10909/176/5/page/1' + ' Part II Springer' + ) expected = [ { @@ -776,11 +772,11 @@ def test_urls_from_8564_u_double_y_selects_the_first_y(): def test_private_notes_from_595__9(): - snippet = ( + snippet = ( # record/1005469 '' ' SPIRES-HIDDEN' '' - ) # record/1005469 + ) assert '_private_notes' not in hepnames.do(create_record(snippet)) @@ -789,9 +785,9 @@ def test_legacy_version_from_005(): schema = load_schema('hep') subschema = schema['properties']['legacy_version'] - snippet = ( + snippet = ( # record/1694560 '20180919130452.0' - ) # record/1694560 + ) expected = '20180919130452.0' result = hep.do(create_record(snippet)) diff --git a/tests/test_conferences.py b/tests/test_conferences.py index e1e040c8..21e33376 100644 --- a/tests/test_conferences.py +++ b/tests/test_conferences.py @@ -23,16 +23,16 @@ from __future__ import absolute_import, division, print_function from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from inspire_dojson.conferences import conferences -from inspire_schemas.api import load_schema, validate def test_addresses_from_034__d_f_and_111__c(): schema = load_schema('conferences') subschema = schema['properties']['addresses'] - snippet = ( + snippet = ( # record/1707423 '' ' ' ' 11.3426162' @@ -42,14 +42,16 @@ def test_addresses_from_034__d_f_and_111__c(): ' Bologna, Italy' ' ' '' - ) # record/1707423 + ) - expected = [{ - 'cities': ['Bologna'], - 'country_code': 'IT', - 'latitude': 44.494887, - 'longitude': 11.3426162 - }] + expected = [ + { + 'cities': ['Bologna'], + 'country_code': 'IT', + 'latitude': 44.494887, + 'longitude': 11.3426162, + } + ] result = 
conferences.do(create_record(snippet)) assert validate(result['addresses'], subschema) is None @@ -60,16 +62,14 @@ def test_acronyms_from_111__a_c_e_g_x_y(): schema = load_schema('conferences') subschema = schema['properties']['acronyms'] - snippet = ( - '' - ' 16th Conference on Flavor Physics and CP Violation' - ' Hyderabad, INDIA' - ' FPCP 2018' - ' C18-07-09' - ' 2018-07-09' - ' 2018-07-12' - '' - ) # record/1468357 + snippet = ( # record/1468357 + ' 16th' + ' Conference on Flavor Physics and CP Violation Hyderabad, INDIA FPCP' + ' 2018 C18-07-09 2018-07-09 2018-07-12' + ) expected = [ 'FPCP 2018', @@ -84,18 +84,16 @@ def test_acronyms_from_111__a_c_d_double_e_g_x_y(): schema = load_schema('conferences') subschema = schema['properties']['acronyms'] - snippet = ( - '' - ' 11th international vacuum congress and 7th international conference on solid surfaces' - ' Cologne, Germany' - ' 25 – 29 Sep 1989' - ' IVC-11' - ' ICSS-7' - ' C89-09-25.3' - ' 1989-09-25' - ' 1989-09-29' - '' - ) # record/1308774 + snippet = ( # record/1308774 + ' 11th' + ' international vacuum congress and 7th international conference on' + ' solid surfaces Cologne,' + ' Germany 25 – 29 Sep 1989 ' + ' IVC-11 ICSS-7 C89-09-25.3' + ' 1989-09-25 1989-09-29' + ) expected = [ 'IVC-11', @@ -111,17 +109,16 @@ def test_acronyms_from_111__a_c_double_e_g_x_y(): schema = load_schema('conferences') subschema = schema['properties']['acronyms'] - snippet = ( - '' - ' 2013 IEEE Nuclear Science Symposium and Medical Imaging Conference and Workshop on Room-Temperature Semiconductor Detectors' - ' Seoul, Korea' - ' NSS/MIC 2013' - ' RTSD 2013' - ' C13-10-26' - ' 2013-10-26' - ' 2013-11-02' - '' - ) # record/1218346 + snippet = ( # record/1218346 + ' 2013 IEEE' + ' Nuclear Science Symposium and Medical Imaging Conference and Workshop' + ' on Room-Temperature Semiconductor Detectors Seoul, Korea NSS/MIC' + ' 2013 RTSD 2013 C13-10-26 2013-10-26 2013-11-02' + ) expected = [ 'NSS/MIC 2013', @@ -137,16 +134,14 @@ def test_addresses_from_111__a_c_d_g_x_y(): schema = load_schema('conferences') subschema = schema['properties']['addresses'] - snippet = ( - '' - ' 11th Texas Symposium on Relativistic Astrophysics' - ' Austin, Tex.' - ' 13-17 Dec 1982' - ' C82-12-13' - ' 1982-12-13' - ' 1982-12-17' - '' - ) # record/965081 + snippet = ( # record/965081 + ' 11th Texas' + ' Symposium on Relativistic Astrophysics Austin, Tex. 
13-17 Dec' + ' 1982 C82-12-13 1982-12-13 1982-12-17' + ) expected = [ { @@ -167,21 +162,17 @@ def test_addresses_from_111__a_c_d_g_x_y_and_111__c(): schema = load_schema('conferences') subschema = schema['properties']['addresses'] - snippet = ( - '' - ' ' - ' Low dimensional physics and gauge principles' - ' Yerevan, Armenia' - ' 21-29 Sep 2011' - ' C11-09-21.2' - ' 2011-09-21' - ' 2011-09-29' - ' ' - ' ' - ' Tbilisi, Georgia' - ' ' - '' - ) # record/1220831 + snippet = ( # record/1220831 + ' Low dimensional physics and gauge principles ' + ' Yerevan, Armenia 21-29 Sep 2011 C11-09-21.2 2011-09-21 2011-09-29 Tbilisi, Georgia ' + ' ' + ) expected = [ { @@ -207,18 +198,16 @@ def test_addresses_from_111__a_double_c_d_e_g_x_y(): schema = load_schema('conferences') subschema = schema['properties']['addresses'] - snippet = ( - '' - ' 16th High-Energy Physics International Conference in Quantum Chromodynamics' - ' QCD 12' - ' Montpellier, France' - ' 2-7 Jul 2012' - ' QCD 12' - ' C12-07-02' - ' 2012-07-02' - ' 2012-07-07' - '' - ) # record/1085463 + snippet = ( # record/1085463 + ' 16th' + ' High-Energy Physics International Conference in Quantum' + ' Chromodynamics QCD 12 ' + ' Montpellier, France 2-7 Jul 2012 QCD' + ' 12 C12-07-02 2012-07-02 2012-07-07' + ) expected = [ { @@ -241,16 +230,15 @@ def test_addresses_from_111__a_c_d_e_g_x_y_three_address_parts(): schema = load_schema('conferences') subschema = schema['properties']['addresses'] - snippet = ( - '' - ' 10th Int. Conf. DICE2020: Spacetime - Matter - Quantum Mechanics' - ' DICE2020' - ' 2020-09-14' - ' 2020-09-18' - ' Castiglioncello , Tuscany, Italy' - ' C20-09-14.1' - '' - ) # record/1781388 + snippet = ( # record/1781388 + ' 10th Int.' + ' Conf. DICE2020: Spacetime - Matter - Quantum Mechanics ' + ' DICE2020 2020-09-14 2020-09-18 Castiglioncello ,' + ' Tuscany, Italy C20-09-14.1' + ) expected = [ { @@ -271,15 +259,14 @@ def test_addresses_from_111__a_c_d_e_g_x_y_many_address_parts(): schema = load_schema('conferences') subschema = schema['properties']['addresses'] - snippet = ( - '' - ' Higher structures in Holomorphic and Topological Field Theory' - ' 2019-01-14' - ' 2019-01-18' - ' IHES, Bures-sur-Yvette, Paris area, France' - ' C19-01-14.1' - '' - ) # record/1699363 + snippet = ( # record/1699363 + ' Higher' + ' structures in Holomorphic and Topological Field Theory ' + ' 2019-01-14 2019-01-18 IHES,' + ' Bures-sur-Yvette, Paris area, France C19-01-14.1' + ) expected = [ { @@ -301,11 +288,11 @@ def test_addresses_from_270__b(): schema = load_schema('conferences') subschema = schema['properties']['addresses'] - snippet = ( + snippet = ( # record/1430104 '' ' British Columbia' '' - ) # record/1430104 + ) expected = [ {'place_name': 'British Columbia'}, @@ -320,21 +307,17 @@ def test_addresses_from_111__a_c_e_g_x_y_and_270__b(): schema = load_schema('conferences') subschema = schema['properties']['addresses'] - snippet = ( - '' - ' ' - ' 2017 International Workshop on Baryon and Lepton Number Violation: From the Cosmos to the LHC' - ' Cleveland, Ohio, USA' - ' BLV 2017' - ' C17-05-15' - ' 2017-05-15' - ' 2017-05-18' - ' ' - ' ' - ' Case Western Reserve University' - ' ' - '' - ) # record/1353313 + snippet = ( # record/1353313 + ' 2017 International Workshop on Baryon and Lepton Number' + ' Violation: From the Cosmos to the LHC Cleveland, Ohio, USA BLV' + ' 2017 C17-05-15 ' + ' 2017-05-15 2017-05-18 Case Western Reserve' + ' University ' + ) expected = [ { @@ -408,11 +391,11 @@ def 
test_opening_date_from_111__x_handles_incomplete_dates_with_year_and_month() schema = load_schema('conferences') subschema = schema['properties']['opening_date'] - snippet = ( + snippet = ( # record/1442284 '' ' 2001-02-00' '' - ) # record/1442284 + ) expected = '2001-02' result = conferences.do(create_record(snippet)) @@ -421,15 +404,15 @@ def test_opening_date_from_111__x_handles_incomplete_dates_with_year_and_month() assert expected == result['opening_date'] -def test_opening_date_from_111__x_handles_incomplete_dates_with_year_and_month_without_padding(): +def test_opening_date_from_111__x_handles_incomplete_dates(): schema = load_schema('conferences') subschema = schema['properties']['opening_date'] - snippet = ( + snippet = ( # record/1477158 '' ' 1999-07' '' - ) # record/1477158 + ) expected = '1999-07' result = conferences.do(create_record(snippet)) @@ -442,11 +425,11 @@ def test_opening_date_from_111__x_handles_unseparated_dates(): schema = load_schema('conferences') subschema = schema['properties']['opening_date'] - snippet = ( + snippet = ( # record/1280577 '' ' 20140518' '' - ) # record/1280577 + ) expected = '2014-05-18' result = conferences.do(create_record(snippet)) @@ -459,11 +442,11 @@ def test_closing_date_from_111__y_handles_incomplete_dates_with_only_year(): schema = load_schema('conferences') subschema = schema['properties']['closing_date'] - snippet = ( + snippet = ( # record/1372837 '' ' 1967-00-00' '' - ) # record/1372837 + ) expected = '1967' result = conferences.do(create_record(snippet)) @@ -476,12 +459,12 @@ def test_contact_details_from_270__m_p(): schema = load_schema('conferences') subschema = schema['properties']['contact_details'] - snippet = ( + snippet = ( # record/1517305 '' ' jonivar@thphys.nuim.ie' ' Jon-Ivar Skullerud' '' - ) # record/1517305 + ) expected = [ { @@ -499,11 +482,11 @@ def test_series_from_411__a(): schema = load_schema('conferences') subschema = schema['properties']['series'] - snippet = ( + snippet = ( # record/1430017 '' ' DPF Series' '' - ) # record/1430017 + ) expected = [ {'name': 'DPF Series'}, @@ -515,11 +498,11 @@ def test_series_from_411__a(): def test_series_from_411__n(): - snippet = ( + snippet = ( # record/1447029 '' ' 7' '' - ) # record/1447029 + ) result = conferences.do(create_record(snippet)) @@ -530,12 +513,12 @@ def test_series_from_411__a_n(): schema = load_schema('conferences') subschema = schema['properties']['series'] - snippet = ( + snippet = ( # record/1468357 '' ' FPCP' ' 16' '' - ) # record/1468357 + ) expected = [ { @@ -553,7 +536,7 @@ def test_series_from_411__a_n_and_411__a(): schema = load_schema('conferences') subschema = schema['properties']['series'] - snippet = ( + snippet = ( # record/1404073 '' ' ' ' Rencontres de Moriond' @@ -563,7 +546,7 @@ def test_series_from_411__a_n_and_411__a(): ' Moriond EW' ' ' '' - ) # record/1404073 + ) expected = [ { @@ -584,7 +567,7 @@ def test_series_from_411__a_n_and_411__n(): schema = load_schema('conferences') subschema = schema['properties']['series'] - snippet = ( + snippet = ( # record/963769 '' ' ' ' SSI' @@ -594,7 +577,7 @@ def test_series_from_411__a_n_and_411__n(): ' 2' ' ' '' - ) # record/963769 + ) expected = [ { @@ -612,7 +595,7 @@ def test_series_from_double_411__a_n(): schema = load_schema('conferences') subschema = schema['properties']['series'] - snippet = ( + snippet = ( # record/974856 '' ' ' ' ICHEP' @@ -623,7 +606,7 @@ def test_series_from_double_411__a_n(): ' 5' ' ' '' - ) # record/974856 + ) expected = [ { @@ -645,7 +628,7 @@ def 
test_series_from_411__n_and_411__a_n(): schema = load_schema('conferences') subschema = schema['properties']['series'] - snippet = ( + snippet = ( # record/963914 '' ' ' ' 3' @@ -655,7 +638,7 @@ def test_series_from_411__n_and_411__a_n(): ' 3' ' ' '' - ) # record/963914 + ) expected = [ { @@ -673,7 +656,7 @@ def test_series_from_411__n_and_411__a(): schema = load_schema('conferences') subschema = schema['properties']['series'] - snippet = ( + snippet = ( # record/972145 '' ' ' ' 3' @@ -682,7 +665,7 @@ def test_series_from_411__n_and_411__a(): ' Gordon' ' ' '' - ) # record/972145 + ) expected = [ { @@ -700,7 +683,7 @@ def test_series_from_double_411__a(): schema = load_schema('conferences') subschema = schema['properties']['series'] - snippet = ( + snippet = ( # record/964177 '' ' ' ' SNPS' @@ -709,7 +692,7 @@ def test_series_from_double_411__a(): ' NSS' ' ' '' - ) # record/964177 + ) expected = [ { @@ -729,7 +712,7 @@ def test_series_from_411__a_and_411__a_n(): schema = load_schema('conferences') subschema = schema['properties']['series'] - snippet = ( + snippet = ( # record/964448 '' ' ' ' CEC' @@ -739,7 +722,7 @@ def test_series_from_411__a_and_411__a_n(): ' 2' ' ' '' - ) # record/964448 + ) expected = [ { @@ -760,11 +743,10 @@ def test_public_notes_from_500__a(): schema = load_schema('conferences') subschema = schema['properties']['public_notes'] - snippet = ( - '' - ' Same conf. as Kyoto 1975: none in intervening years' - '' - ) # record/963579 + snippet = ( # record/963579 + ' Same conf.' + ' as Kyoto 1975: none in intervening years' + ) expected = [ {'value': 'Same conf. as Kyoto 1975: none in intervening years'}, @@ -779,19 +761,23 @@ def test_public_notes_from_double_500__a(): schema = load_schema('conferences') subschema = schema['properties']['public_notes'] - snippet = ( - '' - ' ' - ' Marion White, PhD (Argonne) Conference Chair Vladimir Shiltsev, PhD (FNAL) Scientific Program Chair Maria Power (Argonne) Conference Editor/Scientific Secretariat' - ' ' - ' ' - ' Will be published in: JACoW' - ' ' - '' - ) # record/1445071 + snippet = ( # record/1445071 + ' Marion White, PhD (Argonne) Conference Chair Vladimir' + ' Shiltsev, PhD (FNAL) Scientific Program Chair Maria Power (Argonne)' + ' Conference Editor/Scientific Secretariat ' + ' Will be' + ' published in: JACoW ' + ) expected = [ - {'value': 'Marion White, PhD (Argonne) Conference Chair Vladimir Shiltsev, PhD (FNAL) Scientific Program Chair Maria Power (Argonne) Conference Editor/Scientific Secretariat'}, + { + 'value': ( + 'Marion White, PhD (Argonne) Conference Chair Vladimir' + ' Shiltsev, PhD (FNAL) Scientific Program Chair Maria Power' + ' (Argonne) Conference Editor/Scientific Secretariat' + ) + }, {'value': 'Will be published in: JACoW'}, ] result = conferences.do(create_record(snippet)) @@ -804,14 +790,36 @@ def test_short_description_from_520__a(): schema = load_schema('conferences') subschema = schema['properties']['short_description'] - snippet = ( - '' - ' QNP2015 is the Seventh International Conference on Quarks and Nuclear Physics. It is anticipated that QCD practitioners, both experimentalists and theorists, will gather at the Universidad Técnica Federico Santa María, in Valparaíso, Chile during the week of March 2, 2015 to present and discuss the latest advances in the field. 
The following topics will be covered: quarks and gluons content of nucleons and nuclei, hadron spectroscopy, non-perturbative methods in QCD (including lattice calculations), effective field theories, nuclear matter under extreme conditions and nuclear medium. Participants should register at the conference website https://indico.cern.ch/event/304663/' - '' - ) # record/1326067 + snippet = ( # record/1326067 + ' QNP2015 is' + ' the Seventh International Conference on Quarks and Nuclear Physics.' + ' It is anticipated that QCD practitioners, both experimentalists and' + ' theorists, will gather at the Universidad Técnica Federico Santa' + ' María, in Valparaíso, Chile during the week of March 2, 2015 to' + ' present and discuss the latest advances in the field. The following' + ' topics will be covered: quarks and gluons content of nucleons and' + ' nuclei, hadron spectroscopy, non-perturbative methods in QCD' + ' (including lattice calculations), effective field theories, nuclear' + ' matter under extreme conditions and nuclear medium. Participants' + ' should register at the conference website' + ' https://indico.cern.ch/event/304663/' + ) expected = { - 'value': u'QNP2015 is the Seventh International Conference on Quarks and Nuclear Physics. It is anticipated that QCD practitioners, both experimentalists and theorists, will gather at the Universidad Técnica Federico Santa María, in Valparaíso, Chile during the week of March 2, 2015 to present and discuss the latest advances in the field. The following topics will be covered: quarks and gluons content of nucleons and nuclei, hadron spectroscopy, non-perturbative methods in QCD (including lattice calculations), effective field theories, nuclear matter under extreme conditions and nuclear medium. Participants should register at the conference website https://indico.cern.ch/event/304663/', + 'value': ( + u'QNP2015 is the Seventh International Conference on Quarks and' + u' Nuclear Physics. It is anticipated that QCD practitioners, both' + u' experimentalists and theorists, will gather at the Universidad' + u' Técnica Federico Santa María, in Valparaíso, Chile during the' + u' week of March 2, 2015 to present and discuss the latest advances' + u' in the field. The following topics will be covered: quarks and' + u' gluons content of nucleons and nuclei, hadron spectroscopy,' + u' non-perturbative methods in QCD (including lattice' + u' calculations), effective field theories, nuclear matter under' + u' extreme conditions and nuclear medium. Participants should' + u' register at the conference website' + u' https://indico.cern.ch/event/304663/' + ), } result = conferences.do(create_record(snippet)) @@ -823,25 +831,46 @@ def test_short_description_from_multiple_520__a(): schema = load_schema('conferences') subschema = schema['properties']['short_description'] - snippet = ( - '' - ' ' - ' The alliance "Physics at the Terascale" will host "Proton Structure in the LHC Era", from 29 September - 2 October, 2014 at DESY in Hamburg. The planned structure will be a 2 day SCHOOL (Monday-Tuesday) followed by a 2 day WORKSHOP (Wednesday-Thursday) devoted to the current problems of the LHC data interpretation, related to the particularities of QCD, factorization, proton structure and higher order calculations.' - ' ' - ' ' - ' SCHOOL: (Monday-Tuesday, September 29-30, 2014) The school will address mainly Ph.D. students and postdocs working at the LHC experiments. 
It includes introductory lectures, accompanied by tutorials in HERAFitter, FastNLO, Applgrid and further tools.' - ' ' - ' ' - ' WORKSHOP: (Wednesday-Thursday, October 1-2, 2014) The following workshop will encompass the open issues in theory and experiment concerning the determination of PDFs, heavy quark masses and strong coupling. The workshop will run as an open session and is more expert-oriented' - ' ' - '' - ) # record/1288023 + snippet = ( # record/1288023 + ' The alliance "Physics at the Terascale" will host "Proton' + ' Structure in the LHC Era", from 29 September - 2 October, 2014 at' + ' DESY in Hamburg. The planned structure will be a 2 day SCHOOL' + ' (Monday-Tuesday) followed by a 2 day WORKSHOP (Wednesday-Thursday)' + ' devoted to the current problems of the LHC data interpretation,' + ' related to the particularities of QCD, factorization, proton' + ' structure and higher order calculations. ' + ' SCHOOL:' + ' (Monday-Tuesday, September 29-30, 2014) The school will address' + ' mainly Ph.D. students and postdocs working at the LHC experiments. It' + ' includes introductory lectures, accompanied by tutorials in' + ' HERAFitter, FastNLO, Applgrid and further tools. ' + ' WORKSHOP: (Wednesday-Thursday, October 1-2, 2014) The' + ' following workshop will encompass the open issues in theory and' + ' experiment concerning the determination of PDFs, heavy quark masses' + ' and strong coupling. The workshop will run as an open session and is' + ' more expert-oriented ' + ) expected = { 'value': ( - 'The alliance "Physics at the Terascale" will host "Proton Structure in the LHC Era", from 29 September - 2 October, 2014 at DESY in Hamburg. The planned structure will be a 2 day SCHOOL (Monday-Tuesday) followed by a 2 day WORKSHOP (Wednesday-Thursday) devoted to the current problems of the LHC data interpretation, related to the particularities of QCD, factorization, proton structure and higher order calculations.\n' - 'SCHOOL: (Monday-Tuesday, September 29-30, 2014) The school will address mainly Ph.D. students and postdocs working at the LHC experiments. It includes introductory lectures, accompanied by tutorials in HERAFitter, FastNLO, Applgrid and further tools.\n' - 'WORKSHOP: (Wednesday-Thursday, October 1-2, 2014) The following workshop will encompass the open issues in theory and experiment concerning the determination of PDFs, heavy quark masses and strong coupling. The workshop will run as an open session and is more expert-oriented' + 'The alliance "Physics at the Terascale" will host "Proton' + ' Structure in the LHC Era", from 29 September - 2 October, 2014 at' + ' DESY in Hamburg. The planned structure will be a 2 day SCHOOL' + ' (Monday-Tuesday) followed by a 2 day WORKSHOP' + ' (Wednesday-Thursday) devoted to the current problems of the LHC' + ' data interpretation, related to the particularities of QCD,' + ' factorization, proton structure and higher order' + ' calculations.\nSCHOOL: (Monday-Tuesday, September 29-30, 2014)' + ' The school will address mainly Ph.D. students and postdocs' + ' working at the LHC experiments. It includes introductory' + ' lectures, accompanied by tutorials in HERAFitter, FastNLO,' + ' Applgrid and further tools.\nWORKSHOP: (Wednesday-Thursday,' + ' October 1-2, 2014) The following workshop will encompass the open' + ' issues in theory and experiment concerning the determination of' + ' PDFs, heavy quark masses and strong coupling. 
The workshop will' + ' run as an open session and is more expert-oriented' ), } result = conferences.do(create_record(snippet)) @@ -854,11 +883,11 @@ def test_alternative_titles_from_711__a(): schema = load_schema('conferences') subschema = schema['properties']['alternative_titles'] - snippet = ( + snippet = ( # record/1436454 '' ' GCACSE16' '' - ) # record/1436454 + ) expected = [ {'title': 'GCACSE16'}, @@ -873,7 +902,7 @@ def test_alternative_titles_from_double_711__a(): schema = load_schema('conferences') subschema = schema['properties']['alternative_titles'] - snippet = ( + snippet = ( # record/1436454 '' ' ' ' GCACSE16' @@ -882,7 +911,7 @@ def test_alternative_titles_from_double_711__a(): ' GCACSE 2016' ' ' '' - ) # record/1436454 + ) expected = [ {'title': 'GCACSE16'}, @@ -898,12 +927,11 @@ def test_alternative_titles_from_711__a_b(): schema = load_schema('conferences') subschema = schema['properties']['alternative_titles'] - snippet = ( - '' - ' XX Riunione Nazionale di Elettromagnetismo' - ' Padova' - '' - ) # record/1403856 + snippet = ( # record/1403856 + ' XX' + ' Riunione Nazionale di Elettromagnetismo Padova' + ) expected = [ {'title': 'XX Riunione Nazionale di Elettromagnetismo'}, @@ -919,11 +947,11 @@ def test_core_from_980__a(): schema = load_schema('conferences') subschema = schema['properties']['core'] - snippet = ( + snippet = ( # record/1707423 '' ' CORE' '' - ) # record/1707423 + ) expected = True result = conferences.do(create_record(snippet)) @@ -936,7 +964,7 @@ def test_core_from_980__a_b(): schema = load_schema('conferences') subschema = schema['properties']['core'] - snippet = ( + snippet = ( # record/1726216 '' ' ' ' CONFERENCES' @@ -945,7 +973,7 @@ def test_core_from_980__a_b(): ' CORE' ' ' '' - ) # record/1726216 + ) expected = True result = conferences.do(create_record(snippet)) @@ -975,7 +1003,7 @@ def test_keywords_from_6531_9_a(): schema = load_schema('conferences') subschema = schema['properties']['keywords'] - snippet = ( + snippet = ( # record/1713483 '' ' ' ' submitter' @@ -986,7 +1014,7 @@ def test_keywords_from_6531_9_a(): ' standard model' ' ' '' - ) # record/1713483 + ) expected = [ {'source': 'submitter', 'value': 'electroweak'}, diff --git a/tests/test_data.py b/tests/test_data.py index 057ff10d..cc22f77b 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -23,21 +23,21 @@ from __future__ import absolute_import, division, print_function from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from inspire_dojson.data import data -from inspire_schemas.api import load_schema, validate def test_dois_from_0247_2_a(): schema = load_schema('data') subschema = schema['properties']['dois'] - snippet = ( + snippet = ( # record/1639676 '' ' 10.17182/hepdata.77268.v1/t6' ' DOI' '' - ) # record/1639676 + ) expected = [ { @@ -54,11 +54,11 @@ def test_new_record_from_970__d(): schema = load_schema('data') subschema = schema['properties']['new_record'] - snippet = ( + snippet = ( # synthetic data '' ' 361769' '' - ) # synthetic data + ) expected = {'$ref': 'http://localhost:5000/api/data/361769'} result = data.do(create_record(snippet)) diff --git a/tests/test_experiments.py b/tests/test_experiments.py index 91c04aeb..bcf0f008 100644 --- a/tests/test_experiments.py +++ b/tests/test_experiments.py @@ -23,9 +23,9 @@ from __future__ import absolute_import, division, print_function from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from 
inspire_dojson.experiments import experiments -from inspire_schemas.api import load_schema, validate def test_dates_from_046__q_s_and_046__r(): @@ -34,7 +34,7 @@ def test_dates_from_046__q_s_and_046__r(): date_approved_schema = schema['properties']['date_approved'] date_started_schema = schema['properties']['date_started'] - snippet = ( + snippet = ( # record/1318099 '' ' ' ' 2009-08-19' @@ -44,7 +44,7 @@ def test_dates_from_046__q_s_and_046__r(): ' 2009-10-08' ' ' '' - ) # record/1318099 + ) expected_date_proposed = '2009-08-19' expected_date_approved = '2009-10-08' @@ -66,7 +66,7 @@ def test_dates_from_046__q_and_046__r_and_046__x(): date_proposed_schema = schema['properties']['date_proposed'] date_approved_schema = schema['properties']['date_approved'] - snippet = ( + snippet = ( # record/1108188 '' ' ' ' 2010' @@ -78,7 +78,7 @@ def test_dates_from_046__q_and_046__r_and_046__x(): ' yes' ' ' '' - ) # record/1108188 + ) expected_date_proposed = '2010' expected_date_approved = '2011-03-18' @@ -96,7 +96,7 @@ def test_dates_from_046__s_and_046__t_and_046__x(): date_started_schema = schema['properties']['date_started'] date_completed_schema = schema['properties']['date_completed'] - snippet = ( + snippet = ( # record/1108324 '' ' ' ' 1996' @@ -108,7 +108,7 @@ def test_dates_from_046__s_and_046__t_and_046__x(): ' yes' ' ' '' - ) # record/1108324 + ) expected_date_started = '1996' expected_date_completed = '2002' @@ -125,12 +125,12 @@ def test_dates_from_046__c_x(): schema = load_schema('experiments') subschema = schema['properties']['date_cancelled'] - snippet = ( + snippet = ( # record/1110624 '' ' 2000' ' no' '' - ) # record/1110624 + ) expected = '2000' result = experiments.do(create_record(snippet)) @@ -144,13 +144,13 @@ def test_legacy_name_and_institutions_from_119__a_u_z(): legacy_name_schema = schema['properties']['legacy_name'] institutions_schema = schema['properties']['institutions'] - snippet = ( + snippet = ( # record/1108206 '' ' CERN-ALPHA' ' CERN' ' 902725' '' - ) # record/1108206 + ) expected_legacy_name = 'CERN-ALPHA' expected_institutions = [ @@ -176,7 +176,7 @@ def test_legacy_name_and_institutions_from_119__a_and_multiple_119__u_z(): legacy_name_schema = schema['properties']['legacy_name'] institutions_schema = schema['properties']['institutions'] - snippet = ( + snippet = ( # record/1228417 '' ' ' ' LATTICE-UKQCD' @@ -214,7 +214,7 @@ def test_legacy_name_and_institutions_from_119__a_and_multiple_119__u_z(): ' 903240' ' ' '' - ) # record/1228417 + ) expected_legacy_name = 'LATTICE-UKQCD' expected_institutions = [ @@ -284,14 +284,14 @@ def test_legacy_name_and_institutions_from_119__a_and_multiple_119__u_z(): assert expected_institutions == result['institutions'] -def test_accelerator_and_legacy_name_and_experiment_and_institutions_from_119__a_b_c_d_u_z(): +def test_accel_legacy_name_exp_inst_from_119__a_b_c_d_u_z(): schema = load_schema('experiments') accelerator_schema = schema['properties']['accelerator'] legacy_name_schema = schema['properties']['legacy_name'] experiment_schema = schema['properties']['experiment'] institutions_schema = schema['properties']['institutions'] - snippet = ( + snippet = ( # record/1617971 '' ' ASAS-SN' ' NON' @@ -300,7 +300,7 @@ def test_accelerator_and_legacy_name_and_experiment_and_institutions_from_119__a ' Ohio State U.' 
' 903092' '' - ) # record/1617971 + ) expected_accelerator = {'value': 'NON'} expected_legacy_name = 'ASAS-SN' @@ -336,11 +336,11 @@ def test_long_name_from_245__a(): schema = load_schema('experiments') subschema = schema['properties']['long_name'] - snippet = ( + snippet = ( # record/1108206 '' ' The ALPHA experiment' '' - ) # record/1108206 + ) expected = 'The ALPHA experiment' result = experiments.do(create_record(snippet)) @@ -353,12 +353,12 @@ def test_inspire_classification_from_372__a_9(): schema = load_schema('experiments') subschema = schema['properties']['inspire_classification'] - snippet = ( + snippet = ( # record/1110577 '' ' INSPIRE' ' 5.3' '' - ) # record/1110577 + ) expected = [ 'Cosmic ray/Gamma ray experiments|Satellite', @@ -370,11 +370,11 @@ def test_inspire_classification_from_372__a_9(): def test_inspire_classification_from_372__a_ignores_non_numerical_values(): - snippet = ( + snippet = ( # record/1108515 '' ' ATLAS' '' - ) # record/1108515 + ) result = experiments.do(create_record(snippet)) @@ -385,11 +385,11 @@ def test_name_variants_from_419__a(): schema = load_schema('experiments') subschema = schema['properties']['name_variants'] - snippet = ( + snippet = ( # record/1108206 '' ' ALPHA' '' - ) # record/1108206 + ) expected = [ 'ALPHA', @@ -408,7 +408,9 @@ def test_long_name_and_name_variants_from_245__a_and_419__a(): snippet = ( '' ' ' - r' Proposal to measure the very rare kaon decay $K^+ \to \pi^+ \nu \bar{\nu}$' + r' Proposal to measure the very rare kaon decay' + r' $K^+ \to' + r' \pi^+ \nu \bar{\nu}$' ' ' ' ' ' P-326' @@ -416,7 +418,10 @@ def test_long_name_and_name_variants_from_245__a_and_419__a(): '' ) # record/1275752 - expected_long_name = r'Proposal to measure the very rare kaon decay $K^+ \to \pi^+ \nu \bar{\nu}$' + expected_long_name = ( + r'Proposal to measure the very rare kaon decay $K^+ \to \pi^+ \nu' + r' \bar{\nu}$' + ) expected_name_variants = [ 'P-326', ] @@ -433,14 +438,31 @@ def test_description_from_520__a(): schema = load_schema('experiments') subschema = schema['properties']['description'] - snippet = ( - '' - ' The Muon Accelerator Program (MAP) was created in 2010 to unify the DOE supported R&D in the U.S. aimed at developing the concepts and technologies required for Muon Colliders and Neutrino Factories. These muon based facilities have the potential to discover and explore new exciting fundamental physics, but will require the development of demanding technologies and innovative concepts. The MAP aspires to prove the feasibility of a Muon Collider within a few years, and to make significant contributions to the international effort devoted to developing Neutrino Factories. MAP was formally approved on March 18, 2011.' - '' - ) # record/1108188 + snippet = ( # record/1108188 + ' The Muon' + ' Accelerator Program (MAP) was created in 2010 to unify the DOE' + ' supported R&D in the U.S. aimed at developing the concepts and' + ' technologies required for Muon Colliders and Neutrino Factories.' + ' These muon based facilities have the potential to discover and' + ' explore new exciting fundamental physics, but will require the' + ' development of demanding technologies and innovative concepts. The' + ' MAP aspires to prove the feasibility of a Muon Collider within a few' + ' years, and to make significant contributions to the international' + ' effort devoted to developing Neutrino Factories. MAP was formally' + ' approved on March 18, 2011.' 
+ ) expected = ( - 'The Muon Accelerator Program (MAP) was created in 2010 to unify the DOE supported R&D in the U.S. aimed at developing the concepts and technologies required for Muon Colliders and Neutrino Factories. These muon based facilities have the potential to discover and explore new exciting fundamental physics, but will require the development of demanding technologies and innovative concepts. The MAP aspires to prove the feasibility of a Muon Collider within a few years, and to make significant contributions to the international effort devoted to developing Neutrino Factories. MAP was formally approved on March 18, 2011.' + 'The Muon Accelerator Program (MAP) was created in 2010 to unify the' + ' DOE supported R&D in the U.S. aimed at developing the concepts and' + ' technologies required for Muon Colliders and Neutrino Factories.' + ' These muon based facilities have the potential to discover and' + ' explore new exciting fundamental physics, but will require the' + ' development of demanding technologies and innovative concepts. The' + ' MAP aspires to prove the feasibility of a Muon Collider within a few' + ' years, and to make significant contributions to the international' + ' effort devoted to developing Neutrino Factories. MAP was formally' + ' approved on March 18, 2011.' ) result = experiments.do(create_record(snippet)) @@ -453,28 +475,51 @@ def test_description_from_multiple_520__a(): schema = load_schema('experiments') subschema = schema['properties']['description'] - snippet = ( - '' - ' ' - ' DAMA is an observatory for rare processes which develops and uses several low-background set-ups at the Gran Sasso National Laboratory of the I.N.F.N. (LNGS). The main experimental set-ups are: i) DAMA/NaI (about 100 kg of highly radiopure NaI(Tl)), which completed its data taking on July 2002' - ' ' - ' ' - ' ii) DAMA/LXe (about 6.5 kg liquid Kr-free Xenon enriched either in 129Xe or in 136Xe)' - ' ' - ' ' - ' iii) DAMA/R&D, devoted to tests on prototypes and to small scale experiments, mainly on the investigations of double beta decay modes in various isotopes. iv) the second generation DAMA/LIBRA set-up (about 250 kg highly radiopure NaI(Tl)) in operation since March 2003' - ' ' - ' ' - ' v) the low background DAMA/Ge detector mainly devoted to sample measurements: in some measurements on rare processes the low-background Germanium detectors of the LNGS facility are also used. Moreover, a third generation R&D is in progress towards a possible 1 ton set-up, DAMA proposed in 1996. In particular, the DAMA/NaI and the DAMA/LIBRA set-ups have investigated the presence of Dark Matter particles in the galactic halo by exploiting the Dark Matter annual modulation signature.' - ' ' - '' - ) # record/1110568 + snippet = ( # record/1110568 + ' DAMA is an observatory for rare processes which develops and' + ' uses several low-background set-ups at the Gran Sasso National' + ' Laboratory of the I.N.F.N. (LNGS). The main experimental set-ups are:' + ' i) DAMA/NaI (about 100 kg of highly radiopure NaI(Tl)), which' + ' completed its data taking on July 2002 ' + ' ii)' + ' DAMA/LXe (about 6.5 kg liquid Kr-free Xenon enriched either in 129Xe' + ' or in 136Xe) iii) DAMA/R&D, devoted to tests' + ' on prototypes and to small scale experiments, mainly on the' + ' investigations of double beta decay modes in various isotopes. 
iv)' + ' the second generation DAMA/LIBRA set-up (about 250 kg highly' + ' radiopure NaI(Tl)) in operation since March 2003 ' + ' v) the low background DAMA/Ge detector mainly devoted to' + ' sample measurements: in some measurements on rare processes the' + ' low-background Germanium detectors of the LNGS facility are also' + ' used. Moreover, a third generation R&D is in progress towards a' + ' possible 1 ton set-up, DAMA proposed in 1996. In particular, the' + ' DAMA/NaI and the DAMA/LIBRA set-ups have investigated the presence of' + ' Dark Matter particles in the galactic halo by exploiting the Dark' + ' Matter annual modulation signature. ' + ) expected = ( - 'DAMA is an observatory for rare processes which develops and uses several low-background set-ups at the Gran Sasso National Laboratory of the I.N.F.N. (LNGS). The main experimental set-ups are: i) DAMA/NaI (about 100 kg of highly radiopure NaI(Tl)), which completed its data taking on July 2002\n' - 'ii) DAMA/LXe (about 6.5 kg liquid Kr-free Xenon enriched either in 129Xe or in 136Xe)\n' - 'iii) DAMA/R&D, devoted to tests on prototypes and to small scale experiments, mainly on the investigations of double beta decay modes in various isotopes. iv) the second generation DAMA/LIBRA set-up (about 250 kg highly radiopure NaI(Tl)) in operation since March 2003\n' - 'v) the low background DAMA/Ge detector mainly devoted to sample measurements: in some measurements on rare processes the low-background Germanium detectors of the LNGS facility are also used. Moreover, a third generation R&D is in progress towards a possible 1 ton set-up, DAMA proposed in 1996. In particular, the DAMA/NaI and the DAMA/LIBRA set-ups have investigated the presence of Dark Matter particles in the galactic halo by exploiting the Dark Matter annual modulation signature.' + 'DAMA is an observatory for rare processes which develops and uses' + ' several low-background set-ups at the Gran Sasso National Laboratory' + ' of the I.N.F.N. (LNGS). The main experimental set-ups are: i)' + ' DAMA/NaI (about 100 kg of highly radiopure NaI(Tl)), which completed' + ' its data taking on July 2002\nii) DAMA/LXe (about 6.5 kg liquid' + ' Kr-free Xenon enriched either in 129Xe or in 136Xe)\niii) DAMA/R&D,' + ' devoted to tests on prototypes and to small scale experiments, mainly' + ' on the investigations of double beta decay modes in various isotopes.' + ' iv) the second generation DAMA/LIBRA set-up (about 250 kg highly' + ' radiopure NaI(Tl)) in operation since March 2003\nv) the low' + ' background DAMA/Ge detector mainly devoted to sample measurements: in' + ' some measurements on rare processes the low-background Germanium' + ' detectors of the LNGS facility are also used. Moreover, a third' + ' generation R&D is in progress towards a possible 1 ton set-up, DAMA' + ' proposed in 1996. In particular, the DAMA/NaI and the DAMA/LIBRA' + ' set-ups have investigated the presence of Dark Matter particles in' + ' the galactic halo by exploiting the Dark Matter annual modulation' + ' signature.' 
) result = experiments.do(create_record(snippet)) @@ -486,7 +531,7 @@ def test_related_records_from_double_510__a_w_0_accepts_predecessors(): schema = load_schema('experiments') subschema = schema['properties']['related_records'] - snippet = ( + snippet = ( # record/1386519 '' ' ' ' 1108293' @@ -499,7 +544,7 @@ def test_related_records_from_double_510__a_w_0_accepts_predecessors(): ' a' ' ' '' - ) # record/1386519 + ) expected = [ { @@ -526,13 +571,13 @@ def test_related_records_from_double_510__a_w_0_accepts_predecessors(): def test_related_records_from_510__a_w_0_accepts_successors(): schema = load_schema('experiments') subschema = schema['properties']['related_records'] - snippet = ( + snippet = ( # record/1108192 '' ' 1262631' ' LZ' ' b' '' - ) # record/1108192 + ) expected = [ { @@ -552,12 +597,12 @@ def test_collaboration_from_710__g_0(): schema = load_schema('experiments') subschema = schema['properties']['collaboration'] - snippet = ( + snippet = ( # record/1108199 '' ' DarkSide' ' 1108199' '' - ) # record/1108199 + ) expected = { 'curated_relation': True, @@ -576,7 +621,7 @@ def test_collaboration_from_710__g_q(): schema = load_schema('experiments') subschema = schema['properties']['collaboration'] - snippet = ( + snippet = ( # record/1108642 '' ' CMS' ' ECAL' @@ -589,7 +634,7 @@ def test_collaboration_from_710__g_q(): ' Silicon Tracker' ' Tracker' '' - ) # record/1108642 + ) expected = { 'value': 'CMS', @@ -616,7 +661,7 @@ def test_core_from_multiple_980__a(): schema = load_schema('experiments') subschema = schema['properties']['core'] - snippet = ( + snippet = ( # record/1332131 '' ' ' ' CORE' @@ -625,7 +670,7 @@ def test_core_from_multiple_980__a(): ' EXPERIMENT' ' ' '' - ) # record/1332131 + ) expected = True result = experiments.do(create_record(snippet)) @@ -638,7 +683,7 @@ def test_project_type_from_double_980__a_recognizes_accelerators(): schema = load_schema('experiments') subschema = schema['properties']['project_type'] - snippet = ( + snippet = ( # record/1607855 '' ' ' ' ACCELERATOR' @@ -647,7 +692,7 @@ def test_project_type_from_double_980__a_recognizes_accelerators(): ' EXPERIMENT' ' ' '' - ) # record/1607855 + ) expected = [ 'accelerator', @@ -662,11 +707,11 @@ def test_deleted_from_980__c(): schema = load_schema('hep') subschema = schema['properties']['deleted'] - snippet = ( + snippet = ( # synthetic data '' ' DELETED' '' - ) # synthetic data + ) expected = True result = experiments.do(create_record(snippet)) diff --git a/tests/test_experiments_model.py b/tests/test_experiments_model.py index 08251b67..488655a8 100644 --- a/tests/test_experiments_model.py +++ b/tests/test_experiments_model.py @@ -22,8 +22,8 @@ from __future__ import absolute_import, division, print_function -from inspire_dojson.model import FilterOverdo from inspire_dojson.experiments.model import add_project_type +from inspire_dojson.model import FilterOverdo def test_add_project_type(): diff --git a/tests/test_hep_bd0xx.py b/tests/test_hep_bd0xx.py index 618258ac..919006ab 100644 --- a/tests/test_hep_bd0xx.py +++ b/tests/test_hep_bd0xx.py @@ -23,20 +23,20 @@ from __future__ import absolute_import, division, print_function from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from inspire_dojson.hep import hep, hep2marc -from inspire_schemas.api import load_schema, validate def test_isbns_from_020__a(): schema = load_schema('hep') subschema = schema['properties']['isbns'] - snippet = ( + snippet = ( # record/1510325 '' ' 9780198759713' '' - ) 
# record/1510325 + ) expected = [ {'value': '9780198759713'}, @@ -58,11 +58,11 @@ def test_isbns_from_020__a_handles_capital_x(): schema = load_schema('hep') subschema = schema['properties']['isbns'] - snippet = ( + snippet = ( # record/1230427 '' ' 069114558X' '' - ) # record/1230427 + ) expected = [ {'value': '9780691145587'}, @@ -84,12 +84,12 @@ def test_isbns_from_020__a_b_normalizes_online(): schema = load_schema('hep') subschema = schema['properties']['isbns'] - snippet = ( + snippet = ( # record/1504286 '' ' 978-94-024-0999-4' ' Online' '' - ) # record/1504286 + ) expected = [ { @@ -117,12 +117,12 @@ def test_isbns_from_020__a_b_normalizes_print(): schema = load_schema('hep') subschema = schema['properties']['isbns'] - snippet = ( + snippet = ( # record/1509456 '' ' 9781786341105' ' Print' '' - ) # record/1509456 + ) expected = [ { @@ -150,12 +150,12 @@ def test_isbns_from_020__a_b_normalizes_electronic(): schema = load_schema('hep') subschema = schema['properties']['isbns'] - snippet = ( + snippet = ( # record/1292006 '' ' 9783319006260' ' electronic version' '' - ) # record/1292006 + ) expected = [ { @@ -181,12 +181,12 @@ def test_isbns_from_020__a_b_normalizes_ebook(): schema = load_schema('hep') subschema = schema['properties']['isbns'] - snippet = ( + snippet = ( # record/1430829 '' ' 9783319259017' ' eBook' '' - ) # record/1430829 + ) expected = [ { @@ -214,12 +214,12 @@ def test_isbns_from_020__a_b_normalizes_hardcover(): schema = load_schema('hep') subschema = schema['properties']['isbns'] - snippet = ( + snippet = ( # record/1351311 '' ' 978-981-4571-66-1' ' hardcover' '' - ) # record/1351311 + ) expected = [ { @@ -247,12 +247,12 @@ def test_isbns_from_020__a_b_handles_dots(): schema = load_schema('hep') subschema = schema['properties']['isbns'] - snippet = ( + snippet = ( # record/1426768 '' ' 978.90.9023556.1' ' Online' '' - ) # record/1426768 + ) result = hep.do(create_record(snippet)) @@ -263,14 +263,14 @@ def test_dois_from_0247_a_2_double_9_ignores_curator_source(): schema = load_schema('hep') subschema = schema['properties']['dois'] - snippet = ( + snippet = ( # record/1117362 '' ' DOI' ' bibcheck' ' CURATOR' ' 10.1590/S1806-11172008005000006' '' - ) # record/1117362 + ) expected = [ { @@ -299,12 +299,12 @@ def test_dois_from_0247_a_2(): schema = load_schema('hep') subschema = schema['properties']['dois'] - snippet = ( + snippet = ( # record/1302395 '' ' DOI' ' 10.1088/0264-9381/31/24/245004' '' - ) # record/1302395 + ) expected = [ {'value': '10.1088/0264-9381/31/24/245004'}, @@ -329,7 +329,7 @@ def test_dois_from_0247_a_2_9_and_0247_a_2(): schema = load_schema('hep') subschema = schema['properties']['dois'] - snippet = ( + snippet = ( # record/1286727 '' ' ' ' DOI' @@ -341,7 +341,7 @@ def test_dois_from_0247_a_2_9_and_0247_a_2(): ' 10.1088/1475-7516/2015/03/044' ' ' '' - ) # record/1286727 + ) expected = [ { @@ -377,7 +377,7 @@ def test_dois_from_0247_a_2_and_0247_a_2_9(): schema = load_schema('hep') subschema = schema['properties']['dois'] - snippet = ( + snippet = ( # record/1273665 '' ' ' ' DOI' @@ -389,7 +389,7 @@ def test_dois_from_0247_a_2_and_0247_a_2_9(): ' 10.1103/PhysRevD.91.019903' ' ' '' - ) # record/1273665 + ) expected = [ { @@ -425,14 +425,14 @@ def test_dois_from_0247_a_q_2_9_normalizes_erratum(): schema = load_schema('hep') subschema = schema['properties']['dois'] - snippet = ( + snippet = ( # record/898839 '' ' DOI' ' bibmatch' ' 10.1103/PhysRevC.93.049901' ' Erratum' '' - ) # record/898839 + ) expected = [ { @@ -463,13 +463,13 @@ def 
test_dois_from_0247_a_q_2_normalizes_ebook(): schema = load_schema('hep') subschema = schema['properties']['dois'] - snippet = ( + snippet = ( # record/1509573 '' ' DOI' ' 10.1017/CBO9780511813924' ' ebook' '' - ) # record/1509573 + ) expected = [ { @@ -498,12 +498,12 @@ def test_persistent_identifiers_from_0247_a_2(): schema = load_schema('hep') subschema = schema['properties']['persistent_identifiers'] - snippet = ( + snippet = ( # record/1623117 '' ' HDL' ' 10150/625467' '' - ) # record/1623117 + ) expected = [ { @@ -531,12 +531,12 @@ def test_texkeys_from_035__a_9(): schema = load_schema('hep') subschema = schema['properties']['texkeys'] - snippet = ( + snippet = ( # record/1403324 '' ' INSPIRETeX' ' Hagedorn:1963hdh' '' - ) # record/1403324 + ) expected = [ 'Hagedorn:1963hdh', @@ -561,7 +561,7 @@ def test_texkeys_from_035__z_9_and_035__a_9(): schema = load_schema('hep') subschema = schema['properties']['texkeys'] - snippet = ( + snippet = ( # record/1498308 '' ' ' ' SPIRESTeX' @@ -572,10 +572,10 @@ def test_texkeys_from_035__z_9_and_035__a_9(): ' Akiba:2016ofq' ' ' '' - ) # record/1498308 + ) expected = [ - 'Akiba:2016ofq', # XXX: the first one is the one coming + 'Akiba:2016ofq', # XXX: the first one is the one coming 'N.Cartiglia:2015cn', # from the "a" field. ] result = hep.do(create_record(snippet)) @@ -591,7 +591,7 @@ def test_texkeys_from_035__z_9_and_035__a_9(): { '9': 'INSPIRETeX', 'z': 'N.Cartiglia:2015cn', - } + }, ] result = hep2marc.do(result) @@ -602,12 +602,12 @@ def test_desy_bookkeekping_from_035__z_9_DESY(): schema = load_schema('hep') subschema = schema['properties']['_desy_bookkeeping'] - snippet = ( + snippet = ( # record/1635310 '' ' DESY' ' DA17-kp47ch' '' - ) # record/1635310 + ) expected = [ { @@ -634,7 +634,7 @@ def test_desy_bookkeekping_from_035__z_9_DESY_and_595_Da_d_s(): schema = load_schema('hep') subschema = schema['properties']['_desy_bookkeeping'] - snippet = ( + snippet = ( # record/1635310 '' ' ' ' DESY' @@ -646,7 +646,7 @@ def test_desy_bookkeekping_from_035__z_9_DESY_and_595_Da_d_s(): ' abs' ' ' '' - ) # record/1635310 + ) expected = [ { @@ -683,12 +683,12 @@ def test_desy_bookkeekping_from_035__z_9_DESY_and_595_Da_d_s(): def test_external_system_identifiers_from_035__a_9_discards_arxiv(): - snippet = ( + snippet = ( # record/1498308 '' ' arXiv' ' oai:arXiv.org:1611.05079' '' - ) # record/1498308 + ) result = hep.do(create_record(snippet)) @@ -699,12 +699,12 @@ def test_external_system_identifiers_from_035__z_9_handles_cernkey(): schema = load_schema('hep') subschema = schema['properties']['external_system_identifiers'] - snippet = ( + snippet = ( # record/451647 '' ' CERNKEY' ' 0263439' '' - ) # record/451647 + ) expected = [ { @@ -729,7 +729,7 @@ def test_external_system_identifiers_from_035__z_9_handles_cernkey(): def test_external_system_numbers_from_035__a_d_h_m_9_ignores_oai(): - snippet = ( + snippet = ( # record/1403324 '' ' http://cds.cern.ch/oai2d' ' oai:cds.cern.ch:325030' @@ -737,7 +737,7 @@ def test_external_system_numbers_from_035__a_d_h_m_9_ignores_oai(): ' 2015-11-09T16:22:48Z' ' marcxml' '' - ) # record/1403324 + ) result = hep.do(create_record(snippet)) @@ -760,12 +760,12 @@ def test_external_system_numbers_from_035__a_9_hepdata(): schema = load_schema('hep') subschema = schema['properties']['external_system_identifiers'] - snippet = ( + snippet = ( # record/1498566 ' ' ' ins1498566' ' HEPDATA' ' ' - ) # record/1498566 + ) expected = [ { @@ -793,7 +793,7 @@ def test_external_system_numbers_from_035__a_9_and_035__z_9(): schema = 
load_schema('hep') subschema = schema['properties']['external_system_identifiers'] - snippet = ( + snippet = ( # record/700376 '' ' ' ' OSTI' @@ -804,7 +804,7 @@ def test_external_system_numbers_from_035__a_9_and_035__z_9(): ' 897192' ' ' '' - ) # record/700376 + ) expected = [ { @@ -842,9 +842,7 @@ def test_035_from_arxiv_eprints_and_texkeys(): subschema_arxiv_eprints = schema['properties']['arxiv_eprints'] subschema_texkeys = schema['properties']['texkeys'] snippet = { - 'arxiv_eprints': [ - {'value': '2212.04977', 'categories': ['hep-ex']} - ], + 'arxiv_eprints': [{'value': '2212.04977', 'categories': ['hep-ex']}], 'texkeys': ['LHCb:2022diq'], } # literature/2612668 @@ -870,13 +868,13 @@ def test_arxiv_eprints_from_037__a_c_9(): schema = load_schema('hep') subschema = schema['properties']['arxiv_eprints'] - snippet = ( + snippet = ( # record/1368891 '' ' arXiv' ' arXiv:1505.01843' ' hep-ph' '' - ) # record/1368891 + ) expected = [ { @@ -907,13 +905,13 @@ def test_arxiv_eprints_from_037__a_c_9_old_identifier(): schema = load_schema('hep') subschema = schema['properties']['arxiv_eprints'] - snippet = ( + snippet = ( # record/782187 '' ' hep-th/0110148' ' arXiv' ' hep-th' '' - ) # record/782187 + ) expected = [ { @@ -944,20 +942,20 @@ def test_arxiv_eprints_from_037__a_c_9_obsolete_category(): schema = load_schema('hep') subschema = schema['properties']['arxiv_eprints'] - snippet = ( + snippet = ( # record/450571 '' ' funct-an/9710003' ' arXiv' ' funct-an' '' - ) # record/450571 + ) expected = [ { 'categories': [ 'math.FA', ], - 'value': 'funct-an/9710003' + 'value': 'funct-an/9710003', }, ] result = hep.do(create_record(snippet)) @@ -981,11 +979,11 @@ def test_report_numbers_from_037__a(): schema = load_schema('hep') subschema = schema['properties']['report_numbers'] - snippet = ( + snippet = ( # record/1511277 '' ' CERN-EP-2016-319' '' - ) # record/1511277 + ) expected = [ {'value': 'CERN-EP-2016-319'}, @@ -1007,7 +1005,7 @@ def test_report_numbers_from_two_037__a(): schema = load_schema('hep') subschema = schema['properties']['report_numbers'] - snippet = ( + snippet = ( # record/26564 '' ' ' ' UTPT-89-27' @@ -1016,7 +1014,7 @@ def test_report_numbers_from_two_037__a(): ' CALT-68-1585' ' ' '' - ) # record/26564 + ) expected = [ { @@ -1044,11 +1042,11 @@ def test_report_numbers_hidden_from_037__z(): schema = load_schema('hep') subschema = schema['properties']['report_numbers'] - snippet = ( + snippet = ( # record/1508174 '' ' FERMILAB-PUB-17-011-CMS' '' - ) # record/1508174 + ) expected = [ { @@ -1061,9 +1059,7 @@ def test_report_numbers_hidden_from_037__z(): assert validate(result['report_numbers'], subschema) is None assert expected == result['report_numbers'] - expected = [ - {'z': 'FERMILAB-PUB-17-011-CMS'} - ] + expected = [{'z': 'FERMILAB-PUB-17-011-CMS'}] result = hep2marc.do(result) assert expected == result['037'] @@ -1073,12 +1069,12 @@ def test_report_numbers_from_037__z_9(): schema = load_schema('hep') subschema = schema['properties']['report_numbers'] - snippet = ( + snippet = ( # record/1326454 '' ' SLAC' ' SLAC-PUB-16140' '' - ) # record/1326454 + ) expected = [ { @@ -1106,12 +1102,12 @@ def test_report_numbers_from_037__a_9_arXiv_reportnumber(): schema = load_schema('hep') subschema = schema['properties']['report_numbers'] - snippet = ( + snippet = ( # record/1618037 '' ' arXiv:reportnumber' ' LIGO-P1500247' '' - ) # record/1618037 + ) expected = [ { @@ -1140,7 +1136,7 @@ def test_arxiv_eprints_from_037__a_c_9_and_multiple_65017_a_2(): schema = load_schema('hep') 
subschema = schema['properties']['arxiv_eprints'] - snippet = ( + snippet = ( # record/1511862 '' ' ' ' arXiv' @@ -1156,7 +1152,7 @@ def test_arxiv_eprints_from_037__a_c_9_and_multiple_65017_a_2(): ' arXiv' ' ' '' - ) # record/1511862 + ) expected = [ { @@ -1164,7 +1160,7 @@ def test_arxiv_eprints_from_037__a_c_9_and_multiple_65017_a_2(): 'math-ph', 'gr-qc', ], - 'value': '1702.00702' + 'value': '1702.00702', } ] result = hep.do(create_record(snippet)) @@ -1213,11 +1209,7 @@ def test_arxiv_eprints_037__a_9_lowercase_arxiv(): "" ) - expected = [ - { - 'value': '1703.09086' - } - ] + expected = [{'value': '1703.09086'}] result = hep.do(create_record(snippet)) assert validate(result['arxiv_eprints'], subschema) is None @@ -1238,11 +1230,11 @@ def test_languages_from_041__a(): schema = load_schema('hep') subschema = schema['properties']['languages'] - snippet = ( + snippet = ( # record/1503566 '' ' Italian' '' - ) # record/1503566 + ) expected = [ 'it', @@ -1264,11 +1256,11 @@ def test_languages_from_041__a_handles_multiple_languages_in_one_a(): schema = load_schema('hep') subschema = schema['properties']['languages'] - snippet = ( + snippet = ( # record/116959 '' ' Russian / English' '' - ) # record/116959 + ) expected = [ 'ru', @@ -1292,7 +1284,7 @@ def test_languages_from_double_041__a(): schema = load_schema('hep') subschema = schema['properties']['languages'] - snippet = ( + snippet = ( # record/1231408 '' ' ' ' French' @@ -1301,7 +1293,7 @@ def test_languages_from_double_041__a(): ' German' ' ' '' - ) # record/1231408 + ) expected = [ 'fr', diff --git a/tests/test_hep_bd1xx.py b/tests/test_hep_bd1xx.py index b011d445..3199c6c1 100644 --- a/tests/test_hep_bd1xx.py +++ b/tests/test_hep_bd1xx.py @@ -23,16 +23,16 @@ from __future__ import absolute_import, division, print_function from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from inspire_dojson.hep import hep, hep2marc -from inspire_schemas.api import load_schema, validate def test_authors_from_100__a_i_u_x_y(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/4328 '' ' Glashow, S.L.' ' INSPIRE-00085173' @@ -40,7 +40,7 @@ def test_authors_from_100__a_i_u_x_y(): ' 1008235' ' 1' '' - ) # record/4328 + ) expected = [ { @@ -85,7 +85,7 @@ def test_authors_from_100__a_u_w_y_and_700_a_u_w_x_y(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/81350 '' ' ' ' Kobayashi, Makoto' @@ -101,7 +101,7 @@ def test_authors_from_100__a_u_w_y_and_700_a_u_w_x_y(): ' 1' ' ' '' - ) # record/81350 + ) expected = [ { @@ -166,7 +166,7 @@ def test_authors_from_100__a_and_700__a_orders_correctly(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # synthetic data '' ' ' ' Author, Second' @@ -175,7 +175,7 @@ def test_authors_from_100__a_and_700__a_orders_correctly(): ' Author, First' ' ' '' - ) # synthetic data + ) expected = [ {'full_name': 'Author, First'}, @@ -202,7 +202,7 @@ def test_authors_from_100__a_e_w_y_and_700_a_e_w_y(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/1505338 '' ' ' ' Vinokurov, Nikolay A.' 
@@ -217,7 +217,7 @@ def test_authors_from_100__a_e_w_y_and_700_a_e_w_y(): ' 0' ' ' '' - ) # record/1505338 + ) expected = [ { @@ -243,7 +243,7 @@ def test_authors_from_100__a_e_w_y_and_700_a_e_w_y(): 'inspire_roles': [ 'editor', ], - } + }, ] result = hep.do(create_record(snippet)) @@ -274,7 +274,7 @@ def test_authors_from_100__a_i_u_x_y_z_and_double_700__a_u_w_x_y_z(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/712925 '' ' ' ' Sjostrand, Torbjorn' @@ -301,7 +301,7 @@ def test_authors_from_100__a_i_u_x_y_z_and_double_700__a_u_w_x_y_z(): ' 902796' ' ' '' - ) # record/712925 + ) expected = [ { @@ -375,9 +375,7 @@ def test_authors_from_100__a_i_u_x_y_z_and_double_700__a_u_w_x_y_z(): expected_100 = { 'a': 'Sjostrand, Torbjorn', - 'i': [ - 'INSPIRE-00126851' - ], + 'i': ['INSPIRE-00126851'], 'u': [ 'Lund U., Dept. Theor. Phys.', ], @@ -406,15 +404,14 @@ def test_authors_from_100__a_v_m_w_y(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( - '' - ' Gao, Xu' - ' Chern Institute of Mathematics and LPMC, Nankai University, Tianjin, 300071, China' - ' gausyu@gmail.com' - ' X.Gao.11' - ' 0' - '' - ) # record/1475380 + snippet = ( # record/1475380 + ' Gao,' + ' Xu Chern Institute of Mathematics and' + ' LPMC, Nankai University, Tianjin, 300071, China gausyu@gmail.com X.Gao.11 0' + ) expected = [ { @@ -430,10 +427,12 @@ def test_authors_from_100__a_v_m_w_y(): ], 'raw_affiliations': [ { - 'value': 'Chern Institute of Mathematics and LPMC, Nankai University,' - ' Tianjin, 300071, China', + 'value': ( + 'Chern Institute of Mathematics and LPMC, Nankai' + ' University, Tianjin, 300071, China' + ), } - ] + ], }, ] result = hep.do(create_record(snippet)) @@ -444,7 +443,10 @@ def test_authors_from_100__a_v_m_w_y(): expected = { 'a': 'Gao, Xu', 'v': [ - 'Chern Institute of Mathematics and LPMC, Nankai University, Tianjin, 300071, China', + ( + 'Chern Institute of Mathematics and LPMC, Nankai University,' + ' Tianjin, 300071, China' + ), ], 'm': [ 'gausyu@gmail.com', @@ -459,7 +461,7 @@ def test_authors_from_100__a_double_q_u_w_y_z(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/144579 '' ' Dineykhan, M.' ' Dineĭkhan, M.' @@ -470,7 +472,7 @@ def test_authors_from_100__a_double_q_u_w_y_z(): ' 0' ' 902780' '' - ) # record/144579 + ) expected = [ { @@ -521,27 +523,23 @@ def test_authors_from_100__a_m_u_v_w_y_z_and_700__a_j_v_m_w_y(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( - '' - ' ' - ' Gao, Xu' - ' gausyu@gmail.com' - ' Nankai U.' - ' Chern Institute of Mathematics and LPMC, Nankai University, Tianjin, 300071, China' - ' X.Gao.11' - ' 0' - ' 906082' - ' ' - ' ' - ' Liu, Ming' - ' ORCID:0000-0002-3413-183X' - ' School of Mathematics, South China University of Technology, Guangdong, Guangzhou, 510640, China' - ' ming.l1984@gmail.com' - ' M.Liu.16' - ' 0' - ' ' - '' - ) # record/1475380 + snippet = ( # record/1475380 + ' Gao, Xu gausyu@gmail.com Nankai' + ' U. 
Chern Institute of Mathematics' + ' and LPMC, Nankai University, Tianjin, 300071, China ' + ' X.Gao.11 0 906082 ' + ' Liu, Ming ORCID:0000-0002-3413-183X School of Mathematics, South China University of Technology,' + ' Guangdong, Guangzhou, 510640, China ming.l1984@gmail.com M.Liu.16 0 ' + ' ' + ) expected = [ { @@ -565,10 +563,12 @@ def test_authors_from_100__a_m_u_v_w_y_z_and_700__a_j_v_m_w_y(): ], 'raw_affiliations': [ { - 'value': 'Chern Institute of Mathematics and LPMC, Nankai University,' - ' Tianjin, 300071, China', + 'value': ( + 'Chern Institute of Mathematics and LPMC, Nankai' + ' University, Tianjin, 300071, China' + ), } - ] + ], }, { 'emails': [ @@ -587,10 +587,12 @@ def test_authors_from_100__a_m_u_v_w_y_z_and_700__a_j_v_m_w_y(): ], 'raw_affiliations': [ { - 'value': 'School of Mathematics, South China University of Technology,' - ' Guangdong, Guangzhou, 510640, China', + 'value': ( + 'School of Mathematics, South China University of' + ' Technology, Guangdong, Guangzhou, 510640, China' + ), } - ] + ], }, ] result = hep.do(create_record(snippet)) @@ -607,9 +609,11 @@ def test_authors_from_100__a_m_u_v_w_y_z_and_700__a_j_v_m_w_y(): 'Nankai U.', ], 'v': [ - 'Chern Institute of Mathematics and LPMC, Nankai University,' - ' Tianjin, 300071, China', - ] + ( + 'Chern Institute of Mathematics and LPMC, Nankai University,' + ' Tianjin, 300071, China' + ), + ], } expected_700 = [ { @@ -618,8 +622,10 @@ def test_authors_from_100__a_m_u_v_w_y_z_and_700__a_j_v_m_w_y(): 'ORCID:0000-0002-3413-183X', ], 'v': [ - 'School of Mathematics, South China University of ' - 'Technology, Guangdong, Guangzhou, 510640, China', + ( + 'School of Mathematics, South China University of ' + 'Technology, Guangdong, Guangzhou, 510640, China' + ), ], 'm': [ 'ming.l1984@gmail.com', @@ -636,7 +642,7 @@ def test_authors_from_100__a_triple_u_w_x_y_triple_z_and_700__double_a_u_w_x_y_z schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/1345256 '' ' ' ' Abe, K.' @@ -660,7 +666,7 @@ def test_authors_from_100__a_triple_u_w_x_y_triple_z_and_700__double_a_u_w_x_y_z ' 903734' ' ' '' - ) # record/1345256 + ) expected = [ { @@ -725,11 +731,7 @@ def test_authors_from_100__a_triple_u_w_x_y_triple_z_and_700__double_a_u_w_x_y_z expected_100 = { 'a': 'Abe, K.', - 'u': [ - 'Tokyo U., ICRR', - 'Tokyo U.', - 'Tokyo U., IPMU' - ], + 'u': ['Tokyo U., ICRR', 'Tokyo U.', 'Tokyo U., IPMU'], } expected_700 = [ { @@ -755,7 +757,7 @@ def test_authors_from_100__a_j_m_u_w_y_z(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/1475499 '' ' Martins, Ricardo S.' ' ORCID:' @@ -765,7 +767,7 @@ def test_authors_from_100__a_j_m_u_w_y_z(): ' 0' ' 910325' '' - ) # record/1475499 + ) expected = [ { @@ -813,21 +815,16 @@ def test_authors_from_100__a_v_w_x_y_and_100__a_v_w_y(): subschema = schema['properties']['authors'] snippet = ( - '' - ' ' - ' Tojyo, E.' - ' University of Tokyo, Tokyo, Japan' - ' Eiki.Tojyo.1' - ' 1477256' - ' 0' - ' ' - ' ' - ' Hattori, T.' - ' Tokyo Institute of Technology, Tokyo, Japan' - ' T.Hattori.1' - ' 0' - ' ' - '' + ' Tojyo, E. University of' + ' Tokyo, Tokyo, Japan Eiki.Tojyo.1 1477256 0 ' + ' Hattori, T. 
Tokyo Institute' + ' of Technology, Tokyo, Japan T.Hattori.1 0 ' + ' ' ) expected = [ @@ -923,9 +920,7 @@ def test_authors_from_100__a_j_m_u_v_w_y(): 'value': 'D.Macnair.2', }, ], - 'raw_affiliations': [ - {'value': 'SLAC, Menlo Park, California, USA'} - ], + 'raw_affiliations': [{'value': 'SLAC, Menlo Park, California, USA'}], }, ] result = hep.do(create_record(snippet)) @@ -957,7 +952,7 @@ def test_authors_from_100__a_u_x_w_y_z_with_malformed_x(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/931310 '' ' Bakhrushin, Iu.P.' ' NIIEFA, St. Petersburg' @@ -966,7 +961,7 @@ def test_authors_from_100__a_u_x_w_y_z_with_malformed_x(): ' 0' ' 903073' '' - ) # record/931310 + ) expected = [ { @@ -1007,7 +1002,7 @@ def test_authors_from_100__a_double_m_double_u_w_y_z(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/413614 '' ' Puy, Denis' ' puy@tsmi19.sissa.it' @@ -1018,7 +1013,7 @@ def test_authors_from_100__a_double_m_double_u_w_y_z(): ' 0' ' 903393' '' - ) # record/413614 + ) expected = [ { @@ -1064,7 +1059,7 @@ def test_authors_supervisors_from_100__a_i_j_u_v_x_y_z_and_multiple_701__u_z(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/1462486 '' ' ' ' Spannagel, Simon' @@ -1094,7 +1089,7 @@ def test_authors_supervisors_from_100__a_i_j_u_v_x_y_z_and_multiple_701__u_z(): ' 913279' ' ' '' - ) # record/1462486 + ) expected = [ { @@ -1227,7 +1222,7 @@ def test_authors_supervisors_from_100_a_u_w_y_z_and_701__double_a_u_z(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/776962 '' ' ' ' Lang, Brian W.' @@ -1243,7 +1238,7 @@ def test_authors_supervisors_from_100_a_u_w_y_z_and_701__double_a_u_z(): ' 903010' ' ' '' - ) # record/776962 + ) expected = [ { @@ -1267,9 +1262,7 @@ def test_authors_supervisors_from_100_a_u_w_y_z_and_701__double_a_u_z(): 'affiliations': [ { 'value': 'Minnesota U.', - 'record': { - '$ref': 'http://localhost:5000/api/institutions/903010' - } + 'record': {'$ref': 'http://localhost:5000/api/institutions/903010'}, } ], 'full_name': 'Poling, Ron', @@ -1290,7 +1283,7 @@ def test_authors_supervisors_from_100_a_u_w_y_z_and_701__double_a_u_z(): 'inspire_roles': [ 'supervisor', ], - } + }, ] result = hep.do(create_record(snippet)) @@ -1327,7 +1320,7 @@ def test_authors_supervisors_from_100_a_j_u_w_y_z_and_701__a_i_j_u_x_y_z(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/1504133 '' ' ' ' Teroerde, Marius' @@ -1347,7 +1340,7 @@ def test_authors_supervisors_from_100_a_j_u_w_y_z_and_701__a_i_j_u_x_y_z(): ' 902624' ' ' '' - ) # record/1504133 + ) expected = [ { @@ -1392,9 +1385,7 @@ def test_authors_supervisors_from_100_a_j_u_w_y_z_and_701__a_i_j_u_x_y_z(): 'value': 'CERN-456299', }, ], - 'inspire_roles': [ - 'supervisor' - ], + 'inspire_roles': ['supervisor'], 'record': { '$ref': 'http://localhost:5000/api/authors/1060887', }, @@ -1439,7 +1430,7 @@ def test_authors_from_100_a_double_u_w_z_y_double_z_and_700__a_double_u_w_y_doub schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/1088610 '' ' ' ' Billo, M.' 
@@ -1462,7 +1453,7 @@ def test_authors_from_100_a_double_u_w_z_y_double_z_and_700__a_double_u_w_y_doub ' 903297' ' ' '' - ) # record/1088610 + ) expected = [ { @@ -1528,7 +1519,7 @@ def test_authors_from_100_a_double_u_w_z_y_double_z_and_700__a_double_u_w_y_doub 'u': [ 'INFN, Turin', 'Turin U.', - ] + ], } expected_700 = [ { @@ -1549,18 +1540,16 @@ def test_author_from_100__a_i_m_u_v_x_y_z_strips_email_prefix(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( - '' - ' Kuehn, S.' - ' CERN' - ' CERN, European Organization for Nuclear Research, Geneve, Switzerland' - ' email:susanne.kuehn@cern.ch' - ' INSPIRE-00218553' - ' 1066844' - ' 1' - ' 902725' - '' - ) # record/1634669 + snippet = ( # record/1634669 + ' Kuehn,' + ' S. CERN CERN, European Organization for Nuclear Research, Geneve,' + ' Switzerland email:susanne.kuehn@cern.ch INSPIRE-00218553 1066844 1 ' + ' 902725' + ) expected = [ { @@ -1584,7 +1573,12 @@ def test_author_from_100__a_i_m_u_v_x_y_z_strips_email_prefix(): }, ], 'raw_affiliations': [ - {'value': 'CERN, European Organization for Nuclear Research, Geneve, Switzerland'}, + { + 'value': ( + 'CERN, European Organization for Nuclear Research,' + ' Geneve, Switzerland' + ) + }, ], 'record': { '$ref': 'http://localhost:5000/api/authors/1066844', @@ -1620,17 +1614,16 @@ def test_author_from_700__strips_dot_from_orcid(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( - '' - ' Gainutdinov, Azat M.' - ' ORCID:0000-0003-3127-682X.' - ' Tours U., CNRS' - ' Laboratoire de Mathématiques et Physique Théorique CNRS - Université de Tours - Parc de Grammont - 37200 Tours - France' - ' A.Gainutdinov.1' - ' 0' - ' 909619' - '' - ) # record/1600830 + snippet = ( # record/1600830 + ' Gainutdinov, Azat M. ORCID:0000-0003-3127-682X. Tours U., CNRS Laboratoire de' + ' Mathématiques et Physique Théorique CNRS - Université de Tours - Parc' + ' de Grammont - 37200 Tours - France A.Gainutdinov.1 0 ' + ' 909619' + ) expected = [ { @@ -1652,7 +1645,13 @@ def test_author_from_700__strips_dot_from_orcid(): }, ], 'raw_affiliations': [ - {'value': u'Laboratoire de Mathématiques et Physique Théorique CNRS - Université de Tours - Parc de Grammont - 37200 Tours - France'}, + { + 'value': ( + u'Laboratoire de Mathématiques et Physique Théorique' + u' CNRS - Université de Tours - Parc de Grammont -' + u' 37200 Tours - France' + ) + }, ], }, ] @@ -1670,7 +1669,11 @@ def test_author_from_700__strips_dot_from_orcid(): 'Tours U., CNRS', ], 'v': [ - u'Laboratoire de Mathématiques et Physique Théorique CNRS - Université de Tours - Parc de Grammont - 37200 Tours - France', + ( + u'Laboratoire de Mathématiques et Physique Théorique CNRS -' + u' Université de Tours - Parc de Grammont - 37200 Tours -' + u' France' + ), ], } result = hep2marc.do(result) @@ -1682,13 +1685,13 @@ def test_authors_from_700__a_double_e_handles_multiple_roles(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/1264604 '' ' Peskin, M.E.' ' Convener' ' ed.' 
'' - ) # record/1264604 + ) expected = [ { @@ -1708,11 +1711,11 @@ def test_authors_from_700__a_removes_trailing_comma(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/1683590 '' ' Lenske,' '' - ) # record/1683590 + ) expected = [ { @@ -1729,11 +1732,11 @@ def test_authors_from_100__a_removes_starting_comma_and_space(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/1697210 '' ' , M.A.Valuyan' '' - ) # record/1697210 + ) expected = [ { @@ -1750,7 +1753,7 @@ def test_authors_from_700__a_w_x_y_repeated_author(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/1684644 '' ' ' ' Suzuki, K.' @@ -1765,7 +1768,7 @@ def test_authors_from_700__a_w_x_y_repeated_author(): ' 0' ' ' '' - ) # record/1684644 + ) expected = [ { @@ -1818,11 +1821,11 @@ def test_corporate_author_from_110__a(): schema = load_schema('hep') subschema = schema['properties']['corporate_author'] - snippet = ( + snippet = ( # record/1621218 '' ' CMS Collaboration' '' - ) # record/1621218 + ) expected = [ 'CMS Collaboration', @@ -1844,7 +1847,7 @@ def test_authors_from_100__a_with_q_w_y_z_duplicated_u(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/144579 '' ' Dineykhan, M.' ' Dineĭkhan, M.' @@ -1857,7 +1860,7 @@ def test_authors_from_100__a_with_q_w_y_z_duplicated_u(): ' 902780' ' 902780' '' - ) # record/144579 + ) expected = [ { @@ -1908,7 +1911,7 @@ def test_authors_from_100__a_with_q_v_w_y_z_duplicated_v(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/144579 '' ' Dineykhan, M.' ' Dineĭkhan, M.' @@ -1921,7 +1924,7 @@ def test_authors_from_100__a_with_q_v_w_y_z_duplicated_v(): ' 0' ' 902780' '' - ) # record/144579 + ) expected = [ { @@ -1949,7 +1952,7 @@ def test_authors_from_100__a_with_q_v_w_y_z_duplicated_v(): { 'value': 'Joint Institute for Nuclear Research', } - ] + ], }, ] result = hep.do(create_record(snippet)) @@ -1980,7 +1983,7 @@ def test_authors_from_700__a_v_x_y_repeated_author_duplicated_v(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/1684644 '' ' ' ' Suzuki, K.' @@ -1997,7 +2000,7 @@ def test_authors_from_700__a_v_x_y_repeated_author_duplicated_v(): ' 0' ' ' '' - ) # record/1684644 + ) expected = [ { @@ -2054,7 +2057,7 @@ def test_authors_from_100__a_i_u_x_y_duplicated_i(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/4328 '' ' Glashow, S.L.' ' INSPIRE-00085173' @@ -2063,7 +2066,7 @@ def test_authors_from_100__a_i_u_x_y_duplicated_i(): ' 1008235' ' 1' '' - ) # record/4328 + ) expected = [ { @@ -2108,7 +2111,7 @@ def test_authors_from_700__a_i_x_y_repeated_author_duplicated_i(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( + snippet = ( # record/1684644 '' ' ' ' Suzuki, K.' 
@@ -2125,7 +2128,7 @@ def test_authors_from_700__a_i_x_y_repeated_author_duplicated_i(): ' 0' ' ' '' - ) # record/1684644 + ) expected = [ { @@ -2184,34 +2187,38 @@ def test_authors_from_100__a_v_w_y_repeated_t(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( - '' - ' Puertas-Centeno, David' - ' GRID:grid.4489.1' - ' GRID:grid.4489.1' - ' Departamento de Física Atómica - Molecular y Nuclear - Universidad de Granada - Granada - 18071 - Spain' - ' Instituto Carlos I de Física Teórica y Computacional - Universidad de Granada - Granada - 18071 - Spain' - ' D.Puertas.Centeno.2' - ' 0' - '' - ) # record/1676659 + snippet = ( # record/1676659 + ' Puertas-Centeno, David GRID:grid.4489.1 GRID:grid.4489.1 Departamento' + ' de Física Atómica - Molecular y Nuclear - Universidad de Granada -' + ' Granada - 18071 - Spain Instituto' + ' Carlos I de Física Teórica y Computacional - Universidad de Granada -' + ' Granada - 18071 - Spain D.Puertas.Centeno.2 0' + ) expected = [ { 'affiliations_identifiers': [ - { - 'schema': 'GRID', - 'value': 'grid.4489.1' - }, + {'schema': 'GRID', 'value': 'grid.4489.1'}, ], 'full_name': 'Puertas-Centeno, David', 'raw_affiliations': [ { - 'value': u'Departamento de Física Atómica - Molecular y Nuclear - Universidad de Granada - Granada - 18071 - Spain', + 'value': ( + u'Departamento de Física Atómica - Molecular y Nuclear' + u' - Universidad de Granada - Granada - 18071 - Spain' + ), }, { - 'value': u'Instituto Carlos I de Física Teórica y Computacional - Universidad de Granada - Granada - 18071 - Spain', - } + 'value': ( + u'Instituto Carlos I de Física Teórica y Computacional' + u' - Universidad de Granada - Granada - 18071 - Spain' + ), + }, ], 'ids': [ { @@ -2228,12 +2235,16 @@ def test_authors_from_100__a_v_w_y_repeated_t(): expected = { 'a': 'Puertas-Centeno, David', - 't': [ - 'GRID:grid.4489.1' - ], + 't': ['GRID:grid.4489.1'], 'v': [ - u'Departamento de Física Atómica - Molecular y Nuclear - Universidad de Granada - Granada - 18071 - Spain', - u'Instituto Carlos I de Física Teórica y Computacional - Universidad de Granada - Granada - 18071 - Spain', + ( + u'Departamento de Física Atómica - Molecular y Nuclear -' + u' Universidad de Granada - Granada - 18071 - Spain' + ), + ( + u'Instituto Carlos I de Física Teórica y Computacional -' + u' Universidad de Granada - Granada - 18071 - Spain' + ), ], } result = hep2marc.do(result) @@ -2245,17 +2256,17 @@ def test_authors_from_100__a_t_u_v(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( - '' - ' Plumari, S.' - ' Department of Physics U. and Astronomy ‘Ettore Majorana’ - Catania - Via S. Sofia 64 - 95125 - Catania - Italy' - ' Catania U.' - ' GRID:grid.8158.4' - ' Laboratori Nazionali del Sud - INFN-LNS - Via S. Sofia 62 - 95123 - Catania - Italy' - ' INFN, LNS' - ' GRID:grid.466880.4' - '' - ) # record/1712320 + snippet = ( # record/1712320 + ' Plumari,' + ' S. Department of Physics U. and' + ' Astronomy ‘Ettore Majorana’ - Catania - Via S. Sofia 64 - 95125 -' + ' Catania - Italy Catania U. ' + ' GRID:grid.8158.4 Laboratori Nazionali del Sud - INFN-LNS - Via S. 
Sofia 62 -' + ' 95123 - Catania - Italy INFN,' + ' LNS GRID:grid.466880.4' + ) expected = [ { @@ -2263,29 +2274,28 @@ def test_authors_from_100__a_t_u_v(): { 'value': 'Catania U.', }, - { - 'value': 'INFN, LNS' - } + {'value': 'INFN, LNS'}, ], 'affiliations_identifiers': [ - { - 'schema': 'GRID', - 'value': 'grid.8158.4' - }, - { - 'schema': 'GRID', - 'value': 'grid.466880.4' - }, + {'schema': 'GRID', 'value': 'grid.8158.4'}, + {'schema': 'GRID', 'value': 'grid.466880.4'}, ], 'full_name': 'Plumari, S.', 'raw_affiliations': [ { - 'value': u'Department of Physics U. and Astronomy ‘Ettore Majorana’ - Catania - Via S. Sofia 64 - 95125 - Catania - Italy', + 'value': ( + u'Department of Physics U. and Astronomy ‘Ettore' + u' Majorana’ - Catania - Via S. Sofia 64 - 95125 -' + u' Catania - Italy' + ), }, { - 'value': u'Laboratori Nazionali del Sud - INFN-LNS - Via S. Sofia 62 - 95123 - Catania - Italy', - } - ] + 'value': ( + u'Laboratori Nazionali del Sud - INFN-LNS - Via S.' + u' Sofia 62 - 95123 - Catania - Italy' + ), + }, + ], }, ] result = hep.do(create_record(snippet)) @@ -2295,18 +2305,21 @@ def test_authors_from_100__a_t_u_v(): expected = { 'a': 'Plumari, S.', - 't': [ - 'GRID:grid.8158.4', - 'GRID:grid.466880.4' - ], + 't': ['GRID:grid.8158.4', 'GRID:grid.466880.4'], 'u': [ 'Catania U.', 'INFN, LNS', ], 'v': [ - u'Department of Physics U. and Astronomy ‘Ettore Majorana’ - Catania - Via S. Sofia 64 - 95125 - Catania - Italy', - u'Laboratori Nazionali del Sud - INFN-LNS - Via S. Sofia 62 - 95123 - Catania - Italy' - ] + ( + u'Department of Physics U. and Astronomy ‘Ettore Majorana’ -' + u' Catania - Via S. Sofia 64 - 95125 - Catania - Italy' + ), + ( + u'Laboratori Nazionali del Sud - INFN-LNS - Via S. Sofia 62 -' + u' 95123 - Catania - Italy' + ), + ], } result = hep2marc.do(result) @@ -2317,17 +2330,17 @@ def test_authors_from_100__a_t_u_v_ROR(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( - '' - ' Plumari, S.' - ' Department of Physics U. and Astronomy ‘Ettore Majorana’ - Catania - Via S. Sofia 64 - 95125 - Catania - Italy' - ' Catania U.' - ' ROR:https://ror.org/03a64bh57' - ' Laboratori Nazionali del Sud - INFN-LNS - Via S. Sofia 62 - 95123 - Catania - Italy' - ' INFN, LNS' - ' ROR:https://ror.org/02k1zhm92' - '' - ) # synthetic data + snippet = ( # synthetic data + ' Plumari,' + ' S. Department of Physics U. and' + ' Astronomy ‘Ettore Majorana’ - Catania - Via S. Sofia 64 - 95125 -' + ' Catania - Italy Catania U. ' + ' ROR:https://ror.org/03a64bh57 ' + ' Laboratori Nazionali del Sud - INFN-LNS - Via S.' + ' Sofia 62 - 95123 - Catania - Italy INFN, LNS ROR:https://ror.org/02k1zhm92' + ) expected = [ { @@ -2335,29 +2348,28 @@ def test_authors_from_100__a_t_u_v_ROR(): { 'value': 'Catania U.', }, - { - 'value': 'INFN, LNS' - } + {'value': 'INFN, LNS'}, ], 'affiliations_identifiers': [ - { - 'schema': 'ROR', - 'value': 'https://ror.org/03a64bh57' - }, - { - 'schema': 'ROR', - 'value': 'https://ror.org/02k1zhm92' - }, + {'schema': 'ROR', 'value': 'https://ror.org/03a64bh57'}, + {'schema': 'ROR', 'value': 'https://ror.org/02k1zhm92'}, ], 'full_name': 'Plumari, S.', 'raw_affiliations': [ { - 'value': u'Department of Physics U. and Astronomy ‘Ettore Majorana’ - Catania - Via S. Sofia 64 - 95125 - Catania - Italy', + 'value': ( + u'Department of Physics U. and Astronomy ‘Ettore' + u' Majorana’ - Catania - Via S. Sofia 64 - 95125 -' + u' Catania - Italy' + ), }, { - 'value': u'Laboratori Nazionali del Sud - INFN-LNS - Via S. 
Sofia 62 - 95123 - Catania - Italy', - } - ] + 'value': ( + u'Laboratori Nazionali del Sud - INFN-LNS - Via S.' + u' Sofia 62 - 95123 - Catania - Italy' + ), + }, + ], }, ] result = hep.do(create_record(snippet)) @@ -2367,18 +2379,21 @@ def test_authors_from_100__a_t_u_v_ROR(): expected = { 'a': 'Plumari, S.', - 't': [ - 'ROR:https://ror.org/03a64bh57', - 'ROR:https://ror.org/02k1zhm92' - ], + 't': ['ROR:https://ror.org/03a64bh57', 'ROR:https://ror.org/02k1zhm92'], 'u': [ 'Catania U.', 'INFN, LNS', ], 'v': [ - u'Department of Physics U. and Astronomy ‘Ettore Majorana’ - Catania - Via S. Sofia 64 - 95125 - Catania - Italy', - u'Laboratori Nazionali del Sud - INFN-LNS - Via S. Sofia 62 - 95123 - Catania - Italy' - ] + ( + u'Department of Physics U. and Astronomy ‘Ettore Majorana’ -' + u' Catania - Via S. Sofia 64 - 95125 - Catania - Italy' + ), + ( + u'Laboratori Nazionali del Sud - INFN-LNS - Via S. Sofia 62 -' + u' 95123 - Catania - Italy' + ), + ], } result = hep2marc.do(result) @@ -2389,47 +2404,41 @@ def test_authors_from_100__a_t_v_and_700_a_t_v(): schema = load_schema('hep') subschema = schema['properties']['authors'] - snippet = ( - '' - ' ' - ' Hosseini, M.' - ' Faculty of Physics - Shahrood Technology U. - P. O. Box 3619995161-316 - Shahrood - Iran' - ' GRID:grid.440804.c' - ' ' - ' ' - ' Hassanabadi, H.' - ' Faculty of Physics - Shahrood Technology U. - P. O. Box 3619995161-316 - Shahrood - Iran' - ' GRID:grid.440804.c' - ' ' - '' - ) # record/1712798 + snippet = ( # record/1712798 + ' Hosseini, M. Faculty of' + ' Physics - Shahrood Technology U. - P. O. Box 3619995161-316 -' + ' Shahrood - Iran GRID:grid.440804.c Hassanabadi,' + ' H. Faculty of Physics - Shahrood' + ' Technology U. - P. O. Box 3619995161-316 - Shahrood - Iran' + ' GRID:grid.440804.c ' + ' ' + ) expected = [ { - 'affiliations_identifiers': [ - { - 'schema': 'GRID', - 'value': 'grid.440804.c' - } - ], + 'affiliations_identifiers': [{'schema': 'GRID', 'value': 'grid.440804.c'}], 'full_name': 'Hosseini, M.', 'raw_affiliations': [ { - 'value': 'Faculty of Physics - Shahrood Technology U. - P. O. Box 3619995161-316 - Shahrood - Iran' + 'value': ( + 'Faculty of Physics - Shahrood Technology U. - P. O.' + ' Box 3619995161-316 - Shahrood - Iran' + ) } ], }, { - 'affiliations_identifiers': [ - { - 'schema': 'GRID', - 'value': 'grid.440804.c' - } - ], + 'affiliations_identifiers': [{'schema': 'GRID', 'value': 'grid.440804.c'}], 'full_name': 'Hassanabadi, H.', 'raw_affiliations': [ { - 'value': 'Faculty of Physics - Shahrood Technology U. - P. O. Box 3619995161-316 - Shahrood - Iran' + 'value': ( + 'Faculty of Physics - Shahrood Technology U. - P. O.' + ' Box 3619995161-316 - Shahrood - Iran' + ) } ], }, @@ -2443,7 +2452,10 @@ def test_authors_from_100__a_t_v_and_700_a_t_v(): 'a': 'Hosseini, M.', 't': ['GRID:grid.440804.c'], 'v': [ - 'Faculty of Physics - Shahrood Technology U. - P. O. Box 3619995161-316 - Shahrood - Iran', + ( + 'Faculty of Physics - Shahrood Technology U. - P. O. Box' + ' 3619995161-316 - Shahrood - Iran' + ), ], } expected_700 = [ @@ -2451,7 +2463,10 @@ def test_authors_from_100__a_t_v_and_700_a_t_v(): 'a': 'Hassanabadi, H.', 't': ['GRID:grid.440804.c'], 'v': [ - 'Faculty of Physics - Shahrood Technology U. - P. O. Box 3619995161-316 - Shahrood - Iran', + ( + 'Faculty of Physics - Shahrood Technology U. - P. O. 
Box' + ' 3619995161-316 - Shahrood - Iran' + ), ], } ] diff --git a/tests/test_hep_bd2xx.py b/tests/test_hep_bd2xx.py index 9e59ca04..65086f38 100644 --- a/tests/test_hep_bd2xx.py +++ b/tests/test_hep_bd2xx.py @@ -22,21 +22,22 @@ from __future__ import absolute_import, division, print_function +import pytest from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from inspire_dojson.hep import hep, hep2marc -from inspire_schemas.api import load_schema, validate def test_rpp_from_210__a(): schema = load_schema('hep') subschema = schema['properties']['rpp'] - snippet = ( + snippet = ( # record/875948 '' ' RPP' '' - ) # record/875948 + ) expected = True result = hep.do(create_record(snippet)) @@ -54,11 +55,11 @@ def test_rpp_from_210__a__RPP_section(): schema = load_schema('hep') subschema = schema['properties']['rpp'] - snippet = ( + snippet = ( # record/806134 '' ' RPP section' '' - ) # record/806134 + ) expected = True result = hep.do(create_record(snippet)) @@ -76,18 +77,19 @@ def test_titles_from_245__a_9(): schema = load_schema('hep') subschema = schema['properties']['titles'] - snippet = ( - '' - ' Exact Form of Boundary Operators Dual to ' - 'Interacting Bulk Scalar Fields in the AdS/CFT Correspondence' - ' arXiv' - '' - ) # record/001511698 + snippet = ( # record/001511698 + ' Exact Form' + ' of Boundary Operators Dual to Interacting Bulk Scalar Fields in the' + ' AdS/CFT Correspondence arXiv' + ) expected = [ { - 'title': 'Exact Form of Boundary Operators Dual to Interacting ' - 'Bulk Scalar Fields in the AdS/CFT Correspondence', + 'title': ( + 'Exact Form of Boundary Operators Dual to Interacting ' + 'Bulk Scalar Fields in the AdS/CFT Correspondence' + ), 'source': 'arXiv', }, ] @@ -98,8 +100,10 @@ def test_titles_from_245__a_9(): expected = [ { - 'a': 'Exact Form of Boundary Operators Dual to Interacting ' - 'Bulk Scalar Fields in the AdS/CFT Correspondence', + 'a': ( + 'Exact Form of Boundary Operators Dual to Interacting ' + 'Bulk Scalar Fields in the AdS/CFT Correspondence' + ), '9': 'arXiv', }, ] @@ -112,19 +116,21 @@ def test_titles_from_246__a_9(): schema = load_schema('hep') subschema = schema['properties']['titles'] - snippet = ( + snippet = ( # record/1511471 '' ' Superintegrable relativistic systems in' ' spacetime-dependent background fields' ' arXiv' '' - ) # record/1511471 + ) expected = [ { 'source': 'arXiv', - 'title': 'Superintegrable relativistic systems in ' - 'spacetime-dependent background fields', + 'title': ( + 'Superintegrable relativistic systems in ' + 'spacetime-dependent background fields' + ), }, ] result = hep.do(create_record(snippet)) @@ -134,7 +140,10 @@ def test_titles_from_246__a_9(): expected = [ { - 'a': 'Superintegrable relativistic systems in spacetime-dependent background fields', + 'a': ( + 'Superintegrable relativistic systems in spacetime-dependent' + ' background fields' + ), '9': 'arXiv', }, ] @@ -147,12 +156,12 @@ def test_titles_from_245__a_b(): schema = load_schema('hep') subschema = schema['properties']['titles'] - snippet = ( - '' - ' Proceedings, New Observables in Quarkonium Production' - ' Trento, Italy' - '' - ) # record/1510141 + snippet = ( # record/1510141 + ' Proceedings, New Observables in Quarkonium' + ' Production Trento,' + ' Italy' + ) expected = [ { @@ -176,15 +185,16 @@ def test_titles_from_245__a_b(): assert expected == result['245'] -def test_title_translations_from_242__a(stable_langdetect): +@pytest.mark.usefixtures(name='_stable_langdetect') +def 
test_title_translations_from_242__a(): schema = load_schema('hep') subschema = schema['properties']['title_translations'] - snippet = ( + snippet = ( # record/8352 '' ' The redshift of extragalactic nebulae' '' - ) # record/8352 + ) expected = [ { @@ -207,16 +217,17 @@ def test_title_translations_from_242__a(stable_langdetect): assert expected == result['242'] -def test_title_translations_from_242__a_b(stable_langdetect): +@pytest.mark.usefixtures(name='_stable_langdetect') +def test_title_translations_from_242__a_b(): schema = load_schema('hep') subschema = schema['properties']['title_translations'] - snippet = ( - '' - ' Generalized Hamilton-Jacobi Formalism' - ' Field Theories with Upper-Order Derivatives' - '' - ) # record/1501064 + snippet = ( # record/1501064 + ' Generalized Hamilton-Jacobi Formalism Field Theories with Upper-Order' + ' Derivatives' + ) expected = [ { @@ -245,11 +256,11 @@ def test_editions_from_250__a(): schema = load_schema('hep') subschema = schema['properties']['editions'] - snippet = ( + snippet = ( # record/1383727 '' ' 2nd ed.' '' - ) # record/1383727 + ) expected = [ '2nd ed.', @@ -271,13 +282,13 @@ def test_imprints_from_260__a_b_c(): schema = load_schema('hep') subschema = schema['properties']['imprints'] - snippet = ( + snippet = ( # record/1614215 '' ' Geneva' ' CERN' ' 2017' '' - ) # record/1614215 + ) expected = [ { @@ -307,15 +318,13 @@ def test_imprints_from_260__c_wrong_date(): schema = load_schema('hep') subschema = schema['properties']['imprints'] - snippet = ( + snippet = ( # record/1314991 '' ' 2014-00-01' '' - ) # record/1314991 + ) - expected = [ - {'date': '2014'} - ] + expected = [{'date': '2014'}] result = hep.do(create_record(snippet)) assert validate(result['imprints'], subschema) is None @@ -333,11 +342,11 @@ def test_preprint_date_from_269__c(): schema = load_schema('hep') subschema = schema['properties']['preprint_date'] - snippet = ( + snippet = ( # record/1375944 '' ' 2015-05-03' '' - ) # record/1375944 + ) expected = '2015-05-03' result = hep.do(create_record(snippet)) @@ -357,11 +366,11 @@ def test_preprint_date_from_269__c_wrong_date(): schema = load_schema('hep') subschema = schema['properties']['preprint_date'] - snippet = ( + snippet = ( # record/1194517 '' ' 2001-02-31' '' - ) # record/1194517 + ) expected = '2001-02' result = hep.do(create_record(snippet)) diff --git a/tests/test_hep_bd3xx.py b/tests/test_hep_bd3xx.py index ba726240..6cbdedda 100644 --- a/tests/test_hep_bd3xx.py +++ b/tests/test_hep_bd3xx.py @@ -23,20 +23,20 @@ from __future__ import absolute_import, division, print_function from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from inspire_dojson.hep import hep, hep2marc -from inspire_schemas.api import load_schema, validate def test_number_of_pages_from_300__a(): schema = load_schema('hep') subschema = schema['properties']['number_of_pages'] - snippet = ( + snippet = ( # record/4328 '' ' 10' '' - ) # record/4328 + ) expected = 10 result = hep.do(create_record(snippet)) @@ -51,20 +51,20 @@ def test_number_of_pages_from_300__a(): def test_number_of_pages_from_300__a_malformed(): - snippet = ( + snippet = ( # record/67556 '' ' 216+337' '' - ) # record/67556 + ) assert 'number_of_pages' not in hep.do(create_record(snippet)) def test_number_of_pages_from_300__a_nonpositive(): - snippet = ( + snippet = ( # record/1511769 '' ' 0' '' - ) # record/1511769 + ) assert 'number_of_pages' not in hep.do(create_record(snippet)) diff --git a/tests/test_hep_bd4xx.py 
b/tests/test_hep_bd4xx.py index 99b7c48f..744dabaa 100644 --- a/tests/test_hep_bd4xx.py +++ b/tests/test_hep_bd4xx.py @@ -23,20 +23,20 @@ from __future__ import absolute_import, division, print_function from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from inspire_dojson.hep import hep, hep2marc -from inspire_schemas.api import load_schema, validate def test_book_series_from_490__a(): schema = load_schema('hep') subschema = schema['properties']['book_series'] - snippet = ( + snippet = ( # record/1508903 '' ' Graduate Texts in Physics' '' - ) # record/1508903 + ) expected = [ {'title': 'Graduate Texts in Physics'}, diff --git a/tests/test_hep_bd5xx.py b/tests/test_hep_bd5xx.py index 7f650628..fa1b0344 100644 --- a/tests/test_hep_bd5xx.py +++ b/tests/test_hep_bd5xx.py @@ -23,21 +23,21 @@ from __future__ import absolute_import, division, print_function from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from inspire_dojson.hep import hep, hep2marc -from inspire_schemas.api import load_schema, validate def test_public_notes_from_500__a_9(): schema = load_schema('hep') subschema = schema['properties']['public_notes'] - snippet = ( + snippet = ( # record/1450044 '' ' arXiv' ' 5 pages' '' - ) # record/1450044 + ) expected = [ { @@ -65,17 +65,21 @@ def test_public_notes_from_500__a_9_presented_on(): schema = load_schema('hep') subschema = schema['properties']['public_notes'] - snippet = ( - '' - ' presented on the 11th International Conference on Modification of Materials with Particle Beams and Plasma Flows (Tomsk, Russia, 17-21 september 2012)' - ' arXiv' - '' - ) # record/1185462 + snippet = ( # record/1185462 + ' presented' + ' on the 11th International Conference on Modification of Materials' + ' with Particle Beams and Plasma Flows (Tomsk, Russia, 17-21 september' + ' 2012) arXiv' + ) expected = [ { 'source': 'arXiv', - 'value': 'presented on the 11th International Conference on Modification of Materials with Particle Beams and Plasma Flows (Tomsk, Russia, 17-21 september 2012)', + 'value': ( + 'presented on the 11th International Conference on Modification' + ' of Materials with Particle Beams and Plasma Flows (Tomsk,' + ' Russia, 17-21 september 2012)' + ), }, ] result = hep.do(create_record(snippet)) @@ -86,7 +90,11 @@ def test_public_notes_from_500__a_9_presented_on(): expected = [ { '9': 'arXiv', - 'a': 'presented on the 11th International Conference on Modification of Materials with Particle Beams and Plasma Flows (Tomsk, Russia, 17-21 september 2012)', + 'a': ( + 'presented on the 11th International Conference on Modification' + ' of Materials with Particle Beams and Plasma Flows (Tomsk,' + ' Russia, 17-21 september 2012)' + ), }, ] result = hep2marc.do(result) @@ -98,13 +106,12 @@ def test_public_notes_from_500__double_a_9(): schema = load_schema('hep') subschema = schema['properties']['public_notes'] - snippet = ( - '' - ' arXiv' - ' 11 pages, 8 figures. Submitted to MNRAS' - ' preliminary entry' - '' - ) # record/1380257 + snippet = ( # record/1380257 + ' arXiv 11 pages, 8 figures.' 
+ ' Submitted to MNRAS preliminary' + ' entry' + ) expected = [ { @@ -141,7 +148,7 @@ def test_curated_and_public_notes_from_500__a_and_500__a_9(): curated_schema = schema['properties']['curated'] public_notes_schema = schema['properties']['public_notes'] - snippet = ( + snippet = ( # record/1450045 '' ' ' ' *Brief entry*' @@ -151,7 +158,7 @@ def test_curated_and_public_notes_from_500__a_and_500__a_9(): ' arXiv' ' ' '' - ) # record/1450045 + ) expected_curated = False expected_public_notes = [ @@ -186,11 +193,11 @@ def test_curated_from_500__a(): schema = load_schema('hep') subschema = schema['properties']['curated'] - snippet = ( + snippet = ( # record/1184775 '' ' * Brief entry *' '' - ) # record/1184775 + ) expected = False result = hep.do(create_record(snippet)) @@ -212,7 +219,7 @@ def test_core_and_curated_and_public_notes_from_500__a_and_500__a_9_and_980__a() curated_schema = schema['properties']['curated'] public_notes_schema = schema['properties']['public_notes'] - snippet = ( + snippet = ( # record/1217749 '' ' ' ' * Temporary entry *' @@ -228,7 +235,7 @@ def test_core_and_curated_and_public_notes_from_500__a_and_500__a_9_and_980__a() ' CORE' ' ' '' - ) # record/1217749 + ) expected_core = True expected_curated = False @@ -267,11 +274,11 @@ def test_thesis_info_defense_date_from_500__a(): schema = load_schema('hep') subschema = schema['properties']['thesis_info'] - snippet = ( + snippet = ( # record/1517362 '' ' Presented on 2016-09-30' '' - ) # record/1517362 + ) expected = {'defense_date': '2016-09-30'} result = hep.do(create_record(snippet)) @@ -291,11 +298,11 @@ def test_thesis_info_defense_date_from_500__a_incomplete_date(): schema = load_schema('hep') subschema = schema['properties']['thesis_info'] - snippet = ( + snippet = ( # record/1509061 '' ' Presented on 2016' '' - ) # record/1509061 + ) expected = {'defense_date': '2016'} result = hep.do(create_record(snippet)) @@ -315,17 +322,17 @@ def test_thesis_info_defense_date_from_500__a_incomplete_human_date(): schema = load_schema('hep') subschema = schema['properties']['thesis_info'] - snippet = ( + snippet = ( # record/887715 '' ' Presented on Dec 1992' '' - ) # record/887715 + ) expected = {'defense_date': '1992-12'} result = hep.do(create_record(snippet)) assert validate(result['thesis_info'], subschema) is None - expected == result['thesis_info'] + assert expected == result['thesis_info'] expected = [ {'a': 'Presented on 1992-12'}, @@ -339,14 +346,14 @@ def test_thesis_from_502__b_c_d_z(): schema = load_schema('hep') subschema = schema['properties']['thesis_info'] - snippet = ( + snippet = ( # record/897773 '' ' PhD' ' IIT, Roorkee' ' 2011' ' 909554' '' - ) # record/897773 + ) expected = { 'date': '2011', @@ -382,7 +389,7 @@ def test_thesis_from_502_b_double_c_d_double_z(): schema = load_schema('hep') subschema = schema['properties']['thesis_info'] - snippet = ( + snippet = ( # record/1385648 '' ' Thesis' ' Nice U.' 
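One change in the hunks above is more than formatting: `expected == result['thesis_info']` was a bare comparison whose boolean result was discarded, so the check never actually ran; the patch adds the missing `assert`. A minimal sketch of the bug and the fix:

    # Before: builds a bool and throws it away; pytest passes even when
    # the converted thesis_info is wrong.
    expected == result['thesis_info']

    # After: the comparison now actually fails the test on a mismatch.
    assert expected == result['thesis_info']

Ruff reports this pattern as B015 (pointless comparison) when the flake8-bugbear rules are selected; whether ruff.toml enables that rule set is an assumption on my part, not something this hunk shows.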
@@ -391,7 +398,7 @@ def test_thesis_from_502_b_double_c_d_double_z(): ' 903069' ' 904125' '' - ) # record/1385648 + ) expected = { 'date': '2014', @@ -435,7 +442,7 @@ def test_thesis_info_from_500__a_and_502__b_c_d(): schema = load_schema('hep') subschema = schema['properties']['thesis_info'] - snippet = ( + snippet = ( # record/1517362 '' ' ' ' Presented on 2015-11-27' @@ -446,7 +453,7 @@ def test_thesis_info_from_500__a_and_502__b_c_d(): ' 2017' ' ' '' - ) # record/1517362 + ) expected = { 'date': '2017', @@ -481,17 +488,42 @@ def test_abstracts_from_520__a_9(): schema = load_schema('hep') subschema = schema['properties']['abstracts'] - snippet = ( - '' - ' Springer' - ' We study a notion of non-commutative integration, in the spirit of modular spectral triples, for the quantum group SU$_{q}$ (2). In particular we define the non-commutative integral as the residue at the spectral dimension of a zeta function, which is constructed using a Dirac operator and a weight. We consider the Dirac operator introduced by Kaad and Senior and a family of weights depending on two parameters, which are related to the diagonal automorphisms of SU$_{q}$ (2). We show that, after fixing one of the parameters, the non-commutative integral coincides with the Haar state of SU$_{q}$ (2). Moreover we can impose an additional condition on the zeta function, which also fixes the second parameter. For this unique choice the spectral dimension coincides with the classical dimension.' - '' - ) # record/1346798 + snippet = ( # record/1346798 + ' Springer We study a notion of' + ' non-commutative integration, in the spirit of modular spectral' + ' triples, for the quantum group SU$_{q}$ (2). In particular we define' + ' the non-commutative integral as the residue at the spectral dimension' + ' of a zeta function, which is constructed using a Dirac operator and a' + ' weight. We consider the Dirac operator introduced by Kaad and Senior' + ' and a family of weights depending on two parameters, which are' + ' related to the diagonal automorphisms of SU$_{q}$ (2). We show that,' + ' after fixing one of the parameters, the non-commutative integral' + ' coincides with the Haar state of SU$_{q}$ (2). Moreover we can impose' + ' an additional condition on the zeta function, which also fixes the' + ' second parameter. For this unique choice the spectral dimension' + ' coincides with the classical dimension.' + ) expected = [ { 'source': 'Springer', - 'value': 'We study a notion of non-commutative integration, in the spirit of modular spectral triples, for the quantum group SU$_{q}$ (2). In particular we define the non-commutative integral as the residue at the spectral dimension of a zeta function, which is constructed using a Dirac operator and a weight. We consider the Dirac operator introduced by Kaad and Senior and a family of weights depending on two parameters, which are related to the diagonal automorphisms of SU$_{q}$ (2). We show that, after fixing one of the parameters, the non-commutative integral coincides with the Haar state of SU$_{q}$ (2). Moreover we can impose an additional condition on the zeta function, which also fixes the second parameter. For this unique choice the spectral dimension coincides with the classical dimension.', + 'value': ( + 'We study a notion of non-commutative integration, in the' + ' spirit of modular spectral triples, for the quantum group' + ' SU$_{q}$ (2). 
In particular we define the non-commutative' + ' integral as the residue at the spectral dimension of a zeta' + ' function, which is constructed using a Dirac operator and a' + ' weight. We consider the Dirac operator introduced by Kaad and' + ' Senior and a family of weights depending on two parameters,' + ' which are related to the diagonal automorphisms of SU$_{q}$' + ' (2). We show that, after fixing one of the parameters, the' + ' non-commutative integral coincides with the Haar state of' + ' SU$_{q}$ (2). Moreover we can impose an additional condition' + ' on the zeta function, which also fixes the second parameter.' + ' For this unique choice the spectral dimension coincides with' + ' the classical dimension.' + ), }, ] result = hep.do(create_record(snippet)) @@ -502,7 +534,22 @@ def test_abstracts_from_520__a_9(): expected = [ { '9': 'Springer', - 'a': 'We study a notion of non-commutative integration, in the spirit of modular spectral triples, for the quantum group SU$_{q}$ (2). In particular we define the non-commutative integral as the residue at the spectral dimension of a zeta function, which is constructed using a Dirac operator and a weight. We consider the Dirac operator introduced by Kaad and Senior and a family of weights depending on two parameters, which are related to the diagonal automorphisms of SU$_{q}$ (2). We show that, after fixing one of the parameters, the non-commutative integral coincides with the Haar state of SU$_{q}$ (2). Moreover we can impose an additional condition on the zeta function, which also fixes the second parameter. For this unique choice the spectral dimension coincides with the classical dimension.', + 'a': ( + 'We study a notion of non-commutative integration, in the' + ' spirit of modular spectral triples, for the quantum group' + ' SU$_{q}$ (2). In particular we define the non-commutative' + ' integral as the residue at the spectral dimension of a zeta' + ' function, which is constructed using a Dirac operator and a' + ' weight. We consider the Dirac operator introduced by Kaad and' + ' Senior and a family of weights depending on two parameters,' + ' which are related to the diagonal automorphisms of SU$_{q}$' + ' (2). We show that, after fixing one of the parameters, the' + ' non-commutative integral coincides with the Haar state of' + ' SU$_{q}$ (2). Moreover we can impose an additional condition' + ' on the zeta function, which also fixes the second parameter.' + ' For this unique choice the spectral dimension coincides with' + ' the classical dimension.' + ), }, ] result = hep2marc.do(result) @@ -514,16 +561,36 @@ def test_abstracts_from_520__double_a(): schema = load_schema('hep') subschema = schema['properties']['abstracts'] - snippet = ( - '' - ' $D$ $K$ scattering and the $D_s$ spectrum from lattice QCD 520__' - ' We present results from Lattice QCD calculations of the low-lying charmed-strange meson spectrum using two types of Clover-Wilson lattices. In addition to quark-antiquark interpolating fields we also consider meson-meson interpolators corresponding to D-meson kaon scattering states. To calculate the all-to-all propagation necessary for the backtracking loops we use the (stochastic) distillation technique. For the charm quark we use the Fermilab method. Results for the $J^P=0^+$ $D_{s0}^*(2317)$ charmed-strange meson are presented.' 
- '' - ) # record/1297699 + snippet = ( # record/1297699 + ' $D$ $K$' + ' scattering and the $D_s$ spectrum from lattice QCD 520__ ' + ' We present results from Lattice QCD calculations' + ' of the low-lying charmed-strange meson spectrum using two types of' + ' Clover-Wilson lattices. In addition to quark-antiquark interpolating' + ' fields we also consider meson-meson interpolators corresponding to' + ' D-meson kaon scattering states. To calculate the all-to-all' + ' propagation necessary for the backtracking loops we use the' + ' (stochastic) distillation technique. For the charm quark we use the' + ' Fermilab method. Results for the $J^P=0^+$ $D_{s0}^*(2317)$' + ' charmed-strange meson are presented.' + ) expected = [ {'value': '$D$ $K$ scattering and the $D_s$ spectrum from lattice QCD 520__'}, - {'value': 'We present results from Lattice QCD calculations of the low-lying charmed-strange meson spectrum using two types of Clover-Wilson lattices. In addition to quark-antiquark interpolating fields we also consider meson-meson interpolators corresponding to D-meson kaon scattering states. To calculate the all-to-all propagation necessary for the backtracking loops we use the (stochastic) distillation technique. For the charm quark we use the Fermilab method. Results for the $J^P=0^+$ $D_{s0}^*(2317)$ charmed-strange meson are presented.'}, + { + 'value': ( + 'We present results from Lattice QCD calculations of the' + ' low-lying charmed-strange meson spectrum using two types of' + ' Clover-Wilson lattices. In addition to quark-antiquark' + ' interpolating fields we also consider meson-meson' + ' interpolators corresponding to D-meson kaon scattering' + ' states. To calculate the all-to-all propagation necessary for' + ' the backtracking loops we use the (stochastic) distillation' + ' technique. For the charm quark we use the Fermilab method.' + ' Results for the $J^P=0^+$ $D_{s0}^*(2317)$ charmed-strange' + ' meson are presented.' + ) + }, ] result = hep.do(create_record(snippet)) @@ -532,7 +599,20 @@ def test_abstracts_from_520__double_a(): expected = [ {'a': '$D$ $K$ scattering and the $D_s$ spectrum from lattice QCD 520__'}, - {'a': 'We present results from Lattice QCD calculations of the low-lying charmed-strange meson spectrum using two types of Clover-Wilson lattices. In addition to quark-antiquark interpolating fields we also consider meson-meson interpolators corresponding to D-meson kaon scattering states. To calculate the all-to-all propagation necessary for the backtracking loops we use the (stochastic) distillation technique. For the charm quark we use the Fermilab method. Results for the $J^P=0^+$ $D_{s0}^*(2317)$ charmed-strange meson are presented.'}, + { + 'a': ( + 'We present results from Lattice QCD calculations of the' + ' low-lying charmed-strange meson spectrum using two types of' + ' Clover-Wilson lattices. In addition to quark-antiquark' + ' interpolating fields we also consider meson-meson' + ' interpolators corresponding to D-meson kaon scattering' + ' states. To calculate the all-to-all propagation necessary for' + ' the backtracking loops we use the (stochastic) distillation' + ' technique. For the charm quark we use the Fermilab method.' + ' Results for the $J^P=0^+$ $D_{s0}^*(2317)$ charmed-strange' + ' meson are presented.' + ) + }, ] result = hep2marc.do(result) @@ -540,12 +620,16 @@ def test_abstracts_from_520__double_a(): def test_abstracts_from_520__h_9(): - snippet = ( - '' - ' HEPDATA' - ' CERN-SPS. 
Measurements of the spectra of positively charged kaons in proton-carbon interactions at a beam momentum of 31 GeV/c. The analysis is based on the full set of data collected in 2007 using a 4% nuclear interaction length graphite target. Charged pion spectra taken using the same data set are compared with the kaon spectra.' - '' - ) # record/1079585 + snippet = ( # record/1079585 + ' HEPDATA CERN-SPS.' + ' Measurements of the spectra of positively charged kaons in' + ' proton-carbon interactions at a beam momentum of 31 GeV/c. The' + ' analysis is based on the full set of data collected in 2007 using a' + ' 4% nuclear interaction length graphite target. Charged pion spectra' + ' taken using the same data set are compared with the kaon' + ' spectra.' + ) result = hep.do(create_record(snippet)) @@ -556,27 +640,112 @@ def test_abstracts_from_double_520__a_9_reorders_fields(): schema = load_schema('hep') subschema = schema['properties']['abstracts'] - snippet = ( - '' - ' ' - ' The origin of extragalactic magnetic fields is still poorly understood. Based on a dedicated suite of cosmological magneto-hydrodynamical simulations with the ENZO code we have performed a survey of different models that may have caused present-day magnetic fields in galaxies and galaxy clusters. The outcomes of these models differ in cluster outskirts, filaments, sheets and voids and we use these simulations to find observational signatures of magnetogenesis. With these simulations, we predict the signal of extragalactic magnetic fields in radio observations of synchrotron emission from the cosmic web, in Faraday Rotation, in the propagation of Ultra High Energy Cosmic Rays, in the polarized signal from Fast Radio Bursts at cosmological distance and in spectra of distant blazars. In general, primordial scenarios in which present-day magnetic fields originate from the amplification of weak (<nG) uniform seed fields result more homogeneous and relatively easier to observe magnetic fields than than astrophysical scenarios, in which present-day fields are the product of feedback processes triggered by stars and active galaxies. In the near future the best evidence for the origin of cosmic magnetic fields will most likely come from a combination of synchrotron emission and Faraday Rotation observed at the periphery of large-scale structures.' - ' arXiv' - ' ' - ' ' - ' The origin of extragalactic magnetic fields is still poorly understood. Based on a dedicated suite of cosmological magneto-hydrodynamical simulations with the ENZO code we have performed a survey of different models that may have caused present-day magnetic fields in galaxies and galaxy clusters. The outcomes of these models differ in cluster outskirts, filaments, sheets and voids and we use these simulations to find observational signatures of magnetogenesis. With these simulations, we predict the signal of extragalactic magnetic fields in radio observations of synchrotron emission from the cosmic web, in Faraday rotation, in the propagation of ultra high energy cosmic rays, in the polarized signal from fast radio bursts at cosmological distance and in spectra of distant blazars. In general, primordial scenarios in which present-day magnetic fields originate from the amplification of weak (⩽$\\rm nG$ ) uniform seed fields result in more homogeneous and relatively easier to observe magnetic fields than astrophysical scenarios, in which present-day fields are the product of feedback processes triggered by stars and active galaxies. 
In the near future the best evidence for the origin of cosmic magnetic fields will most likely come from a combination of synchrotron emission and Faraday rotation observed at the periphery of large-scale structures.' - ' IOP' - ' ' - '' - ) # record/1634941 + snippet = ( # record/1634941 + ' The origin of extragalactic magnetic fields is still poorly' + ' understood. Based on a dedicated suite of cosmological' + ' magneto-hydrodynamical simulations with the ENZO code we have' + ' performed a survey of different models that may have caused' + ' present-day magnetic fields in galaxies and galaxy clusters. The' + ' outcomes of these models differ in cluster outskirts, filaments,' + ' sheets and voids and we use these simulations to find observational' + ' signatures of magnetogenesis. With these simulations, we predict the' + ' signal of extragalactic magnetic fields in radio observations of' + ' synchrotron emission from the cosmic web, in Faraday Rotation, in the' + ' propagation of Ultra High Energy Cosmic Rays, in the polarized signal' + ' from Fast Radio Bursts at cosmological distance and in spectra of' + ' distant blazars. In general, primordial scenarios in which' + ' present-day magnetic fields originate from the amplification of weak' + ' (<nG) uniform seed fields result more homogeneous and relatively' + ' easier to observe magnetic fields than than astrophysical scenarios,' + ' in which present-day fields are the product of feedback processes' + ' triggered by stars and active galaxies. In the near future the best' + ' evidence for the origin of cosmic magnetic fields will most likely' + ' come from a combination of synchrotron emission and Faraday Rotation' + ' observed at the periphery of large-scale structures. ' + ' arXiv The origin of' + ' extragalactic magnetic fields is still poorly understood. Based on a' + ' dedicated suite of cosmological magneto-hydrodynamical simulations' + ' with the ENZO code we have performed a survey of different models' + ' that may have caused present-day magnetic fields in galaxies and' + ' galaxy clusters. The outcomes of these models differ in cluster' + ' outskirts, filaments, sheets and voids and we use these simulations' + ' to find observational signatures of magnetogenesis. With these' + ' simulations, we predict the signal of extragalactic magnetic fields' + ' in radio observations of synchrotron emission from the cosmic web, in' + ' Faraday rotation, in the propagation of ultra high energy cosmic' + ' rays, in the polarized signal from fast radio bursts at cosmological' + ' distance and in spectra of distant blazars. In general, primordial' + ' scenarios in which present-day magnetic fields originate from the' + ' amplification of weak (⩽$\\rm nG$ ) uniform seed fields result in' + ' more homogeneous and relatively easier to observe magnetic fields' + ' than astrophysical scenarios, in which present-day fields are the' + ' product of feedback processes triggered by stars and active galaxies.' + ' In the near future the best evidence for the origin of cosmic' + ' magnetic fields will most likely come from a combination of' + ' synchrotron emission and Faraday rotation observed at the periphery' + ' of large-scale structures. IOP ' + ) expected = [ { 'source': 'IOP', - 'value': u'The origin of extragalactic magnetic fields is still poorly understood. 
Based on a dedicated suite of cosmological magneto-hydrodynamical simulations with the ENZO code we have performed a survey of different models that may have caused present-day magnetic fields in galaxies and galaxy clusters. The outcomes of these models differ in cluster outskirts, filaments, sheets and voids and we use these simulations to find observational signatures of magnetogenesis. With these simulations, we predict the signal of extragalactic magnetic fields in radio observations of synchrotron emission from the cosmic web, in Faraday rotation, in the propagation of ultra high energy cosmic rays, in the polarized signal from fast radio bursts at cosmological distance and in spectra of distant blazars. In general, primordial scenarios in which present-day magnetic fields originate from the amplification of weak (⩽$\\rm nG$ ) uniform seed fields result in more homogeneous and relatively easier to observe magnetic fields than astrophysical scenarios, in which present-day fields are the product of feedback processes triggered by stars and active galaxies. In the near future the best evidence for the origin of cosmic magnetic fields will most likely come from a combination of synchrotron emission and Faraday rotation observed at the periphery of large-scale structures.', + 'value': ( + u'The origin of extragalactic magnetic fields is still poorly' + u' understood. Based on a dedicated suite of cosmological' + u' magneto-hydrodynamical simulations with the ENZO code we' + u' have performed a survey of different models that may have' + u' caused present-day magnetic fields in galaxies and galaxy' + u' clusters. The outcomes of these models differ in cluster' + u' outskirts, filaments, sheets and voids and we use these' + u' simulations to find observational signatures of' + u' magnetogenesis. With these simulations, we predict the' + u' signal of extragalactic magnetic fields in radio' + u' observations of synchrotron emission from the cosmic web, in' + u' Faraday rotation, in the propagation of ultra high energy' + u' cosmic rays, in the polarized signal from fast radio bursts' + u' at cosmological distance and in spectra of distant blazars.' + u' In general, primordial scenarios in which present-day' + u' magnetic fields originate from the amplification of weak' + u' (⩽$\\rm nG$ ) uniform seed fields result in more homogeneous' + u' and relatively easier to observe magnetic fields than' + u' astrophysical scenarios, in which present-day fields are the' + u' product of feedback processes triggered by stars and active' + u' galaxies. In the near future the best evidence for the' + u' origin of cosmic magnetic fields will most likely come from' + u' a combination of synchrotron emission and Faraday rotation' + u' observed at the periphery of large-scale structures.' + ), }, { 'source': 'arXiv', - 'value': 'The origin of extragalactic magnetic fields is still poorly understood. Based on a dedicated suite of cosmological magneto-hydrodynamical simulations with the ENZO code we have performed a survey of different models that may have caused present-day magnetic fields in galaxies and galaxy clusters. The outcomes of these models differ in cluster outskirts, filaments, sheets and voids and we use these simulations to find observational signatures of magnetogenesis. 
With these simulations, we predict the signal of extragalactic magnetic fields in radio observations of synchrotron emission from the cosmic web, in Faraday Rotation, in the propagation of Ultra High Energy Cosmic Rays, in the polarized signal from Fast Radio Bursts at cosmological distance and in spectra of distant blazars. In general, primordial scenarios in which present-day magnetic fields originate from the amplification of weak (' - ' G:(EU-Grant)317089' - ' GATIS - Gauge Theory as an Integrable System (317089)' - ' 317089' - ' FP7-PEOPLE-2012-ITN' - '' - ) # record/1508869 + snippet = ( # record/1508869 + ' G:(EU-Grant)317089 GATIS -' + ' Gauge Theory as an Integrable System (317089) 317089 FP7-PEOPLE-2012-ITN' + ) expected = [ { @@ -640,12 +857,12 @@ def test_license_from_540__a_3(): schema = load_schema('hep') subschema = schema['properties']['license'] - snippet = ( + snippet = ( # record/120203 '' ' Article' ' OA' '' - ) # record/120203 + ) expected = [ { @@ -673,13 +890,14 @@ def test_license_from_540__a_u_3(): schema = load_schema('hep') subschema = schema['properties']['license'] - snippet = ( + snippet = ( # record/1184984 '' ' Publication' ' CC-BY-3.0' - ' http://creativecommons.org/licenses/by/3.0/' + ' http://creativecommons.org/licenses/by/3.0/' '' - ) # record/1184984 + ) expected = [ { @@ -709,13 +927,14 @@ def test_license_from_540__a_u_3_handles_preprint(): schema = load_schema('hep') subschema = schema['properties']['license'] - snippet = ( + snippet = ( # record/1682011 '' ' preprint' ' arXiv nonexclusive-distrib 1.0' - ' http://arxiv.org/licenses/nonexclusive-distrib/1.0/' + ' http://arxiv.org/licenses/nonexclusive-distrib/1.0/' '' - ) # record/1682011 + ) expected = [ { @@ -745,13 +964,14 @@ def test_license_from_540__double_a_u(): schema = load_schema('hep') subschema = schema['properties']['license'] - snippet = ( + snippet = ( # record/1414671 '' ' Open Access' ' CC-BY-3.0' - ' http://creativecommons.org/licenses/by/3.0/' + ' http://creativecommons.org/licenses/by/3.0/' '' - ) # record/1414671 + ) expected = [ { @@ -779,13 +999,13 @@ def test_copyright_from_542__d_e_g(): schema = load_schema('hep') subschema = schema['properties']['copyright'] - snippet = ( + snippet = ( # record/1511489 '' ' American Physical Society' ' 2017' ' Article' '' - ) # record/1511489 + ) expected = [ { @@ -815,13 +1035,13 @@ def test_copyright_from_542__d_g_3(): schema = load_schema('hep') subschema = schema['properties']['copyright'] - snippet = ( + snippet = ( # record/1255327 '' ' Article' ' American Physical Society' ' 2014' '' - ) # record/1255327 + ) expected = [ { @@ -851,13 +1071,13 @@ def test_copyright_from_542__d_g_3_with_weird_material(): schema = load_schema('hep') subschema = schema['properties']['copyright'] - snippet = ( + snippet = ( # record/773620 '' ' Published thesis as a book' ' Shaker Verlag' ' 2007' '' - ) # record/773620 + ) expected = [ { @@ -887,12 +1107,12 @@ def test_private_notes_from_595__a_9(): schema = load_schema('hep') subschema = schema['properties']['_private_notes'] - snippet = ( + snippet = ( # record/109310 '' ' SPIRES-HIDDEN' ' Title changed from ALLCAPS' '' - ) # record/109310 + ) expected = [ { @@ -920,13 +1140,12 @@ def test_private_notes_from_595__double_a_9(): schema = load_schema('hep') subschema = schema['properties']['_private_notes'] - snippet = ( - '' - ' SPIRES-HIDDEN' - ' TeXtitle from script' - ' no affiliation (not clear pn the fulltext)' - '' - ) # record/109310 + snippet = ( # record/109310 + ' SPIRES-HIDDEN TeXtitle from' + ' 
script no affiliation (not clear pn' + ' the fulltext)' + ) expected = [ { @@ -962,19 +1181,15 @@ def test_private_notes_from_595__a_9_and_595__double_a_9(): schema = load_schema('hep') subschema = schema['properties']['_private_notes'] - snippet = ( - '' - ' ' - ' SPIRES-HIDDEN' - ' Title changed from ALLCAPS' - ' ' - ' ' - ' SPIRES-HIDDEN' - ' TeXtitle from script' - ' no affiliation (not clear pn the fulltext)' - ' ' - '' - ) # record/109310 + snippet = ( # record/109310 + ' SPIRES-HIDDEN Title changed' + ' from ALLCAPS SPIRES-HIDDEN TeXtitle from script no' + ' affiliation (not clear pn the fulltext) ' + ' ' + ) expected = [ { @@ -1018,11 +1233,11 @@ def test_private_notes_from_595_Ha(): schema = load_schema('hep') subschema = schema['properties']['_private_notes'] - snippet = ( - '' - ' affiliations à corriger, voir avec Mathieu - Dominique' - '' - ) # record/1514389 + snippet = ( # record/1514389 + ' affiliations à corriger, voir avec Mathieu -' + ' Dominique' + ) expected = [ { @@ -1047,7 +1262,7 @@ def test_desy_bookkeeping_from_multiple_595_Da_d_s(): schema = load_schema('hep') subschema = schema['properties']['_desy_bookkeeping'] - snippet = ( + snippet = ( # record/1513161 '' ' ' ' 8' @@ -1060,19 +1275,11 @@ def test_desy_bookkeeping_from_multiple_595_Da_d_s(): ' printed' ' ' '' - ) # record/1513161 + ) expected = [ - { - 'expert': '8', - 'date': '2017-02-17', - 'status': 'abs' - }, - { - 'expert': '8', - 'date': '2017-02-19', - 'status': 'printed' - } + {'expert': '8', 'date': '2017-02-17', 'status': 'abs'}, + {'expert': '8', 'date': '2017-02-19', 'status': 'printed'}, ] result = hep.do(create_record(snippet)) @@ -1080,16 +1287,8 @@ def test_desy_bookkeeping_from_multiple_595_Da_d_s(): assert expected == result['_desy_bookkeeping'] expected = [ - { - 'a': '8', - 'd': '2017-02-17', - 's': 'abs' - }, - { - 'a': '8', - 'd': '2017-02-19', - 's': 'printed' - } + {'a': '8', 'd': '2017-02-17', 's': 'abs'}, + {'a': '8', 'd': '2017-02-19', 's': 'printed'}, ] result = hep2marc.do(result) @@ -1100,14 +1299,14 @@ def test_desy_bookkeeping_from_595_D_double_a_d_s(): schema = load_schema('hep') subschema = schema['properties']['_desy_bookkeeping'] - snippet = ( + snippet = ( # record/558693 '' ' 2016-07-23' ' E' ' final' ' E' '' - ) # record/558693 + ) expected = [ { @@ -1137,11 +1336,11 @@ def test_export_to_from_595__c_cds(): schema = load_schema('hep') subschema = schema['properties']['_export_to'] - snippet = ( + snippet = ( # record/1513006 '' ' CDS' '' - ) # record/1513006 + ) expected = {'CDS': True} result = hep.do(create_record(snippet)) @@ -1149,9 +1348,7 @@ def test_export_to_from_595__c_cds(): assert validate(result['_export_to'], subschema) is None assert expected == result['_export_to'] - expected = [ - {'c': 'CDS'} - ] + expected = [{'c': 'CDS'}] result = hep2marc.do(result) assert expected == result['595'] @@ -1161,11 +1358,11 @@ def test_export_to_from_595__c_hal(): schema = load_schema('hep') subschema = schema['properties']['_export_to'] - snippet = ( + snippet = ( # record/1623281 '' ' HAL' '' - ) # record/1623281 + ) expected = {'HAL': True} result = hep.do(create_record(snippet)) @@ -1185,11 +1382,11 @@ def test_export_to_from_595__c_not_hal(): schema = load_schema('hep') subschema = schema['properties']['_export_to'] - snippet = ( + snippet = ( # record/1512891 '' ' not HAL' '' - ) # record/1512891 + ) expected = {'HAL': False} result = hep.do(create_record(snippet)) @@ -1197,9 +1394,7 @@ def test_export_to_from_595__c_not_hal(): assert validate(result['_export_to'], 
subschema) is None assert expected == result['_export_to'] - expected = [ - {'c': 'not HAL'} - ] + expected = [{'c': 'not HAL'}] result = hep2marc.do(result) assert expected == result['595'] @@ -1209,12 +1404,12 @@ def test_export_to_from_595__double_c(): schema = load_schema('hep') subschema = schema['properties']['_export_to'] - snippet = ( + snippet = ( # record/1512843 '' ' CDS' ' not HAL' '' - ) # record/1512843 + ) expected = { 'CDS': True, diff --git a/tests/test_hep_bd6xx.py b/tests/test_hep_bd6xx.py index f1f49ea5..4ad496af 100644 --- a/tests/test_hep_bd6xx.py +++ b/tests/test_hep_bd6xx.py @@ -23,21 +23,21 @@ from __future__ import absolute_import, division, print_function from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from inspire_dojson.hep import hep, hep2marc -from inspire_schemas.api import load_schema, validate def test_keywords_from_084__a_2(): schema = load_schema('hep') subschema = schema['properties']['keywords'] - snippet = ( + snippet = ( # record/1590395 '' ' 02.20.Sv' ' PACS' '' - ) # record/1590395 + ) expected = [ { @@ -65,13 +65,13 @@ def test_keywords_from_084__a_2(): def test_keywords_from_084__double_2_does_not_raise(): - snippet = ( + snippet = ( # synthetic data '' ' 02.20.Sv' ' PACS' ' PACS' '' - ) # synthetic data + ) hep.do(create_record(snippet)) @@ -80,13 +80,13 @@ def test_keywords_from_084__a_2_9(): schema = load_schema('hep') subschema = schema['properties']['keywords'] - snippet = ( + snippet = ( # record/1421100 '' ' PDG' ' PDG' ' G033M' '' - ) # record/1421100 + ) expected = [ { @@ -119,13 +119,13 @@ def test_keywords_from_084__double_a_2(): schema = load_schema('hep') subschema = schema['properties']['keywords'] - snippet = ( + snippet = ( # record/1376406 '' ' PACS' ' 04.80.N' ' 07.10.Y' '' - ) # record/1376406 + ) expected = [ { @@ -164,12 +164,12 @@ def test_keywords_from_6531_a_2(): schema = load_schema('hep') subschema = schema['properties']['keywords'] - snippet = ( + snippet = ( # record/1473380 '' ' JACoW' ' experiment' '' - ) # record/1473380 + ) expected = [ { @@ -200,12 +200,12 @@ def test_keywords_from_6531_a_9(): schema = load_schema('hep') subschema = schema['properties']['keywords'] - snippet = ( + snippet = ( # record/1260876 '' ' author' ' Data' '' - ) # record/1260876 + ) expected = [ { @@ -233,13 +233,13 @@ def test_keywords_from_6531_a_9(): def test_keywords_from_6531_a_double_9_ignores_values_from_conference(): - snippet = ( + snippet = ( # record/1498175 '' ' submitter' ' conference' ' Track reconstruction' '' - ) # record/1498175 + ) result = hep.do(create_record(snippet)) @@ -248,11 +248,11 @@ def test_keywords_from_6531_a_double_9_ignores_values_from_conference(): def test_keywords_from_6531_9_ignores_lone_sources(): - snippet = ( + snippet = ( # record/1382933 '' ' author' '' - ) # record/1382933 + ) result = hep.do(create_record(snippet)) @@ -279,11 +279,11 @@ def test_accelerator_experiments_from_693__a(): schema = load_schema('hep') subschema = schema['properties']['accelerator_experiments'] - snippet = ( + snippet = ( # record/1623303 '' ' BATSE' '' - ) # record/1623303 + ) expected = [ {'accelerator': 'BATSE'}, @@ -305,13 +305,13 @@ def test_accelerator_experiments_from_693__a_e(): schema = load_schema('hep') subschema = schema['properties']['accelerator_experiments'] - snippet = ( + snippet = ( # record/1517829 '' ' CERN LHC' ' CERN-LHC-CMS' ' 1108642' '' - ) # record/1517829 + ) expected = [ { @@ -338,7 +338,7 @@ def 
test_accelerator_experiments_from_693__e_0_and_693__e_discards_single_dashes schema = load_schema('hep') subschema = schema['properties']['accelerator_experiments'] - snippet = ( + snippet = ( # record/1503527 '' ' ' ' CERN-LHC-ATLAS' @@ -348,7 +348,7 @@ def test_accelerator_experiments_from_693__e_0_and_693__e_discards_single_dashes ' -' ' ' '' - ) # record/1503527 + ) expected = [ { @@ -375,12 +375,12 @@ def test_keywords_from_695__a_2(): schema = load_schema('hep') subschema = schema['properties']['keywords'] - snippet = ( + snippet = ( # record/200123 '' ' REVIEW' ' INSPIRE' '' - ) # record/200123 + ) expected = [ { @@ -411,12 +411,12 @@ def test_keywords_from_695__a_2_inis(): schema = load_schema('hep') subschema = schema['properties']['keywords'] - snippet = ( + snippet = ( # record/1493738 '' ' Accelerators' ' INIS' '' - ) # record/1493738 + ) expected = [ { @@ -447,12 +447,12 @@ def test_energy_ranges_from_695__e_2(): schema = load_schema('hep') subschema = schema['properties']['energy_ranges'] - snippet = ( + snippet = ( # record/1124337 '' ' INSPIRE' ' 7' '' - ) # record/1124337 + ) expected = [ '1-10 TeV', @@ -480,7 +480,7 @@ def test_keywords_from_multiple_695__a_2(): schema = load_schema('hep') subschema = schema['properties']['keywords'] - snippet = ( + snippet = ( # record/363605 '' ' ' ' programming: Monte Carlo' @@ -491,7 +491,7 @@ def test_keywords_from_multiple_695__a_2(): ' INSPIRE' ' ' '' - ) # record/363605 + ) expected = [ { @@ -531,7 +531,7 @@ def test_keywords_from_695__a_2_9_automatic_keywords(): schema = load_schema('hep') subschema = schema['properties']['keywords'] - snippet = ( + snippet = ( # record/1859815 '' ' ' ' INSPIRE' @@ -548,7 +548,7 @@ def test_keywords_from_695__a_2_9_automatic_keywords(): ' bibclassify' ' ' '' - ) # record/1859815 + ) expected = [ { diff --git a/tests/test_hep_bd7xx.py b/tests/test_hep_bd7xx.py index 8b320598..5d33390b 100644 --- a/tests/test_hep_bd7xx.py +++ b/tests/test_hep_bd7xx.py @@ -23,20 +23,20 @@ from __future__ import absolute_import, division, print_function from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from inspire_dojson.hep import hep, hep2marc -from inspire_schemas.api import load_schema, validate def test_collaborations_from_710__g(): schema = load_schema('hep') subschema = schema['properties']['collaborations'] - snippet = ( + snippet = ( # record/1510404 '' ' Pierre Auger' '' - ) # record/1510404 + ) expected = [ {'value': 'Pierre Auger'}, @@ -58,11 +58,11 @@ def test_collaborations_from_710__g_normalizes_value(): schema = load_schema('hep') subschema = schema['properties']['collaborations'] - snippet = ( + snippet = ( # http://cds.cern.ch/record/2293683 '' ' on behalf of the CMS Collaboration' '' - ) # http://cds.cern.ch/record/2293683 + ) expected = [ {'value': 'CMS'}, @@ -84,12 +84,12 @@ def test_collaborations_from_710__g_0(): schema = load_schema('hep') subschema = schema['properties']['collaborations'] - snippet = ( + snippet = ( # record/1422032 '' ' ANTARES' ' 1110619' '' - ) # record/1422032 + ) expected = [ { @@ -116,7 +116,7 @@ def test_collaborations_from_multiple_710__g_0_and_710__g(): schema = load_schema('hep') subschema = schema['properties']['collaborations'] - snippet = ( + snippet = ( # record/1422032 '' ' ' ' ANTARES' @@ -134,7 +134,7 @@ def test_collaborations_from_multiple_710__g_0_and_710__g(): ' 1110601' ' ' '' - ) # record/1422032 + ) expected = [ { @@ -179,12 +179,12 @@ def 
test_collaborations_from_710__double_g_does_not_raise(): schema = load_schema('hep') subschema = schema['properties']['collaborations'] - snippet = ( + snippet = ( # record/1665755 '' ' ATLAS' ' CMS' '' - ) # record/1665755 + ) expected = [ {'value': 'ATLAS'}, @@ -208,7 +208,7 @@ def test_publication_info_from_773_c_m_p_v_y_1(): schema = load_schema('hep') subschema = schema['properties']['publication_info'] - snippet = ( + snippet = ( # record/1104 '' ' Erratum' ' Phys.Rev.Lett.' @@ -217,7 +217,7 @@ def test_publication_info_from_773_c_m_p_v_y_1(): ' 1975' ' 1214495' '' - ) # record/1104 + ) expected = [ { @@ -257,7 +257,7 @@ def test_publication_info_from_773_c_p_w_double_v_double_y_0_1_2(): schema = load_schema('hep') subschema = schema['properties']['publication_info'] - snippet = ( + snippet = ( # record/820763 '' ' IAU Symp.' ' C08-06-09' @@ -270,7 +270,7 @@ def test_publication_info_from_773_c_p_w_double_v_double_y_0_1_2(): ' 978924' ' 1408366' '' - ) # record/820763 + ) expected = [ { @@ -317,7 +317,7 @@ def test_publication_info_from_773__c_w_y_z_0_2(): schema = load_schema('hep') subschema = schema['properties']['publication_info'] - snippet = ( + snippet = ( # record/1501319 '' ' 95-104' ' C16-03-17' @@ -326,7 +326,7 @@ def test_publication_info_from_773__c_w_y_z_0_2(): ' 1407887' ' 1500425' '' - ) # record/1501319 + ) expected = [ { @@ -368,7 +368,7 @@ def test_publication_info_from_773__c_r_w_triple_0_2(): schema = load_schema('hep') subschema = schema['properties']['publication_info'] - snippet = ( + snippet = ( # record/1513005 '' ' 1512294' ' 122-127' @@ -378,7 +378,7 @@ def test_publication_info_from_773__c_r_w_triple_0_2(): ' 1484403' ' 1512294' '' - ) # record/1513005 + ) expected = [ { @@ -418,12 +418,11 @@ def test_publication_info_from_773__q_t(): schema = load_schema('hep') subschema = schema['properties']['publication_info'] - snippet = ( - '' - ' LENPIC2017' - ' Chiral Forces in Low Energy Nuclear Physics' - '' - ) # record/1598069 + snippet = ( # record/1598069 + ' LENPIC2017 Chiral Forces in' + ' Low Energy Nuclear Physics' + ) expected = [ {'conf_acronym': 'LENPIC2017'}, @@ -445,14 +444,14 @@ def test_publication_info_from_773__w_x_0_2_handles_lowercase_cnums(): schema = load_schema('hep') subschema = schema['properties']['publication_info'] - snippet = ( - '' - ' c12-07-09.10' - ' Proceedings of the 57th Annual Conference of the South African Institute of Physics, edited by Johan Janse van Rensburg (2014), pp. 362 - 367' - ' 1423475' - ' 1424370' - '' - ) # record/1264637 + snippet = ( # record/1264637 + ' c12-07-09.10 Proceedings of' + ' the 57th Annual Conference of the South African Institute of Physics,' + ' edited by Johan Janse van Rensburg (2014), pp. 362 - 367 ' + ' 1423475 1424370' + ) expected = [ { @@ -463,7 +462,11 @@ def test_publication_info_from_773__w_x_0_2_handles_lowercase_cnums(): 'parent_record': { '$ref': 'http://localhost:5000/api/literature/1424370', }, - 'pubinfo_freetext': 'Proceedings of the 57th Annual Conference of the South African Institute of Physics, edited by Johan Janse van Rensburg (2014), pp. 362 - 367', + 'pubinfo_freetext': ( + 'Proceedings of the 57th Annual Conference of the South African' + ' Institute of Physics, edited by Johan Janse van Rensburg' + ' (2014), pp. 
362 - 367' + ), }, ] result = hep.do(create_record(snippet)) @@ -474,7 +477,11 @@ def test_publication_info_from_773__w_x_0_2_handles_lowercase_cnums(): expected = [ { 'w': 'C12-07-09.10', - 'x': 'Proceedings of the 57th Annual Conference of the South African Institute of Physics, edited by Johan Janse van Rensburg (2014), pp. 362 - 367', + 'x': ( + 'Proceedings of the 57th Annual Conference of the South African' + ' Institute of Physics, edited by Johan Janse van Rensburg' + ' (2014), pp. 362 - 367' + ), '0': 1424370, }, ] @@ -487,11 +494,11 @@ def test_publication_info_from_773__w_handles_slashes_in_cnums(): schema = load_schema('hep') subschema = schema['properties']['publication_info'] - snippet = ( + snippet = ( # record/1622968 '' ' C17/05/14' '' - ) # record/1622968 + ) expected = [ {'cnum': 'C17-05-14'}, @@ -513,12 +520,12 @@ def test_publication_info_from_773__c_z_handles_dashes_in_isbns(): schema = load_schema('hep') subschema = schema['properties']['publication_info'] - snippet = ( + snippet = ( # record/1334853 '' ' 110-125' ' 978-1-4684-7552-4' '' - ) # record/1334853 + ) expected = [ { @@ -549,11 +556,11 @@ def test_publication_info_from_773__p_populates_public_notes(): schema = load_schema('hep') subschema = schema['properties']['public_notes'] - snippet = ( + snippet = ( # record/1631620 '' ' Phys.Rev.D' '' - ) # record/1631620 + ) expected = [ {'value': 'Submitted to Phys.Rev.D'}, @@ -569,12 +576,12 @@ def test_publication_info_from_773__p_1_populates_public_notes(): schema = load_schema('hep') subschema = schema['properties']['public_notes'] - snippet = ( + snippet = ( # record/1470899 '' ' Phys.Rev.Lett.' ' 1214495' '' - ) # record/1470899 + ) expected = [ {'value': 'Submitted to Phys.Rev.Lett.'}, @@ -587,11 +594,10 @@ def test_publication_info_from_773__p_1_populates_public_notes(): def test_publication_info_from_773__t_doesnt_populate_public_notes(): - snippet = ( - '' - ' Indian Particle Accelerator Conference (InPAC)' - '' - ) # record/1763998 + snippet = ( # record/1763998 + ' Indian' + ' Particle Accelerator Conference (InPAC)' + ) result = hep.do(create_record(snippet)) @@ -604,7 +610,7 @@ def test_publication_info_from_773__p_and_773__c_p_v_y_1_also_populates_public_n publication_info_schema = schema['properties']['publication_info'] public_notes_schema = schema['properties']['public_notes'] - snippet = ( + snippet = ( # record/769448 '' ' ' ' Eur.Phys.J.A' @@ -617,11 +623,9 @@ def test_publication_info_from_773__p_and_773__c_p_v_y_1_also_populates_public_n ' 1212905' ' ' '' - ) # record/769448 + ) - expected_public_notes = [ - {'value': 'Submitted to Eur.Phys.J.A'} - ] + expected_public_notes = [{'value': 'Submitted to Eur.Phys.J.A'}] expected_publication_info = [ { 'artid': '615', @@ -644,16 +648,13 @@ def test_publication_info_from_double_773__p(): schema = load_schema('hep') subschema = schema['properties']['public_notes'] - snippet = ( - '' - ' ' - ' Proc.HELAS Workshop on `New insights into the Sun\'' - ' ' - ' ' - ' & M.J.Thompson (2009)' - ' ' - '' - ) # record/920292 + snippet = ( # record/920292 + ' Proc.HELAS Workshop on `New insights into the' + ' Sun\' & M.J.Thompson (2009) ' + ' ' + ) expected = [ {'value': 'Submitted to Proc.HELAS Workshop on `New insights into the Sun\''}, @@ -671,7 +672,7 @@ def test_publication_info_from_773__c_p_v_x_y_1_discards_done_x(): schema = load_schema('hep') subschema = schema['properties']['publication_info'] - snippet = ( + snippet = ( # record/1479030 '' ' 134516' ' Phys.Rev.' 
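Amid all the rewrapping it is easy to lose sight of the fixed shape every test in this file follows. A condensed sketch of that round-trip, using only imports already present in the file (the helper name `assert_roundtrips` is illustrative, not part of the test suite):

    from dojson.contrib.marc21.utils import create_record
    from inspire_schemas.api import load_schema, validate

    from inspire_dojson.hep import hep, hep2marc


    def assert_roundtrips(snippet, field, expected):
        # MARCXML -> JSON: parse the snippet into a MARC dict, then apply
        # the hep conversion rules.
        result = hep.do(create_record(snippet))
        # The converted field must validate against its slice of the
        # 'hep' JSON schema; validate() returns None on success.
        subschema = load_schema('hep')['properties'][field]
        assert validate(result[field], subschema) is None
        assert expected == result[field]
        # JSON -> MARC: hep2marc flattens the record back into MARC
        # fields keyed by tag, for comparison by the caller.
        return hep2marc.do(result)

Note the import grouping, which matches what the patch enforces: dojson and inspire_schemas as third-party imports, inspire_dojson separated below as first-party.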
@@ -680,7 +681,7 @@ def test_publication_info_from_773__c_p_v_x_y_1_discards_done_x(): ' 2016' ' 1214516' '' - ) # record/1479030 + ) expected = [ { @@ -714,14 +715,14 @@ def test_publication_info_from_7731_c_p_v_y(): schema = load_schema('hep') subschema = schema['properties']['publication_info'] - snippet = ( + snippet = ( # record/697133 '' ' 948-979' ' Adv.Theor.Math.Phys.' ' 12' ' 2008' '' - ) # record/697133 + ) expected = [ { @@ -757,7 +758,7 @@ def test_publication_info_from_7731_c_p_v_y_and_773__c_p_v_y_1(): schema = load_schema('hep') subschema = schema['properties']['publication_info'] - snippet = ( + snippet = ( # record/1439897 '' ' ' ' 602-604' @@ -773,7 +774,7 @@ def test_publication_info_from_7731_c_p_v_y_and_773__c_p_v_y_1(): ' 1214521' ' ' '' - ) # record/1439897 + ) expected = [ { @@ -852,13 +853,13 @@ def test_related_records_from_78002i_r_w(): schema = load_schema('hep') subschema = schema['properties']['related_records'] - snippet = ( + snippet = ( # record/1510564 '' ' supersedes' ' ATLAS-CONF-2016-113' ' 1503270' '' - ) # record/1510564 + ) expected = [ { @@ -888,13 +889,13 @@ def test_related_records_from_78002i_r_w(): def test_related_superseding_records_78502r_w_z(): schema = load_schema('hep') subschema = schema['properties']['related_records'] - snippet = ( + snippet = ( # record/1503270 '' 'superseded by' 'CERN-EP-2016-305' '1510564' '' - ) # record/1503270 + ) expected = [ { @@ -924,12 +925,12 @@ def test_related_records_from_78708i_w(): schema = load_schema('hep') subschema = schema['properties']['related_records'] - snippet = ( + snippet = ( # record/1415979 '' ' Addendum' ' 1474710' '' - ) # record/1415979 + ) expected = [ { diff --git a/tests/test_hep_bd9xx.py b/tests/test_hep_bd9xx.py index 6e652574..d6bd10ee 100644 --- a/tests/test_hep_bd9xx.py +++ b/tests/test_hep_bd9xx.py @@ -23,6 +23,7 @@ from __future__ import absolute_import, division, print_function from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from inspire_dojson.hep import hep, hep2marc from inspire_dojson.hep.rules.bd9xx import ( @@ -31,7 +32,6 @@ DOCUMENT_TYPE_MAP, DOCUMENT_TYPE_REVERSE_MAP, ) -from inspire_schemas.api import load_schema, validate def test_collections_map_contains_all_valid_collections(): @@ -76,12 +76,12 @@ def test_record_affiliations_from_902__a_z(): schema = load_schema('hep') subschema = schema['properties']['record_affiliations'] - snippet = ( + snippet = ( # record/1216295 '' ' Iowa State U.' ' 902893' '' - ) # record/1216295 + ) expected = [ { @@ -109,7 +109,7 @@ def test_record_affiliations_from_double_902__a_z(): schema = load_schema('hep') subschema = schema['properties']['record_affiliations'] - snippet = ( + snippet = ( # record/1216295 '' ' ' ' Iowa State U.' 
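The dominant mechanical change in these hunks is worth naming once: the trailing `# record/...` provenance comments move from the closing parenthesis of each snippet to the opening one. Attached to the closing paren they would end up on whatever line the formatter wraps last; attached to the opening paren they stay with the assignment however the implicitly concatenated strings are rewrapped. Schematically (snippet body elided):

    # Before: the record reference trails the closing paren.
    snippet = (
        '...'
    )  # record/1216295

    # After: it rides on the opening paren and survives rewrapping.
    snippet = (  # record/1216295
        '...'
    )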
@@ -120,7 +120,7 @@ def test_record_affiliations_from_double_902__a_z(): ' 902642' ' ' '' - ) # record/1216295 + ) expected = [ { @@ -156,11 +156,11 @@ def test_citeable_from_980__a_citeable(): schema = load_schema('hep') subschema = schema['properties']['citeable'] - snippet = ( + snippet = ( # record/1511471 '' ' Citeable' '' - ) # record/1511471 + ) expected = True result = hep.do(create_record(snippet)) @@ -180,11 +180,11 @@ def test_core_from_980__a_core(): schema = load_schema('hep') subschema = schema['properties']['core'] - snippet = ( + snippet = ( # record/1509993 '' ' CORE' '' - ) # record/1509993 + ) expected = True result = hep.do(create_record(snippet)) @@ -204,11 +204,11 @@ def test_core_from_980__a_noncore(): schema = load_schema('hep') subschema = schema['properties']['core'] - snippet = ( + snippet = ( # record/1411887 '' ' NONCORE' '' - ) # record/1411887 + ) expected = False result = hep.do(create_record(snippet)) @@ -228,11 +228,11 @@ def test_deleted_from_980__c(): schema = load_schema('hep') subschema = schema['properties']['deleted'] - snippet = ( + snippet = ( # record/1508668 '' ' DELETED' '' - ) # record/1508668 + ) expected = True result = hep.do(create_record(snippet)) @@ -252,11 +252,11 @@ def test_deleted_from_980__a(): schema = load_schema('hep') subschema = schema['properties']['deleted'] - snippet = ( + snippet = ( # record/931344 '' ' DELETED' '' - ) # record/931344 + ) expected = True result = hep.do(create_record(snippet)) @@ -276,11 +276,11 @@ def test_collections_from_980__a(): schema = load_schema('hep') subschema = schema['properties']['_collections'] - snippet = ( + snippet = ( # record/1610892 '' ' HEP' '' - ) # record/1610892 + ) expected = ['Literature'] result = hep.do(create_record(snippet)) @@ -300,11 +300,11 @@ def test_collections_from_980__a_hal_hidden(): schema = load_schema('hep') subschema = schema['properties']['_collections'] - snippet = ( + snippet = ( # record/1505341 '' ' HALhidden' '' - ) # record/1505341 + ) expected = [ 'HAL Hidden', @@ -326,11 +326,11 @@ def test_collections_from_980__a_babar_analysis_document(): schema = load_schema('hep') subschema = schema['properties']['_collections'] - snippet = ( + snippet = ( # record/1598316 '' ' BABAR-AnalysisDocument' '' - ) # record/1598316 + ) expected = [ 'BABAR Analysis Documents', @@ -352,7 +352,7 @@ def test_collections_from_double_980__a(): schema = load_schema('hep') subschema = schema['properties']['_collections'] - snippet = ( + snippet = ( # record/1201407 '' ' ' ' D0-PRELIMINARY-NOTE' @@ -361,7 +361,7 @@ def test_collections_from_double_980__a(): ' HEP' ' ' '' - ) # record/1201407 + ) expected = [ 'D0 Preliminary Notes', @@ -388,11 +388,11 @@ def test_refereed_from_980__a_published(): schema = load_schema('hep') subschema = schema['properties']['refereed'] - snippet = ( + snippet = ( # record/1509992 '' ' Published' '' - ) # record/1509992 + ) expected = True result = hep.do(create_record(snippet)) @@ -412,9 +412,7 @@ def test_document_type_defaults_to_article(): schema = load_schema('hep') subschema = schema['properties']['document_type'] - snippet = ( - '' - ) # synthetic data + snippet = '' # synthetic data expected = [ 'article', @@ -429,11 +427,11 @@ def test_document_type_from_980__a(): schema = load_schema('hep') subschema = schema['properties']['document_type'] - snippet = ( + snippet = ( # record/1512050 '' ' Book' '' - ) # record/1512050 + ) expected = [ 'book', @@ -455,11 +453,11 @@ def test_document_type_from_980__a_handles_conference_paper(): schema = 
load_schema('hep') subschema = schema['properties']['document_type'] - snippet = ( + snippet = ( # record/1589240 '' ' ConferencePaper' '' - ) # record/1589240 + ) expected = [ 'conference paper', @@ -481,11 +479,11 @@ def test_document_type_from_980__a_handles_activity_report(): schema = load_schema('hep') subschema = schema['properties']['document_type'] - snippet = ( + snippet = ( # record/1514964 '' ' ActivityReport' '' - ) # record/1514964 + ) expected = [ 'activity report', @@ -507,11 +505,11 @@ def test_publication_type_from_980__a(): schema = load_schema('hep') subschema = schema['properties']['publication_type'] - snippet = ( + snippet = ( # record/1509993 '' ' Review' '' - ) # record/1509993 + ) expected = [ 'review', @@ -533,11 +531,11 @@ def test_withdrawn_from_980__a_withdrawn(): schema = load_schema('hep') subschema = schema['properties']['withdrawn'] - snippet = ( + snippet = ( # record/1486153 '' ' Withdrawn' '' - ) # record/1486153 + ) expected = True result = hep.do(create_record(snippet)) @@ -557,12 +555,12 @@ def test_references_from_999C5r_0(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( + snippet = ( # record/41194 '' ' solv-int/9611008' ' 433620' '' - ) # record/41194 + ) expected = [ { @@ -598,13 +596,13 @@ def test_references_from_999C5r_s_0(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( + snippet = ( # record/863300 '' ' arXiv:1006.1289' ' Prog.Part.Nucl.Phys.,65,149' ' 857206' '' - ) # record/863300 + ) expected = [ { @@ -647,17 +645,16 @@ def test_references_from_999C5h_m_o_y_z_0_9(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( - '' - ' 1242925' - ' CURATOR' - ' M. Schwarz' - ' Nontrivial Spacetime Topology, Modified Dispersion Relations, and an SO(3)Skyrme Model, PhD Thesis, KIT (Verlag Dr. Hut, Munich, Germany,)' - ' 7' - ' 2010' - ' 1' - '' - ) # record/1289907 + snippet = ( # record/1289907 + ' 1242925 CURATOR ' + ' M. Schwarz Nontrivial Spacetime Topology, Modified Dispersion' + ' Relations, and an SO(3)Skyrme Model, PhD Thesis, KIT (Verlag Dr. Hut,' + ' Munich, Germany,) 7 ' + ' 2010 1' + ) expected = [ { @@ -672,7 +669,11 @@ def test_references_from_999C5h_m_o_y_z_0_9(): ], 'label': '7', 'misc': [ - 'Nontrivial Spacetime Topology, Modified Dispersion Relations, and an SO(3)Skyrme Model, PhD Thesis, KIT (Verlag Dr. Hut, Munich, Germany,)', + ( + 'Nontrivial Spacetime Topology, Modified Dispersion' + ' Relations, and an SO(3)Skyrme Model, PhD Thesis, KIT' + ' (Verlag Dr. Hut, Munich, Germany,)' + ), ], 'publication_info': { 'year': 2010, @@ -692,7 +693,11 @@ def test_references_from_999C5h_m_o_y_z_0_9(): 'h': [ 'Schwarz, M.', ], - 'm': 'Nontrivial Spacetime Topology, Modified Dispersion Relations, and an SO(3)Skyrme Model, PhD Thesis, KIT (Verlag Dr. Hut, Munich, Germany,)', + 'm': ( + 'Nontrivial Spacetime Topology, Modified Dispersion Relations,' + ' and an SO(3)Skyrme Model, PhD Thesis, KIT (Verlag Dr. Hut,' + ' Munich, Germany,)' + ), 'o': '7', 'y': 2010, 'z': 1, @@ -707,7 +712,7 @@ def test_references_from_999C5h_m_o_t_y_repeated_z_0_9(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( + snippet = ( # record/1095388 '' ' 794379' ' S. 
Weinberg' @@ -719,7 +724,7 @@ def test_references_from_999C5h_m_o_t_y_repeated_z_0_9(): ' CURATOR' ' 1' '' - ) # record/1095388 + ) expected = [ { @@ -771,17 +776,15 @@ def test_references_from_999C5h_m_o_r_s_y_0(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( - '' - ' 857215' - ' R. C. Myers and A. Sinha' - ' Seeing a c-theorem with holography ; [hep-th]' - ' 10' - ' arXiv:1006.1263' - ' Phys.Rev.,D82,046006' - ' 2010' - '' - ) # record/1498589 + snippet = ( # record/1498589 + ' 857215 R. C. Myers and A.' + ' Sinha Seeing a c-theorem with' + ' holography ; [hep-th] 10 ' + ' arXiv:1006.1263 Phys.Rev.,D82,046006 2010' + ) expected = [ { @@ -839,17 +842,16 @@ def test_references_from_999C5a_h_o_s_x_y_0(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( - '' - ' doi:10.1142/S0217751X0804055X' - ' G.K. Leontaris' - ' 15' - ' Int.J.Mod.Phys.,A23,2055' - ' Int. J. Mod. Phys. A 23 (doi:10.1142/S0217751X0804055X)' - ' 2008' - ' 780399' - '' - ) # record/1478478 + snippet = ( # record/1478478 + ' doi:10.1142/S0217751X0804055X G.K. Leontaris 15 ' + ' Int.J.Mod.Phys.,A23,2055 Int. J. Mod. Phys. A 23' + ' (doi:10.1142/S0217751X0804055X) 2008 780399' + ) expected = [ { @@ -911,16 +913,15 @@ def test_references_from_999C50_h_m_o_r_y(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( - '' - ' 701721' - ' A. Ferrari, P.R. Sala, A. Fasso, and J. Ranft' - ' FLUKA: a multi-particle transport code, CERN-10 , INFN/TC_05/11' - ' 13' - ' SLAC-R-773' - ' 2005' - '' - ) # record/1478478 + snippet = ( # record/1478478 + ' 701721 A. Ferrari, P.R. Sala,' + ' A. Fasso, and J. Ranft FLUKA: a' + ' multi-particle transport code, CERN-10 , INFN/TC_05/11 ' + ' 13 SLAC-R-773 2005' + ) expected = [ { @@ -978,7 +979,7 @@ def test_references_from_999C59_h_m_o_double_r_y(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( + snippet = ( # record/1449990 '' ' CURATOR' ' Bennett, J' @@ -988,7 +989,7 @@ def test_references_from_999C59_h_m_o_double_r_y(): ' CERN-INTCP-186' ' 2004' '' - ) # record/1449990 + ) expected = [ { @@ -1039,17 +1040,15 @@ def test_references_from_999C50_9_r_u_h_m_o(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( - '' - ' 1511470' - ' CURATOR' - ' urn:nbn:de:hebis:77-diss-1000009520' - ' http://www.diss.fu-berlin.de/diss/receive/FUDISS_thesis_000000094316' - ' K. Wiebe' - ' Ph.D. thesis, University of Mainz, in preparation' - ' 51' - '' - ) # record/1504897 + snippet = ( # record/1504897 + ' 1511470' + ' CURATOR urn:nbn:de:hebis:77-diss-1000009520 http://www.diss.fu-berlin.de/diss/receive/FUDISS_thesis_000000094316' + ' K. Wiebe Ph.D. thesis,' + ' University of Mainz, in preparation 51' + ) expected = [ { @@ -1070,7 +1069,9 @@ def test_references_from_999C50_9_r_u_h_m_o(): 'urn:nbn:de:hebis:77-diss-1000009520', ], 'urls': [ - {'value': 'http://www.diss.fu-berlin.de/diss/receive/FUDISS_thesis_000000094316'}, + { + 'value': 'http://www.diss.fu-berlin.de/diss/receive/FUDISS_thesis_000000094316' + }, ], }, }, @@ -1107,15 +1108,14 @@ def test_reference_from_999C5t_p_y_e_o(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( - '' - ' Higher Transcendetal Functions Vol. I, Bateman Manuscript Project' - ' New York: McGraw-Hill Book Company, Inc.' - ' 1953' - ' Erdélyi,A.' 
- ' 16' - '' - ) # record/1590099 + snippet = ( # record/1590099 + ' Higher' + ' Transcendetal Functions Vol. I, Bateman Manuscript Project' + ' New York: McGraw-Hill Book Company,' + ' Inc. 1953 Erdélyi,A. 16' + ) expected = [ { @@ -1129,7 +1129,12 @@ def test_reference_from_999C5t_p_y_e_o(): 'imprint': {'publisher': 'New York: McGraw-Hill Book Company, Inc.'}, 'label': '16', 'publication_info': {'year': 1953}, - 'title': {'title': 'Higher Transcendetal Functions Vol. I, Bateman Manuscript Project'}, + 'title': { + 'title': ( + 'Higher Transcendetal Functions Vol. I, Bateman' + ' Manuscript Project' + ) + }, }, }, ] @@ -1159,33 +1164,26 @@ def test_reference_from_999C5o_h_c_t_s_r_y_0(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( - '' - ' 36' - ' S. Chatrchyan et al.' - ' CMS Collaboration' - ' Angular analysis and branching fraction measurement of the decay B0 → K∗0 µ+ µ-' - ' Phys.Lett.,B727,77' - ' arXiv:1308.3409 [hep-ex]' - ' 2013' - ' 1247976' - '' - ) # record/1591975 + snippet = ( # record/1591975 + ' 36 S. Chatrchyan et' + ' al. CMS Collaboration ' + ' Angular analysis and branching fraction' + ' measurement of the decay B0 → K∗0 µ+ µ- Phys.Lett.,B727,77 arXiv:1308.3409 [hep-ex] 2013 1247976' + ) expected = [ { 'curated_relation': False, - 'record': { - '$ref': 'http://localhost:5000/api/literature/1247976' - }, + 'record': {'$ref': 'http://localhost:5000/api/literature/1247976'}, 'reference': { 'arxiv_eprint': '1308.3409', - 'authors': [ - {'full_name': u'Chatrchyan, S.'} - ], - 'collaborations': [ - 'CMS Collaboration' - ], + 'authors': [{'full_name': u'Chatrchyan, S.'}], + 'collaborations': ['CMS Collaboration'], 'label': '36', 'publication_info': { 'artid': '77', @@ -1194,8 +1192,13 @@ def test_reference_from_999C5o_h_c_t_s_r_y_0(): 'page_start': '77', 'year': 2013, }, - 'title': {'title': u'Angular analysis and branching fraction measurement of the decay B0 → K∗0 µ+ µ-'}, - } + 'title': { + 'title': ( + u'Angular analysis and branching fraction measurement' + u' of the decay B0 → K∗0 µ+ µ-' + ) + }, + }, } ] result = hep.do(create_record(snippet)) @@ -1217,7 +1220,10 @@ def test_reference_from_999C5o_h_c_t_s_r_y_0(): 'arXiv:1308.3409', ], 's': 'Phys.Lett.,B727,77', - 't': u'Angular analysis and branching fraction measurement of the decay B0 → K∗0 µ+ µ-', + 't': ( + u'Angular analysis and branching fraction measurement of the' + u' decay B0 → K∗0 µ+ µ-' + ), 'y': 2013, 'z': 0, } @@ -1231,18 +1237,16 @@ def test_references_from_999C5b_h_m_o_p_t_y_9(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( - '' - ' CURATOR' - ' C93-06-08' - ' C. Gaspar' - ' Real Time Conference,, Vancouver, Canada' - ' 7' - ' IEEE' - ' DIM - A Distributed Information Management System for the Delphi experiment at CERN' - ' 1993' - '' - ) # record/1481519 + snippet = ( # record/1481519 + ' CURATOR C93-06-08 ' + ' C. 
Gaspar Real Time' + ' Conference,, Vancouver, Canada 7 IEEE DIM - A Distributed Information Management System for the' + ' Delphi experiment at CERN 1993' + ) expected = [ { @@ -1260,7 +1264,12 @@ def test_references_from_999C5b_h_m_o_p_t_y_9(): 'cnum': 'C93-06-08', 'year': 1993, }, - 'title': {'title': 'DIM - A Distributed Information Management System for the Delphi experiment at CERN'}, + 'title': { + 'title': ( + 'DIM - A Distributed Information Management System for' + ' the Delphi experiment at CERN' + ) + }, }, }, ] @@ -1279,7 +1288,10 @@ def test_references_from_999C5b_h_m_o_p_t_y_9(): 'm': 'Real Time Conference,, Vancouver, Canada', 'o': '7', 'p': 'IEEE', - 't': 'DIM - A Distributed Information Management System for the Delphi experiment at CERN', + 't': ( + 'DIM - A Distributed Information Management System for the' + ' Delphi experiment at CERN' + ), 'y': 1993, 'z': 0, }, @@ -1293,18 +1305,16 @@ def test_references_from_999C5a_h_i_m_o_p_y_9(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( - '' - ' 16' - ' A. Del Guerra' - ' Ionizing Radiation Detectors for Medical Imaging Crossref:' - ' World Scientific' - ' 9812562621' - ' doi:10.1142/5408' - ' 2004' - ' refextract' - '' - ) # record/1593684 + snippet = ( # record/1593684 + ' 16 A. Del Guerra ' + ' Ionizing Radiation Detectors for Medical Imaging' + ' Crossref: World Scientific ' + ' 9812562621 doi:10.1142/5408 2004 refextract' + ) expected = [ { @@ -1355,7 +1365,7 @@ def test_references_from_999C5h_o_q_t_y(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( + snippet = ( # record/1592189 '' ' Gromov, M.' ' Spaces and questions' @@ -1363,7 +1373,7 @@ def test_references_from_999C5h_o_q_t_y(): ' Geom. Funct. Anal., GAFA 2000' ' 16' '' - ) # record/1592189 + ) expected = [ { @@ -1406,11 +1416,11 @@ def test_references_from_999C5k(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( + snippet = ( # synthetic data '' ' Robilotta:2008js' '' - ) # synthetic data + ) expected = [ { @@ -1439,7 +1449,7 @@ def test_references_from_999C5d_multiple_h_o_r_0_9(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( + snippet = ( # record/1410105 '' ' 568216' ' CURATOR' @@ -1452,7 +1462,7 @@ def test_references_from_999C5d_multiple_h_o_r_0_9(): ' 20' ' hep-ph/0112168v2' '' - ) # record/1410105 + ) expected = [ { @@ -1506,7 +1516,7 @@ def test_references_from_999C5h_k_double_m_o_s_y_0(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( + snippet = ( # record/1613562 '' ' W, Schoutens.' ' Bouwknegt:1992wg' @@ -1517,7 +1527,7 @@ def test_references_from_999C5h_k_double_m_o_s_y_0(): ' 1993' ' 338634' '' - ) # record/1613562 + ) expected = [ { @@ -1573,17 +1583,15 @@ def test_references_from_999C5_0_h_m_o_r_t_y(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( - '' - ' 674429' - ' R. Ardito et al.' - ' 66' - ' 57' - ' hep-ex/0501010' - ' CUORE: A Cryogenic underground Observatory for Rare Events' - ' 2005' - '' - ) # record/1615506 + snippet = ( # record/1615506 + ' 674429 R. Ardito et' + ' al. 
66 57 hep-ex/0501010 ' + ' CUORE: A Cryogenic underground Observatory for' + ' Rare Events 2005' + ) expected = [ { @@ -1601,7 +1609,11 @@ def test_references_from_999C5_0_h_m_o_r_t_y(): '66', ], 'publication_info': {'year': 2005}, - 'title': {'title': 'CUORE: A Cryogenic underground Observatory for Rare Events'}, + 'title': { + 'title': ( + 'CUORE: A Cryogenic underground Observatory for Rare Events' + ) + }, }, }, ] @@ -1635,12 +1647,12 @@ def test_references_from_999C5_0_z(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( + snippet = ( # record/374213 '' ' 351013' ' 1' '' - ) # record/374213 + ) expected = [ { @@ -1670,21 +1682,21 @@ def test_references_from_999C5u_as_cds_system_identifiers(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( - '' - ' 59' - ' ATLAS Collaboration' - ' CMS Collaboration' - ' The LHC Higgs Combination Group Collaboration Tech. Rep CERN, Geneva, Aug' - ' G. Aad et al.' - ' Procedure for the LHC Higgs boson search combination in Summer 2011' - ' CMS-NOTE-2011-005' - ' ATL-PHYS-PUB-2011-11' - ' http://cds.cern.ch/record/1379837' - ' 2011' - ' 1196797' - '' - ) # record/1665526 + snippet = ( # record/1665526 + ' 59 ATLAS' + ' Collaboration CMS' + ' Collaboration The LHC Higgs' + ' Combination Group Collaboration Tech. Rep CERN, Geneva,' + ' Aug G. Aad et al. ' + ' Procedure for the LHC Higgs boson search' + ' combination in Summer 2011 CMS-NOTE-2011-005 ATL-PHYS-PUB-2011-11 http://cds.cern.ch/record/1379837 2011 1196797' + ) expected = [ { @@ -1697,14 +1709,20 @@ def test_references_from_999C5u_as_cds_system_identifiers(): 'ATL-PHYS-PUB-2011-11', ], 'title': { - 'title': 'Procedure for the LHC Higgs boson search combination in Summer 2011', + 'title': ( + 'Procedure for the LHC Higgs boson search combination' + ' in Summer 2011' + ), }, 'collaborations': [ 'ATLAS Collaboration', 'CMS Collaboration', ], 'misc': [ - 'The LHC Higgs Combination Group Collaboration Tech. Rep CERN, Geneva, Aug', + ( + 'The LHC Higgs Combination Group Collaboration Tech.' + ' Rep CERN, Geneva, Aug' + ), ], 'label': '59', 'publication_info': { @@ -1722,7 +1740,7 @@ def test_references_from_999C5u_as_cds_system_identifiers(): }, ], }, - 'curated_relation': False + 'curated_relation': False, } ] result = hep.do(create_record(snippet)) @@ -1739,7 +1757,10 @@ def test_references_from_999C5u_as_cds_system_identifiers(): 'h': [ u'Aad, G.', ], - 'm': 'The LHC Higgs Combination Group Collaboration Tech. Rep CERN, Geneva, Aug', + 'm': ( + 'The LHC Higgs Combination Group Collaboration Tech. 
Rep CERN,' + ' Geneva, Aug' + ), 'o': '59', '0': 1196797, 'r': [ @@ -1763,17 +1784,16 @@ def test_references_from_999C5u_as_ads_system_identifiers(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( - '' - ' 25' - ' Kragh, Helge Bibcode:PhP...17..107K' - ' Pascual Jordan, Varying Gravity, and the Expanding Earth' - ' Phys.Perspect.,17,107' - ' http://adsabs.harvard.edu/abs/2015PhP...17..107K' - ' doi:10.1007/s00016-015-0157-9' - ' 2015' - '' - ) # record/1663135 + snippet = ( # record/1663135 + ' 25 Kragh, Helge' + ' Bibcode:PhP...17..107K Pascual Jordan,' + ' Varying Gravity, and the Expanding Earth Phys.Perspect.,17,107 http://adsabs.harvard.edu/abs/2015PhP...17..107K ' + ' doi:10.1007/s00016-015-0157-9 ' + ' 2015' + ) expected = [ { @@ -1800,7 +1820,7 @@ def test_references_from_999C5u_as_ads_system_identifiers(): ], 'dois': [ '10.1007/s00016-015-0157-9', - ] + ], }, } ] @@ -1834,18 +1854,18 @@ def test_references_from_999C5u_duplicated_u(): schema = load_schema('hep') subschema = schema['properties']['references'] - snippet = ( - '' - ' 25' - ' Kragh, Helge Bibcode:PhP...17..107K' - ' Pascual Jordan, Varying Gravity, and the Expanding Earth' - ' Phys.Perspect.,17,107' - ' http://adsabs.harvard.edu/abs/2015PhP...17..107K' - ' http://adsabs.harvard.edu/abs/2015PhP...17..107K' - ' doi:10.1007/s00016-015-0157-9' - ' 2015' - '' - ) # record/1663135 + snippet = ( # record/1663135 + ' 25 Kragh, Helge' + ' Bibcode:PhP...17..107K Pascual Jordan,' + ' Varying Gravity, and the Expanding Earth Phys.Perspect.,17,107 http://adsabs.harvard.edu/abs/2015PhP...17..107K ' + ' http://adsabs.harvard.edu/abs/2015PhP...17..107K ' + ' doi:10.1007/s00016-015-0157-9 ' + ' 2015' + ) expected = [ { @@ -1872,7 +1892,7 @@ def test_references_from_999C5u_duplicated_u(): ], 'dois': [ '10.1007/s00016-015-0157-9', - ] + ], }, } ] diff --git a/tests/test_hep_bdFFT.py b/tests/test_hep_bdFFT.py index 40c6bb12..f4ef68fe 100644 --- a/tests/test_hep_bdFFT.py +++ b/tests/test_hep_bdFFT.py @@ -23,42 +23,35 @@ from __future__ import absolute_import, division, print_function import pytest - from dojson.contrib.marc21.utils import create_record - from flask import current_app +from inspire_schemas.api import load_schema, validate from mock import patch from inspire_dojson.hep import hep, hep2marc -from inspire_schemas.api import load_schema, validate -@pytest.fixture -def legacy_afs_service_config(): - config = { - 'LABS_AFS_HTTP_SERVICE': 'http://legacy-afs-web' - } +@pytest.fixture() +def _legacy_afs_service_config(): + config = {'LABS_AFS_HTTP_SERVICE': 'http://legacy-afs-web'} with patch.dict(current_app.config, config): yield -def test_documents_from_FFT(legacy_afs_service_config): +@pytest.mark.usefixtures("_legacy_afs_service_config") +def test_documents_from_FFT(): schema = load_schema('hep') subschema = schema['properties']['documents'] - snippet = ( - '' - ' /opt/cds-invenio/var/data/files/g151/3037619/content.pdf;1' - ' ' - ' .pdf' - ' arXiv:1710.01187' - ' ' - ' 2017-10-04 09:42:00' - ' Main' - ' 1' - ' ' - '' - ) # record/1628455 + snippet = ( # record/1628455 + ' /opt/cds-invenio/var/data/files/g151/3037619/content.pdf;1' + '' + ' .pdf arXiv:1710.01187 2017-10-04 09:42:00 Main ' + ' 1 ' + ) expected = [ { @@ -73,23 +66,20 @@ def test_documents_from_FFT(legacy_afs_service_config): assert 'figures' not in result -def test_documents_from_FFT_special_cases_arxiv_properly(legacy_afs_service_config): +@pytest.mark.usefixtures("_legacy_afs_service_config") +def 
test_documents_from_FFT_special_cases_arxiv_properly(): schema = load_schema('hep') subschema = schema['properties']['documents'] - snippet = ( - '' - ' /opt/cds-invenio/var/data/files/g151/3037619/content.pdf;2' - ' ' - ' .pdf' - ' arXiv:1710.01187' - ' ' - ' 2017-12-06 03:34:26' - ' arXiv' - ' 2' - ' ' - '' - ) # record/1628455 + snippet = ( # record/1628455 + ' /opt/cds-invenio/var/data/files/g151/3037619/content.pdf;2' + '' + ' .pdf arXiv:1710.01187 2017-12-06 03:34:26 arXiv ' + ' 2 ' + ) expected = [ { @@ -106,36 +96,28 @@ def test_documents_from_FFT_special_cases_arxiv_properly(legacy_afs_service_conf assert 'figures' not in result -def test_documents_are_unique_from_FFT(legacy_afs_service_config): +@pytest.mark.usefixtures("_legacy_afs_service_config") +def test_documents_are_unique_from_FFT(): schema = load_schema('hep') subschema = schema['properties']['documents'] - snippet = ( - '' - ' ' - ' /opt/cds-invenio/var/data/files/g151/3037619/content.pdf;1' - ' ' - ' .pdf' - ' arXiv:1710.01187' - ' ' - ' 2017-10-04 09:42:00' - ' Main' - ' 1' - ' ' - ' ' - ' ' - ' /opt/cds-invenio/var/data/files/g151/3037619/content.pdf;1' - ' ' - ' .pdf' - ' arXiv:1710.01187' - ' ' - ' 2017-10-04 09:42:00' - ' Main' - ' 1' - ' ' - ' ' - '' - ) # record/1628455 + snippet = ( # record/1628455 + ' /opt/cds-invenio/var/data/files/g151/3037619/content.pdf;1' + '' + ' .pdf arXiv:1710.01187 2017-10-04 09:42:00 Main' + ' 1 ' + ' /opt/cds-invenio/var/data/files/g151/3037619/content.pdf;1' + '' + ' .pdf arXiv:1710.01187 2017-10-04 09:42:00 Main' + ' 1 ' + ' ' + ) expected = [ { @@ -154,23 +136,21 @@ def test_documents_are_unique_from_FFT(legacy_afs_service_config): assert 'figures' not in result -def test_figures_from_FFT(legacy_afs_service_config): +@pytest.mark.usefixtures("_legacy_afs_service_config") +def test_figures_from_FFT(): schema = load_schema('hep') subschema = schema['properties']['figures'] - snippet = ( - '' - ' /opt/cds-invenio/var/data/files/g151/3037399/content.png;1' - ' 00009 Co-simulation results, at $50~\\mathrm{ms}$...' - ' .png' - ' FIG10' - ' ' - ' 2017-10-04 07:54:54' - ' Main' - ' 1' - ' ' - '' - ) # record/1628455 + snippet = ( # record/1628455 + ' /opt/cds-invenio/var/data/files/g151/3037399/content.png;1' + '' + ' 00009 Co-simulation results, at' + ' $50~\\mathrm{ms}$... .png ' + ' FIG10 2017-10-04 07:54:54 Main ' + ' 1 ' + ) expected = [ { @@ -187,47 +167,38 @@ def test_figures_from_FFT(legacy_afs_service_config): assert 'documents' not in result -def test_figures_order_from_FFT(legacy_afs_service_config): +@pytest.mark.usefixtures("_legacy_afs_service_config") +def test_figures_order_from_FFT(): schema = load_schema('hep') subschema = schema['properties']['figures'] - snippet = ( - '' - ' ' - ' /opt/cds-invenio/var/data/files/g151/3037400/content.png;1' - ' 00010 Co-simulation results, at $50~\\mathrm{ms}$...' - ' .png' - ' FIG11' - ' ' - ' 2017-10-04 07:54:54' - ' Main' - ' 1' - ' ' - ' ' - ' ' - ' /opt/cds-invenio/var/data/files/g151/3037399/content.png;1' - ' 00009 Co-simulation results, at $50~\\mathrm{ms}$...' - ' .png' - ' FIG10' - ' ' - ' 2017-10-04 07:54:54' - ' Main' - ' 1' - ' ' - ' ' - ' ' - ' /opt/cds-invenio/var/data/files/g151/3037401/content.png;1' - ' 00011 Co-simulation results, at $50~\\mathrm{ms}$...' 
- ' .png' - ' FIG12' - ' ' - ' 2017-10-04 07:54:54' - ' Main' - ' 1' - ' ' - ' ' - '' - ) # record/1628455 + snippet = ( # record/1628455 + ' /opt/cds-invenio/var/data/files/g151/3037400/content.png;1' + '' + ' 00010 Co-simulation results, at' + ' $50~\\mathrm{ms}$... .png ' + ' FIG11 2017-10-04 07:54:54 Main' + ' 1 ' + ' /opt/cds-invenio/var/data/files/g151/3037399/content.png;1' + '' + ' 00009 Co-simulation results, at' + ' $50~\\mathrm{ms}$... .png ' + ' FIG10 2017-10-04 07:54:54 Main' + ' 1 ' + ' /opt/cds-invenio/var/data/files/g151/3037401/content.png;1' + '' + ' 00011 Co-simulation results, at' + ' $50~\\mathrm{ms}$... .png ' + ' FIG12 2017-10-04 07:54:54 Main' + ' 1 ' + ' ' + ) expected = [ { @@ -247,7 +218,7 @@ def test_figures_order_from_FFT(legacy_afs_service_config): 'caption': 'Co-simulation results, at $50~\\mathrm{ms}$...', 'url': 'http://legacy-afs-web/var/data/files/g151/3037401/content.png%3B1', 'source': 'arxiv', - } + }, ] result = hep.do(create_record(snippet)) assert validate(result['figures'], subschema) is None @@ -256,19 +227,16 @@ def test_figures_order_from_FFT(legacy_afs_service_config): def test_documents_from_FFT_ignores_context(): - snippet = ( - '' - ' /opt/cds-invenio/var/data/files/g148/2964970/content.png;context;1' - ' ' - ' .png;context' - ' TNR' - ' ' - ' 2017-07-19 09:29:27' - ' Main' - ' 1' - ' HIDDEN' - '' - ) # record/1610503 + snippet = ( # record/1610503 + ' /opt/cds-invenio/var/data/files/g148/2964970/content.png;context;1' + '' + ' .png;context TNR 2017-07-19' + ' 09:29:27 Main 1 HIDDEN' + ) result = hep.do(create_record(snippet)) @@ -280,19 +248,18 @@ def test_documents_from_FFT_does_not_require_s(): schema = load_schema('hep') subschema = schema['properties']['documents'] - snippet = ( - '' - ' http://www.mdpi.com/2218-1997/3/1/24/pdf' - ' Fulltext' - ' INSPIRE-PUBLIC' - '' - ) # DESY harvest + snippet = ( # DESY harvest + ' http://www.mdpi.com/2218-1997/3/1/24/pdf ' + ' Fulltext INSPIRE-PUBLIC' + ) expected = [ { 'key': 'document', 'fulltext': True, - 'url': 'http://www.mdpi.com/2218-1997/3/1/24/pdf' + 'url': 'http://www.mdpi.com/2218-1997/3/1/24/pdf', } ] result = hep.do(create_record(snippet)) @@ -315,13 +282,14 @@ def test_documents_from_FFT_does_not_require_s(): def test_fft_from_FFT_percent_percent(): - snippet = ( - '' - ' /opt/cds-invenio/var/tmp-shared/apsharvest_unzip_5dGfY5/articlebag-10-1103-PhysRevD-87-083514-apsxml/data/PhysRevD.87.083514/fulltext.xml' - ' HIDDEN' - ' APS' - '' - ) # record/1094156 + snippet = ( # record/1094156 + ' /opt/cds-invenio/var/tmp-shared/apsharvest_unzip_5dGfY5/' + ' articlebag-10-1103-PhysRevD-87-083514-apsxml/data/PhysRevD.87.083514/' + 'fulltext.xml' + ' HIDDEN APS' + ) result = hep.do(create_record(snippet)) assert 'documents' not in result @@ -349,7 +317,9 @@ def test_documents_to_FFT(): expected = [ { - 'a': 'http://localhost:5000/api/files/1234-1234-1234-1234/some_document.pdf', + 'a': ( + 'http://localhost:5000/api/files/1234-1234-1234-1234/some_document.pdf' + ), 'd': 'Thesis fulltext', 'f': '.pdf', 't': 'INSPIRE-PUBLIC', @@ -365,7 +335,8 @@ def test_documents_to_FFT(): assert expected == result['FFT'] -def test_documents_to_FFT_converts_afs_urls_to_path(legacy_afs_service_config): +@pytest.mark.usefixtures("_legacy_afs_service_config") +def test_documents_to_FFT_converts_afs_urls_to_path(): schema = load_schema('hep') subschema = schema['properties']['documents'] @@ -412,7 +383,7 @@ def test_documents_to_FFT_special_cases_arxiv_properly(): 'material': 'preprint', 'original_url': 
'http://export.arxiv.org/pdf/1712.04934', 'source': 'arxiv', - 'url': '/api/files/d82dc015-83ea-4d83-820b-adb7ce1e42d0/1712.04934.pdf' + 'url': '/api/files/d82dc015-83ea-4d83-820b-adb7ce1e42d0/1712.04934.pdf', } ], } # holdingpen/820589 @@ -444,9 +415,11 @@ def test_documents_to_FFT_uses_filename(): "description": "Article from SCOAP3", "filename": "scoap3-fulltext.pdf", "key": "136472d8763496230daa8b6b72fb219a", - "original_url": "http://legacy-afs-web/var/data/files/g206/4135590/content.pdf%3B1", + "original_url": ( + "http://legacy-afs-web/var/data/files/g206/4135590/content.pdf%3B1" + ), "source": "SCOAP3", - "url": "https://s3.cern.ch/inspire-prod-files-1/136472d8763496230daa8b6b72fb219a" + "url": "https://s3.cern.ch/inspire-prod-files-1/136472d8763496230daa8b6b72fb219a", } ] } # literature/1789709 @@ -457,7 +430,7 @@ def test_documents_to_FFT_uses_filename(): 'd': 'Article from SCOAP3', 't': 'SCOAP3', 'n': 'scoap3-fulltext', - 'f': '.pdf' + 'f': '.pdf', } ] @@ -498,7 +471,7 @@ def test_documents_to_FFT_uses_material_as_filename_fallback(): "filename": "document", "fulltext": True, "material": "publication", - } + }, ], } # literature/1852846 @@ -521,7 +494,7 @@ def test_documents_to_FFT_uses_material_as_filename_fallback(): "d": "Fulltext", "n": "document", "t": "INSPIRE-PUBLIC", - } + }, ] assert validate(snippet['documents'], subschema) is None @@ -564,7 +537,8 @@ def test_figures_to_FFT(): assert expected == result['FFT'] -def test_figures_to_FFT_converts_afs_urls_to_paths(legacy_afs_service_config): +@pytest.mark.usefixtures("_legacy_afs_service_config") +def test_figures_to_FFT_converts_afs_urls_to_paths(): schema = load_schema('hep') subschema = schema['properties']['figures'] @@ -581,11 +555,13 @@ def test_figures_to_FFT_converts_afs_urls_to_paths(legacy_afs_service_config): expected = [ { - 'a': 'file:///afs/cern.ch/project/inspire/PROD/var/files/some_figure.png%3B1', + 'a': ( + 'file:///afs/cern.ch/project/inspire/PROD/var/files/some_figure.png%3B1' + ), 'd': '00000 This figure illustrates something', 't': 'Plot', 'n': 'some_figure', - 'f': '.png' + 'f': '.png', } ] @@ -602,7 +578,18 @@ def test_figures_to_FFT_uses_filename(): snippet = { 'figures': [ { - "caption": "(Color online) (a) Comparison between the function $f_\\Gamma(Q\\xi)$ (\\ref{eq:fgamma}) and the Kawasaki function defined in footnote \\ref{footnote:kawasaki}. Large-$Q$ modes relax faster with $f_\\Gamma$ than with $K$. (b) Illustration of the contribution $\\Delta s_Q$ to the entropy density by a single slow mode with wave number $Q$ ($x{\\,\\equiv\\,}\\phi_Q/\\bar\\phi_Q$). $\\Delta s_Q$ is negative whether $\\phi_Q$ is below or above its equilibrium value ({\\it cf.} Eq.~(\\ref{eq:deltas}) below).", + "caption": ( + "(Color online) (a) Comparison between the function" + " $f_\\Gamma(Q\\xi)$ (\\ref{eq:fgamma}) and the Kawasaki" + " function defined in footnote \\ref{footnote:kawasaki}." + " Large-$Q$ modes relax faster with $f_\\Gamma$ than with" + " $K$. (b) Illustration of the contribution $\\Delta s_Q$" + " to the entropy density by a single slow mode with wave" + " number $Q$ ($x{\\,\\equiv\\,}\\phi_Q/\\bar\\phi_Q$)." + " $\\Delta s_Q$ is negative whether $\\phi_Q$ is below or" + " above its equilibrium value ({\\it cf.}" + " Eq.~(\\ref{eq:deltas}) below)." 
+ ), "filename": "plot_functions.png", "key": "b43cbd4ccd7cceb3a30d2b80894101d1", "source": "arxiv", @@ -614,7 +601,17 @@ def test_figures_to_FFT_uses_filename(): expected = [ { 'a': 'https://s3.cern.ch/inspire-prod-files-b/b43cbd4ccd7cceb3a30d2b80894101d1', - 'd': '00000 (Color online) (a) Comparison between the function $f_\\Gamma(Q\\xi)$ (\\ref{eq:fgamma}) and the Kawasaki function defined in footnote \\ref{footnote:kawasaki}. Large-$Q$ modes relax faster with $f_\\Gamma$ than with $K$. (b) Illustration of the contribution $\\Delta s_Q$ to the entropy density by a single slow mode with wave number $Q$ ($x{\\,\\equiv\\,}\\phi_Q/\\bar\\phi_Q$). $\\Delta s_Q$ is negative whether $\\phi_Q$ is below or above its equilibrium value ({\\it cf.} Eq.~(\\ref{eq:deltas}) below).', + 'd': ( + '00000 (Color online) (a) Comparison between the function' + ' $f_\\Gamma(Q\\xi)$ (\\ref{eq:fgamma}) and the Kawasaki' + ' function defined in footnote \\ref{footnote:kawasaki}.' + ' Large-$Q$ modes relax faster with $f_\\Gamma$ than with $K$.' + ' (b) Illustration of the contribution $\\Delta s_Q$ to the' + ' entropy density by a single slow mode with wave number $Q$' + ' ($x{\\,\\equiv\\,}\\phi_Q/\\bar\\phi_Q$). $\\Delta s_Q$ is' + ' negative whether $\\phi_Q$ is below or above its equilibrium' + ' value ({\\it cf.} Eq.~(\\ref{eq:deltas}) below).' + ), 't': 'Plot', 'n': 'plot_functions', 'f': '.png', @@ -628,29 +625,43 @@ def test_figures_to_FFT_uses_filename(): assert expected == result['FFT'] -def test_figures_from_FFT_generates_valid_uri(legacy_afs_service_config): +@pytest.mark.usefixtures("_legacy_afs_service_config") +def test_figures_from_FFT_generates_valid_uri(): schema = load_schema('hep') subschema = schema['properties']['figures'] - snippet = ( - '' - ' /opt/cds-invenio/var/data/files/g83/1678426/FKLP new_VF.png;1' - ' 00000 Inflationary potential ${g^{2}\\vp^{2}\\over 2} (1-a\\vp+b\\vp^{2})^2$ (\\ref{three}), for $a = 0.1$, $b = 0.0035$. The field is shown in Planck units, the potential $V$ is shown in units $g^{2}$. In realistic models of that type, $g \\sim 10^{-5} - 10^{-6}$ in Planck units, depending on details of the theory, so the height of the potential in this figure is about $10^{-10}$ in Planck units.' - ' .png' - ' FKLP new_VF' - ' ' - ' 2013-10-22 05:04:33' - ' Plot' - ' 1' - ' ' - '' - ) # record/1245001 + snippet = ( # record/1245001 + ' /opt/cds-invenio/var/data/files/g83/1678426/FKLP' + ' new_VF.png;1 00000 Inflationary' + ' potential ${g^{2}\\vp^{2}\\over 2} (1-a\\vp+b\\vp^{2})^2$ ' + ' (\\ref{three}), for $a = 0.1$, $b = 0.0035$. The field is shown in' + ' Planck units, the potential $V$ is shown in units $g^{2}$. In' + ' realistic models of that type, $g \\sim 10^{-5} - 10^{-6}$ in Planck' + ' units, depending on details of the theory, so the height of the' + ' potential in this figure is about $10^{-10}$ in Planck' + ' units. .png FKLP new_VF ' + ' 2013-10-22 05:04:33 Plot 1 ' + ) expected = [ { 'key': 'FKLP new_VF.png', - 'caption': 'Inflationary potential ${g^{2}\\vp^{2}\\over 2} (1-a\\vp+b\\vp^{2})^2$ (\\ref{three}), for $a = 0.1$, $b = 0.0035$. The field is shown in Planck units, the potential $V$ is shown in units $g^{2}$. 
In realistic models of that type, $g \\sim 10^{-5} - 10^{-6}$ in Planck units, depending on details of the theory, so the height of the potential in this figure is about $10^{-10}$ in Planck units.', - 'url': 'http://legacy-afs-web/var/data/files/g83/1678426/FKLP%20new_VF.png%3B1', + 'caption': ( + 'Inflationary potential ${g^{2}\\vp^{2}\\over 2}' + ' (1-a\\vp+b\\vp^{2})^2$ (\\ref{three}), for $a = 0.1$, $b =' + ' 0.0035$. The field is shown in Planck units, the potential' + ' $V$ is shown in units $g^{2}$. In realistic models of that' + ' type, $g \\sim 10^{-5} - 10^{-6}$ in Planck units, depending' + ' on details of the theory, so the height of the potential in' + ' this figure is about $10^{-10}$ in Planck units.' + ), + 'url': ( + 'http://legacy-afs-web/var/data/files/g83/1678426/FKLP%20new_VF.png%3B1' + ), 'source': 'arxiv', } ] @@ -662,7 +673,15 @@ def test_figures_from_FFT_generates_valid_uri(legacy_afs_service_config): expected = [ { 'a': 'file:///afs/cern.ch/project/inspire/PROD/var/data/files/g83/1678426/FKLP%20new_VF.png%3B1', - 'd': '00000 Inflationary potential ${g^{2}\\vp^{2}\\over 2} (1-a\\vp+b\\vp^{2})^2$ (\\ref{three}), for $a = 0.1$, $b = 0.0035$. The field is shown in Planck units, the potential $V$ is shown in units $g^{2}$. In realistic models of that type, $g \\sim 10^{-5} - 10^{-6}$ in Planck units, depending on details of the theory, so the height of the potential in this figure is about $10^{-10}$ in Planck units.', + 'd': ( + '00000 Inflationary potential ${g^{2}\\vp^{2}\\over 2}' + ' (1-a\\vp+b\\vp^{2})^2$ (\\ref{three}), for $a = 0.1$, $b =' + ' 0.0035$. The field is shown in Planck units, the potential' + ' $V$ is shown in units $g^{2}$. In realistic models of that' + ' type, $g \\sim 10^{-5} - 10^{-6}$ in Planck units, depending' + ' on details of the theory, so the height of the potential in' + ' this figure is about $10^{-10}$ in Planck units.' 
+ ), 't': 'Plot', 'n': 'FKLP new_VF', 'f': '.png', @@ -673,35 +692,29 @@ def test_figures_from_FFT_generates_valid_uri(legacy_afs_service_config): assert expected == result['FFT'] -def test_figures_and_documents_from_FFT_without_d_subfield(legacy_afs_service_config): +@pytest.mark.usefixtures("_legacy_afs_service_config") +def test_figures_and_documents_from_FFT_without_d_subfield(): schema = load_schema('hep') figures_subschema = schema['properties']['figures'] documents_subschema = schema['properties']['documents'] - snippet = ( - '' - ' ' - ' /opt/cds-invenio/var/data/files/g151/3037399/content.png;1' - ' .png' - ' FIG10' - ' ' - ' 2017-10-04 07:54:54' - ' Main' - ' 1' - ' ' - ' ' - ' ' - ' /opt/cds-invenio/var/data/files/g151/3037619/content.pdf;1' - ' .pdf' - ' arXiv:1710.01187' - ' ' - ' 2017-10-04 09:42:00' - ' Main' - ' 1' - ' ' - ' ' - '' - ) # record/1628455 + snippet = ( # record/1628455 + ' /opt/cds-invenio/var/data/files/g151/3037399/content.png;1' + '' + ' .png FIG10 ' + ' 2017-10-04 07:54:54 ' + ' Main 1 ' + ' ' + ' /opt/cds-invenio/var/data/files/g151/3037619/content.pdf;1' + '' + ' .pdf arXiv:1710.01187 2017-10-04 09:42:00 Main' + ' 1 ' + ' ' + ) expected_figures = [ { @@ -729,27 +742,33 @@ def test_figures_and_documents_from_FFT_without_d_subfield(legacy_afs_service_co assert expected_documents == result['documents'] -def test_figures_from_FFT_with_composite_file_extension(legacy_afs_service_config): +@pytest.mark.usefixtures("_legacy_afs_service_config") +def test_figures_from_FFT_with_composite_file_extension(): schema = load_schema('hep') subschema = schema['properties']['figures'] - snippet = ( - '' - ' /opt/cds-invenio/var/data/files/g22/457549/266.stripe82.jpg.png;1' - ' 00011 Examples of relaxed early-types (top three rows) and galaxies classified as late-type (bottom three rows). We show both the multi-colour standard-depth image (left-hand column) and itsdeeper Stripe82 counterpart (right-hand column).' - ' .jpg.png' - ' 266.stripe82' - ' ' - ' 2010-10-09 23:23:31' - ' Plot' - ' 1' - ' ' - '' - ) # record/852500 + snippet = ( # record/852500 + ' /opt/cds-invenio/var/data/files/g22/457549/266.stripe82.jpg.png;1' + '' + ' 00011 Examples of relaxed early-types (top three rows)' + ' and galaxies classified as late-type (bottom three rows). We show both the' + ' multi-colour standard-depth image (left-hand column) and itsdeeper Stripe82' + ' counterpart (right-hand column). .jpg.png 266.stripe82 ' + ' 2010-10-09' + ' 23:23:31 Plot 1 ' + ) expected = [ { - 'caption': 'Examples of relaxed early-types (top three rows) and galaxies classified as late-type (bottom three rows). We show both the multi-colour standard-depth image (left-hand column) and itsdeeper Stripe82 counterpart (right-hand column).', + 'caption': ( + 'Examples of relaxed early-types (top three rows) and galaxies' + ' classified as late-type (bottom three rows). We show both the' + ' multi-colour standard-depth image (left-hand column) and itsdeeper' + ' Stripe82 counterpart (right-hand column).' + ), 'key': '266.stripe82.jpg.png', 'url': 'http://legacy-afs-web/var/data/files/g22/457549/266.stripe82.jpg.png%3B1', 'source': 'arxiv', @@ -769,7 +788,20 @@ def test_figures2marc_handles_unicode(): record = { 'figures': [ { - 'caption': u'Hard gaps. (a) Differential conductance $G_S$ of an epitaxial nanowire device as a function of backgate voltage $V_{BG}$ and source\u00d0drain voltage $V_{SD}$. 
Increasing $V_{BG}$, the conductance increases from the tunneling to the Andreev regime (orange and blue plots in the bottom). Adapted from Ref. \\cite{Chang2015}. (b) Subgap conductance $G_s$ as a function of the normal (above-gap) conductance $G_n$. Red curve is the theory prediction for a single channel NS contact, Eq. (\\ref{NS-Andreev}). Inset shows different $dI/dV$ taken at different values of $G_n$. Adapted from Ref. \\cite{Zhang2016}.', + 'caption': ( + u'Hard gaps. (a) Differential conductance $G_S$ of an' + u' epitaxial nanowire device as a function of backgate' + u' voltage $V_{BG}$ and source\u00d0drain voltage $V_{SD}$.' + u' Increasing $V_{BG}$, the conductance increases from the' + u' tunneling to the Andreev regime (orange and blue plots' + u' in the bottom). Adapted from Ref. \\cite{Chang2015}. (b)' + u' Subgap conductance $G_s$ as a function of the normal' + u' (above-gap) conductance $G_n$. Red curve is the theory' + u' prediction for a single channel NS contact, Eq.' + u' (\\ref{NS-Andreev}). Inset shows different $dI/dV$ taken' + u' at different values of $G_n$. Adapted from Ref.' + u' \\cite{Zhang2016}.' + ), 'key': 'Fig21.png', 'label': 'fig:21', 'material': 'preprint', @@ -783,7 +815,19 @@ def test_figures2marc_handles_unicode(): expected = [ { 'a': 'http://localhost:5000/api/files/feb489f4-7e13-4ca4-b51c-2c8c2242d596/Fig21.png', - 'd': u'00000 Hard gaps. (a) Differential conductance $G_S$ of an epitaxial nanowire device as a function of backgate voltage $V_{BG}$ and source\xd0drain voltage $V_{SD}$. Increasing $V_{BG}$, the conductance increases from the tunneling to the Andreev regime (orange and blue plots in the bottom). Adapted from Ref. \\cite{Chang2015}. (b) Subgap conductance $G_s$ as a function of the normal (above-gap) conductance $G_n$. Red curve is the theory prediction for a single channel NS contact, Eq. (\\ref{NS-Andreev}). Inset shows different $dI/dV$ taken at different values of $G_n$. Adapted from Ref. \\cite{Zhang2016}.', + 'd': ( + u'00000 Hard gaps. (a) Differential conductance $G_S$ of an' + u' epitaxial nanowire device as a function of backgate voltage' + u' $V_{BG}$ and source\xd0drain voltage $V_{SD}$. Increasing' + u' $V_{BG}$, the conductance increases from the tunneling to' + u' the Andreev regime (orange and blue plots in the bottom).' + u' Adapted from Ref. \\cite{Chang2015}. (b) Subgap conductance' + u' $G_s$ as a function of the normal (above-gap) conductance' + u' $G_n$. Red curve is the theory prediction for a single' + u' channel NS contact, Eq. (\\ref{NS-Andreev}). Inset shows' + u' different $dI/dV$ taken at different values of $G_n$.' + u' Adapted from Ref. \\cite{Zhang2016}.' 
+ ), 't': 'Plot', 'n': 'Fig21', 'f': '.png', @@ -799,19 +843,17 @@ def test_documents_from_FFT_without_t_subfield(): subschema = schema['properties']['documents'] snippet = ( - "" - " http://scoap3.iop.org/article/doi/10.1088/1674-1137/43/1/013104?format=pdf" - " .pdf" - " fulltext" - "" + " http://scoap3.iop.org/article/doi/10.1088/1674-1137/43/1/013104?format=pdf" + " .pdf fulltext" ) expected = [ { 'url': 'http://scoap3.iop.org/article/doi/10.1088/1674-1137/43/1/013104?format=pdf', - 'key': 'fulltext.pdf' + 'key': 'fulltext.pdf', } - ] result = hep.do(create_record(snippet)) assert validate(result['documents'], subschema) is None diff --git a/tests/test_hep_model.py b/tests/test_hep_model.py index 00cdeb54..8193753b 100644 --- a/tests/test_hep_model.py +++ b/tests/test_hep_model.py @@ -23,18 +23,16 @@ from __future__ import absolute_import, division, print_function from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from inspire_dojson.hep import hep -from inspire_schemas.api import load_schema, validate def test_ensure_curated(): schema = load_schema('hep') subschema = schema['properties']['curated'] - snippet = ( - '' - ) # synthetic data + snippet = '' # synthetic data expected = True result = hep.do(create_record(snippet)) @@ -47,12 +45,12 @@ def test_ensure_curated_when_500_present(): schema = load_schema('hep') subschema = schema['properties']['curated'] - snippet = ( + snippet = ( # record/1450044 '' ' arXiv' ' 5 pages' '' - ) # record/1450044 + ) expected = True result = hep.do(create_record(snippet)) @@ -62,12 +60,13 @@ def test_ensure_curated_when_500_present(): def test_set_citeable_when_not_citeable(): - snippet = ( - '' - ' 152-61' - ' Proc. of Athens Topical Conference on Recently Discovered Resonant Particles, Athens, Ohio, 1963. Athens, Ohio, Ohio U., 1963. p. 152-61' - '' - ) # record/59 + snippet = ( # record/59 + ' 152-61 Proc. of Athens' + ' Topical Conference on Recently Discovered Resonant Particles, Athens,' + ' Ohio, 1963. Athens, Ohio, Ohio U., 1963. p.' + ' 152-61' + ) result = hep.do(create_record(snippet)) @@ -78,7 +77,7 @@ def test_set_citeable_when_citeable(): schema = load_schema('hep') subschema = schema['properties']['citeable'] - snippet = ( + snippet = ( # record/4328 '' ' Nucl.Phys.' 
' 22' @@ -86,7 +85,7 @@ def test_set_citeable_when_citeable(): ' 1961' ' 1214548' '' - ) # record/4328 + ) expected = True result = hep.do(create_record(snippet)) diff --git a/tests/test_hepnames.py b/tests/test_hepnames.py index 3bc68b47..c428c02b 100644 --- a/tests/test_hepnames.py +++ b/tests/test_hepnames.py @@ -23,12 +23,10 @@ from __future__ import absolute_import, division, print_function import pytest - from dojson.contrib.marc21.utils import create_record - -from inspire_dojson.hepnames import hepnames2marc, hepnames from inspire_schemas.api import load_schema, validate +from inspire_dojson.hepnames import hepnames, hepnames2marc EXPERIMENTS_DATA = [ [ @@ -42,23 +40,27 @@ current ''', - [{ - 'curated_relation': True, - 'current': True, - 'end_date': '2020', - 'name': 'CERN-ALPHA', - 'record': { - '$ref': 'http://localhost:5000/api/experiments/1', - }, - 'start_date': '2014', - }], - [{ - '0': 1, - 'd': '2020', - 'e': 'CERN-ALPHA', - 's': '2014', - 'z': 'current', - }], + [ + { + 'curated_relation': True, + 'current': True, + 'end_date': '2020', + 'name': 'CERN-ALPHA', + 'record': { + '$ref': 'http://localhost:5000/api/experiments/1', + }, + 'start_date': '2014', + } + ], + [ + { + '0': 1, + 'd': '2020', + 'e': 'CERN-ALPHA', + 's': '2014', + 'z': 'current', + } + ], ], [ 'current_curated_hidden', @@ -72,25 +74,29 @@ HIDDEN ''', - [{ - 'curated_relation': True, - 'current': True, - 'end_date': '2020', - 'name': 'CERN-ALPHA', - 'record': { - '$ref': 'http://localhost:5000/api/experiments/1', - }, - 'start_date': '2014', - 'hidden': True - }], - [{ - '0': 1, - 'd': '2020', - 'e': 'CERN-ALPHA', - 's': '2014', - 'z': 'current', - 'h': 'HIDDEN', - }], + [ + { + 'curated_relation': True, + 'current': True, + 'end_date': '2020', + 'name': 'CERN-ALPHA', + 'record': { + '$ref': 'http://localhost:5000/api/experiments/1', + }, + 'start_date': '2014', + 'hidden': True, + } + ], + [ + { + '0': 1, + 'd': '2020', + 'e': 'CERN-ALPHA', + 's': '2014', + 'z': 'current', + 'h': 'HIDDEN', + } + ], ], [ 'notcurrent_curated', @@ -100,18 +106,22 @@ 3 ''', - [{ - 'curated_relation': True, - 'current': False, - 'name': 'SDSS', - 'record': { - '$ref': 'http://localhost:5000/api/experiments/3', - }, - }], - [{ - '0': 3, - 'e': 'SDSS', - }], + [ + { + 'curated_relation': True, + 'current': False, + 'name': 'SDSS', + 'record': { + '$ref': 'http://localhost:5000/api/experiments/3', + }, + } + ], + [ + { + '0': 3, + 'e': 'SDSS', + } + ], ], [ 'notcurrent_notcurated', @@ -120,14 +130,18 @@ NOTCURATED ''', - [{ - 'name': 'NOTCURATED', - 'curated_relation': False, - 'current': False, - }], - [{ - 'e': 'NOTCURATED', - }], + [ + { + 'name': 'NOTCURATED', + 'curated_relation': False, + 'current': False, + } + ], + [ + { + 'e': 'NOTCURATED', + } + ], ], [ 'repeated_experiment', @@ -215,7 +229,7 @@ 'record': { '$ref': 'http://localhost:5000/api/experiments/2', }, - 'start_date': '2015' + 'start_date': '2015', }, ], [ @@ -237,7 +251,7 @@ @pytest.mark.parametrize( - 'test_name,xml_snippet,expected_json,expected_marc', + ('test_name', 'xml_snippet', 'expected_json', 'expected_marc'), EXPERIMENTS_DATA, ids=[test_data[0] for test_data in EXPERIMENTS_DATA], ) @@ -261,7 +275,7 @@ def test_ids_from_double_035__a_9(): schema = load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( + snippet = ( # record/984519 '' ' ' ' INSPIRE-00134135' @@ -272,7 +286,7 @@ def test_ids_from_double_035__a_9(): ' BAI' ' ' '' - ) # record/984519 + ) expected = [ { @@ -290,14 +304,8 @@ def test_ids_from_double_035__a_9(): assert 
expected == result['ids'] expected = [ - { - 'a': 'H.Vogel.1', - '9': 'BAI' - }, - { - 'a': 'INSPIRE-00134135', - '9': 'INSPIRE' - }, + {'a': 'H.Vogel.1', '9': 'BAI'}, + {'a': 'INSPIRE-00134135', '9': 'INSPIRE'}, ] result = hepnames2marc.do(result) @@ -344,12 +352,12 @@ def test_ids_from_035__a_9_with_cern(): schema = load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( + snippet = ( # record/1064570 '' ' CERN' ' CERN-622961' '' - ) # record/1064570 + ) expected = [ { @@ -362,12 +370,7 @@ def test_ids_from_035__a_9_with_cern(): assert validate(result['ids'], subschema) is None assert expected == result['ids'] - expected = [ - { - '9': 'CERN', - 'a': 'CERN-622961' - } - ] + expected = [{'9': 'CERN', 'a': 'CERN-622961'}] result = hepnames2marc.do(result) assert expected == result['035'] @@ -460,12 +463,12 @@ def test_ids_from_035__a_9_with_desy(): schema = load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( + snippet = ( # record/993224 '' ' DESY-1001805' ' DESY' '' - ) # record/993224 + ) expected = [ { @@ -493,12 +496,12 @@ def test_ids_from_035__a_9_with_wikipedia(): schema = load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( + snippet = ( # record/985898 '' ' Wikipedia' ' Guido_Tonelli' '' - ) # record/985898 + ) expected = [ { @@ -526,12 +529,12 @@ def test_ids_from_035__a_9_with_slac(): schema = load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( + snippet = ( # record/1028379 '' ' SLAC' ' SLAC-218626' '' - ) # record/1028379 + ) expected = [ { @@ -559,11 +562,11 @@ def test_ids_from_035__a_with_bai(): schema = load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( + snippet = ( # record/1464894 '' ' Jian.Long.Han.1' '' - ) # record/1464894 + ) expected = [ { @@ -591,7 +594,7 @@ def test_ids_from_double_035__a_9_with_kaken(): schema = load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( + snippet = ( # record/1474271 '' ' ' ' BAI' @@ -602,7 +605,7 @@ def test_ids_from_double_035__a_9_with_kaken(): ' 70139070' ' ' '' - ) # record/1474271 + ) expected = [ { @@ -638,12 +641,12 @@ def test_ids_from_035__a_9_with_googlescholar(): schema = load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( + snippet = ( # record/1467553 '' ' GoogleScholar' ' Tnl-9KoAAAAJ' '' - ) # record/1467553 + ) expected = [ { @@ -671,12 +674,12 @@ def test_ids_from_035__a_9_with_viaf(): schema = load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( + snippet = ( # record/1008109 '' ' VIAF' ' 34517183' '' - ) # record/1008109 + ) expected = [ { @@ -704,12 +707,12 @@ def test_ids_from_035__a_9_with_researcherid(): schema = load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( + snippet = ( # record/1051026 '' ' RESEARCHERID' ' B-4717-2008' '' - ) # record/1051026 + ) expected = [ { @@ -737,12 +740,12 @@ def test_ids_from_035__a_9_with_scopus(): schema = load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( + snippet = ( # record/1017182 '' ' SCOPUS' ' 7103280792' '' - ) # record/1017182 + ) expected = [ { @@ -768,10 +771,11 @@ def test_ids_from_035__a_9_with_scopus(): def test_ids_from_035__9(): snippet = ( + # record/edit/?ln=en#state=edit&recid=1474355&recrev=20160707223728 '' ' ' ' INSPIRE' - ' ' # record/edit/?ln=en#state=edit&recid=1474355&recrev=20160707223728 + ' ' ' ' ' CERN' ' ' # record/1364570 @@ -790,7 +794,7 @@ def test_ids_from_035__a_z_same_field_9(): schema = 
load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( + snippet = ( # record/1709705 '' ' ' ' INSPIRE' @@ -806,7 +810,7 @@ def test_ids_from_035__a_z_same_field_9(): ' Yen.Chen.Pan.1' ' ' '' - ) # record/1709705 + ) expected = [ { @@ -858,7 +862,7 @@ def test_ids_from_035__a_z_different_fields_9(): schema = load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( + snippet = ( # record/1357501 '' ' 1357501' ' ' @@ -878,7 +882,7 @@ def test_ids_from_035__a_z_different_fields_9(): ' 0000-0001-5010-7517' ' ' '' - ) # record/1357501 + ) expected = [ { @@ -930,13 +934,13 @@ def test_name_from_100__a_g_q(): schema = load_schema('authors') subschema = schema['properties']['name'] - snippet = ( + snippet = ( # record/1019100 '' ' Abarbanel, Henry D.I.' ' Henry D.I. Abarbanel' ' ACTIVE' '' - ) # record/1019100 + ) expected = { 'value': 'Abarbanel, Henry D.I.', @@ -961,12 +965,12 @@ def test_name_from_100__g_q_populates_value_from_preferred_name(): schema = load_schema('authors') subschema = schema['properties']['name'] - snippet = ( + snippet = ( # record/1259075 '' ' ACTIVE' ' Vyacheslav I. Yukalova' '' - ) # record/1259075 + ) expected = { 'preferred_name': 'Vyacheslav I. Yukalova', @@ -991,13 +995,13 @@ def test_title_from_100__a_c_q_discards_default_title(): schema = load_schema('authors') subschema = schema['properties']['name'] - snippet = ( + snippet = ( # record/1270441 '' ' Joosten, Sylvester Johannes' ' title (e.g. Sir)' ' Sylvester Johannes Joosten' '' - ) # record/1270441 + ) expected = { 'preferred_name': 'Sylvester Johannes Joosten', @@ -1021,13 +1025,13 @@ def test_status_from_100__a_g_q(): schema = load_schema('authors') subschema = schema['properties']['status'] - snippet = ( + snippet = ( # record/1019100 '' ' Abarbanel, Henry D.I.' ' Henry D.I. Abarbanel' ' ACTIVE' '' - ) # record/1019100 + ) expected = 'active' result = hepnames.do(create_record(snippet)) @@ -1050,14 +1054,14 @@ def test_birth_date_death_date_from_100__a_d_g_q(): subschema_birth = schema['properties']['birth_date'] subschema_death = schema['properties']['death_date'] - snippet = ( + snippet = ( # record/1017374 '' ' Bardeen, John' ' 1908-05-23 - 1991-01-30' ' DECEASED' ' John Bardeen' '' - ) # record/1017374 + ) expected_birth = '1908-05-23' expected_death = '1991-01-30' @@ -1084,14 +1088,14 @@ def test_birth_date_death_date_from_100__a_d_g_q_only_years(): subschema_birth = schema['properties']['birth_date'] subschema_death = schema['properties']['death_date'] - snippet = ( + snippet = ( # record/983266 '' ' Wolfenstein, Lincoln' ' 1923-2015' ' DECEASED' ' Lincoln Wolfenstein' '' - ) # record/983266 + ) expected_birth = '1923' expected_death = '2015' @@ -1117,14 +1121,14 @@ def test_death_date_from_100__a_d_g_q(): schema = load_schema('authors') subschema = schema['properties']['death_date'] - snippet = ( + snippet = ( # record/1046337 '' ' Blosser, Henry G.' ' -2013-03-20' ' DECEASED' ' Henry G. 
Blosser' '' - ) # record/1046337 + ) expected = '2013-03-20' result = hepnames.do(create_record(snippet)) @@ -1147,13 +1151,13 @@ def test_name_variants_from_400__triple_a(): schema = load_schema('authors') subschema = schema['properties']['name']['properties']['name_variants'] - snippet = ( + snippet = ( # record/1292399 '' ' Yosef Cohen, Hadar' ' Josef Cohen, Hadar' ' Cohen, Hadar Josef' '' - ) # record/1292399 + ) expected = { 'name_variants': [ @@ -1181,7 +1185,7 @@ def test_advisors_from_701__a_g_i(): schema = load_schema('authors') subschema = schema['properties']['advisors'] - snippet = ( + snippet = ( # record/1474091 '' ' Rivelles, Victor O.' ' PhD' @@ -1189,22 +1193,17 @@ def test_advisors_from_701__a_g_i(): ' 991627' ' 1' '' - ) # record/1474091 + ) expected = [ { 'name': 'Rivelles, Victor O.', 'degree_type': 'phd', - 'ids': [ - { - 'schema': 'INSPIRE ID', - 'value': 'INSPIRE-00120420' - } - ], + 'ids': [{'schema': 'INSPIRE ID', 'value': 'INSPIRE-00120420'}], 'record': { '$ref': 'http://localhost:5000/api/authors/991627', }, - 'curated_relation': True + 'curated_relation': True, }, ] result = hepnames.do(create_record(snippet)) @@ -1228,7 +1227,7 @@ def test_advisors_from_701__a_g_i_h(): schema = load_schema('authors') subschema = schema['properties']['advisors'] - snippet = ( + snippet = ( # synthetic data '' ' Rivelles, Victor O.' ' PhD' @@ -1237,23 +1236,18 @@ def test_advisors_from_701__a_g_i_h(): ' 1' ' HIDDEN' '' - ) # synthetic data + ) expected = [ { 'name': 'Rivelles, Victor O.', 'degree_type': 'phd', - 'ids': [ - { - 'schema': 'INSPIRE ID', - 'value': 'INSPIRE-00120420' - } - ], + 'ids': [{'schema': 'INSPIRE ID', 'value': 'INSPIRE-00120420'}], 'record': { '$ref': 'http://localhost:5000/api/authors/991627', }, 'curated_relation': True, - 'hidden': True + 'hidden': True, }, ] result = hepnames.do(create_record(snippet)) @@ -1278,24 +1272,19 @@ def test_advisors_from_701__a_g_i_orcid(): schema = load_schema('authors') subschema = schema['properties']['advisors'] - snippet = ( + snippet = ( # record/1413663 '' ' Riccioni, Fabio' ' PhD' ' 0000-0003-4702-3632' '' - ) # record/1413663 + ) expected = [ { 'name': 'Riccioni, Fabio', 'degree_type': 'phd', - 'ids': [ - { - 'schema': 'ORCID', - 'value': '0000-0003-4702-3632' - } - ], + 'ids': [{'schema': 'ORCID', 'value': '0000-0003-4702-3632'}], }, ] result = hepnames.do(create_record(snippet)) @@ -1319,33 +1308,25 @@ def test_email_addresses_from_371__a_m_z(): schema = load_schema('authors') subschema = schema['properties']['email_addresses'] - snippet = ( + snippet = ( # record/1222902 '' ' Siegen U.' ' test@hep.physik.uni-siegen.de' ' current' '' - ) # record/1222902 + ) - expected = [ - { - 'current': True, - 'value': 'test@hep.physik.uni-siegen.de' - } - ] + expected = [{'current': True, 'value': 'test@hep.physik.uni-siegen.de'}] result = hepnames.do(create_record(snippet)) assert validate(result['email_addresses'], subschema) is None assert expected == result['email_addresses'] expected = [ - { - "a": "Siegen U.", - "z": "Current" - }, + {"a": "Siegen U.", "z": "Current"}, { "m": "test@hep.physik.uni-siegen.de", - } + }, ] result = hepnames2marc.do(result) @@ -1357,24 +1338,18 @@ def test_email_addresses_from_371__a_repeated_m_z(): schema = load_schema('authors') subschema = schema['properties']['email_addresses'] - snippet = ( + snippet = ( # record/1019084 '' ' Sao Paulo U.' 
' test@usp.br' ' test@fma.if.usp.br' ' Current' '' - ) # record/1019084 + ) expected = [ - { - 'current': True, - 'value': 'test@usp.br' - }, - { - 'current': True, - 'value': 'test@fma.if.usp.br' - }, + {'current': True, 'value': 'test@usp.br'}, + {'current': True, 'value': 'test@fma.if.usp.br'}, ] result = hepnames.do(create_record(snippet)) @@ -1382,16 +1357,13 @@ def test_email_addresses_from_371__a_repeated_m_z(): assert expected == result['email_addresses'] expected = [ - { - "a": "Sao Paulo U.", - "z": "Current" - }, + {"a": "Sao Paulo U.", "z": "Current"}, { "m": "test@usp.br", }, { "m": "test@fma.if.usp.br", - } + }, ] result = hepnames2marc.do(result) @@ -1403,7 +1375,7 @@ def test_email_addresses_from_371__a_o_r_s_t(): schema = load_schema('authors') subschema = schema['properties']['email_addresses'] - snippet = ( + snippet = ( # record/1060782 '' ' IMSc, Chennai' ' test@imsc.res.in' @@ -1411,29 +1383,19 @@ def test_email_addresses_from_371__a_o_r_s_t(): ' 2012' ' 2013' '' - ) # record/1060782 + ) - expected = [ - { - 'current': False, - 'value': 'test@imsc.res.in' - } - ] + expected = [{'current': False, 'value': 'test@imsc.res.in'}] result = hepnames.do(create_record(snippet)) assert validate(result['email_addresses'], subschema) is None assert expected == result['email_addresses'] expected = [ - { - "a": "IMSc, Chennai", - "s": "2012", - "r": "PD", - "t": "2013" - }, + {"a": "IMSc, Chennai", "s": "2012", "r": "PD", "t": "2013"}, { "o": "test@imsc.res.in", - } + }, ] result = hepnames2marc.do(result) @@ -1445,19 +1407,13 @@ def test_email_addresses_from_595__m(): schema = load_schema('authors') subschema = schema['properties']['email_addresses'] - snippet = ( + snippet = ( # record/1021896 '' ' test@pnnl.gov' '' - ) # record/1021896 + ) - expected = [ - { - 'current': True, - 'hidden': True, - 'value': 'test@pnnl.gov' - } - ] + expected = [{'current': True, 'hidden': True, 'value': 'test@pnnl.gov'}] result = hepnames.do(create_record(snippet)) assert validate(result['email_addresses'], subschema) is None @@ -1478,19 +1434,13 @@ def test_email_addresses_from_595__o(): schema = load_schema('authors') subschema = schema['properties']['email_addresses'] - snippet = ( + snippet = ( # record/1021896 '' ' test@pnl.gov' '' - ) # record/1021896 + ) - expected = [ - { - 'current': False, - 'hidden': True, - 'value': 'test@pnl.gov' - } - ] + expected = [{'current': False, 'hidden': True, 'value': 'test@pnl.gov'}] result = hepnames.do(create_record(snippet)) assert validate(result['email_addresses'], subschema) is None @@ -1511,11 +1461,11 @@ def test_positions_from_371__a(): schema = load_schema('authors') subschema = schema['properties']['positions'] - snippet = ( + snippet = ( # record/997958 '' ' Aachen, Tech. Hochsch.' '' - ) # record/997958 + ) expected = [ { @@ -1527,9 +1477,7 @@ def test_positions_from_371__a(): assert validate(result['positions'], subschema) is None assert expected == result['positions'] - expected = [ - {'a': 'Aachen, Tech. Hochsch.'} - ] + expected = [{'a': 'Aachen, Tech. 
Hochsch.'}] result = hepnames2marc.do(result) assert expected == result['371'] @@ -1539,12 +1487,12 @@ def test_positions_from_371__a_z(): schema = load_schema('authors') subschema = schema['properties']['positions'] - snippet = ( + snippet = ( # record/1408378 '' ' Argonne' ' current' '' - ) # record/1408378 + ) expected = [ { @@ -1557,12 +1505,7 @@ def test_positions_from_371__a_z(): assert validate(result['positions'], subschema) is None assert expected == result['positions'] - expected = [ - { - 'a': 'Argonne', - 'z': 'Current' - } - ] + expected = [{'a': 'Argonne', 'z': 'Current'}] result = hepnames2marc.do(result) @@ -1573,13 +1516,13 @@ def test_positions_from_371__a_r_z(): schema = load_schema('authors') subschema = schema['properties']['positions'] - snippet = ( + snippet = ( # record/997958 '' ' Antwerp U.' ' SENIOR' ' Current' '' - ) # record/997958 + ) expected = [ { @@ -1593,13 +1536,7 @@ def test_positions_from_371__a_r_z(): assert validate(result['positions'], subschema) is None assert expected == result['positions'] - expected = [ - { - 'a': 'Antwerp U.', - 'r': 'SENIOR', - 'z': 'Current' - } - ] + expected = [{'a': 'Antwerp U.', 'r': 'SENIOR', 'z': 'Current'}] result = hepnames2marc.do(result) assert expected == result['371'] @@ -1609,21 +1546,21 @@ def test_positions_from_371__a_r_z_h(): schema = load_schema('authors') subschema = schema['properties']['positions'] - snippet = ( + snippet = ( # synthetic data '' ' Antwerp U.' ' SENIOR' ' Current' ' HIDDEN' '' - ) # synthetic data + ) expected = [ { 'current': True, 'institution': 'Antwerp U.', 'rank': 'SENIOR', - 'hidden': True + 'hidden': True, }, ] result = hepnames.do(create_record(snippet)) @@ -1648,23 +1585,21 @@ def test_positions_from_371__a_r_t_z(): schema = load_schema('authors') subschema = schema['properties']['positions'] - snippet = ( + snippet = ( # record/1037568 '' ' San Luis Potosi U.' ' Master' ' 2007' ' 903830' '' - ) # record/1037568 + ) expected = [ { 'end_date': '2007', 'institution': 'San Luis Potosi U.', 'rank': 'MASTER', - 'record': { - '$ref': 'http://localhost:5000/api/institutions/903830' - }, + 'record': {'$ref': 'http://localhost:5000/api/institutions/903830'}, 'curated_relation': True, }, ] @@ -1686,12 +1621,12 @@ def test_positions_from_371__a_r_t_z(): def test_positions_from_371__r_t(): - snippet = ( + snippet = ( # record/1038489 '' ' UG' ' 1970' '' - ) # record/1038489 + ) result = hepnames.do(create_record(snippet)) @@ -1702,13 +1637,13 @@ def test_positions_from_371__a_r_t(): schema = load_schema('authors') subschema = schema['properties']['positions'] - snippet = ( + snippet = ( # record/1590188 '' ' Case Western Reserve U.' 
' UNDERGRADUATE' ' 2011' '' - ) # record/1590188 + ) expected = [ { @@ -1738,12 +1673,12 @@ def test_arxiv_categories_from_65017a_2(): schema = load_schema('authors') subschema = schema['properties']['arxiv_categories'] - snippet = ( + snippet = ( # record/1010819 '' ' INSPIRE' ' HEP-TH' '' - ) # record/1010819 + ) expected = [ 'hep-th', @@ -1768,12 +1703,12 @@ def test_arxiv_categories_from_65017a_2_obsolete_category(): schema = load_schema('authors') subschema = schema['properties']['arxiv_categories'] - snippet = ( + snippet = ( # record/1010819 '' ' INSPIRE' ' ATOM-PH' '' - ) # record/1010819 + ) expected = [ 'physics.atom-ph', @@ -1798,12 +1733,12 @@ def test_inspire_categories_from_65017a_2(): schema = load_schema('authors') subschema = schema['properties']['inspire_categories'] - snippet = ( + snippet = ( # record/1271076 '' ' INSPIRE' ' Computing' '' - ) # record/1271076 + ) expected = [ {'term': 'Computing'}, @@ -1828,12 +1763,12 @@ def test_inspire_categories_from_65017a_2_E(): schema = load_schema('authors') subschema = schema['properties']['inspire_categories'] - snippet = ( + snippet = ( # record/1019112 '' ' INSPIRE' ' E' '' - ) # record/1019112 + ) expected = [ {'term': 'Experiment-HEP'}, @@ -1858,15 +1793,12 @@ def test_public_notes_from_667__a(): schema = load_schema('authors') subschema = schema['properties']['public_notes'] - snippet = ( - '' - ' Do not confuse with Acharya, Bannanje Sripath' - '' - ) # record/1018999 + snippet = ( # record/1018999 + ' Do not' + ' confuse with Acharya, Bannanje Sripath' + ) - expected = [ - {'value': 'Do not confuse with Acharya, Bannanje Sripath'} - ] + expected = [{'value': 'Do not confuse with Acharya, Bannanje Sripath'}] result = hepnames.do(create_record(snippet)) assert validate(result['public_notes'], subschema) is None @@ -1882,11 +1814,11 @@ def test_public_notes_from_667__a(): def test_previous_names_from_667__a(): - snippet = ( + snippet = ( # record/1281982 '' ' Formerly Tomoko Furukawa' '' - ) # record/1281982 + ) expected = ['Tomoko Furukawa'] @@ -1901,7 +1833,7 @@ def test_previous_names_to_667__a(): expected = [ {'a': 'Formerly Tomoko Furukawa'}, - {'a': 'Formerly Second previous name'} + {'a': 'Formerly Second previous name'}, ] # record/1281982 @@ -1925,11 +1857,11 @@ def test_awards_from_678__a(): schema = load_schema('authors') subschema = schema['properties']['awards'] - snippet = ( + snippet = ( # record/1050484 '' ' Nobel Prize Physics 2003' ' ' - ) # record/1050484 + ) expected = [ { @@ -1956,12 +1888,12 @@ def test_private_notes_from_595__a_9(): schema = load_schema('authors') subschema = schema['properties']['_private_notes'] - snippet = ( + snippet = ( # record/1050484 '' ' Author prefers Alexandrov, A.S.' 
' SPIRES-HIDDEN' '' - ) # record/1050484 + ) expected = [ { @@ -1989,12 +1921,11 @@ def test_private_notes_from_595__double_a(): schema = load_schema('authors') subschema = schema['properties']['_private_notes'] - snippet = ( - '' - ' "I want to hide my personal information on REDACTED" 7/2017' - ' REDACTED' - '' - ) # record/1279232 + snippet = ( # record/1279232 + ' "I want to' + ' hide my personal information on REDACTED" 7/2017 ' + ' REDACTED' + ) expected = [ {'value': '"I want to hide my personal information on REDACTED" 7/2017'}, @@ -2018,18 +1949,14 @@ def test_urls_from_8564_u_and_8564_g_u_y(): schema = load_schema('authors') subschema = schema['properties']['urls'] - snippet = ( - '' - ' ' - ' http://www.haydenplanetarium.org/tyson/' - ' ' - ' ' - ' active' - ' https://twitter.com/neiltyson' - ' TWITTER' - ' ' - '' - ) # record/1073331 + snippet = ( # record/1073331 + ' http://www.haydenplanetarium.org/tyson/ ' + ' active https://twitter.com/neiltyson TWITTER ' + ) expected = [ {'value': 'http://www.haydenplanetarium.org/tyson/'}, @@ -2055,24 +1982,17 @@ def test_ids_from_8564_g_u_y_twitter(): schema = load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( - '' - ' ' - ' http://www.haydenplanetarium.org/tyson/' - ' ' - ' ' - ' active' - ' https://twitter.com/neiltyson' - ' TWITTER' - ' ' - '' - ) # record/1073331 + snippet = ( # record/1073331 + ' http://www.haydenplanetarium.org/tyson/ ' + ' active https://twitter.com/neiltyson TWITTER ' + ) expected = [ - { - 'schema': 'TWITTER', - 'value': 'neiltyson' - }, + {'schema': 'TWITTER', 'value': 'neiltyson'}, ] result = hepnames.do(create_record(snippet)) @@ -2095,17 +2015,15 @@ def test_ids_from_8564_u_wikipedia(): schema = load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( + snippet = ( # record/1018793 '' - ' https://en.wikipedia.org/wiki/Torsten_%C3%85kesson' + ' https://en.wikipedia.org/wiki/Torsten_%C3%85kesson' '' - ) # record/1018793 + ) expected = [ - { - 'schema': 'WIKIPEDIA', - 'value': u'Torsten_Åkesson' - }, + {'schema': 'WIKIPEDIA', 'value': u'Torsten_Åkesson'}, ] result = hepnames.do(create_record(snippet)) @@ -2127,18 +2045,14 @@ def test_ids_from_8564_u_y_linkedin(): schema = load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( - '' - ' https://www.linkedin.com/in/silvia-adri%C3%A1n-mart%C3%ADnez-ab1a548b' - ' LINKEDIN' - '' - ) # record/1423251 + snippet = ( # record/1423251 + ' https://www.linkedin.com/in/silvia-adri%C3%A1n-mart%C3%ADnez-ab1a548b' + ' LINKEDIN' + ) expected = [ - { - 'schema': 'LINKEDIN', - 'value': u'silvia-adrián-martínez-ab1a548b' - }, + {'schema': 'LINKEDIN', 'value': u'silvia-adrián-martínez-ab1a548b'}, ] result = hepnames.do(create_record(snippet)) @@ -2147,7 +2061,9 @@ def test_ids_from_8564_u_y_linkedin(): expected = [ { - 'u': 'https://www.linkedin.com/in/silvia-adri%C3%A1n-mart%C3%ADnez-ab1a548b', + 'u': ( + 'https://www.linkedin.com/in/silvia-adri%C3%A1n-mart%C3%ADnez-ab1a548b' + ), 'y': 'LINKEDIN', }, ] @@ -2160,11 +2076,11 @@ def test_native_names_from_880__a(): schema = load_schema('authors') subschema = schema['properties']['name']['properties']['native_names'] - snippet = ( + snippet = ( # record/1019097 '' ' Գեւորգ Ն. Աբազաջիան' '' - ) # record/1019097 + ) expected = [u'Գեւորգ Ն. 
Աբազաջիան'] @@ -2185,11 +2101,11 @@ def test_ids_from_970__a(): schema = load_schema('authors') subschema = schema['properties']['ids'] - snippet = ( + snippet = ( # record/1498151 '' ' HEPNAMES-646482' '' - ) # record/1498151 + ) expected = [ { @@ -2214,11 +2130,11 @@ def test_new_record_from_970__d(): schema = load_schema('authors') subschema = schema['properties']['new_record'] - snippet = ( + snippet = ( # record/1271254 '' ' 1039458' '' - ) # record/1271254 + ) expected = {'$ref': 'http://localhost:5000/api/authors/1039458'} result = hepnames.do(create_record(snippet)) @@ -2236,11 +2152,11 @@ def test_stub_from_980__a_useful(): schema = load_schema('authors') subschema = schema['properties']['stub'] - snippet = ( + snippet = ( # record/1222902 '' ' USEFUL' '' - ) # record/1222902 + ) expected = False result = hepnames.do(create_record(snippet)) @@ -2264,11 +2180,11 @@ def test_stub_from_980__a_not_useful(): schema = load_schema('authors') subschema = schema['properties']['stub'] - snippet = ( + snippet = ( # record/1019103 '' ' HEPNAMES' '' - ) # record/1019103 + ) expected = True result = hepnames.do(create_record(snippet)) @@ -2288,11 +2204,11 @@ def test_deleted_from_980__c(): schema = load_schema('authors') subschema = schema['properties']['deleted'] - snippet = ( + snippet = ( # record/1511071 '' ' DELETED' '' - ) # record/1511071 + ) expected = True result = hepnames.do(create_record(snippet)) diff --git a/tests/test_institutions.py b/tests/test_institutions.py index 7ed3a511..d3d8bbee 100644 --- a/tests/test_institutions.py +++ b/tests/test_institutions.py @@ -23,16 +23,16 @@ from __future__ import absolute_import, division, print_function from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from inspire_dojson.institutions import institutions -from inspire_schemas.api import load_schema, validate def test_addresses_from_034__d_f_and_371__double_a_b_d_g(): schema = load_schema('institutions') subschema = schema['properties']['addresses'] - snippet = ( + snippet = ( # record/902630 '' ' ' ' 35.0499505' @@ -46,7 +46,7 @@ def test_addresses_from_034__d_f_and_371__double_a_b_d_g(): ' JP' ' ' '' - ) # record/902630 + ) expected = [ { @@ -72,12 +72,12 @@ def test_external_system_identifiers_from_035__a_9(): schema = load_schema('institutions') subschema = schema['properties']['external_system_identifiers'] - snippet = ( + snippet = ( # record/910133 '' ' HAL' ' 1969' '' - ) # record/910133 + ) expected = [ { @@ -95,7 +95,7 @@ def test_related_records_from_110__a_t_u_double_x_double_z(): schema = load_schema('institutions') subschema = schema['properties']['related_records'] - snippet = ( + snippet = ( # record/1272953 '' ' University of Pittsburgh' ' U. 
Pittsburgh' @@ -105,7 +105,7 @@ def test_related_records_from_110__a_t_u_double_x_double_z(): ' 908047' ' 905042' '' - ) # record/1272953 + ) expected = [ { @@ -135,13 +135,12 @@ def test_icn_legacy_icn_institution_hierarchy_from_110__a_t_u(): legacy_ICN_schema = schema['properties']['legacy_ICN'] institution_hierarchy_schema = schema['properties']['institution_hierarchy'] - snippet = ( - '' - ' European Organization for Nuclear Research (CERN)' - ' CERN, Geneva' - ' CERN' - '' - ) # record/902725 + snippet = ( # record/902725 + ' European' + ' Organization for Nuclear Research (CERN) CERN, Geneva CERN' + ) expected_ICN = [ 'CERN, Geneva', @@ -161,7 +160,9 @@ def test_icn_legacy_icn_institution_hierarchy_from_110__a_t_u(): assert validate(result['legacy_ICN'], legacy_ICN_schema) is None assert expected_legacy_ICN == result['legacy_ICN'] - assert validate(result['institution_hierarchy'], institution_hierarchy_schema) is None + assert ( + validate(result['institution_hierarchy'], institution_hierarchy_schema) is None + ) assert expected_institution_hierarchy == result['institution_hierarchy'] @@ -171,14 +172,13 @@ def test_icn_legacy_icn_institution_hierarchy_from_110__a_b_t_u(): legacy_ICN_schema = schema['properties']['legacy_ICN'] institution_hierarchy_schema = schema['properties']['institution_hierarchy'] - snippet = ( - '' - ' Université Libre de Bruxelles' - ' Physique Theorique et Mathematique (PTM)' - ' U. Libre Brussels, PTM' - ' Brussels U., PTM' - '' - ) # record/909579 + snippet = ( # record/909579 + ' Université' + ' Libre de Bruxelles Physique Theorique' + ' et Mathematique (PTM) U. Libre' + ' Brussels, PTM Brussels U.,' + ' PTM' + ) expected_ICN = [ 'U. Libre Brussels, PTM', @@ -201,7 +201,9 @@ def test_icn_legacy_icn_institution_hierarchy_from_110__a_b_t_u(): assert validate(result['legacy_ICN'], legacy_ICN_schema) is None assert expected_legacy_ICN == result['legacy_ICN'] - assert validate(result['institution_hierarchy'], institution_hierarchy_schema) is None + assert ( + validate(result['institution_hierarchy'], institution_hierarchy_schema) is None + ) assert expected_institution_hierarchy == result['institution_hierarchy'] @@ -209,7 +211,7 @@ def test_addresses_from_371__double_a_b_d_e_g(): schema = load_schema('institutions') subschema = schema['properties']['addresses'] - snippet = ( + snippet = ( # record/1209215 '' ' Philosophenweg 16' ' 69120 Heidelberg' @@ -218,7 +220,7 @@ def test_addresses_from_371__double_a_b_d_e_g(): ' 69120' ' DE' '' - ) # record/1209215 + ) expected = [ { @@ -243,7 +245,7 @@ def test_addresses_from_371__triple_a_b_d_e_g_and_371__triple_a_b_d_e_g_x(): schema = load_schema('institutions') subschema = schema['properties']['addresses'] - snippet = ( + snippet = ( # record/902696 '' ' ' ' Université Libre de Bruxelles (ULB)' @@ -265,7 +267,7 @@ def test_addresses_from_371__triple_a_b_d_e_g_and_371__triple_a_b_d_e_g_x(): ' secondary' ' ' '' - ) # record/902696 + ) expected = [ { @@ -303,7 +305,7 @@ def test_addresses_from_371__double_a_b_d_e_double_g(): schema = load_schema('institutions') subschema = schema['properties']['addresses'] - snippet = ( + snippet = ( # record/1241283 '' ' 88003' ' US' @@ -313,7 +315,7 @@ def test_addresses_from_371__double_a_b_d_e_double_g(): ' USA' ' US' '' - ) # record/1241283 + ) expected = [ { @@ -338,7 +340,7 @@ def test_addresses_from_371__a_b_d_e_g(): schema = load_schema('institutions') subschema = schema['properties']['addresses'] - snippet = ( + snippet = ( # record/902671 '' ' Edgbaston, Birmingham B15 2TT' ' 
Birmingham' @@ -346,7 +348,7 @@ def test_addresses_from_371__a_b_d_e_g(): ' B15 2TT' ' UK' '' - ) # record/902671 + ) expected = [ { @@ -370,11 +372,11 @@ def test_institution_type_from_372__a(): schema = load_schema('institutions') subschema = schema['properties']['institution_type'] - snippet = ( + snippet = ( # record/902624 '' ' Research center' '' - ) # record/902624 + ) expected = [ 'Research Center', @@ -389,12 +391,12 @@ def test_name_variants_from_410__a_9(): schema = load_schema('institutions') subschema = schema['properties']['name_variants'] - snippet = ( + snippet = ( # record/1496423 '' ' INSPIRE' ' University of Chile' '' - ) # record/1496423 + ) expected = [ { @@ -409,12 +411,12 @@ def test_name_variants_from_410__a_9(): def test_name_variants_from_410__a_9_discards_desy_source(): - snippet = ( + snippet = ( # record/902624 '' ' DESY' ' Aachen Tech. Hochsch.' '' - ) # record/902624 + ) result = institutions.do(create_record(snippet)) @@ -422,12 +424,12 @@ def test_name_variants_from_410__a_9_discards_desy_source(): def test_name_variants_from_410__a_9_discards_desy_aff_source(): - snippet = ( + snippet = ( # record/902626 '' ' DESY_AFF' ' AARHUS UNIV' '' - ) # record/902626 + ) result = institutions.do(create_record(snippet)) @@ -435,13 +437,13 @@ def test_name_variants_from_410__a_9_discards_desy_aff_source(): def test_name_variants_from_410__9_discards_other_sources(): - snippet = ( + snippet = ( # record/1338296 '' ' Tech' ' CIIT' ' Inst' '' - ) # record/1338296 + ) result = institutions.do(create_record(snippet)) @@ -452,12 +454,11 @@ def test_name_variants_from_410__double_a(): schema = load_schema('institutions') subschema = schema['properties']['name_variants'] - snippet = ( - '' - ' Theoretische Teilchenphysik und Kosmologie' - ' Elementarteilchenphysik' - '' - ) # record/902624 + snippet = ( # record/902624 + ' Theoretische Teilchenphysik und Kosmologie ' + ' Elementarteilchenphysik' + ) expected = [ {'value': 'Theoretische Teilchenphysik und Kosmologie'}, @@ -473,20 +474,17 @@ def test_extra_words_from_410__decuple_g(): schema = load_schema('institutions') subschema = schema['properties']['extra_words'] - snippet = ( - '' - ' Institut Theoretische Physik,' - ' RWTH, Inst.' - ' institute A' - ' III. Physikalisches Institut, Technische Hochschule Aachen, Aachen, West' - ' physics' - ' 52056' - ' D-52056' - ' DE-52056' - ' phys' - ' I. Physikalisches Institut' - '' - ) # record/902624 + snippet = ( # record/902624 + ' Institut' + ' Theoretische Physik, RWTH,' + ' Inst. institute A ' + ' III. Physikalisches Institut, Technische' + ' Hochschule Aachen, Aachen, West physics 52056 ' + ' D-52056 DE-52056 phys ' + ' I. Physikalisches Institut' + ) expected = [ 'Institut Theoretische Physik,', @@ -510,13 +508,13 @@ def test_related_records_from_510__a_w_0_accepts_parents(): schema = load_schema('institutions') subschema = schema['properties']['related_records'] - snippet = ( + snippet = ( # record/1430106 '' ' 1385404' ' U. 
Caen (main)' ' t' '' - ) # record/1430106 + ) expected = [ { @@ -537,7 +535,7 @@ def test_related_records_from_double_510__a_w_0_accepts_parents(): schema = load_schema('institutions') subschema = schema['properties']['related_records'] - snippet = ( + snippet = ( # record/1430106 '' ' ' ' 1385404' @@ -550,7 +548,7 @@ def test_related_records_from_double_510__a_w_0_accepts_parents(): ' t' ' ' '' - ) # record/1430106 + ) expected = [ { @@ -578,7 +576,7 @@ def test_related_records_from_double_510__a_w_0_accepts_predecessors(): schema = load_schema('institutions') subschema = schema['properties']['related_records'] - snippet = ( + snippet = ( # record/902916 '' ' ' ' 903276' @@ -591,7 +589,7 @@ def test_related_records_from_double_510__a_w_0_accepts_predecessors(): ' a' ' ' '' - ) # record/902916 + ) expected = [ { @@ -619,13 +617,13 @@ def test_related_records_from_510__a_w_0_accepts_other(): schema = load_schema('institutions') subschema = schema['properties']['related_records'] - snippet = ( + snippet = ( # record/902971 '' ' 945696' ' UMass Amherst' ' r' '' - ) # record/902971 + ) expected = [ { @@ -646,13 +644,13 @@ def test_related_records_from__510__a_w_0_accepts_successors(): schema = load_schema('institutions') subschema = schema['properties']['related_records'] - snippet = ( + snippet = ( # record/902831 '' ' 911753' ' HZB, Berlin' ' b' '' - ) # record/902831 + ) expected = [ { @@ -670,11 +668,11 @@ def test_related_records_from__510__a_w_0_accepts_successors(): def test_related_records_from_510__w_discards_malformed(): - snippet = ( + snippet = ( # synthetic data '' ' foo' '' - ) # synthetic data + ) result = institutions.do(create_record(snippet)) @@ -685,11 +683,11 @@ def test_core_from_980__a_core(): schema = load_schema('institutions') subschema = schema['properties']['core'] - snippet = ( + snippet = ( # record/902645 '' ' CORE' '' - ) # record/902645 + ) expected = True result = institutions.do(create_record(snippet)) @@ -702,12 +700,12 @@ def test_core_from_980__a_b_noncore(): schema = load_schema('institutions') subschema = schema['properties']['core'] - snippet = ( + snippet = ( # record/906132 '' ' NONCORE' ' NON-PPF' '' - ) # record/906132 + ) expected = False result = institutions.do(create_record(snippet)) @@ -720,11 +718,11 @@ def test_private_notes_from_667__a(): schema = load_schema('institutions') subschema = schema['properties']['_private_notes'] - snippet = ( + snippet = ( # record/902663 '' ' Former ICN = Negev U.' '' - ) # record/902663 + ) expected = [ {'value': 'Former ICN = Negev U.'}, @@ -739,14 +737,23 @@ def test_private_notes_from_595__a(): schema = load_schema('institutions') subschema = schema['properties']['_private_notes'] - snippet = ( - '' - ' The Division is located inside the Department of Physics and Astronomy of the University of Catania Scientific Campus ("Città Universitaria" or "Cittadella"). Via Santa Sofia 64 95123 CATANIA' - '' - ) # record/902879 + snippet = ( # record/902879 + ' The' + ' Division is located inside the Department of Physics and Astronomy of' + ' the University of Catania Scientific Campus ("Città Universitaria" or' + ' "Cittadella"). Via Santa Sofia 64 95123' + ' CATANIA' + ) expected = [ - {'value': u'The Division is located inside the Department of Physics and Astronomy of the University of Catania Scientific Campus ("Città Universitaria" or "Cittadella"). 
Via Santa Sofia 64 95123 CATANIA'}, + { + 'value': ( + u'The Division is located inside the Department of Physics and' + u' Astronomy of the University of Catania Scientific Campus' + u' ("Città Universitaria" or "Cittadella"). Via Santa Sofia 64' + u' 95123 CATANIA' + ) + }, ] result = institutions.do(create_record(snippet)) @@ -758,20 +765,31 @@ def test_private_notes_from_double_595__a(): schema = load_schema('institutions') subschema = schema['properties']['_private_notes'] - snippet = ( - '' - ' ' - ' The Roma II Structure was established in 1989 at the University of Rome “Tor Vergata” - cc' - ' ' - ' ' - ' REDACTED thinks we don\'t have to write 110__t: "INFN, Rome 2" because Rome 2 is only in the url but not in the site. She\'ll ask to REDACTED (from INFN) to have her feedback.' - ' ' - '' - ) # record/907691 + snippet = ( # record/907691 + ' The Roma II Structure was established in 1989 at the' + ' University of Rome “Tor Vergata” - cc ' + ' REDACTED thinks we don\'t have to write 110__t: "INFN, Rome' + ' 2" because Rome 2 is only in the url but not in the site. She\'ll ask' + ' to REDACTED (from INFN) to have her feedback. ' + ' ' + ) expected = [ - {'value': u'The Roma II Structure was established in 1989 at the University of Rome “Tor Vergata” - cc'}, - {'value': u'REDACTED thinks we don\'t have to write 110__t: "INFN, Rome 2" because Rome 2 is only in the url but not in the site. She\'ll ask to REDACTED (from INFN) to have her feedback.'}, + { + 'value': ( + u'The Roma II Structure was established in 1989 at the' + u' University of Rome “Tor Vergata” - cc' + ) + }, + { + 'value': ( + u'REDACTED thinks we don\'t have to write 110__t: "INFN, Rome' + u' 2" because Rome 2 is only in the url but not in the site.' + u' She\'ll ask to REDACTED (from INFN) to have her feedback.' + ) + }, ] result = institutions.do(create_record(snippet)) @@ -783,14 +801,19 @@ def test_public_notes_from_680__i(): schema = load_schema('institutions') subschema = schema['properties']['public_notes'] - snippet = ( - '' - ' 2nd address: Organisation Européenne pour la Recherche Nucléaire (CERN), F-01631 Prévessin Cedex, France' - '' - ) # record/902725 + snippet = ( # record/902725 + ' 2nd' + ' address: Organisation Européenne pour la Recherche Nucléaire (CERN),' + ' F-01631 Prévessin Cedex, France' + ) expected = [ - {'value': u'2nd address: Organisation Européenne pour la Recherche Nucléaire (CERN), F-01631 Prévessin Cedex, France'} + { + 'value': ( + u'2nd address: Organisation Européenne pour la Recherche' + u' Nucléaire (CERN), F-01631 Prévessin Cedex, France' + ) + } ] result = institutions.do(create_record(snippet)) @@ -802,14 +825,17 @@ def test_historical_data_from_6781_a(): schema = load_schema('institutions') subschema = schema['properties']['historical_data'] - snippet = ( - '' - ' Became IFH (Inst for Hochenergiephysik)in 1968. Since 1992 the official name of the Inst. is simply DESY Zeuthen. Changed 1/26/99 AMR' - '' - ) # record/902666 + snippet = ( # record/902666 + ' Became IFH' + ' (Inst for Hochenergiephysik)in 1968. Since 1992 the official name of' + ' the Inst. is simply DESY Zeuthen. Changed 1/26/99' + ' AMR' + ) expected = [ - 'Became IFH (Inst for Hochenergiephysik)in 1968. Since 1992 the official name of the Inst. is simply DESY Zeuthen. Changed 1/26/99 AMR' + 'Became IFH (Inst for Hochenergiephysik)in 1968. Since 1992 the' + ' official name of the Inst. is simply DESY Zeuthen. 
Changed' + ' 1/26/99 AMR' ] result = institutions.do(create_record(snippet)) @@ -821,14 +847,15 @@ def test_historical_data_from_6781_multiple_a(): schema = load_schema('institutions') subschema = schema['properties']['historical_data'] - snippet = ( - '' - ' Conseil européen pour la Recherche Nucléaire (1952-1954)' - ' Organisation européenne pour la Recherche nucléaire (1954-now)' - ' Sub title: Laboratoire européen pour la Physique des Particules (1984-now)' - ' Sub title: European Laboratory for Particle Physics (1984-now)' - '' - ) # record/902725 + snippet = ( # record/902725 + ' Conseil' + ' européen pour la Recherche Nucléaire (1952-1954) ' + ' Organisation européenne pour la Recherche' + ' nucléaire (1954-now) Sub title:' + ' Laboratoire européen pour la Physique des Particules' + ' (1984-now) Sub title: European' + ' Laboratory for Particle Physics (1984-now)' + ) expected = [ u'Conseil européen pour la Recherche Nucléaire (1952-1954)', @@ -846,7 +873,7 @@ def test_deleted_from_980__multiple_b_and_980__a_and_980__c(): schema = load_schema('institutions') subschema = schema['properties']['deleted'] - snippet = ( + snippet = ( # record/905453 '' ' ' ' CK90' @@ -860,7 +887,7 @@ def test_deleted_from_980__multiple_b_and_980__a_and_980__c(): ' DELETED' ' ' '' - ) # record/905453 + ) expected = True result = institutions.do(create_record(snippet)) @@ -873,11 +900,11 @@ def test_inactive_from_980__a(): schema = load_schema('institutions') subschema = schema['properties']['inactive'] - snippet = ( + snippet = ( # record/1241100 '' ' DEAD' '' - ) # record/1241100 + ) expected = True result = institutions.do(create_record(snippet)) diff --git a/tests/test_journals.py b/tests/test_journals.py index f19eb6f3..cf589808 100644 --- a/tests/test_journals.py +++ b/tests/test_journals.py @@ -23,20 +23,20 @@ from __future__ import absolute_import, division, print_function from dojson.contrib.marc21.utils import create_record +from inspire_schemas.api import load_schema, validate from inspire_dojson.journals import journals -from inspire_schemas.api import load_schema, validate def test_issns_from_022__a(): schema = load_schema('journals') subschema = schema['properties']['issns'] - snippet = ( + snippet = ( # record/1445059 '' ' 2213-1337' ' ' - ) # record/1445059 + ) expected = [ {'value': '2213-1337'}, @@ -51,12 +51,12 @@ def test_issns_from_022__a_b(): schema = load_schema('journals') subschema = schema['properties']['issns'] - snippet = ( + snippet = ( # record/1513418 '' ' 1812-9471' ' Print' '' - ) # record/1513418 + ) expected = [ { @@ -74,7 +74,7 @@ def test_issns_from_double_022__a_b(): schema = load_schema('journals') subschema = schema['properties']['issns'] - snippet = ( + snippet = ( # record/1513418 '' ' ' ' 1812-9471' @@ -85,7 +85,7 @@ def test_issns_from_double_022__a_b(): ' Online' ' ' '' - ) # record/1513418 + ) expected = [ { @@ -107,12 +107,12 @@ def test_issns_from_022__a_b_handles_electronic(): schema = load_schema('journals') subschema = schema['properties']['issns'] - snippet = ( + snippet = ( # record/1415879 '' ' 2469-9888' ' electronic' '' - ) # record/1415879 + ) expected = [ { @@ -130,11 +130,10 @@ def test_journal_title_from_130__a(): schema = load_schema('journals') subschema = schema['properties']['journal_title'] - snippet = ( - '' - ' Physical Review Special Topics - Accelerators and Beams' - '' - ) # record/1212820 + snippet = ( # record/1212820 + ' Physical' + ' Review Special Topics - Accelerators and Beams' + ) expected = {'title': 'Physical Review Special 
Topics - Accelerators and Beams'} result = journals.do(create_record(snippet)) @@ -147,12 +146,12 @@ def test_journal_title_from_130__a_b(): schema = load_schema('journals') subschema = schema['properties']['journal_title'] - snippet = ( + snippet = ( # record/1325601 '' ' Humana Mente' ' Journal of Philosophical Studies' '' - ) # record/1325601 + ) expected = { 'title': 'Humana Mente', @@ -168,13 +167,13 @@ def test_related_records_from_530__a_w_0(): schema = load_schema('journals') subschema = schema['properties']['related_records'] - snippet = ( + snippet = ( # record/1415879 '' ' 1212820' ' Phys.Rev.ST Accel.Beams' ' a' '' - ) # record/1415879 + ) expected = [ { @@ -195,14 +194,14 @@ def test_related_records_from_530__a_i_w_0(): schema = load_schema('journals') subschema = schema['properties']['related_records'] - snippet = ( + snippet = ( # record/1214386 '' ' 1214339' ' Zh.Eksp.Teor.Fiz.' ' Original version (Russian)' ' r' '' - ) # record/1214386 + ) expected = [ { @@ -223,13 +222,13 @@ def test_related_successor_records_from_530__a_i_w_0(): schema = load_schema('journals') subschema = schema['properties']['related_records'] - snippet = ( + snippet = ( # record/1504005 '' '1214520' 'Phil.Mag.' 'b' '/datafield>' - ) # record/1504005 + ) expected = [ { @@ -250,11 +249,11 @@ def test_license_from_540__a(): schema = load_schema('journals') subschema = schema['properties']['license'] - snippet = ( + snippet = ( # record/1617955 '' ' CC-BY 4.0' '' - ) # record/1617955 + ) expected = {'license': 'CC-BY 4.0'} result = journals.do(create_record(snippet)) @@ -267,14 +266,14 @@ def test_harvesting_info_from_583__a_c_i_3(): schema = load_schema('journals') subschema = schema['properties']['_harvesting_info'] - snippet = ( + snippet = ( # record/1616534 '' ' 2017-08-21' ' New Phys.Sae Mulli,67' ' partial' ' harvest' '' - ) # record/1616534 + ) expected = { 'coverage': 'partial', @@ -292,11 +291,11 @@ def test_public_notes_from_640__a(): schema = load_schema('journals') subschema = schema['properties']['public_notes'] - snippet = ( + snippet = ( # record/1466026 '' ' v.1 starts 2013' '' - ) # record/1466026 + ) expected = [ {'value': 'v.1 starts 2013'}, @@ -311,11 +310,11 @@ def test_publisher_from_643__b(): schema = load_schema('journals') subschema = schema['properties']['publisher'] - snippet = ( + snippet = ( # record/1211888 '' ' ANITA PUBLICATIONS, INDIA' '' - ) # record/1211888 + ) expected = ['ANITA PUBLICATIONS, INDIA'] result = journals.do(create_record(snippet)) @@ -328,7 +327,7 @@ def test_publisher_from_double_643__b(): schema = load_schema('journals') subschema = schema['properties']['publisher'] - snippet = ( + snippet = ( # record/1212635 '' ' ' ' Elsevier' @@ -337,7 +336,7 @@ def test_publisher_from_double_643__b(): ' Science Press' ' ' '' - ) # record/1212635 + ) expected = [ 'Elsevier', @@ -353,11 +352,11 @@ def test_private_notes_from_667__x(): schema = load_schema('journals') subschema = schema['properties']['_private_notes'] - snippet = ( + snippet = ( # record/1485643 '' ' Open Access' '' - ) # record/1485643 + ) expected = [ {'value': 'Open Access'}, @@ -372,12 +371,11 @@ def test_private_notes_from_667__double_x(): schema = load_schema('journals') subschema = schema['properties']['_private_notes'] - snippet = ( - '' - ' Do not use vol, use year and page: 2006:2154,2006' - ' even year is not unique' - '' - ) # record/1212189 + snippet = ( # record/1212189 + ' Do not use' + ' vol, use year and page: 2006:2154,2006 even year is not unique' + ) expected = [ {'value': 'Do not 
use vol, use year and page: 2006:2154,2006'}, @@ -393,11 +391,11 @@ def test_doi_prefixes_from_677__d(): schema = load_schema('journals') subschema = schema['properties']['doi_prefixes'] - snippet = ( + snippet = ( # record/1617963 '' ' 10.17406/GJSFR' '' - ) # record/1617963 + ) expected = ['10.17406/GJSFR'] result = journals.do(create_record(snippet)) @@ -410,14 +408,19 @@ def test_public_notes_from_680__i(): schema = load_schema('journals') subschema = schema['properties']['public_notes'] - snippet = ( - u'' - u' Russian Title: Высокомолекулярные соединения. Серия В. Химия полимеров (Vysokomolekulyarnye Soedineniya, Seriya B)' - u'' - ) # record/1615699 + snippet = ( # record/1615699 + u' Russian' + u' Title: Высокомолекулярные соединения. Серия В. Химия полимеров' + u' (Vysokomolekulyarnye Soedineniya, Seriya B)' + ) expected = [ - {'value': u'Russian Title: Высокомолекулярные соединения. Серия В. Химия полимеров (Vysokomolekulyarnye Soedineniya, Seriya B)'}, + { + 'value': ( + u'Russian Title: Высокомолекулярные соединения. Серия В. Химия' + u' полимеров (Vysokomolekulyarnye Soedineniya, Seriya B)' + ) + }, ] result = journals.do(create_record(snippet)) @@ -429,7 +432,7 @@ def test_proceedings_from_double_690__a(): schema = load_schema('journals') subschema = schema['properties']['proceedings'] - snippet = ( + snippet = ( # record/1213080 '' ' ' ' NON-PUBLISHED' @@ -438,7 +441,7 @@ def test_proceedings_from_double_690__a(): ' Proceedings' ' ' '' - ) # record/1213080 + ) expected = True result = journals.do(create_record(snippet)) @@ -451,11 +454,11 @@ def test_refereed_from_690__a_peer_review(): schema = load_schema('journals') subschema = schema['properties']['refereed'] - snippet = ( + snippet = ( # record/1617955 '' ' Peer Review' '' - ) # record/1617955 + ) expected = True result = journals.do(create_record(snippet)) @@ -468,11 +471,11 @@ def test_refereed_from_690__a_non_published(): schema = load_schema('journals') subschema = schema['properties']['refereed'] - snippet = ( + snippet = ( # record/1357923 '' ' NON-PUBLISHED' '' - ) # record/1357923 + ) expected = False result = journals.do(create_record(snippet)) @@ -485,11 +488,11 @@ def test_short_title_from_711__a(): schema = load_schema('journals') subschema = schema['properties']['short_title'] - snippet = ( + snippet = ( # record/1212820 '' ' Phys.Rev.ST Accel.Beams' '' - ) # record/1212820 + ) expected = 'Phys.Rev.ST Accel.Beams' result = journals.do(create_record(snippet)) @@ -503,12 +506,12 @@ def test_short_title_from_711__a_u(): short_title_schema = schema['properties']['short_title'] title_variants_schema = schema['properties']['title_variants'] - snippet = ( + snippet = ( # record/1485822 '' ' Univ.Politech.Bucharest Sci.Bull.' ' Univ.Politech.Bucharest Sci.Bull.A' '' - ) # record/1485822 + ) expected_short_title = 'Univ.Politech.Bucharest Sci.Bull.A' expected_title_variants = ['Univ.Politech.Bucharest Sci.Bull.'] @@ -526,7 +529,7 @@ def test_short_title_from_711__a_u_and_double_730__a(): short_title_schema = schema['properties']['short_title'] title_variants_schema = schema['properties']['title_variants'] - snippet = ( + snippet = ( # record/1212928 '' ' ' ' Diss.Abstr.Int.' 
@@ -539,7 +542,7 @@ def test_short_title_from_711__a_u_and_double_730__a(): ' DABBB' ' ' '' - ) # record/1212928 + ) expected_short_title = 'Diss.Abstr.Int.B' expected_title_variants = [ @@ -560,11 +563,10 @@ def test_title_variants_from_730__a(): schema = load_schema('journals') subschema = schema['properties']['title_variants'] - snippet = ( - '' - ' PHYSICAL REVIEW SPECIAL TOPICS ACCELERATORS AND BEAMS' - '' - ) # record/1212820 + snippet = ( # record/1212820 + ' PHYSICAL' + ' REVIEW SPECIAL TOPICS ACCELERATORS AND BEAMS' + ) expected = ['PHYSICAL REVIEW SPECIAL TOPICS ACCELERATORS AND BEAMS'] result = journals.do(create_record(snippet)) @@ -577,16 +579,13 @@ def test_title_variants_from_double_730__a(): schema = load_schema('journals') subschema = schema['properties']['title_variants'] - snippet = ( - '' - ' ' - ' PHYS REV SPECIAL TOPICS ACCELERATORS BEAMS' - ' ' - ' ' - ' PHYSICS REVIEW ST ACCEL BEAMS' - ' ' - '' - ) # record/1212820 + snippet = ( # record/1212820 + ' PHYS REV SPECIAL TOPICS ACCELERATORS BEAMS ' + ' PHYSICS REVIEW ST ACCEL BEAMS ' + ' ' + ) expected = [ 'PHYS REV SPECIAL TOPICS ACCELERATORS BEAMS', @@ -599,12 +598,12 @@ def test_title_variants_from_double_730__a(): def test_title_variants_skips_730_when_it_contains_a_b(): - snippet = ( + snippet = ( # record/1511950 '' ' AIHPD' ' D' '' - ) # record/1511950 + ) result = journals.do(create_record(snippet)) @@ -615,7 +614,7 @@ def test_book_series_from_double_980__a(): schema = load_schema('journals') subschema = schema['properties']['book_series'] - snippet = ( + snippet = ( # record/1311535 '' ' ' ' JOURNALS' @@ -624,7 +623,7 @@ def test_book_series_from_double_980__a(): ' BookSeries' ' ' '' - ) # record/1311535 + ) expected = True result = journals.do(create_record(snippet)) @@ -637,11 +636,11 @@ def test_deleted_from_980__a(): schema = load_schema('journals') subschema = schema['properties']['deleted'] - snippet = ( + snippet = ( # synthetic data '' ' DELETED' '' - ) # synthetic data + ) expected = True result = journals.do(create_record(snippet)) diff --git a/tests/test_model.py b/tests/test_model.py index f3533b51..a44441d9 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -24,8 +24,8 @@ import pytest -from inspire_dojson.model import FilterOverdo, add_schema from inspire_dojson import DoJsonError, marcxml2record, record2marcxml +from inspire_dojson.model import FilterOverdo, add_schema def test_filteroverdo_works_without_filters(): @@ -38,7 +38,7 @@ def test_filteroverdo_works_without_filters(): def test_filteroverdo_wraps_exceptions(): - record = ( + record = ( # synthetic data '' ' ' ' Ceci n’est pas une dâte' @@ -47,7 +47,7 @@ def test_filteroverdo_wraps_exceptions(): ' HEP' ' ' '' - ) # synthetic data + ) with pytest.raises(DoJsonError) as exc: marcxml2record(record) diff --git a/tests/test_utils.py b/tests/test_utils.py index ff91cba3..75f01b1b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -23,7 +23,6 @@ from __future__ import absolute_import, division, print_function import pytest - from flask import current_app from mock import patch @@ -31,13 +30,13 @@ absolute_url, afs_url, afs_url_to_path, - normalize_rank, + dedupe_all_lists, force_single_element, get_recid_from_ref, get_record_ref, - dedupe_all_lists, - strip_empty_values, normalize_date_aggressively, + normalize_rank, + strip_empty_values, ) @@ -164,7 +163,9 @@ def test_afs_url_converts_afs_path(): def test_afs_url_converts_new_afs_path(): expected = 
'file:///afs/cern.ch/project/inspire/PROD/var/data/files/g220/4413039/content.xml' - result = afs_url('/opt/venvs/inspire-legacy/var/data/files/g220/4413039/content.xml') + result = afs_url( + '/opt/venvs/inspire-legacy/var/data/files/g220/4413039/content.xml' + ) assert expected == result @@ -195,7 +196,9 @@ def test_afs_url_with_custom_afs_path(): def test_afs_url_handles_unicode(): expected = u'file:///afs/cern.ch/project/inspire/PROD/var/data/files/g70/1407585/%E7%89%A9%E7%90%86%E7%A7%91%E5%AD%A6%E4%B8%8E%E6%8A%80%E6%9C%AF%E5%AD%A6%E9%99%A2-%E6%9D%8E%E5%A8%9C-200650218-%E5%AD%A6%E4%BD%8D%E7%BA%A7....pdf%3B1' - result = afs_url(u'/opt/cds-invenio/var/data/files/g70/1407585/物理科学与技术学院-李娜-200650218-学位级....pdf;1') + result = afs_url( + u'/opt/cds-invenio/var/data/files/g70/1407585/物理科学与技术学院-李娜-200650218-学位级....pdf;1' + ) assert expected == result @@ -214,7 +217,6 @@ def test_afs_url_with_afs_service_enabled_converts_afs_path(): config = {'LABS_AFS_HTTP_SERVICE': 'http://jessicajones.com/nested/nested'} with patch.dict(current_app.config, config): - expected = 'http://jessicajones.com/nested/nested/var/file.txt' result = afs_url('/opt/cds-invenio/var/file.txt') @@ -225,7 +227,6 @@ def test_afs_url_with_afs_service_enabled_with_trailing_slash_converts_afs_path( config = {'LABS_AFS_HTTP_SERVICE': 'http://jessicajones.com/nested/nested/'} with patch.dict(current_app.config, config): - expected = 'http://jessicajones.com/nested/nested/var/file.txt' result = afs_url('/opt/cds-invenio/var/file.txt') @@ -261,7 +262,7 @@ def test_afs_url_converts_afs_url_to_path(): def test_afs_url_handles_custom_afs_path(): config = { 'LABS_AFS_HTTP_SERVICE': 'http://jessicajones.com/nested/nested', - 'LEGACY_AFS_PATH': '/foo/bar' + 'LEGACY_AFS_PATH': '/foo/bar', } expected = "file:///foo/bar/var/file.txt" @@ -350,13 +351,17 @@ def test_get_recid_from_ref_returns_none_on_ref_malformed(): def test_dedupe_all_lists(): - obj = {'l0': list(range(10)) + list(range(10)), - 'o1': [{'foo': 'bar'}] * 10, - 'o2': [{'foo': [1, 2]}, {'foo': [1, 1, 2]}] * 10} + obj = { + 'l0': list(range(10)) + list(range(10)), + 'o1': [{'foo': 'bar'}] * 10, + 'o2': [{'foo': [1, 2]}, {'foo': [1, 1, 2]}] * 10, + } - expected = {'l0': list(range(10)), - 'o1': [{'foo': 'bar'}], - 'o2': [{'foo': [1, 2]}]} + expected = { + 'l0': list(range(10)), + 'o1': [{'foo': 'bar'}], + 'o2': [{'foo': [1, 2]}], + } assert dedupe_all_lists(obj) == expected @@ -404,7 +409,7 @@ def test_normalize_date_aggressively_strips_wrong_month(): def test_normalize_date_aggressively_raises_on_wrong_format(): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match='Unknown string format: 2014=12'): normalize_date_aggressively('2014=12-01') diff --git a/tests/test_utils_geo.py b/tests/test_utils_geo.py index e51dd413..24d497d1 100644 --- a/tests/test_utils_geo.py +++ b/tests/test_utils_geo.py @@ -28,7 +28,6 @@ parse_institution_address, ) - # TODO: test match_country_code
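The test_utils_geo.py hunk above closes on a TODO for match_country_code. Below is a minimal, non-authoritative sketch of what that test could look like, assuming match_country_code lives in inspire_dojson.utils.geo (the module this hunk touches), that it normalizes a country-code string to ISO 3166-1 alpha-2, and that it returns None when nothing matches; all three behaviors are assumptions for illustration, not facts taken from this patch.

from inspire_dojson.utils.geo import match_country_code


def test_match_country_code():
    # Assumption: an exact ISO 3166-1 alpha-2 code is returned unchanged.
    assert match_country_code('US') == 'US'

    # Assumption: matching is case-insensitive for known codes.
    assert match_country_code('us') == 'US'

    # Assumption: unrecognized values yield None instead of raising.
    assert match_country_code('not-a-code') is None

If the real contract differs (for example, raising on unknown input), the asserts should be adjusted to the documented behavior before adopting anything like this sketch.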