diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5bfb7c22..4c09835c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -31,4 +31,3 @@ repos:
entry: gitlint
stages: [commit-msg]
args: [--msg-filename]
-
diff --git a/dags/aps/aps_params.py b/dags/aps/aps_params.py
index 56a37e87..78c304d7 100644
--- a/dags/aps/aps_params.py
+++ b/dags/aps/aps_params.py
@@ -4,11 +4,11 @@
class APSParams:
def __init__(
self,
- from_date= (date.today() - timedelta(days=1)).strftime("%Y-%m-%d"),
- until_date= date.today().strftime("%Y-%m-%d"),
- date= "modified",
- journals= "",
- set= "scoap3",
+ from_date=(date.today() - timedelta(days=1)).strftime("%Y-%m-%d"),
+ until_date=date.today().strftime("%Y-%m-%d"),
+ date="modified",
+ journals="",
+ set="scoap3",
per_page: int = 100,
):
self.from_date = from_date
diff --git a/dags/aps/aps_pull_api.py b/dags/aps/aps_pull_api.py
index 5e85b928..8b6302e0 100644
--- a/dags/aps/aps_pull_api.py
+++ b/dags/aps/aps_pull_api.py
@@ -18,11 +18,11 @@
)
def aps_pull_api():
@task()
- def set_fetching_intervals(repo = APSRepository(), **kwargs):
+ def set_fetching_intervals(repo=APSRepository(), **kwargs):
return set_harvesting_interval(repo=repo, **kwargs)
@task()
- def save_json_in_s3(dates: dict, repo = APSRepository(), **kwargs):
+ def save_json_in_s3(dates: dict, repo=APSRepository(), **kwargs):
parameters = APSParams(
from_date=dates["from_date"],
until_date=dates["until_date"],
@@ -40,7 +40,7 @@ def save_json_in_s3(dates: dict, repo = APSRepository(), **kwargs):
return None
@task()
- def trigger_files_processing(key, repo = APSRepository()):
+ def trigger_files_processing(key, repo=APSRepository()):
if key is None:
logging.warning("No new files were downloaded to s3")
return
diff --git a/dags/aps/parser.py b/dags/aps/parser.py
index c46d4506..14c8d20a 100644
--- a/dags/aps/parser.py
+++ b/dags/aps/parser.py
@@ -97,26 +97,27 @@ def _form_authors(self, article):
if author["type"] == "Person"
]
-
def extract_organization_and_ror(self, text):
        pattern = r'<a href="(.*?)">(.*?)</a>'
-
+
ror_url = None
-
+
def replace_and_capture(match):
nonlocal ror_url
ror_url = match.group(1)
return match.group(2)
-
+
modified_text = re.sub(pattern, replace_and_capture, text)
-
+
return modified_text, ror_url
def _get_affiliations(self, article, affiliationIds):
parsed_affiliations = [
{
"value": affiliation["name"],
- "organization": self.extract_organization_and_ror(affiliation["name"])[0],
+ "organization": self.extract_organization_and_ror(affiliation["name"])[
+ 0
+ ],
"ror": self.extract_organization_and_ror(affiliation["name"])[1],
}
for affiliation in article["affiliations"]
diff --git a/dags/aps/repository.py b/dags/aps/repository.py
index d07f9f26..cc96e8c9 100644
--- a/dags/aps/repository.py
+++ b/dags/aps/repository.py
@@ -1,6 +1,5 @@
import io
import os
-from typing import IO
from common.repository import IRepository
from common.s3_service import S3Service
diff --git a/dags/clean/cleanup_logs.py b/dags/clean/cleanup_logs.py
index e8b4db93..babb2219 100644
--- a/dags/clean/cleanup_logs.py
+++ b/dags/clean/cleanup_logs.py
@@ -2,7 +2,6 @@
import pendulum
from airflow.decorators import dag
from airflow.operators.bash import BashOperator
-from airflow.operators.bash_operator import BashOperator
AIRFLOW_HOME = os.getenv("AIRFLOW_HOME")
@@ -17,7 +16,7 @@ def cleanup_logs():
bash_command=f"""
logs_dir="{logs_dir}"
find "$logs_dir" -type d -mtime +30 -exec rm -r {{}} \;
- """,
+ """, # noqa
)
diff --git a/dags/common/cleanup.py b/dags/common/cleanup.py
index a2b4eb8f..1e795054 100644
--- a/dags/common/cleanup.py
+++ b/dags/common/cleanup.py
@@ -35,7 +35,7 @@ def replace_cdata_format(text):
CDATA_PATTERN = re.compile(r"<\?CDATA(.*)\?>")
# pattern = re.compile(r'<\?CDATA\s(.*?)\s\?>', re.DOTALL)
- replaced_text = CDATA_PATTERN.sub(r'', text)
+ replaced_text = CDATA_PATTERN.sub(r"", text)
return replaced_text
diff --git a/dags/common/constants.py b/dags/common/constants.py
index 77e7a2b4..4e9ed959 100644
--- a/dags/common/constants.py
+++ b/dags/common/constants.py
@@ -91,7 +91,7 @@
("Brazil", "Brazil"),
("Brasil", "Brazil"),
("Benin", "Benin"),
- (u"Bénin", "Benin"),
+ ("Bénin", "Benin"),
("Bulgaria", "Bulgaria"),
("Bosnia and Herzegovina", "Bosnia and Herzegovina"),
("Canada", "Canada"),
@@ -141,7 +141,7 @@
("Luxembourg", "Luxembourg"),
("Macedonia", "Macedonia"),
("Mexico", "Mexico"),
- (u"México", "Mexico"),
+ ("México", "Mexico"),
("Monaco", "Monaco"),
("Montenegro", "Montenegro"),
("Morocco", "Morocco"),
@@ -161,7 +161,7 @@
("Portugalo", "Portugal"),
("Portugal", "Portugal"),
("P.R.China", "China"),
- (u"People’s Republic of China", "China"),
+ ("People’s Republic of China", "China"),
("Republic of Belarus", "Belarus"),
("Republic of Benin", "Benin"),
("Republic of Korea", "South Korea"),
@@ -181,7 +181,7 @@
("Slovenia", "Slovenia"),
("South Africa", "South Africa"),
("Africa", "South Africa"),
- (u"España", "Spain"),
+ ("España", "Spain"),
("Spain", "Spain"),
("Sudan", "Sudan"),
("Sweden", "Sweden"),
@@ -233,19 +233,21 @@
]
)
-INSTITUTIONS_AND_COUNTRIES_MAPPING = OrderedDict([
- ("INFN", "Italy"),
- ("European Organization for Nuclear Research", "CERN"),
- ("Conseil Européen pour la Recherche Nucléaire", "CERN"),
- ("CERN", "CERN"),
- ("KEK", "Japan"),
- ("DESY", "Germany"),
- ("FERMILAB", "USA"),
- ("FNAL", "USA"),
- ("SLACK", "USA"),
- ("Stanford Linear Accelerator Center", "USA"),
- ("Joint Institute for Nuclear Research", "JINR"),
- ("JINR", "JINR"),
- ("ROC", "Taiwan"),
- ("R.O.C", "Taiwan"),
-])
+INSTITUTIONS_AND_COUNTRIES_MAPPING = OrderedDict(
+ [
+ ("INFN", "Italy"),
+ ("European Organization for Nuclear Research", "CERN"),
+ ("Conseil Européen pour la Recherche Nucléaire", "CERN"),
+ ("CERN", "CERN"),
+ ("KEK", "Japan"),
+ ("DESY", "Germany"),
+ ("FERMILAB", "USA"),
+ ("FNAL", "USA"),
+ ("SLACK", "USA"),
+ ("Stanford Linear Accelerator Center", "USA"),
+ ("Joint Institute for Nuclear Research", "JINR"),
+ ("JINR", "JINR"),
+ ("ROC", "Taiwan"),
+ ("R.O.C", "Taiwan"),
+ ]
+)
diff --git a/dags/common/enhancer.py b/dags/common/enhancer.py
index b49ebb61..1e9f6b57 100644
--- a/dags/common/enhancer.py
+++ b/dags/common/enhancer.py
@@ -2,7 +2,7 @@
import re
from common.constants import FN_REGEX
-from common.utils import parse_country_from_value, get_country_ISO_name
+from common.utils import get_country_ISO_name, parse_country_from_value
class Enhancer:
@@ -47,7 +47,7 @@ def __construct_titles(self, item, publisher):
def __construct_authors(self, item):
# add_nations(item)
pattern_for_cern_cooperation_agreement = re.compile(
- r'cooperation agreement with cern', re.IGNORECASE
+ r"cooperation agreement with cern", re.IGNORECASE
)
for author in item.get("authors", []):
for affiliation in author.get("affiliations", []):
@@ -65,11 +65,12 @@ def __construct_authors(self, item):
affiliation["country"] = _parsed_country
if affiliation.get("country"):
- affiliation["country"] = get_country_ISO_name(affiliation["country"])
+ affiliation["country"] = get_country_ISO_name(
+ affiliation["country"]
+ )
return item
-
def __call__(self, publisher, item):
creation_date = datetime.datetime.now().isoformat()
item_copy = item.copy()
diff --git a/dags/common/parsing/xml_extractors.py b/dags/common/parsing/xml_extractors.py
index 4f878f0c..8195424d 100644
--- a/dags/common/parsing/xml_extractors.py
+++ b/dags/common/parsing/xml_extractors.py
@@ -17,7 +17,7 @@ def __init__(
extra_function=lambda s: s,
prefixes=None,
all_content_between_tags=False,
- remove_tags=False
+ remove_tags=False,
):
super().__init__(destination)
@@ -92,7 +92,7 @@ def __init__(
default_value=None,
required=False,
extra_function=lambda x: x,
- ) :
+ ):
super().__init__(destination)
self.destination = destination
self.source = source
@@ -132,7 +132,7 @@ def extract(self, article):
class CustomExtractor(IExtractor):
def __init__(
self, destination, extraction_function, required=False, default_value=None
- ) :
+ ):
super().__init__(destination)
self.destination = destination
self.extraction_function = extraction_function
@@ -154,7 +154,7 @@ def __init__(
destination,
value,
required=False,
- ) :
+ ):
super().__init__(destination)
self.destination = destination
self.required = required
diff --git a/dags/common/scoap3_s3.py b/dags/common/scoap3_s3.py
index f5c933ea..49f9c4b6 100644
--- a/dags/common/scoap3_s3.py
+++ b/dags/common/scoap3_s3.py
@@ -8,11 +8,8 @@
logger = get_logger()
-FILE_EXTENSIONS = {
- "pdf": ".pdf",
- "xml": ".xml",
- "pdfa": ".pdf"
-}
+FILE_EXTENSIONS = {"pdf": ".pdf", "xml": ".xml", "pdfa": ".pdf"}
+
def update_filename_extension(filename, type):
extension = FILE_EXTENSIONS.get(type, "")
@@ -20,9 +17,10 @@ def update_filename_extension(filename, type):
return filename
elif extension:
if type == "pdfa":
- extension = f".a-2b.pdf"
+ extension = ".a-2b.pdf"
return f"{filename}{extension}"
+
class Scoap3Repository(IRepository):
def __init__(self):
super().__init__()
@@ -55,7 +53,7 @@ def copy_file(self, source_bucket, source_key, prefix=None, type=None):
"source_key": source_key,
},
"MetadataDirective": "REPLACE",
- "ACL": "public-read"
+ "ACL": "public-read",
},
)
logger.info(
@@ -67,7 +65,9 @@ def copy_files(self, bucket, files, prefix=None):
copied_files = {}
for type, path in files.items():
try:
- copied_files[type] = self.copy_file(bucket, path, prefix=prefix, type=type)
+ copied_files[type] = self.copy_file(
+ bucket, path, prefix=prefix, type=type
+ )
except Exception as e:
logger.error("Failed to copy file.", error=str(e), type=type, path=path)
return copied_files
diff --git a/dags/common/utils.py b/dags/common/utils.py
index 76f36c2c..71847c1f 100644
--- a/dags/common/utils.py
+++ b/dags/common/utils.py
@@ -1,4 +1,3 @@
-from datetime import date, datetime
import io
import json
import os
@@ -6,10 +5,10 @@
import tarfile
import xml.etree.ElementTree as ET
import zipfile
+from datetime import date, datetime
from ftplib import error_perm
from io import StringIO
from stat import S_ISDIR, S_ISREG
-from inspire_utils.record import get_value
import backoff
import country_converter as coco
@@ -20,7 +19,6 @@
BY_PATTERN,
CDATA_PATTERN,
COUNTRIES_DEFAULT_MAPPING,
- COUNTRY_PARSING_PATTERN,
CREATIVE_COMMONS_PATTERN,
INSTITUTIONS_AND_COUNTRIES_MAPPING,
LICENSE_PATTERN,
@@ -30,11 +28,13 @@
UnknownFileExtension,
UnknownLicense,
)
+from inspire_utils.record import get_value
from structlog import get_logger
logger = get_logger()
cc = coco.CountryConverter()
+
def set_harvesting_interval(repo, **kwargs):
if (
"params" in kwargs
@@ -268,7 +268,7 @@ def iterate_element(item):
iterate_element(item)
title_part = [i for i in title_parts if i]
- full_text = ' '.join(title_part).strip()
+ full_text = " ".join(title_part).strip()
return full_text
@@ -311,10 +311,12 @@ def parse_country_from_value(affiliation_value):
country_code = cc.convert(country, to="iso2")
mapped_countries = []
if country_code != "not found":
- mapped_countries = [{
- "code": country_code,
- "name": cc.convert(country, to="name_short"),
- }]
+ mapped_countries = [
+ {
+ "code": country_code,
+ "name": cc.convert(country, to="name_short"),
+ }
+ ]
if len(mapped_countries) > 1 or len(mapped_countries) == 0:
raise FoundMoreThanOneMatchOrNone(affiliation_value)
diff --git a/dags/elsevier/elsevier_pull_sftp.py b/dags/elsevier/elsevier_pull_sftp.py
index e9b539c1..3a1e4504 100644
--- a/dags/elsevier/elsevier_pull_sftp.py
+++ b/dags/elsevier/elsevier_pull_sftp.py
@@ -23,9 +23,7 @@ def elsevier_pull_sftp():
@task(executor_config=kubernetes_executor_config)
def migrate_from_ftp(
- sftp = ElsevierSFTPService(),
- repo = ElsevierRepository(),
- **kwargs
+ sftp=ElsevierSFTPService(), repo=ElsevierRepository(), **kwargs
):
params = kwargs["params"]
specific_files = (
@@ -44,7 +42,7 @@ def migrate_from_ftp(
@task(executor_config=kubernetes_executor_config)
def trigger_file_processing(
- repo = ElsevierRepository(),
+ repo=ElsevierRepository(),
filenames=None,
):
return trigger_file_processing_elsevier(
diff --git a/dags/executor_config.py b/dags/executor_config.py
index db0feb33..971ddecf 100644
--- a/dags/executor_config.py
+++ b/dags/executor_config.py
@@ -9,9 +9,9 @@
resources=k8s.V1ResourceRequirements(
requests={"memory": "1500Mi"},
limits={"memory": "2Gi"},
- )
+ ),
)
],
)
),
-}
\ No newline at end of file
+}
diff --git a/dags/hindawi/hindawi_api_client.py b/dags/hindawi/hindawi_api_client.py
index 0546e11c..a1883da0 100644
--- a/dags/hindawi/hindawi_api_client.py
+++ b/dags/hindawi/hindawi_api_client.py
@@ -29,7 +29,7 @@ def get_articles_metadata(self, parameters, doi=None):
base_url=self.base_url,
headers={
"Accept": "application/xml",
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0"
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
},
path_segments=path_segments,
parameters=parameters,
diff --git a/dags/hindawi/hindawi_params.py b/dags/hindawi/hindawi_params.py
index 4d7df7c8..39b0b569 100644
--- a/dags/hindawi/hindawi_params.py
+++ b/dags/hindawi/hindawi_params.py
@@ -4,12 +4,12 @@
class HindawiParams:
def __init__(
self,
- from_date= (date.today() - timedelta(days=1)).strftime("%Y-%m-%d"),
- until_date= date.today().strftime("%Y-%m-%d"),
- verb= "listrecords",
- set= "HINDAWI.AHEP",
- metadataprefix= "marc21",
- record= "",
+ from_date=(date.today() - timedelta(days=1)).strftime("%Y-%m-%d"),
+ until_date=date.today().strftime("%Y-%m-%d"),
+ verb="listrecords",
+ set="HINDAWI.AHEP",
+ metadataprefix="marc21",
+ record="",
):
self.from_date = from_date
self.until_date = until_date
diff --git a/dags/hindawi/parser.py b/dags/hindawi/parser.py
index a3b706f7..a72b9123 100644
--- a/dags/hindawi/parser.py
+++ b/dags/hindawi/parser.py
@@ -216,9 +216,7 @@ def _get_publication_info(self, article):
"journal_volume": journal.find(
"./ns0:subfield/[@code='v']", self.prefixes
).text,
- "year": journal.find(
- "./ns0:subfield/[@code='y']", self.prefixes
- ).text,
+ "year": journal.find("./ns0:subfield/[@code='y']", self.prefixes).text,
}
for journal in journals
]
diff --git a/dags/iop/iop_process_file.py b/dags/iop/iop_process_file.py
index 47a80e3f..62f0084a 100644
--- a/dags/iop/iop_process_file.py
+++ b/dags/iop/iop_process_file.py
@@ -3,16 +3,16 @@
import pendulum
from airflow.decorators import dag, task
+from common.cleanup import (
+ convert_html_italics_to_latex,
+ convert_html_subscripts_to_latex,
+ replace_cdata_format,
+)
from common.enhancer import Enhancer
from common.enricher import Enricher
from common.exceptions import EmptyOutputFromPreviousTask
from common.scoap3_s3 import Scoap3Repository
from common.utils import create_or_update_article, upload_json_to_s3
-from common.cleanup import (
- replace_cdata_format,
- convert_html_subscripts_to_latex,
- convert_html_italics_to_latex,
-)
from inspire_utils.record import get_value
from iop.parser import IOPParser
from iop.repository import IOPRepository
@@ -20,12 +20,14 @@
logger = get_logger()
+
def process_xml(input):
input = convert_html_subscripts_to_latex(input)
input = convert_html_italics_to_latex(input)
input = replace_cdata_format(input)
return input
+
def iop_parse_file(**kwargs):
if "params" not in kwargs or "file" not in kwargs["params"]:
raise Exception("There was no 'file' parameter. Exiting run.")
@@ -33,7 +35,7 @@ def iop_parse_file(**kwargs):
file_name = kwargs["params"]["file_name"]
xml_bytes = base64.b64decode(encoded_xml)
if isinstance(xml_bytes, bytes):
- xml_bytes = xml_bytes.decode('utf-8')
+ xml_bytes = xml_bytes.decode("utf-8")
xml_bytes = process_xml(xml_bytes)
xml = ET.fromstring(xml_bytes)
diff --git a/dags/iop/parser.py b/dags/iop/parser.py
index 6161ac2c..57ba642b 100644
--- a/dags/iop/parser.py
+++ b/dags/iop/parser.py
@@ -17,8 +17,8 @@
extract_text,
get_license_type,
get_license_type_and_version_from_url,
+ parse_country_from_value,
parse_to_int,
- parse_country_from_value
)
from idutils import is_arxiv
from inspire_utils.date import PartialDate
@@ -114,7 +114,7 @@ def __init__(self, file_path=None):
required=True,
all_content_between_tags=True,
source="front/article-meta/title-group/article-title",
- remove_tags=True
+ remove_tags=True,
),
TextExtractor(
destination="subtitle",
@@ -131,7 +131,7 @@ def __init__(self, file_path=None):
source="front/article-meta/abstract/p",
all_content_between_tags=True,
extra_function=lambda x: x,
- remove_tags=True
+ remove_tags=True,
),
CustomExtractor(
destination="files",
diff --git a/dags/oup/oup_pull_ftp.py b/dags/oup/oup_pull_ftp.py
index a8fc9444..8b868f2c 100644
--- a/dags/oup/oup_pull_ftp.py
+++ b/dags/oup/oup_pull_ftp.py
@@ -19,9 +19,7 @@ def oup_pull_ftp():
logger = get_logger().bind(class_name="oup_pull_ftp")
@task()
- def migrate_from_ftp(
- ftp = OUPFTPService(), repo = OUPRepository(), **kwargs
- ):
+ def migrate_from_ftp(ftp=OUPFTPService(), repo=OUPRepository(), **kwargs):
params = kwargs["params"]
specific_files = (
"filenames_pull" in params
@@ -37,7 +35,7 @@ def migrate_from_ftp(
@task()
def trigger_file_processing(
- repo = OUPRepository(),
+ repo=OUPRepository(),
filenames=None,
):
return pull_ftp.trigger_file_processing(
diff --git a/dags/oup/parser.py b/dags/oup/parser.py
index b961dcfb..f0c3f27c 100644
--- a/dags/oup/parser.py
+++ b/dags/oup/parser.py
@@ -163,7 +163,9 @@ def _get_authors(self, article):
)
authors = []
for contribution in contributions:
- orcid = get_text_value(contribution.find("contrib-id[@contrib-id-type='orcid']"))
+ orcid = get_text_value(
+ contribution.find("contrib-id[@contrib-id-type='orcid']")
+ )
surname = get_text_value(contribution.find("name/surname"))
given_names = get_text_value(contribution.find("name/given-names"))
email = get_text_value(contribution.find("email"))
diff --git a/dags/springer/parser.py b/dags/springer/parser.py
index 9fd62a64..64180b46 100644
--- a/dags/springer/parser.py
+++ b/dags/springer/parser.py
@@ -9,7 +9,7 @@
CustomExtractor,
TextExtractor,
)
-from common.utils import construct_license, clean_text
+from common.utils import clean_text, construct_license
from structlog import get_logger
@@ -211,7 +211,11 @@ def _get_affiliations(self, author_group, contrib):
affiliations.append(cleaned_aff)
mapped_affiliations = [
- {"value": clean_text(aff), "organization": clean_text(org), **({"country": country} if country else {})}
+ {
+ "value": clean_text(aff),
+ "organization": clean_text(org),
+ **({"country": country} if country else {}),
+ }
for aff, org, country, in affiliations
]
diff --git a/documentation/Hindawi/hindawi_fields_mapping.md b/documentation/Hindawi/hindawi_fields_mapping.md
index 1824eaaf..a8746b09 100644
--- a/documentation/Hindawi/hindawi_fields_mapping.md
+++ b/documentation/Hindawi/hindawi_fields_mapping.md
@@ -5,16 +5,16 @@
| dois | generic_parsing : [33] | value | |
| arxiv_eprints | enricher : [67] | value | |
| | | categories | |
-| page_nr | parsing : [6] | | |
+| page_nr | parsing : [6] | | |
| authors | parsing : [6]<br>generic_parsing : [22] | surname | |
| | | given_names | |
| | | full_name | |
| | | affiliations | country |
| | | | institution |
-| collections | parsing [12] | | |
+| collections | parsing [12] | | |
| license | parsing [11] | url | |
| | | license | |
-| publication_info | generic_parsing : [40]] | journal_title | |
+| publication_info | generic_parsing : [40] | journal_title | |
| | | journal_volume | |
| | | year | |
| abstracts | enhancer : [46] | value | |
@@ -30,8 +30,8 @@
| | | source | |
| $schema | enricher : [66] | | |
-
# [Enricher](#enricher)
+
| | | |
| ------------------------------ | ------------- | ----------------------------------------------------- |
| Reference | Field | Enricher |
@@ -65,24 +65,24 @@
### [\_\_construct_abstracts](#__construct_abstracts)
-| Reference | Subfield | Value |
-| ------------------------------ | -------- | ------------------------------------------------------------------------------ |
+| Reference | Subfield | Value |
+| ------------------------------ | -------- | ---------------------------------------------------------------------------------------------- |
| [53] | value | Take value from generic parsing abstract [23] |
-| [54] | source | Constant: Hindawi |
+| [54] | source | Constant: Hindawi |
### [\_\_construct_acquisition_source](#__construct_acquisition_source)
| Reference | Subfield | Value |
| ------------------------------ | -------- | ------------------------------------------------ |
-| [55] | source | Constant: Hindawi |
-| [56] | method | Constant: Hindawi |
+| [55] | source | Constant: Hindawi |
+| [56] | method | Constant: Hindawi |
| [57] | date | datetime.datetime.now().isoformat() |
### [\_\_construct_copyright](#__construct_copyright)
-| Reference | Subfield | Value |
-| ------------------------------ | --------- | ----------------------------------------------------------------------------------------- |
-| [58] | year | Take value from parsing copyright_year [10] |
+| Reference | Subfield | Value |
+| ------------------------------ | --------- | --------------------------------------------------------------------------------------- |
+| [58] | year | Take value from parsing copyright_year [10] |
| [59] | statement | Take value from parsing copyright_statement [9] |
### [\_\_construct_imprints](#__construct_imprints)
@@ -107,99 +107,90 @@
### [\_\_remove_country](#__remove_country)
-| | | | |
-| ------------------------------ | ---------------------------------------------------------------------------------------- | ----- | -------------------------------------------- |
-| Reference | Field | Value | Processing |
-| [65] | from parsed affiliation country [55] | | removes county if the value has: |
-
+| | | | |
+| ------------------------------ | ----------------------------------------------------------------------------- | ----- | -------------------------------- |
+| Reference | Field | Value | Processing |
+| [65] | from parsed affiliation country [55] | | removes country if the value has: |
# [Generic parsing](#generic_parsing)
-| Reference | Field | Subfield | Processing | Default value |
-|-----------|------------------------|----------------------|--------------------------------------------------------------------------------------------------------------------------------------|---------------|
+| Reference | Field | Subfield | Processing | Default value |
+| ------------------------------ | ---------------------- | -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | ------------- |
| [22] | authors | surname, given_names | takes authors [2] and splits raw_name: if there is a comma, it means that the surname and given_name are in the second part | |
-| [23] | abstract | | takes abstract [3] and cleans white space characters | |
-| [24] | collaborations | | NO SUCH A FIELD IN HINDAWI | |
-| [25] | title | | takes title [4] and cleans white space characters | |
-| [26] | subtitle | | NO SUCH A FIELD IN HINDAWI | |
-| [27] | journal_year | | | |
-| [28] | preprint_date | | NO SUCH A FIELD IN HINDAWI | |
-| [29] | date_published | | takes date_published [5] and forms it f"{tmp_date.year:04d}-{tmp_date.month:02d}-{tmp_date.day:02d}" | |
-| [30] | related_article_doi | | NO SUCH A FIELD IN HINDAWI | |
-| [31] | free_keywords | | NO SUCH A FIELD IN HINDAWI | |
-| [32] | classification_numbers | | NO SUCH A FIELD IN HINDAWI | |
-| [33] | dois | | takes dois | |
-| [34] | thesis_supervisor | | NO SUCH A FIELD IN HINDAWI | |
-| [35] | thesis | | NO SUCH A FIELD IN HINDAWI | |
-| [36] | urls | | NO SUCH A FIELD IN HINDAWI | |
-| [37] | local_files | | NO SUCH A FIELD IN HINDAWI | |
-| [38] | record_creation_date | | NO SUCH A FIELD IN HINDAWI | |
-| [39] | control_field | | NO SUCH A FIELD IN HINDAWI | |
-| [40] | publication_info | | | |
-| [41] | | journal_title | takes journal title [16] | |
-| [42] | | journal_volume | takes journal volume [17] | |
-| [43] | | journal_year | takes journal year [18] | |
-| [44] | | journal_issue | NO SUCH A FIELD IN HINDAWI | |
-| [45] | | journal_doctype | NO SUCH A FIELD IN HINDAWI | |
-
+| [23] | abstract | | takes abstract [3] and cleans white space characters | |
+| [24] | collaborations | | NO SUCH A FIELD IN HINDAWI | |
+| [25] | title | | takes title [4] and cleans white space characters | |
+| [26] | subtitle | | NO SUCH A FIELD IN HINDAWI | |
+| [27] | journal_year | | | |
+| [28] | preprint_date | | NO SUCH A FIELD IN HINDAWI | |
+| [29] | date_published | | takes date_published [5] and forms it f"{tmp_date.year:04d}-{tmp_date.month:02d}-{tmp_date.day:02d}" | |
+| [30] | related_article_doi | | NO SUCH A FIELD IN HINDAWI | |
+| [31] | free_keywords | | NO SUCH A FIELD IN HINDAWI | |
+| [32] | classification_numbers | | NO SUCH A FIELD IN HINDAWI | |
+| [33] | dois | | takes dois | |
+| [34] | thesis_supervisor | | NO SUCH A FIELD IN HINDAWI | |
+| [35] | thesis | | NO SUCH A FIELD IN HINDAWI | |
+| [36] | urls | | NO SUCH A FIELD IN HINDAWI | |
+| [37] | local_files | | NO SUCH A FIELD IN HINDAWI | |
+| [38] | record_creation_date | | NO SUCH A FIELD IN HINDAWI | |
+| [39] | control_field | | NO SUCH A FIELD IN HINDAWI | |
+| [40] | publication_info | | | |
+| [41] | | journal_title | takes journal title [16] | |
+| [42] | | journal_volume | takes journal volume [17] | |
+| [43] | | journal_year | takes journal year [18] | |
+| [44] | | journal_issue | NO SUCH A FIELD IN HINDAWI | |
+| [45] | | journal_doctype | NO SUCH A FIELD IN HINDAWI | |
# [Parsing](#parsing)
-
-| Reference | Field | Source | Parsing |
-|-----------|---------------------|-----------------------------------------------------------------------------|------------------------------------------------------------------|
-| [1] | dois | ns0:metadata/ns1:record/ns0:datafield/[@tag='024']/ns0:subfield/[@code='a'] | lambda x: [x] |
-| [2] | authors | | authors_parsing |
-| [3] | abstract | ns0:metadata/ns1:record/ns0:datafield/[@tag='520']/ns0:subfield/[@code='a'] | lambda x: " ".join(x.split()) |
-| [4] | title | ns0:metadata/ns1:record/ns0:datafield/[@tag='245']/ns0:subfield/[@code='a'] | lambda x: x |
-| [5] | date_published | ns0:metadata/ns1:record/ns0:datafield/[@tag='260']/ns0:subfield/[@code='c'] | lambda x: x |
-| [6] | page_nr | ns0:metadata/ns1:record/ns0:datafield/[@tag='300']/ns0:subfield/[@code='a'] | lambda x: [int(x)] |
-| [7] | publication_info | | _get_publication_info |
-| [8] | arxiv_eprints | | _get_arxiv |
-| [9] | copyright_statement | ns0:metadata/ns1:record/ns0:datafield/[@tag='542']/ns0:subfield/[@code='f'] | |
-| [10] | copyright_year | ns0:metadata/ns1:record/ns0:datafield/[@tag='542']/ns0:subfield/ | re.search(r"[0-9]{4}", value).group(0) |
-| [11] | license | | _get_license |
-| [12] | collections | | constant: "Advances in High Energy Physics" |
-
+| Reference | Field | Source | Parsing |
+| ------------------------------ | ------------------- | --------------------------------------------------------------------------- | ----------------------------------------------------------- |
+| [1] | dois | ns0:metadata/ns1:record/ns0:datafield/[@tag='024']/ns0:subfield/[@code='a'] | lambda x: [x] |
+| [2] | authors | | authors_parsing |
+| [3] | abstract | ns0:metadata/ns1:record/ns0:datafield/[@tag='520']/ns0:subfield/[@code='a'] | lambda x: " ".join(x.split()) |
+| [4] | title | ns0:metadata/ns1:record/ns0:datafield/[@tag='245']/ns0:subfield/[@code='a'] | lambda x: x |
+| [5] | date_published | ns0:metadata/ns1:record/ns0:datafield/[@tag='260']/ns0:subfield/[@code='c'] | lambda x: x |
+| [6] | page_nr | ns0:metadata/ns1:record/ns0:datafield/[@tag='300']/ns0:subfield/[@code='a'] | lambda x: [int(x)] |
+| [7] | publication_info | | \_get_publication_info |
+| [8] | arxiv_eprints | | \_get_arxiv |
+| [9] | copyright_statement | ns0:metadata/ns1:record/ns0:datafield/[@tag='542']/ns0:subfield/[@code='f'] | |
+| [10] | copyright_year | ns0:metadata/ns1:record/ns0:datafield/[@tag='542']/ns0:subfield/ | re.search(r"[0-9]{4}", value).group(0) |
+| [11] | license | | \_get_license |
+| [12] | collections | | constant: "Advances in High Energy Physics" |
### [authors_parsing](#authors_parsing)
-| Reference | Field | Source | Parsing |
-|-----------|--------------|-------------------------------------------------|-----------------------------------------------------|
+| Reference | Field | Source | Parsing |
+| ------------------------------ | ------------ | ----------------------------------------------- | --------------------------------------------------- |
| [13] | raw_name | ns0:subfield[@code='a'] | lambda x: [x] |
-| [14] | affiliations | | _get_affiliations |
+| [14] | affiliations | | \_get_affiliations |
| [15] | orcid | ns0:subfield[@code='a']/ns0:subfield[@code='j'] | lambda x: " ".join(x.split()) |
+### [\_get_publication_info](#_get_publication_info)
-
-### [_get_publication_info](#_get_publication_info)
-
-| Reference | Field | Source | Parsing |
-|-----------|----------------|-----------------------------------------------------------------------------|---------|
+| Reference | Field | Source | Parsing |
+| ------------------------------ | -------------- | --------------------------------------------------------------------------- | ------- |
| [16] | journal_title | ns0:metadata/ns1:record/ns0:datafield/[@tag='773']/ns0:subfield/[@code='p'] | |
| [17] | journal_volume | ns0:metadata/ns1:record/ns0:datafield/[@tag='773']/ns0:subfield/[@code='v'] | |
| [18] | journal_year | ns0:metadata/ns1:record/ns0:datafield/[@tag='773']/ns0:subfield/[@code='y'] | |
+### [\_get_arxiv](#_get_arxiv)
+| Reference | Field | Source | Parsing |
+| ------------------------------ | ----- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------ |
+| [19] | value | "ns0:metadata/ns1:record/ns0:datafield/[@tag='037']/ns0:subfield/[@code='a']"<br>if the field above == 'arxiv'<br>field above:<br>ns0:metadata/ns1:record/ns0:datafield/[@tag='037']/ns0:subfield/[@code='9'] | Removing "arxiv" from value, leaving just digits |
-### [_get_arxiv](#_get_arxiv)
-
-| Reference | Field | Source | Parsing |
-|-----------|-------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------|
-| [19] | value | "ns0:metadata/ns1:record/ns0:datafield/[@tag='037']/ns0:subfield/[@code='a']"<br>if the field above == 'arxiv'<br>field above:<br>ns0:metadata/ns1:record/ns0:datafield/[@tag='037']/ns0:subfield/[@code='9'] | Removing "arxiv" from value, leaving just digits |
-
-
+### [\_get_license](#_get_license)
-### [_get_license](#_get_arxiv)
-| Reference | Field | Source | Parsing |
-|-----------|---------|---------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| Reference | Field | Source | Parsing |
+| ------------------------------ | ------- | ------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [20] | url | License urls: "ns0:metadata/ns1:record/ns0:datafield/[@tag='540']/ns0:subfield/[@code='u']" | |
| [21] | license | license text = ns0:metadata/ns1:record/ns0:datafield/[@tag='540']/ns0:subfield/[@code='a'] | url_parts = license_url.text.split("/")<br>clean_url_parts = list(filter(bool, url_parts))<br>version = clean_url_parts.pop()<br>license_type = clean_url_parts.pop()<br>f"CC-{license_type}-{version}" |
-### [_get_affiliations](#_get_affiliations)
+### [\_get_affiliations](#_get_affiliations)
-| Reference | Field | Source | Parsing |
-|-----------|--------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------|
-| [53] | value | ns0:metadata/ns1:record/ns0:datafield/[@tag='100']/ns0:subfield[@code='u']<br>ns0:metadata/ns1:record/ns0:datafield/[@tag='700']/ns0:subfield[@code='u'] | |
-| [54] | organization | same as value: ns0:metadata/ns1:record/ns0:datafield/[@tag='100']/ns0:subfield[@code='u']<br>ns0:metadata/ns1:record/ns0:datafield/[@tag='700']/ns0:subfield[@code='u'] | takes the string before the last comma |
+| Reference | Field | Source | Parsing |
+| ------------------------------ | ------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------- |
+| [53] | value | ns0:metadata/ns1:record/ns0:datafield/[@tag='100']/ns0:subfield[@code='u']<br>ns0:metadata/ns1:record/ns0:datafield/[@tag='700']/ns0:subfield[@code='u'] | |
+| [54] | organization | same as value: ns0:metadata/ns1:record/ns0:datafield/[@tag='100']/ns0:subfield[@code='u']<br>ns0:metadata/ns1:record/ns0:datafield/[@tag='700']/ns0:subfield[@code='u'] | takes the string before the last comma |
| [55] | country | same as value: ns0:metadata/ns1:record/ns0:datafield/[@tag='100']/ns0:subfield[@code='u']<br>ns0:metadata/ns1:record/ns0:datafield/[@tag='700']/ns0:subfield[@code='u'] | takes the last string after comma, which starts with a capital letter |
diff --git a/documentation/IOP/iop_fields_mapping.md b/documentation/IOP/iop_fields_mapping.md
index f713d35b..c67366c0 100644
--- a/documentation/IOP/iop_fields_mapping.md
+++ b/documentation/IOP/iop_fields_mapping.md
@@ -80,8 +80,8 @@
| Reference | Subfield | Value |
| ------------------------------ | -------- | ------------------------------------------------ |
-| [57] | source | Constant: IOP |
-| [58] | method | Constant: IOP |
+| [57] | source | Constant: IOP |
+| [58] | method | Constant: IOP |
| [59] | date | datetime.datetime.now().isoformat() |
### [\_\_construct_copyright](#__construct_copyright)
diff --git a/requirements-airflow.txt b/requirements-airflow.txt
index eceea4bc..88265bbb 100644
--- a/requirements-airflow.txt
+++ b/requirements-airflow.txt
@@ -1,3 +1,2 @@
-c https://raw.githubusercontent.com/apache/airflow/constraints-2.8.3/constraints-3.10.txt
apache-airflow[celery, postgres, redis, cncf.kubernetes, sentry]==2.8.3
-
diff --git a/scripts/gitlint_rules/rules.py b/scripts/gitlint_rules/rules.py
index 89a7e9fc..5e6790c5 100644
--- a/scripts/gitlint_rules/rules.py
+++ b/scripts/gitlint_rules/rules.py
@@ -1,6 +1,6 @@
-import re
+# import re
-from gitlint.rules import CommitRule, RuleViolation
+# from gitlint.rules import CommitRule, RuleViolation
# EXAMPLE GITLINT CONFIGURATION
diff --git a/tests/integration/iop/test_iop_dag_pull_sftp.py b/tests/integration/iop/test_iop_dag_pull_sftp.py
index 30236786..71dfde84 100644
--- a/tests/integration/iop/test_iop_dag_pull_sftp.py
+++ b/tests/integration/iop/test_iop_dag_pull_sftp.py
@@ -1,10 +1,11 @@
+import time
+
import pytest
from airflow.models import DagBag
from common.pull_ftp import migrate_from_ftp, trigger_file_processing
from iop.repository import IOPRepository
from iop.sftp_service import IOPSFTPService
from structlog import get_logger
-import time
DAG_NAME = "iop_pull_sftp"
@@ -94,7 +95,7 @@ def test_dag_run(dag, dag_was_paused: bool, iop_empty_repo):
def test_dag_migrate_from_FTP(iop_empty_repo):
iop_empty_repo.delete_all()
assert len(iop_empty_repo.find_all()) == 0
-
+
with IOPSFTPService() as sftp:
migrate_from_ftp(
sftp,
@@ -165,12 +166,20 @@ def test_dag_migrate_from_FTP(iop_empty_repo):
assert len(iop_empty_repo.find_all()) == len(expected_files)
- iop_pdf_files = sorted(item["pdf"] for item in iop_empty_repo.find_all() if "pdf" in item)
- expected_pdf_files = sorted(item["pdf"] for item in expected_files if "pdf" in item)
+ iop_pdf_files = sorted(
+ item["pdf"] for item in iop_empty_repo.find_all() if "pdf" in item
+ )
+ expected_pdf_files = sorted(
+ item["pdf"] for item in expected_files if "pdf" in item
+ )
assert iop_pdf_files == expected_pdf_files
- iop_xml_files = sorted(item["xml"] for item in iop_empty_repo.find_all() if "xml" in item)
- expected_xml_files = sorted(item["xml"] for item in expected_files if "xml" in item)
+ iop_xml_files = sorted(
+ item["xml"] for item in iop_empty_repo.find_all() if "xml" in item
+ )
+ expected_xml_files = sorted(
+ item["xml"] for item in expected_files if "xml" in item
+ )
assert iop_xml_files == expected_xml_files
diff --git a/tests/integration/iop/test_repo.py b/tests/integration/iop/test_repo.py
index ae757e72..e9245bc6 100644
--- a/tests/integration/iop/test_repo.py
+++ b/tests/integration/iop/test_repo.py
@@ -1,9 +1,10 @@
+import time
+
from common.pull_ftp import migrate_from_ftp
from iop.repository import IOPRepository
from iop.sftp_service import IOPSFTPService
from pytest import fixture
from structlog import get_logger
-import time
@fixture
@@ -16,7 +17,7 @@ def iop_empty_repo():
def test_pull_from_sftp(iop_empty_repo):
iop_empty_repo.delete_all()
assert len(iop_empty_repo.find_all()) == 0
-
+
with IOPSFTPService() as sftp:
migrate_from_ftp(
sftp,
@@ -49,7 +50,8 @@ def test_pull_from_sftp(iop_empty_repo):
{
"pdf": "extracted/2022-07-30T03_02_01_content/1674-1137/1674-1137_46/1674-1137_46_8/1674-1137_46_8_085106/cpc_46_8_085106.pdf",
"xml": "extracted/2022-07-30T03_02_01_content/1674-1137/1674-1137_46/1674-1137_46_8/1674-1137_46_8_085106/cpc_46_8_085106.xml",
- }, {
+ },
+ {
"pdf": "extracted/2022-09-01T03_01_40_content/1674-1137/1674-1137_46/1674-1137_46_9/1674-1137_46_9_093111/cpc_46_9_093111.pdf",
"xml": "extracted/2022-09-01T03_01_40_content/1674-1137/1674-1137_46/1674-1137_46_9/1674-1137_46_9_093111/cpc_46_9_093111.xml",
},
@@ -82,17 +84,24 @@ def test_pull_from_sftp(iop_empty_repo):
"xml": "extracted/2022-09-24T03_01_43_content/1674-1137/1674-1137_46/1674-1137_46_10/1674-1137_46_10_103108/cpc_46_10_103108.xml",
},
{"xml": "extracted/aca95c/aca95c.xml"},
-
]
-
+
assert len(iop_empty_repo.find_all()) == len(expected_files)
- iop_pdf_files = sorted(item["pdf"] for item in iop_empty_repo.find_all() if "pdf" in item)
- expected_pdf_files = sorted(item["pdf"] for item in expected_files if "pdf" in item)
+ iop_pdf_files = sorted(
+ item["pdf"] for item in iop_empty_repo.find_all() if "pdf" in item
+ )
+ expected_pdf_files = sorted(
+ item["pdf"] for item in expected_files if "pdf" in item
+ )
assert iop_pdf_files == expected_pdf_files
- iop_xml_files = sorted(item["xml"] for item in iop_empty_repo.find_all() if "xml" in item)
- expected_xml_files = sorted(item["xml"] for item in expected_files if "xml" in item)
+ iop_xml_files = sorted(
+ item["xml"] for item in iop_empty_repo.find_all() if "xml" in item
+ )
+ expected_xml_files = sorted(
+ item["xml"] for item in expected_files if "xml" in item
+ )
assert iop_xml_files == expected_xml_files
assert sorted(iop_empty_repo.get_all_raw_filenames()) == sorted(
@@ -103,4 +112,4 @@ def test_pull_from_sftp(iop_empty_repo):
"2022-09-24T03_01_43_content.zip",
"aca95c.zip",
]
- )
\ No newline at end of file
+ )
diff --git a/tests/integration/oup/test_oup_dag_process_file.py b/tests/integration/oup/test_oup_dag_process_file.py
index 60c3937c..0e641e93 100644
--- a/tests/integration/oup/test_oup_dag_process_file.py
+++ b/tests/integration/oup/test_oup_dag_process_file.py
@@ -79,6 +79,7 @@ def test_affiliation_countries_in_enriched(parser, articles):
for aff in author.get("affiliations"):
assert aff.get("country") is not None
+
def test_dag_loaded(dag):
assert dag
assert len(dag.tasks) == 6
diff --git a/tests/units/aps/test_aps_parser.py b/tests/units/aps/test_aps_parser.py
index 987ba470..745abdbc 100644
--- a/tests/units/aps/test_aps_parser.py
+++ b/tests/units/aps/test_aps_parser.py
@@ -61,9 +61,9 @@ def parsed_articles(parser, articles):
"surname": "Wu",
"affiliations": [
{
-                    "value": 'Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA',
+                    "value": "Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA",
"organization": "Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA",
- "ror": "https://ror.org/02vm5rt34"
+ "ror": "https://ror.org/02vm5rt34",
}
],
},
@@ -73,9 +73,9 @@ def parsed_articles(parser, articles):
"surname": "Turner",
"affiliations": [
{
-                    "value": 'Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA',
+                    "value": "Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA",
"organization": "Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA",
- "ror": "https://ror.org/02vm5rt34"
+ "ror": "https://ror.org/02vm5rt34",
}
],
},
@@ -85,9 +85,9 @@ def parsed_articles(parser, articles):
"surname": "Wang",
"affiliations": [
{
-                    "value": 'Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA',
+                    "value": "Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA",
"organization": "Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA",
- "ror": "https://ror.org/02vm5rt34"
+ "ror": "https://ror.org/02vm5rt34",
}
],
},
@@ -97,9 +97,9 @@ def parsed_articles(parser, articles):
"surname": "Borel",
"affiliations": [
{
-                    "value": 'Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA',
+                    "value": "Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA",
"organization": "Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA",
- "ror": "https://ror.org/02vm5rt34"
+ "ror": "https://ror.org/02vm5rt34",
}
],
},
@@ -111,9 +111,9 @@ def parsed_articles(parser, articles):
"surname": "Boudjada",
"affiliations": [
{
-                    "value": 'Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA',
+                    "value": "Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA",
"organization": "Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA",
- "ror": "https://ror.org/02vm5rt34"
+ "ror": "https://ror.org/02vm5rt34",
}
],
},
@@ -123,9 +123,9 @@ def parsed_articles(parser, articles):
"surname": "Buessen",
"affiliations": [
{
-                    "value": 'Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA',
+                    "value": "Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA",
"organization": "Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA",
- "ror": "https://ror.org/02vm5rt34"
+ "ror": "https://ror.org/02vm5rt34",
}
],
},
@@ -135,9 +135,9 @@ def parsed_articles(parser, articles):
"surname": "Paramekanti",
"affiliations": [
{
-                    "value": 'Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA',
+                    "value": "Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA",
"organization": "Department of Physics and Astronomy, Vanderbilt University, Nashville, Tennessee 37240, USA",
- "ror": "https://ror.org/02vm5rt34"
+ "ror": "https://ror.org/02vm5rt34",
}
],
},
diff --git a/tests/units/aps/test_trigger_files_processing.py b/tests/units/aps/test_trigger_files_processing.py
index 07192369..e71d7593 100644
--- a/tests/units/aps/test_trigger_files_processing.py
+++ b/tests/units/aps/test_trigger_files_processing.py
@@ -7,6 +7,7 @@
from airflow.models.dagrun import DagRun
from aps.utils import trigger_file_processing_DAG
+
class S3BucketResultObj:
def __init__(self, key):
self.key = key
diff --git a/tests/units/aps/test_utils.py b/tests/units/aps/test_utils.py
index 5b93f4eb..9745f3ff 100644
--- a/tests/units/aps/test_utils.py
+++ b/tests/units/aps/test_utils.py
@@ -61,6 +61,6 @@ def test_save_file_in_s3():
@freeze_time("2023-12-04 10:00")
def test_split_json():
ids_and_articles = split_json(repo=MockedRepo(), key="key/key")
- expected_id = f"APS__2023-12-04T10:00:00.000000+0000"
+ expected_id = "APS__2023-12-04T10:00:00.000000+0000"
assert ids_and_articles[0]["id"] == expected_id
assert len(ids_and_articles) == 1
diff --git a/tests/units/clean/test_clean.py b/tests/units/clean/test_clean.py
index 449b29bc..a24e78eb 100644
--- a/tests/units/clean/test_clean.py
+++ b/tests/units/clean/test_clean.py
@@ -9,7 +9,7 @@
@fixture
def dag():
dagbag = DagBag(dag_folder="dags/", include_examples=False)
- assert dagbag.import_errors.get(f"dags/cleanup_logs.py") is None
+ assert dagbag.import_errors.get("dags/cleanup_logs.py") is None
clean_dag = dagbag.get_dag(dag_id="cleanup_logs")
return clean_dag
diff --git a/tests/units/common/data/file_with_mathML.xml b/tests/units/common/data/file_with_mathML.xml
index 106dd94f..dddc4760 100644
--- a/tests/units/common/data/file_with_mathML.xml
+++ b/tests/units/common/data/file_with_mathML.xml
@@ -1,6 +1,6 @@