Global: Changes caused by linting

Signed-off-by: Lorenzo Vagliano
cern-sis · Nov 1, 2024 · 04534da · 04534da
1 parent 4331dbe
commit 04534da
Show file tree

Hide file tree

Showing 58 changed files with 442 additions and 445 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -31,4 +31,3 @@ repos:
         entry: gitlint
         stages: [commit-msg]
         args: [--msg-filename]
-
diff --git a/dags/aps/aps_params.py b/dags/aps/aps_params.py
@@ -4,11 +4,11 @@
 class APSParams:
     def __init__(
         self,
-        from_date= (date.today() - timedelta(days=1)).strftime("%Y-%m-%d"),
-        until_date= date.today().strftime("%Y-%m-%d"),
-        date= "modified",
-        journals= "",
-        set= "scoap3",
+        from_date=(date.today() - timedelta(days=1)).strftime("%Y-%m-%d"),
+        until_date=date.today().strftime("%Y-%m-%d"),
+        date="modified",
+        journals="",
+        set="scoap3",
         per_page: int = 100,
     ):
         self.from_date = from_date

diff --git a/dags/aps/aps_pull_api.py b/dags/aps/aps_pull_api.py
@@ -18,11 +18,11 @@
 )
 def aps_pull_api():
     @task()
-    def set_fetching_intervals(repo = APSRepository(), **kwargs):
+    def set_fetching_intervals(repo=APSRepository(), **kwargs):
         return set_harvesting_interval(repo=repo, **kwargs)
 
     @task()
-    def save_json_in_s3(dates: dict, repo = APSRepository(), **kwargs):
+    def save_json_in_s3(dates: dict, repo=APSRepository(), **kwargs):
         parameters = APSParams(
             from_date=dates["from_date"],
             until_date=dates["until_date"],
@@ -40,7 +40,7 @@ def save_json_in_s3(dates: dict, repo = APSRepository(), **kwargs):
         return None
 
     @task()
-    def trigger_files_processing(key, repo = APSRepository()):
+    def trigger_files_processing(key, repo=APSRepository()):
         if key is None:
             logging.warning("No new files were downloaded to s3")
             return

diff --git a/dags/aps/parser.py b/dags/aps/parser.py
@@ -97,26 +97,27 @@ def _form_authors(self, article):
             if author["type"] == "Person"
         ]
 
-
     def extract_organization_and_ror(self, text):
         pattern = r'<a href="([^"]+)">(.*?)</a>'
-        
+
         ror_url = None
-        
+
         def replace_and_capture(match):
             nonlocal ror_url
             ror_url = match.group(1)
             return match.group(2)
-        
+
         modified_text = re.sub(pattern, replace_and_capture, text)
-        
+
         return modified_text, ror_url
 
     def _get_affiliations(self, article, affiliationIds):
         parsed_affiliations = [
             {
                 "value": affiliation["name"],
-                "organization": self.extract_organization_and_ror(affiliation["name"])[0],
+                "organization": self.extract_organization_and_ror(affiliation["name"])[
+                    0
+                ],
                 "ror": self.extract_organization_and_ror(affiliation["name"])[1],
             }
             for affiliation in article["affiliations"]

diff --git a/dags/aps/repository.py b/dags/aps/repository.py
@@ -1,6 +1,5 @@
 import io
 import os
-from typing import IO
 
 from common.repository import IRepository
 from common.s3_service import S3Service

diff --git a/dags/clean/cleanup_logs.py b/dags/clean/cleanup_logs.py
@@ -2,7 +2,6 @@
 
 import pendulum
 from airflow.decorators import dag
-from airflow.operators.bash import BashOperator
 from airflow.operators.bash_operator import BashOperator
 
 AIRFLOW_HOME = os.getenv("AIRFLOW_HOME")
@@ -17,7 +16,7 @@ def cleanup_logs():
         bash_command=f"""
     logs_dir="{logs_dir}"
     find "$logs_dir" -type d -mtime +30 -exec rm -r {{}} \;
-    """,
+    """,  # noqa
     )
 
 

diff --git a/dags/common/cleanup.py b/dags/common/cleanup.py
@@ -35,7 +35,7 @@ def replace_cdata_format(text):
     CDATA_PATTERN = re.compile(r"<\?CDATA(.*)\?>")
     # pattern = re.compile(r'<\?CDATA\s(.*?)\s\?>', re.DOTALL)
 
-    replaced_text = CDATA_PATTERN.sub(r'<![CDATA[ \1 ]]>', text)
+    replaced_text = CDATA_PATTERN.sub(r"<![CDATA[ \1 ]]>", text)
 
     return replaced_text
 

diff --git a/dags/common/constants.py b/dags/common/constants.py
@@ -91,7 +91,7 @@
         ("Brazil", "Brazil"),
         ("Brasil", "Brazil"),
         ("Benin", "Benin"),
-        (u"Bénin", "Benin"),
+        ("Bénin", "Benin"),
         ("Bulgaria", "Bulgaria"),
         ("Bosnia and Herzegovina", "Bosnia and Herzegovina"),
         ("Canada", "Canada"),
@@ -141,7 +141,7 @@
         ("Luxembourg", "Luxembourg"),
         ("Macedonia", "Macedonia"),
         ("Mexico", "Mexico"),
-        (u"México", "Mexico"),
+        ("México", "Mexico"),
         ("Monaco", "Monaco"),
         ("Montenegro", "Montenegro"),
         ("Morocco", "Morocco"),
@@ -161,7 +161,7 @@
         ("Portugalo", "Portugal"),
         ("Portugal", "Portugal"),
         ("P.R.China", "China"),
-        (u"People’s Republic of China", "China"),
+        ("People’s Republic of China", "China"),
         ("Republic of Belarus", "Belarus"),
         ("Republic of Benin", "Benin"),
         ("Republic of Korea", "South Korea"),
@@ -181,7 +181,7 @@
         ("Slovenia", "Slovenia"),
         ("South Africa", "South Africa"),
         ("Africa", "South Africa"),
-        (u"España", "Spain"),
+        ("España", "Spain"),
         ("Spain", "Spain"),
         ("Sudan", "Sudan"),
         ("Sweden", "Sweden"),
@@ -233,19 +233,21 @@
     ]
 )
 
-INSTITUTIONS_AND_COUNTRIES_MAPPING = OrderedDict([
-    ("INFN", "Italy"),
-    ("European Organization for Nuclear Research", "CERN"),
-    ("Conseil Européen pour la Recherche Nucléaire", "CERN"),
-    ("CERN", "CERN"),
-    ("KEK", "Japan"),
-    ("DESY", "Germany"),
-    ("FERMILAB", "USA"),
-    ("FNAL", "USA"),
-    ("SLACK", "USA"),
-    ("Stanford Linear Accelerator Center", "USA"),
-    ("Joint Institute for Nuclear Research", "JINR"),
-    ("JINR", "JINR"),
-    ("ROC", "Taiwan"),
-    ("R.O.C", "Taiwan"),
-])
+INSTITUTIONS_AND_COUNTRIES_MAPPING = OrderedDict(
+    [
+        ("INFN", "Italy"),
+        ("European Organization for Nuclear Research", "CERN"),
+        ("Conseil Européen pour la Recherche Nucléaire", "CERN"),
+        ("CERN", "CERN"),
+        ("KEK", "Japan"),
+        ("DESY", "Germany"),
+        ("FERMILAB", "USA"),
+        ("FNAL", "USA"),
+        ("SLACK", "USA"),
+        ("Stanford Linear Accelerator Center", "USA"),
+        ("Joint Institute for Nuclear Research", "JINR"),
+        ("JINR", "JINR"),
+        ("ROC", "Taiwan"),
+        ("R.O.C", "Taiwan"),
+    ]
+)
diff --git a/dags/common/enhancer.py b/dags/common/enhancer.py
@@ -2,7 +2,7 @@
 import re
 
 from common.constants import FN_REGEX
-from common.utils import parse_country_from_value, get_country_ISO_name
+from common.utils import get_country_ISO_name, parse_country_from_value
 
 
 class Enhancer:
@@ -47,7 +47,7 @@ def __construct_titles(self, item, publisher):
     def __construct_authors(self, item):
         # add_nations(item)
         pattern_for_cern_cooperation_agreement = re.compile(
-            r'cooperation agreement with cern', re.IGNORECASE
+            r"cooperation agreement with cern", re.IGNORECASE
         )
         for author in item.get("authors", []):
             for affiliation in author.get("affiliations", []):
@@ -65,11 +65,12 @@ def __construct_authors(self, item):
                         affiliation["country"] = _parsed_country
 
                 if affiliation.get("country"):
-                    affiliation["country"] = get_country_ISO_name(affiliation["country"])
+                    affiliation["country"] = get_country_ISO_name(
+                        affiliation["country"]
+                    )
 
         return item
 
-
     def __call__(self, publisher, item):
         creation_date = datetime.datetime.now().isoformat()
         item_copy = item.copy()

diff --git a/dags/common/parsing/xml_extractors.py b/dags/common/parsing/xml_extractors.py
@@ -17,7 +17,7 @@ def __init__(
         extra_function=lambda s: s,
         prefixes=None,
         all_content_between_tags=False,
-        remove_tags=False
+        remove_tags=False,
     ):
         super().__init__(destination)
 
@@ -92,7 +92,7 @@ def __init__(
         default_value=None,
         required=False,
         extra_function=lambda x: x,
-    ) :
+    ):
         super().__init__(destination)
         self.destination = destination
         self.source = source
@@ -132,7 +132,7 @@ def extract(self, article):
 class CustomExtractor(IExtractor):
     def __init__(
         self, destination, extraction_function, required=False, default_value=None
-    ) :
+    ):
         super().__init__(destination)
         self.destination = destination
         self.extraction_function = extraction_function
@@ -154,7 +154,7 @@ def __init__(
         destination,
         value,
         required=False,
-    ) :
+    ):
         super().__init__(destination)
         self.destination = destination
         self.required = required

diff --git a/dags/common/scoap3_s3.py b/dags/common/scoap3_s3.py
@@ -8,21 +8,19 @@
 
 logger = get_logger()
 
-FILE_EXTENSIONS = {
-    "pdf": ".pdf",
-    "xml": ".xml",
-    "pdfa": ".pdf"
-}
+FILE_EXTENSIONS = {"pdf": ".pdf", "xml": ".xml", "pdfa": ".pdf"}
+
 
 def update_filename_extension(filename, type):
     extension = FILE_EXTENSIONS.get(type, "")
     if filename.endswith(extension):
         return filename
     elif extension:
         if type == "pdfa":
-            extension = f".a-2b.pdf"
+            extension = ".a-2b.pdf"
         return f"{filename}{extension}"
 
+
 class Scoap3Repository(IRepository):
     def __init__(self):
         super().__init__()
@@ -55,7 +53,7 @@ def copy_file(self, source_bucket, source_key, prefix=None, type=None):
                     "source_key": source_key,
                 },
                 "MetadataDirective": "REPLACE",
-                "ACL": "public-read"
+                "ACL": "public-read",
             },
         )
         logger.info(
@@ -67,7 +65,9 @@ def copy_files(self, bucket, files, prefix=None):
         copied_files = {}
         for type, path in files.items():
             try:
-                copied_files[type] = self.copy_file(bucket, path, prefix=prefix, type=type)
+                copied_files[type] = self.copy_file(
+                    bucket, path, prefix=prefix, type=type
+                )
             except Exception as e:
                 logger.error("Failed to copy file.", error=str(e), type=type, path=path)
         return copied_files

diff --git a/dags/common/utils.py b/dags/common/utils.py
@@ -1,15 +1,14 @@
-from datetime import date, datetime
 import io
 import json
 import os
 import re
 import tarfile
 import xml.etree.ElementTree as ET
 import zipfile
+from datetime import date, datetime
 from ftplib import error_perm
 from io import StringIO
 from stat import S_ISDIR, S_ISREG
-from inspire_utils.record import get_value
 
 import backoff
 import country_converter as coco
@@ -20,7 +19,6 @@
     BY_PATTERN,
     CDATA_PATTERN,
     COUNTRIES_DEFAULT_MAPPING,
-    COUNTRY_PARSING_PATTERN,
     CREATIVE_COMMONS_PATTERN,
     INSTITUTIONS_AND_COUNTRIES_MAPPING,
     LICENSE_PATTERN,
@@ -30,11 +28,13 @@
     UnknownFileExtension,
     UnknownLicense,
 )
+from inspire_utils.record import get_value
 from structlog import get_logger
 
 logger = get_logger()
 cc = coco.CountryConverter()
 
+
 def set_harvesting_interval(repo, **kwargs):
     if (
         "params" in kwargs
@@ -268,7 +268,7 @@ def iterate_element(item):
     iterate_element(item)
 
     title_part = [i for i in title_parts if i]
-    full_text = ' '.join(title_part).strip()
+    full_text = " ".join(title_part).strip()
 
     return full_text
 
@@ -311,10 +311,12 @@ def parse_country_from_value(affiliation_value):
         country_code = cc.convert(country, to="iso2")
         mapped_countries = []
         if country_code != "not found":
-            mapped_countries = [{
-                "code": country_code,
-                "name": cc.convert(country, to="name_short"),
-            }]
+            mapped_countries = [
+                {
+                    "code": country_code,
+                    "name": cc.convert(country, to="name_short"),
+                }
+            ]
 
         if len(mapped_countries) > 1 or len(mapped_countries) == 0:
             raise FoundMoreThanOneMatchOrNone(affiliation_value)

diff --git a/dags/elsevier/elsevier_pull_sftp.py b/dags/elsevier/elsevier_pull_sftp.py
@@ -23,9 +23,7 @@ def elsevier_pull_sftp():
 
     @task(executor_config=kubernetes_executor_config)
     def migrate_from_ftp(
-        sftp = ElsevierSFTPService(),
-        repo = ElsevierRepository(),
-        **kwargs
+        sftp=ElsevierSFTPService(), repo=ElsevierRepository(), **kwargs
     ):
         params = kwargs["params"]
         specific_files = (
@@ -44,7 +42,7 @@ def migrate_from_ftp(
 
     @task(executor_config=kubernetes_executor_config)
     def trigger_file_processing(
-        repo = ElsevierRepository(),
+        repo=ElsevierRepository(),
         filenames=None,
     ):
         return trigger_file_processing_elsevier(

diff --git a/dags/executor_config.py b/dags/executor_config.py
@@ -9,9 +9,9 @@
                     resources=k8s.V1ResourceRequirements(
                         requests={"memory": "1500Mi"},
                         limits={"memory": "2Gi"},
-                    )
+                    ),
                 )
             ],
         )
     ),
-}
+}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -31,4 +31,3 @@ repos:
		entry: gitlint
		stages: [commit-msg]
		args: [--msg-filename]