common(formatting): fix linting, formatting errors

Signed-off-by: pamfilos <[email protected]>
cern-sis · Dec 5, 2024 · 0f07a99 · 0f07a99
1 parent 109223c
commit 0f07a99
Show file tree

Hide file tree

Showing 5 changed files with 30 additions and 26 deletions.
diff --git a/dags/common/cleanup.py b/dags/common/cleanup.py
@@ -15,28 +15,27 @@ def convert_html_subscripts_to_latex(input):
     input = re.sub("<sup>(.*?)</sup>", r"$^{\1}$", input)
     return input
 
+
 def clean_inline_expressions(input):
     input = re.sub(
-        r"<InlineEquation.*?>(.*?)</InlineEquation>",
-        r"\1",
-        input,
-        flags=re.DOTALL
+        r"<InlineEquation.*?>(.*?)</InlineEquation>", r"\1", input, flags=re.DOTALL
     )
     input = re.sub(
         r"<EquationSource Format=\"TEX\"><!\[CDATA\[(.*?)\]\]></EquationSource>",
         r"\1",
-        input
+        input,
     )
     input = re.sub(
         r"<EquationSource Format=\"MATHML\">.*?</EquationSource>",
         "",
         input,
-        flags=re.DOTALL
+        flags=re.DOTALL,
     )
-    input = input.replace('\n', '').replace('\r', '')
+    input = input.replace("\n", "").replace("\r", "")
 
     return input
 
+
 def convert_html_italics_to_latex(input):
     input = re.sub(r"<italic\b[^>]*>(.*?)</italic>", r"$\\textit{\1}$", input)
     return input

diff --git a/dags/springer/springer_process_file.py b/dags/springer/springer_process_file.py
@@ -4,18 +4,18 @@
 import pendulum
 import requests
 from airflow.decorators import dag, task
+from common.cleanup import (
+    clean_inline_expressions,
+    clean_whitespace_characters,
+    convert_html_italics_to_latex,
+    convert_html_subscripts_to_latex,
+    replace_cdata_format,
+)
 from common.enhancer import Enhancer
 from common.enricher import Enricher
 from common.exceptions import EmptyOutputFromPreviousTask
 from common.scoap3_s3 import Scoap3Repository
 from common.utils import create_or_update_article, upload_json_to_s3
-from common.cleanup import (
-    replace_cdata_format,
-    convert_html_subscripts_to_latex,
-    convert_html_italics_to_latex,
-    clean_whitespace_characters,
-    clean_inline_expressions,
-)
 from inspire_utils.record import get_value
 from jsonschema import validate
 from springer.parser import SpringerParser
@@ -24,22 +24,24 @@
 
 logger = get_logger()
 
+
 def process_xml(input):
     input = convert_html_subscripts_to_latex(input)
     input = convert_html_italics_to_latex(input)
     input = replace_cdata_format(input)
     input = clean_inline_expressions(input)
-    input = input.replace('\n', '').replace('\r', '').lstrip().rstrip()
+    input = input.replace("\n", "").replace("\r", "").lstrip().rstrip()
     input = clean_whitespace_characters(input.strip())
     return input
 
+
 def springer_parse_file(**kwargs):
     if "params" in kwargs and "file" in kwargs["params"]:
         encoded_xml = kwargs["params"]["file"]
         file_name = kwargs["params"]["file_name"]
         xml_bytes = base64.b64decode(encoded_xml)
         if isinstance(xml_bytes, bytes):
-            xml_bytes = xml_bytes.decode('utf-8')
+            xml_bytes = xml_bytes.decode("utf-8")
         xml_bytes = process_xml(xml_bytes)
         xml = ET.fromstring(xml_bytes.decode("utf-8"))
 

diff --git a/tests/units/aps/test_aps_parser.py b/tests/units/aps/test_aps_parser.py
@@ -1,8 +1,8 @@
 import json
 
 import pytest
-from aps.parser import APSParser
 from aps.aps_process_file import enhance_aps
+from aps.parser import APSParser
 
 
 @pytest.fixture(scope="module")
@@ -203,6 +203,7 @@ def test_aps_parsing(parsed_articles, expected, key):
         assert key in article
         assert article[key] == expected_value
 
+
 def test_aps_country_parsing(parsed_articles):
     for article in parsed_articles:
         enhance_aps(article)
diff --git a/tests/units/springer/test_parser.py b/tests/units/springer/test_parser.py
@@ -30,15 +30,17 @@ def parsed_articles(parser, articles):
 
 def test_weird_titles(parsed_articles):
     parsed_titles = sorted([a.get("title") for a in parsed_articles])
-    expected_results = sorted([
-        " $$(g-2)_{e,\\mu }$$ anomalies and decays $$h\\rightarrow e_a e_b$$ , "
+    expected_results = sorted(
+        [
+            " $$(g-2)_{e,\\mu }$$ anomalies and decays $$h\\rightarrow e_a e_b$$ , "
             "$$Z\\rightarrow e_ae_b$$ , and $$e_b\\rightarrow e_a \\gamma $$ in a two "
             "Higgs doublet model with inverse seesaw neutrinos",
-        " $$\\Lambda $$ polarization in very high energy heavy ion collisions as a probe of the quark–gluon plasma formation and properties",
-        "A strategy for a general search for new phenomena using data-derived signal regions and its application within the ATLAS experiment",
-        "Revisiting the mechanical properties of the nucleon",
-        "Symmetry breaking in quantum curves and super Chern-Simons matrix models"
-    ])
+            " $$\\Lambda $$ polarization in very high energy heavy ion collisions as a probe of the quark–gluon plasma formation and properties",
+            "A strategy for a general search for new phenomena using data-derived signal regions and its application within the ATLAS experiment",
+            "Revisiting the mechanical properties of the nucleon",
+            "Symmetry breaking in quantum curves and super Chern-Simons matrix models",
+        ]
+    )
 
     assert expected_results == parsed_titles
 
@@ -351,7 +353,7 @@ def test_abstract(parsed_articles):
         "experimental data of $$(g-2)_{e,\\mu }$$ as well as the "
         "promising LFV signals corresponding to the future experimental "
         "sensitivities.",
-        None
+        None,
     )
     for abstract, article in zip(abstracts, parsed_articles):
         if abstract is None:

diff --git a/tests/units/springer/test_parser/weird.title.Meta b/tests/units/springer/test_parser/weird.title.Meta
@@ -220,4 +220,4 @@
          </Issue>
       </Volume>
    </Journal>
-</Publisher>
+</Publisher>