From 150dd5d945b11636b6f705a9b52ccdf67de5a5d5 Mon Sep 17 00:00:00 2001
From: ErnestaP <ernesta.petraityte@yahoo.com>
Date: Wed, 10 Jul 2024 13:55:21 +0200
Subject: [PATCH] HindawiAPIClient: add curl headers

---
 dags/hindawi/hindawi_api_client.py | 5 +++++
 dags/hindawi/hindawi_pull_api.py   | 8 ++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/dags/hindawi/hindawi_api_client.py b/dags/hindawi/hindawi_api_client.py
index d5de2f8d..67003043 100644
--- a/dags/hindawi/hindawi_api_client.py
+++ b/dags/hindawi/hindawi_api_client.py
@@ -19,6 +19,10 @@ def __init__(
             "HINDAWI_API_FILES_URL", "http://downloads.hindawi.com"
         )
         self.logger = get_logger().bind(class_name=type(self).__name__)
+        self.headers = {
+            "Accept": "application/xml",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0"
+        }
 
     def get_articles_metadata(self, parameters, doi=None):
         path_segments = ["oai-pmh", "oai.aspx"]
@@ -27,6 +31,7 @@ def get_articles_metadata(self, parameters, doi=None):
             path_segments.append(doi)
         request = Request(
             base_url=self.base_url,
+            headers=self.headers,
             path_segments=path_segments,
             parameters=parameters,
         )
diff --git a/dags/hindawi/hindawi_pull_api.py b/dags/hindawi/hindawi_pull_api.py
index 84b7a22f..75ee10f2 100644
--- a/dags/hindawi/hindawi_pull_api.py
+++ b/dags/hindawi/hindawi_pull_api.py
@@ -17,17 +17,17 @@
 )
 def hindawi_pull_api():
     @task()
-    def set_fetching_intervals(repo= HindawiRepository(), **kwargs):
+    def set_fetching_intervals(repo=HindawiRepository(), **kwargs):
         return set_harvesting_interval(repo=repo, **kwargs)
 
     @task()
-    def save_xml_in_s3(dates: dict, repo= HindawiRepository(), **kwargs):
+    def save_xml_in_s3(dates: dict, repo=HindawiRepository(), **kwargs):
         record = kwargs["params"]["record_doi"]
         parameters = HindawiParams(
             from_date=dates["from_date"], until_date=dates["until_date"], record=record
         ).get_params()
         rest_api = HindawiApiClient(
-            base_url=os.getenv("HINDAWI_API_BASE_URL", "https://www.hindawi.com")
+            base_url=os.getenv("HINDAWI_API_BASE_URL", "https://oaipmh.hindawi.com")
         )
         articles_metadata = rest_api.get_articles_metadata(parameters)
         if not articles_metadata:
@@ -36,7 +36,7 @@ def save_xml_in_s3(dates: dict, repo= HindawiRepository(), **kwargs):
         return save_file_in_s3(data=articles_metadata, repo=repo)
 
     @task()
-    def trigger_files_processing(key, repo= HindawiRepository()):
+    def trigger_files_processing(key, repo=HindawiRepository()):
         if not key:
             logging.warning("No new files were downloaded to s3")
             return