Skip to content

Commit

Permalink
HindawiApiClient: added curl headers
Browse files: browse the repository at this point in the history
  • Loading branch information
ErnestaP authored and pamfilos committed Aug 2, 2024
1 parent 7584af2 commit 150dd5d
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
5 changes: 5 additions & 0 deletions dags/hindawi/hindawi_api_client.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,6 +19,10 @@ def __init__(
"HINDAWI_API_FILES_URL", "http://downloads.hindawi.com"
)
self.logger = get_logger().bind(class_name=type(self).__name__)
self.headers = {
"Accept": "application/xml",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0"
}

def get_articles_metadata(self, parameters, doi=None):
path_segments = ["oai-pmh", "oai.aspx"]
Expand All @@ -27,6 +31,7 @@ def get_articles_metadata(self, parameters, doi=None):
path_segments.append(doi)
request = Request(
base_url=self.base_url,
headers=self.headers,
path_segments=path_segments,
parameters=parameters,
)
Expand Down
8 changes: 4 additions & 4 deletions dags/hindawi/hindawi_pull_api.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,17 +17,17 @@
)
def hindawi_pull_api():
@task()
def set_fetching_intervals(repo= HindawiRepository(), **kwargs):
def set_fetching_intervals(repo=HindawiRepository(), **kwargs):
return set_harvesting_interval(repo=repo, **kwargs)

@task()
def save_xml_in_s3(dates: dict, repo= HindawiRepository(), **kwargs):
def save_xml_in_s3(dates: dict, repo=HindawiRepository(), **kwargs):
record = kwargs["params"]["record_doi"]
parameters = HindawiParams(
from_date=dates["from_date"], until_date=dates["until_date"], record=record
).get_params()
rest_api = HindawiApiClient(
base_url=os.getenv("HINDAWI_API_BASE_URL", "https://www.hindawi.com")
base_url=os.getenv("HINDAWI_API_BASE_URL", "https://oaipmh.hindawi.com")
)
articles_metadata = rest_api.get_articles_metadata(parameters)
if not articles_metadata:
Expand All @@ -36,7 +36,7 @@ def save_xml_in_s3(dates: dict, repo= HindawiRepository(), **kwargs):
return save_file_in_s3(data=articles_metadata, repo=repo)

@task()
def trigger_files_processing(key, repo= HindawiRepository()):
def trigger_files_processing(key, repo=HindawiRepository()):
if not key:
logging.warning("No new files were downloaded to s3")
return
Expand Down

0 comments on commit 150dd5d

Please sign in to comment.