Skip to content

Commit

Permalink
hindawi: update files download url
Browse files (browse the repository at this point in the history)
Signed-off-by: pamfilos <[email protected]>
  • Loading branch information
pamfilos committed Aug 28, 2024
1 parent 55f43c7 commit c195c61
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 8 deletions.
9 changes: 4 additions & 5 deletions dags/hindawi/hindawi_api_client.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,10 +19,6 @@ def __init__(
"HINDAWI_API_FILES_URL", "http://downloads.hindawi.com"
)
self.logger = get_logger().bind(class_name=type(self).__name__)
self.headers = {
"Accept": "application/xml",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0"
}

def get_articles_metadata(self, parameters, doi=None):
path_segments = ["oai-pmh", "oai.aspx"]
Expand All @@ -31,7 +27,10 @@ def get_articles_metadata(self, parameters, doi=None):
path_segments.append(doi)
request = Request(
base_url=self.base_url,
headers=self.headers,
headers={
"Accept": "application/xml",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0"
},
path_segments=path_segments,
parameters=parameters,
)
Expand Down
6 changes: 3 additions & 3 deletions dags/hindawi/hindawi_file_processing.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -61,9 +61,9 @@ def populate_files(parsed_file):
logger.info("Populating files", doi=doi)
doi_part = doi.split("10.1155/")[1]
files = {
"pdf": f"http://downloads.hindawi.com/journals/ahep/{doi_part}.pdf",
"pdfa": f"http://downloads.hindawi.com/journals/ahep/{doi_part}.a.pdf",
"xml": f"http://downloads.hindawi.com/journals/ahep/{doi_part}.xml",
"pdf": f"https://s3.amazonaws.com/downloads.hindawi.com/journals/ahep/{doi_part}.pdf",
"pdfa": f"https://s3.amazonaws.com/downloads.hindawi.com/journals/ahep/{doi_part}.a.pdf",
"xml": f"https://s3.amazonaws.com/downloads.hindawi.com/journals/ahep/{doi_part}.xml",
}
s3_scoap3_client = Scoap3Repository()
downloaded_files = s3_scoap3_client.download_files(files, prefix=doi)
Expand Down

0 comments on commit c195c61

Please sign in to comment.