def get_url(query, current_collection="Published+Articles", cds_token=None):
    """Build the CDS search URL for ``query``.

    Args:
        query: Search pattern placed in the ``p=`` parameter. It is inserted
            verbatim, so it must already be URL-encoded by the caller.
        current_collection: Value of the ``cc=`` parameter, inserted verbatim.
        cds_token: Optional API key. When truthy it is percent-encoded and
            appended as ``&apikey=...``; when ``None`` or empty, nothing is
            appended.

    Returns:
        The complete search URL as a string.
    """
    # Function-scope import so the helper does not depend on the module's
    # import block being updated.
    from urllib.parse import quote

    # NOTE(review): the ``c=`` parameter stays hard-coded to
    # "Published+Articles" even when ``current_collection`` differs --
    # confirm this is intended.
    url = (
        rf"https://cds.cern.ch/search?ln=en&cc={current_collection}&p={query}"
        r"&action_search=Search&op1=a&m1=a&p1=&f1=&c="
        r"Published+Articles&c=&sf=&so=d&rm=&rg=100&sc=0&of=xm"
    )
    if cds_token:
        # Percent-encode the key so reserved characters (+, &, =, #) cannot
        # split or truncate the query string. Plain alphanumeric keys are
        # left unchanged.
        url += f"&apikey={quote(str(cds_token), safe='')}"
    return url