Skip to content

Commit

Permalink
OA: fix URLs and pre-commit setup
Browse files Browse the repository at this point in the history
  • Loading branch information
ErnestaP committed Apr 9, 2024
1 parent 9fa7191 commit 0666039
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 14 deletions.
7 changes: 5 additions & 2 deletions dags/open_access/open_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,11 @@ def oa_dag():
@task()
def fetch_data_task(query, **kwargs):
year = kwargs["params"].get("year")
base_query = f"(affiliation:CERN+or+595:'For+annual+report')+\
and+year:{year}+not+980:ConferencePaper+not+980:BookChapter"
# Base search expression that the task prepends to the per-access-type query.
# Terms are separated by "+". The "+" between the closing parenthesis and
# "and" must be kept: without it the expression becomes "...report')and+year",
# fusing two tokens of the query.
base_query = (
    "(affiliation:CERN+or+595:'For+annual+report')"
    f"+and+year:{year}"
    "+not+980:ConferencePaper"
    "+not+980:BookChapter"
)
type_of_query = [*query][0]
url = utils.get_url(f"{base_query}+{query[type_of_query]}")
data = utils.get_data(url)
Expand Down
29 changes: 17 additions & 12 deletions dags/open_access/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@


def get_url(query, current_collection="Published+Articles"):
url = f"https://cds.cern.ch/search?ln=en&cc={current_collection}&p={query}\
&action_search=Search&op1=a&m1=a&p1=&f1=&c=\
Published+Articles&c=&sf=&so=d&rm=&rg=100&sc=0&of=xm"
# Assemble the search URL from adjacent literals: only the first fragment
# interpolates values (the collection and the query); the remaining fragments
# are fixed query-string parameters.
url = (
    f"https://cds.cern.ch/search?ln=en&cc={current_collection}&p={query}"
    "&action_search=Search&op1=a&m1=a&p1=&f1=&c="
    "Published+Articles&c=&sf=&so=d&rm=&rg=100&sc=0&of=xm"
)
return url


def get_total_results_count(data):
TOTAL_RECORDS_COUNT = re.compile(
r"Search-Engine-Total-Number-Of-Results\
:\s(\d*)\s"
r"Search-Engine-Total-Number-Of-Results" + r":\s(\d*)\s"
)
comment_line = data.split("\n")[1]
match = TOTAL_RECORDS_COUNT.search(comment_line)
Expand All @@ -25,13 +26,17 @@ def get_total_results_count(data):
return 0


closed_access_query = "not+540__a:'CC+BY'+not+540__a:'CC-BY'+\
not+540__f:Bronze+not+540__3:preprint"
bronze_access_query = "540__f:'Bronze'"
green_access_query = "not+540__a:'CC+BY'+not+540__a:'CC-BY'+not+540__a:\
'arXiv+nonexclusive-distrib'+not+540__f:'Bronze'"
gold_access_query = "540__3:'publication'+and+\
(540__a:'CC-BY'+OR++540__a:'CC+BY')"
# Query fragments for each open-access category. Clauses are listed one per
# line and glued together with "+", the separator used throughout these
# search expressions.
closed_access_query = "+".join(
    (
        "not+540__a:'CC+BY'",
        "not+540__a:'CC-BY'",
        "not+540__f:Bronze",
        "not+540__3:preprint",
    )
)
bronze_access_query = "540__f:'Bronze'"
green_access_query = "+".join(
    (
        "not+540__a:'CC+BY'",
        "not+540__a:'CC-BY'",
        "not+540__a:'arXiv+nonexclusive-distrib'",
        "not+540__f:'Bronze'",
    )
)
# Kept as a single literal so the doubled "+" after OR is reproduced exactly.
gold_access_query = "540__3:'publication'+and+(540__a:'CC-BY'+OR++540__a:'CC+BY')"


@backoff.on_exception(
Expand Down
8 changes: 8 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[flake8]
max-line-length = 120

[pycodestyle]
max-line-length = 120

[isort]
line_length = 88

0 comments on commit 0666039

Please sign in to comment.