Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Limit number of requests sent to OpenAlex in tests #44

Merged
merged 1 commit into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions tests/demo_data/snowballing_doi.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
,title,doi,included
0,"Social Networks Analysis: Tools, Measures and Visualization",https://doi.org/10.1007/978-1-4471-4054-2_1,1
1,"Genome-wide Association Study of Alcohol Dependence",https://doi.org/10.1001/archgenpsychiatry.2009.83,0
0,"Myrmecochorous plants in Australia and their dispersal by ants",https://doi.org/10.1071/bt9750475,1
1,"Mimicking the one-dimensional marginal distributions of processes having an ito differential",https://doi.org/10.1007/bf00699039,0
4 changes: 2 additions & 2 deletions tests/demo_data/snowballing_openalex.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
,openalex_id,title,included
0,https://openalex.org/W2234238252,"Social Networks Analysis: Tools, Measures and Visualization",1
1,https://openalex.org/W1977467968,"Genome-wide Association Study of Alcohol Dependence",0
0,https://openalex.org/W2051970045,"Myrmecochorous plants in Australia and their dispersal by ants",1
1,https://openalex.org/W104454400,"Mimicking the one-dimensional marginal distributions of processes having an ito differential",0
60 changes: 47 additions & 13 deletions tests/test_snowball.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,45 @@
from pathlib import Path

import pandas as pd
import pyalex

from asreviewcontrib.datatools.snowball import backward_snowballing
from asreviewcontrib.datatools.snowball import forward_snowballing
from asreviewcontrib.datatools.snowball import openalex_from_doi
from asreviewcontrib.datatools.snowball import snowball

# Folder next to this test module that holds the demo CSV inputs.
INPUT_DIR = Path(__file__).parent.joinpath("demo_data")

# Contact address registered with pyalex below and passed to snowball() in the tests.
# NOTE(review): this literal looks like an obfuscated placeholder — confirm the value.
EMAIL = "[email protected]"

pyalex.config.email = EMAIL

# These works were chosen for testing forward snowballing.
# Each has a DOI, cites other works and is itself cited, and has a cited_by_count
# below 400, so it takes only two requests to get all citing works. They are also
# from the previous century, so the cited_by_count is unlikely to change very much.
# These are also the same records as in the demo datasets 'snowballing_doi.csv' and
# 'snowballing_openalex.csv'.
WORKS = [
    # Each entry describes one OpenAlex work used as a test fixture:
    #   id / doi        - identifiers of the work itself
    #   cited_by_count  - citation count recorded when the fixture was written
    #   cited_by        - one work expected among the forward citations
    #   cites           - presumably one work expected among the backward
    #                     citations; verify against the backward test
    {
        "id": "https://openalex.org/W2051970045",
        "doi": "https://doi.org/10.1071/bt9750475",
        "title": "Myrmecochorous plants in Australia and their dispersal by ants",
        "cited_by_count": 372,
        "cited_by": "https://openalex.org/W2174650845",
        "cites": "https://openalex.org/W1538725992",
    },
    {
        "id": "https://openalex.org/W104454400",
        "doi": "https://doi.org/10.1007/bf00699039",
        "title": (
            "Mimicking the one-dimensional marginal distributions of processes "
            "having an ito differential"
        ),
        "cited_by_count": 299,
        "cited_by": "https://openalex.org/W1842249978",
        "cites": "https://openalex.org/W1513091520",
    },
]


def test_openalex_from_doi():
Expand Down Expand Up @@ -41,32 +73,30 @@ def test_backward_snowballing():


def test_forward_snowballing():
    """Check that forward snowballing finds a known citing work for each fixture.

    The identifiers are taken from the module-level ``WORKS`` fixtures, so this
    test queries the same records as the other snowballing tests instead of a
    separate hard-coded set.
    """
    identifiers = [work["id"] for work in WORKS]

    # The result is indexed by input identifier; each value is iterated as
    # records that carry an "id" key.
    forwards_citations = forward_snowballing(identifiers)

    assert WORKS[0]["cited_by"] in [
        field_dict["id"] for field_dict in forwards_citations[identifiers[0]]
    ]
    assert WORKS[1]["cited_by"] in [
        field_dict["id"] for field_dict in forwards_citations[identifiers[1]]
    ]


def test_openalex_id_forward(tmpdir):
out_fp = Path(tmpdir, "forward_all.csv")
out_fp = Path(tmpdir, "forward.csv")
snowball(
input_path=INPUT_DIR / "snowballing_openalex.csv",
output_path=out_fp,
forward=True,
backward=False,
use_all=False,
email=EMAIL,
)
df = pd.read_csv(out_fp)
assert len(df) >= 23
assert len(df) >= 364

all_out_fp = Path(tmpdir, "forward_all.csv")
snowball(
Expand All @@ -75,22 +105,24 @@ def test_openalex_id_forward(tmpdir):
forward=True,
backward=False,
use_all=True,
email=EMAIL,
)
df_all = pd.read_csv(all_out_fp)
assert len(df_all) >= 387
assert len(df_all) >= 656


def test_openalex_id_backward(tmpdir):
out_fp = Path(tmpdir, "forward_all.csv")
out_fp = Path(tmpdir, "backward.csv")
snowball(
input_path=INPUT_DIR / "snowballing_openalex.csv",
output_path=out_fp,
forward=False,
backward=True,
use_all=False,
email=EMAIL,
)
df = pd.read_csv(out_fp)
assert len(df) == 31
assert len(df) == 40

all_out_fp = Path(tmpdir, "backward_all.csv")
snowball(
Expand All @@ -99,9 +131,10 @@ def test_openalex_id_backward(tmpdir):
forward=False,
backward=True,
use_all=True,
email=EMAIL,
)
df_all = pd.read_csv(all_out_fp)
assert len(df_all) == 117
assert len(df_all) == 45


def test_snowballing_from_doi(tmpdir):
Expand All @@ -112,6 +145,7 @@ def test_snowballing_from_doi(tmpdir):
forward=False,
backward=True,
use_all=True,
email=EMAIL,
)
df = pd.read_csv(out_fp)
assert len(df) == 117
assert len(df) == 45
Loading