Skip to content

Commit

Permalink
Limit number of requests sent to OpenAlex in tests (#44)
Browse files Browse the repository at this point in the history
  • Loading branch information
PeterLombaers authored Jul 25, 2024
1 parent d71440c commit d1d78e2
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 17 deletions.
4 changes: 2 additions & 2 deletions tests/demo_data/snowballing_doi.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
,title,doi,included
0,"Social Networks Analysis: Tools, Measures and Visualization",https://doi.org/10.1007/978-1-4471-4054-2_1,1
1,"Genome-wide Association Study of Alcohol Dependence",https://doi.org/10.1001/archgenpsychiatry.2009.83,0
0,"Myrmecochorous plants in Australia and their dispersal by ants",https://doi.org/10.1071/bt9750475,1
1,"Mimicking the one-dimensional marginal distributions of processes having an ito differential",https://doi.org/10.1007/bf00699039,0
4 changes: 2 additions & 2 deletions tests/demo_data/snowballing_openalex.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
,openalex_id,title,included
0,https://openalex.org/W2234238252,"Social Networks Analysis: Tools, Measures and Visualization",1
1,https://openalex.org/W1977467968,"Genome-wide Association Study of Alcohol Dependence",0
0,https://openalex.org/W2051970045,"Myrmecochorous plants in Australia and their dispersal by ants",1
1,https://openalex.org/W104454400,"Mimicking the one-dimensional marginal distributions of processes having an ito differential",0
60 changes: 47 additions & 13 deletions tests/test_snowball.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,45 @@
from pathlib import Path

import pandas as pd
import pyalex

from asreviewcontrib.datatools.snowball import backward_snowballing
from asreviewcontrib.datatools.snowball import forward_snowballing
from asreviewcontrib.datatools.snowball import openalex_from_doi
from asreviewcontrib.datatools.snowball import snowball

INPUT_DIR = Path(__file__).parent / "demo_data"
EMAIL = "[email protected]"

pyalex.config.email = EMAIL

# Reference works used by the forward snowballing tests.
#
# Selection criteria:
#   * each work has a DOI;
#   * each work both cites other works and is cited by other works;
#   * each cited_by_count is below 400, so only two requests are needed to
#     retrieve all citing works;
#   * each work dates from the previous century, so its cited_by_count is
#     unlikely to change very much.
#
# The same records appear in the demo datasets 'snowballing_doi.csv' and
# 'snowballing_openalex.csv'.
#
# "cited_by" holds one OpenAlex ID that the forward snowballing test expects to
# find among the works citing the entry. "cites" presumably holds one work the
# entry itself references -- confirm against the backward snowballing test.
WORKS = [
    {
        "id": "https://openalex.org/W2051970045",
        "doi": "https://doi.org/10.1071/bt9750475",
        "title": "Myrmecochorous plants in Australia and their dispersal by ants",
        "cited_by_count": 372,
        "cited_by": "https://openalex.org/W2174650845",
        "cites": "https://openalex.org/W1538725992",
    },
    {
        "id": "https://openalex.org/W104454400",
        "doi": "https://doi.org/10.1007/bf00699039",
        "title": (
            "Mimicking the one-dimensional marginal distributions "
            "of processes having an ito differential"
        ),
        "cited_by_count": 299,
        "cited_by": "https://openalex.org/W1842249978",
        "cites": "https://openalex.org/W1513091520",
    },
]


def test_openalex_from_doi():
Expand Down Expand Up @@ -41,32 +73,30 @@ def test_backward_snowballing():


def test_forward_snowballing():
identifiers = [
"https://openalex.org/W4281483266",
"https://openalex.org/W2008620264",
]
identifiers = [work["id"] for work in WORKS]

forwards_citations = forward_snowballing(identifiers)

assert "https://openalex.org/W4386305682" in [
assert WORKS[0]["cited_by"] in [
field_dict["id"] for field_dict in forwards_citations[identifiers[0]]
]
assert "https://openalex.org/W2124637492" in [
assert WORKS[1]["cited_by"] in [
field_dict["id"] for field_dict in forwards_citations[identifiers[1]]
]


def test_openalex_id_forward(tmpdir):
out_fp = Path(tmpdir, "forward_all.csv")
out_fp = Path(tmpdir, "forward.csv")
snowball(
input_path=INPUT_DIR / "snowballing_openalex.csv",
output_path=out_fp,
forward=True,
backward=False,
use_all=False,
email=EMAIL,
)
df = pd.read_csv(out_fp)
assert len(df) >= 23
assert len(df) >= 364

all_out_fp = Path(tmpdir, "forward_all.csv")
snowball(
Expand All @@ -75,22 +105,24 @@ def test_openalex_id_forward(tmpdir):
forward=True,
backward=False,
use_all=True,
email=EMAIL,
)
df_all = pd.read_csv(all_out_fp)
assert len(df_all) >= 387
assert len(df_all) >= 656


def test_openalex_id_backward(tmpdir):
out_fp = Path(tmpdir, "forward_all.csv")
out_fp = Path(tmpdir, "backward.csv")
snowball(
input_path=INPUT_DIR / "snowballing_openalex.csv",
output_path=out_fp,
forward=False,
backward=True,
use_all=False,
email=EMAIL,
)
df = pd.read_csv(out_fp)
assert len(df) == 31
assert len(df) == 40

all_out_fp = Path(tmpdir, "backward_all.csv")
snowball(
Expand All @@ -99,9 +131,10 @@ def test_openalex_id_backward(tmpdir):
forward=False,
backward=True,
use_all=True,
email=EMAIL,
)
df_all = pd.read_csv(all_out_fp)
assert len(df_all) == 117
assert len(df_all) == 45


def test_snowballing_from_doi(tmpdir):
Expand All @@ -112,6 +145,7 @@ def test_snowballing_from_doi(tmpdir):
forward=False,
backward=True,
use_all=True,
email=EMAIL,
)
df = pd.read_csv(out_fp)
assert len(df) == 117
assert len(df) == 45

0 comments on commit d1d78e2

Please sign in to comment.