From d1d78e292e9b6719b97b9a7229ce11f69498f6ac Mon Sep 17 00:00:00 2001 From: PeterLombaers <71253799+PeterLombaers@users.noreply.github.com> Date: Thu, 25 Jul 2024 13:28:39 +0200 Subject: [PATCH] Limit number of requests sent to OpenAlex in tests (#44) --- tests/demo_data/snowballing_doi.csv | 4 +- tests/demo_data/snowballing_openalex.csv | 4 +- tests/test_snowball.py | 60 +++++++++++++++++++----- 3 files changed, 51 insertions(+), 17 deletions(-) diff --git a/tests/demo_data/snowballing_doi.csv b/tests/demo_data/snowballing_doi.csv index b9b24cf..abfc959 100644 --- a/tests/demo_data/snowballing_doi.csv +++ b/tests/demo_data/snowballing_doi.csv @@ -1,3 +1,3 @@ ,title,doi,included -0,"Social Networks Analysis: Tools, Measures and Visualization",https://doi.org/10.1007/978-1-4471-4054-2_1,1 -1,"Genome-wide Association Study of Alcohol Dependence",https://doi.org/10.1001/archgenpsychiatry.2009.83,0 +0,"Myrmecochorous plants in Australia and their dispersal by ants",https://doi.org/10.1071/bt9750475,1 +1,"Mimicking the one-dimensional marginal distributions of processes having an ito differential",https://doi.org/10.1007/bf00699039,0 diff --git a/tests/demo_data/snowballing_openalex.csv b/tests/demo_data/snowballing_openalex.csv index 2a30570..253cf37 100644 --- a/tests/demo_data/snowballing_openalex.csv +++ b/tests/demo_data/snowballing_openalex.csv @@ -1,3 +1,3 @@ ,openalex_id,title,included -0,https://openalex.org/W2234238252,"Social Networks Analysis: Tools, Measures and Visualization",1 -1,https://openalex.org/W1977467968,"Genome-wide Association Study of Alcohol Dependence",0 +0,https://openalex.org/W2051970045,"Myrmecochorous plants in Australia and their dispersal by ants",1 +1,https://openalex.org/W104454400,"Mimicking the one-dimensional marginal distributions of processes having an ito differential",0 diff --git a/tests/test_snowball.py b/tests/test_snowball.py index eb840f8..596e4b0 100644 --- a/tests/test_snowball.py +++ b/tests/test_snowball.py @@ -1,6 +1,7 @@ from pathlib import Path import pandas as pd +import pyalex from asreviewcontrib.datatools.snowball import backward_snowballing from asreviewcontrib.datatools.snowball import forward_snowballing @@ -8,6 +9,37 @@ from asreviewcontrib.datatools.snowball import snowball INPUT_DIR = Path(__file__).parent / "demo_data" +EMAIL = "asreview@uu.nl" + +pyalex.config.email = EMAIL + +# These works were chosen for testing forward snowballing. +# They have a DOI, they cite and are cited by, their cited_by_count is less than 400, +# so it takes only two requests to get all citing works. And they are from the previous +# century so the cited_by_count is unlikely to change very much. +# These are also the same records as in the demo datasets 'snowballing_doi.csv' and +# 'snowballing_openalex.csv'. +WORKS = [ + { + "id": "https://openalex.org/W2051970045", + "doi": "https://doi.org/10.1071/bt9750475", + "title": "Myrmecochorous plants in Australia and their dispersal by ants", + "cited_by_count": 372, + "cited_by": "https://openalex.org/W2174650845", + "cites": "https://openalex.org/W1538725992", + }, + { + "id": "https://openalex.org/W104454400", + "doi": "https://doi.org/10.1007/bf00699039", + "title": ( + "Mimicking the one-dimensional marginal distributions of processes having" + " an ito differential" + ), + "cited_by_count": 299, + "cited_by": "https://openalex.org/W1842249978", + "cites": "https://openalex.org/W1513091520", + }, +] def test_openalex_from_doi(): @@ -41,32 +73,30 @@ def test_backward_snowballing(): def test_forward_snowballing(): - identifiers = [ - "https://openalex.org/W4281483266", - "https://openalex.org/W2008620264", - ] + identifiers = [work["id"] for work in WORKS] forwards_citations = forward_snowballing(identifiers) - assert "https://openalex.org/W4386305682" in [ + assert WORKS[0]["cited_by"] in [ field_dict["id"] for field_dict in forwards_citations[identifiers[0]] ] - assert "https://openalex.org/W2124637492" in [ + assert WORKS[1]["cited_by"] in [ field_dict["id"] for field_dict in forwards_citations[identifiers[1]] ] def test_openalex_id_forward(tmpdir): - out_fp = Path(tmpdir, "forward_all.csv") + out_fp = Path(tmpdir, "forward.csv") snowball( input_path=INPUT_DIR / "snowballing_openalex.csv", output_path=out_fp, forward=True, backward=False, use_all=False, + email=EMAIL, ) df = pd.read_csv(out_fp) - assert len(df) >= 23 + assert len(df) >= 364 all_out_fp = Path(tmpdir, "forward_all.csv") snowball( @@ -75,22 +105,24 @@ def test_openalex_id_forward(tmpdir): forward=True, backward=False, use_all=True, + email=EMAIL, ) df_all = pd.read_csv(all_out_fp) - assert len(df_all) >= 387 + assert len(df_all) >= 656 def test_openalex_id_backward(tmpdir): - out_fp = Path(tmpdir, "forward_all.csv") + out_fp = Path(tmpdir, "backward.csv") snowball( input_path=INPUT_DIR / "snowballing_openalex.csv", output_path=out_fp, forward=False, backward=True, use_all=False, + email=EMAIL, ) df = pd.read_csv(out_fp) - assert len(df) == 31 + assert len(df) == 40 all_out_fp = Path(tmpdir, "backward_all.csv") snowball( @@ -99,9 +131,10 @@ def test_openalex_id_backward(tmpdir): forward=False, backward=True, use_all=True, + email=EMAIL, ) df_all = pd.read_csv(all_out_fp) - assert len(df_all) == 117 + assert len(df_all) == 45 def test_snowballing_from_doi(tmpdir): @@ -112,6 +145,7 @@ def test_snowballing_from_doi(tmpdir): forward=False, backward=True, use_all=True, + email=EMAIL, ) df = pd.read_csv(out_fp) - assert len(df) == 117 + assert len(df) == 45