import pytest
from django.test import TestCase

from scoap3.articles.models import Article, ArticleIdentifier
from scoap3.authors.models import Author
from scoap3.misc.models import Affiliation, Country, PublicationInfo, Publisher
from scoap3.utils.tools import author_export


@pytest.mark.django_db
@pytest.mark.vcr
class TestAuthorExport(TestCase):
    """Tests for ``author_export(search_year, search_country)``.

    Every test compares the full return value of ``author_export`` against a
    dict with a ``"header"`` list and a ``"data"`` list of rows. Each row is
    ``[year, journal, doi, arxiv number, primary arxiv category, author,
    country, affiliation, total number of authors]``.
    """

    # Column names expected in the "header" entry of every export, in order.
    EXPECTED_HEADER = (
        "year",
        "journal",
        "doi",
        "arxiv number",
        "primary arxiv category",
        "author",
        "country",
        "affiliation",
        "total number of authors",
    )

    # Journal title shared by every article fixture in this module.
    JOURNAL_TITLE = "Adv. High Energy Phys."

    def setUp(self):
        """Create the publishers and countries the article fixtures refer to."""
        self.publisher_1 = Publisher.objects.create(name="Elsevier")
        self.publisher_2 = Publisher.objects.create(name="Springer")

        self.country_gb = Country.objects.create(code="GB", name="United Kingdom")
        self.country_fr = Country.objects.create(code="FR", name="France")
        self.country_jp = Country.objects.create(code="JP", name="Japan")
        self.country_be = Country.objects.create(code="BE", name="Belgium")
        self.country_br = Country.objects.create(code="BR", name="Brazil")
        self.country_it = Country.objects.create(code="IT", name="Italy")
        self.country_es = Country.objects.create(code="ES", name="Spain")

    def _expected_result(self, rows=()):
        """Build the dict ``author_export`` is expected to return.

        Args:
            rows: iterable of expected data rows; defaults to no rows.

        Returns:
            ``{"header": [...], "data": [...]}`` with fresh list objects, so
            the shared class constant can never be mutated through a result.
        """
        return {
            "header": list(self.EXPECTED_HEADER),
            "data": [list(row) for row in rows],
        }

    def create_article(
        self,
        title,
        subtitle,
        abstract,
        publication_date,
        doi_value,
        publisher,
        journal_title,
        author_data,
        country,
        affiliation_value,
    ):
        """Create one article together with its related records.

        Creates an ``Article``, a ``"DOI"`` ``ArticleIdentifier``, a
        ``PublicationInfo``, one ``Author`` (built from ``author_data``) and
        one ``Affiliation`` in ``country`` that is linked to that author.

        Returns:
            The created ``Article``.
        """
        article = Article.objects.create(
            title=title,
            subtitle=subtitle,
            abstract=abstract,
            publication_date=publication_date,
        )

        ArticleIdentifier.objects.create(
            article_id=article,
            identifier_type="DOI",
            identifier_value=doi_value,
        )

        PublicationInfo.objects.create(
            journal_title=journal_title,
            article_id=article,
            publisher=publisher,
        )

        author = Author.objects.create(article_id=article, **author_data)

        affiliation = Affiliation.objects.create(
            country=country,
            value=affiliation_value,
            organization="Example Organization",
        )
        affiliation.author_id.add(author)

        return article

    def test_author_export_no_data(self):
        """With no articles created, the export has a header and no rows."""
        result = author_export("2024", "IN")

        assert result == self._expected_result()

    def test_author_export_correct_year_wrong_country(self):
        """An article from the searched year but another country is excluded."""
        self.create_article(
            title="Test Article",
            subtitle="Test Subtitle",
            abstract="Test Abstract",
            publication_date="2024-01-01",
            doi_value="TestDOI2",
            publisher=self.publisher_1,
            journal_title=self.JOURNAL_TITLE,
            author_data={
                "last_name": "ExampleSurname",
                "first_name": "ExampleName",
                "email": "ExampleName.ExampleSurname@gmail.com",
                "author_order": 100,
            },
            country=self.country_fr,
            affiliation_value="Example",
        )

        result = author_export("2024", "JP")

        assert result == self._expected_result()

    def test_author_export_wrong_year_correct_country(self):
        """An article from the searched country but another year is excluded."""
        self.create_article(
            title="Test Article",
            subtitle="Test Subtitle",
            abstract="Test Abstract",
            publication_date="2024-01-01",
            doi_value="TestDOI3",
            publisher=self.publisher_1,
            journal_title=self.JOURNAL_TITLE,
            author_data={
                "last_name": "ExampleSurname",
                "first_name": "ExampleName",
                "email": "ExampleName.ExampleSurname@gmail.com",
                "author_order": 100,
            },
            country=self.country_be,
            affiliation_value="Example",
        )

        result = author_export("2023", "BE")

        assert result == self._expected_result()

    def test_author_export_filtering(self):
        """Only the article whose affiliation country matches is exported."""
        self.create_article(
            title="Test Article",
            subtitle="Test Subtitle",
            abstract="Test Abstract",
            publication_date="2024-01-01",
            doi_value="TestDOI4",
            publisher=self.publisher_1,
            journal_title=self.JOURNAL_TITLE,
            author_data={
                "last_name": "ExampleSurname",
                "first_name": "ExampleName",
                "email": "ExampleName.ExampleSurname@gmail.com",
                "author_order": 100,
            },
            country=self.country_br,
            affiliation_value="Example",
        )

        self.create_article(
            title="Test Article 2",
            subtitle="Test Subtitle 2",
            abstract="Test Abstract 2",
            publication_date="2024-02-02",
            doi_value="TestDOI5",
            publisher=self.publisher_2,
            journal_title=self.JOURNAL_TITLE,
            author_data={
                "last_name": "ExampleSurname2",
                "first_name": "ExampleName2",
                "email": "ExampleName2.ExampleSurname2@gmail.com",
                "author_order": 100,
            },
            country=self.country_es,
            affiliation_value="Example2",
        )

        result = author_export("2024", "BR")
        expected_rows = [
            [
                2024,
                self.JOURNAL_TITLE,
                "TestDOI4",
                None,
                None,
                "ExampleName ExampleSurname",
                "BR",
                "Example",
                1,
            ],
        ]

        assert result == self._expected_result(expected_rows)

    def test_author_export_multiple(self):
        """Two articles with the same country and year are both exported."""
        self.create_article(
            title="Test Article",
            subtitle="Test Subtitle",
            abstract="Test Abstract",
            publication_date="2024-01-01",
            doi_value="TestDOI6",
            publisher=self.publisher_1,
            journal_title=self.JOURNAL_TITLE,
            author_data={
                "last_name": "ExampleSurname",
                "first_name": "ExampleName",
                "email": "ExampleName.ExampleSurname@gmail.com",
                "author_order": 100,
            },
            country=self.country_it,
            affiliation_value="Example",
        )

        self.create_article(
            title="Test Article 2",
            subtitle="Test Subtitle 2",
            abstract="Test Abstract 2",
            publication_date="2024-02-02",
            doi_value="TestDOI7",
            publisher=self.publisher_2,
            journal_title=self.JOURNAL_TITLE,
            author_data={
                "last_name": "ExampleSurname2",
                "first_name": "ExampleName2",
                "email": "ExampleName2.ExampleSurname2@gmail.com",
                "author_order": 100,
            },
            country=self.country_it,
            affiliation_value="Example2",
        )

        result = author_export("2024", "IT")
        # The second-created article is expected first.
        # NOTE(review): this depends on the order in which the export yields
        # articles -- confirm that order is deterministic.
        expected_rows = [
            [
                2024,
                self.JOURNAL_TITLE,
                "TestDOI7",
                None,
                None,
                "ExampleName2 ExampleSurname2",
                "IT",
                "Example2",
                1,
            ],
            [
                2024,
                self.JOURNAL_TITLE,
                "TestDOI6",
                None,
                None,
                "ExampleName ExampleSurname",
                "IT",
                "Example",
                1,
            ],
        ]

        assert result == self._expected_result(expected_rows)

    def tearDown(self):
        """Delete every record created by ``setUp`` and the tests.

        NOTE(review): presumably kept so that state outside the test database
        (e.g. a search index fed by these models) is cleared too -- confirm.
        """
        Publisher.objects.all().delete()
        ArticleIdentifier.objects.all().delete()
        Article.objects.all().delete()
        PublicationInfo.objects.all().delete()
        Author.objects.all().delete()
        Affiliation.objects.all().delete()
        Country.objects.all().delete()
affiliation.get("value", "UNKNOWN") - result_data.append( - [ - year, - journal, - doi, - arxiv, - arxiv_category, - author_first_name + " " + author_last_name, - aff_country, - aff_value, - total_authors, - ] - ) + aff_value = affiliation.get("value", "UNKNOWN") + result_data.append( + [ + year, + journal, + doi, + arxiv, + arxiv_category, + author_first_name + " " + author_last_name, + aff_country, + aff_value, + total_authors, + ] + ) if missing_author_affiliations: logger.warn( @@ -159,6 +164,7 @@ def author_export(search_year, search_country): doi, missing_author_affiliations, total_authors ) ) + return {"header": result_headers, "data": result_data}