From adabf6d69defdfb4998e252352dd553bf9329635 Mon Sep 17 00:00:00 2001 From: ErnestaP Date: Thu, 22 Feb 2024 09:09:53 +0100 Subject: [PATCH] Update existing record --- scoap3/articles/api/views.py | 1 - .../tests/data/record_failing_on_airflow.json | 116 ++++++++++++++++++ .../articles/tests/data/workflow_record.json | 1 - scoap3/articles/tests/test_article_views.py | 53 +++++++- scoap3/exports/views.py | 1 - scoap3/tasks.py | 47 +++++-- 6 files changed, 202 insertions(+), 17 deletions(-) create mode 100644 scoap3/articles/tests/data/record_failing_on_airflow.json diff --git a/scoap3/articles/api/views.py b/scoap3/articles/api/views.py index a4dab0355..26cf8d436 100644 --- a/scoap3/articles/api/views.py +++ b/scoap3/articles/api/views.py @@ -63,7 +63,6 @@ def create(self, request, *args, **kwargs): return Response( serializer.data, status=status.HTTP_201_CREATED, headers=headers ) - article_id = data.get("id") class ArticleWorkflowImportView(ViewSet): diff --git a/scoap3/articles/tests/data/record_failing_on_airflow.json b/scoap3/articles/tests/data/record_failing_on_airflow.json new file mode 100644 index 000000000..f1f2940c2 --- /dev/null +++ b/scoap3/articles/tests/data/record_failing_on_airflow.json @@ -0,0 +1,116 @@ +{ + "dois":[ + { + "value":"10.1103/PhysRevD.109.046003" + } + ], + "page_nr":[ + 17 + ], + "arxiv_eprints":[ + { + "value":"2312.06750", + "categories":[ + "hep-th", + "gr-qc", + "math-ph", + "math.MP" + ] + } + ], + "authors":[ + { + "full_name":"Chatwin-Davies, Aidan", + "given_names":"Aidan", + "surname":"Chatwin-Davies", + "affiliations":[ + { + "value":"Okinawa Institute of Science and Technology, 1919-1 Tancha, Onna-son, Kunigami-gun, Okinawa, 904-0495, Japan", + "organization":"Okinawa Institute of Science and Technology, 1919-1 Tancha, Onna-son, Kunigami-gun, Okinawa, 904-0495", + "country":"Japan" + } + ] + }, + { + "full_name":"Leung, Pompey", + "given_names":"Pompey", + "surname":"Leung", + "affiliations":[ + { + "value":"Department of Physics and Astronomy, University of British Columbia, 6224 Agricultural Road, Vancouver, British Columbia, V6T 1Z1, Canada", + "organization":"Department of Physics and Astronomy, University of British Columbia, 6224 Agricultural Road, Vancouver, British Columbia, V6T 1Z1", + "country":"Canada" + } + ] + }, + { + "full_name":"Remmen, Grant N.", + "given_names":"Grant N.", + "surname":"Remmen", + "affiliations":[ + { + "value":"Center for Cosmology and Particle Physics, Department of Physics, New York University, New York, New York, 10003, USA", + "organization":"Center for Cosmology and Particle Physics, Department of Physics, New York University, New York, New York, 10003", + "country":"USA" + } + ] + } + ], + "license":[ + { + "url":"https://creativecommons.org/licenses/by/4.0/", + "license":"CC-BY-4.0" + } + ], + "collections":[ + { + "primary":"HEP" + }, + { + "primary":"Citeable" + }, + { + "primary":"Published" + } + ], + "publication_info":[ + { + "journal_title":"Physical Review D", + "journal_volume":"109", + "year":2024, + "journal_issue":"4", + "material":"article" + } + ], + "abstracts":[ + { + "value":"

Holographic screens are codimension-one hypersurfaces that extend the notion of apparent horizons to general (non-black hole) spacetimes and that display interesting thermodynamic properties. We show that if a spacetime contains a codimension-two, boundary-homologous, minimal extremal spacelike surface X (known as an HRT surface in AdS/CFT), then any holographic screens are sequestered to the causal wedges of X. That is, any single connected component of a holographic screen can be located in at most one of the causal future, causal past, inner wedge, or outer wedge of X. We comment on how this result informs possible coarse grained entropic interpretations of generic holographic screens, as well as on connections to semiclassical objects such as quantum extremal surfaces.

", + "source":"APS" + } + ], + "acquisition_source":{ + "source":"APS", + "method":"APS", + "date":"2024-02-08T00:00:16.954712" + }, + "copyright":[ + { + "year":2024, + "statement":"Published by the American Physical Society" + } + ], + "imprints":[ + { + "date":"2024-02-07", + "publisher":"APS" + } + ], + "record_creation_date":"2024-02-08T00:00:16.954712", + "titles":[ + { + "title":"Holographic screen sequestration", + "source":"APS" + } + ], + "$schema":"https://repo.qa.scoap3.org/schemas/hep.json" + } diff --git a/scoap3/articles/tests/data/workflow_record.json b/scoap3/articles/tests/data/workflow_record.json index 72add9369..7be41ce54 100644 --- a/scoap3/articles/tests/data/workflow_record.json +++ b/scoap3/articles/tests/data/workflow_record.json @@ -10,7 +10,6 @@ "statement": "The Author" } ], - "control_number": "81204", "_oai": { "updated": "2023-10-31T08:21:45Z", "id": "oai:repo.scoap3.org:81204", diff --git a/scoap3/articles/tests/test_article_views.py b/scoap3/articles/tests/test_article_views.py index cb79be127..a1e344951 100644 --- a/scoap3/articles/tests/test_article_views.py +++ b/scoap3/articles/tests/test_article_views.py @@ -1,10 +1,14 @@ import json +import os +from pathlib import Path import pytest +from django.test import TestCase from django.urls import reverse from rest_framework import status -from scoap3.articles.models import Article +from scoap3.articles.models import Article, ArticleIdentifier +from scoap3.users.tests.factories import UserFactory pytestmark = pytest.mark.django_db @@ -53,7 +57,6 @@ def test_update_article_from_workflow(self, client, user, shared_datadir): ) data["titles"][0]["title"] = "New title" - data["dois"].append({"value": "10.5506/APhysPolB.54.10-A5"}) response = client.post( reverse("api:article-workflow-import-list"), data, @@ -69,11 +72,13 @@ def test_update_article_from_workflow(self, client, user, shared_datadir): ] assert article.title == "New title" - assert len(expected_dois) == 2 - assert "10.5506/APhysPolB.54.10-A5" in expected_dois + assert len(expected_dois) == 1 assert "10.5506/APhysPolB.54.10-A3" in expected_dois +pytestmark = pytest.mark.django_db + + class TestArticleIdentifierViewSet: def test_get_article_identifier(self, client): url = reverse("api:articleidentifier-list") @@ -83,3 +88,43 @@ def test_get_article_identifier(self, client): url = reverse("api:articleidentifier-detail", kwargs={"pk": 0}) response = client.get(url) assert response.status_code == status.HTTP_404_NOT_FOUND + + +pytestmark = pytest.mark.django_db + + +class TestTheSameArticleCreationTwice(TestCase): + @staticmethod + def shared_datadir(): + return os.path.join( + Path(__file__).parent.resolve(), "data", "record_failing_on_airflow.json" + ) + + def test_create_article_from_workflow(self): + user = UserFactory() + self.client.force_login(user) + with open(self.shared_datadir()) as file: + contents = file.read() + data = json.loads(contents) + response = self.client.post( + reverse("api:article-workflow-import-list"), + data, + content_type="application/json", + ) + assert response.status_code == status.HTTP_200_OK + + response = self.client.post( + reverse("api:article-workflow-import-list"), + data, + content_type="application/json", + ) + assert response.status_code == status.HTTP_200_OK + doi = data.get("dois")[0].get("value") + assert ( + len( + ArticleIdentifier.objects.filter( + identifier_type="DOI", identifier_value=doi + ) + ) + == 1 + ) diff --git a/scoap3/exports/views.py b/scoap3/exports/views.py index 4c655a180..318327619 100644 --- a/scoap3/exports/views.py +++ b/scoap3/exports/views.py @@ -21,7 +21,6 @@ def generate_csv_response(data, action_name, write_header=True): writer = csv.writer(response) if write_header: writer.writerow(data.get("header")) - print(data) for row in data.get("data", []): writer.writerow(row) diff --git a/scoap3/tasks.py b/scoap3/tasks.py index e8e7a3ea1..a9e7bee9d 100644 --- a/scoap3/tasks.py +++ b/scoap3/tasks.py @@ -71,17 +71,21 @@ def _create_licenses(data): def _create_article(data, licenses): article_data = { - "id": data.get("control_number"), "publication_date": data["imprints"][0].get("date"), "title": data["titles"][0].get("title"), "subtitle": data["titles"][0].get("subtitle", ""), "abstract": data["abstracts"][0].get("value", ""), } + doi = data.get("dois")[0].get("value") if ( - article_data.get("id") - and Article.objects.filter(pk=article_data["id"]).exists() + doi + and ArticleIdentifier.objects.filter( + identifier_type="DOI", identifier_value=doi + ).exists() ): - article = Article.objects.get(pk=article_data["id"]) + article = ArticleIdentifier.objects.get( + identifier_type="DOI", identifier_value=doi + ).article_id article.__dict__.update(**article_data) else: article = Article.objects.create(**article_data) @@ -110,14 +114,22 @@ def _create_article_identifier(data, article): "identifier_value": doi.get("value"), } ArticleIdentifier.objects.get_or_create(**article_identifier_data) - for arxiv in data.get("arxiv_eprints", []): article_identifier_data = { "article_id": article, "identifier_type": "arXiv", "identifier_value": arxiv.get("value"), } - ArticleIdentifier.objects.get_or_create(**article_identifier_data) + doi = data.get("dois")[0].get("value") + if ArticleIdentifier.objects.filter( + identifier_type="arXiv", article_id=article + ).exists(): + article_identifier = ArticleIdentifier.objects.get( + article_id=article, identifier_type="arXiv" + ) + article_identifier.__dict__.update(**article_identifier_data) + else: + ArticleIdentifier.objects.get_or_create(**article_identifier_data) def _create_copyright(data, article): @@ -128,7 +140,11 @@ def _create_copyright(data, article): "holder": copyright.get("holder", ""), "year": copyright.get("year"), } - Copyright.objects.get_or_create(**copyright_data) + if Copyright.objects.filter(article_id=article).exists(): + copyright = Copyright.objects.get(article_id=article) + copyright.__dict__.update(**copyright_data) + else: + Copyright.objects.get_or_create(**copyright_data) def _create_article_arxiv_category(data, article): @@ -141,8 +157,15 @@ def _create_article_arxiv_category(data, article): "category": arxiv_category, "primary": True if idx == 0 else False, } - - ArticleArxivCategory.objects.get_or_create(**article_arxiv_category_data) + if ArticleArxivCategory.objects.filter(article_id=article).exists(): + article_arxiv_category = ArticleArxivCategory.objects.get( + article_id=article + ) + article_arxiv_category.__dict__.update(**article_arxiv_category_data) + else: + ArticleArxivCategory.objects.get_or_create( + **article_arxiv_category_data + ) def _create_publisher(data): @@ -170,7 +193,11 @@ def _create_publication_info(data, article, publishers): "journal_issue_date": publication_info.get("journal_issue_date"), "publisher_id": publishers[idx].id, } - PublicationInfo.objects.get_or_create(**publication_info_data) + if PublicationInfo.objects.filter(article_id=article).exists(): + publication_info = PublicationInfo.objects.get(article_id=article) + publication_info.__dict__.update(**publication_info_data) + else: + PublicationInfo.objects.get_or_create(**publication_info_data) def _create_experimental_collaborations(data):