From b08217dc8347acb9f997dea251e211f260ec687c Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Mon, 6 Nov 2023 09:02:50 +0000 Subject: [PATCH 1/9] put a fix to prevent consent rt queues from being created --- emgena/serializers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/emgena/serializers.py b/emgena/serializers.py index 43d87eafc..6fa036d09 100644 --- a/emgena/serializers.py +++ b/emgena/serializers.py @@ -92,7 +92,6 @@ def create(self, validated_data): n = ena_models.Notify(**validated_data) emg_queue = settings.RT["emg_queue"] - ena_queue = settings.RT["ena_queue"] ticket = { "Requestor": n.from_email, @@ -104,7 +103,7 @@ def create(self, validated_data): ticket["Cc"] = n.cc if n.is_consent: - ticket["Queue"] = ena_queue + return 403 else: ticket["Queue"] = emg_queue From 0883da0b5901d762e955d8b7a8280276ef9320be Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Mon, 6 Nov 2023 09:46:03 +0000 Subject: [PATCH 2/9] Got rid of consent notification test as it is no longer required --- tests/webuploader/test_notify.py | 36 -------------------------------- 1 file changed, 36 deletions(-) diff --git a/tests/webuploader/test_notify.py b/tests/webuploader/test_notify.py index 762f89816..7e6384427 100644 --- a/tests/webuploader/test_notify.py +++ b/tests/webuploader/test_notify.py @@ -70,42 +70,6 @@ def test_notify_not_consent(self): HTTP_AUTHORIZATION='Bearer {}'.format(self.get_token()) ) - assert len(responses.calls) == 1 - call = responses.calls[0] - assert call.request.url == settings.RT["url"] - assert call.request.body == json.dumps(expected_body) - - @responses.activate - @pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher") - def test_notify_consent(self): - """Test notify endpoint for consent requests - """ - expected_body = { - "Requestor": "fake@email.com", - "Priority": "4", - "Subject": "Test email subject", - "Content": "Hi this is just an example", - "Queue": settings.RT["ena_queue"], - } - - responses.add( - responses.POST, - settings.RT["url"], - status=200) - - post_data = { - "from_email": "fake@email.com", - "subject": "Test email subject", - "message": "Hi this is just an example", - "is_consent": True - } - - self.client.post( - reverse('emgapi_v1:csrf-notify'), - data=post_data, format='json', - HTTP_AUTHORIZATION='Bearer {}'.format(self.get_token()) - ) - assert len(responses.calls) == 1 call = responses.calls[0] assert call.request.url == settings.RT["url"] From 8d9a9497386ce39cf5136b3765d05a5d854e16b4 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Fri, 29 Sep 2023 13:43:03 +0100 Subject: [PATCH 3/9] Truncate the fields in the Publication model if longer than expected. I've increased the max len of the pub_type in the Publication model. To prevent the import to fail in the future I've implemented a brute truncation mechanism when a Publication is saved. --- emgapi/mgx.py | 0 emgapi/models.py | 13 ++- .../management/commands/import_publication.py | 35 ++++---- .../europe_pmc_api/europe_pmc_api_handler.py | 4 +- tests/webuploader/test_import_publication.py | 89 +++++++++++++------ 5 files changed, 93 insertions(+), 48 deletions(-) create mode 100644 emgapi/mgx.py diff --git a/emgapi/mgx.py b/emgapi/mgx.py new file mode 100644 index 000000000..e69de29bb diff --git a/emgapi/models.py b/emgapi/models.py index 1faf824ea..1cf7f0321 100644 --- a/emgapi/models.py +++ b/emgapi/models.py @@ -502,7 +502,7 @@ class Publication(models.Model): db_column='PUBLISHED_YEAR', blank=True, null=True, help_text='Published year') pub_type = models.CharField( - db_column='PUB_TYPE', max_length=150, blank=True, null=True) + db_column='PUB_TYPE', max_length=300, blank=True, null=True) objects = PublicationManager() @@ -510,6 +510,17 @@ class Meta: db_table = 'PUBLICATION' ordering = ('pubmed_id',) + def save(self, *args, **kwargs): + for field in self._meta.fields: + if isinstance(field, models.TextField) or isinstance(field, models.CharField): + field_name = field.name + max_length = field.max_length + field_value = getattr(self, field_name) + if field_value and len(field_value) > max_length: + logger.error(f"Publication field {field_name} content was truncated at {max_length}") + setattr(self, field_name, field_value[:max_length]) + super(Publication, self).save(*args, **kwargs) + def __str__(self): return str(self.pubmed_id) diff --git a/emgapianns/management/commands/import_publication.py b/emgapianns/management/commands/import_publication.py index 3bc8d52c0..55d064d39 100644 --- a/emgapianns/management/commands/import_publication.py +++ b/emgapianns/management/commands/import_publication.py @@ -16,7 +16,9 @@ import logging from django.core.management import BaseCommand from emgapi import models as emg_models -from emgapianns.management.lib.europe_pmc_api.europe_pmc_api_handler import EuropePMCApiHandler +from emgapianns.management.lib.europe_pmc_api.europe_pmc_api_handler import ( + EuropePMCApiHandler, +) logger = logging.getLogger(__name__) @@ -29,15 +31,17 @@ def lookup_publication_by_pubmed_id(pubmed_id): def update_or_create_publication(publication): return emg_models.Publication.objects.update_or_create( pubmed_id=publication.pmid, - defaults={'authors': publication.author_string, - 'doi': publication.doi, - 'isbn': publication.journal_issn, - 'iso_journal': publication.journal_title, - 'pub_title': publication.title, - 'raw_pages': publication.page_info, - 'volume': publication.journal_volume, - 'published_year': publication.pub_year, - 'pub_type': publication.pub_type}, + defaults={ + "authors": publication.author_string, + "doi": publication.doi, + "isbn": publication.journal_issn, + "iso_journal": publication.journal_title, + "pub_title": publication.title, + "raw_pages": publication.page_info, + "volume": publication.journal_volume, + "published_year": publication.pub_year, + "pub_type": publication.pub_type, + }, ) @@ -47,19 +51,18 @@ def lookup_publication_by_project_id(project_id): class Command(BaseCommand): - help = 'Creates or updates a publication in EMG.' + help = "Creates or updates a publication in EMG." def add_arguments(self, parser): # TODO: Consider lookup by project id - parser.add_argument('pubmed-id', - help='PubMed identifier (PMID)', - type=int, - action='store') + parser.add_argument( + "pubmed-id", help="PubMed identifier (PMID)", type=int, action="store" + ) def handle(self, *args, **options): logger.info("CLI %r" % options) - pubmed_id = options['pubmed-id'] + pubmed_id = options["pubmed-id"] publications = lookup_publication_by_pubmed_id(pubmed_id) for publication in publications: update_or_create_publication(publication) diff --git a/emgapianns/management/lib/europe_pmc_api/europe_pmc_api_handler.py b/emgapianns/management/lib/europe_pmc_api/europe_pmc_api_handler.py index 7c0a5b13e..cacfb308b 100644 --- a/emgapianns/management/lib/europe_pmc_api/europe_pmc_api_handler.py +++ b/emgapianns/management/lib/europe_pmc_api/europe_pmc_api_handler.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Copyright 2019-2022 EMBL - European Bioinformatics Institute +# Copyright 2019-2023 EMBL - European Bioinformatics Institute # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -34,7 +34,7 @@ def get_default_connection_headers(): } -class Publication(object): +class Publication: def __init__( self, pub_year, diff --git a/tests/webuploader/test_import_publication.py b/tests/webuploader/test_import_publication.py index 29531aaf8..13bb9ff40 100644 --- a/tests/webuploader/test_import_publication.py +++ b/tests/webuploader/test_import_publication.py @@ -16,28 +16,39 @@ import pytest -from emgapianns.management.commands.import_publication import lookup_publication_by_pubmed_id - - -@pytest.mark.parametrize("pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi", [ - (4838818, - "Proceedings: The morphological variation of nervous structures in the atrial endocardium of the dog.", - 1974, - "Floyd K, Linden RJ, Saunders DA.", - "n/a"), - (31138692, - "Mechanisms by which sialylated milk oligosaccharides impact bone biology in a gnotobiotic mouse " - "model of infant undernutrition.", - 2019, - "Cowardin CA, Ahern PP, Kung VL, Hibberd MC, Cheng J, Guruge JL, Sundaresan V, Head RD, Barile D," - " Mills DA, Barratt MJ, Huq S, Ahmed T, Gordon JI.", - "10.1073/pnas.1821770116") -]) -def test_lookup_publication_by_pubmed_id_should_return(pubmed_id, - expected_pub_title, - expected_year_of_pub, - expected_authors, - expected_doi): +from emgapi.models import Publication +from model_bakery import baker + + +from emgapianns.management.commands.import_publication import ( + lookup_publication_by_pubmed_id, +) + + +@pytest.mark.parametrize( + "pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi", + [ + ( + 4838818, + "Proceedings: The morphological variation of nervous structures in the atrial endocardium of the dog.", + 1974, + "Floyd K, Linden RJ, Saunders DA.", + "n/a", + ), + ( + 31138692, + "Mechanisms by which sialylated milk oligosaccharides impact bone biology in a gnotobiotic mouse " + "model of infant undernutrition.", + 2019, + "Cowardin CA, Ahern PP, Kung VL, Hibberd MC, Cheng J, Guruge JL, Sundaresan V, Head RD, Barile D," + " Mills DA, Barratt MJ, Huq S, Ahmed T, Gordon JI.", + "10.1073/pnas.1821770116", + ), + ], +) +def test_lookup_publication_by_pubmed_id_should_return( + pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi +): publications = lookup_publication_by_pubmed_id(pubmed_id) assert len(publications) == 1 @@ -49,18 +60,38 @@ def test_lookup_publication_by_pubmed_id_should_return(pubmed_id, assert publication.doi == expected_doi -@pytest.mark.parametrize("pubmed_id", [ - (0), - (000) -]) +@pytest.mark.parametrize("pubmed_id", [(0), (000)]) def test_lookup_publication_by_pubmed_id_(pubmed_id): with pytest.raises(ValueError): lookup_publication_by_pubmed_id(pubmed_id) -@pytest.mark.parametrize("pubmed_id", [ - ("test") -]) +@pytest.mark.parametrize("pubmed_id", [("test")]) def test_lookup_publication_by_pubmed_id_raises_exception_on_string(pubmed_id): with pytest.raises(TypeError): lookup_publication_by_pubmed_id(pubmed_id) + + +@pytest.mark.django_db +def test_text_fields_longer_than_expected(faker): + PUB_TITLE_MAX = 740 + PUB_TYPE_MAX = 300 + VOLUME_MAX = 55 + + # I've picked 3 fields as representatives + publications = baker.prepare( + Publication, + pub_title=faker.text(max_nb_chars=PUB_TITLE_MAX + 1000), + pub_type=faker.text(max_nb_chars=PUB_TYPE_MAX + 1000), + volume=faker.text(max_nb_chars=VOLUME_MAX + 1000), + _quantity=5, + ) + + for publication in publications: + assert len(publication.pub_title) > PUB_TITLE_MAX + assert len(publication.pub_type) > PUB_TYPE_MAX + assert len(publication.volume) > VOLUME_MAX + publication.save() + assert len(publication.pub_title) <= PUB_TITLE_MAX + assert len(publication.pub_type) <= PUB_TYPE_MAX + assert len(publication.volume) <= VOLUME_MAX From 62dfd01e270bc63ab3da088a1ce0c7e16a779b1b Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Wed, 8 Nov 2023 08:14:26 +0000 Subject: [PATCH 4/9] The corresponding migration for the Publication.pub_type change --- .../0011_alter_publication_pub_type.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 emgapi/migrations/0011_alter_publication_pub_type.py diff --git a/emgapi/migrations/0011_alter_publication_pub_type.py b/emgapi/migrations/0011_alter_publication_pub_type.py new file mode 100644 index 000000000..632263679 --- /dev/null +++ b/emgapi/migrations/0011_alter_publication_pub_type.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.18 on 2023-11-08 08:13 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('emgapi', '0010_runextraannotation'), + ] + + operations = [ + migrations.AlterField( + model_name='publication', + name='pub_type', + field=models.CharField(blank=True, db_column='PUB_TYPE', max_length=300, null=True), + ), + ] From 8967eda0f9ab807c00bc55bbe4376ea94c3015e4 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Wed, 8 Nov 2023 09:46:39 +0000 Subject: [PATCH 5/9] Forgot to add Faker test dep --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index bbc991dca..b5d27771b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,6 +78,7 @@ tests = [ "pytest-cov==2.12.1", "pandas==1.3.2", "responses==0.23.1", + "Faker==19.6.2", ] dev = [ From c3dc316658a32a2d4158a13af9a764aa77d587b6 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Wed, 8 Nov 2023 09:58:21 +0000 Subject: [PATCH 6/9] Rebase dev and fixed the conflicting migration --- ...ication_pub_type.py => 0012_alter_publication_pub_type.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename emgapi/migrations/{0011_alter_publication_pub_type.py => 0012_alter_publication_pub_type.py} (75%) diff --git a/emgapi/migrations/0011_alter_publication_pub_type.py b/emgapi/migrations/0012_alter_publication_pub_type.py similarity index 75% rename from emgapi/migrations/0011_alter_publication_pub_type.py rename to emgapi/migrations/0012_alter_publication_pub_type.py index 632263679..1d61c5fb2 100644 --- a/emgapi/migrations/0011_alter_publication_pub_type.py +++ b/emgapi/migrations/0012_alter_publication_pub_type.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.18 on 2023-11-08 08:13 +# Generated by Django 3.2.18 on 2023-11-08 09:57 from django.db import migrations, models @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ('emgapi', '0010_runextraannotation'), + ('emgapi', '0011_analysisjob_analysis_summary_json'), ] operations = [ From f6701fc8dc36f375cb4323d0567839b0e9a3315d Mon Sep 17 00:00:00 2001 From: Mahfouz Shehu Date: Wed, 8 Nov 2023 10:35:41 +0000 Subject: [PATCH 7/9] Update emgena/serializers.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Martín Beracochea --- emgena/serializers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emgena/serializers.py b/emgena/serializers.py index 6fa036d09..2d9d600d2 100644 --- a/emgena/serializers.py +++ b/emgena/serializers.py @@ -103,7 +103,7 @@ def create(self, validated_data): ticket["Cc"] = n.cc if n.is_consent: - return 403 + return 404 else: ticket["Queue"] = emg_queue From d8c1f42822067e6504bc3af52c6a9629bc0802d3 Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Wed, 8 Nov 2023 12:51:52 +0000 Subject: [PATCH 8/9] Updated doc string to reflect removal on ena_queue consent ticket creation: --- emgena/serializers.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/emgena/serializers.py b/emgena/serializers.py index 2d9d600d2..b8f0d85f8 100644 --- a/emgena/serializers.py +++ b/emgena/serializers.py @@ -81,11 +81,7 @@ def is_valid(self): return super().is_valid() def create(self, validated_data): - """Create an RT ticket. - If this is a consent approval then the procedure is: - - create ticket in ENA-MG queue - otherwise: - - create ticket in EMG queue + """Create an RT ticket in EMG queue Note: remember to setup valid credentials (i.e. url, user and token token) in the config.rt """ import requests From 9199a565ea2630b66317e1810b2620250c5503e3 Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Wed, 8 Nov 2023 14:26:31 +0000 Subject: [PATCH 9/9] Bumped api version --- emgcli/__init__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/emgcli/__init__.py b/emgcli/__init__.py index d0b83bc77..31dfb7928 100644 --- a/emgcli/__init__.py +++ b/emgcli/__init__.py @@ -1 +1 @@ -__version__: str = "2.4.34" +__version__: str = "2.4.35" diff --git a/pyproject.toml b/pyproject.toml index b5d27771b..569182588 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,7 +117,7 @@ max-line-length = 119 """ [tool.bumpversion] -current_version = "2.4.34" +current_version = "2.4.35" [[tool.bumpversion.files]] filename = "emgcli/__init__.py"