From aa052b6636fedc261c785780de8cc56a89cde33c Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Tue, 31 Oct 2023 16:00:45 +0000 Subject: [PATCH 01/13] Workaround to query ENA / ERAPRO Views. ERA needs to be appended as the default connection tries to use the PUBLIC SYNONYM (according to ENA) and it's not working ATM we were advised to prefix the views and this is the simplest way. The short-term plan is to remove the dependency of ENA databases --- emgena/models.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/emgena/models.py b/emgena/models.py index 3e34271a7..c3aaba848 100644 --- a/emgena/models.py +++ b/emgena/models.py @@ -194,12 +194,20 @@ class Meta: class RunStudy(StudyAbstract): class Meta(StudyAbstract.Meta): - db_table = 'V_MGP_RUN_STUDY' + # ERA needs to be appended as the default connection tries to use + # the PUBLIC SYNONYM (according to ENA) and it's not working ATM + # we were advised to prefix the views and this is the simplest way. + # The short-term plan is to remove the dependency of ENA databases + db_table = 'ERA\".\"V_MGP_RUN_STUDY' class AssemblyStudy(StudyAbstract): class Meta(StudyAbstract.Meta): - db_table = 'V_MGP_ASSEMBLY_STUDY' + # ERA needs to be appended as the default connection tries to use + # the PUBLIC SYNONYM (according to ENA) and it's not working ATM + # we were advised to prefix the views and this is the simplest way. + # The short-term plan is to remove the dependency of ENA databases + db_table = 'ERA\".\"V_MGP_ASSEMBLY_STUDY' class Project(models.Model): From c6981ea4a5900a304b1ad43ad3fd213a5728d8b0 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Wed, 1 Nov 2023 10:41:28 +0000 Subject: [PATCH 02/13] Bump version - 2.3.34 --- emgcli/__init__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/emgcli/__init__.py b/emgcli/__init__.py index 4566b742b..d0b83bc77 100644 --- a/emgcli/__init__.py +++ b/emgcli/__init__.py @@ -1 +1 @@ -__version__: str = "2.4.33" +__version__: str = "2.4.34" diff --git a/pyproject.toml b/pyproject.toml index e7084a99f..bbc991dca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,7 +116,7 @@ max-line-length = 119 """ [tool.bumpversion] -current_version = "2.4.33" +current_version = "2.4.34" [[tool.bumpversion.files]] filename = "emgcli/__init__.py" From b08217dc8347acb9f997dea251e211f260ec687c Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Mon, 6 Nov 2023 09:02:50 +0000 Subject: [PATCH 03/13] put a fix to prevent consent rt queues from being created --- emgena/serializers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/emgena/serializers.py b/emgena/serializers.py index 43d87eafc..6fa036d09 100644 --- a/emgena/serializers.py +++ b/emgena/serializers.py @@ -92,7 +92,6 @@ def create(self, validated_data): n = ena_models.Notify(**validated_data) emg_queue = settings.RT["emg_queue"] - ena_queue = settings.RT["ena_queue"] ticket = { "Requestor": n.from_email, @@ -104,7 +103,7 @@ def create(self, validated_data): ticket["Cc"] = n.cc if n.is_consent: - ticket["Queue"] = ena_queue + return 403 else: ticket["Queue"] = emg_queue From 0883da0b5901d762e955d8b7a8280276ef9320be Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Mon, 6 Nov 2023 09:46:03 +0000 Subject: [PATCH 04/13] Got rid of consent notification test as it is no longer required --- tests/webuploader/test_notify.py | 36 -------------------------------- 1 file changed, 36 deletions(-) diff --git a/tests/webuploader/test_notify.py b/tests/webuploader/test_notify.py index 762f89816..7e6384427 100644 --- a/tests/webuploader/test_notify.py +++ b/tests/webuploader/test_notify.py @@ -70,42 +70,6 @@ def test_notify_not_consent(self): HTTP_AUTHORIZATION='Bearer {}'.format(self.get_token()) ) - assert len(responses.calls) == 1 - call = responses.calls[0] - assert call.request.url == settings.RT["url"] - assert call.request.body == json.dumps(expected_body) - - @responses.activate - @pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher") - def test_notify_consent(self): - """Test notify endpoint for consent requests - """ - expected_body = { - "Requestor": "fake@email.com", - "Priority": "4", - "Subject": "Test email subject", - "Content": "Hi this is just an example", - "Queue": settings.RT["ena_queue"], - } - - responses.add( - responses.POST, - settings.RT["url"], - status=200) - - post_data = { - "from_email": "fake@email.com", - "subject": "Test email subject", - "message": "Hi this is just an example", - "is_consent": True - } - - self.client.post( - reverse('emgapi_v1:csrf-notify'), - data=post_data, format='json', - HTTP_AUTHORIZATION='Bearer {}'.format(self.get_token()) - ) - assert len(responses.calls) == 1 call = responses.calls[0] assert call.request.url == settings.RT["url"] From 8d9a9497386ce39cf5136b3765d05a5d854e16b4 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Fri, 29 Sep 2023 13:43:03 +0100 Subject: [PATCH 05/13] Truncate the fields in the Publication model if longer than expected. I've increased the max len of the pub_type in the Publication model. To prevent the import to fail in the future I've implemented a brute truncation mechanism when a Publication is saved. --- emgapi/mgx.py | 0 emgapi/models.py | 13 ++- .../management/commands/import_publication.py | 35 ++++---- .../europe_pmc_api/europe_pmc_api_handler.py | 4 +- tests/webuploader/test_import_publication.py | 89 +++++++++++++------ 5 files changed, 93 insertions(+), 48 deletions(-) create mode 100644 emgapi/mgx.py diff --git a/emgapi/mgx.py b/emgapi/mgx.py new file mode 100644 index 000000000..e69de29bb diff --git a/emgapi/models.py b/emgapi/models.py index 1faf824ea..1cf7f0321 100644 --- a/emgapi/models.py +++ b/emgapi/models.py @@ -502,7 +502,7 @@ class Publication(models.Model): db_column='PUBLISHED_YEAR', blank=True, null=True, help_text='Published year') pub_type = models.CharField( - db_column='PUB_TYPE', max_length=150, blank=True, null=True) + db_column='PUB_TYPE', max_length=300, blank=True, null=True) objects = PublicationManager() @@ -510,6 +510,17 @@ class Meta: db_table = 'PUBLICATION' ordering = ('pubmed_id',) + def save(self, *args, **kwargs): + for field in self._meta.fields: + if isinstance(field, models.TextField) or isinstance(field, models.CharField): + field_name = field.name + max_length = field.max_length + field_value = getattr(self, field_name) + if field_value and len(field_value) > max_length: + logger.error(f"Publication field {field_name} content was truncated at {max_length}") + setattr(self, field_name, field_value[:max_length]) + super(Publication, self).save(*args, **kwargs) + def __str__(self): return str(self.pubmed_id) diff --git a/emgapianns/management/commands/import_publication.py b/emgapianns/management/commands/import_publication.py index 3bc8d52c0..55d064d39 100644 --- a/emgapianns/management/commands/import_publication.py +++ b/emgapianns/management/commands/import_publication.py @@ -16,7 +16,9 @@ import logging from django.core.management import BaseCommand from emgapi import models as emg_models -from emgapianns.management.lib.europe_pmc_api.europe_pmc_api_handler import EuropePMCApiHandler +from emgapianns.management.lib.europe_pmc_api.europe_pmc_api_handler import ( + EuropePMCApiHandler, +) logger = logging.getLogger(__name__) @@ -29,15 +31,17 @@ def lookup_publication_by_pubmed_id(pubmed_id): def update_or_create_publication(publication): return emg_models.Publication.objects.update_or_create( pubmed_id=publication.pmid, - defaults={'authors': publication.author_string, - 'doi': publication.doi, - 'isbn': publication.journal_issn, - 'iso_journal': publication.journal_title, - 'pub_title': publication.title, - 'raw_pages': publication.page_info, - 'volume': publication.journal_volume, - 'published_year': publication.pub_year, - 'pub_type': publication.pub_type}, + defaults={ + "authors": publication.author_string, + "doi": publication.doi, + "isbn": publication.journal_issn, + "iso_journal": publication.journal_title, + "pub_title": publication.title, + "raw_pages": publication.page_info, + "volume": publication.journal_volume, + "published_year": publication.pub_year, + "pub_type": publication.pub_type, + }, ) @@ -47,19 +51,18 @@ def lookup_publication_by_project_id(project_id): class Command(BaseCommand): - help = 'Creates or updates a publication in EMG.' + help = "Creates or updates a publication in EMG." def add_arguments(self, parser): # TODO: Consider lookup by project id - parser.add_argument('pubmed-id', - help='PubMed identifier (PMID)', - type=int, - action='store') + parser.add_argument( + "pubmed-id", help="PubMed identifier (PMID)", type=int, action="store" + ) def handle(self, *args, **options): logger.info("CLI %r" % options) - pubmed_id = options['pubmed-id'] + pubmed_id = options["pubmed-id"] publications = lookup_publication_by_pubmed_id(pubmed_id) for publication in publications: update_or_create_publication(publication) diff --git a/emgapianns/management/lib/europe_pmc_api/europe_pmc_api_handler.py b/emgapianns/management/lib/europe_pmc_api/europe_pmc_api_handler.py index 7c0a5b13e..cacfb308b 100644 --- a/emgapianns/management/lib/europe_pmc_api/europe_pmc_api_handler.py +++ b/emgapianns/management/lib/europe_pmc_api/europe_pmc_api_handler.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Copyright 2019-2022 EMBL - European Bioinformatics Institute +# Copyright 2019-2023 EMBL - European Bioinformatics Institute # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -34,7 +34,7 @@ def get_default_connection_headers(): } -class Publication(object): +class Publication: def __init__( self, pub_year, diff --git a/tests/webuploader/test_import_publication.py b/tests/webuploader/test_import_publication.py index 29531aaf8..13bb9ff40 100644 --- a/tests/webuploader/test_import_publication.py +++ b/tests/webuploader/test_import_publication.py @@ -16,28 +16,39 @@ import pytest -from emgapianns.management.commands.import_publication import lookup_publication_by_pubmed_id - - -@pytest.mark.parametrize("pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi", [ - (4838818, - "Proceedings: The morphological variation of nervous structures in the atrial endocardium of the dog.", - 1974, - "Floyd K, Linden RJ, Saunders DA.", - "n/a"), - (31138692, - "Mechanisms by which sialylated milk oligosaccharides impact bone biology in a gnotobiotic mouse " - "model of infant undernutrition.", - 2019, - "Cowardin CA, Ahern PP, Kung VL, Hibberd MC, Cheng J, Guruge JL, Sundaresan V, Head RD, Barile D," - " Mills DA, Barratt MJ, Huq S, Ahmed T, Gordon JI.", - "10.1073/pnas.1821770116") -]) -def test_lookup_publication_by_pubmed_id_should_return(pubmed_id, - expected_pub_title, - expected_year_of_pub, - expected_authors, - expected_doi): +from emgapi.models import Publication +from model_bakery import baker + + +from emgapianns.management.commands.import_publication import ( + lookup_publication_by_pubmed_id, +) + + +@pytest.mark.parametrize( + "pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi", + [ + ( + 4838818, + "Proceedings: The morphological variation of nervous structures in the atrial endocardium of the dog.", + 1974, + "Floyd K, Linden RJ, Saunders DA.", + "n/a", + ), + ( + 31138692, + "Mechanisms by which sialylated milk oligosaccharides impact bone biology in a gnotobiotic mouse " + "model of infant undernutrition.", + 2019, + "Cowardin CA, Ahern PP, Kung VL, Hibberd MC, Cheng J, Guruge JL, Sundaresan V, Head RD, Barile D," + " Mills DA, Barratt MJ, Huq S, Ahmed T, Gordon JI.", + "10.1073/pnas.1821770116", + ), + ], +) +def test_lookup_publication_by_pubmed_id_should_return( + pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi +): publications = lookup_publication_by_pubmed_id(pubmed_id) assert len(publications) == 1 @@ -49,18 +60,38 @@ def test_lookup_publication_by_pubmed_id_should_return(pubmed_id, assert publication.doi == expected_doi -@pytest.mark.parametrize("pubmed_id", [ - (0), - (000) -]) +@pytest.mark.parametrize("pubmed_id", [(0), (000)]) def test_lookup_publication_by_pubmed_id_(pubmed_id): with pytest.raises(ValueError): lookup_publication_by_pubmed_id(pubmed_id) -@pytest.mark.parametrize("pubmed_id", [ - ("test") -]) +@pytest.mark.parametrize("pubmed_id", [("test")]) def test_lookup_publication_by_pubmed_id_raises_exception_on_string(pubmed_id): with pytest.raises(TypeError): lookup_publication_by_pubmed_id(pubmed_id) + + +@pytest.mark.django_db +def test_text_fields_longer_than_expected(faker): + PUB_TITLE_MAX = 740 + PUB_TYPE_MAX = 300 + VOLUME_MAX = 55 + + # I've picked 3 fields as representatives + publications = baker.prepare( + Publication, + pub_title=faker.text(max_nb_chars=PUB_TITLE_MAX + 1000), + pub_type=faker.text(max_nb_chars=PUB_TYPE_MAX + 1000), + volume=faker.text(max_nb_chars=VOLUME_MAX + 1000), + _quantity=5, + ) + + for publication in publications: + assert len(publication.pub_title) > PUB_TITLE_MAX + assert len(publication.pub_type) > PUB_TYPE_MAX + assert len(publication.volume) > VOLUME_MAX + publication.save() + assert len(publication.pub_title) <= PUB_TITLE_MAX + assert len(publication.pub_type) <= PUB_TYPE_MAX + assert len(publication.volume) <= VOLUME_MAX From 62dfd01e270bc63ab3da088a1ce0c7e16a779b1b Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Wed, 8 Nov 2023 08:14:26 +0000 Subject: [PATCH 06/13] The corresponding migration for the Publication.pub_type change --- .../0011_alter_publication_pub_type.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 emgapi/migrations/0011_alter_publication_pub_type.py diff --git a/emgapi/migrations/0011_alter_publication_pub_type.py b/emgapi/migrations/0011_alter_publication_pub_type.py new file mode 100644 index 000000000..632263679 --- /dev/null +++ b/emgapi/migrations/0011_alter_publication_pub_type.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.18 on 2023-11-08 08:13 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('emgapi', '0010_runextraannotation'), + ] + + operations = [ + migrations.AlterField( + model_name='publication', + name='pub_type', + field=models.CharField(blank=True, db_column='PUB_TYPE', max_length=300, null=True), + ), + ] From 8967eda0f9ab807c00bc55bbe4376ea94c3015e4 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Wed, 8 Nov 2023 09:46:39 +0000 Subject: [PATCH 07/13] Forgot to add Faker test dep --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index bbc991dca..b5d27771b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,6 +78,7 @@ tests = [ "pytest-cov==2.12.1", "pandas==1.3.2", "responses==0.23.1", + "Faker==19.6.2", ] dev = [ From c3dc316658a32a2d4158a13af9a764aa77d587b6 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Wed, 8 Nov 2023 09:58:21 +0000 Subject: [PATCH 08/13] Rebase dev and fixed the conflicting migration --- ...ication_pub_type.py => 0012_alter_publication_pub_type.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename emgapi/migrations/{0011_alter_publication_pub_type.py => 0012_alter_publication_pub_type.py} (75%) diff --git a/emgapi/migrations/0011_alter_publication_pub_type.py b/emgapi/migrations/0012_alter_publication_pub_type.py similarity index 75% rename from emgapi/migrations/0011_alter_publication_pub_type.py rename to emgapi/migrations/0012_alter_publication_pub_type.py index 632263679..1d61c5fb2 100644 --- a/emgapi/migrations/0011_alter_publication_pub_type.py +++ b/emgapi/migrations/0012_alter_publication_pub_type.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.18 on 2023-11-08 08:13 +# Generated by Django 3.2.18 on 2023-11-08 09:57 from django.db import migrations, models @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ('emgapi', '0010_runextraannotation'), + ('emgapi', '0011_analysisjob_analysis_summary_json'), ] operations = [ From f6701fc8dc36f375cb4323d0567839b0e9a3315d Mon Sep 17 00:00:00 2001 From: Mahfouz Shehu Date: Wed, 8 Nov 2023 10:35:41 +0000 Subject: [PATCH 09/13] Update emgena/serializers.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Martín Beracochea --- emgena/serializers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emgena/serializers.py b/emgena/serializers.py index 6fa036d09..2d9d600d2 100644 --- a/emgena/serializers.py +++ b/emgena/serializers.py @@ -103,7 +103,7 @@ def create(self, validated_data): ticket["Cc"] = n.cc if n.is_consent: - return 403 + return 404 else: ticket["Queue"] = emg_queue From d8c1f42822067e6504bc3af52c6a9629bc0802d3 Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Wed, 8 Nov 2023 12:51:52 +0000 Subject: [PATCH 10/13] Updated doc string to reflect removal on ena_queue consent ticket creation: --- emgena/serializers.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/emgena/serializers.py b/emgena/serializers.py index 2d9d600d2..b8f0d85f8 100644 --- a/emgena/serializers.py +++ b/emgena/serializers.py @@ -81,11 +81,7 @@ def is_valid(self): return super().is_valid() def create(self, validated_data): - """Create an RT ticket. - If this is a consent approval then the procedure is: - - create ticket in ENA-MG queue - otherwise: - - create ticket in EMG queue + """Create an RT ticket in EMG queue Note: remember to setup valid credentials (i.e. url, user and token token) in the config.rt """ import requests From 9199a565ea2630b66317e1810b2620250c5503e3 Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Wed, 8 Nov 2023 14:26:31 +0000 Subject: [PATCH 11/13] Bumped api version --- emgcli/__init__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/emgcli/__init__.py b/emgcli/__init__.py index d0b83bc77..31dfb7928 100644 --- a/emgcli/__init__.py +++ b/emgcli/__init__.py @@ -1 +1 @@ -__version__: str = "2.4.34" +__version__: str = "2.4.35" diff --git a/pyproject.toml b/pyproject.toml index b5d27771b..569182588 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,7 +117,7 @@ max-line-length = 119 """ [tool.bumpversion] -current_version = "2.4.34" +current_version = "2.4.35" [[tool.bumpversion.files]] filename = "emgcli/__init__.py" From c4da25e9966527280fa61dc5c64c96451487ca1d Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Mon, 11 Dec 2023 16:28:12 +0000 Subject: [PATCH 12/13] Filtering out verify token bad request from error logs --- emgcli/settings.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/emgcli/settings.py b/emgcli/settings.py index d31947da0..e7e72017e 100644 --- a/emgcli/settings.py +++ b/emgcli/settings.py @@ -87,6 +87,10 @@ '()': 'django.utils.log.CallbackFilter', 'callback': lambda record: "v1/utils/myaccounts" not in record.getMessage(), }, + 'exclude_token_verification': { + '()': 'django.utils.log.CallbackFilter', + 'callback': lambda record: "v1/utils/token/verify" not in record.getMessage(), + }, }, 'formatters': { 'default': { @@ -136,13 +140,13 @@ 'handlers': ['default'], 'level': 'INFO', 'propagate': False, - 'filters': ['exclude_myaccounts'], + 'filters': ['exclude_myaccounts', 'exclude_token_verification'], }, 'django.server': { 'handlers': ['default'], 'level': 'INFO', 'propagate': False, - 'filters': ['exclude_myaccounts'], + 'filters': ['exclude_myaccounts', 'exclude_token_verification'], }, 'django': { 'handlers': ['null'], From 481052f355b1839158f72e2893a1ced9c9c49a71 Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Tue, 12 Dec 2023 10:08:20 +0000 Subject: [PATCH 13/13] Bumped API version --- emgcli/__init__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/emgcli/__init__.py b/emgcli/__init__.py index 31dfb7928..d550e808a 100644 --- a/emgcli/__init__.py +++ b/emgcli/__init__.py @@ -1 +1 @@ -__version__: str = "2.4.35" +__version__: str = "2.4.36" diff --git a/pyproject.toml b/pyproject.toml index 569182588..2063ce372 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,7 +117,7 @@ max-line-length = 119 """ [tool.bumpversion] -current_version = "2.4.35" +current_version = "2.4.36" [[tool.bumpversion.files]] filename = "emgcli/__init__.py"