diff --git a/emgapi/mgx.py b/emgapi/mgx.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/emgapi/migrations/0012_alter_publication_pub_type.py b/emgapi/migrations/0012_alter_publication_pub_type.py
new file mode 100644
index 000000000..1d61c5fb2
--- /dev/null
+++ b/emgapi/migrations/0012_alter_publication_pub_type.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.2.18 on 2023-11-08 09:57
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('emgapi', '0011_analysisjob_analysis_summary_json'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='publication',
+            name='pub_type',
+            field=models.CharField(blank=True, db_column='PUB_TYPE', max_length=300, null=True),
+        ),
+    ]
diff --git a/emgapi/models.py b/emgapi/models.py
index 1faf824ea..1cf7f0321 100644
--- a/emgapi/models.py
+++ b/emgapi/models.py
@@ -502,7 +502,7 @@ class Publication(models.Model):
         db_column='PUBLISHED_YEAR', blank=True, null=True,
         help_text='Published year')
     pub_type = models.CharField(
-        db_column='PUB_TYPE', max_length=150, blank=True, null=True)
+        db_column='PUB_TYPE', max_length=300, blank=True, null=True)
 
     objects = PublicationManager()
 
@@ -510,6 +510,34 @@ class Meta:
         db_table = 'PUBLICATION'
         ordering = ('pubmed_id',)
 
+    def save(self, *args, **kwargs):
+        """Truncate over-long text values, then save the publication.
+
+        Each string held by a ``CharField`` or ``TextField`` that is
+        longer than the field's ``max_length`` is cut to that length,
+        and an error is logged for it, before the normal save runs.
+
+        Fields without a ``max_length`` (the ``TextField`` default)
+        are left untouched.
+        """
+        for field in self._meta.fields:
+            if not isinstance(field, (models.CharField, models.TextField)):
+                continue
+            max_length = field.max_length
+            if max_length is None:
+                # No declared limit: comparing len() with None would raise.
+                continue
+            field_value = getattr(self, field.name)
+            # None, empty and non-string values have nothing to truncate.
+            if isinstance(field_value, str) and len(field_value) > max_length:
+                logger.error(
+                    "Publication field %s content was truncated at %s",
+                    field.name,
+                    max_length,
+                )
+                setattr(self, field.name, field_value[:max_length])
+        super().save(*args, **kwargs)
+
     def __str__(self):
         return str(self.pubmed_id)
 
diff --git a/emgapianns/management/commands/import_publication.py b/emgapianns/management/commands/import_publication.py
index 3bc8d52c0..55d064d39 100644
--- a/emgapianns/management/commands/import_publication.py
+++ b/emgapianns/management/commands/import_publication.py
@@ -16,7 +16,9 @@
 import logging
 from django.core.management import BaseCommand
 from emgapi import models as emg_models
-from emgapianns.management.lib.europe_pmc_api.europe_pmc_api_handler import EuropePMCApiHandler
+from emgapianns.management.lib.europe_pmc_api.europe_pmc_api_handler import (
+    EuropePMCApiHandler,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -29,15 +31,17 @@ def lookup_publication_by_pubmed_id(pubmed_id):
 def update_or_create_publication(publication):
     return emg_models.Publication.objects.update_or_create(
         pubmed_id=publication.pmid,
-        defaults={'authors': publication.author_string,
-                  'doi': publication.doi,
-                  'isbn': publication.journal_issn,
-                  'iso_journal': publication.journal_title,
-                  'pub_title': publication.title,
-                  'raw_pages': publication.page_info,
-                  'volume': publication.journal_volume,
-                  'published_year': publication.pub_year,
-                  'pub_type': publication.pub_type},
+        defaults={
+            "authors": publication.author_string,
+            "doi": publication.doi,
+            "isbn": publication.journal_issn,
+            "iso_journal": publication.journal_title,
+            "pub_title": publication.title,
+            "raw_pages": publication.page_info,
+            "volume": publication.journal_volume,
+            "published_year": publication.pub_year,
+            "pub_type": publication.pub_type,
+        },
     )
 
 
@@ -47,19 +51,18 @@ def lookup_publication_by_project_id(project_id):
 
 
 class Command(BaseCommand):
-    help = 'Creates or updates a publication in EMG.'
+    help = "Creates or updates a publication in EMG."
 
     def add_arguments(self, parser):
         # TODO: Consider lookup by project id
-        parser.add_argument('pubmed-id',
-                            help='PubMed identifier (PMID)',
-                            type=int,
-                            action='store')
+        parser.add_argument(
+            "pubmed-id", help="PubMed identifier (PMID)", type=int, action="store"
+        )
 
     def handle(self, *args, **options):
         logger.info("CLI %r" % options)
 
-        pubmed_id = options['pubmed-id']
+        pubmed_id = options["pubmed-id"]
         publications = lookup_publication_by_pubmed_id(pubmed_id)
         for publication in publications:
             update_or_create_publication(publication)
diff --git a/emgapianns/management/lib/europe_pmc_api/europe_pmc_api_handler.py b/emgapianns/management/lib/europe_pmc_api/europe_pmc_api_handler.py
index 7c0a5b13e..cacfb308b 100644
--- a/emgapianns/management/lib/europe_pmc_api/europe_pmc_api_handler.py
+++ b/emgapianns/management/lib/europe_pmc_api/europe_pmc_api_handler.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-# Copyright 2019-2022 EMBL - European Bioinformatics Institute
+# Copyright 2019-2023 EMBL - European Bioinformatics Institute
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -34,7 +34,7 @@ def get_default_connection_headers():
     }
 
 
-class Publication(object):
+class Publication:
     def __init__(
         self,
         pub_year,
diff --git a/pyproject.toml b/pyproject.toml
index bbc991dca..b5d27771b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -78,6 +78,7 @@ tests = [
     "pytest-cov==2.12.1",
     "pandas==1.3.2",
     "responses==0.23.1",
+    "Faker==19.6.2",
 ]
 
 dev = [
diff --git a/tests/webuploader/test_import_publication.py b/tests/webuploader/test_import_publication.py
index 29531aaf8..13bb9ff40 100644
--- a/tests/webuploader/test_import_publication.py
+++ b/tests/webuploader/test_import_publication.py
@@ -16,28 +16,39 @@
 
 import pytest
 
-from emgapianns.management.commands.import_publication import lookup_publication_by_pubmed_id
-
-
-@pytest.mark.parametrize("pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi", [
-    (4838818,
-     "Proceedings: The morphological variation of nervous structures in the atrial endocardium of the dog.",
-     1974,
-     "Floyd K, Linden RJ, Saunders DA.",
-     "n/a"),
-    (31138692,
-     "Mechanisms by which sialylated milk oligosaccharides impact bone biology in a gnotobiotic mouse "
-     "model of infant undernutrition.",
-     2019,
-     "Cowardin CA, Ahern PP, Kung VL, Hibberd MC, Cheng J, Guruge JL, Sundaresan V, Head RD, Barile D,"
-     " Mills DA, Barratt MJ, Huq S, Ahmed T, Gordon JI.",
-     "10.1073/pnas.1821770116")
-])
-def test_lookup_publication_by_pubmed_id_should_return(pubmed_id,
-                                                       expected_pub_title,
-                                                       expected_year_of_pub,
-                                                       expected_authors,
-                                                       expected_doi):
+from emgapi.models import Publication
+from model_bakery import baker
+
+
+from emgapianns.management.commands.import_publication import (
+    lookup_publication_by_pubmed_id,
+)
+
+
+@pytest.mark.parametrize(
+    "pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi",
+    [
+        (
+            4838818,
+            "Proceedings: The morphological variation of nervous structures in the atrial endocardium of the dog.",
+            1974,
+            "Floyd K, Linden RJ, Saunders DA.",
+            "n/a",
+        ),
+        (
+            31138692,
+            "Mechanisms by which sialylated milk oligosaccharides impact bone biology in a gnotobiotic mouse "
+            "model of infant undernutrition.",
+            2019,
+            "Cowardin CA, Ahern PP, Kung VL, Hibberd MC, Cheng J, Guruge JL, Sundaresan V, Head RD, Barile D,"
+            " Mills DA, Barratt MJ, Huq S, Ahmed T, Gordon JI.",
+            "10.1073/pnas.1821770116",
+        ),
+    ],
+)
+def test_lookup_publication_by_pubmed_id_should_return(
+    pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi
+):
     publications = lookup_publication_by_pubmed_id(pubmed_id)
 
     assert len(publications) == 1
@@ -49,18 +60,32 @@ def test_lookup_publication_by_pubmed_id_should_return(pubmed_id,
     assert publication.doi == expected_doi
 
 
-@pytest.mark.parametrize("pubmed_id", [
-    (0),
-    (000)
-])
+@pytest.mark.parametrize("pubmed_id", [(0), (000)])
 def test_lookup_publication_by_pubmed_id_(pubmed_id):
     with pytest.raises(ValueError):
         lookup_publication_by_pubmed_id(pubmed_id)
 
 
-@pytest.mark.parametrize("pubmed_id", [
-    ("test")
-])
+@pytest.mark.parametrize("pubmed_id", [("test")])
 def test_lookup_publication_by_pubmed_id_raises_exception_on_string(pubmed_id):
     with pytest.raises(TypeError):
         lookup_publication_by_pubmed_id(pubmed_id)
+
+
+@pytest.mark.django_db
+def test_text_fields_longer_than_expected(faker):
+    """Saving a Publication trims over-long text values down to the expected sizes."""
+    # Three representative fields, each mapped to the length limit checked here.
+    limits = {"pub_title": 740, "pub_type": 300, "volume": 55}
+    oversized = {
+        name: faker.text(max_nb_chars=limit + 1000) for name, limit in limits.items()
+    }
+    publications = baker.prepare(Publication, _quantity=5, **oversized)
+
+    for publication in publications:
+        # Precondition: every generated value starts out longer than its limit.
+        for name, limit in limits.items():
+            assert len(getattr(publication, name)) > limit
+        publication.save()
+        for name, limit in limits.items():
+            assert len(getattr(publication, name)) <= limit