Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix Publication pub type max length #336

Merged
merged 4 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added emgapi/mgx.py
Empty file.
18 changes: 18 additions & 0 deletions emgapi/migrations/0012_alter_publication_pub_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 3.2.18 on 2023-11-08 09:57

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('emgapi', '0011_analysisjob_analysis_summary_json'),
]

operations = [
migrations.AlterField(
model_name='publication',
name='pub_type',
field=models.CharField(blank=True, db_column='PUB_TYPE', max_length=300, null=True),
),
]
13 changes: 12 additions & 1 deletion emgapi/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,14 +502,25 @@ class Publication(models.Model):
db_column='PUBLISHED_YEAR', blank=True, null=True,
help_text='Published year')
pub_type = models.CharField(
db_column='PUB_TYPE', max_length=150, blank=True, null=True)
db_column='PUB_TYPE', max_length=300, blank=True, null=True)

objects = PublicationManager()

class Meta:
db_table = 'PUBLICATION'
ordering = ('pubmed_id',)

def save(self, *args, **kwargs):
for field in self._meta.fields:
if isinstance(field, models.TextField) or isinstance(field, models.CharField):
mberacochea marked this conversation as resolved.
Show resolved Hide resolved
field_name = field.name
max_length = field.max_length
field_value = getattr(self, field_name)
if field_value and len(field_value) > max_length:
logger.error(f"Publication field {field_name} content was truncated at {max_length}")
setattr(self, field_name, field_value[:max_length])
super(Publication, self).save(*args, **kwargs)

def __str__(self):
return str(self.pubmed_id)

Expand Down
35 changes: 19 additions & 16 deletions emgapianns/management/commands/import_publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
import logging
from django.core.management import BaseCommand
from emgapi import models as emg_models
from emgapianns.management.lib.europe_pmc_api.europe_pmc_api_handler import EuropePMCApiHandler
from emgapianns.management.lib.europe_pmc_api.europe_pmc_api_handler import (
EuropePMCApiHandler,
)

logger = logging.getLogger(__name__)

Expand All @@ -29,15 +31,17 @@ def lookup_publication_by_pubmed_id(pubmed_id):
def update_or_create_publication(publication):
return emg_models.Publication.objects.update_or_create(
pubmed_id=publication.pmid,
defaults={'authors': publication.author_string,
'doi': publication.doi,
'isbn': publication.journal_issn,
'iso_journal': publication.journal_title,
'pub_title': publication.title,
'raw_pages': publication.page_info,
'volume': publication.journal_volume,
'published_year': publication.pub_year,
'pub_type': publication.pub_type},
defaults={
"authors": publication.author_string,
"doi": publication.doi,
"isbn": publication.journal_issn,
"iso_journal": publication.journal_title,
"pub_title": publication.title,
"raw_pages": publication.page_info,
"volume": publication.journal_volume,
"published_year": publication.pub_year,
"pub_type": publication.pub_type,
},
)


Expand All @@ -47,19 +51,18 @@ def lookup_publication_by_project_id(project_id):


class Command(BaseCommand):
help = 'Creates or updates a publication in EMG.'
help = "Creates or updates a publication in EMG."

def add_arguments(self, parser):
# TODO: Consider lookup by project id
parser.add_argument('pubmed-id',
help='PubMed identifier (PMID)',
type=int,
action='store')
parser.add_argument(
"pubmed-id", help="PubMed identifier (PMID)", type=int, action="store"
)

def handle(self, *args, **options):
logger.info("CLI %r" % options)

pubmed_id = options['pubmed-id']
pubmed_id = options["pubmed-id"]
publications = lookup_publication_by_pubmed_id(pubmed_id)
for publication in publications:
update_or_create_publication(publication)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright 2019-2022 EMBL - European Bioinformatics Institute
# Copyright 2019-2023 EMBL - European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,7 +34,7 @@ def get_default_connection_headers():
}


class Publication(object):
class Publication:
def __init__(
self,
pub_year,
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ tests = [
"pytest-cov==2.12.1",
"pandas==1.3.2",
"responses==0.23.1",
"Faker==19.6.2",
]

dev = [
Expand Down
89 changes: 60 additions & 29 deletions tests/webuploader/test_import_publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,39 @@

import pytest

from emgapianns.management.commands.import_publication import lookup_publication_by_pubmed_id


@pytest.mark.parametrize("pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi", [
(4838818,
"Proceedings: The morphological variation of nervous structures in the atrial endocardium of the dog.",
1974,
"Floyd K, Linden RJ, Saunders DA.",
"n/a"),
(31138692,
"Mechanisms by which sialylated milk oligosaccharides impact bone biology in a gnotobiotic mouse "
"model of infant undernutrition.",
2019,
"Cowardin CA, Ahern PP, Kung VL, Hibberd MC, Cheng J, Guruge JL, Sundaresan V, Head RD, Barile D,"
" Mills DA, Barratt MJ, Huq S, Ahmed T, Gordon JI.",
"10.1073/pnas.1821770116")
])
def test_lookup_publication_by_pubmed_id_should_return(pubmed_id,
expected_pub_title,
expected_year_of_pub,
expected_authors,
expected_doi):
from emgapi.models import Publication
from model_bakery import baker


from emgapianns.management.commands.import_publication import (
lookup_publication_by_pubmed_id,
)


@pytest.mark.parametrize(
"pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi",
[
(
4838818,
"Proceedings: The morphological variation of nervous structures in the atrial endocardium of the dog.",
1974,
"Floyd K, Linden RJ, Saunders DA.",
"n/a",
),
(
31138692,
"Mechanisms by which sialylated milk oligosaccharides impact bone biology in a gnotobiotic mouse "
"model of infant undernutrition.",
2019,
"Cowardin CA, Ahern PP, Kung VL, Hibberd MC, Cheng J, Guruge JL, Sundaresan V, Head RD, Barile D,"
" Mills DA, Barratt MJ, Huq S, Ahmed T, Gordon JI.",
"10.1073/pnas.1821770116",
),
],
)
def test_lookup_publication_by_pubmed_id_should_return(
pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi
):
publications = lookup_publication_by_pubmed_id(pubmed_id)
assert len(publications) == 1

Expand All @@ -49,18 +60,38 @@ def test_lookup_publication_by_pubmed_id_should_return(pubmed_id,
assert publication.doi == expected_doi


@pytest.mark.parametrize("pubmed_id", [
(0),
(000)
])
@pytest.mark.parametrize("pubmed_id", [(0), (000)])
def test_lookup_publication_by_pubmed_id_(pubmed_id):
with pytest.raises(ValueError):
lookup_publication_by_pubmed_id(pubmed_id)


@pytest.mark.parametrize("pubmed_id", [
("test")
])
@pytest.mark.parametrize("pubmed_id", [("test")])
def test_lookup_publication_by_pubmed_id_raises_exception_on_string(pubmed_id):
with pytest.raises(TypeError):
lookup_publication_by_pubmed_id(pubmed_id)


@pytest.mark.django_db
def test_text_fields_longer_than_expected(faker):
PUB_TITLE_MAX = 740
PUB_TYPE_MAX = 300
VOLUME_MAX = 55

# I've picked 3 fields as representatives
publications = baker.prepare(
Publication,
pub_title=faker.text(max_nb_chars=PUB_TITLE_MAX + 1000),
pub_type=faker.text(max_nb_chars=PUB_TYPE_MAX + 1000),
volume=faker.text(max_nb_chars=VOLUME_MAX + 1000),
_quantity=5,
)

for publication in publications:
assert len(publication.pub_title) > PUB_TITLE_MAX
assert len(publication.pub_type) > PUB_TYPE_MAX
assert len(publication.volume) > VOLUME_MAX
publication.save()
assert len(publication.pub_title) <= PUB_TITLE_MAX
assert len(publication.pub_type) <= PUB_TYPE_MAX
assert len(publication.volume) <= VOLUME_MAX