Skip to content

Commit

Permalink
Merge pull request #336 from EBI-Metagenomics/bugfix/europepmc-api-fi…
Browse files Browse the repository at this point in the history
…elds-lenlimit

Bugfix Publication pub type max length
  • Loading branch information
mberacochea authored Nov 8, 2023
2 parents c4a2e36 + c3dc316 commit 103568d
Show file tree
Hide file tree
Showing 7 changed files with 112 additions and 48 deletions.
Empty file added emgapi/mgx.py
Empty file.
18 changes: 18 additions & 0 deletions emgapi/migrations/0012_alter_publication_pub_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 3.2.18 on 2023-11-08 09:57

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter ``publication.pub_type`` to a nullable, blank-able CharField.

    The field is stored in column ``PUB_TYPE`` and is declared with a
    maximum length of 300 characters.
    """

    # This migration is applied after emgapi migration 0011.
    dependencies = [("emgapi", "0011_analysisjob_analysis_summary_json")]

    operations = [
        migrations.AlterField(
            model_name="publication",
            name="pub_type",
            field=models.CharField(
                db_column="PUB_TYPE",
                max_length=300,
                blank=True,
                null=True,
            ),
        ),
    ]
13 changes: 12 additions & 1 deletion emgapi/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,14 +502,25 @@ class Publication(models.Model):
db_column='PUBLISHED_YEAR', blank=True, null=True,
help_text='Published year')
pub_type = models.CharField(
db_column='PUB_TYPE', max_length=150, blank=True, null=True)
db_column='PUB_TYPE', max_length=300, blank=True, null=True)

objects = PublicationManager()

class Meta:
db_table = 'PUBLICATION'
ordering = ('pubmed_id',)

def save(self, *args, **kwargs):
    """Truncate over-long text values, then save the publication.

    Every ``CharField``/``TextField`` of the model whose current value is
    longer than the field's ``max_length`` is cut down to ``max_length``
    characters, and the truncation is logged as an error. The (possibly
    modified) instance is then saved through the parent ``save()``.

    Fields that declare no ``max_length`` (it is ``None``, e.g. a
    ``TextField`` without an explicit limit) are left untouched.

    Args:
        *args: Positional arguments forwarded to the parent ``save()``.
        **kwargs: Keyword arguments forwarded to the parent ``save()``.
    """
    for field in self._meta.fields:
        if not isinstance(field, (models.TextField, models.CharField)):
            continue

        max_length = field.max_length
        if max_length is None:
            # Unbounded field: there is nothing to truncate to, and
            # comparing len(...) with None would raise TypeError.
            continue

        field_name = field.name
        field_value = getattr(self, field_name)
        # Empty / None values are falsy and never need truncating.
        if field_value and len(field_value) > max_length:
            logger.error(f"Publication field {field_name} content was truncated at {max_length}")
            setattr(self, field_name, field_value[:max_length])

    super().save(*args, **kwargs)

def __str__(self):
    """Return this publication's ``pubmed_id`` rendered as a string."""
    pubmed_id = self.pubmed_id
    return str(pubmed_id)

Expand Down
35 changes: 19 additions & 16 deletions emgapianns/management/commands/import_publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
import logging
from django.core.management import BaseCommand
from emgapi import models as emg_models
from emgapianns.management.lib.europe_pmc_api.europe_pmc_api_handler import EuropePMCApiHandler
from emgapianns.management.lib.europe_pmc_api.europe_pmc_api_handler import (
EuropePMCApiHandler,
)

logger = logging.getLogger(__name__)

Expand All @@ -29,15 +31,17 @@ def lookup_publication_by_pubmed_id(pubmed_id):
def update_or_create_publication(publication):
return emg_models.Publication.objects.update_or_create(
pubmed_id=publication.pmid,
defaults={'authors': publication.author_string,
'doi': publication.doi,
'isbn': publication.journal_issn,
'iso_journal': publication.journal_title,
'pub_title': publication.title,
'raw_pages': publication.page_info,
'volume': publication.journal_volume,
'published_year': publication.pub_year,
'pub_type': publication.pub_type},
defaults={
"authors": publication.author_string,
"doi": publication.doi,
"isbn": publication.journal_issn,
"iso_journal": publication.journal_title,
"pub_title": publication.title,
"raw_pages": publication.page_info,
"volume": publication.journal_volume,
"published_year": publication.pub_year,
"pub_type": publication.pub_type,
},
)


Expand All @@ -47,19 +51,18 @@ def lookup_publication_by_project_id(project_id):


class Command(BaseCommand):
help = 'Creates or updates a publication in EMG.'
help = "Creates or updates a publication in EMG."

def add_arguments(self, parser):
# TODO: Consider lookup by project id
parser.add_argument('pubmed-id',
help='PubMed identifier (PMID)',
type=int,
action='store')
parser.add_argument(
"pubmed-id", help="PubMed identifier (PMID)", type=int, action="store"
)

def handle(self, *args, **options):
logger.info("CLI %r" % options)

pubmed_id = options['pubmed-id']
pubmed_id = options["pubmed-id"]
publications = lookup_publication_by_pubmed_id(pubmed_id)
for publication in publications:
update_or_create_publication(publication)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright 2019-2022 EMBL - European Bioinformatics Institute
# Copyright 2019-2023 EMBL - European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,7 +34,7 @@ def get_default_connection_headers():
}


class Publication(object):
class Publication:
def __init__(
self,
pub_year,
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ tests = [
"pytest-cov==2.12.1",
"pandas==1.3.2",
"responses==0.23.1",
"Faker==19.6.2",
]

dev = [
Expand Down
89 changes: 60 additions & 29 deletions tests/webuploader/test_import_publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,39 @@

import pytest

from emgapianns.management.commands.import_publication import lookup_publication_by_pubmed_id


@pytest.mark.parametrize("pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi", [
(4838818,
"Proceedings: The morphological variation of nervous structures in the atrial endocardium of the dog.",
1974,
"Floyd K, Linden RJ, Saunders DA.",
"n/a"),
(31138692,
"Mechanisms by which sialylated milk oligosaccharides impact bone biology in a gnotobiotic mouse "
"model of infant undernutrition.",
2019,
"Cowardin CA, Ahern PP, Kung VL, Hibberd MC, Cheng J, Guruge JL, Sundaresan V, Head RD, Barile D,"
" Mills DA, Barratt MJ, Huq S, Ahmed T, Gordon JI.",
"10.1073/pnas.1821770116")
])
def test_lookup_publication_by_pubmed_id_should_return(pubmed_id,
expected_pub_title,
expected_year_of_pub,
expected_authors,
expected_doi):
from emgapi.models import Publication
from model_bakery import baker


from emgapianns.management.commands.import_publication import (
lookup_publication_by_pubmed_id,
)


@pytest.mark.parametrize(
"pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi",
[
(
4838818,
"Proceedings: The morphological variation of nervous structures in the atrial endocardium of the dog.",
1974,
"Floyd K, Linden RJ, Saunders DA.",
"n/a",
),
(
31138692,
"Mechanisms by which sialylated milk oligosaccharides impact bone biology in a gnotobiotic mouse "
"model of infant undernutrition.",
2019,
"Cowardin CA, Ahern PP, Kung VL, Hibberd MC, Cheng J, Guruge JL, Sundaresan V, Head RD, Barile D,"
" Mills DA, Barratt MJ, Huq S, Ahmed T, Gordon JI.",
"10.1073/pnas.1821770116",
),
],
)
def test_lookup_publication_by_pubmed_id_should_return(
pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi
):
publications = lookup_publication_by_pubmed_id(pubmed_id)
assert len(publications) == 1

Expand All @@ -49,18 +60,38 @@ def test_lookup_publication_by_pubmed_id_should_return(pubmed_id,
assert publication.doi == expected_doi


@pytest.mark.parametrize("pubmed_id", [
(0),
(000)
])
# Both parameters are the integer 0, so the test runs twice with the same value.
@pytest.mark.parametrize("pubmed_id", [0, 0])
def test_lookup_publication_by_pubmed_id_(pubmed_id):
    """Looking up a PubMed id of zero is expected to raise ``ValueError``."""
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        lookup_publication_by_pubmed_id(pubmed_id)


@pytest.mark.parametrize("pubmed_id", [
("test")
])
@pytest.mark.parametrize("pubmed_id", ["test"])
def test_lookup_publication_by_pubmed_id_raises_exception_on_string(pubmed_id):
    """Looking up a non-numeric string id is expected to raise ``TypeError``."""
    expect_type_error = pytest.raises(TypeError)
    with expect_type_error:
        lookup_publication_by_pubmed_id(pubmed_id)


@pytest.mark.django_db
def test_text_fields_longer_than_expected(faker):
    """Saving a Publication shortens over-long text values to the expected limits.

    Five unsaved publications are prepared with generated text for three
    representative fields. Each value must exceed its limit before
    ``save()`` and be within the limit afterwards.
    """
    # Three fields picked as representatives, mapped to the maximum length
    # this test expects for each of them.
    limits = {"pub_title": 740, "pub_type": 300, "volume": 55}

    # One generated text per field, requested with a cap 1000 characters
    # above the limit; the same values are shared by all prepared instances.
    oversized = {
        name: faker.text(max_nb_chars=limit + 1000)
        for name, limit in limits.items()
    }

    for publication in baker.prepare(Publication, _quantity=5, **oversized):
        # Precondition: the generated text really is too long.
        for name, limit in limits.items():
            assert len(getattr(publication, name)) > limit

        publication.save()

        # After saving, every checked field fits within its limit.
        for name, limit in limits.items():
            assert len(getattr(publication, name)) <= limit

0 comments on commit 103568d

Please sign in to comment.