Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
mberacochea committed Sep 29, 2023
1 parent 61af088 commit 73894c1
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 54 deletions.
Empty file added emgapi/mgx.py
Empty file.
14 changes: 13 additions & 1 deletion emgapi/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,14 +502,26 @@ class Publication(models.Model):
db_column='PUBLISHED_YEAR', blank=True, null=True,
help_text='Published year')
pub_type = models.CharField(
db_column='PUB_TYPE', max_length=150, blank=True, null=True)
db_column='PUB_TYPE', max_length=300, blank=True, null=True)

objects = PublicationManager()

class Meta:
db_table = 'PUBLICATION'
ordering = ('pubmed_id',)

def save(self, *args, **kwargs):
    """Truncate over-long text values to their field's ``max_length``, then save.

    Every ``CharField``/``TextField`` value longer than the field's
    ``max_length`` is cut down to that length (the truncation is logged)
    before delegating to the parent ``save``. Fields declared without a
    ``max_length`` are left untouched.

    Args:
        *args: Forwarded unchanged to the parent ``save``.
        **kwargs: Forwarded unchanged to the parent ``save``.

    Returns:
        This instance, after it has been saved.
    """
    for field in self._meta.fields:
        if not isinstance(field, (models.TextField, models.CharField)):
            continue
        max_length = field.max_length
        if max_length is None:
            # A field without max_length (the TextField default) has nothing
            # to truncate to; comparing len() with None raises TypeError.
            continue
        field_name = field.name
        field_value = getattr(self, field_name)
        if field_value and len(field_value) > max_length:
            logger.info(f"Publication field {field_name} content was truncated at {max_length}")
            setattr(self, field_name, field_value[:max_length])
    super().save(*args, **kwargs)
    return self

def __str__(self):
return str(self.pubmed_id)

Expand Down
35 changes: 19 additions & 16 deletions emgapianns/management/commands/import_publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
import logging
from django.core.management import BaseCommand
from emgapi import models as emg_models
from emgapianns.management.lib.europe_pmc_api.europe_pmc_api_handler import EuropePMCApiHandler
from emgapianns.management.lib.europe_pmc_api.europe_pmc_api_handler import (
EuropePMCApiHandler,
)

logger = logging.getLogger(__name__)

Expand All @@ -29,15 +31,17 @@ def lookup_publication_by_pubmed_id(pubmed_id):
def update_or_create_publication(publication):
return emg_models.Publication.objects.update_or_create(
pubmed_id=publication.pmid,
defaults={'authors': publication.author_string,
'doi': publication.doi,
'isbn': publication.journal_issn,
'iso_journal': publication.journal_title,
'pub_title': publication.title,
'raw_pages': publication.page_info,
'volume': publication.journal_volume,
'published_year': publication.pub_year,
'pub_type': publication.pub_type},
defaults={
"authors": publication.author_string,
"doi": publication.doi,
"isbn": publication.journal_issn,
"iso_journal": publication.journal_title,
"pub_title": publication.title,
"raw_pages": publication.page_info,
"volume": publication.journal_volume,
"published_year": publication.pub_year,
"pub_type": publication.pub_type,
},
)


Expand All @@ -47,19 +51,18 @@ def lookup_publication_by_project_id(project_id):


class Command(BaseCommand):
help = 'Creates or updates a publication in EMG.'
help = "Creates or updates a publication in EMG."

def add_arguments(self, parser):
# TODO: Consider lookup by project id
parser.add_argument('pubmed-id',
help='PubMed identifier (PMID)',
type=int,
action='store')
parser.add_argument(
"pubmed-id", help="PubMed identifier (PMID)", type=int, action="store"
)

def handle(self, *args, **options):
logger.info("CLI %r" % options)

pubmed_id = options['pubmed-id']
pubmed_id = options["pubmed-id"]
publications = lookup_publication_by_pubmed_id(pubmed_id)
for publication in publications:
update_or_create_publication(publication)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright 2019-2022 EMBL - European Bioinformatics Institute
# Copyright 2019-2023 EMBL - European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,7 +34,7 @@ def get_default_connection_headers():
}


class Publication(object):
class Publication:
def __init__(
self,
pub_year,
Expand Down
12 changes: 6 additions & 6 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ def hide_ena_config():
def django_db_setup(hide_ena_config, django_db_setup):
if hide_ena_config:
settings.DATABASES.update(hide_ena_config)
mongo_db = settings.MONGO_CONF["db"]
if "test" not in mongo_db:
raise ValueError(f"The mongo DB name is {mongo_db}... it should have the word 'test' somewhere.")
mongo_connection = connect(**settings.MONGO_CONF)
yield
mongo_connection.drop_database(mongo_db)
# mongo_db = settings.MONGO_CONF["db"]
# if "test" not in mongo_db:
# raise ValueError(f"The mongo DB name is {mongo_db}... it should have the word 'test' somewhere.")
# mongo_connection = connect(**settings.MONGO_CONF)
# yield
# mongo_connection.drop_database(mongo_db)
89 changes: 60 additions & 29 deletions tests/webuploader/test_import_publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,39 @@

import pytest

from emgapianns.management.commands.import_publication import lookup_publication_by_pubmed_id


@pytest.mark.parametrize("pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi", [
(4838818,
"Proceedings: The morphological variation of nervous structures in the atrial endocardium of the dog.",
1974,
"Floyd K, Linden RJ, Saunders DA.",
"n/a"),
(31138692,
"Mechanisms by which sialylated milk oligosaccharides impact bone biology in a gnotobiotic mouse "
"model of infant undernutrition.",
2019,
"Cowardin CA, Ahern PP, Kung VL, Hibberd MC, Cheng J, Guruge JL, Sundaresan V, Head RD, Barile D,"
" Mills DA, Barratt MJ, Huq S, Ahmed T, Gordon JI.",
"10.1073/pnas.1821770116")
])
def test_lookup_publication_by_pubmed_id_should_return(pubmed_id,
expected_pub_title,
expected_year_of_pub,
expected_authors,
expected_doi):
from emgapi.models import Publication
from model_bakery import baker


from emgapianns.management.commands.import_publication import (
lookup_publication_by_pubmed_id,
)


@pytest.mark.parametrize(
"pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi",
[
(
4838818,
"Proceedings: The morphological variation of nervous structures in the atrial endocardium of the dog.",
1974,
"Floyd K, Linden RJ, Saunders DA.",
"n/a",
),
(
31138692,
"Mechanisms by which sialylated milk oligosaccharides impact bone biology in a gnotobiotic mouse "
"model of infant undernutrition.",
2019,
"Cowardin CA, Ahern PP, Kung VL, Hibberd MC, Cheng J, Guruge JL, Sundaresan V, Head RD, Barile D,"
" Mills DA, Barratt MJ, Huq S, Ahmed T, Gordon JI.",
"10.1073/pnas.1821770116",
),
],
)
def test_lookup_publication_by_pubmed_id_should_return(
pubmed_id, expected_pub_title, expected_year_of_pub, expected_authors, expected_doi
):
publications = lookup_publication_by_pubmed_id(pubmed_id)
assert len(publications) == 1

Expand All @@ -49,18 +60,38 @@ def test_lookup_publication_by_pubmed_id_should_return(pubmed_id,
assert publication.doi == expected_doi


@pytest.mark.parametrize("pubmed_id", [
(0),
(000)
])
@pytest.mark.parametrize("pubmed_id", [0, 000])
def test_lookup_publication_by_pubmed_id_(pubmed_id):
    """Looking up a PubMed id of zero is expected to raise ``ValueError``."""
    with pytest.raises(ValueError):
        lookup_publication_by_pubmed_id(pubmed_id)


@pytest.mark.parametrize("pubmed_id", [
("test")
])
@pytest.mark.parametrize("pubmed_id", ["test"])
def test_lookup_publication_by_pubmed_id_raises_exception_on_string(pubmed_id):
    """Looking up a non-numeric PubMed id is expected to raise ``TypeError``."""
    with pytest.raises(TypeError):
        lookup_publication_by_pubmed_id(pubmed_id)


@pytest.mark.django_db
def test_text_fields_longer_than_expected(faker):
    """Check that saving a Publication shortens over-long text values.

    Three text fields stand in for the rest: each is given generated text
    requested at 1000 characters above its expected maximum, and after
    ``save()`` each value must be no longer than that maximum.
    """
    # Representative fields mapped to the maximum length expected after save().
    expected_max = {"pub_title": 740, "pub_type": 300, "volume": 50}

    oversized_values = {
        name: faker.text(max_nb_chars=limit + 1000)
        for name, limit in expected_max.items()
    }
    publications = baker.prepare(Publication, _quantity=5, **oversized_values)

    for publication in publications:
        # Precondition: the unsaved instance really holds over-long values.
        for name, limit in expected_max.items():
            assert len(getattr(publication, name)) > limit
        publication.save()
        for name, limit in expected_max.items():
            assert len(getattr(publication, name)) <= limit

0 comments on commit 73894c1

Please sign in to comment.