Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Europe PMC Publication annotations #233

Merged
merged 7 commits into from
Oct 12, 2021
75 changes: 75 additions & 0 deletions emgapi/europe_pmc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import itertools

import requests
from django.http import Http404

# Keys shared by every humanized annotation group built in this module.
TITLE = 'title'
DESCRIPTION = 'description'
ANNOTATIONS = 'annotations'

# Maps the raw annotation `type` to a human-readable title and description.
# based on http://blog.europepmc.org/2020/11/europe-pmc-publications-metagenomics-annotations.html
annotation_type_humanize_map = {
    'Sample-Material': {TITLE: 'Sample material', DESCRIPTION: 'Sample from which the microbiome is extracted'},
    'Body-Site': {TITLE: 'Body site', DESCRIPTION: 'Host body region/structure where microbiome is found'},
    'Host': {TITLE: 'Host', DESCRIPTION: 'The organism where the microbiome is found'},
    'Engineered': {TITLE: 'Engineered environment', DESCRIPTION: 'Microbiome’s man-made environment'},
    'Ecoregion': {TITLE: 'Ecoregion', DESCRIPTION: 'Microbiome’s natural environment'},
    'Date': {TITLE: 'Date', DESCRIPTION: 'Sampling date'},
    'Place': {TITLE: 'Place', DESCRIPTION: 'Microbiome’s place or geocoordinates'},
    'Site': {TITLE: 'Site', DESCRIPTION: 'Microbiome’s site within place'},
    'State': {TITLE: 'State', DESCRIPTION: 'Host/Environment state'},
    'Treatment': {TITLE: 'Treatment', DESCRIPTION: 'Host/Environment treatments'},
    'Kit': {TITLE: 'Kit', DESCRIPTION: 'Nucleic acid extraction-kit'},
    'Gene': {TITLE: 'Gene', DESCRIPTION: 'Target gene(s) (e.g. hypervariable regions of 16s/18s rRNA gene)'},
    'Primer': {TITLE: 'Primer', DESCRIPTION: 'PCR primers'},
    # Fixed the "aplicon" misspelling in this user-facing description.
    'LS': {TITLE: 'Library strategy', DESCRIPTION: 'e.g. amplicon, whole metagenome'},
    'LCM': {TITLE: 'Library construction method', DESCRIPTION: 'e.g. paired-end, single-end'},
    'Sequencing': {TITLE: 'Sequencing platform', DESCRIPTION: ''},
}

# sample processing annotations tend to be more accurate than others,
# so these types are reported in their own group.
sample_processing_annotation_types = ['Sequencing', 'LS', 'LCM', 'Kit', 'Primer']


def get_publication_annotations(pubmed_id, *, timeout=30):
    """
    Fetch EMERALD-provided Europe PMC metagenomics annotations for a paper, and group them by type.

    :param pubmed_id: the publication identifier in PubMed
    :param timeout: seconds to wait for Europe PMC before giving up; passed to ``requests.get``
    :return: dict with the keys 'sample_processing' and 'other'. Each value is a list of
        annotation groups (dicts holding a title, a description and the list of annotations
        of that type), ordered by descending number of annotations.
    :raises Http404: if Europe PMC does not answer with HTTP 200, or the response body is
        not JSON of the expected shape
    """
    epmc = requests.get(
        'https://www.ebi.ac.uk/europepmc/annotations_api/annotationsByArticleIds',
        params={
            'articleIds': f'MED:{pubmed_id}',
            'provider': 'Metagenomics',
        },
        # Without a timeout a stalled upstream connection would block the caller indefinitely.
        timeout=timeout,
    )

    # Explicit check instead of `assert`: assertions are removed when Python runs with -O.
    if epmc.status_code != 200:
        raise Http404
    try:
        annotations = epmc.json()[0][ANNOTATIONS]
    except (ValueError, TypeError, KeyError, IndexError):
        # ValueError: body is not JSON; the others: JSON of an unexpected shape.
        raise Http404

    # Group by annotation type. Accumulating into a dict (rather than itertools.groupby,
    # which only merges *consecutive* items) keeps every annotation even when the types
    # arrive interleaved. Groups keep first-appearance order.
    grouped_annotations = {}
    for annotation in annotations:
        anno_type = annotation.get('type', 'Other')
        grouped_annotations.setdefault(anno_type, []).append(annotation)

    # Sort within each group by the annotated text, case-insensitively.
    # `or ''` tolerates a missing or null 'exact' value.
    for annots in grouped_annotations.values():
        annots.sort(key=lambda anno: (anno.get('exact') or '').lower())

    # Split off special sample processing annotation groups
    sample_processing_annotations = []
    other_annotations = []

    for anno_type, annots in grouped_annotations.items():
        humanized_annotation_group = {
            # Unknown types fall back to their raw name with an empty description.
            **annotation_type_humanize_map.get(anno_type, {TITLE: anno_type, DESCRIPTION: ''}),
            ANNOTATIONS: annots,
        }
        if anno_type in sample_processing_annotation_types:
            sample_processing_annotations.append(humanized_annotation_group)
        else:
            other_annotations.append(humanized_annotation_group)

    def group_size(group):
        return len(group.get(ANNOTATIONS, []))

    # Sort each list so the types with the most annotations come first
    sample_processing_annotations.sort(key=group_size, reverse=True)
    other_annotations.sort(key=group_size, reverse=True)

    return {'sample_processing': sample_processing_annotations, 'other': other_annotations}
11 changes: 10 additions & 1 deletion emgapi/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import logging
import inflection
Expand Down Expand Up @@ -49,6 +48,7 @@
from . import utils as emg_utils
from . import renderers as emg_renderers
from . import filters as emg_filters
from .europe_pmc import get_publication_annotations
from .sourmash import validate_sourmash_signature, save_signature, send_sourmash_jobs, get_sourmash_job_status, \
get_result_file

Expand Down Expand Up @@ -1164,6 +1164,15 @@ def list(self, request, *args, **kwargs):
"""
return super(PublicationViewSet, self).list(request, *args, **kwargs)

@action(detail=True, methods=['get'])
def europe_pmc_annotations(self, request, pubmed_id=None):
    """
    Respond with the result of get_publication_annotations for the given pubmed_id.
    Raises Http404 when no pubmed_id is supplied.
    """
    if pubmed_id:
        return Response(data=get_publication_annotations(pubmed_id))
    raise Http404


class GenomeCatalogueViewSet(mixins.RetrieveModelMixin,
emg_mixins.ListModelMixin,
Expand Down
3 changes: 2 additions & 1 deletion emgcli/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ def create_secret_key(var_dir):
'rest_framework_mongoengine',
'rest_framework_jwt',
'django_filters',
'rest_framework_json_api',
# apps
'emgapi',
'emgena',
Expand Down Expand Up @@ -364,7 +365,7 @@ def create_secret_key(var_dir):
# 'rest_framework_xml.renderers.XMLRenderer',
# 'rest_framework_yaml.renderers.YAMLRenderer',
'emgapi.renderers.CSVStreamingRenderer',
'rest_framework.renderers.BrowsableAPIRenderer',
'rest_framework_json_api.renderers.BrowsableAPIRenderer',
),

'DEFAULT_FILTER_BACKENDS': (
Expand Down
42 changes: 41 additions & 1 deletion tests/api/test_publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,57 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest import mock

from django.urls import reverse
from model_bakery import baker

from rest_framework import status
from rest_framework.test import APITestCase


class MockEuropePMCResponse:
    """Stand-in for the response object returned by the patched `requests.get`."""

    status_code = 200

    @staticmethod
    def json():
        """Return a freshly built payload holding one article with a single 'LS' annotation."""
        annotation = dict(
            prefix='Love is required whenever he’s ',
            exact='sequenced',
            postfix='. It comes just before the assembly.',
            type='LS',
        )
        article = dict(annotations=[annotation])
        return [article]


class TestPublicationAPI(APITestCase):
    """API tests for the publication list and Europe PMC annotation endpoints."""

    def setUp(self):
        # Single publication fixture used by the tests in this class.
        publication_fields = {
            'pk': 7,
            'pubmed_id': '007',
            'pub_title': 'The man with the golden metagenome',
            'authors': 'Bond, J; Moneypenny, J; et al',
        }
        baker.make('emgapi.Publication', **publication_fields)

    def test_default(self):
        # The publication list endpoint answers with 200.
        listing = self.client.get(reverse('emgapi_v1:publications-list'))
        assert listing.status_code == status.HTTP_200_OK

    @mock.patch('emgapi.europe_pmc.requests.get')
    def test_europe_pmc_annotations(self, mock_get):
        # The outgoing Europe PMC request is replaced by the canned mock response.
        mock_get.return_value = MockEuropePMCResponse()
        endpoint = reverse('emgapi_v1:publications-europe-pmc-annotations', args=('007',))

        response = self.client.get(endpoint)
        self.assertEqual(response.status_code, status.HTTP_200_OK)

        payload = response.json()['data']
        self.assertIn('sample_processing', payload)

        # The mock's only annotation has type 'LS', so it must surface as the first
        # sample-processing group under its humanized title.
        library_strategy_group = payload['sample_processing'][0]
        self.assertEqual(library_strategy_group['title'], 'Library strategy')
        self.assertEqual(len(library_strategy_group['annotations']), 1)