Skip to content

Commit

Permalink
Europe PMC Publication annotations (#233)
Browse files Browse the repository at this point in the history
* use DRFJsonApi browsable API

* adds Publication detail endpoint to proxy Europe PMC annotations request

* limits Europe PMC annotations to metagenomics, groups by type, and sorts alphabetically

* moves Europe PMC annotation logic to separate src file, changes grouping/sorting/humanizing

* adds unit test / mock for europe pmc annotations

* moves Europe PMC annotations endpoint & provider strings to settings.py

* version bump -> 2.0.1
  • Loading branch information
SandyRogers authored Oct 12, 2021
1 parent 6b7f348 commit 361a788
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 4 deletions.
76 changes: 76 additions & 0 deletions emgapi/europe_pmc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import itertools

import requests
from django.conf import settings
from django.http import Http404

# Keys used in every annotation-group dict produced by this module.
TITLE = 'title'
DESCRIPTION = 'description'
ANNOTATIONS = 'annotations'

# Maps the raw Europe PMC annotation `type` to a human-readable title and description.
# based on http://blog.europepmc.org/2020/11/europe-pmc-publications-metagenomics-annotations.html
annotation_type_humanize_map = {
    'Sample-Material': {TITLE: 'Sample material', DESCRIPTION: 'Sample from which the microbiome is extracted'},
    'Body-Site': {TITLE: 'Body site', DESCRIPTION: 'Host body region/structure where microbiome is found'},
    'Host': {TITLE: 'Host', DESCRIPTION: 'The organism where the microbiome is found'},
    'Engineered': {TITLE: 'Engineered environment', DESCRIPTION: 'Microbiome’s man-made environment'},
    'Ecoregion': {TITLE: 'Ecoregion', DESCRIPTION: 'Microbiome’s natural environment'},
    'Date': {TITLE: 'Date', DESCRIPTION: 'Sampling date'},
    'Place': {TITLE: 'Place', DESCRIPTION: 'Microbiome’s place or geocoordinates'},
    'Site': {TITLE: 'Site', DESCRIPTION: 'Microbiome’s site within place'},
    'State': {TITLE: 'State', DESCRIPTION: 'Host/Environment state'},
    'Treatment': {TITLE: 'Treatment', DESCRIPTION: 'Host/Environment treatments'},
    'Kit': {TITLE: 'Kit', DESCRIPTION: 'Nucleic acid extraction-kit'},
    'Gene': {TITLE: 'Gene', DESCRIPTION: 'Target gene(s) (e.g. hypervariable regions of 16s/18s rRNA gene)'},
    'Primer': {TITLE: 'Primer', DESCRIPTION: 'PCR primers'},
    # Fixed typo in the user-facing description: "aplicon" -> "amplicon".
    'LS': {TITLE: 'Library strategy', DESCRIPTION: 'e.g. amplicon, whole metagenome'},
    'LCM': {TITLE: 'Library construction method', DESCRIPTION: 'e.g. paired-end, single-end'},
    'Sequencing': {TITLE: 'Sequencing platform', DESCRIPTION: ''},
}

# Annotation types reported separately under "sample_processing";
# sample processing annotations tend to be more accurate than others.
sample_processing_annotation_types = ['Sequencing', 'LS', 'LCM', 'Kit', 'Primer']


def get_publication_annotations(pubmed_id):
    """
    Fetch EMERALD-provided Europe PMC metagenomics annotations for a paper, and group them by type.

    Groups whose type is listed in ``sample_processing_annotation_types`` are returned separately
    from all other groups. Within each of the two lists, groups are ordered by descending number
    of annotations; within a group, annotations are ordered case-insensitively by their
    ``exact`` text.

    :param pubmed_id: the publication identified in pubmed
    :return: dict with keys ``sample_processing`` and ``other``; each value is a list of group
        dicts holding ``title``, ``description`` and ``annotations``
    :raises Http404: if Europe PMC does not answer with HTTP 200, or the response body does not
        have the expected JSON structure
    """
    # NOTE(review): no timeout is passed here, so a stalled upstream blocks this call.
    epmc = requests.get(settings.EUROPE_PMC['annotations_endpoint'], params={
        'articleIds': f'MED:{pubmed_id}',
        'provider': settings.EUROPE_PMC['annotations_provider']
    })

    # Explicit check rather than `assert`, which is stripped when Python runs with -O.
    if epmc.status_code != 200:
        raise Http404

    try:
        annotations = epmc.json()[0][ANNOTATIONS]
    except (ValueError, KeyError, IndexError, TypeError) as err:
        # ValueError: body is not JSON; the others: JSON of an unexpected shape.
        raise Http404 from err

    # Group by annotation type. A plain dict is used instead of itertools.groupby because
    # groupby only merges *consecutive* items: with unsorted input, a type appearing in
    # several separate runs would keep only its last run.
    grouped_annotations = {}
    for annotation in annotations:
        grouped_annotations.setdefault(annotation.get('type', 'Other'), []).append(annotation)

    # Sort within each group by case-insensitive annotation text.
    for annots in grouped_annotations.values():
        annots.sort(key=lambda anno: anno.get('exact', '').lower())

    # Split off special sample processing annotation groups
    sample_processing_annotations = []
    other_annotations = []

    for anno_type, annots in grouped_annotations.items():
        humanized_annotation_group = {
            # Unknown types fall back to the raw type name with an empty description.
            **annotation_type_humanize_map.get(anno_type, {TITLE: anno_type, DESCRIPTION: ''}),
            ANNOTATIONS: annots
        }
        if anno_type in sample_processing_annotation_types:
            sample_processing_annotations.append(humanized_annotation_group)
        else:
            other_annotations.append(humanized_annotation_group)

    # Sort each list of groups by highest number of annotations of that type
    sample_processing_annotations.sort(key=lambda group: len(group.get(ANNOTATIONS, [])), reverse=True)
    other_annotations.sort(key=lambda group: len(group.get(ANNOTATIONS, [])), reverse=True)

    return {'sample_processing': sample_processing_annotations, 'other': other_annotations}
11 changes: 10 additions & 1 deletion emgapi/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import logging
import inflection
Expand Down Expand Up @@ -49,6 +48,7 @@
from . import utils as emg_utils
from . import renderers as emg_renderers
from . import filters as emg_filters
from .europe_pmc import get_publication_annotations
from .sourmash import validate_sourmash_signature, save_signature, send_sourmash_jobs, get_sourmash_job_status, \
get_result_file

Expand Down Expand Up @@ -1164,6 +1164,15 @@ def list(self, request, *args, **kwargs):
"""
return super(PublicationViewSet, self).list(request, *args, **kwargs)

@action(detail=True, methods=['get'])
def europe_pmc_annotations(self, request, pubmed_id=None):
    """
    Return the grouped Europe PMC annotations for one publication.

    Responds with whatever ``get_publication_annotations`` returns for
    ``pubmed_id``; raises ``Http404`` when no ``pubmed_id`` is supplied.
    """
    if pubmed_id:
        annotation_groups = get_publication_annotations(pubmed_id)
        return Response(data=annotation_groups)
    raise Http404


class GenomeCatalogueViewSet(mixins.RetrieveModelMixin,
emg_mixins.ListModelMixin,
Expand Down
11 changes: 10 additions & 1 deletion emgcli/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ def create_secret_key(var_dir):
'rest_framework_mongoengine',
'rest_framework_jwt',
'django_filters',
'rest_framework_json_api',
# apps
'emgapi',
'emgena',
Expand Down Expand Up @@ -364,7 +365,7 @@ def create_secret_key(var_dir):
# 'rest_framework_xml.renderers.XMLRenderer',
# 'rest_framework_yaml.renderers.YAMLRenderer',
'emgapi.renderers.CSVStreamingRenderer',
'rest_framework.renderers.BrowsableAPIRenderer',
'rest_framework_json_api.renderers.BrowsableAPIRenderer',
),

'DEFAULT_FILTER_BACKENDS': (
Expand Down Expand Up @@ -636,3 +637,11 @@ def create_secret_key(var_dir):
"celery_broker": "redis://localhost:6379/0",
"celery_backend": "redis://localhost:6379/0",
}

# Europe PMC annotations API settings: start from the built-in defaults...
EUROPE_PMC = {
    "annotations_endpoint": 'https://www.ebi.ac.uk/europepmc/annotations_api/annotationsByArticleIds',
    "annotations_provider": "Metagenomics"
}
# ...and replace them wholesale when the config file provides an emg.europe_pmc section.
try:
    EUROPE_PMC = EMG_CONF['emg']['europe_pmc']
except KeyError:
    pass
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
_requirements = os.path.join(_base, 'requirements.txt')
_requirements_test = os.path.join(_base, 'requirements-test.txt')

version = "2.0.0"
version = "2.0.1"

install_requirements = []
with open(_requirements) as f:
Expand Down
42 changes: 41 additions & 1 deletion tests/api/test_publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,57 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest import mock

from django.urls import reverse
from model_bakery import baker

from rest_framework import status
from rest_framework.test import APITestCase


class MockEuropePMCResponse:
    """Canned stand-in for a successful Europe PMC annotations HTTP response."""

    status_code = 200

    @staticmethod
    def json():
        """Return a fresh payload: one article carrying a single 'LS' annotation."""
        annotation = dict(
            prefix='Love is required whenever he’s ',
            exact='sequenced',
            postfix='. It comes just before the assembly.',
            type='LS',
        )
        article = {'annotations': [annotation]}
        return [article]


class TestPublicationAPI(APITestCase):
    """API tests for the publication list and Europe PMC annotations endpoints."""

    def setUp(self):
        # One known publication is created before each test.
        publication_fields = {
            'pk': 7,
            'pubmed_id': '007',
            'pub_title': 'The man with the golden metagenome',
            'authors': 'Bond, J; Moneypenny, J; et al',
        }
        baker.make('emgapi.Publication', **publication_fields)

    def test_default(self):
        # The list endpoint should answer successfully.
        listing = self.client.get(reverse('emgapi_v1:publications-list'))
        assert listing.status_code == status.HTTP_200_OK

    @mock.patch('emgapi.europe_pmc.requests.get')
    def test_europe_pmc_annotations(self, mock_get):
        # The outbound HTTP call is replaced by the canned Europe PMC response.
        mock_get.return_value = MockEuropePMCResponse()

        endpoint = reverse('emgapi_v1:publications-europe-pmc-annotations', args=('007',))
        resp = self.client.get(endpoint)
        self.assertEqual(resp.status_code, status.HTTP_200_OK)

        data = resp.json()['data']
        self.assertIn('sample_processing', data)

        # The single mocked 'LS' annotation should surface as the first sample-processing group.
        top_group = data['sample_processing'][0]
        self.assertEqual(top_group['title'], 'Library strategy')
        self.assertEqual(len(top_group['annotations']), 1)

0 comments on commit 361a788

Please sign in to comment.