Skip to content

Commit

Permalink
EVA-3604 - contig alias get methods (#58)
Browse files Browse the repository at this point in the history
* New functions to retrieve information from the contig alias API

* bump version
  • Loading branch information
tcezard authored Jul 29, 2024
1 parent 55b6ddd commit be1c9d3
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 3 deletions.
48 changes: 46 additions & 2 deletions ebi_eva_common_pyutils/contig_alias/contig_alias.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,23 @@ class InternalServerError(Exception):
pass


# Default base URL of the contig alias service. ContigAliasClient falls back to this value when it is
# created without a base_url and the CONTING_ALIAS_URL environment variable is unset or empty.
# NOTE(review): "CONTING" looks like a typo for "CONTIG"; it is kept because the environment variable
# read by the client uses the same spelling - confirm before renaming either of them.
CONTING_ALIAS_URL = 'https://www.ebi.ac.uk/eva/webservices/contig-alias'


# The get methods (assembly, assembly_contig_iter, contig_iter) are implemented at the end of ContigAliasClient.
class ContigAliasClient(AppLogger):
"""
Python client for interfacing with the contig alias service.
Authentication is required if using admin endpoints.
"""

def __init__(self, base_url, username=None, password=None):
self.base_url = base_url
def __init__(self, base_url=None, username=None, password=None, default_page_size=1000):
if base_url:
self.base_url = base_url
else:
self.base_url = os.environ.get('CONTING_ALIAS_URL') or CONTING_ALIAS_URL
# Used for get method
self.default_page_size=default_page_size
# Only required for admin endpoints
self.username = username
self.password = password
Expand Down Expand Up @@ -69,3 +77,39 @@ def delete_assembly(self, assembly):
raise InternalServerError
else:
self.error(f'Assembly accession {assembly} could not be deleted. Response: {response.text}')

@retry(tries=3, delay=2, backoff=1.2, jitter=(1, 3))
def _get_page_for_contig_alias_url(self, sub_url, page=0):
    """
    Fetch one page of results from the contig alias service.

    :param sub_url: path appended to ``self.base_url`` (written without a leading slash)
    :param page: index of the page to request; the first page is 0
    :return: the decoded JSON body of the response
    :raises requests.HTTPError: when the service answers with an error status
    """
    # The page size is always the client-wide default set at construction time.
    paging = f'page={page}&size={self.default_page_size}'
    endpoint = f'{self.base_url}/{sub_url}?{paging}'
    reply = requests.get(endpoint, headers={'accept': 'application/json'})
    # Turn HTTP error statuses into exceptions instead of returning an error payload.
    reply.raise_for_status()
    return reply.json()

def _depaginate_iter(self, sub_url, entity_to_retrieve):
    """
    Generator yielding every entity found across all the pages of a paginated endpoint.

    Pages are requested one at a time, starting at page 0; the following page is only requested once
    all the entities of the current one have been consumed. Iteration stops after the first page whose
    ``_links`` section has no ``next`` entry (or that has no ``_links`` section at all).

    :param sub_url: path of the endpoint, passed unchanged to ``_get_page_for_contig_alias_url``
    :param entity_to_retrieve: key under ``_embedded`` that holds the list of entities to yield
    :return: generator over the entities, as decoded from the JSON responses
    """
    page = 0
    while True:
        response_json = self._get_page_for_contig_alias_url(sub_url, page=page)
        # A page without '_embedded' or without the requested key simply contributes no entity.
        yield from response_json.get('_embedded', {}).get(entity_to_retrieve, [])
        # A missing or empty '_links' section means there is nothing more to fetch; treating it as
        # the last page avoids a KeyError on responses that carry no links.
        if 'next' not in (response_json.get('_links') or {}):
            break
        page += 1

def assembly_contig_iter(self, assembly_accession):
    """
    Iterate over the contigs of one assembly.

    :param assembly_accession: accession of the assembly whose contigs are requested
    :return: generator over the ``chromosomeEntities`` returned for that assembly
    """
    chromosomes_path = f'v1/assemblies/{assembly_accession}/chromosomes'
    entity_key = 'chromosomeEntities'
    return self._depaginate_iter(chromosomes_path, entity_key)

def assembly(self, assembly_accession):
    """
    Provide the description of the requested assembly.

    :param assembly_accession: accession of the assembly to look up
    :return: the first entry of ``_embedded.assemblyEntities`` in the service response
    :raises IndexError: when the response contains no assembly entity
    """
    sub_url = f'v1/assemblies/{assembly_accession}'
    response_json = self._get_page_for_contig_alias_url(sub_url)
    assemblies = response_json.get('_embedded', {}).get('assemblyEntities', [])
    if not assemblies:
        # IndexError is what indexing an empty list raises, so handlers written for that case keep
        # working; the message now says which accession could not be found.
        raise IndexError(f'No assembly found in the contig alias service for accession {assembly_accession}')
    return assemblies[0]

def contig_iter(self, insdc_accession):
    """
    Iterate over the contigs registered under one INSDC accession.

    :param insdc_accession: accession looked up through the ``chromosomes/genbank`` endpoint
    :return: generator over the ``chromosomeEntities`` returned for that accession
    """
    genbank_path = f'v1/chromosomes/genbank/{insdc_accession}'
    entity_key = 'chromosomeEntities'
    return self._depaginate_iter(genbank_path, entity_key)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
name='ebi_eva_common_pyutils',
scripts=[os.path.join(os.path.dirname(__file__), 'ebi_eva_internal_pyutils', 'archive_directory.py')],
packages=find_packages(),
version='0.6.8.dev0',
version='0.6.8.dev1',
license='Apache',
description='EBI EVA - Common Python Utilities',
url='https://github.com/EBIVariation/eva-common-pyutils',
Expand Down
59 changes: 59 additions & 0 deletions tests/common/test_contig_alias.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import os
from collections.abc import Iterable
from unittest import TestCase

from ebi_eva_common_pyutils.contig_alias.contig_alias import ContigAliasClient


class TestContigAliasClient(TestCase):
    """
    Checks of the get methods of ContigAliasClient.

    The client is created without a base_url, so these tests query whichever service the client
    resolves by default rather than a local fixture.
    """
    resources = os.path.join(os.path.dirname(__file__), 'resources')

    # Description of the assembly used throughout; it is expected both on its own and nested in a contig.
    _EXPECTED_ASSEMBLY = {
        'insdcAccession': 'GCA_000002945.2',
        'name': 'ASM294v2',
        'organism': 'Schizosaccharomyces pombe (fission yeast)',
        'taxid': 4896,
        'refseq': 'GCF_000002945.1',
        'md5checksum': None,
        'trunc512checksum': None,
        'genbankRefseqIdentical': True
    }

    def setUp(self) -> None:
        self.assembly_accession = 'GCA_000002945.2'
        self.client = ContigAliasClient()

    def test_assembly_contig_iter(self):
        contigs = self.client.assembly_contig_iter(self.assembly_accession)
        assert isinstance(contigs, Iterable)
        sequence_names = [contig.get('genbankSequenceName') for contig in contigs]
        assert sequence_names == ['I', 'II', 'III', 'MT']

    def test_assembly(self):
        description = self.client.assembly(self.assembly_accession)
        assert description == self._EXPECTED_ASSEMBLY

    def test_contig_iter(self):
        contigs = self.client.contig_iter('CU329670.1')
        assert isinstance(contigs, Iterable)
        # Only the first contig returned for the accession is compared.
        first_contig = next(contigs)
        assert first_contig == {
            'genbankSequenceName': 'I',
            'enaSequenceName': 'I',
            'insdcAccession': 'CU329670.1',
            'refseq': 'NC_003424.3',
            'seqLength': 5579133,
            'ucscName': None,
            'md5checksum': 'a5bc80a74aae8fd7622290b11dbc8ab3',
            'trunc512checksum': None,
            'contigType': 'CHROMOSOME',
            'assembly': self._EXPECTED_ASSEMBLY
        }

0 comments on commit be1c9d3

Please sign in to comment.