From be1c9d3e67d86f4f81f8c37afc9ab383dd31f2e1 Mon Sep 17 00:00:00 2001 From: Timothee Cezard Date: Mon, 29 Jul 2024 13:45:40 +0100 Subject: [PATCH] EVA-3604 - contig alias get methods (#58) * New functions to retrieve information from the contig alias API * bump version --- .../contig_alias/contig_alias.py | 48 ++++++++++++++- setup.py | 2 +- tests/common/test_contig_alias.py | 59 +++++++++++++++++++ 3 files changed, 106 insertions(+), 3 deletions(-) create mode 100644 tests/common/test_contig_alias.py diff --git a/ebi_eva_common_pyutils/contig_alias/contig_alias.py b/ebi_eva_common_pyutils/contig_alias/contig_alias.py index 0cb8648..6792ea4 100644 --- a/ebi_eva_common_pyutils/contig_alias/contig_alias.py +++ b/ebi_eva_common_pyutils/contig_alias/contig_alias.py @@ -22,6 +22,9 @@ class InternalServerError(Exception): pass +CONTING_ALIAS_URL = 'https://www.ebi.ac.uk/eva/webservices/contig-alias' + + # TODO add the get methods class ContigAliasClient(AppLogger): """ @@ -29,8 +32,13 @@ class ContigAliasClient(AppLogger): Authentication is required if using admin endpoints. """ - def __init__(self, base_url, username=None, password=None): - self.base_url = base_url + def __init__(self, base_url=None, username=None, password=None, default_page_size=1000): + if base_url: + self.base_url = base_url + else: + self.base_url = os.environ.get('CONTING_ALIAS_URL') or CONTING_ALIAS_URL + # Used for get method + self.default_page_size=default_page_size # Only required for admin endpoints self.username = username self.password = password @@ -69,3 +77,39 @@ def delete_assembly(self, assembly): raise InternalServerError else: self.error(f'Assembly accession {assembly} could not be deleted. Response: {response.text}') + + @retry(tries=3, delay=2, backoff=1.2, jitter=(1, 3)) + def _get_page_for_contig_alias_url(self, sub_url, page=0): + """queries the contig alias to retrieve the page of the provided url""" + url = f'{self.base_url}/{sub_url}?page={page}&size={self.default_page_size}' + response = requests.get(url, headers={'accept': 'application/json'}) + response.raise_for_status() + response_json = response.json() + return response_json + + def _depaginate_iter(self, sub_url, entity_to_retrieve): + """Generator that provides the contigs in the assembly requested.""" + page = 0 + response_json = self._get_page_for_contig_alias_url(sub_url, page=page) + for entity in response_json.get('_embedded', {}).get(entity_to_retrieve, []): + yield entity + while 'next' in response_json['_links']: + page += 1 + response_json = self._get_page_for_contig_alias_url(sub_url, page=page) + for entity in response_json.get('_embedded', {}).get(entity_to_retrieve, []): + yield entity + + def assembly_contig_iter(self, assembly_accession): + """Generator that provides the contigs in the assembly requested.""" + sub_url = f'v1/assemblies/{assembly_accession}/chromosomes' + return self._depaginate_iter(sub_url, 'chromosomeEntities') + + def assembly(self, assembly_accession): + """provides the description of the requested assembly.""" + sub_url = f'v1/assemblies/{assembly_accession}' + response_json = self._get_page_for_contig_alias_url(sub_url) + return response_json.get('_embedded', {}).get('assemblyEntities', [])[0] + + def contig_iter(self, insdc_accession): + sub_url = f'v1/chromosomes/genbank/{insdc_accession}' + return self._depaginate_iter(sub_url, 'chromosomeEntities') diff --git a/setup.py b/setup.py index 9789fcc..7acf28e 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ name='ebi_eva_common_pyutils', scripts=[os.path.join(os.path.dirname(__file__), 'ebi_eva_internal_pyutils', 'archive_directory.py')], packages=find_packages(), - version='0.6.8.dev0', + version='0.6.8.dev1', license='Apache', description='EBI EVA - Common Python Utilities', url='https://github.com/EBIVariation/eva-common-pyutils', diff --git a/tests/common/test_contig_alias.py b/tests/common/test_contig_alias.py new file mode 100644 index 0000000..30f49c1 --- /dev/null +++ b/tests/common/test_contig_alias.py @@ -0,0 +1,59 @@ +import os +from collections.abc import Iterable +from unittest import TestCase + +from ebi_eva_common_pyutils.contig_alias.contig_alias import ContigAliasClient + + +class TestContigAliasClient(TestCase): + resources = os.path.join(os.path.dirname(__file__), 'resources') + + def setUp(self) -> None: + self.assembly_accession = 'GCA_000002945.2' + self.client = ContigAliasClient() + + def test_assembly_contig_iter(self): + iterator = self.client.assembly_contig_iter(self.assembly_accession) + assert isinstance(iterator, Iterable) + # print(list(iterator)) + assert [e.get('genbankSequenceName') for e in iterator] == ['I', 'II', 'III', 'MT'] + + def test_assembly(self): + assembly = self.client.assembly(self.assembly_accession) + assert assembly == { + 'insdcAccession': 'GCA_000002945.2', + 'name': 'ASM294v2', + 'organism': 'Schizosaccharomyces pombe (fission yeast)', + 'taxid': 4896, + 'refseq': 'GCF_000002945.1', + 'md5checksum': None, + 'trunc512checksum': None, + 'genbankRefseqIdentical': True + } + + def test_contig_iter(self): + iterator = self.client.contig_iter('CU329670.1') + assert isinstance(iterator, Iterable) + contig = next(iterator) + assert contig == { + 'genbankSequenceName': 'I', + 'enaSequenceName': 'I', + 'insdcAccession': 'CU329670.1', + 'refseq': 'NC_003424.3', + 'seqLength': 5579133, + 'ucscName': None, + 'md5checksum': 'a5bc80a74aae8fd7622290b11dbc8ab3', + 'trunc512checksum': None, + 'contigType': 'CHROMOSOME', + 'assembly': { + 'insdcAccession': 'GCA_000002945.2', + 'name': 'ASM294v2', + 'organism': 'Schizosaccharomyces pombe (fission yeast)', + 'taxid': 4896, + 'refseq': 'GCF_000002945.1', + 'md5checksum': None, + 'trunc512checksum': None, + 'genbankRefseqIdentical': True + } + } +