From 585e25687ed53c996cf9e3dd710fd788c5b274d3 Mon Sep 17 00:00:00 2001 From: Florent Yvon Date: Thu, 5 Dec 2024 15:23:52 +0000 Subject: [PATCH] Check that the EuropePMC result is valid when multiple results are returned --- curation/parsers/publication.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/curation/parsers/publication.py b/curation/parsers/publication.py index adc67c23..ed78a6b9 100644 --- a/curation/parsers/publication.py +++ b/curation/parsers/publication.py @@ -77,7 +77,22 @@ def rest_api_call_to_epmc(self,query): result = requests.get(constants.USEFUL_URLS['EPMC_REST_SEARCH'], params=payload) result = result.json() if 'result' in result['resultList']: - return result['resultList']['result'][0] + if len(result['resultList']['result']) > 1: + # If multiple results, the first one might be a PMC entry, which doesn't contain PMID or DOI, therefore needs to be skipped. + if query.startswith('doi:'): + query_id = query.removeprefix('doi:') + id_type = 'doi' + elif query.startswith('ext_id:'): + query_id = query.removeprefix('ext_id:') + id_type = 'pmid' + else: + raise Exception('Unexpected query format: {}'.format(query)) + for single_result in result['resultList']['result']: + if id_type in single_result and single_result[id_type] == query_id: + return single_result + raise Exception('Results from EuropePMC for {} not in the expected format.'.format(query)) + else: + return result['resultList']['result'][0] else: raise Exception(f'Can\'t find the paper in EuropePMC! (query:{query})')