Skip to content

Commit

Permalink
Check that the Europe PMC result is valid when multiple results are returned
Browse files Browse the repository at this point in the history
  • Loading branch information
fyvon committed Dec 5, 2024
1 parent 804d121 commit 585e256
Showing 1 changed file with 16 additions and 1 deletion.
17 changes: 16 additions & 1 deletion curation/parsers/publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,22 @@ def rest_api_call_to_epmc(self,query):
result = requests.get(constants.USEFUL_URLS['EPMC_REST_SEARCH'], params=payload)
result = result.json()
if 'result' in result['resultList']:
return result['resultList']['result'][0]
if len(result['resultList']['result']) > 1:
# If there are multiple results, the first one might be a PMC entry, which doesn't contain a PMID or DOI and therefore needs to be skipped.
if query.startswith('doi:'):
query_id = query.removeprefix('doi:')
id_type = 'doi'
elif query.startswith('ext_id:'):
query_id = query.removeprefix('ext_id:')
id_type = 'pmid'
else:
raise Exception('Unexpected query format: {}'.format(query))
for single_result in result['resultList']['result']:
if id_type in single_result and single_result[id_type] == query_id:
return single_result
raise Exception('Results from EuropePMC for {} not in the expected format.'.format(query))
else:
return result['resultList']['result'][0]
else:
raise Exception(f'Can\'t find the paper in EuropePMC! (query:{query})')

Expand Down

0 comments on commit 585e256

Please sign in to comment.