From 57c6caca83e1ff62bd857933cb42d78812cae62f Mon Sep 17 00:00:00 2001 From: Laurent Gil Date: Thu, 7 Nov 2024 10:56:05 +0000 Subject: [PATCH] Print message when the list of cohorts from the spreadsheet (for a GWAS study) is merged with the list of cohorts from the GWAS Catalog REST API --- curation/template_parser.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/curation/template_parser.py b/curation/template_parser.py index 6e93d31..b641551 100644 --- a/curation/template_parser.py +++ b/curation/template_parser.py @@ -321,19 +321,26 @@ def get_gwas_study(self,gcst_id:str,spreadsheet_cohorts:list,spreadsheet_name:st source_PMID = response_data['publicationInfo']['pubmedId'] # Update the Cohorts list found in the cohort column of the spreadsheet by # adding the list of cohorts from the GWAS study (if the list is present) - cohorts_list = spreadsheet_cohorts + cohorts_list = spreadsheet_cohorts.copy() if 'cohort' in response_data.keys(): cohorts = response_data['cohort'].split('|') for cohort in cohorts: cohort_id = cohort.upper() # Check if cohort in list of cohort references - # # and if the cohort is already in the list provided by th author + # and if the cohort is already in the list provided by the author if cohort_id in self.parsed_cohorts: if cohort_id not in spreadsheet_cohorts_names: cohorts_list.append(self.parsed_cohorts[cohort_id]) else: self.report_error(spreadsheet_name, f'Error: the GWAS Catalog sample cohort "{cohort}" cannot be found in the Cohort Refr. spreadsheet') - + # Print a message if the list of Cohorts from the spreadsheet and from GWAS Catalog (REST API) have been merged. + if spreadsheet_cohorts and len(spreadsheet_cohorts) != len(cohorts_list): + msg = f'''GWAS study {gcst_id} -> the list of cohorts from the spreadsheet has been merged with the one from GWAS. + \t- Spreadsheet list: {', '.join(sorted(spreadsheet_cohorts_names))} + \t+ Merged GWAS list: {', '.join(sorted([x.name.upper() for x in cohorts_list]))}''' + self.report_warning(spreadsheet_name, msg) + + # Ancestry information for ancestry in response_data['ancestries']: if ancestry['type'] != 'initial':