From b1c9eaa6b8923113819cd6238950e2d37e7ec800 Mon Sep 17 00:00:00 2001 From: Laurent Gil Date: Thu, 17 Oct 2024 12:00:08 +0100 Subject: [PATCH 1/9] Fetch the list of cohorts from GWAS Catalog REST API if available --- curation/template_parser.py | 142 ++++++++++++++++++++---------------- 1 file changed, 81 insertions(+), 61 deletions(-) diff --git a/curation/template_parser.py b/curation/template_parser.py index 76b78c09..fdbcca9d 100644 --- a/curation/template_parser.py +++ b/curation/template_parser.py @@ -201,12 +201,14 @@ def extract_samples(self): sample_keys = sample_data.data.keys() if 'sample_number' not in sample_keys: if 'source_GWAS_catalog' in sample_keys: - gwas_study = get_gwas_study(sample_data.data['source_GWAS_catalog']) + gwas_study = self.get_gwas_study(sample_data.data['source_GWAS_catalog'],spreadsheet_name) if gwas_study: for gwas_ancestry in gwas_study: c_sample = SampleData(spreadsheet_name) + # Spreadsheet sample/cohort data for col, entry in sample_data.data.items(): c_sample.add_data(col, entry) + # GWAS Catalog sample/cohort data for field, val in gwas_ancestry.items(): c_sample.add_data(field, val) self.update_report(c_sample) @@ -289,6 +291,84 @@ def get_sample_data(self, sample_info, current_schema, spreadsheet_name, samples return sample_data + def get_gwas_study(self,gcst_id:str,spreadsheet_name:str) -> dict: + """ + Get the GWAS Study information related to the PGS sample. + Check that all the required data is available + > Parameter: + - gcst_id: GWAS Study ID (e.g. GCST010127) + - spreadsheet_name: Spreadsheet name for report (e.g. 
Sample Descriptions) + > Return: list of dictionnaries (1 per ancestry) + """ + study_data = [] + gwas_rest_url = 'https://www.ebi.ac.uk/gwas/rest/api/studies/' + response = requests.get(f'{gwas_rest_url}{gcst_id}') + + if not response: + return study_data + response_data = response.json() + if response_data: + try: + source_PMID = response_data['publicationInfo']['pubmedId'] + + # Create list of cohorts if it exists in the GWAS study + # This override the Cohorts found in the cohort column in the spreadsheet + cohorts_list = [] + if 'cohort' in response_data.keys(): + cohorts = response_data['cohort'].split('|') + for cohort in cohorts: + cohort_id = cohort.upper() + if cohort_id in self.parsed_cohorts: + cohorts_list.append(self.parsed_cohorts[cohort_id]) + else: + self.report_error(spreadsheet_name, f'Error: the GWAS Catalog sample cohort "{cohort}" cannot be found in the Cohort Refr. spreadsheet') + + for ancestry in response_data['ancestries']: + + if ancestry['type'] != 'initial': + continue + + ancestry_data = { 'source_PMID': source_PMID } + # Add cohorts list + if cohorts_list: + ancestry_data['cohorts'] = cohorts_list + ancestry_data['sample_number'] = ancestry['numberOfIndividuals'] + + # ancestry_broad + for ancestralGroup in ancestry['ancestralGroups']: + if not 'ancestry_broad' in ancestry_data: + ancestry_data['ancestry_broad'] = '' + else: + ancestry_data['ancestry_broad'] += ',' + ancestry_data['ancestry_broad'] += ancestralGroup['ancestralGroup'] + + # ancestry_free + for countryOfOrigin in ancestry['countryOfOrigin']: + if countryOfOrigin['countryName'] != 'NR': + if not 'ancestry_free' in ancestry_data: + ancestry_data['ancestry_free'] = '' + else: + ancestry_data['ancestry_free'] += ',' + ancestry_data['ancestry_free'] += countryOfOrigin['countryName'] + + # ancestry_country + for countryOfRecruitment in ancestry['countryOfRecruitment']: + if countryOfRecruitment['countryName'] != 'NR': + if not 'ancestry_country' in ancestry_data: + 
ancestry_data['ancestry_country'] = '' + else: + ancestry_data['ancestry_country'] += ',' + ancestry_data['ancestry_country'] += countryOfRecruitment['countryName'] + # ancestry_additional + # Not found in the REST API + + study_data.append(ancestry_data) + except: + print(f'Error: can\'t fetch GWAS results for {gcst_id}') + return study_data + + + def get_model_field_from_schema(self, col, current_schema): ''' Retrieve the model and field from the Template, that corresponds to the current spreadsheet column. @@ -370,66 +450,6 @@ def has_report_info(self): # Independent methods # #=======================# -def get_gwas_study(gcst_id): - """ - Get the GWAS Study information related to the PGS sample. - Check that all the required data is available - > Parameter: - - gcst_id: GWAS Study ID (e.g. GCST010127) - > Return: list of dictionnaries (1 per ancestry) - """ - study_data = [] - gwas_rest_url = 'https://www.ebi.ac.uk/gwas/rest/api/studies/' - response = requests.get(f'{gwas_rest_url}{gcst_id}') - - if not response: - return study_data - response_data = response.json() - if response_data: - try: - source_PMID = response_data['publicationInfo']['pubmedId'] - for ancestry in response_data['ancestries']: - - if ancestry['type'] != 'initial': - continue - - ancestry_data = { 'source_PMID': source_PMID } - ancestry_data['sample_number'] = ancestry['numberOfIndividuals'] - - # ancestry_broad - for ancestralGroup in ancestry['ancestralGroups']: - if not 'ancestry_broad' in ancestry_data: - ancestry_data['ancestry_broad'] = '' - else: - ancestry_data['ancestry_broad'] += ',' - ancestry_data['ancestry_broad'] += ancestralGroup['ancestralGroup'] - - # ancestry_free - for countryOfOrigin in ancestry['countryOfOrigin']: - if countryOfOrigin['countryName'] != 'NR': - if not 'ancestry_free' in ancestry_data: - ancestry_data['ancestry_free'] = '' - else: - ancestry_data['ancestry_free'] += ',' - ancestry_data['ancestry_free'] += countryOfOrigin['countryName'] - - # 
ancestry_country - for countryOfRecruitment in ancestry['countryOfRecruitment']: - if countryOfRecruitment['countryName'] != 'NR': - if not 'ancestry_country' in ancestry_data: - ancestry_data['ancestry_country'] = '' - else: - ancestry_data['ancestry_country'] += ',' - ancestry_data['ancestry_country'] += countryOfRecruitment['countryName'] - # ancestry_additional - # Not found in the REST API - - study_data.append(ancestry_data) - except: - print(f'Error: can\'t fetch GWAS results for {gcst_id}') - return study_data - - def next_PSS_num(): r = SampleSet.objects.last() if r == None: From 0bb94ce12742740d87f55c66cac077d317ef2897 Mon Sep 17 00:00:00 2001 From: Laurent Gil Date: Thu, 17 Oct 2024 12:08:29 +0100 Subject: [PATCH 2/9] Use the list of cohorts from the GWAS Catalog REST API while updating the record, if available --- release/scripts/UpdateGwasStudies.py | 64 ++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/release/scripts/UpdateGwasStudies.py b/release/scripts/UpdateGwasStudies.py index 99308a77..823620a7 100644 --- a/release/scripts/UpdateGwasStudies.py +++ b/release/scripts/UpdateGwasStudies.py @@ -1,7 +1,5 @@ import requests -from catalog.models import Sample, Score -from pgs_web import constants - +from catalog.models import Sample, Score, Cohort class UpdateGwasStudies: @@ -16,15 +14,15 @@ def __init__(self,verbose=None): self.verbose = verbose - def get_gwas_info(self,sample): + def get_gwas_info(self,sample:Sample) -> dict: """ Get the GWAS Study information related to the PGS sample. Check that all the required data is available > Parameter: - - gcst_id: GWAS Study ID (e.g. 
GCST010127) - > Return: list of dictionnaries (1 per ancestry) + - sample: instance of a Sample model + > Return: dictionary (cohorts and ancestries) """ - study_data = [] + study_data = { "ancestries": [] } gcst_id = sample.source_GWAS_catalog response = requests.get(f'{self.gwas_rest_url}{gcst_id}') @@ -37,6 +35,26 @@ def get_gwas_info(self,sample): if response_data: try: source_PMID = response_data['publicationInfo']['pubmedId'] + + # Create list of cohorts if it exists in the GWAS study + # This override the Cohorts found previously in the cohort column in the spreadsheet + cohorts_list = [] + if 'cohort' in response_data.keys(): + cohorts = response_data['cohort'].split('|') + for cohort in cohorts: + cohort_id = cohort.upper() + try: + cohort_model = Cohort.objects.get(name_short__iexact=cohort_id) + cohorts_list.append(cohort_model) + except Cohort.DoesNotExist: + print(f"New cohort found: {cohort_id}") + cohort_model = Cohort(name_short=cohort_id,name_full=cohort_id) + cohort_model.save() + cohorts_list.append(cohort_model) + if cohorts_list: + study_data['cohorts'] = cohorts_list + + # Ancestries for ancestry in response_data['ancestries']: if ancestry['type'] != 'initial': @@ -70,12 +88,12 @@ def get_gwas_info(self,sample): else: ancestry_data['ancestry_country'] += self.country_sep ancestry_data['ancestry_country'] += countryOfRecruitment['countryName'] - study_data.append(ancestry_data) + study_data["ancestries"].append(ancestry_data) - if study_data: - print(f'\t{len(study_data)} distinct ancestries') + if study_data["ancestries"]: + print(f'\t{len(study_data["ancestries"])} distinct ancestries') if self.verbose: - for anc in study_data: + for anc in study_data["ancestries"]: print(f'\t{anc}') else: print("\tNo ancestry") @@ -90,7 +108,12 @@ def update_studies(self): for sample in self.samples: gwas_study = self.get_gwas_info(sample) new_samples = [] - for gwas_ancestry in gwas_study: + cohorts_list = [] + # List of cohorts + if 'cohorts' in 
gwas_study.keys(): + cohorts_list = gwas_study['cohorts'] + # List of ancestry data + for gwas_ancestry in gwas_study['ancestries']: new_sample = Sample() new_sample.source_GWAS_catalog = sample.source_GWAS_catalog for field, val in gwas_ancestry.items(): @@ -99,11 +122,18 @@ def update_studies(self): setattr(new_sample, field, val) new_sample.save() - # Cohorts - need to be added once the Sample object as been saved, - # i.e. when the Sample `id` has been created - if sample.cohorts: - for cohort in sample.cohorts.all(): - new_sample.cohorts.add(cohort) + # Cohorts data + if cohorts_list or sample.cohorts: + # Use the list of cohorts from the GWAS study (if available) + # Override the list of cohorts found in the existing sample + if cohorts_list: + new_sample.cohorts.set(cohorts_list) + # Copy the list of cohorts from the existing sample. + # Need to be added once the new Sample object as been saved, + # i.e. when the Sample `id` has been created + elif sample.cohorts: + for cohort in sample.cohorts.all(): + new_sample.cohorts.add(cohort) new_sample.save() new_samples.append(new_sample) From 69d8bf6b334d3bb8796f676bad9e26d9add2bfc9 Mon Sep 17 00:00:00 2001 From: Laurent Gil Date: Fri, 18 Oct 2024 14:38:05 +0100 Subject: [PATCH 3/9] Print a message if the new list of cohorts (from GWAS) differs from the one already stored in the database --- release/scripts/UpdateGwasStudies.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/release/scripts/UpdateGwasStudies.py b/release/scripts/UpdateGwasStudies.py index 823620a7..f18f52df 100644 --- a/release/scripts/UpdateGwasStudies.py +++ b/release/scripts/UpdateGwasStudies.py @@ -128,6 +128,14 @@ def update_studies(self): # Override the list of cohorts found in the existing sample if cohorts_list: new_sample.cohorts.set(cohorts_list) + # Print a message if the 2 list of cohorts (old & new) are different + if sample.cohorts: + old_set = ', '.join(sorted([x.name_short for x in sample.cohorts.all()])) + new_set = ', 
'.join(sorted([x.name_short for x in cohorts_list])) + if old_set != new_set: + print(f"\t# {new_sample.source_GWAS_catalog}: replacing cohorts list") + print(f"\t - Old set: {old_set}") + print(f"\t + New set: {new_set}") # Copy the list of cohorts from the existing sample. # Need to be added once the new Sample object as been saved, # i.e. when the Sample `id` has been created From 1335aa77c23cd46d8eaebc48a254890b8e9b0c07 Mon Sep 17 00:00:00 2001 From: Laurent Gil Date: Fri, 18 Oct 2024 14:41:16 +0100 Subject: [PATCH 4/9] Update text message --- release/scripts/UpdateGwasStudies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release/scripts/UpdateGwasStudies.py b/release/scripts/UpdateGwasStudies.py index f18f52df..f2e06aa6 100644 --- a/release/scripts/UpdateGwasStudies.py +++ b/release/scripts/UpdateGwasStudies.py @@ -133,7 +133,7 @@ def update_studies(self): old_set = ', '.join(sorted([x.name_short for x in sample.cohorts.all()])) new_set = ', '.join(sorted([x.name_short for x in cohorts_list])) if old_set != new_set: - print(f"\t# {new_sample.source_GWAS_catalog}: replacing cohorts list") + print(f"\t/!\ Replacing cohorts list:") print(f"\t - Old set: {old_set}") print(f"\t + New set: {new_set}") # Copy the list of cohorts from the existing sample. 
From b474a314872b587cd61053202689ed0907519e4e Mon Sep 17 00:00:00 2001 From: Laurent Gil Date: Fri, 25 Oct 2024 15:15:26 +0100 Subject: [PATCH 5/9] Fix regex warning during data import --- curation/parsers/performance.py | 2 +- curation/parsers/sample.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/curation/parsers/performance.py b/curation/parsers/performance.py index 91a703e1..f0baa9a4 100644 --- a/curation/parsers/performance.py +++ b/curation/parsers/performance.py @@ -52,7 +52,7 @@ def str2metric(self, field, val): val = self.replace_non_ascii_chars(field,val) # Estimate with percentage as unit - if re.match('^\d+\.?\d*\s*\%$',val): + if re.match(r'^\d+\.?\d*\s*\%$',val): val = val.replace('%','').strip() current_metric.add_data('estimate', val) current_metric.add_data('unit', '%') diff --git a/curation/parsers/sample.py b/curation/parsers/sample.py index 5a8148dc..df648b77 100644 --- a/curation/parsers/sample.py +++ b/curation/parsers/sample.py @@ -18,7 +18,7 @@ def str2demographic(self, field, val): - val: data value Return type: DemographicData object ''' - unit_regex = "([-+]?\d*\.\d+|\d+) ([a-zA-Z]+)" + unit_regex = r"([-+]?\d*\.\d+|\d+) ([a-zA-Z]+)" current_demographic = DemographicData(field,val,self.spreadsheet_name) if type(val) == float: current_demographic.add_data('estimate', val) @@ -129,7 +129,7 @@ def create_sample_model(self): elif field == 'sample_percent_male': # Remove % character val_str = str(val) - if re.search('\%',val_str): + if re.search(r'\%',val_str): val_str = re.sub(r'\%', r'', val_str) val_str = re.sub(r' ', r'', val_str) val = float(val_str) From a24350771758a61a050929064f9acbb97ebc7573 Mon Sep 17 00:00:00 2001 From: Laurent Gil Date: Fri, 25 Oct 2024 15:22:47 +0100 Subject: [PATCH 6/9] Merge the 2 lists of cohorts: the one from the spreadsheet and the one from the GWAS study (fetched via the GWAS REST API) --- curation/template_parser.py | 20 +++++++++++++++----- release/scripts/UpdateGwasStudies.py | 16 
+++++++++++----- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/curation/template_parser.py b/curation/template_parser.py index fdbcca9d..03a4554e 100644 --- a/curation/template_parser.py +++ b/curation/template_parser.py @@ -201,7 +201,10 @@ def extract_samples(self): sample_keys = sample_data.data.keys() if 'sample_number' not in sample_keys: if 'source_GWAS_catalog' in sample_keys: - gwas_study = self.get_gwas_study(sample_data.data['source_GWAS_catalog'],spreadsheet_name) + spreadsheet_cohorts = [] + if 'cohorts' in sample_keys: + spreadsheet_cohorts = sample_data.data['cohorts'] + gwas_study = self.get_gwas_study(sample_data.data['source_GWAS_catalog'],spreadsheet_cohorts,spreadsheet_name) if gwas_study: for gwas_ancestry in gwas_study: c_sample = SampleData(spreadsheet_name) @@ -291,7 +294,7 @@ def get_sample_data(self, sample_info, current_schema, spreadsheet_name, samples return sample_data - def get_gwas_study(self,gcst_id:str,spreadsheet_name:str) -> dict: + def get_gwas_study(self,gcst_id:str,spreadsheet_cohorts:list,spreadsheet_name:str) -> dict: """ Get the GWAS Study information related to the PGS sample. 
Check that all the required data is available @@ -308,18 +311,25 @@ def get_gwas_study(self,gcst_id:str,spreadsheet_name:str) -> dict: return study_data response_data = response.json() if response_data: + # List the cohorts present in the spreadsheet for this sample + spreadsheet_cohorts_names = [] + if spreadsheet_cohorts: + spreadsheet_cohorts_names = [x.name.upper() for x in spreadsheet_cohorts] + try: source_PMID = response_data['publicationInfo']['pubmedId'] - # Create list of cohorts if it exists in the GWAS study # This override the Cohorts found in the cohort column in the spreadsheet - cohorts_list = [] + cohorts_list = spreadsheet_cohorts if 'cohort' in response_data.keys(): cohorts = response_data['cohort'].split('|') for cohort in cohorts: cohort_id = cohort.upper() + # Check if cohort in list of cohort references + # # and if the cohort is already in the list provided by th author if cohort_id in self.parsed_cohorts: - cohorts_list.append(self.parsed_cohorts[cohort_id]) + if cohort_id not in spreadsheet_cohorts_names: + cohorts_list.append(self.parsed_cohorts[cohort_id]) else: self.report_error(spreadsheet_name, f'Error: the GWAS Catalog sample cohort "{cohort}" cannot be found in the Cohort Refr. 
spreadsheet') diff --git a/release/scripts/UpdateGwasStudies.py b/release/scripts/UpdateGwasStudies.py index f2e06aa6..7418ae5e 100644 --- a/release/scripts/UpdateGwasStudies.py +++ b/release/scripts/UpdateGwasStudies.py @@ -130,12 +130,18 @@ def update_studies(self): new_sample.cohorts.set(cohorts_list) # Print a message if the 2 list of cohorts (old & new) are different if sample.cohorts: - old_set = ', '.join(sorted([x.name_short for x in sample.cohorts.all()])) - new_set = ', '.join(sorted([x.name_short for x in cohorts_list])) - if old_set != new_set: + new_set = sorted([x.name_short.upper() for x in cohorts_list]) + + old_set_string = ', '.join(sorted([x.name_short.upper() for x in sample.cohorts.all()])) + new_set_string = ', '.join(new_set) + if old_set_string != new_set_string: + # Add cohorts which are already associated to the sample in the database, but not in the GWAS study + for sample_cohort in sample.cohorts.all(): + if sample_cohort.name_short.upper() not in new_set: + new_sample.cohorts.add(sample_cohort) print(f"\t/!\ Replacing cohorts list:") - print(f"\t - Old set: {old_set}") - print(f"\t + New set: {new_set}") + print(f"\t - Old set: {old_set_string}") + print(f"\t + New set: {', '.join(sorted([x.name_short.upper() for x in new_sample.cohorts.all()]))}") # Copy the list of cohorts from the existing sample. # Need to be added once the new Sample object as been saved, # i.e. 
when the Sample `id` has been created From f4474c8c1b05e3df800ca3dba7eed541e85f8688 Mon Sep 17 00:00:00 2001 From: Laurent Gil Date: Fri, 25 Oct 2024 15:28:19 +0100 Subject: [PATCH 7/9] Add missing comment --- curation/template_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/curation/template_parser.py b/curation/template_parser.py index 03a4554e..a7787265 100644 --- a/curation/template_parser.py +++ b/curation/template_parser.py @@ -300,6 +300,7 @@ def get_gwas_study(self,gcst_id:str,spreadsheet_cohorts:list,spreadsheet_name:st Check that all the required data is available > Parameter: - gcst_id: GWAS Study ID (e.g. GCST010127) + - spreadsheet_cohorts: list of CohortData objects for the current sample, collected from the spreadsheet - spreadsheet_name: Spreadsheet name for report (e.g. Sample Descriptions) > Return: list of dictionnaries (1 per ancestry) """ From c123da533d47d59d02290a670d7d8452305d88d4 Mon Sep 17 00:00:00 2001 From: Laurent Gil Date: Fri, 1 Nov 2024 16:44:51 +0000 Subject: [PATCH 8/9] Update comments about the GWAS and sample cohorts --- curation/template_parser.py | 4 ++-- release/scripts/UpdateGwasStudies.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/curation/template_parser.py b/curation/template_parser.py index a7787265..6e93d314 100644 --- a/curation/template_parser.py +++ b/curation/template_parser.py @@ -319,8 +319,8 @@ def get_gwas_study(self,gcst_id:str,spreadsheet_cohorts:list,spreadsheet_name:st try: source_PMID = response_data['publicationInfo']['pubmedId'] - # Create list of cohorts if it exists in the GWAS study - # This override the Cohorts found in the cohort column in the spreadsheet + # Update the Cohorts list found in the cohort column of the spreadsheet by + # adding the list of cohorts from the GWAS study (if the list is present) cohorts_list = spreadsheet_cohorts if 'cohort' in response_data.keys(): cohorts = response_data['cohort'].split('|') diff --git 
a/release/scripts/UpdateGwasStudies.py b/release/scripts/UpdateGwasStudies.py index 7418ae5e..184ef0fb 100644 --- a/release/scripts/UpdateGwasStudies.py +++ b/release/scripts/UpdateGwasStudies.py @@ -125,7 +125,7 @@ def update_studies(self): # Cohorts data if cohorts_list or sample.cohorts: # Use the list of cohorts from the GWAS study (if available) - # Override the list of cohorts found in the existing sample + # Update the list of cohorts from the existing sample if new cohorts are found in the GWAS study if cohorts_list: new_sample.cohorts.set(cohorts_list) # Print a message if the 2 list of cohorts (old & new) are different From 57c6caca83e1ff62bd857933cb42d78812cae62f Mon Sep 17 00:00:00 2001 From: Laurent Gil Date: Thu, 7 Nov 2024 10:56:05 +0000 Subject: [PATCH 9/9] Print message when the list of cohorts from the spreadsheet (for a GWAS study) is merged with the list of cohorts from the GWAS Catalog REST API --- curation/template_parser.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/curation/template_parser.py b/curation/template_parser.py index 6e93d314..b641551c 100644 --- a/curation/template_parser.py +++ b/curation/template_parser.py @@ -321,19 +321,26 @@ def get_gwas_study(self,gcst_id:str,spreadsheet_cohorts:list,spreadsheet_name:st source_PMID = response_data['publicationInfo']['pubmedId'] # Update the Cohorts list found in the cohort column of the spreadsheet by # adding the list of cohorts from the GWAS study (if the list is present) - cohorts_list = spreadsheet_cohorts + cohorts_list = spreadsheet_cohorts.copy() if 'cohort' in response_data.keys(): cohorts = response_data['cohort'].split('|') for cohort in cohorts: cohort_id = cohort.upper() # Check if cohort in list of cohort references - # # and if the cohort is already in the list provided by th author + # and if the cohort is already in the list provided by the author if cohort_id in self.parsed_cohorts: if cohort_id not in spreadsheet_cohorts_names: 
cohorts_list.append(self.parsed_cohorts[cohort_id]) else: self.report_error(spreadsheet_name, f'Error: the GWAS Catalog sample cohort "{cohort}" cannot be found in the Cohort Refr. spreadsheet') - + # Print a message if the list of Cohorts from the spreadsheet and from GWAS Catalog (REST API) have been merged. + if spreadsheet_cohorts and len(spreadsheet_cohorts) != len(cohorts_list): + msg = f'''GWAS study {gcst_id} -> the list of cohorts from the spreadsheet has been merged with the one from GWAS. + \t- Spreadsheet list: {', '.join(sorted(spreadsheet_cohorts_names))} + \t+ Merged GWAS list: {', '.join(sorted([x.name.upper() for x in cohorts_list]))}''' + self.report_warning(spreadsheet_name, msg) + + # Ancestry information for ancestry in response_data['ancestries']: if ancestry['type'] != 'initial':