From 0a8aecaa13cec080fa499715470a2fbc774d2f05 Mon Sep 17 00:00:00 2001 From: Laurent Gil Date: Thu, 28 Nov 2024 14:40:40 +0000 Subject: [PATCH] Search the cohort 'long name' (column 'Cohort Name') in the Cohort Refr. spreadsheet when the cohort can't be found in the short names list (column 'Cohort ID') --- curation/template_parser.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/curation/template_parser.py b/curation/template_parser.py index b641551..4e47b3d 100644 --- a/curation/template_parser.py +++ b/curation/template_parser.py @@ -18,6 +18,7 @@ def __init__(self): self.parsed_publication = None self.parsed_scores = {} self.parsed_cohorts = {} + self.parsed_cohorts_long_names = {} self.parsed_samples_scores = [] self.parsed_samples_testing = [] self.parsed_performances = [] @@ -89,6 +90,9 @@ def extract_cohorts(self): if cohort_id in self.parsed_cohorts: self.report_warning(spreadsheet_name, f'Ambiguity found in the Cohort spreadsheet: the cohort ID "{cohort_name}" has been found more than once!') self.parsed_cohorts[cohort_id] = parsed_cohort + if parsed_cohort.name_long: + cohort_long_name = parsed_cohort.name_long.upper() + self.parsed_cohorts_long_names[cohort_long_name] = cohort_id self.update_report(parsed_cohort) @@ -275,6 +279,11 @@ def get_sample_data(self, sample_info, current_schema, spreadsheet_name, samples cohort_id = cohort.upper() if cohort_id in self.parsed_cohorts: cohorts_list.append(self.parsed_cohorts[cohort_id]) + # Check if the cohort name corresponds to a cohort long name on the Cohort Refr. spreadsheet + elif cohort_id in self.parsed_cohorts_long_names.keys(): + new_cohort_id = self.parsed_cohorts_long_names[cohort_id] + cohorts_list.append(self.parsed_cohorts[new_cohort_id]) + self.report_warning(spreadsheet_name, f'Warning: the sample cohort "{cohort}" has been found in the Cohort Refr. spreadsheet as "{new_cohort_id}"') else: self.report_error(spreadsheet_name, f'Error: the sample cohort "{cohort}" cannot be found in the Cohort Refr. spreadsheet') val = cohorts_list @@ -331,6 +340,12 @@ def get_gwas_study(self,gcst_id:str,spreadsheet_cohorts:list,spreadsheet_name:st if cohort_id in self.parsed_cohorts: if cohort_id not in spreadsheet_cohorts_names: cohorts_list.append(self.parsed_cohorts[cohort_id]) + # Check if the cohort name corresponds to a cohort long name on the Cohort Refr. spreadsheet + elif cohort_id in self.parsed_cohorts_long_names.keys(): + new_cohort_id = self.parsed_cohorts_long_names[cohort_id] + if new_cohort_id not in spreadsheet_cohorts_names: + cohorts_list.append(self.parsed_cohorts[new_cohort_id]) + self.report_warning(spreadsheet_name, f'Warning: the GWAS Catalog sample cohort "{cohort}" has been found in the Cohort Refr. spreadsheet as "{new_cohort_id}"') else: self.report_error(spreadsheet_name, f'Error: the GWAS Catalog sample cohort "{cohort}" cannot be found in the Cohort Refr. spreadsheet') # Print a message if the list of Cohorts from the spreadsheet and from GWAS Catalog (REST API) have been merged.