Skip to content

Commit

Permalink
Merge pull request PGScatalog#398 from ens-lgil/feature/gwas_cohorts_2
Browse files — browse the repository at this point in the history
Search the cohort 'long name' (column 'Cohort Name') in the Cohort Refr. spreadsheet…
  • Loading branch information
fyvon authored Dec 3, 2024
2 parents f221885 + 0a8aeca commit 062c31f
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions curation/template_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def __init__(self):
self.parsed_publication = None
self.parsed_scores = {}
self.parsed_cohorts = {}
self.parsed_cohorts_long_names = {}
self.parsed_samples_scores = []
self.parsed_samples_testing = []
self.parsed_performances = []
Expand Down Expand Up @@ -89,6 +90,9 @@ def extract_cohorts(self):
if cohort_id in self.parsed_cohorts:
self.report_warning(spreadsheet_name, f'Ambiguity found in the Cohort spreadsheet: the cohort ID "{cohort_name}" has been found more than once!')
self.parsed_cohorts[cohort_id] = parsed_cohort
if parsed_cohort.name_long:
cohort_long_name = parsed_cohort.name_long.upper()
self.parsed_cohorts_long_names[cohort_long_name] = cohort_id
self.update_report(parsed_cohort)


Expand Down Expand Up @@ -275,6 +279,11 @@ def get_sample_data(self, sample_info, current_schema, spreadsheet_name, samples
cohort_id = cohort.upper()
if cohort_id in self.parsed_cohorts:
cohorts_list.append(self.parsed_cohorts[cohort_id])
# Check if the cohort name corresponds to a cohort long name on the Cohort Refr. spreadsheet
elif cohort_id in self.parsed_cohorts_long_names.keys():
new_cohort_id = self.parsed_cohorts_long_names[cohort_id]
cohorts_list.append(self.parsed_cohorts[new_cohort_id])
self.report_warning(spreadsheet_name, f'Warning: the sample cohort "{cohort}" has been found in the Cohort Refr. spreadsheet as "{new_cohort_id}"')
else:
self.report_error(spreadsheet_name, f'Error: the sample cohort "{cohort}" cannot be found in the Cohort Refr. spreadsheet')
val = cohorts_list
Expand Down Expand Up @@ -331,6 +340,12 @@ def get_gwas_study(self,gcst_id:str,spreadsheet_cohorts:list,spreadsheet_name:st
if cohort_id in self.parsed_cohorts:
if cohort_id not in spreadsheet_cohorts_names:
cohorts_list.append(self.parsed_cohorts[cohort_id])
# Check if the cohort name corresponds to a cohort long name on the Cohort Refr. spreadsheet
elif cohort_id in self.parsed_cohorts_long_names.keys():
new_cohort_id = self.parsed_cohorts_long_names[cohort_id]
if new_cohort_id not in spreadsheet_cohorts_names:
cohorts_list.append(self.parsed_cohorts[new_cohort_id])
self.report_warning(spreadsheet_name, f'Warning: the GWAS Catalog sample cohort "{cohort}" has been found in the Cohort Refr. spreadsheet as "{new_cohort_id}"')
else:
self.report_error(spreadsheet_name, f'Error: the GWAS Catalog sample cohort "{cohort}" cannot be found in the Cohort Refr. spreadsheet')
# Print a message if the list of Cohorts from the spreadsheet and from GWAS Catalog (REST API) have been merged.
Expand Down

0 comments on commit 062c31f

Please sign in to comment.