diff --git a/src/app.py b/src/app.py index 8c5b3090..1f1b549e 100644 --- a/src/app.py +++ b/src/app.py @@ -3326,12 +3326,9 @@ def sankey_data(): # String constants HEADER_DATASET_GROUP_NAME = 'dataset_group_name' HEADER_ORGAN_TYPE = 'organ_type' - HEADER_DATASET_DATA_TYPES = 'dataset_data_types' # TODO-eliminate when HEADER_DATASET_DATASET_TYPE is required HEADER_DATASET_DATASET_TYPE = 'dataset_dataset_type' HEADER_DATASET_STATUS = 'dataset_status' - with open('sankey_mapping.json') as f: - mapping_dict = json.load(f) # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description # because that would require using a urllib request for each dataset organ_types_dict = schema_manager.get_organ_types() @@ -3359,41 +3356,27 @@ def sankey_data(): internal_dict = collections.OrderedDict() internal_dict[HEADER_DATASET_GROUP_NAME] = dataset[HEADER_DATASET_GROUP_NAME] - # TODO BEGIN evaluate elimination of this block once dataset['dataset_type'] is required and dataset['data_types'] removed. organ_code = dataset[HEADER_ORGAN_TYPE].upper() - validate_organ_code(organ_code) + validate_organ_code(organ_code, organ_types_dict) internal_dict[HEADER_ORGAN_TYPE] = organ_types_dict[organ_code].lower() - # Data type codes are replaced with data type descriptions - assay_description = "" - try: - assay_description = assay_types_dict[dataset[HEADER_DATASET_DATA_TYPES]]['description'] - except KeyError: - logger.exception(f"Data type {dataset[HEADER_DATASET_DATA_TYPES]} not found in resulting assay types via ontology-api") - # Just use the data type value - assay_description = dataset[HEADER_DATASET_DATA_TYPES] - - internal_dict[HEADER_DATASET_DATA_TYPES] = assay_description + internal_dict[HEADER_DATASET_DATASET_TYPE] = dataset[HEADER_DATASET_DATASET_TYPE] # Replace applicable Group Name and Data type with the value needed for the sankey via the mapping_dict internal_dict[HEADER_DATASET_STATUS] = dataset['dataset_status'] - if internal_dict[HEADER_DATASET_GROUP_NAME] in mapping_dict.keys(): - internal_dict[HEADER_DATASET_GROUP_NAME] = mapping_dict[internal_dict[HEADER_DATASET_GROUP_NAME]] - if internal_dict[HEADER_DATASET_DATA_TYPES] in mapping_dict.keys(): - internal_dict[HEADER_DATASET_DATA_TYPES] = mapping_dict[internal_dict[HEADER_DATASET_DATA_TYPES]] - # TODO END evaluate elimination of this block, if it is still in place following the YAML-to-UBKG effort on https://github.com/hubmapconsortium/entity-api/issues/494, - # and once dataset['dataset_type'] is required and dataset['data_types'] removed. + # if internal_dict[HEADER_DATASET_GROUP_NAME] in mapping_dict.keys(): + # internal_dict[HEADER_DATASET_GROUP_NAME] = mapping_dict[internal_dict[HEADER_DATASET_GROUP_NAME]] # Each dataset's dictionary is added to the list to be returned dataset_sankey_list.append(internal_dict) - + if MEMCACHED_MODE: # Cache the result memcached_client_instance.set(cache_key, dataset_sankey_list, expire = SchemaConstants.MEMCACHED_TTL) else: logger.info(f'Using the cached sankey data at time {datetime.now()}') - + return jsonify(dataset_sankey_list) @@ -4807,13 +4790,13 @@ def access_level_prefix_dir(dir_name): ---------- organ_code : str """ -def validate_organ_code(organ_code): +def validate_organ_code(organ_code, organ_types_dict=None): + if organ_types_dict is None: + organ_types_dict = schema_manager.get_organ_types() if not organ_code.isalpha() or not len(organ_code) == 2: internal_server_error(f"Invalid organ code {organ_code}. Must be 2-letter alphabetic code") try: - organ_types_dict = schema_manager.get_organ_types() - if organ_code.upper() not in organ_types_dict: not_found_error(f"Unable to find organ code {organ_code} via the ontology-api") except requests.exceptions.RequestException: diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index 87183315..77927c0a 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -942,7 +942,7 @@ def get_sankey_info(neo4j_driver): query = (f"MATCH (ds:Dataset)<-[]-(a)<-[]-(:Sample)" # specimen_type -> sample_category 12/15/2022 f"MATCH (donor)-[:ACTIVITY_INPUT]->(oa)-[:ACTIVITY_OUTPUT]->(organ:Sample {{sample_category:'organ'}})-[*]->(ds)" - f"RETURN distinct ds.group_name, organ.organ, ds.data_types, ds.status, ds. uuid order by ds.group_name") + f"RETURN distinct ds.group_name, organ.organ, ds.dataset_type, ds.status, ds. uuid order by ds.group_name") logger.info("======get_sankey_info() query======") logger.info(query) with neo4j_driver.session() as session: @@ -958,14 +958,7 @@ def get_sankey_info(neo4j_driver): record_contents.append(item) record_dict['dataset_group_name'] = record_contents[0] record_dict['organ_type'] = record_contents[1] - data_types_list = record_contents[2] - data_types_list = data_types_list.replace("'", '"') - data_types_list = json.loads(data_types_list) - data_types = data_types_list[0] - if (len(data_types_list)) > 1: - if (data_types_list[0] == "scRNAseq-10xGenomics-v3" and data_types_list[1] == "snATACseq") or (data_types_list[1] == "scRNAseq-10xGenomics-v3" and data_types_list[0] == "snATACseq"): - data_types = "scRNA-seq (10x Genomics v3),snATAC-seq" - record_dict['dataset_data_types'] = data_types + record_dict['dataset_dataset_type'] = record_contents[2] record_dict['dataset_status'] = record_contents[3] list_of_dictionaries.append(record_dict) return list_of_dictionaries diff --git a/src/sankey_mapping.json b/src/sankey_mapping.json deleted file mode 100644 index ebd4de90..00000000 --- a/src/sankey_mapping.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "hubmap-uflorida-tmc": "University of Florida TMC", - "hubmap-northwestern-r": "Northwestern RTI", - "hubmap-vanderbilt-tmc": "Vanderbilt TMC", - "hubmap-stanford-tmc": "Stanford TMC", - "hubmap-caltech-tmc": "California Institute of Technology TMC", - "hubmap-ucsd-tmc": "University of California San Diego TMC", - "LC-MS Top Down": "MS-non-spatial", - "Untargeted LC-MS": "MS-non-spatial", - "TMT LC-MS": "MS-non-spatial", - "Targeted Shotgun / Flow-injection LC-MS": "MS-non-spatial", - "snRNAseq (SNARE-seq2)": "SNARE-seq", - "scRNA-seq (10x Genomics v3)": "RNA-seq", - "snRNA-seq (10x Genomics v3)": "sc/snRNA-seq", - "snRNA-seq (10x Genomics v2)": "SNARE-seq", - "scRNA-seq (10x Genomics v2)": "sc/snRNA-seq", - "sciRNA-seq": "sc/snRNA-seq", - "sciATAC-seq": "ATAC-seq", - "snATAC-seq": "sc/snATAC-seq", - "PAS Stained Microscopy": "Histology", - "MALDI IMS": "MS-Spatial", - "Autofluorescence Microscopy": "AF", - "Imaging Mass Cytometry (2D)": "Other-MxIF", - "Imaging Mass Cytometry (3D)": "Other-MxIF", - "Lightsheet Microscopy": "Other-MxIF", - "Bulk ATAC-seq": "ATAC-seq", - "Whole Genome Sequencing": "WGS", - "Bulk RNA-seq": "RNA-seq", - "seqFISH": "FISH", - "nano-DESI IMS positive": "MS-Spatial", - "snATACseq (SNARE-seq2)": "Multiome (RNA, ATAC)", - "Cell DIVE": "Other-MxIF", - "DART-FISH": "FISH", - "Slide-seq": "RNA-spatial", - "scRNA-seq (10x Genomics v3),snATAC-seq": "Multiome (RNA, ATAC)" -} \ No newline at end of file