From 393d40f1d5d5bf3da115f7dc021fa17fa13aae2b Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Tue, 21 Nov 2023 10:20:08 -0500 Subject: [PATCH] Code cleanup --- entity-api-spec.yaml | 53 ------------------ src/app.py | 12 ----- src/schema/schema_manager.py | 1 - src/schema/schema_neo4j_queries.py | 17 ------ .../api-template-test/entity-Template.yaml | 54 ------------------- .../example-yaml-templates/sample-schema.yaml | 9 ---- 6 files changed, 146 deletions(-) diff --git a/entity-api-spec.yaml b/entity-api-spec.yaml index 8fd04211..fbfc5548 100644 --- a/entity-api-spec.yaml +++ b/entity-api-spec.yaml @@ -406,59 +406,6 @@ components: - section - suspension description: "A code representing the type of specimen. Must be an organ, block, section, or suspension" - specimen_type: - type: string - enum: - - atacseq - - biopsy - - blood - - cell_lysate - - clarity_hydrogel - - codex - - cryosections_curls_from_fresh_frozen_oct - - cryosections_curls_rnalater - - ffpe_block - - ffpe_slide - - fixed_frozen_section_slide - - fixed_tissue_piece - - flash_frozen_liquid_nitrogen - - formalin_fixed_oct_block - - fresh_frozen_oct_block - - fresh_frozen_section_slide - - fresh_frozen_tissue - - fresh_frozen_tissue_section - - fresh_tissue - - frozen_cell_pellet_buffy_coat - - gdna - - module - - nuclei - - nuclei_rnalater - - organ - - organ_piece - - other - - pbmc - - pfa_fixed_frozen_oct_block - - plasma - - protein - - ran_poly_a_enriched - - rna_total - - rnalater_treated_and_stored - - rnaseq - - scatacseq - - scrnaseq - - segment - - seqfish - - sequence_library - - serum - - single_cell_cryopreserved - - snatacseq - - snrnaseq - - tissue_lysate - - wgs - description: "DEPRECATED: No longer a required field. A code representing the type of specimen. Must be one of the codes specified in: [tissue sample types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml)" - specimen_type_other: - type: string - description: "The user provided sample type if the 'other' sample_type is chosen." protocol_url: type: string description: "The protocols.io doi url pointing the protocol under wich the sample was obtained and/or prepared." diff --git a/src/app.py b/src/app.py index ae8ad938..00f1d523 100644 --- a/src/app.py +++ b/src/app.py @@ -436,7 +436,6 @@ def get_ancestor_organs(id): bad_request_error(f"Unable to get the ancestor organs for this: {normalized_entity_type}," " supported entity types: Sample, Dataset, Publication") - # specimen_type -> sample_category 12/15/2022 if normalized_entity_type == 'Sample' and entity_dict['sample_category'].lower() == 'organ': bad_request_error("Unable to get the ancestor organ of an organ.") @@ -939,7 +938,6 @@ def create_entity(entity_type): # Check existence of the direct ancestor (either another Sample or Donor) direct_ancestor_dict = query_target_entity(direct_ancestor_uuid, user_token) - # specimen_type -> sample_category 12/15/2022 # `sample_category` is required on create sample_category = json_data_dict['sample_category'].lower() @@ -1112,7 +1110,6 @@ def create_multiple_samples(count): # sample's direct ancestor is a Donor. # Must be one of the codes from: https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/organ_types.yaml if direct_ancestor_dict['entity_type'] == 'Donor': - # specimen_type -> sample_category 12/15/2022 # `sample_category` is required on create if json_data_dict['sample_category'].lower() != 'organ': bad_request_error("The sample_category must be organ since the direct ancestor is a Donor") @@ -2828,8 +2825,6 @@ def get_prov_info(): first_sample_hubmap_id_list.append(item['hubmap_id']) first_sample_submission_id_list.append(item['submission_id']) first_sample_uuid_list.append(item['uuid']) - - # specimen_type -> sample_category 12/15/2022 first_sample_type_list.append(item['sample_category']) first_sample_portal_url_list.append(app.config['DOI_REDIRECT_URL'].replace('', 'sample').replace('', item['uuid'])) @@ -3148,8 +3143,6 @@ def get_prov_info_for_dataset(id): first_sample_hubmap_id_list.append(item['hubmap_id']) first_sample_submission_id_list.append(item['submission_id']) first_sample_uuid_list.append(item['uuid']) - - # specimen_type -> sample_category 12/15/2022 first_sample_type_list.append(item['sample_category']) first_sample_portal_url_list.append( @@ -3267,7 +3260,6 @@ def get_prov_info_for_dataset(id): else: requested_samples = {} for uuid in dataset_samples.keys(): - # specimen_type -> sample_category 12/15/2022 if dataset_samples[uuid]['sample_category'] in include_samples: requested_samples[uuid] = dataset_samples[uuid] internal_dict[HEADER_DATASET_SAMPLES] = requested_samples @@ -3479,7 +3471,6 @@ def get_sample_prov_info(): organ_hubmap_id = sample['organ_hubmap_id'] organ_submission_id = sample['organ_submission_id'] else: - # sample_specimen_type -> sample_category 12/15/2022 if sample['sample_category'] == "organ": organ_uuid = sample['sample_uuid'] organ_type = organ_types_dict[sample['sample_organ']]['description'].lower() @@ -3507,10 +3498,7 @@ def get_sample_prov_info(): internal_dict[HEADER_SAMPLE_HAS_METADATA] = sample_has_metadata internal_dict[HEADER_SAMPLE_HAS_RUI_INFO] = sample_has_rui_info internal_dict[HEADER_SAMPLE_DIRECT_ANCESTOR_ID] = sample['sample_ancestor_id'] - - # sample_specimen_type -> sample_category 12/15/2022 internal_dict[HEADER_SAMPLE_TYPE] = sample['sample_category'] - internal_dict[HEADER_SAMPLE_HUBMAP_ID] = sample['sample_hubmap_id'] internal_dict[HEADER_SAMPLE_SUBMISSION_ID] = sample['sample_submission_id'] internal_dict[HEADER_SAMPLE_DIRECT_ANCESTOR_ENTITY_TYPE] = sample['sample_ancestor_entity'] diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index e66c6f21..1f0d79b5 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -1356,7 +1356,6 @@ def create_hubmap_ids(normalized_class, json_data_dict, user_token, user_info_di parent_id = json_data_dict['direct_ancestor_uuid'] json_to_post['parent_ids'] = [parent_id] - # specimen_type -> sample_category 12/15/2022 # 'Sample.sample_category' is marked as `required_on_create` in the schema yaml if json_data_dict['sample_category'].lower() == 'organ': # The 'organ' field containing the 2 digit organ code is required in this case diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 8da148b3..231375b3 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -442,24 +442,8 @@ def get_dataset_organ_and_donor_info(neo4j_driver, uuid): donor_metadata = None with neo4j_driver.session() as session: - # Old time-consuming single query, it takes a significant amounts of DB hits - # query = (f"MATCH (e:Dataset)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Sample)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(d:Donor) " - # f"WHERE e.uuid='{uuid}' AND s.specimen_type='organ' AND EXISTS(s.organ) " - # f"RETURN s.organ AS organ_name, d.metadata AS donor_metadata") - - # logger.info("======get_dataset_organ_and_donor_info() query======") - # logger.info(query) - - # with neo4j_driver.session() as session: - # record = session.read_transaction(execute_readonly_tx, query) - - # if record: - # organ_name = record['organ_name'] - # donor_metadata = record['donor_metadata'] - # To improve the query performance, we implement the two-step queries to drastically reduce the DB hits sample_query = (f"MATCH (e:Dataset)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Sample) " - # specimen_type -> sample_category 12/15/2022 f"WHERE e.uuid='{uuid}' AND s.sample_category='organ' AND EXISTS(s.organ) " f"RETURN DISTINCT s.organ AS organ_name, s.uuid AS sample_uuid") @@ -473,7 +457,6 @@ def get_dataset_organ_and_donor_info(neo4j_driver, uuid): sample_uuid = sample_record['sample_uuid'] donor_query = (f"MATCH (s:Sample)<-[:ACTIVITY_OUTPUT]-(a:Activity)<-[:ACTIVITY_INPUT]-(d:Donor) " - # specimen_type -> sample_category 12/15/2022 f"WHERE s.uuid='{sample_uuid}' AND s.sample_category='organ' AND EXISTS(s.organ) " f"RETURN DISTINCT d.metadata AS donor_metadata") diff --git a/src/schema_templating/example-yaml-templates/api-template-test/entity-Template.yaml b/src/schema_templating/example-yaml-templates/api-template-test/entity-Template.yaml index 5a6739f7..ab8ec463 100644 --- a/src/schema_templating/example-yaml-templates/api-template-test/entity-Template.yaml +++ b/src/schema_templating/example-yaml-templates/api-template-test/entity-Template.yaml @@ -398,60 +398,6 @@ x-ref-components: - consortium - public description: "One of the values: public, consortium." - specimen_type: - type: string - enum: - - atacseq - - biopsy - - blood - - cell_lysate - - clarity_hydrogel - - codex - - cryosections_curls_from_fresh_frozen_oct - - cryosections_curls_rnalater - - ffpe_block - - ffpe_slide - - fixed_frozen_section_slide - - fixed_tissue_piece - - flash_frozen_liquid_nitrogen - - formalin_fixed_oct_block - - fresh_frozen_oct_block - - fresh_frozen_section_slide - - fresh_frozen_tissue - - fresh_frozen_tissue_section - - fresh_tissue - - frozen_cell_pellet_buffy_coat - - gdna - - module - - nuclei - - nuclei_rnalater - - organ - - organ_piece - - other - - pbmc - - pfa_fixed_frozen_oct_block - - plasma - - protein - - ran_poly_a_enriched - - rna_total - - rnalater_treated_and_stored - - rnaseq - - scatacseq - - scrnaseq - - segment - - seqfish - - sequence_library - - serum - - sequence_library - - single_cell_cryopreserved - - snatacseq - - snrnaseq - - tissue_lysate - - wgs - description: "A code representing the type of specimen. Must be one of the codes specified in: [tissue sample types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml)" - specimen_type_other: - type: string - description: "The user provided sample type if the 'other' sample_type is chosen." protocol_url: type: string description: "The protocols.io doi url pointing the protocol under wich the sample was obtained and/or prepared." diff --git a/src/schema_templating/example-yaml-templates/sample-schema.yaml b/src/schema_templating/example-yaml-templates/sample-schema.yaml index 1f5ee751..2b6f3f11 100644 --- a/src/schema_templating/example-yaml-templates/sample-schema.yaml +++ b/src/schema_templating/example-yaml-templates/sample-schema.yaml @@ -72,15 +72,6 @@ Sample: - consortium - public description: "One of the values: public, consortium." - specimen_type: - type: string - enum: - X-replace-enum-list: - enum-file-ref: https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml - description: "A code representing the type of specimen. Must be one of the codes specified in: [tissue sample types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml)" - specimen_type_other: - type: string - description: "The user provided sample type if the 'other' sample_type is chosen." protocol_url: type: string description: "The protocols.io doi url pointing the protocol under wich the sample was obtained and/or prepared."