From 8be60a6901913ad581dd213cb53cf9241e61d0b7 Mon Sep 17 00:00:00 2001
From: yuanzhou <zhy19@pitt.edu>
Date: Fri, 10 Nov 2023 14:26:40 -0500
Subject: [PATCH 01/12] Replace organs and assaytypes yamls with ontology-api
 calls

---
 src/app.py                     | 104 ++++++++-------------------------
 src/instance/app.cfg.example   |   4 ++
 src/schema/schema_constants.py |   8 ++-
 src/schema/schema_manager.py   |  91 +++++++++++++++++++++++++++--
 src/schema/schema_triggers.py  |  72 +++--------------------
 5 files changed, 130 insertions(+), 149 deletions(-)

diff --git a/src/app.py b/src/app.py
index 7878d726..ce39b5bc 100644
--- a/src/app.py
+++ b/src/app.py
@@ -60,6 +60,7 @@
 # Remove trailing slash / from URL base to avoid "//" caused by config with trailing slash
 app.config['UUID_API_URL'] = app.config['UUID_API_URL'].strip('/')
 app.config['INGEST_API_URL'] = app.config['INGEST_API_URL'].strip('/')
+app.config['ONTOLOGY_API_URL'] = app.config['ONTOLOGY_API_URL'].strip('/')
 app.config['SEARCH_API_URL_LIST'] = [url.strip('/') for url in app.config['SEARCH_API_URL_LIST']]
 
 # This mode when set True disables the PUT and POST calls, used on STAGE to make entity-api READ-ONLY 
@@ -198,6 +199,7 @@ def http_internal_server_error(e):
     schema_manager.initialize(app.config['SCHEMA_YAML_FILE'],
                               app.config['UUID_API_URL'],
                               app.config['INGEST_API_URL'],
+                              app.config['ONTOLOGY_API_URL'],
                               auth_helper_instance,
                               neo4j_driver_instance,
                               memcached_client_instance,
@@ -2623,26 +2625,12 @@ def get_prov_info():
 
     # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
     # because that would require using a urllib request for each dataset
-    response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            organ_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    organ_types_dict = schema_manager.get_organ_types()
 
     # As above, we parse te assay type yaml here rather than calling the special method for it because this avoids
     # having to access the resource for every dataset.
-    response = schema_manager.make_request_get(SchemaConstants.ASSAY_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            assay_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
-
+    assay_types_dict = schema_manager.get_assay_types()
+    
     # Processing and validating query parameters
     accepted_arguments = ['format', 'organ', 'has_rui_info', 'dataset_status', 'group_uuid']
     return_json = False
@@ -3007,25 +2995,11 @@ def get_prov_info_for_dataset(id):
 
     # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
     # because that would require using a urllib request for each dataset
-    response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            organ_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    organ_types_dict = schema_manager.get_organ_types()
 
     # As above, we parse te assay type yaml here rather than calling the special method for it because this avoids
     # having to access the resource for every dataset.
-    response = schema_manager.make_request_get(SchemaConstants.ASSAY_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            assay_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    assay_types_dict = schema_manager.get_assay_types()
 
     hubmap_ids = schema_manager.get_hubmap_ids(id)
 
@@ -3251,25 +3225,11 @@ def sankey_data():
         mapping_dict = json.load(f)
     # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
     # because that would require using a urllib request for each dataset
-    response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            organ_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    organ_types_dict = schema_manager.get_organ_types()
 
     # As above, we parse te assay type yaml here rather than calling the special method for it because this avoids
     # having to access the resource for every dataset.
-    response = schema_manager.make_request_get(SchemaConstants.ASSAY_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            assay_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    assay_types_dict = schema_manager.get_assay_types()
 
     # Instantiation of the list dataset_sankey_list
     dataset_sankey_list = []
@@ -3377,14 +3337,16 @@ def get_sample_prov_info():
 
     # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
     # because that would require using a urllib request for each dataset
-    response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)
+    # response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)
 
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            organ_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    # if response.status_code == 200:
+    #     yaml_file = response.text
+    #     try:
+    #         organ_types_dict = yaml.safe_load(yaml_file)
+    #     except yaml.YAMLError as e:
+    #         raise yaml.YAMLError(e)
+
+    organ_types_dict = schema_manager.get_organ_types()
 
     # Processing and validating query parameters
     accepted_arguments = ['group_uuid']
@@ -4744,34 +4706,18 @@ def access_level_prefix_dir(dir_name):
 Returns nothing. Raises bad_request_error is organ code not found on organ_types.yaml 
 """
 def validate_organ_code(organ_code):
-    yaml_file_url = SchemaConstants.ORGAN_TYPES_YAML
-
-    # Use Memcached to improve performance
-    response = schema_manager.make_request_get(yaml_file_url)
-
-    if response.status_code == 200:
-        yaml_file = response.text
+    try:
+        organ_types_dict = schema_manager.get_organ_types()
 
-        try:
-            organ_types_dict = yaml.safe_load(response.text)
-            
-            if organ_code.upper() not in organ_types_dict:
-                bad_request_error(f"Invalid organ code. Must be 2 digit code specified {yaml_file_url}")
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
-    else:
-        msg = f"Unable to fetch the: {yaml_file_url}"
+    except Exception:
+        msg = f"Failed to validate the organ code: {organ_code}"
         # Log the full stack trace, prepend a line with our message
         logger.exception(msg)
 
-        logger.debug("======validate_organ_code() status code======")
-        logger.debug(response.status_code)
-
-        logger.debug("======validate_organ_code() response text======")
-        logger.debug(response.text)
-
         # Terminate and let the users know
-        internal_server_error(f"Failed to validate the organ code: {organ_code}")
+        internal_server_error(msg)
+    if organ_code.upper() not in organ_types_dict:  # Checked outside the try so the 400 is not caught and turned into a 500
+        bad_request_error(f"Invalid organ code. Must be 2 digit code")
 
 
 ####################################################################################################
diff --git a/src/instance/app.cfg.example b/src/instance/app.cfg.example
index 0c55f5bd..839972dc 100644
--- a/src/instance/app.cfg.example
+++ b/src/instance/app.cfg.example
@@ -28,6 +28,10 @@ UUID_API_URL = 'http://uuid-api:8080'
 # Works regardless of the trailing slash
 INGEST_API_URL = 'https://ingest-api.dev.hubmapconsortium.org'
 
+# URL for talking to Ontology API (default for DEV)
+# Works regardless of the trailing slash
+ONTOLOGY_API_URL = 'https://ontology-api.dev.hubmapconsortium.org'
+
 # A list of URLs for talking to multiple Search API instances (default value used for docker deployment, no token needed)
 # Works regardless of the trailing slash /
 SEARCH_API_URL_LIST = ['http://search-api:8080']
diff --git a/src/schema/schema_constants.py b/src/schema/schema_constants.py
index 5e4ad332..b426827c 100644
--- a/src/schema/schema_constants.py
+++ b/src/schema/schema_constants.py
@@ -13,9 +13,11 @@ class SchemaConstants(object):
     ACCESS_LEVEL_CONSORTIUM = 'consortium'
     ACCESS_LEVEL_PROTECTED = 'protected'
 
-    # Yaml file to parse organ description
-    ORGAN_TYPES_YAML = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/organ_types.yaml'
-    ASSAY_TYPES_YAML = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/assay_types.yaml'
+    UUID_API_ID_ENDPOINT = '/uuid'
+    INGEST_API_FILE_COMMIT_ENDPOINT = '/file-commit'
+    INGEST_API_FILE_REMOVE_ENDPOINT = '/file-remove'
+    ONTOLOGY_API_ASSAY_TYPES_ENDPOINT = '/assaytype?application_context=HuBMAP'
+    ONTOLOGY_API_ORGAN_TYPES_ENDPOINT = '/organs?application_context=HuBMAP'
 
     # For generating Sample.tissue_type
     TISSUE_TYPES_YAML = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml'
diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py
index 2386b013..5018e567 100644
--- a/src/schema/schema_manager.py
+++ b/src/schema/schema_manager.py
@@ -50,9 +50,11 @@
 valid_yaml_file : file
     A valid yaml file
 uuid_api_url : str
-    The uuid-api URL
+    The uuid-api base URL
 ingest_api_url : str
-    The ingest-api URL
+    The ingest-api base URL
+ontology_api_url : str
+    The ontology-api base URL
 auth_helper_instance : AuthHelper
     The auth helper instance
 neo4j_driver_instance : neo4j_driver
@@ -65,6 +67,7 @@
 def initialize(valid_yaml_file, 
                uuid_api_url,
                ingest_api_url,
+               ontology_api_url,
                auth_helper_instance,
                neo4j_driver_instance,
                memcached_client_instance,
@@ -73,6 +76,7 @@ def initialize(valid_yaml_file,
     global _schema
     global _uuid_api_url
     global _ingest_api_url
+    global _ontology_api_url
     global _auth_helper
     global _neo4j_driver
     global _memcached_client
@@ -81,6 +85,7 @@ def initialize(valid_yaml_file,
     _schema = load_provenance_schema(valid_yaml_file)
     _uuid_api_url = uuid_api_url
     _ingest_api_url = ingest_api_url
+    _ontology_api_url = ontology_api_url
 
     # Get the helper instances
     _auth_helper = auth_helper_instance
@@ -1202,7 +1207,7 @@ def get_user_info(request):
 def get_hubmap_ids(id):
     global _uuid_api_url
 
-    target_url = _uuid_api_url + '/uuid/' + id
+    target_url = _uuid_api_url + SchemaConstants.UUID_API_ID_ENDPOINT + '/' + id
 
     # Use Memcached to improve performance
     response = make_request_get(target_url, internal_token_used = True)
@@ -1365,7 +1370,7 @@ def create_hubmap_ids(normalized_class, json_data_dict, user_token, user_info_di
     logger.info(json_to_post)
 
     # Disable ssl certificate verification
-    target_url = _uuid_api_url + '/uuid'
+    target_url = _uuid_api_url + SchemaConstants.UUID_API_ID_ENDPOINT
     response = requests.post(url = target_url, headers = request_headers, json = json_to_post, verify = False, params = query_parms)
     
     # Invoke .raise_for_status(), an HTTPError will be raised with certain status codes
@@ -1764,6 +1769,84 @@ def delete_memcached_cache(uuids_list):
         logger.info(f"Deleted cache by key: {', '.join(cache_keys)}")
 
 
+"""
+Retrieve the organ types from ontology-api
+
+Returns
+-------
+dict
+    The parsed JSON body of the organs endpoint (callers index it by organ code - TODO confirm the shape)
+"""
+def get_organ_types():
+    # Read-only use of the module-level base URL that initialize() sets
+
+    target_url = _ontology_api_url + '/organs?application_context=HuBMAP'
+
+    # Use Memcached to improve performance
+    response = make_request_get(target_url, internal_token_used = True)
+
+    # Invoke .raise_for_status(), an HTTPError will be raised for 4xx/5xx status codes
+    response.raise_for_status()
+
+    if response.status_code == 200:
+        return response.json()
+
+    # Only non-200 responses that raise_for_status() lets through end up here,
+    # e.g., an unexpected 204 or 3xx
+    # There is no active exception at this point, so log with error() rather
+    # than exception(), which would append a meaningless "NoneType: None" trace
+    msg = f"Unable to retrieve the organ types from ontology-api: {target_url}"
+    logger.error(msg)
+
+    logger.debug("======get_organ_types() status code from ontology-api======")
+    logger.debug(response.status_code)
+
+    logger.debug("======get_organ_types() response text from ontology-api======")
+    logger.debug(response.text)
+
+    # Also bubble up the error message from ontology-api
+    raise requests.exceptions.RequestException(response.text)
+
+
+"""
+Retrieve the assay types from ontology-api
+
+Returns
+-------
+dict
+    The parsed JSON body of the assaytype endpoint (callers index it by assay type - TODO confirm the shape)
+"""
+def get_assay_types():
+    # Read-only use of the module-level base URL that initialize() sets
+
+    target_url = _ontology_api_url + '/assaytype?application_context=HuBMAP'
+
+    # Use Memcached to improve performance
+    response = make_request_get(target_url, internal_token_used = True)
+
+    # Invoke .raise_for_status(), an HTTPError will be raised for 4xx/5xx status codes
+    response.raise_for_status()
+
+    if response.status_code == 200:
+        return response.json()
+
+    # Only non-200 responses that raise_for_status() lets through end up here,
+    # e.g., an unexpected 204 or 3xx
+    # There is no active exception at this point, so log with error() rather
+    # than exception(), which would append a meaningless "NoneType: None" trace
+    msg = f"Unable to retrieve the assay types from ontology-api: {target_url}"
+    logger.error(msg)
+
+    logger.debug("======get_assay_types() status code from ontology-api======")
+    logger.debug(response.status_code)
+
+    logger.debug("======get_assay_types() response text from ontology-api======")
+    logger.debug(response.text)
+
+    # Also bubble up the error message from ontology-api
+    raise requests.exceptions.RequestException(response.text)
+
+
 ####################################################################################################
 ## Internal functions
 ####################################################################################################
diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py
index 38cf61dc..d831670a 100644
--- a/src/schema/schema_triggers.py
+++ b/src/schema/schema_triggers.py
@@ -1184,7 +1184,7 @@ def commit_thumbnail_file(property_key, normalized_type, user_token, existing_da
             entity_uuid = existing_data_dict['uuid']
 
         # Commit the thumbnail file via ingest-api call
-        ingest_api_target_url = schema_manager.get_ingest_api_url() + '/file-commit'
+        ingest_api_target_url = schema_manager.get_ingest_api_url() + SchemaConstants.INGEST_API_FILE_COMMIT_ENDPOINT
         
         # Example: {"temp_file_id":"dzevgd6xjs4d5grmcp4n"}
         thumbnail_file_dict = new_data_dict[property_key]
@@ -1286,7 +1286,7 @@ def delete_thumbnail_file(property_key, normalized_type, user_token, existing_da
         file_info_dict = generated_dict[target_property_key]
     
     # Remove the thumbnail file via ingest-api call
-    ingest_api_target_url = schema_manager.get_ingest_api_url() + '/file-remove'
+    ingest_api_target_url = schema_manager.get_ingest_api_url() + SchemaConstants.INGEST_API_FILE_REMOVE_ENDPOINT
 
     # ingest-api's /file-remove takes a list of files to remove
     # In this case, we only need to remove the single thumbnail file
@@ -1994,7 +1994,7 @@ def _commit_files(target_property_key, property_key, normalized_type, user_token
             entity_uuid = existing_data_dict['uuid']
 
         # Commit the files via ingest-api call
-        ingest_api_target_url = schema_manager.get_ingest_api_url() + '/file-commit'
+        ingest_api_target_url = schema_manager.get_ingest_api_url() + SchemaConstants.INGEST_API_FILE_COMMIT_ENDPOINT
 
         for file_info in new_data_dict[property_key]:
             temp_file_id = file_info['temp_file_id']
@@ -2104,7 +2104,7 @@ def _delete_files(target_property_key, property_key, normalized_type, user_token
         file_uuids.append(file_uuid)
 
     # Remove the files via ingest-api call
-    ingest_api_target_url = schema_manager.get_ingest_api_url() + '/file-remove'
+    ingest_api_target_url = schema_manager.get_ingest_api_url() + SchemaConstants.INGEST_API_FILE_REMOVE_ENDPOINT
 
     json_to_post = {
         'entity_uuid': entity_uuid,
@@ -2143,39 +2143,10 @@ def _delete_files(target_property_key, property_key, normalized_type, user_token
 str: The corresponding assay type description
 """
 def _get_assay_type_description(assay_type):
-    yaml_file_url = SchemaConstants.ASSAY_TYPES_YAML
+    assay_types_dict = schema_manager.get_assay_types()
 
-    # Use Memcached to improve performance
-    response = schema_manager.make_request_get(yaml_file_url)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-
-        try:
-            assay_types_dict = yaml.safe_load(response.text)
-
-            if assay_type in assay_types_dict:
-                return assay_types_dict[assay_type]['description'].lower()
-            else:
-                # Check the 'alt-names' list if not found in the top-level keys
-                for key in assay_types_dict:
-                    if assay_type in assay_types_dict[key]['alt-names']:
-                        return assay_types_dict[key]['description'].lower()
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
-    else:
-        msg = f"Unable to fetch the: {yaml_file_url}"
-        # Log the full stack trace, prepend a line with our message
-        logger.exception(msg)
-
-        logger.debug("======_get_assay_type_description() status code======")
-        logger.debug(response.status_code)
-
-        logger.debug("======_get_assay_type_description() response text======")
-        logger.debug(response.text)
-
-        # Also bubble up the error message
-        raise requests.exceptions.RequestException(response.text)
+    if assay_type in assay_types_dict:
+        return assay_types_dict[assay_type]['description'].lower()
 
 
 """
@@ -2230,32 +2201,7 @@ def _get_combined_assay_type_description(data_types):
 str: The organ code description
 """
 def _get_organ_description(organ_code):
-    yaml_file_url = SchemaConstants.ORGAN_TYPES_YAML
-
-    # Use Memcached to improve performance
-    response = schema_manager.make_request_get(yaml_file_url)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-
-        try:
-            organ_types_dict = yaml.safe_load(response.text)
-            return organ_types_dict[organ_code]['description'].lower()
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
-    else:
-        msg = f"Unable to fetch the: {yaml_file_url}"
-        # Log the full stack trace, prepend a line with our message
-        logger.exception(msg)
-
-        logger.debug("======_get_organ_description() status code======")
-        logger.debug(response.status_code)
-
-        logger.debug("======_get_organ_description() response text======")
-        logger.debug(response.text)
-
-        # Also bubble up the error message
-        raise requests.exceptions.RequestException(response.text)
-
+    organ_types_dict = schema_manager.get_organ_types()
+    return organ_types_dict[organ_code]['description'].lower()
 
 

From 83a4b57c220eae960b35d27be8db49974ac9062c Mon Sep 17 00:00:00 2001
From: yuanzhou <zhy19@pitt.edu>
Date: Fri, 10 Nov 2023 20:30:39 -0500
Subject: [PATCH 02/12] Remove tissue_type, specimen_type, and related pieces

---
 src/schema/provenance_schema.yaml | 28 ---------
 src/schema/schema_constants.py    |  5 --
 src/schema/schema_triggers.py     | 99 -------------------------------
 src/schema/schema_validators.py   | 71 ----------------------
 4 files changed, 203 deletions(-)

diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml
index b72bf3a1..7e43d41a 100644
--- a/src/schema/provenance_schema.yaml
+++ b/src/schema/provenance_schema.yaml
@@ -897,33 +897,6 @@ ENTITIES:
           - validate_sample_category
         before_property_update_validators:
           - validate_sample_category
-
-      # No logner required on create, specimen_type -> sample_category 12/15/2022
-      specimen_type:
-        type: string
-        #required_on_create: true # Only required for create via POST, not update via PUT
-        description: "A code representing the type of specimen.  Must be one of the codes specified in: [tissue sample types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml)"
-        # Validate the given value against the definitions: https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml
-        # Disabled validation 12/15/2022
-        # before_property_create_validators:
-        #   - validate_specimen_type
-        # before_property_update_validators:
-        #   - validate_specimen_type
-      specimen_type_other:
-        type: string
-        description: "The user provided sample type if the 'other' sample_type is chosen."
-      
-
-      # specimen_type no logner required on create, will remove this field when removing specimen_type
-      # Simply always set to 'Unknown' and no need to update 12/15/2022
-      tissue_type:
-        type: string
-        generated: true # Can not be updated via the PUT
-        #auto_update: true # Will always update automatically if the entity gets updated
-        description: 'The type of the tissue based on the mapping between type (Block/Section/Suspension) and the specimen_type, default is Unknown'
-        before_create_trigger: set_tissue_type
-        #before_update_trigger: set_tissue_type
-
       portal_metadata_upload_files:
         type: json_string
         description: "A list of relative paths to metadata files"
@@ -949,7 +922,6 @@ ENTITIES:
         immutable: true
         description: "The displayname of globus group which the user who created this entity is a member of"
         before_create_trigger: set_group_name
-      # Should be required on create only when specimen_type==organ
       organ:
         type: string
         description: "Organ code specifier, only set if sample_type == organ.  Valid values found in: [organ types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/organ_types.yaml)"
diff --git a/src/schema/schema_constants.py b/src/schema/schema_constants.py
index b426827c..e34bd10a 100644
--- a/src/schema/schema_constants.py
+++ b/src/schema/schema_constants.py
@@ -2,13 +2,11 @@
 class SchemaConstants(object):
     MEMCACHED_TTL = 7200
 
-    # Constants used by validators
     INGEST_API_APP = 'ingest-api'
     INGEST_PIPELINE_APP = 'ingest-pipeline'
     HUBMAP_APP_HEADER = 'X-Hubmap-Application'
     DATASET_STATUS_PUBLISHED = 'published'
 
-    # Used by triggers, all lowercase for easy comparision
     ACCESS_LEVEL_PUBLIC = 'public'
     ACCESS_LEVEL_CONSORTIUM = 'consortium'
     ACCESS_LEVEL_PROTECTED = 'protected'
@@ -19,9 +17,6 @@ class SchemaConstants(object):
     ONTOLOGY_API_ASSAY_TYPES_ENDPOINT = '/assaytype?application_context=HuBMAP'
     ONTOLOGY_API_ORGAN_TYPES_ENDPOINT = '/organs?application_context=HuBMAP'
 
-    # For generating Sample.tissue_type
-    TISSUE_TYPES_YAML = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml'
-
     DOI_BASE_URL = 'https://doi.org/'
 
 # Define an enumeration to classify an entity's visibility, which can be combined with
diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py
index d831670a..093ed3a6 100644
--- a/src/schema/schema_triggers.py
+++ b/src/schema/schema_triggers.py
@@ -1562,105 +1562,6 @@ def get_sample_direct_ancestor(property_key, normalized_type, user_token, existi
     return property_key, schema_manager.normalize_entity_result_for_response(direct_ancestor_dict)
 
 
-"""
-Trigger event method of generating the type of the tissue based on the mapping between type (Block/Section/Suspension) and the specimen_type
-This method applies to both the create and update triggers
-
-Rererence:
-    - https://docs.google.com/spreadsheets/d/1OODo8QK852txSNSmfIe0ua4A7nPFSgKq6h46grmrpto/edit#gid=0
-    - https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml
-
-Parameters
-----------
-property_key : str
-    The target property key of the value to be generated
-normalized_type : str
-    One of the types defined in the schema yaml: Sample
-user_token: str
-    The user's globus nexus token
-existing_data_dict : dict
-    A dictionary that contains all existing entity properties
-new_data_dict : dict
-    A merged dictionary that contains all possible input data to be used
-
-Returns
--------
-str: The target property key
-str: The type of the tissue
-"""
-def set_tissue_type(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
-    # specimen_type is no logner required on create 12/15/2022, set to Unknown
-    # Default to use 'Unknown'
-    tissue_type = 'Unknown'
-
-    # # The `specimen_type` field is required on entity creation via POST
-    # # thus should be available on existing entity update via PUT
-    # # We do a double check here just in case
-    # if ('specimen_type' not in new_data_dict) and ('specimen_type' not in existing_data_dict):
-    #     raise KeyError("Missing 'specimen_type' key in both 'new_data_dict' and 'existing_data_dict' during calling 'set_tissue_type()' trigger method.")
-
-    # # Always calculate the tissue_type value no matter new creation or update existing
-    # # The `specimen_type` field can be used in a PUT
-    # # But if it's not in the request JSON of a PUT, it must be in the existing data
-    # if 'specimen_type' in new_data_dict:
-    #     # The `specimen_type` value validation is handled in the `schema_validators.validate_specimen_type()`
-    #     # and that gets called before this trigger method
-    #     specimen_type = new_data_dict['specimen_type'].lower()
-    # else:
-    #     # Use lowercase in case someone manually updated the neo4j filed with incorrect case
-    #     specimen_type = existing_data_dict['specimen_type'].lower()
-
-    # # Categories: Block, Section, Suspension 
-    # block_category = [
-    #     'pbmc',
-    #     'biopsy',
-    #     'segment',
-    #     'ffpe_block',
-    #     'organ_piece',
-    #     'fresh_tissue',
-    #     'clarity_hydrogel',
-    #     'fixed_tissue_piece',
-    #     'fresh_frozen_tissue',
-    #     'fresh_frozen_oct_block',
-    #     'formalin_fixed_oct_block',
-    #     'pfa_fixed_frozen_oct_block',
-    #     'flash_frozen_liquid_nitrogen',
-    #     'frozen_cell_pellet_buffy_coat'
-    # ]
-
-    # section_category = [
-    #     'ffpe_slide',
-    #     'fixed_frozen_section_slide',
-    #     'fresh_frozen_section_slide',
-    #     'fresh_frozen_tissue_section',
-    #     'cryosections_curls_rnalater',
-    #     'cryosections_curls_from_fresh_frozen_oct'
-    # ]
-
-    # suspension_category = [
-    #     'gdna',
-    #     'serum',
-    #     'plasma',
-    #     'nuclei',
-    #     'protein',
-    #     'rna_total',
-    #     'cell_lysate',
-    #     'tissue_lysate',
-    #     'sequence_library',
-    #     'ran_poly_a_enriched',
-    #     'single_cell_cryopreserved'
-    # ]
-
-    # # Capitalized type, default is 'Unknown' if no match
-    # if specimen_type in block_category:
-    #     tissue_type = 'Block'
-    # elif specimen_type in section_category:
-    #     tissue_type = 'Section'
-    # elif specimen_type in suspension_category:
-    #     tissue_type = 'Suspension'
-
-    return property_key, tissue_type
-
 
 ####################################################################################################
 ## Trigger methods specific to Publication - DO NOT RENAME
diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py
index 8c1269a5..cedd2d53 100644
--- a/src/schema/schema_validators.py
+++ b/src/schema/schema_validators.py
@@ -430,33 +430,6 @@ def validate_upload_status_value(property_key, normalized_entity_type, request,
         raise ValueError(f"Invalid status value: {new_status}")
 
 
-"""
-NOTE: TO BE REMOVED when we remove specimen_type field
-
-Validate the provided value of Sample.specimen_type on create via POST and update via PUT
-
-Parameters
-----------
-property_key : str
-    The target property key
-normalized_type : str
-    Submission
-request: Flask request object
-    The instance of Flask request passed in from application request
-existing_data_dict : dict
-    A dictionary that contains all existing entity properties
-new_data_dict : dict
-    The json data in request body, already after the regular validations
-"""
-def validate_specimen_type(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
-    # Use lowercase for comparison
-    defined_tissue_types = _get_tissue_types()
-    specimen_type = new_data_dict[property_key].lower()
-
-    if specimen_type not in defined_tissue_types:
-        raise ValueError(f"Invalid specimen_type value: {specimen_type}")
-
-
 """
 Validate the provided value of Sample.sample_category on create via POST and update via PUT
 
@@ -562,47 +535,3 @@ def _validate_application_header(applications_allowed, request_headers):
         msg = f"Unable to proceed due to invalid {SchemaConstants.HUBMAP_APP_HEADER} header value: {app_header}"
         raise schema_errors.InvalidApplicationHeaderException(msg)
 
-
-"""
-Get the complete list of defined tissue types
-
-Returns
--------
-list: The list of defined tissue types
-"""
-def _get_tissue_types():
-    yaml_file_url = SchemaConstants.TISSUE_TYPES_YAML
-
-    # Use Memcached to improve performance
-    response = schema_manager.make_request_get(yaml_file_url)
-    
-    if response.status_code == 200:
-        yaml_file = response.text
-
-        try:
-            tissue_types_dict = yaml.safe_load(response.text)
-
-            # We don't need the description here, just a list of tissue types
-            # Note: dict.keys() returns a dict, need to typecast to list
-            tissue_types_list = list(tissue_types_dict.keys())
-
-            # Add the 'other'
-            tissue_types_list.append('other')
-            
-            return tissue_types_list
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
-    else:
-        msg = f"Unable to fetch the: {yaml_file_url}"
-        # Log the full stack trace, prepend a line with our message
-        logger.exception(msg)
-
-        logger.debug("======_get_tissue_types() status code======")
-        logger.debug(response.status_code)
-
-        logger.debug("======_get_tissue_types() response text======")
-        logger.debug(response.text)
-
-        # Also bubble up the error message
-        raise requests.exceptions.RequestException(response.text)
-

From 031c4b548bfe2b332ce325ed05c1ef98327f9718 Mon Sep 17 00:00:00 2001
From: yuanzhou <yuanzhou19@gmail.com>
Date: Mon, 20 Nov 2023 09:26:38 -0500
Subject: [PATCH 03/12] Remove irrelevant logging comments

---
 src/app.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/app.py b/src/app.py
index ce39b5bc..3f90d7e8 100644
--- a/src/app.py
+++ b/src/app.py
@@ -44,9 +44,6 @@
 global logger
 
 # Set logging format and level (default is warning)
-# All the API logging is forwarded to the uWSGI server and gets written into the log file `log/uwsgi-entity-api.log`
-# Log rotation is handled via logrotate on the host system with a configuration file
-# Do NOT handle log file and rotation via the Python logging to avoid issues with multi-worker processes
 logging.basicConfig(format='[%(asctime)s] %(levelname)s in %(module)s: %(message)s', level=logging.DEBUG, datefmt='%Y-%m-%d %H:%M:%S')
 
 # Use `getLogger()` instead of `getLogger(__name__)` to apply the config to the root logger

From 393d40f1d5d5bf3da115f7dc021fa17fa13aae2b Mon Sep 17 00:00:00 2001
From: yuanzhou <yuanzhou19@gmail.com>
Date: Tue, 21 Nov 2023 10:20:08 -0500
Subject: [PATCH 04/12] Code cleanup

---
 entity-api-spec.yaml                          | 53 ------------------
 src/app.py                                    | 12 -----
 src/schema/schema_manager.py                  |  1 -
 src/schema/schema_neo4j_queries.py            | 17 ------
 .../api-template-test/entity-Template.yaml    | 54 -------------------
 .../example-yaml-templates/sample-schema.yaml |  9 ----
 6 files changed, 146 deletions(-)

diff --git a/entity-api-spec.yaml b/entity-api-spec.yaml
index 8fd04211..fbfc5548 100644
--- a/entity-api-spec.yaml
+++ b/entity-api-spec.yaml
@@ -406,59 +406,6 @@ components:
             - section
             - suspension
           description: "A code representing the type of specimen. Must be an organ, block, section, or suspension"
-        specimen_type:
-          type: string
-          enum:
-            - atacseq
-            - biopsy
-            - blood
-            - cell_lysate
-            - clarity_hydrogel
-            - codex
-            - cryosections_curls_from_fresh_frozen_oct
-            - cryosections_curls_rnalater
-            - ffpe_block
-            - ffpe_slide
-            - fixed_frozen_section_slide
-            - fixed_tissue_piece
-            - flash_frozen_liquid_nitrogen
-            - formalin_fixed_oct_block
-            - fresh_frozen_oct_block
-            - fresh_frozen_section_slide
-            - fresh_frozen_tissue
-            - fresh_frozen_tissue_section
-            - fresh_tissue
-            - frozen_cell_pellet_buffy_coat
-            - gdna
-            - module
-            - nuclei
-            - nuclei_rnalater
-            - organ
-            - organ_piece
-            - other
-            - pbmc
-            - pfa_fixed_frozen_oct_block
-            - plasma
-            - protein
-            - ran_poly_a_enriched
-            - rna_total
-            - rnalater_treated_and_stored
-            - rnaseq
-            - scatacseq
-            - scrnaseq
-            - segment
-            - seqfish
-            - sequence_library
-            - serum
-            - single_cell_cryopreserved
-            - snatacseq
-            - snrnaseq
-            - tissue_lysate
-            - wgs
-          description: "DEPRECATED:  No longer a required field. A code representing the type of specimen.  Must be one of the codes specified in: [tissue sample types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml)"
-        specimen_type_other:
-          type: string
-          description: "The user provided sample type if the 'other' sample_type is chosen."
         protocol_url:
           type: string
           description: "The protocols.io doi url pointing the protocol under wich the sample was obtained and/or prepared."
diff --git a/src/app.py b/src/app.py
index ae8ad938..00f1d523 100644
--- a/src/app.py
+++ b/src/app.py
@@ -436,7 +436,6 @@ def get_ancestor_organs(id):
         bad_request_error(f"Unable to get the ancestor organs for this: {normalized_entity_type},"
                           " supported entity types: Sample, Dataset, Publication")
 
-    # specimen_type -> sample_category 12/15/2022
     if normalized_entity_type == 'Sample' and entity_dict['sample_category'].lower() == 'organ':
         bad_request_error("Unable to get the ancestor organ of an organ.")
 
@@ -939,7 +938,6 @@ def create_entity(entity_type):
         # Check existence of the direct ancestor (either another Sample or Donor)
         direct_ancestor_dict = query_target_entity(direct_ancestor_uuid, user_token)
 
-        # specimen_type -> sample_category 12/15/2022
         # `sample_category` is required on create
         sample_category = json_data_dict['sample_category'].lower()
         
@@ -1112,7 +1110,6 @@ def create_multiple_samples(count):
     # sample's direct ancestor is a Donor.
     # Must be one of the codes from: https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/organ_types.yaml
     if direct_ancestor_dict['entity_type'] == 'Donor':
-        # specimen_type -> sample_category 12/15/2022
         # `sample_category` is required on create
         if json_data_dict['sample_category'].lower() != 'organ':
             bad_request_error("The sample_category must be organ since the direct ancestor is a Donor")
@@ -2828,8 +2825,6 @@ def get_prov_info():
                 first_sample_hubmap_id_list.append(item['hubmap_id'])
                 first_sample_submission_id_list.append(item['submission_id'])
                 first_sample_uuid_list.append(item['uuid'])
-
-                # specimen_type -> sample_category 12/15/2022
                 first_sample_type_list.append(item['sample_category'])
 
                 first_sample_portal_url_list.append(app.config['DOI_REDIRECT_URL'].replace('<entity_type>', 'sample').replace('<identifier>', item['uuid']))
@@ -3148,8 +3143,6 @@ def get_prov_info_for_dataset(id):
             first_sample_hubmap_id_list.append(item['hubmap_id'])
             first_sample_submission_id_list.append(item['submission_id'])
             first_sample_uuid_list.append(item['uuid'])
-
-            # specimen_type -> sample_category 12/15/2022
             first_sample_type_list.append(item['sample_category'])
 
             first_sample_portal_url_list.append(
@@ -3267,7 +3260,6 @@ def get_prov_info_for_dataset(id):
         else:
             requested_samples = {}
             for uuid in dataset_samples.keys():
-                # specimen_type -> sample_category 12/15/2022
                 if dataset_samples[uuid]['sample_category'] in include_samples:
                     requested_samples[uuid] = dataset_samples[uuid]
             internal_dict[HEADER_DATASET_SAMPLES] = requested_samples
@@ -3479,7 +3471,6 @@ def get_sample_prov_info():
             organ_hubmap_id = sample['organ_hubmap_id']
             organ_submission_id = sample['organ_submission_id']
         else:
-            # sample_specimen_type -> sample_category 12/15/2022
             if sample['sample_category'] == "organ":
                 organ_uuid = sample['sample_uuid']
                 organ_type = organ_types_dict[sample['sample_organ']]['description'].lower()
@@ -3507,10 +3498,7 @@ def get_sample_prov_info():
         internal_dict[HEADER_SAMPLE_HAS_METADATA] = sample_has_metadata
         internal_dict[HEADER_SAMPLE_HAS_RUI_INFO] = sample_has_rui_info
         internal_dict[HEADER_SAMPLE_DIRECT_ANCESTOR_ID] = sample['sample_ancestor_id']
-
-        # sample_specimen_type -> sample_category 12/15/2022
         internal_dict[HEADER_SAMPLE_TYPE] = sample['sample_category']
-
         internal_dict[HEADER_SAMPLE_HUBMAP_ID] = sample['sample_hubmap_id']
         internal_dict[HEADER_SAMPLE_SUBMISSION_ID] = sample['sample_submission_id']
         internal_dict[HEADER_SAMPLE_DIRECT_ANCESTOR_ENTITY_TYPE] = sample['sample_ancestor_entity']
diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py
index e66c6f21..1f0d79b5 100644
--- a/src/schema/schema_manager.py
+++ b/src/schema/schema_manager.py
@@ -1356,7 +1356,6 @@ def create_hubmap_ids(normalized_class, json_data_dict, user_token, user_info_di
             parent_id = json_data_dict['direct_ancestor_uuid']
             json_to_post['parent_ids'] = [parent_id]
 
-            # specimen_type -> sample_category 12/15/2022
             # 'Sample.sample_category' is marked as `required_on_create` in the schema yaml
             if json_data_dict['sample_category'].lower() == 'organ':
                 # The 'organ' field containing the 2 digit organ code is required in this case
diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py
index 8da148b3..231375b3 100644
--- a/src/schema/schema_neo4j_queries.py
+++ b/src/schema/schema_neo4j_queries.py
@@ -442,24 +442,8 @@ def get_dataset_organ_and_donor_info(neo4j_driver, uuid):
     donor_metadata = None
 
     with neo4j_driver.session() as session:
-        # Old time-consuming single query, it takes a significant amounts of DB hits
-        # query = (f"MATCH (e:Dataset)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Sample)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(d:Donor) "
-        #          f"WHERE e.uuid='{uuid}' AND s.specimen_type='organ' AND EXISTS(s.organ) "
-        #          f"RETURN s.organ AS organ_name, d.metadata AS donor_metadata")
-
-        # logger.info("======get_dataset_organ_and_donor_info() query======")
-        # logger.info(query)
-
-        # with neo4j_driver.session() as session:
-        #     record = session.read_transaction(execute_readonly_tx, query)
-
-        #     if record:
-        #         organ_name = record['organ_name']
-        #         donor_metadata = record['donor_metadata']
-
         # To improve the query performance, we implement the two-step queries to drastically reduce the DB hits
         sample_query = (f"MATCH (e:Dataset)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Sample) "
-                        # specimen_type -> sample_category 12/15/2022
                         f"WHERE e.uuid='{uuid}' AND s.sample_category='organ' AND EXISTS(s.organ) "
                         f"RETURN DISTINCT s.organ AS organ_name, s.uuid AS sample_uuid")
 
@@ -473,7 +457,6 @@ def get_dataset_organ_and_donor_info(neo4j_driver, uuid):
             sample_uuid = sample_record['sample_uuid']
 
             donor_query = (f"MATCH (s:Sample)<-[:ACTIVITY_OUTPUT]-(a:Activity)<-[:ACTIVITY_INPUT]-(d:Donor) "
-                           # specimen_type -> sample_category 12/15/2022
                            f"WHERE s.uuid='{sample_uuid}' AND s.sample_category='organ' AND EXISTS(s.organ) "
                            f"RETURN DISTINCT d.metadata AS donor_metadata")
 
diff --git a/src/schema_templating/example-yaml-templates/api-template-test/entity-Template.yaml b/src/schema_templating/example-yaml-templates/api-template-test/entity-Template.yaml
index 5a6739f7..ab8ec463 100644
--- a/src/schema_templating/example-yaml-templates/api-template-test/entity-Template.yaml
+++ b/src/schema_templating/example-yaml-templates/api-template-test/entity-Template.yaml
@@ -398,60 +398,6 @@ x-ref-components:
             - consortium
             - public
           description: "One of the values: public, consortium."
-        specimen_type:
-          type: string
-          enum:
-            - atacseq
-            - biopsy
-            - blood
-            - cell_lysate
-            - clarity_hydrogel
-            - codex
-            - cryosections_curls_from_fresh_frozen_oct
-            - cryosections_curls_rnalater
-            - ffpe_block
-            - ffpe_slide
-            - fixed_frozen_section_slide
-            - fixed_tissue_piece
-            - flash_frozen_liquid_nitrogen
-            - formalin_fixed_oct_block
-            - fresh_frozen_oct_block
-            - fresh_frozen_section_slide
-            - fresh_frozen_tissue
-            - fresh_frozen_tissue_section
-            - fresh_tissue
-            - frozen_cell_pellet_buffy_coat
-            - gdna
-            - module
-            - nuclei
-            - nuclei_rnalater
-            - organ
-            - organ_piece
-            - other
-            - pbmc
-            - pfa_fixed_frozen_oct_block
-            - plasma
-            - protein
-            - ran_poly_a_enriched
-            - rna_total
-            - rnalater_treated_and_stored
-            - rnaseq
-            - scatacseq
-            - scrnaseq
-            - segment
-            - seqfish
-            - sequence_library
-            - serum
-            - sequence_library
-            - single_cell_cryopreserved
-            - snatacseq
-            - snrnaseq
-            - tissue_lysate
-            - wgs
-          description: "A code representing the type of specimen.  Must be one of the codes specified in: [tissue sample types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml)"
-        specimen_type_other:
-          type: string
-          description: "The user provided sample type if the 'other' sample_type is chosen."
         protocol_url:
           type: string
           description: "The protocols.io doi url pointing the protocol under wich the sample was obtained and/or prepared."
diff --git a/src/schema_templating/example-yaml-templates/sample-schema.yaml b/src/schema_templating/example-yaml-templates/sample-schema.yaml
index 1f5ee751..2b6f3f11 100644
--- a/src/schema_templating/example-yaml-templates/sample-schema.yaml
+++ b/src/schema_templating/example-yaml-templates/sample-schema.yaml
@@ -72,15 +72,6 @@ Sample:
         - consortium
         - public
       description: "One of the values: public, consortium."
-    specimen_type:
-      type: string
-      enum:
-        X-replace-enum-list:
-          enum-file-ref: https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml
-      description: "A code representing the type of specimen.  Must be one of the codes specified in: [tissue sample types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml)"
-    specimen_type_other:
-      type: string
-      description: "The user provided sample type if the 'other' sample_type is chosen."
     protocol_url:
       type: string
       description: "The protocols.io doi url pointing the protocol under wich the sample was obtained and/or prepared."

From 8ec3fd0687bf58d266243f0a4d8847c6d7f587d1 Mon Sep 17 00:00:00 2001
From: yuanzhou <yuanzhou19@gmail.com>
Date: Tue, 21 Nov 2023 11:02:28 -0500
Subject: [PATCH 05/12] Fix constants reference

---
 src/schema/schema_manager.py  | 4 ++--
 src/schema/schema_triggers.py | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py
index 1f0d79b5..cf247934 100644
--- a/src/schema/schema_manager.py
+++ b/src/schema/schema_manager.py
@@ -1211,7 +1211,7 @@ def get_user_info(request):
 def get_hubmap_ids(id):
     global _uuid_api_url
 
-    target_url = _uuid_api_url + schema_constants.UUID_API_ID_ENDPOINT + '/' + id
+    target_url = _uuid_api_url + SchemaConstants.UUID_API_ID_ENDPOINT + '/' + id
 
     # Use Memcached to improve performance
     response = make_request_get(target_url, internal_token_used = True)
@@ -1373,7 +1373,7 @@ def create_hubmap_ids(normalized_class, json_data_dict, user_token, user_info_di
     logger.info(json_to_post)
 
     # Disable ssl certificate verification
-    target_url = _uuid_api_url + schema_constants.UUID_API_ID_ENDPOINT
+    target_url = _uuid_api_url + SchemaConstants.UUID_API_ID_ENDPOINT
     response = requests.post(url = target_url, headers = request_headers, json = json_to_post, verify = False, params = query_parms)
     
     # Invoke .raise_for_status(), an HTTPError will be raised with certain status codes
diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py
index 5908fc05..182e5614 100644
--- a/src/schema/schema_triggers.py
+++ b/src/schema/schema_triggers.py
@@ -1194,7 +1194,7 @@ def commit_thumbnail_file(property_key, normalized_type, user_token, existing_da
             entity_uuid = existing_data_dict['uuid']
 
         # Commit the thumbnail file via ingest-api call
-        ingest_api_target_url = schema_manager.get_ingest_api_url() + schema_constants.INGEST_API_FILE_COMMIT_ENDPOINT
+        ingest_api_target_url = schema_manager.get_ingest_api_url() + SchemaConstants.INGEST_API_FILE_COMMIT_ENDPOINT
         
         # Example: {"temp_file_id":"dzevgd6xjs4d5grmcp4n"}
         thumbnail_file_dict = new_data_dict[property_key]
@@ -1296,7 +1296,7 @@ def delete_thumbnail_file(property_key, normalized_type, user_token, existing_da
         file_info_dict = generated_dict[target_property_key]
     
     # Remove the thumbnail file via ingest-api call
-    ingest_api_target_url = schema_manager.get_ingest_api_url() + schema_constants.INGEST_API_FILE_REMOVE_ENDPOINT
+    ingest_api_target_url = schema_manager.get_ingest_api_url() + SchemaConstants.INGEST_API_FILE_REMOVE_ENDPOINT
 
     # ingest-api's /file-remove takes a list of files to remove
     # In this case, we only need to remove the single thumbnail file
@@ -1905,7 +1905,7 @@ def _commit_files(target_property_key, property_key, normalized_type, user_token
             entity_uuid = existing_data_dict['uuid']
 
         # Commit the files via ingest-api call
-        ingest_api_target_url = schema_manager.get_ingest_api_url() + schema_constants.INGEST_API_FILE_COMMIT_ENDPOINT
+        ingest_api_target_url = schema_manager.get_ingest_api_url() + SchemaConstants.INGEST_API_FILE_COMMIT_ENDPOINT
 
         for file_info in new_data_dict[property_key]:
             temp_file_id = file_info['temp_file_id']
@@ -2015,7 +2015,7 @@ def _delete_files(target_property_key, property_key, normalized_type, user_token
         file_uuids.append(file_uuid)
 
     # Remove the files via ingest-api call
-    ingest_api_target_url = schema_manager.get_ingest_api_url() + schema_constants.INGEST_API_FILE_REMOVE_ENDPOINT
+    ingest_api_target_url = schema_manager.get_ingest_api_url() + SchemaConstants.INGEST_API_FILE_REMOVE_ENDPOINT
 
     json_to_post = {
         'entity_uuid': entity_uuid,

From 97d2c90e74afb53a9879e01f7bb72c7139812ac1 Mon Sep 17 00:00:00 2001
From: yuanzhou <zhy19@pitt.edu>
Date: Mon, 27 Nov 2023 11:20:51 -0500
Subject: [PATCH 06/12] Fix incompatible result format

---
 src/schema/schema_constants.py |  2 +-
 src/schema/schema_manager.py   | 53 ++++++++++++++++++++++++++++------
 src/schema/schema_triggers.py  | 23 ++-------------
 3 files changed, 48 insertions(+), 30 deletions(-)

diff --git a/src/schema/schema_constants.py b/src/schema/schema_constants.py
index e34bd10a..111f1ed1 100644
--- a/src/schema/schema_constants.py
+++ b/src/schema/schema_constants.py
@@ -15,7 +15,7 @@ class SchemaConstants(object):
     INGEST_API_FILE_COMMIT_ENDPOINT = '/file-commit'
     INGEST_API_FILE_REMOVE_ENDPOINT = '/file-remove'
     ONTOLOGY_API_ASSAY_TYPES_ENDPOINT = '/assaytype?application_context=HuBMAP'
-    ONTOLOGY_API_ORGAN_TYPES_ENDPOINT = '/organs?application_context=HuBMAP'
+    ONTOLOGY_API_ORGAN_TYPES_ENDPOINT = '/organs/by-code?application_context=HuBMAP'
 
     DOI_BASE_URL = 'https://doi.org/'
 
diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py
index cf247934..d619669e 100644
--- a/src/schema/schema_manager.py
+++ b/src/schema/schema_manager.py
@@ -1778,12 +1778,22 @@ def delete_memcached_cache(uuids_list):
 Returns
 -------
 dict
-    The available organ types
+    The available organ types in the following format:
+
+    {
+        "AO": "Aorta",
+        "BD": "Blood",
+        "BL": "Bladder",
+        "BM": "Bone Marrow",
+        "BR": "Brain",
+        "HT": "Heart",
+        ...
+    }
 """
 def get_organ_types():
     global _ontology_api_url
 
-    target_url = _ontology_api_url + '/organs?application_context=HuBMAP'
+    target_url = _ontology_api_url + SchemaConstants.ONTOLOGY_API_ORGAN_TYPES_ENDPOINT
 
     # Use Memcached to improve performance
     response = make_request_get(target_url, internal_token_used = True)
@@ -1795,8 +1805,6 @@ def get_organ_types():
         ids_dict = response.json()
         return ids_dict
     else:
-        # uuid-api will also return 400 if the given id is invalid
-        # We'll just hanle that and all other cases all together here
         msg = f"Unable to make a request to query the id via uuid-api: {id}"
         # Log the full stack trace, prepend a line with our message
         logger.exception(msg)
@@ -1817,12 +1825,32 @@ def get_organ_types():
 Returns
 -------
 dict
-    The available assay types
+    The available assay types by name in the following format:
+
+    {
+        "10x-multiome": {
+            "contains_pii": true,
+            "description": "10x Multiome",
+            "name": "10x-multiome",
+            "primary": true,
+            "vis_only": false,
+            "vitessce_hints": []
+        },
+        "AF": {
+            "contains_pii": false,
+            "description": "Autofluorescence Microscopy",
+            "name": "AF",
+            "primary": true,
+            "vis_only": false,
+            "vitessce_hints": []
+        },
+        ...
+    }
 """
 def get_assay_types():
     global _ontology_api_url
 
-    target_url = _ontology_api_url + '/assaytype?application_context=HuBMAP'
+    target_url = _ontology_api_url + SchemaConstants.ONTOLOGY_API_ASSAY_TYPES_ENDPOINT
 
     # Use Memcached to improve performance
     response = make_request_get(target_url, internal_token_used = True)
@@ -1831,8 +1859,15 @@ def get_assay_types():
     response.raise_for_status()
 
     if response.status_code == 200:
-        ids_dict = response.json()
-        return ids_dict
+        assay_types_by_name = {}
+        result_dict = response.json()
+
+        # Due to the json envelop being used int the json result
+        assay_types_list = result_dict['result']
+        for assay_type_dict in assay_types_list:
+            assay_types_dict_by_name[assay_type_dict['name']] = assay_type_dict
+
+        return assay_types_by_name
     else:
         # uuid-api will also return 400 if the given id is invalid
         # We'll just hanle that and all other cases all together here
@@ -1876,4 +1911,4 @@ def _create_request_headers(user_token):
         auth_header_name: auth_scheme + ' ' + user_token
     }
 
-    return headers_dict
\ No newline at end of file
+    return headers_dict
diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py
index 182e5614..606e9af2 100644
--- a/src/schema/schema_triggers.py
+++ b/src/schema/schema_triggers.py
@@ -1024,9 +1024,10 @@ def get_dataset_title(property_key, normalized_type, user_token, existing_data_d
     # Parse the organ description
     if organ_name is not None:
         try: 
-            # The organ_name is the two-letter code only set if specimen_type == 'organ'
+            # The organ_name is the two-letter code only set for 'organ'
             # Convert the two-letter code to a description
-            organ_desc = _get_organ_description(organ_name)
+            organ_types_dict = schema_manager.get_organ_types()
+            organ_desc = organ_types_dict[organ_name].lower()
         except (yaml.YAMLError, requests.exceptions.RequestException) as e:
             raise Exception(e)
 
@@ -2098,21 +2099,3 @@ def _get_combined_assay_type_description(data_types):
 
     return assay_type_desc
 
-
-"""
-Get the organ description based on the given organ code
-
-Parameters
-----------
-organ_code : str
-    The two-letter organ code
-
-Returns
--------
-str: The organ code description
-"""
-def _get_organ_description(organ_code):
-    organ_types_dict = schema_manager.get_organ_types()
-    return organ_types_dict[organ_code]['description'].lower()
-
-

From 5cd9a073b823545a91a29a79e0c346e77ffc52f3 Mon Sep 17 00:00:00 2001
From: yuanzhou <zhy19@pitt.edu>
Date: Mon, 27 Nov 2023 11:25:23 -0500
Subject: [PATCH 07/12] Fix code comments

---
 src/schema/schema_manager.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py
index d619669e..a4f36612 100644
--- a/src/schema/schema_manager.py
+++ b/src/schema/schema_manager.py
@@ -1802,10 +1802,9 @@ def get_organ_types():
     response.raise_for_status()
 
     if response.status_code == 200:
-        ids_dict = response.json()
-        return ids_dict
+        return response.json()
     else:
-        msg = f"Unable to make a request to query the id via uuid-api: {id}"
+        msg = "Unable to make a request to query the organ types via ontology-api: {id}"
         # Log the full stack trace, prepend a line with our message
         logger.exception(msg)
 
@@ -1869,9 +1868,7 @@ def get_assay_types():
 
         return assay_types_by_name
     else:
-        # uuid-api will also return 400 if the given id is invalid
-        # We'll just hanle that and all other cases all together here
-        msg = f"Unable to make a request to query the id via uuid-api: {id}"
+        msg = "Unable to make a request to query the assay types via ontology-api"
         # Log the full stack trace, prepend a line with our message
         logger.exception(msg)
 

From d63b25a328938296a58e3a66ca737afdb56dd416 Mon Sep 17 00:00:00 2001
From: yuanzhou <zhy19@pitt.edu>
Date: Mon, 27 Nov 2023 19:50:03 -0500
Subject: [PATCH 08/12] Remove parsing on alt-names

---
 src/app.py                     | 45 ++++++++++++----------------------
 src/schema/schema_constants.py |  4 +--
 src/schema/schema_manager.py   |  6 ++---
 3 files changed, 19 insertions(+), 36 deletions(-)

diff --git a/src/app.py b/src/app.py
index 00f1d523..5e18cb59 100644
--- a/src/app.py
+++ b/src/app.py
@@ -2794,17 +2794,12 @@ def get_prov_info():
         for item in dataset['data_types']:
             try:
                 assay_description_list.append(assay_types_dict[item]['description'])
-            # Some data types aren't given by their code in the assay types yaml and are instead given as an alt name.
-            # In these cases, we have to search each assay type and see if the given code matches any alternate names.
             except KeyError:
-                valid_key = False
-                for each in assay_types_dict:
-                    if valid_key is False:
-                        if item in assay_types_dict[each]['alt-names']:
-                            assay_description_list.append(assay_types_dict[each]['description'])
-                            valid_key = True
-                if valid_key is False:
-                    assay_description_list.append(item)
+                logger.exception(f"Data type {item} not found in resulting assay types via ontology-api")
+
+                # Just use the data type value
+                assay_description_list.append(item)
+
         dataset['data_types'] = assay_description_list
         internal_dict[HEADER_DATASET_DATA_TYPES] = dataset['data_types']
 
@@ -3116,17 +3111,12 @@ def get_prov_info_for_dataset(id):
     for item in dataset['data_types']:
         try:
             assay_description_list.append(assay_types_dict[item]['description'])
-        # Some data types aren't given by their code in the assay types yaml and are instead given as an alt name.
-        # In these cases, we have to search each assay type and see if the given code matches any alternate names.
         except KeyError:
-            valid_key = False
-            for each in assay_types_dict:
-                if valid_key is False:
-                    if item in assay_types_dict[each]['alt-names']:
-                        assay_description_list.append(assay_types_dict[each]['description'])
-                        valid_key = True
-            if valid_key is False:
-                assay_description_list.append(item)
+            logger.exception(f"Data type {item} not found in resulting assay types via ontology-api")
+
+            # Just use the data type value
+            assay_description_list.append(item)
+
     dataset['data_types'] = assay_description_list
     internal_dict[HEADER_DATASET_DATA_TYPES] = dataset['data_types']
     if return_json is False:
@@ -3342,17 +3332,12 @@ def sankey_data():
             assay_description = ""
             try:
                 assay_description = assay_types_dict[dataset[HEADER_DATASET_DATA_TYPES]]['description']
-            # Some data types aren't given by their code in the assay types yaml and are instead given as an alt name.
-            # In these cases, we have to search each assay type and see if the given code matches any alternate names.
             except KeyError:
-                valid_key = False
-                for each in assay_types_dict:
-                    if valid_key is False:
-                        if dataset[HEADER_DATASET_DATA_TYPES] in assay_types_dict[each]['alt-names']:
-                            assay_description = assay_types_dict[each]['description']
-                            valid_key = True
-                if valid_key is False:
-                    assay_description = dataset[HEADER_DATASET_DATA_TYPES]
+                logger.exception(f"Data type {dataset[HEADER_DATASET_DATA_TYPES]} not found in resulting assay types via ontology-api")
+
+                # Just use the data type value
+                assay_description = dataset[HEADER_DATASET_DATA_TYPES]
+
             internal_dict[HEADER_DATASET_DATA_TYPES] = assay_description
 
             # Replace applicable Group Name and Data type with the value needed for the sankey via the mapping_dict
diff --git a/src/schema/schema_constants.py b/src/schema/schema_constants.py
index 111f1ed1..6a471587 100644
--- a/src/schema/schema_constants.py
+++ b/src/schema/schema_constants.py
@@ -14,8 +14,8 @@ class SchemaConstants(object):
     UUID_API_ID_ENDPOINT = '/uuid'
     INGEST_API_FILE_COMMIT_ENDPOINT = '/file-commit'
     INGEST_API_FILE_REMOVE_ENDPOINT = '/file-remove'
-    ONTOLOGY_API_ASSAY_TYPES_ENDPOINT = '/assaytype?application_context=HuBMAP'
-    ONTOLOGY_API_ORGAN_TYPES_ENDPOINT = '/organs/by-code?application_context=HuBMAP'
+    ONTOLOGY_API_ASSAY_TYPES_ENDPOINT = '/assaytype?application_context=HUBMAP'
+    ONTOLOGY_API_ORGAN_TYPES_ENDPOINT = '/organs/by-code?application_context=HUBMAP'
 
     DOI_BASE_URL = 'https://doi.org/'
 
diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py
index a4f36612..bba74e03 100644
--- a/src/schema/schema_manager.py
+++ b/src/schema/schema_manager.py
@@ -1804,9 +1804,8 @@ def get_organ_types():
     if response.status_code == 200:
         return response.json()
     else:
-        msg = "Unable to make a request to query the organ types via ontology-api: {id}"
         # Log the full stack trace, prepend a line with our message
-        logger.exception(msg)
+        logger.exception("Unable to make a request to query the organ types via ontology-api")
 
         logger.debug("======get_organ_types() status code from ontology-api======")
         logger.debug(response.status_code)
@@ -1868,9 +1867,8 @@ def get_assay_types():
 
         return assay_types_by_name
     else:
-        msg = "Unable to make a request to query the assay types via ontology-api"
         # Log the full stack trace, prepend a line with our message
-        logger.exception(msg)
+        logger.exception("Unable to make a request to query the assay types via ontology-api")
 
         logger.debug("======get_assay_types() status code from ontology-api======")
         logger.debug(response.status_code)

From ab37ce0f4256daf791223c02434fca8e7c881853 Mon Sep 17 00:00:00 2001
From: yuanzhou <zhy19@pitt.edu>
Date: Mon, 27 Nov 2023 20:02:34 -0500
Subject: [PATCH 09/12] Fix var name

---
 src/schema/schema_manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py
index bba74e03..edab58a7 100644
--- a/src/schema/schema_manager.py
+++ b/src/schema/schema_manager.py
@@ -1863,7 +1863,7 @@ def get_assay_types():
         # Due to the json envelop being used int the json result
         assay_types_list = result_dict['result']
         for assay_type_dict in assay_types_list:
-            assay_types_dict_by_name[assay_type_dict['name']] = assay_type_dict
+            assay_types_by_name[assay_type_dict['name']] = assay_type_dict
 
         return assay_types_by_name
     else:

From 68d233ea6bf75164332e01c8e7b1fb85e53920d9 Mon Sep 17 00:00:00 2001
From: yuanzhou <zhy19@pitt.edu>
Date: Mon, 27 Nov 2023 20:17:41 -0500
Subject: [PATCH 10/12] Fix sankey data and prov-info

---
 src/app.py                    | 10 +++++-----
 src/schema/schema_triggers.py |  1 +
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/app.py b/src/app.py
index 5e18cb59..307160d7 100644
--- a/src/app.py
+++ b/src/app.py
@@ -2845,7 +2845,7 @@ def get_prov_info():
                 distinct_organ_hubmap_id_list.append(item['hubmap_id'])
                 distinct_organ_submission_id_list.append(item['submission_id'])
                 distinct_organ_uuid_list.append(item['uuid'])
-                distinct_organ_type_list.append(organ_types_dict[item['organ']]['description'].lower())
+                distinct_organ_type_list.append(organ_types_dict[item['organ']].lower())
             internal_dict[HEADER_ORGAN_HUBMAP_ID] = distinct_organ_hubmap_id_list
             internal_dict[HEADER_ORGAN_SUBMISSION_ID] = distinct_organ_submission_id_list
             internal_dict[HEADER_ORGAN_UUID] = distinct_organ_uuid_list
@@ -3157,7 +3157,7 @@ def get_prov_info_for_dataset(id):
             distinct_organ_hubmap_id_list.append(item['hubmap_id'])
             distinct_organ_submission_id_list.append(item['submission_id'])
             distinct_organ_uuid_list.append(item['uuid'])
-            distinct_organ_type_list.append(organ_types_dict[item['organ']]['description'].lower())
+            distinct_organ_type_list.append(organ_types_dict[item['organ']].lower())
         internal_dict[HEADER_ORGAN_HUBMAP_ID] = distinct_organ_hubmap_id_list
         internal_dict[HEADER_ORGAN_SUBMISSION_ID] = distinct_organ_submission_id_list
         internal_dict[HEADER_ORGAN_UUID] = distinct_organ_uuid_list
@@ -3327,7 +3327,7 @@ def sankey_data():
         for dataset in sankey_info:
             internal_dict = collections.OrderedDict()
             internal_dict[HEADER_DATASET_GROUP_NAME] = dataset[HEADER_DATASET_GROUP_NAME]
-            internal_dict[HEADER_ORGAN_TYPE] = organ_types_dict[dataset[HEADER_ORGAN_TYPE]]['description'].lower()
+            internal_dict[HEADER_ORGAN_TYPE] = organ_types_dict[dataset[HEADER_ORGAN_TYPE]].lower()
             # Data type codes are replaced with data type descriptions
             assay_description = ""
             try:
@@ -3452,13 +3452,13 @@ def get_sample_prov_info():
         organ_submission_id = None
         if sample['organ_uuid'] is not None:
             organ_uuid = sample['organ_uuid']
-            organ_type = organ_types_dict[sample['organ_organ_type']]['description'].lower()
+            organ_type = organ_types_dict[sample['organ_organ_type']].lower()
             organ_hubmap_id = sample['organ_hubmap_id']
             organ_submission_id = sample['organ_submission_id']
         else:
             if sample['sample_category'] == "organ":
                 organ_uuid = sample['sample_uuid']
-                organ_type = organ_types_dict[sample['sample_organ']]['description'].lower()
+                organ_type = organ_types_dict[sample['sample_organ']].lower()
                 organ_hubmap_id = sample['sample_hubmap_id']
                 organ_submission_id = sample['sample_submission_id']
 
diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py
index 606e9af2..3446da2f 100644
--- a/src/schema/schema_triggers.py
+++ b/src/schema/schema_triggers.py
@@ -1086,6 +1086,7 @@ def get_dataset_title(property_key, normalized_type, user_token, existing_data_d
 
     return property_key, generated_title
 
+
 """
 Trigger event method of getting the uuid of the previous revision dataset if exists
 

From 9779fcf527dd3bfcf16031dacaca5959bf947de9 Mon Sep 17 00:00:00 2001
From: yuanzhou <zhy19@pitt.edu>
Date: Mon, 27 Nov 2023 21:47:58 -0500
Subject: [PATCH 11/12] Organ code validation

---
 src/app.py | 57 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 25 deletions(-)

diff --git a/src/app.py b/src/app.py
index 307160d7..86aa9e3b 100644
--- a/src/app.py
+++ b/src/app.py
@@ -951,9 +951,8 @@ def create_entity(entity_type):
             # A valid organ code must be present in the `organ` field
             if ('organ' not in json_data_dict) or (json_data_dict['organ'].strip() == ''):
                 bad_request_error("A valid organ code is required when registering an organ associated with a Donor")
-
-            # Must be one of the defined organ codes
-            # https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/organ_types.yaml
+            
+            # Must be a 2-letter alphabetic code and can be found in UBKG ontology-api
             validate_organ_code(json_data_dict['organ'])
         else:
             if 'organ' in json_data_dict:
@@ -2845,7 +2844,11 @@ def get_prov_info():
                 distinct_organ_hubmap_id_list.append(item['hubmap_id'])
                 distinct_organ_submission_id_list.append(item['submission_id'])
                 distinct_organ_uuid_list.append(item['uuid'])
-                distinct_organ_type_list.append(organ_types_dict[item['organ']].lower())
+
+                organ_code = item['organ'].upper()
+                validate_organ_code(organ_code)
+
+                distinct_organ_type_list.append(organ_types_dict[organ_code].lower())
             internal_dict[HEADER_ORGAN_HUBMAP_ID] = distinct_organ_hubmap_id_list
             internal_dict[HEADER_ORGAN_SUBMISSION_ID] = distinct_organ_submission_id_list
             internal_dict[HEADER_ORGAN_UUID] = distinct_organ_uuid_list
@@ -3157,7 +3160,11 @@ def get_prov_info_for_dataset(id):
             distinct_organ_hubmap_id_list.append(item['hubmap_id'])
             distinct_organ_submission_id_list.append(item['submission_id'])
             distinct_organ_uuid_list.append(item['uuid'])
-            distinct_organ_type_list.append(organ_types_dict[item['organ']].lower())
+
+            organ_code = item['organ'].upper()
+            validate_organ_code(organ_code)
+
+            distinct_organ_type_list.append(organ_types_dict[organ_code].lower())
         internal_dict[HEADER_ORGAN_HUBMAP_ID] = distinct_organ_hubmap_id_list
         internal_dict[HEADER_ORGAN_SUBMISSION_ID] = distinct_organ_submission_id_list
         internal_dict[HEADER_ORGAN_UUID] = distinct_organ_uuid_list
@@ -3327,7 +3334,11 @@ def sankey_data():
         for dataset in sankey_info:
             internal_dict = collections.OrderedDict()
             internal_dict[HEADER_DATASET_GROUP_NAME] = dataset[HEADER_DATASET_GROUP_NAME]
-            internal_dict[HEADER_ORGAN_TYPE] = organ_types_dict[dataset[HEADER_ORGAN_TYPE]].lower()
+
+            organ_code = dataset[HEADER_ORGAN_TYPE].upper()
+            validate_organ_code(organ_code)
+
+            internal_dict[HEADER_ORGAN_TYPE] = organ_types_dict[organ_code].lower()
             # Data type codes are replaced with data type descriptions
             assay_description = ""
             try:
@@ -3408,17 +3419,6 @@ def get_sample_prov_info():
     if user_in_hubmap_read_group(request):
         public_only = False
 
-    # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
-    # because that would require using a urllib request for each dataset
-    # response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)
-
-    # if response.status_code == 200:
-    #     yaml_file = response.text
-    #     try:
-    #         organ_types_dict = yaml.safe_load(yaml_file)
-    #     except yaml.YAMLError as e:
-    #         raise yaml.YAMLError(e)
-
     organ_types_dict = schema_manager.get_organ_types()
 
     # Processing and validating query parameters
@@ -3452,13 +3452,21 @@ def get_sample_prov_info():
         organ_submission_id = None
         if sample['organ_uuid'] is not None:
             organ_uuid = sample['organ_uuid']
-            organ_type = organ_types_dict[sample['organ_organ_type']].lower()
+
+            organ_code = sample['organ_organ_type'].upper()
+            validate_organ_code(organ_code)
+
+            organ_type = organ_types_dict[organ_code].lower()
             organ_hubmap_id = sample['organ_hubmap_id']
             organ_submission_id = sample['organ_submission_id']
         else:
             if sample['sample_category'] == "organ":
                 organ_uuid = sample['sample_uuid']
-                organ_type = organ_types_dict[sample['sample_organ']].lower()
+
+                organ_code = sample['sample_organ'].upper()
+                validate_organ_code(organ_code)
+
+                organ_type = organ_types_dict[organ_code].lower()
                 organ_hubmap_id = sample['sample_hubmap_id']
                 organ_submission_id = sample['sample_submission_id']
 
@@ -4764,22 +4772,21 @@ def access_level_prefix_dir(dir_name):
 
 
 """
-Ensures that a given organ code matches what is found on the organ_types yaml document
+Ensures that a given organ code is 2-letter alphabetic and can be found in the UBKG ontology-api
 
 Parameters
 ----------
 organ_code : str
-
-Returns
--------
-Returns nothing. Raises bad_request_error is organ code not found on organ_types.yaml 
 """
 def validate_organ_code(organ_code):
+    if not organ_code.isalpha() or not len(organ_code) == 2:
+        internal_server_error(f"Invalid organ code {organ_code}. Must be 2-letter alphabetic code")
+
     try:
         organ_types_dict = schema_manager.get_organ_types()
 
         if organ_code.upper() not in organ_types_dict:
-            bad_request_error(f"Invalid organ code. Must be 2 digit code")
+            internal_server_error(f"Unable to find organ code {organ_code} via the ontology-api")
     except:
         msg = f"Failed to validate the organ code: {organ_code}"
         # Log the full stack trace, prepend a line with our message

From ebfecad4bcf06b9e9f722e5362f7f2ffbab44321 Mon Sep 17 00:00:00 2001
From: yuanzhou <zhy19@pitt.edu>
Date: Mon, 27 Nov 2023 21:53:14 -0500
Subject: [PATCH 12/12] Further tweaks to organ code validation

---
 src/app.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/app.py b/src/app.py
index 86aa9e3b..ab01761f 100644
--- a/src/app.py
+++ b/src/app.py
@@ -4786,8 +4786,8 @@ def validate_organ_code(organ_code):
         organ_types_dict = schema_manager.get_organ_types()
 
         if organ_code.upper() not in organ_types_dict:
-            internal_server_error(f"Unable to find organ code {organ_code} via the ontology-api")
-    except:
+            not_found_error(f"Unable to find organ code {organ_code} via the ontology-api")
+    except requests.exceptions.RequestException:
         msg = f"Failed to validate the organ code: {organ_code}"
         # Log the full stack trace, prepend a line with our message
         logger.exception(msg)