From 91f88b340dce25ad6273f2503f84c7b4cb063e93 Mon Sep 17 00:00:00 2001
From: Karl Burke <kburke@pitt.edu>
Date: Mon, 4 Dec 2023 08:25:14 -0500
Subject: [PATCH] Merge YAML-to-UBKG work with initial support for Dataset
 dataset_type attribute create, update, and read.

---
 entity-api-spec.yaml                          |  53 ----
 src/app.py                                    | 252 +++++++-----------
 src/app_neo4j_queries.py                      |   5 +-
 src/instance/app.cfg.example                  |   4 +
 src/schema/provenance_schema.yaml             |  64 +++--
 src/schema/schema_constants.py                |  13 +-
 src/schema/schema_errors.py                   |   3 +
 src/schema/schema_manager.py                  | 237 +++++++++++++++-
 src/schema/schema_neo4j_queries.py            |  17 --
 src/schema/schema_triggers.py                 | 190 +------------
 src/schema/schema_validators.py               |  33 ++-
 .../api-template-test/entity-Template.yaml    |  54 ----
 .../example-yaml-templates/sample-schema.yaml |   9 -
 13 files changed, 421 insertions(+), 513 deletions(-)

diff --git a/entity-api-spec.yaml b/entity-api-spec.yaml
index 8fd04211..fbfc5548 100644
--- a/entity-api-spec.yaml
+++ b/entity-api-spec.yaml
@@ -406,59 +406,6 @@ components:
             - section
             - suspension
           description: "A code representing the type of specimen. Must be an organ, block, section, or suspension"
-        specimen_type:
-          type: string
-          enum:
-            - atacseq
-            - biopsy
-            - blood
-            - cell_lysate
-            - clarity_hydrogel
-            - codex
-            - cryosections_curls_from_fresh_frozen_oct
-            - cryosections_curls_rnalater
-            - ffpe_block
-            - ffpe_slide
-            - fixed_frozen_section_slide
-            - fixed_tissue_piece
-            - flash_frozen_liquid_nitrogen
-            - formalin_fixed_oct_block
-            - fresh_frozen_oct_block
-            - fresh_frozen_section_slide
-            - fresh_frozen_tissue
-            - fresh_frozen_tissue_section
-            - fresh_tissue
-            - frozen_cell_pellet_buffy_coat
-            - gdna
-            - module
-            - nuclei
-            - nuclei_rnalater
-            - organ
-            - organ_piece
-            - other
-            - pbmc
-            - pfa_fixed_frozen_oct_block
-            - plasma
-            - protein
-            - ran_poly_a_enriched
-            - rna_total
-            - rnalater_treated_and_stored
-            - rnaseq
-            - scatacseq
-            - scrnaseq
-            - segment
-            - seqfish
-            - sequence_library
-            - serum
-            - single_cell_cryopreserved
-            - snatacseq
-            - snrnaseq
-            - tissue_lysate
-            - wgs
-          description: "DEPRECATED:  No longer a required field. A code representing the type of specimen.  Must be one of the codes specified in: [tissue sample types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml)"
-        specimen_type_other:
-          type: string
-          description: "The user provided sample type if the 'other' sample_type is chosen."
         protocol_url:
           type: string
           description: "The protocols.io doi url pointing the protocol under wich the sample was obtained and/or prepared."
diff --git a/src/app.py b/src/app.py
index 4cbc05e8..8902643a 100644
--- a/src/app.py
+++ b/src/app.py
@@ -44,9 +44,6 @@
 global logger
 
 # Set logging format and level (default is warning)
-# All the API logging is forwarded to the uWSGI server and gets written into the log file `log/uwsgi-entity-api.log`
-# Log rotation is handled via logrotate on the host system with a configuration file
-# Do NOT handle log file and rotation via the Python logging to avoid issues with multi-worker processes
 logging.basicConfig(format='[%(asctime)s] %(levelname)s in %(module)s: %(message)s', level=logging.DEBUG, datefmt='%Y-%m-%d %H:%M:%S')
 
 # Use `getLogger()` instead of `getLogger(__name__)` to apply the config to the root logger
@@ -60,6 +57,7 @@
 # Remove trailing slash / from URL base to avoid "//" caused by config with trailing slash
 app.config['UUID_API_URL'] = app.config['UUID_API_URL'].strip('/')
 app.config['INGEST_API_URL'] = app.config['INGEST_API_URL'].strip('/')
+app.config['ONTOLOGY_API_URL'] = app.config['ONTOLOGY_API_URL'].strip('/')
 app.config['SEARCH_API_URL_LIST'] = [url.strip('/') for url in app.config['SEARCH_API_URL_LIST']]
 
 # This mode when set True disables the PUT and POST calls, used on STAGE to make entity-api READ-ONLY 
@@ -193,11 +191,18 @@ def http_internal_server_error(e):
 ####################################################################################################
 
 try:
+    try:
+        _schema_yaml_file = app.config['SCHEMA_YAML_FILE']
+    except KeyError as ke:
+        logger.error("Expected configuration failed to load %s from app_config=%s.", ke, app.config)
+        raise Exception("Expected configuration failed to load. See the logs.")
+
     # The schema_manager is a singleton module
     # Pass in auth_helper_instance, neo4j_driver instance, and memcached_client_instance
     schema_manager.initialize(app.config['SCHEMA_YAML_FILE'],
                               app.config['UUID_API_URL'],
                               app.config['INGEST_API_URL'],
+                              app.config['ONTOLOGY_API_URL'],
                               auth_helper_instance,
                               neo4j_driver_instance,
                               memcached_client_instance,
@@ -263,7 +268,6 @@ def http_internal_server_error(e):
 DATASET_STATUS_PUBLISHED = SchemaConstants.DATASET_STATUS_PUBLISHED
 COMMA_SEPARATOR = ','
 
-
 ####################################################################################################
 ## API Endpoints
 ####################################################################################################
@@ -437,7 +441,6 @@ def get_ancestor_organs(id):
         bad_request_error(f"Unable to get the ancestor organs for this: {normalized_entity_type},"
                           " supported entity types: Sample, Dataset, Publication")
 
-    # specimen_type -> sample_category 12/15/2022
     if normalized_entity_type == 'Sample' and entity_dict['sample_category'].lower() == 'organ':
         bad_request_error("Unable to get the ancestor organ of an organ.")
 
@@ -933,6 +936,8 @@ def create_entity(entity_type):
     # Currently only ValueError
     except ValueError as e:
         bad_request_error(e)
+    except schema_errors.UnimplementedValidatorException as uve:
+        internal_server_error(uve)
 
     # Additional validation for Sample entities
     if normalized_entity_type == 'Sample':
@@ -940,7 +945,6 @@ def create_entity(entity_type):
         # Check existence of the direct ancestor (either another Sample or Donor)
         direct_ancestor_dict = query_target_entity(direct_ancestor_uuid, user_token)
 
-        # specimen_type -> sample_category 12/15/2022
         # `sample_category` is required on create
         sample_category = json_data_dict['sample_category'].lower()
         
@@ -954,9 +958,8 @@ def create_entity(entity_type):
             # A valid organ code must be present in the `organ` field
             if ('organ' not in json_data_dict) or (json_data_dict['organ'].strip() == ''):
                 bad_request_error("A valid organ code is required when registering an organ associated with a Donor")
-
-            # Must be one of the defined organ codes
-            # https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/organ_types.yaml
+            
+            # Must be a 2-letter alphabetic code and can be found in UBKG ontology-api
             validate_organ_code(json_data_dict['organ'])
         else:
             if 'organ' in json_data_dict:
@@ -1113,7 +1116,6 @@ def create_multiple_samples(count):
     # sample's direct ancestor is a Donor.
     # Must be one of the codes from: https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/organ_types.yaml
     if direct_ancestor_dict['entity_type'] == 'Donor':
-        # specimen_type -> sample_category 12/15/2022
         # `sample_category` is required on create
         if json_data_dict['sample_category'].lower() != 'organ':
             bad_request_error("The sample_category must be organ since the direct ancestor is a Donor")
@@ -2671,7 +2673,8 @@ def get_prov_info():
     HEADER_DATASET_DATE_TIME_MODIFIED = 'dataset_date_time_modified'
     HEADER_DATASET_MODIFIED_BY_EMAIL = 'dataset_modified_by_email'
     HEADER_DATASET_LAB_ID = 'lab_id_or_name'
-    HEADER_DATASET_DATA_TYPES = 'dataset_data_types'
+    HEADER_DATASET_DATA_TYPES = 'dataset_data_types' # TODO-eliminate when HEADER_DATASET_DATASET_TYPE is required
+    HEADER_DATASET_DATASET_TYPE = 'dataset_dataset_type'
     HEADER_DATASET_PORTAL_URL = 'dataset_portal_url'
     HEADER_FIRST_SAMPLE_HUBMAP_ID = 'first_sample_hubmap_id'
     HEADER_FIRST_SAMPLE_SUBMISSION_ID = 'first_sample_submission_id'
@@ -2698,11 +2701,12 @@ def get_prov_info():
     HEADER_PROCESSED_DATASET_PORTAL_URL = 'processed_dataset_portal_url'
     HEADER_PREVIOUS_VERSION_HUBMAP_IDS = 'previous_version_hubmap_ids'
 
+    # TODO-Eliminate HEADER_DATASET_DATA_TYPES once HEADER_DATASET_DATASET_TYPE is required.
     headers = [
         HEADER_DATASET_UUID, HEADER_DATASET_HUBMAP_ID, HEADER_DATASET_STATUS, HEADER_DATASET_GROUP_NAME,
         HEADER_DATASET_GROUP_UUID, HEADER_DATASET_DATE_TIME_CREATED, HEADER_DATASET_CREATED_BY_EMAIL,
         HEADER_DATASET_DATE_TIME_MODIFIED, HEADER_DATASET_MODIFIED_BY_EMAIL, HEADER_DATASET_LAB_ID,
-        HEADER_DATASET_DATA_TYPES, HEADER_DATASET_PORTAL_URL, HEADER_FIRST_SAMPLE_HUBMAP_ID,
+        HEADER_DATASET_DATA_TYPES, HEADER_DATASET_DATASET_TYPE, HEADER_DATASET_PORTAL_URL, HEADER_FIRST_SAMPLE_HUBMAP_ID,
         HEADER_FIRST_SAMPLE_SUBMISSION_ID, HEADER_FIRST_SAMPLE_UUID, HEADER_FIRST_SAMPLE_TYPE,
         HEADER_FIRST_SAMPLE_PORTAL_URL, HEADER_ORGAN_HUBMAP_ID, HEADER_ORGAN_SUBMISSION_ID, HEADER_ORGAN_UUID,
         HEADER_ORGAN_TYPE, HEADER_DONOR_HUBMAP_ID, HEADER_DONOR_SUBMISSION_ID, HEADER_DONOR_UUID,
@@ -2722,26 +2726,12 @@ def get_prov_info():
 
     # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
     # because that would require using a urllib request for each dataset
-    response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            organ_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    organ_types_dict = schema_manager.get_organ_types()
 
     # As above, we parse te assay type yaml here rather than calling the special method for it because this avoids
     # having to access the resource for every dataset.
-    response = schema_manager.make_request_get(SchemaConstants.ASSAY_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            assay_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
-
+    assay_types_dict = schema_manager.get_assay_types()
+    
     # Processing and validating query parameters
     accepted_arguments = ['format', 'organ', 'has_rui_info', 'dataset_status', 'group_uuid']
     return_json = False
@@ -2809,26 +2799,25 @@ def get_prov_info():
 
         # Data type codes are replaced with data type descriptions
         assay_description_list = []
+        # TODO BEGIN evaluate elimination of this block, if it is still in place following the YAML-to-UBKG effort on https://github.com/hubmapconsortium/entity-api/issues/494,
+        # and once dataset['dataset_type'] is required and dataset['data_types'] removed.
         for item in dataset['data_types']:
             try:
                 assay_description_list.append(assay_types_dict[item]['description'])
-            # Some data types aren't given by their code in the assay types yaml and are instead given as an alt name.
-            # In these cases, we have to search each assay type and see if the given code matches any alternate names.
             except KeyError:
-                valid_key = False
-                for each in assay_types_dict:
-                    if valid_key is False:
-                        if item in assay_types_dict[each]['alt-names']:
-                            assay_description_list.append(assay_types_dict[each]['description'])
-                            valid_key = True
-                if valid_key is False:
-                    assay_description_list.append(item)
+                logger.exception(f"Data type {item} not found in resulting assay types via ontology-api")
+
+                # Just use the data type value
+                assay_description_list.append(item)
+
         dataset['data_types'] = assay_description_list
         internal_dict[HEADER_DATASET_DATA_TYPES] = dataset['data_types']
 
         # If return_format was not equal to json, json arrays must be converted into comma separated lists for the tsv
         if return_json is False:
             internal_dict[HEADER_DATASET_DATA_TYPES] = ",".join(dataset['data_types'])
+        # TODO END evaluate elimination of this block, if it is still in place following the YAML-to-UBKG effort on https://github.com/hubmapconsortium/entity-api/issues/494,
+        # and once dataset['dataset_type'] is required and dataset['data_types'] removed.
 
         internal_dict[HEADER_DATASET_PORTAL_URL] = app.config['DOI_REDIRECT_URL'].replace('<entity_type>', 'dataset').replace('<identifier>', dataset['uuid'])
 
@@ -2843,8 +2832,6 @@ def get_prov_info():
                 first_sample_hubmap_id_list.append(item['hubmap_id'])
                 first_sample_submission_id_list.append(item['submission_id'])
                 first_sample_uuid_list.append(item['uuid'])
-
-                # specimen_type -> sample_category 12/15/2022
                 first_sample_type_list.append(item['sample_category'])
 
                 first_sample_portal_url_list.append(app.config['DOI_REDIRECT_URL'].replace('<entity_type>', 'sample').replace('<identifier>', item['uuid']))
@@ -2870,7 +2857,11 @@ def get_prov_info():
                 distinct_organ_hubmap_id_list.append(item['hubmap_id'])
                 distinct_organ_submission_id_list.append(item['submission_id'])
                 distinct_organ_uuid_list.append(item['uuid'])
-                distinct_organ_type_list.append(organ_types_dict[item['organ']]['description'].lower())
+
+                organ_code = item['organ'].upper()
+                validate_organ_code(organ_code)
+
+                distinct_organ_type_list.append(organ_types_dict[organ_code].lower())
             internal_dict[HEADER_ORGAN_HUBMAP_ID] = distinct_organ_hubmap_id_list
             internal_dict[HEADER_ORGAN_SUBMISSION_ID] = distinct_organ_submission_id_list
             internal_dict[HEADER_ORGAN_UUID] = distinct_organ_uuid_list
@@ -3063,7 +3054,8 @@ def get_prov_info_for_dataset(id):
     HEADER_DATASET_DATE_TIME_MODIFIED = 'dataset_date_time_modified'
     HEADER_DATASET_MODIFIED_BY_EMAIL = 'dataset_modified_by_email'
     HEADER_DATASET_LAB_ID = 'lab_id_or_name'
-    HEADER_DATASET_DATA_TYPES = 'dataset_data_types'
+    HEADER_DATASET_DATA_TYPES = 'dataset_data_types' # TODO-eliminate when HEADER_DATASET_DATASET_TYPE is required
+    HEADER_DATASET_DATASET_TYPE = 'dataset_dataset_type'
     HEADER_DATASET_PORTAL_URL = 'dataset_portal_url'
     HEADER_DATASET_SAMPLES = 'dataset_samples'
     HEADER_FIRST_SAMPLE_HUBMAP_ID = 'first_sample_hubmap_id'
@@ -3090,11 +3082,12 @@ def get_prov_info_for_dataset(id):
     HEADER_PROCESSED_DATASET_STATUS = 'processed_dataset_status'
     HEADER_PROCESSED_DATASET_PORTAL_URL = 'processed_dataset_portal_url'
 
+    # TODO-Eliminate HEADER_DATASET_DATA_TYPES once HEADER_DATASET_DATASET_TYPE is required.
     headers = [
         HEADER_DATASET_UUID, HEADER_DATASET_HUBMAP_ID, HEADER_DATASET_STATUS, HEADER_DATASET_GROUP_NAME,
         HEADER_DATASET_GROUP_UUID, HEADER_DATASET_DATE_TIME_CREATED, HEADER_DATASET_CREATED_BY_EMAIL,
         HEADER_DATASET_DATE_TIME_MODIFIED, HEADER_DATASET_MODIFIED_BY_EMAIL, HEADER_DATASET_LAB_ID,
-        HEADER_DATASET_DATA_TYPES, HEADER_DATASET_PORTAL_URL, HEADER_FIRST_SAMPLE_HUBMAP_ID,
+        HEADER_DATASET_DATA_TYPES, HEADER_DATASET_DATASET_TYPE, HEADER_DATASET_PORTAL_URL, HEADER_FIRST_SAMPLE_HUBMAP_ID,
         HEADER_FIRST_SAMPLE_SUBMISSION_ID, HEADER_FIRST_SAMPLE_UUID, HEADER_FIRST_SAMPLE_TYPE,
         HEADER_FIRST_SAMPLE_PORTAL_URL, HEADER_ORGAN_HUBMAP_ID, HEADER_ORGAN_SUBMISSION_ID, HEADER_ORGAN_UUID,
         HEADER_ORGAN_TYPE, HEADER_DONOR_HUBMAP_ID, HEADER_DONOR_SUBMISSION_ID, HEADER_DONOR_UUID,
@@ -3106,25 +3099,11 @@ def get_prov_info_for_dataset(id):
 
     # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
     # because that would require using a urllib request for each dataset
-    response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            organ_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    organ_types_dict = schema_manager.get_organ_types()
 
     # As above, we parse te assay type yaml here rather than calling the special method for it because this avoids
     # having to access the resource for every dataset.
-    response = schema_manager.make_request_get(SchemaConstants.ASSAY_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            assay_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    assay_types_dict = schema_manager.get_assay_types()
 
     hubmap_ids = schema_manager.get_hubmap_ids(id)
 
@@ -3147,24 +3126,26 @@ def get_prov_info_for_dataset(id):
 
     # Data type codes are replaced with data type descriptions
     assay_description_list = []
+    # TODO BEGIN evaluate elimination of this block, if it is still in place following the YAML-to-UBKG effort on https://github.com/hubmapconsortium/entity-api/issues/494,
+    # and once dataset['dataset_type'] is required and dataset['data_types'] removed.
     for item in dataset['data_types']:
         try:
             assay_description_list.append(assay_types_dict[item]['description'])
-        # Some data types aren't given by their code in the assay types yaml and are instead given as an alt name.
-        # In these cases, we have to search each assay type and see if the given code matches any alternate names.
         except KeyError:
-            valid_key = False
-            for each in assay_types_dict:
-                if valid_key is False:
-                    if item in assay_types_dict[each]['alt-names']:
-                        assay_description_list.append(assay_types_dict[each]['description'])
-                        valid_key = True
-            if valid_key is False:
-                assay_description_list.append(item)
+            logger.exception(f"Data type {item} not found in resulting assay types via ontology-api")
+
+            # Just use the data type value
+            assay_description_list.append(item)
+
     dataset['data_types'] = assay_description_list
     internal_dict[HEADER_DATASET_DATA_TYPES] = dataset['data_types']
     if return_json is False:
         internal_dict[HEADER_DATASET_DATA_TYPES] = ",".join(dataset['data_types'])
+    # TODO END evaluate elimination of this block, if it is still in place following the YAML-to-UBKG effort on https://github.com/hubmapconsortium/entity-api/issues/494,
+    # and once dataset['dataset_type'] is required and dataset['data_types'] removed.
+
+    internal_dict[HEADER_DATASET_DATASET_TYPE] = dataset['dataset_type']
+
     internal_dict[HEADER_DATASET_PORTAL_URL] = app.config['DOI_REDIRECT_URL'].replace('<entity_type>', 'dataset').replace(
         '<identifier>', dataset['uuid'])
     if dataset['first_sample'] is not None:
@@ -3177,8 +3158,6 @@ def get_prov_info_for_dataset(id):
             first_sample_hubmap_id_list.append(item['hubmap_id'])
             first_sample_submission_id_list.append(item['submission_id'])
             first_sample_uuid_list.append(item['uuid'])
-
-            # specimen_type -> sample_category 12/15/2022
             first_sample_type_list.append(item['sample_category'])
 
             first_sample_portal_url_list.append(
@@ -3203,7 +3182,11 @@ def get_prov_info_for_dataset(id):
             distinct_organ_hubmap_id_list.append(item['hubmap_id'])
             distinct_organ_submission_id_list.append(item['submission_id'])
             distinct_organ_uuid_list.append(item['uuid'])
-            distinct_organ_type_list.append(organ_types_dict[item['organ']]['description'].lower())
+
+            organ_code = item['organ'].upper()
+            validate_organ_code(organ_code)
+
+            distinct_organ_type_list.append(organ_types_dict[organ_code].lower())
         internal_dict[HEADER_ORGAN_HUBMAP_ID] = distinct_organ_hubmap_id_list
         internal_dict[HEADER_ORGAN_SUBMISSION_ID] = distinct_organ_submission_id_list
         internal_dict[HEADER_ORGAN_UUID] = distinct_organ_uuid_list
@@ -3296,7 +3279,6 @@ def get_prov_info_for_dataset(id):
         else:
             requested_samples = {}
             for uuid in dataset_samples.keys():
-                # specimen_type -> sample_category 12/15/2022
                 if dataset_samples[uuid]['sample_category'] in include_samples:
                     requested_samples[uuid] = dataset_samples[uuid]
             internal_dict[HEADER_DATASET_SAMPLES] = requested_samples
@@ -3335,7 +3317,7 @@ def get_prov_info_for_dataset(id):
 -------
 json
     a json array. Each item in the array corresponds to a dataset. Each dataset has the values: dataset_group_name, 
-    organ_type, dataset_data_types, and dataset_status, each of which is a string. 
+    organ_type, dataset_data_types, and dataset_status, each of which is a string. # TODO: add dataset_dataset_type to this documentation.
 
 """
 @app.route('/datasets/sankey_data', methods=['GET'])
@@ -3343,32 +3325,19 @@ def sankey_data():
     # String constants
     HEADER_DATASET_GROUP_NAME = 'dataset_group_name'
     HEADER_ORGAN_TYPE = 'organ_type'
-    HEADER_DATASET_DATA_TYPES = 'dataset_data_types'
+    HEADER_DATASET_DATA_TYPES = 'dataset_data_types' # TODO-eliminate when HEADER_DATASET_DATASET_TYPE is required
+    HEADER_DATASET_DATASET_TYPE = 'dataset_dataset_type'
     HEADER_DATASET_STATUS = 'dataset_status'
 
     with open('sankey_mapping.json') as f:
         mapping_dict = json.load(f)
     # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
     # because that would require using a urllib request for each dataset
-    response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            organ_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    organ_types_dict = schema_manager.get_organ_types()
 
     # As above, we parse te assay type yaml here rather than calling the special method for it because this avoids
     # having to access the resource for every dataset.
-    response = schema_manager.make_request_get(SchemaConstants.ASSAY_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            assay_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    assay_types_dict = schema_manager.get_assay_types()
 
     # Instantiation of the list dataset_sankey_list
     dataset_sankey_list = []
@@ -3388,22 +3357,23 @@ def sankey_data():
         for dataset in sankey_info:
             internal_dict = collections.OrderedDict()
             internal_dict[HEADER_DATASET_GROUP_NAME] = dataset[HEADER_DATASET_GROUP_NAME]
-            internal_dict[HEADER_ORGAN_TYPE] = organ_types_dict[dataset[HEADER_ORGAN_TYPE]]['description'].lower()
+
+            # TODO BEGIN evaluate elimination of this block, if it is still in place following the YAML-to-UBKG effort on https://github.com/hubmapconsortium/entity-api/issues/494,
+            # and once dataset['dataset_type'] is required and dataset['data_types'] removed.
+            organ_code = dataset[HEADER_ORGAN_TYPE].upper()
+            validate_organ_code(organ_code)
+
+            internal_dict[HEADER_ORGAN_TYPE] = organ_types_dict[organ_code].lower()
             # Data type codes are replaced with data type descriptions
             assay_description = ""
             try:
                 assay_description = assay_types_dict[dataset[HEADER_DATASET_DATA_TYPES]]['description']
-            # Some data types aren't given by their code in the assay types yaml and are instead given as an alt name.
-            # In these cases, we have to search each assay type and see if the given code matches any alternate names.
             except KeyError:
-                valid_key = False
-                for each in assay_types_dict:
-                    if valid_key is False:
-                        if dataset[HEADER_DATASET_DATA_TYPES] in assay_types_dict[each]['alt-names']:
-                            assay_description = assay_types_dict[each]['description']
-                            valid_key = True
-                if valid_key is False:
-                    assay_description = dataset[HEADER_DATASET_DATA_TYPES]
+                logger.exception(f"Data type {dataset[HEADER_DATASET_DATA_TYPES]} not found in resulting assay types via ontology-api")
+
+                # Just use the data type value
+                assay_description = dataset[HEADER_DATASET_DATA_TYPES]
+
             internal_dict[HEADER_DATASET_DATA_TYPES] = assay_description
 
             # Replace applicable Group Name and Data type with the value needed for the sankey via the mapping_dict
@@ -3412,6 +3382,8 @@ def sankey_data():
                 internal_dict[HEADER_DATASET_GROUP_NAME] = mapping_dict[internal_dict[HEADER_DATASET_GROUP_NAME]]
             if internal_dict[HEADER_DATASET_DATA_TYPES] in mapping_dict.keys():
                 internal_dict[HEADER_DATASET_DATA_TYPES] = mapping_dict[internal_dict[HEADER_DATASET_DATA_TYPES]]
+            # TODO END evaluate elimination of this block, if it is still in place following the YAML-to-UBKG effort on https://github.com/hubmapconsortium/entity-api/issues/494,
+            # and once dataset['dataset_type'] is required and dataset['data_types'] removed.
 
             # Each dataset's dictionary is added to the list to be returned
             dataset_sankey_list.append(internal_dict)
@@ -3474,16 +3446,7 @@ def get_sample_prov_info():
     if user_in_hubmap_read_group(request):
         public_only = False
 
-    # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
-    # because that would require using a urllib request for each dataset
-    response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            organ_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    organ_types_dict = schema_manager.get_organ_types()
 
     # Processing and validating query parameters
     accepted_arguments = ['group_uuid']
@@ -3516,14 +3479,21 @@ def get_sample_prov_info():
         organ_submission_id = None
         if sample['organ_uuid'] is not None:
             organ_uuid = sample['organ_uuid']
-            organ_type = organ_types_dict[sample['organ_organ_type']]['description'].lower()
+
+            organ_code = sample['organ_organ_type'].upper()
+            validate_organ_code(organ_code)
+
+            organ_type = organ_types_dict[organ_code].lower()
             organ_hubmap_id = sample['organ_hubmap_id']
             organ_submission_id = sample['organ_submission_id']
         else:
-            # sample_specimen_type -> sample_category 12/15/2022
             if sample['sample_category'] == "organ":
                 organ_uuid = sample['sample_uuid']
-                organ_type = organ_types_dict[sample['sample_organ']]['description'].lower()
+
+                organ_code = sample['sample_organ'].upper()
+                validate_organ_code(organ_code)
+
+                organ_type = organ_types_dict[organ_code].lower()
                 organ_hubmap_id = sample['sample_hubmap_id']
                 organ_submission_id = sample['sample_submission_id']
 
@@ -3548,10 +3518,7 @@ def get_sample_prov_info():
         internal_dict[HEADER_SAMPLE_HAS_METADATA] = sample_has_metadata
         internal_dict[HEADER_SAMPLE_HAS_RUI_INFO] = sample_has_rui_info
         internal_dict[HEADER_SAMPLE_DIRECT_ANCESTOR_ID] = sample['sample_ancestor_id']
-
-        # sample_specimen_type -> sample_category 12/15/2022
         internal_dict[HEADER_SAMPLE_TYPE] = sample['sample_category']
-
         internal_dict[HEADER_SAMPLE_HUBMAP_ID] = sample['sample_hubmap_id']
         internal_dict[HEADER_SAMPLE_SUBMISSION_ID] = sample['sample_submission_id']
         internal_dict[HEADER_SAMPLE_DIRECT_ANCESTOR_ENTITY_TYPE] = sample['sample_ancestor_entity']
@@ -3594,16 +3561,17 @@ def get_sample_prov_info():
 json
     an array of each unpublished dataset.
     fields: ("data_types", "donor_hubmap_id", "donor_submission_id", "hubmap_id", "organ", "organization", 
-             "provider_experiment_id", "uuid")
+             "provider_experiment_id", "uuid")  # TODO: add dataset_type to this documentation.
 tsv
     a text/tab-seperated-value document including each unpublished dataset.
     fields: ("data_types", "donor_hubmap_id", "donor_submission_id", "hubmap_id", "organ", "organization", 
-             "provider_experiment_id", "uuid")
+             "provider_experiment_id", "uuid")  # TODO: add dataset_type to this documentation.
 """
 @app.route('/datasets/unpublished', methods=['GET'])
 def unpublished():
     # String constraints
-    HEADER_DATA_TYPES = "data_types"
+    HEADER_DATA_TYPES = "data_types" # TODO-eliminate when HEADER_DATASET_TYPE is required
+    HEADER_DATASET_TYPE = 'dataset_type'
     HEADER_ORGANIZATION = "organization"
     HEADER_UUID = "uuid"
     HEADER_HUBMAP_ID = "hubmap_id"
@@ -3612,8 +3580,9 @@ def unpublished():
     HEADER_SUBMISSION_ID = "donor_submission_id"
     HEADER_PROVIDER_EXPERIMENT_ID = "provider_experiment_id"
 
+    # TODO-Eliminate HEADER_DATA_TYPES once HEADER_DATASET_TYPE is required.
     headers = [
-        HEADER_DATA_TYPES, HEADER_ORGANIZATION, HEADER_UUID, HEADER_HUBMAP_ID, HEADER_ORGAN, HEADER_DONOR_HUBMAP_ID,
+        HEADER_DATA_TYPES, HEADER_DATASET_TYPE, HEADER_ORGANIZATION, HEADER_UUID, HEADER_HUBMAP_ID, HEADER_ORGAN, HEADER_DONOR_HUBMAP_ID,
         HEADER_SUBMISSION_ID, HEADER_PROVIDER_EXPERIMENT_ID
     ]
 
@@ -4832,45 +4801,28 @@ def access_level_prefix_dir(dir_name):
 
 
 """
-Ensures that a given organ code matches what is found on the organ_types yaml document
+Ensures that a given organ code is 2-letter alphabetic and can be found in the UBKG ontology-api
 
 Parameters
 ----------
 organ_code : str
-
-Returns
--------
-Returns nothing. Raises bad_request_error is organ code not found on organ_types.yaml 
 """
 def validate_organ_code(organ_code):
-    yaml_file_url = SchemaConstants.ORGAN_TYPES_YAML
-
-    # Use Memcached to improve performance
-    response = schema_manager.make_request_get(yaml_file_url)
+    if not organ_code.isalpha() or not len(organ_code) == 2:
+        internal_server_error(f"Invalid organ code {organ_code}. Must be 2-letter alphabetic code")
 
-    if response.status_code == 200:
-        yaml_file = response.text
+    try:
+        organ_types_dict = schema_manager.get_organ_types()
 
-        try:
-            organ_types_dict = yaml.safe_load(response.text)
-            
-            if organ_code.upper() not in organ_types_dict:
-                bad_request_error(f"Invalid organ code. Must be 2 digit code specified {yaml_file_url}")
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
-    else:
-        msg = f"Unable to fetch the: {yaml_file_url}"
+        if organ_code.upper() not in organ_types_dict:
+            not_found_error(f"Unable to find organ code {organ_code} via the ontology-api")
+    except requests.exceptions.RequestException:
+        msg = f"Failed to validate the organ code: {organ_code}"
         # Log the full stack trace, prepend a line with our message
         logger.exception(msg)
 
-        logger.debug("======validate_organ_code() status code======")
-        logger.debug(response.status_code)
-
-        logger.debug("======validate_organ_code() response text======")
-        logger.debug(response.text)
-
         # Terminate and let the users know
-        internal_server_error(f"Failed to validate the organ code: {organ_code}")
+        internal_server_error(msg)
 
 
 ####################################################################################################
diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py
index 586edf45..87183315 100644
--- a/src/app_neo4j_queries.py
+++ b/src/app_neo4j_queries.py
@@ -736,7 +736,7 @@ def get_prov_info(neo4j_driver, param_dict, published_only):
              f" WITH ds, FIRSTSAMPLE, DONOR, REVISIONS, METASAMPLE, RUISAMPLE, ORGAN, COLLECT(distinct processed_dataset) AS PROCESSED_DATASET"
              f" RETURN ds.uuid, FIRSTSAMPLE, DONOR, RUISAMPLE, ORGAN, ds.hubmap_id, ds.status, ds.group_name,"
              f" ds.group_uuid, ds.created_timestamp, ds.created_by_user_email, ds.last_modified_timestamp, "
-             f" ds.last_modified_user_email, ds.lab_dataset_id, ds.data_types, METASAMPLE, PROCESSED_DATASET, REVISIONS")
+             f" ds.last_modified_user_email, ds.lab_dataset_id, ds.data_types, METASAMPLE, PROCESSED_DATASET, REVISIONS") # TODO replace ds.data_types with ds.dataset_type when required
 
     logger.info("======get_prov_info() query======")
     logger.info(query)
@@ -834,7 +834,7 @@ def get_individual_prov_info(neo4j_driver, dataset_uuid):
              f" WITH ds, FIRSTSAMPLE, DONOR, METASAMPLE, RUISAMPLE, ORGAN, COLLECT(distinct processed_dataset) AS PROCESSED_DATASET"
              f" RETURN ds.uuid, FIRSTSAMPLE, DONOR, RUISAMPLE, ORGAN, ds.hubmap_id, ds.status, ds.group_name,"
              f" ds.group_uuid, ds.created_timestamp, ds.created_by_user_email, ds.last_modified_timestamp, "
-             f" ds.last_modified_user_email, ds.lab_dataset_id, ds.data_types, METASAMPLE, PROCESSED_DATASET")
+             f" ds.last_modified_user_email, ds.lab_dataset_id, ds.data_types, METASAMPLE, PROCESSED_DATASET, ds.dataset_type")
     logger.info("======get_prov_info() query======")
     logger.info(query)
 
@@ -891,6 +891,7 @@ def get_individual_prov_info(neo4j_driver, dataset_uuid):
                 node_dict = schema_neo4j_queries.node_to_dict(entry)
                 content_sixteen.append(node_dict)
             record_dict['processed_dataset'] = content_sixteen
+            record_dict['dataset_type'] = record_contents[17] if record_contents[17] is not None else ''
     return record_dict
 
 
diff --git a/src/instance/app.cfg.example b/src/instance/app.cfg.example
index 0c55f5bd..839972dc 100644
--- a/src/instance/app.cfg.example
+++ b/src/instance/app.cfg.example
@@ -28,6 +28,10 @@ UUID_API_URL = 'http://uuid-api:8080'
 # Works regardless of the trailing slash
 INGEST_API_URL = 'https://ingest-api.dev.hubmapconsortium.org'
 
+# URL for talking to Ontology API (default for DEV)
+# Works regardless of the trailing slash
+ONTOLOGY_API_URL = 'https://ontology-api.dev.hubmapconsortium.org'
+
 # A list of URLs for talking to multiple Search API instances (default value used for docker deployment, no token needed)
 # Works regardless of the trailing slash /
 SEARCH_API_URL_LIST = ['http://search-api:8080']
diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml
index 05174972..0e3bda8c 100644
--- a/src/schema/provenance_schema.yaml
+++ b/src/schema/provenance_schema.yaml
@@ -332,6 +332,14 @@ ENTITIES:
         type: list
         required_on_create: true # Only required for create via POST, not update via PUT
         description: "The data or assay types contained in this dataset as a json array of strings.  Each is an assay code from [assay types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/assay_types.yaml)."
+      dataset_type:
+        before_property_create_validators:
+          - validate_recognized_dataset_type
+        before_property_update_validators:
+          - validate_recognized_dataset_type
+        type: string
+        required_on_create: false # Once this replaces data_types, it will be required for create via POST, not update via PUT
+        description: "The assay types of this Dataset. Valid values come from UBKG and are queried by schema_manager.get_dataset_type_valueset_list() using the Ontology API."
       collections:
         type: list
         transient: true
@@ -901,33 +909,6 @@ ENTITIES:
           - validate_sample_category
         before_property_update_validators:
           - validate_sample_category
-
-      # No logner required on create, specimen_type -> sample_category 12/15/2022
-      specimen_type:
-        type: string
-        #required_on_create: true # Only required for create via POST, not update via PUT
-        description: "A code representing the type of specimen.  Must be one of the codes specified in: [tissue sample types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml)"
-        # Validate the given value against the definitions: https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml
-        # Disabled validation 12/15/2022
-        # before_property_create_validators:
-        #   - validate_specimen_type
-        # before_property_update_validators:
-        #   - validate_specimen_type
-      specimen_type_other:
-        type: string
-        description: "The user provided sample type if the 'other' sample_type is chosen."
-      
-
-      # specimen_type no logner required on create, will remove this field when removing specimen_type
-      # Simply always set to 'Unknown' and no need to update 12/15/2022
-      tissue_type:
-        type: string
-        generated: true # Can not be updated via the PUT
-        #auto_update: true # Will always update automatically if the entity gets updated
-        description: 'The type of the tissue based on the mapping between type (Block/Section/Suspension) and the specimen_type, default is Unknown'
-        before_create_trigger: set_tissue_type
-        #before_update_trigger: set_tissue_type
-
       portal_metadata_upload_files:
         type: json_string
         description: "A list of relative paths to metadata files"
@@ -940,6 +921,34 @@ ENTITIES:
         #todo: migrate to new attribute set as above portal_metadata_upload files
         type: json_string
         description: "A list of uploaded image files and descriptions of the files."
+      thumbnail_file:
+        generated: true
+        type: json_string
+        description: "The dataset thumbnail file detail. Stored in db as a stringified json, e.g., {'filename': 'thumbnail.jpg', 'file_uuid': 'dadasdasdadda'}"
+        # The updated_peripherally tag is a temporary measure to correctly handle any attributes
+        # which are potentially updated by multiple triggers
+        updated_peripherally: true
+      thumbnail_file_to_add:
+        type: json_string 
+        transient: true
+        exposed: false
+        description: 'Just a temporary file id. Provide as a json object with a temp_file_id like {"temp_file_id":"dzevgd6xjs4d5grmcp4n"}'
+        before_create_trigger: commit_thumbnail_file
+        # This before_update_trigger with the same commit process can be used by ingest-api to update the dataset via PUT call
+        before_update_trigger: commit_thumbnail_file
+        # The updated_peripherally tag is a temporary measure to correctly handle any attributes
+        # which are potentially updated by multiple triggers
+        updated_peripherally: true
+      thumbnail_file_to_remove:
+        # This is only valid on update via a PUT request
+        type: string 
+        transient: true
+        exposed: false
+        description: 'The thumbnail image file previously uploaded to delete. Provide as a string of the file_uuid like: "232934234234234234234270c0ea6c51d604a850558ef2247d0b4"'
+        before_update_trigger: delete_thumbnail_file
+        # The updated_peripherally tag is a temporary measure to correctly handle any attributes
+        # which are potentially updated by multiple triggers
+        updated_peripherally: true
       # A user who is a member of multiple groups HAS to send in the group_uuid 
       group_uuid:
         type: string
@@ -953,7 +962,6 @@ ENTITIES:
         immutable: true
         description: "The displayname of globus group which the user who created this entity is a member of"
         before_create_trigger: set_group_name
-      # Should be required on create only when specimen_type==organ
       organ:
         type: string
         description: "Organ code specifier, only set if sample_type == organ.  Valid values found in: [organ types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/organ_types.yaml)"
diff --git a/src/schema/schema_constants.py b/src/schema/schema_constants.py
index 5e4ad332..6a471587 100644
--- a/src/schema/schema_constants.py
+++ b/src/schema/schema_constants.py
@@ -2,23 +2,20 @@
 class SchemaConstants(object):
     MEMCACHED_TTL = 7200
 
-    # Constants used by validators
     INGEST_API_APP = 'ingest-api'
     INGEST_PIPELINE_APP = 'ingest-pipeline'
     HUBMAP_APP_HEADER = 'X-Hubmap-Application'
     DATASET_STATUS_PUBLISHED = 'published'
 
-    # Used by triggers, all lowercase for easy comparision
     ACCESS_LEVEL_PUBLIC = 'public'
     ACCESS_LEVEL_CONSORTIUM = 'consortium'
     ACCESS_LEVEL_PROTECTED = 'protected'
 
-    # Yaml file to parse organ description
-    ORGAN_TYPES_YAML = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/organ_types.yaml'
-    ASSAY_TYPES_YAML = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/assay_types.yaml'
-
-    # For generating Sample.tissue_type
-    TISSUE_TYPES_YAML = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml'
+    UUID_API_ID_ENDPOINT = '/uuid'
+    INGEST_API_FILE_COMMIT_ENDPOINT = '/file-commit'
+    INGEST_API_FILE_REMOVE_ENDPOINT = '/file-remove'
+    ONTOLOGY_API_ASSAY_TYPES_ENDPOINT = '/assaytype?application_context=HUBMAP'
+    ONTOLOGY_API_ORGAN_TYPES_ENDPOINT = '/organs/by-code?application_context=HUBMAP'
 
     DOI_BASE_URL = 'https://doi.org/'
 
diff --git a/src/schema/schema_errors.py b/src/schema/schema_errors.py
index 47690b52..90a900a0 100644
--- a/src/schema/schema_errors.py
+++ b/src/schema/schema_errors.py
@@ -1,4 +1,7 @@
 
+class UnimplementedValidatorException(Exception):
+    pass
+
 class SchemaValidationException(Exception):
     pass
 
diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py
index 0cafdecf..42b0a514 100644
--- a/src/schema/schema_manager.py
+++ b/src/schema/schema_manager.py
@@ -31,6 +31,7 @@
 _schema = None
 _uuid_api_url = None
 _ingest_api_url = None
+_ontology_api_url = None
 _auth_helper = None
 _neo4j_driver = None
 _memcached_client = None
@@ -50,9 +51,11 @@
 valid_yaml_file : file
     A valid yaml file
 uuid_api_url : str
-    The uuid-api URL
+    The uuid-api base URL
 ingest_api_url : str
-    The ingest-api URL
+    The ingest-api base URL
+ontology_api_url : str
+    The ontology-api base URL
 auth_helper_instance : AuthHelper
     The auth helper instance
 neo4j_driver_instance : neo4j_driver
@@ -65,6 +68,7 @@
 def initialize(valid_yaml_file, 
                uuid_api_url,
                ingest_api_url,
+               ontology_api_url,
                auth_helper_instance,
                neo4j_driver_instance,
                memcached_client_instance,
@@ -73,14 +77,33 @@ def initialize(valid_yaml_file,
     global _schema
     global _uuid_api_url
     global _ingest_api_url
+    global _ontology_api_url
     global _auth_helper
     global _neo4j_driver
     global _memcached_client
     global _memcached_prefix
 
     _schema = load_provenance_schema(valid_yaml_file)
-    _uuid_api_url = uuid_api_url
-    _ingest_api_url = ingest_api_url
+    if uuid_api_url is not None:
+        _uuid_api_url = uuid_api_url
+    else:
+        msg = f"Unable to initialize schema manager with uuid_api_url={uuid_api_url}."
+        logger.critical(msg=msg)
+        raise Exception(msg)
+
+    if ingest_api_url is not None:
+        _ingest_api_url = ingest_api_url
+    else:
+        msg = f"Unable to initialize schema manager with ingest_api_url={ingest_api_url}."
+        logger.critical(msg=msg)
+        raise Exception(msg)
+
+    if ontology_api_url is not None:
+        _ontology_api_url = ontology_api_url
+    else:
+        msg = f"Unable to initialize schema manager with ontology_api_url={ontology_api_url}."
+        logger.critical(msg=msg)
+        raise Exception(msg)
 
     # Get the helper instances
     _auth_helper = auth_helper_instance
@@ -839,7 +862,7 @@ def execute_entity_level_validator(validator_type, normalized_entity_type, reque
             except schema_errors.InvalidApplicationHeaderException as e: 
                 raise schema_errors.InvalidApplicationHeaderException(e)
             except Exception:
-                msg = f"Failed to call the {validator_type} method: {validator_method_name} defiend for entity {normalized_entity_type}"
+                msg = f"Failed to call the {validator_type} method: {validator_method_name} defined for entity {normalized_entity_type}"
                 # Log the full stack trace, prepend a line with our message
                 logger.exception(msg)
 
@@ -892,11 +915,16 @@ def execute_property_level_validators(validator_type, normalized_entity_type, re
                     raise schema_errors.InvalidApplicationHeaderException(e)
                 except ValueError as ve:
                     raise ValueError(ve)
-                except Exception as e:
+                except schema_errors.UnimplementedValidatorException as uve:
                     msg = f"Failed to call the {validator_type} method: {validator_method_name} defined for entity {normalized_entity_type} on property {key}"
                     # Log the full stack trace, prepend a line with our message
+                    logger.exception(f"{msg}. {str(uve)}")
+                    raise uve
+                except Exception as e:
+                    msg = f"Unexpected exception @TODO-KBKBKB calling {validator_type} method: {validator_method_name} defined for entity {normalized_entity_type} on property {key}"
+                    # Log the full stack trace, prepend a line with our message
                     logger.exception(f"{msg}. {str(e)}")
-
+                    raise e
 
 """
 Get a list of entity types that can be used as derivation source in the schmea yaml
@@ -1206,7 +1234,7 @@ def get_user_info(request):
 def get_hubmap_ids(id):
     global _uuid_api_url
 
-    target_url = _uuid_api_url + '/uuid/' + id
+    target_url = _uuid_api_url + SchemaConstants.UUID_API_ID_ENDPOINT + '/' + id
 
     # Use Memcached to improve performance
     response = make_request_get(target_url, internal_token_used = True)
@@ -1234,6 +1262,86 @@ def get_hubmap_ids(id):
         raise requests.exceptions.RequestException(response.text)
 
 
+"""
+Helper function to use the Ontology API to retrieve a valueset from UBKG containing
+allowed values for soft assays. A soft assay value is the leading part of the Dataset
+dataset_type field, i.e. the part before any square brackets and whatever they contain.
+
+Examples of valid dataset_type values are "RNASeq" and "CODEX [cytokit, image_pyramid]" 
+
+Parameters
+----------
+N/A: This helper encapsulates hard-coded strings for soft assay values from the HUBMAP
+     source vocabulary of UBKG.
+
+Returns
+-------
+List of String values for each element in the UBKG valueset for valid dataset_type soft assay entries.
+['Histology','Molecular Cartography',...]
+"""
+def get_dataset_type_valueset_list():
+    # Use the Ontology API to get JSON for allowed terms.
+    ubkg_valueset = get_valueset(parent_vocabulary_sab='HUBMAP'
+                                 ,parent_vocabulary_valueset_code='C003041'
+                                 ,value_preferred_vocabulary_sab='HUBMAP')
+    # Extract the term elements from the JSON into a list to be returned.
+    return [v['term'] for v in ubkg_valueset]
+
+"""
+Use the Ontology API valueset endpoint to retrieve the UBKG valueset for a particular
+"parent" vocabulary & term.  The preferred vocabulary which each "child" element of the valueset
+comes from is also specified.
+
+Parameters
+----------
+parent_vocabulary_sab: The source vocabulary (SAB) recognized by UBKG to which parent_vocabulary_valueset_code belongs.
+
+parent_vocabulary_valueset_code: A code from parent_vocabulary_sab which is the parent of all elements of the valueset.
+
+value_preferred_vocabulary_sab: The source vocabulary (SAB) preferred for each term in the valueset.  It is common, but
+not required, that parent_vocabulary_sab and value_preferred_vocabulary_sab are the same i.e. specify a parent code
+from the HUBMAP vocabulary and return terms from the HUBMAP vocabulary.
+@TODO-KBKBKB determine if it is advisable to check the "sab" element of each term dictionary the Ontology API returns or if UBKG assures coverage such that we would never get a "sab" element which did not match value_preferred_vocabulary_sab.
+
+Returns
+-------
+JSON response from the Ontology API, which is a list of dictionaries, each containing "code", "sab", and "term" elements.
+[
+    {"code": "C003047", "sab": "HUBMAP", "term": "Histology"},
+    {"code": "C003051", "sab": "HUBMAP", "term": "Molecular Cartography"},
+    ...
+]
+"""
+def get_valueset(parent_vocabulary_sab, parent_vocabulary_valueset_code, value_preferred_vocabulary_sab):
+    global _ontology_api_url
+
+    target_url = f"{_ontology_api_url}/valueset" \
+                 f"?parent_sab={parent_vocabulary_sab}" \
+                 f"&parent_code={parent_vocabulary_valueset_code}" \
+                 f"&child_sabs={value_preferred_vocabulary_sab}"
+
+    # Use Memcached to improve performance
+    response = make_request_get(target_url, internal_token_used = True)
+
+    # Invoke .raise_for_status(), an HTTPError will be raised with certain status codes
+    response.raise_for_status()
+
+    if response.status_code == 200:
+        return response.json()
+    else:
+        msg = f"Unable to make a request to query the UBKG via ontology-api: {target_url}"
+        # Log the full stack trace, prepend a line with our message
+        logger.exception(msg)
+
+        logger.debug("======get_valueset() status code from ontology-api======")
+        logger.debug(response.status_code)
+
+        logger.debug("======get_valueset() response text from ontology-api======")
+        logger.debug(response.text)
+
+        # Also bubble up the error message from ontology-api
+        raise requests.exceptions.RequestException(response.text)
+
 """
 Create a set of new ids for the new entity to be created
 
@@ -1351,7 +1459,6 @@ def create_hubmap_ids(normalized_class, json_data_dict, user_token, user_info_di
             parent_id = json_data_dict['direct_ancestor_uuid']
             json_to_post['parent_ids'] = [parent_id]
 
-            # specimen_type -> sample_category 12/15/2022
             # 'Sample.sample_category' is marked as `required_on_create` in the schema yaml
             if json_data_dict['sample_category'].lower() == 'organ':
                 # The 'organ' field containing the 2 digit organ code is required in this case
@@ -1369,7 +1476,7 @@ def create_hubmap_ids(normalized_class, json_data_dict, user_token, user_info_di
     logger.info(json_to_post)
 
     # Disable ssl certificate verification
-    target_url = _uuid_api_url + '/uuid'
+    target_url = _uuid_api_url + SchemaConstants.UUID_API_ID_ENDPOINT
     response = requests.post(url = target_url, headers = request_headers, json = json_to_post, verify = False, params = query_parms)
     
     # Invoke .raise_for_status(), an HTTPError will be raised with certain status codes
@@ -1768,6 +1875,114 @@ def delete_memcached_cache(uuids_list):
         logger.info(f"Deleted cache by key: {', '.join(cache_keys)}")
 
 
+"""
+Retrieve the organ types from ontology-api
+
+Returns
+-------
+dict
+    The available organ types in the following format:
+
+    {
+        "AO": "Aorta",
+        "BD": "Blood",
+        "BL": "Bladder",
+        "BM": "Bone Marrow",
+        "BR": "Brain",
+        "HT": "Heart",
+        ...
+    }
+"""
+def get_organ_types():
+    global _ontology_api_url
+
+    target_url = _ontology_api_url + SchemaConstants.ONTOLOGY_API_ORGAN_TYPES_ENDPOINT
+
+    # Use Memcached to improve performance
+    response = make_request_get(target_url, internal_token_used = True)
+
+    # Invoke .raise_for_status(), an HTTPError will be raised with certain status codes
+    response.raise_for_status()
+
+    if response.status_code == 200:
+        return response.json()
+    else:
+        # Log the full stack trace, prepend a line with our message
+        logger.exception("Unable to make a request to query the organ types via ontology-api")
+
+        logger.debug("======get_organ_types() status code from ontology-api======")
+        logger.debug(response.status_code)
+
+        logger.debug("======get_organ_types() response text from ontology-api======")
+        logger.debug(response.text)
+
+        # Also bubble up the error message from ontology-api
+        raise requests.exceptions.RequestException(response.text)
+
+
+"""
+Retrieve the assay types from ontology-api
+
+Returns
+-------
+dict
+    The available assay types by name in the following format:
+
+    {
+        "10x-multiome": {
+            "contains_pii": true,
+            "description": "10x Multiome",
+            "name": "10x-multiome",
+            "primary": true,
+            "vis_only": false,
+            "vitessce_hints": []
+        },
+        "AF": {
+            "contains_pii": false,
+            "description": "Autofluorescence Microscopy",
+            "name": "AF",
+            "primary": true,
+            "vis_only": false,
+            "vitessce_hints": []
+        },
+        ...
+    }
+"""
+def get_assay_types():
+    global _ontology_api_url
+
+    target_url = _ontology_api_url + SchemaConstants.ONTOLOGY_API_ASSAY_TYPES_ENDPOINT
+
+    # Use Memcached to improve performance
+    response = make_request_get(target_url, internal_token_used = True)
+
+    # Invoke .raise_for_status(), an HTTPError will be raised with certain status codes
+    response.raise_for_status()
+
+    if response.status_code == 200:
+        assay_types_by_name = {}
+        result_dict = response.json()
+
+        # The ontology-api response is a JSON envelope; the assay type list is under its 'result' key
+        assay_types_list = result_dict['result']
+        for assay_type_dict in assay_types_list:
+            assay_types_by_name[assay_type_dict['name']] = assay_type_dict
+
+        return assay_types_by_name
+    else:
+        # Log the full stack trace, prepend a line with our message
+        logger.exception("Unable to make a request to query the assay types via ontology-api")
+
+        logger.debug("======get_assay_types() status code from ontology-api======")
+        logger.debug(response.status_code)
+
+        logger.debug("======get_assay_types() response text from ontology-api======")
+        logger.debug(response.text)
+
+        # Also bubble up the error message from ontology-api
+        raise requests.exceptions.RequestException(response.text)
+
+
 ####################################################################################################
 ## Internal functions
 ####################################################################################################
@@ -1794,4 +2009,4 @@ def _create_request_headers(user_token):
         auth_header_name: auth_scheme + ' ' + user_token
     }
 
-    return headers_dict
\ No newline at end of file
+    return headers_dict
diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py
index 8da148b3..231375b3 100644
--- a/src/schema/schema_neo4j_queries.py
+++ b/src/schema/schema_neo4j_queries.py
@@ -442,24 +442,8 @@ def get_dataset_organ_and_donor_info(neo4j_driver, uuid):
     donor_metadata = None
 
     with neo4j_driver.session() as session:
-        # Old time-consuming single query, it takes a significant amounts of DB hits
-        # query = (f"MATCH (e:Dataset)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Sample)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(d:Donor) "
-        #          f"WHERE e.uuid='{uuid}' AND s.specimen_type='organ' AND EXISTS(s.organ) "
-        #          f"RETURN s.organ AS organ_name, d.metadata AS donor_metadata")
-
-        # logger.info("======get_dataset_organ_and_donor_info() query======")
-        # logger.info(query)
-
-        # with neo4j_driver.session() as session:
-        #     record = session.read_transaction(execute_readonly_tx, query)
-
-        #     if record:
-        #         organ_name = record['organ_name']
-        #         donor_metadata = record['donor_metadata']
-
         # To improve the query performance, we implement the two-step queries to drastically reduce the DB hits
         sample_query = (f"MATCH (e:Dataset)<-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]-(s:Sample) "
-                        # specimen_type -> sample_category 12/15/2022
                         f"WHERE e.uuid='{uuid}' AND s.sample_category='organ' AND EXISTS(s.organ) "
                         f"RETURN DISTINCT s.organ AS organ_name, s.uuid AS sample_uuid")
 
@@ -473,7 +457,6 @@ def get_dataset_organ_and_donor_info(neo4j_driver, uuid):
             sample_uuid = sample_record['sample_uuid']
 
             donor_query = (f"MATCH (s:Sample)<-[:ACTIVITY_OUTPUT]-(a:Activity)<-[:ACTIVITY_INPUT]-(d:Donor) "
-                           # specimen_type -> sample_category 12/15/2022
                            f"WHERE s.uuid='{sample_uuid}' AND s.sample_category='organ' AND EXISTS(s.organ) "
                            f"RETURN DISTINCT d.metadata AS donor_metadata")
 
diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py
index a24f2e32..606e9af2 100644
--- a/src/schema/schema_triggers.py
+++ b/src/schema/schema_triggers.py
@@ -1024,9 +1024,10 @@ def get_dataset_title(property_key, normalized_type, user_token, existing_data_d
     # Parse the organ description
     if organ_name is not None:
         try: 
-            # The organ_name is the two-letter code only set if specimen_type == 'organ'
+            # The organ_name is the two-letter code, only set when sample_category is 'organ'
             # Convert the two-letter code to a description
-            organ_desc = _get_organ_description(organ_name)
+            organ_types_dict = schema_manager.get_organ_types()
+            organ_desc = organ_types_dict[organ_name].lower()
         except (yaml.YAMLError, requests.exceptions.RequestException) as e:
             raise Exception(e)
 
@@ -1194,7 +1195,7 @@ def commit_thumbnail_file(property_key, normalized_type, user_token, existing_da
             entity_uuid = existing_data_dict['uuid']
 
         # Commit the thumbnail file via ingest-api call
-        ingest_api_target_url = schema_manager.get_ingest_api_url() + '/file-commit'
+        ingest_api_target_url = schema_manager.get_ingest_api_url() + SchemaConstants.INGEST_API_FILE_COMMIT_ENDPOINT
         
         # Example: {"temp_file_id":"dzevgd6xjs4d5grmcp4n"}
         thumbnail_file_dict = new_data_dict[property_key]
@@ -1296,7 +1297,7 @@ def delete_thumbnail_file(property_key, normalized_type, user_token, existing_da
         file_info_dict = generated_dict[target_property_key]
     
     # Remove the thumbnail file via ingest-api call
-    ingest_api_target_url = schema_manager.get_ingest_api_url() + '/file-remove'
+    ingest_api_target_url = schema_manager.get_ingest_api_url() + SchemaConstants.INGEST_API_FILE_REMOVE_ENDPOINT
 
     # ingest-api's /file-remove takes a list of files to remove
     # In this case, we only need to remove the single thumbnail file
@@ -1572,105 +1573,6 @@ def get_sample_direct_ancestor(property_key, normalized_type, user_token, existi
     return property_key, schema_manager.normalize_entity_result_for_response(direct_ancestor_dict)
 
 
-"""
-Trigger event method of generating the type of the tissue based on the mapping between type (Block/Section/Suspension) and the specimen_type
-This method applies to both the create and update triggers
-
-Rererence:
-    - https://docs.google.com/spreadsheets/d/1OODo8QK852txSNSmfIe0ua4A7nPFSgKq6h46grmrpto/edit#gid=0
-    - https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml
-
-Parameters
-----------
-property_key : str
-    The target property key of the value to be generated
-normalized_type : str
-    One of the types defined in the schema yaml: Sample
-user_token: str
-    The user's globus nexus token
-existing_data_dict : dict
-    A dictionary that contains all existing entity properties
-new_data_dict : dict
-    A merged dictionary that contains all possible input data to be used
-
-Returns
--------
-str: The target property key
-str: The type of the tissue
-"""
-def set_tissue_type(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
-    # specimen_type is no logner required on create 12/15/2022, set to Unknown
-    # Default to use 'Unknown'
-    tissue_type = 'Unknown'
-
-    # # The `specimen_type` field is required on entity creation via POST
-    # # thus should be available on existing entity update via PUT
-    # # We do a double check here just in case
-    # if ('specimen_type' not in new_data_dict) and ('specimen_type' not in existing_data_dict):
-    #     raise KeyError("Missing 'specimen_type' key in both 'new_data_dict' and 'existing_data_dict' during calling 'set_tissue_type()' trigger method.")
-
-    # # Always calculate the tissue_type value no matter new creation or update existing
-    # # The `specimen_type` field can be used in a PUT
-    # # But if it's not in the request JSON of a PUT, it must be in the existing data
-    # if 'specimen_type' in new_data_dict:
-    #     # The `specimen_type` value validation is handled in the `schema_validators.validate_specimen_type()`
-    #     # and that gets called before this trigger method
-    #     specimen_type = new_data_dict['specimen_type'].lower()
-    # else:
-    #     # Use lowercase in case someone manually updated the neo4j filed with incorrect case
-    #     specimen_type = existing_data_dict['specimen_type'].lower()
-
-    # # Categories: Block, Section, Suspension 
-    # block_category = [
-    #     'pbmc',
-    #     'biopsy',
-    #     'segment',
-    #     'ffpe_block',
-    #     'organ_piece',
-    #     'fresh_tissue',
-    #     'clarity_hydrogel',
-    #     'fixed_tissue_piece',
-    #     'fresh_frozen_tissue',
-    #     'fresh_frozen_oct_block',
-    #     'formalin_fixed_oct_block',
-    #     'pfa_fixed_frozen_oct_block',
-    #     'flash_frozen_liquid_nitrogen',
-    #     'frozen_cell_pellet_buffy_coat'
-    # ]
-
-    # section_category = [
-    #     'ffpe_slide',
-    #     'fixed_frozen_section_slide',
-    #     'fresh_frozen_section_slide',
-    #     'fresh_frozen_tissue_section',
-    #     'cryosections_curls_rnalater',
-    #     'cryosections_curls_from_fresh_frozen_oct'
-    # ]
-
-    # suspension_category = [
-    #     'gdna',
-    #     'serum',
-    #     'plasma',
-    #     'nuclei',
-    #     'protein',
-    #     'rna_total',
-    #     'cell_lysate',
-    #     'tissue_lysate',
-    #     'sequence_library',
-    #     'ran_poly_a_enriched',
-    #     'single_cell_cryopreserved'
-    # ]
-
-    # # Capitalized type, default is 'Unknown' if no match
-    # if specimen_type in block_category:
-    #     tissue_type = 'Block'
-    # elif specimen_type in section_category:
-    #     tissue_type = 'Section'
-    # elif specimen_type in suspension_category:
-    #     tissue_type = 'Suspension'
-
-    return property_key, tissue_type
-
 
 ####################################################################################################
 ## Trigger methods specific to Publication - DO NOT RENAME
@@ -2004,7 +1906,7 @@ def _commit_files(target_property_key, property_key, normalized_type, user_token
             entity_uuid = existing_data_dict['uuid']
 
         # Commit the files via ingest-api call
-        ingest_api_target_url = schema_manager.get_ingest_api_url() + '/file-commit'
+        ingest_api_target_url = schema_manager.get_ingest_api_url() + SchemaConstants.INGEST_API_FILE_COMMIT_ENDPOINT
 
         for file_info in new_data_dict[property_key]:
             temp_file_id = file_info['temp_file_id']
@@ -2114,7 +2016,7 @@ def _delete_files(target_property_key, property_key, normalized_type, user_token
         file_uuids.append(file_uuid)
 
     # Remove the files via ingest-api call
-    ingest_api_target_url = schema_manager.get_ingest_api_url() + '/file-remove'
+    ingest_api_target_url = schema_manager.get_ingest_api_url() + SchemaConstants.INGEST_API_FILE_REMOVE_ENDPOINT
 
     json_to_post = {
         'entity_uuid': entity_uuid,
@@ -2153,39 +2055,10 @@ def _delete_files(target_property_key, property_key, normalized_type, user_token
 str: The corresponding assay type description
 """
 def _get_assay_type_description(assay_type):
-    yaml_file_url = SchemaConstants.ASSAY_TYPES_YAML
-
-    # Use Memcached to improve performance
-    response = schema_manager.make_request_get(yaml_file_url)
-
-    if response.status_code == 200:
-        yaml_file = response.text
+    assay_types_dict = schema_manager.get_assay_types()
 
-        try:
-            assay_types_dict = yaml.safe_load(response.text)
-
-            if assay_type in assay_types_dict:
-                return assay_types_dict[assay_type]['description'].lower()
-            else:
-                # Check the 'alt-names' list if not found in the top-level keys
-                for key in assay_types_dict:
-                    if assay_type in assay_types_dict[key]['alt-names']:
-                        return assay_types_dict[key]['description'].lower()
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
-    else:
-        msg = f"Unable to fetch the: {yaml_file_url}"
-        # Log the full stack trace, prepend a line with our message
-        logger.exception(msg)
-
-        logger.debug("======_get_assay_type_description() status code======")
-        logger.debug(response.status_code)
-
-        logger.debug("======_get_assay_type_description() response text======")
-        logger.debug(response.text)
-
-        # Also bubble up the error message
-        raise requests.exceptions.RequestException(response.text)
+    if assay_type in assay_types_dict:
+        return assay_types_dict[assay_type]['description'].lower()
 
 
 """
@@ -2226,46 +2099,3 @@ def _get_combined_assay_type_description(data_types):
 
     return assay_type_desc
 
-
-"""
-Get the organ description based on the given organ code
-
-Parameters
-----------
-organ_code : str
-    The two-letter organ code
-
-Returns
--------
-str: The organ code description
-"""
-def _get_organ_description(organ_code):
-    yaml_file_url = SchemaConstants.ORGAN_TYPES_YAML
-
-    # Use Memcached to improve performance
-    response = schema_manager.make_request_get(yaml_file_url)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-
-        try:
-            organ_types_dict = yaml.safe_load(response.text)
-            return organ_types_dict[organ_code]['description'].lower()
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
-    else:
-        msg = f"Unable to fetch the: {yaml_file_url}"
-        # Log the full stack trace, prepend a line with our message
-        logger.exception(msg)
-
-        logger.debug("======_get_organ_description() status code======")
-        logger.debug(response.status_code)
-
-        logger.debug("======_get_organ_description() response text======")
-        logger.debug(response.text)
-
-        # Also bubble up the error message
-        raise requests.exceptions.RequestException(response.text)
-
-
-
diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py
index 8c1269a5..336ec8f1 100644
--- a/src/schema/schema_validators.py
+++ b/src/schema/schema_validators.py
@@ -1,3 +1,5 @@
+import re
+
 import yaml
 import logging
 import requests
@@ -41,6 +43,36 @@ def validate_application_header_before_entity_create(normalized_entity_type, req
 ## Property Level Validators
 ####################################################################################################
 
+
+"""
+Validate that the value specified for a Dataset's dataset_type is in the valueset UBKG recognizes.
+Any trailing text in square brackets is ignored; a ValueError is raised if the rest is not recognized.
+
+Parameters
+----------
+property_key : str
+    The target property key
+normalized_entity_type : str
+    The normalized entity type of the entity being validated (not used by this validator)
+request: Flask request object
+    The instance of Flask request passed in from application request
+existing_data_dict : dict
+    A dictionary that contains all existing entity properties
+new_data_dict : dict
+    The json data in request body, already after the regular validations
+"""
+def validate_recognized_dataset_type(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
+    # A dataset_type may end with a suffix in square brackets; whatever is inside the brackets is
+    # accepted as-is, so strip the suffix (and any spaces before it) and validate only the prefix.
+    proposed_dataset_type_prefix = re.sub(pattern=r'[ ]*\[.*]$', repl='', string=new_data_dict['dataset_type'])
+    target_list = schema_manager.get_dataset_type_valueset_list()
+
+    if proposed_dataset_type_prefix not in target_list:
+        raise ValueError(f"Proposed Dataset dataset_type '{proposed_dataset_type_prefix}'"
+                         f" is not recognized in the existing ontology."
+                         f" Valid values are: {str(target_list)}.")
+
+
 """
 Validate the target list has no duplicated items
 
@@ -605,4 +637,3 @@ def _get_tissue_types():
 
         # Also bubble up the error message
         raise requests.exceptions.RequestException(response.text)
-
diff --git a/src/schema_templating/example-yaml-templates/api-template-test/entity-Template.yaml b/src/schema_templating/example-yaml-templates/api-template-test/entity-Template.yaml
index 5a6739f7..ab8ec463 100644
--- a/src/schema_templating/example-yaml-templates/api-template-test/entity-Template.yaml
+++ b/src/schema_templating/example-yaml-templates/api-template-test/entity-Template.yaml
@@ -398,60 +398,6 @@ x-ref-components:
             - consortium
             - public
           description: "One of the values: public, consortium."
-        specimen_type:
-          type: string
-          enum:
-            - atacseq
-            - biopsy
-            - blood
-            - cell_lysate
-            - clarity_hydrogel
-            - codex
-            - cryosections_curls_from_fresh_frozen_oct
-            - cryosections_curls_rnalater
-            - ffpe_block
-            - ffpe_slide
-            - fixed_frozen_section_slide
-            - fixed_tissue_piece
-            - flash_frozen_liquid_nitrogen
-            - formalin_fixed_oct_block
-            - fresh_frozen_oct_block
-            - fresh_frozen_section_slide
-            - fresh_frozen_tissue
-            - fresh_frozen_tissue_section
-            - fresh_tissue
-            - frozen_cell_pellet_buffy_coat
-            - gdna
-            - module
-            - nuclei
-            - nuclei_rnalater
-            - organ
-            - organ_piece
-            - other
-            - pbmc
-            - pfa_fixed_frozen_oct_block
-            - plasma
-            - protein
-            - ran_poly_a_enriched
-            - rna_total
-            - rnalater_treated_and_stored
-            - rnaseq
-            - scatacseq
-            - scrnaseq
-            - segment
-            - seqfish
-            - sequence_library
-            - serum
-            - sequence_library
-            - single_cell_cryopreserved
-            - snatacseq
-            - snrnaseq
-            - tissue_lysate
-            - wgs
-          description: "A code representing the type of specimen.  Must be one of the codes specified in: [tissue sample types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml)"
-        specimen_type_other:
-          type: string
-          description: "The user provided sample type if the 'other' sample_type is chosen."
         protocol_url:
           type: string
           description: "The protocols.io doi url pointing the protocol under wich the sample was obtained and/or prepared."
diff --git a/src/schema_templating/example-yaml-templates/sample-schema.yaml b/src/schema_templating/example-yaml-templates/sample-schema.yaml
index 1f5ee751..2b6f3f11 100644
--- a/src/schema_templating/example-yaml-templates/sample-schema.yaml
+++ b/src/schema_templating/example-yaml-templates/sample-schema.yaml
@@ -72,15 +72,6 @@ Sample:
         - consortium
         - public
       description: "One of the values: public, consortium."
-    specimen_type:
-      type: string
-      enum:
-        X-replace-enum-list:
-          enum-file-ref: https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml
-      description: "A code representing the type of specimen.  Must be one of the codes specified in: [tissue sample types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml)"
-    specimen_type_other:
-      type: string
-      description: "The user provided sample type if the 'other' sample_type is chosen."
     protocol_url:
       type: string
       description: "The protocols.io doi url pointing the protocol under wich the sample was obtained and/or prepared."