diff --git a/src/app.py b/src/app.py
index 7878d726..ce39b5bc 100644
--- a/src/app.py
+++ b/src/app.py
@@ -60,6 +60,7 @@
 # Remove trailing slash / from URL base to avoid "//" caused by config with trailing slash
 app.config['UUID_API_URL'] = app.config['UUID_API_URL'].strip('/')
 app.config['INGEST_API_URL'] = app.config['INGEST_API_URL'].strip('/')
+app.config['ONTOLOGY_API_URL'] = app.config['ONTOLOGY_API_URL'].strip('/')
 app.config['SEARCH_API_URL_LIST'] = [url.strip('/') for url in app.config['SEARCH_API_URL_LIST']]
 
 # This mode when set True disables the PUT and POST calls, used on STAGE to make entity-api READ-ONLY
@@ -198,6 +199,7 @@ def http_internal_server_error(e):
 schema_manager.initialize(app.config['SCHEMA_YAML_FILE'],
                           app.config['UUID_API_URL'],
                           app.config['INGEST_API_URL'],
+                          app.config['ONTOLOGY_API_URL'],
                           auth_helper_instance,
                           neo4j_driver_instance,
                           memcached_client_instance,
@@ -2623,26 +2625,12 @@ def get_prov_info():
 
     # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
     # because that would require using a urllib request for each dataset
-    response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            organ_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    organ_types_dict = schema_manager.get_organ_types()
 
     # As above, we parse te assay type yaml here rather than calling the special method for it because this avoids
     # having to access the resource for every dataset.
-    response = schema_manager.make_request_get(SchemaConstants.ASSAY_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            assay_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
-
+    assay_types_dict = schema_manager.get_assay_types()
+
     # Processing and validating query parameters
     accepted_arguments = ['format', 'organ', 'has_rui_info', 'dataset_status', 'group_uuid']
     return_json = False
@@ -3007,25 +2995,11 @@ def get_prov_info_for_dataset(id):
 
     # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
     # because that would require using a urllib request for each dataset
-    response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            organ_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    organ_types_dict = schema_manager.get_organ_types()
 
     # As above, we parse te assay type yaml here rather than calling the special method for it because this avoids
     # having to access the resource for every dataset.
-    response = schema_manager.make_request_get(SchemaConstants.ASSAY_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            assay_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    assay_types_dict = schema_manager.get_assay_types()
 
     hubmap_ids = schema_manager.get_hubmap_ids(id)
@@ -3251,25 +3225,11 @@ def sankey_data():
         mapping_dict = json.load(f)
 
     # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
     # because that would require using a urllib request for each dataset
-    response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            organ_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    organ_types_dict = schema_manager.get_organ_types()
 
     # As above, we parse te assay type yaml here rather than calling the special method for it because this avoids
     # having to access the resource for every dataset.
-    response = schema_manager.make_request_get(SchemaConstants.ASSAY_TYPES_YAML)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            assay_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    assay_types_dict = schema_manager.get_assay_types()
 
     # Instantiation of the list dataset_sankey_list
     dataset_sankey_list = []
@@ -3377,14 +3337,16 @@ def get_sample_prov_info():
 
     # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
     # because that would require using a urllib request for each dataset
-    response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)
+    # response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)
 
-    if response.status_code == 200:
-        yaml_file = response.text
-        try:
-            organ_types_dict = yaml.safe_load(yaml_file)
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
+    # if response.status_code == 200:
+    #     yaml_file = response.text
+    #     try:
+    #         organ_types_dict = yaml.safe_load(yaml_file)
+    #     except yaml.YAMLError as e:
+    #         raise yaml.YAMLError(e)
+
+    organ_types_dict = schema_manager.get_organ_types()
 
     # Processing and validating query parameters
     accepted_arguments = ['group_uuid']
@@ -4744,34 +4706,18 @@ def access_level_prefix_dir(dir_name):
 
 Returns nothing. Raises bad_request_error is organ code not found on organ_types.yaml
 """
 def validate_organ_code(organ_code):
-    yaml_file_url = SchemaConstants.ORGAN_TYPES_YAML
-
-    # Use Memcached to improve performance
-    response = schema_manager.make_request_get(yaml_file_url)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-
-        try:
-            organ_types_dict = yaml.safe_load(response.text)
-
-            if organ_code.upper() not in organ_types_dict:
-                bad_request_error(f"Invalid organ code. Must be 2 digit code specified {yaml_file_url}")
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
-    else:
-        msg = f"Unable to fetch the: {yaml_file_url}"
+    try:
+        organ_types_dict = schema_manager.get_organ_types()
+    except:
+        msg = f"Failed to validate the organ code: {organ_code}"
         # Log the full stack trace, prepend a line with our message
         logger.exception(msg)
 
-        logger.debug("======validate_organ_code() status code======")
-        logger.debug(response.status_code)
-
-        logger.debug("======validate_organ_code() response text======")
-        logger.debug(response.text)
-
         # Terminate and let the users know
-        internal_server_error(f"Failed to validate the organ code: {organ_code}")
+        internal_server_error(msg)
+
+    # Check the code outside the try/except so the 400 raised by bad_request_error()
+    # is not caught above and turned into a 500
+    if organ_code.upper() not in organ_types_dict:
+        bad_request_error("Invalid organ code. Must be a 2-digit code")
diff --git a/src/instance/app.cfg.example b/src/instance/app.cfg.example
index 0c55f5bd..839972dc 100644
--- a/src/instance/app.cfg.example
+++ b/src/instance/app.cfg.example
@@ -28,6 +28,10 @@ UUID_API_URL = 'http://uuid-api:8080'
 # Works regardless of the trailing slash
 INGEST_API_URL = 'https://ingest-api.dev.hubmapconsortium.org'
 
+# URL for talking to Ontology API (default for DEV)
+# Works regardless of the trailing slash
+ONTOLOGY_API_URL = 'https://ontology-api.dev.hubmapconsortium.org'
+
 # A list of URLs for talking to multiple Search API instances (default value used for docker deployment, no token needed)
 # Works regardless of the trailing slash /
 SEARCH_API_URL_LIST = ['http://search-api:8080']
diff --git a/src/schema/schema_constants.py b/src/schema/schema_constants.py
index 5e4ad332..b426827c 100644
--- a/src/schema/schema_constants.py
+++ b/src/schema/schema_constants.py
@@ -13,9 +13,11 @@ class SchemaConstants(object):
     ACCESS_LEVEL_CONSORTIUM = 'consortium'
     ACCESS_LEVEL_PROTECTED = 'protected'
 
-    # Yaml file to parse organ description
-    ORGAN_TYPES_YAML = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/organ_types.yaml'
-    ASSAY_TYPES_YAML = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/assay_types.yaml'
+    UUID_API_ID_ENDPOINT = '/uuid'
+    INGEST_API_FILE_COMMIT_ENDPOINT = '/file-commit'
+    INGEST_API_FILE_REMOVE_ENDPOINT = '/file-remove'
+    ONTOLOGY_API_ASSAY_TYPES_ENDPOINT = '/assaytype?application_context=HuBMAP'
+    ONTOLOGY_API_ORGAN_TYPES_ENDPOINT = '/organs?application_context=HuBMAP'
 
     # For generating Sample.tissue_type
     TISSUE_TYPES_YAML = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml'
diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py
index 2386b013..5018e567 100644
--- a/src/schema/schema_manager.py
+++ b/src/schema/schema_manager.py
@@ -50,9 +50,11 @@
 valid_yaml_file : file
     A valid yaml file
 uuid_api_url : str
-    The uuid-api URL
+    The uuid-api base URL
 ingest_api_url : str
-    The ingest-api URL
+    The ingest-api base URL
+ontology_api_url : str
+    The ontology-api base URL
 auth_helper_instance : AuthHelper
     The auth helper instance
 neo4j_driver_instance : neo4j_driver
@@ -65,6 +67,7 @@
 def initialize(valid_yaml_file,
                uuid_api_url,
                ingest_api_url,
+               ontology_api_url,
                auth_helper_instance,
                neo4j_driver_instance,
                memcached_client_instance,
@@ -73,6 +76,7 @@ def initialize(valid_yaml_file,
     global _schema
     global _uuid_api_url
     global _ingest_api_url
+    global _ontology_api_url
     global _auth_helper
     global _neo4j_driver
     global _memcached_client
@@ -81,6 +85,7 @@ def initialize(valid_yaml_file,
     _schema = load_provenance_schema(valid_yaml_file)
     _uuid_api_url = uuid_api_url
     _ingest_api_url = ingest_api_url
+    _ontology_api_url = ontology_api_url
 
     # Get the helper instances
     _auth_helper = auth_helper_instance
@@ -1202,7 +1207,7 @@ def get_user_info(request):
 def get_hubmap_ids(id):
     global _uuid_api_url
 
-    target_url = _uuid_api_url + '/uuid/' + id
+    target_url = _uuid_api_url + SchemaConstants.UUID_API_ID_ENDPOINT + '/' + id
 
     # Use Memcached to improve performance
     response = make_request_get(target_url, internal_token_used = True)
@@ -1365,7 +1370,7 @@ def create_hubmap_ids(normalized_class, json_data_dict, user_token, user_info_di
     logger.info(json_to_post)
 
     # Disable ssl certificate verification
-    target_url = _uuid_api_url + '/uuid'
+    target_url = _uuid_api_url + SchemaConstants.UUID_API_ID_ENDPOINT
     response = requests.post(url = target_url, headers = request_headers, json = json_to_post, verify = False, params = query_parms)
 
     # Invoke .raise_for_status(), an HTTPError will be raised with certain status codes
@@ -1764,6 +1769,84 @@ def delete_memcached_cache(uuids_list):
         logger.info(f"Deleted cache by key: {', '.join(cache_keys)}")
 
 
+"""
+Retrieve the organ types from ontology-api
+
+Returns
+-------
+dict
+    The available organ types
+"""
+def get_organ_types():
+    global _ontology_api_url
+
+    target_url = _ontology_api_url + SchemaConstants.ONTOLOGY_API_ORGAN_TYPES_ENDPOINT
+
+    # Use Memcached to improve performance
+    response = make_request_get(target_url, internal_token_used = True)
+
+    # Invoke .raise_for_status(), an HTTPError will be raised with certain status codes
+    response.raise_for_status()
+
+    if response.status_code == 200:
+        organ_types_dict = response.json()
+        return organ_types_dict
+    else:
+        # Handle all other non-200 responses from ontology-api here
+        msg = "Unable to query the organ types via ontology-api"
+
+        # Log the full stack trace, prepend a line with our message
+        logger.exception(msg)
+
+        logger.debug("======get_organ_types() status code from ontology-api======")
+        logger.debug(response.status_code)
+
+        logger.debug("======get_organ_types() response text from ontology-api======")
+        logger.debug(response.text)
+
+        # Also bubble up the error message from ontology-api
+        raise requests.exceptions.RequestException(response.text)
+
+
+"""
+Retrieve the assay types from ontology-api
+
+Returns
+-------
+dict
+    The available assay types
+"""
+def get_assay_types():
+    global _ontology_api_url
+
+    target_url = _ontology_api_url + SchemaConstants.ONTOLOGY_API_ASSAY_TYPES_ENDPOINT
+
+    # Use Memcached to improve performance
+    response = make_request_get(target_url, internal_token_used = True)
+
+    # Invoke .raise_for_status(), an HTTPError will be raised with certain status codes
+    response.raise_for_status()
+
+    if response.status_code == 200:
+        assay_types_dict = response.json()
+        return assay_types_dict
+    else:
+        # Handle all other non-200 responses from ontology-api here
+        msg = "Unable to query the assay types via ontology-api"
+
+        # Log the full stack trace, prepend a line with our message
+        logger.exception(msg)
+
+        logger.debug("======get_assay_types() status code from ontology-api======")
+        logger.debug(response.status_code)
+
+        logger.debug("======get_assay_types() response text from ontology-api======")
+        logger.debug(response.text)
+
+        # Also bubble up the error message from ontology-api
+        raise requests.exceptions.RequestException(response.text)
+
+
 ####################################################################################################
 ## Internal functions
 ####################################################################################################
diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py
index 38cf61dc..d831670a 100644
--- a/src/schema/schema_triggers.py
+++ b/src/schema/schema_triggers.py
@@ -1184,7 +1184,7 @@ def commit_thumbnail_file(property_key, normalized_type, user_token, existing_da
     entity_uuid = existing_data_dict['uuid']
 
     # Commit the thumbnail file via ingest-api call
-    ingest_api_target_url = schema_manager.get_ingest_api_url() + '/file-commit'
+    ingest_api_target_url = schema_manager.get_ingest_api_url() + SchemaConstants.INGEST_API_FILE_COMMIT_ENDPOINT
 
     # Example: {"temp_file_id":"dzevgd6xjs4d5grmcp4n"}
     thumbnail_file_dict = new_data_dict[property_key]
@@ -1286,7 +1286,7 @@ def delete_thumbnail_file(property_key, normalized_type, user_token, existing_da
     file_info_dict = generated_dict[target_property_key]
 
     # Remove the thumbnail file via ingest-api call
-    ingest_api_target_url = schema_manager.get_ingest_api_url() + '/file-remove'
+    ingest_api_target_url = schema_manager.get_ingest_api_url() + SchemaConstants.INGEST_API_FILE_REMOVE_ENDPOINT
 
     # ingest-api's /file-remove takes a list of files to remove
     # In this case, we only need to remove the single thumbnail file
@@ -1994,7 +1994,7 @@ def _commit_files(target_property_key, property_key, normalized_type, user_token
     entity_uuid = existing_data_dict['uuid']
 
     # Commit the files via ingest-api call
-    ingest_api_target_url = schema_manager.get_ingest_api_url() + '/file-commit'
+    ingest_api_target_url = schema_manager.get_ingest_api_url() + SchemaConstants.INGEST_API_FILE_COMMIT_ENDPOINT
 
     for file_info in new_data_dict[property_key]:
         temp_file_id = file_info['temp_file_id']
@@ -2104,7 +2104,7 @@ def _delete_files(target_property_key, property_key, normalized_type, user_token
         file_uuids.append(file_uuid)
 
     # Remove the files via ingest-api call
-    ingest_api_target_url = schema_manager.get_ingest_api_url() + '/file-remove'
+    ingest_api_target_url = schema_manager.get_ingest_api_url() + SchemaConstants.INGEST_API_FILE_REMOVE_ENDPOINT
 
     json_to_post = {
         'entity_uuid': entity_uuid,
@@ -2143,39 +2143,10 @@ def _delete_files(target_property_key, property_key, normalized_type, user_token
 str: The corresponding assay type description
 """
 def _get_assay_type_description(assay_type):
-    yaml_file_url = SchemaConstants.ASSAY_TYPES_YAML
+    assay_types_dict = schema_manager.get_assay_types()
 
-    # Use Memcached to improve performance
-    response = schema_manager.make_request_get(yaml_file_url)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-
-        try:
-            assay_types_dict = yaml.safe_load(response.text)
-
-            if assay_type in assay_types_dict:
-                return assay_types_dict[assay_type]['description'].lower()
-            else:
-                # Check the 'alt-names' list if not found in the top-level keys
-                for key in assay_types_dict:
-                    if assay_type in assay_types_dict[key]['alt-names']:
-                        return assay_types_dict[key]['description'].lower()
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
-    else:
-        msg = f"Unable to fetch the: {yaml_file_url}"
-        # Log the full stack trace, prepend a line with our message
-        logger.exception(msg)
-
-        logger.debug("======_get_assay_type_description() status code======")
-        logger.debug(response.status_code)
-
-        logger.debug("======_get_assay_type_description() response text======")
-        logger.debug(response.text)
-
-        # Also bubble up the error message
-        raise requests.exceptions.RequestException(response.text)
+    if assay_type in assay_types_dict:
+        return assay_types_dict[assay_type]['description'].lower()
 
 
 """
@@ -2230,32 +2201,7 @@ def _get_combined_assay_type_description(data_types):
 str: The organ code description
 """
 def _get_organ_description(organ_code):
-    yaml_file_url = SchemaConstants.ORGAN_TYPES_YAML
-
-    # Use Memcached to improve performance
-    response = schema_manager.make_request_get(yaml_file_url)
-
-    if response.status_code == 200:
-        yaml_file = response.text
-
-        try:
-            organ_types_dict = yaml.safe_load(response.text)
-            return organ_types_dict[organ_code]['description'].lower()
-        except yaml.YAMLError as e:
-            raise yaml.YAMLError(e)
-    else:
-        msg = f"Unable to fetch the: {yaml_file_url}"
-        # Log the full stack trace, prepend a line with our message
-        logger.exception(msg)
-
-        logger.debug("======_get_organ_description() status code======")
-        logger.debug(response.status_code)
-
-        logger.debug("======_get_organ_description() response text======")
-        logger.debug(response.text)
-
-        # Also bubble up the error message
-        raise requests.exceptions.RequestException(response.text)
-
+    organ_types_dict = schema_manager.get_organ_types()
+    return organ_types_dict[organ_code]['description'].lower()
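
Note: the snippet below is a standalone, hypothetical sketch (not part of the patch) of the ontology-api round trip that the new schema_manager.get_organ_types() performs. The base URL comes from app.cfg.example and the endpoint path from SchemaConstants above; the assumption that the endpoint returns a JSON object keyed by organ code, each entry carrying a 'description' field, is inferred only from how the callers in this diff index the result, and is not verified here. The Memcached layer and internal-token handling used inside entity-api are omitted.

# Hypothetical usage sketch only -- illustrates the call pattern, not entity-api internals.
import requests

ONTOLOGY_API_URL = 'https://ontology-api.dev.hubmapconsortium.org'   # from app.cfg.example
ORGAN_TYPES_ENDPOINT = '/organs?application_context=HuBMAP'          # SchemaConstants.ONTOLOGY_API_ORGAN_TYPES_ENDPOINT

def fetch_organ_types(base_url = ONTOLOGY_API_URL):
    # Strip the trailing slash the same way app.py normalizes ONTOLOGY_API_URL
    target_url = base_url.strip('/') + ORGAN_TYPES_ENDPOINT
    response = requests.get(target_url, timeout = 10)

    # Mirror get_organ_types(): bubble up the ontology-api error text on failure
    if response.status_code != 200:
        raise requests.exceptions.RequestException(response.text)

    return response.json()

if __name__ == '__main__':
    organ_types_dict = fetch_organ_types()
    # Callers in this diff index the result by organ code, e.g.
    # organ_types_dict['HT']['description'] -- 'HT' is an illustrative code only
    print(list(organ_types_dict)[:5])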