Skip to content

Commit

Permalink
Replace organs and assaytypes yamls with ontology-api calls
Browse files Browse the repository at this point in the history
  • Loading branch information
yuanzhou committed Nov 10, 2023
1 parent cd3b08d commit 8be60a6
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 149 deletions.
104 changes: 25 additions & 79 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
# Remove trailing slash / from URL base to avoid "//" caused by config with trailing slash
app.config['UUID_API_URL'] = app.config['UUID_API_URL'].strip('/')
app.config['INGEST_API_URL'] = app.config['INGEST_API_URL'].strip('/')
app.config['ONTOLOGY_API_URL'] = app.config['ONTOLOGY_API_URL'].strip('/')
app.config['SEARCH_API_URL_LIST'] = [url.strip('/') for url in app.config['SEARCH_API_URL_LIST']]

# This mode when set True disables the PUT and POST calls, used on STAGE to make entity-api READ-ONLY
Expand Down Expand Up @@ -198,6 +199,7 @@ def http_internal_server_error(e):
schema_manager.initialize(app.config['SCHEMA_YAML_FILE'],
app.config['UUID_API_URL'],
app.config['INGEST_API_URL'],
app.config['ONTOLOGY_API_URL'],
auth_helper_instance,
neo4j_driver_instance,
memcached_client_instance,
Expand Down Expand Up @@ -2623,26 +2625,12 @@ def get_prov_info():

# Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
# because that would require using a urllib request for each dataset
response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)

if response.status_code == 200:
yaml_file = response.text
try:
organ_types_dict = yaml.safe_load(yaml_file)
except yaml.YAMLError as e:
raise yaml.YAMLError(e)
organ_types_dict = schema_manager.get_organ_types()

# As above, we parse the assay type yaml here rather than calling the special method for it because this avoids
# having to access the resource for every dataset.
response = schema_manager.make_request_get(SchemaConstants.ASSAY_TYPES_YAML)

if response.status_code == 200:
yaml_file = response.text
try:
assay_types_dict = yaml.safe_load(yaml_file)
except yaml.YAMLError as e:
raise yaml.YAMLError(e)

assay_types_dict = schema_manager.get_assay_types()

# Processing and validating query parameters
accepted_arguments = ['format', 'organ', 'has_rui_info', 'dataset_status', 'group_uuid']
return_json = False
Expand Down Expand Up @@ -3007,25 +2995,11 @@ def get_prov_info_for_dataset(id):

# Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
# because that would require using a urllib request for each dataset
response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)

if response.status_code == 200:
yaml_file = response.text
try:
organ_types_dict = yaml.safe_load(yaml_file)
except yaml.YAMLError as e:
raise yaml.YAMLError(e)
organ_types_dict = schema_manager.get_organ_types()

# As above, we parse the assay type yaml here rather than calling the special method for it because this avoids
# having to access the resource for every dataset.
response = schema_manager.make_request_get(SchemaConstants.ASSAY_TYPES_YAML)

if response.status_code == 200:
yaml_file = response.text
try:
assay_types_dict = yaml.safe_load(yaml_file)
except yaml.YAMLError as e:
raise yaml.YAMLError(e)
assay_types_dict = schema_manager.get_assay_types()

hubmap_ids = schema_manager.get_hubmap_ids(id)

Expand Down Expand Up @@ -3251,25 +3225,11 @@ def sankey_data():
mapping_dict = json.load(f)
# Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
# because that would require using a urllib request for each dataset
response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)

if response.status_code == 200:
yaml_file = response.text
try:
organ_types_dict = yaml.safe_load(yaml_file)
except yaml.YAMLError as e:
raise yaml.YAMLError(e)
organ_types_dict = schema_manager.get_organ_types()

# As above, we parse the assay type yaml here rather than calling the special method for it because this avoids
# having to access the resource for every dataset.
response = schema_manager.make_request_get(SchemaConstants.ASSAY_TYPES_YAML)

if response.status_code == 200:
yaml_file = response.text
try:
assay_types_dict = yaml.safe_load(yaml_file)
except yaml.YAMLError as e:
raise yaml.YAMLError(e)
assay_types_dict = schema_manager.get_assay_types()

# Instantiation of the list dataset_sankey_list
dataset_sankey_list = []
Expand Down Expand Up @@ -3377,14 +3337,16 @@ def get_sample_prov_info():

# Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
# because that would require using a urllib request for each dataset
response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)
# response = schema_manager.make_request_get(SchemaConstants.ORGAN_TYPES_YAML)

if response.status_code == 200:
yaml_file = response.text
try:
organ_types_dict = yaml.safe_load(yaml_file)
except yaml.YAMLError as e:
raise yaml.YAMLError(e)
# if response.status_code == 200:
# yaml_file = response.text
# try:
# organ_types_dict = yaml.safe_load(yaml_file)
# except yaml.YAMLError as e:
# raise yaml.YAMLError(e)

organ_types_dict = schema_manager.get_organ_types()

# Processing and validating query parameters
accepted_arguments = ['group_uuid']
Expand Down Expand Up @@ -4744,34 +4706,18 @@ def access_level_prefix_dir(dir_name):
Returns nothing. Raises bad_request_error if organ code not found on organ_types.yaml
"""
def validate_organ_code(organ_code):
    # Validate the given organ code against the organ types returned by
    # schema_manager.get_organ_types().
    #
    # Only the lookup itself is guarded by the try/except. The rejection of an
    # unknown code happens in the `else` clause so that the error raised by
    # bad_request_error() is not caught by the handler below and re-reported
    # as an internal server error.
    try:
        organ_types_dict = schema_manager.get_organ_types()

        # Evaluated inside the try so that a failure of the lookup or of the
        # membership test itself is still reported as a validation failure
        is_known_code = organ_code.upper() in organ_types_dict
    except Exception:
        msg = f"Failed to validate the organ code: {organ_code}"
        # Log the full stack trace, prepend a line with our message
        logger.exception(msg)

        # Terminate and let the users know
        internal_server_error(msg)
    else:
        # Runs only when the lookup succeeded; exceptions raised here are
        # not intercepted by the `except` clause above
        if not is_known_code:
            bad_request_error("Invalid organ code. Must be 2 digit code")


####################################################################################################
Expand Down
4 changes: 4 additions & 0 deletions src/instance/app.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ UUID_API_URL = 'http://uuid-api:8080'
# Works regardless of the trailing slash
INGEST_API_URL = 'https://ingest-api.dev.hubmapconsortium.org'

# URL for talking to Ontology API (default for DEV)
# Works regardless of the trailing slash
ONTOLOGY_API_URL = 'https://ontology-api.dev.hubmapconsortium.org'

# A list of URLs for talking to multiple Search API instances (default value used for docker deployment, no token needed)
# Works regardless of the trailing slash /
SEARCH_API_URL_LIST = ['http://search-api:8080']
Expand Down
8 changes: 5 additions & 3 deletions src/schema/schema_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ class SchemaConstants(object):
ACCESS_LEVEL_CONSORTIUM = 'consortium'
ACCESS_LEVEL_PROTECTED = 'protected'

# Yaml file to parse organ description
ORGAN_TYPES_YAML = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/organ_types.yaml'
ASSAY_TYPES_YAML = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/assay_types.yaml'
UUID_API_ID_ENDPOINT = '/uuid'
INGEST_API_FILE_COMMIT_ENDPOINT = '/file-commit'
INGEST_API_FILE_REMOVE_ENDPOINT = '/file-remove'
ONTOLOGY_API_ASSAY_TYPES_ENDPOINT = '/assaytype?application_context=HuBMAP'
ONTOLOGY_API_ORGAN_TYPES_ENDPOINT = '/organs?application_context=HuBMAP'

# For generating Sample.tissue_type
TISSUE_TYPES_YAML = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml'
Expand Down
91 changes: 87 additions & 4 deletions src/schema/schema_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,11 @@
valid_yaml_file : file
A valid yaml file
uuid_api_url : str
The uuid-api URL
The uuid-api base URL
ingest_api_url : str
The ingest-api URL
The ingest-api base URL
ontology_api_url : str
The ontology-api base URL
auth_helper_instance : AuthHelper
The auth helper instance
neo4j_driver_instance : neo4j_driver
Expand All @@ -65,6 +67,7 @@
def initialize(valid_yaml_file,
uuid_api_url,
ingest_api_url,
ontology_api_url,
auth_helper_instance,
neo4j_driver_instance,
memcached_client_instance,
Expand All @@ -73,6 +76,7 @@ def initialize(valid_yaml_file,
global _schema
global _uuid_api_url
global _ingest_api_url
global _ontology_api_url
global _auth_helper
global _neo4j_driver
global _memcached_client
Expand All @@ -81,6 +85,7 @@ def initialize(valid_yaml_file,
_schema = load_provenance_schema(valid_yaml_file)
_uuid_api_url = uuid_api_url
_ingest_api_url = ingest_api_url
_ontology_api_url = ontology_api_url

# Get the helper instances
_auth_helper = auth_helper_instance
Expand Down Expand Up @@ -1202,7 +1207,7 @@ def get_user_info(request):
def get_hubmap_ids(id):
global _uuid_api_url

target_url = _uuid_api_url + '/uuid/' + id
target_url = _uuid_api_url + schema_constants.UUID_API_ID_ENDPOINT + '/' + id

# Use Memcached to improve performance
response = make_request_get(target_url, internal_token_used = True)
Expand Down Expand Up @@ -1365,7 +1370,7 @@ def create_hubmap_ids(normalized_class, json_data_dict, user_token, user_info_di
logger.info(json_to_post)

# Disable ssl certificate verification
target_url = _uuid_api_url + '/uuid'
target_url = _uuid_api_url + schema_constants.UUID_API_ID_ENDPOINT
response = requests.post(url = target_url, headers = request_headers, json = json_to_post, verify = False, params = query_parms)

# Invoke .raise_for_status(), an HTTPError will be raised with certain status codes
Expand Down Expand Up @@ -1764,6 +1769,84 @@ def delete_memcached_cache(uuids_list):
logger.info(f"Deleted cache by key: {', '.join(cache_keys)}")


"""
Retrieve the organ types from ontology-api

Returns
-------
dict
    The parsed JSON body returned by the ontology-api organs endpoint
    NOTE(review): callers use the result for membership checks by organ code,
    confirm the response shape returned by ontology-api

Raises
------
requests.exceptions.HTTPError
    When ontology-api responds with a 4xx/5xx status code
requests.exceptions.RequestException
    When ontology-api responds with any other non-200 status code
"""
def get_organ_types():
    global _ontology_api_url

    target_url = _ontology_api_url + '/organs?application_context=HuBMAP'

    # Use Memcached to improve performance
    response = make_request_get(target_url, internal_token_used = True)

    if response.status_code == 200:
        return response.json()

    # Anything other than 200 is handled as a failure from here on
    msg = f"Unable to retrieve the organ types via ontology-api: {target_url}"
    # No exception is being handled at this point, so use logger.error()
    # instead of logger.exception() which expects an active exception
    logger.error(msg)

    logger.debug("======get_organ_types() status code from ontology-api======")
    logger.debug(response.status_code)

    logger.debug("======get_organ_types() response text from ontology-api======")
    logger.debug(response.text)

    # Invoke .raise_for_status(), an HTTPError will be raised with certain status codes
    # Called after the logging above so the failure details are not lost
    response.raise_for_status()

    # Also bubble up the error message from ontology-api
    raise requests.exceptions.RequestException(response.text)


"""
Retrieve the assay types from ontology-api

Returns
-------
dict
    The parsed JSON body returned by the ontology-api assay type endpoint
    NOTE(review): confirm the response shape returned by ontology-api
    against what the callers expect

Raises
------
requests.exceptions.HTTPError
    When ontology-api responds with a 4xx/5xx status code
requests.exceptions.RequestException
    When ontology-api responds with any other non-200 status code
"""
def get_assay_types():
    global _ontology_api_url

    target_url = _ontology_api_url + '/assaytype?application_context=HuBMAP'

    # Use Memcached to improve performance
    response = make_request_get(target_url, internal_token_used = True)

    if response.status_code == 200:
        return response.json()

    # Anything other than 200 is handled as a failure from here on
    msg = f"Unable to retrieve the assay types via ontology-api: {target_url}"
    # No exception is being handled at this point, so use logger.error()
    # instead of logger.exception() which expects an active exception
    logger.error(msg)

    logger.debug("======get_assay_types() status code from ontology-api======")
    logger.debug(response.status_code)

    logger.debug("======get_assay_types() response text from ontology-api======")
    logger.debug(response.text)

    # Invoke .raise_for_status(), an HTTPError will be raised with certain status codes
    # Called after the logging above so the failure details are not lost
    response.raise_for_status()

    # Also bubble up the error message from ontology-api
    raise requests.exceptions.RequestException(response.text)


####################################################################################################
## Internal functions
####################################################################################################
Expand Down
Loading

0 comments on commit 8be60a6

Please sign in to comment.