Skip to content

Commit

Permalink
Merge pull request #571 from hubmapconsortium/yuanzhou/yaml-to-ubkg
Browse files Browse the repository at this point in the history
Yuanzhou/yaml to ubkg
  • Loading branch information
yuanzhou authored Nov 28, 2023
2 parents 61f9660 + ab37ce0 commit 6f59c9c
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 69 deletions.
45 changes: 15 additions & 30 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2794,17 +2794,12 @@ def get_prov_info():
for item in dataset['data_types']:
try:
assay_description_list.append(assay_types_dict[item]['description'])
# Some data types aren't given by their code in the assay types yaml and are instead given as an alt name.
# In these cases, we have to search each assay type and see if the given code matches any alternate names.
except KeyError:
valid_key = False
for each in assay_types_dict:
if valid_key is False:
if item in assay_types_dict[each]['alt-names']:
assay_description_list.append(assay_types_dict[each]['description'])
valid_key = True
if valid_key is False:
assay_description_list.append(item)
logger.exception(f"Data type {item} not found in resulting assay types via ontology-api")

# Just use the data type value
assay_description_list.append(item)

dataset['data_types'] = assay_description_list
internal_dict[HEADER_DATASET_DATA_TYPES] = dataset['data_types']

Expand Down Expand Up @@ -3116,17 +3111,12 @@ def get_prov_info_for_dataset(id):
for item in dataset['data_types']:
try:
assay_description_list.append(assay_types_dict[item]['description'])
# Some data types aren't given by their code in the assay types yaml and are instead given as an alt name.
# In these cases, we have to search each assay type and see if the given code matches any alternate names.
except KeyError:
valid_key = False
for each in assay_types_dict:
if valid_key is False:
if item in assay_types_dict[each]['alt-names']:
assay_description_list.append(assay_types_dict[each]['description'])
valid_key = True
if valid_key is False:
assay_description_list.append(item)
logger.exception(f"Data type {item} not found in resulting assay types via ontology-api")

# Just use the data type value
assay_description_list.append(item)

dataset['data_types'] = assay_description_list
internal_dict[HEADER_DATASET_DATA_TYPES] = dataset['data_types']
if return_json is False:
Expand Down Expand Up @@ -3342,17 +3332,12 @@ def sankey_data():
assay_description = ""
try:
assay_description = assay_types_dict[dataset[HEADER_DATASET_DATA_TYPES]]['description']
# Some data types aren't given by their code in the assay types yaml and are instead given as an alt name.
# In these cases, we have to search each assay type and see if the given code matches any alternate names.
except KeyError:
valid_key = False
for each in assay_types_dict:
if valid_key is False:
if dataset[HEADER_DATASET_DATA_TYPES] in assay_types_dict[each]['alt-names']:
assay_description = assay_types_dict[each]['description']
valid_key = True
if valid_key is False:
assay_description = dataset[HEADER_DATASET_DATA_TYPES]
logger.exception(f"Data type {dataset[HEADER_DATASET_DATA_TYPES]} not found in resulting assay types via ontology-api")

# Just use the data type value
assay_description = dataset[HEADER_DATASET_DATA_TYPES]

internal_dict[HEADER_DATASET_DATA_TYPES] = assay_description

# Replace applicable Group Name and Data type with the value needed for the sankey via the mapping_dict
Expand Down
4 changes: 2 additions & 2 deletions src/schema/schema_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ class SchemaConstants(object):
UUID_API_ID_ENDPOINT = '/uuid'
INGEST_API_FILE_COMMIT_ENDPOINT = '/file-commit'
INGEST_API_FILE_REMOVE_ENDPOINT = '/file-remove'
ONTOLOGY_API_ASSAY_TYPES_ENDPOINT = '/assaytype?application_context=HuBMAP'
ONTOLOGY_API_ORGAN_TYPES_ENDPOINT = '/organs?application_context=HuBMAP'
ONTOLOGY_API_ASSAY_TYPES_ENDPOINT = '/assaytype?application_context=HUBMAP'
ONTOLOGY_API_ORGAN_TYPES_ENDPOINT = '/organs/by-code?application_context=HUBMAP'

DOI_BASE_URL = 'https://doi.org/'

Expand Down
64 changes: 47 additions & 17 deletions src/schema/schema_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1778,12 +1778,22 @@ def delete_memcached_cache(uuids_list):
Returns
-------
dict
The available organ types
The available organ types in the following format:
{
"AO": "Aorta",
"BD": "Blood",
"BL": "Bladder",
"BM": "Bone Marrow",
"BR": "Brain",
"HT": "Heart",
...
}
"""
def get_organ_types():
global _ontology_api_url

target_url = _ontology_api_url + '/organs?application_context=HuBMAP'
target_url = _ontology_api_url + SchemaConstants.ONTOLOGY_API_ORGAN_TYPES_ENDPOINT

# Use Memcached to improve performance
response = make_request_get(target_url, internal_token_used = True)
Expand All @@ -1792,14 +1802,10 @@ def get_organ_types():
response.raise_for_status()

if response.status_code == 200:
ids_dict = response.json()
return ids_dict
return response.json()
else:
# uuid-api will also return 400 if the given id is invalid
# We'll just hanle that and all other cases all together here
msg = f"Unable to make a request to query the id via uuid-api: {id}"
# Log the full stack trace, prepend a line with our message
logger.exception(msg)
logger.exception("Unable to make a request to query the organ types via ontology-api")

logger.debug("======get_organ_types() status code from ontology-api======")
logger.debug(response.status_code)
Expand All @@ -1817,12 +1823,32 @@ def get_organ_types():
Returns
-------
dict
The available assay types
The available assay types by name in the following format:
{
"10x-multiome": {
"contains_pii": true,
"description": "10x Multiome",
"name": "10x-multiome",
"primary": true,
"vis_only": false,
"vitessce_hints": []
},
"AF": {
"contains_pii": false,
"description": "Autofluorescence Microscopy",
"name": "AF",
"primary": true,
"vis_only": false,
"vitessce_hints": []
},
...
}
"""
def get_assay_types():
global _ontology_api_url

target_url = _ontology_api_url + '/assaytype?application_context=HuBMAP'
target_url = _ontology_api_url + SchemaConstants.ONTOLOGY_API_ASSAY_TYPES_ENDPOINT

# Use Memcached to improve performance
response = make_request_get(target_url, internal_token_used = True)
Expand All @@ -1831,14 +1857,18 @@ def get_assay_types():
response.raise_for_status()

if response.status_code == 200:
ids_dict = response.json()
return ids_dict
assay_types_by_name = {}
result_dict = response.json()

# The ontology-api response wraps the assay types list in a JSON envelope under the 'result' key
assay_types_list = result_dict['result']
for assay_type_dict in assay_types_list:
assay_types_by_name[assay_type_dict['name']] = assay_type_dict

return assay_types_by_name
else:
# uuid-api will also return 400 if the given id is invalid
# We'll just hanle that and all other cases all together here
msg = f"Unable to make a request to query the id via uuid-api: {id}"
# Log the full stack trace, prepend a line with our message
logger.exception(msg)
logger.exception("Unable to make a request to query the assay types via ontology-api")

logger.debug("======get_assay_types() status code from ontology-api======")
logger.debug(response.status_code)
Expand Down Expand Up @@ -1876,4 +1906,4 @@ def _create_request_headers(user_token):
auth_header_name: auth_scheme + ' ' + user_token
}

return headers_dict
return headers_dict
23 changes: 3 additions & 20 deletions src/schema/schema_triggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1024,9 +1024,10 @@ def get_dataset_title(property_key, normalized_type, user_token, existing_data_d
# Parse the organ description
if organ_name is not None:
try:
# The organ_name is the two-letter code only set if specimen_type == 'organ'
# The organ_name is the two-letter code only set for 'organ'
# Convert the two-letter code to a description
organ_desc = _get_organ_description(organ_name)
organ_types_dict = schema_manager.get_organ_types()
organ_desc = organ_types_dict[organ_name].lower()
except (yaml.YAMLError, requests.exceptions.RequestException) as e:
raise Exception(e)

Expand Down Expand Up @@ -2098,21 +2099,3 @@ def _get_combined_assay_type_description(data_types):

return assay_type_desc


"""
Get the organ description based on the given organ code
Parameters
----------
organ_code : str
The two-letter organ code
Returns
-------
str: The organ code description
"""
def _get_organ_description(organ_code):
organ_types_dict = schema_manager.get_organ_types()
return organ_types_dict[organ_code]['description'].lower()


0 comments on commit 6f59c9c

Please sign in to comment.