
Commit

Merge pull request #602 from hubmapconsortium/karlburke/Set-dataset_type-On_Publication

Karlburke/set dataset type on publication
yuanzhou authored Jan 25, 2024
2 parents 3b06853 + a605af5 commit 8133c93
Showing 2 changed files with 33 additions and 16 deletions.
10 changes: 6 additions & 4 deletions src/schema/provenance_schema.yaml
@@ -331,15 +331,15 @@ ENTITIES:
before_property_update_validators:
- validate_no_duplicates_in_list
type: list
required_on_create: true # Only required for create via POST, not update via PUT
required_on_create: false
description: "The data or assay types contained in this dataset as a json array of strings. Each is an assay code from [assay types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/assay_types.yaml)."
dataset_type:
before_property_create_validators:
- validate_recognized_dataset_type
before_property_update_validators:
- validate_recognized_dataset_type
type: string
required_on_create: false # Once replaces data_types, will be required for create via POST, not update via PUT
required_on_create: true # Required for create via POST, not update via PUT
description: "The assay types of this Dataset. Valid values are from UBKG are queried by schema_manager.get_valueset_dataset_type() using the Ontology API."
collections:
type: list
@@ -599,15 +599,17 @@ ENTITIES:
before_property_update_validators:
- validate_no_duplicates_in_list
type: list
required_on_create: true # Only required for create via POST, not update via PUT
required_on_create: false
description: "The data or assay types contained in this dataset as a json array of strings. Each is an assay code from [assay types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/assay_types.yaml)."
dataset_type:
before_create_trigger: set_publication_dataset_type
before_property_create_validators:
- validate_recognized_dataset_type
before_property_update_validators:
- validate_recognized_dataset_type
type: string
required_on_create: false # Once replaces data_types, will be required for create via POST, not update via PUT
generated: true
immutable: true
description: "The assay types of this Dataset. Valid values are from UBKG are queried by schema_manager.get_valueset_dataset_type() using the Ontology API."
collections:
type: list
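For context on the validate_recognized_dataset_type validators above: the proposed dataset_type must be one of the values returned by schema_manager.get_valueset_dataset_type(), which queries the UBKG valueset through the Ontology API. A minimal, self-contained sketch of that check follows; the helper name and valueset values are illustrative, not the project's actual code.

# Illustrative sketch only -- not entity-api's actual validator.
# The valueset is passed in as a plain list so the example stays self-contained;
# in the real schema it would come from schema_manager.get_valueset_dataset_type().
def check_dataset_type(proposed: str, recognized_types: list[str]) -> None:
    """Raise if the proposed dataset_type is not in the recognized valueset."""
    if proposed not in recognized_types:
        raise ValueError(
            f"Unrecognized dataset_type '{proposed}'; expected one of {recognized_types}"
        )

# Hypothetical valueset values for demonstration:
check_dataset_type("Publication", ["Publication", "RNAseq", "CODEX"])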
39 changes: 27 additions & 12 deletions src/schema/schema_triggers.py
@@ -1005,17 +1005,7 @@ def get_dataset_title(property_key, normalized_type, user_token, existing_data_d
race = None
sex = None

# Parse assay_type from the Dataset
try:
logger.info(f"Executing convert_str_literal() on 'data_types' of uuid: {existing_data_dict['uuid']} during calling 'get_dataset_title()' trigger method.")

# Note: The existing_data_dict['data_types'] is stored in Neo4j as a string representation of the Python list
# It's not stored in Neo4j as a json string! And we can't store it as a json string
# due to the way that Cypher handles single/double quotes.
data_types_list = schema_manager.convert_str_literal(existing_data_dict['data_types'])
assay_type_desc = _get_combined_assay_type_description(data_types_list)
except requests.exceptions.RequestException as e:
raise requests.exceptions.RequestException(e)
dataset_type = existing_data_dict['dataset_type']

# Get the sample organ name and donor metadata information of this dataset
organ_name, donor_metadata = schema_neo4j_queries.get_dataset_organ_and_donor_info(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid'])
@@ -1082,7 +1072,7 @@ def get_dataset_title(property_key, normalized_type, user_token, existing_data_d
else:
age_race_sex_info = f"{age}-year-old {race} {sex}"

generated_title = f"{assay_type_desc} data from the {organ_desc} of a {age_race_sex_info}"
generated_title = f"{dataset_type} data from the {organ_desc} of a {age_race_sex_info}"

return property_key, generated_title
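A worked example of the title produced by the updated f-string, which uses dataset_type directly instead of a combined assay type description (the values below are illustrative, not taken from the commit):

dataset_type = "RNAseq"
organ_desc = "kidney"
age_race_sex_info = "57-year-old white male"
generated_title = f"{dataset_type} data from the {organ_desc} of a {age_race_sex_info}"
# -> "RNAseq data from the kidney of a 57-year-old white male"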

@@ -1691,6 +1681,31 @@ def set_publication_date(property_key, normalized_type, user_token, existing_dat

return property_key, date_obj.date().isoformat()

"""
Trigger event method setting the dataset_type immutable property for a Publication.
Parameters
----------
property_key : str
The target property key of the value to be generated
normalized_type : str
One of the types defined in the schema yaml: Publication
user_token: str
The user's globus nexus token
existing_data_dict : dict
A dictionary that contains all existing entity properties
new_data_dict : dict
A merged dictionary that contains all possible input data to be used
Returns
-------
str: The target property key
str: Immutable dataset_type of "Publication"
"""
def set_publication_dataset_type(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
# Rely on the dataset_type generated: true setting in provenance_schema.yaml to ensure the
# request does not contain a value which would be overwritten here.
return property_key, 'Publication'
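A usage sketch of the new trigger (argument values are placeholders; in entity-api the schema framework invokes it through the before_create_trigger entry in provenance_schema.yaml):

# Illustrative call only; regardless of input, the trigger returns the constant value.
key, value = set_publication_dataset_type(
    property_key='dataset_type',
    normalized_type='Publication',
    user_token='<globus-token>',  # placeholder
    existing_data_dict={},
    new_data_dict={},
)
assert (key, value) == ('dataset_type', 'Publication')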

####################################################################################################
## Trigger methods specific to Upload - DO NOT RENAME
