
Commit

Merge pull request #602 from hubmapconsortium/karlburke/Set-dataset_type-On_Publication

Karlburke/set dataset type on publication
yuanzhou authored Jan 25, 2024
2 parents 3b06853 + a605af5 commit 8133c93
Showing 2 changed files with 33 additions and 16 deletions.
10 changes: 6 additions & 4 deletions src/schema/provenance_schema.yaml
@@ -331,15 +331,15 @@ ENTITIES:
before_property_update_validators:
- validate_no_duplicates_in_list
type: list
required_on_create: true # Only required for create via POST, not update via PUT
required_on_create: false
description: "The data or assay types contained in this dataset as a json array of strings. Each is an assay code from [assay types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/assay_types.yaml)."
dataset_type:
before_property_create_validators:
- validate_recognized_dataset_type
before_property_update_validators:
- validate_recognized_dataset_type
type: string
required_on_create: false # Once replaces data_types, will be required for create via POST, not update via PUT
required_on_create: true # Required for create via POST, not update via PUT
description: "The assay types of this Dataset. Valid values are from UBKG are queried by schema_manager.get_valueset_dataset_type() using the Ontology API."
collections:
type: list
@@ -599,15 +599,17 @@ ENTITIES:
before_property_update_validators:
- validate_no_duplicates_in_list
type: list
required_on_create: true # Only required for create via POST, not update via PUT
required_on_create: false
description: "The data or assay types contained in this dataset as a json array of strings. Each is an assay code from [assay types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/assay_types.yaml)."
dataset_type:
before_create_trigger: set_publication_dataset_type
before_property_create_validators:
- validate_recognized_dataset_type
before_property_update_validators:
- validate_recognized_dataset_type
type: string
required_on_create: false # Once replaces data_types, will be required for create via POST, not update via PUT
generated: true
immutable: true
description: "The assay types of this Dataset. Valid values are from UBKG are queried by schema_manager.get_valueset_dataset_type() using the Ontology API."
collections:
type: list
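For context on the validate_recognized_dataset_type validators above: the proposed dataset_type must be one of the values returned by schema_manager.get_valueset_dataset_type(), which queries the UBKG valueset through the Ontology API. A minimal, self-contained sketch of that check follows; the helper name and valueset values are illustrative, not the project's actual code.

# Illustrative sketch only -- not entity-api's actual validator.
# The valueset is passed in as a plain list so the example stays self-contained;
# in the real schema it would come from schema_manager.get_valueset_dataset_type().
def check_dataset_type(proposed: str, recognized_types: list[str]) -> None:
    """Raise if the proposed dataset_type is not in the recognized valueset."""
    if proposed not in recognized_types:
        raise ValueError(
            f"Unrecognized dataset_type '{proposed}'; expected one of {recognized_types}"
        )

# Hypothetical valueset values for demonstration:
check_dataset_type("Publication", ["Publication", "RNAseq", "CODEX"])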
39 changes: 27 additions & 12 deletions src/schema/schema_triggers.py
@@ -1005,17 +1005,7 @@ def get_dataset_title(property_key, normalized_type, user_token, existing_data_d
race = None
sex = None

# Parse assay_type from the Dataset
try:
logger.info(f"Executing convert_str_literal() on 'data_types' of uuid: {existing_data_dict['uuid']} during calling 'get_dataset_title()' trigger method.")

# Note: The existing_data_dict['data_types'] is stored in Neo4j as a string representation of the Python list
# It's not stored in Neo4j as a json string! And we can't store it as a json string
# due to the way that Cypher handles single/double quotes.
data_types_list = schema_manager.convert_str_literal(existing_data_dict['data_types'])
assay_type_desc = _get_combined_assay_type_description(data_types_list)
except requests.exceptions.RequestException as e:
raise requests.exceptions.RequestException(e)
dataset_type = existing_data_dict['dataset_type']

# Get the sample organ name and donor metadata information of this dataset
organ_name, donor_metadata = schema_neo4j_queries.get_dataset_organ_and_donor_info(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid'])
@@ -1082,7 +1072,7 @@ def get_dataset_title(property_key, normalized_type, user_token, existing_data_d
else:
age_race_sex_info = f"{age}-year-old {race} {sex}"

generated_title = f"{assay_type_desc} data from the {organ_desc} of a {age_race_sex_info}"
generated_title = f"{dataset_type} data from the {organ_desc} of a {age_race_sex_info}"

return property_key, generated_title
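A worked example of the title produced by the updated f-string, which uses dataset_type directly instead of a combined assay type description (the values below are illustrative, not taken from the commit):

dataset_type = "RNAseq"
organ_desc = "kidney"
age_race_sex_info = "57-year-old white male"
generated_title = f"{dataset_type} data from the {organ_desc} of a {age_race_sex_info}"
# -> "RNAseq data from the kidney of a 57-year-old white male"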

@@ -1691,6 +1681,31 @@ def set_publication_date(property_key, normalized_type, user_token, existing_dat

return property_key, date_obj.date().isoformat()

"""
Trigger event method setting the dataset_type immutable property for a Publication.
Parameters
----------
property_key : str
The target property key of the value to be generated
normalized_type : str
One of the types defined in the schema yaml: Publication
user_token: str
The user's globus nexus token
existing_data_dict : dict
A dictionary that contains all existing entity properties
new_data_dict : dict
A merged dictionary that contains all possible input data to be used
Returns
-------
str: The target property key
str: Immutable dataset_type of "Publication"
"""
def set_publication_dataset_type(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
# Rely on the dataset_type generated: true setting in provenance_schema.yaml to ensure the
# request does not contain a value which would be overwritten here.
return property_key, 'Publication'
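A usage sketch of the new trigger (argument values are placeholders; in entity-api the schema framework invokes it through the before_create_trigger entry in provenance_schema.yaml):

# Illustrative call only; regardless of input, the trigger returns the constant value.
key, value = set_publication_dataset_type(
    property_key='dataset_type',
    normalized_type='Publication',
    user_token='<globus-token>',  # placeholder
    existing_data_dict={},
    new_data_dict={},
)
assert (key, value) == ('dataset_type', 'Publication')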

####################################################################################################
## Trigger methods specific to Upload - DO NOT RENAME
