hubmapconsortium · yuanzhou · Mar 13, 2024 · Mar 12, 2024 · Mar 13, 2024 · Mar 13, 2024
diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml
@@ -3,7 +3,7 @@
 # - type: data type of the property, one of the following: string|integer|boolean|list|json_string
 # - generated: whether the property is auto generated (either with a  `before_create_trigger` or not) or user provided, default to false
 # - required_on_create: whether the property is required from user reqeust JSON for entity creation via POST
-# - immutable: whether the property can NOT be updated once being created, default to false
+# - immutable: true indicates the property can NOT be updated after the entity is created, default to false
 # - transient: whether the property to persist in database or not, default to false
 # - exposed: whether the property gets returned to the user or not, default to true
 # - trigger types: before_create_trigger|after_create_trigger|before_update_trigger|after_update_trigger|on_read_trigger, one property can have none (default) or more than one triggers
@@ -457,6 +457,28 @@ ENTITIES:
         immutable: true
         description: "The list of the uuids of next revision datasets"
         on_read_trigger: get_next_revision_uuids
+      superseded_associated_processed_component_uuids:
+        type: list
+        # This property gets set via a PUT by entity-api /entities/{{dataset_uuid}} to point to the component Datasets
+        # superseded by the Multi-Assay Dataset with this attribute, so we can't define it as `immutable: true`.
+        # Modifications to an existing attribute are rejected by the verify_multi_assay_dataset_components validator.
+        immutable: false
+        description: "List of uuids of existing Datasets used to construct this Multi-Assay Dataset, when present"
+        before_property_create_validators:
+          - verify_multi_assay_dataset_components
+        before_property_update_validators:
+          - verify_multi_assay_dataset_components
+      new_associated_multi_assay_uuid:
+        type: string
+        # This property gets set via a PUT by entity-api /entities/{{dataset_uuid}} to point to the Multi-Assay Dataset
+        # superseding the entity with this attribute, so we can't define it as `immutable: true`.
+        # Modifications to an existing attribute are rejected by the verify_multi_assay_dataset_components validator.
+        immutable: false
+        description: "The uuid of the Multi-Assay Dataset constructed using this Dataset, when present"
+        before_property_create_validators:
+          - verify_multi_assay_dataset_components
+        before_property_update_validators:
+          - verify_multi_assay_dataset_components
       # No like image and metadata files handling for Donor/Sample
       # Dataset has only one thumbnail file
       thumbnail_file:
@@ -609,6 +631,8 @@ ENTITIES:
         after_update_trigger: link_publication_to_associated_collection
       assigned_to_group_name: null # This assigned_to_group_name is Dataset specific, Publication doesn't have it
       ingest_task: null # This ingest_task is Dataset specific, Publication doesn't have it
+      new_associated_multi_assay_uuid: null # Dataset-only attribute of Multi-Assay Dataset relationships
+      superseded_associated_processed_component_uuids: null # Dataset-only attribute of Multi-Assay Dataset relationships
 
   ############################################# Donor #############################################
   Donor:

diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py
@@ -46,7 +46,6 @@ def validate_application_header_before_entity_create(normalized_entity_type, req
 
 
 """
-@TODO-KBKBKB redo doc...
 Validate the specified value for a Dataset's dataset_type is in the valueset UBKG recognizes. 
 
 Parameters
@@ -626,6 +625,72 @@ def validate_group_name(property_key, normalized_entity_type, request, existing_
         raise ValueError("Invalid group in 'assigned_to_group_name'. Must be a data provider")
 
 
+"""
+Validator to verify the tracking fields new_associated_multi_assay_uuid and
+superseded_associated_processed_component_uuids are set coherently on Multi-Assay Datasets and
+their component Datasets. Raises a ValueError describing the first contradiction found.
+
+Parameters
+----------
+property_key : str
+    The target property key
+normalized_type : str
+    One of the types defined in the schema yaml: Dataset
+user_token: str
+    The user's globus nexus token
+existing_data_dict : dict
+    A dictionary that contains all existing entity properties as Neo4j data types.
+    N.B. elements are not Python data types and must be converted with utilities like schema_manager.convert_str_literal()
+new_data_dict : dict
+    The request input data as Python data structures converted from JSON, which has passed schema validation, entity
+    validation, and possibly other property validations.
+"""
+
+def verify_multi_assay_dataset_components(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
+    # Checks run in order; the first contradiction found raises ValueError. Nothing is returned on success.
+    if  'superseded_associated_processed_component_uuids' in existing_data_dict \
+        and 'superseded_associated_processed_component_uuids' in new_data_dict:  # already set, so any change is rejected
+        raise ValueError(   f"'superseded_associated_processed_component_uuids' is already set on"
+                            f" {existing_data_dict['uuid']}.")
+    if  'new_associated_multi_assay_uuid' in existing_data_dict \
+        and 'new_associated_multi_assay_uuid' in new_data_dict:  # already set, so any change is rejected
+        raise ValueError(   f"'new_associated_multi_assay_uuid' is already set on"
+                            f" {existing_data_dict['uuid']}.")
+    if  'superseded_associated_processed_component_uuids' in new_data_dict \
+        and 'new_associated_multi_assay_uuid' in new_data_dict:  # the two fields are mutually exclusive in one request
+        raise ValueError(   f"'superseded_associated_processed_component_uuids' and 'new_associated_multi_assay_uuid'"
+                            f" cannot both be specified on a single Dataset.")
+    if  'superseded_associated_processed_component_uuids' in new_data_dict \
+        and 'new_associated_multi_assay_uuid' in existing_data_dict:  # a component Dataset cannot also list components
+        raise ValueError( f"'superseded_associated_processed_component_uuids' cannot be set on"
+                        f" existing Dataset {existing_data_dict['uuid']} because it is a component Dataset of"
+                        f" {existing_data_dict['new_associated_multi_assay_uuid']}.")
+    if  'new_associated_multi_assay_uuid' in new_data_dict \
+        and 'superseded_associated_processed_component_uuids' in existing_data_dict:  # a Multi-Assay Dataset cannot also be a component
+        # Convert the string stored in Neo4j to a Python list so its length can be reported
+        supersededComponentDatasets = schema_manager.convert_str_literal(existing_data_dict['superseded_associated_processed_component_uuids'])
+        raise ValueError( f"'new_associated_multi_assay_uuid' cannot be set on"
+                        f" existing Dataset {existing_data_dict['uuid']} because it is a Multi-Assay Dataset"
+                        f" with {len(supersededComponentDatasets)}"
+                        f" component Datasets it supersedes.")
+    # No contradictions were found above, so check that every uuid in the new data refers to an existing entity.
+    if  'new_associated_multi_assay_uuid' in new_data_dict:
+        proposedMultiAssayDataset = schema_neo4j_queries.get_entity(schema_manager.get_neo4j_driver_instance()
+                                                                    , new_data_dict['new_associated_multi_assay_uuid'])
+        if len(proposedMultiAssayDataset) < 1:  # presumably get_entity() returns an empty result for an unknown uuid -- confirm
+            raise ValueError(   f"'new_associated_multi_assay_uuid' value"
+                                f" {new_data_dict['new_associated_multi_assay_uuid']}"
+                                f" does not exist.")
+    if  'superseded_associated_processed_component_uuids' in new_data_dict:
+        for uuid in new_data_dict['superseded_associated_processed_component_uuids']:  # one lookup per proposed component uuid
+            proposedComponentDataset = schema_neo4j_queries.get_entity( schema_manager.get_neo4j_driver_instance()
+                                                                        , uuid)
+            if len(proposedComponentDataset) < 1:  # presumably an empty result means no such entity -- confirm
+                raise ValueError(f"'superseded_associated_processed_component_uuids' entry with value"
+                                 f" {uuid} does not exist.")
+
+    # Fall through successfully if no ValueError was raised above.
+    return
 
 ####################################################################################################
 ## Internal Functions