diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 2f856b52..5f66372a 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -3,7 +3,7 @@ # - type: data type of the property, one of the following: string|integer|boolean|list|json_string # - generated: whether the property is auto generated (either with a `before_create_trigger` or not) or user provided, default to false # - required_on_create: whether the property is required from user reqeust JSON for entity creation via POST -# - immutable: whether the property can NOT be updated once being created, default to false +# - immutable: true indicates the property can NOT be updated after the entity is created, default to false # - transient: whether the property to persist in database or not, default to false # - exposed: whether the property gets returned to the user or not, default to true # - trigger types: before_create_trigger|after_create_trigger|before_update_trigger|after_update_trigger|on_read_trigger, one property can have none (default) or more than one triggers @@ -457,6 +457,28 @@ ENTITIES: immutable: true description: "The list of the uuids of next revision datasets" on_read_trigger: get_next_revision_uuids + superseded_associated_processed_component_uuids: + type: list + # This property gets set via a PUT by entity-api /entities/{{dataset_uuid}} to list the component Datasets + # superseded by the Multi-Assay Dataset with this attribute, so we can't define it as `immutable: true`. + # Modifications to an existing attribute are rejected using a validation trigger. 
+ immutable: false + description: "List of uuids of existing Datasets used to construct this Multi-Assay Dataset, when present" + before_property_create_validators: + - verify_multi_assay_dataset_components + before_property_update_validators: + - verify_multi_assay_dataset_components + new_associated_multi_assay_uuid: + type: string + # This property gets set via a PUT by entity-api /entities/{{dataset_uuid}} to point to the Multi-Assay Dataset + # superseding the entity with this attribute, so we can't define it as `immutable: true`. + # Modifications to an existing attribute are rejected using a validation trigger. + immutable: false + description: "The uuid of the Multi-Assay Dataset constructed using this Dataset, when present" + before_property_create_validators: + - verify_multi_assay_dataset_components + before_property_update_validators: + - verify_multi_assay_dataset_components # No like image and metadata files handling for Donor/Sample # Dataset has only one thumbnail file thumbnail_file: @@ -609,6 +631,8 @@ ENTITIES: after_update_trigger: link_publication_to_associated_collection assigned_to_group_name: null # This assigned_to_group_name is Dataset specific, Publication doesn't have it ingest_task: null # This ingest_task is Dataset specific, Publication doesn't have it + new_associated_multi_assay_uuid: null # Dataset-only attribute of Multi-Assay Dataset relationships + superseded_associated_processed_component_uuids: null # Dataset-only attribute of Multi-Assay Dataset relationships ############################################# Donor ############################################# Donor: diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py index 17ad3cf6..4d13543e 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -46,7 +46,6 @@ def validate_application_header_before_entity_create(normalized_entity_type, req """ -@TODO-KBKBKB redo doc... 
"""
Property validator which verifies the tracking fields new_associated_multi_assay_uuid and
superseded_associated_processed_component_uuids are set coherently on Multi-Assay Datasets and
their component Datasets.

A request is rejected when it would
  - modify either field once it is already present on the existing entity,
  - specify both fields for a single Dataset, or
  - set one of the fields on a Dataset which already carries the other one.
When none of those contradictions apply, every UUID supplied for either field must identify an
existing entity.

Parameters
----------
property_key : str
    The target property key
normalized_type : str
    One of the types defined in the schema yaml: Dataset
user_token: str
    The user's globus nexus token. Not used by this validator.
    NOTE(review): the sibling validator validate_group_name() takes `request` in this position --
    confirm what the caller actually passes here.
existing_data_dict : dict
    A dictionary that contains all existing entity properties as Neo4j data types.
    N.B. elements are not Python data types and must be converted with utilities like schema_manager.convert_str_literal()
new_data_dict : dict
    The request input data as Python data structures converted from JSON, which has passed schema validation, entity
    validation, and possibly other property validations.

Returns
-------
None
    Falls through without a return value when no problem is found.

Raises
------
ValueError
    When the two fields would be set incoherently, or when a supplied UUID does not match an entity.
"""

def verify_multi_assay_dataset_components(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
    superseded_key = 'superseded_associated_processed_component_uuids'
    multi_assay_key = 'new_associated_multi_assay_uuid'

    # N.B. The order of these checks decides which message is reported for a request
    # which breaks more than one rule, so it must be preserved.

    # Neither field may be changed once it exists on the entity.
    if superseded_key in existing_data_dict and superseded_key in new_data_dict:
        raise ValueError(f"'{superseded_key}' is already set on"
                         f" {existing_data_dict['uuid']}.")
    if multi_assay_key in existing_data_dict and multi_assay_key in new_data_dict:
        raise ValueError(f"'{multi_assay_key}' is already set on"
                         f" {existing_data_dict['uuid']}.")

    # A Dataset is either a Multi-Assay Dataset or a component of one, never both.
    if superseded_key in new_data_dict and multi_assay_key in new_data_dict:
        raise ValueError(f"'{superseded_key}' and '{multi_assay_key}'"
                         " cannot both be specified on a single Dataset.")
    if superseded_key in new_data_dict and multi_assay_key in existing_data_dict:
        raise ValueError(f"'{superseded_key}' cannot be set on"
                         f" existing Dataset {existing_data_dict['uuid']} because it is a component Dataset of"
                         f" {existing_data_dict[multi_assay_key]}.")
    if multi_assay_key in new_data_dict and superseded_key in existing_data_dict:
        # The existing value is the string stored in Neo4j; convert it to a Python list
        # so the number of superseded components can be reported.
        superseded_component_uuids = schema_manager.convert_str_literal(existing_data_dict[superseded_key])
        raise ValueError(f"'{multi_assay_key}' cannot be set on"
                         f" existing Dataset {existing_data_dict['uuid']} because it is a Multi-Assay Dataset"
                         f" with {len(superseded_component_uuids)}"
                         " component Datasets it supersedes.")

    # No contradictions were found above, so check the new data refers to existing entities.
    # Truthiness is tested rather than len(), so a lookup which yields None is reported as a
    # missing entity with a ValueError instead of failing with a TypeError.
    if multi_assay_key in new_data_dict:
        proposed_multi_assay_uuid = new_data_dict[multi_assay_key]
        if not schema_neo4j_queries.get_entity(schema_manager.get_neo4j_driver_instance()
                                               , proposed_multi_assay_uuid):
            raise ValueError(f"'{multi_assay_key}' value"
                             f" {proposed_multi_assay_uuid}"
                             " does not exist.")
    if superseded_key in new_data_dict:
        # Fetch the driver once rather than once per component UUID.
        neo4j_driver = schema_manager.get_neo4j_driver_instance()
        for component_uuid in new_data_dict[superseded_key]:
            if not schema_neo4j_queries.get_entity(neo4j_driver, component_uuid):
                raise ValueError(f"'{superseded_key}' entry with value"
                                 f" {component_uuid} does not exist.")

    # Fall out successfully if nothing was raised.
    return

####################################################################################################
## Internal Functions