Skip to content

Commit

Permalink
Merge pull request #643 from hubmapconsortium/karlburke/AddAttribsToT…
Browse files Browse the repository at this point in the history
…rackMultiAssayComponents

Karlburke/add attribs to track multi assay components
  • Loading branch information
yuanzhou authored Mar 19, 2024
2 parents cc29504 + c5aebe4 commit 795b63d
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 2 deletions.
26 changes: 25 additions & 1 deletion src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# - type: data type of the property, one of the following: string|integer|boolean|list|json_string
# - generated: whether the property is auto generated (either with a `before_create_trigger` or not) or user provided, default to false
# - required_on_create: whether the property is required from user request JSON for entity creation via POST
# - immutable: whether the property can NOT be updated once being created, default to false
# - immutable: true indicates the property can NOT be updated after the entity is created, default to false
# - transient: whether the property to persist in database or not, default to false
# - exposed: whether the property gets returned to the user or not, default to true
# - trigger types: before_create_trigger|after_create_trigger|before_update_trigger|after_update_trigger|on_read_trigger, one property can have none (default) or more than one triggers
Expand Down Expand Up @@ -457,6 +457,28 @@ ENTITIES:
immutable: true
description: "The list of the uuids of next revision datasets"
on_read_trigger: get_next_revision_uuids
superseded_associated_processed_component_uuids:
type: list
# This property gets set via a PUT by entity-api /entities/{{dataset_uuid}} to list the uuids of the component
# Datasets superseded by the Multi-Assay Dataset with this attribute, so we can't define it as `immutable: true`.
# Modifications to an existing attribute are rejected using a validation trigger.
immutable: false
description: "List of uuids of existing Datasets used to construct this Multi-Assay Dataset, when present"
before_property_create_validators:
- verify_multi_assay_dataset_components
before_property_update_validators:
- verify_multi_assay_dataset_components
new_associated_multi_assay_uuid:
type: string
# This property gets set via a PUT by entity-api /entities/{{dataset_uuid}} to point to the Multi-Assay Dataset
# superseding the entity with this attribute, so we can't define it as `immutable: true`.
# Modifications to an existing attribute are rejected using a validation trigger.
immutable: false
description: "The uuid of the Multi-Assay Dataset constructed using this Dataset, when present"
before_property_create_validators:
- verify_multi_assay_dataset_components
before_property_update_validators:
- verify_multi_assay_dataset_components
# No like image and metadata files handling for Donor/Sample
# Dataset has only one thumbnail file
thumbnail_file:
Expand Down Expand Up @@ -609,6 +631,8 @@ ENTITIES:
after_update_trigger: link_publication_to_associated_collection
assigned_to_group_name: null # This assigned_to_group_name is Dataset specific, Publication doesn't have it
ingest_task: null # This ingest_task is Dataset specific, Publication doesn't have it
new_associated_multi_assay_uuid: null # Dataset-only attribute of Multi-Assay Dataset relationships
superseded_associated_processed_component_uuids: null # Dataset-only attribute of Multi-Assay Dataset relationships

############################################# Donor #############################################
Donor:
Expand Down
67 changes: 66 additions & 1 deletion src/schema/schema_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def validate_application_header_before_entity_create(normalized_entity_type, req


"""
@TODO-KBKBKB redo doc...
Validate the specified value for a Dataset's dataset_type is in the valueset UBKG recognizes.
Parameters
Expand Down Expand Up @@ -630,6 +629,72 @@ def validate_group_name(property_key, normalized_entity_type, request, existing_
raise ValueError("Invalid group in 'assigned_to_group_name'. Must be a data provider")


"""
Trigger event method to verify tracking fields new_associated_multi_assay_uuid and
superseded_associated_processed_component_uuids are set coherently on Multi-Assay Datasets and
their component Datasets.

The checks are applied in this order:
  1. Neither field may be supplied again once it is already present on the existing entity.
  2. Both fields may not be supplied in the same request.
  3. A field may not be supplied when the opposite field is already present on the existing entity.
  4. Every uuid supplied must be found by schema_neo4j_queries.get_entity().

Parameters
----------
property_key : str
    The target property key. Not consulted here; both tracking fields are checked on every call.
normalized_type : str
    One of the types defined in the schema yaml: Dataset. Not consulted here.
user_token: str
    The user's globus nexus token. Not consulted here.
existing_data_dict : dict
    A dictionary that contains all existing entity properties as Neo4j data types.
    N.B. elements are not Python data types and must be converted with utilities like schema_manager.convert_str_literal()
new_data_dict : dict
    The request input data as Python data structures converted from JSON, which has passed schema validation, entity
    validation, and possibly other property validations.

Returns
-------
None
    Falls out successfully when no contradiction is found.

Raises
------
ValueError
    When any of the checks listed above fails.
"""

def verify_multi_assay_dataset_components(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
    superseded_key = 'superseded_associated_processed_component_uuids'
    multi_assay_key = 'new_associated_multi_assay_uuid'

    # Once set, neither tracking field may be modified.
    if superseded_key in existing_data_dict and superseded_key in new_data_dict:
        raise ValueError(f"'superseded_associated_processed_component_uuids' is already set on"
                         f" {existing_data_dict['uuid']}.")
    if multi_assay_key in existing_data_dict and multi_assay_key in new_data_dict:
        raise ValueError(f"'new_associated_multi_assay_uuid' is already set on"
                         f" {existing_data_dict['uuid']}.")

    # A single Dataset is either a Multi-Assay Dataset or one of its components, never both.
    if superseded_key in new_data_dict and multi_assay_key in new_data_dict:
        raise ValueError("'superseded_associated_processed_component_uuids' and 'new_associated_multi_assay_uuid'"
                         " cannot both be specified on a single Dataset.")
    if superseded_key in new_data_dict and multi_assay_key in existing_data_dict:
        raise ValueError(f"'superseded_associated_processed_component_uuids' cannot be set on"
                         f" existing Dataset {existing_data_dict['uuid']} because it is a component Dataset of"
                         f" {existing_data_dict['new_associated_multi_assay_uuid']}.")
    if multi_assay_key in new_data_dict and superseded_key in existing_data_dict:
        # Convert the string stored in Neo4j to a Python list so the components can be counted.
        superseded_component_uuids = schema_manager.convert_str_literal(existing_data_dict[superseded_key])
        raise ValueError(f"'new_associated_multi_assay_uuid' cannot be set on"
                         f" existing Dataset {existing_data_dict['uuid']} because it is a Multi-Assay Dataset"
                         f" with {len(superseded_component_uuids)}"
                         f" component Datasets it supersedes.")

    # No contradictions were found above, so check that the new data refers to entities which can be found.
    # A truthiness test is used so a lookup result of either an empty container or None is reported
    # as a ValueError rather than failing on len().
    if multi_assay_key in new_data_dict:
        neo4j_driver = schema_manager.get_neo4j_driver_instance()
        proposed_multi_assay_dataset = schema_neo4j_queries.get_entity(neo4j_driver
                                                                       , new_data_dict[multi_assay_key])
        if not proposed_multi_assay_dataset:
            raise ValueError(f"'new_associated_multi_assay_uuid' value"
                             f" {new_data_dict['new_associated_multi_assay_uuid']}"
                             f" does not exist.")

    if superseded_key in new_data_dict:
        # The driver instance does not depend on the uuid, so fetch it once for all the lookups.
        neo4j_driver = schema_manager.get_neo4j_driver_instance()
        for component_uuid in new_data_dict[superseded_key]:
            proposed_component_dataset = schema_neo4j_queries.get_entity(neo4j_driver
                                                                         , component_uuid)
            if not proposed_component_dataset:
                raise ValueError(f"'superseded_associated_processed_component_uuids' entry with value"
                                 f" {component_uuid} does not exist.")

    # Fall out successfully if no ValueError was raised.
    return

####################################################################################################
## Internal Functions
Expand Down

0 comments on commit 795b63d

Please sign in to comment.