Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial commit of changes supporting Multi-Assay Dataset tracking #639

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# - type: data type of the property, one of the following: string|integer|boolean|list|json_string
# - generated: whether the property is auto generated (either with a `before_create_trigger` or not) or user provided, default to false
# - required_on_create: whether the property is required from the user request JSON for entity creation via POST
# - immutable: whether the property can NOT be updated once being created, default to false
# - immutable: true indicates the property can NOT be updated after the entity is created, default to false
# - transient: whether the property to persist in database or not, default to false
# - exposed: whether the property gets returned to the user or not, default to true
# - trigger types: before_create_trigger|after_create_trigger|before_update_trigger|after_update_trigger|on_read_trigger, one property can have none (default) or more than one triggers
Expand Down Expand Up @@ -457,6 +457,28 @@ ENTITIES:
immutable: true
description: "The list of the uuids of next revision datasets"
on_read_trigger: get_next_revision_uuids
superseded_associated_processed_component_uuids:
type: list
# This property gets set via a PUT by entity-api /entities/{{dataset_uuid}} to list the component Datasets
# superseded by the Multi-Assay Dataset with this attribute, so we can't define it as `immutable: true`.
# Modifications to an existing attribute are rejected by the property validator below.
immutable: false
description: "List of uuids of existing Datasets used to construct this Multi-Assay Dataset, when present"
before_property_create_validators:
- verify_multi_assay_dataset_components
before_property_update_validators:
- verify_multi_assay_dataset_components
new_associated_multi_assay_uuid:
type: string
# This property gets set via a PUT by entity-api /entities/{{dataset_uuid}} to point to the Multi-Assay Dataset
# superseding the entity with this attribute, so we can't define it as `immutable: true`.
# Modifications to an existing attribute are rejected using a validation trigger.
immutable: false
description: "The uuid of the Multi-Assay Dataset constructed using this Dataset, when present"
before_property_create_validators:
- verify_multi_assay_dataset_components
before_property_update_validators:
- verify_multi_assay_dataset_components
# No like image and metadata files handling for Donor/Sample
# Dataset has only one thumbnail file
thumbnail_file:
Expand Down Expand Up @@ -609,6 +631,8 @@ ENTITIES:
after_update_trigger: link_publication_to_associated_collection
assigned_to_group_name: null # This assigned_to_group_name is Dataset specific, Publication doesn't have it
ingest_task: null # This ingest_task is Dataset specific, Publication doesn't have it
new_associated_multi_assay_uuid: null # Dataset-only attribute of Multi-Assay Dataset relationships
superseded_associated_processed_component_uuids: null # Dataset-only attribute of Multi-Assay Dataset relationships

############################################# Donor #############################################
Donor:
Expand Down
67 changes: 66 additions & 1 deletion src/schema/schema_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def validate_application_header_before_entity_create(normalized_entity_type, req


"""
@TODO-KBKBKB redo doc...
Validate the specified value for a Dataset's dataset_type is in the valueset UBKG recognizes.

Parameters
Expand Down Expand Up @@ -626,6 +625,72 @@ def validate_group_name(property_key, normalized_entity_type, request, existing_
raise ValueError("Invalid group in 'assigned_to_group_name'. Must be a data provider")


"""
Property validator to verify the tracking fields new_associated_multi_assay_uuid and
superseded_associated_processed_component_uuids are set coherently on Multi-Assay Datasets and
their component Datasets.

A Dataset may carry at most one of the two fields, neither field may be changed once it is
present on the existing entity, and every uuid supplied in the request must resolve to an
existing entity.

Parameters
----------
property_key : str
    The target property key (not used by this validator)
normalized_type : str
    One of the types defined in the schema yaml: Dataset (not used by this validator)
user_token: str
    The user's globus nexus token (not used by this validator)
existing_data_dict : dict
    A dictionary that contains all existing entity properties as Neo4j data types.
    N.B. elements are not Python data types and must be converted with utilities like schema_manager.convert_str_literal()
new_data_dict : dict
    The request input data as Python data structures converted from JSON, which has passed schema validation, entity
    validation, and possibly other property validations.

Returns
-------
None
    Falls through without raising when the request is coherent.

Raises
------
ValueError
    When a tracking field is already set on the existing entity, when both fields would end up
    on the same Dataset, or when a supplied uuid does not match an existing entity.
"""

def verify_multi_assay_dataset_components(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
    superseded_key = 'superseded_associated_processed_component_uuids'
    multi_assay_key = 'new_associated_multi_assay_uuid'

    # NOTE: existing_data_dict['uuid'] is only read while building an error message, so the
    # checks below still work when the existing entity dictionary is empty.

    # Neither tracking field may be modified once it is present on the existing entity.
    if superseded_key in existing_data_dict and superseded_key in new_data_dict:
        raise ValueError(f"'{superseded_key}' is already set on"
                         f" {existing_data_dict['uuid']}.")
    if multi_assay_key in existing_data_dict and multi_assay_key in new_data_dict:
        raise ValueError(f"'{multi_assay_key}' is already set on"
                         f" {existing_data_dict['uuid']}.")

    # The two fields are mutually exclusive, within one request...
    if superseded_key in new_data_dict and multi_assay_key in new_data_dict:
        raise ValueError(f"'{superseded_key}' and '{multi_assay_key}'"
                         f" cannot both be specified on a single Dataset.")
    # ...and across the request and the stored entity.
    if superseded_key in new_data_dict and multi_assay_key in existing_data_dict:
        raise ValueError(f"'{superseded_key}' cannot be set on"
                         f" existing Dataset {existing_data_dict['uuid']} because it is a component Dataset of"
                         f" {existing_data_dict[multi_assay_key]}.")
    if multi_assay_key in new_data_dict and superseded_key in existing_data_dict:
        # Convert the string stored in Neo4j to a Python list so its entries can be counted
        superseded_component_uuids = schema_manager.convert_str_literal(existing_data_dict[superseded_key])
        raise ValueError(f"'{multi_assay_key}' cannot be set on"
                         f" existing Dataset {existing_data_dict['uuid']} because it is a Multi-Assay Dataset"
                         f" with {len(superseded_component_uuids)}"
                         f" component Datasets it supersedes.")

    # If no contradictions above have caused a ValueError, check if new data contains UUIDs for valid entities.
    if multi_assay_key in new_data_dict:
        neo4j_driver = schema_manager.get_neo4j_driver_instance()
        proposed_multi_assay_dataset = schema_neo4j_queries.get_entity(neo4j_driver
                                                                       , new_data_dict[multi_assay_key])
        # Truthiness covers both an empty result and None
        if not proposed_multi_assay_dataset:
            raise ValueError(f"'{multi_assay_key}' value"
                             f" {new_data_dict[multi_assay_key]}"
                             f" does not exist.")
    if superseded_key in new_data_dict:
        # The driver instance does not change between iterations, so fetch it once
        neo4j_driver = schema_manager.get_neo4j_driver_instance()
        for component_uuid in new_data_dict[superseded_key]:
            proposed_component_dataset = schema_neo4j_queries.get_entity(neo4j_driver
                                                                         , component_uuid)
            if not proposed_component_dataset:
                raise ValueError(f"'{superseded_key}' entry with value"
                                 f" {component_uuid} does not exist.")

    # fall out successfully if no raise occurred.
    return

####################################################################################################
## Internal Functions
Expand Down
Loading