diff --git a/src/app.py b/src/app.py index 78348102..4cbc05e8 100644 --- a/src/app.py +++ b/src/app.py @@ -983,26 +983,31 @@ def create_entity(entity_type): # Also check existence of the previous revision dataset if specified if 'previous_revision_uuid' in json_data_dict: - previous_version_dict = query_target_entity(json_data_dict['previous_revision_uuid'], user_token) - - # Make sure the previous version entity is either a Dataset or Sample (and publication 2/17/23) - if previous_version_dict['entity_type'] not in ['Sample'] and \ - not schema_manager.entity_type_instanceof(previous_version_dict['entity_type'], 'Dataset'): - bad_request_error(f"The previous_revision_uuid specified for this dataset must be either a Dataset or Sample or Publication") - - # Also need to validate if the given 'previous_revision_uuid' has already had - # an existing next revision - # Only return a list of the uuids, no need to get back the list of dicts - next_revisions_list = app_neo4j_queries.get_next_revisions(neo4j_driver_instance, previous_version_dict['uuid'], 'uuid') - - # As long as the list is not empty, tell the users to use a different 'previous_revision_uuid' - if next_revisions_list: - bad_request_error(f"The previous_revision_uuid specified for this dataset has already had a next revision") - - # Only published datasets can have revisions made of them. Verify that that status of the Dataset specified - # by previous_revision_uuid is published. Else, bad request error. 
- if previous_version_dict['status'].lower() != DATASET_STATUS_PUBLISHED: - bad_request_error(f"The previous_revision_uuid specified for this dataset must be 'Published' in order to create a new revision from it") + if isinstance(json_data_dict['previous_revision_uuid'], list): + previous_revision_list = json_data_dict['previous_revision_uuid'] + + nested_revisions = app_neo4j_queries.nested_previous_revisions(neo4j_driver_instance, previous_revision_list) + if nested_revisions: + bad_request_error(f"{nested_revisions[0][0]} is a revision of {nested_revisions[1][0]}. Datasets in previous_revision_uuid must not be revisions of each other") + else: + previous_revision_list = [json_data_dict['previous_revision_uuid']] + for previous_revision in previous_revision_list: + previous_version_dict = query_target_entity(previous_revision, user_token) + + # Make sure the previous version entity is either a Dataset or Sample (and publication 2/17/23) + if not schema_manager.entity_type_instanceof(previous_version_dict['entity_type'], 'Dataset'): + bad_request_error(f"The previous_revision_uuid specified for this dataset must be either a Dataset or Sample or Publication") + + next_revision_is_latest = app_neo4j_queries.is_next_revision_latest(neo4j_driver_instance, previous_version_dict['uuid']) + + # As long as the list is not empty, tell the users to use a different 'previous_revision_uuid' + if not next_revision_is_latest: + bad_request_error(f"The previous_revision_uuid specified for this dataset has already had a next revision") + + # Only published datasets can have revisions made of them. Verify that the status of the Dataset specified + # by previous_revision_uuid is published. Else, bad request error. 
+ if previous_version_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + bad_request_error(f"The previous_revision_uuid specified for this dataset must be 'Published' in order to create a new revision from it") # If the preceding "additional validations" did not raise an error, # generate 'before_create_trigger' data and create the entity details in Neo4j diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index a1fca65a..586edf45 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -459,6 +459,78 @@ def get_next_revisions(neo4j_driver, uuid, property_key = None): return results +""" +Verifies whether the revisions of a given entity are the last (most recent) revisions. Example: If an entity has a +revision, but that revision also has a revision, return false. + +Parameters +---------- +neo4j_driver : neo4j.Driver object + The neo4j database connection pool +uuid : str + The uuid of target entity + +Returns +------- +bool + Returns true or false whether revisions of the target entity are the latest revisions +""" +def is_next_revision_latest(neo4j_driver, uuid): + results = [] + + query = (f"MATCH (e:Entity)<-[:REVISION_OF*]-(rev:Entity)<-[:REVISION_OF*]-(next:Entity) " + f"WHERE e.uuid='{uuid}' " + # COLLECT() returns a list + # apoc.coll.toSet() returns a set containing unique nodes + f"RETURN apoc.coll.toSet(COLLECT(next.uuid)) AS {record_field_name}") + + logger.info("======is_next_revision_latest() query======") + logger.info(query) + + with neo4j_driver.session() as session: + record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) + + if record and record[record_field_name]: + results = record[record_field_name] + if results: + return False + else: + return True + + +""" +Verifies that, for a list of previous revisions, one or more revisions in the list is itself a revision of another +revision in the list. 
+ +Parameters +---------- +previous_revision_list : list + The list of previous_revision_uuids + +Returns +------- +tuple + The uuid of the first encountered uuid that is a revision of another previous_revision, as well as the uuid that it is a revision of + Else return None +""" +def nested_previous_revisions(neo4j_driver, previous_revision_list): + query = (f"WITH {previous_revision_list} AS uuidList " + "MATCH (ds1:Dataset)-[r:REVISION_OF]->(ds2:Dataset) " + "WHERE ds1.uuid IN uuidList AND ds2.uuid IN uuidList " + "WITH COLLECT(DISTINCT ds1.uuid) AS connectedUUID1, COLLECT(DISTINCT ds2.uuid) as connectedUUID2 " + "RETURN connectedUUID1, connectedUUID2 ") + + logger.info("======nested_previous_revisions() query======") + logger.info(query) + + with neo4j_driver.session() as session: + record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) + if record[0]: + return record + else: + return None + + """ Retrive the full tree above the given entity diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index b72bf3a1..05174972 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -418,7 +418,9 @@ ENTITIES: description: "The displayname of globus group which the user who created this entity is a member of" before_create_trigger: set_group_name #same as group_uuid, except set group_name previous_revision_uuid: - type: string + type: + - string + - list transient: true immutable: true description: "The uuid of previous revision dataset" @@ -645,7 +647,9 @@ ENTITIES: description: "The displayname of globus group which the user who created this entity is a member of" before_create_trigger: set_group_name #same as group_uuid, except set group_name previous_revision_uuid: - type: string + type: + - string + - list transient: true immutable: true description: "The uuid of previous revision dataset" diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 
2386b013..0cafdecf 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -784,9 +784,13 @@ def validate_json_data_against_schema(json_data_dict, normalized_entity_type, ex invalid_data_type_keys = [] for key in json_data_keys: # boolean starts with bool, string starts with str, integer starts with int, list is list - if (properties[key]['type'] in ['string', 'integer', 'list', 'boolean']) and (not properties[key]['type'].startswith(type(json_data_dict[key]).__name__)): - invalid_data_type_keys.append(key) - + property_type = properties[key]['type'] + if isinstance(property_type, str): + if (property_type in ['string', 'integer', 'list', 'boolean']) and (not property_type.startswith(type(json_data_dict[key]).__name__)): + invalid_data_type_keys.append(key) + elif isinstance(property_type, list): + if not any(item.startswith(type(json_data_dict[key]).__name__) for item in property_type): + invalid_data_type_keys.append(key) # Handling json_string as dict if (properties[key]['type'] == 'json_string') and (not isinstance(json_data_dict[key], dict)): invalid_data_type_keys.append(key) diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index b23d33ba..8da148b3 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -652,14 +652,13 @@ def link_collection_to_datasets(neo4j_driver, collection_uuid, dataset_uuid_list previous_revision_entity_uuid : str The uuid of previous revision entity """ -def link_entity_to_previous_revision(neo4j_driver, entity_uuid, previous_revision_entity_uuid): +def link_entity_to_previous_revision(neo4j_driver, entity_uuid, previous_revision_entity_uuids): try: with neo4j_driver.session() as session: tx = session.begin_transaction() - - # Create relationship from ancestor entity node to this Activity node - create_relationship_tx(tx, entity_uuid, previous_revision_entity_uuid, 'REVISION_OF', '->') - + for previous_uuid in previous_revision_entity_uuids: 
+ # Create relationship from ancestor entity node to this Activity node + create_relationship_tx(tx, entity_uuid, previous_uuid, 'REVISION_OF', '->') tx.commit() except TransactionError as te: msg = "TransactionError from calling link_entity_to_previous_revision(): " diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 38cf61dc..a24f2e32 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -940,26 +940,36 @@ def get_local_directory_rel_path(property_key, normalized_type, user_token, exis A merged dictionary that contains all possible input data to be used """ def link_to_previous_revision(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): - if 'uuid' not in existing_data_dict: - raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_to_previous_revision()' trigger method.") + try: + if 'uuid' not in existing_data_dict: + raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_to_previous_revision()' trigger method.") - if 'previous_revision_uuid' not in existing_data_dict: - raise KeyError("Missing 'previous_revision_uuid' key in 'existing_data_dict' during calling 'link_to_previous_revision()' trigger method.") + if 'previous_revision_uuid' not in existing_data_dict: + raise KeyError("Missing 'previous_revision_uuid' key in 'existing_data_dict' during calling 'link_to_previous_revision()' trigger method.") - entity_uuid = existing_data_dict['uuid'] - previous_uuid = existing_data_dict['previous_revision_uuid'] + entity_uuid = existing_data_dict['uuid'] + if isinstance(existing_data_dict['previous_revision_uuid'], list): + previous_uuid = existing_data_dict['previous_revision_uuid'] + else: + previous_uuid = [existing_data_dict['previous_revision_uuid']] - # Create a revision reltionship from this new Dataset node and its previous revision of dataset node in neo4j - try: - 
schema_neo4j_queries.link_entity_to_previous_revision(schema_manager.get_neo4j_driver_instance(), entity_uuid, previous_uuid) - - # Delete the cache of each associated dataset if any cache exists - # Because the `Dataset.previous_revision_uuid` and `Dataset.next_revision_uuid` fields - uuids_list = [entity_uuid, previous_uuid] - schema_manager.delete_memcached_cache(uuids_list) - except TransactionError: - # No need to log - raise + # Create a revision relationship from this new Dataset node and its previous revision of dataset node in neo4j + try: + schema_neo4j_queries.link_entity_to_previous_revision(schema_manager.get_neo4j_driver_instance(), entity_uuid, previous_uuid) + + # Delete the cache of each associated dataset if any cache exists + # Because the `Dataset.previous_revision_uuid` and `Dataset.next_revision_uuid` fields + uuids_list = [entity_uuid] + if isinstance(previous_uuid, list): + uuids_list.extend(previous_uuid) + else: + uuids_list.append(previous_uuid) + schema_manager.delete_memcached_cache(uuids_list) + except TransactionError: + # No need to log + raise + except Exception as e: + raise KeyError(e) """ Trigger event method of auto generating the dataset title