From 7ad0fb87785e79f5a9f98b21222f25b4158780c8 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Mon, 6 Nov 2023 22:17:38 -0500
Subject: [PATCH 1/5] work in progress updating previous_revision_uuid to support lists of ids.

---
 src/app.py                         | 42 ++++++++++++++++--------------
 src/app_neo4j_queries.py           | 37 ++++++++++++++++++++++++++
 src/schema/provenance_schema.yaml  |  4 ++-
 src/schema/schema_manager.py       | 10 ++++---
 src/schema/schema_neo4j_queries.py | 13 ++++-----
 src/schema/schema_triggers.py      | 40 ++++++++++++++++------------
 6 files changed, 99 insertions(+), 47 deletions(-)

diff --git a/src/app.py b/src/app.py
index 29ad7072..89342af9 100644
--- a/src/app.py
+++ b/src/app.py
@@ -997,26 +997,28 @@ def create_entity(entity_type):
         # Also check existence of the previous revision dataset if specified
         if 'previous_revision_uuid' in json_data_dict:
-            previous_version_dict = query_target_entity(json_data_dict['previous_revision_uuid'], user_token)
-
-            # Make sure the previous version entity is either a Dataset or Sample (and publication 2/17/23)
-            if previous_version_dict['entity_type'] not in ['Sample'] and \
-                not schema_manager.entity_type_instanceof(previous_version_dict['entity_type'], 'Dataset'):
-                bad_request_error(f"The previous_revision_uuid specified for this dataset must be either a Dataset or Sample or Publication")
-
-            # Also need to validate if the given 'previous_revision_uuid' has already had
-            # an existing next revision
-            # Only return a list of the uuids, no need to get back the list of dicts
-            next_revisions_list = app_neo4j_queries.get_next_revisions(neo4j_driver_instance, previous_version_dict['uuid'], 'uuid')
-
-            # As long as the list is not empty, tell the users to use a different 'previous_revision_uuid'
-            if next_revisions_list:
-                bad_request_error(f"The previous_revision_uuid specified for this dataset has already had a next revision")
-
-            # Only published datasets can have revisions made of them. Verify that that status of the Dataset specified
-            # by previous_revision_uuid is published. Else, bad request error.
-            if previous_version_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
-                bad_request_error(f"The previous_revision_uuid specified for this dataset must be 'Published' in order to create a new revision from it")
+            if isinstance(json_data_dict['previous_revision_uuid'], list):
+                previous_revision_list = json_data_dict['previous_revision_uuid']
+            else:
+                previous_revision_list = [json_data_dict['previous_revision_uuid']]
+            for previous_revision in previous_revision_list:
+                previous_version_dict = query_target_entity(previous_revision, user_token)
+
+                # Make sure the previous version entity is either a Dataset or Sample (and publication 2/17/23)
+                if previous_version_dict['entity_type'] not in ['Sample'] and \
+                    not schema_manager.entity_type_instanceof(previous_version_dict['entity_type'], 'Dataset'):
+                    bad_request_error(f"The previous_revision_uuid specified for this dataset must be either a Dataset or Sample or Publication")
+
+                next_revision_is_latest = app_neo4j_queries.is_next_revision_latest(neo4j_driver_instance, previous_version_dict['uuid'], 'uuid')
+
+                # As long as the list is not empty, tell the users to use a different 'previous_revision_uuid'
+                if next_revision_is_latest:
+                    bad_request_error(f"The previous_revision_uuid specified for this dataset has already had a next revision")
+
+                # Only published datasets can have revisions made of them. Verify that that status of the Dataset specified
+                # by previous_revision_uuid is published. Else, bad request error.
+                if previous_version_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
+                    bad_request_error(f"The previous_revision_uuid specified for this dataset must be 'Published' in order to create a new revision from it")
 
     # If the preceding "additional validations" did not raise an error,
     # generate 'before_create_trigger' data and create the entity details in Neo4j
diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py
index fa613c0e..8de3297d 100644
--- a/src/app_neo4j_queries.py
+++ b/src/app_neo4j_queries.py
@@ -399,6 +399,43 @@ def get_next_revisions(neo4j_driver, uuid, property_key = None):
 
     return results
 
+
+def is_next_revision_latest(neo4j_driver, uuid, property_key = None):
+    results = []
+
+    if property_key:
+        query = (f"MATCH (e:Entity)<-[:REVISION_OF*]-(parent:Entity)<-[:REVISION_OF*]-(next:Entity) "
+                 f"WHERE e.uuid='{uuid}' "
+                 # COLLECT() returns a list
+                 # apoc.coll.toSet() reruns a set containing unique nodes
+                 f"RETURN apoc.coll.toSet(COLLECT(next.{property_key})) AS {record_field_name}")
+    else:
+        query = (f"MATCH (e:Entity)<-[:REVISION_OF*]-(parent:Entity)<-[:REVISION_OF*]-(next:Entity) "
+                 f"WHERE e.uuid='{uuid}' "
+                 # COLLECT() returns a list
+                 # apoc.coll.toSet() reruns a set containing unique nodes
+                 f"RETURN apoc.coll.toSet(COLLECT(next)) AS {record_field_name}")
+
+    logger.info("======get_next_revisions() query======")
+    logger.info(query)
+
+    with neo4j_driver.session() as session:
+        record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query)
+
+        if record and record[record_field_name]:
+            if property_key:
+                # Just return the list of property values from each entity node
+                results = record[record_field_name]
+            else:
+                # Convert the list of nodes to a list of dicts
+                results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name])
+
+        if results:
+            return False
+        else:
+            return True
+
+
 """
 Retrive the full tree above the given entity
diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml
index b72bf3a1..5312659f 100644
--- a/src/schema/provenance_schema.yaml
+++ b/src/schema/provenance_schema.yaml
@@ -645,7 +645,9 @@ ENTITIES:
       description: "The displayname of globus group which the user who created this entity is a member of"
       before_create_trigger: set_group_name #same as group_uuid, except set group_name
     previous_revision_uuid:
-      type: string
+      type:
+        - string
+        - list
       transient: true
       immutable: true
       description: "The uuid of previous revision dataset"
diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py
index 2386b013..ca53f742 100644
--- a/src/schema/schema_manager.py
+++ b/src/schema/schema_manager.py
@@ -784,9 +784,13 @@ def validate_json_data_against_schema(json_data_dict, normalized_entity_type, ex
     invalid_data_type_keys = []
     for key in json_data_keys:
         # boolean starts with bool, string starts with str, integer starts with int, list is list
-        if (properties[key]['type'] in ['string', 'integer', 'list', 'boolean']) and (not properties[key]['type'].startswith(type(json_data_dict[key]).__name__)):
-            invalid_data_type_keys.append(key)
-
+        property_type = properties[key]['type']
+        if isinstance(property_type, str):
+            if (property_type in ['string', 'integer', 'list', 'boolean']) and (not property_type.startswith(type(json_data_dict[key]).__name__)):
+                invalid_data_type_keys.append(key)
+        elif isinstance(property_type, list):
+            if any(item.startswith(type(json_data_dict[key]).__name__) for item in property_type):
+                invalid_data_type_keys.append(key)
         # Handling json_string as dict
         if (properties[key]['type'] == 'json_string') and (not isinstance(json_data_dict[key], dict)):
             invalid_data_type_keys.append(key)
diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py
index b23d33ba..ea5b845f 100644
--- a/src/schema/schema_neo4j_queries.py
+++ b/src/schema/schema_neo4j_queries.py
@@ -652,15 +652,16 @@ def link_collection_to_datasets(neo4j_driver, collection_uuid, dataset_uuid_list
 previous_revision_entity_uuid : str
     The uuid of previous revision entity
 """
-def link_entity_to_previous_revision(neo4j_driver, entity_uuid, previous_revision_entity_uuid):
+def link_entity_to_previous_revision(neo4j_driver, entity_uuid, previous_revision_entity_uuids):
     try:
-        with neo4j_driver.session() as session:
-            tx = session.begin_transaction()
+        for previous_uuid in previous_revision_entity_uuids:
+            with neo4j_driver.session() as session:
+                tx = session.begin_transaction()
 
-            # Create relationship from ancestor entity node to this Activity node
-            create_relationship_tx(tx, entity_uuid, previous_revision_entity_uuid, 'REVISION_OF', '->')
+                # Create relationship from ancestor entity node to this Activity node
+                create_relationship_tx(tx, entity_uuid, previous_uuid, 'REVISION_OF', '->')
 
-            tx.commit()
+                tx.commit()
     except TransactionError as te:
         msg = "TransactionError from calling link_entity_to_previous_revision(): "
         # Log the full stack trace, prepend a line with our message
diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py
index 38cf61dc..c7cac805 100644
--- a/src/schema/schema_triggers.py
+++ b/src/schema/schema_triggers.py
@@ -940,26 +940,32 @@ def get_local_directory_rel_path(property_key, normalized_type, user_token, exis
     A merged dictionary that contains all possible input data to be used
 """
 def link_to_previous_revision(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
-    if 'uuid' not in existing_data_dict:
-        raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_to_previous_revision()' trigger method.")
+    try:
+        if 'uuid' not in existing_data_dict:
+            raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_to_previous_revision()' trigger method.")
 
-    if 'previous_revision_uuid' not in existing_data_dict:
-        raise KeyError("Missing 'previous_revision_uuid' key in 'existing_data_dict' during calling 'link_to_previous_revision()' trigger method.")
+        if 'previous_revision_uuid' not in existing_data_dict:
+            raise KeyError("Missing 'previous_revision_uuid' key in 'existing_data_dict' during calling 'link_to_previous_revision()' trigger method.")
 
-    entity_uuid = existing_data_dict['uuid']
-    previous_uuid = existing_data_dict['previous_revision_uuid']
+        entity_uuid = existing_data_dict['uuid']
+        if isinstance(existing_data_dict['previous_revision_uuid'], list):
+            previous_uuid = existing_data_dict['previous_revision_uuid']
+        else:
+            previous_uuid = [existing_data_dict['previous_revision_uuid']]
 
-    # Create a revision reltionship from this new Dataset node and its previous revision of dataset node in neo4j
-    try:
-        schema_neo4j_queries.link_entity_to_previous_revision(schema_manager.get_neo4j_driver_instance(), entity_uuid, previous_uuid)
-
-        # Delete the cache of each associated dataset if any cache exists
-        # Because the `Dataset.previous_revision_uuid` and `Dataset.next_revision_uuid` fields
-        uuids_list = [entity_uuid, previous_uuid]
-        schema_manager.delete_memcached_cache(uuids_list)
-    except TransactionError:
-        # No need to log
-        raise
+        # Create a revision reltionship from this new Dataset node and its previous revision of dataset node in neo4j
+        try:
+            schema_neo4j_queries.link_entity_to_previous_revision(schema_manager.get_neo4j_driver_instance(), entity_uuid, previous_uuid)
+
+            # Delete the cache of each associated dataset if any cache exists
+            # Because the `Dataset.previous_revision_uuid` and `Dataset.next_revision_uuid` fields
+            uuids_list = [entity_uuid, previous_uuid]
+            schema_manager.delete_memcached_cache(uuids_list)
+        except TransactionError:
+            # No need to log
+            raise
+    except Exception as e:
+        raise KeyError(e)
 
 """
 Trigger event method of auto generating the dataset title

From 1e7d2f900e95981496486bfae587a8fc2ebd3964 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Wed, 8 Nov 2023 00:06:18 -0500
Subject: [PATCH 2/5] Fixed validation bug

---
 src/app.py                   | 2 +-
 src/schema/schema_manager.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/app.py b/src/app.py
index 89342af9..5cb768ea 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1012,7 +1012,7 @@ def create_entity(entity_type):
                 next_revision_is_latest = app_neo4j_queries.is_next_revision_latest(neo4j_driver_instance, previous_version_dict['uuid'], 'uuid')
 
                 # As long as the list is not empty, tell the users to use a different 'previous_revision_uuid'
-                if next_revision_is_latest:
+                if not next_revision_is_latest:
                     bad_request_error(f"The previous_revision_uuid specified for this dataset has already had a next revision")
 
                 # Only published datasets can have revisions made of them. Verify that that status of the Dataset specified
diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py
index ca53f742..0cafdecf 100644
--- a/src/schema/schema_manager.py
+++ b/src/schema/schema_manager.py
@@ -789,7 +789,7 @@ def validate_json_data_against_schema(json_data_dict, normalized_entity_type, ex
             if (property_type in ['string', 'integer', 'list', 'boolean']) and (not property_type.startswith(type(json_data_dict[key]).__name__)):
                 invalid_data_type_keys.append(key)
         elif isinstance(property_type, list):
-            if any(item.startswith(type(json_data_dict[key]).__name__) for item in property_type):
+            if not any(item.startswith(type(json_data_dict[key]).__name__) for item in property_type):
                 invalid_data_type_keys.append(key)
         # Handling json_string as dict
         if (properties[key]['type'] == 'json_string') and (not isinstance(json_data_dict[key], dict)):

From 5dfe1184166524eb8221a847525844a86a17ddf6 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Mon, 13 Nov 2023 14:24:38 -0500
Subject: [PATCH 3/5] added an additional check to verify that, for groups of previous_revisions submitted in a POST request, none of them are revisions of each other

---
 src/app.py               |  4 ++++
 src/app_neo4j_queries.py | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/src/app.py b/src/app.py
index 5cb768ea..b2649e99 100644
--- a/src/app.py
+++ b/src/app.py
@@ -999,6 +999,10 @@ def create_entity(entity_type):
         if 'previous_revision_uuid' in json_data_dict:
             if isinstance(json_data_dict['previous_revision_uuid'], list):
                 previous_revision_list = json_data_dict['previous_revision_uuid']
+
+                nested_revisions = app_neo4j_queries.nested_previous_revisions(neo4j_driver_instance, previous_revision_list)
+                if nested_revisions:
+                    bad_request_error(f"{nested_revisions[0][0]} is a revision of {nested_revisions[1][0]}. Datasets in previous_revision_uuid must not be revisions of eachother")
             else:
                 previous_revision_list = [json_data_dict['previous_revision_uuid']]
             for previous_revision in previous_revision_list:
diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py
index 8de3297d..66d76c9e 100644
--- a/src/app_neo4j_queries.py
+++ b/src/app_neo4j_queries.py
@@ -436,6 +436,39 @@ def is_next_revision_latest(neo4j_driver, uuid, property_key = None):
         return True
 
 
+"""
+Verifies that, for a list of previous revision, one or more revisions in the list is itself a revision of another
+revision in the list.
+
+Parameters
+----------
+previous_revision_list : list
+    The list of previous_revision_uuids
+
+Returns
+-------
+tuple
+    The uuid of the first encountered uuid that is a revision of another previous_revision, as well as the uuid that it is a revision of
+    Else return None
+"""
+def nested_previous_revisions(neo4j_driver, previous_revision_list):
+    query = (f"WITH {previous_revision_list} AS uuidList "
+             "MATCH (ds1:Dataset)-[r:REVISION_OF]->(ds2:Dataset) "
+             "WHERE ds1.uuid IN uuidList AND ds2.uuid IN uuidList "
+             "WITH COLLECT(DISTINCT ds1.uuid) AS connectedUUID1, COLLECT(DISTINCT ds2.uuid) as connectedUUID2 "
+             "RETURN connectedUUID1, connectedUUID2 ")
+
+    logger.info("======nexted_previous_revisions() query======")
+    logger.info(query)
+
+    with neo4j_driver.session() as session:
+        record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query)
+        if record[0]:
+            return record
+        else:
+            return None
+
+
 """
 Retrive the full tree above the given entity

From f37aa8dbc5c78d2c89bdc436d23aedfb39aa357a Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Wed, 15 Nov 2023 16:35:32 -0500
Subject: [PATCH 4/5] Requested small fixes

---
 src/app.py                         |  5 ++--
 src/app_neo4j_queries.py           | 46 ++++++++++++++++--------------
 src/schema/provenance_schema.yaml  |  4 ++-
 src/schema/schema_neo4j_queries.py | 10 +++----
 4 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/src/app.py b/src/app.py
index faa9c769..4cbc05e8 100644
--- a/src/app.py
+++ b/src/app.py
@@ -995,11 +995,10 @@ def create_entity(entity_type):
                 previous_version_dict = query_target_entity(previous_revision, user_token)
 
                 # Make sure the previous version entity is either a Dataset or Sample (and publication 2/17/23)
-                if previous_version_dict['entity_type'] not in ['Sample'] and \
-                    not schema_manager.entity_type_instanceof(previous_version_dict['entity_type'], 'Dataset'):
+                if not schema_manager.entity_type_instanceof(previous_version_dict['entity_type'], 'Dataset'):
                     bad_request_error(f"The previous_revision_uuid specified for this dataset must be either a Dataset or Sample or Publication")
 
-                next_revision_is_latest = app_neo4j_queries.is_next_revision_latest(neo4j_driver_instance, previous_version_dict['uuid'], 'uuid')
+                next_revision_is_latest = app_neo4j_queries.is_next_revision_latest(neo4j_driver_instance, previous_version_dict['uuid'])
 
                 # As long as the list is not empty, tell the users to use a different 'previous_revision_uuid'
                 if not next_revision_is_latest:
diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py
index fec7cbdc..586edf45 100644
--- a/src/app_neo4j_queries.py
+++ b/src/app_neo4j_queries.py
@@ -459,37 +459,39 @@ def get_next_revisions(neo4j_driver, uuid, property_key = None):
 
     return results
 
+"""
+Verifies whether a revisions of a given entity are the last (most recent) revisions. Example: If an entity has a
+revision, but that revision also has a revision, return false.
 
-def is_next_revision_latest(neo4j_driver, uuid, property_key = None):
+Parameters
+----------
+neo4j_driver : neo4j.Driver object
+    The neo4j database connection pool
+uuid : str
+    The uuid of target entity
+
+Returns
+-------
+bool
+    Returns true or false whether revisions of the target entity are the latest revisions
+"""
+def is_next_revision_latest(neo4j_driver, uuid):
     results = []
 
-    if property_key:
-        query = (f"MATCH (e:Entity)<-[:REVISION_OF*]-(parent:Entity)<-[:REVISION_OF*]-(next:Entity) "
-                 f"WHERE e.uuid='{uuid}' "
-                 # COLLECT() returns a list
-                 # apoc.coll.toSet() reruns a set containing unique nodes
-                 f"RETURN apoc.coll.toSet(COLLECT(next.{property_key})) AS {record_field_name}")
-    else:
-        query = (f"MATCH (e:Entity)<-[:REVISION_OF*]-(parent:Entity)<-[:REVISION_OF*]-(next:Entity) "
-                 f"WHERE e.uuid='{uuid}' "
-                 # COLLECT() returns a list
-                 # apoc.coll.toSet() reruns a set containing unique nodes
-                 f"RETURN apoc.coll.toSet(COLLECT(next)) AS {record_field_name}")
+    query = (f"MATCH (e:Entity)<-[:REVISION_OF*]-(rev:Entity)<-[:REVISION_OF*]-(next:Entity) "
+             f"WHERE e.uuid='{uuid}' "
+             # COLLECT() returns a list
+             # apoc.coll.toSet() reruns a set containing unique nodes
+             f"RETURN apoc.coll.toSet(COLLECT(next.uuid)) AS {record_field_name}")
 
-    logger.info("======get_next_revisions() query======")
+    logger.info("======is_next_revision_latest() query======")
     logger.info(query)
 
     with neo4j_driver.session() as session:
        record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query)
 
        if record and record[record_field_name]:
-            if property_key:
-                # Just return the list of property values from each entity node
-                results = record[record_field_name]
-            else:
-                # Convert the list of nodes to a list of dicts
-                results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name])
-
+            results = record[record_field_name]
        if results:
            return False
        else:
            return True
@@ -518,7 +520,7 @@ def nested_previous_revisions(neo4j_driver, previous_revision_list):
                  "WITH COLLECT(DISTINCT ds1.uuid) AS connectedUUID1, COLLECT(DISTINCT ds2.uuid) as connectedUUID2 "
                  "RETURN connectedUUID1, connectedUUID2 ")
 
-    logger.info("======nexted_previous_revisions() query======")
+    logger.info("======nested_previous_revisions() query======")
     logger.info(query)
 
     with neo4j_driver.session() as session:
diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml
index 5312659f..05174972 100644
--- a/src/schema/provenance_schema.yaml
+++ b/src/schema/provenance_schema.yaml
@@ -418,7 +418,9 @@ ENTITIES:
      description: "The displayname of globus group which the user who created this entity is a member of"
      before_create_trigger: set_group_name #same as group_uuid, except set group_name
    previous_revision_uuid:
-      type: string
+      type:
+        - string
+        - list
      transient: true
      immutable: true
      description: "The uuid of previous revision dataset"
diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py
index ea5b845f..8da148b3 100644
--- a/src/schema/schema_neo4j_queries.py
+++ b/src/schema/schema_neo4j_queries.py
@@ -654,14 +654,12 @@ def link_collection_to_datasets(neo4j_driver, collection_uuid, dataset_uuid_list
 """
 def link_entity_to_previous_revision(neo4j_driver, entity_uuid, previous_revision_entity_uuids):
     try:
-        for previous_uuid in previous_revision_entity_uuids:
-            with neo4j_driver.session() as session:
-                tx = session.begin_transaction()
-
+        with neo4j_driver.session() as session:
+            tx = session.begin_transaction()
+            for previous_uuid in previous_revision_entity_uuids:
                 # Create relationship from ancestor entity node to this Activity node
                 create_relationship_tx(tx, entity_uuid, previous_uuid, 'REVISION_OF', '->')
-
-            tx.commit()
+            tx.commit()
     except TransactionError as te:
         msg = "TransactionError from calling link_entity_to_previous_revision(): "
         # Log the full stack trace, prepend a line with our message

From a5d2d15a3e9e61245143225761ed3ab946b9097b Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Fri, 17 Nov 2023 13:46:50 -0500
Subject: [PATCH 5/5] fixed bug in schema_triggers.link_to_previous_revision causing bad input to be sent to schema_manager_delete_memcached_cache.

---
 src/schema/schema_triggers.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py
index c7cac805..a24f2e32 100644
--- a/src/schema/schema_triggers.py
+++ b/src/schema/schema_triggers.py
@@ -959,7 +959,11 @@ def link_to_previous_revision(property_key, normalized_type, user_token, existin
 
             # Delete the cache of each associated dataset if any cache exists
             # Because the `Dataset.previous_revision_uuid` and `Dataset.next_revision_uuid` fields
-            uuids_list = [entity_uuid, previous_uuid]
+            uuids_list = [entity_uuid]
+            if isinstance(previous_uuid, list):
+                uuids_list.extend(previous_uuid)
+            else:
+                uuids_list.append(previous_uuid)
             schema_manager.delete_memcached_cache(uuids_list)
         except TransactionError:
             # No need to log
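
To exercise the new list form end to end, a create request along these lines should succeed once the series is applied. This is a minimal sketch, assuming a local entity-api deployment and the usual POST /entities/<entity_type> create route; the base URL, token, and uuids below are placeholders, and the other required Dataset fields are omitted. Per the validation added above, every listed uuid must refer to a Published dataset with no existing next revision, and no listed dataset may be a revision of another listed one, otherwise the request is rejected with a 400.

```python
import requests

# Placeholders, not values from the patch series.
ENTITY_API = "http://localhost:5002"
TOKEN = "<globus-token>"

payload = {
    # Other required Dataset fields (group_uuid, data_types, ...) omitted for brevity.
    # previous_revision_uuid may now be a single uuid string or a list of uuid strings.
    "previous_revision_uuid": [
        "11111111-2222-3333-4444-555555555555",
        "66666666-7777-8888-9999-000000000000",
    ],
}

# Assumed create route: POST /entities/dataset
response = requests.post(
    f"{ENTITY_API}/entities/dataset",
    headers={"Authorization": f"Bearer {TOKEN}"},
    json=payload,
)
print(response.status_code, response.json())
```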
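Restated outside the diff, the type check that patches 1 and 2 leave in schema_manager.validate_json_data_against_schema() behaves roughly like this simplified sketch; the function and variable names here are invented for illustration and are not part of the patches:

```python
def matches_declared_type(declared, value):
    """Return True when value's Python type is compatible with the declared schema type.

    `declared` is either one type name ("string", "integer", "list", "boolean")
    or a list of such names, as in the new previous_revision_uuid declaration.
    """
    value_type_name = type(value).__name__  # "str", "int", "list", "bool"
    if isinstance(declared, str):
        # "string".startswith("str"), "boolean".startswith("bool"), etc.
        return declared.startswith(value_type_name)
    # For a list of declared types, any one match is enough.
    return any(item.startswith(value_type_name) for item in declared)


# A single uuid string and a list of uuid strings both pass:
assert matches_declared_type(["string", "list"], "some-uuid")
assert matches_declared_type(["string", "list"], ["uuid-a", "uuid-b"])
# Anything else (e.g. a dict) is flagged as an invalid data type:
assert not matches_declared_type(["string", "list"], {"uuid": "x"})
```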