Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Derek furst/multiple revisions #567

Merged
merged 7 commits into from
Nov 20, 2023
45 changes: 25 additions & 20 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -983,26 +983,31 @@ def create_entity(entity_type):

# Also check existence of the previous revision dataset if specified
if 'previous_revision_uuid' in json_data_dict:
previous_version_dict = query_target_entity(json_data_dict['previous_revision_uuid'], user_token)

# Make sure the previous version entity is either a Dataset or Sample (and publication 2/17/23)
if previous_version_dict['entity_type'] not in ['Sample'] and \
not schema_manager.entity_type_instanceof(previous_version_dict['entity_type'], 'Dataset'):
bad_request_error(f"The previous_revision_uuid specified for this dataset must be either a Dataset or Sample or Publication")

# Also need to validate if the given 'previous_revision_uuid' has already had
# an existing next revision
# Only return a list of the uuids, no need to get back the list of dicts
next_revisions_list = app_neo4j_queries.get_next_revisions(neo4j_driver_instance, previous_version_dict['uuid'], 'uuid')

# As long as the list is not empty, tell the users to use a different 'previous_revision_uuid'
if next_revisions_list:
bad_request_error(f"The previous_revision_uuid specified for this dataset has already had a next revision")

# Only published datasets can have revisions made of them. Verify that that status of the Dataset specified
# by previous_revision_uuid is published. Else, bad request error.
if previous_version_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
bad_request_error(f"The previous_revision_uuid specified for this dataset must be 'Published' in order to create a new revision from it")
if isinstance(json_data_dict['previous_revision_uuid'], list):
previous_revision_list = json_data_dict['previous_revision_uuid']

nested_revisions = app_neo4j_queries.nested_previous_revisions(neo4j_driver_instance, previous_revision_list)
if nested_revisions:
bad_request_error(f"{nested_revisions[0][0]} is a revision of {nested_revisions[1][0]}. Datasets in previous_revision_uuid must not be revisions of eachother")
else:
previous_revision_list = [json_data_dict['previous_revision_uuid']]
for previous_revision in previous_revision_list:
previous_version_dict = query_target_entity(previous_revision, user_token)

# Make sure the previous version entity is either a Dataset or Sample (and publication 2/17/23)
if not schema_manager.entity_type_instanceof(previous_version_dict['entity_type'], 'Dataset'):
bad_request_error(f"The previous_revision_uuid specified for this dataset must be either a Dataset or Sample or Publication")

next_revision_is_latest = app_neo4j_queries.is_next_revision_latest(neo4j_driver_instance, previous_version_dict['uuid'])

# If a revision of this entity has itself been revised, tell the users to use a different 'previous_revision_uuid'
if not next_revision_is_latest:
bad_request_error(f"The previous_revision_uuid specified for this dataset has already had a next revision")

# Only published datasets can have revisions made of them. Verify that that status of the Dataset specified
# by previous_revision_uuid is published. Else, bad request error.
if previous_version_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
bad_request_error(f"The previous_revision_uuid specified for this dataset must be 'Published' in order to create a new revision from it")

# If the preceding "additional validations" did not raise an error,
# generate 'before_create_trigger' data and create the entity details in Neo4j
Expand Down
72 changes: 72 additions & 0 deletions src/app_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,78 @@ def get_next_revisions(neo4j_driver, uuid, property_key = None):

return results

"""
Check whether the revisions of a given entity are still the latest (most recent) revisions.
Example: if the entity has a revision, and that revision has a revision of its own, return False.

Parameters
----------
neo4j_driver : neo4j.Driver object
    The neo4j database connection pool
uuid : str
    The uuid of target entity

Returns
-------
bool
    False when at least one revision of the target entity has itself been revised, True otherwise
"""
def is_next_revision_latest(neo4j_driver, uuid):
    # `next` matches any entity reached from the target through an intermediate
    # revision (`rev`), i.e. at least two REVISION_OF hops away from the target.
    # COLLECT() builds a list and apoc.coll.toSet() removes the duplicate uuids.
    cypher = (f"MATCH (e:Entity)<-[:REVISION_OF*]-(rev:Entity)<-[:REVISION_OF*]-(next:Entity) "
              f"WHERE e.uuid='{uuid}' "
              f"RETURN apoc.coll.toSet(COLLECT(next.uuid)) AS {record_field_name}")

    logger.info("======is_next_revision_latest() query======")
    logger.info(cypher)

    with neo4j_driver.session() as session:
        record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, cypher)

    # A missing record counts the same as an empty collection
    newer_revisions = record[record_field_name] if record else []

    # Any uuid collected means some revision of the target has been revised again
    return not newer_revisions


"""
Check whether any dataset in a list of previous revisions is itself a direct revision of
another dataset in the same list.

Parameters
----------
neo4j_driver : neo4j.Driver object
    The neo4j database connection pool
previous_revision_list : list
    The list of previous_revision_uuids

Returns
-------
neo4j record or None
    When at least one REVISION_OF relationship exists between two listed datasets, the record
    with two list fields: connectedUUID1 (distinct uuids of the listed datasets that are a
    revision of another listed dataset) and connectedUUID2 (distinct uuids of the listed
    datasets they are a revision of). Otherwise None.
"""
def nested_previous_revisions(neo4j_driver, previous_revision_list):
    # Nothing to compare, skip the database round trip
    if not previous_revision_list:
        return None

    # The list is passed as the query parameter $uuid_list instead of being formatted into
    # the query text: values such as None, nested objects or strings with control characters
    # do not render as valid Cypher literals through Python's repr().
    query = ("MATCH (ds1:Dataset)-[r:REVISION_OF]->(ds2:Dataset) "
             "WHERE ds1.uuid IN $uuid_list AND ds2.uuid IN $uuid_list "
             "WITH COLLECT(DISTINCT ds1.uuid) AS connectedUUID1, COLLECT(DISTINCT ds2.uuid) as connectedUUID2 "
             "RETURN connectedUUID1, connectedUUID2 ")

    logger.info("======nested_previous_revisions() query======")
    logger.info(query)
    logger.info("uuid_list parameter: %s", previous_revision_list)

    def _run_query(tx):
        # Return the single result row, consumed while the transaction is still open.
        # NOTE(review): assumes schema_neo4j_queries.execute_readonly_tx also returns
        # result.single() - confirm.
        return tx.run(query, uuid_list=list(previous_revision_list)).single()

    with neo4j_driver.session() as session:
        record = session.read_transaction(_run_query)

    # The first field is empty when no listed dataset is a revision of another listed dataset
    if record and record[0]:
        return record

    return None


"""
Retrive the full tree above the given entity

Expand Down
8 changes: 6 additions & 2 deletions src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,9 @@ ENTITIES:
description: "The displayname of globus group which the user who created this entity is a member of"
before_create_trigger: set_group_name #same as group_uuid, except set group_name
previous_revision_uuid:
type: string
type:
- string
- list
transient: true
immutable: true
description: "The uuid of previous revision dataset"
Expand Down Expand Up @@ -645,7 +647,9 @@ ENTITIES:
description: "The displayname of globus group which the user who created this entity is a member of"
before_create_trigger: set_group_name #same as group_uuid, except set group_name
previous_revision_uuid:
type: string
type:
- string
- list
transient: true
immutable: true
description: "The uuid of previous revision dataset"
Expand Down
10 changes: 7 additions & 3 deletions src/schema/schema_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,9 +784,13 @@ def validate_json_data_against_schema(json_data_dict, normalized_entity_type, ex
invalid_data_type_keys = []
for key in json_data_keys:
# boolean starts with bool, string starts with str, integer starts with int, list is list
if (properties[key]['type'] in ['string', 'integer', 'list', 'boolean']) and (not properties[key]['type'].startswith(type(json_data_dict[key]).__name__)):
invalid_data_type_keys.append(key)

property_type = properties[key]['type']
if isinstance(property_type, str):
if (property_type in ['string', 'integer', 'list', 'boolean']) and (not property_type.startswith(type(json_data_dict[key]).__name__)):
invalid_data_type_keys.append(key)
elif isinstance(property_type, list):
if not any(item.startswith(type(json_data_dict[key]).__name__) for item in property_type):
invalid_data_type_keys.append(key)
# Handling json_string as dict
if (properties[key]['type'] == 'json_string') and (not isinstance(json_data_dict[key], dict)):
invalid_data_type_keys.append(key)
Expand Down
9 changes: 4 additions & 5 deletions src/schema/schema_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,14 +652,13 @@ def link_collection_to_datasets(neo4j_driver, collection_uuid, dataset_uuid_list
previous_revision_entity_uuids : list
The uuids of the previous revision entities
"""
def link_entity_to_previous_revision(neo4j_driver, entity_uuid, previous_revision_entity_uuid):
def link_entity_to_previous_revision(neo4j_driver, entity_uuid, previous_revision_entity_uuids):
try:
with neo4j_driver.session() as session:
tx = session.begin_transaction()

# Create relationship from ancestor entity node to this Activity node
create_relationship_tx(tx, entity_uuid, previous_revision_entity_uuid, 'REVISION_OF', '->')

for previous_uuid in previous_revision_entity_uuids:
# Create a REVISION_OF relationship linking this entity node to the previous revision entity node
create_relationship_tx(tx, entity_uuid, previous_uuid, 'REVISION_OF', '->')
tx.commit()
except TransactionError as te:
msg = "TransactionError from calling link_entity_to_previous_revision(): "
Expand Down
44 changes: 27 additions & 17 deletions src/schema/schema_triggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -940,26 +940,36 @@ def get_local_directory_rel_path(property_key, normalized_type, user_token, exis
A merged dictionary that contains all possible input data to be used
"""
def link_to_previous_revision(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
    # Links the entity in `existing_data_dict` to its previous revision(s) in neo4j and
    # clears the cache entries of every entity involved.
    #
    # `existing_data_dict` must contain 'uuid' and 'previous_revision_uuid'; the latter
    # may be a single uuid or a list of uuids.
    #
    # Raises KeyError for a missing key and for any failure while linking or clearing
    # the cache (the original exception is attached as the cause).
    if 'uuid' not in existing_data_dict:
        raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_to_previous_revision()' trigger method.")

    if 'previous_revision_uuid' not in existing_data_dict:
        raise KeyError("Missing 'previous_revision_uuid' key in 'existing_data_dict' during calling 'link_to_previous_revision()' trigger method.")

    entity_uuid = existing_data_dict['uuid']

    # Normalize to a list so a single uuid and multiple uuids share one code path
    previous_uuids = existing_data_dict['previous_revision_uuid']
    if not isinstance(previous_uuids, list):
        previous_uuids = [previous_uuids]

    try:
        # Create a revision relationship from this new Dataset node to each of its previous revision nodes in neo4j
        schema_neo4j_queries.link_entity_to_previous_revision(schema_manager.get_neo4j_driver_instance(), entity_uuid, previous_uuids)

        # Delete the cache of each associated dataset if any cache exists, because the
        # `Dataset.previous_revision_uuid` and `Dataset.next_revision_uuid` fields are affected
        schema_manager.delete_memcached_cache([entity_uuid, *previous_uuids])
    except KeyError:
        # Already the exception type this trigger reports, pass it through unchanged
        raise
    except Exception as e:
        # Keep reporting every other failure as KeyError, but preserve the original cause
        raise KeyError(e) from e

"""
Trigger event method of auto generating the dataset title
Expand Down