Skip to content

Commit

Permalink
Merge pull request #565 from hubmapconsortium/Derek-Furst/multiple-re…
Browse files Browse the repository at this point in the history
…visions

Derek furst/multiple revisions
  • Loading branch information
yuanzhou authored Nov 15, 2023
2 parents 55709b2 + f37aa8d commit 6cca879
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 47 deletions.
45 changes: 25 additions & 20 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -983,26 +983,31 @@ def create_entity(entity_type):

# Also check existence of the previous revision dataset if specified
if 'previous_revision_uuid' in json_data_dict:
previous_version_dict = query_target_entity(json_data_dict['previous_revision_uuid'], user_token)

# Make sure the previous version entity is either a Dataset or Sample (and publication 2/17/23)
if previous_version_dict['entity_type'] not in ['Sample'] and \
not schema_manager.entity_type_instanceof(previous_version_dict['entity_type'], 'Dataset'):
bad_request_error(f"The previous_revision_uuid specified for this dataset must be either a Dataset or Sample or Publication")

# Also need to validate if the given 'previous_revision_uuid' has already had
# an existing next revision
# Only return a list of the uuids, no need to get back the list of dicts
next_revisions_list = app_neo4j_queries.get_next_revisions(neo4j_driver_instance, previous_version_dict['uuid'], 'uuid')

# As long as the list is not empty, tell the users to use a different 'previous_revision_uuid'
if next_revisions_list:
bad_request_error(f"The previous_revision_uuid specified for this dataset has already had a next revision")

# Only published datasets can have revisions made of them. Verify that the status of the Dataset specified
# by previous_revision_uuid is published. Otherwise, return a bad request error.
if previous_version_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
bad_request_error(f"The previous_revision_uuid specified for this dataset must be 'Published' in order to create a new revision from it")
if isinstance(json_data_dict['previous_revision_uuid'], list):
previous_revision_list = json_data_dict['previous_revision_uuid']

nested_revisions = app_neo4j_queries.nested_previous_revisions(neo4j_driver_instance, previous_revision_list)
if nested_revisions:
bad_request_error(f"{nested_revisions[0][0]} is a revision of {nested_revisions[1][0]}. Datasets in previous_revision_uuid must not be revisions of eachother")
else:
previous_revision_list = [json_data_dict['previous_revision_uuid']]
for previous_revision in previous_revision_list:
previous_version_dict = query_target_entity(previous_revision, user_token)

# Make sure the previous version entity is an instance of Dataset according to the schema (presumably this also covers Publication, added 2/17/23 - confirm)
if not schema_manager.entity_type_instanceof(previous_version_dict['entity_type'], 'Dataset'):
bad_request_error(f"The previous_revision_uuid specified for this dataset must be either a Dataset or Sample or Publication")

next_revision_is_latest = app_neo4j_queries.is_next_revision_latest(neo4j_driver_instance, previous_version_dict['uuid'])

# If a revision of this entity has itself already been revised, tell the users to use a different 'previous_revision_uuid'
if not next_revision_is_latest:
bad_request_error(f"The previous_revision_uuid specified for this dataset has already had a next revision")

# Only published datasets can have revisions made of them. Verify that the status of the Dataset specified
# by previous_revision_uuid is published. Otherwise, return a bad request error.
if previous_version_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
bad_request_error(f"The previous_revision_uuid specified for this dataset must be 'Published' in order to create a new revision from it")

# If the preceding "additional validations" did not raise an error,
# generate 'before_create_trigger' data and create the entity details in Neo4j
Expand Down
72 changes: 72 additions & 0 deletions src/app_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,78 @@ def get_next_revisions(neo4j_driver, uuid, property_key = None):

return results

"""
Checks whether the revisions of a given entity are the latest (most recent) revisions. Example: if an entity has a
revision, but that revision also has a revision of its own, return False.
Parameters
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
uuid : str
The uuid of target entity
Returns
-------
bool
Returns true or false whether revisions of the target entity are the latest revisions
"""
def is_next_revision_latest(neo4j_driver, uuid):
    # Purpose: report whether every revision of the entity identified by `uuid`
    # is itself unrevised. Returns True when the query finds no entity that is a
    # revision of one of this entity's revisions, False otherwise.
    #
    # neo4j_driver : driver object exposing session()
    # uuid         : str, uuid of the target entity (interpolated into the query text)
    query = (f"MATCH (e:Entity)<-[:REVISION_OF*]-(rev:Entity)<-[:REVISION_OF*]-(next:Entity) "
             f"WHERE e.uuid='{uuid}' "
             # COLLECT() returns a list
             # apoc.coll.toSet() returns a set containing unique nodes
             f"RETURN apoc.coll.toSet(COLLECT(next.uuid)) AS {record_field_name}")

    logger.info("======is_next_revision_latest() query======")
    logger.info(query)

    with neo4j_driver.session() as session:
        record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query)

    # A missing record or an empty uuid list both mean that no revision of this
    # entity has been revised again, i.e. the existing revisions are the latest.
    return not (record and record[record_field_name])


"""
Checks whether, within a list of previous revisions, one or more entries is itself a revision of another
entry in the same list.
Parameters
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
previous_revision_list : list
The list of previous_revision_uuids
Returns
-------
record or None
A record with two fields: the uuids from the list that are a revision of another uuid in the list, and the uuids from the list that they are revisions of
Returns None when no entry in the list is a revision of another entry
"""
def nested_previous_revisions(neo4j_driver, previous_revision_list):
    # Purpose: detect whether any uuid in `previous_revision_list` is a direct
    # REVISION_OF another uuid in the same list.
    #
    # neo4j_driver           : driver object exposing session()
    # previous_revision_list : list of uuids to check against each other
    #
    # Returns the query record (field 0: uuids that are revisions of another
    # listed uuid, field 1: the listed uuids they revise) when at least one such
    # pair exists, otherwise None.
    #
    # The list is supplied as a query parameter rather than being formatted into
    # the Cypher text: the values come from the request body, and a Python list
    # repr is not a reliable Cypher literal (e.g. None/True render as invalid Cypher).
    query = ("MATCH (ds1:Dataset)-[r:REVISION_OF]->(ds2:Dataset) "
             "WHERE ds1.uuid IN $uuid_list AND ds2.uuid IN $uuid_list "
             "WITH COLLECT(DISTINCT ds1.uuid) AS connectedUUID1, COLLECT(DISTINCT ds2.uuid) as connectedUUID2 "
             "RETURN connectedUUID1, connectedUUID2 ")

    logger.info("======nested_previous_revisions() query======")
    logger.info(query)
    # The query text no longer contains the values, so log them separately
    logger.info("uuid_list=%s", previous_revision_list)

    def _run_query(tx):
        # Single-row read: the aggregation always yields exactly one record
        return tx.run(query, uuid_list=previous_revision_list).single()

    with neo4j_driver.session() as session:
        record = session.read_transaction(_run_query)

    # Guard against a missing record before indexing; an empty first list means
    # no uuid in the input is a revision of another uuid in the input.
    if record and record[0]:
        return record
    return None


"""
Retrieve the full tree above the given entity
Expand Down
8 changes: 6 additions & 2 deletions src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,9 @@ ENTITIES:
description: "The displayname of globus group which the user who created this entity is a member of"
before_create_trigger: set_group_name #same as group_uuid, except set group_name
previous_revision_uuid:
type: string
type:
- string
- list
transient: true
immutable: true
description: "The uuid of previous revision dataset"
Expand Down Expand Up @@ -645,7 +647,9 @@ ENTITIES:
description: "The displayname of globus group which the user who created this entity is a member of"
before_create_trigger: set_group_name #same as group_uuid, except set group_name
previous_revision_uuid:
type: string
type:
- string
- list
transient: true
immutable: true
description: "The uuid of previous revision dataset"
Expand Down
10 changes: 7 additions & 3 deletions src/schema/schema_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,9 +784,13 @@ def validate_json_data_against_schema(json_data_dict, normalized_entity_type, ex
invalid_data_type_keys = []
for key in json_data_keys:
# boolean starts with bool, string starts with str, integer starts with int, list is list
if (properties[key]['type'] in ['string', 'integer', 'list', 'boolean']) and (not properties[key]['type'].startswith(type(json_data_dict[key]).__name__)):
invalid_data_type_keys.append(key)

property_type = properties[key]['type']
if isinstance(property_type, str):
if (property_type in ['string', 'integer', 'list', 'boolean']) and (not property_type.startswith(type(json_data_dict[key]).__name__)):
invalid_data_type_keys.append(key)
elif isinstance(property_type, list):
if not any(item.startswith(type(json_data_dict[key]).__name__) for item in property_type):
invalid_data_type_keys.append(key)
# Handling json_string as dict
if (properties[key]['type'] == 'json_string') and (not isinstance(json_data_dict[key], dict)):
invalid_data_type_keys.append(key)
Expand Down
9 changes: 4 additions & 5 deletions src/schema/schema_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,14 +652,13 @@ def link_collection_to_datasets(neo4j_driver, collection_uuid, dataset_uuid_list
previous_revision_entity_uuid : str
The uuid of previous revision entity
"""
def link_entity_to_previous_revision(neo4j_driver, entity_uuid, previous_revision_entity_uuid):
def link_entity_to_previous_revision(neo4j_driver, entity_uuid, previous_revision_entity_uuids):
try:
with neo4j_driver.session() as session:
tx = session.begin_transaction()

# Create relationship from ancestor entity node to this Activity node
create_relationship_tx(tx, entity_uuid, previous_revision_entity_uuid, 'REVISION_OF', '->')

for previous_uuid in previous_revision_entity_uuids:
# Create a REVISION_OF relationship from this entity node to the previous revision node
create_relationship_tx(tx, entity_uuid, previous_uuid, 'REVISION_OF', '->')
tx.commit()
except TransactionError as te:
msg = "TransactionError from calling link_entity_to_previous_revision(): "
Expand Down
40 changes: 23 additions & 17 deletions src/schema/schema_triggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -940,26 +940,32 @@ def get_local_directory_rel_path(property_key, normalized_type, user_token, exis
A merged dictionary that contains all possible input data to be used
"""
def link_to_previous_revision(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
    # Purpose: link the entity in `existing_data_dict` to its previous revision(s)
    # through schema_neo4j_queries.link_entity_to_previous_revision(), then delete
    # the cached copies of every entity involved.
    #
    # existing_data_dict must contain:
    #   'uuid'                   - uuid of the new entity
    #   'previous_revision_uuid' - a single uuid string or a list of uuids
    #
    # Raises KeyError when a required key is missing. Any other failure while
    # linking or clearing the cache is also surfaced as KeyError (chained to the
    # original exception), matching what callers of this trigger already see.
    if 'uuid' not in existing_data_dict:
        raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_to_previous_revision()' trigger method.")

    if 'previous_revision_uuid' not in existing_data_dict:
        raise KeyError("Missing 'previous_revision_uuid' key in 'existing_data_dict' during calling 'link_to_previous_revision()' trigger method.")

    entity_uuid = existing_data_dict['uuid']

    # Normalize to a list so single and multiple previous revisions share one code path
    previous_revision = existing_data_dict['previous_revision_uuid']
    previous_uuids = previous_revision if isinstance(previous_revision, list) else [previous_revision]

    try:
        # Create a revision relationship from this new Dataset node to each previous revision node in neo4j
        schema_neo4j_queries.link_entity_to_previous_revision(schema_manager.get_neo4j_driver_instance(), entity_uuid, previous_uuids)

        # Delete the cache of each associated dataset if any cache exists.
        # The list must stay flat (one uuid string per element): nesting the
        # previous-revision list inside it would hand a list, not a uuid, to the
        # cache layer.
        uuids_list = [entity_uuid, *previous_uuids]
        schema_manager.delete_memcached_cache(uuids_list)
    except Exception as e:
        # No need to log here; keep the KeyError contract but preserve the cause
        raise KeyError(e) from e

"""
Trigger event method of auto generating the dataset title
Expand Down

0 comments on commit 6cca879

Please sign in to comment.