
Commit

Merge pull request #610 from hubmapconsortium/Derek-Furst/tuplets
Derek furst/tuplets
yuanzhou authored Feb 6, 2024
2 parents ac4bf25 + a6c9c52 commit 9f22eac
Showing 4 changed files with 253 additions and 0 deletions.
45 changes: 45 additions & 0 deletions entity-api-spec.yaml
@@ -1544,6 +1544,51 @@ paths:
          description: The target entity could not be found
        '500':
          description: Internal error
  '/entities/{id}/tuplets':
    get:
      summary: Get the tuplets list for an Entity. The tuplets have the same parent activity node.
      parameters:
        - name: id
          in: path
          description: The unique identifier of entity. This identifier can be either a HuBMAP ID (e.g. HBM123.ABCD.456) or UUID
          required: true
          schema:
            type: string
        - name: status
          in: query
          description: A case-insensitive string. Any value besides 'new', 'qa', and 'published' will raise an error. If a valid status is provided, only results matching that status (if they are datasets) will be returned
          required: false
          schema:
            type: string
            enum: [ 'new', 'qa', 'published' ]
        - name: property_key
          in: query
          description: A case-insensitive string. Any value besides 'uuid' will raise an error. If property_key=uuid is provided, rather than entire dictionary representations of each node, only the list of matching uuids will be returned
          required: false
          schema:
            type: string
            enum: [ 'uuid' ]
      responses:
        '200':
          description: An array of tuplet entities is returned
          content:
            application/json:
              schema:
                type: array
                items:
                  anyOf:
                    - $ref: '#/components/schemas/Sample'
                    - $ref: '#/components/schemas/Dataset'
        '400':
          description: Invalid or misformatted entity identifier
        '401':
          description: The user's token has expired or the user did not supply a valid token
        '403':
          description: The user is not authorized to access the entity.
        '404':
          description: The target entity could not be found
        '500':
          description: Internal error
  '/entities/{id}/provenance':
    get:
      summary: 'Get Provenance Data for Entity. This returns a PROV JSON compliant representation of the entity''s provenance. Refer to this document for more information regarding [PROV JSON format](https://www.w3.org/Submission/2013/SUBM-prov-json-20130424/)'
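For orientation, here is a hedged client-side sketch of the new endpoint as specified above. The base URL, token, and use of the requests library are assumptions, not part of this change; also note that the spec names the filter parameter property_key while the Flask handler in src/app.py below reads it as property, which is what the sketch uses.

# Hypothetical client calls; base URL, token, and entity access are assumptions.
import requests

BASE_URL = "https://entity.api.hubmapconsortium.org"  # placeholder deployment URL
headers = {"Authorization": "Bearer <groups-token>"}   # only needed for non-public entities

# Full tuplet entities (Samples/Datasets produced by the same parent Activity)
tuplets = requests.get(f"{BASE_URL}/entities/HBM123.ABCD.456/tuplets", headers=headers).json()

# Only the uuids of tuplets, restricted to published Datasets
uuids = requests.get(f"{BASE_URL}/entities/HBM123.ABCD.456/tuplets",
                     params={"property": "uuid", "status": "published"},
                     headers=headers).json()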
107 changes: 107 additions & 0 deletions src/app.py
@@ -1825,6 +1825,113 @@ def get_siblings(id):
    return jsonify(final_result)


"""
Get all tuplets of the given entity: sibling entities sharing the same parent activity
The gateway treats this endpoint as publicly accessible
Result filtering based on query string
For example: /entities/{id}/tuplets?property=uuid
Parameters
----------
id : str
    The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of given entity
Returns
-------
json
    A list of all the tuplets of the target entity
"""
@app.route('/entities/<id>/tuplets', methods = ['GET'])
def get_tuplets(id):
    final_result = []

    # Token is not required, but if an invalid token is provided,
    # we need to tell the client with a 401 error
    validate_token_if_auth_header_exists(request)

    # Use the internal token to query the target entity
    # since public entities don't require user token
    token = get_internal_token()

    # Get the entity dict from cache if exists
    # Otherwise query against uuid-api and neo4j to get the entity dict if the id exists
    entity_dict = query_target_entity(id, token)
    normalized_entity_type = entity_dict['entity_type']
    uuid = entity_dict['uuid']

    # Collection doesn't have ancestors via Activity nodes
    if normalized_entity_type == 'Collection':
        bad_request_error(f"Unsupported entity type of id {id}: {normalized_entity_type}")

    if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'):
        # Only published/public datasets don't require token
        if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
            # Token is required and the user must belong to HuBMAP-READ group
            token = get_user_token(request, non_public_access_required = True)
    elif normalized_entity_type == 'Sample':
        # The `data_access_level` of Sample can only be either 'public' or 'consortium'
        if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM:
            token = get_user_token(request, non_public_access_required = True)
    else:
        # Donor and Upload will always get back an empty list
        # because their direct ancestor is Lab, which is skipped by the Neo4j query
        # So no need to execute the code below
        return jsonify(final_result)

    # By now, either the entity is publicly accessible or the user token has the correct access level
    # Result filtering based on query string
    status = None
    property_key = None
    accepted_args = ['property', 'status']
    if bool(request.args):
        for arg_name in request.args.keys():
            if arg_name not in accepted_args:
                bad_request_error(f"{arg_name} is an unrecognized argument")
        property_key = request.args.get('property')
        status = request.args.get('status')
        if status is not None:
            status = status.lower()
            if status not in ['new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted']:
                bad_request_error("Invalid Dataset Status. Must be 'new', 'qa', or 'published' (case-insensitive)")
        if property_key is not None:
            property_key = property_key.lower()
            result_filtering_accepted_property_keys = ['uuid']
            if property_key not in result_filtering_accepted_property_keys:
                bad_request_error(f"Only the following property keys are supported in the query string: {COMMA_SEPARATOR.join(result_filtering_accepted_property_keys)}")
    tuplet_list = app_neo4j_queries.get_tuplets(neo4j_driver_instance, uuid, status, property_key)
    if property_key is not None:
        return jsonify(tuplet_list)
    # Generate trigger data
    # Skip some of the properties that are time-consuming to generate via triggers
    # Also skip next_revision_uuid and previous_revision_uuid for Dataset to avoid additional
    # checks when the target Dataset is public but the revisions are not public
    properties_to_skip = [
        # Properties to skip for Sample
        'direct_ancestor',
        # Properties to skip for Dataset
        'direct_ancestors',
        'collections',
        'upload',
        'title',
        'next_revision_uuid',
        'previous_revision_uuid',
        'associated_collection',
        'creation_action',
        'local_directory_rel_path',
        'previous_revision_uuids',
        'next_revision_uuids'
    ]

    complete_entities_list = schema_manager.get_complete_entities_list(token, tuplet_list, properties_to_skip)
    # Final result after normalization
    final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)

    return jsonify(final_result)
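The access checks above mirror the existing sibling endpoints; as a rough sketch of what a client should expect (the host and bracketed entity ids are placeholders, and the exact non-2xx codes depend on the shared token helpers):

# Illustrative only; host, ids, and token are placeholders.
import requests

BASE = "https://entity.api.hubmapconsortium.org"

# Published Dataset (or public Sample): no token needed
requests.get(f"{BASE}/entities/<published-dataset-id>/tuplets")        # expect 200

# Unpublished Dataset or consortium-level Sample: HuBMAP-READ token required
requests.get(f"{BASE}/entities/<qa-dataset-id>/tuplets")               # expect 401/403
requests.get(f"{BASE}/entities/<qa-dataset-id>/tuplets",
             headers={"Authorization": "Bearer <groups-token>"})       # expect 200

# Donor and Upload entities always return an empty list; Collections are rejected
requests.get(f"{BASE}/entities/<donor-id>/tuplets").json()             # expect []
requests.get(f"{BASE}/entities/<collection-id>/tuplets")               # expect 400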



"""
Get all previous revisions of the given entity
Result filtering based on query string
45 changes: 45 additions & 0 deletions src/app_neo4j_queries.py
@@ -1155,3 +1155,48 @@ def get_siblings(neo4j_driver, uuid, status, prop_key, include_revisions):
                # Convert the list of nodes to a list of dicts
                results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name])
    return results


"""
Get all tuplets by uuid
Parameters
----------
neo4j_driver : neo4j.Driver object
    The neo4j database connection pool
uuid : str
    The uuid of target entity
status : str
    The status used to filter Dataset results, if provided
prop_key : str
    A target property key for result filtering
Returns
-------
list
    A list of unique tuplet dictionaries returned from the Cypher query
"""
def get_tuplets(neo4j_driver, uuid, status, prop_key):
    tuplet_uuids = schema_neo4j_queries.get_tuplets(neo4j_driver, uuid, property_key='uuid')
    tuplets_uuids_string = str(tuplet_uuids)
    status_query_string = ""
    prop_query_string = f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}"
    if status is not None:
        status_query_string = f"AND (NOT e:Dataset OR TOLOWER(e.status) = '{status}') "
    if prop_key is not None:
        prop_query_string = f"RETURN apoc.coll.toSet(COLLECT(e.{prop_key})) AS {record_field_name}"
    results = []
    query = ("MATCH (e:Entity) "
             f"WHERE e.uuid IN {tuplets_uuids_string} "
             f"{status_query_string}"
             f"{prop_query_string}")

    with neo4j_driver.session() as session:
        record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query)

        if record and record[record_field_name]:
            if prop_key:
                # Just return the list of property values from each entity node
                results = record[record_field_name]
            else:
                # Convert the list of nodes to a list of dicts
                results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name])
    return results
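To make the string assembly in get_tuplets() above concrete, this is the Cypher it would compose once schema_neo4j_queries.get_tuplets() has returned the tuplet uuids, here with a 'published' status filter and no property key; the uuid list and record field name are placeholders for illustration:

# Illustration only: the tuplet uuids and record field name are placeholders.
tuplets_uuids_string = str(['uuid-aaa', 'uuid-bbb'])
record_field_name = 'result'
status_query_string = "AND (NOT e:Dataset OR TOLOWER(e.status) = 'published') "
prop_query_string = f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}"

query = ("MATCH (e:Entity) "
         f"WHERE e.uuid IN {tuplets_uuids_string} "
         f"{status_query_string}"
         f"{prop_query_string}")

print(query)
# MATCH (e:Entity) WHERE e.uuid IN ['uuid-aaa', 'uuid-bbb'] AND (NOT e:Dataset OR TOLOWER(e.status) = 'published') RETURN apoc.coll.toSet(COLLECT(e)) AS result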
56 changes: 56 additions & 0 deletions src/schema/schema_neo4j_queries.py
@@ -272,6 +272,62 @@ def get_siblings(neo4j_driver, uuid, property_key=None):
    return results


"""
Get all tuplets by uuid
Parameters
----------
neo4j_driver : neo4j.Driver object
    The neo4j database connection pool
uuid : str
    The uuid of target entity
property_key : str
    A target property key for result filtering
Returns
-------
list
    A list of unique tuplet dictionaries returned from the Cypher query
"""
def get_tuplets(neo4j_driver, uuid, property_key=None):
    results = []

    if property_key:
        query = (f"MATCH (e:Entity)<-[:ACTIVITY_OUTPUT]-(a:Activity)<-[:ACTIVITY_INPUT]-(parent:Entity) "
                 # filter out the Lab entities
                 f"WHERE e.uuid='{uuid}' AND parent.entity_type <> 'Lab' "
                 f"MATCH (tuplet:Entity)<-[:ACTIVITY_OUTPUT]-(a) "
                 f"WHERE tuplet <> e "
                 # COLLECT() returns a list
                 # apoc.coll.toSet() returns a set containing unique nodes
                 f"RETURN apoc.coll.toSet(COLLECT(tuplet.{property_key})) AS {record_field_name}")
    else:
        query = (f"MATCH (e:Entity)<-[:ACTIVITY_OUTPUT]-(a:Activity)<-[:ACTIVITY_INPUT]-(parent:Entity) "
                 # filter out the Lab entities
                 f"WHERE e.uuid='{uuid}' AND parent.entity_type <> 'Lab' "
                 f"MATCH (tuplet:Entity)<-[:ACTIVITY_OUTPUT]-(a:Activity) "
                 f"WHERE tuplet <> e "
                 # COLLECT() returns a list
                 # apoc.coll.toSet() returns a set containing unique nodes
                 f"RETURN apoc.coll.toSet(COLLECT(tuplet)) AS {record_field_name}")

    logger.info("======get_tuplets() query======")
    logger.info(query)

    with neo4j_driver.session() as session:
        record = session.read_transaction(execute_readonly_tx, query)

        if record and record[record_field_name]:
            if property_key:
                # Just return the list of property values from each entity node
                results = record[record_field_name]
            else:
                # Convert the list of nodes to a list of dicts
                results = nodes_to_dicts(record[record_field_name])
    return results
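A minimal end-to-end sketch of what the graph pattern above means, assuming a throwaway Neo4j instance with APOC installed and that this module is importable; the bolt URI, credentials, and toy uuids are all placeholders:

# Sketch only; connection details, import path, and toy data are assumptions.
from neo4j import GraphDatabase

import schema_neo4j_queries  # assumes src/schema is on the import path

driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))

# Toy provenance: one Activity takes a Sample as input and outputs three Datasets
setup = """
CREATE (p:Entity:Sample {uuid: 'parent-1', entity_type: 'Sample'})
CREATE (a:Activity {uuid: 'act-1'})
CREATE (d1:Entity:Dataset {uuid: 'ds-1', entity_type: 'Dataset'})
CREATE (d2:Entity:Dataset {uuid: 'ds-2', entity_type: 'Dataset'})
CREATE (d3:Entity:Dataset {uuid: 'ds-3', entity_type: 'Dataset'})
CREATE (p)-[:ACTIVITY_INPUT]->(a)
CREATE (a)-[:ACTIVITY_OUTPUT]->(d1)
CREATE (a)-[:ACTIVITY_OUTPUT]->(d2)
CREATE (a)-[:ACTIVITY_OUTPUT]->(d3)
"""
with driver.session() as session:
    session.run(setup)

# ds-2 and ds-3 are the tuplets of ds-1: outputs of the same Activity, excluding ds-1 itself
print(schema_neo4j_queries.get_tuplets(driver, 'ds-1', property_key='uuid'))
# Expected: ['ds-2', 'ds-3'] (order not guaranteed)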


"""
Get all ancestors by uuid
