Initial implementation of tuplets endpoint

hubmapconsortium · Jan 30, 2024 · ec3bfa5 · ec3bfa5
1 parent 8133c93
commit ec3bfa5
Show file tree

Hide file tree

Showing 3 changed files with 204 additions and 0 deletions.
diff --git a/src/app.py b/src/app.py
@@ -1825,6 +1825,109 @@ def get_siblings(id):
     return jsonify(final_result)
 
 
+"""
+Get all tuplets of the given entity: sibling entities sharing an ancestor activity
+
+The gateway treats this endpoint as publicly accessible
+
+Result filtering based on query string
+For example: /tuplets/<id>?property=uuid
+
+Parameters
+----------
+id : str
+    The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of given entity
+
+Returns
+-------
+json
+    A list of all the tuplets of the target entity
+"""
+@app.route('/tuplets/<id>', methods = ['GET'])
+def get_tuplets(id):
+    final_result = []
+
+    # Token is not required, but if an invalid token provided,
+    # we need to tell the client with a 401 error
+    validate_token_if_auth_header_exists(request)
+
+    # Use the internal token to query the target entity
+    # since public entities don't require user token
+    token = get_internal_token()
+
+    # Get the entity dict from cache if exists
+    # Otherwise query against uuid-api and neo4j to get the entity dict if the id exists
+    entity_dict = query_target_entity(id, token)
+    normalized_entity_type = entity_dict['entity_type']
+    uuid = entity_dict['uuid']
+
+    # Collection doesn't have ancestors via Activity nodes
+    if normalized_entity_type == 'Collection':
+        bad_request_error(f"Unsupported entity type of id {id}: {normalized_entity_type}")
+
+    if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'):
+        # Only published/public datasets don't require token
+        if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
+            # Token is required and the user must belong to HuBMAP-READ group
+            token = get_user_token(request, non_public_access_required = True)
+    elif normalized_entity_type == 'Sample':
+        # The `data_access_level` of Sample can only be either 'public' or 'consortium'
+        if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM:
+            token = get_user_token(request, non_public_access_required = True)
+    else:
+        # Donor and Upload will always get back an empty list
+        # because their direct ancestor is Lab, which is being skipped by Neo4j query
+        # So no need to execute the code below
+        return jsonify(final_result)
+
+    # By now, either the entity is public accessible or the user token has the correct access level
+    # Result filtering based on query string
+    status = None
+    property_key = None
+    accepted_args = ['property', 'status']
+    if bool(request.args):
+        for arg_name in request.args.keys():
+            if arg_name not in accepted_args:
+                bad_request_error(f"{arg_name} is an unrecognized argument")
+        property_key = request.args.get('property')
+        status = request.args.get('status')
+        if status is not None:
+            status = status.lower()
+            if status not in ['new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted']:
+                bad_request_error("Invalid Dataset Status. Must be one of 'new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', or 'submitted'. Case-Insensitive")
+        if property_key is not None:
+            property_key = property_key.lower()
+            result_filtering_accepted_property_keys = ['uuid']
+            if property_key not in result_filtering_accepted_property_keys:
+                bad_request_error(f"Only the following property keys are supported in the query string: {COMMA_SEPARATOR.join(result_filtering_accepted_property_keys)}")
+    tuplet_list = app_neo4j_queries.get_tuplets(neo4j_driver_instance, uuid, status, property_key)
+    if property_key is not None:
+        return jsonify(tuplet_list)
+    # Generate trigger data
+    # Skip some of the properties that are time-consuming to generate via triggers
+    # Also skip next_revision_uuid and previous_revision_uuid for Dataset to avoid additional
+    # checks when the target Dataset is public but the revisions are not public
+    properties_to_skip = [
+        # Properties to skip for Sample
+        'direct_ancestor',
+        # Properties to skip for Dataset
+        'direct_ancestors',
+        'collections',
+        'upload',
+        'title',
+        'next_revision_uuid',
+        'previous_revision_uuid',
+        'associated_collection'
+    ]
+
+    complete_entities_list = schema_manager.get_complete_entities_list(token, tuplet_list, properties_to_skip)
+    # Final result after normalization
+    final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
+
+    return jsonify(final_result)
+
+
+
 """
 Get all previous revisions of the given entity
 Result filtering based on query string

diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py
@@ -1155,3 +1155,48 @@ def get_siblings(neo4j_driver, uuid, status, prop_key, include_revisions):
                 # Convert the list of nodes to a list of dicts
                 results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name])
     return results
+
+
+"""
+Get all tuplets by uuid
+
+Parameters
+----------
+neo4j_driver : neo4j.Driver object
+    The neo4j database connection pool
+uuid : str
+    The uuid of target entity 
+status, prop_key : str
+    Optional Dataset status to filter by, and optional property key for result filtering (None to skip either)
+
+Returns
+-------
+list
+    A list of unique tuplet dictionaries (or of property values when prop_key is given)
+"""
+def get_tuplets(neo4j_driver, uuid, status, prop_key):
+    tuplet_uuids = schema_neo4j_queries.get_tuplets(neo4j_driver, uuid, property_key='uuid')  # first pass: tuplet uuids only
+    tuplets_uuids_string = str(tuplet_uuids)  # Python list repr, interpolated below as the operand of `IN`
+    status_query_string = ""
+    prop_query_string = f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}"  # default: return whole nodes
+    if status is not None:
+        status_query_string = f"AND (NOT e:Dataset OR TOLOWER(e.status) = '{status}') "  # status only constrains Dataset nodes
+    if prop_key is not None:
+        prop_query_string = f"RETURN apoc.coll.toSet(COLLECT(e.{prop_key})) AS {record_field_name}"  # return only that property
+    results = []
+    query = ("MATCH (e:Entity) "  # second pass: re-select the tuplets by uuid, then apply the optional filters
+             f"WHERE e.uuid IN {tuplets_uuids_string} "
+             f"{status_query_string}"
+             f"{prop_query_string}")
+
+    with neo4j_driver.session() as session:
+        record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query)
+
+        if record and record[record_field_name]:  # otherwise `results` stays an empty list
+            if prop_key:
+                # Just return the list of property values from each entity node
+                results = record[record_field_name]
+            else:
+                # Convert the list of nodes to a list of dicts
+                results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name])
+    return results
diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py
@@ -272,6 +272,62 @@ def get_siblings(neo4j_driver, uuid, property_key=None):
     return results
 
 
+"""
+Get all tuplets by uuid
+
+Parameters
+----------
+neo4j_driver : neo4j.Driver object
+    The neo4j database connection pool
+uuid : str
+    The uuid of target entity 
+property_key : str
+    A target property key for result filtering
+
+Returns
+-------
+list
+    A list of unique tuplet dictionaries (or of property values when property_key is given)
+"""
+def get_tuplets(neo4j_driver, uuid, property_key=None):
+    results = []
+
+    if property_key:
+        query = (f"MATCH (e:Entity)<-[:ACTIVITY_OUTPUT]-(a:Activity)<-[:ACTIVITY_INPUT]-(parent:Entity) "
+                 # only follow the activity when its input (parent) is not a Lab
+                 f"WHERE e.uuid='{uuid}' AND parent.entity_type <> 'Lab' "
+                 f"MATCH (tuplet:Entity)<-[:ACTIVITY_OUTPUT]-(a) "
+                 f"WHERE tuplet <> e "
+                 # COLLECT() returns a list of the requested property from every other output of `a`
+                 # apoc.coll.toSet() removes duplicate property values
+                 f"RETURN apoc.coll.toSet(COLLECT(tuplet.{property_key})) AS {record_field_name}")
+    else:
+        query = (f"MATCH (e:Entity)<-[:ACTIVITY_OUTPUT]-(a:Activity)<-[:ACTIVITY_INPUT]-(parent:Entity) "
+                 # only follow the activity when its input (parent) is not a Lab
+                 f"WHERE e.uuid='{uuid}' AND parent.entity_type <> 'Lab' "
+                 f"MATCH (tuplet:Entity)<-[:ACTIVITY_OUTPUT]-(a:Activity) "
+                 f"WHERE tuplet <> e "
+                 # COLLECT() returns a list
+                 # apoc.coll.toSet() returns a set containing unique nodes
+                 f"RETURN apoc.coll.toSet(COLLECT(tuplet)) AS {record_field_name}")
+
+
+    logger.info("======get_tuplets() query======")
+    logger.info(query)
+
+    with neo4j_driver.session() as session:
+        record = session.read_transaction(execute_readonly_tx, query)
+
+        if record and record[record_field_name]:  # otherwise `results` stays an empty list
+            if property_key:
+                # Just return the list of property values from each entity node
+                results = record[record_field_name]
+            else:
+                # Convert the list of nodes to a list of dicts
+                results = nodes_to_dicts(record[record_field_name])
+    return results
+
+
 """
 Get all ancestors by uuid