
Commit

Merge pull request #610 from hubmapconsortium/Derek-Furst/tuplets
Derek furst/tuplets
yuanzhou authored Feb 6, 2024
2 parents ac4bf25 + a6c9c52 commit 9f22eac
Showing 4 changed files with 253 additions and 0 deletions.
45 changes: 45 additions & 0 deletions entity-api-spec.yaml
@@ -1544,6 +1544,51 @@ paths:
          description: The target entity could not be found
        '500':
          description: Internal error
  '/entities/{id}/tuplets':
    get:
      summary: Get the tuplets list for an Entity. The tuplets have the same parent activity node.
      parameters:
        - name: id
          in: path
          description: The unique identifier of entity. This identifier can be either a HuBMAP ID (e.g. HBM123.ABCD.456) or UUID
          required: true
          schema:
            type: string
        - name: status
          in: query
          description: A case-insensitive string. Any value besides 'new', 'qa', and 'published' will raise an error. If a valid status is provided, only results matching that status (if they are datasets) will be returned
          required: false
          schema:
            type: string
            enum: [ 'new', 'qa', 'published' ]
        - name: property_key
          in: query
          description: A case-insensitive string. Any value besides 'uuid' will raise an error. If property_key=uuid is provided, rather than entire dictionary representations of each node, only the list of matching uuids will be returned
          required: false
          schema:
            type: string
            enum: [ 'uuid' ]
      responses:
        '200':
          description: An array of tuplet entities is returned
          content:
            application/json:
              schema:
                type: array
                items:
                  anyOf:
                    - $ref: '#/components/schemas/Sample'
                    - $ref: '#/components/schemas/Dataset'
        '400':
          description: Invalid or misformatted entity identifier
        '401':
          description: The user's token has expired or the user did not supply a valid token
        '403':
          description: The user is not authorized to access the entity.
        '404':
          description: The target entity could not be found
        '500':
          description: Internal error
  '/entities/{id}/provenance':
    get:
      summary: 'Get Provenance Data for Entity. This returns a PROV JSON compliant representation of the entity''s provenance. Refer to this document for more information regarding [PROV JSON format](https://www.w3.org/Submission/2013/SUBM-prov-json-20130424/)'
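For orientation, here is a hedged client-side sketch of the new endpoint as specified above. The base URL, token, and use of the requests library are assumptions, not part of this change; also note that the spec names the filter parameter property_key while the Flask handler in src/app.py below reads it as property, which is what the sketch uses.

# Hypothetical client calls; base URL, token, and entity access are assumptions.
import requests

BASE_URL = "https://entity.api.hubmapconsortium.org"  # placeholder deployment URL
headers = {"Authorization": "Bearer <groups-token>"}   # only needed for non-public entities

# Full tuplet entities (Samples/Datasets produced by the same parent Activity)
tuplets = requests.get(f"{BASE_URL}/entities/HBM123.ABCD.456/tuplets", headers=headers).json()

# Only the uuids of tuplets, restricted to published Datasets
uuids = requests.get(f"{BASE_URL}/entities/HBM123.ABCD.456/tuplets",
                     params={"property": "uuid", "status": "published"},
                     headers=headers).json()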
107 changes: 107 additions & 0 deletions src/app.py
@@ -1825,6 +1825,113 @@ def get_siblings(id):
    return jsonify(final_result)


"""
Get all tuplets of the given entity: sibling entities sharing the same parent activity
The gateway treats this endpoint as publicly accessible
Result filtering based on query string
For example: /entities/{id}/tuplets?property=uuid
Parameters
----------
id : str
    The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of given entity
Returns
-------
json
    A list of all the tuplets of the target entity
"""
@app.route('/entities/<id>/tuplets', methods = ['GET'])
def get_tuplets(id):
    final_result = []

    # Token is not required, but if an invalid token is provided,
    # we need to tell the client with a 401 error
    validate_token_if_auth_header_exists(request)

    # Use the internal token to query the target entity
    # since public entities don't require user token
    token = get_internal_token()

    # Get the entity dict from cache if exists
    # Otherwise query against uuid-api and neo4j to get the entity dict if the id exists
    entity_dict = query_target_entity(id, token)
    normalized_entity_type = entity_dict['entity_type']
    uuid = entity_dict['uuid']

    # Collection doesn't have ancestors via Activity nodes
    if normalized_entity_type == 'Collection':
        bad_request_error(f"Unsupported entity type of id {id}: {normalized_entity_type}")

    if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'):
        # Only published/public datasets don't require token
        if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
            # Token is required and the user must belong to HuBMAP-READ group
            token = get_user_token(request, non_public_access_required = True)
    elif normalized_entity_type == 'Sample':
        # The `data_access_level` of Sample can only be either 'public' or 'consortium'
        if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM:
            token = get_user_token(request, non_public_access_required = True)
    else:
        # Donor and Upload will always get back an empty list
        # because their direct ancestor is Lab, which is skipped by the Neo4j query
        # So no need to execute the code below
        return jsonify(final_result)

    # By now, either the entity is publicly accessible or the user token has the correct access level
    # Result filtering based on query string
    status = None
    property_key = None
    accepted_args = ['property', 'status']
    if bool(request.args):
        for arg_name in request.args.keys():
            if arg_name not in accepted_args:
                bad_request_error(f"{arg_name} is an unrecognized argument")
        property_key = request.args.get('property')
        status = request.args.get('status')
        if status is not None:
            status = status.lower()
            if status not in ['new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted']:
                bad_request_error("Invalid Dataset Status. Must be 'new', 'qa', or 'published' (case-insensitive)")
        if property_key is not None:
            property_key = property_key.lower()
            result_filtering_accepted_property_keys = ['uuid']
            if property_key not in result_filtering_accepted_property_keys:
                bad_request_error(f"Only the following property keys are supported in the query string: {COMMA_SEPARATOR.join(result_filtering_accepted_property_keys)}")
    tuplet_list = app_neo4j_queries.get_tuplets(neo4j_driver_instance, uuid, status, property_key)
    if property_key is not None:
        return jsonify(tuplet_list)
    # Generate trigger data
    # Skip some of the properties that are time-consuming to generate via triggers
    # Also skip next_revision_uuid and previous_revision_uuid for Dataset to avoid additional
    # checks when the target Dataset is public but the revisions are not public
    properties_to_skip = [
        # Properties to skip for Sample
        'direct_ancestor',
        # Properties to skip for Dataset
        'direct_ancestors',
        'collections',
        'upload',
        'title',
        'next_revision_uuid',
        'previous_revision_uuid',
        'associated_collection',
        'creation_action',
        'local_directory_rel_path',
        'previous_revision_uuids',
        'next_revision_uuids'
    ]

    complete_entities_list = schema_manager.get_complete_entities_list(token, tuplet_list, properties_to_skip)
    # Final result after normalization
    final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)

    return jsonify(final_result)
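The access checks above mirror the existing sibling endpoints; as a rough sketch of what a client should expect (the host and bracketed entity ids are placeholders, and the exact non-2xx codes depend on the shared token helpers):

# Illustrative only; host, ids, and token are placeholders.
import requests

BASE = "https://entity.api.hubmapconsortium.org"

# Published Dataset (or public Sample): no token needed
requests.get(f"{BASE}/entities/<published-dataset-id>/tuplets")        # expect 200

# Unpublished Dataset or consortium-level Sample: HuBMAP-READ token required
requests.get(f"{BASE}/entities/<qa-dataset-id>/tuplets")               # expect 401/403
requests.get(f"{BASE}/entities/<qa-dataset-id>/tuplets",
             headers={"Authorization": "Bearer <groups-token>"})       # expect 200

# Donor and Upload entities always return an empty list; Collections are rejected
requests.get(f"{BASE}/entities/<donor-id>/tuplets").json()             # expect []
requests.get(f"{BASE}/entities/<collection-id>/tuplets")               # expect 400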



"""
Get all previous revisions of the given entity
Result filtering based on query string
45 changes: 45 additions & 0 deletions src/app_neo4j_queries.py
@@ -1155,3 +1155,48 @@ def get_siblings(neo4j_driver, uuid, status, prop_key, include_revisions):
                # Convert the list of nodes to a list of dicts
                results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name])
    return results


"""
Get all tuplets by uuid
Parameters
----------
neo4j_driver : neo4j.Driver object
    The neo4j database connection pool
uuid : str
    The uuid of target entity
status : str
    The status used to filter Dataset results, if provided
prop_key : str
    A target property key for result filtering
Returns
-------
list
    A list of unique tuplet dictionaries returned from the Cypher query
"""
def get_tuplets(neo4j_driver, uuid, status, prop_key):
    tuplet_uuids = schema_neo4j_queries.get_tuplets(neo4j_driver, uuid, property_key='uuid')
    tuplets_uuids_string = str(tuplet_uuids)
    status_query_string = ""
    prop_query_string = f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}"
    if status is not None:
        status_query_string = f"AND (NOT e:Dataset OR TOLOWER(e.status) = '{status}') "
    if prop_key is not None:
        prop_query_string = f"RETURN apoc.coll.toSet(COLLECT(e.{prop_key})) AS {record_field_name}"
    results = []
    query = ("MATCH (e:Entity) "
             f"WHERE e.uuid IN {tuplets_uuids_string} "
             f"{status_query_string}"
             f"{prop_query_string}")

    with neo4j_driver.session() as session:
        record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query)

        if record and record[record_field_name]:
            if prop_key:
                # Just return the list of property values from each entity node
                results = record[record_field_name]
            else:
                # Convert the list of nodes to a list of dicts
                results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name])
    return results
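To make the string assembly in get_tuplets() above concrete, this is the Cypher it would compose once schema_neo4j_queries.get_tuplets() has returned the tuplet uuids, here with a 'published' status filter and no property key; the uuid list and record field name are placeholders for illustration:

# Illustration only: the tuplet uuids and record field name are placeholders.
tuplets_uuids_string = str(['uuid-aaa', 'uuid-bbb'])
record_field_name = 'result'
status_query_string = "AND (NOT e:Dataset OR TOLOWER(e.status) = 'published') "
prop_query_string = f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}"

query = ("MATCH (e:Entity) "
         f"WHERE e.uuid IN {tuplets_uuids_string} "
         f"{status_query_string}"
         f"{prop_query_string}")

print(query)
# MATCH (e:Entity) WHERE e.uuid IN ['uuid-aaa', 'uuid-bbb'] AND (NOT e:Dataset OR TOLOWER(e.status) = 'published') RETURN apoc.coll.toSet(COLLECT(e)) AS result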
56 changes: 56 additions & 0 deletions src/schema/schema_neo4j_queries.py
@@ -272,6 +272,62 @@ def get_siblings(neo4j_driver, uuid, property_key=None):
    return results


"""
Get all tuplets by uuid
Parameters
----------
neo4j_driver : neo4j.Driver object
    The neo4j database connection pool
uuid : str
    The uuid of target entity
property_key : str
    A target property key for result filtering
Returns
-------
list
    A list of unique tuplet dictionaries returned from the Cypher query
"""
def get_tuplets(neo4j_driver, uuid, property_key=None):
    results = []

    if property_key:
        query = (f"MATCH (e:Entity)<-[:ACTIVITY_OUTPUT]-(a:Activity)<-[:ACTIVITY_INPUT]-(parent:Entity) "
                 # filter out the Lab entities
                 f"WHERE e.uuid='{uuid}' AND parent.entity_type <> 'Lab' "
                 f"MATCH (tuplet:Entity)<-[:ACTIVITY_OUTPUT]-(a) "
                 f"WHERE tuplet <> e "
                 # COLLECT() returns a list
                 # apoc.coll.toSet() returns a set containing unique nodes
                 f"RETURN apoc.coll.toSet(COLLECT(tuplet.{property_key})) AS {record_field_name}")
    else:
        query = (f"MATCH (e:Entity)<-[:ACTIVITY_OUTPUT]-(a:Activity)<-[:ACTIVITY_INPUT]-(parent:Entity) "
                 # filter out the Lab entities
                 f"WHERE e.uuid='{uuid}' AND parent.entity_type <> 'Lab' "
                 f"MATCH (tuplet:Entity)<-[:ACTIVITY_OUTPUT]-(a:Activity) "
                 f"WHERE tuplet <> e "
                 # COLLECT() returns a list
                 # apoc.coll.toSet() returns a set containing unique nodes
                 f"RETURN apoc.coll.toSet(COLLECT(tuplet)) AS {record_field_name}")

    logger.info("======get_tuplets() query======")
    logger.info(query)

    with neo4j_driver.session() as session:
        record = session.read_transaction(execute_readonly_tx, query)

        if record and record[record_field_name]:
            if property_key:
                # Just return the list of property values from each entity node
                results = record[record_field_name]
            else:
                # Convert the list of nodes to a list of dicts
                results = nodes_to_dicts(record[record_field_name])
    return results
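A minimal end-to-end sketch of what the graph pattern above means, assuming a throwaway Neo4j instance with APOC installed and that this module is importable; the bolt URI, credentials, and toy uuids are all placeholders:

# Sketch only; connection details, import path, and toy data are assumptions.
from neo4j import GraphDatabase

import schema_neo4j_queries  # assumes src/schema is on the import path

driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))

# Toy provenance: one Activity takes a Sample as input and outputs three Datasets
setup = """
CREATE (p:Entity:Sample {uuid: 'parent-1', entity_type: 'Sample'})
CREATE (a:Activity {uuid: 'act-1'})
CREATE (d1:Entity:Dataset {uuid: 'ds-1', entity_type: 'Dataset'})
CREATE (d2:Entity:Dataset {uuid: 'ds-2', entity_type: 'Dataset'})
CREATE (d3:Entity:Dataset {uuid: 'ds-3', entity_type: 'Dataset'})
CREATE (p)-[:ACTIVITY_INPUT]->(a)
CREATE (a)-[:ACTIVITY_OUTPUT]->(d1)
CREATE (a)-[:ACTIVITY_OUTPUT]->(d2)
CREATE (a)-[:ACTIVITY_OUTPUT]->(d3)
"""
with driver.session() as session:
    session.run(setup)

# ds-2 and ds-3 are the tuplets of ds-1: outputs of the same Activity, excluding ds-1 itself
print(schema_neo4j_queries.get_tuplets(driver, 'ds-1', property_key='uuid'))
# Expected: ['ds-2', 'ds-3'] (order not guaranteed)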


"""
Get all ancestors by uuid
