From ec3bfa5e50af4535236e3f09834294c4070801e1 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Tue, 30 Jan 2024 14:38:55 -0500
Subject: [PATCH] Initial implementation of tuplets endpoint

---
 src/app.py                         | 118 +++++++++++++++++++++++++++++
 src/app_neo4j_queries.py           |  62 +++++++++++++++
 src/schema/schema_neo4j_queries.py |  54 +++++++++++++
 3 files changed, 234 insertions(+)

diff --git a/src/app.py b/src/app.py
index f21be0a6..4f4f8ac5 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1825,6 +1825,124 @@ def get_siblings(id):
     return jsonify(final_result)
 
 
+"""
+Get all tuplets of the given entity: sibling entities sharing an ancestor activity
+
+The gateway treats this endpoint as public accessible
+
+Result filtering based on query string
+For example: /tuplets/<id>?property=uuid
+
+Query string arguments (all optional)
+-------------------------------------
+property : str
+    Only 'uuid' is supported. When given, a list of tuplet uuids is returned
+status : str
+    Case-insensitive Dataset status. Tuplets that are Datasets must match it,
+    tuplets that are not Datasets are never filtered out by status
+
+Parameters
+----------
+id : str
+    The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of given entity
+
+Returns
+-------
+json
+    A list of all the tuplets of the target entity
+"""
+@app.route('/tuplets/<id>', methods = ['GET'])
+def get_tuplets(id):
+    final_result = []
+
+    # Token is not required, but if an invalid token provided,
+    # we need to tell the client with a 401 error
+    validate_token_if_auth_header_exists(request)
+
+    # Use the internal token to query the target entity
+    # since public entities don't require user token
+    token = get_internal_token()
+
+    # Get the entity dict from cache if exists
+    # Otherwise query against uuid-api and neo4j to get the entity dict if the id exists
+    entity_dict = query_target_entity(id, token)
+    normalized_entity_type = entity_dict['entity_type']
+    uuid = entity_dict['uuid']
+
+    # Collection doesn't have ancestors via Activity nodes
+    if normalized_entity_type == 'Collection':
+        bad_request_error(f"Unsupported entity type of id {id}: {normalized_entity_type}")
+
+    if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'):
+        # Only published/public datasets don't require token
+        if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
+            # Token is required and the user must belong to HuBMAP-READ group
+            token = get_user_token(request, non_public_access_required = True)
+    elif normalized_entity_type == 'Sample':
+        # The `data_access_level` of Sample can only be either 'public' or 'consortium'
+        if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM:
+            token = get_user_token(request, non_public_access_required = True)
+    else:
+        # Donor and Upload will always get back an empty list
+        # because their direct ancestor is Lab, which is being skipped by Neo4j query
+        # So no need to execute the code below
+        return jsonify(final_result)
+
+    # By now, either the entity is public accessible or the user token has the correct access level
+    # Result filtering based on query string
+    accepted_args = ['property', 'status']
+    for arg_name in request.args:
+        if arg_name not in accepted_args:
+            bad_request_error(f"{arg_name} is an unrecognized argument")
+
+    # Both are None when absent from the query string
+    property_key = request.args.get('property')
+    status = request.args.get('status')
+
+    if status is not None:
+        status = status.lower()
+        accepted_statuses = ['new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted']
+        if status not in accepted_statuses:
+            # List every accepted value so the message matches the actual check
+            bad_request_error(f"Invalid Dataset Status. Must be one of: {COMMA_SEPARATOR.join(accepted_statuses)} (Case-Insensitive)")
+
+    if property_key is not None:
+        property_key = property_key.lower()
+        result_filtering_accepted_property_keys = ['uuid']
+        if property_key not in result_filtering_accepted_property_keys:
+            bad_request_error(f"Only the following property keys are supported in the query string: {COMMA_SEPARATOR.join(result_filtering_accepted_property_keys)}")
+
+    tuplet_list = app_neo4j_queries.get_tuplets(neo4j_driver_instance, uuid, status, property_key)
+
+    # When a property key is given, the query returns plain property values rather than entities, so return them as-is
+    if property_key is not None:
+        return jsonify(tuplet_list)
+
+    # Generate trigger data
+    # Skip some of the properties that are time-consuming to generate via triggers
+    # Also skip next_revision_uuid and previous_revision_uuid for Dataset to avoid additional
+    # checks when the target Dataset is public but the revisions are not public
+    properties_to_skip = [
+        # Properties to skip for Sample
+        'direct_ancestor',
+        # Properties to skip for Dataset
+        'direct_ancestors',
+        'collections',
+        'upload',
+        'title',
+        'next_revision_uuid',
+        'previous_revision_uuid',
+        'associated_collection'
+    ]
+
+    complete_entities_list = schema_manager.get_complete_entities_list(token, tuplet_list, properties_to_skip)
+    # Final result after normalization
+    final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
+
+    return jsonify(final_result)
+
+
+
 """
 Get all previous revisions of the given entity
 Result filtering based on query string
diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py
index adf26267..568867e4 100644
--- a/src/app_neo4j_queries.py
+++ b/src/app_neo4j_queries.py
@@ -1155,3 +1155,65 @@ def get_siblings(neo4j_driver, uuid, status, prop_key, include_revisions):
                 # Convert the list of nodes to a list of dicts
                 results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name])
     return results
+
+
+"""
+Get all tuplets by uuid, optionally filtered by Dataset status
+
+Parameters
+----------
+neo4j_driver : neo4j.Driver object
+    The neo4j database connection pool
+uuid : str
+    The uuid of target entity
+status : str
+    Dataset status to keep (expected lowercase, it is compared against TOLOWER(e.status)),
+    or None for no status filtering. Only applied to Dataset nodes, other tuplets are always kept
+prop_key : str
+    A target property key for result filtering, or None to get whole entities
+
+Returns
+-------
+list
+    A list of unique tuplet dictionaries returned from the Cypher query,
+    or a list of unique property values when prop_key is given
+"""
+def get_tuplets(neo4j_driver, uuid, status, prop_key):
+    results = []
+
+    tuplet_uuids = schema_neo4j_queries.get_tuplets(neo4j_driver, uuid, property_key='uuid')
+
+    # No tuplets at all, so there is nothing to filter and no need for a second query
+    if not tuplet_uuids:
+        return results
+
+    # The Python repr of a list of strings, e.g. ['a', 'b'], is used as the Cypher list literal
+    tuplets_uuids_string = str(tuplet_uuids)
+
+    # NOTE: `status` and `prop_key` are interpolated into the query text as-is,
+    # the caller must only pass values it has checked against a fixed list
+    status_query_string = ""
+    if status is not None:
+        status_query_string = f"AND (NOT e:Dataset OR TOLOWER(e.status) = '{status}') "
+
+    prop_query_string = f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}"
+    if prop_key is not None:
+        prop_query_string = f"RETURN apoc.coll.toSet(COLLECT(e.{prop_key})) AS {record_field_name}"
+
+    query = ("MATCH (e:Entity) "
+             f"WHERE e.uuid IN {tuplets_uuids_string} "
+             f"{status_query_string}"
+             f"{prop_query_string}")
+
+    with neo4j_driver.session() as session:
+        record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query)
+
+        if record and record[record_field_name]:
+            if prop_key:
+                # Just return the list of property values from each entity node
+                results = record[record_field_name]
+            else:
+                # Convert the list of nodes to a list of dicts
+                results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name])
+
+    return results
diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py
index 8042423c..ab00b9cb 100644
--- a/src/schema/schema_neo4j_queries.py
+++ b/src/schema/schema_neo4j_queries.py
@@ -272,6 +272,60 @@ def get_siblings(neo4j_driver, uuid, property_key=None):
     return results
 
 
+"""
+Get all tuplets by uuid
+
+Tuplets are the other entities generated by the same Activity as the target entity
+
+Parameters
+----------
+neo4j_driver : neo4j.Driver object
+    The neo4j database connection pool
+uuid : str
+    The uuid of target entity
+property_key : str
+    A target property key for result filtering
+
+Returns
+-------
+list
+    A list of unique tuplet dictionaries returned from the Cypher query,
+    or a list of unique property values when property_key is given
+"""
+def get_tuplets(neo4j_driver, uuid, property_key=None):
+    results = []
+
+    # The two variants of the query only differ in what gets collected:
+    # a single property of each tuplet or the whole tuplet node
+    collected = f"tuplet.{property_key}" if property_key else "tuplet"
+
+    query = ("MATCH (e:Entity)<-[:ACTIVITY_OUTPUT]-(a:Activity)<-[:ACTIVITY_INPUT]-(parent:Entity) "
+             # filter out the Lab entities
+             f"WHERE e.uuid='{uuid}' AND parent.entity_type <> 'Lab' "
+             # `a` is already bound above, so this only matches outputs of that same activity
+             "MATCH (tuplet:Entity)<-[:ACTIVITY_OUTPUT]-(a) "
+             "WHERE tuplet <> e "
+             # COLLECT() returns a list
+             # apoc.coll.toSet() returns a set containing unique nodes
+             f"RETURN apoc.coll.toSet(COLLECT({collected})) AS {record_field_name}")
+
+    logger.info("======get_tuplets() query======")
+    logger.info(query)
+
+    with neo4j_driver.session() as session:
+        record = session.read_transaction(execute_readonly_tx, query)
+
+        if record and record[record_field_name]:
+            if property_key:
+                # Just return the list of property values from each entity node
+                results = record[record_field_name]
+            else:
+                # Convert the list of nodes to a list of dicts
+                results = nodes_to_dicts(record[record_field_name])
+
+    return results
+
+
 """
 Get all ancestors by uuid
 