From 071d3ab8f0b1a3fb55328cd5421604bb95d4e4f8 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Thu, 28 Sep 2023 15:00:01 -0400
Subject: [PATCH 01/16] work in progress multiple components endpoint

---
 src/app.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/app.py b/src/app.py
index 69aa1cc7..f9d9c439 100644
--- a/src/app.py
+++ b/src/app.py
@@ -3785,6 +3785,22 @@ def paired_dataset(id):
     return jsonify(out_list), 200
 
 
+"""
+Description
+"""
+@app.route('datasets/components', methods=['POST'])
+def multiple_components():
+    if READ_ONLY_MODE:
+        forbidden_error("Access not granted when entity-api in READ-ONLY mode")
+
+    # If an invalid token provided, we need to tell the client with a 401 error, rather
+    # than a 500 error later if the token is not good.
+    validate_token_if_auth_header_exists(request)
+    # Get user token from Authorization header
+    user_token = get_user_token(request)
+
+
 ####################################################################################################
 ## Internal Functions
 ####################################################################################################
From 9726734ba08b2f60a403e8730b80091face180e0 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Thu, 5 Oct 2023 13:50:20 -0400
Subject: [PATCH 02/16] Work in progress create multiple components endpoint

---
 src/app.py                         | 96 +++++++++++++++++++++++++++++-
 src/schema/schema_neo4j_queries.py | 52 ++++++++++++++++
 2 files changed, 147 insertions(+), 1 deletion(-)

diff --git a/src/app.py b/src/app.py
index f9d9c439..017818ff 100644
--- a/src/app.py
+++ b/src/app.py
@@ -3788,7 +3788,7 @@ def paired_dataset(id):
 """
 Description
 """
-@app.route('datasets/components', methods=['POST'])
+@app.route('/datasets/components', methods=['POST'])
 def multiple_components():
     if READ_ONLY_MODE:
         forbidden_error("Access not granted when entity-api in READ-ONLY mode")
@@ -3799,6 +3799,100 @@ def multiple_components():
     # Get user token from Authorization header
     user_token = get_user_token(request)
 
+    schema_validators.validate_application_header_before_entity_create("Dataset", request)
+
+    require_json(request)
+
+    json_data_dict = request.get_json()
+
+    required_fields = ['creation_action', 'group_uuid', 'direct_ancestor_uuids', 'datasets']
+    # dataset_required_fields = ['dataset_link_abs_dir', 'contains_human_genetic_sequences', 'data_types']
+
+    # Verify that each field is in the json_data_dict, and that there are no other fields
+    for field in required_fields:
+        if field not in json_data_dict:
+            raise bad_request_error(f"Missing required field {field}")
+    for field in json_data_dict:
+        if field not in required_fields:
+            raise bad_request_error(f"Request body contained unexpected field {field}")
+
+    user_info_dict = schema_manager.get_user_info(request)
+    return jsonify(user_info_dict)
+
+    new_data_dict = {**json_data_dict, **user_info_dict}
+
+    # validate top level fields
+
+    # validate group_uuid
+    schema_triggers.set_group_uuid("group_uuid", "Dataset", user_token, {}, new_data_dict)
+
+    allowable_creation_actions = ['Multi-Assay Split']
+
+    if json_data_dict.get('creation_action') not in allowable_creation_actions:
+        bad_request_error(f"creation_action {json_data_dict.get('creation_action')} not recognized. Allowed values are: {COMMA_SEPARATOR.join(allowable_creation_actions)}")
+
+    # validate existence of direct ancestors.
+    for direct_ancestor_uuid in json_data_dict['direct_ancestor_uuids']:
+        direct_ancestor_dict = query_target_entity(direct_ancestor_uuid, user_token)
+
+    dataset_list = []
+
+    for dataset in json_data_dict.get('datasets'):
+        dataset['group_uuid'] = json_data_dict.get('group_uuid')
+        dataset['direct_ancestor_uuids'] = json_Data_dict.get('direct_ancestor_uuids')
+        try:
+            schema_manager.validate_json_data_against_schema(dataset, 'Dataset')
+        except schema_errors.SchemaValidationException as e:
+            # No need to log validation errors
+            bad_request_error(str(e))
+        # Execute property level validators defined in the schema yaml before entity property creation
+        # Use empty dict {} to indicate there's no existing_data_dict
+        try:
+            schema_manager.execute_property_level_validators('before_property_create_validators', normalized_entity_type, request, {}, json_data_dict)
+        # Currently only ValueError
+        except ValueError as e:
+            bad_request_error(e)
+
+    # Also check existence of the previous revision dataset if specified
+    if 'previous_revision_uuid' in json_data_dict:
+        previous_version_dict = query_target_entity(json_data_dict['previous_revision_uuid'], user_token)
+
+        # Make sure the previous version entity is either a Dataset or Sample (and publication 2/17/23)
+        if previous_version_dict['entity_type'] not in ['Sample'] and \
+            not schema_manager.entity_type_instanceof(previous_version_dict['entity_type'], 'Dataset'):
+            bad_request_error(
+                f"The previous_revision_uuid specified for this dataset must be either a Dataset or Sample or Publication")
+
+        # Also need to validate if the given 'previous_revision_uuid' has already had
+        # an existing next revision
+        # Only return a list of the uuids, no need to get back the list of dicts
+        next_revisions_list = app_neo4j_queries.get_next_revisions(neo4j_driver_instance, previous_version_dict['uuid'], 'uuid')
+
+        # As long as the list is not empty, tell the users to use a different 'previous_revision_uuid'
+        if next_revisions_list:
+            bad_request_error(
+                f"The previous_revision_uuid specified for this dataset has already had a next revision")
+
+        # Only published datasets can have revisions made of them. Verify that the status of the Dataset specified
+        # by previous_revision_uuid is published. Else, bad request error.
+        if previous_version_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
+            bad_request_error(f"The previous_revision_uuid specified for this dataset must be 'Published' in order to create a new revision from it")
+
+    for dataset in json_data_dict.get('datasets'):
+        merged_dict = create_entity_details(request, "Dataset", user_token, json_data_dict)
+        schema_triggers.set_status_history('status', 'Dataset', user_token, merged_dict, {})
+        schema_triggers.link_to_previous_revision('previous_revision_uuid', 'Dataset', user_token, merged_dict, {})
+        dataset_list.append(merged_dict)
+
+    # Generate property values for Activity node
+    # Can grab the existing data from the first dataset in the list
+    activity_data_dict = schema_manager.generate_activity_data('Dataset', user_token, dataset_list[0])
+
+
+
+    for data_dict in dataset_list:
+
+
 ####################################################################################################
 ## Internal Functions
 ####################################################################################################

diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py
index b23d33ba..0d8dc787 100644
--- a/src/schema/schema_neo4j_queries.py
+++ b/src/schema/schema_neo4j_queries.py
@@ -554,6 +554,58 @@ def link_entity_to_direct_ancestors(neo4j_driver, entity_uuid, direct_ancestor_u
         raise TransactionError(msg)
 
 
+"""
+Create or recreate one or more linkages (via Activity nodes)
+between the target entity nodes and the direct ancestor node in neo4j
+
+Parameters
+----------
+neo4j_driver : neo4j.Driver object
+    The neo4j database connection pool
+entity_uuids : list
+    List of the uuids of target child entities
+direct_ancestor_uuid : str
+    The uuid of direct ancestors
+activity_data_dict : dict
+    A dict of activity properties to be created
+"""
+
+
+def link_multiple_entities_to_direct_ancestor(neo4j_driver, entity_uuids, direct_ancestor_uuid, activity_data_dict):
+    try:
+        with neo4j_driver.session() as session:
+            tx = session.begin_transaction()
+
+            # First delete all the old linkages and Activity node between this entity and its direct ancestors
+            _delete_activity_node_and_linkages_tx(tx, entity_uuid)
+
+            # Get the activity uuid
+            activity_uuid = activity_data_dict['uuid']
+
+            # Create the Activity node
+            create_activity_tx(tx, activity_data_dict)
+
+            # Create relationship from this Activity node to the target entity node
+            create_relationship_tx(tx, activity_uuid, entity_uuid, 'ACTIVITY_OUTPUT', '->')
+
+            # Create relationship from each ancestor entity node to this Activity node
+            for direct_ancestor_uuid in direct_ancestor_uuids:
+                create_relationship_tx(tx, direct_ancestor_uuid, activity_uuid, 'ACTIVITY_INPUT', '->')
+
+            tx.commit()
+    except TransactionError as te:
+        msg = "TransactionError from calling link_entity_to_direct_ancestors(): "
+        # Log the full stack trace, prepend a line with our message
+        logger.exception(msg)
+
+        if tx.closed() == False:
+            # Log the full stack trace, prepend a line with our message
+            logger.info("Failed to commit link_entity_to_direct_ancestors() transaction, rollback")
+            tx.rollback()
+
+        raise TransactionError(msg)
+
+
 """
 Create or recreate linkage between the publication node and the associated collection node in neo4j
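The linkage model this patch begins to build — one Activity node per provenance event, with ACTIVITY_INPUT edges from ancestors and ACTIVITY_OUTPUT edges to new entities — can be sketched standalone with the official neo4j Python driver. The sketch below uses parameterized Cypher instead of the repo's helper functions, omits the delete-old-linkages step, and treats the URI, credentials, and uuids as placeholders:

from neo4j import GraphDatabase

URI = "bolt://localhost:7687"   # placeholder connection details
AUTH = ("neo4j", "password")

def link_via_activity(activity_props, ancestor_uuids, entity_uuid):
    driver = GraphDatabase.driver(URI, auth=AUTH)
    try:
        with driver.session() as session:
            tx = session.begin_transaction()
            # One Activity node records the provenance event
            tx.run("CREATE (a:Activity) SET a = $props", props=activity_props)
            # Each ancestor feeds the Activity ...
            for uuid in ancestor_uuids:
                tx.run("MATCH (d:Entity {uuid: $du}), (a:Activity {uuid: $au}) "
                       "CREATE (d)-[:ACTIVITY_INPUT]->(a)",
                       du=uuid, au=activity_props['uuid'])
            # ... and the Activity outputs the new entity
            tx.run("MATCH (a:Activity {uuid: $au}), (e:Entity {uuid: $eu}) "
                   "CREATE (a)-[:ACTIVITY_OUTPUT]->(e)",
                   au=activity_props['uuid'], eu=entity_uuid)
            tx.commit()
    finally:
        driver.close()

Parameterized queries like these avoid the quoting pitfalls of building Cypher with f-strings, which is a design choice the patch series itself does not make.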
From a1c8c5cf3ee3436a7b2b9217449e5af2235f0823 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Fri, 6 Oct 2023 15:16:34 -0400
Subject: [PATCH 03/16] work in progress multiple components endpoint

---
 src/app.py                         |  8 ++++----
 src/schema/schema_neo4j_queries.py | 20 +++++++++-----------
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/src/app.py b/src/app.py
index 017818ff..d49b1049 100644
--- a/src/app.py
+++ b/src/app.py
@@ -3806,7 +3806,6 @@ def multiple_components():
     json_data_dict = request.get_json()
 
     required_fields = ['creation_action', 'group_uuid', 'direct_ancestor_uuids', 'datasets']
-    # dataset_required_fields = ['dataset_link_abs_dir', 'contains_human_genetic_sequences', 'data_types']
 
     # Verify that each field is in the json_data_dict, and that there are no other fields
     for field in required_fields:
@@ -3817,7 +3816,6 @@ def multiple_components():
             raise bad_request_error(f"Request body contained unexpected field {field}")
 
     user_info_dict = schema_manager.get_user_info(request)
-    return jsonify(user_info_dict)
 
     new_data_dict = {**json_data_dict, **user_info_dict}
 
@@ -3888,10 +3886,12 @@ def multiple_components():
     # Can grab the existing data from the first dataset in the list
     activity_data_dict = schema_manager.generate_activity_data('Dataset', user_token, dataset_list[0])
 
+    entity_uuid_list = []
 
+    for dataset in dataset_list:
+        entity_uuid_list.append(dataset['uuid'])
 
-    for data_dict in dataset_list:
-
+    schema_neo4j_queries.link_multiple_entities_to_direct_ancestor(neo4j_driver_instance, entity_uuid_list, , activity_data_dict)
 
 
diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py
index 0d8dc787..888387de 100644
--- a/src/schema/schema_neo4j_queries.py
+++ b/src/schema/schema_neo4j_queries.py
@@ -569,28 +569,26 @@ def link_entity_to_direct_ancestors(neo4j_driver, entity_uuid, direct_ancestor_u
 activity_data_dict : dict
     A dict of activity properties to be created
 """
-
-
 def link_multiple_entities_to_direct_ancestor(neo4j_driver, entity_uuids, direct_ancestor_uuid, activity_data_dict):
     try:
         with neo4j_driver.session() as session:
             tx = session.begin_transaction()
 
-            # First delete all the old linkages and Activity node between this entity and its direct ancestors
-            _delete_activity_node_and_linkages_tx(tx, entity_uuid)
+            # Create the Activity node
+            create_activity_tx(tx, activity_data_dict)
 
             # Get the activity uuid
             activity_uuid = activity_data_dict['uuid']
 
-            # Create the Activity node
-            create_activity_tx(tx, activity_data_dict)
+            for entity_uuid in entity_uuids:
+                # First delete all the old linkages and Activity node between this entity and its direct ancestors
+                _delete_activity_node_and_linkages_tx(tx, entity_uuid)
 
-            # Create relationship from this Activity node to the target entity node
-            create_relationship_tx(tx, activity_uuid, entity_uuid, 'ACTIVITY_OUTPUT', '->')
+                # Create relationship from this Activity node to the target entity node
+                create_relationship_tx(tx, activity_uuid, entity_uuid, 'ACTIVITY_OUTPUT', '->')
 
-            # Create relationship from each ancestor entity node to this Activity node
-            for direct_ancestor_uuid in direct_ancestor_uuids:
-                create_relationship_tx(tx, direct_ancestor_uuid, activity_uuid, 'ACTIVITY_INPUT', '->')
+                # Create relationship from the ancestor entity node to this Activity node
+                create_relationship_tx(tx, direct_ancestor_uuid, activity_uuid, 'ACTIVITY_INPUT', '->')
 
             tx.commit()
     except TransactionError as te:
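After this reordering, the intended graph shape is a single Activity node fanning out to every component dataset. A read-only query along these lines would confirm the fan-out (standalone sketch; the connection details and uuid are placeholders):

from neo4j import GraphDatabase

def components_of(uri, auth, parent_uuid):
    driver = GraphDatabase.driver(uri, auth=auth)
    try:
        with driver.session() as session:
            result = session.run(
                "MATCH (p:Entity {uuid: $uuid})-[:ACTIVITY_INPUT]->(a:Activity)"
                "-[:ACTIVITY_OUTPUT]->(c:Entity) "
                "RETURN c.uuid AS uuid",
                uuid=parent_uuid)
            # Materialize the records before the session closes
            return [record["uuid"] for record in result]
    finally:
        driver.close()

Note the call site in this patch still has a placeholder second argument (the stray comma before activity_data_dict); the next patch fills it in.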
From 82a8b9c09d7f9a4b160a00a467b879df1319a511 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Fri, 6 Oct 2023 15:48:27 -0400
Subject: [PATCH 04/16] multiple components implemented. Testing required. Output returned tbd

---
 src/app.py                         |  6 +++---
 src/schema/schema_neo4j_queries.py | 14 ++++++++------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/app.py b/src/app.py
index d49b1049..5f6d0032 100644
--- a/src/app.py
+++ b/src/app.py
@@ -3817,12 +3817,12 @@ def multiple_components():
 
     user_info_dict = schema_manager.get_user_info(request)
 
-    new_data_dict = {**json_data_dict, **user_info_dict}
+    json_data_with_user_info_dict = {**json_data_dict, **user_info_dict}
 
     # validate top level fields
 
     # validate group_uuid
-    schema_triggers.set_group_uuid("group_uuid", "Dataset", user_token, {}, new_data_dict)
+    schema_triggers.set_group_uuid("group_uuid", "Dataset", user_token, {}, json_data_with_user_info_dict)
 
     allowable_creation_actions = ['Multi-Assay Split']
 
@@ -3891,7 +3891,7 @@ def multiple_components():
         entity_uuid_list.append(dataset['uuid'])
 
-    schema_neo4j_queries.link_multiple_entities_to_direct_ancestor(neo4j_driver_instance, entity_uuid_list, , activity_data_dict)
+    schema_neo4j_queries.link_multiple_entities_to_direct_ancestors(neo4j_driver_instance, entity_uuid_list, json_data_dict['direct_ancestor_uuids'], activity_data_dict)
 
 
diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py
index 888387de..5b95da03 100644
--- a/src/schema/schema_neo4j_queries.py
+++ b/src/schema/schema_neo4j_queries.py
@@ -556,7 +556,7 @@ def link_entity_to_direct_ancestors(neo4j_driver, entity_uuid, direct_ancestor_u
 
 """
 Create or recreate one or more linkages (via Activity nodes)
-between the target entity nodes and the direct ancestor node in neo4j
+between the target entity nodes and the direct ancestor nodes in neo4j
 
 Parameters
@@ -564,12 +564,12 @@ def link_entity_to_direct_ancestors(neo4j_driver, entity_uuid, direct_ancestor_u
     The neo4j database connection pool
 entity_uuids : list
     List of the uuids of target child entities
-direct_ancestor_uuid : str
-    The uuid of direct ancestors
+direct_ancestor_uuid : list
+    The uuids of direct ancestors
 activity_data_dict : dict
     A dict of activity properties to be created
 """
-def link_multiple_entities_to_direct_ancestor(neo4j_driver, entity_uuids, direct_ancestor_uuid, activity_data_dict):
+def link_multiple_entities_to_direct_ancestors(neo4j_driver, entity_uuids, direct_ancestor_uuids, activity_data_dict):
     try:
         with neo4j_driver.session() as session:
             tx = session.begin_transaction()
@@ -587,8 +587,10 @@ def link_multiple_entities_to_direct_ancestors(neo4j_driver, entity_uuids, direct
                 # Create relationship from this Activity node to the target entity node
                 create_relationship_tx(tx, activity_uuid, entity_uuid, 'ACTIVITY_OUTPUT', '->')
 
-                # Create relationship from the ancestor entity node to this Activity node
-                create_relationship_tx(tx, direct_ancestor_uuid, activity_uuid, 'ACTIVITY_INPUT', '->')
+                # Create relationship from each ancestor entity node to this Activity node
+                for direct_ancestor_uuid in direct_ancestor_uuids:
+                    create_relationship_tx(tx, direct_ancestor_uuid, activity_uuid, 'ACTIVITY_INPUT', '->')
+
 
             tx.commit()
     except TransactionError as te:
From fc85743f5e2f1ca72732c42def13e11ef8b969ca Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Fri, 6 Oct 2023 15:49:53 -0400
Subject: [PATCH 05/16] multiple components implemented. Testing required. Output returned tbd. For now, returns uuids of the entities created.

---
 src/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/app.py b/src/app.py
index 5f6d0032..8fbd00ee 100644
--- a/src/app.py
+++ b/src/app.py
@@ -3892,7 +3892,7 @@ def multiple_components():
         entity_uuid_list.append(dataset['uuid'])
 
     schema_neo4j_queries.link_multiple_entities_to_direct_ancestors(neo4j_driver_instance, entity_uuid_list, json_data_dict['direct_ancestor_uuids'], activity_data_dict)
-
+    return jsonify(entity_uuid_list)
 
 
 ####################################################################################################
From 860f0edd4dc45f9daa9ebb59ebbaf07297211bad Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Fri, 6 Oct 2023 16:55:40 -0400
Subject: [PATCH 06/16] Applied some bug fixes as well as modified the output to include a list containing both datasets. Still need to apply the same modifications and read triggers done with create_entity. Creation action field is not currently supported in the triggers. Need to merge in changes from creation_action branch

---
 src/app.py | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/src/app.py b/src/app.py
index 8fbd00ee..57fc130a 100644
--- a/src/app.py
+++ b/src/app.py
@@ -27,6 +27,7 @@
 from schema import schema_manager
 from schema import schema_errors
 from schema import schema_triggers
+from schema import schema_validators
 from schema import schema_neo4j_queries
 from schema.schema_constants import SchemaConstants
 from schema.schema_constants import DataVisibilityEnum
@@ -3798,8 +3799,10 @@ def multiple_components():
     validate_token_if_auth_header_exists(request)
     # Get user token from Authorization header
     user_token = get_user_token(request)
-
-    schema_validators.validate_application_header_before_entity_create("Dataset", request)
+    try:
+        schema_validators.validate_application_header_before_entity_create("Dataset", request)
+    except Exception as e:
+        bad_request_error(str(e))
 
     require_json(request)
 
@@ -3836,8 +3839,13 @@ def multiple_components():
     dataset_list = []
 
     for dataset in json_data_dict.get('datasets'):
+        # dataset_link_abs_dir is not part of the entity creation, will not be stored in neo4j and does not require
+        # validation. Remove it here and add it back after validation. We do the same for creating the entities. Doing
+        # this makes it easier to keep the dataset_link_abs_dir with the associated dataset instead of adding additional lists and keeping track of which value is tied to which dataset
+        dataset_link_abs_dir = dataset.pop('dataset_link_abs_dir', None)
         dataset['group_uuid'] = json_data_dict.get('group_uuid')
-        dataset['direct_ancestor_uuids'] = json_Data_dict.get('direct_ancestor_uuids')
+        dataset['direct_ancestor_uuids'] = json_data_dict.get('direct_ancestor_uuids')
+        dataset['creation_action'] = json_data_dict.get('creation_action')
         try:
             schema_manager.validate_json_data_against_schema(dataset, 'Dataset')
         except schema_errors.SchemaValidationException as e:
@@ -3846,7 +3854,7 @@ def multiple_components():
         # Execute property level validators defined in the schema yaml before entity property creation
         # Use empty dict {} to indicate there's no existing_data_dict
         try:
-            schema_manager.execute_property_level_validators('before_property_create_validators', normalized_entity_type, request, {}, json_data_dict)
+            schema_manager.execute_property_level_validators('before_property_create_validators', "Dataset", request, {}, json_data_dict)
         # Currently only ValueError
         except ValueError as e:
             bad_request_error(e)
@@ -3875,11 +3883,15 @@ def multiple_components():
         # by previous_revision_uuid is published. Else, bad request error.
         if previous_version_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
             bad_request_error(f"The previous_revision_uuid specified for this dataset must be 'Published' in order to create a new revision from it")
+        # Add back in dataset_link_abs_dir
+        dataset['dataset_link_abs_dir'] = dataset_link_abs_dir
 
     for dataset in json_data_dict.get('datasets'):
-        merged_dict = create_entity_details(request, "Dataset", user_token, json_data_dict)
-        schema_triggers.set_status_history('status', 'Dataset', user_token, merged_dict, {})
-        schema_triggers.link_to_previous_revision('previous_revision_uuid', 'Dataset', user_token, merged_dict, {})
+        # Remove dataset_link_abs_dir once more before entity creation
+        dataset_link_abs_dir = dataset.pop('dataset_link_abs_dir', None)
+        merged_dict = create_entity_details(request, "Dataset", user_token, dataset)
+        # Add back in dataset_link_abs_dir
+        merged_dict['dataset_link_abs_dir'] = dataset_link_abs_dir
         dataset_list.append(merged_dict)
 
     # Generate property values for Activity node
@@ -3892,7 +3904,15 @@ def multiple_components():
         entity_uuid_list.append(dataset['uuid'])
 
     schema_neo4j_queries.link_multiple_entities_to_direct_ancestors(neo4j_driver_instance, entity_uuid_list, json_data_dict['direct_ancestor_uuids'], activity_data_dict)
-    return jsonify(entity_uuid_list)
+
+    # We wait until after the new datasets are linked to their ancestor before performing the remaining post-creation
+    # linkages. This way, in the event of unforeseen errors, we don't have orphaned nodes.
+    for dataset in dataset_list:
+        schema_triggers.set_status_history('status', 'Dataset', user_token, merged_dict, {})
+        if merged_dict.get('previous_revision_uuid'):
+            schema_triggers.link_to_previous_revision('previous_revision_uuid', 'Dataset', user_token, merged_dict, {})
+
+    return jsonify(dataset_list)
 
 
 ####################################################################################################
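At this point the request body has settled into its basic shape. A hedged sketch of a client call follows — the base URL, token, uuids, and directory paths are all placeholders, the application header name is assumed from the validator invoked above, and the per-dataset fields are the illustrative set from patch 01's commented-out list:

import requests

BASE_URL = "https://entity-api.example.org"   # hypothetical deployment
TOKEN = "<globus-groups-token>"               # placeholder

payload = {
    "creation_action": "Multi-Assay Split",
    "group_uuid": "<group-uuid>",
    "direct_ancestor_uuids": ["<multi-assay-dataset-uuid>"],
    "datasets": [
        {"contains_human_genetic_sequences": False,
         "data_types": ["<assay-type-1>"],
         "dataset_link_abs_dir": "/path/to/component-1"},
        {"contains_human_genetic_sequences": False,
         "data_types": ["<assay-type-2>"],
         "dataset_link_abs_dir": "/path/to/component-2"},
    ],
}

response = requests.post(
    f"{BASE_URL}/datasets/components",
    json=payload,
    headers={"Authorization": f"Bearer {TOKEN}",
             "X-Hubmap-Application": "ingest-api"},  # assumed header name
)
print(response.status_code, response.json())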
From a86ec29ed1e1f9dea5a32a68118cf3654552b676 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Fri, 6 Oct 2023 17:23:15 -0400
Subject: [PATCH 07/16] Fixed some more bugs. Commented out creation_action until that field has its support merged in. Normalized output. Added reindex call like is done on a normal create entity request.

---
 src/app.py | 42 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/src/app.py b/src/app.py
index 57fc130a..6a53fb1c 100644
--- a/src/app.py
+++ b/src/app.py
@@ -3845,7 +3845,8 @@ def multiple_components():
         dataset_link_abs_dir = dataset.pop('dataset_link_abs_dir', None)
         dataset['group_uuid'] = json_data_dict.get('group_uuid')
         dataset['direct_ancestor_uuids'] = json_data_dict.get('direct_ancestor_uuids')
-        dataset['creation_action'] = json_data_dict.get('creation_action')
+        # TODO re-enable the following line once creation_action support is merged in
+        # dataset['creation_action'] = json_data_dict.get('creation_action')
         try:
             schema_manager.validate_json_data_against_schema(dataset, 'Dataset')
         except schema_errors.SchemaValidationException as e:
@@ -3912,7 +3913,44 @@ def multiple_components():
         if merged_dict.get('previous_revision_uuid'):
             schema_triggers.link_to_previous_revision('previous_revision_uuid', 'Dataset', user_token, merged_dict, {})
 
-    return jsonify(dataset_list)
+    properties_to_skip = [
+        'direct_ancestors',
+        'collections',
+        'upload',
+        'title',
+        'previous_revision_uuid',
+        'next_revision_uuid'
+    ]
+
+    if bool(request.args):
+        # The parsed query string value is a string 'true'
+        return_all_properties = request.args.get('return_all_properties')
+
+        if (return_all_properties is not None) and (return_all_properties.lower() == 'true'):
+            properties_to_skip = []
+
+    normalized_complete_entity_list = []
+    for dataset in dataset_list:
+        # Remove dataset_link_abs_dir once more before entity creation
+        dataset_link_abs_dir = dataset.pop('dataset_link_abs_dir', None)
+        # Generate the filtered or complete entity dict to send back
+        complete_dict = schema_manager.get_complete_entity_result(user_token, dataset, properties_to_skip)
+
+        # Will also filter the result based on schema
+        normalized_complete_dict = schema_manager.normalize_entity_result_for_response(complete_dict)
+
+
+        # Also index the new entity node in elasticsearch via search-api
+        logger.log(logging.INFO,
+                   f"Re-indexing for creation of {complete_dict['entity_type']}"
+                   f" with UUID {complete_dict['uuid']}")
+        reindex_entity(complete_dict['uuid'], user_token)
+        # Add back in dataset_link_abs_dir one last time
+        normalized_complete_dict['dataset_link_abs_dir'] = dataset_link_abs_dir
+        normalized_complete_entity_list.append(normalized_complete_dict)
+
+    return jsonify(normalized_complete_entity_list)
 
 
 ####################################################################################################
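The return_all_properties switch added above can be isolated in a toy Flask route. The dict filter at the end is only a stand-in: in the real endpoint the list is handed to schema_manager.get_complete_entity_result, which uses it to skip executing those on-read triggers rather than to filter keys.

from flask import Flask, jsonify, request

app = Flask(__name__)

@app.route('/echo-entity', methods=['POST'])
def echo_entity():
    # Same switch as in the patch: skip the heavyweight properties unless
    # the caller passes ?return_all_properties=true
    properties_to_skip = ['direct_ancestors', 'collections', 'upload',
                          'title', 'previous_revision_uuid', 'next_revision_uuid']
    if bool(request.args):
        return_all_properties = request.args.get('return_all_properties')
        if (return_all_properties is not None) and (return_all_properties.lower() == 'true'):
            properties_to_skip = []
    entity = request.get_json()
    return jsonify({k: v for k, v in entity.items() if k not in properties_to_skip})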
From bf8a67ab24382291caead8377b8f023c4863e5bc Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Mon, 9 Oct 2023 14:12:38 -0400
Subject: [PATCH 08/16] merged in creation_action support. Fixed a validation bug: removed creation action from the dataset since this is set manually at activity creation time. Replaced json_data_dict with dataset where it was put mistakenly during validation.

---
 src/app.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/app.py b/src/app.py
index 8ae95211..2acfc449 100644
--- a/src/app.py
+++ b/src/app.py
@@ -3845,8 +3845,6 @@ def multiple_components():
         dataset_link_abs_dir = dataset.pop('dataset_link_abs_dir', None)
         dataset['group_uuid'] = json_data_dict.get('group_uuid')
         dataset['direct_ancestor_uuids'] = json_data_dict.get('direct_ancestor_uuids')
-        # TODO re-enable the following line once creation_action support is merged in
-        # dataset['creation_action'] = json_data_dict.get('creation_action')
         try:
             schema_manager.validate_json_data_against_schema(dataset, 'Dataset')
         except schema_errors.SchemaValidationException as e:
@@ -3855,14 +3853,14 @@ def multiple_components():
         # Execute property level validators defined in the schema yaml before entity property creation
         # Use empty dict {} to indicate there's no existing_data_dict
         try:
-            schema_manager.execute_property_level_validators('before_property_create_validators', "Dataset", request, {}, json_data_dict)
+            schema_manager.execute_property_level_validators('before_property_create_validators', "Dataset", request, {}, dataset)
         # Currently only ValueError
         except ValueError as e:
             bad_request_error(e)
 
         # Also check existence of the previous revision dataset if specified
-        if 'previous_revision_uuid' in json_data_dict:
-            previous_version_dict = query_target_entity(json_data_dict['previous_revision_uuid'], user_token)
+        if 'previous_revision_uuid' in dataset:
+            previous_version_dict = query_target_entity(dataset['previous_revision_uuid'], user_token)
 
             # Make sure the previous version entity is either a Dataset or Sample (and publication 2/17/23)
             if previous_version_dict['entity_type'] not in ['Sample'] and \
@@ -3898,6 +3896,7 @@ def multiple_components():
     # Generate property values for Activity node
     # Can grab the existing data from the first dataset in the list
     activity_data_dict = schema_manager.generate_activity_data('Dataset', user_token, dataset_list[0])
+    activity_data_dict['creation_action'] = json_data_dict.get('creation_action')
 
     entity_uuid_list = []
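Before the larger reorganization in the next patch, the top-level validation it settles on distills to a few pure checks. A sketch, raising ValueError where the endpoint calls bad_request_error:

REQUIRED_FIELDS = {'creation_action', 'group_uuid', 'direct_ancestor_uuids', 'datasets'}

def validate_top_level(body: dict) -> None:
    missing = REQUIRED_FIELDS - body.keys()
    unexpected = body.keys() - REQUIRED_FIELDS
    if missing:
        raise ValueError(f"Missing required field(s): {', '.join(sorted(missing))}")
    if unexpected:
        raise ValueError(f"Unexpected field(s): {', '.join(sorted(unexpected))}")
    if body['creation_action'] not in ['Multi-Assay Split']:
        raise ValueError("creation_action must be 'Multi-Assay Split'")
    ancestors = body['direct_ancestor_uuids']
    if not isinstance(ancestors, list) or len(ancestors) != 1:
        raise ValueError("direct_ancestor_uuids must be a list containing exactly one uuid")
    if len(body['datasets']) != 2:
        raise ValueError("datasets must contain exactly 2 component datasets")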
From f6fbae2a64dd6a8aa2ca574f8493030ea25187d7 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Fri, 13 Oct 2023 14:50:18 -0400
Subject: [PATCH 09/16] Reorganized multiple components endpoint to more closely mirror existing multiple samples endpoint.

---
 src/app.py                         | 240 +++++++++++++++++++++--------
 src/app_neo4j_queries.py           |  67 ++++++++
 src/schema/schema_neo4j_queries.py |  52 -------
 3 files changed, 242 insertions(+), 117 deletions(-)

diff --git a/src/app.py b/src/app.py
index 2acfc449..95a653ae 100644
--- a/src/app.py
+++ b/src/app.py
@@ -3787,13 +3787,34 @@ def paired_dataset(id):
 """
-Description
+Create multiple component datasets from a single Multi-Assay ancestor
+
+Input
+-----
+json
+    A json object with the fields:
+        creation_action
+         - type: str
+         - description: the action event that will describe the activity node. Allowed values are: "Multi-Assay Split"
+        group_uuid
+         - type: str
+         - description: the group uuid for the new component datasets
+        direct_ancestor_uuids
+         - type: list
+         - description: a list containing the uuid of the parent multi assay dataset
+        datasets
+         - type: list
+         - description: the datasets to be created. Only difference between these and normal datasets is the field "dataset_link_abs_dir"
+
+Returns
+--------
+json array
+    List of uuids of the newly created component datasets
 """
 @app.route('/datasets/components', methods=['POST'])
 def multiple_components():
     if READ_ONLY_MODE:
         forbidden_error("Access not granted when entity-api in READ-ONLY mode")
-
     # If an invalid token provided, we need to tell the client with a 401 error, rather
     # than a 500 error later if the token is not good.
     validate_token_if_auth_header_exists(request)
@@ -3803,14 +3824,13 @@ def multiple_components():
         schema_validators.validate_application_header_before_entity_create("Dataset", request)
     except Exception as e:
         bad_request_error(str(e))
-    require_json(request)
 
-    json_data_dict = request.get_json()
+    require_json(request)
 
+    ######### validate top level properties ########
+
+    # Verify that each required field is in the json_data_dict, and that there are no other fields
+    json_data_dict = request.get_json()
     required_fields = ['creation_action', 'group_uuid', 'direct_ancestor_uuids', 'datasets']
-
-    # Verify that each field is in the json_data_dict, and that there are no other fields
     for field in required_fields:
         if field not in json_data_dict:
             raise bad_request_error(f"Missing required field {field}")
@@ -3818,33 +3838,34 @@ def multiple_components():
         if field not in required_fields:
             raise bad_request_error(f"Request body contained unexpected field {field}")
 
-    user_info_dict = schema_manager.get_user_info(request)
-
-    json_data_with_user_info_dict = {**json_data_dict, **user_info_dict}
-
-    # validate top level fields
-
-    # validate group_uuid
-    schema_triggers.set_group_uuid("group_uuid", "Dataset", user_token, {}, json_data_with_user_info_dict)
-
+    # validate creation_action
     allowable_creation_actions = ['Multi-Assay Split']
-
     if json_data_dict.get('creation_action') not in allowable_creation_actions:
         bad_request_error(f"creation_action {json_data_dict.get('creation_action')} not recognized. Allowed values are: {COMMA_SEPARATOR.join(allowable_creation_actions)}")
 
+    # While we accept a list of direct_ancestor_uuids, we currently only allow a single direct ancestor so verify that there is only 1
+    direct_ancestor_uuids = json_data_dict.get('direct_ancestor_uuids')
+    if direct_ancestor_uuids is None or not isinstance(direct_ancestor_uuids, list) or len(direct_ancestor_uuids) != 1:
+        bad_request_error(f"Required field 'direct_ancestor_uuids' must be a list. This list may only contain 1 item: a string representing the uuid of the direct ancestor")
+
     # validate existence of direct ancestors.
-    for direct_ancestor_uuid in json_data_dict['direct_ancestor_uuids']:
+    for direct_ancestor_uuid in direct_ancestor_uuids:
         direct_ancestor_dict = query_target_entity(direct_ancestor_uuid, user_token)
+        if direct_ancestor_dict.get('entity_type').lower() != "dataset":
+            bad_request_error(f"Direct ancestor is of type: {direct_ancestor_dict.get('entity_type')}. Must be of type 'dataset'.")
 
-    dataset_list = []
+    # validate that there are 2 and only 2 datasets in the dataset list
+    if len(json_data_dict.get('datasets')) != 2:
+        bad_request_error(f"'datasets' field must contain 2 component datasets.")
 
+    # Validate all datasets using existing schema with triggers and validators
     for dataset in json_data_dict.get('datasets'):
         # dataset_link_abs_dir is not part of the entity creation, will not be stored in neo4j and does not require
         # validation. Remove it here and add it back after validation. We do the same for creating the entities. Doing
        # this makes it easier to keep the dataset_link_abs_dir with the associated dataset instead of adding additional lists and keeping track of which value is tied to which dataset
         dataset_link_abs_dir = dataset.pop('dataset_link_abs_dir', None)
         dataset['group_uuid'] = json_data_dict.get('group_uuid')
-        dataset['direct_ancestor_uuids'] = json_data_dict.get('direct_ancestor_uuids')
+        dataset['direct_ancestor_uuids'] = direct_ancestor_uuids
         try:
             schema_manager.validate_json_data_against_schema(dataset, 'Dataset')
         except schema_errors.SchemaValidationException as e:
@@ -3858,59 +3879,15 @@ def multiple_components():
         except ValueError as e:
             bad_request_error(e)
 
-        # Also check existence of the previous revision dataset if specified
-        if 'previous_revision_uuid' in dataset:
-            previous_version_dict = query_target_entity(dataset['previous_revision_uuid'], user_token)
-
-            # Make sure the previous version entity is either a Dataset or Sample (and publication 2/17/23)
-            if previous_version_dict['entity_type'] not in ['Sample'] and \
-                not schema_manager.entity_type_instanceof(previous_version_dict['entity_type'], 'Dataset'):
-                bad_request_error(
-                    f"The previous_revision_uuid specified for this dataset must be either a Dataset or Sample or Publication")
-
-            # Also need to validate if the given 'previous_revision_uuid' has already had
-            # an existing next revision
-            # Only return a list of the uuids, no need to get back the list of dicts
-            next_revisions_list = app_neo4j_queries.get_next_revisions(neo4j_driver_instance, previous_version_dict['uuid'], 'uuid')
-
-            # As long as the list is not empty, tell the users to use a different 'previous_revision_uuid'
-            if next_revisions_list:
-                bad_request_error(
-                    f"The previous_revision_uuid specified for this dataset has already had a next revision")
-
-            # Only published datasets can have revisions made of them. Verify that the status of the Dataset specified
-            # by previous_revision_uuid is published. Else, bad request error.
-            if previous_version_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
-                bad_request_error(f"The previous_revision_uuid specified for this dataset must be 'Published' in order to create a new revision from it")
         # Add back in dataset_link_abs_dir
         dataset['dataset_link_abs_dir'] = dataset_link_abs_dir
 
-    for dataset in json_data_dict.get('datasets'):
-        # Remove dataset_link_abs_dir once more before entity creation
-        dataset_link_abs_dir = dataset.pop('dataset_link_abs_dir', None)
-        merged_dict = create_entity_details(request, "Dataset", user_token, dataset)
-        # Add back in dataset_link_abs_dir
-        merged_dict['dataset_link_abs_dir'] = dataset_link_abs_dir
-        dataset_list.append(merged_dict)
-
-    # Generate property values for Activity node
-    # Can grab the existing data from the first dataset in the list
-    activity_data_dict = schema_manager.generate_activity_data('Dataset', user_token, dataset_list[0])
-    activity_data_dict['creation_action'] = json_data_dict.get('creation_action')
-
-    entity_uuid_list = []
-
-    for dataset in dataset_list:
-        entity_uuid_list.append(dataset['uuid'])
-
-    schema_neo4j_queries.link_multiple_entities_to_direct_ancestors(neo4j_driver_instance, entity_uuid_list, json_data_dict['direct_ancestor_uuids'], activity_data_dict)
+    dataset_list = create_multiple_component_details(request, "Dataset", user_token, json_data_dict.get('datasets'), json_data_dict.get('creation_action'))
 
     # We wait until after the new datasets are linked to their ancestor before performing the remaining post-creation
     # linkages. This way, in the event of unforeseen errors, we don't have orphaned nodes.
    for dataset in dataset_list:
-        schema_triggers.set_status_history('status', 'Dataset', user_token, merged_dict, {})
-        if merged_dict.get('previous_revision_uuid'):
-            schema_triggers.link_to_previous_revision('previous_revision_uuid', 'Dataset', user_token, merged_dict, {})
+        schema_triggers.set_status_history('status', 'Dataset', user_token, dataset, {})
 
     properties_to_skip = [
         'direct_ancestors',
         'collections',
         'upload',
         'title',
         'previous_revision_uuid',
         'next_revision_uuid'
     ]
@@ -4470,6 +4447,139 @@ def create_multiple_samples_details(request, normalized_entity_type, user_token,
 
     return new_ids_dict_list
 
+"""
+Create multiple dataset nodes and relationships with the source entity node
+
+Parameters
+----------
+request : flask.Request object
+    The incoming request
+normalized_entity_type : str
+    One of the normalized entity types: Dataset, Collection, Sample, Donor
+user_token: str
+    The user's globus groups token
+json_data_dict: dict
+    The json request dict from user input
+creation_action : str
+    The creation action for the new activity node.
+
+Returns
+-------
+list
+    A list of all the newly generated ids via uuid-api
+"""
+def create_multiple_component_details(request, normalized_entity_type, user_token, json_data_dict_list, creation_action):
+    # Get user info based on request
+    user_info_dict = schema_manager.get_user_info(request)
+    direct_ancestor = json_data_dict_list[0].get('direct_ancestor_uuids')[0]
+    # Create new ids for the new entity
+    try:
+        # we only need the json data from one of the datasets. The info will be the same for both, so we just grab the first in the list
+        new_ids_dict_list = schema_manager.create_hubmap_ids(normalized_entity_type, json_data_dict_list[0], user_token, user_info_dict, len(json_data_dict_list))
+    # When group_uuid is provided by user, it can be invalid
+    except schema_errors.NoDataProviderGroupException:
+        # Log the full stack trace, prepend a line with our message
+        if 'group_uuid' in json_data_dict:
+            msg = "Invalid 'group_uuid' value, can't create the entity"
+        else:
+            msg = "The user does not have the correct Globus group associated with, can't create the entity"
+
+        logger.exception(msg)
+        bad_request_error(msg)
+    except schema_errors.UnmatchedDataProviderGroupException:
+        # Log the full stack trace, prepend a line with our message
+        msg = "The user does not belong to the given Globus group, can't create the entity"
+        logger.exception(msg)
+        forbidden_error(msg)
+    except schema_errors.MultipleDataProviderGroupException:
+        # Log the full stack trace, prepend a line with our message
+        msg = "The user has multiple Globus groups associated with, please specify one using 'group_uuid'"
+        logger.exception(msg)
+        bad_request_error(msg)
+    except KeyError as e:
+        # Log the full stack trace, prepend a line with our message
+        logger.exception(e)
+        bad_request_error(e)
+    except requests.exceptions.RequestException as e:
+        msg = f"Failed to create new HuBMAP ids via the uuid-api service"
+        logger.exception(msg)
+
+        # Due to the use of response.raise_for_status() in schema_manager.create_hubmap_ids()
+        # we can access the status codes from the exception
+        status_code = e.response.status_code
+
+        if status_code == 400:
+            bad_request_error(e.response.text)
+        if status_code == 404:
+            not_found_error(e.response.text)
+        else:
+            internal_server_error(e.response.text)
+
+    datasets_dict_list = []
+    for i in range(len(json_data_dict_list)):
+        # Remove dataset_link_abs_dir once more before entity creation
+        dataset_link_abs_dir = json_data_dict_list[i].pop('dataset_link_abs_dir', None)
+        # Combine each id dict into each dataset in json_data_dict_list
+        new_data_dict = {**json_data_dict_list[i], **user_info_dict, **new_ids_dict_list[i]}
+        try:
+            # Use {} since no existing dict
+            generated_before_create_trigger_data_dict = schema_manager.generate_triggered_data('before_create_trigger', normalized_entity_type, user_token, {}, new_data_dict)
+        # If one of the before_create_trigger methods fails, we can't create the entity
+        except schema_errors.BeforeCreateTriggerException:
+            # Log the full stack trace, prepend a line with our message
+            msg = "Failed to execute one of the 'before_create_trigger' methods, can't create the entity"
+            logger.exception(msg)
+            internal_server_error(msg)
+        except schema_errors.NoDataProviderGroupException:
+            # Log the full stack trace, prepend a line with our message
+            if 'group_uuid' in json_data_dict:
+                msg = "Invalid 'group_uuid' value, can't create the entity"
+            else:
+                msg = "The user does not have the correct Globus group associated with, can't create the entity"
+
+            logger.exception(msg)
+            bad_request_error(msg)
+        except schema_errors.UnmatchedDataProviderGroupException:
+            # Log the full stack trace, prepend a line with our message
+            msg = "The user does not belong to the given Globus group, can't create the entity"
+            logger.exception(msg)
+            forbidden_error(msg)
+        except schema_errors.MultipleDataProviderGroupException:
+            # Log the full stack trace, prepend a line with our message
+            msg = "The user has multiple Globus groups associated with, please specify one using 'group_uuid'"
+            logger.exception(msg)
+            bad_request_error(msg)
+        except KeyError as e:
+            # Log the full stack trace, prepend a line with our message
+            logger.exception(e)
+            bad_request_error(e)
+        except Exception as e:
+            logger.exception(e)
+            internal_server_error(e)
+
+        merged_dict = {**json_data_dict_list[i], **generated_before_create_trigger_data_dict}
+
+        # Filter out the merged_dict by getting rid of the transient properties (not to be stored)
+        # and properties with None value
+        # Meaning the returned target property key is different from the original key
+        # in the trigger method, e.g., Donor.image_files_to_add
+        filtered_merged_dict = schema_manager.remove_transient_and_none_values(merged_dict, normalized_entity_type)
+        dataset_dict = {**filtered_merged_dict, **new_ids_dict_list[i]}
+        dataset_dict['dataset_link_abs_dir'] = dataset_link_abs_dir
+        datasets_dict_list.append(dataset_dict)
+
+    activity_data_dict = schema_manager.generate_activity_data(normalized_entity_type, user_token, user_info_dict)
+    activity_data_dict['creation_action'] = creation_action
+    try:
+        created_datasets = app_neo4j_queries.create_multiple_datasets(neo4j_driver_instance, datasets_dict_list, activity_data_dict, direct_ancestor)
+    except TransactionError:
+        msg = "Failed to create multiple datasets"
+        # Log the full stack trace, prepend a line with our message
+        logger.exception(msg)
+        # Terminate and let the users know
+        internal_server_error(msg)
+
+
+    return created_datasets
+
 """
 Execute 'after_create_trigger' methods
 
diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py
index 933017e8..45de7616 100644
--- a/src/app_neo4j_queries.py
+++ b/src/app_neo4j_queries.py
@@ -239,6 +239,73 @@ def create_multiple_samples(neo4j_driver, samples_dict_list, activity_data_dict,
         raise TransactionError(msg)
 
 
+"""
+Create multiple dataset nodes in neo4j
+
+Parameters
+----------
+neo4j_driver : neo4j.Driver object
+    The neo4j database connection pool
+datasets_dict_list : list
+    A list of dicts containing the generated data of each dataset to be created
+activity_dict : dict
+    The dict containing generated activity data
+direct_ancestor_uuid : str
+    The uuid of the direct ancestor to be linked to
+"""
+def create_multiple_datasets(neo4j_driver, datasets_dict_list, activity_data_dict, direct_ancestor_uuid):
+    try:
+        with neo4j_driver.session() as session:
+            entity_dict = {}
+
+            tx = session.begin_transaction()
+
+            activity_uuid = activity_data_dict['uuid']
+
+            # Step 1: create the Activity node
+            schema_neo4j_queries.create_activity_tx(tx, activity_data_dict)
+
+            # Step 2: create relationship from source entity node to this Activity node
+            schema_neo4j_queries.create_relationship_tx(tx, direct_ancestor_uuid, activity_uuid, 'ACTIVITY_INPUT', '->')
+
+            # Step 3: create each new dataset node and link to the Activity node at the same time
+            output_dicts_list = []
+            for dataset_dict in datasets_dict_list:
+                # Remove dataset_link_abs_dir once more before entity creation
+                dataset_link_abs_dir = dataset_dict.pop('dataset_link_abs_dir', None)
+                node_properties_map = schema_neo4j_queries.build_properties_map(dataset_dict)
+
+                query = (f"MATCH (a:Activity) "
+                         f"WHERE a.uuid = '{activity_uuid}' "
+                         # Always define the Entity label in addition to the target `entity_type` label
+                         f"CREATE (e:Entity:Dataset {node_properties_map} ) "
+                         f"CREATE (a)-[:ACTIVITY_OUTPUT]->(e) "
+                         f"RETURN e AS {record_field_name}")
+
+                logger.info("======create_multiple_datasets() individual query======")
+                logger.info(query)
+
+                result = tx.run(query)
+                record = result.single()
+                entity_node = record[record_field_name]
+                entity_dict = schema_neo4j_queries.node_to_dict(entity_node)
+                entity_dict['dataset_link_abs_dir'] = dataset_link_abs_dir
+                output_dicts_list.append(entity_dict)
+            # Then
+            tx.commit()
+            return output_dicts_list
+    except TransactionError as te:
+        msg = f"TransactionError from calling create_multiple_datasets(): {te.value}"
+        # Log the full stack trace, prepend a line with our message
+        logger.exception(msg)
+
+        if tx.closed() == False:
+            logger.info("Failed to commit create_multiple_datasets() transaction, rollback")
+
+            tx.rollback()
+
+        raise TransactionError(msg)
+
+
 """
 Get all revisions for a given dataset uuid and sort them in descending order based on their creation time
 
diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py
index 5b95da03..b23d33ba 100644
--- a/src/schema/schema_neo4j_queries.py
+++ b/src/schema/schema_neo4j_queries.py
@@ -554,58 +554,6 @@ def link_entity_to_direct_ancestors(neo4j_driver, entity_uuid, direct_ancestor_u
         raise TransactionError(msg)
 
 
-"""
-Create or recreate one or more linkages (via Activity nodes)
-between the target entity nodes and the direct ancestor nodes in neo4j
-
-Parameters
-----------
-neo4j_driver : neo4j.Driver object
-    The neo4j database connection pool
-entity_uuids : list
-    List of the uuids of target child entities
-direct_ancestor_uuid : list
-    The uuids of direct ancestors
-activity_data_dict : dict
-    A dict of activity properties to be created
-"""
-def link_multiple_entities_to_direct_ancestors(neo4j_driver, entity_uuids, direct_ancestor_uuids, activity_data_dict):
-    try:
-        with neo4j_driver.session() as session:
-            tx = session.begin_transaction()
-
-            # Create the Activity node
-            create_activity_tx(tx, activity_data_dict)
-
-            # Get the activity uuid
-            activity_uuid = activity_data_dict['uuid']
-
-            for entity_uuid in entity_uuids:
-                # First delete all the old linkages and Activity node between this entity and its direct ancestors
-                _delete_activity_node_and_linkages_tx(tx, entity_uuid)
-
-                # Create relationship from this Activity node to the target entity node
-                create_relationship_tx(tx, activity_uuid, entity_uuid, 'ACTIVITY_OUTPUT', '->')
-
-                # Create relationship from each ancestor entity node to this Activity node
-                for direct_ancestor_uuid in direct_ancestor_uuids:
-                    create_relationship_tx(tx, direct_ancestor_uuid, activity_uuid, 'ACTIVITY_INPUT', '->')
-
-
-            tx.commit()
-    except TransactionError as te:
-        msg = "TransactionError from calling link_entity_to_direct_ancestors(): "
-        # Log the full stack trace, prepend a line with our message
-        logger.exception(msg)
-
-        if tx.closed() == False:
-            # Log the full stack trace, prepend a line with our message
-            logger.info("Failed to commit link_entity_to_direct_ancestors() transaction, rollback")
-            tx.rollback()
-
-        raise TransactionError(msg)
-
-
 """
 Create or recreate linkage between the publication node and the associated collection node in neo4j

From 07dd8aa62ed95af3401de6e77b5d7ffeb6092584 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Mon, 16 Oct 2023 13:16:52 -0400
Subject: [PATCH 10/16] Updated comments to reflect changes to /components endpoint and accompanying details function. Merged in changes from main.

---
 src/app.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/app.py b/src/app.py
index 95a653ae..715e4469 100644
--- a/src/app.py
+++ b/src/app.py
@@ -3809,7 +3809,7 @@ def paired_dataset(id):
 Returns
 --------
 json array
-    List of uuids of the newly created component datasets
+    List of the newly created datasets represented as dictionaries.
 """
 @app.route('/datasets/components', methods=['POST'])
 def multiple_components():
@@ -4458,15 +4458,15 @@ def create_multiple_samples_details(request, normalized_entity_type, user_token,
     One of the normalized entity types: Dataset, Collection, Sample, Donor
 user_token: str
     The user's globus groups token
-json_data_dict: dict
-    The json request dict from user input
+json_data_dict_list: list
+    List of dataset objects as dictionaries
 creation_action : str
     The creation action for the new activity node.
 
 Returns
 -------
 list
-    A list of all the newly generated ids via uuid-api
+    A list of all the newly created datasets with generated fields represented as dictionaries
 """
 def create_multiple_component_details(request, normalized_entity_type, user_token, json_data_dict_list, creation_action):
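The transaction discipline create_multiple_datasets() aims for — every node and relationship written in a single transaction, so a failure rolls back both component datasets together — looks like this in isolation. A standalone sketch with placeholder connection details; `work` stands in for the Cypher statements above:

from neo4j import GraphDatabase
from neo4j.exceptions import Neo4jError

def create_components_atomically(uri, auth, work):
    driver = GraphDatabase.driver(uri, auth=auth)
    try:
        with driver.session() as session:
            tx = session.begin_transaction()
            try:
                result = work(tx)   # run all CREATE statements here
                tx.commit()         # everything lands, or ...
                return result
            except Neo4jError:
                if not tx.closed():
                    tx.rollback()   # ... nothing does
                raise
    finally:
        driver.close()

This all-or-nothing behavior is why the endpoint defers set_status_history and other post-creation triggers until after the linkage succeeds: no orphaned component nodes are left behind on failure.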
From d2cc66517bc9db1a2c38a1e4142c6f35034e902f Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Mon, 16 Oct 2023 13:44:36 -0400
Subject: [PATCH 11/16] Updated entity api spec for the new endpoint

---
 entity-api-spec.yaml | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/entity-api-spec.yaml b/entity-api-spec.yaml
index 22046e05..c6652cc0 100644
--- a/entity-api-spec.yaml
+++ b/entity-api-spec.yaml
@@ -2535,3 +2535,47 @@ paths:
         description: The given dataset is unpublished and the user does not have the authorization to view it.
       '500':
         description: Internal error
+  '/datasets/components':
+    post:
+      summary: Create multiple component datasets from a single Multi-Assay ancestor
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                creation_action:
+                  type: string
+                  description: the action event that will describe the activity node. Allowed values are "Multi-Assay Split"
+                group_uuid:
+                  type: string
+                  description: the group uuid for the new component datasets
+                direct_ancestor_uuids:
+                  type: array
+                  description: a list containing the uuid of the parent multi assay dataset
+                datasets:
+                  type: array
+                  items:
+                    $ref: '#/components/schemas/Dataset'
+
+      responses:
+        '200':
+          description: The entities were successfully created and are returned.
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: '#/components/schemas/Dataset'
+
+        '400':
+          description: Invalid input.
+        '404':
+          description: Not found. No matching datasets were found, or none were found that the user is authorized to see.
+        '401':
+          description: The user's token has expired or the user did not supply a valid token
+        '403':
+          description: The given dataset is unpublished and the user does not have the authorization to view it.
+        '500':
+          description: Internal error

From 905ec73604aabdc72c8aa7c968a11b13e8a9367b Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Mon, 16 Oct 2023 15:12:39 -0400
Subject: [PATCH 12/16] Fixed a bug in a group uuid exception handler where a reference is made to json_data_dict rather than the first json_data_dict in json_data_dict_list.

---
 src/app.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/app.py b/src/app.py
index 715e4469..20b2eb44 100644
--- a/src/app.py
+++ b/src/app.py
@@ -4479,7 +4479,7 @@ def create_multiple_component_details(request, normalized_entity_type, user_toke
     # When group_uuid is provided by user, it can be invalid
     except schema_errors.NoDataProviderGroupException:
         # Log the full stack trace, prepend a line with our message
-        if 'group_uuid' in json_data_dict:
+        if 'group_uuid' in json_data_dict_list[0]:
             msg = "Invalid 'group_uuid' value, can't create the entity"
         else:
             msg = "The user does not have the correct Globus group associated with, can't create the entity"
@@ -4531,7 +4531,7 @@ def create_multiple_component_details(request, normalized_entity_type, user_toke
         except schema_errors.NoDataProviderGroupException:
             # Log the full stack trace, prepend a line with our message
-            if 'group_uuid' in json_data_dict:
+            if 'group_uuid' in json_data_dict_list[i]:
                 msg = "Invalid 'group_uuid' value, can't create the entity"
             else:
                 msg = "The user does not have the correct Globus group associated with, can't create the entity"
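The pop/re-add choreography around dataset_link_abs_dir recurs in several hunks above, and the next patch makes the field mandatory. A hypothetical helper — not part of this series — shows the pattern those hunks implement by hand:

from contextlib import contextmanager

@contextmanager
def without_key(d: dict, key: str):
    # Temporarily remove a transient key, restoring it (even as None,
    # matching the patches' behavior) when the block exits
    value = d.pop(key, None)
    try:
        yield value
    finally:
        d[key] = value

# usage: validate or persist the dataset while the transient field is absent
dataset = {'data_types': ['<assay-type>'], 'dataset_link_abs_dir': '/path/to/dir'}
with without_key(dataset, 'dataset_link_abs_dir') as link_dir:
    assert 'dataset_link_abs_dir' not in dataset
    # ... validate or store `dataset` here ...
print(dataset['dataset_link_abs_dir'])  # restored afterwards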
From c7521e97138473a10e10e3777114aeee79db2052 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Tue, 17 Oct 2023 13:15:52 -0400
Subject: [PATCH 13/16] Added a check that dataset_link_abs_dir is required at the dataset level

---
 src/app.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/app.py b/src/app.py
index 20b2eb44..23659019 100644
--- a/src/app.py
+++ b/src/app.py
@@ -3864,6 +3864,8 @@ def multiple_components():
         # validation. Remove it here and add it back after validation. We do the same for creating the entities. Doing
         # this makes it easier to keep the dataset_link_abs_dir with the associated dataset instead of adding additional lists and keeping track of which value is tied to which dataset
         dataset_link_abs_dir = dataset.pop('dataset_link_abs_dir', None)
+        if not dataset_link_abs_dir:
+            bad_request_error(f"Missing required field in datasets: dataset_link_abs_dir")
         dataset['group_uuid'] = json_data_dict.get('group_uuid')
         dataset['direct_ancestor_uuids'] = direct_ancestor_uuids
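The remaining patches shuffle the exception handlers in create_multiple_component_details. The policy they implement reduces to a small mapping from schema exceptions to HTTP responses; a self-contained sketch keyed by exception name, with message strings copied from the handlers:

def http_error_for(exc_name: str, json_body: dict):
    if exc_name == 'NoDataProviderGroupException':
        if 'group_uuid' in json_body:
            return 400, "Invalid 'group_uuid' value, can't create the entity"
        return 400, ("The user does not have the correct Globus group "
                     "associated with, can't create the entity")
    if exc_name == 'UnmatchedDataProviderGroupException':
        return 403, ("The user does not belong to the given Globus group, "
                     "can't create the entity")
    if exc_name == 'MultipleDataProviderGroupException':
        return 400, ("The user has multiple Globus groups associated with, "
                     "please specify one using 'group_uuid'")
    return 500, "Internal error"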
From 2bfc99a5ee8448341ce0ec9cb1fcaa877006a397 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Tue, 17 Oct 2023 13:23:43 -0400
Subject: [PATCH 14/16] removed superfluous exception types from multiple components that would only apply for donors and samples.

---
 src/app.py | 38 --------------------------------------
 1 file changed, 38 deletions(-)

diff --git a/src/app.py b/src/app.py
index 23659019..0ac085ce 100644
--- a/src/app.py
+++ b/src/app.py
@@ -4479,25 +4479,6 @@ def create_multiple_component_details(request, normalized_entity_type, user_toke
         # we only need the json data from one of the datasets. The info will be the same for both, so we just grab the first in the list
         new_ids_dict_list = schema_manager.create_hubmap_ids(normalized_entity_type, json_data_dict_list[0], user_token, user_info_dict, len(json_data_dict_list))
     # When group_uuid is provided by user, it can be invalid
-    except schema_errors.NoDataProviderGroupException:
-        # Log the full stack trace, prepend a line with our message
-        if 'group_uuid' in json_data_dict_list[0]:
-            msg = "Invalid 'group_uuid' value, can't create the entity"
-        else:
-            msg = "The user does not have the correct Globus group associated with, can't create the entity"
-
-        logger.exception(msg)
-        bad_request_error(msg)
-    except schema_errors.UnmatchedDataProviderGroupException:
-        # Log the full stack trace, prepend a line with our message
-        msg = "The user does not belong to the given Globus group, can't create the entity"
-        logger.exception(msg)
-        forbidden_error(msg)
-    except schema_errors.MultipleDataProviderGroupException:
-        # Log the full stack trace, prepend a line with our message
-        msg = "The user has multiple Globus groups associated with, please specify one using 'group_uuid'"
-        logger.exception(msg)
-        bad_request_error(msg)
     except KeyError as e:
         # Log the full stack trace, prepend a line with our message
         logger.exception(e)
         bad_request_error(e)
@@ -4531,25 +4512,6 @@ def create_multiple_component_details(request, normalized_entity_type, user_toke
             msg = "Failed to execute one of the 'before_create_trigger' methods, can't create the entity"
             logger.exception(msg)
             internal_server_error(msg)
-        except schema_errors.NoDataProviderGroupException:
-            # Log the full stack trace, prepend a line with our message
-            if 'group_uuid' in json_data_dict_list[i]:
-                msg = "Invalid 'group_uuid' value, can't create the entity"
-            else:
-                msg = "The user does not have the correct Globus group associated with, can't create the entity"
-
-            logger.exception(msg)
-            bad_request_error(msg)
-        except schema_errors.UnmatchedDataProviderGroupException:
-            # Log the full stack trace, prepend a line with our message
-            msg = "The user does not belong to the given Globus group, can't create the entity"
-            logger.exception(msg)
-            forbidden_error(msg)
-        except schema_errors.MultipleDataProviderGroupException:
-            # Log the full stack trace, prepend a line with our message
-            msg = "The user has multiple Globus groups associated with, please specify one using 'group_uuid'"
-            logger.exception(msg)
-            bad_request_error(msg)
         except KeyError as e:
             # Log the full stack trace, prepend a line with our message
             logger.exception(e)
             bad_request_error(e)

From 7067a8def99da9ad3dbfaa72e1b228b986158066 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Tue, 17 Oct 2023 13:43:32 -0400
Subject: [PATCH 15/16] Re-adding exceptions removed mistakenly

---
 src/app.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/app.py b/src/app.py
index 0ac085ce..bb43b880 100644
--- a/src/app.py
+++ b/src/app.py
@@ -4512,6 +4512,25 @@ def create_multiple_component_details(request, normalized_entity_type, user_toke
             msg = "Failed to execute one of the 'before_create_trigger' methods, can't create the entity"
             logger.exception(msg)
             internal_server_error(msg)
+        except schema_errors.NoDataProviderGroupException:
+            # Log the full stack trace, prepend a line with our message
+            if 'group_uuid' in json_data_dict:
+                msg = "Invalid 'group_uuid' value, can't create the entity"
+            else:
+                msg = "The user does not have the correct Globus group associated with, can't create the entity"
+
+            logger.exception(msg)
+            bad_request_error(msg)
+        except schema_errors.UnmatchedDataProviderGroupException:
+            # Log the full stack trace, prepend a line with our message
+            msg = "The user does not belong to the given Globus group, can't create the entity"
+            logger.exception(msg)
+            forbidden_error(msg)
+        except schema_errors.MultipleDataProviderGroupException:
+            # Log the full stack trace, prepend a line with our message
+            msg = "The user has multiple Globus groups associated with, please specify one using 'group_uuid'"
+            logger.exception(msg)
+            bad_request_error(msg)
         except KeyError as e:
             # Log the full stack trace, prepend a line with our message
             logger.exception(e)
             bad_request_error(e)

From c14692ec1d09411a48556dd34b0af9dbb4932c67 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Wed, 18 Oct 2023 13:37:21 -0400
Subject: [PATCH 16/16] Reapplying fix (json_data_dict -> json_data_dict_list[i]) in exception. Fix was unintentionally removed when re-adding exceptions.

---
 src/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/app.py b/src/app.py
index bb43b880..e7b2d595 100644
--- a/src/app.py
+++ b/src/app.py
@@ -4514,7 +4514,7 @@ def create_multiple_component_details(request, normalized_entity_type, user_toke
             internal_server_error(msg)
         except schema_errors.NoDataProviderGroupException:
             # Log the full stack trace, prepend a line with our message
-            if 'group_uuid' in json_data_dict:
+            if 'group_uuid' in json_data_dict_list[i]:
                 msg = "Invalid 'group_uuid' value, can't create the entity"
             else:
                 msg = "The user does not have the correct Globus group associated with, can't create the entity"
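For reference, a successful call after the full series lands returns the two normalized component datasets, each echoing its dataset_link_abs_dir. A minimal shape check with placeholder values:

def check_response(components: list) -> None:
    assert len(components) == 2
    for ds in components:
        assert ds['entity_type'] == 'Dataset'
        assert 'uuid' in ds
        assert 'dataset_link_abs_dir' in ds

check_response([
    {'entity_type': 'Dataset', 'uuid': '<uuid-1>', 'dataset_link_abs_dir': '/path/one'},
    {'entity_type': 'Dataset', 'uuid': '<uuid-2>', 'dataset_link_abs_dir': '/path/two'},
])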