From 881174706b62ab5feb75c7af70c0ff5914dd3a7b Mon Sep 17 00:00:00 2001
From: maxsibilla
Date: Tue, 26 Sep 2023 08:54:24 -0400
Subject: [PATCH 1/5] Adding new create_multi_datasets endpoint to support
 creating multiple secondary datasets simultaneously

---
 src/routes/entity_CRUD/__init__.py | 45 ++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/src/routes/entity_CRUD/__init__.py b/src/routes/entity_CRUD/__init__.py
index 2cbdcb21..01a2cb43 100644
--- a/src/routes/entity_CRUD/__init__.py
+++ b/src/routes/entity_CRUD/__init__.py
@@ -80,6 +80,51 @@ def create_dataset():
         return Response("Unexpected error while creating a dataset: " + str(e) + " Check the logs", 500)
 
 
+@entity_CRUD_blueprint.route('/multi-datasets', methods=['POST'])
+def create_multi_datasets():
+
+    if not request.is_json:
+        return Response("json request required", 400)
+    try:
+        multi_dataset_request = request.json
+        # Get the single Globus groups token for authorization
+        auth_helper_instance = AuthHelper.instance()
+        auth_token = auth_helper_instance.getAuthorizationTokens(request.headers)
+        if isinstance(auth_token, Response):
+            return(auth_token)
+        elif isinstance(auth_token, str):
+            token = auth_token
+        else:
+            return Response("Valid Globus groups token required", 401)
+
+        # If we go with method 1 we need to add checks that all direct_ancestor_uuids and group_uuid properties are the same
+        # If we go with method 2 we need to add checks that direct_ancestor_uuids and group_uuid do not exist at the dataset level
+        for i, dataset in enumerate(multi_dataset_request):
+            requested_group_uuid = None
+            if 'group_uuid' in dataset:
+                requested_group_uuid = dataset['group_uuid']
+
+            ingest_helper = IngestFileHelper(current_app.config)
+            requested_group_uuid = auth_helper_instance.get_write_group_uuid(token, requested_group_uuid)
+            multi_dataset_request[i]['group_uuid'] = requested_group_uuid
+
+        post_url = commons_file_helper.ensureTrailingSlashURL(current_app.config['ENTITY_WEBSERVICE_URL']) + 'entities/dataset'
+        response = requests.post(post_url, json = multi_dataset_request, headers = {'Authorization': 'Bearer ' + token, 'X-SenNet-Application':'ingest-api' }, verify = False)
+        if response.status_code != 200:
+            return Response(response.text, response.status_code)
+        new_dataset = response.json()
+
+        ingest_helper.create_dataset_directory(new_dataset, requested_group_uuid, new_dataset['uuid'])
+
+        return jsonify(new_dataset)
+    except HTTPException as hte:
+        return Response(hte.get_description(), hte.get_status_code())
+    except Exception as e:
+        logger.error(e, exc_info=True)
+        return Response("Unexpected error while creating a dataset: " + str(e) + " Check the logs", 500)
+
+
 @entity_CRUD_blueprint.route('/sources/bulk/validate', methods=['POST'])
 def bulk_sources_upload_and_validate():
     return _bulk_upload_and_validate(Ontology.ops().entities().SOURCE)
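Note: the /multi-datasets endpoint above accepts a JSON array in which each
element describes one secondary dataset; after each element's group_uuid is
resolved to a write group, the whole array is forwarded to the entity service
in a single POST. As written, the response handling still treats the reply as
a single dataset (new_dataset['uuid']), which patch 3 below reworks. A minimal
client-side sketch, assuming a hypothetical base URL and illustrative payload
fields (only group_uuid is read by the endpoint itself):

    import requests

    INGEST_API = "https://ingest.example.org"  # hypothetical base URL
    token = "..."  # Globus groups token (elided)

    # Each element describes one secondary dataset to create; fields other
    # than group_uuid are illustrative and passed through unchanged.
    datasets = [
        {"direct_ancestor_uuids": ["..."], "group_uuid": "..."},
        {"direct_ancestor_uuids": ["..."], "group_uuid": "..."},
    ]

    response = requests.post(
        f"{INGEST_API}/multi-datasets",
        json=datasets,
        headers={"Authorization": f"Bearer {token}"},
    )
    response.raise_for_status()
    print(response.json())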
From 2d228e84c4e07b82067c58eee90c4eb000c072d6 Mon Sep 17 00:00:00 2001
From: maxsibilla
Date: Tue, 10 Oct 2023 10:18:45 -0400
Subject: [PATCH 2/5] Bumping version

---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index 0b1f1edf..fd9d1a5a 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.2.13
+1.2.14

From 64842d2cb28a6bae44d8e6c4550b318d50652c13 Mon Sep 17 00:00:00 2001
From: maxsibilla
Date: Mon, 16 Oct 2023 10:59:56 -0400
Subject: [PATCH 3/5] Adding new /datasets/components endpoint to handle the
 initial step in creating multi-assay datasets

---
 src/requirements.txt               |  2 +-
 src/routes/entity_CRUD/__init__.py | 55 ++++++++++++++----------------
 2 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/src/requirements.txt b/src/requirements.txt
index 4c2b9248..1448bfad 100644
--- a/src/requirements.txt
+++ b/src/requirements.txt
@@ -10,7 +10,7 @@ requests==2.25.1
 # Use the branch name of commons from github for testing new changes made in commons from different branch
 # Default is main branch specified in docker-compose.development.yml if not set
 # git+https://github.com/hubmapconsortium/commons.git@${COMMONS_BRANCH}#egg=hubmap-commons
-hubmap-commons==2.1.9
+hubmap-commons==2.1.12
 atlas-consortia-commons==1.0.5
 
 # Testing
diff --git a/src/routes/entity_CRUD/__init__.py b/src/routes/entity_CRUD/__init__.py
index 0ebc7d21..e11b772e 100644
--- a/src/routes/entity_CRUD/__init__.py
+++ b/src/routes/entity_CRUD/__init__.py
@@ -81,49 +81,46 @@ def create_dataset():
         return Response("Unexpected error while creating a dataset: " + str(e) + " Check the logs", 500)
 
 
-@entity_CRUD_blueprint.route('/multi-datasets', methods=['POST'])
-def create_multi_datasets():
-
+@entity_CRUD_blueprint.route('/datasets/components', methods=['POST'])
+def multiple_components():
     if not request.is_json:
         return Response("json request required", 400)
+    entity_type = 'dataset'
     try:
-        multi_dataset_request = request.json
-        # Get the single Globus groups token for authorization
-        auth_helper_instance = AuthHelper.instance()
-        auth_token = auth_helper_instance.getAuthorizationTokens(request.headers)
-        if isinstance(auth_token, Response):
-            return(auth_token)
-        elif isinstance(auth_token, str):
-            token = auth_token
+        component_request = request.json
+        auth_helper = AuthHelper.configured_instance(current_app.config['APP_CLIENT_ID'], current_app.config['APP_CLIENT_SECRET'])
+        auth_tokens = auth_helper.getAuthorizationTokens(request.headers)
+        if isinstance(auth_tokens, Response):
+            return(auth_tokens)
+        elif isinstance(auth_tokens, str):
+            token = auth_tokens
+        elif 'nexus_token' in auth_tokens:
+            token = auth_tokens['nexus_token']
         else:
-            return Response("Valid Globus groups token required", 401)
-
-        # If we go with method 1 we need to add checks that all direct_ancestor_uuids and group_uuid properties are the same
-        # If we go with method 2 we need to add checks that direct_ancestor_uuids and group_uuid do not exist at the dataset level
-        for i, dataset in enumerate(multi_dataset_request):
-            requested_group_uuid = None
-            if 'group_uuid' in dataset:
-                requested_group_uuid = dataset['group_uuid']
+            return(Response("Valid nexus auth token required", 401))
 
-            ingest_helper = IngestFileHelper(current_app.config)
-            requested_group_uuid = auth_helper_instance.get_write_group_uuid(token, requested_group_uuid)
-            multi_dataset_request[i]['group_uuid'] = requested_group_uuid
+        requested_group_uuid = None
+        if 'group_uuid' in component_request:
+            requested_group_uuid = component_request['group_uuid']
 
-        post_url = commons_file_helper.ensureTrailingSlashURL(current_app.config['ENTITY_WEBSERVICE_URL']) + 'entities/dataset'
-        response = requests.post(post_url, json = multi_dataset_request, headers = {'Authorization': 'Bearer ' + token, 'X-SenNet-Application':'ingest-api' }, verify = False)
+        ingest_helper = IngestFileHelper(current_app.config)
+        requested_group_uuid = auth_helper.get_write_group_uuid(token, requested_group_uuid)
+        component_request['group_uuid'] = requested_group_uuid
+        post_url = commons_file_helper.ensureTrailingSlashURL(current_app.config['ENTITY_WEBSERVICE_URL']) + 'datasets/components'
+        response = requests.post(post_url, json = component_request, headers = {'Authorization': 'Bearer ' + token, 'X-SenNet-Application':'ingest-api' }, verify = False)
         if response.status_code != 200:
             return Response(response.text, response.status_code)
-        new_dataset = response.json()
+        new_datasets_list = response.json()
 
-        ingest_helper.create_dataset_directory(new_dataset, requested_group_uuid, new_dataset['uuid'])
+        for dataset in new_datasets_list:
+            ingest_helper.create_dataset_directory(dataset, requested_group_uuid, dataset['uuid'])
 
-        return jsonify(new_dataset)
+        return jsonify(new_datasets_list)
     except HTTPException as hte:
         return Response(hte.get_description(), hte.get_status_code())
     except Exception as e:
         logger.error(e, exc_info=True)
-        return Response("Unexpected error while creating a dataset: " + str(e) + " Check the logs", 500)
-
+        return Response("Unexpected error while creating a dataset: " + str(e) + " Check the logs", 500)
 
 @entity_CRUD_blueprint.route('/sources/bulk/validate', methods=['POST'])
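Note: the /datasets/components endpoint that replaces /multi-datasets above
posts a single JSON object to the entity service's datasets/components route
and expects a list of created datasets back, creating one upload directory per
returned uuid. A rough request sketch, assuming a hypothetical payload shape
(the 'datasets' key is what the validation in the next patch iterates over;
all other fields are illustrative, not a documented contract):

    import requests

    INGEST_API = "https://ingest.example.org"  # hypothetical base URL
    token = "..."  # Globus groups token (elided)

    component_request = {
        "group_uuid": "...",  # optional; resolved to a write group server-side
        "datasets": [         # one entry per component (secondary) dataset
            {"dataset_type": "..."},  # illustrative fields only
            {"dataset_type": "..."},
        ],
    }

    response = requests.post(
        f"{INGEST_API}/datasets/components",
        json=component_request,
        headers={"Authorization": f"Bearer {token}"},
    )
    new_datasets = response.json()  # list of created datasets, each with a uuid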
From edf67f79657fff6611ecf9a8f2285ed92cbd59d7 Mon Sep 17 00:00:00 2001
From: maxsibilla
Date: Tue, 17 Oct 2023 13:14:28 -0400
Subject: [PATCH 4/5] Adding checks for 'dataset_link_abs_dir' and modifying
 directory creation for datasets/components to create a symlink to the
 directory specified by dataset_link_abs_dir

---
 src/routes/entity_CRUD/__init__.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/routes/entity_CRUD/__init__.py b/src/routes/entity_CRUD/__init__.py
index e11b772e..d47b80c6 100644
--- a/src/routes/entity_CRUD/__init__.py
+++ b/src/routes/entity_CRUD/__init__.py
@@ -99,6 +99,15 @@ def multiple_components():
         else:
             return(Response("Valid nexus auth token required", 401))
 
+        # Check that `dataset_link_abs_dir` exists for both datasets and that it is a valid directory
+        json_data_dict = request.get_json()
+        for dataset in json_data_dict.get('datasets'):
+            if 'dataset_link_abs_dir' in dataset:
+                if not os.path.exists(dataset['dataset_link_abs_dir']):
+                    return Response(f"The filepath specified with 'dataset_link_abs_dir' does not exist: {dataset['dataset_link_abs_dir']}", 500)
+            else:
+                return Response("Required field 'dataset_link_abs_dir' is missing from dataset", 500)
+
         requested_group_uuid = None
         if 'group_uuid' in component_request:
             requested_group_uuid = component_request['group_uuid']
@@ -113,7 +122,15 @@ def multiple_components():
         new_datasets_list = response.json()
 
         for dataset in new_datasets_list:
-            ingest_helper.create_dataset_directory(dataset, requested_group_uuid, dataset['uuid'])
+            # The property `dataset_link_abs_dir` will contain the filepath to the existing directory located inside the primary multi-assay
+            # directory. We need to create a new directory for each secondary dataset and then create a symlink to the aforementioned directory
+            if 'dataset_link_abs_dir' in dataset:
+                new_directory_path = ingest_helper.get_dataset_directory_absolute_path(dataset, requested_group_uuid, dataset['uuid'])
+                logger.info(
+                    f"Creating a directory as: {new_directory_path} with a symbolic link to: {dataset['dataset_link_abs_dir']}")
+                ingest_helper.make_directory(new_directory_path, dataset['dataset_link_abs_dir'])
+            else:
+                return Response("Required field 'dataset_link_abs_dir' is missing from dataset", 500)
 
         return jsonify(new_datasets_list)
     except HTTPException as hte:
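Note: after this patch, every entry under 'datasets' must carry a
'dataset_link_abs_dir' naming a path that already exists inside the primary
multi-assay dataset's directory; a missing key or nonexistent path aborts the
request with a 500 before any dataset is created. The hunk also relies on os
already being imported at module level, since no import is added. A sketch of
a conforming payload, with hypothetical paths:

    component_request = {
        "group_uuid": "...",
        "datasets": [
            # Each path must already exist on disk, or the endpoint returns
            # 500 without creating anything (paths are hypothetical).
            {"dataset_link_abs_dir": "/data/primary-multiassay/expr"},
            {"dataset_link_abs_dir": "/data/primary-multiassay/raw"},
        ],
    }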
From 152ed80e1288772b6f5e9adb0e8901f8510cc64e Mon Sep 17 00:00:00 2001
From: maxsibilla
Date: Tue, 17 Oct 2023 14:21:30 -0400
Subject: [PATCH 5/5] Updating symlink call for multiple components

---
 src/routes/entity_CRUD/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/routes/entity_CRUD/__init__.py b/src/routes/entity_CRUD/__init__.py
index d47b80c6..ec5026c3 100644
--- a/src/routes/entity_CRUD/__init__.py
+++ b/src/routes/entity_CRUD/__init__.py
@@ -123,12 +123,12 @@ def multiple_components():
 
         for dataset in new_datasets_list:
             # The property `dataset_link_abs_dir` will contain the filepath to the existing directory located inside the primary multi-assay
-            # directory. We need to create a new directory for each secondary dataset and then create a symlink to the aforementioned directory
+            # directory. We need to create a symlink to the aforementioned directory at the path for the newly created datasets.
             if 'dataset_link_abs_dir' in dataset:
                 new_directory_path = ingest_helper.get_dataset_directory_absolute_path(dataset, requested_group_uuid, dataset['uuid'])
                 logger.info(
                     f"Creating a directory as: {new_directory_path} with a symbolic link to: {dataset['dataset_link_abs_dir']}")
-                ingest_helper.make_directory(new_directory_path, dataset['dataset_link_abs_dir'])
+                os.symlink(dataset['dataset_link_abs_dir'], new_directory_path, True)
             else:
                 return Response("Required field 'dataset_link_abs_dir' is missing from dataset", 500)
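Note: os.symlink(src, dst, True) passes True positionally as
target_is_directory, which only has an effect on Windows; on POSIX the call
simply creates dst as a symbolic link to src and raises FileExistsError if dst
already exists. A rough standalone equivalent of the per-dataset step, under
the assumption that the parent of the new path may not exist yet:

    import os

    def link_component_dir(existing_abs_dir: str, new_directory_path: str) -> None:
        # os.symlink does not create intermediate directories, so ensure the
        # parent of the link location exists first.
        os.makedirs(os.path.dirname(new_directory_path), exist_ok=True)
        # new_directory_path becomes a symlink pointing at the existing
        # subdirectory inside the primary multi-assay dataset's directory.
        os.symlink(existing_abs_dir, new_directory_path, target_is_directory=True)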