From b055ee8a4f5bf3a7c9325e51c0548c294f65ee97 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Fri, 22 Mar 2024 03:38:44 -0400 Subject: [PATCH 1/5] Implemented component dataset status sync --- src/app.py | 1 + src/instance/app.cfg.example | 3 ++ src/schema/provenance_schema.yaml | 3 +- src/schema/schema_constants.py | 1 + src/schema/schema_manager.py | 29 ++++++++++++++++ src/schema/schema_triggers.py | 56 +++++++++++++++++++++++++++++++ src/schema/schema_validators.py | 30 +++++++++++++++++ 7 files changed, 122 insertions(+), 1 deletion(-) diff --git a/src/app.py b/src/app.py index e8c03e22..785822f4 100644 --- a/src/app.py +++ b/src/app.py @@ -204,6 +204,7 @@ def http_internal_server_error(e): app.config['UUID_API_URL'], app.config['INGEST_API_URL'], app.config['ONTOLOGY_API_URL'], + app.config['ENTITY_API_URL'], auth_helper_instance, neo4j_driver_instance, memcached_client_instance, diff --git a/src/instance/app.cfg.example b/src/instance/app.cfg.example index 839972dc..f3c1fc0e 100644 --- a/src/instance/app.cfg.example +++ b/src/instance/app.cfg.example @@ -32,6 +32,9 @@ INGEST_API_URL = 'https://ingest-api.dev.hubmapconsortium.org' # Works regardless of the trailing slash ONTOLOGY_API_URL = 'https://ontology-api.dev.hubmapconsortium.org' +# URL for talking to Entity API (default for DEV) +ENTITY_API_URL = 'https://entity-api.dev.hubmapconsortium.org' + # A list of URLs for talking to multiple Search API instances (default value used for docker deployment, no token needed) # Works regardless of the trailing slash / SEARCH_API_URL_LIST = ['http://search-api:8080'] diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 4aa46b8a..50ffe6c6 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -311,11 +311,12 @@ ENTITIES: - validate_application_header_before_property_update - validate_dataset_status_value - validate_status_changed + - validate_dataset_not_component generated: true description: 
"One of: New|Processing|Published|QA|Error|Hold|Invalid|Submitted|Incomplete" before_create_trigger: set_dataset_status_new after_create_trigger: set_status_history - after_update_trigger: set_status_history + after_update_trigger: update_status title: type: string generated: true # Disallow entry from users via POST diff --git a/src/schema/schema_constants.py b/src/schema/schema_constants.py index 6a471587..7aa41151 100644 --- a/src/schema/schema_constants.py +++ b/src/schema/schema_constants.py @@ -11,6 +11,7 @@ class SchemaConstants(object): ACCESS_LEVEL_CONSORTIUM = 'consortium' ACCESS_LEVEL_PROTECTED = 'protected' + ENTITY_API_UPDATE_ENDPOINT = '/entities' UUID_API_ID_ENDPOINT = '/uuid' INGEST_API_FILE_COMMIT_ENDPOINT = '/file-commit' INGEST_API_FILE_REMOVE_ENDPOINT = '/file-remove' diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 8c6318e1..4b45943e 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -1,4 +1,5 @@ import ast +import sys import yaml import logging import requests @@ -31,6 +32,7 @@ _schema = None _uuid_api_url = None _ingest_api_url = None +_entity_api_url = None _ontology_api_url = None _auth_helper = None _neo4j_driver = None @@ -69,6 +71,7 @@ def initialize(valid_yaml_file, uuid_api_url, ingest_api_url, ontology_api_url, + entity_api_url, auth_helper_instance, neo4j_driver_instance, memcached_client_instance, @@ -78,6 +81,7 @@ def initialize(valid_yaml_file, global _uuid_api_url global _ingest_api_url global _ontology_api_url + global _entity_api_url global _auth_helper global _neo4j_driver global _memcached_client @@ -105,6 +109,12 @@ def initialize(valid_yaml_file, logger.critical(msg=msg) raise Exception(msg) + if entity_api_url is not None: + _entity_api_url = entity_api_url + else: + msg = f"Unable to initialize schema manager with entity_api_url={entity_api_url}." 
+ logger.critical(msg=msg) + raise Exception(msg) # Get the helper instances _auth_helper = auth_helper_instance _neo4j_driver = neo4j_driver_instance @@ -507,6 +517,25 @@ def remove_transient_and_none_values(merged_dict, normalized_entity_type): return filtered_dict +""" +Update entity via HTTP call to entity-api +This is useful when a change in one entity necessitates changes in another, while allowing the normal triggers +and validators to execute + +Parameters +---------- +uuid : string + The uuid of the entity being updated +data : dict + A dict representation of the json_data_dict for the updated entity +token : string + The globus groups token +""" +def update_entity(uuid, data, token): + url = _entity_api_url + SchemaConstants.ENTITY_API_UPDATE_ENDPOINT + '/' + uuid + header = _create_request_headers(token) + header['X-Hubmap-Application'] = 'ingest-api' + response = requests.put(url=url, headers=header, json=data) """ Generate the complete entity record by running the read triggers diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index a197caab..a249ccde 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -1402,6 +1402,62 @@ def delete_thumbnail_file(property_key, normalized_type, user_token, existing_da return generated_dict +""" +Trigger event method that calls related functions involved with updating the status value + +Parameters +---------- +property_key : str + The target property key +normalized_type : str + One of the types defined in the schema yaml: Dataset +user_token: str + The user's globus nexus token +existing_data_dict : dict + A dictionary that contains all existing entity properties +new_data_dict : dict + A merged dictionary that contains all possible input data to be used +""" + +def update_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): + # execute set_status_history + set_status_history(property_key, normalized_type, user_token, 
existing_data_dict, new_data_dict) + + #execute sync_component_dataset_status + sync_component_dataset_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict) + + +""" +Function that changes the status of component datasets when their parent multi-assay dataset's status changes + +Parameters +---------- +property_key : str + The target property key +normalized_type : str + One of the types defined in the schema yaml: Dataset +user_token: str + The user's globus nexus token +existing_data_dict : dict + A dictionary that contains all existing entity properties +new_data_dict : dict + A merged dictionary that contains all possible input data to be used +""" +def sync_component_dataset_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): + if 'uuid' not in existing_data_dict: + raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") + uuid = existing_data_dict['uuid'] + if 'status' not in existing_data_dict: + raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") + status = existing_data_dict['status'] + children_uuids_list = schema_neo4j_queries.get_children(schema_manager.get_neo4j_driver_instance(), uuid, property_key='uuid') + status_body = {"status": status} + for child_uuid in children_uuids_list: + creation_action = schema_neo4j_queries.get_entity_creation_action_activity(schema_manager.get_neo4j_driver_instance(), child_uuid) + if creation_action == 'Multi-Assay Split': + schema_manager.update_entity(child_uuid, status_body, user_token) + + #################################################################################################### ## Trigger methods specific to Donor - DO NOT RENAME #################################################################################################### diff --git a/src/schema/schema_validators.py 
b/src/schema/schema_validators.py index beaf906f..f78732f7 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -95,6 +95,36 @@ def validate_no_duplicates_in_list(property_key, normalized_entity_type, request if len(set(target_list)) != len(target_list): raise ValueError(f"The {property_key} field must only contain unique items") + +""" +Validate that a given dataset is not a component of a multi-assay split parent dataset before allowing status to be +updated. If a component dataset needs to be updated, update it via its parent multi-assay dataset + +Parameters +---------- +property_key : str + The target property key +normalized_entity_type : str + Submission +request: Flask request object + The instance of Flask request passed in from application request +existing_data_dict : dict + A dictionary that contains all existing entity properties +new_data_dict : dict + The json data in request body, already after the regular validations +""" + + +def validate_dataset_not_component(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict): + neo4j_driver_instance = schema_manager.get_neo4j_driver_instance() + uuid = existing_data_dict['uuid'] + creation_action = schema_neo4j_queries.get_entity_creation_action_activity(neo4j_driver_instance, uuid) + if creation_action == 'Multi-Assay Split': + raise ValueError(f"Unable to modify existing {existing_data_dict['entity_type']}" + f" {existing_data_dict['uuid']}. Can not change status on component datasets directly. 
Status" + f"change must occur on parent multi-assay split dataset") + + """ If an entity has a DOI, do not allow it to be updated """ From 19b958785f5106264310be5fceea0ff2596aca45 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Fri, 22 Mar 2024 14:01:57 -0400 Subject: [PATCH 2/5] generalized x-hubmap-header to use schemaConstants instead of hard coding --- src/schema/schema_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 4b45943e..6da7545e 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -534,7 +534,7 @@ def remove_transient_and_none_values(merged_dict, normalized_entity_type): def update_entity(uuid, data, token): url = _entity_api_url + SchemaConstants.ENTITY_API_UPDATE_ENDPOINT + '/' + uuid header = _create_request_headers(token) - header['X-Hubmap-Application'] = 'ingest-api' + header[schemaConstants.HUBMAP_APP_HEADER] = INGEST_API_APP response = requests.put(url=url, headers=header, json=data) """ Generate the complete entity record by running the read triggers From 53dfdb7def79dab0dc24de12c8d05c83228fc983 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Fri, 22 Mar 2024 14:08:36 -0400 Subject: [PATCH 3/5] removed Erroneous import 'sys' used during debugging --- src/schema/schema_manager.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 6da7545e..0e16c518 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -1,5 +1,4 @@ import ast -import sys import yaml import logging import requests From f65b0bc2768bacea6880e4eb85729ecc808f8ca6 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Tue, 2 Apr 2024 12:31:14 -0400 Subject: [PATCH 4/5] Moved update_entity functionality within trigger method sync_component_dataset_status rather than schema manager. 
created getter get_entity_api_url --- src/schema/schema_manager.py | 35 ++++++++++++++++------------------- src/schema/schema_triggers.py | 4 ++++ 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 0e16c518..5f486976 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -516,25 +516,6 @@ def remove_transient_and_none_values(merged_dict, normalized_entity_type): return filtered_dict -""" -Update entity via HTTP call to entity-api -This is useful when a change in one entity necessitates changes in another, while allowing the normal triggers -and validators to execute - -Parameters ----------- -uuid : string - The uuid of the entity being updated -data : dict - A dict representation of the json_data_dict for the updated entity -token : string - The globus groups token -""" -def update_entity(uuid, data, token): - url = _entity_api_url + SchemaConstants.ENTITY_API_UPDATE_ENDPOINT + '/' + uuid - header = _create_request_headers(token) - header[schemaConstants.HUBMAP_APP_HEADER] = INGEST_API_APP - response = requests.put(url=url, headers=header, json=data) """ Generate the complete entity record by running the read triggers @@ -1729,6 +1710,22 @@ def get_ingest_api_url(): return _ingest_api_url +""" +Get the entity-api URL to be used by trigger methods + +Returns +------- +str + The entity-api URL +""" + + +def get_entity_api_url(): + global _entity_api_url + + return _entity_api_url + + """ Get the AUthHelper instance to be used by trigger methods diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index a249ccde..5a337be6 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -1456,6 +1456,10 @@ def sync_component_dataset_status(property_key, normalized_type, user_token, exi creation_action = schema_neo4j_queries.get_entity_creation_action_activity(schema_manager.get_neo4j_driver_instance(), child_uuid) if 
creation_action == 'Multi-Assay Split': schema_manager.update_entity(child_uuid, status_body, user_token) + url = schema_manager.get_entity_api_url + SchemaConstants.ENTITY_API_UPDATE_ENDPOINT + '/' + child_uuid + header = _create_request_headers(user_token) + header[schemaConstants.HUBMAP_APP_HEADER] = schemaConstants.INGEST_API_APP + response = requests.put(url=url, headers=header, json=status_body) #################################################################################################### From 4bd15611b14bec903f6fae4b6c94c9d0cf01782c Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Tue, 2 Apr 2024 12:47:54 -0400 Subject: [PATCH 5/5] bug fixes. Fixed some capitalization inconsistencies in schemaConstants and added some missing parenthesis --- src/schema/schema_triggers.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 5a337be6..ae5bb117 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -1455,10 +1455,9 @@ def sync_component_dataset_status(property_key, normalized_type, user_token, exi for child_uuid in children_uuids_list: creation_action = schema_neo4j_queries.get_entity_creation_action_activity(schema_manager.get_neo4j_driver_instance(), child_uuid) if creation_action == 'Multi-Assay Split': - schema_manager.update_entity(child_uuid, status_body, user_token) - url = schema_manager.get_entity_api_url + SchemaConstants.ENTITY_API_UPDATE_ENDPOINT + '/' + child_uuid - header = _create_request_headers(user_token) - header[schemaConstants.HUBMAP_APP_HEADER] = schemaConstants.INGEST_API_APP + url = schema_manager.get_entity_api_url() + SchemaConstants.ENTITY_API_UPDATE_ENDPOINT + '/' + child_uuid + header = schema_manager._create_request_headers(user_token) + header[SchemaConstants.HUBMAP_APP_HEADER] = SchemaConstants.INGEST_API_APP response = requests.put(url=url, headers=header, json=status_body)