From a62ed062247caaf3b8bff8bc71edd485a5ca6fe2 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Thu, 7 Sep 2023 15:35:17 -0400
Subject: [PATCH 1/3] Completed initial implementation of the status_history
 property and accompanying trigger method

---
 src/schema/provenance_schema.yaml | 21 ++++++++++++++++++
 src/schema/schema_triggers.py     | 37 +++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml
index 2bb70174..6fb2d72e 100644
--- a/src/schema/provenance_schema.yaml
+++ b/src/schema/provenance_schema.yaml
@@ -476,6 +476,13 @@ ENTITIES:
     dbgap_study_url:
       type: string
      description: 'A URL linking the dataset to the particular study on dbGap it belongs to'
+    status_history:
+      type: list
+      description: "A list of all status change events. Each entry in the list is a dictionary containing the change_timestamp, changed_by_email, previous_status, new_status"
+      generated: true
+      auto_update: true
+      before_create_trigger: set_status_history
+      before_update_trigger: set_status_history

 ############################################# Publication #############################################
   Publication:
@@ -739,6 +746,13 @@ ENTITIES:
       description: "The uuid of the associated collection for a given publication"
       after_create_trigger: link_publication_to_associated_collection
       after_update_trigger: link_publication_to_associated_collection
+    status_history:
+      type: list
+      description: "A list of all status change events. Each entry in the list is a dictionary containing the change_timestamp, changed_by_email, previous_status, new_status"
+      generated: true
+      auto_update: true
+      before_create_trigger: set_status_history
+      before_update_trigger: set_status_history

 ############################################# Donor #############################################
   Donor:
@@ -1138,3 +1152,10 @@ ENTITIES:
       description: "The datasets that are contained in this Upload."
       # A few time-consuming properties (with read triggers) of each dataset are excluded
       on_read_trigger: get_upload_datasets
+    status_history:
+      type: list
+      description: "A list of all status change events. Each entry in the list is a dictionary containing the change_timestamp, changed_by_email, previous_status, new_status"
+      generated: true
+      auto_update: true
+      before_create_trigger: set_status_history
+      before_update_trigger: set_status_history
diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py
index c570d2d4..a9f6ccdc 100644
--- a/src/schema/schema_triggers.py
+++ b/src/schema/schema_triggers.py
@@ -556,6 +556,43 @@ def update_file_descriptions(property_key, normalized_type, user_token, existing



+####################################################################################################
+## Trigger methods shared by Dataset, Upload, and Publication - DO NOT RENAME
+####################################################################################################
+
+def set_status_history(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
+    # This trigger is set to auto_update because, although it writes status_history, it fires on changes to status;
+    # if auto_update were not true, it would never run. The first conditional below therefore exits early and
+    # returns the existing status_history value when the status field has not changed.
+    if ('status' not in new_data_dict and existing_data_dict) or (new_data_dict.get('status') and existing_data_dict.get('status') and new_data_dict.get('status') == existing_data_dict.get('status')):
+        if 'status_history' not in existing_data_dict:
+            raise KeyError("Missing 'status_history' key in 'existing_data_dict' during calling 'set_status_history()' trigger method.")
+        return property_key, existing_data_dict['status_history']
+
+    new_status_history = []
+    status_entry = {}
+    if not existing_data_dict:
+        status = "New"
+    else:
+        if 'status' not in new_data_dict:
+            raise KeyError("Missing 'status' key in 'new_data_dict' during calling 'set_status_history()' trigger method.")
+        status = new_data_dict['status']
+        if 'status_history' in existing_data_dict:
+            status_history_string = existing_data_dict['status_history'].replace("'", "\"")
+            new_status_history += json.loads(status_history_string)
+    current_time = int(datetime.now().timestamp() * 1000)
+    if 'email' not in new_data_dict:
+        raise KeyError("Missing 'email' key in 'new_data_dict' during calling 'set_status_history()' trigger method.")
+    email = new_data_dict['email']
+    status_entry['status'] = status
+    status_entry['changed_by_email'] = email
+    status_entry['change_timestamp'] = current_time
+    new_status_history.append(status_entry)
+    return property_key, new_status_history
+
+
+
 ####################################################################################################
 ## Trigger methods specific to Collection - DO NOT RENAME
 ####################################################################################################

From c84125029fccf694ae719483f3f141e3c1f305f8 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Mon, 18 Sep 2023 15:38:13 -0400
Subject: [PATCH 2/3] Reworked the status history field

Status history is now an immutable field that is updated in response to
changes to the 'status' field, using after_create and after_update triggers.

A new validator was created: a PUT call that includes 'status' in its JSON
body is now rejected with 400 when that value is the same as the existing
status.
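For reviewers, a minimal standalone sketch of the entry shape the reworked
trigger appends. This is an illustration only, not code from this patch: the
helper name and example values are made up, and it assumes the Neo4j-managed
fields last_modified_user_email and last_modified_timestamp are present on the
existing entity and that prior history is stored as a single-quoted string.

    # Illustrative sketch only; names mirror the trigger but are simplified.
    import json

    def build_status_history(existing_entity: dict) -> list:
        # Prior history is persisted as a string; swap single quotes for
        # double quotes before parsing, as the trigger does.
        history_string = existing_entity.get('status_history', '[]').replace("'", "\"")
        history = json.loads(history_string)
        history.append({
            'status': existing_entity['status'],
            'changed_by_email': existing_entity['last_modified_user_email'],
            'change_timestamp': existing_entity['last_modified_timestamp'],
        })
        return history

    example_entity = {
        'status': 'QA',
        'status_history': "[{'status': 'New', 'changed_by_email': 'user@example.com', 'change_timestamp': 1694111717000}]",
        'last_modified_user_email': 'user@example.com',
        'last_modified_timestamp': 1695065893000,  # epoch milliseconds
    }
    print(json.dumps(build_status_history(example_entity), indent=2))

Appending a new dictionary per change keeps earlier entries intact while the
status_history property itself stays immutable to clients.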
---
 src/app.py                        |  5 +++-
 src/schema/provenance_schema.yaml | 21 ++++++------
 src/schema/schema_triggers.py     | 46 +++++++++++++++++----------------
 src/schema/schema_validators.py   | 24 ++++++++++++++++
 4 files changed, 62 insertions(+), 34 deletions(-)

diff --git a/src/app.py b/src/app.py
index 50001ded..b8aba4ba 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1352,6 +1352,7 @@ def update_entity(id):
         # A bit more validation if `direct_ancestor_uuids` provided
         has_direct_ancestor_uuids = False
         has_associated_collection_uuid = False
+        has_updated_status = False

         if ('direct_ancestor_uuids' in json_data_dict) and (json_data_dict['direct_ancestor_uuids']):
             has_direct_ancestor_uuids = True
@@ -1363,12 +1364,14 @@ def update_entity(id):
                 # Check existence of associated collection
                 associated_collection_dict = query_target_entity(json_data_dict['associated_collection_uuid'], user_token)

+        if ('status' in json_data_dict) and (json_data_dict['status']):
+            has_updated_status = True

         # Generate 'before_update_trigger' data and update the entity details in Neo4j
         merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict)

         # Handle linkages update via `after_update_trigger` methods
-        if has_direct_ancestor_uuids or has_associated_collection_uuid:
+        if has_direct_ancestor_uuids or has_associated_collection_uuid or has_updated_status:
             after_update(normalized_entity_type, user_token, merged_updated_dict)
     elif normalized_entity_type == 'Upload':
         has_dataset_uuids_to_link = False
diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml
index f2d79d08..b6be5ac4 100644
--- a/src/schema/provenance_schema.yaml
+++ b/src/schema/provenance_schema.yaml
@@ -304,9 +304,12 @@ ENTITIES:
       before_property_update_validators:
         - validate_application_header_before_property_update
         - validate_dataset_status_value
+        - validate_status_changed
       generated: true
       description: "One of: New|Processing|QA|Published|Error|Hold|Invalid|Submitted"
       before_create_trigger: set_dataset_status_new
+      after_create_trigger: set_status_history
+      after_update_trigger: set_status_history
     title:
       type: string
       generated: true # Disallow entry from users via POST
@@ -479,9 +482,7 @@ ENTITIES:
       type: list
       description: "A list of all status change events. Each entry in the list is a dictionary containing the change_timestamp, changed_by_email, previous_status, new_status"
       generated: true
-      auto_update: true
-      before_create_trigger: set_status_history
-      before_update_trigger: set_status_history
+      immutable: true

 ############################################# Publication #############################################
   Publication:
@@ -525,9 +526,12 @@ ENTITIES:
       before_property_update_validators:
         - validate_application_header_before_property_update
         - validate_dataset_status_value
+        - validate_status_changed
       generated: true
       description: "One of: New|Processing|QA|Published|Error|Hold|Invalid"
       before_create_trigger: set_dataset_status_new
+      after_create_trigger: set_status_history
+      after_update_trigger: set_status_history
     title:
       type: string
       description: "The title of the publication."
@@ -748,9 +752,7 @@ ENTITIES:
       type: list
       description: "A list of all status change events. Each entry in the list is a dictionary containing the change_timestamp, changed_by_email, previous_status, new_status"
       generated: true
-      auto_update: true
-      before_create_trigger: set_status_history
-      before_update_trigger: set_status_history
+      immutable: true

 ############################################# Donor #############################################
   Donor:
@@ -1098,11 +1100,14 @@ ENTITIES:
       before_property_update_validators:
         - validate_application_header_before_property_update
         - validate_upload_status_value
+        - validate_status_changed
       type: string
       generated: true
       description: "One of: New|Valid|Invalid|Error|Reorganized|Processing"
       # Trigger method will set the status to "New" on create
       before_create_trigger: set_upload_status_new
+      after_create_trigger: set_status_history
+      after_update_trigger: set_status_history
     validation_message:
       type: string
       description: A message from the validation tools describing what is invalid with the upload.
@@ -1154,6 +1159,4 @@ ENTITIES:
       type: list
       description: "A list of all status change events. Each entry in the list is a dictionary containing the change_timestamp, changed_by_email, previous_status, new_status"
       generated: true
-      auto_update: true
-      before_create_trigger: set_status_history
-      before_update_trigger: set_status_history
+      immutable: true
diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py
index c2057627..438b5274 100644
--- a/src/schema/schema_triggers.py
+++ b/src/schema/schema_triggers.py
@@ -561,34 +561,32 @@ def update_file_descriptions(property_key, normalized_type, user_token, existing
 ####################################################################################################

 def set_status_history(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
-    # This trigger is set to auto_update because, although it writes status_history, it fires on changes to status;
-    # if auto_update were not true, it would never run. The first conditional below therefore exits early and
-    # returns the existing status_history value when the status field has not changed.
-    if ('status' not in new_data_dict and existing_data_dict) or (new_data_dict.get('status') and existing_data_dict.get('status') and new_data_dict.get('status') == existing_data_dict.get('status')):
-        if 'status_history' not in existing_data_dict:
-            raise KeyError("Missing 'status_history' key in 'existing_data_dict' during calling 'set_status_history()' trigger method.")
-        return property_key, existing_data_dict['status_history']
-
     new_status_history = []
     status_entry = {}
-    if not existing_data_dict:
-        status = "New"
-    else:
-        if 'status' not in new_data_dict:
-            raise KeyError("Missing 'status' key in 'new_data_dict' during calling 'set_status_history()' trigger method.")
-        status = new_data_dict['status']
-        if 'status_history' in existing_data_dict:
-            status_history_string = existing_data_dict['status_history'].replace("'", "\"")
-            new_status_history += json.loads(status_history_string)
-    current_time = int(datetime.now().timestamp() * 1000)
-    if 'email' not in new_data_dict:
-        raise KeyError("Missing 'email' key in 'new_data_dict' during calling 'set_status_history()' trigger method.")
-    email = new_data_dict['email']
+
+    if 'status_history' in existing_data_dict:
+        status_history_string = existing_data_dict['status_history'].replace("'", "\"")
+        new_status_history += json.loads(status_history_string)
+
+    if 'status' not in existing_data_dict:
+        raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'set_status_history()' trigger method.")
+    if 'last_modified_timestamp' not in existing_data_dict:
+        raise KeyError("Missing 'last_modified_timestamp' key in 'existing_data_dict' during calling 'set_status_history()' trigger method.")
+    if 'last_modified_user_email' not in existing_data_dict:
+        raise KeyError("Missing 'last_modified_user_email' key in 'existing_data_dict' during calling 'set_status_history()' trigger method.")
+
+    status = existing_data_dict['status']
+    last_modified_user_email = existing_data_dict['last_modified_user_email']
+    last_modified_timestamp = existing_data_dict['last_modified_timestamp']
+    uuid = existing_data_dict['uuid']
+
     status_entry['status'] = status
-    status_entry['changed_by_email'] = email
-    status_entry['change_timestamp'] = current_time
+    status_entry['changed_by_email'] = last_modified_user_email
+    status_entry['change_timestamp'] = last_modified_timestamp
     new_status_history.append(status_entry)
-    return property_key, new_status_history
+    entity_data_dict = {"status_history": new_status_history}
+
+    schema_neo4j_queries.update_entity(schema_manager.get_neo4j_driver_instance(), normalized_type, entity_data_dict, uuid)

diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py
index 6e15635a..5aaf9704 100644
--- a/src/schema/schema_validators.py
+++ b/src/schema/schema_validators.py
@@ -271,6 +271,30 @@ def validate_dataset_status_value(property_key, normalized_entity_type, request,
         raise ValueError(f"Dataset status change to 'Published' can only be made via {SchemaConstants.INGEST_API_APP}")


+"""
+Validate that 'status', if included in new_data_dict, is different from the existing status value
+
+Parameters
+----------
+property_key : str
+    The target property key
+normalized_entity_type : str
+    The normalized entity type (Dataset, Publication, or Upload)
+request: Flask request object
+    The instance of Flask request passed in from application request
+existing_data_dict : dict
+    A dictionary that contains all existing entity properties
+new_data_dict : dict
+    The json data in request body, already after the regular validations
+"""
+def validate_status_changed(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
+    if 'status' not in existing_data_dict:
+        raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'validate_status_changed()' validator method.")
+    # Only allow 'status' in new_data_dict if it is different from the existing status value
+    if existing_data_dict['status'].lower() == new_data_dict['status'].lower():
+        raise ValueError(f"Status value is already {existing_data_dict['status']}; cannot change the status to the same value. If no change is intended, do not include the status field in the update")
+
+
 """
 Validate the sub_status field is also provided when Dataset.retraction_reason is provided on update via PUT

From 7a3d36273c31756e575a4cf32c83b4e6f06c2965 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Wed, 20 Sep 2023 14:13:25 -0400
Subject: [PATCH 3/3] Moved the has_updated_status flag up one level

Moved has_updated_status outside of the normalized_entity_type in
['Publication', 'Dataset'] block so it is set for every entity type, and
added has_updated_status to the if block for Uploads before calling
after_update.

---
 src/app.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/app.py b/src/app.py
index b8aba4ba..69aa1cc7 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1294,6 +1294,11 @@ def update_entity(id):
         normalized_status = schema_manager.normalize_status(json_data_dict["status"])
         json_data_dict["status"] = normalized_status

+    has_updated_status = False
+    if ('status' in json_data_dict) and (json_data_dict['status']):
+        has_updated_status = True
+
+
     # Normalize user provided status
     if "sub_status" in json_data_dict:
         normalized_status = schema_manager.normalize_status(json_data_dict["sub_status"])
@@ -1352,7 +1357,6 @@ def update_entity(id):
         # A bit more validation if `direct_ancestor_uuids` provided
         has_direct_ancestor_uuids = False
         has_associated_collection_uuid = False
-        has_updated_status = False

         if ('direct_ancestor_uuids' in json_data_dict) and (json_data_dict['direct_ancestor_uuids']):
             has_direct_ancestor_uuids = True
@@ -1364,8 +1368,6 @@ def update_entity(id):
                 # Check existence of associated collection
                 associated_collection_dict = query_target_entity(json_data_dict['associated_collection_uuid'], user_token)

-        if ('status' in json_data_dict) and (json_data_dict['status']):
-            has_updated_status = True

         # Generate 'before_update_trigger' data and update the entity details in Neo4j
         merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict)
@@ -1403,7 +1405,7 @@ def update_entity(id):
         merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict)

         # Handle linkages update via `after_update_trigger` methods
-        if has_dataset_uuids_to_link or has_dataset_uuids_to_unlink:
+        if has_dataset_uuids_to_link or has_dataset_uuids_to_unlink or has_updated_status:
             after_update(normalized_entity_type, user_token, merged_updated_dict)
     elif normalized_entity_type == 'Collection':
         # Generate 'before_update_trigger' data and update the entity details in Neo4j
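For context, a minimal standalone sketch of the flag logic that update_entity()
converges on after this series. This is an editorial illustration, not code from
the patches: the helper is a simplified stand-in for the real app.py flow, and
the JSON key names used for the Upload linking checks are assumptions.

    # Illustrative sketch only; simplified stand-in for the real update_entity() logic.
    def should_run_after_update(normalized_entity_type: str, json_data_dict: dict) -> bool:
        # Patch 3 computes this flag before the per-entity-type branches,
        # so both the Dataset/Publication branch and the Upload branch can use it.
        has_updated_status = bool(json_data_dict.get('status'))

        if normalized_entity_type in ['Dataset', 'Publication']:
            has_direct_ancestor_uuids = bool(json_data_dict.get('direct_ancestor_uuids'))
            has_associated_collection_uuid = bool(json_data_dict.get('associated_collection_uuid'))
            return has_direct_ancestor_uuids or has_associated_collection_uuid or has_updated_status

        if normalized_entity_type == 'Upload':
            # Key names here are assumed for illustration.
            has_dataset_uuids_to_link = bool(json_data_dict.get('dataset_uuids_to_link'))
            has_dataset_uuids_to_unlink = bool(json_data_dict.get('dataset_uuids_to_unlink'))
            return has_dataset_uuids_to_link or has_dataset_uuids_to_unlink or has_updated_status

        return False

    # A PUT that only changes 'status' on an Upload now reaches after_update(),
    # which is what lets set_status_history() run as an after_update trigger.
    assert should_run_after_update('Upload', {'status': 'Valid'})
    assert not should_run_after_update('Upload', {'validation_message': 'looks good'})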