From dd14d49d30cb8ba5623dd983fc679b074d2e5fba Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Wed, 10 Jan 2024 13:34:09 -0500 Subject: [PATCH 1/3] Changed dataset types to be required on create (but not update) and changed data_types to be not required. These changes are for dataset and publication --- src/schema/provenance_schema.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index db66e60e..4416a3cc 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -331,7 +331,7 @@ ENTITIES: before_property_update_validators: - validate_no_duplicates_in_list type: list - required_on_create: true # Only required for create via POST, not update via PUT + required_on_create: false description: "The data or assay types contained in this dataset as a json array of strings. Each is an assay code from [assay types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/assay_types.yaml)." dataset_type: before_property_create_validators: @@ -339,7 +339,7 @@ ENTITIES: before_property_update_validators: - validate_recognized_dataset_type type: string - required_on_create: false # Once replaces data_types, will be required for create via POST, not update via PUT + required_on_create: true # Required for create via POST, not update via PUT description: "The assay types of this Dataset. Valid values are from UBKG are queried by schema_manager.get_valueset_dataset_type() using the Ontology API." collections: type: list @@ -599,7 +599,7 @@ ENTITIES: before_property_update_validators: - validate_no_duplicates_in_list type: list - required_on_create: true # Only required for create via POST, not update via PUT + required_on_create: false description: "The data or assay types contained in this dataset as a json array of strings. 
Each is an assay code from [assay types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/assay_types.yaml)." dataset_type: before_property_create_validators: @@ -607,7 +607,7 @@ ENTITIES: before_property_update_validators: - validate_recognized_dataset_type type: string - required_on_create: false # Once replaces data_types, will be required for create via POST, not update via PUT + required_on_create: true #Required for create via POST, not update via PUT description: "The assay types of this Dataset. Valid values are from UBKG are queried by schema_manager.get_valueset_dataset_type() using the Ontology API." collections: type: list From 1a44c1abb3fe25b4811d6905eebf429f856fdb86 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Tue, 16 Jan 2024 12:12:35 -0500 Subject: [PATCH 2/3] Modified schema trigger generate_dataset_title which is used by the dataset property and is autogenerated. Now uses dataset_type instead of data_types and some existing conversions were no longer needed --- src/schema/schema_triggers.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 169a7eb3..b9ee10fb 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -1005,17 +1005,7 @@ def get_dataset_title(property_key, normalized_type, user_token, existing_data_d race = None sex = None - # Parse assay_type from the Dataset - try: - logger.info(f"Executing convert_str_literal() on 'data_types' of uuid: {existing_data_dict['uuid']} during calling 'get_dataset_title()' trigger method.") - - # Note: The existing_data_dict['data_types'] is stored in Neo4j as a string representation of the Python list - # It's not stored in Neo4j as a json string! And we can't store it as a json string - # due to the way that Cypher handles single/double quotes. 
- data_types_list = schema_manager.convert_str_literal(existing_data_dict['data_types']) - assay_type_desc = _get_combined_assay_type_description(data_types_list) - except requests.exceptions.RequestException as e: - raise requests.exceptions.RequestException(e) + dataset_type = existing_data_dict['dataset_type'] # Get the sample organ name and donor metadata information of this dataset organ_name, donor_metadata = schema_neo4j_queries.get_dataset_organ_and_donor_info(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid']) @@ -1082,7 +1072,7 @@ def get_dataset_title(property_key, normalized_type, user_token, existing_data_d else: age_race_sex_info = f"{age}-year-old {race} {sex}" - generated_title = f"{assay_type_desc} data from the {organ_desc} of a {age_race_sex_info}" + generated_title = f"{dataset_type} data from the {organ_desc} of a {age_race_sex_info}" return property_key, generated_title From a605af550973fd9c9257fbd933f24f25ae9c02c9 Mon Sep 17 00:00:00 2001 From: Karl Burke Date: Wed, 17 Jan 2024 14:20:46 -0500 Subject: [PATCH 3/3] Generate dataset_type='Publication' for Publication entities, and prohibit dataset_type from being specified on the request. --- src/schema/provenance_schema.yaml | 4 +++- src/schema/schema_triggers.py | 25 +++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 4416a3cc..902672c4 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -602,12 +602,14 @@ ENTITIES: required_on_create: false description: "The data or assay types contained in this dataset as a json array of strings. Each is an assay code from [assay types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/assay_types.yaml)." 
dataset_type: + before_create_trigger: set_publication_dataset_type before_property_create_validators: - validate_recognized_dataset_type before_property_update_validators: - validate_recognized_dataset_type type: string - required_on_create: true #Required for create via POST, not update via PUT + generated: true + immutable: true description: "The assay types of this Dataset. Valid values are from UBKG are queried by schema_manager.get_valueset_dataset_type() using the Ontology API." collections: type: list diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index b9ee10fb..a197caab 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -1681,6 +1681,31 @@ def set_publication_date(property_key, normalized_type, user_token, existing_dat return property_key, date_obj.date().isoformat() +""" +Trigger event method setting the dataset_type immutable property for a Publication. + +Parameters +---------- +property_key : str + The target property key of the value to be generated +normalized_type : str + One of the types defined in the schema yaml: Publication +user_token: str + The user's globus nexus token +existing_data_dict : dict + A dictionary that contains all existing entity properties +new_data_dict : dict + A merged dictionary that contains all possible input data to be used + +Returns +------- +str: The target property key +str: Immutable dataset_type of "Publication" +""" +def set_publication_dataset_type(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): + # Rely on the `generated: true` setting for dataset_type in provenance_schema.yaml to ensure the + # request does not contain a value which would be overwritten. + return property_key, 'Publication' #################################################################################################### ## Trigger methods specific to Upload - DO NOT RENAME