Skip to content

Commit

Permalink
Merge pull request #646 from hubmapconsortium/Derek-Furst/sync-compon…
Browse files Browse the repository at this point in the history
…ent-datasets

Derek furst/sync component datasets
  • Loading branch information
yuanzhou authored Apr 3, 2024
2 parents a7ad29c + 4bd1561 commit e36a1ab
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ def http_internal_server_error(e):
app.config['UUID_API_URL'],
app.config['INGEST_API_URL'],
app.config['ONTOLOGY_API_URL'],
app.config['ENTITY_API_URL'],
auth_helper_instance,
neo4j_driver_instance,
memcached_client_instance,
Expand Down
3 changes: 3 additions & 0 deletions src/instance/app.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ INGEST_API_URL = 'https://ingest-api.dev.hubmapconsortium.org'
# Works regardless of the trailing slash
ONTOLOGY_API_URL = 'https://ontology-api.dev.hubmapconsortium.org'

# URL for talking to Entity API (default for DEV)
ENTITY_API_URL = 'https://entity-api.dev.hubmapconsortium.org'

# A list of URLs for talking to multiple Search API instances (default value used for docker deployment, no token needed)
# Works regardless of the trailing slash /
SEARCH_API_URL_LIST = ['http://search-api:8080']
Expand Down
3 changes: 2 additions & 1 deletion src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -311,11 +311,12 @@ ENTITIES:
- validate_application_header_before_property_update
- validate_dataset_status_value
- validate_status_changed
- validate_dataset_not_component
generated: true
description: "One of: New|Processing|Published|QA|Error|Hold|Invalid|Submitted|Incomplete"
before_create_trigger: set_dataset_status_new
after_create_trigger: set_status_history
after_update_trigger: set_status_history
after_update_trigger: update_status
title:
type: string
generated: true # Disallow entry from users via POST
Expand Down
1 change: 1 addition & 0 deletions src/schema/schema_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class SchemaConstants(object):
ACCESS_LEVEL_CONSORTIUM = 'consortium'
ACCESS_LEVEL_PROTECTED = 'protected'

ENTITY_API_UPDATE_ENDPOINT = '/entities'
UUID_API_ID_ENDPOINT = '/uuid'
INGEST_API_FILE_COMMIT_ENDPOINT = '/file-commit'
INGEST_API_FILE_REMOVE_ENDPOINT = '/file-remove'
Expand Down
25 changes: 25 additions & 0 deletions src/schema/schema_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
_schema = None
_uuid_api_url = None
_ingest_api_url = None
_entity_api_url = None
_ontology_api_url = None
_auth_helper = None
_neo4j_driver = None
Expand Down Expand Up @@ -69,6 +70,7 @@ def initialize(valid_yaml_file,
uuid_api_url,
ingest_api_url,
ontology_api_url,
entity_api_url,
auth_helper_instance,
neo4j_driver_instance,
memcached_client_instance,
Expand All @@ -78,6 +80,7 @@ def initialize(valid_yaml_file,
global _uuid_api_url
global _ingest_api_url
global _ontology_api_url
global _entity_api_url
global _auth_helper
global _neo4j_driver
global _memcached_client
Expand Down Expand Up @@ -105,6 +108,12 @@ def initialize(valid_yaml_file,
logger.critical(msg=msg)
raise Exception(msg)

if entity_api_url is not None:
_entity_api_url = entity_api_url
else:
msg = f"Unable to initialize schema manager with entity_api_url={entity_api_url}."
logger.critical(msg=msg)
raise Exception(msg)
# Get the helper instances
_auth_helper = auth_helper_instance
_neo4j_driver = neo4j_driver_instance
Expand Down Expand Up @@ -1701,6 +1710,22 @@ def get_ingest_api_url():
return _ingest_api_url


"""
Get the entity-api URL to be used by trigger methods
Returns
-------
str
The entity-api URL
"""


def get_entity_api_url():
    # Plain read of the module-level value that initialize() assigns;
    # no `global` declaration is required because nothing is rebound here.
    return _entity_api_url


"""
Get the AuthHelper instance to be used by trigger methods
Expand Down
59 changes: 59 additions & 0 deletions src/schema/schema_triggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1402,6 +1402,65 @@ def delete_thumbnail_file(property_key, normalized_type, user_token, existing_da
return generated_dict


"""
Trigger event method that calls related functions involved with updating the status value
Parameters
----------
property_key : str
The target property key
normalized_type : str
One of the types defined in the schema yaml: Dataset
user_token: str
The user's globus nexus token
existing_data_dict : dict
A dictionary that contains all existing entity properties
new_data_dict : dict
A merged dictionary that contains all possible input data to be used
"""

def update_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
    # Both handlers receive exactly the arguments this trigger was called with.
    trigger_args = (property_key, normalized_type, user_token, existing_data_dict, new_data_dict)

    # Order matters: run set_status_history first, then sync_component_dataset_status
    set_status_history(*trigger_args)
    sync_component_dataset_status(*trigger_args)


"""
Function that changes the status of component datasets when their parent multi-assay dataset's status changes
Parameters
----------
property_key : str
The target property key
normalized_type : str
One of the types defined in the schema yaml: Dataset
user_token: str
The user's globus nexus token
existing_data_dict : dict
A dictionary that contains all existing entity properties
new_data_dict : dict
A merged dictionary that contains all possible input data to be used
"""
def sync_component_dataset_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
    """
    Push this dataset's status to each of its 'Multi-Assay Split' children via entity-api.

    For every child uuid returned by get_children(), the creation action is looked up and,
    when it equals 'Multi-Assay Split', a PUT with body {"status": <status>} is sent to
    <entity-api>/entities/<child_uuid>. Returns None.

    Raises
    ------
    KeyError
        If 'uuid' or 'status' is missing from existing_data_dict
    """
    # Function-scope import so this block does not depend on a module-level logger
    import logging

    # Name this trigger in the error messages (previously they named an unrelated trigger)
    for required_key in ('uuid', 'status'):
        if required_key not in existing_data_dict:
            raise KeyError(f"Missing '{required_key}' key in 'existing_data_dict' during calling 'sync_component_dataset_status()' trigger method.")

    uuid = existing_data_dict['uuid']
    status_body = {"status": existing_data_dict['status']}

    # The driver lookup is loop-invariant, so resolve it once
    neo4j_driver = schema_manager.get_neo4j_driver_instance()
    children_uuids_list = schema_neo4j_queries.get_children(neo4j_driver, uuid, property_key='uuid')

    # Tolerate a trailing slash in the configured entity-api URL
    update_base_url = schema_manager.get_entity_api_url().rstrip('/') + SchemaConstants.ENTITY_API_UPDATE_ENDPOINT

    for child_uuid in children_uuids_list:
        creation_action = schema_neo4j_queries.get_entity_creation_action_activity(neo4j_driver, child_uuid)
        if creation_action != 'Multi-Assay Split':
            continue

        # NOTE(review): validate_dataset_not_component, as shown in this change, rejects status
        # updates on 'Multi-Assay Split' datasets - confirm that this internal PUT is exempt.
        url = f"{update_base_url}/{child_uuid}"
        header = schema_manager._create_request_headers(user_token)
        header[SchemaConstants.HUBMAP_APP_HEADER] = SchemaConstants.INGEST_API_APP
        response = requests.put(url=url, headers=header, json=status_body)

        # Best-effort sync: report a failed update instead of silently dropping it
        if not response.ok:
            logging.getLogger(__name__).error(
                "Failed to sync status of component dataset %s from parent %s: HTTP %s %s",
                child_uuid, uuid, response.status_code, response.text
            )


####################################################################################################
## Trigger methods specific to Donor - DO NOT RENAME
####################################################################################################
Expand Down
30 changes: 30 additions & 0 deletions src/schema/schema_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,36 @@ def validate_no_duplicates_in_list(property_key, normalized_entity_type, request
if len(set(target_list)) != len(target_list):
raise ValueError(f"The {property_key} field must only contain unique items")


"""
Validate that a given dataset is not a component of a multi-assay split parent dataset before allowing its status to be
updated. If a component dataset needs to be updated, update it via its parent multi-assay dataset
Parameters
----------
property_key : str
The target property key
normalized_entity_type : str
One of the types defined in the schema yaml: Dataset
request: Flask request object
The instance of Flask request passed in from application request
existing_data_dict : dict
A dictionary that contains all existing entity properties
new_data_dict : dict
The json data in request body, already after the regular validations
"""


def validate_dataset_not_component(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
    # Look up how the existing dataset was created; a 'Multi-Assay Split' creation action
    # marks it as a component dataset whose status may not be changed directly.
    neo4j_driver_instance = schema_manager.get_neo4j_driver_instance()
    uuid = existing_data_dict['uuid']
    creation_action = schema_neo4j_queries.get_entity_creation_action_activity(neo4j_driver_instance, uuid)

    if creation_action == 'Multi-Assay Split':
        # The leading space on the last fragment keeps "Status change" from being
        # concatenated into "Statuschange" in the message returned to the caller.
        raise ValueError(f"Unable to modify existing {existing_data_dict['entity_type']}"
                         f" {existing_data_dict['uuid']}. Can not change status on component datasets directly. Status"
                         f" change must occur on parent multi-assay split dataset")


"""
If an entity has a DOI, do not allow it to be updated
"""
Expand Down

0 comments on commit e36a1ab

Please sign in to comment.