Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Derek furst/sync component datasets #651

Merged
merged 7 commits into from
Apr 8, 2024
1 change: 1 addition & 0 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ def http_internal_server_error(e):
app.config['UUID_API_URL'],
app.config['INGEST_API_URL'],
app.config['ONTOLOGY_API_URL'],
app.config['ENTITY_API_URL'],
auth_helper_instance,
neo4j_driver_instance,
memcached_client_instance,
Expand Down
5 changes: 5 additions & 0 deletions src/instance/app.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ INGEST_API_URL = 'https://ingest-api.dev.hubmapconsortium.org'
# Works regardless of the trailing slash
ONTOLOGY_API_URL = 'https://ontology-api.dev.hubmapconsortium.org'

# URL for talking to Entity API (default for Localhost)
# This is the same URL base where entity-api is running. This is useful in places where a call for one entity
# necessitates subsequent calls for other entities.
ENTITY_API_URL = 'http://localhost:5002'

# A list of URLs for talking to multiple Search API instances (default value used for docker deployment, no token needed)
# Works regardless of the trailing slash /
SEARCH_API_URL_LIST = ['http://search-api:8080']
Expand Down
3 changes: 2 additions & 1 deletion src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -311,11 +311,12 @@ ENTITIES:
- validate_application_header_before_property_update
- validate_dataset_status_value
- validate_status_changed
- validate_dataset_not_component
generated: true
description: "One of: New|Processing|Published|QA|Error|Hold|Invalid|Submitted|Incomplete"
before_create_trigger: set_dataset_status_new
after_create_trigger: set_status_history
after_update_trigger: set_status_history
after_update_trigger: update_status
title:
type: string
generated: true # Disallow entry from users via POST
Expand Down
3 changes: 3 additions & 0 deletions src/schema/schema_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,17 @@ class SchemaConstants(object):
MEMCACHED_TTL = 7200

INGEST_API_APP = 'ingest-api'
COMPONENT_DATASET = 'component-dataset'
INGEST_PIPELINE_APP = 'ingest-pipeline'
HUBMAP_APP_HEADER = 'X-Hubmap-Application'
INTERNAL_TRIGGER = 'X-Internal-Trigger'
DATASET_STATUS_PUBLISHED = 'published'

ACCESS_LEVEL_PUBLIC = 'public'
ACCESS_LEVEL_CONSORTIUM = 'consortium'
ACCESS_LEVEL_PROTECTED = 'protected'

ENTITY_API_UPDATE_ENDPOINT = '/entities'
UUID_API_ID_ENDPOINT = '/uuid'
INGEST_API_FILE_COMMIT_ENDPOINT = '/file-commit'
INGEST_API_FILE_REMOVE_ENDPOINT = '/file-remove'
Expand Down
25 changes: 25 additions & 0 deletions src/schema/schema_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
_schema = None
_uuid_api_url = None
_ingest_api_url = None
_entity_api_url = None
_ontology_api_url = None
_auth_helper = None
_neo4j_driver = None
Expand Down Expand Up @@ -69,6 +70,7 @@ def initialize(valid_yaml_file,
uuid_api_url,
ingest_api_url,
ontology_api_url,
entity_api_url,
auth_helper_instance,
neo4j_driver_instance,
memcached_client_instance,
Expand All @@ -78,6 +80,7 @@ def initialize(valid_yaml_file,
global _uuid_api_url
global _ingest_api_url
global _ontology_api_url
global _entity_api_url
global _auth_helper
global _neo4j_driver
global _memcached_client
Expand Down Expand Up @@ -105,6 +108,12 @@ def initialize(valid_yaml_file,
logger.critical(msg=msg)
raise Exception(msg)

if entity_api_url is not None:
_entity_api_url = entity_api_url
else:
msg = f"Unable to initialize schema manager with entity_api_url={entity_api_url}."
logger.critical(msg=msg)
raise Exception(msg)
# Get the helper instances
_auth_helper = auth_helper_instance
_neo4j_driver = neo4j_driver_instance
Expand Down Expand Up @@ -1701,6 +1710,22 @@ def get_ingest_api_url():
return _ingest_api_url


"""
Accessor for the entity-api base URL held by the schema manager, for use by trigger methods

Returns
-------
str
    The entity-api URL
"""


def get_entity_api_url():
    # Reading a module-level name needs no `global` declaration
    return _entity_api_url


"""
Get the AuthHelper instance to be used by trigger methods
Expand Down
60 changes: 60 additions & 0 deletions src/schema/schema_triggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1402,6 +1402,66 @@ def delete_thumbnail_file(property_key, normalized_type, user_token, existing_da
return generated_dict


"""
Trigger event method that fans out to the functions involved in updating the status value:
first set_status_history, then sync_component_dataset_status

Parameters
----------
property_key : str
    The target property key
normalized_type : str
    One of the types defined in the schema yaml: Dataset
user_token: str
    The user's globus nexus token
existing_data_dict : dict
    A dictionary that contains all existing entity properties
new_data_dict : dict
    A merged dictionary that contains all possible input data to be used
"""

def update_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
    trigger_args = (property_key, normalized_type, user_token, existing_data_dict, new_data_dict)

    # Handlers run in this order, each receiving the unmodified trigger arguments
    for handler in (set_status_history, sync_component_dataset_status):
        handler(*trigger_args)


"""
Function that changes the status of component datasets when their parent multi-assay dataset's status changes

Each child of the given dataset whose creation action is 'Multi-Assay Split' gets a PUT request
sent to entity-api carrying the parent's status. A non-successful response is logged, not raised,
so one failing component does not stop the remaining ones from being synced.

Parameters
----------
property_key : str
    The target property key
normalized_type : str
    One of the types defined in the schema yaml: Dataset
user_token: str
    The user's globus nexus token
existing_data_dict : dict
    A dictionary that contains all existing entity properties
new_data_dict : dict
    A merged dictionary that contains all possible input data to be used

Raises
------
KeyError
    If 'uuid' or 'status' is missing from existing_data_dict
"""
def sync_component_dataset_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
    # Function-scope import keeps this block self-contained regardless of the module's import list
    import logging

    if 'uuid' not in existing_data_dict:
        raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'sync_component_dataset_status()' trigger method.")
    uuid = existing_data_dict['uuid']

    if 'status' not in existing_data_dict:
        raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'sync_component_dataset_status()' trigger method.")
    status = existing_data_dict['status']

    # The driver is the same for every query below, so look it up once
    neo4j_driver = schema_manager.get_neo4j_driver_instance()
    children_uuids_list = schema_neo4j_queries.get_children(neo4j_driver, uuid, property_key='uuid')
    status_body = {"status": status}

    for child_uuid in children_uuids_list:
        creation_action = schema_neo4j_queries.get_entity_creation_action_activity(neo4j_driver, child_uuid)
        # Only component datasets (created by a multi-assay split) follow the parent's status
        if creation_action != 'Multi-Assay Split':
            continue

        # Tolerate a trailing slash on the configured base URL to avoid '//entities/...'
        base_url = schema_manager.get_entity_api_url().rstrip('/')
        url = base_url + SchemaConstants.ENTITY_API_UPDATE_ENDPOINT + '/' + child_uuid

        header = schema_manager._create_request_headers(user_token)
        header[SchemaConstants.HUBMAP_APP_HEADER] = SchemaConstants.INGEST_API_APP
        # Marks the request as the internal component sync; validate_dataset_not_component
        # lets status updates carrying this header value through
        header[SchemaConstants.INTERNAL_TRIGGER] = SchemaConstants.COMPONENT_DATASET

        response = requests.put(url=url, headers=header, json=status_body)
        if not response.ok:
            # Best-effort sync: surface the failure in the logs instead of dropping it silently
            logging.getLogger(__name__).error(
                "Failed to sync status '%s' from dataset %s to component dataset %s: "
                "entity-api responded with HTTP %s",
                status, uuid, child_uuid, response.status_code
            )


####################################################################################################
## Trigger methods specific to Donor - DO NOT RENAME
####################################################################################################
Expand Down
32 changes: 32 additions & 0 deletions src/schema/schema_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,38 @@ def validate_no_duplicates_in_list(property_key, normalized_entity_type, request
if len(set(target_list)) != len(target_list):
raise ValueError(f"The {property_key} field must only contain unique items")


"""
Validate that a given dataset is not a component of a multi-assay split parent dataset before allowing status to be
updated. If a component dataset needs to be updated, update it via its parent multi-assay dataset

Requests whose internal-trigger header carries the component-dataset value skip this check, so the
status can still be changed on a component by such internal requests.

Parameters
----------
property_key : str
    The target property key
normalized_entity_type : str
    The normalized entity type of the entity being validated
request: Flask request object
    The instance of Flask request passed in from application request
existing_data_dict : dict
    A dictionary that contains all existing entity properties
new_data_dict : dict
    The json data in request body, already after the regular validations

Raises
------
ValueError
    If the dataset was created by a 'Multi-Assay Split' activity and the request is not an internal
    component-dataset trigger
"""


def validate_dataset_not_component(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
    # Internal sync requests are allowed to change the status of a component dataset
    if request.headers.get(SchemaConstants.INTERNAL_TRIGGER) == SchemaConstants.COMPONENT_DATASET:
        return

    neo4j_driver_instance = schema_manager.get_neo4j_driver_instance()
    uuid = existing_data_dict['uuid']
    creation_action = schema_neo4j_queries.get_entity_creation_action_activity(neo4j_driver_instance, uuid)

    if creation_action == 'Multi-Assay Split':
        raise ValueError(f"Unable to modify existing {existing_data_dict['entity_type']}"
                         f" {uuid}. Can not change status on component datasets directly. Status"
                         f" change must occur on parent multi-assay split dataset")


"""
If an entity has a DOI, do not allow it to be updated
"""
Expand Down
Loading