Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tjmadonna/314 synchronize component dataset status #329

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 22 additions & 16 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,15 +212,18 @@ def close_neo4j_driver(error):
try:
# The schema_manager is a singleton module
# Pass in auth_helper_instance, neo4j_driver instance, and file_upload_helper instance
schema_manager.initialize(app.config['SCHEMA_YAML_FILE'],
app.config['UUID_API_URL'],
app.config['INGEST_API_URL'],
app.config['SEARCH_API_URL'],
auth_helper_instance,
neo4j_driver_instance,
app.ubkg,
memcached_client_instance,
app.config['MEMCACHED_PREFIX'])
schema_manager.initialize(
valid_yaml_file=app.config['SCHEMA_YAML_FILE'],
uuid_api_url=app.config['UUID_API_URL'],
entity_api_url=app.config['ENTITY_API_URL'],
ingest_api_url=app.config['INGEST_API_URL'],
search_api_url=app.config['SEARCH_API_URL'],
auth_helper_instance=auth_helper_instance,
neo4j_driver_instance=neo4j_driver_instance,
ubkg_instance=app.ubkg,
memcached_client_instance=memcached_client_instance,
memcached_prefix=app.config['MEMCACHED_PREFIX']
)

logger.info("Initialized schema_manager module successfully :)")
# Use a broad catch-all here
Expand Down Expand Up @@ -1229,6 +1232,10 @@ def update_entity(id):
normalized_status = schema_manager.normalize_status(json_data_dict["status"])
json_data_dict["status"] = normalized_status

has_updated_status = False
if 'status' in json_data_dict and json_data_dict['status']:
has_updated_status = True

# Normalize user provided status
if "sub_status" in json_data_dict:
normalized_status = schema_manager.normalize_status(json_data_dict["sub_status"])
Expand All @@ -1240,7 +1247,6 @@ def update_entity(id):
# Normalize user provided entity_type
normalized_entity_type = schema_manager.normalize_entity_type(entity_dict['entity_type'])


verify_ubkg_properties(json_data_dict)

# Note, we don't support entity level validators on entity update via PUT
Expand All @@ -1249,7 +1255,7 @@ def update_entity(id):
# Validate request json against the yaml schema
# Pass in the entity_dict for missing required key check, this is different from creating new entity
try:
schema_manager.validate_json_data_against_schema('ENTITIES', json_data_dict, normalized_entity_type, existing_entity_dict = entity_dict)
schema_manager.validate_json_data_against_schema('ENTITIES', json_data_dict, normalized_entity_type, existing_entity_dict=entity_dict)
except schema_errors.SchemaValidationException as e:
# No need to log the validation errors
abort_bad_req(str(e))
Expand Down Expand Up @@ -1284,7 +1290,7 @@ def update_entity(id):
# Generate 'before_update_trigger' data and update the entity details in Neo4j
merged_updated_dict = update_object_details('ENTITIES', request, normalized_entity_type, user_token, json_data_dict, entity_dict)

# Handle linkages update via `after_update_trigger` methods
# Handle linkages update via `after_update_trigger` methods
if has_direct_ancestor_uuid:
after_update(normalized_entity_type, user_token, merged_updated_dict)
elif normalized_entity_type in ['Dataset', 'Publication']:
Expand All @@ -1302,7 +1308,7 @@ def update_entity(id):
merged_updated_dict = update_object_details('ENTITIES', request, normalized_entity_type, user_token, json_data_dict, entity_dict)

# Handle linkages update via `after_update_trigger` methods
if has_direct_ancestor_uuids:
if has_direct_ancestor_uuids or has_updated_status:
after_update(normalized_entity_type, user_token, merged_updated_dict)
elif normalized_entity_type == 'Upload':
has_dataset_uuids_to_link = False
Expand Down Expand Up @@ -1334,11 +1340,11 @@ def update_entity(id):
merged_updated_dict = update_object_details('ENTITIES', request, normalized_entity_type, user_token, json_data_dict, entity_dict)

# Handle linkages update via `after_update_trigger` methods
if has_dataset_uuids_to_link or has_dataset_uuids_to_unlink:
if has_dataset_uuids_to_link or has_updated_status:
after_update(normalized_entity_type, user_token, merged_updated_dict)
elif normalized_entity_type == 'Collection':
entity_visibility = _get_entity_visibility( normalized_entity_type=normalized_entity_type
,entity_dict=entity_dict)
entity_visibility = _get_entity_visibility(normalized_entity_type=normalized_entity_type, entity_dict=entity_dict)

# Prohibit update of an existing Collection if it meets criteria of being visible to public e.g. has DOI.
if entity_visibility == DataVisibilityEnum.PUBLIC:
logger.info(f"Attempt to update {normalized_entity_type} with id={id} which has visibility {entity_visibility}.")
Expand Down
5 changes: 5 additions & 0 deletions src/instance/app.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ UUID_API_URL = 'http://uuid-api:8080'
# Works regardless of the trailing slash
INGEST_API_URL = 'http://ingest-api:8080'

# URL for talking to Entity API (default for Localhost)
# This is the same URL base where entity-api is running. This is useful in places where a call for one entity
# necessitates subsequent calls for other entities.
ENTITY_API_URL = 'http://localhost:5002'

# URL for talking to Search API (default value used for docker deployment, no token needed)
# Don't use localhost since search-api is running on a different container
# Point to remote URL for non-docker development
Expand Down
4 changes: 3 additions & 1 deletion src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -369,11 +369,13 @@ ENTITIES:
before_property_update_validators:
- validate_application_header_before_property_update
- validate_dataset_status_value
- validate_status_changed
- validate_dataset_not_component
generated: true
description: "One of: New|Processing|QA|Published|Error|Hold|Invalid|Incomplete"
before_create_trigger: set_dataset_status_new
after_create_trigger: set_status_history
after_update_trigger: set_status_history
after_update_trigger: update_status
status_history:
type: list
description: "A list of all status change events. Each entry in the list is a dictionary containing the change_timestamp, changed_by_email, previous_status, new_status"
Expand Down
5 changes: 5 additions & 0 deletions src/schema/schema_constants.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
from enum import Enum


class SchemaConstants(object):
# Expire the request cache after the time-to-live (seconds), default 4 hours
REQUEST_CACHE_TTL = 14400
MEMCACHED_TTL = 7200

# Constants used by validators
INGEST_API_APP = 'ingest-api'
COMPONENT_DATASET = 'component-dataset'
INGEST_PIPELINE_APP = 'ingest-pipeline'
INGEST_PORTAL_APP = 'portal-ui'
# HTTP header names are case-insensitive
SENNET_APP_HEADER = 'X-SenNet-Application'
INTERNAL_TRIGGER = 'X-Internal-Trigger'
DATASET_STATUS_PUBLISHED = 'published'

# Used by triggers, all lowercase for easy comparison
Expand All @@ -25,6 +29,7 @@ class SchemaConstants(object):
ALLOWED_DATASET_STATUSES = ['new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted', 'incomplete']
ALLOWED_UPLOAD_STATUSES = ['new', 'valid', 'invalid', 'error', 'reorganized', 'processing', 'submitted', 'incomplete']


# Define an enumeration to classify an entity's visibility, which can be combined with
# authorization info when verify operations on a request.
class DataVisibilityEnum(Enum):
Expand Down
31 changes: 26 additions & 5 deletions src/schema/schema_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
import yaml
import logging
import requests
from flask import Response
from datetime import datetime

from flask import Response
from hubmap_commons.file_helper import ensureTrailingSlashURL

# Don't confuse urllib (Python native library) with urllib3 (3rd-party library, requests also uses urllib3)
from requests.packages.urllib3.exceptions import InsecureRequestWarning

Expand All @@ -15,9 +17,6 @@
from schema.schema_constants import SchemaConstants
from schema import schema_neo4j_queries

# HuBMAP commons
from hubmap_commons.hm_auth import AuthHelper

# Atlas Consortia commons
from atlas_consortia_commons.rest import *

Expand All @@ -36,6 +35,7 @@
# A single leading underscore means you're not supposed to access it "from the outside"
_schema = None
_uuid_api_url = None
_entity_api_url = None
_ingest_api_url = None
_search_api_url = None
_auth_helper = None
Expand Down Expand Up @@ -66,6 +66,7 @@

def initialize(valid_yaml_file,
uuid_api_url,
entity_api_url,
ingest_api_url,
search_api_url,
auth_helper_instance,
Expand All @@ -77,6 +78,7 @@ def initialize(valid_yaml_file,
# Specify as module-scope variables
global _schema
global _uuid_api_url
global _entity_api_url
global _ingest_api_url
global _search_api_url
global _auth_helper
Expand All @@ -93,6 +95,13 @@ def initialize(valid_yaml_file,
_ingest_api_url = ingest_api_url
_search_api_url = search_api_url

if entity_api_url is not None:
_entity_api_url = entity_api_url
else:
msg = f"Unable to initialize schema manager with entity_api_url={entity_api_url}."
logger.critical(msg=msg)
raise Exception(msg)

# Get the helper instances
_auth_helper = auth_helper_instance
_neo4j_driver = neo4j_driver_instance
Expand Down Expand Up @@ -2032,4 +2041,16 @@ def delete_memcached_cache(uuids_list):

_memcached_client.delete_many(cache_keys)

logger.info(f"Deleted cache by key: {', '.join(cache_keys)}")
logger.info(f"Deleted cache by key: {', '.join(cache_keys)}")


def get_entity_api_url():
    """ Get the entity-api URL to be used by trigger methods.

    Returns
    -------
    str
        The entity-api URL ending with a trailing slash
    """
    # Reading a module-level variable needs no `global` declaration;
    # `global` is only required when rebinding the name.
    return ensureTrailingSlashURL(_entity_api_url)
70 changes: 68 additions & 2 deletions src/schema/schema_triggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1824,6 +1824,71 @@ def set_was_derived_from(property_key, normalized_type, user_token, existing_dat
raise


def update_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
    """
    Trigger event method that calls related functions involved with updating the status value

    Parameters
    ----------
    property_key : str
        The target property key
    normalized_type : str
        One of the types defined in the schema yaml: Dataset
    user_token: str
        The user's globus nexus token
    existing_data_dict : dict
        A dictionary that contains all existing entity properties
    new_data_dict : dict
        A merged dictionary that contains all possible input data to be used
    """
    trigger_args = (property_key, normalized_type, user_token, existing_data_dict, new_data_dict)

    # First record the change in status_history, then propagate the new status
    # down to any component (multi-assay split) child datasets.
    for status_trigger in (set_status_history, sync_component_dataset_status):
        status_trigger(*trigger_args)


def sync_component_dataset_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
    """
    Function that changes the status of component datasets when their parent multi-assay dataset's status changes.

    Parameters
    ----------
    property_key : str
        The target property key
    normalized_type : str
        One of the types defined in the schema yaml: Dataset
    user_token: str
        The user's globus nexus token
    existing_data_dict : dict
        A dictionary that contains all existing entity properties
    new_data_dict : dict
        A merged dictionary that contains all possible input data to be used

    Raises
    ------
    KeyError
        If 'uuid' or 'status' is missing from existing_data_dict
    """

    if 'uuid' not in existing_data_dict:
        # Fixed: message previously named 'link_dataset_to_direct_ancestors()' (copy-paste error)
        raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'sync_component_dataset_status()' trigger method.")
    uuid = existing_data_dict['uuid']
    if 'status' not in existing_data_dict:
        raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'sync_component_dataset_status()' trigger method.")
    status = existing_data_dict['status']

    # Hoist the driver lookup out of the loop; it is the same singleton instance for every child
    neo4j_driver = schema_manager.get_neo4j_driver_instance()
    children_uuids_list = schema_neo4j_queries.get_children(neo4j_driver, uuid, property_key='uuid')
    status_body = {"status": status}

    for child_uuid in children_uuids_list:
        creation_action = schema_neo4j_queries.get_entity_creation_action_activity(neo4j_driver, child_uuid)
        if creation_action == 'Multi-Assay Split':
            # Update the status of the child entities through entity-api so the normal
            # update pipeline (validators and triggers) runs for each component dataset
            url = schema_manager.get_entity_api_url() + 'entities/' + child_uuid
            header = schema_manager._create_request_headers(user_token)
            header[SchemaConstants.SENNET_APP_HEADER] = SchemaConstants.INGEST_API_APP
            # Marks the request as an internal trigger so the component-dataset validator lets it through
            header[SchemaConstants.INTERNAL_TRIGGER] = SchemaConstants.COMPONENT_DATASET
            response = requests.put(url=url, headers=header, json=status_body)
            if response.status_code != 200:
                # Best-effort propagation: log and continue with the remaining children
                logger.error(f"Failed to update status of child entity {child_uuid} when parent dataset status changed: {response.text}")


####################################################################################################
## Trigger methods specific to Collection - DO NOT RENAME
####################################################################################################
Expand Down Expand Up @@ -2801,6 +2866,7 @@ def source_metadata_display_value(metadata_item: dict) -> str:
####################################################################################################
## Trigger methods shared by Dataset, Upload, and Publication - DO NOT RENAME
####################################################################################################

def set_status_history(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
new_status_history = []
status_entry = {}
Expand Down Expand Up @@ -2839,5 +2905,5 @@ def set_status_history(property_key, normalized_type, user_token, existing_data_
new_status_history.append(status_entry)
entity_data_dict = {"status_history": new_status_history}

schema_neo4j_queries.update_entity(schema_manager.get_neo4j_driver_instance(), normalized_type, entity_data_dict,
uuid)
schema_neo4j_queries.update_entity(schema_manager.get_neo4j_driver_instance(), normalized_type, entity_data_dict, uuid)

55 changes: 55 additions & 0 deletions src/schema/schema_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,61 @@ def validate_group_name(property_key, normalized_entity_type, request, existing_
raise ValueError("Invalid group in 'assigned_to_group_name'. Must be a data provider")


def validate_status_changed(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
    """
    Validate that status, if included in new_data_dict, is different from the existing status value

    Parameters
    ----------
    property_key : str
        The target property key
    normalized_entity_type : str
        The normalized entity type (e.g. Dataset)
    request: Flask request object
        The instance of Flask request passed in from application request
    existing_data_dict : dict
        A dictionary that contains all existing entity properties
    new_data_dict : dict
        The json data in request body, already after the regular validations

    Raises
    ------
    KeyError
        If 'status' is missing from existing_data_dict
    ValueError
        If the requested status equals the existing status (case-insensitive)
    """

    if 'status' not in existing_data_dict:
        raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'validate_status_changed()' validator method.")

    # Only allow 'status' in new_data_dict if it's different than the existing status value
    if existing_data_dict['status'].lower() == new_data_dict['status'].lower():
        # Fixed: the message previously repeated the existing status in the "cannot change to" clause;
        # report the requested new value instead (they may differ in case)
        raise ValueError(f"Status value is already {existing_data_dict['status']}, cannot change to {new_data_dict['status']}. If no change, do not include status field in update")


def validate_dataset_not_component(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
    """
    Validate that a given dataset is not a component of a multi-assay split parent dataset before allowing status
    to be updated. If a component dataset needs to be updated, update it via its parent multi-assay dataset.

    Parameters
    ----------
    property_key : str
        The target property key
    normalized_entity_type : str
        The normalized entity type (e.g. Dataset)
    request: Flask request object
        The instance of Flask request passed in from application request
    existing_data_dict : dict
        A dictionary that contains all existing entity properties
    new_data_dict : dict
        The json data in request body, already after the regular validations

    Raises
    ------
    ValueError
        If the dataset is a component (created via 'Multi-Assay Split') and the
        request did not originate from the internal component-sync trigger
    """
    headers = request.headers
    # Internal requests issued while syncing component statuses carry this header and are allowed through
    if headers.get(SchemaConstants.INTERNAL_TRIGGER) != SchemaConstants.COMPONENT_DATASET:
        neo4j_driver_instance = schema_manager.get_neo4j_driver_instance()
        uuid = existing_data_dict['uuid']
        creation_action = schema_neo4j_queries.get_entity_creation_action_activity(neo4j_driver_instance, uuid)
        if creation_action == 'Multi-Assay Split':
            # Fixed: the original f-string pieces concatenated to "Statuschange" (missing space)
            raise ValueError(f"Unable to modify existing {existing_data_dict['entity_type']}"
                             f" {existing_data_dict['uuid']}. Can not change status on component datasets directly. Status"
                             f" change must occur on parent multi-assay split dataset")


####################################################################################################
## Internal Functions
####################################################################################################
Expand Down
5 changes: 5 additions & 0 deletions test/config/app.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ UUID_API_URL = 'http://uuid-api:8080'
# Works regardless of the trailing slash
INGEST_API_URL = 'http://ingest-api:8080'

# URL for talking to Entity API (default for Localhost)
# This is the same URL base where entity-api is running. This is useful in places where a call for one entity
# necessitates subsequent calls for other entities.
ENTITY_API_URL = 'http://entity-api:5002'

# URL for talking to Search API (default value used for docker deployment, no token needed)
# Don't use localhost since search-api is running on a different container
# Point to remote URL for non-docker development
Expand Down
Loading