Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Derek furst/sync component datasets #646

Merged
merged 8 commits into from
Apr 3, 2024
26 changes: 26 additions & 0 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Python application

# Run on pushes and pull requests targeting the main integration branches
on:
  push:
    branches: [ "main", "dev-integrate" ]
  pull_request:
    branches: [ "main", "dev-integrate" ]

# The job only needs read access to the repository contents
permissions:
  contents: read

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      # Pin the interpreter to the version the service targets
      - name: Set up Python 3.9
        uses: actions/setup-python@v3
        with:
          python-version: "3.9"
      - name: Upgrade Pip
        run: python -m pip install --upgrade pip
        working-directory: src
      # NOTE(review): despite the header comment, no test or lint step is defined yet --
      # this job currently only verifies that the dependencies install cleanly
      - name: Install Dependencies
        run: pip install -r requirements.txt
        working-directory: src
1 change: 1 addition & 0 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ def http_internal_server_error(e):
app.config['UUID_API_URL'],
app.config['INGEST_API_URL'],
app.config['ONTOLOGY_API_URL'],
app.config['ENTITY_API_URL'],
auth_helper_instance,
neo4j_driver_instance,
memcached_client_instance,
Expand Down
3 changes: 3 additions & 0 deletions src/instance/app.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ INGEST_API_URL = 'https://ingest-api.dev.hubmapconsortium.org'
# Works regardless of the trailing slash
ONTOLOGY_API_URL = 'https://ontology-api.dev.hubmapconsortium.org'

# URL for talking to Entity API (default for DEV)
ENTITY_API_URL = 'https://entity-api.dev.hubmapconsortium.org'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even though this is instructional, we should use localhost URL rather than DEV. Add some comment to explain this is the same URL base where the entity-api is running. Because if someone uses the DEV URL and runs the entity-api locally, it'll end up updating the component datasets in DEV neo4j from the local trigger requests. The key is to keep it consistent. And once deployed on DEV/TEST/PROD, we can still use a localhost:port where the entity-api runs on the VM.


# A list of URLs for talking to multiple Search API instances (default value used for docker deployment, no token needed)
# Works regardless of the trailing slash /
SEARCH_API_URL_LIST = ['http://search-api:8080']
Expand Down
7 changes: 4 additions & 3 deletions src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -311,11 +311,12 @@ ENTITIES:
- validate_application_header_before_property_update
- validate_dataset_status_value
- validate_status_changed
- validate_dataset_not_component
generated: true
description: "One of: New|Processing|QA|Published|Error|Hold|Invalid|Submitted"
description: "One of: New|Processing|Published|QA|Error|Hold|Invalid|Submitted|Incomplete"
before_create_trigger: set_dataset_status_new
after_create_trigger: set_status_history
after_update_trigger: set_status_history
after_update_trigger: update_status
title:
type: string
generated: true # Disallow entry from users via POST
Expand Down Expand Up @@ -955,7 +956,7 @@ ENTITIES:
- validate_status_changed
type: string
generated: true
description: "One of: New|Valid|Invalid|Error|Reorganized|Processing"
description: "One of: New|Valid|Invalid|Error|Reorganized|Processing|Submitted|Incomplete"
# Trigger method will set the status to "New" on create
before_create_trigger: set_upload_status_new
after_create_trigger: set_status_history
Expand Down
1 change: 1 addition & 0 deletions src/schema/schema_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class SchemaConstants(object):
ACCESS_LEVEL_CONSORTIUM = 'consortium'
ACCESS_LEVEL_PROTECTED = 'protected'

ENTITY_API_UPDATE_ENDPOINT = '/entities'
UUID_API_ID_ENDPOINT = '/uuid'
INGEST_API_FILE_COMMIT_ENDPOINT = '/file-commit'
INGEST_API_FILE_REMOVE_ENDPOINT = '/file-remove'
Expand Down
25 changes: 25 additions & 0 deletions src/schema/schema_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
_schema = None
_uuid_api_url = None
_ingest_api_url = None
_entity_api_url = None
_ontology_api_url = None
_auth_helper = None
_neo4j_driver = None
Expand Down Expand Up @@ -69,6 +70,7 @@ def initialize(valid_yaml_file,
uuid_api_url,
ingest_api_url,
ontology_api_url,
entity_api_url,
auth_helper_instance,
neo4j_driver_instance,
memcached_client_instance,
Expand All @@ -78,6 +80,7 @@ def initialize(valid_yaml_file,
global _uuid_api_url
global _ingest_api_url
global _ontology_api_url
global _entity_api_url
global _auth_helper
global _neo4j_driver
global _memcached_client
Expand Down Expand Up @@ -105,6 +108,12 @@ def initialize(valid_yaml_file,
logger.critical(msg=msg)
raise Exception(msg)

if entity_api_url is not None:
_entity_api_url = entity_api_url
else:
msg = f"Unable to initialize schema manager with entity_api_url={entity_api_url}."
logger.critical(msg=msg)
raise Exception(msg)
# Get the helper instances
_auth_helper = auth_helper_instance
_neo4j_driver = neo4j_driver_instance
Expand Down Expand Up @@ -1701,6 +1710,22 @@ def get_ingest_api_url():
return _ingest_api_url


"""
Get the entity-api URL to be used by trigger methods

Returns
-------
str
    The entity-api URL this module was initialized with
"""


def get_entity_api_url():
    # A `global` declaration is only required for assignment; reading the
    # module-level value works without one
    return _entity_api_url


"""
Get the AuthHelper instance to be used by trigger methods

Expand Down
59 changes: 59 additions & 0 deletions src/schema/schema_triggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1402,6 +1402,65 @@ def delete_thumbnail_file(property_key, normalized_type, user_token, existing_da
return generated_dict


"""
Trigger event method that runs every function involved in updating the status value

Parameters
----------
property_key : str
    The target property key
normalized_type : str
    One of the types defined in the schema yaml: Dataset
user_token: str
    The user's globus nexus token
existing_data_dict : dict
    A dictionary that contains all existing entity properties
new_data_dict : dict
    A merged dictionary that contains all possible input data to be used
"""

def update_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
    # Both downstream triggers take the exact same arguments
    trigger_args = (property_key, normalized_type, user_token, existing_data_dict, new_data_dict)

    # Record the new status value in the entity's status history
    set_status_history(*trigger_args)

    # Push the status change down to any component datasets split from this dataset
    sync_component_dataset_status(*trigger_args)


"""
Function that changes the status of component datasets when their parent multi-assay dataset's status changes

Parameters
----------
property_key : str
    The target property key
normalized_type : str
    One of the types defined in the schema yaml: Dataset
user_token: str
    The user's globus nexus token
existing_data_dict : dict
    A dictionary that contains all existing entity properties
new_data_dict : dict
    A merged dictionary that contains all possible input data to be used
"""
def sync_component_dataset_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
    # Fixed: the original error messages referenced 'link_dataset_to_direct_ancestors()',
    # a copy-paste from another trigger method
    if 'uuid' not in existing_data_dict:
        raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'sync_component_dataset_status()' trigger method.")
    uuid = existing_data_dict['uuid']
    if 'status' not in existing_data_dict:
        raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'sync_component_dataset_status()' trigger method.")
    status = existing_data_dict['status']

    # Look the driver up once instead of on every loop iteration
    neo4j_driver = schema_manager.get_neo4j_driver_instance()
    children_uuids_list = schema_neo4j_queries.get_children(neo4j_driver, uuid, property_key='uuid')
    status_body = {"status": status}
    for child_uuid in children_uuids_list:
        creation_action = schema_neo4j_queries.get_entity_creation_action_activity(neo4j_driver, child_uuid)
        # Only children created by a 'Multi-Assay Split' activity are components of this parent
        if creation_action == 'Multi-Assay Split':
            url = schema_manager.get_entity_api_url() + SchemaConstants.ENTITY_API_UPDATE_ENDPOINT + '/' + child_uuid
            header = schema_manager._create_request_headers(user_token)
            # Set the application header to the ingest-api identifier from SchemaConstants
            header[SchemaConstants.HUBMAP_APP_HEADER] = SchemaConstants.INGEST_API_APP
            # NOTE(review): a failed PUT is deliberately not raised so one failing child does
            # not abort the parent update -- confirm whether failures should be logged instead
            requests.put(url=url, headers=header, json=status_body)


####################################################################################################
## Trigger methods specific to Donor - DO NOT RENAME
####################################################################################################
Expand Down
40 changes: 37 additions & 3 deletions src/schema/schema_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,36 @@ def validate_no_duplicates_in_list(property_key, normalized_entity_type, request
if len(set(target_list)) != len(target_list):
raise ValueError(f"The {property_key} field must only contain unique items")


"""
Validate that a given dataset is not a component of a multi-assay split parent dataset before allowing
its status to be updated. If a component dataset needs its status updated, the update must be made via
its parent multi-assay dataset

Parameters
----------
property_key : str
    The target property key
normalized_entity_type : str
    One of the types defined in the schema yaml: Dataset
request: Flask request object
    The instance of Flask request passed in from application request
existing_data_dict : dict
    A dictionary that contains all existing entity properties
new_data_dict : dict
    The json data in request body, already after the regular validations
"""


def validate_dataset_not_component(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
    neo4j_driver_instance = schema_manager.get_neo4j_driver_instance()
    uuid = existing_data_dict['uuid']
    creation_action = schema_neo4j_queries.get_entity_creation_action_activity(neo4j_driver_instance, uuid)
    if creation_action == 'Multi-Assay Split':
        # Fixed: the original message concatenated "Status" + "change" with no space ("Statuschange")
        raise ValueError(f"Unable to modify existing {existing_data_dict['entity_type']}"
                         f" {existing_data_dict['uuid']}. Can not change status on component datasets directly."
                         f" Status change must occur on parent multi-assay split dataset")


"""
If an entity has a DOI, do not allow it to be updated
"""
Expand Down Expand Up @@ -279,7 +309,9 @@ def validate_application_header_before_property_update(property_key, normalized_
"""
def validate_dataset_status_value(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
# Use lowercase for comparison
accepted_status_values = ['new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted']
accepted_status_values = [
'new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted', 'incomplete'
]
new_status = new_data_dict[property_key].lower()

if new_status not in accepted_status_values:
Expand Down Expand Up @@ -455,7 +487,9 @@ def validate_retracted_dataset_sub_status_value(property_key, normalized_entity_
"""
def validate_upload_status_value(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
# Use lowercase for comparison
accepted_status_values = ['new', 'valid', 'invalid', 'error', 'reorganized', 'processing', 'submitted']
accepted_status_values = [
'new', 'valid', 'invalid', 'error', 'reorganized', 'processing', 'submitted', 'incomplete'
]
new_status = new_data_dict[property_key].lower()

if new_status not in accepted_status_values:
Expand All @@ -480,7 +514,7 @@ def validate_upload_status_value(property_key, normalized_entity_type, request,
"""
def validate_sample_category(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
defined_tissue_types = ["organ", "block", "section", "suspension"]
sample_category = new_data_dict[property_key]
sample_category = new_data_dict[property_key].lower()

if sample_category not in defined_tissue_types:
raise ValueError(f"Invalid sample_category: {sample_category}")
Expand Down
Loading