Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Derek furst/sync component datasets #646

Merged
merged 8 commits into from
Apr 3, 2024
26 changes: 26 additions & 0 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Python application

# Run on pushes and pull requests targeting the main integration branches
on:
  push:
    branches: [ "main", "dev-integrate" ]
  pull_request:
    branches: [ "main", "dev-integrate" ]

# The job only needs read access to the repository contents
permissions:
  contents: read

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      # Pin the interpreter to the version the service targets
      - name: Set up Python 3.9
        uses: actions/setup-python@v3
        with:
          python-version: "3.9"
      - name: Upgrade Pip
        run: python -m pip install --upgrade pip
        working-directory: src
      # NOTE(review): despite the header comment, no test or lint step is defined yet --
      # this job currently only verifies that the dependencies install cleanly
      - name: Install Dependencies
        run: pip install -r requirements.txt
        working-directory: src
1 change: 1 addition & 0 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ def http_internal_server_error(e):
app.config['UUID_API_URL'],
app.config['INGEST_API_URL'],
app.config['ONTOLOGY_API_URL'],
app.config['ENTITY_API_URL'],
auth_helper_instance,
neo4j_driver_instance,
memcached_client_instance,
Expand Down
3 changes: 3 additions & 0 deletions src/instance/app.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ INGEST_API_URL = 'https://ingest-api.dev.hubmapconsortium.org'
# Works regardless of the trailing slash
ONTOLOGY_API_URL = 'https://ontology-api.dev.hubmapconsortium.org'

# URL for talking to Entity API (default for DEV)
ENTITY_API_URL = 'https://entity-api.dev.hubmapconsortium.org'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even though this is instructional, we should use localhost URL rather than DEV. Add some comment to explain this is the same URL base where the entity-api is running. Because if someone uses the DEV URL and runs the entity-api locally, it'll end up updating the component datasets in DEV neo4j from the local trigger requests. The key is to keep it consistent. And once deployed on DEV/TEST/PROD, we can still use a localhost:port where the entity-api runs on the VM.


# A list of URLs for talking to multiple Search API instances (default value used for docker deployment, no token needed)
# Works regardless of the trailing slash /
SEARCH_API_URL_LIST = ['http://search-api:8080']
Expand Down
7 changes: 4 additions & 3 deletions src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -311,11 +311,12 @@ ENTITIES:
- validate_application_header_before_property_update
- validate_dataset_status_value
- validate_status_changed
- validate_dataset_not_component
generated: true
description: "One of: New|Processing|QA|Published|Error|Hold|Invalid|Submitted"
description: "One of: New|Processing|Published|QA|Error|Hold|Invalid|Submitted|Incomplete"
before_create_trigger: set_dataset_status_new
after_create_trigger: set_status_history
after_update_trigger: set_status_history
after_update_trigger: update_status
title:
type: string
generated: true # Disallow entry from users via POST
Expand Down Expand Up @@ -955,7 +956,7 @@ ENTITIES:
- validate_status_changed
type: string
generated: true
description: "One of: New|Valid|Invalid|Error|Reorganized|Processing"
description: "One of: New|Valid|Invalid|Error|Reorganized|Processing|Submitted|Incomplete"
# Trigger method will set the status to "New" on create
before_create_trigger: set_upload_status_new
after_create_trigger: set_status_history
Expand Down
1 change: 1 addition & 0 deletions src/schema/schema_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class SchemaConstants(object):
ACCESS_LEVEL_CONSORTIUM = 'consortium'
ACCESS_LEVEL_PROTECTED = 'protected'

ENTITY_API_UPDATE_ENDPOINT = '/entities'
UUID_API_ID_ENDPOINT = '/uuid'
INGEST_API_FILE_COMMIT_ENDPOINT = '/file-commit'
INGEST_API_FILE_REMOVE_ENDPOINT = '/file-remove'
Expand Down
25 changes: 25 additions & 0 deletions src/schema/schema_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
_schema = None
_uuid_api_url = None
_ingest_api_url = None
_entity_api_url = None
_ontology_api_url = None
_auth_helper = None
_neo4j_driver = None
Expand Down Expand Up @@ -69,6 +70,7 @@ def initialize(valid_yaml_file,
uuid_api_url,
ingest_api_url,
ontology_api_url,
entity_api_url,
auth_helper_instance,
neo4j_driver_instance,
memcached_client_instance,
Expand All @@ -78,6 +80,7 @@ def initialize(valid_yaml_file,
global _uuid_api_url
global _ingest_api_url
global _ontology_api_url
global _entity_api_url
global _auth_helper
global _neo4j_driver
global _memcached_client
Expand Down Expand Up @@ -105,6 +108,12 @@ def initialize(valid_yaml_file,
logger.critical(msg=msg)
raise Exception(msg)

if entity_api_url is not None:
_entity_api_url = entity_api_url
else:
msg = f"Unable to initialize schema manager with entity_api_url={entity_api_url}."
logger.critical(msg=msg)
raise Exception(msg)
# Get the helper instances
_auth_helper = auth_helper_instance
_neo4j_driver = neo4j_driver_instance
Expand Down Expand Up @@ -1701,6 +1710,22 @@ def get_ingest_api_url():
return _ingest_api_url


"""
Get the entity-api URL to be used by trigger methods

Returns
-------
str
    The entity-api URL this module was initialized with
"""


def get_entity_api_url():
    # A `global` declaration is only required for assignment; reading the
    # module-level value works without one
    return _entity_api_url


"""
Get the AuthHelper instance to be used by trigger methods

Expand Down
59 changes: 59 additions & 0 deletions src/schema/schema_triggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1402,6 +1402,65 @@ def delete_thumbnail_file(property_key, normalized_type, user_token, existing_da
return generated_dict


"""
Trigger event method that runs every function involved in updating the status value

Parameters
----------
property_key : str
    The target property key
normalized_type : str
    One of the types defined in the schema yaml: Dataset
user_token: str
    The user's globus nexus token
existing_data_dict : dict
    A dictionary that contains all existing entity properties
new_data_dict : dict
    A merged dictionary that contains all possible input data to be used
"""

def update_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
    # Both downstream triggers take the exact same arguments
    trigger_args = (property_key, normalized_type, user_token, existing_data_dict, new_data_dict)

    # Record the new status value in the entity's status history
    set_status_history(*trigger_args)

    # Push the status change down to any component datasets split from this dataset
    sync_component_dataset_status(*trigger_args)


"""
Function that changes the status of component datasets when their parent multi-assay dataset's status changes

Parameters
----------
property_key : str
    The target property key
normalized_type : str
    One of the types defined in the schema yaml: Dataset
user_token: str
    The user's globus nexus token
existing_data_dict : dict
    A dictionary that contains all existing entity properties
new_data_dict : dict
    A merged dictionary that contains all possible input data to be used
"""
def sync_component_dataset_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
    # Fixed: the original error messages referenced 'link_dataset_to_direct_ancestors()',
    # a copy-paste from another trigger method
    if 'uuid' not in existing_data_dict:
        raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'sync_component_dataset_status()' trigger method.")
    uuid = existing_data_dict['uuid']
    if 'status' not in existing_data_dict:
        raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'sync_component_dataset_status()' trigger method.")
    status = existing_data_dict['status']

    # Look the driver up once instead of on every loop iteration
    neo4j_driver = schema_manager.get_neo4j_driver_instance()
    children_uuids_list = schema_neo4j_queries.get_children(neo4j_driver, uuid, property_key='uuid')
    status_body = {"status": status}
    for child_uuid in children_uuids_list:
        creation_action = schema_neo4j_queries.get_entity_creation_action_activity(neo4j_driver, child_uuid)
        # Only children created by a 'Multi-Assay Split' activity are components of this parent
        if creation_action == 'Multi-Assay Split':
            url = schema_manager.get_entity_api_url() + SchemaConstants.ENTITY_API_UPDATE_ENDPOINT + '/' + child_uuid
            header = schema_manager._create_request_headers(user_token)
            # Set the application header to the ingest-api identifier from SchemaConstants
            header[SchemaConstants.HUBMAP_APP_HEADER] = SchemaConstants.INGEST_API_APP
            # NOTE(review): a failed PUT is deliberately not raised so one failing child does
            # not abort the parent update -- confirm whether failures should be logged instead
            requests.put(url=url, headers=header, json=status_body)


####################################################################################################
## Trigger methods specific to Donor - DO NOT RENAME
####################################################################################################
Expand Down
40 changes: 37 additions & 3 deletions src/schema/schema_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,36 @@ def validate_no_duplicates_in_list(property_key, normalized_entity_type, request
if len(set(target_list)) != len(target_list):
raise ValueError(f"The {property_key} field must only contain unique items")


"""
Validate that a given dataset is not a component of a multi-assay split parent dataset before allowing
its status to be updated. If a component dataset needs its status updated, the update must be made via
its parent multi-assay dataset

Parameters
----------
property_key : str
    The target property key
normalized_entity_type : str
    One of the types defined in the schema yaml: Dataset
request: Flask request object
    The instance of Flask request passed in from application request
existing_data_dict : dict
    A dictionary that contains all existing entity properties
new_data_dict : dict
    The json data in request body, already after the regular validations
"""


def validate_dataset_not_component(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
    neo4j_driver_instance = schema_manager.get_neo4j_driver_instance()
    uuid = existing_data_dict['uuid']
    creation_action = schema_neo4j_queries.get_entity_creation_action_activity(neo4j_driver_instance, uuid)
    if creation_action == 'Multi-Assay Split':
        # Fixed: the original message concatenated "Status" + "change" with no space ("Statuschange")
        raise ValueError(f"Unable to modify existing {existing_data_dict['entity_type']}"
                         f" {existing_data_dict['uuid']}. Can not change status on component datasets directly."
                         f" Status change must occur on parent multi-assay split dataset")


"""
If an entity has a DOI, do not allow it to be updated
"""
Expand Down Expand Up @@ -279,7 +309,9 @@ def validate_application_header_before_property_update(property_key, normalized_
"""
def validate_dataset_status_value(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
# Use lowercase for comparison
accepted_status_values = ['new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted']
accepted_status_values = [
'new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted', 'incomplete'
]
new_status = new_data_dict[property_key].lower()

if new_status not in accepted_status_values:
Expand Down Expand Up @@ -455,7 +487,9 @@ def validate_retracted_dataset_sub_status_value(property_key, normalized_entity_
"""
def validate_upload_status_value(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
# Use lowercase for comparison
accepted_status_values = ['new', 'valid', 'invalid', 'error', 'reorganized', 'processing', 'submitted']
accepted_status_values = [
'new', 'valid', 'invalid', 'error', 'reorganized', 'processing', 'submitted', 'incomplete'
]
new_status = new_data_dict[property_key].lower()

if new_status not in accepted_status_values:
Expand All @@ -480,7 +514,7 @@ def validate_upload_status_value(property_key, normalized_entity_type, request,
"""
def validate_sample_category(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
defined_tissue_types = ["organ", "block", "section", "suspension"]
sample_category = new_data_dict[property_key]
sample_category = new_data_dict[property_key].lower()

if sample_category not in defined_tissue_types:
raise ValueError(f"Invalid sample_category: {sample_category}")
Expand Down
Loading