Merge branch 'maxsibilla/issue-591' into dev-integrate
maxsibilla committed Nov 25, 2024
2 parents 997a210 + 8000ee9 commit 0ab4133
Showing 8 changed files with 96 additions and 75 deletions.
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
1.4.18
1.4.19
2 changes: 1 addition & 1 deletion ingest-api-spec.yaml
@@ -2,7 +2,7 @@ openapi: 3.0.0
info:
description: |
A RESTful web service exposing calls needed for the SenNet Data Sharing Portal.
version: 1.4.18
version: 1.4.19
title: SenNet Ingest API
contact:
name: SenNet Help Desk
4 changes: 2 additions & 2 deletions src/lib/dataset_helper.py
@@ -15,7 +15,7 @@

# Local modules
from hubmap_commons.hubmap_const import HubmapConst
from hubmap_sdk import EntitySdk
from hubmap_sdk import EntitySdk, Entity

from lib.file_upload_helper import UploadFileHelper
from lib.ingest_file_helper import IngestFileHelper
@@ -440,7 +440,7 @@ def get_file_list(self, orig_file_path):
def dataset_is_primary(self, dataset_uuid):
with self.neo4j_driver_instance.session() as neo_session:
q = (
f"MATCH (ds:Dataset {{uuid: '{dataset_uuid}'}})-[:WAS_GENERATED_BY]->(:Activity)-[:USED]->(s:Sample) RETURN ds.uuid")
f"MATCH (ds:Dataset {{uuid: '{dataset_uuid}'}})-[:WAS_GENERATED_BY]->(a:Activity) WHERE toLower(a.creation_action) = 'create dataset activity' RETURN ds.uuid")
result = neo_session.run(q).data()
if len(result) == 0:
return False
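
For context (not part of the diff itself): the dataset_is_primary change above replaces the old Sample-ancestor test with a check on the generating Activity's creation_action. Below is a minimal sketch of the same check, under the assumption that the uuid is passed as a Cypher query parameter rather than interpolated into the query string as in the committed code.

def dataset_is_primary(neo4j_driver_instance, dataset_uuid: str) -> bool:
    # Equivalent to DatasetHelper.dataset_is_primary after this commit: a dataset
    # is "primary" when its generating Activity has
    # creation_action == 'Create Dataset Activity' (case-insensitive).
    query = (
        "MATCH (ds:Dataset {uuid: $uuid})-[:WAS_GENERATED_BY]->(a:Activity) "
        "WHERE toLower(a.creation_action) = 'create dataset activity' "
        "RETURN ds.uuid"
    )
    with neo4j_driver_instance.session() as neo_session:
        result = neo_session.run(query, uuid=dataset_uuid).data()
        return len(result) > 0
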
84 changes: 66 additions & 18 deletions src/lib/services.py
@@ -1,19 +1,31 @@
import json
import logging
import time
from typing import Callable, List, Optional, Union
from urllib import request

import requests
from flask import current_app
from flask import current_app, request
from hubmap_commons.file_helper import removeTrailingSlashURL, ensureTrailingSlashURL
from hubmap_commons.hm_auth import AuthHelper
from hubmap_sdk import Entity, EntitySdk, SearchSdk
from hubmap_sdk.sdk_helper import HTTPException as SDKException
from rdflib.parser import headers
from requests.adapters import HTTPAdapter, Retry

logger = logging.getLogger(__name__)


def get_token() -> Optional[str]:
auth_helper_instance = AuthHelper.instance()
token = auth_helper_instance.getAuthorizationTokens(request.headers)
if not isinstance(token, str):
token = None
return token


def get_entity(
entity_id: str, token: Optional[str], as_dict: bool = False
entity_id: str, token: Optional[str], as_dict: bool = False
) -> Union[Entity, dict]:
"""Get the entity from entity-api for the given uuid.
@@ -48,7 +60,7 @@ def get_entity(


def get_entity_from_search_api(
entity_id: str, token: Optional[str], as_dict: bool = False
entity_id: str, token: Optional[str], as_dict: bool = False
) -> Union[Entity, dict]:
"""Get the entity from search-api for the given uuid.
Expand Down Expand Up @@ -100,7 +112,7 @@ def get_entity_from_search_api(


def get_associated_sources_from_dataset(
dataset_id: str, token: str, as_dict: bool = False
dataset_id: str, token: str, as_dict: bool = False
) -> Union[List[Entity], dict]:
"""Get the associated sources for the given dataset.
@@ -125,7 +137,9 @@
"""
entity_api_url = ensureTrailingSlashURL(current_app.config["ENTITY_WEBSERVICE_URL"])
url = f"{entity_api_url}datasets/{dataset_id}/sources"
headers = {"Authorization": f"Bearer {token}"}
headers = {}
if token is not None:
headers = {"Authorization": f"Bearer {token}"}
res = requests.get(url, headers=headers)
if not res.ok:
raise SDKException(f"Failed to get associated source for dataset {dataset_id}")
@@ -170,12 +184,12 @@ def reindex_entities(entity_ids: list, token: str) -> None:


def bulk_update_entities(
entity_updates: dict,
token: str,
total_tries: int = 3,
throttle: float = 5,
entity_api_url: Optional[str] = None,
after_each_callback: Optional[Callable[[int], None]] = None,
entity_updates: dict,
token: str,
total_tries: int = 3,
throttle: float = 5,
entity_api_url: Optional[str] = None,
after_each_callback: Optional[Callable[[int], None]] = None,
) -> dict:
"""Bulk update the entities in the entity-api.
@@ -253,13 +267,13 @@


def bulk_create_entities(
entity_type: str,
entities: list,
token: str,
total_tries: int = 3,
throttle: float = 5,
entity_api_url: Optional[str] = None,
after_each_callback: Optional[Callable[[int], None]] = None,
entity_type: str,
entities: list,
token: str,
total_tries: int = 3,
throttle: float = 5,
entity_api_url: Optional[str] = None,
after_each_callback: Optional[Callable[[int], None]] = None,
) -> list:
"""Bulk create the entities in the entity-api.
@@ -359,3 +373,37 @@ def error_msg(json_res: dict) -> str:
return json_res["message"]

return str(json_res)


def obj_to_dict(obj) -> dict:
"""
Convert the obj[ect] into a dict, but deeply.
Note: The Python builtin 'vars()' does not work here because of the way that some of the classes
are defined.
"""
return json.loads(
json.dumps(obj, default=lambda o: getattr(o, '__dict__', str(o)))
)


def entity_json_dumps(entity: Entity, token: str, entity_sdk: EntitySdk, to_file: False):
"""
Because entity and the content of the arrays returned from entity_instance.get_associated_*
contain user defined objects we need to turn them into simple python objects (e.g., dicts, lists, str)
before we can convert them with json.dumps.
Here we create an expanded version of the entity associated with the dataset_uuid and return it as a json string.
"""
dataset_uuid = entity.get_uuid()
entity = obj_to_dict(entity)
entity['organs'] = obj_to_dict(entity_sdk.get_associated_organs_from_dataset(dataset_uuid))
entity['samples'] = obj_to_dict(entity_sdk.get_associated_samples_from_dataset(dataset_uuid))
entity['sources'] = get_associated_sources_from_dataset(dataset_uuid, token=token, as_dict=True)

# Return as a string to be fed into a file
if to_file:
json_object = json.dumps(entity, indent=4)
json_object += '\n'
return json_object
# Return as a dict for JSON response
else:
return entity
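
For context (not part of the diff itself): get_token, obj_to_dict, and entity_json_dumps now live in lib/services.py so the publish flow and the new metadata route can share them. The committed signature annotates its last parameter as to_file: False (a literal value, with no default), so callers pass it positionally: True for the metadata.json string, False for a dict. A rough usage sketch, assuming it runs inside a Flask request context; the function name, file path, and dataset UUID are placeholders:

from flask import current_app
from hubmap_sdk import EntitySdk

from lib.services import entity_json_dumps, get_token


def write_provenance_metadata(ds_uuid: str, md_file: str) -> None:
    # get_token() reads the Globus token from the current request's headers,
    # so this must run inside a Flask request/app context.
    token = get_token()
    entity_instance = EntitySdk(token=token,
                                service_url=current_app.config['ENTITY_WEBSERVICE_URL'])
    entity = entity_instance.get_entity_by_id(ds_uuid)
    # to_file=True -> indented JSON string (written to metadata.json);
    # to_file=False -> plain dict (returned directly in a JSON response).
    json_object = entity_json_dumps(entity, token, entity_instance, True)
    with open(md_file, "w") as outfile:
        outfile.write(json_object)
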
9 changes: 1 addition & 8 deletions src/routes/assayclassifier/__init__.py
@@ -19,7 +19,7 @@
get_data_from_ubkg,
standardize_results
)
from lib.services import get_entity
from lib.services import get_entity, get_token

assayclassifier_blueprint = Blueprint("assayclassifier", __name__)

@@ -173,10 +173,3 @@ def reload_chain():
logger.error(e, exc_info=True)
return Response("Unexpected error while reloading rule chain: " + str(e), 500)


def get_token() -> Optional[str]:
auth_helper_instance = AuthHelper.instance()
token = auth_helper_instance.getAuthorizationTokens(request.headers)
if not isinstance(token, str):
token = None
return token
50 changes: 6 additions & 44 deletions src/routes/entity_CRUD/__init__.py
@@ -40,7 +40,7 @@

from lib.ontology import Ontology
from lib.file import get_csv_records, check_upload, files_exist
from lib.services import get_associated_sources_from_dataset
from lib.services import get_associated_sources_from_dataset, obj_to_dict, entity_json_dumps
from jobs.validation.metadata import validate_tsv, determine_schema

entity_CRUD_blueprint = Blueprint('entity_CRUD', __name__)
@@ -695,6 +695,7 @@ def dataset_data_status():

def update_datasets_datastatus(app_context):
with app_context:
dataset_helper = DatasetHelper(current_app.config)
organ_types_dict = Ontology.ops(as_data_dict=True, key='rui_code', val_key='term').organ_types()
all_datasets_query = (
"MATCH (ds:Dataset)-[:WAS_GENERATED_BY]->(:Activity)-[:USED]->(ancestor) "
@@ -800,7 +801,7 @@ def update_datasets_datastatus(app_context):

dataset['last_touch'] = dataset['last_touch'] if dataset['published_timestamp'] is None else dataset[
'published_timestamp']
dataset['is_primary'] = dataset_is_primary(dataset.get('uuid'))
dataset['is_primary'] = dataset_helper.dataset_is_primary(dataset.get('uuid'))

has_data = files_exist(dataset.get('uuid'), dataset.get('data_access_level'), dataset.get('group_name'))
has_dataset_metadata = files_exist(dataset.get('uuid'), dataset.get('data_access_level'),
@@ -959,6 +960,7 @@ def update_uploads_datastatus(app_context):
def publish_datastage(identifier):
try:
auth_helper = AuthHelper.instance()
dataset_helper = DatasetHelper(current_app.config)

user_info = auth_helper.getUserInfoUsingRequest(request, getGroups=True)
if user_info is None:
@@ -981,7 +983,7 @@
abort_not_found("Cannot find specimen with identifier: " + identifier)

dataset_uuid = json.loads(r.text)['hm_uuid']
is_primary = dataset_is_primary(dataset_uuid)
is_primary = dataset_helper.dataset_is_primary(dataset_uuid)
suspend_indexing_and_acls = string_helper.isYes(request.args.get('suspend-indexing-and-acls'))
no_indexing_and_acls = False
if suspend_indexing_and_acls:
@@ -1090,7 +1092,7 @@ def publish_datastage(identifier):
is_component = entity_dict.get('creation_action') == 'Multi-Assay Split'
if is_primary or is_component is False:
md_file = os.path.join(ds_path, "metadata.json")
json_object = entity_json_dumps(entity, auth_tokens, entity_instance)
json_object = entity_json_dumps(entity, auth_tokens, entity_instance, True)
logger.info(f"publish_datastage; writing metadata.json file: '{md_file}'; containing: '{json_object}'")
try:
with open(md_file, "w") as outfile:
@@ -1223,17 +1225,6 @@ def dataset_has_entity_lab_processed_data_type(dataset_uuid):
return False
return True


def dataset_is_primary(dataset_uuid):
with Neo4jHelper.get_instance().session() as neo_session:
q = (
f"MATCH (ds:Dataset {{uuid: '{dataset_uuid}'}})-[:WAS_GENERATED_BY]->(a:Activity) WHERE toLower(a.creation_action) = 'create dataset activity' RETURN ds.uuid")
result = neo_session.run(q).data()
if len(result) == 0:
return False
return True


def get_primary_ancestor_globus_path(entity_dict):
ancestor = None
origin_path = None
@@ -1500,32 +1491,3 @@ def get_entity_type_instanceof(type_a, type_b, auth_header=None) -> bool:

resp_json: dict = response.json()
return resp_json['instanceof']


def obj_to_dict(obj) -> dict:
"""
Convert the obj[ect] into a dict, but deeply.
Note: The Python builtin 'vars()' does not work here because of the way that some of the classes
are defined.
"""
return json.loads(
json.dumps(obj, default=lambda o: getattr(o, '__dict__', str(o)))
)


def entity_json_dumps(entity: Entity, token: str, entity_sdk: EntitySdk) -> str:
"""
Because entity and the content of the arrays returned from entity_instance.get_associated_*
contain user defined objects we need to turn them into simple python objects (e.g., dicts, lists, str)
before we can convert them wth json.dumps.
Here we create an expanded version of the entity associated with the dataset_uuid and return it as a json string.
"""
dataset_uuid = entity.get_uuid()
entity = obj_to_dict(entity)
entity['organs'] = obj_to_dict(entity_sdk.get_associated_organs_from_dataset(dataset_uuid))
entity['samples'] = obj_to_dict(entity_sdk.get_associated_samples_from_dataset(dataset_uuid))
entity['sources'] = get_associated_sources_from_dataset(dataset_uuid, token=token, as_dict=True)

json_object = json.dumps(entity, indent=4)
json_object += '\n'
return json_object
18 changes: 18 additions & 0 deletions src/routes/metadata/__init__.py
@@ -22,6 +22,7 @@
from atlas_consortia_commons.string import equals
from flask import Blueprint, jsonify, Response, current_app
from hubmap_commons.hm_auth import AuthHelper
from hubmap_sdk import EntitySdk
from rq.job import Job, JobStatus, NoSuchJobError

from jobs import (
@@ -40,6 +41,7 @@
from jobs.registration.metadata import register_uploaded_metadata
from jobs.validation.metadata import validate_uploaded_metadata
from lib.file import check_upload, get_base_path, get_csv_records, set_file_details
from lib.services import obj_to_dict, entity_json_dumps, get_token
from lib.ontology import Ontology
from lib.request_validation import get_validated_job_id, get_validated_referrer

@@ -199,6 +201,22 @@ def get_all_data_provider_groups(token: str, user: User):
return Response("Unexpected error while fetching group list: " + str(e) + " Check the logs", 500)


@metadata_blueprint.route('/metadata/provenance-metadata/<ds_uuid>', methods=['GET'])
def get_provenance_metadata(ds_uuid: str):
try:
token = get_token()
entity_instance = EntitySdk(token=token, service_url=current_app.config['ENTITY_WEBSERVICE_URL'])
entity = entity_instance.get_entity_by_id(ds_uuid)
metadata_json_object = entity_json_dumps(entity, token, entity_instance, False)
return jsonify(metadata_json_object), 200
except Exception as e:
logger.error(e, exc_info=True)
return Response(
f"Unexpected error while retrieving entity {ds_uuid}: " + str(e), 500
)



def check_metadata_upload():
"""Checks the uploaded file.
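
For context (not part of the diff itself): the new GET /metadata/provenance-metadata/<ds_uuid> route returns the expanded entity (with 'organs', 'samples', and 'sources') as JSON. A hypothetical client call, with the host, dataset UUID, and token as placeholders:

import requests

resp = requests.get(
    "https://<ingest-api-host>/metadata/provenance-metadata/<dataset-uuid>",
    headers={"Authorization": "Bearer <globus-token>"},
)
resp.raise_for_status()
provenance = resp.json()  # expanded dataset entity with organs, samples, and sources
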
