Merge branch 'maxsibilla/issue-591' into dev-integrate
maxsibilla committed Nov 25, 2024
2 parents 997a210 + 8000ee9 commit 0ab4133
Showing 8 changed files with 96 additions and 75 deletions.
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
1.4.18
1.4.19
2 changes: 1 addition & 1 deletion ingest-api-spec.yaml
@@ -2,7 +2,7 @@ openapi: 3.0.0
info:
description: |
A RESTful web service exposing calls needed for the SenNet Data Sharing Portal.
version: 1.4.18
version: 1.4.19
title: SenNet Ingest API
contact:
name: SenNet Help Desk
4 changes: 2 additions & 2 deletions src/lib/dataset_helper.py
@@ -15,7 +15,7 @@

# Local modules
from hubmap_commons.hubmap_const import HubmapConst
from hubmap_sdk import EntitySdk
from hubmap_sdk import EntitySdk, Entity

from lib.file_upload_helper import UploadFileHelper
from lib.ingest_file_helper import IngestFileHelper
@@ -440,7 +440,7 @@ def get_file_list(self, orig_file_path):
def dataset_is_primary(self, dataset_uuid):
with self.neo4j_driver_instance.session() as neo_session:
q = (
f"MATCH (ds:Dataset {{uuid: '{dataset_uuid}'}})-[:WAS_GENERATED_BY]->(:Activity)-[:USED]->(s:Sample) RETURN ds.uuid")
f"MATCH (ds:Dataset {{uuid: '{dataset_uuid}'}})-[:WAS_GENERATED_BY]->(a:Activity) WHERE toLower(a.creation_action) = 'create dataset activity' RETURN ds.uuid")
result = neo_session.run(q).data()
if len(result) == 0:
return False
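
For context (not part of the diff itself): the dataset_is_primary change above replaces the old Sample-ancestor test with a check on the generating Activity's creation_action. Below is a minimal sketch of the same check, under the assumption that the uuid is passed as a Cypher query parameter rather than interpolated into the query string as in the committed code.

def dataset_is_primary(neo4j_driver_instance, dataset_uuid: str) -> bool:
    # Equivalent to DatasetHelper.dataset_is_primary after this commit: a dataset
    # is "primary" when its generating Activity has
    # creation_action == 'Create Dataset Activity' (case-insensitive).
    query = (
        "MATCH (ds:Dataset {uuid: $uuid})-[:WAS_GENERATED_BY]->(a:Activity) "
        "WHERE toLower(a.creation_action) = 'create dataset activity' "
        "RETURN ds.uuid"
    )
    with neo4j_driver_instance.session() as neo_session:
        result = neo_session.run(query, uuid=dataset_uuid).data()
        return len(result) > 0
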
84 changes: 66 additions & 18 deletions src/lib/services.py
@@ -1,19 +1,31 @@
import json
import logging
import time
from typing import Callable, List, Optional, Union
from urllib import request

import requests
from flask import current_app
from flask import current_app, request
from hubmap_commons.file_helper import removeTrailingSlashURL, ensureTrailingSlashURL
from hubmap_commons.hm_auth import AuthHelper
from hubmap_sdk import Entity, EntitySdk, SearchSdk
from hubmap_sdk.sdk_helper import HTTPException as SDKException
from rdflib.parser import headers
from requests.adapters import HTTPAdapter, Retry

logger = logging.getLogger(__name__)


def get_token() -> Optional[str]:
auth_helper_instance = AuthHelper.instance()
token = auth_helper_instance.getAuthorizationTokens(request.headers)
if not isinstance(token, str):
token = None
return token


def get_entity(
entity_id: str, token: Optional[str], as_dict: bool = False
entity_id: str, token: Optional[str], as_dict: bool = False
) -> Union[Entity, dict]:
"""Get the entity from entity-api for the given uuid.
@@ -48,7 +60,7 @@ def get_entity(


def get_entity_from_search_api(
entity_id: str, token: Optional[str], as_dict: bool = False
entity_id: str, token: Optional[str], as_dict: bool = False
) -> Union[Entity, dict]:
"""Get the entity from search-api for the given uuid.
Expand Down Expand Up @@ -100,7 +112,7 @@ def get_entity_from_search_api(


def get_associated_sources_from_dataset(
dataset_id: str, token: str, as_dict: bool = False
dataset_id: str, token: str, as_dict: bool = False
) -> Union[List[Entity], dict]:
"""Get the associated sources for the given dataset.
@@ -125,7 +137,9 @@
"""
entity_api_url = ensureTrailingSlashURL(current_app.config["ENTITY_WEBSERVICE_URL"])
url = f"{entity_api_url}datasets/{dataset_id}/sources"
headers = {"Authorization": f"Bearer {token}"}
headers = {}
if token is not None:
headers = {"Authorization": f"Bearer {token}"}
res = requests.get(url, headers=headers)
if not res.ok:
raise SDKException(f"Failed to get associated source for dataset {dataset_id}")
@@ -170,12 +184,12 @@ def reindex_entities(entity_ids: list, token: str) -> None:


def bulk_update_entities(
entity_updates: dict,
token: str,
total_tries: int = 3,
throttle: float = 5,
entity_api_url: Optional[str] = None,
after_each_callback: Optional[Callable[[int], None]] = None,
entity_updates: dict,
token: str,
total_tries: int = 3,
throttle: float = 5,
entity_api_url: Optional[str] = None,
after_each_callback: Optional[Callable[[int], None]] = None,
) -> dict:
"""Bulk update the entities in the entity-api.
@@ -253,13 +267,13 @@


def bulk_create_entities(
entity_type: str,
entities: list,
token: str,
total_tries: int = 3,
throttle: float = 5,
entity_api_url: Optional[str] = None,
after_each_callback: Optional[Callable[[int], None]] = None,
entity_type: str,
entities: list,
token: str,
total_tries: int = 3,
throttle: float = 5,
entity_api_url: Optional[str] = None,
after_each_callback: Optional[Callable[[int], None]] = None,
) -> list:
"""Bulk create the entities in the entity-api.
@@ -359,3 +373,37 @@ def error_msg(json_res: dict) -> str:
return json_res["message"]

return str(json_res)


def obj_to_dict(obj) -> dict:
"""
Convert the obj[ect] into a dict, but deeply.
Note: The Python builtin 'vars()' does not work here because of the way that some of the classes
are defined.
"""
return json.loads(
json.dumps(obj, default=lambda o: getattr(o, '__dict__', str(o)))
)


def entity_json_dumps(entity: Entity, token: str, entity_sdk: EntitySdk, to_file: False):
"""
Because entity and the content of the arrays returned from entity_instance.get_associated_*
contain user defined objects we need to turn them into simple python objects (e.g., dicts, lists, str)
before we can convert them with json.dumps.
Here we create an expanded version of the entity associated with the dataset_uuid and return it as a json string.
"""
dataset_uuid = entity.get_uuid()
entity = obj_to_dict(entity)
entity['organs'] = obj_to_dict(entity_sdk.get_associated_organs_from_dataset(dataset_uuid))
entity['samples'] = obj_to_dict(entity_sdk.get_associated_samples_from_dataset(dataset_uuid))
entity['sources'] = get_associated_sources_from_dataset(dataset_uuid, token=token, as_dict=True)

# Return as a string to be fed into a file
if to_file:
json_object = json.dumps(entity, indent=4)
json_object += '\n'
return json_object
# Return as a dict for JSON response
else:
return entity
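
For context (not part of the diff itself): get_token, obj_to_dict, and entity_json_dumps now live in lib/services.py so the publish flow and the new metadata route can share them. The committed signature annotates its last parameter as to_file: False (a literal value, with no default), so callers pass it positionally: True for the metadata.json string, False for a dict. A rough usage sketch, assuming it runs inside a Flask request context; the function name, file path, and dataset UUID are placeholders:

from flask import current_app
from hubmap_sdk import EntitySdk

from lib.services import entity_json_dumps, get_token


def write_provenance_metadata(ds_uuid: str, md_file: str) -> None:
    # get_token() reads the Globus token from the current request's headers,
    # so this must run inside a Flask request/app context.
    token = get_token()
    entity_instance = EntitySdk(token=token,
                                service_url=current_app.config['ENTITY_WEBSERVICE_URL'])
    entity = entity_instance.get_entity_by_id(ds_uuid)
    # to_file=True -> indented JSON string (written to metadata.json);
    # to_file=False -> plain dict (returned directly in a JSON response).
    json_object = entity_json_dumps(entity, token, entity_instance, True)
    with open(md_file, "w") as outfile:
        outfile.write(json_object)
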
9 changes: 1 addition & 8 deletions src/routes/assayclassifier/__init__.py
@@ -19,7 +19,7 @@
get_data_from_ubkg,
standardize_results
)
from lib.services import get_entity
from lib.services import get_entity, get_token

assayclassifier_blueprint = Blueprint("assayclassifier", __name__)

@@ -173,10 +173,3 @@ def reload_chain():
logger.error(e, exc_info=True)
return Response("Unexpected error while reloading rule chain: " + str(e), 500)


def get_token() -> Optional[str]:
auth_helper_instance = AuthHelper.instance()
token = auth_helper_instance.getAuthorizationTokens(request.headers)
if not isinstance(token, str):
token = None
return token
50 changes: 6 additions & 44 deletions src/routes/entity_CRUD/__init__.py
@@ -40,7 +40,7 @@

from lib.ontology import Ontology
from lib.file import get_csv_records, check_upload, files_exist
from lib.services import get_associated_sources_from_dataset
from lib.services import get_associated_sources_from_dataset, obj_to_dict, entity_json_dumps
from jobs.validation.metadata import validate_tsv, determine_schema

entity_CRUD_blueprint = Blueprint('entity_CRUD', __name__)
@@ -695,6 +695,7 @@ def dataset_data_status():

def update_datasets_datastatus(app_context):
with app_context:
dataset_helper = DatasetHelper(current_app.config)
organ_types_dict = Ontology.ops(as_data_dict=True, key='rui_code', val_key='term').organ_types()
all_datasets_query = (
"MATCH (ds:Dataset)-[:WAS_GENERATED_BY]->(:Activity)-[:USED]->(ancestor) "
@@ -800,7 +801,7 @@ def update_datasets_datastatus(app_context):

dataset['last_touch'] = dataset['last_touch'] if dataset['published_timestamp'] is None else dataset[
'published_timestamp']
dataset['is_primary'] = dataset_is_primary(dataset.get('uuid'))
dataset['is_primary'] = dataset_helper.dataset_is_primary(dataset.get('uuid'))

has_data = files_exist(dataset.get('uuid'), dataset.get('data_access_level'), dataset.get('group_name'))
has_dataset_metadata = files_exist(dataset.get('uuid'), dataset.get('data_access_level'),
@@ -959,6 +960,7 @@ def update_uploads_datastatus(app_context):
def publish_datastage(identifier):
try:
auth_helper = AuthHelper.instance()
dataset_helper = DatasetHelper(current_app.config)

user_info = auth_helper.getUserInfoUsingRequest(request, getGroups=True)
if user_info is None:
@@ -981,7 +983,7 @@
abort_not_found("Cannot find specimen with identifier: " + identifier)

dataset_uuid = json.loads(r.text)['hm_uuid']
is_primary = dataset_is_primary(dataset_uuid)
is_primary = dataset_helper.dataset_is_primary(dataset_uuid)
suspend_indexing_and_acls = string_helper.isYes(request.args.get('suspend-indexing-and-acls'))
no_indexing_and_acls = False
if suspend_indexing_and_acls:
@@ -1090,7 +1092,7 @@ def publish_datastage(identifier):
is_component = entity_dict.get('creation_action') == 'Multi-Assay Split'
if is_primary or is_component is False:
md_file = os.path.join(ds_path, "metadata.json")
json_object = entity_json_dumps(entity, auth_tokens, entity_instance)
json_object = entity_json_dumps(entity, auth_tokens, entity_instance, True)
logger.info(f"publish_datastage; writing metadata.json file: '{md_file}'; containing: '{json_object}'")
try:
with open(md_file, "w") as outfile:
@@ -1223,17 +1225,6 @@ def dataset_has_entity_lab_processed_data_type(dataset_uuid):
return False
return True


def dataset_is_primary(dataset_uuid):
with Neo4jHelper.get_instance().session() as neo_session:
q = (
f"MATCH (ds:Dataset {{uuid: '{dataset_uuid}'}})-[:WAS_GENERATED_BY]->(a:Activity) WHERE toLower(a.creation_action) = 'create dataset activity' RETURN ds.uuid")
result = neo_session.run(q).data()
if len(result) == 0:
return False
return True


def get_primary_ancestor_globus_path(entity_dict):
ancestor = None
origin_path = None
@@ -1500,32 +1491,3 @@ def get_entity_type_instanceof(type_a, type_b, auth_header=None) -> bool:

resp_json: dict = response.json()
return resp_json['instanceof']


def obj_to_dict(obj) -> dict:
"""
Convert the obj[ect] into a dict, but deeply.
Note: The Python builtin 'vars()' does not work here because of the way that some of the classes
are defined.
"""
return json.loads(
json.dumps(obj, default=lambda o: getattr(o, '__dict__', str(o)))
)


def entity_json_dumps(entity: Entity, token: str, entity_sdk: EntitySdk) -> str:
"""
Because entity and the content of the arrays returned from entity_instance.get_associated_*
contain user defined objects we need to turn them into simple python objects (e.g., dicts, lists, str)
before we can convert them wth json.dumps.
Here we create an expanded version of the entity associated with the dataset_uuid and return it as a json string.
"""
dataset_uuid = entity.get_uuid()
entity = obj_to_dict(entity)
entity['organs'] = obj_to_dict(entity_sdk.get_associated_organs_from_dataset(dataset_uuid))
entity['samples'] = obj_to_dict(entity_sdk.get_associated_samples_from_dataset(dataset_uuid))
entity['sources'] = get_associated_sources_from_dataset(dataset_uuid, token=token, as_dict=True)

json_object = json.dumps(entity, indent=4)
json_object += '\n'
return json_object
18 changes: 18 additions & 0 deletions src/routes/metadata/__init__.py
@@ -22,6 +22,7 @@
from atlas_consortia_commons.string import equals
from flask import Blueprint, jsonify, Response, current_app
from hubmap_commons.hm_auth import AuthHelper
from hubmap_sdk import EntitySdk
from rq.job import Job, JobStatus, NoSuchJobError

from jobs import (
@@ -40,6 +41,7 @@
from jobs.registration.metadata import register_uploaded_metadata
from jobs.validation.metadata import validate_uploaded_metadata
from lib.file import check_upload, get_base_path, get_csv_records, set_file_details
from lib.services import obj_to_dict, entity_json_dumps, get_token
from lib.ontology import Ontology
from lib.request_validation import get_validated_job_id, get_validated_referrer

@@ -199,6 +201,22 @@ def get_all_data_provider_groups(token: str, user: User):
return Response("Unexpected error while fetching group list: " + str(e) + " Check the logs", 500)


@metadata_blueprint.route('/metadata/provenance-metadata/<ds_uuid>', methods=['GET'])
def get_provenance_metadata(ds_uuid: str):
try:
token = get_token()
entity_instance = EntitySdk(token=token, service_url=current_app.config['ENTITY_WEBSERVICE_URL'])
entity = entity_instance.get_entity_by_id(ds_uuid)
metadata_json_object = entity_json_dumps(entity, token, entity_instance, False)
return jsonify(metadata_json_object), 200
except Exception as e:
logger.error(e, exc_info=True)
return Response(
f"Unexpected error while retrieving entity {ds_uuid}: " + str(e), 500
)



def check_metadata_upload():
"""Checks the uploaded file.
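
For context (not part of the diff itself): the new GET /metadata/provenance-metadata/<ds_uuid> route returns the expanded entity (with 'organs', 'samples', and 'sources') as JSON. A hypothetical client call, with the host, dataset UUID, and token as placeholders:

import requests

resp = requests.get(
    "https://<ingest-api-host>/metadata/provenance-metadata/<dataset-uuid>",
    headers={"Authorization": "Bearer <globus-token>"},
)
resp.raise_for_status()
provenance = resp.json()  # expanded dataset entity with organs, samples, and sources
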
