Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

251 full provenance info to metadata #296

Merged
merged 10 commits into from
Feb 16, 2024
84 changes: 42 additions & 42 deletions src/lib/datacite_doi_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import logging
from flask import Flask
from api.datacite_api import DataCiteApi
from hubmap_sdk import EntitySdk
from hubmap_sdk import Entity, EntitySdk
from hubmap_commons.exceptions import HTTPException
import ast

Expand Down Expand Up @@ -120,7 +120,6 @@ def build_doi_creators(self, dataset: object) -> list:

return creators


"""
Register a draft DOI with DataCite

Expand Down Expand Up @@ -181,21 +180,28 @@ def create_dataset_draft_doi(self, dataset: dict, check_publication_status=True)
raise KeyError('Either the entity_type of the given Dataset is missing or the entity is not a Dataset')

"""
Move the DOI state from draft to findable, meaning publish this dataset
Move the DOI state from draft to findable (publish) in DataCite

Parameters
----------
dataset: dict
The dataset dict to be published
user_token: str
The user's globus nexus token

Returns
-------
dict
The published datset entity dict with updated DOI properties
The updated DOI properties

Raises
------
requests.exceptions.RequestException
If the request to entity-api fails
KeyError
If the entity_type of the given Dataset is missing or the entity is not a Dataset
"""
def move_doi_state_from_draft_to_findable(self, dataset: dict, user_token: str) -> object:
def move_doi_state_from_draft_to_findable(self, dataset: dict, user_token: str) -> dict:
if ('entity_type' in dataset) and (dataset['entity_type'] == 'Dataset'):
datacite_api = DataCiteApi(self.datacite_repository_id, self.datacite_repository_password,
self.datacite_hubmap_prefix, self.datacite_api_url, self.entity_api_url)
Expand All @@ -207,19 +213,16 @@ def move_doi_state_from_draft_to_findable(self, dataset: dict, user_token: str)
logger.debug("======resulting json from DataCite======")
logger.debug(doi_data)

# Then update the dataset DOI properties via entity-api after the DOI gets published
try:
doi_name = datacite_api.build_doi_name(dataset['sennet_id'])
entity_api = EntitySdk(user_token, self.entity_api_url)
updated_dataset = self.update_dataset_after_doi_published(dataset['uuid'], doi_name, entity_api)

return updated_dataset
except requests.exceptions.RequestException as e:
raise requests.exceptions.RequestException(e)
doi_name = datacite_api.build_doi_name(dataset['sennet_id'])
doi_info = {
'registered_doi': doi_name,
'doi_url': f'https://doi.org/{doi_name}'
}
return doi_info
else:
# Log the full stack trace, prepend a line with our message
logger.exception(f"Unable to publish DOI for dataset {dataset['uuid']} via DataCite")
logger.debug(f'======Status code from DataCite {response.status_code} ======')
logger.debug(f'======Status code from DataCite {response.status_code}======')
logger.debug("======response text from DataCite======")
logger.debug(response.text)

Expand All @@ -229,8 +232,8 @@ def move_doi_state_from_draft_to_findable(self, dataset: dict, user_token: str)
raise KeyError('Either the entity_type of the given Dataset is missing or the entity is not a Dataset')

"""
Update the dataset's properties after DOI is published (Draft -> Findable)
Update the dataset's properties in Entity-API after DOI is published (Draft -> Findable)

Parameters
----------
dataset_uuid: str
Expand All @@ -239,35 +242,32 @@ def move_doi_state_from_draft_to_findable(self, dataset: dict, user_token: str)
The registered doi: prefix/suffix
entity_api
The EntitySdk object instance

Returns
-------
dict
The entity dict with updated DOI properties
"""
def update_dataset_after_doi_published(self, dataset_uuid: str, doi_name: str, entity_api: EntitySdk) -> object:
hubmap_sdk.Entity
The updated dataset entity

Raises
------
requests.exceptions.RequestException
If the request to entity-api fails
"""
def update_dataset_after_doi_published(self, dataset_uuid: dict, doi_info: str, entity_api: EntitySdk) -> Entity:
# Update the registered_doi, and doi_url properties after DOI made findable
# Changing Dataset.status to "Published" and setting the published_* properties
# are handled by another script
# See https://github.com/hubmapconsortium/ingest-ui/issues/354
dataset_properties_to_update = {
'registered_doi': doi_name,
'doi_url': f'https://doi.org/{doi_name}'
}

try:
entity = entity_api.update_entity(dataset_uuid, dataset_properties_to_update)
# Entity update via PUT call only returns a json message, no entity details
result = entity_api.update_entity(dataset_uuid, doi_info)
logger.info("======The dataset {dataset['uuid']} has been updated with DOI info======")
updated_entity = vars(entity)
logger.debug("======updated_entity======")
logger.debug(updated_entity)
return updated_entity

logger.info(doi_info)
return result
except HTTPException as e:
# Log the full stack trace, prepend a line with our message
logger.exception(f"Unable to update the DOI properties of dataset {dataset_uuid}")
logger.debug(f'======Status code from DataCite {e.status_code} ======')
logger.debug(f'======Status code from DataCite {e.status_code}======')
logger.debug("======response text from entity-api======")
logger.debug(e.description)

Expand All @@ -283,7 +283,7 @@ def update_dataset_after_doi_published(self, dataset_uuid: str, doi_name: str, e

try:
user_token = sys.argv[1]
except IndexError as e:
except IndexError:
msg = "Missing user token argument"
# Log the full stack trace, prepend a line with our message
logger.exception(msg)
Expand All @@ -295,7 +295,7 @@ def update_dataset_after_doi_published(self, dataset_uuid: str, doi_name: str, e

count = 1
for dataset_uuid in datasets:
logger.debug(f"Begin {count}: ========================= {dataset_uuid} =========================")
logger.debug(f"Begin {count}: ========================={dataset_uuid}=========================")
try:
entity = entity_api.get_entity_by_id(dataset_uuid)
dataset = vars(entity)
Expand All @@ -306,7 +306,7 @@ def update_dataset_after_doi_published(self, dataset_uuid: str, doi_name: str, e
logger.debug("Create Draft DOI")

# DISABLED
#data_cite_doi_helper.create_dataset_draft_doi(dataset)
# data_cite_doi_helper.create_dataset_draft_doi(dataset)
except Exception as e:
logger.exception(e)
sys.exit(e)
Expand All @@ -316,7 +316,7 @@ def update_dataset_after_doi_published(self, dataset_uuid: str, doi_name: str, e

# DISABLED
# To publish an existing draft DOI (change the state from draft to findable)
#data_cite_doi_helper.move_doi_state_from_draft_to_findable(dataset, user_token)
# data_cite_doi_helper.move_doi_state_from_draft_to_findable(dataset, user_token)
except Exception as e:
logger.exception(e)
sys.exit(e)
Expand All @@ -330,8 +330,8 @@ def update_dataset_after_doi_published(self, dataset_uuid: str, doi_name: str, e
logger.debug("======response text from entity-api======")
logger.debug(e.description)

logger.debug(f"End {count}: ========================= {dataset_uuid} =========================")
logger.debug(f"End {count}: ========================={dataset_uuid}=========================")

time.sleep(1)

count = count + 1
count = count + 1
44 changes: 43 additions & 1 deletion src/lib/services.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Optional, Union
from typing import List, Optional, Union

import requests
from flask import current_app
from hubmap_sdk import Entity, EntitySdk, SearchSdk
from hubmap_sdk.sdk_helper import HTTPException as SDKException
Expand Down Expand Up @@ -90,3 +91,44 @@ def get_entity_from_search_api(

except SDKException:
raise


def get_associated_sources_from_dataset(
dataset_id: str, token: str, as_dict: bool = False
) -> Union[List[Entity], dict]:
"""Get the associated sources for the given dataset.

Parameters
----------
dataset_id : str
The uuid of the dataset.
token : str
The groups token for the request.
as_dict : bool, optional
Should entity be returned as a dictionary, by default False

Returns
-------
Union[List[Entity], dict]
The associated sources for the given dataset.

Raises
------
hubmap_sdk.sdk_helper.HTTPException
If the entiti-api request fails or entity not found
"""
entity_api_url = current_app.config["ENTITY_WEBSERVICE_URL"]
url = f"{entity_api_url}/datasets/{dataset_id}/sources"
headers = {"Authorization": f"Bearer {token}"}
res = requests.get(url, headers=headers)
if not res.ok:
raise SDKException(f"Failed to get associated source for dataset {dataset_id}")
body = res.json()

if as_dict:
return body

if isinstance(body, list):
return [Entity(entity) for entity in res.json()]

return [Entity(body)]
2 changes: 1 addition & 1 deletion src/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Flask==2.1.3
globus_sdk==2.0.1
hubmap-sdk==1.0.4
hubmap-sdk==1.0.7
Werkzeug==2.3.7

# For interacting with memcached
Expand Down
Loading
Loading