Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

251 full provenance info to metadata #296

Merged
merged 10 commits into from
Feb 16, 2024
Prev Previous commit
Next Next commit
Updating doi publishing function to match hubmap
  • Loading branch information
tjmadonna committed Feb 14, 2024
commit dcc86f39b63093cca2e7fcb75c685f192fcca239
84 changes: 42 additions & 42 deletions src/lib/datacite_doi_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import logging
from flask import Flask
from api.datacite_api import DataCiteApi
from hubmap_sdk import EntitySdk
from hubmap_sdk import Entity, EntitySdk
from hubmap_commons.exceptions import HTTPException
import ast

Expand Down Expand Up @@ -120,7 +120,6 @@ def build_doi_creators(self, dataset: object) -> list:

return creators


"""
Register a draft DOI with DataCite

Expand Down Expand Up @@ -181,21 +180,28 @@ def create_dataset_draft_doi(self, dataset: dict, check_publication_status=True)
raise KeyError('Either the entity_type of the given Dataset is missing or the entity is not a Dataset')

"""
Move the DOI state from draft to findable, meaning publish this dataset
Move the DOI state from draft to findable (publish) in DataCite

Parameters
----------
dataset: dict
The dataset dict to be published
user_token: str
The user's globus nexus token

Returns
-------
dict
The published dataset entity dict with updated DOI properties
The updated DOI properties

Raises
------
requests.exceptions.RequestException
If the request to entity-api fails
KeyError
If the entity_type of the given Dataset is missing or the entity is not a Dataset
"""
def move_doi_state_from_draft_to_findable(self, dataset: dict, user_token: str) -> object:
def move_doi_state_from_draft_to_findable(self, dataset: dict, user_token: str) -> dict:
if ('entity_type' in dataset) and (dataset['entity_type'] == 'Dataset'):
datacite_api = DataCiteApi(self.datacite_repository_id, self.datacite_repository_password,
self.datacite_hubmap_prefix, self.datacite_api_url, self.entity_api_url)
Expand All @@ -207,19 +213,16 @@ def move_doi_state_from_draft_to_findable(self, dataset: dict, user_token: str)
logger.debug("======resulting json from DataCite======")
logger.debug(doi_data)

# Then update the dataset DOI properties via entity-api after the DOI gets published
try:
doi_name = datacite_api.build_doi_name(dataset['sennet_id'])
entity_api = EntitySdk(user_token, self.entity_api_url)
updated_dataset = self.update_dataset_after_doi_published(dataset['uuid'], doi_name, entity_api)

return updated_dataset
except requests.exceptions.RequestException as e:
raise requests.exceptions.RequestException(e)
doi_name = datacite_api.build_doi_name(dataset['hubmap_id'])
doi_info = {
'registered_doi': doi_name,
'doi_url': f'https://doi.org/{doi_name}'
}
return doi_info
else:
# Log the full stack trace, prepend a line with our message
logger.exception(f"Unable to publish DOI for dataset {dataset['uuid']} via DataCite")
logger.debug(f'======Status code from DataCite {response.status_code} ======')
logger.debug(f'======Status code from DataCite {response.status_code}======')
logger.debug("======response text from DataCite======")
logger.debug(response.text)

Expand All @@ -229,8 +232,8 @@ def move_doi_state_from_draft_to_findable(self, dataset: dict, user_token: str)
raise KeyError('Either the entity_type of the given Dataset is missing or the entity is not a Dataset')

"""
Update the dataset's properties after DOI is published (Draft -> Findable)
Update the dataset's properties in Entity-API after DOI is published (Draft -> Findable)

Parameters
----------
dataset_uuid: str
Expand All @@ -239,35 +242,32 @@ def move_doi_state_from_draft_to_findable(self, dataset: dict, user_token: str)
The registered doi: prefix/suffix
entity_api
The EntitySdk object instance

Returns
-------
dict
The entity dict with updated DOI properties
"""
def update_dataset_after_doi_published(self, dataset_uuid: str, doi_name: str, entity_api: EntitySdk) -> object:
hubmap_sdk.Entity
The updated dataset entity

Raises
------
requests.exceptions.RequestException
If the request to entity-api fails
"""
def update_dataset_after_doi_published(self, dataset_uuid: dict, doi_info: str, entity_api: EntitySdk) -> Entity:
# Update the registered_doi, and doi_url properties after DOI made findable
# Changing Dataset.status to "Published" and setting the published_* properties
# are handled by another script
# See https://github.com/hubmapconsortium/ingest-ui/issues/354
dataset_properties_to_update = {
'registered_doi': doi_name,
'doi_url': f'https://doi.org/{doi_name}'
}

try:
entity = entity_api.update_entity(dataset_uuid, dataset_properties_to_update)
# Entity update via PUT call only returns a json message, no entity details
result = entity_api.update_entity(dataset_uuid, doi_info)
logger.info("======The dataset {dataset['uuid']} has been updated with DOI info======")
updated_entity = vars(entity)
logger.debug("======updated_entity======")
logger.debug(updated_entity)
return updated_entity

logger.info(doi_info)
return result
except HTTPException as e:
# Log the full stack trace, prepend a line with our message
logger.exception(f"Unable to update the DOI properties of dataset {dataset_uuid}")
logger.debug(f'======Status code from DataCite {e.status_code} ======')
logger.debug(f'======Status code from DataCite {e.status_code}======')
logger.debug("======response text from entity-api======")
logger.debug(e.description)

Expand All @@ -283,7 +283,7 @@ def update_dataset_after_doi_published(self, dataset_uuid: str, doi_name: str, e

try:
user_token = sys.argv[1]
except IndexError as e:
except IndexError:
msg = "Missing user token argument"
# Log the full stack trace, prepend a line with our message
logger.exception(msg)
Expand All @@ -295,7 +295,7 @@ def update_dataset_after_doi_published(self, dataset_uuid: str, doi_name: str, e

count = 1
for dataset_uuid in datasets:
logger.debug(f"Begin {count}: ========================= {dataset_uuid} =========================")
logger.debug(f"Begin {count}: ========================={dataset_uuid}=========================")
try:
entity = entity_api.get_entity_by_id(dataset_uuid)
dataset = vars(entity)
Expand All @@ -306,7 +306,7 @@ def update_dataset_after_doi_published(self, dataset_uuid: str, doi_name: str, e
logger.debug("Create Draft DOI")

# DISABLED
#data_cite_doi_helper.create_dataset_draft_doi(dataset)
# data_cite_doi_helper.create_dataset_draft_doi(dataset)
except Exception as e:
logger.exception(e)
sys.exit(e)
Expand All @@ -316,7 +316,7 @@ def update_dataset_after_doi_published(self, dataset_uuid: str, doi_name: str, e

# DISABLED
# To publish an existing draft DOI (change the state from draft to findable)
#data_cite_doi_helper.move_doi_state_from_draft_to_findable(dataset, user_token)
# data_cite_doi_helper.move_doi_state_from_draft_to_findable(dataset, user_token)
except Exception as e:
logger.exception(e)
sys.exit(e)
Expand All @@ -330,8 +330,8 @@ def update_dataset_after_doi_published(self, dataset_uuid: str, doi_name: str, e
logger.debug("======response text from entity-api======")
logger.debug(e.description)

logger.debug(f"End {count}: ========================= {dataset_uuid} =========================")
logger.debug(f"End {count}: ========================={dataset_uuid}=========================")

time.sleep(1)

count = count + 1
count = count + 1
18 changes: 8 additions & 10 deletions src/routes/entity_CRUD/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -879,27 +879,25 @@ def publish_datastage(identifier):
try:
datacite_doi_helper.create_dataset_draft_doi(entity_dict, check_publication_status=False)
except Exception as e:
return jsonify({"error": f"Error occurred while trying to create a draft doi for{dataset_uuid}. {e}"}), 500
logger.exception(f"Exception while creating a draft doi for {dataset_uuid}: {e}")
return jsonify({"error": f"Error occurred while trying to create a draft doi for {dataset_uuid}. {e}"}), 500

# This will make the draft DOI created above 'findable'....
try:
doi_info = datacite_doi_helper.move_doi_state_from_draft_to_findable(entity_dict, auth_tokens)
except Exception as e:
return jsonify({"error": f"Error occurred while trying to change doi draft state to findable doi for{dataset_uuid}. {e}"}), 500
logger.exception(f"Exception while creating making doi findable and saving to entity for {dataset_uuid}: {e}")
return jsonify({"error": f"Error occurred while trying to change doi draft state to findable doi for {dataset_uuid}. {e}"}), 500

doi_update_clause = ""
if doi_info is not None:
doi_update_clause = f", e.registered_doi = '{doi_info['registered_doi']}', e.doi_url = '{doi_info['doi_url']}'"

# set up a status_history list to add a "Published" entry to below
if 'status_history' in rval[0]:
status_history_str = rval[0]['status_history']
if status_history_str is None:
status_history_list = []
else:
status_history_list = string_helper.convert_str_literal(status_history_str)
else:
status_history_list = []
status_history_list = []
status_history_str = rval[0].get('status_history')
if status_history_str is not None:
status_history_list = string_helper.convert_str_literal(status_history_str)

# add Published status change to status history
status_update = {
Expand Down
Loading