From 0169662db06f183081a75c466eeafcf0d91e1631 Mon Sep 17 00:00:00 2001
From: milanmajchrak
Date: Fri, 22 Mar 2024 09:23:03 +0100
Subject: [PATCH] Created script for bulk access

---
Review notes (free text between the "---" line and the diffstat is not part
of the commit):
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch; verify context-line indentation against the target
   tree. The "index" hashes are those of the original, unedited patch.
 * api_put: the 401 branch now passes the computed retry_value to the
   recursive call (it was computed but ignored) and its log messages say
   "API Put" instead of "API Post".
 * get_resource_policy: returns None instead of raising IndexError when the
   bundle has no resource policy.
 * tools/change_item_policies.py: items whose bundle has no resource policy
   are skipped with a warning, and only counted as updated afterwards.

 src/dspace/impl/client.py     |  93 ++++++++++++++++++++++++++--
 src/project_settings.py       |   2 +-
 tools/change_item_policies.py | 114 ++++++++++++++++++++++++++++++++++
 3 files changed, 203 insertions(+), 6 deletions(-)
 create mode 100644 tools/change_item_policies.py

diff --git a/src/dspace/impl/client.py b/src/dspace/impl/client.py
index d743d93..217d130 100644
--- a/src/dspace/impl/client.py
+++ b/src/dspace/impl/client.py
@@ -231,7 +231,7 @@ def api_post(self, url, params, data, retry=False, content_type='application/jso
         check_response(r, "api post")
         return r
 
-    def api_put(self, url, params, json_p, retry=False):
+    def api_put(self, url, params, json_p, retry=False, content_type='application/json'):
         """
         Perform a PUT request. Refresh XSRF token if necessary.
         PUTs are typically used to update objects.
@@ -243,15 +243,16 @@ def api_put(self, url, params, json_p, retry=False):
                         Used if we need to refresh XSRF.
         @return:        Response from API
         """
-        h = {'Content-type': 'application/json'}
+        h = {'Content-type': content_type}
         r = self.session.put(url, params=params, json=json_p, headers=h)
         if 'DSPACE-XSRF-TOKEN' in r.headers:
             t = r.headers['DSPACE-XSRF-TOKEN']
-            logging.debug('Updating token to ' + t)
+            logging.debug('API Put: Updating token to ' + t)
             self.session.headers.update({'X-XSRF-Token': t})
             self.session.cookies.update({'X-XSRF-Token': t})
 
         if r.status_code == 403:
+            self.exception401Counter = 0
             # 403 Forbidden
             # If we had a CSRF failure, retry the request with the updated token
             # After speaking in #dev it seems that these
@@ -264,8 +265,26 @@ def api_put(self, url, params, json_p, retry=False):
                     logging.error('Already retried... something must be wrong')
                 else:
                     logging.debug("Retrying request with updated CSRF token")
-                    return self.api_put(url, params=params, json_p=json_p, retry=True)
-
+                    return self.api_put(url, params=params, json_p=json_p, retry=True, content_type=content_type)
+        elif r.status_code == 401:
+            r_json = r.json()
+            if 'message' in r_json and 'Authentication is required' in r_json[
+                    'message']:
+                if retry:
+                    logging.error(
+                        'API Put: Already retried... something must be wrong')
+                    self.exception401Counter = 0
+                else:
+                    logging.debug("API Put: Retrying request with updated CSRF token")
+                    # try to authenticate
+                    self.authenticate()
+                    # Try to authenticate and repeat the request 3 times -
+                    # if it won't happen log error
+                    self.exception401Counter = self.exception401Counter + 1
+                    retry_value = False
+                    if self.exception401Counter > 3:
+                        retry_value = True
+                    return self.api_put(url, params=params, json_p=json_p, retry=retry_value, content_type=content_type)
         return r
 
     def api_delete(self, url, params, retry=False):
@@ -556,6 +575,24 @@ def delete_dso(self, dso=None, url=None, params=None):
             logging.error(f'{e}')
             return None
 
+
+    def get_bundle_by_name(self, name, item_uuid):
+        """
+        Get a bundle by name for a specific item
+        @param name: Name of the bundle
+        @param item_uuid: UUID of the item
+        @return: Bundle object
+        """
+        url = f'{self.API_ENDPOINT}core/items/{item_uuid}/bundles'
+        r_json = self.fetch_resource(url, params=None)
+        if '_embedded' in r_json:
+            if 'bundles' in r_json['_embedded']:
+                for bundle in r_json['_embedded']['bundles']:
+                    if bundle['name'] == name:
+                        return Bundle(bundle)
+        return None
+
+
     def get_bundles(self, parent=None, uuid=None):
         """
         Get bundles for an item
@@ -839,6 +876,52 @@ def get_items(self):
                     items.append(Item(item_resource))
         return items
 
+    def get_items_from_collection(self, collection_id, page=0, size=1000):
+        """
+        Get all items
+        @return: list of Item objects
+        """
+        url = f'{self.API_ENDPOINT}discover/search/objects?sort=dc.date.accessioned,DESC&page={page}&size={size}&scope={collection_id}&dsoType=ITEM&embed=thumbnail'
+
+        items = list()
+        r = self.api_get(url)
+        r_json = parse_json(r)
+        if '_embedded' in r_json:
+            if 'searchResult' in r_json['_embedded']:
+                if '_embedded' in r_json['_embedded']['searchResult']:
+                    for item_resource in r_json['_embedded']['searchResult']['_embedded']['objects']:
+                        items.append(Item(item_resource['_embedded']['indexableObject']))
+
+        return items
+
+
+    def get_resource_policy(self, bundle_uuid):
+        """
+        Get the first resource policy for a specific bundle
+        @param bundle_uuid: UUID of the bundle
+        @return: first resource policy (dict) or None if there is none
+        """
+        url = f'{self.API_ENDPOINT}authz/resourcepolicies/search/resource?uuid={bundle_uuid}&embed=eperson&embed=group'
+        r = self.api_get(url)
+        r_json = parse_json(r)
+        if '_embedded' in r_json:
+            policies = r_json['_embedded'].get('resourcepolicies')
+            # An empty list must not be indexed - it would raise IndexError
+            if policies:
+                return policies[0]
+        return None
+
+
+    def update_resource_policy_group(self, policy_id, group_uuid):
+        """
+        Update a resource policy with a new group
+        """
+        url = f'{self.API_ENDPOINT}authz/resourcepolicies/{policy_id}/group'
+        body = f'{self.API_ENDPOINT}eperson/groups/{group_uuid}'
+        r = self.api_put(url, None, body, content_type='text/uri-list')
+        return r
+
+
     def get_item(self, uuid):
         """
         Get an item, given its UUID
diff --git a/src/project_settings.py b/src/project_settings.py
index dd10a5d..6511562 100644
--- a/src/project_settings.py
+++ b/src/project_settings.py
@@ -9,7 +9,7 @@
     "resume_dir": "__temp/resume/",
 
     "backend": {
-        "endpoint": "http://dev-5.pc:85/server/api/",
+        "endpoint": "http://localhost:8080/server/api/",
         "user": "test@test.edu",
         "password": "admin",
         "authentication": True,
diff --git a/tools/change_item_policies.py b/tools/change_item_policies.py
new file mode 100644
index 0000000..f09f35b
--- /dev/null
+++ b/tools/change_item_policies.py
@@ -0,0 +1,114 @@
+###
+# This script changes the policy of items in a community to a specific group. Bulk access.
+###
+import logging
+import os
+import sys
+
+
+_this_dir = os.path.dirname(os.path.abspath(__file__))
+sys.path.insert(0, os.path.join(_this_dir, "../src"))
+
+
+import dspace  # noqa
+import settings  # noqa
+import project_settings  # noqa
+from dspace.impl.models import Item  # noqa
+from dspace.impl.models import Community  # noqa
+from utils import init_logging, update_settings  # noqa
+
+_logger = logging.getLogger()
+
+# env settings, update with project_settings
+env = update_settings(settings.env, project_settings.settings)
+init_logging(_logger, env["log_file"])
+
+if "DSPACE_REST_API" in os.environ:
+    env["backend"]["endpoint"] = os.getenv("DSPACE_REST_API")
+    env_backend_endpoint = env["backend"]["endpoint"]
+    _logger.info(f"Loaded env.backend.endpoint from env DSPACE_REST_API."
+                 f" Current value: {env_backend_endpoint}")
+
+def get_all_items_from_collection(coll):
+    """
+    Get all items from collection
+    @param coll: collection object; its `uuid` is used as the search scope
+    @return: list with the items of all pages (uses the global `dspace_be`)
+    """
+    # Pagination and size because BE has a limit of 100 items per page and if the size is set to 1000 it will return
+    # only 100 items
+    page = 0
+    size = 5
+    all_items = list()
+    while True:
+        items_on_page = dspace_be.client.get_items_from_collection(coll.uuid, page=page, size=size)
+        if not items_on_page:
+            break
+        page += 1
+        all_items.extend(items_on_page)
+    return all_items
+
+
+
+if __name__ == '__main__':
+    dspace_be = dspace.rest(
+        env["backend"]["endpoint"],
+        env["backend"]["user"],
+        env["backend"]["password"],
+        env["backend"]["authentication"]
+    )
+
+    # Group ID of the group to which the policy will be changed e.g. admin group
+    GROUP_ID = "59ca14ed-0380-4655-bfa2-ca0711d1f1d0"
+
+    # Community UUID of the community whose items of collections will be updated
+    COM_UPDATE_ITEMS_UUID = 'e640c622-f0de-43e1-8446-bd6007737022'
+    COL_SUBCOLLS_URL = f'{dspace_be.endpoint}/core/communities/{COM_UPDATE_ITEMS_UUID}/collections'
+
+    COMMUNITY = Community({
+        "id": COM_UPDATE_ITEMS_UUID,
+        "type": "community",
+        "_links": {
+            "collections": {
+                "href": COL_SUBCOLLS_URL
+            }
+        },
+    })
+
+    # How many items were updated
+    counter = 0
+    # How many items were without file
+    without_file = 0
+    # Get all collections of the community
+    subcolls = dspace_be.client.get_collections(community=COMMUNITY)
+    for coll in subcolls:
+        # Counter for items in collection
+        collection_counter = 0
+        # Get all items of the collection
+        items_of_collection = get_all_items_from_collection(coll)
+        _logger.info(f'*******************Collection: {coll.name}*******************')
+        _logger.info(f'Items length: {len(items_of_collection)}')
+        for item in items_of_collection:
+            collection_counter += 1
+            _logger.debug(f'Item: {item.uuid}')
+            # Get bundle of the item - ORIGINAL
+            bundle = dspace_be.client.get_bundle_by_name('ORIGINAL', item.uuid)
+            # If there is no bundle, skip the item - there is no file
+            if not bundle:
+                _logger.debug(f'No ORIGINAL bundle for item uuid={item.uuid}')
+                without_file += 1
+                continue
+            resource_policy = dspace_be.client.get_resource_policy(bundle.uuid)
+            # Without a policy there is nothing to re-assign - skip instead of failing on resource_policy["id"]
+            if not resource_policy:
+                _logger.warning(f'No resource policy for bundle uuid={bundle.uuid} of item uuid={item.uuid}')
+                continue
+            counter += 1
+            _logger.debug(
+                f'Changing policy uuid={resource_policy["id"]} for item uuid={item.uuid} to group uuid={GROUP_ID}')
+            r = dspace_be.client.update_resource_policy_group(resource_policy["id"], GROUP_ID)
+            _logger.debug('Response: ' + str(r))
+        _logger.info(f'===================Updated Items: {collection_counter}=====================')
+
+    _logger.info(f'Items Without file: {without_file}')
+    _logger.info(f'Total updated Items: {counter}')