Skip to content

Commit

Permalink
Created script for bulk access
Browse files Browse the repository at this point in the history
  • Loading branch information
milanmajchrak committed Mar 22, 2024
1 parent d2560ed commit 0169662
Show file tree
Hide file tree
Showing 3 changed files with 196 additions and 6 deletions.
88 changes: 83 additions & 5 deletions src/dspace/impl/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def api_post(self, url, params, data, retry=False, content_type='application/jso
check_response(r, "api post")
return r

def api_put(self, url, params, json_p, retry=False):
def api_put(self, url, params, json_p, retry=False, content_type='application/json'):
"""
Perform a PUT request. Refresh XSRF token if necessary.
PUTs are typically used to update objects.
Expand All @@ -243,15 +243,16 @@ def api_put(self, url, params, json_p, retry=False):
Used if we need to refresh XSRF.
@return: Response from API
"""
h = {'Content-type': 'application/json'}
h = {'Content-type': content_type}
r = self.session.put(url, params=params, json=json_p, headers=h)
if 'DSPACE-XSRF-TOKEN' in r.headers:
t = r.headers['DSPACE-XSRF-TOKEN']
logging.debug('Updating token to ' + t)
logging.debug('API Put: Updating token to ' + t)
self.session.headers.update({'X-XSRF-Token': t})
self.session.cookies.update({'X-XSRF-Token': t})

if r.status_code == 403:
self.exception401Counter = 0
# 403 Forbidden
# If we had a CSRF failure, retry the request with the updated token
# After speaking in #dev it seems that these
Expand All @@ -264,8 +265,26 @@ def api_put(self, url, params, json_p, retry=False):
logging.error('Already retried... something must be wrong')
else:
logging.debug("Retrying request with updated CSRF token")
return self.api_put(url, params=params, json_p=json_p, retry=True)

return self.api_put(url, params=params, json_p=json_p, retry=True, content_type=content_type)
elif r.status_code == 401:
r_json = r.json()
if 'message' in r_json and 'Authentication is required' in r_json[
'message']:
if retry:
logging.error(
'API Post: Already retried... something must be wrong')
self.exception401Counter = 0
else:
logging.debug("API Post: Retrying request with updated CSRF token")
# try to authenticate
self.authenticate()
# Try to authenticate and repeat the request 3 times -
# if it won't happen log error
self.exception401Counter = self.exception401Counter + 1
retry_value = False
if self.exception401Counter > 3:
retry_value = True
return self.api_put(url, params=params, json_p=json_p, retry=True, content_type=content_type)
return r

def api_delete(self, url, params, retry=False):
Expand Down Expand Up @@ -556,6 +575,24 @@ def delete_dso(self, dso=None, url=None, params=None):
logging.error(f'{e}')
return None


def get_bundle_by_name(self, name, item_uuid):
    """
    Get a bundle by name for a specific item.
    Only the bundles embedded in the single response fetched from the
    item's bundles endpoint are searched.
    @param name: Name of the bundle to look for (e.g. 'ORIGINAL')
    @param item_uuid: UUID of the item whose bundles are searched
    @return: Bundle object for the first bundle whose name matches, or None
             when nothing matches or no usable response was fetched
    """
    url = f'{self.API_ENDPOINT}core/items/{item_uuid}/bundles'
    r_json = self.fetch_resource(url, params=None)
    # A missing or failed response must read as "not found" rather than
    # raise TypeError on the membership tests below.
    if not isinstance(r_json, dict):
        return None
    embedded = r_json.get('_embedded') or {}
    for bundle in embedded.get('bundles') or []:
        # .get(): a bundle entry without a name simply does not match
        if bundle.get('name') == name:
            return Bundle(bundle)
    return None


def get_bundles(self, parent=None, uuid=None):
"""
Get bundles for an item
Expand Down Expand Up @@ -839,6 +876,47 @@ def get_items(self):
items.append(Item(item_resource))
return items

def get_items_from_collection(self, collection_id, page=0, size=1000):
    """
    Get one page of items from a collection using the discovery search
    endpoint. Results are requested sorted by dc.date.accessioned, descending,
    with the thumbnail embedded.
    @param collection_id: UUID of the collection used as the search scope
    @param page: index of the page to request (defaults to 0)
    @param size: requested number of items per page
                 NOTE(review): the backend may cap the page size below the
                 requested value - confirm against the server configuration
    @return: list of Item objects found on the requested page; an empty list
             when the response has no search results
    """
    url = f'{self.API_ENDPOINT}discover/search/objects?sort=dc.date.accessioned,DESC&page={page}&size={size}&scope={collection_id}&dsoType=ITEM&embed=thumbnail'

    r = self.api_get(url)
    r_json = parse_json(r)
    # An unparsable or missing response is treated as an empty page instead
    # of raising TypeError on the lookups below.
    if not isinstance(r_json, dict):
        return []

    search_result = (r_json.get('_embedded') or {}).get('searchResult') or {}
    # 'objects' may be absent from the embedded search result; treat that as
    # an empty page rather than raising KeyError.
    objects = (search_result.get('_embedded') or {}).get('objects') or []
    return [Item(obj['_embedded']['indexableObject']) for obj in objects]


def get_resource_policy(self, bundle_uuid):
    """
    Get the first resource policy attached to a specific bundle.
    The eperson and group of each policy are requested as embeds.
    @param bundle_uuid: UUID of the bundle whose policies are searched
    @return: the first entry of the embedded 'resourcepolicies' list as parsed
             JSON, or None when the response contains no resource policy
    """
    url = f'{self.API_ENDPOINT}authz/resourcepolicies/search/resource?uuid={bundle_uuid}&embed=eperson&embed=group'
    r = self.api_get(url)
    r_json = parse_json(r)
    if not isinstance(r_json, dict):
        return None
    policies = (r_json.get('_embedded') or {}).get('resourcepolicies')
    # An empty list must yield None instead of raising IndexError on [0].
    if not policies:
        return None
    return policies[0]


def update_resource_policy_group(self, policy_id, group_uuid):
    """
    Point an existing resource policy at a different group.
    Sends a PUT to the policy's 'group' sub-resource; the body is the URI of
    the group and the content type is text/uri-list.
    @param policy_id: ID of the resource policy to update
    @param group_uuid: UUID of the group the policy should reference
    @return: the value returned by api_put for the request
    """
    group_uri = f'{self.API_ENDPOINT}eperson/groups/{group_uuid}'
    policy_group_url = f'{self.API_ENDPOINT}authz/resourcepolicies/{policy_id}/group'
    return self.api_put(policy_group_url, None, group_uri, content_type='text/uri-list')


def get_item(self, uuid):
"""
Get an item, given its UUID
Expand Down
2 changes: 1 addition & 1 deletion src/project_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"resume_dir": "__temp/resume/",

"backend": {
"endpoint": "http://dev-5.pc:85/server/api/",
"endpoint": "http://localhost:8080/server/api/",
"user": "[email protected]",
"password": "admin",
"authentication": True,
Expand Down
112 changes: 112 additions & 0 deletions tools/change_item_policies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
###
# Bulk access: for every item in every collection of a community, this script
# reassigns the first resource policy of the item's ORIGINAL bundle to a specific group.
###
import logging
import os
import sys


_this_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.join(_this_dir, "../src"))


import dspace # noqa
import settings # noqa
import project_settings # noqa
from dspace.impl.models import Item # noqa
from dspace.impl.models import Community # noqa
from utils import init_logging, update_settings # noqa

# Root logger used by the whole script.
_logger = logging.getLogger()

# Effective settings: settings.env updated with project_settings.settings.
env = update_settings(settings.env, project_settings.settings)
init_logging(_logger, env["log_file"])

# When the DSPACE_REST_API environment variable is set, it replaces the
# configured backend endpoint.
if os.environ.get("DSPACE_REST_API") is not None:
    env["backend"]["endpoint"] = os.environ["DSPACE_REST_API"]
    env_backend_endpoint = env["backend"]["endpoint"]
    _logger.info(f"Loaded env.backend.endpoint from env DSPACE_REST_API."
                 f" Current value: {env_backend_endpoint}")

def get_all_items_from_collection(coll, page_size=100, client=None):
    """
    Get all items from a collection by walking the paginated item search
    until an empty page is returned.
    @param coll: collection object; only its `uuid` attribute is read
    @param page_size: number of items requested per page. The backend is
                      reported to cap pages at 100 items, so asking for more
                      would not reduce the number of requests.
    @param client: object providing
                   get_items_from_collection(collection_id, page=..., size=...);
                   when None, the module-level `dspace_be.client` is used
    @return: list with the items of every non-empty page, in page order
    """
    if client is None:
        # Fall back to the backend created by the script entry point.
        client = dspace_be.client

    all_items = []
    page = 0
    while True:
        items_on_page = client.get_items_from_collection(coll.uuid, page=page, size=page_size)
        # Stop on the first empty page. A short page is not used as the stop
        # signal because the backend may return fewer items than requested.
        if not items_on_page:
            break
        all_items.extend(items_on_page)
        page += 1
    return all_items



if __name__ == '__main__':
    # Script entry point: for every item in every collection of the configured
    # community, point the resource policy of the item's ORIGINAL bundle at
    # the configured group.
    dspace_be = dspace.rest(
        env["backend"]["endpoint"],
        env["backend"]["user"],
        env["backend"]["password"],
        env["backend"]["authentication"]
    )

    # Group ID of the group to which the policy will be changed e.g. admin group
    GROUP_ID = "59ca14ed-0380-4655-bfa2-ca0711d1f1d0"

    # Community UUID of the community whose items of collections will be updated
    COM_UPDATE_ITEMS_UUID = 'e640c622-f0de-43e1-8446-bd6007737022'
    # NOTE(review): if dspace_be.endpoint keeps a trailing slash this URL
    # contains '//' - confirm how the endpoint is normalized.
    COL_SUBCOLLS_URL = f'{dspace_be.endpoint}/core/communities/{COM_UPDATE_ITEMS_UUID}/collections'

    # Minimal community object carrying only the id and the link to its
    # collections, so the collections can be listed from it.
    COMMUNITY = Community({
        "id": COM_UPDATE_ITEMS_UUID,
        "type": "community",
        "_links": {
            "collections": {
                "href": COL_SUBCOLLS_URL
            }
        },
    })

    # How many items were updated
    counter = 0
    # How many items were without file
    without_file = 0
    # How many items had an ORIGINAL bundle but no resource policy to change
    without_policy = 0
    # Get all collections of the community
    subcolls = dspace_be.client.get_collections(community=COMMUNITY)
    for coll in subcolls:
        # Counter for items in collection
        collection_counter = 0
        # Get all items of the collection
        items_of_collection = get_all_items_from_collection(coll)
        _logger.info(f'*******************Collection: {coll.name}*******************')
        _logger.info(f'Items length: {len(items_of_collection)}')
        for item in items_of_collection:
            collection_counter += 1
            _logger.debug(f'Item: {item.uuid}')
            # Get bundle of the item - ORIGINAL
            bundle = dspace_be.client.get_bundle_by_name('ORIGINAL', item.uuid)
            # If there is no bundle, skip the item - there is no file
            if not bundle:
                _logger.debug(f'No ORIGINAL bundle for item uuid={item.uuid}')
                without_file += 1
                continue
            resource_policy = dspace_be.client.get_resource_policy(bundle.uuid)
            # Without a policy there is nothing to update; skip the item
            # instead of aborting the whole bulk run on resource_policy["id"].
            if not resource_policy:
                _logger.warning(
                    f'No resource policy for bundle uuid={bundle.uuid} of item uuid={item.uuid}')
                without_policy += 1
                continue
            # Count the item only once an update is actually attempted
            counter += 1
            _logger.debug(
                f'Changing policy uuid={resource_policy["id"]} for item uuid={item.uuid} to group uuid={GROUP_ID}')
            r = dspace_be.client.update_resource_policy_group(resource_policy["id"], GROUP_ID)
            _logger.debug('Response: ' + str(r))
        _logger.info(f'===================Updated Items: {collection_counter}=====================')

    _logger.info(f'Items Without file: {without_file}')
    _logger.info(f'Items Without resource policy: {without_policy}')
    _logger.info(f'Total updated Items: {counter}')

0 comments on commit 0169662

Please sign in to comment.