
Commit

Added create_bitstreams.py to the refactored Python API.
milanmajchrak committed Jan 10, 2024
2 parents 07a3ae6 + bb8509b commit 78db324
Showing 106 changed files with 179 additions and 6 deletions.
3 changes: 2 additions & 1 deletion requirements.txt
@@ -3,4 +3,5 @@ requests
lxml
psycopg2
pre-commit
tqdm
tqdm
requests-toolbelt
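requests-toolbelt is presumably added for the MultipartEncoder used in src/dspace/impl/client.py further down in this commit; a minimal sketch of what the new dependency provides (the field name and file path below are illustrative only, not part of the commit):

    from requests_toolbelt import MultipartEncoder

    # Builds a streaming multipart/form-data body instead of holding it all in memory.
    encoder = MultipartEncoder(fields={'file': ('example.txt', open('example.txt', 'rb'))})
    print(encoder.content_type)  # 'multipart/form-data; boundary=...'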
Empty file.
140 changes: 140 additions & 0 deletions src/create_bitstreams/create_bitstreams.py
@@ -0,0 +1,140 @@
import logging
import os

import src.dspace # noqa
import src.settings # noqa
import src.project_settings # noqa

from src.dspace.impl.models import Item
from src.utils import update_settings

env = update_settings(src.settings.env, src.project_settings.settings)

MULTIPART_CONTENT_TYPE = 'multipart/form-data'
HUNDRED_FILES_PATH = 'hundred_of_files'
ZIP_FILES_PATH = 'zip_files'

COMMUNITY_2_CREATE = {
"type": {
"value": "community"
},
"metadata": {
"dc.title": [
{
"language": None,
"value": "Test Item Community"
}
],
}
}

COLLECTION_2_CREATE = {
"type": {
"value": "collection"
},
"metadata": {
"dc.title": [
{
"language": None,
"value": "Test Item Collection"
}
]
},
}

ITEM_2_CREATE = {
"type": {
"value": "item"
},
"metadata": {
"dc.title": [
{
"language": None,
"value": "Test Item"
}
]
},
"inArchive": True,
"discoverable": True,
"withdrawn": False,
}


def load_files_from_folder(folder_path):
"""
    Load the names of all files directly inside a folder.
    @param folder_path: path to the folder
    @return: list of file names, or None if the folder does not exist
"""
# Check if the folder path exists
if not os.path.exists(folder_path):
logging.warning(f"The folder '{folder_path}' does not exist.")
return None

f = []
for (dirpath, dirnames, filenames) in os.walk(folder_path):
f.extend(filenames)
break

return f


def create_bitstreams_from_folder(dspace_client, item, folder_path):
"""
    Create a bitstream for each file in a specific folder.
@param dspace_client: dspace client
@param item: item where the bitstreams will be created
@param folder_path: folder path where the files are located
"""
    # Create a bundle on the item into which the files will be uploaded
    original_bundle = dspace_client.create_bundle(item)
    if not original_bundle:
        logging.warning('Bundle was not created.')
        return

# Load files from folder
files = load_files_from_folder(folder_path)
    if not files:
        logging.warning(f'No files were loaded from the folder {folder_path}')
        return
for file_name in files:
logging.info(f'Creating bitstream with file: {file_name}')
dspace_client.create_bitstream(original_bundle, file_name, f'{folder_path}/{file_name}', MULTIPART_CONTENT_TYPE)


def create_item_with_title(dspace_client, parent, title):
"""
Create item with specific title.
@param dspace_client: dspace client
@param parent: collection where the item will be created
@param title: title of the item
@return: created item or None if item was not created
"""
item2create = ITEM_2_CREATE
item2create['metadata']['dc.title'][0]['value'] = title
return dspace_client.create_item(parent.uuid, Item(item2create))


if __name__ == '__main__':
dspace_be = src.dspace.rest(
env["backend"]["endpoint"],
env["backend"]["user"],
env["backend"]["password"],
env["backend"]["authentication"]
)

# Create community
community = dspace_be.create_community(None, COMMUNITY_2_CREATE)
if not community:
        logging.warning('Community was not created.')

# Create collection
collection = dspace_be.create_collection(community.uuid, COLLECTION_2_CREATE)
if not collection:
        logging.warning('Collection was not created.')

# Create item with 100 bitstreams
item_hundred_files = create_item_with_title(dspace_be, collection, 'Hundred Files')
    create_bitstreams_from_folder(dspace_be, item_hundred_files, HUNDRED_FILES_PATH)

# Create item with zip bitstream
item_zip_file = create_item_with_title(dspace_be, collection, 'Zip File')
    create_bitstreams_from_folder(dspace_be, item_zip_file, ZIP_FILES_PATH)
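The script pulls its connection details from src.settings.env merged with src.project_settings.settings via update_settings; a hedged sketch of the backend keys it expects (the values below are placeholders, not part of the commit):

    settings = {
        'backend': {
            'endpoint': 'http://localhost:8080/server/api/',  # assumed local DSpace REST endpoint
            'user': 'admin@example.com',                       # placeholder credentials
            'password': 'admin',
            'authentication': True,
        }
    }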
Binary file added src/create_bitstreams/zip_files/images.zip
Binary file not shown.
20 changes: 20 additions & 0 deletions src/dspace/_rest.py
@@ -468,3 +468,23 @@ def _resp_error(self, r):

def _resp_ok(self, r):
return True

def create_community(self, parent, data):
_logger.debug(f'Creating community: {data}')
return self.client.create_community(parent, data)

def create_collection(self, parent, data):
_logger.debug(f'Creating collection: {data}')
return self.client.create_collection(parent, data)

def create_item(self, parent, data):
_logger.debug(f'Creating item: {data}')
return self.client.create_item(parent, data)

def create_bitstream(self, parent, name, path, content_type):
_logger.debug(f'Creating bitstream: {name}')
return self.client.create_bitstream(parent, name, path, content_type)

def create_bundle(self, parent):
_logger.debug(f'Creating bundle with parent: {parent}')
return self.client.create_bundle(parent)
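The new methods simply log and delegate to the underlying client; a minimal usage sketch, assuming the constants and imports from create_bitstreams.py above:

    dspace_be = src.dspace.rest(endpoint, user, password, authentication)
    community = dspace_be.create_community(None, COMMUNITY_2_CREATE)
    collection = dspace_be.create_collection(community.uuid, COLLECTION_2_CREATE)
    item = dspace_be.create_item(collection.uuid, Item(ITEM_2_CREATE))
    bundle = dspace_be.create_bundle(item)
    dspace_be.create_bitstream(bundle, 'images.zip', 'zip_files/images.zip', MULTIPART_CONTENT_TYPE)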
22 changes: 17 additions & 5 deletions src/dspace/impl/client.py
@@ -24,7 +24,7 @@
from uuid import UUID

import requests
from requests import Request
from requests_toolbelt import MultipartEncoder

from .models import DSpaceObject, SimpleDSpaceObject, Bundle, Bitstream, Community, \
Collection, User, Item, Group
@@ -609,7 +609,7 @@ def create_bundle(self, parent=None, name='ORIGINAL'):
return None
url = f'{self.API_ENDPOINT}core/items/{parent.uuid}/bundles'
return Bundle(api_resource=parse_json(
self.api_post(url, params=None, json_p={'name': name, 'metadata': {}})))
self.api_post(url, params=None, data={'name': name, 'metadata': {}})))

def get_bitstreams(self, uuid=None, bundle=None, page=0, size=20):
"""
@@ -674,9 +674,21 @@ def create_bitstream(self, bundle=None, name=None, path=None, mime=None,
payload = {'properties': json.dumps(properties) + ';application/json'}
h = self.session.headers
h.update({'Content-Encoding': 'gzip'})
req = Request('POST', url, data=payload, headers=h, files=files)
prepared_req = self.session.prepare_request(req)
r = self.session.send(prepared_req)

mp_encoder = MultipartEncoder(
fields={
'file': (name, open(path, 'rb')),
}
)
h.update({'Content-Type': mp_encoder.content_type})
r = self.session.post(
url,
# The MultipartEncoder is posted as data, don't use files=...!
data=mp_encoder,
# The MultipartEncoder provides the content-type header with the boundary:
headers=h
)

if 'DSPACE-XSRF-TOKEN' in r.headers:
t = r.headers['DSPACE-XSRF-TOKEN']
logging.debug('Updating token to ' + t)
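For context, a standalone sketch of the streaming-upload pattern this hunk switches to: MultipartEncoder reads the file in chunks as the request body is sent and exposes the multipart boundary through content_type. The URL and field name here are illustrative, and this is not the DSpace client implementation itself:

    import requests
    from requests_toolbelt import MultipartEncoder

    def upload_file_streaming(session: requests.Session, url: str, path: str, name: str):
        # Minimal sketch: stream a single file as multipart/form-data.
        with open(path, 'rb') as fh:
            encoder = MultipartEncoder(fields={'file': (name, fh)})
            headers = {'Content-Type': encoder.content_type}  # includes the boundary
            return session.post(url, data=encoder, headers=headers)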

0 comments on commit 78db324
