Skip to content

Commit

Permalink
ufal/py-collection2item-table-is-not-migrated
Browse files Browse the repository at this point in the history
ufal/py-collection2item-table-is-not-migrated
  • Loading branch information
milanmajchrak authored Oct 3, 2023
2 parents 195433e + 3e2c447 commit 639ad21
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 7 deletions.
39 changes: 38 additions & 1 deletion data_pump/item.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@ def import_item(metadata_class,
workspaceitem_json_name = "workspaceitem.json"
saved_workspace_json_name = "workspaceitem_dict.json"
workflowitem_json_name = 'workflowitem.json'
saved_workflow_json_name = "workflowitem_dict.json"
saved_workflow_json_name = "workflow_dict.json"
collection2table_name = "collection2item.json"
item_url = 'clarin/import/item'
saved_item_json_name = "item_dict.json"
workflowitem_url = 'clarin/import/workflowitem'
item2collection_url = 'clarin/import/item/{item_uuid}/mappedCollections'
imported_workspaceitem = 0
imported_workflowitem = 0
imported_item = 0
Expand Down Expand Up @@ -139,6 +141,41 @@ def import_item(metadata_class,
logging.error('POST request ' + item_url + ' for id: ' +
str(item['item_id']) + ' failed. Exception: ' + str(e))

# Import the collection2item table - only for items which are mapped to more than one collection.
# Add another collection to the Item only if that collection is not the owning_collection.
collection2table_json_list = read_json(collection2table_name)
coll_2_item_dict = {}
items_with_more_colls = {}
# Find items which are mapped to more than one collection and store them in a dictionary in this form:
# {'item_uuid': [collection_uuid_1, collection_uuid_2]}
for collection2table in collection2table_json_list:
# Every item should be mapped to only one collection - the owning collection - except for the items which
# are mapped to more than one collection
item_uuid = item_id_dict[collection2table['item_id']]
collection_uuid = collection_id_dict[collection2table['collection_id']]
if item_uuid in coll_2_item_dict:
# Add another collection into dict to get all collections for current Item
coll_2_item_dict[item_uuid].append(collection_id_dict[collection2table['collection_id']])
# Add item UUID and collection UUID into the dict in this form: {`item_uuid`: `collection_uuid`}
items_with_more_colls[item_uuid] = collection_uuid
continue
coll_2_item_dict[item_uuid] = [collection_uuid]

# Call the Vanilla REST endpoint which adds the relation between Item and Collection to the collection2item table
for item_with_more_coll_uuid in items_with_more_colls.keys():
# Prepare request URL - replace `{item_uuid}` with current `item_with_more_coll_uuid`
request_url = item2collection_url.replace('{item_uuid}', item_with_more_coll_uuid)

# Prepare the request body, which should look like this:
# `"https://localhost:8080/spring-rest/api/core/collections/{collection_uuid_1}" + \n
# "https://localhost:8080/spring-rest/api/core/collections/{collection_uuid_2}"
request_body = []
collection_url = 'core/collections/'
for collection_uuid in coll_2_item_dict[item_with_more_coll_uuid]:
request_body.append(API_URL + collection_url + collection_uuid)

do_api_post(request_url, {}, request_body)

# save item dict as json
if save_dict:
save_dict_as_json(saved_item_json_name, item_id_dict)
Expand Down
13 changes: 7 additions & 6 deletions support/dspace_interface/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,20 +167,21 @@ def api_get(self, url, params=None, data=None):
self.session.cookies.update({'X-XSRF-Token': t})
return r

def api_post(self, url, params, json_p, retry=False):
def api_post(self, url, params, data, retry=False, content_type='application/json'):
"""
Perform a POST request. Refresh XSRF token if necessary.
POSTs are typically used to create objects.
@param content_type: Type of the content, it is `JSON` by default
@param url: DSpace REST API URL
@param params: Any parameters to include (eg ?parent=abbc-....)
@param json_p: Data in json-ready form (dict) to send as
@param data: Data in json-ready form (dict) to send as
POST body (eg. item.as_dict())
@param retry: Has this method already been retried?
Used if we need to refresh XSRF.
@return: Response from API
"""
h = {'Content-type': 'application/json'}
r = self.session.post(url, json=json_p, params=params, headers=h)
h = {'Content-type': content_type}
r = self.session.post(url, json=data, params=params, headers=h)
if 'DSPACE-XSRF-TOKEN' in r.headers:
t = r.headers['DSPACE-XSRF-TOKEN']
logging.debug('API Post: Updating token to ' + t)
Expand All @@ -201,7 +202,7 @@ def api_post(self, url, params, json_p, retry=False):
'API Post: Already retried... something must be wrong')
else:
logging.info("API Post: Retrying request with updated CSRF token")
return self.api_post(url, params=params, json_p=json_p, retry=True)
return self.api_post(url, params=params, data=data, retry=True)
elif r.status_code == 401:
r_json = r.json()
if 'message' in r_json and 'Authentication is required' in r_json[
Expand All @@ -220,7 +221,7 @@ def api_post(self, url, params, json_p, retry=False):
retry_value = False
if self.exception401Counter > 3:
retry_value = True
return self.api_post(url, params=params, json_p=json_p,
return self.api_post(url, params=params, data=data,
retry=retry_value)

check_response(r, "api post")
Expand Down

0 comments on commit 639ad21

Please sign in to comment.