From 0fea0d2ed436c91fce7631db3111576cc93618ed Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Thu, 20 Jun 2024 13:54:11 +0200 Subject: [PATCH 1/4] control internal_id in assetstore for testing --- src/pump/_bitstream.py | 9 +++++++-- src/pump/_utils.py | 7 +++++++ src/repo_import.py | 6 ++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/pump/_bitstream.py b/src/pump/_bitstream.py index acb4c40..1d137be 100644 --- a/src/pump/_bitstream.py +++ b/src/pump/_bitstream.py @@ -1,5 +1,5 @@ import logging -from ._utils import read_json, time_method, serialize, deserialize, progress_bar, log_before_import, log_after_import +from ._utils import read_json, time_method, serialize, deserialize, progress_bar, log_before_import, log_after_import, path_exists _logger = logging.getLogger("pump.bitstream") @@ -49,6 +49,10 @@ def __len__(self): def uuid(self, b_id: int): return self._id2uuid.get(str(b_id), None) + @property + def bitstream_path(self, internal_id: str): + return f'{internal_id[:2]}/{internal_id[2:4]}/{internal_id[4:6]}/{internal_id}' + @property def imported(self): return self._imported['bitstream'] @@ -198,7 +202,8 @@ def _bitstream_import_to(self, env, dspace, metadatas, bitstreamformatregistry, # TODO(jm): fake bitstreams TEST_DEV5 = "http://dev-5.pc" in env["backend"]["endpoint"] - if TEST_DEV5: + path = self.bitstream_path(b['internal_id']) + if TEST_DEV5 and not path_exists(f'{env["assetstore"]}{path}'): data['sizeBytes'] = 1748 data['checkSum'] = { 'checkSumAlgorithm': b['checksum_algorithm'], 'value': '8a4605be74aa9ea9d79846c1fba20a33'} diff --git a/src/pump/_utils.py b/src/pump/_utils.py index 3709d1c..62b8998 100644 --- a/src/pump/_utils.py +++ b/src/pump/_utils.py @@ -3,6 +3,8 @@ import logging from datetime import datetime, timezone from time import time as time_fnc +from pathlib import Path + _logger = logging.getLogger("pump.utils") @@ -126,3 +128,8 @@ def log_before_import(msg: str, expected: int): def log_after_import(msg: str, expected: int, imported: int): prefix = "OK " if expected == imported else "!!! WARN !!! " _logger.info(f"{prefix}Imported [{imported: >4d}] {msg}") + + +def path_exists(path): + path_obj = Path(path) + return path_obj.exists() diff --git a/src/repo_import.py b/src/repo_import.py index 9f7339f..2661002 100644 --- a/src/repo_import.py +++ b/src/repo_import.py @@ -52,6 +52,9 @@ def deserialize(resume: bool, obj, cache_file: str) -> bool: parser.add_argument('--config', help='Update configs', required=False, type=str, action='append') + parser.add_argument('--assetstore', + help='Location of assetstore folder', + required=False, type=str) args = parser.parse_args() s = time.time() @@ -67,6 +70,9 @@ def deserialize(resume: bool, obj, cache_file: str) -> bool: new_val = type(prev_val)(v) set_key(k, new_val, env) + # add assetstore folder location to env + env["assetstore"] = args.assetstore + # just in case # verify_disabled_mailserver() From 8fe6cd96752bc25345cf17dacd64fee9b8965ff4 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Thu, 20 Jun 2024 15:13:27 +0200 Subject: [PATCH 2/4] made bitstream path method static --- src/pump/_bitstream.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pump/_bitstream.py b/src/pump/_bitstream.py index 1d137be..7f8931a 100644 --- a/src/pump/_bitstream.py +++ b/src/pump/_bitstream.py @@ -49,8 +49,8 @@ def __len__(self): def uuid(self, b_id: int): return self._id2uuid.get(str(b_id), None) - @property - def bitstream_path(self, internal_id: str): + @staticmethod + def bitstream_path(internal_id: str): return f'{internal_id[:2]}/{internal_id[2:4]}/{internal_id[4:6]}/{internal_id}' @property @@ -202,7 +202,7 @@ def _bitstream_import_to(self, env, dspace, metadatas, bitstreamformatregistry, # TODO(jm): fake bitstreams TEST_DEV5 = "http://dev-5.pc" in env["backend"]["endpoint"] - path = self.bitstream_path(b['internal_id']) + path = self.bitstream_path(params['internal_id']) if TEST_DEV5 and not path_exists(f'{env["assetstore"]}{path}'): data['sizeBytes'] = 1748 data['checkSum'] = { From 98193d15cc7f49e9be2a7deaf0d74d6154937192 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Fri, 21 Jun 2024 10:33:42 +0200 Subject: [PATCH 3/4] added default value for assetstore path --- src/repo_import.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repo_import.py b/src/repo_import.py index 2661002..34a0d6d 100644 --- a/src/repo_import.py +++ b/src/repo_import.py @@ -54,7 +54,7 @@ def deserialize(resume: bool, obj, cache_file: str) -> bool: required=False, type=str, action='append') parser.add_argument('--assetstore', help='Location of assetstore folder', - required=False, type=str) + required=False, type=str, default="") args = parser.parse_args() s = time.time() From fffe6e5b4e5f4c4041d35147dd02df31a22b6c97 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Fri, 21 Jun 2024 12:34:23 +0200 Subject: [PATCH 4/4] only one assetstore folder path control --- src/pump/_bitstream.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/pump/_bitstream.py b/src/pump/_bitstream.py index 7f8931a..d564177 100644 --- a/src/pump/_bitstream.py +++ b/src/pump/_bitstream.py @@ -147,6 +147,12 @@ def _bitstream_import_to(self, env, dspace, metadatas, bitstreamformatregistry, log_key = "bitstreams" log_before_import(log_key, expected) + # TODO(jm): fake bitstreams + TEST_DEV5 = "http://dev-5.pc" in env["backend"]["endpoint"] + if TEST_DEV5 and env["assetstore"] == "": + _logger.error( + 'Location of assetstore folder is not defined but it should be checked!') + for i, b in enumerate(progress_bar(self._bs)): b_id = b['bitstream_id'] b_deleted = b['deleted'] @@ -201,12 +207,13 @@ def _bitstream_import_to(self, env, dspace, metadatas, bitstreamformatregistry, } # TODO(jm): fake bitstreams - TEST_DEV5 = "http://dev-5.pc" in env["backend"]["endpoint"] path = self.bitstream_path(params['internal_id']) if TEST_DEV5 and not path_exists(f'{env["assetstore"]}{path}'): data['sizeBytes'] = 1748 data['checkSum'] = { - 'checkSumAlgorithm': b['checksum_algorithm'], 'value': '8a4605be74aa9ea9d79846c1fba20a33'} + 'checkSumAlgorithm': b['checksum_algorithm'], + 'value': '8a4605be74aa9ea9d79846c1fba20a33' + } params['internal_id'] = '77893754617268908529226218097860272513' # if bitstream has bundle, set bundle_id from None to id