Skip to content

Commit

Permalink
Script for Adding Metadata (#168)
Browse files Browse the repository at this point in the history
* added dc.date to from_mtd_field
* reauthentication
* remove retry=False authentication
* show doubles
* removed metadata

---------

Co-authored-by: jm <jm@maz>
  • Loading branch information
Paurikova2 and jm authored Oct 29, 2024
1 parent 3113ec6 commit be747d1
Show file tree
Hide file tree
Showing 5 changed files with 392 additions and 6 deletions.
2 changes: 1 addition & 1 deletion libs/dspace-rest-python
48 changes: 48 additions & 0 deletions src/dspace/_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,54 @@ def fetch_item(self, uuid: str):
_logger.debug(f"Importing [] using [{url}]")
return self._fetch(url, self.get, None)

def fetch_items(self, page_size: int = 100, limit=None):
    """
    Fetch items from the `core/items` endpoint page by page and return
    them as a single list.

    Paging stops when `_fetch` returns None, when a page carries no
    items, or as soon as `limit` items have been collected.

    :param page_size: number of items requested per page (sent as `size`).
    :param limit: maximum number of items to return; None means no limit.
    :return: list of the fetched items, truncated to `limit` when given.
    """
    url = 'core/items'
    _logger.debug(f"Fetch [] using [{url}]")
    key = "items"
    page = 0
    items = []
    while True:
        # NOTE(review): presumably `_fetch` returns the `_embedded` part of
        # the response, or None when it is missing - confirm against `_fetch`.
        r = self._fetch(url, self.get, "_embedded",
                        params={"page": page, "size": page_size})
        if r is None:
            break
        items_data = r.get(key, [])
        if not items_data:
            # An empty page means there is nothing more to read; requesting
            # further pages would only repeat this warning forever.
            _logger.warning(f"Key [{key}] does not exist in response: {r}")
            break
        items.extend(items_data)
        page += 1

        # `>=` so that no extra page is requested once the limit is reached.
        if limit is not None and len(items) >= limit:
            return items[:limit]
    return items

def iter_items(self, page_size: int = 100, limit: int = -1):
    """
    Lazily fetch items from the `core/items` endpoint, yielding one list
    of items per page while reporting progress through a tqdm bar.

    Iteration stops when `_fetch` returns None, when a page carries no
    items, or once `limit` items have been yielded in total.

    :param page_size: number of items requested per page (sent as `size`).
    :param limit: maximum total number of items to yield; values <= 0
        mean no limit. The last yielded page is trimmed so the total
        never exceeds `limit`.
    :return: generator yielding the list of items of each page.
    """
    from tqdm import tqdm

    url = 'core/items'
    _logger.debug(f"Fetch iter [] using [{url}]")
    key = "items"
    page = 0
    len_items = 0
    with tqdm(desc="Fetching items", unit=" items") as pbar:
        while True:
            # NOTE(review): presumably `_fetch` returns the `_embedded` part
            # of the response, or None when it is missing - confirm.
            r = self._fetch(url, self.get, "_embedded",
                            params={"page": page, "size": page_size})
            if r is None:
                break
            items_data = r.get(key, [])
            if not items_data:
                # An empty page means there is nothing more to read; asking
                # for further pages would only repeat this warning forever.
                _logger.warning(f"Key [{key}] does not exist in response: {r}")
                break
            if limit > 0:
                # Trim the final page so that the total stays within `limit`.
                items_data = items_data[:limit - len_items]
            len_items += len(items_data)
            pbar.update(len(items_data))
            yield items_data
            page += 1

            # `>=` so that no extra page is fetched once the limit is reached.
            if len_items >= limit > 0:
                return

def put_ws_item(self, param: dict, data: dict):
url = 'clarin/import/workspaceitem'
_logger.debug(f"Importing [{data}] using [{url}]")
Expand Down
16 changes: 11 additions & 5 deletions src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,21 @@ def init_logging(
os.makedirs(base_log_dir, exist_ok=True)

formatter = logging.Formatter(format)
file_handler = logging.FileHandler(log_file)
file_handler = logging.FileHandler(log_file, encoding="utf-8")
file_handler.setFormatter(formatter)
file_handler.setLevel(file_level)
logger.addHandler(file_handler)

console_handler = logging.StreamHandler()
console_handler.setLevel(console_level)
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)
found_stream = None
for h in logger.handlers:
if isinstance(h, logging.StreamHandler):
found_stream = h
break
if found_stream is None:
console_handler = logging.StreamHandler()
console_handler.setLevel(console_level)
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)

logger.setLevel(logging.INFO)

Expand Down
10 changes: 10 additions & 0 deletions tools/add_metadata/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# add_metadata.py

This script adds a new metadata field (`--to_mtd_field`) to items that are missing it. The value for the new field is taken from the existing metadata fields given as input (`--from_mtd_field`).
```
python add_metadata.py --to_mtd_field dc.date.issued --from_mtd_field dc.date.submitted dc.date.committed dc.date.defense dc.date
```
Dry run:
```
python add_metadata.py --dry-run --endpoint="http://dev-5.pc:86/server/api/" --to_mtd_field dc.date.issued --from_mtd_field dc.date.submitted dc.date.committed dc.date.defense dc.date
```
Loading

0 comments on commit be747d1

Please sign in to comment.