From 31235a415725ab2d6ba718db64c857002c9b0df8 Mon Sep 17 00:00:00 2001 From: Paurikova2 Date: Tue, 22 Oct 2024 15:16:10 +0200 Subject: [PATCH] fetch licenses and import them scripts --- src/dspace/_rest.py | 19 ++++++ src/pump/_license.py | 9 ++- tools/license/README.md | 15 +++++ tools/license/fetch_licenses.py | 108 +++++++++++++++++++++++++++++++ tools/license/import_licenses.py | 51 +++++++++++++++ 5 files changed, 199 insertions(+), 3 deletions(-) create mode 100644 tools/license/README.md create mode 100644 tools/license/fetch_licenses.py create mode 100644 tools/license/import_licenses.py diff --git a/src/dspace/_rest.py b/src/dspace/_rest.py index c275e35..46aa9dd 100644 --- a/src/dspace/_rest.py +++ b/src/dspace/_rest.py @@ -243,6 +243,25 @@ def put_bitstreamregistry(self, data: dict): # ======= + def fetch_licenses(self): + url ='core/clarinlicenses' + _logger.debug(f"Fatch [] using [{url}]") + page = 0 + licenses = [] + while True: + r = self._fetch(url, self.get, "_embedded", + params={"page": page, "size": 100}) + if r is None: + break + key = "clarinlicenses" + licenses_data = r.get(key, []) + if licenses_data: + licenses.extend(licenses_data) + else: + _logger.warning(f"Key [{key}] does not exist in response: {r}") + page += 1 + return licenses + def put_license_label(self, data: dict): url = 'core/clarinlicenselabels' _logger.debug(f"Importing [{data}] using [{url}]") diff --git a/src/pump/_license.py b/src/pump/_license.py index 87849d4..7e4c21b 100644 --- a/src/pump/_license.py +++ b/src/pump/_license.py @@ -1,6 +1,7 @@ import os import logging -from ._utils import read_json, time_method, serialize, deserialize, progress_bar, log_before_import, log_after_import + +from pump._utils import read_json, time_method, serialize, deserialize, progress_bar, log_before_import, log_after_import _logger = logging.getLogger("pump.license") @@ -68,7 +69,7 @@ def imported_labels(self): def imported_licenses(self): return self._imported['licenses'] - def 
import_to(self, env, dspace, epersons): + def import_to(self, env, dspace, epersons = None): self._import_license_labels(env, dspace) self._import_license_defs(env, dspace, epersons) @@ -143,7 +144,9 @@ def _import_license_defs(self, env, dspace, epersons): if lic_id in self._license2label: data['extendedClarinLicenseLabels'] = self._license2label[lic_id] - params = {'eperson': epersons.uuid(lic['eperson_id'])} + params = {} + if epersons: + params = {'eperson': epersons.uuid(lic['eperson_id'])} try: resp = dspace.put_license(params, data) self._imported["licenses"] += 1 diff --git a/tools/license/README.md b/tools/license/README.md new file mode 100644 index 0000000..7fed48e --- /dev/null +++ b/tools/license/README.md @@ -0,0 +1,15 @@ +# fetch_licenses.py + +This script retrieves all licenses, labels, and mappings from DSpace that meet the defined conditions and returns them in JSON format. + +``` +python fetch_licenses.py --no_definition dev-5.pc:85 --output data +``` + +# import_licenses.py + +This script imports licenses, labels, and mappings. + +``` +python import_licenses.py --input data +``` \ No newline at end of file diff --git a/tools/license/fetch_licenses.py b/tools/license/fetch_licenses.py new file mode 100644 index 0000000..f9255bb --- /dev/null +++ b/tools/license/fetch_licenses.py @@ -0,0 +1,108 @@ +### +# This script retrieves all licenses, labels, and mappings from DSpace that meet the defined conditions and returns them in JSON format. 
+### + +import argparse +import logging +import os +import json +import sys + +_this_dir = os.path.dirname(os.path.abspath(__file__)) +path_to_dspace_lib = os.path.join(_this_dir, "../../libs/dspace-rest-python") +sys.path.insert(0, os.path.join(_this_dir, "../../src")) + +import dspace # noqa +import settings # noqa +import project_settings # noqa +from dspace_rest_client.models import License # noqa +from utils import init_logging, update_settings # noqa + +_logger = logging.getLogger() + +# env settings, update with project_settings +env = update_settings(settings.env, project_settings.settings) +init_logging(_logger, env["log_file"]) + + +def fetch_licenses(dspace_be): + """Fetch licenses from DSpace backend.""" + all_licenses = dspace_be.fetch_licenses() + _logger.info(f"Number of fetched licenses: {len(all_licenses)}") + return all_licenses + + +def filter_licenses(all_licenses, no_definition): + """Filter licenses based on the no_definition criteria.""" + key = "definition" + no_definition_set = set(no_definition) + return [ + License(license) + for license in all_licenses + if key in license and not any(arg in license[key] for arg in no_definition_set) + ] + +def write_data_to_file(data, output_path): + """Write the filtered data to a JSON file.""" + os.makedirs(os.path.dirname(output_path), exist_ok=True) # Ensure output directory exists + with open(output_path, 'w', encoding='utf-8') as fout: + json.dump(data, fout, indent=2) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description="Get DSpace licenses that meet condition.") + parser.add_argument("--no_definition", type=str, nargs='+', required=True, + help="String that cannot be part of the license definition") + parser.add_argument('--output', type=str, + default=os.path.join(_this_dir, "data"), + help='Output directory for the JSON file') + args = parser.parse_args() + + # Initialize DSpace backend + dspace_be = dspace.rest( + env["backend"]["endpoint"], + env["backend"]["user"], 
+ env["backend"]["password"], + env["backend"]["authentication"] + ) + + # Fetch and filter licenses + all_licenses = fetch_licenses(dspace_be) + filtered_licenses = filter_licenses(all_licenses, args.no_definition) + # Collect unique license labels and extended license mappings + added_ids = set() + filtered_license_labels = [] + + for license in filtered_licenses: + # Function to add labels if they're unique + def add_unique_label(label): + if label and label.id not in added_ids: + added_ids.add(label.id) + filtered_license_labels.append(label) + + # Add the primary license label + add_unique_label(license.licenseLabel) + + # Add extended license labels + for ext in license.extendedLicenseLabel or []: + add_unique_label(ext) + + # Create extended license mappings + filtered_ext_mapping = [ + {'license_id': license.id, 'label_id': ext.id} + for license in filtered_licenses + for ext in license.extendedLicenseLabel or [] + ] + + _logger.info(f"Filtered licenses: {filtered_licenses}") + _logger.info(f"Filtered license labels: {filtered_license_labels}") + _logger.info(f"Filtered license extended mapping: {filtered_ext_mapping}") + + _logger.info(f"Number of filtered licenses: {len(filtered_licenses)}") + _logger.info(f"Number of filtered license labels: {len(filtered_license_labels)}") + _logger.info(f"Number of filtered license extended mapping: {len(filtered_ext_mapping)}") + + # Write the filtered data to the specified output file + write_data_to_file([license.to_dict() for license in filtered_licenses], os.path.join(args.output, 'licenses.json')) + write_data_to_file([license.to_dict() for license in filtered_license_labels], os.path.join(args.output, 'labels.json')) + write_data_to_file(filtered_ext_mapping, os.path.join(args.output, 'mapping.json')) diff --git a/tools/license/import_licenses.py b/tools/license/import_licenses.py new file mode 100644 index 0000000..1ca976e --- /dev/null +++ b/tools/license/import_licenses.py @@ -0,0 +1,51 @@ +### +# This script 
imports licenses, labels, and mappings. +### +import argparse +import logging +import os +import sys + +_this_dir = os.path.dirname(os.path.abspath(__file__)) +path_to_dspace_lib = os.path.join(_this_dir, "../../libs/dspace-rest-python") +sys.path.insert(0, os.path.join(_this_dir, "../../src")) +sys.path.insert(0, os.path.join(_this_dir, "../../src/pump")) + +import dspace # noqa +import pump # noqa +import settings # noqa +import project_settings # noqa +from dspace_rest_client.models import License # noqa +from utils import init_logging, update_settings # noqa + +from _license import licenses + +_logger = logging.getLogger() + +# env settings, update with project_settings +env = update_settings(settings.env, project_settings.settings) +init_logging(_logger, env["log_file"]) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description="Import licenses to DSpace.") + parser.add_argument('--input', type=str, + default=os.path.join(_this_dir, "data"), + help='Input directory for the JSON file') + args = parser.parse_args() + + # Initialize DSpace backend + dspace_be = dspace.rest( + env["backend"]["endpoint"], + env["backend"]["user"], + env["backend"]["password"], + env["backend"]["authentication"] + ) + + _logger.info("Loading license import") + licenses_imp = licenses(os.path.join(args.input, 'labels.json'), os.path.join(args.input, 'licenses.json'), os.path.join(args.input, 'mapping.json')) + + # import licenses + _logger.info("Start license import") + licenses_imp.import_to(env, dspace_be) + _logger.info("End license import") \ No newline at end of file