Skip to content

Commit

Permalink
fetch licenses and import them scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
Paurikova2 committed Oct 22, 2024
1 parent 3113ec6 commit 31235a4
Show file tree
Hide file tree
Showing 5 changed files with 199 additions and 3 deletions.
19 changes: 19 additions & 0 deletions src/dspace/_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,25 @@ def put_bitstreamregistry(self, data: dict):

# =======

def fetch_licenses(self):
    """Fetch all CLARIN licenses from the backend, page by page.

    Pages of 100 entries are requested from ``core/clarinlicenses`` through
    ``self._fetch`` (asking for the ``_embedded`` key) until ``_fetch``
    returns ``None`` or a page carries no ``clarinlicenses`` entries.

    Returns:
        list: the ``clarinlicenses`` entries of all pages, concatenated in
        the order the pages were received.
    """
    url = 'core/clarinlicenses'
    key = "clarinlicenses"
    _logger.debug(f"Fetch licenses using [{url}]")
    licenses = []
    page = 0
    while True:
        r = self._fetch(url, self.get, "_embedded",
                        params={"page": page, "size": 100})
        if r is None:
            break
        licenses_data = r.get(key, [])
        if not licenses_data:
            # A page without licenses means there is nothing left to read.
            # Stopping here keeps the loop from spinning forever when the
            # backend answers pages past the end with an empty payload
            # instead of something `_fetch` maps to None.
            _logger.warning(f"Key [{key}] does not exist in response: {r}")
            break
        licenses.extend(licenses_data)
        page += 1
    return licenses

def put_license_label(self, data: dict):
url = 'core/clarinlicenselabels'
_logger.debug(f"Importing [{data}] using [{url}]")
Expand Down
9 changes: 6 additions & 3 deletions src/pump/_license.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import logging
from ._utils import read_json, time_method, serialize, deserialize, progress_bar, log_before_import, log_after_import

from pump._utils import read_json, time_method, serialize, deserialize, progress_bar, log_before_import, log_after_import

_logger = logging.getLogger("pump.license")

Expand Down Expand Up @@ -68,7 +69,7 @@ def imported_labels(self):
def imported_licenses(self):
    """Return the counter of licenses imported so far (``self._imported['licenses']``)."""
    return self._imported['licenses']

def import_to(self, env, dspace, epersons):
def import_to(self, env, dspace, epersons = None):
self._import_license_labels(env, dspace)
self._import_license_defs(env, dspace, epersons)

Expand Down Expand Up @@ -143,7 +144,9 @@ def _import_license_defs(self, env, dspace, epersons):
if lic_id in self._license2label:
data['extendedClarinLicenseLabels'] = self._license2label[lic_id]

params = {'eperson': epersons.uuid(lic['eperson_id'])}
params = {}
if epersons:
params = {'eperson': epersons.uuid(lic['eperson_id'])}
try:
resp = dspace.put_license(params, data)
self._imported["licenses"] += 1
Expand Down
15 changes: 15 additions & 0 deletions tools/license/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# fetch_licenses.py

This script retrieves all licenses, labels, and mappings from DSpace that meet the defined conditions and returns them in JSON format.

```
python fetch_licenses.py --no_definition dev-5.pc:85 --output data
```

# import_licenses.py

This script imports licenses, labels, and mappings.

```
python import_licenses.py --input data
```
108 changes: 108 additions & 0 deletions tools/license/fetch_licenses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
###
# This script retrieves all licenses, labels, and mappings from DSpace that meet the defined conditions and returns them in JSON format.
###

import argparse
import logging
import os
import json
import sys

# Directory containing this script; the relative paths below are resolved against it.
_this_dir = os.path.dirname(os.path.abspath(__file__))
# NOTE(review): this path is computed but never added to sys.path in this
# script, although dspace_rest_client is imported below — confirm it is made
# importable elsewhere, or that the sys.path insertion is missing here.
path_to_dspace_lib = os.path.join(_this_dir, "../../libs/dspace-rest-python")
# Put the project's src directory first on the import path so the project
# imports that follow resolve (presumably dspace, settings, utils live there).
sys.path.insert(0, os.path.join(_this_dir, "../../src"))

import dspace # noqa
import settings # noqa
import project_settings # noqa
from dspace_rest_client.models import License # noqa
from utils import init_logging, update_settings # noqa

# Root logger (no name given); handed to init_logging together with the log file path.
_logger = logging.getLogger()

# Environment settings: settings.env combined with project_settings.settings
# by update_settings. Runs at import time, as does the logging setup below.
env = update_settings(settings.env, project_settings.settings)
init_logging(_logger, env["log_file"])


def fetch_licenses(dspace_be):
    """Ask the DSpace backend for its licenses, log how many came back, and return them."""
    fetched = dspace_be.fetch_licenses()
    fetched_count = len(fetched)
    _logger.info(f"Number of fetched licenses: {fetched_count}")
    return fetched


def filter_licenses(all_licenses, no_definition):
    """Select licenses whose definition contains none of the excluded strings.

    Entries lacking a "definition" key are dropped. Every entry that is kept
    is wrapped in ``License`` before being returned.
    """
    excluded = set(no_definition)
    field = "definition"
    kept = []
    for entry in all_licenses:
        if field not in entry:
            continue
        definition = entry[field]
        # Reject the entry as soon as one excluded string occurs in its definition.
        if any(fragment in definition for fragment in excluded):
            continue
        kept.append(License(entry))
    return kept

def write_data_to_file(data, output_path):
    """Write ``data`` as indented JSON to ``output_path``.

    Missing parent directories are created first. A bare file name (no
    directory component) is written relative to the current working directory.

    Args:
        data: JSON-serializable object to dump.
        output_path: destination file; opened in write mode, so existing
            content is replaced.
    """
    parent_dir = os.path.dirname(output_path)
    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # when the path actually has a directory component.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as fout:
        json.dump(data, fout, indent=2)


def _collect_unique_labels(licenses):
    """Return the distinct labels (primary and extended) used by ``licenses``.

    Labels are de-duplicated by their ``id`` and kept in first-seen order;
    falsy labels (e.g. a license without a primary label) are skipped.
    """
    seen_ids = set()
    unique_labels = []
    for lic in licenses:
        # Primary label first, then the extended ones (which may be None).
        for label in [lic.licenseLabel, *(lic.extendedLicenseLabel or [])]:
            if label and label.id not in seen_ids:
                seen_ids.add(label.id)
                unique_labels.append(label)
    return unique_labels


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Get DSpace licenses that meet condition.")
    parser.add_argument("--no_definition", type=str, nargs='+', required=True,
                        help="String that cannot be part of the license definition")
    parser.add_argument('--output', type=str,
                        default=os.path.join(_this_dir, "data"),
                        help='Output directory for the JSON file')
    args = parser.parse_args()

    # Initialize DSpace backend
    dspace_be = dspace.rest(
        env["backend"]["endpoint"],
        env["backend"]["user"],
        env["backend"]["password"],
        env["backend"]["authentication"]
    )

    # Fetch and filter licenses
    all_licenses = fetch_licenses(dspace_be)
    filtered_licenses = filter_licenses(all_licenses, args.no_definition)

    # Collect the unique labels referenced by the filtered licenses
    filtered_license_labels = _collect_unique_labels(filtered_licenses)

    # One mapping row per (license, extended label) pair
    filtered_ext_mapping = [
        {'license_id': lic.id, 'label_id': ext.id}
        for lic in filtered_licenses
        for ext in lic.extendedLicenseLabel or []
    ]

    _logger.info(f"Filtered licenses: {filtered_licenses}")
    _logger.info(f"Filtered license labels: {filtered_license_labels}")
    _logger.info(f"Filtered license extended mapping: {filtered_ext_mapping}")

    _logger.info(f"Number of filtered licenses: {len(filtered_licenses)}")
    _logger.info(f"Number of filtered license labels: {len(filtered_license_labels)}")
    _logger.info(f"Number of filtered license extended mapping: {len(filtered_ext_mapping)}")

    # Write the three result sets into the output directory
    write_data_to_file([lic.to_dict() for lic in filtered_licenses],
                       os.path.join(args.output, 'licenses.json'))
    write_data_to_file([label.to_dict() for label in filtered_license_labels],
                       os.path.join(args.output, 'labels.json'))
    write_data_to_file(filtered_ext_mapping, os.path.join(args.output, 'mapping.json'))
51 changes: 51 additions & 0 deletions tools/license/import_licenses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
###
# This script imports licenses, labels, and mappings.
###
import argparse
import logging
import os
import sys

# Directory containing this script; the relative paths below are resolved against it.
_this_dir = os.path.dirname(os.path.abspath(__file__))
# NOTE(review): this path is computed but never added to sys.path in this
# script, although dspace_rest_client is imported below — confirm it is made
# importable elsewhere, or that the sys.path insertion is missing here.
path_to_dspace_lib = os.path.join(_this_dir, "../../libs/dspace-rest-python")
# Put the project's src directory first on the import path for the project imports below.
sys.path.insert(0, os.path.join(_this_dir, "../../src"))
# Also expose src/pump directly so that `from _license import licenses` below
# resolves as a top-level module import.
sys.path.insert(0, os.path.join(_this_dir, "../../src/pump"))

import dspace # noqa
import pump # noqa
import settings # noqa
import project_settings # noqa
from dspace_rest_client.models import License # noqa
from utils import init_logging, update_settings # noqa

from _license import licenses

# Root logger (no name given); handed to init_logging together with the log file path.
_logger = logging.getLogger()

# Environment settings: settings.env combined with project_settings.settings
# by update_settings. Runs at import time, as does the logging setup below.
env = update_settings(settings.env, project_settings.settings)
init_logging(_logger, env["log_file"])


if __name__ == '__main__':
    # Command line: only the directory holding the JSON inputs is configurable.
    parser = argparse.ArgumentParser(description="Import licenses to DSpace.")
    parser.add_argument(
        '--input',
        type=str,
        default=os.path.join(_this_dir, "data"),
        help='Input directory for the JSON file',
    )
    args = parser.parse_args()

    # Connect to the DSpace backend described in the environment settings.
    backend_cfg = env["backend"]
    dspace_be = dspace.rest(
        backend_cfg["endpoint"],
        backend_cfg["user"],
        backend_cfg["password"],
        backend_cfg["authentication"],
    )

    _logger.info("Loading license import")
    # The importer takes the labels, licenses and mapping files, in that order.
    labels_path = os.path.join(args.input, 'labels.json')
    licenses_path = os.path.join(args.input, 'licenses.json')
    mapping_path = os.path.join(args.input, 'mapping.json')
    licenses_imp = licenses(labels_path, licenses_path, mapping_path)

    # Run the import; no epersons object is passed.
    _logger.info("Start license import")
    licenses_imp.import_to(env, dspace_be)
    _logger.info("End license import")

0 comments on commit 31235a4

Please sign in to comment.