Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Removes pre-Bulkrax rake tasks for ingesting content. Fixes #571 #574

Merged
merged 1 commit into from
Jan 14, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
241 changes: 0 additions & 241 deletions lib/tasks/gwss.rake
Original file line number Diff line number Diff line change
Expand Up @@ -57,247 +57,6 @@ namespace :gwss do
end
end

desc "Ingest a Work"
task :ingest_work => :environment do |t, args|
begin
options = {}

op = OptionParser.new
op.banner = "Usage: rake gwss:ingest_work -- --manifest=MFPATH --primaryfile=PFPATH --otherfiles=OFLIST --depositor=DEPOSITOR --update-item-id=UPDATEID"
op.on('-mf MFPATH', '--manifest=MFPATH', 'Path to manifest file') { |mfpath| options[:mfpath] = mfpath }
op.on('-pf FPATH', '--primaryfile=PFPATH', 'Path to primary attachment file') { |pfpath| options[:pfpath] = pfpath }
op.on('-of OFLIST', '--otherfiles=OFLIST', 'Comma-separated list of paths to supplemental files') { |oflist| options[:oflist] = oflist }
op.on('-dep DEPOSITOR', '--depositor=DEPOSITOR', 'Scholarspace ID (e.g. email) of depositor') { |depositor| options[:depositor] = depositor }
op.on('--set-item-id[=UPDATEID]', 'Set Item ID') { |setid| options[:setid] = setid }
op.on('--update-item-id[=UPDATEID]', 'Update Item ID') { |updateid| options[:updateid] = updateid }
op.on('--skip-file-updates', 'If upload, do not delete existing files') { options[:skip_file_updates] = true }
op.on('--private', 'Ingest and create with Private visibility') { options[:private] = true }

# return `ARGV` with the intended arguments
args = op.order!(ARGV) {}
op.parse!(args)

raise OptionParser::MissingArgument if options[:mfpath].nil?
raise OptionParser::MissingArgument if options[:depositor].nil?

manifest_file = options[:mfpath]
if File.exist?(manifest_file)
mf = File.read(manifest_file)
manifest_json = JSON.parse(mf.squish)
item_attributes = manifest_json.dup
item_attributes.delete('embargo')
item_attributes.delete('embargo_release_date')

# dc:rights
# There are some items with extraneous 'None' values; remove these
licenses = (manifest_json['license'] or []) - ['None']
if licenses.length == 0
item_attributes['license'] = ['http://www.europeana.eu/portal/rights/rr-r.html']
else
item_attributes['license'] = licenses
end

# edm:rights
# turn this scalar value into a single-valued list
item_attributes['rights_statement'] = [manifest_json['rights_statement']]

work_id = ingest_work(item_attributes, options[:depositor], options[:updateid], options[:setid], options[:private], options[:skip_file_updates])
# generate_ingest_report(noid_list, investigation_id)
embargo_attributes = read_embargo_info(manifest_json)
gww = GwWork.find(work_id)
unless !options[:updateid].nil? && options[:skip_file_updates]
attach_files(gww, options[:pfpath], options[:oflist],
options[:depositor], embargo_attributes)
end
puts work_id
else
puts "Manifest file doesn't exist - no ingest"
end
end
end

desc "Ingest an ETD"
task :ingest_etd => :environment do |t, args|
begin
options = {}

op = OptionParser.new
op.banner = "Usage: rake gwss:ingest_etd -- --manifest=MFPATH --primaryfile=PFPATH --otherfiles=OFLIST --depositor=DEPOSITOR --update-item-id=UPDATEID"
op.on('-mf MFPATH', '--manifest=MFPATH', 'Path to manifest file') { |mfpath| options[:mfpath] = mfpath }
op.on('-pf FPATH', '--primaryfile=PFPATH', 'Path to primary attachment file') { |pfpath| options[:pfpath] = pfpath }
op.on('-of OFLIST', '--otherfiles=OFLIST', 'Comma-separated list of paths to supplemental files') { |oflist| options[:oflist] = oflist }
op.on('-dep DEPOSITOR', '--depositor=DEPOSITOR', 'Scholarspace ID (e.g. email) of depositor') { |depositor| options[:depositor] = depositor }
op.on('--update-item-id[=UPDATEID]', 'Update Item ID') { |updateid| options[:updateid] = updateid }

# return `ARGV` with the intended arguments
args = op.order!(ARGV) {}
op.parse!(args)

raise OptionParser::MissingArgument if options[:mfpath].nil?
raise OptionParser::MissingArgument if options[:pfpath].nil?
raise OptionParser::MissingArgument if options[:depositor].nil?

# Reference GwWork to work around circular dependency
# problem that would be caused by referencing GwEtd first
# See articles such as http://neethack.com/2015/04/rails-circular-dependency/
GwWork

degree_hash = YAML.load_file('config/etd_degree_map.yml')
degree_categories = degree_hash.keys # Typically ["Master's Thesis", "Dissertation"]

manifest_file = options[:mfpath]
if File.exist?(manifest_file)
mf = File.read(manifest_file)
manifest_json = JSON.parse(mf.squish)
item_attributes = manifest_json.dup
# Since we're going to embargo the file, not the item:
item_attributes.delete('embargo')
item_attributes.delete('embargo_release_date')
if manifest_json['degree']
item_attributes['degree'] = manifest_json['degree'][0]
end
# resource_type may need more logic around it, TBD
if manifest_json['etd_type']
item_attributes['resource_type'] = manifest_json['etd_type']
end
# item_attributes['resource_type'] = ['Thesis or Dissertation']

# dc:rights
# Always set this license for ETDs
item_attributes['license'] = ['http://www.europeana.eu/portal/rights/rr-r.html']
item_attributes.delete('rights')

# edm:rights
# Always set this rights statement for ETDs
item_attributes['rights_statement'] = ['http://rightsstatements.org/vocab/InC/1.0/']

etd_id = ingest_etd(item_attributes, options[:depositor], options[:updateid])
# generate_ingest_report(noid_list, investigation_id)
embargo_attributes = read_embargo_info(manifest_json)
gwe = GwEtd.find(etd_id)
attach_files(gwe, options[:pfpath], options[:oflist],
options[:depositor], embargo_attributes)
puts etd_id
else
puts "Manifest file doesn't exist - no ingest"
end
end
end

def ingest_work(item_attributes, depositor, updateid, setid, visibility_private, skip_file_updates)
begin
gww = nil
if updateid.nil?
gww = GwWork.new
if setid.nil?
gww.id = Noid::Rails::Service.new.mint
else
gww.id = setid
end
else
gww = GwWork.find(updateid)
# delete existing files; we'll "overwrite" with new ones
# TODO: Unfortunately, this will have the effect that links
# to individual files won't be persistent if the ETD is updated
# To solve this, we'd need a scheme for matching up updated files
# with existing files (perhaps by file name?)
unless skip_file_updates
fsets = gww.file_sets
fsets.each do |fs|
fs.delete
end
end
end

gww.apply_depositor_metadata(depositor)
gww.attributes = item_attributes
if visibility_private
gww.visibility = Hydra::AccessControls::AccessRight::VISIBILITY_TEXT_VALUE_PRIVATE
else
gww.visibility = Hydra::AccessControls::AccessRight::VISIBILITY_TEXT_VALUE_PUBLIC
end
now = Hyrax::TimeService.time_in_utc
gww.date_uploaded = now

# Add to Default Administrative Set
default_admin_set_id = AdminSet.find_or_create_default_admin_set_id
default_admin_set = AdminSet.find(default_admin_set_id)
gww.admin_set = default_admin_set
gww.set_edit_groups(["content-admin"],[])
gww.save

return gww.id
end
end

def ingest_etd(item_attributes, depositor, updateid)
begin
gwe = nil
if updateid.nil?
gwe = GwEtd.new
gwe.id = Noid::Rails::Service.new.mint
else
gwe = GwEtd.find(updateid)
# delete existing files; we'll "overwrite" with new ones
# TODO: Unfortunately, this will have the effect that links
# to individual files won't be persistent if the ETD is updated
# To solve this, we'd need a scheme for matching up updated files
# with existing files (perhaps by file name?)
fsets = gwe.file_sets
fsets.each do |fs|
fs.delete
end
end

gwe.apply_depositor_metadata(depositor)
gwe.attributes = item_attributes
gwe.visibility = Hydra::AccessControls::AccessRight::VISIBILITY_TEXT_VALUE_PUBLIC
now = Hyrax::TimeService.time_in_utc
gwe.date_uploaded = now

etd_admin_set = AdminSet.where(title: "ETDs")[0]
gwe.admin_set = etd_admin_set
gwe.set_edit_groups(["content-admin"],[])
gwe.save
return gwe.id
end
end

def read_embargo_info(metadata)
embargo_info = {}
embargo_info['embargo'] = metadata['embargo'] == true ? true : false
if embargo_info['embargo'] == true
embargo_info['embargo_release_date'] = metadata['embargo_release_date'].nil? ? '2100-01-01' : metadata['embargo_release_date']
end

return embargo_info
end

def attach_files(work, primaryfile_path, otherfiles_list, depositor, embargo_attributes)
user = User.find_by_user_key(depositor)
# add primary file first, other files afterwards
files = []
files += [primaryfile_path] if primaryfile_path
files += otherfiles_list.split(',') if otherfiles_list
files.each do |f|
fs = FileSet.new
# use the filename as the FileSet title
fs.id = Noid::Rails::Service.new.mint
fs.title = [File.basename(f)]
actor = ::Hyrax::Actors::FileSetActor.new(fs, user)
actor.create_metadata()
actor.create_content(File.open(f))
actor.attach_to_work(work)
if embargo_attributes['embargo'] == true
fs.apply_embargo(embargo_attributes['embargo_release_date'],
Hydra::AccessControls::AccessRight::VISIBILITY_TEXT_VALUE_PRIVATE,
Hydra::AccessControls::AccessRight::VISIBILITY_TEXT_VALUE_PUBLIC)
end
fs.set_edit_groups(["content-admin"],[])
fs.save
end
end

desc "Reindex everything"
task reindex_everything: :environment do
ActiveFedora::Base.reindex_everything
Expand Down
Loading