Skip to content

Commit

Permalink
Add :child_works feature flag, delete children if it is unset (#875)
Browse files Browse the repository at this point in the history
* Remove IIIF Manifest generation from ingest process.

Manifests will already exist in hyrax for old Works. New Works will have been Festerized already, so this code is unnecessary.

* use current solr image, add Flipflop gui to routes

* Add :child_works? feature flag, delete children if it is unset
  • Loading branch information
sourcefilter authored May 11, 2021
1 parent 121966b commit ae83339
Show file tree
Hide file tree
Showing 31 changed files with 210 additions and 582 deletions.
44 changes: 0 additions & 44 deletions app/actors/californica/manifest_actor.rb

This file was deleted.

2 changes: 1 addition & 1 deletion app/actors/hyrax/actors/work_actor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def apply_save_data_to_curation_concern(env)
# There is no metadata field called row_id, so remove this to prevent problems when saving this object
env.attributes.delete(:row_id)
env.attributes.delete(:batch_id)
raise "Cannot set id without a valid ark" unless env.attributes["ark"]
raise ArgumentError, "Cannot set id without a valid ark" unless env.attributes["ark"]
ark_based_id = Californica::IdGenerator.id_from_ark(env.attributes["ark"])
env.curation_concern.id = ark_based_id unless env.curation_concern.id
super
Expand Down
18 changes: 16 additions & 2 deletions app/importers/actor_record_importer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,25 @@ def create_for(record:)
# message should be recorded on the CsvRow object for reporting in the UI
raise "Validation failed: #{error_messages.join(', ')}"
end
rescue Ldp::BadRequest
rescue Ldp::BadRequest => e
# get the id from the ark and the uri from the id then delete the tombstone
tombstone_uri = "#{ActiveFedora::Base.id_to_uri(Californica::IdGenerator.id_from_ark(created.ark))}/fcr:tombstone"
ActiveFedora.fedora.connection.delete(tombstone_uri)
retry if (retries += 1) < 3
if (retries += 1) < 3
retry
else
raise e
end
end
rescue ActiveFedora::IllegalOperation => e
raise e unless e.message.start_with?('Attempting to recreate existing ldp_source')
retries ||= 0
fcrepo_id = Californica::IdGenerator.id_from_ark(record.ark)
Californica::Deleter.new(id: fcrepo_id).delete
if (retries += 1) < 3
retry
else
raise e
end
end
end
18 changes: 2 additions & 16 deletions app/importers/californica_csv_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@ def initialize(file:,
self.info_stream = info_stream
@csv_import_id = csv_import_id
@import_file_path = import_file_path
@collections_needing_reindex = Set.new
@works_needing_ordering = Set.new
@manifests_needing_build = Set.new

self.validators = []

Expand All @@ -50,14 +47,6 @@ def headers
[]
end

# Creates IIIF Manifests for each Work in a CSV. Does not create documents
# for Collection or ChildWork objects.
def build_iiif_manifests
CsvImportCreateManifest.where(csv_import_id: @csv_import_id, status: ['queued', 'in progress']).each do |create_manifest_object|
CreateManifestJob.perform_now(Ark.ensure_prefix(create_manifest_object.ark), create_manifest_object_id: create_manifest_object.id)
end
end

# Given an array of Work arks that have had ChildWorks added to them during this import,
# iterate through each and use the PageOrder objects to ensure the ChildWorks are
# in the right order. In other words, ensure a manuscript's pages are ordered by the
Expand All @@ -76,13 +65,10 @@ def add_finalization_tasks(row)
row['Parent ARK'].to_s.split('|~|').each do |parent_ark|
CsvCollectionReindex.create(csv_import_id: @csv_import_id, ark: parent_ark, status: 'queued')
end
CsvImportOrderChild.create(csv_import_id: @csv_import_id, ark: row['Item ARK'], status: 'queued')
CsvImportCreateManifest.create(csv_import_id: @csv_import_id, ark: row['Item ARK'], status: 'queued')
CsvImportOrderChild.create(csv_import_id: @csv_import_id, ark: row['Item ARK'], status: 'queued') if Flipflop.child_works?
when 'ChildWork', 'Page'
CsvImportCreateManifest.create(csv_import_id: @csv_import_id, ark: row['Item ARK'], status: 'queued')
row['Parent ARK'].split('|~|').each do |parent_ark|
CsvImportOrderChild.create(csv_import_id: @csv_import_id, ark: parent_ark, status: 'queued')
CsvImportCreateManifest.create(csv_import_id: @csv_import_id, ark: parent_ark, status: 'queued')
CsvImportOrderChild.create(csv_import_id: @csv_import_id, ark: parent_ark, status: 'queued') if Flipflop.child_works?
end
else
raise ArgumentError, "Unknown Object Type #{row['Object Type']}"
Expand Down
1 change: 0 additions & 1 deletion app/importers/californica_importer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def import
def finalize_import
parser.order_child_works
parser.reindex_collections
# parser.build_iiif_manifests
@csv_import.csv_rows.where(status: 'pending finalization').update_all(status: 'complete')
end

Expand Down
51 changes: 0 additions & 51 deletions app/jobs/create_manifest_job.rb

This file was deleted.

70 changes: 44 additions & 26 deletions app/jobs/csv_row_import_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,43 +9,61 @@ def perform(row_id:)
@row_id = row_id
@row = CsvRow.find(@row_id)
@row.ingest_record_start_time = Time.current
@row.update(status: 'preparing')

@metadata = JSON.parse(@row.metadata)
@row.status = @metadata["Object Type"].include?("Page") ? 'not ingested' : 'in progress'
@metadata = @metadata.merge(row_id: @row_id)
@csv_import = CsvImport.find(@row.csv_import_id)
import_file_path = @csv_import.import_file_path
record = Darlingtonia::InputRecord.from(metadata: @metadata, mapper: CalifornicaMapper.new(import_file_path: import_file_path))

selected_importer = if record.mapper.collection?
collection_record_importer
else
actor_record_importer
end
if Flipflop.child_works?
case record.mapper.object_type
when 'ChildWork', 'Page'
selected_importer = actor_record_importer
new_status = 'complete'
when 'Work', 'Manuscript'
selected_importer = actor_record_importer
new_status = 'pending finalization'
when 'Collection'
selected_importer = collection_record_importer
new_status = 'pending finalization'
else
selected_importer = nil
new_status = 'not imported'
end
else
case record.mapper.object_type
when 'Work', 'Manuscript'
@row.update(status: 'deleting child works')
Californica::Deleter.new(id: Californica::IdGenerator.id_from_ark(record.mapper.ark)).delete_with_children(of_type: ChildWork)
@row.update(status: 'in progress')
selected_importer = actor_record_importer
new_status = 'complete'
when 'Collection'
selected_importer = collection_record_importer
new_status = 'pending finalization'
else
selected_importer = nil
new_status = 'not imported'
end
end

selected_importer.import(record: record) unless @metadata["Object Type"].include?("Page")
@row.status = if ['Page', 'ChildWork'].include?(record.mapper.object_type)
if @metadata["Object Type"].include?("Page")
"not ingested"
else
"complete"
end
else
"pending finalization"
end
@row.update(status: 'in progress')
selected_importer&.import(record: record)

end_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
@row.ingest_record_end_time = Time.current

ingest_duration = end_time - start_time
@row.ingest_duration = ingest_duration
@row.job_ids_completed << job_id
@row.save
@row.update(status: new_status,
ingest_record_end_time: Time.current,
ingest_duration: end_time - start_time,
job_ids_completed: @row.job_ids_completed << job_id)
rescue => e
@row.status = 'error'
@row.job_ids_errored << job_id
@row.error_messages << e.message
@row.save
end_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
@row.update(status: 'error',
ingest_record_end_time: Time.current,
ingest_duration: end_time - start_time,
job_ids_errored: @row.job_ids_completed << job_id,
error_messages: @row.error_messages << "#{e.class}: #{e.message}")
end

def collection_record_importer
Expand Down
63 changes: 63 additions & 0 deletions app/lib/californica/deleter.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# frozen_string_literal: true

module Californica
  # Deletes a repository object, and optionally its member objects,
  # falling back to a direct Fedora delete when the object cannot be loaded
  # through ActiveFedora.
  class Deleter
    attr_reader :id, :logger

    # @param id [String, nil] fcrepo id of the object to delete
    # @param record [Object, nil] an already-loaded record; must respond to #id
    # @param logger [#info] receives one line per deletion
    # @raise [ArgumentError] if neither an id nor a usable record is given, or
    #   if both are given and the record's id differs from +id+
    def initialize(id: nil, record: nil, logger: Rails.logger)
      # Validate up front instead of rescuing NoMethodError from `record.id`.
      if (!id && record.nil?) || (record && !record.respond_to?(:id))
        raise ArgumentError, 'Californica::Deleter must be initialized with a fcrepo id or a Californica record object (Collection, Work, or ChildWork).'
      end

      @id = id || record.id
      @record = record
      @logger = logger

      raise ArgumentError, "id #{@id} does not match record #{@record}" if @record && @record.id != @id
    end

    # Deletes this record only.
    def delete
      destroy_and_eradicate
    end

    # Recursively deletes the members of this record, then the record itself.
    # Only objects that are instances of +of_type+ are deleted; the recursion
    # still descends through members of other types.
    #
    # @param of_type [Class, Module, nil] type filter; nil (the default)
    #   matches every object
    def delete_with_children(of_type: nil)
      # NOTE(review): children are deleted before the parent here. Deleting
      # (or detaching) the parent first might avoid a save / reindex of the
      # parent per child deletion - confirm before reordering.
      record.member_ids.each do |child_id|
        Californica::Deleter.new(id: child_id, logger: logger)
                            .delete_with_children(of_type: of_type)
      end
      # `is_a?(nil)` raises TypeError, so nil must be handled explicitly.
      delete if of_type.nil? || record.is_a?(of_type)
    rescue ActiveFedora::ObjectNotFoundError
      delete_from_fcrepo
    end

    # Deletes the members of this record (recursively, filtered by +of_type+
    # as in #delete_with_children) but leaves this record in place.
    #
    # @param of_type [Class, Module, nil] type filter; nil matches every object
    def delete_children(of_type: nil)
      record.members.each do |child|
        Californica::Deleter.new(record: child, logger: logger)
                            .delete_with_children(of_type: of_type)
      end
    end

    private

    # Destroys the record, eradicates it, runs the Hyrax :after_destroy
    # callback as the batch user, and logs the deletion.
    def destroy_and_eradicate
      # Capture the label before destroying the record.
      record_name = "#{record.class} #{record.ark}"
      record.destroy&.eradicate
      Hyrax.config.callback.run(:after_destroy, record.id, User.batch_user)
      logger.info("Deleted #{record_name}")
    rescue ActiveFedora::ObjectNotFoundError
      delete_from_fcrepo
    end

    # Issues a delete directly against the Fedora URI for +id+.
    def delete_from_fcrepo
      ActiveFedora.fedora.connection.delete(ActiveFedora::Base.id_to_uri(id))
      logger.info("Forced delete of #{id} from Fedora")
    rescue Ldp::NotFound
      nil # Everything's good, we just wanted to make sure there wasn't a record in fedora not indexed to solr
    end

    # Lazily loads the record by id when one was not supplied.
    def record
      @record ||= ActiveFedora::Base.find(id)
    end
  end
end
2 changes: 2 additions & 0 deletions app/lib/californica/id_generator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ def self.blacklight_id_from_ark(ark)
raise ArgumentError, 'Could not parse ARK shoulder and blade' if ark_parts.count < 2

ark_parts.join('-')
rescue NoMethodError
raise ArgumentError, 'Cannot set id without a valid ark'
end
end
end
12 changes: 12 additions & 0 deletions app/models/collection.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def reindex_extent

# @param ark [String] The ARK
# @return [Collection] The Collection with that ARK
# rubocop:disable Metrics/MethodLength
def self.find_or_create_by_ark(ark)
collection = find_by_ark(ark)
return collection if collection
Expand All @@ -60,5 +61,16 @@ def self.find_or_create_by_ark(ark)
Hyrax::Collections::PermissionsCreateService.create_default(collection: collection, creating_user: User.batch_user, grants: grants)

collection
rescue ActiveFedora::IllegalOperation => e
raise e unless e.message.start_with?('Attempting to recreate existing ldp_source')

retries ||= 0
fcrepo_id = Californica::IdGenerator.id_from_ark(ark)
Californica::Deleter.new(id: fcrepo_id).delete
if (retries += 1) < 3
retry
else
raise e
end
end
end
1 change: 0 additions & 1 deletion app/models/csv_import.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ class CsvImport < ApplicationRecord
has_many :csv_rows
has_many :csv_collection_reindices
has_many :csv_import_order_children
has_many :csv_import_create_manifests

def queue_start_job
StartCsvImportJob.perform_later(id)
Expand Down
6 changes: 0 additions & 6 deletions app/models/csv_import_create_manifest.rb

This file was deleted.

6 changes: 1 addition & 5 deletions app/models/solr_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,7 @@ def genre
end

def iiif_manifest_url
if Flipflop.use_manifest_store? && self[:iiif_manifest_url_ssi]
self[:iiif_manifest_url_ssi]
else
"/concern/works/#{id}/manifest"
end
self[:iiif_manifest_url_ssi] || "/concern/works/#{id}/manifest"
end

def iiif_range
Expand Down
Loading

0 comments on commit ae83339

Please sign in to comment.