Skip to content

Commit

Permalink
Merge pull request #818 from WGBH-MLA/batch_overhaul
Browse files Browse the repository at this point in the history
Upgrade Hyrax Batch Ingest to Valkyrie
  • Loading branch information
orangewolf authored Dec 20, 2023
2 parents 892ba94 + e5715e7 commit b83fdc9
Show file tree
Hide file tree
Showing 48 changed files with 381 additions and 210 deletions.
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ if ENV['DEPENDENCIES_NEXT'] && !ENV['DEPENDENCIES_NEXT'].empty?

else
gem 'rails', '~> 6.0'
gem 'hyrax-batch_ingest', git: 'https://github.com/samvera-labs/hyrax-batch_ingest', branch: 'dependency-upgrades'
gem 'hyrax-batch_ingest', git: 'https://github.com/samvera-labs/hyrax-batch_ingest', branch: 'valkyrie_update'
gem 'hyrax', github: 'samvera/hyrax', branch: 'double_combo' # , tag: 'hyrax-v5.0.0.rc1'
# Use SCSS for stylesheets
gem 'sass-rails', '~> 6.0'
Expand Down
16 changes: 8 additions & 8 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ GIT

GIT
remote: https://github.com/samvera-labs/bulkrax.git
revision: ba7a071799b97fc3585448c93ad8ec246cc5de5e
revision: c2ee9bc372cb7e6eb348f4f2e41e75f6fec8adcc
branch: hyrax-4-valkyrie-support
specs:
bulkrax (5.3.0)
Expand All @@ -31,17 +31,17 @@ GIT

GIT
remote: https://github.com/samvera-labs/hyrax-batch_ingest
revision: cab14d5db9f5b54ab7ee076c5b056bd49088a7c2
branch: dependency-upgrades
revision: 137775262520b9de4f4f84fb65fb93d1499a1a86
branch: valkyrie_update
specs:
hyrax-batch_ingest (0.2.0)
hyrax-batch_ingest (0.2.0b)
hyrax (>= 4.0, < 6.0)
rails (~> 6.0)
roo (~> 2.7.0)
roo (~> 2.7)

GIT
remote: https://github.com/samvera/hyrax.git
revision: b7891b758411c59f71ff54212e0d250fcc47e35f
revision: d91636abdddde752ca2bd59f43097ca3067b1d8f
branch: double_combo
specs:
hyrax (5.0.0.rc2)
Expand Down Expand Up @@ -887,9 +887,9 @@ GEM
railties (>= 5.2)
retriable (3.1.2)
rexml (3.2.6)
roo (2.7.1)
roo (2.10.0)
nokogiri (~> 1)
rubyzip (~> 1.1, < 2.0.0)
rubyzip (>= 1.3.0, < 3.0.0)
rsolr (2.5.0)
builder (>= 2.1.2)
faraday (>= 0.9, < 3, != 2.0.0)
Expand Down
5 changes: 5 additions & 0 deletions app/forms/asset_resource_form.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ class AssetResourceForm < Hyrax::Forms::ResourceForm(AssetResource)
include ChildCreateButton
include DisabledFields

validates :created_date, date: { allow_blank: true }
validates :date, date: { allow_blank: true }
validates :broadcast_date, date: { allow_blank: true }
validates :copyright_date, date: { allow_blank: true }

attr_accessor :controller, :current_ability

class_attribute :field_groups
Expand Down
2 changes: 1 addition & 1 deletion app/jobs/cool_digital_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
class CoolDigitalJob < Hyrax::BatchIngest::BatchItemProcessingJob
def perform(parent_id:, xml:, batch_item:)
# we only do digi instantiations round here
parent = Asset.find(parent_id)
parent = AssetResource.find(parent_id)
# Need to set @work to the ingested DigitalInstantiation in order for
# the `after_perform` hook of Hyrax::BatchIngest::BatchItemProcessingJob
# to properly set BatchItem#repo_object_id. If that sounds a bit convoluted
Expand Down
2 changes: 1 addition & 1 deletion app/jobs/cool_essence_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
class CoolEssenceJob < Hyrax::BatchIngest::BatchItemProcessingJob
def perform(parent_id:, xml:, batch_item:)
# we only do essoes round here
parent = ActiveFedora::Base.find(parent_id)
parent = Hyrax.query_service.find_by(id: parent_id)
# Need to set @work to the ingested EssenceTrack in order for
# the `after_perform` hook of Hyrax::BatchIngest::BatchItemProcessingJob
# to properly set BatchItem#repo_object_id. If that sounds a bit convoluted
Expand Down
4 changes: 2 additions & 2 deletions app/jobs/cool_physical_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
class CoolPhysicalJob < Hyrax::BatchIngest::BatchItemProcessingJob
def perform(parent_id:, xml:, batch_item:)
# we only do physical instantiations round here
parent = Asset.find(parent_id)
parent = AssetResource.find(parent_id)
physical_inst = AAPB::BatchIngest::PBCoreXMLItemIngester.new(batch_item, {}).ingest_physical_instantiation!(parent: parent, xml: xml)
pbcore_physical = PBCore::Instantiation.parse(xml)
# fire these off while we have em
pbcore_physical.essence_tracks.each do |ess_track|
et_batch_item = Hyrax::BatchIngest::BatchItem.create!(batch: batch_item.batch, status: 'initialized', id_within_batch: batch_item.id_within_batch)
CoolEssenceJob.perform_later(parent_id: physical_inst.id, xml: ess_track.to_xml, batch_item: et_batch_item)
CoolEssenceJob.perform_later(parent_id: physical_inst.id.to_s, xml: ess_track.to_xml, batch_item: et_batch_item)
end
# Need to set @work to the ingested PhysicalInstantiation in order for
# the `after_perform` hook of Hyrax::BatchIngest::BatchItemProcessingJob
Expand Down
24 changes: 24 additions & 0 deletions app/models/ability.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ def ams_base_permissions
DigitalInstantiation,
Collection,
Contribution,
AssetResource,
EssenceTrackResource,
PhysicalInstantiationResource,
DigitalInstantiationResource,
Hyrax::PcdmCollection,
ContributionResource,
Annotation ]

# Explicitly forbid these actions.
Expand All @@ -42,6 +48,12 @@ def ams_base_permissions
DigitalInstantiation,
Collection,
Contribution,
AssetResource,
EssenceTrackResource,
PhysicalInstantiationResource,
DigitalInstantiationResource,
Hyrax::PcdmCollection,
ContributionResource,
Annotation ]
end

Expand All @@ -62,6 +74,12 @@ def ams_ingester_permissions
Contribution,
AdminData,
InstantiationAdminData,
AssetResource,
EssenceTrackResource,
PhysicalInstantiationResource,
DigitalInstantiationResource,
Hyrax::PcdmCollection,
ContributionResource,
Annotation ]

# Field-level permissions for Admin Data
Expand All @@ -80,6 +98,12 @@ def ams_aapb_admin_permissions
DigitalInstantiation,
Collection,
Contribution,
AssetResource,
EssenceTrackResource,
PhysicalInstantiationResource,
DigitalInstantiationResource,
Hyrax::PcdmCollection,
ContributionResource,
Annotation ]
end

Expand Down
7 changes: 7 additions & 0 deletions app/models/ams/create_member_methods.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@ module CreateMemberMethods
included do
def create_child_methods
self.valid_child_concerns.each do |child_class|
# name with _resources gives us valkyrie or af record - digital_instantiation_resources
method_name = child_class.to_s.underscore.pluralize
self.class.send(:define_method, method_name) do
self.members.select { |work| work.is_a?(child_class) }
end

# name without _resources gives us solr record - digital_instantiations
method_name = child_class.to_s.underscore.gsub(/_resour.*/, '').pluralize
self.class.send(:define_method, method_name) do
case self
when ActiveFedora::Base
Expand Down
2 changes: 1 addition & 1 deletion app/models/asset_resource.rb
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def find_admin_data_attribute(attribute)
end

def set_validation_status
current_children_count = SolrDocument.get_members(self).reject { |child| child.is_a?(Contribution) || child.id == self.id }.size
current_children_count = SolrDocument.get_members(self).reject { |child| child.is_a?(Contribution) || child.is_a?(ContributionResource) || child.id == self.id }.size
intended_children_count = self.intended_children_count.to_i

self.validation_status_for_aapb = if intended_children_count.blank? && self.validation_status_for_aapb.blank?
Expand Down
10 changes: 10 additions & 0 deletions app/models/date_validator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Attribute validator that checks each value of an attribute with
# AMS::NonExactDateService and records an error for every value that
# service reports as invalid.
#
# The attribute value may be a single value or a collection; it is
# normalised with Array.wrap so both cases are handled the same way.
#
# Options:
#   :message - replaces the default "invalid date format: <value>" error text.
class DateValidator < ActiveModel::EachValidator
  # @param record    the object being validated; errors are added to record.errors
  # @param attribute the name of the attribute under validation
  # @param value     the attribute's current value (scalar or collection)
  def validate_each(record, attribute, value)
    Array.wrap(value).each do |candidate|
      # Only values the date service rejects produce an error.
      next unless AMS::NonExactDateService.invalid?(candidate)

      record.errors.add attribute, (options[:message] || "invalid date format: #{candidate}")
    end
  end
end
5 changes: 5 additions & 0 deletions app/models/hyrax/administrative_set_decorator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# OVERRIDE Hyrax 5.0 to mix Hyrax::ArResource into Hyrax::AdministrativeSet
#
# This decorator reopens Hyrax::AdministrativeSet and includes the
# Hyrax::ArResource module; it does not add any metadata schema.
# NOTE(review): Hyrax::ArResource presumably supplies ActiveRecord-style
# helpers on the Valkyrie resource - confirm against the module definition.

Hyrax::AdministrativeSet.class_eval do
include Hyrax::ArResource
end
1 change: 1 addition & 0 deletions app/models/hyrax/pcdm_collection_decorator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@

Hyrax::PcdmCollection.class_eval do
include Hyrax::Schema(:basic_metadata)
include Hyrax::ArResource
end
2 changes: 1 addition & 1 deletion app/services/aapb/batch_ingest/bulkrax_xml_mapper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def asset_attributes

intended_children_count = 0
intended_children_count += pbcore.instantiations.size
intended_children_count += pbcore.instantiations.map(&:essence_tracks).flatten.size
intended_children_count += pbcore.instantiations.map(&:essence_track).flatten.size
attrs[:intended_children_count] = intended_children_count
end
end
Expand Down
4 changes: 2 additions & 2 deletions app/services/aapb/batch_ingest/csv_config_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def self.new_from_hash(hash)
attr.each do |attr|
# Look for admin_data accessors from assets or physical_nstantiations.
# If one of them is there, add their attribute names to the whitelisted properties.
whitelisted_properties = klass.properties.keys
whitelisted_properties = klass.respond_to?(:schema) ? klass.fields : klass.properties.keys

if klass.instance_methods.include?(:admin_data)
whitelisted_properties += AdminData.attribute_names
Expand Down Expand Up @@ -44,7 +44,7 @@ def header_keys
attributes.deep_dup
else
extra_attr=[]
if object_class == "Asset"
if object_class.include?("Asset")
extra_attr=(AdminData.attribute_names.dup - ['id', 'created_at', 'updated_at'] + Annotation.ingestable_attributes).uniq
elsif object_class.include?("Instantiation")
extra_attr=(InstantiationAdminData.attribute_names.dup - ['id', 'created_at', 'updated_at'])
Expand Down
53 changes: 26 additions & 27 deletions app/services/aapb/batch_ingest/csv_item_ingester.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def ingest
@works_ingested = []
set_options
@source_data = JSON.parse(@batch_item.source_data)
ingest_object_at options, @source_data
result = ingest_object_at options, @source_data

raise "Batch item contained invalid data.\n\n#{@batch_item.error}" unless @batch_item.error.nil?
@works_ingested.first
Expand Down Expand Up @@ -55,15 +55,15 @@ def transaction_create(model_object, user, ability, attributes)
.call(cx)

if result.failure?
msg = result.failure[0].to_s
msg += " - #{result.failure[1].full_messages.join(',')}" if result.failure[1].respond_to?(:full_messages)
raise StandardError, msg, result.trace
msg = "Batch item contained invalid data.\n\n"
msg += "#{result.failure[1].messages}" if result.failure[1].respond_to?(:messages)
raise RuntimeError, msg, result.trace
end

result
end

def ingest_object_at(node, with_data, with_parent = false)
actor = ::Hyrax::CurationConcern.actor
user = User.find_by_email(@batch_item.submitter_email)
ability = ::Ability.new(user)
ingest_type = node.ingest_type
Expand All @@ -72,7 +72,7 @@ def ingest_object_at(node, with_data, with_parent = false)
with_data[node.object_class]
else
solr_doc = SolrDocument.new(with_parent.to_solr)
with_data.merge(in_works_ids: [with_parent.id], title: solr_doc.title)
with_data.merge(title: solr_doc.title)
end

attributes["admin_set_id"] = @batch_item.batch.admin_set_id
Expand Down Expand Up @@ -110,10 +110,10 @@ def ingest_object_at(node, with_data, with_parent = false)

# catch sub-Asset ingest failures here, where we have attributes, cleanup, then re-raise to enable rescue_from to properly update failed batch item etc
begin
if actor_stack_status
if actor_stack_status.success?
@batch_item.repo_object_id = model_object.id unless !with_parent
model_object = actor_stack_status.value!
@works_ingested << model_object.dup

parent_node = if !with_parent
@works_ingested.last
else
Expand All @@ -125,23 +125,23 @@ def ingest_object_at(node, with_data, with_parent = false)
# We won't always have data from the CSV for the children, so don't
# fail if it is not included with the with_data
with_data[c_node.object_class].each do |c_data|
ingest_object_at(c_node,c_data,parent_node)
result = ingest_object_at(c_node,c_data,parent_node)
parent_node.member_ids += [result.id.to_s] if result
end unless with_data[c_node.object_class].nil?
end
end
if model_object.errors.any?
@batch_item.error = model_object.errors.messages.to_s
parent_node.save if parent_node.member_ids.present?
else
@batch_item.error = actor_stack_status.failure[0].to_s
end
rescue => e
# If there was an exception during ingest, ensure the related work
# is destroyed.
work_id = attributes.fetch(:in_works_ids, []).first
work_id ||= model_object&.in_works_ids&.first if model_object
work_id = parent_node.id

if work_id
work = Hyrax.query_service.find_by(id: work_id)
asset_batch_id = work.admin_data.hyrax_batch_ingest_batch_id if work.admin_data
child_batch_id = model_object.admin_data.hyrax_batch_ingest_batch_id if model_object.admin_data
child_batch_id = model_object.admin_data.hyrax_batch_ingest_batch_id if model_object.respond_to?(:admin_data) && model_object.admin_data

# make sure failed child object is from the same batch as parent
if work && asset_batch_id == child_batch_id
Expand All @@ -154,6 +154,7 @@ def ingest_object_at(node, with_data, with_parent = false)
# BatchItemIngestJob from hyrax-batch_ingest gem
raise e
end
model_object
end

def set_options
Expand All @@ -171,12 +172,6 @@ def set_attributes_for_new_ingest_type(model_object, attributes, ability)
new_attributes["hyrax_batch_ingest_batch_id"] = batch_id
end

if new_attributes[:in_works_ids].present?
new_attributes[:in_works_ids].each do |work_id|
set_batch_ingest_id_on_related_asset(work_id, ability)
end
end

new_attributes
end

Expand All @@ -190,8 +185,14 @@ def set_asset_objects_attributes(model_object, attributes, ingest_type)
# the AssetActor does not expect the existing Annotations unless Annotations are in the env.
set_admin_data_attributes(admin_data, attributes)
# annotations work the same for both update and add
admin_data.annotations_attributes = attributes.delete('annotations')
admin_data.save!
new_annotations = attributes.delete('annotations')
if new_annotations.present?
annotation_types = new_annotations.map {|a| a['annotation_type']}
to_remove = admin_data.annotations.select { |a| a.annotation_type.in?(annotation_types) }
admin_data.annotations.destroy(to_remove)
admin_data.annotations_attributes = new_annotations
admin_data.save!
end
when 'add'
# serialized fields need to preserve existing data in an add ingest
# handles asset, admin_data, and annotations
Expand All @@ -205,10 +206,8 @@ def set_batch_ingest_id_on_related_asset(work_id, ability)
unless asset = Hyrax.query_service.find_by(id: work_id)
raise 'Cannot find Asset with ID: #{work_id}.'
end
asset_actor = ::Hyrax::CurationConcern.actor
asset_attrs = { hyrax_batch_ingest_batch_id: batch_id }
asset_env = Hyrax::Actors::Environment.new(asset, ability, asset_attrs)
asset_actor.update(asset_env)
asset.admin_data.hyrax_batch_ingest_batch_id = batch_id
asset.save
end

def set_admin_data_attributes(admin_data, attributes)
Expand Down
Loading

0 comments on commit b83fdc9

Please sign in to comment.