Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: Adds batch process to pull children from preservica for existing parent #1205

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion app/models/batch_process.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class BatchProcess < ApplicationRecord # rubocop:disable Metrics/ClassLength
# LISTS AVAILABLE BATCH ACTIONS
# rubocop:disable Layout/LineLength
# Returns the names of every batch action a BatchProcess can be created with,
# in the order they are listed here.
#
# The earlier version of this list (without 'reingest with preservica') was
# left in the method body ahead of the current one; it was evaluated and
# thrown away, so only the single list below is kept.
#
# @return [Array<String>] batch action names
def self.batch_actions
  [
    'create parent objects',
    'update parent objects',
    'update child objects caption and label',
    'delete parent objects',
    'delete child objects',
    'export all parent objects by admin set',
    'export parent metadata',
    'export child oids',
    'reassociate child oids',
    'recreate child oid ptiffs',
    'update fulltext status',
    'reingest with preservica',
    'resync with preservica',
    'activity stream updates'
  ]
end
# rubocop:enable Layout/LineLength

Expand Down Expand Up @@ -181,6 +181,8 @@ def determine_background_jobs
RecreateChildOidPtiffsJob.perform_later(self)
when 'update fulltext status'
UpdateFulltextStatusJob.perform_later(self)
when 'reingest with preservica'
SyncFromPreservicaJob.perform_later(self)
when 'resync with preservica'
SyncFromPreservicaJob.perform_later(self)
end
Expand Down
12 changes: 6 additions & 6 deletions app/models/concerns/create_parent_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ def create_new_parent_csv
batch_processing_event(e.message, e.kind)
next
rescue PreservicaImageService::PreservicaImageServiceError => e
if e.message.include?("bad URI")
batch_processing_event("The given URI does not match the URI of an entity in Preservica. Please make sure your URI is correct, starts with /structure-object/ or /information-object/, and includes no spaces or line breaks. ------------ Message from System: Skipping row [#{index + 2}] #{e.message}.", "Skipped Row")
elsif e.message.include?("entity.does.not.exist")
batch_processing_event("The given URI does not match the URI of an entity of this type in Preservica. Please make sure your Preservica URI and object structure type is correct. ------------ Message from System: Skipping row [#{index + 2}] #{e.message}.", "Skipped Row")
else
# if e.message.include?("bad URI")
# batch_processing_event("The given URI does not match the URI of an entity in Preservica. Please make sure your URI is correct, starts with /structure-object/ or /information-object/, and includes no spaces or line breaks. ------------ Message from System: Skipping row [#{index + 2}] #{e.message}.", "Skipped Row")
# elsif e.message.include?("entity.does.not.exist")
# batch_processing_event("The given URI does not match the URI of an entity of this type in Preservica. Please make sure your Preservica URI and object structure type is correct. ------------ Message from System: Skipping row [#{index + 2}] #{e.message}.", "Skipped Row")
# else
batch_processing_event("Skipping row [#{index + 2}] #{e.message}.", "Skipped Row")
end
# end
next
end
else
Expand Down
22 changes: 12 additions & 10 deletions app/models/concerns/sync_from_preservica.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ def sync_from_preservica
end
begin
preservica_children_hash = {}
PreservicaImageService.new(parent_object.preservica_uri, parent_object.admin_set.key).image_list(parent_object.preservica_representation_type).each_with_index do |preservica_co, index|
parent_preservica_uri = parent_object.preservica_uri.presence || row['preservica_uri'].presence ||
PreservicaImageService.new(parent_preservica_uri, parent_object.admin_set.key).image_list(parent_object.preservica_representation_type).each_with_index do |preservica_co, index|
# increment by one so index lines up with order
index_plus_one = index + 1
preservica_children_hash["hash_#{index_plus_one}".to_sym] = { order: index_plus_one,
Expand Down Expand Up @@ -71,24 +72,25 @@ def sync_images_preservica(local_children_hash, preservica_children_hash, parent
# rubocop:disable Metrics/MethodLength
# ERROR HANDLING FOR PRESERVICA SYNC
def validate_preservica_sync(parent_object, row)
# byebug
if parent_object.redirect_to.present?
batch_processing_event("Parent OID: #{row['oid']} is a redirected parent object", 'Skipped Import')
false
elsif parent_object.preservica_uri.nil?
batch_processing_event("Parent OID: #{row['oid']} does not have a Preservica URI", 'Skipped Import')
elsif !current_ability.can?(:update, parent_object)
batch_processing_event("Skipping row with parent oid: #{parent_object.oid}, user does not have permission to update", 'Permission Denied')
false
elsif parent_object.preservica_uri.nil? && row['preservica_uri'].nil?
batch_processing_event("Parent OID: #{row['oid']} does not have a Preservica URI. Please ensure Preservica URI is saved to parent or included in CSV.", 'Skipped Import')
false
elsif parent_object.digital_object_source != "Preservica"
batch_processing_event("Parent OID: #{row['oid']} does not have a Preservica digital object source", 'Skipped Import')
elsif parent_object.digital_object_source != "Preservica" && row['digital_object_source'].nil?
batch_processing_event("Parent OID: #{row['oid']} does not have a Preservica digital object source. Please ensure Digital Object Source is saved to parent or included in CSV.", 'Skipped Import')
false
elsif parent_object.preservica_representation_type.nil?
batch_processing_event("Parent OID: #{row['oid']} does not have a Preservica representation type", 'Skipped Import')
elsif parent_object.preservica_representation_type.nil? && row['preservica_representation_type'].nil?
batch_processing_event("Parent OID: #{row['oid']} does not have a Preservica representation type. Please ensure Preservica representation type is saved to parent or included in CSV.", 'Skipped Import')
false
elsif !parent_object.admin_set.preservica_credentials_verified
batch_processing_event("Admin set #{parent_object.admin_set.key} does not have Preservica credentials set", 'Skipped Import')
false
elsif !current_ability.can?(:update, parent_object)
batch_processing_event("Skipping row with parent oid: #{parent_object.oid}, user does not have permission to update", 'Permission Denied')
false
else
true
end
Expand Down
12 changes: 11 additions & 1 deletion app/services/preservica_image_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,21 @@
class PreservicaImageService
# Error raised for a failed Preservica request concerning the entity +id+.
#
# Two known low-level Preservica messages ("bad URI" and
# "entity.does.not.exist") are prefixed with guidance the batch user can act
# on; every other message is passed through unchanged. The resulting exception
# message always ends with " for <id>".
class PreservicaImageServiceError < StandardError
  attr_reader :id
  # rubocop:disable Layout/LineLength

  # @param msg [#to_s] the low-level message describing the failure
  # @param id [Object] identifier of the Preservica entity involved; exposed via #id
  def initialize(msg, id)
    @id = id
    # to_s keeps a nil or non-String msg from raising inside the error constructor itself.
    super("#{friendly_message(msg.to_s)} for #{id}")
  end

  private

  # Builds the user-facing text for +msg+.
  #
  # The row number is deliberately NOT included here: the exception has no
  # access to the CSV row index (referencing `index` in this class raises
  # NameError), and the rescuing caller already prefixes
  # "Skipping row [n]" when it logs e.message.
  #
  # @param msg [String]
  # @return [String]
  def friendly_message(msg)
    if msg.include?("bad URI")
      "The given URI does not match the URI of an entity in Preservica. Please make sure your URI is correct, starts with /structure-object/ or /information-object/, and includes no spaces or line breaks. ------------ Message from System: #{msg}"
    elsif msg.include?("entity.does.not.exist")
      "The given URI does not match the URI of an entity of this type in Preservica. Please make sure your Preservica URI and object structure type is correct. ------------ Message from System: #{msg}"
    else
      msg
    end
  end
end
# rubocop:enable Layout/LineLength

class PreservicaImageServiceNetworkError < PreservicaImageServiceError
def initialize(msg, id)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
oid,digital_object_source,preservica_uri,preservica_representation_type
2 changes: 2 additions & 0 deletions spec/fixtures/csv/preservica/preservica_reingest.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
oid,digital_object_source,preservica_uri,preservica_representation_type
200000000,Preservica,/preservica/api/entity/structural-objects/7fe35e8c-c21a-444a-a2e2-e3c926b519c5,Preservation
2 changes: 2 additions & 0 deletions spec/fixtures/csv/preservica/preservica_reingest_invalid.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
oid,digital_object_source,preservica_uri,preservica_representation_type
12345,Preservica,/preservica/api/entity/structural-objects/7fe35e8c-c21a-444a-a2e2-e3c926b519c5,Preservation
158 changes: 158 additions & 0 deletions spec/models/preservica/preservica_reingest_existing_parent_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# frozen_string_literal: true

require 'rails_helper'

# Exercises the 'reingest with preservica' batch action against a parent object
# that already exists locally with three child objects.
# NOTE(review): the described class is Preservica::PreservicaObject, but every
# example drives BatchProcess — confirm the describe target is intended.
RSpec.describe Preservica::PreservicaObject, type: :model, prep_metadata_sources: true, prep_admin_sets: true do
subject(:batch_process) { BatchProcess.new }
let(:admin_set) { FactoryBot.create(:admin_set, key: 'brbl') }
# NOTE(review): admin_set_sml is not referenced anywhere in this spec.
let(:admin_set_sml) { FactoryBot.create(:admin_set, key: 'sml') }
let(:user) { FactoryBot.create(:user, uid: "mk2525") }
# Existing parent whose oid matches the single data row of preservica_reingest.csv.
let(:aspace_parent) { FactoryBot.create(:parent_object, oid: 200_000_000, admin_set: AdminSet.find_by_key('brbl')) }
# let(:sml_parent) { FactoryBot.create(:parent_object, oid: 12_345, admin_set: AdminSet.find_by_key('sml')) }
# Three pre-existing children with known labels/captions so the example can
# check they are unchanged after the reingest.
let(:co_1) { FactoryBot.create(:child_object, oid: 1_002_533, parent_object: aspace_parent, order: 1, label: 'original label', caption: 'original caption') }
let(:co_2) { FactoryBot.create(:child_object, oid: 1_002_534, parent_object: aspace_parent, order: 2, label: 'original label', caption: 'original caption') }
let(:co_3) { FactoryBot.create(:child_object, oid: 1_002_535, parent_object: aspace_parent, order: 3, label: 'original label', caption: 'original caption') }
let(:ptf_1) { PyramidalTiff.new(co_1) }
let(:ptf_2) { PyramidalTiff.new(co_2) }
let(:ptf_3) { PyramidalTiff.new(co_3) }
# CSV whose row references oid 12345 (used by the permission-denied example).
let(:preservica_reingest_invalid) { Rack::Test::UploadedFile.new(Rails.root.join(fixture_path, "csv", "preservica", "preservica_reingest_invalid.csv")) }
# CSV whose row references oid 200000000 (the aspace_parent above).
let(:preservica_reingest) { Rack::Test::UploadedFile.new(Rails.root.join(fixture_path, "csv", "preservica", "preservica_reingest.csv")) }

# Points the Preservica host/credentials and the access-master mount at test
# values for the duration of each example, runs the example inside
# perform_enqueued_jobs, then restores the previous ENV values.
around do |example|
preservica_host = ENV['PRESERVICA_HOST']
preservica_creds = ENV['PRESERVICA_CREDENTIALS']
ENV['PRESERVICA_HOST'] = "testpreservica"
ENV['PRESERVICA_CREDENTIALS'] = '{"brbl": {"username":"xxxxx", "password":"xxxxx"}}'
access_host = ENV['ACCESS_MASTER_MOUNT']
ENV['ACCESS_MASTER_MOUNT'] = File.join("spec", "fixtures", "images", "access_masters")
perform_enqueued_jobs do
example.run
end
ENV['PRESERVICA_HOST'] = preservica_host
ENV['PRESERVICA_CREDENTIALS'] = preservica_creds
ENV['ACCESS_MASTER_MOUNT'] = access_host
end

# Creates the parent and its three children, then stubs every HTTP endpoint
# the example is expected to touch.
before do
# NOTE(review): login_as receives the symbol :user, not the `user` record — confirm this is intended.
login_as(:user)
batch_process.user_id = user.id
stub_pdfs
# Referencing the lets forces the records to be created before the batch runs.
aspace_parent
co_1
co_2
co_3
stub_preservica_login
# Each entry is both a request path and the path (minus ".xml") of the fixture
# file served as that request's response body.
fixtures = %w[preservica/api/entity/structural-objects/7fe35e8c-c21a-444a-a2e2-e3c926b519c5/children
preservica/api/entity/information-objects/1e42a2bb-8953-41b6-bcc3-1a19c86a5e3r/representations
preservica/api/entity/information-objects/1e42a2bb-8953-41b6-bcc3-1a19c86a5e3r/representations/Access
preservica/api/entity/information-objects/1e42a2bb-8953-41b6-bcc3-1a19c86a5e3r/representations/Preservation
preservica/api/entity/content-objects/ae328d84-e429-4d46-a865-9ee11157b486/generations
preservica/api/entity/content-objects/ae328d84-e429-4d46-a865-9ee11157b486/generations/1
preservica/api/entity/content-objects/ae328d84-e429-4d46-a865-9ee11157b486/generations/1/bitstreams/1
preservica/api/entity/information-objects/1e42a2bb-8953-41b6-bcc3-1a19c86a5e3d/representations
preservica/api/entity/information-objects/1e42a2bb-8953-41b6-bcc3-1a19c86a5e3d/representations/Access
preservica/api/entity/information-objects/1e42a2bb-8953-41b6-bcc3-1a19c86a5e3d/representations/Preservation
preservica/api/entity/content-objects/ae328d84-e429-4d46-a865-9ee11157b489/generations
preservica/api/entity/content-objects/ae328d84-e429-4d46-a865-9ee11157b489/generations/1
preservica/api/entity/content-objects/ae328d84-e429-4d46-a865-9ee11157b489/generations/1/bitstreams/1
preservica/api/entity/information-objects/f44ba97e-af2b-498e-b118-ed1247822f44/representations
preservica/api/entity/information-objects/f44ba97e-af2b-498e-b118-ed1247822f44/representations/Access
preservica/api/entity/information-objects/f44ba97e-af2b-498e-b118-ed1247822f44/representations/Preservation
preservica/api/entity/content-objects/ae328d84-e429-4d46-a865-9ee11157b487/generations
preservica/api/entity/content-objects/ae328d84-e429-4d46-a865-9ee11157b487/generations/1
preservica/api/entity/content-objects/ae328d84-e429-4d46-a865-9ee11157b487/generations/1/bitstreams/1]

fixtures.each do |fixture|
# "https://test" + "preservica/..." yields the host "testpreservica",
# i.e. the PRESERVICA_HOST value set in the around hook.
stub_request(:get, "https://test#{fixture}").to_return(
status: 200, body: File.open(File.join(fixture_path, "#{fixture}.xml"))
)
end
stub_preservica_tifs_set_of_three
# S3 stubs for each child oid: GET/HEAD of the original tif and PUT of the ptiff.
stub_request(:get, "https://yale-test-image-samples.s3.amazonaws.com/originals/1002533.tif")
.to_return(status: 200, body: File.open('spec/fixtures/images/sample.tiff', 'rb'))
stub_request(:head, "https://yale-test-image-samples.s3.amazonaws.com/originals/1002533.tif")
.to_return(status: 200)
stub_request(:put, "https://yale-test-image-samples.s3.amazonaws.com/ptiffs/33/10/02/53/1002533.tif")
.to_return(status: 200)
stub_request(:get, "https://yale-test-image-samples.s3.amazonaws.com/originals/1002534.tif")
.to_return(status: 200, body: File.open('spec/fixtures/images/sample.tiff', 'rb'))
stub_request(:head, "https://yale-test-image-samples.s3.amazonaws.com/originals/1002534.tif")
.to_return(status: 200)
stub_request(:put, "https://yale-test-image-samples.s3.amazonaws.com/ptiffs/34/10/02/53/1002534.tif")
.to_return(status: 200)
stub_request(:get, "https://yale-test-image-samples.s3.amazonaws.com/originals/1002535.tif")
.to_return(status: 200, body: File.open('spec/fixtures/images/sample.tiff', 'rb'))
stub_request(:head, "https://yale-test-image-samples.s3.amazonaws.com/originals/1002535.tif")
.to_return(status: 200)
# NOTE(review): this ptiff path has an extra leading "03/" segment compared with
# the 1002533 and 1002534 PUT stubs above — confirm which shape is correct.
stub_request(:put, "https://yale-test-image-samples.s3.amazonaws.com/ptiffs/03/35/10/02/53/1002535.tif")
.to_return(status: 200)
end

context 'user with permission' do
before do
user.add_role(:editor, admin_set)
login_as(:user)
end

it 'can reingest child objects and keep oids, captions and labels but replace images' do
# Start from a clean slate: remove access masters left behind by an earlier run.
File.delete("spec/fixtures/images/access_masters/03/33/10/02/53/1002533.tif") if File.exist?("spec/fixtures/images/access_masters/03/33/10/02/53/1002533.tif")
File.delete("spec/fixtures/images/access_masters/03/34/10/02/53/1002534.tif") if File.exist?("spec/fixtures/images/access_masters/03/34/10/02/53/1002534.tif")
File.delete("spec/fixtures/images/access_masters/03/35/10/02/53/1002535.tif") if File.exist?("spec/fixtures/images/access_masters/03/35/10/02/53/1002535.tif")
allow(S3Service).to receive(:s3_exists?).and_return(false)
# Preconditions: one parent, three children, no checksum or Preservica update yet.
expect(ParentObject.count).to eq 1
expect(ChildObject.count).to eq 3
po_first = ParentObject.first
co_first = ChildObject.first
expect(co_first.oid).to eq 1_002_533
expect(co_first.caption).to eq 'original caption'
expect(co_first.label).to eq 'original label'
expect(co_first.sha512_checksum).to be nil
expect(po_first.last_preservica_update).to be nil
expect(ptf_1.access_master_path).to eq "spec/fixtures/images/access_masters/03/33/10/02/53/1002533.tif"
expect(ptf_2.access_master_path).to eq "spec/fixtures/images/access_masters/03/34/10/02/53/1002534.tif"
expect(ptf_3.access_master_path).to eq "spec/fixtures/images/access_masters/03/35/10/02/53/1002535.tif"

# Saving the batch process with the CSV attached must not add or remove children.
reingest_batch_process = BatchProcess.new(batch_action: 'reingest with preservica', user: user)
expect do
reingest_batch_process.file = preservica_reingest
reingest_batch_process.save!
end.not_to change { ChildObject.count }
# Re-read the records so the assertions see the post-reingest state.
po_first = ParentObject.first
co_first = ChildObject.first

# The parent and first child now carry Preservica-derived data, while the
# child's oid, caption and label are unchanged.
expect(po_first.last_preservica_update).not_to be nil
expect(co_first.sha512_checksum).not_to be nil
expect(co_first.oid).to eq 1_002_533
expect(co_first.caption).to eq 'original caption'
expect(co_first.label).to eq 'original label'
expect(File.exist?("spec/fixtures/images/access_masters/03/33/10/02/53/1002533.tif")).to be true
expect(File.exist?("spec/fixtures/images/access_masters/03/34/10/02/53/1002534.tif")).to be true
expect(File.exist?("spec/fixtures/images/access_masters/03/35/10/02/53/1002535.tif")).to be true
# Clean up the access masters written during the example.
# NOTE(review): these lines are skipped if an expectation above fails; the
# deletes at the top of the example cover that case on the next run.
File.delete("spec/fixtures/images/access_masters/03/33/10/02/53/1002533.tif") if File.exist?("spec/fixtures/images/access_masters/03/33/10/02/53/1002533.tif")
File.delete("spec/fixtures/images/access_masters/03/34/10/02/53/1002534.tif") if File.exist?("spec/fixtures/images/access_masters/03/34/10/02/53/1002534.tif")
File.delete("spec/fixtures/images/access_masters/03/35/10/02/53/1002535.tif") if File.exist?("spec/fixtures/images/access_masters/03/35/10/02/53/1002535.tif")
end
end

context 'user without permission' do
before do
# NOTE(review): no role is granted to the user in this context before this
# call, so remove_role presumably acts only as a safeguard — confirm.
user.remove_role(:editor)
login_as(:user)
end

it 'can throw an error if user does not have permission on parent object' do
allow(S3Service).to receive(:s3_exists?).and_return(false)
# Parent matching the oid in preservica_reingest_invalid.csv; the user has no
# editor role on its admin set.
parent_object = ParentObject.new(oid: 12_345, admin_set: AdminSet.find_by_key('brbl'))
parent_object.save

reingest_batch_process = BatchProcess.new(batch_action: 'reingest with preservica', user: user)
expect do
reingest_batch_process.file = preservica_reingest_invalid
reingest_batch_process.save!
end.not_to change { ChildObject.count }
# Exactly one event is recorded, and it is the permission-denied message.
expect(reingest_batch_process.batch_ingest_events_count).to be 1
expect(reingest_batch_process.batch_ingest_events.last.reason).to eq('Skipping row with parent oid: 12345, user does not have permission to update')
end
end
end