Skip to content

Commit

Permalink
Removes potentially destructive code.
Browse files Browse the repository at this point in the history
* Removes PBCoreIngester.delete_all method.
* Modifies PBCoreIngester.load_fixtures to not delete anything.
* Adds guard clause on PBCoreIngester.load_fixtures that raises an error
  when run in production.
  • Loading branch information
afred committed Aug 24, 2023
1 parent 009b905 commit 011964b
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 25 deletions.
42 changes: 20 additions & 22 deletions scripts/lib/pb_core_ingester.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,52 +10,50 @@
require_relative '../../app/helpers/solr_guid_fetcher'

class PBCoreIngester
attr_reader :errors
attr_reader :success_count
attr_reader :errors, :success_count, :solr, :log

include SolrGUIDFetcher

# Connects to Solr and resets the per-run bookkeeping (errors, success count).
#
# Side effect: when the global $LOG is unset, a NullLogger is installed into
# it, so code that still logs through $LOG keeps working. The same logger is
# exposed through the `log` reader.
def initialize
  # TODO: hostname and corename from config?
  @solr = Solr.instance.connect
  @log = ($LOG ||= NullLogger.new)
  # The default array is shared between missing keys; that is safe only
  # because record_error appends with `+=` (reassignment), never with `<<`.
  @errors = Hash.new([])
  @success_count = 0
end

# Ingests PBCore fixture files into Solr. Nothing is deleted beforehand;
# callers that need an empty index must clear it themselves.
#
# @param globs [Array<String>] file glob patterns; when none are given, all
#   "clean" PBCore fixtures (spec/fixtures/pbcore/clean-*.xml) are used.
# @return [Array<String>] the de-duplicated paths that were handed to #ingest
# @raise [RuntimeError] when RAILS_ENV is "production"
def self.load_fixtures(*globs)
  # Guard first, before any ingester exists: nothing below may run against a
  # production index.
  raise 'Cannot load PBCore fixtures in "production" environment' if ENV['RAILS_ENV'] == 'production'

  # If no globs were passed in, default to all "clean" PBCore fixtures.
  globs << 'spec/fixtures/pbcore/clean-*.xml' if globs.empty?
  # Expand every glob and drop paths matched by more than one pattern.
  all_paths = globs.flat_map { |glob| Dir[glob] }.uniq

  ingester = PBCoreIngester.new
  all_paths.each { |path| ingester.ingest(path: path) }
end

# DESTRUCTIVE: removes every document from the Solr index (query '*:*') and
# commits the deletion immediately.
def delete_all
  match_everything = '*:*'
  solr.delete_by_query(match_everything)
  commit
end

# Deletes the Solr record for each GUID, committing after every deletion.
#
# A GUID is deleted only when looking it up by id returns exactly one
# document; no match, several matches, or a response without docs is skipped.
# Progress is printed to stdout.
#
# @param guids [Enumerable<String>] record IDs to remove
# @return [nil]
def delete_records(guids)
  guids.each do |guid|
    puts "Deleting #{guid}"
    # One lookup per GUID is enough; the result decides whether we delete.
    resp = solr.get('select', params: { q: "id:#{guid}" })
    response = resp['response']
    docs = response && response['docs']

    # can't delete what you can't query
    next unless docs && docs.count == 1

    puts "Ready to delete #{guid}"
    # Delete exactly once, then commit so the removal is visible right away.
    delete_by_query(%(id:#{guid}))
    commit
  end
  puts 'Done!'
end

# Deletes the Solr documents matching +query+ through the `solr` connection.
# No commit is issued here; call #commit afterwards to finalize the deletion.
def delete_by_query(query)
solr.delete_by_query(query)
end


def ingest(opts)
path = opts[:path]
is_batch_commit = opts[:is_batch_commit]
Expand All @@ -74,12 +72,12 @@ def ingest(opts)
xml_top = xml[0..100] # just look at the start of the file.
case xml_top
when /<pbcoreCollection/
$LOG.info("Read pbcoreCollection from #{path}")
log.info("Read pbcoreCollection from #{path}")
Uncollector.uncollect_string(xml).each do |document|
md5 = Digest::MD5.hexdigest(document)
if @md5s_seen.include?(md5)
# Documents are often repeated in AMS exports.
$LOG.info("Skipping already seen md5 #{md5}")
log.info("Skipping already seen md5 #{md5}")
else
@md5s_seen.add(md5)
begin
Expand Down Expand Up @@ -108,12 +106,12 @@ def ingest(opts)

# Logs an ingest failure as a warning and files it in @errors.
#
# Errors are grouped under "<ExceptionClass>: <first line of the message>" so
# that repeated failures of the same kind collect in one list.
#
# @param e [Exception] the error that occurred
# @param path [String] file being ingested when the error occurred
# @param id_extracts [String] optional identifying text for the failing record
def record_error(e, path, id_extracts = '')
  message = "#{path} #{id_extracts}: #{e.message}"
  # Warn exactly once, through the instance logger.
  log.warn(message)
  # `+=` reassigns the key, so the Hash's shared default array is never mutated.
  @errors["#{e.class}: #{e.message.split(/\n/).first}"] += [message]
end

# Commits pending changes on the Solr connection, once per call.
#
# @return whatever the Solr connection's #commit returns
def commit
  solr.commit
end

def ingest_xml_no_commit(xml)
Expand All @@ -125,17 +123,17 @@ def ingest_xml_no_commit(xml)

begin
# From SolrGUIDFetcher
fetch_all_from_solr(pbcore.id, @solr).each do |id|
$LOG.info("Removing solr record with ID: #{pbcore.id}")
@solr.delete_by_id(id)
fetch_all_from_solr(pbcore.id, solr).each do |id|
log.info("Removing solr record with ID: #{pbcore.id}")
solr.delete_by_id(id)
end

@solr.add(pbcore.to_solr)
solr.add(pbcore.to_solr)
rescue => e
raise SolrError.new(e)
end

$LOG.info("Updated solr record #{pbcore.id}")
log.info("Updated solr record #{pbcore.id}")

pbcore
end
Expand Down
2 changes: 1 addition & 1 deletion spec/features/media_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def setup(ci)
"cpb-aacip-1234</pbcoreIdentifier><pbcoreIdentifier source='Sony Ci'>#{ci_id}</pbcoreIdentifier>")

ingester = PBCoreIngester.new
ingester.delete_all
ingester.delete_by_query('*:*')
ingester.ingest_xml_no_commit(pbcore)
ingester.commit
ci_id
Expand Down
4 changes: 2 additions & 2 deletions spec/scripts/pb_core_ingester_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
end

after(:each) do
@ingester.delete_all
@ingester.delete_by_query('*:*')
end

it 'whines about non-existent file' do
Expand All @@ -28,7 +28,7 @@
expect_results(1)
expect { @ingester.ingest(path: path) }.not_to raise_error
expect_results(1)
expect { @ingester.delete_all }.not_to raise_error
expect { @ingester.delete_by_query('*:*') }.not_to raise_error
expect_results(0)
end

Expand Down

0 comments on commit 011964b

Please sign in to comment.