Skip to content

Commit

Permalink
Merge pull request #678 from UCLALibrary/no-file-ingest
Browse files (browse the repository at this point in the history)
Make ingest metadata-only.
  • Loading branch information
jendiamond authored Jul 18, 2019
2 parents 73045ef + 28bdd74 commit 09537e5
Show file tree
Hide file tree
Showing 16 changed files with 3 additions and 392 deletions.
1 change: 0 additions & 1 deletion .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ inherit_gem:
AllCops:
TargetRubyVersion: 2.4
Exclude:
- 'fits-*/**/**'
- 'db/**/*.rb'
- 'vendor/**/*'
- 'tmp/**/*'
Expand Down
4 changes: 0 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ env:
before_install:
- gem update --system
- gem install bundler:1.17.3
- curl -sLO https://projects.iq.harvard.edu/files/fits/files/fits-0.8.6_1.zip
- unzip fits-0.8.6_1.zip
- chmod +x fits-0.8.6/fits.sh
- export PATH="$(pwd)/fits-0.8.6:$PATH"
before_script:
- bundle exec rake db:create
- ln --symbolic /usr/lib/chromium-browser/chromedriver "${HOME}/bin/chromedriver"
Expand Down
7 changes: 0 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,5 @@
FROM ruby:2.5

# Install fits
RUN mkdir /fits
WORKDIR /fits
ADD https://github.com/harvard-lts/fits/releases/download/1.4.0/fits-1.4.0.zip /fits/
RUN unzip fits-1.4.0.zip -d /fits
ENV PATH "/fits:$PATH"

RUN apt-get update -qq
# Add https support to apt to download yarn & newer node
RUN apt-get install -y apt-transport-https
Expand Down
47 changes: 2 additions & 45 deletions app/importers/californica_mapper.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# frozen_string_literal: true

class CalifornicaMapper < Darlingtonia::HashMapper
attr_reader :missing_file_log, :import_file_path, :row_number
attr_reader :row_number

CALIFORNICA_TERMS_MAP = {
alternative_title: ["AltTitle.other",
Expand Down Expand Up @@ -58,8 +58,6 @@ class CalifornicaMapper < Darlingtonia::HashMapper
DELIMITER = '|~|'

def initialize(attributes = {})
@missing_file_log = ENV['MISSING_FILE_LOG'] || "#{::Rails.root}/log/missing_files_#{Rails.env}"
@import_file_path = attributes[:import_file_path] || ENV['IMPORT_FILE_PATH'] || '/opt/data'
@row_number = attributes[:row_number]
super()
end
Expand All @@ -77,7 +75,7 @@ def self.required_headers

def fields
# The fields common to all object types
common_fields = CALIFORNICA_TERMS_MAP.keys + [:remote_files, :visibility, :member_of_collections_attributes, :license]
common_fields = CALIFORNICA_TERMS_MAP.keys + [:visibility, :member_of_collections_attributes, :license]
# Pages additionally have a field :in_works_ids, which defines their parent work
return common_fields + [:in_works_ids] if ['ChildWork', 'Page'].include?(metadata["Object Type"])
common_fields
Expand All @@ -91,33 +89,6 @@ def collection?
object_type&.downcase&.chomp == 'collection'
end

##
# Take a filename and:
# 1) Check that it exists. Log it to a missing files log if it doesn't.
# 2) Turn the filename into a file:// url
# 3) Pass it to the actor stack in the remote_files param. This means that
# it will be processed by the CreateWithRemoteFilesActor
# Using the remote_files param to ingest local files is misleading.
# However, it lets us fetch the file from disk in a background job
# instead of creating a Hyrax::UploadedFile object while the CSV is
# being parsed, which gives us a performance advantage.
def remote_files
return [] if collection?
if metadata['File Name'].nil?
File.open(@missing_file_log, 'a') { |file| file.puts "Work #{ark} is missing a filename" }
return []
end
file_name = file_uri_base_path.join(master_file_path).to_s
file_exists = File.exist?(file_name)
return_value = []
if file_exists
return_value = [{ url: file_uri_for(name: metadata['File Name']) }]
else
File.open(@missing_file_log, 'a') { |file| file.puts "Work #{ark} has an invalid file: #{file_name} not found" }
end
return_value
end

def visibility
value_from_csv = metadata['Visibility']&.squish&.downcase
visibility_mapping.fetch(value_from_csv, Hydra::AccessControls::AccessRight::VISIBILITY_TEXT_VALUE_PUBLIC)
Expand Down Expand Up @@ -285,18 +256,4 @@ def in_works_ids
record_page_sequence
[parent_work.id]
end

private

def file_uri_for(name:)
uri = URI('file:///')
uri.path = file_uri_base_path.join(name).to_s
uri.to_s
end

# Prefer the import_file_path that's been explicitly passed to this instance of CalifornicaMapper
# if it exists.
def file_uri_base_path
Pathname.new(@import_file_path)
end
end
32 changes: 0 additions & 32 deletions app/jobs/hyrax/characterize_job.rb

This file was deleted.

33 changes: 0 additions & 33 deletions app/lib/californica/corrupt_file_error.rb

This file was deleted.

16 changes: 0 additions & 16 deletions app/lib/californica/is_valid_image.rb

This file was deleted.

117 changes: 0 additions & 117 deletions config/initializers/characterization_service.rb

This file was deleted.

6 changes: 0 additions & 6 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ services:
- solr
- fedora_test
- solr_test
- fits
env_file:
- ./docker.env
stdin_open: true
Expand Down Expand Up @@ -99,11 +98,6 @@ services:
ports:
- "8985:8983"

fits:
image: harvardlts/fitsservlet_container:latest
ports:
- "8889:8080"

# iiif:
# image: uclalibrary/cantaloupe
# environment:
Expand Down
2 changes: 0 additions & 2 deletions docker.env
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,8 @@ FEDORA_TEST_BASE_PATH=/test
FEDORA_TEST_URL=http://fedora_test:8080/rest
FEDORA_URL=http://fedora:8080/rest
FEDORA_USER=fedoraAdmin
FITS_SERVLET_URL=http://fits:8080/fits
GEONAMES_USERNAME=
IMPORT_FILE_PATH=/opt/data
MISSING_FILE_LOG=log/missing_files.log
RAILS_HOST=localhost
RAILS_SERVE_STATIC_FILES=true
REDIS_CABLE_DB=1
Expand Down
14 changes: 0 additions & 14 deletions spec/importers/californica_importer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,6 @@
let(:user) { FactoryBot.create(:admin) }
let(:csv_path) { 'spec/fixtures/example.csv' }

# Cleanup log files after each test run
after do
File.delete(ENV['MISSING_FILE_LOG']) if File.exist?(ENV['MISSING_FILE_LOG'])
end

describe 'CSV import' do
it 'has an import_file_path' do
expect(importer.import_file_path).to eq csv_import.import_file_path
Expand Down Expand Up @@ -116,14 +111,5 @@
end
end
end

context 'when the image file is missing' do
let(:csv_path) { 'spec/fixtures/example-missingimage.csv' }

it "records missing files" do
importer.import
expect(File.readlines(ENV['MISSING_FILE_LOG']).each(&:chomp!).last).to match(/missing_file.tif/)
end
end
end
end
Loading

0 comments on commit 09537e5

Please sign in to comment.