Skip to content

Commit

Permalink
CAL-939 Validate normalized date column in CSVs (#865)
Browse files Browse the repository at this point in the history
  • Loading branch information
sourcefilter authored Feb 9, 2021
1 parent 94909a2 commit 0f26e11
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 2 deletions.
6 changes: 6 additions & 0 deletions app/uploaders/csv_manifest_validator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ def initialize(manifest_uploader)
@errors = []
@warnings = []
@mapper = CalifornicaMapper.new

# HACK: WorkIndexer is supposed to be initialized with a Hyrax object, not a Mapper. This works for now only because both a Hyrax object and a Mapper support the 'normalized_date' method, which is all we're initially using. Be very careful about using @indexer for anything else.
@indexer = WorkIndexer.new(@mapper)
end

# Errors and warnings for the CSV file.
Expand Down Expand Up @@ -188,6 +191,9 @@ def validate_records
this_row_warnings << "Rows contain a File Name that does not exist. Incorrect values may be imported." unless File.exist?(full_path)
end

# Row has improperly formatted date values
this_row_warnings << "Rows contain unparsable values for 'normalized_date'." if @mapper.normalized_date.to_a.length != @indexer.solr_dates.to_a.length

this_row_warnings.each do |warning|
# +1 for 0-based indexing, +1 for skipped headers
row_warnings[warning] << i + 2
Expand Down
3 changes: 3 additions & 0 deletions spec/fixtures/example-baddates.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Item ARK,AltIdentifier.local,Title,Subject,Type.typeOfResource,Publisher.publisherName,Format.medium,Name.repository,Description.note,Format.extent,Project Name,Description.latitude,Description.longitude,Date.creation,Description.caption,Format.dimensions,Description.fundingNote,Type.genre,Rights.rightsHolderContact,Rights.countryCreation,Date.normalized,File Name,Coverage.geographic,Name.subject
21199/zz0002nq4w,bluclalat_1387_b107_40098,"Picture of Missing Image, Calif., 1947",Express highways--California--Los Angeles County--Design and construction|~|Eminent domain--California--Los Angeles|~|Demonstrations--California--Los Angeles County|~|Transportation|~|Government|~|Activism|~|Interstate 10,still image,Los Angeles Daily News,1 photograph,"University of California, Los Angeles. $b Library Special Collections","At the Hall of Records, 220 N. Broadway.|~|Description 2",1 photo,Los Angeles Daily News Negatives,34.054133,-118.243865,"September 17, 1947",This example does not have a caption.,10 x 12.5 cm.,Info about funding,news photographs,"UCLA Charles E. Young Research Library Department of Special Collections, A1713 Young Research Library, Box 951575, Los Angeles, CA 90095-1575. E-mail: [email protected]. Phone: (310)825-4988",US,1930-121-31,,Los Angeles (Calif.),Los Angeles County (Calif.). $b Board of Supervisors
21199/zz0002nq4w,bluclalat_1387_b107_40098,"Picture of Missing Image, Calif., 1947",Express highways--California--Los Angeles County--Design and construction|~|Eminent domain--California--Los Angeles|~|Demonstrations--California--Los Angeles County|~|Transportation|~|Government|~|Activism|~|Interstate 10,still image,Los Angeles Daily News,1 photograph,"University of California, Los Angeles. $b Library Special Collections","At the Hall of Records, 220 N. Broadway.|~|Description 2",1 photo,Los Angeles Daily News Negatives,34.054133,-118.243865,"September 17, 1947",This example does not have a caption.,10 x 12.5 cm.,Info about funding,news photographs,"UCLA Charles E. Young Research Library Department of Special Collections, A1713 Young Research Library, Box 951575, Los Angeles, CA 90095-1575. E-mail: [email protected]. Phone: (310)825-4988",US,1947-09-17,,Los Angeles (Calif.),Los Angeles County (Calif.). $b Board of Supervisors
14 changes: 12 additions & 2 deletions spec/uploaders/csv_manifest_validator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -123,17 +123,27 @@
# Exercises the "File Name that does not exist" warning. File.exist? is stubbed
# for the expected master-file path so each example controls whether the file
# appears to be present, independent of what is actually on disk.
context 'when the csv has a missing file' do
  let(:csv_file) { 'spec/fixtures/example-missingimage.csv' }
  let(:path) { File.join(ENV['IMPORT_FILE_PATH'], 'Masters/dlmasters/missing_file.tif') }
  # Single source of truth for the warning text, so the positive and negative
  # examples assert on exactly the same message.
  let(:warning_text) { "Row 2: Rows contain a File Name that does not exist. Incorrect values may be imported." }

  it 'has warnings' do
    allow(File).to receive(:exist?).with(path).and_return(false)
    validator.validate
    expect(validator.warnings).to include(warning_text)
  end

  it 'doesn\'t warn about files that aren\'t missing' do
    allow(File).to receive(:exist?).with(path).and_return(true)
    validator.validate
    # Assert on the same warning the positive example expects; checking for a
    # different message here would pass even if the warning were emitted.
    expect(validator.warnings).to_not include(warning_text)
  end
end

# Exercises the 'normalized_date' warning. In the fixture, the first data row
# (reported as row 2) carries the value '1930-121-31' and the second data row
# carries '1947-09-17', so exactly one warning is expected.
context 'when the csv has improperly formatted dates' do
  let(:csv_file) { 'spec/fixtures/example-baddates.csv' }
  let(:bad_date_warning) { "Row 2: Rows contain unparsable values for 'normalized_date'." }

  it 'warns about the bad dates, not about the good' do
    validator.validate
    # contain_exactly also proves that no other row produced a warning.
    expect(validator.warnings).to contain_exactly(bad_date_warning)
  end
end

Expand Down

0 comments on commit 0f26e11

Please sign in to comment.