From 0f26e11e48ecf4170ecac6d8d277b309a5bdb426 Mon Sep 17 00:00:00 2001 From: Andy Wallace Date: Tue, 9 Feb 2021 13:15:24 -0800 Subject: [PATCH] CAL-939 Validate normalized date column in CSVs (#865) --- app/uploaders/csv_manifest_validator.rb | 6 ++++++ spec/fixtures/example-baddates.csv | 3 +++ spec/uploaders/csv_manifest_validator_spec.rb | 14 ++++++++++++-- 3 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 spec/fixtures/example-baddates.csv diff --git a/app/uploaders/csv_manifest_validator.rb b/app/uploaders/csv_manifest_validator.rb index 1ef52aea..07663409 100644 --- a/app/uploaders/csv_manifest_validator.rb +++ b/app/uploaders/csv_manifest_validator.rb @@ -70,6 +70,9 @@ def initialize(manifest_uploader) @errors = [] @warnings = [] @mapper = CalifornicaMapper.new + + # HACK: WorkIndexer is supposed to be initialized with a Hyrax object, not a Mapper. This works for now because both support the 'normalized_date' method, which is all we're initially using, but be very careful about using the indexer for anything else. + @indexer = WorkIndexer.new(@mapper) end # Errors and warnings for the CSV file. @@ -188,6 +191,9 @@ def validate_records this_row_warnings << "Rows contain a File Name that does not exist. Incorrect values may be imported." unless File.exist?(full_path) end + # Row has improperly formatted date values + this_row_warnings << "Rows contain unparsable values for 'normalized_date'." 
if @mapper.normalized_date.to_a.length != @indexer.solr_dates.to_a.length + this_row_warnings.each do |warning| # +1 for 0-based indexing, +1 for skipped headers row_warnings[warning] << i + 2 diff --git a/spec/fixtures/example-baddates.csv b/spec/fixtures/example-baddates.csv new file mode 100644 index 00000000..6797b6d7 --- /dev/null +++ b/spec/fixtures/example-baddates.csv @@ -0,0 +1,3 @@ +Item ARK,AltIdentifier.local,Title,Subject,Type.typeOfResource,Publisher.publisherName,Format.medium,Name.repository,Description.note,Format.extent,Project Name,Description.latitude,Description.longitude,Date.creation,Description.caption,Format.dimensions,Description.fundingNote,Type.genre,Rights.rightsHolderContact,Rights.countryCreation,Date.normalized,File Name,Coverage.geographic,Name.subject +21199/zz0002nq4w,bluclalat_1387_b107_40098,"Picture of Missing Image, Calif., 1947",Express highways--California--Los Angeles County--Design and construction|~|Eminent domain--California--Los Angeles|~|Demonstrations--California--Los Angeles County|~|Transportation|~|Government|~|Activism|~|Interstate 10,still image,Los Angeles Daily News,1 photograph,"University of California, Los Angeles. $b Library Special Collections","At the Hall of Records, 220 N. Broadway.|~|Description 2",1 photo,Los Angeles Daily News Negatives,34.054133,-118.243865,"September 17, 1947",This example does not have a caption.,10 x 12.5 cm.,Info about funding,news photographs,"UCLA Charles E. Young Research Library Department of Special Collections, A1713 Young Research Library, Box 951575, Los Angeles, CA 90095-1575. E-mail: spec-coll@library.ucla.edu. Phone: (310)825-4988",US,1930-121-31,,Los Angeles (Calif.),Los Angeles County (Calif.). 
$b Board of Supervisors +21199/zz0002nq4w,bluclalat_1387_b107_40098,"Picture of Missing Image, Calif., 1947",Express highways--California--Los Angeles County--Design and construction|~|Eminent domain--California--Los Angeles|~|Demonstrations--California--Los Angeles County|~|Transportation|~|Government|~|Activism|~|Interstate 10,still image,Los Angeles Daily News,1 photograph,"University of California, Los Angeles. $b Library Special Collections","At the Hall of Records, 220 N. Broadway.|~|Description 2",1 photo,Los Angeles Daily News Negatives,34.054133,-118.243865,"September 17, 1947",This example does not have a caption.,10 x 12.5 cm.,Info about funding,news photographs,"UCLA Charles E. Young Research Library Department of Special Collections, A1713 Young Research Library, Box 951575, Los Angeles, CA 90095-1575. E-mail: spec-coll@library.ucla.edu. Phone: (310)825-4988",US,1947-09-17,,Los Angeles (Calif.),Los Angeles County (Calif.). $b Board of Supervisors diff --git a/spec/uploaders/csv_manifest_validator_spec.rb b/spec/uploaders/csv_manifest_validator_spec.rb index ebf0e6a7..984fcaef 100644 --- a/spec/uploaders/csv_manifest_validator_spec.rb +++ b/spec/uploaders/csv_manifest_validator_spec.rb @@ -123,17 +123,27 @@ context 'when the csv has a missing file' do let(:csv_file) { 'spec/fixtures/example-missingimage.csv' } let(:path) { File.join(ENV['IMPORT_FILE_PATH'], 'Masters/dlmasters/missing_file.tif') } + let(:warning_text) { "Row 2: Rows contain a File Name that does not exist. Incorrect values may be imported." } it 'has warnings' do allow(File).to receive(:exist?).with(path).and_return(false) validator.validate - expect(validator.warnings).to include("Row 2: Rows contain a File Name that does not exist. 
Incorrect values may be imported.") + expect(validator.warnings).to include(warning_text) end it 'doesn\'t warn about files that aren\'t missing' do allow(File).to receive(:exist?).with(path).and_return(true) validator.validate - expect(validator.warnings).to_not include("Row 2: cannot find '#{path}'") + expect(validator.warnings).to_not include(warning_text) + end + end + + context 'when the csv has improperly formatted dates' do + let(:csv_file) { 'spec/fixtures/example-baddates.csv' } + + it 'warns about the bad dates, not about the good' do + validator.validate + expect(validator.warnings).to contain_exactly("Row 2: Rows contain unparsable values for 'normalized_date'.") end end