Skip to content

Commit

Permalink
CAL-964 check invalid dates (#863)
Browse files Browse the repository at this point in the history
* Rebuild gem file lock

* Check for invalid dates and convert dates to solr dates in code

* Fix rubocop errors

* Add the utc conversion to the spec for solr dates
  • Loading branch information
pghorpade authored Feb 2, 2021
1 parent 91b55f0 commit 94909a2
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 57 deletions.
1 change: 1 addition & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Metrics/ClassLength:
- app/importers/californica_mapper.rb
- app/jobs/hyrax/characterize_job.rb
- app/uploaders/csv_manifest_validator.rb
- app/indexers/work_indexer.rb

Metrics/BlockLength:
Enabled: true
Expand Down
6 changes: 3 additions & 3 deletions app/controllers/csv_imports_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -85,21 +85,21 @@ def row_times
def min
if @csv_rows.count == @csv_import.record_count
@min_ingest_duration = row_times.minimum(:ingest_duration)
@min_ingest_duration.round(2)
@min_ingest_duration&.round(2)
end
end

def max
if @csv_rows.count == @csv_import.record_count
@max_ingest_duration = row_times.maximum(:ingest_duration)
@max_ingest_duration.round(2)
@max_ingest_duration&.round(2)
end
end

def mean
if @csv_rows.count == @csv_import.record_count
@mean_ingest_duration = row_times.average(:ingest_duration)
@mean_ingest_duration.round(2)
@mean_ingest_duration&.round(2)
end
end

Expand Down
26 changes: 16 additions & 10 deletions app/indexers/work_indexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class WorkIndexer < Hyrax::WorkIndexer
def generate_solr_document
super.tap do |solr_doc|
solr_doc['combined_subject_ssim'] = combined_subject
solr_doc['date_dtsim'] = solr_dates
add_dates(solr_doc)
solr_doc['geographic_coordinates_ssim'] = coordinates
solr_doc['human_readable_iiif_text_direction_ssi'] = human_readable_iiif_text_direction
solr_doc['human_readable_iiif_viewing_hint_ssi'] = human_readable_iiif_viewing_hint
Expand All @@ -33,6 +33,12 @@ def generate_solr_document
end
end

# Adds the date fields to the Solr document being built.
#
# 'date_dtsim' receives whatever solr_dates returns; it is left untouched when
# solr_dates returns nil. 'date_dtsort' receives the first 'date_dtsim' value
# and is left untouched when there is no such value.
#
# @param solr_doc [Hash] the Solr document; modified in place
def add_dates(solr_doc)
  valid_dates = solr_dates
  solr_doc['date_dtsim'] = valid_dates if valid_dates

  # Guard on the source field (date_dtsim), not the destination (date_dtsort):
  # checking the destination meant the sort field was never populated on a fresh
  # document, and raised NoMethodError when date_dtsim was missing.
  first_date = Array(solr_doc['date_dtsim']).first
  solr_doc['date_dtsort'] = first_date if first_date
end

# Gathers every subject-like value on the object into one flat array, in the
# order: named subject, subject, subject topic, subject geographic, subject temporal.
#
# @return [Array] the concatenated values of all five subject fields
def combined_subject
  subject_fields = [
    object.named_subject,
    object.subject,
    object.subject_topic,
    object.subject_geographic,
    object.subject_temporal
  ]
  subject_fields.flat_map(&:to_a)
end
Expand Down Expand Up @@ -103,22 +109,22 @@ def years

def solr_dates
dates = object.normalized_date.to_a
dates = Array.wrap(dates).flat_map do |date|
validate_date = date.split('/')
validate_date.each do |item|
valid_dates = []
dates.each do |date|
split_dates = date.split('/')
split_dates.each do |item|
item_values = item.split('-')
if item_values.length == 2
Date.strptime(item, "%Y-%m")
valid_dates.push Date.strptime(item, "%Y-%m").to_time.utc.iso8601
elsif item_values.length == 3
Date.strptime(item, "%Y-%m-%d")
valid_dates.push Date.strptime(item, "%Y-%m-%d").to_time.utc.iso8601
else
Date.strptime(item, "%Y")
valid_dates.push Date.strptime(item, "%Y").to_time.utc.iso8601
end
end
validate_date.reverse.join("/")
end.compact.uniq.sort
end
return nil if dates.blank?
dates
valid_dates
rescue ArgumentError => e
# We might want to start reporting metadata errors to Rollbar if we come up with a way to make them searchable and allow them to provide a feedback loop.
# Rollbar.error(e, "Invalid date string encountered in normalized date field: #{date_string}")
Expand Down
41 changes: 0 additions & 41 deletions solr/config/solrconfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -200,47 +200,6 @@
</requestHandler>

<updateRequestProcessorChain name="add_modify_fields" default="${update.autoCreateFields:true}">
<processor class="solr.CloneFieldUpdateProcessorFactory">
<str name="source">date_dtsim</str>
<str name="dest">date_dtsort</str>
</processor>

<processor class="solr.processor.FirstFieldValueUpdateProcessorFactory">
<str name="fieldName">date_dtsort</str>
</processor>

<processor class="solr.ParseDateFieldUpdateProcessorFactory">
<arr name="format">
<str>yyyy-MM-dd'T'HH:mm:ss.SSSZ</str>
<str>yyyy-MM-dd'T'HH:mm:ss,SSSZ</str>
<str>yyyy-MM-dd'T'HH:mm:ss.SSS</str>
<str>yyyy-MM-dd'T'HH:mm:ss,SSS</str>
<str>yyyy-MM-dd'T'HH:mm:ssZ</str>
<str>yyyy-MM-dd'T'HH:mm:ss</str>
<str>yyyy-MM-dd'T'HH:mmZ</str>
<str>yyyy-MM-dd'T'HH:mm</str>
<str>yyyy-MM-dd HH:mm:ss.SSSZ</str>
<str>yyyy-MM-dd HH:mm:ss,SSSZ</str>
<str>yyyy-MM-dd HH:mm:ss.SSS</str>
<str>yyyy-MM-dd HH:mm:ss,SSS</str>
<str>yyyy-MM-dd HH:mm:ssZ</str>
<str>yyyy-MM-dd HH:mm:ss</str>
<str>yyyy-MM-dd HH:mmZ</str>
<str>yyyy-MM-dd HH:mm</str>
<str>yyyy-MM-dd</str>
<str>yyyy-MM</str>
<str>yyyy</str>
<str>yyyy/yyyy</str>
<str>yyyy-MM/yyyy-MM</str>
<str>yyyy-MM-dd/yyyy-MM-dd</str>
<str>yyyy/yyyy-mm</str>
<str>yyyy-mm/yyyy</str>
<str>yyyy/yyyy-mm-dd</str>
<str>yyyy-mm-dd/yyyy</str>
<str>yyyy-mm/yyyy-mm-dd</str>
<str>yyyy-mm-dd/yyyy-mm</str>
</arr>
</processor>

<processor class="solr.processor.SignatureUpdateProcessorFactory">
<bool name="enabled">true</bool>
Expand Down
6 changes: 3 additions & 3 deletions spec/indexers/work_indexer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@

it 'indexes the year' do
expect(solr_document['year_isim']).to eq [1940]
expect(solr_document['date_dtsim']).to eq ['1940']
expect(solr_document['date_dtsim']).to eq [Date.strptime('1940', "%Y").to_time.utc.iso8601]
end
end

Expand Down Expand Up @@ -276,7 +276,7 @@

it 'indexes the earliest year' do
expect(solr_document['sort_year_isi']).to eq 1940
expect(solr_document['date_dtsim']).to eq ['1940-10-15']
expect(solr_document['date_dtsim']).to eq [Date.strptime('1940-10-15', "%Y-%m-%d").to_time.utc.iso8601]
end
end

Expand All @@ -303,7 +303,7 @@

it 'indexes the earliest year' do
expect(solr_document['sort_year_isi']).to eq 1934
expect(solr_document['date_dtsim']).to eq ['1937-07/1934-06']
expect(solr_document['date_dtsim']).to eq [Date.strptime('1934-06', "%Y-%m").to_time.utc.iso8601, Date.strptime('1937-07', "%Y-%m").to_time.utc.iso8601]
end
end

Expand Down

0 comments on commit 94909a2

Please sign in to comment.