Skip to content

Commit

Permalink
CharacterizeJob non-ASCII filename bug
Browse files Browse the repository at this point in the history
  • Loading branch information
conorom committed Jun 30, 2022
1 parent e5c1de5 commit b42b2df
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
2 changes: 1 addition & 1 deletion app/jobs/characterize_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def characterize(file_set, _file_id, filepath) # rubocop:disable Metrics/AbcSize
file_set.date_modified = Hyrax::TimeService.time_in_utc if file_set.characterization_proxy.original_checksum.first != previous_checksum

# set title to label if that's how it was before this characterization
file_set.title = [file_set.characterization_proxy.original_name] if reset_title
file_set.title = [file_set.characterization_proxy.original_name.force_encoding("UTF-8")] if reset_title
# always set the label to the original_name
file_set.label = file_set.characterization_proxy.original_name

Expand Down
26 changes: 23 additions & 3 deletions spec/jobs/characterize_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,13 @@
allow(file_set).to receive(:characterization_proxy).and_call_original
end

context 'title and label were the previously the same' do
context 'title and label were previously the same' do
let(:title) { ['old_filename.jpg'] }
let(:label) { 'old_filename.jpg' }

before do
allow(file_set).to receive_message_chain(:characterization_proxy, :original_name).and_return('new_filename.jpg') # rubocop:disable RSpec/MessageChain
allow(file_set).to receive_message_chain(:characterization_proxy, :original_name)
.and_return(String.new('new_filename.jpg', encoding: 'ASCII-8BIT')) # rubocop:disable RSpec/MessageChain
end

it 'sets title to label' do
Expand All @@ -107,14 +108,33 @@
expect(file_set.title).to eq ['new_filename.jpg']
expect(file_set.label).to eq 'new_filename.jpg'
end

# https://github.com/samvera/hyrax/issues/5671
context 'original_name, which has encoding set to ASCII-8BIT, contains non-ASCII characters' do
  # Stub the proxy chain so original_name comes back as a binary (ASCII-8BIT)
  # string whose bytes are a filename containing non-ASCII characters.
  before do
    allow(file_set).to receive_message_chain(:characterization_proxy, :original_name)
      .and_return(String.new('ファイル.txt', encoding: 'ASCII-8BIT')) # rubocop:disable RSpec/MessageChain
  end

  it 'does not raise an error, and still sets title to label' do
    expect(file).to receive(:save!)
    expect(file_set).to receive(:update_index)
    # Regression guard for Encoding::UndefinedConversionError
    # ('"\xE3" from ASCII-8BIT to UTF-8'). The negated matcher is left bare on
    # purpose: `not_to raise_error(SomeError, message)` passes when any *other*
    # error is raised, which could hide a failure before the code under test
    # is even reached.
    expect { described_class.perform_now(file_set, file.id) }.not_to raise_error
    expect(file_set.title).to eq ['ファイル.txt']
    expect(file_set.label).to eq 'ファイル.txt'
  end
end
end

context 'title and label were not previously the same' do
let(:title) { ['My User-Entered Title'] }
let(:label) { 'old_filename.jpg' }
let(:original_name) { 'new_filename.jpg' }

before do
allow(file_set).to receive_message_chain(:characterization_proxy, :original_name).and_return('new_filename.jpg') # rubocop:disable RSpec/MessageChain
allow(file_set).to receive_message_chain(:characterization_proxy, :original_name)
.and_return(String.new('new_filename.jpg', encoding: 'ASCII-8BIT')) # rubocop:disable RSpec/MessageChain
end

it 'assumes a user-entered title value and leaves title as-is' do
Expand Down

0 comments on commit b42b2df

Please sign in to comment.