Skip to content

Commit

Permalink
Add configuration option extract_full_text (defaults to true) so full text extraction can be skipped
Browse files Browse the repository at this point in the history
  • Loading branch information
Collier committed Jun 13, 2017
1 parent 0b82da5 commit a9b309d
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 4 deletions.
16 changes: 12 additions & 4 deletions app/services/hyrax/file_set_derivatives_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,15 @@ def supported_mime_types
# Creates the derivatives for a PDF file: a JPEG thumbnail plus, when enabled,
# the extracted full text.
# @param [String] filename of the PDF the derivatives are generated from
def create_pdf_derivatives(filename)
  Hydra::Derivatives::PdfDerivatives.create(filename,
                                            outputs: [{ label: :thumbnail, format: 'jpg', size: '338x493', url: derivative_url('thumbnail') }])
  # Go through extract_full_text only: it checks Hyrax.config.extract_full_text?
  # before calling FullTextExtract. A direct FullTextExtract.create call here
  # as well would ignore that option and run the extraction a second time.
  extract_full_text(filename, uri)
end

# Creates the derivatives for an office document: a JPEG thumbnail plus, when
# enabled, the extracted full text.
# @param [String] filename of the document the derivatives are generated from
def create_office_document_derivatives(filename)
  Hydra::Derivatives::DocumentDerivatives.create(filename,
                                                 outputs: [{ label: :thumbnail, format: 'jpg',
                                                             size: '200x150>',
                                                             url: derivative_url('thumbnail') }])
  # Go through extract_full_text only: it checks Hyrax.config.extract_full_text?
  # before calling FullTextExtract. A direct FullTextExtract.create call here
  # as well would ignore that option and run the extraction a second time.
  extract_full_text(filename, uri)
end

def create_audio_derivatives(filename)
Expand All @@ -83,5 +81,15 @@ def create_image_derivatives(filename)
# Returns the Hyrax::DerivativePath constant.
# NOTE(review): presumably the class used to work out where derivatives are
# stored — confirm against the callers, which are not visible here.
def derivative_path_factory
Hyrax::DerivativePath
end

# Calls Hydra::Derivatives::FullTextExtract unless the extract_full_text
# configuration option is set to false
# @param [String] filename of the object to be used for full text extraction
# @param [String] uri to the file set (delegated to file_set)
def extract_full_text(filename, uri)
  # Nothing is done (and nil is returned) when the option is switched off.
  if Hyrax.config.extract_full_text?
    # The extracted text is stored against the given uri in the
    # "extracted_text" container.
    text_outputs = [{ url: uri, container: "extracted_text" }]
    Hydra::Derivatives::FullTextExtract.create(filename, outputs: text_outputs)
  end
end
end
end
4 changes: 4 additions & 0 deletions lib/generators/hyrax/templates/config/hyrax.rb
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@
# Path to the file derivatives creation tool
# config.libreoffice_path = "soffice"

# Option to enable/disable full text extraction from PDFs and office documents
# Default is true, set to false to disable full text extraction
# config.extract_full_text = true

# How many seconds back from the current time that we should show by default of the user's activity on the user's dashboard
# config.activity_to_show_default_seconds_since_now = 24*60*60

Expand Down
6 changes: 6 additions & 0 deletions lib/hyrax/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,12 @@ def subject_prefix
@subject_prefix ||= "Contact form:"
end

# Writer for the flag returned by #extract_full_text?. While the flag is nil
# (never assigned), #extract_full_text? falls back to true.
attr_writer :extract_full_text
# Whether full text extraction is enabled.
# An explicitly assigned value (including false) is returned as-is; a nil
# value is replaced by the default of true, which is stored and returned.
# @return [Boolean]
def extract_full_text?
  # A nil check rather than ||= so an explicit false is not overwritten.
  @extract_full_text = true if @extract_full_text.nil?
  @extract_full_text
end

private

# @param [Symbol, #to_s] model_name - symbol representing the model
Expand Down
1 change: 1 addition & 0 deletions spec/lib/hyrax/configuration_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,5 @@
it { is_expected.to respond_to(:translate_uri_to_id) }
it { is_expected.to respond_to(:upload_path) }
it { is_expected.to respond_to(:work_requires_files?) }
it { is_expected.to respond_to(:extract_full_text?) }
end

0 comments on commit a9b309d

Please sign in to comment.