From a9b309d00bf37497fb3408bdaef7b72049fa9e14 Mon Sep 17 00:00:00 2001 From: Collier Date: Tue, 13 Jun 2017 15:43:59 -0700 Subject: [PATCH] Add configuration option extract_full_text, defaulting to true --- .../hyrax/file_set_derivatives_service.rb | 16 ++++++++++++---- lib/generators/hyrax/templates/config/hyrax.rb | 4 ++++ lib/hyrax/configuration.rb | 6 ++++++ spec/lib/hyrax/configuration_spec.rb | 1 + 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/app/services/hyrax/file_set_derivatives_service.rb b/app/services/hyrax/file_set_derivatives_service.rb index 1693de04e1..fba8fede68 100644 --- a/app/services/hyrax/file_set_derivatives_service.rb +++ b/app/services/hyrax/file_set_derivatives_service.rb @@ -49,8 +49,7 @@ def supported_mime_types def create_pdf_derivatives(filename) Hydra::Derivatives::PdfDerivatives.create(filename, outputs: [{ label: :thumbnail, format: 'jpg', size: '338x493', url: derivative_url('thumbnail') }]) - Hydra::Derivatives::FullTextExtract.create(filename, - outputs: [{ url: uri, container: "extracted_text" }]) + extract_full_text(filename, uri) end def create_office_document_derivatives(filename) @@ -58,8 +57,7 @@ def create_office_document_derivatives(filename) outputs: [{ label: :thumbnail, format: 'jpg', size: '200x150>', url: derivative_url('thumbnail') }]) - Hydra::Derivatives::FullTextExtract.create(filename, - outputs: [{ url: uri, container: "extracted_text" }]) + extract_full_text(filename, uri) end def create_audio_derivatives(filename) @@ -83,5 +81,15 @@ def create_image_derivatives(filename) def derivative_path_factory Hyrax::DerivativePath end + + # Calls Hydra::Derivatives::FullTextExtract unless the extract_full_text + # configuration option is set to false + # @param [String] filename of the object to be used for full text extraction + # @param [String] uri to the file set (delegated to file_set) + def extract_full_text(filename, uri) + return unless Hyrax.config.extract_full_text? 
+ Hydra::Derivatives::FullTextExtract.create(filename, + outputs: [{ url: uri, container: "extracted_text" }]) + end end end diff --git a/lib/generators/hyrax/templates/config/hyrax.rb b/lib/generators/hyrax/templates/config/hyrax.rb index bbaffd37d4..067ad2efae 100644 --- a/lib/generators/hyrax/templates/config/hyrax.rb +++ b/lib/generators/hyrax/templates/config/hyrax.rb @@ -74,6 +74,10 @@ # Path to the file derivatives creation tool # config.libreoffice_path = "soffice" + # Option to enable/disable full text extraction from PDFs and office documents + # Default is true; set to false to disable full text extraction + # config.extract_full_text = true + # How many seconds back from the current time that we should show by default of the user's activity on the user's dashboard # config.activity_to_show_default_seconds_since_now = 24*60*60 diff --git a/lib/hyrax/configuration.rb b/lib/hyrax/configuration.rb index 7ba50909a0..d2652ca776 100644 --- a/lib/hyrax/configuration.rb +++ b/lib/hyrax/configuration.rb @@ -373,6 +373,12 @@ def subject_prefix @subject_prefix ||= "Contact form:" end + attr_writer :extract_full_text + def extract_full_text? + return @extract_full_text unless @extract_full_text.nil? + @extract_full_text = true + end + private # @param [Symbol, #to_s] model_name - symbol representing the model diff --git a/spec/lib/hyrax/configuration_spec.rb b/spec/lib/hyrax/configuration_spec.rb index 83cd5113c7..7d5f51a9c0 100644 --- a/spec/lib/hyrax/configuration_spec.rb +++ b/spec/lib/hyrax/configuration_spec.rb @@ -55,4 +55,5 @@ it { is_expected.to respond_to(:translate_uri_to_id) } it { is_expected.to respond_to(:upload_path) } it { is_expected.to respond_to(:work_requires_files?) } + it { is_expected.to respond_to(:extract_full_text?) } end