diff --git a/Cargo.lock b/Cargo.lock index 8efe2cf..ea7e198 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2242,7 +2242,7 @@ dependencies = [ [[package]] name = "redacter" -version = "0.5.0" +version = "0.5.1" dependencies = [ "async-recursion", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index b1dd937..7e94ead 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "redacter" -version = "0.5.0" +version = "0.5.1" edition = "2021" authors = ["Abdulla Abdurakhmanov "] license = "Apache-2.0" diff --git a/src/commands/copy_command.rs b/src/commands/copy_command.rs index 4e649c3..7893569 100644 --- a/src/commands/copy_command.rs +++ b/src/commands/copy_command.rs @@ -3,7 +3,9 @@ use crate::filesystems::{ AbsoluteFilePath, DetectFileSystem, FileMatcher, FileMatcherResult, FileSystemConnection, FileSystemRef, }; -use crate::redacters::{RedactSupportedOptions, Redacter, RedacterOptions, Redacters}; +use crate::redacters::{ + RedactSupportedOptions, Redacter, RedacterBaseOptions, RedacterOptions, Redacters, +}; use crate::reporter::AppReporter; use crate::AppResult; use console::{Style, Term}; @@ -81,7 +83,10 @@ pub async fn command_copy( let mut destination_fs = DetectFileSystem::open(destination, &app_reporter).await?; let maybe_redacter = match redacter_options { - Some(options) => Some(Redacters::new_redacter(options, &app_reporter).await?), + Some(options) => Some(( + options.base_options, + Redacters::new_redacter(options.provider_options, &app_reporter).await?, + )), None => None, }; @@ -174,7 +179,7 @@ async fn transfer_and_redact_file< source_fs: &mut SFS, destination_fs: &mut DFS, options: &CopyCommandOptions, - redacter: &Option, + redacter: &Option<(RedacterBaseOptions, impl Redacter)>, ) -> AppResult { let bold_style = Style::new().bold().white(); let (base_file_ref, source_reader) = source_fs.download(source_file_ref).await?; @@ -208,7 +213,7 @@ async fn transfer_and_redact_file< ) .as_str(), ); - let transfer_result = if let Some(ref redacter) = redacter { + let transfer_result = if let Some(ref redacter_with_options) = redacter { redact_upload_file::( bar, destination_fs, @@ -216,7 +221,7 @@ async fn transfer_and_redact_file< source_reader, &base_resolved_file_ref, file_ref, - redacter, + redacter_with_options, ) .await? } else { @@ -241,12 +246,14 @@ async fn redact_upload_file< source_reader: S, base_resolved_file_ref: &AbsoluteFilePath, dest_file_ref: &FileSystemRef, - redacter: &impl Redacter, + redacter_with_options: &(RedacterBaseOptions, impl Redacter), ) -> AppResult { + let (redacter_base_options, redacter) = redacter_with_options; let redacter_supported_options = redacter.redact_supported_options(dest_file_ref).await?; if redacter_supported_options != RedactSupportedOptions::Unsupported { match crate::redacters::redact_stream( redacter, + redacter_base_options, &redacter_supported_options, source_reader, dest_file_ref, @@ -273,7 +280,7 @@ async fn redact_upload_file< Ok(TransferFileResult::Skipped) } } - } else if redacter.options().allow_unsupported_copies { + } else if redacter_base_options.allow_unsupported_copies { bar.println( format!( "Still copying {} {} because it is allowed by arguments", diff --git a/src/redacters/aws_comprehend.rs b/src/redacters/aws_comprehend.rs index a158a4f..723bd1a 100644 --- a/src/redacters/aws_comprehend.rs +++ b/src/redacters/aws_comprehend.rs @@ -1,8 +1,7 @@ use crate::errors::AppError; use crate::filesystems::FileSystemRef; use crate::redacters::{ - RedactSupportedOptions, Redacter, RedacterBaseOptions, RedacterDataItem, - RedacterDataItemContent, Redacters, + RedactSupportedOptions, Redacter, RedacterDataItem, RedacterDataItemContent, Redacters, }; use crate::reporter::AppReporter; use crate::AppResult; @@ -17,13 +16,11 @@ pub struct AwsComprehendRedacterOptions { #[derive(Clone)] pub struct AwsComprehendRedacter<'a> { client: aws_sdk_comprehend::Client, - base_options: RedacterBaseOptions, reporter: &'a AppReporter<'a>, } impl<'a> AwsComprehendRedacter<'a> { pub async fn new( - base_options: RedacterBaseOptions, aws_dlp_options: AwsComprehendRedacterOptions, reporter: &'a AppReporter<'a>, ) -> AppResult { @@ -33,11 +30,7 @@ impl<'a> AwsComprehendRedacter<'a> { .or_default_provider(); let shared_config = aws_config::from_env().region(region_provider).load().await; let client = aws_sdk_comprehend::Client::new(&shared_config); - Ok(Self { - client, - base_options, - reporter, - }) + Ok(Self { client, reporter }) } pub async fn redact_text_file( @@ -113,10 +106,6 @@ impl<'a> Redacter for AwsComprehendRedacter<'a> { _ => RedactSupportedOptions::Unsupported, }) } - - fn options(&self) -> &RedacterBaseOptions { - &self.base_options - } } #[allow(unused_imports)] @@ -142,15 +131,7 @@ mod tests { let content = RedacterDataItemContent::Value(test_content.to_string()); let input = RedacterDataItem { file_ref, content }; - let redacter_options = RedacterBaseOptions { - allow_unsupported_copies: false, - csv_headers_disable: false, - csv_delimiter: None, - sampling_size: None, - }; - let redacter = AwsComprehendRedacter::new( - redacter_options, AwsComprehendRedacterOptions { region: Some(Region::new(test_aws_region)), }, diff --git a/src/redacters/gcp_dlp.rs b/src/redacters/gcp_dlp.rs index 4dd95ae..ff8424f 100644 --- a/src/redacters/gcp_dlp.rs +++ b/src/redacters/gcp_dlp.rs @@ -2,8 +2,7 @@ use crate::common_types::GcpProjectId; use crate::errors::AppError; use crate::filesystems::FileSystemRef; use crate::redacters::{ - RedactSupportedOptions, Redacter, RedacterBaseOptions, RedacterDataItem, - RedacterDataItemContent, Redacters, + RedactSupportedOptions, Redacter, RedacterDataItem, RedacterDataItemContent, Redacters, }; use crate::reporter::AppReporter; use crate::AppResult; @@ -16,7 +15,6 @@ use rvstruct::ValueStruct; #[derive(Clone)] pub struct GcpDlpRedacter<'a> { client: GoogleApi>, - base_options: RedacterBaseOptions, gcp_dlp_options: GcpDlpRedacterOptions, reporter: &'a AppReporter<'a>, } @@ -50,7 +48,6 @@ impl<'a> GcpDlpRedacter<'a> { "ENCRYPTION_KEY", ]; pub async fn new( - base_options: RedacterBaseOptions, gcp_dlp_options: GcpDlpRedacterOptions, reporter: &'a AppReporter<'a>, ) -> AppResult { @@ -59,7 +56,6 @@ impl<'a> GcpDlpRedacter<'a> { .await?; Ok(GcpDlpRedacter { client, - base_options, gcp_dlp_options, reporter, }) @@ -230,10 +226,6 @@ impl<'a> Redacter for GcpDlpRedacter<'a> { }, ) } - - fn options(&self) -> &RedacterBaseOptions { - &self.base_options - } } impl TryInto for RedacterDataItemContent { @@ -402,15 +394,7 @@ mod tests { let content = RedacterDataItemContent::Value(test_content.to_string()); let input = RedacterDataItem { file_ref, content }; - let redacter_options = RedacterBaseOptions { - allow_unsupported_copies: false, - csv_headers_disable: false, - csv_delimiter: None, - sampling_size: None, - }; - let redacter = GcpDlpRedacter::new( - redacter_options, GcpDlpRedacterOptions { project_id: GcpProjectId::new(test_gcp_project_id), }, diff --git a/src/redacters/gemini_llm.rs b/src/redacters/gemini_llm.rs index ab201ce..ed8f2d2 100644 --- a/src/redacters/gemini_llm.rs +++ b/src/redacters/gemini_llm.rs @@ -2,8 +2,7 @@ use crate::common_types::GcpProjectId; use crate::errors::AppError; use crate::filesystems::FileSystemRef; use crate::redacters::{ - RedactSupportedOptions, Redacter, RedacterBaseOptions, RedacterDataItem, - RedacterDataItemContent, Redacters, + RedactSupportedOptions, Redacter, RedacterDataItem, RedacterDataItemContent, Redacters, }; use crate::reporter::AppReporter; use crate::AppResult; @@ -24,7 +23,6 @@ pub struct GeminiLlmModelName(String); #[derive(Clone)] pub struct GeminiLlmRedacter<'a> { client: GoogleApi>, - base_options: RedacterBaseOptions, gemini_llm_options: crate::redacters::GeminiLlmRedacterOptions, reporter: &'a AppReporter<'a>, } @@ -33,7 +31,6 @@ impl<'a> GeminiLlmRedacter<'a> { const DEFAULT_GEMINI_MODEL: &'static str = "models/gemini-1.5-flash"; pub async fn new( - base_options: RedacterBaseOptions, gemini_llm_options: GeminiLlmRedacterOptions, reporter: &'a AppReporter<'a>, ) -> AppResult { @@ -47,7 +44,6 @@ impl<'a> GeminiLlmRedacter<'a> { ).await?; Ok(GeminiLlmRedacter { client, - base_options, gemini_llm_options, reporter, }) @@ -198,10 +194,6 @@ impl<'a> Redacter for GeminiLlmRedacter<'a> { _ => RedactSupportedOptions::Unsupported, }) } - - fn options(&self) -> &RedacterBaseOptions { - &self.base_options - } } #[allow(unused_imports)] @@ -228,15 +220,7 @@ mod tests { let content = RedacterDataItemContent::Value(test_content.to_string()); let input = RedacterDataItem { file_ref, content }; - let redacter_options = RedacterBaseOptions { - allow_unsupported_copies: false, - csv_headers_disable: false, - csv_delimiter: None, - sampling_size: None, - }; - let redacter = GeminiLlmRedacter::new( - redacter_options, GeminiLlmRedacterOptions { project_id: GcpProjectId::new(test_gcp_project_id), gemini_model: None, diff --git a/src/redacters/mod.rs b/src/redacters/mod.rs index 4554c50..d9e8e51 100644 --- a/src/redacters/mod.rs +++ b/src/redacters/mod.rs @@ -87,25 +87,24 @@ impl Display for RedacterOptions { impl<'a> Redacters<'a> { pub async fn new_redacter( - redacter_options: RedacterOptions, + provider_options: RedacterProviderOptions, reporter: &'a AppReporter<'a>, ) -> AppResult { - match redacter_options.provider_options { + match provider_options { RedacterProviderOptions::GcpDlp(options) => Ok(Redacters::GcpDlp( - GcpDlpRedacter::new(redacter_options.base_options, options, reporter).await?, + GcpDlpRedacter::new(options, reporter).await?, )), RedacterProviderOptions::AwsComprehend(options) => Ok(Redacters::AwsComprehendDlp( - AwsComprehendRedacter::new(redacter_options.base_options, options, reporter) - .await?, + AwsComprehendRedacter::new(options, reporter).await?, )), RedacterProviderOptions::MsPresidio(options) => Ok(Redacters::MsPresidio( - MsPresidioRedacter::new(redacter_options.base_options, options, reporter).await?, + MsPresidioRedacter::new(options, reporter).await?, )), RedacterProviderOptions::GeminiLlm(options) => Ok(Redacters::GeminiLlm( - GeminiLlmRedacter::new(redacter_options.base_options, options, reporter).await?, + GeminiLlmRedacter::new(options, reporter).await?, )), RedacterProviderOptions::OpenAiLlm(options) => Ok(Redacters::OpenAiLlm( - OpenAiLlmRedacter::new(redacter_options.base_options, options, reporter).await?, + OpenAiLlmRedacter::new(options, reporter).await?, )), } } @@ -149,8 +148,6 @@ pub trait Redacter { &self, file_ref: &FileSystemRef, ) -> AppResult; - - fn options(&self) -> &RedacterBaseOptions; } impl<'a> Redacter for Redacters<'a> { @@ -178,22 +175,13 @@ impl<'a> Redacter for Redacters<'a> { Redacters::OpenAiLlm(redacter) => redacter.redact_supported_options(file_ref).await, } } - - fn options(&self) -> &RedacterBaseOptions { - match self { - Redacters::GcpDlp(redacter) => redacter.options(), - Redacters::AwsComprehendDlp(redacter) => redacter.options(), - Redacters::MsPresidio(redacter) => redacter.options(), - Redacters::GeminiLlm(redacter) => redacter.options(), - Redacters::OpenAiLlm(redacter) => redacter.options(), - } - } } pub async fn redact_stream< S: Stream> + Send + Unpin + Sync + 'static, >( redacter: &impl Redacter, + redacter_base_options: &RedacterBaseOptions, supported_options: &RedactSupportedOptions, input: S, file_ref: &FileSystemRef, @@ -210,7 +198,7 @@ pub async fn redact_stream< String::from_utf8(all_bytes).map_err(|e| AppError::SystemError { message: format!("Failed to convert bytes to string: {}", e), })?; - let content = if let Some(sampling_size) = redacter.options().sampling_size { + let content = if let Some(sampling_size) = redacter_base_options.sampling_size { let sampling_size = std::cmp::min(sampling_size, whole_content.len()); whole_content .chars() @@ -240,17 +228,16 @@ pub async fn redact_stream< input.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err)), ); let mut reader = csv_async::AsyncReaderBuilder::default() - .has_headers(!redacter.options().csv_headers_disable) + .has_headers(!redacter_base_options.csv_headers_disable) .delimiter( - redacter - .options() + redacter_base_options .csv_delimiter .as_ref() .cloned() .unwrap_or(b','), ) .create_reader(reader); - let headers = if !redacter.options().csv_headers_disable { + let headers = if !redacter_base_options.csv_headers_disable { reader .headers() .await? diff --git a/src/redacters/ms_presidio.rs b/src/redacters/ms_presidio.rs index 49433ce..fb74ae9 100644 --- a/src/redacters/ms_presidio.rs +++ b/src/redacters/ms_presidio.rs @@ -5,8 +5,7 @@ use url::Url; use crate::errors::AppError; use crate::filesystems::FileSystemRef; use crate::redacters::{ - RedactSupportedOptions, Redacter, RedacterBaseOptions, RedacterDataItem, - RedacterDataItemContent, Redacters, + RedactSupportedOptions, Redacter, RedacterDataItem, RedacterDataItemContent, Redacters, }; use crate::reporter::AppReporter; use crate::AppResult; @@ -21,7 +20,6 @@ pub struct MsPresidioRedacterOptions { pub struct MsPresidioRedacter<'a> { client: reqwest::Client, ms_presidio_options: MsPresidioRedacterOptions, - base_options: RedacterBaseOptions, reporter: &'a AppReporter<'a>, } @@ -44,7 +42,6 @@ impl<'a> MsPresidioRedacter<'a> { const DISALLOW_ENTITY_TYPES: [&'static str; 1] = ["US_DRIVER_LICENSE"]; pub async fn new( - base_options: RedacterBaseOptions, ms_presidio_options: MsPresidioRedacterOptions, reporter: &'a AppReporter<'a>, ) -> AppResult { @@ -52,7 +49,6 @@ impl<'a> MsPresidioRedacter<'a> { Ok(Self { client, ms_presidio_options, - base_options, reporter, }) } @@ -214,10 +210,6 @@ impl<'a> Redacter for MsPresidioRedacter<'a> { _ => RedactSupportedOptions::Unsupported, }) } - - fn options(&self) -> &RedacterBaseOptions { - &self.base_options - } } #[allow(unused_imports)] @@ -249,15 +241,7 @@ mod tests { let content = RedacterDataItemContent::Value(test_content.to_string()); let input = RedacterDataItem { file_ref, content }; - let redacter_options = RedacterBaseOptions { - allow_unsupported_copies: false, - csv_headers_disable: false, - csv_delimiter: None, - sampling_size: None, - }; - let redacter = MsPresidioRedacter::new( - redacter_options, MsPresidioRedacterOptions { text_analyze_url: Some(test_analyze_url), image_redact_url: None, diff --git a/src/redacters/open_ai_llm.rs b/src/redacters/open_ai_llm.rs index 188848b..aa10a6c 100644 --- a/src/redacters/open_ai_llm.rs +++ b/src/redacters/open_ai_llm.rs @@ -5,8 +5,7 @@ use serde::{Deserialize, Serialize}; use crate::errors::AppError; use crate::filesystems::FileSystemRef; use crate::redacters::{ - RedactSupportedOptions, Redacter, RedacterBaseOptions, RedacterDataItem, - RedacterDataItemContent, Redacters, + RedactSupportedOptions, Redacter, RedacterDataItem, RedacterDataItemContent, Redacters, }; use crate::reporter::AppReporter; use crate::AppResult; @@ -27,7 +26,6 @@ pub struct OpenAiLlmRedacterOptions { pub struct OpenAiLlmRedacter<'a> { client: reqwest::Client, open_ai_llm_options: OpenAiLlmRedacterOptions, - base_options: RedacterBaseOptions, reporter: &'a AppReporter<'a>, } @@ -57,7 +55,6 @@ impl<'a> OpenAiLlmRedacter<'a> { const DEFAULT_MODEL: &'static str = "gpt-4o-mini"; pub async fn new( - base_options: RedacterBaseOptions, open_ai_llm_options: OpenAiLlmRedacterOptions, reporter: &'a AppReporter<'a>, ) -> AppResult { @@ -65,7 +62,6 @@ impl<'a> OpenAiLlmRedacter<'a> { Ok(Self { client, open_ai_llm_options, - base_options, reporter, }) } @@ -175,10 +171,6 @@ impl<'a> Redacter for OpenAiLlmRedacter<'a> { _ => RedactSupportedOptions::Unsupported, }) } - - fn options(&self) -> &RedacterBaseOptions { - &self.base_options - } } #[allow(unused_imports)] @@ -207,15 +199,7 @@ mod tests { let content = RedacterDataItemContent::Value(test_content.to_string()); let input = RedacterDataItem { file_ref, content }; - let redacter_options = RedacterBaseOptions { - allow_unsupported_copies: false, - csv_headers_disable: false, - csv_delimiter: None, - sampling_size: None, - }; - let redacter = OpenAiLlmRedacter::new( - redacter_options, OpenAiLlmRedacterOptions { api_key: test_api_key.into(), model: None,