From 02238053b5c4439825111c1d834216ab305ef03f Mon Sep 17 00:00:00 2001 From: Abdulla Abdurakhmanov Date: Thu, 15 Aug 2024 13:22:23 +0200 Subject: [PATCH] Support user defined InfoTypes for GCP DLP redacter --- src/args.rs | 23 +++++++++++-- src/redacters/gcp_dlp.rs | 70 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 83 insertions(+), 10 deletions(-) diff --git a/src/args.rs b/src/args.rs index 22fbf34..beb5900 100644 --- a/src/args.rs +++ b/src/args.rs @@ -118,18 +118,27 @@ pub struct RedacterArgs { #[arg(short = 'd', long, value_enum, help = "List of redacters to use")] redact: Option>, + #[arg( + long, + help = "Allow unsupported types to be copied without redaction", + default_value = "false" + )] + pub allow_unsupported_copies: bool, + #[arg( long, help = "GCP project id that will be used to redact and bill API calls" )] pub gcp_project_id: Option, + #[arg(long, help = "Additional GCP DLP built in info types for redaction")] + pub gcp_dlp_built_in_info_type: Option>, + #[arg( long, - help = "Allow unsupported types to be copied without redaction", - default_value = "false" + help = "Additional GCP DLP user defined stored info types for redaction" )] - pub allow_unsupported_copies: bool, + pub gcp_dlp_stored_info_type: Option>, #[arg( long, @@ -184,6 +193,14 @@ impl TryInto for RedacterArgs { Some(ref project_id) => { Ok(RedacterProviderOptions::GcpDlp(GcpDlpRedacterOptions { project_id: project_id.clone(), + user_defined_built_in_info_types: self + .gcp_dlp_built_in_info_type + .clone() + .unwrap_or_default(), + user_defined_stored_info_types: self + .gcp_dlp_stored_info_type + .clone() + .unwrap_or_default(), })) } None => Err(AppError::RedacterConfigError { diff --git a/src/redacters/gcp_dlp.rs b/src/redacters/gcp_dlp.rs index 2cd605b..dbe0ca9 100644 --- a/src/redacters/gcp_dlp.rs +++ b/src/redacters/gcp_dlp.rs @@ -12,6 +12,7 @@ use gcloud_sdk::tonic::metadata::MetadataValue; use gcloud_sdk::{tonic, GoogleApi, GoogleAuthMiddleware}; use mime::Mime; use rvstruct::ValueStruct; +use std::collections::HashSet; use tokio_util::bytes; #[derive(Clone)] @@ -25,6 +26,8 @@ pub struct GcpDlpRedacter<'a> { #[derive(Debug, Clone)] pub struct GcpDlpRedacterOptions { pub project_id: GcpProjectId, + pub user_defined_built_in_info_types: Vec, + pub user_defined_stored_info_types: Vec, } impl<'a> GcpDlpRedacter<'a> { @@ -71,8 +74,8 @@ impl<'a> GcpDlpRedacter<'a> { "projects/{}/locations/global", self.gcp_dlp_options.project_id.value() ), - inspect_config: Some(Self::create_inspect_config()), - deidentify_config: Some(Self::create_deidentify_config()), + inspect_config: Some(self.create_inspect_config()), + deidentify_config: Some(self.create_deidentify_config()), item: Some(input.content.try_into()?), ..gcloud_sdk::google::privacy::dlp::v2::DeidentifyContentRequest::default() }, @@ -108,7 +111,7 @@ impl<'a> GcpDlpRedacter<'a> { "projects/{}/locations/global", self.gcp_dlp_options.project_id.value() ), - inspect_config: Some(Self::create_inspect_config()), + inspect_config: Some(self.create_inspect_config()), byte_item: Some(input_bytes_content), ..gcloud_sdk::google::privacy::dlp::v2::RedactImageRequest::default() }); @@ -142,26 +145,63 @@ impl<'a> GcpDlpRedacter<'a> { } } - fn create_inspect_config() -> gcloud_sdk::google::privacy::dlp::v2::InspectConfig { + fn create_inspect_config(&self) -> gcloud_sdk::google::privacy::dlp::v2::InspectConfig { gcloud_sdk::google::privacy::dlp::v2::InspectConfig { - info_types: Self::INFO_TYPES + info_types: self + .create_built_in_info_types() .iter() .map(|v| gcloud_sdk::google::privacy::dlp::v2::InfoType { name: v.to_string(), ..gcloud_sdk::google::privacy::dlp::v2::InfoType::default() }) .collect(), + custom_info_types: self + .gcp_dlp_options + .user_defined_stored_info_types + .iter() + .map( + |stored_info_type_name| { + gcloud_sdk::google::privacy::dlp::v2::CustomInfoType { + info_type: Some(gcloud_sdk::google::privacy::dlp::v2::InfoType { + name: stored_info_type_name.clone(), + ..gcloud_sdk::google::privacy::dlp::v2::InfoType::default() + }), + r#type: Some( + gcloud_sdk::google::privacy::dlp::v2::custom_info_type::Type::StoredType( + gcloud_sdk::google::privacy::dlp::v2::StoredType { + name: format!( + "projects/{}/storedInfoTypes/{}", + self.gcp_dlp_options.project_id.value(), + stored_info_type_name + ), + ..gcloud_sdk::google::privacy::dlp::v2::StoredType::default() + }, + ), + ), + ..gcloud_sdk::google::privacy::dlp::v2::CustomInfoType::default() + } + }, + ) + .collect(), ..gcloud_sdk::google::privacy::dlp::v2::InspectConfig::default() } } - fn create_deidentify_config() -> gcloud_sdk::google::privacy::dlp::v2::DeidentifyConfig { + fn create_deidentify_config(&self) -> gcloud_sdk::google::privacy::dlp::v2::DeidentifyConfig { + let user_stored_info_types_set: HashSet<&str> = self + .gcp_dlp_options + .user_defined_stored_info_types + .iter() + .map(|s| s.as_str()) + .collect(); gcloud_sdk::google::privacy::dlp::v2::DeidentifyConfig { transformation: Some(gcloud_sdk::google::privacy::dlp::v2::deidentify_config::Transformation::InfoTypeTransformations( gcloud_sdk::google::privacy::dlp::v2::InfoTypeTransformations { transformations: vec![ gcloud_sdk::google::privacy::dlp::v2::info_type_transformations::InfoTypeTransformation { - info_types: Self::INFO_TYPES.iter().map(|v| gcloud_sdk::google::privacy::dlp::v2::InfoType { + info_types: self.create_built_in_info_types().union( + &user_stored_info_types_set + ).collect::>().iter().map(|v| gcloud_sdk::google::privacy::dlp::v2::InfoType { name: v.to_string(), ..gcloud_sdk::google::privacy::dlp::v2::InfoType::default() }).collect(), @@ -183,6 +223,20 @@ impl<'a> GcpDlpRedacter<'a> { } } + fn create_built_in_info_types(&self) -> HashSet<&str> { + [ + Self::INFO_TYPES.to_vec(), + self.gcp_dlp_options + .user_defined_built_in_info_types + .iter() + .map(|v| v.as_str()) + .collect(), + ] + .concat() + .into_iter() + .collect() + } + fn check_supported_image_type(mime_type: &Mime) -> bool { Redacters::is_mime_image(mime_type) && (mime_type.subtype() == "png" @@ -410,6 +464,8 @@ mod tests { let redacter = GcpDlpRedacter::new( GcpDlpRedacterOptions { project_id: GcpProjectId::new(test_gcp_project_id), + user_defined_built_in_info_types: vec![], + user_defined_stored_info_types: vec![], }, &reporter, )