From 7a6d6139d71abb42e0f6c215b92b808068206050 Mon Sep 17 00:00:00 2001 From: Abdulla Abdurakhmanov Date: Mon, 5 Aug 2024 11:08:52 +0200 Subject: [PATCH] List (ls) command implementation --- README.md | 22 ++++++++ src/args.rs | 16 ++++++ src/commands/ls_command.rs | 113 +++++++++++++++++++++++++++++++++++++ src/commands/mod.rs | 3 + src/filesystems/gcs.rs | 15 ++++- src/main.rs | 8 +++ 6 files changed, 174 insertions(+), 3 deletions(-) create mode 100644 src/commands/ls_command.rs diff --git a/README.md b/README.md index 9e8e533..79ce019 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,28 @@ MS Presidio redacter: redacter cp -d ms-presidio --ms-presidio-text-analyze-url http://localhost:5002/analyze --ms-presidio-image-redact-url http://localhost:5003/redact ... ``` +## List (LS) command + +For convenience, the tool also supports listing files in the source directory so you can see what files will be copied: + +``` +Usage: redacter ls [OPTIONS] <SOURCE> + +Arguments: + <SOURCE> Source directory or file such as /tmp, /tmp/file.txt or gs://bucket/file.txt and others supported providers + +Options: + -m, --max-size-limit <MAX_SIZE_LIMIT> Maximum size of files to copy in bytes + -f, --filename-filter <FILENAME_FILTER> Filter by name using glob patterns such as *.txt + -h, --help Print help +``` + +Example: list files in the GCS bucket: + +```sh +redacter ls gs://my-little-bucket/my-big-files/ +``` + ## Security considerations - Your file contents are sent to the DLP API for redaction. Make sure you trust the DLP API provider. 
diff --git a/src/args.rs b/src/args.rs index 4077618..a22444d 100644 --- a/src/args.rs +++ b/src/args.rs @@ -13,6 +13,7 @@ pub struct CliArgs { } #[derive(Subcommand, Debug)] +#[allow(clippy::large_enum_variant)] pub enum CliCommand { #[command(about = "Copy and redact files from source to destination")] Cp { @@ -36,6 +37,21 @@ pub enum CliCommand { #[command(flatten)] redacter_args: Option, }, + #[command(about = "List files in the source")] + Ls { + #[arg( + help = "Source directory or file such as /tmp, /tmp/file.txt or gs://bucket/file.txt and others supported providers" + )] + source: String, + #[arg(short = 'm', long, help = "Maximum size of files to copy in bytes")] + max_size_limit: Option, + #[arg( + short = 'f', + long, + help = "Filter by name using glob patterns such as *.txt" + )] + filename_filter: Option, + }, } #[derive(ValueEnum, Debug, Clone)] diff --git a/src/commands/ls_command.rs b/src/commands/ls_command.rs new file mode 100644 index 0000000..00e2e93 --- /dev/null +++ b/src/commands/ls_command.rs @@ -0,0 +1,113 @@ +use crate::filesystems::FileSystemConnection; +use crate::filesystems::{DetectFileSystem, FileMatcher}; +use crate::AppResult; +use console::{pad_str, Alignment, Style, Term}; +use indicatif::{HumanBytes, TermLike}; +use rvstruct::ValueStruct; + +#[derive(Debug, Clone)] +pub struct LsCommandOptions { + pub file_matcher: FileMatcher, +} + +impl LsCommandOptions { + pub fn new(filename_filter: Option, max_size_limit: Option) -> Self { + let filename_matcher = filename_filter + .as_ref() + .map(|filter| filter.compile_matcher()); + LsCommandOptions { + file_matcher: FileMatcher::new(filename_matcher, max_size_limit), + } + } +} + +pub async fn command_ls(term: &Term, source: &str, options: LsCommandOptions) -> AppResult<()> { + let bold_style = Style::new().bold(); + let highlighted = bold_style.clone().white(); + let dimmed_style = Style::new().dim(); + term.write_line(format!("Listing files in {}.", 
bold_style.apply_to(source)).as_str())?; + let app_reporter = crate::reporter::AppReporter::from(term); + let mut source_fs = DetectFileSystem::open(source, &app_reporter).await?; + let list_files_result = source_fs.list_files(Some(&options.file_matcher)).await?; + let total_size: u64 = list_files_result + .files + .iter() + .map(|f| f.file_size.unwrap_or(0)) + .sum(); + + if !list_files_result.files.is_empty() { + let max_filename_width = std::cmp::min( + list_files_result + .files + .iter() + .map(|f| f.relative_path.value().len()) + .max() + .unwrap_or(25) + + 5, + (term.width() * 2 / 3) as usize, + ); + term.write_line( + format!( + "\n {} {} {}", + dimmed_style.apply_to(pad_str( + "Filename", + max_filename_width, + Alignment::Left, + None + )), + dimmed_style.apply_to(pad_str("Media Type", 40, Alignment::Left, None)), + dimmed_style.apply_to(pad_str("Size", 16, Alignment::Left, None)) + ) + .as_str(), + )?; + + for file in &list_files_result.files { + term.write_line( + format!( + "- {} {} {}", + highlighted.apply_to(pad_str( + file.relative_path.value(), + max_filename_width, + Alignment::Left, + Some("...") + )), + pad_str( + file.media_type + .as_ref() + .map(|mime| mime.to_string()) + .unwrap_or("".to_string()) + .as_str(), + 40, + Alignment::Left, + None + ), + highlighted.apply_to(pad_str( + format!("{}", HumanBytes(file.file_size.unwrap_or(0))).as_str(), + 16, + Alignment::Left, + None + )) + ) + .as_str(), + )?; + } + term.write_line("")?; + } + term.write_line( + format!( + "{} files found. 
Total size: {}", + highlighted.apply_to(list_files_result.files.len()), + highlighted.apply_to(HumanBytes(total_size)) + ) + .as_str(), + )?; + term.write_line( + format!( + "{} files skipped/filtered out.", + dimmed_style.apply_to(list_files_result.skipped.to_string()) + ) + .as_str(), + )?; + source_fs.close().await?; + Ok(()) +} diff --git a/src/commands/mod.rs b/src/commands/mod.rs index 48ac525..f4d6ce3 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -1,2 +1,5 @@ mod copy_command; pub use copy_command::*; + +mod ls_command; +pub use ls_command::*; diff --git a/src/filesystems/gcs.rs b/src/filesystems/gcs.rs index 4b239f0..5afe82c 100644 --- a/src/filesystems/gcs.rs +++ b/src/filesystems/gcs.rs @@ -35,8 +35,12 @@ impl<'a> GoogleCloudStorageFileSystem<'a> { let path = path.trim_start_matches("gs://"); let parts: Vec<&str> = path.split('/').collect(); let bucket = parts[0]; - let object = parts[1..].join("/"); - (bucket.to_string(), object.to_string()) + if parts.len() == 1 || (parts.len() == 2 && parts[1].is_empty()) { + (bucket.to_string(), "/".to_string()) + } else { + let object = parts[1..].join("/"); + (bucket.to_string(), object.to_string()) + } } #[async_recursion::async_recursion] @@ -202,7 +206,12 @@ impl<'a> FileSystemConnection<'a> for GoogleCloudStorageFileSystem<'a> { self.bucket_name, self.object_name ))?; if self.object_name.ends_with('/') { - self.list_files_with_token(Some(self.object_name.clone()), None, &file_matcher) + let prefix = if self.object_name != "/" { + Some(self.object_name.clone()) + } else { + None + }; + self.list_files_with_token(prefix, None, &file_matcher) .await } else { Ok(ListFilesResult::EMPTY) diff --git a/src/main.rs b/src/main.rs index cc280b0..03e842f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -90,6 +90,14 @@ async fn handle_args(cli: CliArgs, term: &Term) -> AppResult<()> { .as_str(), )?; } + CliCommand::Ls { + source, + max_size_limit, + filename_filter, + } => { + let options = 
LsCommandOptions::new(filename_filter, max_size_limit); + command_ls(term, &source, options).await?; + } } Ok(())