Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

List (ls) command implementation #4

Merged
merged 1 commit into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,28 @@ MS Presidio redacter:
redacter cp -d ms-presidio --ms-presidio-text-analyze-url http://localhost:5002/analyze --ms-presidio-image-redact-url http://localhost:5003/redact ...
```

## List (LS) command

For convenience, the tool also supports listing the files in the source directory, so you can see which files will be copied:

```
Usage: redacter ls [OPTIONS] <SOURCE>

Arguments:
<SOURCE> Source directory or file such as /tmp, /tmp/file.txt or gs://bucket/file.txt and others supported providers

Options:
-m, --max-size-limit <MAX_SIZE_LIMIT> Maximum size of files to copy in bytes
-f, --filename-filter <FILENAME_FILTER> Filter by name using glob patterns such as *.txt
-h, --help Print help
```

Example: list files in a GCS bucket:

```sh
redacter ls gs://my-little-bucket/my-big-files/
```

## Security considerations

- Your file contents are sent to the DLP API for redaction. Make sure you trust the DLP API provider.
Expand Down
16 changes: 16 additions & 0 deletions src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub struct CliArgs {
}

#[derive(Subcommand, Debug)]
#[allow(clippy::large_enum_variant)]
pub enum CliCommand {
#[command(about = "Copy and redact files from source to destination")]
Cp {
Expand All @@ -36,6 +37,21 @@ pub enum CliCommand {
#[command(flatten)]
redacter_args: Option<RedacterArgs>,
},
#[command(about = "List files in the source")]
Ls {
#[arg(
help = "Source directory or file such as /tmp, /tmp/file.txt or gs://bucket/file.txt and others supported providers"
)]
source: String,
#[arg(short = 'm', long, help = "Maximum size of files to copy in bytes")]
max_size_limit: Option<u64>,
#[arg(
short = 'f',
long,
help = "Filter by name using glob patterns such as *.txt"
)]
filename_filter: Option<globset::Glob>,
},
}

#[derive(ValueEnum, Debug, Clone)]
Expand Down
113 changes: 113 additions & 0 deletions src/commands/ls_command.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
use crate::filesystems::FileSystemConnection;
use crate::filesystems::{DetectFileSystem, FileMatcher};
use crate::AppResult;
use console::{pad_str, Alignment, Style, Term};
use indicatif::{HumanBytes, TermLike};
use rvstruct::ValueStruct;

/// Options for the `ls` command.
#[derive(Debug, Clone)]
pub struct LsCommandOptions {
    /// Matcher handed to the file system's `list_files` call by `command_ls`
    /// to decide which files are listed.
    pub file_matcher: FileMatcher,
}

impl LsCommandOptions {
    /// Builds the `ls` options from the optional command-line filters.
    ///
    /// When `filename_filter` is present it is compiled into a glob matcher;
    /// that matcher and `max_size_limit` are then passed to `FileMatcher::new`.
    pub fn new(filename_filter: Option<globset::Glob>, max_size_limit: Option<u64>) -> Self {
        let compiled_glob = filename_filter
            .as_ref()
            .map(globset::Glob::compile_matcher);
        Self {
            file_matcher: FileMatcher::new(compiled_glob, max_size_limit),
        }
    }
}

pub async fn command_ls(term: &Term, source: &str, options: LsCommandOptions) -> AppResult<()> {
let bold_style = Style::new().bold();
let highlighted = bold_style.clone().white();
let dimmed_style = Style::new().dim();
term.write_line(format!("Listing files in {}.", bold_style.apply_to(source)).as_str())?;
let app_reporter = crate::reporter::AppReporter::from(term);
let mut source_fs = DetectFileSystem::open(source, &app_reporter).await?;
let list_files_result = source_fs.list_files(Some(&options.file_matcher)).await?;
let total_size: u64 = list_files_result
.files
.iter()
.map(|f| f.file_size.unwrap_or(0))
.sum();

if !list_files_result.files.is_empty() {
let max_filename_width = std::cmp::min(
list_files_result
.files
.iter()
.map(|f| f.relative_path.value().len())
.max()
.unwrap_or(25)
+ 5,
(term.width() * 2 / 3) as usize,
);
term.write_line(
format!(
"\n {} {} {}",
dimmed_style.apply_to(pad_str(
"Filename",
max_filename_width,
Alignment::Left,
None
)),
dimmed_style.apply_to(pad_str("Media Type", 40, Alignment::Left, None)),
dimmed_style.apply_to(pad_str("Size", 16, Alignment::Left, None))
)
.as_str(),
)?;

for file in &list_files_result.files {
term.write_line(
format!(
"- {} {} {}",
highlighted.apply_to(pad_str(
file.relative_path.value(),
max_filename_width,
Alignment::Left,
Some("...")
)),
pad_str(
file.media_type
.as_ref()
.map(|mime| mime.to_string())
.unwrap_or("".to_string())
.as_str(),
40,
Alignment::Left,
None
),
highlighted.apply_to(pad_str(
format!("{}", HumanBytes(file.file_size.unwrap_or(0))).as_str(),
16,
Alignment::Left,
None
))
)
.as_str(),
)?;
}
term.write_line("")?;
}
term.write_line(
format!(
"{} files found. Total size: {}",
highlighted.apply_to(list_files_result.files.len()),
highlighted.apply_to(HumanBytes(total_size))
)
.as_str(),
)?;
term.write_line(
format!(
"{} files skipped/filtered out.",
dimmed_style.apply_to(list_files_result.skipped.to_string())
)
.as_str(),
)?;
source_fs.close().await?;
Ok(())
}
3 changes: 3 additions & 0 deletions src/commands/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
mod copy_command;
pub use copy_command::*;

mod ls_command;
pub use ls_command::*;
15 changes: 12 additions & 3 deletions src/filesystems/gcs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,12 @@ impl<'a> GoogleCloudStorageFileSystem<'a> {
let path = path.trim_start_matches("gs://");
let parts: Vec<&str> = path.split('/').collect();
let bucket = parts[0];
let object = parts[1..].join("/");
(bucket.to_string(), object.to_string())
if parts.len() == 1 || (parts.len() == 2 && parts[1].is_empty()) {
(bucket.to_string(), "/".to_string())
} else {
let object = parts[1..].join("/");
(bucket.to_string(), object.to_string())
}
}

#[async_recursion::async_recursion]
Expand Down Expand Up @@ -202,7 +206,12 @@ impl<'a> FileSystemConnection<'a> for GoogleCloudStorageFileSystem<'a> {
self.bucket_name, self.object_name
))?;
if self.object_name.ends_with('/') {
self.list_files_with_token(Some(self.object_name.clone()), None, &file_matcher)
let prefix = if self.object_name != "/" {
Some(self.object_name.clone())
} else {
None
};
self.list_files_with_token(prefix, None, &file_matcher)
.await
} else {
Ok(ListFilesResult::EMPTY)
Expand Down
8 changes: 8 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ async fn handle_args(cli: CliArgs, term: &Term) -> AppResult<()> {
.as_str(),
)?;
}
CliCommand::Ls {
source,
max_size_limit,
filename_filter,
} => {
let options = LsCommandOptions::new(filename_filter, max_size_limit);
command_ls(term, &source, options).await?;
}
}

Ok(())
Expand Down