From 9d2e6d2b36287ee47a4fa399f842302003c45e05 Mon Sep 17 00:00:00 2001 From: "andy.boot" Date: Sun, 5 Sep 2021 09:50:45 +0100 Subject: [PATCH] Feature: Filter by invert_filter: reverse match Mimic grep's -v option. Allows dust to only match files that do not match the given filter --- src/dir_walker.rs | 11 +++++++++++ src/filter.rs | 10 +++++----- src/main.rs | 45 ++++++++++++++++++++++++++++++++------------- src/node.rs | 4 ++++ src/utils.rs | 7 +++++++ tests/test_flags.rs | 21 ++++++++++++++++++--- 6 files changed, 77 insertions(+), 21 deletions(-) diff --git a/src/dir_walker.rs b/src/dir_walker.rs index 5dc48897..939ae781 100644 --- a/src/dir_walker.rs +++ b/src/dir_walker.rs @@ -1,6 +1,7 @@ use std::fs; use crate::node::Node; +use crate::utils::is_filtered_out_due_to_invert_regex; use crate::utils::is_filtered_out_due_to_regex; use rayon::iter::ParallelBridge; use rayon::prelude::ParallelIterator; @@ -20,6 +21,7 @@ use crate::platform::get_metadata; pub struct WalkData { pub ignore_directories: HashSet, pub filter_regex: Option, + pub invert_filter_regex: Option, pub allowed_filesystems: HashSet, pub use_apparent_size: bool, pub by_filecount: bool, @@ -96,6 +98,13 @@ fn ignore_file(entry: &DirEntry, walk_data: &WalkData) -> bool { return true; } + if walk_data.invert_filter_regex.is_some() + && entry.path().is_file() + && is_filtered_out_due_to_invert_regex(&walk_data.invert_filter_regex, &entry.path()) + { + return true; + } + (is_dot_file && walk_data.ignore_hidden) || is_ignored_path } @@ -123,6 +132,7 @@ fn walk(dir: PathBuf, permissions_flag: &AtomicBool, walk_data: &WalkData) -> Op entry.path(), vec![], &walk_data.filter_regex, + &walk_data.invert_filter_regex, walk_data.use_apparent_size, data.is_symlink(), data.is_file(), @@ -143,6 +153,7 @@ fn walk(dir: PathBuf, permissions_flag: &AtomicBool, walk_data: &WalkData) -> Op dir, children, &walk_data.filter_regex, + &walk_data.invert_filter_regex, walk_data.use_apparent_size, false, false, diff --git a/src/filter.rs b/src/filter.rs index 754bbd81..70fc299e 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -17,7 +17,7 @@ pub fn get_by_depth(top_level_nodes: Vec, n: usize) -> Option pub fn get_biggest( top_level_nodes: Vec, n: usize, - using_file_type_filter: bool, + using_a_filter: bool, ) -> Option { if top_level_nodes.is_empty() { // perhaps change this, bring back Error object? @@ -30,14 +30,14 @@ pub fn get_biggest( let mut allowed_nodes = HashSet::new(); allowed_nodes.insert(&root.name); - heap = add_children(using_file_type_filter, &root, heap); + heap = add_children(using_a_filter, &root, heap); for _ in number_top_level_nodes..n { let line = heap.pop(); match line { Some(line) => { allowed_nodes.insert(&line.name); - heap = add_children(using_file_type_filter, line, heap); + heap = add_children(using_a_filter, line, heap); } None => break, } @@ -76,11 +76,11 @@ pub fn get_all_file_types(top_level_nodes: Vec, n: usize) -> Option( - using_file_type_filter: bool, + using_a_filter: bool, line: &'a Node, mut heap: BinaryHeap<&'a Node>, ) -> BinaryHeap<&'a Node> { - if using_file_type_filter { + if using_a_filter { line.children.iter().for_each(|c| { if c.name.is_file() || c.size > 0 { heap.push(c) diff --git a/src/main.rs b/src/main.rs index 73cacf9a..0ffccfb6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -73,6 +73,19 @@ fn get_width_of_terminal() -> usize { } } +fn get_regex_value(maybe_value: Option<&str>) -> Option { + match maybe_value { + Some(v) => match Regex::new(v) { + Ok(r) => Some(r), + Err(e) => { + eprintln!("Ignoring bad value for regex {:?}", e); + process::exit(1); + } + }, + None => None, + } +} + fn main() { let default_height = get_height_of_terminal(); let def_num_str = default_height.to_string(); @@ -151,9 +164,21 @@ fn main() { .arg( Arg::with_name("ignore_hidden") .short("i") // Do not use 'h' this is used by 'help' - .long("ignore_hidden") + .long("ignore_hidden") //TODO: fix change - -> _ .help("Do not display hidden files"), ) + .arg( + Arg::with_name("invert_filter") + .short("v") + .long("invert-filter") + .takes_value(true) + .number_of_values(1) + .multiple(true) + .conflicts_with("filter") + .conflicts_with("types") + .conflicts_with("depth") + .help("Exclude files matching this regex. To ignore png files type: -v \"\\.png$\" "), + ) .arg( Arg::with_name("filter") .short("e") @@ -162,6 +187,7 @@ fn main() { .number_of_values(1) .multiple(true) .conflicts_with("types") + .conflicts_with("depth") .help("Only include files matching this regex. For png files type: -e \"\\.png$\" "), ) .arg( @@ -191,17 +217,8 @@ fn main() { let summarize_file_types = options.is_present("types"); - let maybe_filter = if options.is_present("filter") { - match Regex::new(options.value_of("filter").unwrap()) { - Ok(r) => Some(r), - Err(e) => { - eprintln!("Ignoring bad value for filter {:?}", e); - process::exit(1); - } - } - } else { - None - }; + let maybe_filter = get_regex_value(options.value_of("filter")); + let maybe_invert_filter = get_regex_value(options.value_of("invert_filter")); let number_of_lines = match value_t!(options.value_of("number_of_lines"), usize) { Ok(v) => v, @@ -252,6 +269,7 @@ fn main() { let walk_data = WalkData { ignore_directories: ignored_full_path, filter_regex: maybe_filter, + invert_filter_regex: maybe_invert_filter, allowed_filesystems, use_apparent_size, by_filecount, @@ -267,7 +285,8 @@ fn main() { (_, _) => get_biggest( top_level_nodes, number_of_lines, - options.values_of("filter").is_some(), + options.values_of("filter").is_some() + || options.value_of("invert_filter").is_some(), ), } }; diff --git a/src/node.rs b/src/node.rs index ff391ab4..d30b7ef5 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,4 +1,5 @@ use crate::platform::get_metadata; +use crate::utils::is_filtered_out_due_to_invert_regex; use crate::utils::is_filtered_out_due_to_regex; use regex::Regex; @@ -13,10 +14,12 @@ pub struct Node { pub inode_device: Option<(u64, u64)>, } +#[allow(clippy::too_many_arguments)] pub fn build_node( dir: PathBuf, children: Vec, filter_regex: &Option, + invert_filter_regex: &Option, use_apparent_size: bool, is_symlink: bool, is_file: bool, @@ -31,6 +34,7 @@ pub fn build_node( }; let size = if is_filtered_out_due_to_regex(filter_regex, &dir) + || is_filtered_out_due_to_invert_regex(invert_filter_regex, &dir) || (is_symlink && !use_apparent_size) || by_filecount && !is_file { diff --git a/src/utils.rs b/src/utils.rs index cf91b86f..51bef5d5 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -64,6 +64,13 @@ pub fn is_filtered_out_due_to_regex(filter_regex: &Option, dir: &Path) -> } } +pub fn is_filtered_out_due_to_invert_regex(filter_regex: &Option, dir: &Path) -> bool { + match filter_regex { + Some(fr) => fr.is_match(&dir.as_os_str().to_string_lossy()), + None => false, + } +} + fn is_a_parent_of>(parent: P, child: P) -> bool { let parent = parent.as_ref(); let child = child.as_ref(); diff --git a/tests/test_flags.rs b/tests/test_flags.rs index 5518552b..e7cab291 100644 --- a/tests/test_flags.rs +++ b/tests/test_flags.rs @@ -129,14 +129,29 @@ pub fn test_show_files_by_type() { } #[test] -pub fn test_show_files_by_specific_type() { +pub fn test_show_files_by_regex() { // Check we can see '.rs' files in the tests directory let output = build_command(vec!["-c", "-e", "\\.rs$", "tests"]); assert!(output.contains(" ┌─┴ tests")); assert!(!output.contains("0B ┌── tests")); assert!(!output.contains("0B ┌─┴ tests")); - // Check there are no '.bad_type' files in the tests directory - let output = build_command(vec!["-c", "-e", "bad_regex", "tests"]); + // Check there are no files named: '.match_nothing' in the tests directory + let output = build_command(vec!["-c", "-e", "match_nothing$", "tests"]); assert!(output.contains("0B ┌── tests")); } + +#[test] +pub fn test_show_files_by_invert_regex() { + let output = build_command(vec!["-c", "-f", "-v", "e", "tests/test_dir2"]); + // There are 0 files without 'e' in the name + assert!(output.contains("0 ┌── test_dir2")); + + let output = build_command(vec!["-c", "-f", "-v", "a", "tests/test_dir2"]); + // There are 2 files without 'a' in the name + assert!(output.contains("2 ┌─┴ test_dir2")); + + // There are 4 files in the test_dir2 hierarchy + let output = build_command(vec!["-c", "-f", "-v", "match_nothing$", "tests/test_dir2"]); + assert!(output.contains("4 ┌─┴ test_dir2")); +}