Skip to content

Commit

Permalink
Feature: Filter by invert_filter: reverse match
Browse files Browse the repository at this point in the history
Mimic grep's -v option.

Allows dust to include only the files that do not match the given filter
  • Loading branch information
bootandy committed Sep 19, 2021
1 parent 124c19b commit 9d2e6d2
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 21 deletions.
11 changes: 11 additions & 0 deletions src/dir_walker.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::fs;

use crate::node::Node;
use crate::utils::is_filtered_out_due_to_invert_regex;
use crate::utils::is_filtered_out_due_to_regex;
use rayon::iter::ParallelBridge;
use rayon::prelude::ParallelIterator;
Expand All @@ -20,6 +21,7 @@ use crate::platform::get_metadata;
pub struct WalkData {
pub ignore_directories: HashSet<PathBuf>,
pub filter_regex: Option<Regex>,
pub invert_filter_regex: Option<Regex>,
pub allowed_filesystems: HashSet<u64>,
pub use_apparent_size: bool,
pub by_filecount: bool,
Expand Down Expand Up @@ -96,6 +98,13 @@ fn ignore_file(entry: &DirEntry, walk_data: &WalkData) -> bool {
return true;
}

if walk_data.invert_filter_regex.is_some()
&& entry.path().is_file()
&& is_filtered_out_due_to_invert_regex(&walk_data.invert_filter_regex, &entry.path())
{
return true;
}

(is_dot_file && walk_data.ignore_hidden) || is_ignored_path
}

Expand Down Expand Up @@ -123,6 +132,7 @@ fn walk(dir: PathBuf, permissions_flag: &AtomicBool, walk_data: &WalkData) -> Op
entry.path(),
vec![],
&walk_data.filter_regex,
&walk_data.invert_filter_regex,
walk_data.use_apparent_size,
data.is_symlink(),
data.is_file(),
Expand All @@ -143,6 +153,7 @@ fn walk(dir: PathBuf, permissions_flag: &AtomicBool, walk_data: &WalkData) -> Op
dir,
children,
&walk_data.filter_regex,
&walk_data.invert_filter_regex,
walk_data.use_apparent_size,
false,
false,
Expand Down
10 changes: 5 additions & 5 deletions src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pub fn get_by_depth(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode>
pub fn get_biggest(
top_level_nodes: Vec<Node>,
n: usize,
using_file_type_filter: bool,
using_a_filter: bool,
) -> Option<DisplayNode> {
if top_level_nodes.is_empty() {
// perhaps change this, bring back Error object?
Expand All @@ -30,14 +30,14 @@ pub fn get_biggest(
let mut allowed_nodes = HashSet::new();

allowed_nodes.insert(&root.name);
heap = add_children(using_file_type_filter, &root, heap);
heap = add_children(using_a_filter, &root, heap);

for _ in number_top_level_nodes..n {
let line = heap.pop();
match line {
Some(line) => {
allowed_nodes.insert(&line.name);
heap = add_children(using_file_type_filter, line, heap);
heap = add_children(using_a_filter, line, heap);
}
None => break,
}
Expand Down Expand Up @@ -76,11 +76,11 @@ pub fn get_all_file_types(top_level_nodes: Vec<Node>, n: usize) -> Option<Displa
}

fn add_children<'a>(
using_file_type_filter: bool,
using_a_filter: bool,
line: &'a Node,
mut heap: BinaryHeap<&'a Node>,
) -> BinaryHeap<&'a Node> {
if using_file_type_filter {
if using_a_filter {
line.children.iter().for_each(|c| {
if c.name.is_file() || c.size > 0 {
heap.push(c)
Expand Down
45 changes: 32 additions & 13 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,19 @@ fn get_width_of_terminal() -> usize {
}
}

/// Compiles an optional command-line pattern into a [`Regex`].
///
/// Returns `None` when no pattern was supplied. If the pattern is not a valid
/// regular expression, the error is printed to stderr and the process exits
/// with status 1.
fn get_regex_value(maybe_value: Option<&str>) -> Option<Regex> {
    maybe_value.map(|v| {
        Regex::new(v).unwrap_or_else(|e| {
            // The run is aborted here, so the message must not claim the
            // value is merely being ignored.
            eprintln!("Bad value for regex {:?}", e);
            process::exit(1)
        })
    })
}

fn main() {
let default_height = get_height_of_terminal();
let def_num_str = default_height.to_string();
Expand Down Expand Up @@ -151,9 +164,21 @@ fn main() {
.arg(
Arg::with_name("ignore_hidden")
.short("i") // Do not use 'h' this is used by 'help'
.long("ignore_hidden")
.long("ignore_hidden") //TODO: fix change - -> _
.help("Do not display hidden files"),
)
.arg(
Arg::with_name("invert_filter")
.short("v")
.long("invert-filter")
.takes_value(true)
.number_of_values(1)
.multiple(true)
.conflicts_with("filter")
.conflicts_with("types")
.conflicts_with("depth")
.help("Exclude files matching this regex. To ignore png files type: -v \"\\.png$\" "),
)
.arg(
Arg::with_name("filter")
.short("e")
Expand All @@ -162,6 +187,7 @@ fn main() {
.number_of_values(1)
.multiple(true)
.conflicts_with("types")
.conflicts_with("depth")
.help("Only include files matching this regex. For png files type: -e \"\\.png$\" "),
)
.arg(
Expand Down Expand Up @@ -191,17 +217,8 @@ fn main() {

let summarize_file_types = options.is_present("types");

let maybe_filter = if options.is_present("filter") {
match Regex::new(options.value_of("filter").unwrap()) {
Ok(r) => Some(r),
Err(e) => {
eprintln!("Ignoring bad value for filter {:?}", e);
process::exit(1);
}
}
} else {
None
};
let maybe_filter = get_regex_value(options.value_of("filter"));
let maybe_invert_filter = get_regex_value(options.value_of("invert_filter"));

let number_of_lines = match value_t!(options.value_of("number_of_lines"), usize) {
Ok(v) => v,
Expand Down Expand Up @@ -252,6 +269,7 @@ fn main() {
let walk_data = WalkData {
ignore_directories: ignored_full_path,
filter_regex: maybe_filter,
invert_filter_regex: maybe_invert_filter,
allowed_filesystems,
use_apparent_size,
by_filecount,
Expand All @@ -267,7 +285,8 @@ fn main() {
(_, _) => get_biggest(
top_level_nodes,
number_of_lines,
options.values_of("filter").is_some(),
options.values_of("filter").is_some()
|| options.value_of("invert_filter").is_some(),
),
}
};
Expand Down
4 changes: 4 additions & 0 deletions src/node.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::platform::get_metadata;
use crate::utils::is_filtered_out_due_to_invert_regex;
use crate::utils::is_filtered_out_due_to_regex;

use regex::Regex;
Expand All @@ -13,10 +14,12 @@ pub struct Node {
pub inode_device: Option<(u64, u64)>,
}

#[allow(clippy::too_many_arguments)]
pub fn build_node(
dir: PathBuf,
children: Vec<Node>,
filter_regex: &Option<Regex>,
invert_filter_regex: &Option<Regex>,
use_apparent_size: bool,
is_symlink: bool,
is_file: bool,
Expand All @@ -31,6 +34,7 @@ pub fn build_node(
};

let size = if is_filtered_out_due_to_regex(filter_regex, &dir)
|| is_filtered_out_due_to_invert_regex(invert_filter_regex, &dir)
|| (is_symlink && !use_apparent_size)
|| by_filecount && !is_file
{
Expand Down
7 changes: 7 additions & 0 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ pub fn is_filtered_out_due_to_regex(filter_regex: &Option<Regex>, dir: &Path) ->
}
}

/// Reports whether `dir` is excluded by the inverted filter.
///
/// A path is filtered out when an inverted regex is present and it matches the
/// lossy UTF-8 rendering of the path. With no regex, nothing is filtered out.
pub fn is_filtered_out_due_to_invert_regex(filter_regex: &Option<Regex>, dir: &Path) -> bool {
    if let Some(pattern) = filter_regex {
        let path_text = dir.to_string_lossy();
        return pattern.is_match(&path_text);
    }
    false
}

fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool {
let parent = parent.as_ref();
let child = child.as_ref();
Expand Down
21 changes: 18 additions & 3 deletions tests/test_flags.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,29 @@ pub fn test_show_files_by_type() {
}

#[test]
pub fn test_show_files_by_specific_type() {
pub fn test_show_files_by_regex() {
// Check we can see '.rs' files in the tests directory
let output = build_command(vec!["-c", "-e", "\\.rs$", "tests"]);
assert!(output.contains(" ┌─┴ tests"));
assert!(!output.contains("0B ┌── tests"));
assert!(!output.contains("0B ┌─┴ tests"));

// Check there are no '.bad_type' files in the tests directory
let output = build_command(vec!["-c", "-e", "bad_regex", "tests"]);
// Check there are no files named: '.match_nothing' in the tests directory
let output = build_command(vec!["-c", "-e", "match_nothing$", "tests"]);
assert!(output.contains("0B ┌── tests"));
}

#[test]
pub fn test_show_files_by_invert_regex() {
    // Each case pairs a pattern passed to `-v` with the output fragment
    // expected when running against tests/test_dir2.
    let cases = [
        // There are 0 files without 'e' in the name
        ("e", "0 ┌── test_dir2"),
        // There are 2 files without 'a' in the name
        ("a", "2 ┌─┴ test_dir2"),
        // There are 4 files in the test_dir2 hierarchy
        ("match_nothing$", "4 ┌─┴ test_dir2"),
    ];

    for &(pattern, expected) in &cases {
        let output = build_command(vec!["-c", "-f", "-v", pattern, "tests/test_dir2"]);
        assert!(output.contains(expected));
    }
}

0 comments on commit 9d2e6d2

Please sign in to comment.