From bb106d265a45a3544d3f2b2781f757b638b606b9 Mon Sep 17 00:00:00 2001 From: Bogdan Mart Date: Sat, 23 Apr 2022 03:13:20 +0300 Subject: [PATCH] Fix problem with MAX_PATH on windows + UnitTest * Manually implemented windows File Namespace name generation * added some unit tests closes scullionw/dirstat-rs#5 --- .gitignore | 7 ++- Cargo.lock | 48 +++++++++++++++++- Cargo.toml | 7 ++- src/ffi.rs | 50 +++++++++++++++++- src/lib.rs | 17 +++++++ src/tests.rs | 140 +++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 265 insertions(+), 4 deletions(-) create mode 100644 src/tests.rs diff --git a/.gitignore b/.gitignore index 1a61dfb..a86de41 100644 --- a/.gitignore +++ b/.gitignore @@ -74,4 +74,9 @@ $RECYCLE.BIN/ # Windows shortcuts *.lnk -# End of https://www.gitignore.io/api/linux,macos,windows \ No newline at end of file +# End of https://www.gitignore.io/api/linux,macos,windows + +# Jetrbrains IDE (Intellij RUST/Clion/Idea) +.idea/ + +test-data/ diff --git a/Cargo.lock b/Cargo.lock index cacc42d..9f9058d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -55,6 +55,26 @@ dependencies = [ "vec_map", ] +[[package]] +name = "const_format" +version = "0.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22bc6cd49b0ec407b680c3e380182b6ac63b73991cb7602de350352fc309b614" +dependencies = [ + "const_format_proc_macros", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef196d5d972878a48da7decb7686eded338b4858fbabeed513d63a7c98b2b82d" +dependencies = [ + "proc-macro2 1.0.33", + "quote 1.0.10", + "unicode-xid 0.2.2", +] + [[package]] name = "crossbeam-channel" version = "0.5.1" @@ -101,9 +121,11 @@ dependencies = [ [[package]] name = "dirstat-rs" -version = "0.3.7" +version = "0.3.8" dependencies = [ "atty", + "const_format", + "path-absolutize", "pretty-bytes", "rayon", "serde", @@ -184,6 +206,30 @@ dependencies = [ "libc", ] +[[package]] +name = "once_cell" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" + +[[package]] +name = "path-absolutize" +version = "3.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3de4b40bd9736640f14c438304c09538159802388febb02c8abaae0846c1f13" +dependencies = [ + "path-dedot", +] + +[[package]] +name = "path-dedot" +version = "3.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d611d5291372b3738a34ebf0d1f849e58b1dcc1101032f76a346eaa1f8ddbb5b" +dependencies = [ + "once_cell", +] + [[package]] name = "pretty-bytes" version = "0.2.2" diff --git a/Cargo.toml b/Cargo.toml index 2745fa6..eac9021 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "dirstat-rs" -version = "0.3.7" +version = "0.3.8" authors = ["scullionw "] edition = "2018" license = "MIT" @@ -19,8 +19,12 @@ atty = "0.2.14" serde = { version = "1.0.131", features = ["derive"] } serde_json = "1.0.73" +[dev-dependencies] +const_format = "0.2.22" + [target.'cfg(windows)'.dependencies] winapi-util = "0.1.2" +path-absolutize = "3.0.12" [target.'cfg(windows)'.dependencies.winapi] version = "0.3.7" @@ -35,3 +39,4 @@ incremental = false bench = false path = "src/bin/main.rs" name = "ds" + diff --git a/src/ffi.rs b/src/ffi.rs index 8334819..dcbfc16 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -11,7 +11,7 @@ use winapi::um::fileapi::GetCompressedFileSizeW; use winapi::um::fileapi::INVALID_FILE_SIZE; pub fn compressed_size(path: &Path) -> Result> { - let wide: Vec = path.as_os_str().encode_wide().chain(once(0)).collect(); + let wide = path_to_u16s(path); let mut high: u32 = 0; // TODO: Deal with max path size @@ -27,6 +27,54 @@ pub fn compressed_size(path: &Path) -> Result> { Ok(u64::from(high) << 32 | u64::from(low)) } +/// inspired by [fn maybe_verbatim(path: &Path)](https://github.com/rust-lang/rust/blob/1f4681ad7a132755452c32a987ad0f0d075aa6aa/library/std/src/sys/windows/path.rs#L170) +/// But function from std is calling winapi GetFullPathNameW in case if path is longer than 248. +/// We are more optimistic and expect all path being absolute, so no API calls from this function. +fn path_to_u16s(path: &Path) -> Vec { + // Normally the MAX_PATH is 260 UTF-16 code units (including the NULL). + // However, for APIs such as CreateDirectory[1], the limit is 248. + // + // [1]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectorya#parameters + const LEGACY_MAX_PATH: usize = 248; + // UTF-16 encoded code points, used in parsing and building UTF-16 paths. + // All of these are in the ASCII range so they can be cast directly to `u16`. + const SEP: u16 = b'\\' as _; + const QUERY: u16 = b'?' as _; + const U: u16 = b'U' as _; + const N: u16 = b'N' as _; + const C: u16 = b'C' as _; + // \\?\ + const VERBATIM_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP]; + // \??\ + const NT_PREFIX: &[u16] = &[SEP, QUERY, QUERY, SEP]; + // \\?\UNC\ + const UNC_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP, U, N, C, SEP]; + // \\ + const NETWORK_PREFIX: &[u16] = &[SEP, SEP]; + + let wide: Vec = path.as_os_str().encode_wide().chain(once(0)).collect(); + // don't need to do anything if path is small enaught. + if wide.len() < LEGACY_MAX_PATH { + return wide; + } + + if wide.starts_with(VERBATIM_PREFIX) || wide.starts_with(NT_PREFIX) { + return wide; + } + + if wide.starts_with(NETWORK_PREFIX) { + // network path from SMB + let mut tmp = Vec::from(UNC_PREFIX); + tmp.extend(&wide[2..]); + return tmp; + } else { + // if we came here, we aren't using network drive, so just prepend File namespace prefix + let mut tmp = Vec::from(VERBATIM_PREFIX); + tmp.extend(wide); + return tmp; + } +} + fn get_last_error() -> u32 { unsafe { GetLastError() } } diff --git a/src/lib.rs b/src/lib.rs index 283d5cf..38fc52d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,11 +15,25 @@ pub struct DiskItem { } impl DiskItem { + /// Analyzes provided path and returns tree structure of analyzed DiskItems pub fn from_analyze( path: &Path, apparent: bool, root_dev: u64, ) -> Result> { + #[cfg(windows)] + { + // Solution for windows compressed files requires path to be absolute, see ffi.rs + // Basically it would be triggered only on top most invocation, + // and afterwards all path would be absolute. We do it here as it is relatively harmless + // but this would allow us fo it only once instead of each invocation of ffi::compressed_size + if apparent && !path.is_absolute() { + use path_absolutize::*; + let absolute_dir = path.absolutize()?; + return Self::from_analyze(absolute_dir.as_ref(), apparent, root_dev); + } + } + let name = path .file_name() .unwrap_or(&OsStr::new(".")) @@ -115,3 +129,6 @@ impl FileInfo { } } } + +#[cfg(test)] +mod tests; diff --git a/src/tests.rs b/src/tests.rs new file mode 100644 index 0000000..45d545e --- /dev/null +++ b/src/tests.rs @@ -0,0 +1,140 @@ +use crate::{DiskItem, FileInfo}; +// warn: don't remove `as &str` after macro invocation. +// It breaks type checker in Intellij Rust IDE +use const_format::concatcp; +use std::fs::File; +use std::io::Write; +use std::panic; +use std::path::Path; + +// be aware that rust runs tests in parallel, so tests should use different dirs + +const TEST_DATA_DIR: &str = "./test-data/"; + +const LONG_PATH_DIR: &str = "long-path/"; +//noinspection SpellCheckingInspection +const PATH_1: &str = "lll1/llllllll/llllllllllllllll/llllllllllllll/lllllllllllll/oooooo\ +oooooooo/oooooooooooooooo/nnnnnnnnn/nnnnnnnnnn/nnnnnnnn/nnnnnn/gggggggggg/p/a/tttt\ +tttttttttt/ttttttttttt/ttttttttttttttt/ttttttttt/tttthhh/2222222222/22222222222/222222222222/\ +3333333333333/33333333/33333333333/33333333333/333333333/33333333/44444444/44444444444444444/\ +5555555/55555555555/55555555/5555555555/5555555/5555555/555555/555555555/66666666666666666666/\ +77777777/7777777777/7777777777777/77777777777/7777777777/77777777/7777777/77777777/8888888888/\ +99999999/999999/99999999/99999999999/99999999/999999999/9999999999/"; + +const PATH_1_FULL: &str = concatcp!(TEST_DATA_DIR, LONG_PATH_DIR, PATH_1) as &str; +//noinspection SpellCheckingInspection +const PATH_2: &str = "lll2/llllllll/llllllllllllllll/llllllllllllll/lllllllllllll/oooooo\ +oooooooo/oooooooooooooooo/nnnnnnnnn/nnnnnnnnnn/nnnnnnnn/nnnnnn/gggggggggg/p/a/tttt\ +tttttttttt/ttttttttttt/ttttttttttttttt/ttttttttt/tttthhh/2222222222/22222222222/222222222222/\ +3333333333333/33333333/33333333333/33333333333/333333333/33333333/44444444/44444444444444444/\ +5555555/55555555555/55555555/5555555555/5555555/5555555/555555/555555555/66666666666666666666/\ +77777777/7777777777/7777777777777/77777777777/7777777777/77777777/7777777/77777777/8888888888/\ +99999999/999999/99999999/99999999999/99999999/999999999/9999999999/"; + +const PATH_2_FULL: &str = concatcp!(TEST_DATA_DIR, LONG_PATH_DIR, PATH_2) as &str; + +#[test] +fn test_max_path() { + // do not rename it into `_` it would cause immediate destrucion after creation + let _guard = CleanUpGuard { + path: concatcp!(TEST_DATA_DIR, LONG_PATH_DIR) as &str, + }; + + // Given + create_dir(PATH_1_FULL); + create_dir(PATH_2_FULL); + create_file(&concatcp!(PATH_1_FULL, "file.bin") as &str, 4096); + create_file(&concatcp!(PATH_2_FULL, "file.bin") as &str, 8192); + + // When + let test_path = Path::new(concatcp!(TEST_DATA_DIR, LONG_PATH_DIR) as &str); + let result = FileInfo::from_path(test_path, true); + + // Then + if let Result::Ok(FileInfo::Directory { volume_id }) = result { + let result = DiskItem::from_analyze(test_path, true, volume_id); + let result = result.expect("Must collect data"); + assert_eq!(result.disk_size, 4096 + 8192); + let children = result.children.unwrap(); + assert_eq!(children.len(), 2); + assert_eq!(children[0].disk_size, 8192); + assert_eq!(children[1].disk_size, 4096); + } else { + panic!("Can not get file info"); + } +} + +#[test] +#[cfg(unix)] // It gives inconsistent results on windows +fn test_file_size() { + const DIR: &str = concatcp!(TEST_DATA_DIR, "test_file_size/") as &str; + // do not rename it into `_` it would cause immediate destrucion after creation + let _guard = CleanUpGuard { path: DIR }; + + // Given + create_file(&concatcp!(DIR, "foo/file.bin") as &str, 8192); + create_file(&concatcp!(DIR, "bar/file.bin") as &str, 8192 - 5); + + // When calculating with apparent size + let test_path = Path::new(DIR); + let result = FileInfo::from_path(test_path, true); + + // Then + if let Result::Ok(FileInfo::Directory { volume_id }) = result { + let result = DiskItem::from_analyze(test_path, true, volume_id); + let result = result.expect("Must collect data"); + assert_eq!(result.disk_size, 8192 + 8192); + let children = result.children.unwrap(); + assert_eq!(children.len(), 2); + // Both dirs should be rounded to sector size + assert_eq!(children[0].disk_size, 8192); + assert_eq!(children[1].disk_size, 8192); + } else { + panic!("Can not get file info"); + } + + // When calculating withOUT apparent size + let result = FileInfo::from_path(test_path, false); + + // Then + if let Result::Ok(FileInfo::Directory { volume_id }) = result { + let result = DiskItem::from_analyze(test_path, false, volume_id); + let result = result.expect("Must collect data"); + assert_eq!(result.disk_size, 8192 + 8192 - 5); + let children = result.children.unwrap(); + assert_eq!(children.len(), 2); + // Both dirs should be rounded to sector size + assert_eq!(children[0].disk_size, 8192); + assert_eq!(children[1].disk_size, 8192 - 5); + } else { + panic!("Can not get file info"); + } +} + +// Helper functions and cleanup code goes next + +fn create_dir(dir_path: &str) { + std::fs::create_dir_all(dir_path).unwrap(); +} + +fn create_file(file_path: &str, size: usize) { + let content = vec![0u8; size]; + let file_path = Path::new(file_path); + // ensure parent + std::fs::create_dir_all(file_path.parent().unwrap()).unwrap(); + + let mut file = File::create(file_path).unwrap(); + file.write(&content).unwrap(); +} + +/// Used to clean up test folder after test runs. +struct CleanUpGuard { + path: &'static str, +} + +impl Drop for CleanUpGuard { + fn drop(&mut self) { + // Teardown + std::fs::remove_dir_all(Path::new(self.path)).unwrap(); + } +}