From ad2c9d775728fab33fba050ca7f7b84911bc6bbd Mon Sep 17 00:00:00 2001 From: higersky <64680426+higersky@users.noreply.github.com> Date: Thu, 25 Apr 2024 13:05:31 +0800 Subject: [PATCH] Use windows-sys to merge pull request #9 from scullionw/dirstat-rs --- Cargo.lock | 127 +++++++++++++++++++++++++++++++++++++++++++-- Cargo.toml | 9 ++-- src/ffi.rs | 58 +++++++++++++++++++-- src/lib.rs | 27 ++++++++++ src/tests.rs | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 350 insertions(+), 13 deletions(-) create mode 100644 src/tests.rs diff --git a/Cargo.lock b/Cargo.lock index dc74f2e..00a7169 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -55,6 +55,26 @@ dependencies = [ "vec_map", ] +[[package]] +name = "const_format" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a214c7af3d04997541b18d432afaff4c455e79e2029079647e72fc2bd27673" +dependencies = [ + "const_format_proc_macros", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f6ff08fd20f4f299298a28e2dfa8a8ba1036e6cd2460ac1de7b425d76f2500" +dependencies = [ + "proc-macro2 1.0.33", + "quote 1.0.10", + "unicode-xid 0.2.2", +] + [[package]] name = "crossbeam-channel" version = "0.5.1" @@ -104,14 +124,16 @@ name = "dirstat-rs" version = "0.3.8" dependencies = [ "atty", + "const_format", + "path-absolutize", "pretty-bytes", "rayon", "serde", "serde_json", "structopt", "termcolor", - "winapi", "winapi-util", + "windows-sys", ] [[package]] @@ -184,6 +206,30 @@ dependencies = [ "libc", ] +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "path-absolutize" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e4af381fe79fa195b4909485d99f73a80792331df0625188e707854f0b3383f5" +dependencies = [ + "path-dedot", +] + +[[package]] +name = "path-dedot" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07ba0ad7e047712414213ff67533e6dd477af0a4e1d14fb52343e53d30ea9397" +dependencies = [ + "once_cell", +] + [[package]] name = "pretty-bytes" version = "0.2.2" @@ -414,11 +460,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.5" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" dependencies = [ - "winapi", + "windows-sys", ] [[package]] @@ -426,3 +472,76 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = 
"windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" diff --git a/Cargo.toml b/Cargo.toml index 92c8086..0cac752 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,11 +20,12 @@ serde = { version = "1.0.131", features = ["derive"] } serde_json = "1.0.73" [target.'cfg(windows)'.dependencies] -winapi-util = "0.1.2" +winapi-util = "0.1.8" +windows-sys = { version = "0.52", features = ["Win32_Foundation", "Win32_Storage_FileSystem"]} +path-absolutize = "3.1.1" -[target.'cfg(windows)'.dependencies.winapi] -version = "0.3.7" -features = ["winerror"] +[dev-dependencies] +const_format = "0.2.23" 
[profile.release] lto = 'fat' diff --git a/src/ffi.rs b/src/ffi.rs index 8334819..6fd179d 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -5,13 +5,13 @@ use std::io; use std::iter::once; use std::os::windows::ffi::OsStrExt; use std::path::Path; -use winapi::shared::winerror::NO_ERROR; -use winapi::um::errhandlingapi::GetLastError; -use winapi::um::fileapi::GetCompressedFileSizeW; -use winapi::um::fileapi::INVALID_FILE_SIZE; +use windows_sys::Win32::Foundation::GetLastError; +use windows_sys::Win32::Foundation::NO_ERROR; +use windows_sys::Win32::Storage::FileSystem::GetCompressedFileSizeW; +use windows_sys::Win32::Storage::FileSystem::INVALID_FILE_SIZE; pub fn compressed_size(path: &Path) -> Result> { - let wide: Vec = path.as_os_str().encode_wide().chain(once(0)).collect(); + let wide = path_to_u16s(path); let mut high: u32 = 0; // TODO: Deal with max path size @@ -27,6 +27,54 @@ pub fn compressed_size(path: &Path) -> Result> { Ok(u64::from(high) << 32 | u64::from(low)) } +/// inspired by [fn maybe_verbatim(path: &Path)](https://github.com/rust-lang/rust/blob/1f4681ad7a132755452c32a987ad0f0d075aa6aa/library/std/src/sys/windows/path.rs#L170) +/// But function from std is calling winapi GetFullPathNameW in case if path is longer than 248. +/// We are more optimistic and expect all path being absolute, so no API calls from this function. +fn path_to_u16s(path: &Path) -> Vec { + // Normally the MAX_PATH is 260 UTF-16 code units (including the NULL). + // However, for APIs such as CreateDirectory[1], the limit is 248. + // + // [1]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectorya#parameters + const LEGACY_MAX_PATH: usize = 248; + // UTF-16 encoded code points, used in parsing and building UTF-16 paths. + // All of these are in the ASCII range so they can be cast directly to `u16`. + const SEP: u16 = b'\\' as _; + const QUERY: u16 = b'?' 
as _; + const U: u16 = b'U' as _; + const N: u16 = b'N' as _; + const C: u16 = b'C' as _; + // \\?\ + const VERBATIM_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP]; + // \??\ + const NT_PREFIX: &[u16] = &[SEP, QUERY, QUERY, SEP]; + // \\?\UNC\ + const UNC_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP, U, N, C, SEP]; + // \\ + const NETWORK_PREFIX: &[u16] = &[SEP, SEP]; + + let wide: Vec = path.as_os_str().encode_wide().chain(once(0)).collect(); + // don't need to do anything if path is small enough. + if wide.len() < LEGACY_MAX_PATH { + return wide; + } + + if wide.starts_with(VERBATIM_PREFIX) || wide.starts_with(NT_PREFIX) { + return wide; + } + + if wide.starts_with(NETWORK_PREFIX) { + // network path from SMB + let mut tmp = Vec::from(UNC_PREFIX); + tmp.extend(&wide[2..]); + return tmp; + } else { + // if we came here, we aren't using network drive, so just prepend File namespace prefix + let mut tmp = Vec::from(VERBATIM_PREFIX); + tmp.extend(wide); + return tmp; + } +} + fn get_last_error() -> u32 { unsafe { GetLastError() } } diff --git a/src/lib.rs b/src/lib.rs index 283d5cf..52e4e95 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,6 +19,30 @@ impl DiskItem { path: &Path, apparent: bool, root_dev: u64, + ) -> Result> { + #[cfg(windows)] + { + // Solution for windows compressed files requires path to be absolute, see ffi.rs + // Basically it would be triggered only on top most invocation, + // and afterwards all path would be absolute.
We do it here as it is relatively harmless + // but this would allow us to do it only once instead of each invocation of ffi::compressed_size + if apparent && !path.is_absolute() { + use path_absolutize::*; + let absolute_dir = path.absolutize()?; + return Self::analyze( + absolute_dir.as_ref(), + apparent, + root_dev, + ); + } + } + return Self::analyze(path, apparent, root_dev); + } + + fn analyze( + path: &Path, + apparent: bool, + root_dev: u64, ) -> Result> { let name = path .file_name() @@ -115,3 +139,6 @@ impl FileInfo { } } } + +#[cfg(test)] +mod tests; \ No newline at end of file diff --git a/src/tests.rs b/src/tests.rs new file mode 100644 index 0000000..95bee25 --- /dev/null +++ b/src/tests.rs @@ -0,0 +1,142 @@ +use crate::{DiskItem, FileInfo}; +// warn: don't remove `as &str` after macro invocation. +// It breaks type checker in Intellij Rust IDE +use const_format::concatcp; +use std::fs::File; +use std::io::Write; +use std::panic; +use std::path::Path; + +// be aware that rust runs tests in parallel, so tests should use different dirs + +const TEST_DATA_DIR: &str = "./test-data/"; + +const LONG_PATH_DIR: &str = "long-path/"; +//noinspection SpellCheckingInspection +const PATH_1: &str = "lll1/llllllll/llllllllllllllll/llllllllllllll/lllllllllllll/oooooo\ +oooooooo/oooooooooooooooo/nnnnnnnnn/nnnnnnnnnn/nnnnnnnn/nnnnnn/gggggggggg/p/a/tttt\ +tttttttttt/ttttttttttt/ttttttttttttttt/ttttttttt/tttthhh/2222222222/22222222222/222222222222/\ +3333333333333/33333333/33333333333/33333333333/333333333/33333333/44444444/44444444444444444/\ +5555555/55555555555/55555555/5555555555/5555555/5555555/555555/555555555/66666666666666666666/\ +77777777/7777777777/7777777777777/77777777777/7777777777/77777777/7777777/77777777/8888888888/\ +99999999/999999/99999999/99999999999/99999999/999999999/9999999999/"; + +const PATH_1_FULL: &str = concatcp!(TEST_DATA_DIR, LONG_PATH_DIR, PATH_1) as &str; +//noinspection SpellCheckingInspection +const PATH_2: &str =
"lll2/llllllll/llllllllllllllll/llllllllllllll/lllllllllllll/oooooo\ +oooooooo/oooooooooooooooo/nnnnnnnnn/nnnnnnnnnn/nnnnnnnn/nnnnnn/gggggggggg/p/a/tttt\ +tttttttttt/ttttttttttt/ttttttttttttttt/ttttttttt/tttthhh/2222222222/22222222222/222222222222/\ +3333333333333/33333333/33333333333/33333333333/333333333/33333333/44444444/44444444444444444/\ +5555555/55555555555/55555555/5555555555/5555555/5555555/555555/555555555/66666666666666666666/\ +77777777/7777777777/7777777777777/77777777777/7777777777/77777777/7777777/77777777/8888888888/\ +99999999/999999/99999999/99999999999/99999999/999999999/9999999999/"; + +const PATH_2_FULL: &str = concatcp!(TEST_DATA_DIR, LONG_PATH_DIR, PATH_2) as &str; + +#[test] +fn test_max_path() { + // do not rename it into `_` it would cause immediate destruction after creation + let _guard = CleanUpGuard { + path: concatcp!(TEST_DATA_DIR, LONG_PATH_DIR) as &str, + }; + + // Given + create_dir(PATH_1_FULL); + create_dir(PATH_2_FULL); + create_file(&concatcp!(PATH_1_FULL, "file.bin") as &str, 4096); + create_file(&concatcp!(PATH_2_FULL, "file.bin") as &str, 8192); + + // When + let test_path = Path::new(concatcp!(TEST_DATA_DIR, LONG_PATH_DIR) as &str); + let result = FileInfo::from_path(test_path, true); + + // Then + if let Result::Ok(FileInfo::Directory { volume_id, ..
}) = result { + let result = DiskItem::from_analyze(test_path, true, volume_id); + let result = result.expect("Must collect data"); + assert_eq!(result.disk_size, 4096 + 8192); + let children = result.children.unwrap(); + assert_eq!(children.len(), 2); + assert_eq!(children[0].disk_size, 8192); + assert_eq!(children[1].disk_size, 4096); + } else { + panic!("Can not get file info"); + } +} + +#[test] +#[cfg(unix)] // It gives inconsistent results on windows +fn test_file_size() { + const DIR: &str = concatcp!(TEST_DATA_DIR, "test_file_size/") as &str; + // do not rename it into `_` it would cause immediate destruction after creation + let _guard = CleanUpGuard { path: DIR }; + + // Given + // These sizes are selected to be close to the filesystem sector size, and to be maximally universal + // even for FS-es with sectors as big as 8KiB + create_file(&concatcp!(DIR, "foo/file.bin") as &str, 8192); + create_file(&concatcp!(DIR, "bar/file.bin") as &str, 8192 - 5); + + // When calculating with apparent size + let test_path = Path::new(DIR); + let result = FileInfo::from_path(test_path, true); + + // Then + if let Result::Ok(FileInfo::Directory { volume_id }) = result { + let result = DiskItem::from_analyze(test_path, true, volume_id); + let result = result.expect("Must collect data"); + assert_eq!(result.disk_size, 8192 + 8192); + let children = result.children.unwrap(); + assert_eq!(children.len(), 2); + // Both dirs should be rounded to sector size + assert_eq!(children[0].disk_size, 8192); + assert_eq!(children[1].disk_size, 8192); + } else { + panic!("Can not get file info"); + } + + // When calculating withOUT apparent size + let result = FileInfo::from_path(test_path, false); + + // Then + if let Result::Ok(FileInfo::Directory { volume_id }) = result { + let result = DiskItem::from_analyze(test_path, false, volume_id); + let result = result.expect("Must collect data"); + assert_eq!(result.disk_size, 8192 + 8192 - 5); + let children = result.children.unwrap(); +
assert_eq!(children.len(), 2); + // Sizes should be the exact byte lengths, not rounded to sector size + assert_eq!(children[0].disk_size, 8192); + assert_eq!(children[1].disk_size, 8192 - 5); + } else { + panic!("Can not get file info"); + } +} + +// Helper functions and cleanup code follow + +fn create_dir(dir_path: &str) { + std::fs::create_dir_all(dir_path).unwrap(); +} + +fn create_file(file_path: &str, size: usize) { + let content = vec![0u8; size]; + let file_path = Path::new(file_path); + // ensure parent + std::fs::create_dir_all(file_path.parent().unwrap()).unwrap(); + + let mut file = File::create(file_path).unwrap(); + file.write(&content).unwrap(); +} + +/// Used to clean up test folder after test runs. +struct CleanUpGuard { + path: &'static str, +} + +impl Drop for CleanUpGuard { + fn drop(&mut self) { + // Teardown + std::fs::remove_dir_all(Path::new(self.path)).unwrap(); + } +} \ No newline at end of file