Skip to content

Commit

Permalink
Fix problem with MAX_PATH on windows + UnitTest
Browse files Browse the repository at this point in the history
* Manually implemented windows File Namespace name generation

* added some unit tests

closes scullionw#5
  • Loading branch information
Mart-Bogdan committed Apr 23, 2022
1 parent aafe068 commit bb106d2
Show file tree
Hide file tree
Showing 6 changed files with 265 additions and 4 deletions.
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,9 @@ $RECYCLE.BIN/
# Windows shortcuts
*.lnk

# End of https://www.gitignore.io/api/linux,macos,windows
# End of https://www.gitignore.io/api/linux,macos,windows

# Jetrbrains IDE (Intellij RUST/Clion/Idea)
.idea/

test-data/
48 changes: 47 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "dirstat-rs"
version = "0.3.7"
version = "0.3.8"
authors = ["scullionw <[email protected]>"]
edition = "2018"
license = "MIT"
Expand All @@ -19,8 +19,12 @@ atty = "0.2.14"
serde = { version = "1.0.131", features = ["derive"] }
serde_json = "1.0.73"

[dev-dependencies]
const_format = "0.2.22"

[target.'cfg(windows)'.dependencies]
winapi-util = "0.1.2"
path-absolutize = "3.0.12"

[target.'cfg(windows)'.dependencies.winapi]
version = "0.3.7"
Expand All @@ -35,3 +39,4 @@ incremental = false
bench = false
path = "src/bin/main.rs"
name = "ds"

50 changes: 49 additions & 1 deletion src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use winapi::um::fileapi::GetCompressedFileSizeW;
use winapi::um::fileapi::INVALID_FILE_SIZE;

pub fn compressed_size(path: &Path) -> Result<u64, Box<dyn Error>> {
let wide: Vec<u16> = path.as_os_str().encode_wide().chain(once(0)).collect();
let wide = path_to_u16s(path);
let mut high: u32 = 0;

// TODO: Deal with max path size
Expand All @@ -27,6 +27,54 @@ pub fn compressed_size(path: &Path) -> Result<u64, Box<dyn Error>> {
Ok(u64::from(high) << 32 | u64::from(low))
}

/// inspired by [fn maybe_verbatim(path: &Path)](https://github.com/rust-lang/rust/blob/1f4681ad7a132755452c32a987ad0f0d075aa6aa/library/std/src/sys/windows/path.rs#L170)
/// But function from std is calling winapi GetFullPathNameW in case if path is longer than 248.
/// We are more optimistic and expect all path being absolute, so no API calls from this function.
fn path_to_u16s(path: &Path) -> Vec<u16> {
// Normally the MAX_PATH is 260 UTF-16 code units (including the NULL).
// However, for APIs such as CreateDirectory[1], the limit is 248.
//
// [1]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectorya#parameters
const LEGACY_MAX_PATH: usize = 248;
// UTF-16 encoded code points, used in parsing and building UTF-16 paths.
// All of these are in the ASCII range so they can be cast directly to `u16`.
const SEP: u16 = b'\\' as _;
const QUERY: u16 = b'?' as _;
const U: u16 = b'U' as _;
const N: u16 = b'N' as _;
const C: u16 = b'C' as _;
// \\?\
const VERBATIM_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP];
// \??\
const NT_PREFIX: &[u16] = &[SEP, QUERY, QUERY, SEP];
// \\?\UNC\
const UNC_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP, U, N, C, SEP];
// \\
const NETWORK_PREFIX: &[u16] = &[SEP, SEP];

let wide: Vec<u16> = path.as_os_str().encode_wide().chain(once(0)).collect();
// don't need to do anything if path is small enaught.
if wide.len() < LEGACY_MAX_PATH {
return wide;
}

if wide.starts_with(VERBATIM_PREFIX) || wide.starts_with(NT_PREFIX) {
return wide;
}

if wide.starts_with(NETWORK_PREFIX) {
// network path from SMB
let mut tmp = Vec::from(UNC_PREFIX);
tmp.extend(&wide[2..]);
return tmp;
} else {
// if we came here, we aren't using network drive, so just prepend File namespace prefix
let mut tmp = Vec::from(VERBATIM_PREFIX);
tmp.extend(wide);
return tmp;
}
}

fn get_last_error() -> u32 {
unsafe { GetLastError() }
}
17 changes: 17 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,25 @@ pub struct DiskItem {
}

impl DiskItem {
/// Analyzes provided path and returns tree structure of analyzed DiskItems
pub fn from_analyze(
path: &Path,
apparent: bool,
root_dev: u64,
) -> Result<Self, Box<dyn Error>> {
#[cfg(windows)]
{
// Solution for windows compressed files requires path to be absolute, see ffi.rs
// Basically it would be triggered only on top most invocation,
// and afterwards all path would be absolute. We do it here as it is relatively harmless
// but this would allow us fo it only once instead of each invocation of ffi::compressed_size
if apparent && !path.is_absolute() {
use path_absolutize::*;
let absolute_dir = path.absolutize()?;
return Self::from_analyze(absolute_dir.as_ref(), apparent, root_dev);
}
}

let name = path
.file_name()
.unwrap_or(&OsStr::new("."))
Expand Down Expand Up @@ -115,3 +129,6 @@ impl FileInfo {
}
}
}

#[cfg(test)]
mod tests;
140 changes: 140 additions & 0 deletions src/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
use crate::{DiskItem, FileInfo};
// warn: don't remove `as &str` after macro invocation.
// It breaks type checker in Intellij Rust IDE
use const_format::concatcp;
use std::fs::File;
use std::io::Write;
use std::panic;
use std::path::Path;

// be aware that rust runs tests in parallel, so tests should use different dirs

const TEST_DATA_DIR: &str = "./test-data/";

const LONG_PATH_DIR: &str = "long-path/";
//noinspection SpellCheckingInspection
const PATH_1: &str = "lll1/llllllll/llllllllllllllll/llllllllllllll/lllllllllllll/oooooo\
oooooooo/oooooooooooooooo/nnnnnnnnn/nnnnnnnnnn/nnnnnnnn/nnnnnn/gggggggggg/p/a/tttt\
tttttttttt/ttttttttttt/ttttttttttttttt/ttttttttt/tttthhh/2222222222/22222222222/222222222222/\
3333333333333/33333333/33333333333/33333333333/333333333/33333333/44444444/44444444444444444/\
5555555/55555555555/55555555/5555555555/5555555/5555555/555555/555555555/66666666666666666666/\
77777777/7777777777/7777777777777/77777777777/7777777777/77777777/7777777/77777777/8888888888/\
99999999/999999/99999999/99999999999/99999999/999999999/9999999999/";

const PATH_1_FULL: &str = concatcp!(TEST_DATA_DIR, LONG_PATH_DIR, PATH_1) as &str;
//noinspection SpellCheckingInspection
const PATH_2: &str = "lll2/llllllll/llllllllllllllll/llllllllllllll/lllllllllllll/oooooo\
oooooooo/oooooooooooooooo/nnnnnnnnn/nnnnnnnnnn/nnnnnnnn/nnnnnn/gggggggggg/p/a/tttt\
tttttttttt/ttttttttttt/ttttttttttttttt/ttttttttt/tttthhh/2222222222/22222222222/222222222222/\
3333333333333/33333333/33333333333/33333333333/333333333/33333333/44444444/44444444444444444/\
5555555/55555555555/55555555/5555555555/5555555/5555555/555555/555555555/66666666666666666666/\
77777777/7777777777/7777777777777/77777777777/7777777777/77777777/7777777/77777777/8888888888/\
99999999/999999/99999999/99999999999/99999999/999999999/9999999999/";

const PATH_2_FULL: &str = concatcp!(TEST_DATA_DIR, LONG_PATH_DIR, PATH_2) as &str;

#[test]
fn test_max_path() {
// do not rename it into `_` it would cause immediate destrucion after creation
let _guard = CleanUpGuard {
path: concatcp!(TEST_DATA_DIR, LONG_PATH_DIR) as &str,
};

// Given
create_dir(PATH_1_FULL);
create_dir(PATH_2_FULL);
create_file(&concatcp!(PATH_1_FULL, "file.bin") as &str, 4096);
create_file(&concatcp!(PATH_2_FULL, "file.bin") as &str, 8192);

// When
let test_path = Path::new(concatcp!(TEST_DATA_DIR, LONG_PATH_DIR) as &str);
let result = FileInfo::from_path(test_path, true);

// Then
if let Result::Ok(FileInfo::Directory { volume_id }) = result {
let result = DiskItem::from_analyze(test_path, true, volume_id);
let result = result.expect("Must collect data");
assert_eq!(result.disk_size, 4096 + 8192);
let children = result.children.unwrap();
assert_eq!(children.len(), 2);
assert_eq!(children[0].disk_size, 8192);
assert_eq!(children[1].disk_size, 4096);
} else {
panic!("Can not get file info");
}
}

#[test]
#[cfg(unix)] // It gives inconsistent results on windows
fn test_file_size() {
const DIR: &str = concatcp!(TEST_DATA_DIR, "test_file_size/") as &str;
// do not rename it into `_` it would cause immediate destrucion after creation
let _guard = CleanUpGuard { path: DIR };

// Given
create_file(&concatcp!(DIR, "foo/file.bin") as &str, 8192);
create_file(&concatcp!(DIR, "bar/file.bin") as &str, 8192 - 5);

// When calculating with apparent size
let test_path = Path::new(DIR);
let result = FileInfo::from_path(test_path, true);

// Then
if let Result::Ok(FileInfo::Directory { volume_id }) = result {
let result = DiskItem::from_analyze(test_path, true, volume_id);
let result = result.expect("Must collect data");
assert_eq!(result.disk_size, 8192 + 8192);
let children = result.children.unwrap();
assert_eq!(children.len(), 2);
// Both dirs should be rounded to sector size
assert_eq!(children[0].disk_size, 8192);
assert_eq!(children[1].disk_size, 8192);
} else {
panic!("Can not get file info");
}

// When calculating withOUT apparent size
let result = FileInfo::from_path(test_path, false);

// Then
if let Result::Ok(FileInfo::Directory { volume_id }) = result {
let result = DiskItem::from_analyze(test_path, false, volume_id);
let result = result.expect("Must collect data");
assert_eq!(result.disk_size, 8192 + 8192 - 5);
let children = result.children.unwrap();
assert_eq!(children.len(), 2);
// Both dirs should be rounded to sector size
assert_eq!(children[0].disk_size, 8192);
assert_eq!(children[1].disk_size, 8192 - 5);
} else {
panic!("Can not get file info");
}
}

// Helper functions and cleanup code goes next

fn create_dir(dir_path: &str) {
std::fs::create_dir_all(dir_path).unwrap();
}

fn create_file(file_path: &str, size: usize) {
let content = vec![0u8; size];
let file_path = Path::new(file_path);
// ensure parent
std::fs::create_dir_all(file_path.parent().unwrap()).unwrap();

let mut file = File::create(file_path).unwrap();
file.write(&content).unwrap();
}

/// Used to clean up test folder after test runs.
struct CleanUpGuard {
path: &'static str,
}

impl Drop for CleanUpGuard {
fn drop(&mut self) {
// Teardown
std::fs::remove_dir_all(Path::new(self.path)).unwrap();
}
}

0 comments on commit bb106d2

Please sign in to comment.