diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml index 06667066d..106f9dccd 100644 --- a/.github/workflows/checks.yaml +++ b/.github/workflows/checks.yaml @@ -87,7 +87,7 @@ jobs: env: RUST_BACKTRACE: 1 - name: cargo test with llvm-cov - run: cargo llvm-cov --target ${{matrix.target}} ${{ matrix.features }} --lcov --output-path lcov.info --ignore-filename-regex "bzip2recover.rs|bzip2\.rs|test-libbzip2" --workspace + run: cargo llvm-cov --target ${{matrix.target}} ${{ matrix.features }} --lcov --output-path lcov.info --ignore-filename-regex "bzip2\.rs|test-libbzip2" --workspace env: RUST_BACKTRACE: 1 if: matrix.os != 'windows-2022' diff --git a/Cargo.lock b/Cargo.lock index ab66e4c83..0589ba9fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + [[package]] name = "bzip2-sys" version = "0.1.11+1.0.8" @@ -17,8 +23,10 @@ dependencies = [ name = "c2rust_out" version = "0.0.0" dependencies = [ + "crc32fast", "libbzip2-rs-sys", "libc", + "tempfile", ] [[package]] @@ -30,6 +38,37 @@ dependencies = [ "shlex", ] +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fastrand" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" + [[package]] name = "libbzip2-rs-sys" version = "0.0.0" @@ -43,18 +82,56 @@ version = "0.2.161" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + [[package]] name = "pkg-config" version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +[[package]] +name = "rustix" +version = "0.38.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "375116bee2be9ed569afe2154ea6a99dfdffd257f533f187498c2a8f5feaf4ee" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + [[package]] name = "shlex" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "tempfile" +version = "3.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +dependencies = [ + "cfg-if", + "fastrand", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + [[package]] name = "test-libbzip2-rs-sys" version = "0.0.0" @@ -63,3 +140,85 @@ dependencies = [ "libbzip2-rs-sys", "libc", ] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/Cargo.toml b/Cargo.toml index e6cd9ebd9..f13b0b1ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,3 +30,7 @@ libbzip2-rs-sys = { path = "libbzip2-rs-sys/", default-features = false } [dependencies] libc = "0.2" libbzip2-rs-sys = { path = "libbzip2-rs-sys/", default-features = false } + +[dev-dependencies] +tempfile = "3.13.0" +crc32fast = "=1.4.2" diff --git a/bzip2recover.rs b/bzip2recover.rs index 0fa7a941e..2071ca9ac 100644 --- a/bzip2recover.rs +++ b/bzip2recover.rs @@ -116,57 +116,68 @@ impl BitStream { } } -fn emit_read_error(program_name: &Path, in_filename: &Path, io_error: std::io::Error) -> ExitCode { - eprintln!( - "{}: I/O error reading `{}', possible reason follows.", - program_name.display(), - in_filename.display(), - ); - - eprintln!("{}", io_error); - - eprintln!( - "{}: warning: output file(s) may be incomplete.", - program_name.display(), - ); - - ExitCode::FAILURE +struct EmitError<'a> { + program_name: &'a Path, + in_filename: &'a Path, + error: Error, } -fn emit_write_error(program_name: &Path, in_filename: &Path, io_error: std::io::Error) -> ExitCode { - eprintln!( - "{}: I/O error writing `{}', possible reason follows.", - program_name.display(), - in_filename.display(), - ); +impl core::fmt::Display for EmitError<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.error { + Error::Reading(ref io_error) => { + f.write_fmt(format_args!( + "{}: I/O error reading `{}', possible reason follows.\n", + self.program_name.display(), + self.in_filename.display(), + ))?; - eprintln!("{}", io_error); + f.write_fmt(format_args!("{}\n", io_error))?; - eprintln!( - "{}: warning: output file(s) may be incomplete.", - program_name.display() - ); + f.write_fmt(format_args!( + "{}: warning: output file(s) may be incomplete.\n", + self.program_name.display(), + ))?; - ExitCode::FAILURE -} - -fn emit_too_many_blocks( - program_name: &Path, - in_filename: &Path, - max_handled_blocks: usize, -) -> ExitCode { - let program_name = program_name.display(); + Ok(()) + } + Error::Writing(ref io_error) => { + f.write_fmt(format_args!( + "{}: I/O error writing `{}', possible reason follows.\n", + self.program_name.display(), + self.in_filename.display(), + ))?; - eprintln!( - "{}: `{}' appears to contain more than {max_handled_blocks} blocks", - program_name, - in_filename.display(), - ); + f.write_fmt(format_args!("{}\n", io_error))?; - eprintln!("{program_name}: and cannot be handled. To fix, increase"); - eprintln!("{program_name}: BZ_MAX_HANDLED_BLOCKS in bzip2recover.rs, and recompile."); + f.write_fmt(format_args!( + "{}: warning: output file(s) may be incomplete.\n", + self.program_name.display(), + ))?; - ExitCode::FAILURE + Ok(()) + } + Error::TooManyBlocks(max_handled_blocks) => { + let program_name = self.program_name.display(); + + f.write_fmt(format_args!( + "{}: `{}' appears to contain more than {max_handled_blocks} blocks\n", + program_name, + self.in_filename.display(), + ))?; + + f.write_fmt(format_args!( + "{program_name}: and cannot be handled. To fix, increase\n" + ))?; + f.write_fmt(format_args!( + "{program_name}: BZ_MAX_HANDLED_BLOCKS in bzip2recover.rs, and recompile.\n" + ))?; + + Ok(()) + } + Error::Fatal => Ok(()), + } + } } fn main_help(program_name: &Path, in_filename: &Path) -> Result<(), Error> { @@ -387,13 +398,101 @@ pub fn main() -> ExitCode { match main_help(&program_name, &in_filename) { Ok(()) => ExitCode::SUCCESS, - Err(error) => match error { - Error::Reading(io_error) => emit_read_error(&program_name, &in_filename, io_error), - Error::Writing(io_error) => emit_write_error(&program_name, &in_filename, io_error), - Error::TooManyBlocks(handled) => { - emit_too_many_blocks(&program_name, &in_filename, handled) - } - Error::Fatal => ExitCode::FAILURE, - }, + Err(error) => { + let emit_error = EmitError { + program_name: &program_name, + in_filename: &in_filename, + error, + }; + + eprint!("{}", emit_error); + + ExitCode::FAILURE + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn read_error() { + use std::fmt::Write; + + let program_name = Path::new("/foo/bar/bzip2recover"); + let in_filename = Path::new("$garbage"); + + let io_error = std::fs::File::open(in_filename).unwrap_err(); + let emit_error = EmitError { + program_name, + in_filename, + error: Error::Reading(io_error), + }; + + let mut buf = String::new(); + write!(&mut buf, "{}", emit_error).unwrap(); + + assert_eq!( + buf, + concat!( + "/foo/bar/bzip2recover: I/O error reading `$garbage', possible reason follows.\n", + "No such file or directory (os error 2)\n", + "/foo/bar/bzip2recover: warning: output file(s) may be incomplete.\n" + ) + ); + } + + #[test] + fn write() { + use std::fmt::Write; + + let program_name = Path::new("/foo/bar/bzip2recover"); + let in_filename = Path::new("$garbage"); + + let io_error = std::fs::File::open(in_filename).unwrap_err(); + let emit_error = EmitError { + program_name, + in_filename, + error: Error::Writing(io_error), + }; + + let mut buf = String::new(); + write!(&mut buf, "{}", emit_error).unwrap(); + + assert_eq!( + buf, + concat!( + "/foo/bar/bzip2recover: I/O error writing `$garbage', possible reason follows.\n", + "No such file or directory (os error 2)\n", + "/foo/bar/bzip2recover: warning: output file(s) may be incomplete.\n" + ) + ); + } + + #[test] + fn too_many_blocks() { + use std::fmt::Write; + + let program_name = Path::new("/foo/bar/bzip2recover"); + let in_filename = Path::new("$garbage"); + + let emit_error = EmitError { + program_name, + in_filename, + error: Error::TooManyBlocks(42), + }; + + let mut buf = String::new(); + write!(&mut buf, "{}", emit_error).unwrap(); + + assert_eq!( + buf, + concat!( + "/foo/bar/bzip2recover: `$garbage' appears to contain more than 42 blocks\n", + "/foo/bar/bzip2recover: and cannot be handled. To fix, increase\n", + "/foo/bar/bzip2recover: BZ_MAX_HANDLED_BLOCKS in bzip2recover.rs, and recompile.\n" + ) + ); } } diff --git a/tests/recover.rs b/tests/recover.rs new file mode 100644 index 000000000..afc3fc76d --- /dev/null +++ b/tests/recover.rs @@ -0,0 +1,269 @@ +use std::env; +use std::fs::File; +use std::io::Write; +use std::os::unix::fs::FileExt; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; + +/// Useful to test with the C binary +fn bzip2recover_binary() -> &'static str { + env!("CARGO_BIN_EXE_bzip2recover") +} + +fn run_bzip2recover(path: Option<&Path>) -> std::process::Output { + let mut cmd; + match env::var("RUNNER") { + Ok(runner) if !runner.is_empty() => { + let mut runner_args = runner.split(' '); + cmd = Command::new(runner_args.next().unwrap()); + cmd.args(runner_args); + cmd.arg(bzip2recover_binary()); + } + _ => cmd = Command::new(bzip2recover_binary()), + } + + if let Some(path) = path { + cmd.arg(path.as_os_str()).stdout(Stdio::piped()); + } + + match cmd.output() { + Ok(output) => output, + Err(err) => panic!("Running {cmd:?} failed with {err:?}"), + } +} + +fn checksum(path: &Path) -> u32 { + crc32fast::hash(&std::fs::read(path).unwrap()) +} + +#[test] +fn basic_valid_file() { + let tmp = tempfile::tempdir().unwrap(); + let tmp_path_str = tmp.path().display().to_string(); + + let file_path = tmp.path().join("sample1.bz2"); + let mut file = File::create(&file_path).unwrap(); + + file.write_all(include_bytes!("input/quick/sample2.bz2")) + .unwrap(); + + drop(file); + + let output = run_bzip2recover(Some(&file_path)); + + assert!(output.status.success()); + assert!(output.stdout.is_empty()); + + assert_eq!( + String::from_utf8_lossy(&output.stderr) + .replace(&tmp_path_str, "$TEMPDIR") + .replace(bzip2recover_binary(), "bzip2recover"), + concat!( + "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n", + "bzip2recover: searching for block boundaries ...\n", + " block 1 runs from 80 to 544887\n", + " block 2 runs from 544936 to 589771\n", + "bzip2recover: splitting into blocks\n", + " writing block 1 to `$TEMPDIR/rec00001sample1.bz2' ...\n", + " writing block 2 to `$TEMPDIR/rec00002sample1.bz2' ...\n", + "bzip2recover: finished\n" + ) + ); + + assert_eq!( + checksum(&tmp.path().join("rec00001sample1.bz2")), + 2309536424 + ); + assert_eq!( + checksum(&tmp.path().join("rec00002sample1.bz2")), + 1823861694 + ); +} + +#[test] +fn basic_invalid_file() { + let tmp = tempfile::tempdir().unwrap(); + let tmp_path_str = tmp.path().display().to_string(); + + let file_path = tmp.path().join("sample1.bz2"); + let mut file = File::create(&file_path).unwrap(); + + file.write_all(include_bytes!("input/quick/sample2.bz2")) + .unwrap(); + + // write some garbage data at the start of the second block + file.write_all_at(&[0xAA, 0xBB, 0xCC], 544936 + 64).unwrap(); + + drop(file); + + let output = run_bzip2recover(Some(&file_path)); + + assert!(output.status.success()); + assert!(output.stdout.is_empty()); + + assert_eq!( + String::from_utf8_lossy(&output.stderr) + .replace(&tmp_path_str, "$TEMPDIR") + .replace(bzip2recover_binary(), "bzip2recover"), + concat!( + "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n", + "bzip2recover: searching for block boundaries ...\n", + " block 1 runs from 80 to 544887\n", + " block 2 runs from 544936 to 589771\n", + " block 3 runs from 589820 to 4360024 (incomplete)\n", + "bzip2recover: splitting into blocks\n", + " writing block 1 to `$TEMPDIR/rec00001sample1.bz2' ...\n", + " writing block 2 to `$TEMPDIR/rec00002sample1.bz2' ...\n", + "bzip2recover: finished\n" + ) + ); + + assert_eq!( + checksum(&tmp.path().join("rec00001sample1.bz2")), + 2309536424 + ); + assert_eq!( + checksum(&tmp.path().join("rec00002sample1.bz2")), + 1823861694 + ); + + assert!(!tmp.path().join("rec00003sample1.bz2").exists()); +} + +#[test] +fn no_input_file() { + let output = run_bzip2recover(None); + + assert!(!output.status.success()); + assert!(output.stdout.is_empty()); + + assert_eq!( + String::from_utf8_lossy(&output.stderr).replace(bzip2recover_binary(), "bzip2recover"), + concat!( + "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n", + "bzip2recover: usage is `bzip2recover damaged_file_name'.\n", + "\trestrictions on size of recovered file: None\n" + ) + ); +} + +#[test] +fn nonexistent_input_file() { + let output = run_bzip2recover(Some(Path::new("does_not_exist.txt"))); + + assert!(!output.status.success()); + assert!(output.stdout.is_empty()); + + assert_eq!( + String::from_utf8_lossy(&output.stderr).replace(bzip2recover_binary(), "bzip2recover"), + concat!( + "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n", + "bzip2recover: can't read `does_not_exist.txt'\n", + ) + ); +} + +#[test] +fn random_input_data() { + let tmp = tempfile::tempdir().unwrap(); + let tmp_path_str = tmp.path().display().to_string(); + + let file_path = tmp.path().join("sample1.bz2"); + let mut file = File::create(&file_path).unwrap(); + + file.write_all(include_bytes!("input/quick/sample1.ref")) + .unwrap(); + + drop(file); + + let output = run_bzip2recover(Some(&file_path)); + + assert!(!output.status.success()); + assert!(output.stdout.is_empty()); + + assert_eq!( + String::from_utf8_lossy(&output.stderr) + .replace(&tmp_path_str, "$TEMPDIR") + .replace(bzip2recover_binary(), "bzip2recover"), + concat!( + "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n", + "bzip2recover: searching for block boundaries ...\n", + "bzip2recover: sorry, I couldn't find any block boundaries.\n" + ) + ); +} + +#[test] +fn does_not_overwrite_recovered_files() { + let tmp = tempfile::tempdir().unwrap(); + let tmp_path_str = tmp.path().display().to_string(); + + let file_path = tmp.path().join("sample1.bz2"); + let mut file = File::create(&file_path).unwrap(); + + file.write_all(include_bytes!("input/quick/sample2.bz2")) + .unwrap(); + + drop(file); + + let output = run_bzip2recover(Some(&file_path)); + + assert!(output.status.success()); + assert!(output.stdout.is_empty()); + + assert_eq!( + String::from_utf8_lossy(&output.stderr) + .replace(&tmp_path_str, "$TEMPDIR") + .replace(bzip2recover_binary(), "bzip2recover"), + concat!( + "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n", + "bzip2recover: searching for block boundaries ...\n", + " block 1 runs from 80 to 544887\n", + " block 2 runs from 544936 to 589771\n", + "bzip2recover: splitting into blocks\n", + " writing block 1 to `$TEMPDIR/rec00001sample1.bz2' ...\n", + " writing block 2 to `$TEMPDIR/rec00002sample1.bz2' ...\n", + "bzip2recover: finished\n" + ) + ); + + // now we run the same command. The output files are only created when they don't already + // exist. + + let output = run_bzip2recover(Some(&file_path)); + + assert!(!output.status.success()); + assert!(output.stdout.is_empty()); + + assert_eq!( + String::from_utf8_lossy(&output.stderr) + .replace(&tmp_path_str, "$TEMPDIR") + .replace(bzip2recover_binary(), "bzip2recover"), + concat!( + "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n", + "bzip2recover: searching for block boundaries ...\n", + " block 1 runs from 80 to 544887\n", + " block 2 runs from 544936 to 589771\n", + "bzip2recover: splitting into blocks\n", + " writing block 1 to `$TEMPDIR/rec00001sample1.bz2' ...\n", + "bzip2recover: can't write `$TEMPDIR/rec00001sample1.bz2'\n", + ) + ); +} + +#[test] +fn very_long_file_name() { + let file_path = PathBuf::from("NaN".repeat(1000) + " batman!.txt"); + let output = run_bzip2recover(Some(&file_path)); + + assert!(!output.status.success()); + assert!(output.stdout.is_empty()); + + assert_eq!( + String::from_utf8_lossy(&output.stderr).replace(bzip2recover_binary(), "bzip2recover"), + concat!( + "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n", + "bzip2recover: supplied filename is suspiciously (>= 3012 chars) long. Bye!\n", + ) + ); +}