Skip to content

Commit

Permalink
handle zero bytes in input properly
Browse files Browse the repository at this point in the history
  • Loading branch information
a10y committed Aug 15, 2024
1 parent 3b14e8d commit e9b41bc
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 15 deletions.
2 changes: 1 addition & 1 deletion rust-toolchain.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[toolchain]
channel = "stable"
channel = "nightly-2024-08-14"
components = ["rust-src", "rustfmt", "clippy"]
profile = "minimal"
13 changes: 10 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,14 @@ impl Symbol {
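// On little-endian platforms the high-order (leading) zero bits of the integer are the
// *trailing* zero bytes of the symbol in memory; `>> 3` converts the bit count to bytes.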
let null_bytes = (numeric.leading_zeros() >> 3) as usize;

size_of::<Self>() - null_bytes
// Special case handling of a symbol with all-zeros. This is actually
// a 1-byte symbol containing 0x00.
let len = size_of::<Self>() - null_bytes;
if len == 0 {
1
} else {
len
}
}

/// Returns true if the symbol does not encode any bytes.
Expand Down Expand Up @@ -298,9 +305,9 @@ impl SymbolTable {
///
/// # Safety
///
/// `in_ptr` and `out_ptr` must never be NULL or otherwise point to invalid memory.
/// `out_ptr` must never be NULL or otherwise point to invalid memory.
// NOTE(aduffy): uncomment this line to make the function appear in profiles
// #[inline(never)]
#[inline(never)]
pub(crate) unsafe fn compress_word(&self, word: u64, out_ptr: *mut u8) -> (usize, usize) {
// Speculatively write the first byte of `word` at offset 1. This is necessary if it is an escape, and
// if it isn't, it will be overwritten anyway.
Expand Down
22 changes: 11 additions & 11 deletions tests/correctness.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,17 @@ fn test_train_on_empty() {
);
}

// #[test]
// fn test_zeros() {
// println!("training zeros");
// let training_data: Vec<u8> = vec![0, 1, 2, 3, 4];
// let trained = fsst_rs::train(&training_data);
// println!("compressing with zeros");
// let compressed = trained.compress(&[0, 4]);
// println!("decomperssing with zeros");
// assert_eq!(trained.decompress(&compressed), &[0, 4]);
// println!("done");
// }
/// Trains a table on data that contains a zero byte, then checks that an
/// input containing a zero byte survives a compress/decompress round trip.
#[test]
fn test_zeros() {
    println!("training zeros");
    let sample: Vec<u8> = vec![0, 1, 2, 3, 4];
    let table = fsst_rs::train(&sample);

    println!("compressing with zeros");
    let encoded = table.compress(&[0, 4]);

    println!("decomperssing with zeros");
    let decoded = table.decompress(&encoded);
    assert_eq!(decoded, &[0, 4]);

    println!("done");
}

#[test]
fn test_large() {
Expand Down

0 comments on commit e9b41bc

Please sign in to comment.