From 358ef31e277c0c1186f0cb1b863b4a4ee9b237d3 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Tue, 3 Sep 2024 11:04:38 -0400 Subject: [PATCH] better benchmarks, cleanup --- benches/micro.rs | 65 ++++++++++++++++++++++++++---------------------- src/builder.rs | 16 +++++++++--- src/lib.rs | 37 +++++++++++++++++---------- 3 files changed, 72 insertions(+), 46 deletions(-) diff --git a/benches/micro.rs b/benches/micro.rs index e5a15f5..960dab2 100644 --- a/benches/micro.rs +++ b/benches/micro.rs @@ -4,7 +4,11 @@ use criterion::{criterion_group, criterion_main, Criterion, Throughput}; use fsst::{CompressorBuilder, Symbol}; -fn bench1(c: &mut Criterion) { +fn one_megabyte(seed: &[u8]) -> Vec { + seed.iter().copied().cycle().take(1024 * 1024).collect() +} + +fn bench_compress(c: &mut Criterion) { let mut group = c.benchmark_group("compress-overhead"); group.bench_function("compress-word", |b| { let mut compressor = CompressorBuilder::new(); @@ -21,8 +25,7 @@ fn bench1(c: &mut Criterion) { // Reusable memory to hold outputs let mut output_buf: Vec = Vec::with_capacity(1_024 * 1024 * 2); - group.throughput(Throughput::Bytes(8u64)); - group.bench_function("compress_fastpath", |b| { + group.bench_function("compress-hashtab", |b| { // We create a symbol table and an input that will execute exactly one iteration, // in the fast compress_word pathway. let mut compressor = CompressorBuilder::new(); @@ -30,26 +33,28 @@ fn bench1(c: &mut Criterion) { let compressor = compressor.build(); b.iter(|| unsafe { - compressor.compress_into(b"abcdefgh", &mut output_buf); + compressor.compress_into( + b"abcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefgh", + &mut output_buf, + ); }); }); - group.throughput(Throughput::Bytes(4u64)); - group.bench_function("compress_slowpath", |b| { + group.bench_function("compress-twobytes", |b| { // We create a symbol table and an input that will execute exactly one iteration, - // but it misses the compress_word and needs to go on the slow path. + // in the fast compress_word pathway. let mut compressor = CompressorBuilder::new(); - compressor.insert(Symbol::from_slice(&[b'a', b'b', b'c', b'd', 0, 0, 0, 0]), 4); + compressor.insert(Symbol::from_slice(&[b'a', b'b', 0, 0, 0, 0, 0, 0]), 8); let compressor = compressor.build(); b.iter(|| unsafe { - compressor.compress_into(b"abcd", &mut output_buf); + compressor.compress_into(b"abababababababab", &mut output_buf); }); }); group.finish(); let mut group = c.benchmark_group("cf=1"); - let test_string = b"aaaaaaaa"; + let test_string = one_megabyte(b"aaaaaaaa"); group.throughput(Throughput::Bytes(test_string.len() as u64)); group.bench_function("compress", |b| { let mut compressor = CompressorBuilder::new(); @@ -57,29 +62,18 @@ fn bench1(c: &mut Criterion) { let compressor = compressor.build(); b.iter(|| unsafe { - compressor.compress_into(test_string, &mut output_buf); + compressor.compress_into(&test_string, &mut output_buf); }) }); group.finish(); let mut group = c.benchmark_group("cf=2"); - let test_string = { - // 1MB of data - let mut out = Vec::with_capacity(1024 * 1024); - - for _ in 0..(out.capacity() / 2) { - out.push(b'a'); - out.push(b'b'); - } - - out - }; - - assert!(test_string.len() == 1024 * 1024); + let test_string = one_megabyte(b"ab"); group.throughput(Throughput::Bytes(test_string.len() as u64)); group.bench_function("compress", |b| { let mut compressor = CompressorBuilder::new(); + // This outputs two codes for every 4 bytes of text. assert!(compressor.insert(Symbol::from_slice(&[b'a', 0, 0, 0, 0, 0, 0, 0]), 1)); assert!(compressor.insert(Symbol::from_slice(&[b'b', b'a', b'b', 0, 0, 0, 0, 0]), 3)); let compressor = compressor.build(); @@ -91,7 +85,7 @@ fn bench1(c: &mut Criterion) { group.finish(); let mut group = c.benchmark_group("cf=4"); - let test_string = b"abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"; + let test_string = one_megabyte(b"abcd"); group.throughput(Throughput::Bytes(test_string.len() as u64)); group.bench_function("compress", |b| { let mut compressor = CompressorBuilder::new(); @@ -99,13 +93,13 @@ fn bench1(c: &mut Criterion) { let compressor = compressor.build(); b.iter(|| unsafe { - compressor.compress_into(test_string, &mut output_buf); + compressor.compress_into(&test_string, &mut output_buf); }) }); group.finish(); let mut group = c.benchmark_group("cf=8"); - let test_string = b"abcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefgh"; + let test_string = one_megabyte(b"abcdefgh"); group.throughput(Throughput::Bytes(test_string.len() as u64)); group.bench_function("compress", |b| { let mut compressor = CompressorBuilder::new(); @@ -113,13 +107,24 @@ fn bench1(c: &mut Criterion) { let compressor = compressor.build(); b.iter(|| unsafe { - compressor.compress_into(test_string, &mut output_buf); + compressor.compress_into(&test_string, &mut output_buf); }) }); + + group.bench_function("decompress", |b| { + let mut compressor = CompressorBuilder::new(); + assert!(compressor.insert(Symbol::from_slice(b"abcdefgh"), 8)); + let compressor = compressor.build(); + let compressed = compressor.compress(&test_string); + + let decompressor = compressor.decompressor(); + + b.iter(|| decompressor.decompress(&compressed)) + }); group.finish(); let _ = std::hint::black_box(output_buf); } -criterion_group!(bench_toy, bench1); -criterion_main!(bench_toy); +criterion_group!(bench_micro, bench_compress); +criterion_main!(bench_micro); diff --git a/src/builder.rs b/src/builder.rs index 697f654..dc4cde1 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -384,8 +384,11 @@ impl CompressorBuilder { /// /// Returns the `suffix_lim`, which is the index of the two-byte code before where we know /// there are no longer suffixies in the symbol table. + /// + /// Also returns the lengths vector, which is of length `n_symbols` and contains the + /// length for each of the values. #[inline(never)] - fn finalize(&mut self) -> u8 { + fn finalize(&mut self) -> (u8, Vec) { // Create a cumulative sum of each of the elements of the input line numbers. // Do a map that includes the previously seen value as well. // Regroup symbols based on their lengths. @@ -486,7 +489,13 @@ impl CompressorBuilder { // Reset values in the hash table as well. self.lossy_pht.renumber(&new_codes); - has_suffix_code + // Pre-compute the lengths + let mut lengths = Vec::with_capacity(self.n_symbols as usize); + for symbol in &self.symbols { + lengths.push(symbol.len() as u8); + } + + (has_suffix_code, lengths) } /// Build into the final hash table. @@ -494,10 +503,11 @@ impl CompressorBuilder { // finalize the symbol table by inserting the codes_twobyte values into // the relevant parts of the `codes_onebyte` set. - let has_suffix_code = self.finalize(); + let (has_suffix_code, lengths) = self.finalize(); Compressor { symbols: self.symbols, + lengths, n_symbols: self.n_symbols, has_suffix_code, codes_two_byte: self.codes_two_byte, diff --git a/src/lib.rs b/src/lib.rs index dd69424..65c0522 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -192,12 +192,14 @@ impl Code { /// This corresponds to the maximum code with a length of 1. pub const UNUSED: Self = Code(FSST_CODE_MASK + (1 << 12)); - /// Create a new code for a symbol of given length + /// Create a new code for a symbol of given length. fn new_symbol(code: u8, len: usize) -> Self { Self(code as u16 + ((len as u16) << FSST_LEN_BITS)) } /// Code for a new symbol during the building phase. + /// + /// The code is remapped from 0..254 to 256...510. fn new_symbol_building(code: u8, len: usize) -> Self { Self(code as u16 + 256 + ((len as u16) << FSST_LEN_BITS)) } @@ -236,13 +238,11 @@ impl Debug for Code { /// Decompressor uses a symbol table to take a stream of 8-bit codes into a string. #[derive(Clone)] pub struct Decompressor<'a> { - /// Table mapping codes to symbols. - /// - /// The first 256 slots are escapes. The following slots (up to 254) - /// are for symbols with actual codes. - /// - /// This physical layout is important so that we can do straight-line execution in the decompress method. + /// Slice mapping codes to symbols. pub(crate) symbols: &'a [Symbol], + + /// Slice containing the length of each symbol in the `symbols` slice. + pub(crate) lengths: &'a [u8], } impl<'a> Decompressor<'a> { @@ -251,13 +251,13 @@ impl<'a> Decompressor<'a> { /// # Panics /// /// If the provided symbol table has length greater than 256 - pub fn new(symbols: &'a [Symbol]) -> Self { + pub fn new(symbols: &'a [Symbol], lengths: &'a [u8]) -> Self { assert!( symbols.len() <= 255, "symbol table cannot have size exceeding 255" ); - Self { symbols } + Self { symbols, lengths } } /// Decompress a byte slice that was previously returned by a compressor using @@ -283,6 +283,7 @@ impl<'a> Decompressor<'a> { in_pos += 1; } else { let symbol = self.symbols[code as usize]; + let length = self.lengths[code as usize]; // SAFETY: out_pos is always 8 bytes or more from the end of decoded buffer unsafe { let write_addr = ptr.byte_offset(out_pos as isize) as *mut u64; @@ -290,7 +291,7 @@ impl<'a> Decompressor<'a> { write_addr.write_unaligned(symbol.as_u64()); } in_pos += 1; - out_pos += symbol.len(); + out_pos += length as usize; } } @@ -329,6 +330,9 @@ pub struct Compressor { /// Table mapping codes to symbols. pub(crate) symbols: Vec, + /// Length of each symbol, values range from 1-8. + pub(crate) lengths: Vec, + /// The number of entries in the symbol table that have been populated, not counting /// the escape values. pub(crate) n_symbols: u8, @@ -387,7 +391,6 @@ impl Compressor { // Now, downshift the `word` and the `entry` to see if they align. let ignored_bits = entry.ignored_bits; - if entry.code != Code::UNUSED && compare_masked(word, entry.symbol.as_u64(), ignored_bits) { @@ -547,14 +550,22 @@ impl Compressor { /// Access the decompressor that can be used to decompress strings emitted from this /// `Compressor` instance. pub fn decompressor(&self) -> Decompressor { - Decompressor::new(self.symbol_table()) + Decompressor::new(self.symbol_table(), self.symbol_lengths()) } /// Returns a readonly slice of the current symbol table. /// /// The returned slice will have length of `n_symbols`. pub fn symbol_table(&self) -> &[Symbol] { - unsafe { std::slice::from_raw_parts(self.symbols.as_ptr(), self.n_symbols as usize) } + &self.symbols[0..self.n_symbols as usize] + } + + /// Returns a readonly slice where index `i` contains the + /// length of the symbol represented by code `i`. + /// + /// Values range from 1-8. + pub fn symbol_lengths(&self) -> &[u8] { + &self.lengths[0..self.n_symbols as usize] } }