Skip to content

Commit

Permalink
better benchmarks, cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
a10y committed Sep 3, 2024
1 parent 41a4e57 commit 358ef31
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 46 deletions.
65 changes: 35 additions & 30 deletions benches/micro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ use criterion::{criterion_group, criterion_main, Criterion, Throughput};

use fsst::{CompressorBuilder, Symbol};

fn bench1(c: &mut Criterion) {
/// Builds a 1 MiB (1024 * 1024 byte) buffer by repeating `seed` end to end.
///
/// The last repetition is truncated when the seed length does not evenly
/// divide the buffer size. An empty `seed` yields an empty buffer.
fn one_megabyte(seed: &[u8]) -> Vec<u8> {
    const TARGET_LEN: usize = 1024 * 1024;

    let mut buf = Vec::with_capacity(TARGET_LEN);
    buf.extend(seed.iter().cycle().take(TARGET_LEN).copied());
    buf
}

fn bench_compress(c: &mut Criterion) {
let mut group = c.benchmark_group("compress-overhead");
group.bench_function("compress-word", |b| {
let mut compressor = CompressorBuilder::new();
Expand All @@ -21,65 +25,55 @@ fn bench1(c: &mut Criterion) {
// Reusable memory to hold outputs
let mut output_buf: Vec<u8> = Vec::with_capacity(1_024 * 1024 * 2);

group.throughput(Throughput::Bytes(8u64));
group.bench_function("compress_fastpath", |b| {
group.bench_function("compress-hashtab", |b| {
// We create a symbol table and an input that will execute exactly one iteration,
// in the fast compress_word pathway.
let mut compressor = CompressorBuilder::new();
compressor.insert(Symbol::from_slice(b"abcdefgh"), 8);
let compressor = compressor.build();

b.iter(|| unsafe {
compressor.compress_into(b"abcdefgh", &mut output_buf);
compressor.compress_into(
b"abcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefgh",
&mut output_buf,
);
});
});

group.throughput(Throughput::Bytes(4u64));
group.bench_function("compress_slowpath", |b| {
group.bench_function("compress-twobytes", |b| {
// We create a symbol table and an input that will execute exactly one iteration,
// but it misses the compress_word and needs to go on the slow path.
// in the fast compress_word pathway.
let mut compressor = CompressorBuilder::new();
compressor.insert(Symbol::from_slice(&[b'a', b'b', b'c', b'd', 0, 0, 0, 0]), 4);
compressor.insert(Symbol::from_slice(&[b'a', b'b', 0, 0, 0, 0, 0, 0]), 8);
let compressor = compressor.build();

b.iter(|| unsafe {
compressor.compress_into(b"abcd", &mut output_buf);
compressor.compress_into(b"abababababababab", &mut output_buf);
});
});
group.finish();

let mut group = c.benchmark_group("cf=1");
let test_string = b"aaaaaaaa";
let test_string = one_megabyte(b"aaaaaaaa");
group.throughput(Throughput::Bytes(test_string.len() as u64));
group.bench_function("compress", |b| {
let mut compressor = CompressorBuilder::new();
assert!(compressor.insert(Symbol::from_u8(b'a'), 1));
let compressor = compressor.build();

b.iter(|| unsafe {
compressor.compress_into(test_string, &mut output_buf);
compressor.compress_into(&test_string, &mut output_buf);
})
});
group.finish();

let mut group = c.benchmark_group("cf=2");
let test_string = {
// 1MB of data
let mut out = Vec::with_capacity(1024 * 1024);

for _ in 0..(out.capacity() / 2) {
out.push(b'a');
out.push(b'b');
}

out
};

assert!(test_string.len() == 1024 * 1024);
let test_string = one_megabyte(b"ab");

group.throughput(Throughput::Bytes(test_string.len() as u64));
group.bench_function("compress", |b| {
let mut compressor = CompressorBuilder::new();
// This outputs two codes for every 4 bytes of text.
assert!(compressor.insert(Symbol::from_slice(&[b'a', 0, 0, 0, 0, 0, 0, 0]), 1));
assert!(compressor.insert(Symbol::from_slice(&[b'b', b'a', b'b', 0, 0, 0, 0, 0]), 3));
let compressor = compressor.build();
Expand All @@ -91,35 +85,46 @@ fn bench1(c: &mut Criterion) {
group.finish();

let mut group = c.benchmark_group("cf=4");
let test_string = b"abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd";
let test_string = one_megabyte(b"abcd");
group.throughput(Throughput::Bytes(test_string.len() as u64));
group.bench_function("compress", |b| {
let mut compressor = CompressorBuilder::new();
assert!(compressor.insert(Symbol::from_slice(&[b'a', b'b', b'c', b'd', 0, 0, 0, 0]), 4));
let compressor = compressor.build();

b.iter(|| unsafe {
compressor.compress_into(test_string, &mut output_buf);
compressor.compress_into(&test_string, &mut output_buf);
})
});
group.finish();

let mut group = c.benchmark_group("cf=8");
let test_string = b"abcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefghabcdefgh";
let test_string = one_megabyte(b"abcdefgh");
group.throughput(Throughput::Bytes(test_string.len() as u64));
group.bench_function("compress", |b| {
let mut compressor = CompressorBuilder::new();
assert!(compressor.insert(Symbol::from_slice(b"abcdefgh"), 8));
let compressor = compressor.build();

b.iter(|| unsafe {
compressor.compress_into(test_string, &mut output_buf);
compressor.compress_into(&test_string, &mut output_buf);
})
});

group.bench_function("decompress", |b| {
let mut compressor = CompressorBuilder::new();
assert!(compressor.insert(Symbol::from_slice(b"abcdefgh"), 8));
let compressor = compressor.build();
let compressed = compressor.compress(&test_string);

let decompressor = compressor.decompressor();

b.iter(|| decompressor.decompress(&compressed))
});
group.finish();

let _ = std::hint::black_box(output_buf);
}

criterion_group!(bench_toy, bench1);
criterion_main!(bench_toy);
criterion_group!(bench_micro, bench_compress);
criterion_main!(bench_micro);
16 changes: 13 additions & 3 deletions src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,8 +384,11 @@ impl CompressorBuilder {
///
/// Returns the `suffix_lim`, which is the index of the two-byte code before where we know
/// there are no longer suffixes in the symbol table.
///
/// Also returns the lengths vector, which is of length `n_symbols` and contains the
/// length for each of the values.
#[inline(never)]
fn finalize(&mut self) -> u8 {
fn finalize(&mut self) -> (u8, Vec<u8>) {
// Create a cumulative sum of each of the elements of the input line numbers.
// Do a map that includes the previously seen value as well.
// Regroup symbols based on their lengths.
Expand Down Expand Up @@ -486,18 +489,25 @@ impl CompressorBuilder {
// Reset values in the hash table as well.
self.lossy_pht.renumber(&new_codes);

has_suffix_code
// Pre-compute the lengths
let mut lengths = Vec::with_capacity(self.n_symbols as usize);
for symbol in &self.symbols {
lengths.push(symbol.len() as u8);
}

(has_suffix_code, lengths)
}

/// Build into the final hash table.
pub fn build(mut self) -> Compressor {
// finalize the symbol table by inserting the codes_twobyte values into
// the relevant parts of the `codes_onebyte` set.

let has_suffix_code = self.finalize();
let (has_suffix_code, lengths) = self.finalize();

Compressor {
symbols: self.symbols,
lengths,
n_symbols: self.n_symbols,
has_suffix_code,
codes_two_byte: self.codes_two_byte,
Expand Down
37 changes: 24 additions & 13 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,12 +192,14 @@ impl Code {
/// This corresponds to the maximum code with a length of 1.
pub const UNUSED: Self = Code(FSST_CODE_MASK + (1 << 12));

/// Create a new code for a symbol of given length
/// Create a new code for a symbol of given length.
fn new_symbol(code: u8, len: usize) -> Self {
    // The symbol length is shifted left by FSST_LEN_BITS and added on top of the code.
    let packed_len = (len as u16) << FSST_LEN_BITS;
    Self(u16::from(code) + packed_len)
}

/// Create the code used for a new symbol while the table is still being built.
///
/// The code is offset by 256, remapping it from 0..254 to 256..510, and the
/// symbol length is shifted left by `FSST_LEN_BITS` and added on top.
fn new_symbol_building(code: u8, len: usize) -> Self {
    let remapped = u16::from(code) + 256;
    let packed_len = (len as u16) << FSST_LEN_BITS;
    Self(remapped + packed_len)
}
Expand Down Expand Up @@ -236,13 +238,11 @@ impl Debug for Code {
/// Decompressor uses a symbol table to take a stream of 8-bit codes into a string.
#[derive(Clone)]
pub struct Decompressor<'a> {
/// Table mapping codes to symbols.
///
/// The first 256 slots are escapes. The following slots (up to 254)
/// are for symbols with actual codes.
///
/// This physical layout is important so that we can do straight-line execution in the decompress method.
/// Slice mapping codes to symbols.
pub(crate) symbols: &'a [Symbol],

/// Slice containing the length of each symbol in the `symbols` slice.
pub(crate) lengths: &'a [u8],
}

impl<'a> Decompressor<'a> {
Expand All @@ -251,13 +251,13 @@ impl<'a> Decompressor<'a> {
/// # Panics
///
/// If the provided symbol table has length greater than 255
pub fn new(symbols: &'a [Symbol]) -> Self {
pub fn new(symbols: &'a [Symbol], lengths: &'a [u8]) -> Self {
assert!(
symbols.len() <= 255,
"symbol table cannot have size exceeding 255"
);

Self { symbols }
Self { symbols, lengths }
}

/// Decompress a byte slice that was previously returned by a compressor using
Expand All @@ -283,14 +283,15 @@ impl<'a> Decompressor<'a> {
in_pos += 1;
} else {
let symbol = self.symbols[code as usize];
let length = self.lengths[code as usize];
// SAFETY: out_pos is always 8 bytes or more from the end of decoded buffer
unsafe {
let write_addr = ptr.byte_offset(out_pos as isize) as *mut u64;
// Perform 8 byte unaligned write.
write_addr.write_unaligned(symbol.as_u64());
}
in_pos += 1;
out_pos += symbol.len();
out_pos += length as usize;
}
}

Expand Down Expand Up @@ -329,6 +330,9 @@ pub struct Compressor {
/// Table mapping codes to symbols.
pub(crate) symbols: Vec<Symbol>,

/// Length of each symbol, values range from 1-8.
pub(crate) lengths: Vec<u8>,

/// The number of entries in the symbol table that have been populated, not counting
/// the escape values.
pub(crate) n_symbols: u8,
Expand Down Expand Up @@ -387,7 +391,6 @@ impl Compressor {

// Now, downshift the `word` and the `entry` to see if they align.
let ignored_bits = entry.ignored_bits;

if entry.code != Code::UNUSED
&& compare_masked(word, entry.symbol.as_u64(), ignored_bits)
{
Expand Down Expand Up @@ -547,14 +550,22 @@ impl Compressor {
/// Access the decompressor that can be used to decompress strings emitted from this
/// `Compressor` instance.
pub fn decompressor(&self) -> Decompressor {
Decompressor::new(self.symbol_table())
Decompressor::new(self.symbol_table(), self.symbol_lengths())
}

/// Returns a readonly slice of the current symbol table.
///
/// The returned slice will have length of `n_symbols`.
pub fn symbol_table(&self) -> &[Symbol] {
unsafe { std::slice::from_raw_parts(self.symbols.as_ptr(), self.n_symbols as usize) }
&self.symbols[0..self.n_symbols as usize]
}

/// Returns a readonly slice of symbol lengths, where the entry at index `i`
/// is the length of the symbol represented by code `i`.
///
/// The returned slice has `n_symbols` entries, with values ranging from 1-8.
pub fn symbol_lengths(&self) -> &[u8] {
    let populated = usize::from(self.n_symbols);
    &self.lengths[..populated]
}
}

Expand Down

0 comments on commit 358ef31

Please sign in to comment.