From 25537648e587e03717c334d4853fac5cbed613de Mon Sep 17 00:00:00 2001
From: Brian Pane
Date: Sun, 8 Dec 2024 12:05:36 -0800
Subject: [PATCH] Parallelize the checking of the first two bytes of a potential match.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before-and-after benchmark results on x86_64:

```
Benchmark 1 (55 runs): ./compress-baseline 1 rs silesia-small.tar
  measurement          mean ± σ            min … max           outliers         delta
  wall_time          91.4ms ± 1.12ms    89.9ms … 97.9ms          1 ( 2%)        0%
  peak_rss           26.7MB ± 52.9KB    26.6MB … 26.7MB         11 (20%)        0%
  cpu_cycles          341M  ±  743K      340M  …  343M           0 ( 0%)        0%
  instructions        748M  ±  261       748M  …  748M           0 ( 0%)        0%
  cache_references    401K  ± 6.61K      398K  …  436K           8 (15%)        0%
  cache_misses        298K  ± 8.08K      273K  …  312K           9 (16%)        0%
  branch_misses      3.28M  ± 4.77K     3.27M  … 3.29M           0 ( 0%)        0%
Benchmark 2 (56 runs): ./target/release/examples/compress 1 rs silesia-small.tar
  measurement          mean ± σ            min … max           outliers         delta
  wall_time          89.5ms ±  596us    88.1ms … 90.9ms          0 ( 0%)        ⚡-  2.1% ±  0.4%
  peak_rss           26.7MB ± 50.7KB    26.6MB … 26.7MB         10 (18%)          +  0.0% ±  0.1%
  cpu_cycles          334M  ±  657K      332M  …  335M           1 ( 2%)        ⚡-  2.3% ±  0.1%
  instructions        747M  ±  274       747M  …  747M           1 ( 2%)          -  0.1% ±  0.0%
  cache_references    400K  ± 3.67K      397K  …  418K           6 (11%)          -  0.3% ±  0.5%
  cache_misses        299K  ± 5.78K      278K  …  305K           5 ( 9%)          +  0.4% ±  0.9%
  branch_misses      3.16M  ± 5.78K     3.15M  … 3.18M           1 ( 2%)        ⚡-  3.6% ±  0.1%
```
---
 zlib-rs/src/deflate/algorithm/quick.rs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/zlib-rs/src/deflate/algorithm/quick.rs b/zlib-rs/src/deflate/algorithm/quick.rs
index 4b397ff6..c7b35264 100644
--- a/zlib-rs/src/deflate/algorithm/quick.rs
+++ b/zlib-rs/src/deflate/algorithm/quick.rs
@@ -100,7 +100,12 @@ pub fn deflate_quick(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSt
                 let str_start = &state.window.filled()[state.strstart..];
                 let match_start = &state.window.filled()[hash_head as usize..];
 
-                if str_start[0] == match_start[0] && str_start[1] == match_start[1] {
+                macro_rules! first_two_bytes {
+                    ($slice:expr, $offset:expr) => {
+                        $slice[$offset] as u16 | ($slice[$offset + 1] as u16) << 8
+                    }
+                }
+                if first_two_bytes!(str_start, 0) == first_two_bytes!(match_start, 0) {
                     let mut match_len = crate::deflate::compare256::compare256_slice(
                         &str_start[2..],
                         &match_start[2..],