Skip to content

Commit

Permalink
chore: reduce to 1 horizontal reduction
Browse files: browse the repository at this point in the history
  • Loading branch information
rymnc committed Jan 1, 2025
1 parent 1e689b3 commit 545d366
Showing 1 changed file with 10 additions and 10 deletions.
20 changes: 10 additions & 10 deletions fuel-vm/src/interpreter/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1117,58 +1117,58 @@ fn slices_equal_avx2(a: &[u8], b: &[u8]) -> bool {
let mut i = 0;

unsafe {
let mut mismatch = false;
let mut all_equal = _mm256_set1_epi8(-1);

while i + CHUNK <= len {
let simd_a1 = _mm256_loadu_si256(a.as_ptr().add(i) as *const _);
let simd_b1 = _mm256_loadu_si256(b.as_ptr().add(i) as *const _);

let cmp = _mm256_cmpeq_epi8(simd_a1, simd_b1);
mismatch |= _mm256_movemask_epi8(cmp) != -1;
all_equal = _mm256_and_si256(all_equal, cmp);

let simd_a2 = _mm256_loadu_si256(a.as_ptr().add(i + 32) as *const _);
let simd_b2 = _mm256_loadu_si256(b.as_ptr().add(i + 32) as *const _);

let cmp2 = _mm256_cmpeq_epi8(simd_a2, simd_b2);
mismatch |= _mm256_movemask_epi8(cmp2) != -1;
all_equal = _mm256_and_si256(all_equal, cmp2);

let simd_a3 = _mm256_loadu_si256(a.as_ptr().add(i + 64) as *const _);
let simd_b3 = _mm256_loadu_si256(b.as_ptr().add(i + 64) as *const _);

let cmp3 = _mm256_cmpeq_epi8(simd_a3, simd_b3);
mismatch |= _mm256_movemask_epi8(cmp3) != -1;
all_equal = _mm256_and_si256(all_equal, cmp3);

let simd_a4 = _mm256_loadu_si256(a.as_ptr().add(i + 96) as *const _);
let simd_b4 = _mm256_loadu_si256(b.as_ptr().add(i + 96) as *const _);

let cmp4 = _mm256_cmpeq_epi8(simd_a4, simd_b4);
mismatch |= _mm256_movemask_epi8(cmp4) != -1;
all_equal = _mm256_and_si256(all_equal, cmp4);

let simd_a5 = _mm256_loadu_si256(a.as_ptr().add(i + 128) as *const _);
let simd_b5 = _mm256_loadu_si256(b.as_ptr().add(i + 128) as *const _);

let cmp5 = _mm256_cmpeq_epi8(simd_a5, simd_b5);
mismatch |= _mm256_movemask_epi8(cmp5) != -1;
all_equal = _mm256_and_si256(all_equal, cmp5);

let simd_a6 = _mm256_loadu_si256(a.as_ptr().add(i + 160) as *const _);
let simd_b6 = _mm256_loadu_si256(b.as_ptr().add(i + 160) as *const _);

let cmp6 = _mm256_cmpeq_epi8(simd_a6, simd_b6);
mismatch |= _mm256_movemask_epi8(cmp6) != -1;
all_equal = _mm256_and_si256(all_equal, cmp6);

let simd_a7 = _mm256_loadu_si256(a.as_ptr().add(i + 192) as *const _);
let simd_b7 = _mm256_loadu_si256(b.as_ptr().add(i + 192) as *const _);

let cmp7 = _mm256_cmpeq_epi8(simd_a7, simd_b7);
mismatch |= _mm256_movemask_epi8(cmp7) != -1;
all_equal = _mm256_and_si256(all_equal, cmp7);

let simd_a8 = _mm256_loadu_si256(a.as_ptr().add(i + 224) as *const _);
let simd_b8 = _mm256_loadu_si256(b.as_ptr().add(i + 224) as *const _);

let cmp8 = _mm256_cmpeq_epi8(simd_a8, simd_b8);
mismatch |= _mm256_movemask_epi8(cmp8) != -1;
all_equal = _mm256_and_si256(all_equal, cmp8);

if mismatch {
if _mm256_movemask_epi8(all_equal) != -1 {
return false;
}

Expand Down

0 comments on commit 545d366

Please sign in to comment.