From 545d366912e2b6ff7505a4b72d862cb9bb20ee36 Mon Sep 17 00:00:00 2001 From: rymnc <43716372+rymnc@users.noreply.github.com> Date: Thu, 2 Jan 2025 00:16:18 +0530 Subject: [PATCH] chore: reduce to 1 horizontal reduction --- fuel-vm/src/interpreter/memory.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fuel-vm/src/interpreter/memory.rs b/fuel-vm/src/interpreter/memory.rs index 0d24e75df..a7559e33a 100644 --- a/fuel-vm/src/interpreter/memory.rs +++ b/fuel-vm/src/interpreter/memory.rs @@ -1117,58 +1117,58 @@ fn slices_equal_avx2(a: &[u8], b: &[u8]) -> bool { let mut i = 0; unsafe { - let mut mismatch = false; + let mut all_equal = _mm256_set1_epi8(-1); while i + CHUNK <= len { let simd_a1 = _mm256_loadu_si256(a.as_ptr().add(i) as *const _); let simd_b1 = _mm256_loadu_si256(b.as_ptr().add(i) as *const _); let cmp = _mm256_cmpeq_epi8(simd_a1, simd_b1); - mismatch |= _mm256_movemask_epi8(cmp) != -1; + all_equal = _mm256_and_si256(all_equal, cmp); let simd_a2 = _mm256_loadu_si256(a.as_ptr().add(i + 32) as *const _); let simd_b2 = _mm256_loadu_si256(b.as_ptr().add(i + 32) as *const _); let cmp2 = _mm256_cmpeq_epi8(simd_a2, simd_b2); - mismatch |= _mm256_movemask_epi8(cmp2) != -1; + all_equal = _mm256_and_si256(all_equal, cmp2); let simd_a3 = _mm256_loadu_si256(a.as_ptr().add(i + 64) as *const _); let simd_b3 = _mm256_loadu_si256(b.as_ptr().add(i + 64) as *const _); let cmp3 = _mm256_cmpeq_epi8(simd_a3, simd_b3); - mismatch |= _mm256_movemask_epi8(cmp3) != -1; + all_equal = _mm256_and_si256(all_equal, cmp3); let simd_a4 = _mm256_loadu_si256(a.as_ptr().add(i + 96) as *const _); let simd_b4 = _mm256_loadu_si256(b.as_ptr().add(i + 96) as *const _); let cmp4 = _mm256_cmpeq_epi8(simd_a4, simd_b4); - mismatch |= _mm256_movemask_epi8(cmp4) != -1; + all_equal = _mm256_and_si256(all_equal, cmp4); let simd_a5 = _mm256_loadu_si256(a.as_ptr().add(i + 128) as *const _); let simd_b5 = _mm256_loadu_si256(b.as_ptr().add(i + 128) as *const _); let cmp5 = _mm256_cmpeq_epi8(simd_a5, simd_b5); - mismatch |= _mm256_movemask_epi8(cmp5) != -1; + all_equal = _mm256_and_si256(all_equal, cmp5); let simd_a6 = _mm256_loadu_si256(a.as_ptr().add(i + 160) as *const _); let simd_b6 = _mm256_loadu_si256(b.as_ptr().add(i + 160) as *const _); let cmp6 = _mm256_cmpeq_epi8(simd_a6, simd_b6); - mismatch |= _mm256_movemask_epi8(cmp6) != -1; + all_equal = _mm256_and_si256(all_equal, cmp6); let simd_a7 = _mm256_loadu_si256(a.as_ptr().add(i + 192) as *const _); let simd_b7 = _mm256_loadu_si256(b.as_ptr().add(i + 192) as *const _); let cmp7 = _mm256_cmpeq_epi8(simd_a7, simd_b7); - mismatch |= _mm256_movemask_epi8(cmp7) != -1; + all_equal = _mm256_and_si256(all_equal, cmp7); let simd_a8 = _mm256_loadu_si256(a.as_ptr().add(i + 224) as *const _); let simd_b8 = _mm256_loadu_si256(b.as_ptr().add(i + 224) as *const _); let cmp8 = _mm256_cmpeq_epi8(simd_a8, simd_b8); - mismatch |= _mm256_movemask_epi8(cmp8) != -1; + all_equal = _mm256_and_si256(all_equal, cmp8); - if mismatch { + if _mm256_movemask_epi8(all_equal) != -1 { return false; }