From 179fd247ca7555a751c27a19973791d2c20d3708 Mon Sep 17 00:00:00 2001 From: rymnc <43716372+rymnc@users.noreply.github.com> Date: Wed, 1 Jan 2025 23:47:06 +0530 Subject: [PATCH] test: different intrinsic for cmp --- fuel-vm/src/interpreter/memory.rs | 39 ++++++++++++++----------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/fuel-vm/src/interpreter/memory.rs b/fuel-vm/src/interpreter/memory.rs index 5e96fe59da..dd2933b742 100644 --- a/fuel-vm/src/interpreter/memory.rs +++ b/fuel-vm/src/interpreter/memory.rs @@ -1117,13 +1117,6 @@ fn slices_equal_avx2(a: &[u8], b: &[u8]) -> bool { let mut i = 0; unsafe { - let mut aggregate_mask_a = -1i32; - let mut aggregate_mask_b = -1i32; - let mut aggregate_mask_c = -1i32; - let mut aggregate_mask_d = -1i32; - let mut aggregate_mask_a_b = -1i32; - let mut aggregate_mask_c_d = -1i32; - while i + CHUNK <= len { let simd_a1 = _mm256_loadu_si256(a.as_ptr().add(i) as *const _); let simd_b1 = _mm256_loadu_si256(b.as_ptr().add(i) as *const _); @@ -1149,24 +1142,26 @@ fn slices_equal_avx2(a: &[u8], b: &[u8]) -> bool { let simd_a8 = _mm256_loadu_si256(a.as_ptr().add(i + 224) as *const _); let simd_b8 = _mm256_loadu_si256(b.as_ptr().add(i + 224) as *const _); - let cmp1 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(simd_a1, simd_b1)); - let cmp2 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(simd_a2, simd_b2)); - let cmp3 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(simd_a3, simd_b3)); - let cmp4 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(simd_a4, simd_b4)); - let cmp5 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(simd_a5, simd_b5)); - let cmp6 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(simd_a6, simd_b6)); - let cmp7 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(simd_a7, simd_b7)); - let cmp8 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(simd_a8, simd_b8)); + let cmp1 = _mm256_mpsadbw_epu8(simd_a1, simd_b1, 0); + let cmp2 = _mm256_mpsadbw_epu8(simd_a2, simd_b2, 0); + let cmp3 = _mm256_mpsadbw_epu8(simd_a3, simd_b3, 0); + let cmp4 = _mm256_mpsadbw_epu8(simd_a4, simd_b4, 0); + let cmp5 = _mm256_mpsadbw_epu8(simd_a5, simd_b5, 0); + let cmp6 = _mm256_mpsadbw_epu8(simd_a6, simd_b6, 0); + let cmp7 = _mm256_mpsadbw_epu8(simd_a7, simd_b7, 0); + let cmp8 = _mm256_mpsadbw_epu8(simd_a8, simd_b8, 0); + + let cmp9 = _mm256_testz_si256(cmp1, cmp2); + let cmp10 = _mm256_testz_si256(cmp3, cmp4); + let cmp11 = _mm256_testz_si256(cmp5, cmp6); + let cmp12 = _mm256_testz_si256(cmp7, cmp8); - aggregate_mask_a &= cmp1 & cmp2; - aggregate_mask_b &= cmp3 & cmp4; - aggregate_mask_c &= cmp5 & cmp6; - aggregate_mask_d &= cmp7 & cmp8; + let cmp13 = _mm256_testz_si256(cmp9, cmp10); + let cmp14 = _mm256_testz_si256(cmp11, cmp12); - aggregate_mask_a_b &= aggregate_mask_a & aggregate_mask_b; - aggregate_mask_c_d &= aggregate_mask_c & aggregate_mask_d; + let cmp15 = _mm256_testz_si256(cmp13, cmp14); - if aggregate_mask_a_b & aggregate_mask_c_d != -1i32 { + if cmp15 != -1i32 { return false; }