Skip to content

Commit

Permalink
test: different intrinsic for cmp
Browse files (browse the repository at this point in the history)
  • Loading branch information
rymnc committed Jan 1, 2025
1 parent 123aab9 commit 179fd24
Showing 1 changed file with 17 additions and 22 deletions.
39 changes: 17 additions & 22 deletions fuel-vm/src/interpreter/memory.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -1117,13 +1117,6 @@ fn slices_equal_avx2(a: &[u8], b: &[u8]) -> bool {
let mut i = 0;

unsafe {
let mut aggregate_mask_a = -1i32;
let mut aggregate_mask_b = -1i32;
let mut aggregate_mask_c = -1i32;
let mut aggregate_mask_d = -1i32;
let mut aggregate_mask_a_b = -1i32;
let mut aggregate_mask_c_d = -1i32;

while i + CHUNK <= len {
let simd_a1 = _mm256_loadu_si256(a.as_ptr().add(i) as *const _);
let simd_b1 = _mm256_loadu_si256(b.as_ptr().add(i) as *const _);
Expand All @@ -1149,24 +1142,26 @@ fn slices_equal_avx2(a: &[u8], b: &[u8]) -> bool {
let simd_a8 = _mm256_loadu_si256(a.as_ptr().add(i + 224) as *const _);
let simd_b8 = _mm256_loadu_si256(b.as_ptr().add(i + 224) as *const _);

let cmp1 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(simd_a1, simd_b1));
let cmp2 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(simd_a2, simd_b2));
let cmp3 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(simd_a3, simd_b3));
let cmp4 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(simd_a4, simd_b4));
let cmp5 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(simd_a5, simd_b5));
let cmp6 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(simd_a6, simd_b6));
let cmp7 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(simd_a7, simd_b7));
let cmp8 = _mm256_movemask_epi8(_mm256_cmpeq_epi32(simd_a8, simd_b8));
let cmp1 = _mm256_mpsadbw_epu8(simd_a1, simd_b1, 0);
let cmp2 = _mm256_mpsadbw_epu8(simd_a2, simd_b2, 0);
let cmp3 = _mm256_mpsadbw_epu8(simd_a3, simd_b3, 0);
let cmp4 = _mm256_mpsadbw_epu8(simd_a4, simd_b4, 0);
let cmp5 = _mm256_mpsadbw_epu8(simd_a5, simd_b5, 0);
let cmp6 = _mm256_mpsadbw_epu8(simd_a6, simd_b6, 0);
let cmp7 = _mm256_mpsadbw_epu8(simd_a7, simd_b7, 0);
let cmp8 = _mm256_mpsadbw_epu8(simd_a8, simd_b8, 0);

let cmp9 = _mm256_testz_si256(cmp1, cmp2);
let cmp10 = _mm256_testz_si256(cmp3, cmp4);
let cmp11 = _mm256_testz_si256(cmp5, cmp6);
let cmp12 = _mm256_testz_si256(cmp7, cmp8);

aggregate_mask_a &= cmp1 & cmp2;
aggregate_mask_b &= cmp3 & cmp4;
aggregate_mask_c &= cmp5 & cmp6;
aggregate_mask_d &= cmp7 & cmp8;
let cmp13 = _mm256_testz_si256(cmp9, cmp10);
let cmp14 = _mm256_testz_si256(cmp11, cmp12);

aggregate_mask_a_b &= aggregate_mask_a & aggregate_mask_b;
aggregate_mask_c_d &= aggregate_mask_c & aggregate_mask_d;
let cmp15 = _mm256_testz_si256(cmp13, cmp14);

if aggregate_mask_a_b & aggregate_mask_c_d != -1i32 {
if cmp15 != -1i32 {
return false;
}

Expand Down

0 comments on commit 179fd24

Please sign in to comment.