Skip to content

Commit

Permalink
chore: increase number of cmp
Browse files (browse the repository at this point in the history)
  • Loading branch information
rymnc committed Jan 1, 2025
1 parent f14a67e commit 1e689b3
Showing 1 changed file with 50 additions and 5 deletions.
55 changes: 50 additions & 5 deletions fuel-vm/src/interpreter/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1104,7 +1104,7 @@ fn slices_equal_avx2(a: &[u8], b: &[u8]) -> bool {
return false;
}

- const CHUNK: usize = 32;
+ const CHUNK: usize = 256;

// if the slices are small, we don't need to
// use SIMD instructions due to overhead
Expand All @@ -1117,13 +1117,58 @@ fn slices_equal_avx2(a: &[u8], b: &[u8]) -> bool {
let mut i = 0;

unsafe {
let mut mismatch = false;

while i + CHUNK <= len {
- let simd_a = _mm256_loadu_si256(a.as_ptr().add(i) as *const _);
- let simd_b = _mm256_loadu_si256(b.as_ptr().add(i) as *const _);
+ let simd_a1 = _mm256_loadu_si256(a.as_ptr().add(i) as *const _);
+ let simd_b1 = _mm256_loadu_si256(b.as_ptr().add(i) as *const _);

let cmp = _mm256_cmpeq_epi8(simd_a1, simd_b1);
mismatch |= _mm256_movemask_epi8(cmp) != -1;

let simd_a2 = _mm256_loadu_si256(a.as_ptr().add(i + 32) as *const _);
let simd_b2 = _mm256_loadu_si256(b.as_ptr().add(i + 32) as *const _);

let cmp2 = _mm256_cmpeq_epi8(simd_a2, simd_b2);
mismatch |= _mm256_movemask_epi8(cmp2) != -1;

let simd_a3 = _mm256_loadu_si256(a.as_ptr().add(i + 64) as *const _);
let simd_b3 = _mm256_loadu_si256(b.as_ptr().add(i + 64) as *const _);

let cmp3 = _mm256_cmpeq_epi8(simd_a3, simd_b3);
mismatch |= _mm256_movemask_epi8(cmp3) != -1;

let simd_a4 = _mm256_loadu_si256(a.as_ptr().add(i + 96) as *const _);
let simd_b4 = _mm256_loadu_si256(b.as_ptr().add(i + 96) as *const _);

let cmp4 = _mm256_cmpeq_epi8(simd_a4, simd_b4);
mismatch |= _mm256_movemask_epi8(cmp4) != -1;

let simd_a5 = _mm256_loadu_si256(a.as_ptr().add(i + 128) as *const _);
let simd_b5 = _mm256_loadu_si256(b.as_ptr().add(i + 128) as *const _);

let cmp5 = _mm256_cmpeq_epi8(simd_a5, simd_b5);
mismatch |= _mm256_movemask_epi8(cmp5) != -1;

let simd_a6 = _mm256_loadu_si256(a.as_ptr().add(i + 160) as *const _);
let simd_b6 = _mm256_loadu_si256(b.as_ptr().add(i + 160) as *const _);

let cmp6 = _mm256_cmpeq_epi8(simd_a6, simd_b6);
mismatch |= _mm256_movemask_epi8(cmp6) != -1;

let simd_a7 = _mm256_loadu_si256(a.as_ptr().add(i + 192) as *const _);
let simd_b7 = _mm256_loadu_si256(b.as_ptr().add(i + 192) as *const _);

let cmp7 = _mm256_cmpeq_epi8(simd_a7, simd_b7);
mismatch |= _mm256_movemask_epi8(cmp7) != -1;

let simd_a8 = _mm256_loadu_si256(a.as_ptr().add(i + 224) as *const _);
let simd_b8 = _mm256_loadu_si256(b.as_ptr().add(i + 224) as *const _);

- let cmp = _mm256_cmpeq_epi8(simd_a, simd_b);
+ let cmp8 = _mm256_cmpeq_epi8(simd_a8, simd_b8);
mismatch |= _mm256_movemask_epi8(cmp8) != -1;

- if _mm256_movemask_epi8(cmp) != -1 {
+ if mismatch {
return false;
}

Expand Down

0 comments on commit 1e689b3

Please sign in to comment.