Skip to content

Commit

Permalink
Minor ARM NEON speedup
Browse files Browse the repository at this point in the history
  • Loading branch information
kimwalisch committed Nov 9, 2017
1 parent 3df6ef9 commit c0424ab
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion libpopcnt.h
Original file line number Diff line number Diff line change
@@ -682,6 +682,7 @@ static inline uint64_t popcnt(const void* data, uint64_t size)
uint64_t tmp[2];
uint64_t chunk_size = 128;
uint64_t n = size / chunk_size;
+ uint64_t is_sum = 14;
uint64_t i;

uint8x16x4_t input0;
@@ -709,8 +710,9 @@ static inline uint64_t popcnt(const void* data, uint64_t size)
t2 = vaddq_u8(t2, vcntq_u8(input1.val[2]));
t3 = vaddq_u8(t3, vcntq_u8(input1.val[3]));

- if ((i & 7) == 7)
+ if (i == is_sum)
{
+ is_sum += 14;
sum = vpadalq(sum, t0);
sum = vpadalq(sum, t1);
sum = vpadalq(sum, t2);
Expand Down

0 comments on commit c0424ab

Please sign in to comment.