Skip to content

Commit

Permalink
Fix: Missing stdint
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian committed Nov 19, 2024
1 parent 8b98963 commit daa41bd
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 26 deletions.
8 changes: 4 additions & 4 deletions include/simsimd/binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ SIMSIMD_INTERNAL simsimd_u32_t _simsimd_reduce_u8x16_neon(uint8x16_t vec) {
// Sum the widened halves
uint16x8_t sum16 = vaddq_u16(low_half, high_half);

// Now reduce the `uint16x8_t` to a single `uint32_t`
uint32x4_t sum32 = vpaddlq_u16(sum16); // pairwise add into 32-bit integers
uint64x2_t sum64 = vpaddlq_u32(sum32); // pairwise add into 64-bit integers
uint32_t final_sum = vaddvq_u64(sum64); // final horizontal add to 32-bit result
// Now reduce the `uint16x8_t` to a single `simsimd_u32_t`
uint32x4_t sum32 = vpaddlq_u16(sum16); // pairwise add into 32-bit integers
uint64x2_t sum64 = vpaddlq_u32(sum32); // pairwise add into 64-bit integers
simsimd_u32_t final_sum = vaddvq_u64(sum64); // final horizontal add to 32-bit result
return final_sum;
}

Expand Down
8 changes: 4 additions & 4 deletions include/simsimd/dot.h
Original file line number Diff line number Diff line change
Expand Up @@ -362,9 +362,9 @@ SIMSIMD_PUBLIC void simsimd_dot_i8_neon(simsimd_i8_t const *a, simsimd_i8_t cons
}

// Take care of the tail:
int32_t ab = vaddvq_s32(ab_vec);
simsimd_i32_t ab = vaddvq_s32(ab_vec);
for (; i < n; ++i) {
int32_t ai = a[i], bi = b[i];
simsimd_i32_t ai = a[i], bi = b[i];
ab += ai * bi;
}

Expand All @@ -383,9 +383,9 @@ SIMSIMD_PUBLIC void simsimd_dot_u8_neon(simsimd_u8_t const *a, simsimd_u8_t cons
}

// Take care of the tail:
uint32_t ab = vaddvq_u32(ab_vec);
simsimd_u32_t ab = vaddvq_u32(ab_vec);
for (; i < n; ++i) {
uint32_t ai = a[i], bi = b[i];
simsimd_u32_t ai = a[i], bi = b[i];
ab += ai * bi;
}

Expand Down
2 changes: 1 addition & 1 deletion include/simsimd/simsimd.h
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ SIMSIMD_PUBLIC simsimd_capability_t _simsimd_capabilities_x86(void) {
SIMSIMD_PUBLIC simsimd_capability_t _simsimd_capabilities_arm(void) {
#if defined(_SIMSIMD_DEFINED_APPLE)
// On Apple Silicon, `mrs` is not allowed in user-space, so we need to use the `sysctl` API.
uint32_t supports_neon = 0, supports_fp16 = 0, supports_bf16 = 0, supports_i8mm = 0;
unsigned supports_neon = 0, supports_fp16 = 0, supports_bf16 = 0, supports_i8mm = 0;
size_t size = sizeof(supports_neon);
if (sysctlbyname("hw.optional.neon", &supports_neon, &size, NULL, 0) != 0) supports_neon = 0;
if (sysctlbyname("hw.optional.arm.FEAT_FP16", &supports_fp16, &size, NULL, 0) != 0) supports_fp16 = 0;
Expand Down
34 changes: 17 additions & 17 deletions include/simsimd/spatial.h
Original file line number Diff line number Diff line change
Expand Up @@ -595,10 +595,10 @@ SIMSIMD_PUBLIC void simsimd_l2sq_i8_neon(simsimd_i8_t const *a, simsimd_i8_t con
uint8x16_t d_vec = vreinterpretq_u8_s8(vabdq_s8(a_vec, b_vec));
d2_vec = vdotq_u32(d2_vec, d_vec, d_vec);
}
uint32_t d2 = vaddvq_u32(d2_vec);
simsimd_u32_t d2 = vaddvq_u32(d2_vec);
for (; i < n; ++i) {
int32_t n = (int32_t)a[i] - b[i];
d2 += (uint32_t)(n * n);
simsimd_i32_t n = (simsimd_i32_t)a[i] - b[i];
d2 += (simsimd_u32_t)(n * n);
}
*result = d2;
}
Expand Down Expand Up @@ -693,9 +693,9 @@ SIMSIMD_PUBLIC void simsimd_cos_i8_neon(simsimd_i8_t const *a, simsimd_i8_t cons
// products_high_vec = vmmlaq_s32(products_high_vec, v_vec, y_w_vecs.val[1]);
// }
// int32x4_t products_vec = vaddq_s32(products_high_vec, products_low_vec);
// int32_t a2 = products_vec[0];
// int32_t ab = products_vec[1];
// int32_t b2 = products_vec[3];
// simsimd_i32_t a2 = products_vec[0];
// simsimd_i32_t ab = products_vec[1];
// simsimd_i32_t b2 = products_vec[3];
//
// That solution is elegant, but it requires the additional `+i8mm` extension and is currently slower,
// at least on AWS Graviton 3.
Expand All @@ -709,13 +709,13 @@ SIMSIMD_PUBLIC void simsimd_cos_i8_neon(simsimd_i8_t const *a, simsimd_i8_t cons
a2_vec = vdotq_s32(a2_vec, a_vec, a_vec);
b2_vec = vdotq_s32(b2_vec, b_vec, b_vec);
}
int32_t ab = vaddvq_s32(ab_vec);
int32_t a2 = vaddvq_s32(a2_vec);
int32_t b2 = vaddvq_s32(b2_vec);
simsimd_i32_t ab = vaddvq_s32(ab_vec);
simsimd_i32_t a2 = vaddvq_s32(a2_vec);
simsimd_i32_t b2 = vaddvq_s32(b2_vec);

// Take care of the tail:
for (; i < n; ++i) {
int32_t ai = a[i], bi = b[i];
simsimd_i32_t ai = a[i], bi = b[i];
ab += ai * bi, a2 += ai * ai, b2 += bi * bi;
}

Expand All @@ -737,10 +737,10 @@ SIMSIMD_PUBLIC void simsimd_l2sq_u8_neon(simsimd_u8_t const *a, simsimd_u8_t con
uint8x16_t d_vec = vabdq_u8(a_vec, b_vec);
d2_vec = vdotq_u32(d2_vec, d_vec, d_vec);
}
uint32_t d2 = vaddvq_u32(d2_vec);
simsimd_u32_t d2 = vaddvq_u32(d2_vec);
for (; i < n; ++i) {
int32_t n = (int32_t)a[i] - b[i];
d2 += (uint32_t)(n * n);
simsimd_i32_t n = (simsimd_i32_t)a[i] - b[i];
d2 += (simsimd_u32_t)(n * n);
}
*result = d2;
}
Expand All @@ -759,13 +759,13 @@ SIMSIMD_PUBLIC void simsimd_cos_u8_neon(simsimd_u8_t const *a, simsimd_u8_t cons
a2_vec = vdotq_u32(a2_vec, a_vec, a_vec);
b2_vec = vdotq_u32(b2_vec, b_vec, b_vec);
}
uint32_t ab = vaddvq_u32(ab_vec);
uint32_t a2 = vaddvq_u32(a2_vec);
uint32_t b2 = vaddvq_u32(b2_vec);
simsimd_u32_t ab = vaddvq_u32(ab_vec);
simsimd_u32_t a2 = vaddvq_u32(a2_vec);
simsimd_u32_t b2 = vaddvq_u32(b2_vec);

// Take care of the tail:
for (; i < n; ++i) {
uint32_t ai = a[i], bi = b[i];
simsimd_u32_t ai = a[i], bi = b[i];
ab += ai * bi, a2 += ai * ai, b2 += bi * bi;
}

Expand Down

0 comments on commit daa41bd

Please sign in to comment.