diff --git a/docs/2023.html b/docs/2023.html
index c907b9ec67..a836a28b7e 100644
--- a/docs/2023.html
+++ b/docs/2023.html
@@ -59,6 +59,7 @@
New features
NEON optimizations of function DescrIntEncode32f.
NEON optimizations of function DescrIntEncode16f.
NEON optimizations of function DescrIntDecode32f.
+ NEON optimizations of function DescrIntDecode16f.
Bug fixing
diff --git a/src/Simd/SimdNeonDescrInt.cpp b/src/Simd/SimdNeonDescrInt.cpp
index 6a3019ca48..376c853587 100644
--- a/src/Simd/SimdNeonDescrInt.cpp
+++ b/src/Simd/SimdNeonDescrInt.cpp
@@ -102,7 +102,7 @@ namespace Simd
_encode16f = GetEncode16f(_depth);
_decode32f = GetDecode32f(_depth);
- if (_depth >= 9) _decode16f = GetDecode16f(_depth);
+ if (_depth >= 8) _decode16f = GetDecode16f(_depth);
//_cosineDistance = GetCosineDistance(_depth);
//_macroCosineDistancesDirect = GetMacroCosineDistancesDirect(_depth);
diff --git a/src/Simd/SimdNeonDescrIntDec.cpp b/src/Simd/SimdNeonDescrIntDec.cpp
index bb8c778414..0041ea4cfd 100644
--- a/src/Simd/SimdNeonDescrIntDec.cpp
+++ b/src/Simd/SimdNeonDescrIntDec.cpp
@@ -219,6 +219,38 @@ namespace Simd
//-------------------------------------------------------------------------------------------------
+ static void Decode16f8(const uint8_t* src, float scale, float shift, size_t size, uint16_t * dst) // Decodes `size` 8-bit codes from `src` into half-precision floats (stored as raw uint16_t) at `dst`, computing shift + scale * code.
+ {
+ assert(size % 8 == 0); // Each loop iteration consumes exactly 8 codes; there is no tail handling.
+ float32x4_t _scale = vdupq_n_f32(scale); // Broadcast `scale` to all four float lanes.
+ float32x4_t _shift = vdupq_n_f32(shift); // Broadcast `shift` to all four float lanes.
+ size_t i = 0; // Counts processed elements; src/dst are advanced separately inside the loops.
+ if (Aligned(src) && Aligned(dst)) // NOTE(review): Aligned() is a project helper, presumably a vector-alignment check - confirm.
+ {
+ for (; i < size; i += 8) // NOTE(review): this body is textually identical to the else-branch below; confirm whether aligned load/store variants were intended here.
+ {
+ uint16x8_t u16 = vmovl_u8(LoadHalf(src)); // Widen the 8 loaded 8-bit codes to 16-bit lanes.
+ Store(dst + 0, (uint16x4_t)vcvt_f16_f32(vmlaq_f32(_shift, _scale, vcvtq_f32_u32(UnpackU16<0>(u16))))); // UnpackU16<0> presumably yields the lower four lanes as u32 (TODO confirm); then u32 -> f32, shift + scale * x, f32 -> f16, store 4 values.
+ Store(dst + 4, (uint16x4_t)vcvt_f16_f32(vmlaq_f32(_shift, _scale, vcvtq_f32_u32(UnpackU16<1>(u16))))); // Same pipeline for what is presumably the upper four lanes (TODO confirm), written to the next 4 outputs.
+ src += 8; // Advance past the 8 consumed input bytes.
+ dst += 8; // Advance past the 8 produced 16-bit outputs.
+ }
+ }
+ else
+ {
+ for (; i < size; i += 8) // Fallback path; as written it performs the same operations as the branch above.
+ {
+ uint16x8_t u16 = vmovl_u8(LoadHalf(src)); // Widen the 8 loaded 8-bit codes to 16-bit lanes.
+ Store(dst + 0, (uint16x4_t)vcvt_f16_f32(vmlaq_f32(_shift, _scale, vcvtq_f32_u32(UnpackU16<0>(u16))))); // First 4 outputs: u32 -> f32, shift + scale * x, f32 -> f16.
+ Store(dst + 4, (uint16x4_t)vcvt_f16_f32(vmlaq_f32(_shift, _scale, vcvtq_f32_u32(UnpackU16<1>(u16))))); // Next 4 outputs, same pipeline.
+ src += 8; // Advance past the 8 consumed input bytes.
+ dst += 8; // Advance past the 8 produced 16-bit outputs.
+ }
+ }
+ }
+
+ //-------------------------------------------------------------------------------------------------
+
Base::DescrInt::Decode32fPtr GetDecode32f(size_t depth)
{
switch (depth)
@@ -240,7 +272,7 @@ namespace Simd
//case 5: return Decode16f5;
//case 6: return Decode16f6;
//case 7: return Decode16f7;
- //case 8: return Decode16f8;
+ case 8: return Decode16f8;
default: assert(0); return NULL;
}
}
diff --git a/src/Test/TestDescrInt.cpp b/src/Test/TestDescrInt.cpp
index 1f1fa0f9b5..dad6d9d0f7 100644
--- a/src/Test/TestDescrInt.cpp
+++ b/src/Test/TestDescrInt.cpp
@@ -453,10 +453,10 @@ namespace Test
result = result && DescrIntDecode16fAutoTest(FUNC_DI(Simd::Avx512bw::DescrIntInit), FUNC_DI(SimdDescrIntInit));
#endif
-//#ifdef SIMD_NEON_ENABLE
-// if (Simd::Neon::Enable)
-// result = result && DescrIntDecode16fAutoTest(FUNC_DI(Simd::Neon::DescrIntInit), FUNC_DI(SimdDescrIntInit));
-//#endif
+#ifdef SIMD_NEON_ENABLE
+ if (Simd::Neon::Enable)
+ result = result && DescrIntDecode16fAutoTest(FUNC_DI(Simd::Neon::DescrIntInit), FUNC_DI(SimdDescrIntInit));
+#endif
return result;
}