Skip to content

Commit

Permalink
+add SSE4.1 optimizations of function BgrToYuv444pV2.
Browse files Browse the repository at this point in the history
  • Loading branch information
ermig1979 committed Sep 19, 2023
1 parent 1ac70e2 commit e2e3a2f
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 11 deletions.
2 changes: 1 addition & 1 deletion docs/2023.html
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ <h5>New features</h5>
<ul>
<li>Base implementation, SSE4.1, AVX2, AVX-512BW optimizations of function BgrToYuv420pV2.</li>
<li>Base implementation, SSE4.1, AVX2 optimizations of function BgrToYuv422pV2.</li>
<li>Base implementation of function BgrToYuv444pV2.</li>
<li>Base implementation, SSE4.1 optimizations of function BgrToYuv444pV2.</li>
</ul>
<ul>
<li>Error in AVX-512BW optimizations of function SynetSoftmaxLayerForward.</li>
Expand Down
10 changes: 5 additions & 5 deletions src/Simd/SimdLib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1722,11 +1722,11 @@ SIMD_API void SimdBgrToYuv444pV2(const uint8_t* bgr, size_t bgrStride, size_t wi
// Avx2::BgrToYuv444pV2(bgr, bgrStride, width, height, y, yStride, u, uStride, v, vStride, yuvType);
// else
//#endif
//#ifdef SIMD_SSE41_ENABLE
// if (Sse41::Enable && width >= Sse41::A)
// Sse41::BgrToYuv444pV2(bgr, bgrStride, width, height, y, yStride, u, uStride, v, vStride, yuvType);
// else
//#endif
#ifdef SIMD_SSE41_ENABLE
if (Sse41::Enable && width >= Sse41::A)
Sse41::BgrToYuv444pV2(bgr, bgrStride, width, height, y, yStride, u, uStride, v, vStride, yuvType);
else
#endif
//#ifdef SIMD_NEON_ENABLE
// if (Neon::Enable && width >= Neon::A)
// Neon::BgrToYuv444pV2(bgr, bgrStride, width, height, y, yStride, u, uStride, v, vStride, yuvType);
Expand Down
3 changes: 3 additions & 0 deletions src/Simd/SimdSse41.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ namespace Simd

void BgrToYuv444p(const uint8_t* bgr, size_t width, size_t height, size_t bgrStride, uint8_t* y, size_t yStride, uint8_t* u, size_t uStride, uint8_t* v, size_t vStride);

// SSE4.1 implementation of the BGR -> planar Y/U/V (4:4:4, one U and one V sample per pixel) conversion.
// bgr/bgrStride describe the source image; y, u and v (with their strides) receive the three output planes.
// yuvType selects the conversion coefficients (BT.601, BT.709, BT.2020 or T-REC-871).
// NOTE(review): the implementation asserts width >= A (the SSE register width in bytes) - callers are expected to check this.
void BgrToYuv444pV2(const uint8_t* bgr, size_t bgrStride, size_t width, size_t height,
uint8_t* y, size_t yStride, uint8_t* u, size_t uStride, uint8_t* v, size_t vStride, SimdYuvType yuvType);

void Binarization(const uint8_t* src, size_t srcStride, size_t width, size_t height,
uint8_t value, uint8_t positive, uint8_t negative, uint8_t* dst, size_t dstStride, SimdCompareType compareType);

Expand Down
51 changes: 51 additions & 0 deletions src/Simd/SimdSse41BgrToYuv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,57 @@ namespace Simd
default:
assert(0);
}
#endif
}

//-------------------------------------------------------------------------------------------------

// Converts one register-wide group of BGR pixels into Y, U and V vectors.
//   T   - conversion traits forwarded to BgrToY8/BgrToU8/BgrToV8.
//   bgr - source pixels, read through LoadBgr<false> (presumably the unaligned variant - confirm against LoadBgr).
//   y, u, v - destinations; each receives one 16-byte unaligned store.
template <class T> SIMD_INLINE void BgrToYuv444pV2(const uint8_t* bgr, uint8_t* y, uint8_t* u, uint8_t* v)
{
    __m128i b8, g8, r8;
    LoadBgr<false>((__m128i*)bgr, b8, g8, r8);

    // All three planes are derived from the same de-interleaved channel registers.
    const __m128i luma = BgrToY8<T>(b8, g8, r8);
    const __m128i chromaU = BgrToU8<T>(b8, g8, r8);
    const __m128i chromaV = BgrToV8<T>(b8, g8, r8);

    _mm_storeu_si128(reinterpret_cast<__m128i*>(y), luma);
    _mm_storeu_si128(reinterpret_cast<__m128i*>(u), chromaU);
    _mm_storeu_si128(reinterpret_cast<__m128i*>(v), chromaV);
}

// Converts a whole BGR image into three separate Y, U and V planes of the same width and height.
//   T - conversion traits forwarded to the per-vector helper.
//   bgr, bgrStride - source image and its row pitch in bytes (3 bytes per pixel).
//   y/u/v and their strides - destination planes and row pitches in bytes.
// Requires width >= A (checked by assert only).
template <class T> void BgrToYuv444pV2(const uint8_t* bgr, size_t bgrStride, size_t width, size_t height, uint8_t* y, size_t yStride,
    uint8_t* u, size_t uStride, uint8_t* v, size_t vStride)
{
    assert(width >= A);

    const size_t alignedWidth = AlignLo(width, A);
    const bool hasTail = (alignedWidth != width);
    // Offset of the last full vector in a row; it overlaps the preceding one
    // so that widths which are not a multiple of A are still fully covered.
    const size_t tailOffset = width - A;

    for (size_t row = 0; row < height; ++row, bgr += bgrStride, y += yStride, u += uStride, v += vStride)
    {
        size_t col = 0;
        while (col < alignedWidth)
        {
            BgrToYuv444pV2<T>(bgr + col * 3, y + col, u + col, v + col);
            col += A;
        }
        if (hasTail)
            BgrToYuv444pV2<T>(bgr + tailOffset * 3, y + tailOffset, u + tailOffset, v + tailOffset);
    }
}

// Entry point of the SSE4.1 BGR -> planar Y/U/V conversion.
// Picks the template instantiation matching yuvType; an unknown yuvType
// triggers assert(0) and converts nothing.
void BgrToYuv444pV2(const uint8_t* bgr, size_t bgrStride, size_t width, size_t height, uint8_t* y, size_t yStride,
    uint8_t* u, size_t uStride, uint8_t* v, size_t vStride, SimdYuvType yuvType)
{
#if defined(SIMD_X86_ENABLE) && defined(NDEBUG) && defined(_MSC_VER) && _MSC_VER <= 1900
    // 32-bit x86 release builds with MSVC up to version 19.00 use the scalar implementation instead
    // (presumably a compiler workaround - the reason is not visible here).
    Base::BgrToYuv444pV2(bgr, bgrStride, width, height, y, yStride, u, uStride, v, vStride, yuvType);
#else
    switch (yuvType)
    {
    case SimdYuvBt601:
        BgrToYuv444pV2<Base::Bt601>(bgr, bgrStride, width, height, y, yStride, u, uStride, v, vStride);
        break;
    case SimdYuvBt709:
        BgrToYuv444pV2<Base::Bt709>(bgr, bgrStride, width, height, y, yStride, u, uStride, v, vStride);
        break;
    case SimdYuvBt2020:
        BgrToYuv444pV2<Base::Bt2020>(bgr, bgrStride, width, height, y, yStride, u, uStride, v, vStride);
        break;
    case SimdYuvTrect871:
        BgrToYuv444pV2<Base::Trect871>(bgr, bgrStride, width, height, y, yStride, u, uStride, v, vStride);
        break;
    default:
        assert(0);
    }
#endif
}
}
Expand Down
10 changes: 5 additions & 5 deletions src/Test/TestAnyToYuv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,11 +563,11 @@ namespace Test

result = result && AnyToYuvV2AutoTest(View::Bgr24, 1, 1, FUNC_YUV2(Simd::Base::BgrToYuv444pV2), FUNC_YUV2(SimdBgrToYuv444pV2));

//#ifdef SIMD_SSE41_ENABLE
// if (Simd::Sse41::Enable)
// result = result && AnyToYuvV2AutoTest(View::Bgr24, 1, 1, FUNC_YUV2(Simd::Sse41::BgrToYuv444pV2), FUNC_YUV2(SimdBgrToYuv444pV2));
//#endif
//
#ifdef SIMD_SSE41_ENABLE
if (Simd::Sse41::Enable)
result = result && AnyToYuvV2AutoTest(View::Bgr24, 1, 1, FUNC_YUV2(Simd::Sse41::BgrToYuv444pV2), FUNC_YUV2(SimdBgrToYuv444pV2));
#endif

//#ifdef SIMD_AVX2_ENABLE
// if (Simd::Avx2::Enable)
// result = result && AnyToYuvV2AutoTest(View::Bgr24, 1, 1, FUNC_YUV2(Simd::Avx2::BgrToYuv444pV2), FUNC_YUV2(SimdBgrToYuv444pV2));
Expand Down

0 comments on commit e2e3a2f

Please sign in to comment.