diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index c33e11f868..37a163f28f 100644 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -371,25 +371,172 @@ static int InitSha256(wc_Sha256* sha256) } /* extern "C" */ #endif + static word32 intel_flags; + static int Transform_Sha256_is_vectorized = 0; + +#ifdef WC_NO_INTERNAL_FUNCTION_POINTERS + + static enum { SHA256_UNSET, SHA256_AVX1, SHA256_AVX2, SHA256_AVX1_RORX, + SHA256_AVX2_RORX, SHA256_SSE2, SHA256_C } + sha_method = SHA256_UNSET; + + static void Sha256_SetTransform(void) + { + + if (sha_method != SHA256_UNSET) + return; + + intel_flags = cpuid_get_flags(); + + if (IS_INTEL_SHA(intel_flags)) { + #ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + sha_method = SHA256_AVX1; + Transform_Sha256_is_vectorized = 1; + } + else + #endif + { + sha_method = SHA256_SSE2; + Transform_Sha256_is_vectorized = 1; + } + } + else + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + #ifdef HAVE_INTEL_RORX + if (IS_INTEL_BMI2(intel_flags)) { + sha_method = SHA256_AVX2_RORX; + Transform_Sha256_is_vectorized = 1; + } + else + #endif + { + sha_method = SHA256_AVX2; + Transform_Sha256_is_vectorized = 1; + } + } + else + #endif + #ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + #ifdef HAVE_INTEL_RORX + if (IS_INTEL_BMI2(intel_flags)) { + sha_method = SHA256_AVX1_RORX; + Transform_Sha256_is_vectorized = 1; + } + else + #endif + { + sha_method = SHA256_AVX1; + Transform_Sha256_is_vectorized = 1; + } + } + else + #endif + { + sha_method = SHA256_C; + Transform_Sha256_is_vectorized = 0; + } + } + + static WC_INLINE int inline_XTRANSFORM(wc_Sha256* S, const byte* D) { + int ret; + if (sha_method == SHA256_C) + return Transform_Sha256(S, D); + SAVE_VECTOR_REGISTERS(return _svr_ret;); + switch (sha_method) { + case SHA256_AVX2: + ret = Transform_Sha256_AVX2(S, D); + break; + case SHA256_AVX2_RORX: + ret = Transform_Sha256_AVX2_RORX(S, D); + break; + case SHA256_AVX1: + ret = 
IS_INTEL_SHA(intel_flags) ? Transform_Sha256_AVX1_Sha(S, D) : Transform_Sha256_AVX1(S, D); + break; + case SHA256_AVX1_RORX: + ret = Transform_Sha256_AVX1_RORX(S, D); + break; + case SHA256_SSE2: + ret = Transform_Sha256_SSE2_Sha(S, D); + break; + case SHA256_C: + case SHA256_UNSET: + default: + ret = Transform_Sha256(S, D); + break; + } + RESTORE_VECTOR_REGISTERS(); + return ret; + } +#define XTRANSFORM(...) inline_XTRANSFORM(__VA_ARGS__) + + static WC_INLINE int inline_XTRANSFORM_LEN(wc_Sha256* S, const byte* D, word32 L) { + int ret; + SAVE_VECTOR_REGISTERS(return _svr_ret;); + switch (sha_method) { + case SHA256_AVX2: + ret = Transform_Sha256_AVX2_Len(S, D, L); + break; + case SHA256_AVX2_RORX: + ret = Transform_Sha256_AVX2_RORX_Len(S, D, L); + break; + case SHA256_AVX1: + ret = IS_INTEL_SHA(intel_flags) ? Transform_Sha256_AVX1_Sha_Len(S, D, L) : Transform_Sha256_AVX1_Len(S, D, L); + break; + case SHA256_AVX1_RORX: + ret = Transform_Sha256_AVX1_RORX_Len(S, D, L); + break; + case SHA256_SSE2: + ret = Transform_Sha256_SSE2_Sha_Len(S, D, L); + break; + case SHA256_C: + case SHA256_UNSET: + default: + ret = 0; + break; + } + RESTORE_VECTOR_REGISTERS(); + return ret; + } +#define XTRANSFORM_LEN(...) inline_XTRANSFORM_LEN(__VA_ARGS__) + +#else /* !WC_NO_INTERNAL_FUNCTION_POINTERS */ + static int (*Transform_Sha256_p)(wc_Sha256* sha256, const byte* data); /* = _Transform_Sha256 */ static int (*Transform_Sha256_Len_p)(wc_Sha256* sha256, const byte* data, word32 len); /* = NULL */ static int transform_check = 0; - static word32 intel_flags; - static int Transform_Sha256_is_vectorized = 0; static WC_INLINE int inline_XTRANSFORM(wc_Sha256* S, const byte* D) { int ret; +#ifdef WOLFSSL_LINUXKM + if (Transform_Sha256_is_vectorized) + SAVE_VECTOR_REGISTERS(return _svr_ret;); +#endif ret = (*Transform_Sha256_p)(S, D); +#ifdef WOLFSSL_LINUXKM + if (Transform_Sha256_is_vectorized) + RESTORE_VECTOR_REGISTERS(); +#endif return ret; } #define XTRANSFORM(...) 
inline_XTRANSFORM(__VA_ARGS__) static WC_INLINE int inline_XTRANSFORM_LEN(wc_Sha256* S, const byte* D, word32 L) { int ret; +#ifdef WOLFSSL_LINUXKM + if (Transform_Sha256_is_vectorized) + SAVE_VECTOR_REGISTERS(return _svr_ret;); +#endif ret = (*Transform_Sha256_Len_p)(S, D, L); +#ifdef WOLFSSL_LINUXKM + if (Transform_Sha256_is_vectorized) + RESTORE_VECTOR_REGISTERS(); +#endif return ret; } #define XTRANSFORM_LEN(...) inline_XTRANSFORM_LEN(__VA_ARGS__) @@ -463,6 +610,8 @@ static int InitSha256(wc_Sha256* sha256) transform_check = 1; } +#endif /* !WC_NO_INTERNAL_FUNCTION_POINTERS */ + #if !defined(WOLFSSL_KCAPI_HASH) int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) { @@ -1162,7 +1311,13 @@ static int InitSha256(wc_Sha256* sha256) #ifdef XTRANSFORM_LEN #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) + + #ifdef WC_NO_INTERNAL_FUNCTION_POINTERS + if (sha_method != SHA256_C) + #else if (Transform_Sha256_Len_p != NULL) + #endif + #endif { if (len >= WC_SHA256_BLOCK_SIZE) { diff --git a/wolfcrypt/src/sha3.c b/wolfcrypt/src/sha3.c index f639dabb19..30732d501a 100644 --- a/wolfcrypt/src/sha3.c +++ b/wolfcrypt/src/sha3.c @@ -651,6 +651,10 @@ static int Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p) word32 i; word32 blocks; +#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP) + if (sha3_block == sha3_block_avx2) + SAVE_VECTOR_REGISTERS(return _svr_ret;); +#endif if (sha3->i > 0) { byte *t; byte l = (byte)(p * 8 - sha3->i); @@ -699,6 +703,10 @@ static int Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p) len -= p * 8; data += p * 8; } +#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP) + if (sha3_block == sha3_block_avx2) + RESTORE_VECTOR_REGISTERS(); +#endif XMEMCPY(sha3->t, data, len); sha3->i += (byte)len; @@ -732,6 +740,12 @@ static int Sha3Final(wc_Sha3* sha3, byte padChar, byte* hash, byte p, word32 l) for (i = 0; i < p; i++) { sha3->s[i] 
^= Load64BitBigEndian(sha3->t + 8 * i); } + +#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP) + if (sha3_block == sha3_block_avx2) + SAVE_VECTOR_REGISTERS(return _svr_ret;); +#endif + for (j = 0; l - j >= rate; j += rate) { #ifdef USE_INTEL_SPEEDUP (*sha3_block)(sha3->s); @@ -755,6 +769,11 @@ static int Sha3Final(wc_Sha3* sha3, byte padChar, byte* hash, byte p, word32 l) #endif XMEMCPY(hash + j, sha3->s, l - j); } +#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP) + if (sha3_block == sha3_block_avx2) + RESTORE_VECTOR_REGISTERS(); +#endif + return 0; } @@ -1328,6 +1347,10 @@ int wc_Shake128_Absorb(wc_Shake* shake, const byte* data, word32 len) */ int wc_Shake128_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt) { +#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP) + if (sha3_block == sha3_block_avx2) + SAVE_VECTOR_REGISTERS(return _svr_ret;); +#endif for (; (blockCnt > 0); blockCnt--) { #ifdef USE_INTEL_SPEEDUP (*sha3_block)(shake->s); @@ -1341,6 +1364,10 @@ int wc_Shake128_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt) #endif out += WC_SHA3_128_COUNT * 8; } +#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP) + if (sha3_block == sha3_block_avx2) + RESTORE_VECTOR_REGISTERS(); +#endif return 0; } @@ -1458,6 +1485,10 @@ int wc_Shake256_Absorb(wc_Shake* shake, const byte* data, word32 len) */ int wc_Shake256_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt) { +#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP) + if (sha3_block == sha3_block_avx2) + SAVE_VECTOR_REGISTERS(return _svr_ret;); +#endif for (; (blockCnt > 0); blockCnt--) { #ifdef USE_INTEL_SPEEDUP (*sha3_block)(shake->s); @@ -1471,6 +1502,10 @@ int wc_Shake256_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt) #endif out += WC_SHA3_256_COUNT * 8; } +#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP) + if (sha3_block == sha3_block_avx2) + RESTORE_VECTOR_REGISTERS(); +#endif return 0; } diff --git a/wolfcrypt/src/sha512.c 
b/wolfcrypt/src/sha512.c index e4509c038d..50a5bb47ec 100644 --- a/wolfcrypt/src/sha512.c +++ b/wolfcrypt/src/sha512.c @@ -426,20 +426,147 @@ static int InitSha512_256(wc_Sha512* sha512) #endif static int _Transform_Sha512(wc_Sha512 *sha512); + static word32 intel_flags; + static int Transform_Sha512_is_vectorized = 0; + +#ifdef WC_NO_INTERNAL_FUNCTION_POINTERS + + static enum { SHA512_UNSET, SHA512_AVX1, SHA512_AVX2, SHA512_AVX1_RORX, + SHA512_AVX2_RORX, SHA512_C } + sha_method = SHA512_UNSET; + + static void Sha512_SetTransform(void) + { + + if (sha_method != SHA512_UNSET) + return; + + intel_flags = cpuid_get_flags(); + + #if defined(HAVE_INTEL_AVX2) + if (IS_INTEL_AVX2(intel_flags)) { + #ifdef HAVE_INTEL_RORX + if (IS_INTEL_BMI2(intel_flags)) { + sha_method = SHA512_AVX2_RORX; + Transform_Sha512_is_vectorized = 1; + } + else + #endif + { + sha_method = SHA512_AVX2; + Transform_Sha512_is_vectorized = 1; + } + } + else + #endif + #if defined(HAVE_INTEL_AVX1) + if (IS_INTEL_AVX1(intel_flags)) { + #ifdef HAVE_INTEL_RORX + if (IS_INTEL_BMI2(intel_flags)) { + sha_method = SHA512_AVX1_RORX; + Transform_Sha512_is_vectorized = 1; + } + else + #endif + { + sha_method = SHA512_AVX1; + Transform_Sha512_is_vectorized = 1; + } + } + else + #endif + { + sha_method = SHA512_C; + Transform_Sha512_is_vectorized = 0; + } + } + + static WC_INLINE int Transform_Sha512(wc_Sha512 *sha512) { + int ret; + if (sha_method == SHA512_C) + return _Transform_Sha512(sha512); + SAVE_VECTOR_REGISTERS(return _svr_ret;); + switch (sha_method) { + case SHA512_AVX2: + ret = Transform_Sha512_AVX2(sha512); + break; + case SHA512_AVX2_RORX: + ret = Transform_Sha512_AVX2_RORX(sha512); + break; + case SHA512_AVX1: + ret = Transform_Sha512_AVX1(sha512); + break; + case SHA512_AVX1_RORX: + ret = Transform_Sha512_AVX1_RORX(sha512); + break; + case SHA512_C: + case SHA512_UNSET: + default: + ret = _Transform_Sha512(sha512); + break; + } + RESTORE_VECTOR_REGISTERS(); + return ret; + } +#define 
XTRANSFORM(...) Transform_Sha512(__VA_ARGS__) + + static WC_INLINE int Transform_Sha512_Len(wc_Sha512 *sha512, word32 len) { + int ret; + SAVE_VECTOR_REGISTERS(return _svr_ret;); + switch (sha_method) { + case SHA512_AVX2: + ret = Transform_Sha512_AVX2_Len(sha512, len); + break; + case SHA512_AVX2_RORX: + ret = Transform_Sha512_AVX2_RORX_Len(sha512, len); + break; + case SHA512_AVX1: + ret = Transform_Sha512_AVX1_Len(sha512, len); + break; + case SHA512_AVX1_RORX: + ret = Transform_Sha512_AVX1_RORX_Len(sha512, len); + break; + case SHA512_C: + case SHA512_UNSET: + default: + ret = 0; + break; + } + RESTORE_VECTOR_REGISTERS(); + return ret; + } +#define XTRANSFORM_LEN(...) Transform_Sha512_Len(__VA_ARGS__) + +#else /* !WC_NO_INTERNAL_FUNCTION_POINTERS */ + static int (*Transform_Sha512_p)(wc_Sha512* sha512) = _Transform_Sha512; static int (*Transform_Sha512_Len_p)(wc_Sha512* sha512, word32 len) = NULL; static int transform_check = 0; - static word32 intel_flags; - static int Transform_Sha512_is_vectorized = 0; static WC_INLINE int Transform_Sha512(wc_Sha512 *sha512) { int ret; +#ifdef WOLFSSL_LINUXKM + if (Transform_Sha512_is_vectorized) + SAVE_VECTOR_REGISTERS(return _svr_ret;); +#endif ret = (*Transform_Sha512_p)(sha512); +#ifdef WOLFSSL_LINUXKM + if (Transform_Sha512_is_vectorized) + RESTORE_VECTOR_REGISTERS(); +#endif return ret; } static WC_INLINE int Transform_Sha512_Len(wc_Sha512 *sha512, word32 len) { int ret; +#ifdef WOLFSSL_LINUXKM + if (Transform_Sha512_is_vectorized) + SAVE_VECTOR_REGISTERS(return _svr_ret;); +#endif ret = (*Transform_Sha512_Len_p)(sha512, len); +#ifdef WOLFSSL_LINUXKM + if (Transform_Sha512_is_vectorized) + RESTORE_VECTOR_REGISTERS(); +#endif return ret; } @@ -495,6 +622,8 @@ static int InitSha512_256(wc_Sha512* sha512) transform_check = 1; } +#endif /* !WC_NO_INTERNAL_FUNCTION_POINTERS */ + #else #define Transform_Sha512(sha512) _Transform_Sha512(sha512) @@ -804,7 +933,13 @@ static WC_INLINE int Sha512Update(wc_Sha512* sha512, const 
byte* data, word32 le #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) - if (Transform_Sha512_Len_p != NULL) { + + #ifdef WC_NO_INTERNAL_FUNCTION_POINTERS + if (sha_method != SHA512_C) + #else + if (Transform_Sha512_Len_p != NULL) + #endif + { word32 blocksLen = len & ~((word32)WC_SHA512_BLOCK_SIZE-1); if (blocksLen > 0) { diff --git a/wolfssl/wolfcrypt/settings.h b/wolfssl/wolfcrypt/settings.h index 063ec42304..0a26a63d7a 100644 --- a/wolfssl/wolfcrypt/settings.h +++ b/wolfssl/wolfcrypt/settings.h @@ -2896,6 +2896,9 @@ extern void uITRON4_free(void *p) ; #ifndef WOLFSSL_SP_DIV_WORD_HALF #define WOLFSSL_SP_DIV_WORD_HALF #endif + #ifdef __PIE__ + #define WC_NO_INTERNAL_FUNCTION_POINTERS + #endif #endif