diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S index 72c423d911..5b0da2fe38 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S @@ -808,8 +808,7 @@ AES_set_encrypt_key: cmp r1, #0xc0 beq L_AES_set_encrypt_key_start_192 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -1038,8 +1037,7 @@ L_AES_set_encrypt_key_loop_256: b L_AES_set_encrypt_key_end L_AES_set_encrypt_key_start_192: #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -1203,8 +1201,7 @@ L_AES_set_encrypt_key_loop_192: b L_AES_set_encrypt_key_end L_AES_set_encrypt_key_start_128: #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -3952,8 +3949,7 @@ L_AES_CBC_decrypt_loop_block_256: ldr r7, [lr, #12] ldr lr, [sp, #16] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [lr] - str r5, [lr, #4] + stm lr, {r4, r5} #else strd r4, r5, [lr] #endif @@ -4141,8 +4137,7 @@ L_AES_CBC_decrypt_loop_block_192: ldr r7, [lr, #12] ldr lr, [sp, #16] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [lr] - str r5, [lr, #4] + stm lr, {r4, r5} #else strd r4, r5, [lr] #endif @@ -4330,8 +4325,7 @@ L_AES_CBC_decrypt_loop_block_128: ldr r7, [lr, #12] ldr lr, [sp, #16] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [lr] - str r5, [lr, #4] + stm lr, {r4, r5} #else strd r4, r5, [lr] #endif @@ -4438,8 +4432,7 @@ L_AES_CBC_decrypt_end_odd: ldrd r10, r11, [r4, #24] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r8, [r4] - str r9, [r4, #4] + stm r4, {r8, r9} #else strd r8, r9, [r4] #endif diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c index d23ef95992..21dcb0d23a 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c @@ -441,8 +441,7 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, "cmp %[len], #0xc0\n\t" "beq L_AES_set_encrypt_key_start_192_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[key]]\n\t" - "ldr r5, [%[key], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[key]]\n\t" #endif @@ -673,8 +672,7 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, "\n" "L_AES_set_encrypt_key_start_192_%=: \n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[key]]\n\t" - "ldr r5, [%[key], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[key]]\n\t" #endif @@ -840,8 +838,7 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, "\n" "L_AES_set_encrypt_key_start_128_%=: \n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[key]]\n\t" - "ldr r5, [%[key], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[key]]\n\t" #endif @@ -3465,8 +3462,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, "ldr r7, [lr, #12]\n\t" "ldr lr, [sp, #16]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [lr]\n\t" - "str r5, [lr, #4]\n\t" + "stm lr, {r4, r5}\n\t" #else "strd r4, r5, [lr]\n\t" #endif @@ -3655,8 +3651,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, "ldr r7, [lr, #12]\n\t" "ldr lr, [sp, #16]\n\t" #if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [lr]\n\t" - "str r5, [lr, #4]\n\t" + "stm lr, {r4, r5}\n\t" #else "strd r4, r5, [lr]\n\t" #endif @@ -3845,8 +3840,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, "ldr r7, [lr, #12]\n\t" "ldr lr, [sp, #16]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [lr]\n\t" - "str r5, [lr, #4]\n\t" + "stm lr, {r4, r5}\n\t" #else "strd r4, r5, [lr]\n\t" #endif @@ -3954,8 +3948,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, "ldrd r10, r11, [r4, #24]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r8, [r4]\n\t" - "str r9, [r4, #4]\n\t" + "stm r4, {r8, r9}\n\t" #else "strd r8, r9, [r4]\n\t" #endif diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519.S b/wolfcrypt/src/port/arm/armv8-32-curve25519.S index 2f33048c63..2411807510 100644 --- a/wolfcrypt/src/port/arm/armv8-32-curve25519.S +++ b/wolfcrypt/src/port/arm/armv8-32-curve25519.S @@ -51,14 +51,12 @@ fe_add_sub_op: push {lr} # Add-Sub #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r2] - ldr r5, [r2, #4] + ldm r2, {r4, r5} #else ldrd r4, r5, [r2] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r6, [r3] - ldr r7, [r3, #4] + ldm r3, {r6, r7} #else ldrd r6, r7, [r3] #endif @@ -68,8 +66,7 @@ fe_add_sub_op: adcs r9, r5, r7 adc r12, r12, #0 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r8, [r0] - str r9, [r0, #4] + stm r0, {r8, r9} #else strd r8, r9, [r0] #endif @@ -77,8 +74,7 @@ fe_add_sub_op: subs r10, r4, r6 sbcs r11, r5, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r10, [r1] - str r11, [r1, #4] + stm r1, {r10, r11} #else strd r10, r11, [r1] #endif @@ -177,8 +173,7 @@ fe_add_sub_op: mul r12, r3, r12 # Add -x*modulus (if overflow) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -193,8 +188,7 @@ fe_add_sub_op: adcs r6, r6, #0 adcs r7, r7, #0 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif @@ -468,8 +462,7 @@ fe_copy: push {r4, r5, lr} # Copy #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r2, [r1] - ldr r3, [r1, #4] + ldm r1, {r2, r3} #else ldrd r2, r3, [r1] #endif @@ -480,8 +473,7 @@ fe_copy: ldrd r4, r5, [r1, #8] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r2, [r0] - str r3, [r0, #4] + stm r0, {r2, r3} #else strd r2, r3, [r0] #endif @@ -629,19 +621,12 @@ fe_cmov_table: mov r7, #0 mov r8, #0 mov r9, #0 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #31 ror r3, r3, r12 asr r3, r3, #31 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r10, [r1] - ldr r11, [r1, #4] + ldm r1, {r10, r11} #else ldrd r10, r11, [r1] #endif @@ -676,19 +661,12 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #30 ror r3, r3, r12 asr r3, r3, #31 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r10, [r1] - ldr r11, [r1, #4] + ldm r1, {r10, r11} #else ldrd r10, r11, [r1] #endif @@ -723,19 +701,12 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #29 ror r3, r3, r12 asr r3, r3, #31 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r10, [r1] - ldr r11, [r1, #4] + ldm r1, {r10, r11} #else ldrd r10, r11, [r1] #endif @@ -770,19 +741,12 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #28 ror r3, r3, r12 asr r3, r3, #31 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r10, [r1] - ldr r11, [r1, #4] + ldm r1, {r10, r11} #else ldrd r10, r11, [r1] #endif @@ -817,19 +781,12 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #27 ror r3, r3, r12 asr r3, r3, #31 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r10, [r1] - ldr r11, [r1, #4] + ldm r1, {r10, r11} #else ldrd r10, r11, [r1] #endif @@ -864,19 +821,12 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #26 ror r3, r3, r12 asr r3, r3, #31 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r10, [r1] - ldr r11, [r1, #4] + ldm r1, {r10, r11} #else ldrd r10, r11, [r1] #endif @@ -911,19 +861,12 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #25 ror r3, r3, r12 asr r3, r3, #31 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r10, [r1] - ldr r11, [r1, #4] + ldm r1, {r10, r11} #else ldrd r10, r11, [r1] #endif @@ -958,19 +901,12 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #24 ror r3, r3, r12 asr r3, r3, #31 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r10, [r1] - ldr r11, [r1, #4] + ldm r1, {r10, r11} #else ldrd r10, r11, [r1] #endif @@ -1026,8 +962,7 @@ fe_cmov_table: and r11, r11, r12 eor r9, r9, r11 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif @@ -1057,13 +992,7 @@ fe_cmov_table: mov r7, #0 mov r8, #0 mov r9, #0 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #31 ror r3, r3, r12 asr r3, r3, #31 @@ -1104,13 +1033,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #30 ror r3, r3, r12 asr r3, r3, #31 @@ -1151,13 +1074,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #29 ror r3, r3, r12 asr r3, r3, #31 @@ -1198,13 +1115,7 @@ fe_cmov_table: eor r8, r8, r10 eor 
r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #28 ror r3, r3, r12 asr r3, r3, #31 @@ -1245,13 +1156,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #27 ror r3, r3, r12 asr r3, r3, #31 @@ -1292,13 +1197,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #26 ror r3, r3, r12 asr r3, r3, #31 @@ -1339,13 +1238,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #25 ror r3, r3, r12 asr r3, r3, #31 @@ -1386,13 +1279,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #24 ror r3, r3, r12 asr r3, r3, #31 @@ -1486,13 +1373,7 @@ fe_cmov_table: mov r7, #0 mov r8, #0 mov r9, #0 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #31 ror r3, r3, r12 asr r3, r3, #31 @@ -1533,13 +1414,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #30 ror r3, r3, r12 asr r3, r3, #31 @@ -1580,13 +1455,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #29 ror r3, r3, r12 asr r3, r3, #31 @@ -1627,13 +1496,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #28 ror r3, r3, r12 asr r3, r3, #31 @@ -1674,13 +1537,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #27 ror r3, r3, r12 asr r3, r3, #31 @@ -1721,13 +1578,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #26 ror r3, r3, r12 asr r3, r3, #31 @@ -1768,13 +1619,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #25 ror r3, r3, r12 asr r3, r3, #31 @@ -1815,13 +1660,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, 
#0x80000000 -#endif ror r3, r3, #24 ror r3, r3, r12 asr r3, r3, #31 @@ -1915,13 +1754,7 @@ fe_cmov_table: mov r7, #0 mov r8, #0 mov r9, #0 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #31 ror r3, r3, r12 asr r3, r3, #31 @@ -1962,13 +1795,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #30 ror r3, r3, r12 asr r3, r3, #31 @@ -2009,13 +1836,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #29 ror r3, r3, r12 asr r3, r3, #31 @@ -2056,13 +1877,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #28 ror r3, r3, r12 asr r3, r3, #31 @@ -2103,13 +1918,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #27 ror r3, r3, r12 asr r3, r3, #31 @@ -2150,13 +1959,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #26 ror r3, r3, r12 asr r3, r3, #31 @@ -2197,13 +2000,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #25 ror r3, r3, r12 asr r3, r3, #31 @@ -2244,13 +2041,7 @@ fe_cmov_table: eor r8, r8, r10 eor r9, r9, r11 add r1, r1, #0x60 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x800000 - lsl r3, r3, #8 - add r3, r3, #0x0 -#else mov r3, #0x80000000 -#endif ror r3, r3, #24 ror r3, r3, r12 asr r3, r3, #31 @@ -3405,16 +3196,13 @@ fe_mul121666: # Multiply by 121666 ldm r1, {r2, r3, r4, r5, r6, r7, r8, r9} #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #1 - lsl r10, r10, #8 - orr r10, r10, #0xdb - lsl r10, r10, #8 - orr r10, r10, #0x42 + mov r10, #0x42 + orr r10, r10, #0x10000 + orr r10, r10, #0xdb00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xdb - lsl r10, r10, #8 - add r10, r10, #0x42 + mov r10, #0x42 + orr r10, r10, #0xdb00 #else mov r10, #0xdb42 #endif @@ -3476,16 +3264,13 @@ fe_mul121666: # Multiply by 121666 ldm r1, {r2, r3, r4, r5, r6, r7, r8, r9} #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov lr, #1 - lsl lr, lr, #8 - orr lr, lr, #0xdb - lsl lr, lr, #8 - orr lr, lr, #0x42 + mov lr, #0x42 + orr lr, lr, #0x10000 + orr lr, lr, #0xdb00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov lr, #0xdb - lsl lr, lr, #8 - add lr, lr, #0x42 + mov lr, #0x42 + orr lr, lr, #0xdb00 #else mov lr, #0xdb42 #endif @@ -5472,18 +5257,14 @@ sc_reduce: sub r0, r0, #28 # Add order times bits 504..511 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xa3 - lsl r10, r10, #8 - orr r10, r10, #10 - lsl r10, r10, #8 - orr r10, r10, #44 - 
lsl r10, r10, #8 - orr r10, r10, #19 + mov r10, #19 + orr r10, r10, #0xa3000000 + orr r10, r10, #0xa0000 + orr r10, r10, #0x2c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0x2c - lsl r10, r10, #8 - add r10, r10, #0x13 + mov r10, #0x13 + orr r10, r10, #0x2c00 #else mov r10, #0x2c13 #endif @@ -5495,18 +5276,14 @@ sc_reduce: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0xa7 - lsl r11, r11, #8 - orr r11, r11, #0xed - lsl r11, r11, #8 - orr r11, r11, #0x9c - lsl r11, r11, #8 - orr r11, r11, #0xe5 + mov r11, #0xe5 + orr r11, r11, #0xa7000000 + orr r11, r11, #0xed0000 + orr r11, r11, #0x9c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x9c - lsl r11, r11, #8 - add r11, r11, #0xe5 + mov r11, #0xe5 + orr r11, r11, #0x9c00 #else mov r11, #0x9ce5 #endif @@ -5524,18 +5301,14 @@ sc_reduce: adc r1, r1, #0 umlal r3, r1, r11, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0x5d - lsl r10, r10, #8 - orr r10, r10, #8 - lsl r10, r10, #8 - orr r10, r10, #0x63 - lsl r10, r10, #8 - orr r10, r10, #41 + mov r10, #41 + orr r10, r10, #0x5d000000 + orr r10, r10, #0x80000 + orr r10, r10, #0x6300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0x63 - lsl r10, r10, #8 - add r10, r10, #0x29 + mov r10, #0x29 + orr r10, r10, #0x6300 #else mov r10, #0x6329 #endif @@ -5547,18 +5320,14 @@ sc_reduce: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0xeb - lsl r11, r11, #8 - orr r11, r11, #33 - lsl r11, r11, #8 - orr r11, r11, #6 - lsl r11, r11, #8 - orr r11, r11, #33 + mov r11, #33 + orr r11, r11, #0xeb000000 + orr r11, r11, #0x210000 + orr r11, r11, #0x600 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x6 - lsl r11, r11, #8 - add r11, r11, #0x21 + mov r11, #0x21 + orr r11, r11, #0x600 #else mov r11, #0x621 #endif @@ -5588,18 +5357,14 @@ sc_reduce: # Sub product of top 8 words and order mov r12, sp #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa3 - lsl r1, r1, #8 - orr r1, r1, #10 - lsl r1, r1, #8 - orr r1, r1, #44 - lsl r1, r1, #8 - orr r1, r1, #19 + mov r1, #19 + orr r1, r1, #0xa3000000 + orr r1, r1, #0xa0000 + orr r1, r1, #0x2c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x2c - lsl r1, r1, #8 - add r1, r1, #0x13 + mov r1, #0x13 + orr r1, r1, #0x2c00 #else mov r1, #0x2c13 #endif @@ -5656,18 +5421,14 @@ sc_reduce: sub r0, r0, #16 sub r12, r12, #32 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa7 - lsl r1, r1, #8 - orr r1, r1, #0xed - lsl r1, r1, #8 - orr r1, r1, #0x9c - lsl r1, r1, #8 - orr r1, r1, #0xe5 + mov r1, #0xe5 + orr r1, r1, #0xa7000000 + orr r1, r1, #0xed0000 + orr r1, r1, #0x9c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x9c - lsl r1, r1, #8 - add r1, r1, #0xe5 + mov r1, #0xe5 + orr r1, r1, #0x9c00 #else mov r1, #0x9ce5 #endif @@ -5718,18 +5479,14 @@ sc_reduce: stm r12!, {r10, r11, lr} sub r12, r12, #32 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x5d - lsl r1, r1, #8 - orr r1, r1, #8 - lsl r1, r1, #8 - orr r1, r1, #0x63 - lsl r1, r1, #8 - orr r1, r1, #41 + mov r1, #41 + orr r1, r1, #0x5d000000 + orr r1, r1, #0x80000 + orr r1, r1, #0x6300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x63 - lsl r1, r1, #8 - add r1, r1, #0x29 + mov r1, #0x29 + orr r1, r1, #0x6300 #else mov r1, #0x6329 #endif @@ -5780,18 +5537,14 @@ sc_reduce: stm r12!, {r10, r11, lr} sub 
r12, r12, #32 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xeb - lsl r1, r1, #8 - orr r1, r1, #33 - lsl r1, r1, #8 - orr r1, r1, #6 - lsl r1, r1, #8 - orr r1, r1, #33 + mov r1, #33 + orr r1, r1, #0xeb000000 + orr r1, r1, #0x210000 + orr r1, r1, #0x600 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x6 - lsl r1, r1, #8 - add r1, r1, #0x21 + mov r1, #0x21 + orr r1, r1, #0x600 #else mov r1, #0x621 #endif @@ -5861,26 +5614,16 @@ sc_reduce: sub r12, r12, #36 asr lr, r11, #25 # Conditionally subtract order starting at bit 125 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa00000 - lsl r1, r1, #8 - add r1, r1, #0x0 -#else mov r1, #0xa0000000 -#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r2, #0x4b - lsl r2, r2, #8 - orr r2, r2, #0x9e - lsl r2, r2, #8 - orr r2, r2, #0xba - lsl r2, r2, #8 - orr r2, r2, #0x7d + mov r2, #0x7d + orr r2, r2, #0x4b000000 + orr r2, r2, #0x9e0000 + orr r2, r2, #0xba00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r2, #0xba - lsl r2, r2, #8 - add r2, r2, #0x7d + mov r2, #0x7d + orr r2, r2, #0xba00 #else mov r2, #0xba7d #endif @@ -5892,18 +5635,14 @@ sc_reduce: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0xcb - lsl r3, r3, #8 - orr r3, r3, #2 - lsl r3, r3, #8 - orr r3, r3, #0x4c - lsl r3, r3, #8 - orr r3, r3, #0x63 + mov r3, #0x63 + orr r3, r3, #0xcb000000 + orr r3, r3, #0x20000 + orr r3, r3, #0x4c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x4c - lsl r3, r3, #8 - add r3, r3, #0x63 + mov r3, #0x63 + orr r3, r3, #0x4c00 #else mov r3, #0x4c63 #endif @@ -5915,18 +5654,14 @@ sc_reduce: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r4, #0xd4 - lsl r4, r4, #8 - orr r4, r4, #0x5e - lsl r4, r4, #8 - orr r4, r4, #0xf3 - lsl r4, r4, #8 - orr r4, r4, #0x9a + mov r4, #0x9a + orr r4, r4, #0xd4000000 + orr r4, r4, #0x5e0000 + orr r4, r4, #0xf300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r4, #0xf3 - lsl r4, r4, #8 - add r4, r4, #0x9a + mov r4, #0x9a + orr r4, r4, #0xf300 #else mov r4, #0xf39a #endif @@ -5938,18 +5673,14 @@ sc_reduce: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r5, #2 - lsl r5, r5, #8 - orr r5, r5, #0x9b - lsl r5, r5, #8 - orr r5, r5, #0xdf - lsl r5, r5, #8 - orr r5, r5, #59 + mov r5, #59 + orr r5, r5, #0x2000000 + orr r5, r5, #0x9b0000 + orr r5, r5, #0xdf00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r5, #0xdf - lsl r5, r5, #8 - add r5, r5, #0x3b + mov r5, #0x3b + orr r5, r5, #0xdf00 #else mov r5, #0xdf3b #endif @@ -5960,13 +5691,7 @@ sc_reduce: movt r5, #0x29b #endif #endif -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r9, #0x20000 - lsl r9, r9, #8 - add r9, r9, #0x0 -#else mov r9, #0x2000000 -#endif and r1, r1, lr and r2, r2, lr and r3, r3, lr @@ -6015,18 +5740,14 @@ sc_reduce: mov r0, sp # * -5cf5d3ed #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa3 - lsl r1, r1, #8 - orr r1, r1, #10 - lsl r1, r1, #8 - orr r1, r1, #44 - lsl r1, r1, #8 - orr r1, r1, #19 + mov r1, #19 + orr r1, r1, #0xa3000000 + orr r1, r1, #0xa0000 + orr r1, r1, #0x2c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x2c - lsl r1, r1, #8 - add r1, r1, #0x13 + mov r1, #0x13 + orr r1, r1, #0x2c00 #else mov r1, #0x2c13 #endif @@ -6056,18 +5777,14 @@ sc_reduce: add r0, r0, #4 # * -5812631b #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov 
r1, #0xa7 - lsl r1, r1, #8 - orr r1, r1, #0xed - lsl r1, r1, #8 - orr r1, r1, #0x9c - lsl r1, r1, #8 - orr r1, r1, #0xe5 + mov r1, #0xe5 + orr r1, r1, #0xa7000000 + orr r1, r1, #0xed0000 + orr r1, r1, #0x9c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x9c - lsl r1, r1, #8 - add r1, r1, #0xe5 + mov r1, #0xe5 + orr r1, r1, #0x9c00 #else mov r1, #0x9ce5 #endif @@ -6097,18 +5814,14 @@ sc_reduce: add r0, r0, #4 # * -a2f79cd7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x5d - lsl r1, r1, #8 - orr r1, r1, #8 - lsl r1, r1, #8 - orr r1, r1, #0x63 - lsl r1, r1, #8 - orr r1, r1, #41 + mov r1, #41 + orr r1, r1, #0x5d000000 + orr r1, r1, #0x80000 + orr r1, r1, #0x6300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x63 - lsl r1, r1, #8 - add r1, r1, #0x29 + mov r1, #0x29 + orr r1, r1, #0x6300 #else mov r1, #0x6329 #endif @@ -6138,18 +5851,14 @@ sc_reduce: add r0, r0, #4 # * -14def9df #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xeb - lsl r1, r1, #8 - orr r1, r1, #33 - lsl r1, r1, #8 - orr r1, r1, #6 - lsl r1, r1, #8 - orr r1, r1, #33 + mov r1, #33 + orr r1, r1, #0xeb000000 + orr r1, r1, #0x210000 + orr r1, r1, #0x600 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x6 - lsl r1, r1, #8 - add r1, r1, #0x21 + mov r1, #0x21 + orr r1, r1, #0x600 #else mov r1, #0x621 #endif @@ -6197,18 +5906,14 @@ sc_reduce: sub r0, r0, #16 ldm r0, {r2, r3, r4, r5} #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0x5c - lsl r10, r10, #8 - orr r10, r10, #0xf5 - lsl r10, r10, #8 - orr r10, r10, #0xd3 - lsl r10, r10, #8 - orr r10, r10, #0xed + mov r10, #0xed + orr r10, r10, #0x5c000000 + orr r10, r10, #0xf50000 + orr r10, r10, #0xd300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd3 - lsl r10, r10, #8 - add r10, r10, #0xed + mov r10, #0xed + orr r10, r10, #0xd300 #else mov r10, #0xd3ed #endif @@ -6220,18 +5925,14 @@ sc_reduce: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x58 - lsl r11, r11, #8 - orr r11, r11, #18 - lsl r11, r11, #8 - orr r11, r11, #0x63 - lsl r11, r11, #8 - orr r11, r11, #26 + mov r11, #26 + orr r11, r11, #0x58000000 + orr r11, r11, #0x120000 + orr r11, r11, #0x6300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x63 - lsl r11, r11, #8 - add r11, r11, #0x1a + mov r11, #0x1a + orr r11, r11, #0x6300 #else mov r11, #0x631a #endif @@ -6243,18 +5944,14 @@ sc_reduce: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xa2 - lsl r12, r12, #8 - orr r12, r12, #0xf7 - lsl r12, r12, #8 - orr r12, r12, #0x9c - lsl r12, r12, #8 - orr r12, r12, #0xd6 + mov r12, #0xd6 + orr r12, r12, #0xa2000000 + orr r12, r12, #0xf70000 + orr r12, r12, #0x9c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0x9c - lsl r12, r12, #8 - add r12, r12, #0xd6 + mov r12, #0xd6 + orr r12, r12, #0x9c00 #else mov r12, #0x9cd6 #endif @@ -6266,18 +5963,14 @@ sc_reduce: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov lr, #20 - lsl lr, lr, #8 - orr lr, lr, #0xde - lsl lr, lr, #8 - orr lr, lr, #0xf9 - lsl lr, lr, #8 - orr lr, lr, #0xde + mov lr, #0xde + orr lr, lr, #0x14000000 + orr lr, lr, #0xde0000 + orr lr, lr, #0xf900 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov lr, #0xf9 - lsl lr, lr, #8 - add lr, lr, #0xde + mov lr, #0xde + orr lr, lr, #0xf900 #else mov lr, #0xf9de #endif @@ -6349,18 +6042,14 @@ sc_reduce: 
sub r0, r0, #28 # Add order times bits 504..511 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xa3 - lsl r10, r10, #8 - orr r10, r10, #10 - lsl r10, r10, #8 - orr r10, r10, #44 - lsl r10, r10, #8 - orr r10, r10, #19 + mov r10, #19 + orr r10, r10, #0xa3000000 + orr r10, r10, #0xa0000 + orr r10, r10, #0x2c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0x2c - lsl r10, r10, #8 - add r10, r10, #0x13 + mov r10, #0x13 + orr r10, r10, #0x2c00 #else mov r10, #0x2c13 #endif @@ -6372,18 +6061,14 @@ sc_reduce: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0xa7 - lsl r11, r11, #8 - orr r11, r11, #0xed - lsl r11, r11, #8 - orr r11, r11, #0x9c - lsl r11, r11, #8 - orr r11, r11, #0xe5 + mov r11, #0xe5 + orr r11, r11, #0xa7000000 + orr r11, r11, #0xed0000 + orr r11, r11, #0x9c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x9c - lsl r11, r11, #8 - add r11, r11, #0xe5 + mov r11, #0xe5 + orr r11, r11, #0x9c00 #else mov r11, #0x9ce5 #endif @@ -6398,18 +6083,14 @@ sc_reduce: umlal r2, r1, r10, lr umaal r3, r1, r11, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0x5d - lsl r10, r10, #8 - orr r10, r10, #8 - lsl r10, r10, #8 - orr r10, r10, #0x63 - lsl r10, r10, #8 - orr r10, r10, #41 + mov r10, #41 + orr r10, r10, #0x5d000000 + orr r10, r10, #0x80000 + orr r10, r10, #0x6300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0x63 - lsl r10, r10, #8 - add r10, r10, #0x29 + mov r10, #0x29 + orr r10, r10, #0x6300 #else mov r10, #0x6329 #endif @@ -6421,18 +6102,14 @@ sc_reduce: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0xeb - lsl r11, r11, #8 - orr r11, r11, #33 - lsl r11, r11, #8 - orr r11, r11, #6 - lsl r11, r11, #8 - orr r11, r11, #33 + mov r11, #33 + orr r11, r11, #0xeb000000 + orr r11, r11, #0x210000 + orr r11, r11, #0x600 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x6 - lsl r11, r11, #8 - add r11, r11, #0x21 + mov r11, #0x21 + orr r11, r11, #0x600 #else mov r11, #0x621 #endif @@ -6456,18 +6133,14 @@ sc_reduce: # Sub product of top 8 words and order mov r12, sp #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa3 - lsl r1, r1, #8 - orr r1, r1, #10 - lsl r1, r1, #8 - orr r1, r1, #44 - lsl r1, r1, #8 - orr r1, r1, #19 + mov r1, #19 + orr r1, r1, #0xa3000000 + orr r1, r1, #0xa0000 + orr r1, r1, #0x2c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x2c - lsl r1, r1, #8 - add r1, r1, #0x13 + mov r1, #0x13 + orr r1, r1, #0x2c00 #else mov r1, #0x2c13 #endif @@ -6503,18 +6176,14 @@ sc_reduce: sub r0, r0, #16 sub r12, r12, #32 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa7 - lsl r1, r1, #8 - orr r1, r1, #0xed - lsl r1, r1, #8 - orr r1, r1, #0x9c - lsl r1, r1, #8 - orr r1, r1, #0xe5 + mov r1, #0xe5 + orr r1, r1, #0xa7000000 + orr r1, r1, #0xed0000 + orr r1, r1, #0x9c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x9c - lsl r1, r1, #8 - add r1, r1, #0xe5 + mov r1, #0xe5 + orr r1, r1, #0x9c00 #else mov r1, #0x9ce5 #endif @@ -6544,18 +6213,14 @@ sc_reduce: stm r12!, {r10, r11, lr} sub r12, r12, #32 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x5d - lsl r1, r1, #8 - orr r1, r1, #8 - lsl r1, r1, #8 - orr r1, r1, #0x63 - lsl r1, r1, #8 - orr r1, r1, #41 + mov r1, #41 + orr r1, r1, #0x5d000000 + orr r1, r1, #0x80000 + orr r1, r1, #0x6300 #else #if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) - mov r1, #0x63 - lsl r1, r1, #8 - add r1, r1, #0x29 + mov r1, #0x29 + orr r1, r1, #0x6300 #else mov r1, #0x6329 #endif @@ -6585,18 +6250,14 @@ sc_reduce: stm r12!, {r10, r11, lr} sub r12, r12, #32 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xeb - lsl r1, r1, #8 - orr r1, r1, #33 - lsl r1, r1, #8 - orr r1, r1, #6 - lsl r1, r1, #8 - orr r1, r1, #33 + mov r1, #33 + orr r1, r1, #0xeb000000 + orr r1, r1, #0x210000 + orr r1, r1, #0x600 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x6 - lsl r1, r1, #8 - add r1, r1, #0x21 + mov r1, #0x21 + orr r1, r1, #0x600 #else mov r1, #0x621 #endif @@ -6645,26 +6306,16 @@ sc_reduce: sub r12, r12, #36 asr lr, r11, #25 # Conditionally subtract order starting at bit 125 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa00000 - lsl r1, r1, #8 - add r1, r1, #0x0 -#else mov r1, #0xa0000000 -#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r2, #0x4b - lsl r2, r2, #8 - orr r2, r2, #0x9e - lsl r2, r2, #8 - orr r2, r2, #0xba - lsl r2, r2, #8 - orr r2, r2, #0x7d + mov r2, #0x7d + orr r2, r2, #0x4b000000 + orr r2, r2, #0x9e0000 + orr r2, r2, #0xba00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r2, #0xba - lsl r2, r2, #8 - add r2, r2, #0x7d + mov r2, #0x7d + orr r2, r2, #0xba00 #else mov r2, #0xba7d #endif @@ -6676,18 +6327,14 @@ sc_reduce: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0xcb - lsl r3, r3, #8 - orr r3, r3, #2 - lsl r3, r3, #8 - orr r3, r3, #0x4c - lsl r3, r3, #8 - orr r3, r3, #0x63 + mov r3, #0x63 + orr r3, r3, #0xcb000000 + orr r3, r3, #0x20000 + orr r3, r3, #0x4c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x4c - lsl r3, r3, #8 - add r3, r3, #0x63 + mov r3, #0x63 + orr r3, r3, #0x4c00 #else mov r3, #0x4c63 #endif @@ -6699,18 +6346,14 @@ sc_reduce: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r4, #0xd4 - lsl r4, r4, #8 - orr r4, r4, #0x5e - lsl r4, r4, #8 - orr r4, r4, #0xf3 - lsl r4, r4, #8 - orr r4, r4, #0x9a + mov r4, #0x9a + orr r4, r4, #0xd4000000 + orr r4, r4, #0x5e0000 + orr r4, r4, #0xf300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r4, #0xf3 - lsl r4, r4, #8 - add r4, r4, #0x9a + mov r4, #0x9a + orr r4, r4, #0xf300 #else mov r4, #0xf39a #endif @@ -6722,18 +6365,14 @@ sc_reduce: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r5, #2 - lsl r5, r5, #8 - orr r5, r5, #0x9b - lsl r5, r5, #8 - orr r5, r5, #0xdf - lsl r5, r5, #8 - orr r5, r5, #59 + mov r5, #59 + orr r5, r5, #0x2000000 + orr r5, r5, #0x9b0000 + orr r5, r5, #0xdf00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r5, #0xdf - lsl r5, r5, #8 - add r5, r5, #0x3b + mov r5, #0x3b + orr r5, r5, #0xdf00 #else mov r5, #0xdf3b #endif @@ -6744,13 +6383,7 @@ sc_reduce: movt r5, #0x29b #endif #endif -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r9, #0x20000 - lsl r9, r9, #8 - add r9, r9, #0x0 -#else mov r9, #0x2000000 -#endif and r1, r1, lr and r2, r2, lr and r3, r3, lr @@ -6799,18 +6432,14 @@ sc_reduce: mov r0, sp # * -5cf5d3ed #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa3 - lsl r1, r1, #8 - orr r1, r1, #10 - lsl r1, r1, #8 - orr r1, r1, #44 - lsl r1, r1, #8 - orr r1, r1, #19 + mov r1, #19 + orr r1, r1, #0xa3000000 + orr r1, r1, #0xa0000 + orr r1, r1, #0x2c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x2c - lsl r1, r1, #8 - add 
r1, r1, #0x13 + mov r1, #0x13 + orr r1, r1, #0x2c00 #else mov r1, #0x2c13 #endif @@ -6831,18 +6460,14 @@ sc_reduce: add r0, r0, #4 # * -5812631b #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa7 - lsl r1, r1, #8 - orr r1, r1, #0xed - lsl r1, r1, #8 - orr r1, r1, #0x9c - lsl r1, r1, #8 - orr r1, r1, #0xe5 + mov r1, #0xe5 + orr r1, r1, #0xa7000000 + orr r1, r1, #0xed0000 + orr r1, r1, #0x9c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x9c - lsl r1, r1, #8 - add r1, r1, #0xe5 + mov r1, #0xe5 + orr r1, r1, #0x9c00 #else mov r1, #0x9ce5 #endif @@ -6863,18 +6488,14 @@ sc_reduce: add r0, r0, #4 # * -a2f79cd7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x5d - lsl r1, r1, #8 - orr r1, r1, #8 - lsl r1, r1, #8 - orr r1, r1, #0x63 - lsl r1, r1, #8 - orr r1, r1, #41 + mov r1, #41 + orr r1, r1, #0x5d000000 + orr r1, r1, #0x80000 + orr r1, r1, #0x6300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x63 - lsl r1, r1, #8 - add r1, r1, #0x29 + mov r1, #0x29 + orr r1, r1, #0x6300 #else mov r1, #0x6329 #endif @@ -6895,18 +6516,14 @@ sc_reduce: add r0, r0, #4 # * -14def9df #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xeb - lsl r1, r1, #8 - orr r1, r1, #33 - lsl r1, r1, #8 - orr r1, r1, #6 - lsl r1, r1, #8 - orr r1, r1, #33 + mov r1, #33 + orr r1, r1, #0xeb000000 + orr r1, r1, #0x210000 + orr r1, r1, #0x600 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x6 - lsl r1, r1, #8 - add r1, r1, #0x21 + mov r1, #0x21 + orr r1, r1, #0x600 #else mov r1, #0x621 #endif @@ -6945,18 +6562,14 @@ sc_reduce: sub r0, r0, #16 ldm r0, {r2, r3, r4, r5} #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0x5c - lsl r10, r10, #8 - orr r10, r10, #0xf5 - lsl r10, r10, #8 - orr r10, r10, #0xd3 - lsl r10, r10, #8 - orr r10, r10, #0xed + mov r10, #0xed + orr r10, r10, #0x5c000000 + orr r10, r10, #0xf50000 + orr r10, r10, #0xd300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd3 - lsl r10, r10, #8 - add r10, r10, #0xed + mov r10, #0xed + orr r10, r10, #0xd300 #else mov r10, #0xd3ed #endif @@ -6968,18 +6581,14 @@ sc_reduce: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x58 - lsl r11, r11, #8 - orr r11, r11, #18 - lsl r11, r11, #8 - orr r11, r11, #0x63 - lsl r11, r11, #8 - orr r11, r11, #26 + mov r11, #26 + orr r11, r11, #0x58000000 + orr r11, r11, #0x120000 + orr r11, r11, #0x6300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x63 - lsl r11, r11, #8 - add r11, r11, #0x1a + mov r11, #0x1a + orr r11, r11, #0x6300 #else mov r11, #0x631a #endif @@ -6991,18 +6600,14 @@ sc_reduce: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xa2 - lsl r12, r12, #8 - orr r12, r12, #0xf7 - lsl r12, r12, #8 - orr r12, r12, #0x9c - lsl r12, r12, #8 - orr r12, r12, #0xd6 + mov r12, #0xd6 + orr r12, r12, #0xa2000000 + orr r12, r12, #0xf70000 + orr r12, r12, #0x9c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0x9c - lsl r12, r12, #8 - add r12, r12, #0xd6 + mov r12, #0xd6 + orr r12, r12, #0x9c00 #else mov r12, #0x9cd6 #endif @@ -7014,18 +6619,14 @@ sc_reduce: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov lr, #20 - lsl lr, lr, #8 - orr lr, lr, #0xde - lsl lr, lr, #8 - orr lr, lr, #0xf9 - lsl lr, lr, #8 - orr lr, lr, #0xde + mov lr, #0xde + orr lr, lr, #0x14000000 + orr lr, lr, #0xde0000 + orr lr, lr, #0xf900 #else #if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov lr, #0xf9 - lsl lr, lr, #8 - add lr, lr, #0xde + mov lr, #0xde + orr lr, lr, #0xf900 #else mov lr, #0xf9de #endif @@ -7449,18 +7050,14 @@ sc_muladd: #endif # Add order times bits 504..507 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xa3 - lsl r10, r10, #8 - orr r10, r10, #10 - lsl r10, r10, #8 - orr r10, r10, #44 - lsl r10, r10, #8 - orr r10, r10, #19 + mov r10, #19 + orr r10, r10, #0xa3000000 + orr r10, r10, #0xa0000 + orr r10, r10, #0x2c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0x2c - lsl r10, r10, #8 - add r10, r10, #0x13 + mov r10, #0x13 + orr r10, r10, #0x2c00 #else mov r10, #0x2c13 #endif @@ -7472,18 +7069,14 @@ sc_muladd: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0xa7 - lsl r11, r11, #8 - orr r11, r11, #0xed - lsl r11, r11, #8 - orr r11, r11, #0x9c - lsl r11, r11, #8 - orr r11, r11, #0xe5 + mov r11, #0xe5 + orr r11, r11, #0xa7000000 + orr r11, r11, #0xed0000 + orr r11, r11, #0x9c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x9c - lsl r11, r11, #8 - add r11, r11, #0xe5 + mov r11, #0xe5 + orr r11, r11, #0x9c00 #else mov r11, #0x9ce5 #endif @@ -7501,18 +7094,14 @@ sc_muladd: adc r1, r1, #0 umlal r3, r1, r11, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0x5d - lsl r10, r10, #8 - orr r10, r10, #8 - lsl r10, r10, #8 - orr r10, r10, #0x63 - lsl r10, r10, #8 - orr r10, r10, #41 + mov r10, #41 + orr r10, r10, #0x5d000000 + orr r10, r10, #0x80000 + orr r10, r10, #0x6300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0x63 - lsl r10, r10, #8 - add r10, r10, #0x29 + mov r10, #0x29 + orr r10, r10, #0x6300 #else mov r10, #0x6329 #endif @@ -7524,18 +7113,14 @@ sc_muladd: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0xeb - lsl r11, r11, #8 - orr r11, r11, #33 - lsl r11, r11, #8 - orr r11, r11, #6 - lsl r11, r11, #8 - orr r11, r11, #33 + mov r11, #33 + orr r11, r11, #0xeb000000 + orr r11, r11, #0x210000 + orr r11, r11, #0x600 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x6 - lsl r11, r11, #8 - add r11, r11, #0x21 + mov r11, #0x21 + orr r11, r11, #0x600 #else mov r11, #0x621 #endif @@ -7565,18 +7150,14 @@ sc_muladd: # Sub product of top 8 words and order mov r12, sp #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa3 - lsl r1, r1, #8 - orr r1, r1, #10 - lsl r1, r1, #8 - orr r1, r1, #44 - lsl r1, r1, #8 - orr r1, r1, #19 + mov r1, #19 + orr r1, r1, #0xa3000000 + orr r1, r1, #0xa0000 + orr r1, r1, #0x2c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x2c - lsl r1, r1, #8 - add r1, r1, #0x13 + mov r1, #0x13 + orr r1, r1, #0x2c00 #else mov r1, #0x2c13 #endif @@ -7633,18 +7214,14 @@ sc_muladd: sub r0, r0, #16 sub r12, r12, #32 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa7 - lsl r1, r1, #8 - orr r1, r1, #0xed - lsl r1, r1, #8 - orr r1, r1, #0x9c - lsl r1, r1, #8 - orr r1, r1, #0xe5 + mov r1, #0xe5 + orr r1, r1, #0xa7000000 + orr r1, r1, #0xed0000 + orr r1, r1, #0x9c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x9c - lsl r1, r1, #8 - add r1, r1, #0xe5 + mov r1, #0xe5 + orr r1, r1, #0x9c00 #else mov r1, #0x9ce5 #endif @@ -7695,18 +7272,14 @@ sc_muladd: stm r12!, {r10, r11, lr} sub r12, r12, #32 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x5d - lsl r1, r1, #8 - orr r1, r1, #8 - 
lsl r1, r1, #8 - orr r1, r1, #0x63 - lsl r1, r1, #8 - orr r1, r1, #41 + mov r1, #41 + orr r1, r1, #0x5d000000 + orr r1, r1, #0x80000 + orr r1, r1, #0x6300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x63 - lsl r1, r1, #8 - add r1, r1, #0x29 + mov r1, #0x29 + orr r1, r1, #0x6300 #else mov r1, #0x6329 #endif @@ -7757,18 +7330,14 @@ sc_muladd: stm r12!, {r10, r11, lr} sub r12, r12, #32 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xeb - lsl r1, r1, #8 - orr r1, r1, #33 - lsl r1, r1, #8 - orr r1, r1, #6 - lsl r1, r1, #8 - orr r1, r1, #33 + mov r1, #33 + orr r1, r1, #0xeb000000 + orr r1, r1, #0x210000 + orr r1, r1, #0x600 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x6 - lsl r1, r1, #8 - add r1, r1, #0x21 + mov r1, #0x21 + orr r1, r1, #0x600 #else mov r1, #0x621 #endif @@ -7838,26 +7407,16 @@ sc_muladd: sub r12, r12, #36 asr lr, r11, #25 # Conditionally subtract order starting at bit 125 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa00000 - lsl r1, r1, #8 - add r1, r1, #0x0 -#else mov r1, #0xa0000000 -#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r2, #0x4b - lsl r2, r2, #8 - orr r2, r2, #0x9e - lsl r2, r2, #8 - orr r2, r2, #0xba - lsl r2, r2, #8 - orr r2, r2, #0x7d + mov r2, #0x7d + orr r2, r2, #0x4b000000 + orr r2, r2, #0x9e0000 + orr r2, r2, #0xba00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r2, #0xba - lsl r2, r2, #8 - add r2, r2, #0x7d + mov r2, #0x7d + orr r2, r2, #0xba00 #else mov r2, #0xba7d #endif @@ -7869,18 +7428,14 @@ sc_muladd: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0xcb - lsl r3, r3, #8 - orr r3, r3, #2 - lsl r3, r3, #8 - orr r3, r3, #0x4c - lsl r3, r3, #8 - orr r3, r3, #0x63 + mov r3, #0x63 + orr r3, r3, #0xcb000000 + orr r3, r3, #0x20000 + orr r3, r3, #0x4c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x4c - lsl r3, r3, #8 - add r3, r3, #0x63 + mov r3, #0x63 + orr r3, r3, #0x4c00 #else mov r3, #0x4c63 #endif @@ -7892,18 +7447,14 @@ sc_muladd: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r4, #0xd4 - lsl r4, r4, #8 - orr r4, r4, #0x5e - lsl r4, r4, #8 - orr r4, r4, #0xf3 - lsl r4, r4, #8 - orr r4, r4, #0x9a + mov r4, #0x9a + orr r4, r4, #0xd4000000 + orr r4, r4, #0x5e0000 + orr r4, r4, #0xf300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r4, #0xf3 - lsl r4, r4, #8 - add r4, r4, #0x9a + mov r4, #0x9a + orr r4, r4, #0xf300 #else mov r4, #0xf39a #endif @@ -7915,18 +7466,14 @@ sc_muladd: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r5, #2 - lsl r5, r5, #8 - orr r5, r5, #0x9b - lsl r5, r5, #8 - orr r5, r5, #0xdf - lsl r5, r5, #8 - orr r5, r5, #59 + mov r5, #59 + orr r5, r5, #0x2000000 + orr r5, r5, #0x9b0000 + orr r5, r5, #0xdf00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r5, #0xdf - lsl r5, r5, #8 - add r5, r5, #0x3b + mov r5, #0x3b + orr r5, r5, #0xdf00 #else mov r5, #0xdf3b #endif @@ -7937,13 +7484,7 @@ sc_muladd: movt r5, #0x29b #endif #endif -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r9, #0x20000 - lsl r9, r9, #8 - add r9, r9, #0x0 -#else mov r9, #0x2000000 -#endif and r1, r1, lr and r2, r2, lr and r3, r3, lr @@ -7992,18 +7533,14 @@ sc_muladd: mov r0, sp # * -5cf5d3ed #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa3 - lsl r1, r1, #8 - orr r1, r1, #10 - lsl r1, r1, #8 - orr r1, r1, #44 - lsl r1, r1, #8 - orr r1, 
r1, #19 + mov r1, #19 + orr r1, r1, #0xa3000000 + orr r1, r1, #0xa0000 + orr r1, r1, #0x2c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x2c - lsl r1, r1, #8 - add r1, r1, #0x13 + mov r1, #0x13 + orr r1, r1, #0x2c00 #else mov r1, #0x2c13 #endif @@ -8033,18 +7570,14 @@ sc_muladd: add r0, r0, #4 # * -5812631b #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa7 - lsl r1, r1, #8 - orr r1, r1, #0xed - lsl r1, r1, #8 - orr r1, r1, #0x9c - lsl r1, r1, #8 - orr r1, r1, #0xe5 + mov r1, #0xe5 + orr r1, r1, #0xa7000000 + orr r1, r1, #0xed0000 + orr r1, r1, #0x9c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x9c - lsl r1, r1, #8 - add r1, r1, #0xe5 + mov r1, #0xe5 + orr r1, r1, #0x9c00 #else mov r1, #0x9ce5 #endif @@ -8074,18 +7607,14 @@ sc_muladd: add r0, r0, #4 # * -a2f79cd7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x5d - lsl r1, r1, #8 - orr r1, r1, #8 - lsl r1, r1, #8 - orr r1, r1, #0x63 - lsl r1, r1, #8 - orr r1, r1, #41 + mov r1, #41 + orr r1, r1, #0x5d000000 + orr r1, r1, #0x80000 + orr r1, r1, #0x6300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x63 - lsl r1, r1, #8 - add r1, r1, #0x29 + mov r1, #0x29 + orr r1, r1, #0x6300 #else mov r1, #0x6329 #endif @@ -8115,18 +7644,14 @@ sc_muladd: add r0, r0, #4 # * -14def9df #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xeb - lsl r1, r1, #8 - orr r1, r1, #33 - lsl r1, r1, #8 - orr r1, r1, #6 - lsl r1, r1, #8 - orr r1, r1, #33 + mov r1, #33 + orr r1, r1, #0xeb000000 + orr r1, r1, #0x210000 + orr r1, r1, #0x600 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x6 - lsl r1, r1, #8 - add r1, r1, #0x21 + mov r1, #0x21 + orr r1, r1, #0x600 #else mov r1, #0x621 #endif @@ -8174,18 +7699,14 @@ sc_muladd: sub r0, r0, #16 ldm r0, {r2, r3, r4, r5} #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0x5c - lsl r10, r10, #8 - orr r10, r10, #0xf5 - lsl r10, r10, #8 - orr r10, r10, #0xd3 - lsl r10, r10, #8 - orr r10, r10, #0xed + mov r10, #0xed + orr r10, r10, #0x5c000000 + orr r10, r10, #0xf50000 + orr r10, r10, #0xd300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd3 - lsl r10, r10, #8 - add r10, r10, #0xed + mov r10, #0xed + orr r10, r10, #0xd300 #else mov r10, #0xd3ed #endif @@ -8197,18 +7718,14 @@ sc_muladd: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x58 - lsl r11, r11, #8 - orr r11, r11, #18 - lsl r11, r11, #8 - orr r11, r11, #0x63 - lsl r11, r11, #8 - orr r11, r11, #26 + mov r11, #26 + orr r11, r11, #0x58000000 + orr r11, r11, #0x120000 + orr r11, r11, #0x6300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x63 - lsl r11, r11, #8 - add r11, r11, #0x1a + mov r11, #0x1a + orr r11, r11, #0x6300 #else mov r11, #0x631a #endif @@ -8220,18 +7737,14 @@ sc_muladd: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xa2 - lsl r12, r12, #8 - orr r12, r12, #0xf7 - lsl r12, r12, #8 - orr r12, r12, #0x9c - lsl r12, r12, #8 - orr r12, r12, #0xd6 + mov r12, #0xd6 + orr r12, r12, #0xa2000000 + orr r12, r12, #0xf70000 + orr r12, r12, #0x9c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0x9c - lsl r12, r12, #8 - add r12, r12, #0xd6 + mov r12, #0xd6 + orr r12, r12, #0x9c00 #else mov r12, #0x9cd6 #endif @@ -8243,18 +7756,14 @@ sc_muladd: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov lr, #20 - lsl lr, lr, #8 
- orr lr, lr, #0xde - lsl lr, lr, #8 - orr lr, lr, #0xf9 - lsl lr, lr, #8 - orr lr, lr, #0xde + mov lr, #0xde + orr lr, lr, #0x14000000 + orr lr, lr, #0xde0000 + orr lr, lr, #0xf900 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov lr, #0xf9 - lsl lr, lr, #8 - add lr, lr, #0xde + mov lr, #0xde + orr lr, lr, #0xf900 #else mov lr, #0xf9de #endif @@ -8456,18 +7965,14 @@ sc_muladd: #endif # Add order times bits 504..507 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xa3 - lsl r10, r10, #8 - orr r10, r10, #10 - lsl r10, r10, #8 - orr r10, r10, #44 - lsl r10, r10, #8 - orr r10, r10, #19 + mov r10, #19 + orr r10, r10, #0xa3000000 + orr r10, r10, #0xa0000 + orr r10, r10, #0x2c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0x2c - lsl r10, r10, #8 - add r10, r10, #0x13 + mov r10, #0x13 + orr r10, r10, #0x2c00 #else mov r10, #0x2c13 #endif @@ -8479,18 +7984,14 @@ sc_muladd: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0xa7 - lsl r11, r11, #8 - orr r11, r11, #0xed - lsl r11, r11, #8 - orr r11, r11, #0x9c - lsl r11, r11, #8 - orr r11, r11, #0xe5 + mov r11, #0xe5 + orr r11, r11, #0xa7000000 + orr r11, r11, #0xed0000 + orr r11, r11, #0x9c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x9c - lsl r11, r11, #8 - add r11, r11, #0xe5 + mov r11, #0xe5 + orr r11, r11, #0x9c00 #else mov r11, #0x9ce5 #endif @@ -8505,18 +8006,14 @@ sc_muladd: umlal r2, r1, r10, lr umaal r3, r1, r11, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0x5d - lsl r10, r10, #8 - orr r10, r10, #8 - lsl r10, r10, #8 - orr r10, r10, #0x63 - lsl r10, r10, #8 - orr r10, r10, #41 + mov r10, #41 + orr r10, r10, #0x5d000000 + orr r10, r10, #0x80000 + orr r10, r10, #0x6300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0x63 - lsl r10, r10, #8 - add r10, r10, #0x29 + mov r10, #0x29 + orr r10, r10, #0x6300 #else mov r10, #0x6329 #endif @@ -8528,18 +8025,14 @@ sc_muladd: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0xeb - lsl r11, r11, #8 - orr r11, r11, #33 - lsl r11, r11, #8 - orr r11, r11, #6 - lsl r11, r11, #8 - orr r11, r11, #33 + mov r11, #33 + orr r11, r11, #0xeb000000 + orr r11, r11, #0x210000 + orr r11, r11, #0x600 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x6 - lsl r11, r11, #8 - add r11, r11, #0x21 + mov r11, #0x21 + orr r11, r11, #0x600 #else mov r11, #0x621 #endif @@ -8563,18 +8056,14 @@ sc_muladd: # Sub product of top 8 words and order mov r12, sp #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa3 - lsl r1, r1, #8 - orr r1, r1, #10 - lsl r1, r1, #8 - orr r1, r1, #44 - lsl r1, r1, #8 - orr r1, r1, #19 + mov r1, #19 + orr r1, r1, #0xa3000000 + orr r1, r1, #0xa0000 + orr r1, r1, #0x2c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x2c - lsl r1, r1, #8 - add r1, r1, #0x13 + mov r1, #0x13 + orr r1, r1, #0x2c00 #else mov r1, #0x2c13 #endif @@ -8610,18 +8099,14 @@ sc_muladd: sub r0, r0, #16 sub r12, r12, #32 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa7 - lsl r1, r1, #8 - orr r1, r1, #0xed - lsl r1, r1, #8 - orr r1, r1, #0x9c - lsl r1, r1, #8 - orr r1, r1, #0xe5 + mov r1, #0xe5 + orr r1, r1, #0xa7000000 + orr r1, r1, #0xed0000 + orr r1, r1, #0x9c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x9c - lsl r1, r1, #8 - add r1, r1, #0xe5 + mov r1, #0xe5 + orr r1, r1, #0x9c00 #else mov r1, 
#0x9ce5 #endif @@ -8651,18 +8136,14 @@ sc_muladd: stm r12!, {r10, r11, lr} sub r12, r12, #32 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x5d - lsl r1, r1, #8 - orr r1, r1, #8 - lsl r1, r1, #8 - orr r1, r1, #0x63 - lsl r1, r1, #8 - orr r1, r1, #41 + mov r1, #41 + orr r1, r1, #0x5d000000 + orr r1, r1, #0x80000 + orr r1, r1, #0x6300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x63 - lsl r1, r1, #8 - add r1, r1, #0x29 + mov r1, #0x29 + orr r1, r1, #0x6300 #else mov r1, #0x6329 #endif @@ -8692,18 +8173,14 @@ sc_muladd: stm r12!, {r10, r11, lr} sub r12, r12, #32 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xeb - lsl r1, r1, #8 - orr r1, r1, #33 - lsl r1, r1, #8 - orr r1, r1, #6 - lsl r1, r1, #8 - orr r1, r1, #33 + mov r1, #33 + orr r1, r1, #0xeb000000 + orr r1, r1, #0x210000 + orr r1, r1, #0x600 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x6 - lsl r1, r1, #8 - add r1, r1, #0x21 + mov r1, #0x21 + orr r1, r1, #0x600 #else mov r1, #0x621 #endif @@ -8752,26 +8229,16 @@ sc_muladd: sub r12, r12, #36 asr lr, r11, #25 # Conditionally subtract order starting at bit 125 -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0xa00000 - lsl r1, r1, #8 - add r1, r1, #0x0 -#else mov r1, #0xa0000000 -#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r2, #0x4b - lsl r2, r2, #8 - orr r2, r2, #0x9e - lsl r2, r2, #8 - orr r2, r2, #0xba - lsl r2, r2, #8 - orr r2, r2, #0x7d + mov r2, #0x7d + orr r2, r2, #0x4b000000 + orr r2, r2, #0x9e0000 + orr r2, r2, #0xba00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r2, #0xba - lsl r2, r2, #8 - add r2, r2, #0x7d + mov r2, #0x7d + orr r2, r2, #0xba00 #else mov r2, #0xba7d #endif @@ -8783,18 +8250,14 @@ sc_muladd: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0xcb - lsl r3, r3, #8 - orr r3, r3, #2 - lsl r3, r3, #8 - orr r3, r3, #0x4c - lsl r3, r3, #8 - orr r3, r3, #0x63 + mov r3, #0x63 + orr r3, r3, #0xcb000000 + orr r3, r3, #0x20000 + orr r3, r3, #0x4c00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r3, #0x4c - lsl r3, r3, #8 - add r3, r3, #0x63 + mov r3, #0x63 + orr r3, r3, #0x4c00 #else mov r3, #0x4c63 #endif @@ -8806,18 +8269,14 @@ sc_muladd: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r4, #0xd4 - lsl r4, r4, #8 - orr r4, r4, #0x5e - lsl r4, r4, #8 - orr r4, r4, #0xf3 - lsl r4, r4, #8 - orr r4, r4, #0x9a + mov r4, #0x9a + orr r4, r4, #0xd4000000 + orr r4, r4, #0x5e0000 + orr r4, r4, #0xf300 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r4, #0xf3 - lsl r4, r4, #8 - add r4, r4, #0x9a + mov r4, #0x9a + orr r4, r4, #0xf300 #else mov r4, #0xf39a #endif @@ -8829,18 +8288,14 @@ sc_muladd: #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r5, #2 - lsl r5, r5, #8 - orr r5, r5, #0x9b - lsl r5, r5, #8 - orr r5, r5, #0xdf - lsl r5, r5, #8 - orr r5, r5, #59 + mov r5, #59 + orr r5, r5, #0x2000000 + orr r5, r5, #0x9b0000 + orr r5, r5, #0xdf00 #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r5, #0xdf - lsl r5, r5, #8 - add r5, r5, #0x3b + mov r5, #0x3b + orr r5, r5, #0xdf00 #else mov r5, #0xdf3b #endif @@ -8851,13 +8306,7 @@ sc_muladd: movt r5, #0x29b #endif #endif -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r9, #0x20000 - lsl r9, r9, #8 - add r9, r9, #0x0 -#else mov r9, #0x2000000 -#endif and r1, r1, lr and r2, r2, lr and r3, r3, lr @@ -8906,18 +8355,14 @@ 
sc_muladd:
         mov r0, sp
         # * -5cf5d3ed
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        mov r1, #0xa3
-        lsl r1, r1, #8
-        orr r1, r1, #10
-        lsl r1, r1, #8
-        orr r1, r1, #44
-        lsl r1, r1, #8
-        orr r1, r1, #19
+        mov r1, #19
+        orr r1, r1, #0xa3000000
+        orr r1, r1, #0xa0000
+        orr r1, r1, #0x2c00
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        mov r1, #0x2c
-        lsl r1, r1, #8
-        add r1, r1, #0x13
+        mov r1, #0x13
+        orr r1, r1, #0x2c00
 #else
         mov r1, #0x2c13
 #endif
@@ -8938,18 +8383,14 @@ sc_muladd:
         add r0, r0, #4
         # * -5812631b
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        mov r1, #0xa7
-        lsl r1, r1, #8
-        orr r1, r1, #0xed
-        lsl r1, r1, #8
-        orr r1, r1, #0x9c
-        lsl r1, r1, #8
-        orr r1, r1, #0xe5
+        mov r1, #0xe5
+        orr r1, r1, #0xa7000000
+        orr r1, r1, #0xed0000
+        orr r1, r1, #0x9c00
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        mov r1, #0x9c
-        lsl r1, r1, #8
-        add r1, r1, #0xe5
+        mov r1, #0xe5
+        orr r1, r1, #0x9c00
 #else
         mov r1, #0x9ce5
 #endif
@@ -8970,18 +8411,14 @@ sc_muladd:
         add r0, r0, #4
         # * -a2f79cd7
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        mov r1, #0x5d
-        lsl r1, r1, #8
-        orr r1, r1, #8
-        lsl r1, r1, #8
-        orr r1, r1, #0x63
-        lsl r1, r1, #8
-        orr r1, r1, #41
+        mov r1, #41
+        orr r1, r1, #0x5d000000
+        orr r1, r1, #0x80000
+        orr r1, r1, #0x6300
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        mov r1, #0x63
-        lsl r1, r1, #8
-        add r1, r1, #0x29
+        mov r1, #0x29
+        orr r1, r1, #0x6300
 #else
         mov r1, #0x6329
 #endif
@@ -9002,18 +8439,14 @@ sc_muladd:
         add r0, r0, #4
         # * -14def9df
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        mov r1, #0xeb
-        lsl r1, r1, #8
-        orr r1, r1, #33
-        lsl r1, r1, #8
-        orr r1, r1, #6
-        lsl r1, r1, #8
-        orr r1, r1, #33
+        mov r1, #33
+        orr r1, r1, #0xeb000000
+        orr r1, r1, #0x210000
+        orr r1, r1, #0x600
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        mov r1, #0x6
-        lsl r1, r1, #8
-        add r1, r1, #0x21
+        mov r1, #0x21
+        orr r1, r1, #0x600
 #else
         mov r1, #0x621
 #endif
@@ -9052,18 +8485,14 @@ sc_muladd:
         sub r0, r0, #16
         ldm r0, {r2, r3, r4, r5}
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        mov r10, #0x5c
-        lsl r10, r10, #8
-        orr r10, r10, #0xf5
-        lsl r10, r10, #8
-        orr r10, r10, #0xd3
-        lsl r10, r10, #8
-        orr r10, r10, #0xed
+        mov r10, #0xed
+        orr r10, r10, #0x5c000000
+        orr r10, r10, #0xf50000
+        orr r10, r10, #0xd300
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        mov r10, #0xd3
-        lsl r10, r10, #8
-        add r10, r10, #0xed
+        mov r10, #0xed
+        orr r10, r10, #0xd300
 #else
         mov r10, #0xd3ed
 #endif
@@ -9075,18 +8504,14 @@ sc_muladd:
 #endif
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        mov r11, #0x58
-        lsl r11, r11, #8
-        orr r11, r11, #18
-        lsl r11, r11, #8
-        orr r11, r11, #0x63
-        lsl r11, r11, #8
-        orr r11, r11, #26
+        mov r11, #26
+        orr r11, r11, #0x58000000
+        orr r11, r11, #0x120000
+        orr r11, r11, #0x6300
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        mov r11, #0x63
-        lsl r11, r11, #8
-        add r11, r11, #0x1a
+        mov r11, #0x1a
+        orr r11, r11, #0x6300
 #else
         mov r11, #0x631a
 #endif
@@ -9098,18 +8523,14 @@ sc_muladd:
 #endif
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        mov r12, #0xa2
-        lsl r12, r12, #8
-        orr r12, r12, #0xf7
-        lsl r12, r12, #8
-        orr r12, r12, #0x9c
-        lsl r12, r12, #8
-        orr r12, r12, #0xd6
+        mov r12, #0xd6
+        orr r12, r12, #0xa2000000
+        orr r12, r12, #0xf70000
+        orr r12, r12, #0x9c00
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        mov r12, #0x9c
-        lsl r12, r12, #8
-        add r12, r12, #0xd6
+        mov r12, #0xd6
+        orr r12, r12, #0x9c00
 #else
         mov r12, #0x9cd6
 #endif
@@ -9121,18 +8542,14 @@ sc_muladd:
 #endif
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        mov lr, #20
-        lsl lr, lr, #8
-        orr lr, lr, #0xde
-        lsl lr, lr, #8
-        orr lr, lr, #0xf9
-        lsl lr, lr, #8
-        orr lr, lr, #0xde
+        mov lr, #0xde
+        orr lr, lr, #0x14000000
+        orr lr, lr, #0xde0000
+        orr lr, lr, #0xf900
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        mov lr, #0xf9
-        lsl lr, lr, #8
-        add lr, lr, #0xde
+        mov lr, #0xde
+        orr lr, lr, #0xf900
 #else
         mov lr, #0xf9de
 #endif
diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c
index cacf310983..40462097e1 100644
--- a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c
+++ b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c
@@ -76,14 +76,12 @@ void fe_add_sub_op()
     __asm__ __volatile__ (
         /* Add-Sub */
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "ldr r4, [r2]\n\t"
-        "ldr r5, [r2, #4]\n\t"
+        "ldm r2, {r4, r5}\n\t"
 #else
         "ldrd r4, r5, [r2]\n\t"
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "ldr r6, [r3]\n\t"
-        "ldr r7, [r3, #4]\n\t"
+        "ldm r3, {r6, r7}\n\t"
 #else
         "ldrd r6, r7, [r3]\n\t"
 #endif
@@ -93,8 +91,7 @@ void fe_add_sub_op()
         "adcs r9, r5, r7\n\t"
         "adc r12, r12, #0\n\t"
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "str r8, [r0]\n\t"
-        "str r9, [r0, #4]\n\t"
+        "stm r0, {r8, r9}\n\t"
 #else
         "strd r8, r9, [r0]\n\t"
 #endif
@@ -102,8 +99,7 @@ void fe_add_sub_op()
         "subs r10, r4, r6\n\t"
         "sbcs r11, r5, r7\n\t"
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "str r10, [r1]\n\t"
-        "str r11, [r1, #4]\n\t"
+        "stm r1, {r10, r11}\n\t"
 #else
         "strd r10, r11, [r1]\n\t"
 #endif
@@ -202,8 +198,7 @@ void fe_add_sub_op()
         "mul r12, r3, r12\n\t"
         /* Add -x*modulus (if overflow) */
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "ldr r4, [r0]\n\t"
-        "ldr r5, [r0, #4]\n\t"
+        "ldm r0, {r4, r5}\n\t"
 #else
         "ldrd r4, r5, [r0]\n\t"
 #endif
@@ -218,8 +213,7 @@ void fe_add_sub_op()
         "adcs r6, r6, #0\n\t"
         "adcs r7, r7, #0\n\t"
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "str r4, [r0]\n\t"
-        "str r5, [r0, #4]\n\t"
+        "stm r0, {r4, r5}\n\t"
 #else
         "strd r4, r5, [r0]\n\t"
 #endif
@@ -527,8 +521,7 @@ void fe_copy(fe r_p, const fe a_p)
     __asm__ __volatile__ (
         /* Copy */
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "ldr r2, [%[a]]\n\t"
-        "ldr r3, [%[a], #4]\n\t"
+        "ldm r1, {r2, r3}\n\t"
 #else
         "ldrd r2, r3, [%[a]]\n\t"
 #endif
@@ -539,8 +532,7 @@ void fe_copy(fe r_p, const fe a_p)
         "ldrd r4, r5, [%[a], #8]\n\t"
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "str r2, [%[r]]\n\t"
-        "str r3, [%[r], #4]\n\t"
+        "stm r0, {r2, r3}\n\t"
 #else
         "strd r2, r3, [%[r]]\n\t"
 #endif
@@ -706,19 +698,12 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
         "mov r7, #0\n\t"
         "mov r8, #0\n\t"
         "mov r9, #0\n\t"
-#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r3, #0x800000\n\t"
-        "lsl r3, r3, #8\n\t"
-        "add r3, r3, #0x0\n\t"
-#else
         "mov r3, #0x80000000\n\t"
-#endif
         "ror r3, r3, #31\n\t"
         "ror r3, r3, r12\n\t"
         "asr r3, r3, #31\n\t"
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "ldr r10, [%[base]]\n\t"
-        "ldr r11, [%[base], #4]\n\t"
+        "ldm r1, {r10, r11}\n\t"
 #else
         "ldrd r10, r11, [%[base]]\n\t"
 #endif
@@ -753,19 +738,12 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
         "eor r8, r8, r10\n\t"
         "eor r9, r9, r11\n\t"
         "add %[base], %[base], #0x60\n\t"
-#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r3, #0x800000\n\t"
#0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #30\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r10, [%[base]]\n\t" - "ldr r11, [%[base], #4]\n\t" + "ldm r1, {r10, r11}\n\t" #else "ldrd r10, r11, [%[base]]\n\t" #endif @@ -800,19 +778,12 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #29\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r10, [%[base]]\n\t" - "ldr r11, [%[base], #4]\n\t" + "ldm r1, {r10, r11}\n\t" #else "ldrd r10, r11, [%[base]]\n\t" #endif @@ -847,19 +818,12 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #28\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r10, [%[base]]\n\t" - "ldr r11, [%[base], #4]\n\t" + "ldm r1, {r10, r11}\n\t" #else "ldrd r10, r11, [%[base]]\n\t" #endif @@ -894,19 +858,12 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #27\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r10, [%[base]]\n\t" - "ldr r11, [%[base], #4]\n\t" + "ldm r1, {r10, r11}\n\t" #else "ldrd r10, r11, [%[base]]\n\t" #endif @@ -941,19 +898,12 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #26\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r10, [%[base]]\n\t" - "ldr r11, [%[base], #4]\n\t" + "ldm r1, {r10, r11}\n\t" #else "ldrd r10, r11, [%[base]]\n\t" #endif @@ -988,19 +938,12 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #25\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r10, [%[base]]\n\t" - "ldr r11, [%[base], #4]\n\t" + "ldm r1, {r10, r11}\n\t" #else "ldrd r10, r11, [%[base]]\n\t" #endif @@ -1035,19 +978,12 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else 
"mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #24\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r10, [%[base]]\n\t" - "ldr r11, [%[base], #4]\n\t" + "ldm r1, {r10, r11}\n\t" #else "ldrd r10, r11, [%[base]]\n\t" #endif @@ -1103,8 +1039,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "and r11, r11, r12\n\t" "eor r9, r9, r11\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [%[r]]\n\t" - "str r5, [%[r], #4]\n\t" + "stm r0, {r4, r5}\n\t" #else "strd r4, r5, [%[r]]\n\t" #endif @@ -1134,13 +1069,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "mov r7, #0\n\t" "mov r8, #0\n\t" "mov r9, #0\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #31\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -1181,13 +1110,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #30\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -1228,13 +1151,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #29\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -1275,13 +1192,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #28\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -1322,13 +1233,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #27\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -1369,13 +1274,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #26\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -1416,13 +1315,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #25\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -1463,13 +1356,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, 
#0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #24\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -1563,13 +1450,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "mov r7, #0\n\t" "mov r8, #0\n\t" "mov r9, #0\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #31\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -1610,13 +1491,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #30\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -1657,13 +1532,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #29\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -1704,13 +1573,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #28\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -1751,13 +1614,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #27\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -1798,13 +1655,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #26\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -1845,13 +1696,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #25\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -1892,13 +1737,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #24\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -1992,13 +1831,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "mov r7, #0\n\t" "mov r8, #0\n\t" "mov r9, #0\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add 
r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #31\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -2039,13 +1872,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #30\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -2086,13 +1913,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #29\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -2133,13 +1954,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #28\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -2180,13 +1995,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #27\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -2227,13 +2036,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #26\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -2274,13 +2077,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #25\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -2321,13 +2118,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "eor r8, r8, r10\n\t" "eor r9, r9, r11\n\t" "add %[base], %[base], #0x60\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x800000\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x0\n\t" -#else "mov r3, #0x80000000\n\t" -#endif "ror r3, r3, #24\n\t" "ror r3, r3, r12\n\t" "asr r3, r3, #31\n\t" @@ -3512,16 +3303,13 @@ void fe_mul121666(fe r_p, fe a_p) /* Multiply by 121666 */ "ldm %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #1\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #0xdb\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #0x42\n\t" + "mov r10, #0x42\n\t" + "orr r10, r10, #0x10000\n\t" + "orr r10, r10, #0xdb00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xdb\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x42\n\t" + "mov r10, #0x42\n\t" + "orr r10, r10, #0xdb00\n\t" #else "mov r10, #0xdb42\n\t" #endif @@ -3588,16 +3376,13 @@ void 
         /* Multiply by 121666 */
         "ldm %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov lr, #1\n\t"
-        "lsl lr, lr, #8\n\t"
-        "orr lr, lr, #0xdb\n\t"
-        "lsl lr, lr, #8\n\t"
-        "orr lr, lr, #0x42\n\t"
+        "mov lr, #0x42\n\t"
+        "orr lr, lr, #0x10000\n\t"
+        "orr lr, lr, #0xdb00\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov lr, #0xdb\n\t"
-        "lsl lr, lr, #8\n\t"
-        "add lr, lr, #0x42\n\t"
+        "mov lr, #0x42\n\t"
+        "orr lr, lr, #0xdb00\n\t"
 #else
         "mov lr, #0xdb42\n\t"
 #endif
@@ -5694,18 +5479,14 @@ void sc_reduce(byte* s_p)
         "sub %[s], %[s], #28\n\t"
         /* Add order times bits 504..511 */
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r10, #0xa3\n\t"
-        "lsl r10, r10, #8\n\t"
-        "orr r10, r10, #10\n\t"
-        "lsl r10, r10, #8\n\t"
-        "orr r10, r10, #44\n\t"
-        "lsl r10, r10, #8\n\t"
-        "orr r10, r10, #19\n\t"
+        "mov r10, #19\n\t"
+        "orr r10, r10, #0xa3000000\n\t"
+        "orr r10, r10, #0xa0000\n\t"
+        "orr r10, r10, #0x2c00\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r10, #0x2c\n\t"
-        "lsl r10, r10, #8\n\t"
-        "add r10, r10, #0x13\n\t"
+        "mov r10, #0x13\n\t"
+        "orr r10, r10, #0x2c00\n\t"
 #else
         "mov r10, #0x2c13\n\t"
 #endif
@@ -5717,18 +5498,14 @@ void sc_reduce(byte* s_p)
 #endif
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r11, #0xa7\n\t"
-        "lsl r11, r11, #8\n\t"
-        "orr r11, r11, #0xed\n\t"
-        "lsl r11, r11, #8\n\t"
-        "orr r11, r11, #0x9c\n\t"
-        "lsl r11, r11, #8\n\t"
-        "orr r11, r11, #0xe5\n\t"
+        "mov r11, #0xe5\n\t"
+        "orr r11, r11, #0xa7000000\n\t"
+        "orr r11, r11, #0xed0000\n\t"
+        "orr r11, r11, #0x9c00\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r11, #0x9c\n\t"
-        "lsl r11, r11, #8\n\t"
-        "add r11, r11, #0xe5\n\t"
+        "mov r11, #0xe5\n\t"
+        "orr r11, r11, #0x9c00\n\t"
 #else
         "mov r11, #0x9ce5\n\t"
 #endif
@@ -5746,18 +5523,14 @@ void sc_reduce(byte* s_p)
         "adc r1, r1, #0\n\t"
         "umlal r3, r1, r11, lr\n\t"
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r10, #0x5d\n\t"
-        "lsl r10, r10, #8\n\t"
-        "orr r10, r10, #8\n\t"
-        "lsl r10, r10, #8\n\t"
-        "orr r10, r10, #0x63\n\t"
-        "lsl r10, r10, #8\n\t"
-        "orr r10, r10, #41\n\t"
+        "mov r10, #41\n\t"
+        "orr r10, r10, #0x5d000000\n\t"
+        "orr r10, r10, #0x80000\n\t"
+        "orr r10, r10, #0x6300\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r10, #0x63\n\t"
-        "lsl r10, r10, #8\n\t"
-        "add r10, r10, #0x29\n\t"
+        "mov r10, #0x29\n\t"
+        "orr r10, r10, #0x6300\n\t"
 #else
         "mov r10, #0x6329\n\t"
 #endif
@@ -5769,18 +5542,14 @@ void sc_reduce(byte* s_p)
 #endif
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r11, #0xeb\n\t"
-        "lsl r11, r11, #8\n\t"
-        "orr r11, r11, #33\n\t"
-        "lsl r11, r11, #8\n\t"
-        "orr r11, r11, #6\n\t"
-        "lsl r11, r11, #8\n\t"
-        "orr r11, r11, #33\n\t"
+        "mov r11, #33\n\t"
+        "orr r11, r11, #0xeb000000\n\t"
+        "orr r11, r11, #0x210000\n\t"
+        "orr r11, r11, #0x600\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r11, #0x6\n\t"
-        "lsl r11, r11, #8\n\t"
-        "add r11, r11, #0x21\n\t"
+        "mov r11, #0x21\n\t"
+        "orr r11, r11, #0x600\n\t"
 #else
         "mov r11, #0x621\n\t"
 #endif
@@ -5810,18 +5579,14 @@ void sc_reduce(byte* s_p)
         /* Sub product of top 8 words and order */
         "mov r12, sp\n\t"
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0xa3\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #10\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #44\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #19\n\t"
+        "mov r1, #19\n\t"
+        "orr r1, r1, #0xa3000000\n\t"
+        "orr r1, r1, #0xa0000\n\t"
+        "orr r1, r1, #0x2c00\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0x2c\n\t"
-        "lsl r1, r1, #8\n\t"
-        "add r1, r1, #0x13\n\t"
+        "mov r1, #0x13\n\t"
+        "orr r1, r1, #0x2c00\n\t"
 #else
         "mov r1, #0x2c13\n\t"
 #endif
@@ -5878,18 +5643,14 @@ void sc_reduce(byte* s_p)
         "sub %[s], %[s], #16\n\t"
         "sub r12, r12, #32\n\t"
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0xa7\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #0xed\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #0x9c\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #0xe5\n\t"
+        "mov r1, #0xe5\n\t"
+        "orr r1, r1, #0xa7000000\n\t"
+        "orr r1, r1, #0xed0000\n\t"
+        "orr r1, r1, #0x9c00\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0x9c\n\t"
-        "lsl r1, r1, #8\n\t"
-        "add r1, r1, #0xe5\n\t"
+        "mov r1, #0xe5\n\t"
+        "orr r1, r1, #0x9c00\n\t"
 #else
         "mov r1, #0x9ce5\n\t"
 #endif
@@ -5940,18 +5701,14 @@ void sc_reduce(byte* s_p)
         "stm r12!, {r10, r11, lr}\n\t"
         "sub r12, r12, #32\n\t"
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0x5d\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #8\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #0x63\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #41\n\t"
+        "mov r1, #41\n\t"
+        "orr r1, r1, #0x5d000000\n\t"
+        "orr r1, r1, #0x80000\n\t"
+        "orr r1, r1, #0x6300\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0x63\n\t"
-        "lsl r1, r1, #8\n\t"
-        "add r1, r1, #0x29\n\t"
+        "mov r1, #0x29\n\t"
+        "orr r1, r1, #0x6300\n\t"
 #else
         "mov r1, #0x6329\n\t"
 #endif
@@ -6002,18 +5759,14 @@ void sc_reduce(byte* s_p)
         "stm r12!, {r10, r11, lr}\n\t"
         "sub r12, r12, #32\n\t"
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0xeb\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #33\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #6\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #33\n\t"
+        "mov r1, #33\n\t"
+        "orr r1, r1, #0xeb000000\n\t"
+        "orr r1, r1, #0x210000\n\t"
+        "orr r1, r1, #0x600\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0x6\n\t"
-        "lsl r1, r1, #8\n\t"
-        "add r1, r1, #0x21\n\t"
+        "mov r1, #0x21\n\t"
+        "orr r1, r1, #0x600\n\t"
 #else
         "mov r1, #0x621\n\t"
 #endif
@@ -6083,26 +5836,16 @@ void sc_reduce(byte* s_p)
         "sub r12, r12, #36\n\t"
         "asr lr, r11, #25\n\t"
         /* Conditionally subtract order starting at bit 125 */
-#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0xa00000\n\t"
-        "lsl r1, r1, #8\n\t"
-        "add r1, r1, #0x0\n\t"
-#else
         "mov r1, #0xa0000000\n\t"
-#endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r2, #0x4b\n\t"
-        "lsl r2, r2, #8\n\t"
-        "orr r2, r2, #0x9e\n\t"
-        "lsl r2, r2, #8\n\t"
-        "orr r2, r2, #0xba\n\t"
-        "lsl r2, r2, #8\n\t"
-        "orr r2, r2, #0x7d\n\t"
+        "mov r2, #0x7d\n\t"
+        "orr r2, r2, #0x4b000000\n\t"
+        "orr r2, r2, #0x9e0000\n\t"
+        "orr r2, r2, #0xba00\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r2, #0xba\n\t"
-        "lsl r2, r2, #8\n\t"
-        "add r2, r2, #0x7d\n\t"
+        "mov r2, #0x7d\n\t"
+        "orr r2, r2, #0xba00\n\t"
 #else
         "mov r2, #0xba7d\n\t"
 #endif
@@ -6114,18 +5857,14 @@ void sc_reduce(byte* s_p)
 #endif
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r3, #0xcb\n\t"
-        "lsl r3, r3, #8\n\t"
-        "orr r3, r3, #2\n\t"
-        "lsl r3, r3, #8\n\t"
-        "orr r3, r3, #0x4c\n\t"
-        "lsl r3, r3, #8\n\t"
-        "orr r3, r3, #0x63\n\t"
+        "mov r3, #0x63\n\t"
+        "orr r3, r3, #0xcb000000\n\t"
+        "orr r3, r3, #0x20000\n\t"
+        "orr r3, r3, #0x4c00\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r3, #0x4c\n\t"
-        "lsl r3, r3, #8\n\t"
-        "add r3, r3, #0x63\n\t"
+        "mov r3, #0x63\n\t"
+        "orr r3, r3, #0x4c00\n\t"
 #else
         "mov r3, #0x4c63\n\t"
 #endif
@@ -6137,18 +5876,14 @@ void sc_reduce(byte* s_p)
 #endif
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r4, #0xd4\n\t"
-        "lsl r4, r4, #8\n\t"
-        "orr r4, r4, #0x5e\n\t"
-        "lsl r4, r4, #8\n\t"
-        "orr r4, r4, #0xf3\n\t"
-        "lsl r4, r4, #8\n\t"
-        "orr r4, r4, #0x9a\n\t"
+        "mov r4, #0x9a\n\t"
+        "orr r4, r4, #0xd4000000\n\t"
+        "orr r4, r4, #0x5e0000\n\t"
+        "orr r4, r4, #0xf300\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r4, #0xf3\n\t"
-        "lsl r4, r4, #8\n\t"
-        "add r4, r4, #0x9a\n\t"
+        "mov r4, #0x9a\n\t"
+        "orr r4, r4, #0xf300\n\t"
 #else
         "mov r4, #0xf39a\n\t"
 #endif
@@ -6160,18 +5895,14 @@ void sc_reduce(byte* s_p)
 #endif
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r5, #2\n\t"
-        "lsl r5, r5, #8\n\t"
-        "orr r5, r5, #0x9b\n\t"
-        "lsl r5, r5, #8\n\t"
-        "orr r5, r5, #0xdf\n\t"
-        "lsl r5, r5, #8\n\t"
-        "orr r5, r5, #59\n\t"
+        "mov r5, #59\n\t"
+        "orr r5, r5, #0x2000000\n\t"
+        "orr r5, r5, #0x9b0000\n\t"
+        "orr r5, r5, #0xdf00\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r5, #0xdf\n\t"
-        "lsl r5, r5, #8\n\t"
-        "add r5, r5, #0x3b\n\t"
+        "mov r5, #0x3b\n\t"
+        "orr r5, r5, #0xdf00\n\t"
 #else
         "mov r5, #0xdf3b\n\t"
 #endif
@@ -6182,13 +5913,7 @@ void sc_reduce(byte* s_p)
         "movt r5, #0x29b\n\t"
 #endif
 #endif
-#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r9, #0x20000\n\t"
-        "lsl r9, r9, #8\n\t"
-        "add r9, r9, #0x0\n\t"
-#else
         "mov r9, #0x2000000\n\t"
-#endif
         "and r1, r1, lr\n\t"
         "and r2, r2, lr\n\t"
         "and r3, r3, lr\n\t"
@@ -6237,18 +5962,14 @@ void sc_reduce(byte* s_p)
         "mov %[s], sp\n\t"
         /* * -5cf5d3ed */
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0xa3\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #10\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #44\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #19\n\t"
+        "mov r1, #19\n\t"
+        "orr r1, r1, #0xa3000000\n\t"
+        "orr r1, r1, #0xa0000\n\t"
+        "orr r1, r1, #0x2c00\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0x2c\n\t"
-        "lsl r1, r1, #8\n\t"
-        "add r1, r1, #0x13\n\t"
+        "mov r1, #0x13\n\t"
+        "orr r1, r1, #0x2c00\n\t"
 #else
         "mov r1, #0x2c13\n\t"
 #endif
@@ -6278,18 +5999,14 @@ void sc_reduce(byte* s_p)
         "add %[s], %[s], #4\n\t"
         /* * -5812631b */
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0xa7\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #0xed\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #0x9c\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #0xe5\n\t"
+        "mov r1, #0xe5\n\t"
+        "orr r1, r1, #0xa7000000\n\t"
+        "orr r1, r1, #0xed0000\n\t"
+        "orr r1, r1, #0x9c00\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0x9c\n\t"
-        "lsl r1, r1, #8\n\t"
-        "add r1, r1, #0xe5\n\t"
+        "mov r1, #0xe5\n\t"
+        "orr r1, r1, #0x9c00\n\t"
 #else
         "mov r1, #0x9ce5\n\t"
 #endif
@@ -6319,18 +6036,14 @@ void sc_reduce(byte* s_p)
         "add %[s], %[s], #4\n\t"
         /* * -a2f79cd7 */
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0x5d\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #8\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #0x63\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #41\n\t"
+        "mov r1, #41\n\t"
+        "orr r1, r1, #0x5d000000\n\t"
+        "orr r1, r1, #0x80000\n\t"
+        "orr r1, r1, #0x6300\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0x63\n\t"
-        "lsl r1, r1, #8\n\t"
-        "add r1, r1, #0x29\n\t"
+        "mov r1, #0x29\n\t"
+        "orr r1, r1, #0x6300\n\t"
 #else
         "mov r1, #0x6329\n\t"
 #endif
@@ -6360,18 +6073,14 @@ void sc_reduce(byte* s_p)
         "add %[s], %[s], #4\n\t"
         /* * -14def9df */
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0xeb\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #33\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #6\n\t"
-        "lsl r1, r1, #8\n\t"
-        "orr r1, r1, #33\n\t"
+        "mov r1, #33\n\t"
+        "orr r1, r1, #0xeb000000\n\t"
+        "orr r1, r1, #0x210000\n\t"
+        "orr r1, r1, #0x600\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r1, #0x6\n\t"
-        "lsl r1, r1, #8\n\t"
-        "add r1, r1, #0x21\n\t"
+        "mov r1, #0x21\n\t"
+        "orr r1, r1, #0x600\n\t"
 #else
         "mov r1, #0x621\n\t"
 #endif
@@ -6419,18 +6128,14 @@ void sc_reduce(byte* s_p)
         "sub %[s], %[s], #16\n\t"
         "ldm %[s], {r2, r3, r4, r5}\n\t"
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r10, #0x5c\n\t"
-        "lsl r10, r10, #8\n\t"
-        "orr r10, r10, #0xf5\n\t"
-        "lsl r10, r10, #8\n\t"
-        "orr r10, r10, #0xd3\n\t"
-        "lsl r10, r10, #8\n\t"
-        "orr r10, r10, #0xed\n\t"
+        "mov r10, #0xed\n\t"
+        "orr r10, r10, #0x5c000000\n\t"
+        "orr r10, r10, #0xf50000\n\t"
+        "orr r10, r10, #0xd300\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r10, #0xd3\n\t"
-        "lsl r10, r10, #8\n\t"
-        "add r10, r10, #0xed\n\t"
+        "mov r10, #0xed\n\t"
+        "orr r10, r10, #0xd300\n\t"
 #else
         "mov r10, #0xd3ed\n\t"
 #endif
@@ -6442,18 +6147,14 @@ void sc_reduce(byte* s_p)
 #endif
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r11, #0x58\n\t"
-        "lsl r11, r11, #8\n\t"
-        "orr r11, r11, #18\n\t"
-        "lsl r11, r11, #8\n\t"
-        "orr r11, r11, #0x63\n\t"
-        "lsl r11, r11, #8\n\t"
-        "orr r11, r11, #26\n\t"
+        "mov r11, #26\n\t"
+        "orr r11, r11, #0x58000000\n\t"
+        "orr r11, r11, #0x120000\n\t"
+        "orr r11, r11, #0x6300\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r11, #0x63\n\t"
-        "lsl r11, r11, #8\n\t"
-        "add r11, r11, #0x1a\n\t"
+        "mov r11, #0x1a\n\t"
+        "orr r11, r11, #0x6300\n\t"
 #else
         "mov r11, #0x631a\n\t"
 #endif
@@ -6465,18 +6166,14 @@ void sc_reduce(byte* s_p)
 #endif
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r12, #0xa2\n\t"
-        "lsl r12, r12, #8\n\t"
-        "orr r12, r12, #0xf7\n\t"
-        "lsl r12, r12, #8\n\t"
-        "orr r12, r12, #0x9c\n\t"
-        "lsl r12, r12, #8\n\t"
-        "orr r12, r12, #0xd6\n\t"
+        "mov r12, #0xd6\n\t"
+        "orr r12, r12, #0xa2000000\n\t"
+        "orr r12, r12, #0xf70000\n\t"
+        "orr r12, r12, #0x9c00\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r12, #0x9c\n\t"
-        "lsl r12, r12, #8\n\t"
-        "add r12, r12, #0xd6\n\t"
+        "mov r12, #0xd6\n\t"
+        "orr r12, r12, #0x9c00\n\t"
 #else
         "mov r12, #0x9cd6\n\t"
 #endif
@@ -6488,18 +6185,14 @@ void sc_reduce(byte* s_p)
 #endif
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov lr, #20\n\t"
-        "lsl lr, lr, #8\n\t"
-        "orr lr, lr, #0xde\n\t"
-        "lsl lr, lr, #8\n\t"
-        "orr lr, lr, #0xf9\n\t"
-        "lsl lr, lr, #8\n\t"
-        "orr lr, lr, #0xde\n\t"
+        "mov lr, #0xde\n\t"
+        "orr lr, lr, #0x14000000\n\t"
+        "orr lr, lr, #0xde0000\n\t"
+        "orr lr, lr, #0xf900\n\t"
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov lr, #0xf9\n\t"
-        "lsl lr, lr, #8\n\t"
-        "add lr, lr, #0xde\n\t"
+        "mov lr, #0xde\n\t"
+        "orr lr, lr, #0xf900\n\t"
 #else
         "mov lr, #0xf9de\n\t"
 #endif
@@ -6575,18 +6268,14 @@ void sc_reduce(byte* s_p)
         "sub %[s], %[s], #28\n\t"
         /* Add order times bits 504..511 */
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-        "mov r10, #0xa3\n\t"
#0xa3\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #10\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #44\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #19\n\t" + "mov r10, #19\n\t" + "orr r10, r10, #0xa3000000\n\t" + "orr r10, r10, #0xa0000\n\t" + "orr r10, r10, #0x2c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0x2c\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x13\n\t" + "mov r10, #0x13\n\t" + "orr r10, r10, #0x2c00\n\t" #else "mov r10, #0x2c13\n\t" #endif @@ -6598,18 +6287,14 @@ void sc_reduce(byte* s_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0xa7\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #0xed\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #0x9c\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #0xe5\n\t" + "mov r11, #0xe5\n\t" + "orr r11, r11, #0xa7000000\n\t" + "orr r11, r11, #0xed0000\n\t" + "orr r11, r11, #0x9c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0x9c\n\t" - "lsl r11, r11, #8\n\t" - "add r11, r11, #0xe5\n\t" + "mov r11, #0xe5\n\t" + "orr r11, r11, #0x9c00\n\t" #else "mov r11, #0x9ce5\n\t" #endif @@ -6624,18 +6309,14 @@ void sc_reduce(byte* s_p) "umlal r2, r1, r10, lr\n\t" "umaal r3, r1, r11, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0x5d\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #8\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #0x63\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #41\n\t" + "mov r10, #41\n\t" + "orr r10, r10, #0x5d000000\n\t" + "orr r10, r10, #0x80000\n\t" + "orr r10, r10, #0x6300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0x63\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x29\n\t" + "mov r10, #0x29\n\t" + "orr r10, r10, #0x6300\n\t" #else "mov r10, #0x6329\n\t" #endif @@ -6647,18 +6328,14 @@ void sc_reduce(byte* s_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0xeb\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #33\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #6\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #33\n\t" + "mov r11, #33\n\t" + "orr r11, r11, #0xeb000000\n\t" + "orr r11, r11, #0x210000\n\t" + "orr r11, r11, #0x600\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0x6\n\t" - "lsl r11, r11, #8\n\t" - "add r11, r11, #0x21\n\t" + "mov r11, #0x21\n\t" + "orr r11, r11, #0x600\n\t" #else "mov r11, #0x621\n\t" #endif @@ -6682,18 +6359,14 @@ void sc_reduce(byte* s_p) /* Sub product of top 8 words and order */ "mov r12, sp\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0xa3\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #10\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #44\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #19\n\t" + "mov r1, #19\n\t" + "orr r1, r1, #0xa3000000\n\t" + "orr r1, r1, #0xa0000\n\t" + "orr r1, r1, #0x2c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0x2c\n\t" - "lsl r1, r1, #8\n\t" - "add r1, r1, #0x13\n\t" + "mov r1, #0x13\n\t" + "orr r1, r1, #0x2c00\n\t" #else "mov r1, #0x2c13\n\t" #endif @@ -6729,18 +6402,14 @@ void sc_reduce(byte* s_p) "sub %[s], %[s], #16\n\t" "sub r12, r12, #32\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0xa7\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #0xed\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #0x9c\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #0xe5\n\t" + "mov r1, #0xe5\n\t" + "orr r1, r1, #0xa7000000\n\t" + "orr r1, r1, #0xed0000\n\t" + "orr r1, r1, 
#0x9c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0x9c\n\t" - "lsl r1, r1, #8\n\t" - "add r1, r1, #0xe5\n\t" + "mov r1, #0xe5\n\t" + "orr r1, r1, #0x9c00\n\t" #else "mov r1, #0x9ce5\n\t" #endif @@ -6770,18 +6439,14 @@ void sc_reduce(byte* s_p) "stm r12!, {r10, r11, lr}\n\t" "sub r12, r12, #32\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0x5d\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #8\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #0x63\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #41\n\t" + "mov r1, #41\n\t" + "orr r1, r1, #0x5d000000\n\t" + "orr r1, r1, #0x80000\n\t" + "orr r1, r1, #0x6300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0x63\n\t" - "lsl r1, r1, #8\n\t" - "add r1, r1, #0x29\n\t" + "mov r1, #0x29\n\t" + "orr r1, r1, #0x6300\n\t" #else "mov r1, #0x6329\n\t" #endif @@ -6811,18 +6476,14 @@ void sc_reduce(byte* s_p) "stm r12!, {r10, r11, lr}\n\t" "sub r12, r12, #32\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0xeb\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #33\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #6\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #33\n\t" + "mov r1, #33\n\t" + "orr r1, r1, #0xeb000000\n\t" + "orr r1, r1, #0x210000\n\t" + "orr r1, r1, #0x600\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0x6\n\t" - "lsl r1, r1, #8\n\t" - "add r1, r1, #0x21\n\t" + "mov r1, #0x21\n\t" + "orr r1, r1, #0x600\n\t" #else "mov r1, #0x621\n\t" #endif @@ -6871,26 +6532,16 @@ void sc_reduce(byte* s_p) "sub r12, r12, #36\n\t" "asr lr, r11, #25\n\t" /* Conditionally subtract order starting at bit 125 */ -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0xa00000\n\t" - "lsl r1, r1, #8\n\t" - "add r1, r1, #0x0\n\t" -#else "mov r1, #0xa0000000\n\t" -#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x4b\n\t" - "lsl r2, r2, #8\n\t" - "orr r2, r2, #0x9e\n\t" - "lsl r2, r2, #8\n\t" - "orr r2, r2, #0xba\n\t" - "lsl r2, r2, #8\n\t" - "orr r2, r2, #0x7d\n\t" + "mov r2, #0x7d\n\t" + "orr r2, r2, #0x4b000000\n\t" + "orr r2, r2, #0x9e0000\n\t" + "orr r2, r2, #0xba00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0xba\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x7d\n\t" + "mov r2, #0x7d\n\t" + "orr r2, r2, #0xba00\n\t" #else "mov r2, #0xba7d\n\t" #endif @@ -6902,18 +6553,14 @@ void sc_reduce(byte* s_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0xcb\n\t" - "lsl r3, r3, #8\n\t" - "orr r3, r3, #2\n\t" - "lsl r3, r3, #8\n\t" - "orr r3, r3, #0x4c\n\t" - "lsl r3, r3, #8\n\t" - "orr r3, r3, #0x63\n\t" + "mov r3, #0x63\n\t" + "orr r3, r3, #0xcb000000\n\t" + "orr r3, r3, #0x20000\n\t" + "orr r3, r3, #0x4c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x4c\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0x63\n\t" + "mov r3, #0x63\n\t" + "orr r3, r3, #0x4c00\n\t" #else "mov r3, #0x4c63\n\t" #endif @@ -6925,18 +6572,14 @@ void sc_reduce(byte* s_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r4, #0xd4\n\t" - "lsl r4, r4, #8\n\t" - "orr r4, r4, #0x5e\n\t" - "lsl r4, r4, #8\n\t" - "orr r4, r4, #0xf3\n\t" - "lsl r4, r4, #8\n\t" - "orr r4, r4, #0x9a\n\t" + "mov r4, #0x9a\n\t" + "orr r4, r4, #0xd4000000\n\t" + "orr r4, r4, #0x5e0000\n\t" + "orr r4, r4, #0xf300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r4, #0xf3\n\t" - "lsl r4, r4, #8\n\t" - "add r4, r4, 
#0x9a\n\t" + "mov r4, #0x9a\n\t" + "orr r4, r4, #0xf300\n\t" #else "mov r4, #0xf39a\n\t" #endif @@ -6948,18 +6591,14 @@ void sc_reduce(byte* s_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r5, #2\n\t" - "lsl r5, r5, #8\n\t" - "orr r5, r5, #0x9b\n\t" - "lsl r5, r5, #8\n\t" - "orr r5, r5, #0xdf\n\t" - "lsl r5, r5, #8\n\t" - "orr r5, r5, #59\n\t" + "mov r5, #59\n\t" + "orr r5, r5, #0x2000000\n\t" + "orr r5, r5, #0x9b0000\n\t" + "orr r5, r5, #0xdf00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r5, #0xdf\n\t" - "lsl r5, r5, #8\n\t" - "add r5, r5, #0x3b\n\t" + "mov r5, #0x3b\n\t" + "orr r5, r5, #0xdf00\n\t" #else "mov r5, #0xdf3b\n\t" #endif @@ -6970,13 +6609,7 @@ void sc_reduce(byte* s_p) "movt r5, #0x29b\n\t" #endif #endif -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r9, #0x20000\n\t" - "lsl r9, r9, #8\n\t" - "add r9, r9, #0x0\n\t" -#else "mov r9, #0x2000000\n\t" -#endif "and r1, r1, lr\n\t" "and r2, r2, lr\n\t" "and r3, r3, lr\n\t" @@ -7025,18 +6658,14 @@ void sc_reduce(byte* s_p) "mov %[s], sp\n\t" /* * -5cf5d3ed */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0xa3\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #10\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #44\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #19\n\t" + "mov r1, #19\n\t" + "orr r1, r1, #0xa3000000\n\t" + "orr r1, r1, #0xa0000\n\t" + "orr r1, r1, #0x2c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0x2c\n\t" - "lsl r1, r1, #8\n\t" - "add r1, r1, #0x13\n\t" + "mov r1, #0x13\n\t" + "orr r1, r1, #0x2c00\n\t" #else "mov r1, #0x2c13\n\t" #endif @@ -7057,18 +6686,14 @@ void sc_reduce(byte* s_p) "add %[s], %[s], #4\n\t" /* * -5812631b */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0xa7\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #0xed\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #0x9c\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #0xe5\n\t" + "mov r1, #0xe5\n\t" + "orr r1, r1, #0xa7000000\n\t" + "orr r1, r1, #0xed0000\n\t" + "orr r1, r1, #0x9c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0x9c\n\t" - "lsl r1, r1, #8\n\t" - "add r1, r1, #0xe5\n\t" + "mov r1, #0xe5\n\t" + "orr r1, r1, #0x9c00\n\t" #else "mov r1, #0x9ce5\n\t" #endif @@ -7089,18 +6714,14 @@ void sc_reduce(byte* s_p) "add %[s], %[s], #4\n\t" /* * -a2f79cd7 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0x5d\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #8\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #0x63\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #41\n\t" + "mov r1, #41\n\t" + "orr r1, r1, #0x5d000000\n\t" + "orr r1, r1, #0x80000\n\t" + "orr r1, r1, #0x6300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0x63\n\t" - "lsl r1, r1, #8\n\t" - "add r1, r1, #0x29\n\t" + "mov r1, #0x29\n\t" + "orr r1, r1, #0x6300\n\t" #else "mov r1, #0x6329\n\t" #endif @@ -7121,18 +6742,14 @@ void sc_reduce(byte* s_p) "add %[s], %[s], #4\n\t" /* * -14def9df */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0xeb\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #33\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #6\n\t" - "lsl r1, r1, #8\n\t" - "orr r1, r1, #33\n\t" + "mov r1, #33\n\t" + "orr r1, r1, #0xeb000000\n\t" + "orr r1, r1, #0x210000\n\t" + "orr r1, r1, #0x600\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0x6\n\t" - "lsl r1, r1, #8\n\t" - "add r1, r1, #0x21\n\t" + "mov r1, #0x21\n\t" + "orr r1, r1, #0x600\n\t" #else 
"mov r1, #0x621\n\t" #endif @@ -7171,18 +6788,14 @@ void sc_reduce(byte* s_p) "sub %[s], %[s], #16\n\t" "ldm %[s], {r2, r3, r4, r5}\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0x5c\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #0xf5\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #0xd3\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #0xed\n\t" + "mov r10, #0xed\n\t" + "orr r10, r10, #0x5c000000\n\t" + "orr r10, r10, #0xf50000\n\t" + "orr r10, r10, #0xd300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd3\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xed\n\t" + "mov r10, #0xed\n\t" + "orr r10, r10, #0xd300\n\t" #else "mov r10, #0xd3ed\n\t" #endif @@ -7194,18 +6807,14 @@ void sc_reduce(byte* s_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0x58\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #18\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #0x63\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #26\n\t" + "mov r11, #26\n\t" + "orr r11, r11, #0x58000000\n\t" + "orr r11, r11, #0x120000\n\t" + "orr r11, r11, #0x6300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0x63\n\t" - "lsl r11, r11, #8\n\t" - "add r11, r11, #0x1a\n\t" + "mov r11, #0x1a\n\t" + "orr r11, r11, #0x6300\n\t" #else "mov r11, #0x631a\n\t" #endif @@ -7217,18 +6826,14 @@ void sc_reduce(byte* s_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xa2\n\t" - "lsl r12, r12, #8\n\t" - "orr r12, r12, #0xf7\n\t" - "lsl r12, r12, #8\n\t" - "orr r12, r12, #0x9c\n\t" - "lsl r12, r12, #8\n\t" - "orr r12, r12, #0xd6\n\t" + "mov r12, #0xd6\n\t" + "orr r12, r12, #0xa2000000\n\t" + "orr r12, r12, #0xf70000\n\t" + "orr r12, r12, #0x9c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0x9c\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0xd6\n\t" + "mov r12, #0xd6\n\t" + "orr r12, r12, #0x9c00\n\t" #else "mov r12, #0x9cd6\n\t" #endif @@ -7240,18 +6845,14 @@ void sc_reduce(byte* s_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov lr, #20\n\t" - "lsl lr, lr, #8\n\t" - "orr lr, lr, #0xde\n\t" - "lsl lr, lr, #8\n\t" - "orr lr, lr, #0xf9\n\t" - "lsl lr, lr, #8\n\t" - "orr lr, lr, #0xde\n\t" + "mov lr, #0xde\n\t" + "orr lr, lr, #0x14000000\n\t" + "orr lr, lr, #0xde0000\n\t" + "orr lr, lr, #0xf900\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov lr, #0xf9\n\t" - "lsl lr, lr, #8\n\t" - "add lr, lr, #0xde\n\t" + "mov lr, #0xde\n\t" + "orr lr, lr, #0xf900\n\t" #else "mov lr, #0xf9de\n\t" #endif @@ -7682,18 +7283,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif /* Add order times bits 504..507 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xa3\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #10\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #44\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #19\n\t" + "mov r10, #19\n\t" + "orr r10, r10, #0xa3000000\n\t" + "orr r10, r10, #0xa0000\n\t" + "orr r10, r10, #0x2c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0x2c\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x13\n\t" + "mov r10, #0x13\n\t" + "orr r10, r10, #0x2c00\n\t" #else "mov r10, #0x2c13\n\t" #endif @@ -7705,18 +7302,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0xa7\n\t" - "lsl r11, 
r11, #8\n\t" - "orr r11, r11, #0xed\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #0x9c\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #0xe5\n\t" + "mov r11, #0xe5\n\t" + "orr r11, r11, #0xa7000000\n\t" + "orr r11, r11, #0xed0000\n\t" + "orr r11, r11, #0x9c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0x9c\n\t" - "lsl r11, r11, #8\n\t" - "add r11, r11, #0xe5\n\t" + "mov r11, #0xe5\n\t" + "orr r11, r11, #0x9c00\n\t" #else "mov r11, #0x9ce5\n\t" #endif @@ -7734,18 +7327,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "adc %[a], %[a], #0\n\t" "umlal %[c], %[a], r11, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0x5d\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #8\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #0x63\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #41\n\t" + "mov r10, #41\n\t" + "orr r10, r10, #0x5d000000\n\t" + "orr r10, r10, #0x80000\n\t" + "orr r10, r10, #0x6300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0x63\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x29\n\t" + "mov r10, #0x29\n\t" + "orr r10, r10, #0x6300\n\t" #else "mov r10, #0x6329\n\t" #endif @@ -7757,18 +7346,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0xeb\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #33\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #6\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #33\n\t" + "mov r11, #33\n\t" + "orr r11, r11, #0xeb000000\n\t" + "orr r11, r11, #0x210000\n\t" + "orr r11, r11, #0x600\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0x6\n\t" - "lsl r11, r11, #8\n\t" - "add r11, r11, #0x21\n\t" + "mov r11, #0x21\n\t" + "orr r11, r11, #0x600\n\t" #else "mov r11, #0x621\n\t" #endif @@ -7798,18 +7383,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) /* Sub product of top 8 words and order */ "mov r12, sp\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0xa3\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #10\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #44\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #19\n\t" + "mov %[a], #19\n\t" + "orr %[a], %[a], #0xa3000000\n\t" + "orr %[a], %[a], #0xa0000\n\t" + "orr %[a], %[a], #0x2c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x2c\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0x13\n\t" + "mov %[a], #0x13\n\t" + "orr %[a], %[a], #0x2c00\n\t" #else "mov %[a], #0x2c13\n\t" #endif @@ -7866,18 +7447,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "sub %[s], %[s], #16\n\t" "sub r12, r12, #32\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0xa7\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #0xed\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #0x9c\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #0xe5\n\t" + "mov %[a], #0xe5\n\t" + "orr %[a], %[a], #0xa7000000\n\t" + "orr %[a], %[a], #0xed0000\n\t" + "orr %[a], %[a], #0x9c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x9c\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0xe5\n\t" + "mov %[a], #0xe5\n\t" + "orr %[a], %[a], #0x9c00\n\t" #else "mov %[a], #0x9ce5\n\t" #endif @@ -7928,18 +7505,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "stm r12!, 
{r10, r11, lr}\n\t" "sub r12, r12, #32\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x5d\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #8\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #0x63\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #41\n\t" + "mov %[a], #41\n\t" + "orr %[a], %[a], #0x5d000000\n\t" + "orr %[a], %[a], #0x80000\n\t" + "orr %[a], %[a], #0x6300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x63\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0x29\n\t" + "mov %[a], #0x29\n\t" + "orr %[a], %[a], #0x6300\n\t" #else "mov %[a], #0x6329\n\t" #endif @@ -7990,18 +7563,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "stm r12!, {r10, r11, lr}\n\t" "sub r12, r12, #32\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0xeb\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #33\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #6\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #33\n\t" + "mov %[a], #33\n\t" + "orr %[a], %[a], #0xeb000000\n\t" + "orr %[a], %[a], #0x210000\n\t" + "orr %[a], %[a], #0x600\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x6\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0x21\n\t" + "mov %[a], #0x21\n\t" + "orr %[a], %[a], #0x600\n\t" #else "mov %[a], #0x621\n\t" #endif @@ -8071,26 +7640,16 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "sub r12, r12, #36\n\t" "asr lr, r11, #25\n\t" /* Conditionally subtract order starting at bit 125 */ -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0xa00000\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0x0\n\t" -#else "mov %[a], #0xa0000000\n\t" -#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[b], #0x4b\n\t" - "lsl %[b], %[b], #8\n\t" - "orr %[b], %[b], #0x9e\n\t" - "lsl %[b], %[b], #8\n\t" - "orr %[b], %[b], #0xba\n\t" - "lsl %[b], %[b], #8\n\t" - "orr %[b], %[b], #0x7d\n\t" + "mov %[b], #0x7d\n\t" + "orr %[b], %[b], #0x4b000000\n\t" + "orr %[b], %[b], #0x9e0000\n\t" + "orr %[b], %[b], #0xba00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[b], #0xba\n\t" - "lsl %[b], %[b], #8\n\t" - "add %[b], %[b], #0x7d\n\t" + "mov %[b], #0x7d\n\t" + "orr %[b], %[b], #0xba00\n\t" #else "mov %[b], #0xba7d\n\t" #endif @@ -8102,18 +7661,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[c], #0xcb\n\t" - "lsl %[c], %[c], #8\n\t" - "orr %[c], %[c], #2\n\t" - "lsl %[c], %[c], #8\n\t" - "orr %[c], %[c], #0x4c\n\t" - "lsl %[c], %[c], #8\n\t" - "orr %[c], %[c], #0x63\n\t" + "mov %[c], #0x63\n\t" + "orr %[c], %[c], #0xcb000000\n\t" + "orr %[c], %[c], #0x20000\n\t" + "orr %[c], %[c], #0x4c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[c], #0x4c\n\t" - "lsl %[c], %[c], #8\n\t" - "add %[c], %[c], #0x63\n\t" + "mov %[c], #0x63\n\t" + "orr %[c], %[c], #0x4c00\n\t" #else "mov %[c], #0x4c63\n\t" #endif @@ -8125,18 +7680,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r4, #0xd4\n\t" - "lsl r4, r4, #8\n\t" - "orr r4, r4, #0x5e\n\t" - "lsl r4, r4, #8\n\t" - "orr r4, r4, #0xf3\n\t" - "lsl r4, r4, #8\n\t" - "orr r4, r4, #0x9a\n\t" + "mov r4, #0x9a\n\t" + "orr r4, r4, #0xd4000000\n\t" + "orr r4, r4, #0x5e0000\n\t" + 
"orr r4, r4, #0xf300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r4, #0xf3\n\t" - "lsl r4, r4, #8\n\t" - "add r4, r4, #0x9a\n\t" + "mov r4, #0x9a\n\t" + "orr r4, r4, #0xf300\n\t" #else "mov r4, #0xf39a\n\t" #endif @@ -8148,18 +7699,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r5, #2\n\t" - "lsl r5, r5, #8\n\t" - "orr r5, r5, #0x9b\n\t" - "lsl r5, r5, #8\n\t" - "orr r5, r5, #0xdf\n\t" - "lsl r5, r5, #8\n\t" - "orr r5, r5, #59\n\t" + "mov r5, #59\n\t" + "orr r5, r5, #0x2000000\n\t" + "orr r5, r5, #0x9b0000\n\t" + "orr r5, r5, #0xdf00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r5, #0xdf\n\t" - "lsl r5, r5, #8\n\t" - "add r5, r5, #0x3b\n\t" + "mov r5, #0x3b\n\t" + "orr r5, r5, #0xdf00\n\t" #else "mov r5, #0xdf3b\n\t" #endif @@ -8170,13 +7717,7 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "movt r5, #0x29b\n\t" #endif #endif -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r9, #0x20000\n\t" - "lsl r9, r9, #8\n\t" - "add r9, r9, #0x0\n\t" -#else "mov r9, #0x2000000\n\t" -#endif "and %[a], %[a], lr\n\t" "and %[b], %[b], lr\n\t" "and %[c], %[c], lr\n\t" @@ -8225,18 +7766,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "mov %[s], sp\n\t" /* * -5cf5d3ed */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0xa3\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #10\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #44\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #19\n\t" + "mov %[a], #19\n\t" + "orr %[a], %[a], #0xa3000000\n\t" + "orr %[a], %[a], #0xa0000\n\t" + "orr %[a], %[a], #0x2c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x2c\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0x13\n\t" + "mov %[a], #0x13\n\t" + "orr %[a], %[a], #0x2c00\n\t" #else "mov %[a], #0x2c13\n\t" #endif @@ -8266,18 +7803,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "add %[s], %[s], #4\n\t" /* * -5812631b */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0xa7\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #0xed\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #0x9c\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #0xe5\n\t" + "mov %[a], #0xe5\n\t" + "orr %[a], %[a], #0xa7000000\n\t" + "orr %[a], %[a], #0xed0000\n\t" + "orr %[a], %[a], #0x9c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x9c\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0xe5\n\t" + "mov %[a], #0xe5\n\t" + "orr %[a], %[a], #0x9c00\n\t" #else "mov %[a], #0x9ce5\n\t" #endif @@ -8307,18 +7840,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "add %[s], %[s], #4\n\t" /* * -a2f79cd7 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x5d\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #8\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #0x63\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #41\n\t" + "mov %[a], #41\n\t" + "orr %[a], %[a], #0x5d000000\n\t" + "orr %[a], %[a], #0x80000\n\t" + "orr %[a], %[a], #0x6300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x63\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0x29\n\t" + "mov %[a], #0x29\n\t" + "orr %[a], %[a], #0x6300\n\t" #else "mov %[a], #0x6329\n\t" #endif @@ -8348,18 
+7877,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "add %[s], %[s], #4\n\t" /* * -14def9df */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0xeb\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #33\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #6\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #33\n\t" + "mov %[a], #33\n\t" + "orr %[a], %[a], #0xeb000000\n\t" + "orr %[a], %[a], #0x210000\n\t" + "orr %[a], %[a], #0x600\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x6\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0x21\n\t" + "mov %[a], #0x21\n\t" + "orr %[a], %[a], #0x600\n\t" #else "mov %[a], #0x621\n\t" #endif @@ -8407,18 +7932,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "sub %[s], %[s], #16\n\t" "ldm %[s], {r2, r3, r4, r5}\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0x5c\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #0xf5\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #0xd3\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #0xed\n\t" + "mov r10, #0xed\n\t" + "orr r10, r10, #0x5c000000\n\t" + "orr r10, r10, #0xf50000\n\t" + "orr r10, r10, #0xd300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd3\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xed\n\t" + "mov r10, #0xed\n\t" + "orr r10, r10, #0xd300\n\t" #else "mov r10, #0xd3ed\n\t" #endif @@ -8430,18 +7951,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0x58\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #18\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #0x63\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #26\n\t" + "mov r11, #26\n\t" + "orr r11, r11, #0x58000000\n\t" + "orr r11, r11, #0x120000\n\t" + "orr r11, r11, #0x6300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0x63\n\t" - "lsl r11, r11, #8\n\t" - "add r11, r11, #0x1a\n\t" + "mov r11, #0x1a\n\t" + "orr r11, r11, #0x6300\n\t" #else "mov r11, #0x631a\n\t" #endif @@ -8453,18 +7970,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xa2\n\t" - "lsl r12, r12, #8\n\t" - "orr r12, r12, #0xf7\n\t" - "lsl r12, r12, #8\n\t" - "orr r12, r12, #0x9c\n\t" - "lsl r12, r12, #8\n\t" - "orr r12, r12, #0xd6\n\t" + "mov r12, #0xd6\n\t" + "orr r12, r12, #0xa2000000\n\t" + "orr r12, r12, #0xf70000\n\t" + "orr r12, r12, #0x9c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0x9c\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0xd6\n\t" + "mov r12, #0xd6\n\t" + "orr r12, r12, #0x9c00\n\t" #else "mov r12, #0x9cd6\n\t" #endif @@ -8476,18 +7989,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov lr, #20\n\t" - "lsl lr, lr, #8\n\t" - "orr lr, lr, #0xde\n\t" - "lsl lr, lr, #8\n\t" - "orr lr, lr, #0xf9\n\t" - "lsl lr, lr, #8\n\t" - "orr lr, lr, #0xde\n\t" + "mov lr, #0xde\n\t" + "orr lr, lr, #0x14000000\n\t" + "orr lr, lr, #0xde0000\n\t" + "orr lr, lr, #0xf900\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov lr, #0xf9\n\t" - "lsl lr, lr, #8\n\t" - "add lr, lr, #0xde\n\t" + "mov lr, #0xde\n\t" + "orr lr, lr, #0xf900\n\t" #else "mov lr, #0xf9de\n\t" #endif @@ -8696,18 
+8205,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif /* Add order times bits 504..507 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xa3\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #10\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #44\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #19\n\t" + "mov r10, #19\n\t" + "orr r10, r10, #0xa3000000\n\t" + "orr r10, r10, #0xa0000\n\t" + "orr r10, r10, #0x2c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0x2c\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x13\n\t" + "mov r10, #0x13\n\t" + "orr r10, r10, #0x2c00\n\t" #else "mov r10, #0x2c13\n\t" #endif @@ -8719,18 +8224,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0xa7\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #0xed\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #0x9c\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #0xe5\n\t" + "mov r11, #0xe5\n\t" + "orr r11, r11, #0xa7000000\n\t" + "orr r11, r11, #0xed0000\n\t" + "orr r11, r11, #0x9c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0x9c\n\t" - "lsl r11, r11, #8\n\t" - "add r11, r11, #0xe5\n\t" + "mov r11, #0xe5\n\t" + "orr r11, r11, #0x9c00\n\t" #else "mov r11, #0x9ce5\n\t" #endif @@ -8745,18 +8246,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "umlal %[b], %[a], r10, lr\n\t" "umaal %[c], %[a], r11, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0x5d\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #8\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #0x63\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #41\n\t" + "mov r10, #41\n\t" + "orr r10, r10, #0x5d000000\n\t" + "orr r10, r10, #0x80000\n\t" + "orr r10, r10, #0x6300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0x63\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x29\n\t" + "mov r10, #0x29\n\t" + "orr r10, r10, #0x6300\n\t" #else "mov r10, #0x6329\n\t" #endif @@ -8768,18 +8265,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0xeb\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #33\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #6\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #33\n\t" + "mov r11, #33\n\t" + "orr r11, r11, #0xeb000000\n\t" + "orr r11, r11, #0x210000\n\t" + "orr r11, r11, #0x600\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0x6\n\t" - "lsl r11, r11, #8\n\t" - "add r11, r11, #0x21\n\t" + "mov r11, #0x21\n\t" + "orr r11, r11, #0x600\n\t" #else "mov r11, #0x621\n\t" #endif @@ -8803,18 +8296,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) /* Sub product of top 8 words and order */ "mov r12, sp\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0xa3\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #10\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #44\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #19\n\t" + "mov %[a], #19\n\t" + "orr %[a], %[a], #0xa3000000\n\t" + "orr %[a], %[a], #0xa0000\n\t" + "orr %[a], %[a], #0x2c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x2c\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0x13\n\t" + "mov %[a], #0x13\n\t" + "orr %[a], %[a], 
#0x2c00\n\t" #else "mov %[a], #0x2c13\n\t" #endif @@ -8850,18 +8339,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "sub %[s], %[s], #16\n\t" "sub r12, r12, #32\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0xa7\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #0xed\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #0x9c\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #0xe5\n\t" + "mov %[a], #0xe5\n\t" + "orr %[a], %[a], #0xa7000000\n\t" + "orr %[a], %[a], #0xed0000\n\t" + "orr %[a], %[a], #0x9c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x9c\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0xe5\n\t" + "mov %[a], #0xe5\n\t" + "orr %[a], %[a], #0x9c00\n\t" #else "mov %[a], #0x9ce5\n\t" #endif @@ -8891,18 +8376,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "stm r12!, {r10, r11, lr}\n\t" "sub r12, r12, #32\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x5d\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #8\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #0x63\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #41\n\t" + "mov %[a], #41\n\t" + "orr %[a], %[a], #0x5d000000\n\t" + "orr %[a], %[a], #0x80000\n\t" + "orr %[a], %[a], #0x6300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x63\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0x29\n\t" + "mov %[a], #0x29\n\t" + "orr %[a], %[a], #0x6300\n\t" #else "mov %[a], #0x6329\n\t" #endif @@ -8932,18 +8413,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "stm r12!, {r10, r11, lr}\n\t" "sub r12, r12, #32\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0xeb\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #33\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #6\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #33\n\t" + "mov %[a], #33\n\t" + "orr %[a], %[a], #0xeb000000\n\t" + "orr %[a], %[a], #0x210000\n\t" + "orr %[a], %[a], #0x600\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x6\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0x21\n\t" + "mov %[a], #0x21\n\t" + "orr %[a], %[a], #0x600\n\t" #else "mov %[a], #0x621\n\t" #endif @@ -8992,26 +8469,16 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "sub r12, r12, #36\n\t" "asr lr, r11, #25\n\t" /* Conditionally subtract order starting at bit 125 */ -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0xa00000\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0x0\n\t" -#else "mov %[a], #0xa0000000\n\t" -#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[b], #0x4b\n\t" - "lsl %[b], %[b], #8\n\t" - "orr %[b], %[b], #0x9e\n\t" - "lsl %[b], %[b], #8\n\t" - "orr %[b], %[b], #0xba\n\t" - "lsl %[b], %[b], #8\n\t" - "orr %[b], %[b], #0x7d\n\t" + "mov %[b], #0x7d\n\t" + "orr %[b], %[b], #0x4b000000\n\t" + "orr %[b], %[b], #0x9e0000\n\t" + "orr %[b], %[b], #0xba00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[b], #0xba\n\t" - "lsl %[b], %[b], #8\n\t" - "add %[b], %[b], #0x7d\n\t" + "mov %[b], #0x7d\n\t" + "orr %[b], %[b], #0xba00\n\t" #else "mov %[b], #0xba7d\n\t" #endif @@ -9023,18 +8490,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[c], #0xcb\n\t" - "lsl %[c], %[c], #8\n\t" - 
"orr %[c], %[c], #2\n\t" - "lsl %[c], %[c], #8\n\t" - "orr %[c], %[c], #0x4c\n\t" - "lsl %[c], %[c], #8\n\t" - "orr %[c], %[c], #0x63\n\t" + "mov %[c], #0x63\n\t" + "orr %[c], %[c], #0xcb000000\n\t" + "orr %[c], %[c], #0x20000\n\t" + "orr %[c], %[c], #0x4c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[c], #0x4c\n\t" - "lsl %[c], %[c], #8\n\t" - "add %[c], %[c], #0x63\n\t" + "mov %[c], #0x63\n\t" + "orr %[c], %[c], #0x4c00\n\t" #else "mov %[c], #0x4c63\n\t" #endif @@ -9046,18 +8509,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r4, #0xd4\n\t" - "lsl r4, r4, #8\n\t" - "orr r4, r4, #0x5e\n\t" - "lsl r4, r4, #8\n\t" - "orr r4, r4, #0xf3\n\t" - "lsl r4, r4, #8\n\t" - "orr r4, r4, #0x9a\n\t" + "mov r4, #0x9a\n\t" + "orr r4, r4, #0xd4000000\n\t" + "orr r4, r4, #0x5e0000\n\t" + "orr r4, r4, #0xf300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r4, #0xf3\n\t" - "lsl r4, r4, #8\n\t" - "add r4, r4, #0x9a\n\t" + "mov r4, #0x9a\n\t" + "orr r4, r4, #0xf300\n\t" #else "mov r4, #0xf39a\n\t" #endif @@ -9069,18 +8528,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r5, #2\n\t" - "lsl r5, r5, #8\n\t" - "orr r5, r5, #0x9b\n\t" - "lsl r5, r5, #8\n\t" - "orr r5, r5, #0xdf\n\t" - "lsl r5, r5, #8\n\t" - "orr r5, r5, #59\n\t" + "mov r5, #59\n\t" + "orr r5, r5, #0x2000000\n\t" + "orr r5, r5, #0x9b0000\n\t" + "orr r5, r5, #0xdf00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r5, #0xdf\n\t" - "lsl r5, r5, #8\n\t" - "add r5, r5, #0x3b\n\t" + "mov r5, #0x3b\n\t" + "orr r5, r5, #0xdf00\n\t" #else "mov r5, #0xdf3b\n\t" #endif @@ -9091,13 +8546,7 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "movt r5, #0x29b\n\t" #endif #endif -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r9, #0x20000\n\t" - "lsl r9, r9, #8\n\t" - "add r9, r9, #0x0\n\t" -#else "mov r9, #0x2000000\n\t" -#endif "and %[a], %[a], lr\n\t" "and %[b], %[b], lr\n\t" "and %[c], %[c], lr\n\t" @@ -9146,18 +8595,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "mov %[s], sp\n\t" /* * -5cf5d3ed */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0xa3\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #10\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #44\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #19\n\t" + "mov %[a], #19\n\t" + "orr %[a], %[a], #0xa3000000\n\t" + "orr %[a], %[a], #0xa0000\n\t" + "orr %[a], %[a], #0x2c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x2c\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0x13\n\t" + "mov %[a], #0x13\n\t" + "orr %[a], %[a], #0x2c00\n\t" #else "mov %[a], #0x2c13\n\t" #endif @@ -9178,18 +8623,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "add %[s], %[s], #4\n\t" /* * -5812631b */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0xa7\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #0xed\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #0x9c\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #0xe5\n\t" + "mov %[a], #0xe5\n\t" + "orr %[a], %[a], #0xa7000000\n\t" + "orr %[a], %[a], #0xed0000\n\t" + "orr %[a], %[a], #0x9c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], 
#0x9c\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0xe5\n\t" + "mov %[a], #0xe5\n\t" + "orr %[a], %[a], #0x9c00\n\t" #else "mov %[a], #0x9ce5\n\t" #endif @@ -9210,18 +8651,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "add %[s], %[s], #4\n\t" /* * -a2f79cd7 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x5d\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #8\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #0x63\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #41\n\t" + "mov %[a], #41\n\t" + "orr %[a], %[a], #0x5d000000\n\t" + "orr %[a], %[a], #0x80000\n\t" + "orr %[a], %[a], #0x6300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x63\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0x29\n\t" + "mov %[a], #0x29\n\t" + "orr %[a], %[a], #0x6300\n\t" #else "mov %[a], #0x6329\n\t" #endif @@ -9242,18 +8679,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "add %[s], %[s], #4\n\t" /* * -14def9df */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0xeb\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #33\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #6\n\t" - "lsl %[a], %[a], #8\n\t" - "orr %[a], %[a], #33\n\t" + "mov %[a], #33\n\t" + "orr %[a], %[a], #0xeb000000\n\t" + "orr %[a], %[a], #0x210000\n\t" + "orr %[a], %[a], #0x600\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov %[a], #0x6\n\t" - "lsl %[a], %[a], #8\n\t" - "add %[a], %[a], #0x21\n\t" + "mov %[a], #0x21\n\t" + "orr %[a], %[a], #0x600\n\t" #else "mov %[a], #0x621\n\t" #endif @@ -9292,18 +8725,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "sub %[s], %[s], #16\n\t" "ldm %[s], {r2, r3, r4, r5}\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0x5c\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #0xf5\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #0xd3\n\t" - "lsl r10, r10, #8\n\t" - "orr r10, r10, #0xed\n\t" + "mov r10, #0xed\n\t" + "orr r10, r10, #0x5c000000\n\t" + "orr r10, r10, #0xf50000\n\t" + "orr r10, r10, #0xd300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd3\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xed\n\t" + "mov r10, #0xed\n\t" + "orr r10, r10, #0xd300\n\t" #else "mov r10, #0xd3ed\n\t" #endif @@ -9315,18 +8744,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0x58\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #18\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #0x63\n\t" - "lsl r11, r11, #8\n\t" - "orr r11, r11, #26\n\t" + "mov r11, #26\n\t" + "orr r11, r11, #0x58000000\n\t" + "orr r11, r11, #0x120000\n\t" + "orr r11, r11, #0x6300\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0x63\n\t" - "lsl r11, r11, #8\n\t" - "add r11, r11, #0x1a\n\t" + "mov r11, #0x1a\n\t" + "orr r11, r11, #0x6300\n\t" #else "mov r11, #0x631a\n\t" #endif @@ -9338,18 +8763,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xa2\n\t" - "lsl r12, r12, #8\n\t" - "orr r12, r12, #0xf7\n\t" - "lsl r12, r12, #8\n\t" - "orr r12, r12, #0x9c\n\t" - "lsl r12, r12, #8\n\t" - "orr r12, r12, #0xd6\n\t" + "mov r12, #0xd6\n\t" + "orr r12, r12, #0xa2000000\n\t" + "orr r12, r12, #0xf70000\n\t" + "orr r12, r12, 
#0x9c00\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0x9c\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0xd6\n\t" + "mov r12, #0xd6\n\t" + "orr r12, r12, #0x9c00\n\t" #else "mov r12, #0x9cd6\n\t" #endif @@ -9361,18 +8782,14 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov lr, #20\n\t" - "lsl lr, lr, #8\n\t" - "orr lr, lr, #0xde\n\t" - "lsl lr, lr, #8\n\t" - "orr lr, lr, #0xf9\n\t" - "lsl lr, lr, #8\n\t" - "orr lr, lr, #0xde\n\t" + "mov lr, #0xde\n\t" + "orr lr, lr, #0x14000000\n\t" + "orr lr, lr, #0xde0000\n\t" + "orr lr, lr, #0xf900\n\t" #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov lr, #0xf9\n\t" - "lsl lr, lr, #8\n\t" - "add lr, lr, #0xde\n\t" + "mov lr, #0xde\n\t" + "orr lr, lr, #0xf900\n\t" #else "mov lr, #0xf9de\n\t" #endif diff --git a/wolfcrypt/src/port/arm/armv8-32-kyber-asm.S b/wolfcrypt/src/port/arm/armv8-32-kyber-asm.S index 3fb4f9b64f..2f29f52b83 100644 --- a/wolfcrypt/src/port/arm/armv8-32-kyber-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-kyber-asm.S @@ -177,9 +177,8 @@ kyber_arm32_ntt: adr r1, L_kyber_arm32_ntt_zetas #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -229,17 +228,15 @@ L_kyber_arm32_ntt_loop_123: mul r12, lr, r12 mul r6, lr, r6 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif mul lr, r10, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -251,9 +248,8 @@ L_kyber_arm32_ntt_loop_123: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -265,9 +261,8 @@ L_kyber_arm32_ntt_loop_123: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -283,18 +278,14 @@ L_kyber_arm32_ntt_loop_123: sub lr, r2, r12, lsr #16 add r12, r2, r12, lsr #16 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r6, r6, #0xff - bic r6, r6, #0xff00 - ror r6, r6, #16 + lsr r6, r6, #16 orr r6, r6, lr, lsl #16 ror r6, r6, #16 #else bfi r6, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r2, r2, #0xff - bic r2, r2, #0xff00 - ror r2, r2, #16 + lsr r2, r2, #16 orr r2, r2, r12, lsl #16 ror r2, r2, #16 #else @@ -328,17 +319,15 @@ L_kyber_arm32_ntt_loop_123: mul r12, lr, r12 mul r7, lr, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif mul lr, r10, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -350,9 +339,8 @@ L_kyber_arm32_ntt_loop_123: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, 
diff --git a/wolfcrypt/src/port/arm/armv8-32-kyber-asm.S b/wolfcrypt/src/port/arm/armv8-32-kyber-asm.S
index 3fb4f9b64f..2f29f52b83 100644
--- a/wolfcrypt/src/port/arm/armv8-32-kyber-asm.S
+++ b/wolfcrypt/src/port/arm/armv8-32-kyber-asm.S
@@ -177,9 +177,8 @@ kyber_arm32_ntt:
 adr r1, L_kyber_arm32_ntt_zetas
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -229,17 +228,15 @@ L_kyber_arm32_ntt_loop_123:
 mul r12, lr, r12
 mul r6, lr, r6
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -251,9 +248,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -265,9 +261,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -283,18 +278,14 @@ L_kyber_arm32_ntt_loop_123:
 sub lr, r2, r12, lsr #16
 add r12, r2, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r6, r6, #0xff
- bic r6, r6, #0xff00
- ror r6, r6, #16
+ lsr r6, r6, #16
 orr r6, r6, lr, lsl #16
 ror r6, r6, #16
 #else
 bfi r6, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r2, r2, #0xff
- bic r2, r2, #0xff00
- ror r2, r2, #16
+ lsr r2, r2, #16
 orr r2, r2, r12, lsl #16
 ror r2, r2, #16
 #else
@@ -328,17 +319,15 @@ L_kyber_arm32_ntt_loop_123:
 mul r12, lr, r12
 mul r7, lr, r7
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -350,9 +339,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -364,9 +352,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -382,18 +369,14 @@ L_kyber_arm32_ntt_loop_123:
 sub lr, r3, r12, lsr #16
 add r12, r3, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r7, r7, #0xff
- bic r7, r7, #0xff00
- ror r7, r7, #16
+ lsr r7, r7, #16
 orr r7, r7, lr, lsl #16
 ror r7, r7, #16
 #else
 bfi r7, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r3, r3, #0xff
- bic r3, r3, #0xff00
- ror r3, r3, #16
+ lsr r3, r3, #16
 orr r3, r3, r12, lsl #16
 ror r3, r3, #16
 #else
@@ -427,17 +410,15 @@ L_kyber_arm32_ntt_loop_123:
 mul r12, lr, r12
 mul r8, lr, r8
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -449,9 +430,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -463,9 +443,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -481,18 +460,14 @@ L_kyber_arm32_ntt_loop_123:
 sub lr, r4, r12, lsr #16
 add r12, r4, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r8, r8, #0xff
- bic r8, r8, #0xff00
- ror r8, r8, #16
+ lsr r8, r8, #16
 orr r8, r8, lr, lsl #16
 ror r8, r8, #16
 #else
 bfi r8, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r4, r4, #0xff
- bic r4, r4, #0xff00
- ror r4, r4, #16
+ lsr r4, r4, #16
 orr r4, r4, r12, lsl #16
 ror r4, r4, #16
 #else
@@ -526,17 +501,15 @@ L_kyber_arm32_ntt_loop_123:
 mul r12, lr, r12
 mul r9, lr, r9
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -548,9 +521,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -562,9 +534,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -580,18 +551,14 @@ L_kyber_arm32_ntt_loop_123:
 sub lr, r5, r12, lsr #16
 add r12, r5, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r9, r9, #0xff
- bic r9, r9, #0xff00
- ror r9, r9, #16
+ lsr r9, r9, #16
 orr r9, r9, lr, lsl #16
 ror r9, r9, #16
 #else
 bfi r9, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r5, r5, #0xff
- bic r5, r5, #0xff00
- ror r5, r5, #16
+ lsr r5, r5, #16
 orr r5, r5, r12, lsl #16
 ror r5, r5, #16
 #else
@@ -626,17 +593,15 @@ L_kyber_arm32_ntt_loop_123:
 mul r12, lr, r12
 mul r4, lr, r4
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -648,9 +613,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -662,9 +626,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -680,18 +643,14 @@ L_kyber_arm32_ntt_loop_123:
 sub lr, r2, r12, lsr #16
 add r12, r2, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r4, r4, #0xff
- bic r4, r4, #0xff00
- ror r4, r4, #16
+ lsr r4, r4, #16
 orr r4, r4, lr, lsl #16
 ror r4, r4, #16
 #else
 bfi r4, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r2, r2, #0xff
- bic r2, r2, #0xff00
- ror r2, r2, #16
+ lsr r2, r2, #16
 orr r2, r2, r12, lsl #16
 ror r2, r2, #16
 #else
@@ -725,17 +684,15 @@ L_kyber_arm32_ntt_loop_123:
 mul r12, lr, r12
 mul r5, lr, r5
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -747,9 +704,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -761,9 +717,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -779,18 +734,14 @@ L_kyber_arm32_ntt_loop_123:
 sub lr, r3, r12, lsr #16
 add r12, r3, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r5, r5, #0xff
- bic r5, r5, #0xff00
- ror r5, r5, #16
+ lsr r5, r5, #16
 orr r5, r5, lr, lsl #16
 ror r5, r5, #16
 #else
 bfi r5, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r3, r3, #0xff
- bic r3, r3, #0xff00
- ror r3, r3, #16
+ lsr r3, r3, #16
 orr r3, r3, r12, lsl #16
 ror r3, r3, #16
 #else
@@ -823,17 +774,15 @@ L_kyber_arm32_ntt_loop_123:
 mul r12, lr, r12
 mul r8, lr, r8
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -845,9 +794,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -859,9 +807,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -877,18 +824,14 @@ L_kyber_arm32_ntt_loop_123:
 sub lr, r6, r12, lsr #16
 add r12, r6, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r8, r8, #0xff
- bic r8, r8, #0xff00
- ror r8, r8, #16
+ lsr r8, r8, #16
 orr r8, r8, lr, lsl #16
 ror r8, r8, #16
 #else
 bfi r8, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r6, r6, #0xff
- bic r6, r6, #0xff00
- ror r6, r6, #16
+ lsr r6, r6, #16
 orr r6, r6, r12, lsl #16
 ror r6, r6, #16
 #else
@@ -921,17 +864,15 @@ L_kyber_arm32_ntt_loop_123:
 mul r12, lr, r12
 mul r9, lr, r9
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -943,9 +884,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -957,9 +897,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -975,18 +914,14 @@ L_kyber_arm32_ntt_loop_123:
 sub lr, r7, r12, lsr #16
 add r12, r7, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r9, r9, #0xff
- bic r9, r9, #0xff00
- ror r9, r9, #16
+ lsr r9, r9, #16
 orr r9, r9, lr, lsl #16
 ror r9, r9, #16
 #else
 bfi r9, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r7, r7, #0xff
- bic r7, r7, #0xff00
- ror r7, r7, #16
+ lsr r7, r7, #16
 orr r7, r7, r12, lsl #16
 ror r7, r7, #16
 #else
@@ -1021,17 +956,15 @@ L_kyber_arm32_ntt_loop_123:
 mul r12, lr, r12
 mul r3, lr, r3
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1043,9 +976,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -1057,9 +989,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1075,18 +1006,14 @@ L_kyber_arm32_ntt_loop_123:
 sub lr, r2, r12, lsr #16
 add r12, r2, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r3, r3, #0xff
- bic r3, r3, #0xff00
- ror r3, r3, #16
+ lsr r3, r3, #16
 orr r3, r3, lr, lsl #16
 ror r3, r3, #16
 #else
 bfi r3, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r2, r2, #0xff
- bic r2, r2, #0xff00
- ror r2, r2, #16
+ lsr r2, r2, #16
 orr r2, r2, r12, lsl #16
 ror r2, r2, #16
 #else
@@ -1119,17 +1046,15 @@ L_kyber_arm32_ntt_loop_123:
 mul r12, lr, r12
 mul r5, lr, r5
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1141,9 +1066,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -1155,9 +1079,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1173,18 +1096,14 @@ L_kyber_arm32_ntt_loop_123:
 sub lr, r4, r12, lsr #16
 add r12, r4, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r5, r5, #0xff
- bic r5, r5, #0xff00
- ror r5, r5, #16
+ lsr r5, r5, #16
 orr r5, r5, lr, lsl #16
 ror r5, r5, #16
 #else
 bfi r5, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r4, r4, #0xff
- bic r4, r4, #0xff00
- ror r4, r4, #16
+ lsr r4, r4, #16
 orr r4, r4, r12, lsl #16
 ror r4, r4, #16
 #else
@@ -1219,17 +1138,15 @@ L_kyber_arm32_ntt_loop_123:
 mul r12, lr, r12
 mul r7, lr, r7
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1241,9 +1158,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -1255,9 +1171,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1273,18 +1188,14 @@ L_kyber_arm32_ntt_loop_123:
 sub lr, r6, r12, lsr #16
 add r12, r6, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r7, r7, #0xff
- bic r7, r7, #0xff00
- ror r7, r7, #16
+ lsr r7, r7, #16
 orr r7, r7, lr, lsl #16
 ror r7, r7, #16
 #else
 bfi r7, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r6, r6, #0xff
- bic r6, r6, #0xff00
- ror r6, r6, #16
+ lsr r6, r6, #16
 orr r6, r6, r12, lsl #16
 ror r6, r6, #16
 #else
@@ -1317,17 +1228,15 @@ L_kyber_arm32_ntt_loop_123:
 mul r12, lr, r12
 mul r9, lr, r9
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1339,9 +1248,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -1353,9 +1261,8 @@ L_kyber_arm32_ntt_loop_123:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1371,18 +1278,14 @@ L_kyber_arm32_ntt_loop_123:
 sub lr, r8, r12, lsr #16
 add r12, r8, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r9, r9, #0xff
- bic r9, r9, #0xff00
- ror r9, r9, #16
+ lsr r9, r9, #16
 orr r9, r9, lr, lsl #16
 ror r9, r9, #16
 #else
 bfi r9, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r8, r8, #0xff
- bic r8, r8, #0xff00
- ror r8, r8, #16
+ lsr r8, r8, #16
 orr r8, r8, r12, lsl #16
 ror r8, r8, #16
 #else
@@ -1445,17 +1348,15 @@ L_kyber_arm32_ntt_loop_4_i:
 mul r12, lr, r12
 mul r4, lr, r4
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1467,9 +1368,8 @@ L_kyber_arm32_ntt_loop_4_i:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -1481,9 +1381,8 @@ L_kyber_arm32_ntt_loop_4_i:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1499,18 +1398,14 @@ L_kyber_arm32_ntt_loop_4_i:
 sub lr, r2, r12, lsr #16
 add r12, r2, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r4, r4, #0xff
- bic r4, r4, #0xff00
- ror r4, r4, #16
+ lsr r4, r4, #16
 orr r4, r4, lr, lsl #16
 ror r4, r4, #16
 #else
 bfi r4, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r2, r2, #0xff
- bic r2, r2, #0xff00
- ror r2, r2, #16
+ lsr r2, r2, #16
 orr r2, r2, r12, lsl #16
 ror r2, r2, #16
 #else
@@ -1544,17 +1439,15 @@ L_kyber_arm32_ntt_loop_4_i:
 mul r12, lr, r12
 mul r5, lr, r5
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1566,9 +1459,8 @@ L_kyber_arm32_ntt_loop_4_i:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -1580,9 +1472,8 @@ L_kyber_arm32_ntt_loop_4_i:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1598,18 +1489,14 @@ L_kyber_arm32_ntt_loop_4_i:
 sub lr, r3, r12, lsr #16
 add r12, r3, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r5, r5, #0xff
- bic r5, r5, #0xff00
- ror r5, r5, #16
+ lsr r5, r5, #16
 orr r5, r5, lr, lsl #16
 ror r5, r5, #16
 #else
 bfi r5, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r3, r3, #0xff
- bic r3, r3, #0xff00
- ror r3, r3, #16
+ lsr r3, r3, #16
 orr r3, r3, r12, lsl #16
 ror r3, r3, #16
 #else
@@ -1642,17 +1529,15 @@ L_kyber_arm32_ntt_loop_4_i:
 mul r12, lr, r12
 mul r8, lr, r8
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1664,9 +1549,8 @@ L_kyber_arm32_ntt_loop_4_i:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -1678,9 +1562,8 @@ L_kyber_arm32_ntt_loop_4_i:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1696,18 +1579,14 @@ L_kyber_arm32_ntt_loop_4_i:
 sub lr, r6, r12, lsr #16
 add r12, r6, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r8, r8, #0xff
- bic r8, r8, #0xff00
- ror r8, r8, #16
+ lsr r8, r8, #16
 orr r8, r8, lr, lsl #16
 ror r8, r8, #16
 #else
 bfi r8, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r6, r6, #0xff
- bic r6, r6, #0xff00
- ror r6, r6, #16
+ lsr r6, r6, #16
 orr r6, r6, r12, lsl #16
 ror r6, r6, #16
 #else
@@ -1740,17 +1619,15 @@ L_kyber_arm32_ntt_loop_4_i:
 mul r12, lr, r12
 mul r9, lr, r9
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1762,9 +1639,8 @@ L_kyber_arm32_ntt_loop_4_i:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -1776,9 +1652,8 @@ L_kyber_arm32_ntt_loop_4_i:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1794,18 +1669,14 @@ L_kyber_arm32_ntt_loop_4_i:
 sub lr, r7, r12, lsr #16
 add r12, r7, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r9, r9, #0xff
- bic r9, r9, #0xff00
- ror r9, r9, #16
+ lsr r9, r9, #16
 orr r9, r9, lr, lsl #16
 ror r9, r9, #16
 #else
 bfi r9, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r7, r7, #0xff
- bic r7, r7, #0xff00
- ror r7, r7, #16
+ lsr r7, r7, #16
 orr r7, r7, r12, lsl #16
 ror r7, r7, #16
 #else
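The recurring change in these NTT hunks is the pre-ARMv6T2 fallback for `bfi rd, rn, #0, #16` (insert the low halfword of one register into another): the old `bic`/`bic`/`ror` prologue collapses into a single `lsr`, since shifting the destination right by 16 both discards its low halfword and moves its high halfword into position for the shared `orr`/`ror` pair that follows. A small C model of the rewritten three-instruction sequence (a sketch with hypothetical names, not code from the patch):

    #include <stdint.h>
    #include <assert.h>

    /* models: lsr rd, rd, #16 ; orr rd, rd, rn, lsl #16 ; ror rd, rd, #16 */
    static uint32_t bfi_low16(uint32_t rd, uint32_t rn)
    {
        rd >>= 16;                      /* high half of rd now in bits 0..15 */
        rd |= rn << 16;                 /* low half of rn now in bits 16..31 */
        return (rd >> 16) | (rd << 16); /* ror #16 swaps the halves back */
    }

    int main(void)
    {
        assert(bfi_low16(0x12345678u, 0xdeadbeefu) == 0x1234beefu);
        return 0;
    }
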
@@ -1821,8 +1692,7 @@ L_kyber_arm32_ntt_loop_4_i:
 str r8, [r0, #96]
 str r9, [r0, #112]
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- ldr r2, [sp]
- ldr r3, [sp, #4]
+ ldm sp, {r2, r3}
 #else
 ldrd r2, r3, [sp]
 #endif
@@ -1874,17 +1744,15 @@ L_kyber_arm32_ntt_loop_567:
 mul r12, lr, r12
 mul r6, lr, r6
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1896,9 +1764,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -1910,9 +1777,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1928,18 +1794,14 @@ L_kyber_arm32_ntt_loop_567:
 sub lr, r2, r12, lsr #16
 add r12, r2, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r6, r6, #0xff
- bic r6, r6, #0xff00
- ror r6, r6, #16
+ lsr r6, r6, #16
 orr r6, r6, lr, lsl #16
 ror r6, r6, #16
 #else
 bfi r6, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r2, r2, #0xff
- bic r2, r2, #0xff00
- ror r2, r2, #16
+ lsr r2, r2, #16
 orr r2, r2, r12, lsl #16
 ror r2, r2, #16
 #else
@@ -1973,17 +1835,15 @@ L_kyber_arm32_ntt_loop_567:
 mul r12, lr, r12
 mul r7, lr, r7
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -1995,9 +1855,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -2009,9 +1868,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2027,18 +1885,14 @@ L_kyber_arm32_ntt_loop_567:
 sub lr, r3, r12, lsr #16
 add r12, r3, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r7, r7, #0xff
- bic r7, r7, #0xff00
- ror r7, r7, #16
+ lsr r7, r7, #16
 orr r7, r7, lr, lsl #16
 ror r7, r7, #16
 #else
 bfi r7, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r3, r3, #0xff
- bic r3, r3, #0xff00
- ror r3, r3, #16
+ lsr r3, r3, #16
 orr r3, r3, r12, lsl #16
 ror r3, r3, #16
 #else
@@ -2072,17 +1926,15 @@ L_kyber_arm32_ntt_loop_567:
 mul r12, lr, r12
 mul r8, lr, r8
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2094,9 +1946,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -2108,9 +1959,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2126,18 +1976,14 @@ L_kyber_arm32_ntt_loop_567:
 sub lr, r4, r12, lsr #16
 add r12, r4, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r8, r8, #0xff
- bic r8, r8, #0xff00
- ror r8, r8, #16
+ lsr r8, r8, #16
 orr r8, r8, lr, lsl #16
 ror r8, r8, #16
 #else
 bfi r8, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r4, r4, #0xff
- bic r4, r4, #0xff00
- ror r4, r4, #16
+ lsr r4, r4, #16
 orr r4, r4, r12, lsl #16
 ror r4, r4, #16
 #else
@@ -2171,17 +2017,15 @@ L_kyber_arm32_ntt_loop_567:
 mul r12, lr, r12
 mul r9, lr, r9
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2193,9 +2037,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -2207,9 +2050,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2225,18 +2067,14 @@ L_kyber_arm32_ntt_loop_567:
 sub lr, r5, r12, lsr #16
 add r12, r5, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r9, r9, #0xff
- bic r9, r9, #0xff00
- ror r9, r9, #16
+ lsr r9, r9, #16
 orr r9, r9, lr, lsl #16
 ror r9, r9, #16
 #else
 bfi r9, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r5, r5, #0xff
- bic r5, r5, #0xff00
- ror r5, r5, #16
+ lsr r5, r5, #16
 orr r5, r5, r12, lsl #16
 ror r5, r5, #16
 #else
@@ -2273,17 +2111,15 @@ L_kyber_arm32_ntt_loop_567:
 mul r12, lr, r12
 mul r4, lr, r4
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2295,9 +2131,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -2309,9 +2144,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2327,18 +2161,14 @@ L_kyber_arm32_ntt_loop_567:
 sub lr, r2, r12, lsr #16
 add r12, r2, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r4, r4, #0xff
- bic r4, r4, #0xff00
- ror r4, r4, #16
+ lsr r4, r4, #16
 orr r4, r4, lr, lsl #16
 ror r4, r4, #16
 #else
 bfi r4, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r2, r2, #0xff
- bic r2, r2, #0xff00
- ror r2, r2, #16
+ lsr r2, r2, #16
 orr r2, r2, r12, lsl #16
 ror r2, r2, #16
 #else
@@ -2372,17 +2202,15 @@ L_kyber_arm32_ntt_loop_567:
 mul r12, lr, r12
 mul r5, lr, r5
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2394,9 +2222,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -2408,9 +2235,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2426,18 +2252,14 @@ L_kyber_arm32_ntt_loop_567:
 sub lr, r3, r12, lsr #16
 add r12, r3, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r5, r5, #0xff
- bic r5, r5, #0xff00
- ror r5, r5, #16
+ lsr r5, r5, #16
 orr r5, r5, lr, lsl #16
 ror r5, r5, #16
 #else
 bfi r5, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r3, r3, #0xff
- bic r3, r3, #0xff00
- ror r3, r3, #16
+ lsr r3, r3, #16
 orr r3, r3, r12, lsl #16
 ror r3, r3, #16
 #else
@@ -2470,17 +2292,15 @@ L_kyber_arm32_ntt_loop_567:
 mul r12, lr, r12
 mul r8, lr, r8
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2492,9 +2312,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -2506,9 +2325,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2524,18 +2342,14 @@ L_kyber_arm32_ntt_loop_567:
 sub lr, r6, r12, lsr #16
 add r12, r6, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r8, r8, #0xff
- bic r8, r8, #0xff00
- ror r8, r8, #16
+ lsr r8, r8, #16
 orr r8, r8, lr, lsl #16
 ror r8, r8, #16
 #else
 bfi r8, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r6, r6, #0xff
- bic r6, r6, #0xff00
- ror r6, r6, #16
+ lsr r6, r6, #16
 orr r6, r6, r12, lsl #16
 ror r6, r6, #16
 #else
@@ -2568,17 +2382,15 @@ L_kyber_arm32_ntt_loop_567:
 mul r12, lr, r12
 mul r9, lr, r9
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2590,9 +2402,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -2604,9 +2415,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2622,18 +2432,14 @@ L_kyber_arm32_ntt_loop_567:
 sub lr, r7, r12, lsr #16
 add r12, r7, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r9, r9, #0xff
- bic r9, r9, #0xff00
- ror r9, r9, #16
+ lsr r9, r9, #16
 orr r9, r9, lr, lsl #16
 ror r9, r9, #16
 #else
 bfi r9, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r7, r7, #0xff
- bic r7, r7, #0xff00
- ror r7, r7, #16
+ lsr r7, r7, #16
 orr r7, r7, r12, lsl #16
 ror r7, r7, #16
 #else
@@ -2670,17 +2476,15 @@ L_kyber_arm32_ntt_loop_567:
 mul r12, lr, r12
 mul r3, lr, r3
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2692,9 +2496,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -2706,9 +2509,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2724,18 +2526,14 @@ L_kyber_arm32_ntt_loop_567:
 sub lr, r2, r12, lsr #16
 add r12, r2, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r3, r3, #0xff
- bic r3, r3, #0xff00
- ror r3, r3, #16
+ lsr r3, r3, #16
 orr r3, r3, lr, lsl #16
 ror r3, r3, #16
 #else
 bfi r3, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r2, r2, #0xff
- bic r2, r2, #0xff00
- ror r2, r2, #16
+ lsr r2, r2, #16
 orr r2, r2, r12, lsl #16
 ror r2, r2, #16
 #else
@@ -2768,17 +2566,15 @@ L_kyber_arm32_ntt_loop_567:
 mul r12, lr, r12
 mul r5, lr, r5
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2790,9 +2586,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -2804,9 +2599,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2822,18 +2616,14 @@ L_kyber_arm32_ntt_loop_567:
 sub lr, r4, r12, lsr #16
 add r12, r4, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r5, r5, #0xff
- bic r5, r5, #0xff00
- ror r5, r5, #16
+ lsr r5, r5, #16
 orr r5, r5, lr, lsl #16
 ror r5, r5, #16
 #else
 bfi r5, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r4, r4, #0xff
- bic r4, r4, #0xff00
- ror r4, r4, #16
+ lsr r4, r4, #16
 orr r4, r4, r12, lsl #16
 ror r4, r4, #16
 #else
@@ -2870,17 +2660,15 @@ L_kyber_arm32_ntt_loop_567:
 mul r12, lr, r12
 mul r7, lr, r7
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2892,9 +2680,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -2906,9 +2693,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2924,18 +2710,14 @@ L_kyber_arm32_ntt_loop_567:
 sub lr, r6, r12, lsr #16
 add r12, r6, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r7, r7, #0xff
- bic r7, r7, #0xff00
- ror r7, r7, #16
+ lsr r7, r7, #16
 orr r7, r7, lr, lsl #16
 ror r7, r7, #16
 #else
 bfi r7, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r6, r6, #0xff
- bic r6, r6, #0xff00
- ror r6, r6, #16
+ lsr r6, r6, #16
 orr r6, r6, r12, lsl #16
 ror r6, r6, #16
 #else
@@ -2968,17 +2750,15 @@ L_kyber_arm32_ntt_loop_567:
 mul r12, lr, r12
 mul r9, lr, r9
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
 mul lr, r10, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -2990,9 +2770,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -3004,9 +2783,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -3022,18 +2800,14 @@ L_kyber_arm32_ntt_loop_567:
 sub lr, r8, r12, lsr #16
 add r12, r8, r12, lsr #16
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r9, r9, #0xff
- bic r9, r9, #0xff00
- ror r9, r9, #16
+ lsr r9, r9, #16
 orr r9, r9, lr, lsl #16
 ror r9, r9, #16
 #else
 bfi r9, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r8, r8, #0xff
- bic r8, r8, #0xff00
- ror r8, r8, #16
+ lsr r8, r8, #16
 orr r8, r8, r12, lsl #16
 ror r8, r8, #16
 #else
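In all of these butterflies, `0xd01` is the Kyber/ML-KEM modulus q = 3329, `0xcff` (3327) is -q^-1 mod 2^16, and the `0x4ebf` (20159) loaded just below is the Barrett multiplier round(2^26 / q); each `mul`/`mla` pair around those constants therefore performs a 16-bit Montgomery reduction. A compact C model of that reduction step (an illustrative sketch, not wolfSSL's API):

    #include <stdint.h>
    #include <assert.h>

    #define KYBER_Q     0xd01  /* 3329 */
    #define KYBER_QNINV 0xcff  /* 3327 == -(q^-1) mod 2^16 */

    /* returns a * 2^-16 mod q, as the mul/mla pairs above compute */
    static int16_t mont_reduce(int32_t a)
    {
        int16_t t = (int16_t)((int16_t)a * KYBER_QNINV); /* a * -q^-1 mod 2^16 */
        return (int16_t)((a + (int32_t)t * KYBER_Q) >> 16); /* low half cancels */
    }

    int main(void)
    {
        assert(mont_reduce(KYBER_Q) % KYBER_Q == 0); /* q * 2^-16 == 0 mod q */
        return 0;
    }
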
@@ -3042,9 +2816,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r11, #0xaf
- lsl r11, r11, #8
- add r11, r11, #0xc0
+ mov r11, #0xc0
+ orr r11, r11, #0xaf00
 #else
 mov r11, #0xafc0
 #endif
@@ -3055,16 +2828,14 @@
 #endif
 #else
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r11, #0x4e
- lsl r11, r11, #8
- add r11, r11, #0xbf
+ mov r11, #0xbf
+ orr r11, r11, #0x4e00
 #else
 mov r11, #0x4ebf
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -3359,9 +3130,8 @@ L_kyber_arm32_ntt_loop_567:
 #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -3531,9 +3301,8 @@ kyber_arm32_invntt:
 adr r1, L_kyber_arm32_invntt_zetas_inv
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6)
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -3585,18 +3354,14 @@ L_kyber_arm32_invntt_loop_765:
 sub r12, r2, r3
 add r2, r2, r3
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r12, r12, #0xff
- bic r12, r12, #0xff00
- ror r12, r12, #16
+ lsr r12, r12, #16
 orr r12, r12, lr, lsl #16
 ror r12, r12, #16
 #else
 bfi r12, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r2, r2, #0xff
- bic r2, r2, #0xff00
- ror r2, r2, #16
+ lsr r2, r2, #16
 orr r2, r2, r10, lsl #16
 ror r2, r2, #16
 #else
@@ -3618,9 +3383,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mul r12, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -3632,9 +3396,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -3646,9 +3409,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -3660,9 +3422,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -3675,9 +3436,7 @@ L_kyber_arm32_invntt_loop_765:
 lsr r12, r12, #16
 mla r3, r10, lr, r3
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r3, r3, #0xff
- bic r3, r3, #0xff00
- ror r3, r3, #16
+ lsr r3, r3, #16
 orr r3, r3, r12, lsl #16
 ror r3, r3, #16
 #else
@@ -3712,18 +3471,14 @@ L_kyber_arm32_invntt_loop_765:
 sub r12, r4, r5
 add r4, r4, r5
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r12, r12, #0xff
- bic r12, r12, #0xff00
- ror r12, r12, #16
+ lsr r12, r12, #16
 orr r12, r12, lr, lsl #16
 ror r12, r12, #16
 #else
 bfi r12, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r4, r4, #0xff
- bic r4, r4, #0xff00
- ror r4, r4, #16
+ lsr r4, r4, #16
 orr r4, r4, r10, lsl #16
 ror r4, r4, #16
 #else
@@ -3744,9 +3499,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mul r12, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -3758,9 +3512,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -3772,9 +3525,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -3786,9 +3538,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -3801,9 +3552,7 @@ L_kyber_arm32_invntt_loop_765:
 lsr r12, r12, #16
 mla r5, r10, lr, r5
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r5, r5, #0xff
- bic r5, r5, #0xff00
- ror r5, r5, #16
+ lsr r5, r5, #16
 orr r5, r5, r12, lsl #16
 ror r5, r5, #16
 #else
@@ -3841,18 +3590,14 @@ L_kyber_arm32_invntt_loop_765:
 sub r12, r6, r7
 add r6, r6, r7
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r12, r12, #0xff
- bic r12, r12, #0xff00
- ror r12, r12, #16
+ lsr r12, r12, #16
 orr r12, r12, lr, lsl #16
 ror r12, r12, #16
 #else
 bfi r12, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r6, r6, #0xff
- bic r6, r6, #0xff00
- ror r6, r6, #16
+ lsr r6, r6, #16
 orr r6, r6, r10, lsl #16
 ror r6, r6, #16
 #else
@@ -3874,9 +3619,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mul r12, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -3888,9 +3632,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -3902,9 +3645,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -3916,9 +3658,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -3931,9 +3672,7 @@ L_kyber_arm32_invntt_loop_765:
 lsr r12, r12, #16
 mla r7, r10, lr, r7
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r7, r7, #0xff
- bic r7, r7, #0xff00
- ror r7, r7, #16
+ lsr r7, r7, #16
 orr r7, r7, r12, lsl #16
 ror r7, r7, #16
 #else
@@ -3968,18 +3707,14 @@ L_kyber_arm32_invntt_loop_765:
 sub r12, r8, r9
 add r8, r8, r9
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r12, r12, #0xff
- bic r12, r12, #0xff00
- ror r12, r12, #16
+ lsr r12, r12, #16
 orr r12, r12, lr, lsl #16
 ror r12, r12, #16
 #else
 bfi r12, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r8, r8, #0xff
- bic r8, r8, #0xff00
- ror r8, r8, #16
+ lsr r8, r8, #16
 orr r8, r8, r10, lsl #16
 ror r8, r8, #16
 #else
@@ -4000,9 +3735,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mul r12, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -4014,9 +3748,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -4028,9 +3761,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -4042,9 +3774,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -4057,9 +3788,7 @@ L_kyber_arm32_invntt_loop_765:
 lsr r12, r12, #16
 mla r9, r10, lr, r9
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r9, r9, #0xff
- bic r9, r9, #0xff00
- ror r9, r9, #16
+ lsr r9, r9, #16
 orr r9, r9, r12, lsl #16
 ror r9, r9, #16
 #else
@@ -4097,18 +3826,14 @@ L_kyber_arm32_invntt_loop_765:
 sub r12, r2, r4
 add r2, r2, r4
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r12, r12, #0xff
- bic r12, r12, #0xff00
- ror r12, r12, #16
+ lsr r12, r12, #16
 orr r12, r12, lr, lsl #16
 ror r12, r12, #16
 #else
 bfi r12, lr, #0, #16
 #endif
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r2, r2, #0xff
- bic r2, r2, #0xff00
- ror r2, r2, #16
+ lsr r2, r2, #16
 orr r2, r2, r10, lsl #16
 ror r2, r2, #16
 #else
@@ -4130,9 +3855,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mul r12, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -4144,9 +3868,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -4158,9 +3881,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mla r12, r10, lr, r12
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xc
- lsl r10, r10, #8
- add r10, r10, #0xff
+ mov r10, #0xff
+ orr r10, r10, #0xc00
 #else
 mov r10, #0xcff
 #endif
@@ -4172,9 +3894,8 @@ L_kyber_arm32_invntt_loop_765:
 #endif
 mul lr, r10, lr
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- mov r10, #0xd
- lsl r10, r10, #8
- add r10, r10, #0x1
+ mov r10, #0x1
+ orr r10, r10, #0xd00
 #else
 mov r10, #0xd01
 #endif
@@ -4187,9 +3908,7 @@ L_kyber_arm32_invntt_loop_765:
 lsr r12, r12, #16
 mla r4, r10, lr, r4
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
- bic r4, r4, #0xff
- bic r4, r4, #0xff00
- ror r4, r4, #16
+ lsr r4, r4, #16
 orr r4, r4, r12, lsl #16
 ror r4, r4, #16
 #else
@@ -4224,18 +3943,14 @@ L_kyber_arm32_invntt_loop_765:
 sub r12, r3, r5
 add r3, r3, r5
 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
-
bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r3, r3, #0xff - bic r3, r3, #0xff00 - ror r3, r3, #16 + lsr r3, r3, #16 orr r3, r3, r10, lsl #16 ror r3, r3, #16 #else @@ -4257,9 +3972,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -4271,9 +3985,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -4285,9 +3998,8 @@ L_kyber_arm32_invntt_loop_765: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -4299,9 +4011,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -4314,9 +4025,7 @@ L_kyber_arm32_invntt_loop_765: lsr r12, r12, #16 mla r5, r10, lr, r5 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r5, r5, #0xff - bic r5, r5, #0xff00 - ror r5, r5, #16 + lsr r5, r5, #16 orr r5, r5, r12, lsl #16 ror r5, r5, #16 #else @@ -4351,18 +4060,14 @@ L_kyber_arm32_invntt_loop_765: sub r12, r6, r8 add r6, r6, r8 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r6, r6, #0xff - bic r6, r6, #0xff00 - ror r6, r6, #16 + lsr r6, r6, #16 orr r6, r6, r10, lsl #16 ror r6, r6, #16 #else @@ -4383,9 +4088,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -4397,9 +4101,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -4411,9 +4114,8 @@ L_kyber_arm32_invntt_loop_765: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -4425,9 +4127,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -4440,9 +4141,7 @@ L_kyber_arm32_invntt_loop_765: lsr r12, r12, #16 mla r8, r10, lr, r8 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r8, r8, #0xff - bic r8, r8, #0xff00 - ror r8, r8, #16 + lsr r8, r8, #16 orr r8, r8, r12, lsl #16 ror r8, r8, #16 #else @@ -4477,18 +4176,14 @@ L_kyber_arm32_invntt_loop_765: sub r12, r7, r9 add r7, r7, r9 #if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r7, r7, #0xff - bic r7, r7, #0xff00 - ror r7, r7, #16 + lsr r7, r7, #16 orr r7, r7, r10, lsl #16 ror r7, r7, #16 #else @@ -4509,9 +4204,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -4523,9 +4217,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -4537,9 +4230,8 @@ L_kyber_arm32_invntt_loop_765: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -4551,9 +4243,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -4566,9 +4257,7 @@ L_kyber_arm32_invntt_loop_765: lsr r12, r12, #16 mla r9, r10, lr, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r9, r9, #0xff - bic r9, r9, #0xff00 - ror r9, r9, #16 + lsr r9, r9, #16 orr r9, r9, r12, lsl #16 ror r9, r9, #16 #else @@ -4606,18 +4295,14 @@ L_kyber_arm32_invntt_loop_765: sub r12, r2, r6 add r2, r2, r6 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r2, r2, #0xff - bic r2, r2, #0xff00 - ror r2, r2, #16 + lsr r2, r2, #16 orr r2, r2, r10, lsl #16 ror r2, r2, #16 #else @@ -4639,9 +4324,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -4653,9 +4337,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -4667,9 +4350,8 @@ L_kyber_arm32_invntt_loop_765: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -4681,9 +4363,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -4696,9 +4377,7 @@ L_kyber_arm32_invntt_loop_765: lsr r12, r12, #16 mla r6, r10, lr, r6 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r6, r6, #0xff - bic r6, r6, #0xff00 - ror r6, r6, #16 + lsr r6, r6, #16 orr r6, r6, r12, lsl #16 ror r6, r6, #16 #else @@ -4733,18 +4412,14 @@ L_kyber_arm32_invntt_loop_765: 
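The second recurring rewrite, interleaved with the butterflies in these hunks, is the emulation of bfi (bit-field insert, available from ARMv6T2) used to write a 16-bit result back into the low halfword of a packed register. The old emulation cleared the low halfword with two bic instructions before rotating; the new one discards it with a single lsr and reuses the unchanged orr/ror tail. A small C model of both sequences (a sketch only; ror32 and the insert_low16 names are local helpers, not wolfSSL functions):

    #include <assert.h>
    #include <stdint.h>

    /* Rotate right helper for modelling the asm (valid for 0 < n < 32). */
    static uint32_t ror32(uint32_t v, unsigned n)
    {
        return (v >> n) | (v << (32u - n));
    }

    /* Old: bic/bic clears the low halfword, then ror/orr/ror inserts y. */
    static uint32_t insert_low16_old(uint32_t x, uint32_t y)
    {
        x &= ~(uint32_t)0xff;   /* bic x, x, #0xff      */
        x &= ~(uint32_t)0xff00; /* bic x, x, #0xff00    */
        x = ror32(x, 16);       /* ror x, x, #16        */
        x |= y << 16;           /* orr x, x, y, lsl #16 */
        return ror32(x, 16);    /* ror x, x, #16        */
    }

    /* New: lsr throws the low halfword away in one step. */
    static uint32_t insert_low16_new(uint32_t x, uint32_t y)
    {
        x >>= 16;               /* lsr x, x, #16        */
        x |= y << 16;           /* orr x, x, y, lsl #16 */
        return ror32(x, 16);    /* ror x, x, #16        */
    }

    int main(void)
    {
        /* Both behave like "bfi x, y, #0, #16". */
        assert(insert_low16_old(0x12345678, 0xabcd) == 0x1234abcd);
        assert(insert_low16_new(0x12345678, 0xabcd) == 0x1234abcd);
        return 0;
    }

Two instructions and two immediates are saved at every pack step, and each butterfly in these loops packs two results, one for the sum and one for the difference.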
sub r12, r3, r7 add r3, r3, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r3, r3, #0xff - bic r3, r3, #0xff00 - ror r3, r3, #16 + lsr r3, r3, #16 orr r3, r3, r10, lsl #16 ror r3, r3, #16 #else @@ -4766,9 +4441,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -4780,9 +4454,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -4794,9 +4467,8 @@ L_kyber_arm32_invntt_loop_765: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -4808,9 +4480,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -4823,9 +4494,7 @@ L_kyber_arm32_invntt_loop_765: lsr r12, r12, #16 mla r7, r10, lr, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r7, r7, #0xff - bic r7, r7, #0xff00 - ror r7, r7, #16 + lsr r7, r7, #16 orr r7, r7, r12, lsl #16 ror r7, r7, #16 #else @@ -4860,18 +4529,14 @@ L_kyber_arm32_invntt_loop_765: sub r12, r4, r8 add r4, r4, r8 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r4, r4, #0xff - bic r4, r4, #0xff00 - ror r4, r4, #16 + lsr r4, r4, #16 orr r4, r4, r10, lsl #16 ror r4, r4, #16 #else @@ -4893,9 +4558,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -4907,9 +4571,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -4921,9 +4584,8 @@ L_kyber_arm32_invntt_loop_765: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -4935,9 +4597,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -4950,9 +4611,7 @@ L_kyber_arm32_invntt_loop_765: lsr r12, r12, #16 mla r8, r10, lr, r8 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r8, r8, #0xff - bic r8, r8, #0xff00 - ror r8, r8, #16 + lsr r8, r8, #16 orr r8, r8, r12, lsl #16 ror r8, r8, #16 #else @@ -4987,18 +4646,14 
@@ L_kyber_arm32_invntt_loop_765: sub r12, r5, r9 add r5, r5, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r5, r5, #0xff - bic r5, r5, #0xff00 - ror r5, r5, #16 + lsr r5, r5, #16 orr r5, r5, r10, lsl #16 ror r5, r5, #16 #else @@ -5020,9 +4675,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -5034,9 +4688,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -5048,9 +4701,8 @@ L_kyber_arm32_invntt_loop_765: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -5062,9 +4714,8 @@ L_kyber_arm32_invntt_loop_765: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -5077,9 +4728,7 @@ L_kyber_arm32_invntt_loop_765: lsr r12, r12, #16 mla r9, r10, lr, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r9, r9, #0xff - bic r9, r9, #0xff00 - ror r9, r9, #16 + lsr r9, r9, #16 orr r9, r9, r12, lsl #16 ror r9, r9, #16 #else @@ -5088,9 +4737,8 @@ L_kyber_arm32_invntt_loop_765: #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0xaf - lsl r11, r11, #8 - add r11, r11, #0xc0 + mov r11, #0xc0 + orr r11, r11, #0xaf00 #else mov r11, #0xafc0 #endif @@ -5101,9 +4749,8 @@ L_kyber_arm32_invntt_loop_765: #endif #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x4e - lsl r11, r11, #8 - add r11, r11, #0xbf + mov r11, #0xbf + orr r11, r11, #0x4e00 #else mov r11, #0x4ebf #endif @@ -5310,18 +4957,14 @@ L_kyber_arm32_invntt_loop_4_i: sub r12, r2, r4 add r2, r2, r4 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r2, r2, #0xff - bic r2, r2, #0xff00 - ror r2, r2, #16 + lsr r2, r2, #16 orr r2, r2, r10, lsl #16 ror r2, r2, #16 #else @@ -5343,9 +4986,8 @@ L_kyber_arm32_invntt_loop_4_i: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -5357,9 +4999,8 @@ L_kyber_arm32_invntt_loop_4_i: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -5371,9 +5012,8 @@ L_kyber_arm32_invntt_loop_4_i: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add 
r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -5385,9 +5025,8 @@ L_kyber_arm32_invntt_loop_4_i: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -5400,9 +5039,7 @@ L_kyber_arm32_invntt_loop_4_i: lsr r12, r12, #16 mla r4, r10, lr, r4 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r4, r4, #0xff - bic r4, r4, #0xff00 - ror r4, r4, #16 + lsr r4, r4, #16 orr r4, r4, r12, lsl #16 ror r4, r4, #16 #else @@ -5437,18 +5074,14 @@ L_kyber_arm32_invntt_loop_4_i: sub r12, r3, r5 add r3, r3, r5 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r3, r3, #0xff - bic r3, r3, #0xff00 - ror r3, r3, #16 + lsr r3, r3, #16 orr r3, r3, r10, lsl #16 ror r3, r3, #16 #else @@ -5470,9 +5103,8 @@ L_kyber_arm32_invntt_loop_4_i: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -5484,9 +5116,8 @@ L_kyber_arm32_invntt_loop_4_i: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -5498,9 +5129,8 @@ L_kyber_arm32_invntt_loop_4_i: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -5512,9 +5142,8 @@ L_kyber_arm32_invntt_loop_4_i: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -5527,9 +5156,7 @@ L_kyber_arm32_invntt_loop_4_i: lsr r12, r12, #16 mla r5, r10, lr, r5 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r5, r5, #0xff - bic r5, r5, #0xff00 - ror r5, r5, #16 + lsr r5, r5, #16 orr r5, r5, r12, lsl #16 ror r5, r5, #16 #else @@ -5564,18 +5191,14 @@ L_kyber_arm32_invntt_loop_4_i: sub r12, r6, r8 add r6, r6, r8 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r6, r6, #0xff - bic r6, r6, #0xff00 - ror r6, r6, #16 + lsr r6, r6, #16 orr r6, r6, r10, lsl #16 ror r6, r6, #16 #else @@ -5596,9 +5219,8 @@ L_kyber_arm32_invntt_loop_4_i: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -5610,9 +5232,8 @@ L_kyber_arm32_invntt_loop_4_i: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -5624,9 +5245,8 @@ L_kyber_arm32_invntt_loop_4_i: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov 
r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -5638,9 +5258,8 @@ L_kyber_arm32_invntt_loop_4_i: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -5653,9 +5272,7 @@ L_kyber_arm32_invntt_loop_4_i: lsr r12, r12, #16 mla r8, r10, lr, r8 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r8, r8, #0xff - bic r8, r8, #0xff00 - ror r8, r8, #16 + lsr r8, r8, #16 orr r8, r8, r12, lsl #16 ror r8, r8, #16 #else @@ -5690,18 +5307,14 @@ L_kyber_arm32_invntt_loop_4_i: sub r12, r7, r9 add r7, r7, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r7, r7, #0xff - bic r7, r7, #0xff00 - ror r7, r7, #16 + lsr r7, r7, #16 orr r7, r7, r10, lsl #16 ror r7, r7, #16 #else @@ -5722,9 +5335,8 @@ L_kyber_arm32_invntt_loop_4_i: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -5736,9 +5348,8 @@ L_kyber_arm32_invntt_loop_4_i: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -5750,9 +5361,8 @@ L_kyber_arm32_invntt_loop_4_i: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -5764,9 +5374,8 @@ L_kyber_arm32_invntt_loop_4_i: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -5779,9 +5388,7 @@ L_kyber_arm32_invntt_loop_4_i: lsr r12, r12, #16 mla r9, r10, lr, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r9, r9, #0xff - bic r9, r9, #0xff00 - ror r9, r9, #16 + lsr r9, r9, #16 orr r9, r9, r12, lsl #16 ror r9, r9, #16 #else @@ -5797,8 +5404,7 @@ L_kyber_arm32_invntt_loop_4_i: str r8, [r0, #96] str r9, [r0, #112] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r2, [sp] - ldr r3, [sp, #4] + ldm sp, {r2, r3} #else ldrd r2, r3, [sp] #endif @@ -5851,18 +5457,14 @@ L_kyber_arm32_invntt_loop_321: sub r12, r2, r3 add r2, r2, r3 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r2, r2, #0xff - bic r2, r2, #0xff00 - ror r2, r2, #16 + lsr r2, r2, #16 orr r2, r2, r10, lsl #16 ror r2, r2, #16 #else @@ -5884,9 +5486,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -5898,9 +5499,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - 
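The values these loops keep reloading into r10 are the Kyber arithmetic pair: 0xd01 = 3329 is the modulus q, and 0xcff = 3327 is -q^(-1) mod 2^16 (3329 * 3327 = 169 * 2^16 - 1). Each mul-by-0xcff, mla-by-0xd01, shift-right-16 chain is one Montgomery reduction. A scalar C model of the chain follows (a sketch with hypothetical names: the assembly actually runs two of these at a time on halfwords packed per register, with the halfword extraction elided in the context lines above):

    #include <stdint.h>

    #define KYBER_Q     3329u /* 0xd01 */
    #define KYBER_QINVN 3327u /* 0xcff: q * 3327 == -1 (mod 2^16) */

    /* Returns r with r == a * 2^(-16) (mod q). */
    static int16_t mont_reduce(int32_t a)
    {
        uint16_t u = (uint16_t)((uint32_t)a * KYBER_QINVN); /* mul lr, r10, r12  (r10 = 0xcff) */
        int32_t  t = a + (int32_t)((uint32_t)u * KYBER_Q);  /* mla r12, r10, lr, r12 (r10 = 0xd01) */
        return (int16_t)(t >> 16); /* low 16 bits of t are zero by construction */
    }

Multiplying by a twiddle factor stored in Montgomery form and then reducing this way is what every zeta multiply in the NTT and inverse NTT loops amounts to.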
mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -5912,9 +5512,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -5926,9 +5525,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -5941,9 +5539,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r3, r10, lr, r3 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r3, r3, #0xff - bic r3, r3, #0xff00 - ror r3, r3, #16 + lsr r3, r3, #16 orr r3, r3, r12, lsl #16 ror r3, r3, #16 #else @@ -5978,18 +5574,14 @@ L_kyber_arm32_invntt_loop_321: sub r12, r4, r5 add r4, r4, r5 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r4, r4, #0xff - bic r4, r4, #0xff00 - ror r4, r4, #16 + lsr r4, r4, #16 orr r4, r4, r10, lsl #16 ror r4, r4, #16 #else @@ -6010,9 +5602,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -6024,9 +5615,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -6038,9 +5628,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -6052,9 +5641,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -6067,9 +5655,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r5, r10, lr, r5 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r5, r5, #0xff - bic r5, r5, #0xff00 - ror r5, r5, #16 + lsr r5, r5, #16 orr r5, r5, r12, lsl #16 ror r5, r5, #16 #else @@ -6105,18 +5691,14 @@ L_kyber_arm32_invntt_loop_321: sub r12, r6, r7 add r6, r6, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r6, r6, #0xff - bic r6, r6, #0xff00 - ror r6, r6, #16 + lsr r6, r6, #16 orr r6, r6, r10, lsl #16 ror r6, r6, #16 #else @@ -6138,9 +5720,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -6152,9 +5733,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -6166,9 +5746,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -6180,9 +5759,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -6195,9 +5773,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r7, r10, lr, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r7, r7, #0xff - bic r7, r7, #0xff00 - ror r7, r7, #16 + lsr r7, r7, #16 orr r7, r7, r12, lsl #16 ror r7, r7, #16 #else @@ -6232,18 +5808,14 @@ L_kyber_arm32_invntt_loop_321: sub r12, r8, r9 add r8, r8, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r8, r8, #0xff - bic r8, r8, #0xff00 - ror r8, r8, #16 + lsr r8, r8, #16 orr r8, r8, r10, lsl #16 ror r8, r8, #16 #else @@ -6264,9 +5836,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -6278,9 +5849,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -6292,9 +5862,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -6306,9 +5875,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -6321,9 +5889,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r9, r10, lr, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r9, r9, #0xff - bic r9, r9, #0xff00 - ror r9, r9, #16 + lsr r9, r9, #16 orr r9, r9, r12, lsl #16 ror r9, r9, #16 #else @@ -6359,18 +5925,14 @@ L_kyber_arm32_invntt_loop_321: sub r12, r2, r4 add r2, r2, r4 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r2, r2, #0xff - bic r2, r2, #0xff00 - ror r2, r2, #16 + lsr r2, r2, #16 orr r2, r2, r10, lsl #16 ror r2, r2, #16 #else @@ -6392,9 +5954,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -6406,9 +5967,8 @@ 
L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -6420,9 +5980,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -6434,9 +5993,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -6449,9 +6007,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r4, r10, lr, r4 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r4, r4, #0xff - bic r4, r4, #0xff00 - ror r4, r4, #16 + lsr r4, r4, #16 orr r4, r4, r12, lsl #16 ror r4, r4, #16 #else @@ -6486,18 +6042,14 @@ L_kyber_arm32_invntt_loop_321: sub r12, r3, r5 add r3, r3, r5 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r3, r3, #0xff - bic r3, r3, #0xff00 - ror r3, r3, #16 + lsr r3, r3, #16 orr r3, r3, r10, lsl #16 ror r3, r3, #16 #else @@ -6519,9 +6071,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -6533,9 +6084,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -6547,9 +6097,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -6561,9 +6110,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -6576,9 +6124,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r5, r10, lr, r5 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r5, r5, #0xff - bic r5, r5, #0xff00 - ror r5, r5, #16 + lsr r5, r5, #16 orr r5, r5, r12, lsl #16 ror r5, r5, #16 #else @@ -6613,18 +6159,14 @@ L_kyber_arm32_invntt_loop_321: sub r12, r6, r8 add r6, r6, r8 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r6, r6, #0xff - bic r6, r6, #0xff00 - ror r6, r6, #16 + lsr r6, r6, #16 orr r6, r6, r10, lsl #16 ror r6, r6, #16 #else @@ -6645,9 +6187,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, 
#0xcff #endif @@ -6659,9 +6200,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -6673,9 +6213,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -6687,9 +6226,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -6702,9 +6240,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r8, r10, lr, r8 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r8, r8, #0xff - bic r8, r8, #0xff00 - ror r8, r8, #16 + lsr r8, r8, #16 orr r8, r8, r12, lsl #16 ror r8, r8, #16 #else @@ -6739,18 +6275,14 @@ L_kyber_arm32_invntt_loop_321: sub r12, r7, r9 add r7, r7, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r7, r7, #0xff - bic r7, r7, #0xff00 - ror r7, r7, #16 + lsr r7, r7, #16 orr r7, r7, r10, lsl #16 ror r7, r7, #16 #else @@ -6771,9 +6303,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -6785,9 +6316,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -6799,9 +6329,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -6813,9 +6342,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -6828,9 +6356,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r9, r10, lr, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r9, r9, #0xff - bic r9, r9, #0xff00 - ror r9, r9, #16 + lsr r9, r9, #16 orr r9, r9, r12, lsl #16 ror r9, r9, #16 #else @@ -6839,9 +6365,8 @@ L_kyber_arm32_invntt_loop_321: #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0xaf - lsl r11, r11, #8 - add r11, r11, #0xc0 + mov r11, #0xc0 + orr r11, r11, #0xaf00 #else mov r11, #0xafc0 #endif @@ -6852,9 +6377,8 @@ L_kyber_arm32_invntt_loop_321: #endif #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x4e - lsl r11, r11, #8 - add r11, r11, #0xbf + mov r11, #0xbf + orr r11, r11, #0x4e00 #else mov r11, #0x4ebf #endif @@ -7032,18 +6556,14 @@ L_kyber_arm32_invntt_loop_321: sub r12, r2, r6 add r2, r2, r6 #if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r2, r2, #0xff - bic r2, r2, #0xff00 - ror r2, r2, #16 + lsr r2, r2, #16 orr r2, r2, r10, lsl #16 ror r2, r2, #16 #else @@ -7065,9 +6585,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -7079,9 +6598,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7093,9 +6611,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -7107,9 +6624,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7122,9 +6638,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r6, r10, lr, r6 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r6, r6, #0xff - bic r6, r6, #0xff00 - ror r6, r6, #16 + lsr r6, r6, #16 orr r6, r6, r12, lsl #16 ror r6, r6, #16 #else @@ -7159,18 +6673,14 @@ L_kyber_arm32_invntt_loop_321: sub r12, r3, r7 add r3, r3, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r3, r3, #0xff - bic r3, r3, #0xff00 - ror r3, r3, #16 + lsr r3, r3, #16 orr r3, r3, r10, lsl #16 ror r3, r3, #16 #else @@ -7192,9 +6702,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -7206,9 +6715,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7220,9 +6728,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -7234,9 +6741,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7249,9 +6755,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r7, r10, lr, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r7, r7, #0xff - bic r7, r7, #0xff00 - ror r7, r7, #16 + lsr r7, r7, #16 orr r7, r7, r12, lsl #16 ror r7, r7, #16 #else @@ -7286,18 +6790,14 @@ L_kyber_arm32_invntt_loop_321: 
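One constant pair in this loop is not part of the Montgomery chain: the ARMv6-and-later branch loads 0xafc0 into r11 while the generic branch loads 0x4ebf, and the hunks show only the loads, not the uses. 0x4ebf = 20159 = ((1 << 26) + q/2) / q, which matches the Barrett multiplier in the Kyber reference code, so the generic path appears to Barrett-reduce each halfword after a layer of butterflies. A sketch under that assumption:

    #include <stdint.h>

    #define KYBER_Q 3329 /* 0xd01 */

    /* Barrett reduction with the 0x4ebf multiplier, modelled on the reference
     * implementation; assumes the generic path applies this per halfword. */
    static int16_t barrett_reduce(int16_t a)
    {
        const int32_t v = 0x4ebf;              /* 20159 = ((1 << 26) + KYBER_Q / 2) / KYBER_Q */
        int32_t t = (v * a + (1 << 25)) >> 26; /* rounded quotient, approx. round(a / q) */
        return (int16_t)(a - t * KYBER_Q);     /* centered representative of a mod q */
    }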
sub r12, r4, r8 add r4, r4, r8 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r4, r4, #0xff - bic r4, r4, #0xff00 - ror r4, r4, #16 + lsr r4, r4, #16 orr r4, r4, r10, lsl #16 ror r4, r4, #16 #else @@ -7319,9 +6819,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -7333,9 +6832,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7347,9 +6845,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -7361,9 +6858,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7376,9 +6872,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r8, r10, lr, r8 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r8, r8, #0xff - bic r8, r8, #0xff00 - ror r8, r8, #16 + lsr r8, r8, #16 orr r8, r8, r12, lsl #16 ror r8, r8, #16 #else @@ -7413,18 +6907,14 @@ L_kyber_arm32_invntt_loop_321: sub r12, r5, r9 add r5, r5, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r12, r12, #0xff - bic r12, r12, #0xff00 - ror r12, r12, #16 + lsr r12, r12, #16 orr r12, r12, lr, lsl #16 ror r12, r12, #16 #else bfi r12, lr, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r5, r5, #0xff - bic r5, r5, #0xff00 - ror r5, r5, #16 + lsr r5, r5, #16 orr r5, r5, r10, lsl #16 ror r5, r5, #16 #else @@ -7446,9 +6936,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul r12, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -7460,9 +6949,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7474,9 +6962,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -7488,9 +6975,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7503,9 +6989,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r9, r10, lr, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r9, r9, #0xff - bic r9, r9, #0xff00 - ror r9, r9, #16 + lsr r9, r9, #16 orr r9, r9, r12, lsl #16 ror r9, r9, #16 #else @@ -7542,17 +7026,15 
@@ L_kyber_arm32_invntt_loop_321: #endif mul r2, lr, r2 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif mul lr, r10, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7564,9 +7046,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -7578,9 +7059,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7593,9 +7073,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r2, r10, lr, r2 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r2, r2, #0xff - bic r2, r2, #0xff00 - ror r2, r2, #16 + lsr r2, r2, #16 orr r2, r2, r12, lsl #16 ror r2, r2, #16 #else @@ -7631,17 +7109,15 @@ L_kyber_arm32_invntt_loop_321: #endif mul r3, lr, r3 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif mul lr, r10, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7653,9 +7129,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -7667,9 +7142,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7682,9 +7156,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r3, r10, lr, r3 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r3, r3, #0xff - bic r3, r3, #0xff00 - ror r3, r3, #16 + lsr r3, r3, #16 orr r3, r3, r12, lsl #16 ror r3, r3, #16 #else @@ -7720,17 +7192,15 @@ L_kyber_arm32_invntt_loop_321: #endif mul r4, lr, r4 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif mul lr, r10, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7742,9 +7212,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -7756,9 +7225,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7771,9 +7239,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r4, r10, lr, r4 #if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r4, r4, #0xff - bic r4, r4, #0xff00 - ror r4, r4, #16 + lsr r4, r4, #16 orr r4, r4, r12, lsl #16 ror r4, r4, #16 #else @@ -7809,17 +7275,15 @@ L_kyber_arm32_invntt_loop_321: #endif mul r5, lr, r5 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif mul lr, r10, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7831,9 +7295,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -7845,9 +7308,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7860,9 +7322,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r5, r10, lr, r5 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r5, r5, #0xff - bic r5, r5, #0xff00 - ror r5, r5, #16 + lsr r5, r5, #16 orr r5, r5, r12, lsl #16 ror r5, r5, #16 #else @@ -7898,17 +7358,15 @@ L_kyber_arm32_invntt_loop_321: #endif mul r6, lr, r6 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif mul lr, r10, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7920,9 +7378,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -7934,9 +7391,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -7949,9 +7405,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r6, r10, lr, r6 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r6, r6, #0xff - bic r6, r6, #0xff00 - ror r6, r6, #16 + lsr r6, r6, #16 orr r6, r6, r12, lsl #16 ror r6, r6, #16 #else @@ -7987,17 +7441,15 @@ L_kyber_arm32_invntt_loop_321: #endif mul r7, lr, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif mul lr, r10, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -8009,9 +7461,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -8023,9 +7474,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, 
r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -8038,9 +7488,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r7, r10, lr, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r7, r7, #0xff - bic r7, r7, #0xff00 - ror r7, r7, #16 + lsr r7, r7, #16 orr r7, r7, r12, lsl #16 ror r7, r7, #16 #else @@ -8076,17 +7524,15 @@ L_kyber_arm32_invntt_loop_321: #endif mul r8, lr, r8 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif mul lr, r10, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -8098,9 +7544,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -8112,9 +7557,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -8127,9 +7571,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r8, r10, lr, r8 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r8, r8, #0xff - bic r8, r8, #0xff00 - ror r8, r8, #16 + lsr r8, r8, #16 orr r8, r8, r12, lsl #16 ror r8, r8, #16 #else @@ -8165,17 +7607,15 @@ L_kyber_arm32_invntt_loop_321: #endif mul r9, lr, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif mul lr, r10, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -8187,9 +7627,8 @@ L_kyber_arm32_invntt_loop_321: #endif mla r12, r10, lr, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xc - lsl r10, r10, #8 - add r10, r10, #0xff + mov r10, #0xff + orr r10, r10, #0xc00 #else mov r10, #0xcff #endif @@ -8201,9 +7640,8 @@ L_kyber_arm32_invntt_loop_321: #endif mul lr, r10, lr #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r10, #0xd - lsl r10, r10, #8 - add r10, r10, #0x1 + mov r10, #0x1 + orr r10, r10, #0xd00 #else mov r10, #0xd01 #endif @@ -8216,9 +7654,7 @@ L_kyber_arm32_invntt_loop_321: lsr r12, r12, #16 mla r9, r10, lr, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r9, r9, #0xff - bic r9, r9, #0xff00 - ror r9, r9, #16 + lsr r9, r9, #16 orr r9, r9, r12, lsl #16 ror r9, r9, #16 #else @@ -8383,9 +7819,8 @@ kyber_arm32_basemul_mont: add r3, r3, #0x80 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xd - lsl r12, r12, #8 - add r12, r12, #0x1 + mov r12, #0x1 + orr r12, r12, #0xd00 #else mov r12, #0xd01 #endif @@ -8438,9 +7873,8 @@ L_kyber_arm32_basemul_mont_loop: mul r8, r9, r8 mul r10, r11, r10 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xc - lsl r12, r12, #8 - add r12, r12, #0xff + mov r12, #0xff + orr r12, r12, #0xc00 #else mov r12, #0xcff #endif @@ -8459,9 +7893,8 @@ L_kyber_arm32_basemul_mont_loop: mul r9, r12, r8 mul r11, r12, r11 #if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xd - lsl r12, r12, #8 - add r12, r12, #0x1 + mov r12, #0x1 + orr r12, r12, #0xd00 #else mov r12, #0xd01 #endif @@ -8523,9 +7956,8 @@ L_kyber_arm32_basemul_mont_loop: #endif mla r10, r11, r12, r10 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xc - lsl r12, r12, #8 - add r12, r12, #0xff + mov r12, #0xff + orr r12, r12, #0xc00 #else mov r12, #0xcff #endif @@ -8544,9 +7976,8 @@ L_kyber_arm32_basemul_mont_loop: mul r9, r12, r9 mul r11, r12, r11 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xd - lsl r12, r12, #8 - add r12, r12, #0x1 + mov r12, #0x1 + orr r12, r12, #0xd00 #else mov r12, #0xd01 #endif @@ -8597,9 +8028,8 @@ L_kyber_arm32_basemul_mont_loop: #endif mla r11, r5, r12, r11 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xc - lsl r12, r12, #8 - add r12, r12, #0xff + mov r12, #0xff + orr r12, r12, #0xc00 #else mov r12, #0xcff #endif @@ -8618,9 +8048,8 @@ L_kyber_arm32_basemul_mont_loop: mul r6, r12, r6 mul r7, r12, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xd - lsl r12, r12, #8 - add r12, r12, #0x1 + mov r12, #0x1 + orr r12, r12, #0xd00 #else mov r12, #0xd01 #endif @@ -8668,9 +8097,8 @@ kyber_arm32_basemul_mont_add: add r3, r3, #0x80 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xd - lsl r12, r12, #8 - add r12, r12, #0x1 + mov r12, #0x1 + orr r12, r12, #0xd00 #else mov r12, #0xd01 #endif @@ -8726,9 +8154,8 @@ L_kyber_arm32_basemul_mont_add_loop: mul r8, r9, r8 mul r10, r11, r10 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xc - lsl r12, r12, #8 - add r12, r12, #0xff + mov r12, #0xff + orr r12, r12, #0xc00 #else mov r12, #0xcff #endif @@ -8747,9 +8174,8 @@ L_kyber_arm32_basemul_mont_add_loop: mul r9, r12, r8 mul r11, r12, r11 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xd - lsl r12, r12, #8 - add r12, r12, #0x1 + mov r12, #0x1 + orr r12, r12, #0xd00 #else mov r12, #0xd01 #endif @@ -8811,9 +8237,8 @@ L_kyber_arm32_basemul_mont_add_loop: #endif mla r10, r11, r12, r10 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xc - lsl r12, r12, #8 - add r12, r12, #0xff + mov r12, #0xff + orr r12, r12, #0xc00 #else mov r12, #0xcff #endif @@ -8832,9 +8257,8 @@ L_kyber_arm32_basemul_mont_add_loop: mul r9, r12, r9 mul r11, r12, r11 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xd - lsl r12, r12, #8 - add r12, r12, #0x1 + mov r12, #0x1 + orr r12, r12, #0xd00 #else mov r12, #0xd01 #endif @@ -8885,9 +8309,8 @@ L_kyber_arm32_basemul_mont_add_loop: #endif mla r11, r5, r12, r11 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xc - lsl r12, r12, #8 - add r12, r12, #0xff + mov r12, #0xff + orr r12, r12, #0xc00 #else mov r12, #0xcff #endif @@ -8906,9 +8329,8 @@ L_kyber_arm32_basemul_mont_add_loop: mul r6, r12, r6 mul r7, r12, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xd - lsl r12, r12, #8 - add r12, r12, #0x1 + mov r12, #0x1 + orr r12, r12, #0xd00 #else mov r12, #0xd01 #endif @@ -8958,18 +8380,14 @@ L_kyber_arm32_basemul_mont_add_loop: add r4, r4, r9 add r5, r5, r11 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r4, r4, #0xff - bic r4, r4, #0xff00 - ror r4, r4, #16 + lsr r4, r4, #16 orr r4, r4, r8, lsl #16 ror r4, r4, #16 #else bfi r4, r8, #0, #16 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r5, r5, 
#0xff - bic r5, r5, #0xff00 - ror r5, r5, #16 + lsr r5, r5, #16 orr r5, r5, r10, lsl #16 ror r5, r5, #16 #else @@ -8988,16 +8406,14 @@ L_kyber_arm32_basemul_mont_add_loop: kyber_arm32_csubq: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r12, #0xd - lsl r12, r12, #8 - add r12, r12, #0x1 + mov r12, #0x1 + orr r12, r12, #0xd00 #else mov r12, #0xd01 #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov lr, #0xd - lsl lr, lr, #8 - add lr, lr, #0x1 + mov lr, #0x1 + orr lr, lr, #0xd00 #else mov lr, #0xd01 #endif @@ -9009,25 +8425,13 @@ kyber_arm32_csubq: movt lr, #0xd01 #endif #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r11, #0x80 - lsl r11, r11, #8 - add r11, r11, #0x0 -#else mov r11, #0x8000 -#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) orr r11, r11, #0x80000000 #else movt r11, #0x8000 #endif -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r1, #0x1 - lsl r1, r1, #8 - add r1, r1, #0x0 -#else mov r1, #0x100 -#endif L_kyber_arm32_csubq_loop: ldm r0, {r2, r3, r4, r5} #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) @@ -9055,9 +8459,7 @@ L_kyber_arm32_csubq_loop: sub r6, r2, lr sub r2, r2, lr, lsl #16 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r2, r2, #0xff - bic r2, r2, #0xff00 - ror r2, r2, #16 + lsr r2, r2, #16 orr r2, r2, r6, lsl #16 ror r2, r2, #16 #else @@ -9066,9 +8468,7 @@ L_kyber_arm32_csubq_loop: sub r7, r3, lr sub r3, r3, lr, lsl #16 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r3, r3, #0xff - bic r3, r3, #0xff00 - ror r3, r3, #16 + lsr r3, r3, #16 orr r3, r3, r7, lsl #16 ror r3, r3, #16 #else @@ -9077,9 +8477,7 @@ L_kyber_arm32_csubq_loop: sub r8, r4, lr sub r4, r4, lr, lsl #16 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r4, r4, #0xff - bic r4, r4, #0xff00 - ror r4, r4, #16 + lsr r4, r4, #16 orr r4, r4, r8, lsl #16 ror r4, r4, #16 #else @@ -9088,9 +8486,7 @@ L_kyber_arm32_csubq_loop: sub r9, r5, lr sub r5, r5, lr, lsl #16 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r5, r5, #0xff - bic r5, r5, #0xff00 - ror r5, r5, #16 + lsr r5, r5, #16 orr r5, r5, r9, lsl #16 ror r5, r5, #16 #else @@ -9117,9 +8513,7 @@ L_kyber_arm32_csubq_loop: #endif add r2, r2, r6 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r2, r2, #0xff - bic r2, r2, #0xff00 - ror r2, r2, #16 + lsr r2, r2, #16 orr r2, r2, r10, lsl #16 ror r2, r2, #16 #else @@ -9134,9 +8528,7 @@ L_kyber_arm32_csubq_loop: #endif add r3, r3, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r3, r3, #0xff - bic r3, r3, #0xff00 - ror r3, r3, #16 + lsr r3, r3, #16 orr r3, r3, r10, lsl #16 ror r3, r3, #16 #else @@ -9151,9 +8543,7 @@ L_kyber_arm32_csubq_loop: #endif add r4, r4, r8 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r4, r4, #0xff - bic r4, r4, #0xff00 - ror r4, r4, #16 + lsr r4, r4, #16 orr r4, r4, r10, lsl #16 ror r4, r4, #16 #else @@ -9168,9 +8558,7 @@ L_kyber_arm32_csubq_loop: #endif add r5, r5, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - bic r5, r5, #0xff - bic r5, r5, #0xff00 - ror r5, r5, #16 + lsr r5, r5, #16 orr r5, r5, r10, lsl #16 ror r5, r5, #16 #else @@ -9189,9 +8577,8 @@ L_kyber_arm32_csubq_loop: kyber_arm32_rej_uniform: push {r4, r5, r6, r7, r8, lr} #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - mov r8, #0xd - lsl r8, r8, #8 - add r8, r8, #0x1 + mov r8, #0x1 + orr r8, r8, #0xd00 #else mov r8, #0xd01 
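[Editor's note: kyber_arm32_csubq above normalizes eight packed coefficients per iteration. Each 16-bit lane has q subtracted (the sub / "sub ..., lsl #16" pair splits the two lanes of a register), and a sign mask adds q back wherever the subtraction went negative. Per lane it is the standard branch-free conditional subtract; a one-lane C model (assumes arithmetic right shift of negative values, as on ARM):

    #include <stdint.h>

    /* Conditional subtract of q = 3329 on one coefficient:
     * returns a - q if a >= q, else a (for a >= 0). */
    static int16_t csubq(int16_t a)
    {
        a = (int16_t)(a - 3329);
        a = (int16_t)(a + ((a >> 15) & 3329)); /* mask = -1 iff a < 0 */
        return a;
    }
]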
#endif diff --git a/wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c index 30774f5b36..e514604f8c 100644 --- a/wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c @@ -97,9 +97,8 @@ void kyber_arm32_ntt(sword16* r_p) "sub sp, sp, #8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -150,17 +149,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r6, lr, r6\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -172,9 +169,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -186,9 +182,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -204,18 +199,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r2, r12, lsr #16\n\t" "add r12, r2, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r6, r6, #0xff\n\t" - "bic r6, r6, #0xff00\n\t" - "ror r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" "orr r6, r6, lr, lsl #16\n\t" "ror r6, r6, #16\n\t" #else "bfi r6, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r12, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -249,17 +240,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r7, lr, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -271,9 +260,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -285,9 +273,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -303,18 +290,14 @@ void 
kyber_arm32_ntt(sword16* r_p) "sub lr, r3, r12, lsr #16\n\t" "add r12, r3, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r7, r7, #0xff\n\t" - "bic r7, r7, #0xff00\n\t" - "ror r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" "orr r7, r7, lr, lsl #16\n\t" "ror r7, r7, #16\n\t" #else "bfi r7, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, r12, lsl #16\n\t" "ror r3, r3, #16\n\t" #else @@ -348,17 +331,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r8, lr, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -370,9 +351,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -384,9 +364,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -402,18 +381,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r4, r12, lsr #16\n\t" "add r12, r4, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r8, r8, #0xff\n\t" - "bic r8, r8, #0xff00\n\t" - "ror r8, r8, #16\n\t" + "lsr r8, r8, #16\n\t" "orr r8, r8, lr, lsl #16\n\t" "ror r8, r8, #16\n\t" #else "bfi r8, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, r12, lsl #16\n\t" "ror r4, r4, #16\n\t" #else @@ -447,17 +422,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r9, lr, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -469,9 +442,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -483,9 +455,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -501,18 +472,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r5, r12, 
lsr #16\n\t" "add r12, r5, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r9, r9, #0xff\n\t" - "bic r9, r9, #0xff00\n\t" - "ror r9, r9, #16\n\t" + "lsr r9, r9, #16\n\t" "orr r9, r9, lr, lsl #16\n\t" "ror r9, r9, #16\n\t" #else "bfi r9, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, r12, lsl #16\n\t" "ror r5, r5, #16\n\t" #else @@ -547,17 +514,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r4, lr, r4\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -569,9 +534,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -583,9 +547,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -601,18 +564,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r2, r12, lsr #16\n\t" "add r12, r2, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, lr, lsl #16\n\t" "ror r4, r4, #16\n\t" #else "bfi r4, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r12, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -646,17 +605,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r5, lr, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -668,9 +625,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -682,9 +638,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -700,18 +655,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r3, r12, lsr #16\n\t" "add r12, r3, r12, lsr #16\n\t" #if 
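[Editor's note on the other change repeated throughout these NTT hunks: writing a 16-bit result into the low half of a register without touching the high half is "bfi rd, rs, #0, #16" on ARMv6T2+, and the pre-ARMv7 emulation shrinks from five instructions (bic/bic/ror/orr/ror) to three (lsr/orr/ror). A C model of the new sequence (hypothetical name):

    #include <stdint.h>

    /* Equivalent of "bfi rd, rs, #0, #16": keep rd's high half,
     * replace its low half with rs's low half. */
    static uint32_t bfi_lo16(uint32_t rd, uint32_t rs)
    {
        rd >>= 16;                       /* lsr rd, rd, #16          -> 0000:HHHH */
        rd |= rs << 16;                  /* orr rd, rd, rs, lsl #16  -> LLLL:HHHH */
        return (rd >> 16) | (rd << 16);  /* ror rd, rd, #16          -> HHHH:LLLL */
    }

The lsr both clears the low half and stands in for the first rotate in one go, which is where the two bic instructions are saved.]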
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, lr, lsl #16\n\t" "ror r5, r5, #16\n\t" #else "bfi r5, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, r12, lsl #16\n\t" "ror r3, r3, #16\n\t" #else @@ -744,17 +695,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r8, lr, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -766,9 +715,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -780,9 +728,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -798,18 +745,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r6, r12, lsr #16\n\t" "add r12, r6, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r8, r8, #0xff\n\t" - "bic r8, r8, #0xff00\n\t" - "ror r8, r8, #16\n\t" + "lsr r8, r8, #16\n\t" "orr r8, r8, lr, lsl #16\n\t" "ror r8, r8, #16\n\t" #else "bfi r8, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r6, r6, #0xff\n\t" - "bic r6, r6, #0xff00\n\t" - "ror r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" "orr r6, r6, r12, lsl #16\n\t" "ror r6, r6, #16\n\t" #else @@ -842,17 +785,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r9, lr, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -864,9 +805,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -878,9 +818,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -896,18 +835,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r7, r12, lsr #16\n\t" "add r12, r7, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 
7) - "bic r9, r9, #0xff\n\t" - "bic r9, r9, #0xff00\n\t" - "ror r9, r9, #16\n\t" + "lsr r9, r9, #16\n\t" "orr r9, r9, lr, lsl #16\n\t" "ror r9, r9, #16\n\t" #else "bfi r9, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r7, r7, #0xff\n\t" - "bic r7, r7, #0xff00\n\t" - "ror r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" "orr r7, r7, r12, lsl #16\n\t" "ror r7, r7, #16\n\t" #else @@ -942,17 +877,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r3, lr, r3\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -964,9 +897,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -978,9 +910,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -996,18 +927,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r2, r12, lsr #16\n\t" "add r12, r2, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, lr, lsl #16\n\t" "ror r3, r3, #16\n\t" #else "bfi r3, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r12, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -1040,17 +967,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r5, lr, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1062,9 +987,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -1076,9 +1000,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1094,18 +1017,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r4, r12, lsr #16\n\t" "add r12, r4, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, 
#0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, lr, lsl #16\n\t" "ror r5, r5, #16\n\t" #else "bfi r5, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, r12, lsl #16\n\t" "ror r4, r4, #16\n\t" #else @@ -1140,17 +1059,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r7, lr, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1162,9 +1079,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -1176,9 +1092,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1194,18 +1109,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r6, r12, lsr #16\n\t" "add r12, r6, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r7, r7, #0xff\n\t" - "bic r7, r7, #0xff00\n\t" - "ror r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" "orr r7, r7, lr, lsl #16\n\t" "ror r7, r7, #16\n\t" #else "bfi r7, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r6, r6, #0xff\n\t" - "bic r6, r6, #0xff00\n\t" - "ror r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" "orr r6, r6, r12, lsl #16\n\t" "ror r6, r6, #16\n\t" #else @@ -1238,17 +1149,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r9, lr, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1260,9 +1169,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -1274,9 +1182,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1292,18 +1199,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r8, r12, lsr #16\n\t" "add r12, r8, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r9, r9, #0xff\n\t" - "bic r9, r9, #0xff00\n\t" - "ror r9, r9, 
#16\n\t" + "lsr r9, r9, #16\n\t" "orr r9, r9, lr, lsl #16\n\t" "ror r9, r9, #16\n\t" #else "bfi r9, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r8, r8, #0xff\n\t" - "bic r8, r8, #0xff00\n\t" - "ror r8, r8, #16\n\t" + "lsr r8, r8, #16\n\t" "orr r8, r8, r12, lsl #16\n\t" "ror r8, r8, #16\n\t" #else @@ -1368,17 +1271,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r4, lr, r4\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1390,9 +1291,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -1404,9 +1304,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1422,18 +1321,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r2, r12, lsr #16\n\t" "add r12, r2, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, lr, lsl #16\n\t" "ror r4, r4, #16\n\t" #else "bfi r4, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r12, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -1467,17 +1362,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r5, lr, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1489,9 +1382,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -1503,9 +1395,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1521,18 +1412,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r3, r12, lsr #16\n\t" "add r12, r3, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" 
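[Editor's note: zooming out, each block in kyber_arm32_ntt is one Cooley-Tukey butterfly on a coefficient pair: multiply one coefficient by a twiddle factor, Montgomery-reduce, then the "sub lr, rX, r12, lsr #16" / "add r12, rX, r12, lsr #16" pair forms a - t and a + t from the reduced product sitting in r12's high half. A C sketch, reusing the mont_reduce model above (hypothetical helper names):

    #include <stdint.h>

    extern int16_t mont_reduce(int32_t a);  /* as sketched earlier */

    /* One forward-NTT butterfly on coefficients a, b with twiddle zeta. */
    static void ct_butterfly(int16_t *a, int16_t *b, int16_t zeta)
    {
        int16_t t = mont_reduce((int32_t)zeta * *b);
        *b = (int16_t)(*a - t);    /* sub lr,  rX, r12, lsr #16 */
        *a = (int16_t)(*a + t);    /* add r12, rX, r12, lsr #16 */
    }

The invntt loops run the Gentleman-Sande form instead: t = a - b, a' = a + b, b' = mont_reduce(zeta * t), which is why their add/sub come before the multiply.]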
"orr r5, r5, lr, lsl #16\n\t" "ror r5, r5, #16\n\t" #else "bfi r5, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, r12, lsl #16\n\t" "ror r3, r3, #16\n\t" #else @@ -1565,17 +1452,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r8, lr, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1587,9 +1472,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -1601,9 +1485,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1619,18 +1502,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r6, r12, lsr #16\n\t" "add r12, r6, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r8, r8, #0xff\n\t" - "bic r8, r8, #0xff00\n\t" - "ror r8, r8, #16\n\t" + "lsr r8, r8, #16\n\t" "orr r8, r8, lr, lsl #16\n\t" "ror r8, r8, #16\n\t" #else "bfi r8, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r6, r6, #0xff\n\t" - "bic r6, r6, #0xff00\n\t" - "ror r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" "orr r6, r6, r12, lsl #16\n\t" "ror r6, r6, #16\n\t" #else @@ -1663,17 +1542,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r9, lr, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1685,9 +1562,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -1699,9 +1575,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1717,18 +1592,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r7, r12, lsr #16\n\t" "add r12, r7, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r9, r9, #0xff\n\t" - "bic r9, r9, #0xff00\n\t" - "ror r9, r9, #16\n\t" + "lsr r9, r9, #16\n\t" "orr r9, r9, lr, lsl #16\n\t" "ror 
r9, r9, #16\n\t" #else "bfi r9, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r7, r7, #0xff\n\t" - "bic r7, r7, #0xff00\n\t" - "ror r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" "orr r7, r7, r12, lsl #16\n\t" "ror r7, r7, #16\n\t" #else @@ -1744,8 +1615,7 @@ void kyber_arm32_ntt(sword16* r_p) "str r8, [%[r], #96]\n\t" "str r9, [%[r], #112]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r2, [sp]\n\t" - "ldr r3, [sp, #4]\n\t" + "ldm sp, {r2, r3}\n\t" #else "ldrd r2, r3, [sp]\n\t" #endif @@ -1798,17 +1668,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r6, lr, r6\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1820,9 +1688,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -1834,9 +1701,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1852,18 +1718,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r2, r12, lsr #16\n\t" "add r12, r2, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r6, r6, #0xff\n\t" - "bic r6, r6, #0xff00\n\t" - "ror r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" "orr r6, r6, lr, lsl #16\n\t" "ror r6, r6, #16\n\t" #else "bfi r6, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r12, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -1897,17 +1759,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r7, lr, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1919,9 +1779,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -1933,9 +1792,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -1951,18 +1809,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r3, 
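[Editor's note: the same size trim seen in the AES and curve25519 files appears here too. On the pre-ARMv7 path, two adjacent word loads collapse into a single load-multiple ("ldr r2, [sp]" + "ldr r3, [sp, #4]" becomes "ldm sp, {r2, r3}"), saving four bytes per site; ldm requires the destination registers in ascending order, and the ARMv7+ path keeps ldrd. Semantically it is just (sketch, hypothetical helper):

    #include <stdint.h>

    /* What one "ldm base, {lo, hi}" does: two consecutive word loads,
     * base register left unchanged (no writeback form is used here). */
    static void load_pair(const uint32_t *base, uint32_t *lo, uint32_t *hi)
    {
        *lo = base[0];
        *hi = base[1];
    }
]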
r12, lsr #16\n\t" "add r12, r3, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r7, r7, #0xff\n\t" - "bic r7, r7, #0xff00\n\t" - "ror r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" "orr r7, r7, lr, lsl #16\n\t" "ror r7, r7, #16\n\t" #else "bfi r7, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, r12, lsl #16\n\t" "ror r3, r3, #16\n\t" #else @@ -1996,17 +1850,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r8, lr, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2018,9 +1870,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -2032,9 +1883,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2050,18 +1900,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r4, r12, lsr #16\n\t" "add r12, r4, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r8, r8, #0xff\n\t" - "bic r8, r8, #0xff00\n\t" - "ror r8, r8, #16\n\t" + "lsr r8, r8, #16\n\t" "orr r8, r8, lr, lsl #16\n\t" "ror r8, r8, #16\n\t" #else "bfi r8, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, r12, lsl #16\n\t" "ror r4, r4, #16\n\t" #else @@ -2095,17 +1941,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r9, lr, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2117,9 +1961,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -2131,9 +1974,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2149,18 +1991,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r5, r12, lsr #16\n\t" "add r12, r5, 
r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r9, r9, #0xff\n\t" - "bic r9, r9, #0xff00\n\t" - "ror r9, r9, #16\n\t" + "lsr r9, r9, #16\n\t" "orr r9, r9, lr, lsl #16\n\t" "ror r9, r9, #16\n\t" #else "bfi r9, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, r12, lsl #16\n\t" "ror r5, r5, #16\n\t" #else @@ -2197,17 +2035,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r4, lr, r4\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2219,9 +2055,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -2233,9 +2068,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2251,18 +2085,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r2, r12, lsr #16\n\t" "add r12, r2, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, lr, lsl #16\n\t" "ror r4, r4, #16\n\t" #else "bfi r4, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r12, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -2296,17 +2126,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r5, lr, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2318,9 +2146,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -2332,9 +2159,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2350,18 +2176,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r3, r12, lsr #16\n\t" "add r12, r3, r12, lsr #16\n\t" #if 
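[Editor's note on why every result needs the bfi-or-emulation dance at all: the coefficient array is sword16, and these loops keep two coefficients packed per 32-bit register, so a 16-bit result must be merged back without disturbing its neighbour. A tiny C model of the layout (hypothetical names; two's-complement conversions assumed, as on ARM):

    #include <stdint.h>

    typedef uint32_t pair16;                    /* [lane1 | lane0] */

    static int16_t lane0(pair16 v) { return (int16_t)(v & 0xffffu); }
    static int16_t lane1(pair16 v) { return (int16_t)(v >> 16); }

    /* Write lane0 only, as "bfi rd, rs, #0, #16" does. */
    static pair16 set_lane0(pair16 v, int16_t c)
    {
        return (v & 0xffff0000u) | (uint16_t)c;
    }
]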
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, lr, lsl #16\n\t" "ror r5, r5, #16\n\t" #else "bfi r5, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, r12, lsl #16\n\t" "ror r3, r3, #16\n\t" #else @@ -2394,17 +2216,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r8, lr, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2416,9 +2236,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -2430,9 +2249,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2448,18 +2266,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r6, r12, lsr #16\n\t" "add r12, r6, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r8, r8, #0xff\n\t" - "bic r8, r8, #0xff00\n\t" - "ror r8, r8, #16\n\t" + "lsr r8, r8, #16\n\t" "orr r8, r8, lr, lsl #16\n\t" "ror r8, r8, #16\n\t" #else "bfi r8, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r6, r6, #0xff\n\t" - "bic r6, r6, #0xff00\n\t" - "ror r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" "orr r6, r6, r12, lsl #16\n\t" "ror r6, r6, #16\n\t" #else @@ -2492,17 +2306,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r9, lr, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2514,9 +2326,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -2528,9 +2339,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2546,18 +2356,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r7, r12, lsr #16\n\t" "add r12, r7, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) - "bic r9, r9, #0xff\n\t" - "bic r9, r9, #0xff00\n\t" - "ror r9, r9, #16\n\t" + "lsr r9, r9, #16\n\t" "orr r9, r9, lr, lsl #16\n\t" "ror r9, r9, #16\n\t" #else "bfi r9, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r7, r7, #0xff\n\t" - "bic r7, r7, #0xff00\n\t" - "ror r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" "orr r7, r7, r12, lsl #16\n\t" "ror r7, r7, #16\n\t" #else @@ -2594,17 +2400,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r3, lr, r3\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2616,9 +2420,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -2630,9 +2433,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2648,18 +2450,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r2, r12, lsr #16\n\t" "add r12, r2, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, lr, lsl #16\n\t" "ror r3, r3, #16\n\t" #else "bfi r3, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r12, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -2692,17 +2490,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r5, lr, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2714,9 +2510,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -2728,9 +2523,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2746,18 +2540,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r4, r12, lsr #16\n\t" "add r12, r4, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, 
r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, lr, lsl #16\n\t" "ror r5, r5, #16\n\t" #else "bfi r5, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, r12, lsl #16\n\t" "ror r4, r4, #16\n\t" #else @@ -2794,17 +2584,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r7, lr, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2816,9 +2604,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -2830,9 +2617,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2848,18 +2634,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r6, r12, lsr #16\n\t" "add r12, r6, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r7, r7, #0xff\n\t" - "bic r7, r7, #0xff00\n\t" - "ror r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" "orr r7, r7, lr, lsl #16\n\t" "ror r7, r7, #16\n\t" #else "bfi r7, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r6, r6, #0xff\n\t" - "bic r6, r6, #0xff00\n\t" - "ror r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" "orr r6, r6, r12, lsl #16\n\t" "ror r6, r6, #16\n\t" #else @@ -2892,17 +2674,15 @@ void kyber_arm32_ntt(sword16* r_p) "mul r12, lr, r12\n\t" "mul r9, lr, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2914,9 +2694,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -2928,9 +2707,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -2946,18 +2724,14 @@ void kyber_arm32_ntt(sword16* r_p) "sub lr, r8, r12, lsr #16\n\t" "add r12, r8, r12, lsr #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r9, r9, #0xff\n\t" - "bic r9, r9, 
#0xff00\n\t" - "ror r9, r9, #16\n\t" + "lsr r9, r9, #16\n\t" "orr r9, r9, lr, lsl #16\n\t" "ror r9, r9, #16\n\t" #else "bfi r9, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r8, r8, #0xff\n\t" - "bic r8, r8, #0xff00\n\t" - "ror r8, r8, #16\n\t" + "lsr r8, r8, #16\n\t" "orr r8, r8, r12, lsl #16\n\t" "ror r8, r8, #16\n\t" #else @@ -2966,9 +2740,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0xaf\n\t" - "lsl r11, r11, #8\n\t" - "add r11, r11, #0xc0\n\t" + "mov r11, #0xc0\n\t" + "orr r11, r11, #0xaf00\n\t" #else "mov r11, #0xafc0\n\t" #endif @@ -2979,16 +2752,14 @@ void kyber_arm32_ntt(sword16* r_p) #endif #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0x4e\n\t" - "lsl r11, r11, #8\n\t" - "add r11, r11, #0xbf\n\t" + "mov r11, #0xbf\n\t" + "orr r11, r11, #0x4e00\n\t" #else "mov r11, #0x4ebf\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -3283,9 +3054,8 @@ void kyber_arm32_ntt(sword16* r_p) #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -3363,9 +3133,8 @@ void kyber_arm32_invntt(sword16* r_p) "sub sp, sp, #8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -3418,18 +3187,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r2, r3\n\t" "add r2, r2, r3\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r10, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -3451,9 +3216,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -3465,9 +3229,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -3479,9 +3242,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -3493,9 +3255,8 @@ void 
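[Editor's note: one more constant worth decoding in this region: 0x4ebf = 20159 matches round(2^26 / q), the multiplier of the reference ML-KEM Barrett reduction, used alongside q = 0xd01; its load likewise drops from three instructions to two (mov #0xbf; orr #0x4e00). Assuming that reading, the scalar math being performed is:

    #include <stdint.h>

    #define Q 3329

    /* Barrett reduction as in the ML-KEM reference code: returns a
     * representative of a mod q centred roughly in (-q/2, q/2]. */
    static int16_t barrett_reduce(int16_t a)
    {
        const int16_t v = ((1 << 26) + Q / 2) / Q;     /* 20159 = 0x4ebf */
        int16_t t = (int16_t)(((int32_t)v * a + (1 << 25)) >> 26);
        return (int16_t)(a - t * Q);
    }
]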
kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -3508,9 +3269,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r3, r10, lr, r3\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, r12, lsl #16\n\t" "ror r3, r3, #16\n\t" #else @@ -3545,18 +3304,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r4, r5\n\t" "add r4, r4, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, r10, lsl #16\n\t" "ror r4, r4, #16\n\t" #else @@ -3577,9 +3332,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -3591,9 +3345,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -3605,9 +3358,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -3619,9 +3371,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -3634,9 +3385,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r5, r10, lr, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, r12, lsl #16\n\t" "ror r5, r5, #16\n\t" #else @@ -3674,18 +3423,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r6, r7\n\t" "add r6, r6, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r6, r6, #0xff\n\t" - "bic r6, r6, #0xff00\n\t" - "ror r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" "orr r6, r6, r10, lsl #16\n\t" "ror r6, r6, #16\n\t" #else @@ -3707,9 +3452,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + 
"mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -3721,9 +3465,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -3735,9 +3478,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -3749,9 +3491,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -3764,9 +3505,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r7, r10, lr, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r7, r7, #0xff\n\t" - "bic r7, r7, #0xff00\n\t" - "ror r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" "orr r7, r7, r12, lsl #16\n\t" "ror r7, r7, #16\n\t" #else @@ -3801,18 +3540,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r8, r9\n\t" "add r8, r8, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r8, r8, #0xff\n\t" - "bic r8, r8, #0xff00\n\t" - "ror r8, r8, #16\n\t" + "lsr r8, r8, #16\n\t" "orr r8, r8, r10, lsl #16\n\t" "ror r8, r8, #16\n\t" #else @@ -3833,9 +3568,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -3847,9 +3581,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -3861,9 +3594,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -3875,9 +3607,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -3890,9 +3621,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r9, r10, lr, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r9, r9, #0xff\n\t" - "bic r9, r9, #0xff00\n\t" - "ror r9, r9, #16\n\t" + "lsr r9, r9, #16\n\t" "orr r9, r9, r12, lsl #16\n\t" "ror r9, r9, #16\n\t" #else @@ -3930,18 +3659,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r2, 
r4\n\t" "add r2, r2, r4\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r10, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -3963,9 +3688,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -3977,9 +3701,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -3991,9 +3714,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -4005,9 +3727,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -4020,9 +3741,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r4, r10, lr, r4\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, r12, lsl #16\n\t" "ror r4, r4, #16\n\t" #else @@ -4057,18 +3776,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r3, r5\n\t" "add r3, r3, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, r10, lsl #16\n\t" "ror r3, r3, #16\n\t" #else @@ -4090,9 +3805,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -4104,9 +3818,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -4118,9 +3831,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov 
r10, #0xcff\n\t" #endif @@ -4132,9 +3844,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -4147,9 +3858,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r5, r10, lr, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, r12, lsl #16\n\t" "ror r5, r5, #16\n\t" #else @@ -4184,18 +3893,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r6, r8\n\t" "add r6, r6, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r6, r6, #0xff\n\t" - "bic r6, r6, #0xff00\n\t" - "ror r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" "orr r6, r6, r10, lsl #16\n\t" "ror r6, r6, #16\n\t" #else @@ -4216,9 +3921,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -4230,9 +3934,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -4244,9 +3947,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -4258,9 +3960,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -4273,9 +3974,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r8, r10, lr, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r8, r8, #0xff\n\t" - "bic r8, r8, #0xff00\n\t" - "ror r8, r8, #16\n\t" + "lsr r8, r8, #16\n\t" "orr r8, r8, r12, lsl #16\n\t" "ror r8, r8, #16\n\t" #else @@ -4310,18 +4009,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r7, r9\n\t" "add r7, r7, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r7, r7, #0xff\n\t" - "bic r7, r7, #0xff00\n\t" - "ror r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" "orr r7, r7, r10, lsl #16\n\t" "ror r7, r7, #16\n\t" #else @@ -4342,9 +4037,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - 
"lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -4356,9 +4050,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -4370,9 +4063,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -4384,9 +4076,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -4399,9 +4090,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r9, r10, lr, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r9, r9, #0xff\n\t" - "bic r9, r9, #0xff00\n\t" - "ror r9, r9, #16\n\t" + "lsr r9, r9, #16\n\t" "orr r9, r9, r12, lsl #16\n\t" "ror r9, r9, #16\n\t" #else @@ -4439,18 +4128,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r2, r6\n\t" "add r2, r2, r6\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r10, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -4472,9 +4157,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -4486,9 +4170,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -4500,9 +4183,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -4514,9 +4196,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -4529,9 +4210,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r6, r10, lr, r6\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r6, r6, #0xff\n\t" - "bic r6, r6, #0xff00\n\t" - "ror r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" "orr r6, r6, r12, lsl #16\n\t" "ror r6, r6, #16\n\t" #else @@ -4566,18 +4245,14 @@ 
void kyber_arm32_invntt(sword16* r_p) "sub r12, r3, r7\n\t" "add r3, r3, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, r10, lsl #16\n\t" "ror r3, r3, #16\n\t" #else @@ -4599,9 +4274,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -4613,9 +4287,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -4627,9 +4300,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -4641,9 +4313,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -4656,9 +4327,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r7, r10, lr, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r7, r7, #0xff\n\t" - "bic r7, r7, #0xff00\n\t" - "ror r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" "orr r7, r7, r12, lsl #16\n\t" "ror r7, r7, #16\n\t" #else @@ -4693,18 +4362,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r4, r8\n\t" "add r4, r4, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, r10, lsl #16\n\t" "ror r4, r4, #16\n\t" #else @@ -4726,9 +4391,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -4740,9 +4404,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -4754,9 +4417,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, 
#0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -4768,9 +4430,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -4783,9 +4444,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r8, r10, lr, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r8, r8, #0xff\n\t" - "bic r8, r8, #0xff00\n\t" - "ror r8, r8, #16\n\t" + "lsr r8, r8, #16\n\t" "orr r8, r8, r12, lsl #16\n\t" "ror r8, r8, #16\n\t" #else @@ -4820,18 +4479,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r5, r9\n\t" "add r5, r5, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, r10, lsl #16\n\t" "ror r5, r5, #16\n\t" #else @@ -4853,9 +4508,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -4867,9 +4521,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -4881,9 +4534,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -4895,9 +4547,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -4910,9 +4561,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r9, r10, lr, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r9, r9, #0xff\n\t" - "bic r9, r9, #0xff00\n\t" - "ror r9, r9, #16\n\t" + "lsr r9, r9, #16\n\t" "orr r9, r9, r12, lsl #16\n\t" "ror r9, r9, #16\n\t" #else @@ -4921,9 +4570,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0xaf\n\t" - "lsl r11, r11, #8\n\t" - "add r11, r11, #0xc0\n\t" + "mov r11, #0xc0\n\t" + "orr r11, r11, #0xaf00\n\t" #else "mov r11, #0xafc0\n\t" #endif @@ -4934,9 +4582,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0x4e\n\t" - "lsl r11, r11, #8\n\t" - "add r11, r11, #0xbf\n\t" + "mov r11, #0xbf\n\t" + "orr r11, r11, #0x4e00\n\t" #else "mov r11, #0x4ebf\n\t" #endif @@ -5145,18 
+4792,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r2, r4\n\t" "add r2, r2, r4\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r10, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -5178,9 +4821,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -5192,9 +4834,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -5206,9 +4847,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -5220,9 +4860,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -5235,9 +4874,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r4, r10, lr, r4\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, r12, lsl #16\n\t" "ror r4, r4, #16\n\t" #else @@ -5272,18 +4909,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r3, r5\n\t" "add r3, r3, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, r10, lsl #16\n\t" "ror r3, r3, #16\n\t" #else @@ -5305,9 +4938,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -5319,9 +4951,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -5333,9 +4964,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + 
"mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -5347,9 +4977,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -5362,9 +4991,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r5, r10, lr, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, r12, lsl #16\n\t" "ror r5, r5, #16\n\t" #else @@ -5399,18 +5026,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r6, r8\n\t" "add r6, r6, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r6, r6, #0xff\n\t" - "bic r6, r6, #0xff00\n\t" - "ror r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" "orr r6, r6, r10, lsl #16\n\t" "ror r6, r6, #16\n\t" #else @@ -5431,9 +5054,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -5445,9 +5067,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -5459,9 +5080,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -5473,9 +5093,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -5488,9 +5107,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r8, r10, lr, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r8, r8, #0xff\n\t" - "bic r8, r8, #0xff00\n\t" - "ror r8, r8, #16\n\t" + "lsr r8, r8, #16\n\t" "orr r8, r8, r12, lsl #16\n\t" "ror r8, r8, #16\n\t" #else @@ -5525,18 +5142,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r7, r9\n\t" "add r7, r7, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r7, r7, #0xff\n\t" - "bic r7, r7, #0xff00\n\t" - "ror r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" "orr r7, r7, r10, lsl #16\n\t" "ror r7, r7, #16\n\t" #else @@ -5557,9 +5170,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -5571,9 +5183,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -5585,9 +5196,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -5599,9 +5209,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -5614,9 +5223,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r9, r10, lr, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r9, r9, #0xff\n\t" - "bic r9, r9, #0xff00\n\t" - "ror r9, r9, #16\n\t" + "lsr r9, r9, #16\n\t" "orr r9, r9, r12, lsl #16\n\t" "ror r9, r9, #16\n\t" #else @@ -5632,8 +5239,7 @@ void kyber_arm32_invntt(sword16* r_p) "str r8, [%[r], #96]\n\t" "str r9, [%[r], #112]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r2, [sp]\n\t" - "ldr r3, [sp, #4]\n\t" + "ldm sp, {r2, r3}\n\t" #else "ldrd r2, r3, [sp]\n\t" #endif @@ -5687,18 +5293,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r2, r3\n\t" "add r2, r2, r3\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r10, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -5720,9 +5322,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -5734,9 +5335,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -5748,9 +5348,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -5762,9 +5361,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" 
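The bfi substitute on the pre-ARMv7 path likewise shrinks from five instructions to three: because the orr overwrites the entire top half anyway, there is no need to bic-clear the low 16 bits first; lsr discards them, orr plants the new halfword in the upper half, and the closing ror #16 swaps the halves back, exactly matching "bfi rd, rs, #0, #16". A plain-C model of the three-instruction form; bfi16 is an illustrative name:

#include <assert.h>
#include <stdint.h>

/* Model of: lsr rd,rd,#16 / orr rd,rd,rs,lsl #16 / ror rd,rd,#16,
 * i.e. insert the low 16 bits of rs into the low 16 bits of rd. */
static uint32_t bfi16(uint32_t rd, uint32_t rs)
{
    rd >>= 16;                      /* keep only rd's high half          */
    rd |= rs << 16;                 /* new halfword into the top half    */
    return (rd >> 16) | (rd << 16); /* ror #16: swap the halves back     */
}

int main(void)
{
    assert(bfi16(0x12345678u, 0xabcdbeefu) == 0x1234beefu);
    return 0;
}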
#endif @@ -5777,9 +5375,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r3, r10, lr, r3\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, r12, lsl #16\n\t" "ror r3, r3, #16\n\t" #else @@ -5814,18 +5410,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r4, r5\n\t" "add r4, r4, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, r10, lsl #16\n\t" "ror r4, r4, #16\n\t" #else @@ -5846,9 +5438,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -5860,9 +5451,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -5874,9 +5464,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -5888,9 +5477,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -5903,9 +5491,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r5, r10, lr, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, r12, lsl #16\n\t" "ror r5, r5, #16\n\t" #else @@ -5941,18 +5527,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r6, r7\n\t" "add r6, r6, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r6, r6, #0xff\n\t" - "bic r6, r6, #0xff00\n\t" - "ror r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" "orr r6, r6, r10, lsl #16\n\t" "ror r6, r6, #16\n\t" #else @@ -5974,9 +5556,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -5988,9 +5569,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, 
#8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -6002,9 +5582,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -6016,9 +5595,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -6031,9 +5609,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r7, r10, lr, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r7, r7, #0xff\n\t" - "bic r7, r7, #0xff00\n\t" - "ror r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" "orr r7, r7, r12, lsl #16\n\t" "ror r7, r7, #16\n\t" #else @@ -6068,18 +5644,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r8, r9\n\t" "add r8, r8, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r8, r8, #0xff\n\t" - "bic r8, r8, #0xff00\n\t" - "ror r8, r8, #16\n\t" + "lsr r8, r8, #16\n\t" "orr r8, r8, r10, lsl #16\n\t" "ror r8, r8, #16\n\t" #else @@ -6100,9 +5672,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -6114,9 +5685,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -6128,9 +5698,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -6142,9 +5711,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -6157,9 +5725,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r9, r10, lr, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r9, r9, #0xff\n\t" - "bic r9, r9, #0xff00\n\t" - "ror r9, r9, #16\n\t" + "lsr r9, r9, #16\n\t" "orr r9, r9, r12, lsl #16\n\t" "ror r9, r9, #16\n\t" #else @@ -6195,18 +5761,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r2, r4\n\t" "add r2, r2, r4\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, 
#0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r10, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -6228,9 +5790,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -6242,9 +5803,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -6256,9 +5816,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -6270,9 +5829,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -6285,9 +5843,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r4, r10, lr, r4\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, r12, lsl #16\n\t" "ror r4, r4, #16\n\t" #else @@ -6322,18 +5878,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r3, r5\n\t" "add r3, r3, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, r10, lsl #16\n\t" "ror r3, r3, #16\n\t" #else @@ -6355,9 +5907,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -6369,9 +5920,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -6383,9 +5933,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -6397,9 +5946,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr 
r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -6412,9 +5960,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r5, r10, lr, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, r12, lsl #16\n\t" "ror r5, r5, #16\n\t" #else @@ -6449,18 +5995,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r6, r8\n\t" "add r6, r6, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r6, r6, #0xff\n\t" - "bic r6, r6, #0xff00\n\t" - "ror r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" "orr r6, r6, r10, lsl #16\n\t" "ror r6, r6, #16\n\t" #else @@ -6481,9 +6023,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -6495,9 +6036,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -6509,9 +6049,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -6523,9 +6062,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -6538,9 +6076,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r8, r10, lr, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r8, r8, #0xff\n\t" - "bic r8, r8, #0xff00\n\t" - "ror r8, r8, #16\n\t" + "lsr r8, r8, #16\n\t" "orr r8, r8, r12, lsl #16\n\t" "ror r8, r8, #16\n\t" #else @@ -6575,18 +6111,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r7, r9\n\t" "add r7, r7, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r7, r7, #0xff\n\t" - "bic r7, r7, #0xff00\n\t" - "ror r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" "orr r7, r7, r10, lsl #16\n\t" "ror r7, r7, #16\n\t" #else @@ -6607,9 +6139,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -6621,9 +6152,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -6635,9 +6165,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -6649,9 +6178,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -6664,9 +6192,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r9, r10, lr, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r9, r9, #0xff\n\t" - "bic r9, r9, #0xff00\n\t" - "ror r9, r9, #16\n\t" + "lsr r9, r9, #16\n\t" "orr r9, r9, r12, lsl #16\n\t" "ror r9, r9, #16\n\t" #else @@ -6675,9 +6201,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0xaf\n\t" - "lsl r11, r11, #8\n\t" - "add r11, r11, #0xc0\n\t" + "mov r11, #0xc0\n\t" + "orr r11, r11, #0xaf00\n\t" #else "mov r11, #0xafc0\n\t" #endif @@ -6688,9 +6213,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif #else #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0x4e\n\t" - "lsl r11, r11, #8\n\t" - "add r11, r11, #0xbf\n\t" + "mov r11, #0xbf\n\t" + "orr r11, r11, #0x4e00\n\t" #else "mov r11, #0x4ebf\n\t" #endif @@ -6868,18 +6392,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r2, r6\n\t" "add r2, r2, r6\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r10, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -6901,9 +6421,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -6915,9 +6434,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -6929,9 +6447,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -6943,9 +6460,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov 
r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -6958,9 +6474,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r6, r10, lr, r6\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r6, r6, #0xff\n\t" - "bic r6, r6, #0xff00\n\t" - "ror r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" "orr r6, r6, r12, lsl #16\n\t" "ror r6, r6, #16\n\t" #else @@ -6995,18 +6509,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r3, r7\n\t" "add r3, r3, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, r10, lsl #16\n\t" "ror r3, r3, #16\n\t" #else @@ -7028,9 +6538,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -7042,9 +6551,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7056,9 +6564,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -7070,9 +6577,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7085,9 +6591,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r7, r10, lr, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r7, r7, #0xff\n\t" - "bic r7, r7, #0xff00\n\t" - "ror r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" "orr r7, r7, r12, lsl #16\n\t" "ror r7, r7, #16\n\t" #else @@ -7122,18 +6626,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r4, r8\n\t" "add r4, r4, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, r10, lsl #16\n\t" "ror r4, r4, #16\n\t" #else @@ -7155,9 +6655,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -7169,9 +6668,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7183,9 +6681,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -7197,9 +6694,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7212,9 +6708,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r8, r10, lr, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r8, r8, #0xff\n\t" - "bic r8, r8, #0xff00\n\t" - "ror r8, r8, #16\n\t" + "lsr r8, r8, #16\n\t" "orr r8, r8, r12, lsl #16\n\t" "ror r8, r8, #16\n\t" #else @@ -7249,18 +6743,14 @@ void kyber_arm32_invntt(sword16* r_p) "sub r12, r5, r9\n\t" "add r5, r5, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r12, r12, #0xff\n\t" - "bic r12, r12, #0xff00\n\t" - "ror r12, r12, #16\n\t" + "lsr r12, r12, #16\n\t" "orr r12, r12, lr, lsl #16\n\t" "ror r12, r12, #16\n\t" #else "bfi r12, lr, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, r10, lsl #16\n\t" "ror r5, r5, #16\n\t" #else @@ -7282,9 +6772,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r12, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -7296,9 +6785,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7310,9 +6798,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -7324,9 +6811,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7339,9 +6825,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r9, r10, lr, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r9, r9, #0xff\n\t" - "bic r9, r9, #0xff00\n\t" - "ror r9, r9, #16\n\t" + "lsr r9, r9, #16\n\t" "orr r9, r9, r12, lsl #16\n\t" "ror r9, r9, #16\n\t" #else @@ -7378,17 +6862,15 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r2, lr, r2\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, 
r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7400,9 +6882,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -7414,9 +6895,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7429,9 +6909,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r2, r10, lr, r2\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r12, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -7467,17 +6945,15 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r3, lr, r3\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7489,9 +6965,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -7503,9 +6978,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7518,9 +6992,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r3, r10, lr, r3\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, r12, lsl #16\n\t" "ror r3, r3, #16\n\t" #else @@ -7556,17 +7028,15 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r4, lr, r4\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7578,9 +7048,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" 
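Two rewrites repeat throughout this Kyber code. First, constant materialization: 0xd01 is the Kyber/ML-KEM modulus q = 3329, and 0xcff is 3327 = -q^-1 mod 2^16, the Montgomery reduction factor. The pre-ARMv7 path avoids movw/movt, and an ARM data-processing immediate is an 8-bit value rotated right by an even amount, so neither 12-bit constant fits one mov; but 0x1, 0xff, 0xd00 and 0xc00 are all encodable, letting mov+orr build each constant in two instructions where mov+lsl+add took three. A minimal sketch of the identity (GNU assembler; the labels are illustrative only):

# Both sequences leave q = 0xd01 in r10.
build_q_old:
	mov	r10, #0xd
	lsl	r10, r10, #8
	add	r10, r10, #0x1
	bx	lr
build_q_new:
	mov	r10, #0x1
	orr	r10, r10, #0xd00
	bx	lr

Second, the halfword insert: without ARMv6T2's bfi, packing a 16-bit result into the low half of a register took bic+bic+ror ahead of the shared orr/ror tail. A single lsr #16 clears the low halfword and moves the high halfword down in one step, cutting the five-instruction emulation to three. Sketch of the equivalence (again with illustrative labels):

# Emulate "bfi r8, r12, #0, #16": insert the low 16 bits of r12 into r8.
bfi_lo16_old:
	bic	r8, r8, #0xff
	bic	r8, r8, #0xff00
	ror	r8, r8, #16
	orr	r8, r8, r12, lsl #16
	ror	r8, r8, #16
	bx	lr
bfi_lo16_new:
	lsr	r8, r8, #16
	orr	r8, r8, r12, lsl #16
	ror	r8, r8, #16
	bx	lr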
#else "mov r10, #0xcff\n\t" #endif @@ -7592,9 +7061,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7607,9 +7075,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r4, r10, lr, r4\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, r12, lsl #16\n\t" "ror r4, r4, #16\n\t" #else @@ -7645,17 +7111,15 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r5, lr, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7667,9 +7131,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -7681,9 +7144,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7696,9 +7158,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r5, r10, lr, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, r12, lsl #16\n\t" "ror r5, r5, #16\n\t" #else @@ -7734,17 +7194,15 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r6, lr, r6\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7756,9 +7214,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -7770,9 +7227,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7785,9 +7241,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r6, r10, lr, r6\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r6, r6, #0xff\n\t" - "bic r6, r6, #0xff00\n\t" - "ror r6, 
r6, #16\n\t" + "lsr r6, r6, #16\n\t" "orr r6, r6, r12, lsl #16\n\t" "ror r6, r6, #16\n\t" #else @@ -7823,17 +7277,15 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r7, lr, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7845,9 +7297,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -7859,9 +7310,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7874,9 +7324,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r7, r10, lr, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r7, r7, #0xff\n\t" - "bic r7, r7, #0xff00\n\t" - "ror r7, r7, #16\n\t" + "lsr r7, r7, #16\n\t" "orr r7, r7, r12, lsl #16\n\t" "ror r7, r7, #16\n\t" #else @@ -7912,17 +7360,15 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r8, lr, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7934,9 +7380,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -7948,9 +7393,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -7963,9 +7407,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r8, r10, lr, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r8, r8, #0xff\n\t" - "bic r8, r8, #0xff00\n\t" - "ror r8, r8, #16\n\t" + "lsr r8, r8, #16\n\t" "orr r8, r8, r12, lsl #16\n\t" "ror r8, r8, #16\n\t" #else @@ -8001,17 +7443,15 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul r9, lr, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif "mul lr, r10, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, 
#0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -8023,9 +7463,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mla r12, r10, lr, r12\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xc\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0xc00\n\t" #else "mov r10, #0xcff\n\t" #endif @@ -8037,9 +7476,8 @@ void kyber_arm32_invntt(sword16* r_p) #endif "mul lr, r10, lr\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0xd\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, #0x1\n\t" + "mov r10, #0x1\n\t" + "orr r10, r10, #0xd00\n\t" #else "mov r10, #0xd01\n\t" #endif @@ -8052,9 +7490,7 @@ void kyber_arm32_invntt(sword16* r_p) "lsr r12, r12, #16\n\t" "mla r9, r10, lr, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r9, r9, #0xff\n\t" - "bic r9, r9, #0xff00\n\t" - "ror r9, r9, #16\n\t" + "lsr r9, r9, #16\n\t" "orr r9, r9, r12, lsl #16\n\t" "ror r9, r9, #16\n\t" #else @@ -8130,9 +7566,8 @@ void kyber_arm32_basemul_mont(sword16* r_p, const sword16* a_p, "add r3, r3, #0x80\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xd\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0x1\n\t" + "mov r12, #0x1\n\t" + "orr r12, r12, #0xd00\n\t" #else "mov r12, #0xd01\n\t" #endif @@ -8186,9 +7621,8 @@ void kyber_arm32_basemul_mont(sword16* r_p, const sword16* a_p, "mul r8, r9, r8\n\t" "mul r10, r11, r10\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xc\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0xff\n\t" + "mov r12, #0xff\n\t" + "orr r12, r12, #0xc00\n\t" #else "mov r12, #0xcff\n\t" #endif @@ -8207,9 +7641,8 @@ void kyber_arm32_basemul_mont(sword16* r_p, const sword16* a_p, "mul r9, r12, r8\n\t" "mul r11, r12, r11\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xd\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0x1\n\t" + "mov r12, #0x1\n\t" + "orr r12, r12, #0xd00\n\t" #else "mov r12, #0xd01\n\t" #endif @@ -8271,9 +7704,8 @@ void kyber_arm32_basemul_mont(sword16* r_p, const sword16* a_p, #endif "mla r10, r11, r12, r10\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xc\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0xff\n\t" + "mov r12, #0xff\n\t" + "orr r12, r12, #0xc00\n\t" #else "mov r12, #0xcff\n\t" #endif @@ -8292,9 +7724,8 @@ void kyber_arm32_basemul_mont(sword16* r_p, const sword16* a_p, "mul r9, r12, r9\n\t" "mul r11, r12, r11\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xd\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0x1\n\t" + "mov r12, #0x1\n\t" + "orr r12, r12, #0xd00\n\t" #else "mov r12, #0xd01\n\t" #endif @@ -8345,9 +7776,8 @@ void kyber_arm32_basemul_mont(sword16* r_p, const sword16* a_p, #endif "mla r11, r5, r12, r11\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xc\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0xff\n\t" + "mov r12, #0xff\n\t" + "orr r12, r12, #0xc00\n\t" #else "mov r12, #0xcff\n\t" #endif @@ -8366,9 +7796,8 @@ void kyber_arm32_basemul_mont(sword16* r_p, const sword16* a_p, "mul r6, r12, r6\n\t" "mul r7, r12, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xd\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0x1\n\t" + "mov r12, #0x1\n\t" + "orr r12, r12, #0xd00\n\t" #else "mov r12, #0xd01\n\t" #endif @@ -8425,9 +7854,8 @@ void kyber_arm32_basemul_mont_add(sword16* r_p, const 
sword16* a_p, "add r3, r3, #0x80\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xd\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0x1\n\t" + "mov r12, #0x1\n\t" + "orr r12, r12, #0xd00\n\t" #else "mov r12, #0xd01\n\t" #endif @@ -8484,9 +7912,8 @@ void kyber_arm32_basemul_mont_add(sword16* r_p, const sword16* a_p, "mul r8, r9, r8\n\t" "mul r10, r11, r10\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xc\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0xff\n\t" + "mov r12, #0xff\n\t" + "orr r12, r12, #0xc00\n\t" #else "mov r12, #0xcff\n\t" #endif @@ -8505,9 +7932,8 @@ void kyber_arm32_basemul_mont_add(sword16* r_p, const sword16* a_p, "mul r9, r12, r8\n\t" "mul r11, r12, r11\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xd\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0x1\n\t" + "mov r12, #0x1\n\t" + "orr r12, r12, #0xd00\n\t" #else "mov r12, #0xd01\n\t" #endif @@ -8569,9 +7995,8 @@ void kyber_arm32_basemul_mont_add(sword16* r_p, const sword16* a_p, #endif "mla r10, r11, r12, r10\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xc\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0xff\n\t" + "mov r12, #0xff\n\t" + "orr r12, r12, #0xc00\n\t" #else "mov r12, #0xcff\n\t" #endif @@ -8590,9 +8015,8 @@ void kyber_arm32_basemul_mont_add(sword16* r_p, const sword16* a_p, "mul r9, r12, r9\n\t" "mul r11, r12, r11\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xd\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0x1\n\t" + "mov r12, #0x1\n\t" + "orr r12, r12, #0xd00\n\t" #else "mov r12, #0xd01\n\t" #endif @@ -8643,9 +8067,8 @@ void kyber_arm32_basemul_mont_add(sword16* r_p, const sword16* a_p, #endif "mla r11, r5, r12, r11\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xc\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0xff\n\t" + "mov r12, #0xff\n\t" + "orr r12, r12, #0xc00\n\t" #else "mov r12, #0xcff\n\t" #endif @@ -8664,9 +8087,8 @@ void kyber_arm32_basemul_mont_add(sword16* r_p, const sword16* a_p, "mul r6, r12, r6\n\t" "mul r7, r12, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xd\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0x1\n\t" + "mov r12, #0x1\n\t" + "orr r12, r12, #0xd00\n\t" #else "mov r12, #0xd01\n\t" #endif @@ -8716,18 +8138,14 @@ void kyber_arm32_basemul_mont_add(sword16* r_p, const sword16* a_p, "add r4, r4, r9\n\t" "add r5, r5, r11\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, r8, lsl #16\n\t" "ror r4, r4, #16\n\t" #else "bfi r4, r8, #0, #16\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, r10, lsl #16\n\t" "ror r5, r5, #16\n\t" #else @@ -8753,16 +8171,14 @@ void kyber_arm32_csubq(sword16* p_p) __asm__ __volatile__ ( #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0xd\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0x1\n\t" + "mov r12, #0x1\n\t" + "orr r12, r12, #0xd00\n\t" #else "mov r12, #0xd01\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov lr, #0xd\n\t" - "lsl lr, lr, #8\n\t" - "add lr, lr, #0x1\n\t" + "mov lr, #0x1\n\t" + "orr lr, lr, #0xd00\n\t" #else "mov lr, #0xd01\n\t" #endif @@ -8774,25 +8190,13 @@ void 
kyber_arm32_csubq(sword16* p_p) "movt lr, #0xd01\n\t" #endif #endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r11, #0x80\n\t" - "lsl r11, r11, #8\n\t" - "add r11, r11, #0x0\n\t" -#else "mov r11, #0x8000\n\t" -#endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "orr r11, r11, #0x80000000\n\t" #else "movt r11, #0x8000\n\t" #endif -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r1, #0x1\n\t" - "lsl r1, r1, #8\n\t" - "add r1, r1, #0x0\n\t" -#else "mov r1, #0x100\n\t" -#endif "\n" "L_kyber_arm32_csubq_loop_%=: \n\t" "ldm %[p], {r2, r3, r4, r5}\n\t" @@ -8821,9 +8225,7 @@ void kyber_arm32_csubq(sword16* p_p) "sub r6, r2, lr\n\t" "sub r2, r2, lr, lsl #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r6, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -8832,9 +8234,7 @@ void kyber_arm32_csubq(sword16* p_p) "sub r7, r3, lr\n\t" "sub r3, r3, lr, lsl #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, r7, lsl #16\n\t" "ror r3, r3, #16\n\t" #else @@ -8843,9 +8243,7 @@ void kyber_arm32_csubq(sword16* p_p) "sub r8, r4, lr\n\t" "sub r4, r4, lr, lsl #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, r8, lsl #16\n\t" "ror r4, r4, #16\n\t" #else @@ -8854,9 +8252,7 @@ void kyber_arm32_csubq(sword16* p_p) "sub r9, r5, lr\n\t" "sub r5, r5, lr, lsl #16\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, r9, lsl #16\n\t" "ror r5, r5, #16\n\t" #else @@ -8883,9 +8279,7 @@ void kyber_arm32_csubq(sword16* p_p) #endif "add r2, r2, r6\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r2, r2, #0xff\n\t" - "bic r2, r2, #0xff00\n\t" - "ror r2, r2, #16\n\t" + "lsr r2, r2, #16\n\t" "orr r2, r2, r10, lsl #16\n\t" "ror r2, r2, #16\n\t" #else @@ -8900,9 +8294,7 @@ void kyber_arm32_csubq(sword16* p_p) #endif "add r3, r3, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r3, r3, #0xff\n\t" - "bic r3, r3, #0xff00\n\t" - "ror r3, r3, #16\n\t" + "lsr r3, r3, #16\n\t" "orr r3, r3, r10, lsl #16\n\t" "ror r3, r3, #16\n\t" #else @@ -8917,9 +8309,7 @@ void kyber_arm32_csubq(sword16* p_p) #endif "add r4, r4, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r4, r4, #0xff\n\t" - "bic r4, r4, #0xff00\n\t" - "ror r4, r4, #16\n\t" + "lsr r4, r4, #16\n\t" "orr r4, r4, r10, lsl #16\n\t" "ror r4, r4, #16\n\t" #else @@ -8934,9 +8324,7 @@ void kyber_arm32_csubq(sword16* p_p) #endif "add r5, r5, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "bic r5, r5, #0xff\n\t" - "bic r5, r5, #0xff00\n\t" - "ror r5, r5, #16\n\t" + "lsr r5, r5, #16\n\t" "orr r5, r5, r10, lsl #16\n\t" "ror r5, r5, #16\n\t" #else @@ -8966,9 +8354,8 @@ unsigned int kyber_arm32_rej_uniform(sword16* p_p, unsigned int len_p, __asm__ __volatile__ ( #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r8, #0xd\n\t" - "lsl r8, r8, #8\n\t" - "add r8, r8, #0x1\n\t" + "mov r8, #0x1\n\t" + "orr r8, r8, #0xd00\n\t" #else "mov r8, #0xd01\n\t" #endif diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S 
b/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S index 0fb5265949..a79142f40b 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S @@ -114,8 +114,7 @@ Transform_Sha256_Len: adr r3, L_SHA256_transform_len_k # Copy digest to add in at end #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -186,8 +185,7 @@ L_SHA256_transform_len_begin: eor r6, r6, r10, lsr #8 eor r7, r7, r11, lsr #8 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [sp] - str r5, [sp, #4] + stm sp, {r4, r5} #else strd r4, r5, [sp] #endif @@ -311,8 +309,7 @@ L_SHA256_transform_len_begin: rev r10, r10 rev r11, r11 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [sp] - str r5, [sp, #4] + stm sp, {r4, r5} #else strd r4, r5, [sp] #endif @@ -1650,8 +1647,7 @@ L_SHA256_transform_len_start: str r9, [r0] # Add in digest from start #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -1678,8 +1674,7 @@ L_SHA256_transform_len_start: add r6, r6, r10 add r7, r7, r11 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif @@ -1841,8 +1836,7 @@ Transform_Sha256_Len: vpush {d8-d11} sub sp, sp, #24 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r0, [sp] - str r1, [sp, #4] + stm sp, {r0, r1} #else strd r0, r1, [sp] #endif @@ -1850,8 +1844,7 @@ Transform_Sha256_Len: adr r12, L_SHA256_transform_neon_len_k # Load digest into registers #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r2, [r0] - ldr r3, [r0, #4] + ldm r0, {r2, r3} #else ldrd r2, r3, [r0] #endif @@ -2799,16 +2792,14 @@ L_SHA256_transform_neon_len_start: ldr r10, [sp] # Add in digest from start #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r0, [r10] - ldr r1, [r10, #4] + ldm r10, {r0, r1} #else ldrd r0, r1, [r10] #endif add r2, r2, r0 add r3, r3, r1 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r2, [r10] - str r3, [r10, #4] + stm r10, {r2, r3} #else strd r2, r3, [r10] #endif diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c index 6166249fdd..bb7f1f86d5 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c @@ -85,8 +85,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) "sub sp, sp, #0xc0\n\t" /* Copy digest to add in at end */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha256]]\n\t" - "ldr r5, [%[sha256], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha256]]\n\t" #endif @@ -158,8 +157,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) "eor r6, r6, r10, lsr #8\n\t" "eor r7, r7, r11, lsr #8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [sp]\n\t" - "str r5, [sp, #4]\n\t" + "stm sp, {r4, r5}\n\t" #else "strd r4, r5, [sp]\n\t" #endif @@ -283,8 +281,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) "rev r10, r10\n\t" "rev r11, r11\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [sp]\n\t" - "str r5, [sp, #4]\n\t" + "stm sp, {r4, r5}\n\t" #else "strd r4, r5, [sp]\n\t" #endif @@ -1623,8 +1620,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) 
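The same substitution runs through these SHA-256 hunks as in the AES ones above: on the pre-ARMv7 path, a pair of single-word ldr/str at [base] and [base, #4] collapses into one ldm/stm. With an ascending register list the lowest register pairs with the lowest address, so ldm r0, {r4, r5} performs exactly r4 <- [r0], r5 <- [r0, #4] in a single instruction, and stm is the mirror store; without writeback the base register is left untouched. Note that the inline-assembly files now name r0 directly instead of the %[sha256]/%[key] operand, which is sound only because those operands are bound to r0 in these generated sources. A minimal standalone sketch (illustrative labels; assumes word-aligned pointers in r0 and r1):

# Copy two words from [r0] to [r1]: old and new pre-ARMv7 forms.
copy64_old:
	ldr	r4, [r0]
	ldr	r5, [r0, #4]
	str	r4, [r1]
	str	r5, [r1, #4]
	bx	lr
copy64_new:
	ldm	r0, {r4, r5}
	stm	r1, {r4, r5}
	bx	lr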
"str r9, [%[sha256]]\n\t" /* Add in digest from start */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha256]]\n\t" - "ldr r5, [%[sha256], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha256]]\n\t" #endif @@ -1651,8 +1647,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) "add r6, r6, r10\n\t" "add r7, r7, r11\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [%[sha256]]\n\t" - "str r5, [%[sha256], #4]\n\t" + "stm r0, {r4, r5}\n\t" #else "strd r4, r5, [%[sha256]]\n\t" #endif @@ -1774,8 +1769,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) __asm__ __volatile__ ( "sub sp, sp, #24\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str %[sha256], [sp]\n\t" - "str %[data], [sp, #4]\n\t" + "stm sp, {r0, r1}\n\t" #else "strd %[sha256], %[data], [sp]\n\t" #endif @@ -1783,8 +1777,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) "mov r12, %[L_SHA256_transform_neon_len_k]\n\t" /* Load digest into registers */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr %[len], [%[sha256]]\n\t" - "ldr r3, [%[sha256], #4]\n\t" + "ldm r0, {r2, r3}\n\t" #else "ldrd %[len], r3, [%[sha256]]\n\t" #endif @@ -2734,16 +2727,14 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) "ldr r10, [sp]\n\t" /* Add in digest from start */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr %[sha256], [r10]\n\t" - "ldr %[data], [r10, #4]\n\t" + "ldm r10, {r0, r1}\n\t" #else "ldrd %[sha256], %[data], [r10]\n\t" #endif "add %[len], %[len], %[sha256]\n\t" "add r3, r3, %[data]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str %[len], [r10]\n\t" - "str r3, [r10, #4]\n\t" + "stm r10, {r2, r3}\n\t" #else "strd %[len], r3, [r10]\n\t" #endif diff --git a/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S index e7a63ed55e..ac623304a2 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S @@ -510,8 +510,7 @@ L_sha3_arm32_begin: eor r3, r3, r5, lsl #1 # Calc b[0] and XOR t[0] into s[x*5+0] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -548,8 +547,7 @@ L_sha3_arm32_begin: eor r10, r10, r2 eor r11, r11, r3 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif @@ -724,8 +722,7 @@ L_sha3_arm32_begin: str lr, [sp, #20] # Calc t[1] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r2, [sp] - ldr r3, [sp, #4] + ldm sp, {r2, r3} #else ldrd r2, r3, [sp] #endif @@ -884,8 +881,7 @@ L_sha3_arm32_begin: ldrd r2, r3, [sp, #24] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [sp] - ldr r5, [sp, #4] + ldm sp, {r4, r5} #else ldrd r4, r5, [sp] #endif @@ -959,8 +955,7 @@ L_sha3_arm32_begin: # Row Mix # Row 0 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r2, [r0] - ldr r3, [r0, #4] + ldm r0, {r2, r3} #else ldrd r2, r3, [r0] #endif @@ -1036,8 +1031,7 @@ L_sha3_arm32_begin: str lr, [sp, #36] # Get constant #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r10, [r1] - ldr r11, [r1, #4] + ldm r1, {r10, r11} #else ldrd r10, r11, [r1] #endif @@ -1493,8 +1487,7 @@ L_sha3_arm32_begin: eor r3, r3, r5, lsl #1 # Calc b[0] and XOR t[0] into s[x*5+0] #if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) - ldr r4, [sp] - ldr r5, [sp, #4] + ldm sp, {r4, r5} #else ldrd r4, r5, [sp] #endif @@ -1531,8 +1524,7 @@ L_sha3_arm32_begin: eor r10, r10, r2 eor r11, r11, r3 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [sp] - str r5, [sp, #4] + stm sp, {r4, r5} #else strd r4, r5, [sp] #endif @@ -1707,8 +1699,7 @@ L_sha3_arm32_begin: str lr, [r0, #20] # Calc t[1] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r2, [r0] - ldr r3, [r0, #4] + ldm r0, {r2, r3} #else ldrd r2, r3, [r0] #endif @@ -1867,8 +1858,7 @@ L_sha3_arm32_begin: ldrd r2, r3, [r0, #24] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -1942,8 +1932,7 @@ L_sha3_arm32_begin: # Row Mix # Row 0 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r2, [sp] - ldr r3, [sp, #4] + ldm sp, {r2, r3} #else ldrd r2, r3, [sp] #endif @@ -2019,8 +2008,7 @@ L_sha3_arm32_begin: str lr, [r0, #36] # Get constant #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r10, [r1] - ldr r11, [r1, #4] + ldm r1, {r10, r11} #else ldrd r10, r11, [r1] #endif diff --git a/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c index 4eed8e667e..0caad5e906 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c @@ -462,8 +462,7 @@ void BlockSha3(word64* state_p) "eor r3, r3, r5, lsl #1\n\t" /* Calc b[0] and XOR t[0] into s[x*5+0] */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[state]]\n\t" - "ldr r5, [%[state], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[state]]\n\t" #endif @@ -500,8 +499,7 @@ void BlockSha3(word64* state_p) "eor r10, r10, r2\n\t" "eor r11, r11, r3\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [%[state]]\n\t" - "str r5, [%[state], #4]\n\t" + "stm r0, {r4, r5}\n\t" #else "strd r4, r5, [%[state]]\n\t" #endif @@ -676,8 +674,7 @@ void BlockSha3(word64* state_p) "str lr, [sp, #20]\n\t" /* Calc t[1] */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r2, [sp]\n\t" - "ldr r3, [sp, #4]\n\t" + "ldm sp, {r2, r3}\n\t" #else "ldrd r2, r3, [sp]\n\t" #endif @@ -836,8 +833,7 @@ void BlockSha3(word64* state_p) "ldrd r2, r3, [sp, #24]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [sp]\n\t" - "ldr r5, [sp, #4]\n\t" + "ldm sp, {r4, r5}\n\t" #else "ldrd r4, r5, [sp]\n\t" #endif @@ -911,8 +907,7 @@ void BlockSha3(word64* state_p) /* Row Mix */ /* Row 0 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r2, [%[state]]\n\t" - "ldr r3, [%[state], #4]\n\t" + "ldm r0, {r2, r3}\n\t" #else "ldrd r2, r3, [%[state]]\n\t" #endif @@ -988,8 +983,7 @@ void BlockSha3(word64* state_p) "str lr, [sp, #36]\n\t" /* Get constant */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r10, [r1]\n\t" - "ldr r11, [r1, #4]\n\t" + "ldm r1, {r10, r11}\n\t" #else "ldrd r10, r11, [r1]\n\t" #endif @@ -1445,8 +1439,7 @@ void BlockSha3(word64* state_p) "eor r3, r3, r5, lsl #1\n\t" /* Calc b[0] and XOR t[0] into s[x*5+0] */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [sp]\n\t" - "ldr r5, [sp, #4]\n\t" + "ldm sp, {r4, r5}\n\t" #else "ldrd r4, r5, [sp]\n\t" #endif @@ -1483,8 +1476,7 @@ void BlockSha3(word64* state_p) "eor r10, r10, r2\n\t" "eor r11, r11, r3\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [sp]\n\t" - "str r5, [sp, #4]\n\t" + "stm sp, {r4, r5}\n\t" #else 
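A note on shape: BlockSha3 works on word64 lanes with 32-bit registers, so every Keccak lane occupies a register pair and every 64-bit rotation is spread across two 32-bit operations; the eor ..., lsl #1 context lines above are one half of the theta step's rotate-left-by-one, folded directly into the XOR operands. A minimal standalone sketch of the underlying identity, separated from that instruction scheduling (illustrative label; lane low word in r4, high word in r5):

# rotl64 by 1 on a 32-bit register pair: each half takes the bit
# shifted out of the other half.
rotl64_by1:
	lsr	r2, r4, #31
	lsr	r3, r5, #31
	lsl	r4, r4, #1
	lsl	r5, r5, #1
	orr	r4, r4, r3
	orr	r5, r5, r2
	bx	lr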
"strd r4, r5, [sp]\n\t" #endif @@ -1659,8 +1651,7 @@ void BlockSha3(word64* state_p) "str lr, [%[state], #20]\n\t" /* Calc t[1] */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r2, [%[state]]\n\t" - "ldr r3, [%[state], #4]\n\t" + "ldm r0, {r2, r3}\n\t" #else "ldrd r2, r3, [%[state]]\n\t" #endif @@ -1819,8 +1810,7 @@ void BlockSha3(word64* state_p) "ldrd r2, r3, [%[state], #24]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[state]]\n\t" - "ldr r5, [%[state], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[state]]\n\t" #endif @@ -1894,8 +1884,7 @@ void BlockSha3(word64* state_p) /* Row Mix */ /* Row 0 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r2, [sp]\n\t" - "ldr r3, [sp, #4]\n\t" + "ldm sp, {r2, r3}\n\t" #else "ldrd r2, r3, [sp]\n\t" #endif @@ -1971,8 +1960,7 @@ void BlockSha3(word64* state_p) "str lr, [%[state], #36]\n\t" /* Get constant */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r10, [r1]\n\t" - "ldr r11, [r1, #4]\n\t" + "ldm r1, {r10, r11}\n\t" #else "ldrd r10, r11, [r1]\n\t" #endif diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S index 405b94a332..f7895a83ca 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S @@ -210,8 +210,7 @@ Transform_Sha512_Len: adr r3, L_SHA512_transform_len_k # Copy digest to add in at end #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -685,16 +684,14 @@ L_SHA512_transform_len_start: ldrd r4, r5, [r0, #56] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r8, [sp] - ldr r9, [sp, #4] + ldm sp, {r8, r9} #else ldrd r8, r9, [sp] #endif adds r4, r4, r6 adc r5, r5, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r6, [r3] - ldr r7, [r3, #4] + ldm r3, {r6, r7} #else ldrd r6, r7, [r3] #endif @@ -717,8 +714,7 @@ L_SHA512_transform_len_start: adds r8, r8, r4 adc r9, r9, r5 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -753,8 +749,7 @@ L_SHA512_transform_len_start: adds r4, r4, r6 adc r5, r5, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r8, [r0] - ldr r9, [r0, #4] + ldm r0, {r8, r9} #else ldrd r8, r9, [r0] #endif @@ -815,8 +810,7 @@ L_SHA512_transform_len_start: eor r7, r7, r9 eor r6, r6, r8 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [sp] - ldr r5, [sp, #4] + ldm sp, {r4, r5} #else ldrd r4, r5, [sp] #endif @@ -831,8 +825,7 @@ L_SHA512_transform_len_start: adds r4, r4, r8 adc r5, r5, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [sp] - str r5, [sp, #4] + stm sp, {r4, r5} #else strd r4, r5, [sp] #endif @@ -858,16 +851,14 @@ L_SHA512_transform_len_start: eor r7, r7, r9 eor r6, r6, r8 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [sp] - ldr r5, [sp, #4] + ldm sp, {r4, r5} #else ldrd r4, r5, [sp] #endif adds r4, r4, r6 adc r5, r5, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [sp] - str r5, [sp, #4] + stm sp, {r4, r5} #else strd r4, r5, [sp] #endif @@ -1013,8 +1004,7 @@ L_SHA512_transform_len_start: ldrd r8, r9, [r0, #56] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r6, [r0] - ldr r7, [r0, #4] + ldm r0, {r6, r7} #else ldrd r6, r7, [r0] #endif @@ -1302,8 +1292,7 @@ L_SHA512_transform_len_start: mov r11, r9 # 
Calc new W[2] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [sp] - ldr r5, [sp, #4] + ldm sp, {r4, r5} #else ldrd r4, r5, [sp] #endif @@ -1463,8 +1452,7 @@ L_SHA512_transform_len_start: adds r4, r4, r8 adc r5, r5, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r8, [r0] - ldr r9, [r0, #4] + ldm r0, {r8, r9} #else ldrd r8, r9, [r0] #endif @@ -1485,8 +1473,7 @@ L_SHA512_transform_len_start: ldrd r4, r5, [r0, #40] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r8, [r0] - str r9, [r0, #4] + stm r0, {r8, r9} #else strd r8, r9, [r0] #endif @@ -1635,8 +1622,7 @@ L_SHA512_transform_len_start: #endif # Round 4 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -1671,8 +1657,7 @@ L_SHA512_transform_len_start: strd r4, r5, [r0, #24] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -1931,8 +1916,7 @@ L_SHA512_transform_len_start: ldrd r4, r5, [r0, #56] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r6, [r0] - ldr r7, [r0, #4] + ldm r0, {r6, r7} #else ldrd r6, r7, [r0] #endif @@ -2191,8 +2175,7 @@ L_SHA512_transform_len_start: ldrd r6, r7, [r0, #56] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r8, [r0] - ldr r9, [r0, #4] + ldm r0, {r8, r9} #else ldrd r8, r9, [r0] #endif @@ -2417,8 +2400,7 @@ L_SHA512_transform_len_start: orr r9, r9, r4, lsr #9 orr r8, r8, r5, lsr #9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -2427,8 +2409,7 @@ L_SHA512_transform_len_start: adds r4, r4, r6 adc r5, r5, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif @@ -2457,8 +2438,7 @@ L_SHA512_transform_len_start: eor r6, r6, r8 eor r7, r7, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -2487,8 +2467,7 @@ L_SHA512_transform_len_start: adds r4, r4, r6 adc r5, r5, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif @@ -2521,8 +2500,7 @@ L_SHA512_transform_len_start: orr r9, r9, r4, lsr #7 orr r8, r8, r5, lsr #7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -2543,8 +2521,7 @@ L_SHA512_transform_len_start: ldrd r6, r7, [r0, #16] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif @@ -2555,16 +2532,14 @@ L_SHA512_transform_len_start: eor r10, r10, r6 eor r11, r11, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r6, [r0] - ldr r7, [r0, #4] + ldm r0, {r6, r7} #else ldrd r6, r7, [r0] #endif adds r6, r6, r10 adc r7, r7, r11 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r6, [r0] - str r7, [r0, #4] + stm r0, {r6, r7} #else strd r6, r7, [r0] #endif @@ -2599,8 +2574,7 @@ L_SHA512_transform_len_start: ldrd r4, r5, [sp, #56] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r8, [sp] - ldr r9, [sp, #4] + ldm sp, {r8, r9} #else ldrd r8, r9, [sp] #endif @@ -2749,8 +2723,7 @@ L_SHA512_transform_len_start: adds r8, r8, r4 adc r9, r9, r5 #if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -2785,8 +2758,7 @@ L_SHA512_transform_len_start: adds r4, r4, r6 adc r5, r5, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r8, [r0] - ldr r9, [r0, #4] + ldm r0, {r8, r9} #else ldrd r8, r9, [r0] #endif @@ -3045,8 +3017,7 @@ L_SHA512_transform_len_start: ldrd r8, r9, [r0, #56] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r6, [r0] - ldr r7, [r0, #4] + ldm r0, {r6, r7} #else ldrd r6, r7, [r0] #endif @@ -3495,8 +3466,7 @@ L_SHA512_transform_len_start: adds r4, r4, r8 adc r5, r5, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r8, [r0] - ldr r9, [r0, #4] + ldm r0, {r8, r9} #else ldrd r8, r9, [r0] #endif @@ -3517,8 +3487,7 @@ L_SHA512_transform_len_start: ldrd r4, r5, [r0, #40] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r8, [r0] - str r9, [r0, #4] + stm r0, {r8, r9} #else strd r8, r9, [r0] #endif @@ -3667,8 +3636,7 @@ L_SHA512_transform_len_start: #endif # Round 12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -3703,8 +3671,7 @@ L_SHA512_transform_len_start: strd r4, r5, [r0, #24] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -3963,8 +3930,7 @@ L_SHA512_transform_len_start: ldrd r4, r5, [r0, #56] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r6, [r0] - ldr r7, [r0, #4] + ldm r0, {r6, r7} #else ldrd r6, r7, [r0] #endif @@ -4223,8 +4189,7 @@ L_SHA512_transform_len_start: ldrd r6, r7, [r0, #56] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r8, [r0] - ldr r9, [r0, #4] + ldm r0, {r8, r9} #else ldrd r8, r9, [r0] #endif @@ -4449,8 +4414,7 @@ L_SHA512_transform_len_start: orr r9, r9, r4, lsr #9 orr r8, r8, r5, lsr #9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -4459,8 +4423,7 @@ L_SHA512_transform_len_start: adds r4, r4, r6 adc r5, r5, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif @@ -4489,8 +4452,7 @@ L_SHA512_transform_len_start: eor r6, r6, r8 eor r7, r7, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -4519,8 +4481,7 @@ L_SHA512_transform_len_start: adds r4, r4, r6 adc r5, r5, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif @@ -4553,8 +4514,7 @@ L_SHA512_transform_len_start: orr r9, r9, r4, lsr #7 orr r8, r8, r5, lsr #7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -4575,8 +4535,7 @@ L_SHA512_transform_len_start: ldrd r6, r7, [r0, #16] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif @@ -4587,16 +4546,14 @@ L_SHA512_transform_len_start: eor r10, r10, r6 eor r11, r11, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r6, [r0] - ldr r7, [r0, #4] + ldm r0, {r6, r7} #else ldrd r6, r7, [r0] #endif adds r6, r6, r10 adc r7, r7, r11 #if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r6, [r0] - str r7, [r0, #4] + stm r0, {r6, r7} #else strd r6, r7, [r0] #endif @@ -4647,8 +4604,7 @@ L_SHA512_transform_len_start: strd r4, r5, [sp, #120] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [sp] - ldr r5, [sp, #4] + ldm sp, {r4, r5} #else ldrd r4, r5, [sp] #endif @@ -4752,16 +4708,14 @@ L_SHA512_transform_len_start: ldrd r4, r5, [r0, #56] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r8, [sp] - ldr r9, [sp, #4] + ldm sp, {r8, r9} #else ldrd r8, r9, [sp] #endif adds r4, r4, r6 adc r5, r5, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r6, [r3] - ldr r7, [r3, #4] + ldm r3, {r6, r7} #else ldrd r6, r7, [r3] #endif @@ -4784,8 +4738,7 @@ L_SHA512_transform_len_start: adds r8, r8, r4 adc r9, r9, r5 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -4820,8 +4773,7 @@ L_SHA512_transform_len_start: adds r4, r4, r6 adc r5, r5, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r8, [r0] - ldr r9, [r0, #4] + ldm r0, {r8, r9} #else ldrd r8, r9, [r0] #endif @@ -5001,8 +4953,7 @@ L_SHA512_transform_len_start: ldrd r8, r9, [r0, #56] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r6, [r0] - ldr r7, [r0, #4] + ldm r0, {r6, r7} #else ldrd r6, r7, [r0] #endif @@ -5293,8 +5244,7 @@ L_SHA512_transform_len_start: adds r4, r4, r8 adc r5, r5, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r8, [r0] - ldr r9, [r0, #4] + ldm r0, {r8, r9} #else ldrd r8, r9, [r0] #endif @@ -5315,8 +5265,7 @@ L_SHA512_transform_len_start: ldrd r4, r5, [r0, #40] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r8, [r0] - str r9, [r0, #4] + stm r0, {r8, r9} #else strd r8, r9, [r0] #endif @@ -5386,8 +5335,7 @@ L_SHA512_transform_len_start: mov r11, r9 # Round 4 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -5422,8 +5370,7 @@ L_SHA512_transform_len_start: strd r4, r5, [r0, #24] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -5603,8 +5550,7 @@ L_SHA512_transform_len_start: ldrd r4, r5, [r0, #56] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r6, [r0] - ldr r7, [r0, #4] + ldm r0, {r6, r7} #else ldrd r6, r7, [r0] #endif @@ -5784,8 +5730,7 @@ L_SHA512_transform_len_start: ldrd r6, r7, [r0, #56] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r8, [r0] - ldr r9, [r0, #4] + ldm r0, {r8, r9} #else ldrd r8, r9, [r0] #endif @@ -5931,8 +5876,7 @@ L_SHA512_transform_len_start: orr r9, r9, r4, lsr #9 orr r8, r8, r5, lsr #9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -5941,8 +5885,7 @@ L_SHA512_transform_len_start: adds r4, r4, r6 adc r5, r5, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif @@ -5971,8 +5914,7 @@ L_SHA512_transform_len_start: eor r6, r6, r8 eor r7, r7, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -6001,8 +5943,7 @@ L_SHA512_transform_len_start: adds r4, r4, r6 adc r5, r5, r7 #if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif @@ -6035,8 +5976,7 @@ L_SHA512_transform_len_start: orr r9, r9, r4, lsr #7 orr r8, r8, r5, lsr #7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -6057,8 +5997,7 @@ L_SHA512_transform_len_start: ldrd r6, r7, [r0, #16] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif @@ -6069,16 +6008,14 @@ L_SHA512_transform_len_start: eor r10, r10, r6 eor r11, r11, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r6, [r0] - ldr r7, [r0, #4] + ldm r0, {r6, r7} #else ldrd r6, r7, [r0] #endif adds r6, r6, r10 adc r7, r7, r11 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r6, [r0] - str r7, [r0, #4] + stm r0, {r6, r7} #else strd r6, r7, [r0] #endif @@ -6184,8 +6121,7 @@ L_SHA512_transform_len_start: adds r8, r8, r4 adc r9, r9, r5 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -6220,8 +6156,7 @@ L_SHA512_transform_len_start: adds r4, r4, r6 adc r5, r5, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r8, [r0] - ldr r9, [r0, #4] + ldm r0, {r8, r9} #else ldrd r8, r9, [r0] #endif @@ -6401,8 +6336,7 @@ L_SHA512_transform_len_start: ldrd r8, r9, [r0, #56] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r6, [r0] - ldr r7, [r0, #4] + ldm r0, {r6, r7} #else ldrd r6, r7, [r0] #endif @@ -6693,8 +6627,7 @@ L_SHA512_transform_len_start: adds r4, r4, r8 adc r5, r5, r9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r8, [r0] - ldr r9, [r0, #4] + ldm r0, {r8, r9} #else ldrd r8, r9, [r0] #endif @@ -6715,8 +6648,7 @@ L_SHA512_transform_len_start: ldrd r4, r5, [r0, #40] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r8, [r0] - str r9, [r0, #4] + stm r0, {r8, r9} #else strd r8, r9, [r0] #endif @@ -6786,8 +6718,7 @@ L_SHA512_transform_len_start: mov r11, r9 # Round 12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -6822,8 +6753,7 @@ L_SHA512_transform_len_start: strd r4, r5, [r0, #24] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -7003,8 +6933,7 @@ L_SHA512_transform_len_start: ldrd r4, r5, [r0, #56] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r6, [r0] - ldr r7, [r0, #4] + ldm r0, {r6, r7} #else ldrd r6, r7, [r0] #endif @@ -7184,8 +7113,7 @@ L_SHA512_transform_len_start: ldrd r6, r7, [r0, #56] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r8, [r0] - ldr r9, [r0, #4] + ldm r0, {r8, r9} #else ldrd r8, r9, [r0] #endif @@ -7331,8 +7259,7 @@ L_SHA512_transform_len_start: orr r9, r9, r4, lsr #9 orr r8, r8, r5, lsr #9 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -7341,8 +7268,7 @@ L_SHA512_transform_len_start: adds r4, r4, r6 adc r5, r5, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif @@ -7371,8 +7297,7 @@ L_SHA512_transform_len_start: eor r6, r6, r8 eor r7, r7, r9 #if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -7401,8 +7326,7 @@ L_SHA512_transform_len_start: adds r4, r4, r6 adc r5, r5, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif @@ -7435,8 +7359,7 @@ L_SHA512_transform_len_start: orr r9, r9, r4, lsr #7 orr r8, r8, r5, lsr #7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -7457,8 +7380,7 @@ L_SHA512_transform_len_start: ldrd r6, r7, [r0, #16] #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif @@ -7469,16 +7391,14 @@ L_SHA512_transform_len_start: eor r10, r10, r6 eor r11, r11, r7 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r6, [r0] - ldr r7, [r0, #4] + ldm r0, {r6, r7} #else ldrd r6, r7, [r0] #endif adds r6, r6, r10 adc r7, r7, r11 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r6, [r0] - str r7, [r0, #4] + stm r0, {r6, r7} #else strd r6, r7, [r0] #endif @@ -7486,8 +7406,7 @@ L_SHA512_transform_len_start: mov r11, r9 # Add in digest from start #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - ldr r4, [r0] - ldr r5, [r0, #4] + ldm r0, {r4, r5} #else ldrd r4, r5, [r0] #endif @@ -7514,8 +7433,7 @@ L_SHA512_transform_len_start: adds r6, r6, r10 adc r7, r7, r11 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - str r4, [r0] - str r5, [r0, #4] + stm r0, {r4, r5} #else strd r4, r5, [r0] #endif diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c index 6721752e15..5085358439 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c @@ -109,8 +109,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "sub sp, sp, #0xc0\n\t" /* Copy digest to add in at end */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -586,16 +585,14 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r4, r5, [%[sha512], #56]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [sp]\n\t" - "ldr r9, [sp, #4]\n\t" + "ldm sp, {r8, r9}\n\t" #else "ldrd r8, r9, [sp]\n\t" #endif "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r6, [r3]\n\t" - "ldr r7, [r3, #4]\n\t" + "ldm r3, {r6, r7}\n\t" #else "ldrd r6, r7, [r3]\n\t" #endif @@ -618,8 +615,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r8, r8, r4\n\t" "adc r9, r9, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -654,8 +650,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [%[sha512]]\n\t" - "ldr r9, [%[sha512], #4]\n\t" + "ldm r0, {r8, r9}\n\t" #else "ldrd r8, r9, [%[sha512]]\n\t" #endif @@ -716,8 +711,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "eor r7, r7, r9\n\t" "eor r6, r6, 
r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [sp]\n\t" - "ldr r5, [sp, #4]\n\t" + "ldm sp, {r4, r5}\n\t" #else "ldrd r4, r5, [sp]\n\t" #endif @@ -732,8 +726,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [sp]\n\t" - "str r5, [sp, #4]\n\t" + "stm sp, {r4, r5}\n\t" #else "strd r4, r5, [sp]\n\t" #endif @@ -759,16 +752,14 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "eor r7, r7, r9\n\t" "eor r6, r6, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [sp]\n\t" - "ldr r5, [sp, #4]\n\t" + "ldm sp, {r4, r5}\n\t" #else "ldrd r4, r5, [sp]\n\t" #endif "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [sp]\n\t" - "str r5, [sp, #4]\n\t" + "stm sp, {r4, r5}\n\t" #else "strd r4, r5, [sp]\n\t" #endif @@ -914,8 +905,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r8, r9, [%[sha512], #56]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r6, [%[sha512]]\n\t" - "ldr r7, [%[sha512], #4]\n\t" + "ldm r0, {r6, r7}\n\t" #else "ldrd r6, r7, [%[sha512]]\n\t" #endif @@ -1203,8 +1193,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "mov r11, r9\n\t" /* Calc new W[2] */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [sp]\n\t" - "ldr r5, [sp, #4]\n\t" + "ldm sp, {r4, r5}\n\t" #else "ldrd r4, r5, [sp]\n\t" #endif @@ -1364,8 +1353,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [%[sha512]]\n\t" - "ldr r9, [%[sha512], #4]\n\t" + "ldm r0, {r8, r9}\n\t" #else "ldrd r8, r9, [%[sha512]]\n\t" #endif @@ -1386,8 +1374,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r4, r5, [%[sha512], #40]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r8, [%[sha512]]\n\t" - "str r9, [%[sha512], #4]\n\t" + "stm r0, {r8, r9}\n\t" #else "strd r8, r9, [%[sha512]]\n\t" #endif @@ -1536,8 +1523,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) #endif /* Round 4 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -1572,8 +1558,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "strd r4, r5, [%[sha512], #24]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -1832,8 +1817,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r4, r5, [%[sha512], #56]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r6, [%[sha512]]\n\t" - "ldr r7, [%[sha512], #4]\n\t" + "ldm r0, {r6, r7}\n\t" #else "ldrd r6, r7, [%[sha512]]\n\t" #endif @@ -2092,8 +2076,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r6, r7, [%[sha512], #56]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [%[sha512]]\n\t" - "ldr r9, [%[sha512], #4]\n\t" + "ldm r0, {r8, r9}\n\t" 
#else "ldrd r8, r9, [%[sha512]]\n\t" #endif @@ -2318,8 +2301,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r9, r9, r4, lsr #9\n\t" "orr r8, r8, r5, lsr #9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -2328,8 +2310,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [%[sha512]]\n\t" - "str r5, [%[sha512], #4]\n\t" + "stm r0, {r4, r5}\n\t" #else "strd r4, r5, [%[sha512]]\n\t" #endif @@ -2358,8 +2339,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "eor r6, r6, r8\n\t" "eor r7, r7, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -2388,8 +2368,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [%[sha512]]\n\t" - "str r5, [%[sha512], #4]\n\t" + "stm r0, {r4, r5}\n\t" #else "strd r4, r5, [%[sha512]]\n\t" #endif @@ -2422,8 +2401,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r9, r9, r4, lsr #7\n\t" "orr r8, r8, r5, lsr #7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -2444,8 +2422,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r6, r7, [%[sha512], #16]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [%[sha512]]\n\t" - "str r5, [%[sha512], #4]\n\t" + "stm r0, {r4, r5}\n\t" #else "strd r4, r5, [%[sha512]]\n\t" #endif @@ -2456,16 +2433,14 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "eor r10, r10, r6\n\t" "eor r11, r11, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r6, [%[sha512]]\n\t" - "ldr r7, [%[sha512], #4]\n\t" + "ldm r0, {r6, r7}\n\t" #else "ldrd r6, r7, [%[sha512]]\n\t" #endif "adds r6, r6, r10\n\t" "adc r7, r7, r11\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r6, [%[sha512]]\n\t" - "str r7, [%[sha512], #4]\n\t" + "stm r0, {r6, r7}\n\t" #else "strd r6, r7, [%[sha512]]\n\t" #endif @@ -2500,8 +2475,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r4, r5, [sp, #56]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [sp]\n\t" - "ldr r9, [sp, #4]\n\t" + "ldm sp, {r8, r9}\n\t" #else "ldrd r8, r9, [sp]\n\t" #endif @@ -2650,8 +2624,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r8, r8, r4\n\t" "adc r9, r9, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -2686,8 +2659,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [%[sha512]]\n\t" - "ldr r9, [%[sha512], #4]\n\t" + "ldm r0, {r8, r9}\n\t" #else 
"ldrd r8, r9, [%[sha512]]\n\t" #endif @@ -2946,8 +2918,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r8, r9, [%[sha512], #56]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r6, [%[sha512]]\n\t" - "ldr r7, [%[sha512], #4]\n\t" + "ldm r0, {r6, r7}\n\t" #else "ldrd r6, r7, [%[sha512]]\n\t" #endif @@ -3396,8 +3367,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [%[sha512]]\n\t" - "ldr r9, [%[sha512], #4]\n\t" + "ldm r0, {r8, r9}\n\t" #else "ldrd r8, r9, [%[sha512]]\n\t" #endif @@ -3418,8 +3388,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r4, r5, [%[sha512], #40]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r8, [%[sha512]]\n\t" - "str r9, [%[sha512], #4]\n\t" + "stm r0, {r8, r9}\n\t" #else "strd r8, r9, [%[sha512]]\n\t" #endif @@ -3568,8 +3537,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) #endif /* Round 12 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -3604,8 +3572,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "strd r4, r5, [%[sha512], #24]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -3864,8 +3831,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r4, r5, [%[sha512], #56]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r6, [%[sha512]]\n\t" - "ldr r7, [%[sha512], #4]\n\t" + "ldm r0, {r6, r7}\n\t" #else "ldrd r6, r7, [%[sha512]]\n\t" #endif @@ -4124,8 +4090,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r6, r7, [%[sha512], #56]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [%[sha512]]\n\t" - "ldr r9, [%[sha512], #4]\n\t" + "ldm r0, {r8, r9}\n\t" #else "ldrd r8, r9, [%[sha512]]\n\t" #endif @@ -4350,8 +4315,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r9, r9, r4, lsr #9\n\t" "orr r8, r8, r5, lsr #9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -4360,8 +4324,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [%[sha512]]\n\t" - "str r5, [%[sha512], #4]\n\t" + "stm r0, {r4, r5}\n\t" #else "strd r4, r5, [%[sha512]]\n\t" #endif @@ -4390,8 +4353,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "eor r6, r6, r8\n\t" "eor r7, r7, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -4420,8 +4382,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, 
[%[sha512]]\n\t" - "str r5, [%[sha512], #4]\n\t" + "stm r0, {r4, r5}\n\t" #else "strd r4, r5, [%[sha512]]\n\t" #endif @@ -4454,8 +4415,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r9, r9, r4, lsr #7\n\t" "orr r8, r8, r5, lsr #7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -4476,8 +4436,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r6, r7, [%[sha512], #16]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [%[sha512]]\n\t" - "str r5, [%[sha512], #4]\n\t" + "stm r0, {r4, r5}\n\t" #else "strd r4, r5, [%[sha512]]\n\t" #endif @@ -4488,16 +4447,14 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "eor r10, r10, r6\n\t" "eor r11, r11, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r6, [%[sha512]]\n\t" - "ldr r7, [%[sha512], #4]\n\t" + "ldm r0, {r6, r7}\n\t" #else "ldrd r6, r7, [%[sha512]]\n\t" #endif "adds r6, r6, r10\n\t" "adc r7, r7, r11\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r6, [%[sha512]]\n\t" - "str r7, [%[sha512], #4]\n\t" + "stm r0, {r6, r7}\n\t" #else "strd r6, r7, [%[sha512]]\n\t" #endif @@ -4548,8 +4505,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "strd r4, r5, [sp, #120]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [sp]\n\t" - "ldr r5, [sp, #4]\n\t" + "ldm sp, {r4, r5}\n\t" #else "ldrd r4, r5, [sp]\n\t" #endif @@ -4653,16 +4609,14 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r4, r5, [%[sha512], #56]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [sp]\n\t" - "ldr r9, [sp, #4]\n\t" + "ldm sp, {r8, r9}\n\t" #else "ldrd r8, r9, [sp]\n\t" #endif "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r6, [r3]\n\t" - "ldr r7, [r3, #4]\n\t" + "ldm r3, {r6, r7}\n\t" #else "ldrd r6, r7, [r3]\n\t" #endif @@ -4685,8 +4639,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r8, r8, r4\n\t" "adc r9, r9, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -4721,8 +4674,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [%[sha512]]\n\t" - "ldr r9, [%[sha512], #4]\n\t" + "ldm r0, {r8, r9}\n\t" #else "ldrd r8, r9, [%[sha512]]\n\t" #endif @@ -4902,8 +4854,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r8, r9, [%[sha512], #56]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r6, [%[sha512]]\n\t" - "ldr r7, [%[sha512], #4]\n\t" + "ldm r0, {r6, r7}\n\t" #else "ldrd r6, r7, [%[sha512]]\n\t" #endif @@ -5194,8 +5145,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [%[sha512]]\n\t" - "ldr r9, [%[sha512], #4]\n\t" + "ldm r0, {r8, r9}\n\t" #else "ldrd r8, r9, [%[sha512]]\n\t" #endif @@ -5216,8 +5166,7 @@ void 
Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r4, r5, [%[sha512], #40]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r8, [%[sha512]]\n\t" - "str r9, [%[sha512], #4]\n\t" + "stm r0, {r8, r9}\n\t" #else "strd r8, r9, [%[sha512]]\n\t" #endif @@ -5287,8 +5236,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "mov r11, r9\n\t" /* Round 4 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -5323,8 +5271,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "strd r4, r5, [%[sha512], #24]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -5504,8 +5451,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r4, r5, [%[sha512], #56]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r6, [%[sha512]]\n\t" - "ldr r7, [%[sha512], #4]\n\t" + "ldm r0, {r6, r7}\n\t" #else "ldrd r6, r7, [%[sha512]]\n\t" #endif @@ -5685,8 +5631,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r6, r7, [%[sha512], #56]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [%[sha512]]\n\t" - "ldr r9, [%[sha512], #4]\n\t" + "ldm r0, {r8, r9}\n\t" #else "ldrd r8, r9, [%[sha512]]\n\t" #endif @@ -5832,8 +5777,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r9, r9, r4, lsr #9\n\t" "orr r8, r8, r5, lsr #9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -5842,8 +5786,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [%[sha512]]\n\t" - "str r5, [%[sha512], #4]\n\t" + "stm r0, {r4, r5}\n\t" #else "strd r4, r5, [%[sha512]]\n\t" #endif @@ -5872,8 +5815,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "eor r6, r6, r8\n\t" "eor r7, r7, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -5902,8 +5844,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [%[sha512]]\n\t" - "str r5, [%[sha512], #4]\n\t" + "stm r0, {r4, r5}\n\t" #else "strd r4, r5, [%[sha512]]\n\t" #endif @@ -5936,8 +5877,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r9, r9, r4, lsr #7\n\t" "orr r8, r8, r5, lsr #7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -5958,8 +5898,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r6, r7, [%[sha512], #16]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [%[sha512]]\n\t" - "str r5, [%[sha512], 
#4]\n\t" + "stm r0, {r4, r5}\n\t" #else "strd r4, r5, [%[sha512]]\n\t" #endif @@ -5970,16 +5909,14 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "eor r10, r10, r6\n\t" "eor r11, r11, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r6, [%[sha512]]\n\t" - "ldr r7, [%[sha512], #4]\n\t" + "ldm r0, {r6, r7}\n\t" #else "ldrd r6, r7, [%[sha512]]\n\t" #endif "adds r6, r6, r10\n\t" "adc r7, r7, r11\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r6, [%[sha512]]\n\t" - "str r7, [%[sha512], #4]\n\t" + "stm r0, {r6, r7}\n\t" #else "strd r6, r7, [%[sha512]]\n\t" #endif @@ -6085,8 +6022,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r8, r8, r4\n\t" "adc r9, r9, r5\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -6121,8 +6057,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [%[sha512]]\n\t" - "ldr r9, [%[sha512], #4]\n\t" + "ldm r0, {r8, r9}\n\t" #else "ldrd r8, r9, [%[sha512]]\n\t" #endif @@ -6302,8 +6237,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r8, r9, [%[sha512], #56]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r6, [%[sha512]]\n\t" - "ldr r7, [%[sha512], #4]\n\t" + "ldm r0, {r6, r7}\n\t" #else "ldrd r6, r7, [%[sha512]]\n\t" #endif @@ -6594,8 +6528,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r8\n\t" "adc r5, r5, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [%[sha512]]\n\t" - "ldr r9, [%[sha512], #4]\n\t" + "ldm r0, {r8, r9}\n\t" #else "ldrd r8, r9, [%[sha512]]\n\t" #endif @@ -6616,8 +6549,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r4, r5, [%[sha512], #40]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r8, [%[sha512]]\n\t" - "str r9, [%[sha512], #4]\n\t" + "stm r0, {r8, r9}\n\t" #else "strd r8, r9, [%[sha512]]\n\t" #endif @@ -6687,8 +6619,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "mov r11, r9\n\t" /* Round 12 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -6723,8 +6654,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "strd r4, r5, [%[sha512], #24]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -6904,8 +6834,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r4, r5, [%[sha512], #56]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r6, [%[sha512]]\n\t" - "ldr r7, [%[sha512], #4]\n\t" + "ldm r0, {r6, r7}\n\t" #else "ldrd r6, r7, [%[sha512]]\n\t" #endif @@ -7085,8 +7014,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r6, r7, [%[sha512], #56]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [%[sha512]]\n\t" - "ldr r9, [%[sha512], #4]\n\t" + "ldm r0, 
{r8, r9}\n\t" #else "ldrd r8, r9, [%[sha512]]\n\t" #endif @@ -7232,8 +7160,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r9, r9, r4, lsr #9\n\t" "orr r8, r8, r5, lsr #9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -7242,8 +7169,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [%[sha512]]\n\t" - "str r5, [%[sha512], #4]\n\t" + "stm r0, {r4, r5}\n\t" #else "strd r4, r5, [%[sha512]]\n\t" #endif @@ -7272,8 +7198,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "eor r6, r6, r8\n\t" "eor r7, r7, r9\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -7302,8 +7227,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [%[sha512]]\n\t" - "str r5, [%[sha512], #4]\n\t" + "stm r0, {r4, r5}\n\t" #else "strd r4, r5, [%[sha512]]\n\t" #endif @@ -7336,8 +7260,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "orr r9, r9, r4, lsr #7\n\t" "orr r8, r8, r5, lsr #7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -7358,8 +7281,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "ldrd r6, r7, [%[sha512], #16]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [%[sha512]]\n\t" - "str r5, [%[sha512], #4]\n\t" + "stm r0, {r4, r5}\n\t" #else "strd r4, r5, [%[sha512]]\n\t" #endif @@ -7370,16 +7292,14 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "eor r10, r10, r6\n\t" "eor r11, r11, r7\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r6, [%[sha512]]\n\t" - "ldr r7, [%[sha512], #4]\n\t" + "ldm r0, {r6, r7}\n\t" #else "ldrd r6, r7, [%[sha512]]\n\t" #endif "adds r6, r6, r10\n\t" "adc r7, r7, r11\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r6, [%[sha512]]\n\t" - "str r7, [%[sha512], #4]\n\t" + "stm r0, {r6, r7}\n\t" #else "strd r6, r7, [%[sha512]]\n\t" #endif @@ -7387,8 +7307,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "mov r11, r9\n\t" /* Add in digest from start */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[sha512]]\n\t" - "ldr r5, [%[sha512], #4]\n\t" + "ldm r0, {r4, r5}\n\t" #else "ldrd r4, r5, [%[sha512]]\n\t" #endif @@ -7415,8 +7334,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "adds r6, r6, r10\n\t" "adc r7, r7, r11\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r4, [%[sha512]]\n\t" - "str r5, [%[sha512], #4]\n\t" + "stm r0, {r4, r5}\n\t" #else "strd r4, r5, [%[sha512]]\n\t" #endif diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index 3ef1986063..eb3a77317c 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -42808,9 +42808,8 @@ static sp_int32 sp_3072_cmp_96(const sp_digit* a_p, const 
sp_digit* b_p) "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r4, #0x1\n\t" - "lsl r4, r4, #8\n\t" - "add r4, r4, #0x7c\n\t" + "mov r4, #0x7c\n\t" + "orr r4, r4, #0x100\n\t" #else "mov r4, #0x17c\n\t" #endif @@ -57896,9 +57895,8 @@ static sp_int32 sp_4096_cmp_128(const sp_digit* a_p, const sp_digit* b_p) "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r4, #0x1\n\t" - "lsl r4, r4, #8\n\t" - "add r4, r4, #0xfc\n\t" + "mov r4, #0xfc\n\t" + "orr r4, r4, #0x100\n\t" #else "mov r4, #0x1fc\n\t" #endif @@ -78239,8 +78237,7 @@ static void sp_256_rshift1_8(sp_digit* r_p, const sp_digit* a_p) "strd r8, r9, [%[r], #24]\n\t" #endif #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r2, [%[a]]\n\t" - "ldr r3, [%[a], #4]\n\t" + "ldm r1, {r2, r3}\n\t" #else "ldrd r2, r3, [%[a]]\n\t" #endif @@ -78259,8 +78256,7 @@ static void sp_256_rshift1_8(sp_digit* r_p, const sp_digit* a_p) "orr r8, r8, r5, lsl #31\n\t" "orr r9, r9, r12, lsl #31\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "str r6, [%[r]]\n\t" - "str r7, [%[r], #4]\n\t" + "stm r0, {r6, r7}\n\t" #else "strd r6, r7, [%[r]]\n\t" #endif @@ -78733,13 +78729,7 @@ static int sp_256_num_bits_8(const sp_digit* a_p) "ldr r1, [%[a], #28]\n\t" "cmp r1, #0\n\t" "beq L_sp_256_num_bits_8_7_%=\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x0\n\t" -#else "mov r2, #0x100\n\t" -#endif "clz r12, r1\n\t" "sub r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" @@ -96074,8 +96064,7 @@ static void sp_384_div2_mod_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi "L_sp_384_div2_mod_12_div2_%=: \n\t" "sub %[r], %[r], #48\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [%[r]]\n\t" - "ldr r9, [%[r], #4]\n\t" + "ldm r0, {r8, r9}\n\t" #else "ldrd r8, r9, [%[r]]\n\t" #endif @@ -96183,9 +96172,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_384_num_bits_12_11_3_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x78\n\t" + "mov r2, #0x78\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x178\n\t" #endif @@ -96199,9 +96187,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_384_num_bits_12_11_2_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x70\n\t" + "mov r2, #0x70\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x170\n\t" #endif @@ -96215,9 +96202,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_384_num_bits_12_11_1_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x68\n\t" + "mov r2, #0x68\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x168\n\t" #endif @@ -96228,9 +96214,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "L_sp_384_num_bits_12_11_1_%=: \n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x60\n\t" + "mov r2, #0x60\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x160\n\t" #endif @@ -96246,9 +96231,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_384_num_bits_12_10_3_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" 
- "add r2, r2, #0x58\n\t" + "mov r2, #0x58\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x158\n\t" #endif @@ -96262,9 +96246,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_384_num_bits_12_10_2_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x50\n\t" + "mov r2, #0x50\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x150\n\t" #endif @@ -96278,9 +96261,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_384_num_bits_12_10_1_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x48\n\t" + "mov r2, #0x48\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x148\n\t" #endif @@ -96291,9 +96273,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "L_sp_384_num_bits_12_10_1_%=: \n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x40\n\t" + "mov r2, #0x40\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x140\n\t" #endif @@ -96309,9 +96290,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_384_num_bits_12_9_3_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x38\n\t" + "mov r2, #0x38\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x138\n\t" #endif @@ -96325,9 +96305,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_384_num_bits_12_9_2_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x30\n\t" + "mov r2, #0x30\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x130\n\t" #endif @@ -96341,9 +96320,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_384_num_bits_12_9_1_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x28\n\t" + "mov r2, #0x28\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x128\n\t" #endif @@ -96354,9 +96332,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "L_sp_384_num_bits_12_9_1_%=: \n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x20\n\t" + "mov r2, #0x20\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x120\n\t" #endif @@ -96372,9 +96349,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_384_num_bits_12_8_3_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x18\n\t" + "mov r2, #0x18\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x118\n\t" #endif @@ -96388,9 +96364,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_384_num_bits_12_8_2_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x10\n\t" + "mov r2, #0x10\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x110\n\t" #endif @@ -96404,9 +96379,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_384_num_bits_12_8_1_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x8\n\t" + "mov r2, #0x8\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x108\n\t" #endif @@ -96416,13 +96390,7 @@ static int 
sp_384_num_bits_12(const sp_digit* a_p) "\n" "L_sp_384_num_bits_12_8_1_%=: \n\t" "and r3, r1, #0xff\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x0\n\t" -#else "mov r2, #0x100\n\t" -#endif "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" @@ -96754,9 +96722,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_11_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x80\n\t" + "mov r2, #0x80\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x180\n\t" #endif @@ -96769,9 +96736,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_10_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x60\n\t" + "mov r2, #0x60\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x160\n\t" #endif @@ -96784,9 +96750,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_9_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x40\n\t" + "mov r2, #0x40\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x140\n\t" #endif @@ -96799,9 +96764,8 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_8_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x20\n\t" + "mov r2, #0x20\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x120\n\t" #endif @@ -96813,13 +96777,7 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "ldr r1, [%[a], #28]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_7_%=\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x0\n\t" -#else "mov r2, #0x100\n\t" -#endif "clz r12, r1\n\t" "sub r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" @@ -115793,9 +115751,8 @@ static SP_NOINLINE void sp_521_mont_reduce_17(sp_digit* a_p, const sp_digit* m_p "ldm %[a], {r1, r2, r3, r4, r5}\n\t" "ldm sp!, {r7, r8, r9, r10, r11}\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov lr, #0x1\n\t" - "lsl lr, lr, #8\n\t" - "add lr, lr, #0xff\n\t" + "mov lr, #0xff\n\t" + "orr lr, lr, #0x100\n\t" #else "mov lr, #0x1ff\n\t" #endif @@ -115872,9 +115829,8 @@ static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digi "cmp r9, #0x40\n\t" "bne L_sp_521_mont_reduce_order_17_nomask_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r7, #0x1\n\t" - "lsl r7, r7, #8\n\t" - "add r7, r7, #0xff\n\t" + "mov r7, #0xff\n\t" + "orr r7, r7, #0x100\n\t" #else "mov r7, #0x1ff\n\t" #endif @@ -116500,9 +116456,8 @@ static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digi "cmp r9, #0x40\n\t" "bne L_sp_521_mont_reduce_order_17_nomask_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r7, #0x1\n\t" - "lsl r7, r7, #8\n\t" - "add r7, r7, #0xff\n\t" + "mov r7, #0xff\n\t" + "orr r7, r7, #0x100\n\t" #else "mov r7, #0x1ff\n\t" #endif @@ -116762,9 +116717,8 @@ static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digi "cmp r12, #0x40\n\t" "bne L_sp_521_mont_reduce_order_17_nomask_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r10, #0x1\n\t" - "lsl r10, r10, #8\n\t" - "add r10, r10, 
#0xff\n\t" + "mov r10, #0xff\n\t" + "orr r10, r10, #0x100\n\t" #else "mov r10, #0x1ff\n\t" #endif @@ -117414,9 +117368,8 @@ static void sp_521_mont_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi "ldm %[b]!, {r4}\n\t" "adcs r8, r8, r4\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0x1\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0xff\n\t" + "mov r12, #0xff\n\t" + "orr r12, r12, #0x100\n\t" #else "mov r12, #0x1ff\n\t" #endif @@ -117491,9 +117444,8 @@ static void sp_521_mont_dbl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi "ldm %[a]!, {r4}\n\t" "adcs r4, r4, r4\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x1\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0xff\n\t" + "mov r3, #0xff\n\t" + "orr r3, r3, #0x100\n\t" #else "mov r3, #0x1ff\n\t" #endif @@ -117602,9 +117554,8 @@ static void sp_521_mont_tpl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi "ldm %[a]!, {r8}\n\t" "adcs r4, r4, r8\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r3, #0x1\n\t" - "lsl r3, r3, #8\n\t" - "add r3, r3, #0xff\n\t" + "mov r3, #0xff\n\t" + "orr r3, r3, #0x100\n\t" #else "mov r3, #0x1ff\n\t" #endif @@ -117677,9 +117628,8 @@ static void sp_521_mont_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi "ldm %[b]!, {r4}\n\t" "sbcs r8, r8, r4\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r12, #0x1\n\t" - "lsl r12, r12, #8\n\t" - "add r12, r12, #0xff\n\t" + "mov r12, #0xff\n\t" + "orr r12, r12, #0x100\n\t" #else "mov r12, #0x1ff\n\t" #endif @@ -122419,8 +122369,7 @@ static void sp_521_rshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) __asm__ __volatile__ ( "rsb r12, %[n], #32\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r4, [%[a]]\n\t" - "ldr r5, [%[a], #4]\n\t" + "ldm r1, {r4, r5}\n\t" #else "ldrd r4, r5, [%[a]]\n\t" #endif @@ -124617,8 +124566,7 @@ static void sp_521_div2_mod_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi "L_sp_521_div2_mod_17_div2_%=: \n\t" "sub %[r], %[r], #0x44\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "ldr r8, [%[r]]\n\t" - "ldr r9, [%[r], #4]\n\t" + "ldm r0, {r8, r9}\n\t" #else "ldrd r8, r9, [%[r]]\n\t" #endif @@ -124746,9 +124694,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_16_3_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x2\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x18\n\t" + "mov r2, #0x18\n\t" + "orr r2, r2, #0x200\n\t" #else "mov r2, #0x218\n\t" #endif @@ -124762,9 +124709,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_16_2_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x2\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x10\n\t" + "mov r2, #0x10\n\t" + "orr r2, r2, #0x200\n\t" #else "mov r2, #0x210\n\t" #endif @@ -124778,9 +124724,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_16_1_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x2\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x8\n\t" + "mov r2, #0x8\n\t" + "orr r2, r2, #0x200\n\t" #else "mov r2, #0x208\n\t" #endif @@ -124790,13 +124735,7 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "\n" "L_sp_521_num_bits_17_16_1_%=: \n\t" "and r3, r1, #0xff\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x2\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x0\n\t" -#else "mov r2, #0x200\n\t" -#endif "ldrb 
r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" @@ -124809,9 +124748,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_15_3_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0xf8\n\t" + "mov r2, #0xf8\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x1f8\n\t" #endif @@ -124825,9 +124763,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_15_2_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0xf0\n\t" + "mov r2, #0xf0\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x1f0\n\t" #endif @@ -124841,9 +124778,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_15_1_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0xe8\n\t" + "mov r2, #0xe8\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x1e8\n\t" #endif @@ -124854,9 +124790,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "L_sp_521_num_bits_17_15_1_%=: \n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0xe0\n\t" + "mov r2, #0xe0\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x1e0\n\t" #endif @@ -124872,9 +124807,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_14_3_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0xd8\n\t" + "mov r2, #0xd8\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x1d8\n\t" #endif @@ -124888,9 +124822,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_14_2_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0xd0\n\t" + "mov r2, #0xd0\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x1d0\n\t" #endif @@ -124904,9 +124837,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_14_1_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0xc8\n\t" + "mov r2, #0xc8\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x1c8\n\t" #endif @@ -124917,9 +124849,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "L_sp_521_num_bits_17_14_1_%=: \n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0xc0\n\t" + "mov r2, #0xc0\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x1c0\n\t" #endif @@ -124935,9 +124866,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_13_3_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0xb8\n\t" + "mov r2, #0xb8\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x1b8\n\t" #endif @@ -124951,9 +124881,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_13_2_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0xb0\n\t" + "mov r2, #0xb0\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x1b0\n\t" #endif @@ -124967,9 +124896,8 @@ static int 
sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_13_1_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0xa8\n\t" + "mov r2, #0xa8\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x1a8\n\t" #endif @@ -124980,9 +124908,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "L_sp_521_num_bits_17_13_1_%=: \n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0xa0\n\t" + "mov r2, #0xa0\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x1a0\n\t" #endif @@ -124998,9 +124925,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_12_3_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x98\n\t" + "mov r2, #0x98\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x198\n\t" #endif @@ -125014,9 +124940,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_12_2_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x90\n\t" + "mov r2, #0x90\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x190\n\t" #endif @@ -125030,9 +124955,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_12_1_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x88\n\t" + "mov r2, #0x88\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x188\n\t" #endif @@ -125043,9 +124967,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "L_sp_521_num_bits_17_12_1_%=: \n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x80\n\t" + "mov r2, #0x80\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x180\n\t" #endif @@ -125061,9 +124984,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_11_3_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x78\n\t" + "mov r2, #0x78\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x178\n\t" #endif @@ -125077,9 +124999,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_11_2_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x70\n\t" + "mov r2, #0x70\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x170\n\t" #endif @@ -125093,9 +125014,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_11_1_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x68\n\t" + "mov r2, #0x68\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x168\n\t" #endif @@ -125106,9 +125026,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "L_sp_521_num_bits_17_11_1_%=: \n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x60\n\t" + "mov r2, #0x60\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x160\n\t" #endif @@ -125124,9 +125043,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_10_3_%=\n\t" #if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x58\n\t" + "mov r2, #0x58\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x158\n\t" #endif @@ -125140,9 +125058,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_10_2_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x50\n\t" + "mov r2, #0x50\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x150\n\t" #endif @@ -125156,9 +125073,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_10_1_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x48\n\t" + "mov r2, #0x48\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x148\n\t" #endif @@ -125169,9 +125085,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "L_sp_521_num_bits_17_10_1_%=: \n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x40\n\t" + "mov r2, #0x40\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x140\n\t" #endif @@ -125187,9 +125102,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_9_3_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x38\n\t" + "mov r2, #0x38\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x138\n\t" #endif @@ -125203,9 +125117,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_9_2_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x30\n\t" + "mov r2, #0x30\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x130\n\t" #endif @@ -125219,9 +125132,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_9_1_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x28\n\t" + "mov r2, #0x28\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x128\n\t" #endif @@ -125232,9 +125144,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "L_sp_521_num_bits_17_9_1_%=: \n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x20\n\t" + "mov r2, #0x20\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x120\n\t" #endif @@ -125250,9 +125161,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_8_3_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x18\n\t" + "mov r2, #0x18\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x118\n\t" #endif @@ -125266,9 +125176,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_8_2_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x10\n\t" + "mov r2, #0x10\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x110\n\t" #endif @@ -125282,9 +125191,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r3, #0\n\t" "beq L_sp_521_num_bits_17_8_1_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x8\n\t" + "mov r2, 
#0x8\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x108\n\t" #endif @@ -125294,13 +125202,7 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "\n" "L_sp_521_num_bits_17_8_1_%=: \n\t" "and r3, r1, #0xff\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x0\n\t" -#else "mov r2, #0x100\n\t" -#endif "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" @@ -125632,9 +125534,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_16_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x2\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x20\n\t" + "mov r2, #0x20\n\t" + "orr r2, r2, #0x200\n\t" #else "mov r2, #0x220\n\t" #endif @@ -125646,13 +125547,7 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "ldr r1, [%[a], #60]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_15_%=\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x2\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x0\n\t" -#else "mov r2, #0x200\n\t" -#endif "clz r12, r1\n\t" "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" @@ -125662,9 +125557,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_14_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0xe0\n\t" + "mov r2, #0xe0\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x1e0\n\t" #endif @@ -125677,9 +125571,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_13_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0xc0\n\t" + "mov r2, #0xc0\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x1c0\n\t" #endif @@ -125692,9 +125585,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_12_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0xa0\n\t" + "mov r2, #0xa0\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x1a0\n\t" #endif @@ -125707,9 +125599,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_11_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x80\n\t" + "mov r2, #0x80\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x180\n\t" #endif @@ -125722,9 +125613,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_10_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x60\n\t" + "mov r2, #0x60\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x160\n\t" #endif @@ -125737,9 +125627,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_9_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x40\n\t" + "mov r2, #0x40\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, #0x140\n\t" #endif @@ -125752,9 +125641,8 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_8_%=\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x20\n\t" + "mov r2, #0x20\n\t" + "orr r2, r2, #0x100\n\t" #else "mov r2, 
#0x120\n\t" #endif @@ -125766,13 +125654,7 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "ldr r1, [%[a], #28]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_7_%=\n\t" -#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) - "mov r2, #0x1\n\t" - "lsl r2, r2, #8\n\t" - "add r2, r2, #0x0\n\t" -#else "mov r2, #0x100\n\t" -#endif "clz r12, r1\n\t" "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t"