From 98056038302ec7621d5b5a0e7fcc573386b3a998 Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Thu, 18 Jul 2024 11:15:23 +1000 Subject: [PATCH] RISC-V 64: Add assembly code for SHA-256 Move common defines out of AES file to header file. --- src/include.am | 13 + wolfcrypt/src/port/riscv/riscv-64-aes.c | 118 -- wolfcrypt/src/port/riscv/riscv-64-sha256.c | 1420 +++++++++++++++++++ wolfcrypt/src/sha256.c | 4 +- wolfssl/wolfcrypt/port/riscv/riscv-64-asm.h | 166 +++ 5 files changed, 1601 insertions(+), 120 deletions(-) create mode 100644 wolfcrypt/src/port/riscv/riscv-64-sha256.c diff --git a/src/include.am b/src/include.am index 1679e3b567..a9992a851b 100644 --- a/src/include.am +++ b/src/include.am @@ -229,6 +229,10 @@ endif !BUILD_X86_ASM endif !BUILD_ARMASM endif !BUILD_ARMASM_NEON +if BUILD_RISCV_ASM +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/riscv/riscv-64-sha256.c +endif BUILD_RISCV_ASM + if BUILD_SHA512 if BUILD_ARMASM_NEON src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha512.c @@ -384,6 +388,10 @@ endif BUILD_INTELASM endif !BUILD_ARMASM endif !BUILD_ARMASM_NEON +if BUILD_RISCV_ASM +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/riscv/riscv-64-sha256.c +endif BUILD_RISCV_ASM + if BUILD_SHA512 if BUILD_ARMASM_NEON src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha512.c @@ -595,6 +603,11 @@ endif BUILD_INTELASM endif !BUILD_X86_ASM endif !BUILD_ARMASM endif !BUILD_ARMASM_NEON + +if BUILD_RISCV_ASM +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/riscv/riscv-64-sha256.c +endif BUILD_RISCV_ASM + endif !BUILD_FIPS_CURRENT if BUILD_AFALG diff --git a/wolfcrypt/src/port/riscv/riscv-64-aes.c b/wolfcrypt/src/port/riscv/riscv-64-aes.c index 3a8a2bc742..c438d252ad 100644 --- a/wolfcrypt/src/port/riscv/riscv-64-aes.c +++ b/wolfcrypt/src/port/riscv/riscv-64-aes.c @@ -56,24 +56,8 @@ static WC_INLINE void memcpy16(byte* out, const byte* in) out64[1] = in64[1]; } -#ifdef WOLFSSL_RISCV_BASE_BIT_MANIPULATION - -/* Reverse bytes in 64-bit register. */ -#define REV8(rd, rs) \ - ASM_WORD((0b011010111000 << 20) | (0b101 << 12) | \ - (0b0010011 << 0) | \ - (rs << 15) | (rd << 7)) - -#endif /* WOLFSSL_RISCV_BASE_BIT_MANIPULATION */ - #ifdef WOLFSSL_RISCV_BIT_MANIPULATION -/* rd = rs1[0..31] | rs2[0..31]. */ -#define PACK(rd, rs1, rs2) \ - ASM_WORD((0b0000100 << 25) | (0b100 << 12) | \ - (0b0110011 << 0) | \ - (rs2 << 20) | (rs1 << 15) | (rd << 7)) - /* Reverse bits in each byte of 64-bit register. 
*/ #define BREV8(rd, rs) \ ASM_WORD(0b01101000011100000101000000010011 | \ @@ -90,31 +74,6 @@ static WC_INLINE void memcpy16(byte* out, const byte* in) (vs2 << 20) | (vd << 7)) #endif -/* vd = vs2 + [i,] */ -#define VADD_VI(vd, vs2, i) \ - ASM_WORD((0b000000 << 26) | (0b1 << 25) | \ - (0b011 << 12) | (0b1010111 << 0) | \ - (vd << 7) | (i << 15) | (vs2 << 20)) -/* vd = vs1 + vs2 */ -#define VADD_VV(vd, vs1, vs2) \ - ASM_WORD((0b000000 << 26) | (0b1 << 25) | \ - (0b000 << 12) | (0b1010111 << 0) | \ - (vs2 << 20) | (vs1 << 15) | (vd << 7)) -/* vd = vs1 ^ vs2 */ -#define VXOR_VV(vd, vs1, vs2) \ - ASM_WORD((0b001011 << 26) | (0b1 << 25) | \ - (0b000 << 12) | (0b1010111 << 0) | \ - (vd << 7) | (vs1 << 15) | (vs2 << 20)) -/* vd = vs1 & vs2 */ -#define VAND_VV(vd, vs1, vs2) \ - ASM_WORD((0b001001 << 26) | (0b1 << 25) | \ - (0b000 << 12) | (0b1010111 << 0) | \ - (vd << 7) | (vs1 << 15) | (vs2 << 20)) -/* vd = vs1 | vs2 */ -#define VOR_VV(vd, vs1, vs2) \ - ASM_WORD((0b001010 << 26) | (0b1 << 25) | \ - (0b000 << 12) | (0b1010111 << 0) | \ - (vd << 7) | (vs1 << 15) | (vs2 << 20)) /* vd = vs2 << uimm */ #define VSLL_VI(vd, vs2, uimm) \ @@ -127,29 +86,6 @@ static WC_INLINE void memcpy16(byte* out, const byte* in) (0b011 << 12) | (0b1010111 << 0) | \ (vd << 7) | (uimm << 15) | (vs2 << 20)) -/* vd[shift..max] = vs2[0..max-shift] */ -#define VSLIDEUP_VI(vd, vs2, shift) \ - ASM_WORD((0b001110 << 26) | (0b1 << 25) | \ - (0b011 << 12) | (0b1010111 << 0) | \ - (vd << 7) | (shift << 15) | (vs2 << 20)) - -/* vd[0..max-shift] = vs2[shift..max] */ -#define VSLIDEDOWN_VI(vd, vs2, shift) \ - ASM_WORD((0b001111 << 26) | (0b1 << 25) | \ - (0b011 << 12) | (0b1010111 << 0) | \ - (vd << 7) | (shift << 15) | (vs2 << 20)) - -/* vd[i] = vs1[vs2[i] */ -#define VRGATHER_VV(vd, vs1, vs2) \ - ASM_WORD((0b001100 << 26) | (0b1 << 25) | \ - (0b000 << 12) | (0b1010111 << 0) | \ - (vd << 7) | (vs1 << 15) | (vs2 << 20)) - -/* Reverse order of bytes in words of vector regsiter. */ -#define VREV8(vd, vs2) \ - ASM_WORD((0b010010 << 26) | (0b1 << 25) | (0b01001<< 15) | \ - (0b010 << 12) | (0b1010111 << 0) | \ - (vs2 << 20) | (vd << 7)) /* Vector register set if equal: vd[i] = vs1[i] == vs2[i] ? 1 : 0 */ #define VMSEQ_VV(vd, vs1, vs2) \ @@ -169,60 +105,6 @@ static WC_INLINE void memcpy16(byte* out, const byte* in) (0b010 << 12) | (0b1010111 << 0) | \ (vs2 << 20) | (rd << 7)) -/* 64-bit width when loading. */ -#define WIDTH_64 0b111 -/* 32-bit width when loading. */ -#define WIDTH_32 0b110 - -/* Load n Vector registers with width-bit components. */ -#define VLRE_V(vd, rs1, cnt, width) \ - ASM_WORD(0b0000111 | (width << 12) | (0b00101000 << 20) | \ - (0 << 28) | ((cnt - 1) << 29) | (vd << 7) | (rs1 << 15)) -/* Load 1 Vector register with 64-bit components. */ -#define VL1RE64_V(vd, rs1) VLRE_V(vd, rs1, 1, WIDTH_64) -/* Load 1 Vector register with 32-bit components. */ -#define VL1RE32_V(vd, rs1) VLRE_V(vd, rs1, 1, WIDTH_32) -/* Load 2 Vector register with 32-bit components. */ -#define VL2RE32_V(vd, rs1) VLRE_V(vd, rs1, 2, WIDTH_32) -/* Load 4 Vector register with 32-bit components. */ -#define VL4RE32_V(vd, rs1) VLRE_V(vd, rs1, 4, WIDTH_32) -/* Load 8 Vector register with 32-bit components. */ -#define VL8RE32_V(vd, rs1) VLRE_V(vd, rs1, 8, WIDTH_32) - -/* Store n Vector register. */ -#define VSR_V(vs3, rs1, cnt) \ - ASM_WORD(0b0100111 | (0b00101000 << 20) | (0 << 28) | \ - ((cnt-1) << 29) | (vs3 << 7) | (rs1 << 15)) -/* Store 1 Vector register. */ -#define VS1R_V(vs3, rs1) VSR_V(vs3, rs1, 1) -/* Store 2 Vector register. 
*/
-#define VS2R_V(vs3, rs1)    VSR_V(vs3, rs1, 2)
-/* Store 4 Vector register. */
-#define VS4R_V(vs3, rs1)    VSR_V(vs3, rs1, 4)
-/* Store 8 Vector register. */
-#define VS8R_V(vs3, rs1)    VSR_V(vs3, rs1, 8)
-
-/* Move from vector register to vector registor. */
-#define VMV_V_V(vd, vs1)                                    \
-    ASM_WORD((0b1010111 << 0) | (0b000 << 12) | (0b1 << 25) | \
-             (0b010111 << 26) | (vd << 7) | (vs1 << 15))
-/* Splat register to each component of the vector registor. */
-#define VMV_V_X(vd, rs1)                                    \
-    ASM_WORD((0b1010111 << 0) | (0b100 << 12) | (0b1 << 25) | \
-             (0b010111 << 26) | (vd << 7) | (rs1 << 15))
-/* Move n vector registers to vector registers. */
-#define VMVR_V(vd, vs2, n)                                  \
-    ASM_WORD((0b1010111 << 0) | (0b011 << 12) | (0b1 << 25) | \
-             (0b100111 << 26) | (vd << 7) | ((n-1) << 15) | \
-             (vs2 << 20))
-
-/* Set the options of vector instructions. */
-#define VSETIVLI(rd, n, vma, vta, vsew, vlmul)              \
-    ASM_WORD((0b11 << 30) | (0b111 << 12) | (0b1010111 << 0) | \
-             (rd << 7) | (n << 15) | (vma << 27) |          \
-             (vta << 26) | (vsew << 23) | (vlmul << 20))
-
-
 #if defined(WOLFSSL_RISCV_VECTOR_CRYPTO_ASM)
 
 /*
diff --git a/wolfcrypt/src/port/riscv/riscv-64-sha256.c b/wolfcrypt/src/port/riscv/riscv-64-sha256.c
new file mode 100644
index 0000000000..595d854eeb
--- /dev/null
+++ b/wolfcrypt/src/port/riscv/riscv-64-sha256.c
@@ -0,0 +1,1420 @@
+/* riscv-64-sha256.c
+ *
+ * Copyright (C) 2006-2024 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef WOLFSSL_RISCV_ASM
+#if !defined(NO_SHA256) || defined(WOLFSSL_SHA224)
+
+#if FIPS_VERSION3_LT(6,0,0) && defined(HAVE_FIPS)
+    #undef HAVE_FIPS
+#else
+    #if defined(HAVE_FIPS) && FIPS_VERSION3_GE(6,0,0)
+        /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
+        #define FIPS_NO_WRAPPERS
+    #endif
+#endif
+
+#include <wolfssl/wolfcrypt/sha256.h>
+#if FIPS_VERSION3_GE(6,0,0)
+    const unsigned int wolfCrypt_FIPS_sha256_ro_sanity[2] =
+        { 0x1a2b3c4d, 0x00000014 };
+    int wolfCrypt_FIPS_SHA256_sanity(void)
+    {
+        return 0;
+    }
+#endif
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/hash.h>
+
+#include <wolfssl/wolfcrypt/port/riscv/riscv-64-asm.h>
+
+#ifdef NO_INLINE
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
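+/* Note: each K[i] below is the first 32 bits of the fractional part of the
+ * cube root of the i-th prime (FIPS 180-4). An illustrative check, not part
+ * of the build (double precision is close enough to spot a transcription
+ * error, though not bit-exact by proof):
+ *
+ *     k_i = (word32)((cbrt((double)prime_i) -
+ *                     floor(cbrt((double)prime_i))) * 4294967296.0);
+ */
+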
+/* Constants to add in each round. */
+static const FLASH_QUALIFIER ALIGN32 word32 K[64] = {
+    0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL,
+    0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L,
+    0x243185BEL, 0x550C7DC3L, 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L,
+    0xC19BF174L, 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
+    0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL, 0x983E5152L,
+    0xA831C66DL, 0xB00327C8L, 0xBF597FC7L, 0xC6E00BF3L, 0xD5A79147L,
+    0x06CA6351L, 0x14292967L, 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL,
+    0x53380D13L, 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
+    0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L, 0xD192E819L,
+    0xD6990624L, 0xF40E3585L, 0x106AA070L, 0x19A4C116L, 0x1E376C08L,
+    0x2748774CL, 0x34B0BCB5L, 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL,
+    0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
+    0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
+};
+
+/* Initialize SHA-256 object for hashing.
+ *
+ * @param [in, out] sha256  SHA-256 object.
+ */
+static void InitSha256(wc_Sha256* sha256)
+{
+    /* Set initial hash values. */
+#ifndef WOLFSSL_RISCV_VECTOR_CRYPTO_ASM
+    sha256->digest[0] = 0x6A09E667L;
+    sha256->digest[1] = 0xBB67AE85L;
+    sha256->digest[2] = 0x3C6EF372L;
+    sha256->digest[3] = 0xA54FF53AL;
+    sha256->digest[4] = 0x510E527FL;
+    sha256->digest[5] = 0x9B05688CL;
+    sha256->digest[6] = 0x1F83D9ABL;
+    sha256->digest[7] = 0x5BE0CD19L;
+#else
+    /* f, e, b, a, h, g, d, c */
+    sha256->digest[0] = 0x9B05688CL;
+    sha256->digest[1] = 0x510E527FL;
+    sha256->digest[2] = 0xBB67AE85L;
+    sha256->digest[3] = 0x6A09E667L;
+    sha256->digest[4] = 0x5BE0CD19L;
+    sha256->digest[5] = 0x1F83D9ABL;
+    sha256->digest[6] = 0xA54FF53AL;
+    sha256->digest[7] = 0x3C6EF372L;
+#endif
+
+    /* No hashed data. */
+    sha256->buffLen = 0;
+    /* No data hashed. */
+    sha256->loLen = 0;
+    sha256->hiLen = 0;
+
+#ifdef WOLFSSL_HASH_FLAGS
+    sha256->flags = 0;
+#endif
+}
+
+/* More data hashed, add length to 64-bit cumulative total.
+ *
+ * @param [in, out] sha256  SHA-256 object. Assumed not NULL.
+ * @param [in]      len     Length to add.
+ */
+static WC_INLINE void AddLength(wc_Sha256* sha256, word32 len)
+{
+    word32 tmp = sha256->loLen;
+    if ((sha256->loLen += len) < tmp)
+        sha256->hiLen++;                       /* carry low to high */
+}
+
+#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
+
+/* Load a word with bytes reversed. */
+#define LOAD_WORD_REV(r, o, p, t0, t1, t2)      \
+    "lbu    " #t0 ", " #o "(" #p ")\n\t"        \
+    "lbu    " #t1 ", " #o "+1(" #p ")\n\t"      \
+    "lbu    " #t2 ", " #o "+2(" #p ")\n\t"      \
+    "lbu    " #r ", " #o "+3(" #p ")\n\t"       \
+    "slli   " #t0 ", " #t0 ", 24\n\t"           \
+    "slli   " #t1 ", " #t1 ", 16\n\t"           \
+    "slli   " #t2 ", " #t2 ", 8\n\t"            \
+    "or     " #r ", " #r ", " #t0 "\n\t"        \
+    "or     " #r ", " #r ", " #t1 "\n\t"        \
+    "or     " #r ", " #r ", " #t2 "\n\t"
+
+/* Load a 64-bit double-word with bytes reversed.
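+ * Loads two big-endian 32-bit words using byte loads and shifts, for builds
+ * without the Zbb REV8 instruction: the word at offset o lands in the low
+ * half of r and the word at o+4 in the high half.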
*/ +#define LOAD_DWORD_REV(r, o, p, t0, t1, t2, t3) \ + "lbu " #t0 ", " #o "(" #p ")\n\t" \ + "lbu " #t1 ", " #o "+1(" #p ")\n\t" \ + "lbu " #t2 ", " #o "+2(" #p ")\n\t" \ + "lbu " #r ", " #o "+3(" #p ")\n\t" \ + "slli " #t0 ", " #t0 ", 24\n\t" \ + "slli " #t1 ", " #t1 ", 16\n\t" \ + "slli " #t2 ", " #t2 ", 8\n\t" \ + "or " #r ", " #r ", " #t0 "\n\t" \ + "or " #r ", " #r ", " #t1 "\n\t" \ + "or " #r ", " #r ", " #t2 "\n\t" \ + "lbu " #t0 ", " #o "+4(" #p ")\n\t" \ + "lbu " #t1 ", " #o "+5(" #p ")\n\t" \ + "lbu " #t2 ", " #o "+6(" #p ")\n\t" \ + "lbu " #t3 ", " #o "+7(" #p ")\n\t" \ + "slli " #t0 ", " #t0 ", 56\n\t" \ + "slli " #t1 ", " #t1 ", 48\n\t" \ + "slli " #t2 ", " #t2 ", 40\n\t" \ + "slli " #t3 ", " #t3 ", 32\n\t" \ + "or " #r ", " #r ", " #t0 "\n\t" \ + "or " #r ", " #r ", " #t1 "\n\t" \ + "or " #r ", " #r ", " #t2 "\n\t" \ + "or " #r ", " #r ", " #t3 "\n\t" + +#define PACK_BB(rd, rs1, rs2, rrd, rrs1, rrs2) \ + "slli " #rd ", " #rs1 ", 32\n\t" \ + "slli " #rs2 ", " #rs2 ", 32\n\t" \ + "srli " #rd ", " #rs1 ", 32\n\t" \ + "or " #rd ", " #rd ", " #rs2 "\n\t" + +#else + +#define PACK_BB(rd, rs1, rs2, rrd, rrs1, rrs2) \ + PACK(rrd, rrs1, rrs2) + +#endif + +#ifndef WOLFSSL_RISCV_VECTOR_CRYPTO_ASM + +#ifdef WOLFSSL_RISCV_SCALAR_CRYPTO_ASM + +/* SHA-256 SUM0 operation. */ +#define SHA256SUM0(rd, rs1) \ + ASM_WORD((0b000100000000 << 20) | (0b001 << 12) | 0b0010011 | \ + (rs1 << 15) | (rd << 7)) +/* SHA-256 SUM1 operation. */ +#define SHA256SUM1(rd, rs1) \ + ASM_WORD((0b000100000001 << 20) | (0b001 << 12) | 0b0010011 | \ + (rs1 << 15) | (rd << 7)) +/* SHA-256 SIGMA0 operation. */ +#define SHA256SIG0(rd, rs1) \ + ASM_WORD((0b000100000010 << 20) | (0b001 << 12) | 0b0010011 | \ + (rs1 << 15) | (rd << 7)) +/* SHA-256 SIGMA1 operation. */ +#define SHA256SIG1(rd, rs1) \ + ASM_WORD((0b000100000011 << 20) | (0b001 << 12) | 0b0010011 | \ + (rs1 << 15) | (rd << 7)) + +/* One round of compression. */ +#define RND(a, b, c, d, e, f, g, h, w, k) \ + /* Get e and a */ \ + "mv a4, " #e "\n\t" \ + "mv a5, " #a "\n\t" \ + /* Sigma1(e) */ \ + SHA256SUM1(REG_A4, REG_A4) \ + /* Sigma0(a) */ \ + SHA256SUM0(REG_A5, REG_A5) \ + /* Maj(a, b, c) = t5 */ \ + /* Ch(e, f, g) = t6 */ \ + /* a ^ b */ \ + "xor t4, " #a ", " #b "\n\t" \ + /* f ^ g */ \ + "xor t6, " #f ", " #g "\n\t" \ + /* b ^ c */ \ + "xor t5, " #b ", " #c "\n\t" \ + /* (f ^ g) & e */ \ + "and t6, t6, " #e "\n\t" \ + /* (a^b) & (b^c) */ \ + "and t5, t5, t4\n\t" \ + /* ((f ^ g) & e) ^ g */ \ + "xor t6, t6, " #g "\n\t" \ + /* ((a^b) & (b^c)) ^ b */ \ + "xor t5, t5, " #b "\n\t" \ + /* sigma1 + Ch */ \ + "addw t4, a4, t6\n\t" \ + /* K + W */ \ + "addw t6, " #k ", " #w "\n\t" \ + /* sigma1 + Ch + K + W = 't0'-h */ \ + "addw t4, t4, t6\n\t" \ + /* h + sigma1 + Ch + K + W = 't0' */ \ + "addw t4, " #h ", t4\n\t" \ + /* d += 't0' */ \ + "addw " #d ", " #d ", t4\n\t" \ + /* Sigma0(a) + Maj = 't1' */ \ + "addw t5, a5, t5\n\t" \ + /* 't0' += 't1' */ \ + "addw " #h ", t4, t5\n\t" + +/* Two message schedule updates. 
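+ *
+ * Implements the SHA-256 schedule recurrence
+ *     W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16]
+ * for two consecutive W values. W values are kept packed two per 64-bit
+ * register, so the updated pair is repacked with PACK_BB at the end.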
*/ +#define W_UPDATE_2(w0, w1, w4, w5, w7, reg_w0, reg_w1, reg_w7) \ + /* W[i-15] = W[1] */ \ + "srli t4, " #w0 ", 32\n\t" \ + /* W[i-7] = W[9] */ \ + "srli t6, " #w4 ", 32\n\t" \ + /* Gamma0(W[1]) */ \ + SHA256SIG0(REG_A4, REG_T4) \ + /* Gamma1(W[i-2]) = Gamma1(W[14]) */ \ + SHA256SIG1(REG_A5, reg_w7) \ + /* Gamma1(W[14]) + W[9] */ \ + "addw a5, a5, t6\n\t" \ + /* Gamma0(W[1]) + W[i-16] = Gamma0(W[1]) + W[0] */ \ + "addw t5, " #w0 ", a4\n\t" \ + /* W[0] = Gamma1(W[14]) + W[9] + Gamma0(W[1]) + W[0] */ \ + "addw " #w0 ", a5, t5\n\t" \ + \ + /* W[i+1-16] = W[1] = t4 */ \ + /* W[i+1-2] = W[15] */ \ + "srli t5, " #w7 ", 32\n\t" \ + "mv a6, " #w1 "\n\t" \ + /* Gamma0(W[i+1-15]) = Gamma0(W[2]) */ \ + SHA256SIG0(REG_A6, REG_A6) \ + /* Gamma1(W[i+1-2]) = Gamma1(W[15]) */ \ + SHA256SIG1(REG_A7, REG_T5) \ + /* Gamma1(W[15]) + W[i+1-7] = Gamma1(W[15]) + W[10] */ \ + "addw a7, a7, " #w5 "\n\t" \ + /* Gamma0(W[2]) + W[i+1-16] = Gamma0(W[2]) + W[1] */ \ + "addw t5, a6, t4\n\t" \ + /* Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */ \ + "addw a7, a7, t5\n\t" \ + /* Place in W[i+1-16] = W[1] */ \ + PACK_BB(w0, w0, a7, reg_w0, reg_w0, REG_A7) + +#else + +/* SHA-256 SUM0 operation. */ +#define SHA256SUM0(rd, rs1) \ + "slliw t5, " #rs1 ", 30\n\t" \ + "srliw t4, " #rs1 ", 2\n\t" \ + "slliw t6, " #rs1 ", 19\n\t" \ + "or t4, t4, t5\n\t" \ + "srliw t5, " #rs1 ", 13\n\t" \ + "xor t4, t4, t6\n\t" \ + "slliw t6, " #rs1 ", 10\n\t" \ + "xor t4, t4, t5\n\t" \ + "srliw " #rd ", " #rs1 ", 22\n\t" \ + "xor t4, t4, t6\n\t" \ + "xor " #rd ", " #rd ", t4\n\t" + +/* SHA-256 SUM1 operation. */ +#define SHA256SUM1(rd, rs1) \ + "slliw t5, " #rs1 ", 26\n\t" \ + "srliw t4, " #rs1 ", 6\n\t" \ + "slliw t6, " #rs1 ", 21\n\t" \ + "or t4, t4, t5\n\t" \ + "srliw t5, " #rs1 ", 11\n\t" \ + "xor t4, t4, t6\n\t" \ + "slliw t6, " #rs1 ", 7\n\t" \ + "xor t4, t4, t5\n\t" \ + "srliw " #rd ", " #rs1 ", 25\n\t" \ + "xor t4, t4, t6\n\t" \ + "xor " #rd ", " #rd ", t4\n\t" + +/* SHA-256 SIGMA0 operation. */ +#define SHA256SIG0(rd, rs1) \ + "slliw t5, " #rs1 ", 25\n\t" \ + "srliw t6, " #rs1 ", 7\n\t" \ + "slliw t4, " #rs1 ", 14\n\t" \ + "or t6, t6, t5\n\t" \ + "srliw t5, " #rs1 ", 18\n\t" \ + "xor t6, t6, t4\n\t" \ + "srliw " #rd ", " #rs1 ", 3\n\t" \ + "xor t6, t6, t5\n\t" \ + "xor " #rd ", " #rd ", t6\n\t" + +/* SHA-256 SIGMA1 operation. */ +#define SHA256SIG1(rd, rs1) \ + "slliw t5, " #rs1 ", 15\n\t" \ + "srliw t6, " #rs1 ", 17\n\t" \ + "slliw t4, " #rs1 ", 13\n\t" \ + "or t6, t6, t5\n\t" \ + "srliw t5, " #rs1 ", 19\n\t" \ + "xor t6, t6, t4\n\t" \ + "srliw " #rd ", " #rs1 ", 10\n\t" \ + "xor t6, t6, t5\n\t" \ + "xor " #rd ", " #rd ", t6\n\t" + +/* One round of compression. 
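+ *
+ * Maj and Ch use the shared-term forms
+ *     Ch(e,f,g)  = ((f ^ g) & e) ^ g
+ *     Maj(a,b,c) = ((a ^ b) & (b ^ c)) ^ b
+ * which match the xor/and sequence in the macro below.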
*/
+#define RND(a, b, c, d, e, f, g, h, w, k)                   \
+    /* Sigma1(e) */                                         \
+    SHA256SUM1(a4, e)                                       \
+    /* Sigma0(a) */                                         \
+    SHA256SUM0(a5, a)                                       \
+    /* Maj(a, b, c) = t5 */                                 \
+    /* Ch(e, f, g) = t6 */                                  \
+    /* a ^ b */                                             \
+    "xor    t4, " #a ", " #b "\n\t"                         \
+    /* f ^ g */                                             \
+    "xor    t6, " #f ", " #g "\n\t"                         \
+    /* b ^ c */                                             \
+    "xor    t5, " #b ", " #c "\n\t"                         \
+    /* (f ^ g) & e */                                       \
+    "and    t6, t6, " #e "\n\t"                             \
+    /* (a^b) & (b^c) */                                     \
+    "and    t5, t5, t4\n\t"                                 \
+    /* ((f ^ g) & e) ^ g */                                 \
+    "xor    t6, t6, " #g "\n\t"                             \
+    /* ((a^b) & (b^c)) ^ b */                               \
+    "xor    t5, t5, " #b "\n\t"                             \
+    /* sigma1 + Ch */                                       \
+    "addw   t4, a4, t6\n\t"                                 \
+    /* K + W */                                             \
+    "addw   t6, " #k ", " #w "\n\t"                         \
+    /* sigma1 + Ch + K + W = 't0'-h */                      \
+    "addw   t4, t4, t6\n\t"                                 \
+    /* h + sigma1 + Ch + K + W = 't0' */                    \
+    "addw   t4, " #h ", t4\n\t"                             \
+    /* Sigma0(a) + Maj = 't1' */                            \
+    "addw   t5, a5, t5\n\t"                                 \
+    /* d += 't0' */                                         \
+    "addw   " #d ", " #d ", t4\n\t"                         \
+    /* 't0' += 't1' */                                      \
+    "addw   " #h ", t4, t5\n\t"
+
+/* Two message schedule updates. */
+#define W_UPDATE_2(w0, w1, w4, w5, w7, reg_w0, reg_w1, reg_w7) \
+    /* W[i-15] = W[1] */                                    \
+    "srli   a7, " #w0 ", 32\n\t"                            \
+    /* W[i-7] = W[9] */                                     \
+    "srli   a6, " #w4 ", 32\n\t"                            \
+    /* Gamma0(W[1]) */                                      \
+    SHA256SIG0(a4, a7)                                      \
+    /* Gamma1(W[i-2]) = Gamma1(W[14]) */                    \
+    SHA256SIG1(a5, w7)                                      \
+    /* Gamma1(W[14]) + W[9] */                              \
+    "addw   a5, a5, a6\n\t"                                 \
+    /* Gamma0(W[1]) + W[i-16] = Gamma0(W[1]) + W[0] */      \
+    "addw   t5, " #w0 ", a4\n\t"                            \
+    /* W[0] = Gamma1(W[14]) + W[9] + Gamma0(W[1]) + W[0] */ \
+    "addw   " #w0 ", a5, t5\n\t"                            \
+                                                            \
+    /* W[i+1-16] = W[1] = a7 */                             \
+    /* W[i+1-2] = W[15] */                                  \
+    "srli   a5, " #w7 ", 32\n\t"                            \
+    /* Gamma0(W[i+1-15]) = Gamma0(W[2]) */                  \
+    SHA256SIG0(a6, w1)                                      \
+    /* Gamma1(W[i+1-2]) = Gamma1(W[15]) */                  \
+    SHA256SIG1(a5, a5)                                      \
+    /* Gamma1(W[15]) + W[i+1-7] = Gamma1(W[15]) + W[10] */  \
+    "addw   a5, a5, " #w5 "\n\t"                            \
+    /* Gamma0(W[2]) + W[i+1-16] = Gamma0(W[2]) + W[1] */    \
+    "addw   t5, a6, a7\n\t"                                 \
+    /* Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */ \
+    "addw   a5, a5, t5\n\t"                                 \
+    /* Place in W[i+1-16] = W[1] */                         \
+    PACK_BB(w0, w0, a5, reg_w0, reg_w0, REG_A5)
+
+#endif /* WOLFSSL_RISCV_SCALAR_CRYPTO_ASM */
+
+/* Two rounds of compression. */
+#define RND2(a, b, c, d, e, f, g, h, w, o)                  \
+    /* Get k[i], k[i+1] */                                  \
+    "ld     a6, " #o "(%[k])\n\t"                           \
+    RND(a, b, c, d, e, f, g, h, w, a6)                      \
+    /* Move k[i+1] down */                                  \
+    "srli   a6, a6, 32\n\t"                                 \
+    /* Move W[i+1] down */                                  \
+    "srli   a7, " #w ", 32\n\t"                             \
+    RND(h, a, b, c, d, e, f, g, a7, a6)
+
+/* Sixteen rounds of compression with message scheduling. */
+#define RND16()                                             \
+    RND2(t0, t1, t2, t3, s8, s9, s10, s11, s0, 0)           \
+    W_UPDATE_2(s0, s1, s4, s5, s7, REG_S0, REG_S1, REG_S7)  \
+    RND2(s10, s11, t0, t1, t2, t3, s8, s9, s1, 8)           \
+    W_UPDATE_2(s1, s2, s5, s6, s0, REG_S1, REG_S2, REG_S0)  \
+    RND2(s8, s9, s10, s11, t0, t1, t2, t3, s2, 16)          \
+    W_UPDATE_2(s2, s3, s6, s7, s1, REG_S2, REG_S3, REG_S1)  \
+    RND2(t2, t3, s8, s9, s10, s11, t0, t1, s3, 24)          \
+    W_UPDATE_2(s3, s4, s7, s0, s2, REG_S3, REG_S4, REG_S2)  \
+    RND2(t0, t1, t2, t3, s8, s9, s10, s11, s4, 32)          \
+    W_UPDATE_2(s4, s5, s0, s1, s3, REG_S4, REG_S5, REG_S3)  \
+    RND2(s10, s11, t0, t1, t2, t3, s8, s9, s5, 40)          \
+    W_UPDATE_2(s5, s6, s1, s2, s4, REG_S5, REG_S6, REG_S4)  \
+    RND2(s8, s9, s10, s11, t0, t1, t2, t3, s6, 48)          \
+    W_UPDATE_2(s6, s7, s2, s3, s5, REG_S6, REG_S7, REG_S5)  \
+    RND2(t2, t3, s8, s9, s10, s11, t0, t1, s7, 56)          \
+    W_UPDATE_2(s7, s0, s3, s4, s6, REG_S7, REG_S0, REG_S6)
+
+/* Sixteen rounds of compression only.
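+ * Used for the final sixteen rounds, where no further W values are needed.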
*/ +#define RND16_LAST() \ + RND2(t0, t1, t2, t3, s8, s9, s10, s11, s0, 0) \ + RND2(s10, s11, t0, t1, t2, t3, s8, s9, s1, 8) \ + RND2(s8, s9, s10, s11, t0, t1, t2, t3, s2, 16) \ + RND2(t2, t3, s8, s9, s10, s11, t0, t1, s3, 24) \ + RND2(t0, t1, t2, t3, s8, s9, s10, s11, s4, 32) \ + RND2(s10, s11, t0, t1, t2, t3, s8, s9, s5, 40) \ + RND2(s8, s9, s10, s11, t0, t1, t2, t3, s6, 48) \ + RND2(t2, t3, s8, s9, s10, s11, t0, t1, s7, 56) + +/* Transform the message data. + * + * @param [in, out] sha256 SHA-256 object. + * @param [in] data Buffer of data to hash. + * @param [in] blocks Number of blocks of data to hash. + */ +static WC_INLINE void Sha256Transform(wc_Sha256* sha256, const byte* data, + word32 blocks) +{ + word32* k = (word32*)K; + + __asm__ __volatile__ ( + /* Load digest. */ + "ld t0, 0(%[digest])\n\t" + "ld t2, 8(%[digest])\n\t" + "ld s8, 16(%[digest])\n\t" + "ld s10, 24(%[digest])\n\t" + "srli t1, t0, 32\n\t" + "srli t3, t2, 32\n\t" + "srli s9, s8, 32\n\t" + "srli s11, s10, 32\n\t" + + /* 4 rounds of 16 per block. */ + "slli %[blocks], %[blocks], 2\n\t" + + "\n1:\n\t" + /* beginning of SHA256 block operation */ + /* Load W */ +#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION + LOAD_DWORD_REV(s0, 0, %[data], a4, a5, a6, a7) + LOAD_DWORD_REV(s1, 8, %[data], a4, a5, a6, a7) + LOAD_DWORD_REV(s2, 16, %[data], a4, a5, a6, a7) + LOAD_DWORD_REV(s3, 24, %[data], a4, a5, a6, a7) + LOAD_DWORD_REV(s4, 32, %[data], a4, a5, a6, a7) + LOAD_DWORD_REV(s5, 40, %[data], a4, a5, a6, a7) + LOAD_DWORD_REV(s6, 48, %[data], a4, a5, a6, a7) + LOAD_DWORD_REV(s7, 56, %[data], a4, a5, a6, a7) +#else + "lwu a4, 0(%[data])\n\t" + "lwu s0, 4(%[data])\n\t" + "lwu a5, 8(%[data])\n\t" + "lwu s1, 12(%[data])\n\t" + "lwu a6, 16(%[data])\n\t" + "lwu s2, 20(%[data])\n\t" + "lwu a7, 24(%[data])\n\t" + "lwu s3, 28(%[data])\n\t" + PACK_BB(s0, s0, a4, REG_S0, REG_S0, REG_A4) + PACK_BB(s1, s1, a5, REG_S1, REG_S1, REG_A5) + PACK_BB(s2, s2, a6, REG_S2, REG_S2, REG_A6) + PACK_BB(s3, s3, a7, REG_S3, REG_S3, REG_A7) + REV8(REG_S0, REG_S0) + REV8(REG_S1, REG_S1) + REV8(REG_S2, REG_S2) + REV8(REG_S3, REG_S3) + "lwu a4, 32(%[data])\n\t" + "lwu s4, 36(%[data])\n\t" + "lwu a5, 40(%[data])\n\t" + "lwu s5, 44(%[data])\n\t" + "lwu a6, 48(%[data])\n\t" + "lwu s6, 52(%[data])\n\t" + "lwu a7, 56(%[data])\n\t" + "lwu s7, 60(%[data])\n\t" + PACK_BB(s4, s4, a4, REG_S4, REG_S4, REG_A4) + PACK_BB(s5, s5, a5, REG_S5, REG_S5, REG_A5) + PACK_BB(s6, s6, a6, REG_S6, REG_S6, REG_A6) + PACK_BB(s7, s7, a7, REG_S7, REG_S7, REG_A7) + REV8(REG_S4, REG_S4) + REV8(REG_S5, REG_S5) + REV8(REG_S6, REG_S6) + REV8(REG_S7, REG_S7) +#endif + + /* Subtract one as there are only 3 loops. */ + "addi %[blocks], %[blocks], -1\n\t" + "\n2:\n\t" + RND16() + "addi %[blocks], %[blocks], -1\n\t" + "add %[k], %[k], 64\n\t" + "andi a4, %[blocks], 3\n\t" + "bnez a4, 2b \n\t" + RND16_LAST() + "addi %[k], %[k], -192\n\t" + + "# Add working vars back into digest state.\n\t" + "ld a4, 0(%[digest])\n\t" + "ld a5, 8(%[digest])\n\t" + "ld a6, 16(%[digest])\n\t" + "ld a7, 24(%[digest])\n\t" + "addw t0, t0, a4\n\t" + "addw t2, t2, a5\n\t" + "addw s8, s8, a6\n\t" + "addw s10, s10, a7\n\t" + "srli a4, a4, 32\n\t" + "srli a5, a5, 32\n\t" + "srli a6, a6, 32\n\t" + "srli a7, a7, 32\n\t" + "addw t1, t1, a4\n\t" + "addw t3, t3, a5\n\t" + "addw s9, s9, a6\n\t" + "addw s11, s11, a7\n\t" + + /* Store digest. 
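+         * sw stores the low 32 bits of each register, so any sign-extended
+         * upper bits left by addw are discarded.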
*/
+        "sw     t0, 0(%[digest])\n\t"
+        "sw     t1, 4(%[digest])\n\t"
+        "sw     t2, 8(%[digest])\n\t"
+        "sw     t3, 12(%[digest])\n\t"
+        "sw     s8, 16(%[digest])\n\t"
+        "sw     s9, 20(%[digest])\n\t"
+        "sw     s10, 24(%[digest])\n\t"
+        "sw     s11, 28(%[digest])\n\t"
+
+        "add    %[data], %[data], 64\n\t"
+        "bnez   %[blocks], 1b \n\t"
+
+        : [blocks] "+r" (blocks), [data] "+r" (data), [k] "+r" (k)
+        : [digest] "r" (sha256->digest)
+        : "cc", "memory", "t0", "t1", "t2", "t3", "t4", "t5", "t6",
+          "a4", "a5", "a6", "a7",
+          "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10",
+          "s11"
+    );
+}
+
+#else
+
+/* Two rounds of compression using low two 32-bit W values.
+ * Assumes K has been added into W values.
+ */
+#define VSHA2CL_VV(vd, vs1, vs2)                            \
+    ASM_WORD((0b101111 << 26) | (0b1 << 25) |               \
+             (0b010 << 12) | (0b1110111 << 0) |             \
+             (vd << 7) | (vs1 << 15) | (vs2 << 20))
+
+/* Two rounds of compression using upper two 32-bit W values.
+ * Assumes K has been added into W values.
+ */
+#define VSHA2CH_VV(vd, vs1, vs2)                            \
+    ASM_WORD((0b101110 << 26) | (0b1 << 25) |               \
+             (0b010 << 12) | (0b1110111 << 0) |             \
+             (vd << 7) | (vs1 << 15) | (vs2 << 20))
+
+/* Update 4 W values - message scheduling. */
+#define VSHA2MS_VV(vd, vs1, vs2)                            \
+    ASM_WORD((0b101101 << 26) | (0b1 << 25) |               \
+             (0b010 << 12) | (0b1110111 << 0) |             \
+             (vd << 7) | (vs1 << 15) | (vs2 << 20))
+
+#ifndef WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION
+/* Indices to use with gather vector instruction to reverse bytes. */
+static const word32 rev_idx[4] = {
+    0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+};
+#endif /* !WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION */
+
+#define RND4(w0, w1, w2, w3, k)                             \
+    /* Four rounds of compression. */                       \
+    VADD_VV(REG_V7, w0, k)                                  \
+    VSHA2CL_VV(REG_V5, REG_V7, REG_V4)                      \
+    VSHA2CH_VV(REG_V4, REG_V7, REG_V5)                      \
+    /* Update 4 W values - message schedule. */             \
+    VMV_X_S(REG_T1, w1)                                     \
+    VMV_V_V(REG_V6, w2)                                     \
+    VMV_S_X(REG_V6, REG_T1)                                 \
+    VSHA2MS_VV(w0, w3, REG_V6)
+
+#define RND4_LAST(w, k)                                     \
+    /* Four rounds of compression. */                       \
+    VADD_VV(REG_V7, w, k)                                   \
+    VSHA2CL_VV(REG_V5, REG_V7, REG_V4)                      \
+    VSHA2CH_VV(REG_V4, REG_V7, REG_V5)
+
+#define RND16(k)                                            \
+    RND4(REG_V0, REG_V1, REG_V2, REG_V3, (k + 0))           \
+    RND4(REG_V1, REG_V2, REG_V3, REG_V0, (k + 1))           \
+    RND4(REG_V2, REG_V3, REG_V0, REG_V1, (k + 2))           \
+    RND4(REG_V3, REG_V0, REG_V1, REG_V2, (k + 3))
+
+#define RND16_LAST(k)                                       \
+    RND4_LAST(REG_V0, (k + 0))                              \
+    RND4_LAST(REG_V1, (k + 1))                              \
+    RND4_LAST(REG_V2, (k + 2))                              \
+    RND4_LAST(REG_V3, (k + 3))
+
+/* Transform the message data.
+ *
+ * @param [in, out] sha256  SHA-256 object.
+ * @param [in]      data    Buffer of data to hash.
+ * @param [in]      blocks  Number of blocks of data to hash.
+ */
+static void Sha256Transform(wc_Sha256* sha256, const byte* data,
+    word32 blocks)
+{
+    word32* k = (word32*)K;
+
+    __asm__ __volatile__ (
+        VSETIVLI(REG_ZERO, 4, 1, 1, 0b010, 0b000)
+
+        /* Load: a|b|e|f, c|d|g|h
+         *       3 2 1 0  3 2 1 0
+         */
+        "mv     t0, %[digest]\n\t"
+        VL2RE32_V(REG_V4, REG_T0)
+
+        "mv     t0, %[k]\n\t"
+        VL8RE32_V(REG_V8, REG_T0)
+        "addi   t0, %[k], 128\n\t"
+        VL8RE32_V(REG_V16, REG_T0)
+
+        "\n1:\n\t"
+        VMV_V_V(REG_V30, REG_V4)
+        VMV_V_V(REG_V31, REG_V5)
+
+        /* Load 16 W into 4 vectors of 4 32-bit words.
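+         * VL4RE32_V fills v0-v3; VREV8 then byte-swaps each 32-bit element
+         * (SEW=32 from the vsetivli above) to convert the big-endian
+         * message words.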
*/ + "mv t0, %[data]\n\t" + VL4RE32_V(REG_V0, REG_T0) + VREV8(REG_V0, REG_V0) + VREV8(REG_V1, REG_V1) + VREV8(REG_V2, REG_V2) + VREV8(REG_V3, REG_V3) + + RND16(REG_V8) + RND16(REG_V12) + RND16(REG_V16) + RND16_LAST(REG_V20) + + VADD_VV(REG_V4, REG_V4, REG_V30) + VADD_VV(REG_V5, REG_V5, REG_V31) + + "addi %[blocks], %[blocks], -1\n\t" + "add %[data], %[data], 64\n\t" + "bnez %[blocks], 1b \n\t" + + "mv t0, %[digest]\n\t" + VS2R_V(REG_V4, REG_T0) + + : [blocks] "+r" (blocks), [data] "+r" (data), [k] "+r" (k) + : [digest] "r" (sha256->digest) +#ifndef WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION + , [rev_idx] "r" (rev_idx) +#endif + : "cc", "memory", "t0", "t1" + ); +} + +#endif /* WOLFSSL_RISCV_VECTOR_CRYPTO_ASM */ + +/* Update the hash with data. + * + * @param [in, out] sha256 SHA-256 object. + * @param [in] data Buffer of data to hash. + * @param [in] len Number of bytes in buffer to hash. + * @return 0 on success. + */ +static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, + word32 len) +{ + word32 add; + word32 blocks; + + /* only perform actions if a buffer is passed in */ + if (len > 0) { + AddLength(sha256, len); + + if (sha256->buffLen > 0) { + /* fill leftover buffer with data */ + add = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen); + XMEMCPY((byte*)(sha256->buffer) + sha256->buffLen, data, add); + sha256->buffLen += add; + data += add; + len -= add; + if (sha256->buffLen == WC_SHA256_BLOCK_SIZE) { + Sha256Transform(sha256, (byte*)sha256->buffer, 1); + sha256->buffLen = 0; + } + } + + /* number of blocks in a row to complete */ + blocks = len / WC_SHA256_BLOCK_SIZE; + + if (blocks > 0) { + Sha256Transform(sha256, data, blocks); + data += blocks * WC_SHA256_BLOCK_SIZE; + len -= blocks * WC_SHA256_BLOCK_SIZE; + } + + if (len > 0) { + /* copy over any remaining data leftover */ + XMEMCPY(sha256->buffer, data, len); + sha256->buffLen = len; + } + } + + /* account for possibility of not used if len = 0 */ + (void)add; + (void)blocks; + + return 0; +} + +/* Finalize the hash and put into buffer. + * + * @param [in, out] sha256 SHA-256 object. + * @param [out] hash Buffer to hold hash result. + */ +static WC_INLINE void Sha256Final(wc_Sha256* sha256, byte* hash) +{ + byte* local; + + local = (byte*)sha256->buffer; + local[sha256->buffLen++] = 0x80; /* add 1 */ + + /* pad with zeros */ + if (sha256->buffLen > WC_SHA256_PAD_SIZE) { + XMEMSET(&local[sha256->buffLen], 0, + WC_SHA256_BLOCK_SIZE - sha256->buffLen); + Sha256Transform(sha256, (byte*)sha256->buffer, 1); + sha256->buffLen = 0; + } + XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_PAD_SIZE - sha256->buffLen); + + /* put lengths in bits */ + sha256->hiLen = (sha256->loLen >> (8*sizeof(sha256->loLen) - 3)) + + (sha256->hiLen << 3); + sha256->loLen = sha256->loLen << 3; + + XMEMCPY(&local[WC_SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32)); + XMEMCPY(&local[WC_SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen, + sizeof(word32)); + + /* store lengths */ + __asm__ __volatile__ ( + /* Reverse byte order of 32-bit words. 
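+         * hiLen and loLen were stored little-endian just above; the SHA-256
+         * padding scheme requires the 64-bit bit count in big-endian order
+         * at the end of the final block.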
*/ +#if defined(WOLFSSL_RISCV_BASE_BIT_MANIPULATION) + "ld t1, 56(%[buff])\n\t" + REV8(REG_T1, REG_T1) + "srli t0, t1, 32\n\t" + "sw t0, 56(%[buff])\n\t" + "sw t1, 60(%[buff])\n\t" +#else + LOAD_WORD_REV(t0, 56, %[buff], t2, t3, t4) + LOAD_WORD_REV(t1, 60, %[buff], t2, t3, t4) + "sw t0, 56(%[buff])\n\t" + "sw t1, 60(%[buff])\n\t" +#endif + : + : [buff] "r" (sha256->buffer) + : "cc", "memory", "t0", "t1", "t2", "t3", "t4" + ); + + Sha256Transform(sha256, (byte*)sha256->buffer, 1); + + __asm__ __volatile__ ( + /* Reverse byte order of 32-bit words. */ +#if defined(WOLFSSL_RISCV_VECTOR_CRYPTO_ASM) + VSETIVLI(REG_ZERO, 4, 1, 1, 0b010, 0b000) + "mv t0, %[digest]\n\t" + VL2RE32_V(REG_V8, REG_T0) + VREV8(REG_V8, REG_V8) + VREV8(REG_V9, REG_V9) + /* a|b|e|f, c|d|g|h + * 3 2 1 0 3 2 1 0 */ + VSLIDEDOWN_VI(REG_V0, REG_V8, 3) /* a */ + VSLIDEDOWN_VI(REG_V2, REG_V8, 2) /* b */ + VSLIDEDOWN_VI(REG_V1, REG_V8, 1) /* e */ + VSLIDEDOWN_VI(REG_V3, REG_V9, 3) /* c */ + VSLIDEDOWN_VI(REG_V4, REG_V9, 2) /* d */ + VSLIDEDOWN_VI(REG_V5, REG_V9, 1) /* g */ + /* -|-|-|a, -|-|-|e */ + VSLIDEUP_VI(REG_V0, REG_V2, 1) + /* -|-|b|a, -|-|-|e */ + VSLIDEUP_VI(REG_V0, REG_V3, 2) + /* -|c|b|a, -|-|-|e */ + VSLIDEUP_VI(REG_V0, REG_V4, 3) + /* d|c|b|a, -|-|-|e */ + VSLIDEUP_VI(REG_V1, REG_V8, 1) + /* d|c|b|a, -|-|f|e */ + VSLIDEUP_VI(REG_V1, REG_V5, 2) + /* d|c|b|a, -|g|f|e */ + VSLIDEUP_VI(REG_V1, REG_V9, 3) + /* d|c|b|a, h|g|f|e */ + "mv t0, %[hash]\n\t" + VS2R_V(REG_V0, REG_T0) +#elif defined(WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION) + VSETIVLI(REG_ZERO, 4, 1, 1, 0b010, 0b000) + "mv t0, %[digest]\n\t" + VL2RE32_V(REG_V0, REG_T0) + VREV8(REG_V0, REG_V0) + VREV8(REG_V1, REG_V1) + "mv t0, %[hash]\n\t" + VS2R_V(REG_V0, REG_T0) +#elif defined(WOLFSSL_RISCV_BASE_BIT_MANIPULATION) + "ld t1, 0(%[digest])\n\t" + "ld t3, 8(%[digest])\n\t" + "ld s1, 16(%[digest])\n\t" + "ld s3, 24(%[digest])\n\t" + REV8(REG_T1, REG_T1) + REV8(REG_T3, REG_T3) + REV8(REG_S1, REG_S1) + REV8(REG_S3, REG_S3) + "srli t0, t1, 32\n\t" + "srli t2, t3, 32\n\t" + "srli s0, s1, 32\n\t" + "srli s2, s3, 32\n\t" + "sw t0, 0(%[hash])\n\t" + "sw t1, 4(%[hash])\n\t" + "sw t2, 8(%[hash])\n\t" + "sw t3, 12(%[hash])\n\t" + "sw s0, 16(%[hash])\n\t" + "sw s1, 20(%[hash])\n\t" + "sw s2, 24(%[hash])\n\t" + "sw s3, 28(%[hash])\n\t" +#else + LOAD_WORD_REV(t0, 0, %[digest], t2, t3, t4) + LOAD_WORD_REV(t1, 4, %[digest], t2, t3, t4) + LOAD_WORD_REV(s0, 8, %[digest], t2, t3, t4) + LOAD_WORD_REV(s1, 12, %[digest], t2, t3, t4) + "sw t0, 0(%[hash])\n\t" + "sw t1, 4(%[hash])\n\t" + "sw s0, 8(%[hash])\n\t" + "sw s1, 12(%[hash])\n\t" + LOAD_WORD_REV(t0, 16, %[digest], t2, t3, t4) + LOAD_WORD_REV(t1, 20, %[digest], t2, t3, t4) + LOAD_WORD_REV(s0, 24, %[digest], t2, t3, t4) + LOAD_WORD_REV(s1, 28, %[digest], t2, t3, t4) + "sw t0, 16(%[hash])\n\t" + "sw t1, 20(%[hash])\n\t" + "sw s0, 24(%[hash])\n\t" + "sw s1, 28(%[hash])\n\t" +#endif + : + : [digest] "r" (sha256->digest), [hash] "r" (hash) +#if defined(WOLFSSL_RISCV_VECTOR_CRYPTO_ASM) && \ + !defined(WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION) + , [rev_idx] "r" (rev_idx) +#endif + : "cc", "memory", "t0", "t1", "t2", "t3", "t4", "t5", "t6", + "s0", "s1", "s2", "s3" + ); +} + + +#ifndef NO_SHA256 + +/* Initialize SHA-256 object for hashing. + * + * @param [in, out] sha256 SHA-256 object. + * @param [in] heap Dynamic memory hint. + * @param [in] devId Device Id. + * @return 0 on success. + * @return BAD_FUNC_ARG when sha256 is NULL. 
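+ *
+ * Illustrative one-shot usage (data/dataLen are placeholders):
+ *
+ *     wc_Sha256 sha;
+ *     byte hash[WC_SHA256_DIGEST_SIZE];
+ *     if (wc_InitSha256_ex(&sha, NULL, INVALID_DEVID) == 0) {
+ *         wc_Sha256Update(&sha, data, dataLen);
+ *         wc_Sha256Final(&sha, hash);
+ *         wc_Sha256Free(&sha);
+ *     }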
+ */ +int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) +{ + int ret = 0; + + /* Validate parameters. */ + if (sha256 == NULL) { + ret = BAD_FUNC_ARG; + } + else { + sha256->heap = heap; + #ifdef WOLF_CRYPTO_CB + sha256->devId = devId; + #endif + (void)devId; + + InitSha256(sha256); + } + + return ret; +} + +/* Initialize SHA-256 object for hashing. + * + * @param [in, out] sha256 SHA-256 object. + * @return 0 on success. + * @return BAD_FUNC_ARG when sha256 is NULL. + */ +int wc_InitSha256(wc_Sha256* sha256) +{ + return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID); +} + +/* Free the SHA-256 hash. + * + * @param [in] sha256 SHA-256 object. + */ +void wc_Sha256Free(wc_Sha256* sha256) +{ + /* No dynamic memory allocated. */ + (void)sha256; +} + +/* Update the hash with data. + * + * @param [in, out] sha256 SHA-256 object. + * @param [in] data Buffer of data to hash. + * @param [in] len Number of bytes in buffer to hash. + * @return 0 on success. + * @return BAD_FUNC_ARG when sha256 is NULL. + * @return BAD_FUNC_ARG when data is NULL but len is not 0. + */ +int wc_Sha256Update(wc_Sha256* sha256, const byte* data, word32 len) +{ + int ret; + + /* Validate parameters. */ + if ((sha256 == NULL) || ((data == NULL) && (len != 0))) { + ret = BAD_FUNC_ARG; + } + else { + ret = Sha256Update(sha256, data, len); + } + + return ret; +} + +/* Put the current hash into buffer. + * + * @param [in, out] sha256 SHA-256 object. + * @param [out] hash Buffer to hold hash result. + * @return 0 on success. + * @return BAD_FUNC_ARG when sha256 or hash is NULL. + */ +int wc_Sha256FinalRaw(wc_Sha256* sha256, byte* hash) +{ + int ret = 0; + + /* Validate parameters. */ + if ((sha256 == NULL) || (hash == NULL)) { + ret = BAD_FUNC_ARG; + } + else { + #ifdef LITTLE_ENDIAN_ORDER + word32 digest[WC_SHA256_DIGEST_SIZE / sizeof(word32)]; + + ByteReverseWords((word32*)digest, (word32*)sha256->digest, + WC_SHA256_DIGEST_SIZE); + XMEMCPY(hash, digest, WC_SHA256_DIGEST_SIZE); + #else + XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE); + #endif + } + + return ret; +} + +/* Finalize the hash and put into buffer. + * + * @param [in, out] sha256 SHA-256 object. + * @param [out] hash Buffer to hold hash result. + * @return 0 on success. + * @return BAD_FUNC_ARG when sha256 or hash is NULL. + */ +int wc_Sha256Final(wc_Sha256* sha256, byte* hash) +{ + int ret = 0; + + /* Validate parameters. */ + if ((sha256 == NULL) || (hash == NULL)) { + ret = BAD_FUNC_ARG; + } + else { + /* Finalize hash. */ + Sha256Final(sha256, hash); + /* Restart SHA-256 object for next hash. */ + InitSha256(sha256); + } + + return ret; +} + +/* Finalize the hash and put into buffer but don't modify state. + * + * @param [in, out] sha256 SHA-256 object. + * @param [out] hash Buffer to hold hash result. + * @return 0 on success. + * @return BAD_FUNC_ARG when sha256 or hash is NULL. + */ +int wc_Sha256GetHash(wc_Sha256* sha256, byte* hash) +{ + int ret; + + /* Validate parameters. */ + if ((sha256 == NULL) || (hash == NULL)) { + ret = BAD_FUNC_ARG; + } + else { + wc_Sha256 tmpSha256; + /* Create a copy of the hash to finalize. */ + ret = wc_Sha256Copy(sha256, &tmpSha256); + if (ret == 0) { + /* Finalize copy. */ + Sha256Final(&tmpSha256, hash); + } + } + + return ret; +} + +#ifdef WOLFSSL_HASH_FLAGS +/* Set flags of SHA-256 object. + * + * @param [in, out] sha256 SHA-256 object. + * @param [in] flags Flags to set. + * @return 0 on success. + */ +int wc_Sha256SetFlags(wc_Sha256* sha256, word32 flags) +{ + /* Check we have an object to use. 
*/ + if (sha256 != NULL) { + sha256->flags = flags; + } + return 0; +} +/* Get flags of SHA-256 object. + * + * @param [in] sha256 SHA-256 object. + * @param [out] flags Flags from SHA-256 object. + * @return 0 on success. + */ +int wc_Sha256GetFlags(wc_Sha256* sha256, word32* flags) +{ + /* Check we have an object and return parameter to use. */ + if ((sha256 != NULL) && (flags != NULL)) { + *flags = sha256->flags; + } + return 0; +} +#endif + +/* Deep copy the SHA-256 object. + * + * @param [in] src SHA-256 object to copy. + * @param [out] dst SHA-256 object to fill. + * @return 0 on success. + * @return BAD_FUNC_ARG when src or dst is NULL. + */ +int wc_Sha256Copy(wc_Sha256* src, wc_Sha256* dst) +{ + int ret = 0; + + /* Validate parameters. */ + if ((src == NULL) || (dst == NULL)) { + ret = BAD_FUNC_ARG; + } + else { + XMEMCPY(dst, src, sizeof(wc_Sha256)); + } + + return ret; +} + +#ifdef OPENSSL_EXTRA +/* Update the hash with one block of data. + * + * @param [in, out] sha256 SHA-256 object. + * @param [in] data Buffer of data to hash. + * @return 0 on success. + * @return BAD_FUNC_ARG when sha256 or data is NULL. + */ +int wc_Sha256Transform(wc_Sha256* sha256, const unsigned char* data) +{ + int ret = 0; + + /* Validate parameters. */ + if ((sha256 == NULL) || (data == NULL)) { + ret = BAD_FUNC_ARG; + } + else { + #ifdef LITTLE_ENDIAN_ORDER + ByteReverseWords(sha256->buffer, (word32*)data, WC_SHA256_BLOCK_SIZE); + #else + XMEMCPY(sha256->buffer, data, WC_SHA256_BLOCK_SIZE); + #endif + Sha256Transform(sha256, (byte*)sha256->buffer, 1); + } + + return ret; +} +#endif + +#if defined(WOLFSSL_HAVE_LMS) && !defined(WOLFSSL_LMS_FULL_HASH) +/* Update the hash with one block of data and optionally get hash. + * + * @param [in, out] sha256 SHA-256 object. + * @param [in] data Buffer of data to hash. + * @param [out] hash Buffer to hold hash. May be NULL. + * @return 0 on success. + * @return BAD_FUNC_ARG when sha256 or data is NULL. + */ +int wc_Sha256HashBlock(wc_Sha256* sha256, const unsigned char* data, + unsigned char* hash) +{ + int ret = 0; + + /* Validate parameters. */ + if ((sha256 == NULL) || (data == NULL)) { + ret = BAD_FUNC_ARG; + } + else { + /* Hash block. */ + Sha256Transform(sha256, data, 1); + + if (hash != NULL) { + /* Reverse bytes in digest. */ + #ifdef LITTLE_ENDIAN_ORDER + word32* hash32 = (word32*)hash; + word32* digest = (word32*)sha256->digest; + hash32[0] = ByteReverseWord32(digest[0]); + hash32[1] = ByteReverseWord32(digest[1]); + hash32[2] = ByteReverseWord32(digest[2]); + hash32[3] = ByteReverseWord32(digest[3]); + hash32[4] = ByteReverseWord32(digest[4]); + hash32[5] = ByteReverseWord32(digest[5]); + hash32[6] = ByteReverseWord32(digest[6]); + hash32[7] = ByteReverseWord32(digest[7]); + #else + XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE); + #endif + /* Reset state. 
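+             * (Same initial values as InitSha256; vector-crypto builds use
+             * the f, e, b, a, h, g, d, c word order.)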
*/
+        #ifndef WOLFSSL_RISCV_VECTOR_CRYPTO_ASM
+            sha256->digest[0] = 0x6A09E667L;
+            sha256->digest[1] = 0xBB67AE85L;
+            sha256->digest[2] = 0x3C6EF372L;
+            sha256->digest[3] = 0xA54FF53AL;
+            sha256->digest[4] = 0x510E527FL;
+            sha256->digest[5] = 0x9B05688CL;
+            sha256->digest[6] = 0x1F83D9ABL;
+            sha256->digest[7] = 0x5BE0CD19L;
+        #else
+            /* f, e, b, a, h, g, d, c */
+            sha256->digest[0] = 0x9B05688CL;
+            sha256->digest[1] = 0x510E527FL;
+            sha256->digest[2] = 0xBB67AE85L;
+            sha256->digest[3] = 0x6A09E667L;
+            sha256->digest[4] = 0x5BE0CD19L;
+            sha256->digest[5] = 0x1F83D9ABL;
+            sha256->digest[6] = 0xA54FF53AL;
+            sha256->digest[7] = 0x3C6EF372L;
+        #endif
+        }
+    }
+
+    return ret;
+}
+#endif /* WOLFSSL_HAVE_LMS && !WOLFSSL_LMS_FULL_HASH */
+
+#endif /* !NO_SHA256 */
+
+
+#ifdef WOLFSSL_SHA224
+
+/* Initialize SHA-224 object for hashing.
+ *
+ * @param [in, out] sha224  SHA-224 object.
+ */
+static void InitSha224(wc_Sha224* sha224)
+{
+    /* Set initial hash values. */
+#ifndef WOLFSSL_RISCV_VECTOR_CRYPTO_ASM
+    sha224->digest[0] = 0xc1059ed8;
+    sha224->digest[1] = 0x367cd507;
+    sha224->digest[2] = 0x3070dd17;
+    sha224->digest[3] = 0xf70e5939;
+    sha224->digest[4] = 0xffc00b31;
+    sha224->digest[5] = 0x68581511;
+    sha224->digest[6] = 0x64f98fa7;
+    sha224->digest[7] = 0xbefa4fa4;
+#else
+    /* f, e, b, a, h, g, d, c */
+    sha224->digest[0] = 0x68581511;
+    sha224->digest[1] = 0xffc00b31;
+    sha224->digest[2] = 0x367cd507;
+    sha224->digest[3] = 0xc1059ed8;
+    sha224->digest[4] = 0xbefa4fa4;
+    sha224->digest[5] = 0x64f98fa7;
+    sha224->digest[6] = 0xf70e5939;
+    sha224->digest[7] = 0x3070dd17;
+#endif
+
+    /* No hashed data. */
+    sha224->buffLen = 0;
+    /* No data hashed. */
+    sha224->loLen = 0;
+    sha224->hiLen = 0;
+
+#ifdef WOLFSSL_HASH_FLAGS
+    sha224->flags = 0;
+#endif
+}
+
+/* Initialize SHA-224 object for hashing.
+ *
+ * @param [in, out] sha224  SHA-224 object.
+ * @param [in]      heap    Dynamic memory hint.
+ * @param [in]      devId   Device Id.
+ * @return  0 on success.
+ * @return  BAD_FUNC_ARG when sha224 is NULL.
+ */
+int wc_InitSha224_ex(wc_Sha224* sha224, void* heap, int devId)
+{
+    int ret = 0;
+
+    /* Validate parameters. */
+    if (sha224 == NULL) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        sha224->heap = heap;
+        (void)devId;
+
+        InitSha224(sha224);
+    }
+
+    return ret;
+}
+
+/* Initialize SHA-224 object for hashing.
+ *
+ * @param [in, out] sha224  SHA-224 object.
+ * @return  0 on success.
+ * @return  BAD_FUNC_ARG when sha224 is NULL.
+ */
+int wc_InitSha224(wc_Sha224* sha224)
+{
+    return wc_InitSha224_ex(sha224, NULL, INVALID_DEVID);
+}
+
+/* Update the hash with data.
+ *
+ * @param [in, out] sha224  SHA-224 object.
+ * @param [in]      data    Buffer of data to hash.
+ * @param [in]      len     Number of bytes in buffer to hash.
+ * @return  0 on success.
+ * @return  BAD_FUNC_ARG when sha224 is NULL.
+ * @return  BAD_FUNC_ARG when data is NULL but len is not 0.
+ */
+int wc_Sha224Update(wc_Sha224* sha224, const byte* data, word32 len)
+{
+    int ret;
+
+    /* Validate parameters. */
+    if ((sha224 == NULL) || ((data == NULL) && (len > 0))) {
+        ret = BAD_FUNC_ARG;
+    }
+    else {
+        ret = Sha256Update((wc_Sha256 *)sha224, data, len);
+    }
+
+    return ret;
+}
+
+/* Finalize the hash and put into buffer.
+ *
+ * @param [in, out] sha224  SHA-224 object.
+ * @param [out]     hash    Buffer to hold hash result.
+ * @return  0 on success.
+ * @return  BAD_FUNC_ARG when sha224 or hash is NULL.
+ */
+int wc_Sha224Final(wc_Sha224* sha224, byte* hash)
+{
+    int ret = 0;
+
+    /* Validate parameters.
*/ + if ((sha224 == NULL) || (hash == NULL)) { + ret = BAD_FUNC_ARG; + } + else { + word32 hashTmp[WC_SHA256_DIGEST_SIZE/sizeof(word32)]; + /* Finalize hash. */ + Sha256Final((wc_Sha256*)sha224, (byte*)hashTmp); + /* Return only 224 bits. */ + XMEMCPY(hash, hashTmp, WC_SHA224_DIGEST_SIZE); + /* Restart SHA-256 object for next hash. */ + InitSha224(sha224); + } + + return ret; +} + +/* Free the SHA-224 hash. + * + * @param [in] sha224 SHA-224 object. + */ +void wc_Sha224Free(wc_Sha224* sha224) +{ + /* No dynamic memory allocated. */ + (void)sha224; +} + +/* Finalize the hash and put into buffer but don't modify state. + * + * @param [in, out] sha224 SHA-224 object. + * @param [out] hash Buffer to hold hash result. + * @return 0 on success. + * @return BAD_FUNC_ARG when sha224 or hash is NULL. + */ +int wc_Sha224GetHash(wc_Sha224* sha224, byte* hash) +{ + int ret; + + /* Validate parameters. */ + if ((sha224 == NULL) || (hash == NULL)) { + ret = BAD_FUNC_ARG; + } + else { + wc_Sha224 tmpSha224; + /* Create a copy of the hash to finalize. */ + ret = wc_Sha224Copy(sha224, &tmpSha224); + if (ret == 0) { + /* Finalize copy. */ + ret = wc_Sha224Final(&tmpSha224, hash); + } + } + + return ret; +} + +#ifdef WOLFSSL_HASH_FLAGS +/* Set flags of SHA-224 object. + * + * @param [in, out] sha224 SHA-224 object. + * @param [in] flags Flags to set. + * @return 0 on success. + */ +int wc_Sha224SetFlags(wc_Sha224* sha224, word32 flags) +{ + /* Check we have an object to use. */ + if (sha224 != NULL) { + sha224->flags = flags; + } + return 0; +} +/* Get flags of SHA-224 object. + * + * @param [in] sha224 SHA-224 object. + * @param [out] flags Flags from SHA-224 object. + * @return 0 on success. + */ +int wc_Sha224GetFlags(wc_Sha224* sha224, word32* flags) +{ + /* Check we have an object and return parameter to use. */ + if ((sha224 != NULL) && (flags != NULL)) { + *flags = sha224->flags; + } + return 0; +} +#endif + +/* Deep copy the SHA-224 object. + * + * @param [in] src SHA-224 object to copy. + * @param [out] dst SHA-224 object to fill. + * @return 0 on success. + * @return BAD_FUNC_ARG when src or dst is NULL. + */ +int wc_Sha224Copy(wc_Sha224* src, wc_Sha224* dst) +{ + int ret = 0; + + /* Validate parameters. */ + if ((src == NULL) || (dst == NULL)) { + ret = BAD_FUNC_ARG; + } + else { + XMEMCPY(dst, src, sizeof(wc_Sha224)); + } + + return ret; +} + +#endif /* WOLFSSL_SHA224 */ + +#endif /* !NO_SHA256 || WOLFSSL_SHA224 */ +#endif /* WOLFSSL_RISCV_ASM */ diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index f955dff4f7..232076b1f6 100644 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -63,8 +63,8 @@ on the specific device platform. 
 #endif
 
-#if !defined(NO_SHA256) && (!defined(WOLFSSL_ARMASM) && \
-    !defined(WOLFSSL_ARMASM_NO_NEON))
+#if !defined(NO_SHA256) && !(defined(WOLFSSL_ARMASM) || \
+    defined(WOLFSSL_ARMASM_NO_NEON)) && !defined(WOLFSSL_RISCV_ASM)
 
 #if defined(HAVE_FIPS) && defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
     /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
diff --git a/wolfssl/wolfcrypt/port/riscv/riscv-64-asm.h b/wolfssl/wolfcrypt/port/riscv/riscv-64-asm.h
index 25e69b3f08..5407654ee9 100644
--- a/wolfssl/wolfcrypt/port/riscv/riscv-64-asm.h
+++ b/wolfssl/wolfcrypt/port/riscv/riscv-64-asm.h
@@ -27,6 +27,7 @@
 #define ASM_WORD(i) \
     ".word " #i "\n\t"
 
+
 #define REG_X0 0
 #define REG_X1 1
 #define REG_X2 2
@@ -127,6 +128,171 @@
 #define REG_V30 30
 #define REG_V31 31
 
+
+#ifdef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
+
+/* Reverse bytes in 64-bit register. */
+#define REV8(rd, rs)                                        \
+    ASM_WORD((0b011010111000 << 20) | (0b101 << 12) |       \
+             (0b0010011 << 0) |                             \
+             (rs << 15) | (rd << 7))
+
+/* rd = rs1[0..31] | rs2[0..31]. */
+#define PACK(rd, rs1, rs2)                                  \
+    ASM_WORD((0b0000100 << 25) | (0b100 << 12) | 0b0110011 | \
+             (rs2 << 20) | (rs1 << 15) | (rd << 7))
+
+#endif /* WOLFSSL_RISCV_BASE_BIT_MANIPULATION */
+
+/*
+ * Load and store
+ */
+
+/* 64-bit width when loading. */
+#define WIDTH_64    0b111
+/* 32-bit width when loading. */
+#define WIDTH_32    0b110
+
+/* Load n Vector registers with width-bit components. */
+#define VLRE_V(vd, rs1, cnt, width)                         \
+    ASM_WORD(0b0000111 | (width << 12) | (0b00101000 << 20) | \
+             (0 << 28) | ((cnt - 1) << 29) | (vd << 7) | (rs1 << 15))
+/* Load 1 Vector register with 64-bit components. */
+#define VL1RE64_V(vd, rs1)  VLRE_V(vd, rs1, 1, WIDTH_64)
+/* Load 1 Vector register with 32-bit components. */
+#define VL1RE32_V(vd, rs1)  VLRE_V(vd, rs1, 1, WIDTH_32)
+/* Load 2 Vector registers with 32-bit components. */
+#define VL2RE32_V(vd, rs1)  VLRE_V(vd, rs1, 2, WIDTH_32)
+/* Load 4 Vector registers with 32-bit components. */
+#define VL4RE32_V(vd, rs1)  VLRE_V(vd, rs1, 4, WIDTH_32)
+/* Load 8 Vector registers with 32-bit components. */
+#define VL8RE32_V(vd, rs1)  VLRE_V(vd, rs1, 8, WIDTH_32)
+
+/* Store n Vector registers. */
+#define VSR_V(vs3, rs1, cnt)                                \
+    ASM_WORD(0b0100111 | (0b00101000 << 20) | (0 << 28) |   \
+             ((cnt-1) << 29) | (vs3 << 7) | (rs1 << 15))
+/* Store 1 Vector register. */
+#define VS1R_V(vs3, rs1)    VSR_V(vs3, rs1, 1)
+/* Store 2 Vector registers. */
+#define VS2R_V(vs3, rs1)    VSR_V(vs3, rs1, 2)
+/* Store 4 Vector registers. */
+#define VS4R_V(vs3, rs1)    VSR_V(vs3, rs1, 4)
+/* Store 8 Vector registers. */
+#define VS8R_V(vs3, rs1)    VSR_V(vs3, rs1, 8)
+
+/* Move from vector register to vector register. */
+#define VMV_V_V(vd, vs1)                                    \
+    ASM_WORD((0b1010111 << 0) | (0b000 << 12) | (0b1 << 25) | \
+             (0b010111 << 26) | (vd << 7) | (vs1 << 15))
+/* Splat register to each component of the vector register. */
+#define VMV_V_X(vd, rs1)                                    \
+    ASM_WORD((0b1010111 << 0) | (0b100 << 12) | (0b1 << 25) | \
+             (0b010111 << 26) | (vd << 7) | (rs1 << 15))
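+
+/* Example: VMV_V_V(4, 5) emits ".word 0x5E028257", the encoding of
+ * "vmv.v.v v4, v5". Instructions are hand-encoded with ASM_WORD so that no
+ * assembler support for the vector or bit-manipulation extensions is
+ * required. */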
+/* Move n vector registers to vector registers. */
+#define VMVR_V(vd, vs2, n)                                  \
+    ASM_WORD((0b1010111 << 0) | (0b011 << 12) | (0b1 << 25) | \
+             (0b100111 << 26) | (vd << 7) | ((n-1) << 15) | \
+             (vs2 << 20))
+
+
+/*
+ * Arithmetic
+ */
+
+/* vd = vs2 + [i,] */
+#define VADD_VI(vd, vs2, i)                                 \
+    ASM_WORD((0b000000 << 26) | (0b1 << 25) |               \
+             (0b011 << 12) | (0b1010111 << 0) |             \
+             (vd << 7) | (i << 15) | (vs2 << 20))
+/* vd = vs1 + vs2 */
+#define VADD_VV(vd, vs1, vs2)                               \
+    ASM_WORD((0b000000 << 26) | (0b1 << 25) |               \
+             (0b000 << 12) | (0b1010111 << 0) |             \
+             (vs2 << 20) | (vs1 << 15) | (vd << 7))
+/* vd = vs1 ^ vs2 */
+#define VXOR_VV(vd, vs1, vs2)                               \
+    ASM_WORD((0b001011 << 26) | (0b1 << 25) |               \
+             (0b000 << 12) | (0b1010111 << 0) |             \
+             (vd << 7) | (vs1 << 15) | (vs2 << 20))
+/* vd = vs1 & vs2 */
+#define VAND_VV(vd, vs1, vs2)                               \
+    ASM_WORD((0b001001 << 26) | (0b1 << 25) |               \
+             (0b000 << 12) | (0b1010111 << 0) |             \
+             (vd << 7) | (vs1 << 15) | (vs2 << 20))
+/* vd = vs1 | vs2 */
+#define VOR_VV(vd, vs1, vs2)                                \
+    ASM_WORD((0b001010 << 26) | (0b1 << 25) |               \
+             (0b000 << 12) | (0b1010111 << 0) |             \
+             (vd << 7) | (vs1 << 15) | (vs2 << 20))
+
+
+/*
+ * Permute
+ */
+
+/* x[rd] = vs2[0] */
+#define VMV_X_S(rd, vs2)                                    \
+    ASM_WORD((0b010000 << 26) | (0b1 << 25) |               \
+             (0b010 << 12) | (0b1010111 << 0) |             \
+             (rd << 7) | (vs2 << 20))
+
+/* vd[0] = x[rs1] */
+#define VMV_S_X(vd, rs1)                                    \
+    ASM_WORD((0b010000 << 26) | (0b1 << 25) |               \
+             (0b110 << 12) | (0b1010111 << 0) |             \
+             (vd << 7) | (rs1 << 15))
+
+/* vd[shift..max] = vs2[0..max-shift]
+ * Sliding up does not change the bottom part of the destination.
+ */
+#define VSLIDEUP_VI(vd, vs2, shift)                         \
+    ASM_WORD((0b001110 << 26) | (0b1 << 25) |               \
+             (0b011 << 12) | (0b1010111 << 0) |             \
+             (vd << 7) | (shift << 15) | (vs2 << 20))
+
+/* vd[0..max-shift] = vs2[shift..max]
+ * Sliding down changes the top part of the destination.
+ */
+#define VSLIDEDOWN_VI(vd, vs2, shift)                       \
+    ASM_WORD((0b001111 << 26) | (0b1 << 25) |               \
+             (0b011 << 12) | (0b1010111 << 0) |             \
+             (vd << 7) | (shift << 15) | (vs2 << 20))
+
+/* vd[i] = vs1[vs2[i]] */
+#define VRGATHER_VV(vd, vs1, vs2)                           \
+    ASM_WORD((0b001100 << 26) | (0b1 << 25) |               \
+             (0b000 << 12) | (0b1010111 << 0) |             \
+             (vd << 7) | (vs1 << 15) | (vs2 << 20))
+
+
+/*
+ * Setting options.
+ */
+
+/* Set the options of vector instructions. */
+#define VSETIVLI(rd, n, vma, vta, vsew, vlmul)              \
+    ASM_WORD((0b11 << 30) | (0b111 << 12) | (0b1010111 << 0) | \
+             (rd << 7) | (n << 15) | (vma << 27) |          \
+             (vta << 26) | (vsew << 23) | (vlmul << 20))
+
+
+#if defined(WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION) || \
+    defined(WOLFSSL_RISCV_VECTOR_CRYPTO_ASM)
+
+/*
+ * Bit Manipulation
+ */
+
+/* Reverse order of bytes in words of vector register. */
+#define VREV8(vd, vs2)                                      \
+    ASM_WORD((0b010010 << 26) | (0b1 << 25) | (0b01001 << 15) | \
+             (0b010 << 12) | (0b1010111 << 0) |             \
+             (vs2 << 20) | (vd << 7))
+
+#endif /* WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION ||
+        * WOLFSSL_RISCV_VECTOR_CRYPTO_ASM */
+
 #endif /* WOLFSSL_RISCV_ASM */
 
 #endif /* WOLF_CRYPT_RISCV_64_ASM_H */