Skip to content

Commit

Permalink
Add CUDA support (#7436)
Browse files Browse the repository at this point in the history
* Redirect the AesEncrypt_C call to device
* Fix function declarations
* Force CC=nvcc with CUDA
* Don't let C++ mangle function names
* Add larger parallelization
* Add in memory copy to device
* `nvcc` does not support '-Wall' nor '-Wno-unused'
* Add in README.md
* Clean up script to output color coded data
* Fix Asymmetric cipher comparisons
* Add in standard output parsing in addition to the CSV
* Add option to output results in a CSV

---------

Co-authored-by: Andras Fekete <[email protected]>
  • Loading branch information
bandi13 and Andras Fekete authored Apr 23, 2024
1 parent c3d9fb6 commit a75c2be
Show file tree
Hide file tree
Showing 8 changed files with 1,315 additions and 1 deletion.
5 changes: 5 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -310,3 +310,8 @@ merge-clean:
@find ./ | $(GREP) \.BASE | xargs rm -f
@find ./ | $(GREP) \~$$ | xargs rm -f

# Pattern rule: build foo.o from foo.cu by running $(NVCC) with -dc and
# $(CUDAFLAGS).
# NOTE(review): -dc is presumably nvcc's "compile to relocatable device code"
# mode -- confirm. NVCC and CUDAFLAGS are not assigned anywhere in the visible
# hunks; verify they are defined, otherwise this recipe expands to an empty
# command name.
%.o: %.cu
$(NVCC) -dc $(CUDAFLAGS) -o $@ $<

# Suffix rule: build a libtool object (.lo) from a .cu source by running
# $(COMPILE) under libtool's compile mode.
# NOTE(review): $(COMPILE) is presumably the automake-generated C compile
# command, which would invoke nvcc once configure has set CC=nvcc -- confirm.
# The trailing -static is presumably the libtool compile-mode flag restricting
# the build to a static object -- confirm against the libtool manual.
.cu.lo:
$(LIBTOOL) --tag=CC --mode=compile $(COMPILE) --compile -o $@ $< -static
20 changes: 19 additions & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -4894,6 +4894,19 @@ then
AM_CFLAGS="$AM_CFLAGS -DWC_RC2"
fi

# CUDA
# --enable-cuda stores the user's choice in ENABLED_CUDA; the default is "no".
AC_ARG_ENABLE([cuda],
[AS_HELP_STRING([--enable-cuda],[Enable NVidia CUDA support (default: disabled)])],
[ ENABLED_CUDA=$enableval ],
[ ENABLED_CUDA=no ]
)

# When CUDA is enabled, switch the C compiler to nvcc and define both WC_CUDA
# and HAVE_CUDA for every compiled source.
# NOTE(review): CC is overwritten unconditionally, discarding any CC the user
# passed to configure, and nothing visible here checks that nvcc is actually
# installed -- confirm whether a program check is wanted.
if test "$ENABLED_CUDA" = "yes"
then
CC=nvcc
AM_CFLAGS="$AM_CFLAGS -DWC_CUDA -DHAVE_CUDA"
fi

# Certificate Service Support (CFLAG sections later) keep above FIPS section
AC_ARG_ENABLE([certservice],
[AS_HELP_STRING([--enable-certservice],[Enable cert service (default: disabled)])],
Expand Down Expand Up @@ -9144,7 +9157,10 @@ fi
# For distro disable custom build options that interfere with symbol generation
if test "$GCC" = "yes" && test "$ENABLED_DISTRO" = "no"
then
AM_CFLAGS="$AM_CFLAGS -Wall -Wno-unused"
if test "$ENABLED_CUDA" != "yes"
then
AM_CFLAGS="$AM_CFLAGS -Wall -Wno-unused"
fi
if test "$ax_enable_debug" = "no"
then
AS_IF([test "x$ENABLED_OPTFLAGS" = "xyes"], [
Expand Down Expand Up @@ -9429,6 +9445,7 @@ AM_CONDITIONAL([BUILD_LINUXKM],[test "$ENABLED_LINUXKM" = "yes"])
AM_CONDITIONAL([BUILD_NO_LIBRARY],[test "$ENABLED_NO_LIBRARY" = "yes"])
AM_CONDITIONAL([BUILD_BENCHMARK],[test "$ENABLED_BENCHMARK" = "yes"])
AM_CONDITIONAL([BUILD_RC2],[test "x$ENABLED_RC2" = "xyes"])
AM_CONDITIONAL([BUILD_CUDA],[test "x$ENABLED_CUDA" = "xyes"])
AM_CONDITIONAL([BUILD_CAAM],[test "x$ENABLED_CAAM" != "xno"])
AM_CONDITIONAL([BUILD_QNXCAAM],[test "x$ENABLED_CAAM_QNX" = "xyes"])
AM_CONDITIONAL([BUILD_IOTSAFE],[test "x$ENABLED_IOTSAFE" = "xyes"])
Expand Down Expand Up @@ -9753,6 +9770,7 @@ echo " * ARIA: $ENABLED_ARIA"
echo " * DES3: $ENABLED_DES3"
echo " * DES3 TLS Suites: $ENABLED_DES3_TLS_SUITES"
echo " * Camellia: $ENABLED_CAMELLIA"
echo " * CUDA: $ENABLED_CUDA"
echo " * SM4-ECB: $ENABLED_SM4_ECB"
echo " * SM4-CBC: $ENABLED_SM4_CBC"
echo " * SM4-CTR: $ENABLED_SM4_CTR"
Expand Down
164 changes: 164 additions & 0 deletions scripts/benchmark_compare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
#!/bin/bash
# This script is designed to compare the output of wolfcrypt/benchmark test
# application. If the file has an extension ".csv", then it will parse the
# comma separated format, otherwise it will use the standard output format. The
# green colored output field is the better result.
# Usage: benchmark_compare.sh <first file> <second file>
# You can define a few variables to set options:
# THRESHOLD - set the threshold for equality between two results
# OUTPUT_CSV - set to "1" to print CSV

# Positional arguments: the two benchmark result files to compare.
FIRST_FILE=$1
SECOND_FILE=$2
# Tunables that may be overridden from the environment.
THRESHOLD=${THRESHOLD:-"10"}
OUTPUT_CSV=${OUTPUT_CSV:-"0"}

# Fail early with a usage message instead of hitting an "ambiguous redirect"
# error further down when an argument is missing or a file cannot be read.
if [ "$#" -lt 2 ]; then
    echo "Usage: $0 <first file> <second file>" >&2
    exit 1
fi
for resultFile in "$FIRST_FILE" "$SECOND_FILE"; do
    if [ ! -r "$resultFile" ]; then
        echo "$0: cannot read '$resultFile'" >&2
        exit 1
    fi
done

# Results parsed from the first file, keyed by algorithm name.
declare -A symStats   # symmetric cipher results
declare -A asymStats  # asymmetric cipher results

# getAlgo <asCSV> <mode> <line>
#
# Print the key under which one benchmark result line is stored.
#   asCSV - "1" when <line> is comma separated, anything else for the
#           standard benchmark output format
#   mode  - "1" for a symmetric cipher line, anything else is treated as an
#           asymmetric cipher line
#   line  - the raw result line
#
# The arguments are now read from the positional parameters; previously the
# function silently depended on the caller's globals of the same names.
#
# NOTE(review): for asymmetric results the numeric column between the
# algorithm and the operation is dropped in both formats (e.g. "RSA-public"),
# so entries that differ only in that column share one key -- confirm this is
# intended.
function getAlgo() {
    local isCsv=$1
    local section=$2
    local text=$3

    if [ "$isCsv" = 1 ]; then
        local -a fields
        IFS=',' read -ra fields <<< "$text"
        if [ "$section" = 1 ]; then
            echo "${fields[0]}"
        elif [ "${fields[2]}" = "" ]; then
            # No third column: the first column alone is the key.
            echo "${fields[0]}"
        else
            echo "${fields[0]}-${fields[2]}"
        fi
        return
    fi

    if [ "$section" = 1 ]; then
        # Drop everything from the "<n> MiB" column onwards.
        echo "$text" | sed 's/ *[0-9]* MiB.*//g'
    elif [[ $text == "scrypt"* ]]; then
        echo "scrypt"
    else
        # Drop everything from the "<n> ops" column onwards, then collapse a
        # space-delimited numeric column into a single '-'.
        echo "$text" | sed 's/ *[0-9]* ops.*//g' | sed 's/ \+[0-9]\+ \+/-/g'
    fi
}

# getValue <asCSV> <mode> <line>
#
# Print the figure that is compared for one benchmark result line.
#   asCSV - "1" when <line> is comma separated, anything else for the
#           standard benchmark output format
#   mode  - "1" for a symmetric cipher line, anything else is treated as an
#           asymmetric cipher line
#   line  - the raw result line
#
# CSV input: column 2 for symmetric lines, column 5 for asymmetric lines.
# Standard output: the number in front of "MiB/s" (symmetric) or "ops/sec"
# (asymmetric).
#
# The arguments are now read from the positional parameters; previously the
# function silently depended on the caller's globals of the same names.
function getValue() {
    local isCsv=$1
    local section=$2
    local text=$3

    if [ "$isCsv" = 1 ]; then
        local -a fields
        IFS=',' read -ra fields <<< "$text"
        if [ "$section" = 1 ]; then
            echo "${fields[1]}"
        else
            echo "${fields[4]}"
        fi
        return
    fi

    if [ "$section" = 1 ]; then
        echo "$text" | sed 's/.*seconds, *//g' | sed 's/ *MiB\/s.*//g'
    else
        echo "$text" | sed 's/.* ms, *//g' | sed 's/ ops\/sec.*//g'
    fi
}

# Pass 1: read FIRST_FILE and remember each result, keyed by algorithm name,
# in symStats (mode 1) or asymStats (mode 2).
#   mode 0 - the current line carries no benchmark result
#   mode 1 - symmetric cipher result
#   mode 2 - asymmetric cipher result
asCSV=0
mode=0
# The input format depends only on the file name, so decide it once.
if [[ $FIRST_FILE == *".csv" ]]; then
    asCSV=1
fi

while IFS= read -r line; do
    if [ "$asCSV" = 1 ]; then
        # CSV: a section title selects the mode and stays in force until a
        # blank line. The two lines after the title are consumed and ignored
        # (presumably column headers -- confirm against the benchmark output).
        if [[ $line == *"Symmetric Ciphers"* ]]; then
            mode=1
            read -r _
            read -r _
        elif [[ $line == *"Asymmetric Ciphers"* ]]; then
            mode=2
            read -r _
            read -r _
        elif [[ $line == "" ]]; then
            mode=0
        fi
    else
        # Standard output: every line is classified on its own by its unit.
        if [[ $line == *"MiB/s"* ]]; then
            mode=1
        elif [[ $line == *"ops/sec"* ]]; then
            mode=2
        else
            mode=0
        fi
    fi

    # Only lines containing a comma hold a result. This mirrors the check in
    # the second pass and keeps the CSV section title itself from being stored
    # as a bogus entry.
    if [ "$mode" -ne 0 ] && [[ $line == *","* ]]; then
        ALGO=$(getAlgo "$asCSV" "$mode" "$line")
        VALUE=$(getValue "$asCSV" "$mode" "$line")

        # An empty string is not a valid associative-array subscript.
        if [ -z "$ALGO" ]; then
            continue
        fi

        if [ "$mode" = "1" ]; then
            symStats["${ALGO}"]=${VALUE}
        elif [ "$mode" = "2" ]; then
            asymStats["${ALGO}"]=${VALUE}
        fi
    fi
# Quoted so that a path containing spaces is still a single redirect target.
done < "${FIRST_FILE}"

RED='\033[0;31m'
GRN='\033[0;32m'
NC='\033[0m' # No Color
function printData() { # printData <ALGO> <val1> <val2>
ALGO=$1
VAL1=$2
VAL2=$3
if (( $(echo "sqrt( (${VAL1} - ${VAL2})^2 ) < ${THRESHOLD}" | bc -l) )); then
# take absolute value and check if less than a threshold
echo "${ALGO},${GRN}${VAL1}${NC},=,${GRN}${VAL2}${NC}\n"
elif (( $(echo "${VAL1} > ${VAL2}" | bc -l) )); then
echo "${ALGO},${GRN}${VAL1}${NC},>,${VAL2}\n"
else
echo "${ALGO},${VAL1},<,${GRN}${VAL2}${NC}\n"
fi
}

# Pass 2: read SECOND_FILE and, for every result line, append one comparison
# row (built by printData from the value remembered for the first file and
# the value on this line) to RES.
#   mode 0 - the current line carries no benchmark result
#   mode 1 - symmetric cipher result
#   mode 2 - asymmetric cipher result
asCSV=0
mode=0
# Start from an empty report so a RES inherited from the environment cannot
# leak into the output.
RES=""
# The input format depends only on the file name, so decide it once.
if [[ $SECOND_FILE == *".csv" ]]; then
    asCSV=1
fi

while IFS= read -r line; do
    if [ "$asCSV" = 1 ]; then
        # CSV: a section title emits the table header, selects the mode and
        # stays in force until a blank line. The two lines after the title are
        # consumed and ignored (presumably column headers -- confirm).
        if [[ $line == *"Symmetric Ciphers"* ]]; then
            RES+="ALGO,${FIRST_FILE},diff(MB/s),${SECOND_FILE}\n"
            mode=1
            read -r _
            read -r _
        elif [[ $line == *"Asymmetric Ciphers"* ]]; then
            RES+="\nALGO,${FIRST_FILE},diff(ops/sec),${SECOND_FILE}\n"
            mode=2
            read -r _
            read -r _
        elif [[ $line == "" ]]; then
            mode=0
        fi
    else
        # Standard output: every line is classified on its own by its unit.
        if [[ $line == *"MiB/s"* ]]; then
            mode=1
        elif [[ $line == *"ops/sec"* ]]; then
            mode=2
        else
            mode=0
        fi
    fi

    # Only lines containing a comma hold a result (this skips the CSV section
    # title, which is still in $line at this point).
    if [ "$mode" -ne 0 ] && [[ $line == *","* ]]; then
        ALGO=$(getAlgo "$asCSV" "$mode" "$line")
        VALUE=$(getValue "$asCSV" "$mode" "$line")

        # An empty string is not a valid associative-array subscript.
        if [ -z "$ALGO" ]; then
            continue
        fi

        if [ "$mode" = "1" ]; then
            RES+=$(printData "${ALGO}" "${symStats["${ALGO}"]}" "${VALUE}")
        elif [ "$mode" = "2" ]; then
            RES+=$(printData "${ALGO}" "${asymStats["${ALGO}"]}" "${VALUE}")
        fi
    fi
# Quoted so that a path containing spaces is still a single redirect target.
done < "${SECOND_FILE}"

# Print the accumulated report. echo -e expands the "\n" row separators and
# the colour escapes stored in RES. With OUTPUT_CSV=1 the rows are printed as
# they are; any other value pipes them through column to align the comma
# separated fields into a table.
case "${OUTPUT_CSV}" in
    1)
        echo -e "$RES"
        ;;
    *)
        echo -e "$RES" | column -t -s ',' -L
        ;;
esac
1 change: 1 addition & 0 deletions scripts/include.am
Original file line number Diff line number Diff line change
Expand Up @@ -128,5 +128,6 @@ dist_noinst_SCRIPTS+= scripts/dtlscid.test
endif

EXTRA_DIST += scripts/bench/bench_functions.sh
EXTRA_DIST += scripts/benchmark_compare.sh

EXTRA_DIST += scripts/user_settings_asm.sh
9 changes: 9 additions & 0 deletions src/include.am
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ endif

# AES is always built from aes.c. When BUILD_CUDA is set, the CUDA port
# source aes-cuda.cu is added to the same library as well.
if BUILD_AES
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes.c
if BUILD_CUDA
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/cuda/aes-cuda.cu
endif BUILD_CUDA
endif

if BUILD_AESNI
Expand Down Expand Up @@ -154,6 +157,9 @@ endif

if BUILD_AES
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes.c
if BUILD_CUDA
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/cuda/aes-cuda.cu
endif BUILD_CUDA
if BUILD_ARMASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-aes.c
endif BUILD_ARMASM
Expand Down Expand Up @@ -639,6 +645,9 @@ endif
if !BUILD_FIPS_CURRENT
if BUILD_AES
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes.c
if BUILD_CUDA
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/cuda/aes-cuda.cu
endif BUILD_CUDA
if BUILD_ARMASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-aes.c
endif BUILD_ARMASM
Expand Down
12 changes: 12 additions & 0 deletions wolfcrypt/src/aes.c
Original file line number Diff line number Diff line change
Expand Up @@ -1915,6 +1915,7 @@ static word32 GetTable8_4(const byte* t, byte o0, byte o1, byte o2, byte o3)
((word32)(t)[o2] << 8) | ((word32)(t)[o3] << 0))
#endif

#ifndef HAVE_CUDA
/* Encrypt a block using AES.
*
* @param [in] aes AES object.
Expand Down Expand Up @@ -2215,6 +2216,11 @@ static void AesEncryptBlocks_C(Aes* aes, const byte* in, byte* out, word32 sz)
}
}
#endif
#else
extern void AesEncrypt_C(Aes* aes, const byte* inBlock, byte* outBlock,
word32 r);
extern void AesEncryptBlocks_C(Aes* aes, const byte* in, byte* out, word32 sz);
#endif /* HAVE_CUDA */

#else

Expand Down Expand Up @@ -2710,6 +2716,7 @@ static void bs_encrypt(bs_word* state, bs_word* rk, word32 r)
bs_inv_transpose(state, trans);
}

#ifndef HAVE_CUDA
/* Encrypt a block using AES.
*
* @param [in] aes AES object.
Expand Down Expand Up @@ -2761,6 +2768,11 @@ static void AesEncryptBlocks_C(Aes* aes, const byte* in, byte* out, word32 sz)
}
}
#endif
#else
extern void AesEncrypt_C(Aes* aes, const byte* inBlock, byte* outBlock,
word32 r);
extern void AesEncryptBlocks_C(Aes* aes, const byte* in, byte* out, word32 sz);
#endif /* HAVE_CUDA */

#endif /* !WC_AES_BITSLICED */

Expand Down
9 changes: 9 additions & 0 deletions wolfcrypt/src/port/cuda/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
You will need to have the CUDA libraries and toolchains installed to be able to use this. For the simplest
setup, I used the 'nvidia/cuda:12.3.2-devel-ubuntu22.04' container with the '--gpus=all' flag. Note that
Docker must be set up to allow passing through the CUDA instructions to the host. The container only needs
'automake' and 'libtool' installed: `apt update && apt install -y automake libtool`.

This code was tested with the following:

```sh
./configure --enable-all --disable-shared --disable-crl-monitor --enable-cuda CC=nvcc && make check
```

There are still things that can be done to optimize, but the basic functionality is there.
Loading

0 comments on commit a75c2be

Please sign in to comment.