From d63a6453bbcfa48525df201c9744470c0c815c92 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Sun, 17 Dec 2023 12:22:01 +0400 Subject: [PATCH 1/6] add option for building tests with *address-sanitizer* enabled Signed-off-by: Anjan Roy --- Makefile | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Makefile b/Makefile index 3230f7f..92dfd3c 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,8 @@ CXX_FLAGS = -std=c++20 WARN_FLAGS = -Wall -Wextra -pedantic OPT_FLAGS = -O3 -march=native LINK_FLAGS = -flto +DEBUG_FLAGS = -g3 +ASAN_FLAGS = -fno-omit-frame-pointer -fsanitize=address SHA3_INC_DIR = ./sha3/include SUBTLE_INC_DIR = ./subtle/include @@ -12,12 +14,15 @@ DEP_IFLAGS = -I $(SHA3_INC_DIR) -I $(SUBTLE_INC_DIR) SRC_DIR = include KYBER_SOURCES := $(wildcard $(SRC_DIR)/*.hpp) BUILD_DIR = build +ASAN_BUILD_DIR = $(BUILD_DIR)/asan TEST_DIR = tests TEST_SOURCES := $(wildcard $(TEST_DIR)/*.cpp) TEST_OBJECTS := $(addprefix $(BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) +ASAN_TEST_OBJECTS := $(addprefix $(ASAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) TEST_LINK_FLAGS = -lgtest -lgtest_main TEST_BINARY = $(BUILD_DIR)/test.out +ASAN_TEST_BINARY = $(ASAN_BUILD_DIR)/test.out BENCHMARK_DIR = benchmarks BENCHMARK_SOURCES := $(wildcard $(BENCHMARK_DIR)/*.cpp) @@ -29,6 +34,9 @@ PERF_BINARY = $(BUILD_DIR)/perf.out all: test +$(ASAN_BUILD_DIR): + mkdir -p $@ + $(BUILD_DIR): mkdir -p $@ @@ -40,12 +48,21 @@ $(SUBTLE_INC_DIR): $(SHA3_INC_DIR) $(BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(BUILD_DIR) $(SHA3_INC_DIR) $(SUBTLE_INC_DIR) $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ +$(ASAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(ASAN_BUILD_DIR) $(SHA3_INC_DIR) $(SUBTLE_INC_DIR) + $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(DEBUG_FLAGS) $(ASAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ + $(TEST_BINARY): $(TEST_OBJECTS) $(CXX) $(OPT_FLAGS) $(LINK_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ +$(ASAN_TEST_BINARY): $(ASAN_TEST_OBJECTS) + 
$(CXX) $(ASAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ + test: $(TEST_BINARY) ./$< --gtest_shuffle --gtest_random_seed=0 +asan_test: $(ASAN_TEST_BINARY) + ./$< --gtest_shuffle --gtest_random_seed=0 + $(BUILD_DIR)/%.o: $(BENCHMARK_DIR)/%.cpp $(BUILD_DIR) $(SHA3_INC_DIR) $(SUBTLE_INC_DIR) $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ From 38330bcc18d52f1f816154858305a3a8cfc366df Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Sun, 17 Dec 2023 12:34:48 +0400 Subject: [PATCH 2/6] add option for building tests with *memory-sanitizer* enabled Signed-off-by: Anjan Roy --- Makefile | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 92dfd3c..a0aa3b0 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,8 @@ CXX_FLAGS = -std=c++20 WARN_FLAGS = -Wall -Wextra -pedantic OPT_FLAGS = -O3 -march=native LINK_FLAGS = -flto -DEBUG_FLAGS = -g3 -ASAN_FLAGS = -fno-omit-frame-pointer -fsanitize=address +ASAN_FLAGS = -g -O1 -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanitize=address +MSAN_FLAGS = -g -O1 -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanitize=memory SHA3_INC_DIR = ./sha3/include SUBTLE_INC_DIR = ./subtle/include @@ -15,14 +15,17 @@ SRC_DIR = include KYBER_SOURCES := $(wildcard $(SRC_DIR)/*.hpp) BUILD_DIR = build ASAN_BUILD_DIR = $(BUILD_DIR)/asan +MSAN_BUILD_DIR = $(BUILD_DIR)/msan TEST_DIR = tests TEST_SOURCES := $(wildcard $(TEST_DIR)/*.cpp) TEST_OBJECTS := $(addprefix $(BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) ASAN_TEST_OBJECTS := $(addprefix $(ASAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) +MSAN_TEST_OBJECTS := $(addprefix $(MSAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) TEST_LINK_FLAGS = -lgtest -lgtest_main TEST_BINARY = $(BUILD_DIR)/test.out ASAN_TEST_BINARY = $(ASAN_BUILD_DIR)/test.out +MSAN_TEST_BINARY = $(MSAN_BUILD_DIR)/test.out BENCHMARK_DIR = benchmarks BENCHMARK_SOURCES := $(wildcard 
$(BENCHMARK_DIR)/*.cpp) @@ -37,6 +40,9 @@ all: test $(ASAN_BUILD_DIR): mkdir -p $@ +$(MSAN_BUILD_DIR): + mkdir -p $@ + $(BUILD_DIR): mkdir -p $@ @@ -49,7 +55,10 @@ $(BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(BUILD_DIR) $(SHA3_INC_DIR) $(SUBTLE_INC_DI $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ $(ASAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(ASAN_BUILD_DIR) $(SHA3_INC_DIR) $(SUBTLE_INC_DIR) - $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(DEBUG_FLAGS) $(ASAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ + $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(ASAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ + +$(MSAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(MSAN_BUILD_DIR) $(SHA3_INC_DIR) $(SUBTLE_INC_DIR) + $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(MSAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ $(TEST_BINARY): $(TEST_OBJECTS) $(CXX) $(OPT_FLAGS) $(LINK_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ @@ -57,12 +66,18 @@ $(TEST_BINARY): $(TEST_OBJECTS) $(ASAN_TEST_BINARY): $(ASAN_TEST_OBJECTS) $(CXX) $(ASAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ +$(MSAN_TEST_BINARY): $(MSAN_TEST_OBJECTS) + $(CXX) $(MSAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ + test: $(TEST_BINARY) ./$< --gtest_shuffle --gtest_random_seed=0 asan_test: $(ASAN_TEST_BINARY) ./$< --gtest_shuffle --gtest_random_seed=0 +msan_test: $(MSAN_TEST_BINARY) + ./$< --gtest_shuffle --gtest_random_seed=0 + $(BUILD_DIR)/%.o: $(BENCHMARK_DIR)/%.cpp $(BUILD_DIR) $(SHA3_INC_DIR) $(SUBTLE_INC_DIR) $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ From aee334b648787f881687e30ed9c4e1a472abd499 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Sun, 17 Dec 2023 12:58:18 +0400 Subject: [PATCH 3/6] add option for building tests with *undefined-behaviour-sanitizer* enabled, while removing *memory-sanitizer* Signed-off-by: Anjan Roy --- Makefile | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index a0aa3b0..3088594 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,8 @@ 
CXX_FLAGS = -std=c++20 WARN_FLAGS = -Wall -Wextra -pedantic OPT_FLAGS = -O3 -march=native LINK_FLAGS = -flto -ASAN_FLAGS = -g -O1 -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanitize=address -MSAN_FLAGS = -g -O1 -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanitize=memory +ASAN_FLAGS = -g -O1 -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanitize=address # From https://clang.llvm.org/docs/AddressSanitizer.html +UBSAN_FLAGS = -g -O1 -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanitize=undefined -fsanitize=nullability # From https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html SHA3_INC_DIR = ./sha3/include SUBTLE_INC_DIR = ./subtle/include @@ -15,17 +15,17 @@ SRC_DIR = include KYBER_SOURCES := $(wildcard $(SRC_DIR)/*.hpp) BUILD_DIR = build ASAN_BUILD_DIR = $(BUILD_DIR)/asan -MSAN_BUILD_DIR = $(BUILD_DIR)/msan +UBSAN_BUILD_DIR = $(BUILD_DIR)/ubsan TEST_DIR = tests TEST_SOURCES := $(wildcard $(TEST_DIR)/*.cpp) TEST_OBJECTS := $(addprefix $(BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) ASAN_TEST_OBJECTS := $(addprefix $(ASAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) -MSAN_TEST_OBJECTS := $(addprefix $(MSAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) +UBSAN_TEST_OBJECTS := $(addprefix $(UBSAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) TEST_LINK_FLAGS = -lgtest -lgtest_main TEST_BINARY = $(BUILD_DIR)/test.out ASAN_TEST_BINARY = $(ASAN_BUILD_DIR)/test.out -MSAN_TEST_BINARY = $(MSAN_BUILD_DIR)/test.out +UBSAN_TEST_BINARY = $(UBSAN_BUILD_DIR)/test.out BENCHMARK_DIR = benchmarks BENCHMARK_SOURCES := $(wildcard $(BENCHMARK_DIR)/*.cpp) @@ -40,7 +40,7 @@ all: test $(ASAN_BUILD_DIR): mkdir -p $@ -$(MSAN_BUILD_DIR): +$(UBSAN_BUILD_DIR): mkdir -p $@ $(BUILD_DIR): @@ -57,8 +57,8 @@ $(BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(BUILD_DIR) $(SHA3_INC_DIR) $(SUBTLE_INC_DI $(ASAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(ASAN_BUILD_DIR) $(SHA3_INC_DIR) $(SUBTLE_INC_DIR) $(CXX) 
$(CXX_FLAGS) $(WARN_FLAGS) $(ASAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ -$(MSAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(MSAN_BUILD_DIR) $(SHA3_INC_DIR) $(SUBTLE_INC_DIR) - $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(MSAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ +$(UBSAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(UBSAN_BUILD_DIR) $(SHA3_INC_DIR) $(SUBTLE_INC_DIR) + $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(UBSAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ $(TEST_BINARY): $(TEST_OBJECTS) $(CXX) $(OPT_FLAGS) $(LINK_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ @@ -66,8 +66,8 @@ $(TEST_BINARY): $(TEST_OBJECTS) $(ASAN_TEST_BINARY): $(ASAN_TEST_OBJECTS) $(CXX) $(ASAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ -$(MSAN_TEST_BINARY): $(MSAN_TEST_OBJECTS) - $(CXX) $(MSAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ +$(UBSAN_TEST_BINARY): $(UBSAN_TEST_OBJECTS) + $(CXX) $(UBSAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ test: $(TEST_BINARY) ./$< --gtest_shuffle --gtest_random_seed=0 @@ -75,7 +75,7 @@ test: $(TEST_BINARY) asan_test: $(ASAN_TEST_BINARY) ./$< --gtest_shuffle --gtest_random_seed=0 -msan_test: $(MSAN_TEST_BINARY) +ubsan_test: $(UBSAN_TEST_BINARY) ./$< --gtest_shuffle --gtest_random_seed=0 $(BUILD_DIR)/%.o: $(BENCHMARK_DIR)/%.cpp $(BUILD_DIR) $(SHA3_INC_DIR) $(SUBTLE_INC_DIR) From 67a41dc1ec875f3ecdfe705e08e5feb004c960fb Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Sun, 17 Dec 2023 13:08:58 +0400 Subject: [PATCH 4/6] when benchmarking also measure minimum and maximum execution time Signed-off-by: Anjan Roy --- benchmarks/bench_helper.hpp | 6 ++++++ benchmarks/bench_kem.cpp | 22 ++++++++++------------ 2 files changed, 16 insertions(+), 12 deletions(-) create mode 100644 benchmarks/bench_helper.hpp diff --git a/benchmarks/bench_helper.hpp b/benchmarks/bench_helper.hpp new file mode 100644 index 0000000..58e62ec --- /dev/null +++ b/benchmarks/bench_helper.hpp @@ -0,0 +1,6 @@ +#pragma once +#include <algorithm> +#include <vector> + +const auto compute_min = [](const std::vector<double>& v) -> double { return *std::min_element(v.begin(), v.end());
}; +const auto compute_max = [](const std::vector<double>& v) -> double { return *std::max_element(v.begin(), v.end()); }; diff --git a/benchmarks/bench_kem.cpp b/benchmarks/bench_kem.cpp index 3f6bc57..30f68e2 100644 --- a/benchmarks/bench_kem.cpp +++ b/benchmarks/bench_kem.cpp @@ -1,8 +1,6 @@ +#include "bench_helper.hpp" #include "kem.hpp" -#include "utils.hpp" -#include #include -#include // Benchmarking IND-CCA2-secure Kyber KEM key generation algorithm template @@ -148,16 +146,16 @@ bench_decapsulate(benchmark::State& state) // Register for benchmarking IND-CCA2-secure Kyber Key Encapsulation Mechanism // Kyber512 -BENCHMARK(bench_keygen<2, 3>)->Name("kyber512/keygen"); -BENCHMARK(bench_encapsulate<2, 3, 2, 10, 4>)->Name("kyber512/encap"); -BENCHMARK(bench_decapsulate<2, 3, 2, 10, 4>)->Name("kyber512/decap"); +BENCHMARK(bench_keygen<2, 3>)->Name("kyber512/keygen")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max); +BENCHMARK(bench_encapsulate<2, 3, 2, 10, 4>)->Name("kyber512/encap")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max); +BENCHMARK(bench_decapsulate<2, 3, 2, 10, 4>)->Name("kyber512/decap")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max); // Kyber768 -BENCHMARK(bench_keygen<3, 2>)->Name("kyber768/keygen"); -BENCHMARK(bench_encapsulate<3, 2, 2, 10, 4>)->Name("kyber768/encap"); -BENCHMARK(bench_decapsulate<3, 2, 2, 10, 4>)->Name("kyber768/decap"); +BENCHMARK(bench_keygen<3, 2>)->Name("kyber768/keygen")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max); +BENCHMARK(bench_encapsulate<3, 2, 2, 10, 4>)->Name("kyber768/encap")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max); +BENCHMARK(bench_decapsulate<3, 2, 2, 10, 4>)->Name("kyber768/decap")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max); // Kyber1024 -BENCHMARK(bench_keygen<4, 2>)->Name("kyber1024/keygen"); -BENCHMARK(bench_encapsulate<4,
2, 2, 11, 5>)->Name("kyber1024/encap"); -BENCHMARK(bench_decapsulate<4, 2, 2, 11, 5>)->Name("kyber1024/decap"); +BENCHMARK(bench_keygen<4, 2>)->Name("kyber1024/keygen")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max); +BENCHMARK(bench_encapsulate<4, 2, 2, 11, 5>)->Name("kyber1024/encap")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max); +BENCHMARK(bench_decapsulate<4, 2, 2, 11, 5>)->Name("kyber1024/decap")->ComputeStatistics("min", compute_min)->ComputeStatistics("max", compute_max); From de19f1e77a8ff1a2151e0b889f3f65eb6624e8b4 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Sun, 17 Dec 2023 13:13:03 +0400 Subject: [PATCH 5/6] run both *address* and *undefined-behaviour* sanitizer tests on Github Actions CI Signed-off-by: Anjan Roy --- .github/workflows/test_ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/test_ci.yml b/.github/workflows/test_ci.yml index e29737c..afbf9aa 100644 --- a/.github/workflows/test_ci.yml +++ b/.github/workflows/test_ci.yml @@ -35,3 +35,7 @@ jobs: popd - name: Execute Tests on ${{matrix.os}} run: make -j + - name: Execute Tests with AddressSanitizer on ${{matrix.os}} + run: make asan_test -j + - name: Execute Tests with UndefinedBehaviourSanitizer on ${{matrix.os}} + run: make ubsan_test -j From a1d92dd368b128ade6f8cd90b213191b20e74302 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Sun, 17 Dec 2023 16:36:41 +0400 Subject: [PATCH 6/6] update benchmark results Signed-off-by: Anjan Roy --- README.md | 303 +++++++++++++++++++++--------------------------------- 1 file changed, 116 insertions(+), 187 deletions(-) diff --git a/README.md b/README.md index 33dddf2..f4349fd 100644 --- a/README.md +++ b/README.md @@ -18,9 +18,6 @@ KEM KeyGen | - | Public Key and Secret Key Encapsulation | Public Key | Cipher Text and SHAKE256 KDF Decapsulation | Secret Key and Cipher Text | SHAKE256 KDF -> [!NOTE] -> *IND-CCA2-secure Kyber KEM* can be used for synchronous secure 
communication such as TLS. - Here I'm maintaining `kyber` - a header-only and easy-to-use ( see more in [usage](#usage) ) C++ library implementing Kyber KEM, supporting Kyber-{512, 768, 1024} parameter sets, as defined in table 1 of Kyber specification. `sha3` and `subtle` are two dependencies of this library, which are pinned to specific git commits, using git submodule. > [!NOTE] @@ -71,7 +68,9 @@ For testing functional correctness and conformance with Kyber specification, you > Known Answer Test (KAT) files living in [this](./kats/) directory are generated by following (reproducible) steps, described in https://gist.github.com/itzmeanjan/c8f5bc9640d0f0bdd2437dfe364d7710. ```bash -make -j +make -j # Run tests without any sort of sanitizers +make asan_test -j # Run tests with AddressSanitizer enabled +make ubsan_test -j # Run tests with UndefinedBehaviourSanitizer enabled ``` ```bash @@ -124,216 +123,146 @@ make perf # If you have built google-benchmark library with libPFM support > [!NOTE] > `make perf` - was issued when collecting following benchmarks. Notice, *cycles* column, denoting cost of executing Kyber KEM routines in terms of CPU cycles. Follow [this](https://github.com/google/benchmark/blob/main/docs/perf_counters.md) for more details. 
-### On *12th Gen Intel(R) Core(TM) i7-1260P ( compiled with GCC-13.2.0 )* - -```bash -2023-12-05T22:33:52+04:00 -Running ./build/perf.out -Run on (16 X 504.568 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x8) - L1 Instruction 32 KiB (x8) - L2 Unified 1280 KiB (x8) - L3 Unified 18432 KiB (x1) -Load Average: 0.44, 0.51, 0.51 ----------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations CYCLES items_per_second ----------------------------------------------------------------------------------------------- -kyber512/keygen_mean 14.4 us 14.4 us 8 67.3437k 69.4709k/s -kyber512/keygen_median 14.4 us 14.4 us 8 67.4082k 69.3937k/s -kyber512/keygen_stddev 0.042 us 0.043 us 8 176.213 210.232/s -kyber512/keygen_cv 0.29 % 0.30 % 8 0.26% 0.30% -kyber1024/decap_mean 48.5 us 48.5 us 8 226.76k 20.6092k/s -kyber1024/decap_median 48.5 us 48.5 us 8 226.561k 20.6251k/s -kyber1024/decap_stddev 0.165 us 0.157 us 8 426.061 66.8334/s -kyber1024/decap_cv 0.34 % 0.32 % 8 0.19% 0.32% -kyber512/decap_mean 20.7 us 20.6 us 8 96.5275k 48.4359k/s -kyber512/decap_median 20.6 us 20.6 us 8 96.3069k 48.5757k/s -kyber512/decap_stddev 0.134 us 0.135 us 8 582.227 313.68/s -kyber512/decap_cv 0.65 % 0.65 % 8 0.60% 0.65% -kyber768/keygen_mean 24.7 us 24.7 us 8 114.9k 40.5314k/s -kyber768/keygen_median 24.6 us 24.6 us 8 115.106k 40.6451k/s -kyber768/keygen_stddev 0.177 us 0.169 us 8 687.033 274.976/s -kyber768/keygen_cv 0.72 % 0.69 % 8 0.60% 0.68% -kyber768/decap_mean 33.3 us 33.2 us 8 155.52k 30.0789k/s -kyber768/decap_median 33.2 us 33.2 us 8 155.26k 30.0966k/s -kyber768/decap_stddev 0.147 us 0.139 us 8 552.726 125.078/s -kyber768/decap_cv 0.44 % 0.42 % 8 0.36% 0.42% -kyber1024/encap_mean 44.3 us 44.3 us 8 206.579k 22.5924k/s -kyber1024/encap_median 44.3 us 44.3 us 8 206.455k 22.5788k/s -kyber1024/encap_stddev 0.147 us 0.145 us 8 549.686 74.1456/s -kyber1024/encap_cv 0.33 % 0.33 % 8 0.27% 0.33% -kyber512/encap_mean 18.0 us 18.0 us 8 84.133k 
55.5064k/s -kyber512/encap_median 18.0 us 18.0 us 8 84.1549k 55.5051k/s -kyber512/encap_stddev 0.064 us 0.061 us 8 151.114 186.754/s -kyber512/encap_cv 0.36 % 0.34 % 8 0.18% 0.34% -kyber768/encap_mean 29.7 us 29.7 us 8 138.837k 33.6446k/s -kyber768/encap_median 29.7 us 29.7 us 8 138.648k 33.7164k/s -kyber768/encap_stddev 0.161 us 0.158 us 8 633.267 178.415/s -kyber768/encap_cv 0.54 % 0.53 % 8 0.46% 0.53% -kyber1024/keygen_mean 38.8 us 38.8 us 8 180.897k 25.7986k/s -kyber1024/keygen_median 38.9 us 38.8 us 8 181.344k 25.7401k/s -kyber1024/keygen_stddev 0.268 us 0.267 us 8 1.09543k 179.632/s -kyber1024/keygen_cv 0.69 % 0.69 % 8 0.61% 0.70% -``` - ### On *12th Gen Intel(R) Core(TM) i7-1260P ( compiled with Clang-17.0.2 )* ```bash -2023-12-05T22:32:40+04:00 +2023-12-17T16:35:19+04:00 Running ./build/perf.out -Run on (16 X 1113.91 MHz CPU s) +Run on (16 X 648.989 MHz CPU s) CPU Caches: L1 Data 48 KiB (x8) L1 Instruction 32 KiB (x8) L2 Unified 1280 KiB (x8) L3 Unified 18432 KiB (x1) -Load Average: 0.70, 0.54, 0.52 +Load Average: 0.63, 0.49, 0.35 ---------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations CYCLES items_per_second ---------------------------------------------------------------------------------------------- -kyber1024/encap_mean 39.4 us 39.4 us 8 184.447k 25.3726k/s -kyber1024/encap_median 39.4 us 39.4 us 8 184.349k 25.4044k/s -kyber1024/encap_stddev 0.111 us 0.110 us 8 666.498 70.9216/s -kyber1024/encap_cv 0.28 % 0.28 % 8 0.36% 0.28% -kyber768/encap_mean 25.2 us 25.2 us 8 118.269k 39.6068k/s -kyber768/encap_median 25.2 us 25.2 us 8 118.098k 39.6763k/s -kyber768/encap_stddev 0.190 us 0.191 us 8 944.954 296.817/s -kyber768/encap_cv 0.75 % 0.76 % 8 0.80% 0.75% -kyber768/decap_mean 27.4 us 27.4 us 8 128.276k 36.4705k/s -kyber768/decap_median 27.4 us 27.4 us 8 128.023k 36.5011k/s -kyber768/decap_stddev 0.148 us 0.147 us 8 725.711 194.118/s -kyber768/decap_cv 0.54 % 0.54 % 8 0.57% 0.53% 
-kyber512/keygen_mean 11.7 us 11.7 us 8 54.4158k 85.6074k/s -kyber512/keygen_median 11.6 us 11.6 us 8 54.4761k 85.8637k/s -kyber512/keygen_stddev 0.157 us 0.157 us 8 230.812 1.1305k/s -kyber512/keygen_cv 1.35 % 1.35 % 8 0.42% 1.32% -kyber1024/keygen_mean 31.2 us 31.2 us 8 146.117k 32.0219k/s -kyber1024/keygen_median 31.3 us 31.3 us 8 146.447k 31.9462k/s -kyber1024/keygen_stddev 0.443 us 0.443 us 8 2.14262k 460.613/s -kyber1024/keygen_cv 1.42 % 1.42 % 8 1.47% 1.44% -kyber512/decap_mean 16.8 us 16.8 us 8 78.599k 59.5481k/s -kyber512/decap_median 16.8 us 16.8 us 8 78.6311k 59.5517k/s -kyber512/decap_stddev 0.154 us 0.154 us 8 750.744 545.456/s -kyber512/decap_cv 0.92 % 0.92 % 8 0.96% 0.92% -kyber512/encap_mean 15.4 us 15.4 us 8 71.9738k 65.0018k/s -kyber512/encap_median 15.3 us 15.3 us 8 71.6988k 65.2546k/s -kyber512/encap_stddev 0.172 us 0.172 us 8 790.241 725.525/s -kyber512/encap_cv 1.12 % 1.12 % 8 1.10% 1.12% -kyber768/keygen_mean 20.5 us 20.5 us 8 95.9542k 48.737k/s -kyber768/keygen_median 20.5 us 20.5 us 8 95.9397k 48.6711k/s -kyber768/keygen_stddev 0.096 us 0.096 us 8 523.379 227.911/s -kyber768/keygen_cv 0.47 % 0.47 % 8 0.55% 0.47% -kyber1024/decap_mean 41.8 us 41.8 us 8 194.826k 23.9134k/s -kyber1024/decap_median 41.7 us 41.7 us 8 194.774k 24.0016k/s -kyber1024/decap_stddev 0.526 us 0.523 us 8 573.604 291.732/s -kyber1024/decap_cv 1.26 % 1.25 % 8 0.29% 1.22% -``` - -### On *ARM Cortex-A72 i.e. Raspberry Pi 4B ( compiled with GCC-13.2.0 )* - -```bash -2023-12-06T00:37:57+05:30 -Running ./build/perf.out -Run on (4 X 1800 MHz CPU s) -CPU Caches: - L1 Data 32 KiB (x4) - L1 Instruction 48 KiB (x4) - L2 Unified 1024 KiB (x1) -Load Average: 0.69, 1.28, 1.13 -Performance counters not supported. 
------------------------------------------------------------------------------------ -Benchmark Time CPU Iterations items_per_second ------------------------------------------------------------------------------------ -kyber1024/encap_mean 220 us 220 us 10 4.53661k/s -kyber1024/encap_median 220 us 220 us 10 4.53843k/s -kyber1024/encap_stddev 0.775 us 0.776 us 10 15.9613/s -kyber1024/encap_cv 0.35 % 0.35 % 10 0.35% -kyber768/encap_mean 147 us 146 us 10 6.82666k/s -kyber768/encap_median 146 us 146 us 10 6.82827k/s -kyber768/encap_stddev 0.370 us 0.357 us 10 16.6515/s -kyber768/encap_cv 0.25 % 0.24 % 10 0.24% -kyber1024/keygen_mean 189 us 189 us 10 5.30215k/s -kyber1024/keygen_median 188 us 188 us 10 5.30614k/s -kyber1024/keygen_stddev 0.588 us 0.590 us 10 16.5559/s -kyber1024/keygen_cv 0.31 % 0.31 % 10 0.31% -kyber512/decap_mean 106 us 106 us 10 9.45067k/s -kyber512/decap_median 106 us 106 us 10 9.45608k/s -kyber512/decap_stddev 0.243 us 0.250 us 10 22.2866/s -kyber512/decap_cv 0.23 % 0.24 % 10 0.24% -kyber768/decap_mean 169 us 169 us 10 5.91167k/s -kyber768/decap_median 169 us 169 us 10 5.91724k/s -kyber768/decap_stddev 0.639 us 0.623 us 10 21.6423/s -kyber768/decap_cv 0.38 % 0.37 % 10 0.37% -kyber512/encap_mean 89.0 us 89.0 us 10 11.236k/s -kyber512/encap_median 89.0 us 89.0 us 10 11.2376k/s -kyber512/encap_stddev 0.216 us 0.222 us 10 28.0647/s -kyber512/encap_cv 0.24 % 0.25 % 10 0.25% -kyber768/keygen_mean 119 us 119 us 10 8.37056k/s -kyber768/keygen_median 119 us 119 us 10 8.38088k/s -kyber768/keygen_stddev 0.623 us 0.615 us 10 42.9127/s -kyber768/keygen_cv 0.52 % 0.51 % 10 0.51% -kyber1024/decap_mean 249 us 249 us 10 4.02233k/s -kyber1024/decap_median 249 us 249 us 10 4.02281k/s -kyber1024/decap_stddev 0.721 us 0.706 us 10 11.4051/s -kyber1024/decap_cv 0.29 % 0.28 % 10 0.28% -kyber512/keygen_mean 70.1 us 70.0 us 10 14.2774k/s -kyber512/keygen_median 70.1 us 70.1 us 10 14.2713k/s -kyber512/keygen_stddev 0.198 us 0.196 us 10 40.0484/s -kyber512/keygen_cv 0.28 % 
0.28 % 10 0.28% +kyber512/decap_mean 16.8 us 16.8 us 10 78.2371k 59.3682k/s +kyber512/decap_median 16.8 us 16.8 us 10 78.0903k 59.5332k/s +kyber512/decap_stddev 0.213 us 0.213 us 10 1.61104k 748.863/s +kyber512/decap_cv 1.26 % 1.26 % 10 2.06% 1.26% +kyber512/decap_min 16.5 us 16.5 us 10 75.9006k 58.3612k/s +kyber512/decap_max 17.1 us 17.1 us 10 80.2234k 60.4726k/s +kyber512/keygen_mean 12.0 us 12.0 us 10 55.2343k 83.4374k/s +kyber512/keygen_median 12.0 us 12.0 us 10 55.2033k 83.3267k/s +kyber512/keygen_stddev 0.143 us 0.143 us 10 910.895 988.689/s +kyber512/keygen_cv 1.19 % 1.20 % 10 1.65% 1.18% +kyber512/keygen_min 11.8 us 11.8 us 10 54.1517k 81.2428k/s +kyber512/keygen_max 12.3 us 12.3 us 10 57.0565k 84.7691k/s +kyber768/keygen_mean 19.9 us 19.9 us 10 91.4059k 50.2463k/s +kyber768/keygen_median 19.9 us 19.9 us 10 91.0785k 50.1954k/s +kyber768/keygen_stddev 0.180 us 0.181 us 10 1.05886k 457.283/s +kyber768/keygen_cv 0.91 % 0.91 % 10 1.16% 0.91% +kyber768/keygen_min 19.6 us 19.6 us 10 90.2107k 49.5364k/s +kyber768/keygen_max 20.2 us 20.2 us 10 93.7315k 50.9569k/s +kyber768/encap_mean 26.3 us 26.3 us 10 119.902k 38.0206k/s +kyber768/encap_median 26.3 us 26.3 us 10 119.44k 37.9685k/s +kyber768/encap_stddev 0.225 us 0.225 us 10 2.0148k 326.026/s +kyber768/encap_cv 0.85 % 0.85 % 10 1.68% 0.86% +kyber768/encap_min 25.9 us 25.9 us 10 117.959k 37.5131k/s +kyber768/encap_max 26.7 us 26.7 us 10 124.856k 38.6767k/s +kyber1024/encap_mean 40.1 us 40.1 us 10 183.584k 24.9219k/s +kyber1024/encap_median 40.0 us 40.0 us 10 182.834k 25.005k/s +kyber1024/encap_stddev 0.462 us 0.462 us 10 2.02064k 285.263/s +kyber1024/encap_cv 1.15 % 1.15 % 10 1.10% 1.14% +kyber1024/encap_min 39.4 us 39.4 us 10 180.861k 24.4381k/s +kyber1024/encap_max 40.9 us 40.9 us 10 187.784k 25.367k/s +kyber1024/keygen_mean 31.3 us 31.3 us 10 142.994k 32.0037k/s +kyber1024/keygen_median 31.2 us 31.2 us 10 141.812k 32.0502k/s +kyber1024/keygen_stddev 0.468 us 0.467 us 10 3.09301k 476.014/s +kyber1024/keygen_cv 
1.50 % 1.50 % 10 2.16% 1.49% +kyber1024/keygen_min 30.5 us 30.5 us 10 139.185k 31.0694k/s +kyber1024/keygen_max 32.2 us 32.2 us 10 148.231k 32.7634k/s +kyber1024/decap_mean 42.5 us 42.5 us 10 195.395k 23.5308k/s +kyber1024/decap_median 42.5 us 42.5 us 10 194.307k 23.5284k/s +kyber1024/decap_stddev 0.341 us 0.342 us 10 2.27441k 188.876/s +kyber1024/decap_cv 0.80 % 0.80 % 10 1.16% 0.80% +kyber1024/decap_min 42.0 us 42.0 us 10 192.669k 23.198k/s +kyber1024/decap_max 43.1 us 43.1 us 10 198.878k 23.8111k/s +kyber512/encap_mean 15.4 us 15.4 us 10 70.3054k 65.0965k/s +kyber512/encap_median 15.4 us 15.4 us 10 69.6619k 65.1279k/s +kyber512/encap_stddev 0.225 us 0.226 us 10 1.99248k 957.196/s +kyber512/encap_cv 1.47 % 1.47 % 10 2.83% 1.47% +kyber512/encap_min 15.0 us 15.0 us 10 68.2471k 63.7025k/s +kyber512/encap_max 15.7 us 15.7 us 10 73.4948k 66.8462k/s +kyber768/decap_mean 28.1 us 28.1 us 10 129.269k 35.6001k/s +kyber768/decap_median 28.1 us 28.1 us 10 128.604k 35.5936k/s +kyber768/decap_stddev 0.159 us 0.159 us 10 1.61556k 200.417/s +kyber768/decap_cv 0.57 % 0.56 % 10 1.25% 0.56% +kyber768/decap_min 27.8 us 27.8 us 10 127.678k 35.1656k/s +kyber768/decap_max 28.4 us 28.4 us 10 131.651k 35.9414k/s ``` ### On *ARM Cortex-A72 i.e. Raspberry Pi 4B ( compiled with Clang-17.0.2 )* ```bash -2023-12-06T00:36:20+05:30 +2023-12-17T18:03:53+05:30 Running ./build/perf.out Run on (4 X 1800 MHz CPU s) CPU Caches: L1 Data 32 KiB (x4) L1 Instruction 48 KiB (x4) L2 Unified 1024 KiB (x1) -Load Average: 0.73, 1.55, 1.20 +Load Average: 0.48, 1.44, 1.61 Performance counters not supported. 
----------------------------------------------------------------------------------- Benchmark Time CPU Iterations items_per_second ----------------------------------------------------------------------------------- -kyber1024/encap_mean 297 us 297 us 10 3.36489k/s -kyber1024/encap_median 297 us 297 us 10 3.36366k/s -kyber1024/encap_stddev 1.08 us 1.09 us 10 12.2899/s -kyber1024/encap_cv 0.36 % 0.37 % 10 0.37% -kyber1024/keygen_mean 254 us 254 us 10 3.93546k/s -kyber1024/keygen_median 254 us 254 us 10 3.93845k/s -kyber1024/keygen_stddev 1.05 us 1.05 us 10 16.2184/s -kyber1024/keygen_cv 0.41 % 0.41 % 10 0.41% -kyber512/decap_mean 129 us 129 us 10 7.72535k/s -kyber512/decap_median 130 us 129 us 10 7.72301k/s -kyber512/decap_stddev 0.293 us 0.284 us 10 16.9731/s -kyber512/decap_cv 0.23 % 0.22 % 10 0.22% -kyber768/encap_mean 193 us 193 us 10 5.16967k/s -kyber768/encap_median 193 us 193 us 10 5.17074k/s -kyber768/encap_stddev 0.416 us 0.426 us 10 11.3712/s -kyber768/encap_cv 0.22 % 0.22 % 10 0.22% -kyber1024/decap_mean 320 us 320 us 10 3.12381k/s -kyber1024/decap_median 320 us 320 us 10 3.12274k/s -kyber1024/decap_stddev 0.813 us 0.805 us 10 7.85819/s -kyber1024/decap_cv 0.25 % 0.25 % 10 0.25% -kyber768/decap_mean 211 us 211 us 10 4.73723k/s -kyber768/decap_median 211 us 211 us 10 4.73735k/s -kyber768/decap_stddev 0.374 us 0.391 us 10 8.77749/s -kyber768/decap_cv 0.18 % 0.19 % 10 0.19% -kyber512/encap_mean 116 us 116 us 10 8.59258k/s -kyber512/encap_median 116 us 116 us 10 8.59042k/s -kyber512/encap_stddev 0.222 us 0.216 us 10 15.9794/s -kyber512/encap_cv 0.19 % 0.19 % 10 0.19% -kyber512/keygen_mean 92.6 us 92.6 us 10 10.7953k/s -kyber512/keygen_median 92.4 us 92.4 us 10 10.8198k/s -kyber512/keygen_stddev 0.692 us 0.695 us 10 79.7935/s -kyber512/keygen_cv 0.75 % 0.75 % 10 0.74% -kyber768/keygen_mean 159 us 159 us 10 6.29988k/s -kyber768/keygen_median 159 us 159 us 10 6.30371k/s -kyber768/keygen_stddev 0.714 us 0.712 us 10 28.124/s -kyber768/keygen_cv 0.45 % 0.45 % 10 
0.45% +kyber768/encap_mean 194 us 194 us 10 5.16056k/s +kyber768/encap_median 193 us 193 us 10 5.17224k/s +kyber768/encap_stddev 1.31 us 1.29 us 10 34.0107/s +kyber768/encap_cv 0.67 % 0.66 % 10 0.66% +kyber768/encap_min 193 us 193 us 10 5.09692k/s +kyber768/encap_max 196 us 196 us 10 5.19123k/s +kyber1024/keygen_mean 254 us 254 us 10 3.93486k/s +kyber1024/keygen_median 254 us 254 us 10 3.93138k/s +kyber1024/keygen_stddev 0.713 us 0.712 us 10 11.0375/s +kyber1024/keygen_cv 0.28 % 0.28 % 10 0.28% +kyber1024/keygen_min 253 us 253 us 10 3.92136k/s +kyber1024/keygen_max 255 us 255 us 10 3.95326k/s +kyber1024/encap_mean 296 us 296 us 10 3.38308k/s +kyber1024/encap_median 296 us 296 us 10 3.38211k/s +kyber1024/encap_stddev 0.574 us 0.586 us 10 6.71083/s +kyber1024/encap_cv 0.19 % 0.20 % 10 0.20% +kyber1024/encap_min 295 us 295 us 10 3.37455k/s +kyber1024/encap_max 296 us 296 us 10 3.39256k/s +kyber768/decap_mean 212 us 212 us 10 4.71816k/s +kyber768/decap_median 212 us 212 us 10 4.71786k/s +kyber768/decap_stddev 0.741 us 0.744 us 10 16.577/s +kyber768/decap_cv 0.35 % 0.35 % 10 0.35% +kyber768/decap_min 211 us 211 us 10 4.69116k/s +kyber768/decap_max 213 us 213 us 10 4.74494k/s +kyber768/keygen_mean 158 us 158 us 10 6.31632k/s +kyber768/keygen_median 158 us 158 us 10 6.31299k/s +kyber768/keygen_stddev 0.453 us 0.445 us 10 17.7648/s +kyber768/keygen_cv 0.29 % 0.28 % 10 0.28% +kyber768/keygen_min 158 us 158 us 10 6.29411k/s +kyber768/keygen_max 159 us 159 us 10 6.34639k/s +kyber1024/decap_mean 321 us 321 us 10 3.11987k/s +kyber1024/decap_median 321 us 321 us 10 3.11851k/s +kyber1024/decap_stddev 0.564 us 0.569 us 10 5.53764/s +kyber1024/decap_cv 0.18 % 0.18 % 10 0.18% +kyber1024/decap_min 320 us 320 us 10 3.1111k/s +kyber1024/decap_max 321 us 321 us 10 3.12971k/s +kyber512/decap_mean 129 us 129 us 10 7.74209k/s +kyber512/decap_median 129 us 129 us 10 7.75584k/s +kyber512/decap_stddev 0.869 us 0.858 us 10 50.8408/s +kyber512/decap_cv 0.67 % 0.66 % 10 0.66% +kyber512/decap_min 
128 us 128 us 10 7.60927k/s +kyber512/decap_max 131 us 131 us 10 7.79796k/s +kyber512/encap_mean 116 us 116 us 10 8.63463k/s +kyber512/encap_median 116 us 116 us 10 8.63491k/s +kyber512/encap_stddev 0.318 us 0.316 us 10 23.5414/s +kyber512/encap_cv 0.27 % 0.27 % 10 0.27% +kyber512/encap_min 115 us 115 us 10 8.6006k/s +kyber512/encap_max 116 us 116 us 10 8.66881k/s +kyber512/keygen_mean 92.7 us 92.7 us 10 10.7837k/s +kyber512/keygen_median 92.6 us 92.6 us 10 10.7982k/s +kyber512/keygen_stddev 0.750 us 0.753 us 10 86.1452/s +kyber512/keygen_cv 0.81 % 0.81 % 10 0.80% +kyber512/keygen_min 92.1 us 92.0 us 10 10.5513k/s +kyber512/keygen_max 94.8 us 94.8 us 10 10.8653k/s ``` ## Usage