From 493ca3bee8a97e47f045f18846bc4a6296fd1cc6 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Mon, 18 Nov 2024 11:09:11 +0400 Subject: [PATCH 01/14] Add "RandomShake" as git submodule based dependency Signed-off-by: Anjan Roy --- .gitmodules | 3 +++ RandomShake | 1 + 2 files changed, 4 insertions(+) create mode 160000 RandomShake diff --git a/.gitmodules b/.gitmodules index 43f4120..d8f6e8f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "gtest-parallel"] path = gtest-parallel url = https://github.com/google/gtest-parallel.git +[submodule "RandomShake"] + path = RandomShake + url = https://github.com/itzmeanjan/RandomShake.git diff --git a/RandomShake b/RandomShake new file mode 160000 index 0000000..a9cd408 --- /dev/null +++ b/RandomShake @@ -0,0 +1 @@ +Subproject commit a9cd4085a4d38d7b99ee42caadc56fc2d64ec1dc From f1942886ec352f04a78098992ae456e9a52b3fb2 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Mon, 18 Nov 2024 11:10:05 +0400 Subject: [PATCH 02/14] Bump version of `sha3` dependency Signed-off-by: Anjan Roy --- sha3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sha3 b/sha3 index f1a79a5..b6ce906 160000 --- a/sha3 +++ b/sha3 @@ -1 +1 @@ -Subproject commit f1a79a51d3a8dc3cc514ce7908e46c6a7946414e +Subproject commit b6ce906994961b711b6f2864fa8ee393c84d23ef From 9cd25955c89920e34cb4b20769cbfa06acc0a7a0 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Mon, 18 Nov 2024 20:17:23 +0400 Subject: [PATCH 03/14] Split Makefile into multiple units Signed-off-by: Anjan Roy --- Makefile | 140 +++++--------------------------------------- benchmarks/bench.mk | 29 +++++++++ tests/test.mk | 91 ++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+), 125 deletions(-) create mode 100644 benchmarks/bench.mk create mode 100644 tests/test.mk diff --git a/Makefile b/Makefile index 17177dd..b6d0a90 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,18 @@ +DEFAULT_GOAL := help + +# Collects inspiration from 
https://github.com/itzmeanjan/RandomShake/blob/a9cd4085a4d38d7b99ee42caadc56fc2d64ec1dc/Makefile#L1-L9 +.PHONY: help +help: + @for file in $(MAKEFILE_LIST); do \ + grep -E '^[a-zA-Z_-]+:.*?## .*$$' $${file} | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}';\ + done + CXX ?= clang++ CXX_FLAGS := -std=c++20 WARN_FLAGS := -Wall -Wextra -Wpedantic DEBUG_FLAGS := -O1 -g RELEASE_FLAGS := -O3 -march=native LINK_OPT_FLAGS := -flto -ASAN_FLAGS := -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanitize=address # From https://clang.llvm.org/docs/AddressSanitizer.html -DEBUG_ASAN_FLAGS := $(DEBUG_FLAGS) $(ASAN_FLAGS) -RELEASE_ASAN_FLAGS := -g $(RELEASE_FLAGS) $(ASAN_FLAGS) -UBSAN_FLAGS := -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanitize=undefined # From https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html -DEBUG_UBSAN_FLAGS := $(DEBUG_FLAGS) $(UBSAN_FLAGS) -RELEASE_UBSAN_FLAGS := -g $(RELEASE_FLAGS) $(UBSAN_FLAGS) I_FLAGS := -I ./include SHA3_INC_DIR = ./sha3/include @@ -19,60 +22,9 @@ DEP_IFLAGS = -I $(SHA3_INC_DIR) -I $(SUBTLE_INC_DIR) SRC_DIR := include ML_KEM_SOURCES := $(shell find $(SRC_DIR) -name '*.hpp') BUILD_DIR := build -TEST_BUILD_DIR := $(BUILD_DIR)/test -BENCHMARK_BUILD_DIR := $(BUILD_DIR)/benchmark -ASAN_BUILD_DIR := $(BUILD_DIR)/asan -DEBUG_ASAN_BUILD_DIR := $(ASAN_BUILD_DIR)/debug -RELEASE_ASAN_BUILD_DIR := $(ASAN_BUILD_DIR)/release -UBSAN_BUILD_DIR := $(BUILD_DIR)/ubsan -DEBUG_UBSAN_BUILD_DIR := $(UBSAN_BUILD_DIR)/debug -RELEASE_UBSAN_BUILD_DIR := $(UBSAN_BUILD_DIR)/release - -TEST_DIR := tests -TEST_SOURCES := $(wildcard $(TEST_DIR)/*.cpp) -TEST_HEADERS := $(wildcard $(TEST_DIR)/*.hpp) -TEST_OBJECTS := $(addprefix $(TEST_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) -TEST_BINARY := $(TEST_BUILD_DIR)/test.out -TEST_LINK_FLAGS := -lgtest -lgtest_main -GTEST_PARALLEL := ./gtest-parallel/gtest-parallel -DEBUG_ASAN_TEST_OBJECTS := $(addprefix $(DEBUG_ASAN_BUILD_DIR)/, $(notdir 
$(patsubst %.cpp,%.o,$(TEST_SOURCES)))) -RELEASE_ASAN_TEST_OBJECTS := $(addprefix $(RELEASE_ASAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) -DEBUG_ASAN_TEST_BINARY := $(DEBUG_ASAN_BUILD_DIR)/test.out -RELEASE_ASAN_TEST_BINARY := $(RELEASE_ASAN_BUILD_DIR)/test.out -DEBUG_UBSAN_TEST_OBJECTS := $(addprefix $(DEBUG_UBSAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) -RELEASE_UBSAN_TEST_OBJECTS := $(addprefix $(RELEASE_UBSAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) -DEBUG_UBSAN_TEST_BINARY := $(DEBUG_UBSAN_BUILD_DIR)/test.out -RELEASE_UBSAN_TEST_BINARY := $(RELEASE_UBSAN_BUILD_DIR)/test.out - -BENCHMARK_DIR := benchmarks -BENCHMARK_SOURCES := $(wildcard $(BENCHMARK_DIR)/*.cpp) -BENCHMARK_HEADERS := $(wildcard $(BENCHMARK_DIR)/*.hpp) -BENCHMARK_OBJECTS := $(addprefix $(BENCHMARK_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(BENCHMARK_SOURCES)))) -BENCHMARK_LINK_FLAGS := -lbenchmark -lbenchmark_main -lpthread -BENCHMARK_BINARY := $(BENCHMARK_BUILD_DIR)/bench.out -PERF_LINK_FLAGS := -lbenchmark -lbenchmark_main -lpfm -lpthread -PERF_BINARY := $(BENCHMARK_BUILD_DIR)/perf.out -BENCHMARK_OUT_FILE := bench_result_on_$(shell uname -s)_$(shell uname -r)_$(shell uname -m)_with_$(CXX)_$(shell $(CXX) -dumpversion).json - -all: test - -$(DEBUG_ASAN_BUILD_DIR): - mkdir -p $@ - -$(RELEASE_ASAN_BUILD_DIR): - mkdir -p $@ - -$(DEBUG_UBSAN_BUILD_DIR): - mkdir -p $@ -$(RELEASE_UBSAN_BUILD_DIR): - mkdir -p $@ - -$(TEST_BUILD_DIR): - mkdir -p $@ - -$(BENCHMARK_BUILD_DIR): - mkdir -p $@ +include tests/test.mk +include benchmarks/bench.mk $(SUBTLE_INC_DIR): git submodule update --init subtle @@ -83,72 +35,10 @@ $(SHA3_INC_DIR): $(SUBTLE_INC_DIR) $(GTEST_PARALLEL): $(SHA3_INC_DIR) git submodule update --init gtest-parallel -$(TEST_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(TEST_BUILD_DIR) $(SHA3_INC_DIR) $(ASCON_INC_DIR) $(SUBTLE_INC_DIR) - $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(RELEASE_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ - 
-$(DEBUG_ASAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(DEBUG_ASAN_BUILD_DIR) $(SHA3_INC_DIR) $(ASCON_INC_DIR) $(SUBTLE_INC_DIR) - $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(DEBUG_ASAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ - -$(RELEASE_ASAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(RELEASE_ASAN_BUILD_DIR) $(SHA3_INC_DIR) $(ASCON_INC_DIR) $(SUBTLE_INC_DIR) - $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(RELEASE_ASAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ - -$(DEBUG_UBSAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(DEBUG_UBSAN_BUILD_DIR) $(SHA3_INC_DIR) $(ASCON_INC_DIR) $(SUBTLE_INC_DIR) - $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(DEBUG_UBSAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ - -$(RELEASE_UBSAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(RELEASE_UBSAN_BUILD_DIR) $(SHA3_INC_DIR) $(ASCON_INC_DIR) $(SUBTLE_INC_DIR) - $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(RELEASE_UBSAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ - -$(TEST_BINARY): $(TEST_OBJECTS) - $(CXX) $(RELEASE_FLAGS) $(LINK_OPT_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ - -$(DEBUG_ASAN_TEST_BINARY): $(DEBUG_ASAN_TEST_OBJECTS) - $(CXX) $(DEBUG_ASAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ - -$(RELEASE_ASAN_TEST_BINARY): $(RELEASE_ASAN_TEST_OBJECTS) - $(CXX) $(RELEASE_ASAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ - -$(DEBUG_UBSAN_TEST_BINARY): $(DEBUG_UBSAN_TEST_OBJECTS) - $(CXX) $(DEBUG_UBSAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ - -$(RELEASE_UBSAN_TEST_BINARY): $(RELEASE_UBSAN_TEST_OBJECTS) - $(CXX) $(RELEASE_UBSAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ - -test: $(TEST_BINARY) $(GTEST_PARALLEL) - $(GTEST_PARALLEL) $< --print_test_times - -debug_asan_test: $(DEBUG_ASAN_TEST_BINARY) $(GTEST_PARALLEL) - $(GTEST_PARALLEL) $< --print_test_times - -release_asan_test: $(RELEASE_ASAN_TEST_BINARY) $(GTEST_PARALLEL) - $(GTEST_PARALLEL) $< --print_test_times - -debug_ubsan_test: $(DEBUG_UBSAN_TEST_BINARY) $(GTEST_PARALLEL) - $(GTEST_PARALLEL) $< --print_test_times - -release_ubsan_test: $(RELEASE_UBSAN_TEST_BINARY) $(GTEST_PARALLEL) - $(GTEST_PARALLEL) $< --print_test_times - 
-$(BENCHMARK_BUILD_DIR)/%.o: $(BENCHMARK_DIR)/%.cpp $(BENCHMARK_BUILD_DIR) $(SHA3_INC_DIR) $(ASCON_INC_DIR) $(SUBTLE_INC_DIR) - $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(RELEASE_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ - -$(BENCHMARK_BINARY): $(BENCHMARK_OBJECTS) - $(CXX) $(RELEASE_FLAGS) $(LINK_OPT_FLAGS) $^ $(BENCHMARK_LINK_FLAGS) -o $@ - -benchmark: $(BENCHMARK_BINARY) - # Must *not* build google-benchmark with libPFM - ./$< --benchmark_time_unit=us --benchmark_min_warmup_time=.5 --benchmark_enable_random_interleaving=true --benchmark_repetitions=10 --benchmark_min_time=0.1s --benchmark_display_aggregates_only=true --benchmark_counters_tabular=true - -$(PERF_BINARY): $(BENCHMARK_OBJECTS) - $(CXX) $(RELEASE_FLAGS) $(LINK_OPT_FLAGS) $^ $(PERF_LINK_FLAGS) -o $@ - -perf: $(PERF_BINARY) - # Must build google-benchmark with libPFM, follow https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7 - ./$< --benchmark_time_unit=us --benchmark_min_warmup_time=.5 --benchmark_enable_random_interleaving=true --benchmark_repetitions=10 --benchmark_min_time=0.1s --benchmark_display_aggregates_only=true --benchmark_counters_tabular=true --benchmark_perf_counters=CYCLES - -.PHONY: format clean - -clean: +.PHONY: clean +clean: ## Remove build directory rm -rf $(BUILD_DIR) -format: $(ML_KEM_SOURCES) $(TEST_SOURCES) $(TEST_HEADERS) $(BENCHMARK_SOURCES) $(BENCHMARK_HEADERS) +.PHONY: format +format: $(ML_KEM_SOURCES) $(TEST_SOURCES) $(TEST_HEADERS) $(BENCHMARK_SOURCES) $(BENCHMARK_HEADERS) ## Format source code clang-format -i $^ diff --git a/benchmarks/bench.mk b/benchmarks/bench.mk new file mode 100644 index 0000000..f5151d5 --- /dev/null +++ b/benchmarks/bench.mk @@ -0,0 +1,29 @@ +BENCHMARK_DIR := benchmarks +BENCHMARK_SOURCES := $(wildcard $(BENCHMARK_DIR)/*.cpp) +BENCHMARK_HEADERS := $(wildcard $(BENCHMARK_DIR)/*.hpp) +BENCHMARK_OBJECTS := $(addprefix $(BENCHMARK_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(BENCHMARK_SOURCES)))) +BENCHMARK_LINK_FLAGS := -lbenchmark 
-lbenchmark_main -lpthread +BENCHMARK_BINARY := $(BENCHMARK_BUILD_DIR)/bench.out +PERF_LINK_FLAGS := -lbenchmark -lbenchmark_main -lpfm -lpthread +PERF_BINARY := $(BENCHMARK_BUILD_DIR)/perf.out +BENCHMARK_OUT_FILE := bench_result_on_$(shell uname -s)_$(shell uname -r)_$(shell uname -m)_with_$(CXX)_$(shell $(CXX) -dumpversion).json + +$(BENCHMARK_BUILD_DIR): + mkdir -p $@ + +$(BENCHMARK_BUILD_DIR)/%.o: $(BENCHMARK_DIR)/%.cpp $(BENCHMARK_BUILD_DIR) $(SHA3_INC_DIR) $(ASCON_INC_DIR) $(SUBTLE_INC_DIR) + $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(RELEASE_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ + +$(BENCHMARK_BINARY): $(BENCHMARK_OBJECTS) + $(CXX) $(RELEASE_FLAGS) $(LINK_OPT_FLAGS) $^ $(BENCHMARK_LINK_FLAGS) -o $@ + +benchmark: $(BENCHMARK_BINARY) ## Build and run all benchmarks, without libPFM -based CPU CYCLE counter statistics + # Must *not* build google-benchmark with libPFM + ./$< --benchmark_time_unit=us --benchmark_min_warmup_time=.5 --benchmark_enable_random_interleaving=true --benchmark_repetitions=10 --benchmark_min_time=0.1s --benchmark_display_aggregates_only=true --benchmark_counters_tabular=true + +$(PERF_BINARY): $(BENCHMARK_OBJECTS) + $(CXX) $(RELEASE_FLAGS) $(LINK_OPT_FLAGS) $^ $(PERF_LINK_FLAGS) -o $@ + +perf: $(PERF_BINARY) ## Build and run all benchmarks, while also collecting libPFM -based CPU CYCLE counter statistics + # Must build google-benchmark with libPFM, follow https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7 + ./$< --benchmark_time_unit=us --benchmark_min_warmup_time=.5 --benchmark_enable_random_interleaving=true --benchmark_repetitions=10 --benchmark_min_time=0.1s --benchmark_display_aggregates_only=true --benchmark_counters_tabular=true --benchmark_perf_counters=CYCLES diff --git a/tests/test.mk b/tests/test.mk new file mode 100644 index 0000000..a266ec3 --- /dev/null +++ b/tests/test.mk @@ -0,0 +1,91 @@ +ASAN_FLAGS := -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanitize=address # From 
https://clang.llvm.org/docs/AddressSanitizer.html +DEBUG_ASAN_FLAGS := $(DEBUG_FLAGS) $(ASAN_FLAGS) +RELEASE_ASAN_FLAGS := -g $(RELEASE_FLAGS) $(ASAN_FLAGS) +UBSAN_FLAGS := -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanitize=undefined # From https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html +DEBUG_UBSAN_FLAGS := $(DEBUG_FLAGS) $(UBSAN_FLAGS) +RELEASE_UBSAN_FLAGS := -g $(RELEASE_FLAGS) $(UBSAN_FLAGS) + +TEST_BUILD_DIR := $(BUILD_DIR)/test +BENCHMARK_BUILD_DIR := $(BUILD_DIR)/benchmark +ASAN_BUILD_DIR := $(BUILD_DIR)/asan +DEBUG_ASAN_BUILD_DIR := $(ASAN_BUILD_DIR)/debug +RELEASE_ASAN_BUILD_DIR := $(ASAN_BUILD_DIR)/release +UBSAN_BUILD_DIR := $(BUILD_DIR)/ubsan +DEBUG_UBSAN_BUILD_DIR := $(UBSAN_BUILD_DIR)/debug +RELEASE_UBSAN_BUILD_DIR := $(UBSAN_BUILD_DIR)/release + +TEST_DIR := tests +TEST_SOURCES := $(wildcard $(TEST_DIR)/*.cpp) +TEST_HEADERS := $(wildcard $(TEST_DIR)/*.hpp) +TEST_OBJECTS := $(addprefix $(TEST_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) +TEST_BINARY := $(TEST_BUILD_DIR)/test.out +TEST_LINK_FLAGS := -lgtest -lgtest_main +GTEST_PARALLEL := ./gtest-parallel/gtest-parallel +DEBUG_ASAN_TEST_OBJECTS := $(addprefix $(DEBUG_ASAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) +RELEASE_ASAN_TEST_OBJECTS := $(addprefix $(RELEASE_ASAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) +DEBUG_ASAN_TEST_BINARY := $(DEBUG_ASAN_BUILD_DIR)/test.out +RELEASE_ASAN_TEST_BINARY := $(RELEASE_ASAN_BUILD_DIR)/test.out +DEBUG_UBSAN_TEST_OBJECTS := $(addprefix $(DEBUG_UBSAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) +RELEASE_UBSAN_TEST_OBJECTS := $(addprefix $(RELEASE_UBSAN_BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SOURCES)))) +DEBUG_UBSAN_TEST_BINARY := $(DEBUG_UBSAN_BUILD_DIR)/test.out +RELEASE_UBSAN_TEST_BINARY := $(RELEASE_UBSAN_BUILD_DIR)/test.out + +$(DEBUG_ASAN_BUILD_DIR): + mkdir -p $@ + +$(RELEASE_ASAN_BUILD_DIR): + mkdir -p $@ + +$(DEBUG_UBSAN_BUILD_DIR): + mkdir -p $@ + 
+$(RELEASE_UBSAN_BUILD_DIR): + mkdir -p $@ + +$(TEST_BUILD_DIR): + mkdir -p $@ + +$(TEST_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(TEST_BUILD_DIR) $(SHA3_INC_DIR) $(ASCON_INC_DIR) $(SUBTLE_INC_DIR) + $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(RELEASE_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ + +$(DEBUG_ASAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(DEBUG_ASAN_BUILD_DIR) $(SHA3_INC_DIR) $(ASCON_INC_DIR) $(SUBTLE_INC_DIR) + $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(DEBUG_ASAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ + +$(RELEASE_ASAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(RELEASE_ASAN_BUILD_DIR) $(SHA3_INC_DIR) $(ASCON_INC_DIR) $(SUBTLE_INC_DIR) + $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(RELEASE_ASAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ + +$(DEBUG_UBSAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(DEBUG_UBSAN_BUILD_DIR) $(SHA3_INC_DIR) $(ASCON_INC_DIR) $(SUBTLE_INC_DIR) + $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(DEBUG_UBSAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ + +$(RELEASE_UBSAN_BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(RELEASE_UBSAN_BUILD_DIR) $(SHA3_INC_DIR) $(ASCON_INC_DIR) $(SUBTLE_INC_DIR) + $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(RELEASE_UBSAN_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@ + +$(TEST_BINARY): $(TEST_OBJECTS) + $(CXX) $(RELEASE_FLAGS) $(LINK_OPT_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ + +$(DEBUG_ASAN_TEST_BINARY): $(DEBUG_ASAN_TEST_OBJECTS) + $(CXX) $(DEBUG_ASAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ + +$(RELEASE_ASAN_TEST_BINARY): $(RELEASE_ASAN_TEST_OBJECTS) + $(CXX) $(RELEASE_ASAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ + +$(DEBUG_UBSAN_TEST_BINARY): $(DEBUG_UBSAN_TEST_OBJECTS) + $(CXX) $(DEBUG_UBSAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ + +$(RELEASE_UBSAN_TEST_BINARY): $(RELEASE_UBSAN_TEST_OBJECTS) + $(CXX) $(RELEASE_UBSAN_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@ + +test: $(TEST_BINARY) $(GTEST_PARALLEL) ## Build and run all tests in RELEASE mode + $(GTEST_PARALLEL) $< --print_test_times + +debug_asan_test: $(DEBUG_ASAN_TEST_BINARY) $(GTEST_PARALLEL) ## Build and run all tests in DEBUG mode, with Address 
Sanitizer + $(GTEST_PARALLEL) $< --print_test_times + +release_asan_test: $(RELEASE_ASAN_TEST_BINARY) $(GTEST_PARALLEL) ## Build and run all tests in RELEASE mode, with Address Sanitizer + $(GTEST_PARALLEL) $< --print_test_times + +debug_ubsan_test: $(DEBUG_UBSAN_TEST_BINARY) $(GTEST_PARALLEL) ## Build and run all tests in DEBUG mode, with Undefined Behavior Sanitizer + $(GTEST_PARALLEL) $< --print_test_times + +release_ubsan_test: $(RELEASE_UBSAN_TEST_BINARY) $(GTEST_PARALLEL) ## Build and run all tests in RELEASE mode, with Undefined Behavior Sanitizer + $(GTEST_PARALLEL) $< --print_test_times From 8437420c9e9d692aafe6f39ae0f6a88804d66a9a Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Mon, 18 Nov 2024 20:41:20 +0400 Subject: [PATCH 04/14] Switch to using "RandomShake" as CSPRNG Signed-off-by: Anjan Roy --- Makefile | 12 +++-- benchmarks/bench_ml_kem_1024.cpp | 22 ++++---- benchmarks/bench_ml_kem_512.cpp | 22 ++++---- benchmarks/bench_ml_kem_768.cpp | 22 ++++---- include/ml_kem/internals/k_pke.hpp | 2 +- include/ml_kem/internals/math/field.hpp | 6 +-- include/ml_kem/internals/ml_kem.hpp | 6 +-- include/ml_kem/internals/poly/sampling.hpp | 4 +- include/ml_kem/internals/rng/prng.hpp | 59 ---------------------- tests/test_compression.cpp | 5 +- tests/test_field.cpp | 7 +-- tests/test_helper.hpp | 6 +-- tests/test_ml_kem_1024.cpp | 27 +++++----- tests/test_ml_kem_512.cpp | 27 +++++----- tests/test_ml_kem_768.cpp | 27 +++++----- tests/test_serialize.cpp | 5 +- 16 files changed, 105 insertions(+), 154 deletions(-) delete mode 100644 include/ml_kem/internals/rng/prng.hpp diff --git a/Makefile b/Makefile index b6d0a90..6675866 100644 --- a/Makefile +++ b/Makefile @@ -15,9 +15,10 @@ RELEASE_FLAGS := -O3 -march=native LINK_OPT_FLAGS := -flto I_FLAGS := -I ./include -SHA3_INC_DIR = ./sha3/include -SUBTLE_INC_DIR = ./subtle/include -DEP_IFLAGS = -I $(SHA3_INC_DIR) -I $(SUBTLE_INC_DIR) +SHA3_INC_DIR := ./sha3/include +SUBTLE_INC_DIR := ./subtle/include +RANDOMSHAKE_INC_DIR := 
./RandomShake/include +DEP_IFLAGS := -I $(SHA3_INC_DIR) -I $(SUBTLE_INC_DIR) -I $(RANDOMSHAKE_INC_DIR) SRC_DIR := include ML_KEM_SOURCES := $(shell find $(SRC_DIR) -name '*.hpp') @@ -29,7 +30,10 @@ include benchmarks/bench.mk $(SUBTLE_INC_DIR): git submodule update --init subtle -$(SHA3_INC_DIR): $(SUBTLE_INC_DIR) +$(RANDOMSHAKE_INC_DIR): $(SUBTLE_INC_DIR) + git submodule update --init RandomShake + +$(SHA3_INC_DIR): $(RANDOMSHAKE_INC_DIR) git submodule update --init sha3 $(GTEST_PARALLEL): $(SHA3_INC_DIR) diff --git a/benchmarks/bench_ml_kem_1024.cpp b/benchmarks/bench_ml_kem_1024.cpp index 6b037a2..6658997 100644 --- a/benchmarks/bench_ml_kem_1024.cpp +++ b/benchmarks/bench_ml_kem_1024.cpp @@ -13,10 +13,10 @@ bench_ml_kem_1024_keygen(benchmark::State& state) std::array pubkey{}; std::array seckey{}; - ml_kem_prng::prng_t<256> prng{}; + randomshake::randomshake_t<256> csprng{}; - prng.read(seed_d); - prng.read(seed_z); + csprng.generate(seed_d); + csprng.generate(seed_z); for (auto _ : state) { ml_kem_1024::keygen(seed_d, seed_z, pubkey, seckey); @@ -45,11 +45,11 @@ bench_ml_kem_1024_encapsulate(benchmark::State& state) std::array cipher{}; std::array shared_secret{}; - ml_kem_prng::prng_t<256> prng{}; + randomshake::randomshake_t<256> csprng{}; - prng.read(seed_d); - prng.read(seed_z); - prng.read(seed_m); + csprng.generate(seed_d); + csprng.generate(seed_z); + csprng.generate(seed_m); ml_kem_1024::keygen(seed_d, seed_z, pubkey, seckey); @@ -84,11 +84,11 @@ bench_ml_kem_1024_decapsulate(benchmark::State& state) std::array shared_secret_sender{}; std::array shared_secret_receiver{}; - ml_kem_prng::prng_t<256> prng{}; + randomshake::randomshake_t<256> csprng{}; - prng.read(seed_d); - prng.read(seed_z); - prng.read(seed_m); + csprng.generate(seed_d); + csprng.generate(seed_z); + csprng.generate(seed_m); ml_kem_1024::keygen(seed_d, seed_z, pubkey, seckey); (void)ml_kem_1024::encapsulate(seed_m, pubkey, cipher, shared_secret_sender); diff --git 
a/benchmarks/bench_ml_kem_512.cpp b/benchmarks/bench_ml_kem_512.cpp index d2ff488..7a56bb4 100644 --- a/benchmarks/bench_ml_kem_512.cpp +++ b/benchmarks/bench_ml_kem_512.cpp @@ -13,10 +13,10 @@ bench_ml_kem_512_keygen(benchmark::State& state) std::array pubkey{}; std::array seckey{}; - ml_kem_prng::prng_t<128> prng{}; + randomshake::randomshake_t<128> csprng{}; - prng.read(seed_d); - prng.read(seed_z); + csprng.generate(seed_d); + csprng.generate(seed_z); for (auto _ : state) { ml_kem_512::keygen(seed_d, seed_z, pubkey, seckey); @@ -45,11 +45,11 @@ bench_ml_kem_512_encapsulate(benchmark::State& state) std::array cipher{}; std::array shared_secret{}; - ml_kem_prng::prng_t<128> prng{}; + randomshake::randomshake_t<128> csprng{}; - prng.read(seed_d); - prng.read(seed_z); - prng.read(seed_m); + csprng.generate(seed_d); + csprng.generate(seed_z); + csprng.generate(seed_m); ml_kem_512::keygen(seed_d, seed_z, pubkey, seckey); @@ -84,11 +84,11 @@ bench_ml_kem_512_decapsulate(benchmark::State& state) std::array shared_secret_sender{}; std::array shared_secret_receiver{}; - ml_kem_prng::prng_t<128> prng{}; + randomshake::randomshake_t<128> csprng{}; - prng.read(seed_d); - prng.read(seed_z); - prng.read(seed_m); + csprng.generate(seed_d); + csprng.generate(seed_z); + csprng.generate(seed_m); ml_kem_512::keygen(seed_d, seed_z, pubkey, seckey); (void)ml_kem_512::encapsulate(seed_m, pubkey, cipher, shared_secret_sender); diff --git a/benchmarks/bench_ml_kem_768.cpp b/benchmarks/bench_ml_kem_768.cpp index 8411714..2767b1c 100644 --- a/benchmarks/bench_ml_kem_768.cpp +++ b/benchmarks/bench_ml_kem_768.cpp @@ -13,10 +13,10 @@ bench_ml_kem_768_keygen(benchmark::State& state) std::array pubkey{}; std::array seckey{}; - ml_kem_prng::prng_t<192> prng{}; + randomshake::randomshake_t<192> csprng{}; - prng.read(seed_d); - prng.read(seed_z); + csprng.generate(seed_d); + csprng.generate(seed_z); for (auto _ : state) { ml_kem_768::keygen(seed_d, seed_z, pubkey, seckey); @@ -45,11 +45,11 @@ 
bench_ml_kem_768_encapsulate(benchmark::State& state) std::array cipher{}; std::array shared_secret{}; - ml_kem_prng::prng_t<192> prng{}; + randomshake::randomshake_t<192> csprng{}; - prng.read(seed_d); - prng.read(seed_z); - prng.read(seed_m); + csprng.generate(seed_d); + csprng.generate(seed_z); + csprng.generate(seed_m); ml_kem_768::keygen(seed_d, seed_z, pubkey, seckey); @@ -84,11 +84,11 @@ bench_ml_kem_768_decapsulate(benchmark::State& state) std::array shared_secret_sender{}; std::array shared_secret_receiver{}; - ml_kem_prng::prng_t<192> prng{}; + randomshake::randomshake_t<192> csprng{}; - prng.read(seed_d); - prng.read(seed_z); - prng.read(seed_m); + csprng.generate(seed_d); + csprng.generate(seed_z); + csprng.generate(seed_m); ml_kem_768::keygen(seed_d, seed_z, pubkey, seckey); (void)ml_kem_768::encapsulate(seed_m, pubkey, cipher, shared_secret_sender); diff --git a/include/ml_kem/internals/k_pke.hpp b/include/ml_kem/internals/k_pke.hpp index e6b4b4b..363f999 100644 --- a/include/ml_kem/internals/k_pke.hpp +++ b/include/ml_kem/internals/k_pke.hpp @@ -4,7 +4,7 @@ #include "ml_kem/internals/poly/sampling.hpp" #include "ml_kem/internals/utility/params.hpp" #include "ml_kem/internals/utility/utils.hpp" -#include "sha3_512.hpp" +#include "sha3/sha3_512.hpp" // Public Key Encryption Scheme namespace k_pke { diff --git a/include/ml_kem/internals/math/field.hpp b/include/ml_kem/internals/math/field.hpp index 49a9c3b..254aa6a 100644 --- a/include/ml_kem/internals/math/field.hpp +++ b/include/ml_kem/internals/math/field.hpp @@ -1,6 +1,6 @@ #pragma once -#include "ml_kem/internals/rng/prng.hpp" #include "ml_kem/internals/utility/force_inline.hpp" +#include "randomshake/randomshake.hpp" #include #include @@ -117,10 +117,10 @@ struct zq_t // Samples a random Zq element, using pseudo random number generator. 
template - static forceinline zq_t random(ml_kem_prng::prng_t& prng) + static forceinline zq_t random(randomshake::randomshake_t& csprng) { uint16_t res = 0; - prng.read(std::span(reinterpret_cast(&res), sizeof(res))); + csprng.generate(std::span(reinterpret_cast(&res), sizeof(res))); return zq_t::from_non_reduced(static_cast(res)); } diff --git a/include/ml_kem/internals/ml_kem.hpp b/include/ml_kem/internals/ml_kem.hpp index 9e99752..8645890 100644 --- a/include/ml_kem/internals/ml_kem.hpp +++ b/include/ml_kem/internals/ml_kem.hpp @@ -1,9 +1,9 @@ #pragma once #include "k_pke.hpp" #include "ml_kem/internals/utility/utils.hpp" -#include "sha3_256.hpp" -#include "sha3_512.hpp" -#include "shake256.hpp" +#include "sha3/sha3_256.hpp" +#include "sha3/sha3_512.hpp" +#include "sha3/shake256.hpp" #include // Key Encapsulation Mechanism diff --git a/include/ml_kem/internals/poly/sampling.hpp b/include/ml_kem/internals/poly/sampling.hpp index 36d8426..1a53891 100644 --- a/include/ml_kem/internals/poly/sampling.hpp +++ b/include/ml_kem/internals/poly/sampling.hpp @@ -3,8 +3,8 @@ #include "ml_kem/internals/poly/ntt.hpp" #include "ml_kem/internals/utility/force_inline.hpp" #include "ml_kem/internals/utility/params.hpp" -#include "shake128.hpp" -#include "shake256.hpp" +#include "sha3/shake128.hpp" +#include "sha3/shake256.hpp" #include namespace ml_kem_utils { diff --git a/include/ml_kem/internals/rng/prng.hpp b/include/ml_kem/internals/rng/prng.hpp deleted file mode 100644 index 2be3c93..0000000 --- a/include/ml_kem/internals/rng/prng.hpp +++ /dev/null @@ -1,59 +0,0 @@ -#pragma once -#include "ml_kem/internals/utility/force_inline.hpp" -#include "shake256.hpp" -#include -#include - -namespace ml_kem_prng { - -// Pseudo Random Number Generator s.t. N (>0) -many random bytes are read from SHAKE256 Xof state, arbitrary many times s.t. 
SHAKE256 state is obtained by -// -// - either hashing `bit_security_level / 8` -bytes sampled using std::random_device ( default ) -// - or hashing `bit_security_level / 8` -bytes supplied as argument ( explicit ) -// -// Note, std::random_device's behaviour is implementation defined feature, so this PRNG implementation doesn't guarantee that -// it'll generate cryptographic secure random bytes if you opt for using default constructor of this struct. -// -// I strongly suggest you read https://en.cppreference.com/w/cpp/numeric/random/random_device/random_device before using default constructor. -// When using explicit constructor, it's your responsibility to supply `bit_security_level / 8` -many random seed bytes. -template - requires((bit_security_level == 128) || (bit_security_level == 192) || (bit_security_level == 256)) -struct prng_t -{ -private: - shake256::shake256_t state{}; - -public: - // Default constructor which seeds PRNG with system randomness. - forceinline prng_t() - { - std::array::digits> seed{}; - auto seed_span = std::span(seed); - - // Read more @ https://en.cppreference.com/w/cpp/numeric/random/random_device/random_device - std::random_device rd{}; - - size_t off = 0; - while (off < seed_span.size()) { - const uint32_t v = rd(); - std::memcpy(seed_span.subspan(off, sizeof(v)).data(), &v, sizeof(v)); - - off += sizeof(v); - } - - state.absorb(seed_span); - state.finalize(); - } - - // Explicit constructor which can be used for seeding PRNG. - forceinline explicit constexpr prng_t(std::span::digits> seed) - { - state.absorb(seed); - state.finalize(); - } - - // Once PRNG is seeded i.e. PRNG object is constructed, you can request arbitrary many pseudo-random bytes from PRNG. 
- forceinline constexpr void read(std::span bytes) { state.squeeze(bytes); } -}; - -} diff --git a/tests/test_compression.cpp b/tests/test_compression.cpp index d017f29..e6ae3a3 100644 --- a/tests/test_compression.cpp +++ b/tests/test_compression.cpp @@ -1,5 +1,6 @@ #include "ml_kem/internals/poly/compression.hpp" #include "ml_kem/internals/utility/force_inline.hpp" +#include "randomshake/randomshake.hpp" #include // Decompression error that can happen for some given `d` s.t. @@ -36,10 +37,10 @@ test_zq_compression() requires(itr_cnt > 0) { bool res = true; - ml_kem_prng::prng_t<256> prng{}; + randomshake::randomshake_t<256> csprng{}; for (size_t i = 0; i < itr_cnt; i++) { - const auto a = ml_kem_field::zq_t::random(prng); + const auto a = ml_kem_field::zq_t::random(csprng); const auto b = ml_kem_utils::compress(a); const auto c = ml_kem_utils::decompress(b); diff --git a/tests/test_field.cpp b/tests/test_field.cpp index 9614c5c..0d06dcb 100644 --- a/tests/test_field.cpp +++ b/tests/test_field.cpp @@ -1,4 +1,5 @@ #include "ml_kem/internals/math/field.hpp" +#include "randomshake/randomshake.hpp" #include // Test functional correctness of ML-KEM prime field operations, by running through multiple rounds @@ -7,11 +8,11 @@ TEST(ML_KEM, ArithmeticOverZq) { constexpr size_t ITERATION_COUNT = 1ul << 20; - ml_kem_prng::prng_t<128> prng{}; + randomshake::randomshake_t<128> csprng{}; for (size_t i = 0; i < ITERATION_COUNT; i++) { - const auto a = ml_kem_field::zq_t::random(prng); - const auto b = ml_kem_field::zq_t::random(prng); + const auto a = ml_kem_field::zq_t::random(csprng); + const auto b = ml_kem_field::zq_t::random(csprng); // Addition, Subtraction and Negation const auto c = a + b; diff --git a/tests/test_helper.hpp b/tests/test_helper.hpp index 0589a9a..e7c4372 100644 --- a/tests/test_helper.hpp +++ b/tests/test_helper.hpp @@ -1,7 +1,7 @@ #pragma once #include "ml_kem/internals/math/field.hpp" -#include "ml_kem/internals/rng/prng.hpp" #include 
"ml_kem/internals/utility/force_inline.hpp" +#include "randomshake/randomshake.hpp" #include #include #include @@ -75,10 +75,10 @@ make_malformed_pubkey(std::span pubkey) // Given a ML-KEM-{512, 768, 1024} cipher text, this function flips a random bit of it, while sampling choice of random index from input PRNG. template static forceinline constexpr void -random_bitflip_in_cipher_text(std::span cipher, ml_kem_prng::prng_t& prng) +random_bitflip_in_cipher_text(std::span cipher, randomshake::randomshake_t& csprng) { size_t random_u64 = 0; - prng.read(std::span(reinterpret_cast(&random_u64), sizeof(random_u64))); + csprng.generate(std::span(reinterpret_cast(&random_u64), sizeof(random_u64))); const size_t random_byte_idx = random_u64 % cipher_byte_len; const size_t random_bit_idx = random_u64 % 8; diff --git a/tests/test_ml_kem_1024.cpp b/tests/test_ml_kem_1024.cpp index 4ec30ab..3951a2f 100644 --- a/tests/test_ml_kem_1024.cpp +++ b/tests/test_ml_kem_1024.cpp @@ -1,4 +1,5 @@ #include "ml_kem/ml_kem_1024.hpp" +#include "randomshake/randomshake.hpp" #include "test_helper.hpp" #include @@ -25,10 +26,10 @@ TEST(ML_KEM, ML_KEM_1024_KeygenEncapsDecaps) std::array shared_secret_sender{}; std::array shared_secret_receiver{}; - ml_kem_prng::prng_t<256> prng{}; - prng.read(seed_d); - prng.read(seed_z); - prng.read(seed_m); + randomshake::randomshake_t<256> csprng{}; + csprng.generate(seed_d); + csprng.generate(seed_z); + csprng.generate(seed_m); ml_kem_1024::keygen(seed_d, seed_z, pubkey, seckey); const auto is_encapsulated = ml_kem_1024::encapsulate(seed_m, pubkey, cipher, shared_secret_sender); @@ -55,10 +56,10 @@ TEST(ML_KEM, ML_KEM_1024_EncapsFailureDueToNonReducedPubKey) std::array shared_secret{}; - ml_kem_prng::prng_t<256> prng{}; - prng.read(seed_d); - prng.read(seed_z); - prng.read(seed_m); + randomshake::randomshake_t<256> csprng{}; + csprng.generate(seed_d); + csprng.generate(seed_z); + csprng.generate(seed_m); ml_kem_1024::keygen(seed_d, seed_z, pubkey, seckey); @@ 
-89,15 +90,15 @@ TEST(ML_KEM, ML_KEM_1024_DecapsFailureDueToBitFlippedCipherText) std::array shared_secret_sender{}; std::array shared_secret_receiver{}; - ml_kem_prng::prng_t<256> prng{}; - prng.read(seed_d); - prng.read(seed_z); - prng.read(seed_m); + randomshake::randomshake_t<256> csprng{}; + csprng.generate(seed_d); + csprng.generate(seed_z); + csprng.generate(seed_m); ml_kem_1024::keygen(seed_d, seed_z, pubkey, seckey); const auto is_encapsulated = ml_kem_1024::encapsulate(seed_m, pubkey, cipher, shared_secret_sender); - random_bitflip_in_cipher_text(cipher, prng); + random_bitflip_in_cipher_text(cipher, csprng); ml_kem_1024::decapsulate(seckey, cipher, shared_secret_receiver); EXPECT_TRUE(is_encapsulated); diff --git a/tests/test_ml_kem_512.cpp b/tests/test_ml_kem_512.cpp index a91b0b6..e9683a8 100644 --- a/tests/test_ml_kem_512.cpp +++ b/tests/test_ml_kem_512.cpp @@ -1,4 +1,5 @@ #include "ml_kem/ml_kem_512.hpp" +#include "randomshake/randomshake.hpp" #include "test_helper.hpp" #include #include @@ -26,10 +27,10 @@ TEST(ML_KEM, ML_KEM_512_KeygenEncapsDecaps) std::array shared_secret_sender{}; std::array shared_secret_receiver{}; - ml_kem_prng::prng_t<128> prng{}; - prng.read(seed_d); - prng.read(seed_z); - prng.read(seed_m); + randomshake::randomshake_t<128> csprng{}; + csprng.generate(seed_d); + csprng.generate(seed_z); + csprng.generate(seed_m); ml_kem_512::keygen(seed_d, seed_z, pubkey, seckey); const auto is_encapsulated = ml_kem_512::encapsulate(seed_m, pubkey, cipher, shared_secret_sender); @@ -56,10 +57,10 @@ TEST(ML_KEM, ML_KEM_512_EncapsFailureDueToNonReducedPubKey) std::array shared_secret{}; - ml_kem_prng::prng_t<128> prng{}; - prng.read(seed_d); - prng.read(seed_z); - prng.read(seed_m); + randomshake::randomshake_t<128> csprng{}; + csprng.generate(seed_d); + csprng.generate(seed_z); + csprng.generate(seed_m); ml_kem_512::keygen(seed_d, seed_z, pubkey, seckey); @@ -90,15 +91,15 @@ TEST(ML_KEM, ML_KEM_512_DecapsFailureDueToBitFlippedCipherText) 
std::array shared_secret_sender{}; std::array shared_secret_receiver{}; - ml_kem_prng::prng_t<128> prng{}; - prng.read(seed_d); - prng.read(seed_z); - prng.read(seed_m); + randomshake::randomshake_t<128> csprng{}; + csprng.generate(seed_d); + csprng.generate(seed_z); + csprng.generate(seed_m); ml_kem_512::keygen(seed_d, seed_z, pubkey, seckey); const auto is_encapsulated = ml_kem_512::encapsulate(seed_m, pubkey, cipher, shared_secret_sender); - random_bitflip_in_cipher_text(cipher, prng); + random_bitflip_in_cipher_text(cipher, csprng); ml_kem_512::decapsulate(seckey, cipher, shared_secret_receiver); EXPECT_TRUE(is_encapsulated); diff --git a/tests/test_ml_kem_768.cpp b/tests/test_ml_kem_768.cpp index e2ad5e7..464e827 100644 --- a/tests/test_ml_kem_768.cpp +++ b/tests/test_ml_kem_768.cpp @@ -1,4 +1,5 @@ #include "ml_kem/ml_kem_768.hpp" +#include "randomshake/randomshake.hpp" #include "test_helper.hpp" #include @@ -25,10 +26,10 @@ TEST(ML_KEM, ML_KEM_768_KeygenEncapsDecaps) std::array shared_secret_sender{}; std::array shared_secret_receiver{}; - ml_kem_prng::prng_t<192> prng{}; - prng.read(seed_d); - prng.read(seed_z); - prng.read(seed_m); + randomshake::randomshake_t<192> csprng{}; + csprng.generate(seed_d); + csprng.generate(seed_z); + csprng.generate(seed_m); ml_kem_768::keygen(seed_d, seed_z, pubkey, seckey); const auto is_encapsulated = ml_kem_768::encapsulate(seed_m, pubkey, cipher, shared_secret_sender); @@ -55,10 +56,10 @@ TEST(ML_KEM, ML_KEM_768_EncapsFailureDueToNonReducedPubKey) std::array shared_secret{}; - ml_kem_prng::prng_t<192> prng{}; - prng.read(seed_d); - prng.read(seed_z); - prng.read(seed_m); + randomshake::randomshake_t<192> csprng{}; + csprng.generate(seed_d); + csprng.generate(seed_z); + csprng.generate(seed_m); ml_kem_768::keygen(seed_d, seed_z, pubkey, seckey); @@ -89,15 +90,15 @@ TEST(ML_KEM, ML_KEM_768_DecapsFailureDueToBitFlippedCipherText) std::array shared_secret_sender{}; std::array shared_secret_receiver{}; - 
ml_kem_prng::prng_t<192> prng{}; - prng.read(seed_d); - prng.read(seed_z); - prng.read(seed_m); + randomshake::randomshake_t<192> csprng{}; + csprng.generate(seed_d); + csprng.generate(seed_z); + csprng.generate(seed_m); ml_kem_768::keygen(seed_d, seed_z, pubkey, seckey); const auto is_encapsulated = ml_kem_768::encapsulate(seed_m, pubkey, cipher, shared_secret_sender); - random_bitflip_in_cipher_text(cipher, prng); + random_bitflip_in_cipher_text(cipher, csprng); ml_kem_768::decapsulate(seckey, cipher, shared_secret_receiver); EXPECT_TRUE(is_encapsulated); diff --git a/tests/test_serialize.cpp b/tests/test_serialize.cpp index 433a738..a6f9c43 100644 --- a/tests/test_serialize.cpp +++ b/tests/test_serialize.cpp @@ -1,4 +1,5 @@ #include "ml_kem/internals/poly/serialize.hpp" +#include "randomshake/randomshake.hpp" #include // Ensure that degree-255 polynomial serialization to byte array ( of length 32*l -bytes ) and deserialization of @@ -16,10 +17,10 @@ test_serialize_deserialize() std::vector dst(ml_kem_ntt::N); std::vector bytes(blen); - ml_kem_prng::prng_t<256> prng{}; + randomshake::randomshake_t<256> csprng{}; for (size_t i = 0; i < ml_kem_ntt::N; i++) { - src[i] = ml_kem_field::zq_t::random(prng); + src[i] = ml_kem_field::zq_t::random(csprng); } using poly_t = std::span; From 19512ecb53882ff7b24ec44257807eb5f953a2f6 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Mon, 18 Nov 2024 22:19:09 +0400 Subject: [PATCH 05/14] Update example to use "RandomShake" as CSPRNG Signed-off-by: Anjan Roy --- examples/ml_kem_768.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/ml_kem_768.cpp b/examples/ml_kem_768.cpp index e9690ad..24d3ea8 100644 --- a/examples/ml_kem_768.cpp +++ b/examples/ml_kem_768.cpp @@ -1,4 +1,5 @@ #include "ml_kem/ml_kem_768.hpp" +#include "randomshake/randomshake.hpp" #include #include #include @@ -54,17 +55,17 @@ main() auto receiver_key_span = std::span(receiver_key); // Pseudo-randomness source - 
ml_kem_prng::prng_t<128> prng{}; + randomshake::randomshake_t<192> csprng{}; // Fill up seeds using PRNG - prng.read(d_span); - prng.read(z_span); + csprng.generate(d_span); + csprng.generate(z_span); // Generate a keypair ml_kem_768::keygen(d_span, z_span, pkey_span, skey_span); // Fill up seed required for key encapsulation, using PRNG - prng.read(m_span); + csprng.generate(m_span); // Encapsulate key, compute cipher text and obtain KDF const bool is_encapsulated = ml_kem_768::encapsulate(m_span, pkey_span, cipher_span, sender_key_span); From eb4a8ea335f1445f556a6a006bce545945723b8d Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Mon, 18 Nov 2024 22:19:51 +0400 Subject: [PATCH 06/14] Run examples using Make command Signed-off-by: Anjan Roy --- Makefile | 1 + examples/example.mk | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 examples/example.mk diff --git a/Makefile b/Makefile index 6675866..c8c8d6d 100644 --- a/Makefile +++ b/Makefile @@ -26,6 +26,7 @@ BUILD_DIR := build include tests/test.mk include benchmarks/bench.mk +include examples/example.mk $(SUBTLE_INC_DIR): git submodule update --init subtle diff --git a/examples/example.mk b/examples/example.mk new file mode 100644 index 0000000..2229d1d --- /dev/null +++ b/examples/example.mk @@ -0,0 +1,15 @@ +EXAMPLE_BUILD_DIR := $(BUILD_DIR)/example + +EXAMPLE_DIR := examples +EXAMPLE_SOURCES := $(wildcard $(EXAMPLE_DIR)/*.cpp) +EXAMPLE_HEADERS := $(wildcard $(EXAMPLE_DIR)/*.hpp) +EXAMPLE_EXECS := $(addprefix $(EXAMPLE_BUILD_DIR)/, $(notdir $(EXAMPLE_SOURCES:.cpp=.exe))) + +$(EXAMPLE_BUILD_DIR): + mkdir -p $@ + +$(EXAMPLE_BUILD_DIR)/%.exe: $(EXAMPLE_DIR)/%.cpp $(EXAMPLE_BUILD_DIR) + $(CXX) $(CXX_DEFS) $(CXX_FLAGS) $(WARN_FLAGS) $(RELEASE_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) $< -o $@ + +example: $(EXAMPLE_EXECS) ## Build and run example program, demonstrating usage of ML-KEM API + $(foreach exec,$^,./$(exec);) From 343a8009430c53c8b4d42d7f322b45623fd400a1 Mon Sep 17 00:00:00 2001 From: Anjan Roy 
Date: Mon, 18 Nov 2024 23:08:38 +0400 Subject: [PATCH 07/14] =?UTF-8?q?Refactor=20github=20actions=20file=20usin?= =?UTF-8?q?g=20AI=20=F0=9F=A4=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Anjan Roy --- .github/workflows/test_ci.yml | 72 +++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/.github/workflows/test_ci.yml b/.github/workflows/test_ci.yml index a5b1a3e..9e4b5d0 100644 --- a/.github/workflows/test_ci.yml +++ b/.github/workflows/test_ci.yml @@ -11,31 +11,53 @@ jobs: runs-on: ${{matrix.os}} strategy: matrix: - os: [ubuntu-24.04, macos-latest] + os: [ubuntu-latest, macos-latest] compiler: [g++, clang++] + build_type: [debug, release] + test_type: [standard, asan, ubsan] + max-parallel: 4 steps: - - uses: actions/checkout@v4 - - name: Setup Google-Test - run: | - pushd ~ - git clone https://github.com/google/googletest.git -b v1.15.0 - pushd googletest - mkdir build - pushd build - cmake .. 
-DBUILD_GMOCK=OFF - make - sudo make install - popd - popd - popd - - name: Execute Tests on ${{matrix.os}}, compiled with ${{matrix.compiler}} - run: CXX=${{matrix.compiler}} make -j - - name: Execute Tests with AddressSanitizer, in DEBUG mode, on ${{matrix.os}}, compiled with ${{matrix.compiler}} - run: CXX=${{matrix.compiler}} make debug_asan_test -j - - name: Execute Tests with AddressSanitizer, in RELEASE mode, on ${{matrix.os}}, compiled with ${{matrix.compiler}} - run: CXX=${{matrix.compiler}} make release_asan_test -j - - name: Execute Tests with UndefinedBehaviourSanitizer, in DEBUG mode, on ${{matrix.os}}, compiled with ${{matrix.compiler}} - run: CXX=${{matrix.compiler}} make debug_ubsan_test -j - - name: Execute Tests with UndefinedBehaviourSanitizer, in RELEASE mode, on ${{matrix.os}}, compiled with ${{matrix.compiler}} - run: CXX=${{matrix.compiler}} make release_ubsan_test -j + - uses: actions/checkout@v4 + + - name: Setup Google Test + uses: Bacondish2023/setup-googletest@v1 + with: + tag: v1.15.2 + + + - name: Build and Test (${{ matrix.compiler }}, ${{ matrix.build_type }}, ${{ matrix.test_type }}) + run: | + CXX=${{ matrix.compiler }} + if [[ ${{ matrix.test_type }} == "standard" ]]; then + make test -j 2>&1 | tee build.log + else + make ${{ matrix.build_type }}_${{ matrix.test_type }}_test -j 2>&1 | tee build.log + fi + if [ $? -ne 0 ]; then + echo "Build or Test Failed! See build.log for details." + exit 1 + fi + + - name: Upload Build Log + uses: actions/upload-artifact@v3 + with: + name: build-log-${{ matrix.compiler }}-${{ matrix.build_type }}-${{ matrix.test_type }} + path: build.log + + + - name: Run Examples + if: ${{ matrix.test_type == 'standard' && matrix.build_type == 'release' }} + run: | + CXX=${{ matrix.compiler }} make example -j 2>&1 | tee example.log + if [ $? -ne 0 ]; then + echo "Example execution Failed! See example.log for details." 
+ exit 1 + fi + + - name: Upload Example Log (if failed) + if: ${{ steps.Run_Examples.outcome != 'success' && matrix.test_type == 'standard' && matrix.build_type == 'release' }} + uses: actions/upload-artifact@v3 + with: + name: example-log-${{ matrix.compiler }} + path: example.log From 274a25d8214b79a129f1c6c40122993ba4aea391 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Mon, 18 Nov 2024 23:26:42 +0400 Subject: [PATCH 08/14] Shorter name of github actions test Signed-off-by: Anjan Roy --- .github/workflows/test_ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_ci.yml b/.github/workflows/test_ci.yml index 9e4b5d0..37497fc 100644 --- a/.github/workflows/test_ci.yml +++ b/.github/workflows/test_ci.yml @@ -1,4 +1,4 @@ -name: Test ML-KEM Key Encapsulation Mechanism (NIST FIPS 203) +name: Test ML-KEM i.e. NIST FIPS 203 on: push: From 7356a9ede415ecb3b1cd7f03277c813da7ec82b3 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Mon, 18 Nov 2024 23:27:35 +0400 Subject: [PATCH 09/14] Do not include header what is not used Signed-off-by: Anjan Roy --- include/ml_kem/internals/math/field.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/ml_kem/internals/math/field.hpp b/include/ml_kem/internals/math/field.hpp index 254aa6a..0dc974e 100644 --- a/include/ml_kem/internals/math/field.hpp +++ b/include/ml_kem/internals/math/field.hpp @@ -1,5 +1,4 @@ #pragma once -#include "ml_kem/internals/utility/force_inline.hpp" #include "randomshake/randomshake.hpp" #include #include From 5cf204e2ede4171a0e8e47e9c8a89994f3ab5251 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Tue, 19 Nov 2024 23:06:28 +0400 Subject: [PATCH 10/14] Refactor variable names to be more verbose Signed-off-by: Anjan Roy --- include/ml_kem/internals/k_pke.hpp | 51 ++++++++++++++--------------- include/ml_kem/internals/ml_kem.hpp | 22 ++++++------- 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/include/ml_kem/internals/k_pke.hpp 
b/include/ml_kem/internals/k_pke.hpp index 363f999..dbb8e2a 100644 --- a/include/ml_kem/internals/k_pke.hpp +++ b/include/ml_kem/internals/k_pke.hpp @@ -55,12 +55,12 @@ keygen(std::span d, ml_kem_utils::matrix_multiply(A_prime, s, t_prime); ml_kem_utils::poly_vec_add_to(e, t_prime); - constexpr size_t pkoff = k * 12 * 32; - auto _pubkey0 = pubkey.template subspan<0, pkoff>(); - auto _pubkey1 = pubkey.template subspan(); + constexpr size_t pubkey_offset = k * 12 * 32; + auto encoded_t_prime_in_pubkey = pubkey.template subspan<0, pubkey_offset>(); + auto rho_in_pubkey = pubkey.template subspan(); - ml_kem_utils::poly_vec_encode(t_prime, _pubkey0); - std::copy(rho.begin(), rho.end(), _pubkey1.begin()); + ml_kem_utils::poly_vec_encode(t_prime, encoded_t_prime_in_pubkey); + std::copy(rho.begin(), rho.end(), rho_in_pubkey.begin()); ml_kem_utils::poly_vec_encode(s, seckey); } @@ -76,21 +76,21 @@ template encrypt(std::span pubkey, std::span msg, std::span rcoin, - std::span enc) + std::span ctxt) requires(ml_kem_params::check_encrypt_params(k, eta1, eta2, du, dv)) { constexpr size_t pkoff = k * 12 * 32; - auto _pubkey0 = pubkey.template subspan<0, pkoff>(); + auto encoded_t_prime_in_pubkey = pubkey.template subspan<0, pkoff>(); auto rho = pubkey.template subspan(); std::array t_prime{}; - std::array encoded_tprime{}; + std::array encoded_tprime{}; - ml_kem_utils::poly_vec_decode(_pubkey0, t_prime); + ml_kem_utils::poly_vec_decode(encoded_t_prime_in_pubkey, t_prime); ml_kem_utils::poly_vec_encode(t_prime, encoded_tprime); - using encoded_pkey_t = std::span; - const auto are_equal = ml_kem_utils::ct_memcmp(encoded_pkey_t(_pubkey0), encoded_pkey_t(encoded_tprime)); + using encoded_pkey_t = std::span; + const auto are_equal = ml_kem_utils::ct_memcmp(encoded_pkey_t(encoded_t_prime_in_pubkey), encoded_pkey_t(encoded_tprime)); if (are_equal == 0u) { // Got an invalid public key return false; @@ -131,15 +131,15 @@ encrypt(std::span pubkey ml_kem_utils::poly_decompress<1>(m); 
ml_kem_utils::poly_vec_add_to<1>(m, v); - constexpr size_t encoff = k * du * 32; - auto _enc0 = enc.template subspan<0, encoff>(); - auto _enc1 = enc.template subspan(); + constexpr size_t ctxt_offset = k * du * 32; + auto polyvec_u_in_ctxt = ctxt.template first(); + auto poly_v_in_ctxt = ctxt.template last(); ml_kem_utils::poly_vec_compress(u); - ml_kem_utils::poly_vec_encode(u, _enc0); + ml_kem_utils::poly_vec_encode(u, polyvec_u_in_ctxt); ml_kem_utils::poly_compress(v); - ml_kem_utils::encode(v, _enc1); + ml_kem_utils::encode(v, poly_v_in_ctxt); return true; } @@ -151,22 +151,21 @@ encrypt(std::span pubkey template constexpr void decrypt(std::span seckey, - std::span enc, - std::span dec) + std::span ctxt, + std::span ptxt) requires(ml_kem_params::check_decrypt_params(k, du, dv)) { - constexpr size_t encoff = k * du * 32; - auto _enc0 = enc.template subspan<0, encoff>(); - auto _enc1 = enc.template subspan(); + constexpr size_t ctxt_offset = k * du * 32; + auto polyvec_u_in_ctxt = ctxt.template subspan<0, ctxt_offset>(); + auto poly_v_in_ctxt = ctxt.template subspan(); std::array u{}; + std::array v{}; - ml_kem_utils::poly_vec_decode(_enc0, u); + ml_kem_utils::poly_vec_decode(polyvec_u_in_ctxt, u); ml_kem_utils::poly_vec_decompress(u); - std::array v{}; - - ml_kem_utils::decode(_enc1, v); + ml_kem_utils::decode(poly_v_in_ctxt, v); ml_kem_utils::poly_decompress(v); std::array s_prime{}; @@ -181,7 +180,7 @@ decrypt(std::span seckey ml_kem_utils::poly_vec_sub_from<1>(t, v); ml_kem_utils::poly_compress<1>(v); - ml_kem_utils::encode<1>(v, dec); + ml_kem_utils::encode<1>(v, ptxt); } } diff --git a/include/ml_kem/internals/ml_kem.hpp b/include/ml_kem/internals/ml_kem.hpp index 8645890..1bcafac 100644 --- a/include/ml_kem/internals/ml_kem.hpp +++ b/include/ml_kem/internals/ml_kem.hpp @@ -19,23 +19,23 @@ keygen(std::span d, // used in CPA-PKE std::span seckey) requires(ml_kem_params::check_keygen_params(k, eta1)) { - constexpr size_t skoff0 = k * 12 * 32; - constexpr 
size_t skoff1 = skoff0 + pubkey.size(); - constexpr size_t skoff2 = skoff1 + 32; + constexpr size_t seckey_offset_kpke_skey = k * 12 * 32; + constexpr size_t seckey_offset_kpke_pkey = seckey_offset_kpke_skey + pubkey.size(); + constexpr size_t seckey_offset_z = seckey_offset_kpke_pkey + 32; - auto _seckey0 = seckey.template subspan<0, skoff0>(); - auto _seckey1 = seckey.template subspan(); - auto _seckey2 = seckey.template subspan(); - auto _seckey3 = seckey.template subspan(); + auto kpke_skey_in_seckey = seckey.template subspan<0, seckey_offset_kpke_skey>(); + auto kpke_pkey_in_seckey = seckey.template subspan(); + auto kpke_pkey_digest_in_seckey = seckey.template subspan(); + auto z_in_seckey = seckey.template subspan(); - k_pke::keygen(d, pubkey, _seckey0); - std::copy(pubkey.begin(), pubkey.end(), _seckey1.begin()); - std::copy(z.begin(), z.end(), _seckey3.begin()); + k_pke::keygen(d, kpke_pkey_in_seckey, kpke_skey_in_seckey); + std::copy(kpke_pkey_in_seckey.begin(), kpke_pkey_in_seckey.end(), pubkey.begin()); + std::copy(z.begin(), z.end(), z_in_seckey.begin()); sha3_256::sha3_256_t hasher{}; hasher.absorb(pubkey); hasher.finalize(); - hasher.digest(_seckey2); + hasher.digest(kpke_pkey_digest_in_seckey); hasher.reset(); } From 8b538a8b9c0a44b1c3e0cc82a33547d445b46b2e Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Tue, 19 Nov 2024 23:16:24 +0400 Subject: [PATCH 11/14] Recursively clone all submodules inside "RandomShake" module Signed-off-by: Anjan Roy --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c8c8d6d..c9bf1e0 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ $(SUBTLE_INC_DIR): git submodule update --init subtle $(RANDOMSHAKE_INC_DIR): $(SUBTLE_INC_DIR) - git submodule update --init RandomShake + git submodule update --init --recursive RandomShake $(SHA3_INC_DIR): $(RANDOMSHAKE_INC_DIR) git submodule update --init sha3 From 551c27645a273d967a452a59dca9549c80869cfa Mon Sep 17 00:00:00 2001 
From: Anjan Roy Date: Thu, 21 Nov 2024 19:01:28 +0400 Subject: [PATCH 12/14] Export benchmark results to JSON file Signed-off-by: Anjan Roy --- benchmarks/bench.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/bench.mk b/benchmarks/bench.mk index f5151d5..e07184a 100644 --- a/benchmarks/bench.mk +++ b/benchmarks/bench.mk @@ -19,11 +19,11 @@ $(BENCHMARK_BINARY): $(BENCHMARK_OBJECTS) benchmark: $(BENCHMARK_BINARY) ## Build and run all benchmarks, without libPFM -based CPU CYCLE counter statistics # Must *not* build google-benchmark with libPFM - ./$< --benchmark_time_unit=us --benchmark_min_warmup_time=.5 --benchmark_enable_random_interleaving=true --benchmark_repetitions=10 --benchmark_min_time=0.1s --benchmark_display_aggregates_only=true --benchmark_counters_tabular=true + ./$< --benchmark_time_unit=us --benchmark_min_warmup_time=.5 --benchmark_enable_random_interleaving=true --benchmark_repetitions=10 --benchmark_min_time=0.1s --benchmark_display_aggregates_only=true --benchmark_report_aggregates_only=true --benchmark_counters_tabular=true --benchmark_out_format=json --benchmark_out=$(BENCHMARK_OUT_FILE) $(PERF_BINARY): $(BENCHMARK_OBJECTS) $(CXX) $(RELEASE_FLAGS) $(LINK_OPT_FLAGS) $^ $(PERF_LINK_FLAGS) -o $@ perf: $(PERF_BINARY) ## Build and run all benchmarks, while also collecting libPFM -based CPU CYCLE counter statistics # Must build google-benchmark with libPFM, follow https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7 - ./$< --benchmark_time_unit=us --benchmark_min_warmup_time=.5 --benchmark_enable_random_interleaving=true --benchmark_repetitions=10 --benchmark_min_time=0.1s --benchmark_display_aggregates_only=true --benchmark_counters_tabular=true --benchmark_perf_counters=CYCLES + ./$< --benchmark_time_unit=us --benchmark_min_warmup_time=.5 --benchmark_enable_random_interleaving=true --benchmark_repetitions=10 --benchmark_min_time=0.1s --benchmark_display_aggregates_only=true 
--benchmark_report_aggregates_only=true --benchmark_counters_tabular=true --benchmark_perf_counters=CYCLES --benchmark_out_format=json --benchmark_out=$(BENCHMARK_OUT_FILE) From a84b0664adc212c93a3d805e3cefa844ab060eb9 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Thu, 21 Nov 2024 19:02:05 +0400 Subject: [PATCH 13/14] Add benchmark results in JSON format for multiple targets Signed-off-by: Anjan Roy --- ...x_6.11.0-9-generic_x86_64_with_g++_14.json | 960 ++++++++++++++++++ ...6.6.51+rpt-rpi-v8_aarch64_with_g++_12.json | 954 +++++++++++++++++ ...ux_6.8.0-1016-aws_aarch64_with_g++_13.json | 906 +++++++++++++++++ 3 files changed, 2820 insertions(+) create mode 100644 bench_result_on_Linux_6.11.0-9-generic_x86_64_with_g++_14.json create mode 100644 bench_result_on_Linux_6.6.51+rpt-rpi-v8_aarch64_with_g++_12.json create mode 100644 bench_result_on_Linux_6.8.0-1016-aws_aarch64_with_g++_13.json diff --git a/bench_result_on_Linux_6.11.0-9-generic_x86_64_with_g++_14.json b/bench_result_on_Linux_6.11.0-9-generic_x86_64_with_g++_14.json new file mode 100644 index 0000000..30737de --- /dev/null +++ b/bench_result_on_Linux_6.11.0-9-generic_x86_64_with_g++_14.json @@ -0,0 +1,960 @@ +{ + "context": { + "date": "2024-11-21T19:00:17+04:00", + "host_name": "linux", + "executable": "./build/benchmark/perf.out", + "num_cpus": 16, + "mhz_per_cpu": 1403, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 49152, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 1310720, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 18874368, + "num_sharing": 16 + } + ], + "load_avg": [1.27002,1.01367,0.77832], + "library_version": "v1.9.0-8-g3fd1e6a7", + "library_build_type": "release", + "json_schema_version": 1 + }, + "benchmarks": [ + { + "name": "ml_kem_512/decap_mean", + "family_index": 5, + "per_family_instance_index": 0, + 
"run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.9056035692394143e+01, + "cpu_time": 1.9051925410978033e+01, + "time_unit": "us", + "CYCLES": 8.5872433324045691e+04, + "items_per_second": 5.2518847147942317e+04 + }, + { + "name": "ml_kem_512/decap_median", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.8822701727495350e+01, + "cpu_time": 1.8822634159933330e+01, + "time_unit": "us", + "CYCLES": 8.5742425954304810e+04, + "items_per_second": 5.3127627241245522e+04 + }, + { + "name": "ml_kem_512/decap_stddev", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.9474361245418547e-01, + "cpu_time": 4.9037416970198039e-01, + "time_unit": "us", + "CYCLES": 4.1099110775527362e+02, + "items_per_second": 1.3259886319894383e+03 + }, + { + "name": "ml_kem_512/decap_cv", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 2.5962567474181058e-02, + "cpu_time": 2.5738824770929390e-02, + "time_unit": "us", + "CYCLES": 4.7860657005533973e-03, + "items_per_second": 2.5247862510275807e-02 + }, + { + "name": "ml_kem_512/decap_min", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + 
"real_time": 1.8567303148504870e+01, + "cpu_time": 1.8567269713012177e+01, + "time_unit": "us", + "CYCLES": 8.5404129701866812e+04, + "items_per_second": 5.0286890986553604e+04 + }, + { + "name": "ml_kem_512/decap_max", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.9886104625248368e+01, + "cpu_time": 1.9885898300362090e+01, + "time_unit": "us", + "CYCLES": 8.6924946224575091e+04, + "items_per_second": 5.3858214775605236e+04 + }, + { + "name": "ml_kem_1024/keygen_mean", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 3.8248182262563155e+01, + "cpu_time": 3.8245924581005390e+01, + "time_unit": "us", + "CYCLES": 1.6822762935754191e+05, + "items_per_second": 2.6163669376711958e+04 + }, + { + "name": "ml_kem_1024/keygen_median", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 3.8812621368732806e+01, + "cpu_time": 3.8812553351955117e+01, + "time_unit": "us", + "CYCLES": 1.6825550279329609e+05, + "items_per_second": 2.5765123887739464e+04 + }, + { + "name": "ml_kem_1024/keygen_stddev", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.0252546605144910e+00, + "cpu_time": 1.0244522001763769e+00, + "time_unit": "us", + "CYCLES": 3.9831909047284807e+02, + "items_per_second": 
7.0908840142801978e+02 + }, + { + "name": "ml_kem_1024/keygen_cv", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 2.6805317269103211e-02, + "cpu_time": 2.6785918013475480e-02, + "time_unit": "us", + "CYCLES": 2.3677388309757503e-03, + "items_per_second": 2.7102024231324864e-02 + }, + { + "name": "ml_kem_1024/keygen_min", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 3.6637519553053167e+01, + "cpu_time": 3.6637954189943763e+01, + "time_unit": "us", + "CYCLES": 1.6738924776536314e+05, + "items_per_second": 2.5410942857702819e+04 + }, + { + "name": "ml_kem_1024/keygen_max", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 3.9359387709503622e+01, + "cpu_time": 3.9353124581005858e+01, + "time_unit": "us", + "CYCLES": 1.6875812262569834e+05, + "items_per_second": 2.7294100396972379e+04 + }, + { + "name": "ml_kem_1024/decap_mean", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.5697936916511843e+01, + "cpu_time": 4.5690933070348507e+01, + "time_unit": "us", + "CYCLES": 2.0487672199211048e+05, + "items_per_second": 2.1900067926367228e+04 + }, + { + "name": "ml_kem_1024/decap_median", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": 
"aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.5332901709422870e+01, + "cpu_time": 4.5321123438527493e+01, + "time_unit": "us", + "CYCLES": 2.0481921811308351e+05, + "items_per_second": 2.2064773629203730e+04 + }, + { + "name": "ml_kem_1024/decap_stddev", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.2228308459963930e+00, + "cpu_time": 1.2187072244981885e+00, + "time_unit": "us", + "CYCLES": 4.3704066116916107e+02, + "items_per_second": 5.7862047530193854e+02 + }, + { + "name": "ml_kem_1024/decap_cv", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 2.6758994574097559e-02, + "cpu_time": 2.6672846068207745e-02, + "time_unit": "us", + "CYCLES": 2.1331884702157178e-03, + "items_per_second": 2.6420944320692790e-02 + }, + { + "name": "ml_kem_1024/decap_min", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.4101843195287664e+01, + "cpu_time": 4.4098349769888024e+01, + "time_unit": "us", + "CYCLES": 2.0428630867850099e+05, + "items_per_second": 2.1033180526792639e+04 + }, + { + "name": "ml_kem_1024/decap_max", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.7550309993412569e+01, + "cpu_time": 
4.7543927021696632e+01, + "time_unit": "us", + "CYCLES": 2.0588643491124260e+05, + "items_per_second": 2.2676585523452781e+04 + }, + { + "name": "ml_kem_768/encap_mean", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.7442002511282329e+01, + "cpu_time": 2.7440178889543024e+01, + "time_unit": "us", + "CYCLES": 1.1990588487345495e+05, + "items_per_second": 3.6461840879822026e+04 + }, + { + "name": "ml_kem_768/encap_median", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.7666632038447592e+01, + "cpu_time": 2.7666029134785344e+01, + "time_unit": "us", + "CYCLES": 1.2041895830880910e+05, + "items_per_second": 3.6145413865879280e+04 + }, + { + "name": "ml_kem_768/encap_stddev", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 6.5576821624323978e-01, + "cpu_time": 6.5495692830818764e-01, + "time_unit": "us", + "CYCLES": 1.6088901440441800e+03, + "items_per_second": 8.8133514852984320e+02 + }, + { + "name": "ml_kem_768/encap_cv", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 2.3896514693985304e-02, + "cpu_time": 2.3868537116490170e-02, + "time_unit": "us", + "CYCLES": 1.3417941460856187e-02, + "items_per_second": 2.4171438612622925e-02 + }, + { + "name": "ml_kem_768/encap_min", + 
"family_index": 7, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.6362878163625787e+01, + "cpu_time": 2.6361235628801399e+01, + "time_unit": "us", + "CYCLES": 1.1544373454973513e+05, + "items_per_second": 3.5507013436273264e+04 + }, + { + "name": "ml_kem_768/encap_max", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.8165765744541243e+01, + "cpu_time": 2.8163450068667839e+01, + "time_unit": "us", + "CYCLES": 1.2095648616833432e+05, + "items_per_second": 3.7934488886683052e+04 + }, + { + "name": "ml_kem_512/encap_mean", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.6417187906248760e+01, + "cpu_time": 1.6416831666062627e+01, + "time_unit": "us", + "CYCLES": 7.2548732076839442e+04, + "items_per_second": 6.0954975882310187e+04 + }, + { + "name": "ml_kem_512/encap_median", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.6467435302646741e+01, + "cpu_time": 1.6466332306391255e+01, + "time_unit": "us", + "CYCLES": 7.2429420140147398e+04, + "items_per_second": 6.0752801944241044e+04 + }, + { + "name": "ml_kem_512/encap_stddev", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + 
"aggregate_unit": "time", + "iterations": 10, + "real_time": 4.5266270600992586e-01, + "cpu_time": 4.5243198570675630e-01, + "time_unit": "us", + "CYCLES": 4.3162826710344098e+02, + "items_per_second": 1.6887868610203682e+03 + }, + { + "name": "ml_kem_512/encap_cv", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 2.7572487358668289e-02, + "cpu_time": 2.7559031785776143e-02, + "time_unit": "us", + "CYCLES": 5.9494942881466919e-03, + "items_per_second": 2.7705479931302426e-02 + }, + { + "name": "ml_kem_512/encap_min", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.5721185695308174e+01, + "cpu_time": 1.5721903950706764e+01, + "time_unit": "us", + "CYCLES": 7.2291895131086145e+04, + "items_per_second": 5.9199678706742510e+04 + }, + { + "name": "ml_kem_512/encap_max", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.6891218919892513e+01, + "cpu_time": 1.6891983568926118e+01, + "time_unit": "us", + "CYCLES": 7.3751330675365476e+04, + "items_per_second": 6.3605527875969870e+04 + }, + { + "name": "ml_kem_1024/encap_mean", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.1782597192545246e+01, + "cpu_time": 4.1779886878242287e+01, + "time_unit": "us", + "CYCLES": 1.8480367635032043e+05, + 
"items_per_second": 2.3945672712881264e+04 + }, + { + "name": "ml_kem_1024/encap_median", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.1596516325886192e+01, + "cpu_time": 4.1596384650595574e+01, + "time_unit": "us", + "CYCLES": 1.8469014144034177e+05, + "items_per_second": 2.4040582191149952e+04 + }, + { + "name": "ml_kem_1024/encap_stddev", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 9.3124758712223143e-01, + "cpu_time": 9.3051675859502458e-01, + "time_unit": "us", + "CYCLES": 6.3379062984994414e+02, + "items_per_second": 5.3446969697312966e+02 + }, + { + "name": "ml_kem_1024/encap_cv", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 2.2287929657191886e-02, + "cpu_time": 2.2271883150540791e-02, + "time_unit": "us", + "CYCLES": 3.4295347493439906e-03, + "items_per_second": 2.2320095299958671e-02 + }, + { + "name": "ml_kem_1024/encap_min", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.0196967653326972e+01, + "cpu_time": 4.0197815990234282e+01, + "time_unit": "us", + "CYCLES": 1.8416063381141287e+05, + "items_per_second": 2.3189508655850241e+04 + }, + { + "name": "ml_kem_1024/encap_max", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": 
"ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.3125853524538904e+01, + "cpu_time": 4.3122949038755095e+01, + "time_unit": "us", + "CYCLES": 1.8579436710405859e+05, + "items_per_second": 2.4876973421713792e+04 + }, + { + "name": "ml_kem_768/decap_mean", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 3.0688348941440825e+01, + "cpu_time": 3.0683795495495566e+01, + "time_unit": "us", + "CYCLES": 1.3644561894144147e+05, + "items_per_second": 3.2635673400427855e+04 + }, + { + "name": "ml_kem_768/decap_median", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 3.0147188626119817e+01, + "cpu_time": 3.0146387387387598e+01, + "time_unit": "us", + "CYCLES": 1.3693468029279279e+05, + "items_per_second": 3.3172342374712010e+04 + }, + { + "name": "ml_kem_768/decap_stddev", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.2209738196918010e+00, + "cpu_time": 1.2187185673395402e+00, + "time_unit": "us", + "CYCLES": 1.6363534801009887e+03, + "items_per_second": 1.2644913468133436e+03 + }, + { + "name": "ml_kem_768/decap_cv", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 
3.9786233597045251e-02, + "cpu_time": 3.9718638051751139e-02, + "time_unit": "us", + "CYCLES": 1.1992715433415744e-02, + "items_per_second": 3.8745679652400437e-02 + }, + { + "name": "ml_kem_768/decap_min", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.9513757882881208e+01, + "cpu_time": 2.9505346171171247e+01, + "time_unit": "us", + "CYCLES": 1.3215157702702703e+05, + "items_per_second": 3.0263112200522053e+04 + }, + { + "name": "ml_kem_768/decap_max", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 3.3057962837846887e+01, + "cpu_time": 3.3043528153153709e+01, + "time_unit": "us", + "CYCLES": 1.3763260855855857e+05, + "items_per_second": 3.3892162938832720e+04 + }, + { + "name": "ml_kem_512/keygen_mean", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.4658420033599558e+01, + "cpu_time": 1.4653819424611498e+01, + "time_unit": "us", + "CYCLES": 6.3305974506509869e+04, + "items_per_second": 6.8264086603983742e+04 + }, + { + "name": "ml_kem_512/keygen_median", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.4719643217137294e+01, + "cpu_time": 1.4717921094078179e+01, + "time_unit": "us", + "CYCLES": 6.3219567093658130e+04, + "items_per_second": 6.7944679239622667e+04 + }, + { + "name": 
"ml_kem_512/keygen_stddev", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.8392657487978962e-01, + "cpu_time": 2.7817695187949593e-01, + "time_unit": "us", + "CYCLES": 4.5108870359842018e+02, + "items_per_second": 1.3168907322586840e+03 + }, + { + "name": "ml_kem_512/keygen_cv", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 1.9369521014473749e-02, + "cpu_time": 1.8983238691497043e-02, + "time_unit": "us", + "CYCLES": 7.1255313122465863e-03, + "items_per_second": 1.9291120672254525e-02 + }, + { + "name": "ml_kem_512/keygen_min", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.4047684901310870e+01, + "cpu_time": 1.4047831268374377e+01, + "time_unit": "us", + "CYCLES": 6.2886353842923141e+04, + "items_per_second": 6.6351890899656923e+04 + }, + { + "name": "ml_kem_512/keygen_max", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.5103418731615951e+01, + "cpu_time": 1.5071160541789032e+01, + "time_unit": "us", + "CYCLES": 6.4450741810163796e+04, + "items_per_second": 7.1185365263553642e+04 + }, + { + "name": "ml_kem_768/keygen_mean", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + 
"aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.4042544294713974e+01, + "cpu_time": 2.4039241809590457e+01, + "time_unit": "us", + "CYCLES": 1.0601569814490723e+05, + "items_per_second": 4.1624049321968640e+04 + }, + { + "name": "ml_kem_768/keygen_median", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.3944362705631569e+01, + "cpu_time": 2.3942632044102186e+01, + "time_unit": "us", + "CYCLES": 1.0573963423171159e+05, + "items_per_second": 4.1766550538220166e+04 + }, + { + "name": "ml_kem_768/keygen_stddev", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 6.3294218079417042e-01, + "cpu_time": 6.2810683199751494e-01, + "time_unit": "us", + "CYCLES": 6.3115032536650551e+02, + "items_per_second": 1.0803917054075691e+03 + }, + { + "name": "ml_kem_768/keygen_cv", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 2.6325923456168070e-02, + "cpu_time": 2.6128396102198682e-02, + "time_unit": "us", + "CYCLES": 5.9533666844679891e-03, + "items_per_second": 2.5955949096892701e-02 + }, + { + "name": "ml_kem_768/keygen_min", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.3207089254451922e+01, + "cpu_time": 2.3206509450472510e+01, + "time_unit": "us", + 
"CYCLES": 1.0557434476723836e+05, + "items_per_second": 3.9797209744772139e+04 + }, + { + "name": "ml_kem_768/keygen_max", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.5150476023785917e+01, + "cpu_time": 2.5127389744487363e+01, + "time_unit": "us", + "CYCLES": 1.0736027196359818e+05, + "items_per_second": 4.3091357712981648e+04 + } + ] +} diff --git a/bench_result_on_Linux_6.6.51+rpt-rpi-v8_aarch64_with_g++_12.json b/bench_result_on_Linux_6.6.51+rpt-rpi-v8_aarch64_with_g++_12.json new file mode 100644 index 0000000..9e7ce1c --- /dev/null +++ b/bench_result_on_Linux_6.6.51+rpt-rpi-v8_aarch64_with_g++_12.json @@ -0,0 +1,954 @@ +{ + "context": { + "date": "2024-11-21T19:25:45+04:00", + "host_name": "rpi", + "executable": "./build/benchmark/perf.out", + "num_cpus": 4, + "mhz_per_cpu": 1800, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 32768, + "num_sharing": 1 + }, + { + "type": "Instruction", + "level": 1, + "size": 49152, + "num_sharing": 1 + }, + { + "type": "Unified", + "level": 2, + "size": 1048576, + "num_sharing": 4 + } + ], + "load_avg": [1.54688,1.86426,1.10059], + "library_version": "v1.9.0-20-g50ffd3e5", + "library_build_type": "release", + "json_schema_version": 1 + }, + "benchmarks": [ + { + "name": "ml_kem_768/encap_mean", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.3404641412102154e+02, + "cpu_time": 1.3403692833813599e+02, + "time_unit": "us", + "CYCLES": 2.4098624217098943e+05, + "items_per_second": 7.4607340452455219e+03 + }, + { + "name": "ml_kem_768/encap_median", + "family_index": 7, + 
"per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.3382327329492463e+02, + "cpu_time": 1.3382489241114283e+02, + "time_unit": "us", + "CYCLES": 2.4062865850144092e+05, + "items_per_second": 7.4724516637176766e+03 + }, + { + "name": "ml_kem_768/encap_stddev", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 5.2288878260861837e-01, + "cpu_time": 5.2665154518222057e-01, + "time_unit": "us", + "CYCLES": 9.5527555842670529e+02, + "items_per_second": 2.9272150510010668e+01 + }, + { + "name": "ml_kem_768/encap_cv", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 3.9008039568782277e-03, + "cpu_time": 3.9291525978097062e-03, + "time_unit": "us", + "CYCLES": 3.9640252896632122e-03, + "items_per_second": 3.9234947034018502e-03 + }, + { + "name": "ml_kem_768/encap_min", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.3343640249756700e+02, + "cpu_time": 1.3344348318924094e+02, + "time_unit": "us", + "CYCLES": 2.3993788472622479e+05, + "items_per_second": 7.4207168050308874e+03 + }, + { + "name": "ml_kem_768/encap_max", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + 
"iterations": 10, + "real_time": 1.3475015081645967e+02, + "cpu_time": 1.3475787127761680e+02, + "time_unit": "us", + "CYCLES": 2.4229169548511048e+05, + "items_per_second": 7.4938091849855618e+03 + }, + { + "name": "ml_kem_512/encap_mean", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 8.0896293125362689e+01, + "cpu_time": 8.0892826285383919e+01, + "time_unit": "us", + "CYCLES": 1.4543395753899481e+05, + "items_per_second": 1.2362292357161577e+04 + }, + { + "name": "ml_kem_512/encap_median", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 8.0874494511860533e+01, + "cpu_time": 8.0879217504332757e+01, + "time_unit": "us", + "CYCLES": 1.4541796995956096e+05, + "items_per_second": 1.2364116069563095e+04 + }, + { + "name": "ml_kem_512/encap_stddev", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 3.8834014465226396e-01, + "cpu_time": 3.8956943944875400e-01, + "time_unit": "us", + "CYCLES": 7.0538445404976494e+02, + "items_per_second": 5.9235135122240138e+01 + }, + { + "name": "ml_kem_512/encap_cv", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 4.8004689664885424e-03, + "cpu_time": 4.8158712872063804e-03, + "time_unit": "us", + "CYCLES": 4.8502046288648381e-03, + "items_per_second": 
4.7915979828712539e-03 + }, + { + "name": "ml_kem_512/encap_min", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 8.0327904101678286e+01, + "cpu_time": 8.0332220103985989e+01, + "time_unit": "us", + "CYCLES": 1.4444163662622761e+05, + "items_per_second": 1.2224330144313417e+04 + }, + { + "name": "ml_kem_512/encap_max", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 8.1799739456960864e+01, + "cpu_time": 8.1804073367994377e+01, + "time_unit": "us", + "CYCLES": 1.4708832466782207e+05, + "items_per_second": 1.2448305284051039e+04 + }, + { + "name": "ml_kem_1024/keygen_mean", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.8175297196871824e+02, + "cpu_time": 1.8173309361147403e+02, + "time_unit": "us", + "CYCLES": 3.2674690312907437e+05, + "items_per_second": 5.5026451758537351e+03 + }, + { + "name": "ml_kem_1024/keygen_median", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.8166920730121859e+02, + "cpu_time": 1.8165013820078289e+02, + "time_unit": "us", + "CYCLES": 3.2658016101694916e+05, + "items_per_second": 5.5050880675551007e+03 + }, + { + "name": "ml_kem_1024/keygen_stddev", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": 
"aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 7.0887129976999019e-01, + "cpu_time": 6.8428935661652235e-01, + "time_unit": "us", + "CYCLES": 1.2102863466062320e+03, + "items_per_second": 2.0671859679678491e+01 + }, + { + "name": "ml_kem_1024/keygen_cv", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 3.9001909684975877e-03, + "cpu_time": 3.7653535909065632e-03, + "time_unit": "us", + "CYCLES": 3.7040484087714040e-03, + "items_per_second": 3.7567131841226256e-03 + }, + { + "name": "ml_kem_1024/keygen_min", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.8070142633631232e+02, + "cpu_time": 1.8071179139504801e+02, + "time_unit": "us", + "CYCLES": 3.2495039374185138e+05, + "items_per_second": 5.4608432300911718e+03 + }, + { + "name": "ml_kem_1024/keygen_max", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.8322365580189367e+02, + "cpu_time": 1.8312190221642828e+02, + "time_unit": "us", + "CYCLES": 3.2917317601043027e+05, + "items_per_second": 5.5336732167849159e+03 + }, + { + "name": "ml_kem_1024/encap_mean", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.0687474874814711e+02, + 
"cpu_time": 2.0687071885125198e+02, + "time_unit": "us", + "CYCLES": 3.7194387982326956e+05, + "items_per_second": 4.8339474648620117e+03 + }, + { + "name": "ml_kem_1024/encap_median", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.0685270250371792e+02, + "cpu_time": 2.0686522974963174e+02, + "time_unit": "us", + "CYCLES": 3.7192876951399120e+05, + "items_per_second": 4.8340652325349092e+03 + }, + { + "name": "ml_kem_1024/encap_stddev", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 3.2902330418860959e-01, + "cpu_time": 3.2246164627799734e-01, + "time_unit": "us", + "CYCLES": 5.6889591405049134e+02, + "items_per_second": 7.5356027327796227e+00 + }, + { + "name": "ml_kem_1024/encap_cv", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 1.5904469065442504e-03, + "cpu_time": 1.5587592486197125e-03, + "time_unit": "us", + "CYCLES": 1.5295208361024901e-03, + "items_per_second": 1.5588921450958987e-03 + }, + { + "name": "ml_kem_1024/encap_min", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.0630859204702148e+02, + "cpu_time": 2.0631925920471176e+02, + "time_unit": "us", + "CYCLES": 3.7098637555228279e+05, + "items_per_second": 4.8226854806611655e+03 + }, + { + "name": 
"ml_kem_1024/encap_max", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.0742703387338526e+02, + "cpu_time": 2.0735335198821736e+02, + "time_unit": "us", + "CYCLES": 3.7278725184094254e+05, + "items_per_second": 4.8468572631302022e+03 + }, + { + "name": "ml_kem_768/decap_mean", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.5776309437570859e+02, + "cpu_time": 1.5776089010123695e+02, + "time_unit": "us", + "CYCLES": 2.8363557075365580e+05, + "items_per_second": 6.3387685462386671e+03 + }, + { + "name": "ml_kem_768/decap_median", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.5761746231724106e+02, + "cpu_time": 1.5760998650168693e+02, + "time_unit": "us", + "CYCLES": 2.8330635433070868e+05, + "items_per_second": 6.3447786980162718e+03 + }, + { + "name": "ml_kem_768/decap_stddev", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 5.2176485524750216e-01, + "cpu_time": 5.2122026082970596e-01, + "time_unit": "us", + "CYCLES": 9.4506083424805570e+02, + "items_per_second": 2.0823039836254363e+01 + }, + { + "name": "ml_kem_768/decap_cv", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + 
"aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 3.3072681371533776e-03, + "cpu_time": 3.3038623228813747e-03, + "time_unit": "us", + "CYCLES": 3.3319545631632476e-03, + "items_per_second": 3.2850292110145670e-03 + }, + { + "name": "ml_kem_768/decap_min", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.5736764791902002e+02, + "cpu_time": 1.5737591113610623e+02, + "time_unit": "us", + "CYCLES": 2.8296523397075367e+05, + "items_per_second": 6.2855638119630275e+03 + }, + { + "name": "ml_kem_768/decap_max", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.5908670416194462e+02, + "cpu_time": 1.5909471766029091e+02, + "time_unit": "us", + "CYCLES": 2.8606102924634423e+05, + "items_per_second": 6.3542126160283333e+03 + }, + { + "name": "ml_kem_512/decap_mean", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 9.8733070467416212e+01, + "cpu_time": 9.8729313172804169e+01, + "time_unit": "us", + "CYCLES": 1.7750099652974505e+05, + "items_per_second": 1.0128771771885522e+04 + }, + { + "name": "ml_kem_512/decap_median", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 9.8684486189801163e+01, + "cpu_time": 9.8677866501416077e+01, + "time_unit": "us", + "CYCLES": 
1.7740470148725214e+05, + "items_per_second": 1.0133995595985463e+04 + }, + { + "name": "ml_kem_512/decap_stddev", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.6782010695050318e-01, + "cpu_time": 2.6915369267886885e-01, + "time_unit": "us", + "CYCLES": 4.8629674066785117e+02, + "items_per_second": 2.7577547229884587e+01 + }, + { + "name": "ml_kem_512/decap_cv", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 2.7125673868198890e-03, + "cpu_time": 2.7261781129559151e-03, + "time_unit": "us", + "CYCLES": 2.7396845661447266e-03, + "items_per_second": 2.7226941085227835e-03 + }, + { + "name": "ml_kem_512/decap_min", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 9.8398354107663735e+01, + "cpu_time": 9.8403276203965575e+01, + "time_unit": "us", + "CYCLES": 1.7691263456090650e+05, + "items_per_second": 1.0085470217335074e+04 + }, + { + "name": "ml_kem_512/decap_max", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 9.9146844192633878e+01, + "cpu_time": 9.9152541076486798e+01, + "time_unit": "us", + "CYCLES": 1.7827090651558075e+05, + "items_per_second": 1.0162263275942645e+04 + }, + { + "name": "ml_kem_1024/decap_mean", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": 
"ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.3600962347975263e+02, + "cpu_time": 2.3600485084459348e+02, + "time_unit": "us", + "CYCLES": 4.2432703800675675e+05, + "items_per_second": 4.2372149223854176e+03 + }, + { + "name": "ml_kem_1024/decap_median", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.3610238344596650e+02, + "cpu_time": 2.3611539611486387e+02, + "time_unit": "us", + "CYCLES": 4.2451465962837834e+05, + "items_per_second": 4.2352172908413213e+03 + }, + { + "name": "ml_kem_1024/decap_stddev", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.4285261759833511e-01, + "cpu_time": 4.5016128152870249e-01, + "time_unit": "us", + "CYCLES": 8.1246182438876599e+02, + "items_per_second": 8.0854064963282664e+00 + }, + { + "name": "ml_kem_1024/decap_cv", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 1.8764176268275246e-03, + "cpu_time": 1.9074238513221435e-03, + "time_unit": "us", + "CYCLES": 1.9147067040677921e-03, + "items_per_second": 1.9081889034263193e-03 + }, + { + "name": "ml_kem_1024/decap_min", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 
2.3525970101368824e+02, + "cpu_time": 2.3521577702702416e+02, + "time_unit": "us", + "CYCLES": 4.2288322972972970e+05, + "items_per_second": 4.2229988799066541e+03 + }, + { + "name": "ml_kem_1024/decap_max", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.3678645101353067e+02, + "cpu_time": 2.3679854729729513e+02, + "time_unit": "us", + "CYCLES": 4.2577192736486485e+05, + "items_per_second": 4.2514154987363336e+03 + }, + { + "name": "ml_kem_768/keygen_mean", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.1448031302211962e+02, + "cpu_time": 1.1447081883701864e+02, + "time_unit": "us", + "CYCLES": 2.0582629795249796e+05, + "items_per_second": 8.7359725700562067e+03 + }, + { + "name": "ml_kem_768/keygen_median", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.1437795782143795e+02, + "cpu_time": 1.1434726330876353e+02, + "time_unit": "us", + "CYCLES": 2.0558797133497131e+05, + "items_per_second": 8.7452929481220781e+03 + }, + { + "name": "ml_kem_768/keygen_stddev", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.4643159065666449e-01, + "cpu_time": 4.5099360704221664e-01, + "time_unit": "us", + "CYCLES": 8.1647008728204253e+02, + "items_per_second": 3.4328094454165168e+01 + }, + { + 
"name": "ml_kem_768/keygen_cv", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 3.8996363555575367e-03, + "cpu_time": 3.9398128852763132e-03, + "time_unit": "us", + "CYCLES": 3.9667918793859528e-03, + "items_per_second": 3.9295103297175647e-03 + }, + { + "name": "ml_kem_768/keygen_min", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.1381537674040567e+02, + "cpu_time": 1.1382141195741215e+02, + "time_unit": "us", + "CYCLES": 2.0467830794430795e+05, + "items_per_second": 8.6650564375493268e+03 + }, + { + "name": "ml_kem_768/keygen_max", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.1540001392301727e+02, + "cpu_time": 1.1540605733005734e+02, + "time_unit": "us", + "CYCLES": 2.0753400982800982e+05, + "items_per_second": 8.7856931556442451e+03 + }, + { + "name": "ml_kem_512/keygen_mean", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 6.6042045164313066e+01, + "cpu_time": 6.6037697417840135e+01, + "time_unit": "us", + "CYCLES": 1.1873790173708921e+05, + "items_per_second": 1.5142934042816903e+04 + }, + { + "name": "ml_kem_512/keygen_median", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + 
"threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 6.6011878638492206e+01, + "cpu_time": 6.6015505399060658e+01, + "time_unit": "us", + "CYCLES": 1.1870704389671361e+05, + "items_per_second": 1.5147957388701067e+04 + }, + { + "name": "ml_kem_512/keygen_stddev", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.4560136541833285e-01, + "cpu_time": 1.4749280124967476e-01, + "time_unit": "us", + "CYCLES": 2.6131952645446108e+02, + "items_per_second": 3.3872475924688700e+01 + }, + { + "name": "ml_kem_512/keygen_cv", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 2.2046768093882565e-03, + "cpu_time": 2.2334637187066647e-03, + "time_unit": "us", + "CYCLES": 2.2008097046642924e-03, + "items_per_second": 2.2368502582731787e-03 + }, + { + "name": "ml_kem_512/keygen_min", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 6.5751961032851653e+01, + "cpu_time": 6.5728576995304650e+01, + "time_unit": "us", + "CYCLES": 1.1818556948356808e+05, + "items_per_second": 1.5106664755519812e+04 + }, + { + "name": "ml_kem_512/keygen_max", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 6.6197599530471663e+01, + "cpu_time": 6.6195948356807932e+01, + 
"time_unit": "us", + "CYCLES": 1.1902978638497653e+05, + "items_per_second": 1.5214082606283040e+04 + } + ] +} diff --git a/bench_result_on_Linux_6.8.0-1016-aws_aarch64_with_g++_13.json b/bench_result_on_Linux_6.8.0-1016-aws_aarch64_with_g++_13.json new file mode 100644 index 0000000..cf2518b --- /dev/null +++ b/bench_result_on_Linux_6.8.0-1016-aws_aarch64_with_g++_13.json @@ -0,0 +1,906 @@ +{ + "context": { + "date": "2024-11-21T15:44:33+00:00", + "host_name": "ip-172-31-94-216", + "executable": "./build/benchmark/bench.out", + "num_cpus": 2, + "mhz_per_cpu": 2000, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 65536, + "num_sharing": 1 + }, + { + "type": "Instruction", + "level": 1, + "size": 65536, + "num_sharing": 1 + }, + { + "type": "Unified", + "level": 2, + "size": 2097152, + "num_sharing": 1 + }, + { + "type": "Unified", + "level": 3, + "size": 37748736, + "num_sharing": 2 + } + ], + "load_avg": [2.74365,1.3833,0.589355], + "library_version": "v1.9.0-21-g62a321d6", + "library_build_type": "release", + "json_schema_version": 1 + }, + "benchmarks": [ + { + "name": "ml_kem_512/keygen_mean", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.0613137380420721e+01, + "cpu_time": 2.0613466239882136e+01, + "time_unit": "us", + "items_per_second": 4.8511998370497051e+04 + }, + { + "name": "ml_kem_512/keygen_median", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.0617443119938464e+01, + "cpu_time": 2.0617198381162574e+01, + "time_unit": "us", + "items_per_second": 4.8503198197674428e+04 + }, + { + "name": 
"ml_kem_512/keygen_stddev", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.4241591913820319e-02, + "cpu_time": 1.4421156629908371e-02, + "time_unit": "us", + "items_per_second": 3.3950083889230818e+01 + }, + { + "name": "ml_kem_512/keygen_cv", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 6.9089880162287280e-04, + "cpu_time": 6.9959881865995326e-04, + "time_unit": "us", + "items_per_second": 6.9982859971973090e-04 + }, + { + "name": "ml_kem_512/keygen_min", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.0587918027948938e+01, + "cpu_time": 2.0587649448123631e+01, + "time_unit": "us", + "items_per_second": 4.8471070842086308e+04 + }, + { + "name": "ml_kem_512/keygen_max", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.0630251949939112e+01, + "cpu_time": 2.0630862545989455e+01, + "time_unit": "us", + "items_per_second": 4.8572810729062643e+04 + }, + { + "name": "ml_kem_1024/decap_mean", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 6.8574683076946101e+01, + "cpu_time": 
6.8575934292804291e+01, + "time_unit": "us", + "items_per_second": 1.4582510123123822e+04 + }, + { + "name": "ml_kem_1024/decap_median", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 6.8493940198569447e+01, + "cpu_time": 6.8495950372208682e+01, + "time_unit": "us", + "items_per_second": 1.4599403244275234e+04 + }, + { + "name": "ml_kem_1024/decap_stddev", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.1973640441344053e-01, + "cpu_time": 2.2028344162418331e-01, + "time_unit": "us", + "items_per_second": 4.6687650587384368e+01 + }, + { + "name": "ml_kem_1024/decap_cv", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 3.2043371482574599e-03, + "cpu_time": 3.2122557847133523e-03, + "time_unit": "us", + "items_per_second": 3.2016196246866090e-03 + }, + { + "name": "ml_kem_1024/decap_min", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 6.8373022828770260e+01, + "cpu_time": 6.8373729528536529e+01, + "time_unit": "us", + "items_per_second": 1.4482325640417788e+04 + }, + { + "name": "ml_kem_1024/decap_max", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + 
"aggregate_unit": "time", + "iterations": 10, + "real_time": 6.9047634243106216e+01, + "cpu_time": 6.9049683374689806e+01, + "time_unit": "us", + "items_per_second": 1.4625500274672588e+04 + }, + { + "name": "ml_kem_512/encap_mean", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.3728628158969439e+01, + "cpu_time": 2.3727706657608742e+01, + "time_unit": "us", + "items_per_second": 4.2144835802927759e+04 + }, + { + "name": "ml_kem_512/encap_median", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.3727544497295035e+01, + "cpu_time": 2.3726335767663251e+01, + "time_unit": "us", + "items_per_second": 4.2147259237018880e+04 + }, + { + "name": "ml_kem_512/encap_stddev", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.2275990086576632e-02, + "cpu_time": 1.3724512952882925e-02, + "time_unit": "us", + "items_per_second": 2.4373663452182495e+01 + }, + { + "name": "ml_kem_512/encap_cv", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 5.1734933871161438e-04, + "cpu_time": 5.7841717073326587e-04, + "time_unit": "us", + "items_per_second": 5.7833096244947954e-04 + }, + { + "name": "ml_kem_512/encap_min", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + 
"run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.3713297044821193e+01, + "cpu_time": 2.3708976052989335e+01, + "time_unit": "us", + "items_per_second": 4.2105321871694199e+04 + }, + { + "name": "ml_kem_512/encap_max", + "family_index": 4, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.3749564877731991e+01, + "cpu_time": 2.3749966881793675e+01, + "time_unit": "us", + "items_per_second": 4.2178118437717822e+04 + }, + { + "name": "ml_kem_768/keygen_mean", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 3.5581466895849388e+01, + "cpu_time": 3.5582167557932294e+01, + "time_unit": "us", + "items_per_second": 2.8104414649201266e+04 + }, + { + "name": "ml_kem_768/keygen_median", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 3.5536767506985761e+01, + "cpu_time": 3.5536527374585930e+01, + "time_unit": "us", + "items_per_second": 2.8140059654789977e+04 + }, + { + "name": "ml_kem_768/keygen_stddev", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.5063312541365001e-01, + "cpu_time": 1.5075189919370130e-01, + "time_unit": "us", + "items_per_second": 1.1786251276251953e+02 + }, + { + "name": "ml_kem_768/keygen_cv", 
+ "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 4.2334714826279835e-03, + "cpu_time": 4.2367261339056432e-03, + "time_unit": "us", + "items_per_second": 4.1937366151786840e-03 + }, + { + "name": "ml_kem_768/keygen_min", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 3.5485493761100841e+01, + "cpu_time": 3.5486542398778340e+01, + "time_unit": "us", + "items_per_second": 2.7772936785867918e+04 + }, + { + "name": "ml_kem_768/keygen_max", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 3.6005216450241079e+01, + "cpu_time": 3.6006275019098581e+01, + "time_unit": "us", + "items_per_second": 2.8179696651269860e+04 + }, + { + "name": "ml_kem_1024/encap_mean", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 6.3025573788147746e+01, + "cpu_time": 6.3007179219030789e+01, + "time_unit": "us", + "items_per_second": 1.5871486868375063e+04 + }, + { + "name": "ml_kem_1024/encap_median", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 6.2911365574485330e+01, + "cpu_time": 6.2913221723518731e+01, + "time_unit": 
"us", + "items_per_second": 1.5894911926582943e+04 + }, + { + "name": "ml_kem_1024/encap_stddev", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 3.1173509841627561e-01, + "cpu_time": 2.7909379233540665e-01, + "time_unit": "us", + "items_per_second": 7.0138326118376483e+01 + }, + { + "name": "ml_kem_1024/encap_cv", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 4.9461683516620189e-03, + "cpu_time": 4.4295554220130633e-03, + "time_unit": "us", + "items_per_second": 4.4191402292706123e-03 + }, + { + "name": "ml_kem_1024/encap_min", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 6.2705447935354606e+01, + "cpu_time": 6.2707279174147359e+01, + "time_unit": "us", + "items_per_second": 1.5764542937679393e+04 + }, + { + "name": "ml_kem_1024/encap_max", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 6.3545293985674107e+01, + "cpu_time": 6.3433491472173579e+01, + "time_unit": "us", + "items_per_second": 1.5947111932936088e+04 + }, + { + "name": "ml_kem_1024/keygen_mean", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 
10, + "real_time": 5.6830606303724117e+01, + "cpu_time": 5.6830719115841006e+01, + "time_unit": "us", + "items_per_second": 1.7596233393946408e+04 + }, + { + "name": "ml_kem_1024/keygen_median", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 5.6777861031486836e+01, + "cpu_time": 5.6778576135898604e+01, + "time_unit": "us", + "items_per_second": 1.7612277057309846e+04 + }, + { + "name": "ml_kem_1024/keygen_stddev", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.5531314611887140e-01, + "cpu_time": 1.5436254668482241e-01, + "time_unit": "us", + "items_per_second": 4.7489323835652243e+01 + }, + { + "name": "ml_kem_1024/keygen_cv", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 2.7329137628555214e-03, + "cpu_time": 2.7161814787206406e-03, + "time_unit": "us", + "items_per_second": 2.6988346183217759e-03 + }, + { + "name": "ml_kem_1024/keygen_min", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 5.6734736799038046e+01, + "cpu_time": 5.6735717151043559e+01, + "time_unit": "us", + "items_per_second": 1.7463517261317924e+04 + }, + { + "name": "ml_kem_1024/keygen_max", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "ml_kem_1024/keygen", + "run_type": "aggregate", + 
"repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 5.7264625869805357e+01, + "cpu_time": 5.7262233319688804e+01, + "time_unit": "us", + "items_per_second": 1.7625581383553668e+04 + }, + { + "name": "ml_kem_768/encap_mean", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.0413159428243418e+01, + "cpu_time": 4.0408154461449669e+01, + "time_unit": "us", + "items_per_second": 2.4747484967580687e+04 + }, + { + "name": "ml_kem_768/encap_median", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.0409653624012499e+01, + "cpu_time": 4.0407994946578057e+01, + "time_unit": "us", + "items_per_second": 2.4747577868544318e+04 + }, + { + "name": "ml_kem_768/encap_stddev", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.6030794155068971e-02, + "cpu_time": 1.8815168544208706e-02, + "time_unit": "us", + "items_per_second": 1.1521579841753484e+01 + }, + { + "name": "ml_kem_768/encap_cv", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 6.4411678085423113e-04, + "cpu_time": 4.6562800986515778e-04, + "time_unit": "us", + "items_per_second": 4.6556568705251479e-04 + }, + { + "name": "ml_kem_768/encap_min", + "family_index": 7, + 
"per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.0382403984974061e+01, + "cpu_time": 4.0383580999133315e+01, + "time_unit": "us", + "items_per_second": 2.4728817595121422e+04 + }, + { + "name": "ml_kem_768/encap_max", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/encap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.0459657522364843e+01, + "cpu_time": 4.0438650014438345e+01, + "time_unit": "us", + "items_per_second": 2.4762539013602123e+04 + }, + { + "name": "ml_kem_512/decap_mean", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.7245118353076265e+01, + "cpu_time": 2.7245412293649167e+01, + "time_unit": "us", + "items_per_second": 3.6704294665812224e+04 + }, + { + "name": "ml_kem_512/decap_median", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.7200313167612187e+01, + "cpu_time": 2.7200910176733334e+01, + "time_unit": "us", + "items_per_second": 3.6763476041261179e+04 + }, + { + "name": "ml_kem_512/decap_stddev", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 1.4035491217682056e-01, + "cpu_time": 1.4054082749802710e-01, + "time_unit": "us", + "items_per_second": 
1.8690595464041468e+02 + }, + { + "name": "ml_kem_512/decap_cv", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 5.1515618452423794e-03, + "cpu_time": 5.1583299963783927e-03, + "time_unit": "us", + "items_per_second": 5.0922094087944968e-03 + }, + { + "name": "ml_kem_512/decap_min", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.7193109924289292e+01, + "cpu_time": 2.7191460477762536e+01, + "time_unit": "us", + "items_per_second": 3.6173073198472193e+04 + }, + { + "name": "ml_kem_512/decap_max", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "ml_kem_512/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 2.7644059429005711e+01, + "cpu_time": 2.7644872596620740e+01, + "time_unit": "us", + "items_per_second": 3.6776251897826915e+04 + }, + { + "name": "ml_kem_768/decap_mean", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "mean", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.5114106821092960e+01, + "cpu_time": 4.5114952831402761e+01, + "time_unit": "us", + "items_per_second": 2.2166474895467196e+04 + }, + { + "name": "ml_kem_768/decap_median", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "median", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 
4.5017490025717890e+01, + "cpu_time": 4.5018011743886625e+01, + "time_unit": "us", + "items_per_second": 2.2213331189963195e+04 + }, + { + "name": "ml_kem_768/decap_stddev", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "stddev", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 3.0197052925399132e-01, + "cpu_time": 3.0123935239888977e-01, + "time_unit": "us", + "items_per_second": 1.4558155989973736e+02 + }, + { + "name": "ml_kem_768/decap_cv", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "cv", + "aggregate_unit": "percentage", + "iterations": 10, + "real_time": 6.6934834917934342e-03, + "cpu_time": 6.6771509996838303e-03, + "time_unit": "us", + "items_per_second": 6.5676459872971143e-03 + }, + { + "name": "ml_kem_768/decap_min", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "min", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.4989114864829141e+01, + "cpu_time": 4.4990436293436744e+01, + "time_unit": "us", + "items_per_second": 2.1753621132761044e+04 + }, + { + "name": "ml_kem_768/decap_max", + "family_index": 8, + "per_family_instance_index": 0, + "run_name": "ml_kem_768/decap", + "run_type": "aggregate", + "repetitions": 10, + "threads": 1, + "aggregate_name": "max", + "aggregate_unit": "time", + "iterations": 10, + "real_time": 4.5970589446587269e+01, + "cpu_time": 4.5969358108108068e+01, + "time_unit": "us", + "items_per_second": 2.2226946044216980e+04 + } + ] +} From 08d2b7bd12da580dc96fcfbe35dd5de6c7fb63e3 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Tue, 19 Nov 2024 23:26:43 +0400 Subject: [PATCH 14/14] Update project documentation Signed-off-by: 
Anjan Roy --- README.md | 237 ++++++++++++------------------------------------------ 1 file changed, 52 insertions(+), 185 deletions(-) diff --git a/README.md b/README.md index 317c75a..79a2eb2 100644 --- a/README.md +++ b/README.md @@ -18,14 +18,20 @@ KeyGen | - | Public Key and Secret Key Encapsulation | Public Key | Cipher Text and 32B Shared Secret Decapsulation | Secret Key and Cipher Text | 32B Shared Secret -Here I'm maintaining `ml-kem` - a C++20 header-only fully `constexpr` library, implementing ML-KEM, supporting ML-KEM-{512, 768, 1024} parameter sets, as defined in table 2 of ML-KEM standard. It's pretty easy to use, see [usage](#usage). +Here I'm maintaining `ml-kem` - a C++20 header-only fully `constexpr` library, implementing ML-KEM, supporting ML-KEM-{512, 768, 1024} parameter sets, as defined in table 2 of ML-KEM standard. It's pretty easy to use, see [usage](#usage). It shows the following performance characteristics on desktop and server grade CPUs. + +ML-KEM-768 Algorithm | Time taken on "12th Gen Intel(R) Core(TM) i7-1260P" | Time taken on "Raspberry Pi 4B" | Time taken on "AWS EC2 Instance c8g.large" +--- | --: | --: | --: +keygen | 23.9us | 114.3us | 35.5us +encaps | 27.7us | 133.8us | 40.4us +decaps | 30.1us | 157.6us | 45us > [!NOTE] > Find ML-KEM standard @ https://doi.org/10.6028/NIST.FIPS.203 - this is the document that I followed when implementing ML-KEM. I suggest you go through the specification to get an in-depth understanding of the scheme. ## Prerequisites -- A C++ compiler with C++20 standard library such as `clang++`/ `g++`. +- A C++ compiler such as `clang++`/ `g++`, with support for compiling C++20 programs. 
```bash $ clang++ --version @@ -43,7 +49,7 @@ InstalledDir: /usr/bin > If you are on a machine running GNU/Linux kernel and you want to obtain *CPU cycle* count for ML-KEM routines, you should consider building `google-benchmark` library with `libPFM` support, following https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7, a step-by-step guide. Find more about libPFM @ https://perfmon2.sourceforge.net. > [!TIP] -> Git submodule based dependencies will normally be imported automatically, but in case that doesn't work, you can manually initialize and update them by issuing `$ git submodule update --init` from inside the root of this repository. +> Git submodule based dependencies will normally be imported automatically, but in case that doesn't work, you can manually initialize and update them by issuing `$ git submodule update --init --recursive` from inside the root of this repository. ## Testing @@ -53,34 +59,37 @@ For testing functional correctness of this implementation and conformance with M > Known Answer Test (KAT) files living in [this](./kats/) directory are generated by following (reproducible) steps, described in https://gist.github.com/itzmeanjan/c8f5bc9640d0f0bdd2437dfe364d7710. ```bash -# You can switch to non-default compiler, by setting variable `CXX` i.e. invoke like `$ CXX=clang++ make -j`. -# -make -j # Run tests without any sort of sanitizers -make debug_asan_test -j # Run tests with AddressSanitizer enabled, with `-O1` -make release_asan_test -j # Run tests with AddressSanitizer enabled, with `-O3 -march=native` -make debug_ubsan_test -j # Run tests with UndefinedBehaviourSanitizer enabled, with `-O1` -make release_ubsan_test -j # Run tests with UndefinedBehaviourSanitizer enabled, with `-O3 -march=native` +make test -j # Run tests without any sort of sanitizers, with default C++ compiler. +CXX=clang++ make test -j # Switch to non-default compiler, by setting variable `CXX`. 
+ +make debug_asan_test -j # Run tests with AddressSanitizer enabled, with `-O1`. +make release_asan_test -j # Run tests with AddressSanitizer enabled, with `-O3 -march=native`. +make debug_ubsan_test -j # Run tests with UndefinedBehaviorSanitizer enabled, with `-O1`. +make release_ubsan_test -j # Run tests with UndefinedBehaviorSanitizer enabled, with `-O3 -march=native`. ``` ```bash PASSED TESTS (15/15): - 1 ms: build/test/test.out ML_KEM.ML_KEM_512_EncapsFailureDueToNonReducedPubKey + 1 ms: build/test/test.out ML_KEM.ML_KEM_1024_DecapsFailureDueToBitFlippedCipherText + 1 ms: build/test/test.out ML_KEM.ML_KEM_512_DecapsFailureDueToBitFlippedCipherText + 1 ms: build/test/test.out ML_KEM.PolynomialSerialization + 2 ms: build/test/test.out ML_KEM.ML_KEM_512_EncapsFailureDueToNonReducedPubKey + 2 ms: build/test/test.out ML_KEM.ML_KEM_512_KeygenEncapsDecaps + 2 ms: build/test/test.out ML_KEM.ML_KEM_768_KeygenEncapsDecaps 2 ms: build/test/test.out ML_KEM.ML_KEM_768_DecapsFailureDueToBitFlippedCipherText + 2 ms: build/test/test.out ML_KEM.ML_KEM_768_EncapsFailureDueToNonReducedPubKey 2 ms: build/test/test.out ML_KEM.ML_KEM_1024_EncapsFailureDueToNonReducedPubKey - 2 ms: build/test/test.out ML_KEM.ML_KEM_512_DecapsFailureDueToBitFlippedCipherText - 2 ms: build/test/test.out ML_KEM.ML_KEM_768_KeygenEncapsDecaps - 2 ms: build/test/test.out ML_KEM.ML_KEM_1024_DecapsFailureDueToBitFlippedCipherText - 3 ms: build/test/test.out ML_KEM.PolynomialSerialization 3 ms: build/test/test.out ML_KEM.ML_KEM_1024_KeygenEncapsDecaps - 3 ms: build/test/test.out ML_KEM.ML_KEM_768_EncapsFailureDueToNonReducedPubKey - 3 ms: build/test/test.out ML_KEM.ML_KEM_512_KeygenEncapsDecaps - 17 ms: build/test/test.out ML_KEM.ML_KEM_768_KnownAnswerTests - 18 ms: build/test/test.out ML_KEM.ML_KEM_512_KnownAnswerTests - 32 ms: build/test/test.out ML_KEM.ML_KEM_1024_KnownAnswerTests - 100 ms: build/test/test.out ML_KEM.CompressDecompressZq - 156 ms: build/test/test.out ML_KEM.ArithmeticOverZq + 15 ms: 
build/test/test.out ML_KEM.ML_KEM_512_KnownAnswerTests + 24 ms: build/test/test.out ML_KEM.ML_KEM_768_KnownAnswerTests + 30 ms: build/test/test.out ML_KEM.ML_KEM_1024_KnownAnswerTests + 111 ms: build/test/test.out ML_KEM.CompressDecompressZq + 136 ms: build/test/test.out ML_KEM.ArithmeticOverZq ``` +> [!NOTE] +> There is a help menu, which introduces you to all available commands; just run `make` from the root directory of this project. + ## Benchmarking For benchmarking ML-KEM public functions such as keygen, encaps and decaps, for various suggested parameter sets, you have to issue. @@ -95,157 +104,21 @@ make perf -j # If you have built google-benchmark library with libPFM supp ### On 12th Gen Intel(R) Core(TM) i7-1260P -Compiled with **gcc (Ubuntu 14-20240412-0ubuntu1) 14.0.1 20240412**. +Compiled with **g++ (Ubuntu 14.2.0-4ubuntu2) 14.2.0**, while running kernel `Linux 6.8.0-41-generic x86_64`. -```bash -$ uname -srm -Linux 6.8.0-41-generic x86_64 -``` - -```bash -Running ./build/benchmark/perf.out -Run on (16 X 4512.33 MHz CPU s) -CPU Caches: - L1 Data 48 KiB (x8) - L1 Instruction 32 KiB (x8) - L2 Unified 1280 KiB (x8) - L3 Unified 18432 KiB (x1) -Load Average: 0.67, 0.66, 0.77 ------------------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations CYCLES items_per_second ------------------------------------------------------------------------------------------------- -ml_kem_512/decap_mean 19.7 us 19.7 us 10 91.9767k 50.8334k/s -ml_kem_512/decap_median 19.7 us 19.7 us 10 91.9204k 50.8766k/s -ml_kem_512/decap_stddev 0.071 us 0.067 us 10 189.166 172.876/s -ml_kem_512/decap_cv 0.36 % 0.34 % 10 0.21% 0.34% -ml_kem_512/decap_min 19.6 us 19.6 us 10 91.7206k 50.445k/s -ml_kem_512/decap_max 19.8 us 19.8 us 10 92.3585k 51.0262k/s -ml_kem_1024/keygen_mean 35.5 us 35.5 us 10 165.876k 28.1537k/s -ml_kem_1024/keygen_median 35.4 us 35.4 us 10 165.836k 28.211k/s -ml_kem_1024/keygen_stddev 0.223 us 0.217 us 10 653.566 
170.496/s -ml_kem_1024/keygen_cv 0.63 % 0.61 % 10 0.39% 0.61% -ml_kem_1024/keygen_min 35.3 us 35.3 us 10 165.106k 27.7649k/s -ml_kem_1024/keygen_max 36.0 us 36.0 us 10 167.138k 28.3399k/s -ml_kem_768/keygen_mean 22.7 us 22.7 us 10 106.239k 44.0363k/s -ml_kem_768/keygen_median 22.7 us 22.7 us 10 106.245k 44.0287k/s -ml_kem_768/keygen_stddev 0.096 us 0.096 us 10 449.139 185.991/s -ml_kem_768/keygen_cv 0.42 % 0.42 % 10 0.42% 0.42% -ml_kem_768/keygen_min 22.6 us 22.6 us 10 105.649k 43.8031k/s -ml_kem_768/keygen_max 22.8 us 22.8 us 10 106.865k 44.2845k/s -ml_kem_1024/encap_mean 39.5 us 39.5 us 10 184.973k 25.2883k/s -ml_kem_1024/encap_median 39.5 us 39.5 us 10 184.69k 25.3388k/s -ml_kem_1024/encap_stddev 0.182 us 0.183 us 10 747.023 116.145/s -ml_kem_1024/encap_cv 0.46 % 0.46 % 10 0.40% 0.46% -ml_kem_1024/encap_min 39.4 us 39.4 us 10 184.241k 25.0329k/s -ml_kem_1024/encap_max 39.9 us 39.9 us 10 186.379k 25.3857k/s -ml_kem_1024/decap_mean 46.9 us 46.9 us 10 219.244k 21.3258k/s -ml_kem_1024/decap_median 46.9 us 46.9 us 10 219.46k 21.3138k/s -ml_kem_1024/decap_stddev 0.120 us 0.120 us 10 507.886 54.7725/s -ml_kem_1024/decap_cv 0.26 % 0.26 % 10 0.23% 0.26% -ml_kem_1024/decap_min 46.7 us 46.7 us 10 218.364k 21.2385k/s -ml_kem_1024/decap_max 47.1 us 47.1 us 10 219.722k 21.4278k/s -ml_kem_512/keygen_mean 13.5 us 13.5 us 10 63.1494k 73.9958k/s -ml_kem_512/keygen_median 13.5 us 13.5 us 10 63.0695k 74.0784k/s -ml_kem_512/keygen_stddev 0.064 us 0.064 us 10 337.757 350.424/s -ml_kem_512/keygen_cv 0.48 % 0.48 % 10 0.53% 0.47% -ml_kem_512/keygen_min 13.5 us 13.5 us 10 62.7346k 73.1112k/s -ml_kem_512/keygen_max 13.7 us 13.7 us 10 63.9768k 74.3286k/s -ml_kem_768/encap_mean 25.9 us 25.9 us 10 120.857k 38.6398k/s -ml_kem_768/encap_median 25.8 us 25.8 us 10 120.73k 38.7252k/s -ml_kem_768/encap_stddev 0.195 us 0.194 us 10 434.128 286.24/s -ml_kem_768/encap_cv 0.76 % 0.75 % 10 0.36% 0.74% -ml_kem_768/encap_min 25.7 us 25.7 us 10 120.315k 37.9192k/s -ml_kem_768/encap_max 26.4 us 26.4 us 10 
121.593k 38.8849k/s -ml_kem_768/decap_mean 31.3 us 31.3 us 10 146.348k 31.9402k/s -ml_kem_768/decap_median 31.2 us 31.2 us 10 146.055k 32.0125k/s -ml_kem_768/decap_stddev 0.155 us 0.155 us 10 740.319 157.033/s -ml_kem_768/decap_cv 0.49 % 0.49 % 10 0.51% 0.49% -ml_kem_768/decap_min 31.2 us 31.2 us 10 145.743k 31.652k/s -ml_kem_768/decap_max 31.6 us 31.6 us 10 147.862k 32.09k/s -ml_kem_512/encap_mean 15.6 us 15.6 us 10 72.7434k 64.0886k/s -ml_kem_512/encap_median 15.6 us 15.6 us 10 72.7235k 64.1631k/s -ml_kem_512/encap_stddev 0.068 us 0.064 us 10 141.896 261.103/s -ml_kem_512/encap_cv 0.44 % 0.41 % 10 0.20% 0.41% -ml_kem_512/encap_min 15.5 us 15.5 us 10 72.5719k 63.7001k/s -ml_kem_512/encap_max 15.7 us 15.7 us 10 73.1062k 64.4231k/s -``` +Benchmark results are in JSON format @ [bench_result_on_Linux_6.11.0-9-generic_x86_64_with_g++_14](./bench_result_on_Linux_6.11.0-9-generic_x86_64_with_g++_14.json). ### On ARM Cortex-A72 i.e. Raspberry Pi 4B -Compiled with **gcc (GCC) 12.1.0**. +Compiled with **g++ (Debian 12.2.0-14) 12.2.0**, while running kernel `Linux 6.6.51+rpt-rpi-v8 aarch64`. -```bash -$ uname -srm -Linux 6.6.33-2-MANJARO-RPI4 aarch64 -``` +Benchmark results are in JSON format @ [bench_result_on_Linux_6.6.51+rpt-rpi-v8_aarch64_with_g++_12](./bench_result_on_Linux_6.6.51+rpt-rpi-v8_aarch64_with_g++_12.json). 
-```bash -2024-09-02T21:13:57+04:00 -Running ./build/benchmark/bench.out -Run on (4 X 1500 MHz CPU s) -CPU Caches: - L1 Data 32 KiB (x4) - L1 Instruction 48 KiB (x4) - L2 Unified 1024 KiB (x1) -Load Average: 3.75, 4.36, 2.95 -------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations items_per_second -------------------------------------------------------------------------------------- -ml_kem_1024/keygen_mean 225 us 223 us 10 4.47447k/s -ml_kem_1024/keygen_median 225 us 224 us 10 4.47389k/s -ml_kem_1024/keygen_stddev 0.896 us 0.828 us 10 16.5682/s -ml_kem_1024/keygen_cv 0.40 % 0.37 % 10 0.37% -ml_kem_1024/keygen_min 223 us 222 us 10 4.44656k/s -ml_kem_1024/keygen_max 226 us 225 us 10 4.50358k/s -ml_kem_512/keygen_mean 84.1 us 83.7 us 10 11.9468k/s -ml_kem_512/keygen_median 84.0 us 83.6 us 10 11.9584k/s -ml_kem_512/keygen_stddev 0.298 us 0.282 us 10 40.1688/s -ml_kem_512/keygen_cv 0.35 % 0.34 % 10 0.34% -ml_kem_512/keygen_min 83.7 us 83.3 us 10 11.8656k/s -ml_kem_512/keygen_max 84.6 us 84.3 us 10 12.0022k/s -ml_kem_512/decap_mean 130 us 130 us 10 7.70299k/s -ml_kem_512/decap_median 130 us 130 us 10 7.70396k/s -ml_kem_512/decap_stddev 0.320 us 0.273 us 10 16.1626/s -ml_kem_512/decap_cv 0.25 % 0.21 % 10 0.21% -ml_kem_512/decap_min 130 us 129 us 10 7.67157k/s -ml_kem_512/decap_max 131 us 130 us 10 7.72261k/s -ml_kem_768/decap_mean 207 us 206 us 10 4.85013k/s -ml_kem_768/decap_median 207 us 206 us 10 4.85556k/s -ml_kem_768/decap_stddev 1.31 us 1.12 us 10 26.2579/s -ml_kem_768/decap_cv 0.63 % 0.55 % 10 0.54% -ml_kem_768/decap_min 206 us 205 us 10 4.7875k/s -ml_kem_768/decap_max 210 us 209 us 10 4.87723k/s -ml_kem_512/encap_mean 102 us 101 us 10 9.89142k/s -ml_kem_512/encap_median 101 us 101 us 10 9.9073k/s -ml_kem_512/encap_stddev 0.515 us 0.491 us 10 47.6312/s -ml_kem_512/encap_cv 0.51 % 0.49 % 10 0.48% -ml_kem_512/encap_min 101 us 101 us 10 9.76968k/s -ml_kem_512/encap_max 103 us 102 us 10 9.93945k/s 
-ml_kem_768/keygen_mean 142 us 141 us 10 7.06934k/s -ml_kem_768/keygen_median 142 us 141 us 10 7.06916k/s -ml_kem_768/keygen_stddev 0.293 us 0.259 us 10 12.91/s -ml_kem_768/keygen_cv 0.21 % 0.18 % 10 0.18% -ml_kem_768/keygen_min 142 us 141 us 10 7.04302k/s -ml_kem_768/keygen_max 143 us 142 us 10 7.08679k/s -ml_kem_1024/encap_mean 254 us 253 us 10 3.94775k/s -ml_kem_1024/encap_median 254 us 253 us 10 3.95198k/s -ml_kem_1024/encap_stddev 1.46 us 1.41 us 10 21.6985/s -ml_kem_1024/encap_cv 0.57 % 0.56 % 10 0.55% -ml_kem_1024/encap_min 253 us 252 us 10 3.89026k/s -ml_kem_1024/encap_max 258 us 257 us 10 3.96754k/s -ml_kem_1024/decap_mean 306 us 304 us 10 3.28684k/s -ml_kem_1024/decap_median 305 us 304 us 10 3.29067k/s -ml_kem_1024/decap_stddev 1.93 us 1.84 us 10 19.7821/s -ml_kem_1024/decap_cv 0.63 % 0.60 % 10 0.60% -ml_kem_1024/decap_min 304 us 302 us 10 3.24947k/s -ml_kem_1024/decap_max 309 us 308 us 10 3.3074k/s -ml_kem_768/encap_mean 167 us 167 us 10 6.00584k/s -ml_kem_768/encap_median 167 us 166 us 10 6.00611k/s -ml_kem_768/encap_stddev 0.845 us 0.721 us 10 25.9414/s -ml_kem_768/encap_cv 0.50 % 0.43 % 10 0.43% -ml_kem_768/encap_min 166 us 166 us 10 5.95361k/s -ml_kem_768/encap_max 169 us 168 us 10 6.03767k/s -``` +### On AWS EC2 Instance `c8g.large` i.e. AWS Graviton4 + +Compiled with **g++ (Ubuntu 13.2.0-23ubuntu4) 13.2.0**, while running kernel `Linux 6.8.0-1016-aws aarch64`. More about this EC2 instance @ https://aws.amazon.com/ec2/instance-types/c8g. + +Benchmark results are in JSON format @ [bench_result_on_Linux_6.8.0-1016-aws_aarch64_with_g++_13](./bench_result_on_Linux_6.8.0-1016-aws_aarch64_with_g++_13.json). 
## Usage @@ -256,12 +129,10 @@ ml_kem_768/encap_max 169 us 168 us 10 6.037 ```bash cd -# Multi-step cloning and importing of submodules -git clone https://github.com/itzmeanjan/ml-kem.git && pushd ml-kem && git submodule update --init && popd -# Or do single step cloning and importing of submodules +# Single step cloning and importing of submodules git clone https://github.com/itzmeanjan/ml-kem.git --recurse-submodules # Or clone and then run tests, which will automatically bring in dependencies -git clone https://github.com/itzmeanjan/ml-kem.git && pushd ml-kem && make -j && popd +git clone https://github.com/itzmeanjan/ml-kem.git && pushd ml-kem && make test -j && popd ``` - Write your program while including proper header files ( based on which variant of ML-KEM you want to use, see [include](./include/ml_kem/) directory ), which includes declarations ( and definitions ) of all required ML-KEM routines and constants ( such as byte length of public/ private key, cipher text etc. ). @@ -270,6 +141,7 @@ git clone https://github.com/itzmeanjan/ml-kem.git && pushd ml-kem && make -j && // main.cpp #include "ml_kem/ml_kem_512.hpp" +#include "randomshake/randomshake.hpp" #include #include #include @@ -289,14 +161,11 @@ main() std::array sender_key{}; std::array receiver_key{}; - // Be careful ! - // - // Read API documentation in include/ml_kem/internals/rng/prng.hpp - ml_kem_prng::prng_t<128> prng; + randomshake::randomshake_t<128> csprng; - prng.read(d); - prng.read(z); - prng.read(m); + csprng.generate(d); + csprng.generate(z); + csprng.generate(m); ml_kem_512::keygen(d, z, pkey, skey); assert(ml_kem_512::encapsulate(m, pkey, cipher, sender_key)); // Key Encapsulation might fail, if input public key is malformed @@ -307,16 +176,17 @@ main() } ``` -- When compiling your program, let your compiler know where it can find `ml-kem`, `sha3` and `subtle` headers, which includes their definitions ( all of them are header-only libraries ) too. 
+- When compiling your program, let your compiler know where it can find `ml-kem`, `sha3`, `RandomShake` and `subtle` headers, which include their definitions ( all of them are header-only libraries ) too. ```bash # Assuming `ml-kem` was cloned just under $HOME ML_KEM_HEADERS=~/ml-kem/include SHA3_HEADERS=~/ml-kem/sha3/include +RANDOMSHAKE_HEADERS=~/ml-kem/RandomShake/include SUBTLE_HEADERS=~/ml-kem/subtle/include -g++ -std=c++20 -Wall -Wextra -Wpedantic -O3 -march=native -I $ML_KEM_HEADERS -I $SHA3_HEADERS -I $SUBTLE_HEADERS main.cpp +g++ -std=c++20 -Wall -Wextra -Wpedantic -O3 -march=native -I $ML_KEM_HEADERS -I $SHA3_HEADERS -I $RANDOMSHAKE_HEADERS -I $SUBTLE_HEADERS main.cpp ``` ML-KEM Variant | Namespace | Header @@ -379,10 +249,10 @@ main() } ``` -See example [program](./examples/ml_kem_768.cpp), where I show how to use ML-KEM-768 API. +See example [program](./examples/ml_kem_768.cpp), where I show how to use ML-KEM-768 API. Issue the following command to build and execute the example. ```bash -g++ -std=c++20 -Wall -Wextra -Wpedantic -O3 -march=native -I ./include -I ./sha3/include -I ./subtle/include/ examples/ml_kem_768.cpp && ./a.out +make example -j ``` ```bash @@ -394,8 +264,5 @@ Cipher : 618d4938da6a966795627c52fea714ae433de7faefdbbe3339cfd3fcce66c8c Shared secret : e6a9fc79df8a91733c7f385bc66602a526b54bbf78ed2ac11029a42a2a56f515 ``` -> [!CAUTION] -> Before you consider using Psuedo Random Number Generator which comes with this library implementation, I strongly advice you to go through [include/ml_kem/internals/rng/prng.hpp](./include/ml_kem/internals/rng/prng.hpp). - > [!NOTE] > Looking at API documentation, in header files, can give you good idea of how to use ML-KEM API. Note, this library doesn't expose any raw pointer based interface, rather everything is wrapped under statically defined `std::span` - which one can easily create from `std::{array, vector}`.
I opt for using statically defined `std::span` based function interfaces because we always know, at compile-time, how many bytes the seeds/ keys/ cipher-texts/ shared-secrets are, for various different ML-KEM parameters. This gives much better type safety and compile-time error reporting.