From c1e8116d969361afa60a28b9fff3a8290955ef50 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Sun, 16 Jul 2023 14:40:02 +0400 Subject: [PATCH 1/5] update git submodule based dependency SHA3 to latest commit Signed-off-by: Anjan Roy --- example/kyber512_kem.cpp | 4 +-- include/bench/bench_kem.hpp | 6 ++-- include/kem.hpp | 65 ++++++++++++++++++++++++++++------- include/kyber1024_kem.hpp | 4 +-- include/kyber512_kem.hpp | 4 +-- include/kyber768_kem.hpp | 4 +-- include/pke.hpp | 6 +++- include/prng.hpp | 10 +++--- include/sampling.hpp | 15 ++++---- include/test/test_kem.hpp | 4 +-- include/test/test_kem_kat.hpp | 12 +++---- sha3 | 2 +- 12 files changed, 91 insertions(+), 45 deletions(-) diff --git a/example/kyber512_kem.cpp b/example/kyber512_kem.cpp index 0ddb826..1b41cab 100644 --- a/example/kyber512_kem.cpp +++ b/example/kyber512_kem.cpp @@ -48,8 +48,8 @@ main() auto rkdf = kyber512_kem::decapsulate(skey.data(), cipher.data()); // both sender's and receiver's KDF should produce same KEY_LEN many bytes - skdf.read(shrd_key0.data(), KEY_LEN); - rkdf.read(shrd_key1.data(), KEY_LEN); + skdf.squeeze(shrd_key0.data(), KEY_LEN); + rkdf.squeeze(shrd_key1.data(), KEY_LEN); // check that both of the communicating parties arrived at same shared key assert(std::ranges::equal(shrd_key0, shrd_key1)); diff --git a/include/bench/bench_kem.hpp b/include/bench/bench_kem.hpp index d31e612..056fd91 100644 --- a/include/bench/bench_kem.hpp +++ b/include/bench/bench_kem.hpp @@ -76,7 +76,7 @@ encapsulate(benchmark::State& state) for (auto _ : state) { auto skdf = kem::encapsulate(m, pkey, cipher); benchmark::DoNotOptimize(skdf); - skdf.read(sender_key, klen); + skdf.squeeze(sender_key, klen); benchmark::DoNotOptimize(m); benchmark::DoNotOptimize(pkey); @@ -129,12 +129,12 @@ decapsulate(benchmark::State& state) prng.read(m, slen); auto skdf = kem::encapsulate(m, pkey, cipher); - skdf.read(sender_key, klen); + skdf.squeeze(sender_key, klen); for (auto _ : state) { auto rkdf = 
kem::decapsulate(skey, cipher); benchmark::DoNotOptimize(rkdf); - rkdf.read(receiver_key, klen); + rkdf.squeeze(receiver_key, klen); benchmark::DoNotOptimize(skey); benchmark::DoNotOptimize(cipher); diff --git a/include/kem.hpp b/include/kem.hpp index 2221e87..c86a29c 100644 --- a/include/kem.hpp +++ b/include/kem.hpp @@ -40,9 +40,14 @@ keygen(const uint8_t* const __restrict d, // 32 -bytes seed ( used in CPA-PKE ) constexpr size_t skoff2 = skoff1 + 32; std::memcpy(seckey + skoff2, z, zlen); - pke::keygen(d, pubkey, seckey); // CPAPKE key generation - std::memcpy(seckey + skoff0, pubkey, pklen); // copy public key - sha3_256::hash(pubkey, pklen, seckey + skoff1); // hash public key + pke::keygen(d, pubkey, seckey); // CPAPKE key generation + std::memcpy(seckey + skoff0, pubkey, pklen); // copy public key + + // hash public key + sha3_256::sha3_256 hasher; + hasher.absorb(pubkey, pklen); + hasher.finalize(); + hasher.digest(seckey + skoff1); } // Given (k * 12 * 32 + 32) -bytes public key and 32 -bytes seed ( used for @@ -70,7 +75,7 @@ template -static inline shake256::shake256 +static inline shake256::shake256 encapsulate( const uint8_t* const __restrict m, // 32 -bytes seed for encapsulation const uint8_t* const __restrict pubkey, // (k * 12 * 32 + 32) -bytes @@ -86,17 +91,40 @@ encapsulate( uint8_t g_out[64]{}; uint8_t kdf_in[64]{}; - sha3_256::hash(m, mlen, g_in); - sha3_256::hash(pubkey, pklen, g_in + 32); - sha3_512::hash(g_in, sizeof(g_in), g_out); + { + sha3_256::sha3_256 hasher; + hasher.absorb(m, mlen); + hasher.finalize(); + hasher.digest(g_in); + } + + { + sha3_256::sha3_256 hasher; + hasher.absorb(pubkey, pklen); + hasher.finalize(); + hasher.digest(g_in + 32); + } + + { + sha3_512::sha3_512 hasher; + hasher.absorb(g_in, sizeof(g_in)); + hasher.finalize(); + hasher.digest(g_out); + } pke::encrypt(pubkey, g_in, g_out + 32, cipher); std::memcpy(kdf_in, g_out, 32); - sha3_256::hash(cipher, ctlen, kdf_in + 32); + { + sha3_256::sha3_256 hasher; + 
hasher.absorb(cipher, ctlen); + hasher.finalize(); + hasher.digest(kdf_in + 32); + } shake256::shake256 hasher{}; - hasher.hash(kdf_in, sizeof(kdf_in)); + hasher.absorb(kdf_in, sizeof(kdf_in)); + hasher.finalize(); return hasher; } @@ -119,7 +147,7 @@ template -static inline shake256::shake256 +static inline shake256::shake256 decapsulate( const uint8_t* const __restrict seckey, // (k * 24 * 32 + 96) -bytes const uint8_t* const __restrict cipher // (k * du * 32 + dv * 32) -bytes @@ -145,7 +173,12 @@ decapsulate( pke::decrypt(seckey, cipher, g_in); std::memcpy(g_in + 32, h, 32); - sha3_512::hash(g_in, sizeof(g_in), g_out); + { + sha3_512::sha3_512 hasher; + hasher.absorb(g_in, sizeof(g_in)); + hasher.finalize(); + hasher.digest(g_out); + } pke::encrypt(pubkey, g_in, g_out + 32, c_prime); @@ -159,10 +192,16 @@ decapsulate( kdf_in[i] = subtle::ct_select(flg, g_out[i], z[i]); } - sha3_256::hash(cipher, ctlen, kdf_in + 32); + { + sha3_256::sha3_256 hasher; + hasher.absorb(cipher, ctlen); + hasher.finalize(); + hasher.digest(kdf_in + 32); + } shake256::shake256 hasher; - hasher.hash(kdf_in, sizeof(kdf_in)); + hasher.absorb(kdf_in, sizeof(kdf_in)); + hasher.finalize(); return hasher; } diff --git a/include/kyber1024_kem.hpp b/include/kyber1024_kem.hpp index 544eeaf..5021b83 100644 --- a/include/kyber1024_kem.hpp +++ b/include/kyber1024_kem.hpp @@ -43,7 +43,7 @@ keygen(const uint8_t* const __restrict d, // at same SHAKE256 XOF backed KDF. // // Returned KDF can be used for deriving shared key of arbitrary bytes length. -inline shake256::shake256 +inline shake256::shake256 encapsulate(const uint8_t* const __restrict m, const uint8_t* const __restrict pubkey, uint8_t* const __restrict cipher) @@ -57,7 +57,7 @@ encapsulate(const uint8_t* const __restrict m, // derivation function). // // Returned KDF can be used for deriving shared key of arbitrary bytes length. 
-inline shake256::shake256 +inline shake256::shake256 decapsulate(const uint8_t* const __restrict seckey, const uint8_t* const __restrict cipher) { diff --git a/include/kyber512_kem.hpp b/include/kyber512_kem.hpp index facaf40..fb368c7 100644 --- a/include/kyber512_kem.hpp +++ b/include/kyber512_kem.hpp @@ -42,7 +42,7 @@ keygen(const uint8_t* const __restrict d, // SHAKE256 XOF backed KDF. // // Returned KDF can be used for deriving shared key of arbitrary bytes length. -inline shake256::shake256 +inline shake256::shake256 encapsulate(const uint8_t* const __restrict m, const uint8_t* const __restrict pubkey, uint8_t* const __restrict cipher) @@ -56,7 +56,7 @@ encapsulate(const uint8_t* const __restrict m, // derivation function). // // Returned KDF can be used for deriving shared key of arbitrary bytes length. -inline shake256::shake256 +inline shake256::shake256 decapsulate(const uint8_t* const __restrict seckey, const uint8_t* const __restrict cipher) { diff --git a/include/kyber768_kem.hpp b/include/kyber768_kem.hpp index 991856c..f29a58f 100644 --- a/include/kyber768_kem.hpp +++ b/include/kyber768_kem.hpp @@ -42,7 +42,7 @@ keygen(const uint8_t* const __restrict d, // at same SHAKE256 XOF backed KDF. // // Returned KDF can be used for deriving shared key of arbitrary bytes length. -inline shake256::shake256 +inline shake256::shake256 encapsulate(const uint8_t* const __restrict m, const uint8_t* const __restrict pubkey, uint8_t* const __restrict cipher) @@ -56,7 +56,7 @@ encapsulate(const uint8_t* const __restrict m, // derivation function). // // Returned KDF can be used for deriving shared key of arbitrary bytes length. 
-inline shake256::shake256 +inline shake256::shake256 decapsulate(const uint8_t* const __restrict seckey, const uint8_t* const __restrict cipher) { diff --git a/include/pke.hpp b/include/pke.hpp index bbbb550..9cdf502 100644 --- a/include/pke.hpp +++ b/include/pke.hpp @@ -34,7 +34,11 @@ keygen(const uint8_t* const __restrict d, // 32 -bytes seed // step 2 uint8_t g_out[64]{}; - sha3_512::hash(d, dlen, g_out); + + sha3_512::sha3_512 hasher; + hasher.absorb(d, dlen); + hasher.finalize(); + hasher.digest(g_out); const uint8_t* rho = g_out + 0; const uint8_t* sigma = g_out + 32; diff --git a/include/prng.hpp b/include/prng.hpp index ca1bb5e..27da074 100644 --- a/include/prng.hpp +++ b/include/prng.hpp @@ -22,7 +22,7 @@ namespace prng { struct prng_t { private: - shake256::shake256 state; + shake256::shake256 state; public: inline prng_t() @@ -41,17 +41,19 @@ struct prng_t off += sizeof(v); } - state.hash(seed, sizeof(seed)); + state.absorb(seed, sizeof(seed)); + state.finalize(); } inline explicit prng_t(const uint8_t* const seed, const size_t slen) { - state.hash(seed, slen); + state.absorb(seed, slen); + state.finalize(); } inline void read(uint8_t* const bytes, const size_t len) { - state.read(bytes, len); + state.squeeze(bytes, len); } }; diff --git a/include/sampling.hpp b/include/sampling.hpp index a989d57..260f4c9 100644 --- a/include/sampling.hpp +++ b/include/sampling.hpp @@ -19,17 +19,17 @@ namespace kyber_utils { // See algorithm 1, defined in Kyber specification // https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf inline void -parse(shake128::shake128& hasher, // Squeezes bytes +parse(shake128::shake128& hasher, // Squeezes bytes field::zq_t* const __restrict poly // Degree 255 polynomial ) { constexpr size_t n = ntt::N; size_t coeff_idx = 0; - uint8_t buf[shake128::rate / 8]; + uint8_t buf[shake128::RATE / 8]; while (coeff_idx < ntt::N) { - hasher.read(buf, sizeof(buf)); + hasher.squeeze(buf, sizeof(buf)); for (size_t off = 0; (off 
< sizeof(buf)) && (coeff_idx < n); off += 3) { const uint16_t d1 = (static_cast(buf[off + 1] & 0x0f) << 8) | @@ -78,8 +78,8 @@ generate_matrix(field::zq_t* const __restrict mat, } shake128::shake128 hasher{}; - hasher.hash(xof_in, sizeof(xof_in)); - + hasher.absorb(xof_in, sizeof(xof_in)); + hasher.finalize(); parse(hasher, mat + off); } } @@ -171,8 +171,9 @@ generate_vector(field::zq_t* const __restrict vec, prf_in[32] = nonce + static_cast(i); shake256::shake256 hasher{}; - hasher.hash(prf_in, sizeof(prf_in)); - hasher.read(prf_out, sizeof(prf_out)); + hasher.absorb(prf_in, sizeof(prf_in)); + hasher.finalize(); + hasher.squeeze(prf_out, sizeof(prf_out)); kyber_utils::cbd(prf_out, vec + off); } diff --git a/include/test/test_kem.hpp b/include/test/test_kem.hpp index 53c8402..37bd39c 100644 --- a/include/test/test_kem.hpp +++ b/include/test/test_kem.hpp @@ -55,8 +55,8 @@ test_kyber_kem() auto skdf = kem::encapsulate(m, pkey, cipher); auto rkdf = kem::decapsulate(skey, cipher); - skdf.read(sender_key, klen); - rkdf.read(receiver_key, klen); + skdf.squeeze(sender_key, klen); + rkdf.squeeze(receiver_key, klen); bool flg = false; for (size_t i = 0; i < klen; i++) { diff --git a/include/test/test_kem_kat.hpp b/include/test/test_kem_kat.hpp index 54dcdb6..72d4afa 100644 --- a/include/test/test_kem_kat.hpp +++ b/include/test/test_kem_kat.hpp @@ -82,8 +82,8 @@ test_kyber512_kem_kat() auto skdf = kyber512::encapsulate(___m.data(), pkey.data(), ctxt.data()); auto rkdf = kyber512::decapsulate(skey.data(), ctxt.data()); - skdf.read(shrd_sec0.data(), shrd_sec0.size()); - rkdf.read(shrd_sec1.data(), shrd_sec1.size()); + skdf.squeeze(shrd_sec0.data(), shrd_sec0.size()); + rkdf.squeeze(shrd_sec1.data(), shrd_sec1.size()); assert(std::ranges::equal(___pk, pkey)); assert(std::ranges::equal(___sk, skey)); @@ -169,8 +169,8 @@ test_kyber768_kem_kat() auto skdf = kyber768::encapsulate(___m.data(), pkey.data(), ctxt.data()); auto rkdf = kyber768::decapsulate(skey.data(), ctxt.data()); - 
skdf.read(shrd_sec0.data(), shrd_sec0.size()); - rkdf.read(shrd_sec1.data(), shrd_sec1.size()); + skdf.squeeze(shrd_sec0.data(), shrd_sec0.size()); + rkdf.squeeze(shrd_sec1.data(), shrd_sec1.size()); assert(std::ranges::equal(___pk, pkey)); assert(std::ranges::equal(___sk, skey)); @@ -256,8 +256,8 @@ test_kyber1024_kem_kat() auto skdf = kyber1024::encapsulate(___m.data(), pkey.data(), ctxt.data()); auto rkdf = kyber1024::decapsulate(skey.data(), ctxt.data()); - skdf.read(shrd_sec0.data(), shrd_sec0.size()); - rkdf.read(shrd_sec1.data(), shrd_sec1.size()); + skdf.squeeze(shrd_sec0.data(), shrd_sec0.size()); + rkdf.squeeze(shrd_sec1.data(), shrd_sec1.size()); assert(std::ranges::equal(___pk, pkey)); assert(std::ranges::equal(___sk, skey)); diff --git a/sha3 b/sha3 index 63231e8..e529767 160000 --- a/sha3 +++ b/sha3 @@ -1 +1 @@ -Subproject commit 63231e88fd4b7b44ce703909a8c16303cc76bc91 +Subproject commit e52976716e6550ac7ad70f8650754e1ab20df769 From 253b9c4e1048e565b3ccd7000e6de61e2f1459a0 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Sun, 16 Jul 2023 15:01:28 +0400 Subject: [PATCH 2/5] prefer allocating memory on heap using std::vector, when benchmarking Kyber KEM routines Signed-off-by: Anjan Roy --- Makefile | 10 +- {bench => benchmarks}/main.cpp | 2 +- include/{bench => benchmarks}/bench_kem.hpp | 104 ++++++++----------- include/{test => tests}/test_compression.hpp | 0 include/{test => tests}/test_field.hpp | 0 include/{test => tests}/test_kem.hpp | 0 include/{test => tests}/test_kem_kat.hpp | 0 include/{test => tests}/test_kyber.hpp | 0 include/{test => tests}/test_ntt.hpp | 0 include/{test => tests}/test_serialize.hpp | 0 {test => tests}/main.cpp | 2 +- 11 files changed, 49 insertions(+), 69 deletions(-) rename {bench => benchmarks}/main.cpp (95%) rename include/{bench => benchmarks}/bench_kem.hpp (52%) rename include/{test => tests}/test_compression.hpp (100%) rename include/{test => tests}/test_field.hpp (100%) rename include/{test => tests}/test_kem.hpp 
(100%) rename include/{test => tests}/test_kem_kat.hpp (100%) rename include/{test => tests}/test_kyber.hpp (100%) rename include/{test => tests}/test_ntt.hpp (100%) rename include/{test => tests}/test_serialize.hpp (100%) rename {test => tests}/main.cpp (97%) diff --git a/Makefile b/Makefile index 3853b44..c844144 100644 --- a/Makefile +++ b/Makefile @@ -4,20 +4,20 @@ OPTFLAGS = -O3 -march=native -mtune=native IFLAGS = -I ./include DEP_IFLAGS = -I ./sha3/include -I ./subtle/include -all: testing +all: test -test/a.out: test/main.cpp include/*.hpp include/test/*.hpp sha3/include/*.hpp subtle/include/*.hpp +tests/a.out: tests/main.cpp include/*.hpp include/tests/*.hpp sha3/include/*.hpp subtle/include/*.hpp $(CXX) $(CXXFLAGS) $(OPTFLAGS) $(IFLAGS) $(DEP_IFLAGS) $< -o $@ -testing: test/a.out +test: tests/a.out ./$< -bench/a.out: bench/main.cpp include/*.hpp include/bench/*.hpp sha3/include/*.hpp subtle/include/*.hpp +benchmarks/a.out: benchmarks/main.cpp include/*.hpp include/benchmarks/*.hpp sha3/include/*.hpp subtle/include/*.hpp # make sure you've google-benchmark globally installed; # see https://github.com/google/benchmark/tree/3b19d722#installation $(CXX) $(CXXFLAGS) $(OPTFLAGS) $(IFLAGS) $(DEP_IFLAGS) $< -lbenchmark -o $@ -benchmark: bench/a.out +benchmark: benchmarks/a.out ./$< --benchmark_time_unit=us --benchmark_counters_tabular=true diff --git a/bench/main.cpp b/benchmarks/main.cpp similarity index 95% rename from bench/main.cpp rename to benchmarks/main.cpp index 48e92dc..3ed8e8c 100644 --- a/bench/main.cpp +++ b/benchmarks/main.cpp @@ -1,4 +1,4 @@ -#include "bench/bench_kem.hpp" +#include "benchmarks/bench_kem.hpp" // Register for benchmarking IND-CCA2-secure Kyber Key Encapsulation Mechanism diff --git a/include/bench/bench_kem.hpp b/include/benchmarks/bench_kem.hpp similarity index 52% rename from include/bench/bench_kem.hpp rename to include/benchmarks/bench_kem.hpp index 056fd91..2ee0d2f 100644 --- a/include/bench/bench_kem.hpp +++ 
b/include/benchmarks/bench_kem.hpp @@ -1,7 +1,9 @@ #pragma once #include "kem.hpp" #include "utils.hpp" +#include #include +#include // Benchmark Kyber PQC suite implementation on CPU, using google-benchmark namespace bench_kyber { @@ -15,17 +17,17 @@ keygen(benchmark::State& state) constexpr size_t pklen = kyber_utils::get_kem_public_key_len(); constexpr size_t sklen = kyber_utils::get_kem_secret_key_len(); - uint8_t* d = static_cast(std::malloc(slen)); - uint8_t* z = static_cast(std::malloc(slen)); - uint8_t* pkey = static_cast(std::malloc(pklen)); - uint8_t* skey = static_cast(std::malloc(sklen)); + std::vector d(slen); + std::vector z(slen); + std::vector pkey(pklen); + std::vector skey(sklen); prng::prng_t prng; - prng.read(d, slen); - prng.read(z, slen); + prng.read(d.data(), d.size()); + prng.read(z.data(), z.size()); for (auto _ : state) { - kem::keygen(d, z, pkey, skey); + kem::keygen(d.data(), z.data(), pkey.data(), skey.data()); benchmark::DoNotOptimize(d); benchmark::DoNotOptimize(z); @@ -35,11 +37,6 @@ keygen(benchmark::State& state) } state.SetItemsProcessed(state.iterations()); - - std::free(d); - std::free(z); - std::free(pkey); - std::free(skey); } // Benchmarking IND-CCA2-secure Kyber KEM encapsulation algorithm @@ -57,26 +54,27 @@ encapsulate(benchmark::State& state) constexpr size_t ctlen = kyber_utils::get_kem_cipher_len(); constexpr size_t klen = 32; - uint8_t* d = static_cast(std::malloc(slen)); - uint8_t* z = static_cast(std::malloc(slen)); - uint8_t* m = static_cast(std::malloc(slen)); - uint8_t* pkey = static_cast(std::malloc(pklen)); - uint8_t* skey = static_cast(std::malloc(sklen)); - uint8_t* cipher = static_cast(std::malloc(ctlen)); - uint8_t* sender_key = static_cast(std::malloc(klen)); + std::vector d(slen); + std::vector z(slen); + std::vector m(slen); + std::vector pkey(pklen); + std::vector skey(sklen); + std::vector cipher(ctlen); + std::vector sender_key(klen); prng::prng_t prng; - prng.read(d, slen); - prng.read(z, slen); + 
prng.read(d.data(), d.size()); + prng.read(z.data(), z.size()); - kem::keygen(d, z, pkey, skey); + kem::keygen(d.data(), z.data(), pkey.data(), skey.data()); - prng.read(m, slen); + prng.read(m.data(), m.size()); for (auto _ : state) { - auto skdf = kem::encapsulate(m, pkey, cipher); + auto skdf = kem::encapsulate( + m.data(), pkey.data(), cipher.data()); benchmark::DoNotOptimize(skdf); - skdf.squeeze(sender_key, klen); + skdf.squeeze(sender_key.data(), sender_key.size()); benchmark::DoNotOptimize(m); benchmark::DoNotOptimize(pkey); @@ -86,14 +84,6 @@ encapsulate(benchmark::State& state) } state.SetItemsProcessed(state.iterations()); - - std::free(d); - std::free(z); - std::free(m); - std::free(pkey); - std::free(skey); - std::free(cipher); - std::free(sender_key); } // Benchmarking IND-CCA2-secure Kyber KEM decapsulation algorithm @@ -111,30 +101,32 @@ decapsulate(benchmark::State& state) constexpr size_t ctlen = kyber_utils::get_kem_cipher_len(); constexpr size_t klen = 32; - uint8_t* d = static_cast(std::malloc(slen)); - uint8_t* z = static_cast(std::malloc(slen)); - uint8_t* m = static_cast(std::malloc(slen)); - uint8_t* pkey = static_cast(std::malloc(pklen)); - uint8_t* skey = static_cast(std::malloc(sklen)); - uint8_t* cipher = static_cast(std::malloc(ctlen)); - uint8_t* sender_key = static_cast(std::malloc(klen)); - uint8_t* receiver_key = static_cast(std::malloc(klen)); + std::vector d(slen); + std::vector z(slen); + std::vector m(slen); + std::vector pkey(pklen); + std::vector skey(sklen); + std::vector cipher(ctlen); + std::vector sender_key(klen); + std::vector receiver_key(klen); prng::prng_t prng; - prng.read(d, slen); - prng.read(z, slen); + prng.read(d.data(), d.size()); + prng.read(z.data(), z.size()); - kem::keygen(d, z, pkey, skey); + kem::keygen(d.data(), z.data(), pkey.data(), skey.data()); - prng.read(m, slen); + prng.read(m.data(), m.size()); - auto skdf = kem::encapsulate(m, pkey, cipher); - skdf.squeeze(sender_key, klen); + auto skdf = 
kem::encapsulate( + m.data(), pkey.data(), cipher.data()); + skdf.squeeze(sender_key.data(), sender_key.size()); for (auto _ : state) { - auto rkdf = kem::decapsulate(skey, cipher); + auto rkdf = + kem::decapsulate(skey.data(), cipher.data()); benchmark::DoNotOptimize(rkdf); - rkdf.squeeze(receiver_key, klen); + rkdf.squeeze(receiver_key.data(), receiver_key.size()); benchmark::DoNotOptimize(skey); benchmark::DoNotOptimize(cipher); @@ -143,19 +135,7 @@ decapsulate(benchmark::State& state) } state.SetItemsProcessed(state.iterations()); - - for (size_t i = 0; i < klen; i++) { - assert(sender_key[i] == receiver_key[i]); - } - - std::free(d); - std::free(z); - std::free(m); - std::free(pkey); - std::free(skey); - std::free(cipher); - std::free(sender_key); - std::free(receiver_key); + assert(std::ranges::equal(sender_key, receiver_key)); } } diff --git a/include/test/test_compression.hpp b/include/tests/test_compression.hpp similarity index 100% rename from include/test/test_compression.hpp rename to include/tests/test_compression.hpp diff --git a/include/test/test_field.hpp b/include/tests/test_field.hpp similarity index 100% rename from include/test/test_field.hpp rename to include/tests/test_field.hpp diff --git a/include/test/test_kem.hpp b/include/tests/test_kem.hpp similarity index 100% rename from include/test/test_kem.hpp rename to include/tests/test_kem.hpp diff --git a/include/test/test_kem_kat.hpp b/include/tests/test_kem_kat.hpp similarity index 100% rename from include/test/test_kem_kat.hpp rename to include/tests/test_kem_kat.hpp diff --git a/include/test/test_kyber.hpp b/include/tests/test_kyber.hpp similarity index 100% rename from include/test/test_kyber.hpp rename to include/tests/test_kyber.hpp diff --git a/include/test/test_ntt.hpp b/include/tests/test_ntt.hpp similarity index 100% rename from include/test/test_ntt.hpp rename to include/tests/test_ntt.hpp diff --git a/include/test/test_serialize.hpp b/include/tests/test_serialize.hpp similarity 
index 100% rename from include/test/test_serialize.hpp rename to include/tests/test_serialize.hpp diff --git a/test/main.cpp b/tests/main.cpp similarity index 97% rename from test/main.cpp rename to tests/main.cpp index 102dbdb..5fae5c8 100644 --- a/test/main.cpp +++ b/tests/main.cpp @@ -1,4 +1,4 @@ -#include "test/test_kyber.hpp" +#include "tests/test_kyber.hpp" #include int From 38cb9db4d1294f54d99e89cf96ce0e77d454132a Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Sun, 16 Jul 2023 15:09:51 +0400 Subject: [PATCH 3/5] update/ add MAKE recipes for ease of benchmarking Signed-off-by: Anjan Roy --- Makefile | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index c844144..2f004db 100644 --- a/Makefile +++ b/Makefile @@ -1,25 +1,33 @@ CXX = g++ -CXXFLAGS = -std=c++20 -Wall -Wextra -pedantic -OPTFLAGS = -O3 -march=native -mtune=native +CXX_FLAGS = -std=c++20 +WARN_FLAGS = -Wall -Wextra -pedantic +OPT_FLAGS = -O3 -march=native -mtune=native IFLAGS = -I ./include DEP_IFLAGS = -I ./sha3/include -I ./subtle/include all: test tests/a.out: tests/main.cpp include/*.hpp include/tests/*.hpp sha3/include/*.hpp subtle/include/*.hpp - $(CXX) $(CXXFLAGS) $(OPTFLAGS) $(IFLAGS) $(DEP_IFLAGS) $< -o $@ + $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(IFLAGS) $(DEP_IFLAGS) $< -o $@ test: tests/a.out ./$< -benchmarks/a.out: benchmarks/main.cpp include/*.hpp include/benchmarks/*.hpp sha3/include/*.hpp subtle/include/*.hpp - # make sure you've google-benchmark globally installed; - # see https://github.com/google/benchmark/tree/3b19d722#installation - $(CXX) $(CXXFLAGS) $(OPTFLAGS) $(IFLAGS) $(DEP_IFLAGS) $< -lbenchmark -o $@ +benchmarks/bench.out: benchmarks/main.cpp include/*.hpp include/benchmarks/*.hpp sha3/include/*.hpp subtle/include/*.hpp + # In case you haven't built google-benchmark with libPFM support. 
+ # More @ https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7 + $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(IFLAGS) $(DEP_IFLAGS) $< -lbenchmark -lpthread -o $@ -benchmark: benchmarks/a.out +benchmark: benchmarks/bench.out ./$< --benchmark_time_unit=us --benchmark_counters_tabular=true +benchmarks/perf.out: benchmarks/main.cpp include/*.hpp include/benchmarks/*.hpp sha3/include/*.hpp subtle/include/*.hpp + # In case you've built google-benchmark with libPFM support. + # More @ https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7 + $(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(IFLAGS) $(DEP_IFLAGS) $< -lbenchmark -lpthread -lpfm -o $@ + +perf: benchmarks/perf.out + ./$< --benchmark_time_unit=us --benchmark_counters_tabular=true --benchmark_perf_counters=CYCLES clean: find . -name '*.out' -o -name '*.o' -o -name '*.so' -o -name '*.gch' | xargs rm -rf From 8d8d0e86ce3ce7c667912a98f7bdb5e997bc8105 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Sun, 16 Jul 2023 15:22:20 +0400 Subject: [PATCH 4/5] use newly introduced SHA3 reset API so that we create lesser number of hashers Signed-off-by: Anjan Roy --- include/kem.hpp | 83 +++++++++++++++++++++++-------------------------- include/ntt.hpp | 6 ++-- include/pke.hpp | 9 +++--- 3 files changed, 47 insertions(+), 51 deletions(-) diff --git a/include/kem.hpp b/include/kem.hpp index c86a29c..11f8304 100644 --- a/include/kem.hpp +++ b/include/kem.hpp @@ -91,41 +91,38 @@ encapsulate( uint8_t g_out[64]{}; uint8_t kdf_in[64]{}; - { - sha3_256::sha3_256 hasher; - hasher.absorb(m, mlen); - hasher.finalize(); - hasher.digest(g_in); - } + sha3_256::sha3_256 h256; - { - sha3_256::sha3_256 hasher; - hasher.absorb(pubkey, pklen); - hasher.finalize(); - hasher.digest(g_in + 32); - } + h256.absorb(m, mlen); + h256.finalize(); + h256.digest(g_in); + h256.reset(); - { - sha3_512::sha3_512 hasher; - hasher.absorb(g_in, sizeof(g_in)); - hasher.finalize(); - hasher.digest(g_out); - } + h256.absorb(pubkey, 
pklen); + h256.finalize(); + h256.digest(g_in + 32); + h256.reset(); + + sha3_512::sha3_512 h512; + + h512.absorb(g_in, sizeof(g_in)); + h512.finalize(); + h512.digest(g_out); + h512.reset(); pke::encrypt(pubkey, g_in, g_out + 32, cipher); std::memcpy(kdf_in, g_out, 32); - { - sha3_256::sha3_256 hasher; - hasher.absorb(cipher, ctlen); - hasher.finalize(); - hasher.digest(kdf_in + 32); - } - shake256::shake256 hasher{}; - hasher.absorb(kdf_in, sizeof(kdf_in)); - hasher.finalize(); - return hasher; + h256.absorb(cipher, ctlen); + h256.finalize(); + h256.digest(kdf_in + 32); + h256.reset(); + + shake256::shake256 xof256; + xof256.absorb(kdf_in, sizeof(kdf_in)); + xof256.finalize(); + return xof256; } // Given (k * 24 * 32 + 96) -bytes secret key and (k * du * 32 + dv * 32) -bytes @@ -173,12 +170,12 @@ decapsulate( pke::decrypt(seckey, cipher, g_in); std::memcpy(g_in + 32, h, 32); - { - sha3_512::sha3_512 hasher; - hasher.absorb(g_in, sizeof(g_in)); - hasher.finalize(); - hasher.digest(g_out); - } + + sha3_512::sha3_512 h512; + h512.absorb(g_in, sizeof(g_in)); + h512.finalize(); + h512.digest(g_out); + h512.reset(); pke::encrypt(pubkey, g_in, g_out + 32, c_prime); @@ -192,17 +189,15 @@ decapsulate( kdf_in[i] = subtle::ct_select(flg, g_out[i], z[i]); } - { - sha3_256::sha3_256 hasher; - hasher.absorb(cipher, ctlen); - hasher.finalize(); - hasher.digest(kdf_in + 32); - } + sha3_256::sha3_256 h256; + h256.absorb(cipher, ctlen); + h256.finalize(); + h256.digest(kdf_in + 32); - shake256::shake256 hasher; - hasher.absorb(kdf_in, sizeof(kdf_in)); - hasher.finalize(); - return hasher; + shake256::shake256 xof256; + xof256.absorb(kdf_in, sizeof(kdf_in)); + xof256.finalize(); + return xof256; } } diff --git a/include/ntt.hpp b/include/ntt.hpp index 107281a..091680b 100644 --- a/include/ntt.hpp +++ b/include/ntt.hpp @@ -102,7 +102,7 @@ constexpr std::array POLY_MUL_ζ_EXP = compute_mul_ζ(); // // Implementation inspired from // 
https://github.com/itzmeanjan/falcon/blob/45b0593/include/ntt.hpp#L69-L144 -static inline void +inline void ntt(field::zq_t* const poly) { for (size_t l = LOG2N - 1; l >= 1; l--) { @@ -139,7 +139,7 @@ ntt(field::zq_t* const poly) // // Implementation inspired from // https://github.com/itzmeanjan/falcon/blob/45b0593/include/ntt.hpp#L146-L224 -static inline void +inline void intt(field::zq_t* const poly) { for (size_t l = 1; l < LOG2N; l++) { @@ -218,7 +218,7 @@ basemul(const field::zq_t* const __restrict f, // degree-1 polynomial // g = (g0ˆ + g1ˆX, g2ˆ + g3ˆX, ..., g254ˆ + g255ˆX) // // h = f ◦ g -static inline void +inline void polymul(const field::zq_t* const __restrict f, // degree-255 polynomial const field::zq_t* const __restrict g, // degree-255 polynomial field::zq_t* const __restrict h // degree-255 polynomial diff --git a/include/pke.hpp b/include/pke.hpp index 9cdf502..7341d07 100644 --- a/include/pke.hpp +++ b/include/pke.hpp @@ -35,10 +35,11 @@ keygen(const uint8_t* const __restrict d, // 32 -bytes seed // step 2 uint8_t g_out[64]{}; - sha3_512::sha3_512 hasher; - hasher.absorb(d, dlen); - hasher.finalize(); - hasher.digest(g_out); + sha3_512::sha3_512 h512; + h512.absorb(d, dlen); + h512.finalize(); + h512.digest(g_out); + h512.reset(); const uint8_t* rho = g_out + 0; const uint8_t* sigma = g_out + 32; From 5704ea2831569351767a092a344f44f213dbe943 Mon Sep 17 00:00:00 2001 From: Anjan Roy Date: Sun, 16 Jul 2023 15:36:59 +0400 Subject: [PATCH 5/5] update project documentation, reflecting latest state of project Signed-off-by: Anjan Roy --- README.md | 90 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 46 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 4b7c875..a21adc0 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ $ g++ --version g++ (Ubuntu 12.2.0-17ubuntu1) 12.2.0 ``` -- System development utilities such as `make`, `cmake` & `git` +- Build tools such as `make`, `cmake`. 
```bash $ make --version @@ -69,14 +69,11 @@ GNU Make 4.3 $ cmake --version cmake version 3.22.1 - -$ git --version -git version 2.34.1 ``` -- For benchmarking Kyber implementation, targeting CPU systems, you'll need to have `google-benchmark` header and library globally installed. I found [this](https://github.com/google/benchmark/tree/604f6fd3#installation) guide helpful. - -- For importing dependencies `sha3`, `subtle` - initialize & update git submodule after cloning this repository +- For benchmarking Kyber implementation, targeting CPU systems, you'll need to have `google-benchmark` header and library globally installed. I found [this](https://github.com/google/benchmark#installation) guide helpful. +- If you are on a machine running the GNU/Linux kernel and you want to obtain CPU cycle counts for KEM routines, you should consider building the `google-benchmark` library with `libPFM` support, following [this](https://gist.github.com/itzmeanjan/05dc3e946f635d00c5e0b21aae6203a7) step-by-step guide. Find more about libPFM at https://perfmon2.sourceforge.net. +- For importing the dependencies `sha3` and `subtle`, initialize & update the git submodules after cloning this repository. ```bash git clone https://github.com/itzmeanjan/kyber.git @@ -106,66 +103,71 @@ make ## Benchmarking -For benchmarking Kyber KEM routines ( i.e. keygen, encaps and decaps ) for various suggested parameter sets, targeting CPU systems, you need to issue +For benchmarking Kyber KEM routines ( i.e. keygen, encaps and decaps ) for various suggested parameter sets, targeting CPU systems, you need to issue one of the following commands. ```bash -make benchmark +make benchmark # If you haven't built the google-benchmark library with libPFM support. +make perf # If you have built the google-benchmark library with libPFM support. ``` > **Note** Benchmarking expects presence of `google-benchmark` header and library in global namespace ( so that it can be found by the compiler ). 
-> **Warning** When benchmarking, ensure that you've disabled CPU frequency scaling, by following [this](https://github.com/google/benchmark/blob/3b19d722/docs/reducing_variance.md) guide. +> **Warning** When benchmarking, ensure that you've disabled CPU frequency scaling, by following [this](https://github.com/google/benchmark/blob/main/docs/reducing_variance.md) guide. + +> **Note** `make perf` - was issued when collecting following benchmarks. Notice, *cycles* column, denoting cost of executing Kyber KEM routines in terms of CPU cycles. Follow [this](https://github.com/google/benchmark/blob/main/docs/perf_counters.md) for more details. ### On 12th Gen Intel(R) Core(TM) i7-1260P ( compiled with GCC ) ```bash -2023-06-03T11:27:13+04:00 -Running ./bench/a.out -Run on (16 X 571.333 MHz CPU s) +2023-07-16T15:32:26+04:00 +Running ./benchmarks/perf.out +Run on (16 X 1311.11 MHz CPU s) CPU Caches: L1 Data 48 KiB (x8) L1 Instruction 32 KiB (x8) L2 Unified 1280 KiB (x8) L3 Unified 18432 KiB (x1) -Load Average: 1.10, 0.64, 0.47 ----------------------------------------------------------------------------- -Benchmark Time CPU Iterations items_per_second ----------------------------------------------------------------------------- -kyber512/keygen 18.3 us 18.3 us 38106 54.622k/s -kyber512/encap 24.1 us 24.1 us 29070 41.5211k/s -kyber512/decap 29.7 us 29.7 us 23587 33.7262k/s -kyber768/keygen 31.5 us 31.5 us 22286 31.7307k/s -kyber768/encap 39.2 us 39.2 us 17844 25.5098k/s -kyber768/decap 46.7 us 46.7 us 15024 21.4321k/s -kyber1024/keygen 49.2 us 49.2 us 14232 20.326k/s -kyber1024/encap 58.8 us 58.8 us 11824 17.0102k/s -kyber1024/decap 68.5 us 68.5 us 10176 14.5951k/s +Load Average: 0.12, 0.27, 0.32 +***WARNING*** There are 9 benchmarks with threads and 1 performance counters were requested. Beware counters will reflect the combined usage across all threads. 
+--------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations CYCLES items_per_second +--------------------------------------------------------------------------------------- +kyber512/keygen 18.1 us 18.1 us 38639 84.8877k 55.1314k/s +kyber512/encap 23.7 us 23.7 us 29527 111.18k 42.1187k/s +kyber512/decap 29.3 us 29.3 us 23826 137.434k 34.0758k/s +kyber768/keygen 30.9 us 30.9 us 22640 144.59k 32.3781k/s +kyber768/encap 38.8 us 38.8 us 18069 181.814k 25.7492k/s +kyber768/decap 46.2 us 46.2 us 15162 216.234k 21.6523k/s +kyber1024/keygen 47.9 us 47.9 us 14610 224.347k 20.8675k/s +kyber1024/encap 57.9 us 57.9 us 12074 271.079k 17.2612k/s +kyber1024/decap 67.9 us 67.9 us 10307 317.69k 14.7282k/s ``` ### On 12th Gen Intel(R) Core(TM) i7-1260P ( compiled with Clang ) ```bash -2023-06-03T11:27:54+04:00 -Running ./bench/a.out -Run on (16 X 4578.25 MHz CPU s) +2023-07-16T15:33:15+04:00 +Running ./benchmarks/perf.out +Run on (16 X 4371.72 MHz CPU s) CPU Caches: L1 Data 48 KiB (x8) L1 Instruction 32 KiB (x8) L2 Unified 1280 KiB (x8) L3 Unified 18432 KiB (x1) -Load Average: 0.95, 0.66, 0.48 ----------------------------------------------------------------------------- -Benchmark Time CPU Iterations items_per_second ----------------------------------------------------------------------------- -kyber512/keygen 15.4 us 15.4 us 45807 65.0474k/s -kyber512/encap 19.3 us 19.3 us 36323 51.7862k/s -kyber512/decap 23.6 us 23.6 us 29651 42.3194k/s -kyber768/keygen 25.9 us 25.9 us 26803 38.6156k/s -kyber768/encap 31.2 us 31.2 us 22373 32.0088k/s -kyber768/decap 37.5 us 37.5 us 18676 26.6759k/s -kyber1024/keygen 40.1 us 40.1 us 17344 24.9457k/s -kyber1024/encap 47.0 us 47.0 us 14881 21.2596k/s -kyber1024/decap 55.2 us 55.2 us 12730 18.1072k/s +Load Average: 0.26, 0.29, 0.33 +***WARNING*** There are 9 benchmarks with threads and 1 performance counters were requested. Beware counters will reflect the combined usage across all threads. 
+--------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations CYCLES items_per_second +--------------------------------------------------------------------------------------- +kyber512/keygen 15.5 us 15.5 us 44767 72.75k 64.3635k/s +kyber512/encap 19.1 us 19.1 us 36484 89.6099k 52.268k/s +kyber512/decap 23.7 us 23.7 us 29515 110.922k 42.1968k/s +kyber768/keygen 26.4 us 26.4 us 26596 123.574k 37.8928k/s +kyber768/encap 31.5 us 31.5 us 22228 147.527k 31.7306k/s +kyber768/decap 37.4 us 37.4 us 18705 175.022k 26.7379k/s +kyber1024/keygen 40.6 us 40.6 us 17351 189.919k 24.6478k/s +kyber1024/encap 46.9 us 47.0 us 14932 219.581k 21.2966k/s +kyber1024/decap 55.4 us 55.5 us 12557 259.598k 18.0263k/s ``` ## Usage @@ -219,10 +221,10 @@ main() auto rkdf = kyber512_kem::decapsulate(skey, cipher); uint8_t sender_key[32]{}; - skdf.read(sender_key, sizeof(sender_key)); + skdf.squeeze(sender_key, sizeof(sender_key)); uint8_t receiver_key[32]{}; - rkdf.read(receiver_key, sizeof(receiver_key)); + rkdf.squeeze(receiver_key, sizeof(receiver_key)); assert(std::ranges::equal(sender_key, receiver_key)); return 0;