diff --git a/benchmarks.csv b/benchmarks.csv
index 952b95f8..c614c534 100644
--- a/benchmarks.csv
+++ b/benchmarks.csv
@@ -46,13 +46,13 @@ cross-sha3-r-sdpg-1-small (10 executions),ref,290135,287741,297757,102853622,102
cross-sha3-r-sdpg-3-fast (10 executions),ref,627948,625525,637639,43573841,43565461,43582933,27513830,27493024,27525746
cross-sha3-r-sdpg-5-fast (10 executions),ref,1146280,1142409,1153794,93557878,93547167,93566329,59948216,59857434,60043852
dilithium2 (1000 executions),clean,1874167,1827645,1914566,7493877,3321630,40762756,2062795,2062255,2063222
-dilithium2 (1000 executions),m4f,1426036,1379636,1466394,3807970,1813656,18528070,1417745,1417203,1418192
+dilithium2 (1000 executions),m4f,1425723,1379410,1466445,3835095,1813682,16068642,1421307,1420219,1422056
dilithium2 (1000 executions),m4fstack,1801523,1684895,1902114,12170976,3900911,86281518,3241353,3194028,3281144
dilithium3 (1000 executions),clean,3205551,3204090,3207411,12696585,5097364,74392293,3376992,3376581,3377393
-dilithium3 (1000 executions),m4f,2515969,2514498,2517634,5884832,2917322,25268693,2411257,2410858,2411717
+dilithium3 (1000 executions),m4f,2515915,2514307,2517413,6054094,2917316,27829552,2415526,2414696,2416440
dilithium3 (1000 executions),m4fstack,3412759,3406659,3419247,23673016,6733971,145803146,5733307,5688893,5778120
dilithium5 (1000 executions),clean,5341477,5286872,5395822,15710371,7953367,75940093,5609679,5609217,5610183
-dilithium5 (1000 executions),m4f,4275029,4210286,4329519,7977781,4882524,25936176,4185417,4184925,4185896
+dilithium5 (1000 executions),m4f,4275033,4220989,4350945,8349360,4882552,29688762,4192692,4191427,4193764
dilithium5 (1000 executions),m4fstack,5816287,5474236,6115061,33452872,11170780,185259803,9912851,9845789,9981834
falcon-1024 (10 executions),clean,602066436,377135260,1488065363,136241759,136017549,136556585,1678109,1677732,1678566
falcon-1024 (10 executions),m4-ct,408725773,314885208,712370124,87706019,87549942,87839508,990541,984448,997160
@@ -200,13 +200,13 @@ cross-sha3-r-sdpg-1-small,ref,2328,466400,245512,,,,,,
cross-sha3-r-sdpg-3-fast,ref,4032,205080,108236,,,,,,
cross-sha3-r-sdpg-5-fast,ref,6824,398600,213436,,,,,,
dilithium2,clean,38304,51968,36192,,,,,,
-dilithium2,m4f,38296,49416,36220,,,,,,
+dilithium2,m4f,38296,49416,9012,,,,,,
dilithium2,m4fstack,4408,5072,2704,,,,,,
dilithium3,clean,60832,79616,57728,,,,,,
-dilithium3,m4f,60824,68864,57720,,,,,,
+dilithium3,m4f,60824,68864,9880,,,,,,
dilithium3,m4fstack,4408,6608,2704,,,,,,
dilithium5,clean,97696,122724,92940,,,,,,
-dilithium5,m4f,97688,116076,92932,,,,,,
+dilithium5,m4f,97688,116076,11944,,,,,,
dilithium5,m4fstack,4408,8136,2712,,,,,,
falcon-1024,clean,35076,84604,8776,,,,,,
falcon-1024,m4-ct,1156,2508,376,,,,,,
@@ -355,13 +355,13 @@ cross-sha3-r-sdpg-1-small,ref,71.8,74.7,78.4,,,,,,
cross-sha3-r-sdpg-3-fast,ref,71.7,68.2,68.7,,,,,,
cross-sha3-r-sdpg-5-fast,ref,71.1,66.1,66.8,,,,,,
dilithium2,clean,61.0,30.9,52.9,,,,,,
-dilithium2,m4f,79.9,60.6,76.8,,,,,,
+dilithium2,m4f,79.9,60.7,76.6,,,,,,
dilithium2,m4fstack,74.8,55.2,40.8,,,,,,
dilithium3,clean,64.7,31.3,56.8,,,,,,
-dilithium3,m4f,82.3,60.3,79.4,,,,,,
+dilithium3,m4f,82.3,60.7,79.2,,,,,,
dilithium3,m4fstack,77.1,54.6,41.0,,,,,,
dilithium5,clean,67.0,35.7,61.1,,,,,,
-dilithium5,m4f,83.5,65.0,81.7,,,,,,
+dilithium5,m4f,83.5,65.3,81.6,,,,,,
dilithium5,m4fstack,76.1,54.5,42.6,,,,,,
falcon-1024,clean,8.9,0.3,23.7,,,,,,
falcon-1024,m4-ct,8.6,0.4,32.2,,,,,,
@@ -509,13 +509,13 @@ cross-sha3-r-sdpg-1-small,ref,18846,0,208,19054,,,,,
cross-sha3-r-sdpg-3-fast,ref,19689,0,208,19897,,,,,
cross-sha3-r-sdpg-5-fast,ref,18593,0,208,18801,,,,,
dilithium2,clean,8064,0,0,8064,,,,,
-dilithium2,m4f,18596,0,0,18596,,,,,
+dilithium2,m4f,19180,0,0,19180,,,,,
dilithium2,m4fstack,24184,0,0,24184,,,,,
dilithium3,clean,7580,0,0,7580,,,,,
-dilithium3,m4f,18588,0,0,18588,,,,,
+dilithium3,m4f,19188,0,0,19188,,,,,
dilithium3,m4fstack,23448,0,0,23448,,,,,
dilithium5,clean,7808,0,0,7808,,,,,
-dilithium5,m4f,18468,0,0,18468,,,,,
+dilithium5,m4f,19096,0,0,19096,,,,,
dilithium5,m4fstack,23820,0,0,23820,,,,,
falcon-1024,clean,82703,0,0,82703,,,,,
falcon-1024,m4-ct,81825,0,79872,161697,,,,,
diff --git a/benchmarks.md b/benchmarks.md
index afb0ab01..87df4aa4 100644
--- a/benchmarks.md
+++ b/benchmarks.md
@@ -48,13 +48,13 @@
| cross-sha3-r-sdpg-3-fast (10 executions) | ref | AVG: 627,948
MIN: 625,525
MAX: 637,639 | AVG: 43,573,841
MIN: 43,565,461
MAX: 43,582,933 | AVG: 27,513,830
MIN: 27,493,024
MAX: 27,525,746 |
| cross-sha3-r-sdpg-5-fast (10 executions) | ref | AVG: 1,146,280
MIN: 1,142,409
MAX: 1,153,794 | AVG: 93,557,878
MIN: 93,547,167
MAX: 93,566,329 | AVG: 59,948,216
MIN: 59,857,434
MAX: 60,043,852 |
| dilithium2 (1000 executions) | clean | AVG: 1,874,167
MIN: 1,827,645
MAX: 1,914,566 | AVG: 7,493,877
MIN: 3,321,630
MAX: 40,762,756 | AVG: 2,062,795
MIN: 2,062,255
MAX: 2,063,222 |
-| dilithium2 (1000 executions) | m4f | AVG: 1,426,036
MIN: 1,379,636
MAX: 1,466,394 | AVG: 3,807,970
MIN: 1,813,656
MAX: 18,528,070 | AVG: 1,417,745
MIN: 1,417,203
MAX: 1,418,192 |
+| dilithium2 (1000 executions) | m4f | AVG: 1,425,723
MIN: 1,379,410
MAX: 1,466,445 | AVG: 3,835,095
MIN: 1,813,682
MAX: 16,068,642 | AVG: 1,421,307
MIN: 1,420,219
MAX: 1,422,056 |
| dilithium2 (1000 executions) | m4fstack | AVG: 1,801,523
MIN: 1,684,895
MAX: 1,902,114 | AVG: 12,170,976
MIN: 3,900,911
MAX: 86,281,518 | AVG: 3,241,353
MIN: 3,194,028
MAX: 3,281,144 |
| dilithium3 (1000 executions) | clean | AVG: 3,205,551
MIN: 3,204,090
MAX: 3,207,411 | AVG: 12,696,585
MIN: 5,097,364
MAX: 74,392,293 | AVG: 3,376,992
MIN: 3,376,581
MAX: 3,377,393 |
-| dilithium3 (1000 executions) | m4f | AVG: 2,515,969
MIN: 2,514,498
MAX: 2,517,634 | AVG: 5,884,832
MIN: 2,917,322
MAX: 25,268,693 | AVG: 2,411,257
MIN: 2,410,858
MAX: 2,411,717 |
+| dilithium3 (1000 executions) | m4f | AVG: 2,515,915
MIN: 2,514,307
MAX: 2,517,413 | AVG: 6,054,094
MIN: 2,917,316
MAX: 27,829,552 | AVG: 2,415,526
MIN: 2,414,696
MAX: 2,416,440 |
| dilithium3 (1000 executions) | m4fstack | AVG: 3,412,759
MIN: 3,406,659
MAX: 3,419,247 | AVG: 23,673,016
MIN: 6,733,971
MAX: 145,803,146 | AVG: 5,733,307
MIN: 5,688,893
MAX: 5,778,120 |
| dilithium5 (1000 executions) | clean | AVG: 5,341,477
MIN: 5,286,872
MAX: 5,395,822 | AVG: 15,710,371
MIN: 7,953,367
MAX: 75,940,093 | AVG: 5,609,679
MIN: 5,609,217
MAX: 5,610,183 |
-| dilithium5 (1000 executions) | m4f | AVG: 4,275,029
MIN: 4,210,286
MAX: 4,329,519 | AVG: 7,977,781
MIN: 4,882,524
MAX: 25,936,176 | AVG: 4,185,417
MIN: 4,184,925
MAX: 4,185,896 |
+| dilithium5 (1000 executions) | m4f | AVG: 4,275,033
MIN: 4,220,989
MAX: 4,350,945 | AVG: 8,349,360
MIN: 4,882,552
MAX: 29,688,762 | AVG: 4,192,692
MIN: 4,191,427
MAX: 4,193,764 |
| dilithium5 (1000 executions) | m4fstack | AVG: 5,816,287
MIN: 5,474,236
MAX: 6,115,061 | AVG: 33,452,872
MIN: 11,170,780
MAX: 185,259,803 | AVG: 9,912,851
MIN: 9,845,789
MAX: 9,981,834 |
| falcon-1024 (10 executions) | clean | AVG: 602,066,436
MIN: 377,135,260
MAX: 1,488,065,363 | AVG: 136,241,759
MIN: 136,017,549
MAX: 136,556,585 | AVG: 1,678,109
MIN: 1,677,732
MAX: 1,678,566 |
| falcon-1024 (10 executions) | m4-ct | AVG: 408,725,773
MIN: 314,885,208
MAX: 712,370,124 | AVG: 87,706,019
MIN: 87,549,942
MAX: 87,839,508 | AVG: 990,541
MIN: 984,448
MAX: 997,160 |
@@ -204,13 +204,13 @@
| cross-sha3-r-sdpg-3-fast | ref | 4,032 | 205,080 | 108,236 |
| cross-sha3-r-sdpg-5-fast | ref | 6,824 | 398,600 | 213,436 |
| dilithium2 | clean | 38,304 | 51,968 | 36,192 |
-| dilithium2 | m4f | 38,296 | 49,416 | 36,220 |
+| dilithium2 | m4f | 38,296 | 49,416 | 9,012 |
| dilithium2 | m4fstack | 4,408 | 5,072 | 2,704 |
| dilithium3 | clean | 60,832 | 79,616 | 57,728 |
-| dilithium3 | m4f | 60,824 | 68,864 | 57,720 |
+| dilithium3 | m4f | 60,824 | 68,864 | 9,880 |
| dilithium3 | m4fstack | 4,408 | 6,608 | 2,704 |
| dilithium5 | clean | 97,696 | 122,724 | 92,940 |
-| dilithium5 | m4f | 97,688 | 116,076 | 92,932 |
+| dilithium5 | m4f | 97,688 | 116,076 | 11,944 |
| dilithium5 | m4fstack | 4,408 | 8,136 | 2,712 |
| falcon-1024 | clean | 35,076 | 84,604 | 8,776 |
| falcon-1024 | m4-ct | 1,156 | 2,508 | 376 |
@@ -361,13 +361,13 @@
| cross-sha3-r-sdpg-3-fast | ref | 71.7% | 68.2% | 68.7% |
| cross-sha3-r-sdpg-5-fast | ref | 71.1% | 66.1% | 66.8% |
| dilithium2 | clean | 61.0% | 30.9% | 52.9% |
-| dilithium2 | m4f | 79.9% | 60.6% | 76.8% |
+| dilithium2 | m4f | 79.9% | 60.7% | 76.6% |
| dilithium2 | m4fstack | 74.8% | 55.2% | 40.8% |
| dilithium3 | clean | 64.7% | 31.3% | 56.8% |
-| dilithium3 | m4f | 82.3% | 61.4% | 79.4% |
+| dilithium3 | m4f | 82.3% | 60.7% | 79.2% |
| dilithium3 | m4fstack | 77.1% | 54.6% | 41.0% |
| dilithium5 | clean | 67.0% | 35.7% | 61.1% |
-| dilithium5 | m4f | 83.5% | 65.0% | 81.7% |
+| dilithium5 | m4f | 83.5% | 65.3% | 81.6% |
| dilithium5 | m4fstack | 76.1% | 54.5% | 42.6% |
| falcon-1024 | clean | 8.9% | 0.3% | 23.7% |
| falcon-1024 | m4-ct | 8.6% | 0.4% | 32.2% |
@@ -517,13 +517,13 @@
| cross-sha3-r-sdpg-3-fast | ref | 19,689 | 0 | 208 | 19,897 |
| cross-sha3-r-sdpg-5-fast | ref | 18,593 | 0 | 208 | 18,801 |
| dilithium2 | clean | 8,064 | 0 | 0 | 8,064 |
-| dilithium2 | m4f | 18,596 | 0 | 0 | 18,596 |
+| dilithium2 | m4f | 19,180 | 0 | 0 | 19,180 |
| dilithium2 | m4fstack | 24,184 | 0 | 0 | 24,184 |
| dilithium3 | clean | 7,580 | 0 | 0 | 7,580 |
-| dilithium3 | m4f | 18,588 | 0 | 0 | 18,588 |
+| dilithium3 | m4f | 19,188 | 0 | 0 | 19,188 |
| dilithium3 | m4fstack | 23,448 | 0 | 0 | 23,448 |
| dilithium5 | clean | 7,808 | 0 | 0 | 7,808 |
-| dilithium5 | m4f | 18,468 | 0 | 0 | 18,468 |
+| dilithium5 | m4f | 19,096 | 0 | 0 | 19,096 |
| dilithium5 | m4fstack | 23,820 | 0 | 0 | 23,820 |
| falcon-1024 | clean | 82,703 | 0 | 0 | 82,703 |
| falcon-1024 | m4-ct | 81,825 | 0 | 79,872 | 161,697 |
diff --git a/crypto_sign/dilithium2/m4f/packing.c b/crypto_sign/dilithium2/m4f/packing.c
index 8aaff2a3..eb9d9a3e 100644
--- a/crypto_sign/dilithium2/m4f/packing.c
+++ b/crypto_sign/dilithium2/m4f/packing.c
@@ -2,6 +2,7 @@
#include "packing.h"
#include "polyvec.h"
#include "poly.h"
+#include <stddef.h>
/*************************************************
* Name: pack_pk
@@ -49,6 +50,21 @@ void unpack_pk(uint8_t rho[SEEDBYTES],
polyt1_unpack(&t1->vec[i], pk + i*POLYT1_PACKEDBYTES);
}
+/*************************************************
+* Name:        unpack_pk_t1
+*
+* Description: Unpack a single polynomial of t1 from pk = (rho, t1).
+*
+* Arguments:   - poly *t1: pointer to output polynomial t1[idx]
+*              - size_t idx: index of the t1 polynomial to unpack
+*              - const unsigned char pk[]: byte array containing bit-packed pk
+**************************************************/
+void unpack_pk_t1(poly *t1, size_t idx, const unsigned char pk[CRYPTO_PUBLICKEYBYTES]) {
+    /* The packed t1 polynomials start SEEDBYTES into pk. */
+    polyt1_unpack(t1, pk + SEEDBYTES + idx * POLYT1_PACKEDBYTES);
+}
+
+
/*************************************************
* Name: pack_sk
*
@@ -283,4 +299,92 @@ int unpack_sig(uint8_t c[CTILDEBYTES],
return 1;
return 0;
-}
\ No newline at end of file
+}
+
+/*************************************************
+* Name:        unpack_sig_c
+*
+* Description: Unpack only the challenge bytes c from signature
+*              sig = (c, z, h); c occupies the first CTILDEBYTES bytes.
+*
+* Arguments:   - uint8_t c[]: output buffer of CTILDEBYTES bytes
+*              - const unsigned char sig[]: byte array containing
+*                bit-packed signature
+*
+* Returns 0 unconditionally; no validity check is performed here.
+**************************************************/
+int unpack_sig_c(uint8_t c[CTILDEBYTES], const unsigned char sig[CRYPTO_BYTES]) {
+    for (size_t i = 0; i < CTILDEBYTES; ++i)
+        c[i] = sig[i];
+    return 0;
+}
+
+/*************************************************
+* Name:        unpack_sig_z
+*
+* Description: Unpack only the vector z from signature sig = (c, z, h).
+*
+* Arguments:   - polyvecl *z: pointer to output vector z
+*              - const unsigned char sig[]: byte array containing
+*                bit-packed signature
+*
+* Returns 0 unconditionally; no validity check is performed on z here.
+**************************************************/
+int unpack_sig_z(polyvecl *z, const unsigned char sig[CRYPTO_BYTES]) {
+    const unsigned char *packed_z = sig + CTILDEBYTES; /* skip c */
+    for (size_t row = 0; row < L; ++row, packed_z += POLYZ_PACKEDBYTES) {
+        polyz_unpack(&z->vec[row], packed_z);
+    }
+    return 0;
+}
+
+/*************************************************
+* Name:        unpack_sig_h
+*
+* Description: Unpack only hint polynomial h[idx] from sig = (c, z, h).
+*
+* Arguments:   - poly *h: pointer to output hint polynomial h[idx]
+*              - size_t idx: index of the hint polynomial to unpack
+*              - const unsigned char sig[]: bit-packed signature
+*
+* Returns 1 in case of malformed signature; otherwise 0.
+**************************************************/
+int unpack_sig_h(poly *h, size_t idx, const unsigned char sig[CRYPTO_BYTES]) {
+    sig += CTILDEBYTES;
+    sig += L * POLYZ_PACKEDBYTES;
+
+    /* Decode h */
+    size_t k = 0;
+    for (size_t i = 0; i < K; ++i) {
+        if (i == idx) { /* clear the output once its row is reached */
+            for (size_t j = 0; j < N; ++j) {
+                h->coeffs[j] = 0;
+            }
+        }
+
+        if (sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) {
+            return 1;
+        }
+
+        for (size_t j = k; j < sig[OMEGA + i]; ++j) {
+            /* Coefficients are ordered for strong unforgeability */
+            if (j > k && sig[j] <= sig[j - 1]) {
+                return 1;
+            }
+            if (i == idx) {
+                h->coeffs[sig[j]] = 1;
+            }
+        }
+
+        k = sig[OMEGA + i];
+    }
+
+    /* Extra indices are zero for strong unforgeability */
+    for (size_t j = k; j < OMEGA; ++j) {
+        if (sig[j]) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
diff --git a/crypto_sign/dilithium2/m4f/packing.h b/crypto_sign/dilithium2/m4f/packing.h
index 35553545..78ef2c2c 100644
--- a/crypto_sign/dilithium2/m4f/packing.h
+++ b/crypto_sign/dilithium2/m4f/packing.h
@@ -2,6 +2,7 @@
#define PACKING_H
#include <stdint.h>
+#include <stddef.h>
#include "params.h"
#include "polyvec.h"
#include "smallpoly.h"
@@ -24,6 +25,9 @@ void pack_sig(uint8_t sig[CRYPTO_BYTES], const uint8_t c[CTILDEBYTES], const pol
#define unpack_pk DILITHIUM_NAMESPACE(unpack_pk)
void unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t pk[CRYPTO_PUBLICKEYBYTES]);
+#define unpack_pk_t1 DILITHIUM_NAMESPACE(unpack_pk_t1)
+void unpack_pk_t1(poly *t1, size_t idx, const unsigned char pk[CRYPTO_PUBLICKEYBYTES]);
+
#define unpack_sk DILITHIUM_NAMESPACE(unpack_sk)
void unpack_sk(uint8_t rho[SEEDBYTES],
uint8_t tr[TRBYTES],
@@ -36,6 +40,15 @@ void unpack_sk(uint8_t rho[SEEDBYTES],
#define unpack_sig DILITHIUM_NAMESPACE(unpack_sig)
int unpack_sig(uint8_t c[CTILDEBYTES], polyvecl *z, polyveck *h, const uint8_t sig[CRYPTO_BYTES]);
+
+#define unpack_sig_z DILITHIUM_NAMESPACE(unpack_sig_z)
+int unpack_sig_z(polyvecl *z, const unsigned char sig[CRYPTO_BYTES]);
+#define unpack_sig_h DILITHIUM_NAMESPACE(unpack_sig_h)
+int unpack_sig_h(poly *h, size_t idx, const unsigned char sig[CRYPTO_BYTES]);
+#define unpack_sig_c DILITHIUM_NAMESPACE(unpack_sig_c)
+int unpack_sig_c(uint8_t c[CTILDEBYTES], const unsigned char sig[CRYPTO_BYTES]);
+
+
#define pack_sig_c DILITHIUM_NAMESPACE(pack_sig_c)
void pack_sig_c(uint8_t sig[CRYPTO_BYTES], const uint8_t c[CTILDEBYTES]);
diff --git a/crypto_sign/dilithium2/m4f/poly.c b/crypto_sign/dilithium2/m4f/poly.c
index 0d40fda3..654f4f23 100644
--- a/crypto_sign/dilithium2/m4f/poly.c
+++ b/crypto_sign/dilithium2/m4f/poly.c
@@ -45,6 +45,18 @@ void poly_caddq(poly *a) {
asm_caddq(a->coeffs);
}
+/*************************************************
+* Name:        poly_csubq
+*
+* Description: For all coefficients of input polynomial subtract Q if
+*              coefficient is at least Q; then add Q if it is negative.
+*
+* Arguments:   - poly *a: pointer to input/output polynomial
+**************************************************/
+void poly_csubq(poly *a) {
+    asm_csubq(a->coeffs);
+}
+
#if 0
/*************************************************
* Name: poly_freeze
diff --git a/crypto_sign/dilithium2/m4f/poly.h b/crypto_sign/dilithium2/m4f/poly.h
index 8f8819b0..af9e7a50 100644
--- a/crypto_sign/dilithium2/m4f/poly.h
+++ b/crypto_sign/dilithium2/m4f/poly.h
@@ -12,6 +12,8 @@ typedef struct {
void poly_reduce(poly *a);
#define poly_caddq DILITHIUM_NAMESPACE(poly_caddq)
void poly_caddq(poly *a);
+#define poly_csubq DILITHIUM_NAMESPACE(poly_csubq)
+void poly_csubq(poly *a);
#define poly_freeze DILITHIUM_NAMESPACE(poly_freeze)
void poly_freeze(poly *a);
diff --git a/crypto_sign/dilithium2/m4f/sign.c b/crypto_sign/dilithium2/m4f/sign.c
index 04bec45c..d1c5222b 100644
--- a/crypto_sign/dilithium2/m4f/sign.c
+++ b/crypto_sign/dilithium2/m4f/sign.c
@@ -225,20 +225,36 @@ int crypto_sign(uint8_t *sm,
*smlen += mlen;
return 0;
}
+/*************************************************
+ * Name:        expand_mat_elem
+ *
+ * Description: Single-entry ExpandA. Samples element a_{k_idx,l_idx} of
+ *              matrix A via poly_uniform with nonce (k_idx << 8) + l_idx.
+ *
+ * Arguments:   - poly *mat_elem: output matrix element
+ *              - const unsigned char rho[]: byte array containing seed rho
+ *              - size_t k_idx: matrix row index
+ *              - size_t l_idx: matrix col index
+ **************************************************/
+static void expand_mat_elem(poly *mat_elem, const unsigned char rho[SEEDBYTES], size_t k_idx, size_t l_idx)
+{
+    const uint16_t nonce = (uint16_t)((k_idx << 8) + l_idx);
+    poly_uniform(mat_elem, rho, nonce);
+}
/*************************************************
-* Name: crypto_sign_verify
-*
-* Description: Verifies signature.
-*
-* Arguments: - uint8_t *m: pointer to input signature
-* - size_t siglen: length of signature
-* - const uint8_t *m: pointer to message
-* - size_t mlen: length of message
-* - const uint8_t *pk: pointer to bit-packed public key
-*
-* Returns 0 if signature could be verified correctly and -1 otherwise
-**************************************************/
+ * Name: crypto_sign_verify
+ *
+ * Description: Verifies signature.
+ *
+ * Arguments:   - const uint8_t *sig: pointer to input signature
+ * - size_t siglen: length of signature
+ * - const uint8_t *m: pointer to message
+ * - size_t mlen: length of message
+ * - const uint8_t *pk: pointer to bit-packed public key
+ *
+ * Returns 0 if signature could be verified correctly and -1 otherwise
+ **************************************************/
int crypto_sign_verify(const uint8_t *sig,
size_t siglen,
const uint8_t *m,
@@ -246,23 +262,23 @@ int crypto_sign_verify(const uint8_t *sig,
const uint8_t *pk)
{
unsigned int i;
- uint8_t buf[K*POLYW1_PACKEDBYTES];
- uint8_t rho[SEEDBYTES];
+ const uint8_t *rho = pk;
uint8_t mu[CRHBYTES];
uint8_t c[CTILDEBYTES];
uint8_t c2[CTILDEBYTES];
poly cp;
- polyvecl mat[K], z;
- polyveck t1, w1, h;
+ polyvecl z;
shake256incctx state;
- if(siglen != CRYPTO_BYTES)
+ poly tmp_elem, w1_elem;
+
+ if (siglen != CRYPTO_BYTES)
return -1;
- unpack_pk(rho, &t1, pk);
- if(unpack_sig(c, &z, &h, sig))
+ if (unpack_sig_z(&z, sig) != 0) {
return -1;
- if(polyvecl_chknorm(&z, GAMMA1 - BETA))
+ }
+ if (polyvecl_chknorm(&z, GAMMA1 - BETA))
return -1;
/* Compute CRH(h(rho, t1), msg) */
@@ -273,35 +289,58 @@ int crypto_sign_verify(const uint8_t *sig,
shake256_inc_finalize(&state);
shake256_inc_squeeze(mu, CRHBYTES, &state);
+ // Hash [mu || w1'] to get c.
+ shake256_inc_init(&state);
+ shake256_inc_absorb(&state, mu, CRHBYTES);
+
/* Matrix-vector multiplication; compute Az - c2^dt1 */
+ if (unpack_sig_c(c, sig) != 0) {
+ return -1;
+ }
poly_challenge(&cp, c);
- polyvec_matrix_expand(mat, rho);
-
+ poly_ntt(&cp);
polyvecl_ntt(&z);
- polyvec_matrix_pointwise_montgomery(&w1, mat, &z);
- poly_ntt(&cp);
- polyveck_shiftl(&t1);
- polyveck_ntt(&t1);
- polyveck_pointwise_poly_montgomery(&t1, &cp, &t1);
- polyveck_sub(&w1, &w1, &t1);
- polyveck_reduce(&w1);
- polyveck_invntt_tomont(&w1);
+ for (size_t k_idx = 0; k_idx < K; k_idx++)
+ {
+ // Sample the current element from A.
+ expand_mat_elem(&tmp_elem, rho, k_idx, 0);
+ poly_pointwise_montgomery(&w1_elem, &tmp_elem, &z.vec[0]);
+
+ for (size_t l_idx = 1; l_idx < L; l_idx++)
+ {
+ // Sample the element from A.
+ expand_mat_elem(&tmp_elem, rho, k_idx, l_idx);
+ poly_pointwise_acc_montgomery(&w1_elem, &tmp_elem, &z.vec[l_idx]);
+ }
+
+ // Subtract c*(t1_{k_idx} * 2^d)
+ unpack_pk_t1(&tmp_elem, k_idx, pk);
+ poly_shiftl(&tmp_elem);
+ poly_ntt(&tmp_elem);
+ poly_pointwise_montgomery(&tmp_elem, &cp, &tmp_elem);
+ poly_sub(&w1_elem, &w1_elem, &tmp_elem);
+ poly_reduce(&w1_elem);
+ poly_invntt_tomont(&w1_elem);
+
+ // Reconstruct w1
+ poly_csubq(&w1_elem);
+ if (unpack_sig_h(&tmp_elem, k_idx, sig) != 0) {
+ return -1;
+ }
+ poly_use_hint(&w1_elem, &w1_elem, &tmp_elem);
+ uint8_t w1_packed[POLYW1_PACKEDBYTES];
+ polyw1_pack(w1_packed, &w1_elem);
+ shake256_inc_absorb(&state, w1_packed, POLYW1_PACKEDBYTES);
+ }
- /* Reconstruct w1 */
- polyveck_caddq(&w1);
- polyveck_use_hint(&w1, &w1, &h);
- polyveck_pack_w1(buf, &w1);
/* Call random oracle and verify challenge */
- shake256_inc_init(&state);
- shake256_inc_absorb(&state, mu, CRHBYTES);
- shake256_inc_absorb(&state, buf, K*POLYW1_PACKEDBYTES);
shake256_inc_finalize(&state);
shake256_inc_squeeze(c2, CTILDEBYTES, &state);
- for(i = 0; i < CTILDEBYTES; ++i)
- if(c[i] != c2[i])
+ for (i = 0; i < CTILDEBYTES; ++i)
+ if (c[i] != c2[i])
return -1;
return 0;
diff --git a/crypto_sign/dilithium2/m4f/vector.h b/crypto_sign/dilithium2/m4f/vector.h
index e5c5dda3..183ddc83 100644
--- a/crypto_sign/dilithium2/m4f/vector.h
+++ b/crypto_sign/dilithium2/m4f/vector.h
@@ -10,6 +10,8 @@ void asm_reduce32(int32_t a[N]);
void small_asm_reduce32_central(int32_t a[N]);
#define asm_caddq DILITHIUM_NAMESPACE(asm_caddq)
void asm_caddq(int32_t a[N]);
+#define asm_csubq DILITHIUM_NAMESPACE(asm_csubq)
+void asm_csubq(int32_t a[N]);
#define asm_freeze DILITHIUM_NAMESPACE(asm_freeze)
void asm_freeze(int32_t a[N]);
#define asm_rej_uniform DILITHIUM_NAMESPACE(asm_rej_uniform)
diff --git a/crypto_sign/dilithium2/m4f/vector.s b/crypto_sign/dilithium2/m4f/vector.s
index 559f11b0..a393c914 100644
--- a/crypto_sign/dilithium2/m4f/vector.s
+++ b/crypto_sign/dilithium2/m4f/vector.s
@@ -169,6 +169,59 @@ pqcrystals_dilithium_asm_caddq:
bx lr
.size pqcrystals_dilithium_asm_caddq, .-pqcrystals_dilithium_asm_caddq
+// csubq a, tmp, q: conditional reduction of register a by q, in place.
+// If a >= q (signed compare), q is subtracted; if a is negative after that
+// step, q is added. The tmp argument is accepted but never referenced in
+// the macro body. The condition flags are overwritten.
+.macro csubq a, tmp, q
+ cmp.n \a, \q
+ it ge
+ subge.w \a, \a, \q
+ cmp \a, #0
+ it mi
+ addmi.w \a, \a, \q
+.endm
+
+// void asm_csubq(int32_t a[N]);
+// Applies the csubq macro with q = 8380417 to each 32-bit word of a, in place.
+// Registers: r0 = a (advanced by 32 bytes per iteration), r1-r8 = the eight
+// words being processed, r9 = tmp argument of csubq (unused by the macro),
+// r10 = loop counter, r12 = q. r4-r10 are saved and restored on the stack.
+// The loop runs 32 times over 8 words each, i.e. 256 words in total
+// (presumably N == 256 -- TODO confirm against params.h).
+.global pqcrystals_dilithium_asm_csubq
+.type pqcrystals_dilithium_asm_csubq, %function
+.align 2
+pqcrystals_dilithium_asm_csubq:
+ push {r4-r10}
+
+ movw r12,#:lower16:8380417
+ movt r12,#:upper16:8380417
+
+ movw r10, #32
+ 1:
+ ldr.w r1, [r0]
+ ldr.w r2, [r0, #1*4]
+ ldr.w r3, [r0, #2*4]
+ ldr.w r4, [r0, #3*4]
+ ldr.w r5, [r0, #4*4]
+ ldr.w r6, [r0, #5*4]
+ ldr.w r7, [r0, #6*4]
+ ldr.w r8, [r0, #7*4]
+
+ csubq r1, r9, r12
+ csubq r2, r9, r12
+ csubq r3, r9, r12
+ csubq r4, r9, r12
+ csubq r5, r9, r12
+ csubq r6, r9, r12
+ csubq r7, r9, r12
+ csubq r8, r9, r12
+
+ str.w r2, [r0, #1*4]
+ str.w r3, [r0, #2*4]
+ str.w r4, [r0, #3*4]
+ str.w r5, [r0, #4*4]
+ str.w r6, [r0, #5*4]
+ str.w r7, [r0, #6*4]
+ str.w r8, [r0, #7*4]
+// The first word is stored last, with post-increment, so that r0 points at
+// the next group of eight words for the following iteration.
+ str r1, [r0], #8*4
+ subs r10, #1
+ bne.w 1b
+
+ pop {r4-r10}
+ bx lr
+.size pqcrystals_dilithium_asm_csubq, .-pqcrystals_dilithium_asm_csubq
// asm_rej_uniform(int32_t *a,unsigned int len,const unsigned char *buf, unsigned int buflen);
.global pqcrystals_dilithium_asm_rej_uniform