diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index 13fe73a0f7..8a2f2c1781 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -70518,7 +70518,7 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, /* x /= z^2 */ sp_256_mont_mul_8(r->x, p->x, t2, p256_mod, p256_mp_mod); - XMEMSET(r->x + 8, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 8, 0, sizeof(sp_digit) * 8U); sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_8(r->x, p256_mod); @@ -70527,7 +70527,7 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, /* y /= z^3 */ sp_256_mont_mul_8(r->y, p->y, t1, p256_mod, p256_mp_mod); - XMEMSET(r->y + 8, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 8, 0, sizeof(sp_digit) * 8U); sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_8(r->y, p256_mod); @@ -70536,7 +70536,6 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -70839,6 +70838,61 @@ static void sp_256_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_ * p Point to double. * t Temporary ordinate data. */ +static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_8(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_8(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_8(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_8(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_8(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_8(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_8(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_8(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_lower_8(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_8(y, y, t2, p256_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_dbl_8_ctx { int state; @@ -70849,6 +70903,12 @@ typedef struct sp_256_proj_point_dbl_8_ctx { sp_digit* z; } sp_256_proj_point_dbl_8_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -70973,62 +71033,6 @@ static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*8; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod); - /* Z = Y * Z */ - sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod); - /* Z = 2Z */ - sp_256_mont_dbl_8(z, z, p256_mod); - /* T2 = X - T1 */ - sp_256_mont_sub_8(t2, p->x, t1, p256_mod); - /* T1 = X + T1 */ - sp_256_mont_add_8(t1, p->x, t1, p256_mod); - /* T2 = T1 * T2 */ - sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod); - /* T1 = 3T2 */ - sp_256_mont_tpl_8(t1, t2, p256_mod); - /* Y = 2Y */ - sp_256_mont_dbl_8(y, p->y, p256_mod); - /* Y = Y * Y */ - sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod); - /* T2 = Y * Y */ - sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); - /* T2 = T2/2 */ - sp_256_div2_8(t2, t2, p256_mod); - /* Y = Y * X */ - sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod); - /* X = T1 * T1 */ - sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod); - /* X = X - Y */ - sp_256_mont_sub_8(x, x, y, p256_mod); - /* X = X - Y */ - sp_256_mont_sub_8(x, x, y, p256_mod); - /* Y = Y - X */ - sp_256_mont_sub_lower_8(y, y, x, p256_mod); - /* Y = Y * T1 */ - sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod); - /* Y = Y - T2 */ - sp_256_mont_sub_8(y, y, t2, p256_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -71054,6 +71058,7 @@ static int sp_256_iszero_8(const sp_digit* a) return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -71061,6 +71066,81 @@ static int sp_256_iszero_8(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_256_proj_point_add_8(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + sp_digit* t3 = t + 4*8; + sp_digit* t4 = t + 6*8; + sp_digit* t5 = t + 8*8; + sp_digit* t6 = t + 10*8; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t1, t1, p->x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_8(t3, t3, p->y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_8(t2, t1) & + sp_256_cmp_equal_8(t4, t3)) { + sp_256_proj_point_dbl_8(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_256_mont_sub_8(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_8(t4, t4, t3, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_8(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(x, x, t5, p256_mod); + sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(t3, y, p256_mod); + sp_256_mont_sub_8(x, x, t3, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_8(y, y, x, p256_mod); + sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(y, y, t5, p256_mod); + for (i = 0; i < 8; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_add_8_ctx { @@ -71079,6 +71159,13 @@ typedef struct sp_256_proj_point_add_8_ctx { sp_digit* z; } sp_256_proj_point_add_8_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_256_proj_point_add_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, const sp_point_256* q, sp_digit* t) { @@ -71110,252 +71197,149 @@ static int sp_256_proj_point_add_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_256_sub_8(ctx->t1, p256_mod, q->y); - sp_256_norm_8(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & - (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_8(ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 2; break; case 2: - err = sp_256_proj_point_dbl_8_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_256_mont_mul_8(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 3; break; case 3: - { + sp_256_mont_mul_8(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_8(ctx->t1, q->z, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 5; break; case 5: - sp_256_mont_mul_8(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 6; break; case 6: - sp_256_mont_mul_8(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_8(ctx->t2, p->z, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_8(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); ctx->state = 8; break; case 8: - sp_256_mont_mul_8(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); ctx->state = 9; break; case 9: - sp_256_mont_mul_8(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_8(ctx->t2, ctx->t1) & + sp_256_cmp_equal_8(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_256_proj_point_dbl_8(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_8(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_8(ctx->t2, ctx->t2, ctx->t1, p256_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_8(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_8(ctx->t4, ctx->t4, ctx->t3, p256_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_256_mont_sub_8(ctx->t2, ctx->t2, ctx->t1, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_8(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_256_mont_sub_8(ctx->t4, ctx->t4, ctx->t3, p256_mod); + sp_256_mont_mul_8(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_8(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 15; break; case 15: - sp_256_mont_mul_8(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_8(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 16; break; case 16: - sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_8(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(ctx->x, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 18; break; case 18: - sp_256_mont_mul_8(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t5, p256_mod); ctx->state = 19; break; case 19: - sp_256_mont_sqr_8(ctx->x, ctx->t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); ctx->state = 20; break; case 20: - sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t5, p256_mod); + sp_256_mont_dbl_8(ctx->t3, ctx->y, p256_mod); + sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t3, p256_mod); ctx->state = 21; break; case 21: - sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_8(ctx->y, ctx->y, ctx->x, p256_mod); ctx->state = 22; break; case 22: - sp_256_mont_dbl_8(ctx->t3, ctx->y, p256_mod); + sp_256_mont_mul_8(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 23; break; case 23: - sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t3, p256_mod); + sp_256_mont_sub_8(ctx->y, ctx->y, ctx->t5, p256_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_8(ctx->y, ctx->y, ctx->x, p256_mod); - ctx->state = 25; - break; - case 25: - sp_256_mont_mul_8(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); - ctx->state = 26; - break; - case 26: - sp_256_mont_sub_8(ctx->y, ctx->y, ctx->t5, p256_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 8; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 8; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 8; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_256_proj_point_add_8(sp_point_256* r, - const sp_point_256* p, const sp_point_256* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*8; - sp_digit* t3 = t + 4*8; - sp_digit* t4 = t + 6*8; - sp_digit* t5 = t + 8*8; - sp_digit* t6 = t + 10*8; - - - /* Check double */ - (void)sp_256_sub_8(t1, p256_mod, q->y); - sp_256_norm_8(t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & - (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { - sp_256_proj_point_dbl_8(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t1, t1, p->x, p256_mod, p256_mp_mod); - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_8(t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t4, t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_8(t3, t3, p->y, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); - /* H = U2 - U1 */ - sp_256_mont_sub_8(t2, t2, t1, p256_mod); - /* R = S2 - S1 */ - sp_256_mont_sub_8(t4, t4, t3, p256_mod); - if (~p->infinity & ~q->infinity & - sp_256_iszero_8(t2) & sp_256_iszero_8(t4) & maskt) { - sp_256_proj_point_dbl_8(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_8(z, p->z, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_8(x, x, t5, p256_mod); - sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_dbl_8(t3, y, p256_mod); - sp_256_mont_sub_8(x, x, t3, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_8(y, y, x, p256_mod); - sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_8(y, y, t5, p256_mod); - - for (i = 0; i < 8; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 8; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 8; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - #ifndef WC_NO_CACHE_RESISTANT /* Touch each possible point that could be being copied. * @@ -71643,7 +71627,6 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int i, /* W = Z^4 */ sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod); sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -71661,7 +71644,7 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int i, sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_8(t2, b, p256_mod); sp_256_mont_sub_8(x, x, t2, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_sub_lower_8(t2, b, x, p256_mod); sp_256_mont_dbl_lower_8(b, t2, p256_mod); /* Z = Z*Y */ @@ -71691,7 +71674,7 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int i, sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_8(t2, b, p256_mod); sp_256_mont_sub_8(x, x, t2, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_sub_lower_8(t2, b, x, p256_mod); sp_256_mont_dbl_lower_8(b, t2, p256_mod); /* Z = Z*Y */ @@ -71701,7 +71684,7 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_256_mont_mul_8(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_8(y, y, t1, p256_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_256_div2_8(y, y, p256_mod); } @@ -71746,8 +71729,8 @@ typedef struct sp_table_entry_256 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, - const sp_point_256* q, sp_digit* t) +static void sp_256_proj_point_add_qz1_8(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*8; @@ -71756,12 +71739,17 @@ static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, sp_digit* t5 = t + 8*8; sp_digit* t6 = t + 10*8; - /* Check double */ - (void)sp_256_sub_8(t1, p256_mod, q->y); - sp_256_norm_8(t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & - (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_8(p->x, t2) & + sp_256_cmp_equal_8(p->y, t4)) { sp_256_proj_point_dbl_8(r, p, t); } else { @@ -71773,12 +71761,6 @@ static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_8(t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t4, t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); /* H = U2 - X1 */ sp_256_mont_sub_8(t2, t2, p->x, p256_mod); /* R = S2 - Y1 */ @@ -75856,7 +75838,7 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_256_norm_8(r); - if (sp_256_iszero_8(r) == 0) { + if (!sp_256_iszero_8(r)) { /* x is modified in calculation of s. */ sp_256_from_mp(x, 8, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -75865,7 +75847,7 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_256_calc_s_8(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_256_iszero_8(s) == 0)) { + if ((err == MP_OKAY) && (!sp_256_iszero_8(s))) { break; } } @@ -87956,7 +87938,7 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, /* x /= z^2 */ sp_384_mont_mul_12(r->x, p->x, t2, p384_mod, p384_mp_mod); - XMEMSET(r->x + 12, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 12, 0, sizeof(sp_digit) * 12U); sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_12(r->x, p384_mod); @@ -87965,7 +87947,7 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, /* y /= z^3 */ sp_384_mont_mul_12(r->y, p->y, t1, p384_mod, p384_mp_mod); - XMEMSET(r->y + 12, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 12, 0, sizeof(sp_digit) * 12U); sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_12(r->y, p384_mod); @@ -87974,7 +87956,6 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -88247,6 +88228,61 @@ static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_12(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_12(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_12(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_12(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_12(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_div2_12(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_12(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_12(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_lower_12(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_12(y, y, t2, p384_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_dbl_12_ctx { int state; @@ -88257,6 +88293,12 @@ typedef struct sp_384_proj_point_dbl_12_ctx { sp_digit* z; } sp_384_proj_point_dbl_12_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -88381,62 +88423,6 @@ static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, co return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*12; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod); - /* Z = Y * Z */ - sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod); - /* Z = 2Z */ - sp_384_mont_dbl_12(z, z, p384_mod); - /* T2 = X - T1 */ - sp_384_mont_sub_12(t2, p->x, t1, p384_mod); - /* T1 = X + T1 */ - sp_384_mont_add_12(t1, p->x, t1, p384_mod); - /* T2 = T1 * T2 */ - sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod); - /* T1 = 3T2 */ - sp_384_mont_tpl_12(t1, t2, p384_mod); - /* Y = 2Y */ - sp_384_mont_dbl_12(y, p->y, p384_mod); - /* Y = Y * Y */ - sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod); - /* T2 = Y * Y */ - sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); - /* T2 = T2/2 */ - sp_384_div2_12(t2, t2, p384_mod); - /* Y = Y * X */ - sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod); - /* X = T1 * T1 */ - sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod); - /* X = X - Y */ - sp_384_mont_sub_12(x, x, y, p384_mod); - /* X = X - Y */ - sp_384_mont_sub_12(x, x, y, p384_mod); - /* Y = Y - X */ - sp_384_mont_sub_lower_12(y, y, x, p384_mod); - /* Y = Y * T1 */ - sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod); - /* Y = Y - T2 */ - sp_384_mont_sub_12(y, y, t2, p384_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -88464,6 +88450,7 @@ static int sp_384_iszero_12(const sp_digit* a) a[8] | a[9] | a[10] | a[11]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -88471,6 +88458,81 @@ static int sp_384_iszero_12(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_384_proj_point_add_12(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + sp_digit* t3 = t + 4*12; + sp_digit* t4 = t + 6*12; + sp_digit* t5 = t + 8*12; + sp_digit* t6 = t + 10*12; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t1, t1, p->x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_12(t3, t3, p->y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_12(t2, t1) & + sp_384_cmp_equal_12(t4, t3)) { + sp_384_proj_point_dbl_12(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_384_mont_sub_12(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_12(t4, t4, t3, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_12(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(x, x, t5, p384_mod); + sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(t3, y, p384_mod); + sp_384_mont_sub_12(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_12(y, y, x, p384_mod); + sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(y, y, t5, p384_mod); + for (i = 0; i < 12; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_add_12_ctx { @@ -88489,6 +88551,13 @@ typedef struct sp_384_proj_point_add_12_ctx { sp_digit* z; } sp_384_proj_point_add_12_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_384_proj_point_add_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, const sp_point_384* q, sp_digit* t) { @@ -88520,252 +88589,149 @@ static int sp_384_proj_point_add_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_384_sub_12(ctx->t1, p384_mod, q->y); - sp_384_norm_12(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & - (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_12(ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 2; break; case 2: - err = sp_384_proj_point_dbl_12_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_384_mont_mul_12(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 3; break; case 3: - { + sp_384_mont_mul_12(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_12(ctx->t1, q->z, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 5; break; case 5: - sp_384_mont_mul_12(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 6; break; case 6: - sp_384_mont_mul_12(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_12(ctx->t2, p->z, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_12(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); ctx->state = 8; break; case 8: - sp_384_mont_mul_12(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); ctx->state = 9; break; case 9: - sp_384_mont_mul_12(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_12(ctx->t2, ctx->t1) & + sp_384_cmp_equal_12(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_384_proj_point_dbl_12(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_12(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_12(ctx->t2, ctx->t2, ctx->t1, p384_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_12(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_12(ctx->t4, ctx->t4, ctx->t3, p384_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_384_mont_sub_12(ctx->t2, ctx->t2, ctx->t1, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_12(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_384_mont_sub_12(ctx->t4, ctx->t4, ctx->t3, p384_mod); + sp_384_mont_mul_12(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_12(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 15; break; case 15: - sp_384_mont_mul_12(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_12(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 16; break; case 16: - sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_12(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(ctx->x, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 18; break; case 18: - sp_384_mont_mul_12(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t5, p384_mod); ctx->state = 19; break; case 19: - sp_384_mont_sqr_12(ctx->x, ctx->t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); ctx->state = 20; break; case 20: - sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t5, p384_mod); + sp_384_mont_dbl_12(ctx->t3, ctx->y, p384_mod); + sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t3, p384_mod); ctx->state = 21; break; case 21: - sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_12(ctx->y, ctx->y, ctx->x, p384_mod); ctx->state = 22; break; case 22: - sp_384_mont_dbl_12(ctx->t3, ctx->y, p384_mod); + sp_384_mont_mul_12(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 23; break; case 23: - sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t3, p384_mod); + sp_384_mont_sub_12(ctx->y, ctx->y, ctx->t5, p384_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_12(ctx->y, ctx->y, ctx->x, p384_mod); - ctx->state = 25; - break; - case 25: - sp_384_mont_mul_12(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); - ctx->state = 26; - break; - case 26: - sp_384_mont_sub_12(ctx->y, ctx->y, ctx->t5, p384_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 12; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 12; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 12; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_384_proj_point_add_12(sp_point_384* r, - const sp_point_384* p, const sp_point_384* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*12; - sp_digit* t3 = t + 4*12; - sp_digit* t4 = t + 6*12; - sp_digit* t5 = t + 8*12; - sp_digit* t6 = t + 10*12; - - - /* Check double */ - (void)sp_384_sub_12(t1, p384_mod, q->y); - sp_384_norm_12(t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & - (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { - sp_384_proj_point_dbl_12(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t1, t1, p->x, p384_mod, p384_mp_mod); - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_12(t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t4, t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_12(t3, t3, p->y, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); - /* H = U2 - U1 */ - sp_384_mont_sub_12(t2, t2, t1, p384_mod); - /* R = S2 - S1 */ - sp_384_mont_sub_12(t4, t4, t3, p384_mod); - if (~p->infinity & ~q->infinity & - sp_384_iszero_12(t2) & sp_384_iszero_12(t4) & maskt) { - sp_384_proj_point_dbl_12(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_12(z, p->z, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_12(x, x, t5, p384_mod); - sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_dbl_12(t3, y, p384_mod); - sp_384_mont_sub_12(x, x, t3, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_12(y, y, x, p384_mod); - sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_12(y, y, t5, p384_mod); - - for (i = 0; i < 12; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 12; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 12; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - #ifndef WC_NO_CACHE_RESISTANT /* Touch each possible point that could be being copied. * @@ -89077,7 +89043,6 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int i, /* W = Z^4 */ sp_384_mont_sqr_12(w, z, p384_mod, p384_mp_mod); sp_384_mont_sqr_12(w, w, p384_mod, p384_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -89095,7 +89060,7 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int i, sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_12(t2, b, p384_mod); sp_384_mont_sub_12(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_12(t2, b, x, p384_mod); sp_384_mont_dbl_lower_12(b, t2, p384_mod); /* Z = Z*Y */ @@ -89125,7 +89090,7 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int i, sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_12(t2, b, p384_mod); sp_384_mont_sub_12(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_12(t2, b, x, p384_mod); sp_384_mont_dbl_lower_12(b, t2, p384_mod); /* Z = Z*Y */ @@ -89135,7 +89100,7 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_384_mont_mul_12(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_12(y, y, t1, p384_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_384_div2_12(y, y, p384_mod); } @@ -89180,8 +89145,8 @@ typedef struct sp_table_entry_384 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p, - const sp_point_384* q, sp_digit* t) +static void sp_384_proj_point_add_qz1_12(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*12; @@ -89190,12 +89155,17 @@ static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p, sp_digit* t5 = t + 8*12; sp_digit* t6 = t + 10*12; - /* Check double */ - (void)sp_384_sub_12(t1, p384_mod, q->y); - sp_384_norm_12(t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & - (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_12(p->x, t2) & + sp_384_cmp_equal_12(p->y, t4)) { sp_384_proj_point_dbl_12(r, p, t); } else { @@ -89207,12 +89177,6 @@ static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_12(t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t4, t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); /* H = U2 - X1 */ sp_384_mont_sub_12(t2, t2, p->x, p384_mod); /* R = S2 - Y1 */ @@ -93470,7 +93434,7 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_384_norm_12(r); - if (sp_384_iszero_12(r) == 0) { + if (!sp_384_iszero_12(r)) { /* x is modified in calculation of s. */ sp_384_from_mp(x, 12, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -93479,7 +93443,7 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_384_calc_s_12(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_384_iszero_12(s) == 0)) { + if ((err == MP_OKAY) && (!sp_384_iszero_12(s))) { break; } } @@ -114610,7 +114574,7 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, /* x /= z^2 */ sp_521_mont_mul_17(r->x, p->x, t2, p521_mod, p521_mp_mod); - XMEMSET(r->x + 17, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 17, 0, sizeof(sp_digit) * 17U); sp_521_mont_reduce_17(r->x, p521_mod, p521_mp_mod); /* Reduce x to less than modulus */ n = sp_521_cmp_17(r->x, p521_mod); @@ -114619,7 +114583,7 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, /* y /= z^3 */ sp_521_mont_mul_17(r->y, p->y, t1, p521_mod, p521_mp_mod); - XMEMSET(r->y + 17, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 17, 0, sizeof(sp_digit) * 17U); sp_521_mont_reduce_17(r->y, p521_mod, p521_mp_mod); /* Reduce y to less than modulus */ n = sp_521_cmp_17(r->y, p521_mod); @@ -114628,7 +114592,6 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -115079,6 +115042,61 @@ static void sp_521_div2_17(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_521_proj_point_dbl_17(sp_point_521* r, const sp_point_521* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*17; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_521_mont_sqr_17(t1, p->z, p521_mod, p521_mp_mod); + /* Z = Y * Z */ + sp_521_mont_mul_17(z, p->y, p->z, p521_mod, p521_mp_mod); + /* Z = 2Z */ + sp_521_mont_dbl_17(z, z, p521_mod); + /* T2 = X - T1 */ + sp_521_mont_sub_17(t2, p->x, t1, p521_mod); + /* T1 = X + T1 */ + sp_521_mont_add_17(t1, p->x, t1, p521_mod); + /* T2 = T1 * T2 */ + sp_521_mont_mul_17(t2, t1, t2, p521_mod, p521_mp_mod); + /* T1 = 3T2 */ + sp_521_mont_tpl_17(t1, t2, p521_mod); + /* Y = 2Y */ + sp_521_mont_dbl_17(y, p->y, p521_mod); + /* Y = Y * Y */ + sp_521_mont_sqr_17(y, y, p521_mod, p521_mp_mod); + /* T2 = Y * Y */ + sp_521_mont_sqr_17(t2, y, p521_mod, p521_mp_mod); + /* T2 = T2/2 */ + sp_521_div2_17(t2, t2, p521_mod); + /* Y = Y * X */ + sp_521_mont_mul_17(y, y, p->x, p521_mod, p521_mp_mod); + /* X = T1 * T1 */ + sp_521_mont_sqr_17(x, t1, p521_mod, p521_mp_mod); + /* X = X - Y */ + sp_521_mont_sub_17(x, x, y, p521_mod); + /* X = X - Y */ + sp_521_mont_sub_17(x, x, y, p521_mod); + /* Y = Y - X */ + sp_521_mont_sub_lower_17(y, y, x, p521_mod); + /* Y = Y * T1 */ + sp_521_mont_mul_17(y, y, t1, p521_mod, p521_mp_mod); + /* Y = Y - T2 */ + sp_521_mont_sub_17(y, y, t2, p521_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_521_proj_point_dbl_17_ctx { int state; @@ -115089,6 +115107,12 @@ typedef struct sp_521_proj_point_dbl_17_ctx { sp_digit* z; } sp_521_proj_point_dbl_17_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_521_proj_point_dbl_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -115213,62 +115237,6 @@ static int sp_521_proj_point_dbl_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, co return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_521_proj_point_dbl_17(sp_point_521* r, const sp_point_521* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*17; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_521_mont_sqr_17(t1, p->z, p521_mod, p521_mp_mod); - /* Z = Y * Z */ - sp_521_mont_mul_17(z, p->y, p->z, p521_mod, p521_mp_mod); - /* Z = 2Z */ - sp_521_mont_dbl_17(z, z, p521_mod); - /* T2 = X - T1 */ - sp_521_mont_sub_17(t2, p->x, t1, p521_mod); - /* T1 = X + T1 */ - sp_521_mont_add_17(t1, p->x, t1, p521_mod); - /* T2 = T1 * T2 */ - sp_521_mont_mul_17(t2, t1, t2, p521_mod, p521_mp_mod); - /* T1 = 3T2 */ - sp_521_mont_tpl_17(t1, t2, p521_mod); - /* Y = 2Y */ - sp_521_mont_dbl_17(y, p->y, p521_mod); - /* Y = Y * Y */ - sp_521_mont_sqr_17(y, y, p521_mod, p521_mp_mod); - /* T2 = Y * Y */ - sp_521_mont_sqr_17(t2, y, p521_mod, p521_mp_mod); - /* T2 = T2/2 */ - sp_521_div2_17(t2, t2, p521_mod); - /* Y = Y * X */ - sp_521_mont_mul_17(y, y, p->x, p521_mod, p521_mp_mod); - /* X = T1 * T1 */ - sp_521_mont_sqr_17(x, t1, p521_mod, p521_mp_mod); - /* X = X - Y */ - sp_521_mont_sub_17(x, x, y, p521_mod); - /* X = X - Y */ - sp_521_mont_sub_17(x, x, y, p521_mod); - /* Y = Y - X */ - sp_521_mont_sub_lower_17(y, y, x, p521_mod); - /* Y = Y * T1 */ - sp_521_mont_mul_17(y, y, t1, p521_mod, p521_mp_mod); - /* Y = Y - T2 */ - sp_521_mont_sub_17(y, y, t2, p521_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -115299,6 +115267,7 @@ static int sp_521_iszero_17(const sp_digit* a) a[16]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -115306,6 +115275,81 @@ static int sp_521_iszero_17(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_521_proj_point_add_17(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*17; + sp_digit* t3 = t + 4*17; + sp_digit* t4 = t + 6*17; + sp_digit* t5 = t + 8*17; + sp_digit* t6 = t + 10*17; + + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_17(t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t3, t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t1, t1, p->x, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_17(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_17(t3, t3, p->y, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_17(t4, t4, q->y, p521_mod, p521_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_17(t2, t1) & + sp_521_cmp_equal_17(t4, t3)) { + sp_521_proj_point_dbl_17(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_521_mont_sub_17(t2, t2, t1, p521_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_17(t4, t4, t3, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_17(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_17(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_17(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(x, x, t5, p521_mod); + sp_521_mont_mul_17(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_17(t3, y, p521_mod); + sp_521_mont_sub_17(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_17(y, y, x, p521_mod); + sp_521_mont_mul_17(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(y, y, t5, p521_mod); + for (i = 0; i < 17; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_521_proj_point_add_17_ctx { @@ -115324,6 +115368,13 @@ typedef struct sp_521_proj_point_add_17_ctx { sp_digit* z; } sp_521_proj_point_add_17_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_521_proj_point_add_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, const sp_point_521* q, sp_digit* t) { @@ -115355,252 +115406,149 @@ static int sp_521_proj_point_add_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_521_sub_17(ctx->t1, p521_mod, q->y); - sp_521_norm_17(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & - (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_17(ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 2; break; case 2: - err = sp_521_proj_point_dbl_17_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_521_mont_mul_17(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 3; break; case 3: - { + sp_521_mont_mul_17(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_521_mont_sqr_17(ctx->t1, q->z, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_17(ctx->t2, p->z, p521_mod, p521_mp_mod); ctx->state = 5; break; case 5: - sp_521_mont_mul_17(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); ctx->state = 6; break; case 6: - sp_521_mont_mul_17(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_17(ctx->t2, p->z, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_17(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); ctx->state = 8; break; case 8: - sp_521_mont_mul_17(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_17(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); ctx->state = 9; break; case 9: - sp_521_mont_mul_17(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_17(ctx->t2, ctx->t1) & + sp_521_cmp_equal_17(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_521_proj_point_dbl_17(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_521_mont_mul_17(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); + /* H = U2 - U1 */ + sp_521_mont_sub_17(ctx->t2, ctx->t2, ctx->t1, p521_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_17(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_17(ctx->t4, ctx->t4, ctx->t3, p521_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_521_mont_sub_17(ctx->t2, ctx->t2, ctx->t1, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_17(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_521_mont_sub_17(ctx->t4, ctx->t4, ctx->t3, p521_mod); + sp_521_mont_mul_17(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_17(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 15; break; case 15: - sp_521_mont_mul_17(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_17(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 16; break; case 16: - sp_521_mont_mul_17(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_17(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_sqr_17(ctx->x, ctx->t4, p521_mod, p521_mp_mod); ctx->state = 18; break; case 18: - sp_521_mont_mul_17(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(ctx->x, ctx->x, ctx->t5, p521_mod); ctx->state = 19; break; case 19: - sp_521_mont_sqr_17(ctx->x, ctx->t4, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); ctx->state = 20; break; case 20: - sp_521_mont_sub_17(ctx->x, ctx->x, ctx->t5, p521_mod); + sp_521_mont_dbl_17(ctx->t3, ctx->y, p521_mod); + sp_521_mont_sub_17(ctx->x, ctx->x, ctx->t3, p521_mod); ctx->state = 21; break; case 21: - sp_521_mont_mul_17(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_17(ctx->y, ctx->y, ctx->x, p521_mod); ctx->state = 22; break; case 22: - sp_521_mont_dbl_17(ctx->t3, ctx->y, p521_mod); + sp_521_mont_mul_17(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); ctx->state = 23; break; case 23: - sp_521_mont_sub_17(ctx->x, ctx->x, ctx->t3, p521_mod); + sp_521_mont_sub_17(ctx->y, ctx->y, ctx->t5, p521_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_17(ctx->y, ctx->y, ctx->x, p521_mod); - ctx->state = 25; - break; - case 25: - sp_521_mont_mul_17(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); - ctx->state = 26; - break; - case 26: - sp_521_mont_sub_17(ctx->y, ctx->y, ctx->t5, p521_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 17; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 17; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 17; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_521_proj_point_add_17(sp_point_521* r, - const sp_point_521* p, const sp_point_521* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*17; - sp_digit* t3 = t + 4*17; - sp_digit* t4 = t + 6*17; - sp_digit* t5 = t + 8*17; - sp_digit* t6 = t + 10*17; - - - /* Check double */ - (void)sp_521_sub_17(t1, p521_mod, q->y); - sp_521_norm_17(t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & - (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, t1))) != 0) { - sp_521_proj_point_dbl_17(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_521_mont_sqr_17(t1, q->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t3, t1, q->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t1, t1, p->x, p521_mod, p521_mp_mod); - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_17(t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t4, t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t2, t2, q->x, p521_mod, p521_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_521_mont_mul_17(t3, t3, p->y, p521_mod, p521_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_17(t4, t4, q->y, p521_mod, p521_mp_mod); - /* H = U2 - U1 */ - sp_521_mont_sub_17(t2, t2, t1, p521_mod); - /* R = S2 - S1 */ - sp_521_mont_sub_17(t4, t4, t3, p521_mod); - if (~p->infinity & ~q->infinity & - sp_521_iszero_17(t2) & sp_521_iszero_17(t4) & maskt) { - sp_521_proj_point_dbl_17(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_17(t5, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(y, t1, t5, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t5, t5, t2, p521_mod, p521_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_17(z, p->z, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(z, z, q->z, p521_mod, p521_mp_mod); - sp_521_mont_sqr_17(x, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_17(x, x, t5, p521_mod); - sp_521_mont_mul_17(t5, t5, t3, p521_mod, p521_mp_mod); - sp_521_mont_dbl_17(t3, y, p521_mod); - sp_521_mont_sub_17(x, x, t3, p521_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_17(y, y, x, p521_mod); - sp_521_mont_mul_17(y, y, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_17(y, y, t5, p521_mod); - - for (i = 0; i < 17; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 17; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 17; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - #ifndef WC_NO_CACHE_RESISTANT /* Touch each possible point that could be being copied. * @@ -115946,7 +115894,6 @@ static void sp_521_proj_point_dbl_n_17(sp_point_521* p, int i, /* W = Z^4 */ sp_521_mont_sqr_17(w, z, p521_mod, p521_mp_mod); sp_521_mont_sqr_17(w, w, p521_mod, p521_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -115964,7 +115911,7 @@ static void sp_521_proj_point_dbl_n_17(sp_point_521* p, int i, sp_521_mont_sqr_17(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_17(t2, b, p521_mod); sp_521_mont_sub_17(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_17(t2, b, x, p521_mod); sp_521_mont_dbl_lower_17(b, t2, p521_mod); /* Z = Z*Y */ @@ -115994,7 +115941,7 @@ static void sp_521_proj_point_dbl_n_17(sp_point_521* p, int i, sp_521_mont_sqr_17(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_17(t2, b, p521_mod); sp_521_mont_sub_17(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_17(t2, b, x, p521_mod); sp_521_mont_dbl_lower_17(b, t2, p521_mod); /* Z = Z*Y */ @@ -116004,7 +115951,7 @@ static void sp_521_proj_point_dbl_n_17(sp_point_521* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_521_mont_mul_17(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_17(y, y, t1, p521_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_521_div2_17(y, y, p521_mod); } @@ -116049,8 +115996,8 @@ typedef struct sp_table_entry_521 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_521_proj_point_add_qz1_17(sp_point_521* r, const sp_point_521* p, - const sp_point_521* q, sp_digit* t) +static void sp_521_proj_point_add_qz1_17(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*17; @@ -116059,12 +116006,17 @@ static void sp_521_proj_point_add_qz1_17(sp_point_521* r, const sp_point_521* p, sp_digit* t5 = t + 8*17; sp_digit* t6 = t + 10*17; - /* Check double */ - (void)sp_521_sub_17(t1, p521_mod, q->y); - sp_521_norm_17(t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & - (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_17(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_17(t4, t4, q->y, p521_mod, p521_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_17(p->x, t2) & + sp_521_cmp_equal_17(p->y, t4)) { sp_521_proj_point_dbl_17(r, p, t); } else { @@ -116076,12 +116028,6 @@ static void sp_521_proj_point_add_qz1_17(sp_point_521* r, const sp_point_521* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_17(t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t4, t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t2, t2, q->x, p521_mod, p521_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_17(t4, t4, q->y, p521_mod, p521_mp_mod); /* H = U2 - X1 */ sp_521_mont_sub_17(t2, t2, p->x, p521_mod); /* R = S2 - Y1 */ @@ -121612,7 +121558,7 @@ int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_521_norm_17(r); - if (sp_521_iszero_17(r) == 0) { + if (!sp_521_iszero_17(r)) { /* x is modified in calculation of s. */ sp_521_from_mp(x, 17, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -121626,7 +121572,7 @@ int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_521_calc_s_17(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_521_iszero_17(s) == 0)) { + if ((err == MP_OKAY) && (!sp_521_iszero_17(s))) { break; } } @@ -143695,7 +143641,7 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, /* x /= z^2 */ sp_1024_mont_mul_32(r->x, p->x, t2, p1024_mod, p1024_mp_mod); - XMEMSET(r->x + 32, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 32, 0, sizeof(sp_digit) * 32U); sp_1024_mont_reduce_32(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_32(r->x, p1024_mod); @@ -143704,7 +143650,7 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, /* y /= z^3 */ sp_1024_mont_mul_32(r->y, p->y, t1, p1024_mod, p1024_mp_mod); - XMEMSET(r->y + 32, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 32, 0, sizeof(sp_digit) * 32U); sp_1024_mont_reduce_32(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_32(r->y, p1024_mod); @@ -143713,7 +143659,6 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -144860,6 +144805,61 @@ static void sp_1024_div2_32(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_1024_proj_point_dbl_32(sp_point_1024* r, const sp_point_1024* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*32; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_1024_mont_sqr_32(t1, p->z, p1024_mod, p1024_mp_mod); + /* Z = Y * Z */ + sp_1024_mont_mul_32(z, p->y, p->z, p1024_mod, p1024_mp_mod); + /* Z = 2Z */ + sp_1024_mont_dbl_32(z, z, p1024_mod); + /* T2 = X - T1 */ + sp_1024_mont_sub_32(t2, p->x, t1, p1024_mod); + /* T1 = X + T1 */ + sp_1024_mont_add_32(t1, p->x, t1, p1024_mod); + /* T2 = T1 * T2 */ + sp_1024_mont_mul_32(t2, t1, t2, p1024_mod, p1024_mp_mod); + /* T1 = 3T2 */ + sp_1024_mont_tpl_32(t1, t2, p1024_mod); + /* Y = 2Y */ + sp_1024_mont_dbl_32(y, p->y, p1024_mod); + /* Y = Y * Y */ + sp_1024_mont_sqr_32(y, y, p1024_mod, p1024_mp_mod); + /* T2 = Y * Y */ + sp_1024_mont_sqr_32(t2, y, p1024_mod, p1024_mp_mod); + /* T2 = T2/2 */ + sp_1024_div2_32(t2, t2, p1024_mod); + /* Y = Y * X */ + sp_1024_mont_mul_32(y, y, p->x, p1024_mod, p1024_mp_mod); + /* X = T1 * T1 */ + sp_1024_mont_sqr_32(x, t1, p1024_mod, p1024_mp_mod); + /* X = X - Y */ + sp_1024_mont_sub_32(x, x, y, p1024_mod); + /* X = X - Y */ + sp_1024_mont_sub_32(x, x, y, p1024_mod); + /* Y = Y - X */ + sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); + /* Y = Y * T1 */ + sp_1024_mont_mul_32(y, y, t1, p1024_mod, p1024_mp_mod); + /* Y = Y - T2 */ + sp_1024_mont_sub_32(y, y, t2, p1024_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_dbl_32_ctx { int state; @@ -144870,6 +144870,12 @@ typedef struct sp_1024_proj_point_dbl_32_ctx { sp_digit* z; } sp_1024_proj_point_dbl_32_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -144994,62 +145000,6 @@ static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_1024_proj_point_dbl_32(sp_point_1024* r, const sp_point_1024* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*32; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_1024_mont_sqr_32(t1, p->z, p1024_mod, p1024_mp_mod); - /* Z = Y * Z */ - sp_1024_mont_mul_32(z, p->y, p->z, p1024_mod, p1024_mp_mod); - /* Z = 2Z */ - sp_1024_mont_dbl_32(z, z, p1024_mod); - /* T2 = X - T1 */ - sp_1024_mont_sub_32(t2, p->x, t1, p1024_mod); - /* T1 = X + T1 */ - sp_1024_mont_add_32(t1, p->x, t1, p1024_mod); - /* T2 = T1 * T2 */ - sp_1024_mont_mul_32(t2, t1, t2, p1024_mod, p1024_mp_mod); - /* T1 = 3T2 */ - sp_1024_mont_tpl_32(t1, t2, p1024_mod); - /* Y = 2Y */ - sp_1024_mont_dbl_32(y, p->y, p1024_mod); - /* Y = Y * Y */ - sp_1024_mont_sqr_32(y, y, p1024_mod, p1024_mp_mod); - /* T2 = Y * Y */ - sp_1024_mont_sqr_32(t2, y, p1024_mod, p1024_mp_mod); - /* T2 = T2/2 */ - sp_1024_div2_32(t2, t2, p1024_mod); - /* Y = Y * X */ - sp_1024_mont_mul_32(y, y, p->x, p1024_mod, p1024_mp_mod); - /* X = T1 * T1 */ - sp_1024_mont_sqr_32(x, t1, p1024_mod, p1024_mp_mod); - /* X = X - Y */ - sp_1024_mont_sub_32(x, x, y, p1024_mod); - /* X = X - Y */ - sp_1024_mont_sub_32(x, x, y, p1024_mod); - /* Y = Y - X */ - sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); - /* Y = Y * T1 */ - sp_1024_mont_mul_32(y, y, t1, p1024_mod, p1024_mp_mod); - /* Y = Y - T2 */ - sp_1024_mont_sub_32(y, y, t2, p1024_mod); -} - #ifdef WOLFSSL_SP_SMALL /* Sub b from a into r. (r = a - b) * @@ -145202,6 +145152,7 @@ static int sp_1024_iszero_32(const sp_digit* a) a[24] | a[25] | a[26] | a[27] | a[28] | a[29] | a[30] | a[31]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -145209,6 +145160,81 @@ static int sp_1024_iszero_32(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_1024_proj_point_add_32(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*32; + sp_digit* t3 = t + 4*32; + sp_digit* t4 = t + 6*32; + sp_digit* t5 = t + 8*32; + sp_digit* t6 = t + 10*32; + + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_32(t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t3, t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t1, t1, p->x, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_32(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_32(t3, t3, p->y, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_32(t2, t1) & + sp_1024_cmp_equal_32(t4, t3)) { + sp_1024_proj_point_dbl_32(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_1024_mont_sub_32(t2, t2, t1, p1024_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_32(t4, t4, t3, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_32(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_32(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_32(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(x, x, t5, p1024_mod); + sp_1024_mont_mul_32(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_32(t3, y, p1024_mod); + sp_1024_mont_sub_32(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); + sp_1024_mont_mul_32(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(y, y, t5, p1024_mod); + for (i = 0; i < 32; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_add_32_ctx { @@ -145227,6 +145253,13 @@ typedef struct sp_1024_proj_point_add_32_ctx { sp_digit* z; } sp_1024_proj_point_add_32_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_1024_proj_point_add_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { @@ -145258,252 +145291,149 @@ static int sp_1024_proj_point_add_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_1024_sub_32(ctx->t1, p1024_mod, q->y); - sp_1024_norm_32(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & - (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_32(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 2; break; case 2: - err = sp_1024_proj_point_dbl_32_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_1024_mont_mul_32(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 3; break; case 3: - { + sp_1024_mont_mul_32(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_32(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_32(ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 5; break; case 5: - sp_1024_mont_mul_32(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 6; break; case 6: - sp_1024_mont_mul_32(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_32(ctx->t2, p->z, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_32(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); ctx->state = 8; break; case 8: - sp_1024_mont_mul_32(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_32(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); ctx->state = 9; break; case 9: - sp_1024_mont_mul_32(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_32(ctx->t2, ctx->t1) & + sp_1024_cmp_equal_32(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_1024_proj_point_dbl_32(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_32(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); + /* H = U2 - U1 */ + sp_1024_mont_sub_32(ctx->t2, ctx->t2, ctx->t1, p1024_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_32(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_32(ctx->t4, ctx->t4, ctx->t3, p1024_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_1024_mont_sub_32(ctx->t2, ctx->t2, ctx->t1, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_32(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_1024_mont_sub_32(ctx->t4, ctx->t4, ctx->t3, p1024_mod); + sp_1024_mont_mul_32(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_32(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 15; break; case 15: - sp_1024_mont_mul_32(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_32(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 16; break; case 16: - sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_32(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_32(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 18; break; case 18: - sp_1024_mont_mul_32(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t5, p1024_mod); ctx->state = 19; break; case 19: - sp_1024_mont_sqr_32(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); ctx->state = 20; break; case 20: - sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t5, p1024_mod); + sp_1024_mont_dbl_32(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t3, p1024_mod); ctx->state = 21; break; case 21: - sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_32(ctx->y, ctx->y, ctx->x, p1024_mod); ctx->state = 22; break; case 22: - sp_1024_mont_dbl_32(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_mul_32(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 23; break; case 23: - sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t3, p1024_mod); + sp_1024_mont_sub_32(ctx->y, ctx->y, ctx->t5, p1024_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_32(ctx->y, ctx->y, ctx->x, p1024_mod); - ctx->state = 25; - break; - case 25: - sp_1024_mont_mul_32(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); - ctx->state = 26; - break; - case 26: - sp_1024_mont_sub_32(ctx->y, ctx->y, ctx->t5, p1024_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 32; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 32; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 32; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_1024_proj_point_add_32(sp_point_1024* r, - const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*32; - sp_digit* t3 = t + 4*32; - sp_digit* t4 = t + 6*32; - sp_digit* t5 = t + 8*32; - sp_digit* t6 = t + 10*32; - - - /* Check double */ - (void)sp_1024_mont_sub_32(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_32(t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & - (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, t1))) != 0) { - sp_1024_proj_point_dbl_32(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_32(t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t3, t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t1, t1, p->x, p1024_mod, p1024_mp_mod); - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_32(t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t4, t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_32(t3, t3, p->y, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod); - /* H = U2 - U1 */ - sp_1024_mont_sub_32(t2, t2, t1, p1024_mod); - /* R = S2 - S1 */ - sp_1024_mont_sub_32(t4, t4, t3, p1024_mod); - if (~p->infinity & ~q->infinity & - sp_1024_iszero_32(t2) & sp_1024_iszero_32(t4) & maskt) { - sp_1024_proj_point_dbl_32(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_32(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t5, t5, t2, p1024_mod, p1024_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_32(z, p->z, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_32(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_32(x, x, t5, p1024_mod); - sp_1024_mont_mul_32(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_32(t3, y, p1024_mod); - sp_1024_mont_sub_32(x, x, t3, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); - sp_1024_mont_mul_32(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_32(y, y, t5, p1024_mod); - - for (i = 0; i < 32; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 32; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 32; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Multiply the point by the scalar and return the result. * If map is true then convert result to affine coordinates. * @@ -145679,7 +145609,6 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int i, /* W = Z^4 */ sp_1024_mont_sqr_32(w, z, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_32(w, w, p1024_mod, p1024_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -145697,7 +145626,7 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int i, sp_1024_mont_sqr_32(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_32(t2, b, p1024_mod); sp_1024_mont_sub_32(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_32(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_32(b, t2, p1024_mod); /* Z = Z*Y */ @@ -145727,7 +145656,7 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int i, sp_1024_mont_sqr_32(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_32(t2, b, p1024_mod); sp_1024_mont_sub_32(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_32(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_32(b, t2, p1024_mod); /* Z = Z*Y */ @@ -145737,7 +145666,7 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_1024_mont_mul_32(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_32(y, y, t1, p1024_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_1024_div2_32(y, y, p1024_mod); } @@ -145782,8 +145711,8 @@ typedef struct sp_table_entry_1024 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, const sp_point_1024* p, - const sp_point_1024* q, sp_digit* t) +static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*32; @@ -145792,12 +145721,17 @@ static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, const sp_point_1024* sp_digit* t5 = t + 8*32; sp_digit* t6 = t + 10*32; - /* Check double */ - (void)sp_1024_mont_sub_32(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_32(t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & - (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_32(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_32(p->x, t2) & + sp_1024_cmp_equal_32(p->y, t4)) { sp_1024_proj_point_dbl_32(r, p, t); } else { @@ -145809,12 +145743,6 @@ static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, const sp_point_1024* sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_32(t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t4, t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod); /* H = U2 - X1 */ sp_1024_mont_sub_32(t2, t2, p->x, p1024_mod); /* R = S2 - Y1 */ diff --git a/wolfcrypt/src/sp_arm64.c b/wolfcrypt/src/sp_arm64.c index 97d121d49c..6a33f1c892 100644 --- a/wolfcrypt/src/sp_arm64.c +++ b/wolfcrypt/src/sp_arm64.c @@ -23227,7 +23227,7 @@ static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, /* x /= z^2 */ sp_256_mont_mul_4(r->x, p->x, t2, p256_mod, p256_mp_mod); - XMEMSET(r->x + 4, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 4, 0, sizeof(sp_digit) * 4U); sp_256_mont_reduce_4(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_4(r->x, p256_mod); @@ -23236,7 +23236,7 @@ static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, /* y /= z^3 */ sp_256_mont_mul_4(r->y, p->y, t1, p256_mod, p256_mp_mod); - XMEMSET(r->y + 4, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 4, 0, sizeof(sp_digit) * 4U); sp_256_mont_reduce_4(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_4(r->y, p256_mod); @@ -23245,7 +23245,6 @@ static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -23437,6 +23436,61 @@ static void sp_256_div2_4(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_256_proj_point_dbl_4(sp_point_256* r, const sp_point_256* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_4(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_4(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_4(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_4(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_4(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_4(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_4(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_4(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_4(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_4(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_4(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_4(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_4(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_4(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_lower_4(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_4(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_4(y, y, t2, p256_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_dbl_4_ctx { int state; @@ -23447,6 +23501,12 @@ typedef struct sp_256_proj_point_dbl_4_ctx { sp_digit* z; } sp_256_proj_point_dbl_4_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_256_proj_point_dbl_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -23571,62 +23631,6 @@ static int sp_256_proj_point_dbl_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_256_proj_point_dbl_4(sp_point_256* r, const sp_point_256* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*4; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_256_mont_sqr_4(t1, p->z, p256_mod, p256_mp_mod); - /* Z = Y * Z */ - sp_256_mont_mul_4(z, p->y, p->z, p256_mod, p256_mp_mod); - /* Z = 2Z */ - sp_256_mont_dbl_4(z, z, p256_mod); - /* T2 = X - T1 */ - sp_256_mont_sub_4(t2, p->x, t1, p256_mod); - /* T1 = X + T1 */ - sp_256_mont_add_4(t1, p->x, t1, p256_mod); - /* T2 = T1 * T2 */ - sp_256_mont_mul_4(t2, t1, t2, p256_mod, p256_mp_mod); - /* T1 = 3T2 */ - sp_256_mont_tpl_4(t1, t2, p256_mod); - /* Y = 2Y */ - sp_256_mont_dbl_4(y, p->y, p256_mod); - /* Y = Y * Y */ - sp_256_mont_sqr_4(y, y, p256_mod, p256_mp_mod); - /* T2 = Y * Y */ - sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod); - /* T2 = T2/2 */ - sp_256_div2_4(t2, t2, p256_mod); - /* Y = Y * X */ - sp_256_mont_mul_4(y, y, p->x, p256_mod, p256_mp_mod); - /* X = T1 * T1 */ - sp_256_mont_sqr_4(x, t1, p256_mod, p256_mp_mod); - /* X = X - Y */ - sp_256_mont_sub_4(x, x, y, p256_mod); - /* X = X - Y */ - sp_256_mont_sub_4(x, x, y, p256_mod); - /* Y = Y - X */ - sp_256_mont_sub_lower_4(y, y, x, p256_mod); - /* Y = Y * T1 */ - sp_256_mont_mul_4(y, y, t1, p256_mod, p256_mp_mod); - /* Y = Y - T2 */ - sp_256_mont_sub_4(y, y, t2, p256_mod); -} - #define sp_256_mont_tpl_lower_4 sp_256_mont_tpl_4 /* Subtract two Montgomery form numbers (r = a - b % m). * @@ -23750,7 +23754,6 @@ static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int i, /* W = Z^4 */ sp_256_mont_sqr_4(w, z, p256_mod, p256_mp_mod); sp_256_mont_sqr_4(w, w, p256_mod, p256_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -23767,7 +23770,7 @@ static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int i, /* X = A^2 - 2B */ sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod); sp_256_mont_sub_dbl_4(x, x, b, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_dbl_sub_4(b, b, x, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod); @@ -23795,7 +23798,7 @@ static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int i, /* X = A^2 - 2B */ sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod); sp_256_mont_sub_dbl_4(x, x, b, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_dbl_sub_4(b, b, x, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod); @@ -23804,7 +23807,7 @@ static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_256_mont_mul_4(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_4(y, y, t1, p256_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_256_div2_4(y, y, p256_mod); } @@ -23833,6 +23836,7 @@ static int sp_256_iszero_4(const sp_digit* a) return (a[0] | a[1] | a[2] | a[3]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -23840,6 +23844,80 @@ static int sp_256_iszero_4(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_256_proj_point_add_4(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + sp_digit* t3 = t + 4*4; + sp_digit* t4 = t + 6*4; + sp_digit* t5 = t + 8*4; + sp_digit* t6 = t + 10*4; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t1, t1, p->x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_4(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_4(t3, t3, p->y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_4(t2, t1) & + sp_256_cmp_equal_4(t4, t3)) { + sp_256_proj_point_dbl_4(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_256_mont_sub_4(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_4(t4, t4, t3, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_4(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(x, x, t5, p256_mod); + sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_sub_dbl_4(x, x, y, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_4(y, y, x, p256_mod); + sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(y, y, t5, p256_mod); + for (i = 0; i < 4; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 4; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 4; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_add_4_ctx { @@ -23858,6 +23936,13 @@ typedef struct sp_256_proj_point_add_4_ctx { sp_digit* z; } sp_256_proj_point_add_4_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_256_proj_point_add_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, const sp_point_256* q, sp_digit* t) { @@ -23889,251 +23974,148 @@ static int sp_256_proj_point_add_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_256_sub_4(ctx->t1, p256_mod, q->y); - sp_256_norm_4(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & - (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_4(ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 2; break; case 2: - err = sp_256_proj_point_dbl_4_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_256_mont_mul_4(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 3; break; case 3: - { + sp_256_mont_mul_4(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_4(ctx->t1, q->z, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_4(ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 5; break; case 5: - sp_256_mont_mul_4(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 6; break; case 6: - sp_256_mont_mul_4(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_4(ctx->t2, p->z, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_4(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); ctx->state = 8; break; case 8: - sp_256_mont_mul_4(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_4(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); ctx->state = 9; break; case 9: - sp_256_mont_mul_4(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_4(ctx->t2, ctx->t1) & + sp_256_cmp_equal_4(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_256_proj_point_dbl_4(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_4(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_4(ctx->t2, ctx->t2, ctx->t1, p256_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_4(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_4(ctx->t4, ctx->t4, ctx->t3, p256_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_256_mont_sub_4(ctx->t2, ctx->t2, ctx->t1, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_4(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_256_mont_sub_4(ctx->t4, ctx->t4, ctx->t3, p256_mod); + sp_256_mont_mul_4(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_4(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 15; break; case 15: - sp_256_mont_mul_4(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_4(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 16; break; case 16: - sp_256_mont_mul_4(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_4(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_sqr_4(ctx->x, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 18; break; case 18: - sp_256_mont_mul_4(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(ctx->x, ctx->x, ctx->t5, p256_mod); ctx->state = 19; break; case 19: - sp_256_mont_sqr_4(ctx->x, ctx->t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); ctx->state = 20; break; case 20: - sp_256_mont_sub_4(ctx->x, ctx->x, ctx->t5, p256_mod); + sp_256_mont_sub_dbl_4(ctx->x, ctx->x, ctx->y, p256_mod); ctx->state = 21; break; case 21: - sp_256_mont_mul_4(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_4(ctx->y, ctx->y, ctx->x, p256_mod); ctx->state = 22; break; case 22: - sp_256_mont_dbl_4(ctx->t3, ctx->y, p256_mod); + sp_256_mont_mul_4(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 23; break; case 23: - sp_256_mont_sub_4(ctx->x, ctx->x, ctx->t3, p256_mod); + sp_256_mont_sub_4(ctx->y, ctx->y, ctx->t5, p256_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_4(ctx->y, ctx->y, ctx->x, p256_mod); - ctx->state = 25; - break; - case 25: - sp_256_mont_mul_4(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); - ctx->state = 26; - break; - case 26: - sp_256_mont_sub_4(ctx->y, ctx->y, ctx->t5, p256_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 4; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 4; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 4; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_256_proj_point_add_4(sp_point_256* r, - const sp_point_256* p, const sp_point_256* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*4; - sp_digit* t3 = t + 4*4; - sp_digit* t4 = t + 6*4; - sp_digit* t5 = t + 8*4; - sp_digit* t6 = t + 10*4; - - - /* Check double */ - (void)sp_256_sub_4(t1, p256_mod, q->y); - sp_256_norm_4(t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & - (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { - sp_256_proj_point_dbl_4(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t1, t1, p->x, p256_mod, p256_mp_mod); - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_4(t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t4, t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_4(t3, t3, p->y, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod); - /* H = U2 - U1 */ - sp_256_mont_sub_4(t2, t2, t1, p256_mod); - /* R = S2 - S1 */ - sp_256_mont_sub_4(t4, t4, t3, p256_mod); - if (~p->infinity & ~q->infinity & - sp_256_iszero_4(t2) & sp_256_iszero_4(t4) & maskt) { - sp_256_proj_point_dbl_4(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_4(z, p->z, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_4(x, x, t5, p256_mod); - sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_sub_dbl_4(x, x, y, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_4(y, y, x, p256_mod); - sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_4(y, y, t5, p256_mod); - - for (i = 0; i < 4; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 4; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 4; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -24184,7 +24166,7 @@ static void sp_256_proj_point_dbl_n_store_4(sp_point_256* r, /* X = A^2 - 2B */ sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod); sp_256_mont_sub_dbl_4(x, x, b, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_dbl_sub_4(b, b, x, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_4(r[j].z, z, y, p256_mod, p256_mp_mod); @@ -24198,7 +24180,6 @@ static void sp_256_proj_point_dbl_n_store_4(sp_point_256* r, /* y = 2*A*(B - X) - Y^4 */ sp_256_mont_mul_4(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_4(y, y, t1, p256_mod); - /* Y = Y/2 */ sp_256_div2_4(r[j].y, y, p256_mod); r[j].infinity = 0; @@ -24560,8 +24541,8 @@ typedef struct sp_table_entry_256 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_256_proj_point_add_qz1_4(sp_point_256* r, const sp_point_256* p, - const sp_point_256* q, sp_digit* t) +static void sp_256_proj_point_add_qz1_4(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*4; @@ -24570,12 +24551,17 @@ static void sp_256_proj_point_add_qz1_4(sp_point_256* r, const sp_point_256* p, sp_digit* t5 = t + 8*4; sp_digit* t6 = t + 10*4; - /* Check double */ - (void)sp_256_sub_4(t1, p256_mod, q->y); - sp_256_norm_4(t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & - (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_4(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_4(p->x, t2) & + sp_256_cmp_equal_4(p->y, t4)) { sp_256_proj_point_dbl_4(r, p, t); } else { @@ -24587,12 +24573,6 @@ static void sp_256_proj_point_add_qz1_4(sp_point_256* r, const sp_point_256* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_4(t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t4, t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod); /* H = U2 - X1 */ sp_256_mont_sub_4(t2, t2, p->x, p256_mod); /* R = S2 - Y1 */ @@ -40604,7 +40584,7 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_256_norm_4(r); - if (sp_256_iszero_4(r) == 0) { + if (!sp_256_iszero_4(r)) { /* x is modified in calculation of s. */ sp_256_from_mp(x, 4, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -40613,7 +40593,7 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_256_calc_s_4(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_256_iszero_4(s) == 0)) { + if ((err == MP_OKAY) && (!sp_256_iszero_4(s))) { break; } } @@ -43547,7 +43527,7 @@ static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, /* x /= z^2 */ sp_384_mont_mul_6(r->x, p->x, t2, p384_mod, p384_mp_mod); - XMEMSET(r->x + 6, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 6, 0, sizeof(sp_digit) * 6U); sp_384_mont_reduce_6(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_6(r->x, p384_mod); @@ -43556,7 +43536,7 @@ static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, /* y /= z^3 */ sp_384_mont_mul_6(r->y, p->y, t1, p384_mod, p384_mp_mod); - XMEMSET(r->y + 6, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 6, 0, sizeof(sp_digit) * 6U); sp_384_mont_reduce_6(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_6(r->y, p384_mod); @@ -43565,7 +43545,6 @@ static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -43754,6 +43733,61 @@ static void sp_384_div2_6(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_384_proj_point_dbl_6(sp_point_384* r, const sp_point_384* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_6(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_6(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_6(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_6(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_6(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_6(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_6(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_6(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_6(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_div2_6(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_6(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_6(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_6(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_6(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_lower_6(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_6(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_6(y, y, t2, p384_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_dbl_6_ctx { int state; @@ -43764,6 +43798,12 @@ typedef struct sp_384_proj_point_dbl_6_ctx { sp_digit* z; } sp_384_proj_point_dbl_6_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_384_proj_point_dbl_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -43888,62 +43928,6 @@ static int sp_384_proj_point_dbl_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_384_proj_point_dbl_6(sp_point_384* r, const sp_point_384* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*6; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_384_mont_sqr_6(t1, p->z, p384_mod, p384_mp_mod); - /* Z = Y * Z */ - sp_384_mont_mul_6(z, p->y, p->z, p384_mod, p384_mp_mod); - /* Z = 2Z */ - sp_384_mont_dbl_6(z, z, p384_mod); - /* T2 = X - T1 */ - sp_384_mont_sub_6(t2, p->x, t1, p384_mod); - /* T1 = X + T1 */ - sp_384_mont_add_6(t1, p->x, t1, p384_mod); - /* T2 = T1 * T2 */ - sp_384_mont_mul_6(t2, t1, t2, p384_mod, p384_mp_mod); - /* T1 = 3T2 */ - sp_384_mont_tpl_6(t1, t2, p384_mod); - /* Y = 2Y */ - sp_384_mont_dbl_6(y, p->y, p384_mod); - /* Y = Y * Y */ - sp_384_mont_sqr_6(y, y, p384_mod, p384_mp_mod); - /* T2 = Y * Y */ - sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod); - /* T2 = T2/2 */ - sp_384_div2_6(t2, t2, p384_mod); - /* Y = Y * X */ - sp_384_mont_mul_6(y, y, p->x, p384_mod, p384_mp_mod); - /* X = T1 * T1 */ - sp_384_mont_sqr_6(x, t1, p384_mod, p384_mp_mod); - /* X = X - Y */ - sp_384_mont_sub_6(x, x, y, p384_mod); - /* X = X - Y */ - sp_384_mont_sub_6(x, x, y, p384_mod); - /* Y = Y - X */ - sp_384_mont_sub_lower_6(y, y, x, p384_mod); - /* Y = Y * T1 */ - sp_384_mont_mul_6(y, y, t1, p384_mod, p384_mp_mod); - /* Y = Y - T2 */ - sp_384_mont_sub_6(y, y, t2, p384_mod); -} - #define sp_384_mont_dbl_lower_6 sp_384_mont_dbl_6 #define sp_384_mont_tpl_lower_6 sp_384_mont_tpl_6 /* Double the Montgomery form projective point p a number of times. @@ -43975,7 +43959,6 @@ static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int i, /* W = Z^4 */ sp_384_mont_sqr_6(w, z, p384_mod, p384_mp_mod); sp_384_mont_sqr_6(w, w, p384_mod, p384_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -43993,7 +43976,7 @@ static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int i, sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_6(t2, b, p384_mod); sp_384_mont_sub_6(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_6(t2, b, x, p384_mod); sp_384_mont_dbl_lower_6(b, t2, p384_mod); /* Z = Z*Y */ @@ -44023,7 +44006,7 @@ static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int i, sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_6(t2, b, p384_mod); sp_384_mont_sub_6(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_6(t2, b, x, p384_mod); sp_384_mont_dbl_lower_6(b, t2, p384_mod); /* Z = Z*Y */ @@ -44033,7 +44016,7 @@ static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_384_mont_mul_6(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_6(y, y, t1, p384_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_384_div2_6(y, y, p384_mod); } @@ -44062,6 +44045,7 @@ static int sp_384_iszero_6(const sp_digit* a) return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -44069,6 +44053,81 @@ static int sp_384_iszero_6(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_384_proj_point_add_6(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + sp_digit* t3 = t + 4*6; + sp_digit* t4 = t + 6*6; + sp_digit* t5 = t + 8*6; + sp_digit* t6 = t + 10*6; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t1, t1, p->x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_6(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_6(t3, t3, p->y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_6(t2, t1) & + sp_384_cmp_equal_6(t4, t3)) { + sp_384_proj_point_dbl_6(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_384_mont_sub_6(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_6(t4, t4, t3, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_6(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(x, x, t5, p384_mod); + sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(t3, y, p384_mod); + sp_384_mont_sub_6(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_6(y, y, x, p384_mod); + sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(y, y, t5, p384_mod); + for (i = 0; i < 6; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_add_6_ctx { @@ -44087,6 +44146,13 @@ typedef struct sp_384_proj_point_add_6_ctx { sp_digit* z; } sp_384_proj_point_add_6_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_384_proj_point_add_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, const sp_point_384* q, sp_digit* t) { @@ -44118,252 +44184,149 @@ static int sp_384_proj_point_add_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_384_sub_6(ctx->t1, p384_mod, q->y); - sp_384_norm_6(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & - (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_6(ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 2; break; case 2: - err = sp_384_proj_point_dbl_6_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_384_mont_mul_6(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 3; break; case 3: - { + sp_384_mont_mul_6(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_6(ctx->t1, q->z, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_6(ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 5; break; case 5: - sp_384_mont_mul_6(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 6; break; case 6: - sp_384_mont_mul_6(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_6(ctx->t2, p->z, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_6(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); ctx->state = 8; break; case 8: - sp_384_mont_mul_6(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_6(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); ctx->state = 9; break; case 9: - sp_384_mont_mul_6(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_6(ctx->t2, ctx->t1) & + sp_384_cmp_equal_6(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_384_proj_point_dbl_6(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_6(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_6(ctx->t2, ctx->t2, ctx->t1, p384_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_6(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_6(ctx->t4, ctx->t4, ctx->t3, p384_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_384_mont_sub_6(ctx->t2, ctx->t2, ctx->t1, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_6(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_384_mont_sub_6(ctx->t4, ctx->t4, ctx->t3, p384_mod); + sp_384_mont_mul_6(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_6(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 15; break; case 15: - sp_384_mont_mul_6(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_6(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 16; break; case 16: - sp_384_mont_mul_6(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_6(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(ctx->x, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 18; break; case 18: - sp_384_mont_mul_6(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(ctx->x, ctx->x, ctx->t5, p384_mod); ctx->state = 19; break; case 19: - sp_384_mont_sqr_6(ctx->x, ctx->t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); ctx->state = 20; break; case 20: - sp_384_mont_sub_6(ctx->x, ctx->x, ctx->t5, p384_mod); + sp_384_mont_dbl_6(ctx->t3, ctx->y, p384_mod); + sp_384_mont_sub_6(ctx->x, ctx->x, ctx->t3, p384_mod); ctx->state = 21; break; case 21: - sp_384_mont_mul_6(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_6(ctx->y, ctx->y, ctx->x, p384_mod); ctx->state = 22; break; case 22: - sp_384_mont_dbl_6(ctx->t3, ctx->y, p384_mod); + sp_384_mont_mul_6(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 23; break; case 23: - sp_384_mont_sub_6(ctx->x, ctx->x, ctx->t3, p384_mod); + sp_384_mont_sub_6(ctx->y, ctx->y, ctx->t5, p384_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_6(ctx->y, ctx->y, ctx->x, p384_mod); - ctx->state = 25; - break; - case 25: - sp_384_mont_mul_6(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); - ctx->state = 26; - break; - case 26: - sp_384_mont_sub_6(ctx->y, ctx->y, ctx->t5, p384_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 6; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 6; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 6; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_384_proj_point_add_6(sp_point_384* r, - const sp_point_384* p, const sp_point_384* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*6; - sp_digit* t3 = t + 4*6; - sp_digit* t4 = t + 6*6; - sp_digit* t5 = t + 8*6; - sp_digit* t6 = t + 10*6; - - - /* Check double */ - (void)sp_384_sub_6(t1, p384_mod, q->y); - sp_384_norm_6(t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & - (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { - sp_384_proj_point_dbl_6(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t1, t1, p->x, p384_mod, p384_mp_mod); - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_6(t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t4, t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_6(t3, t3, p->y, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); - /* H = U2 - U1 */ - sp_384_mont_sub_6(t2, t2, t1, p384_mod); - /* R = S2 - S1 */ - sp_384_mont_sub_6(t4, t4, t3, p384_mod); - if (~p->infinity & ~q->infinity & - sp_384_iszero_6(t2) & sp_384_iszero_6(t4) & maskt) { - sp_384_proj_point_dbl_6(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_6(z, p->z, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_6(x, x, t5, p384_mod); - sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_dbl_6(t3, y, p384_mod); - sp_384_mont_sub_6(x, x, t3, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_6(y, y, x, p384_mod); - sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_6(y, y, t5, p384_mod); - - for (i = 0; i < 6; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 6; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 6; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -44416,7 +44379,7 @@ static void sp_384_proj_point_dbl_n_store_6(sp_point_384* r, sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_6(t2, b, p384_mod); sp_384_mont_sub_6(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_6(t2, b, x, p384_mod); sp_384_mont_dbl_lower_6(b, t2, p384_mod); /* Z = Z*Y */ @@ -44431,7 +44394,6 @@ static void sp_384_proj_point_dbl_n_store_6(sp_point_384* r, /* y = 2*A*(B - X) - Y^4 */ sp_384_mont_mul_6(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_6(y, y, t1, p384_mod); - /* Y = Y/2 */ sp_384_div2_6(r[j].y, y, p384_mod); r[j].infinity = 0; @@ -44805,8 +44767,8 @@ typedef struct sp_table_entry_384 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_384_proj_point_add_qz1_6(sp_point_384* r, const sp_point_384* p, - const sp_point_384* q, sp_digit* t) +static void sp_384_proj_point_add_qz1_6(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*6; @@ -44815,12 +44777,17 @@ static void sp_384_proj_point_add_qz1_6(sp_point_384* r, const sp_point_384* p, sp_digit* t5 = t + 8*6; sp_digit* t6 = t + 10*6; - /* Check double */ - (void)sp_384_sub_6(t1, p384_mod, q->y); - sp_384_norm_6(t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & - (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_6(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_6(p->x, t2) & + sp_384_cmp_equal_6(p->y, t4)) { sp_384_proj_point_dbl_6(r, p, t); } else { @@ -44832,12 +44799,6 @@ static void sp_384_proj_point_add_qz1_6(sp_point_384* r, const sp_point_384* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_6(t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t4, t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); /* H = U2 - X1 */ sp_384_mont_sub_6(t2, t2, p->x, p384_mod); /* R = S2 - Y1 */ @@ -66728,7 +66689,7 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_384_norm_6(r); - if (sp_384_iszero_6(r) == 0) { + if (!sp_384_iszero_6(r)) { /* x is modified in calculation of s. */ sp_384_from_mp(x, 6, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -66737,7 +66698,7 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_384_calc_s_6(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_384_iszero_6(s) == 0)) { + if ((err == MP_OKAY) && (!sp_384_iszero_6(s))) { break; } } @@ -71694,7 +71655,7 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, /* x /= z^2 */ sp_521_mont_mul_9(r->x, p->x, t2, p521_mod, p521_mp_mod); - XMEMSET(r->x + 9, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 9, 0, sizeof(sp_digit) * 9U); sp_521_mont_reduce_9(r->x, p521_mod, p521_mp_mod); /* Reduce x to less than modulus */ n = sp_521_cmp_9(r->x, p521_mod); @@ -71703,7 +71664,7 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, /* y /= z^3 */ sp_521_mont_mul_9(r->y, p->y, t1, p521_mod, p521_mp_mod); - XMEMSET(r->y + 9, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 9, 0, sizeof(sp_digit) * 9U); sp_521_mont_reduce_9(r->y, p521_mod, p521_mp_mod); /* Reduce y to less than modulus */ n = sp_521_cmp_9(r->y, p521_mod); @@ -71712,7 +71673,6 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -72069,6 +72029,61 @@ static void sp_521_div2_9(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_521_proj_point_dbl_9(sp_point_521* r, const sp_point_521* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*9; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_521_mont_sqr_9(t1, p->z, p521_mod, p521_mp_mod); + /* Z = Y * Z */ + sp_521_mont_mul_9(z, p->y, p->z, p521_mod, p521_mp_mod); + /* Z = 2Z */ + sp_521_mont_dbl_9(z, z, p521_mod); + /* T2 = X - T1 */ + sp_521_mont_sub_9(t2, p->x, t1, p521_mod); + /* T1 = X + T1 */ + sp_521_mont_add_9(t1, p->x, t1, p521_mod); + /* T2 = T1 * T2 */ + sp_521_mont_mul_9(t2, t1, t2, p521_mod, p521_mp_mod); + /* T1 = 3T2 */ + sp_521_mont_tpl_9(t1, t2, p521_mod); + /* Y = 2Y */ + sp_521_mont_dbl_9(y, p->y, p521_mod); + /* Y = Y * Y */ + sp_521_mont_sqr_9(y, y, p521_mod, p521_mp_mod); + /* T2 = Y * Y */ + sp_521_mont_sqr_9(t2, y, p521_mod, p521_mp_mod); + /* T2 = T2/2 */ + sp_521_div2_9(t2, t2, p521_mod); + /* Y = Y * X */ + sp_521_mont_mul_9(y, y, p->x, p521_mod, p521_mp_mod); + /* X = T1 * T1 */ + sp_521_mont_sqr_9(x, t1, p521_mod, p521_mp_mod); + /* X = X - Y */ + sp_521_mont_sub_9(x, x, y, p521_mod); + /* X = X - Y */ + sp_521_mont_sub_9(x, x, y, p521_mod); + /* Y = Y - X */ + sp_521_mont_sub_lower_9(y, y, x, p521_mod); + /* Y = Y * T1 */ + sp_521_mont_mul_9(y, y, t1, p521_mod, p521_mp_mod); + /* Y = Y - T2 */ + sp_521_mont_sub_9(y, y, t2, p521_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_521_proj_point_dbl_9_ctx { int state; @@ -72079,6 +72094,12 @@ typedef struct sp_521_proj_point_dbl_9_ctx { sp_digit* z; } sp_521_proj_point_dbl_9_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -72203,62 +72224,6 @@ static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_521_proj_point_dbl_9(sp_point_521* r, const sp_point_521* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*9; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_521_mont_sqr_9(t1, p->z, p521_mod, p521_mp_mod); - /* Z = Y * Z */ - sp_521_mont_mul_9(z, p->y, p->z, p521_mod, p521_mp_mod); - /* Z = 2Z */ - sp_521_mont_dbl_9(z, z, p521_mod); - /* T2 = X - T1 */ - sp_521_mont_sub_9(t2, p->x, t1, p521_mod); - /* T1 = X + T1 */ - sp_521_mont_add_9(t1, p->x, t1, p521_mod); - /* T2 = T1 * T2 */ - sp_521_mont_mul_9(t2, t1, t2, p521_mod, p521_mp_mod); - /* T1 = 3T2 */ - sp_521_mont_tpl_9(t1, t2, p521_mod); - /* Y = 2Y */ - sp_521_mont_dbl_9(y, p->y, p521_mod); - /* Y = Y * Y */ - sp_521_mont_sqr_9(y, y, p521_mod, p521_mp_mod); - /* T2 = Y * Y */ - sp_521_mont_sqr_9(t2, y, p521_mod, p521_mp_mod); - /* T2 = T2/2 */ - sp_521_div2_9(t2, t2, p521_mod); - /* Y = Y * X */ - sp_521_mont_mul_9(y, y, p->x, p521_mod, p521_mp_mod); - /* X = T1 * T1 */ - sp_521_mont_sqr_9(x, t1, p521_mod, p521_mp_mod); - /* X = X - Y */ - sp_521_mont_sub_9(x, x, y, p521_mod); - /* X = X - Y */ - sp_521_mont_sub_9(x, x, y, p521_mod); - /* Y = Y - X */ - sp_521_mont_sub_lower_9(y, y, x, p521_mod); - /* Y = Y * T1 */ - sp_521_mont_mul_9(y, y, t1, p521_mod, p521_mp_mod); - /* Y = Y - T2 */ - sp_521_mont_sub_9(y, y, t2, p521_mod); -} - #define sp_521_mont_dbl_lower_9 sp_521_mont_dbl_9 #define sp_521_mont_tpl_lower_9 sp_521_mont_tpl_9 /* Double the Montgomery form projective point p a number of times. @@ -72290,7 +72255,6 @@ static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i, /* W = Z^4 */ sp_521_mont_sqr_9(w, z, p521_mod, p521_mp_mod); sp_521_mont_sqr_9(w, w, p521_mod, p521_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -72308,7 +72272,7 @@ static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i, sp_521_mont_sqr_9(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_9(t2, b, p521_mod); sp_521_mont_sub_9(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_9(t2, b, x, p521_mod); sp_521_mont_dbl_lower_9(b, t2, p521_mod); /* Z = Z*Y */ @@ -72338,7 +72302,7 @@ static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i, sp_521_mont_sqr_9(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_9(t2, b, p521_mod); sp_521_mont_sub_9(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_9(t2, b, x, p521_mod); sp_521_mont_dbl_lower_9(b, t2, p521_mod); /* Z = Z*Y */ @@ -72348,7 +72312,7 @@ static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_521_mont_mul_9(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_9(y, y, t1, p521_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_521_div2_9(y, y, p521_mod); } @@ -72379,6 +72343,7 @@ static int sp_521_iszero_9(const sp_digit* a) a[8]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -72386,6 +72351,81 @@ static int sp_521_iszero_9(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_521_proj_point_add_9(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*9; + sp_digit* t3 = t + 4*9; + sp_digit* t4 = t + 6*9; + sp_digit* t5 = t + 8*9; + sp_digit* t6 = t + 10*9; + + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_9(t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t3, t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t1, t1, p->x, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_9(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_9(t3, t3, p->y, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_9(t2, t1) & + sp_521_cmp_equal_9(t4, t3)) { + sp_521_proj_point_dbl_9(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_521_mont_sub_9(t2, t2, t1, p521_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_9(t4, t4, t3, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_9(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_9(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(x, x, t5, p521_mod); + sp_521_mont_mul_9(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_9(t3, y, p521_mod); + sp_521_mont_sub_9(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_9(y, y, x, p521_mod); + sp_521_mont_mul_9(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(y, y, t5, p521_mod); + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_521_proj_point_add_9_ctx { @@ -72404,6 +72444,13 @@ typedef struct sp_521_proj_point_add_9_ctx { sp_digit* z; } sp_521_proj_point_add_9_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_521_proj_point_add_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, const sp_point_521* q, sp_digit* t) { @@ -72435,252 +72482,149 @@ static int sp_521_proj_point_add_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_521_sub_9(ctx->t1, p521_mod, q->y); - sp_521_norm_9(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & - (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_9(ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 2; break; case 2: - err = sp_521_proj_point_dbl_9_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_521_mont_mul_9(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 3; break; case 3: - { + sp_521_mont_mul_9(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_521_mont_sqr_9(ctx->t1, q->z, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_9(ctx->t2, p->z, p521_mod, p521_mp_mod); ctx->state = 5; break; case 5: - sp_521_mont_mul_9(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); ctx->state = 6; break; case 6: - sp_521_mont_mul_9(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_9(ctx->t2, p->z, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_9(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); ctx->state = 8; break; case 8: - sp_521_mont_mul_9(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_9(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); ctx->state = 9; break; case 9: - sp_521_mont_mul_9(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_9(ctx->t2, ctx->t1) & + sp_521_cmp_equal_9(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_521_proj_point_dbl_9(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_521_mont_mul_9(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); + /* H = U2 - U1 */ + sp_521_mont_sub_9(ctx->t2, ctx->t2, ctx->t1, p521_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_9(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_9(ctx->t4, ctx->t4, ctx->t3, p521_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_521_mont_sub_9(ctx->t2, ctx->t2, ctx->t1, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_9(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_521_mont_sub_9(ctx->t4, ctx->t4, ctx->t3, p521_mod); + sp_521_mont_mul_9(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_9(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 15; break; case 15: - sp_521_mont_mul_9(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_9(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 16; break; case 16: - sp_521_mont_mul_9(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_9(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(ctx->x, ctx->t4, p521_mod, p521_mp_mod); ctx->state = 18; break; case 18: - sp_521_mont_mul_9(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(ctx->x, ctx->x, ctx->t5, p521_mod); ctx->state = 19; break; case 19: - sp_521_mont_sqr_9(ctx->x, ctx->t4, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); ctx->state = 20; break; case 20: - sp_521_mont_sub_9(ctx->x, ctx->x, ctx->t5, p521_mod); + sp_521_mont_dbl_9(ctx->t3, ctx->y, p521_mod); + sp_521_mont_sub_9(ctx->x, ctx->x, ctx->t3, p521_mod); ctx->state = 21; break; case 21: - sp_521_mont_mul_9(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_9(ctx->y, ctx->y, ctx->x, p521_mod); ctx->state = 22; break; case 22: - sp_521_mont_dbl_9(ctx->t3, ctx->y, p521_mod); + sp_521_mont_mul_9(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); ctx->state = 23; break; case 23: - sp_521_mont_sub_9(ctx->x, ctx->x, ctx->t3, p521_mod); + sp_521_mont_sub_9(ctx->y, ctx->y, ctx->t5, p521_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_9(ctx->y, ctx->y, ctx->x, p521_mod); - ctx->state = 25; - break; - case 25: - sp_521_mont_mul_9(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); - ctx->state = 26; - break; - case 26: - sp_521_mont_sub_9(ctx->y, ctx->y, ctx->t5, p521_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 9; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 9; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 9; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_521_proj_point_add_9(sp_point_521* r, - const sp_point_521* p, const sp_point_521* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*9; - sp_digit* t3 = t + 4*9; - sp_digit* t4 = t + 6*9; - sp_digit* t5 = t + 8*9; - sp_digit* t6 = t + 10*9; - - - /* Check double */ - (void)sp_521_sub_9(t1, p521_mod, q->y); - sp_521_norm_9(t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & - (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, t1))) != 0) { - sp_521_proj_point_dbl_9(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_521_mont_sqr_9(t1, q->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t3, t1, q->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t1, t1, p->x, p521_mod, p521_mp_mod); - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_9(t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t4, t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_521_mont_mul_9(t3, t3, p->y, p521_mod, p521_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod); - /* H = U2 - U1 */ - sp_521_mont_sub_9(t2, t2, t1, p521_mod); - /* R = S2 - S1 */ - sp_521_mont_sub_9(t4, t4, t3, p521_mod); - if (~p->infinity & ~q->infinity & - sp_521_iszero_9(t2) & sp_521_iszero_9(t4) & maskt) { - sp_521_proj_point_dbl_9(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_9(t5, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(y, t1, t5, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t5, t5, t2, p521_mod, p521_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_9(z, p->z, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(z, z, q->z, p521_mod, p521_mp_mod); - sp_521_mont_sqr_9(x, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_9(x, x, t5, p521_mod); - sp_521_mont_mul_9(t5, t5, t3, p521_mod, p521_mp_mod); - sp_521_mont_dbl_9(t3, y, p521_mod); - sp_521_mont_sub_9(x, x, t3, p521_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_9(y, y, x, p521_mod); - sp_521_mont_mul_9(y, y, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_9(y, y, t5, p521_mod); - - for (i = 0; i < 9; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 9; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 9; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -72733,7 +72677,7 @@ static void sp_521_proj_point_dbl_n_store_9(sp_point_521* r, sp_521_mont_sqr_9(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_9(t2, b, p521_mod); sp_521_mont_sub_9(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_9(t2, b, x, p521_mod); sp_521_mont_dbl_lower_9(b, t2, p521_mod); /* Z = Z*Y */ @@ -72748,7 +72692,6 @@ static void sp_521_proj_point_dbl_n_store_9(sp_point_521* r, /* y = 2*A*(B - X) - Y^4 */ sp_521_mont_mul_9(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_9(y, y, t1, p521_mod); - /* Y = Y/2 */ sp_521_div2_9(r[j].y, y, p521_mod); r[j].infinity = 0; @@ -73140,8 +73083,8 @@ typedef struct sp_table_entry_521 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_521_proj_point_add_qz1_9(sp_point_521* r, const sp_point_521* p, - const sp_point_521* q, sp_digit* t) +static void sp_521_proj_point_add_qz1_9(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*9; @@ -73150,12 +73093,17 @@ static void sp_521_proj_point_add_qz1_9(sp_point_521* r, const sp_point_521* p, sp_digit* t5 = t + 8*9; sp_digit* t6 = t + 10*9; - /* Check double */ - (void)sp_521_sub_9(t1, p521_mod, q->y); - sp_521_norm_9(t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & - (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_9(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_9(p->x, t2) & + sp_521_cmp_equal_9(p->y, t4)) { sp_521_proj_point_dbl_9(r, p, t); } else { @@ -73167,12 +73115,6 @@ static void sp_521_proj_point_add_qz1_9(sp_point_521* r, const sp_point_521* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_9(t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t4, t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod); /* H = U2 - X1 */ sp_521_mont_sub_9(t2, t2, p->x, p521_mod); /* R = S2 - Y1 */ @@ -111550,7 +111492,7 @@ int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_521_norm_9(r); - if (sp_521_iszero_9(r) == 0) { + if (!sp_521_iszero_9(r)) { /* x is modified in calculation of s. */ sp_521_from_mp(x, 9, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -111564,7 +111506,7 @@ int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_521_calc_s_9(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_521_iszero_9(s) == 0)) { + if ((err == MP_OKAY) && (!sp_521_iszero_9(s))) { break; } } @@ -115415,7 +115357,7 @@ static void sp_1024_map_16(sp_point_1024* r, const sp_point_1024* p, /* x /= z^2 */ sp_1024_mont_mul_16(r->x, p->x, t2, p1024_mod, p1024_mp_mod); - XMEMSET(r->x + 16, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 16, 0, sizeof(sp_digit) * 16U); sp_1024_mont_reduce_16(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_16(r->x, p1024_mod); @@ -115424,7 +115366,7 @@ static void sp_1024_map_16(sp_point_1024* r, const sp_point_1024* p, /* y /= z^3 */ sp_1024_mont_mul_16(r->y, p->y, t1, p1024_mod, p1024_mp_mod); - XMEMSET(r->y + 16, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 16, 0, sizeof(sp_digit) * 16U); sp_1024_mont_reduce_16(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_16(r->y, p1024_mod); @@ -115433,7 +115375,6 @@ static void sp_1024_map_16(sp_point_1024* r, const sp_point_1024* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -116083,6 +116024,61 @@ static void sp_1024_div2_16(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_1024_proj_point_dbl_16(sp_point_1024* r, const sp_point_1024* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*16; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_1024_mont_sqr_16(t1, p->z, p1024_mod, p1024_mp_mod); + /* Z = Y * Z */ + sp_1024_mont_mul_16(z, p->y, p->z, p1024_mod, p1024_mp_mod); + /* Z = 2Z */ + sp_1024_mont_dbl_16(z, z, p1024_mod); + /* T2 = X - T1 */ + sp_1024_mont_sub_16(t2, p->x, t1, p1024_mod); + /* T1 = X + T1 */ + sp_1024_mont_add_16(t1, p->x, t1, p1024_mod); + /* T2 = T1 * T2 */ + sp_1024_mont_mul_16(t2, t1, t2, p1024_mod, p1024_mp_mod); + /* T1 = 3T2 */ + sp_1024_mont_tpl_16(t1, t2, p1024_mod); + /* Y = 2Y */ + sp_1024_mont_dbl_16(y, p->y, p1024_mod); + /* Y = Y * Y */ + sp_1024_mont_sqr_16(y, y, p1024_mod, p1024_mp_mod); + /* T2 = Y * Y */ + sp_1024_mont_sqr_16(t2, y, p1024_mod, p1024_mp_mod); + /* T2 = T2/2 */ + sp_1024_div2_16(t2, t2, p1024_mod); + /* Y = Y * X */ + sp_1024_mont_mul_16(y, y, p->x, p1024_mod, p1024_mp_mod); + /* X = T1 * T1 */ + sp_1024_mont_sqr_16(x, t1, p1024_mod, p1024_mp_mod); + /* X = X - Y */ + sp_1024_mont_sub_16(x, x, y, p1024_mod); + /* X = X - Y */ + sp_1024_mont_sub_16(x, x, y, p1024_mod); + /* Y = Y - X */ + sp_1024_mont_sub_lower_16(y, y, x, p1024_mod); + /* Y = Y * T1 */ + sp_1024_mont_mul_16(y, y, t1, p1024_mod, p1024_mp_mod); + /* Y = Y - T2 */ + sp_1024_mont_sub_16(y, y, t2, p1024_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_dbl_16_ctx { int state; @@ -116093,6 +116089,12 @@ typedef struct sp_1024_proj_point_dbl_16_ctx { sp_digit* z; } sp_1024_proj_point_dbl_16_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_1024_proj_point_dbl_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -116217,62 +116219,6 @@ static int sp_1024_proj_point_dbl_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_1024_proj_point_dbl_16(sp_point_1024* r, const sp_point_1024* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*16; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_1024_mont_sqr_16(t1, p->z, p1024_mod, p1024_mp_mod); - /* Z = Y * Z */ - sp_1024_mont_mul_16(z, p->y, p->z, p1024_mod, p1024_mp_mod); - /* Z = 2Z */ - sp_1024_mont_dbl_16(z, z, p1024_mod); - /* T2 = X - T1 */ - sp_1024_mont_sub_16(t2, p->x, t1, p1024_mod); - /* T1 = X + T1 */ - sp_1024_mont_add_16(t1, p->x, t1, p1024_mod); - /* T2 = T1 * T2 */ - sp_1024_mont_mul_16(t2, t1, t2, p1024_mod, p1024_mp_mod); - /* T1 = 3T2 */ - sp_1024_mont_tpl_16(t1, t2, p1024_mod); - /* Y = 2Y */ - sp_1024_mont_dbl_16(y, p->y, p1024_mod); - /* Y = Y * Y */ - sp_1024_mont_sqr_16(y, y, p1024_mod, p1024_mp_mod); - /* T2 = Y * Y */ - sp_1024_mont_sqr_16(t2, y, p1024_mod, p1024_mp_mod); - /* T2 = T2/2 */ - sp_1024_div2_16(t2, t2, p1024_mod); - /* Y = Y * X */ - sp_1024_mont_mul_16(y, y, p->x, p1024_mod, p1024_mp_mod); - /* X = T1 * T1 */ - sp_1024_mont_sqr_16(x, t1, p1024_mod, p1024_mp_mod); - /* X = X - Y */ - sp_1024_mont_sub_16(x, x, y, p1024_mod); - /* X = X - Y */ - sp_1024_mont_sub_16(x, x, y, p1024_mod); - /* Y = Y - X */ - sp_1024_mont_sub_lower_16(y, y, x, p1024_mod); - /* Y = Y * T1 */ - sp_1024_mont_mul_16(y, y, t1, p1024_mod, p1024_mp_mod); - /* Y = Y - T2 */ - sp_1024_mont_sub_16(y, y, t2, p1024_mod); -} - #define sp_1024_mont_dbl_lower_16 sp_1024_mont_dbl_16 #define sp_1024_mont_tpl_lower_16 sp_1024_mont_tpl_16 /* Double the Montgomery form projective point p a number of times. @@ -116304,7 +116250,6 @@ static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int i, /* W = Z^4 */ sp_1024_mont_sqr_16(w, z, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_16(w, w, p1024_mod, p1024_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -116322,7 +116267,7 @@ static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int i, sp_1024_mont_sqr_16(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_16(t2, b, p1024_mod); sp_1024_mont_sub_16(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_16(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_16(b, t2, p1024_mod); /* Z = Z*Y */ @@ -116352,7 +116297,7 @@ static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int i, sp_1024_mont_sqr_16(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_16(t2, b, p1024_mod); sp_1024_mont_sub_16(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_16(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_16(b, t2, p1024_mod); /* Z = Z*Y */ @@ -116362,7 +116307,7 @@ static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_1024_mont_mul_16(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_16(y, y, t1, p1024_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_1024_div2_16(y, y, p1024_mod); } @@ -116494,6 +116439,7 @@ static int sp_1024_iszero_16(const sp_digit* a) a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -116501,6 +116447,81 @@ static int sp_1024_iszero_16(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_1024_proj_point_add_16(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*16; + sp_digit* t3 = t + 4*16; + sp_digit* t4 = t + 6*16; + sp_digit* t5 = t + 8*16; + sp_digit* t6 = t + 10*16; + + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_16(t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t3, t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t1, t1, p->x, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_16(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_16(t3, t3, p->y, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_16(t2, t1) & + sp_1024_cmp_equal_16(t4, t3)) { + sp_1024_proj_point_dbl_16(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_1024_mont_sub_16(t2, t2, t1, p1024_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_16(t4, t4, t3, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_16(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_16(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_16(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(x, x, t5, p1024_mod); + sp_1024_mont_mul_16(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_16(t3, y, p1024_mod); + sp_1024_mont_sub_16(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_16(y, y, x, p1024_mod); + sp_1024_mont_mul_16(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(y, y, t5, p1024_mod); + for (i = 0; i < 16; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_add_16_ctx { @@ -116519,6 +116540,13 @@ typedef struct sp_1024_proj_point_add_16_ctx { sp_digit* z; } sp_1024_proj_point_add_16_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_1024_proj_point_add_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { @@ -116550,252 +116578,149 @@ static int sp_1024_proj_point_add_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_1024_sub_16(ctx->t1, p1024_mod, q->y); - sp_1024_norm_16(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & - (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_16(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 2; break; case 2: - err = sp_1024_proj_point_dbl_16_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_1024_mont_mul_16(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 3; break; case 3: - { + sp_1024_mont_mul_16(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_16(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_16(ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 5; break; case 5: - sp_1024_mont_mul_16(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 6; break; case 6: - sp_1024_mont_mul_16(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_16(ctx->t2, p->z, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_16(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); ctx->state = 8; break; case 8: - sp_1024_mont_mul_16(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_16(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); ctx->state = 9; break; case 9: - sp_1024_mont_mul_16(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_16(ctx->t2, ctx->t1) & + sp_1024_cmp_equal_16(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_1024_proj_point_dbl_16(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_16(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); + /* H = U2 - U1 */ + sp_1024_mont_sub_16(ctx->t2, ctx->t2, ctx->t1, p1024_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_16(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_16(ctx->t4, ctx->t4, ctx->t3, p1024_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_1024_mont_sub_16(ctx->t2, ctx->t2, ctx->t1, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_16(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_1024_mont_sub_16(ctx->t4, ctx->t4, ctx->t3, p1024_mod); + sp_1024_mont_mul_16(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_16(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 15; break; case 15: - sp_1024_mont_mul_16(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_16(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 16; break; case 16: - sp_1024_mont_mul_16(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_16(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_16(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 18; break; case 18: - sp_1024_mont_mul_16(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(ctx->x, ctx->x, ctx->t5, p1024_mod); ctx->state = 19; break; case 19: - sp_1024_mont_sqr_16(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); ctx->state = 20; break; case 20: - sp_1024_mont_sub_16(ctx->x, ctx->x, ctx->t5, p1024_mod); + sp_1024_mont_dbl_16(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_sub_16(ctx->x, ctx->x, ctx->t3, p1024_mod); ctx->state = 21; break; case 21: - sp_1024_mont_mul_16(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_16(ctx->y, ctx->y, ctx->x, p1024_mod); ctx->state = 22; break; case 22: - sp_1024_mont_dbl_16(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_mul_16(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 23; break; case 23: - sp_1024_mont_sub_16(ctx->x, ctx->x, ctx->t3, p1024_mod); + sp_1024_mont_sub_16(ctx->y, ctx->y, ctx->t5, p1024_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_16(ctx->y, ctx->y, ctx->x, p1024_mod); - ctx->state = 25; - break; - case 25: - sp_1024_mont_mul_16(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); - ctx->state = 26; - break; - case 26: - sp_1024_mont_sub_16(ctx->y, ctx->y, ctx->t5, p1024_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 16; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 16; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 16; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_1024_proj_point_add_16(sp_point_1024* r, - const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*16; - sp_digit* t3 = t + 4*16; - sp_digit* t4 = t + 6*16; - sp_digit* t5 = t + 8*16; - sp_digit* t6 = t + 10*16; - - - /* Check double */ - (void)sp_1024_mont_sub_16(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_16(t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & - (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, t1))) != 0) { - sp_1024_proj_point_dbl_16(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_16(t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t3, t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t1, t1, p->x, p1024_mod, p1024_mp_mod); - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_16(t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t4, t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_16(t3, t3, p->y, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); - /* H = U2 - U1 */ - sp_1024_mont_sub_16(t2, t2, t1, p1024_mod); - /* R = S2 - S1 */ - sp_1024_mont_sub_16(t4, t4, t3, p1024_mod); - if (~p->infinity & ~q->infinity & - sp_1024_iszero_16(t2) & sp_1024_iszero_16(t4) & maskt) { - sp_1024_proj_point_dbl_16(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_16(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t5, t5, t2, p1024_mod, p1024_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_16(z, p->z, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_16(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_16(x, x, t5, p1024_mod); - sp_1024_mont_mul_16(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_16(t3, y, p1024_mod); - sp_1024_mont_sub_16(x, x, t3, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_16(y, y, x, p1024_mod); - sp_1024_mont_mul_16(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_16(y, y, t5, p1024_mod); - - for (i = 0; i < 16; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 16; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 16; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -116848,7 +116773,7 @@ static void sp_1024_proj_point_dbl_n_store_16(sp_point_1024* r, sp_1024_mont_sqr_16(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_16(t2, b, p1024_mod); sp_1024_mont_sub_16(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_16(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_16(b, t2, p1024_mod); /* Z = Z*Y */ @@ -116863,7 +116788,6 @@ static void sp_1024_proj_point_dbl_n_store_16(sp_point_1024* r, /* y = 2*A*(B - X) - Y^4 */ sp_1024_mont_mul_16(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_16(y, y, t1, p1024_mod); - /* Y = Y/2 */ sp_1024_div2_16(r[j].y, y, p1024_mod); r[j].infinity = 0; @@ -117194,8 +117118,8 @@ typedef struct sp_table_entry_1024 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_1024_proj_point_add_qz1_16(sp_point_1024* r, const sp_point_1024* p, - const sp_point_1024* q, sp_digit* t) +static void sp_1024_proj_point_add_qz1_16(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*16; @@ -117204,12 +117128,17 @@ static void sp_1024_proj_point_add_qz1_16(sp_point_1024* r, const sp_point_1024* sp_digit* t5 = t + 8*16; sp_digit* t6 = t + 10*16; - /* Check double */ - (void)sp_1024_mont_sub_16(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_16(t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & - (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_16(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_16(p->x, t2) & + sp_1024_cmp_equal_16(p->y, t4)) { sp_1024_proj_point_dbl_16(r, p, t); } else { @@ -117221,12 +117150,6 @@ static void sp_1024_proj_point_add_qz1_16(sp_point_1024* r, const sp_point_1024* sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_16(t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t4, t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); /* H = U2 - X1 */ sp_1024_mont_sub_16(t2, t2, p->x, p1024_mod); /* R = S2 - Y1 */ diff --git a/wolfcrypt/src/sp_armthumb.c b/wolfcrypt/src/sp_armthumb.c index 6395b7845c..959b37806d 100644 --- a/wolfcrypt/src/sp_armthumb.c +++ b/wolfcrypt/src/sp_armthumb.c @@ -99287,7 +99287,7 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, /* x /= z^2 */ sp_256_mont_mul_8(r->x, p->x, t2, p256_mod, p256_mp_mod); - XMEMSET(r->x + 8, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 8, 0, sizeof(sp_digit) * 8U); sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_8(r->x, p256_mod); @@ -99296,7 +99296,7 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, /* y /= z^3 */ sp_256_mont_mul_8(r->y, p->y, t1, p256_mod, p256_mp_mod); - XMEMSET(r->y + 8, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 8, 0, sizeof(sp_digit) * 8U); sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_8(r->y, p256_mod); @@ -99305,7 +99305,6 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -100463,6 +100462,61 @@ SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, * p Point to double. * t Temporary ordinate data. */ +static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_8(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_8(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_8(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_8(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_8(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_8(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_8(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_8(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_lower_8(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_8(y, y, t2, p256_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_dbl_8_ctx { int state; @@ -100473,6 +100527,12 @@ typedef struct sp_256_proj_point_dbl_8_ctx { sp_digit* z; } sp_256_proj_point_dbl_8_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -100597,62 +100657,6 @@ static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*8; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod); - /* Z = Y * Z */ - sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod); - /* Z = 2Z */ - sp_256_mont_dbl_8(z, z, p256_mod); - /* T2 = X - T1 */ - sp_256_mont_sub_8(t2, p->x, t1, p256_mod); - /* T1 = X + T1 */ - sp_256_mont_add_8(t1, p->x, t1, p256_mod); - /* T2 = T1 * T2 */ - sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod); - /* T1 = 3T2 */ - sp_256_mont_tpl_8(t1, t2, p256_mod); - /* Y = 2Y */ - sp_256_mont_dbl_8(y, p->y, p256_mod); - /* Y = Y * Y */ - sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod); - /* T2 = Y * Y */ - sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); - /* T2 = T2/2 */ - sp_256_div2_8(t2, t2, p256_mod); - /* Y = Y * X */ - sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod); - /* X = T1 * T1 */ - sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod); - /* X = X - Y */ - sp_256_mont_sub_8(x, x, y, p256_mod); - /* X = X - Y */ - sp_256_mont_sub_8(x, x, y, p256_mod); - /* Y = Y - X */ - sp_256_mont_sub_lower_8(y, y, x, p256_mod); - /* Y = Y * T1 */ - sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod); - /* Y = Y - T2 */ - sp_256_mont_sub_8(y, y, t2, p256_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -100678,6 +100682,7 @@ static int sp_256_iszero_8(const sp_digit* a) return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -100685,6 +100690,81 @@ static int sp_256_iszero_8(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_256_proj_point_add_8(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + sp_digit* t3 = t + 4*8; + sp_digit* t4 = t + 6*8; + sp_digit* t5 = t + 8*8; + sp_digit* t6 = t + 10*8; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t1, t1, p->x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_8(t3, t3, p->y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_8(t2, t1) & + sp_256_cmp_equal_8(t4, t3)) { + sp_256_proj_point_dbl_8(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_256_mont_sub_8(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_8(t4, t4, t3, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_8(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(x, x, t5, p256_mod); + sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(t3, y, p256_mod); + sp_256_mont_sub_8(x, x, t3, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_8(y, y, x, p256_mod); + sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(y, y, t5, p256_mod); + for (i = 0; i < 8; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_add_8_ctx { @@ -100703,6 +100783,13 @@ typedef struct sp_256_proj_point_add_8_ctx { sp_digit* z; } sp_256_proj_point_add_8_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_256_proj_point_add_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, const sp_point_256* q, sp_digit* t) { @@ -100734,252 +100821,149 @@ static int sp_256_proj_point_add_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_256_sub_8(ctx->t1, p256_mod, q->y); - sp_256_norm_8(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & - (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_8(ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 2; break; case 2: - err = sp_256_proj_point_dbl_8_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_256_mont_mul_8(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 3; break; case 3: - { + sp_256_mont_mul_8(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_8(ctx->t1, q->z, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 5; break; case 5: - sp_256_mont_mul_8(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 6; break; case 6: - sp_256_mont_mul_8(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_8(ctx->t2, p->z, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_8(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); ctx->state = 8; break; case 8: - sp_256_mont_mul_8(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); ctx->state = 9; break; case 9: - sp_256_mont_mul_8(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_8(ctx->t2, ctx->t1) & + sp_256_cmp_equal_8(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_256_proj_point_dbl_8(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_8(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_8(ctx->t2, ctx->t2, ctx->t1, p256_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_8(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_8(ctx->t4, ctx->t4, ctx->t3, p256_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_256_mont_sub_8(ctx->t2, ctx->t2, ctx->t1, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_8(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_256_mont_sub_8(ctx->t4, ctx->t4, ctx->t3, p256_mod); + sp_256_mont_mul_8(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_8(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 15; break; case 15: - sp_256_mont_mul_8(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_8(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 16; break; case 16: - sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_8(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(ctx->x, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 18; break; case 18: - sp_256_mont_mul_8(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t5, p256_mod); ctx->state = 19; break; case 19: - sp_256_mont_sqr_8(ctx->x, ctx->t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); ctx->state = 20; break; case 20: - sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t5, p256_mod); + sp_256_mont_dbl_8(ctx->t3, ctx->y, p256_mod); + sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t3, p256_mod); ctx->state = 21; break; case 21: - sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_8(ctx->y, ctx->y, ctx->x, p256_mod); ctx->state = 22; break; case 22: - sp_256_mont_dbl_8(ctx->t3, ctx->y, p256_mod); + sp_256_mont_mul_8(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 23; break; case 23: - sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t3, p256_mod); + sp_256_mont_sub_8(ctx->y, ctx->y, ctx->t5, p256_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_8(ctx->y, ctx->y, ctx->x, p256_mod); - ctx->state = 25; - break; - case 25: - sp_256_mont_mul_8(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); - ctx->state = 26; - break; - case 26: - sp_256_mont_sub_8(ctx->y, ctx->y, ctx->t5, p256_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 8; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 8; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 8; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_256_proj_point_add_8(sp_point_256* r, - const sp_point_256* p, const sp_point_256* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*8; - sp_digit* t3 = t + 4*8; - sp_digit* t4 = t + 6*8; - sp_digit* t5 = t + 8*8; - sp_digit* t6 = t + 10*8; - - - /* Check double */ - (void)sp_256_sub_8(t1, p256_mod, q->y); - sp_256_norm_8(t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & - (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { - sp_256_proj_point_dbl_8(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t1, t1, p->x, p256_mod, p256_mp_mod); - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_8(t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t4, t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_8(t3, t3, p->y, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); - /* H = U2 - U1 */ - sp_256_mont_sub_8(t2, t2, t1, p256_mod); - /* R = S2 - S1 */ - sp_256_mont_sub_8(t4, t4, t3, p256_mod); - if (~p->infinity & ~q->infinity & - sp_256_iszero_8(t2) & sp_256_iszero_8(t4) & maskt) { - sp_256_proj_point_dbl_8(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_8(z, p->z, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_8(x, x, t5, p256_mod); - sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_dbl_8(t3, y, p256_mod); - sp_256_mont_sub_8(x, x, t3, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_8(y, y, x, p256_mod); - sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_8(y, y, t5, p256_mod); - - for (i = 0; i < 8; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 8; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 8; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - #ifndef WC_NO_CACHE_RESISTANT /* Touch each possible point that could be being copied. * @@ -101267,7 +101251,6 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int i, /* W = Z^4 */ sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod); sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -101285,7 +101268,7 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int i, sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_8(t2, b, p256_mod); sp_256_mont_sub_8(x, x, t2, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_sub_lower_8(t2, b, x, p256_mod); sp_256_mont_dbl_lower_8(b, t2, p256_mod); /* Z = Z*Y */ @@ -101315,7 +101298,7 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int i, sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_8(t2, b, p256_mod); sp_256_mont_sub_8(x, x, t2, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_sub_lower_8(t2, b, x, p256_mod); sp_256_mont_dbl_lower_8(b, t2, p256_mod); /* Z = Z*Y */ @@ -101325,7 +101308,7 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_256_mont_mul_8(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_8(y, y, t1, p256_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_256_div2_8(y, y, p256_mod); } @@ -101370,8 +101353,8 @@ typedef struct sp_table_entry_256 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, - const sp_point_256* q, sp_digit* t) +static void sp_256_proj_point_add_qz1_8(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*8; @@ -101380,12 +101363,17 @@ static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, sp_digit* t5 = t + 8*8; sp_digit* t6 = t + 10*8; - /* Check double */ - (void)sp_256_sub_8(t1, p256_mod, q->y); - sp_256_norm_8(t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & - (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_8(p->x, t2) & + sp_256_cmp_equal_8(p->y, t4)) { sp_256_proj_point_dbl_8(r, p, t); } else { @@ -101397,12 +101385,6 @@ static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_8(t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t4, t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); /* H = U2 - X1 */ sp_256_mont_sub_8(t2, t2, p->x, p256_mod); /* R = S2 - Y1 */ @@ -105816,7 +105798,7 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_256_norm_8(r); - if (sp_256_iszero_8(r) == 0) { + if (!sp_256_iszero_8(r)) { /* x is modified in calculation of s. */ sp_256_from_mp(x, 8, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -105825,7 +105807,7 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_256_calc_s_8(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_256_iszero_8(s) == 0)) { + if ((err == MP_OKAY) && (!sp_256_iszero_8(s))) { break; } } @@ -110724,7 +110706,7 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, /* x /= z^2 */ sp_384_mont_mul_12(r->x, p->x, t2, p384_mod, p384_mp_mod); - XMEMSET(r->x + 12, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 12, 0, sizeof(sp_digit) * 12U); sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_12(r->x, p384_mod); @@ -110733,7 +110715,7 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, /* y /= z^3 */ sp_384_mont_mul_12(r->y, p->y, t1, p384_mod, p384_mp_mod); - XMEMSET(r->y + 12, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 12, 0, sizeof(sp_digit) * 12U); sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_12(r->y, p384_mod); @@ -110742,7 +110724,6 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -111131,6 +111112,61 @@ SP_NOINLINE static void sp_384_div2_12(sp_digit* r, const sp_digit* a, * p Point to double. * t Temporary ordinate data. */ +static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_12(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_12(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_12(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_12(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_12(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_div2_12(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_12(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_12(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_lower_12(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_12(y, y, t2, p384_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_dbl_12_ctx { int state; @@ -111141,6 +111177,12 @@ typedef struct sp_384_proj_point_dbl_12_ctx { sp_digit* z; } sp_384_proj_point_dbl_12_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -111265,62 +111307,6 @@ static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, co return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*12; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod); - /* Z = Y * Z */ - sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod); - /* Z = 2Z */ - sp_384_mont_dbl_12(z, z, p384_mod); - /* T2 = X - T1 */ - sp_384_mont_sub_12(t2, p->x, t1, p384_mod); - /* T1 = X + T1 */ - sp_384_mont_add_12(t1, p->x, t1, p384_mod); - /* T2 = T1 * T2 */ - sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod); - /* T1 = 3T2 */ - sp_384_mont_tpl_12(t1, t2, p384_mod); - /* Y = 2Y */ - sp_384_mont_dbl_12(y, p->y, p384_mod); - /* Y = Y * Y */ - sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod); - /* T2 = Y * Y */ - sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); - /* T2 = T2/2 */ - sp_384_div2_12(t2, t2, p384_mod); - /* Y = Y * X */ - sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod); - /* X = T1 * T1 */ - sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod); - /* X = X - Y */ - sp_384_mont_sub_12(x, x, y, p384_mod); - /* X = X - Y */ - sp_384_mont_sub_12(x, x, y, p384_mod); - /* Y = Y - X */ - sp_384_mont_sub_lower_12(y, y, x, p384_mod); - /* Y = Y * T1 */ - sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod); - /* Y = Y - T2 */ - sp_384_mont_sub_12(y, y, t2, p384_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -111348,6 +111334,7 @@ static int sp_384_iszero_12(const sp_digit* a) a[8] | a[9] | a[10] | a[11]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -111355,6 +111342,81 @@ static int sp_384_iszero_12(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_384_proj_point_add_12(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + sp_digit* t3 = t + 4*12; + sp_digit* t4 = t + 6*12; + sp_digit* t5 = t + 8*12; + sp_digit* t6 = t + 10*12; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t1, t1, p->x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_12(t3, t3, p->y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_12(t2, t1) & + sp_384_cmp_equal_12(t4, t3)) { + sp_384_proj_point_dbl_12(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_384_mont_sub_12(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_12(t4, t4, t3, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_12(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(x, x, t5, p384_mod); + sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(t3, y, p384_mod); + sp_384_mont_sub_12(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_12(y, y, x, p384_mod); + sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(y, y, t5, p384_mod); + for (i = 0; i < 12; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_add_12_ctx { @@ -111373,6 +111435,13 @@ typedef struct sp_384_proj_point_add_12_ctx { sp_digit* z; } sp_384_proj_point_add_12_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_384_proj_point_add_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, const sp_point_384* q, sp_digit* t) { @@ -111404,252 +111473,149 @@ static int sp_384_proj_point_add_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_384_sub_12(ctx->t1, p384_mod, q->y); - sp_384_norm_12(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & - (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_12(ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 2; break; case 2: - err = sp_384_proj_point_dbl_12_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_384_mont_mul_12(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 3; break; case 3: - { + sp_384_mont_mul_12(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_12(ctx->t1, q->z, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 5; break; case 5: - sp_384_mont_mul_12(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 6; break; case 6: - sp_384_mont_mul_12(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_12(ctx->t2, p->z, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_12(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); ctx->state = 8; break; case 8: - sp_384_mont_mul_12(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); ctx->state = 9; break; case 9: - sp_384_mont_mul_12(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_12(ctx->t2, ctx->t1) & + sp_384_cmp_equal_12(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_384_proj_point_dbl_12(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_12(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_12(ctx->t2, ctx->t2, ctx->t1, p384_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_12(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_12(ctx->t4, ctx->t4, ctx->t3, p384_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_384_mont_sub_12(ctx->t2, ctx->t2, ctx->t1, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_12(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_384_mont_sub_12(ctx->t4, ctx->t4, ctx->t3, p384_mod); + sp_384_mont_mul_12(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_12(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 15; break; case 15: - sp_384_mont_mul_12(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_12(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 16; break; case 16: - sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_12(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(ctx->x, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 18; break; case 18: - sp_384_mont_mul_12(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t5, p384_mod); ctx->state = 19; break; case 19: - sp_384_mont_sqr_12(ctx->x, ctx->t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); ctx->state = 20; break; case 20: - sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t5, p384_mod); + sp_384_mont_dbl_12(ctx->t3, ctx->y, p384_mod); + sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t3, p384_mod); ctx->state = 21; break; case 21: - sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_12(ctx->y, ctx->y, ctx->x, p384_mod); ctx->state = 22; break; case 22: - sp_384_mont_dbl_12(ctx->t3, ctx->y, p384_mod); + sp_384_mont_mul_12(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 23; break; case 23: - sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t3, p384_mod); + sp_384_mont_sub_12(ctx->y, ctx->y, ctx->t5, p384_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_12(ctx->y, ctx->y, ctx->x, p384_mod); - ctx->state = 25; - break; - case 25: - sp_384_mont_mul_12(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); - ctx->state = 26; - break; - case 26: - sp_384_mont_sub_12(ctx->y, ctx->y, ctx->t5, p384_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 12; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 12; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 12; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_384_proj_point_add_12(sp_point_384* r, - const sp_point_384* p, const sp_point_384* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*12; - sp_digit* t3 = t + 4*12; - sp_digit* t4 = t + 6*12; - sp_digit* t5 = t + 8*12; - sp_digit* t6 = t + 10*12; - - - /* Check double */ - (void)sp_384_sub_12(t1, p384_mod, q->y); - sp_384_norm_12(t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & - (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { - sp_384_proj_point_dbl_12(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t1, t1, p->x, p384_mod, p384_mp_mod); - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_12(t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t4, t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_12(t3, t3, p->y, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); - /* H = U2 - U1 */ - sp_384_mont_sub_12(t2, t2, t1, p384_mod); - /* R = S2 - S1 */ - sp_384_mont_sub_12(t4, t4, t3, p384_mod); - if (~p->infinity & ~q->infinity & - sp_384_iszero_12(t2) & sp_384_iszero_12(t4) & maskt) { - sp_384_proj_point_dbl_12(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_12(z, p->z, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_12(x, x, t5, p384_mod); - sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_dbl_12(t3, y, p384_mod); - sp_384_mont_sub_12(x, x, t3, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_12(y, y, x, p384_mod); - sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_12(y, y, t5, p384_mod); - - for (i = 0; i < 12; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 12; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 12; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - #ifndef WC_NO_CACHE_RESISTANT /* Touch each possible point that could be being copied. * @@ -111961,7 +111927,6 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int i, /* W = Z^4 */ sp_384_mont_sqr_12(w, z, p384_mod, p384_mp_mod); sp_384_mont_sqr_12(w, w, p384_mod, p384_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -111979,7 +111944,7 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int i, sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_12(t2, b, p384_mod); sp_384_mont_sub_12(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_12(t2, b, x, p384_mod); sp_384_mont_dbl_lower_12(b, t2, p384_mod); /* Z = Z*Y */ @@ -112009,7 +111974,7 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int i, sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_12(t2, b, p384_mod); sp_384_mont_sub_12(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_12(t2, b, x, p384_mod); sp_384_mont_dbl_lower_12(b, t2, p384_mod); /* Z = Z*Y */ @@ -112019,7 +111984,7 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_384_mont_mul_12(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_12(y, y, t1, p384_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_384_div2_12(y, y, p384_mod); } @@ -112064,8 +112029,8 @@ typedef struct sp_table_entry_384 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p, - const sp_point_384* q, sp_digit* t) +static void sp_384_proj_point_add_qz1_12(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*12; @@ -112074,12 +112039,17 @@ static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p, sp_digit* t5 = t + 8*12; sp_digit* t6 = t + 10*12; - /* Check double */ - (void)sp_384_sub_12(t1, p384_mod, q->y); - sp_384_norm_12(t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & - (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_12(p->x, t2) & + sp_384_cmp_equal_12(p->y, t4)) { sp_384_proj_point_dbl_12(r, p, t); } else { @@ -112091,12 +112061,6 @@ static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_12(t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t4, t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); /* H = U2 - X1 */ sp_384_mont_sub_12(t2, t2, p->x, p384_mod); /* R = S2 - Y1 */ @@ -116589,7 +116553,7 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_384_norm_12(r); - if (sp_384_iszero_12(r) == 0) { + if (!sp_384_iszero_12(r)) { /* x is modified in calculation of s. */ sp_384_from_mp(x, 12, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -116598,7 +116562,7 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_384_calc_s_12(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_384_iszero_12(s) == 0)) { + if ((err == MP_OKAY) && (!sp_384_iszero_12(s))) { break; } } @@ -123055,7 +123019,7 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, /* x /= z^2 */ sp_521_mont_mul_17(r->x, p->x, t2, p521_mod, p521_mp_mod); - XMEMSET(r->x + 17, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 17, 0, sizeof(sp_digit) * 17U); sp_521_mont_reduce_17(r->x, p521_mod, p521_mp_mod); /* Reduce x to less than modulus */ n = sp_521_cmp_17(r->x, p521_mod); @@ -123064,7 +123028,7 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, /* y /= z^3 */ sp_521_mont_mul_17(r->y, p->y, t1, p521_mod, p521_mp_mod); - XMEMSET(r->y + 17, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 17, 0, sizeof(sp_digit) * 17U); sp_521_mont_reduce_17(r->y, p521_mod, p521_mp_mod); /* Reduce y to less than modulus */ n = sp_521_cmp_17(r->y, p521_mod); @@ -123073,7 +123037,6 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -124961,6 +124924,61 @@ SP_NOINLINE static void sp_521_div2_17(sp_digit* r, const sp_digit* a, * p Point to double. * t Temporary ordinate data. */ +static void sp_521_proj_point_dbl_17(sp_point_521* r, const sp_point_521* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*17; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_521_mont_sqr_17(t1, p->z, p521_mod, p521_mp_mod); + /* Z = Y * Z */ + sp_521_mont_mul_17(z, p->y, p->z, p521_mod, p521_mp_mod); + /* Z = 2Z */ + sp_521_mont_dbl_17(z, z, p521_mod); + /* T2 = X - T1 */ + sp_521_mont_sub_17(t2, p->x, t1, p521_mod); + /* T1 = X + T1 */ + sp_521_mont_add_17(t1, p->x, t1, p521_mod); + /* T2 = T1 * T2 */ + sp_521_mont_mul_17(t2, t1, t2, p521_mod, p521_mp_mod); + /* T1 = 3T2 */ + sp_521_mont_tpl_17(t1, t2, p521_mod); + /* Y = 2Y */ + sp_521_mont_dbl_17(y, p->y, p521_mod); + /* Y = Y * Y */ + sp_521_mont_sqr_17(y, y, p521_mod, p521_mp_mod); + /* T2 = Y * Y */ + sp_521_mont_sqr_17(t2, y, p521_mod, p521_mp_mod); + /* T2 = T2/2 */ + sp_521_div2_17(t2, t2, p521_mod); + /* Y = Y * X */ + sp_521_mont_mul_17(y, y, p->x, p521_mod, p521_mp_mod); + /* X = T1 * T1 */ + sp_521_mont_sqr_17(x, t1, p521_mod, p521_mp_mod); + /* X = X - Y */ + sp_521_mont_sub_17(x, x, y, p521_mod); + /* X = X - Y */ + sp_521_mont_sub_17(x, x, y, p521_mod); + /* Y = Y - X */ + sp_521_mont_sub_lower_17(y, y, x, p521_mod); + /* Y = Y * T1 */ + sp_521_mont_mul_17(y, y, t1, p521_mod, p521_mp_mod); + /* Y = Y - T2 */ + sp_521_mont_sub_17(y, y, t2, p521_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_521_proj_point_dbl_17_ctx { int state; @@ -124971,6 +124989,12 @@ typedef struct sp_521_proj_point_dbl_17_ctx { sp_digit* z; } sp_521_proj_point_dbl_17_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_521_proj_point_dbl_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -125095,62 +125119,6 @@ static int sp_521_proj_point_dbl_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, co return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_521_proj_point_dbl_17(sp_point_521* r, const sp_point_521* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*17; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_521_mont_sqr_17(t1, p->z, p521_mod, p521_mp_mod); - /* Z = Y * Z */ - sp_521_mont_mul_17(z, p->y, p->z, p521_mod, p521_mp_mod); - /* Z = 2Z */ - sp_521_mont_dbl_17(z, z, p521_mod); - /* T2 = X - T1 */ - sp_521_mont_sub_17(t2, p->x, t1, p521_mod); - /* T1 = X + T1 */ - sp_521_mont_add_17(t1, p->x, t1, p521_mod); - /* T2 = T1 * T2 */ - sp_521_mont_mul_17(t2, t1, t2, p521_mod, p521_mp_mod); - /* T1 = 3T2 */ - sp_521_mont_tpl_17(t1, t2, p521_mod); - /* Y = 2Y */ - sp_521_mont_dbl_17(y, p->y, p521_mod); - /* Y = Y * Y */ - sp_521_mont_sqr_17(y, y, p521_mod, p521_mp_mod); - /* T2 = Y * Y */ - sp_521_mont_sqr_17(t2, y, p521_mod, p521_mp_mod); - /* T2 = T2/2 */ - sp_521_div2_17(t2, t2, p521_mod); - /* Y = Y * X */ - sp_521_mont_mul_17(y, y, p->x, p521_mod, p521_mp_mod); - /* X = T1 * T1 */ - sp_521_mont_sqr_17(x, t1, p521_mod, p521_mp_mod); - /* X = X - Y */ - sp_521_mont_sub_17(x, x, y, p521_mod); - /* X = X - Y */ - sp_521_mont_sub_17(x, x, y, p521_mod); - /* Y = Y - X */ - sp_521_mont_sub_lower_17(y, y, x, p521_mod); - /* Y = Y * T1 */ - sp_521_mont_mul_17(y, y, t1, p521_mod, p521_mp_mod); - /* Y = Y - T2 */ - sp_521_mont_sub_17(y, y, t2, p521_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -125181,6 +125149,7 @@ static int sp_521_iszero_17(const sp_digit* a) a[16]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -125188,6 +125157,81 @@ static int sp_521_iszero_17(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_521_proj_point_add_17(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*17; + sp_digit* t3 = t + 4*17; + sp_digit* t4 = t + 6*17; + sp_digit* t5 = t + 8*17; + sp_digit* t6 = t + 10*17; + + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_17(t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t3, t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t1, t1, p->x, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_17(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_17(t3, t3, p->y, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_17(t4, t4, q->y, p521_mod, p521_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_17(t2, t1) & + sp_521_cmp_equal_17(t4, t3)) { + sp_521_proj_point_dbl_17(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_521_mont_sub_17(t2, t2, t1, p521_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_17(t4, t4, t3, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_17(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_17(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_17(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(x, x, t5, p521_mod); + sp_521_mont_mul_17(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_17(t3, y, p521_mod); + sp_521_mont_sub_17(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_17(y, y, x, p521_mod); + sp_521_mont_mul_17(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(y, y, t5, p521_mod); + for (i = 0; i < 17; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_521_proj_point_add_17_ctx { @@ -125206,6 +125250,13 @@ typedef struct sp_521_proj_point_add_17_ctx { sp_digit* z; } sp_521_proj_point_add_17_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_521_proj_point_add_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, const sp_point_521* q, sp_digit* t) { @@ -125237,252 +125288,149 @@ static int sp_521_proj_point_add_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_521_sub_17(ctx->t1, p521_mod, q->y); - sp_521_norm_17(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & - (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_17(ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 2; break; case 2: - err = sp_521_proj_point_dbl_17_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_521_mont_mul_17(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 3; break; case 3: - { + sp_521_mont_mul_17(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_521_mont_sqr_17(ctx->t1, q->z, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_17(ctx->t2, p->z, p521_mod, p521_mp_mod); ctx->state = 5; break; case 5: - sp_521_mont_mul_17(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); ctx->state = 6; break; case 6: - sp_521_mont_mul_17(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_17(ctx->t2, p->z, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_17(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); ctx->state = 8; break; case 8: - sp_521_mont_mul_17(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_17(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); ctx->state = 9; break; case 9: - sp_521_mont_mul_17(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_17(ctx->t2, ctx->t1) & + sp_521_cmp_equal_17(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_521_proj_point_dbl_17(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_521_mont_mul_17(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); + /* H = U2 - U1 */ + sp_521_mont_sub_17(ctx->t2, ctx->t2, ctx->t1, p521_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_17(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_17(ctx->t4, ctx->t4, ctx->t3, p521_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_521_mont_sub_17(ctx->t2, ctx->t2, ctx->t1, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_17(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_521_mont_sub_17(ctx->t4, ctx->t4, ctx->t3, p521_mod); + sp_521_mont_mul_17(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_17(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 15; break; case 15: - sp_521_mont_mul_17(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_17(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 16; break; case 16: - sp_521_mont_mul_17(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_17(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_sqr_17(ctx->x, ctx->t4, p521_mod, p521_mp_mod); ctx->state = 18; break; case 18: - sp_521_mont_mul_17(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(ctx->x, ctx->x, ctx->t5, p521_mod); ctx->state = 19; break; case 19: - sp_521_mont_sqr_17(ctx->x, ctx->t4, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); ctx->state = 20; break; case 20: - sp_521_mont_sub_17(ctx->x, ctx->x, ctx->t5, p521_mod); + sp_521_mont_dbl_17(ctx->t3, ctx->y, p521_mod); + sp_521_mont_sub_17(ctx->x, ctx->x, ctx->t3, p521_mod); ctx->state = 21; break; case 21: - sp_521_mont_mul_17(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_17(ctx->y, ctx->y, ctx->x, p521_mod); ctx->state = 22; break; case 22: - sp_521_mont_dbl_17(ctx->t3, ctx->y, p521_mod); + sp_521_mont_mul_17(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); ctx->state = 23; break; case 23: - sp_521_mont_sub_17(ctx->x, ctx->x, ctx->t3, p521_mod); + sp_521_mont_sub_17(ctx->y, ctx->y, ctx->t5, p521_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_17(ctx->y, ctx->y, ctx->x, p521_mod); - ctx->state = 25; - break; - case 25: - sp_521_mont_mul_17(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); - ctx->state = 26; - break; - case 26: - sp_521_mont_sub_17(ctx->y, ctx->y, ctx->t5, p521_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 17; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 17; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 17; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_521_proj_point_add_17(sp_point_521* r, - const sp_point_521* p, const sp_point_521* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*17; - sp_digit* t3 = t + 4*17; - sp_digit* t4 = t + 6*17; - sp_digit* t5 = t + 8*17; - sp_digit* t6 = t + 10*17; - - - /* Check double */ - (void)sp_521_sub_17(t1, p521_mod, q->y); - sp_521_norm_17(t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & - (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, t1))) != 0) { - sp_521_proj_point_dbl_17(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_521_mont_sqr_17(t1, q->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t3, t1, q->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t1, t1, p->x, p521_mod, p521_mp_mod); - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_17(t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t4, t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t2, t2, q->x, p521_mod, p521_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_521_mont_mul_17(t3, t3, p->y, p521_mod, p521_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_17(t4, t4, q->y, p521_mod, p521_mp_mod); - /* H = U2 - U1 */ - sp_521_mont_sub_17(t2, t2, t1, p521_mod); - /* R = S2 - S1 */ - sp_521_mont_sub_17(t4, t4, t3, p521_mod); - if (~p->infinity & ~q->infinity & - sp_521_iszero_17(t2) & sp_521_iszero_17(t4) & maskt) { - sp_521_proj_point_dbl_17(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_17(t5, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(y, t1, t5, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t5, t5, t2, p521_mod, p521_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_17(z, p->z, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(z, z, q->z, p521_mod, p521_mp_mod); - sp_521_mont_sqr_17(x, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_17(x, x, t5, p521_mod); - sp_521_mont_mul_17(t5, t5, t3, p521_mod, p521_mp_mod); - sp_521_mont_dbl_17(t3, y, p521_mod); - sp_521_mont_sub_17(x, x, t3, p521_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_17(y, y, x, p521_mod); - sp_521_mont_mul_17(y, y, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_17(y, y, t5, p521_mod); - - for (i = 0; i < 17; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 17; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 17; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - #ifndef WC_NO_CACHE_RESISTANT /* Touch each possible point that could be being copied. * @@ -125828,7 +125776,6 @@ static void sp_521_proj_point_dbl_n_17(sp_point_521* p, int i, /* W = Z^4 */ sp_521_mont_sqr_17(w, z, p521_mod, p521_mp_mod); sp_521_mont_sqr_17(w, w, p521_mod, p521_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -125846,7 +125793,7 @@ static void sp_521_proj_point_dbl_n_17(sp_point_521* p, int i, sp_521_mont_sqr_17(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_17(t2, b, p521_mod); sp_521_mont_sub_17(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_17(t2, b, x, p521_mod); sp_521_mont_dbl_lower_17(b, t2, p521_mod); /* Z = Z*Y */ @@ -125876,7 +125823,7 @@ static void sp_521_proj_point_dbl_n_17(sp_point_521* p, int i, sp_521_mont_sqr_17(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_17(t2, b, p521_mod); sp_521_mont_sub_17(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_17(t2, b, x, p521_mod); sp_521_mont_dbl_lower_17(b, t2, p521_mod); /* Z = Z*Y */ @@ -125886,7 +125833,7 @@ static void sp_521_proj_point_dbl_n_17(sp_point_521* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_521_mont_mul_17(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_17(y, y, t1, p521_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_521_div2_17(y, y, p521_mod); } @@ -125931,8 +125878,8 @@ typedef struct sp_table_entry_521 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_521_proj_point_add_qz1_17(sp_point_521* r, const sp_point_521* p, - const sp_point_521* q, sp_digit* t) +static void sp_521_proj_point_add_qz1_17(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*17; @@ -125941,12 +125888,17 @@ static void sp_521_proj_point_add_qz1_17(sp_point_521* r, const sp_point_521* p, sp_digit* t5 = t + 8*17; sp_digit* t6 = t + 10*17; - /* Check double */ - (void)sp_521_sub_17(t1, p521_mod, q->y); - sp_521_norm_17(t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & - (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_17(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_17(t4, t4, q->y, p521_mod, p521_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_17(p->x, t2) & + sp_521_cmp_equal_17(p->y, t4)) { sp_521_proj_point_dbl_17(r, p, t); } else { @@ -125958,12 +125910,6 @@ static void sp_521_proj_point_add_qz1_17(sp_point_521* r, const sp_point_521* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_17(t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t4, t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t2, t2, q->x, p521_mod, p521_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_17(t4, t4, q->y, p521_mod, p521_mp_mod); /* H = U2 - X1 */ sp_521_mont_sub_17(t2, t2, p->x, p521_mod); /* R = S2 - Y1 */ @@ -132792,7 +132738,7 @@ int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_521_norm_17(r); - if (sp_521_iszero_17(r) == 0) { + if (!sp_521_iszero_17(r)) { /* x is modified in calculation of s. */ sp_521_from_mp(x, 17, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -132806,7 +132752,7 @@ int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_521_calc_s_17(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_521_iszero_17(s) == 0)) { + if ((err == MP_OKAY) && (!sp_521_iszero_17(s))) { break; } } @@ -203894,7 +203840,7 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, /* x /= z^2 */ sp_1024_mont_mul_32(r->x, p->x, t2, p1024_mod, p1024_mp_mod); - XMEMSET(r->x + 32, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 32, 0, sizeof(sp_digit) * 32U); sp_1024_mont_reduce_32(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_32(r->x, p1024_mod); @@ -203903,7 +203849,7 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, /* y /= z^3 */ sp_1024_mont_mul_32(r->y, p->y, t1, p1024_mod, p1024_mp_mod); - XMEMSET(r->y + 32, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 32, 0, sizeof(sp_digit) * 32U); sp_1024_mont_reduce_32(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_32(r->y, p1024_mod); @@ -203912,7 +203858,6 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -209608,6 +209553,61 @@ SP_NOINLINE static void sp_1024_div2_32(sp_digit* r, const sp_digit* a, * p Point to double. * t Temporary ordinate data. */ +static void sp_1024_proj_point_dbl_32(sp_point_1024* r, const sp_point_1024* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*32; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_1024_mont_sqr_32(t1, p->z, p1024_mod, p1024_mp_mod); + /* Z = Y * Z */ + sp_1024_mont_mul_32(z, p->y, p->z, p1024_mod, p1024_mp_mod); + /* Z = 2Z */ + sp_1024_mont_dbl_32(z, z, p1024_mod); + /* T2 = X - T1 */ + sp_1024_mont_sub_32(t2, p->x, t1, p1024_mod); + /* T1 = X + T1 */ + sp_1024_mont_add_32(t1, p->x, t1, p1024_mod); + /* T2 = T1 * T2 */ + sp_1024_mont_mul_32(t2, t1, t2, p1024_mod, p1024_mp_mod); + /* T1 = 3T2 */ + sp_1024_mont_tpl_32(t1, t2, p1024_mod); + /* Y = 2Y */ + sp_1024_mont_dbl_32(y, p->y, p1024_mod); + /* Y = Y * Y */ + sp_1024_mont_sqr_32(y, y, p1024_mod, p1024_mp_mod); + /* T2 = Y * Y */ + sp_1024_mont_sqr_32(t2, y, p1024_mod, p1024_mp_mod); + /* T2 = T2/2 */ + sp_1024_div2_32(t2, t2, p1024_mod); + /* Y = Y * X */ + sp_1024_mont_mul_32(y, y, p->x, p1024_mod, p1024_mp_mod); + /* X = T1 * T1 */ + sp_1024_mont_sqr_32(x, t1, p1024_mod, p1024_mp_mod); + /* X = X - Y */ + sp_1024_mont_sub_32(x, x, y, p1024_mod); + /* X = X - Y */ + sp_1024_mont_sub_32(x, x, y, p1024_mod); + /* Y = Y - X */ + sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); + /* Y = Y * T1 */ + sp_1024_mont_mul_32(y, y, t1, p1024_mod, p1024_mp_mod); + /* Y = Y - T2 */ + sp_1024_mont_sub_32(y, y, t2, p1024_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_dbl_32_ctx { int state; @@ -209618,6 +209618,12 @@ typedef struct sp_1024_proj_point_dbl_32_ctx { sp_digit* z; } sp_1024_proj_point_dbl_32_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -209742,62 +209748,6 @@ static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_1024_proj_point_dbl_32(sp_point_1024* r, const sp_point_1024* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*32; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_1024_mont_sqr_32(t1, p->z, p1024_mod, p1024_mp_mod); - /* Z = Y * Z */ - sp_1024_mont_mul_32(z, p->y, p->z, p1024_mod, p1024_mp_mod); - /* Z = 2Z */ - sp_1024_mont_dbl_32(z, z, p1024_mod); - /* T2 = X - T1 */ - sp_1024_mont_sub_32(t2, p->x, t1, p1024_mod); - /* T1 = X + T1 */ - sp_1024_mont_add_32(t1, p->x, t1, p1024_mod); - /* T2 = T1 * T2 */ - sp_1024_mont_mul_32(t2, t1, t2, p1024_mod, p1024_mp_mod); - /* T1 = 3T2 */ - sp_1024_mont_tpl_32(t1, t2, p1024_mod); - /* Y = 2Y */ - sp_1024_mont_dbl_32(y, p->y, p1024_mod); - /* Y = Y * Y */ - sp_1024_mont_sqr_32(y, y, p1024_mod, p1024_mp_mod); - /* T2 = Y * Y */ - sp_1024_mont_sqr_32(t2, y, p1024_mod, p1024_mp_mod); - /* T2 = T2/2 */ - sp_1024_div2_32(t2, t2, p1024_mod); - /* Y = Y * X */ - sp_1024_mont_mul_32(y, y, p->x, p1024_mod, p1024_mp_mod); - /* X = T1 * T1 */ - sp_1024_mont_sqr_32(x, t1, p1024_mod, p1024_mp_mod); - /* X = X - Y */ - sp_1024_mont_sub_32(x, x, y, p1024_mod); - /* X = X - Y */ - sp_1024_mont_sub_32(x, x, y, p1024_mod); - /* Y = Y - X */ - sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); - /* Y = Y * T1 */ - sp_1024_mont_mul_32(y, y, t1, p1024_mod, p1024_mp_mod); - /* Y = Y - T2 */ - sp_1024_mont_sub_32(y, y, t2, p1024_mod); -} - #ifdef WOLFSSL_SP_SMALL /* Sub b from a into r. (r = a - b) * @@ -210198,6 +210148,7 @@ static int sp_1024_iszero_32(const sp_digit* a) a[24] | a[25] | a[26] | a[27] | a[28] | a[29] | a[30] | a[31]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -210205,6 +210156,81 @@ static int sp_1024_iszero_32(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_1024_proj_point_add_32(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*32; + sp_digit* t3 = t + 4*32; + sp_digit* t4 = t + 6*32; + sp_digit* t5 = t + 8*32; + sp_digit* t6 = t + 10*32; + + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_32(t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t3, t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t1, t1, p->x, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_32(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_32(t3, t3, p->y, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_32(t2, t1) & + sp_1024_cmp_equal_32(t4, t3)) { + sp_1024_proj_point_dbl_32(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_1024_mont_sub_32(t2, t2, t1, p1024_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_32(t4, t4, t3, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_32(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_32(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_32(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(x, x, t5, p1024_mod); + sp_1024_mont_mul_32(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_32(t3, y, p1024_mod); + sp_1024_mont_sub_32(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); + sp_1024_mont_mul_32(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(y, y, t5, p1024_mod); + for (i = 0; i < 32; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_add_32_ctx { @@ -210223,6 +210249,13 @@ typedef struct sp_1024_proj_point_add_32_ctx { sp_digit* z; } sp_1024_proj_point_add_32_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_1024_proj_point_add_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { @@ -210254,252 +210287,149 @@ static int sp_1024_proj_point_add_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_1024_sub_32(ctx->t1, p1024_mod, q->y); - sp_1024_norm_32(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & - (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_32(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 2; break; case 2: - err = sp_1024_proj_point_dbl_32_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_1024_mont_mul_32(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 3; break; case 3: - { + sp_1024_mont_mul_32(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_32(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_32(ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 5; break; case 5: - sp_1024_mont_mul_32(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 6; break; case 6: - sp_1024_mont_mul_32(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_32(ctx->t2, p->z, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_32(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); ctx->state = 8; break; case 8: - sp_1024_mont_mul_32(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_32(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); ctx->state = 9; break; case 9: - sp_1024_mont_mul_32(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_32(ctx->t2, ctx->t1) & + sp_1024_cmp_equal_32(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_1024_proj_point_dbl_32(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_32(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); + /* H = U2 - U1 */ + sp_1024_mont_sub_32(ctx->t2, ctx->t2, ctx->t1, p1024_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_32(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_32(ctx->t4, ctx->t4, ctx->t3, p1024_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_1024_mont_sub_32(ctx->t2, ctx->t2, ctx->t1, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_32(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_1024_mont_sub_32(ctx->t4, ctx->t4, ctx->t3, p1024_mod); + sp_1024_mont_mul_32(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_32(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 15; break; case 15: - sp_1024_mont_mul_32(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_32(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 16; break; case 16: - sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_32(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_32(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 18; break; case 18: - sp_1024_mont_mul_32(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t5, p1024_mod); ctx->state = 19; break; case 19: - sp_1024_mont_sqr_32(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); ctx->state = 20; break; case 20: - sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t5, p1024_mod); + sp_1024_mont_dbl_32(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t3, p1024_mod); ctx->state = 21; break; case 21: - sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_32(ctx->y, ctx->y, ctx->x, p1024_mod); ctx->state = 22; break; case 22: - sp_1024_mont_dbl_32(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_mul_32(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 23; break; case 23: - sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t3, p1024_mod); + sp_1024_mont_sub_32(ctx->y, ctx->y, ctx->t5, p1024_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_32(ctx->y, ctx->y, ctx->x, p1024_mod); - ctx->state = 25; - break; - case 25: - sp_1024_mont_mul_32(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); - ctx->state = 26; - break; - case 26: - sp_1024_mont_sub_32(ctx->y, ctx->y, ctx->t5, p1024_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 32; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 32; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 32; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_1024_proj_point_add_32(sp_point_1024* r, - const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*32; - sp_digit* t3 = t + 4*32; - sp_digit* t4 = t + 6*32; - sp_digit* t5 = t + 8*32; - sp_digit* t6 = t + 10*32; - - - /* Check double */ - (void)sp_1024_mont_sub_32(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_32(t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & - (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, t1))) != 0) { - sp_1024_proj_point_dbl_32(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_32(t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t3, t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t1, t1, p->x, p1024_mod, p1024_mp_mod); - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_32(t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t4, t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_32(t3, t3, p->y, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod); - /* H = U2 - U1 */ - sp_1024_mont_sub_32(t2, t2, t1, p1024_mod); - /* R = S2 - S1 */ - sp_1024_mont_sub_32(t4, t4, t3, p1024_mod); - if (~p->infinity & ~q->infinity & - sp_1024_iszero_32(t2) & sp_1024_iszero_32(t4) & maskt) { - sp_1024_proj_point_dbl_32(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_32(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t5, t5, t2, p1024_mod, p1024_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_32(z, p->z, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_32(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_32(x, x, t5, p1024_mod); - sp_1024_mont_mul_32(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_32(t3, y, p1024_mod); - sp_1024_mont_sub_32(x, x, t3, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); - sp_1024_mont_mul_32(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_32(y, y, t5, p1024_mod); - - for (i = 0; i < 32; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 32; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 32; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Multiply the point by the scalar and return the result. * If map is true then convert result to affine coordinates. * @@ -210675,7 +210605,6 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int i, /* W = Z^4 */ sp_1024_mont_sqr_32(w, z, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_32(w, w, p1024_mod, p1024_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -210693,7 +210622,7 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int i, sp_1024_mont_sqr_32(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_32(t2, b, p1024_mod); sp_1024_mont_sub_32(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_32(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_32(b, t2, p1024_mod); /* Z = Z*Y */ @@ -210723,7 +210652,7 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int i, sp_1024_mont_sqr_32(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_32(t2, b, p1024_mod); sp_1024_mont_sub_32(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_32(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_32(b, t2, p1024_mod); /* Z = Z*Y */ @@ -210733,7 +210662,7 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_1024_mont_mul_32(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_32(y, y, t1, p1024_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_1024_div2_32(y, y, p1024_mod); } @@ -210778,8 +210707,8 @@ typedef struct sp_table_entry_1024 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, const sp_point_1024* p, - const sp_point_1024* q, sp_digit* t) +static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*32; @@ -210788,12 +210717,17 @@ static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, const sp_point_1024* sp_digit* t5 = t + 8*32; sp_digit* t6 = t + 10*32; - /* Check double */ - (void)sp_1024_mont_sub_32(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_32(t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & - (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_32(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_32(p->x, t2) & + sp_1024_cmp_equal_32(p->y, t4)) { sp_1024_proj_point_dbl_32(r, p, t); } else { @@ -210805,12 +210739,6 @@ static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, const sp_point_1024* sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_32(t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t4, t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod); /* H = U2 - X1 */ sp_1024_mont_sub_32(t2, t2, p->x, p1024_mod); /* R = S2 - Y1 */ diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c index 74b0501052..e6bdcfb529 100644 --- a/wolfcrypt/src/sp_c32.c +++ b/wolfcrypt/src/sp_c32.c @@ -21076,7 +21076,7 @@ static void sp_256_map_9(sp_point_256* r, const sp_point_256* p, /* x /= z^2 */ sp_256_mont_mul_9(r->x, p->x, t2, p256_mod, p256_mp_mod); - XMEMSET(r->x + 9, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 9, 0, sizeof(sp_digit) * 9U); sp_256_mont_reduce_9(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_9(r->x, p256_mod); @@ -21085,7 +21085,7 @@ static void sp_256_map_9(sp_point_256* r, const sp_point_256* p, /* y /= z^3 */ sp_256_mont_mul_9(r->y, p->y, t1, p256_mod, p256_mp_mod); - XMEMSET(r->y + 9, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 9, 0, sizeof(sp_digit) * 9U); sp_256_mont_reduce_9(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_9(r->y, p256_mod); @@ -21094,7 +21094,6 @@ static void sp_256_map_9(sp_point_256* r, const sp_point_256* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -21259,6 +21258,61 @@ static void sp_256_div2_9(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_256_proj_point_dbl_9(sp_point_256* r, const sp_point_256* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*9; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_9(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_9(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_9(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_9(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_9(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_9(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_9(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_9(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_9(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_9(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_9(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_9(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_9(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_9(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_9(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_lower_9(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_9(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_9(y, y, t2, p256_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_dbl_9_ctx { int state; @@ -21269,6 +21323,12 @@ typedef struct sp_256_proj_point_dbl_9_ctx { sp_digit* z; } sp_256_proj_point_dbl_9_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_256_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -21393,62 +21453,6 @@ static int sp_256_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_256_proj_point_dbl_9(sp_point_256* r, const sp_point_256* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*9; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_256_mont_sqr_9(t1, p->z, p256_mod, p256_mp_mod); - /* Z = Y * Z */ - sp_256_mont_mul_9(z, p->y, p->z, p256_mod, p256_mp_mod); - /* Z = 2Z */ - sp_256_mont_dbl_9(z, z, p256_mod); - /* T2 = X - T1 */ - sp_256_mont_sub_9(t2, p->x, t1, p256_mod); - /* T1 = X + T1 */ - sp_256_mont_add_9(t1, p->x, t1, p256_mod); - /* T2 = T1 * T2 */ - sp_256_mont_mul_9(t2, t1, t2, p256_mod, p256_mp_mod); - /* T1 = 3T2 */ - sp_256_mont_tpl_9(t1, t2, p256_mod); - /* Y = 2Y */ - sp_256_mont_dbl_9(y, p->y, p256_mod); - /* Y = Y * Y */ - sp_256_mont_sqr_9(y, y, p256_mod, p256_mp_mod); - /* T2 = Y * Y */ - sp_256_mont_sqr_9(t2, y, p256_mod, p256_mp_mod); - /* T2 = T2/2 */ - sp_256_div2_9(t2, t2, p256_mod); - /* Y = Y * X */ - sp_256_mont_mul_9(y, y, p->x, p256_mod, p256_mp_mod); - /* X = T1 * T1 */ - sp_256_mont_sqr_9(x, t1, p256_mod, p256_mp_mod); - /* X = X - Y */ - sp_256_mont_sub_9(x, x, y, p256_mod); - /* X = X - Y */ - sp_256_mont_sub_9(x, x, y, p256_mod); - /* Y = Y - X */ - sp_256_mont_sub_lower_9(y, y, x, p256_mod); - /* Y = Y * T1 */ - sp_256_mont_mul_9(y, y, t1, p256_mod, p256_mp_mod); - /* Y = Y - T2 */ - sp_256_mont_sub_9(y, y, t2, p256_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -21475,6 +21479,7 @@ static int sp_256_iszero_9(const sp_digit* a) a[8]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -21482,6 +21487,81 @@ static int sp_256_iszero_9(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_256_proj_point_add_9(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*9; + sp_digit* t3 = t + 4*9; + sp_digit* t4 = t + 6*9; + sp_digit* t5 = t + 8*9; + sp_digit* t6 = t + 10*9; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_9(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t1, t1, p->x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_9(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_9(t3, t3, p->y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_9(t4, t4, q->y, p256_mod, p256_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_9(t2, t1) & + sp_256_cmp_equal_9(t4, t3)) { + sp_256_proj_point_dbl_9(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_256_mont_sub_9(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_9(t4, t4, t3, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_9(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_9(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_9(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_9(x, x, t5, p256_mod); + sp_256_mont_mul_9(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_dbl_9(t3, y, p256_mod); + sp_256_mont_sub_9(x, x, t3, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_9(y, y, x, p256_mod); + sp_256_mont_mul_9(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_9(y, y, t5, p256_mod); + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_add_9_ctx { @@ -21500,6 +21580,13 @@ typedef struct sp_256_proj_point_add_9_ctx { sp_digit* z; } sp_256_proj_point_add_9_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_256_proj_point_add_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, const sp_point_256* q, sp_digit* t) { @@ -21531,252 +21618,149 @@ static int sp_256_proj_point_add_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_256_sub_9(ctx->t1, p256_mod, q->y); - sp_256_norm_9(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_9(p->x, q->x) & sp_256_cmp_equal_9(p->z, q->z) & - (sp_256_cmp_equal_9(p->y, q->y) | sp_256_cmp_equal_9(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_9(ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 2; break; case 2: - err = sp_256_proj_point_dbl_9_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_256_mont_mul_9(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 3; break; case 3: - { + sp_256_mont_mul_9(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_9(ctx->t1, q->z, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_9(ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 5; break; case 5: - sp_256_mont_mul_9(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 6; break; case 6: - sp_256_mont_mul_9(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_9(ctx->t2, p->z, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_9(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); ctx->state = 8; break; case 8: - sp_256_mont_mul_9(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_9(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); ctx->state = 9; break; case 9: - sp_256_mont_mul_9(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_9(ctx->t2, ctx->t1) & + sp_256_cmp_equal_9(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_256_proj_point_dbl_9(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_9(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_9(ctx->t2, ctx->t2, ctx->t1, p256_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_9(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_9(ctx->t4, ctx->t4, ctx->t3, p256_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_256_mont_sub_9(ctx->t2, ctx->t2, ctx->t1, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_9(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_256_mont_sub_9(ctx->t4, ctx->t4, ctx->t3, p256_mod); + sp_256_mont_mul_9(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_9(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 15; break; case 15: - sp_256_mont_mul_9(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_9(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 16; break; case 16: - sp_256_mont_mul_9(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_9(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_sqr_9(ctx->x, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 18; break; case 18: - sp_256_mont_mul_9(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sub_9(ctx->x, ctx->x, ctx->t5, p256_mod); ctx->state = 19; break; case 19: - sp_256_mont_sqr_9(ctx->x, ctx->t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); ctx->state = 20; break; case 20: - sp_256_mont_sub_9(ctx->x, ctx->x, ctx->t5, p256_mod); + sp_256_mont_dbl_9(ctx->t3, ctx->y, p256_mod); + sp_256_mont_sub_9(ctx->x, ctx->x, ctx->t3, p256_mod); ctx->state = 21; break; case 21: - sp_256_mont_mul_9(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_9(ctx->y, ctx->y, ctx->x, p256_mod); ctx->state = 22; break; case 22: - sp_256_mont_dbl_9(ctx->t3, ctx->y, p256_mod); + sp_256_mont_mul_9(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 23; break; case 23: - sp_256_mont_sub_9(ctx->x, ctx->x, ctx->t3, p256_mod); + sp_256_mont_sub_9(ctx->y, ctx->y, ctx->t5, p256_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_9(ctx->y, ctx->y, ctx->x, p256_mod); - ctx->state = 25; - break; - case 25: - sp_256_mont_mul_9(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); - ctx->state = 26; - break; - case 26: - sp_256_mont_sub_9(ctx->y, ctx->y, ctx->t5, p256_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 9; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 9; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 9; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_256_proj_point_add_9(sp_point_256* r, - const sp_point_256* p, const sp_point_256* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*9; - sp_digit* t3 = t + 4*9; - sp_digit* t4 = t + 6*9; - sp_digit* t5 = t + 8*9; - sp_digit* t6 = t + 10*9; - - - /* Check double */ - (void)sp_256_sub_9(t1, p256_mod, q->y); - sp_256_norm_9(t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_9(p->x, q->x) & sp_256_cmp_equal_9(p->z, q->z) & - (sp_256_cmp_equal_9(p->y, q->y) | sp_256_cmp_equal_9(p->y, t1))) != 0) { - sp_256_proj_point_dbl_9(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_9(t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t3, t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t1, t1, p->x, p256_mod, p256_mp_mod); - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_9(t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t4, t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_9(t3, t3, p->y, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_9(t4, t4, q->y, p256_mod, p256_mp_mod); - /* H = U2 - U1 */ - sp_256_mont_sub_9(t2, t2, t1, p256_mod); - /* R = S2 - S1 */ - sp_256_mont_sub_9(t4, t4, t3, p256_mod); - if (~p->infinity & ~q->infinity & - sp_256_iszero_9(t2) & sp_256_iszero_9(t4) & maskt) { - sp_256_proj_point_dbl_9(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_9(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t5, t5, t2, p256_mod, p256_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_9(z, p->z, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_sqr_9(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_9(x, x, t5, p256_mod); - sp_256_mont_mul_9(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_dbl_9(t3, y, p256_mod); - sp_256_mont_sub_9(x, x, t3, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_9(y, y, x, p256_mod); - sp_256_mont_mul_9(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_9(y, y, t5, p256_mod); - - for (i = 0; i < 9; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 9; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 9; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Multiply a number by Montgomery normalizer mod modulus (prime). * * r The resulting Montgomery form number. @@ -22213,7 +22197,6 @@ static void sp_256_proj_point_dbl_n_9(sp_point_256* p, int i, /* W = Z^4 */ sp_256_mont_sqr_9(w, z, p256_mod, p256_mp_mod); sp_256_mont_sqr_9(w, w, p256_mod, p256_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -22231,7 +22214,7 @@ static void sp_256_proj_point_dbl_n_9(sp_point_256* p, int i, sp_256_mont_sqr_9(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_9(t2, b, p256_mod); sp_256_mont_sub_9(x, x, t2, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_sub_lower_9(t2, b, x, p256_mod); sp_256_mont_dbl_lower_9(b, t2, p256_mod); /* Z = Z*Y */ @@ -22261,7 +22244,7 @@ static void sp_256_proj_point_dbl_n_9(sp_point_256* p, int i, sp_256_mont_sqr_9(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_9(t2, b, p256_mod); sp_256_mont_sub_9(x, x, t2, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_sub_lower_9(t2, b, x, p256_mod); sp_256_mont_dbl_lower_9(b, t2, p256_mod); /* Z = Z*Y */ @@ -22271,7 +22254,7 @@ static void sp_256_proj_point_dbl_n_9(sp_point_256* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_256_mont_mul_9(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_9(y, y, t1, p256_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_256_div2_9(y, y, p256_mod); } @@ -22328,7 +22311,7 @@ static void sp_256_proj_point_dbl_n_store_9(sp_point_256* r, sp_256_mont_sqr_9(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_9(t2, b, p256_mod); sp_256_mont_sub_9(x, x, t2, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_sub_lower_9(t2, b, x, p256_mod); sp_256_mont_dbl_lower_9(b, t2, p256_mod); /* Z = Z*Y */ @@ -22343,7 +22326,6 @@ static void sp_256_proj_point_dbl_n_store_9(sp_point_256* r, /* y = 2*A*(B - X) - Y^4 */ sp_256_mont_mul_9(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_9(y, y, t1, p256_mod); - /* Y = Y/2 */ sp_256_div2_9(r[j].y, y, p256_mod); r[j].infinity = 0; @@ -22728,8 +22710,8 @@ static int sp_256_ecc_mulmod_win_add_sub_9(sp_point_256* r, const sp_point_256* * q Second point to add. * t Temporary ordinate data. */ -static void sp_256_proj_point_add_qz1_9(sp_point_256* r, const sp_point_256* p, - const sp_point_256* q, sp_digit* t) +static void sp_256_proj_point_add_qz1_9(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*9; @@ -22738,12 +22720,17 @@ static void sp_256_proj_point_add_qz1_9(sp_point_256* r, const sp_point_256* p, sp_digit* t5 = t + 8*9; sp_digit* t6 = t + 10*9; - /* Check double */ - (void)sp_256_sub_9(t1, p256_mod, q->y); - sp_256_norm_9(t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_9(p->x, q->x) & sp_256_cmp_equal_9(p->z, q->z) & - (sp_256_cmp_equal_9(p->y, q->y) | sp_256_cmp_equal_9(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_9(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_9(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_9(t4, t4, q->y, p256_mod, p256_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_9(p->x, t2) & + sp_256_cmp_equal_9(p->y, t4)) { sp_256_proj_point_dbl_9(r, p, t); } else { @@ -22755,12 +22742,6 @@ static void sp_256_proj_point_add_qz1_9(sp_point_256* r, const sp_point_256* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_9(t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t4, t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_9(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_9(t4, t4, q->y, p256_mod, p256_mp_mod); /* H = U2 - X1 */ sp_256_mont_sub_9(t2, t2, p->x, p256_mod); /* R = S2 - Y1 */ @@ -25796,7 +25777,7 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_256_norm_9(r); - if (sp_256_iszero_9(r) == 0) { + if (!sp_256_iszero_9(r)) { /* x is modified in calculation of s. */ sp_256_from_mp(x, 9, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -25805,7 +25786,7 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_256_calc_s_9(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_256_iszero_9(s) == 0)) { + if ((err == MP_OKAY) && (!sp_256_iszero_9(s))) { break; } } @@ -28502,7 +28483,7 @@ static void sp_384_map_15(sp_point_384* r, const sp_point_384* p, /* x /= z^2 */ sp_384_mont_mul_15(r->x, p->x, t2, p384_mod, p384_mp_mod); - XMEMSET(r->x + 15, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 15, 0, sizeof(sp_digit) * 15U); sp_384_mont_reduce_15(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_15(r->x, p384_mod); @@ -28511,7 +28492,7 @@ static void sp_384_map_15(sp_point_384* r, const sp_point_384* p, /* y /= z^3 */ sp_384_mont_mul_15(r->y, p->y, t1, p384_mod, p384_mp_mod); - XMEMSET(r->y + 15, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 15, 0, sizeof(sp_digit) * 15U); sp_384_mont_reduce_15(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_15(r->y, p384_mod); @@ -28520,7 +28501,6 @@ static void sp_384_map_15(sp_point_384* r, const sp_point_384* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -28697,6 +28677,61 @@ static void sp_384_div2_15(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_384_proj_point_dbl_15(sp_point_384* r, const sp_point_384* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*15; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_15(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_15(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_15(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_15(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_15(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_15(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_15(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_15(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_15(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_15(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_div2_15(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_15(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_15(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_15(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_15(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_lower_15(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_15(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_15(y, y, t2, p384_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_dbl_15_ctx { int state; @@ -28707,6 +28742,12 @@ typedef struct sp_384_proj_point_dbl_15_ctx { sp_digit* z; } sp_384_proj_point_dbl_15_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_384_proj_point_dbl_15_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -28831,62 +28872,6 @@ static int sp_384_proj_point_dbl_15_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, co return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_384_proj_point_dbl_15(sp_point_384* r, const sp_point_384* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*15; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_384_mont_sqr_15(t1, p->z, p384_mod, p384_mp_mod); - /* Z = Y * Z */ - sp_384_mont_mul_15(z, p->y, p->z, p384_mod, p384_mp_mod); - /* Z = 2Z */ - sp_384_mont_dbl_15(z, z, p384_mod); - /* T2 = X - T1 */ - sp_384_mont_sub_15(t2, p->x, t1, p384_mod); - /* T1 = X + T1 */ - sp_384_mont_add_15(t1, p->x, t1, p384_mod); - /* T2 = T1 * T2 */ - sp_384_mont_mul_15(t2, t1, t2, p384_mod, p384_mp_mod); - /* T1 = 3T2 */ - sp_384_mont_tpl_15(t1, t2, p384_mod); - /* Y = 2Y */ - sp_384_mont_dbl_15(y, p->y, p384_mod); - /* Y = Y * Y */ - sp_384_mont_sqr_15(y, y, p384_mod, p384_mp_mod); - /* T2 = Y * Y */ - sp_384_mont_sqr_15(t2, y, p384_mod, p384_mp_mod); - /* T2 = T2/2 */ - sp_384_div2_15(t2, t2, p384_mod); - /* Y = Y * X */ - sp_384_mont_mul_15(y, y, p->x, p384_mod, p384_mp_mod); - /* X = T1 * T1 */ - sp_384_mont_sqr_15(x, t1, p384_mod, p384_mp_mod); - /* X = X - Y */ - sp_384_mont_sub_15(x, x, y, p384_mod); - /* X = X - Y */ - sp_384_mont_sub_15(x, x, y, p384_mod); - /* Y = Y - X */ - sp_384_mont_sub_lower_15(y, y, x, p384_mod); - /* Y = Y * T1 */ - sp_384_mont_mul_15(y, y, t1, p384_mod, p384_mp_mod); - /* Y = Y - T2 */ - sp_384_mont_sub_15(y, y, t2, p384_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -28915,6 +28900,7 @@ static int sp_384_iszero_15(const sp_digit* a) a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -28922,6 +28908,81 @@ static int sp_384_iszero_15(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_384_proj_point_add_15(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*15; + sp_digit* t3 = t + 4*15; + sp_digit* t4 = t + 6*15; + sp_digit* t5 = t + 8*15; + sp_digit* t6 = t + 10*15; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_15(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t1, t1, p->x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_15(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_15(t3, t3, p->y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_15(t4, t4, q->y, p384_mod, p384_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_15(t2, t1) & + sp_384_cmp_equal_15(t4, t3)) { + sp_384_proj_point_dbl_15(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_384_mont_sub_15(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_15(t4, t4, t3, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_15(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_15(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_15(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(x, x, t5, p384_mod); + sp_384_mont_mul_15(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_15(t3, y, p384_mod); + sp_384_mont_sub_15(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_15(y, y, x, p384_mod); + sp_384_mont_mul_15(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(y, y, t5, p384_mod); + for (i = 0; i < 15; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 15; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 15; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_add_15_ctx { @@ -28940,6 +29001,13 @@ typedef struct sp_384_proj_point_add_15_ctx { sp_digit* z; } sp_384_proj_point_add_15_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_384_proj_point_add_15_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, const sp_point_384* q, sp_digit* t) { @@ -28971,251 +29039,148 @@ static int sp_384_proj_point_add_15_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_384_sub_15(ctx->t1, p384_mod, q->y); - sp_384_norm_15(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) & - (sp_384_cmp_equal_15(p->y, q->y) | sp_384_cmp_equal_15(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_15(ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 2; break; case 2: - err = sp_384_proj_point_dbl_15_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_384_mont_mul_15(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 3; break; case 3: - { + sp_384_mont_mul_15(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_15(ctx->t1, q->z, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_15(ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 5; break; case 5: - sp_384_mont_mul_15(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 6; break; case 6: - sp_384_mont_mul_15(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_15(ctx->t2, p->z, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_15(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); ctx->state = 8; break; case 8: - sp_384_mont_mul_15(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_15(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); ctx->state = 9; break; case 9: - sp_384_mont_mul_15(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_15(ctx->t2, ctx->t1) & + sp_384_cmp_equal_15(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_384_proj_point_dbl_15(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_15(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_15(ctx->t2, ctx->t2, ctx->t1, p384_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_15(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_15(ctx->t4, ctx->t4, ctx->t3, p384_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_384_mont_sub_15(ctx->t2, ctx->t2, ctx->t1, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_15(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_384_mont_sub_15(ctx->t4, ctx->t4, ctx->t3, p384_mod); + sp_384_mont_mul_15(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_15(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 15; break; case 15: - sp_384_mont_mul_15(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_15(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 16; break; case 16: - sp_384_mont_mul_15(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_15(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_sqr_15(ctx->x, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 18; break; case 18: - sp_384_mont_mul_15(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sub_15(ctx->x, ctx->x, ctx->t5, p384_mod); ctx->state = 19; break; case 19: - sp_384_mont_sqr_15(ctx->x, ctx->t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); ctx->state = 20; break; case 20: - sp_384_mont_sub_15(ctx->x, ctx->x, ctx->t5, p384_mod); + sp_384_mont_dbl_15(ctx->t3, ctx->y, p384_mod); + sp_384_mont_sub_15(ctx->x, ctx->x, ctx->t3, p384_mod); ctx->state = 21; break; case 21: - sp_384_mont_mul_15(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_15(ctx->y, ctx->y, ctx->x, p384_mod); ctx->state = 22; break; case 22: - sp_384_mont_dbl_15(ctx->t3, ctx->y, p384_mod); + sp_384_mont_mul_15(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 23; break; case 23: - sp_384_mont_sub_15(ctx->x, ctx->x, ctx->t3, p384_mod); + sp_384_mont_sub_15(ctx->y, ctx->y, ctx->t5, p384_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_15(ctx->y, ctx->y, ctx->x, p384_mod); - ctx->state = 25; - break; - case 25: - sp_384_mont_mul_15(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); - ctx->state = 26; - break; - case 26: - sp_384_mont_sub_15(ctx->y, ctx->y, ctx->t5, p384_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 15; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 15; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 15; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { - err = FP_WOULDBLOCK; - } - return err; -} -#endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_384_proj_point_add_15(sp_point_384* r, - const sp_point_384* p, const sp_point_384* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*15; - sp_digit* t3 = t + 4*15; - sp_digit* t4 = t + 6*15; - sp_digit* t5 = t + 8*15; - sp_digit* t6 = t + 10*15; - - - /* Check double */ - (void)sp_384_sub_15(t1, p384_mod, q->y); - sp_384_norm_15(t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) & - (sp_384_cmp_equal_15(p->y, q->y) | sp_384_cmp_equal_15(p->y, t1))) != 0) { - sp_384_proj_point_dbl_15(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_15(t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t3, t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t1, t1, p->x, p384_mod, p384_mp_mod); - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_15(t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t4, t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_15(t3, t3, p->y, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_15(t4, t4, q->y, p384_mod, p384_mp_mod); - /* H = U2 - U1 */ - sp_384_mont_sub_15(t2, t2, t1, p384_mod); - /* R = S2 - S1 */ - sp_384_mont_sub_15(t4, t4, t3, p384_mod); - if (~p->infinity & ~q->infinity & - sp_384_iszero_15(t2) & sp_384_iszero_15(t4) & maskt) { - sp_384_proj_point_dbl_15(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_15(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t5, t5, t2, p384_mod, p384_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_15(z, p->z, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_sqr_15(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_15(x, x, t5, p384_mod); - sp_384_mont_mul_15(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_dbl_15(t3, y, p384_mod); - sp_384_mont_sub_15(x, x, t3, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_15(y, y, x, p384_mod); - sp_384_mont_mul_15(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_15(y, y, t5, p384_mod); - - for (i = 0; i < 15; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 15; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 15; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } + if (err == MP_OKAY && ctx->state != 25) { + err = FP_WOULDBLOCK; } + return err; } +#endif /* WOLFSSL_SP_NONBLOCK */ /* Multiply a number by Montgomery normalizer mod modulus (prime). * @@ -29709,7 +29674,6 @@ static void sp_384_proj_point_dbl_n_15(sp_point_384* p, int i, /* W = Z^4 */ sp_384_mont_sqr_15(w, z, p384_mod, p384_mp_mod); sp_384_mont_sqr_15(w, w, p384_mod, p384_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -29727,7 +29691,7 @@ static void sp_384_proj_point_dbl_n_15(sp_point_384* p, int i, sp_384_mont_sqr_15(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_15(t2, b, p384_mod); sp_384_mont_sub_15(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_15(t2, b, x, p384_mod); sp_384_mont_dbl_lower_15(b, t2, p384_mod); /* Z = Z*Y */ @@ -29757,7 +29721,7 @@ static void sp_384_proj_point_dbl_n_15(sp_point_384* p, int i, sp_384_mont_sqr_15(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_15(t2, b, p384_mod); sp_384_mont_sub_15(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_15(t2, b, x, p384_mod); sp_384_mont_dbl_lower_15(b, t2, p384_mod); /* Z = Z*Y */ @@ -29767,7 +29731,7 @@ static void sp_384_proj_point_dbl_n_15(sp_point_384* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_384_mont_mul_15(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_15(y, y, t1, p384_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_384_div2_15(y, y, p384_mod); } @@ -29824,7 +29788,7 @@ static void sp_384_proj_point_dbl_n_store_15(sp_point_384* r, sp_384_mont_sqr_15(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_15(t2, b, p384_mod); sp_384_mont_sub_15(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_15(t2, b, x, p384_mod); sp_384_mont_dbl_lower_15(b, t2, p384_mod); /* Z = Z*Y */ @@ -29839,7 +29803,6 @@ static void sp_384_proj_point_dbl_n_store_15(sp_point_384* r, /* y = 2*A*(B - X) - Y^4 */ sp_384_mont_mul_15(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_15(y, y, t1, p384_mod); - /* Y = Y/2 */ sp_384_div2_15(r[j].y, y, p384_mod); r[j].infinity = 0; @@ -30260,8 +30223,8 @@ static int sp_384_ecc_mulmod_win_add_sub_15(sp_point_384* r, const sp_point_384* * q Second point to add. * t Temporary ordinate data. */ -static void sp_384_proj_point_add_qz1_15(sp_point_384* r, const sp_point_384* p, - const sp_point_384* q, sp_digit* t) +static void sp_384_proj_point_add_qz1_15(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*15; @@ -30270,12 +30233,17 @@ static void sp_384_proj_point_add_qz1_15(sp_point_384* r, const sp_point_384* p, sp_digit* t5 = t + 8*15; sp_digit* t6 = t + 10*15; - /* Check double */ - (void)sp_384_sub_15(t1, p384_mod, q->y); - sp_384_norm_15(t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) & - (sp_384_cmp_equal_15(p->y, q->y) | sp_384_cmp_equal_15(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_15(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_15(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_15(t4, t4, q->y, p384_mod, p384_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_15(p->x, t2) & + sp_384_cmp_equal_15(p->y, t4)) { sp_384_proj_point_dbl_15(r, p, t); } else { @@ -30287,12 +30255,6 @@ static void sp_384_proj_point_add_qz1_15(sp_point_384* r, const sp_point_384* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_15(t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t4, t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_15(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_15(t4, t4, q->y, p384_mod, p384_mp_mod); /* H = U2 - X1 */ sp_384_mont_sub_15(t2, t2, p->x, p384_mod); /* R = S2 - Y1 */ @@ -33877,7 +33839,7 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_384_norm_15(r); - if (sp_384_iszero_15(r) == 0) { + if (!sp_384_iszero_15(r)) { /* x is modified in calculation of s. */ sp_384_from_mp(x, 15, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -33886,7 +33848,7 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_384_calc_s_15(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_384_iszero_15(s) == 0)) { + if ((err == MP_OKAY) && (!sp_384_iszero_15(s))) { break; } } @@ -36178,7 +36140,7 @@ static void sp_521_map_21(sp_point_521* r, const sp_point_521* p, /* x /= z^2 */ sp_521_mont_mul_21(r->x, p->x, t2, p521_mod, p521_mp_mod); - XMEMSET(r->x + 21, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 21, 0, sizeof(sp_digit) * 21U); sp_521_mont_reduce_21(r->x, p521_mod, p521_mp_mod); /* Reduce x to less than modulus */ n = sp_521_cmp_21(r->x, p521_mod); @@ -36187,7 +36149,7 @@ static void sp_521_map_21(sp_point_521* r, const sp_point_521* p, /* y /= z^3 */ sp_521_mont_mul_21(r->y, p->y, t1, p521_mod, p521_mp_mod); - XMEMSET(r->y + 21, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 21, 0, sizeof(sp_digit) * 21U); sp_521_mont_reduce_21(r->y, p521_mod, p521_mp_mod); /* Reduce y to less than modulus */ n = sp_521_cmp_21(r->y, p521_mod); @@ -36196,7 +36158,6 @@ static void sp_521_map_21(sp_point_521* r, const sp_point_521* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -36381,6 +36342,61 @@ static void sp_521_div2_21(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_521_proj_point_dbl_21(sp_point_521* r, const sp_point_521* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*21; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_521_mont_sqr_21(t1, p->z, p521_mod, p521_mp_mod); + /* Z = Y * Z */ + sp_521_mont_mul_21(z, p->y, p->z, p521_mod, p521_mp_mod); + /* Z = 2Z */ + sp_521_mont_dbl_21(z, z, p521_mod); + /* T2 = X - T1 */ + sp_521_mont_sub_21(t2, p->x, t1, p521_mod); + /* T1 = X + T1 */ + sp_521_mont_add_21(t1, p->x, t1, p521_mod); + /* T2 = T1 * T2 */ + sp_521_mont_mul_21(t2, t1, t2, p521_mod, p521_mp_mod); + /* T1 = 3T2 */ + sp_521_mont_tpl_21(t1, t2, p521_mod); + /* Y = 2Y */ + sp_521_mont_dbl_21(y, p->y, p521_mod); + /* Y = Y * Y */ + sp_521_mont_sqr_21(y, y, p521_mod, p521_mp_mod); + /* T2 = Y * Y */ + sp_521_mont_sqr_21(t2, y, p521_mod, p521_mp_mod); + /* T2 = T2/2 */ + sp_521_div2_21(t2, t2, p521_mod); + /* Y = Y * X */ + sp_521_mont_mul_21(y, y, p->x, p521_mod, p521_mp_mod); + /* X = T1 * T1 */ + sp_521_mont_sqr_21(x, t1, p521_mod, p521_mp_mod); + /* X = X - Y */ + sp_521_mont_sub_21(x, x, y, p521_mod); + /* X = X - Y */ + sp_521_mont_sub_21(x, x, y, p521_mod); + /* Y = Y - X */ + sp_521_mont_sub_lower_21(y, y, x, p521_mod); + /* Y = Y * T1 */ + sp_521_mont_mul_21(y, y, t1, p521_mod, p521_mp_mod); + /* Y = Y - T2 */ + sp_521_mont_sub_21(y, y, t2, p521_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_521_proj_point_dbl_21_ctx { int state; @@ -36391,6 +36407,12 @@ typedef struct sp_521_proj_point_dbl_21_ctx { sp_digit* z; } sp_521_proj_point_dbl_21_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_521_proj_point_dbl_21_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -36515,62 +36537,6 @@ static int sp_521_proj_point_dbl_21_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, co return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_521_proj_point_dbl_21(sp_point_521* r, const sp_point_521* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*21; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_521_mont_sqr_21(t1, p->z, p521_mod, p521_mp_mod); - /* Z = Y * Z */ - sp_521_mont_mul_21(z, p->y, p->z, p521_mod, p521_mp_mod); - /* Z = 2Z */ - sp_521_mont_dbl_21(z, z, p521_mod); - /* T2 = X - T1 */ - sp_521_mont_sub_21(t2, p->x, t1, p521_mod); - /* T1 = X + T1 */ - sp_521_mont_add_21(t1, p->x, t1, p521_mod); - /* T2 = T1 * T2 */ - sp_521_mont_mul_21(t2, t1, t2, p521_mod, p521_mp_mod); - /* T1 = 3T2 */ - sp_521_mont_tpl_21(t1, t2, p521_mod); - /* Y = 2Y */ - sp_521_mont_dbl_21(y, p->y, p521_mod); - /* Y = Y * Y */ - sp_521_mont_sqr_21(y, y, p521_mod, p521_mp_mod); - /* T2 = Y * Y */ - sp_521_mont_sqr_21(t2, y, p521_mod, p521_mp_mod); - /* T2 = T2/2 */ - sp_521_div2_21(t2, t2, p521_mod); - /* Y = Y * X */ - sp_521_mont_mul_21(y, y, p->x, p521_mod, p521_mp_mod); - /* X = T1 * T1 */ - sp_521_mont_sqr_21(x, t1, p521_mod, p521_mp_mod); - /* X = X - Y */ - sp_521_mont_sub_21(x, x, y, p521_mod); - /* X = X - Y */ - sp_521_mont_sub_21(x, x, y, p521_mod); - /* Y = Y - X */ - sp_521_mont_sub_lower_21(y, y, x, p521_mod); - /* Y = Y * T1 */ - sp_521_mont_mul_21(y, y, t1, p521_mod, p521_mp_mod); - /* Y = Y - T2 */ - sp_521_mont_sub_21(y, y, t2, p521_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -36602,6 +36568,7 @@ static int sp_521_iszero_21(const sp_digit* a) a[16] | a[17] | a[18] | a[19] | a[20]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -36609,6 +36576,81 @@ static int sp_521_iszero_21(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_521_proj_point_add_21(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*21; + sp_digit* t3 = t + 4*21; + sp_digit* t4 = t + 6*21; + sp_digit* t5 = t + 8*21; + sp_digit* t6 = t + 10*21; + + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_21(t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t3, t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t1, t1, p->x, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_21(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_21(t3, t3, p->y, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_21(t4, t4, q->y, p521_mod, p521_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_21(t2, t1) & + sp_521_cmp_equal_21(t4, t3)) { + sp_521_proj_point_dbl_21(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_521_mont_sub_21(t2, t2, t1, p521_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_21(t4, t4, t3, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_21(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_21(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_21(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_21(x, x, t5, p521_mod); + sp_521_mont_mul_21(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_21(t3, y, p521_mod); + sp_521_mont_sub_21(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_21(y, y, x, p521_mod); + sp_521_mont_mul_21(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_21(y, y, t5, p521_mod); + for (i = 0; i < 21; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 21; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 21; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_521_proj_point_add_21_ctx { @@ -36627,6 +36669,13 @@ typedef struct sp_521_proj_point_add_21_ctx { sp_digit* z; } sp_521_proj_point_add_21_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_521_proj_point_add_21_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, const sp_point_521* q, sp_digit* t) { @@ -36658,252 +36707,149 @@ static int sp_521_proj_point_add_21_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_521_sub_21(ctx->t1, p521_mod, q->y); - sp_521_norm_21(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_21(p->x, q->x) & sp_521_cmp_equal_21(p->z, q->z) & - (sp_521_cmp_equal_21(p->y, q->y) | sp_521_cmp_equal_21(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_21(ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 2; break; case 2: - err = sp_521_proj_point_dbl_21_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_521_mont_mul_21(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 3; break; case 3: - { + sp_521_mont_mul_21(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_521_mont_sqr_21(ctx->t1, q->z, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_21(ctx->t2, p->z, p521_mod, p521_mp_mod); ctx->state = 5; break; case 5: - sp_521_mont_mul_21(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); ctx->state = 6; break; case 6: - sp_521_mont_mul_21(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_21(ctx->t2, p->z, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_21(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); ctx->state = 8; break; case 8: - sp_521_mont_mul_21(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_21(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); ctx->state = 9; break; case 9: - sp_521_mont_mul_21(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_21(ctx->t2, ctx->t1) & + sp_521_cmp_equal_21(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_521_proj_point_dbl_21(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_521_mont_mul_21(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); + /* H = U2 - U1 */ + sp_521_mont_sub_21(ctx->t2, ctx->t2, ctx->t1, p521_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_21(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_21(ctx->t4, ctx->t4, ctx->t3, p521_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_521_mont_sub_21(ctx->t2, ctx->t2, ctx->t1, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_21(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_521_mont_sub_21(ctx->t4, ctx->t4, ctx->t3, p521_mod); + sp_521_mont_mul_21(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_21(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 15; break; case 15: - sp_521_mont_mul_21(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_21(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 16; break; case 16: - sp_521_mont_mul_21(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_21(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_sqr_21(ctx->x, ctx->t4, p521_mod, p521_mp_mod); ctx->state = 18; break; case 18: - sp_521_mont_mul_21(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sub_21(ctx->x, ctx->x, ctx->t5, p521_mod); ctx->state = 19; break; case 19: - sp_521_mont_sqr_21(ctx->x, ctx->t4, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); ctx->state = 20; break; case 20: - sp_521_mont_sub_21(ctx->x, ctx->x, ctx->t5, p521_mod); + sp_521_mont_dbl_21(ctx->t3, ctx->y, p521_mod); + sp_521_mont_sub_21(ctx->x, ctx->x, ctx->t3, p521_mod); ctx->state = 21; break; case 21: - sp_521_mont_mul_21(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_21(ctx->y, ctx->y, ctx->x, p521_mod); ctx->state = 22; break; case 22: - sp_521_mont_dbl_21(ctx->t3, ctx->y, p521_mod); + sp_521_mont_mul_21(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); ctx->state = 23; break; case 23: - sp_521_mont_sub_21(ctx->x, ctx->x, ctx->t3, p521_mod); + sp_521_mont_sub_21(ctx->y, ctx->y, ctx->t5, p521_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_21(ctx->y, ctx->y, ctx->x, p521_mod); - ctx->state = 25; - break; - case 25: - sp_521_mont_mul_21(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); - ctx->state = 26; - break; - case 26: - sp_521_mont_sub_21(ctx->y, ctx->y, ctx->t5, p521_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 21; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 21; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 21; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_521_proj_point_add_21(sp_point_521* r, - const sp_point_521* p, const sp_point_521* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*21; - sp_digit* t3 = t + 4*21; - sp_digit* t4 = t + 6*21; - sp_digit* t5 = t + 8*21; - sp_digit* t6 = t + 10*21; - - - /* Check double */ - (void)sp_521_sub_21(t1, p521_mod, q->y); - sp_521_norm_21(t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_21(p->x, q->x) & sp_521_cmp_equal_21(p->z, q->z) & - (sp_521_cmp_equal_21(p->y, q->y) | sp_521_cmp_equal_21(p->y, t1))) != 0) { - sp_521_proj_point_dbl_21(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_521_mont_sqr_21(t1, q->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_21(t3, t1, q->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_21(t1, t1, p->x, p521_mod, p521_mp_mod); - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_21(t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_21(t4, t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_21(t2, t2, q->x, p521_mod, p521_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_521_mont_mul_21(t3, t3, p->y, p521_mod, p521_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_21(t4, t4, q->y, p521_mod, p521_mp_mod); - /* H = U2 - U1 */ - sp_521_mont_sub_21(t2, t2, t1, p521_mod); - /* R = S2 - S1 */ - sp_521_mont_sub_21(t4, t4, t3, p521_mod); - if (~p->infinity & ~q->infinity & - sp_521_iszero_21(t2) & sp_521_iszero_21(t4) & maskt) { - sp_521_proj_point_dbl_21(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_21(t5, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_21(y, t1, t5, p521_mod, p521_mp_mod); - sp_521_mont_mul_21(t5, t5, t2, p521_mod, p521_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_21(z, p->z, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_21(z, z, q->z, p521_mod, p521_mp_mod); - sp_521_mont_sqr_21(x, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_21(x, x, t5, p521_mod); - sp_521_mont_mul_21(t5, t5, t3, p521_mod, p521_mp_mod); - sp_521_mont_dbl_21(t3, y, p521_mod); - sp_521_mont_sub_21(x, x, t3, p521_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_21(y, y, x, p521_mod); - sp_521_mont_mul_21(y, y, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_21(y, y, t5, p521_mod); - - for (i = 0; i < 21; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 21; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 21; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Multiply a number by Montgomery normalizer mod modulus (prime). * * r The resulting Montgomery form number. @@ -37258,7 +37204,6 @@ static void sp_521_proj_point_dbl_n_21(sp_point_521* p, int i, /* W = Z^4 */ sp_521_mont_sqr_21(w, z, p521_mod, p521_mp_mod); sp_521_mont_sqr_21(w, w, p521_mod, p521_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -37276,7 +37221,7 @@ static void sp_521_proj_point_dbl_n_21(sp_point_521* p, int i, sp_521_mont_sqr_21(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_21(t2, b, p521_mod); sp_521_mont_sub_21(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_21(t2, b, x, p521_mod); sp_521_mont_dbl_lower_21(b, t2, p521_mod); /* Z = Z*Y */ @@ -37306,7 +37251,7 @@ static void sp_521_proj_point_dbl_n_21(sp_point_521* p, int i, sp_521_mont_sqr_21(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_21(t2, b, p521_mod); sp_521_mont_sub_21(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_21(t2, b, x, p521_mod); sp_521_mont_dbl_lower_21(b, t2, p521_mod); /* Z = Z*Y */ @@ -37316,7 +37261,7 @@ static void sp_521_proj_point_dbl_n_21(sp_point_521* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_521_mont_mul_21(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_21(y, y, t1, p521_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_521_div2_21(y, y, p521_mod); } @@ -37373,7 +37318,7 @@ static void sp_521_proj_point_dbl_n_store_21(sp_point_521* r, sp_521_mont_sqr_21(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_21(t2, b, p521_mod); sp_521_mont_sub_21(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_21(t2, b, x, p521_mod); sp_521_mont_dbl_lower_21(b, t2, p521_mod); /* Z = Z*Y */ @@ -37388,7 +37333,6 @@ static void sp_521_proj_point_dbl_n_store_21(sp_point_521* r, /* y = 2*A*(B - X) - Y^4 */ sp_521_mont_mul_21(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_21(y, y, t1, p521_mod); - /* Y = Y/2 */ sp_521_div2_21(r[j].y, y, p521_mod); r[j].infinity = 0; @@ -37845,8 +37789,8 @@ static int sp_521_ecc_mulmod_win_add_sub_21(sp_point_521* r, const sp_point_521* * q Second point to add. * t Temporary ordinate data. */ -static void sp_521_proj_point_add_qz1_21(sp_point_521* r, const sp_point_521* p, - const sp_point_521* q, sp_digit* t) +static void sp_521_proj_point_add_qz1_21(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*21; @@ -37855,12 +37799,17 @@ static void sp_521_proj_point_add_qz1_21(sp_point_521* r, const sp_point_521* p, sp_digit* t5 = t + 8*21; sp_digit* t6 = t + 10*21; - /* Check double */ - (void)sp_521_sub_21(t1, p521_mod, q->y); - sp_521_norm_21(t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_21(p->x, q->x) & sp_521_cmp_equal_21(p->z, q->z) & - (sp_521_cmp_equal_21(p->y, q->y) | sp_521_cmp_equal_21(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_21(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_21(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_21(t4, t4, q->y, p521_mod, p521_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_21(p->x, t2) & + sp_521_cmp_equal_21(p->y, t4)) { sp_521_proj_point_dbl_21(r, p, t); } else { @@ -37872,12 +37821,6 @@ static void sp_521_proj_point_add_qz1_21(sp_point_521* r, const sp_point_521* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_21(t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_21(t4, t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_21(t2, t2, q->x, p521_mod, p521_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_21(t4, t4, q->y, p521_mod, p521_mp_mod); /* H = U2 - X1 */ sp_521_mont_sub_21(t2, t2, p->x, p521_mod); /* R = S2 - Y1 */ @@ -42032,7 +41975,7 @@ int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_521_norm_21(r); - if (sp_521_iszero_21(r) == 0) { + if (!sp_521_iszero_21(r)) { /* x is modified in calculation of s. */ sp_521_from_mp(x, 21, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -42047,7 +41990,7 @@ int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_521_calc_s_21(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_521_iszero_21(s) == 0)) { + if ((err == MP_OKAY) && (!sp_521_iszero_21(s))) { break; } } @@ -45012,7 +44955,7 @@ static void sp_1024_map_42(sp_point_1024* r, const sp_point_1024* p, /* x /= z^2 */ sp_1024_mont_mul_42(r->x, p->x, t2, p1024_mod, p1024_mp_mod); - XMEMSET(r->x + 42, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 42, 0, sizeof(sp_digit) * 42U); sp_1024_mont_reduce_42(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_42(r->x, p1024_mod); @@ -45021,7 +44964,7 @@ static void sp_1024_map_42(sp_point_1024* r, const sp_point_1024* p, /* y /= z^3 */ sp_1024_mont_mul_42(r->y, p->y, t1, p1024_mod, p1024_mp_mod); - XMEMSET(r->y + 42, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 42, 0, sizeof(sp_digit) * 42U); sp_1024_mont_reduce_42(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_42(r->y, p1024_mod); @@ -45030,7 +44973,6 @@ static void sp_1024_map_42(sp_point_1024* r, const sp_point_1024* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -45184,6 +45126,61 @@ static void sp_1024_div2_42(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_1024_proj_point_dbl_42(sp_point_1024* r, const sp_point_1024* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*42; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_1024_mont_sqr_42(t1, p->z, p1024_mod, p1024_mp_mod); + /* Z = Y * Z */ + sp_1024_mont_mul_42(z, p->y, p->z, p1024_mod, p1024_mp_mod); + /* Z = 2Z */ + sp_1024_mont_dbl_42(z, z, p1024_mod); + /* T2 = X - T1 */ + sp_1024_mont_sub_42(t2, p->x, t1, p1024_mod); + /* T1 = X + T1 */ + sp_1024_mont_add_42(t1, p->x, t1, p1024_mod); + /* T2 = T1 * T2 */ + sp_1024_mont_mul_42(t2, t1, t2, p1024_mod, p1024_mp_mod); + /* T1 = 3T2 */ + sp_1024_mont_tpl_42(t1, t2, p1024_mod); + /* Y = 2Y */ + sp_1024_mont_dbl_42(y, p->y, p1024_mod); + /* Y = Y * Y */ + sp_1024_mont_sqr_42(y, y, p1024_mod, p1024_mp_mod); + /* T2 = Y * Y */ + sp_1024_mont_sqr_42(t2, y, p1024_mod, p1024_mp_mod); + /* T2 = T2/2 */ + sp_1024_div2_42(t2, t2, p1024_mod); + /* Y = Y * X */ + sp_1024_mont_mul_42(y, y, p->x, p1024_mod, p1024_mp_mod); + /* X = T1 * T1 */ + sp_1024_mont_sqr_42(x, t1, p1024_mod, p1024_mp_mod); + /* X = X - Y */ + sp_1024_mont_sub_42(x, x, y, p1024_mod); + /* X = X - Y */ + sp_1024_mont_sub_42(x, x, y, p1024_mod); + /* Y = Y - X */ + sp_1024_mont_sub_lower_42(y, y, x, p1024_mod); + /* Y = Y * T1 */ + sp_1024_mont_mul_42(y, y, t1, p1024_mod, p1024_mp_mod); + /* Y = Y - T2 */ + sp_1024_mont_sub_42(y, y, t2, p1024_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_dbl_42_ctx { int state; @@ -45194,6 +45191,12 @@ typedef struct sp_1024_proj_point_dbl_42_ctx { sp_digit* z; } sp_1024_proj_point_dbl_42_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_1024_proj_point_dbl_42_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -45283,97 +45286,41 @@ static int sp_1024_proj_point_dbl_42_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, break; case 14: /* X = X - Y */ - sp_1024_mont_sub_42(ctx->x, ctx->x, ctx->y, p1024_mod); - ctx->state = 15; - break; - case 15: - /* X = X - Y */ - sp_1024_mont_sub_42(ctx->x, ctx->x, ctx->y, p1024_mod); - ctx->state = 16; - break; - case 16: - /* Y = Y - X */ - sp_1024_mont_sub_lower_42(ctx->y, ctx->y, ctx->x, p1024_mod); - ctx->state = 17; - break; - case 17: - /* Y = Y * T1 */ - sp_1024_mont_mul_42(ctx->y, ctx->y, ctx->t1, p1024_mod, p1024_mp_mod); - ctx->state = 18; - break; - case 18: - /* Y = Y - T2 */ - sp_1024_mont_sub_42(ctx->y, ctx->y, ctx->t2, p1024_mod); - ctx->state = 19; - /* fall-through */ - case 19: - err = MP_OKAY; - break; - } - - if (err == MP_OKAY && ctx->state != 19) { - err = FP_WOULDBLOCK; - } - - return err; -} -#endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_1024_proj_point_dbl_42(sp_point_1024* r, const sp_point_1024* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*42; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_1024_mont_sqr_42(t1, p->z, p1024_mod, p1024_mp_mod); - /* Z = Y * Z */ - sp_1024_mont_mul_42(z, p->y, p->z, p1024_mod, p1024_mp_mod); - /* Z = 2Z */ - sp_1024_mont_dbl_42(z, z, p1024_mod); - /* T2 = X - T1 */ - sp_1024_mont_sub_42(t2, p->x, t1, p1024_mod); - /* T1 = X + T1 */ - sp_1024_mont_add_42(t1, p->x, t1, p1024_mod); - /* T2 = T1 * T2 */ - sp_1024_mont_mul_42(t2, t1, t2, p1024_mod, p1024_mp_mod); - /* T1 = 3T2 */ - sp_1024_mont_tpl_42(t1, t2, p1024_mod); - /* Y = 2Y */ - sp_1024_mont_dbl_42(y, p->y, p1024_mod); - /* Y = Y * Y */ - sp_1024_mont_sqr_42(y, y, p1024_mod, p1024_mp_mod); - /* T2 = Y * Y */ - sp_1024_mont_sqr_42(t2, y, p1024_mod, p1024_mp_mod); - /* T2 = T2/2 */ - sp_1024_div2_42(t2, t2, p1024_mod); - /* Y = Y * X */ - sp_1024_mont_mul_42(y, y, p->x, p1024_mod, p1024_mp_mod); - /* X = T1 * T1 */ - sp_1024_mont_sqr_42(x, t1, p1024_mod, p1024_mp_mod); - /* X = X - Y */ - sp_1024_mont_sub_42(x, x, y, p1024_mod); - /* X = X - Y */ - sp_1024_mont_sub_42(x, x, y, p1024_mod); - /* Y = Y - X */ - sp_1024_mont_sub_lower_42(y, y, x, p1024_mod); - /* Y = Y * T1 */ - sp_1024_mont_mul_42(y, y, t1, p1024_mod, p1024_mp_mod); - /* Y = Y - T2 */ - sp_1024_mont_sub_42(y, y, t2, p1024_mod); -} + sp_1024_mont_sub_42(ctx->x, ctx->x, ctx->y, p1024_mod); + ctx->state = 15; + break; + case 15: + /* X = X - Y */ + sp_1024_mont_sub_42(ctx->x, ctx->x, ctx->y, p1024_mod); + ctx->state = 16; + break; + case 16: + /* Y = Y - X */ + sp_1024_mont_sub_lower_42(ctx->y, ctx->y, ctx->x, p1024_mod); + ctx->state = 17; + break; + case 17: + /* Y = Y * T1 */ + sp_1024_mont_mul_42(ctx->y, ctx->y, ctx->t1, p1024_mod, p1024_mp_mod); + ctx->state = 18; + break; + case 18: + /* Y = Y - T2 */ + sp_1024_mont_sub_42(ctx->y, ctx->y, ctx->t2, p1024_mod); + ctx->state = 19; + /* fall-through */ + case 19: + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 19) { + err = FP_WOULDBLOCK; + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -45415,6 +45362,7 @@ static int sp_1024_iszero_42(const sp_digit* a) a[40] | a[41]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -45422,6 +45370,81 @@ static int sp_1024_iszero_42(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_1024_proj_point_add_42(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*42; + sp_digit* t3 = t + 4*42; + sp_digit* t4 = t + 6*42; + sp_digit* t5 = t + 8*42; + sp_digit* t6 = t + 10*42; + + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_42(t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t3, t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t1, t1, p->x, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_42(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_42(t3, t3, p->y, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_42(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_42(t2, t1) & + sp_1024_cmp_equal_42(t4, t3)) { + sp_1024_proj_point_dbl_42(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_1024_mont_sub_42(t2, t2, t1, p1024_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_42(t4, t4, t3, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_42(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_42(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_42(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_42(x, x, t5, p1024_mod); + sp_1024_mont_mul_42(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_42(t3, y, p1024_mod); + sp_1024_mont_sub_42(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_42(y, y, x, p1024_mod); + sp_1024_mont_mul_42(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_42(y, y, t5, p1024_mod); + for (i = 0; i < 42; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 42; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 42; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_add_42_ctx { @@ -45440,6 +45463,13 @@ typedef struct sp_1024_proj_point_add_42_ctx { sp_digit* z; } sp_1024_proj_point_add_42_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_1024_proj_point_add_42_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { @@ -45471,252 +45501,149 @@ static int sp_1024_proj_point_add_42_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_1024_sub_42(ctx->t1, p1024_mod, q->y); - sp_1024_norm_42(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_42(p->x, q->x) & sp_1024_cmp_equal_42(p->z, q->z) & - (sp_1024_cmp_equal_42(p->y, q->y) | sp_1024_cmp_equal_42(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_42(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 2; break; case 2: - err = sp_1024_proj_point_dbl_42_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_1024_mont_mul_42(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 3; break; case 3: - { + sp_1024_mont_mul_42(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_42(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_42(ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 5; break; case 5: - sp_1024_mont_mul_42(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 6; break; case 6: - sp_1024_mont_mul_42(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_42(ctx->t2, p->z, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_42(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); ctx->state = 8; break; case 8: - sp_1024_mont_mul_42(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_42(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); ctx->state = 9; break; case 9: - sp_1024_mont_mul_42(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_42(ctx->t2, ctx->t1) & + sp_1024_cmp_equal_42(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_1024_proj_point_dbl_42(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_42(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); + /* H = U2 - U1 */ + sp_1024_mont_sub_42(ctx->t2, ctx->t2, ctx->t1, p1024_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_42(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_42(ctx->t4, ctx->t4, ctx->t3, p1024_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_1024_mont_sub_42(ctx->t2, ctx->t2, ctx->t1, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_42(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_1024_mont_sub_42(ctx->t4, ctx->t4, ctx->t3, p1024_mod); + sp_1024_mont_mul_42(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_42(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 15; break; case 15: - sp_1024_mont_mul_42(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_42(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 16; break; case 16: - sp_1024_mont_mul_42(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_42(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_42(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 18; break; case 18: - sp_1024_mont_mul_42(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_42(ctx->x, ctx->x, ctx->t5, p1024_mod); ctx->state = 19; break; case 19: - sp_1024_mont_sqr_42(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); ctx->state = 20; break; case 20: - sp_1024_mont_sub_42(ctx->x, ctx->x, ctx->t5, p1024_mod); + sp_1024_mont_dbl_42(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_sub_42(ctx->x, ctx->x, ctx->t3, p1024_mod); ctx->state = 21; break; case 21: - sp_1024_mont_mul_42(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_42(ctx->y, ctx->y, ctx->x, p1024_mod); ctx->state = 22; break; case 22: - sp_1024_mont_dbl_42(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_mul_42(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 23; break; case 23: - sp_1024_mont_sub_42(ctx->x, ctx->x, ctx->t3, p1024_mod); + sp_1024_mont_sub_42(ctx->y, ctx->y, ctx->t5, p1024_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_42(ctx->y, ctx->y, ctx->x, p1024_mod); - ctx->state = 25; - break; - case 25: - sp_1024_mont_mul_42(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); - ctx->state = 26; - break; - case 26: - sp_1024_mont_sub_42(ctx->y, ctx->y, ctx->t5, p1024_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 42; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 42; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 42; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_1024_proj_point_add_42(sp_point_1024* r, - const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*42; - sp_digit* t3 = t + 4*42; - sp_digit* t4 = t + 6*42; - sp_digit* t5 = t + 8*42; - sp_digit* t6 = t + 10*42; - - - /* Check double */ - (void)sp_1024_mont_sub_42(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_42(t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_42(p->x, q->x) & sp_1024_cmp_equal_42(p->z, q->z) & - (sp_1024_cmp_equal_42(p->y, q->y) | sp_1024_cmp_equal_42(p->y, t1))) != 0) { - sp_1024_proj_point_dbl_42(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_42(t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t3, t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t1, t1, p->x, p1024_mod, p1024_mp_mod); - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_42(t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t4, t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_42(t3, t3, p->y, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_42(t4, t4, q->y, p1024_mod, p1024_mp_mod); - /* H = U2 - U1 */ - sp_1024_mont_sub_42(t2, t2, t1, p1024_mod); - /* R = S2 - S1 */ - sp_1024_mont_sub_42(t4, t4, t3, p1024_mod); - if (~p->infinity & ~q->infinity & - sp_1024_iszero_42(t2) & sp_1024_iszero_42(t4) & maskt) { - sp_1024_proj_point_dbl_42(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_42(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t5, t5, t2, p1024_mod, p1024_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_42(z, p->z, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_42(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_42(x, x, t5, p1024_mod); - sp_1024_mont_mul_42(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_42(t3, y, p1024_mod); - sp_1024_mont_sub_42(x, x, t3, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_42(y, y, x, p1024_mod); - sp_1024_mont_mul_42(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_42(y, y, t5, p1024_mod); - - for (i = 0; i < 42; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 42; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 42; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - #ifdef WOLFSSL_SP_SMALL /* Multiply the point by the scalar and return the result. * If map is true then convert result to affine coordinates. @@ -46095,7 +46022,6 @@ static void sp_1024_proj_point_dbl_n_42(sp_point_1024* p, int i, /* W = Z^4 */ sp_1024_mont_sqr_42(w, z, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_42(w, w, p1024_mod, p1024_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -46113,7 +46039,7 @@ static void sp_1024_proj_point_dbl_n_42(sp_point_1024* p, int i, sp_1024_mont_sqr_42(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_42(t2, b, p1024_mod); sp_1024_mont_sub_42(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_42(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_42(b, t2, p1024_mod); /* Z = Z*Y */ @@ -46143,7 +46069,7 @@ static void sp_1024_proj_point_dbl_n_42(sp_point_1024* p, int i, sp_1024_mont_sqr_42(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_42(t2, b, p1024_mod); sp_1024_mont_sub_42(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_42(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_42(b, t2, p1024_mod); /* Z = Z*Y */ @@ -46153,7 +46079,7 @@ static void sp_1024_proj_point_dbl_n_42(sp_point_1024* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_1024_mont_mul_42(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_42(y, y, t1, p1024_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_1024_div2_42(y, y, p1024_mod); } @@ -46210,7 +46136,7 @@ static void sp_1024_proj_point_dbl_n_store_42(sp_point_1024* r, sp_1024_mont_sqr_42(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_42(t2, b, p1024_mod); sp_1024_mont_sub_42(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_42(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_42(b, t2, p1024_mod); /* Z = Z*Y */ @@ -46225,7 +46151,6 @@ static void sp_1024_proj_point_dbl_n_store_42(sp_point_1024* r, /* y = 2*A*(B - X) - Y^4 */ sp_1024_mont_mul_42(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_42(y, y, t1, p1024_mod); - /* Y = Y/2 */ sp_1024_div2_42(r[j].y, y, p1024_mod); r[j].infinity = 0; @@ -46550,8 +46475,8 @@ static int sp_1024_ecc_mulmod_win_add_sub_42(sp_point_1024* r, const sp_point_10 * q Second point to add. * t Temporary ordinate data. */ -static void sp_1024_proj_point_add_qz1_42(sp_point_1024* r, const sp_point_1024* p, - const sp_point_1024* q, sp_digit* t) +static void sp_1024_proj_point_add_qz1_42(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*42; @@ -46560,12 +46485,17 @@ static void sp_1024_proj_point_add_qz1_42(sp_point_1024* r, const sp_point_1024* sp_digit* t5 = t + 8*42; sp_digit* t6 = t + 10*42; - /* Check double */ - (void)sp_1024_mont_sub_42(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_42(t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_42(p->x, q->x) & sp_1024_cmp_equal_42(p->z, q->z) & - (sp_1024_cmp_equal_42(p->y, q->y) | sp_1024_cmp_equal_42(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_42(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_42(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_42(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_42(p->x, t2) & + sp_1024_cmp_equal_42(p->y, t4)) { sp_1024_proj_point_dbl_42(r, p, t); } else { @@ -46577,12 +46507,6 @@ static void sp_1024_proj_point_add_qz1_42(sp_point_1024* r, const sp_point_1024* sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_42(t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t4, t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_42(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_42(t4, t4, q->y, p1024_mod, p1024_mp_mod); /* H = U2 - X1 */ sp_1024_mont_sub_42(t2, t2, p->x, p1024_mod); /* R = S2 - Y1 */ diff --git a/wolfcrypt/src/sp_c64.c b/wolfcrypt/src/sp_c64.c index 21f2653952..47a494a99c 100644 --- a/wolfcrypt/src/sp_c64.c +++ b/wolfcrypt/src/sp_c64.c @@ -22056,7 +22056,7 @@ static void sp_256_map_5(sp_point_256* r, const sp_point_256* p, /* x /= z^2 */ sp_256_mont_mul_5(r->x, p->x, t2, p256_mod, p256_mp_mod); - XMEMSET(r->x + 5, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 5, 0, sizeof(sp_digit) * 5U); sp_256_mont_reduce_5(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_5(r->x, p256_mod); @@ -22065,7 +22065,7 @@ static void sp_256_map_5(sp_point_256* r, const sp_point_256* p, /* y /= z^3 */ sp_256_mont_mul_5(r->y, p->y, t1, p256_mod, p256_mp_mod); - XMEMSET(r->y + 5, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 5, 0, sizeof(sp_digit) * 5U); sp_256_mont_reduce_5(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_5(r->y, p256_mod); @@ -22074,7 +22074,6 @@ static void sp_256_map_5(sp_point_256* r, const sp_point_256* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -22231,6 +22230,61 @@ static void sp_256_div2_5(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_256_proj_point_dbl_5(sp_point_256* r, const sp_point_256* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*5; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_5(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_5(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_5(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_5(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_5(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_5(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_5(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_5(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_5(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_5(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_5(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_5(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_5(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_5(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_lower_5(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_5(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_5(y, y, t2, p256_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_dbl_5_ctx { int state; @@ -22241,6 +22295,12 @@ typedef struct sp_256_proj_point_dbl_5_ctx { sp_digit* z; } sp_256_proj_point_dbl_5_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_256_proj_point_dbl_5_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -22365,62 +22425,6 @@ static int sp_256_proj_point_dbl_5_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_256_proj_point_dbl_5(sp_point_256* r, const sp_point_256* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*5; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_256_mont_sqr_5(t1, p->z, p256_mod, p256_mp_mod); - /* Z = Y * Z */ - sp_256_mont_mul_5(z, p->y, p->z, p256_mod, p256_mp_mod); - /* Z = 2Z */ - sp_256_mont_dbl_5(z, z, p256_mod); - /* T2 = X - T1 */ - sp_256_mont_sub_5(t2, p->x, t1, p256_mod); - /* T1 = X + T1 */ - sp_256_mont_add_5(t1, p->x, t1, p256_mod); - /* T2 = T1 * T2 */ - sp_256_mont_mul_5(t2, t1, t2, p256_mod, p256_mp_mod); - /* T1 = 3T2 */ - sp_256_mont_tpl_5(t1, t2, p256_mod); - /* Y = 2Y */ - sp_256_mont_dbl_5(y, p->y, p256_mod); - /* Y = Y * Y */ - sp_256_mont_sqr_5(y, y, p256_mod, p256_mp_mod); - /* T2 = Y * Y */ - sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod); - /* T2 = T2/2 */ - sp_256_div2_5(t2, t2, p256_mod); - /* Y = Y * X */ - sp_256_mont_mul_5(y, y, p->x, p256_mod, p256_mp_mod); - /* X = T1 * T1 */ - sp_256_mont_sqr_5(x, t1, p256_mod, p256_mp_mod); - /* X = X - Y */ - sp_256_mont_sub_5(x, x, y, p256_mod); - /* X = X - Y */ - sp_256_mont_sub_5(x, x, y, p256_mod); - /* Y = Y - X */ - sp_256_mont_sub_lower_5(y, y, x, p256_mod); - /* Y = Y * T1 */ - sp_256_mont_mul_5(y, y, t1, p256_mod, p256_mp_mod); - /* Y = Y - T2 */ - sp_256_mont_sub_5(y, y, t2, p256_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -22445,6 +22449,7 @@ static int sp_256_iszero_5(const sp_digit* a) return (a[0] | a[1] | a[2] | a[3] | a[4]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -22452,6 +22457,81 @@ static int sp_256_iszero_5(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_256_proj_point_add_5(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*5; + sp_digit* t3 = t + 4*5; + sp_digit* t4 = t + 6*5; + sp_digit* t5 = t + 8*5; + sp_digit* t6 = t + 10*5; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_5(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t1, t1, p->x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_5(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_5(t3, t3, p->y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_5(t4, t4, q->y, p256_mod, p256_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_5(t2, t1) & + sp_256_cmp_equal_5(t4, t3)) { + sp_256_proj_point_dbl_5(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_256_mont_sub_5(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_5(t4, t4, t3, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_5(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_5(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_5(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(x, x, t5, p256_mod); + sp_256_mont_mul_5(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_dbl_5(t3, y, p256_mod); + sp_256_mont_sub_5(x, x, t3, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_5(y, y, x, p256_mod); + sp_256_mont_mul_5(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(y, y, t5, p256_mod); + for (i = 0; i < 5; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 5; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 5; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_add_5_ctx { @@ -22470,6 +22550,13 @@ typedef struct sp_256_proj_point_add_5_ctx { sp_digit* z; } sp_256_proj_point_add_5_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_256_proj_point_add_5_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, const sp_point_256* q, sp_digit* t) { @@ -22501,252 +22588,149 @@ static int sp_256_proj_point_add_5_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_256_sub_5(ctx->t1, p256_mod, q->y); - sp_256_norm_5(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) & - (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_5(ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 2; break; case 2: - err = sp_256_proj_point_dbl_5_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_256_mont_mul_5(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 3; break; case 3: - { + sp_256_mont_mul_5(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_5(ctx->t1, q->z, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_5(ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 5; break; case 5: - sp_256_mont_mul_5(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 6; break; case 6: - sp_256_mont_mul_5(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_5(ctx->t2, p->z, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_5(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); ctx->state = 8; break; case 8: - sp_256_mont_mul_5(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_5(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); ctx->state = 9; break; case 9: - sp_256_mont_mul_5(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_5(ctx->t2, ctx->t1) & + sp_256_cmp_equal_5(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_256_proj_point_dbl_5(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_5(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_5(ctx->t2, ctx->t2, ctx->t1, p256_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_5(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_5(ctx->t4, ctx->t4, ctx->t3, p256_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_256_mont_sub_5(ctx->t2, ctx->t2, ctx->t1, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_5(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_256_mont_sub_5(ctx->t4, ctx->t4, ctx->t3, p256_mod); + sp_256_mont_mul_5(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_5(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 15; break; case 15: - sp_256_mont_mul_5(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_5(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 16; break; case 16: - sp_256_mont_mul_5(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_5(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_sqr_5(ctx->x, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 18; break; case 18: - sp_256_mont_mul_5(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sub_5(ctx->x, ctx->x, ctx->t5, p256_mod); ctx->state = 19; break; case 19: - sp_256_mont_sqr_5(ctx->x, ctx->t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); ctx->state = 20; break; case 20: - sp_256_mont_sub_5(ctx->x, ctx->x, ctx->t5, p256_mod); + sp_256_mont_dbl_5(ctx->t3, ctx->y, p256_mod); + sp_256_mont_sub_5(ctx->x, ctx->x, ctx->t3, p256_mod); ctx->state = 21; break; case 21: - sp_256_mont_mul_5(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_5(ctx->y, ctx->y, ctx->x, p256_mod); ctx->state = 22; break; case 22: - sp_256_mont_dbl_5(ctx->t3, ctx->y, p256_mod); + sp_256_mont_mul_5(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 23; break; case 23: - sp_256_mont_sub_5(ctx->x, ctx->x, ctx->t3, p256_mod); + sp_256_mont_sub_5(ctx->y, ctx->y, ctx->t5, p256_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_5(ctx->y, ctx->y, ctx->x, p256_mod); - ctx->state = 25; - break; - case 25: - sp_256_mont_mul_5(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); - ctx->state = 26; - break; - case 26: - sp_256_mont_sub_5(ctx->y, ctx->y, ctx->t5, p256_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 5; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 5; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 5; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_256_proj_point_add_5(sp_point_256* r, - const sp_point_256* p, const sp_point_256* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*5; - sp_digit* t3 = t + 4*5; - sp_digit* t4 = t + 6*5; - sp_digit* t5 = t + 8*5; - sp_digit* t6 = t + 10*5; - - - /* Check double */ - (void)sp_256_sub_5(t1, p256_mod, q->y); - sp_256_norm_5(t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) & - (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, t1))) != 0) { - sp_256_proj_point_dbl_5(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_5(t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t3, t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t1, t1, p->x, p256_mod, p256_mp_mod); - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_5(t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t4, t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_5(t3, t3, p->y, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_5(t4, t4, q->y, p256_mod, p256_mp_mod); - /* H = U2 - U1 */ - sp_256_mont_sub_5(t2, t2, t1, p256_mod); - /* R = S2 - S1 */ - sp_256_mont_sub_5(t4, t4, t3, p256_mod); - if (~p->infinity & ~q->infinity & - sp_256_iszero_5(t2) & sp_256_iszero_5(t4) & maskt) { - sp_256_proj_point_dbl_5(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_5(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t5, t5, t2, p256_mod, p256_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_5(z, p->z, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_sqr_5(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_5(x, x, t5, p256_mod); - sp_256_mont_mul_5(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_dbl_5(t3, y, p256_mod); - sp_256_mont_sub_5(x, x, t3, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_5(y, y, x, p256_mod); - sp_256_mont_mul_5(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_5(y, y, t5, p256_mod); - - for (i = 0; i < 5; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 5; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 5; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Multiply a number by Montgomery normalizer mod modulus (prime). * * r The resulting Montgomery form number. @@ -23160,7 +23144,6 @@ static void sp_256_proj_point_dbl_n_5(sp_point_256* p, int i, /* W = Z^4 */ sp_256_mont_sqr_5(w, z, p256_mod, p256_mp_mod); sp_256_mont_sqr_5(w, w, p256_mod, p256_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -23178,7 +23161,7 @@ static void sp_256_proj_point_dbl_n_5(sp_point_256* p, int i, sp_256_mont_sqr_5(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_5(t2, b, p256_mod); sp_256_mont_sub_5(x, x, t2, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_sub_lower_5(t2, b, x, p256_mod); sp_256_mont_dbl_lower_5(b, t2, p256_mod); /* Z = Z*Y */ @@ -23208,7 +23191,7 @@ static void sp_256_proj_point_dbl_n_5(sp_point_256* p, int i, sp_256_mont_sqr_5(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_5(t2, b, p256_mod); sp_256_mont_sub_5(x, x, t2, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_sub_lower_5(t2, b, x, p256_mod); sp_256_mont_dbl_lower_5(b, t2, p256_mod); /* Z = Z*Y */ @@ -23218,7 +23201,7 @@ static void sp_256_proj_point_dbl_n_5(sp_point_256* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_256_mont_mul_5(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_5(y, y, t1, p256_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_256_div2_5(y, y, p256_mod); } @@ -23275,7 +23258,7 @@ static void sp_256_proj_point_dbl_n_store_5(sp_point_256* r, sp_256_mont_sqr_5(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_5(t2, b, p256_mod); sp_256_mont_sub_5(x, x, t2, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_sub_lower_5(t2, b, x, p256_mod); sp_256_mont_dbl_lower_5(b, t2, p256_mod); /* Z = Z*Y */ @@ -23290,7 +23273,6 @@ static void sp_256_proj_point_dbl_n_store_5(sp_point_256* r, /* y = 2*A*(B - X) - Y^4 */ sp_256_mont_mul_5(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_5(y, y, t1, p256_mod); - /* Y = Y/2 */ sp_256_div2_5(r[j].y, y, p256_mod); r[j].infinity = 0; @@ -23651,8 +23633,8 @@ static int sp_256_ecc_mulmod_win_add_sub_5(sp_point_256* r, const sp_point_256* * q Second point to add. * t Temporary ordinate data. */ -static void sp_256_proj_point_add_qz1_5(sp_point_256* r, const sp_point_256* p, - const sp_point_256* q, sp_digit* t) +static void sp_256_proj_point_add_qz1_5(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*5; @@ -23661,12 +23643,17 @@ static void sp_256_proj_point_add_qz1_5(sp_point_256* r, const sp_point_256* p, sp_digit* t5 = t + 8*5; sp_digit* t6 = t + 10*5; - /* Check double */ - (void)sp_256_sub_5(t1, p256_mod, q->y); - sp_256_norm_5(t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) & - (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_5(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_5(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_5(t4, t4, q->y, p256_mod, p256_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_5(p->x, t2) & + sp_256_cmp_equal_5(p->y, t4)) { sp_256_proj_point_dbl_5(r, p, t); } else { @@ -23678,12 +23665,6 @@ static void sp_256_proj_point_add_qz1_5(sp_point_256* r, const sp_point_256* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_5(t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t4, t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_5(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_5(t4, t4, q->y, p256_mod, p256_mp_mod); /* H = U2 - X1 */ sp_256_mont_sub_5(t2, t2, p->x, p256_mod); /* R = S2 - Y1 */ @@ -26689,7 +26670,7 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_256_norm_5(r); - if (sp_256_iszero_5(r) == 0) { + if (!sp_256_iszero_5(r)) { /* x is modified in calculation of s. */ sp_256_from_mp(x, 5, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -26698,7 +26679,7 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_256_calc_s_5(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_256_iszero_5(s) == 0)) { + if ((err == MP_OKAY) && (!sp_256_iszero_5(s))) { break; } } @@ -28978,7 +28959,7 @@ static void sp_384_map_7(sp_point_384* r, const sp_point_384* p, /* x /= z^2 */ sp_384_mont_mul_7(r->x, p->x, t2, p384_mod, p384_mp_mod); - XMEMSET(r->x + 7, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 7, 0, sizeof(sp_digit) * 7U); sp_384_mont_reduce_7(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_7(r->x, p384_mod); @@ -28987,7 +28968,7 @@ static void sp_384_map_7(sp_point_384* r, const sp_point_384* p, /* y /= z^3 */ sp_384_mont_mul_7(r->y, p->y, t1, p384_mod, p384_mp_mod); - XMEMSET(r->y + 7, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 7, 0, sizeof(sp_digit) * 7U); sp_384_mont_reduce_7(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_7(r->y, p384_mod); @@ -28996,7 +28977,6 @@ static void sp_384_map_7(sp_point_384* r, const sp_point_384* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -29157,6 +29137,61 @@ static void sp_384_div2_7(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_384_proj_point_dbl_7(sp_point_384* r, const sp_point_384* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*7; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_7(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_7(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_7(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_7(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_7(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_7(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_7(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_7(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_7(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_7(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_div2_7(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_7(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_7(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_7(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_7(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_lower_7(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_7(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_7(y, y, t2, p384_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_dbl_7_ctx { int state; @@ -29167,6 +29202,12 @@ typedef struct sp_384_proj_point_dbl_7_ctx { sp_digit* z; } sp_384_proj_point_dbl_7_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_384_proj_point_dbl_7_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -29291,62 +29332,6 @@ static int sp_384_proj_point_dbl_7_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_384_proj_point_dbl_7(sp_point_384* r, const sp_point_384* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*7; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_384_mont_sqr_7(t1, p->z, p384_mod, p384_mp_mod); - /* Z = Y * Z */ - sp_384_mont_mul_7(z, p->y, p->z, p384_mod, p384_mp_mod); - /* Z = 2Z */ - sp_384_mont_dbl_7(z, z, p384_mod); - /* T2 = X - T1 */ - sp_384_mont_sub_7(t2, p->x, t1, p384_mod); - /* T1 = X + T1 */ - sp_384_mont_add_7(t1, p->x, t1, p384_mod); - /* T2 = T1 * T2 */ - sp_384_mont_mul_7(t2, t1, t2, p384_mod, p384_mp_mod); - /* T1 = 3T2 */ - sp_384_mont_tpl_7(t1, t2, p384_mod); - /* Y = 2Y */ - sp_384_mont_dbl_7(y, p->y, p384_mod); - /* Y = Y * Y */ - sp_384_mont_sqr_7(y, y, p384_mod, p384_mp_mod); - /* T2 = Y * Y */ - sp_384_mont_sqr_7(t2, y, p384_mod, p384_mp_mod); - /* T2 = T2/2 */ - sp_384_div2_7(t2, t2, p384_mod); - /* Y = Y * X */ - sp_384_mont_mul_7(y, y, p->x, p384_mod, p384_mp_mod); - /* X = T1 * T1 */ - sp_384_mont_sqr_7(x, t1, p384_mod, p384_mp_mod); - /* X = X - Y */ - sp_384_mont_sub_7(x, x, y, p384_mod); - /* X = X - Y */ - sp_384_mont_sub_7(x, x, y, p384_mod); - /* Y = Y - X */ - sp_384_mont_sub_lower_7(y, y, x, p384_mod); - /* Y = Y * T1 */ - sp_384_mont_mul_7(y, y, t1, p384_mod, p384_mp_mod); - /* Y = Y - T2 */ - sp_384_mont_sub_7(y, y, t2, p384_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -29372,6 +29357,7 @@ static int sp_384_iszero_7(const sp_digit* a) return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -29379,6 +29365,81 @@ static int sp_384_iszero_7(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_384_proj_point_add_7(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*7; + sp_digit* t3 = t + 4*7; + sp_digit* t4 = t + 6*7; + sp_digit* t5 = t + 8*7; + sp_digit* t6 = t + 10*7; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_7(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t1, t1, p->x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_7(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_7(t3, t3, p->y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_7(t4, t4, q->y, p384_mod, p384_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_7(t2, t1) & + sp_384_cmp_equal_7(t4, t3)) { + sp_384_proj_point_dbl_7(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_384_mont_sub_7(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_7(t4, t4, t3, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_7(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_7(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_7(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(x, x, t5, p384_mod); + sp_384_mont_mul_7(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_7(t3, y, p384_mod); + sp_384_mont_sub_7(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_7(y, y, x, p384_mod); + sp_384_mont_mul_7(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(y, y, t5, p384_mod); + for (i = 0; i < 7; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 7; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 7; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_add_7_ctx { @@ -29397,6 +29458,13 @@ typedef struct sp_384_proj_point_add_7_ctx { sp_digit* z; } sp_384_proj_point_add_7_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_384_proj_point_add_7_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, const sp_point_384* q, sp_digit* t) { @@ -29428,251 +29496,148 @@ static int sp_384_proj_point_add_7_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_384_sub_7(ctx->t1, p384_mod, q->y); - sp_384_norm_7(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) & - (sp_384_cmp_equal_7(p->y, q->y) | sp_384_cmp_equal_7(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_7(ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 2; break; case 2: - err = sp_384_proj_point_dbl_7_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_384_mont_mul_7(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 3; break; case 3: - { + sp_384_mont_mul_7(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_7(ctx->t1, q->z, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_7(ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 5; break; case 5: - sp_384_mont_mul_7(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 6; break; case 6: - sp_384_mont_mul_7(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_7(ctx->t2, p->z, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_7(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); ctx->state = 8; break; case 8: - sp_384_mont_mul_7(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_7(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); ctx->state = 9; break; case 9: - sp_384_mont_mul_7(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_7(ctx->t2, ctx->t1) & + sp_384_cmp_equal_7(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_384_proj_point_dbl_7(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_7(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_7(ctx->t2, ctx->t2, ctx->t1, p384_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_7(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_7(ctx->t4, ctx->t4, ctx->t3, p384_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_384_mont_sub_7(ctx->t2, ctx->t2, ctx->t1, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_7(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_384_mont_sub_7(ctx->t4, ctx->t4, ctx->t3, p384_mod); + sp_384_mont_mul_7(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_7(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 15; break; case 15: - sp_384_mont_mul_7(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_7(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 16; break; case 16: - sp_384_mont_mul_7(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_7(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_sqr_7(ctx->x, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 18; break; case 18: - sp_384_mont_mul_7(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sub_7(ctx->x, ctx->x, ctx->t5, p384_mod); ctx->state = 19; break; case 19: - sp_384_mont_sqr_7(ctx->x, ctx->t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); ctx->state = 20; break; case 20: - sp_384_mont_sub_7(ctx->x, ctx->x, ctx->t5, p384_mod); + sp_384_mont_dbl_7(ctx->t3, ctx->y, p384_mod); + sp_384_mont_sub_7(ctx->x, ctx->x, ctx->t3, p384_mod); ctx->state = 21; break; case 21: - sp_384_mont_mul_7(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_7(ctx->y, ctx->y, ctx->x, p384_mod); ctx->state = 22; break; case 22: - sp_384_mont_dbl_7(ctx->t3, ctx->y, p384_mod); + sp_384_mont_mul_7(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 23; break; case 23: - sp_384_mont_sub_7(ctx->x, ctx->x, ctx->t3, p384_mod); + sp_384_mont_sub_7(ctx->y, ctx->y, ctx->t5, p384_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_7(ctx->y, ctx->y, ctx->x, p384_mod); - ctx->state = 25; - break; - case 25: - sp_384_mont_mul_7(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); - ctx->state = 26; - break; - case 26: - sp_384_mont_sub_7(ctx->y, ctx->y, ctx->t5, p384_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 7; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 7; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 7; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { - err = FP_WOULDBLOCK; - } - return err; -} -#endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_384_proj_point_add_7(sp_point_384* r, - const sp_point_384* p, const sp_point_384* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*7; - sp_digit* t3 = t + 4*7; - sp_digit* t4 = t + 6*7; - sp_digit* t5 = t + 8*7; - sp_digit* t6 = t + 10*7; - - - /* Check double */ - (void)sp_384_sub_7(t1, p384_mod, q->y); - sp_384_norm_7(t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) & - (sp_384_cmp_equal_7(p->y, q->y) | sp_384_cmp_equal_7(p->y, t1))) != 0) { - sp_384_proj_point_dbl_7(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_7(t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t3, t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t1, t1, p->x, p384_mod, p384_mp_mod); - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_7(t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t4, t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_7(t3, t3, p->y, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_7(t4, t4, q->y, p384_mod, p384_mp_mod); - /* H = U2 - U1 */ - sp_384_mont_sub_7(t2, t2, t1, p384_mod); - /* R = S2 - S1 */ - sp_384_mont_sub_7(t4, t4, t3, p384_mod); - if (~p->infinity & ~q->infinity & - sp_384_iszero_7(t2) & sp_384_iszero_7(t4) & maskt) { - sp_384_proj_point_dbl_7(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_7(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t5, t5, t2, p384_mod, p384_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_7(z, p->z, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_sqr_7(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_7(x, x, t5, p384_mod); - sp_384_mont_mul_7(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_dbl_7(t3, y, p384_mod); - sp_384_mont_sub_7(x, x, t3, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_7(y, y, x, p384_mod); - sp_384_mont_mul_7(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_7(y, y, t5, p384_mod); - - for (i = 0; i < 7; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 7; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 7; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } + if (err == MP_OKAY && ctx->state != 25) { + err = FP_WOULDBLOCK; } + return err; } +#endif /* WOLFSSL_SP_NONBLOCK */ /* Multiply a number by Montgomery normalizer mod modulus (prime). * @@ -30123,7 +30088,6 @@ static void sp_384_proj_point_dbl_n_7(sp_point_384* p, int i, /* W = Z^4 */ sp_384_mont_sqr_7(w, z, p384_mod, p384_mp_mod); sp_384_mont_sqr_7(w, w, p384_mod, p384_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -30141,7 +30105,7 @@ static void sp_384_proj_point_dbl_n_7(sp_point_384* p, int i, sp_384_mont_sqr_7(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_7(t2, b, p384_mod); sp_384_mont_sub_7(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_7(t2, b, x, p384_mod); sp_384_mont_dbl_lower_7(b, t2, p384_mod); /* Z = Z*Y */ @@ -30171,7 +30135,7 @@ static void sp_384_proj_point_dbl_n_7(sp_point_384* p, int i, sp_384_mont_sqr_7(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_7(t2, b, p384_mod); sp_384_mont_sub_7(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_7(t2, b, x, p384_mod); sp_384_mont_dbl_lower_7(b, t2, p384_mod); /* Z = Z*Y */ @@ -30181,7 +30145,7 @@ static void sp_384_proj_point_dbl_n_7(sp_point_384* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_384_mont_mul_7(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_7(y, y, t1, p384_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_384_div2_7(y, y, p384_mod); } @@ -30238,7 +30202,7 @@ static void sp_384_proj_point_dbl_n_store_7(sp_point_384* r, sp_384_mont_sqr_7(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_7(t2, b, p384_mod); sp_384_mont_sub_7(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_7(t2, b, x, p384_mod); sp_384_mont_dbl_lower_7(b, t2, p384_mod); /* Z = Z*Y */ @@ -30253,7 +30217,6 @@ static void sp_384_proj_point_dbl_n_store_7(sp_point_384* r, /* y = 2*A*(B - X) - Y^4 */ sp_384_mont_mul_7(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_7(y, y, t1, p384_mod); - /* Y = Y/2 */ sp_384_div2_7(r[j].y, y, p384_mod); r[j].infinity = 0; @@ -30626,8 +30589,8 @@ static int sp_384_ecc_mulmod_win_add_sub_7(sp_point_384* r, const sp_point_384* * q Second point to add. * t Temporary ordinate data. */ -static void sp_384_proj_point_add_qz1_7(sp_point_384* r, const sp_point_384* p, - const sp_point_384* q, sp_digit* t) +static void sp_384_proj_point_add_qz1_7(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*7; @@ -30636,12 +30599,17 @@ static void sp_384_proj_point_add_qz1_7(sp_point_384* r, const sp_point_384* p, sp_digit* t5 = t + 8*7; sp_digit* t6 = t + 10*7; - /* Check double */ - (void)sp_384_sub_7(t1, p384_mod, q->y); - sp_384_norm_7(t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) & - (sp_384_cmp_equal_7(p->y, q->y) | sp_384_cmp_equal_7(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_7(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_7(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_7(t4, t4, q->y, p384_mod, p384_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_7(p->x, t2) & + sp_384_cmp_equal_7(p->y, t4)) { sp_384_proj_point_dbl_7(r, p, t); } else { @@ -30653,12 +30621,6 @@ static void sp_384_proj_point_add_qz1_7(sp_point_384* r, const sp_point_384* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_7(t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t4, t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_7(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_7(t4, t4, q->y, p384_mod, p384_mp_mod); /* H = U2 - X1 */ sp_384_mont_sub_7(t2, t2, p->x, p384_mod); /* R = S2 - Y1 */ @@ -34161,7 +34123,7 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_384_norm_7(r); - if (sp_384_iszero_7(r) == 0) { + if (!sp_384_iszero_7(r)) { /* x is modified in calculation of s. */ sp_384_from_mp(x, 7, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -34170,7 +34132,7 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_384_calc_s_7(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_384_iszero_7(s) == 0)) { + if ((err == MP_OKAY) && (!sp_384_iszero_7(s))) { break; } } @@ -36519,7 +36481,7 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, /* x /= z^2 */ sp_521_mont_mul_9(r->x, p->x, t2, p521_mod, p521_mp_mod); - XMEMSET(r->x + 9, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 9, 0, sizeof(sp_digit) * 9U); sp_521_mont_reduce_9(r->x, p521_mod, p521_mp_mod); /* Reduce x to less than modulus */ n = sp_521_cmp_9(r->x, p521_mod); @@ -36528,7 +36490,7 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, /* y /= z^3 */ sp_521_mont_mul_9(r->y, p->y, t1, p521_mod, p521_mp_mod); - XMEMSET(r->y + 9, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 9, 0, sizeof(sp_digit) * 9U); sp_521_mont_reduce_9(r->y, p521_mod, p521_mp_mod); /* Reduce y to less than modulus */ n = sp_521_cmp_9(r->y, p521_mod); @@ -36537,7 +36499,6 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -36702,6 +36663,61 @@ static void sp_521_div2_9(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_521_proj_point_dbl_9(sp_point_521* r, const sp_point_521* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*9; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_521_mont_sqr_9(t1, p->z, p521_mod, p521_mp_mod); + /* Z = Y * Z */ + sp_521_mont_mul_9(z, p->y, p->z, p521_mod, p521_mp_mod); + /* Z = 2Z */ + sp_521_mont_dbl_9(z, z, p521_mod); + /* T2 = X - T1 */ + sp_521_mont_sub_9(t2, p->x, t1, p521_mod); + /* T1 = X + T1 */ + sp_521_mont_add_9(t1, p->x, t1, p521_mod); + /* T2 = T1 * T2 */ + sp_521_mont_mul_9(t2, t1, t2, p521_mod, p521_mp_mod); + /* T1 = 3T2 */ + sp_521_mont_tpl_9(t1, t2, p521_mod); + /* Y = 2Y */ + sp_521_mont_dbl_9(y, p->y, p521_mod); + /* Y = Y * Y */ + sp_521_mont_sqr_9(y, y, p521_mod, p521_mp_mod); + /* T2 = Y * Y */ + sp_521_mont_sqr_9(t2, y, p521_mod, p521_mp_mod); + /* T2 = T2/2 */ + sp_521_div2_9(t2, t2, p521_mod); + /* Y = Y * X */ + sp_521_mont_mul_9(y, y, p->x, p521_mod, p521_mp_mod); + /* X = T1 * T1 */ + sp_521_mont_sqr_9(x, t1, p521_mod, p521_mp_mod); + /* X = X - Y */ + sp_521_mont_sub_9(x, x, y, p521_mod); + /* X = X - Y */ + sp_521_mont_sub_9(x, x, y, p521_mod); + /* Y = Y - X */ + sp_521_mont_sub_lower_9(y, y, x, p521_mod); + /* Y = Y * T1 */ + sp_521_mont_mul_9(y, y, t1, p521_mod, p521_mp_mod); + /* Y = Y - T2 */ + sp_521_mont_sub_9(y, y, t2, p521_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_521_proj_point_dbl_9_ctx { int state; @@ -36712,6 +36728,12 @@ typedef struct sp_521_proj_point_dbl_9_ctx { sp_digit* z; } sp_521_proj_point_dbl_9_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -36836,62 +36858,6 @@ static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_521_proj_point_dbl_9(sp_point_521* r, const sp_point_521* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*9; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_521_mont_sqr_9(t1, p->z, p521_mod, p521_mp_mod); - /* Z = Y * Z */ - sp_521_mont_mul_9(z, p->y, p->z, p521_mod, p521_mp_mod); - /* Z = 2Z */ - sp_521_mont_dbl_9(z, z, p521_mod); - /* T2 = X - T1 */ - sp_521_mont_sub_9(t2, p->x, t1, p521_mod); - /* T1 = X + T1 */ - sp_521_mont_add_9(t1, p->x, t1, p521_mod); - /* T2 = T1 * T2 */ - sp_521_mont_mul_9(t2, t1, t2, p521_mod, p521_mp_mod); - /* T1 = 3T2 */ - sp_521_mont_tpl_9(t1, t2, p521_mod); - /* Y = 2Y */ - sp_521_mont_dbl_9(y, p->y, p521_mod); - /* Y = Y * Y */ - sp_521_mont_sqr_9(y, y, p521_mod, p521_mp_mod); - /* T2 = Y * Y */ - sp_521_mont_sqr_9(t2, y, p521_mod, p521_mp_mod); - /* T2 = T2/2 */ - sp_521_div2_9(t2, t2, p521_mod); - /* Y = Y * X */ - sp_521_mont_mul_9(y, y, p->x, p521_mod, p521_mp_mod); - /* X = T1 * T1 */ - sp_521_mont_sqr_9(x, t1, p521_mod, p521_mp_mod); - /* X = X - Y */ - sp_521_mont_sub_9(x, x, y, p521_mod); - /* X = X - Y */ - sp_521_mont_sub_9(x, x, y, p521_mod); - /* Y = Y - X */ - sp_521_mont_sub_lower_9(y, y, x, p521_mod); - /* Y = Y * T1 */ - sp_521_mont_mul_9(y, y, t1, p521_mod, p521_mp_mod); - /* Y = Y - T2 */ - sp_521_mont_sub_9(y, y, t2, p521_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -36918,6 +36884,7 @@ static int sp_521_iszero_9(const sp_digit* a) a[8]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -36925,6 +36892,81 @@ static int sp_521_iszero_9(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_521_proj_point_add_9(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*9; + sp_digit* t3 = t + 4*9; + sp_digit* t4 = t + 6*9; + sp_digit* t5 = t + 8*9; + sp_digit* t6 = t + 10*9; + + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_9(t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t3, t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t1, t1, p->x, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_9(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_9(t3, t3, p->y, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_9(t2, t1) & + sp_521_cmp_equal_9(t4, t3)) { + sp_521_proj_point_dbl_9(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_521_mont_sub_9(t2, t2, t1, p521_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_9(t4, t4, t3, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_9(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_9(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(x, x, t5, p521_mod); + sp_521_mont_mul_9(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_9(t3, y, p521_mod); + sp_521_mont_sub_9(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_9(y, y, x, p521_mod); + sp_521_mont_mul_9(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(y, y, t5, p521_mod); + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_521_proj_point_add_9_ctx { @@ -36943,6 +36985,13 @@ typedef struct sp_521_proj_point_add_9_ctx { sp_digit* z; } sp_521_proj_point_add_9_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_521_proj_point_add_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, const sp_point_521* q, sp_digit* t) { @@ -36974,252 +37023,149 @@ static int sp_521_proj_point_add_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_521_sub_9(ctx->t1, p521_mod, q->y); - sp_521_norm_9(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & - (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_9(ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 2; break; case 2: - err = sp_521_proj_point_dbl_9_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_521_mont_mul_9(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 3; break; case 3: - { + sp_521_mont_mul_9(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_521_mont_sqr_9(ctx->t1, q->z, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_9(ctx->t2, p->z, p521_mod, p521_mp_mod); ctx->state = 5; break; case 5: - sp_521_mont_mul_9(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); ctx->state = 6; break; case 6: - sp_521_mont_mul_9(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_9(ctx->t2, p->z, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_9(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); ctx->state = 8; break; case 8: - sp_521_mont_mul_9(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_9(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); ctx->state = 9; break; case 9: - sp_521_mont_mul_9(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_9(ctx->t2, ctx->t1) & + sp_521_cmp_equal_9(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_521_proj_point_dbl_9(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_521_mont_mul_9(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); + /* H = U2 - U1 */ + sp_521_mont_sub_9(ctx->t2, ctx->t2, ctx->t1, p521_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_9(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_9(ctx->t4, ctx->t4, ctx->t3, p521_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_521_mont_sub_9(ctx->t2, ctx->t2, ctx->t1, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_9(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_521_mont_sub_9(ctx->t4, ctx->t4, ctx->t3, p521_mod); + sp_521_mont_mul_9(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_9(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 15; break; case 15: - sp_521_mont_mul_9(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_9(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 16; break; case 16: - sp_521_mont_mul_9(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_9(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(ctx->x, ctx->t4, p521_mod, p521_mp_mod); ctx->state = 18; break; case 18: - sp_521_mont_mul_9(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(ctx->x, ctx->x, ctx->t5, p521_mod); ctx->state = 19; break; case 19: - sp_521_mont_sqr_9(ctx->x, ctx->t4, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); ctx->state = 20; break; case 20: - sp_521_mont_sub_9(ctx->x, ctx->x, ctx->t5, p521_mod); + sp_521_mont_dbl_9(ctx->t3, ctx->y, p521_mod); + sp_521_mont_sub_9(ctx->x, ctx->x, ctx->t3, p521_mod); ctx->state = 21; break; case 21: - sp_521_mont_mul_9(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_9(ctx->y, ctx->y, ctx->x, p521_mod); ctx->state = 22; break; case 22: - sp_521_mont_dbl_9(ctx->t3, ctx->y, p521_mod); + sp_521_mont_mul_9(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); ctx->state = 23; break; case 23: - sp_521_mont_sub_9(ctx->x, ctx->x, ctx->t3, p521_mod); + sp_521_mont_sub_9(ctx->y, ctx->y, ctx->t5, p521_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_9(ctx->y, ctx->y, ctx->x, p521_mod); - ctx->state = 25; - break; - case 25: - sp_521_mont_mul_9(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); - ctx->state = 26; - break; - case 26: - sp_521_mont_sub_9(ctx->y, ctx->y, ctx->t5, p521_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 9; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 9; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 9; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_521_proj_point_add_9(sp_point_521* r, - const sp_point_521* p, const sp_point_521* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*9; - sp_digit* t3 = t + 4*9; - sp_digit* t4 = t + 6*9; - sp_digit* t5 = t + 8*9; - sp_digit* t6 = t + 10*9; - - - /* Check double */ - (void)sp_521_sub_9(t1, p521_mod, q->y); - sp_521_norm_9(t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & - (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, t1))) != 0) { - sp_521_proj_point_dbl_9(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_521_mont_sqr_9(t1, q->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t3, t1, q->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t1, t1, p->x, p521_mod, p521_mp_mod); - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_9(t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t4, t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_521_mont_mul_9(t3, t3, p->y, p521_mod, p521_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod); - /* H = U2 - U1 */ - sp_521_mont_sub_9(t2, t2, t1, p521_mod); - /* R = S2 - S1 */ - sp_521_mont_sub_9(t4, t4, t3, p521_mod); - if (~p->infinity & ~q->infinity & - sp_521_iszero_9(t2) & sp_521_iszero_9(t4) & maskt) { - sp_521_proj_point_dbl_9(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_9(t5, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(y, t1, t5, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t5, t5, t2, p521_mod, p521_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_9(z, p->z, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(z, z, q->z, p521_mod, p521_mp_mod); - sp_521_mont_sqr_9(x, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_9(x, x, t5, p521_mod); - sp_521_mont_mul_9(t5, t5, t3, p521_mod, p521_mp_mod); - sp_521_mont_dbl_9(t3, y, p521_mod); - sp_521_mont_sub_9(x, x, t3, p521_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_9(y, y, x, p521_mod); - sp_521_mont_mul_9(y, y, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_9(y, y, t5, p521_mod); - - for (i = 0; i < 9; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 9; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 9; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Multiply a number by Montgomery normalizer mod modulus (prime). * * r The resulting Montgomery form number. @@ -37550,7 +37496,6 @@ static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i, /* W = Z^4 */ sp_521_mont_sqr_9(w, z, p521_mod, p521_mp_mod); sp_521_mont_sqr_9(w, w, p521_mod, p521_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -37568,7 +37513,7 @@ static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i, sp_521_mont_sqr_9(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_9(t2, b, p521_mod); sp_521_mont_sub_9(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_9(t2, b, x, p521_mod); sp_521_mont_dbl_lower_9(b, t2, p521_mod); /* Z = Z*Y */ @@ -37598,7 +37543,7 @@ static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i, sp_521_mont_sqr_9(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_9(t2, b, p521_mod); sp_521_mont_sub_9(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_9(t2, b, x, p521_mod); sp_521_mont_dbl_lower_9(b, t2, p521_mod); /* Z = Z*Y */ @@ -37608,7 +37553,7 @@ static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_521_mont_mul_9(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_9(y, y, t1, p521_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_521_div2_9(y, y, p521_mod); } @@ -37665,7 +37610,7 @@ static void sp_521_proj_point_dbl_n_store_9(sp_point_521* r, sp_521_mont_sqr_9(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_9(t2, b, p521_mod); sp_521_mont_sub_9(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_9(t2, b, x, p521_mod); sp_521_mont_dbl_lower_9(b, t2, p521_mod); /* Z = Z*Y */ @@ -37680,7 +37625,6 @@ static void sp_521_proj_point_dbl_n_store_9(sp_point_521* r, /* y = 2*A*(B - X) - Y^4 */ sp_521_mont_mul_9(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_9(y, y, t1, p521_mod); - /* Y = Y/2 */ sp_521_div2_9(r[j].y, y, p521_mod); r[j].infinity = 0; @@ -38065,8 +38009,8 @@ static int sp_521_ecc_mulmod_win_add_sub_9(sp_point_521* r, const sp_point_521* * q Second point to add. * t Temporary ordinate data. */ -static void sp_521_proj_point_add_qz1_9(sp_point_521* r, const sp_point_521* p, - const sp_point_521* q, sp_digit* t) +static void sp_521_proj_point_add_qz1_9(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*9; @@ -38075,12 +38019,17 @@ static void sp_521_proj_point_add_qz1_9(sp_point_521* r, const sp_point_521* p, sp_digit* t5 = t + 8*9; sp_digit* t6 = t + 10*9; - /* Check double */ - (void)sp_521_sub_9(t1, p521_mod, q->y); - sp_521_norm_9(t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & - (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_9(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_9(p->x, t2) & + sp_521_cmp_equal_9(p->y, t4)) { sp_521_proj_point_dbl_9(r, p, t); } else { @@ -38092,12 +38041,6 @@ static void sp_521_proj_point_add_qz1_9(sp_point_521* r, const sp_point_521* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_9(t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t4, t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod); /* H = U2 - X1 */ sp_521_mont_sub_9(t2, t2, p->x, p521_mod); /* R = S2 - Y1 */ @@ -41630,7 +41573,7 @@ int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_521_norm_9(r); - if (sp_521_iszero_9(r) == 0) { + if (!sp_521_iszero_9(r)) { /* x is modified in calculation of s. */ sp_521_from_mp(x, 9, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -41645,7 +41588,7 @@ int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_521_calc_s_9(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_521_iszero_9(s) == 0)) { + if ((err == MP_OKAY) && (!sp_521_iszero_9(s))) { break; } } @@ -44449,7 +44392,7 @@ static void sp_1024_map_18(sp_point_1024* r, const sp_point_1024* p, /* x /= z^2 */ sp_1024_mont_mul_18(r->x, p->x, t2, p1024_mod, p1024_mp_mod); - XMEMSET(r->x + 18, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 18, 0, sizeof(sp_digit) * 18U); sp_1024_mont_reduce_18(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_18(r->x, p1024_mod); @@ -44458,7 +44401,7 @@ static void sp_1024_map_18(sp_point_1024* r, const sp_point_1024* p, /* y /= z^3 */ sp_1024_mont_mul_18(r->y, p->y, t1, p1024_mod, p1024_mp_mod); - XMEMSET(r->y + 18, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 18, 0, sizeof(sp_digit) * 18U); sp_1024_mont_reduce_18(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_18(r->y, p1024_mod); @@ -44467,7 +44410,6 @@ static void sp_1024_map_18(sp_point_1024* r, const sp_point_1024* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -44597,6 +44539,61 @@ static void sp_1024_div2_18(sp_digit* r, const sp_digit* a, const sp_digit* m) * p Point to double. * t Temporary ordinate data. */ +static void sp_1024_proj_point_dbl_18(sp_point_1024* r, const sp_point_1024* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*18; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_1024_mont_sqr_18(t1, p->z, p1024_mod, p1024_mp_mod); + /* Z = Y * Z */ + sp_1024_mont_mul_18(z, p->y, p->z, p1024_mod, p1024_mp_mod); + /* Z = 2Z */ + sp_1024_mont_dbl_18(z, z, p1024_mod); + /* T2 = X - T1 */ + sp_1024_mont_sub_18(t2, p->x, t1, p1024_mod); + /* T1 = X + T1 */ + sp_1024_mont_add_18(t1, p->x, t1, p1024_mod); + /* T2 = T1 * T2 */ + sp_1024_mont_mul_18(t2, t1, t2, p1024_mod, p1024_mp_mod); + /* T1 = 3T2 */ + sp_1024_mont_tpl_18(t1, t2, p1024_mod); + /* Y = 2Y */ + sp_1024_mont_dbl_18(y, p->y, p1024_mod); + /* Y = Y * Y */ + sp_1024_mont_sqr_18(y, y, p1024_mod, p1024_mp_mod); + /* T2 = Y * Y */ + sp_1024_mont_sqr_18(t2, y, p1024_mod, p1024_mp_mod); + /* T2 = T2/2 */ + sp_1024_div2_18(t2, t2, p1024_mod); + /* Y = Y * X */ + sp_1024_mont_mul_18(y, y, p->x, p1024_mod, p1024_mp_mod); + /* X = T1 * T1 */ + sp_1024_mont_sqr_18(x, t1, p1024_mod, p1024_mp_mod); + /* X = X - Y */ + sp_1024_mont_sub_18(x, x, y, p1024_mod); + /* X = X - Y */ + sp_1024_mont_sub_18(x, x, y, p1024_mod); + /* Y = Y - X */ + sp_1024_mont_sub_lower_18(y, y, x, p1024_mod); + /* Y = Y * T1 */ + sp_1024_mont_mul_18(y, y, t1, p1024_mod, p1024_mp_mod); + /* Y = Y - T2 */ + sp_1024_mont_sub_18(y, y, t2, p1024_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_dbl_18_ctx { int state; @@ -44607,6 +44604,12 @@ typedef struct sp_1024_proj_point_dbl_18_ctx { sp_digit* z; } sp_1024_proj_point_dbl_18_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_1024_proj_point_dbl_18_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -44696,97 +44699,41 @@ static int sp_1024_proj_point_dbl_18_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, break; case 14: /* X = X - Y */ - sp_1024_mont_sub_18(ctx->x, ctx->x, ctx->y, p1024_mod); - ctx->state = 15; - break; - case 15: - /* X = X - Y */ - sp_1024_mont_sub_18(ctx->x, ctx->x, ctx->y, p1024_mod); - ctx->state = 16; - break; - case 16: - /* Y = Y - X */ - sp_1024_mont_sub_lower_18(ctx->y, ctx->y, ctx->x, p1024_mod); - ctx->state = 17; - break; - case 17: - /* Y = Y * T1 */ - sp_1024_mont_mul_18(ctx->y, ctx->y, ctx->t1, p1024_mod, p1024_mp_mod); - ctx->state = 18; - break; - case 18: - /* Y = Y - T2 */ - sp_1024_mont_sub_18(ctx->y, ctx->y, ctx->t2, p1024_mod); - ctx->state = 19; - /* fall-through */ - case 19: - err = MP_OKAY; - break; - } - - if (err == MP_OKAY && ctx->state != 19) { - err = FP_WOULDBLOCK; - } - - return err; -} -#endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_1024_proj_point_dbl_18(sp_point_1024* r, const sp_point_1024* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*18; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_1024_mont_sqr_18(t1, p->z, p1024_mod, p1024_mp_mod); - /* Z = Y * Z */ - sp_1024_mont_mul_18(z, p->y, p->z, p1024_mod, p1024_mp_mod); - /* Z = 2Z */ - sp_1024_mont_dbl_18(z, z, p1024_mod); - /* T2 = X - T1 */ - sp_1024_mont_sub_18(t2, p->x, t1, p1024_mod); - /* T1 = X + T1 */ - sp_1024_mont_add_18(t1, p->x, t1, p1024_mod); - /* T2 = T1 * T2 */ - sp_1024_mont_mul_18(t2, t1, t2, p1024_mod, p1024_mp_mod); - /* T1 = 3T2 */ - sp_1024_mont_tpl_18(t1, t2, p1024_mod); - /* Y = 2Y */ - sp_1024_mont_dbl_18(y, p->y, p1024_mod); - /* Y = Y * Y */ - sp_1024_mont_sqr_18(y, y, p1024_mod, p1024_mp_mod); - /* T2 = Y * Y */ - sp_1024_mont_sqr_18(t2, y, p1024_mod, p1024_mp_mod); - /* T2 = T2/2 */ - sp_1024_div2_18(t2, t2, p1024_mod); - /* Y = Y * X */ - sp_1024_mont_mul_18(y, y, p->x, p1024_mod, p1024_mp_mod); - /* X = T1 * T1 */ - sp_1024_mont_sqr_18(x, t1, p1024_mod, p1024_mp_mod); - /* X = X - Y */ - sp_1024_mont_sub_18(x, x, y, p1024_mod); - /* X = X - Y */ - sp_1024_mont_sub_18(x, x, y, p1024_mod); - /* Y = Y - X */ - sp_1024_mont_sub_lower_18(y, y, x, p1024_mod); - /* Y = Y * T1 */ - sp_1024_mont_mul_18(y, y, t1, p1024_mod, p1024_mp_mod); - /* Y = Y - T2 */ - sp_1024_mont_sub_18(y, y, t2, p1024_mod); -} + sp_1024_mont_sub_18(ctx->x, ctx->x, ctx->y, p1024_mod); + ctx->state = 15; + break; + case 15: + /* X = X - Y */ + sp_1024_mont_sub_18(ctx->x, ctx->x, ctx->y, p1024_mod); + ctx->state = 16; + break; + case 16: + /* Y = Y - X */ + sp_1024_mont_sub_lower_18(ctx->y, ctx->y, ctx->x, p1024_mod); + ctx->state = 17; + break; + case 17: + /* Y = Y * T1 */ + sp_1024_mont_mul_18(ctx->y, ctx->y, ctx->t1, p1024_mod, p1024_mp_mod); + ctx->state = 18; + break; + case 18: + /* Y = Y - T2 */ + sp_1024_mont_sub_18(ctx->y, ctx->y, ctx->t2, p1024_mod); + ctx->state = 19; + /* fall-through */ + case 19: + err = MP_OKAY; + break; + } + + if (err == MP_OKAY && ctx->state != 19) { + err = FP_WOULDBLOCK; + } + return err; +} +#endif /* WOLFSSL_SP_NONBLOCK */ /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -44817,6 +44764,7 @@ static int sp_1024_iszero_18(const sp_digit* a) a[16] | a[17]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -44824,6 +44772,81 @@ static int sp_1024_iszero_18(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_1024_proj_point_add_18(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*18; + sp_digit* t3 = t + 4*18; + sp_digit* t4 = t + 6*18; + sp_digit* t5 = t + 8*18; + sp_digit* t6 = t + 10*18; + + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_18(t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t3, t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t1, t1, p->x, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_18(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_18(t3, t3, p->y, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_18(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_18(t2, t1) & + sp_1024_cmp_equal_18(t4, t3)) { + sp_1024_proj_point_dbl_18(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_1024_mont_sub_18(t2, t2, t1, p1024_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_18(t4, t4, t3, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_18(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_18(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_18(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_18(x, x, t5, p1024_mod); + sp_1024_mont_mul_18(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_18(t3, y, p1024_mod); + sp_1024_mont_sub_18(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_18(y, y, x, p1024_mod); + sp_1024_mont_mul_18(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_18(y, y, t5, p1024_mod); + for (i = 0; i < 18; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 18; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 18; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_add_18_ctx { @@ -44842,6 +44865,13 @@ typedef struct sp_1024_proj_point_add_18_ctx { sp_digit* z; } sp_1024_proj_point_add_18_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_1024_proj_point_add_18_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { @@ -44873,252 +44903,149 @@ static int sp_1024_proj_point_add_18_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_1024_sub_18(ctx->t1, p1024_mod, q->y); - sp_1024_norm_18(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_18(p->x, q->x) & sp_1024_cmp_equal_18(p->z, q->z) & - (sp_1024_cmp_equal_18(p->y, q->y) | sp_1024_cmp_equal_18(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_18(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 2; break; case 2: - err = sp_1024_proj_point_dbl_18_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_1024_mont_mul_18(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 3; break; case 3: - { + sp_1024_mont_mul_18(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_18(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_18(ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 5; break; case 5: - sp_1024_mont_mul_18(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 6; break; case 6: - sp_1024_mont_mul_18(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_18(ctx->t2, p->z, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_18(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); ctx->state = 8; break; case 8: - sp_1024_mont_mul_18(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_18(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); ctx->state = 9; break; case 9: - sp_1024_mont_mul_18(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_18(ctx->t2, ctx->t1) & + sp_1024_cmp_equal_18(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_1024_proj_point_dbl_18(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_18(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); + /* H = U2 - U1 */ + sp_1024_mont_sub_18(ctx->t2, ctx->t2, ctx->t1, p1024_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_18(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_18(ctx->t4, ctx->t4, ctx->t3, p1024_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_1024_mont_sub_18(ctx->t2, ctx->t2, ctx->t1, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_18(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_1024_mont_sub_18(ctx->t4, ctx->t4, ctx->t3, p1024_mod); + sp_1024_mont_mul_18(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_18(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 15; break; case 15: - sp_1024_mont_mul_18(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_18(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 16; break; case 16: - sp_1024_mont_mul_18(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_18(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_18(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 18; break; case 18: - sp_1024_mont_mul_18(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_18(ctx->x, ctx->x, ctx->t5, p1024_mod); ctx->state = 19; break; case 19: - sp_1024_mont_sqr_18(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); ctx->state = 20; break; case 20: - sp_1024_mont_sub_18(ctx->x, ctx->x, ctx->t5, p1024_mod); + sp_1024_mont_dbl_18(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_sub_18(ctx->x, ctx->x, ctx->t3, p1024_mod); ctx->state = 21; break; case 21: - sp_1024_mont_mul_18(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_18(ctx->y, ctx->y, ctx->x, p1024_mod); ctx->state = 22; break; case 22: - sp_1024_mont_dbl_18(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_mul_18(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 23; break; case 23: - sp_1024_mont_sub_18(ctx->x, ctx->x, ctx->t3, p1024_mod); + sp_1024_mont_sub_18(ctx->y, ctx->y, ctx->t5, p1024_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_18(ctx->y, ctx->y, ctx->x, p1024_mod); - ctx->state = 25; - break; - case 25: - sp_1024_mont_mul_18(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); - ctx->state = 26; - break; - case 26: - sp_1024_mont_sub_18(ctx->y, ctx->y, ctx->t5, p1024_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 18; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 18; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 18; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_1024_proj_point_add_18(sp_point_1024* r, - const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*18; - sp_digit* t3 = t + 4*18; - sp_digit* t4 = t + 6*18; - sp_digit* t5 = t + 8*18; - sp_digit* t6 = t + 10*18; - - - /* Check double */ - (void)sp_1024_mont_sub_18(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_18(t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_18(p->x, q->x) & sp_1024_cmp_equal_18(p->z, q->z) & - (sp_1024_cmp_equal_18(p->y, q->y) | sp_1024_cmp_equal_18(p->y, t1))) != 0) { - sp_1024_proj_point_dbl_18(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_18(t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t3, t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t1, t1, p->x, p1024_mod, p1024_mp_mod); - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_18(t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t4, t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_18(t3, t3, p->y, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_18(t4, t4, q->y, p1024_mod, p1024_mp_mod); - /* H = U2 - U1 */ - sp_1024_mont_sub_18(t2, t2, t1, p1024_mod); - /* R = S2 - S1 */ - sp_1024_mont_sub_18(t4, t4, t3, p1024_mod); - if (~p->infinity & ~q->infinity & - sp_1024_iszero_18(t2) & sp_1024_iszero_18(t4) & maskt) { - sp_1024_proj_point_dbl_18(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_18(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t5, t5, t2, p1024_mod, p1024_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_18(z, p->z, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_18(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_18(x, x, t5, p1024_mod); - sp_1024_mont_mul_18(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_18(t3, y, p1024_mod); - sp_1024_mont_sub_18(x, x, t3, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_18(y, y, x, p1024_mod); - sp_1024_mont_mul_18(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_18(y, y, t5, p1024_mod); - - for (i = 0; i < 18; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 18; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 18; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - #ifdef WOLFSSL_SP_SMALL /* Multiply the point by the scalar and return the result. * If map is true then convert result to affine coordinates. @@ -45449,7 +45376,6 @@ static void sp_1024_proj_point_dbl_n_18(sp_point_1024* p, int i, /* W = Z^4 */ sp_1024_mont_sqr_18(w, z, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_18(w, w, p1024_mod, p1024_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -45467,7 +45393,7 @@ static void sp_1024_proj_point_dbl_n_18(sp_point_1024* p, int i, sp_1024_mont_sqr_18(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_18(t2, b, p1024_mod); sp_1024_mont_sub_18(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_18(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_18(b, t2, p1024_mod); /* Z = Z*Y */ @@ -45497,7 +45423,7 @@ static void sp_1024_proj_point_dbl_n_18(sp_point_1024* p, int i, sp_1024_mont_sqr_18(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_18(t2, b, p1024_mod); sp_1024_mont_sub_18(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_18(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_18(b, t2, p1024_mod); /* Z = Z*Y */ @@ -45507,7 +45433,7 @@ static void sp_1024_proj_point_dbl_n_18(sp_point_1024* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_1024_mont_mul_18(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_18(y, y, t1, p1024_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_1024_div2_18(y, y, p1024_mod); } @@ -45564,7 +45490,7 @@ static void sp_1024_proj_point_dbl_n_store_18(sp_point_1024* r, sp_1024_mont_sqr_18(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_18(t2, b, p1024_mod); sp_1024_mont_sub_18(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_18(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_18(b, t2, p1024_mod); /* Z = Z*Y */ @@ -45579,7 +45505,6 @@ static void sp_1024_proj_point_dbl_n_store_18(sp_point_1024* r, /* y = 2*A*(B - X) - Y^4 */ sp_1024_mont_mul_18(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_18(y, y, t1, p1024_mod); - /* Y = Y/2 */ sp_1024_div2_18(r[j].y, y, p1024_mod); r[j].infinity = 0; @@ -45904,8 +45829,8 @@ static int sp_1024_ecc_mulmod_win_add_sub_18(sp_point_1024* r, const sp_point_10 * q Second point to add. * t Temporary ordinate data. */ -static void sp_1024_proj_point_add_qz1_18(sp_point_1024* r, const sp_point_1024* p, - const sp_point_1024* q, sp_digit* t) +static void sp_1024_proj_point_add_qz1_18(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*18; @@ -45914,12 +45839,17 @@ static void sp_1024_proj_point_add_qz1_18(sp_point_1024* r, const sp_point_1024* sp_digit* t5 = t + 8*18; sp_digit* t6 = t + 10*18; - /* Check double */ - (void)sp_1024_mont_sub_18(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_18(t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_18(p->x, q->x) & sp_1024_cmp_equal_18(p->z, q->z) & - (sp_1024_cmp_equal_18(p->y, q->y) | sp_1024_cmp_equal_18(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_18(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_18(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_18(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_18(p->x, t2) & + sp_1024_cmp_equal_18(p->y, t4)) { sp_1024_proj_point_dbl_18(r, p, t); } else { @@ -45931,12 +45861,6 @@ static void sp_1024_proj_point_add_qz1_18(sp_point_1024* r, const sp_point_1024* sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_18(t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t4, t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_18(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_18(t4, t4, q->y, p1024_mod, p1024_mp_mod); /* H = U2 - X1 */ sp_1024_mont_sub_18(t2, t2, p->x, p1024_mod); /* R = S2 - Y1 */ diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index 99c6cc6411..f5539226d4 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -18802,7 +18802,7 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, /* x /= z^2 */ sp_256_mont_mul_8(r->x, p->x, t2, p256_mod, p256_mp_mod); - XMEMSET(r->x + 8, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 8, 0, sizeof(sp_digit) * 8U); sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_8(r->x, p256_mod); @@ -18811,7 +18811,7 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, /* y /= z^3 */ sp_256_mont_mul_8(r->y, p->y, t1, p256_mod, p256_mp_mod); - XMEMSET(r->y + 8, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 8, 0, sizeof(sp_digit) * 8U); sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_8(r->y, p256_mod); @@ -18820,7 +18820,6 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -19196,6 +19195,61 @@ SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_d * p Point to double. * t Temporary ordinate data. */ +static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_8(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_8(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_8(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_8(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_8(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_8(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_8(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_8(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_lower_8(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_8(y, y, t2, p256_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_dbl_8_ctx { int state; @@ -19206,6 +19260,12 @@ typedef struct sp_256_proj_point_dbl_8_ctx { sp_digit* z; } sp_256_proj_point_dbl_8_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -19330,62 +19390,6 @@ static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*8; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_256_mont_sqr_8(t1, p->z, p256_mod, p256_mp_mod); - /* Z = Y * Z */ - sp_256_mont_mul_8(z, p->y, p->z, p256_mod, p256_mp_mod); - /* Z = 2Z */ - sp_256_mont_dbl_8(z, z, p256_mod); - /* T2 = X - T1 */ - sp_256_mont_sub_8(t2, p->x, t1, p256_mod); - /* T1 = X + T1 */ - sp_256_mont_add_8(t1, p->x, t1, p256_mod); - /* T2 = T1 * T2 */ - sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod); - /* T1 = 3T2 */ - sp_256_mont_tpl_8(t1, t2, p256_mod); - /* Y = 2Y */ - sp_256_mont_dbl_8(y, p->y, p256_mod); - /* Y = Y * Y */ - sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod); - /* T2 = Y * Y */ - sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); - /* T2 = T2/2 */ - sp_256_div2_8(t2, t2, p256_mod); - /* Y = Y * X */ - sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod); - /* X = T1 * T1 */ - sp_256_mont_sqr_8(x, t1, p256_mod, p256_mp_mod); - /* X = X - Y */ - sp_256_mont_sub_8(x, x, y, p256_mod); - /* X = X - Y */ - sp_256_mont_sub_8(x, x, y, p256_mod); - /* Y = Y - X */ - sp_256_mont_sub_lower_8(y, y, x, p256_mod); - /* Y = Y * T1 */ - sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod); - /* Y = Y - T2 */ - sp_256_mont_sub_8(y, y, t2, p256_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -19411,6 +19415,7 @@ static int sp_256_iszero_8(const sp_digit* a) return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -19418,6 +19423,81 @@ static int sp_256_iszero_8(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_256_proj_point_add_8(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*8; + sp_digit* t3 = t + 4*8; + sp_digit* t4 = t + 6*8; + sp_digit* t5 = t + 8*8; + sp_digit* t6 = t + 10*8; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t1, t1, p->x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_8(t3, t3, p->y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_8(t2, t1) & + sp_256_cmp_equal_8(t4, t3)) { + sp_256_proj_point_dbl_8(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_256_mont_sub_8(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_8(t4, t4, t3, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_8(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(x, x, t5, p256_mod); + sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_dbl_8(t3, y, p256_mod); + sp_256_mont_sub_8(x, x, t3, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_8(y, y, x, p256_mod); + sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(y, y, t5, p256_mod); + for (i = 0; i < 8; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 8; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_add_8_ctx { @@ -19436,6 +19516,13 @@ typedef struct sp_256_proj_point_add_8_ctx { sp_digit* z; } sp_256_proj_point_add_8_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_256_proj_point_add_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, const sp_point_256* q, sp_digit* t) { @@ -19467,252 +19554,149 @@ static int sp_256_proj_point_add_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_256_sub_8(ctx->t1, p256_mod, q->y); - sp_256_norm_8(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & - (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_8(ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 2; break; case 2: - err = sp_256_proj_point_dbl_8_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_256_mont_mul_8(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 3; break; case 3: - { + sp_256_mont_mul_8(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_8(ctx->t1, q->z, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 5; break; case 5: - sp_256_mont_mul_8(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 6; break; case 6: - sp_256_mont_mul_8(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_8(ctx->t2, p->z, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_8(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); ctx->state = 8; break; case 8: - sp_256_mont_mul_8(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); ctx->state = 9; break; case 9: - sp_256_mont_mul_8(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_8(ctx->t2, ctx->t1) & + sp_256_cmp_equal_8(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_256_proj_point_dbl_8(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_8(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_8(ctx->t2, ctx->t2, ctx->t1, p256_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_8(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_8(ctx->t4, ctx->t4, ctx->t3, p256_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_256_mont_sub_8(ctx->t2, ctx->t2, ctx->t1, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_8(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_256_mont_sub_8(ctx->t4, ctx->t4, ctx->t3, p256_mod); + sp_256_mont_mul_8(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_8(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 15; break; case 15: - sp_256_mont_mul_8(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_8(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 16; break; case 16: - sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_8(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_sqr_8(ctx->x, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 18; break; case 18: - sp_256_mont_mul_8(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t5, p256_mod); ctx->state = 19; break; case 19: - sp_256_mont_sqr_8(ctx->x, ctx->t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); ctx->state = 20; break; case 20: - sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t5, p256_mod); + sp_256_mont_dbl_8(ctx->t3, ctx->y, p256_mod); + sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t3, p256_mod); ctx->state = 21; break; case 21: - sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_8(ctx->y, ctx->y, ctx->x, p256_mod); ctx->state = 22; break; case 22: - sp_256_mont_dbl_8(ctx->t3, ctx->y, p256_mod); + sp_256_mont_mul_8(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 23; break; case 23: - sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t3, p256_mod); + sp_256_mont_sub_8(ctx->y, ctx->y, ctx->t5, p256_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_8(ctx->y, ctx->y, ctx->x, p256_mod); - ctx->state = 25; - break; - case 25: - sp_256_mont_mul_8(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); - ctx->state = 26; - break; - case 26: - sp_256_mont_sub_8(ctx->y, ctx->y, ctx->t5, p256_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 8; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 8; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 8; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_256_proj_point_add_8(sp_point_256* r, - const sp_point_256* p, const sp_point_256* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*8; - sp_digit* t3 = t + 4*8; - sp_digit* t4 = t + 6*8; - sp_digit* t5 = t + 8*8; - sp_digit* t6 = t + 10*8; - - - /* Check double */ - (void)sp_256_sub_8(t1, p256_mod, q->y); - sp_256_norm_8(t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & - (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { - sp_256_proj_point_dbl_8(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t1, t1, p->x, p256_mod, p256_mp_mod); - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_8(t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t4, t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_8(t3, t3, p->y, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); - /* H = U2 - U1 */ - sp_256_mont_sub_8(t2, t2, t1, p256_mod); - /* R = S2 - S1 */ - sp_256_mont_sub_8(t4, t4, t3, p256_mod); - if (~p->infinity & ~q->infinity & - sp_256_iszero_8(t2) & sp_256_iszero_8(t4) & maskt) { - sp_256_proj_point_dbl_8(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_8(z, p->z, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_8(x, x, t5, p256_mod); - sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_dbl_8(t3, y, p256_mod); - sp_256_mont_sub_8(x, x, t3, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_8(y, y, x, p256_mod); - sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_8(y, y, t5, p256_mod); - - for (i = 0; i < 8; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 8; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 8; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - #ifndef WC_NO_CACHE_RESISTANT /* Touch each possible point that could be being copied. * @@ -20000,7 +19984,6 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int i, /* W = Z^4 */ sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod); sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -20018,7 +20001,7 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int i, sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_8(t2, b, p256_mod); sp_256_mont_sub_8(x, x, t2, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_sub_lower_8(t2, b, x, p256_mod); sp_256_mont_dbl_lower_8(b, t2, p256_mod); /* Z = Z*Y */ @@ -20048,7 +20031,7 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int i, sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod); sp_256_mont_dbl_8(t2, b, p256_mod); sp_256_mont_sub_8(x, x, t2, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_sub_lower_8(t2, b, x, p256_mod); sp_256_mont_dbl_lower_8(b, t2, p256_mod); /* Z = Z*Y */ @@ -20058,7 +20041,7 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_256_mont_mul_8(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_8(y, y, t1, p256_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_256_div2_8(y, y, p256_mod); } @@ -20103,8 +20086,8 @@ typedef struct sp_table_entry_256 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, - const sp_point_256* q, sp_digit* t) +static void sp_256_proj_point_add_qz1_8(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*8; @@ -20113,12 +20096,17 @@ static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, sp_digit* t5 = t + 8*8; sp_digit* t6 = t + 10*8; - /* Check double */ - (void)sp_256_sub_8(t1, p256_mod, q->y); - sp_256_norm_8(t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & - (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_8(p->x, t2) & + sp_256_cmp_equal_8(p->y, t4)) { sp_256_proj_point_dbl_8(r, p, t); } else { @@ -20130,12 +20118,6 @@ static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_8(t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t4, t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod); /* H = U2 - X1 */ sp_256_mont_sub_8(t2, t2, p->x, p256_mod); /* R = S2 - Y1 */ @@ -23710,7 +23692,7 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_256_norm_8(r); - if (sp_256_iszero_8(r) == 0) { + if (!sp_256_iszero_8(r)) { /* x is modified in calculation of s. */ sp_256_from_mp(x, 8, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -23719,7 +23701,7 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_256_calc_s_8(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_256_iszero_8(s) == 0)) { + if ((err == MP_OKAY) && (!sp_256_iszero_8(s))) { break; } } @@ -26268,7 +26250,7 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, /* x /= z^2 */ sp_384_mont_mul_12(r->x, p->x, t2, p384_mod, p384_mp_mod); - XMEMSET(r->x + 12, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 12, 0, sizeof(sp_digit) * 12U); sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_12(r->x, p384_mod); @@ -26277,7 +26259,7 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, /* y /= z^3 */ sp_384_mont_mul_12(r->y, p->y, t1, p384_mod, p384_mp_mod); - XMEMSET(r->y + 12, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 12, 0, sizeof(sp_digit) * 12U); sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_12(r->y, p384_mod); @@ -26286,7 +26268,6 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -26470,6 +26451,61 @@ SP_NOINLINE static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_ * p Point to double. * t Temporary ordinate data. */ +static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_12(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_12(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_12(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_12(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_12(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_div2_12(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_12(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_12(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_lower_12(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_12(y, y, t2, p384_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_dbl_12_ctx { int state; @@ -26480,6 +26516,12 @@ typedef struct sp_384_proj_point_dbl_12_ctx { sp_digit* z; } sp_384_proj_point_dbl_12_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -26604,62 +26646,6 @@ static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, co return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*12; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod); - /* Z = Y * Z */ - sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod); - /* Z = 2Z */ - sp_384_mont_dbl_12(z, z, p384_mod); - /* T2 = X - T1 */ - sp_384_mont_sub_12(t2, p->x, t1, p384_mod); - /* T1 = X + T1 */ - sp_384_mont_add_12(t1, p->x, t1, p384_mod); - /* T2 = T1 * T2 */ - sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod); - /* T1 = 3T2 */ - sp_384_mont_tpl_12(t1, t2, p384_mod); - /* Y = 2Y */ - sp_384_mont_dbl_12(y, p->y, p384_mod); - /* Y = Y * Y */ - sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod); - /* T2 = Y * Y */ - sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); - /* T2 = T2/2 */ - sp_384_div2_12(t2, t2, p384_mod); - /* Y = Y * X */ - sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod); - /* X = T1 * T1 */ - sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod); - /* X = X - Y */ - sp_384_mont_sub_12(x, x, y, p384_mod); - /* X = X - Y */ - sp_384_mont_sub_12(x, x, y, p384_mod); - /* Y = Y - X */ - sp_384_mont_sub_lower_12(y, y, x, p384_mod); - /* Y = Y * T1 */ - sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod); - /* Y = Y - T2 */ - sp_384_mont_sub_12(y, y, t2, p384_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -26687,6 +26673,7 @@ static int sp_384_iszero_12(const sp_digit* a) a[8] | a[9] | a[10] | a[11]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -26694,6 +26681,81 @@ static int sp_384_iszero_12(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_384_proj_point_add_12(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*12; + sp_digit* t3 = t + 4*12; + sp_digit* t4 = t + 6*12; + sp_digit* t5 = t + 8*12; + sp_digit* t6 = t + 10*12; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t1, t1, p->x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_12(t3, t3, p->y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_12(t2, t1) & + sp_384_cmp_equal_12(t4, t3)) { + sp_384_proj_point_dbl_12(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_384_mont_sub_12(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_12(t4, t4, t3, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_12(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(x, x, t5, p384_mod); + sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_12(t3, y, p384_mod); + sp_384_mont_sub_12(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_12(y, y, x, p384_mod); + sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(y, y, t5, p384_mod); + for (i = 0; i < 12; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 12; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_add_12_ctx { @@ -26712,6 +26774,13 @@ typedef struct sp_384_proj_point_add_12_ctx { sp_digit* z; } sp_384_proj_point_add_12_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_384_proj_point_add_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, const sp_point_384* q, sp_digit* t) { @@ -26743,252 +26812,149 @@ static int sp_384_proj_point_add_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_384_sub_12(ctx->t1, p384_mod, q->y); - sp_384_norm_12(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & - (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_12(ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 2; break; case 2: - err = sp_384_proj_point_dbl_12_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_384_mont_mul_12(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 3; break; case 3: - { + sp_384_mont_mul_12(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_12(ctx->t1, q->z, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 5; break; case 5: - sp_384_mont_mul_12(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 6; break; case 6: - sp_384_mont_mul_12(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_12(ctx->t2, p->z, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_12(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); ctx->state = 8; break; case 8: - sp_384_mont_mul_12(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); ctx->state = 9; break; case 9: - sp_384_mont_mul_12(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_12(ctx->t2, ctx->t1) & + sp_384_cmp_equal_12(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_384_proj_point_dbl_12(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_12(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_12(ctx->t2, ctx->t2, ctx->t1, p384_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_12(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_12(ctx->t4, ctx->t4, ctx->t3, p384_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_384_mont_sub_12(ctx->t2, ctx->t2, ctx->t1, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_12(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_384_mont_sub_12(ctx->t4, ctx->t4, ctx->t3, p384_mod); + sp_384_mont_mul_12(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_12(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 15; break; case 15: - sp_384_mont_mul_12(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_12(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 16; break; case 16: - sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_12(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_sqr_12(ctx->x, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 18; break; case 18: - sp_384_mont_mul_12(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t5, p384_mod); ctx->state = 19; break; case 19: - sp_384_mont_sqr_12(ctx->x, ctx->t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); ctx->state = 20; break; case 20: - sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t5, p384_mod); + sp_384_mont_dbl_12(ctx->t3, ctx->y, p384_mod); + sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t3, p384_mod); ctx->state = 21; break; case 21: - sp_384_mont_mul_12(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_12(ctx->y, ctx->y, ctx->x, p384_mod); ctx->state = 22; break; case 22: - sp_384_mont_dbl_12(ctx->t3, ctx->y, p384_mod); + sp_384_mont_mul_12(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 23; break; case 23: - sp_384_mont_sub_12(ctx->x, ctx->x, ctx->t3, p384_mod); + sp_384_mont_sub_12(ctx->y, ctx->y, ctx->t5, p384_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_12(ctx->y, ctx->y, ctx->x, p384_mod); - ctx->state = 25; - break; - case 25: - sp_384_mont_mul_12(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); - ctx->state = 26; - break; - case 26: - sp_384_mont_sub_12(ctx->y, ctx->y, ctx->t5, p384_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 12; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 12; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 12; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_384_proj_point_add_12(sp_point_384* r, - const sp_point_384* p, const sp_point_384* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*12; - sp_digit* t3 = t + 4*12; - sp_digit* t4 = t + 6*12; - sp_digit* t5 = t + 8*12; - sp_digit* t6 = t + 10*12; - - - /* Check double */ - (void)sp_384_sub_12(t1, p384_mod, q->y); - sp_384_norm_12(t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & - (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { - sp_384_proj_point_dbl_12(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t1, t1, p->x, p384_mod, p384_mp_mod); - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_12(t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t4, t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_12(t3, t3, p->y, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); - /* H = U2 - U1 */ - sp_384_mont_sub_12(t2, t2, t1, p384_mod); - /* R = S2 - S1 */ - sp_384_mont_sub_12(t4, t4, t3, p384_mod); - if (~p->infinity & ~q->infinity & - sp_384_iszero_12(t2) & sp_384_iszero_12(t4) & maskt) { - sp_384_proj_point_dbl_12(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_12(z, p->z, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_12(x, x, t5, p384_mod); - sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_dbl_12(t3, y, p384_mod); - sp_384_mont_sub_12(x, x, t3, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_12(y, y, x, p384_mod); - sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_12(y, y, t5, p384_mod); - - for (i = 0; i < 12; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 12; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 12; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - #ifndef WC_NO_CACHE_RESISTANT /* Touch each possible point that could be being copied. * @@ -27300,7 +27266,6 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int i, /* W = Z^4 */ sp_384_mont_sqr_12(w, z, p384_mod, p384_mp_mod); sp_384_mont_sqr_12(w, w, p384_mod, p384_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -27318,7 +27283,7 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int i, sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_12(t2, b, p384_mod); sp_384_mont_sub_12(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_12(t2, b, x, p384_mod); sp_384_mont_dbl_lower_12(b, t2, p384_mod); /* Z = Z*Y */ @@ -27348,7 +27313,7 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int i, sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_12(t2, b, p384_mod); sp_384_mont_sub_12(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_12(t2, b, x, p384_mod); sp_384_mont_dbl_lower_12(b, t2, p384_mod); /* Z = Z*Y */ @@ -27358,7 +27323,7 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_384_mont_mul_12(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_12(y, y, t1, p384_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_384_div2_12(y, y, p384_mod); } @@ -27403,8 +27368,8 @@ typedef struct sp_table_entry_384 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p, - const sp_point_384* q, sp_digit* t) +static void sp_384_proj_point_add_qz1_12(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*12; @@ -27413,12 +27378,17 @@ static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p, sp_digit* t5 = t + 8*12; sp_digit* t6 = t + 10*12; - /* Check double */ - (void)sp_384_sub_12(t1, p384_mod, q->y); - sp_384_norm_12(t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & - (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_12(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_12(p->x, t2) & + sp_384_cmp_equal_12(p->y, t4)) { sp_384_proj_point_dbl_12(r, p, t); } else { @@ -27430,12 +27400,6 @@ static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_12(t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t4, t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod); /* H = U2 - X1 */ sp_384_mont_sub_12(t2, t2, p->x, p384_mod); /* R = S2 - Y1 */ @@ -31039,7 +31003,7 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_384_norm_12(r); - if (sp_384_iszero_12(r) == 0) { + if (!sp_384_iszero_12(r)) { /* x is modified in calculation of s. */ sp_384_from_mp(x, 12, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -31048,7 +31012,7 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_384_calc_s_12(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_384_iszero_12(s) == 0)) { + if ((err == MP_OKAY) && (!sp_384_iszero_12(s))) { break; } } @@ -33807,7 +33771,7 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, /* x /= z^2 */ sp_521_mont_mul_17(r->x, p->x, t2, p521_mod, p521_mp_mod); - XMEMSET(r->x + 17, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 17, 0, sizeof(sp_digit) * 17U); sp_521_mont_reduce_17(r->x, p521_mod, p521_mp_mod); /* Reduce x to less than modulus */ n = sp_521_cmp_17(r->x, p521_mod); @@ -33816,7 +33780,7 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, /* y /= z^3 */ sp_521_mont_mul_17(r->y, p->y, t1, p521_mod, p521_mp_mod); - XMEMSET(r->y + 17, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 17, 0, sizeof(sp_digit) * 17U); sp_521_mont_reduce_17(r->y, p521_mod, p521_mp_mod); /* Reduce y to less than modulus */ n = sp_521_cmp_17(r->y, p521_mod); @@ -33825,7 +33789,6 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -34331,6 +34294,61 @@ SP_NOINLINE static void sp_521_div2_17(sp_digit* r, const sp_digit* a, const sp_ * p Point to double. * t Temporary ordinate data. */ +static void sp_521_proj_point_dbl_17(sp_point_521* r, const sp_point_521* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*17; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_521_mont_sqr_17(t1, p->z, p521_mod, p521_mp_mod); + /* Z = Y * Z */ + sp_521_mont_mul_17(z, p->y, p->z, p521_mod, p521_mp_mod); + /* Z = 2Z */ + sp_521_mont_dbl_17(z, z, p521_mod); + /* T2 = X - T1 */ + sp_521_mont_sub_17(t2, p->x, t1, p521_mod); + /* T1 = X + T1 */ + sp_521_mont_add_17(t1, p->x, t1, p521_mod); + /* T2 = T1 * T2 */ + sp_521_mont_mul_17(t2, t1, t2, p521_mod, p521_mp_mod); + /* T1 = 3T2 */ + sp_521_mont_tpl_17(t1, t2, p521_mod); + /* Y = 2Y */ + sp_521_mont_dbl_17(y, p->y, p521_mod); + /* Y = Y * Y */ + sp_521_mont_sqr_17(y, y, p521_mod, p521_mp_mod); + /* T2 = Y * Y */ + sp_521_mont_sqr_17(t2, y, p521_mod, p521_mp_mod); + /* T2 = T2/2 */ + sp_521_div2_17(t2, t2, p521_mod); + /* Y = Y * X */ + sp_521_mont_mul_17(y, y, p->x, p521_mod, p521_mp_mod); + /* X = T1 * T1 */ + sp_521_mont_sqr_17(x, t1, p521_mod, p521_mp_mod); + /* X = X - Y */ + sp_521_mont_sub_17(x, x, y, p521_mod); + /* X = X - Y */ + sp_521_mont_sub_17(x, x, y, p521_mod); + /* Y = Y - X */ + sp_521_mont_sub_lower_17(y, y, x, p521_mod); + /* Y = Y * T1 */ + sp_521_mont_mul_17(y, y, t1, p521_mod, p521_mp_mod); + /* Y = Y - T2 */ + sp_521_mont_sub_17(y, y, t2, p521_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_521_proj_point_dbl_17_ctx { int state; @@ -34341,6 +34359,12 @@ typedef struct sp_521_proj_point_dbl_17_ctx { sp_digit* z; } sp_521_proj_point_dbl_17_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_521_proj_point_dbl_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -34465,62 +34489,6 @@ static int sp_521_proj_point_dbl_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, co return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_521_proj_point_dbl_17(sp_point_521* r, const sp_point_521* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*17; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_521_mont_sqr_17(t1, p->z, p521_mod, p521_mp_mod); - /* Z = Y * Z */ - sp_521_mont_mul_17(z, p->y, p->z, p521_mod, p521_mp_mod); - /* Z = 2Z */ - sp_521_mont_dbl_17(z, z, p521_mod); - /* T2 = X - T1 */ - sp_521_mont_sub_17(t2, p->x, t1, p521_mod); - /* T1 = X + T1 */ - sp_521_mont_add_17(t1, p->x, t1, p521_mod); - /* T2 = T1 * T2 */ - sp_521_mont_mul_17(t2, t1, t2, p521_mod, p521_mp_mod); - /* T1 = 3T2 */ - sp_521_mont_tpl_17(t1, t2, p521_mod); - /* Y = 2Y */ - sp_521_mont_dbl_17(y, p->y, p521_mod); - /* Y = Y * Y */ - sp_521_mont_sqr_17(y, y, p521_mod, p521_mp_mod); - /* T2 = Y * Y */ - sp_521_mont_sqr_17(t2, y, p521_mod, p521_mp_mod); - /* T2 = T2/2 */ - sp_521_div2_17(t2, t2, p521_mod); - /* Y = Y * X */ - sp_521_mont_mul_17(y, y, p->x, p521_mod, p521_mp_mod); - /* X = T1 * T1 */ - sp_521_mont_sqr_17(x, t1, p521_mod, p521_mp_mod); - /* X = X - Y */ - sp_521_mont_sub_17(x, x, y, p521_mod); - /* X = X - Y */ - sp_521_mont_sub_17(x, x, y, p521_mod); - /* Y = Y - X */ - sp_521_mont_sub_lower_17(y, y, x, p521_mod); - /* Y = Y * T1 */ - sp_521_mont_mul_17(y, y, t1, p521_mod, p521_mp_mod); - /* Y = Y - T2 */ - sp_521_mont_sub_17(y, y, t2, p521_mod); -} - /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -34551,6 +34519,7 @@ static int sp_521_iszero_17(const sp_digit* a) a[16]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -34558,6 +34527,81 @@ static int sp_521_iszero_17(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_521_proj_point_add_17(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*17; + sp_digit* t3 = t + 4*17; + sp_digit* t4 = t + 6*17; + sp_digit* t5 = t + 8*17; + sp_digit* t6 = t + 10*17; + + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_17(t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t3, t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t1, t1, p->x, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_17(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_17(t3, t3, p->y, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_17(t4, t4, q->y, p521_mod, p521_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_17(t2, t1) & + sp_521_cmp_equal_17(t4, t3)) { + sp_521_proj_point_dbl_17(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_521_mont_sub_17(t2, t2, t1, p521_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_17(t4, t4, t3, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_17(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_17(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_17(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(x, x, t5, p521_mod); + sp_521_mont_mul_17(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_17(t3, y, p521_mod); + sp_521_mont_sub_17(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_17(y, y, x, p521_mod); + sp_521_mont_mul_17(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(y, y, t5, p521_mod); + for (i = 0; i < 17; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 17; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_521_proj_point_add_17_ctx { @@ -34576,6 +34620,13 @@ typedef struct sp_521_proj_point_add_17_ctx { sp_digit* z; } sp_521_proj_point_add_17_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_521_proj_point_add_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, const sp_point_521* q, sp_digit* t) { @@ -34607,252 +34658,149 @@ static int sp_521_proj_point_add_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_521_sub_17(ctx->t1, p521_mod, q->y); - sp_521_norm_17(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & - (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_17(ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 2; break; case 2: - err = sp_521_proj_point_dbl_17_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_521_mont_mul_17(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 3; break; case 3: - { + sp_521_mont_mul_17(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_521_mont_sqr_17(ctx->t1, q->z, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_17(ctx->t2, p->z, p521_mod, p521_mp_mod); ctx->state = 5; break; case 5: - sp_521_mont_mul_17(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); ctx->state = 6; break; case 6: - sp_521_mont_mul_17(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_17(ctx->t2, p->z, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_17(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); ctx->state = 8; break; case 8: - sp_521_mont_mul_17(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_17(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); ctx->state = 9; break; case 9: - sp_521_mont_mul_17(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_17(ctx->t2, ctx->t1) & + sp_521_cmp_equal_17(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_521_proj_point_dbl_17(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_521_mont_mul_17(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); + /* H = U2 - U1 */ + sp_521_mont_sub_17(ctx->t2, ctx->t2, ctx->t1, p521_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_17(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_17(ctx->t4, ctx->t4, ctx->t3, p521_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_521_mont_sub_17(ctx->t2, ctx->t2, ctx->t1, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_17(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_521_mont_sub_17(ctx->t4, ctx->t4, ctx->t3, p521_mod); + sp_521_mont_mul_17(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_17(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 15; break; case 15: - sp_521_mont_mul_17(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_17(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 16; break; case 16: - sp_521_mont_mul_17(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_17(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_sqr_17(ctx->x, ctx->t4, p521_mod, p521_mp_mod); ctx->state = 18; break; case 18: - sp_521_mont_mul_17(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sub_17(ctx->x, ctx->x, ctx->t5, p521_mod); ctx->state = 19; break; case 19: - sp_521_mont_sqr_17(ctx->x, ctx->t4, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); ctx->state = 20; break; case 20: - sp_521_mont_sub_17(ctx->x, ctx->x, ctx->t5, p521_mod); + sp_521_mont_dbl_17(ctx->t3, ctx->y, p521_mod); + sp_521_mont_sub_17(ctx->x, ctx->x, ctx->t3, p521_mod); ctx->state = 21; break; case 21: - sp_521_mont_mul_17(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_17(ctx->y, ctx->y, ctx->x, p521_mod); ctx->state = 22; break; case 22: - sp_521_mont_dbl_17(ctx->t3, ctx->y, p521_mod); + sp_521_mont_mul_17(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); ctx->state = 23; break; case 23: - sp_521_mont_sub_17(ctx->x, ctx->x, ctx->t3, p521_mod); + sp_521_mont_sub_17(ctx->y, ctx->y, ctx->t5, p521_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_17(ctx->y, ctx->y, ctx->x, p521_mod); - ctx->state = 25; - break; - case 25: - sp_521_mont_mul_17(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); - ctx->state = 26; - break; - case 26: - sp_521_mont_sub_17(ctx->y, ctx->y, ctx->t5, p521_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 17; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 17; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 17; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_521_proj_point_add_17(sp_point_521* r, - const sp_point_521* p, const sp_point_521* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*17; - sp_digit* t3 = t + 4*17; - sp_digit* t4 = t + 6*17; - sp_digit* t5 = t + 8*17; - sp_digit* t6 = t + 10*17; - - - /* Check double */ - (void)sp_521_sub_17(t1, p521_mod, q->y); - sp_521_norm_17(t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & - (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, t1))) != 0) { - sp_521_proj_point_dbl_17(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_521_mont_sqr_17(t1, q->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t3, t1, q->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t1, t1, p->x, p521_mod, p521_mp_mod); - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_17(t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t4, t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t2, t2, q->x, p521_mod, p521_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_521_mont_mul_17(t3, t3, p->y, p521_mod, p521_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_17(t4, t4, q->y, p521_mod, p521_mp_mod); - /* H = U2 - U1 */ - sp_521_mont_sub_17(t2, t2, t1, p521_mod); - /* R = S2 - S1 */ - sp_521_mont_sub_17(t4, t4, t3, p521_mod); - if (~p->infinity & ~q->infinity & - sp_521_iszero_17(t2) & sp_521_iszero_17(t4) & maskt) { - sp_521_proj_point_dbl_17(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_17(t5, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(y, t1, t5, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t5, t5, t2, p521_mod, p521_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_17(z, p->z, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(z, z, q->z, p521_mod, p521_mp_mod); - sp_521_mont_sqr_17(x, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_17(x, x, t5, p521_mod); - sp_521_mont_mul_17(t5, t5, t3, p521_mod, p521_mp_mod); - sp_521_mont_dbl_17(t3, y, p521_mod); - sp_521_mont_sub_17(x, x, t3, p521_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_17(y, y, x, p521_mod); - sp_521_mont_mul_17(y, y, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_17(y, y, t5, p521_mod); - - for (i = 0; i < 17; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 17; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 17; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - #ifndef WC_NO_CACHE_RESISTANT /* Touch each possible point that could be being copied. * @@ -35198,7 +35146,6 @@ static void sp_521_proj_point_dbl_n_17(sp_point_521* p, int i, /* W = Z^4 */ sp_521_mont_sqr_17(w, z, p521_mod, p521_mp_mod); sp_521_mont_sqr_17(w, w, p521_mod, p521_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -35216,7 +35163,7 @@ static void sp_521_proj_point_dbl_n_17(sp_point_521* p, int i, sp_521_mont_sqr_17(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_17(t2, b, p521_mod); sp_521_mont_sub_17(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_17(t2, b, x, p521_mod); sp_521_mont_dbl_lower_17(b, t2, p521_mod); /* Z = Z*Y */ @@ -35246,7 +35193,7 @@ static void sp_521_proj_point_dbl_n_17(sp_point_521* p, int i, sp_521_mont_sqr_17(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_17(t2, b, p521_mod); sp_521_mont_sub_17(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_17(t2, b, x, p521_mod); sp_521_mont_dbl_lower_17(b, t2, p521_mod); /* Z = Z*Y */ @@ -35256,7 +35203,7 @@ static void sp_521_proj_point_dbl_n_17(sp_point_521* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_521_mont_mul_17(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_17(y, y, t1, p521_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_521_div2_17(y, y, p521_mod); } @@ -35301,8 +35248,8 @@ typedef struct sp_table_entry_521 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_521_proj_point_add_qz1_17(sp_point_521* r, const sp_point_521* p, - const sp_point_521* q, sp_digit* t) +static void sp_521_proj_point_add_qz1_17(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*17; @@ -35311,12 +35258,17 @@ static void sp_521_proj_point_add_qz1_17(sp_point_521* r, const sp_point_521* p, sp_digit* t5 = t + 8*17; sp_digit* t6 = t + 10*17; - /* Check double */ - (void)sp_521_sub_17(t1, p521_mod, q->y); - sp_521_norm_17(t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & - (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_17(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_17(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_17(t4, t4, q->y, p521_mod, p521_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_17(p->x, t2) & + sp_521_cmp_equal_17(p->y, t4)) { sp_521_proj_point_dbl_17(r, p, t); } else { @@ -35328,12 +35280,6 @@ static void sp_521_proj_point_add_qz1_17(sp_point_521* r, const sp_point_521* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_17(t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t4, t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_17(t2, t2, q->x, p521_mod, p521_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_17(t4, t4, q->y, p521_mod, p521_mp_mod); /* H = U2 - X1 */ sp_521_mont_sub_17(t2, t2, p->x, p521_mod); /* R = S2 - Y1 */ @@ -39995,7 +39941,7 @@ int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_521_norm_17(r); - if (sp_521_iszero_17(r) == 0) { + if (!sp_521_iszero_17(r)) { /* x is modified in calculation of s. */ sp_521_from_mp(x, 17, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -40009,7 +39955,7 @@ int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_521_calc_s_17(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_521_iszero_17(s) == 0)) { + if ((err == MP_OKAY) && (!sp_521_iszero_17(s))) { break; } } @@ -43352,7 +43298,7 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, /* x /= z^2 */ sp_1024_mont_mul_32(r->x, p->x, t2, p1024_mod, p1024_mp_mod); - XMEMSET(r->x + 32, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 32, 0, sizeof(sp_digit) * 32U); sp_1024_mont_reduce_32(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_32(r->x, p1024_mod); @@ -43361,7 +43307,7 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, /* y /= z^3 */ sp_1024_mont_mul_32(r->y, p->y, t1, p1024_mod, p1024_mp_mod); - XMEMSET(r->y + 32, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 32, 0, sizeof(sp_digit) * 32U); sp_1024_mont_reduce_32(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_32(r->y, p1024_mod); @@ -43370,7 +43316,6 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } /* Add two Montgomery form numbers (r = a + b % m). @@ -44366,47 +44311,108 @@ SP_NOINLINE static void sp_1024_div2_32(sp_digit* r, const sp_digit* a, const sp * p Point to double. * t Temporary ordinate data. */ -#ifdef WOLFSSL_SP_NONBLOCK -typedef struct sp_1024_proj_point_dbl_32_ctx { - int state; - sp_digit* t1; - sp_digit* t2; +static void sp_1024_proj_point_dbl_32(sp_point_1024* r, const sp_point_1024* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*32; sp_digit* x; sp_digit* y; sp_digit* z; -} sp_1024_proj_point_dbl_32_ctx; - -static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) -{ - int err = FP_WOULDBLOCK; - sp_1024_proj_point_dbl_32_ctx* ctx = (sp_1024_proj_point_dbl_32_ctx*)sp_ctx->data; - typedef char ctx_size_test[sizeof(sp_1024_proj_point_dbl_32_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; - (void)sizeof(ctx_size_test); - - switch (ctx->state) { - case 0: - ctx->t1 = t; - ctx->t2 = t + 2*32; - ctx->x = r->x; - ctx->y = r->y; - ctx->z = r->z; + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - ctx->state = 1; - break; - case 1: - /* T1 = Z * Z */ - sp_1024_mont_sqr_32(ctx->t1, p->z, p1024_mod, p1024_mp_mod); - ctx->state = 2; - break; - case 2: - /* Z = Y * Z */ - sp_1024_mont_mul_32(ctx->z, p->y, p->z, p1024_mod, p1024_mp_mod); - ctx->state = 3; + /* T1 = Z * Z */ + sp_1024_mont_sqr_32(t1, p->z, p1024_mod, p1024_mp_mod); + /* Z = Y * Z */ + sp_1024_mont_mul_32(z, p->y, p->z, p1024_mod, p1024_mp_mod); + /* Z = 2Z */ + sp_1024_mont_dbl_32(z, z, p1024_mod); + /* T2 = X - T1 */ + sp_1024_mont_sub_32(t2, p->x, t1, p1024_mod); + /* T1 = X + T1 */ + sp_1024_mont_add_32(t1, p->x, t1, p1024_mod); + /* T2 = T1 * T2 */ + sp_1024_mont_mul_32(t2, t1, t2, p1024_mod, p1024_mp_mod); + /* T1 = 3T2 */ + sp_1024_mont_tpl_32(t1, t2, p1024_mod); + /* Y = 2Y */ + sp_1024_mont_dbl_32(y, p->y, p1024_mod); + /* Y = Y * Y */ + sp_1024_mont_sqr_32(y, y, p1024_mod, p1024_mp_mod); + /* T2 = Y * Y */ + sp_1024_mont_sqr_32(t2, y, p1024_mod, p1024_mp_mod); + /* T2 = T2/2 */ + sp_1024_div2_32(t2, t2, p1024_mod); + /* Y = Y * X */ + sp_1024_mont_mul_32(y, y, p->x, p1024_mod, p1024_mp_mod); + /* X = T1 * T1 */ + sp_1024_mont_sqr_32(x, t1, p1024_mod, p1024_mp_mod); + /* X = X - Y */ + sp_1024_mont_sub_32(x, x, y, p1024_mod); + /* X = X - Y */ + sp_1024_mont_sub_32(x, x, y, p1024_mod); + /* Y = Y - X */ + sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); + /* Y = Y * T1 */ + sp_1024_mont_mul_32(y, y, t1, p1024_mod, p1024_mp_mod); + /* Y = Y - T2 */ + sp_1024_mont_sub_32(y, y, t2, p1024_mod); +} + +#ifdef WOLFSSL_SP_NONBLOCK +typedef struct sp_1024_proj_point_dbl_32_ctx { + int state; + sp_digit* t1; + sp_digit* t2; + sp_digit* x; + sp_digit* y; + sp_digit* z; +} sp_1024_proj_point_dbl_32_ctx; + +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ +static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) +{ + int err = FP_WOULDBLOCK; + sp_1024_proj_point_dbl_32_ctx* ctx = (sp_1024_proj_point_dbl_32_ctx*)sp_ctx->data; + + typedef char ctx_size_test[sizeof(sp_1024_proj_point_dbl_32_ctx) >= sizeof(*sp_ctx) ? -1 : 1]; + (void)sizeof(ctx_size_test); + + switch (ctx->state) { + case 0: + ctx->t1 = t; + ctx->t2 = t + 2*32; + ctx->x = r->x; + ctx->y = r->y; + ctx->z = r->z; + + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + ctx->state = 1; + break; + case 1: + /* T1 = Z * Z */ + sp_1024_mont_sqr_32(ctx->t1, p->z, p1024_mod, p1024_mp_mod); + ctx->state = 2; + break; + case 2: + /* Z = Y * Z */ + sp_1024_mont_mul_32(ctx->z, p->y, p->z, p1024_mod, p1024_mp_mod); + ctx->state = 3; break; case 3: /* Z = 2Z */ @@ -44500,62 +44506,6 @@ static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_1024_proj_point_dbl_32(sp_point_1024* r, const sp_point_1024* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*32; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_1024_mont_sqr_32(t1, p->z, p1024_mod, p1024_mp_mod); - /* Z = Y * Z */ - sp_1024_mont_mul_32(z, p->y, p->z, p1024_mod, p1024_mp_mod); - /* Z = 2Z */ - sp_1024_mont_dbl_32(z, z, p1024_mod); - /* T2 = X - T1 */ - sp_1024_mont_sub_32(t2, p->x, t1, p1024_mod); - /* T1 = X + T1 */ - sp_1024_mont_add_32(t1, p->x, t1, p1024_mod); - /* T2 = T1 * T2 */ - sp_1024_mont_mul_32(t2, t1, t2, p1024_mod, p1024_mp_mod); - /* T1 = 3T2 */ - sp_1024_mont_tpl_32(t1, t2, p1024_mod); - /* Y = 2Y */ - sp_1024_mont_dbl_32(y, p->y, p1024_mod); - /* Y = Y * Y */ - sp_1024_mont_sqr_32(y, y, p1024_mod, p1024_mp_mod); - /* T2 = Y * Y */ - sp_1024_mont_sqr_32(t2, y, p1024_mod, p1024_mp_mod); - /* T2 = T2/2 */ - sp_1024_div2_32(t2, t2, p1024_mod); - /* Y = Y * X */ - sp_1024_mont_mul_32(y, y, p->x, p1024_mod, p1024_mp_mod); - /* X = T1 * T1 */ - sp_1024_mont_sqr_32(x, t1, p1024_mod, p1024_mp_mod); - /* X = X - Y */ - sp_1024_mont_sub_32(x, x, y, p1024_mod); - /* X = X - Y */ - sp_1024_mont_sub_32(x, x, y, p1024_mod); - /* Y = Y - X */ - sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); - /* Y = Y * T1 */ - sp_1024_mont_mul_32(y, y, t1, p1024_mod, p1024_mp_mod); - /* Y = Y - T2 */ - sp_1024_mont_sub_32(y, y, t2, p1024_mod); -} - #ifdef WOLFSSL_SP_SMALL /* Sub b from a into r. (r = a - b) * @@ -44735,6 +44685,7 @@ static int sp_1024_iszero_32(const sp_digit* a) a[24] | a[25] | a[26] | a[27] | a[28] | a[29] | a[30] | a[31]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -44742,6 +44693,81 @@ static int sp_1024_iszero_32(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_1024_proj_point_add_32(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*32; + sp_digit* t3 = t + 4*32; + sp_digit* t4 = t + 6*32; + sp_digit* t5 = t + 8*32; + sp_digit* t6 = t + 10*32; + + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_32(t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t3, t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t1, t1, p->x, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_32(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_32(t3, t3, p->y, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_32(t2, t1) & + sp_1024_cmp_equal_32(t4, t3)) { + sp_1024_proj_point_dbl_32(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_1024_mont_sub_32(t2, t2, t1, p1024_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_32(t4, t4, t3, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_32(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_32(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_32(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(x, x, t5, p1024_mod); + sp_1024_mont_mul_32(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_32(t3, y, p1024_mod); + sp_1024_mont_sub_32(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); + sp_1024_mont_mul_32(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(y, y, t5, p1024_mod); + for (i = 0; i < 32; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 32; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_add_32_ctx { @@ -44760,6 +44786,13 @@ typedef struct sp_1024_proj_point_add_32_ctx { sp_digit* z; } sp_1024_proj_point_add_32_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_1024_proj_point_add_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { @@ -44791,252 +44824,149 @@ static int sp_1024_proj_point_add_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_1024_sub_32(ctx->t1, p1024_mod, q->y); - sp_1024_norm_32(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & - (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_32(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 2; break; case 2: - err = sp_1024_proj_point_dbl_32_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_1024_mont_mul_32(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 3; break; case 3: - { + sp_1024_mont_mul_32(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_32(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_32(ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 5; break; case 5: - sp_1024_mont_mul_32(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 6; break; case 6: - sp_1024_mont_mul_32(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_32(ctx->t2, p->z, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_32(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); ctx->state = 8; break; case 8: - sp_1024_mont_mul_32(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_32(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); ctx->state = 9; break; case 9: - sp_1024_mont_mul_32(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_32(ctx->t2, ctx->t1) & + sp_1024_cmp_equal_32(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_1024_proj_point_dbl_32(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_32(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); + /* H = U2 - U1 */ + sp_1024_mont_sub_32(ctx->t2, ctx->t2, ctx->t1, p1024_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_32(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_32(ctx->t4, ctx->t4, ctx->t3, p1024_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_1024_mont_sub_32(ctx->t2, ctx->t2, ctx->t1, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_32(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_1024_mont_sub_32(ctx->t4, ctx->t4, ctx->t3, p1024_mod); + sp_1024_mont_mul_32(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_32(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 15; break; case 15: - sp_1024_mont_mul_32(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_32(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 16; break; case 16: - sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_32(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_32(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 18; break; case 18: - sp_1024_mont_mul_32(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t5, p1024_mod); ctx->state = 19; break; case 19: - sp_1024_mont_sqr_32(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); ctx->state = 20; break; case 20: - sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t5, p1024_mod); + sp_1024_mont_dbl_32(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t3, p1024_mod); ctx->state = 21; break; case 21: - sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_32(ctx->y, ctx->y, ctx->x, p1024_mod); ctx->state = 22; break; case 22: - sp_1024_mont_dbl_32(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_mul_32(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 23; break; case 23: - sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t3, p1024_mod); + sp_1024_mont_sub_32(ctx->y, ctx->y, ctx->t5, p1024_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_32(ctx->y, ctx->y, ctx->x, p1024_mod); - ctx->state = 25; - break; - case 25: - sp_1024_mont_mul_32(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); - ctx->state = 26; - break; - case 26: - sp_1024_mont_sub_32(ctx->y, ctx->y, ctx->t5, p1024_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 32; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 32; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 32; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_1024_proj_point_add_32(sp_point_1024* r, - const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*32; - sp_digit* t3 = t + 4*32; - sp_digit* t4 = t + 6*32; - sp_digit* t5 = t + 8*32; - sp_digit* t6 = t + 10*32; - - - /* Check double */ - (void)sp_1024_mont_sub_32(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_32(t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & - (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, t1))) != 0) { - sp_1024_proj_point_dbl_32(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_32(t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t3, t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t1, t1, p->x, p1024_mod, p1024_mp_mod); - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_32(t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t4, t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_32(t3, t3, p->y, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod); - /* H = U2 - U1 */ - sp_1024_mont_sub_32(t2, t2, t1, p1024_mod); - /* R = S2 - S1 */ - sp_1024_mont_sub_32(t4, t4, t3, p1024_mod); - if (~p->infinity & ~q->infinity & - sp_1024_iszero_32(t2) & sp_1024_iszero_32(t4) & maskt) { - sp_1024_proj_point_dbl_32(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_32(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t5, t5, t2, p1024_mod, p1024_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_32(z, p->z, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_32(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_32(x, x, t5, p1024_mod); - sp_1024_mont_mul_32(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_32(t3, y, p1024_mod); - sp_1024_mont_sub_32(x, x, t3, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_32(y, y, x, p1024_mod); - sp_1024_mont_mul_32(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_32(y, y, t5, p1024_mod); - - for (i = 0; i < 32; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 32; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 32; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Multiply the point by the scalar and return the result. * If map is true then convert result to affine coordinates. * @@ -45212,7 +45142,6 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int i, /* W = Z^4 */ sp_1024_mont_sqr_32(w, z, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_32(w, w, p1024_mod, p1024_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -45230,7 +45159,7 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int i, sp_1024_mont_sqr_32(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_32(t2, b, p1024_mod); sp_1024_mont_sub_32(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_32(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_32(b, t2, p1024_mod); /* Z = Z*Y */ @@ -45260,7 +45189,7 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int i, sp_1024_mont_sqr_32(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_32(t2, b, p1024_mod); sp_1024_mont_sub_32(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_32(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_32(b, t2, p1024_mod); /* Z = Z*Y */ @@ -45270,7 +45199,7 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_1024_mont_mul_32(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_32(y, y, t1, p1024_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_1024_div2_32(y, y, p1024_mod); } @@ -45315,8 +45244,8 @@ typedef struct sp_table_entry_1024 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, const sp_point_1024* p, - const sp_point_1024* q, sp_digit* t) +static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*32; @@ -45325,12 +45254,17 @@ static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, const sp_point_1024* sp_digit* t5 = t + 8*32; sp_digit* t6 = t + 10*32; - /* Check double */ - (void)sp_1024_mont_sub_32(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_32(t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & - (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_32(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_32(p->x, t2) & + sp_1024_cmp_equal_32(p->y, t4)) { sp_1024_proj_point_dbl_32(r, p, t); } else { @@ -45342,12 +45276,6 @@ static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, const sp_point_1024* sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_32(t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t4, t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod); /* H = U2 - X1 */ sp_1024_mont_sub_32(t2, t2, p->x, p1024_mod); /* R = S2 - Y1 */ diff --git a/wolfcrypt/src/sp_x86_64.c b/wolfcrypt/src/sp_x86_64.c index d77494169e..a33e3653db 100644 --- a/wolfcrypt/src/sp_x86_64.c +++ b/wolfcrypt/src/sp_x86_64.c @@ -8535,7 +8535,7 @@ static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, /* x /= z^2 */ sp_256_mont_mul_4(r->x, p->x, t2, p256_mod, p256_mp_mod); - XMEMSET(r->x + 4, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 4, 0, sizeof(sp_digit) * 4U); sp_256_mont_reduce_4(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_4(r->x, p256_mod); @@ -8544,7 +8544,7 @@ static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, /* y /= z^3 */ sp_256_mont_mul_4(r->y, p->y, t1, p256_mod, p256_mp_mod); - XMEMSET(r->y + 4, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 4, 0, sizeof(sp_digit) * 4U); sp_256_mont_reduce_4(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_4(r->y, p256_mod); @@ -8553,7 +8553,6 @@ static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } #ifdef __cplusplus @@ -8604,6 +8603,61 @@ extern void sp_256_div2_4(sp_digit* r, const sp_digit* a, const sp_digit* m); * p Point to double. * t Temporary ordinate data. */ +static void sp_256_proj_point_dbl_4(sp_point_256* r, const sp_point_256* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_4(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_4(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_4(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_4(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_4(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_4(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_4(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_4(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_4(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_4(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_4(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_4(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_4(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_4(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_lower_4(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_4(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_4(y, y, t2, p256_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_dbl_4_ctx { int state; @@ -8614,6 +8668,12 @@ typedef struct sp_256_proj_point_dbl_4_ctx { sp_digit* z; } sp_256_proj_point_dbl_4_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_256_proj_point_dbl_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -8738,62 +8798,6 @@ static int sp_256_proj_point_dbl_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_256_proj_point_dbl_4(sp_point_256* r, const sp_point_256* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*4; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_256_mont_sqr_4(t1, p->z, p256_mod, p256_mp_mod); - /* Z = Y * Z */ - sp_256_mont_mul_4(z, p->y, p->z, p256_mod, p256_mp_mod); - /* Z = 2Z */ - sp_256_mont_dbl_4(z, z, p256_mod); - /* T2 = X - T1 */ - sp_256_mont_sub_4(t2, p->x, t1, p256_mod); - /* T1 = X + T1 */ - sp_256_mont_add_4(t1, p->x, t1, p256_mod); - /* T2 = T1 * T2 */ - sp_256_mont_mul_4(t2, t1, t2, p256_mod, p256_mp_mod); - /* T1 = 3T2 */ - sp_256_mont_tpl_4(t1, t2, p256_mod); - /* Y = 2Y */ - sp_256_mont_dbl_4(y, p->y, p256_mod); - /* Y = Y * Y */ - sp_256_mont_sqr_4(y, y, p256_mod, p256_mp_mod); - /* T2 = Y * Y */ - sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod); - /* T2 = T2/2 */ - sp_256_div2_4(t2, t2, p256_mod); - /* Y = Y * X */ - sp_256_mont_mul_4(y, y, p->x, p256_mod, p256_mp_mod); - /* X = T1 * T1 */ - sp_256_mont_sqr_4(x, t1, p256_mod, p256_mp_mod); - /* X = X - Y */ - sp_256_mont_sub_4(x, x, y, p256_mod); - /* X = X - Y */ - sp_256_mont_sub_4(x, x, y, p256_mod); - /* Y = Y - X */ - sp_256_mont_sub_lower_4(y, y, x, p256_mod); - /* Y = Y * T1 */ - sp_256_mont_mul_4(y, y, t1, p256_mod, p256_mp_mod); - /* Y = Y - T2 */ - sp_256_mont_sub_4(y, y, t2, p256_mod); -} - #ifdef __cplusplus extern "C" { #endif @@ -8843,7 +8847,6 @@ static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int i, /* W = Z^4 */ sp_256_mont_sqr_4(w, z, p256_mod, p256_mp_mod); sp_256_mont_sqr_4(w, w, p256_mod, p256_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -8860,7 +8863,7 @@ static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int i, /* X = A^2 - 2B */ sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod); sp_256_mont_sub_dbl_4(x, x, b, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_dbl_sub_4(b, b, x, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod); @@ -8888,7 +8891,7 @@ static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int i, /* X = A^2 - 2B */ sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod); sp_256_mont_sub_dbl_4(x, x, b, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_dbl_sub_4(b, b, x, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_4(z, z, y, p256_mod, p256_mp_mod); @@ -8897,7 +8900,7 @@ static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_256_mont_mul_4(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_4(y, y, t1, p256_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_256_div2_4(y, y, p256_mod); } @@ -8926,6 +8929,7 @@ static int sp_256_iszero_4(const sp_digit* a) return (a[0] | a[1] | a[2] | a[3]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -8933,6 +8937,80 @@ static int sp_256_iszero_4(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_256_proj_point_add_4(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + sp_digit* t3 = t + 4*4; + sp_digit* t4 = t + 6*4; + sp_digit* t5 = t + 8*4; + sp_digit* t6 = t + 10*4; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t1, t1, p->x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_4(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_4(t3, t3, p->y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_4(t2, t1) & + sp_256_cmp_equal_4(t4, t3)) { + sp_256_proj_point_dbl_4(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_256_mont_sub_4(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_4(t4, t4, t3, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_4(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(x, x, t5, p256_mod); + sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_sub_dbl_4(x, x, y, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_4(y, y, x, p256_mod); + sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(y, y, t5, p256_mod); + for (i = 0; i < 4; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 4; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 4; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_add_4_ctx { @@ -8951,6 +9029,13 @@ typedef struct sp_256_proj_point_add_4_ctx { sp_digit* z; } sp_256_proj_point_add_4_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_256_proj_point_add_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, const sp_point_256* q, sp_digit* t) { @@ -8982,251 +9067,148 @@ static int sp_256_proj_point_add_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_256_sub_4(ctx->t1, p256_mod, q->y); - sp_256_norm_4(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & - (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_4(ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 2; break; case 2: - err = sp_256_proj_point_dbl_4_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_256_mont_mul_4(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 3; break; case 3: - { + sp_256_mont_mul_4(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_4(ctx->t1, q->z, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_4(ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 5; break; case 5: - sp_256_mont_mul_4(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 6; break; case 6: - sp_256_mont_mul_4(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_4(ctx->t2, p->z, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_4(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); ctx->state = 8; break; case 8: - sp_256_mont_mul_4(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_4(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); ctx->state = 9; break; case 9: - sp_256_mont_mul_4(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_4(ctx->t2, ctx->t1) & + sp_256_cmp_equal_4(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_256_proj_point_dbl_4(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_4(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_4(ctx->t2, ctx->t2, ctx->t1, p256_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_4(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_4(ctx->t4, ctx->t4, ctx->t3, p256_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_256_mont_sub_4(ctx->t2, ctx->t2, ctx->t1, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_4(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_256_mont_sub_4(ctx->t4, ctx->t4, ctx->t3, p256_mod); + sp_256_mont_mul_4(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_4(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 15; break; case 15: - sp_256_mont_mul_4(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_4(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 16; break; case 16: - sp_256_mont_mul_4(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_4(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_sqr_4(ctx->x, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 18; break; case 18: - sp_256_mont_mul_4(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sub_4(ctx->x, ctx->x, ctx->t5, p256_mod); ctx->state = 19; break; case 19: - sp_256_mont_sqr_4(ctx->x, ctx->t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); ctx->state = 20; break; case 20: - sp_256_mont_sub_4(ctx->x, ctx->x, ctx->t5, p256_mod); + sp_256_mont_sub_dbl_4(ctx->x, ctx->x, ctx->y, p256_mod); ctx->state = 21; break; case 21: - sp_256_mont_mul_4(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_4(ctx->y, ctx->y, ctx->x, p256_mod); ctx->state = 22; break; case 22: - sp_256_mont_dbl_4(ctx->t3, ctx->y, p256_mod); + sp_256_mont_mul_4(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 23; break; case 23: - sp_256_mont_sub_4(ctx->x, ctx->x, ctx->t3, p256_mod); + sp_256_mont_sub_4(ctx->y, ctx->y, ctx->t5, p256_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_4(ctx->y, ctx->y, ctx->x, p256_mod); - ctx->state = 25; - break; - case 25: - sp_256_mont_mul_4(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); - ctx->state = 26; - break; - case 26: - sp_256_mont_sub_4(ctx->y, ctx->y, ctx->t5, p256_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 4; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 4; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 4; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_256_proj_point_add_4(sp_point_256* r, - const sp_point_256* p, const sp_point_256* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*4; - sp_digit* t3 = t + 4*4; - sp_digit* t4 = t + 6*4; - sp_digit* t5 = t + 8*4; - sp_digit* t6 = t + 10*4; - - - /* Check double */ - (void)sp_256_sub_4(t1, p256_mod, q->y); - sp_256_norm_4(t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & - (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { - sp_256_proj_point_dbl_4(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_4(t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t3, t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t1, t1, p->x, p256_mod, p256_mp_mod); - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_4(t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t4, t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_4(t3, t3, p->y, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod); - /* H = U2 - U1 */ - sp_256_mont_sub_4(t2, t2, t1, p256_mod); - /* R = S2 - S1 */ - sp_256_mont_sub_4(t4, t4, t3, p256_mod); - if (~p->infinity & ~q->infinity & - sp_256_iszero_4(t2) & sp_256_iszero_4(t4) & maskt) { - sp_256_proj_point_dbl_4(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_4(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t5, t5, t2, p256_mod, p256_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_4(z, p->z, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_sqr_4(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_4(x, x, t5, p256_mod); - sp_256_mont_mul_4(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_sub_dbl_4(x, x, y, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_4(y, y, x, p256_mod); - sp_256_mont_mul_4(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_4(y, y, t5, p256_mod); - - for (i = 0; i < 4; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 4; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 4; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -9277,7 +9259,7 @@ static void sp_256_proj_point_dbl_n_store_4(sp_point_256* r, /* X = A^2 - 2B */ sp_256_mont_sqr_4(x, a, p256_mod, p256_mp_mod); sp_256_mont_sub_dbl_4(x, x, b, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_dbl_sub_4(b, b, x, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_4(r[j].z, z, y, p256_mod, p256_mp_mod); @@ -9291,7 +9273,6 @@ static void sp_256_proj_point_dbl_n_store_4(sp_point_256* r, /* y = 2*A*(B - X) - Y^4 */ sp_256_mont_mul_4(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_4(y, y, t1, p256_mod); - /* Y = Y/2 */ sp_256_div2_4(r[j].y, y, p256_mod); r[j].infinity = 0; @@ -9747,7 +9728,7 @@ static void sp_256_map_avx2_4(sp_point_256* r, const sp_point_256* p, /* x /= z^2 */ sp_256_mont_mul_avx2_4(r->x, p->x, t2, p256_mod, p256_mp_mod); - XMEMSET(r->x + 4, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 4, 0, sizeof(sp_digit) * 4U); sp_256_mont_reduce_avx2_4(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_4(r->x, p256_mod); @@ -9756,16 +9737,15 @@ static void sp_256_map_avx2_4(sp_point_256* r, const sp_point_256* p, /* y /= z^3 */ sp_256_mont_mul_avx2_4(r->y, p->y, t1, p256_mod, p256_mp_mod); - XMEMSET(r->y + 4, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 4, 0, sizeof(sp_digit) * 4U); sp_256_mont_reduce_avx2_4(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_4(r->y, p256_mod); - sp_256_cond_sub_avx2_4(r->y, r->y, p256_mod, ~(n >> 63)); + sp_256_cond_sub_4(r->y, r->y, p256_mod, ~(n >> 63)); sp_256_norm_4(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } #define sp_256_mont_add_avx2_4 sp_256_mont_add_4 @@ -9786,6 +9766,61 @@ extern void sp_256_div2_avx2_4(sp_digit* r, const sp_digit* a, const sp_digit* m * p Point to double. * t Temporary ordinate data. */ +static void sp_256_proj_point_dbl_avx2_4(sp_point_256* r, const sp_point_256* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_256_mont_sqr_avx2_4(t1, p->z, p256_mod, p256_mp_mod); + /* Z = Y * Z */ + sp_256_mont_mul_avx2_4(z, p->y, p->z, p256_mod, p256_mp_mod); + /* Z = 2Z */ + sp_256_mont_dbl_avx2_4(z, z, p256_mod); + /* T2 = X - T1 */ + sp_256_mont_sub_avx2_4(t2, p->x, t1, p256_mod); + /* T1 = X + T1 */ + sp_256_mont_add_avx2_4(t1, p->x, t1, p256_mod); + /* T2 = T1 * T2 */ + sp_256_mont_mul_avx2_4(t2, t1, t2, p256_mod, p256_mp_mod); + /* T1 = 3T2 */ + sp_256_mont_tpl_avx2_4(t1, t2, p256_mod); + /* Y = 2Y */ + sp_256_mont_dbl_avx2_4(y, p->y, p256_mod); + /* Y = Y * Y */ + sp_256_mont_sqr_avx2_4(y, y, p256_mod, p256_mp_mod); + /* T2 = Y * Y */ + sp_256_mont_sqr_avx2_4(t2, y, p256_mod, p256_mp_mod); + /* T2 = T2/2 */ + sp_256_div2_avx2_4(t2, t2, p256_mod); + /* Y = Y * X */ + sp_256_mont_mul_avx2_4(y, y, p->x, p256_mod, p256_mp_mod); + /* X = T1 * T1 */ + sp_256_mont_sqr_avx2_4(x, t1, p256_mod, p256_mp_mod); + /* X = X - Y */ + sp_256_mont_sub_avx2_4(x, x, y, p256_mod); + /* X = X - Y */ + sp_256_mont_sub_avx2_4(x, x, y, p256_mod); + /* Y = Y - X */ + sp_256_mont_sub_lower_avx2_4(y, y, x, p256_mod); + /* Y = Y * T1 */ + sp_256_mont_mul_avx2_4(y, y, t1, p256_mod, p256_mp_mod); + /* Y = Y - T2 */ + sp_256_mont_sub_avx2_4(y, y, t2, p256_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_dbl_avx2_4_ctx { int state; @@ -9796,6 +9831,12 @@ typedef struct sp_256_proj_point_dbl_avx2_4_ctx { sp_digit* z; } sp_256_proj_point_dbl_avx2_4_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_256_proj_point_dbl_avx2_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -9920,62 +9961,6 @@ static int sp_256_proj_point_dbl_avx2_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_256_proj_point_dbl_avx2_4(sp_point_256* r, const sp_point_256* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*4; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_256_mont_sqr_avx2_4(t1, p->z, p256_mod, p256_mp_mod); - /* Z = Y * Z */ - sp_256_mont_mul_avx2_4(z, p->y, p->z, p256_mod, p256_mp_mod); - /* Z = 2Z */ - sp_256_mont_dbl_avx2_4(z, z, p256_mod); - /* T2 = X - T1 */ - sp_256_mont_sub_avx2_4(t2, p->x, t1, p256_mod); - /* T1 = X + T1 */ - sp_256_mont_add_avx2_4(t1, p->x, t1, p256_mod); - /* T2 = T1 * T2 */ - sp_256_mont_mul_avx2_4(t2, t1, t2, p256_mod, p256_mp_mod); - /* T1 = 3T2 */ - sp_256_mont_tpl_avx2_4(t1, t2, p256_mod); - /* Y = 2Y */ - sp_256_mont_dbl_avx2_4(y, p->y, p256_mod); - /* Y = Y * Y */ - sp_256_mont_sqr_avx2_4(y, y, p256_mod, p256_mp_mod); - /* T2 = Y * Y */ - sp_256_mont_sqr_avx2_4(t2, y, p256_mod, p256_mp_mod); - /* T2 = T2/2 */ - sp_256_div2_avx2_4(t2, t2, p256_mod); - /* Y = Y * X */ - sp_256_mont_mul_avx2_4(y, y, p->x, p256_mod, p256_mp_mod); - /* X = T1 * T1 */ - sp_256_mont_sqr_avx2_4(x, t1, p256_mod, p256_mp_mod); - /* X = X - Y */ - sp_256_mont_sub_avx2_4(x, x, y, p256_mod); - /* X = X - Y */ - sp_256_mont_sub_avx2_4(x, x, y, p256_mod); - /* Y = Y - X */ - sp_256_mont_sub_lower_avx2_4(y, y, x, p256_mod); - /* Y = Y * T1 */ - sp_256_mont_mul_avx2_4(y, y, t1, p256_mod, p256_mp_mod); - /* Y = Y - T2 */ - sp_256_mont_sub_avx2_4(y, y, t2, p256_mod); -} - #define sp_256_mont_tpl_lower_avx2_4 sp_256_mont_tpl_lower_4 #define sp_256_mont_sub_dbl_avx2_4 sp_256_mont_sub_dbl_4 #define sp_256_mont_dbl_sub_avx2_4 sp_256_mont_dbl_sub_4 @@ -10007,7 +9992,6 @@ static void sp_256_proj_point_dbl_n_avx2_4(sp_point_256* p, int i, /* W = Z^4 */ sp_256_mont_sqr_avx2_4(w, z, p256_mod, p256_mp_mod); sp_256_mont_sqr_avx2_4(w, w, p256_mod, p256_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -10024,7 +10008,7 @@ static void sp_256_proj_point_dbl_n_avx2_4(sp_point_256* p, int i, /* X = A^2 - 2B */ sp_256_mont_sqr_avx2_4(x, a, p256_mod, p256_mp_mod); sp_256_mont_sub_dbl_avx2_4(x, x, b, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_dbl_sub_avx2_4(b, b, x, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_avx2_4(z, z, y, p256_mod, p256_mp_mod); @@ -10051,8 +10035,8 @@ static void sp_256_proj_point_dbl_n_avx2_4(sp_point_256* p, int i, sp_256_mont_mul_avx2_4(b, t1, x, p256_mod, p256_mp_mod); /* X = A^2 - 2B */ sp_256_mont_sqr_avx2_4(x, a, p256_mod, p256_mp_mod); - sp_256_mont_sub_dbl_4(x, x, b, p256_mod); - /* b = 2.(B - X) */ + sp_256_mont_sub_dbl_avx2_4(x, x, b, p256_mod); + /* B = 2.(B - X) */ sp_256_mont_dbl_sub_avx2_4(b, b, x, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_avx2_4(z, z, y, p256_mod, p256_mp_mod); @@ -10061,11 +10045,12 @@ static void sp_256_proj_point_dbl_n_avx2_4(sp_point_256* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_256_mont_mul_avx2_4(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_avx2_4(y, y, t1, p256_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_256_div2_avx2_4(y, y, p256_mod); } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -10073,6 +10058,80 @@ static void sp_256_proj_point_dbl_n_avx2_4(sp_point_256* p, int i, * q Second point to add. * t Temporary ordinate data. */ +static void sp_256_proj_point_add_avx2_4(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*4; + sp_digit* t3 = t + 4*4; + sp_digit* t4 = t + 6*4; + sp_digit* t5 = t + 8*4; + sp_digit* t6 = t + 10*4; + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_avx2_4(t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t3, t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t1, t1, p->x, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_avx2_4(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_avx2_4(t3, t3, p->y, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_avx2_4(t4, t4, q->y, p256_mod, p256_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_4(t2, t1) & + sp_256_cmp_equal_4(t4, t3)) { + sp_256_proj_point_dbl_avx2_4(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_256_mont_sub_avx2_4(t2, t2, t1, p256_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_avx2_4(t4, t4, t3, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_avx2_4(t5, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(y, t1, t5, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t5, t5, t2, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_avx2_4(z, p->z, t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(z, z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sqr_avx2_4(x, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_avx2_4(x, x, t5, p256_mod); + sp_256_mont_mul_avx2_4(t5, t5, t3, p256_mod, p256_mp_mod); + sp_256_mont_sub_dbl_avx2_4(x, x, y, p256_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_avx2_4(y, y, x, p256_mod); + sp_256_mont_mul_avx2_4(y, y, t4, p256_mod, p256_mp_mod); + sp_256_mont_sub_avx2_4(y, y, t5, p256_mod); + for (i = 0; i < 4; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 4; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 4; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_256_proj_point_add_avx2_4_ctx { @@ -10091,6 +10150,13 @@ typedef struct sp_256_proj_point_add_avx2_4_ctx { sp_digit* z; } sp_256_proj_point_add_avx2_4_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_256_proj_point_add_avx2_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, const sp_point_256* q, sp_digit* t) { @@ -10122,251 +10188,148 @@ static int sp_256_proj_point_add_avx2_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_256_sub_avx2_4(ctx->t1, p256_mod, q->y); - sp_256_norm_avx2_4(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_avx2_4(p->x, q->x) & sp_256_cmp_equal_avx2_4(p->z, q->z) & - (sp_256_cmp_equal_avx2_4(p->y, q->y) | sp_256_cmp_equal_avx2_4(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_avx2_4(ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 2; break; case 2: - err = sp_256_proj_point_dbl_avx2_4_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_256_mont_mul_avx2_4(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + ctx->state = 3; break; case 3: - { + sp_256_mont_mul_avx2_4(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_avx2_4(ctx->t1, q->z, p256_mod, p256_mp_mod); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_avx2_4(ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 5; break; case 5: - sp_256_mont_mul_avx2_4(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); ctx->state = 6; break; case 6: - sp_256_mont_mul_avx2_4(ctx->t1, ctx->t1, p->x, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_avx2_4(ctx->t2, p->z, p256_mod, p256_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_avx2_4(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); ctx->state = 8; break; case 8: - sp_256_mont_mul_avx2_4(ctx->t4, ctx->t2, p->z, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_avx2_4(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); ctx->state = 9; break; case 9: - sp_256_mont_mul_avx2_4(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_4(ctx->t2, ctx->t1) & + sp_256_cmp_equal_4(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_256_proj_point_dbl_avx2_4(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_avx2_4(ctx->t3, ctx->t3, p->y, p256_mod, p256_mp_mod); + /* H = U2 - U1 */ + sp_256_mont_sub_avx2_4(ctx->t2, ctx->t2, ctx->t1, p256_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_avx2_4(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod); + /* R = S2 - S1 */ + sp_256_mont_sub_avx2_4(ctx->t4, ctx->t4, ctx->t3, p256_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_256_mont_sub_avx2_4(ctx->t2, ctx->t2, ctx->t1, p256_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_avx2_4(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_256_mont_sub_avx2_4(ctx->t4, ctx->t4, ctx->t3, p256_mod); + sp_256_mont_mul_avx2_4(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_avx2_4(ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 15; break; case 15: - sp_256_mont_mul_avx2_4(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_avx2_4(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); ctx->state = 16; break; case 16: - sp_256_mont_mul_avx2_4(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_avx2_4(ctx->z, p->z, ctx->t2, p256_mod, p256_mp_mod); + sp_256_mont_sqr_avx2_4(ctx->x, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 18; break; case 18: - sp_256_mont_mul_avx2_4(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod); + sp_256_mont_sub_avx2_4(ctx->x, ctx->x, ctx->t5, p256_mod); ctx->state = 19; break; case 19: - sp_256_mont_sqr_avx2_4(ctx->x, ctx->t4, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); ctx->state = 20; break; case 20: - sp_256_mont_sub_avx2_4(ctx->x, ctx->x, ctx->t5, p256_mod); + sp_256_mont_sub_dbl_avx2_4(ctx->x, ctx->x, ctx->y, p256_mod); ctx->state = 21; break; case 21: - sp_256_mont_mul_avx2_4(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_lower_avx2_4(ctx->y, ctx->y, ctx->x, p256_mod); ctx->state = 22; break; case 22: - sp_256_mont_dbl_avx2_4(ctx->t3, ctx->y, p256_mod); + sp_256_mont_mul_avx2_4(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); ctx->state = 23; break; case 23: - sp_256_mont_sub_avx2_4(ctx->x, ctx->x, ctx->t3, p256_mod); + sp_256_mont_sub_avx2_4(ctx->y, ctx->y, ctx->t5, p256_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_avx2_4(ctx->y, ctx->y, ctx->x, p256_mod); - ctx->state = 25; - break; - case 25: - sp_256_mont_mul_avx2_4(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod); - ctx->state = 26; - break; - case 26: - sp_256_mont_sub_avx2_4(ctx->y, ctx->y, ctx->t5, p256_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 4; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 4; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 4; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_256_proj_point_add_avx2_4(sp_point_256* r, - const sp_point_256* p, const sp_point_256* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*4; - sp_digit* t3 = t + 4*4; - sp_digit* t4 = t + 6*4; - sp_digit* t5 = t + 8*4; - sp_digit* t6 = t + 10*4; - - - /* Check double */ - (void)sp_256_sub_4(t1, p256_mod, q->y); - sp_256_norm_4(t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & - (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { - sp_256_proj_point_dbl_avx2_4(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_avx2_4(t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_avx2_4(t3, t1, q->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_avx2_4(t1, t1, p->x, p256_mod, p256_mp_mod); - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_avx2_4(t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_avx2_4(t4, t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_avx2_4(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_avx2_4(t3, t3, p->y, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_avx2_4(t4, t4, q->y, p256_mod, p256_mp_mod); - /* H = U2 - U1 */ - sp_256_mont_sub_avx2_4(t2, t2, t1, p256_mod); - /* R = S2 - S1 */ - sp_256_mont_sub_avx2_4(t4, t4, t3, p256_mod); - if (~p->infinity & ~q->infinity & - sp_256_iszero_4(t2) & sp_256_iszero_4(t4) & maskt) { - sp_256_proj_point_dbl_avx2_4(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_avx2_4(t5, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_avx2_4(y, t1, t5, p256_mod, p256_mp_mod); - sp_256_mont_mul_avx2_4(t5, t5, t2, p256_mod, p256_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_avx2_4(z, p->z, t2, p256_mod, p256_mp_mod); - sp_256_mont_mul_avx2_4(z, z, q->z, p256_mod, p256_mp_mod); - sp_256_mont_sqr_avx2_4(x, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_avx2_4(x, x, t5, p256_mod); - sp_256_mont_mul_avx2_4(t5, t5, t3, p256_mod, p256_mp_mod); - sp_256_mont_sub_dbl_avx2_4(x, x, y, p256_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_lower_avx2_4(y, y, x, p256_mod); - sp_256_mont_mul_avx2_4(y, y, t4, p256_mod, p256_mp_mod); - sp_256_mont_sub_avx2_4(y, y, t5, p256_mod); - - for (i = 0; i < 4; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 4; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 4; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -10417,7 +10380,7 @@ static void sp_256_proj_point_dbl_n_store_avx2_4(sp_point_256* r, /* X = A^2 - 2B */ sp_256_mont_sqr_avx2_4(x, a, p256_mod, p256_mp_mod); sp_256_mont_sub_dbl_avx2_4(x, x, b, p256_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_256_mont_dbl_sub_avx2_4(b, b, x, p256_mod); /* Z = Z*Y */ sp_256_mont_mul_avx2_4(r[j].z, z, y, p256_mod, p256_mp_mod); @@ -10431,7 +10394,6 @@ static void sp_256_proj_point_dbl_n_store_avx2_4(sp_point_256* r, /* y = 2*A*(B - X) - Y^4 */ sp_256_mont_mul_avx2_4(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_avx2_4(y, y, t1, p256_mod); - /* Y = Y/2 */ sp_256_div2_avx2_4(r[j].y, y, p256_mod); r[j].infinity = 0; @@ -10680,8 +10642,8 @@ typedef struct sp_table_entry_256 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_256_proj_point_add_qz1_4(sp_point_256* r, const sp_point_256* p, - const sp_point_256* q, sp_digit* t) +static void sp_256_proj_point_add_qz1_4(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*4; @@ -10690,12 +10652,17 @@ static void sp_256_proj_point_add_qz1_4(sp_point_256* r, const sp_point_256* p, sp_digit* t5 = t + 8*4; sp_digit* t6 = t + 10*4; - /* Check double */ - (void)sp_256_sub_4(t1, p256_mod, q->y); - sp_256_norm_4(t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & - (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_4(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_4(p->x, t2) & + sp_256_cmp_equal_4(p->y, t4)) { sp_256_proj_point_dbl_4(r, p, t); } else { @@ -10707,12 +10674,6 @@ static void sp_256_proj_point_add_qz1_4(sp_point_256* r, const sp_point_256* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_4(t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t4, t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_4(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_4(t4, t4, q->y, p256_mod, p256_mp_mod); /* H = U2 - X1 */ sp_256_mont_sub_4(t2, t2, p->x, p256_mod); /* R = S2 - Y1 */ @@ -11168,8 +11129,8 @@ static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, const sp_ * q Second point to add. * t Temporary ordinate data. */ -static void sp_256_proj_point_add_qz1_avx2_4(sp_point_256* r, const sp_point_256* p, - const sp_point_256* q, sp_digit* t) +static void sp_256_proj_point_add_qz1_avx2_4(sp_point_256* r, + const sp_point_256* p, const sp_point_256* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*4; @@ -11178,12 +11139,17 @@ static void sp_256_proj_point_add_qz1_avx2_4(sp_point_256* r, const sp_point_256 sp_digit* t5 = t + 8*4; sp_digit* t6 = t + 10*4; - /* Check double */ - (void)sp_256_sub_4(t1, p256_mod, q->y); - sp_256_norm_4(t1); - if ((~p->infinity & ~q->infinity & - sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & - (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_avx2_4(t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t4, t2, p->z, p256_mod, p256_mp_mod); + sp_256_mont_mul_avx2_4(t2, t2, q->x, p256_mod, p256_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_avx2_4(t4, t4, q->y, p256_mod, p256_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_256_cmp_equal_4(p->x, t2) & + sp_256_cmp_equal_4(p->y, t4)) { sp_256_proj_point_dbl_avx2_4(r, p, t); } else { @@ -11195,12 +11161,6 @@ static void sp_256_proj_point_add_qz1_avx2_4(sp_point_256* r, const sp_point_256 sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_avx2_4(t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_avx2_4(t4, t2, p->z, p256_mod, p256_mp_mod); - sp_256_mont_mul_avx2_4(t2, t2, q->x, p256_mod, p256_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_avx2_4(t4, t4, q->y, p256_mod, p256_mp_mod); /* H = U2 - X1 */ sp_256_mont_sub_avx2_4(t2, t2, p->x, p256_mod); /* R = S2 - Y1 */ @@ -25767,7 +25727,7 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_256_norm_4(r); - if (sp_256_iszero_4(r) == 0) { + if (!sp_256_iszero_4(r)) { /* x is modified in calculation of s. */ sp_256_from_mp(x, 4, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -25776,7 +25736,7 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_256_calc_s_4(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_256_iszero_4(s) == 0)) { + if ((err == MP_OKAY) && (!sp_256_iszero_4(s))) { break; } } @@ -27712,7 +27672,7 @@ static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, /* x /= z^2 */ sp_384_mont_mul_6(r->x, p->x, t2, p384_mod, p384_mp_mod); - XMEMSET(r->x + 6, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 6, 0, sizeof(sp_digit) * 6U); sp_384_mont_reduce_6(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_6(r->x, p384_mod); @@ -27721,7 +27681,7 @@ static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, /* y /= z^3 */ sp_384_mont_mul_6(r->y, p->y, t1, p384_mod, p384_mp_mod); - XMEMSET(r->y + 6, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 6, 0, sizeof(sp_digit) * 6U); sp_384_mont_reduce_6(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_6(r->y, p384_mod); @@ -27730,7 +27690,6 @@ static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } #ifdef __cplusplus @@ -27781,6 +27740,61 @@ extern void sp_384_div2_6(sp_digit* r, const sp_digit* a, const sp_digit* m); * p Point to double. * t Temporary ordinate data. */ +static void sp_384_proj_point_dbl_6(sp_point_384* r, const sp_point_384* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_6(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_6(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_6(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_6(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_6(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_6(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_6(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_6(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_6(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_div2_6(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_6(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_6(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_6(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_6(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_lower_6(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_6(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_6(y, y, t2, p384_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_dbl_6_ctx { int state; @@ -27791,6 +27805,12 @@ typedef struct sp_384_proj_point_dbl_6_ctx { sp_digit* z; } sp_384_proj_point_dbl_6_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_384_proj_point_dbl_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -27915,62 +27935,6 @@ static int sp_384_proj_point_dbl_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_384_proj_point_dbl_6(sp_point_384* r, const sp_point_384* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*6; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_384_mont_sqr_6(t1, p->z, p384_mod, p384_mp_mod); - /* Z = Y * Z */ - sp_384_mont_mul_6(z, p->y, p->z, p384_mod, p384_mp_mod); - /* Z = 2Z */ - sp_384_mont_dbl_6(z, z, p384_mod); - /* T2 = X - T1 */ - sp_384_mont_sub_6(t2, p->x, t1, p384_mod); - /* T1 = X + T1 */ - sp_384_mont_add_6(t1, p->x, t1, p384_mod); - /* T2 = T1 * T2 */ - sp_384_mont_mul_6(t2, t1, t2, p384_mod, p384_mp_mod); - /* T1 = 3T2 */ - sp_384_mont_tpl_6(t1, t2, p384_mod); - /* Y = 2Y */ - sp_384_mont_dbl_6(y, p->y, p384_mod); - /* Y = Y * Y */ - sp_384_mont_sqr_6(y, y, p384_mod, p384_mp_mod); - /* T2 = Y * Y */ - sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod); - /* T2 = T2/2 */ - sp_384_div2_6(t2, t2, p384_mod); - /* Y = Y * X */ - sp_384_mont_mul_6(y, y, p->x, p384_mod, p384_mp_mod); - /* X = T1 * T1 */ - sp_384_mont_sqr_6(x, t1, p384_mod, p384_mp_mod); - /* X = X - Y */ - sp_384_mont_sub_6(x, x, y, p384_mod); - /* X = X - Y */ - sp_384_mont_sub_6(x, x, y, p384_mod); - /* Y = Y - X */ - sp_384_mont_sub_lower_6(y, y, x, p384_mod); - /* Y = Y * T1 */ - sp_384_mont_mul_6(y, y, t1, p384_mod, p384_mp_mod); - /* Y = Y - T2 */ - sp_384_mont_sub_6(y, y, t2, p384_mod); -} - #ifdef __cplusplus extern "C" { #endif @@ -28014,7 +27978,6 @@ static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int i, /* W = Z^4 */ sp_384_mont_sqr_6(w, z, p384_mod, p384_mp_mod); sp_384_mont_sqr_6(w, w, p384_mod, p384_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -28032,7 +27995,7 @@ static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int i, sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_6(t2, b, p384_mod); sp_384_mont_sub_6(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_6(t2, b, x, p384_mod); sp_384_mont_dbl_lower_6(b, t2, p384_mod); /* Z = Z*Y */ @@ -28062,7 +28025,7 @@ static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int i, sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_6(t2, b, p384_mod); sp_384_mont_sub_6(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_6(t2, b, x, p384_mod); sp_384_mont_dbl_lower_6(b, t2, p384_mod); /* Z = Z*Y */ @@ -28072,7 +28035,7 @@ static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_384_mont_mul_6(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_6(y, y, t1, p384_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_384_div2_6(y, y, p384_mod); } @@ -28101,6 +28064,7 @@ static int sp_384_iszero_6(const sp_digit* a) return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -28108,6 +28072,81 @@ static int sp_384_iszero_6(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_384_proj_point_add_6(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + sp_digit* t3 = t + 4*6; + sp_digit* t4 = t + 6*6; + sp_digit* t5 = t + 8*6; + sp_digit* t6 = t + 10*6; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t1, t1, p->x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_6(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_6(t3, t3, p->y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_6(t2, t1) & + sp_384_cmp_equal_6(t4, t3)) { + sp_384_proj_point_dbl_6(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_384_mont_sub_6(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_6(t4, t4, t3, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_6(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(x, x, t5, p384_mod); + sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_6(t3, y, p384_mod); + sp_384_mont_sub_6(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_6(y, y, x, p384_mod); + sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(y, y, t5, p384_mod); + for (i = 0; i < 6; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_add_6_ctx { @@ -28126,6 +28165,13 @@ typedef struct sp_384_proj_point_add_6_ctx { sp_digit* z; } sp_384_proj_point_add_6_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_384_proj_point_add_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, const sp_point_384* q, sp_digit* t) { @@ -28157,252 +28203,149 @@ static int sp_384_proj_point_add_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_384_sub_6(ctx->t1, p384_mod, q->y); - sp_384_norm_6(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & - (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_6(ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 2; break; case 2: - err = sp_384_proj_point_dbl_6_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_384_mont_mul_6(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 3; break; case 3: - { + sp_384_mont_mul_6(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_6(ctx->t1, q->z, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_6(ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 5; break; case 5: - sp_384_mont_mul_6(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 6; break; case 6: - sp_384_mont_mul_6(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_6(ctx->t2, p->z, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_6(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); ctx->state = 8; break; case 8: - sp_384_mont_mul_6(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_6(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); ctx->state = 9; break; case 9: - sp_384_mont_mul_6(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_6(ctx->t2, ctx->t1) & + sp_384_cmp_equal_6(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_384_proj_point_dbl_6(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_6(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_6(ctx->t2, ctx->t2, ctx->t1, p384_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_6(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_6(ctx->t4, ctx->t4, ctx->t3, p384_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_384_mont_sub_6(ctx->t2, ctx->t2, ctx->t1, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_6(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_384_mont_sub_6(ctx->t4, ctx->t4, ctx->t3, p384_mod); + sp_384_mont_mul_6(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_6(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 15; break; case 15: - sp_384_mont_mul_6(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_6(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 16; break; case 16: - sp_384_mont_mul_6(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_6(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_sqr_6(ctx->x, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 18; break; case 18: - sp_384_mont_mul_6(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sub_6(ctx->x, ctx->x, ctx->t5, p384_mod); ctx->state = 19; break; case 19: - sp_384_mont_sqr_6(ctx->x, ctx->t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); ctx->state = 20; break; case 20: - sp_384_mont_sub_6(ctx->x, ctx->x, ctx->t5, p384_mod); + sp_384_mont_dbl_6(ctx->t3, ctx->y, p384_mod); + sp_384_mont_sub_6(ctx->x, ctx->x, ctx->t3, p384_mod); ctx->state = 21; break; case 21: - sp_384_mont_mul_6(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_6(ctx->y, ctx->y, ctx->x, p384_mod); ctx->state = 22; break; case 22: - sp_384_mont_dbl_6(ctx->t3, ctx->y, p384_mod); + sp_384_mont_mul_6(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 23; break; case 23: - sp_384_mont_sub_6(ctx->x, ctx->x, ctx->t3, p384_mod); + sp_384_mont_sub_6(ctx->y, ctx->y, ctx->t5, p384_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_6(ctx->y, ctx->y, ctx->x, p384_mod); - ctx->state = 25; - break; - case 25: - sp_384_mont_mul_6(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); - ctx->state = 26; - break; - case 26: - sp_384_mont_sub_6(ctx->y, ctx->y, ctx->t5, p384_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 6; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 6; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 6; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_384_proj_point_add_6(sp_point_384* r, - const sp_point_384* p, const sp_point_384* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*6; - sp_digit* t3 = t + 4*6; - sp_digit* t4 = t + 6*6; - sp_digit* t5 = t + 8*6; - sp_digit* t6 = t + 10*6; - - - /* Check double */ - (void)sp_384_sub_6(t1, p384_mod, q->y); - sp_384_norm_6(t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & - (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { - sp_384_proj_point_dbl_6(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_6(t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t3, t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t1, t1, p->x, p384_mod, p384_mp_mod); - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_6(t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t4, t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_6(t3, t3, p->y, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); - /* H = U2 - U1 */ - sp_384_mont_sub_6(t2, t2, t1, p384_mod); - /* R = S2 - S1 */ - sp_384_mont_sub_6(t4, t4, t3, p384_mod); - if (~p->infinity & ~q->infinity & - sp_384_iszero_6(t2) & sp_384_iszero_6(t4) & maskt) { - sp_384_proj_point_dbl_6(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_6(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t5, t5, t2, p384_mod, p384_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_6(z, p->z, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_sqr_6(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_6(x, x, t5, p384_mod); - sp_384_mont_mul_6(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_dbl_6(t3, y, p384_mod); - sp_384_mont_sub_6(x, x, t3, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_6(y, y, x, p384_mod); - sp_384_mont_mul_6(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_6(y, y, t5, p384_mod); - - for (i = 0; i < 6; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 6; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 6; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -28455,7 +28398,7 @@ static void sp_384_proj_point_dbl_n_store_6(sp_point_384* r, sp_384_mont_sqr_6(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_6(t2, b, p384_mod); sp_384_mont_sub_6(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_6(t2, b, x, p384_mod); sp_384_mont_dbl_lower_6(b, t2, p384_mod); /* Z = Z*Y */ @@ -28470,7 +28413,6 @@ static void sp_384_proj_point_dbl_n_store_6(sp_point_384* r, /* y = 2*A*(B - X) - Y^4 */ sp_384_mont_mul_6(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_6(y, y, t1, p384_mod); - /* Y = Y/2 */ sp_384_div2_6(r[j].y, y, p384_mod); r[j].infinity = 0; @@ -28962,7 +28904,7 @@ static void sp_384_map_avx2_6(sp_point_384* r, const sp_point_384* p, /* x /= z^2 */ sp_384_mont_mul_avx2_6(r->x, p->x, t2, p384_mod, p384_mp_mod); - XMEMSET(r->x + 6, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 6, 0, sizeof(sp_digit) * 6U); sp_384_mont_reduce_avx2_6(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_6(r->x, p384_mod); @@ -28971,16 +28913,15 @@ static void sp_384_map_avx2_6(sp_point_384* r, const sp_point_384* p, /* y /= z^3 */ sp_384_mont_mul_avx2_6(r->y, p->y, t1, p384_mod, p384_mp_mod); - XMEMSET(r->y + 6, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 6, 0, sizeof(sp_digit) * 6U); sp_384_mont_reduce_avx2_6(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_6(r->y, p384_mod); - sp_384_cond_sub_avx2_6(r->y, r->y, p384_mod, ~(n >> 63)); + sp_384_cond_sub_6(r->y, r->y, p384_mod, ~(n >> 63)); sp_384_norm_6(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } #define sp_384_mont_add_avx2_6 sp_384_mont_add_6 @@ -29001,6 +28942,61 @@ extern void sp_384_div2_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit* m * p Point to double. * t Temporary ordinate data. */ +static void sp_384_proj_point_dbl_avx2_6(sp_point_384* r, const sp_point_384* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_384_mont_sqr_avx2_6(t1, p->z, p384_mod, p384_mp_mod); + /* Z = Y * Z */ + sp_384_mont_mul_avx2_6(z, p->y, p->z, p384_mod, p384_mp_mod); + /* Z = 2Z */ + sp_384_mont_dbl_avx2_6(z, z, p384_mod); + /* T2 = X - T1 */ + sp_384_mont_sub_avx2_6(t2, p->x, t1, p384_mod); + /* T1 = X + T1 */ + sp_384_mont_add_avx2_6(t1, p->x, t1, p384_mod); + /* T2 = T1 * T2 */ + sp_384_mont_mul_avx2_6(t2, t1, t2, p384_mod, p384_mp_mod); + /* T1 = 3T2 */ + sp_384_mont_tpl_avx2_6(t1, t2, p384_mod); + /* Y = 2Y */ + sp_384_mont_dbl_avx2_6(y, p->y, p384_mod); + /* Y = Y * Y */ + sp_384_mont_sqr_avx2_6(y, y, p384_mod, p384_mp_mod); + /* T2 = Y * Y */ + sp_384_mont_sqr_avx2_6(t2, y, p384_mod, p384_mp_mod); + /* T2 = T2/2 */ + sp_384_div2_avx2_6(t2, t2, p384_mod); + /* Y = Y * X */ + sp_384_mont_mul_avx2_6(y, y, p->x, p384_mod, p384_mp_mod); + /* X = T1 * T1 */ + sp_384_mont_sqr_avx2_6(x, t1, p384_mod, p384_mp_mod); + /* X = X - Y */ + sp_384_mont_sub_avx2_6(x, x, y, p384_mod); + /* X = X - Y */ + sp_384_mont_sub_avx2_6(x, x, y, p384_mod); + /* Y = Y - X */ + sp_384_mont_sub_lower_avx2_6(y, y, x, p384_mod); + /* Y = Y * T1 */ + sp_384_mont_mul_avx2_6(y, y, t1, p384_mod, p384_mp_mod); + /* Y = Y - T2 */ + sp_384_mont_sub_avx2_6(y, y, t2, p384_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_dbl_avx2_6_ctx { int state; @@ -29011,6 +29007,12 @@ typedef struct sp_384_proj_point_dbl_avx2_6_ctx { sp_digit* z; } sp_384_proj_point_dbl_avx2_6_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_384_proj_point_dbl_avx2_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -29135,62 +29137,6 @@ static int sp_384_proj_point_dbl_avx2_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_384_proj_point_dbl_avx2_6(sp_point_384* r, const sp_point_384* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*6; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_384_mont_sqr_avx2_6(t1, p->z, p384_mod, p384_mp_mod); - /* Z = Y * Z */ - sp_384_mont_mul_avx2_6(z, p->y, p->z, p384_mod, p384_mp_mod); - /* Z = 2Z */ - sp_384_mont_dbl_avx2_6(z, z, p384_mod); - /* T2 = X - T1 */ - sp_384_mont_sub_avx2_6(t2, p->x, t1, p384_mod); - /* T1 = X + T1 */ - sp_384_mont_add_avx2_6(t1, p->x, t1, p384_mod); - /* T2 = T1 * T2 */ - sp_384_mont_mul_avx2_6(t2, t1, t2, p384_mod, p384_mp_mod); - /* T1 = 3T2 */ - sp_384_mont_tpl_avx2_6(t1, t2, p384_mod); - /* Y = 2Y */ - sp_384_mont_dbl_avx2_6(y, p->y, p384_mod); - /* Y = Y * Y */ - sp_384_mont_sqr_avx2_6(y, y, p384_mod, p384_mp_mod); - /* T2 = Y * Y */ - sp_384_mont_sqr_avx2_6(t2, y, p384_mod, p384_mp_mod); - /* T2 = T2/2 */ - sp_384_div2_avx2_6(t2, t2, p384_mod); - /* Y = Y * X */ - sp_384_mont_mul_avx2_6(y, y, p->x, p384_mod, p384_mp_mod); - /* X = T1 * T1 */ - sp_384_mont_sqr_avx2_6(x, t1, p384_mod, p384_mp_mod); - /* X = X - Y */ - sp_384_mont_sub_avx2_6(x, x, y, p384_mod); - /* X = X - Y */ - sp_384_mont_sub_avx2_6(x, x, y, p384_mod); - /* Y = Y - X */ - sp_384_mont_sub_lower_avx2_6(y, y, x, p384_mod); - /* Y = Y * T1 */ - sp_384_mont_mul_avx2_6(y, y, t1, p384_mod, p384_mp_mod); - /* Y = Y - T2 */ - sp_384_mont_sub_avx2_6(y, y, t2, p384_mod); -} - #define sp_384_mont_dbl_lower_avx2_6 sp_384_mont_dbl_lower_6 #define sp_384_mont_tpl_lower_avx2_6 sp_384_mont_tpl_lower_6 /* Double the Montgomery form projective point p a number of times. @@ -29222,7 +29168,6 @@ static void sp_384_proj_point_dbl_n_avx2_6(sp_point_384* p, int i, /* W = Z^4 */ sp_384_mont_sqr_avx2_6(w, z, p384_mod, p384_mp_mod); sp_384_mont_sqr_avx2_6(w, w, p384_mod, p384_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -29240,7 +29185,7 @@ static void sp_384_proj_point_dbl_n_avx2_6(sp_point_384* p, int i, sp_384_mont_sqr_avx2_6(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_avx2_6(t2, b, p384_mod); sp_384_mont_sub_avx2_6(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_avx2_6(t2, b, x, p384_mod); sp_384_mont_dbl_lower_avx2_6(b, t2, p384_mod); /* Z = Z*Y */ @@ -29270,7 +29215,7 @@ static void sp_384_proj_point_dbl_n_avx2_6(sp_point_384* p, int i, sp_384_mont_sqr_avx2_6(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_avx2_6(t2, b, p384_mod); sp_384_mont_sub_avx2_6(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_avx2_6(t2, b, x, p384_mod); sp_384_mont_dbl_lower_avx2_6(b, t2, p384_mod); /* Z = Z*Y */ @@ -29280,11 +29225,12 @@ static void sp_384_proj_point_dbl_n_avx2_6(sp_point_384* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_384_mont_mul_avx2_6(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_avx2_6(y, y, t1, p384_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_384_div2_avx2_6(y, y, p384_mod); } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -29292,6 +29238,81 @@ static void sp_384_proj_point_dbl_n_avx2_6(sp_point_384* p, int i, * q Second point to add. * t Temporary ordinate data. */ +static void sp_384_proj_point_add_avx2_6(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*6; + sp_digit* t3 = t + 4*6; + sp_digit* t4 = t + 6*6; + sp_digit* t5 = t + 8*6; + sp_digit* t6 = t + 10*6; + + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_avx2_6(t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t3, t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t1, t1, p->x, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_avx2_6(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_avx2_6(t3, t3, p->y, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_avx2_6(t4, t4, q->y, p384_mod, p384_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_6(t2, t1) & + sp_384_cmp_equal_6(t4, t3)) { + sp_384_proj_point_dbl_avx2_6(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_384_mont_sub_avx2_6(t2, t2, t1, p384_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_avx2_6(t4, t4, t3, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_avx2_6(t5, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(y, t1, t5, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t5, t5, t2, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_avx2_6(z, p->z, t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(z, z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sqr_avx2_6(x, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_avx2_6(x, x, t5, p384_mod); + sp_384_mont_mul_avx2_6(t5, t5, t3, p384_mod, p384_mp_mod); + sp_384_mont_dbl_avx2_6(t3, y, p384_mod); + sp_384_mont_sub_avx2_6(x, x, t3, p384_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_avx2_6(y, y, x, p384_mod); + sp_384_mont_mul_avx2_6(y, y, t4, p384_mod, p384_mp_mod); + sp_384_mont_sub_avx2_6(y, y, t5, p384_mod); + for (i = 0; i < 6; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 6; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_384_proj_point_add_avx2_6_ctx { @@ -29310,6 +29331,13 @@ typedef struct sp_384_proj_point_add_avx2_6_ctx { sp_digit* z; } sp_384_proj_point_add_avx2_6_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_384_proj_point_add_avx2_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, const sp_point_384* q, sp_digit* t) { @@ -29341,252 +29369,149 @@ static int sp_384_proj_point_add_avx2_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_384_sub_avx2_6(ctx->t1, p384_mod, q->y); - sp_384_norm_avx2_6(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_avx2_6(p->x, q->x) & sp_384_cmp_equal_avx2_6(p->z, q->z) & - (sp_384_cmp_equal_avx2_6(p->y, q->y) | sp_384_cmp_equal_avx2_6(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_384_mont_sqr_avx2_6(ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 2; break; case 2: - err = sp_384_proj_point_dbl_avx2_6_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_384_mont_mul_avx2_6(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + ctx->state = 3; break; case 3: - { + sp_384_mont_mul_avx2_6(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_avx2_6(ctx->t1, q->z, p384_mod, p384_mp_mod); + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_avx2_6(ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 5; break; case 5: - sp_384_mont_mul_avx2_6(ctx->t3, ctx->t1, q->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); ctx->state = 6; break; case 6: - sp_384_mont_mul_avx2_6(ctx->t1, ctx->t1, p->x, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_avx2_6(ctx->t2, p->z, p384_mod, p384_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_384_mont_mul_avx2_6(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); ctx->state = 8; break; case 8: - sp_384_mont_mul_avx2_6(ctx->t4, ctx->t2, p->z, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_avx2_6(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); ctx->state = 9; break; case 9: - sp_384_mont_mul_avx2_6(ctx->t2, ctx->t2, q->x, p384_mod, p384_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_6(ctx->t2, ctx->t1) & + sp_384_cmp_equal_6(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_384_proj_point_dbl_avx2_6(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_avx2_6(ctx->t3, ctx->t3, p->y, p384_mod, p384_mp_mod); + /* H = U2 - U1 */ + sp_384_mont_sub_avx2_6(ctx->t2, ctx->t2, ctx->t1, p384_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_avx2_6(ctx->t4, ctx->t4, q->y, p384_mod, p384_mp_mod); + /* R = S2 - S1 */ + sp_384_mont_sub_avx2_6(ctx->t4, ctx->t4, ctx->t3, p384_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_384_mont_sub_avx2_6(ctx->t2, ctx->t2, ctx->t1, p384_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_384_mont_sqr_avx2_6(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_384_mont_sub_avx2_6(ctx->t4, ctx->t4, ctx->t3, p384_mod); + sp_384_mont_mul_avx2_6(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_avx2_6(ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 15; break; case 15: - sp_384_mont_mul_avx2_6(ctx->y, ctx->t1, ctx->t5, p384_mod, p384_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_384_mont_mul_avx2_6(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); ctx->state = 16; break; case 16: - sp_384_mont_mul_avx2_6(ctx->t5, ctx->t5, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_avx2_6(ctx->z, p->z, ctx->t2, p384_mod, p384_mp_mod); + sp_384_mont_sqr_avx2_6(ctx->x, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 18; break; case 18: - sp_384_mont_mul_avx2_6(ctx->z, ctx->z, q->z, p384_mod, p384_mp_mod); + sp_384_mont_sub_avx2_6(ctx->x, ctx->x, ctx->t5, p384_mod); ctx->state = 19; break; case 19: - sp_384_mont_sqr_avx2_6(ctx->x, ctx->t4, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); ctx->state = 20; break; case 20: - sp_384_mont_sub_avx2_6(ctx->x, ctx->x, ctx->t5, p384_mod); + sp_384_mont_dbl_avx2_6(ctx->t3, ctx->y, p384_mod); + sp_384_mont_sub_avx2_6(ctx->x, ctx->x, ctx->t3, p384_mod); ctx->state = 21; break; case 21: - sp_384_mont_mul_avx2_6(ctx->t5, ctx->t5, ctx->t3, p384_mod, p384_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_384_mont_sub_lower_avx2_6(ctx->y, ctx->y, ctx->x, p384_mod); ctx->state = 22; break; case 22: - sp_384_mont_dbl_avx2_6(ctx->t3, ctx->y, p384_mod); + sp_384_mont_mul_avx2_6(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); ctx->state = 23; break; case 23: - sp_384_mont_sub_avx2_6(ctx->x, ctx->x, ctx->t3, p384_mod); + sp_384_mont_sub_avx2_6(ctx->y, ctx->y, ctx->t5, p384_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_avx2_6(ctx->y, ctx->y, ctx->x, p384_mod); - ctx->state = 25; - break; - case 25: - sp_384_mont_mul_avx2_6(ctx->y, ctx->y, ctx->t4, p384_mod, p384_mp_mod); - ctx->state = 26; - break; - case 26: - sp_384_mont_sub_avx2_6(ctx->y, ctx->y, ctx->t5, p384_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 6; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 6; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 6; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_384_proj_point_add_avx2_6(sp_point_384* r, - const sp_point_384* p, const sp_point_384* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*6; - sp_digit* t3 = t + 4*6; - sp_digit* t4 = t + 6*6; - sp_digit* t5 = t + 8*6; - sp_digit* t6 = t + 10*6; - - - /* Check double */ - (void)sp_384_sub_6(t1, p384_mod, q->y); - sp_384_norm_6(t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & - (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { - sp_384_proj_point_dbl_avx2_6(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_384_mont_sqr_avx2_6(t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_avx2_6(t3, t1, q->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_avx2_6(t1, t1, p->x, p384_mod, p384_mp_mod); - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_avx2_6(t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_avx2_6(t4, t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_avx2_6(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_384_mont_mul_avx2_6(t3, t3, p->y, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_avx2_6(t4, t4, q->y, p384_mod, p384_mp_mod); - /* H = U2 - U1 */ - sp_384_mont_sub_avx2_6(t2, t2, t1, p384_mod); - /* R = S2 - S1 */ - sp_384_mont_sub_avx2_6(t4, t4, t3, p384_mod); - if (~p->infinity & ~q->infinity & - sp_384_iszero_6(t2) & sp_384_iszero_6(t4) & maskt) { - sp_384_proj_point_dbl_avx2_6(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_384_mont_sqr_avx2_6(t5, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_avx2_6(y, t1, t5, p384_mod, p384_mp_mod); - sp_384_mont_mul_avx2_6(t5, t5, t2, p384_mod, p384_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_384_mont_mul_avx2_6(z, p->z, t2, p384_mod, p384_mp_mod); - sp_384_mont_mul_avx2_6(z, z, q->z, p384_mod, p384_mp_mod); - sp_384_mont_sqr_avx2_6(x, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_avx2_6(x, x, t5, p384_mod); - sp_384_mont_mul_avx2_6(t5, t5, t3, p384_mod, p384_mp_mod); - sp_384_mont_dbl_avx2_6(t3, y, p384_mod); - sp_384_mont_sub_avx2_6(x, x, t3, p384_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_384_mont_sub_lower_avx2_6(y, y, x, p384_mod); - sp_384_mont_mul_avx2_6(y, y, t4, p384_mod, p384_mp_mod); - sp_384_mont_sub_avx2_6(y, y, t5, p384_mod); - - for (i = 0; i < 6; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 6; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 6; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -29639,7 +29564,7 @@ static void sp_384_proj_point_dbl_n_store_avx2_6(sp_point_384* r, sp_384_mont_sqr_avx2_6(x, a, p384_mod, p384_mp_mod); sp_384_mont_dbl_avx2_6(t2, b, p384_mod); sp_384_mont_sub_avx2_6(x, x, t2, p384_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_384_mont_sub_lower_avx2_6(t2, b, x, p384_mod); sp_384_mont_dbl_lower_avx2_6(b, t2, p384_mod); /* Z = Z*Y */ @@ -29654,7 +29579,6 @@ static void sp_384_proj_point_dbl_n_store_avx2_6(sp_point_384* r, /* y = 2*A*(B - X) - Y^4 */ sp_384_mont_mul_avx2_6(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_avx2_6(y, y, t1, p384_mod); - /* Y = Y/2 */ sp_384_div2_avx2_6(r[j].y, y, p384_mod); r[j].infinity = 0; @@ -29903,8 +29827,8 @@ typedef struct sp_table_entry_384 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_384_proj_point_add_qz1_6(sp_point_384* r, const sp_point_384* p, - const sp_point_384* q, sp_digit* t) +static void sp_384_proj_point_add_qz1_6(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*6; @@ -29913,12 +29837,17 @@ static void sp_384_proj_point_add_qz1_6(sp_point_384* r, const sp_point_384* p, sp_digit* t5 = t + 8*6; sp_digit* t6 = t + 10*6; - /* Check double */ - (void)sp_384_sub_6(t1, p384_mod, q->y); - sp_384_norm_6(t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & - (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_6(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_6(p->x, t2) & + sp_384_cmp_equal_6(p->y, t4)) { sp_384_proj_point_dbl_6(r, p, t); } else { @@ -29930,12 +29859,6 @@ static void sp_384_proj_point_add_qz1_6(sp_point_384* r, const sp_point_384* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_6(t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t4, t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_6(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_6(t4, t4, q->y, p384_mod, p384_mp_mod); /* H = U2 - X1 */ sp_384_mont_sub_6(t2, t2, p->x, p384_mod); /* R = S2 - Y1 */ @@ -30392,8 +30315,8 @@ static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, const sp_ * q Second point to add. * t Temporary ordinate data. */ -static void sp_384_proj_point_add_qz1_avx2_6(sp_point_384* r, const sp_point_384* p, - const sp_point_384* q, sp_digit* t) +static void sp_384_proj_point_add_qz1_avx2_6(sp_point_384* r, + const sp_point_384* p, const sp_point_384* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*6; @@ -30402,12 +30325,17 @@ static void sp_384_proj_point_add_qz1_avx2_6(sp_point_384* r, const sp_point_384 sp_digit* t5 = t + 8*6; sp_digit* t6 = t + 10*6; - /* Check double */ - (void)sp_384_sub_6(t1, p384_mod, q->y); - sp_384_norm_6(t1); - if ((~p->infinity & ~q->infinity & - sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & - (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_384_mont_sqr_avx2_6(t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t4, t2, p->z, p384_mod, p384_mp_mod); + sp_384_mont_mul_avx2_6(t2, t2, q->x, p384_mod, p384_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_384_mont_mul_avx2_6(t4, t4, q->y, p384_mod, p384_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_384_cmp_equal_6(p->x, t2) & + sp_384_cmp_equal_6(p->y, t4)) { sp_384_proj_point_dbl_avx2_6(r, p, t); } else { @@ -30419,12 +30347,6 @@ static void sp_384_proj_point_add_qz1_avx2_6(sp_point_384* r, const sp_point_384 sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_384_mont_sqr_avx2_6(t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_avx2_6(t4, t2, p->z, p384_mod, p384_mp_mod); - sp_384_mont_mul_avx2_6(t2, t2, q->x, p384_mod, p384_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_384_mont_mul_avx2_6(t4, t4, q->y, p384_mod, p384_mp_mod); /* H = U2 - X1 */ sp_384_mont_sub_avx2_6(t2, t2, p->x, p384_mod); /* R = S2 - Y1 */ @@ -50664,7 +50586,7 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_384_norm_6(r); - if (sp_384_iszero_6(r) == 0) { + if (!sp_384_iszero_6(r)) { /* x is modified in calculation of s. */ sp_384_from_mp(x, 6, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -50673,7 +50595,7 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_384_calc_s_6(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_384_iszero_6(s) == 0)) { + if ((err == MP_OKAY) && (!sp_384_iszero_6(s))) { break; } } @@ -52645,7 +52567,7 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, /* x /= z^2 */ sp_521_mont_mul_9(r->x, p->x, t2, p521_mod, p521_mp_mod); - XMEMSET(r->x + 9, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 9, 0, sizeof(sp_digit) * 9U); sp_521_mont_reduce_9(r->x, p521_mod, p521_mp_mod); /* Reduce x to less than modulus */ n = sp_521_cmp_9(r->x, p521_mod); @@ -52654,7 +52576,7 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, /* y /= z^3 */ sp_521_mont_mul_9(r->y, p->y, t1, p521_mod, p521_mp_mod); - XMEMSET(r->y + 9, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 9, 0, sizeof(sp_digit) * 9U); sp_521_mont_reduce_9(r->y, p521_mod, p521_mp_mod); /* Reduce y to less than modulus */ n = sp_521_cmp_9(r->y, p521_mod); @@ -52663,7 +52585,6 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } #ifdef __cplusplus @@ -52708,6 +52629,61 @@ extern void sp_521_div2_9(sp_digit* r, const sp_digit* a, const sp_digit* m); * p Point to double. * t Temporary ordinate data. */ +static void sp_521_proj_point_dbl_9(sp_point_521* r, const sp_point_521* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*9; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_521_mont_sqr_9(t1, p->z, p521_mod, p521_mp_mod); + /* Z = Y * Z */ + sp_521_mont_mul_9(z, p->y, p->z, p521_mod, p521_mp_mod); + /* Z = 2Z */ + sp_521_mont_dbl_9(z, z, p521_mod); + /* T2 = X - T1 */ + sp_521_mont_sub_9(t2, p->x, t1, p521_mod); + /* T1 = X + T1 */ + sp_521_mont_add_9(t1, p->x, t1, p521_mod); + /* T2 = T1 * T2 */ + sp_521_mont_mul_9(t2, t1, t2, p521_mod, p521_mp_mod); + /* T1 = 3T2 */ + sp_521_mont_tpl_9(t1, t2, p521_mod); + /* Y = 2Y */ + sp_521_mont_dbl_9(y, p->y, p521_mod); + /* Y = Y * Y */ + sp_521_mont_sqr_9(y, y, p521_mod, p521_mp_mod); + /* T2 = Y * Y */ + sp_521_mont_sqr_9(t2, y, p521_mod, p521_mp_mod); + /* T2 = T2/2 */ + sp_521_div2_9(t2, t2, p521_mod); + /* Y = Y * X */ + sp_521_mont_mul_9(y, y, p->x, p521_mod, p521_mp_mod); + /* X = T1 * T1 */ + sp_521_mont_sqr_9(x, t1, p521_mod, p521_mp_mod); + /* X = X - Y */ + sp_521_mont_sub_9(x, x, y, p521_mod); + /* X = X - Y */ + sp_521_mont_sub_9(x, x, y, p521_mod); + /* Y = Y - X */ + sp_521_mont_sub_lower_9(y, y, x, p521_mod); + /* Y = Y * T1 */ + sp_521_mont_mul_9(y, y, t1, p521_mod, p521_mp_mod); + /* Y = Y - T2 */ + sp_521_mont_sub_9(y, y, t2, p521_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_521_proj_point_dbl_9_ctx { int state; @@ -52718,6 +52694,12 @@ typedef struct sp_521_proj_point_dbl_9_ctx { sp_digit* z; } sp_521_proj_point_dbl_9_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -52842,62 +52824,6 @@ static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, con return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_521_proj_point_dbl_9(sp_point_521* r, const sp_point_521* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*9; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_521_mont_sqr_9(t1, p->z, p521_mod, p521_mp_mod); - /* Z = Y * Z */ - sp_521_mont_mul_9(z, p->y, p->z, p521_mod, p521_mp_mod); - /* Z = 2Z */ - sp_521_mont_dbl_9(z, z, p521_mod); - /* T2 = X - T1 */ - sp_521_mont_sub_9(t2, p->x, t1, p521_mod); - /* T1 = X + T1 */ - sp_521_mont_add_9(t1, p->x, t1, p521_mod); - /* T2 = T1 * T2 */ - sp_521_mont_mul_9(t2, t1, t2, p521_mod, p521_mp_mod); - /* T1 = 3T2 */ - sp_521_mont_tpl_9(t1, t2, p521_mod); - /* Y = 2Y */ - sp_521_mont_dbl_9(y, p->y, p521_mod); - /* Y = Y * Y */ - sp_521_mont_sqr_9(y, y, p521_mod, p521_mp_mod); - /* T2 = Y * Y */ - sp_521_mont_sqr_9(t2, y, p521_mod, p521_mp_mod); - /* T2 = T2/2 */ - sp_521_div2_9(t2, t2, p521_mod); - /* Y = Y * X */ - sp_521_mont_mul_9(y, y, p->x, p521_mod, p521_mp_mod); - /* X = T1 * T1 */ - sp_521_mont_sqr_9(x, t1, p521_mod, p521_mp_mod); - /* X = X - Y */ - sp_521_mont_sub_9(x, x, y, p521_mod); - /* X = X - Y */ - sp_521_mont_sub_9(x, x, y, p521_mod); - /* Y = Y - X */ - sp_521_mont_sub_lower_9(y, y, x, p521_mod); - /* Y = Y * T1 */ - sp_521_mont_mul_9(y, y, t1, p521_mod, p521_mp_mod); - /* Y = Y - T2 */ - sp_521_mont_sub_9(y, y, t2, p521_mod); -} - #define sp_521_mont_dbl_lower_9 sp_521_mont_dbl_9 #define sp_521_mont_tpl_lower_9 sp_521_mont_tpl_9 /* Double the Montgomery form projective point p a number of times. @@ -52929,7 +52855,6 @@ static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i, /* W = Z^4 */ sp_521_mont_sqr_9(w, z, p521_mod, p521_mp_mod); sp_521_mont_sqr_9(w, w, p521_mod, p521_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -52947,7 +52872,7 @@ static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i, sp_521_mont_sqr_9(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_9(t2, b, p521_mod); sp_521_mont_sub_9(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_9(t2, b, x, p521_mod); sp_521_mont_dbl_lower_9(b, t2, p521_mod); /* Z = Z*Y */ @@ -52977,7 +52902,7 @@ static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i, sp_521_mont_sqr_9(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_9(t2, b, p521_mod); sp_521_mont_sub_9(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_9(t2, b, x, p521_mod); sp_521_mont_dbl_lower_9(b, t2, p521_mod); /* Z = Z*Y */ @@ -52987,7 +52912,7 @@ static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_521_mont_mul_9(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_9(y, y, t1, p521_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_521_div2_9(y, y, p521_mod); } @@ -53018,6 +52943,7 @@ static int sp_521_iszero_9(const sp_digit* a) a[8]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -53025,6 +52951,81 @@ static int sp_521_iszero_9(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_521_proj_point_add_9(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*9; + sp_digit* t3 = t + 4*9; + sp_digit* t4 = t + 6*9; + sp_digit* t5 = t + 8*9; + sp_digit* t6 = t + 10*9; + + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_9(t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t3, t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t1, t1, p->x, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_9(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_9(t3, t3, p->y, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_9(t2, t1) & + sp_521_cmp_equal_9(t4, t3)) { + sp_521_proj_point_dbl_9(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_521_mont_sub_9(t2, t2, t1, p521_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_9(t4, t4, t3, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_9(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_9(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(x, x, t5, p521_mod); + sp_521_mont_mul_9(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_9(t3, y, p521_mod); + sp_521_mont_sub_9(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_9(y, y, x, p521_mod); + sp_521_mont_mul_9(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(y, y, t5, p521_mod); + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_521_proj_point_add_9_ctx { @@ -53043,6 +53044,13 @@ typedef struct sp_521_proj_point_add_9_ctx { sp_digit* z; } sp_521_proj_point_add_9_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_521_proj_point_add_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, const sp_point_521* q, sp_digit* t) { @@ -53074,252 +53082,149 @@ static int sp_521_proj_point_add_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_521_sub_9(ctx->t1, p521_mod, q->y); - sp_521_norm_9(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & - (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_9(ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 2; break; case 2: - err = sp_521_proj_point_dbl_9_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_521_mont_mul_9(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 3; break; case 3: - { + sp_521_mont_mul_9(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_521_mont_sqr_9(ctx->t1, q->z, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_9(ctx->t2, p->z, p521_mod, p521_mp_mod); ctx->state = 5; break; case 5: - sp_521_mont_mul_9(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); ctx->state = 6; break; case 6: - sp_521_mont_mul_9(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_9(ctx->t2, p->z, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_9(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); ctx->state = 8; break; case 8: - sp_521_mont_mul_9(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_9(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); ctx->state = 9; break; case 9: - sp_521_mont_mul_9(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_9(ctx->t2, ctx->t1) & + sp_521_cmp_equal_9(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_521_proj_point_dbl_9(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_521_mont_mul_9(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); + /* H = U2 - U1 */ + sp_521_mont_sub_9(ctx->t2, ctx->t2, ctx->t1, p521_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_9(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_9(ctx->t4, ctx->t4, ctx->t3, p521_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_521_mont_sub_9(ctx->t2, ctx->t2, ctx->t1, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_9(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_521_mont_sub_9(ctx->t4, ctx->t4, ctx->t3, p521_mod); + sp_521_mont_mul_9(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_9(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 15; break; case 15: - sp_521_mont_mul_9(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_9(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 16; break; case 16: - sp_521_mont_mul_9(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_9(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_sqr_9(ctx->x, ctx->t4, p521_mod, p521_mp_mod); ctx->state = 18; break; case 18: - sp_521_mont_mul_9(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sub_9(ctx->x, ctx->x, ctx->t5, p521_mod); ctx->state = 19; break; case 19: - sp_521_mont_sqr_9(ctx->x, ctx->t4, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); ctx->state = 20; break; case 20: - sp_521_mont_sub_9(ctx->x, ctx->x, ctx->t5, p521_mod); + sp_521_mont_dbl_9(ctx->t3, ctx->y, p521_mod); + sp_521_mont_sub_9(ctx->x, ctx->x, ctx->t3, p521_mod); ctx->state = 21; break; case 21: - sp_521_mont_mul_9(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_9(ctx->y, ctx->y, ctx->x, p521_mod); ctx->state = 22; break; case 22: - sp_521_mont_dbl_9(ctx->t3, ctx->y, p521_mod); + sp_521_mont_mul_9(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); ctx->state = 23; break; case 23: - sp_521_mont_sub_9(ctx->x, ctx->x, ctx->t3, p521_mod); + sp_521_mont_sub_9(ctx->y, ctx->y, ctx->t5, p521_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_9(ctx->y, ctx->y, ctx->x, p521_mod); - ctx->state = 25; - break; - case 25: - sp_521_mont_mul_9(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); - ctx->state = 26; - break; - case 26: - sp_521_mont_sub_9(ctx->y, ctx->y, ctx->t5, p521_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 9; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 9; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 9; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_521_proj_point_add_9(sp_point_521* r, - const sp_point_521* p, const sp_point_521* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*9; - sp_digit* t3 = t + 4*9; - sp_digit* t4 = t + 6*9; - sp_digit* t5 = t + 8*9; - sp_digit* t6 = t + 10*9; - - - /* Check double */ - (void)sp_521_sub_9(t1, p521_mod, q->y); - sp_521_norm_9(t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & - (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, t1))) != 0) { - sp_521_proj_point_dbl_9(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_521_mont_sqr_9(t1, q->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t3, t1, q->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t1, t1, p->x, p521_mod, p521_mp_mod); - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_9(t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t4, t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_521_mont_mul_9(t3, t3, p->y, p521_mod, p521_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod); - /* H = U2 - U1 */ - sp_521_mont_sub_9(t2, t2, t1, p521_mod); - /* R = S2 - S1 */ - sp_521_mont_sub_9(t4, t4, t3, p521_mod); - if (~p->infinity & ~q->infinity & - sp_521_iszero_9(t2) & sp_521_iszero_9(t4) & maskt) { - sp_521_proj_point_dbl_9(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_9(t5, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(y, t1, t5, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t5, t5, t2, p521_mod, p521_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_9(z, p->z, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(z, z, q->z, p521_mod, p521_mp_mod); - sp_521_mont_sqr_9(x, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_9(x, x, t5, p521_mod); - sp_521_mont_mul_9(t5, t5, t3, p521_mod, p521_mp_mod); - sp_521_mont_dbl_9(t3, y, p521_mod); - sp_521_mont_sub_9(x, x, t3, p521_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_9(y, y, x, p521_mod); - sp_521_mont_mul_9(y, y, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_9(y, y, t5, p521_mod); - - for (i = 0; i < 9; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 9; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 9; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -53372,7 +53277,7 @@ static void sp_521_proj_point_dbl_n_store_9(sp_point_521* r, sp_521_mont_sqr_9(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_9(t2, b, p521_mod); sp_521_mont_sub_9(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_9(t2, b, x, p521_mod); sp_521_mont_dbl_lower_9(b, t2, p521_mod); /* Z = Z*Y */ @@ -53387,7 +53292,6 @@ static void sp_521_proj_point_dbl_n_store_9(sp_point_521* r, /* y = 2*A*(B - X) - Y^4 */ sp_521_mont_mul_9(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_9(y, y, t1, p521_mod); - /* Y = Y/2 */ sp_521_div2_9(r[j].y, y, p521_mod); r[j].infinity = 0; @@ -53856,7 +53760,7 @@ static void sp_521_map_avx2_9(sp_point_521* r, const sp_point_521* p, /* x /= z^2 */ sp_521_mont_mul_avx2_9(r->x, p->x, t2, p521_mod, p521_mp_mod); - XMEMSET(r->x + 9, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 9, 0, sizeof(sp_digit) * 9U); sp_521_mont_reduce_avx2_9(r->x, p521_mod, p521_mp_mod); /* Reduce x to less than modulus */ n = sp_521_cmp_9(r->x, p521_mod); @@ -53865,16 +53769,15 @@ static void sp_521_map_avx2_9(sp_point_521* r, const sp_point_521* p, /* y /= z^3 */ sp_521_mont_mul_avx2_9(r->y, p->y, t1, p521_mod, p521_mp_mod); - XMEMSET(r->y + 9, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 9, 0, sizeof(sp_digit) * 9U); sp_521_mont_reduce_avx2_9(r->y, p521_mod, p521_mp_mod); /* Reduce y to less than modulus */ n = sp_521_cmp_9(r->y, p521_mod); - sp_521_cond_sub_avx2_9(r->y, r->y, p521_mod, ~(n >> 63)); + sp_521_cond_sub_9(r->y, r->y, p521_mod, ~(n >> 63)); sp_521_norm_9(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } #define sp_521_mont_add_avx2_9 sp_521_mont_add_9 @@ -53895,6 +53798,61 @@ extern void sp_521_div2_avx2_9(sp_digit* r, const sp_digit* a, const sp_digit* m * p Point to double. * t Temporary ordinate data. */ +static void sp_521_proj_point_dbl_avx2_9(sp_point_521* r, const sp_point_521* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*9; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_521_mont_sqr_avx2_9(t1, p->z, p521_mod, p521_mp_mod); + /* Z = Y * Z */ + sp_521_mont_mul_avx2_9(z, p->y, p->z, p521_mod, p521_mp_mod); + /* Z = 2Z */ + sp_521_mont_dbl_avx2_9(z, z, p521_mod); + /* T2 = X - T1 */ + sp_521_mont_sub_avx2_9(t2, p->x, t1, p521_mod); + /* T1 = X + T1 */ + sp_521_mont_add_avx2_9(t1, p->x, t1, p521_mod); + /* T2 = T1 * T2 */ + sp_521_mont_mul_avx2_9(t2, t1, t2, p521_mod, p521_mp_mod); + /* T1 = 3T2 */ + sp_521_mont_tpl_avx2_9(t1, t2, p521_mod); + /* Y = 2Y */ + sp_521_mont_dbl_avx2_9(y, p->y, p521_mod); + /* Y = Y * Y */ + sp_521_mont_sqr_avx2_9(y, y, p521_mod, p521_mp_mod); + /* T2 = Y * Y */ + sp_521_mont_sqr_avx2_9(t2, y, p521_mod, p521_mp_mod); + /* T2 = T2/2 */ + sp_521_div2_avx2_9(t2, t2, p521_mod); + /* Y = Y * X */ + sp_521_mont_mul_avx2_9(y, y, p->x, p521_mod, p521_mp_mod); + /* X = T1 * T1 */ + sp_521_mont_sqr_avx2_9(x, t1, p521_mod, p521_mp_mod); + /* X = X - Y */ + sp_521_mont_sub_avx2_9(x, x, y, p521_mod); + /* X = X - Y */ + sp_521_mont_sub_avx2_9(x, x, y, p521_mod); + /* Y = Y - X */ + sp_521_mont_sub_lower_avx2_9(y, y, x, p521_mod); + /* Y = Y * T1 */ + sp_521_mont_mul_avx2_9(y, y, t1, p521_mod, p521_mp_mod); + /* Y = Y - T2 */ + sp_521_mont_sub_avx2_9(y, y, t2, p521_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_521_proj_point_dbl_avx2_9_ctx { int state; @@ -53905,6 +53863,12 @@ typedef struct sp_521_proj_point_dbl_avx2_9_ctx { sp_digit* z; } sp_521_proj_point_dbl_avx2_9_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_521_proj_point_dbl_avx2_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -54029,62 +53993,6 @@ static int sp_521_proj_point_dbl_avx2_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_521_proj_point_dbl_avx2_9(sp_point_521* r, const sp_point_521* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*9; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_521_mont_sqr_avx2_9(t1, p->z, p521_mod, p521_mp_mod); - /* Z = Y * Z */ - sp_521_mont_mul_avx2_9(z, p->y, p->z, p521_mod, p521_mp_mod); - /* Z = 2Z */ - sp_521_mont_dbl_avx2_9(z, z, p521_mod); - /* T2 = X - T1 */ - sp_521_mont_sub_avx2_9(t2, p->x, t1, p521_mod); - /* T1 = X + T1 */ - sp_521_mont_add_avx2_9(t1, p->x, t1, p521_mod); - /* T2 = T1 * T2 */ - sp_521_mont_mul_avx2_9(t2, t1, t2, p521_mod, p521_mp_mod); - /* T1 = 3T2 */ - sp_521_mont_tpl_avx2_9(t1, t2, p521_mod); - /* Y = 2Y */ - sp_521_mont_dbl_avx2_9(y, p->y, p521_mod); - /* Y = Y * Y */ - sp_521_mont_sqr_avx2_9(y, y, p521_mod, p521_mp_mod); - /* T2 = Y * Y */ - sp_521_mont_sqr_avx2_9(t2, y, p521_mod, p521_mp_mod); - /* T2 = T2/2 */ - sp_521_div2_avx2_9(t2, t2, p521_mod); - /* Y = Y * X */ - sp_521_mont_mul_avx2_9(y, y, p->x, p521_mod, p521_mp_mod); - /* X = T1 * T1 */ - sp_521_mont_sqr_avx2_9(x, t1, p521_mod, p521_mp_mod); - /* X = X - Y */ - sp_521_mont_sub_avx2_9(x, x, y, p521_mod); - /* X = X - Y */ - sp_521_mont_sub_avx2_9(x, x, y, p521_mod); - /* Y = Y - X */ - sp_521_mont_sub_lower_avx2_9(y, y, x, p521_mod); - /* Y = Y * T1 */ - sp_521_mont_mul_avx2_9(y, y, t1, p521_mod, p521_mp_mod); - /* Y = Y - T2 */ - sp_521_mont_sub_avx2_9(y, y, t2, p521_mod); -} - #define sp_521_mont_dbl_lower_avx2_9 sp_521_mont_dbl_avx2_9 #define sp_521_mont_tpl_lower_avx2_9 sp_521_mont_tpl_avx2_9 /* Double the Montgomery form projective point p a number of times. @@ -54116,7 +54024,6 @@ static void sp_521_proj_point_dbl_n_avx2_9(sp_point_521* p, int i, /* W = Z^4 */ sp_521_mont_sqr_avx2_9(w, z, p521_mod, p521_mp_mod); sp_521_mont_sqr_avx2_9(w, w, p521_mod, p521_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -54134,7 +54041,7 @@ static void sp_521_proj_point_dbl_n_avx2_9(sp_point_521* p, int i, sp_521_mont_sqr_avx2_9(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_avx2_9(t2, b, p521_mod); sp_521_mont_sub_avx2_9(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_avx2_9(t2, b, x, p521_mod); sp_521_mont_dbl_lower_avx2_9(b, t2, p521_mod); /* Z = Z*Y */ @@ -54164,7 +54071,7 @@ static void sp_521_proj_point_dbl_n_avx2_9(sp_point_521* p, int i, sp_521_mont_sqr_avx2_9(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_avx2_9(t2, b, p521_mod); sp_521_mont_sub_avx2_9(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_avx2_9(t2, b, x, p521_mod); sp_521_mont_dbl_lower_avx2_9(b, t2, p521_mod); /* Z = Z*Y */ @@ -54174,11 +54081,12 @@ static void sp_521_proj_point_dbl_n_avx2_9(sp_point_521* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_521_mont_mul_avx2_9(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_avx2_9(y, y, t1, p521_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_521_div2_avx2_9(y, y, p521_mod); } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -54186,6 +54094,81 @@ static void sp_521_proj_point_dbl_n_avx2_9(sp_point_521* p, int i, * q Second point to add. * t Temporary ordinate data. */ +static void sp_521_proj_point_add_avx2_9(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*9; + sp_digit* t3 = t + 4*9; + sp_digit* t4 = t + 6*9; + sp_digit* t5 = t + 8*9; + sp_digit* t6 = t + 10*9; + + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_avx2_9(t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(t3, t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(t1, t1, p->x, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_avx2_9(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_avx2_9(t3, t3, p->y, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_avx2_9(t4, t4, q->y, p521_mod, p521_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_9(t2, t1) & + sp_521_cmp_equal_9(t4, t3)) { + sp_521_proj_point_dbl_avx2_9(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_521_mont_sub_avx2_9(t2, t2, t1, p521_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_avx2_9(t4, t4, t3, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_avx2_9(t5, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(y, t1, t5, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(t5, t5, t2, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_avx2_9(z, p->z, t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(z, z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sqr_avx2_9(x, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_avx2_9(x, x, t5, p521_mod); + sp_521_mont_mul_avx2_9(t5, t5, t3, p521_mod, p521_mp_mod); + sp_521_mont_dbl_avx2_9(t3, y, p521_mod); + sp_521_mont_sub_avx2_9(x, x, t3, p521_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_avx2_9(y, y, x, p521_mod); + sp_521_mont_mul_avx2_9(y, y, t4, p521_mod, p521_mp_mod); + sp_521_mont_sub_avx2_9(y, y, t5, p521_mod); + for (i = 0; i < 9; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 9; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_521_proj_point_add_avx2_9_ctx { @@ -54204,6 +54187,13 @@ typedef struct sp_521_proj_point_add_avx2_9_ctx { sp_digit* z; } sp_521_proj_point_add_avx2_9_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_521_proj_point_add_avx2_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, const sp_point_521* q, sp_digit* t) { @@ -54235,252 +54225,149 @@ static int sp_521_proj_point_add_avx2_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_521_sub_avx2_9(ctx->t1, p521_mod, q->y); - sp_521_norm_avx2_9(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_avx2_9(p->x, q->x) & sp_521_cmp_equal_avx2_9(p->z, q->z) & - (sp_521_cmp_equal_avx2_9(p->y, q->y) | sp_521_cmp_equal_avx2_9(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_521_mont_sqr_avx2_9(ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 2; break; case 2: - err = sp_521_proj_point_dbl_avx2_9_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_521_mont_mul_avx2_9(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + ctx->state = 3; break; case 3: - { + sp_521_mont_mul_avx2_9(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_521_mont_sqr_avx2_9(ctx->t1, q->z, p521_mod, p521_mp_mod); + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_avx2_9(ctx->t2, p->z, p521_mod, p521_mp_mod); ctx->state = 5; break; case 5: - sp_521_mont_mul_avx2_9(ctx->t3, ctx->t1, q->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); ctx->state = 6; break; case 6: - sp_521_mont_mul_avx2_9(ctx->t1, ctx->t1, p->x, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_avx2_9(ctx->t2, p->z, p521_mod, p521_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_521_mont_mul_avx2_9(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); ctx->state = 8; break; case 8: - sp_521_mont_mul_avx2_9(ctx->t4, ctx->t2, p->z, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_avx2_9(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); ctx->state = 9; break; case 9: - sp_521_mont_mul_avx2_9(ctx->t2, ctx->t2, q->x, p521_mod, p521_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_9(ctx->t2, ctx->t1) & + sp_521_cmp_equal_9(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_521_proj_point_dbl_avx2_9(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_521_mont_mul_avx2_9(ctx->t3, ctx->t3, p->y, p521_mod, p521_mp_mod); + /* H = U2 - U1 */ + sp_521_mont_sub_avx2_9(ctx->t2, ctx->t2, ctx->t1, p521_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_avx2_9(ctx->t4, ctx->t4, q->y, p521_mod, p521_mp_mod); + /* R = S2 - S1 */ + sp_521_mont_sub_avx2_9(ctx->t4, ctx->t4, ctx->t3, p521_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_521_mont_sub_avx2_9(ctx->t2, ctx->t2, ctx->t1, p521_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_521_mont_sqr_avx2_9(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_521_mont_sub_avx2_9(ctx->t4, ctx->t4, ctx->t3, p521_mod); + sp_521_mont_mul_avx2_9(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_avx2_9(ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 15; break; case 15: - sp_521_mont_mul_avx2_9(ctx->y, ctx->t1, ctx->t5, p521_mod, p521_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_521_mont_mul_avx2_9(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); ctx->state = 16; break; case 16: - sp_521_mont_mul_avx2_9(ctx->t5, ctx->t5, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_avx2_9(ctx->z, p->z, ctx->t2, p521_mod, p521_mp_mod); + sp_521_mont_sqr_avx2_9(ctx->x, ctx->t4, p521_mod, p521_mp_mod); ctx->state = 18; break; case 18: - sp_521_mont_mul_avx2_9(ctx->z, ctx->z, q->z, p521_mod, p521_mp_mod); + sp_521_mont_sub_avx2_9(ctx->x, ctx->x, ctx->t5, p521_mod); ctx->state = 19; break; case 19: - sp_521_mont_sqr_avx2_9(ctx->x, ctx->t4, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); ctx->state = 20; break; case 20: - sp_521_mont_sub_avx2_9(ctx->x, ctx->x, ctx->t5, p521_mod); + sp_521_mont_dbl_avx2_9(ctx->t3, ctx->y, p521_mod); + sp_521_mont_sub_avx2_9(ctx->x, ctx->x, ctx->t3, p521_mod); ctx->state = 21; break; case 21: - sp_521_mont_mul_avx2_9(ctx->t5, ctx->t5, ctx->t3, p521_mod, p521_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_521_mont_sub_lower_avx2_9(ctx->y, ctx->y, ctx->x, p521_mod); ctx->state = 22; break; case 22: - sp_521_mont_dbl_avx2_9(ctx->t3, ctx->y, p521_mod); + sp_521_mont_mul_avx2_9(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); ctx->state = 23; break; case 23: - sp_521_mont_sub_avx2_9(ctx->x, ctx->x, ctx->t3, p521_mod); + sp_521_mont_sub_avx2_9(ctx->y, ctx->y, ctx->t5, p521_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_avx2_9(ctx->y, ctx->y, ctx->x, p521_mod); - ctx->state = 25; - break; - case 25: - sp_521_mont_mul_avx2_9(ctx->y, ctx->y, ctx->t4, p521_mod, p521_mp_mod); - ctx->state = 26; - break; - case 26: - sp_521_mont_sub_avx2_9(ctx->y, ctx->y, ctx->t5, p521_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 9; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 9; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 9; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_521_proj_point_add_avx2_9(sp_point_521* r, - const sp_point_521* p, const sp_point_521* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*9; - sp_digit* t3 = t + 4*9; - sp_digit* t4 = t + 6*9; - sp_digit* t5 = t + 8*9; - sp_digit* t6 = t + 10*9; - - - /* Check double */ - (void)sp_521_sub_9(t1, p521_mod, q->y); - sp_521_norm_9(t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & - (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, t1))) != 0) { - sp_521_proj_point_dbl_avx2_9(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_521_mont_sqr_avx2_9(t1, q->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_avx2_9(t3, t1, q->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_avx2_9(t1, t1, p->x, p521_mod, p521_mp_mod); - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_avx2_9(t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_avx2_9(t4, t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_avx2_9(t2, t2, q->x, p521_mod, p521_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_521_mont_mul_avx2_9(t3, t3, p->y, p521_mod, p521_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_avx2_9(t4, t4, q->y, p521_mod, p521_mp_mod); - /* H = U2 - U1 */ - sp_521_mont_sub_avx2_9(t2, t2, t1, p521_mod); - /* R = S2 - S1 */ - sp_521_mont_sub_avx2_9(t4, t4, t3, p521_mod); - if (~p->infinity & ~q->infinity & - sp_521_iszero_9(t2) & sp_521_iszero_9(t4) & maskt) { - sp_521_proj_point_dbl_avx2_9(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_521_mont_sqr_avx2_9(t5, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_avx2_9(y, t1, t5, p521_mod, p521_mp_mod); - sp_521_mont_mul_avx2_9(t5, t5, t2, p521_mod, p521_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_521_mont_mul_avx2_9(z, p->z, t2, p521_mod, p521_mp_mod); - sp_521_mont_mul_avx2_9(z, z, q->z, p521_mod, p521_mp_mod); - sp_521_mont_sqr_avx2_9(x, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_avx2_9(x, x, t5, p521_mod); - sp_521_mont_mul_avx2_9(t5, t5, t3, p521_mod, p521_mp_mod); - sp_521_mont_dbl_avx2_9(t3, y, p521_mod); - sp_521_mont_sub_avx2_9(x, x, t3, p521_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_521_mont_sub_lower_avx2_9(y, y, x, p521_mod); - sp_521_mont_mul_avx2_9(y, y, t4, p521_mod, p521_mp_mod); - sp_521_mont_sub_avx2_9(y, y, t5, p521_mod); - - for (i = 0; i < 9; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 9; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 9; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -54533,7 +54420,7 @@ static void sp_521_proj_point_dbl_n_store_avx2_9(sp_point_521* r, sp_521_mont_sqr_avx2_9(x, a, p521_mod, p521_mp_mod); sp_521_mont_dbl_avx2_9(t2, b, p521_mod); sp_521_mont_sub_avx2_9(x, x, t2, p521_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_521_mont_sub_lower_avx2_9(t2, b, x, p521_mod); sp_521_mont_dbl_lower_avx2_9(b, t2, p521_mod); /* Z = Z*Y */ @@ -54548,7 +54435,6 @@ static void sp_521_proj_point_dbl_n_store_avx2_9(sp_point_521* r, /* y = 2*A*(B - X) - Y^4 */ sp_521_mont_mul_avx2_9(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_avx2_9(y, y, t1, p521_mod); - /* Y = Y/2 */ sp_521_div2_avx2_9(r[j].y, y, p521_mod); r[j].infinity = 0; @@ -54797,8 +54683,8 @@ typedef struct sp_table_entry_521 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_521_proj_point_add_qz1_9(sp_point_521* r, const sp_point_521* p, - const sp_point_521* q, sp_digit* t) +static void sp_521_proj_point_add_qz1_9(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*9; @@ -54807,12 +54693,17 @@ static void sp_521_proj_point_add_qz1_9(sp_point_521* r, const sp_point_521* p, sp_digit* t5 = t + 8*9; sp_digit* t6 = t + 10*9; - /* Check double */ - (void)sp_521_sub_9(t1, p521_mod, q->y); - sp_521_norm_9(t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & - (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_9(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_9(p->x, t2) & + sp_521_cmp_equal_9(p->y, t4)) { sp_521_proj_point_dbl_9(r, p, t); } else { @@ -54824,12 +54715,6 @@ static void sp_521_proj_point_add_qz1_9(sp_point_521* r, const sp_point_521* p, sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_9(t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t4, t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_9(t2, t2, q->x, p521_mod, p521_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_9(t4, t4, q->y, p521_mod, p521_mp_mod); /* H = U2 - X1 */ sp_521_mont_sub_9(t2, t2, p->x, p521_mod); /* R = S2 - Y1 */ @@ -55286,8 +55171,8 @@ static int sp_521_ecc_mulmod_9(sp_point_521* r, const sp_point_521* g, const sp_ * q Second point to add. * t Temporary ordinate data. */ -static void sp_521_proj_point_add_qz1_avx2_9(sp_point_521* r, const sp_point_521* p, - const sp_point_521* q, sp_digit* t) +static void sp_521_proj_point_add_qz1_avx2_9(sp_point_521* r, + const sp_point_521* p, const sp_point_521* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*9; @@ -55296,12 +55181,17 @@ static void sp_521_proj_point_add_qz1_avx2_9(sp_point_521* r, const sp_point_521 sp_digit* t5 = t + 8*9; sp_digit* t6 = t + 10*9; - /* Check double */ - (void)sp_521_sub_9(t1, p521_mod, q->y); - sp_521_norm_9(t1); - if ((~p->infinity & ~q->infinity & - sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & - (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_521_mont_sqr_avx2_9(t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(t4, t2, p->z, p521_mod, p521_mp_mod); + sp_521_mont_mul_avx2_9(t2, t2, q->x, p521_mod, p521_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_521_mont_mul_avx2_9(t4, t4, q->y, p521_mod, p521_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_521_cmp_equal_9(p->x, t2) & + sp_521_cmp_equal_9(p->y, t4)) { sp_521_proj_point_dbl_avx2_9(r, p, t); } else { @@ -55313,12 +55203,6 @@ static void sp_521_proj_point_add_qz1_avx2_9(sp_point_521* r, const sp_point_521 sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_521_mont_sqr_avx2_9(t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_avx2_9(t4, t2, p->z, p521_mod, p521_mp_mod); - sp_521_mont_mul_avx2_9(t2, t2, q->x, p521_mod, p521_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_521_mont_mul_avx2_9(t4, t4, q->y, p521_mod, p521_mp_mod); /* H = U2 - X1 */ sp_521_mont_sub_avx2_9(t2, t2, p->x, p521_mod); /* R = S2 - Y1 */ @@ -91800,7 +91684,7 @@ int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, (sp_digit)0 - (sp_digit)(c >= 0)); sp_521_norm_9(r); - if (sp_521_iszero_9(r) == 0) { + if (!sp_521_iszero_9(r)) { /* x is modified in calculation of s. */ sp_521_from_mp(x, 9, priv); /* s ptr == e ptr, e is modified in calculation of s. */ @@ -91814,7 +91698,7 @@ int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, err = sp_521_calc_s_9(s, r, k, x, e, tmp); /* Check that signature is usable. */ - if ((err == MP_OKAY) && (sp_521_iszero_9(s) == 0)) { + if ((err == MP_OKAY) && (!sp_521_iszero_9(s))) { break; } } @@ -93912,7 +93796,7 @@ static void sp_1024_map_16(sp_point_1024* r, const sp_point_1024* p, /* x /= z^2 */ sp_1024_mont_mul_16(r->x, p->x, t2, p1024_mod, p1024_mp_mod); - XMEMSET(r->x + 16, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 16, 0, sizeof(sp_digit) * 16U); sp_1024_mont_reduce_16(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_16(r->x, p1024_mod); @@ -93921,7 +93805,7 @@ static void sp_1024_map_16(sp_point_1024* r, const sp_point_1024* p, /* y /= z^3 */ sp_1024_mont_mul_16(r->y, p->y, t1, p1024_mod, p1024_mp_mod); - XMEMSET(r->y + 16, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 16, 0, sizeof(sp_digit) * 16U); sp_1024_mont_reduce_16(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_16(r->y, p1024_mod); @@ -93930,7 +93814,6 @@ static void sp_1024_map_16(sp_point_1024* r, const sp_point_1024* p, XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } #ifdef __cplusplus @@ -93975,6 +93858,61 @@ extern void sp_1024_div2_16(sp_digit* r, const sp_digit* a, const sp_digit* m); * p Point to double. * t Temporary ordinate data. */ +static void sp_1024_proj_point_dbl_16(sp_point_1024* r, const sp_point_1024* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*16; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_1024_mont_sqr_16(t1, p->z, p1024_mod, p1024_mp_mod); + /* Z = Y * Z */ + sp_1024_mont_mul_16(z, p->y, p->z, p1024_mod, p1024_mp_mod); + /* Z = 2Z */ + sp_1024_mont_dbl_16(z, z, p1024_mod); + /* T2 = X - T1 */ + sp_1024_mont_sub_16(t2, p->x, t1, p1024_mod); + /* T1 = X + T1 */ + sp_1024_mont_add_16(t1, p->x, t1, p1024_mod); + /* T2 = T1 * T2 */ + sp_1024_mont_mul_16(t2, t1, t2, p1024_mod, p1024_mp_mod); + /* T1 = 3T2 */ + sp_1024_mont_tpl_16(t1, t2, p1024_mod); + /* Y = 2Y */ + sp_1024_mont_dbl_16(y, p->y, p1024_mod); + /* Y = Y * Y */ + sp_1024_mont_sqr_16(y, y, p1024_mod, p1024_mp_mod); + /* T2 = Y * Y */ + sp_1024_mont_sqr_16(t2, y, p1024_mod, p1024_mp_mod); + /* T2 = T2/2 */ + sp_1024_div2_16(t2, t2, p1024_mod); + /* Y = Y * X */ + sp_1024_mont_mul_16(y, y, p->x, p1024_mod, p1024_mp_mod); + /* X = T1 * T1 */ + sp_1024_mont_sqr_16(x, t1, p1024_mod, p1024_mp_mod); + /* X = X - Y */ + sp_1024_mont_sub_16(x, x, y, p1024_mod); + /* X = X - Y */ + sp_1024_mont_sub_16(x, x, y, p1024_mod); + /* Y = Y - X */ + sp_1024_mont_sub_lower_16(y, y, x, p1024_mod); + /* Y = Y * T1 */ + sp_1024_mont_mul_16(y, y, t1, p1024_mod, p1024_mp_mod); + /* Y = Y - T2 */ + sp_1024_mont_sub_16(y, y, t2, p1024_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_dbl_16_ctx { int state; @@ -93985,6 +93923,12 @@ typedef struct sp_1024_proj_point_dbl_16_ctx { sp_digit* z; } sp_1024_proj_point_dbl_16_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_1024_proj_point_dbl_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -94109,62 +94053,6 @@ static int sp_1024_proj_point_dbl_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_1024_proj_point_dbl_16(sp_point_1024* r, const sp_point_1024* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*16; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_1024_mont_sqr_16(t1, p->z, p1024_mod, p1024_mp_mod); - /* Z = Y * Z */ - sp_1024_mont_mul_16(z, p->y, p->z, p1024_mod, p1024_mp_mod); - /* Z = 2Z */ - sp_1024_mont_dbl_16(z, z, p1024_mod); - /* T2 = X - T1 */ - sp_1024_mont_sub_16(t2, p->x, t1, p1024_mod); - /* T1 = X + T1 */ - sp_1024_mont_add_16(t1, p->x, t1, p1024_mod); - /* T2 = T1 * T2 */ - sp_1024_mont_mul_16(t2, t1, t2, p1024_mod, p1024_mp_mod); - /* T1 = 3T2 */ - sp_1024_mont_tpl_16(t1, t2, p1024_mod); - /* Y = 2Y */ - sp_1024_mont_dbl_16(y, p->y, p1024_mod); - /* Y = Y * Y */ - sp_1024_mont_sqr_16(y, y, p1024_mod, p1024_mp_mod); - /* T2 = Y * Y */ - sp_1024_mont_sqr_16(t2, y, p1024_mod, p1024_mp_mod); - /* T2 = T2/2 */ - sp_1024_div2_16(t2, t2, p1024_mod); - /* Y = Y * X */ - sp_1024_mont_mul_16(y, y, p->x, p1024_mod, p1024_mp_mod); - /* X = T1 * T1 */ - sp_1024_mont_sqr_16(x, t1, p1024_mod, p1024_mp_mod); - /* X = X - Y */ - sp_1024_mont_sub_16(x, x, y, p1024_mod); - /* X = X - Y */ - sp_1024_mont_sub_16(x, x, y, p1024_mod); - /* Y = Y - X */ - sp_1024_mont_sub_lower_16(y, y, x, p1024_mod); - /* Y = Y * T1 */ - sp_1024_mont_mul_16(y, y, t1, p1024_mod, p1024_mp_mod); - /* Y = Y - T2 */ - sp_1024_mont_sub_16(y, y, t2, p1024_mod); -} - #define sp_1024_mont_dbl_lower_16 sp_1024_mont_dbl_16 #define sp_1024_mont_tpl_lower_16 sp_1024_mont_tpl_16 /* Double the Montgomery form projective point p a number of times. @@ -94196,7 +94084,6 @@ static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int i, /* W = Z^4 */ sp_1024_mont_sqr_16(w, z, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_16(w, w, p1024_mod, p1024_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -94214,7 +94101,7 @@ static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int i, sp_1024_mont_sqr_16(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_16(t2, b, p1024_mod); sp_1024_mont_sub_16(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_16(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_16(b, t2, p1024_mod); /* Z = Z*Y */ @@ -94244,7 +94131,7 @@ static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int i, sp_1024_mont_sqr_16(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_16(t2, b, p1024_mod); sp_1024_mont_sub_16(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_16(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_16(b, t2, p1024_mod); /* Z = Z*Y */ @@ -94254,7 +94141,7 @@ static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_1024_mont_mul_16(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_16(y, y, t1, p1024_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_1024_div2_16(y, y, p1024_mod); } @@ -94295,6 +94182,7 @@ static int sp_1024_iszero_16(const sp_digit* a) a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15]) == 0; } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -94302,6 +94190,81 @@ static int sp_1024_iszero_16(const sp_digit* a) * q Second point to add. * t Temporary ordinate data. */ +static void sp_1024_proj_point_add_16(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*16; + sp_digit* t3 = t + 4*16; + sp_digit* t4 = t + 6*16; + sp_digit* t5 = t + 8*16; + sp_digit* t6 = t + 10*16; + + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_16(t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t3, t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t1, t1, p->x, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_16(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_16(t3, t3, p->y, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_16(t2, t1) & + sp_1024_cmp_equal_16(t4, t3)) { + sp_1024_proj_point_dbl_16(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_1024_mont_sub_16(t2, t2, t1, p1024_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_16(t4, t4, t3, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_16(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_16(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_16(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(x, x, t5, p1024_mod); + sp_1024_mont_mul_16(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_16(t3, y, p1024_mod); + sp_1024_mont_sub_16(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_16(y, y, x, p1024_mod); + sp_1024_mont_mul_16(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(y, y, t5, p1024_mod); + for (i = 0; i < 16; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_add_16_ctx { @@ -94320,6 +94283,13 @@ typedef struct sp_1024_proj_point_add_16_ctx { sp_digit* z; } sp_1024_proj_point_add_16_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_1024_proj_point_add_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { @@ -94351,252 +94321,149 @@ static int sp_1024_proj_point_add_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_1024_sub_16(ctx->t1, p1024_mod, q->y); - sp_1024_norm_16(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & - (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_16(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 2; break; case 2: - err = sp_1024_proj_point_dbl_16_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_1024_mont_mul_16(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 3; break; case 3: - { + sp_1024_mont_mul_16(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_16(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_16(ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 5; break; case 5: - sp_1024_mont_mul_16(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 6; break; case 6: - sp_1024_mont_mul_16(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_16(ctx->t2, p->z, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_16(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); ctx->state = 8; break; case 8: - sp_1024_mont_mul_16(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_16(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); ctx->state = 9; break; case 9: - sp_1024_mont_mul_16(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_16(ctx->t2, ctx->t1) & + sp_1024_cmp_equal_16(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_1024_proj_point_dbl_16(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_16(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); + /* H = U2 - U1 */ + sp_1024_mont_sub_16(ctx->t2, ctx->t2, ctx->t1, p1024_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_16(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_16(ctx->t4, ctx->t4, ctx->t3, p1024_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_1024_mont_sub_16(ctx->t2, ctx->t2, ctx->t1, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_16(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_1024_mont_sub_16(ctx->t4, ctx->t4, ctx->t3, p1024_mod); + sp_1024_mont_mul_16(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_16(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 15; break; case 15: - sp_1024_mont_mul_16(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_16(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 16; break; case 16: - sp_1024_mont_mul_16(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_16(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_16(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 18; break; case 18: - sp_1024_mont_mul_16(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_16(ctx->x, ctx->x, ctx->t5, p1024_mod); ctx->state = 19; break; case 19: - sp_1024_mont_sqr_16(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); ctx->state = 20; break; case 20: - sp_1024_mont_sub_16(ctx->x, ctx->x, ctx->t5, p1024_mod); + sp_1024_mont_dbl_16(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_sub_16(ctx->x, ctx->x, ctx->t3, p1024_mod); ctx->state = 21; break; case 21: - sp_1024_mont_mul_16(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_16(ctx->y, ctx->y, ctx->x, p1024_mod); ctx->state = 22; break; case 22: - sp_1024_mont_dbl_16(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_mul_16(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 23; break; case 23: - sp_1024_mont_sub_16(ctx->x, ctx->x, ctx->t3, p1024_mod); + sp_1024_mont_sub_16(ctx->y, ctx->y, ctx->t5, p1024_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_16(ctx->y, ctx->y, ctx->x, p1024_mod); - ctx->state = 25; - break; - case 25: - sp_1024_mont_mul_16(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); - ctx->state = 26; - break; - case 26: - sp_1024_mont_sub_16(ctx->y, ctx->y, ctx->t5, p1024_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 16; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 16; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 16; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_1024_proj_point_add_16(sp_point_1024* r, - const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*16; - sp_digit* t3 = t + 4*16; - sp_digit* t4 = t + 6*16; - sp_digit* t5 = t + 8*16; - sp_digit* t6 = t + 10*16; - - - /* Check double */ - (void)sp_1024_mont_sub_16(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_16(t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & - (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, t1))) != 0) { - sp_1024_proj_point_dbl_16(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_16(t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t3, t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t1, t1, p->x, p1024_mod, p1024_mp_mod); - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_16(t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t4, t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_16(t3, t3, p->y, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); - /* H = U2 - U1 */ - sp_1024_mont_sub_16(t2, t2, t1, p1024_mod); - /* R = S2 - S1 */ - sp_1024_mont_sub_16(t4, t4, t3, p1024_mod); - if (~p->infinity & ~q->infinity & - sp_1024_iszero_16(t2) & sp_1024_iszero_16(t4) & maskt) { - sp_1024_proj_point_dbl_16(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_16(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t5, t5, t2, p1024_mod, p1024_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_16(z, p->z, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_16(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_16(x, x, t5, p1024_mod); - sp_1024_mont_mul_16(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_16(t3, y, p1024_mod); - sp_1024_mont_sub_16(x, x, t3, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_16(y, y, x, p1024_mod); - sp_1024_mont_mul_16(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_16(y, y, t5, p1024_mod); - - for (i = 0; i < 16; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 16; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 16; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -94649,7 +94516,7 @@ static void sp_1024_proj_point_dbl_n_store_16(sp_point_1024* r, sp_1024_mont_sqr_16(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_16(t2, b, p1024_mod); sp_1024_mont_sub_16(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_16(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_16(b, t2, p1024_mod); /* Z = Z*Y */ @@ -94664,7 +94531,6 @@ static void sp_1024_proj_point_dbl_n_store_16(sp_point_1024* r, /* y = 2*A*(B - X) - Y^4 */ sp_1024_mont_mul_16(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_16(y, y, t1, p1024_mod); - /* Y = Y/2 */ sp_1024_div2_16(r[j].y, y, p1024_mod); r[j].infinity = 0; @@ -95079,7 +94945,7 @@ static void sp_1024_map_avx2_16(sp_point_1024* r, const sp_point_1024* p, /* x /= z^2 */ sp_1024_mont_mul_avx2_16(r->x, p->x, t2, p1024_mod, p1024_mp_mod); - XMEMSET(r->x + 16, 0, sizeof(r->x) / 2U); + XMEMSET(r->x + 16, 0, sizeof(sp_digit) * 16U); sp_1024_mont_reduce_avx2_16(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_16(r->x, p1024_mod); @@ -95088,16 +94954,15 @@ static void sp_1024_map_avx2_16(sp_point_1024* r, const sp_point_1024* p, /* y /= z^3 */ sp_1024_mont_mul_avx2_16(r->y, p->y, t1, p1024_mod, p1024_mp_mod); - XMEMSET(r->y + 16, 0, sizeof(r->y) / 2U); + XMEMSET(r->y + 16, 0, sizeof(sp_digit) * 16U); sp_1024_mont_reduce_avx2_16(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_16(r->y, p1024_mod); - sp_1024_cond_sub_avx2_16(r->y, r->y, p1024_mod, ~(n >> 63)); + sp_1024_cond_sub_16(r->y, r->y, p1024_mod, ~(n >> 63)); sp_1024_norm_16(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); r->z[0] = 1; - } #ifdef __cplusplus @@ -95142,6 +95007,61 @@ extern void sp_1024_div2_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* * p Point to double. * t Temporary ordinate data. */ +static void sp_1024_proj_point_dbl_avx2_16(sp_point_1024* r, const sp_point_1024* p, + sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*16; + sp_digit* x; + sp_digit* y; + sp_digit* z; + + x = r->x; + y = r->y; + z = r->z; + /* Put infinity into result. */ + if (r != p) { + r->infinity = p->infinity; + } + + /* T1 = Z * Z */ + sp_1024_mont_sqr_avx2_16(t1, p->z, p1024_mod, p1024_mp_mod); + /* Z = Y * Z */ + sp_1024_mont_mul_avx2_16(z, p->y, p->z, p1024_mod, p1024_mp_mod); + /* Z = 2Z */ + sp_1024_mont_dbl_avx2_16(z, z, p1024_mod); + /* T2 = X - T1 */ + sp_1024_mont_sub_avx2_16(t2, p->x, t1, p1024_mod); + /* T1 = X + T1 */ + sp_1024_mont_add_avx2_16(t1, p->x, t1, p1024_mod); + /* T2 = T1 * T2 */ + sp_1024_mont_mul_avx2_16(t2, t1, t2, p1024_mod, p1024_mp_mod); + /* T1 = 3T2 */ + sp_1024_mont_tpl_avx2_16(t1, t2, p1024_mod); + /* Y = 2Y */ + sp_1024_mont_dbl_avx2_16(y, p->y, p1024_mod); + /* Y = Y * Y */ + sp_1024_mont_sqr_avx2_16(y, y, p1024_mod, p1024_mp_mod); + /* T2 = Y * Y */ + sp_1024_mont_sqr_avx2_16(t2, y, p1024_mod, p1024_mp_mod); + /* T2 = T2/2 */ + sp_1024_div2_avx2_16(t2, t2, p1024_mod); + /* Y = Y * X */ + sp_1024_mont_mul_avx2_16(y, y, p->x, p1024_mod, p1024_mp_mod); + /* X = T1 * T1 */ + sp_1024_mont_sqr_avx2_16(x, t1, p1024_mod, p1024_mp_mod); + /* X = X - Y */ + sp_1024_mont_sub_avx2_16(x, x, y, p1024_mod); + /* X = X - Y */ + sp_1024_mont_sub_avx2_16(x, x, y, p1024_mod); + /* Y = Y - X */ + sp_1024_mont_sub_lower_avx2_16(y, y, x, p1024_mod); + /* Y = Y * T1 */ + sp_1024_mont_mul_avx2_16(y, y, t1, p1024_mod, p1024_mp_mod); + /* Y = Y - T2 */ + sp_1024_mont_sub_avx2_16(y, y, t2, p1024_mod); +} + #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_dbl_avx2_16_ctx { int state; @@ -95152,6 +95072,12 @@ typedef struct sp_1024_proj_point_dbl_avx2_16_ctx { sp_digit* z; } sp_1024_proj_point_dbl_avx2_16_ctx; +/* Double the Montgomery form projective point p. + * + * r Result of doubling point. + * p Point to double. + * t Temporary ordinate data. + */ static int sp_1024_proj_point_dbl_avx2_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; @@ -95276,62 +95202,6 @@ static int sp_1024_proj_point_dbl_avx2_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024 return err; } #endif /* WOLFSSL_SP_NONBLOCK */ - -static void sp_1024_proj_point_dbl_avx2_16(sp_point_1024* r, const sp_point_1024* p, - sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*16; - sp_digit* x; - sp_digit* y; - sp_digit* z; - - x = r->x; - y = r->y; - z = r->z; - /* Put infinity into result. */ - if (r != p) { - r->infinity = p->infinity; - } - - /* T1 = Z * Z */ - sp_1024_mont_sqr_avx2_16(t1, p->z, p1024_mod, p1024_mp_mod); - /* Z = Y * Z */ - sp_1024_mont_mul_avx2_16(z, p->y, p->z, p1024_mod, p1024_mp_mod); - /* Z = 2Z */ - sp_1024_mont_dbl_avx2_16(z, z, p1024_mod); - /* T2 = X - T1 */ - sp_1024_mont_sub_avx2_16(t2, p->x, t1, p1024_mod); - /* T1 = X + T1 */ - sp_1024_mont_add_avx2_16(t1, p->x, t1, p1024_mod); - /* T2 = T1 * T2 */ - sp_1024_mont_mul_avx2_16(t2, t1, t2, p1024_mod, p1024_mp_mod); - /* T1 = 3T2 */ - sp_1024_mont_tpl_avx2_16(t1, t2, p1024_mod); - /* Y = 2Y */ - sp_1024_mont_dbl_avx2_16(y, p->y, p1024_mod); - /* Y = Y * Y */ - sp_1024_mont_sqr_avx2_16(y, y, p1024_mod, p1024_mp_mod); - /* T2 = Y * Y */ - sp_1024_mont_sqr_avx2_16(t2, y, p1024_mod, p1024_mp_mod); - /* T2 = T2/2 */ - sp_1024_div2_avx2_16(t2, t2, p1024_mod); - /* Y = Y * X */ - sp_1024_mont_mul_avx2_16(y, y, p->x, p1024_mod, p1024_mp_mod); - /* X = T1 * T1 */ - sp_1024_mont_sqr_avx2_16(x, t1, p1024_mod, p1024_mp_mod); - /* X = X - Y */ - sp_1024_mont_sub_avx2_16(x, x, y, p1024_mod); - /* X = X - Y */ - sp_1024_mont_sub_avx2_16(x, x, y, p1024_mod); - /* Y = Y - X */ - sp_1024_mont_sub_lower_avx2_16(y, y, x, p1024_mod); - /* Y = Y * T1 */ - sp_1024_mont_mul_avx2_16(y, y, t1, p1024_mod, p1024_mp_mod); - /* Y = Y - T2 */ - sp_1024_mont_sub_avx2_16(y, y, t2, p1024_mod); -} - #define sp_1024_mont_dbl_lower_avx2_16 sp_1024_mont_dbl_avx2_16 #define sp_1024_mont_tpl_lower_avx2_16 sp_1024_mont_tpl_avx2_16 /* Double the Montgomery form projective point p a number of times. @@ -95363,7 +95233,6 @@ static void sp_1024_proj_point_dbl_n_avx2_16(sp_point_1024* p, int i, /* W = Z^4 */ sp_1024_mont_sqr_avx2_16(w, z, p1024_mod, p1024_mp_mod); sp_1024_mont_sqr_avx2_16(w, w, p1024_mod, p1024_mp_mod); - #ifndef WOLFSSL_SP_SMALL while (--n > 0) #else @@ -95381,7 +95250,7 @@ static void sp_1024_proj_point_dbl_n_avx2_16(sp_point_1024* p, int i, sp_1024_mont_sqr_avx2_16(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_avx2_16(t2, b, p1024_mod); sp_1024_mont_sub_avx2_16(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_avx2_16(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_avx2_16(b, t2, p1024_mod); /* Z = Z*Y */ @@ -95411,7 +95280,7 @@ static void sp_1024_proj_point_dbl_n_avx2_16(sp_point_1024* p, int i, sp_1024_mont_sqr_avx2_16(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_avx2_16(t2, b, p1024_mod); sp_1024_mont_sub_avx2_16(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_avx2_16(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_avx2_16(b, t2, p1024_mod); /* Z = Z*Y */ @@ -95421,11 +95290,12 @@ static void sp_1024_proj_point_dbl_n_avx2_16(sp_point_1024* p, int i, /* y = 2*A*(B - X) - Y^4 */ sp_1024_mont_mul_avx2_16(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_avx2_16(y, y, t1, p1024_mod); -#endif +#endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ sp_1024_div2_avx2_16(y, y, p1024_mod); } + /* Add two Montgomery form projective points. * * r Result of addition. @@ -95433,6 +95303,81 @@ static void sp_1024_proj_point_dbl_n_avx2_16(sp_point_1024* p, int i, * q Second point to add. * t Temporary ordinate data. */ +static void sp_1024_proj_point_add_avx2_16(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) +{ + sp_digit* t1 = t; + sp_digit* t2 = t + 2*16; + sp_digit* t3 = t + 4*16; + sp_digit* t4 = t + 6*16; + sp_digit* t5 = t + 8*16; + sp_digit* t6 = t + 10*16; + + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_avx2_16(t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(t3, t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(t1, t1, p->x, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_avx2_16(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_avx2_16(t3, t3, p->y, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_avx2_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_16(t2, t1) & + sp_1024_cmp_equal_16(t4, t3)) { + sp_1024_proj_point_dbl_avx2_16(r, p, t); + } + else { + sp_digit maskp; + sp_digit maskq; + sp_digit maskt; + sp_digit* x = t6; + sp_digit* y = t1; + sp_digit* z = t2; + int i; + + maskp = 0 - (q->infinity & (!p->infinity)); + maskq = 0 - (p->infinity & (!q->infinity)); + maskt = ~(maskp | maskq); + + /* H = U2 - U1 */ + sp_1024_mont_sub_avx2_16(t2, t2, t1, p1024_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_avx2_16(t4, t4, t3, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_avx2_16(t5, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(y, t1, t5, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(t5, t5, t2, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_avx2_16(z, p->z, t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(z, z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_avx2_16(x, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_avx2_16(x, x, t5, p1024_mod); + sp_1024_mont_mul_avx2_16(t5, t5, t3, p1024_mod, p1024_mp_mod); + sp_1024_mont_dbl_avx2_16(t3, y, p1024_mod); + sp_1024_mont_sub_avx2_16(x, x, t3, p1024_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_avx2_16(y, y, x, p1024_mod); + sp_1024_mont_mul_avx2_16(y, y, t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_avx2_16(y, y, t5, p1024_mod); + for (i = 0; i < 16; i++) { + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (x[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (y[i] & maskt); + } + for (i = 0; i < 16; i++) { + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (z[i] & maskt); + } + r->z[0] |= p->infinity & q->infinity; + r->infinity = p->infinity & q->infinity; + } +} #ifdef WOLFSSL_SP_NONBLOCK typedef struct sp_1024_proj_point_add_avx2_16_ctx { @@ -95451,6 +95396,13 @@ typedef struct sp_1024_proj_point_add_avx2_16_ctx { sp_digit* z; } sp_1024_proj_point_add_avx2_16_ctx; +/* Add two Montgomery form projective points. + * + * r Result of addition. + * p First point to add. + * q Second point to add. + * t Temporary ordinate data. + */ static int sp_1024_proj_point_add_avx2_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { @@ -95482,252 +95434,149 @@ static int sp_1024_proj_point_add_avx2_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024 ctx->state = 1; break; case 1: - /* Check double */ - (void)sp_1024_sub_avx2_16(ctx->t1, p1024_mod, q->y); - sp_1024_norm_avx2_16(ctx->t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_avx2_16(p->x, q->x) & sp_1024_cmp_equal_avx2_16(p->z, q->z) & - (sp_1024_cmp_equal_avx2_16(p->y, q->y) | sp_1024_cmp_equal_avx2_16(p->y, ctx->t1))) != 0) - { - XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); - ctx->state = 2; - } - else { - ctx->state = 3; - } + /* U1 = X1*Z2^2 */ + sp_1024_mont_sqr_avx2_16(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 2; break; case 2: - err = sp_1024_proj_point_dbl_avx2_16_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t); - if (err == MP_OKAY) - ctx->state = 27; /* done */ + sp_1024_mont_mul_avx2_16(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + ctx->state = 3; break; case 3: - { + sp_1024_mont_mul_avx2_16(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); ctx->state = 4; break; - } case 4: - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_avx2_16(ctx->t1, q->z, p1024_mod, p1024_mp_mod); + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_avx2_16(ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 5; break; case 5: - sp_1024_mont_mul_avx2_16(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); ctx->state = 6; break; case 6: - sp_1024_mont_mul_avx2_16(ctx->t1, ctx->t1, p->x, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); ctx->state = 7; break; case 7: - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_avx2_16(ctx->t2, p->z, p1024_mod, p1024_mp_mod); + /* S1 = Y1*Z2^3 */ + sp_1024_mont_mul_avx2_16(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); ctx->state = 8; break; case 8: - sp_1024_mont_mul_avx2_16(ctx->t4, ctx->t2, p->z, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_avx2_16(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); ctx->state = 9; break; case 9: - sp_1024_mont_mul_avx2_16(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod); - ctx->state = 10; + /* Check double */ + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_16(ctx->t2, ctx->t1) & + sp_1024_cmp_equal_16(ctx->t4, ctx->t3)) { + XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); + sp_1024_proj_point_dbl_avx2_16(r, p, t); + ctx->state = 25; + } + else { + ctx->state = 10; + } break; case 10: - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_avx2_16(ctx->t3, ctx->t3, p->y, p1024_mod, p1024_mp_mod); + /* H = U2 - U1 */ + sp_1024_mont_sub_avx2_16(ctx->t2, ctx->t2, ctx->t1, p1024_mod); ctx->state = 11; break; case 11: - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_avx2_16(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod); + /* R = S2 - S1 */ + sp_1024_mont_sub_avx2_16(ctx->t4, ctx->t4, ctx->t3, p1024_mod); ctx->state = 12; break; case 12: - /* H = U2 - U1 */ - sp_1024_mont_sub_avx2_16(ctx->t2, ctx->t2, ctx->t1, p1024_mod); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_1024_mont_sqr_avx2_16(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 13; break; case 13: - /* R = S2 - S1 */ - sp_1024_mont_sub_avx2_16(ctx->t4, ctx->t4, ctx->t3, p1024_mod); + sp_1024_mont_mul_avx2_16(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); ctx->state = 14; break; case 14: - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_avx2_16(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 15; break; case 15: - sp_1024_mont_mul_avx2_16(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod); + /* Z3 = H*Z1*Z2 */ + sp_1024_mont_mul_avx2_16(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); ctx->state = 16; break; case 16: - sp_1024_mont_mul_avx2_16(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); ctx->state = 17; break; case 17: - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_avx2_16(ctx->z, p->z, ctx->t2, p1024_mod, p1024_mp_mod); + sp_1024_mont_sqr_avx2_16(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 18; break; case 18: - sp_1024_mont_mul_avx2_16(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_sub_avx2_16(ctx->x, ctx->x, ctx->t5, p1024_mod); ctx->state = 19; break; case 19: - sp_1024_mont_sqr_avx2_16(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); ctx->state = 20; break; case 20: - sp_1024_mont_sub_avx2_16(ctx->x, ctx->x, ctx->t5, p1024_mod); + sp_1024_mont_dbl_avx2_16(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_sub_avx2_16(ctx->x, ctx->x, ctx->t3, p1024_mod); ctx->state = 21; break; case 21: - sp_1024_mont_mul_avx2_16(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_1024_mont_sub_lower_avx2_16(ctx->y, ctx->y, ctx->x, p1024_mod); ctx->state = 22; break; case 22: - sp_1024_mont_dbl_avx2_16(ctx->t3, ctx->y, p1024_mod); + sp_1024_mont_mul_avx2_16(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); ctx->state = 23; break; case 23: - sp_1024_mont_sub_avx2_16(ctx->x, ctx->x, ctx->t3, p1024_mod); + sp_1024_mont_sub_avx2_16(ctx->y, ctx->y, ctx->t5, p1024_mod); ctx->state = 24; break; case 24: - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_avx2_16(ctx->y, ctx->y, ctx->x, p1024_mod); - ctx->state = 25; - break; - case 25: - sp_1024_mont_mul_avx2_16(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod); - ctx->state = 26; - break; - case 26: - sp_1024_mont_sub_avx2_16(ctx->y, ctx->y, ctx->t5, p1024_mod); - ctx->state = 27; - /* fall-through */ - case 27: { int i; sp_digit maskp = 0 - (q->infinity & (!p->infinity)); sp_digit maskq = 0 - (p->infinity & (!q->infinity)); sp_digit maskt = ~(maskp | maskq); + for (i = 0; i < 16; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (ctx->x[i] & maskt); + r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | (ctx->x[i] & maskt); } for (i = 0; i < 16; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (ctx->y[i] & maskt); + r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | (ctx->y[i] & maskt); } for (i = 0; i < 16; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (ctx->z[i] & maskt); + r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | (ctx->z[i] & maskt); } r->z[0] |= p->infinity & q->infinity; r->infinity = p->infinity & q->infinity; - - err = MP_OKAY; + ctx->state = 25; break; } + case 25: + err = MP_OKAY; + break; } - if (err == MP_OKAY && ctx->state != 27) { + if (err == MP_OKAY && ctx->state != 25) { err = FP_WOULDBLOCK; } return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -static void sp_1024_proj_point_add_avx2_16(sp_point_1024* r, - const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) -{ - sp_digit* t1 = t; - sp_digit* t2 = t + 2*16; - sp_digit* t3 = t + 4*16; - sp_digit* t4 = t + 6*16; - sp_digit* t5 = t + 8*16; - sp_digit* t6 = t + 10*16; - - - /* Check double */ - (void)sp_1024_mont_sub_avx2_16(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_16(t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & - (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, t1))) != 0) { - sp_1024_proj_point_dbl_avx2_16(r, p, t); - } - else { - sp_digit maskp; - sp_digit maskq; - sp_digit maskt; - sp_digit* x = t6; - sp_digit* y = t1; - sp_digit* z = t2; - int i; - - maskp = 0 - (q->infinity & (!p->infinity)); - maskq = 0 - (p->infinity & (!q->infinity)); - maskt = ~(maskp | maskq); - - /* U1 = X1*Z2^2 */ - sp_1024_mont_sqr_avx2_16(t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_avx2_16(t3, t1, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_avx2_16(t1, t1, p->x, p1024_mod, p1024_mp_mod); - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_avx2_16(t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_avx2_16(t4, t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_avx2_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S1 = Y1*Z2^3 */ - sp_1024_mont_mul_avx2_16(t3, t3, p->y, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_avx2_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); - /* H = U2 - U1 */ - sp_1024_mont_sub_avx2_16(t2, t2, t1, p1024_mod); - /* R = S2 - S1 */ - sp_1024_mont_sub_avx2_16(t4, t4, t3, p1024_mod); - if (~p->infinity & ~q->infinity & - sp_1024_iszero_16(t2) & sp_1024_iszero_16(t4) & maskt) { - sp_1024_proj_point_dbl_avx2_16(r, p, t); - } - else { - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_1024_mont_sqr_avx2_16(t5, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_avx2_16(y, t1, t5, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_avx2_16(t5, t5, t2, p1024_mod, p1024_mp_mod); - /* Z3 = H*Z1*Z2 */ - sp_1024_mont_mul_avx2_16(z, p->z, t2, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_avx2_16(z, z, q->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_sqr_avx2_16(x, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_avx2_16(x, x, t5, p1024_mod); - sp_1024_mont_mul_avx2_16(t5, t5, t3, p1024_mod, p1024_mp_mod); - sp_1024_mont_dbl_avx2_16(t3, y, p1024_mod); - sp_1024_mont_sub_avx2_16(x, x, t3, p1024_mod); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_1024_mont_sub_lower_avx2_16(y, y, x, p1024_mod); - sp_1024_mont_mul_avx2_16(y, y, t4, p1024_mod, p1024_mp_mod); - sp_1024_mont_sub_avx2_16(y, y, t5, p1024_mod); - - for (i = 0; i < 16; i++) { - r->x[i] = (p->x[i] & maskp) | (q->x[i] & maskq) | - (x[i] & maskt); - } - for (i = 0; i < 16; i++) { - r->y[i] = (p->y[i] & maskp) | (q->y[i] & maskq) | - (y[i] & maskt); - } - for (i = 0; i < 16; i++) { - r->z[i] = (p->z[i] & maskp) | (q->z[i] & maskq) | - (z[i] & maskt); - } - r->z[0] |= p->infinity & q->infinity; - r->infinity = p->infinity & q->infinity; - } - } -} - /* Double the Montgomery form projective point p a number of times. * * r Result of repeated doubling of point. @@ -95780,7 +95629,7 @@ static void sp_1024_proj_point_dbl_n_store_avx2_16(sp_point_1024* r, sp_1024_mont_sqr_avx2_16(x, a, p1024_mod, p1024_mp_mod); sp_1024_mont_dbl_avx2_16(t2, b, p1024_mod); sp_1024_mont_sub_avx2_16(x, x, t2, p1024_mod); - /* b = 2.(B - X) */ + /* B = 2.(B - X) */ sp_1024_mont_sub_lower_avx2_16(t2, b, x, p1024_mod); sp_1024_mont_dbl_lower_avx2_16(b, t2, p1024_mod); /* Z = Z*Y */ @@ -95795,7 +95644,6 @@ static void sp_1024_proj_point_dbl_n_store_avx2_16(sp_point_1024* r, /* y = 2*A*(B - X) - Y^4 */ sp_1024_mont_mul_avx2_16(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_avx2_16(y, y, t1, p1024_mod); - /* Y = Y/2 */ sp_1024_div2_avx2_16(r[j].y, y, p1024_mod); r[j].infinity = 0; @@ -96048,8 +95896,8 @@ typedef struct sp_table_entry_1024 { * q Second point to add. * t Temporary ordinate data. */ -static void sp_1024_proj_point_add_qz1_16(sp_point_1024* r, const sp_point_1024* p, - const sp_point_1024* q, sp_digit* t) +static void sp_1024_proj_point_add_qz1_16(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*16; @@ -96058,12 +95906,17 @@ static void sp_1024_proj_point_add_qz1_16(sp_point_1024* r, const sp_point_1024* sp_digit* t5 = t + 8*16; sp_digit* t6 = t + 10*16; - /* Check double */ - (void)sp_1024_mont_sub_16(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_16(t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & - (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_16(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_16(p->x, t2) & + sp_1024_cmp_equal_16(p->y, t4)) { sp_1024_proj_point_dbl_16(r, p, t); } else { @@ -96075,12 +95928,6 @@ static void sp_1024_proj_point_add_qz1_16(sp_point_1024* r, const sp_point_1024* sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_16(t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t4, t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); /* H = U2 - X1 */ sp_1024_mont_sub_16(t2, t2, p->x, p1024_mod); /* R = S2 - Y1 */ @@ -96506,8 +96353,8 @@ static int sp_1024_ecc_mulmod_16(sp_point_1024* r, const sp_point_1024* g, const * q Second point to add. * t Temporary ordinate data. */ -static void sp_1024_proj_point_add_qz1_avx2_16(sp_point_1024* r, const sp_point_1024* p, - const sp_point_1024* q, sp_digit* t) +static void sp_1024_proj_point_add_qz1_avx2_16(sp_point_1024* r, + const sp_point_1024* p, const sp_point_1024* q, sp_digit* t) { sp_digit* t1 = t; sp_digit* t2 = t + 2*16; @@ -96516,12 +96363,17 @@ static void sp_1024_proj_point_add_qz1_avx2_16(sp_point_1024* r, const sp_point_ sp_digit* t5 = t + 8*16; sp_digit* t6 = t + 10*16; - /* Check double */ - (void)sp_1024_mont_sub_avx2_16(t1, p1024_mod, q->y, p1024_mod); - sp_1024_norm_16(t1); - if ((~p->infinity & ~q->infinity & - sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & - (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, t1))) != 0) { + /* Calculate values to subtract from P->x and P->y. */ + /* U2 = X2*Z1^2 */ + sp_1024_mont_sqr_avx2_16(t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(t4, t2, p->z, p1024_mod, p1024_mp_mod); + sp_1024_mont_mul_avx2_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); + /* S2 = Y2*Z1^3 */ + sp_1024_mont_mul_avx2_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); + + if ((~p->infinity) & (~q->infinity) & + sp_1024_cmp_equal_16(p->x, t2) & + sp_1024_cmp_equal_16(p->y, t4)) { sp_1024_proj_point_dbl_avx2_16(r, p, t); } else { @@ -96533,12 +96385,6 @@ static void sp_1024_proj_point_add_qz1_avx2_16(sp_point_1024* r, const sp_point_ sp_digit* z = t6; int i; - /* U2 = X2*Z1^2 */ - sp_1024_mont_sqr_avx2_16(t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_avx2_16(t4, t2, p->z, p1024_mod, p1024_mp_mod); - sp_1024_mont_mul_avx2_16(t2, t2, q->x, p1024_mod, p1024_mp_mod); - /* S2 = Y2*Z1^3 */ - sp_1024_mont_mul_avx2_16(t4, t4, q->y, p1024_mod, p1024_mp_mod); /* H = U2 - X1 */ sp_1024_mont_sub_avx2_16(t2, t2, p->x, p1024_mod); /* R = S2 - Y1 */