diff --git a/src/ec_mult.h b/src/ec_mult.h index 3c234895..24c151ba 100644 --- a/src/ec_mult.h +++ b/src/ec_mult.h @@ -66,20 +66,22 @@ static limb_t booth_encode(limb_t wval, size_t sz) * infinity "naturally," since resulting Z is product of original Z.] */ #define POINT_MULT_SCALAR_WX_IMPL(ptype, SZ) \ -static void ptype##_gather_booth_w##SZ(ptype *restrict p, \ - const ptype table[1<<(SZ-1)], \ - limb_t booth_idx) \ +static bool_t ptype##_gather_booth_w##SZ(ptype *restrict p, \ + const ptype table[1<<(SZ-1)], \ + limb_t booth_idx) \ { \ size_t i; \ bool_t booth_sign = (booth_idx >> SZ) & 1; \ \ booth_idx &= (1< 0) { \ @@ -136,8 +139,14 @@ static void ptype##s_mult_w##SZ(ptype *ret, \ scalar = *scalar_s ? *scalar_s++ : scalar+nbytes; \ wval = get_wval(scalar, bits - 1, window + 1) & wmask; \ wval = booth_encode(wval, SZ); \ - ptype##_gather_booth_w##SZ(temp, table[i], wval); \ - ptype##_dadd(ret, ret, temp, NULL); \ + row_is_inf = ptype##_gather_booth_w##SZ(row, table[i], wval); \ + ptype##_dadd(sum, ret, row, NULL); \ + ptype##_ccopy(ret, sum, (ret_is_inf | row_is_inf) ^ 1); \ + sum_is_inf = vec_is_zero(ret->Z, sizeof(ret->Z)); \ + ret_is_inf |= sum_is_inf; \ + row_is_inf |= sum_is_inf; \ + ptype##_ccopy(ret, row, ret_is_inf); \ + ret_is_inf &= row_is_inf; \ } \ \ for (j = 0; j < SZ; j++) \ @@ -153,9 +162,17 @@ static void ptype##s_mult_w##SZ(ptype *ret, \ scalar = *scalar_s ? 
*scalar_s++ : scalar+nbytes; \ wval = (scalar[0] << 1) & wmask; \ wval = booth_encode(wval, SZ); \ - ptype##_gather_booth_w##SZ(temp, table[i], wval); \ - ptype##_dadd(ret, ret, temp, NULL); \ + row_is_inf = ptype##_gather_booth_w##SZ(row, table[i], wval); \ + ptype##_dadd(sum, ret, row, NULL); \ + ptype##_ccopy(ret, sum, (ret_is_inf | row_is_inf) ^ 1); \ + sum_is_inf = vec_is_zero(ret->Z, sizeof(ret->Z)); \ + ret_is_inf |= sum_is_inf; \ + row_is_inf |= sum_is_inf; \ + ptype##_ccopy(ret, row, ret_is_inf); \ + ret_is_inf &= row_is_inf; \ } \ +\ + vec_czero(ret->Z, sizeof(ret->Z), ret_is_inf); \ } \ \ static void ptype##_mult_w##SZ(ptype *ret, const ptype *point, \ @@ -163,7 +180,8 @@ static void ptype##_mult_w##SZ(ptype *ret, const ptype *point, \ { \ limb_t wmask, wval; \ size_t j, window; \ - ptype temp[1]; \ + ptype sum[1], row[1]; \ + bool_t sum_is_inf, row_is_inf, ret_is_inf; \ ptype table[1<<(SZ-1)]; \ \ ptype##_precompute_w##SZ(table, point); \ @@ -177,7 +195,7 @@ static void ptype##_mult_w##SZ(ptype *ret, const ptype *point, \ : (limb_t)scalar[0] << 1; \ wval &= wmask; \ wval = booth_encode(wval, SZ); \ - ptype##_gather_booth_w##SZ(ret, table, wval); \ + ret_is_inf = ptype##_gather_booth_w##SZ(ret, table, wval); \ \ while (bits > 0) { \ for (j = 0; j < SZ; j++) \ @@ -191,10 +209,18 @@ static void ptype##_mult_w##SZ(ptype *ret, const ptype *point, \ : (limb_t)scalar[0] << 1; \ wval &= wmask; \ wval = booth_encode(wval, SZ); \ - ptype##_gather_booth_w##SZ(temp, table, wval); \ - if (bits > 0) ptype##_add(ret, ret, temp); \ - else ptype##_dadd(ret, ret, temp, NULL); \ + row_is_inf = ptype##_gather_booth_w##SZ(row, table, wval); \ + if (bits > 0) ptype##_add(sum, ret, row); \ + else ptype##_dadd(sum, ret, row, NULL); \ + ptype##_ccopy(ret, sum, (ret_is_inf | row_is_inf) ^ 1); \ + sum_is_inf = vec_is_zero(ret->Z, sizeof(ret->Z)); \ + ret_is_inf |= sum_is_inf; \ + row_is_inf |= sum_is_inf; \ + ptype##_ccopy(ret, row, ret_is_inf); \ + ret_is_inf &= row_is_inf; \ } 
\ +\ + vec_czero(ret->Z, sizeof(ret->Z), ret_is_inf); /* AND-masks ret->Z down to zero when ret_is_inf is 1 and leaves it untouched when it is 0 */ \ } #if 0 diff --git a/src/vect.h b/src/vect.h index 554dd5da..19640b11 100644 --- a/src/vect.h +++ b/src/vect.h @@ -381,6 +381,21 @@ static inline void vec_zero(void *ret, size_t num) #endif } +static inline void vec_czero(void *ret, size_t num, bool_t cbit) /* Conditionally clears the num-byte buffer at ret, viewed as limb_t words: every limb is ANDed with a mask that is zero when cbit is 1 and all-ones when cbit is 0, so the loop performs the same loads and stores either way. NOTE(review): assumes cbit is strictly 0 or 1 and that limb_t is an unsigned type - confirm against their typedefs. */ +{ + limb_t *rp = (limb_t *)ret; /* the buffer is read and written one limb_t at a time */ + size_t i; + limb_t mask; + + launder(cbit); /* NOTE(review): launder is defined outside this hunk; presumably it hides the value of cbit from the optimizer so the masking below is not turned into a branch - confirm */ + mask = (limb_t)0 - (cbit^1); /* cbit == 1 gives 0 - 0 = 0 (clear); cbit == 0 gives 0 - 1, which wraps to all-ones for an unsigned limb_t (keep) */ + + num /= sizeof(limb_t); /* convert the byte count into a limb count; integer division means any trailing partial limb is not touched */ + + for (i = 0; i < num; i++) + rp[i] &= mask; /* unconditional read-modify-write of every limb, no branch on cbit */ +} + /* * Some compilers get arguably overzealous(*) when passing pointer to * multi-dimensional array [such as vec384x] as 'const' argument.