Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Signed-digit multi-comb for ecmult_gen #546

Closed
wants to merge 8 commits into from
86 changes: 86 additions & 0 deletions src/ecmult_gen.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,92 @@
#include "scalar.h"
#include "group.h"

#define USE_COMB 1

#if USE_COMB

#if defined(EXHAUSTIVE_TEST_ORDER)

/* We need to control these values for exhaustive tests because
* the tables cannot have infinities in them (secp256k1_ge_storage
* doesn't support infinities) */
# if EXHAUSTIVE_TEST_ORDER > 32
# define COMB_BLOCKS 52
# define COMB_TEETH 5
# elif EXHAUSTIVE_TEST_ORDER > 16
# define COMB_BLOCKS 64
# define COMB_TEETH 4
# elif EXHAUSTIVE_TEST_ORDER > 8
# define COMB_BLOCKS 86
# define COMB_TEETH 3
# elif EXHAUSTIVE_TEST_ORDER > 4
# define COMB_BLOCKS 128
# define COMB_TEETH 2
# else
# define COMB_BLOCKS 256
# define COMB_TEETH 1
# endif

# define COMB_SPACING 1
# define COMB_NEGATION 1

#else

/* COMB_BLOCKS, COMB_TEETH, COMB_SPACING must all be positive and the product of the three (COMB_BITS)
* must evaluate to a value in the range [256, 288]. The COMB_NEGATION boolean controls whether the
* comb will use negations so that only negative multiples need be precomputed. The resulting memory
* usage for precomputation will be COMB_POINTS_TOTAL * sizeof(secp256k1_ge_storage).
*/
#define COMB_BLOCKS 4
#define COMB_TEETH 5
#define COMB_SPACING 13
#define COMB_NEGATION 1

#endif

#if !(1 <= COMB_BLOCKS && COMB_BLOCKS <= 256)
# error "COMB_BLOCKS must be in the range [1, 256]"
#endif
#if !(1 <= COMB_TEETH && COMB_TEETH <= 8)
# error "COMB_TEETH must be in the range [1, 8]"
#endif
#if !(1 <= COMB_SPACING && COMB_SPACING <= 256)
# error "COMB_SPACING must be in the range [1, 256]"
#endif
#if !(0 <= COMB_NEGATION && COMB_NEGATION <= 1)
# error "COMB_NEGATION must be in the range [0, 1]"
#endif

/* The remaining COMB_* parameters are derived values, don't modify these. */
#define COMB_BITS (COMB_BLOCKS * COMB_TEETH * COMB_SPACING)
#define COMB_GROUPED ((COMB_SPACING == 1) && ((32 % COMB_TEETH) == 0))
Copy link
Contributor

@apoelstra apoelstra Sep 23, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should drop COMB_GROUPED because it's impossible for 32 % COMB_TEETH to be 0. (If COMB_TEETH is zero, things obviously won't work...but if it's ≥ 32, then several other things break: COMB_MASK overflows; the bits = expression in ecmult_gen_impl.h:228 left-shifts too far; I also get this bizarre error about the size of prec being negative in ecmult_gen.h:71.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact, once COMB_TEETH gets much past 10 we start running into trouble in secp256k1_ecmult_gen_context_build because we put COMB_POINTS_TOTAL-many gejs on the stack.

Given that this is a ridiculous thing to do I don't think we should try to support it, e.g. by using heap allocations rather than stack allocations in gen_context_build. We should just assume it doesn't happen and drop COMB_GROUPED.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume you are misreading 32%T, which is 0 when T is a small power of 2. Still, practical values of COMB_TEETH probably peak at 8, and there is the stack concern as you say, so we should probably have the precompiler constrain it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, I misread 32 % COMB_TEETH (which is 0 for 1,2,4,8) as COMB_TEETH % 32 (which can't be 0). Let me reassess COMB_GROUPED in that light.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

4c8ff9b adds precompiler guards on the comb constants.

#define COMB_OFFSET (COMB_BITS == 256)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add some documentation somewhere about what the offset does?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done: e8beef9 .

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great, thanks!

#define COMB_POINTS (1 << (COMB_TEETH - COMB_NEGATION))
#define COMB_POINTS_TOTAL (COMB_BLOCKS * COMB_POINTS)
#define COMB_MASK (COMB_POINTS - 1)

#if !(256 <= COMB_BITS && COMB_BITS <= 288)
# error "COMB_BITS must be in the range [256, 288]"
#endif

#endif

typedef struct {
#if USE_COMB
/* Precomputation data for the signed-digit multi-comb algorithm as described in section 3.3 of:
* "Fast and compact elliptic-curve cryptography", Mike Hamburg
* (https://eprint.iacr.org/2012/309)
*/
secp256k1_ge_storage (*prec)[COMB_BLOCKS][COMB_POINTS];
#if COMB_OFFSET
/* Signed recoding of a 256-bit scalar must be at least 257 bits, with the top bit always 1. We
* support a 256-bit comb over a 257-bit recoding by pre-adding an 'offset' value to the context's
* 'initial' value, to account for the high 1 bit. Note that the 'offset' is calculated to allow
* for the (COMB_SPACING - 1) doublings in the _ecmult_gen ladder.
*/
secp256k1_ge offset;
#endif
#else
/* For accelerating the computation of a*G:
* To harden against timing attacks, use the following mechanism:
* * Break up the multiplicand into groups of 4 bits, called n_0, n_1, n_2, ..., n_63.
Expand All @@ -24,6 +109,7 @@ typedef struct {
* the intermediate sums while computing a*G.
*/
secp256k1_ge_storage (*prec)[64][16]; /* prec[j][i] = 16^j * i * G + U_i */
#endif
secp256k1_scalar blind;
secp256k1_gej initial;
} secp256k1_ecmult_gen_context;
Expand Down
177 changes: 176 additions & 1 deletion src/ecmult_gen_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,72 @@ static void secp256k1_ecmult_gen_context_init(secp256k1_ecmult_gen_context *ctx)

static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx, const secp256k1_callback* cb) {
#ifndef USE_ECMULT_STATIC_PRECOMPUTATION
#if USE_COMB
secp256k1_ge prec[COMB_POINTS_TOTAL + COMB_OFFSET];
secp256k1_gej u, sum;
int block, index, spacing, stride, tooth;
#else
secp256k1_ge prec[1024];
secp256k1_gej gj;
secp256k1_gej nums_gej;
int i, j;
#endif
#endif

if (ctx->prec != NULL) {
return;
}
#ifndef USE_ECMULT_STATIC_PRECOMPUTATION
#if USE_COMB
ctx->prec = (secp256k1_ge_storage (*)[COMB_BLOCKS][COMB_POINTS])checked_malloc(cb, sizeof(*ctx->prec));

/* get the generator */
secp256k1_gej_set_ge(&u, &secp256k1_ge_const_g);

/* compute prec. */
{
secp256k1_gej ds[COMB_TEETH];
secp256k1_gej vs[COMB_POINTS_TOTAL + COMB_OFFSET];
int vs_pos = 0;

for (block = 0; block < COMB_BLOCKS; ++block) {
secp256k1_gej_set_infinity(&sum);
for (tooth = 0; tooth < COMB_TEETH; ++tooth) {
secp256k1_gej_add_var(&sum, &sum, &u, NULL);
secp256k1_gej_double(&u, &u);
ds[tooth] = u;
if (block + tooth != COMB_BLOCKS + COMB_TEETH - 2) {
for (spacing = 1; spacing < COMB_SPACING; ++spacing) {
secp256k1_gej_double(&u, &u);
}
}
}
secp256k1_gej_neg(&vs[vs_pos++], &sum);
for (tooth = 0; tooth < (COMB_TEETH - COMB_NEGATION); ++tooth) {
stride = 1 << tooth;
for (index = 0; index < stride; ++index, ++vs_pos) {
secp256k1_gej_add_var(&vs[vs_pos], &vs[vs_pos - stride], &ds[tooth], NULL);
}
}
}
VERIFY_CHECK(vs_pos == COMB_POINTS_TOTAL);
#if COMB_OFFSET
vs[COMB_POINTS_TOTAL] = ds[COMB_TEETH - 1];
#endif
secp256k1_ge_set_all_gej_var(prec, vs, COMB_POINTS_TOTAL + COMB_OFFSET, cb);
}

for (block = 0; block < COMB_BLOCKS; ++block) {
for (index = 0; index < COMB_POINTS; ++index) {
secp256k1_ge_to_storage(&(*ctx->prec)[block][index], &prec[block * COMB_POINTS + index]);
}
}

#if COMB_OFFSET
ctx->offset = prec[COMB_POINTS_TOTAL];
#endif

#else
ctx->prec = (secp256k1_ge_storage (*)[64][16])checked_malloc(cb, sizeof(*ctx->prec));

/* get the generator */
Expand Down Expand Up @@ -84,9 +140,17 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx
secp256k1_ge_to_storage(&(*ctx->prec)[j][i], &prec[j*16 + i]);
}
}
#endif
#else
(void)cb;
ctx->prec = (secp256k1_ge_storage (*)[64][16])secp256k1_ecmult_static_context;
#if USE_COMB
ctx->prec = (secp256k1_ge_storage (*)[COMB_BLOCKS][COMB_POINTS])secp256k1_ecmult_gen_ctx_prec;
#if COMB_OFFSET
secp256k1_ge_from_storage(&ctx->offset, &secp256k1_ecmult_gen_ctx_offset);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

secp256k1_ecmult_gen_ctx_offset is not declared except in gen_context.c, so this line doesn't compile for me when I set the parameters to 4/4/16.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a build system issue; there's no dependency of gen_context on ecmult_gen.h (and presumably others). At the moment, after changing comb parameters in ecmult_gen.h, you'd need to touch gen_context.c (or just make clean).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has burned me multiple times while testing -- @sipa can you advise how to fix this?

#endif
#else
ctx->prec = (secp256k1_ge_storage (*)[64][16])secp256k1_ecmult_gen_ctx_prec;
#endif
#endif
secp256k1_ecmult_gen_blind(ctx, NULL);
}
Expand All @@ -101,11 +165,21 @@ static void secp256k1_ecmult_gen_context_clone(secp256k1_ecmult_gen_context *dst
dst->prec = NULL;
} else {
#ifndef USE_ECMULT_STATIC_PRECOMPUTATION
#if USE_COMB
dst->prec = (secp256k1_ge_storage (*)[COMB_BLOCKS][COMB_POINTS])checked_malloc(cb, sizeof(*dst->prec));
#else
dst->prec = (secp256k1_ge_storage (*)[64][16])checked_malloc(cb, sizeof(*dst->prec));
#endif
memcpy(dst->prec, src->prec, sizeof(*dst->prec));
#else
(void)cb;
dst->prec = src->prec;
#endif

#if USE_COMB
#if COMB_OFFSET
dst->offset = src->offset;
#endif
#endif
dst->initial = src->initial;
dst->blind = src->blind;
Expand All @@ -115,6 +189,11 @@ static void secp256k1_ecmult_gen_context_clone(secp256k1_ecmult_gen_context *dst
static void secp256k1_ecmult_gen_context_clear(secp256k1_ecmult_gen_context *ctx) {
#ifndef USE_ECMULT_STATIC_PRECOMPUTATION
free(ctx->prec);
#endif
#if USE_COMB
#if COMB_OFFSET
secp256k1_ge_clear(&ctx->offset);
#endif
#endif
secp256k1_scalar_clear(&ctx->blind);
secp256k1_gej_clear(&ctx->initial);
Expand All @@ -126,6 +205,81 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
secp256k1_ge_storage adds;
secp256k1_scalar gnb;
int bits;

#if USE_COMB

#if COMB_NEGATION
secp256k1_fe neg;
int sign;
#endif
int abs, bit_pos, block, comb_off, index;
#if !COMB_GROUPED
int bit, tooth;
#endif
uint32_t recoded[9];

memset(&adds, 0, sizeof(adds));
*r = ctx->initial;

/* Blind scalar/point multiplication by computing (n-b)G + bG instead of nG. */
secp256k1_scalar_add(&gnb, gn, &ctx->blind);
secp256k1_scalar_signed_recoding(recoded, &gnb, COMB_BITS + COMB_OFFSET);

comb_off = COMB_SPACING - 1;
for (;;) {
bit_pos = comb_off;
for (block = 0; block < COMB_BLOCKS; ++block) {
#if COMB_GROUPED
bits = recoded[bit_pos >> 5] >> (bit_pos & 0x1F);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be done as part of a cmov ladder, like the old algorithm, to avoid cache-timing attacks?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is reading a window from the scalar. IIUC, the cmov ladder you mean is the _ge_storage_cmov loop at lines 255-257.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, yes! I missed that - you do indeed have a cmov ladder at the point where the secret data is actually used. Thanks.

bit_pos += COMB_TEETH;
#else
bits = 0;
for (tooth = 0; tooth < COMB_TEETH; ++tooth) {
bit = recoded[bit_pos >> 5] >> (bit_pos & 0x1F);
bits &= ~(1 << tooth);
bits ^= bit << tooth;
bit_pos += COMB_SPACING;
}
#endif

#if COMB_NEGATION
sign = (bits >> (COMB_TEETH - 1)) & 1;
VERIFY_CHECK(sign == 0 || sign == 1);

bits ^= -sign;
#endif

abs = bits & COMB_MASK;
VERIFY_CHECK(0 <= abs && abs < COMB_POINTS);

for (index = 0; index < COMB_POINTS; ++index) {
secp256k1_ge_storage_cmov(&adds, &(*ctx->prec)[block][index], index == abs);
}

secp256k1_ge_from_storage(&add, &adds);
#if COMB_NEGATION
secp256k1_fe_negate(&neg, &add.y, 1);
secp256k1_fe_cmov(&add.y, &neg, sign);
#endif

secp256k1_gej_add_ge(r, r, &add);
}

if (--comb_off < 0) {
break;
}

secp256k1_gej_double(r, r);
}

#if COMB_NEGATION
secp256k1_fe_clear(&neg);
sign = 0;
#endif
memset(recoded, 0, sizeof(recoded));
abs = 0;

#else
int i, j;
memset(&adds, 0, sizeof(adds));
*r = ctx->initial;
Expand All @@ -150,13 +304,18 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
secp256k1_ge_from_storage(&add, &adds);
secp256k1_gej_add_ge(r, r, &add);
}
#endif
bits = 0;
secp256k1_ge_clear(&add);
memset(&adds, 0, sizeof(adds));
secp256k1_scalar_clear(&gnb);
}

/* Setup blinding values for secp256k1_ecmult_gen. */
static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const unsigned char *seed32) {
#if USE_COMB
int spacing;
#endif
secp256k1_scalar b;
secp256k1_gej gb;
secp256k1_fe s;
Expand All @@ -169,6 +328,14 @@ static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const
secp256k1_gej_set_ge(&ctx->initial, &secp256k1_ge_const_g);
secp256k1_gej_neg(&ctx->initial, &ctx->initial);
secp256k1_scalar_set_int(&ctx->blind, 1);
#if USE_COMB
for (spacing = 1; spacing < COMB_SPACING; ++spacing) {
secp256k1_scalar_add(&ctx->blind, &ctx->blind, &ctx->blind);
}
#if COMB_OFFSET
secp256k1_gej_add_ge(&ctx->initial, &ctx->initial, &ctx->offset);
#endif
#endif
}
/* The prior blinding value (if not reset) is chained forward by including it in the hash. */
secp256k1_scalar_get_b32(nonce32, &ctx->blind);
Expand Down Expand Up @@ -203,6 +370,14 @@ static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const
secp256k1_scalar_negate(&b, &b);
ctx->blind = b;
ctx->initial = gb;
#if USE_COMB
for (spacing = 1; spacing < COMB_SPACING; ++spacing) {
secp256k1_scalar_add(&ctx->blind, &ctx->blind, &ctx->blind);
}
#if COMB_OFFSET
secp256k1_gej_add_ge(&ctx->initial, &ctx->initial, &ctx->offset);
#endif
#endif
secp256k1_scalar_clear(&b);
secp256k1_gej_clear(&gb);
}
Expand Down
Loading