Loading CHANGES +13 −0 Original line number Diff line number Diff line Loading @@ -9,6 +9,19 @@ Changes between 1.0.2p and 1.0.2q [xx XXX xxxx] *) Microarchitecture timing vulnerability in ECC scalar multiplication OpenSSL ECC scalar multiplication, used in e.g. ECDSA and ECDH, has been shown to be vulnerable to a microarchitecture timing side channel attack. An attacker with sufficient access to mount local timing attacks during ECDSA signature generation could recover the private key. This issue was reported to OpenSSL on 26th October 2018 by Alejandro Cabrera Aldaya, Billy Brumley, Sohaib ul Hassan, Cesar Pereida Garcia and Nicola Tuveri. (CVE-2018-5407) [Billy Brumley] *) Resolve a compatibility issue in EC_GROUP handling with the FIPS Object Module, accidentally introduced while backporting security fixes from the development branch and hindering the use of ECC in FIPS mode. Loading crypto/bn/bn_lib.c +32 −0 Original line number Diff line number Diff line Loading @@ -903,6 +903,38 @@ void BN_consttime_swap(BN_ULONG condition, BIGNUM *a, BIGNUM *b, int nwords) a->top ^= t; b->top ^= t; t = (a->neg ^ b->neg) & condition; a->neg ^= t; b->neg ^= t; /*- * BN_FLG_STATIC_DATA: indicates that data may not be written to. Intention * is actually to treat it as it's read-only data, and some (if not most) * of it does reside in read-only segment. In other words observation of * BN_FLG_STATIC_DATA in BN_consttime_swap should be treated as fatal * condition. It would either cause SEGV or effectively cause data * corruption. * * BN_FLG_MALLOCED: refers to BN structure itself, and hence must be * preserved. * * BN_FLG_SECURE: must be preserved, because it determines how x->d was * allocated and hence how to free it. * * BN_FLG_CONSTTIME: sufficient to mask and swap * * BN_FLG_FIXED_TOP: indicates that we haven't called bn_correct_top() on * the data, so the d array may be padded with additional 0 values (i.e. * top could be greater than the minimal value that it could be). We should * be swapping it */ #define BN_CONSTTIME_SWAP_FLAGS (BN_FLG_CONSTTIME | BN_FLG_FIXED_TOP) t = ((a->flags ^ b->flags) & BN_CONSTTIME_SWAP_FLAGS) & condition; a->flags ^= t; b->flags ^= t; #define BN_CONSTTIME_SWAP(ind) \ do { \ t = (a->d[ind] ^ b->d[ind]) & condition; \ Loading crypto/ec/ec_mult.c +246 −0 Original line number Diff line number Diff line Loading @@ -310,6 +310,224 @@ static signed char *compute_wNAF(const BIGNUM *scalar, int w, size_t *ret_len) return r; } #define EC_POINT_BN_set_flags(P, flags) do { \ BN_set_flags(&(P)->X, (flags)); \ BN_set_flags(&(P)->Y, (flags)); \ BN_set_flags(&(P)->Z, (flags)); \ } while(0) /*- * This functions computes (in constant time) a point multiplication over the * EC group. * * At a high level, it is Montgomery ladder with conditional swaps. * * It performs either a fixed scalar point multiplication * (scalar * generator) * when point is NULL, or a generic scalar point multiplication * (scalar * point) * when point is not NULL. * * scalar should be in the range [0,n) otherwise all constant time bets are off. * * NB: This says nothing about EC_POINT_add and EC_POINT_dbl, * which of course are not constant time themselves. * * The product is stored in r. * * Returns 1 on success, 0 otherwise. */ static int ec_mul_consttime(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, const EC_POINT *point, BN_CTX *ctx) { int i, cardinality_bits, group_top, kbit, pbit, Z_is_one; EC_POINT *s = NULL; BIGNUM *k = NULL; BIGNUM *lambda = NULL; BIGNUM *cardinality = NULL; BN_CTX *new_ctx = NULL; int ret = 0; if (ctx == NULL && (ctx = new_ctx = BN_CTX_new()) == NULL) return 0; BN_CTX_start(ctx); s = EC_POINT_new(group); if (s == NULL) goto err; if (point == NULL) { if (!EC_POINT_copy(s, group->generator)) goto err; } else { if (!EC_POINT_copy(s, point)) goto err; } EC_POINT_BN_set_flags(s, BN_FLG_CONSTTIME); cardinality = BN_CTX_get(ctx); lambda = BN_CTX_get(ctx); k = BN_CTX_get(ctx); if (k == NULL || !BN_mul(cardinality, &group->order, &group->cofactor, ctx)) goto err; /* * Group cardinalities are often on a word boundary. * So when we pad the scalar, some timing diff might * pop if it needs to be expanded due to carries. * So expand ahead of time. */ cardinality_bits = BN_num_bits(cardinality); group_top = cardinality->top; if ((bn_wexpand(k, group_top + 2) == NULL) || (bn_wexpand(lambda, group_top + 2) == NULL)) goto err; if (!BN_copy(k, scalar)) goto err; BN_set_flags(k, BN_FLG_CONSTTIME); if ((BN_num_bits(k) > cardinality_bits) || (BN_is_negative(k))) { /*- * this is an unusual input, and we don't guarantee * constant-timeness */ if (!BN_nnmod(k, k, cardinality, ctx)) goto err; } if (!BN_add(lambda, k, cardinality)) goto err; BN_set_flags(lambda, BN_FLG_CONSTTIME); if (!BN_add(k, lambda, cardinality)) goto err; /* * lambda := scalar + cardinality * k := scalar + 2*cardinality */ kbit = BN_is_bit_set(lambda, cardinality_bits); BN_consttime_swap(kbit, k, lambda, group_top + 2); group_top = group->field.top; if ((bn_wexpand(&s->X, group_top) == NULL) || (bn_wexpand(&s->Y, group_top) == NULL) || (bn_wexpand(&s->Z, group_top) == NULL) || (bn_wexpand(&r->X, group_top) == NULL) || (bn_wexpand(&r->Y, group_top) == NULL) || (bn_wexpand(&r->Z, group_top) == NULL)) goto err; /* top bit is a 1, in a fixed pos */ if (!EC_POINT_copy(r, s)) goto err; EC_POINT_BN_set_flags(r, BN_FLG_CONSTTIME); if (!EC_POINT_dbl(group, s, s, ctx)) goto err; pbit = 0; #define EC_POINT_CSWAP(c, a, b, w, t) do { \ BN_consttime_swap(c, &(a)->X, &(b)->X, w); \ BN_consttime_swap(c, &(a)->Y, &(b)->Y, w); \ BN_consttime_swap(c, &(a)->Z, &(b)->Z, w); \ t = ((a)->Z_is_one ^ (b)->Z_is_one) & (c); \ (a)->Z_is_one ^= (t); \ (b)->Z_is_one ^= (t); \ } while(0) /*- * The ladder step, with branches, is * * k[i] == 0: S = add(R, S), R = dbl(R) * k[i] == 1: R = add(S, R), S = dbl(S) * * Swapping R, S conditionally on k[i] leaves you with state * * k[i] == 0: T, U = R, S * k[i] == 1: T, U = S, R * * Then perform the ECC ops. * * U = add(T, U) * T = dbl(T) * * Which leaves you with state * * k[i] == 0: U = add(R, S), T = dbl(R) * k[i] == 1: U = add(S, R), T = dbl(S) * * Swapping T, U conditionally on k[i] leaves you with state * * k[i] == 0: R, S = T, U * k[i] == 1: R, S = U, T * * Which leaves you with state * * k[i] == 0: S = add(R, S), R = dbl(R) * k[i] == 1: R = add(S, R), S = dbl(S) * * So we get the same logic, but instead of a branch it's a * conditional swap, followed by ECC ops, then another conditional swap. * * Optimization: The end of iteration i and start of i-1 looks like * * ... * CSWAP(k[i], R, S) * ECC * CSWAP(k[i], R, S) * (next iteration) * CSWAP(k[i-1], R, S) * ECC * CSWAP(k[i-1], R, S) * ... * * So instead of two contiguous swaps, you can merge the condition * bits and do a single swap. * * k[i] k[i-1] Outcome * 0 0 No Swap * 0 1 Swap * 1 0 Swap * 1 1 No Swap * * This is XOR. pbit tracks the previous bit of k. */ for (i = cardinality_bits - 1; i >= 0; i--) { kbit = BN_is_bit_set(k, i) ^ pbit; EC_POINT_CSWAP(kbit, r, s, group_top, Z_is_one); if (!EC_POINT_add(group, s, r, s, ctx)) goto err; if (!EC_POINT_dbl(group, r, r, ctx)) goto err; /* * pbit logic merges this cswap with that of the * next iteration */ pbit ^= kbit; } /* one final cswap to move the right value into r */ EC_POINT_CSWAP(pbit, r, s, group_top, Z_is_one); #undef EC_POINT_CSWAP ret = 1; err: EC_POINT_free(s); BN_CTX_end(ctx); BN_CTX_free(new_ctx); return ret; } #undef EC_POINT_BN_set_flags /* * TODO: table should be optimised for the wNAF-based implementation, * sometimes smaller windows will give better performance (thus the Loading Loading @@ -369,6 +587,34 @@ int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, return EC_POINT_set_to_infinity(group, r); } if (!BN_is_zero(&group->order) && !BN_is_zero(&group->cofactor)) { /*- * Handle the common cases where the scalar is secret, enforcing a constant * time scalar multiplication algorithm. */ if ((scalar != NULL) && (num == 0)) { /*- * In this case we want to compute scalar * GeneratorPoint: this * codepath is reached most prominently by (ephemeral) key generation * of EC cryptosystems (i.e. ECDSA keygen and sign setup, ECDH * keygen/first half), where the scalar is always secret. This is why * we ignore if BN_FLG_CONSTTIME is actually set and we always call the * constant time version. */ return ec_mul_consttime(group, r, scalar, NULL, ctx); } if ((scalar == NULL) && (num == 1)) { /*- * In this case we want to compute scalar * GenericPoint: this codepath * is reached most prominently by the second half of ECDH, where the * secret scalar is multiplied by the peer's public point. To protect * the secret scalar, we ignore if BN_FLG_CONSTTIME is actually set and * we always call the constant time version. */ return ec_mul_consttime(group, r, scalars[0], points[0], ctx); } } for (i = 0; i < num; i++) { if (group->meth != points[i]->meth) { ECerr(EC_F_EC_WNAF_MUL, EC_R_INCOMPATIBLE_OBJECTS); Loading Loading
CHANGES +13 −0 Original line number Diff line number Diff line Loading @@ -9,6 +9,19 @@ Changes between 1.0.2p and 1.0.2q [xx XXX xxxx] *) Microarchitecture timing vulnerability in ECC scalar multiplication OpenSSL ECC scalar multiplication, used in e.g. ECDSA and ECDH, has been shown to be vulnerable to a microarchitecture timing side channel attack. An attacker with sufficient access to mount local timing attacks during ECDSA signature generation could recover the private key. This issue was reported to OpenSSL on 26th October 2018 by Alejandro Cabrera Aldaya, Billy Brumley, Sohaib ul Hassan, Cesar Pereida Garcia and Nicola Tuveri. (CVE-2018-5407) [Billy Brumley] *) Resolve a compatibility issue in EC_GROUP handling with the FIPS Object Module, accidentally introduced while backporting security fixes from the development branch and hindering the use of ECC in FIPS mode. Loading
crypto/bn/bn_lib.c +32 −0 Original line number Diff line number Diff line Loading @@ -903,6 +903,38 @@ void BN_consttime_swap(BN_ULONG condition, BIGNUM *a, BIGNUM *b, int nwords) a->top ^= t; b->top ^= t; t = (a->neg ^ b->neg) & condition; a->neg ^= t; b->neg ^= t; /*- * BN_FLG_STATIC_DATA: indicates that data may not be written to. Intention * is actually to treat it as it's read-only data, and some (if not most) * of it does reside in read-only segment. In other words observation of * BN_FLG_STATIC_DATA in BN_consttime_swap should be treated as fatal * condition. It would either cause SEGV or effectively cause data * corruption. * * BN_FLG_MALLOCED: refers to BN structure itself, and hence must be * preserved. * * BN_FLG_SECURE: must be preserved, because it determines how x->d was * allocated and hence how to free it. * * BN_FLG_CONSTTIME: sufficient to mask and swap * * BN_FLG_FIXED_TOP: indicates that we haven't called bn_correct_top() on * the data, so the d array may be padded with additional 0 values (i.e. * top could be greater than the minimal value that it could be). We should * be swapping it */ #define BN_CONSTTIME_SWAP_FLAGS (BN_FLG_CONSTTIME | BN_FLG_FIXED_TOP) t = ((a->flags ^ b->flags) & BN_CONSTTIME_SWAP_FLAGS) & condition; a->flags ^= t; b->flags ^= t; #define BN_CONSTTIME_SWAP(ind) \ do { \ t = (a->d[ind] ^ b->d[ind]) & condition; \ Loading
crypto/ec/ec_mult.c +246 −0 Original line number Diff line number Diff line Loading @@ -310,6 +310,224 @@ static signed char *compute_wNAF(const BIGNUM *scalar, int w, size_t *ret_len) return r; } #define EC_POINT_BN_set_flags(P, flags) do { \ BN_set_flags(&(P)->X, (flags)); \ BN_set_flags(&(P)->Y, (flags)); \ BN_set_flags(&(P)->Z, (flags)); \ } while(0) /*- * This functions computes (in constant time) a point multiplication over the * EC group. * * At a high level, it is Montgomery ladder with conditional swaps. * * It performs either a fixed scalar point multiplication * (scalar * generator) * when point is NULL, or a generic scalar point multiplication * (scalar * point) * when point is not NULL. * * scalar should be in the range [0,n) otherwise all constant time bets are off. * * NB: This says nothing about EC_POINT_add and EC_POINT_dbl, * which of course are not constant time themselves. * * The product is stored in r. * * Returns 1 on success, 0 otherwise. */ static int ec_mul_consttime(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, const EC_POINT *point, BN_CTX *ctx) { int i, cardinality_bits, group_top, kbit, pbit, Z_is_one; EC_POINT *s = NULL; BIGNUM *k = NULL; BIGNUM *lambda = NULL; BIGNUM *cardinality = NULL; BN_CTX *new_ctx = NULL; int ret = 0; if (ctx == NULL && (ctx = new_ctx = BN_CTX_new()) == NULL) return 0; BN_CTX_start(ctx); s = EC_POINT_new(group); if (s == NULL) goto err; if (point == NULL) { if (!EC_POINT_copy(s, group->generator)) goto err; } else { if (!EC_POINT_copy(s, point)) goto err; } EC_POINT_BN_set_flags(s, BN_FLG_CONSTTIME); cardinality = BN_CTX_get(ctx); lambda = BN_CTX_get(ctx); k = BN_CTX_get(ctx); if (k == NULL || !BN_mul(cardinality, &group->order, &group->cofactor, ctx)) goto err; /* * Group cardinalities are often on a word boundary. * So when we pad the scalar, some timing diff might * pop if it needs to be expanded due to carries. * So expand ahead of time. */ cardinality_bits = BN_num_bits(cardinality); group_top = cardinality->top; if ((bn_wexpand(k, group_top + 2) == NULL) || (bn_wexpand(lambda, group_top + 2) == NULL)) goto err; if (!BN_copy(k, scalar)) goto err; BN_set_flags(k, BN_FLG_CONSTTIME); if ((BN_num_bits(k) > cardinality_bits) || (BN_is_negative(k))) { /*- * this is an unusual input, and we don't guarantee * constant-timeness */ if (!BN_nnmod(k, k, cardinality, ctx)) goto err; } if (!BN_add(lambda, k, cardinality)) goto err; BN_set_flags(lambda, BN_FLG_CONSTTIME); if (!BN_add(k, lambda, cardinality)) goto err; /* * lambda := scalar + cardinality * k := scalar + 2*cardinality */ kbit = BN_is_bit_set(lambda, cardinality_bits); BN_consttime_swap(kbit, k, lambda, group_top + 2); group_top = group->field.top; if ((bn_wexpand(&s->X, group_top) == NULL) || (bn_wexpand(&s->Y, group_top) == NULL) || (bn_wexpand(&s->Z, group_top) == NULL) || (bn_wexpand(&r->X, group_top) == NULL) || (bn_wexpand(&r->Y, group_top) == NULL) || (bn_wexpand(&r->Z, group_top) == NULL)) goto err; /* top bit is a 1, in a fixed pos */ if (!EC_POINT_copy(r, s)) goto err; EC_POINT_BN_set_flags(r, BN_FLG_CONSTTIME); if (!EC_POINT_dbl(group, s, s, ctx)) goto err; pbit = 0; #define EC_POINT_CSWAP(c, a, b, w, t) do { \ BN_consttime_swap(c, &(a)->X, &(b)->X, w); \ BN_consttime_swap(c, &(a)->Y, &(b)->Y, w); \ BN_consttime_swap(c, &(a)->Z, &(b)->Z, w); \ t = ((a)->Z_is_one ^ (b)->Z_is_one) & (c); \ (a)->Z_is_one ^= (t); \ (b)->Z_is_one ^= (t); \ } while(0) /*- * The ladder step, with branches, is * * k[i] == 0: S = add(R, S), R = dbl(R) * k[i] == 1: R = add(S, R), S = dbl(S) * * Swapping R, S conditionally on k[i] leaves you with state * * k[i] == 0: T, U = R, S * k[i] == 1: T, U = S, R * * Then perform the ECC ops. * * U = add(T, U) * T = dbl(T) * * Which leaves you with state * * k[i] == 0: U = add(R, S), T = dbl(R) * k[i] == 1: U = add(S, R), T = dbl(S) * * Swapping T, U conditionally on k[i] leaves you with state * * k[i] == 0: R, S = T, U * k[i] == 1: R, S = U, T * * Which leaves you with state * * k[i] == 0: S = add(R, S), R = dbl(R) * k[i] == 1: R = add(S, R), S = dbl(S) * * So we get the same logic, but instead of a branch it's a * conditional swap, followed by ECC ops, then another conditional swap. * * Optimization: The end of iteration i and start of i-1 looks like * * ... * CSWAP(k[i], R, S) * ECC * CSWAP(k[i], R, S) * (next iteration) * CSWAP(k[i-1], R, S) * ECC * CSWAP(k[i-1], R, S) * ... * * So instead of two contiguous swaps, you can merge the condition * bits and do a single swap. * * k[i] k[i-1] Outcome * 0 0 No Swap * 0 1 Swap * 1 0 Swap * 1 1 No Swap * * This is XOR. pbit tracks the previous bit of k. */ for (i = cardinality_bits - 1; i >= 0; i--) { kbit = BN_is_bit_set(k, i) ^ pbit; EC_POINT_CSWAP(kbit, r, s, group_top, Z_is_one); if (!EC_POINT_add(group, s, r, s, ctx)) goto err; if (!EC_POINT_dbl(group, r, r, ctx)) goto err; /* * pbit logic merges this cswap with that of the * next iteration */ pbit ^= kbit; } /* one final cswap to move the right value into r */ EC_POINT_CSWAP(pbit, r, s, group_top, Z_is_one); #undef EC_POINT_CSWAP ret = 1; err: EC_POINT_free(s); BN_CTX_end(ctx); BN_CTX_free(new_ctx); return ret; } #undef EC_POINT_BN_set_flags /* * TODO: table should be optimised for the wNAF-based implementation, * sometimes smaller windows will give better performance (thus the Loading Loading @@ -369,6 +587,34 @@ int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, return EC_POINT_set_to_infinity(group, r); } if (!BN_is_zero(&group->order) && !BN_is_zero(&group->cofactor)) { /*- * Handle the common cases where the scalar is secret, enforcing a constant * time scalar multiplication algorithm. */ if ((scalar != NULL) && (num == 0)) { /*- * In this case we want to compute scalar * GeneratorPoint: this * codepath is reached most prominently by (ephemeral) key generation * of EC cryptosystems (i.e. ECDSA keygen and sign setup, ECDH * keygen/first half), where the scalar is always secret. This is why * we ignore if BN_FLG_CONSTTIME is actually set and we always call the * constant time version. */ return ec_mul_consttime(group, r, scalar, NULL, ctx); } if ((scalar == NULL) && (num == 1)) { /*- * In this case we want to compute scalar * GenericPoint: this codepath * is reached most prominently by the second half of ECDH, where the * secret scalar is multiplied by the peer's public point. To protect * the secret scalar, we ignore if BN_FLG_CONSTTIME is actually set and * we always call the constant time version. */ return ec_mul_consttime(group, r, scalars[0], points[0], ctx); } } for (i = 0; i < num; i++) { if (group->meth != points[i]->meth) { ECerr(EC_F_EC_WNAF_MUL, EC_R_INCOMPATIBLE_OBJECTS); Loading