CVE-2018-5407 fix: ECC ladder (b18162a7) · Commits · CYBER - Cyber Security / TS 103 523 MSP / TLMSP / TLMSP OpenSSL

CHANGES

+13 −0

Original line number	Diff line number	Diff line
		@@ -9,6 +9,19 @@

		Changes between 1.0.2p and 1.0.2q [xx XXX xxxx]

		*) Microarchitecture timing vulnerability in ECC scalar multiplication

		OpenSSL ECC scalar multiplication, used in e.g. ECDSA and ECDH, has been
		shown to be vulnerable to a microarchitecture timing side channel attack.
		An attacker with sufficient access to mount local timing attacks during
		ECDSA signature generation could recover the private key.

		This issue was reported to OpenSSL on 26th October 2018 by Alejandro
		Cabrera Aldaya, Billy Brumley, Sohaib ul Hassan, Cesar Pereida Garcia and
		Nicola Tuveri.
		(CVE-2018-5407)
		[Billy Brumley]

		*) Resolve a compatibility issue in EC_GROUP handling with the FIPS Object
		Module, accidentally introduced while backporting security fixes from the
		development branch and hindering the use of ECC in FIPS mode.

crypto/bn/bn_lib.c

+32 −0

Original line number	Diff line number	Diff line
		@@ -903,6 +903,38 @@ void BN_consttime_swap(BN_ULONG condition, BIGNUM a, BIGNUM b, int nwords)
		a->top ^= t;
		b->top ^= t;

		t = (a->neg ^ b->neg) & condition;
		a->neg ^= t;
		b->neg ^= t;

		/*-
		* BN_FLG_STATIC_DATA: indicates that data may not be written to. Intention
		* is actually to treat it as it's read-only data, and some (if not most)
		* of it does reside in read-only segment. In other words observation of
		* BN_FLG_STATIC_DATA in BN_consttime_swap should be treated as fatal
		* condition. It would either cause SEGV or effectively cause data
		* corruption.
		*
		* BN_FLG_MALLOCED: refers to BN structure itself, and hence must be
		* preserved.
		*
		* BN_FLG_SECURE: must be preserved, because it determines how x->d was
		* allocated and hence how to free it.
		*
		* BN_FLG_CONSTTIME: sufficient to mask and swap
		*
		* BN_FLG_FIXED_TOP: indicates that we haven't called bn_correct_top() on
		* the data, so the d array may be padded with additional 0 values (i.e.
		* top could be greater than the minimal value that it could be). We should
		* be swapping it
		*/

		#define BN_CONSTTIME_SWAP_FLAGS (BN_FLG_CONSTTIME \| BN_FLG_FIXED_TOP)

		t = ((a->flags ^ b->flags) & BN_CONSTTIME_SWAP_FLAGS) & condition;
		a->flags ^= t;
		b->flags ^= t;

		#define BN_CONSTTIME_SWAP(ind) \
		do { \
		t = (a->d[ind] ^ b->d[ind]) & condition; \

crypto/ec/ec_mult.c

+246 −0

Original line number	Diff line number	Diff line
		@@ -310,6 +310,224 @@ static signed char compute_wNAF(const BIGNUM scalar, int w, size_t *ret_len)
		return r;
		}

		#define EC_POINT_BN_set_flags(P, flags) do { \
		BN_set_flags(&(P)->X, (flags)); \
		BN_set_flags(&(P)->Y, (flags)); \
		BN_set_flags(&(P)->Z, (flags)); \
		} while(0)

		/*-
		* This functions computes (in constant time) a point multiplication over the
		* EC group.
		*
		* At a high level, it is Montgomery ladder with conditional swaps.
		*
		* It performs either a fixed scalar point multiplication
		* (scalar * generator)
		* when point is NULL, or a generic scalar point multiplication
		* (scalar * point)
		* when point is not NULL.
		*
		* scalar should be in the range [0,n) otherwise all constant time bets are off.
		*
		* NB: This says nothing about EC_POINT_add and EC_POINT_dbl,
		* which of course are not constant time themselves.
		*
		* The product is stored in r.
		*
		* Returns 1 on success, 0 otherwise.
		*/
		static int ec_mul_consttime(const EC_GROUP group, EC_POINT r,
		const BIGNUM scalar, const EC_POINT point,
		BN_CTX *ctx)
		{
		int i, cardinality_bits, group_top, kbit, pbit, Z_is_one;
		EC_POINT *s = NULL;
		BIGNUM *k = NULL;
		BIGNUM *lambda = NULL;
		BIGNUM *cardinality = NULL;
		BN_CTX *new_ctx = NULL;
		int ret = 0;

		if (ctx == NULL && (ctx = new_ctx = BN_CTX_new()) == NULL)
		return 0;

		BN_CTX_start(ctx);

		s = EC_POINT_new(group);
		if (s == NULL)
		goto err;

		if (point == NULL) {
		if (!EC_POINT_copy(s, group->generator))
		goto err;
		} else {
		if (!EC_POINT_copy(s, point))
		goto err;
		}

		EC_POINT_BN_set_flags(s, BN_FLG_CONSTTIME);

		cardinality = BN_CTX_get(ctx);
		lambda = BN_CTX_get(ctx);
		k = BN_CTX_get(ctx);
		if (k == NULL \|\| !BN_mul(cardinality, &group->order, &group->cofactor, ctx))
		goto err;

		/*
		* Group cardinalities are often on a word boundary.
		* So when we pad the scalar, some timing diff might
		* pop if it needs to be expanded due to carries.
		* So expand ahead of time.
		*/
		cardinality_bits = BN_num_bits(cardinality);
		group_top = cardinality->top;
		if ((bn_wexpand(k, group_top + 2) == NULL)
		\|\| (bn_wexpand(lambda, group_top + 2) == NULL))
		goto err;

		if (!BN_copy(k, scalar))
		goto err;

		BN_set_flags(k, BN_FLG_CONSTTIME);

		if ((BN_num_bits(k) > cardinality_bits) \|\| (BN_is_negative(k))) {
		/*-
		* this is an unusual input, and we don't guarantee
		* constant-timeness
		*/
		if (!BN_nnmod(k, k, cardinality, ctx))
		goto err;
		}

		if (!BN_add(lambda, k, cardinality))
		goto err;
		BN_set_flags(lambda, BN_FLG_CONSTTIME);
		if (!BN_add(k, lambda, cardinality))
		goto err;
		/*
		* lambda := scalar + cardinality
		* k := scalar + 2*cardinality
		*/
		kbit = BN_is_bit_set(lambda, cardinality_bits);
		BN_consttime_swap(kbit, k, lambda, group_top + 2);

		group_top = group->field.top;
		if ((bn_wexpand(&s->X, group_top) == NULL)
		\|\| (bn_wexpand(&s->Y, group_top) == NULL)
		\|\| (bn_wexpand(&s->Z, group_top) == NULL)
		\|\| (bn_wexpand(&r->X, group_top) == NULL)
		\|\| (bn_wexpand(&r->Y, group_top) == NULL)
		\|\| (bn_wexpand(&r->Z, group_top) == NULL))
		goto err;

		/* top bit is a 1, in a fixed pos */
		if (!EC_POINT_copy(r, s))
		goto err;

		EC_POINT_BN_set_flags(r, BN_FLG_CONSTTIME);

		if (!EC_POINT_dbl(group, s, s, ctx))
		goto err;

		pbit = 0;

		#define EC_POINT_CSWAP(c, a, b, w, t) do { \
		BN_consttime_swap(c, &(a)->X, &(b)->X, w); \
		BN_consttime_swap(c, &(a)->Y, &(b)->Y, w); \
		BN_consttime_swap(c, &(a)->Z, &(b)->Z, w); \
		t = ((a)->Z_is_one ^ (b)->Z_is_one) & (c); \
		(a)->Z_is_one ^= (t); \
		(b)->Z_is_one ^= (t); \
		} while(0)

		/*-
		* The ladder step, with branches, is
		*
		* k[i] == 0: S = add(R, S), R = dbl(R)
		* k[i] == 1: R = add(S, R), S = dbl(S)
		*
		* Swapping R, S conditionally on k[i] leaves you with state
		*
		* k[i] == 0: T, U = R, S
		* k[i] == 1: T, U = S, R
		*
		* Then perform the ECC ops.
		*
		* U = add(T, U)
		* T = dbl(T)
		*
		* Which leaves you with state
		*
		* k[i] == 0: U = add(R, S), T = dbl(R)
		* k[i] == 1: U = add(S, R), T = dbl(S)
		*
		* Swapping T, U conditionally on k[i] leaves you with state
		*
		* k[i] == 0: R, S = T, U
		* k[i] == 1: R, S = U, T
		*
		* Which leaves you with state
		*
		* k[i] == 0: S = add(R, S), R = dbl(R)
		* k[i] == 1: R = add(S, R), S = dbl(S)
		*
		* So we get the same logic, but instead of a branch it's a
		* conditional swap, followed by ECC ops, then another conditional swap.
		*
		* Optimization: The end of iteration i and start of i-1 looks like
		*
		* ...
		* CSWAP(k[i], R, S)
		* ECC
		* CSWAP(k[i], R, S)
		* (next iteration)
		* CSWAP(k[i-1], R, S)
		* ECC
		* CSWAP(k[i-1], R, S)
		* ...
		*
		* So instead of two contiguous swaps, you can merge the condition
		* bits and do a single swap.
		*
		* k[i] k[i-1] Outcome
		* 0 0 No Swap
		* 0 1 Swap
		* 1 0 Swap
		* 1 1 No Swap
		*
		* This is XOR. pbit tracks the previous bit of k.
		*/

		for (i = cardinality_bits - 1; i >= 0; i--) {
		kbit = BN_is_bit_set(k, i) ^ pbit;
		EC_POINT_CSWAP(kbit, r, s, group_top, Z_is_one);
		if (!EC_POINT_add(group, s, r, s, ctx))
		goto err;
		if (!EC_POINT_dbl(group, r, r, ctx))
		goto err;
		/*
		* pbit logic merges this cswap with that of the
		* next iteration
		*/
		pbit ^= kbit;
		}
		/* one final cswap to move the right value into r */
		EC_POINT_CSWAP(pbit, r, s, group_top, Z_is_one);
		#undef EC_POINT_CSWAP

		ret = 1;

		err:
		EC_POINT_free(s);
		BN_CTX_end(ctx);
		BN_CTX_free(new_ctx);

		return ret;
		}

		#undef EC_POINT_BN_set_flags

		/*
		* TODO: table should be optimised for the wNAF-based implementation,
		* sometimes smaller windows will give better performance (thus the
		@@ -369,6 +587,34 @@ int ec_wNAF_mul(const EC_GROUP group, EC_POINT r, const BIGNUM *scalar,
		return EC_POINT_set_to_infinity(group, r);
		}

		if (!BN_is_zero(&group->order) && !BN_is_zero(&group->cofactor)) {
		/*-
		* Handle the common cases where the scalar is secret, enforcing a constant
		* time scalar multiplication algorithm.
		*/
		if ((scalar != NULL) && (num == 0)) {
		/*-
		* In this case we want to compute scalar * GeneratorPoint: this
		* codepath is reached most prominently by (ephemeral) key generation
		* of EC cryptosystems (i.e. ECDSA keygen and sign setup, ECDH
		* keygen/first half), where the scalar is always secret. This is why
		* we ignore if BN_FLG_CONSTTIME is actually set and we always call the
		* constant time version.
		*/
		return ec_mul_consttime(group, r, scalar, NULL, ctx);
		}
		if ((scalar == NULL) && (num == 1)) {
		/*-
		* In this case we want to compute scalar * GenericPoint: this codepath
		* is reached most prominently by the second half of ECDH, where the
		* secret scalar is multiplied by the peer's public point. To protect
		* the secret scalar, we ignore if BN_FLG_CONSTTIME is actually set and
		* we always call the constant time version.
		*/
		return ec_mul_consttime(group, r, scalars[0], points[0], ctx);
		}
		}

		for (i = 0; i < num; i++) {
		if (group->meth != points[i]->meth) {
		ECerr(EC_F_EC_WNAF_MUL, EC_R_INCOMPATIBLE_OBJECTS);