Commit 9f1c5491 authored by Andy Polyakov
Browse files

BN update from HEAD.

parent 70b52222
Loading
Loading
Loading
Loading
+34 −238
Original line number Diff line number Diff line
@@ -169,15 +169,13 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
#endif /* OPENSSL_NO_ASM */


/* BN_div[_no_branch] computes  dv := num / divisor,  rounding towards
/* BN_div computes  dv := num / divisor,  rounding towards
 * zero, and sets up rm  such that  dv*divisor + rm = num  holds.
 * Thus:
 *     dv->neg == num->neg ^ divisor->neg  (unless the result is zero)
 *     rm->neg == num->neg                 (unless the remainder is zero)
 * If 'dv' or 'rm' is NULL, the respective value is not returned.
 */
static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
        const BIGNUM *divisor, BN_CTX *ctx);
int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
	   BN_CTX *ctx)
	{
@@ -186,6 +184,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
	BN_ULONG *resp,*wnump;
	BN_ULONG d0,d1;
	int num_n,div_n;
	int no_branch=0;

	/* Invalid zero-padding would have particularly bad consequences
	 * in the case of 'num', so don't just rely on bn_check_top() for this one
@@ -200,7 +199,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,

	if ((BN_get_flags(num, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0))
		{
		return BN_div_no_branch(dv, rm, num, divisor, ctx);
		no_branch=1;
		}

	bn_check_top(dv);
@@ -214,7 +213,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
		return(0);
		}

	if (BN_ucmp(num,divisor) < 0)
	if (!no_branch && BN_ucmp(num,divisor) < 0)
		{
		if (rm != NULL)
			{ if (BN_copy(rm,num) == NULL) return(0); }
@@ -239,227 +238,9 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
	norm_shift+=BN_BITS2;
	if (!(BN_lshift(snum,num,norm_shift))) goto err;
	snum->neg=0;
	div_n=sdiv->top;
	num_n=snum->top;
	loop=num_n-div_n;
	/* Let's set up a 'window' into snum
	 * This is the part that corresponds to the current
	 * 'area' being divided */
	wnum.neg   = 0;
	wnum.d     = &(snum->d[loop]);
	wnum.top   = div_n;
	/* only needed when BN_ucmp messes up the values between top and max */
	wnum.dmax  = snum->dmax - loop; /* so we don't step out of bounds */

	/* Get the top 2 words of sdiv */
	/* div_n=sdiv->top; */
	d0=sdiv->d[div_n-1];
	d1=(div_n == 1)?0:sdiv->d[div_n-2];

	/* pointer to the 'top' of snum */
	wnump= &(snum->d[num_n-1]);

	/* Setup to 'res' */
	res->neg= (num->neg^divisor->neg);
	if (!bn_wexpand(res,(loop+1))) goto err;
	res->top=loop;
	resp= &(res->d[loop-1]);

	/* space for temp */
	if (!bn_wexpand(tmp,(div_n+1))) goto err;

	if (BN_ucmp(&wnum,sdiv) >= 0)
		{
		/* If BN_DEBUG_RAND is defined BN_ucmp changes (via
		 * bn_pollute) the const bignum arguments =>
		 * clean the values between top and max again */
		bn_clear_top2max(&wnum);
		bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n);
		*resp=1;
		}
	else
		res->top--;
	/* if res->top == 0 then clear the neg value otherwise decrease
	 * the resp pointer */
	if (res->top == 0)
		res->neg = 0;
	else
		resp--;

	for (i=0; i<loop-1; i++, wnump--, resp--)
		{
		BN_ULONG q,l0;
		/* the first part of the loop uses the top two words of
		 * snum and sdiv to calculate a BN_ULONG q such that
		 * | wnum - sdiv * q | < sdiv */
#if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
		BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG);
		q=bn_div_3_words(wnump,d1,d0);
#else
		BN_ULONG n0,n1,rem=0;

		n0=wnump[0];
		n1=wnump[-1];
		if (n0 == d0)
			q=BN_MASK2;
		else 			/* n0 < d0 */
			{
#ifdef BN_LLONG
			BN_ULLONG t2;

#if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
			q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0);
#else
			q=bn_div_words(n0,n1,d0);
#ifdef BN_DEBUG_LEVITTE
			fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
X) -> 0x%08X\n",
				n0, n1, d0, q);
#endif
#endif

#ifndef REMAINDER_IS_ALREADY_CALCULATED
			/*
			 * rem doesn't have to be BN_ULLONG. The least we
			 * know it's less than d0, isn't it?
			 */
			rem=(n1-q*d0)&BN_MASK2;
#endif
			t2=(BN_ULLONG)d1*q;

			for (;;)
				{
				if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
					break;
				q--;
				rem += d0;
				if (rem < d0) break; /* don't let rem overflow */
				t2 -= d1;
				}
#else /* !BN_LLONG */
			BN_ULONG t2l,t2h;

			q=bn_div_words(n0,n1,d0);
#ifdef BN_DEBUG_LEVITTE
			fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
X) -> 0x%08X\n",
				n0, n1, d0, q);
#endif
#ifndef REMAINDER_IS_ALREADY_CALCULATED
			rem=(n1-q*d0)&BN_MASK2;
#endif

#if defined(BN_UMULT_LOHI)
			BN_UMULT_LOHI(t2l,t2h,d1,q);
#elif defined(BN_UMULT_HIGH)
			t2l = d1 * q;
			t2h = BN_UMULT_HIGH(d1,q);
#else
	if (no_branch)
		{
			BN_ULONG ql, qh;
			t2l=LBITS(d1); t2h=HBITS(d1);
			ql =LBITS(q);  qh =HBITS(q);
			mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
			}
#endif

			for (;;)
				{
				if ((t2h < rem) ||
					((t2h == rem) && (t2l <= wnump[-2])))
					break;
				q--;
				rem += d0;
				if (rem < d0) break; /* don't let rem overflow */
				if (t2l < d1) t2h--; t2l -= d1;
				}
#endif /* !BN_LLONG */
			}
#endif /* !BN_DIV3W */

		l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
		tmp->d[div_n]=l0;
		wnum.d--;
		/* ignore the top values of the bignums; just subtract the two
		 * BN_ULONG arrays with bn_sub_words */
		if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1))
			{
			/* Note: As we have considered only the leading
			 * two BN_ULONGs in the calculation of q, sdiv * q
			 * might be greater than wnum (but then (q-1) * sdiv
			 * is less than or equal to wnum)
			 */
			q--;
			if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
				/* we can't have an overflow here (assuming
				 * that q != 0, but if q == 0 then tmp is
				 * zero anyway) */
				(*wnump)++;
			}
		/* store part of the result */
		*resp = q;
		}
	bn_correct_top(snum);
	if (rm != NULL)
		{
		/* Keep a copy of the neg flag in num because if rm==num
		 * BN_rshift() will overwrite it.
		 */
		int neg = num->neg;
		BN_rshift(rm,snum,norm_shift);
		if (!BN_is_zero(rm))
			rm->neg = neg;
		bn_check_top(rm);
		}
	BN_CTX_end(ctx);
	return(1);
err:
	bn_check_top(rm);
	BN_CTX_end(ctx);
	return(0);
	}


/* BN_div_no_branch is a special version of BN_div. It does not contain
 * branches that may leak sensitive information.
 */
static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, 
	const BIGNUM *divisor, BN_CTX *ctx)
	{
	int norm_shift,i,loop;
	BIGNUM *tmp,wnum,*snum,*sdiv,*res;
	BN_ULONG *resp,*wnump;
	BN_ULONG d0,d1;
	int num_n,div_n;

	bn_check_top(dv);
	bn_check_top(rm);
	/* bn_check_top(num); */ /* 'num' has been checked in BN_div() */
	bn_check_top(divisor);

	if (BN_is_zero(divisor))
		{
		BNerr(BN_F_BN_DIV_NO_BRANCH,BN_R_DIV_BY_ZERO);
		return(0);
		}

	BN_CTX_start(ctx);
	tmp=BN_CTX_get(ctx);
	snum=BN_CTX_get(ctx);
	sdiv=BN_CTX_get(ctx);
	if (dv == NULL)
		res=BN_CTX_get(ctx);
	else	res=dv;
	if (sdiv == NULL || res == NULL) goto err;

	/* First we normalise the numbers */
	norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
	if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
	sdiv->neg=0;
	norm_shift+=BN_BITS2;
	if (!(BN_lshift(snum,num,norm_shift))) goto err;
	snum->neg=0;

		/* Since we don't know whether snum is larger than sdiv,
		 * we pad snum with enough zeroes without changing its
		 * value. 
@@ -476,6 +257,7 @@ static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
			snum->d[snum->top] = 0;
			snum->top ++;
			}
		}

	div_n=sdiv->top;
	num_n=snum->top;
@@ -500,12 +282,27 @@ static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
	/* Setup to 'res' */
	res->neg= (num->neg^divisor->neg);
	if (!bn_wexpand(res,(loop+1))) goto err;
	res->top=loop-1;
	res->top=loop-no_branch;
	resp= &(res->d[loop-1]);

	/* space for temp */
	if (!bn_wexpand(tmp,(div_n+1))) goto err;

	if (!no_branch)
		{
		if (BN_ucmp(&wnum,sdiv) >= 0)
			{
			/* If BN_DEBUG_RAND is defined BN_ucmp changes (via
			 * bn_pollute) the const bignum arguments =>
			 * clean the values between top and max again */
			bn_clear_top2max(&wnum);
			bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n);
			*resp=1;
			}
		else
			res->top--;
		}

	/* if res->top == 0 then clear the neg value otherwise decrease
	 * the resp pointer */
	if (res->top == 0)
@@ -638,7 +435,7 @@ X) -> 0x%08X\n",
			rm->neg = neg;
		bn_check_top(rm);
		}
	bn_correct_top(res);
	if (no_branch)	bn_correct_top(res);
	BN_CTX_end(ctx);
	return(1);
err:
@@ -646,5 +443,4 @@ err:
	BN_CTX_end(ctx);
	return(0);
	}

#endif
+173 −67
Original line number Diff line number Diff line
@@ -113,6 +113,18 @@
#include "cryptlib.h"
#include "bn_lcl.h"

#include <stdlib.h>
#ifdef _WIN32
# include <malloc.h>
# ifndef alloca
#  define alloca _alloca
# endif
#elif defined(__GNUC__)
# ifndef alloca
#  define alloca(s) __builtin_alloca((s))
# endif
#endif

/* maximum precomputation table size for *variable* sliding windows */
#define TABLE_SIZE	32

@@ -522,23 +534,17 @@ err:
 * as cache lines are concerned.  The following functions are used to transfer a BIGNUM
 * from/to that table. */

static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, int idx, int width)
	{
	size_t i, j;

	if (bn_wexpand(b, top) == NULL)
		return 0;
	while (b->top < top)
		{
		b->d[b->top++] = 0;
		}
	
	if (top > b->top)
		top = b->top; /* this works because 'buf' is explicitly zeroed */
	for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
		{
		buf[j] = ((unsigned char*)b->d)[i];
		}

	bn_correct_top(b);
	return 1;
	}

@@ -561,7 +567,7 @@ static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf

/* Given a pointer value, compute the next address that is a cache line multiple. */
#define MOD_EXP_CTIME_ALIGN(x_) \
	((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
	((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))

/* This variant of BN_mod_exp_mont() uses fixed windows and the special
 * precomputation memory layout to limit data-dependency to a minimum
@@ -572,17 +578,15 @@ static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf
int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
		    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
	{
	int i,bits,ret=0,idx,window,wvalue;
	int i,bits,ret=0,window,wvalue;
	int top;
 	BIGNUM *r;
	const BIGNUM *aa;
	BN_MONT_CTX *mont=NULL;

	int numPowers;
	unsigned char *powerbufFree=NULL;
	int powerbufLen = 0;
	unsigned char *powerbuf=NULL;
	BIGNUM *computeTemp=NULL, *am=NULL;
	BIGNUM tmp, am;

	bn_check_top(a);
	bn_check_top(p);
@@ -602,10 +606,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
		return ret;
		}

 	/* Initialize BIGNUM context and allocate intermediate result */
	BN_CTX_start(ctx);
	r = BN_CTX_get(ctx);
	if (r == NULL) goto err;

	/* Allocate a montgomery context if it was not supplied by the caller.
	 * If this is not done, things will break in the montgomery part.
@@ -620,40 +621,154 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,

	/* Get the window size to use with size of p. */
	window = BN_window_bits_for_ctime_exponent_size(bits);
#if defined(OPENSSL_BN_ASM_MONT5)
	if (window==6 && bits<=1024) window=5;	/* ~5% improvement of 2048-bit RSA sign */
#endif

	/* Allocate a buffer large enough to hold all of the pre-computed
	 * powers of a.
	 * powers of am, am itself and tmp.
	 */
	numPowers = 1 << window;
	powerbufLen = sizeof(m->d[0])*top*numPowers;
	powerbufLen = sizeof(m->d[0])*(top*numPowers +
				((2*top)>numPowers?(2*top):numPowers));
#ifdef alloca
	if (powerbufLen < 3072)
		powerbufFree = alloca(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
	else
#endif
	if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
		goto err;
		
	powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
	memset(powerbuf, 0, powerbufLen);

 	/* Initialize the intermediate result. Do this early to save double conversion,
	 * once each for a^0 and intermediate result.
	 */
 	if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err;
#ifdef alloca
	if (powerbufLen < 3072)
		powerbufFree = NULL;
#endif

	/* Initialize computeTemp as a^1 with montgomery precalcs */
	computeTemp = BN_CTX_get(ctx);
	am = BN_CTX_get(ctx);
	if (computeTemp==NULL || am==NULL) goto err;
	/* lay down tmp and am right after powers table */
	tmp.d     = (BN_ULONG *)(powerbuf + sizeof(m->d[0])*top*numPowers);
	am.d      = tmp.d + top;
	tmp.top   = am.top  = 0;
	tmp.dmax  = am.dmax = top;
	tmp.neg   = am.neg  = 0;
	tmp.flags = am.flags = BN_FLG_STATIC_DATA;

	/* prepare a^0 in Montgomery domain */
#if 1
 	if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx))	goto err;
#else
	tmp.d[0] = (0-m->d[0])&BN_MASK2;	/* 2^(top*BN_BITS2) - m */
	for (i=1;i<top;i++)
		tmp.d[i] = (~m->d[i])&BN_MASK2;
	tmp.top = top;
#endif

	/* prepare a^1 in Montgomery domain */
	if (a->neg || BN_ucmp(a,m) >= 0)
		{
		if (!BN_mod(am,a,m,ctx))
			goto err;
		aa= am;
		if (!BN_mod(&am,a,m,ctx))			goto err;
		if (!BN_to_montgomery(&am,&am,mont,ctx))	goto err;
		}
	else	if (!BN_to_montgomery(&am,a,mont,ctx))		goto err;

#if defined(OPENSSL_BN_ASM_MONT5)
    /* This optimization uses ideas from http://eprint.iacr.org/2011/239,
     * specifically optimization of cache-timing attack countermeasures
     * and pre-computation optimization. */

    /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
     * 512-bit RSA is hardly relevant, we omit it to spare size... */ 
    if (window==5)
	{
	void bn_mul_mont_gather5(BN_ULONG *rp,const BN_ULONG *ap,
			const void *table,const BN_ULONG *np,
			const BN_ULONG *n0,int num,int power);
	void bn_scatter5(const BN_ULONG *inp,size_t num,
			void *table,size_t power);
	void bn_gather5(BN_ULONG *out,size_t num,
			void *table,size_t power);

	BN_ULONG *np=mont->N.d, *n0=mont->n0;

	/* BN_to_montgomery can contaminate words above .top
	 * [in BN_DEBUG[_DEBUG] build]... */
	for (i=am.top; i<top; i++)	am.d[i]=0;
	for (i=tmp.top; i<top; i++)	tmp.d[i]=0;

	bn_scatter5(tmp.d,top,powerbuf,0);
	bn_scatter5(am.d,am.top,powerbuf,1);
	bn_mul_mont(tmp.d,am.d,am.d,np,n0,top);
	bn_scatter5(tmp.d,top,powerbuf,2);

#if 0
	for (i=3; i<32; i++)
		{
		/* Calculate a^i = a^(i-1) * a */
		bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
		bn_scatter5(tmp.d,top,powerbuf,i);
		}
#else
	/* same as above, but uses squaring for 1/2 of operations */
	for (i=4; i<32; i*=2)
		{
		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
		bn_scatter5(tmp.d,top,powerbuf,i);
		}
	for (i=3; i<8; i+=2)
		{
		int j;
		bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
		bn_scatter5(tmp.d,top,powerbuf,i);
		for (j=2*i; j<32; j*=2)
			{
			bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
			bn_scatter5(tmp.d,top,powerbuf,j);
			}
		}
	for (; i<16; i+=2)
		{
		bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
		bn_scatter5(tmp.d,top,powerbuf,i);
		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
		bn_scatter5(tmp.d,top,powerbuf,2*i);
		}
	for (; i<32; i+=2)
		{
		bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
		bn_scatter5(tmp.d,top,powerbuf,i);
		}
#endif
	bits--;
	for (wvalue=0, i=bits%5; i>=0; i--,bits--)
		wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
	bn_gather5(tmp.d,top,powerbuf,wvalue);

	/* Scan the exponent one window at a time starting from the most
	 * significant bits.
	 */
	while (bits >= 0)
		{
		for (wvalue=0, i=0; i<5; i++,bits--)
			wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);

		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
		bn_mul_mont_gather5(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue);
		}

	tmp.top=top;
	bn_correct_top(&tmp);
	}
    else
		aa=a;
	if (!BN_to_montgomery(am,aa,mont,ctx)) goto err;
	if (!BN_copy(computeTemp, am)) goto err;
	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err;
#endif
	{
	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) goto err;
	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am,  top, powerbuf, 1, numPowers)) goto err;

	/* If the window size is greater than 1, then calculate
	 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
@@ -662,62 +777,54 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
	 */
	if (window > 1)
		{
		for (i=2; i<numPowers; i++)
		if (!BN_mod_mul_montgomery(&tmp,&am,&am,mont,ctx))	goto err;
		if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, numPowers)) goto err;
		for (i=3; i<numPowers; i++)
			{
			/* Calculate a^i = a^(i-1) * a */
			if (!BN_mod_mul_montgomery(computeTemp,am,computeTemp,mont,ctx))
			if (!BN_mod_mul_montgomery(&tmp,&am,&tmp,mont,ctx))
				goto err;
			if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i, numPowers)) goto err;
			if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, numPowers)) goto err;
			}
		}

 	/* Adjust the number of bits up to a multiple of the window size.
 	 * If the exponent length is not a multiple of the window size, then
 	 * this pads the most significant bits with zeros to normalize the
 	 * scanning loop so that there are no special cases.
 	 *
 	 * * NOTE: Making the window size a power of two less than the native
	 * * word size ensures that the padded bits won't go past the last
 	 * * word in the internal BIGNUM structure. Going past the end will
 	 * * still produce the correct result, but causes a different branch
 	 * * to be taken in the BN_is_bit_set function.
 	 */
 	bits = ((bits+window-1)/window)*window;
 	idx=bits-1;	/* The top bit of the window */
	bits--;
	for (wvalue=0, i=bits%window; i>=0; i--,bits--)
		wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
	if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp,top,powerbuf,wvalue,numPowers)) goto err;
 
	/* Scan the exponent one window at a time starting from the most
	 * significant bits.
	 */
 	while (idx >= 0)
 	while (bits >= 0)
  		{
 		wvalue=0; /* The 'value' of the window */
 		
 		/* Scan the window, squaring the result as we go */
 		for (i=0; i<window; i++,idx--)
 		for (i=0; i<window; i++,bits--)
 			{
			if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))	goto err;
			wvalue = (wvalue<<1)+BN_is_bit_set(p,idx);
			if (!BN_mod_mul_montgomery(&tmp,&tmp,&tmp,mont,ctx))	goto err;
			wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
  			}
 		
		/* Fetch the appropriate pre-computed value from the pre-buf */
		if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(computeTemp, top, powerbuf, wvalue, numPowers)) goto err;
		if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, numPowers)) goto err;

 		/* Multiply the result into the intermediate result */
 		if (!BN_mod_mul_montgomery(r,r,computeTemp,mont,ctx)) goto err;
 		if (!BN_mod_mul_montgomery(&tmp,&tmp,&am,mont,ctx)) goto err;
  		}
	}

 	/* Convert the final result from montgomery to standard format */
	if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
	if (!BN_from_montgomery(rr,&tmp,mont,ctx)) goto err;
	ret=1;
err:
	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
	if (powerbuf!=NULL)
		{
		OPENSSL_cleanse(powerbuf,powerbufLen);
		OPENSSL_free(powerbufFree);
		if (powerbufFree) OPENSSL_free(powerbufFree);
		}
 	if (am!=NULL) BN_clear(am);
 	if (computeTemp!=NULL) BN_clear(computeTemp);
	BN_CTX_end(ctx);
	return(ret);
	}
@@ -988,4 +1095,3 @@ err:
	bn_check_top(r);
	return(ret);
	}
+89 −17
Original line number Diff line number Diff line
@@ -124,6 +124,7 @@ static const BN_ULONG SQR_tb[16] =
    SQR_tb[(w) >>  4 & 0xF] <<  8 | SQR_tb[(w)       & 0xF]
#endif

#if !defined(OPENSSL_BN_ASM_GF2m)
/* Product of two polynomials a, b each with degree < BN_BITS2 - 1,
 * result is a polynomial r with degree < 2 * BN_BITS - 1
 * The caller MUST ensure that the variables have the right amount
@@ -218,7 +219,9 @@ static void bn_GF2m_mul_2x2(BN_ULONG *r, const BN_ULONG a1, const BN_ULONG a0, c
	r[2] ^= m1 ^ r[1] ^ r[3];  /* h0 ^= m1 ^ l1 ^ h1; */
	r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0;  /* l1 ^= l0 ^ h0 ^ m0; */
	}

#else
void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0);
#endif 

/* Add polynomials a and b and store result in r; r could be a or b, a and b 
 * could be equal; r is the bitwise XOR of a and b.
@@ -362,21 +365,17 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[])
int	BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p)
	{
	int ret = 0;
	const int max = BN_num_bits(p) + 1;
	int *arr=NULL;
	int arr[6];
	bn_check_top(a);
	bn_check_top(p);
	if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
	ret = BN_GF2m_poly2arr(p, arr, max);
	if (!ret || ret > max)
	ret = BN_GF2m_poly2arr(p, arr, sizeof(arr)/sizeof(arr[0]));
	if (!ret || ret > (int)(sizeof(arr)/sizeof(arr[0])))
		{
		BNerr(BN_F_BN_GF2M_MOD,BN_R_INVALID_LENGTH);
		goto err;
		return 0;
		}
	ret = BN_GF2m_mod_arr(r, a, arr);
	bn_check_top(r);
err:
	if (arr) OPENSSL_free(arr);
	return ret;
	}

@@ -531,18 +530,18 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)

	BN_CTX_start(ctx);
	
	b = BN_CTX_get(ctx);
	c = BN_CTX_get(ctx);
	u = BN_CTX_get(ctx);
	v = BN_CTX_get(ctx);
	if (v == NULL) goto err;
	if ((b = BN_CTX_get(ctx))==NULL) goto err;
	if ((c = BN_CTX_get(ctx))==NULL) goto err;
	if ((u = BN_CTX_get(ctx))==NULL) goto err;
	if ((v = BN_CTX_get(ctx))==NULL) goto err;

	if (!BN_one(b)) goto err;
	if (!BN_GF2m_mod(u, a, p)) goto err;
	if (!BN_copy(v, p)) goto err;

	if (BN_is_zero(u)) goto err;

	if (!BN_copy(v, p)) goto err;
#if 0
	if (!BN_one(b)) goto err;

	while (1)
		{
		while (!BN_is_odd(u))
@@ -567,13 +566,86 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
		if (!BN_GF2m_add(u, u, v)) goto err;
		if (!BN_GF2m_add(b, b, c)) goto err;
		}
#else
	{
	int i,	ubits = BN_num_bits(u),
		vbits = BN_num_bits(v),	/* v is copy of p */
		top = p->top;
	BN_ULONG *udp,*bdp,*vdp,*cdp;

	bn_wexpand(u,top);	udp = u->d;
				for (i=u->top;i<top;i++) udp[i] = 0;
				u->top = top;
	bn_wexpand(b,top);	bdp = b->d;
				bdp[0] = 1;
				for (i=1;i<top;i++) bdp[i] = 0;
				b->top = top;
	bn_wexpand(c,top);	cdp = c->d;
				for (i=0;i<top;i++) cdp[i] = 0;
				c->top = top;
	vdp = v->d;	/* It pays off to "cache" *->d pointers, because
			 * it allows optimizer to be more aggressive.
			 * But we don't have to "cache" p->d, because *p
			 * is declared 'const'... */
	while (1)
		{
		while (ubits && !(udp[0]&1))
			{
			BN_ULONG u0,u1,b0,b1,mask;

			u0   = udp[0];
			b0   = bdp[0];
			mask = (BN_ULONG)0-(b0&1);
			b0  ^= p->d[0]&mask;
			for (i=0;i<top-1;i++)
				{
				u1 = udp[i+1];
				udp[i] = ((u0>>1)|(u1<<(BN_BITS2-1)))&BN_MASK2;
				u0 = u1;
				b1 = bdp[i+1]^(p->d[i+1]&mask);
				bdp[i] = ((b0>>1)|(b1<<(BN_BITS2-1)))&BN_MASK2;
				b0 = b1;
				}
			udp[i] = u0>>1;
			bdp[i] = b0>>1;
			ubits--;
			}

		if (ubits<=BN_BITS2 && udp[0]==1) break;

		if (ubits<vbits)
			{
			i = ubits; ubits = vbits; vbits = i;
			tmp = u; u = v; v = tmp;
			tmp = b; b = c; c = tmp;
			udp = vdp; vdp = v->d;
			bdp = cdp; cdp = c->d;
			}
		for(i=0;i<top;i++)
			{
			udp[i] ^= vdp[i];
			bdp[i] ^= cdp[i];
			}
		if (ubits==vbits)
			{
			bn_correct_top(u);
			ubits = BN_num_bits(u);
			}
		}
	bn_correct_top(b);
	}
#endif

	if (!BN_copy(r, b)) goto err;
	bn_check_top(r);
	ret = 1;

err:
#ifdef BN_DEBUG /* BN_CTX_end would complain about the expanded form */
        bn_correct_top(c);
        bn_correct_top(u);
        bn_correct_top(v);
#endif
  	BN_CTX_end(ctx);
	return ret;
	}
+16 −3
Original line number Diff line number Diff line
@@ -238,7 +238,7 @@ extern "C" {
#  if defined(__DECC)
#   include <c_asm.h>
#   define BN_UMULT_HIGH(a,b)	(BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
#  elif defined(__GNUC__)
#  elif defined(__GNUC__) && __GNUC__>=2
#   define BN_UMULT_HIGH(a,b)	({	\
	register BN_ULONG ret;		\
	asm ("umulh	%1,%2,%0"	\
@@ -247,7 +247,7 @@ extern "C" {
	ret;			})
#  endif	/* compiler */
# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG)
#  if defined(__GNUC__)
#  if defined(__GNUC__) && __GNUC__>=2
#   define BN_UMULT_HIGH(a,b)	({	\
	register BN_ULONG ret;		\
	asm ("mulhdu	%0,%1,%2"	\
@@ -257,7 +257,7 @@ extern "C" {
#  endif	/* compiler */
# elif (defined(__x86_64) || defined(__x86_64__)) && \
       (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
#  if defined(__GNUC__)
#  if defined(__GNUC__) && __GNUC__>=2
#   define BN_UMULT_HIGH(a,b)	({	\
	register BN_ULONG ret,discard;	\
	asm ("mulq	%3"		\
@@ -280,6 +280,19 @@ extern "C" {
#   define BN_UMULT_HIGH(a,b)		__umulh((a),(b))
#   define BN_UMULT_LOHI(low,high,a,b)	((low)=_umul128((a),(b),&(high)))
#  endif
# elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
#  if defined(__GNUC__) && __GNUC__>=2
/* BN_UMULT_HIGH(a,b): GNU statement expression that issues a single
 * "dmultu a,b" and yields, as a BN_ULONG, whatever the instruction left in
 * the register selected by the "h" output constraint.  The register
 * selected by "l" is listed as clobbered because the instruction writes it
 * too, but its value is not returned.
 * NOTE(review): presumably "h"/"l" are the MIPS HI/LO multiply result
 * registers, i.e. this returns the upper word of the double-width
 * product -- confirm against the GCC MIPS machine constraints for the
 * compiler versions being supported. */
#   define BN_UMULT_HIGH(a,b)	({	\
	register BN_ULONG ret;		\
	asm ("dmultu	%1,%2"		\
	     : "=h"(ret)		\
	     : "r"(a), "r"(b) : "l");	\
	ret;			})
/* BN_UMULT_LOHI(low,high,a,b): issues one "dmultu a,b" and stores both
 * result registers: the "l" one into 'low' and the "h" one into 'high'.
 * 'low' and 'high' must be lvalues, since they are asm outputs.
 * NOTE(review): the expansion already ends with ';', so the usual call
 * form "BN_UMULT_LOHI(l,h,a,b);" produces an extra empty statement.  That
 * is harmless inside a braced block, but it breaks an unbraced
 * "if (...) BN_UMULT_LOHI(...); else ..." construct. */
#   define BN_UMULT_LOHI(low,high,a,b)	\
	asm ("dmultu	%2,%3"		\
	     : "=l"(low),"=h"(high)	\
	     : "r"(a), "r"(b));
#  endif
# endif		/* cpu */
#endif		/* OPENSSL_NO_ASM */

+29 −87

File changed.

Preview size limit exceeded, changes collapsed.

Loading