Commit ca04d7a2 authored by Andy Polyakov's avatar Andy Polyakov
Browse files

Leave the decision to call/implement bn_sqr_mont to assembler developer.

parent 40a3c123
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -729,8 +729,7 @@ int RAND_pseudo_bytes(unsigned char *buf,int num);
	bn_pollute(a); \
	}

void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num);
void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num);
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num);
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
void     bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num);
+26 −19
Original line number Diff line number Diff line
@@ -831,13 +831,14 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
#ifdef OPENSSL_BN_ASM_MONT
/*
 * This is essentially a reference implementation, which may or may not
 * result in performance improvement. E.g. on IA-32 this does give 40%
 * faster rsa1024 private key operations and 10% faster rsa4096 ones,
 * while on AMD64 it improves rsa1024 sign only by 10% and *worsens*
 * rsa4096 sign by 15%. Once again, it's a reference implementation,
 * one to be used as start-point for platform-specific assembler.
 * result in performance improvement. E.g. on IA-32 this routine was
 * observed to give 40% faster rsa1024 private key operations and 10%
 * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
 * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
 * reference implementation, one to be used as start-point for
 * platform-specific assembler.
 */
void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
	{
	BN_ULONG c0,c1,ml,*tp;
#ifdef mul64
@@ -846,6 +847,9 @@ void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_
	volatile BN_ULONG *vp;
	int i=0,j;

#if 0	/* template for platform-specific implementation */
	if (ap==bp)	return bn_sqr_mont(rp,ap,np,n0,num);
#endif
	vp = tp = alloca((num+2)*sizeof(BN_ULONG));

	tp[num]   = bn_mul_words(tp,ap,num,bp[0]);
@@ -890,18 +894,22 @@ void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_
		if (tp[num]!=0 || c0==0)
			{
			for(i=0;i<num+2;i++)	vp[i] = 0;
			return;
			return 1;
			}
		}
	for(i=0;i<num;i++)	rp[i] = tp[i],	vp[i] = 0;
	vp[num]   = 0;
	vp[num+1] = 0;
	return 1;
	}

/*
 * Squaring variant of bn_mul_mont: forwards to bn_mul_mont with ap
 * supplied as both multiplicands. rp, np, n0 and num are passed
 * through unchanged.
 */
void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num)
	{
	const BN_ULONG *bp = ap;	/* second operand is the same as the first */

	bn_mul_mont(rp,ap,bp,np,n0,num);
	}
#else
/*
 * Stub compiled when OPENSSL_BN_ASM_MONT is not defined. It touches
 * none of its arguments and always returns 0. A return value of 0
 * means the multiplication/convolution was not performed, which
 * signals the caller to fall back to its alternative/original
 * code path.
 */
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
	{
	return 0;
	}
#endif /* OPENSSL_BN_ASM_MONT */

#else /* !BN_MUL_COMBA */
@@ -942,7 +950,7 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
	}

#ifdef OPENSSL_BN_ASM_MONT
void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
	{
	BN_ULONG c0,c1,*tp;
	volatile BN_ULONG *vp;
@@ -972,18 +980,17 @@ void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_
		if (tp[num]!=0 || c0==0)
			{
			for(i=0;i<num+2;i++)	vp[i] = 0;
			return;
			return 1;
			}
		}
	for(i=0;i<num;i++)	rp[i] = tp[i],	vp[i] = 0;
	vp[num]   = 0;
	vp[num+1] = 0;
	return 1;
	}

/*
 * Squaring variant of bn_mul_mont: forwards to bn_mul_mont with ap
 * supplied as both multiplicands. rp, np, n0 and num are passed
 * through unchanged.
 */
void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num)
	{
	const BN_ULONG *bp = ap;	/* second operand is the same as the first */

	bn_mul_mont(rp,ap,bp,np,n0,num);
	}
#else
/*
 * Stub compiled when OPENSSL_BN_ASM_MONT is not defined. It touches
 * none of its arguments and always returns 0, so a caller that tests
 * the return value sees that no multiplication was performed.
 */
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
	{
	return 0;
	}
#endif /* OPENSSL_BN_ASM_MONT */

#endif /* !BN_MUL_COMBA */
+7 −8
Original line number Diff line number Diff line
@@ -80,15 +80,14 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
	if (num>1 && a->top==num && b->top==num)
		{
		if (bn_wexpand(r,num) == NULL) return 0;
		if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num))
			{
			r->neg = a->neg^b->neg;
			r->top = num;
		if (a==b)
			bn_sqr_mont(r->d,a->d,mont->N.d,mont->n0,num);
		else
			bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num);
			bn_fix_top(r);
			return 1;
			}
		}
#endif

	BN_CTX_start(ctx);