Loading crypto/bn/bn.h +1 −2 Original line number Diff line number Diff line Loading @@ -729,8 +729,7 @@ int RAND_pseudo_bytes(unsigned char *buf,int num); bn_pollute(a); \ } void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num); void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num); int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num); BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num); Loading crypto/bn/bn_asm.c +26 −19 Original line number Diff line number Diff line Loading @@ -831,13 +831,14 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) #ifdef OPENSSL_BN_ASM_MONT /* * This is essentially reference implementation, which may or may not * result in performance improvement. E.g. on IA-32 this does give 40% * faster rsa1024 private key operations and 10% faster rsa4096 ones, * while on AMD64 it improves rsa1024 sign only by 10% and *worsens* * rsa4096 sign by 15%. Once again, it's a reference implementation, * one to be used as start-point for platform-specific assembler. * result in performance improvement. E.g. on IA-32 this routine was * observed to give 40% faster rsa1024 private key operations and 10% * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a * reference implementation, one to be used as start-point for * platform-specific assembler. */ void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) { BN_ULONG c0,c1,ml,*tp; #ifdef mul64 Loading @@ -846,6 +847,9 @@ void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ volatile BN_ULONG *vp; int i=0,j; #if 0 /* template for platform-specific implementation */ if (ap==bp) return bn_sqr_mont(rp,ap,np,n0,num); #endif vp = tp = alloca((num+2)*sizeof(BN_ULONG)); tp[num] = bn_mul_words(tp,ap,num,bp[0]); Loading Loading @@ -890,18 +894,22 @@ void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ if (tp[num]!=0 || c0==0) { for(i=0;i<num+2;i++) vp[i] = 0; return; return 1; } } for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0; vp[num] = 0; vp[num+1] = 0; return 1; } void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num) { bn_mul_mont(rp,ap,ap,np,n0,num); } #else /* * Return value of 0 indicates that multiplication/convolution was not * performed to signal the caller to fall down to alternative/original * code-path. */ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) { return 0; } #endif /* OPENSSL_BN_ASM_MONT */ #else /* !BN_MUL_COMBA */ Loading Loading @@ -942,7 +950,7 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) } #ifdef OPENSSL_BN_ASM_MONT void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) { BN_ULONG c0,c1,*tp; volatile BN_ULONG *vp; Loading Loading @@ -972,18 +980,17 @@ void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ if (tp[num]!=0 || c0==0) { for(i=0;i<num+2;i++) vp[i] = 0; return; return 1; } } for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0; vp[num] = 0; vp[num+1] = 0; return 1; } void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num) { bn_mul_mont(rp,ap,ap,np,n0,num); } #else int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) { return 0; } #endif /* OPENSSL_BN_ASM_MONT */ #endif /* !BN_MUL_COMBA */ crypto/bn/bn_mont.c +7 −8 Original line number Diff line number Diff line Loading @@ -80,15 +80,14 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, if (num>1 && a->top==num && b->top==num) { if (bn_wexpand(r,num) == NULL) return 0; if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num)) { r->neg = a->neg^b->neg; r->top = num; if (a==b) bn_sqr_mont(r->d,a->d,mont->N.d,mont->n0,num); else bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num); bn_fix_top(r); return 1; } } #endif BN_CTX_start(ctx); Loading Loading
crypto/bn/bn.h +1 −2 Original line number Diff line number Diff line Loading @@ -729,8 +729,7 @@ int RAND_pseudo_bytes(unsigned char *buf,int num); bn_pollute(a); \ } void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num); void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num); int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num); BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num); Loading
crypto/bn/bn_asm.c +26 −19 Original line number Diff line number Diff line Loading @@ -831,13 +831,14 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) #ifdef OPENSSL_BN_ASM_MONT /* * This is essentially reference implementation, which may or may not * result in performance improvement. E.g. on IA-32 this does give 40% * faster rsa1024 private key operations and 10% faster rsa4096 ones, * while on AMD64 it improves rsa1024 sign only by 10% and *worsens* * rsa4096 sign by 15%. Once again, it's a reference implementation, * one to be used as start-point for platform-specific assembler. * result in performance improvement. E.g. on IA-32 this routine was * observed to give 40% faster rsa1024 private key operations and 10% * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a * reference implementation, one to be used as start-point for * platform-specific assembler. */ void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) { BN_ULONG c0,c1,ml,*tp; #ifdef mul64 Loading @@ -846,6 +847,9 @@ void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ volatile BN_ULONG *vp; int i=0,j; #if 0 /* template for platform-specific implementation */ if (ap==bp) return bn_sqr_mont(rp,ap,np,n0,num); #endif vp = tp = alloca((num+2)*sizeof(BN_ULONG)); tp[num] = bn_mul_words(tp,ap,num,bp[0]); Loading Loading @@ -890,18 +894,22 @@ void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ if (tp[num]!=0 || c0==0) { for(i=0;i<num+2;i++) vp[i] = 0; return; return 1; } } for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0; vp[num] = 0; vp[num+1] = 0; return 1; } void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num) { bn_mul_mont(rp,ap,ap,np,n0,num); } #else /* * Return value of 0 indicates that multiplication/convolution was not * performed to signal the caller to fall down to alternative/original * code-path. */ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) { return 0; } #endif /* OPENSSL_BN_ASM_MONT */ #else /* !BN_MUL_COMBA */ Loading Loading @@ -942,7 +950,7 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) } #ifdef OPENSSL_BN_ASM_MONT void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) { BN_ULONG c0,c1,*tp; volatile BN_ULONG *vp; Loading Loading @@ -972,18 +980,17 @@ void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ if (tp[num]!=0 || c0==0) { for(i=0;i<num+2;i++) vp[i] = 0; return; return 1; } } for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0; vp[num] = 0; vp[num+1] = 0; return 1; } void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num) { bn_mul_mont(rp,ap,ap,np,n0,num); } #else int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) { return 0; } #endif /* OPENSSL_BN_ASM_MONT */ #endif /* !BN_MUL_COMBA */
crypto/bn/bn_mont.c +7 −8 Original line number Diff line number Diff line Loading @@ -80,15 +80,14 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, if (num>1 && a->top==num && b->top==num) { if (bn_wexpand(r,num) == NULL) return 0; if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num)) { r->neg = a->neg^b->neg; r->top = num; if (a==b) bn_sqr_mont(r->d,a->d,mont->N.d,mont->n0,num); else bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num); bn_fix_top(r); return 1; } } #endif BN_CTX_start(ctx); Loading