Loading crypto/bn/bn_asm.c +28 −0 Original line number Diff line number Diff line Loading @@ -459,6 +459,34 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) #define sqr_add_c2(a,i,j,c0,c1,c2) \ mul_add_c2((a)[i],(a)[j],c0,c1,c2) #elif defined(BN_UMULT_LOHI) #define mul_add_c(a,b,c0,c1,c2) { \ BN_ULONG ta=(a),tb=(b); \ BN_UMULT_LOHI(t1,t2,ta,tb); \ c0 += t1; t2 += (c0<t1)?1:0; \ c1 += t2; c2 += (c1<t2)?1:0; \ } #define mul_add_c2(a,b,c0,c1,c2) { \ BN_ULONG ta=(a),tb=(b),t0; \ BN_UMULT_LOHI(t0,t1,ta,tb); \ t2 = t1+t1; c2 += (t2<t1)?1:0; \ t1 = t0+t0; t2 += (t1<t0)?1:0; \ c0 += t1; t2 += (c0<t1)?1:0; \ c1 += t2; c2 += (c1<t2)?1:0; \ } #define sqr_add_c(a,i,c0,c1,c2) { \ BN_ULONG ta=(a)[i]; \ BN_UMULT_LOHI(t1,t2,ta,ta); \ c0 += t1; t2 += (c0<t1)?1:0; \ c1 += t2; c2 += (c1<t2)?1:0; \ } #define sqr_add_c2(a,i,j,c0,c1,c2) \ mul_add_c2((a)[i],(a)[j],c0,c1,c2) #elif defined(BN_UMULT_HIGH) #define mul_add_c(a,b,c0,c1,c2) { \ Loading crypto/bn/bn_lcl.h +36 −0 Original line number Diff line number Diff line Loading @@ -270,6 +270,15 @@ extern "C" { : "a"(a),"g"(b) \ : "cc"); # endif # elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT) # if defined(_MSC_VER) && _MSC_VER>=1400 unsigned __int64 __umulh (unsigned __int64 a,unsigned __int64 b); unsigned __int64 _umul128 (unsigned __int64 a,unsigned __int64 b, unsigned __int64 *h); # pragma intrinsic(__umulh,_umul128) # define BN_UMULT_HIGH(a,b) __umulh((a),(b)) # define BN_UMULT_LOHI(low,high,a,b) ((low)=_umul128((a),(b),&(high))) # endif # endif /* cpu */ #endif /* OPENSSL_NO_ASM */ Loading Loading @@ -313,6 +322,33 @@ extern "C" { (r1)=Hw(t); \ } #elif defined(BN_UMULT_LOHI) #define mul_add(r,a,w,c) { \ BN_ULONG high,low,ret,tmp=(a); \ ret = (r); \ BN_UMULT_LOHI(low,high,w,tmp); \ ret += (c); \ (c) = (ret<(c))?1:0; \ (c) += high; \ ret += low; \ (c) += (ret<low)?1:0; \ (r) = ret; \ } #define mul(r,a,w,c) { \ BN_ULONG high,low,ret,ta=(a); \ BN_UMULT_LOHI(low,high,w,ta); \ ret = low + (c); \ (c) = high; \ (c) += (ret<low)?1:0; \ (r) = ret; \ } #define sqr(r0,r1,a) { \ BN_ULONG tmp=(a); \ BN_UMULT_LOHI(r0,r1,tmp,tmp); \ } #elif defined(BN_UMULT_HIGH) #define mul_add(r,a,w,c) { \ BN_ULONG high,low,ret,tmp=(a); \ Loading Loading
crypto/bn/bn_asm.c +28 −0 Original line number Diff line number Diff line Loading @@ -459,6 +459,34 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) #define sqr_add_c2(a,i,j,c0,c1,c2) \ mul_add_c2((a)[i],(a)[j],c0,c1,c2) #elif defined(BN_UMULT_LOHI) #define mul_add_c(a,b,c0,c1,c2) { \ BN_ULONG ta=(a),tb=(b); \ BN_UMULT_LOHI(t1,t2,ta,tb); \ c0 += t1; t2 += (c0<t1)?1:0; \ c1 += t2; c2 += (c1<t2)?1:0; \ } #define mul_add_c2(a,b,c0,c1,c2) { \ BN_ULONG ta=(a),tb=(b),t0; \ BN_UMULT_LOHI(t0,t1,ta,tb); \ t2 = t1+t1; c2 += (t2<t1)?1:0; \ t1 = t0+t0; t2 += (t1<t0)?1:0; \ c0 += t1; t2 += (c0<t1)?1:0; \ c1 += t2; c2 += (c1<t2)?1:0; \ } #define sqr_add_c(a,i,c0,c1,c2) { \ BN_ULONG ta=(a)[i]; \ BN_UMULT_LOHI(t1,t2,ta,ta); \ c0 += t1; t2 += (c0<t1)?1:0; \ c1 += t2; c2 += (c1<t2)?1:0; \ } #define sqr_add_c2(a,i,j,c0,c1,c2) \ mul_add_c2((a)[i],(a)[j],c0,c1,c2) #elif defined(BN_UMULT_HIGH) #define mul_add_c(a,b,c0,c1,c2) { \ Loading
crypto/bn/bn_lcl.h +36 −0 Original line number Diff line number Diff line Loading @@ -270,6 +270,15 @@ extern "C" { : "a"(a),"g"(b) \ : "cc"); # endif # elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT) # if defined(_MSC_VER) && _MSC_VER>=1400 unsigned __int64 __umulh (unsigned __int64 a,unsigned __int64 b); unsigned __int64 _umul128 (unsigned __int64 a,unsigned __int64 b, unsigned __int64 *h); # pragma intrinsic(__umulh,_umul128) # define BN_UMULT_HIGH(a,b) __umulh((a),(b)) # define BN_UMULT_LOHI(low,high,a,b) ((low)=_umul128((a),(b),&(high))) # endif # endif /* cpu */ #endif /* OPENSSL_NO_ASM */ Loading Loading @@ -313,6 +322,33 @@ extern "C" { (r1)=Hw(t); \ } #elif defined(BN_UMULT_LOHI) #define mul_add(r,a,w,c) { \ BN_ULONG high,low,ret,tmp=(a); \ ret = (r); \ BN_UMULT_LOHI(low,high,w,tmp); \ ret += (c); \ (c) = (ret<(c))?1:0; \ (c) += high; \ ret += low; \ (c) += (ret<low)?1:0; \ (r) = ret; \ } #define mul(r,a,w,c) { \ BN_ULONG high,low,ret,ta=(a); \ BN_UMULT_LOHI(low,high,w,ta); \ ret = low + (c); \ (c) = high; \ (c) += (ret<low)?1:0; \ (r) = ret; \ } #define sqr(r0,r1,a) { \ BN_ULONG tmp=(a); \ BN_UMULT_LOHI(r0,r1,tmp,tmp); \ } #elif defined(BN_UMULT_HIGH) #define mul_add(r,a,w,c) { \ BN_ULONG high,low,ret,tmp=(a); \ Loading