Commit 1cbdbcd5 authored by Andy Polyakov's avatar Andy Polyakov
Browse files

x86_64 performance patch.

parent 7e201e9f
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -391,7 +391,7 @@ my %table=(
"linux-s390",	"gcc:-DB_ENDIAN -DTERMIO -DNO_ASM -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG::::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-s390x",	"gcc:-DB_ENDIAN -DTERMIO -DNO_ASM -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG::::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-ia64",   "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK RC4_CHAR:asm/ia64.o:::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-x86_64", "gcc:-DL_ENDIAN -DNO_ASM ::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG::::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-x86_64", "gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall -DMD32_REG_T=int::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK RC4_CHAR BF_PTR2 DES_INT DES_UNROLL:asm/x86_64-gcc.o:::::::::dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"NetBSD-sparc",	"gcc:-DTERMIOS -O3 -fomit-frame-pointer -mv8 -Wall -DB_ENDIAN::(unknown):::BN_LLONG MD2_CHAR RC4_INDEX DES_UNROLL::::::::::dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"NetBSD-m68",	"gcc:-DTERMIOS -O3 -fomit-frame-pointer -Wall -DB_ENDIAN::(unknown):::BN_LLONG MD2_CHAR RC4_INDEX DES_UNROLL::::::::::dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"NetBSD-x86",	"gcc:-DTERMIOS -O3 -fomit-frame-pointer -m486 -Wall::(unknown):::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}::::::::::dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+1 −4
Original line number Diff line number Diff line
@@ -274,10 +274,7 @@ do_gnu-shared:
	libs="$$libs -l$$i"; \
	done

DETECT_GNU_LD=${CC} -v 2>&1 | grep '^gcc' >/dev/null 2>&1 && \
	my_ld=`${CC} -print-prog-name=ld 2>&1` && \
	[ -n "$$my_ld" ] && \
	$$my_ld -v 2>&1 | grep 'GNU ld' >/dev/null 2>&1
DETECT_GNU_LD=(${CC} -Wl,-V /dev/null 2>&1 | grep '^GNU ld' )>/dev/null

# For Darwin AKA Mac OS/X (dyld)
do_darwin-shared: 
+2 −0
Original line number Diff line number Diff line
@@ -136,6 +136,8 @@ asm/ia64-cpp.o: asm/ia64.S
	$(CC) $(ASFLAGS) -c -o asm/ia64-cpp.o /tmp/ia64.$$$$.s;	\
	rm -f /tmp/ia64.$$$$.s

asm/x86_64-gcc.o: asm/x86_64-gcc.c

files:
	$(PERL) $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO

+17 −1
Original line number Diff line number Diff line
@@ -150,6 +150,20 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
	    q;					\
	})
#  define REMAINDER_IS_ALREADY_CALCULATED
#  elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
   /*
    * Same story here, but it's 128-bit by 64-bit division. Wow!
    *					<appro@fy.chalmers.se>
    */
#  define bn_div_words(n0,n1,d0)		\
	({  asm volatile (			\
		"divq	%4"			\
		: "=a"(q), "=d"(rem)		\
		: "a"(n1), "d"(n0), "g"(d0)	\
		: "cc");			\
	    q;					\
	})
#  define REMAINDER_IS_ALREADY_CALCULATED
#  endif /* __<cpu> */
# endif /* __GNUC__ */
#endif /* OPENSSL_NO_ASM */
@@ -296,7 +310,9 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
			rem=(n1-q*d0)&BN_MASK2;
#endif

#ifdef BN_UMULT_HIGH
#if defined(BN_UMULT_LOHI)
			BN_UMULT_LOHI(t2l,t2h,d1,q);
#elif defined(BN_UMULT_HIGH)
			t2l = d1 * q;
			t2h = BN_UMULT_HIGH(d1,q);
#else
+16 −1
Original line number Diff line number Diff line
@@ -230,6 +230,21 @@ struct bignum_ctx
	     : "r"(a), "r"(b));		\
	ret;			})
#  endif	/* compiler */
# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
#  if defined(__GNUC__)
#   define BN_UMULT_HIGH(a,b)	({	\
	register BN_ULONG ret,discard;	\
	asm ("mulq	%3"		\
	     : "=a"(discard),"=d"(ret)	\
	     : "a"(a), "g"(b)		\
	     : "cc");			\
	ret;			})
#   define BN_UMULT_LOHI(low,high,a,b)	\
	asm ("mulq	%3"		\
		: "=a"(low),"=d"(high)	\
		: "a"(a),"g"(b)		\
		: "cc");
#  endif
# endif		/* cpu */
#endif		/* OPENSSL_NO_ASM */

@@ -337,7 +352,7 @@ struct bignum_ctx

#define LBITS(a)	((a)&BN_MASK2l)
#define HBITS(a)	(((a)>>BN_BITS4)&BN_MASK2l)
#define	L2HBITS(a)	((BN_ULONG)((a)&BN_MASK2l)<<BN_BITS4)
#define	L2HBITS(a)	(((a)<<BN_BITS4)&BN_MASK2)

#define LLBITS(a)	((a)&BN_MASKl)
#define LHBITS(a)	(((a)>>BN_BITS2)&BN_MASKl)
Loading