Kill unused macro and reimplement it for that single context it can (a2eb9688) · Commits · CYBER - Cyber Security / TS 103 523 MSP / TLMSP / TLMSP OpenSSL

crypto/md32_common.h

+36 −53

Original line number	Diff line number	Diff line
		@@ -195,7 +195,6 @@
		* Some GNU C inline assembler templates. Note that these are
		* rotates by constant number of bits! But that's exactly
		* what we need here...
		*
		* <appro@fy.chalmers.se>
		*/
		# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
		@@ -217,39 +216,6 @@
		})
		# endif
		# endif

		/*
		* Engage compiler specific "fetch in reverse byte order"
		* intrinsic function if available.
		*/
		# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
		/* some GNU C inline assembler templates by <appro@fy.chalmers.se> */
		# if (defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)) && !defined(I386_ONLY)
		# define BE_FETCH32(a) ({ register unsigned int l=(a);\
		asm ( \
		"bswapl %0" \
		: "=r"(l) : "0"(l)); \
		l; \
		})
		# elif defined(__powerpc)
		# define LE_FETCH32(a) ({ register unsigned int l; \
		asm ( \
		"lwbrx %0,0,%1" \
		: "=r"(l) \
		: "r"(a)); \
		l; \
		})

		# elif defined(__sparc) && defined(OPENSSL_SYS_ULTRASPARC)
		# define LE_FETCH32(a) ({ register unsigned int l; \
		asm ( \
		"lda [%1]#ASI_PRIMARY_LITTLE,%0"\
		: "=r"(l) \
		: "r"(a)); \
		l; \
		})
		# endif
		# endif
		#endif /* PEDANTIC */

		#if HASH_LONG_LOG2==2 /* Engage only if sizeof(HASH_LONG)== 4 */
		@@ -301,28 +267,12 @@
		# if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
		# define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER
		# endif
		# elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
		# ifndef HOST_FETCH32
		# ifdef LE_FETCH32
		# define HOST_FETCH32(p,l) LE_FETCH32(p)
		# elif defined(REVERSE_FETCH32)
		# define HOST_FETCH32(p,l) REVERSE_FETCH32(p,l)
		# endif
		# endif
		# endif
		#elif defined(L_ENDIAN)
		# if defined(DATA_ORDER_IS_LITTLE_ENDIAN)
		# if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
		# define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER
		# endif
		# elif defined(DATA_ORDER_IS_BIG_ENDIAN)
		# ifndef HOST_FETCH32
		# ifdef BE_FETCH32
		# define HOST_FETCH32(p,l) BE_FETCH32(p)
		# elif defined(REVERSE_FETCH32)
		# define HOST_FETCH32(p,l) REVERSE_FETCH32(p,l)
		# endif
		# endif
		# endif
		#endif

		@@ -334,11 +284,32 @@

		#if defined(DATA_ORDER_IS_BIG_ENDIAN)

		#ifndef PEDANTIC
		# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
		# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
		/*
		* This gives ~30-40% performance improvement in SHA-256 compiled
		* with gcc [on P4]. Well, first macro to be frank. We can pull
		* this trick on x86* platforms only, because these CPUs can fetch
		* unaligned data without raising an exception.
		*/
		# define HOST_c2l(c,l) ({ (l)=*((const unsigned int *)(c)); \
		asm ("bswapl %0":"=r"(l):"0"(l)); \
		(c)+=4; (l); })
		# define HOST_l2c(l,c) ({ unsigned int r=(l); \
		asm ("bswapl %0":"=r"(r):"0"(r)); \
		*((unsigned int *)(c))=r; (c)+=4; r; })
		# endif
		# endif
		#endif

		#ifndef HOST_c2l
		#define HOST_c2l(c,l) (l =(((unsigned long)(*((c)++)))<<24), \
		l|=(((unsigned long)(*((c)++)))<<16), \
		l|=(((unsigned long)(*((c)++)))<< 8), \
		l|=(((unsigned long)(*((c)++))) ), \
		l)
		#endif
		#define HOST_p_c2l(c,l,n) { \
		switch (n) { \
		case 0: l =((unsigned long)(*((c)++)))<<24; \
		@@ -362,19 +333,29 @@
		case 2: l|=((unsigned long)(*(--(c))))<<16; \
		case 1: l|=((unsigned long)(*(--(c))))<<24; \
		} }
		#ifndef HOST_l2c
		#define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l)>>24)&0xff), \
		*((c)++)=(unsigned char)(((l)>>16)&0xff), \
		*((c)++)=(unsigned char)(((l)>> 8)&0xff), \
		*((c)++)=(unsigned char)(((l) )&0xff), \
		l)
		#endif

		#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)

		#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
		/* See comment in DATA_ORDER_IS_BIG_ENDIAN section. */
		# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, l)
		# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, l)
		#endif

		#ifndef HOST_c2l
		#define HOST_c2l(c,l) (l =(((unsigned long)(*((c)++))) ), \
		l|=(((unsigned long)(*((c)++)))<< 8), \
		l|=(((unsigned long)(*((c)++)))<<16), \
		l|=(((unsigned long)(*((c)++)))<<24), \
		l)
		#endif
		#define HOST_p_c2l(c,l,n) { \
		switch (n) { \
		case 0: l =((unsigned long)(*((c)++))); \
		@@ -398,11 +379,13 @@
		case 2: l|=((unsigned long)(*(--(c))))<< 8; \
		case 1: l|=((unsigned long)(*(--(c)))); \
		} }
		#ifndef HOST_l2c
		#define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
		*((c)++)=(unsigned char)(((l)>> 8)&0xff), \
		*((c)++)=(unsigned char)(((l)>>16)&0xff), \
		*((c)++)=(unsigned char)(((l)>>24)&0xff), \
		l)
		#endif

		#endif

		@@ -415,7 +398,7 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
		const unsigned char *data=data_;
		register HASH_LONG * p;
		register HASH_LONG l;
		int sw,sc,ew,ec;
		unsigned int sw,sc,ew,ec;

		if (len==0) return 1;

		@@ -481,7 +464,7 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
		* Note that HASH_BLOCK_DATA_ORDER_ALIGNED gets defined
		* only if sizeof(HASH_LONG)==4.
		*/
		if ((((unsigned long)data)%4) == 0)
		if ((((size_t)data)%4) == 0)
		{
		/* data is properly aligned so that we can cast it: */
		HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,sw);
		@@ -530,7 +513,7 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
		void HASH_TRANSFORM (HASH_CTX *c, const unsigned char *data)
		{
		#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
		if ((((unsigned long)data)%4) == 0)
		if ((((size_t)data)%4) == 0)
		/* data is properly aligned so that we can cast it: */
		HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,1);
		else

crypto/sha/asm/sha512-sse2.pl

+2 −2

Original line number	Diff line number	Diff line
		@@ -21,11 +21,11 @@
		# Throughput performance in MBps (larger is better):
		#
		# 2.4GHz P4 1.4GHz AMD32 1.4GHz AMD64(*)
		# SHA256/gcc(*) 39 42 59
		# SHA256/gcc(*) 54 43 59
		# SHA512/gcc 17 23 92
		# SHA512/sse2 54(**) 55(**)
		# SHA512/icc 26 28
		# SHA256/icc(*) 64 54
		# SHA256/icc(*) 65 54
		#
		# (*) AMD64 and SHA256 numbers are presented mostly for amusement or
		# reference purposes.