Commit 28feb1f8 authored by Andy Polyakov
Browse files

md32_common.h update from HEAD.

parent 9596d1e6
Loading
Loading
Loading
Loading
+79 −294
Original line number Diff line number Diff line
/* crypto/md32_common.h */
/* ====================================================================
 * Copyright (c) 1999-2002 The OpenSSL Project.  All rights reserved.
 * Copyright (c) 1999-2007 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
@@ -47,10 +47,6 @@
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 *
 * This product includes cryptographic software written by Eric Young
 * (eay@cryptsoft.com).  This product includes software written by Tim
 * Hudson (tjh@cryptsoft.com).
 *
 */

/*
@@ -76,40 +72,27 @@
 *		typedef struct {
 *			...
 *			HASH_LONG	Nl,Nh;
 *			either {
 *			HASH_LONG	data[HASH_LBLOCK];
 *			unsigned char	data[HASH_CBLOCK];
 *			};
 *			unsigned int	num;
 *			...
 *			} HASH_CTX;
 *	data[] vector is expected to be zeroed upon first call to
 *	HASH_UPDATE.
 * HASH_UPDATE
 *	name of "Update" function, implemented here.
 * HASH_TRANSFORM
 *	name of "Transform" function, implemented here.
 * HASH_FINAL
 *	name of "Final" function, implemented here.
 * HASH_BLOCK_HOST_ORDER
 *	name of "block" function treating *aligned* input message
 *	in host byte order, implemented externally.
 * HASH_BLOCK_DATA_ORDER
 *	name of "block" function treating *unaligned* input message
 *	in original (data) byte order, implemented externally (it
 *	actually is optional if data and host are of the same
 *	"endianness").
 *	name of "block" function capable of treating *unaligned* input
 *	message in original (data) byte order, implemented externally.
 * HASH_MAKE_STRING
 *	macro converting context variables to an ASCII hash string.
 *
 * Optional macros:
 *
 * B_ENDIAN or L_ENDIAN
 *	defines host byte-order.
 * HASH_LONG_LOG2
 *	defaults to 2 if not stated otherwise.
 * HASH_LBLOCK
 *	assumed to be HASH_CBLOCK/4 if not stated otherwise.
 * HASH_BLOCK_DATA_ORDER_ALIGNED
 *	alternative "block" function capable of treating
 *	aligned input message in original (data) order,
 *	implemented externally.
 *
 * MD5 example:
 *
 *	#define DATA_ORDER_IS_LITTLE_ENDIAN
@@ -118,11 +101,9 @@
 *	#define HASH_LONG_LOG2		MD5_LONG_LOG2
 *	#define HASH_CTX		MD5_CTX
 *	#define HASH_CBLOCK		MD5_CBLOCK
 *	#define HASH_LBLOCK		MD5_LBLOCK
 *	#define HASH_UPDATE		MD5_Update
 *	#define HASH_TRANSFORM		MD5_Transform
 *	#define HASH_FINAL		MD5_Final
 *	#define HASH_BLOCK_HOST_ORDER	md5_block_host_order
 *	#define HASH_BLOCK_DATA_ORDER	md5_block_data_order
 *
 *					<appro@fy.chalmers.se>
@@ -152,27 +133,9 @@
#error "HASH_FINAL must be defined!"
#endif

#ifndef HASH_BLOCK_HOST_ORDER
#error "HASH_BLOCK_HOST_ORDER must be defined!"
#endif

#if 0
/*
 * Moved below as it's required only if HASH_BLOCK_DATA_ORDER_ALIGNED
 * isn't defined.
 */
#ifndef HASH_BLOCK_DATA_ORDER
#error "HASH_BLOCK_DATA_ORDER must be defined!"
#endif
#endif

#ifndef HASH_LBLOCK
#define HASH_LBLOCK	(HASH_CBLOCK/4)
#endif

#ifndef HASH_LONG_LOG2
#define HASH_LONG_LOG2	2
#endif

/*
 * Engage compiler specific rotate intrinsic function if available.
@@ -206,7 +169,8 @@
				: "cc");		\
			   ret;				\
			})
#  elif defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__)
#  elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
	defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__)
#   define ROTATE(a,n)	({ register unsigned int ret;	\
				asm (			\
				"rlwinm %0,%1,%2,0,31"	\
@@ -214,80 +178,28 @@
				: "r"(a), "I"(n));	\
			   ret;				\
			})
#  elif defined(__s390x__)
/*
 * s390x flavour of ROTATE(a,n): a GNU statement expression that issues a
 * single "rll" instruction through inline asm and yields the result as an
 * unsigned int.  n is handed to the asm with the "I" constraint.
 * NOTE(review): "rll" is presumably the 32-bit rotate-left and "I" an
 * immediate-only constraint (so n would have to be a compile-time
 * constant) - confirm against the GCC s390 constraint documentation.
 */
#   define ROTATE(a,n) ({ register unsigned int ret;	\
				asm ("rll %0,%1,%2"	\
				: "=r"(ret)		\
				: "r"(a), "I"(n));	\
			  ret;				\
			})
#  endif
# endif
#endif /* PEDANTIC */

#if HASH_LONG_LOG2==2	/* Engage only if sizeof(HASH_LONG)== 4 */
/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
/*
 * REVERSE_FETCH32(a,l): read one HASH_LONG from address a into the
 * scratch lvalue l and evaluate to that word with its four bytes in
 * reverse order.  The result is the value of the whole comma expression;
 * l is only a temporary and does not hold the reversed word afterwards.
 * a is dereferenced through a (const HASH_LONG *) cast, so it must be
 * suitable for such an access.
 *
 * Two variants are provided, selected on whether a compiler/CPU specific
 * ROTATE has already been defined above.
 */
#ifdef ROTATE
/* 5 instructions with rotate instruction, else 9 */
/*
 * Rotate-based variant: rotating l left by 8 and masking keeps the two
 * odd bytes in their swapped places, rotating the masked even bytes left
 * by 24 puts them in theirs; OR-ing both halves gives the reversed word.
 */
#define REVERSE_FETCH32(a,l)	(					\
		l=*(const HASH_LONG *)(a),				\
		((ROTATE(l,8)&0x00FF00FF)|(ROTATE((l&0x00FF00FF),24)))	\
				)
#else
/* 6 instructions with rotate instruction, else 8 */
/*
 * Shift-based variant: first swap the bytes inside each 16-bit half,
 * then swap the two halves with ROTATE(l,16).  ROTATE is not defined at
 * this point; the name is resolved when the macro is expanded, i.e. to
 * the generic fallback defined right after this section.
 */
#define REVERSE_FETCH32(a,l)	(				\
		l=*(const HASH_LONG *)(a),			\
		l=(((l>>8)&0x00FF00FF)|((l&0x00FF00FF)<<8)),	\
		ROTATE(l,16)					\
				)
/*
 * Originally the middle line started with l=(((l&0xFF00FF00)>>8)|...
 * It's rewritten as above for two reasons:
 *	- RISCs aren't good at long constants and have to explicitly
 *	  compose 'em with several (well, usually 2) instructions in a
 *	  register before performing the actual operation and (as you
 *	  already realized:-) having same constant should inspire the
 *	  compiler to permanently allocate the only register for it;
 *	- most modern CPUs have two ALUs, but usually only one has
 *	  circuitry for shifts:-( this minor tweak inspires compiler
 *	  to schedule shift instructions in a better way...
 *
 *				<appro@fy.chalmers.se>
 */
#endif
#endif

/*
 * Generic fallback, used when no compiler/CPU specific ROTATE was picked
 * above: rotate the low 32 bits of a left by n using two shifts.
 * a is masked with 0xffffffff before the right shift so that bits above
 * bit 31 (when a's type is wider than 32 bits) do not leak into the
 * result; the left-shifted half is NOT masked, so with a wider type the
 * caller has to discard the upper bits itself.
 * n is expected to be in 1..31: n==0 would make the right shift count 32.
 * Both arguments are evaluated more than once.
 */
#ifndef ROTATE
#define ROTATE(a,n)     (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
#endif

/*
 * Make some obvious choices. E.g., HASH_BLOCK_DATA_ORDER_ALIGNED
 * and HASH_BLOCK_HOST_ORDER ought to be the same if input data
 * and host are of the same "endianness". It's possible to mask
 * this with blank #define HASH_BLOCK_DATA_ORDER though...
 *
 *				<appro@fy.chalmers.se>
 */
#if defined(B_ENDIAN)
#  if defined(DATA_ORDER_IS_BIG_ENDIAN)
#    if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
#      define HASH_BLOCK_DATA_ORDER_ALIGNED	HASH_BLOCK_HOST_ORDER
#    endif
#  endif
#elif defined(L_ENDIAN)
#  if defined(DATA_ORDER_IS_LITTLE_ENDIAN)
#    if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
#      define HASH_BLOCK_DATA_ORDER_ALIGNED	HASH_BLOCK_HOST_ORDER
#    endif
#  endif
#endif

#if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
#ifndef HASH_BLOCK_DATA_ORDER
#error "HASH_BLOCK_DATA_ORDER must be defined!"
#endif
#endif

#if defined(DATA_ORDER_IS_BIG_ENDIAN)

#ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if ((defined(__i386) || defined(__i386__)) && !defined(I386_ONLY)) || \
      (defined(__x86_64) || defined(__x86_64__))
#   if !defined(B_ENDIAN)
    /*
     * This gives ~30-40% performance improvement in SHA-256 compiled
     * with gcc [on P4]. Well, first macro to be frank. We can pull
@@ -303,6 +215,11 @@
#   endif
#  endif
# endif
#endif
/*
 * s390/s390x shortcut for the big-endian data section: instead of
 * assembling the word byte by byte, HOST_c2l reads and HOST_l2c writes
 * the 4 bytes at c through an (unsigned int *) cast, then advances c by
 * 4; both evaluate to l.  No byte shuffling and no alignment fix-up is
 * done here.
 * NOTE(review): this presumably relies on the host being big-endian and
 * tolerating unaligned word accesses - confirm for the targeted CPUs.
 */
#if defined(__s390__) || defined(__s390x__)
# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l))
# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l))
#endif

#ifndef HOST_c2l
#define HOST_c2l(c,l)	(l =(((unsigned long)(*((c)++)))<<24),		\
@@ -311,29 +228,6 @@
			 l|=(((unsigned long)(*((c)++)))    ),		\
			 l)
#endif
/*
 * HOST_p_c2l(c,l,n), big-endian data order: finish a partially filled
 * word.  n (0..3) is the byte position inside the word at which to
 * continue; bytes are taken from c (which is post-incremented once per
 * byte) and merged into l at shifts 24, 16, 8, 0 until the word is full.
 * The cases fall through on purpose.  For n==0 l is assigned rather
 * than OR-ed, so its previous contents are discarded; for n>0 the upper
 * bytes already in l are kept.  There is no default case: any other n
 * leaves both l and c untouched.
 * Expands to a bare { } block (a statement, not an expression).
 */
#define HOST_p_c2l(c,l,n)	{					\
			switch (n) {					\
			case 0: l =((unsigned long)(*((c)++)))<<24;	\
			case 1: l|=((unsigned long)(*((c)++)))<<16;	\
			case 2: l|=((unsigned long)(*((c)++)))<< 8;	\
			case 3: l|=((unsigned long)(*((c)++)));		\
				} }
/*
 * HOST_p_c2l_p(c,l,sc,len), big-endian data order: length-limited
 * variant of HOST_p_c2l.  Starting at byte position sc (0..2) it merges
 * bytes from c into l at shifts 24, 16, 8 and stops as soon as len
 * bytes have been consumed; the lowest byte of the word is never filled
 * by this macro.
 * len is decremented in place and is tested only after the decrement,
 * so it must be a modifiable lvalue and at least 1 on entry.  It is not
 * decremented after the final (case 2) byte.
 * Expands to a bare { } block (a statement, not an expression).
 */
#define HOST_p_c2l_p(c,l,sc,len) {					\
			switch (sc) {					\
			case 0: l =((unsigned long)(*((c)++)))<<24;	\
				if (--len == 0) break;			\
			case 1: l|=((unsigned long)(*((c)++)))<<16;	\
				if (--len == 0) break;			\
			case 2: l|=((unsigned long)(*((c)++)))<< 8;	\
				} }
/*
 * HOST_c2l_p(c,l,n), big-endian data order: load the n (0..3) bytes at c
 * into the top of an otherwise zero word, first byte at bits 24..31.
 * l is cleared first, c is advanced by n and the bytes are then read
 * backwards with pre-decrement, falling through the cases on purpose.
 * NOTE the pointer is not incremented at the end of this: c finishes
 * where it started.  For n==0 the result is simply l==0.
 * Expands to a bare { } block (a statement, not an expression).
 */
#define HOST_c2l_p(c,l,n)	{					\
			l=0; (c)+=n;					\
			switch (n) {					\
			case 3: l =((unsigned long)(*(--(c))))<< 8;	\
			case 2: l|=((unsigned long)(*(--(c))))<<16;	\
			case 1: l|=((unsigned long)(*(--(c))))<<24;	\
				} }
#ifndef HOST_l2c
#define HOST_l2c(l,c)	(*((c)++)=(unsigned char)(((l)>>24)&0xff),	\
			 *((c)++)=(unsigned char)(((l)>>16)&0xff),	\
@@ -344,6 +238,18 @@

#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)

/*
 * s390x + GCC inline-asm versions of HOST_c2l/HOST_l2c for the
 * little-endian data section.  Each one is a GNU statement expression
 * that issues a single instruction ("lrv" for the load, "strv" for the
 * store) on the address held in c, then advances c by 4 and yields l.
 * They are skipped when PEDANTIC, OPENSSL_NO_ASM or
 * OPENSSL_NO_INLINE_ASM is defined, leaving the generic byte-wise
 * definitions to take over.
 * NOTE(review): lrv/strv are presumably the byte-reversing 32-bit
 * load/store - confirm against the z/Architecture reference.  Also note
 * that only the store declares a "memory" clobber; the load passes c as
 * a plain register input.
 */
#ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if defined(__s390x__)
#   define HOST_c2l(c,l)	({ asm ("lrv	%0,0(%1)"		\
					:"=r"(l) : "r"(c));		\
				   (c)+=4; (l);				})
#   define HOST_l2c(l,c)	({ asm ("strv	%0,0(%1)"		\
					: : "r"(l),"r"(c) : "memory");	\
				   (c)+=4; (l);				})
#  endif
# endif
#endif
#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
# ifndef B_ENDIAN
   /* See comment in DATA_ORDER_IS_BIG_ENDIAN section. */
@@ -359,29 +265,6 @@
			 l|=(((unsigned long)(*((c)++)))<<24),		\
			 l)
#endif
/*
 * HOST_p_c2l(c,l,n), little-endian data order: finish a partially
 * filled word.  n (0..3) is the byte position inside the word at which
 * to continue; bytes are taken from c (which is post-incremented once
 * per byte) and merged into l at shifts 0, 8, 16, 24 until the word is
 * full.  The cases fall through on purpose.  For n==0 l is assigned
 * rather than OR-ed, so its previous contents are discarded; for n>0
 * the lower bytes already in l are kept.  There is no default case: any
 * other n leaves both l and c untouched.
 * Expands to a bare { } block (a statement, not an expression).
 */
#define HOST_p_c2l(c,l,n)	{					\
			switch (n) {					\
			case 0: l =((unsigned long)(*((c)++)));		\
			case 1: l|=((unsigned long)(*((c)++)))<< 8;	\
			case 2: l|=((unsigned long)(*((c)++)))<<16;	\
			case 3: l|=((unsigned long)(*((c)++)))<<24;	\
				} }
/*
 * HOST_p_c2l_p(c,l,sc,len), little-endian data order: length-limited
 * variant of HOST_p_c2l.  Starting at byte position sc (0..2) it merges
 * bytes from c into l at shifts 0, 8, 16 and stops as soon as len bytes
 * have been consumed; the highest byte of the word is never filled by
 * this macro.
 * len is decremented in place and is tested only after the decrement,
 * so it must be a modifiable lvalue and at least 1 on entry.  It is not
 * decremented after the final (case 2) byte.
 * Expands to a bare { } block (a statement, not an expression).
 */
#define HOST_p_c2l_p(c,l,sc,len) {					\
			switch (sc) {					\
			case 0: l =((unsigned long)(*((c)++)));		\
				if (--len == 0) break;			\
			case 1: l|=((unsigned long)(*((c)++)))<< 8;	\
				if (--len == 0) break;			\
			case 2: l|=((unsigned long)(*((c)++)))<<16;	\
				} }
/*
 * HOST_c2l_p(c,l,n), little-endian data order: load the n (0..3) bytes
 * at c into the bottom of an otherwise zero word, first byte at bits
 * 0..7.  l is cleared first, c is advanced by n and the bytes are then
 * read backwards with pre-decrement, falling through the cases on
 * purpose.
 * NOTE the pointer is not incremented at the end of this: c finishes
 * where it started.  For n==0 the result is simply l==0.
 * Expands to a bare { } block (a statement, not an expression).
 */
#define HOST_c2l_p(c,l,n)	{					\
			l=0; (c)+=n;					\
			switch (n) {					\
			case 3: l =((unsigned long)(*(--(c))))<<16;	\
			case 2: l|=((unsigned long)(*(--(c))))<< 8;	\
			case 1: l|=((unsigned long)(*(--(c))));		\
				} }
#ifndef HOST_l2c
#define HOST_l2c(l,c)	(*((c)++)=(unsigned char)(((l)    )&0xff),	\
			 *((c)++)=(unsigned char)(((l)>> 8)&0xff),	\
@@ -399,9 +282,9 @@
int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
	{
	const unsigned char *data=data_;
	register HASH_LONG * p;
	register HASH_LONG l;
	size_t sw,sc,ew,ec;
	unsigned char *p;
	HASH_LONG l;
	size_t n;

	if (len==0) return 1;

@@ -413,101 +296,43 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
	c->Nh+=(len>>29);	/* might cause compiler warning on 16-bit */
	c->Nl=l;

	if (c->num != 0)
	n = c->num;
	if (n != 0)
		{
		p=c->data;
		sw=c->num>>2;
		sc=c->num&0x03;
		p=(unsigned char *)c->data;

		if ((c->num+len) >= HASH_CBLOCK)
			{
			l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l;
			for (; sw<HASH_LBLOCK; sw++)
		if ((n+len) >= HASH_CBLOCK)
			{
				HOST_c2l(data,l); p[sw]=l;
				}
			HASH_BLOCK_HOST_ORDER (c,p,1);
			len-=(HASH_CBLOCK-c->num);
			memcpy (p+n,data,HASH_CBLOCK-n);
			HASH_BLOCK_DATA_ORDER (c,p,1);
			n      = HASH_CBLOCK-n;
			data  += n;
			len   -= n;
			c->num = 0;
			/* drop through and do the rest */
			memset (p,0,HASH_CBLOCK);	/* keep it zeroed */
			}
		else
			{
			memcpy (p+n,data,len);
			c->num += (unsigned int)len;
			if ((sc+len) < 4) /* ugly, add char's to a word */
				{
				l=p[sw]; HOST_p_c2l_p(data,l,sc,len); p[sw]=l;
				}
			else
				{
				ew=(c->num>>2);
				ec=(c->num&0x03);
				if (sc)
					l=p[sw];
				HOST_p_c2l(data,l,sc);
				p[sw++]=l;
				for (; sw < ew; sw++)
					{
					HOST_c2l(data,l); p[sw]=l;
					}
				if (ec)
					{
					HOST_c2l_p(data,l,ec); p[sw]=l;
					}
				}
			return 1;
			}
		}

	sw=len/HASH_CBLOCK;
	if (sw > 0)
		{
#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
		/*
		 * Note that HASH_BLOCK_DATA_ORDER_ALIGNED gets defined
		 * only if sizeof(HASH_LONG)==4.
		 */
		if ((((size_t)data)%4) == 0)
			{
			/* data is properly aligned so that we can cast it: */
			HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,sw);
			sw*=HASH_CBLOCK;
			data+=sw;
			len-=sw;
			}
		else
#if !defined(HASH_BLOCK_DATA_ORDER)
			while (sw--)
	n = len/HASH_CBLOCK;
	if (n > 0)
		{
				memcpy (p=c->data,data,HASH_CBLOCK);
				HASH_BLOCK_DATA_ORDER_ALIGNED(c,p,1);
				data+=HASH_CBLOCK;
				len-=HASH_CBLOCK;
				}
#endif
#endif
#if defined(HASH_BLOCK_DATA_ORDER)
			{
			HASH_BLOCK_DATA_ORDER(c,data,sw);
			sw*=HASH_CBLOCK;
			data+=sw;
			len-=sw;
			}
#endif
		HASH_BLOCK_DATA_ORDER (c,data,n);
		n    *= HASH_CBLOCK;
		data += n;
		len  -= n;
		}

	if (len != 0)
		{
		p = c->data;
		p = (unsigned char *)c->data;
		c->num = len;
		ew=len>>2;	/* words to copy */
		ec=len&0x03;
		for (; ew; ew--,p++)
			{
			HOST_c2l(data,l); *p=l;
			}
		HOST_c2l_p(data,l,ec);
		*p=l;
		memcpy (p,data,len);
		}
	return 1;
	}
@@ -515,73 +340,38 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)

/*
 * HASH_TRANSFORM: feed a single block at data to the externally
 * supplied block function, updating the state in c.  The block count
 * passed down is always 1.
 *
 * Which block function is used depends on the macros that are defined:
 *  - HASH_BLOCK_DATA_ORDER_ALIGNED defined and data 4-byte aligned:
 *    data is cast to (const HASH_LONG *) and handed over directly;
 *  - HASH_BLOCK_DATA_ORDER_ALIGNED defined, data not aligned, and no
 *    HASH_BLOCK_DATA_ORDER: HASH_CBLOCK bytes are first copied into
 *    c->data (overwriting whatever was buffered there) and processed
 *    from that buffer;
 *  - otherwise HASH_BLOCK_DATA_ORDER is called on data as is.
 */
void HASH_TRANSFORM (HASH_CTX *c, const unsigned char *data)
	{
#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
	if ((((size_t)data)%4) == 0)
		/* data is properly aligned so that we can cast it: */
		HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,1);
	else
	/*
	 * The "else" above is completed by whichever statement the
	 * preprocessor leaves next: the copy-and-process block right
	 * below, or the HASH_BLOCK_DATA_ORDER call further down.
	 */
#if !defined(HASH_BLOCK_DATA_ORDER)
		{
		memcpy (c->data,data,HASH_CBLOCK);
		HASH_BLOCK_DATA_ORDER_ALIGNED (c,c->data,1);
		}
#endif
#endif
#if defined(HASH_BLOCK_DATA_ORDER)
	/* Unconditional when no aligned variant exists, else the "else" arm. */
	HASH_BLOCK_DATA_ORDER (c,data,1);
#endif
	}


int HASH_FINAL (unsigned char *md, HASH_CTX *c)
	{
	register HASH_LONG *p;
	register unsigned long l;
	register int i,j;
	static const unsigned char end[4]={0x80,0x00,0x00,0x00};
	const unsigned char *cp=end;

	/* c->num should definitely have room for at least one more byte. */
	p=c->data;
	i=c->num>>2;
	j=c->num&0x03;

#if 0
	/* purify often complains about the following line as an
	 * Uninitialized Memory Read.  While this can be true, the
	 * following p_c2l macro will reset l when that case is true.
	 * This is because j&0x03 contains the number of 'valid' bytes
	 * already in p[i].  If and only if j&0x03 == 0, the UMR will
	 * occur but this is also the only time p_c2l will do
	 * l= *(cp++) instead of l|= *(cp++)
	 * Many thanks to Alex Tang <altitude@cic.net> for pickup this
	 * 'potential bug' */
#ifdef PURIFY
	if (j==0) p[i]=0; /* Yeah, but that's not the way to fix it:-) */
#endif
	l=p[i];
#else
	l = (j==0) ? 0 : p[i];
#endif
	HOST_p_c2l(cp,l,j); p[i++]=l; /* i is the next 'undefined word' */
	unsigned char *p = (unsigned char *)c->data;
	size_t n = c->num;

	p[n] = 0x80; /* there is always room for one */
	n++;

	if (i>(HASH_LBLOCK-2)) /* save room for Nl and Nh */
	if (n > (HASH_CBLOCK-8))
		{
		if (i<HASH_LBLOCK) p[i]=0;
		HASH_BLOCK_HOST_ORDER (c,p,1);
		i=0;
		memset (p+n,0,HASH_CBLOCK-n);
		n=0;
		HASH_BLOCK_DATA_ORDER (c,p,1);
		}
	for (; i<(HASH_LBLOCK-2); i++)
		p[i]=0;
	memset (p+n,0,HASH_CBLOCK-8-n);

	p += HASH_CBLOCK-8;
#if   defined(DATA_ORDER_IS_BIG_ENDIAN)
	p[HASH_LBLOCK-2]=c->Nh;
	p[HASH_LBLOCK-1]=c->Nl;
	(void)HOST_l2c(c->Nh,p);
	(void)HOST_l2c(c->Nl,p);
#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
	p[HASH_LBLOCK-2]=c->Nl;
	p[HASH_LBLOCK-1]=c->Nh;
	(void)HOST_l2c(c->Nl,p);
	(void)HOST_l2c(c->Nh,p);
#endif
	HASH_BLOCK_HOST_ORDER (c,p,1);
	p -= HASH_CBLOCK;
	HASH_BLOCK_DATA_ORDER (c,p,1);
	c->num=0;
	memset (p,0,HASH_CBLOCK);

#ifndef HASH_MAKE_STRING
#error "HASH_MAKE_STRING must be defined!"
@@ -589,11 +379,6 @@ int HASH_FINAL (unsigned char *md, HASH_CTX *c)
	HASH_MAKE_STRING(c,md);
#endif

	c->num=0;
	/* clear stuff, HASH_BLOCK may be leaving some stuff on the stack
	 * but I'm not worried :-)
	OPENSSL_cleanse((void *)c,sizeof(HASH_CTX));
	 */
	return 1;
	}

+0 −89
Original line number Diff line number Diff line
@@ -84,79 +84,6 @@ FIPS_NON_FIPS_MD_Init(MD4)
	return 1;
	}

#ifndef md4_block_host_order
/*
 * md4_block_host_order: fold num consecutive input blocks into the
 * chaining state c->A..c->D.
 *
 * c	context whose A, B, C, D members are read on entry and updated
 *	after every block.
 * data	input, accessed directly as an array of MD4_LONG words with no
 *	byte-order conversion; each block is HASH_LBLOCK words and the
 *	code indexes words 0..15 of it.
 * num	number of blocks to process; 0 leaves the context unchanged.
 *
 * The R0/R1/R2 round macros are defined outside this function; here
 * they are only given the working variables, a message word, a shift
 * count and a round constant.
 */
void md4_block_host_order (MD4_CTX *c, const void *data, size_t num)
	{
	const MD4_LONG *X=data;
	register unsigned MD32_REG_T A,B,C,D;

	/* Work on local copies of the chaining variables. */
	A=c->A;
	B=c->B;
	C=c->C;
	D=c->D;

	/* One iteration per block; X steps to the next block each time. */
	for (;num--;X+=HASH_LBLOCK)
		{
	/* Round 0 */
	R0(A,B,C,D,X[ 0], 3,0);
	R0(D,A,B,C,X[ 1], 7,0);
	R0(C,D,A,B,X[ 2],11,0);
	R0(B,C,D,A,X[ 3],19,0);
	R0(A,B,C,D,X[ 4], 3,0);
	R0(D,A,B,C,X[ 5], 7,0);
	R0(C,D,A,B,X[ 6],11,0);
	R0(B,C,D,A,X[ 7],19,0);
	R0(A,B,C,D,X[ 8], 3,0);
	R0(D,A,B,C,X[ 9], 7,0);
	R0(C,D,A,B,X[10],11,0);
	R0(B,C,D,A,X[11],19,0);
	R0(A,B,C,D,X[12], 3,0);
	R0(D,A,B,C,X[13], 7,0);
	R0(C,D,A,B,X[14],11,0);
	R0(B,C,D,A,X[15],19,0);
	/* Round 1 */
	R1(A,B,C,D,X[ 0], 3,0x5A827999L);
	R1(D,A,B,C,X[ 4], 5,0x5A827999L);
	R1(C,D,A,B,X[ 8], 9,0x5A827999L);
	R1(B,C,D,A,X[12],13,0x5A827999L);
	R1(A,B,C,D,X[ 1], 3,0x5A827999L);
	R1(D,A,B,C,X[ 5], 5,0x5A827999L);
	R1(C,D,A,B,X[ 9], 9,0x5A827999L);
	R1(B,C,D,A,X[13],13,0x5A827999L);
	R1(A,B,C,D,X[ 2], 3,0x5A827999L);
	R1(D,A,B,C,X[ 6], 5,0x5A827999L);
	R1(C,D,A,B,X[10], 9,0x5A827999L);
	R1(B,C,D,A,X[14],13,0x5A827999L);
	R1(A,B,C,D,X[ 3], 3,0x5A827999L);
	R1(D,A,B,C,X[ 7], 5,0x5A827999L);
	R1(C,D,A,B,X[11], 9,0x5A827999L);
	R1(B,C,D,A,X[15],13,0x5A827999L);
	/* Round 2 */
	R2(A,B,C,D,X[ 0], 3,0x6ED9EBA1);
	R2(D,A,B,C,X[ 8], 9,0x6ED9EBA1);
	R2(C,D,A,B,X[ 4],11,0x6ED9EBA1);
	R2(B,C,D,A,X[12],15,0x6ED9EBA1);
	R2(A,B,C,D,X[ 2], 3,0x6ED9EBA1);
	R2(D,A,B,C,X[10], 9,0x6ED9EBA1);
	R2(C,D,A,B,X[ 6],11,0x6ED9EBA1);
	R2(B,C,D,A,X[14],15,0x6ED9EBA1);
	R2(A,B,C,D,X[ 1], 3,0x6ED9EBA1);
	R2(D,A,B,C,X[ 9], 9,0x6ED9EBA1);
	R2(C,D,A,B,X[ 5],11,0x6ED9EBA1);
	R2(B,C,D,A,X[13],15,0x6ED9EBA1);
	R2(A,B,C,D,X[ 3], 3,0x6ED9EBA1);
	R2(D,A,B,C,X[11], 9,0x6ED9EBA1);
	R2(C,D,A,B,X[ 7],11,0x6ED9EBA1);
	R2(B,C,D,A,X[15],15,0x6ED9EBA1);

	/*
	 * Add this block's result into the context and reload the
	 * working variables from the new state for the next block.
	 */
	A = c->A += A;
	B = c->B += B;
	C = c->C += C;
	D = c->D += D;
		}
	}
#endif

#ifndef md4_block_data_order
#ifdef X
#undef X
@@ -242,19 +169,3 @@ void md4_block_data_order (MD4_CTX *c, const void *data_, size_t num)
		}
	}
#endif

#ifdef undef
/*
 * printit: debugging aid that dumps the 16 words at l to stderr as two
 * rows of eight zero-padded hexadecimal values, each value followed by
 * a space and each row terminated by a newline.
 *
 * l	pointer to at least 16 readable unsigned long values.
 *
 * Returns 0.  The function is declared int, so it must not fall off the
 * end without a value: a caller using the result would otherwise invoke
 * undefined behaviour.
 */
int printit(unsigned long *l)
	{
	int i,ii;

	for (i=0; i<2; i++)
		{
		for (ii=0; ii<8; ii++)
			{
			fprintf(stderr,"%08lx ",l[i*8+ii]);
			}
		fprintf(stderr,"\n");
		}
	return 0;
	}
#endif
+0 −44
Original line number Diff line number Diff line
@@ -65,43 +65,13 @@
#define MD4_LONG_LOG2 2 /* default to 32 bits */
#endif

void md4_block_host_order (MD4_CTX *c, const void *p,size_t num);
void md4_block_data_order (MD4_CTX *c, const void *p,size_t num);

#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__)
# if !defined(B_ENDIAN)
/*
 * *_block_host_order is expected to handle aligned data while
 * *_block_data_order - unaligned. As algorithm and host (x86)
 * are in this case of the same "endianness" these two are
 * otherwise indistinguishable. But normally you don't want to
 * call the same function because unaligned access in places
 * where alignment is expected is usually a "Bad Thing". Indeed,
 * on RISCs you get punished with BUS ERROR signal or *severe*
 * performance degradation. Intel CPUs are in turn perfectly
 * capable of loading unaligned data without such drastic side
 * effect. Yes, they say it's slower than aligned load, but no
 * exception is generated and therefore performance degradation
 * is *incomparable* with RISCs. What we should weigh here is
 * costs of unaligned access against costs of aligning data.
 * According to my measurements allowing unaligned access results
 * in ~9% performance improvement on Pentium II operating at
 * 266MHz. I won't be surprised if the difference will be higher
 * on faster systems:-)
 *
 *				<appro@fy.chalmers.se>
 */
# define md4_block_data_order md4_block_host_order
# endif
#endif

#define DATA_ORDER_IS_LITTLE_ENDIAN

#define HASH_LONG		MD4_LONG
#define HASH_LONG_LOG2		MD4_LONG_LOG2
#define HASH_CTX		MD4_CTX
#define HASH_CBLOCK		MD4_CBLOCK
#define HASH_LBLOCK		MD4_LBLOCK
#define HASH_UPDATE		MD4_Update
#define HASH_TRANSFORM		MD4_Transform
#define HASH_FINAL		MD4_Final
@@ -112,21 +82,7 @@ void md4_block_data_order (MD4_CTX *c, const void *p,size_t num);
	ll=(c)->C; HOST_l2c(ll,(s));	\
	ll=(c)->D; HOST_l2c(ll,(s));	\
	} while (0)
#define HASH_BLOCK_HOST_ORDER	md4_block_host_order
#if !defined(L_ENDIAN) || defined(md4_block_data_order)
#define	HASH_BLOCK_DATA_ORDER	md4_block_data_order
/*
 * Little-endians (Intel and Alpha) feel better without this.
 * It looks like memcpy does better job than generic
 * md4_block_data_order on copying-n-aligning input data.
 * But frankly speaking I didn't expect such result on Alpha.
 * On the other hand I've got this with egcs-1.0.2 and if
 * program is compiled with another (better?) compiler it
 * might turn out other way around.
 *
 *				<appro@fy.chalmers.se>
 */
#endif

#include "md32_common.h"

+1 −1
Original line number Diff line number Diff line
@@ -29,7 +29,7 @@ $X="esi";
 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9,	# R3
 );

&md5_block("md5_block_asm_host_order");
&md5_block("md5_block_asm_data_order");
&asm_finish();

sub Np

crypto/md5/asm/md5-sparcv9.S

deleted100644 → 0
+0 −1031

File deleted.

Preview size limit exceeded, changes collapsed.

Loading