Commit 4a37c487 authored by Andy Polyakov's avatar Andy Polyakov
Browse files

I implemented this when troubleshooting performance problem on SPARC Solaris.

As there is an apparent interest for optimization for footprint, I figured
that this can eventually become useful.
parent ad492c36
Loading
Loading
Loading
Loading
+126 −0
Original line number Diff line number Diff line
@@ -168,6 +168,8 @@ int HASH_INIT (SHA_CTX *c)
#define F_40_59(b,c,d)	(((b) & (c)) | (((b)|(c)) & (d))) 
#define	F_60_79(b,c,d)	F_20_39(b,c,d)

#ifndef OPENSSL_SMALL_FOOTPRINT

#define BODY_00_15(i,a,b,c,d,e,f,xi) \
	(f)=xi+(e)+K_00_19+ROTATE((a),5)+F_00_19((b),(c),(d)); \
	(b)=ROTATE((b),30);
@@ -470,3 +472,127 @@ void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, int num)
		}
	}
#endif

#else	/* OPENSSL_SMALL_FOOTPRINT */

#define BODY_00_15(xi)		 do {	\
	T=E+K_00_19+F_00_19(B,C,D);	\
	E=D, D=C, C=ROTATE(B,30), B=A;	\
	A=ROTATE(A,5)+T+xi;	    } while(0)

#define BODY_16_19(xa,xb,xc,xd)	 do {	\
	Xupdate(T,xa,xa,xb,xc,xd);	\
	T+=E+K_00_19+F_00_19(B,C,D);	\
	E=D, D=C, C=ROTATE(B,30), B=A;	\
	A=ROTATE(A,5)+T;	    } while(0)

#define BODY_20_39(xa,xb,xc,xd)	 do {	\
	Xupdate(T,xa,xa,xb,xc,xd);	\
	T+=E+K_20_39+F_20_39(B,C,D);	\
	E=D, D=C, C=ROTATE(B,30), B=A;	\
	A=ROTATE(A,5)+T;	    } while(0)

#define BODY_40_59(xa,xb,xc,xd)	 do {	\
	Xupdate(T,xa,xa,xb,xc,xd);	\
	T+=E+K_40_59+F_40_59(B,C,D);	\
	E=D, D=C, C=ROTATE(B,30), B=A;	\
	A=ROTATE(A,5)+T;	    } while(0)

#define BODY_60_79(xa,xb,xc,xd)	 do {	\
	Xupdate(T,xa,xa,xb,xc,xd);	\
	T=E+K_60_79+F_60_79(B,C,D);	\
	E=D, D=C, C=ROTATE(B,30), B=A;	\
	A=ROTATE(A,5)+T+xa;	    } while(0)

#ifndef DONT_IMPLEMENT_BLOCK_HOST_ORDER
void HASH_BLOCK_HOST_ORDER (SHA_CTX *c, const void *d, int num)
	{
	const SHA_LONG *W=d;
	register unsigned MD32_REG_T A,B,C,D,E,T;
	int i;
	SHA_LONG	X[16];

	A=c->h0;
	B=c->h1;
	C=c->h2;
	D=c->h3;
	E=c->h4;

	for (;;)
		{
	for (i=0;i<16;i++)
	{ X[i]=W[i]; BODY_00_15(X[i]); }
	for (i=0;i<4;i++)
	{ BODY_16_19(X[i],       X[i+2],      X[i+8],     X[(i+13)&15]); }
	for (;i<24;i++)
	{ BODY_20_39(X[i&15],    X[(i+2)&15], X[(i+8)&15],X[(i+13)&15]); }
	for (i=0;i<20;i++)
	{ BODY_40_59(X[(i+8)&15],X[(i+10)&15],X[i&15],    X[(i+5)&15]);  }
	for (i=4;i<24;i++)
	{ BODY_60_79(X[(i+8)&15],X[(i+10)&15],X[i&15],    X[(i+5)&15]);  }
	
	c->h0=(c->h0+A)&0xffffffffL; 
	c->h1=(c->h1+B)&0xffffffffL;
	c->h2=(c->h2+C)&0xffffffffL;
	c->h3=(c->h3+D)&0xffffffffL;
	c->h4=(c->h4+E)&0xffffffffL;

	if (--num <= 0) break;

	A=c->h0;
	B=c->h1;
	C=c->h2;
	D=c->h3;
	E=c->h4;

	W+=SHA_LBLOCK;
		}
	}
#endif

#ifndef DONT_IMPLEMENT_BLOCK_DATA_ORDER
void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, int num)
	{
	const unsigned char *data=p;
	register unsigned MD32_REG_T A,B,C,D,E,T,l;
	int i;
	SHA_LONG	X[16];

	A=c->h0;
	B=c->h1;
	C=c->h2;
	D=c->h3;
	E=c->h4;

	for (;;)
		{
	for (i=0;i<16;i++)
	{ HOST_c2l(data,l); X[i]=l; BODY_00_15(X[i]); }
	for (i=0;i<4;i++)
	{ BODY_16_19(X[i],       X[i+2],      X[i+8],     X[(i+13)&15]); }
	for (;i<24;i++)
	{ BODY_20_39(X[i&15],    X[(i+2)&15], X[(i+8)&15],X[(i+13)&15]); }
	for (i=0;i<20;i++)
	{ BODY_40_59(X[(i+8)&15],X[(i+10)&15],X[i&15],    X[(i+5)&15]);  }
	for (i=4;i<24;i++)
	{ BODY_60_79(X[(i+8)&15],X[(i+10)&15],X[i&15],    X[(i+5)&15]);  }

	c->h0=(c->h0+A)&0xffffffffL; 
	c->h1=(c->h1+B)&0xffffffffL;
	c->h2=(c->h2+C)&0xffffffffL;
	c->h3=(c->h3+D)&0xffffffffL;
	c->h4=(c->h4+E)&0xffffffffL;

	if (--num <= 0) break;

	A=c->h0;
	B=c->h1;
	C=c->h2;
	D=c->h3;
	E=c->h4;

		}
	}
#endif

#endif