Loading crypto/md32_common.h +50 −293 Original line number Diff line number Diff line /* crypto/md32_common.h */ /* ==================================================================== * Copyright (c) 1999-2002 The OpenSSL Project. All rights reserved. * Copyright (c) 1999-2006 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions Loading Loading @@ -47,10 +47,6 @@ * OF THE POSSIBILITY OF SUCH DAMAGE. * ==================================================================== * * This product includes cryptographic software written by Eric Young * (eay@cryptsoft.com). This product includes software written by Tim * Hudson (tjh@cryptsoft.com). * */ /* Loading @@ -76,40 +72,27 @@ * typedef struct { * ... * HASH_LONG Nl,Nh; * either { * HASH_LONG data[HASH_LBLOCK]; * unsigned char data[HASH_CBLOCK]; * }; * unsigned int num; * ... * } HASH_CTX; * data[] vector is expected to be zeroed upon first call to * HASH_UPDATE. * HASH_UPDATE * name of "Update" function, implemented here. * HASH_TRANSFORM * name of "Transform" function, implemented here. * HASH_FINAL * name of "Final" function, implemented here. * HASH_BLOCK_HOST_ORDER * name of "block" function treating *aligned* input message * in host byte order, implemented externally. * HASH_BLOCK_DATA_ORDER * name of "block" function treating *unaligned* input message * in original (data) byte order, implemented externally (it * actually is optional if data and host are of the same * "endianess"). * name of "block" function capable of treating *unaligned* input * message in original (data) byte order, implemented externally. * HASH_MAKE_STRING * macro convering context variables to an ASCII hash string. * * Optional macros: * * B_ENDIAN or L_ENDIAN * defines host byte-order. * HASH_LONG_LOG2 * defaults to 2 if not states otherwise. * HASH_LBLOCK * assumed to be HASH_CBLOCK/4 if not stated otherwise. * HASH_BLOCK_DATA_ORDER_ALIGNED * alternative "block" function capable of treating * aligned input message in original (data) order, * implemented externally. * * MD5 example: * * #define DATA_ORDER_IS_LITTLE_ENDIAN Loading @@ -118,11 +101,9 @@ * #define HASH_LONG_LOG2 MD5_LONG_LOG2 * #define HASH_CTX MD5_CTX * #define HASH_CBLOCK MD5_CBLOCK * #define HASH_LBLOCK MD5_LBLOCK * #define HASH_UPDATE MD5_Update * #define HASH_TRANSFORM MD5_Transform * #define HASH_FINAL MD5_Final * #define HASH_BLOCK_HOST_ORDER md5_block_host_order * #define HASH_BLOCK_DATA_ORDER md5_block_data_order * * <appro@fy.chalmers.se> Loading Loading @@ -152,27 +133,9 @@ #error "HASH_FINAL must be defined!" #endif #ifndef HASH_BLOCK_HOST_ORDER #error "HASH_BLOCK_HOST_ORDER must be defined!" #endif #if 0 /* * Moved below as it's required only if HASH_BLOCK_DATA_ORDER_ALIGNED * isn't defined. */ #ifndef HASH_BLOCK_DATA_ORDER #error "HASH_BLOCK_DATA_ORDER must be defined!" #endif #endif #ifndef HASH_LBLOCK #define HASH_LBLOCK (HASH_CBLOCK/4) #endif #ifndef HASH_LONG_LOG2 #define HASH_LONG_LOG2 2 #endif /* * Engage compiler specific rotate intrinsic function if available. Loading Loading @@ -219,70 +182,10 @@ # endif #endif /* PEDANTIC */ #if HASH_LONG_LOG2==2 /* Engage only if sizeof(HASH_LONG)== 4 */ /* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */ #ifdef ROTATE /* 5 instructions with rotate instruction, else 9 */ #define REVERSE_FETCH32(a,l) ( \ l=*(const HASH_LONG *)(a), \ ((ROTATE(l,8)&0x00FF00FF)|(ROTATE((l&0x00FF00FF),24))) \ ) #else /* 6 instructions with rotate instruction, else 8 */ #define REVERSE_FETCH32(a,l) ( \ l=*(const HASH_LONG *)(a), \ l=(((l>>8)&0x00FF00FF)|((l&0x00FF00FF)<<8)), \ ROTATE(l,16) \ ) /* * Originally the middle line started with l=(((l&0xFF00FF00)>>8)|... * It's rewritten as above for two reasons: * - RISCs aren't good at long constants and have to explicitely * compose 'em with several (well, usually 2) instructions in a * register before performing the actual operation and (as you * already realized:-) having same constant should inspire the * compiler to permanently allocate the only register for it; * - most modern CPUs have two ALUs, but usually only one has * circuitry for shifts:-( this minor tweak inspires compiler * to schedule shift instructions in a better way... * * <appro@fy.chalmers.se> */ #endif #endif #ifndef ROTATE #define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n)))) #endif /* * Make some obvious choices. E.g., HASH_BLOCK_DATA_ORDER_ALIGNED * and HASH_BLOCK_HOST_ORDER ought to be the same if input data * and host are of the same "endianess". It's possible to mask * this with blank #define HASH_BLOCK_DATA_ORDER though... * * <appro@fy.chalmers.se> */ #if defined(B_ENDIAN) # if defined(DATA_ORDER_IS_BIG_ENDIAN) # if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2 # define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER # endif # endif #elif defined(L_ENDIAN) # if defined(DATA_ORDER_IS_LITTLE_ENDIAN) # if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2 # define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER # endif # endif #endif #if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) #ifndef HASH_BLOCK_DATA_ORDER #error "HASH_BLOCK_DATA_ORDER must be defined!" #endif #endif #if defined(DATA_ORDER_IS_BIG_ENDIAN) #ifndef PEDANTIC Loading Loading @@ -314,29 +217,6 @@ l|=(((unsigned long)(*((c)++))) ), \ l) #endif #define HOST_p_c2l(c,l,n) { \ switch (n) { \ case 0: l =((unsigned long)(*((c)++)))<<24; \ case 1: l|=((unsigned long)(*((c)++)))<<16; \ case 2: l|=((unsigned long)(*((c)++)))<< 8; \ case 3: l|=((unsigned long)(*((c)++))); \ } } #define HOST_p_c2l_p(c,l,sc,len) { \ switch (sc) { \ case 0: l =((unsigned long)(*((c)++)))<<24; \ if (--len == 0) break; \ case 1: l|=((unsigned long)(*((c)++)))<<16; \ if (--len == 0) break; \ case 2: l|=((unsigned long)(*((c)++)))<< 8; \ } } /* NOTE the pointer is not incremented at the end of this */ #define HOST_c2l_p(c,l,n) { \ l=0; (c)+=n; \ switch (n) { \ case 3: l =((unsigned long)(*(--(c))))<< 8; \ case 2: l|=((unsigned long)(*(--(c))))<<16; \ case 1: l|=((unsigned long)(*(--(c))))<<24; \ } } #ifndef HOST_l2c #define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l)>>24)&0xff), \ *((c)++)=(unsigned char)(((l)>>16)&0xff), \ Loading @@ -362,29 +242,6 @@ l|=(((unsigned long)(*((c)++)))<<24), \ l) #endif #define HOST_p_c2l(c,l,n) { \ switch (n) { \ case 0: l =((unsigned long)(*((c)++))); \ case 1: l|=((unsigned long)(*((c)++)))<< 8; \ case 2: l|=((unsigned long)(*((c)++)))<<16; \ case 3: l|=((unsigned long)(*((c)++)))<<24; \ } } #define HOST_p_c2l_p(c,l,sc,len) { \ switch (sc) { \ case 0: l =((unsigned long)(*((c)++))); \ if (--len == 0) break; \ case 1: l|=((unsigned long)(*((c)++)))<< 8; \ if (--len == 0) break; \ case 2: l|=((unsigned long)(*((c)++)))<<16; \ } } /* NOTE the pointer is not incremented at the end of this */ #define HOST_c2l_p(c,l,n) { \ l=0; (c)+=n; \ switch (n) { \ case 3: l =((unsigned long)(*(--(c))))<<16; \ case 2: l|=((unsigned long)(*(--(c))))<< 8; \ case 1: l|=((unsigned long)(*(--(c)))); \ } } #ifndef HOST_l2c #define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \ *((c)++)=(unsigned char)(((l)>> 8)&0xff), \ Loading @@ -402,9 +259,9 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len) { const unsigned char *data=data_; register HASH_LONG * p; register HASH_LONG l; size_t sw,sc,ew,ec; unsigned char *p; HASH_LONG l; size_t n; if (len==0) return 1; Loading @@ -416,101 +273,43 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len) c->Nh+=(len>>29); /* might cause compiler warning on 16-bit */ c->Nl=l; if (c->num != 0) n = c->num; if (n != 0) { p=c->data; sw=c->num>>2; sc=c->num&0x03; p=(unsigned char *)c->data; if ((c->num+len) >= HASH_CBLOCK) { l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l; for (; sw<HASH_LBLOCK; sw++) if ((n+len) >= HASH_CBLOCK) { HOST_c2l(data,l); p[sw]=l; } HASH_BLOCK_HOST_ORDER (c,p,1); len-=(HASH_CBLOCK-c->num); memcpy (p+n,data,HASH_CBLOCK-n); HASH_BLOCK_DATA_ORDER (c,p,1); n = HASH_CBLOCK-n; data += n; len -= n; c->num = 0; /* drop through and do the rest */ memset (p,0,HASH_CBLOCK); /* keep it zeroed */ } else { memcpy (p+n,data,len); c->num += (unsigned int)len; if ((sc+len) < 4) /* ugly, add char's to a word */ { l=p[sw]; HOST_p_c2l_p(data,l,sc,len); p[sw]=l; } else { ew=(c->num>>2); ec=(c->num&0x03); if (sc) l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l; for (; sw < ew; sw++) { HOST_c2l(data,l); p[sw]=l; } if (ec) { HOST_c2l_p(data,l,ec); p[sw]=l; } } return 1; } } sw=len/HASH_CBLOCK; if (sw > 0) n = len/HASH_CBLOCK; if (n > 0) { #if defined(HASH_BLOCK_DATA_ORDER_ALIGNED) /* * Note that HASH_BLOCK_DATA_ORDER_ALIGNED gets defined * only if sizeof(HASH_LONG)==4. */ if ((((size_t)data)%4) == 0) { /* data is properly aligned so that we can cast it: */ HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,sw); sw*=HASH_CBLOCK; data+=sw; len-=sw; } else #if !defined(HASH_BLOCK_DATA_ORDER) while (sw--) { memcpy (p=c->data,data,HASH_CBLOCK); HASH_BLOCK_DATA_ORDER_ALIGNED(c,p,1); data+=HASH_CBLOCK; len-=HASH_CBLOCK; } #endif #endif #if defined(HASH_BLOCK_DATA_ORDER) { HASH_BLOCK_DATA_ORDER(c,data,sw); sw*=HASH_CBLOCK; data+=sw; len-=sw; } #endif HASH_BLOCK_DATA_ORDER (c,data,n); n *= HASH_CBLOCK; data += n; len -= n; } if (len != 0) { p = c->data; p = (unsigned char *)c->data; c->num = len; ew=len>>2; /* words to copy */ ec=len&0x03; for (; ew; ew--,p++) { HOST_c2l(data,l); *p=l; } HOST_c2l_p(data,l,ec); *p=l; memcpy (p,data,len); } return 1; } Loading @@ -518,73 +317,36 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len) void HASH_TRANSFORM (HASH_CTX *c, const unsigned char *data) { #if defined(HASH_BLOCK_DATA_ORDER_ALIGNED) if ((((size_t)data)%4) == 0) /* data is properly aligned so that we can cast it: */ HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,1); else #if !defined(HASH_BLOCK_DATA_ORDER) { memcpy (c->data,data,HASH_CBLOCK); HASH_BLOCK_DATA_ORDER_ALIGNED (c,c->data,1); } #endif #endif #if defined(HASH_BLOCK_DATA_ORDER) HASH_BLOCK_DATA_ORDER (c,data,1); #endif } int HASH_FINAL (unsigned char *md, HASH_CTX *c) { register HASH_LONG *p; register unsigned long l; register int i,j; static const unsigned char end[4]={0x80,0x00,0x00,0x00}; const unsigned char *cp=end; /* c->num should definitly have room for at least one more byte. */ p=c->data; i=c->num>>2; j=c->num&0x03; #if 0 /* purify often complains about the following line as an * Uninitialized Memory Read. While this can be true, the * following p_c2l macro will reset l when that case is true. * This is because j&0x03 contains the number of 'valid' bytes * already in p[i]. If and only if j&0x03 == 0, the UMR will * occur but this is also the only time p_c2l will do * l= *(cp++) instead of l|= *(cp++) * Many thanks to Alex Tang <altitude@cic.net> for pickup this * 'potential bug' */ #ifdef PURIFY if (j==0) p[i]=0; /* Yeah, but that's not the way to fix it:-) */ #endif l=p[i]; #else l = (j==0) ? 0 : p[i]; #endif HOST_p_c2l(cp,l,j); p[i++]=l; /* i is the next 'undefined word' */ unsigned char *p = (unsigned char *)c->data; size_t n = c->num; if (i>(HASH_LBLOCK-2)) /* save room for Nl and Nh */ p[n] = 0x80; /* there is always room for one */ n++; if (n > (HASH_CBLOCK-8)) { if (i<HASH_LBLOCK) p[i]=0; HASH_BLOCK_HOST_ORDER (c,p,1); i=0; HASH_BLOCK_DATA_ORDER (c,p,1); memset (p,0,HASH_CBLOCK); } for (; i<(HASH_LBLOCK-2); i++) p[i]=0; p += HASH_CBLOCK-8; #if defined(DATA_ORDER_IS_BIG_ENDIAN) p[HASH_LBLOCK-2]=c->Nh; p[HASH_LBLOCK-1]=c->Nl; (void)HOST_l2c(c->Nh,p); (void)HOST_l2c(c->Nl,p); #elif defined(DATA_ORDER_IS_LITTLE_ENDIAN) p[HASH_LBLOCK-2]=c->Nl; p[HASH_LBLOCK-1]=c->Nh; (void)HOST_l2c(c->Nl,p); (void)HOST_l2c(c->Nh,p); #endif HASH_BLOCK_HOST_ORDER (c,p,1); p -= HASH_CBLOCK; HASH_BLOCK_DATA_ORDER (c,p,1); c->num=0; memset (p,0,HASH_CBLOCK); #ifndef HASH_MAKE_STRING #error "HASH_MAKE_STRING must be defined!" Loading @@ -592,11 +354,6 @@ int HASH_FINAL (unsigned char *md, HASH_CTX *c) HASH_MAKE_STRING(c,md); #endif c->num=0; /* clear stuff, HASH_BLOCK may be leaving some stuff on the stack * but I'm not worried :-) OPENSSL_cleanse((void *)c,sizeof(HASH_CTX)); */ return 1; } Loading crypto/md4/md4_dgst.c +1 −92 Original line number Diff line number Diff line Loading @@ -72,89 +72,14 @@ const char *MD4_version="MD4" OPENSSL_VERSION_PTEXT; int MD4_Init(MD4_CTX *c) { memset (c,0,sizeof(*c)); c->A=INIT_DATA_A; c->B=INIT_DATA_B; c->C=INIT_DATA_C; c->D=INIT_DATA_D; c->Nl=0; c->Nh=0; c->num=0; return 1; } #ifndef md4_block_host_order void md4_block_host_order (MD4_CTX *c, const void *data, size_t num) { const MD4_LONG *X=data; register unsigned MD32_REG_T A,B,C,D; A=c->A; B=c->B; C=c->C; D=c->D; for (;num--;X+=HASH_LBLOCK) { /* Round 0 */ R0(A,B,C,D,X[ 0], 3,0); R0(D,A,B,C,X[ 1], 7,0); R0(C,D,A,B,X[ 2],11,0); R0(B,C,D,A,X[ 3],19,0); R0(A,B,C,D,X[ 4], 3,0); R0(D,A,B,C,X[ 5], 7,0); R0(C,D,A,B,X[ 6],11,0); R0(B,C,D,A,X[ 7],19,0); R0(A,B,C,D,X[ 8], 3,0); R0(D,A,B,C,X[ 9], 7,0); R0(C,D,A,B,X[10],11,0); R0(B,C,D,A,X[11],19,0); R0(A,B,C,D,X[12], 3,0); R0(D,A,B,C,X[13], 7,0); R0(C,D,A,B,X[14],11,0); R0(B,C,D,A,X[15],19,0); /* Round 1 */ R1(A,B,C,D,X[ 0], 3,0x5A827999L); R1(D,A,B,C,X[ 4], 5,0x5A827999L); R1(C,D,A,B,X[ 8], 9,0x5A827999L); R1(B,C,D,A,X[12],13,0x5A827999L); R1(A,B,C,D,X[ 1], 3,0x5A827999L); R1(D,A,B,C,X[ 5], 5,0x5A827999L); R1(C,D,A,B,X[ 9], 9,0x5A827999L); R1(B,C,D,A,X[13],13,0x5A827999L); R1(A,B,C,D,X[ 2], 3,0x5A827999L); R1(D,A,B,C,X[ 6], 5,0x5A827999L); R1(C,D,A,B,X[10], 9,0x5A827999L); R1(B,C,D,A,X[14],13,0x5A827999L); R1(A,B,C,D,X[ 3], 3,0x5A827999L); R1(D,A,B,C,X[ 7], 5,0x5A827999L); R1(C,D,A,B,X[11], 9,0x5A827999L); R1(B,C,D,A,X[15],13,0x5A827999L); /* Round 2 */ R2(A,B,C,D,X[ 0], 3,0x6ED9EBA1); R2(D,A,B,C,X[ 8], 9,0x6ED9EBA1); R2(C,D,A,B,X[ 4],11,0x6ED9EBA1); R2(B,C,D,A,X[12],15,0x6ED9EBA1); R2(A,B,C,D,X[ 2], 3,0x6ED9EBA1); R2(D,A,B,C,X[10], 9,0x6ED9EBA1); R2(C,D,A,B,X[ 6],11,0x6ED9EBA1); R2(B,C,D,A,X[14],15,0x6ED9EBA1); R2(A,B,C,D,X[ 1], 3,0x6ED9EBA1); R2(D,A,B,C,X[ 9], 9,0x6ED9EBA1); R2(C,D,A,B,X[ 5],11,0x6ED9EBA1); R2(B,C,D,A,X[13],15,0x6ED9EBA1); R2(A,B,C,D,X[ 3], 3,0x6ED9EBA1); R2(D,A,B,C,X[11], 9,0x6ED9EBA1); R2(C,D,A,B,X[ 7],11,0x6ED9EBA1); R2(B,C,D,A,X[15],15,0x6ED9EBA1); A = c->A += A; B = c->B += B; C = c->C += C; D = c->D += D; } } #endif #ifndef md4_block_data_order #ifdef X #undef X Loading Loading @@ -240,19 +165,3 @@ void md4_block_data_order (MD4_CTX *c, const void *data_, size_t num) } } #endif #ifdef undef int printit(unsigned long *l) { int i,ii; for (i=0; i<2; i++) { for (ii=0; ii<8; ii++) { fprintf(stderr,"%08lx ",l[i*8+ii]); } fprintf(stderr,"\n"); } } #endif crypto/md4/md4_locl.h +0 −45 Original line number Diff line number Diff line Loading @@ -65,44 +65,13 @@ #define MD4_LONG_LOG2 2 /* default to 32 bits */ #endif void md4_block_host_order (MD4_CTX *c, const void *p,size_t num); void md4_block_data_order (MD4_CTX *c, const void *p,size_t num); #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || \ defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) # if !defined(B_ENDIAN) /* * *_block_host_order is expected to handle aligned data while * *_block_data_order - unaligned. As algorithm and host (x86) * are in this case of the same "endianness" these two are * otherwise indistinguishable. But normally you don't want to * call the same function because unaligned access in places * where alignment is expected is usually a "Bad Thing". Indeed, * on RISCs you get punished with BUS ERROR signal or *severe* * performance degradation. Intel CPUs are in turn perfectly * capable of loading unaligned data without such drastic side * effect. Yes, they say it's slower than aligned load, but no * exception is generated and therefore performance degradation * is *incomparable* with RISCs. What we should weight here is * costs of unaligned access against costs of aligning data. * According to my measurements allowing unaligned access results * in ~9% performance improvement on Pentium II operating at * 266MHz. I won't be surprised if the difference will be higher * on faster systems:-) * * <appro@fy.chalmers.se> */ # define md4_block_data_order md4_block_host_order # endif #endif #define DATA_ORDER_IS_LITTLE_ENDIAN #define HASH_LONG MD4_LONG #define HASH_LONG_LOG2 MD4_LONG_LOG2 #define HASH_CTX MD4_CTX #define HASH_CBLOCK MD4_CBLOCK #define HASH_LBLOCK MD4_LBLOCK #define HASH_UPDATE MD4_Update #define HASH_TRANSFORM MD4_Transform #define HASH_FINAL MD4_Final Loading @@ -113,21 +82,7 @@ void md4_block_data_order (MD4_CTX *c, const void *p,size_t num); ll=(c)->C; HOST_l2c(ll,(s)); \ ll=(c)->D; HOST_l2c(ll,(s)); \ } while (0) #define HASH_BLOCK_HOST_ORDER md4_block_host_order #if !defined(L_ENDIAN) || defined(md4_block_data_order) #define HASH_BLOCK_DATA_ORDER md4_block_data_order /* * Little-endians (Intel and Alpha) feel better without this. * It looks like memcpy does better job than generic * md4_block_data_order on copying-n-aligning input data. * But frankly speaking I didn't expect such result on Alpha. * On the other hand I've got this with egcs-1.0.2 and if * program is compiled with another (better?) compiler it * might turn out other way around. * * <appro@fy.chalmers.se> */ #endif #include "md32_common.h" Loading crypto/md5/asm/md5-586.pl +1 −1 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ $X="esi"; 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, # R3 ); &md5_block("md5_block_asm_host_order"); &md5_block("md5_block_asm_data_order"); &asm_finish(); sub Np Loading crypto/md5/asm/md5-x86_64.pl +4 −4 Original line number Diff line number Diff line Loading @@ -115,9 +115,9 @@ $code .= <<EOF; .text .align 16 .globl md5_block_asm_host_order .type md5_block_asm_host_order,\@function,3 md5_block_asm_host_order: .globl md5_block_asm_data_order .type md5_block_asm_data_order,\@function,3 md5_block_asm_data_order: push %rbp push %rbx push %r12 Loading Loading @@ -243,7 +243,7 @@ $code .= <<EOF; pop %rbx pop %rbp ret .size md5_block_asm_host_order,.-md5_block_asm_host_order .size md5_block_asm_data_order,.-md5_block_asm_data_order EOF print $code; Loading Loading
crypto/md32_common.h +50 −293 Original line number Diff line number Diff line /* crypto/md32_common.h */ /* ==================================================================== * Copyright (c) 1999-2002 The OpenSSL Project. All rights reserved. * Copyright (c) 1999-2006 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions Loading Loading @@ -47,10 +47,6 @@ * OF THE POSSIBILITY OF SUCH DAMAGE. * ==================================================================== * * This product includes cryptographic software written by Eric Young * (eay@cryptsoft.com). This product includes software written by Tim * Hudson (tjh@cryptsoft.com). * */ /* Loading @@ -76,40 +72,27 @@ * typedef struct { * ... * HASH_LONG Nl,Nh; * either { * HASH_LONG data[HASH_LBLOCK]; * unsigned char data[HASH_CBLOCK]; * }; * unsigned int num; * ... * } HASH_CTX; * data[] vector is expected to be zeroed upon first call to * HASH_UPDATE. * HASH_UPDATE * name of "Update" function, implemented here. * HASH_TRANSFORM * name of "Transform" function, implemented here. * HASH_FINAL * name of "Final" function, implemented here. * HASH_BLOCK_HOST_ORDER * name of "block" function treating *aligned* input message * in host byte order, implemented externally. * HASH_BLOCK_DATA_ORDER * name of "block" function treating *unaligned* input message * in original (data) byte order, implemented externally (it * actually is optional if data and host are of the same * "endianess"). * name of "block" function capable of treating *unaligned* input * message in original (data) byte order, implemented externally. * HASH_MAKE_STRING * macro convering context variables to an ASCII hash string. * * Optional macros: * * B_ENDIAN or L_ENDIAN * defines host byte-order. * HASH_LONG_LOG2 * defaults to 2 if not states otherwise. * HASH_LBLOCK * assumed to be HASH_CBLOCK/4 if not stated otherwise. * HASH_BLOCK_DATA_ORDER_ALIGNED * alternative "block" function capable of treating * aligned input message in original (data) order, * implemented externally. * * MD5 example: * * #define DATA_ORDER_IS_LITTLE_ENDIAN Loading @@ -118,11 +101,9 @@ * #define HASH_LONG_LOG2 MD5_LONG_LOG2 * #define HASH_CTX MD5_CTX * #define HASH_CBLOCK MD5_CBLOCK * #define HASH_LBLOCK MD5_LBLOCK * #define HASH_UPDATE MD5_Update * #define HASH_TRANSFORM MD5_Transform * #define HASH_FINAL MD5_Final * #define HASH_BLOCK_HOST_ORDER md5_block_host_order * #define HASH_BLOCK_DATA_ORDER md5_block_data_order * * <appro@fy.chalmers.se> Loading Loading @@ -152,27 +133,9 @@ #error "HASH_FINAL must be defined!" #endif #ifndef HASH_BLOCK_HOST_ORDER #error "HASH_BLOCK_HOST_ORDER must be defined!" #endif #if 0 /* * Moved below as it's required only if HASH_BLOCK_DATA_ORDER_ALIGNED * isn't defined. */ #ifndef HASH_BLOCK_DATA_ORDER #error "HASH_BLOCK_DATA_ORDER must be defined!" #endif #endif #ifndef HASH_LBLOCK #define HASH_LBLOCK (HASH_CBLOCK/4) #endif #ifndef HASH_LONG_LOG2 #define HASH_LONG_LOG2 2 #endif /* * Engage compiler specific rotate intrinsic function if available. Loading Loading @@ -219,70 +182,10 @@ # endif #endif /* PEDANTIC */ #if HASH_LONG_LOG2==2 /* Engage only if sizeof(HASH_LONG)== 4 */ /* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */ #ifdef ROTATE /* 5 instructions with rotate instruction, else 9 */ #define REVERSE_FETCH32(a,l) ( \ l=*(const HASH_LONG *)(a), \ ((ROTATE(l,8)&0x00FF00FF)|(ROTATE((l&0x00FF00FF),24))) \ ) #else /* 6 instructions with rotate instruction, else 8 */ #define REVERSE_FETCH32(a,l) ( \ l=*(const HASH_LONG *)(a), \ l=(((l>>8)&0x00FF00FF)|((l&0x00FF00FF)<<8)), \ ROTATE(l,16) \ ) /* * Originally the middle line started with l=(((l&0xFF00FF00)>>8)|... * It's rewritten as above for two reasons: * - RISCs aren't good at long constants and have to explicitely * compose 'em with several (well, usually 2) instructions in a * register before performing the actual operation and (as you * already realized:-) having same constant should inspire the * compiler to permanently allocate the only register for it; * - most modern CPUs have two ALUs, but usually only one has * circuitry for shifts:-( this minor tweak inspires compiler * to schedule shift instructions in a better way... * * <appro@fy.chalmers.se> */ #endif #endif #ifndef ROTATE #define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n)))) #endif /* * Make some obvious choices. E.g., HASH_BLOCK_DATA_ORDER_ALIGNED * and HASH_BLOCK_HOST_ORDER ought to be the same if input data * and host are of the same "endianess". It's possible to mask * this with blank #define HASH_BLOCK_DATA_ORDER though... * * <appro@fy.chalmers.se> */ #if defined(B_ENDIAN) # if defined(DATA_ORDER_IS_BIG_ENDIAN) # if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2 # define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER # endif # endif #elif defined(L_ENDIAN) # if defined(DATA_ORDER_IS_LITTLE_ENDIAN) # if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2 # define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER # endif # endif #endif #if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) #ifndef HASH_BLOCK_DATA_ORDER #error "HASH_BLOCK_DATA_ORDER must be defined!" #endif #endif #if defined(DATA_ORDER_IS_BIG_ENDIAN) #ifndef PEDANTIC Loading Loading @@ -314,29 +217,6 @@ l|=(((unsigned long)(*((c)++))) ), \ l) #endif #define HOST_p_c2l(c,l,n) { \ switch (n) { \ case 0: l =((unsigned long)(*((c)++)))<<24; \ case 1: l|=((unsigned long)(*((c)++)))<<16; \ case 2: l|=((unsigned long)(*((c)++)))<< 8; \ case 3: l|=((unsigned long)(*((c)++))); \ } } #define HOST_p_c2l_p(c,l,sc,len) { \ switch (sc) { \ case 0: l =((unsigned long)(*((c)++)))<<24; \ if (--len == 0) break; \ case 1: l|=((unsigned long)(*((c)++)))<<16; \ if (--len == 0) break; \ case 2: l|=((unsigned long)(*((c)++)))<< 8; \ } } /* NOTE the pointer is not incremented at the end of this */ #define HOST_c2l_p(c,l,n) { \ l=0; (c)+=n; \ switch (n) { \ case 3: l =((unsigned long)(*(--(c))))<< 8; \ case 2: l|=((unsigned long)(*(--(c))))<<16; \ case 1: l|=((unsigned long)(*(--(c))))<<24; \ } } #ifndef HOST_l2c #define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l)>>24)&0xff), \ *((c)++)=(unsigned char)(((l)>>16)&0xff), \ Loading @@ -362,29 +242,6 @@ l|=(((unsigned long)(*((c)++)))<<24), \ l) #endif #define HOST_p_c2l(c,l,n) { \ switch (n) { \ case 0: l =((unsigned long)(*((c)++))); \ case 1: l|=((unsigned long)(*((c)++)))<< 8; \ case 2: l|=((unsigned long)(*((c)++)))<<16; \ case 3: l|=((unsigned long)(*((c)++)))<<24; \ } } #define HOST_p_c2l_p(c,l,sc,len) { \ switch (sc) { \ case 0: l =((unsigned long)(*((c)++))); \ if (--len == 0) break; \ case 1: l|=((unsigned long)(*((c)++)))<< 8; \ if (--len == 0) break; \ case 2: l|=((unsigned long)(*((c)++)))<<16; \ } } /* NOTE the pointer is not incremented at the end of this */ #define HOST_c2l_p(c,l,n) { \ l=0; (c)+=n; \ switch (n) { \ case 3: l =((unsigned long)(*(--(c))))<<16; \ case 2: l|=((unsigned long)(*(--(c))))<< 8; \ case 1: l|=((unsigned long)(*(--(c)))); \ } } #ifndef HOST_l2c #define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \ *((c)++)=(unsigned char)(((l)>> 8)&0xff), \ Loading @@ -402,9 +259,9 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len) { const unsigned char *data=data_; register HASH_LONG * p; register HASH_LONG l; size_t sw,sc,ew,ec; unsigned char *p; HASH_LONG l; size_t n; if (len==0) return 1; Loading @@ -416,101 +273,43 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len) c->Nh+=(len>>29); /* might cause compiler warning on 16-bit */ c->Nl=l; if (c->num != 0) n = c->num; if (n != 0) { p=c->data; sw=c->num>>2; sc=c->num&0x03; p=(unsigned char *)c->data; if ((c->num+len) >= HASH_CBLOCK) { l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l; for (; sw<HASH_LBLOCK; sw++) if ((n+len) >= HASH_CBLOCK) { HOST_c2l(data,l); p[sw]=l; } HASH_BLOCK_HOST_ORDER (c,p,1); len-=(HASH_CBLOCK-c->num); memcpy (p+n,data,HASH_CBLOCK-n); HASH_BLOCK_DATA_ORDER (c,p,1); n = HASH_CBLOCK-n; data += n; len -= n; c->num = 0; /* drop through and do the rest */ memset (p,0,HASH_CBLOCK); /* keep it zeroed */ } else { memcpy (p+n,data,len); c->num += (unsigned int)len; if ((sc+len) < 4) /* ugly, add char's to a word */ { l=p[sw]; HOST_p_c2l_p(data,l,sc,len); p[sw]=l; } else { ew=(c->num>>2); ec=(c->num&0x03); if (sc) l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l; for (; sw < ew; sw++) { HOST_c2l(data,l); p[sw]=l; } if (ec) { HOST_c2l_p(data,l,ec); p[sw]=l; } } return 1; } } sw=len/HASH_CBLOCK; if (sw > 0) n = len/HASH_CBLOCK; if (n > 0) { #if defined(HASH_BLOCK_DATA_ORDER_ALIGNED) /* * Note that HASH_BLOCK_DATA_ORDER_ALIGNED gets defined * only if sizeof(HASH_LONG)==4. */ if ((((size_t)data)%4) == 0) { /* data is properly aligned so that we can cast it: */ HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,sw); sw*=HASH_CBLOCK; data+=sw; len-=sw; } else #if !defined(HASH_BLOCK_DATA_ORDER) while (sw--) { memcpy (p=c->data,data,HASH_CBLOCK); HASH_BLOCK_DATA_ORDER_ALIGNED(c,p,1); data+=HASH_CBLOCK; len-=HASH_CBLOCK; } #endif #endif #if defined(HASH_BLOCK_DATA_ORDER) { HASH_BLOCK_DATA_ORDER(c,data,sw); sw*=HASH_CBLOCK; data+=sw; len-=sw; } #endif HASH_BLOCK_DATA_ORDER (c,data,n); n *= HASH_CBLOCK; data += n; len -= n; } if (len != 0) { p = c->data; p = (unsigned char *)c->data; c->num = len; ew=len>>2; /* words to copy */ ec=len&0x03; for (; ew; ew--,p++) { HOST_c2l(data,l); *p=l; } HOST_c2l_p(data,l,ec); *p=l; memcpy (p,data,len); } return 1; } Loading @@ -518,73 +317,36 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len) void HASH_TRANSFORM (HASH_CTX *c, const unsigned char *data) { #if defined(HASH_BLOCK_DATA_ORDER_ALIGNED) if ((((size_t)data)%4) == 0) /* data is properly aligned so that we can cast it: */ HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,1); else #if !defined(HASH_BLOCK_DATA_ORDER) { memcpy (c->data,data,HASH_CBLOCK); HASH_BLOCK_DATA_ORDER_ALIGNED (c,c->data,1); } #endif #endif #if defined(HASH_BLOCK_DATA_ORDER) HASH_BLOCK_DATA_ORDER (c,data,1); #endif } int HASH_FINAL (unsigned char *md, HASH_CTX *c) { register HASH_LONG *p; register unsigned long l; register int i,j; static const unsigned char end[4]={0x80,0x00,0x00,0x00}; const unsigned char *cp=end; /* c->num should definitly have room for at least one more byte. */ p=c->data; i=c->num>>2; j=c->num&0x03; #if 0 /* purify often complains about the following line as an * Uninitialized Memory Read. While this can be true, the * following p_c2l macro will reset l when that case is true. * This is because j&0x03 contains the number of 'valid' bytes * already in p[i]. If and only if j&0x03 == 0, the UMR will * occur but this is also the only time p_c2l will do * l= *(cp++) instead of l|= *(cp++) * Many thanks to Alex Tang <altitude@cic.net> for pickup this * 'potential bug' */ #ifdef PURIFY if (j==0) p[i]=0; /* Yeah, but that's not the way to fix it:-) */ #endif l=p[i]; #else l = (j==0) ? 0 : p[i]; #endif HOST_p_c2l(cp,l,j); p[i++]=l; /* i is the next 'undefined word' */ unsigned char *p = (unsigned char *)c->data; size_t n = c->num; if (i>(HASH_LBLOCK-2)) /* save room for Nl and Nh */ p[n] = 0x80; /* there is always room for one */ n++; if (n > (HASH_CBLOCK-8)) { if (i<HASH_LBLOCK) p[i]=0; HASH_BLOCK_HOST_ORDER (c,p,1); i=0; HASH_BLOCK_DATA_ORDER (c,p,1); memset (p,0,HASH_CBLOCK); } for (; i<(HASH_LBLOCK-2); i++) p[i]=0; p += HASH_CBLOCK-8; #if defined(DATA_ORDER_IS_BIG_ENDIAN) p[HASH_LBLOCK-2]=c->Nh; p[HASH_LBLOCK-1]=c->Nl; (void)HOST_l2c(c->Nh,p); (void)HOST_l2c(c->Nl,p); #elif defined(DATA_ORDER_IS_LITTLE_ENDIAN) p[HASH_LBLOCK-2]=c->Nl; p[HASH_LBLOCK-1]=c->Nh; (void)HOST_l2c(c->Nl,p); (void)HOST_l2c(c->Nh,p); #endif HASH_BLOCK_HOST_ORDER (c,p,1); p -= HASH_CBLOCK; HASH_BLOCK_DATA_ORDER (c,p,1); c->num=0; memset (p,0,HASH_CBLOCK); #ifndef HASH_MAKE_STRING #error "HASH_MAKE_STRING must be defined!" Loading @@ -592,11 +354,6 @@ int HASH_FINAL (unsigned char *md, HASH_CTX *c) HASH_MAKE_STRING(c,md); #endif c->num=0; /* clear stuff, HASH_BLOCK may be leaving some stuff on the stack * but I'm not worried :-) OPENSSL_cleanse((void *)c,sizeof(HASH_CTX)); */ return 1; } Loading
crypto/md4/md4_dgst.c +1 −92 Original line number Diff line number Diff line Loading @@ -72,89 +72,14 @@ const char *MD4_version="MD4" OPENSSL_VERSION_PTEXT; int MD4_Init(MD4_CTX *c) { memset (c,0,sizeof(*c)); c->A=INIT_DATA_A; c->B=INIT_DATA_B; c->C=INIT_DATA_C; c->D=INIT_DATA_D; c->Nl=0; c->Nh=0; c->num=0; return 1; } #ifndef md4_block_host_order void md4_block_host_order (MD4_CTX *c, const void *data, size_t num) { const MD4_LONG *X=data; register unsigned MD32_REG_T A,B,C,D; A=c->A; B=c->B; C=c->C; D=c->D; for (;num--;X+=HASH_LBLOCK) { /* Round 0 */ R0(A,B,C,D,X[ 0], 3,0); R0(D,A,B,C,X[ 1], 7,0); R0(C,D,A,B,X[ 2],11,0); R0(B,C,D,A,X[ 3],19,0); R0(A,B,C,D,X[ 4], 3,0); R0(D,A,B,C,X[ 5], 7,0); R0(C,D,A,B,X[ 6],11,0); R0(B,C,D,A,X[ 7],19,0); R0(A,B,C,D,X[ 8], 3,0); R0(D,A,B,C,X[ 9], 7,0); R0(C,D,A,B,X[10],11,0); R0(B,C,D,A,X[11],19,0); R0(A,B,C,D,X[12], 3,0); R0(D,A,B,C,X[13], 7,0); R0(C,D,A,B,X[14],11,0); R0(B,C,D,A,X[15],19,0); /* Round 1 */ R1(A,B,C,D,X[ 0], 3,0x5A827999L); R1(D,A,B,C,X[ 4], 5,0x5A827999L); R1(C,D,A,B,X[ 8], 9,0x5A827999L); R1(B,C,D,A,X[12],13,0x5A827999L); R1(A,B,C,D,X[ 1], 3,0x5A827999L); R1(D,A,B,C,X[ 5], 5,0x5A827999L); R1(C,D,A,B,X[ 9], 9,0x5A827999L); R1(B,C,D,A,X[13],13,0x5A827999L); R1(A,B,C,D,X[ 2], 3,0x5A827999L); R1(D,A,B,C,X[ 6], 5,0x5A827999L); R1(C,D,A,B,X[10], 9,0x5A827999L); R1(B,C,D,A,X[14],13,0x5A827999L); R1(A,B,C,D,X[ 3], 3,0x5A827999L); R1(D,A,B,C,X[ 7], 5,0x5A827999L); R1(C,D,A,B,X[11], 9,0x5A827999L); R1(B,C,D,A,X[15],13,0x5A827999L); /* Round 2 */ R2(A,B,C,D,X[ 0], 3,0x6ED9EBA1); R2(D,A,B,C,X[ 8], 9,0x6ED9EBA1); R2(C,D,A,B,X[ 4],11,0x6ED9EBA1); R2(B,C,D,A,X[12],15,0x6ED9EBA1); R2(A,B,C,D,X[ 2], 3,0x6ED9EBA1); R2(D,A,B,C,X[10], 9,0x6ED9EBA1); R2(C,D,A,B,X[ 6],11,0x6ED9EBA1); R2(B,C,D,A,X[14],15,0x6ED9EBA1); R2(A,B,C,D,X[ 1], 3,0x6ED9EBA1); R2(D,A,B,C,X[ 9], 9,0x6ED9EBA1); R2(C,D,A,B,X[ 5],11,0x6ED9EBA1); R2(B,C,D,A,X[13],15,0x6ED9EBA1); R2(A,B,C,D,X[ 3], 3,0x6ED9EBA1); R2(D,A,B,C,X[11], 9,0x6ED9EBA1); R2(C,D,A,B,X[ 7],11,0x6ED9EBA1); R2(B,C,D,A,X[15],15,0x6ED9EBA1); A = c->A += A; B = c->B += B; C = c->C += C; D = c->D += D; } } #endif #ifndef md4_block_data_order #ifdef X #undef X Loading Loading @@ -240,19 +165,3 @@ void md4_block_data_order (MD4_CTX *c, const void *data_, size_t num) } } #endif #ifdef undef int printit(unsigned long *l) { int i,ii; for (i=0; i<2; i++) { for (ii=0; ii<8; ii++) { fprintf(stderr,"%08lx ",l[i*8+ii]); } fprintf(stderr,"\n"); } } #endif
crypto/md4/md4_locl.h +0 −45 Original line number Diff line number Diff line Loading @@ -65,44 +65,13 @@ #define MD4_LONG_LOG2 2 /* default to 32 bits */ #endif void md4_block_host_order (MD4_CTX *c, const void *p,size_t num); void md4_block_data_order (MD4_CTX *c, const void *p,size_t num); #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || \ defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) # if !defined(B_ENDIAN) /* * *_block_host_order is expected to handle aligned data while * *_block_data_order - unaligned. As algorithm and host (x86) * are in this case of the same "endianness" these two are * otherwise indistinguishable. But normally you don't want to * call the same function because unaligned access in places * where alignment is expected is usually a "Bad Thing". Indeed, * on RISCs you get punished with BUS ERROR signal or *severe* * performance degradation. Intel CPUs are in turn perfectly * capable of loading unaligned data without such drastic side * effect. Yes, they say it's slower than aligned load, but no * exception is generated and therefore performance degradation * is *incomparable* with RISCs. What we should weight here is * costs of unaligned access against costs of aligning data. * According to my measurements allowing unaligned access results * in ~9% performance improvement on Pentium II operating at * 266MHz. I won't be surprised if the difference will be higher * on faster systems:-) * * <appro@fy.chalmers.se> */ # define md4_block_data_order md4_block_host_order # endif #endif #define DATA_ORDER_IS_LITTLE_ENDIAN #define HASH_LONG MD4_LONG #define HASH_LONG_LOG2 MD4_LONG_LOG2 #define HASH_CTX MD4_CTX #define HASH_CBLOCK MD4_CBLOCK #define HASH_LBLOCK MD4_LBLOCK #define HASH_UPDATE MD4_Update #define HASH_TRANSFORM MD4_Transform #define HASH_FINAL MD4_Final Loading @@ -113,21 +82,7 @@ void md4_block_data_order (MD4_CTX *c, const void *p,size_t num); ll=(c)->C; HOST_l2c(ll,(s)); \ ll=(c)->D; HOST_l2c(ll,(s)); \ } while (0) #define HASH_BLOCK_HOST_ORDER md4_block_host_order #if !defined(L_ENDIAN) || defined(md4_block_data_order) #define HASH_BLOCK_DATA_ORDER md4_block_data_order /* * Little-endians (Intel and Alpha) feel better without this. * It looks like memcpy does better job than generic * md4_block_data_order on copying-n-aligning input data. * But frankly speaking I didn't expect such result on Alpha. * On the other hand I've got this with egcs-1.0.2 and if * program is compiled with another (better?) compiler it * might turn out other way around. * * <appro@fy.chalmers.se> */ #endif #include "md32_common.h" Loading
crypto/md5/asm/md5-586.pl +1 −1 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ $X="esi"; 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, # R3 ); &md5_block("md5_block_asm_host_order"); &md5_block("md5_block_asm_data_order"); &asm_finish(); sub Np Loading
crypto/md5/asm/md5-x86_64.pl +4 −4 Original line number Diff line number Diff line Loading @@ -115,9 +115,9 @@ $code .= <<EOF; .text .align 16 .globl md5_block_asm_host_order .type md5_block_asm_host_order,\@function,3 md5_block_asm_host_order: .globl md5_block_asm_data_order .type md5_block_asm_data_order,\@function,3 md5_block_asm_data_order: push %rbp push %rbx push %r12 Loading Loading @@ -243,7 +243,7 @@ $code .= <<EOF; pop %rbx pop %rbp ret .size md5_block_asm_host_order,.-md5_block_asm_host_order .size md5_block_asm_data_order,.-md5_block_asm_data_order EOF print $code; Loading