Commit 0f04379d authored by Andy Polyakov's avatar Andy Polyakov
Browse files

This update gets endianness-neutrality right and adds the second required

entry point, md5_block_asm_data_order.
parent 7e4d3359
Loading
Loading
Loading
Loading
+133 −116
Original line number Diff line number Diff line
@@ -86,6 +86,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define	pPad2		p12
#define	pPad3		p13
#define	pSkip		p8
//	The two predicates below shall remain constant throughout the whole routine
#define	pDataOrder	p14
#define	pHostOrder	p15

#define	A_		out24
#define	B_		out25
@@ -159,6 +162,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define _NOUTPUT	0
#define	_NROTATE	24	/* this must be <= _NINPUTS */

//	ADDP is the instruction this file uses when forming addresses from
//	pointer values (e.g. CtxPtr0, CtxPtr1, DPtrIn).  On HP-UX builds
//	without _LP64 it expands to addp4; everywhere else it is a plain add.
//	NOTE(review): presumably addp4 is needed because HP-UX ILP32 passes
//	32-bit pointers that must be widened to 64-bit addresses -- confirm
//	against the HP-UX IA-64 runtime conventions.
#if defined(_HPUX_SOURCE) && !defined(_LP64)
#define	ADDP	addp4
#else
#define	ADDP	add
#endif

//	Macros for getting the left and right portions of little-endian words

@@ -225,78 +233,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define	LCSave		r21
#define	PFSSave		r20
#define	PRSave		r22
#define	pAgain		p14
#define	pOff		p14

	.rodata
	// Values are specified as bytes to ensure they are
	// in little-endian byte-order.
	.align 4
md5_round_constants:
	data1 0x78, 0xa4, 0x6a, 0xd7	//     0
	data1 0x56, 0xb7, 0xc7, 0xe8	//     1
	data1 0xdb, 0x70, 0x20, 0x24	//     2
	data1 0xee, 0xce, 0xbd, 0xc1	//     3
	data1 0xaf, 0x0f, 0x7c, 0xf5	//     4
	data1 0x2a, 0xc6, 0x87, 0x47	//     5
	data1 0x13, 0x46, 0x30, 0xa8	//     6
	data1 0x01, 0x95, 0x46, 0xfd	//     7
	data1 0xd8, 0x98, 0x80, 0x69	//     8
	data1 0xaf, 0xf7, 0x44, 0x8b	//     9
	data1 0xb1, 0x5b, 0xff, 0xff	//    10
	data1 0xbe, 0xd7, 0x5c, 0x89	//    11
	data1 0x22, 0x11, 0x90, 0x6b	//    12
	data1 0x93, 0x71, 0x98, 0xfd	//    13
	data1 0x8e, 0x43, 0x79, 0xa6	//    14
	data1 0x21, 0x08, 0xb4, 0x49	//    15
	data1 0x62, 0x25, 0x1e, 0xf6	//    16
	data1 0x40, 0xb3, 0x40, 0xc0	//    17
	data1 0x51, 0x5a, 0x5e, 0x26	//    18
	data1 0xaa, 0xc7, 0xb6, 0xe9	//    19
	data1 0x5d, 0x10, 0x2f, 0xd6	//    20
	data1 0x53, 0x14, 0x44, 0x02	//    21
	data1 0x81, 0xe6, 0xa1, 0xd8	//    22
	data1 0xc8, 0xfb, 0xd3, 0xe7	//    23
	data1 0xe6, 0xcd, 0xe1, 0x21	//    24
	data1 0xd6, 0x07, 0x37, 0xc3	//    25
	data1 0x87, 0x0d, 0xd5, 0xf4	//    26
	data1 0xed, 0x14, 0x5a, 0x45	//    27
	data1 0x05, 0xe9, 0xe3, 0xa9	//    28
	data1 0xf8, 0xa3, 0xef, 0xfc	//    29
	data1 0xd9, 0x02, 0x6f, 0x67	//    30
	data1 0x8a, 0x4c, 0x2a, 0x8d	//    31
	data1 0x42, 0x39, 0xfa, 0xff	//    32
	data1 0x81, 0xf6, 0x71, 0x87	//    33
	data1 0x22, 0x61, 0x9d, 0x6d	//    34
	data1 0x0c, 0x38, 0xe5, 0xfd	//    35
	data1 0x44, 0xea, 0xbe, 0xa4	//    36
	data1 0xa9, 0xcf, 0xde, 0x4b	//    37
	data1 0x60, 0x4b, 0xbb, 0xf6	//    38
	data1 0x70, 0xbc, 0xbf, 0xbe	//    39
	data1 0xc6, 0x7e, 0x9b, 0x28	//    40
	data1 0xfa, 0x27, 0xa1, 0xea	//    41
	data1 0x85, 0x30, 0xef, 0xd4	//    42
	data1 0x05, 0x1d, 0x88, 0x04	//    43
	data1 0x39, 0xd0, 0xd4, 0xd9	//    44
	data1 0xe5, 0x99, 0xdb, 0xe6	//    45
	data1 0xf8, 0x7c, 0xa2, 0x1f	//    46
	data1 0x65, 0x56, 0xac, 0xc4	//    47
	data1 0x44, 0x22, 0x29, 0xf4	//    48
	data1 0x97, 0xff, 0x2a, 0x43	//    49
	data1 0xa7, 0x23, 0x94, 0xab	//    50
	data1 0x39, 0xa0, 0x93, 0xfc	//    51
	data1 0xc3, 0x59, 0x5b, 0x65	//    52
	data1 0x92, 0xcc, 0x0c, 0x8f	//    53
	data1 0x7d, 0xf4, 0xef, 0xff	//    54
	data1 0xd1, 0x5d, 0x84, 0x85	//    55
	data1 0x4f, 0x7e, 0xa8, 0x6f	//    56
	data1 0xe0, 0xe6, 0x2c, 0xfe	//    57
	data1 0x14, 0x43, 0x01, 0xa3	//    58
	data1 0xa1, 0x11, 0x08, 0x4e	//    59
	data1 0x82, 0x7e, 0x53, 0xf7	//    60
	data1 0x35, 0xf2, 0x3a, 0xbd	//    61
	data1 0xbb, 0xd2, 0xd7, 0x2a	//    62
	data1 0x91, 0xd3, 0x86, 0xeb	//    63
#define	pAgain		p63
#define	pOff		p63

	.text

@@ -320,52 +258,47 @@ md5_round_constants:

   */

	// md5_block_asm_data_order: entry point that marks the call as
	// "data order" and then joins the shared body at .md5_block (a label
	// located inside md5_block_asm_host_order).  It does nothing except
	// set the two order predicates and branch.
	.type	md5_block_asm_data_order, @function
	.global	md5_block_asm_data_order
	.align	32
	.proc	md5_block_asm_data_order
md5_block_asm_data_order:
{	.mib
	// r0 == r0 always holds, so the first predicate target (pDataOrder)
	// becomes 1 and the second (pHostOrder) becomes its complement, 0.
	cmp.eq	pDataOrder,pHostOrder = r0,r0
	// Continue in the common code; both predicates are expected to stay
	// unchanged from here until the routine returns.
	br.sptk.many	.md5_block
};;
	.endp	md5_block_asm_data_order

	.type	md5_block_asm_host_order, @function
	.global	md5_block_asm_host_order

	.align	32
	.proc	md5_block_asm_host_order
md5_block_asm_host_order:
	.prologue
#ifndef __LP64__
{	.mib
	cmp.eq	pHostOrder,pDataOrder = r0,r0
};;
.md5_block:
{	.mmi
	.save	ar.pfs, PFSSave
	alloc	PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT
	addp4	DPtrIn = 0, DPtrIn
	addp4	CtxPtr0 = 0, CtxPtr0
	ADDP	CtxPtr1 = 8, CtxPtr0
	mov	CTable = ip
}
;;
{	.mmi
	nop	0x0
	and	InAlign = 0x3, DPtrIn
	ADDP	DPtrIn = 0, DPtrIn
	ADDP	CtxPtr0 = 0, CtxPtr0
	.save	ar.lc, LCSave
	mov	LCSave = ar.lc
}
#else
;;
.pred.rel	"mutex",pDataOrder,pHostOrder
{	.mmi
	.save ar.pfs, PFSSave
	alloc	PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT
(pDataOrder)	add	CTable = .md5_tbl_data_order#-.md5_block#, CTable
(pHostOrder)	add	CTable = .md5_tbl_host_order#-.md5_block#, CTable	
	and	InAlign = 0x3, DPtrIn
	.save ar.lc, LCSave
	mov	LCSave = ar.lc
}
#endif

{	.mmi
	addl	CTable = @ltoffx(md5_round_constants), gp
	;;
	ld8.mov	CTable = [CTable], md5_round_constants // native byte-order
	add	CtxPtr1 = 8, CtxPtr0
}
#ifdef B_ENDIAN
{
	.mmi
	rum	psr.be		// switch to little-endian mode
	nop.m	0x0
	nop.i	0x0
}
#endif
;;
{	.mmi
	ld4	AccumA = [CtxPtr0], 4
	ld4	AccumC = [CtxPtr1], 4
@@ -379,15 +312,12 @@ md5_block_asm_host_order:
	ld4	AccumD = [CtxPtr1]
	dep	DPtr_ = 0, DPtrIn, 0, 2
} ;;

{	.mmi
#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
(pDataOrder)	rum	psr.be;;	// switch to little-endian
#endif
{	.mmb
	ld4	CTable0 = [CTable], 4
	cmp.ne	pOff, p0 = 0, InAlign
} ;;

{	.mib
	nop.m 0x0
	nop.i 0x0
(pOff)	br.cond.spnt.many .md5_unaligned
} ;;

@@ -431,9 +361,9 @@ md5_block_asm_host_order:
} ;;

.md5_exit:
//	Note that we switch back to the entry endianness AFTER storing so
//	that the memory image of the hash is preserved.

#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
(pDataOrder)	sum	psr.be;;	// switch back to big-endian mode
#endif
{	.mmi
	st4	[CtxPtr0] = AccumB, -4
	st4	[CtxPtr1] = AccumD, -4
@@ -445,9 +375,6 @@ md5_block_asm_host_order:
	mov	ar.lc = LCSave
} ;;
{	.mib
#ifdef B_ENDIAN
	sum	psr.be		// switch back to big-endian mode
#endif
	mov	ar.pfs = PFSSave
	br.ret.sptk.few	rp
} ;;
@@ -1001,9 +928,99 @@ md5_digest_block##offset: \
	nop 0x0 ;							\
	nop 0x0 ;							\
	br.cond.sptk.many md5_digest_GHI ;				\
} ;									\
} ;;									\
	.endp md5digestBlock ## offset

MD5FBLOCK(1)
MD5FBLOCK(2)
MD5FBLOCK(3)

	// md5_constants: the 64 32-bit MD5 round constants, stored twice.
	//   .md5_tbl_data_order - each word spelled out byte by byte, least
	//                         significant byte first, so its memory image
	//                         is little-endian regardless of how the
	//                         assembler lays out multi-byte data.
	//   .md5_tbl_host_order - the same 64 values emitted as data4 words,
	//                         i.e. in whatever byte order the assembler
	//                         uses for the target.
	// The shared routine body selects one of the two tables by adding its
	// offset from .md5_block to CTable, predicated on pDataOrder or
	// pHostOrder.  The trailing number on each row is the index of the
	// first constant on that row.
	.align 64
	.type md5_constants, @object
md5_constants:
.md5_tbl_data_order:			// To ensure little-endian data
					// order, code as bytes.
	data1 0x78, 0xa4, 0x6a, 0xd7	//     0
	data1 0x56, 0xb7, 0xc7, 0xe8	//     1
	data1 0xdb, 0x70, 0x20, 0x24	//     2
	data1 0xee, 0xce, 0xbd, 0xc1	//     3
	data1 0xaf, 0x0f, 0x7c, 0xf5	//     4
	data1 0x2a, 0xc6, 0x87, 0x47	//     5
	data1 0x13, 0x46, 0x30, 0xa8	//     6
	data1 0x01, 0x95, 0x46, 0xfd	//     7
	data1 0xd8, 0x98, 0x80, 0x69	//     8
	data1 0xaf, 0xf7, 0x44, 0x8b	//     9
	data1 0xb1, 0x5b, 0xff, 0xff	//    10
	data1 0xbe, 0xd7, 0x5c, 0x89	//    11
	data1 0x22, 0x11, 0x90, 0x6b	//    12
	data1 0x93, 0x71, 0x98, 0xfd	//    13
	data1 0x8e, 0x43, 0x79, 0xa6	//    14
	data1 0x21, 0x08, 0xb4, 0x49	//    15
	data1 0x62, 0x25, 0x1e, 0xf6	//    16
	data1 0x40, 0xb3, 0x40, 0xc0	//    17
	data1 0x51, 0x5a, 0x5e, 0x26	//    18
	data1 0xaa, 0xc7, 0xb6, 0xe9	//    19
	data1 0x5d, 0x10, 0x2f, 0xd6	//    20
	data1 0x53, 0x14, 0x44, 0x02	//    21
	data1 0x81, 0xe6, 0xa1, 0xd8	//    22
	data1 0xc8, 0xfb, 0xd3, 0xe7	//    23
	data1 0xe6, 0xcd, 0xe1, 0x21	//    24
	data1 0xd6, 0x07, 0x37, 0xc3	//    25
	data1 0x87, 0x0d, 0xd5, 0xf4	//    26
	data1 0xed, 0x14, 0x5a, 0x45	//    27
	data1 0x05, 0xe9, 0xe3, 0xa9	//    28
	data1 0xf8, 0xa3, 0xef, 0xfc	//    29
	data1 0xd9, 0x02, 0x6f, 0x67	//    30
	data1 0x8a, 0x4c, 0x2a, 0x8d	//    31
	data1 0x42, 0x39, 0xfa, 0xff	//    32
	data1 0x81, 0xf6, 0x71, 0x87	//    33
	data1 0x22, 0x61, 0x9d, 0x6d	//    34
	data1 0x0c, 0x38, 0xe5, 0xfd	//    35
	data1 0x44, 0xea, 0xbe, 0xa4	//    36
	data1 0xa9, 0xcf, 0xde, 0x4b	//    37
	data1 0x60, 0x4b, 0xbb, 0xf6	//    38
	data1 0x70, 0xbc, 0xbf, 0xbe	//    39
	data1 0xc6, 0x7e, 0x9b, 0x28	//    40
	data1 0xfa, 0x27, 0xa1, 0xea	//    41
	data1 0x85, 0x30, 0xef, 0xd4	//    42
	data1 0x05, 0x1d, 0x88, 0x04	//    43
	data1 0x39, 0xd0, 0xd4, 0xd9	//    44
	data1 0xe5, 0x99, 0xdb, 0xe6	//    45
	data1 0xf8, 0x7c, 0xa2, 0x1f	//    46
	data1 0x65, 0x56, 0xac, 0xc4	//    47
	data1 0x44, 0x22, 0x29, 0xf4	//    48
	data1 0x97, 0xff, 0x2a, 0x43	//    49
	data1 0xa7, 0x23, 0x94, 0xab	//    50
	data1 0x39, 0xa0, 0x93, 0xfc	//    51
	data1 0xc3, 0x59, 0x5b, 0x65	//    52
	data1 0x92, 0xcc, 0x0c, 0x8f	//    53
	data1 0x7d, 0xf4, 0xef, 0xff	//    54
	data1 0xd1, 0x5d, 0x84, 0x85	//    55
	data1 0x4f, 0x7e, 0xa8, 0x6f	//    56
	data1 0xe0, 0xe6, 0x2c, 0xfe	//    57
	data1 0x14, 0x43, 0x01, 0xa3	//    58
	data1 0xa1, 0x11, 0x08, 0x4e	//    59
	data1 0x82, 0x7e, 0x53, 0xf7	//    60
	data1 0x35, 0xf2, 0x3a, 0xbd	//    61
	data1 0xbb, 0xd2, 0xd7, 0x2a	//    62
	data1 0x91, 0xd3, 0x86, 0xeb	//    63

.md5_tbl_host_order:			// OS data order, might as well
					// be little-endian.
	// Four constants per row; same values as the byte table above.
	data4 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee	// 0
	data4 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501	// 4
	data4 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be	// 8
	data4 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821	// 12
	data4 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa	// 16
	data4 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8	// 20
	data4 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed	// 24
	data4 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a	// 28
	data4 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c	// 32
	data4 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70	// 36
	data4 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05	// 40
	data4 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665	// 44
	data4 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039	// 48
	data4 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1	// 52
	data4 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1	// 56
	data4 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391	// 60
	// Object size: 64 constants * 4 bytes each * 2 tables.
.size	md5_constants#,64*4*2