Commit 8df5518b authored by Andy Polyakov's avatar Andy Polyakov
Browse files

MIPS assembly pack: add MIPS[32|64]R2 code.

parent 9b222748
Loading
Loading
Loading
Loading
+403 −61
Original line number Diff line number Diff line
@@ -20,6 +20,10 @@
# thing about this module is its endian neutrality, which means that
# it processes data without ever changing byte order...

# September 2012
#
# Add MIPS32R2 and MIPS64R2 code.

######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
@@ -52,6 +56,7 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) {
	$PTR_ADD="dadd";	# incidentally works even on n32
	$PTR_SUB="dsub";	# incidentally works even on n32
	$PTR_INS="dins";
	$REG_S="sd";
	$REG_L="ld";
	$PTR_SLL="dsll";	# incidentally works even on n32
@@ -59,6 +64,7 @@ if ($flavour =~ /64|n32/i) {
} else {
	$PTR_ADD="add";
	$PTR_SUB="sub";
	$PTR_INS="ins";
	$REG_S="sw";
	$REG_L="lw";
	$PTR_SLL="sll";
@@ -138,6 +144,16 @@ _mips_AES_encrypt:
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	lw	$t0,0($i0)		# Te1[s1>>16]
	_xtr	$i0,$s2,8-2
	lw	$t1,0($i1)		# Te1[s2>>16]
	_xtr	$i1,$s3,8-2
	lw	$t2,0($i2)		# Te1[s3>>16]
	_xtr	$i2,$s0,8-2
	lw	$t3,0($i3)		# Te1[s0>>16]
	_xtr	$i3,$s1,8-2
#else
	lwl	$t0,3($i0)		# Te1[s1>>16]
	lwl	$t1,3($i1)		# Te1[s2>>16]
	lwl	$t2,3($i2)		# Te1[s3>>16]
@@ -150,6 +166,29 @@ _mips_AES_encrypt:
	_xtr	$i2,$s0,8-2
	lwr	$t3,2($i3)		# Te1[s0>>16]
	_xtr	$i3,$s1,8-2
#endif
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	rotr	$t0,$t0,8
	rotr	$t1,$t1,8
	rotr	$t2,$t2,8
	rotr	$t3,$t3,8
# if defined(_MIPSEL)
	lw	$t4,0($i0)		# Te2[s2>>8]
	_xtr	$i0,$s3,0-2
	lw	$t5,0($i1)		# Te2[s3>>8]
	_xtr	$i1,$s0,0-2
	lw	$t6,0($i2)		# Te2[s0>>8]
	_xtr	$i2,$s1,0-2
	lw	$t7,0($i3)		# Te2[s1>>8]
	_xtr	$i3,$s2,0-2

	and	$i0,0x3fc
	and	$i1,0x3fc
@@ -159,6 +198,52 @@ _mips_AES_encrypt:
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lw	$t8,0($i0)		# Te3[s3]
	$PTR_INS $i0,$s0,2,8
	lw	$t9,0($i1)		# Te3[s0]
	$PTR_INS $i1,$s1,2,8
	lw	$t10,0($i2)		# Te3[s1]
	$PTR_INS $i2,$s2,2,8
	lw	$t11,0($i3)		# Te3[s2]
	$PTR_INS $i3,$s3,2,8
# else
	lw	$t4,0($i0)		# Te2[s2>>8]
	$PTR_INS $i0,$s3,2,8
	lw	$t5,0($i1)		# Te2[s3>>8]
	$PTR_INS $i1,$s0,2,8
	lw	$t6,0($i2)		# Te2[s0>>8]
	$PTR_INS $i2,$s1,2,8
	lw	$t7,0($i3)		# Te2[s1>>8]
	$PTR_INS $i3,$s2,2,8

	lw	$t8,0($i0)		# Te3[s3]
	_xtr	$i0,$s0,24-2
	lw	$t9,0($i1)		# Te3[s0]
	_xtr	$i1,$s1,24-2
	lw	$t10,0($i2)		# Te3[s1]
	_xtr	$i2,$s2,24-2
	lw	$t11,0($i3)		# Te3[s2]
	_xtr	$i3,$s3,24-2

	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
# endif
	rotr	$t4,$t4,16
	rotr	$t5,$t5,16
	rotr	$t6,$t6,16
	rotr	$t7,$t7,16

	rotr	$t8,$t8,24
	rotr	$t9,$t9,24
	rotr	$t10,$t10,24
	rotr	$t11,$t11,24
#else
	lwl	$t4,2($i0)		# Te2[s2>>8]
	lwl	$t5,2($i1)		# Te2[s3>>8]
	lwl	$t6,2($i2)		# Te2[s0>>8]
@@ -201,6 +286,7 @@ _mips_AES_encrypt:
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
#endif
	xor	$t0,$t4
	lw	$t4,0($i0)		# Te0[s0>>24]
	xor	$t1,$t5
@@ -263,6 +349,89 @@ _mips_AES_encrypt:
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
# if defined(_MIPSEL)
	lbu	$t4,2($i0)		# Te4[s2>>8]
	$PTR_INS $i0,$s0,2,8
	lbu	$t5,2($i1)		# Te4[s3>>8]
	$PTR_INS $i1,$s1,2,8
	lbu	$t6,2($i2)		# Te4[s0>>8]
	$PTR_INS $i2,$s2,2,8
	lbu	$t7,2($i3)		# Te4[s1>>8]
	$PTR_INS $i3,$s3,2,8

	lbu	$t8,2($i0)		# Te4[s0>>24]
	_xtr	$i0,$s3,0-2
	lbu	$t9,2($i1)		# Te4[s1>>24]
	_xtr	$i1,$s0,0-2
	lbu	$t10,2($i2)		# Te4[s2>>24]
	_xtr	$i2,$s1,0-2
	lbu	$t11,2($i3)		# Te4[s3>>24]
	_xtr	$i3,$s2,0-2

	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
# else
	lbu	$t4,2($i0)		# Te4[s2>>8]
	_xtr	$i0,$s0,24-2
	lbu	$t5,2($i1)		# Te4[s3>>8]
	_xtr	$i1,$s1,24-2
	lbu	$t6,2($i2)		# Te4[s0>>8]
	_xtr	$i2,$s2,24-2
	lbu	$t7,2($i3)		# Te4[s1>>8]
	_xtr	$i3,$s3,24-2

	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t8,2($i0)		# Te4[s0>>24]
	$PTR_INS $i0,$s3,2,8
	lbu	$t9,2($i1)		# Te4[s1>>24]
	$PTR_INS $i1,$s0,2,8
	lbu	$t10,2($i2)		# Te4[s2>>24]
	$PTR_INS $i2,$s1,2,8
	lbu	$t11,2($i3)		# Te4[s3>>24]
	$PTR_INS $i3,$s2,2,8
# endif
	_ins	$t0,16
	_ins	$t1,16
	_ins	$t2,16
	_ins	$t3,16

	_ins2	$t0,$t4,8
	lbu	$t4,2($i0)		# Te4[s3]
	_ins2	$t1,$t5,8
	lbu	$t5,2($i1)		# Te4[s0]
	_ins2	$t2,$t6,8
	lbu	$t6,2($i2)		# Te4[s1]
	_ins2	$t3,$t7,8
	lbu	$t7,2($i3)		# Te4[s2]

	_ins2	$t0,$t8,24
	lw	$s0,0($key0)
	_ins2	$t1,$t9,24
	lw	$s1,4($key0)
	_ins2	$t2,$t10,24
	lw	$s2,8($key0)
	_ins2	$t3,$t11,24
	lw	$s3,12($key0)

	_ins2	$t0,$t4,0
	_ins2	$t1,$t5,0
	_ins2	$t2,$t6,0
	_ins2	$t3,$t7,0
#else
	lbu	$t4,2($i0)		# Te4[s2>>8]
	_xtr	$i0,$s0,24-2
	lbu	$t5,2($i1)		# Te4[s3>>8]
@@ -340,7 +509,7 @@ _mips_AES_encrypt:
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7

#endif
	xor	$s0,$t0
	xor	$s1,$t1
	xor	$s2,$t2
@@ -465,6 +634,16 @@ _mips_AES_decrypt:
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	lw	$t0,0($i0)		# Td1[s3>>16]
	_xtr	$i0,$s2,8-2
	lw	$t1,0($i1)		# Td1[s0>>16]
	_xtr	$i1,$s3,8-2
	lw	$t2,0($i2)		# Td1[s1>>16]
	_xtr	$i2,$s0,8-2
	lw	$t3,0($i3)		# Td1[s2>>16]
	_xtr	$i3,$s1,8-2
#else
	lwl	$t0,3($i0)		# Td1[s3>>16]
	lwl	$t1,3($i1)		# Td1[s0>>16]
	lwl	$t2,3($i2)		# Td1[s1>>16]
@@ -477,6 +656,7 @@ _mips_AES_decrypt:
	_xtr	$i2,$s0,8-2
	lwr	$t3,2($i3)		# Td1[s2>>16]
	_xtr	$i3,$s1,8-2
#endif

	and	$i0,0x3fc
	and	$i1,0x3fc
@@ -486,6 +666,75 @@ _mips_AES_decrypt:
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	rotr	$t0,$t0,8
	rotr	$t1,$t1,8
	rotr	$t2,$t2,8
	rotr	$t3,$t3,8
# if defined(_MIPSEL)
	lw	$t4,0($i0)		# Td2[s2>>8]
	_xtr	$i0,$s1,0-2
	lw	$t5,0($i1)		# Td2[s3>>8]
	_xtr	$i1,$s2,0-2
	lw	$t6,0($i2)		# Td2[s0>>8]
	_xtr	$i2,$s3,0-2
	lw	$t7,0($i3)		# Td2[s1>>8]
	_xtr	$i3,$s0,0-2

	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lw	$t8,0($i0)		# Td3[s1]
	$PTR_INS $i0,$s0,2,8
	lw	$t9,0($i1)		# Td3[s2]
	$PTR_INS $i1,$s1,2,8
	lw	$t10,0($i2)		# Td3[s3]
	$PTR_INS $i2,$s2,2,8
	lw	$t11,0($i3)		# Td3[s0]
	$PTR_INS $i3,$s3,2,8
#else
	lw	$t4,0($i0)		# Td2[s2>>8]
	$PTR_INS $i0,$s1,2,8
	lw	$t5,0($i1)		# Td2[s3>>8]
	$PTR_INS $i1,$s2,2,8
	lw	$t6,0($i2)		# Td2[s0>>8]
	$PTR_INS $i2,$s3,2,8
	lw	$t7,0($i3)		# Td2[s1>>8]
	$PTR_INS $i3,$s0,2,8

	lw	$t8,0($i0)		# Td3[s1]
	_xtr	$i0,$s0,24-2
	lw	$t9,0($i1)		# Td3[s2]
	_xtr	$i1,$s1,24-2
	lw	$t10,0($i2)		# Td3[s3]
	_xtr	$i2,$s2,24-2
	lw	$t11,0($i3)		# Td3[s0]
	_xtr	$i3,$s3,24-2

	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
#endif
	rotr	$t4,$t4,16
	rotr	$t5,$t5,16
	rotr	$t6,$t6,16
	rotr	$t7,$t7,16

	rotr	$t8,$t8,24
	rotr	$t9,$t9,24
	rotr	$t10,$t10,24
	rotr	$t11,$t11,24
#else
	lwl	$t4,2($i0)		# Td2[s2>>8]
	lwl	$t5,2($i1)		# Td2[s3>>8]
	lwl	$t6,2($i2)		# Td2[s0>>8]
@@ -528,6 +777,7 @@ _mips_AES_decrypt:
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
#endif

	xor	$t0,$t4
	lw	$t4,0($i0)		# Td0[s0>>24]
@@ -601,6 +851,81 @@ _mips_AES_decrypt:
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
# if defined(_MIPSEL)
	lbu	$t4,1024($i0)		# Td4[s2>>8]
	$PTR_INS $i0,$s0,0,8
	lbu	$t5,1024($i1)		# Td4[s3>>8]
	$PTR_INS $i1,$s1,0,8
	lbu	$t6,1024($i2)		# Td4[s0>>8]
	$PTR_INS $i2,$s2,0,8
	lbu	$t7,1024($i3)		# Td4[s1>>8]
	$PTR_INS $i3,$s3,0,8

	lbu	$t8,1024($i0)		# Td4[s0>>24]
	_xtr	$i0,$s1,0
	lbu	$t9,1024($i1)		# Td4[s1>>24]
	_xtr	$i1,$s2,0
	lbu	$t10,1024($i2)		# Td4[s2>>24]
	_xtr	$i2,$s3,0
	lbu	$t11,1024($i3)		# Td4[s3>>24]
	_xtr	$i3,$s0,0

	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
# else
	lbu	$t4,1024($i0)		# Td4[s2>>8]
	_xtr	$i0,$s0,24
	lbu	$t5,1024($i1)		# Td4[s3>>8]
	_xtr	$i1,$s1,24
	lbu	$t6,1024($i2)		# Td4[s0>>8]
	_xtr	$i2,$s2,24
	lbu	$t7,1024($i3)		# Td4[s1>>8]
	_xtr	$i3,$s3,24

	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$t8,1024($i0)		# Td4[s0>>24]
	$PTR_INS $i0,$s1,0,8
	lbu	$t9,1024($i1)		# Td4[s1>>24]
	$PTR_INS $i1,$s2,0,8
	lbu	$t10,1024($i2)		# Td4[s2>>24]
	$PTR_INS $i2,$s3,0,8
	lbu	$t11,1024($i3)		# Td4[s3>>24]
	$PTR_INS $i3,$s0,0,8
# endif
	_ins	$t0,16
	_ins	$t1,16
	_ins	$t2,16
	_ins	$t3,16

	_ins2	$t0,$t4,8
	lbu	$t4,1024($i0)		# Td4[s1]
	_ins2	$t1,$t5,8
	lbu	$t5,1024($i1)		# Td4[s2]
	_ins2	$t2,$t6,8
	lbu	$t6,1024($i2)		# Td4[s3]
	_ins2	$t3,$t7,8
	lbu	$t7,1024($i3)		# Td4[s0]

	_ins2	$t0,$t8,24
	lw	$s0,0($key0)
	_ins2	$t1,$t9,24
	lw	$s1,4($key0)
	_ins2	$t2,$t10,24
	lw	$s2,8($key0)
	_ins2	$t3,$t11,24
	lw	$s3,12($key0)

	_ins2	$t0,$t4,0
	_ins2	$t1,$t5,0
	_ins2	$t2,$t6,0
	_ins2	$t3,$t7,0
#else
	lbu	$t4,1024($i0)		# Td4[s2>>8]
	_xtr	$i0,$s0,24
	lbu	$t5,1024($i1)		# Td4[s3>>8]
@@ -670,6 +995,7 @@ _mips_AES_decrypt:
	xor	$t1,$t5
	xor	$t2,$t6
	xor	$t3,$t7
#endif

	xor	$s0,$t0
	xor	$s1,$t1
@@ -782,7 +1108,7 @@ _mips_AES_set_encrypt_key:
	beqz	$inp,.Lekey_done
	li	$t0,-1
	beqz	$key,.Lekey_done
	$PTR_ADD $rcon,$Tbl,1024+256
	$PTR_ADD $rcon,$Tbl,256

	.set	reorder
	lwl	$rk0,0+$MSB($inp)	# load 128 bits
@@ -834,10 +1160,10 @@ _mips_AES_set_encrypt_key:
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,1024($i0)
	lbu	$i1,1024($i1)
	lbu	$i2,1024($i2)
	lbu	$i3,1024($i3)
	lbu	$i0,0($i0)
	lbu	$i1,0($i1)
	lbu	$i2,0($i2)
	lbu	$i3,0($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
@@ -889,10 +1215,10 @@ _mips_AES_set_encrypt_key:
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,1024($i0)
	lbu	$i1,1024($i1)
	lbu	$i2,1024($i2)
	lbu	$i3,1024($i3)
	lbu	$i0,0($i0)
	lbu	$i1,0($i1)
	lbu	$i2,0($i2)
	lbu	$i3,0($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
@@ -948,10 +1274,10 @@ _mips_AES_set_encrypt_key:
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,1024($i0)
	lbu	$i1,1024($i1)
	lbu	$i2,1024($i2)
	lbu	$i3,1024($i3)
	lbu	$i0,0($i0)
	lbu	$i1,0($i1)
	lbu	$i2,0($i2)
	lbu	$i3,0($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
@@ -990,10 +1316,10 @@ _mips_AES_set_encrypt_key:
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,1024($i0)
	lbu	$i1,1024($i1)
	lbu	$i2,1024($i2)
	lbu	$i3,1024($i3)
	lbu	$i0,0($i0)
	lbu	$i1,0($i1)
	lbu	$i2,0($i2)
	lbu	$i3,0($i3)
	sll	$i0,24
	sll	$i1,16
	sll	$i2,8
@@ -1055,7 +1381,7 @@ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
___
$code.=<<___;
	.set	reorder
	la	$Tbl,AES_Te		# PIC-ified 'load address'
	la	$Tbl,AES_Te4		# PIC-ified 'load address'

	bal	_mips_AES_set_encrypt_key

@@ -1110,7 +1436,7 @@ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
___
$code.=<<___;
	.set	reorder
	la	$Tbl,AES_Te		# PIC-ified 'load address'
	la	$Tbl,AES_Te4		# PIC-ified 'load address'

	bal	_mips_AES_set_encrypt_key

@@ -1181,6 +1507,16 @@ $code.=<<___;
	xor	$tpb,$tp9,$tp2
	xor	$tpd,$tp9,$tp4

#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	rotr	$tp1,$tpd,16
	 xor	$tpe,$tp2
	rotr	$tp2,$tp9,8
	xor	$tpe,$tp1
	rotr	$tp4,$tpb,24
	xor	$tpe,$tp2
	lw	$tp1,4($key)		# modulo-scheduled
	xor	$tpe,$tp4
#else
	_ror	$tp1,$tpd,16
	 xor	$tpe,$tp2
	_ror	$tp2,$tpd,-16
@@ -1195,6 +1531,7 @@ $code.=<<___;
	xor	$tpe,$tp1
	lw	$tp1,4($key)		# modulo-scheduled
	xor	$tpe,$tp2
#endif
	sub	$cnt,1
	sw	$tpe,0($key)
	$PTR_ADD $key,4
@@ -1225,7 +1562,7 @@ ___
# Tables are kept in endian-neutral manner
$code.=<<___;
.rdata
.align	6
.align	10
AES_Te:
.byte	0xc6,0x63,0x63,0xa5,	0xf8,0x7c,0x7c,0x84	# Te0
.byte	0xee,0x77,0x77,0x99,	0xf6,0x7b,0x7b,0x8d
@@ -1356,46 +1693,6 @@ AES_Te:
.byte	0x7b,0xb0,0xb0,0xcb,	0xa8,0x54,0x54,0xfc
.byte	0x6d,0xbb,0xbb,0xd6,	0x2c,0x16,0x16,0x3a

.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5	# Te4
.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16

.byte	0x01,0x00,0x00,0x00,	0x02,0x00,0x00,0x00	# rcon
.byte	0x04,0x00,0x00,0x00,	0x08,0x00,0x00,0x00
.byte	0x10,0x00,0x00,0x00,	0x20,0x00,0x00,0x00
.byte	0x40,0x00,0x00,0x00,	0x80,0x00,0x00,0x00
.byte	0x1B,0x00,0x00,0x00,	0x36,0x00,0x00,0x00

.align	6
AES_Td:
.byte	0x51,0xf4,0xa7,0x50,	0x7e,0x41,0x65,0x53	# Td0
.byte	0x1a,0x17,0xa4,0xc3,	0x3a,0x27,0x5e,0x96
@@ -1558,6 +1855,46 @@ AES_Td:
.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d

AES_Te4:
.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5	# Te4
.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16

.byte	0x01,0x00,0x00,0x00,	0x02,0x00,0x00,0x00	# rcon
.byte	0x04,0x00,0x00,0x00,	0x08,0x00,0x00,0x00
.byte	0x10,0x00,0x00,0x00,	0x20,0x00,0x00,0x00
.byte	0x40,0x00,0x00,0x00,	0x80,0x00,0x00,0x00
.byte	0x1B,0x00,0x00,0x00,	0x36,0x00,0x00,0x00
___

foreach (split("\n",$code)) {
@@ -1574,6 +1911,9 @@ foreach (split("\n",$code)) {
	    s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("24-$3"))/e or
	    s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
		sprintf("ins\t$1,$2,%d,8",$big_endian ?	eval($3)
					:		eval("24-$3"))/e or
	    s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("$3*-1"))/e or
@@ -1596,6 +1936,8 @@ foreach (split("\n",$code)) {
		sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
	}

	# Little-endian fix-up for the R2 code path: every "rotr rd,rs,n" with a
	# literal count has n rewritten to 32-n, i.e. the rotation direction is
	# mirrored. Big-endian output is left untouched. Presumably this keeps
	# the lw+rotr table lookups consistent with the endian-neutral table
	# layout, like the 24-n adjustment applied to _ins/_ins2 -- confirm.
	s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e if(!$big_endian);

	print $_,"\n";
}

+92 −0
Original line number Diff line number Diff line
@@ -15,6 +15,10 @@
# compatible subroutine. There is room for minor optimization on
# little-endian platforms...

# September 2012.
#
# Add MIPS32r2 code.

######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
@@ -95,6 +99,10 @@ sub BODY_00_14 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___	if (!$big_endian);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	wsbh	@X[$i],@X[$i]	# byte swap($i)
	rotr	@X[$i],@X[$i],16
#else
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
@@ -104,8 +112,22 @@ $code.=<<___ if (!$big_endian);
	or	@X[$i],$t0
	or	$t1,$t2
	or	@X[$i],$t1
#endif
___
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	addu	$e,$K		# $i
	xor	$t0,$c,$d
	rotr	$t1,$a,27
	 lwl	@X[$j],$j*4+$MSB($inp)
	and	$t0,$b
	addu	$e,$t1
	 lwr	@X[$j],$j*4+$LSB($inp)
	xor	$t0,$d
	addu	$e,@X[$i]
	rotr	$b,$b,2
	addu	$e,$t0
#else
	 lwl	@X[$j],$j*4+$MSB($inp)
	sll	$t0,$a,5	# $i
	addu	$e,$K
@@ -121,6 +143,7 @@ $code.=<<___;
	addu	$e,@X[$i]
	or	$b,$t2
	addu	$e,$t0
#endif
___
}

@@ -129,6 +152,10 @@ my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;

$code.=<<___	if (!$big_endian && $i==15);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	wsbh	@X[$i],@X[$i]	# byte swap($i)
	rotr	@X[$i],@X[$i],16
#else
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
@@ -138,8 +165,24 @@ $code.=<<___ if (!$big_endian && $i==15);
	or	@X[$i],$t0
	or	@X[$i],$t1
	or	@X[$i],$t2
#endif
___
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	addu	$e,$K		# $i
	 xor	@X[$j%16],@X[($j+2)%16]
	xor	$t0,$c,$d
	rotr	$t1,$a,27
	 xor	@X[$j%16],@X[($j+8)%16]
	and	$t0,$b
	addu	$e,$t1
	 xor	@X[$j%16],@X[($j+13)%16]
	xor	$t0,$d
	addu	$e,@X[$i%16]
	 rotr	@X[$j%16],@X[$j%16],31
	rotr	$b,$b,2
	addu	$e,$t0
#else
	 xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
@@ -159,6 +202,7 @@ $code.=<<___;
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
#endif
___
}

@@ -166,6 +210,20 @@ sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	 xor	@X[$j%16],@X[($j+2)%16]
	addu	$e,$K		# $i
	rotr	$t1,$a,27
	 xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	 xor	@X[$j%16],@X[($j+13)%16]
	xor	$t0,$b
	addu	$e,@X[$i%16]
	 rotr	@X[$j%16],@X[$j%16],31
	rotr	$b,$b,2
	addu	$e,$t0
#else
	 xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
@@ -184,8 +242,24 @@ $code.=<<___ if ($i<79);
	 or	@X[$j%16],$t1
	or	$b,$t2
	addu	$e,$t0
#endif
___
$code.=<<___ if ($i==79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	 lw	@X[0],0($ctx)
	addu	$e,$K		# $i
	 lw	@X[1],4($ctx)
	rotr	$t1,$a,27
	 lw	@X[2],8($ctx)
	xor	$t0,$c,$d
	addu	$e,$t1
	 lw	@X[3],12($ctx)
	xor	$t0,$b
	addu	$e,@X[$i%16]
	 lw	@X[4],16($ctx)
	rotr	$b,$b,2
	addu	$e,$t0
#else
	 lw	@X[0],0($ctx)
	sll	$t0,$a,5	# $i
	addu	$e,$K
@@ -203,6 +277,7 @@ $code.=<<___ if ($i==79);
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
#endif
___
}

@@ -210,6 +285,22 @@ sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	addu	$e,$K		# $i
	and	$t0,$c,$d
	 xor	@X[$j%16],@X[($j+2)%16]
	rotr	$t1,$a,27
	addu	$e,$t0
	 xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	 xor	@X[$j%16],@X[($j+13)%16]
	and	$t0,$b
	addu	$e,@X[$i%16]
	 rotr	@X[$j%16],@X[$j%16],31
	rotr	$b,$b,2
	addu	$e,$t0
#else
	 xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
@@ -230,6 +321,7 @@ $code.=<<___ if ($i<79);
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
#endif
___
}

+61 −10
Original line number Diff line number Diff line
#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@@ -17,6 +17,10 @@
# ~17%, but it comes for free, because it's same instruction sequence.
# Improvement coefficients are for aligned input.

# September 2012.
#
# Add MIPS[32|64]r2 code.

######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
@@ -83,6 +87,7 @@ if ($output =~ /512/) {
	$SLL="dsll";		# shift left logical
	$SRL="dsrl";		# shift right logical
	$ADDU="daddu";
	$ROTR="drotr";
	@Sigma0=(28,34,39);
	@Sigma1=(14,18,41);
	@sigma0=( 7, 1, 8);	# right shift first
@@ -97,6 +102,7 @@ if ($output =~ /512/) {
	$SLL="sll";		# shift left logical
	$SRL="srl";		# shift right logical
	$ADDU="addu";
	$ROTR="rotr";
	@Sigma0=( 2,13,22);
	@Sigma1=( 6,11,25);
	@sigma0=( 3, 7,18);	# right shift first
@@ -124,6 +130,10 @@ $code.=<<___ if ($i<15);
	${LD}r	@X[1],`($i+1)*$SZ+$LSB`($inp)
___
$code.=<<___	if (!$big_endian && $i<16 && $SZ==4);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	wsbh	@X[0],@X[0]		# byte swap($i)
	rotr	@X[0],@X[0],16
#else
	srl	$tmp0,@X[0],24		# byte swap($i)
	srl	$tmp1,@X[0],8
	andi	$tmp2,@X[0],0xFF00
@@ -133,8 +143,13 @@ $code.=<<___ if (!$big_endian && $i<16 && $SZ==4);
	or	@X[0],$tmp0
	or	$tmp1,$tmp2
	or	@X[0],$tmp1
#endif
___
$code.=<<___	if (!$big_endian && $i<16 && $SZ==8);
#if defined(_MIPS_ARCH_MIPS64R2)
	dsbh	@X[0],@X[0]		# byte swap($i)
	dshd	@X[0],@X[0]
#else
	ori	$tmp0,$zero,0xFF
	dsll	$tmp2,$tmp0,32
	or	$tmp0,$tmp2		# 0x000000FF000000FF
@@ -153,8 +168,31 @@ $code.=<<___ if (!$big_endian && $i<16 && $SZ==8);
	dsrl	$tmp1,@X[0],32
	dsll	@X[0],32
	or	@X[0],$tmp1
#endif
___
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	xor	$tmp2,$f,$g			# $i
	$ROTR	$tmp0,$e,@Sigma1[0]
	$ADDU	$T1,$X[0],$h
	$ROTR	$tmp1,$e,@Sigma1[1]
	and	$tmp2,$e
	$ROTR	$h,$e,@Sigma1[2]
	xor	$tmp0,$tmp1
	$ROTR	$tmp1,$a,@Sigma0[0]
	xor	$tmp2,$g			# Ch(e,f,g)
	xor	$tmp0,$h			# Sigma1(e)

	$ROTR	$h,$a,@Sigma0[1]
	$ADDU	$T1,$tmp2
	$LD	$tmp2,`$i*$SZ`($Ktbl)		# K[$i]
	xor	$h,$tmp1
	$ROTR	$tmp1,$a,@Sigma0[2]
	$ADDU	$T1,$tmp0
	and	$tmp0,$b,$c
	xor	$h,$tmp1			# Sigma0(a)
	xor	$tmp1,$b,$c
#else
	$ADDU	$T1,$X[0],$h			# $i
	$SRL	$h,$e,@Sigma1[0]
	xor	$tmp2,$f,$g
@@ -184,16 +222,15 @@ $code.=<<___;
	xor	$h,$tmp1
	$SLL	$tmp1,$a,`$SZ*8-@Sigma0[0]`
	xor	$h,$tmp0
	$ST	@X[0],`($i%16)*$SZ`($sp)	# offload to ring buffer
	and	$tmp0,$b,$c
	xor	$h,$tmp1			# Sigma0(a)

	or	$tmp0,$a,$b
	and	$tmp1,$a,$b
	and	$tmp0,$c
	or	$tmp1,$tmp0			# Maj(a,b,c)
	xor	$tmp1,$b,$c
#endif
	$ST	@X[0],`($i%16)*$SZ`($sp)	# offload to ring buffer
	$ADDU	$h,$tmp0
	and	$tmp1,$a
	$ADDU	$T1,$tmp2			# +=K[$i]
	$ADDU	$h,$tmp1

	$ADDU	$h,$tmp1			# +=Maj(a,b,c)
	$ADDU	$d,$T1
	$ADDU	$h,$T1
___
@@ -207,6 +244,20 @@ my $i=@_[0];
my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]);

$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	$SRL	$tmp2,@X[1],@sigma0[0]		# Xupdate($i)
	$ROTR	$tmp0,@X[1],@sigma0[1]
	$ADDU	@X[0],@X[9]			# +=X[i+9]
	xor	$tmp2,$tmp0
	$ROTR	$tmp0,@X[1],@sigma0[2]

	$SRL	$tmp3,@X[14],@sigma1[0]
	$ROTR	$tmp1,@X[14],@sigma1[1]
	xor	$tmp2,$tmp0			# sigma0(X[i+1])
	$ROTR	$tmp0,@X[14],@sigma1[2]
	xor	$tmp3,$tmp1
	$ADDU	@X[0],$tmp2
#else
	$SRL	$tmp2,@X[1],@sigma0[0]		# Xupdate($i)
	$ADDU	@X[0],@X[9]			# +=X[i+9]
	$SLL	$tmp1,@X[1],`$SZ*8-@sigma0[2]`
@@ -227,7 +278,7 @@ $code.=<<___;
	xor	$tmp3,$tmp0
	$SRL	$tmp0,@X[14],@sigma1[2]
	xor	$tmp3,$tmp1

#endif
	xor	$tmp3,$tmp0			# sigma1(X[i+14])
	$ADDU	@X[0],$tmp3
___