Commit 947716c1 authored by Andy Polyakov
Browse files

MIPS assembly pack: adapt it for MIPS[32|64]R6.



MIPS[32|64]R6 is binary- and source-incompatible with previous MIPS ISA
specifications. Fortunately, it is still possible to resolve the differences
in source code by using the standard pre-processor and by switching to the
trap-free versions of the addition and subtraction instructions.

Reviewed-by: Richard Levitte <levitte@openssl.org>
parent a4324912
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -216,7 +216,7 @@
    },
    mips32_asm => {
	template	=> 1,
	bn_asm_src      => "bn-mips.s mips-mont.s",
	bn_asm_src      => "bn-mips.S mips-mont.S",
	aes_asm_src     => "aes_cbc.c aes-mips.S",
	sha1_asm_src    => "sha1-mips.S sha256-mips.S",
    },
+65 −22
Original line number Diff line number Diff line
@@ -65,8 +65,8 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64

if ($flavour =~ /64|n32/i) {
	$PTR_LA="dla";
	$PTR_ADD="dadd";	# incidentally works even on n32
	$PTR_SUB="dsub";	# incidentally works even on n32
	$PTR_ADD="daddu";	# incidentally works even on n32
	$PTR_SUB="dsubu";	# incidentally works even on n32
	$PTR_INS="dins";
	$REG_S="sd";
	$REG_L="ld";
@@ -74,8 +74,8 @@ if ($flavour =~ /64|n32/i) {
	$SZREG=8;
} else {
	$PTR_LA="la";
	$PTR_ADD="add";
	$PTR_SUB="sub";
	$PTR_ADD="addu";
	$PTR_SUB="subu";
	$PTR_INS="ins";
	$REG_S="sw";
	$REG_L="lw";
@@ -102,15 +102,13 @@ open STDOUT,">$output";
my ($MSB,$LSB)=(0,3);	# automatically converted to little-endian

$code.=<<___;
#include "mips_arch.h"

.text
#ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h>
#endif

#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
#define _MIPS_ARCH_MIPS32R2
#endif

#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
.option	pic2
#endif
@@ -146,7 +144,7 @@ _mips_AES_encrypt:
	xor	$s2,$t2
	xor	$s3,$t3

	sub	$cnt,1
	subu	$cnt,1
#if defined(__mips_smartmips)
	ext	$i0,$s1,16,8
.Loop_enc:
@@ -218,7 +216,7 @@ _mips_AES_encrypt:
	xor	$t2,$t6
	xor	$t3,$t7

	sub	$cnt,1
	subu	$cnt,1
	$PTR_ADD $key0,16
	xor	$s0,$t0
	xor	$s1,$t1
@@ -409,7 +407,7 @@ _mips_AES_encrypt:
	xor	$t2,$t6
	xor	$t3,$t7

	sub	$cnt,1
	subu	$cnt,1
	$PTR_ADD $key0,16
	xor	$s0,$t0
	xor	$s1,$t1
@@ -657,6 +655,12 @@ $code.=<<___;
	.set	reorder
	$PTR_LA	$Tbl,AES_Te		# PIC-ified 'load address'

#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	$s0,0($inp)
	lw	$s1,4($inp)
	lw	$s2,8($inp)
	lw	$s3,12($inp)
#else
	lwl	$s0,0+$MSB($inp)
	lwl	$s1,4+$MSB($inp)
	lwl	$s2,8+$MSB($inp)
@@ -665,9 +669,16 @@ $code.=<<___;
	lwr	$s1,4+$LSB($inp)
	lwr	$s2,8+$LSB($inp)
	lwr	$s3,12+$LSB($inp)
#endif

	bal	_mips_AES_encrypt

#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	sw	$s0,0($out)
	sw	$s1,4($out)
	sw	$s2,8($out)
	sw	$s3,12($out)
#else
	swr	$s0,0+$LSB($out)
	swr	$s1,4+$LSB($out)
	swr	$s2,8+$LSB($out)
@@ -676,6 +687,7 @@ $code.=<<___;
	swl	$s1,4+$MSB($out)
	swl	$s2,8+$MSB($out)
	swl	$s3,12+$MSB($out)
#endif

	.set	noreorder
	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
@@ -720,7 +732,7 @@ _mips_AES_decrypt:
	xor	$s2,$t2
	xor	$s3,$t3

	sub	$cnt,1
	subu	$cnt,1
#if defined(__mips_smartmips)
	ext	$i0,$s3,16,8
.Loop_dec:
@@ -792,7 +804,7 @@ _mips_AES_decrypt:
	xor	$t2,$t6
	xor	$t3,$t7

	sub	$cnt,1
	subu	$cnt,1
	$PTR_ADD $key0,16
	xor	$s0,$t0
	xor	$s1,$t1
@@ -985,7 +997,7 @@ _mips_AES_decrypt:
	xor	$t2,$t6
	xor	$t3,$t7

	sub	$cnt,1
	subu	$cnt,1
	$PTR_ADD $key0,16
	xor	$s0,$t0
	xor	$s1,$t1
@@ -1228,6 +1240,12 @@ $code.=<<___;
	.set	reorder
	$PTR_LA	$Tbl,AES_Td		# PIC-ified 'load address'

#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	$s0,0($inp)
	lw	$s1,4($inp)
	lw	$s2,8($inp)
	lw	$s3,12($inp)
#else
	lwl	$s0,0+$MSB($inp)
	lwl	$s1,4+$MSB($inp)
	lwl	$s2,8+$MSB($inp)
@@ -1236,9 +1254,16 @@ $code.=<<___;
	lwr	$s1,4+$LSB($inp)
	lwr	$s2,8+$LSB($inp)
	lwr	$s3,12+$LSB($inp)
#endif

	bal	_mips_AES_decrypt

#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	sw	$s0,0($out)
	sw	$s1,4($out)
	sw	$s2,8($out)
	sw	$s3,12($out)
#else
	swr	$s0,0+$LSB($out)
	swr	$s1,4+$LSB($out)
	swr	$s2,8+$LSB($out)
@@ -1247,6 +1272,7 @@ $code.=<<___;
	swl	$s1,4+$MSB($out)
	swl	$s2,8+$MSB($out)
	swl	$s3,12+$MSB($out)
#endif

	.set	noreorder
	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
@@ -1295,35 +1321,52 @@ _mips_AES_set_encrypt_key:
	$PTR_ADD $rcon,$Tbl,256

	.set	reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	$rk0,0($inp)		# load 128 bits
	lw	$rk1,4($inp)
	lw	$rk2,8($inp)
	lw	$rk3,12($inp)
#else
	lwl	$rk0,0+$MSB($inp)	# load 128 bits
	lwl	$rk1,4+$MSB($inp)
	lwl	$rk2,8+$MSB($inp)
	lwl	$rk3,12+$MSB($inp)
	li	$at,128
	lwr	$rk0,0+$LSB($inp)
	lwr	$rk1,4+$LSB($inp)
	lwr	$rk2,8+$LSB($inp)
	lwr	$rk3,12+$LSB($inp)
#endif
	li	$at,128
	.set	noreorder
	beq	$bits,$at,.L128bits
	li	$cnt,10

	.set	reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	$rk4,16($inp)		# load 192 bits
	lw	$rk5,20($inp)
#else
	lwl	$rk4,16+$MSB($inp)	# load 192 bits
	lwl	$rk5,20+$MSB($inp)
	li	$at,192
	lwr	$rk4,16+$LSB($inp)
	lwr	$rk5,20+$LSB($inp)
#endif
	li	$at,192
	.set	noreorder
	beq	$bits,$at,.L192bits
	li	$cnt,8

	.set	reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	$rk6,24($inp)		# load 256 bits
	lw	$rk7,28($inp)
#else
	lwl	$rk6,24+$MSB($inp)	# load 256 bits
	lwl	$rk7,28+$MSB($inp)
	li	$at,256
	lwr	$rk6,24+$LSB($inp)
	lwr	$rk7,28+$LSB($inp)
#endif
	li	$at,256
	.set	noreorder
	beq	$bits,$at,.L256bits
	li	$cnt,7
@@ -1353,7 +1396,7 @@ _mips_AES_set_encrypt_key:
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	sw	$rk3,12($key)
	sub	$cnt,1
	subu	$cnt,1
	$PTR_ADD $key,16

	_bias	$i0,24
@@ -1410,7 +1453,7 @@ _mips_AES_set_encrypt_key:
	sw	$rk3,12($key)
	sw	$rk4,16($key)
	sw	$rk5,20($key)
	sub	$cnt,1
	subu	$cnt,1
	$PTR_ADD $key,24

	_bias	$i0,24
@@ -1471,7 +1514,7 @@ _mips_AES_set_encrypt_key:
	sw	$rk5,20($key)
	sw	$rk6,24($key)
	sw	$rk7,28($key)
	sub	$cnt,1
	subu	$cnt,1

	_bias	$i0,24
	_bias	$i1,16
@@ -1653,7 +1696,7 @@ $code.=<<___;

	lw	$tp1,16($key)		# modulo-scheduled
	lui	$x80808080,0x8080
	sub	$cnt,1
	subu	$cnt,1
	or	$x80808080,0x8080
	sll	$cnt,2
	$PTR_ADD $key,16
@@ -1716,7 +1759,7 @@ $code.=<<___;
	lw	$tp1,4($key)		# modulo-scheduled
	xor	$tpe,$tp2
#endif
	sub	$cnt,1
	subu	$cnt,1
	sw	$tpe,0($key)
	$PTR_ADD $key,4
	bnez	$cnt,.Lmix
+1 −0
Original line number Diff line number Diff line
@@ -35,6 +35,7 @@ GENERATE[aesp8-ppc.s]=asm/aesp8-ppc.pl $(PERLASM_SCHEME)
GENERATE[aes-parisc.s]=asm/aes-parisc.pl $(PERLASM_SCHEME)

GENERATE[aes-mips.S]=asm/aes-mips.pl $(PERLASM_SCHEME)
INCLUDE[aes-mips.o]=..

GENERATE[aesv8-armx.S]=asm/aesv8-armx.pl $(PERLASM_SCHEME)
INCLUDE[aesv8-armx.o]=..
+52 −50
Original line number Diff line number Diff line
@@ -56,14 +56,14 @@
$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64

if ($flavour =~ /64|n32/i) {
	$PTR_ADD="dadd";	# incidentally works even on n32
	$PTR_SUB="dsub";	# incidentally works even on n32
	$PTR_ADD="daddu";	# incidentally works even on n32
	$PTR_SUB="dsubu";	# incidentally works even on n32
	$REG_S="sd";
	$REG_L="ld";
	$SZREG=8;
} else {
	$PTR_ADD="add";
	$PTR_SUB="sub";
	$PTR_ADD="addu";
	$PTR_SUB="subu";
	$REG_S="sw";
	$REG_L="lw";
	$SZREG=4;
@@ -121,6 +121,8 @@ $m1=$s11;
$FRAMESIZE=14;

$code=<<___;
#include "mips_arch.h"

.text

.set	noat
@@ -183,27 +185,27 @@ $code.=<<___;
	$PTR_SUB $sp,$num
	and	$sp,$at

	$MULTU	$aj,$bi
	$LD	$alo,$BNSZ($ap)
	$LD	$nlo,$BNSZ($np)
	mflo	$lo0
	mfhi	$hi0
	$MULTU	$lo0,$n0
	mflo	$m1

	$MULTU	$alo,$bi
	mflo	$alo
	mfhi	$ahi

	$MULTU	$nj,$m1
	mflo	$lo1
	mfhi	$hi1
	$MULTU	$nlo,$m1
	$MULTU	($aj,$bi)
	$LD	$ahi,$BNSZ($ap)
	$LD	$nhi,$BNSZ($np)
	mflo	($lo0,$aj,$bi)
	mfhi	($hi0,$aj,$bi)
	$MULTU	($lo0,$n0)
	mflo	($m1,$lo0,$n0)

	$MULTU	($ahi,$bi)
	mflo	($alo,$ahi,$bi)
	mfhi	($ahi,$ahi,$bi)

	$MULTU	($nj,$m1)
	mflo	($lo1,$nj,$m1)
	mfhi	($hi1,$nj,$m1)
	$MULTU	($nhi,$m1)
	$ADDU	$lo1,$lo0
	sltu	$at,$lo1,$lo0
	$ADDU	$hi1,$at
	mflo	$nlo
	mfhi	$nhi
	mflo	($nlo,$nhi,$m1)
	mfhi	($nhi,$nhi,$m1)

	move	$tp,$sp
	li	$j,2*$BNSZ
@@ -215,25 +217,25 @@ $code.=<<___;
	$LD	$aj,($aj)
	$LD	$nj,($nj)

	$MULTU	$aj,$bi
	$MULTU	($aj,$bi)
	$ADDU	$lo0,$alo,$hi0
	$ADDU	$lo1,$nlo,$hi1
	sltu	$at,$lo0,$hi0
	sltu	$t0,$lo1,$hi1
	$ADDU	$hi0,$ahi,$at
	$ADDU	$hi1,$nhi,$t0
	mflo	$alo
	mfhi	$ahi
	mflo	($alo,$aj,$bi)
	mfhi	($ahi,$aj,$bi)

	$ADDU	$lo1,$lo0
	sltu	$at,$lo1,$lo0
	$MULTU	$nj,$m1
	$MULTU	($nj,$m1)
	$ADDU	$hi1,$at
	addu	$j,$BNSZ
	$ST	$lo1,($tp)
	sltu	$t0,$j,$num
	mflo	$nlo
	mfhi	$nhi
	mflo	($nlo,$nj,$m1)
	mfhi	($nhi,$nj,$m1)

	bnez	$t0,.L1st
	$PTR_ADD $tp,$BNSZ
@@ -263,34 +265,34 @@ $code.=<<___;
	$PTR_ADD $bi,$bp,$i
	$LD	$bi,($bi)
	$LD	$aj,($ap)
	$LD	$alo,$BNSZ($ap)
	$LD	$ahi,$BNSZ($ap)
	$LD	$tj,($sp)

	$MULTU	$aj,$bi
	$MULTU	($aj,$bi)
	$LD	$nj,($np)
	$LD	$nlo,$BNSZ($np)
	mflo	$lo0
	mfhi	$hi0
	$LD	$nhi,$BNSZ($np)
	mflo	($lo0,$aj,$bi)
	mfhi	($hi0,$aj,$bi)
	$ADDU	$lo0,$tj
	$MULTU	$lo0,$n0
	$MULTU	($lo0,$n0)
	sltu	$at,$lo0,$tj
	$ADDU	$hi0,$at
	mflo	$m1
	mflo	($m1,$lo0,$n0)

	$MULTU	$alo,$bi
	mflo	$alo
	mfhi	$ahi
	$MULTU	($ahi,$bi)
	mflo	($alo,$ahi,$bi)
	mfhi	($ahi,$ahi,$bi)

	$MULTU	$nj,$m1
	mflo	$lo1
	mfhi	$hi1
	$MULTU	($nj,$m1)
	mflo	($lo1,$nj,$m1)
	mfhi	($hi1,$nj,$m1)

	$MULTU	$nlo,$m1
	$MULTU	($nhi,$m1)
	$ADDU	$lo1,$lo0
	sltu	$at,$lo1,$lo0
	$ADDU	$hi1,$at
	mflo	$nlo
	mfhi	$nhi
	mflo	($nlo,$nhi,$m1)
	mfhi	($nhi,$nhi,$m1)

	move	$tp,$sp
	li	$j,2*$BNSZ
@@ -303,19 +305,19 @@ $code.=<<___;
	$LD	$aj,($aj)
	$LD	$nj,($nj)

	$MULTU	$aj,$bi
	$MULTU	($aj,$bi)
	$ADDU	$lo0,$alo,$hi0
	$ADDU	$lo1,$nlo,$hi1
	sltu	$at,$lo0,$hi0
	sltu	$t0,$lo1,$hi1
	$ADDU	$hi0,$ahi,$at
	$ADDU	$hi1,$nhi,$t0
	mflo	$alo
	mfhi	$ahi
	mflo	($alo,$aj,$bi)
	mfhi	($ahi,$aj,$bi)

	$ADDU	$lo0,$tj
	addu	$j,$BNSZ
	$MULTU	$nj,$m1
	$MULTU	($nj,$m1)
	sltu	$at,$lo0,$tj
	$ADDU	$lo1,$lo0
	$ADDU	$hi0,$at
@@ -323,8 +325,8 @@ $code.=<<___;
	$LD	$tj,2*$BNSZ($tp)
	$ADDU	$hi1,$t0
	sltu	$at,$j,$num
	mflo	$nlo
	mfhi	$nhi
	mflo	($nlo,$nj,$m1)
	mfhi	($nhi,$nj,$m1)
	$ST	$lo1,($tp)
	bnez	$at,.Linner
	$PTR_ADD $tp,$BNSZ
+379 −363

File changed.

Preview size limit exceeded, changes collapsed.

Loading