Commit 23b93b58 authored by Andy Polyakov
Browse files

aes-ppc.pl, sha512-ppc.pl: comply even with Embedded ABI specification

(most restrictive about r2 and r13 usage).
parent a50bce82
Loading
Loading
Loading
Loading
aes-ppc.pl: +8 −13
@@ -68,7 +68,7 @@ $key="r5";
$Tbl0="r3";
$Tbl1="r6";
$Tbl2="r7";
$Tbl3="r2";
$Tbl3=$out;	# stay away from "r2"; $out is offloaded to stack

$s0="r8";
$s1="r9";
@@ -76,7 +76,7 @@ $s2="r10";
$s3="r11";

$t0="r12";
$t1="r13";
$t1="r0";	# stay away from "r13";
$t2="r14";
$t3="r15";

@@ -100,9 +100,6 @@ $acc13="r29";
$acc14="r30";
$acc15="r31";

# stay away from TLS pointer
if ($SIZE_T==8)	{ die if ($t1 ne "r13");  $t1="r0";		}
else		{ die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0";	}
$mask80=$Tbl2;
$mask1b=$Tbl3;

@@ -337,8 +334,7 @@ $code.=<<___;
	$STU	$sp,-$FRAME($sp)
	mflr	r0

	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
	$PUSH	$out,`$FRAME-$SIZE_T*19`($sp)
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
@@ -371,6 +367,7 @@ Lenc_unaligned_ok:
	lwz	$s3,12($inp)
	bl	LAES_Te
	bl	Lppc_AES_encrypt_compact
	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
	stw	$s0,0($out)
	stw	$s1,4($out)
	stw	$s2,8($out)
@@ -417,6 +414,7 @@ Lenc_xpage:

	bl	LAES_Te
	bl	Lppc_AES_encrypt_compact
	$POP	$out,`$FRAME-$SIZE_T*19`($sp)

	extrwi	$acc00,$s0,8,0
	extrwi	$acc01,$s0,8,8
@@ -449,8 +447,6 @@ Lenc_xpage:

Lenc_done:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
@@ -771,8 +767,7 @@ Lenc_compact_done:
	$STU	$sp,-$FRAME($sp)
	mflr	r0

	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
	$PUSH	$out,`$FRAME-$SIZE_T*19`($sp)
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
@@ -805,6 +800,7 @@ Ldec_unaligned_ok:
	lwz	$s3,12($inp)
	bl	LAES_Td
	bl	Lppc_AES_decrypt_compact
	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
	stw	$s0,0($out)
	stw	$s1,4($out)
	stw	$s2,8($out)
@@ -851,6 +847,7 @@ Ldec_xpage:

	bl	LAES_Td
	bl	Lppc_AES_decrypt_compact
	$POP	$out,`$FRAME-$SIZE_T*19`($sp)

	extrwi	$acc00,$s0,8,0
	extrwi	$acc01,$s0,8,8
@@ -883,8 +880,6 @@ Ldec_xpage:

Ldec_done:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
sha512-ppc.pl: +18 −16
@@ -110,7 +110,7 @@ $B ="r9";
$C  ="r10";
$D  ="r11";
$E  ="r12";
$F  ="r13";	$F="r2" if ($SIZE_T==8);# reassigned to exempt TLS pointer
$F  =$t1;	$t1 = "r0";	# stay away from "r13";
$G  ="r14";
$H  ="r15";

@@ -123,19 +123,18 @@ $inp="r31"; # reassigned $inp! aliases with @X[15]
sub ROUND_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
$code.=<<___;
	$LD	$T,`$i*$SZ`($Tbl)
	$ROR	$a0,$e,$Sigma1[0]
	$ROR	$a1,$e,$Sigma1[1]
	and	$t0,$f,$e
	andc	$t1,$g,$e
	add	$T,$T,$h
	xor	$a0,$a0,$a1
	add	$h,$h,$t1
	andc	$t1,$g,$e
	$ROR	$a1,$a1,`$Sigma1[2]-$Sigma1[1]`
	or	$t0,$t0,$t1		; Ch(e,f,g)
	add	$T,$T,@X[$i]
	add	$h,$h,@X[$i%16]
	xor	$a0,$a0,$a1		; Sigma1(e)
	add	$T,$T,$t0
	add	$T,$T,$a0
	add	$h,$h,$t0
	add	$h,$h,$a0

	$ROR	$a0,$a,$Sigma0[0]
	$ROR	$a1,$a,$Sigma0[1]
@@ -146,9 +145,14 @@ $code.=<<___;
	xor	$t0,$t0,$t1
	and	$t1,$b,$c
	xor	$a0,$a0,$a1		; Sigma0(a)
	add	$d,$d,$T
	add	$d,$d,$h
	xor	$t0,$t0,$t1		; Maj(a,b,c)
	add	$h,$T,$a0
___
$code.=<<___ if ($i<15);
	$LD	$t1,`($i+1)*$SZ`($Tbl)
___
$code.=<<___;
	add	$h,$h,$a0
	add	$h,$h,$t0

___
@@ -169,10 +173,11 @@ $code.=<<___;
	add	@X[$i],@X[$i],@X[($i+9)%16]
	xor	$a0,$a0,$a1		; sigma0(X[(i+1)&0x0f])
	xor	$t0,$t0,$t1		; sigma1(X[(i+14)&0x0f])
	$LD	$t1,`$i*$SZ`($Tbl)
	add	@X[$i],@X[$i],$a0
	add	@X[$i],@X[$i],$t0
___
&ROUND_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h);
&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
}

$code=<<___;
@@ -188,8 +193,6 @@ $func:

	$PUSH	$ctx,`$FRAME-$SIZE_T*22`($sp)

	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
@@ -283,8 +286,6 @@ Lmemcpy:

Ldone:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
@@ -312,6 +313,7 @@ Ldone:

.align	4
Lsha2_block_private:
	$LD	$t1,0($Tbl)
___
for($i=0;$i<16;$i++) {
$code.=<<___ if ($SZ==4);
@@ -328,8 +330,8 @@ ___
	unshift(@V,pop(@V));
}
$code.=<<___;
	li	$T,`$rounds/16-1`
	mtctr	$T
	li	$t0,`$rounds/16-1`
	mtctr	$t0
.align	4
Lrounds:
	addi	$Tbl,$Tbl,`16*$SZ`