Commit cf96d71c authored by Andy Polyakov's avatar Andy Polyakov
Browse files

PPC assembler pack update from HEAD.

parent 1a111921
Loading
Loading
Loading
Loading
+310 −134
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# Needs more work: key setup, page boundaries, CBC routine...
# Needs more work: key setup, CBC routine...
#
# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
@@ -18,7 +18,7 @@

# February 2010
#
# Rescheduling instructions to favour Power6 pipeline gives 10%
# Rescheduling instructions to favour Power6 pipeline gave 10%
# performance improvement on the platform in question (and marginal
# improvement even on others). It should be noted that Power6 fails
# to process byte in 18 cycles, only in 23, because it fails to issue
@@ -33,11 +33,13 @@ $flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
@@ -116,15 +118,19 @@ LAES_Te:
	addi	$Tbl0,$Tbl0,`128-8`
	mtlr	r0
	blr
	.space	`32-24`
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
	.space	`64-9*4`
LAES_Td:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvvvvv "distance" between . and 1st data entry
	addi	$Tbl0,$Tbl0,`128-8-32+2048+256`
	addi	$Tbl0,$Tbl0,`128-64-8+2048+256`
	mtlr	r0
	blr
	.space	`128-32-24`
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
	.space	`128-64-9*4`
___
&_data_word(
	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
@@ -328,10 +334,9 @@ $code.=<<___;
.globl	.AES_encrypt
.align	7
.AES_encrypt:
	mflr	r0
	$STU	$sp,-$FRAME($sp)
	mflr	r0

	$PUSH	r0,`$FRAME-$SIZE_T*21`($sp)
	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
@@ -352,7 +357,14 @@ $code.=<<___;
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)

	andi.	$t0,$inp,3
	andi.	$t1,$out,3
	or.	$t0,$t0,$t1
	bne	Lenc_unaligned

Lenc_unaligned_ok:
	lwz	$s0,0($inp)
	lwz	$s1,4($inp)
	lwz	$s2,8($inp)
@@ -363,8 +375,80 @@ $code.=<<___;
	stw	$s1,4($out)
	stw	$s2,8($out)
	stw	$s3,12($out)
	b	Lenc_done

	$POP	r0,`$FRAME-$SIZE_T*21`($sp)
# Path taken when $inp or $out is not a multiple of 4 (see the andi./or./bne
# test ahead of Lenc_unaligned_ok). Word loads/stores at Lenc_unaligned_ok
# are still used unless a 16-byte block starting at $inp or $out would run
# over a 4096-byte boundary; only in that case is data moved byte by byte.
Lenc_unaligned:
	# $t0 = 4096 - $inp, $t1 = 4096 - $out
	subfic	$t0,$inp,4096
	subfic	$t1,$out,4096
	# Mask 4096-16 keeps bits 4..11. A zero result means fewer than 16 bytes
	# remain before the next 4096-byte boundary (it is also zero when the
	# pointer sits exactly on such a boundary).
	andi.	$t0,$t0,4096-16
	beq	Lenc_xpage
	andi.	$t1,$t1,4096-16
	bne	Lenc_unaligned_ok

# Byte-wise load: each of $s0..$s3 is assembled from four consecutive input
# bytes, first byte in the most significant position. The last byte of each
# word is loaded straight into $sN by lbz and the other three are inserted
# above it with insrwi. Loads and inserts are interleaved.
Lenc_xpage:
	lbz	$acc00,0($inp)
	lbz	$acc01,1($inp)
	lbz	$acc02,2($inp)
	lbz	$s0,3($inp)
	lbz	$acc04,4($inp)
	lbz	$acc05,5($inp)
	lbz	$acc06,6($inp)
	lbz	$s1,7($inp)
	lbz	$acc08,8($inp)
	lbz	$acc09,9($inp)
	lbz	$acc10,10($inp)
	insrwi	$s0,$acc00,8,0
	lbz	$s2,11($inp)
	insrwi	$s1,$acc04,8,0
	lbz	$acc12,12($inp)
	insrwi	$s0,$acc01,8,8
	lbz	$acc13,13($inp)
	insrwi	$s1,$acc05,8,8
	lbz	$acc14,14($inp)
	insrwi	$s0,$acc02,8,16
	lbz	$s3,15($inp)
	insrwi	$s1,$acc06,8,16
	insrwi	$s2,$acc08,8,0
	insrwi	$s3,$acc12,8,0
	insrwi	$s2,$acc09,8,8
	insrwi	$s3,$acc13,8,8
	insrwi	$s2,$acc10,8,16
	insrwi	$s3,$acc14,8,16

	# LAES_Te returns with a table address in $Tbl0 (it goes through r0 to
	# keep LR intact); the compact routine then transforms $s0..$s3 in place
	# using the key schedule at $key.
	bl	LAES_Te
	bl	Lppc_AES_encrypt_compact

	# Byte-wise store, mirror image of the load above: extrwi pulls out the
	# three upper bytes of each word, the lowest byte is stored from $sN
	# directly by stb.
	extrwi	$acc00,$s0,8,0
	extrwi	$acc01,$s0,8,8
	stb	$acc00,0($out)
	extrwi	$acc02,$s0,8,16
	stb	$acc01,1($out)
	stb	$acc02,2($out)
	extrwi	$acc04,$s1,8,0
	stb	$s0,3($out)
	extrwi	$acc05,$s1,8,8
	stb	$acc04,4($out)
	extrwi	$acc06,$s1,8,16
	stb	$acc05,5($out)
	stb	$acc06,6($out)
	extrwi	$acc08,$s2,8,0
	stb	$s1,7($out)
	extrwi	$acc09,$s2,8,8
	stb	$acc08,8($out)
	extrwi	$acc10,$s2,8,16
	stb	$acc09,9($out)
	stb	$acc10,10($out)
	extrwi	$acc12,$s3,8,0
	stb	$s2,11($out)
	extrwi	$acc13,$s3,8,8
	stb	$acc12,12($out)
	extrwi	$acc14,$s3,8,16
	stb	$acc13,13($out)
	stb	$acc14,14($out)
	stb	$s3,15($out)

Lenc_done:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
@@ -388,18 +472,21 @@ $code.=<<___;
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,3,0
	.long	0

.align	5
Lppc_AES_encrypt:
	lwz	$acc00,240($key)
	lwz	$t0,0($key)
	lwz	$t1,4($key)
	lwz	$t2,8($key)
	lwz	$t3,12($key)
	addi	$Tbl1,$Tbl0,3
	lwz	$t0,0($key)
	addi	$Tbl2,$Tbl0,2
	lwz	$t1,4($key)
	addi	$Tbl3,$Tbl0,1
	lwz	$t2,8($key)
	addi	$acc00,$acc00,-1
	lwz	$t3,12($key)
	addi	$key,$key,16
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
@@ -413,44 +500,44 @@ Lenc_loop:
	rlwinm	$acc02,$s2,`32-24+3`,21,28
	rlwinm	$acc03,$s3,`32-24+3`,21,28
	lwz	$t0,0($key)
	lwz	$t1,4($key)
	rlwinm	$acc04,$s1,`32-16+3`,21,28
	lwz	$t1,4($key)
	rlwinm	$acc05,$s2,`32-16+3`,21,28
	lwz	$t2,8($key)
	lwz	$t3,12($key)
	rlwinm	$acc06,$s3,`32-16+3`,21,28
	lwz	$t3,12($key)
	rlwinm	$acc07,$s0,`32-16+3`,21,28
	lwzx	$acc00,$Tbl0,$acc00
	lwzx	$acc01,$Tbl0,$acc01
	rlwinm	$acc08,$s2,`32-8+3`,21,28
	lwzx	$acc01,$Tbl0,$acc01
	rlwinm	$acc09,$s3,`32-8+3`,21,28
	lwzx	$acc02,$Tbl0,$acc02
	lwzx	$acc03,$Tbl0,$acc03
	rlwinm	$acc10,$s0,`32-8+3`,21,28
	lwzx	$acc03,$Tbl0,$acc03
	rlwinm	$acc11,$s1,`32-8+3`,21,28
	lwzx	$acc04,$Tbl1,$acc04
	lwzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc12,$s3,`0+3`,21,28
	lwzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s0,`0+3`,21,28
	lwzx	$acc06,$Tbl1,$acc06
	lwzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc14,$s1,`0+3`,21,28
	lwzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s2,`0+3`,21,28
	lwzx	$acc08,$Tbl2,$acc08
	lwzx	$acc09,$Tbl2,$acc09
	xor	$t0,$t0,$acc00
	lwzx	$acc09,$Tbl2,$acc09
	xor	$t1,$t1,$acc01
	lwzx	$acc10,$Tbl2,$acc10
	lwzx	$acc11,$Tbl2,$acc11
	xor	$t2,$t2,$acc02
	lwzx	$acc11,$Tbl2,$acc11
	xor	$t3,$t3,$acc03
	lwzx	$acc12,$Tbl3,$acc12
	lwzx	$acc13,$Tbl3,$acc13
	xor	$t0,$t0,$acc04
	lwzx	$acc13,$Tbl3,$acc13
	xor	$t1,$t1,$acc05
	lwzx	$acc14,$Tbl3,$acc14
	lwzx	$acc15,$Tbl3,$acc15
	xor	$t2,$t2,$acc06
	lwzx	$acc15,$Tbl3,$acc15
	xor	$t3,$t3,$acc07
	xor	$t0,$t0,$acc08
	xor	$t1,$t1,$acc09
@@ -466,60 +553,60 @@ Lenc_loop:
	addi	$Tbl2,$Tbl0,2048
	nop
	lwz	$t0,0($key)
	lwz	$t1,4($key)
	rlwinm	$acc00,$s0,`32-24`,24,31
	lwz	$t1,4($key)
	rlwinm	$acc01,$s1,`32-24`,24,31
	lwz	$t2,8($key)
	lwz	$t3,12($key)
	rlwinm	$acc02,$s2,`32-24`,24,31
	lwz	$t3,12($key)
	rlwinm	$acc03,$s3,`32-24`,24,31
	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Te4
	lwz	$acc09,`2048+32`($Tbl0)
	rlwinm	$acc04,$s1,`32-16`,24,31
	lwz	$acc09,`2048+32`($Tbl0)
	rlwinm	$acc05,$s2,`32-16`,24,31
	lwz	$acc10,`2048+64`($Tbl0)
	lwz	$acc11,`2048+96`($Tbl0)
	rlwinm	$acc06,$s3,`32-16`,24,31
	lwz	$acc11,`2048+96`($Tbl0)
	rlwinm	$acc07,$s0,`32-16`,24,31
	lwz	$acc12,`2048+128`($Tbl0)
	lwz	$acc13,`2048+160`($Tbl0)
	rlwinm	$acc08,$s2,`32-8`,24,31
	lwz	$acc13,`2048+160`($Tbl0)
	rlwinm	$acc09,$s3,`32-8`,24,31
	lwz	$acc14,`2048+192`($Tbl0)
	lwz	$acc15,`2048+224`($Tbl0)
	rlwinm	$acc10,$s0,`32-8`,24,31
	lwz	$acc15,`2048+224`($Tbl0)
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc00,$Tbl2,$acc00
	lbzx	$acc01,$Tbl2,$acc01
	rlwinm	$acc12,$s3,`0`,24,31
	lbzx	$acc01,$Tbl2,$acc01
	rlwinm	$acc13,$s0,`0`,24,31
	lbzx	$acc02,$Tbl2,$acc02
	lbzx	$acc03,$Tbl2,$acc03
	rlwinm	$acc14,$s1,`0`,24,31
	lbzx	$acc03,$Tbl2,$acc03
	rlwinm	$acc15,$s2,`0`,24,31
	lbzx	$acc04,$Tbl2,$acc04
	lbzx	$acc05,$Tbl2,$acc05
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc05,$Tbl2,$acc05
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc06,$Tbl2,$acc06
	lbzx	$acc07,$Tbl2,$acc07
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc07,$Tbl2,$acc07
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc08,$Tbl2,$acc08
	lbzx	$acc09,$Tbl2,$acc09
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc09,$Tbl2,$acc09
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc10,$Tbl2,$acc10
	lbzx	$acc11,$Tbl2,$acc11
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc11,$Tbl2,$acc11
	rlwimi	$s3,$acc07,16,8,15
	lbzx	$acc12,$Tbl2,$acc12
	lbzx	$acc13,$Tbl2,$acc13
	rlwimi	$s0,$acc08,8,16,23
	lbzx	$acc13,$Tbl2,$acc13
	rlwimi	$s1,$acc09,8,16,23
	lbzx	$acc14,$Tbl2,$acc14
	lbzx	$acc15,$Tbl2,$acc15
	rlwimi	$s2,$acc10,8,16,23
	lbzx	$acc15,$Tbl2,$acc15
	rlwimi	$s3,$acc11,8,16,23
	or	$s0,$s0,$acc12
	or	$s1,$s1,$acc13
@@ -530,29 +617,31 @@ Lenc_loop:
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

.align	4
Lppc_AES_encrypt_compact:
	lwz	$acc00,240($key)
	lwz	$t0,0($key)
	lwz	$t1,4($key)
	lwz	$t2,8($key)
	lwz	$t3,12($key)
	addi	$Tbl1,$Tbl0,2048
	lwz	$t0,0($key)
	lis	$mask80,0x8080
	lwz	$t1,4($key)
	lis	$mask1b,0x1b1b
	addi	$key,$key,16
	lwz	$t2,8($key)
	ori	$mask80,$mask80,0x8080
	lwz	$t3,12($key)
	ori	$mask1b,$mask1b,0x1b1b
	addi	$key,$key,16
	mtctr	$acc00
.align	4
Lenc_compact_loop:
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	rlwinm	$acc00,$s0,`32-24`,24,31
	xor	$s2,$s2,$t2
	rlwinm	$acc01,$s1,`32-24`,24,31
	xor	$s3,$s3,$t3
	rlwinm	$acc02,$s2,`32-24`,24,31
	rlwinm	$acc03,$s3,`32-24`,24,31
	rlwinm	$acc04,$s1,`32-16`,24,31
@@ -560,48 +649,48 @@ Lenc_compact_loop:
	rlwinm	$acc06,$s3,`32-16`,24,31
	rlwinm	$acc07,$s0,`32-16`,24,31
	lbzx	$acc00,$Tbl1,$acc00
	lbzx	$acc01,$Tbl1,$acc01
	rlwinm	$acc08,$s2,`32-8`,24,31
	lbzx	$acc01,$Tbl1,$acc01
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl1,$acc02
	lbzx	$acc03,$Tbl1,$acc03
	rlwinm	$acc10,$s0,`32-8`,24,31
	lbzx	$acc03,$Tbl1,$acc03
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl1,$acc04
	lbzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc12,$s3,`0`,24,31
	lbzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s0,`0`,24,31
	lbzx	$acc06,$Tbl1,$acc06
	lbzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc14,$s1,`0`,24,31
	lbzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s2,`0`,24,31
	lbzx	$acc08,$Tbl1,$acc08
	lbzx	$acc09,$Tbl1,$acc09
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc09,$Tbl1,$acc09
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl1,$acc10
	lbzx	$acc11,$Tbl1,$acc11
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc11,$Tbl1,$acc11
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl1,$acc12
	lbzx	$acc13,$Tbl1,$acc13
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc13,$Tbl1,$acc13
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl1,$acc14
	lbzx	$acc15,$Tbl1,$acc15
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc15,$Tbl1,$acc15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
	rlwimi	$s2,$acc10,8,16,23
	rlwimi	$s3,$acc11,8,16,23
	lwz	$t0,0($key)
	lwz	$t1,4($key)
	or	$s0,$s0,$acc12
	lwz	$t1,4($key)
	or	$s1,$s1,$acc13
	lwz	$t2,8($key)
	lwz	$t3,12($key)
	or	$s2,$s2,$acc14
	lwz	$t3,12($key)
	or	$s3,$s3,$acc15

	addi	$key,$key,16
@@ -612,12 +701,12 @@ Lenc_compact_loop:
	and	$acc02,$s2,$mask80
	and	$acc03,$s3,$mask80
	srwi	$acc04,$acc00,7		# r1>>7
	srwi	$acc05,$acc01,7
	srwi	$acc06,$acc02,7
	srwi	$acc07,$acc03,7
	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
	srwi	$acc05,$acc01,7
	andc	$acc09,$s1,$mask80
	srwi	$acc06,$acc02,7
	andc	$acc10,$s2,$mask80
	srwi	$acc07,$acc03,7
	andc	$acc11,$s3,$mask80
	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
	sub	$acc01,$acc01,$acc05
@@ -633,32 +722,32 @@ Lenc_compact_loop:
	and	$acc03,$acc03,$mask1b
	xor	$acc00,$acc00,$acc08	# r2
	xor	$acc01,$acc01,$acc09
	xor	$acc02,$acc02,$acc10
	xor	$acc03,$acc03,$acc11

	 rotlwi	$acc12,$s0,16		# ROTATE(r0,16)
	xor	$acc02,$acc02,$acc10
	 rotlwi	$acc13,$s1,16
	xor	$acc03,$acc03,$acc11
	 rotlwi	$acc14,$s2,16
	rotlwi	$acc15,$s3,16

	xor	$s0,$s0,$acc00		# r0^r2
	rotlwi	$acc15,$s3,16
	xor	$s1,$s1,$acc01
	xor	$s2,$s2,$acc02
	xor	$s3,$s3,$acc03
	rotrwi	$s0,$s0,24		# ROTATE(r2^r0,24)
	xor	$s2,$s2,$acc02
	rotrwi	$s1,$s1,24
	xor	$s3,$s3,$acc03
	rotrwi	$s2,$s2,24
	rotrwi	$s3,$s3,24
	xor	$s0,$s0,$acc00		# ROTATE(r2^r0,24)^r2
	rotrwi	$s3,$s3,24
	xor	$s1,$s1,$acc01
	xor	$s2,$s2,$acc02
	xor	$s3,$s3,$acc03
	rotlwi	$acc08,$acc12,8		# ROTATE(r0,24)
	rotlwi	$acc09,$acc13,8
	rotlwi	$acc10,$acc14,8
	rotlwi	$acc11,$acc15,8
	xor	$s0,$s0,$acc12		#
	rotlwi	$acc09,$acc13,8
	xor	$s1,$s1,$acc13
	rotlwi	$acc10,$acc14,8
	xor	$s2,$s2,$acc14
	rotlwi	$acc11,$acc15,8
	xor	$s3,$s3,$acc15
	xor	$s0,$s0,$acc08		#
	xor	$s1,$s1,$acc09
@@ -673,14 +762,15 @@ Lenc_compact_done:
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

.globl	.AES_decrypt
.align	7
.AES_decrypt:
	mflr	r0
	$STU	$sp,-$FRAME($sp)
	mflr	r0

	$PUSH	r0,`$FRAME-$SIZE_T*21`($sp)
	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
@@ -701,7 +791,14 @@ Lenc_compact_done:
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)

	andi.	$t0,$inp,3
	andi.	$t1,$out,3
	or.	$t0,$t0,$t1
	bne	Ldec_unaligned

Ldec_unaligned_ok:
	lwz	$s0,0($inp)
	lwz	$s1,4($inp)
	lwz	$s2,8($inp)
@@ -712,8 +809,80 @@ Lenc_compact_done:
	stw	$s1,4($out)
	stw	$s2,8($out)
	stw	$s3,12($out)
	b	Ldec_done

# Path taken when $inp or $out is not a multiple of 4 (see the andi./or./bne
# test ahead of Ldec_unaligned_ok). Word loads/stores at Ldec_unaligned_ok
# are still used unless a 16-byte block starting at $inp or $out would run
# over a 4096-byte boundary; only in that case is data moved byte by byte.
Ldec_unaligned:
	# $t0 = 4096 - $inp, $t1 = 4096 - $out
	subfic	$t0,$inp,4096
	subfic	$t1,$out,4096
	# Mask 4096-16 keeps bits 4..11. A zero result means fewer than 16 bytes
	# remain before the next 4096-byte boundary (it is also zero when the
	# pointer sits exactly on such a boundary).
	andi.	$t0,$t0,4096-16
	beq	Ldec_xpage
	andi.	$t1,$t1,4096-16
	bne	Ldec_unaligned_ok

# Byte-wise load: each of $s0..$s3 is assembled from four consecutive input
# bytes, first byte in the most significant position. The last byte of each
# word is loaded straight into $sN by lbz and the other three are inserted
# above it with insrwi. Loads and inserts are interleaved.
Ldec_xpage:
	lbz	$acc00,0($inp)
	lbz	$acc01,1($inp)
	lbz	$acc02,2($inp)
	lbz	$s0,3($inp)
	lbz	$acc04,4($inp)
	lbz	$acc05,5($inp)
	lbz	$acc06,6($inp)
	lbz	$s1,7($inp)
	lbz	$acc08,8($inp)
	lbz	$acc09,9($inp)
	lbz	$acc10,10($inp)
	insrwi	$s0,$acc00,8,0
	lbz	$s2,11($inp)
	insrwi	$s1,$acc04,8,0
	lbz	$acc12,12($inp)
	insrwi	$s0,$acc01,8,8
	lbz	$acc13,13($inp)
	insrwi	$s1,$acc05,8,8
	lbz	$acc14,14($inp)
	insrwi	$s0,$acc02,8,16
	lbz	$s3,15($inp)
	insrwi	$s1,$acc06,8,16
	insrwi	$s2,$acc08,8,0
	insrwi	$s3,$acc12,8,0
	insrwi	$s2,$acc09,8,8
	insrwi	$s3,$acc13,8,8
	insrwi	$s2,$acc10,8,16
	insrwi	$s3,$acc14,8,16
	$POP	r0,`$FRAME-$SIZE_T*21`($sp)
	# LAES_Td computes a table address into $Tbl0 relative to its own
	# location (bcl/mflr) and goes through r0 to keep LR intact, so r0 is
	# clobbered. The compact routine then transforms $s0..$s3 in place
	# using the key schedule at $key.
	bl	LAES_Td
	bl	Lppc_AES_decrypt_compact

	# Byte-wise store of the result: extrwi pulls out the three upper bytes
	# of each word, the lowest byte is stored from $sN directly by stb, so
	# the most significant byte of $s0 lands at offset 0 of $out.
	extrwi	$acc00,$s0,8,0
	extrwi	$acc01,$s0,8,8
	stb	$acc00,0($out)
	extrwi	$acc02,$s0,8,16
	stb	$acc01,1($out)
	stb	$acc02,2($out)
	extrwi	$acc04,$s1,8,0
	stb	$s0,3($out)
	extrwi	$acc05,$s1,8,8
	stb	$acc04,4($out)
	extrwi	$acc06,$s1,8,16
	stb	$acc05,5($out)
	stb	$acc06,6($out)
	extrwi	$acc08,$s2,8,0
	stb	$s1,7($out)
	extrwi	$acc09,$s2,8,8
	stb	$acc08,8($out)
	extrwi	$acc10,$s2,8,16
	stb	$acc09,9($out)
	stb	$acc10,10($out)
	extrwi	$acc12,$s3,8,0
	stb	$s2,11($out)
	extrwi	$acc13,$s3,8,8
	stb	$acc12,12($out)
	extrwi	$acc14,$s3,8,16
	stb	$acc13,13($out)
	stb	$acc14,14($out)
	stb	$s3,15($out)

Ldec_done:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
@@ -737,18 +906,21 @@ Lenc_compact_done:
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,3,0
	.long	0

.align	5
Lppc_AES_decrypt:
	lwz	$acc00,240($key)
	lwz	$t0,0($key)
	lwz	$t1,4($key)
	lwz	$t2,8($key)
	lwz	$t3,12($key)
	addi	$Tbl1,$Tbl0,3
	lwz	$t0,0($key)
	addi	$Tbl2,$Tbl0,2
	lwz	$t1,4($key)
	addi	$Tbl3,$Tbl0,1
	lwz	$t2,8($key)
	addi	$acc00,$acc00,-1
	lwz	$t3,12($key)
	addi	$key,$key,16
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
@@ -762,44 +934,44 @@ Ldec_loop:
	rlwinm	$acc02,$s2,`32-24+3`,21,28
	rlwinm	$acc03,$s3,`32-24+3`,21,28
	lwz	$t0,0($key)
	lwz	$t1,4($key)
	rlwinm	$acc04,$s3,`32-16+3`,21,28
	lwz	$t1,4($key)
	rlwinm	$acc05,$s0,`32-16+3`,21,28
	lwz	$t2,8($key)
	lwz	$t3,12($key)
	rlwinm	$acc06,$s1,`32-16+3`,21,28
	lwz	$t3,12($key)
	rlwinm	$acc07,$s2,`32-16+3`,21,28
	lwzx	$acc00,$Tbl0,$acc00
	lwzx	$acc01,$Tbl0,$acc01
	rlwinm	$acc08,$s2,`32-8+3`,21,28
	lwzx	$acc01,$Tbl0,$acc01
	rlwinm	$acc09,$s3,`32-8+3`,21,28
	lwzx	$acc02,$Tbl0,$acc02
	lwzx	$acc03,$Tbl0,$acc03
	rlwinm	$acc10,$s0,`32-8+3`,21,28
	lwzx	$acc03,$Tbl0,$acc03
	rlwinm	$acc11,$s1,`32-8+3`,21,28
	lwzx	$acc04,$Tbl1,$acc04
	lwzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc12,$s1,`0+3`,21,28
	lwzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s2,`0+3`,21,28
	lwzx	$acc06,$Tbl1,$acc06
	lwzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc14,$s3,`0+3`,21,28
	lwzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s0,`0+3`,21,28
	lwzx	$acc08,$Tbl2,$acc08
	lwzx	$acc09,$Tbl2,$acc09
	xor	$t0,$t0,$acc00
	lwzx	$acc09,$Tbl2,$acc09
	xor	$t1,$t1,$acc01
	lwzx	$acc10,$Tbl2,$acc10
	lwzx	$acc11,$Tbl2,$acc11
	xor	$t2,$t2,$acc02
	lwzx	$acc11,$Tbl2,$acc11
	xor	$t3,$t3,$acc03
	lwzx	$acc12,$Tbl3,$acc12
	lwzx	$acc13,$Tbl3,$acc13
	xor	$t0,$t0,$acc04
	lwzx	$acc13,$Tbl3,$acc13
	xor	$t1,$t1,$acc05
	lwzx	$acc14,$Tbl3,$acc14
	lwzx	$acc15,$Tbl3,$acc15
	xor	$t2,$t2,$acc06
	lwzx	$acc15,$Tbl3,$acc15
	xor	$t3,$t3,$acc07
	xor	$t0,$t0,$acc08
	xor	$t1,$t1,$acc09
@@ -815,56 +987,56 @@ Ldec_loop:
	addi	$Tbl2,$Tbl0,2048
	nop
	lwz	$t0,0($key)
	lwz	$t1,4($key)
	rlwinm	$acc00,$s0,`32-24`,24,31
	lwz	$t1,4($key)
	rlwinm	$acc01,$s1,`32-24`,24,31
	lwz	$t2,8($key)
	lwz	$t3,12($key)
	rlwinm	$acc02,$s2,`32-24`,24,31
	lwz	$t3,12($key)
	rlwinm	$acc03,$s3,`32-24`,24,31
	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Td4
	lwz	$acc09,`2048+32`($Tbl0)
	rlwinm	$acc04,$s3,`32-16`,24,31
	lwz	$acc09,`2048+32`($Tbl0)
	rlwinm	$acc05,$s0,`32-16`,24,31
	lwz	$acc10,`2048+64`($Tbl0)
	lwz	$acc11,`2048+96`($Tbl0)
	lbzx	$acc00,$Tbl2,$acc00
	lwz	$acc11,`2048+96`($Tbl0)
	lbzx	$acc01,$Tbl2,$acc01
	lwz	$acc12,`2048+128`($Tbl0)
	lwz	$acc13,`2048+160`($Tbl0)
	rlwinm	$acc06,$s1,`32-16`,24,31
	lwz	$acc13,`2048+160`($Tbl0)
	rlwinm	$acc07,$s2,`32-16`,24,31
	lwz	$acc14,`2048+192`($Tbl0)
	lwz	$acc15,`2048+224`($Tbl0)
	rlwinm	$acc08,$s2,`32-8`,24,31
	lwz	$acc15,`2048+224`($Tbl0)
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl2,$acc02
	lbzx	$acc03,$Tbl2,$acc03
	rlwinm	$acc10,$s0,`32-8`,24,31
	lbzx	$acc03,$Tbl2,$acc03
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl2,$acc04
	lbzx	$acc05,$Tbl2,$acc05
	rlwinm	$acc12,$s1,`0`,24,31
	lbzx	$acc05,$Tbl2,$acc05
	rlwinm	$acc13,$s2,`0`,24,31
	lbzx	$acc06,$Tbl2,$acc06
	lbzx	$acc07,$Tbl2,$acc07
	rlwinm	$acc14,$s3,`0`,24,31
	lbzx	$acc07,$Tbl2,$acc07
	rlwinm	$acc15,$s0,`0`,24,31
	lbzx	$acc08,$Tbl2,$acc08
	lbzx	$acc09,$Tbl2,$acc09
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc09,$Tbl2,$acc09
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl2,$acc10
	lbzx	$acc11,$Tbl2,$acc11
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc11,$Tbl2,$acc11
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl2,$acc12
	lbzx	$acc13,$Tbl2,$acc13
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc13,$Tbl2,$acc13
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl2,$acc14
	lbzx	$acc15,$Tbl2,$acc15
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc15,$Tbl2,$acc15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
@@ -879,20 +1051,22 @@ Ldec_loop:
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

.align	4
Lppc_AES_decrypt_compact:
	lwz	$acc00,240($key)
	lwz	$t0,0($key)
	lwz	$t1,4($key)
	lwz	$t2,8($key)
	lwz	$t3,12($key)
	addi	$Tbl1,$Tbl0,2048
	lwz	$t0,0($key)
	lis	$mask80,0x8080
	lwz	$t1,4($key)
	lis	$mask1b,0x1b1b
	addi	$key,$key,16
	lwz	$t2,8($key)
	ori	$mask80,$mask80,0x8080
	lwz	$t3,12($key)
	ori	$mask1b,$mask1b,0x1b1b
	addi	$key,$key,16
___
$code.=<<___ if ($SIZE_T==8);
	insrdi	$mask80,$mask80,32,0
@@ -904,10 +1078,10 @@ $code.=<<___;
Ldec_compact_loop:
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	rlwinm	$acc00,$s0,`32-24`,24,31
	xor	$s2,$s2,$t2
	rlwinm	$acc01,$s1,`32-24`,24,31
	xor	$s3,$s3,$t3
	rlwinm	$acc02,$s2,`32-24`,24,31
	rlwinm	$acc03,$s3,`32-24`,24,31
	rlwinm	$acc04,$s3,`32-16`,24,31
@@ -915,48 +1089,48 @@ Ldec_compact_loop:
	rlwinm	$acc06,$s1,`32-16`,24,31
	rlwinm	$acc07,$s2,`32-16`,24,31
	lbzx	$acc00,$Tbl1,$acc00
	lbzx	$acc01,$Tbl1,$acc01
	rlwinm	$acc08,$s2,`32-8`,24,31
	lbzx	$acc01,$Tbl1,$acc01
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl1,$acc02
	lbzx	$acc03,$Tbl1,$acc03
	rlwinm	$acc10,$s0,`32-8`,24,31
	lbzx	$acc03,$Tbl1,$acc03
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl1,$acc04
	lbzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc12,$s1,`0`,24,31
	lbzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s2,`0`,24,31
	lbzx	$acc06,$Tbl1,$acc06
	lbzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc14,$s3,`0`,24,31
	lbzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s0,`0`,24,31
	lbzx	$acc08,$Tbl1,$acc08
	lbzx	$acc09,$Tbl1,$acc09
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc09,$Tbl1,$acc09
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl1,$acc10
	lbzx	$acc11,$Tbl1,$acc11
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc11,$Tbl1,$acc11
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl1,$acc12
	lbzx	$acc13,$Tbl1,$acc13
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc13,$Tbl1,$acc13
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl1,$acc14
	lbzx	$acc15,$Tbl1,$acc15
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc15,$Tbl1,$acc15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
	rlwimi	$s2,$acc10,8,16,23
	rlwimi	$s3,$acc11,8,16,23
	lwz	$t0,0($key)
	lwz	$t1,4($key)
	or	$s0,$s0,$acc12
	lwz	$t1,4($key)
	or	$s1,$s1,$acc13
	lwz	$t2,8($key)
	lwz	$t3,12($key)
	or	$s2,$s2,$acc14
	lwz	$t3,12($key)
	or	$s3,$s3,$acc15

	addi	$key,$key,16
@@ -1030,12 +1204,12 @@ $code.=<<___ if ($SIZE_T==4);
	and	$acc02,$s2,$mask80
	and	$acc03,$s3,$mask80
	srwi	$acc04,$acc00,7		# r1>>7
	srwi	$acc05,$acc01,7
	srwi	$acc06,$acc02,7
	srwi	$acc07,$acc03,7
	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
	srwi	$acc05,$acc01,7
	andc	$acc09,$s1,$mask80
	srwi	$acc06,$acc02,7
	andc	$acc10,$s2,$mask80
	srwi	$acc07,$acc03,7
	andc	$acc11,$s3,$mask80
	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
	sub	$acc01,$acc01,$acc05
@@ -1059,12 +1233,12 @@ $code.=<<___ if ($SIZE_T==4);
	and	$acc06,$acc02,$mask80
	and	$acc07,$acc03,$mask80
	srwi	$acc08,$acc04,7		# r1>>7
	srwi	$acc09,$acc05,7
	srwi	$acc10,$acc06,7
	srwi	$acc11,$acc07,7
	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
	srwi	$acc09,$acc05,7
	andc	$acc13,$acc01,$mask80
	srwi	$acc10,$acc06,7
	andc	$acc14,$acc02,$mask80
	srwi	$acc11,$acc07,7
	andc	$acc15,$acc03,$mask80
	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
	sub	$acc05,$acc05,$acc09
@@ -1085,13 +1259,13 @@ $code.=<<___ if ($SIZE_T==4);

	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
	and	$acc09,$acc05,$mask80
	and	$acc10,$acc06,$mask80
	and	$acc11,$acc07,$mask80
	srwi	$acc12,$acc08,7		# r1>>7
	and	$acc10,$acc06,$mask80
	srwi	$acc13,$acc09,7
	and	$acc11,$acc07,$mask80
	srwi	$acc14,$acc10,7
	srwi	$acc15,$acc11,7
	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
	srwi	$acc15,$acc11,7
	sub	$acc09,$acc09,$acc13
	sub	$acc10,$acc10,$acc14
	sub	$acc11,$acc11,$acc15
@@ -1124,10 +1298,10 @@ ___
$code.=<<___;
	rotrwi	$s0,$s0,8		# = ROTATE(r0,8)
	rotrwi	$s1,$s1,8
	rotrwi	$s2,$s2,8
	rotrwi	$s3,$s3,8
	xor	$s0,$s0,$acc00		# ^= r2^r0
	rotrwi	$s2,$s2,8
	xor	$s1,$s1,$acc01
	rotrwi	$s3,$s3,8
	xor	$s2,$s2,$acc02
	xor	$s3,$s3,$acc03
	xor	$acc00,$acc00,$acc08
@@ -1135,32 +1309,32 @@ $code.=<<___;
	xor	$acc02,$acc02,$acc10
	xor	$acc03,$acc03,$acc11
	xor	$s0,$s0,$acc04		# ^= r4^r0
	xor	$s1,$s1,$acc05
	xor	$s2,$s2,$acc06
	xor	$s3,$s3,$acc07
	rotrwi	$acc00,$acc00,24
	xor	$s1,$s1,$acc05
	rotrwi	$acc01,$acc01,24
	xor	$s2,$s2,$acc06
	rotrwi	$acc02,$acc02,24
	xor	$s3,$s3,$acc07
	rotrwi	$acc03,$acc03,24
	xor	$acc04,$acc04,$acc08
	xor	$acc05,$acc05,$acc09
	xor	$acc06,$acc06,$acc10
	xor	$acc07,$acc07,$acc11
	xor	$s0,$s0,$acc08		# ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
	xor	$s1,$s1,$acc09
	xor	$s2,$s2,$acc10
	xor	$s3,$s3,$acc11
	rotrwi	$acc04,$acc04,16
	xor	$s1,$s1,$acc09
	rotrwi	$acc05,$acc05,16
	xor	$s2,$s2,$acc10
	rotrwi	$acc06,$acc06,16
	xor	$s3,$s3,$acc11
	rotrwi	$acc07,$acc07,16
	xor	$s0,$s0,$acc00		# ^= ROTATE(r8^r2^r0,24)
	xor	$s1,$s1,$acc01
	xor	$s2,$s2,$acc02
	xor	$s3,$s3,$acc03
	rotrwi	$acc08,$acc08,8
	xor	$s1,$s1,$acc01
	rotrwi	$acc09,$acc09,8
	xor	$s2,$s2,$acc02
	rotrwi	$acc10,$acc10,8
	xor	$s3,$s3,$acc03
	rotrwi	$acc11,$acc11,8
	xor	$s0,$s0,$acc04		# ^= ROTATE(r8^r4^r0,16)
	xor	$s1,$s1,$acc05
@@ -1180,6 +1354,8 @@ Ldec_compact_done:
	xor	$s3,$s3,$t3
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

.asciz	"AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
.align	7
___
+59 −48

File changed.

Preview size limit exceeded, changes collapsed.

+30 −13
Original line number Diff line number Diff line
@@ -389,7 +389,9 @@ $data=<<EOF;
	$ST		r9,`6*$BNSZ`(r3)	#r[6]=c1
	$ST		r10,`7*$BNSZ`(r3)	#r[7]=c2
	blr
	.long	0x00000000
	.long	0
	.byte	0,12,0x14,0,0,0,2,0
	.long	0

#
#	NOTE:	The following label name should be changed to
@@ -814,8 +816,9 @@ $data=<<EOF;


	blr

	.long	0x00000000
	.long	0
	.byte	0,12,0x14,0,0,0,2,0
	.long	0

#
#	NOTE:	The following label name should be changed to
@@ -966,7 +969,9 @@ $data=<<EOF;
	$ST	r10,`6*$BNSZ`(r3)	#r[6]=c1
	$ST	r11,`7*$BNSZ`(r3)	#r[7]=c2
	blr
	.long	0x00000000
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0

#
#	NOTE:	The following label name should be changed to
@@ -1502,7 +1507,9 @@ $data=<<EOF;
	$ST	r12,`14*$BNSZ`(r3)	#r[14]=c3;
	$ST	r10,`15*$BNSZ`(r3)	#r[15]=c1;
	blr
	.long	0x00000000
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0

#
#	NOTE:	The following label name should be changed to
@@ -1550,8 +1557,9 @@ Lppcasm_sub_adios:
	subfze	r3,r0		# if carry bit is set then r3 = 0 else -1
	andi.	r3,r3,1         # keep only last bit.
	blr
	.long	0x00000000

	.long	0
	.byte	0,12,0x14,0,0,0,4,0
	.long	0

#
#	NOTE:	The following label name should be changed to
@@ -1594,7 +1602,9 @@ Lppcasm_add_mainloop:
Lppcasm_add_adios:	
	addze	r3,r0			#return carry bit.
	blr
	.long	0x00000000
	.long	0
	.byte	0,12,0x14,0,0,0,4,0
	.long	0

#
#	NOTE:	The following label name should be changed to
@@ -1707,7 +1717,9 @@ Lppcasm_div8:
Lppcasm_div9:
	or	r3,r8,r0
	blr
	.long	0x00000000
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0

#
#	NOTE:	The following label name should be changed to
@@ -1746,8 +1758,9 @@ Lppcasm_sqr_mainloop:
	bdnz-	Lppcasm_sqr_mainloop
Lppcasm_sqr_adios:	
	blr
	.long	0x00000000

	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0

#
#	NOTE:	The following label name should be changed to
@@ -1850,7 +1863,9 @@ Lppcasm_mw_REM:
Lppcasm_mw_OVER:	
	addi	r3,r12,0
	blr
	.long	0x00000000
	.long	0
	.byte	0,12,0x14,0,0,0,4,0
	.long	0

#
#	NOTE:	The following label name should be changed to
@@ -1973,7 +1988,9 @@ Lppcasm_maw_leftover:
Lppcasm_maw_adios:	
	addi	r3,r12,0
	blr
	.long	0x00000000
	.long	0
	.byte	0,12,0x14,0,0,0,4,0
	.long	0
	.align	4
EOF
$data =~ s/\`([^\`]*)\`/eval $1/gem;
+254 −84

File changed.

Preview size limit exceeded, changes collapsed.

crypto/ppccap.c

0 → 100644
+115 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading