Commit 764fe518 authored by Andy Polyakov's avatar Andy Polyakov
Browse files

aesp8-ppc.pl: add CTR mode.

parent 7241a4c7
Loading
Loading
Loading
Loading
+654 −0
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@ if ($flavour =~ /64/) {
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
@@ -37,6 +38,7 @@ if ($flavour =~ /64/) {
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
@@ -1211,6 +1213,658 @@ Lcbc_dec8x_done:
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
.align	5
.${prefix}_ctr32_encrypt_blocks:
	${UCMP}i	$len,1
	bltlr-

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	 vspltisb	$one,1
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm
	 vsldoi		$one,$rndkey0,$one,1

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1

	${UCMP}i	$len,8
	bge		_aesp8_ctr32_encrypt8x

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	lvx		$rndkey0,0,$key
	mtctr		$rounds
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	b		Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_ctr32_enc

	vadduwm		$ivec,$ivec,$one
	 vmr		$dat,$inptail
	 lvx		$inptail,0,$inp
	 addi		$inp,$inp,16
	 subic.		$len,$len,1		# blocks--

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	 vperm		$dat,$dat,$inptail,$inpperm
	 li		$idx,16
	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
	 lvx		$rndkey0,0,$key
	vxor		$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	 lvx		$rndkey1,$idx,$key
	 addi		$idx,$idx,16
	vperm		$inout,$inout,$inout,$outperm
	vsel		$dat,$outhead,$inout,$outmask
	 mtctr		$rounds
	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr		$outhead,$inout
	 vxor		$inout,$ivec,$rndkey0
	 lvx		$rndkey0,$idx,$key
	 addi		$idx,$idx,16
	stvx		$dat,0,$out
	addi		$out,$out,16
	bne		Loop_ctr32_enc

	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_ctr32_enc_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_ctr32_enc_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	vadduwm		$two,$one,$one
	subi		$inp,$inp,15		# undo "caller"
	$SHL		$len,$len,4

	vadduwm		$out1,$ivec,$one	# counter values ...
	vadduwm		$out2,$ivec,$two
	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	 le?li		$idx,8
	vadduwm		$out3,$out1,$two
	vxor		$out1,$out1,$rndkey0
	 le?lvsl	$inpperm,0,$idx
	vadduwm		$out4,$out2,$two
	vxor		$out2,$out2,$rndkey0
	 le?vspltisb	$tmp,0x0f
	vadduwm		$out5,$out3,$two
	vxor		$out3,$out3,$rndkey0
	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduwm		$out6,$out4,$two
	vxor		$out4,$out4,$rndkey0
	vadduwm		$out7,$out5,$two
	vxor		$out5,$out5,$rndkey0
	vadduwm		$ivec,$out6,$two	# next counter value
	vxor		$out6,$out6,$rndkey0
	vxor		$out7,$out7,$rndkey0

	mtctr		$rounds
	b		Loop_ctr32_enc8x
.align	5
Loop_ctr32_enc8x:
	vcipher 	$out0,$out0,v24
	vcipher 	$out1,$out1,v24
	vcipher 	$out2,$out2,v24
	vcipher 	$out3,$out3,v24
	vcipher 	$out4,$out4,v24
	vcipher 	$out5,$out5,v24
	vcipher 	$out6,$out6,v24
	vcipher 	$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher 	$out0,$out0,v25
	vcipher 	$out1,$out1,v25
	vcipher 	$out2,$out2,v25
	vcipher 	$out3,$out3,v25
	vcipher 	$out4,$out4,v25
	vcipher 	$out5,$out5,v25
	vcipher 	$out6,$out6,v25
	vcipher 	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_ctr32_enc8x

	subic		r11,$len,256		# $len-256, borrow $key_
	vcipher 	$out0,$out0,v24
	vcipher 	$out1,$out1,v24
	vcipher 	$out2,$out2,v24
	vcipher 	$out3,$out3,v24
	vcipher 	$out4,$out4,v24
	vcipher 	$out5,$out5,v24
	vcipher 	$out6,$out6,v24
	vcipher 	$out7,$out7,v24

	subfe		r0,r0,r0		# borrow?-1:0
	vcipher 	$out0,$out0,v25
	vcipher 	$out1,$out1,v25
	vcipher 	$out2,$out2,v25
	vcipher 	$out3,$out3,v25
	vcipher 	$out4,$out4,v25
	vcipher		$out5,$out5,v25
	vcipher		$out6,$out6,v25
	vcipher		$out7,$out7,v25

	and		r0,r0,r11
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vcipher		$out0,$out0,v26
	vcipher		$out1,$out1,v26
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	vcipher		$out4,$out4,v26
	vcipher		$out5,$out5,v26
	vcipher		$out6,$out6,v26
	vcipher		$out7,$out7,v26
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	subic		$len,$len,129		# $len-=129
	vcipher		$out0,$out0,v27
	addi		$len,$len,1		# $len-=128 really
	vcipher		$out1,$out1,v27
	vcipher		$out2,$out2,v27
	vcipher		$out3,$out3,v27
	vcipher		$out4,$out4,v27
	vcipher		$out5,$out5,v27
	vcipher		$out6,$out6,v27
	vcipher		$out7,$out7,v27
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vcipher		$out0,$out0,v28
	 lvx_u		$in0,$x00,$inp		# load input
	vcipher		$out1,$out1,v28
	 lvx_u		$in1,$x10,$inp
	vcipher		$out2,$out2,v28
	 lvx_u		$in2,$x20,$inp
	vcipher		$out3,$out3,v28
	 lvx_u		$in3,$x30,$inp
	vcipher		$out4,$out4,v28
	 lvx_u		$in4,$x40,$inp
	vcipher		$out5,$out5,v28
	 lvx_u		$in5,$x50,$inp
	vcipher		$out6,$out6,v28
	 lvx_u		$in6,$x60,$inp
	vcipher		$out7,$out7,v28
	 lvx_u		$in7,$x70,$inp
	 addi		$inp,$inp,0x80

	vcipher		$out0,$out0,v29
	 le?vperm	$in0,$in0,$in0,$inpperm
	vcipher		$out1,$out1,v29
	 le?vperm	$in1,$in1,$in1,$inpperm
	vcipher		$out2,$out2,v29
	 le?vperm	$in2,$in2,$in2,$inpperm
	vcipher		$out3,$out3,v29
	 le?vperm	$in3,$in3,$in3,$inpperm
	vcipher		$out4,$out4,v29
	 le?vperm	$in4,$in4,$in4,$inpperm
	vcipher		$out5,$out5,v29
	 le?vperm	$in5,$in5,$in5,$inpperm
	vcipher		$out6,$out6,v29
	 le?vperm	$in6,$in6,$in6,$inpperm
	vcipher		$out7,$out7,v29
	 le?vperm	$in7,$in7,$in7,$inpperm

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
	subfe.		r0,r0,r0		# borrow?-1:0
	vcipher		$out0,$out0,v30
	 vxor		$in0,$in0,v31		# xor with last round key
	vcipher		$out1,$out1,v30
	 vxor		$in1,$in1,v31
	vcipher		$out2,$out2,v30
	 vxor		$in2,$in2,v31
	vcipher		$out3,$out3,v30
	 vxor		$in3,$in3,v31
	vcipher		$out4,$out4,v30
	 vxor		$in4,$in4,v31
	vcipher		$out5,$out5,v30
	 vxor		$in5,$in5,v31
	vcipher		$out6,$out6,v30
	 vxor		$in6,$in6,v31
	vcipher		$out7,$out7,v30
	 vxor		$in7,$in7,v31

	bne		Lctr32_enc8x_break	# did $len-129 borrow?

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	 vadduwm	$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	 vadduwm	$out2,$ivec,$two
	 vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	 vadduwm	$out3,$out1,$two
	 vxor		$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	 vadduwm	$out4,$out2,$two
	 vxor		$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	 vadduwm	$out5,$out3,$two
	 vxor		$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	 vadduwm	$out6,$out4,$two
	 vxor		$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	 vadduwm	$out7,$out5,$two
	 vxor		$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	 vadduwm	$ivec,$out6,$two	# next counter value
	 vxor		$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	 vxor		$out7,$out7,$rndkey0
	mtctr		$rounds

	 vcipher	$out0,$out0,v24
	stvx_u		$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	 vcipher	$out1,$out1,v24
	stvx_u		$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	 vcipher	$out2,$out2,v24
	stvx_u		$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	 vcipher	$out3,$out3,v24
	stvx_u		$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	 vcipher	$out4,$out4,v24
	stvx_u		$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	 vcipher	$out5,$out5,v24
	stvx_u		$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	 vcipher	$out6,$out6,v24
	stvx_u		$in6,$x60,$out
	 vcipher	$out7,$out7,v24
	stvx_u		$in7,$x70,$out
	addi		$out,$out,0x80

	b		Loop_ctr32_enc8x_middle

.align	5
Lctr32_enc8x_break:
	cmpwi		$len,-0x60
	blt		Lctr32_enc8x_one
	nop
	beq		Lctr32_enc8x_two
	cmpwi		$len,-0x40
	blt		Lctr32_enc8x_three
	nop
	beq		Lctr32_enc8x_four
	cmpwi		$len,-0x20
	blt		Lctr32_enc8x_five
	nop
	beq		Lctr32_enc8x_six
	cmpwi		$len,0x00
	blt		Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	stvx_u		$out6,$x60,$out
	addi		$out,$out,0x70
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	stvx_u		$out5,$x50,$out
	addi		$out,$out,0x60
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	b		Lcbc_dec8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	b		Lcbc_dec8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u		$out0,0,$out
	addi		$out,$out,0x10

Lctr32_enc8x_done:
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x14,0,0x80,6,6,0
	.long		0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}

my $consts=1;
foreach(split("\n",$code)) {
        s/\`([^\`]*)\`/eval($1)/geo;
+1 −0
Original line number Diff line number Diff line
@@ -165,6 +165,7 @@ extern unsigned int OPENSSL_ppccap_P;
# define HWAES_encrypt aes_p8_encrypt
# define HWAES_decrypt aes_p8_decrypt
# define HWAES_cbc_encrypt aes_p8_cbc_encrypt
# define HWAES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks
#endif

#if	defined(AES_ASM) && !defined(I386_ONLY) &&	(  \