Commit a61e5122 authored by Andy Polyakov
Browse files

aes/asm/vpaes-ppc.pl: comply with ABI.

parent 34b1008c
Loading
Loading
Loading
Loading
+276 −20
Original line number Diff line number Diff line
@@ -44,7 +44,7 @@ if ($flavour =~ /64/) {
} else { die "nonsense $flavour"; }

# Register name substituted for $sp in the generated assembly.
$sp="r1";
# NOTE(review): this listing interleaves both sides of the diff. The hunk
# header (-44,7 +44,7) indicates the 8*$SIZE_T assignment below is the
# removed line; the assignment that follows it is the one in effect.
$FRAME=8*$SIZE_T;
# Frame size: 6*$SIZE_T bytes precede the vector save area (presumably the
# ABI linkage area -- confirm), followed by 13*16 bytes. The save/restore
# code stores twelve registers (v20-v31) starting at a +15-rounded offset
# and keeps vrsave in the word at $FRAME-4, which accounts for the 13th
# 16-byte unit.
$FRAME=6*$SIZE_T+13*16;	# 13*16 is for v20-v31 offload

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
@@ -296,10 +296,36 @@ Lenc_entry:
##
## vpaes_encrypt: entry and exit sequences.  Only the changed hunks are
## visible in this listing; the code between them is not shown.
## Entry: allocate the frame, store v20-v31 into it, read vrsave into
## r7 and also store it in the word at FRAME-4, save LR, then set
## vrsave to all ones.
## Exit: restore LR and vrsave from r6/r7, reload v20-v31 from the same
## slots, release the frame.
## r10 and r11 address alternating 16-byte slots, so each pointer must
## advance by 32 per step; advancing by 16 would make r10 land on the
## slot r11 has just used.  stvx/lvx drop the low four bits of the
## effective address, which is what the +15 in the start offsets
## relies on.
## Lines tagged "(diff: removed line)" are the pre-change side of the
## hunk and are kept only because this listing interleaves both sides.
##
.globl	.vpaes_encrypt
.align	5
.vpaes_encrypt:
	$STU	$sp,-$FRAME($sp)
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mflr	r6
	mfspr	r7, 256			# save vrsave
	stvx	v20,r10,$sp
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	stw	r7,`$FRAME-4`($sp)	# save vrsave
	li	r0, -1
	$PUSH	r6,$LRSAVE($sp)		# (diff: removed line)
	$PUSH	r6,`$FRAME+$LRSAVE`($sp)
	mtspr	256, r0			# preserve all AltiVec registers

	bl	_vpaes_encrypt_preheat
@@ -333,11 +359,36 @@ Lenc_entry:
	vsel	v1, $outhead, v1, $outmask
	stvx	v1, 0, $out

	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mtlr	r6
	mtspr	256, r7			# restore vrsave
	lvx	v20,r10,$sp
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,0x14,1,0,0,3,0	# (diff: removed line)
	.byte	0,12,0x04,1,0x80,0,3,0
	.long	0
.size	.vpaes_encrypt,.-.vpaes_encrypt

@@ -479,10 +530,36 @@ Ldec_entry:
##
## vpaes_decrypt: entry and exit sequences.  Only the changed hunks are
## visible in this listing; the code between them is not shown.
## Entry: allocate the frame, store v20-v31 into it, read vrsave into
## r7 and also store it in the word at FRAME-4, save LR, then set
## vrsave to all ones.
## Exit: restore LR and vrsave from r6/r7, reload v20-v31 from the same
## slots, release the frame.
## r10 and r11 address alternating 16-byte slots, so each pointer must
## advance by 32 per step; advancing by 16 would make r10 land on the
## slot r11 has just used.  stvx/lvx drop the low four bits of the
## effective address, which is what the +15 in the start offsets
## relies on.
## Lines tagged "(diff: removed line)" are the pre-change side of the
## hunk and are kept only because this listing interleaves both sides.
##
.globl	.vpaes_decrypt
.align	5
.vpaes_decrypt:
	$STU	$sp,-$FRAME($sp)
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mflr	r6
	mfspr	r7, 256			# save vrsave
	stvx	v20,r10,$sp
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	stw	r7,`$FRAME-4`($sp)	# save vrsave
	li	r0, -1
	$PUSH	r6,$LRSAVE($sp)		# (diff: removed line)
	$PUSH	r6,`$FRAME+$LRSAVE`($sp)
	mtspr	256, r0			# preserve all AltiVec registers

	bl	_vpaes_decrypt_preheat
@@ -516,23 +593,74 @@ Ldec_entry:
	vsel	v1, $outhead, v1, $outmask
	stvx	v1, 0, $out

	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mtlr	r6
	mtspr	256, r7			# restore vrsave
	lvx	v20,r10,$sp
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,0x14,1,0,0,3,0	# (diff: removed line)
	.byte	0,12,0x04,1,0x80,0,3,0
	.long	0
.size	.vpaes_decrypt,.-.vpaes_decrypt

##
## vpaes_cbc_encrypt: entry and exit sequences.  Only the changed hunks
## are visible in this listing; the code between them is not shown.
## Entry: allocate FRAME plus two extra words, store v20-v31, read
## vrsave into r12 and also store it in the word at FRAME-4, save
## r30/r31 in the two words starting at FRAME, and save LR.
## After the length check r12 is copied to r7, and r7 is what gets
## written back to vrsave before the vector reloads.
## Lcbc_abort sits after the vector reloads, so a branch to it only
## restores LR, r30, r31 and the stack pointer.
## r10 and r11 address alternating 16-byte slots, so each pointer must
## advance by 32 per step; advancing by 16 would make r10 land on the
## slot r11 has just used.  stvx/lvx drop the low four bits of the
## effective address, which is what the +15 in the start offsets
## relies on.
## Lines tagged "(diff: removed line)" are the pre-change side of the
## hunk and are kept only because this listing interleaves both sides.
##
.globl	.vpaes_cbc_encrypt
.align	5
.vpaes_cbc_encrypt:
	$STU	$sp,-$FRAME($sp)	# (diff: removed line)
	$STU	$sp,-`($FRAME+2*$SIZE_T)`($sp)
	mflr	r0
	$PUSH	r30,$FRAME-$SIZE_T*2($sp)	# (diff: removed line)
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mfspr	r12, 256
	stvx	v20,r10,$sp
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	stw	r12,`$FRAME-4`($sp)	# save vrsave
	$PUSH	r30,`$FRAME+$SIZE_T*0`($sp)
	$PUSH	r31,`$FRAME+$SIZE_T*1`($sp)
	li	r9, 16
	$PUSH	r31,$FRAME-$SIZE_T*1($sp)	# (diff: removed line)
	$PUSH	r0, $FRAME+$LRSAVE($sp)	# (diff: removed line)
	$PUSH	r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)

	sub.	r30, r5, r9		# copy length-16
	mr	r5, r6			# copy pointer to key
@@ -540,7 +668,7 @@ Ldec_entry:
	blt	Lcbc_abort
	cmpwi	r8, 0			# test direction
	li	r6, -1
	mfspr	r7, 256			# (diff: removed line)
	mr	r7, r12			# copy vrsave
	mtspr	256, r6			# preserve all AltiVec registers

	lvx	v24, 0, r31		# load [potentially unaligned] iv
@@ -629,12 +757,36 @@ Lcbc_done:
	stvx	v1, r6, r31

	mtspr	256, r7			# restore vrsave
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	lvx	v20,r10,$sp
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
Lcbc_abort:
	$POP	r0, $FRAME+$LRSAVE($sp)	# (diff: removed line)
	$POP	r30,$FRAME-$SIZE_T*2($sp)	# (diff: removed line)
	$POP	r31,$FRAME-$SIZE_T*1($sp)	# (diff: removed line)
	$POP	r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
	$POP	r30,`$FRAME+$SIZE_T*0`($sp)
	$POP	r31,`$FRAME+$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME		# (diff: removed line)
	addi	$sp,$sp,`$FRAME+$SIZE_T*2`
	blr
	.long	0
	.byte	0,12,0x04,1,0x80,2,6,0
@@ -1158,10 +1310,36 @@ Lschedule_mangle_dec:
##
## vpaes_set_encrypt_key: entry and exit sequences around the call to
## _vpaes_schedule_core.  Only the changed hunks are visible in this
## listing; the argument setup between them is not fully shown.
## Entry: allocate the frame, store v20-v31 into it, read vrsave into
## r6 and also store it in the word at FRAME-4, save LR, then set
## vrsave to all ones.
## Exit: reload LR, restore vrsave from r6, zero r3, reload v20-v31
## from the same slots, release the frame.
## r10 and r11 address alternating 16-byte slots, so each pointer must
## advance by 32 per step; advancing by 16 would make r10 land on the
## slot r11 has just used.  stvx/lvx drop the low four bits of the
## effective address, which is what the +15 in the start offsets
## relies on.
## The traceback .byte line carries eight fields, like the other
## routines in this listing.
## Lines tagged "(diff: removed line)" are the pre-change side of the
## hunk and are kept only because this listing interleaves both sides.
##
.globl	.vpaes_set_encrypt_key
.align	5
.vpaes_set_encrypt_key:
	$STU	$sp,-$FRAME($sp)
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mflr	r0
	mfspr	r6, 256			# save vrsave
	stvx	v20,r10,$sp
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	stw	r6,`$FRAME-4`($sp)	# save vrsave
	li	r7, -1
	$PUSH	r0, $LRSAVE($sp)	# (diff: removed line)
	$PUSH	r0, `$FRAME+$LRSAVE`($sp)
	mtspr	256, r7			# preserve all AltiVec registers

	srwi	r9, $bits, 5		# shr	\$5,%eax
@@ -1172,23 +1350,74 @@ Lschedule_mangle_dec:
	li	r8, 0x30		# mov	\$0x30,%r8d
	bl	_vpaes_schedule_core

	$POP	r0, $LRSAVE($sp)	# (diff: removed line)
	$POP	r0, `$FRAME+$LRSAVE`($sp)
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mtspr	256, r6			# restore vrsave
	mtlr	r0
	xor	r3, r3, r3
	lvx	v20,r10,$sp
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,0x14,1,0,3,0	# (diff: removed line)
	.byte	0,12,0x04,1,0x80,0,3,0
	.long	0
.size	.vpaes_set_encrypt_key,.-.vpaes_set_encrypt_key

##
## vpaes_set_decrypt_key: entry and exit sequences around the call to
## _vpaes_schedule_core.  Only the changed hunks are visible in this
## listing; the argument setup between them is not fully shown.
## Entry: allocate the frame, store v20-v31 into it, read vrsave into
## r6 and also store it in the word at FRAME-4, save LR, then set
## vrsave to all ones.
## Exit: reload LR, restore vrsave from r6, zero r3, reload v20-v31
## from the same slots, release the frame.
## r10 and r11 address alternating 16-byte slots, so each pointer must
## advance by 32 per step; advancing by 16 would make r10 land on the
## slot r11 has just used.  stvx/lvx drop the low four bits of the
## effective address, which is what the +15 in the start offsets
## relies on.
## The traceback .byte line carries eight fields, like the other
## routines in this listing.
## Lines tagged "(diff: removed line)" are the pre-change side of the
## hunk and are kept only because this listing interleaves both sides.
##
.globl	.vpaes_set_decrypt_key
.align	4
.vpaes_set_decrypt_key:
	$STU	$sp,-$FRAME($sp)
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mflr	r0
	mfspr	r6, 256			# save vrsave
	stvx	v20,r10,$sp
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	stw	r6,`$FRAME-4`($sp)	# save vrsave
	li	r7, -1
	$PUSH	r0, $LRSAVE($sp)	# (diff: removed line)
	$PUSH	r0, `$FRAME+$LRSAVE`($sp)
	mtspr	256, r7			# preserve all AltiVec registers

	srwi	r9, $bits, 5		# shr	\$5,%eax
@@ -1204,18 +1433,45 @@ Lschedule_mangle_dec:
	xori	r8, r8, 32		# xor	\$32,%r8d	# nbits==192?0:32
	bl	_vpaes_schedule_core

	$POP	r0,  $LRSAVE($sp)	# (diff: removed line)
	$POP	r0,  `$FRAME+$LRSAVE`($sp)
	li	r10,`15+6*$SIZE_T`
	li	r11,`31+6*$SIZE_T`
	mtspr	256, r6			# restore vrsave
	mtlr	r0
	xor	r3, r3, r3
	lvx	v20,r10,$sp
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,0x14,1,0,3,0	# (diff: removed line)
	.byte	0,12,0x04,1,0x80,0,3,0
	.long	0
.size	.vpaes_set_decrypt_key,.-.vpaes_set_decrypt_key
___
}

# Replace every backtick-quoted expression in the generated assembly text
# with the result of evaluating it as Perl (this is how offsets written as
# arithmetic on $FRAME and $SIZE_T become plain numbers).
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

print $code;

# A failed close means the output was not fully written; stop with an error
# instead of exiting successfully with truncated assembly.
close STDOUT or die "error closing STDOUT: $!";