Commit cf3aeae4 authored by Andy Polyakov's avatar Andy Polyakov
Browse files

aes-ppc.pl: handle unaligned data on page boundaries.

parent 9a205e59
Loading
Loading
Loading
Loading
+157 −1
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# Needs more work: key setup, page boundaries, CBC routine...
# Needs more work: key setup, CBC routine...
#
# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
@@ -359,6 +359,12 @@ $code.=<<___;
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)

	andi.	$t0,$inp,3
	andi.	$t1,$out,3
	or.	$t0,$t0,$t1
	bne	Lenc_unaligned

Lenc_unaligned_ok:
	lwz	$s0,0($inp)
	lwz	$s1,4($inp)
	lwz	$s2,8($inp)
@@ -369,7 +375,79 @@ $code.=<<___;
	stw	$s1,4($out)
	stw	$s2,8($out)
	stw	$s3,12($out)
	b	Lenc_done

Lenc_unaligned:
	subfic	$t0,$inp,4096
	subfic	$t1,$out,4096
	andi.	$t0,$t0,4096-16
	beq	Lenc_xpage
	andi.	$t1,$t1,4096-16
	bne	Lenc_unaligned_ok

Lenc_xpage:
	lbz	$acc00,0($inp)
	lbz	$acc01,1($inp)
	lbz	$acc02,2($inp)
	lbz	$s0,3($inp)
	lbz	$acc04,4($inp)
	lbz	$acc05,5($inp)
	lbz	$acc06,6($inp)
	lbz	$s1,7($inp)
	lbz	$acc08,8($inp)
	lbz	$acc09,9($inp)
	lbz	$acc10,10($inp)
	lbz	$s2,11($inp)
	lbz	$acc12,12($inp)
	lbz	$acc13,13($inp)
	lbz	$acc14,14($inp)
	lbz	$s3,15($inp)
	insrwi	$s0,$acc00,8,0
	insrwi	$s1,$acc04,8,0
	insrwi	$s0,$acc01,8,8
	insrwi	$s1,$acc05,8,8
	insrwi	$s0,$acc02,8,16
	insrwi	$s1,$acc06,8,16
	insrwi	$s2,$acc08,8,0
	insrwi	$s3,$acc12,8,0
	insrwi	$s2,$acc09,8,8
	insrwi	$s3,$acc13,8,8
	insrwi	$s2,$acc10,8,16
	insrwi	$s3,$acc14,8,16

	bl	LAES_Te
	bl	Lppc_AES_encrypt_compact

	extrwi	$acc00,$s0,8,0
	extrwi	$acc01,$s0,8,8
	stb	$acc00,0($out)
	extrwi	$acc02,$s0,8,16
	stb	$acc01,1($out)
	stb	$acc02,2($out)
	extrwi	$acc04,$s1,8,0
	stb	$s0,3($out)
	extrwi	$acc05,$s1,8,8
	stb	$acc04,4($out)
	extrwi	$acc06,$s1,8,16
	stb	$acc05,5($out)
	stb	$acc06,6($out)
	extrwi	$acc08,$s2,8,0
	stb	$s1,7($out)
	extrwi	$acc09,$s2,8,8
	stb	$acc08,8($out)
	extrwi	$acc10,$s2,8,16
	stb	$acc09,9($out)
	stb	$acc10,10($out)
	extrwi	$acc12,$s3,8,0
	stb	$s2,11($out)
	extrwi	$acc13,$s3,8,8
	stb	$acc12,12($out)
	extrwi	$acc14,$s3,8,16
	stb	$acc13,13($out)
	stb	$acc14,14($out)
	stb	$s3,15($out)

Lenc_done:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
@@ -715,6 +793,12 @@ Lenc_compact_done:
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)

	andi.	$t0,$inp,3
	andi.	$t1,$out,3
	or.	$t0,$t0,$t1
	bne	Ldec_unaligned

Ldec_unaligned_ok:
	lwz	$s0,0($inp)
	lwz	$s1,4($inp)
	lwz	$s2,8($inp)
@@ -725,7 +809,79 @@ Lenc_compact_done:
	stw	$s1,4($out)
	stw	$s2,8($out)
	stw	$s3,12($out)
	b	Ldec_done

Ldec_unaligned:
	subfic	$t0,$inp,4096
	subfic	$t1,$out,4096
	andi.	$t0,$t0,4096-16
	beq	Ldec_xpage
	andi.	$t1,$t1,4096-16
	bne	Ldec_unaligned_ok

Ldec_xpage:
	lbz	$acc00,0($inp)
	lbz	$acc01,1($inp)
	lbz	$acc02,2($inp)
	lbz	$s0,3($inp)
	lbz	$acc04,4($inp)
	lbz	$acc05,5($inp)
	lbz	$acc06,6($inp)
	lbz	$s1,7($inp)
	lbz	$acc08,8($inp)
	lbz	$acc09,9($inp)
	lbz	$acc10,10($inp)
	lbz	$s2,11($inp)
	lbz	$acc12,12($inp)
	lbz	$acc13,13($inp)
	lbz	$acc14,14($inp)
	lbz	$s3,15($inp)
	insrwi	$s0,$acc00,8,0
	insrwi	$s1,$acc04,8,0
	insrwi	$s0,$acc01,8,8
	insrwi	$s1,$acc05,8,8
	insrwi	$s0,$acc02,8,16
	insrwi	$s1,$acc06,8,16
	insrwi	$s2,$acc08,8,0
	insrwi	$s3,$acc12,8,0
	insrwi	$s2,$acc09,8,8
	insrwi	$s3,$acc13,8,8
	insrwi	$s2,$acc10,8,16
	insrwi	$s3,$acc14,8,16

	bl	LAES_Td
	bl	Lppc_AES_decrypt_compact

	extrwi	$acc00,$s0,8,0
	extrwi	$acc01,$s0,8,8
	stb	$acc00,0($out)
	extrwi	$acc02,$s0,8,16
	stb	$acc01,1($out)
	stb	$acc02,2($out)
	extrwi	$acc04,$s1,8,0
	stb	$s0,3($out)
	extrwi	$acc05,$s1,8,8
	stb	$acc04,4($out)
	extrwi	$acc06,$s1,8,16
	stb	$acc05,5($out)
	stb	$acc06,6($out)
	extrwi	$acc08,$s2,8,0
	stb	$s1,7($out)
	extrwi	$acc09,$s2,8,8
	stb	$acc08,8($out)
	extrwi	$acc10,$s2,8,16
	stb	$acc09,9($out)
	stb	$acc10,10($out)
	extrwi	$acc12,$s3,8,0
	stb	$s2,11($out)
	extrwi	$acc13,$s3,8,8
	stb	$acc12,12($out)
	extrwi	$acc14,$s3,8,16
	stb	$acc13,13($out)
	stb	$acc14,14($out)
	stb	$s3,15($out)

Ldec_done:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
	$POP	r13,`$FRAME-$SIZE_T*19`($sp)