Commit 1b1ff9b9 authored by Andy Polyakov, committed by Richard Levitte
Browse files

sha/asm/keccak1600-ppc64.pl: up to 10% performance improvement.

parent 3dcbb6c4
Loading
Loading
Loading
Loading
+20 −20
Original line number Diff line number Diff line
@@ -27,10 +27,10 @@
 #
 #		r=1088(*)
 #
-# PPC970/G5	14.6/+120%
-# POWER7	10.3/+100%
-# POWER8	11.5/+85%
-# POWER9	9.4/+45%
+# PPC970/G5	14.0/+130%
+# POWER7	9.7/+110%
+# POWER8	10.6/+100%
+# POWER9	8.2/+66%
 #
 # (*)	Corresponds to SHA3-256. Percentage after slash is improvement
 #	over gcc-4.x-generated KECCAK_1X_ALT code. Newer compilers do
@@ -384,19 +384,19 @@ KeccakF1600:
 .type	dword_le_load,\@function
 .align	5
 dword_le_load:
-	lbzu	r0,1(r3)
-	lbzu	r4,1(r3)
-	lbzu	r5,1(r3)
+	lbz	r0,1(r3)
+	lbz	r4,2(r3)
+	lbz	r5,3(r3)
 	insrdi	r0,r4,8,48
-	lbzu	r4,1(r3)
+	lbz	r4,4(r3)
 	insrdi	r0,r5,8,40
-	lbzu	r5,1(r3)
+	lbz	r5,5(r3)
 	insrdi	r0,r4,8,32
-	lbzu	r4,1(r3)
+	lbz	r4,6(r3)
 	insrdi	r0,r5,8,24
-	lbzu	r5,1(r3)
+	lbz	r5,7(r3)
 	insrdi	r0,r4,8,16
-	lbzu	r4,1(r3)
+	lbzu	r4,8(r3)
 	insrdi	r0,r5,8,8
 	insrdi	r0,r4,8,0
 	blr
@@ -657,21 +657,21 @@ SHA3_squeeze:
 	${UCMP}i $len,8
 	blt	.Lsqueeze_tail
 
-	stbu	r0,1($out)
+	stb	r0,1($out)
 	srdi	r0,r0,8
-	stbu	r0,1($out)
+	stb	r0,2($out)
 	srdi	r0,r0,8
-	stbu	r0,1($out)
+	stb	r0,3($out)
 	srdi	r0,r0,8
-	stbu	r0,1($out)
+	stb	r0,4($out)
 	srdi	r0,r0,8
-	stbu	r0,1($out)
+	stb	r0,5($out)
 	srdi	r0,r0,8
-	stbu	r0,1($out)
+	stb	r0,6($out)
 	srdi	r0,r0,8
-	stbu	r0,1($out)
+	stb	r0,7($out)
 	srdi	r0,r0,8
-	stbu	r0,1($out)
+	stbu	r0,8($out)
 
 	subic.	$len,$len,8
 	beq	.Lsqueeze_done