Commit 5c359830 authored by Andy Polyakov
Browse files

sha1-ppc.pl: shave off one cycle from BODY_20_39


and improve performance by 10% on POWER[78].

Reviewed-by: Kurt Roeckx <kurt@openssl.org>
parent c8d133e4
Loading
Loading
Loading
Loading
+8 −8
Original line number Diff line number Diff line
@@ -125,31 +125,31 @@ my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	add	$f,$K,$e
	xor	$t0,$b,$d
	rotlwi	$e,$a,5
	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
	add	$f,$f,@X[$i%16]
	xor	$t0,$b,$c
	xor	$t0,$t0,$c
	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
	add	$f,$f,$e
	add	$f,$f,$t0
	rotlwi	$b,$b,30
	xor	$t0,$t0,$d
	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
	add	$f,$f,$t0
	add	$f,$f,$e
	rotlwi	@X[$j%16],@X[$j%16],1
___
$code.=<<___ if ($i==79);
	add	$f,$K,$e
	xor	$t0,$b,$d
	rotlwi	$e,$a,5
	lwz	r16,0($ctx)
	add	$f,$f,@X[$i%16]
	xor	$t0,$b,$c
	xor	$t0,$t0,$c
	lwz	r17,4($ctx)
	add	$f,$f,$e
	add	$f,$f,$t0
	rotlwi	$b,$b,30
	lwz	r18,8($ctx)
	xor	$t0,$t0,$d
	lwz	r19,12($ctx)
	add	$f,$f,$t0
	add	$f,$f,$e
	lwz	r20,16($ctx)
___
}
+2 −2
Original line number Diff line number Diff line
@@ -13,8 +13,8 @@
# always virtualized setup with possibly throttled processor.
# Relative comparison is therefore more informative. This module is
# ~60% faster than integer-only sha512-ppc.pl. To anchor to something
# else, SHA256 is 16% slower than sha1-ppc.pl and 2.5x slower than
# hardware-assisted aes-128-cbc encrypt. SHA512 is 33% faster than
# else, SHA256 is 24% slower than sha1-ppc.pl and 2.5x slower than
# hardware-assisted aes-128-cbc encrypt. SHA512 is 20% faster than
# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting
# result is degree of computational resources' utilization. POWER8 is
# "massively multi-threaded chip" and difference between single- and