Commit 87a75b3e authored by Andy Polyakov
Browse files

ec/asm/ecp_nistz256-{!x86_64}.pl: fix scatter_w7 function.



The ecp_nistz256_scatter_w7 function is called when an application
attempts to use a custom generator, i.e. rarely. Even though the
non-x86_64 versions were wrong, this didn't affect point operations;
they were just not as fast as expected.

Reviewed-by: Rich Salz <rsalz@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6738)
parent f40e0a34
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -894,13 +894,13 @@ ecp_nistz256_scatter_w7:
.Loop_scatter_w7:
	ldr	$mask,[$inp],#4
	subs	$index,$index,#1
	strb	$mask,[$out,#64*0-1]
	strb	$mask,[$out,#64*0]
	mov	$mask,$mask,lsr#8
	strb	$mask,[$out,#64*1-1]
	strb	$mask,[$out,#64*1]
	mov	$mask,$mask,lsr#8
	strb	$mask,[$out,#64*2-1]
	strb	$mask,[$out,#64*2]
	mov	$mask,$mask,lsr#8
	strb	$mask,[$out,#64*3-1]
	strb	$mask,[$out,#64*3]
	add	$out,$out,#64*4
	bne	.Loop_scatter_w7

+8 −8
Original line number Diff line number Diff line
@@ -1776,21 +1776,21 @@ ecp_nistz256_scatter_w7:
	prfm	pstl1strm,[$out,#4096+64*5]
	prfm	pstl1strm,[$out,#4096+64*6]
	prfm	pstl1strm,[$out,#4096+64*7]
	strb	w3,[$out,#64*0-1]
	strb	w3,[$out,#64*0]
	lsr	x3,x3,#8
	strb	w3,[$out,#64*1-1]
	strb	w3,[$out,#64*1]
	lsr	x3,x3,#8
	strb	w3,[$out,#64*2-1]
	strb	w3,[$out,#64*2]
	lsr	x3,x3,#8
	strb	w3,[$out,#64*3-1]
	strb	w3,[$out,#64*3]
	lsr	x3,x3,#8
	strb	w3,[$out,#64*4-1]
	strb	w3,[$out,#64*4]
	lsr	x3,x3,#8
	strb	w3,[$out,#64*5-1]
	strb	w3,[$out,#64*5]
	lsr	x3,x3,#8
	strb	w3,[$out,#64*6-1]
	strb	w3,[$out,#64*6]
	lsr	x3,x3,#8
	strb	w3,[$out,#64*7-1]
	strb	w3,[$out,#64*7]
	add	$out,$out,#64*8
	b.ne	.Loop_scatter_w7

+8 −8
Original line number Diff line number Diff line
@@ -2297,21 +2297,21 @@ ecp_nistz256_scatter_w7:

.Loop_scatter_w7:
	ldu	r0,8($inp)
	stb	r0,64*0-1($out)
	stb	r0,64*0($out)
	srdi	r0,r0,8
	stb	r0,64*1-1($out)
	stb	r0,64*1($out)
	srdi	r0,r0,8
	stb	r0,64*2-1($out)
	stb	r0,64*2($out)
	srdi	r0,r0,8
	stb	r0,64*3-1($out)
	stb	r0,64*3($out)
	srdi	r0,r0,8
	stb	r0,64*4-1($out)
	stb	r0,64*4($out)
	srdi	r0,r0,8
	stb	r0,64*5-1($out)
	stb	r0,64*5($out)
	srdi	r0,r0,8
	stb	r0,64*6-1($out)
	stb	r0,64*6($out)
	srdi	r0,r0,8
	stb	r0,64*7-1($out)
	stb	r0,64*7($out)
	addi	$out,$out,64*8
	bdnz	.Loop_scatter_w7

+4 −4
Original line number Diff line number Diff line
@@ -1531,13 +1531,13 @@ ecp_nistz256_scatter_w7:
	ld	[$inp],%l0
	add	$inp,4,$inp
	subcc	$index,1,$index
	stb	%l0,[$out+64*0-1]
	stb	%l0,[$out+64*0]
	srl	%l0,8,%l1
	stb	%l1,[$out+64*1-1]
	stb	%l1,[$out+64*1]
	srl	%l0,16,%l2
	stb	%l2,[$out+64*2-1]
	stb	%l2,[$out+64*2]
	srl	%l0,24,%l3
	stb	%l3,[$out+64*3-1]
	stb	%l3,[$out+64*3]
	bne	.Loop_scatter_w7
	add	$out,64*4,$out

+1 −1
Original line number Diff line number Diff line
@@ -1179,7 +1179,7 @@ for ($i=0;$i<7;$i++) {
	&mov	("esi",&wparam(1));
	&mov	("ebp",&wparam(2));

	&lea	("edi",&DWP(-1,"edi","ebp"));
	&lea	("edi",&DWP(0,"edi","ebp"));
	&mov	("ebp",64/4);
&set_label("scatter_w7_loop");
	&mov	("eax",&DWP(0,"esi"));