Commit 143ee099 authored by Andy Polyakov
Browse files

ec/asm/ecp_nistz256-*.pl: get corner case logic right.



RT#4284

Reviewed-by: Rich Salz <rsalz@openssl.org>
parent d9375341
Loading
Loading
Loading
Loading
+12 −5
Original line number Diff line number Diff line
@@ -1252,6 +1252,7 @@ ecp_nistz256_point_double:
	stmdb	sp!,{r0-r12,lr}		@ push from r0, unusual, but intentional
	sub	sp,sp,#32*5

.Lpoint_double_shortcut:
	add	r3,sp,#$in_x
	ldmia	$a_ptr!,{r4-r11}	@ copy in_x
	stmia	r3,{r4-r11}
@@ -1371,7 +1372,7 @@ $code.=<<___;
.align	5
ecp_nistz256_point_add:
	stmdb	sp!,{r0-r12,lr}		@ push from r0, unusual, but intentional
	sub	sp,sp,#32*18
	sub	sp,sp,#32*18+16

	ldmia	$b_ptr!,{r4-r11}	@ copy in2
	add	r3,sp,#$in2_x
@@ -1504,9 +1505,9 @@ ecp_nistz256_point_add:
	tst	$t0,$t1
	beq	.Ladd_proceed		@ (in1infty || in2infty)?
	tst	$t2,$t2
	beq	.Ladd_proceed		@ is_equal(S1,S2)?
	beq	.Ladd_double		@ is_equal(S1,S2)?

	ldr	$r_ptr,[sp,#32*18]
	ldr	$r_ptr,[sp,#32*18+16]
	eor	r4,r4,r4
	eor	r5,r5,r5
	eor	r6,r6,r6
@@ -1520,6 +1521,12 @@ ecp_nistz256_point_add:
	stmia	$r_ptr!,{r4-r11}
	b	.Ladd_done

.align	4
.Ladd_double:
	ldr	$a_ptr,[sp,#32*18+20]
	add	sp,sp,#32*(18-5)+16	@ difference in frame sizes
	b	.Lpoint_double_shortcut

.align	4
.Ladd_proceed:
	add	$a_ptr,sp,#$R
@@ -1588,7 +1595,7 @@ ecp_nistz256_point_add:
	add	r3,sp,#$in1_x
	and	r11,r11,r12
	mvn	r12,r12
	ldr	$r_ptr,[sp,#32*18]
	ldr	$r_ptr,[sp,#32*18+16]
___
for($i=0;$i<96;$i+=8) {			# conditional moves
$code.=<<___;
@@ -1610,7 +1617,7 @@ ___
}
$code.=<<___;
.Ladd_done:
	add	sp,sp,#32*18+16		@ +16 means "skip even over saved r0-r3"
	add	sp,sp,#32*18+16+16	@ +16 means "skip even over saved r0-r3"
#if __ARM_ARCH__>=5 || defined(__thumb__)
	ldmia	sp!,{r4-r12,pc}
#else
+13 −3
Original line number Diff line number Diff line
@@ -691,12 +691,13 @@ $code.=<<___;
.type	ecp_nistz256_point_double,%function
.align	5
ecp_nistz256_point_double:
	stp	x29,x30,[sp,#-48]!
	stp	x29,x30,[sp,#-80]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	sub	sp,sp,#32*4

.Ldouble_shortcut:
	ldp	$acc0,$acc1,[$ap,#32]
	 mov	$rp_real,$rp
	ldp	$acc2,$acc3,[$ap,#48]
@@ -823,7 +824,7 @@ ecp_nistz256_point_double:
	add	sp,x29,#0		// destroy frame
	ldp	x19,x20,[x29,#16]
	ldp	x21,x22,[x29,#32]
	ldp	x29,x30,[sp],#48
	ldp	x29,x30,[sp],#80
	ret
.size	ecp_nistz256_point_double,.-ecp_nistz256_point_double
___
@@ -963,7 +964,7 @@ ecp_nistz256_point_add:
	b.eq	.Ladd_proceed		// (in1infty || in2infty)?

	tst	$temp,$temp
	b.eq	.Ladd_proceed		// is_equal(S1,S2)?
	b.eq	.Ladd_double		// is_equal(S1,S2)?

	eor	$a0,$a0,$a0
	eor	$a1,$a1,$a1
@@ -975,6 +976,15 @@ ecp_nistz256_point_add:
	stp	$a0,$a1,[$rp_real,#80]
	b	.Ladd_done

.align	4
.Ladd_double:
	mov	$ap,$ap_real
	mov	$rp,$rp_real
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	add	sp,sp,#32*(12-4)	// difference in stack frames
	b	.Ldouble_shortcut

.align	4
.Ladd_proceed:
	add	$rp,sp,#$Rsqr
+9 −1
Original line number Diff line number Diff line
@@ -1197,6 +1197,7 @@ for ($i=0;$i<7;$i++) {
########################################################################
# void ecp_nistz256_point_double(P256_POINT *out,const P256_POINT *inp);
#
&static_label("point_double_shortcut");
&function_begin("ecp_nistz256_point_double");
{   my ($S,$M,$Zsqr,$in_x,$tmp0)=map(32*$_,(0..4));

@@ -1212,6 +1213,7 @@ for ($i=0;$i<7;$i++) {
	&picmeup("edx","OPENSSL_ia32cap_P","eax",&label("pic"));
	&mov	("ebp",&DWP(0,"edx"));		}

&set_label("point_double_shortcut");
	&mov	("eax",&DWP(0,"esi"));		# copy in_x
	&mov	("ebx",&DWP(4,"esi"));
	&mov	("ecx",&DWP(8,"esi"));
@@ -1491,7 +1493,7 @@ for ($i=0;$i<7;$i++) {
	&mov	("ebx",&DWP(32*18+8,"esp"));
	&jz	(&label("add_proceed"));	# (in1infty || in2infty)?
	&test	("ebx","ebx");
	&jz	(&label("add_proceed"));	# is_equal(S1,S2)?
	&jz	(&label("add_double"));		# is_equal(S1,S2)?

	&mov	("edi",&wparam(0));
	&xor	("eax","eax");
@@ -1499,6 +1501,12 @@ for ($i=0;$i<7;$i++) {
	&data_byte(0xfc,0xf3,0xab);		# cld; stosd
	&jmp	(&label("add_done"));

&set_label("add_double",16);
	&mov	("esi",&wparam(1));
	&mov	("ebp",&DWP(32*18+12,"esp"));	# OPENSSL_ia32cap_P copy
	&add	("esp",4*((8*18+5)-(8*5+1)));	# difference in frame sizes
	&jmp	(&label("point_double_shortcut"));

&set_label("add_proceed",16);
	&mov	("eax",&DWP(32*18+12,"esp"));	# OPENSSL_ia32cap_P copy
	&lea	("esi",&DWP($R,"esp"));