Commit c74aea8d authored by Andy Polyakov, committed by Matt Caswell
Browse files

ec/ecp_nistz256: harmonize is_infinity with ec_GFp_simple_is_at_infinity.



RT#4625

Reviewed-by: Rich Salz <rsalz@openssl.org>
parent e3057a57
Loading
Loading
Loading
Loading
+17 −41
Original line number Diff line number Diff line
@@ -1405,27 +1405,19 @@ ecp_nistz256_point_add:
	stmdb	sp!,{r0-r12,lr}		@ push from r0, unusual, but intentional
	sub	sp,sp,#32*18+16

	ldmia	$b_ptr!,{r4-r11}	@ copy in2
	ldmia	$b_ptr!,{r4-r11}	@ copy in2_x
	add	r3,sp,#$in2_x
	orr	r12,r4,r5
	orr	r12,r12,r6
	orr	r12,r12,r7
	orr	r12,r12,r8
	orr	r12,r12,r9
	orr	r12,r12,r10
	orr	r12,r12,r11
	stmia	r3!,{r4-r11}
	ldmia	$b_ptr!,{r4-r11}
	orr	r12,r12,r4
	orr	r12,r12,r5
	ldmia	$b_ptr!,{r4-r11}	@ copy in2_y
	stmia	r3!,{r4-r11}
	ldmia	$b_ptr,{r4-r11}		@ copy in2_z
	orr	r12,r4,r5
	orr	r12,r12,r6
	orr	r12,r12,r7
	orr	r12,r12,r8
	orr	r12,r12,r9
	orr	r12,r12,r10
	orr	r12,r12,r11
	stmia	r3!,{r4-r11}
	ldmia	$b_ptr,{r4-r11}
	cmp	r12,#0
#ifdef	__thumb2__
	it	ne
@@ -1434,27 +1426,19 @@ ecp_nistz256_point_add:
	stmia	r3,{r4-r11}
	str	r12,[sp,#32*18+8]	@ !in2infty

	ldmia	$a_ptr!,{r4-r11}	@ copy in1
	ldmia	$a_ptr!,{r4-r11}	@ copy in1_x
	add	r3,sp,#$in1_x
	orr	r12,r4,r5
	orr	r12,r12,r6
	orr	r12,r12,r7
	orr	r12,r12,r8
	orr	r12,r12,r9
	orr	r12,r12,r10
	orr	r12,r12,r11
	stmia	r3!,{r4-r11}
	ldmia	$a_ptr!,{r4-r11}
	orr	r12,r12,r4
	orr	r12,r12,r5
	ldmia	$a_ptr!,{r4-r11}	@ copy in1_y
	stmia	r3!,{r4-r11}
	ldmia	$a_ptr,{r4-r11}		@ copy in1_z
	orr	r12,r4,r5
	orr	r12,r12,r6
	orr	r12,r12,r7
	orr	r12,r12,r8
	orr	r12,r12,r9
	orr	r12,r12,r10
	orr	r12,r12,r11
	stmia	r3!,{r4-r11}
	ldmia	$a_ptr,{r4-r11}
	cmp	r12,#0
#ifdef	__thumb2__
	it	ne
@@ -1684,27 +1668,19 @@ ecp_nistz256_point_add_affine:
	stmdb	sp!,{r0-r12,lr}		@ push from r0, unusual, but intentional
	sub	sp,sp,#32*15

	ldmia	$a_ptr!,{r4-r11}	@ copy in1
	ldmia	$a_ptr!,{r4-r11}	@ copy in1_x
	add	r3,sp,#$in1_x
	orr	r12,r4,r5
	orr	r12,r12,r6
	orr	r12,r12,r7
	orr	r12,r12,r8
	orr	r12,r12,r9
	orr	r12,r12,r10
	orr	r12,r12,r11
	stmia	r3!,{r4-r11}
	ldmia	$a_ptr!,{r4-r11}
	orr	r12,r12,r4
	orr	r12,r12,r5
	ldmia	$a_ptr!,{r4-r11}	@ copy in1_y
	stmia	r3!,{r4-r11}
	ldmia	$a_ptr,{r4-r11}		@ copy in1_z
	orr	r12,r4,r5
	orr	r12,r12,r6
	orr	r12,r12,r7
	orr	r12,r12,r8
	orr	r12,r12,r9
	orr	r12,r12,r10
	orr	r12,r12,r11
	stmia	r3!,{r4-r11}
	ldmia	$a_ptr,{r4-r11}
	cmp	r12,#0
#ifdef	__thumb2__
	it	ne
@@ -1713,7 +1689,7 @@ ecp_nistz256_point_add_affine:
	stmia	r3,{r4-r11}
	str	r12,[sp,#32*15+4]	@ !in1infty

	ldmia	$b_ptr!,{r4-r11}	@ copy in2
	ldmia	$b_ptr!,{r4-r11}	@ copy in2_x
	add	r3,sp,#$in2_x
	orr	r12,r4,r5
	orr	r12,r12,r6
@@ -1723,7 +1699,7 @@ ecp_nistz256_point_add_affine:
	orr	r12,r12,r10
	orr	r12,r12,r11
	stmia	r3!,{r4-r11}
	ldmia	$b_ptr!,{r4-r11}
	ldmia	$b_ptr!,{r4-r11}	@ copy in2_y
	orr	r12,r12,r4
	orr	r12,r12,r5
	orr	r12,r12,r6
+25 −51
Original line number Diff line number Diff line
@@ -862,46 +862,28 @@ ecp_nistz256_point_add:
	stp	x25,x26,[sp,#64]
	sub	sp,sp,#32*12

	ldp	$a0,$a1,[$bp]
	ldp	$a2,$a3,[$bp,#16]
	ldp	$t0,$t1,[$bp,#32]
	ldp	$t2,$t3,[$bp,#48]
	ldp	$a0,$a1,[$bp,#64]	// in2_z
	ldp	$a2,$a3,[$bp,#64+16]
	 mov	$rp_real,$rp
	 mov	$ap_real,$ap
	 mov	$bp_real,$bp
	orr	$a0,$a0,$a1
	orr	$a2,$a2,$a3
	 ldp	$acc0,$acc1,[$ap]
	orr	$t0,$t0,$t1
	orr	$t2,$t2,$t3
	 ldp	$acc2,$acc3,[$ap,#16]
	orr	$a0,$a0,$a2
	orr	$t2,$t0,$t2
	 ldp	$t0,$t1,[$ap,#32]
	orr	$in2infty,$a0,$t2
	cmp	$in2infty,#0
	 ldp	$t2,$t3,[$ap,#48]
	csetm	$in2infty,ne		// !in2infty

	 ldp	$a0,$a1,[$bp_real,#64]	// forward load for p256_sqr_mont
	orr	$acc0,$acc0,$acc1
	orr	$acc2,$acc2,$acc3
	 ldp	$a2,$a3,[$bp_real,#64+16]
	orr	$t0,$t0,$t1
	orr	$t2,$t2,$t3
	orr	$acc0,$acc0,$acc2
	orr	$t0,$t0,$t2
	orr	$in1infty,$acc0,$t0
	cmp	$in1infty,#0
	 ldr	$poly1,.Lpoly+8
	 ldr	$poly3,.Lpoly+24
	csetm	$in1infty,ne		// !in1infty

	orr	$t0,$a0,$a1
	orr	$t2,$a2,$a3
	orr	$in2infty,$t0,$t2
	cmp	$in2infty,#0
	csetm	$in2infty,ne		// !in2infty
	add	$rp,sp,#$Z2sqr
	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Z2sqr, in2_z);

	ldp	$a0,$a1,[$ap_real,#64]
	ldp	$a0,$a1,[$ap_real,#64]	// in1_z
	ldp	$a2,$a3,[$ap_real,#64+16]
	orr	$t0,$a0,$a1
	orr	$t2,$a2,$a3
	orr	$in1infty,$t0,$t2
	cmp	$in1infty,#0
	csetm	$in1infty,ne		// !in1infty
	add	$rp,sp,#$Z1sqr
	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Z1sqr, in1_z);

@@ -1150,36 +1132,28 @@ ecp_nistz256_point_add_affine:
	ldr	$poly1,.Lpoly+8
	ldr	$poly3,.Lpoly+24

	ldp	$a0,$a1,[$ap]
	ldp	$a2,$a3,[$ap,#16]
	ldp	$t0,$t1,[$ap,#32]
	ldp	$t2,$t3,[$ap,#48]
	orr	$a0,$a0,$a1
	orr	$a2,$a2,$a3
	orr	$t0,$t0,$t1
	orr	$t2,$t2,$t3
	orr	$a0,$a0,$a2
	orr	$t0,$t0,$t2
	orr	$in1infty,$a0,$t0
	ldp	$a0,$a1,[$ap,#64]	// in1_z
	ldp	$a2,$a3,[$ap,#64+16]
	orr	$t0,$a0,$a1
	orr	$t2,$a2,$a3
	orr	$in1infty,$t0,$t2
	cmp	$in1infty,#0
	csetm	$in1infty,ne		// !in1infty

	ldp	$a0,$a1,[$bp]
	ldp	$a2,$a3,[$bp,#16]
	ldp	$t0,$t1,[$bp,#32]
	ldp	$acc0,$acc1,[$bp]	// in2_x
	ldp	$acc2,$acc3,[$bp,#16]
	ldp	$t0,$t1,[$bp,#32]	// in2_y
	ldp	$t2,$t3,[$bp,#48]
	orr	$a0,$a0,$a1
	orr	$a2,$a2,$a3
	orr	$acc0,$acc0,$acc1
	orr	$acc2,$acc2,$acc3
	orr	$t0,$t0,$t1
	orr	$t2,$t2,$t3
	orr	$a0,$a0,$a2
	orr	$acc0,$acc0,$acc2
	orr	$t0,$t0,$t2
	orr	$in2infty,$a0,$t0
	orr	$in2infty,$acc0,$t0
	cmp	$in2infty,#0
	csetm	$in2infty,ne		// !in2infty

	ldp	$a0,$a1,[$ap_real,#64]
	ldp	$a2,$a3,[$ap_real,#64+16]
	add	$rp,sp,#$Z1sqr
	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Z1sqr, in1_z);

+45 −105
Original line number Diff line number Diff line
@@ -899,71 +899,39 @@ ecp_nistz256_point_add:
	mov	$ap,$ap_real
	mov	$bp,$bp_real

	ld	[$bp],@acc[0]		! in2_x
	ld	[$bp+4],@acc[1]
	ld	[$bp+8],@acc[2]
	ld	[$bp+12],@acc[3]
	ld	[$bp+16],@acc[4]
	ld	[$bp+20],@acc[5]
	ld	[$bp+24],@acc[6]
	ld	[$bp+28],@acc[7]
	ld	[$bp+32],$t0		! in2_y
	ld	[$bp+32+4],$t1
	ld	[$bp+32+8],$t2
	ld	[$bp+32+12],$t3
	ld	[$bp+32+16],$t4
	ld	[$bp+32+20],$t5
	ld	[$bp+32+24],$t6
	ld	[$bp+32+28],$t7
	or	@acc[1],@acc[0],@acc[0]
	or	@acc[3],@acc[2],@acc[2]
	or	@acc[5],@acc[4],@acc[4]
	or	@acc[7],@acc[6],@acc[6]
	or	@acc[2],@acc[0],@acc[0]
	or	@acc[6],@acc[4],@acc[4]
	or	@acc[4],@acc[0],@acc[0]
	ld	[$bp+64],$t0		! in2_z
	ld	[$bp+64+4],$t1
	ld	[$bp+64+8],$t2
	ld	[$bp+64+12],$t3
	ld	[$bp+64+16],$t4
	ld	[$bp+64+20],$t5
	ld	[$bp+64+24],$t6
	ld	[$bp+64+28],$t7
	or	$t1,$t0,$t0
	or	$t3,$t2,$t2
	or	$t5,$t4,$t4
	or	$t7,$t6,$t6
	or	$t2,$t0,$t0
	or	$t6,$t4,$t4
	or	$t4,$t0,$t0
	or	@acc[0],$t0,$t0		! !in2infty
	or	$t4,$t0,$t0		! !in2infty
	movrnz	$t0,-1,$t0
	st	$t0,[%fp+STACK_BIAS-12]

	ld	[$ap],@acc[0]		! in1_x
	ld	[$ap+4],@acc[1]
	ld	[$ap+8],@acc[2]
	ld	[$ap+12],@acc[3]
	ld	[$ap+16],@acc[4]
	ld	[$ap+20],@acc[5]
	ld	[$ap+24],@acc[6]
	ld	[$ap+28],@acc[7]
	ld	[$ap+32],$t0		! in1_y
	ld	[$ap+32+4],$t1
	ld	[$ap+32+8],$t2
	ld	[$ap+32+12],$t3
	ld	[$ap+32+16],$t4
	ld	[$ap+32+20],$t5
	ld	[$ap+32+24],$t6
	ld	[$ap+32+28],$t7
	or	@acc[1],@acc[0],@acc[0]
	or	@acc[3],@acc[2],@acc[2]
	or	@acc[5],@acc[4],@acc[4]
	or	@acc[7],@acc[6],@acc[6]
	or	@acc[2],@acc[0],@acc[0]
	or	@acc[6],@acc[4],@acc[4]
	or	@acc[4],@acc[0],@acc[0]
	ld	[$ap+64],$t0		! in1_z
	ld	[$ap+64+4],$t1
	ld	[$ap+64+8],$t2
	ld	[$ap+64+12],$t3
	ld	[$ap+64+16],$t4
	ld	[$ap+64+20],$t5
	ld	[$ap+64+24],$t6
	ld	[$ap+64+28],$t7
	or	$t1,$t0,$t0
	or	$t3,$t2,$t2
	or	$t5,$t4,$t4
	or	$t7,$t6,$t6
	or	$t2,$t0,$t0
	or	$t6,$t4,$t4
	or	$t4,$t0,$t0
	or	@acc[0],$t0,$t0		! !in1infty
	or	$t4,$t0,$t0		! !in1infty
	movrnz	$t0,-1,$t0
	st	$t0,[%fp+STACK_BIAS-16]

@@ -1201,37 +1169,21 @@ ecp_nistz256_point_add_affine:
	mov	$ap,$ap_real
	mov	$bp,$bp_real

	ld	[$ap],@acc[0]		! in1_x
	ld	[$ap+4],@acc[1]
	ld	[$ap+8],@acc[2]
	ld	[$ap+12],@acc[3]
	ld	[$ap+16],@acc[4]
	ld	[$ap+20],@acc[5]
	ld	[$ap+24],@acc[6]
	ld	[$ap+28],@acc[7]
	ld	[$ap+32],$t0		! in1_y
	ld	[$ap+32+4],$t1
	ld	[$ap+32+8],$t2
	ld	[$ap+32+12],$t3
	ld	[$ap+32+16],$t4
	ld	[$ap+32+20],$t5
	ld	[$ap+32+24],$t6
	ld	[$ap+32+28],$t7
	or	@acc[1],@acc[0],@acc[0]
	or	@acc[3],@acc[2],@acc[2]
	or	@acc[5],@acc[4],@acc[4]
	or	@acc[7],@acc[6],@acc[6]
	or	@acc[2],@acc[0],@acc[0]
	or	@acc[6],@acc[4],@acc[4]
	or	@acc[4],@acc[0],@acc[0]
	ld	[$ap+64],$t0		! in1_z
	ld	[$ap+64+4],$t1
	ld	[$ap+64+8],$t2
	ld	[$ap+64+12],$t3
	ld	[$ap+64+16],$t4
	ld	[$ap+64+20],$t5
	ld	[$ap+64+24],$t6
	ld	[$ap+64+28],$t7
	or	$t1,$t0,$t0
	or	$t3,$t2,$t2
	or	$t5,$t4,$t4
	or	$t7,$t6,$t6
	or	$t2,$t0,$t0
	or	$t6,$t4,$t4
	or	$t4,$t0,$t0
	or	@acc[0],$t0,$t0		! !in1infty
	or	$t4,$t0,$t0		! !in1infty
	movrnz	$t0,-1,$t0
	st	$t0,[%fp+STACK_BIAS-16]

@@ -2402,16 +2354,6 @@ ecp_nistz256_point_add_vis3:
	stx	$acc2,[%sp+LOCALS64+$in2_y+16]
	stx	$acc3,[%sp+LOCALS64+$in2_y+24]

	or	$a1,$a0,$a0
	or	$a3,$a2,$a2
	or	$acc1,$acc0,$acc0
	or	$acc3,$acc2,$acc2
	or	$a2,$a0,$a0
	or	$acc2,$acc0,$acc0
	or	$acc0,$a0,$a0
	movrnz	$a0,-1,$a0			! !in2infty
	stx	$a0,[%fp+STACK_BIAS-8]

	ld	[$bp+64],$acc0			! in2_z
	ld	[$bp+64+4],$t0
	ld	[$bp+64+8],$acc1
@@ -2445,6 +2387,12 @@ ecp_nistz256_point_add_vis3:
	stx	$acc2,[%sp+LOCALS64+$in2_z+16]
	stx	$acc3,[%sp+LOCALS64+$in2_z+24]

	or	$acc1,$acc0,$acc0
	or	$acc3,$acc2,$acc2
	or	$acc2,$acc0,$acc0
	movrnz	$acc0,-1,$acc0			! !in2infty
	stx	$acc0,[%fp+STACK_BIAS-8]

	or	$a0,$t0,$a0
	ld	[$ap+32],$acc0			! in1_y
	or	$a1,$t1,$a1
@@ -2474,16 +2422,6 @@ ecp_nistz256_point_add_vis3:
	stx	$acc2,[%sp+LOCALS64+$in1_y+16]
	stx	$acc3,[%sp+LOCALS64+$in1_y+24]

	or	$a1,$a0,$a0
	or	$a3,$a2,$a2
	or	$acc1,$acc0,$acc0
	or	$acc3,$acc2,$acc2
	or	$a2,$a0,$a0
	or	$acc2,$acc0,$acc0
	or	$acc0,$a0,$a0
	movrnz	$a0,-1,$a0			! !in1infty
	stx	$a0,[%fp+STACK_BIAS-16]

	ldx	[%sp+LOCALS64+$in2_z],$a0	! forward load
	ldx	[%sp+LOCALS64+$in2_z+8],$a1
	ldx	[%sp+LOCALS64+$in2_z+16],$a2
@@ -2510,6 +2448,12 @@ ecp_nistz256_point_add_vis3:
	stx	$acc2,[%sp+LOCALS64+$in1_z+16]
	stx	$acc3,[%sp+LOCALS64+$in1_z+24]

	or	$acc1,$acc0,$acc0
	or	$acc3,$acc2,$acc2
	or	$acc2,$acc0,$acc0
	movrnz	$acc0,-1,$acc0			! !in1infty
	stx	$acc0,[%fp+STACK_BIAS-16]

	call	__ecp_nistz256_sqr_mont_vis3	! p256_sqr_mont(Z2sqr, in2_z);
	add	%sp,LOCALS64+$Z2sqr,$rp

@@ -2871,16 +2815,6 @@ ecp_nistz256_point_add_affine_vis3:
	stx	$acc2,[%sp+LOCALS64+$in1_y+16]
	stx	$acc3,[%sp+LOCALS64+$in1_y+24]

	or	$a1,$a0,$a0
	or	$a3,$a2,$a2
	or	$acc1,$acc0,$acc0
	or	$acc3,$acc2,$acc2
	or	$a2,$a0,$a0
	or	$acc2,$acc0,$acc0
	or	$acc0,$a0,$a0
	movrnz	$a0,-1,$a0			! !in1infty
	stx	$a0,[%fp+STACK_BIAS-16]

	ld	[$ap+64],$a0			! in1_z
	ld	[$ap+64+4],$t0
	ld	[$ap+64+8],$a1
@@ -2902,6 +2836,12 @@ ecp_nistz256_point_add_affine_vis3:
	stx	$a2,[%sp+LOCALS64+$in1_z+16]
	stx	$a3,[%sp+LOCALS64+$in1_z+24]

	or	$a1,$a0,$t0
	or	$a3,$a2,$t2
	or	$t2,$t0,$t0
	movrnz	$t0,-1,$t0			! !in1infty
	stx	$t0,[%fp+STACK_BIAS-16]

	call	__ecp_nistz256_sqr_mont_vis3	! p256_sqr_mont(Z1sqr, in1_z);
	add	%sp,LOCALS64+$Z1sqr,$rp

+15 −15
Original line number Diff line number Diff line
@@ -1405,14 +1405,14 @@ for ($i=0;$i<7;$i++) {
	&mov	("edx",&DWP($i+12,"esi"));
	&mov	(&DWP($i+0,"edi"),"eax");
	&mov	(&DWP(32*18+12,"esp"),"ebp")	if ($i==0);
	&mov	("ebp","eax")			if ($i==0);
	&or	("ebp","eax")			if ($i!=0 && $i<64);
	&mov	("ebp","eax")			if ($i==64);
	&or	("ebp","eax")			if ($i>64);
	&mov	(&DWP($i+4,"edi"),"ebx");
	&or	("ebp","ebx")			if ($i<64);
	&or	("ebp","ebx")			if ($i>=64);
	&mov	(&DWP($i+8,"edi"),"ecx");
	&or	("ebp","ecx")			if ($i<64);
	&or	("ebp","ecx")			if ($i>=64);
	&mov	(&DWP($i+12,"edi"),"edx");
	&or	("ebp","edx")			if ($i<64);
	&or	("ebp","edx")			if ($i>=64);
    }
	&xor	("eax","eax");
	&mov	("esi",&wparam(1));
@@ -1428,14 +1428,14 @@ for ($i=0;$i<7;$i++) {
	&mov	("ecx",&DWP($i+8,"esi"));
	&mov	("edx",&DWP($i+12,"esi"));
	&mov	(&DWP($i+0,"edi"),"eax");
	&mov	("ebp","eax")			if ($i==0);
	&or	("ebp","eax")			if ($i!=0 && $i<64);
	&mov	("ebp","eax")			if ($i==64);
	&or	("ebp","eax")			if ($i>64);
	&mov	(&DWP($i+4,"edi"),"ebx");
	&or	("ebp","ebx")			if ($i<64);
	&or	("ebp","ebx")			if ($i>=64);
	&mov	(&DWP($i+8,"edi"),"ecx");
	&or	("ebp","ecx")			if ($i<64);
	&or	("ebp","ecx")			if ($i>=64);
	&mov	(&DWP($i+12,"edi"),"edx");
	&or	("ebp","edx")			if ($i<64);
	&or	("ebp","edx")			if ($i>=64);
    }
	&xor	("eax","eax");
	&sub	("eax","ebp");
@@ -1684,14 +1684,14 @@ for ($i=0;$i<7;$i++) {
	&mov	("edx",&DWP($i+12,"esi"));
	&mov	(&DWP($i+0,"edi"),"eax");
	&mov	(&DWP(32*15+8,"esp"),"ebp")	if ($i==0);
	&mov	("ebp","eax")			if ($i==0);
	&or	("ebp","eax")			if ($i!=0 && $i<64);
	&mov	("ebp","eax")			if ($i==64);
	&or	("ebp","eax")			if ($i>64);
	&mov	(&DWP($i+4,"edi"),"ebx");
	&or	("ebp","ebx")			if ($i<64);
	&or	("ebp","ebx")			if ($i>=64);
	&mov	(&DWP($i+8,"edi"),"ecx");
	&or	("ebp","ecx")			if ($i<64);
	&or	("ebp","ecx")			if ($i>=64);
	&mov	(&DWP($i+12,"edi"),"edx");
	&or	("ebp","edx")			if ($i<64);
	&or	("ebp","edx")			if ($i>=64);
    }
	&xor	("eax","eax");
	&mov	("esi",&wparam(2));