Commit dfde4219, authored by Andy Polyakov, committed by Matt Caswell
Browse files

ec/asm/ecp_nistz256-*.pl: addition to perform stricter reduction.



Addition was not preserving inputs' property of being fully reduced.

Thanks to Brian Smith for reporting this.

Reviewed-by: Rich Salz <rsalz@openssl.org>
parent b62b2454
Loading
Loading
Loading
Loading
+74 −50
Original line number Diff line number Diff line
@@ -174,10 +174,7 @@ __ecp_nistz256_mul_by_2:
	adcs	$a6,$a6,$a6
	mov	$ff,#0
	adcs	$a7,$a7,$a7
#ifdef	__thumb2__
	it	cs
#endif
	movcs	$ff,#-1			@ $ff = carry ? -1 : 0
	adc	$ff,$ff,#0

	b	.Lreduce_by_sub
.size	__ecp_nistz256_mul_by_2,.-__ecp_nistz256_mul_by_2
@@ -228,35 +225,45 @@ __ecp_nistz256_add:
	adcs	$a6,$a6,$t2
	mov	$ff,#0
	adcs	$a7,$a7,$t3
#ifdef	__thumb2__
	it	cs
#endif
	movcs	$ff,#-1			@ $ff = carry ? -1 : 0, "broadcast" carry
	adc	$ff,$ff,#0
	ldr	lr,[sp],#4		@ pop lr

.Lreduce_by_sub:

	@ if a+b carries, subtract modulus.
	@ if a+b >= modulus, subtract modulus.
	@
	@ But since comparison implies subtraction, we subtract
	@ modulus and then add it back if subtraction borrowed.

	subs	$a0,$a0,#-1
	sbcs	$a1,$a1,#-1
	sbcs	$a2,$a2,#-1
	sbcs	$a3,$a3,#0
	sbcs	$a4,$a4,#0
	sbcs	$a5,$a5,#0
	sbcs	$a6,$a6,#1
	sbcs	$a7,$a7,#-1
	sbc	$ff,$ff,#0

	@ Note that because mod has special form, i.e. consists of
	@ 0xffffffff, 1 and 0s, we can conditionally synthesize it by
	@ using value of broadcasted carry as a whole or extracting
	@ single bit. Follow $ff register...
	@ using value of borrow as a whole or extracting single bit.
	@ Follow $ff register...

	subs	$a0,$a0,$ff		@ subtract synthesized modulus
	sbcs	$a1,$a1,$ff
	adds	$a0,$a0,$ff		@ add synthesized modulus
	adcs	$a1,$a1,$ff
	str	$a0,[$r_ptr,#0]
	sbcs	$a2,$a2,$ff
	adcs	$a2,$a2,$ff
	str	$a1,[$r_ptr,#4]
	sbcs	$a3,$a3,#0
	adcs	$a3,$a3,#0
	str	$a2,[$r_ptr,#8]
	sbcs	$a4,$a4,#0
	adcs	$a4,$a4,#0
	str	$a3,[$r_ptr,#12]
	sbcs	$a5,$a5,#0
	adcs	$a5,$a5,#0
	str	$a4,[$r_ptr,#16]
	sbcs	$a6,$a6,$ff,lsr#31
	adcs	$a6,$a6,$ff,lsr#31
	str	$a5,[$r_ptr,#20]
	sbcs	$a7,$a7,$ff
	adcs	$a7,$a7,$ff
	str	$a6,[$r_ptr,#24]
	str	$a7,[$r_ptr,#28]

@@ -304,26 +311,29 @@ __ecp_nistz256_mul_by_3:
	adcs	$a6,$a6,$a6
	mov	$ff,#0
	adcs	$a7,$a7,$a7
#ifdef	__thumb2__
	it	cs
#endif
	movcs	$ff,#-1			@ $ff = carry ? -1 : 0, "broadcast" carry

	subs	$a0,$a0,$ff		@ subtract synthesized modulus, see
					@ .Lreduce_by_sub for details, except
					@ that we don't write anything to
					@ memory, but keep intermediate
					@ results in registers...
	sbcs	$a1,$a1,$ff
	sbcs	$a2,$a2,$ff
	adc	$ff,$ff,#0

	subs	$a0,$a0,#-1		@ .Lreduce_by_sub but without stores
	sbcs	$a1,$a1,#-1
	sbcs	$a2,$a2,#-1
	sbcs	$a3,$a3,#0
	sbcs	$a4,$a4,#0
	 ldr	$b_ptr,[$a_ptr,#0]
	sbcs	$a5,$a5,#0
	sbcs	$a6,$a6,#1
	sbcs	$a7,$a7,#-1
	sbc	$ff,$ff,#0

	adds	$a0,$a0,$ff		@ add synthesized modulus
	adcs	$a1,$a1,$ff
	adcs	$a2,$a2,$ff
	adcs	$a3,$a3,#0
	adcs	$a4,$a4,#0
	 ldr	$b_ptr,[$a_ptr,#0]
	adcs	$a5,$a5,#0
	 ldr	$t1,[$a_ptr,#4]
	sbcs	$a6,$a6,$ff,lsr#31
	adcs	$a6,$a6,$ff,lsr#31
	 ldr	$t2,[$a_ptr,#8]
	sbcs	$a7,$a7,$ff
	adc	$a7,$a7,$ff

	ldr	$t0,[$a_ptr,#12]
	adds	$a0,$a0,$b_ptr		@ 2*a[0:7]+=a[0:7]
@@ -339,10 +349,7 @@ __ecp_nistz256_mul_by_3:
	adcs	$a6,$a6,$t2
	mov	$ff,#0
	adcs	$a7,$a7,$t3
#ifdef	__thumb2__
	it	cs
#endif
	movcs	$ff,#-1			@ $ff = carry ? -1 : 0, "broadcast" carry
	adc	$ff,$ff,#0
	ldr	lr,[sp],#4		@ pop lr

	b	.Lreduce_by_sub
@@ -1210,25 +1217,42 @@ __ecp_nistz256_add_self:
	adcs	$a6,$a6,$a6
	mov	$ff,#0
	adcs	$a7,$a7,$a7
#ifdef	__thumb2__
	it	cs
#endif
	movcs	$ff,#-1			@ $ff = carry ? -1 : 0
	adc	$ff,$ff,#0

	@ if a+b >= modulus, subtract modulus.
	@
	@ But since comparison implies subtraction, we subtract
	@ modulus and then add it back if subtraction borrowed.

	subs	$a0,$a0,#-1
	sbcs	$a1,$a1,#-1
	sbcs	$a2,$a2,#-1
	sbcs	$a3,$a3,#0
	sbcs	$a4,$a4,#0
	sbcs	$a5,$a5,#0
	sbcs	$a6,$a6,#1
	sbcs	$a7,$a7,#-1
	sbc	$ff,$ff,#0

	subs	$a0,$a0,$ff		@ subtract synthesized modulus
	sbcs	$a1,$a1,$ff
	@ Note that because mod has special form, i.e. consists of
	@ 0xffffffff, 1 and 0s, we can conditionally synthesize it by
	@ using value of borrow as a whole or extracting single bit.
	@ Follow $ff register...

	adds	$a0,$a0,$ff		@ add synthesized modulus
	adcs	$a1,$a1,$ff
	str	$a0,[$r_ptr,#0]
	sbcs	$a2,$a2,$ff
	adcs	$a2,$a2,$ff
	str	$a1,[$r_ptr,#4]
	sbcs	$a3,$a3,#0
	adcs	$a3,$a3,#0
	str	$a2,[$r_ptr,#8]
	sbcs	$a4,$a4,#0
	adcs	$a4,$a4,#0
	str	$a3,[$r_ptr,#12]
	sbcs	$a5,$a5,#0
	adcs	$a5,$a5,#0
	str	$a4,[$r_ptr,#16]
	sbcs	$a6,$a6,$ff,lsr#31
	adcs	$a6,$a6,$ff,lsr#31
	str	$a5,[$r_ptr,#20]
	sbcs	$a7,$a7,$ff
	adcs	$a7,$a7,$ff
	str	$a6,[$r_ptr,#24]
	str	$a7,[$r_ptr,#28]

+6 −6
Original line number Diff line number Diff line
@@ -583,14 +583,14 @@ __ecp_nistz256_add:
	adds	$t0,$acc0,#1		// subs	$t0,$a0,#-1 // tmp = ret-modulus
	sbcs	$t1,$acc1,$poly1
	sbcs	$t2,$acc2,xzr
	sbc	$t3,$acc3,$poly3
	cmp	$ap,xzr			// did addition carry?
	sbcs	$t3,$acc3,$poly3
	sbcs	xzr,$ap,xzr		// did subtraction borrow?

	csel	$acc0,$acc0,$t0,eq	// ret = carry ? ret-modulus : ret
	csel	$acc1,$acc1,$t1,eq
	csel	$acc2,$acc2,$t2,eq
	csel	$acc0,$acc0,$t0,lo	// ret = borrow ? ret : ret-modulus
	csel	$acc1,$acc1,$t1,lo
	csel	$acc2,$acc2,$t2,lo
	stp	$acc0,$acc1,[$rp]
	csel	$acc3,$acc3,$t3,eq
	csel	$acc3,$acc3,$t3,lo
	stp	$acc2,$acc3,[$rp,#16]

	ret
+51 −29
Original line number Diff line number Diff line
@@ -406,33 +406,44 @@ __ecp_nistz256_add:
	addccc	@acc[5],$t5,@acc[5]
	addccc	@acc[6],$t6,@acc[6]
	addccc	@acc[7],$t7,@acc[7]
	subc	%g0,%g0,$carry		! broadcast carry bit
	addc	%g0,%g0,$carry

.Lreduce_by_sub:

	! if a+b carries, subtract modulus.
	! if a+b >= modulus, subtract modulus.
	!
	! But since comparison implies subtraction, we subtract
	! modulus and then add it back if subtraction borrowed.

	subcc	@acc[0],-1,@acc[0]
	subccc	@acc[1],-1,@acc[1]
	subccc	@acc[2],-1,@acc[2]
	subccc	@acc[3], 0,@acc[3]
	subccc	@acc[4], 0,@acc[4]
	subccc	@acc[5], 0,@acc[5]
	subccc	@acc[6], 1,@acc[6]
	subccc	@acc[7],-1,@acc[7]
	subc	$carry,0,$carry

	! Note that because mod has special form, i.e. consists of
	! 0xffffffff, 1 and 0s, we can conditionally synthesize it by
	! using value of broadcasted borrow and the borrow bit itself.
	! To minimize dependency chain we first broadcast and then
	! extract the bit by negating (follow $bi).
	! using value of borrow and its negative.

	subcc	@acc[0],$carry,@acc[0]	! subtract synthesized modulus
	subccc	@acc[1],$carry,@acc[1]
	addcc	@acc[0],$carry,@acc[0]	! add synthesized modulus
	addccc	@acc[1],$carry,@acc[1]
	neg	$carry,$bi
	st	@acc[0],[$rp]
	subccc	@acc[2],$carry,@acc[2]
	addccc	@acc[2],$carry,@acc[2]
	st	@acc[1],[$rp+4]
	subccc	@acc[3],0,@acc[3]
	addccc	@acc[3],0,@acc[3]
	st	@acc[2],[$rp+8]
	subccc	@acc[4],0,@acc[4]
	addccc	@acc[4],0,@acc[4]
	st	@acc[3],[$rp+12]
	subccc	@acc[5],0,@acc[5]
	addccc	@acc[5],0,@acc[5]
	st	@acc[4],[$rp+16]
	subccc	@acc[6],$bi,@acc[6]
	addccc	@acc[6],$bi,@acc[6]
	st	@acc[5],[$rp+20]
	subc	@acc[7],$carry,@acc[7]
	addc	@acc[7],$carry,@acc[7]
	st	@acc[6],[$rp+24]
	retl
	st	@acc[7],[$rp+28]
@@ -469,7 +480,7 @@ __ecp_nistz256_mul_by_2:
	addccc	@acc[6],@acc[6],@acc[6]
	addccc	@acc[7],@acc[7],@acc[7]
	b	.Lreduce_by_sub
	subc	%g0,%g0,$carry		! broadcast carry bit
	addc	%g0,%g0,$carry
.type	__ecp_nistz256_mul_by_2,#function
.size	__ecp_nistz256_mul_by_2,.-__ecp_nistz256_mul_by_2

@@ -502,17 +513,27 @@ __ecp_nistz256_mul_by_3:
	addccc	@acc[5],@acc[5],$t5
	addccc	@acc[6],@acc[6],$t6
	addccc	@acc[7],@acc[7],$t7
	subc	%g0,%g0,$carry		! broadcast carry bit
	addc	%g0,%g0,$carry

	subcc	$t0,$carry,$t0		! .Lreduce_by_sub but without stores
	neg	$carry,$bi
	subccc	$t1,$carry,$t1
	subccc	$t2,$carry,$t2
	subcc	$t0,-1,$t0		! .Lreduce_by_sub but without stores
	subccc	$t1,-1,$t1
	subccc	$t2,-1,$t2
	subccc	$t3, 0,$t3
	subccc	$t4, 0,$t4
	subccc	$t5, 0,$t5
	subccc	$t6,$bi,$t6
	subc	$t7,$carry,$t7
	subccc	$t6, 1,$t6
	subccc	$t7,-1,$t7
	subc	$carry,0,$carry

	addcc	$t0,$carry,$t0		! add synthesized modulus
	addccc	$t1,$carry,$t1
	neg	$carry,$bi
	addccc	$t2,$carry,$t2
	addccc	$t3,0,$t3
	addccc	$t4,0,$t4
	addccc	$t5,0,$t5
	addccc	$t6,$bi,$t6
	addc	$t7,$carry,$t7

	addcc	$t0,@acc[0],@acc[0]	! 2*a+a=3*a
	addccc	$t1,@acc[1],@acc[1]
@@ -523,7 +544,7 @@ __ecp_nistz256_mul_by_3:
	addccc	$t6,@acc[6],@acc[6]
	addccc	$t7,@acc[7],@acc[7]
	b	.Lreduce_by_sub
	subc	%g0,%g0,$carry		! broadcast carry bit
	addc	%g0,%g0,$carry
.type	__ecp_nistz256_mul_by_3,#function
.size	__ecp_nistz256_mul_by_3,.-__ecp_nistz256_mul_by_3

@@ -1662,14 +1683,15 @@ __ecp_nistz256_add_noload_vis3:
	addcc	$acc0,1,$t0		! add -modulus, i.e. subtract
	addxccc	$acc1,$poly1,$t1
	addxccc	$acc2,$minus1,$t2
	addxc	$acc3,$poly3,$t3
	addxccc	$acc3,$poly3,$t3
	addxc	$acc4,$minus1,$acc4

	movrnz	$acc4,$t0,$acc0		! if a+b carried, ret = ret-mod
	movrnz	$acc4,$t1,$acc1
	movrz	$acc4,$t0,$acc0		! ret = borrow ? ret : ret-modulus
	movrz	$acc4,$t1,$acc1
	stx	$acc0,[$rp]
	movrnz	$acc4,$t2,$acc2
	movrz	$acc4,$t2,$acc2
	stx	$acc1,[$rp+8]
	movrnz	$acc4,$t3,$acc3
	movrz	$acc4,$t3,$acc3
	stx	$acc2,[$rp+16]
	retl
	stx	$acc3,[$rp+24]
+27 −4
Original line number Diff line number Diff line
@@ -284,18 +284,41 @@ for(1..37) {
	&mov	(&DWP(16,"edi"),"eax");
	&adc	("ecx",&DWP(24,"ebp"));
	&mov	(&DWP(20,"edi"),"ebx");
	&mov	("esi",0);
	&adc	("edx",&DWP(28,"ebp"));
	&mov	(&DWP(24,"edi"),"ecx");
	&sbb	("esi","esi");			# broadcast carry bit
	&adc	("esi",0);
	&mov	(&DWP(28,"edi"),"edx");

	# if a+b carries, subtract modulus.
	# if a+b >= modulus, subtract modulus.
	#
	# But since comparison implies subtraction, we subtract modulus
	# to see if it borrows, and then subtract it for real if
	# subtraction didn't borrow.

	&mov	("eax",&DWP(0,"edi"));
	&mov	("ebx",&DWP(4,"edi"));
	&mov	("ecx",&DWP(8,"edi"));
	&sub	("eax",-1);
	&mov	("edx",&DWP(12,"edi"));
	&sbb	("ebx",-1);
	&mov	("eax",&DWP(16,"edi"));
	&sbb	("ecx",-1);
	&mov	("ebx",&DWP(20,"edi"));
	&sbb	("edx",0);
	&mov	("ecx",&DWP(24,"edi"));
	&sbb	("eax",0);
	&mov	("edx",&DWP(28,"edi"));
	&sbb	("ebx",0);
	&sbb	("ecx",1);
	&sbb	("edx",-1);
	&sbb	("esi",0);

	# Note that because mod has special form, i.e. consists of
	# 0xffffffff, 1 and 0s, we can conditionally synthesize it by
	# assigning carry bit to one register, %ebp, and its negative
	# to another, %esi. But we started by calculating %esi...
	# by using borrow.

	&not	("esi");
	&mov	("eax",&DWP(0,"edi"));
	&mov	("ebp","esi");
	&mov	("ebx",&DWP(4,"edi"));