Commit d9375341 authored by Andy Polyakov
Browse files

ec/asm/ecp_nistz256-x86_64.pl: get corner case logic right.



RT#4284

Reviewed-by: Rich Salz <rsalz@openssl.org>
parent e9fd82f6
Loading
Loading
Loading
Loading
+10 −1
Original line number Diff line number Diff line
@@ -2044,6 +2044,7 @@ $code.=<<___;
	push	%r15
	sub	\$32*5+8, %rsp

.Lpoint_double_shortcut$x:
	movdqu	0x00($a_ptr), %xmm0		# copy	*(P256_POINT *)$a_ptr.x
	mov	$a_ptr, $b_ptr			# backup copy
	movdqu	0x10($a_ptr), %xmm1
@@ -2334,6 +2335,7 @@ $code.=<<___;
	 mov	0x40+8*1($b_ptr), $acc6
	 mov	0x40+8*2($b_ptr), $acc7
	 mov	0x40+8*3($b_ptr), $acc0
	movq	$b_ptr, %xmm1

	lea	0x40-$bias($b_ptr), $a_ptr
	lea	$Z1sqr(%rsp), $r_ptr		# Z1^2
@@ -2389,7 +2391,7 @@ $code.=<<___;
	test	$acc0, $acc0
	jnz	.Ladd_proceed$x			# (in1infty || in2infty)?
	test	$acc1, $acc1
	jz	.Ladd_proceed$x			# is_equal(S1,S2)?
	jz	.Ladd_double$x			# is_equal(S1,S2)?

	movq	%xmm0, $r_ptr			# restore $r_ptr
	pxor	%xmm0, %xmm0
@@ -2401,6 +2403,13 @@ $code.=<<___;
	movdqu	%xmm0, 0x50($r_ptr)
	jmp	.Ladd_done$x

# Branch target of the jz that follows the is_equal(S1,S2) test: puts the
# parked pointers back into $a_ptr/$r_ptr, shrinks the stack frame and hands
# the work over to the code at .Lpoint_double_shortcut$x.
.align	32
.Ladd_double$x:
	movq	%xmm1, $a_ptr			# restore $a_ptr (value parked in %xmm1 by the earlier movq from $b_ptr)
	movq	%xmm0, $r_ptr			# restore $r_ptr
	add	\$`32*(18-5)`, %rsp		# difference in frame sizes: the shortcut target runs right after a sub of 32*5+8; presumably this frame is 32*18+8 - TODO confirm
	jmp	.Lpoint_double_shortcut$x	# label sits after the pushes and the sub, so that prologue is not executed again

.align	32
.Ladd_proceed$x:
	`&load_for_sqr("$R(%rsp)", "$src0")`