Commit e72bf967 authored by Andy Polyakov
Browse files

ec/asm/x25519-x86_64.pl: remove redundant carry chain.



Why is it redundant? We're looking at the carry from the addition of a
small, 11-bit number to a 256-bit one. A carry would mean only one thing:
the resulting first limb is a small number and the remaining ones are zeros.
Hence adding 38 to the first limb can't carry.

Reviewed-by: Rich Salz <rsalz@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/5476)
parent c39e4048
Loading
Loading
Loading
Loading
+6 −15
Original line number Diff line number Diff line
@@ -40,12 +40,12 @@
# P4			+22%		+40%
# Sandy Bridge		-3%		+11%
# Haswell		-1%		+13%
# Broadwell(***)	+26%		+30%
# Skylake(***)		+30%		+47%
# Broadwell(***)	+30%		+35%
# Skylake(***)		+33%		+47%
# Silvermont		+20%		+26%
# Goldmont		+40%		+50%
# Bulldozer		+20%		+9%
# Ryzen(***)		+35%		+32%
# Ryzen(***)		+43%		+40%
# VIA			+170%		+120%
#
# (*)	amd64-51 is popular assembly implementation with 2^51 radix,
@@ -631,13 +631,10 @@ x25519_fe64_sqr:
	and	\$38,%rax

	add	%rax,$acc0
	adc	\$0,$acc1
	mov	$acc0,8*0(%rdi)
	adc	\$0,$acc2
	mov	$acc1,8*1(%rdi)
	adc	\$0,$acc3
	mov	$acc2,8*2(%rdi)
	mov	$acc3,8*3(%rdi)
	mov	$acc0,8*0(%rdi)

	mov	8*3(%rsp),%r15
	mov	8*4(%rsp),%r14
@@ -674,13 +671,10 @@ x25519_fe64_mul121666:
	and	\$38,%rax

	add	%rax,$acc0
	adc	\$0,$acc1
	mov	$acc0,8*0(%rdi)
	adc	\$0,$acc2
	mov	$acc1,8*1(%rdi)
	adc	\$0,$acc3
	mov	$acc2,8*2(%rdi)
	mov	$acc3,8*3(%rdi)
	mov	$acc0,8*0(%rdi)

	ret
.size	x25519_fe64_mul121666,.-x25519_fe64_mul121666
@@ -769,14 +763,11 @@ x25519_fe64_tobytes:
	and	\$19,%rax

	add	%rax,$acc0
	adc	\$0,$acc1
	adc	\$0,$acc2
	adc	\$0,$acc3

	mov	$acc0,8*0(%rdi)
	mov	$acc1,8*1(%rdi)
	mov	$acc2,8*2(%rdi)
	mov	$acc3,8*3(%rdi)
	mov	$acc0,8*0(%rdi)

	ret
.size	x25519_fe64_tobytes,.-x25519_fe64_tobytes