Commit 9a708bf9 authored by Andy Polyakov
Browse files

{arm64|x86_64}cpuid.pl: add special 16-byte case to OPENSSL_memcmp.



OPENSSL_memcmp is a must in GCM decrypt, and the general-purpose loop takes
quite a portion of execution time for short inputs: more than GHASH for
few-byte inputs, according to the profiler. The special 16-byte case takes it
off the top-five list in the profiler output.

Reviewed-by: Rich Salz <rsalz@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6312)
parent c1b2569d
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -115,6 +115,19 @@ OPENSSL_cleanse:
CRYPTO_memcmp:
	eor	w3,w3,w3
	cbz	x2,.Lno_data	// len==0?
	cmp	x2,#16
	b.ne	.Loop_cmp
	ldp	x8,x9,[x0]
	ldp	x10,x11,[x1]
	eor	x8,x8,x10
	eor	x9,x9,x11
	orr	x8,x8,x9
	mov	x0,#1
	cmp	x8,#0
	csel	x0,xzr,x0,eq
	ret

.align	4
.Loop_cmp:
	ldrb	w4,[x0],#1
	ldrb	w5,[x1],#1
+12 −0
Original line number Diff line number Diff line
@@ -271,6 +271,18 @@ CRYPTO_memcmp:
	xor	%r10,%r10
	cmp	\$0,$arg3
	je	.Lno_data
	cmp	\$16,$arg3
	jne	.Loop_cmp
	mov	($arg1),%r10
	mov	8($arg1),%r11
	mov	\$1,$arg3
	xor	($arg2),%r10
	xor	8($arg2),%r11
	or	%r11,%r10
	cmovnz	$arg3,%rax
	ret

.align	16
.Loop_cmp:
	mov	($arg1),%r10b
	lea	1($arg1),$arg1