Commit 2086edb7 authored by David Benjamin, committed by Kurt Roeckx
Browse files

Fix some CFI issues in x86_64 assembly



The add/double shortcut in ecp_nistz256-x86_64.pl left one instruction
point that did not unwind, and the "slow" path in AES_cbc_encrypt was
not annotated correctly. For the latter, add
.cfi_{remember,restore}_state support to perlasm.

Next, fill in a bunch of functions that are missing no-op .cfi_startproc
and .cfi_endproc blocks. libunwind cannot unwind those stack frames
otherwise.

Finally, work around a bug in libunwind by not encoding rflags. (rflags
isn't a callee-saved register, so there's not much need to annotate it
anyway.)

These were found as part of ABI testing work in BoringSSL.

Reviewed-by: Richard Levitte <levitte@openssl.org>
GH: #8109
(cherry picked from commit c0e8e5007ba5234d4d448e82a1567e0c4467e629)
parent ed48d203
Loading
Loading
Loading
Loading
+16 −2
Original line number Diff line number Diff line
@@ -554,6 +554,7 @@ $code.=<<___;
.type	_x86_64_AES_encrypt_compact,\@abi-omnipotent
.align	16
_x86_64_AES_encrypt_compact:
.cfi_startproc
	lea	128($sbox),$inp			# size optimization
	mov	0-128($inp),$acc1		# prefetch Te4
	mov	32-128($inp),$acc2
@@ -587,6 +588,7 @@ $code.=<<___;
	xor	8($key),$s2
	xor	12($key),$s3
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	_x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
___

@@ -1161,6 +1163,7 @@ $code.=<<___;
.type	_x86_64_AES_decrypt_compact,\@abi-omnipotent
.align	16
_x86_64_AES_decrypt_compact:
.cfi_startproc
	lea	128($sbox),$inp			# size optimization
	mov	0-128($inp),$acc1		# prefetch Td4
	mov	32-128($inp),$acc2
@@ -1203,6 +1206,7 @@ $code.=<<___;
	xor	8($key),$s2
	xor	12($key),$s3
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	_x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
___

@@ -1365,6 +1369,7 @@ AES_set_encrypt_key:
.type	_x86_64_AES_set_encrypt_key,\@abi-omnipotent
.align	16
_x86_64_AES_set_encrypt_key:
.cfi_startproc
	mov	%esi,%ecx			# %ecx=bits
	mov	%rdi,%rsi			# %rsi=userKey
	mov	%rdx,%rdi			# %rdi=key
@@ -1546,6 +1551,7 @@ $code.=<<___;
	mov	\$-1,%rax
.Lexit:
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	_x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
___

@@ -1728,7 +1734,9 @@ AES_cbc_encrypt:
	cmp	\$0,%rdx	# check length
	je	.Lcbc_epilogue
	pushfq
.cfi_push	49		# %rflags
# This could be .cfi_push 49, but libunwind fails on registers it does not
# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087.
.cfi_adjust_cfa_offset	8
	push	%rbx
.cfi_push	%rbx
	push	%rbp
@@ -1751,6 +1759,7 @@ AES_cbc_encrypt:
	cmp	\$0,%r9
	cmoveq	%r10,$sbox

.cfi_remember_state
	mov	OPENSSL_ia32cap_P(%rip),%r10d
	cmp	\$$speed_limit,%rdx
	jb	.Lcbc_slow_prologue
@@ -1986,6 +1995,7 @@ AES_cbc_encrypt:
#--------------------------- SLOW ROUTINE ---------------------------#
.align	16
.Lcbc_slow_prologue:
.cfi_restore_state
	# allocate aligned stack frame...
	lea	-88(%rsp),%rbp
	and	\$-64,%rbp
@@ -1997,8 +2007,10 @@ AES_cbc_encrypt:
	sub	%r10,%rbp

	xchg	%rsp,%rbp
.cfi_def_cfa_register	%rbp
	#add	\$8,%rsp	# reserve for return address!
	mov	%rbp,$_rsp	# save %rsp
.cfi_cfa_expression	$_rsp,deref,+64
.Lcbc_slow_body:
	#mov	%rdi,$_inp	# save copy of inp
	#mov	%rsi,$_out	# save copy of out
@@ -2187,7 +2199,9 @@ AES_cbc_encrypt:
.cfi_def_cfa	%rsp,16
.Lcbc_popfq:
	popfq
.cfi_pop	49		# %rflags
# This could be .cfi_pop 49, but libunwind fails on registers it does not
# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087.
.cfi_adjust_cfa_offset	-8
.Lcbc_epilogue:
	ret
.cfi_endproc
+16 −0
Original line number Diff line number Diff line
@@ -274,6 +274,7 @@ $code.=<<___;
.type	${PREFIX}_encrypt,\@abi-omnipotent
.align	16
${PREFIX}_encrypt:
.cfi_startproc
	movups	($inp),$inout0		# load input
	mov	240($key),$rounds	# key->rounds
___
@@ -284,12 +285,14 @@ $code.=<<___;
	movups	$inout0,($out)		# output
	 pxor	$inout0,$inout0
	ret
.cfi_endproc
.size	${PREFIX}_encrypt,.-${PREFIX}_encrypt

.globl	${PREFIX}_decrypt
.type	${PREFIX}_decrypt,\@abi-omnipotent
.align	16
${PREFIX}_decrypt:
.cfi_startproc
	movups	($inp),$inout0		# load input
	mov	240($key),$rounds	# key->rounds
___
@@ -300,6 +303,7 @@ $code.=<<___;
	movups	$inout0,($out)		# output
	 pxor	$inout0,$inout0
	ret
.cfi_endproc
.size	${PREFIX}_decrypt, .-${PREFIX}_decrypt
___
}
@@ -325,6 +329,7 @@ $code.=<<___;
.type	_aesni_${dir}rypt2,\@abi-omnipotent
.align	16
_aesni_${dir}rypt2:
.cfi_startproc
	$movkey	($key),$rndkey0
	shl	\$4,$rounds
	$movkey	16($key),$rndkey1
@@ -350,6 +355,7 @@ _aesni_${dir}rypt2:
	aes${dir}last	$rndkey0,$inout0
	aes${dir}last	$rndkey0,$inout1
	ret
.cfi_endproc
.size	_aesni_${dir}rypt2,.-_aesni_${dir}rypt2
___
}
@@ -361,6 +367,7 @@ $code.=<<___;
.type	_aesni_${dir}rypt3,\@abi-omnipotent
.align	16
_aesni_${dir}rypt3:
.cfi_startproc
	$movkey	($key),$rndkey0
	shl	\$4,$rounds
	$movkey	16($key),$rndkey1
@@ -391,6 +398,7 @@ _aesni_${dir}rypt3:
	aes${dir}last	$rndkey0,$inout1
	aes${dir}last	$rndkey0,$inout2
	ret
.cfi_endproc
.size	_aesni_${dir}rypt3,.-_aesni_${dir}rypt3
___
}
@@ -406,6 +414,7 @@ $code.=<<___;
.type	_aesni_${dir}rypt4,\@abi-omnipotent
.align	16
_aesni_${dir}rypt4:
.cfi_startproc
	$movkey	($key),$rndkey0
	shl	\$4,$rounds
	$movkey	16($key),$rndkey1
@@ -442,6 +451,7 @@ _aesni_${dir}rypt4:
	aes${dir}last	$rndkey0,$inout2
	aes${dir}last	$rndkey0,$inout3
	ret
.cfi_endproc
.size	_aesni_${dir}rypt4,.-_aesni_${dir}rypt4
___
}
@@ -453,6 +463,7 @@ $code.=<<___;
.type	_aesni_${dir}rypt6,\@abi-omnipotent
.align	16
_aesni_${dir}rypt6:
.cfi_startproc
	$movkey		($key),$rndkey0
	shl		\$4,$rounds
	$movkey		16($key),$rndkey1
@@ -503,6 +514,7 @@ _aesni_${dir}rypt6:
	aes${dir}last	$rndkey0,$inout4
	aes${dir}last	$rndkey0,$inout5
	ret
.cfi_endproc
.size	_aesni_${dir}rypt6,.-_aesni_${dir}rypt6
___
}
@@ -514,6 +526,7 @@ $code.=<<___;
.type	_aesni_${dir}rypt8,\@abi-omnipotent
.align	16
_aesni_${dir}rypt8:
.cfi_startproc
	$movkey		($key),$rndkey0
	shl		\$4,$rounds
	$movkey		16($key),$rndkey1
@@ -574,6 +587,7 @@ _aesni_${dir}rypt8:
	aes${dir}last	$rndkey0,$inout6
	aes${dir}last	$rndkey0,$inout7
	ret
.cfi_endproc
.size	_aesni_${dir}rypt8,.-_aesni_${dir}rypt8
___
}
@@ -598,6 +612,7 @@ $code.=<<___;
.type	aesni_ecb_encrypt,\@function,5
.align	16
aesni_ecb_encrypt:
.cfi_startproc
___
$code.=<<___ if ($win64);
	lea	-0x58(%rsp),%rsp
@@ -943,6 +958,7 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
	ret
.cfi_endproc
.size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
___

+6 −0
Original line number Diff line number Diff line
@@ -816,6 +816,7 @@ $code.=<<___;
.type	_bsaes_encrypt8,\@abi-omnipotent
.align	64
_bsaes_encrypt8:
.cfi_startproc
	lea	.LBS0(%rip), $const	# constants table

	movdqa	($key), @XMM[9]		# round 0 key
@@ -875,11 +876,13 @@ $code.=<<___;
	pxor	@XMM[8], @XMM[0]
	pxor	@XMM[8], @XMM[1]
	ret
.cfi_endproc
.size	_bsaes_encrypt8,.-_bsaes_encrypt8

.type	_bsaes_decrypt8,\@abi-omnipotent
.align	64
_bsaes_decrypt8:
.cfi_startproc
	lea	.LBS0(%rip), $const	# constants table

	movdqa	($key), @XMM[9]		# round 0 key
@@ -937,6 +940,7 @@ $code.=<<___;
	pxor	@XMM[8], @XMM[0]
	pxor	@XMM[8], @XMM[1]
	ret
.cfi_endproc
.size	_bsaes_decrypt8,.-_bsaes_decrypt8
___
}
@@ -971,6 +975,7 @@ $code.=<<___;
.type	_bsaes_key_convert,\@abi-omnipotent
.align	16
_bsaes_key_convert:
.cfi_startproc
	lea	.Lmasks(%rip), $const
	movdqu	($inp), %xmm7		# load round 0 key
	lea	0x10($inp), $inp
@@ -1049,6 +1054,7 @@ _bsaes_key_convert:
	movdqa	0x50($const), %xmm7	# .L63
	#movdqa	%xmm6, ($out)		# don't save last round key
	ret
.cfi_endproc
.size	_bsaes_key_convert,.-_bsaes_key_convert
___
}
+26 −0
Original line number Diff line number Diff line
@@ -91,6 +91,7 @@ $code.=<<___;
.type	_vpaes_encrypt_core,\@abi-omnipotent
.align 16
_vpaes_encrypt_core:
.cfi_startproc
	mov	%rdx,	%r9
	mov	\$16,	%r11
	mov	240(%rdx),%eax
@@ -171,6 +172,7 @@ _vpaes_encrypt_core:
	pxor	%xmm4,	%xmm0	# 0 = A
	pshufb	%xmm1,	%xmm0
	ret
.cfi_endproc
.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core

##
@@ -181,6 +183,7 @@ _vpaes_encrypt_core:
.type	_vpaes_decrypt_core,\@abi-omnipotent
.align	16
_vpaes_decrypt_core:
.cfi_startproc
	mov	%rdx,	%r9		# load key
	mov	240(%rdx),%eax
	movdqa	%xmm9,	%xmm1
@@ -277,6 +280,7 @@ _vpaes_decrypt_core:
	pxor	%xmm4,	%xmm0	# 0 = A
	pshufb	%xmm2,	%xmm0
	ret
.cfi_endproc
.size	_vpaes_decrypt_core,.-_vpaes_decrypt_core

########################################################
@@ -287,6 +291,7 @@ _vpaes_decrypt_core:
.type	_vpaes_schedule_core,\@abi-omnipotent
.align	16
_vpaes_schedule_core:
.cfi_startproc
	# rdi = key
	# rsi = size in bits
	# rdx = buffer
@@ -453,6 +458,7 @@ _vpaes_schedule_core:
	pxor	%xmm6,  %xmm6
	pxor	%xmm7,  %xmm7
	ret
.cfi_endproc
.size	_vpaes_schedule_core,.-_vpaes_schedule_core

##
@@ -472,6 +478,7 @@ _vpaes_schedule_core:
.type	_vpaes_schedule_192_smear,\@abi-omnipotent
.align	16
_vpaes_schedule_192_smear:
.cfi_startproc
	pshufd	\$0x80,	%xmm6,	%xmm1	# d c 0 0 -> c 0 0 0
	pshufd	\$0xFE,	%xmm7,	%xmm0	# b a _ _ -> b b b a
	pxor	%xmm1,	%xmm6		# -> c+d c 0 0
@@ -480,6 +487,7 @@ _vpaes_schedule_192_smear:
	movdqa	%xmm6,	%xmm0
	movhlps	%xmm1,	%xmm6		# clobber low side with zeros
	ret
.cfi_endproc
.size	_vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear

##
@@ -503,6 +511,7 @@ _vpaes_schedule_192_smear:
.type	_vpaes_schedule_round,\@abi-omnipotent
.align	16
_vpaes_schedule_round:
.cfi_startproc
	# extract rcon from xmm8
	pxor	%xmm1,	%xmm1
	palignr	\$15,	%xmm8,	%xmm1
@@ -556,6 +565,7 @@ _vpaes_schedule_low_round:
	pxor	%xmm7,	%xmm0
	movdqa	%xmm0,	%xmm7
	ret
.cfi_endproc
.size	_vpaes_schedule_round,.-_vpaes_schedule_round

##
@@ -570,6 +580,7 @@ _vpaes_schedule_low_round:
.type	_vpaes_schedule_transform,\@abi-omnipotent
.align	16
_vpaes_schedule_transform:
.cfi_startproc
	movdqa	%xmm9,	%xmm1
	pandn	%xmm0,	%xmm1
	psrld	\$4,	%xmm1
@@ -580,6 +591,7 @@ _vpaes_schedule_transform:
	pshufb	%xmm1,	%xmm0
	pxor	%xmm2,	%xmm0
	ret
.cfi_endproc
.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform

##
@@ -608,6 +620,7 @@ _vpaes_schedule_transform:
.type	_vpaes_schedule_mangle,\@abi-omnipotent
.align	16
_vpaes_schedule_mangle:
.cfi_startproc
	movdqa	%xmm0,	%xmm4	# save xmm0 for later
	movdqa	.Lk_mc_forward(%rip),%xmm5
	test	%rcx, 	%rcx
@@ -672,6 +685,7 @@ _vpaes_schedule_mangle:
	and	\$0x30,	%r8
	movdqu	%xmm3,	(%rdx)
	ret
.cfi_endproc
.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle

#
@@ -681,6 +695,7 @@ _vpaes_schedule_mangle:
.type	${PREFIX}_set_encrypt_key,\@function,3
.align	16
${PREFIX}_set_encrypt_key:
.cfi_startproc
___
$code.=<<___ if ($win64);
	lea	-0xb8(%rsp),%rsp
@@ -723,12 +738,14 @@ ___
$code.=<<___;
	xor	%eax,%eax
	ret
.cfi_endproc
.size	${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key

.globl	${PREFIX}_set_decrypt_key
.type	${PREFIX}_set_decrypt_key,\@function,3
.align	16
${PREFIX}_set_decrypt_key:
.cfi_startproc
___
$code.=<<___ if ($win64);
	lea	-0xb8(%rsp),%rsp
@@ -776,12 +793,14 @@ ___
$code.=<<___;
	xor	%eax,%eax
	ret
.cfi_endproc
.size	${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key

.globl	${PREFIX}_encrypt
.type	${PREFIX}_encrypt,\@function,3
.align	16
${PREFIX}_encrypt:
.cfi_startproc
___
$code.=<<___ if ($win64);
	lea	-0xb8(%rsp),%rsp
@@ -819,12 +838,14 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
	ret
.cfi_endproc
.size	${PREFIX}_encrypt,.-${PREFIX}_encrypt

.globl	${PREFIX}_decrypt
.type	${PREFIX}_decrypt,\@function,3
.align	16
${PREFIX}_decrypt:
.cfi_startproc
___
$code.=<<___ if ($win64);
	lea	-0xb8(%rsp),%rsp
@@ -862,6 +883,7 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
	ret
.cfi_endproc
.size	${PREFIX}_decrypt,.-${PREFIX}_decrypt
___
{
@@ -874,6 +896,7 @@ $code.=<<___;
.type	${PREFIX}_cbc_encrypt,\@function,6
.align	16
${PREFIX}_cbc_encrypt:
.cfi_startproc
	xchg	$key,$len
___
($len,$key)=($key,$len);
@@ -944,6 +967,7 @@ ___
$code.=<<___;
.Lcbc_abort:
	ret
.cfi_endproc
.size	${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
___
}
@@ -957,6 +981,7 @@ $code.=<<___;
.type	_vpaes_preheat,\@abi-omnipotent
.align	16
_vpaes_preheat:
.cfi_startproc
	lea	.Lk_s0F(%rip), %r10
	movdqa	-0x20(%r10), %xmm10	# .Lk_inv
	movdqa	-0x10(%r10), %xmm11	# .Lk_inv+16
@@ -966,6 +991,7 @@ _vpaes_preheat:
	movdqa	0x50(%r10), %xmm15	# .Lk_sb2
	movdqa	0x60(%r10), %xmm14	# .Lk_sb2+16
	ret
.cfi_endproc
.size	_vpaes_preheat,.-_vpaes_preheat
########################################################
##                                                    ##
+6 −0
Original line number Diff line number Diff line
@@ -1492,6 +1492,7 @@ $code.=<<___;
.type	rsaz_1024_red2norm_avx2,\@abi-omnipotent
.align	32
rsaz_1024_red2norm_avx2:
.cfi_startproc
	sub	\$-128,$inp	# size optimization
	xor	%rax,%rax
___
@@ -1525,12 +1526,14 @@ ___
}
$code.=<<___;
	ret
.cfi_endproc
.size	rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2

.globl	rsaz_1024_norm2red_avx2
.type	rsaz_1024_norm2red_avx2,\@abi-omnipotent
.align	32
rsaz_1024_norm2red_avx2:
.cfi_startproc
	sub	\$-128,$out	# size optimization
	mov	($inp),@T[0]
	mov	\$0x1fffffff,%eax
@@ -1562,6 +1565,7 @@ $code.=<<___;
	mov	@T[0],`8*($j+2)-128`($out)
	mov	@T[0],`8*($j+3)-128`($out)
	ret
.cfi_endproc
.size	rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2
___
}
@@ -1573,6 +1577,7 @@ $code.=<<___;
.type	rsaz_1024_scatter5_avx2,\@abi-omnipotent
.align	32
rsaz_1024_scatter5_avx2:
.cfi_startproc
	vzeroupper
	vmovdqu	.Lscatter_permd(%rip),%ymm5
	shl	\$4,$power
@@ -1592,6 +1597,7 @@ rsaz_1024_scatter5_avx2:

	vzeroupper
	ret
.cfi_endproc
.size	rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2

.globl	rsaz_1024_gather5_avx2
Loading