Commit 384e6de4 authored by Andy Polyakov
Browse files

x86_64 assembly pack: Win64 SEH face-lift.



- harmonize handlers with guidelines and themselves;
- fix some bugs in handlers;
- add missing handlers in chacha and ecp_nistz256 modules;

Reviewed-by: Rich Salz <rsalz@openssl.org>
parent e1dbf7f4
Loading
Loading
Loading
Loading
+20 −22
Original line number Diff line number Diff line
@@ -599,6 +599,7 @@ $code.=<<___;
.hidden	asm_AES_encrypt
asm_AES_encrypt:
AES_encrypt:
	mov	%rsp,%rax
	push	%rbx
	push	%rbp
	push	%r12
@@ -607,7 +608,6 @@ AES_encrypt:
	push	%r15

	# allocate frame "above" key schedule
	mov	%rsp,%r10
	lea	-63(%rdx),%rcx	# %rdx is key argument
	and	\$-64,%rsp
	sub	%rsp,%rcx
@@ -617,7 +617,7 @@ AES_encrypt:
	sub	\$32,%rsp

	mov	%rsi,16(%rsp)	# save out
	mov	%r10,24(%rsp)	# save real stack pointer
	mov	%rax,24(%rsp)	# save original stack pointer
.Lenc_prologue:

	mov	%rdx,$key
@@ -649,13 +649,13 @@ AES_encrypt:
	mov	$s2,8($out)
	mov	$s3,12($out)

	mov	(%rsi),%r15
	mov	8(%rsi),%r14
	mov	16(%rsi),%r13
	mov	24(%rsi),%r12
	mov	32(%rsi),%rbp
	mov	40(%rsi),%rbx
	lea	48(%rsi),%rsp
	mov	-48(%rsi),%r15
	mov	-40(%rsi),%r14
	mov	-32(%rsi),%r13
	mov	-24(%rsi),%r12
	mov	-16(%rsi),%rbp
	mov	-8(%rsi),%rbx
	lea	(%rsi),%rsp
.Lenc_epilogue:
	ret
.size	AES_encrypt,.-AES_encrypt
@@ -1197,6 +1197,7 @@ $code.=<<___;
.hidden	asm_AES_decrypt
asm_AES_decrypt:
AES_decrypt:
	mov	%rsp,%rax
	push	%rbx
	push	%rbp
	push	%r12
@@ -1205,7 +1206,6 @@ AES_decrypt:
	push	%r15

	# allocate frame "above" key schedule
	mov	%rsp,%r10
	lea	-63(%rdx),%rcx	# %rdx is key argument
	and	\$-64,%rsp
	sub	%rsp,%rcx
@@ -1215,7 +1215,7 @@ AES_decrypt:
	sub	\$32,%rsp

	mov	%rsi,16(%rsp)	# save out
	mov	%r10,24(%rsp)	# save real stack pointer
	mov	%rax,24(%rsp)	# save original stack pointer
.Ldec_prologue:

	mov	%rdx,$key
@@ -1249,13 +1249,13 @@ AES_decrypt:
	mov	$s2,8($out)
	mov	$s3,12($out)

	mov	(%rsi),%r15
	mov	8(%rsi),%r14
	mov	16(%rsi),%r13
	mov	24(%rsi),%r12
	mov	32(%rsi),%rbp
	mov	40(%rsi),%rbx
	lea	48(%rsi),%rsp
	mov	-48(%rsi),%r15
	mov	-40(%rsi),%r14
	mov	-32(%rsi),%r13
	mov	-24(%rsi),%r12
	mov	-16(%rsi),%rbp
	mov	-8(%rsi),%rbx
	lea	(%rsi),%rsp
.Ldec_epilogue:
	ret
.size	AES_decrypt,.-AES_decrypt
@@ -1675,10 +1675,9 @@ AES_cbc_encrypt:
	mov	%r9d,%r9d	# clear upper half of enc

	lea	.LAES_Te(%rip),$sbox
	lea	.LAES_Td(%rip),%r10
	cmp	\$0,%r9
	jne	.Lcbc_picked_te
	lea	.LAES_Td(%rip),$sbox
.Lcbc_picked_te:
	cmoveq	%r10,$sbox

	mov	OPENSSL_ia32cap_P(%rip),%r10d
	cmp	\$$speed_limit,%rdx
@@ -2580,7 +2579,6 @@ block_se_handler:
	jae	.Lin_block_prologue

	mov	24(%rax),%rax		# pull saved real stack pointer
	lea	48(%rax),%rax		# adjust...

	mov	-8(%rax),%rbx
	mov	-16(%rax),%rbp
+27 −28
Original line number Diff line number Diff line
@@ -341,13 +341,13 @@ $code.=<<___;
${func}_xop:
.Lxop_shortcut:
	mov	`($win64?56:8)`(%rsp),$in0	# load 7th parameter
	mov	%rsp,%rax		# copy %rsp
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	mov	%rsp,%r11		# copy %rsp
	sub	\$`$framesz+$win64*16*10`,%rsp
	and	\$-64,%rsp		# align stack frame

@@ -363,7 +363,7 @@ ${func}_xop:
	mov	$ivp,$_ivp
	mov	$ctx,$_ctx
	mov	$in0,$_in0
	mov	%r11,$_rsp
	mov	%rax,$_rsp
___
$code.=<<___ if ($win64);
	movaps	%xmm6,`$framesz+16*0`(%rsp)
@@ -617,13 +617,13 @@ $code.=<<___ if ($win64);
	movaps	`$framesz+16*9`(%rsp),%xmm15
___
$code.=<<___;
	mov	(%rsi),%r15
	mov	8(%rsi),%r14
	mov	16(%rsi),%r13
	mov	24(%rsi),%r12
	mov	32(%rsi),%rbp
	mov	40(%rsi),%rbx
	lea	48(%rsi),%rsp
	mov	-48(%rsi),%r15
	mov	-40(%rsi),%r14
	mov	-32(%rsi),%r13
	mov	-24(%rsi),%r12
	mov	-16(%rsi),%rbp
	mov	-8(%rsi),%rbx
	lea	(%rsi),%rsp
.Lepilogue_xop:
	ret
.size	${func}_xop,.-${func}_xop
@@ -639,13 +639,13 @@ $code.=<<___;
${func}_avx:
.Lavx_shortcut:
	mov	`($win64?56:8)`(%rsp),$in0	# load 7th parameter
	mov	%rsp,%rax		# copy %rsp
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	mov	%rsp,%r11		# copy %rsp
	sub	\$`$framesz+$win64*16*10`,%rsp
	and	\$-64,%rsp		# align stack frame

@@ -661,7 +661,7 @@ ${func}_avx:
	mov	$ivp,$_ivp
	mov	$ctx,$_ctx
	mov	$in0,$_in0
	mov	%r11,$_rsp
	mov	%rax,$_rsp
___
$code.=<<___ if ($win64);
	movaps	%xmm6,`$framesz+16*0`(%rsp)
@@ -868,13 +868,13 @@ $code.=<<___ if ($win64);
	movaps	`$framesz+16*9`(%rsp),%xmm15
___
$code.=<<___;
	mov	(%rsi),%r15
	mov	8(%rsi),%r14
	mov	16(%rsi),%r13
	mov	24(%rsi),%r12
	mov	32(%rsi),%rbp
	mov	40(%rsi),%rbx
	lea	48(%rsi),%rsp
	mov	-48(%rsi),%r15
	mov	-40(%rsi),%r14
	mov	-32(%rsi),%r13
	mov	-24(%rsi),%r12
	mov	-16(%rsi),%rbp
	mov	-8(%rsi),%rbx
	lea	(%rsi),%rsp
.Lepilogue_avx:
	ret
.size	${func}_avx,.-${func}_avx
@@ -935,13 +935,13 @@ $code.=<<___;
${func}_avx2:
.Lavx2_shortcut:
	mov	`($win64?56:8)`(%rsp),$in0	# load 7th parameter
	mov	%rsp,%rax		# copy %rsp
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	mov	%rsp,%r11		# copy %rsp
	sub	\$`2*$SZ*$rounds+8*8+$win64*16*10`,%rsp
	and	\$-256*$SZ,%rsp		# align stack frame
	add	\$`2*$SZ*($rounds-8)`,%rsp
@@ -958,7 +958,7 @@ ${func}_avx2:
	mov	$ivp,$_ivp
	mov	$ctx,$_ctx
	mov	$in0,$_in0
	mov	%r11,$_rsp
	mov	%rax,$_rsp
___
$code.=<<___ if ($win64);
	movaps	%xmm6,`$framesz+16*0`(%rsp)
@@ -1205,13 +1205,13 @@ $code.=<<___ if ($win64);
	movaps	`$framesz+16*9`(%rsp),%xmm15
___
$code.=<<___;
	mov	(%rsi),%r15
	mov	8(%rsi),%r14
	mov	16(%rsi),%r13
	mov	24(%rsi),%r12
	mov	32(%rsi),%rbp
	mov	40(%rsi),%rbx
	lea	48(%rsi),%rsp
	mov	-48(%rsi),%r15
	mov	-40(%rsi),%r14
	mov	-32(%rsi),%r13
	mov	-24(%rsi),%r12
	mov	-16(%rsi),%rbp
	mov	-8(%rsi),%rbx
	lea	(%rsi),%rsp
.Lepilogue_avx2:
	ret
.size	${func}_avx2,.-${func}_avx2
@@ -1569,7 +1569,6 @@ ___
$code.=<<___;
	mov	%rax,%rsi		# put aside Rsp
	mov	16*$SZ+7*8(%rax),%rax	# pull $_rsp
	lea	48(%rax),%rax

	mov	-8(%rax),%rbx
	mov	-16(%rax),%rbp
+139 −135
Original line number Diff line number Diff line
@@ -1172,7 +1172,7 @@ ___
# with zero-round key xor.
{
my ($in0,$in1,$in2,$in3,$in4,$in5)=map("%xmm$_",(10..15));
my ($key0,$ctr)=("${key_}d","${ivp}d");
my ($key0,$ctr)=("%ebp","${ivp}d");
my $frame_size = 0x80 + ($win64?160:0);

$code.=<<___;
@@ -1201,26 +1201,25 @@ $code.=<<___;

.align	16
.Lctr32_bulk:
	lea	(%rsp),%rax
	lea	(%rsp),$key_			# use $key_ as frame pointer
	push	%rbp
	sub	\$$frame_size,%rsp
	and	\$-16,%rsp	# Linux kernel stack can be incorrectly seeded
___
$code.=<<___ if ($win64);
	movaps	%xmm6,-0xa8(%rax)		# offload everything
	movaps	%xmm7,-0x98(%rax)
	movaps	%xmm8,-0x88(%rax)
	movaps	%xmm9,-0x78(%rax)
	movaps	%xmm10,-0x68(%rax)
	movaps	%xmm11,-0x58(%rax)
	movaps	%xmm12,-0x48(%rax)
	movaps	%xmm13,-0x38(%rax)
	movaps	%xmm14,-0x28(%rax)
	movaps	%xmm15,-0x18(%rax)
	movaps	%xmm6,-0xa8($key_)		# offload everything
	movaps	%xmm7,-0x98($key_)
	movaps	%xmm8,-0x88($key_)
	movaps	%xmm9,-0x78($key_)
	movaps	%xmm10,-0x68($key_)
	movaps	%xmm11,-0x58($key_)
	movaps	%xmm12,-0x48($key_)
	movaps	%xmm13,-0x38($key_)
	movaps	%xmm14,-0x28($key_)
	movaps	%xmm15,-0x18($key_)
.Lctr32_body:
___
$code.=<<___;
	lea	-8(%rax),%rbp

	# 8 16-byte words on top of stack are counter values
	# xor-ed with zero-round key
@@ -1692,26 +1691,26 @@ $code.=<<___ if (!$win64);
	pxor	%xmm15,%xmm15
___
$code.=<<___ if ($win64);
	movaps	-0xa0(%rbp),%xmm6
	movaps	%xmm0,-0xa0(%rbp)		# clear stack
	movaps	-0x90(%rbp),%xmm7
	movaps	%xmm0,-0x90(%rbp)
	movaps	-0x80(%rbp),%xmm8
	movaps	%xmm0,-0x80(%rbp)
	movaps	-0x70(%rbp),%xmm9
	movaps	%xmm0,-0x70(%rbp)
	movaps	-0x60(%rbp),%xmm10
	movaps	%xmm0,-0x60(%rbp)
	movaps	-0x50(%rbp),%xmm11
	movaps	%xmm0,-0x50(%rbp)
	movaps	-0x40(%rbp),%xmm12
	movaps	%xmm0,-0x40(%rbp)
	movaps	-0x30(%rbp),%xmm13
	movaps	%xmm0,-0x30(%rbp)
	movaps	-0x20(%rbp),%xmm14
	movaps	%xmm0,-0x20(%rbp)
	movaps	-0x10(%rbp),%xmm15
	movaps	%xmm0,-0x10(%rbp)
	movaps	-0xa8($key_),%xmm6
	movaps	%xmm0,-0xa8($key_)		# clear stack
	movaps	-0x98($key_),%xmm7
	movaps	%xmm0,-0x98($key_)
	movaps	-0x88($key_),%xmm8
	movaps	%xmm0,-0x88($key_)
	movaps	-0x78($key_),%xmm9
	movaps	%xmm0,-0x78($key_)
	movaps	-0x68($key_),%xmm10
	movaps	%xmm0,-0x68($key_)
	movaps	-0x58($key_),%xmm11
	movaps	%xmm0,-0x58($key_)
	movaps	-0x48($key_),%xmm12
	movaps	%xmm0,-0x48($key_)
	movaps	-0x38($key_),%xmm13
	movaps	%xmm0,-0x38($key_)
	movaps	-0x28($key_),%xmm14
	movaps	%xmm0,-0x28($key_)
	movaps	-0x18($key_),%xmm15
	movaps	%xmm0,-0x18($key_)
	movaps	%xmm0,0x00(%rsp)
	movaps	%xmm0,0x10(%rsp)
	movaps	%xmm0,0x20(%rsp)
@@ -1722,8 +1721,8 @@ $code.=<<___ if ($win64);
	movaps	%xmm0,0x70(%rsp)
___
$code.=<<___;
	lea	(%rbp),%rsp
	pop	%rbp
	mov	-8($key_),%rbp
	lea	($key_),%rsp
.Lctr32_epilogue:
	ret
.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
@@ -1740,32 +1739,32 @@ my @tweak=map("%xmm$_",(10..15));
my ($twmask,$twres,$twtmp)=("%xmm8","%xmm9",@tweak[4]);
my ($key2,$ivp,$len_)=("%r8","%r9","%r9");
my $frame_size = 0x70 + ($win64?160:0);
my $key_ = "%rbp";	# override so that we can use %r11 as FP

$code.=<<___;
.globl	aesni_xts_encrypt
.type	aesni_xts_encrypt,\@function,6
.align	16
aesni_xts_encrypt:
	lea	(%rsp),%rax
	lea	(%rsp),%r11			# frame pointer
	push	%rbp
	sub	\$$frame_size,%rsp
	and	\$-16,%rsp	# Linux kernel stack can be incorrectly seeded
___
$code.=<<___ if ($win64);
	movaps	%xmm6,-0xa8(%rax)		# offload everything
	movaps	%xmm7,-0x98(%rax)
	movaps	%xmm8,-0x88(%rax)
	movaps	%xmm9,-0x78(%rax)
	movaps	%xmm10,-0x68(%rax)
	movaps	%xmm11,-0x58(%rax)
	movaps	%xmm12,-0x48(%rax)
	movaps	%xmm13,-0x38(%rax)
	movaps	%xmm14,-0x28(%rax)
	movaps	%xmm15,-0x18(%rax)
	movaps	%xmm6,-0xa8(%r11)		# offload everything
	movaps	%xmm7,-0x98(%r11)
	movaps	%xmm8,-0x88(%r11)
	movaps	%xmm9,-0x78(%r11)
	movaps	%xmm10,-0x68(%r11)
	movaps	%xmm11,-0x58(%r11)
	movaps	%xmm12,-0x48(%r11)
	movaps	%xmm13,-0x38(%r11)
	movaps	%xmm14,-0x28(%r11)
	movaps	%xmm15,-0x18(%r11)
.Lxts_enc_body:
___
$code.=<<___;
	lea	-8(%rax),%rbp
	movups	($ivp),$inout0			# load clear-text tweak
	mov	240(%r8),$rounds		# key2->rounds
	mov	240($key),$rnds_		# key1->rounds
@@ -2183,26 +2182,26 @@ $code.=<<___ if (!$win64);
	pxor	%xmm15,%xmm15
___
$code.=<<___ if ($win64);
	movaps	-0xa0(%rbp),%xmm6
	movaps	%xmm0,-0xa0(%rbp)		# clear stack
	movaps	-0x90(%rbp),%xmm7
	movaps	%xmm0,-0x90(%rbp)
	movaps	-0x80(%rbp),%xmm8
	movaps	%xmm0,-0x80(%rbp)
	movaps	-0x70(%rbp),%xmm9
	movaps	%xmm0,-0x70(%rbp)
	movaps	-0x60(%rbp),%xmm10
	movaps	%xmm0,-0x60(%rbp)
	movaps	-0x50(%rbp),%xmm11
	movaps	%xmm0,-0x50(%rbp)
	movaps	-0x40(%rbp),%xmm12
	movaps	%xmm0,-0x40(%rbp)
	movaps	-0x30(%rbp),%xmm13
	movaps	%xmm0,-0x30(%rbp)
	movaps	-0x20(%rbp),%xmm14
	movaps	%xmm0,-0x20(%rbp)
	movaps	-0x10(%rbp),%xmm15
	movaps	%xmm0,-0x10(%rbp)
	movaps	-0xa8(%r11),%xmm6
	movaps	%xmm0,-0xa8(%r11)		# clear stack
	movaps	-0x98(%r11),%xmm7
	movaps	%xmm0,-0x98(%r11)
	movaps	-0x88(%r11),%xmm8
	movaps	%xmm0,-0x88(%r11)
	movaps	-0x78(%r11),%xmm9
	movaps	%xmm0,-0x78(%r11)
	movaps	-0x68(%r11),%xmm10
	movaps	%xmm0,-0x68(%r11)
	movaps	-0x58(%r11),%xmm11
	movaps	%xmm0,-0x58(%r11)
	movaps	-0x48(%r11),%xmm12
	movaps	%xmm0,-0x48(%r11)
	movaps	-0x38(%r11),%xmm13
	movaps	%xmm0,-0x38(%r11)
	movaps	-0x28(%r11),%xmm14
	movaps	%xmm0,-0x28(%r11)
	movaps	-0x18(%r11),%xmm15
	movaps	%xmm0,-0x18(%r11)
	movaps	%xmm0,0x00(%rsp)
	movaps	%xmm0,0x10(%rsp)
	movaps	%xmm0,0x20(%rsp)
@@ -2212,8 +2211,8 @@ $code.=<<___ if ($win64);
	movaps	%xmm0,0x60(%rsp)
___
$code.=<<___;
	lea	(%rbp),%rsp
	pop	%rbp
	mov	-8(%r11),%rbp
	lea	(%r11),%rsp
.Lxts_enc_epilogue:
	ret
.size	aesni_xts_encrypt,.-aesni_xts_encrypt
@@ -2224,26 +2223,25 @@ $code.=<<___;
.type	aesni_xts_decrypt,\@function,6
.align	16
aesni_xts_decrypt:
	lea	(%rsp),%rax
	lea	(%rsp),%r11			# frame pointer
	push	%rbp
	sub	\$$frame_size,%rsp
	and	\$-16,%rsp	# Linux kernel stack can be incorrectly seeded
___
$code.=<<___ if ($win64);
	movaps	%xmm6,-0xa8(%rax)		# offload everything
	movaps	%xmm7,-0x98(%rax)
	movaps	%xmm8,-0x88(%rax)
	movaps	%xmm9,-0x78(%rax)
	movaps	%xmm10,-0x68(%rax)
	movaps	%xmm11,-0x58(%rax)
	movaps	%xmm12,-0x48(%rax)
	movaps	%xmm13,-0x38(%rax)
	movaps	%xmm14,-0x28(%rax)
	movaps	%xmm15,-0x18(%rax)
	movaps	%xmm6,-0xa8(%r11)		# offload everything
	movaps	%xmm7,-0x98(%r11)
	movaps	%xmm8,-0x88(%r11)
	movaps	%xmm9,-0x78(%r11)
	movaps	%xmm10,-0x68(%r11)
	movaps	%xmm11,-0x58(%r11)
	movaps	%xmm12,-0x48(%r11)
	movaps	%xmm13,-0x38(%r11)
	movaps	%xmm14,-0x28(%r11)
	movaps	%xmm15,-0x18(%r11)
.Lxts_dec_body:
___
$code.=<<___;
	lea	-8(%rax),%rbp
	movups	($ivp),$inout0			# load clear-text tweak
	mov	240($key2),$rounds		# key2->rounds
	mov	240($key),$rnds_		# key1->rounds
@@ -2687,26 +2685,26 @@ $code.=<<___ if (!$win64);
	pxor	%xmm15,%xmm15
___
$code.=<<___ if ($win64);
	movaps	-0xa0(%rbp),%xmm6
	movaps	%xmm0,-0xa0(%rbp)		# clear stack
	movaps	-0x90(%rbp),%xmm7
	movaps	%xmm0,-0x90(%rbp)
	movaps	-0x80(%rbp),%xmm8
	movaps	%xmm0,-0x80(%rbp)
	movaps	-0x70(%rbp),%xmm9
	movaps	%xmm0,-0x70(%rbp)
	movaps	-0x60(%rbp),%xmm10
	movaps	%xmm0,-0x60(%rbp)
	movaps	-0x50(%rbp),%xmm11
	movaps	%xmm0,-0x50(%rbp)
	movaps	-0x40(%rbp),%xmm12
	movaps	%xmm0,-0x40(%rbp)
	movaps	-0x30(%rbp),%xmm13
	movaps	%xmm0,-0x30(%rbp)
	movaps	-0x20(%rbp),%xmm14
	movaps	%xmm0,-0x20(%rbp)
	movaps	-0x10(%rbp),%xmm15
	movaps	%xmm0,-0x10(%rbp)
	movaps	-0xa8(%r11),%xmm6
	movaps	%xmm0,-0xa8(%r11)		# clear stack
	movaps	-0x98(%r11),%xmm7
	movaps	%xmm0,-0x98(%r11)
	movaps	-0x88(%r11),%xmm8
	movaps	%xmm0,-0x88(%r11)
	movaps	-0x78(%r11),%xmm9
	movaps	%xmm0,-0x78(%r11)
	movaps	-0x68(%r11),%xmm10
	movaps	%xmm0,-0x68(%r11)
	movaps	-0x58(%r11),%xmm11
	movaps	%xmm0,-0x58(%r11)
	movaps	-0x48(%r11),%xmm12
	movaps	%xmm0,-0x48(%r11)
	movaps	-0x38(%r11),%xmm13
	movaps	%xmm0,-0x38(%r11)
	movaps	-0x28(%r11),%xmm14
	movaps	%xmm0,-0x28(%r11)
	movaps	-0x18(%r11),%xmm15
	movaps	%xmm0,-0x18(%r11)
	movaps	%xmm0,0x00(%rsp)
	movaps	%xmm0,0x10(%rsp)
	movaps	%xmm0,0x20(%rsp)
@@ -2716,8 +2714,8 @@ $code.=<<___ if ($win64);
	movaps	%xmm0,0x60(%rsp)
___
$code.=<<___;
	lea	(%rbp),%rsp
	pop	%rbp
	mov	-8(%r11),%rbp
	lea	(%r11),%rsp
.Lxts_dec_epilogue:
	ret
.size	aesni_xts_decrypt,.-aesni_xts_decrypt
@@ -2943,6 +2941,7 @@ $code.=<<___ if (!$win64);
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	lea	0x28(%rsp),%rax
___
$code.=<<___ if ($win64);
	movaps	0x00(%rsp),%xmm6
@@ -2967,14 +2966,14 @@ $code.=<<___ if ($win64);
	movaps	%xmm0,0x90(%rsp)
	lea	0xa0+0x28(%rsp),%rax
.Locb_enc_pop:
	lea	0xa0(%rsp),%rsp
___
$code.=<<___;
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	mov	-40(%rax),%r14
	mov	-32(%rax),%r13
	mov	-24(%rax),%r12
	mov	-16(%rax),%rbp
	mov	-8(%rax),%rbx
	lea	(%rax),%rsp
.Locb_enc_epilogue:
	ret
.size	aesni_ocb_encrypt,.-aesni_ocb_encrypt
@@ -3410,6 +3409,7 @@ $code.=<<___ if (!$win64);
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	lea	0x28(%rsp),%rax
___
$code.=<<___ if ($win64);
	movaps	0x00(%rsp),%xmm6
@@ -3434,14 +3434,14 @@ $code.=<<___ if ($win64);
	movaps	%xmm0,0x90(%rsp)
	lea	0xa0+0x28(%rsp),%rax
.Locb_dec_pop:
	lea	0xa0(%rsp),%rsp
___
$code.=<<___;
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	mov	-40(%rax),%r14
	mov	-32(%rax),%r13
	mov	-24(%rax),%r12
	mov	-16(%rax),%rbp
	mov	-8(%rax),%rbx
	lea	(%rax),%rsp
.Locb_dec_epilogue:
	ret
.size	aesni_ocb_decrypt,.-aesni_ocb_decrypt
@@ -3650,7 +3650,6 @@ ___
{
my $frame_size = 0x10 + ($win64?0xa0:0);	# used in decrypt
my ($iv,$in0,$in1,$in2,$in3,$in4)=map("%xmm$_",(10..15));
my $inp_=$key_;

$code.=<<___;
.globl	${PREFIX}_cbc_encrypt
@@ -3732,7 +3731,7 @@ $code.=<<___;
	jmp	.Lcbc_ret
.align	16
.Lcbc_decrypt_bulk:
	lea	(%rsp),%rax
	lea	(%rsp),%r11		# frame pointer
	push	%rbp
	sub	\$$frame_size,%rsp
	and	\$-16,%rsp	# Linux kernel stack can be incorrectly seeded
@@ -3750,8 +3749,11 @@ $code.=<<___ if ($win64);
	movaps	%xmm15,0xa0(%rsp)
.Lcbc_decrypt_body:
___

my $inp_=$key_="%rbp";			# reassign $key_

$code.=<<___;
	lea	-8(%rax),%rbp
	mov	$key,$key_		# [re-]backup $key [after reassignment]
	movups	($ivp),$iv
	mov	$rnds_,$rounds
	cmp	\$0x50,$len
@@ -3791,7 +3793,7 @@ $code.=<<___;
	pxor		$rndkey0,$inout1
	$movkey		0x10-0x70($key),$rndkey1
	pxor		$rndkey0,$inout2
	xor		$inp_,$inp_
	mov		\$-1,$inp_
	cmp		\$0x70,$len	# is there at least 0x60 bytes ahead?
	pxor		$rndkey0,$inout3
	pxor		$rndkey0,$inout4
@@ -3807,8 +3809,8 @@ $code.=<<___;
	aesdec		$rndkey1,$inout4
	aesdec		$rndkey1,$inout5
	aesdec		$rndkey1,$inout6
	setnc		${inp_}b
	shl		\$7,$inp_
	adc		\$0,$inp_
	and		\$128,$inp_
	aesdec		$rndkey1,$inout7
	add		$inp,$inp_
	$movkey		0x30-0x70($key),$rndkey1
@@ -4172,8 +4174,8 @@ $code.=<<___ if ($win64);
	movaps	%xmm0,0xa0(%rsp)
___
$code.=<<___;
	lea	(%rbp),%rsp
	pop	%rbp
	mov	-8(%r11),%rbp
	lea	(%r11),%rsp
.Lcbc_ret:
	ret
.size	${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
@@ -4744,13 +4746,16 @@ ctr_xts_se_handler:
	cmp	%r10,%rbx		# context->Rip>=epilogue label
	jae	.Lcommon_seh_tail

	mov	160($context),%rax	# pull context->Rbp
	lea	-0xa0(%rax),%rsi	# %xmm save area
	mov	208($context),%rax	# pull context->R11

	lea	-0xa8(%rax),%rsi	# %xmm save area
	lea	512($context),%rdi	# & context.Xmm6
	mov	\$20,%ecx		# 10*sizeof(%xmm0)/sizeof(%rax)
	.long	0xa548f3fc		# cld; rep movsq

	jmp	.Lcommon_rbp_tail
	mov	-8(%rax),%rbp		# restore saved %rbp
	mov	%rbp,160($context)	# restore context->Rbp
	jmp	.Lcommon_seh_tail
.size	ctr_xts_se_handler,.-ctr_xts_se_handler

.type	ocb_se_handler,\@abi-omnipotent
@@ -4834,9 +4839,13 @@ cbc_se_handler:
	cmp	%r10,%rbx		# context->Rip<"prologue" label
	jb	.Lcommon_seh_tail

	mov	120($context),%rax	# pull context->Rax

	lea	.Lcbc_decrypt_body(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<cbc_decrypt_body
	jb	.Lrestore_cbc_rax
	jb	.Lcommon_seh_tail

	mov	152($context),%rax	# pull context->Rsp

	lea	.Lcbc_ret(%rip),%r10
	cmp	%r10,%rbx		# context->Rip>="epilogue" label
@@ -4847,15 +4856,10 @@ cbc_se_handler:
	mov	\$20,%ecx		# 10*sizeof(%xmm0)/sizeof(%rax)
	.long	0xa548f3fc		# cld; rep movsq

.Lcommon_rbp_tail:
	mov	160($context),%rax	# pull context->Rbp
	mov	(%rax),%rbp		# restore saved %rbp
	lea	8(%rax),%rax		# adjust stack pointer
	mov	%rbp,160($context)	# restore context->Rbp
	jmp	.Lcommon_seh_tail
	mov	208($context),%rax	# pull context->R11

.Lrestore_cbc_rax:
	mov	120($context),%rax
	mov	-8(%rax),%rbp		# restore saved %rbp
	mov	%rbp,160($context)	# restore context->Rbp

.Lcommon_seh_tail:
	mov	8(%rax),%rdi
+88 −73
Original line number Diff line number Diff line
@@ -1334,7 +1334,7 @@ $code.=<<___;
	cmp	%rax, %rbp
	jb	.Lecb_enc_bzero

	lea	(%rbp),%rsp		# restore %rsp
	lea	0x78(%rbp),%rax
___
$code.=<<___ if ($win64);
	movaps	0x40(%rbp), %xmm6
@@ -1347,17 +1347,17 @@ $code.=<<___ if ($win64);
	movaps	0xb0(%rbp), %xmm13
	movaps	0xc0(%rbp), %xmm14
	movaps	0xd0(%rbp), %xmm15
	lea	0xa0(%rbp), %rsp
	lea	0xa0(%rax), %rax
.Lecb_enc_tail:
___
$code.=<<___;
	mov	0x48(%rsp), %r15
	mov	0x50(%rsp), %r14
	mov	0x58(%rsp), %r13
	mov	0x60(%rsp), %r12
	mov	0x68(%rsp), %rbx
	mov	0x70(%rsp), %rax
	lea	0x78(%rsp), %rsp
	mov	%rax, %rbp
	mov	-48(%rax), %r15
	mov	-40(%rax), %r14
	mov	-32(%rax), %r13
	mov	-24(%rax), %r12
	mov	-16(%rax), %rbx
	mov	-8(%rax), %rbp
	lea	(%rax), %rsp		# restore %rsp
.Lecb_enc_epilogue:
	ret
.size	bsaes_ecb_encrypt_blocks,.-bsaes_ecb_encrypt_blocks
@@ -1536,7 +1536,7 @@ $code.=<<___;
	cmp	%rax, %rbp
	jb	.Lecb_dec_bzero

	lea	(%rbp),%rsp		# restore %rsp
	lea	0x78(%rbp),%rax
___
$code.=<<___ if ($win64);
	movaps	0x40(%rbp), %xmm6
@@ -1549,17 +1549,17 @@ $code.=<<___ if ($win64);
	movaps	0xb0(%rbp), %xmm13
	movaps	0xc0(%rbp), %xmm14
	movaps	0xd0(%rbp), %xmm15
	lea	0xa0(%rbp), %rsp
	lea	0xa0(%rax), %rax
.Lecb_dec_tail:
___
$code.=<<___;
	mov	0x48(%rsp), %r15
	mov	0x50(%rsp), %r14
	mov	0x58(%rsp), %r13
	mov	0x60(%rsp), %r12
	mov	0x68(%rsp), %rbx
	mov	0x70(%rsp), %rax
	lea	0x78(%rsp), %rsp
	mov	%rax, %rbp
	mov	-48(%rax), %r15
	mov	-40(%rax), %r14
	mov	-32(%rax), %r13
	mov	-24(%rax), %r12
	mov	-16(%rax), %rbx
	mov	-8(%rax), %rbp
	lea	(%rax), %rsp		# restore %rsp
.Lecb_dec_epilogue:
	ret
.size	bsaes_ecb_decrypt_blocks,.-bsaes_ecb_decrypt_blocks
@@ -1826,7 +1826,7 @@ $code.=<<___;
	cmp	%rax, %rbp
	ja	.Lcbc_dec_bzero

	lea	(%rbp),%rsp		# restore %rsp
	lea	0x78(%rbp),%rax
___
$code.=<<___ if ($win64);
	movaps	0x40(%rbp), %xmm6
@@ -1839,17 +1839,17 @@ $code.=<<___ if ($win64);
	movaps	0xb0(%rbp), %xmm13
	movaps	0xc0(%rbp), %xmm14
	movaps	0xd0(%rbp), %xmm15
	lea	0xa0(%rbp), %rsp
	lea	0xa0(%rax), %rax
.Lcbc_dec_tail:
___
$code.=<<___;
	mov	0x48(%rsp), %r15
	mov	0x50(%rsp), %r14
	mov	0x58(%rsp), %r13
	mov	0x60(%rsp), %r12
	mov	0x68(%rsp), %rbx
	mov	0x70(%rsp), %rax
	lea	0x78(%rsp), %rsp
	mov	%rax, %rbp
	mov	-48(%rax), %r15
	mov	-40(%rax), %r14
	mov	-32(%rax), %r13
	mov	-24(%rax), %r12
	mov	-16(%rax), %rbx
	mov	-8(%rax), %rbp
	lea	(%rax), %rsp		# restore %rsp
.Lcbc_dec_epilogue:
	ret
.size	bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
@@ -2058,7 +2058,7 @@ $code.=<<___;
	cmp	%rax, %rbp
	ja	.Lctr_enc_bzero

	lea	(%rbp),%rsp		# restore %rsp
	lea	0x78(%rbp),%rax
___
$code.=<<___ if ($win64);
	movaps	0x40(%rbp), %xmm6
@@ -2071,17 +2071,17 @@ $code.=<<___ if ($win64);
	movaps	0xb0(%rbp), %xmm13
	movaps	0xc0(%rbp), %xmm14
	movaps	0xd0(%rbp), %xmm15
	lea	0xa0(%rbp), %rsp
	lea	0xa0(%rax), %rax
.Lctr_enc_tail:
___
$code.=<<___;
	mov	0x48(%rsp), %r15
	mov	0x50(%rsp), %r14
	mov	0x58(%rsp), %r13
	mov	0x60(%rsp), %r12
	mov	0x68(%rsp), %rbx
	mov	0x70(%rsp), %rax
	lea	0x78(%rsp), %rsp
	mov	%rax, %rbp
	mov	-48(%rax), %r15
	mov	-40(%rax), %r14
	mov	-32(%rax), %r13
	mov	-24(%rax), %r12
	mov	-16(%rax), %rbx
	mov	-8(%rax), %rbp
	lea	(%rax), %rsp		# restore %rsp
.Lctr_enc_epilogue:
	ret
.size	bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
@@ -2448,7 +2448,7 @@ $code.=<<___;
	cmp	%rax, %rbp
	ja	.Lxts_enc_bzero

	lea	(%rbp),%rsp		# restore %rsp
	lea	0x78(%rbp),%rax
___
$code.=<<___ if ($win64);
	movaps	0x40(%rbp), %xmm6
@@ -2461,17 +2461,17 @@ $code.=<<___ if ($win64);
	movaps	0xb0(%rbp), %xmm13
	movaps	0xc0(%rbp), %xmm14
	movaps	0xd0(%rbp), %xmm15
	lea	0xa0(%rbp), %rsp
	lea	0xa0(%rax), %rax
.Lxts_enc_tail:
___
$code.=<<___;
	mov	0x48(%rsp), %r15
	mov	0x50(%rsp), %r14
	mov	0x58(%rsp), %r13
	mov	0x60(%rsp), %r12
	mov	0x68(%rsp), %rbx
	mov	0x70(%rsp), %rax
	lea	0x78(%rsp), %rsp
	mov	%rax, %rbp
	mov	-48(%rax), %r15
	mov	-40(%rax), %r14
	mov	-32(%rax), %r13
	mov	-24(%rax), %r12
	mov	-16(%rax), %rbx
	mov	-8(%rax), %rbp
	lea	(%rax), %rsp		# restore %rsp
.Lxts_enc_epilogue:
	ret
.size	bsaes_xts_encrypt,.-bsaes_xts_encrypt
@@ -2855,7 +2855,7 @@ $code.=<<___;
	cmp	%rax, %rbp
	ja	.Lxts_dec_bzero

	lea	(%rbp),%rsp		# restore %rsp
	lea	0x78(%rbp),%rax
___
$code.=<<___ if ($win64);
	movaps	0x40(%rbp), %xmm6
@@ -2868,17 +2868,17 @@ $code.=<<___ if ($win64);
	movaps	0xb0(%rbp), %xmm13
	movaps	0xc0(%rbp), %xmm14
	movaps	0xd0(%rbp), %xmm15
	lea	0xa0(%rbp), %rsp
	lea	0xa0(%rax), %rax
.Lxts_dec_tail:
___
$code.=<<___;
	mov	0x48(%rsp), %r15
	mov	0x50(%rsp), %r14
	mov	0x58(%rsp), %r13
	mov	0x60(%rsp), %r12
	mov	0x68(%rsp), %rbx
	mov	0x70(%rsp), %rax
	lea	0x78(%rsp), %rsp
	mov	%rax, %rbp
	mov	-48(%rax), %r15
	mov	-40(%rax), %r14
	mov	-32(%rax), %r13
	mov	-24(%rax), %r12
	mov	-16(%rax), %rbx
	mov	-8(%rax), %rbp
	lea	(%rax), %rsp		# restore %rsp
.Lxts_dec_epilogue:
	ret
.size	bsaes_xts_decrypt,.-bsaes_xts_decrypt
@@ -2974,31 +2974,34 @@ se_handler:

	mov	0(%r11),%r10d		# HandlerData[0]
	lea	(%rsi,%r10),%r10	# prologue label
	cmp	%r10,%rbx		# context->Rip<prologue label
	jb	.Lin_prologue

	mov	152($context),%rax	# pull context->Rsp
	cmp	%r10,%rbx		# context->Rip<=prologue label
	jbe	.Lin_prologue

	mov	4(%r11),%r10d		# HandlerData[1]
	lea	(%rsi,%r10),%r10	# epilogue label
	cmp	%r10,%rbx		# context->Rip>=epilogue label
	jae	.Lin_prologue

	mov	8(%r11),%r10d		# HandlerData[2]
	lea	(%rsi,%r10),%r10	# tail label
	cmp	%r10,%rbx		# context->Rip>=tail label
	jae	.Lin_tail

	mov	160($context),%rax	# pull context->Rbp

	lea	0x40(%rax),%rsi		# %xmm save area
	lea	512($context),%rdi	# &context.Xmm6
	mov	\$20,%ecx		# 10*sizeof(%xmm0)/sizeof(%rax)
	.long	0xa548f3fc		# cld; rep movsq
	lea	0xa0(%rax),%rax		# adjust stack pointer

	mov	0x70(%rax),%rbp
	mov	0x68(%rax),%rbx
	mov	0x60(%rax),%r12
	mov	0x58(%rax),%r13
	mov	0x50(%rax),%r14
	mov	0x48(%rax),%r15
	lea	0x78(%rax),%rax		# adjust stack pointer
	lea	0xa0+0x78(%rax),%rax	# adjust stack pointer

.Lin_tail:
	# %rax holds the stack pointer value the function returns with.
	# Offsets must mirror the function epilogues, which reload %r15
	# from -48(%rax) through %rbp from -8(%rax); loading them in the
	# opposite order would hand swapped non-volatile registers to the
	# unwinder.
	mov	-8(%rax),%rbp
	mov	-16(%rax),%rbx
	mov	-24(%rax),%r12
	mov	-32(%rax),%r13
	mov	-40(%rax),%r14
	mov	-48(%rax),%r15
	mov	%rbx,144($context)	# restore context->Rbx
	mov	%rbp,160($context)	# restore context->Rbp
	mov	%r12,216($context)	# restore context->R12
@@ -3079,28 +3082,40 @@ $code.=<<___ if ($ecb);
	.byte	9,0,0,0
	.rva	se_handler
	.rva	.Lecb_enc_body,.Lecb_enc_epilogue	# HandlerData[]
	.rva	.Lecb_enc_tail
	.long	0
.Lecb_dec_info:
	.byte	9,0,0,0
	.rva	se_handler
	.rva	.Lecb_dec_body,.Lecb_dec_epilogue	# HandlerData[]
	.rva	.Lecb_dec_tail
	.long	0
___
$code.=<<___;
.Lcbc_dec_info:
	.byte	9,0,0,0
	.rva	se_handler
	.rva	.Lcbc_dec_body,.Lcbc_dec_epilogue	# HandlerData[]
	.rva	.Lcbc_dec_tail
	.long	0
.Lctr_enc_info:
	.byte	9,0,0,0
	.rva	se_handler
	.rva	.Lctr_enc_body,.Lctr_enc_epilogue	# HandlerData[]
	.rva	.Lctr_enc_tail
	.long	0
.Lxts_enc_info:
	.byte	9,0,0,0
	.rva	se_handler
	.rva	.Lxts_enc_body,.Lxts_enc_epilogue	# HandlerData[]
	.rva	.Lxts_enc_tail
	.long	0
.Lxts_dec_info:
	.byte	9,0,0,0
	.rva	se_handler
	.rva	.Lxts_dec_body,.Lxts_dec_epilogue	# HandlerData[]
	.rva	.Lxts_dec_tail
	.long	0
___
}

+1 −1
Original line number Diff line number Diff line
@@ -1738,11 +1738,11 @@ $code.=<<___ if ($win64);
	movaps	-0x38(%r11),%xmm13
	movaps	-0x28(%r11),%xmm14
	movaps	-0x18(%r11),%xmm15
.LSEH_end_rsaz_1024_gather5:
___
$code.=<<___;
	lea	(%r11),%rsp
	ret
.LSEH_end_rsaz_1024_gather5:
.size	rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
___
}
Loading