Commit b84460ad authored by Andy Polyakov
Browse files

aes/asm/*-x86_64.pl: add CFI annotations.



Reviewed-by: Rich Salz <rsalz@openssl.org>
parent 1cb35b47
Loading
Loading
Loading
Loading
+84 −0
Original line number Diff line number Diff line
@@ -599,13 +599,21 @@ $code.=<<___;
.hidden	asm_AES_encrypt
asm_AES_encrypt:
AES_encrypt:
.cfi_startproc
	mov	%rsp,%rax
.cfi_def_cfa_register	%rax
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15

	# allocate frame "above" key schedule
	lea	-63(%rdx),%rcx	# %rdx is key argument
@@ -618,6 +626,7 @@ AES_encrypt:

	mov	%rsi,16(%rsp)	# save out
	mov	%rax,24(%rsp)	# save original stack pointer
.cfi_cfa_expression	%rsp+24,deref,+8
.Lenc_prologue:

	mov	%rdx,$key
@@ -644,20 +653,29 @@ AES_encrypt:

	mov	16(%rsp),$out	# restore out
	mov	24(%rsp),%rsi	# restore saved stack pointer
.cfi_def_cfa	%rsi,8
	mov	$s0,0($out)	# write output vector
	mov	$s1,4($out)
	mov	$s2,8($out)
	mov	$s3,12($out)

	mov	-48(%rsi),%r15
.cfi_restore	%r15
	mov	-40(%rsi),%r14
.cfi_restore	%r14
	mov	-32(%rsi),%r13
.cfi_restore	%r13
	mov	-24(%rsi),%r12
.cfi_restore	%r12
	mov	-16(%rsi),%rbp
.cfi_restore	%rbp
	mov	-8(%rsi),%rbx
.cfi_restore	%rbx
	lea	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lenc_epilogue:
	ret
.cfi_endproc
.size	AES_encrypt,.-AES_encrypt
___

@@ -1197,13 +1215,21 @@ $code.=<<___;
.hidden	asm_AES_decrypt
asm_AES_decrypt:
AES_decrypt:
.cfi_startproc
	mov	%rsp,%rax
.cfi_def_cfa_register	%rax
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15

	# allocate frame "above" key schedule
	lea	-63(%rdx),%rcx	# %rdx is key argument
@@ -1216,6 +1242,7 @@ AES_decrypt:

	mov	%rsi,16(%rsp)	# save out
	mov	%rax,24(%rsp)	# save original stack pointer
.cfi_cfa_expression	%rsp+24,deref,+8
.Ldec_prologue:

	mov	%rdx,$key
@@ -1244,20 +1271,29 @@ AES_decrypt:

	mov	16(%rsp),$out	# restore out
	mov	24(%rsp),%rsi	# restore saved stack pointer
.cfi_def_cfa	%rsi,8
	mov	$s0,0($out)	# write output vector
	mov	$s1,4($out)
	mov	$s2,8($out)
	mov	$s3,12($out)

	mov	-48(%rsi),%r15
.cfi_restore	%r15
	mov	-40(%rsi),%r14
.cfi_restore	%r14
	mov	-32(%rsi),%r13
.cfi_restore	%r13
	mov	-24(%rsi),%r12
.cfi_restore	%r12
	mov	-16(%rsi),%rbp
.cfi_restore	%rbp
	mov	-8(%rsi),%rbx
.cfi_restore	%rbx
	lea	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Ldec_epilogue:
	ret
.cfi_endproc
.size	AES_decrypt,.-AES_decrypt
___
#------------------------------------------------------------------#
@@ -1296,22 +1332,34 @@ $code.=<<___;
.type	AES_set_encrypt_key,\@function,3
.align	16
AES_set_encrypt_key:
	# Wrapper around _x86_64_AES_set_encrypt_key.  It builds a 56-byte
	# frame (six 8-byte register pushes plus an 8-byte pad), calls the
	# worker and then drops the frame again.  Each stack adjustment is
	# paired with a CFI annotation describing it.
	#
	# Frame layout after the prologue, relative to %rsp:
	#    0: pad   8: %r15  16: %r14  24: %r13
	#   32: %r12 40: %rbp  48: %rbx  56: return address
.cfi_startproc
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12			# redundant, but allows to share
.cfi_push	%r12
	push	%r13			# exception handler...
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	# 8-byte pad below the saved registers; CFA offset grows to match.
	sub	\$8,%rsp
.cfi_adjust_cfa_offset	8
.Lenc_key_prologue:

	call	_x86_64_AES_set_encrypt_key

	# Only %rbp and %rbx are reloaded from their slots.  The slots that
	# hold %r12-%r15 (the pushes marked "redundant" above) are not read
	# back; they are discarded together with the pad by the add below.
	mov	40(%rsp),%rbp
.cfi_restore	%rbp
	mov	48(%rsp),%rbx
.cfi_restore	%rbx
	# Release pad + six register slots in one step (8 + 6*8 = 56).
	add	\$56,%rsp
.cfi_adjust_cfa_offset	-56
.Lenc_key_epilogue:
	ret
.cfi_endproc
.size	AES_set_encrypt_key,.-AES_set_encrypt_key

.type	_x86_64_AES_set_encrypt_key,\@abi-omnipotent
@@ -1562,13 +1610,21 @@ $code.=<<___;
.type	AES_set_decrypt_key,\@function,3
.align	16
AES_set_decrypt_key:
.cfi_startproc
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	push	%rdx			# save key schedule
.cfi_adjust_cfa_offset	8
.Ldec_key_prologue:

	call	_x86_64_AES_set_encrypt_key
@@ -1622,14 +1678,22 @@ $code.=<<___;
	xor	%rax,%rax
.Labort:
	mov	8(%rsp),%r15
.cfi_restore	%r15
	mov	16(%rsp),%r14
.cfi_restore	%r14
	mov	24(%rsp),%r13
.cfi_restore	%r13
	mov	32(%rsp),%r12
.cfi_restore	%r12
	mov	40(%rsp),%rbp
.cfi_restore	%rbp
	mov	48(%rsp),%rbx
.cfi_restore	%rbx
	add	\$56,%rsp
.cfi_adjust_cfa_offset	-56
.Ldec_key_epilogue:
	ret
.cfi_endproc
.size	AES_set_decrypt_key,.-AES_set_decrypt_key
___

@@ -1660,15 +1724,23 @@ $code.=<<___;
.hidden	asm_AES_cbc_encrypt
asm_AES_cbc_encrypt:
AES_cbc_encrypt:
.cfi_startproc
	cmp	\$0,%rdx	# check length
	je	.Lcbc_epilogue
	pushfq
.cfi_push	49		# %rflags
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
.Lcbc_prologue:

	cld
@@ -1713,8 +1785,10 @@ AES_cbc_encrypt:
.Lcbc_te_ok:

	xchg	%rsp,$key
.cfi_def_cfa_register	$key
	#add	\$8,%rsp	# reserve for return address!
	mov	$key,$_rsp	# save %rsp
.cfi_cfa_expression	$_rsp,deref,+64
.Lcbc_fast_body:
	mov	%rdi,$_inp	# save copy of inp
	mov	%rsi,$_out	# save copy of out
@@ -2096,17 +2170,27 @@ AES_cbc_encrypt:
.align	16
.Lcbc_exit:
	mov	$_rsp,%rsi
.cfi_def_cfa	%rsi,64
	mov	(%rsi),%r15
.cfi_restore	%r15
	mov	8(%rsi),%r14
.cfi_restore	%r14
	mov	16(%rsi),%r13
.cfi_restore	%r13
	mov	24(%rsi),%r12
.cfi_restore	%r12
	mov	32(%rsi),%rbp
.cfi_restore	%rbp
	mov	40(%rsi),%rbx
.cfi_restore	%rbx
	lea	48(%rsi),%rsp
.cfi_def_cfa	%rsp,16
.Lcbc_popfq:
	popfq
.cfi_pop	49		# %rflags
.Lcbc_epilogue:
	ret
.cfi_endproc
.size	AES_cbc_encrypt,.-AES_cbc_encrypt
___
}
+72 −0
Original line number Diff line number Diff line
@@ -105,6 +105,7 @@ $code.=<<___;
.type	aesni_multi_cbc_encrypt,\@function,3
.align	32
aesni_multi_cbc_encrypt:
.cfi_startproc
___
$code.=<<___ if ($avx);
	cmp	\$2,$num
@@ -118,12 +119,19 @@ $code.=<<___ if ($avx);
___
$code.=<<___;
	mov	%rsp,%rax
.cfi_def_cfa_register	%rax
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
___
$code.=<<___ if ($win64);
	lea	-0xa8(%rsp),%rsp
@@ -148,6 +156,7 @@ $code.=<<___;
	sub	\$48,%rsp
	and	\$-64,%rsp
	mov	%rax,16(%rsp)			# original %rsp
.cfi_cfa_expression	%rsp+16,deref,+8

.Lenc4x_body:
	movdqu	($key),$zero			# 0-round key
@@ -319,6 +328,7 @@ $code.=<<___;
	jnz	.Loop_enc4x

	mov	16(%rsp),%rax			# original %rsp
.cfi_def_cfa	%rax,8
	mov	24(%rsp),$num

	#pxor	@inp[0],@out[0]
@@ -350,20 +360,29 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
	mov	-48(%rax),%r15
.cfi_restore	%r15
	mov	-40(%rax),%r14
.cfi_restore	%r14
	mov	-32(%rax),%r13
.cfi_restore	%r13
	mov	-24(%rax),%r12
.cfi_restore	%r12
	mov	-16(%rax),%rbp
.cfi_restore	%rbp
	mov	-8(%rax),%rbx
.cfi_restore	%rbx
	lea	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lenc4x_epilogue:
	ret
.cfi_endproc
.size	aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt

.globl	aesni_multi_cbc_decrypt
.type	aesni_multi_cbc_decrypt,\@function,3
.align	32
aesni_multi_cbc_decrypt:
.cfi_startproc
___
$code.=<<___ if ($avx);
	cmp	\$2,$num
@@ -377,12 +396,19 @@ $code.=<<___ if ($avx);
___
$code.=<<___;
	mov	%rsp,%rax
.cfi_def_cfa_register	%rax
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
___
$code.=<<___ if ($win64);
	lea	-0xa8(%rsp),%rsp
@@ -407,6 +433,7 @@ $code.=<<___;
	sub	\$48,%rsp
	and	\$-64,%rsp
	mov	%rax,16(%rsp)			# original %rsp
.cfi_cfa_expression	%rsp+16,deref,+8

.Ldec4x_body:
	movdqu	($key),$zero			# 0-round key
@@ -578,6 +605,7 @@ $code.=<<___;
	jnz	.Loop_dec4x

	mov	16(%rsp),%rax			# original %rsp
.cfi_def_cfa	%rax,8
	mov	24(%rsp),$num

	lea	`40*4`($inp),$inp
@@ -600,14 +628,22 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
	mov	-48(%rax),%r15
.cfi_restore	%r15
	mov	-40(%rax),%r14
.cfi_restore	%r14
	mov	-32(%rax),%r13
.cfi_restore	%r13
	mov	-24(%rax),%r12
.cfi_restore	%r12
	mov	-16(%rax),%rbp
.cfi_restore	%rbp
	mov	-8(%rax),%rbx
.cfi_restore	%rbx
	lea	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Ldec4x_epilogue:
	ret
.cfi_endproc
.size	aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt
___

@@ -623,14 +659,22 @@ $code.=<<___;
.type	aesni_multi_cbc_encrypt_avx,\@function,3
.align	32
aesni_multi_cbc_encrypt_avx:
.cfi_startproc
_avx_cbc_enc_shortcut:
	mov	%rsp,%rax
.cfi_def_cfa_register	%rax
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
___
$code.=<<___ if ($win64);
	lea	-0xa8(%rsp),%rsp
@@ -657,6 +701,7 @@ $code.=<<___;
	sub	\$192,%rsp
	and	\$-128,%rsp
	mov	%rax,16(%rsp)			# original %rsp
.cfi_cfa_expression	%rsp+16,deref,+8

.Lenc8x_body:
	vzeroupper
@@ -861,6 +906,7 @@ $code.=<<___;
	jnz	.Loop_enc8x

	mov	16(%rsp),%rax			# original %rsp
.cfi_def_cfa	%rax,8
	#mov	24(%rsp),$num
	#lea	`40*8`($inp),$inp
	#dec	$num
@@ -883,27 +929,43 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
	mov	-48(%rax),%r15
.cfi_restore	%r15
	mov	-40(%rax),%r14
.cfi_restore	%r14
	mov	-32(%rax),%r13
.cfi_restore	%r13
	mov	-24(%rax),%r12
.cfi_restore	%r12
	mov	-16(%rax),%rbp
.cfi_restore	%rbp
	mov	-8(%rax),%rbx
.cfi_restore	%rbx
	lea	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lenc8x_epilogue:
	ret
.cfi_endproc
.size	aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx

.type	aesni_multi_cbc_decrypt_avx,\@function,3
.align	32
aesni_multi_cbc_decrypt_avx:
.cfi_startproc
_avx_cbc_dec_shortcut:
	mov	%rsp,%rax
.cfi_def_cfa_register	%rax
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
___
$code.=<<___ if ($win64);
	lea	-0xa8(%rsp),%rsp
@@ -932,6 +994,7 @@ $code.=<<___;
	and	\$-256,%rsp
	sub	\$192,%rsp
	mov	%rax,16(%rsp)			# original %rsp
.cfi_cfa_expression	%rsp+16,deref,+8

.Ldec8x_body:
	vzeroupper
@@ -1167,6 +1230,7 @@ $code.=<<___;
	jnz	.Loop_dec8x

	mov	16(%rsp),%rax			# original %rsp
.cfi_def_cfa	%rax,8
	#mov	24(%rsp),$num
	#lea	`40*8`($inp),$inp
	#dec	$num
@@ -1189,14 +1253,22 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
	mov	-48(%rax),%r15
.cfi_restore	%r15
	mov	-40(%rax),%r14
.cfi_restore	%r14
	mov	-32(%rax),%r13
.cfi_restore	%r13
	mov	-24(%rax),%r12
.cfi_restore	%r12
	mov	-16(%rax),%rbp
.cfi_restore	%rbp
	mov	-8(%rax),%rbx
.cfi_restore	%rbx
	lea	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Ldec8x_epilogue:
	ret
.cfi_endproc
.size	aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx
___
						}}}
+68 −0
Original line number Diff line number Diff line
@@ -186,16 +186,24 @@ $code.=<<___;
.type	aesni_cbc_sha1_enc_ssse3,\@function,6
.align	32
aesni_cbc_sha1_enc_ssse3:
.cfi_startproc
	mov	`($win64?56:8)`(%rsp),$inp	# load 7th argument
	#shr	\$6,$len			# debugging artefact
	#jz	.Lepilogue_ssse3		# debugging artefact
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	lea	`-104-($win64?10*16:0)`(%rsp),%rsp
.cfi_adjust_cfa_offset	`104+($win64?10*16:0)`
	#mov	$in0,$inp			# debugging artefact
	#lea	64(%rsp),$ctx			# debugging artefact
___
@@ -721,15 +729,24 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
	lea	`104+($win64?10*16:0)`(%rsp),%rsi
.cfi_def_cfa	%rsi,56
	mov	0(%rsi),%r15
.cfi_restore	%r15
	mov	8(%rsi),%r14
.cfi_restore	%r14
	mov	16(%rsi),%r13
.cfi_restore	%r13
	mov	24(%rsi),%r12
.cfi_restore	%r12
	mov	32(%rsi),%rbp
.cfi_restore	%rbp
	mov	40(%rsi),%rbx
.cfi_restore	%rbx
	lea	48(%rsi),%rsp
.cfi_def_cfa	%rsp,8
.Lepilogue_ssse3:
	ret
.cfi_endproc
.size	aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3
___

@@ -837,14 +854,22 @@ $code.=<<___;
.type	aesni256_cbc_sha1_dec_ssse3,\@function,6
.align	32
aesni256_cbc_sha1_dec_ssse3:
.cfi_startproc
	mov	`($win64?56:8)`(%rsp),$inp	# load 7th argument
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	lea	`-104-($win64?10*16:0)`(%rsp),%rsp
.cfi_adjust_cfa_offset	`104+($win64?10*16:0)`
___
$code.=<<___ if ($win64);
	movaps	%xmm6,96+0(%rsp)
@@ -992,15 +1017,24 @@ $code.=<<___ if ($win64);
___
# Epilogue of aesni256_cbc_sha1_dec_ssse3: releases the local frame,
# reloads the six callee-saved registers pushed in the prologue and
# returns.  The CFA is described relative to %rsi while the registers are
# being reloaded, then switched back to %rsp once the stack pointer has
# been moved past the save area.
#
# The two CFA updates use .cfi_def_cfa (register, offset), exactly as the
# sibling epilogues of aesni_cbc_sha1_enc_ssse3, aesni_cbc_sha1_enc_avx
# and aesni256_cbc_sha1_dec_avx do.  They were previously misspelled
# ".cfi_cfa_def", which is not a CFI directive.
$code.=<<___;
	# %rsi = bottom of the register save area (local frame skipped).
	lea	`104+($win64?10*16:0)`(%rsp),%rsi
	# CFA = %rsi + 56: six 8-byte saved registers + return address.
.cfi_def_cfa	%rsi,56
	mov	0(%rsi),%r15
.cfi_restore	%r15
	mov	8(%rsi),%r14
.cfi_restore	%r14
	mov	16(%rsi),%r13
.cfi_restore	%r13
	mov	24(%rsi),%r12
.cfi_restore	%r12
	mov	32(%rsi),%rbp
.cfi_restore	%rbp
	mov	40(%rsi),%rbx
.cfi_restore	%rbx
	# Step over the 48-byte save area; %rsp now points at the return
	# address, so CFA = %rsp + 8.
	lea	48(%rsi),%rsp
.cfi_def_cfa	%rsp,8
.Lepilogue_dec_ssse3:
	ret
.cfi_endproc
.size	aesni256_cbc_sha1_dec_ssse3,.-aesni256_cbc_sha1_dec_ssse3
___
						}}}
@@ -1026,16 +1060,24 @@ $code.=<<___;
.type	aesni_cbc_sha1_enc_avx,\@function,6
.align	32
aesni_cbc_sha1_enc_avx:
.cfi_startproc
	mov	`($win64?56:8)`(%rsp),$inp	# load 7th argument
	#shr	\$6,$len			# debugging artefact
	#jz	.Lepilogue_avx			# debugging artefact
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	lea	`-104-($win64?10*16:0)`(%rsp),%rsp
.cfi_adjust_cfa_offset	`104+($win64?10*16:0)`
	#mov	$in0,$inp			# debugging artefact
	#lea	64(%rsp),$ctx			# debugging artefact
___
@@ -1434,15 +1476,24 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
	lea	`104+($win64?10*16:0)`(%rsp),%rsi
.cfi_def_cfa	%rsi,56
	mov	0(%rsi),%r15
.cfi_restore	%r15
	mov	8(%rsi),%r14
.cfi_restore	%r14
	mov	16(%rsi),%r13
.cfi_restore	%r13
	mov	24(%rsi),%r12
.cfi_restore	%r12
	mov	32(%rsi),%rbp
.cfi_restore	%rbp
	mov	40(%rsi),%rbx
.cfi_restore	%rbx
	lea	48(%rsi),%rsp
.cfi_def_cfa	%rsp,8
.Lepilogue_avx:
	ret
.cfi_endproc
.size	aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx
___

@@ -1491,14 +1542,22 @@ $code.=<<___;
.type	aesni256_cbc_sha1_dec_avx,\@function,6
.align	32
aesni256_cbc_sha1_dec_avx:
.cfi_startproc
	mov	`($win64?56:8)`(%rsp),$inp	# load 7th argument
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	lea	`-104-($win64?10*16:0)`(%rsp),%rsp
.cfi_adjust_cfa_offset	`104+($win64?10*16:0)`
___
$code.=<<___ if ($win64);
	movaps	%xmm6,96+0(%rsp)
@@ -1645,15 +1704,24 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
	lea	`104+($win64?10*16:0)`(%rsp),%rsi
.cfi_def_cfa	%rsi,56
	mov	0(%rsi),%r15
.cfi_restore	%r15
	mov	8(%rsi),%r14
.cfi_restore	%r14
	mov	16(%rsi),%r13
.cfi_restore	%r13
	mov	24(%rsi),%r12
.cfi_restore	%r12
	mov	32(%rsi),%rbp
.cfi_restore	%rbp
	mov	40(%rsi),%rbx
.cfi_restore	%rbx
	lea	48(%rsi),%rsp
.cfi_def_cfa	%rsp,8
.Lepilogue_dec_avx:
	ret
.cfi_endproc
.size	aesni256_cbc_sha1_dec_avx,.-aesni256_cbc_sha1_dec_avx
___
						}}}
+55 −1
Original line number Diff line number Diff line
@@ -109,7 +109,7 @@ $_key="16*$SZ+3*8(%rsp)";
$_ivp="16*$SZ+4*8(%rsp)";
$_ctx="16*$SZ+5*8(%rsp)";
$_in0="16*$SZ+6*8(%rsp)";
$_rsp="16*$SZ+7*8(%rsp)";
$_rsp="`16*$SZ+7*8`(%rsp)";
$framesz=16*$SZ+8*8;

$code=<<___;
@@ -339,15 +339,23 @@ $code.=<<___;
.type	${func}_xop,\@function,6
.align	64
${func}_xop:
.cfi_startproc
.Lxop_shortcut:
	mov	`($win64?56:8)`(%rsp),$in0	# load 7th parameter
	mov	%rsp,%rax		# copy %rsp
.cfi_def_cfa_register	%rax
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	sub	\$`$framesz+$win64*16*10`,%rsp
	and	\$-64,%rsp		# align stack frame

@@ -364,6 +372,7 @@ ${func}_xop:
	mov	$ctx,$_ctx
	mov	$in0,$_in0
	mov	%rax,$_rsp
.cfi_cfa_expression	$_rsp,deref,+8
___
$code.=<<___ if ($win64);
	movaps	%xmm6,`$framesz+16*0`(%rsp)
@@ -601,6 +610,7 @@ $code.=<<___;

	mov	$_ivp,$ivp
	mov	$_rsp,%rsi
.cfi_def_cfa	%rsi,8
	vmovdqu	$iv,($ivp)		# output IV
	vzeroall
___
@@ -618,14 +628,22 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
	mov	-48(%rsi),%r15
.cfi_restore	%r15
	mov	-40(%rsi),%r14
.cfi_restore	%r14
	mov	-32(%rsi),%r13
.cfi_restore	%r13
	mov	-24(%rsi),%r12
.cfi_restore	%r12
	mov	-16(%rsi),%rbp
.cfi_restore	%rbp
	mov	-8(%rsi),%rbx
.cfi_restore	%rbx
	lea	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lepilogue_xop:
	ret
.cfi_endproc
.size	${func}_xop,.-${func}_xop
___
######################################################################
@@ -637,15 +655,23 @@ $code.=<<___;
.type	${func}_avx,\@function,6
.align	64
${func}_avx:
.cfi_startproc
.Lavx_shortcut:
	mov	`($win64?56:8)`(%rsp),$in0	# load 7th parameter
	mov	%rsp,%rax		# copy %rsp
.cfi_def_cfa_register	%rax
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	sub	\$`$framesz+$win64*16*10`,%rsp
	and	\$-64,%rsp		# align stack frame

@@ -662,6 +688,7 @@ ${func}_avx:
	mov	$ctx,$_ctx
	mov	$in0,$_in0
	mov	%rax,$_rsp
.cfi_cfa_expression	$_rsp,deref,+8
___
$code.=<<___ if ($win64);
	movaps	%xmm6,`$framesz+16*0`(%rsp)
@@ -852,6 +879,7 @@ $code.=<<___;

	mov	$_ivp,$ivp
	mov	$_rsp,%rsi
.cfi_def_cfa	%rsi,8
	vmovdqu	$iv,($ivp)		# output IV
	vzeroall
___
@@ -869,14 +897,22 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
	mov	-48(%rsi),%r15
.cfi_restore	%r15
	mov	-40(%rsi),%r14
.cfi_restore	%r14
	mov	-32(%rsi),%r13
.cfi_restore	%r13
	mov	-24(%rsi),%r12
.cfi_restore	%r12
	mov	-16(%rsi),%rbp
.cfi_restore	%rbp
	mov	-8(%rsi),%rbx
.cfi_restore	%rbx
	lea	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lepilogue_avx:
	ret
.cfi_endproc
.size	${func}_avx,.-${func}_avx
___

@@ -933,15 +969,23 @@ $code.=<<___;
.type	${func}_avx2,\@function,6
.align	64
${func}_avx2:
.cfi_startproc
.Lavx2_shortcut:
	mov	`($win64?56:8)`(%rsp),$in0	# load 7th parameter
	mov	%rsp,%rax		# copy %rsp
.cfi_def_cfa_register	%rax
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	sub	\$`2*$SZ*$rounds+8*8+$win64*16*10`,%rsp
	and	\$-256*$SZ,%rsp		# align stack frame
	add	\$`2*$SZ*($rounds-8)`,%rsp
@@ -959,6 +1003,7 @@ ${func}_avx2:
	mov	$ctx,$_ctx
	mov	$in0,$_in0
	mov	%rax,$_rsp
.cfi_cfa_expression	$_rsp,deref,+8
___
$code.=<<___ if ($win64);
	movaps	%xmm6,`$framesz+16*0`(%rsp)
@@ -1189,6 +1234,7 @@ $code.=<<___;
	lea	($Tbl),%rsp
	mov	$_ivp,$ivp
	mov	$_rsp,%rsi
.cfi_def_cfa	%rsi,8
	vmovdqu	$iv,($ivp)		# output IV
	vzeroall
___
@@ -1206,14 +1252,22 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
	mov	-48(%rsi),%r15
.cfi_restore	%r15
	mov	-40(%rsi),%r14
.cfi_restore	%r14
	mov	-32(%rsi),%r13
.cfi_restore	%r13
	mov	-24(%rsi),%r12
.cfi_restore	%r12
	mov	-16(%rsi),%rbp
.cfi_restore	%rbp
	mov	-8(%rsi),%rbx
.cfi_restore	%rbx
	lea	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lepilogue_avx2:
	ret
.cfi_endproc
.size	${func}_avx2,.-${func}_avx2
___
}}
+60 −0
Original line number Diff line number Diff line
@@ -1180,6 +1180,7 @@ $code.=<<___;
.type	aesni_ctr32_encrypt_blocks,\@function,5
.align	16
aesni_ctr32_encrypt_blocks:
.cfi_startproc
	cmp	\$1,$len
	jne	.Lctr32_bulk

@@ -1202,7 +1203,9 @@ $code.=<<___;
.align	16
.Lctr32_bulk:
	lea	(%rsp),$key_			# use $key_ as frame pointer
.cfi_def_cfa_register	$key_
	push	%rbp
.cfi_push	%rbp
	sub	\$$frame_size,%rsp
	and	\$-16,%rsp	# Linux kernel stack can be incorrectly seeded
___
@@ -1722,9 +1725,12 @@ $code.=<<___ if ($win64);
___
# Epilogue of aesni_ctr32_encrypt_blocks: undoes the frame built at
# .Lctr32_bulk, where the frame-pointer register received a copy of the
# entry %rsp and %rbp was pushed immediately below it.
$code.=<<___;
	# %rbp was the first push after the frame pointer was taken, so it
	# sits 8 bytes below the address held in the frame pointer.
	mov	-8($key_),%rbp
.cfi_restore	%rbp
	# Restore the entry stack pointer; from here on the CFA is tracked
	# through %rsp again instead of the frame-pointer register.
	lea	($key_),%rsp
.cfi_def_cfa_register	%rsp
.Lctr32_epilogue:
	ret
.cfi_endproc
.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
___
}
@@ -1746,8 +1752,11 @@ $code.=<<___;
.type	aesni_xts_encrypt,\@function,6
.align	16
aesni_xts_encrypt:
.cfi_startproc
	lea	(%rsp),%r11			# frame pointer
.cfi_def_cfa_register	%r11
	push	%rbp
.cfi_push	%rbp
	sub	\$$frame_size,%rsp
	and	\$-16,%rsp	# Linux kernel stack can be incorrectly seeded
___
@@ -2212,9 +2221,12 @@ $code.=<<___ if ($win64);
___
# Epilogue of aesni_xts_encrypt: undoes the prologue, in which %r11 was
# loaded with the entry %rsp ("frame pointer") and %rbp was pushed
# immediately below it.
$code.=<<___;
	# Saved %rbp lives in the first slot below the frame pointer.
	mov	-8(%r11),%rbp
.cfi_restore	%rbp
	# Put the entry stack pointer back and hand CFA tracking from %r11
	# back to %rsp.
	lea	(%r11),%rsp
.cfi_def_cfa_register	%rsp
.Lxts_enc_epilogue:
	ret
.cfi_endproc
.size	aesni_xts_encrypt,.-aesni_xts_encrypt
___

@@ -2223,8 +2235,11 @@ $code.=<<___;
.type	aesni_xts_decrypt,\@function,6
.align	16
aesni_xts_decrypt:
.cfi_startproc
	lea	(%rsp),%r11			# frame pointer
.cfi_def_cfa_register	%r11
	push	%rbp
.cfi_push	%rbp
	sub	\$$frame_size,%rsp
	and	\$-16,%rsp	# Linux kernel stack can be incorrectly seeded
___
@@ -2715,9 +2730,12 @@ $code.=<<___ if ($win64);
___
# Epilogue of aesni_xts_decrypt: undoes the prologue, in which %r11 was
# loaded with the entry %rsp ("frame pointer") and %rbp was pushed
# immediately below it.
$code.=<<___;
	# Saved %rbp lives in the first slot below the frame pointer.
	mov	-8(%r11),%rbp
.cfi_restore	%rbp
	# Put the entry stack pointer back and hand CFA tracking from %r11
	# back to %rsp.
	lea	(%r11),%rsp
.cfi_def_cfa_register	%rsp
.Lxts_dec_epilogue:
	ret
.cfi_endproc
.size	aesni_xts_decrypt,.-aesni_xts_decrypt
___
}
@@ -2742,12 +2760,18 @@ $code.=<<___;
.type	aesni_ocb_encrypt,\@function,6
.align	32
aesni_ocb_encrypt:
.cfi_startproc
	lea	(%rsp),%rax
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
___
$code.=<<___ if ($win64);
	lea	-0xa0(%rsp),%rsp
@@ -2942,6 +2966,7 @@ $code.=<<___ if (!$win64);
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	lea	0x28(%rsp),%rax
.cfi_def_cfa	%rax,8
___
$code.=<<___ if ($win64);
	movaps	0x00(%rsp),%xmm6
@@ -2969,13 +2994,20 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
	mov	-40(%rax),%r14
.cfi_restore	%r14
	mov	-32(%rax),%r13
.cfi_restore	%r13
	mov	-24(%rax),%r12
.cfi_restore	%r12
	mov	-16(%rax),%rbp
.cfi_restore	%rbp
	mov	-8(%rax),%rbx
.cfi_restore	%rbx
	lea	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Locb_enc_epilogue:
	ret
.cfi_endproc
.size	aesni_ocb_encrypt,.-aesni_ocb_encrypt

.type	__ocb_encrypt6,\@abi-omnipotent
@@ -3188,12 +3220,18 @@ __ocb_encrypt1:
.type	aesni_ocb_decrypt,\@function,6
.align	32
aesni_ocb_decrypt:
.cfi_startproc
	lea	(%rsp),%rax
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
___
$code.=<<___ if ($win64);
	lea	-0xa0(%rsp),%rsp
@@ -3410,6 +3448,7 @@ $code.=<<___ if (!$win64);
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	lea	0x28(%rsp),%rax
.cfi_def_cfa	%rax,8
___
$code.=<<___ if ($win64);
	movaps	0x00(%rsp),%xmm6
@@ -3437,13 +3476,20 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
	mov	-40(%rax),%r14
.cfi_restore	%r14
	mov	-32(%rax),%r13
.cfi_restore	%r13
	mov	-24(%rax),%r12
.cfi_restore	%r12
	mov	-16(%rax),%rbp
.cfi_restore	%rbp
	mov	-8(%rax),%rbx
.cfi_restore	%rbx
	lea	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Locb_dec_epilogue:
	ret
.cfi_endproc
.size	aesni_ocb_decrypt,.-aesni_ocb_decrypt

.type	__ocb_decrypt6,\@abi-omnipotent
@@ -3656,6 +3702,7 @@ $code.=<<___;
.type	${PREFIX}_cbc_encrypt,\@function,6
.align	16
${PREFIX}_cbc_encrypt:
.cfi_startproc
	test	$len,$len		# check length
	jz	.Lcbc_ret

@@ -3732,7 +3779,9 @@ $code.=<<___;
.align	16
.Lcbc_decrypt_bulk:
	lea	(%rsp),%r11		# frame pointer
.cfi_def_cfa_register	%r11
	push	%rbp
.cfi_push	%rbp
	sub	\$$frame_size,%rsp
	and	\$-16,%rsp	# Linux kernel stack can be incorrectly seeded
___
@@ -4175,9 +4224,12 @@ $code.=<<___ if ($win64);
___
# Epilogue of the cbc_encrypt routine: tears down the frame created at
# .Lcbc_decrypt_bulk (%r11 = entry %rsp, %rbp pushed just below it).
# .Lcbc_ret is also the target of the zero-length early exit at function
# entry, which jumps here before any frame has been set up.
$code.=<<___;
	# Saved %rbp lives in the first slot below the frame pointer.
	mov	-8(%r11),%rbp
.cfi_restore	%rbp
	# Put the entry stack pointer back; the CFA is %rsp-based again by
	# the time the shared return label is reached.
	lea	(%r11),%rsp
.cfi_def_cfa_register	%rsp
.Lcbc_ret:
	ret
.cfi_endproc
.size	${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
___
} 
@@ -4198,7 +4250,9 @@ $code.=<<___;
.type	${PREFIX}_set_decrypt_key,\@abi-omnipotent
.align	16
${PREFIX}_set_decrypt_key:
.cfi_startproc
	.byte	0x48,0x83,0xEC,0x08	# sub rsp,8
.cfi_adjust_cfa_offset	8
	call	__aesni_set_encrypt_key
	shl	\$4,$bits		# rounds-1 after _aesni_set_encrypt_key
	test	%eax,%eax
@@ -4231,7 +4285,9 @@ ${PREFIX}_set_decrypt_key:
	pxor	%xmm0,%xmm0
.Ldec_key_ret:
	add	\$8,%rsp
.cfi_adjust_cfa_offset	-8
	ret
.cfi_endproc
.LSEH_end_set_decrypt_key:
.size	${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key
___
@@ -4267,7 +4323,9 @@ $code.=<<___;
.align	16
${PREFIX}_set_encrypt_key:
__aesni_set_encrypt_key:
.cfi_startproc
	.byte	0x48,0x83,0xEC,0x08	# sub rsp,8
.cfi_adjust_cfa_offset	8
	mov	\$-1,%rax
	test	$inp,$inp
	jz	.Lenc_key_ret
@@ -4560,7 +4618,9 @@ __aesni_set_encrypt_key:
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	add	\$8,%rsp
.cfi_adjust_cfa_offset	-8
	ret
.cfi_endproc
.LSEH_end_set_encrypt_key:

.align	16
Loading