Commit d3e32630 authored by Andy Polyakov
Browse files

ec/asm/x25519-x86_64.pl: add CFI directives and Windows SE handler.

parent dfd5fb09
Loading
Loading
Loading
Loading
+296 −1
Original line number Diff line number Diff line
@@ -102,13 +102,22 @@ $code.=<<___;
# x25519_fe51_mul, as far as this diff shows it: the prologue and the final
# hand-off. The arithmetic between the two hunks is not visible here.
#
# Prologue: six registers are pushed (48 bytes) and 8*5 = 40 more bytes are
# reserved, 88 bytes in total. That is the frame size recorded in
# .LSEH_info_x25519_fe51_mul. Each stack change is paired with a CFI
# annotation (.cfi_push looks like a perlasm-level directive - confirm its
# expansion in the translator).
.type	x25519_fe51_mul,\@function,3
.align	32
x25519_fe51_mul:
.cfi_startproc
	push	%rbp
.cfi_push	%rbp
	push	%rbx
.cfi_push	%rbx
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	lea	-8*5(%rsp),%rsp
.cfi_adjust_cfa_offset	40
# End-of-prologue label; referenced as HandlerData[0] in the .xdata record.
.Lfe51_mul_body:

	mov	8*0(%rsi),%rax		# f[0]
	mov	8*0(%rdx),%r11		# load g[0-4]
@@ -236,19 +245,30 @@ x25519_fe51_mul:

	mov	8*4(%rsp),%rdi		# restore 1st argument
	# No return sequence here: control leaves through .Lreduce51, which is
	# defined outside the visible hunks.
	jmp	.Lreduce51
# Marks the end of the function for the SE handler (HandlerData[1]).
.Lfe51_mul_epilogue:
.cfi_endproc
.size	x25519_fe51_mul,.-x25519_fe51_mul

# x25519_fe51_sqr entry and prologue. The frame has the same shape as the one
# built by x25519_fe51_mul: six pushes (48 bytes) plus 8*5 = 40 reserved
# bytes, 88 in total, matching .LSEH_info_x25519_fe51_sqr.
.globl	x25519_fe51_sqr
.type	x25519_fe51_sqr,\@function,2
.align	32
x25519_fe51_sqr:
.cfi_startproc
	push	%rbp
.cfi_push	%rbp
	push	%rbx
.cfi_push	%rbx
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	lea	-8*5(%rsp),%rsp
.cfi_adjust_cfa_offset	40
# End-of-prologue label; referenced as HandlerData[0] in the .xdata record.
.Lfe51_sqr_body:

	mov	8*0(%rsi),%rax		# g[0]
	mov	8*2(%rsi),%r15		# g[2]
@@ -391,27 +411,45 @@ x25519_fe51_sqr:
	mov	%r10,8*4(%rdi)

	# Epilogue for the 88-byte frame built by the fe51 prologues: six pushed
	# registers sit above a 40-byte scratch area, so they are reloaded from
	# offsets 8*5 .. 8*10 rather than popped.
	mov	8*5(%rsp),%r15
.cfi_restore	%r15
	mov	8*6(%rsp),%r14
.cfi_restore	%r14
	mov	8*7(%rsp),%r13
.cfi_restore	%r13
	mov	8*8(%rsp),%r12
.cfi_restore	%r12
	mov	8*9(%rsp),%rbx
.cfi_restore	%rbx
	mov	8*10(%rsp),%rbp
.cfi_restore	%rbp
	# Release the whole frame in one step. The stack pointer moves up by 88,
	# so the CFA offset has to shrink by 88 (the prologue used +40 for a
	# 40-byte decrease); a positive value would leave the unwinder looking
	# for the return address 176 bytes away from where it really is.
	lea	8*11(%rsp),%rsp
.cfi_adjust_cfa_offset	-88
# Epilogue label; referenced as HandlerData[1] in the .xdata record.
.Lfe51_sqr_epilogue:
	ret
.cfi_endproc
.size	x25519_fe51_sqr,.-x25519_fe51_sqr

# x25519_fe51_mul121666 entry and prologue. Same 88-byte frame as the other
# fe51 routines: six pushes (48 bytes) plus 8*5 = 40 reserved bytes.
#
# Nothing but stack set-up may happen before the body label: full_handler
# takes context->Rax as the stack base while Rip is still below that label,
# so the constant is loaded into %eax only after the label. Keeping each
# push directly followed by its CFI annotation also keeps the unwind info
# exact at every instruction boundary.
.globl	x25519_fe51_mul121666
.type	x25519_fe51_mul121666,\@function,2
.align	32
x25519_fe51_mul121666:
.cfi_startproc
	push	%rbp
.cfi_push	%rbp
	push	%rbx
.cfi_push	%rbx
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	lea	-8*5(%rsp),%rsp
.cfi_adjust_cfa_offset	40
# End-of-prologue label; referenced as HandlerData[0] in the .xdata record.
.Lfe51_mul121666_body:
	mov	\$121666,%eax		# multiplier, implicit operand of mulq below

	mulq	8*0(%rsi)
	mov	%rax,%rbx		# %rbx:%rcx = h0
@@ -434,6 +472,8 @@ x25519_fe51_mul121666:
	mov	%rdx,%r15

	# No return sequence here: control leaves through .Lreduce51, which is
	# defined outside the visible hunks.
	jmp	.Lreduce51
# Marks the end of the function for the SE handler (HandlerData[1]).
.Lfe51_mul121666_epilogue:
.cfi_endproc
.size	x25519_fe51_mul121666,.-x25519_fe51_mul121666
___
########################################################################
@@ -460,14 +500,24 @@ x25519_fe64_eligible:
# x25519_fe64_mul, as far as this diff shows it: the prologue and the final
# hand-off. The arithmetic between the two hunks is not visible here.
#
# Prologue: seven pushes (56 bytes, the last one parking the destination
# pointer from %rdi) plus 8*2 = 16 reserved bytes, 72 in total. That is the
# frame size recorded in .LSEH_info_x25519_fe64_mul.
.type	x25519_fe64_mul,\@function,3
.align	32
x25519_fe64_mul:
.cfi_startproc
	push	%rbp
.cfi_push	%rbp
	push	%rbx
.cfi_push	%rbx
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	push	%rdi			# offload dst
.cfi_push	%rdi
	lea	-8*2(%rsp),%rsp
.cfi_adjust_cfa_offset	16
# End-of-prologue label; referenced as HandlerData[0] in the .xdata record.
.Lfe64_mul_body:

	mov	%rdx,%rax
	mov	8*0(%rdx),%rbp		# b[0]
@@ -534,20 +584,32 @@ x25519_fe64_mul:
	adox	%rdi,$acc7		# of=0

	# No return sequence here: control leaves through .Lreduce64, which is
	# defined outside the visible hunks.
	jmp	.Lreduce64
# Marks the end of the function for the SE handler (HandlerData[1]).
.Lfe64_mul_epilogue:
.cfi_endproc
.size	x25519_fe64_mul,.-x25519_fe64_mul

# x25519_fe64_sqr entry and prologue. The frame has the same shape as the one
# built by x25519_fe64_mul: seven pushes (56 bytes, the last one parking the
# destination pointer) plus 8*2 = 16 reserved bytes, 72 in total, matching
# .LSEH_info_x25519_fe64_sqr.
.globl	x25519_fe64_sqr
.type	x25519_fe64_sqr,\@function,2
.align	32
x25519_fe64_sqr:
.cfi_startproc
	push	%rbp
.cfi_push	%rbp
	push	%rbx
.cfi_push	%rbx
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	push	%rdi			# offload dst
.cfi_push	%rdi
	lea	-8*2(%rsp),%rsp
.cfi_adjust_cfa_offset	16
# End-of-prologue label; referenced as HandlerData[0] in the .xdata record.
.Lfe64_sqr_body:

	mov	8*0(%rsi),%rdx		# a[0]
	mov	8*1(%rsi),%rcx		# a[1]
@@ -637,19 +699,29 @@ x25519_fe64_sqr:
	mov	$acc0,8*0(%rdi)

	# Epilogue for the 72-byte frame built by the fe64 prologues: 16 bytes
	# of scratch, then the parked destination pointer, then six saved
	# registers at offsets 8*3 .. 8*8. They are reloaded rather than popped.
	mov	8*3(%rsp),%r15
.cfi_restore	%r15
	mov	8*4(%rsp),%r14
.cfi_restore	%r14
	mov	8*5(%rsp),%r13
.cfi_restore	%r13
	mov	8*6(%rsp),%r12
.cfi_restore	%r12
	mov	8*7(%rsp),%rbx
.cfi_restore	%rbx
	mov	8*8(%rsp),%rbp
.cfi_restore	%rbp
	# Release the whole frame in one step. The stack pointer moves up by
	# 8*9 = 72, so the CFA offset has to shrink by exactly 72 (seven pushes
	# plus the 16-byte adjustment made by the prologue).
	lea	8*9(%rsp),%rsp
.cfi_adjust_cfa_offset	-72
# Epilogue label; referenced as HandlerData[1] in the .xdata record.
.Lfe64_sqr_epilogue:
	ret
.cfi_endproc
.size	x25519_fe64_sqr,.-x25519_fe64_sqr

# x25519_fe64_mul121666: in the visible code nothing is pushed and no stack
# is reserved, so the body label sits right at the entry point. The body and
# epilogue labels are the HandlerData[] pair that .xdata hands to
# short_handler for this routine.
.globl	x25519_fe64_mul121666
.type	x25519_fe64_mul121666,\@function,2
.align	32
x25519_fe64_mul121666:
.Lfe64_mul121666_body:
	mov	\$121666,%edx		# multiplier, implicit source operand of mulx
	mulx	8*0(%rsi),$acc0,%rcx
	mulx	8*1(%rsi),$acc1,%rax
@@ -676,6 +748,7 @@ x25519_fe64_mul121666:
	mov	$acc3,8*3(%rdi)
	mov	$acc0,8*0(%rdi)

.Lfe64_mul121666_epilogue:
	ret
.size	x25519_fe64_mul121666,.-x25519_fe64_mul121666

@@ -683,6 +756,7 @@ x25519_fe64_mul121666:
# x25519_fe64_add: in the visible code nothing is pushed and no stack is
# reserved, so the body label sits right at the entry point. The body and
# epilogue labels are the HandlerData[] pair that .xdata hands to
# short_handler for this routine.
.type	x25519_fe64_add,\@function,3
.align	32
x25519_fe64_add:
.Lfe64_add_body:
	# Load the first three 64-bit limbs of the operand addressed by %rsi.
	mov	8*0(%rsi),$acc0
	mov	8*1(%rsi),$acc1
	mov	8*2(%rsi),$acc2
@@ -709,6 +783,7 @@ x25519_fe64_add:
	add	%rax,$acc0
	mov	$acc0,8*0(%rdi)

.Lfe64_add_epilogue:
	ret
.size	x25519_fe64_add,.-x25519_fe64_add

@@ -716,6 +791,7 @@ x25519_fe64_add:
# x25519_fe64_sub: in the visible code nothing is pushed and no stack is
# reserved, so the body label sits right at the entry point. The body and
# epilogue labels are the HandlerData[] pair that .xdata hands to
# short_handler for this routine.
.type	x25519_fe64_sub,\@function,3
.align	32
x25519_fe64_sub:
.Lfe64_sub_body:
	# Load the first three 64-bit limbs of the operand addressed by %rsi.
	mov	8*0(%rsi),$acc0
	mov	8*1(%rsi),$acc1
	mov	8*2(%rsi),$acc2
@@ -742,6 +818,7 @@ x25519_fe64_sub:
	sub	%rax,$acc0
	mov	$acc0,8*0(%rdi)

.Lfe64_sub_epilogue:
	ret
.size	x25519_fe64_sub,.-x25519_fe64_sub

@@ -749,6 +826,7 @@ x25519_fe64_sub:
# x25519_fe64_tobytes: in the visible code nothing is pushed and no stack is
# reserved, so the body label sits right at the entry point. The body and
# epilogue labels (spelled .Lfe64_to_*) are the HandlerData[] pair that
# .xdata hands to short_handler for this routine.
.type	x25519_fe64_tobytes,\@function,2
.align	32
x25519_fe64_tobytes:
.Lfe64_to_body:
	# Load the first three 64-bit limbs of the operand addressed by %rsi.
	mov	8*0(%rsi),$acc0
	mov	8*1(%rsi),$acc1
	mov	8*2(%rsi),$acc2
@@ -782,6 +860,7 @@ x25519_fe64_tobytes:
	mov	$acc2,8*2(%rdi)
	mov	$acc3,8*3(%rdi)

.Lfe64_to_epilogue:
	ret
.size	x25519_fe64_tobytes,.-x25519_fe64_tobytes
___
@@ -817,6 +896,222 @@ $code.=<<___;
.asciz	"X25519 primitives for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
___

# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
#		CONTEXT *context,DISPATCHER_CONTEXT *disp)
if ($win64) {
# Registers carrying the handler's four arguments, in the order of the
# prototype above: rec, frame, context, disp.
($rec,$frame,$context,$disp)=("%rcx","%rdx","%r8","%r9");

$code.=<<___;
.extern	__imp_RtlVirtualUnwind

# short_handler: exception handler for the routines whose .xdata record names
# it (the fe64 leaf routines). It only decides which value to treat as the
# stack pointer of the interrupted function and then joins the shared tail
# inside full_handler:
#   Rip below the HandlerData[0] label  -> use context->Rax
#   otherwise                           -> use context->Rsp
# Only HandlerData[0] is consulted here.
.type	short_handler,\@abi-omnipotent
.align	16
short_handler:
	# Preserve the registers and flags used below; the matching pops are at
	# the end of full_handler, which this routine never returns from directly.
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	sub	\$64,%rsp		# room for the outgoing call; offsets 32..56 hold args 5-8

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip

	mov	8($disp),%rsi		# disp->ImageBase
	mov	56($disp),%r11		# disp->HandlerData

	mov	0(%r11),%r10d		# HandlerData[0]
	lea	(%rsi,%r10),%r10	# end of prologue label
	cmp	%r10,%rbx		# context->Rip<end of prologue label
	jb	.Lcommon_seh_tail

	mov	152($context),%rax	# pull context->Rsp
	jmp	.Lcommon_seh_tail
.size	short_handler,.-short_handler

# full_handler: exception handler for the routines that build a stack frame
# (fe51_* and fe64_mul/sqr according to .xdata). Three cases, selected by
# comparing context->Rip with the labels in HandlerData[]:
#   Rip below HandlerData[0] (still in prologue): take context->Rax as the
#       stack base and recover no saved registers;
#   Rip at or past HandlerData[1] (epilogue label): take context->Rsp as is;
#   otherwise (inside the body): add HandlerData[2], the frame size, to
#       context->Rsp and reload %rbp, %rbx, %r12-%r15 from the six slots just
#       below that address into the context record.
# All paths end in .Lcommon_seh_tail, which is shared with short_handler.
.type	full_handler,\@abi-omnipotent
.align	16
full_handler:
	# Preserve the registers and flags used below; popped again before ret.
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	sub	\$64,%rsp		# room for the outgoing call; offsets 32..56 hold args 5-8

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip

	mov	8($disp),%rsi		# disp->ImageBase
	mov	56($disp),%r11		# disp->HandlerData

	mov	0(%r11),%r10d		# HandlerData[0]
	lea	(%rsi,%r10),%r10	# end of prologue label
	cmp	%r10,%rbx		# context->Rip<end of prologue label
	jb	.Lcommon_seh_tail

	mov	152($context),%rax	# pull context->Rsp

	mov	4(%r11),%r10d		# HandlerData[1]
	lea	(%rsi,%r10),%r10	# epilogue label
	cmp	%r10,%rbx		# context->Rip>=epilogue label
	jae	.Lcommon_seh_tail

	mov	8(%r11),%r10d		# HandlerData[2]
	lea	(%rax,%r10),%rax	# step over the frame (88 or 72 bytes per .xdata)

	# The slots are read in the order the prologues pushed them:
	# %rbp first (highest address), %r15 last.
	mov	-8(%rax),%rbp
	mov	-16(%rax),%rbx
	mov	-24(%rax),%r12
	mov	-32(%rax),%r13
	mov	-40(%rax),%r14
	mov	-48(%rax),%r15
	mov	%rbx,144($context)	# restore context->Rbx
	mov	%rbp,160($context)	# restore context->Rbp
	mov	%r12,216($context)	# restore context->R12
	mov	%r13,224($context)	# restore context->R13
	mov	%r14,232($context)	# restore context->R14
	mov	%r15,240($context)	# restore context->R15

# Shared tail. On entry %rax is the stack base chosen above; the quadwords at
# offsets 8 and 16 from it are written to the context as Rdi and Rsi
# (presumably the values parked there by the generated Win64 entry sequence,
# which is not visible in this file view).
.Lcommon_seh_tail:
	mov	8(%rax),%rdi
	mov	16(%rax),%rsi
	mov	%rax,152($context)	# restore context->Rsp
	mov	%rsi,168($context)	# restore context->Rsi
	mov	%rdi,176($context)	# restore context->Rdi

	# Copy the updated context record over disp->ContextRecord.
	mov	40($disp),%rdi		# disp->ContextRecord
	mov	$context,%rsi		# context
	mov	\$154,%ecx		# sizeof(CONTEXT), counted in quadwords for movsq
	.long	0xa548f3fc		# cld; rep movsq

	# Hand the rest of the unwind to RtlVirtualUnwind: four arguments in
	# registers, four more in the stack slots reserved on entry.
	mov	$disp,%rsi
	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
	mov	0(%rsi),%r8		# arg3, disp->ControlPc
	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
	mov	40(%rsi),%r10		# disp->ContextRecord
	lea	56(%rsi),%r11		# &disp->HandlerData
	lea	24(%rsi),%r12		# &disp->EstablisherFrame
	mov	%r10,32(%rsp)		# arg5
	mov	%r11,40(%rsp)		# arg6
	mov	%r12,48(%rsp)		# arg7
	mov	%rcx,56(%rsp)		# arg8, (NULL)
	call	*__imp_RtlVirtualUnwind(%rip)

	mov	\$1,%eax		# ExceptionContinueSearch
	# Undo the entry sequence in reverse order.
	add	\$64,%rsp
	popfq
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	pop	%rdi
	pop	%rsi
	ret
.size	full_handler,.-full_handler

# Function table: one triple of image-relative addresses per covered routine,
# in the order begin label, end label, unwind-info record. The .LSEH_begin_
# and .LSEH_end_ labels are not defined in the visible part of this file
# (presumably emitted by the perlasm translator - confirm); the .LSEH_info_
# records are the ones laid out in .xdata.
.section	.pdata
.align	4
	.rva	.LSEH_begin_x25519_fe51_mul
	.rva	.LSEH_end_x25519_fe51_mul
	.rva	.LSEH_info_x25519_fe51_mul

	.rva	.LSEH_begin_x25519_fe51_sqr
	.rva	.LSEH_end_x25519_fe51_sqr
	.rva	.LSEH_info_x25519_fe51_sqr

	.rva	.LSEH_begin_x25519_fe51_mul121666
	.rva	.LSEH_end_x25519_fe51_mul121666
	.rva	.LSEH_info_x25519_fe51_mul121666
___
# The fe64 entries are emitted only when $addx is set, mirroring the
# condition on the matching .xdata records.
$code.=<<___	if ($addx);
	.rva	.LSEH_begin_x25519_fe64_mul
	.rva	.LSEH_end_x25519_fe64_mul
	.rva	.LSEH_info_x25519_fe64_mul

	.rva	.LSEH_begin_x25519_fe64_sqr
	.rva	.LSEH_end_x25519_fe64_sqr
	.rva	.LSEH_info_x25519_fe64_sqr

	.rva	.LSEH_begin_x25519_fe64_mul121666
	.rva	.LSEH_end_x25519_fe64_mul121666
	.rva	.LSEH_info_x25519_fe64_mul121666

	.rva	.LSEH_begin_x25519_fe64_add
	.rva	.LSEH_end_x25519_fe64_add
	.rva	.LSEH_info_x25519_fe64_add

	.rva	.LSEH_begin_x25519_fe64_sub
	.rva	.LSEH_end_x25519_fe64_sub
	.rva	.LSEH_info_x25519_fe64_sub

	.rva	.LSEH_begin_x25519_fe64_tobytes
	.rva	.LSEH_end_x25519_fe64_tobytes
	.rva	.LSEH_info_x25519_fe64_tobytes
___
# Unwind-info records referenced from .pdata. The fe51 records are always
# emitted; the fe64 records only when $addx is set.
$code.=<<___;
# Record layout used throughout this section:
#   .byte 9,0,0,0     header (presumably UNWIND_INFO version 1 with the
#                     exception-handler flag and no unwind codes - confirm)
#   .rva handler      full_handler or short_handler
#   .rva body,epilogue    HandlerData[0] and HandlerData[1]
#   .long size,0      full_handler records only: HandlerData[2] is the frame
#                     size in bytes added to Rsp; the trailing 0 is not read
#                     by the handler code in this file
.section	.xdata
.align	8
.LSEH_info_x25519_fe51_mul:
	.byte	9,0,0,0
	.rva	full_handler
	.rva	.Lfe51_mul_body,.Lfe51_mul_epilogue	# HandlerData[]
	.long	88,0
.LSEH_info_x25519_fe51_sqr:
	.byte	9,0,0,0
	.rva	full_handler
	.rva	.Lfe51_sqr_body,.Lfe51_sqr_epilogue	# HandlerData[]
	.long	88,0
.LSEH_info_x25519_fe51_mul121666:
	.byte	9,0,0,0
	.rva	full_handler
	.rva	.Lfe51_mul121666_body,.Lfe51_mul121666_epilogue	# HandlerData[]
	.long	88,0
___
$code.=<<___	if ($addx);
# fe64_mul and fe64_sqr build a 72-byte frame (seven pushes plus 16 bytes);
# the remaining fe64 routines have no frame and use short_handler.
.LSEH_info_x25519_fe64_mul:
	.byte	9,0,0,0
	.rva	full_handler
	.rva	.Lfe64_mul_body,.Lfe64_mul_epilogue	# HandlerData[]
	.long	72,0
.LSEH_info_x25519_fe64_sqr:
	.byte	9,0,0,0
	.rva	full_handler
	.rva	.Lfe64_sqr_body,.Lfe64_sqr_epilogue	# HandlerData[]
	.long	72,0
.LSEH_info_x25519_fe64_mul121666:
	.byte	9,0,0,0
	.rva	short_handler
	.rva	.Lfe64_mul121666_body,.Lfe64_mul121666_epilogue	# HandlerData[]
.LSEH_info_x25519_fe64_add:
	.byte	9,0,0,0
	.rva	short_handler
	.rva	.Lfe64_add_body,.Lfe64_add_epilogue	# HandlerData[]
.LSEH_info_x25519_fe64_sub:
	.byte	9,0,0,0
	.rva	short_handler
	.rva	.Lfe64_sub_body,.Lfe64_sub_epilogue	# HandlerData[]
.LSEH_info_x25519_fe64_tobytes:
	.byte	9,0,0,0
	.rva	short_handler
	.rva	.Lfe64_to_body,.Lfe64_to_epilogue	# HandlerData[]
___
}

# Replace every backtick-quoted fragment of the generated text with the
# result of evaluating it as Perl, then write the text to STDOUT.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
# Buffered-write failures (e.g. a full disk or a broken pipe) are only
# reported when the handle is closed; abort with a non-zero status rather
# than silently leaving truncated output behind.
close STDOUT or die "error closing STDOUT: $!";