Commit b56cb7c6 authored by Andy Polyakov's avatar Andy Polyakov
Browse files

x86*cpuid update [from HEAD].

parent cd5ab329
Loading
Loading
Loading
Loading
+60 −39
Original line number Diff line number Diff line
#!/usr/bin/env perl

# First command-line argument: path of the assembly file to generate.
$output=shift;
# Flavour flags derived from the output file name.  The suffix test must be
# a group: a bracketed [s|asm] is a character class that matches any single
# one of 's', '|', 'a' or 'm', not the alternatives "s" and "asm".
$win64a=1 if ($output =~ /win64a\.(?:s|asm)/);
$masm=1 if ($output =~ /\.asm/);
# Low-precedence 'or' is required here: with '||' the die attaches to the
# always-true string ">$output", so a failed open would go unnoticed.
open STDOUT,">$output" or die "can't open $output: $!";

print<<___ if(defined($win64a));
print<<___ if(defined($masm));
_TEXT	SEGMENT
; OPENSSL_rdtsc: return the processor time-stamp counter as a single
; 64-bit value in rax.  Takes no arguments; modifies rax, rdx and flags.
PUBLIC	OPENSSL_rdtsc
ALIGN	16
OPENSSL_rdtsc	PROC
	rdtsc			; counter is delivered split across edx:eax
	shl	rdx,32		; move the high half up to bits 63:32
	or	rax,rdx		; rax = (high << 32) | low
	ret
OPENSSL_rdtsc	ENDP

PUBLIC	OPENSSL_atomic_add
ALIGN	16
@@ -45,35 +38,16 @@ OPENSSL_wipe_cpu PROC
	lea	rax,QWORD PTR[rsp+8]
	ret
OPENSSL_wipe_cpu	ENDP

; OPENSSL_ia32_cpuid: execute CPUID leaf 1 and return both feature words
; packed into rax: bits 63:32 = ecx word, bits 31:0 = edx word.
; rbx is preserved by parking it in r8 around the cpuid instruction.
OPENSSL_ia32_cpuid	PROC
	mov	r8,rbx		; cpuid overwrites rbx, keep the caller's value
	mov	eax,1		; select leaf 1
	cpuid
	shl	rcx,32		; ecx word -> upper half of the result
	mov	eax,edx		; edx word -> lower half of the result
	mov	rbx,r8		; put the caller's rbx back
	or	rax,rcx		; combine the two halves
	ret
OPENSSL_ia32_cpuid	ENDP
_TEXT	ENDS

CRT\$XIU	SEGMENT
EXTRN	OPENSSL_cpuid_setup:PROC
DQ	OPENSSL_cpuid_setup
CRT\$XIU	ENDS
END

___
print<<___ if(!defined($win64a));
print<<___ if(!defined($masm));
.text
# OPENSSL_rdtsc: read the time-stamp counter and hand it back as one
# 64-bit quantity in the rax register.  No arguments.
.globl	OPENSSL_rdtsc
.align	16
OPENSSL_rdtsc:
	rdtsc				# result arrives as edx:eax
	shlq	\$32,%rdx		# high half -> bits 63:32
	orq	%rdx,%rax		# merge high and low halves
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@function
@@ -120,19 +94,66 @@ OPENSSL_wipe_cpu:
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu

.section	.init
	call	OPENSSL_cpuid_setup

___

# Route everything printed from here on through the perlasm translator,
# passing it the output file name.  Stop immediately if the pipe cannot be
# started instead of printing into a dead handle.
open STDOUT,"| $^X perlasm/x86_64-xlate.pl $output" or die "can't call perlasm/x86_64-xlate.pl: $!";
print<<___;
.text

# OPENSSL_rdtsc: returns the 64-bit time-stamp counter in the rax register.
# Uses only rax and rdx and takes no arguments.
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc				# low half in eax, high half in edx
	shl	\$32,%rdx		# position the high half
	or	%rdx,%rax		# assemble the 64-bit value
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

# OPENSSL_ia32_cpuid: query CPUID and return the leaf 1 feature words
# packed into the rax register (bits 63:32 = ecx word, bits 31:0 = edx
# word after the adjustments listed below).  Takes no arguments.
#
# Register roles:
#   r8   caller's rbx, which cpuid overwrites; restored just before ret
#   r9d  0 when the leaf 0 vendor string is "GenuineIntel", non-zero otherwise
#
# Adjustments applied to the edx word before it is returned:
#   bit 20  set on Intel parts (reserved bit, engages RC4_CHAR)
#   bit 30  set on Intel parts whose Family ID is not 15
#           (reserved bit, skips the unrolled loop)
#   bit 28  cleared when the hyper-threading flag is set but the leaf 1
#           ebx field in bits 23:16 is not above 1
#
# The result is packed and rbx restored only at .Ldone: the checks above
# still need eax, ebx and edx exactly as cpuid left them.
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@abi-omnipotent
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# cpuid overwrites rbx

	xor	%eax,%eax		# leaf 0: vendor identification
	cpuid
	xor	%eax,%eax		# al collects each comparison result
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# r9d stays 0 only if all three matched

	mov	\$1,%eax		# leaf 1: version and feature words
	cpuid
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x00100000,%edx	# 1<<20: use reserved bit to engage RC4_CHAR
	and	\$15,%ah		# eax bits 11:8
	cmp	\$15,%ah		# examine Family ID
	je	.Lnotintel
	or	\$0x40000000,%edx	# 1<<30: use reserved bit to skip unrolled loop
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Ldone
	shr	\$16,%ebx		# bl = leaf 1 ebx bits 23:16
	cmp	\$1,%bl			# see if cache is shared
	ja	.Ldone
	and	\$0xefffffff,%edx	# ~(1<<28)
.Ldone:
	shl	\$32,%rcx		# ecx word -> upper half of the result
	mov	%edx,%eax		# adjusted edx word -> lower half
	mov	%r8,%rbx		# restore caller's rbx
	or	%rcx,%rax
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

# Emit a call to OPENSSL_cpuid_setup into the .init section.
# NOTE(review): presumably this makes the setup routine run at load time,
# before any other code in the library is used - confirm against the
# startup code of each target.
.section	.init
	call	OPENSSL_cpuid_setup
___
# Closing flushes the buffered text.  When STDOUT is a pipe, close also
# waits for the child and returns false if it failed, so the result must
# not be ignored.
close STDOUT or die "error closing STDOUT: $! (child status $?)";
+29 −2
Original line number Diff line number Diff line
@@ -19,13 +19,40 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
	&pop	("eax");
	&xor	("ecx","eax");
	&bt	("ecx",21);
	&jnc	(&label("nocpuid"));
	&jnc	(&label("done"));
	&xor	("eax","eax");
	&cpuid	();
	&xor	("eax","eax");
	&cmp	("ebx",0x756e6547);	# "Genu"
	&setne	(&LB("eax"));
	&mov	("ebp","eax");
	&cmp	("edx",0x49656e69);	# "ineI"
	&setne	(&LB("eax"));
	&or	("ebp","eax");
	&cmp	("ecx",0x6c65746e);	# "ntel"
	&setne	(&LB("eax"));
	&or	("ebp","eax");
	&mov	("eax",1);
	&cpuid	();
&set_label("nocpuid");
	&cmp	("ebp",0);
	&jne	(&label("notP4"));
	&and	(&HB("eax"),15);	# familiy ID
	&cmp	(&HB("eax"),15);	# P4?
	&jne	(&label("notP4"));
	&or	("edx",1<<20);		# use reserved bit to engage RC4_CHAR
&set_label("notP4");
	&bt	("edx",28);		# test hyper-threading bit
	&jnc	(&label("done"));
	&shr	("ebx",16);
	&cmp	(&LB("ebx"),1);		# see if cache is shared(*)
	&ja	(&label("done"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit if not
&set_label("done");
	&mov	("eax","edx");
	&mov	("edx","ecx");
&function_end("OPENSSL_ia32_cpuid");
# (*)	on Core2 this value is set to 2 denoting the fact that L2
#	cache is shared between cores.

&external_label("OPENSSL_ia32cap_P");