Commit 761393bb authored by Andy Polyakov
Browse files

x86[_64]cpuid.pl: further refine shared cache detection.

parent 6f71e5ee
Loading
Loading
Loading
Loading
+31 −3
Original line number Diff line number Diff line
@@ -50,6 +50,8 @@ OPENSSL_ia32_cpuid:

	xor	%eax,%eax
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
@@ -60,7 +62,6 @@ OPENSSL_ia32_cpuid:
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	mov	\$1,%r10d		# "number of [AMD] cores"
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
@@ -74,10 +75,10 @@ OPENSSL_ia32_cpuid:
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000008,%eax
	mov	\$1,%r10d		# "number of [AMD] cores"
	jb	.Lintel

	mov	\$0x80000008,%eax
@@ -85,7 +86,29 @@ OPENSSL_ia32_cpuid:
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Ldone
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Ldone
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Ldone

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	cmp	\$0,%r9d
@@ -98,8 +121,13 @@ OPENSSL_ia32_cpuid:
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Ldone
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Ldone

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	%r10b,%bl		# see if cache is shared
	cmp	\$1,%bl			# see if cache is shared
	ja	.Ldone
	and	\$0xefffffff,%edx	# ~(1<<28)
.Ldone:
+32 −6
Original line number Diff line number Diff line
@@ -23,6 +23,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
	&jnc	(&label("done"));
	&xor	("eax","eax");
	&cpuid	();
	&mov	("edi","eax");		# max value for standard query level

	&xor	("eax","eax");
	&cmp	("ebx",0x756e6547);	# "Genu"
	&setne	(&LB("eax"));
@@ -33,7 +35,6 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
	&cmp	("ecx",0x6c65746e);	# "ntel"
	&setne	(&LB("eax"));
	&or	("ebp","eax");		# 0 indicates Intel CPU
	&mov	("esi",1);		# "number of [AMD] cores"
	&jz	(&label("intel"));

	&cmp	("ebx",0x68747541);	# "Auth"
@@ -47,10 +48,10 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
	&or	("esi","eax");		# 0 indicates AMD CPU
	&jnz	(&label("intel"));

	# AMD specific
	&mov	("eax",0x80000000);
	&cpuid	();
	&cmp	("eax",0x80000008);
	&mov	("esi",1);		# "number of [AMD] cores"
	&jb	(&label("intel"));

	&mov	("eax",0x80000008);
@@ -58,7 +59,30 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
	&movz	("esi",&LB("ecx"));	# number of cores - 1
	&inc	("esi");		# number of cores

	&mov	("eax",1);
	&cpuid	();
	&bt	("edx",28);
	&jnc	(&label("done"));
	&shr	("ebx",16);
	&and	("ebx",0xff);
	&cmp	("ebx","esi");
	&ja	(&label("done"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit
	&jmp	(&label("done"));
	
&set_label("intel");
	&cmp	("edi",4);
	&mov	("edi",-1);
	&jb	(&label("nocacheinfo"));

	&mov	("eax",4);
	&mov	("ecx",0);		# query L1D
	&cpuid	();
	&mov	("edi","eax");
	&shr	("edi",14);
	&and	("edi",0xfff);		# number of cores -1 per L1D

&set_label("nocacheinfo");
	&mov	("eax",1);
	&cpuid	();
	&cmp	("ebp",0);
@@ -70,17 +94,19 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&set_label("notP4");
	&bt	("edx",28);		# test hyper-threading bit
	&jnc	(&label("done"));
	&and	("edx",0xefffffff);
	&cmp	("edi",0);
	&je	(&label("done"));

	&or	("edx",0x10000000);
	&shr	("ebx",16);
	&and	("ebx",0xff);
	&cmp	("ebx","esi");		# see if cache is shared(*)
	&cmp	(&LB("ebx"),1);
	&ja	(&label("done"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit if not
&set_label("done");
	&mov	("eax","edx");
	&mov	("edx","ecx");
&function_end("OPENSSL_ia32_cpuid");
# (*)	on Core2 this value is set to 2 denoting the fact that L2
#	cache is shared between cores.

&external_label("OPENSSL_ia32cap_P");