Loading crypto/x86_64cpuid.pl +31 −3 Original line number Diff line number Diff line Loading @@ -50,6 +50,8 @@ OPENSSL_ia32_cpuid: xor %eax,%eax cpuid mov %eax,%r11d # max value for standard query level xor %eax,%eax cmp \$0x756e6547,%ebx # "Genu" setne %al Loading @@ -60,7 +62,6 @@ OPENSSL_ia32_cpuid: cmp \$0x6c65746e,%ecx # "ntel" setne %al or %eax,%r9d # 0 indicates Intel CPU mov \$1,%r10d # "number of [AMD] cores" jz .Lintel cmp \$0x68747541,%ebx # "Auth" Loading @@ -74,10 +75,10 @@ OPENSSL_ia32_cpuid: or %eax,%r10d # 0 indicates AMD CPU jnz .Lintel # AMD specific mov \$0x80000000,%eax cpuid cmp \$0x80000008,%eax mov \$1,%r10d # "number of [AMD] cores" jb .Lintel mov \$0x80000008,%eax Loading @@ -85,7 +86,29 @@ OPENSSL_ia32_cpuid: movzb %cl,%r10 # number of cores - 1 inc %r10 # number of cores mov \$1,%eax cpuid bt \$28,%edx # test hyper-threading bit jnc .Ldone shr \$16,%ebx # number of logical processors cmp %r10b,%bl ja .Ldone and \$0xefffffff,%edx # ~(1<<28) jmp .Ldone .Lintel: cmp \$4,%r11d mov \$-1,%r10d jb .Lnocacheinfo mov \$4,%eax mov \$0,%ecx # query L1D cpuid mov %eax,%r10d shr \$14,%r10d and \$0xfff,%r10d # number of cores -1 per L1D .Lnocacheinfo: mov \$1,%eax cpuid cmp \$0,%r9d Loading @@ -98,8 +121,13 @@ OPENSSL_ia32_cpuid: .Lnotintel: bt \$28,%edx # test hyper-threading bit jnc .Ldone and \$0xefffffff,%edx # ~(1<<28) cmp \$0,%r10d je .Ldone or \$0x10000000,%edx # 1<<28 shr \$16,%ebx cmp %r10b,%bl # see if cache is shared cmp \$1,%bl # see if cache is shared ja .Ldone and \$0xefffffff,%edx # ~(1<<28) .Ldone: Loading crypto/x86cpuid.pl +32 −6 Original line number Diff line number Diff line Loading @@ -23,6 +23,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &jnc (&label("done")); &xor ("eax","eax"); &cpuid (); &mov ("edi","eax"); # max value for standard query level &xor ("eax","eax"); &cmp ("ebx",0x756e6547); # "Genu" &setne (&LB("eax")); Loading @@ -33,7 +35,6 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &cmp ("ecx",0x6c65746e); # "ntel" &setne (&LB("eax")); &or ("ebp","eax"); # 0 indicates Intel CPU &mov ("esi",1); # "number of [AMD] cores" &jz (&label("intel")); &cmp ("ebx",0x68747541); # "Auth" Loading @@ -47,10 +48,10 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &or ("esi","eax"); # 0 indicates AMD CPU &jnz (&label("intel")); # AMD specific &mov ("eax",0x80000000); &cpuid (); &cmp ("eax",0x80000008); &mov ("esi",1); # "number of [AMD] cores" &jb (&label("intel")); &mov ("eax",0x80000008); Loading @@ -58,7 +59,30 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &movz ("esi",&LB("ecx")); # number of cores - 1 &inc ("esi"); # number of cores &mov ("eax",1); &cpuid (); &bt ("edx",28); &jnc (&label("done")); &shr ("ebx",16); &and ("ebx",0xff); &cmp ("ebx","esi"); &ja (&label("done")); &and ("edx",0xefffffff); # clear hyper-threading bit &jmp (&label("done")); &set_label("intel"); &cmp ("edi",4); &mov ("edi",-1); &jb (&label("nocacheinfo")); &mov ("eax",4); &mov ("ecx",0); # query L1D &cpuid (); &mov ("edi","eax"); &shr ("edi",14); &and ("edi",0xfff); # number of cores -1 per L1D &set_label("nocacheinfo"); &mov ("eax",1); &cpuid (); &cmp ("ebp",0); Loading @@ -70,17 +94,19 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &set_label("notP4"); &bt ("edx",28); # test hyper-threading bit &jnc (&label("done")); &and ("edx",0xefffffff); &cmp ("edi",0); &je (&label("done")); &or ("edx",0x10000000); &shr ("ebx",16); &and ("ebx",0xff); &cmp ("ebx","esi"); # see if cache is shared(*) &cmp (&LB("ebx"),1); &ja (&label("done")); &and ("edx",0xefffffff); # clear hyper-threading bit if not &set_label("done"); &mov ("eax","edx"); &mov ("edx","ecx"); &function_end("OPENSSL_ia32_cpuid"); # (*) on Core2 this value is set to 2 denoting the fact that L2 # cache is shared between cores. &external_label("OPENSSL_ia32cap_P"); Loading Loading
crypto/x86_64cpuid.pl +31 −3 Original line number Diff line number Diff line Loading @@ -50,6 +50,8 @@ OPENSSL_ia32_cpuid: xor %eax,%eax cpuid mov %eax,%r11d # max value for standard query level xor %eax,%eax cmp \$0x756e6547,%ebx # "Genu" setne %al Loading @@ -60,7 +62,6 @@ OPENSSL_ia32_cpuid: cmp \$0x6c65746e,%ecx # "ntel" setne %al or %eax,%r9d # 0 indicates Intel CPU mov \$1,%r10d # "number of [AMD] cores" jz .Lintel cmp \$0x68747541,%ebx # "Auth" Loading @@ -74,10 +75,10 @@ OPENSSL_ia32_cpuid: or %eax,%r10d # 0 indicates AMD CPU jnz .Lintel # AMD specific mov \$0x80000000,%eax cpuid cmp \$0x80000008,%eax mov \$1,%r10d # "number of [AMD] cores" jb .Lintel mov \$0x80000008,%eax Loading @@ -85,7 +86,29 @@ OPENSSL_ia32_cpuid: movzb %cl,%r10 # number of cores - 1 inc %r10 # number of cores mov \$1,%eax cpuid bt \$28,%edx # test hyper-threading bit jnc .Ldone shr \$16,%ebx # number of logical processors cmp %r10b,%bl ja .Ldone and \$0xefffffff,%edx # ~(1<<28) jmp .Ldone .Lintel: cmp \$4,%r11d mov \$-1,%r10d jb .Lnocacheinfo mov \$4,%eax mov \$0,%ecx # query L1D cpuid mov %eax,%r10d shr \$14,%r10d and \$0xfff,%r10d # number of cores -1 per L1D .Lnocacheinfo: mov \$1,%eax cpuid cmp \$0,%r9d Loading @@ -98,8 +121,13 @@ OPENSSL_ia32_cpuid: .Lnotintel: bt \$28,%edx # test hyper-threading bit jnc .Ldone and \$0xefffffff,%edx # ~(1<<28) cmp \$0,%r10d je .Ldone or \$0x10000000,%edx # 1<<28 shr \$16,%ebx cmp %r10b,%bl # see if cache is shared cmp \$1,%bl # see if cache is shared ja .Ldone and \$0xefffffff,%edx # ~(1<<28) .Ldone: Loading
crypto/x86cpuid.pl +32 −6 Original line number Diff line number Diff line Loading @@ -23,6 +23,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &jnc (&label("done")); &xor ("eax","eax"); &cpuid (); &mov ("edi","eax"); # max value for standard query level &xor ("eax","eax"); &cmp ("ebx",0x756e6547); # "Genu" &setne (&LB("eax")); Loading @@ -33,7 +35,6 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &cmp ("ecx",0x6c65746e); # "ntel" &setne (&LB("eax")); &or ("ebp","eax"); # 0 indicates Intel CPU &mov ("esi",1); # "number of [AMD] cores" &jz (&label("intel")); &cmp ("ebx",0x68747541); # "Auth" Loading @@ -47,10 +48,10 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &or ("esi","eax"); # 0 indicates AMD CPU &jnz (&label("intel")); # AMD specific &mov ("eax",0x80000000); &cpuid (); &cmp ("eax",0x80000008); &mov ("esi",1); # "number of [AMD] cores" &jb (&label("intel")); &mov ("eax",0x80000008); Loading @@ -58,7 +59,30 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &movz ("esi",&LB("ecx")); # number of cores - 1 &inc ("esi"); # number of cores &mov ("eax",1); &cpuid (); &bt ("edx",28); &jnc (&label("done")); &shr ("ebx",16); &and ("ebx",0xff); &cmp ("ebx","esi"); &ja (&label("done")); &and ("edx",0xefffffff); # clear hyper-threading bit &jmp (&label("done")); &set_label("intel"); &cmp ("edi",4); &mov ("edi",-1); &jb (&label("nocacheinfo")); &mov ("eax",4); &mov ("ecx",0); # query L1D &cpuid (); &mov ("edi","eax"); &shr ("edi",14); &and ("edi",0xfff); # number of cores -1 per L1D &set_label("nocacheinfo"); &mov ("eax",1); &cpuid (); &cmp ("ebp",0); Loading @@ -70,17 +94,19 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &set_label("notP4"); &bt ("edx",28); # test hyper-threading bit &jnc (&label("done")); &and ("edx",0xefffffff); &cmp ("edi",0); &je (&label("done")); &or ("edx",0x10000000); &shr ("ebx",16); &and ("ebx",0xff); &cmp ("ebx","esi"); # see if cache is shared(*) &cmp (&LB("ebx"),1); &ja (&label("done")); &and ("edx",0xefffffff); # clear hyper-threading bit if not &set_label("done"); &mov ("eax","edx"); &mov ("edx","ecx"); &function_end("OPENSSL_ia32_cpuid"); # (*) on Core2 this value is set to 2 denoting the fact that L2 # cache is shared between cores. &external_label("OPENSSL_ia32cap_P"); Loading