Loading crypto/x86_64cpuid.pl +41 −16 Original line number Diff line number Diff line Loading @@ -47,7 +47,7 @@ OPENSSL_rdtsc: .type OPENSSL_ia32_cpuid,\@abi-omnipotent .align 16 OPENSSL_ia32_cpuid: mov %rbx,%r8 mov %rbx,%r8 # save %rbx xor %eax,%eax cpuid Loading Loading @@ -79,7 +79,15 @@ OPENSSL_ia32_cpuid: # AMD specific mov \$0x80000000,%eax cpuid cmp \$0x80000008,%eax cmp \$0x80000001,%eax jb .Lintel mov %eax,%r10d mov \$0x80000001,%eax cpuid or %ecx,%r9d and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11 cmp \$0x80000008,%r10d jb .Lintel mov \$0x80000008,%eax Loading @@ -90,12 +98,12 @@ OPENSSL_ia32_cpuid: mov \$1,%eax cpuid bt \$28,%edx # test hyper-threading bit jnc .Ldone jnc .Lgeneric shr \$16,%ebx # number of logical processors cmp %r10b,%bl ja .Ldone ja .Lgeneric and \$0xefffffff,%edx # ~(1<<28) jmp .Ldone jmp .Lgeneric .Lintel: cmp \$4,%r11d Loading @@ -121,21 +129,38 @@ OPENSSL_ia32_cpuid: or \$0x40000000,%edx # use reserved bit to skip unrolled loop .Lnotintel: bt \$28,%edx # test hyper-threading bit jnc .Ldone jnc .Lgeneric and \$0xefffffff,%edx # ~(1<<28) cmp \$0,%r10d je .Ldone je .Lgeneric or \$0x10000000,%edx # 1<<28 shr \$16,%ebx cmp \$1,%bl # see if cache is shared ja .Ldone ja .Lgeneric and \$0xefffffff,%edx # ~(1<<28) .Ldone: .Lgeneric: and \$0x00000800,%r9d # isolate AMD XOP flag and \$0xfffff7ff,%ecx or %r9d,%ecx # merge AMD XOP flag shl \$32,%rcx mov %edx,%eax mov %r8,%rbx or %rcx,%rax mov %edx,%ebx or %rcx,%rbx # compose capability vector in %rbx bt \$27+32,%rcx # check OSXSAVE bit jnc .Lclear_avx xor %ecx,%ecx # XCR0 .byte 0x0f,0x01,0xd0 # xgetbv and \$6,%eax # isolate XMM and YMM state support cmp \$6,%eax je .Ldone .Lclear_avx: mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) shl \$32,%rax and %rax,%rbx # clear AVX, FMA and AMD XOP bits .Ldone: mov %rbx,%rax mov %r8,%rbx # restore %rbx ret .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid Loading Loading @@ -250,7 +275,7 @@ OPENSSL_instrument_bus: mov %eax,$lasttick # lasttick = tick mov 
\$0,$lastdiff # lastdiff = 0 clflush ($out) lock .byte 0xf0 # lock add $lastdiff,($out) jmp .Loop .align 16 Loading @@ -260,7 +285,7 @@ OPENSSL_instrument_bus: mov %edx,$lasttick mov %eax,$lastdiff clflush ($out) lock .byte 0xf0 # lock add %eax,($out) lea 4($out),$out sub \$1,$cnt Loading @@ -284,7 +309,7 @@ OPENSSL_instrument_bus2: mov \$0,$lastdiff # lastdiff = 0 clflush ($out) lock .byte 0xf0 # lock add $lastdiff,($out) rdtsc # collect 1st diff Loading @@ -294,7 +319,7 @@ OPENSSL_instrument_bus2: mov %eax,$lastdiff # lastdiff = diff .Loop2: clflush ($out) lock .byte 0xf0 # lock add %eax,($out) # accumulate diff sub \$1,$max Loading crypto/x86cpuid.pl +47 −16 Original line number Diff line number Diff line Loading @@ -20,7 +20,7 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &pop ("eax"); &xor ("ecx","eax"); &bt ("ecx",21); &jnc (&label("done")); &jnc (&label("generic")); &xor ("eax","eax"); &cpuid (); &mov ("edi","eax"); # max value for standard query level Loading Loading @@ -51,7 +51,14 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } # AMD specific &mov ("eax",0x80000000); &cpuid (); &cmp ("eax",0x80000008); &cmp ("eax",0x80000001); &jb (&label("intel")); &mov ("esi","eax"); &mov ("eax",0x80000001); &cpuid (); &or ("ebp","ecx"); &and ("ebp",1<<11|1); # isolate XOP bit &cmp ("esi",0x80000008); &jb (&label("intel")); &mov ("eax",0x80000008); Loading @@ -62,13 +69,13 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &mov ("eax",1); &cpuid (); &bt ("edx",28); &jnc (&label("done")); &jnc (&label("generic")); &shr ("ebx",16); &and ("ebx",0xff); &cmp ("ebx","esi"); &ja (&label("done")); &ja (&label("generic")); &and ("edx",0xefffffff); # clear hyper-threading bit &jmp (&label("done")); &jmp (&label("generic")); &set_label("intel"); &cmp ("edi",4); Loading @@ -93,19 +100,42 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &or ("edx",1<<20); # use reserved bit to engage RC4_CHAR &set_label("notP4"); &bt ("edx",28); # test hyper-threading bit 
&jnc (&label("done")); &jnc (&label("generic")); &and ("edx",0xefffffff); &cmp ("edi",0); &je (&label("done")); &je (&label("generic")); &or ("edx",0x10000000); &shr ("ebx",16); &cmp (&LB("ebx"),1); &ja (&label("done")); &ja (&label("generic")); &and ("edx",0xefffffff); # clear hyper-threading bit if not &set_label("generic"); &and ("ebp",1<<11); # isolate AMD XOP flag &and ("ecx",~(1<<11)); &mov ("esi","edx"); &or ("ebp","ecx"); # merge AMD XOP flag &bt ("ecx",26); # check XSAVE bit &jnc (&label("done")); &bt ("ecx",27); # check OSXSAVE bit &jnc (&label("clear_xmm")); &xor ("ecx","ecx"); &data_byte(0x0f,0x01,0xd0); # xgetbv &and ("eax",6); &cmp ("eax",6); &je (&label("done")); &cmp ("eax",2); &je (&label("clear_avx")); &set_label("clear_xmm"); &and ("ebp",~(1<<25|1<<1)); # clear AESNI and PCLMULQDQ bits &and ("esi",~(1<<24)); # clear FXSR &set_label("clear_avx"); &and ("ebp",~(1<<28|1<<12|1<<11));# clear AVX, FMA and AMD XOP bits &set_label("done"); &mov ("eax","edx"); &mov ("edx","ecx"); &mov ("eax","esi"); &mov ("edx","ebp"); &function_end("OPENSSL_ia32_cpuid"); &external_label("OPENSSL_ia32cap_P"); Loading Loading @@ -199,8 +229,9 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &bt (&DWP(0,"ecx"),1); &jnc (&label("no_x87")); if ($sse2) { &bt (&DWP(0,"ecx"),26); &jnc (&label("no_sse2")); &and ("ecx",1<<26|1<<24); # check SSE2 and FXSR bits &cmp ("ecx",1<<26|1<<24); &jne (&label("no_sse2")); &pxor ("xmm0","xmm0"); &pxor ("xmm1","xmm1"); &pxor ("xmm2","xmm2"); Loading Loading @@ -331,7 +362,7 @@ my $max = "ebp"; &mov ($lasttick,"eax"); # lasttick = tick &mov ($lastdiff,0); # lastdiff = 0 &clflush(&DWP(0,$out)); &lock (); &data_byte(0xf0); # lock &add (&DWP(0,$out),$lastdiff); &jmp (&label("loop")); Loading @@ -342,7 +373,7 @@ my $max = "ebp"; &mov ($lasttick,"edx"); # lasttick = tick &mov ($lastdiff,"eax"); # lastdiff = diff &clflush(&DWP(0,$out)); &lock (); &data_byte(0xf0); # lock &add (&DWP(0,$out),"eax"); # accumulate diff &lea ($out,&DWP(4,$out)); # 
++$out &sub ($cnt,1); # --$cnt Loading Loading @@ -371,7 +402,7 @@ my $max = "ebp"; &mov ($lastdiff,0); # lastdiff = 0 &clflush(&DWP(0,$out)); &lock (); &data_byte(0xf0); # lock &add (&DWP(0,$out),$lastdiff); &rdtsc (); # collect 1st diff Loading @@ -383,7 +414,7 @@ my $max = "ebp"; &set_label("loop2",16); &clflush(&DWP(0,$out)); &lock (); &data_byte(0xf0); # lock &add (&DWP(0,$out),"eax"); # accumulate diff &sub ($max,1); Loading doc/crypto/OPENSSL_ia32cap.pod +45 −24 Original line number Diff line number Diff line Loading @@ -2,7 +2,7 @@ =head1 NAME OPENSSL_ia32cap - finding the IA-32 processor capabilities OPENSSL_ia32cap - the IA-32 processor capabilities vector =head1 SYNOPSIS Loading @@ -18,30 +18,52 @@ input value (see Intel Application Note #241618). Naturally it's meaningful on x86 and x86_64 platforms only. The variable is normally set up automatically upon toolkit initialization, but can be manipulated afterwards to modify crypto library behaviour. For the moment of this writing seven bits are significant, namely: moment of this writing following bits are significant: 1. bit #4 denoting presence of Time-Stamp Counter. 2. bit #20, reserved by Intel, is used to choose among RC4 code paths; 3. bit #23 denoting MMX support; 4. bit #25 denoting SSE support; 5. bit #26 denoting SSE2 support; 6. bit #28 denoting Hyperthreading, which is used to distiguish =item bit #4 denoting presence of Time-Stamp Counter. =item bit #19 denoting availability of CLFLUSH instruction; =item bit #20, reserved by Intel, is used to choose among RC4 code paths; =item bit #23 denoting MMX support; =item bit #24, FXSR bit, denoting availability of XMM registers; =item bit #25 denoting SSE support; =item bit #26 denoting SSE2 support; =item bit #28 denoting Hyperthreading, which is used to distiguish cores with shared cache; 7. bit #30, reserved by Intel, is used to choose among RC4 code =item bit #30, reserved by Intel, is used to choose among RC4 code paths; 8. 
bit #57 denoting Intel AES instruction set extension; =item bit #33 denoting availability of PCLMULQDQ instruction; =item bit #41 denoting SSSE3, Supplemental SSE3, support; =item bit #43 denoting AMD XOP support (forced to zero on Intel); =item bit #57 denoting AES-NI instruction set extension; =item bit #59, OSXSAVE bit, denoting availability of YMM registers; =item bit #60 denoting AVX extension; For example, clearing bit #26 at run-time disables high-performance SSE2 code present in the crypto library. You might have to do this if target OpenSSL application is executed on SSE2 capable CPU, but under control of OS which does not support SSE2 extensions. Even though you can manipulate the value programmatically, you most likely will find it more appropriate to set up an environment variable with the same name prior to starting target application, e.g. on Intel P4 processor 'env OPENSSL_ia32cap=0x12900010 apps/openssl', to achieve same effect without modifying the application source code. Alternatively you can reconfigure the toolkit with no-sse2 option and recompile. SSE2 code present in the crypto library, while clearing bit #24 disables SSE2 code operating on 128-bit XMM register bank. You might have to do the latter if target OpenSSL application is executed on SSE2 capable CPU, but under control of OS that does not enable XMM registers. Even though you can manipulate the value programmatically, you most likely will find it more appropriate to set up an environment variable with the same name prior to starting target application, e.g. on Intel P4 processor 'env OPENSSL_ia32cap=0x16980010 apps/openssl', to achieve same effect without modifying the application source code. Alternatively you can reconfigure the toolkit with no-sse2 option and recompile. Less intuitive is clearing bit #28. 
The truth is that it's not copied from CPUID output verbatim, but is adjusted to reflect whether or not Loading @@ -49,4 +71,3 @@ the data cache is actually shared between logical cores. This in turn affects the decision on whether or not expensive countermeasures against cache-timing attacks are applied, most notably in AES assembler module. =cut Loading
crypto/x86_64cpuid.pl +41 −16 Original line number Diff line number Diff line Loading @@ -47,7 +47,7 @@ OPENSSL_rdtsc: .type OPENSSL_ia32_cpuid,\@abi-omnipotent .align 16 OPENSSL_ia32_cpuid: mov %rbx,%r8 mov %rbx,%r8 # save %rbx xor %eax,%eax cpuid Loading Loading @@ -79,7 +79,15 @@ OPENSSL_ia32_cpuid: # AMD specific mov \$0x80000000,%eax cpuid cmp \$0x80000008,%eax cmp \$0x80000001,%eax jb .Lintel mov %eax,%r10d mov \$0x80000001,%eax cpuid or %ecx,%r9d and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11 cmp \$0x80000008,%r10d jb .Lintel mov \$0x80000008,%eax Loading @@ -90,12 +98,12 @@ OPENSSL_ia32_cpuid: mov \$1,%eax cpuid bt \$28,%edx # test hyper-threading bit jnc .Ldone jnc .Lgeneric shr \$16,%ebx # number of logical processors cmp %r10b,%bl ja .Ldone ja .Lgeneric and \$0xefffffff,%edx # ~(1<<28) jmp .Ldone jmp .Lgeneric .Lintel: cmp \$4,%r11d Loading @@ -121,21 +129,38 @@ OPENSSL_ia32_cpuid: or \$0x40000000,%edx # use reserved bit to skip unrolled loop .Lnotintel: bt \$28,%edx # test hyper-threading bit jnc .Ldone jnc .Lgeneric and \$0xefffffff,%edx # ~(1<<28) cmp \$0,%r10d je .Ldone je .Lgeneric or \$0x10000000,%edx # 1<<28 shr \$16,%ebx cmp \$1,%bl # see if cache is shared ja .Ldone ja .Lgeneric and \$0xefffffff,%edx # ~(1<<28) .Ldone: .Lgeneric: and \$0x00000800,%r9d # isolate AMD XOP flag and \$0xfffff7ff,%ecx or %r9d,%ecx # merge AMD XOP flag shl \$32,%rcx mov %edx,%eax mov %r8,%rbx or %rcx,%rax mov %edx,%ebx or %rcx,%rbx # compose capability vector in %rbx bt \$27+32,%rcx # check OSXSAVE bit jnc .Lclear_avx xor %ecx,%ecx # XCR0 .byte 0x0f,0x01,0xd0 # xgetbv and \$6,%eax # isolate XMM and YMM state support cmp \$6,%eax je .Ldone .Lclear_avx: mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) shl \$32,%rax and %rax,%rbx # clear AVX, FMA and AMD XOP bits .Ldone: mov %rbx,%rax mov %r8,%rbx # restore %rbx ret .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid Loading Loading @@ -250,7 +275,7 @@ OPENSSL_instrument_bus: mov %eax,$lasttick # lasttick = tick mov 
\$0,$lastdiff # lastdiff = 0 clflush ($out) lock .byte 0xf0 # lock add $lastdiff,($out) jmp .Loop .align 16 Loading @@ -260,7 +285,7 @@ OPENSSL_instrument_bus: mov %edx,$lasttick mov %eax,$lastdiff clflush ($out) lock .byte 0xf0 # lock add %eax,($out) lea 4($out),$out sub \$1,$cnt Loading @@ -284,7 +309,7 @@ OPENSSL_instrument_bus2: mov \$0,$lastdiff # lastdiff = 0 clflush ($out) lock .byte 0xf0 # lock add $lastdiff,($out) rdtsc # collect 1st diff Loading @@ -294,7 +319,7 @@ OPENSSL_instrument_bus2: mov %eax,$lastdiff # lastdiff = diff .Loop2: clflush ($out) lock .byte 0xf0 # lock add %eax,($out) # accumulate diff sub \$1,$max Loading
crypto/x86cpuid.pl +47 −16 Original line number Diff line number Diff line Loading @@ -20,7 +20,7 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &pop ("eax"); &xor ("ecx","eax"); &bt ("ecx",21); &jnc (&label("done")); &jnc (&label("generic")); &xor ("eax","eax"); &cpuid (); &mov ("edi","eax"); # max value for standard query level Loading Loading @@ -51,7 +51,14 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } # AMD specific &mov ("eax",0x80000000); &cpuid (); &cmp ("eax",0x80000008); &cmp ("eax",0x80000001); &jb (&label("intel")); &mov ("esi","eax"); &mov ("eax",0x80000001); &cpuid (); &or ("ebp","ecx"); &and ("ebp",1<<11|1); # isolate XOP bit &cmp ("esi",0x80000008); &jb (&label("intel")); &mov ("eax",0x80000008); Loading @@ -62,13 +69,13 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &mov ("eax",1); &cpuid (); &bt ("edx",28); &jnc (&label("done")); &jnc (&label("generic")); &shr ("ebx",16); &and ("ebx",0xff); &cmp ("ebx","esi"); &ja (&label("done")); &ja (&label("generic")); &and ("edx",0xefffffff); # clear hyper-threading bit &jmp (&label("done")); &jmp (&label("generic")); &set_label("intel"); &cmp ("edi",4); Loading @@ -93,19 +100,42 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &or ("edx",1<<20); # use reserved bit to engage RC4_CHAR &set_label("notP4"); &bt ("edx",28); # test hyper-threading bit &jnc (&label("done")); &jnc (&label("generic")); &and ("edx",0xefffffff); &cmp ("edi",0); &je (&label("done")); &je (&label("generic")); &or ("edx",0x10000000); &shr ("ebx",16); &cmp (&LB("ebx"),1); &ja (&label("done")); &ja (&label("generic")); &and ("edx",0xefffffff); # clear hyper-threading bit if not &set_label("generic"); &and ("ebp",1<<11); # isolate AMD XOP flag &and ("ecx",~(1<<11)); &mov ("esi","edx"); &or ("ebp","ecx"); # merge AMD XOP flag &bt ("ecx",26); # check XSAVE bit &jnc (&label("done")); &bt ("ecx",27); # check OSXSAVE bit &jnc (&label("clear_xmm")); &xor ("ecx","ecx"); &data_byte(0x0f,0x01,0xd0); # xgetbv &and 
("eax",6); &cmp ("eax",6); &je (&label("done")); &cmp ("eax",2); &je (&label("clear_avx")); &set_label("clear_xmm"); &and ("ebp",~(1<<25|1<<1)); # clear AESNI and PCLMULQDQ bits &and ("esi",~(1<<24)); # clear FXSR &set_label("clear_avx"); &and ("ebp",~(1<<28|1<<12|1<<11));# clear AVX, FMA and AMD XOP bits &set_label("done"); &mov ("eax","edx"); &mov ("edx","ecx"); &mov ("eax","esi"); &mov ("edx","ebp"); &function_end("OPENSSL_ia32_cpuid"); &external_label("OPENSSL_ia32cap_P"); Loading Loading @@ -199,8 +229,9 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &bt (&DWP(0,"ecx"),1); &jnc (&label("no_x87")); if ($sse2) { &bt (&DWP(0,"ecx"),26); &jnc (&label("no_sse2")); &and ("ecx",1<<26|1<<24); # check SSE2 and FXSR bits &cmp ("ecx",1<<26|1<<24); &jne (&label("no_sse2")); &pxor ("xmm0","xmm0"); &pxor ("xmm1","xmm1"); &pxor ("xmm2","xmm2"); Loading Loading @@ -331,7 +362,7 @@ my $max = "ebp"; &mov ($lasttick,"eax"); # lasttick = tick &mov ($lastdiff,0); # lastdiff = 0 &clflush(&DWP(0,$out)); &lock (); &data_byte(0xf0); # lock &add (&DWP(0,$out),$lastdiff); &jmp (&label("loop")); Loading @@ -342,7 +373,7 @@ my $max = "ebp"; &mov ($lasttick,"edx"); # lasttick = tick &mov ($lastdiff,"eax"); # lastdiff = diff &clflush(&DWP(0,$out)); &lock (); &data_byte(0xf0); # lock &add (&DWP(0,$out),"eax"); # accumulate diff &lea ($out,&DWP(4,$out)); # ++$out &sub ($cnt,1); # --$cnt Loading Loading @@ -371,7 +402,7 @@ my $max = "ebp"; &mov ($lastdiff,0); # lastdiff = 0 &clflush(&DWP(0,$out)); &lock (); &data_byte(0xf0); # lock &add (&DWP(0,$out),$lastdiff); &rdtsc (); # collect 1st diff Loading @@ -383,7 +414,7 @@ my $max = "ebp"; &set_label("loop2",16); &clflush(&DWP(0,$out)); &lock (); &data_byte(0xf0); # lock &add (&DWP(0,$out),"eax"); # accumulate diff &sub ($max,1); Loading
doc/crypto/OPENSSL_ia32cap.pod +45 −24 Original line number Diff line number Diff line Loading @@ -2,7 +2,7 @@ =head1 NAME OPENSSL_ia32cap - finding the IA-32 processor capabilities OPENSSL_ia32cap - the IA-32 processor capabilities vector =head1 SYNOPSIS Loading @@ -18,30 +18,52 @@ input value (see Intel Application Note #241618). Naturally it's meaningful on x86 and x86_64 platforms only. The variable is normally set up automatically upon toolkit initialization, but can be manipulated afterwards to modify crypto library behaviour. For the moment of this writing seven bits are significant, namely: moment of this writing following bits are significant: 1. bit #4 denoting presence of Time-Stamp Counter. 2. bit #20, reserved by Intel, is used to choose among RC4 code paths; 3. bit #23 denoting MMX support; 4. bit #25 denoting SSE support; 5. bit #26 denoting SSE2 support; 6. bit #28 denoting Hyperthreading, which is used to distinguish =item bit #4 denoting presence of Time-Stamp Counter. =item bit #19 denoting availability of CLFLUSH instruction; =item bit #20, reserved by Intel, is used to choose among RC4 code paths; =item bit #23 denoting MMX support; =item bit #24, FXSR bit, denoting availability of XMM registers; =item bit #25 denoting SSE support; =item bit #26 denoting SSE2 support; =item bit #28 denoting Hyperthreading, which is used to distinguish cores with shared cache; 7. bit #30, reserved by Intel, is used to choose among RC4 code =item bit #30, reserved by Intel, is used to choose among RC4 code paths; 8. 
bit #57 denoting Intel AES instruction set extension; =item bit #33 denoting availability of PCLMULQDQ instruction; =item bit #41 denoting SSSE3, Supplemental SSE3, support; =item bit #43 denoting AMD XOP support (forced to zero on Intel); =item bit #57 denoting AES-NI instruction set extension; =item bit #59, OSXSAVE bit, denoting availability of YMM registers; =item bit #60 denoting AVX extension; For example, clearing bit #26 at run-time disables high-performance SSE2 code present in the crypto library. You might have to do this if target OpenSSL application is executed on SSE2 capable CPU, but under control of OS which does not support SSE2 extensions. Even though you can manipulate the value programmatically, you most likely will find it more appropriate to set up an environment variable with the same name prior to starting target application, e.g. on Intel P4 processor 'env OPENSSL_ia32cap=0x12900010 apps/openssl', to achieve same effect without modifying the application source code. Alternatively you can reconfigure the toolkit with no-sse2 option and recompile. SSE2 code present in the crypto library, while clearing bit #24 disables SSE2 code operating on 128-bit XMM register bank. You might have to do the latter if target OpenSSL application is executed on SSE2 capable CPU, but under control of OS that does not enable XMM registers. Even though you can manipulate the value programmatically, you most likely will find it more appropriate to set up an environment variable with the same name prior to starting target application, e.g. on Intel P4 processor 'env OPENSSL_ia32cap=0x16980010 apps/openssl', to achieve same effect without modifying the application source code. Alternatively you can reconfigure the toolkit with no-sse2 option and recompile. Less intuitive is clearing bit #28. 
The truth is that it's not copied from CPUID output verbatim, but is adjusted to reflect whether or not Loading @@ -49,4 +71,3 @@ the data cache is actually shared between logical cores. This in turn affects the decision on whether or not expensive countermeasures against cache-timing attacks are applied, most notably in AES assembler module. =cut