Loading crypto/sparc_arch.h 0 → 100644 +101 −0 Original line number Diff line number Diff line #ifndef __SPARC_ARCH_H__ #define __SPARC_ARCH_H__ #define SPARCV9_TICK_PRIVILEGED (1<<0) #define SPARCV9_PREFER_FPU (1<<1) #define SPARCV9_VIS1 (1<<2) #define SPARCV9_VIS2 (1<<3) /* reserved */ #define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ #define SPARCV9_BLK (1<<5) /* VIS1 block copy */ #define SPARCV9_VIS3 (1<<6) #define SPARCV9_RANDOM (1<<7) #define SPARCV9_64BIT_STACK (1<<8) /* * OPENSSL_sparcv9cap_P[1] is copy of Compatibility Feature Register, * %asr26, SPARC-T4 and later. There is no SPARCV9_CFR bit in * OPENSSL_sparcv9cap_P[0], as %cfr copy is sufficient... */ #define CFR_AES 0x00000001 /* Supports AES opcodes */ #define CFR_DES 0x00000002 /* Supports DES opcodes */ #define CFR_KASUMI 0x00000004 /* Supports KASUMI opcodes */ #define CFR_CAMELLIA 0x00000008 /* Supports CAMELLIA opcodes*/ #define CFR_MD5 0x00000010 /* Supports MD5 opcodes */ #define CFR_SHA1 0x00000020 /* Supports SHA1 opcodes */ #define CFR_SHA256 0x00000040 /* Supports SHA256 opcodes */ #define CFR_SHA512 0x00000080 /* Supports SHA512 opcodes */ #define CFR_MPMUL 0x00000100 /* Supports MPMUL opcodes */ #define CFR_MONTMUL 0x00000200 /* Supports MONTMUL opcodes */ #define CFR_MONTSQR 0x00000400 /* Supports MONTSQR opcodes */ #define CFR_CRC32C 0x00000800 /* Supports CRC32C opcodes */ #if defined(OPENSSL_PIC) && !defined(__PIC__) # define __PIC__ #endif #if defined(__SUNPRO_C) && defined(__sparcv9) && !defined(__arch64__) # define __arch64__ #endif #define SPARC_PIC_THUNK(reg) \ .align 32; \ .Lpic_thunk: \ jmp %o7 + 8; \ add %o7, reg, reg; #define SPARC_PIC_THUNK_CALL(reg) \ sethi %hi(_GLOBAL_OFFSET_TABLE_-4), reg; \ call .Lpic_thunk; \ or reg, %lo(_GLOBAL_OFFSET_TABLE_+4), reg; #if 1 # define SPARC_SETUP_GOT_REG(reg) SPARC_PIC_THUNK_CALL(reg) #else # define SPARC_SETUP_GOT_REG(reg) \ sethi %hi(_GLOBAL_OFFSET_TABLE_-4), reg; \ call .+8; \ or reg,%lo(_GLOBAL_OFFSET_TABLE_+4), reg; \ add 
%o7, reg, reg #endif #if defined(__arch64__) # define SPARC_LOAD_ADDRESS(SYM, reg) \ setx SYM, %o7, reg; # define LDPTR ldx # define SIZE_T_CC %xcc # define STACK_FRAME 192 # define STACK_BIAS 2047 # define STACK_7thARG (STACK_BIAS+176) #else # define SPARC_LOAD_ADDRESS(SYM, reg) \ set SYM, reg; # define LDPTR ld # define SIZE_T_CC %icc # define STACK_FRAME 112 # define STACK_BIAS 0 # define STACK_7thARG 92 # define SPARC_LOAD_ADDRESS_LEAF(SYM,reg,tmp) SPARC_LOAD_ADDRESS(SYM,reg) #endif #ifdef __PIC__ # undef SPARC_LOAD_ADDRESS # undef SPARC_LOAD_ADDRESS_LEAF # define SPARC_LOAD_ADDRESS(SYM, reg) \ SPARC_SETUP_GOT_REG(reg); \ sethi %hi(SYM), %o7; \ or %o7, %lo(SYM), %o7; \ LDPTR [reg + %o7], reg; #endif #ifndef SPARC_LOAD_ADDRESS_LEAF # define SPARC_LOAD_ADDRESS_LEAF(SYM, reg, tmp) \ mov %o7, tmp; \ SPARC_LOAD_ADDRESS(SYM, reg) \ mov tmp, %o7; #endif #endif /* __SPARC_ARCH_H__ */ crypto/sparccpuid.S +127 −0 Original line number Diff line number Diff line Loading @@ -251,6 +251,11 @@ _sparcv9_vis1_probe: ! UltraSPARC IIe 7 ! UltraSPARC III 7 ! UltraSPARC T1 24 ! SPARC T4 65(*) ! ! (*) result has lesser to do with VIS instruction latencies, rdtick ! appears that slow, but it does the trick in sense that FP and ! VIS code paths are still slower than integer-only ones. ! ! Numbers for T2 and SPARC64 V-VII are more than welcomed. ! 
Loading @@ -260,6 +265,8 @@ _sparcv9_vis1_probe: .global _sparcv9_vis1_instrument .align 8 _sparcv9_vis1_instrument: .word 0x81b00d80 !fxor %f0,%f0,%f0 .word 0x85b08d82 !fxor %f2,%f2,%f2 .word 0x91410000 !rd %tick,%o0 .word 0x81b00d80 !fxor %f0,%f0,%f0 .word 0x85b08d82 !fxor %f2,%f2,%f2 Loading Loading @@ -314,6 +321,30 @@ _sparcv9_fmadd_probe: .type _sparcv9_fmadd_probe,#function .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe .global _sparcv9_rdcfr .align 8 _sparcv9_rdcfr: retl .word 0x91468000 !rd %asr26,%o0 .type _sparcv9_rdcfr,#function .size _sparcv9_rdcfr,.-_sparcv9_rdcfr .global _sparcv9_vis3_probe .align 8 _sparcv9_vis3_probe: retl .word 0x81b022a0 !xmulx %g0,%g0,%g0 .type _sparcv9_vis3_probe,#function .size _sparcv9_vis3_probe,.-_sparcv9_vis3_probe .global _sparcv9_random .align 8 _sparcv9_random: retl .word 0x91b002a0 !random %o0 .type _sparcv9_random,#function .size _sparcv9_random,.-_sparcv9_random .global OPENSSL_cleanse .align 32 OPENSSL_cleanse: Loading Loading @@ -397,6 +428,102 @@ OPENSSL_cleanse: .type OPENSSL_cleanse,#function .size OPENSSL_cleanse,.-OPENSSL_cleanse .global _sparcv9_vis1_instrument_bus .align 8 _sparcv9_vis1_instrument_bus: mov %o1,%o3 ! save cnt .word 0x99410000 !rd %tick,%o4 ! tick mov %o4,%o5 ! lasttick = tick set 0,%g4 ! diff andn %o0,63,%g1 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load .word 0x8143e040 !membar #Sync .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit .word 0x8143e040 !membar #Sync ld [%o0],%o4 add %o4,%g4,%g4 .word 0xc9e2100c !cas [%o0],%o4,%g4 .Loop: .word 0x99410000 !rd %tick,%o4 sub %o4,%o5,%g4 ! diff=tick-lasttick mov %o4,%o5 ! lasttick=tick andn %o0,63,%g1 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load .word 0x8143e040 !membar #Sync .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit .word 0x8143e040 !membar #Sync ld [%o0],%o4 add %o4,%g4,%g4 .word 0xc9e2100c !cas [%o0],%o4,%g4 subcc %o1,1,%o1 ! --$cnt bnz .Loop add %o0,4,%o0 ! 
++$out retl mov %o3,%o0 .type _sparcv9_vis1_instrument_bus,#function .size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus .global _sparcv9_vis1_instrument_bus2 .align 8 _sparcv9_vis1_instrument_bus2: mov %o1,%o3 ! save cnt sll %o1,2,%o1 ! cnt*=4 .word 0x99410000 !rd %tick,%o4 ! tick mov %o4,%o5 ! lasttick = tick set 0,%g4 ! diff andn %o0,63,%g1 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load .word 0x8143e040 !membar #Sync .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit .word 0x8143e040 !membar #Sync ld [%o0],%o4 add %o4,%g4,%g4 .word 0xc9e2100c !cas [%o0],%o4,%g4 .word 0x99410000 !rd %tick,%o4 ! tick sub %o4,%o5,%g4 ! diff=tick-lasttick mov %o4,%o5 ! lasttick=tick mov %g4,%g5 ! lastdiff=diff .Loop2: andn %o0,63,%g1 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load .word 0x8143e040 !membar #Sync .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit .word 0x8143e040 !membar #Sync ld [%o0],%o4 add %o4,%g4,%g4 .word 0xc9e2100c !cas [%o0],%o4,%g4 subcc %o2,1,%o2 ! --max bz .Ldone2 nop .word 0x99410000 !rd %tick,%o4 ! tick sub %o4,%o5,%g4 ! diff=tick-lasttick mov %o4,%o5 ! lasttick=tick cmp %g4,%g5 mov %g4,%g5 ! lastdiff=diff .word 0x83408000 !rd %ccr,%g1 and %g1,4,%g1 ! isolate zero flag xor %g1,4,%g1 ! flip zero flag subcc %o1,%g1,%o1 ! conditional --$cnt bnz .Loop2 add %o0,%g1,%o0 ! 
conditional ++$out .Ldone2: srl %o1,2,%o1 retl sub %o3,%o1,%o0 .type _sparcv9_vis1_instrument_bus2,#function .size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2 .section ".init",#alloc,#execinstr call OPENSSL_cpuid_setup nop crypto/sparcv9cap.c +88 −23 Original line number Diff line number Diff line Loading @@ -4,15 +4,15 @@ #include <setjmp.h> #include <signal.h> #include <sys/time.h> #include <unistd.h> #include <openssl/bn.h> #define SPARCV9_TICK_PRIVILEGED (1<<0) #define SPARCV9_PREFER_FPU (1<<1) #define SPARCV9_VIS1 (1<<2) #define SPARCV9_VIS2 (1<<3) /* reserved */ #define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ #include "sparc_arch.h" static int OPENSSL_sparcv9cap_P=SPARCV9_TICK_PRIVILEGED; #if defined(__GNUC__) && defined(__linux) __attribute__((visibility("hidden"))) #endif unsigned int OPENSSL_sparcv9cap_P[2]={SPARCV9_TICK_PRIVILEGED,0}; int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num) { Loading @@ -20,7 +20,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); if (num>=8 && !(num&1) && (OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == (OPENSSL_sparcv9cap_P[0]&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == (SPARCV9_PREFER_FPU|SPARCV9_VIS1)) return bn_mul_mont_fpu(rp,ap,bp,np,n0,num); else Loading @@ -32,10 +32,15 @@ void _sparcv9_vis1_probe(void); unsigned long _sparcv9_vis1_instrument(void); void _sparcv9_vis2_probe(void); void _sparcv9_fmadd_probe(void); unsigned long _sparcv9_rdcfr(void); void _sparcv9_vis3_probe(void); unsigned long _sparcv9_random(void); size_t _sparcv9_vis1_instrument_bus(unsigned int *,size_t); size_t _sparcv9_vis1_instrument_bus2(unsigned int *,size_t,size_t); unsigned long OPENSSL_rdtsc(void) { if (OPENSSL_sparcv9cap_P&SPARCV9_TICK_PRIVILEGED) if 
(OPENSSL_sparcv9cap_P[0]&SPARCV9_TICK_PRIVILEGED) #if defined(__sun) && defined(__SVR4) return gethrtime(); #else Loading @@ -45,6 +50,24 @@ unsigned long OPENSSL_rdtsc(void) return _sparcv9_rdtick(); } size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt) { if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) == SPARCV9_BLK) return _sparcv9_vis1_instrument_bus(out,cnt); else return 0; } size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max) { if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) == SPARCV9_BLK) return _sparcv9_vis1_instrument_bus2(out,cnt,max); else return 0; } #if 0 && defined(__sun) && defined(__SVR4) /* This code path is disabled, because of incompatibility of * libdevinfo.so.1 and libmalloc.so.1 (see below for details) Loading @@ -69,18 +92,18 @@ static int walk_nodename(di_node_t node, di_node_name_t di_node_name) if (!strcmp (name,"SUNW,UltraSPARC") || !strncmp(name,"SUNW,UltraSPARC-I",17)) /* covers II,III,IV */ { OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU|SPARCV9_VIS1; OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU|SPARCV9_VIS1; /* %tick is privileged only on UltraSPARC-I/II, but not IIe */ if (name[14]!='\0' && name[17]!='\0' && name[18]!='\0') OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return DI_WALK_TERMINATE; } /* This is expected to catch remaining UltraSPARCs, such as T1 */ else if (!strncmp(name,"SUNW,UltraSPARC",15)) { OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return DI_WALK_TERMINATE; } Loading @@ -99,7 +122,7 @@ void OPENSSL_cpuid_setup(void) if ((e=getenv("OPENSSL_sparcv9cap"))) { OPENSSL_sparcv9cap_P=strtoul(e,NULL,0); OPENSSL_sparcv9cap_P[0]=strtoul(e,NULL,0); return; } Loading @@ -107,17 +130,17 @@ void OPENSSL_cpuid_setup(void) { if (strcmp(si,"sun4v")) /* FPU is preferred for all CPUs, but US-T1/2 */ OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU; 
OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU; } if (sysinfo(SI_ISALIST,si,sizeof(si))>0) { if (strstr(si,"+vis")) OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1|SPARCV9_BLK; if (strstr(si,"+vis2")) { OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return; } } Loading Loading @@ -177,12 +200,14 @@ void OPENSSL_cpuid_setup(void) if ((e=getenv("OPENSSL_sparcv9cap"))) { OPENSSL_sparcv9cap_P=strtoul(e,NULL,0); OPENSSL_sparcv9cap_P[0]=strtoul(e,NULL,0); /* an optional ":value" suffix is parsed into OPENSSL_sparcv9cap_P[1] */ if ((e=strchr(e,':'))) OPENSSL_sparcv9cap_P[1]=strtoul(e+1,NULL,0); return; } /* Initial value, fits UltraSPARC-I&II... */ OPENSSL_sparcv9cap_P = SPARCV9_PREFER_FPU|SPARCV9_TICK_PRIVILEGED; OPENSSL_sparcv9cap_P[0] = SPARCV9_PREFER_FPU|SPARCV9_TICK_PRIVILEGED; sigfillset(&all_masked); sigdelset(&all_masked,SIGILL); Loading @@ -205,33 +230,73 @@ void OPENSSL_cpuid_setup(void) if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_rdtick(); OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; } if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_vis1_probe(); OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1|SPARCV9_BLK; /* detect UltraSPARC-Tx, see sparccpuid.S for details... */ if (_sparcv9_vis1_instrument() >= 12) OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU); OPENSSL_sparcv9cap_P[0] &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU); else { _sparcv9_vis2_probe(); OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; } } if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_fmadd_probe(); OPENSSL_sparcv9cap_P |= SPARCV9_FMADD; OPENSSL_sparcv9cap_P[0] |= SPARCV9_FMADD; } /* * VIS3 flag is tested independently from VIS1, unlike VIS2 that is, * because VIS3 defines even integer instructions. 
*/ if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_vis3_probe(); OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS3; } if (sigsetjmp(common_jmp,1) == 0) { (void)_sparcv9_random(); OPENSSL_sparcv9cap_P[0] |= SPARCV9_RANDOM; } /* * Until a better solution is found, _sparcv9_rdcfr is gated on the * VIS3 flag, because it goes into an uninterruptible endless * loop on UltraSPARC II running Solaris. Things might be * different on Linux... */ if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3) && sigsetjmp(common_jmp,1) == 0) { OPENSSL_sparcv9cap_P[1] = (unsigned int)_sparcv9_rdcfr(); } sigaction(SIGBUS,&bus_oact,NULL); sigaction(SIGILL,&ill_oact,NULL); sigprocmask(SIG_SETMASK,&oset,NULL); if (sizeof(size_t)==8) OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; #ifdef __linux else { /* NOTE(review): 340 is presumably the Linux/sparc kern_features syscall number; bit 0 of its result is what enables SPARCV9_64BIT_STACK here -- confirm against the kernel syscall table */ int ret = syscall(340); if (ret>=0 && ret&1) OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; } #endif } #endif Loading
crypto/sparc_arch.h 0 → 100644 +101 −0 Original line number Diff line number Diff line #ifndef __SPARC_ARCH_H__ #define __SPARC_ARCH_H__ #define SPARCV9_TICK_PRIVILEGED (1<<0) #define SPARCV9_PREFER_FPU (1<<1) #define SPARCV9_VIS1 (1<<2) #define SPARCV9_VIS2 (1<<3) /* reserved */ #define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ #define SPARCV9_BLK (1<<5) /* VIS1 block copy */ #define SPARCV9_VIS3 (1<<6) #define SPARCV9_RANDOM (1<<7) #define SPARCV9_64BIT_STACK (1<<8) /* * OPENSSL_sparcv9cap_P[1] is copy of Compatibility Feature Register, * %asr26, SPARC-T4 and later. There is no SPARCV9_CFR bit in * OPENSSL_sparcv9cap_P[0], as %cfr copy is sufficient... */ #define CFR_AES 0x00000001 /* Supports AES opcodes */ #define CFR_DES 0x00000002 /* Supports DES opcodes */ #define CFR_KASUMI 0x00000004 /* Supports KASUMI opcodes */ #define CFR_CAMELLIA 0x00000008 /* Supports CAMELLIA opcodes*/ #define CFR_MD5 0x00000010 /* Supports MD5 opcodes */ #define CFR_SHA1 0x00000020 /* Supports SHA1 opcodes */ #define CFR_SHA256 0x00000040 /* Supports SHA256 opcodes */ #define CFR_SHA512 0x00000080 /* Supports SHA512 opcodes */ #define CFR_MPMUL 0x00000100 /* Supports MPMUL opcodes */ #define CFR_MONTMUL 0x00000200 /* Supports MONTMUL opcodes */ #define CFR_MONTSQR 0x00000400 /* Supports MONTSQR opcodes */ #define CFR_CRC32C 0x00000800 /* Supports CRC32C opcodes */ #if defined(OPENSSL_PIC) && !defined(__PIC__) # define __PIC__ #endif #if defined(__SUNPRO_C) && defined(__sparcv9) && !defined(__arch64__) # define __arch64__ #endif #define SPARC_PIC_THUNK(reg) \ .align 32; \ .Lpic_thunk: \ jmp %o7 + 8; \ add %o7, reg, reg; #define SPARC_PIC_THUNK_CALL(reg) \ sethi %hi(_GLOBAL_OFFSET_TABLE_-4), reg; \ call .Lpic_thunk; \ or reg, %lo(_GLOBAL_OFFSET_TABLE_+4), reg; #if 1 # define SPARC_SETUP_GOT_REG(reg) SPARC_PIC_THUNK_CALL(reg) #else # define SPARC_SETUP_GOT_REG(reg) \ sethi %hi(_GLOBAL_OFFSET_TABLE_-4), reg; \ call .+8; \ or reg,%lo(_GLOBAL_OFFSET_TABLE_+4), reg; \ add %o7, reg, 
reg #endif #if defined(__arch64__) # define SPARC_LOAD_ADDRESS(SYM, reg) \ setx SYM, %o7, reg; # define LDPTR ldx # define SIZE_T_CC %xcc # define STACK_FRAME 192 # define STACK_BIAS 2047 # define STACK_7thARG (STACK_BIAS+176) #else # define SPARC_LOAD_ADDRESS(SYM, reg) \ set SYM, reg; # define LDPTR ld # define SIZE_T_CC %icc # define STACK_FRAME 112 # define STACK_BIAS 0 # define STACK_7thARG 92 # define SPARC_LOAD_ADDRESS_LEAF(SYM,reg,tmp) SPARC_LOAD_ADDRESS(SYM,reg) #endif #ifdef __PIC__ # undef SPARC_LOAD_ADDRESS # undef SPARC_LOAD_ADDRESS_LEAF # define SPARC_LOAD_ADDRESS(SYM, reg) \ SPARC_SETUP_GOT_REG(reg); \ sethi %hi(SYM), %o7; \ or %o7, %lo(SYM), %o7; \ LDPTR [reg + %o7], reg; #endif #ifndef SPARC_LOAD_ADDRESS_LEAF # define SPARC_LOAD_ADDRESS_LEAF(SYM, reg, tmp) \ mov %o7, tmp; \ SPARC_LOAD_ADDRESS(SYM, reg) \ mov tmp, %o7; #endif #endif /* __SPARC_ARCH_H__ */
crypto/sparccpuid.S +127 −0 Original line number Diff line number Diff line Loading @@ -251,6 +251,11 @@ _sparcv9_vis1_probe: ! UltraSPARC IIe 7 ! UltraSPARC III 7 ! UltraSPARC T1 24 ! SPARC T4 65(*) ! ! (*) result has less to do with VIS instruction latencies, rdtick ! appears that slow, but it does the trick in sense that FP and ! VIS code paths are still slower than integer-only ones. ! ! Numbers for T2 and SPARC64 V-VII are more than welcome. ! Loading @@ -260,6 +265,8 @@ _sparcv9_vis1_probe: .global _sparcv9_vis1_instrument .align 8 _sparcv9_vis1_instrument: .word 0x81b00d80 !fxor %f0,%f0,%f0 .word 0x85b08d82 !fxor %f2,%f2,%f2 .word 0x91410000 !rd %tick,%o0 .word 0x81b00d80 !fxor %f0,%f0,%f0 .word 0x85b08d82 !fxor %f2,%f2,%f2 Loading Loading @@ -314,6 +321,30 @@ _sparcv9_fmadd_probe: .type _sparcv9_fmadd_probe,#function .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe .global _sparcv9_rdcfr .align 8 _sparcv9_rdcfr: retl .word 0x91468000 !rd %asr26,%o0 .type _sparcv9_rdcfr,#function .size _sparcv9_rdcfr,.-_sparcv9_rdcfr .global _sparcv9_vis3_probe .align 8 _sparcv9_vis3_probe: retl .word 0x81b022a0 !xmulx %g0,%g0,%g0 .type _sparcv9_vis3_probe,#function .size _sparcv9_vis3_probe,.-_sparcv9_vis3_probe .global _sparcv9_random .align 8 _sparcv9_random: retl .word 0x91b002a0 !random %o0 .type _sparcv9_random,#function .size _sparcv9_random,.-_sparcv9_random .global OPENSSL_cleanse .align 32 OPENSSL_cleanse: Loading Loading @@ -397,6 +428,102 @@ OPENSSL_cleanse: .type OPENSSL_cleanse,#function .size OPENSSL_cleanse,.-OPENSSL_cleanse .global _sparcv9_vis1_instrument_bus .align 8 _sparcv9_vis1_instrument_bus: mov %o1,%o3 ! save cnt .word 0x99410000 !rd %tick,%o4 ! tick mov %o4,%o5 ! lasttick = tick set 0,%g4 ! diff andn %o0,63,%g1 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load .word 0x8143e040 !membar #Sync .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! 
block store and commit .word 0x8143e040 !membar #Sync ld [%o0],%o4 add %o4,%g4,%g4 .word 0xc9e2100c !cas [%o0],%o4,%g4 .Loop: .word 0x99410000 !rd %tick,%o4 sub %o4,%o5,%g4 ! diff=tick-lasttick mov %o4,%o5 ! lasttick=tick andn %o0,63,%g1 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load .word 0x8143e040 !membar #Sync .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit .word 0x8143e040 !membar #Sync ld [%o0],%o4 add %o4,%g4,%g4 .word 0xc9e2100c !cas [%o0],%o4,%g4 subcc %o1,1,%o1 ! --$cnt bnz .Loop add %o0,4,%o0 ! ++$out retl mov %o3,%o0 .type _sparcv9_vis1_instrument_bus,#function .size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus .global _sparcv9_vis1_instrument_bus2 .align 8 _sparcv9_vis1_instrument_bus2: mov %o1,%o3 ! save cnt sll %o1,2,%o1 ! cnt*=4 .word 0x99410000 !rd %tick,%o4 ! tick mov %o4,%o5 ! lasttick = tick set 0,%g4 ! diff andn %o0,63,%g1 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load .word 0x8143e040 !membar #Sync .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit .word 0x8143e040 !membar #Sync ld [%o0],%o4 add %o4,%g4,%g4 .word 0xc9e2100c !cas [%o0],%o4,%g4 .word 0x99410000 !rd %tick,%o4 ! tick sub %o4,%o5,%g4 ! diff=tick-lasttick mov %o4,%o5 ! lasttick=tick mov %g4,%g5 ! lastdiff=diff .Loop2: andn %o0,63,%g1 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load .word 0x8143e040 !membar #Sync .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit .word 0x8143e040 !membar #Sync ld [%o0],%o4 add %o4,%g4,%g4 .word 0xc9e2100c !cas [%o0],%o4,%g4 subcc %o2,1,%o2 ! --max bz .Ldone2 nop .word 0x99410000 !rd %tick,%o4 ! tick sub %o4,%o5,%g4 ! diff=tick-lasttick mov %o4,%o5 ! lasttick=tick cmp %g4,%g5 mov %g4,%g5 ! lastdiff=diff .word 0x83408000 !rd %ccr,%g1 and %g1,4,%g1 ! isolate zero flag xor %g1,4,%g1 ! flip zero flag subcc %o1,%g1,%o1 ! conditional --$cnt bnz .Loop2 add %o0,%g1,%o0 ! 
conditional ++$out .Ldone2: srl %o1,2,%o1 retl sub %o3,%o1,%o0 .type _sparcv9_vis1_instrument_bus2,#function .size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2 .section ".init",#alloc,#execinstr call OPENSSL_cpuid_setup nop
crypto/sparcv9cap.c +88 −23 Original line number Diff line number Diff line Loading @@ -4,15 +4,15 @@ #include <setjmp.h> #include <signal.h> #include <sys/time.h> #include <unistd.h> #include <openssl/bn.h> #define SPARCV9_TICK_PRIVILEGED (1<<0) #define SPARCV9_PREFER_FPU (1<<1) #define SPARCV9_VIS1 (1<<2) #define SPARCV9_VIS2 (1<<3) /* reserved */ #define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ #include "sparc_arch.h" static int OPENSSL_sparcv9cap_P=SPARCV9_TICK_PRIVILEGED; #if defined(__GNUC__) && defined(__linux) __attribute__((visibility("hidden"))) #endif unsigned int OPENSSL_sparcv9cap_P[2]={SPARCV9_TICK_PRIVILEGED,0}; int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num) { Loading @@ -20,7 +20,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); if (num>=8 && !(num&1) && (OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == (OPENSSL_sparcv9cap_P[0]&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == (SPARCV9_PREFER_FPU|SPARCV9_VIS1)) return bn_mul_mont_fpu(rp,ap,bp,np,n0,num); else Loading @@ -32,10 +32,15 @@ void _sparcv9_vis1_probe(void); unsigned long _sparcv9_vis1_instrument(void); void _sparcv9_vis2_probe(void); void _sparcv9_fmadd_probe(void); unsigned long _sparcv9_rdcfr(void); void _sparcv9_vis3_probe(void); unsigned long _sparcv9_random(void); size_t _sparcv9_vis1_instrument_bus(unsigned int *,size_t); size_t _sparcv9_vis1_instrument_bus2(unsigned int *,size_t,size_t); unsigned long OPENSSL_rdtsc(void) { if (OPENSSL_sparcv9cap_P&SPARCV9_TICK_PRIVILEGED) if (OPENSSL_sparcv9cap_P[0]&SPARCV9_TICK_PRIVILEGED) #if defined(__sun) && defined(__SVR4) return gethrtime(); #else Loading @@ -45,6 +50,24 @@ unsigned long OPENSSL_rdtsc(void) return _sparcv9_rdtick(); } size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt) { if 
((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) == SPARCV9_BLK) return _sparcv9_vis1_instrument_bus(out,cnt); else return 0; } size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max) { if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) == SPARCV9_BLK) return _sparcv9_vis1_instrument_bus2(out,cnt,max); else return 0; } #if 0 && defined(__sun) && defined(__SVR4) /* This code path is disabled, because of incompatibility of * libdevinfo.so.1 and libmalloc.so.1 (see below for details) Loading @@ -69,18 +92,18 @@ static int walk_nodename(di_node_t node, di_node_name_t di_node_name) if (!strcmp (name,"SUNW,UltraSPARC") || !strncmp(name,"SUNW,UltraSPARC-I",17)) /* covers II,III,IV */ { OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU|SPARCV9_VIS1; OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU|SPARCV9_VIS1; /* %tick is privileged only on UltraSPARC-I/II, but not IIe */ if (name[14]!='\0' && name[17]!='\0' && name[18]!='\0') OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return DI_WALK_TERMINATE; } /* This is expected to catch remaining UltraSPARCs, such as T1 */ else if (!strncmp(name,"SUNW,UltraSPARC",15)) { OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return DI_WALK_TERMINATE; } Loading @@ -99,7 +122,7 @@ void OPENSSL_cpuid_setup(void) if ((e=getenv("OPENSSL_sparcv9cap"))) { OPENSSL_sparcv9cap_P=strtoul(e,NULL,0); OPENSSL_sparcv9cap_P[0]=strtoul(e,NULL,0); return; } Loading @@ -107,17 +130,17 @@ void OPENSSL_cpuid_setup(void) { if (strcmp(si,"sun4v")) /* FPU is preferred for all CPUs, but US-T1/2 */ OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU; OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU; } if (sysinfo(SI_ISALIST,si,sizeof(si))>0) { if (strstr(si,"+vis")) OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1|SPARCV9_BLK; if (strstr(si,"+vis2")) { OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; 
OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return; } } Loading Loading @@ -177,12 +200,14 @@ void OPENSSL_cpuid_setup(void) if ((e=getenv("OPENSSL_sparcv9cap"))) { OPENSSL_sparcv9cap_P=strtoul(e,NULL,0); OPENSSL_sparcv9cap_P[0]=strtoul(e,NULL,0); /* an optional ":value" suffix is parsed into OPENSSL_sparcv9cap_P[1] */ if ((e=strchr(e,':'))) OPENSSL_sparcv9cap_P[1]=strtoul(e+1,NULL,0); return; } /* Initial value, fits UltraSPARC-I&II... */ OPENSSL_sparcv9cap_P = SPARCV9_PREFER_FPU|SPARCV9_TICK_PRIVILEGED; OPENSSL_sparcv9cap_P[0] = SPARCV9_PREFER_FPU|SPARCV9_TICK_PRIVILEGED; sigfillset(&all_masked); sigdelset(&all_masked,SIGILL); Loading @@ -205,33 +230,73 @@ void OPENSSL_cpuid_setup(void) if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_rdtick(); OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; } if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_vis1_probe(); OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1|SPARCV9_BLK; /* detect UltraSPARC-Tx, see sparccpuid.S for details... */ if (_sparcv9_vis1_instrument() >= 12) OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU); OPENSSL_sparcv9cap_P[0] &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU); else { _sparcv9_vis2_probe(); OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; } } if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_fmadd_probe(); OPENSSL_sparcv9cap_P |= SPARCV9_FMADD; OPENSSL_sparcv9cap_P[0] |= SPARCV9_FMADD; } /* * VIS3 flag is tested independently from VIS1, unlike VIS2 that is, * because VIS3 defines even integer instructions. 
*/ if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_vis3_probe(); OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS3; } if (sigsetjmp(common_jmp,1) == 0) { (void)_sparcv9_random(); OPENSSL_sparcv9cap_P[0] |= SPARCV9_RANDOM; } /* * Until a better solution is found, _sparcv9_rdcfr is gated on the * VIS3 flag, because it goes into an uninterruptible endless * loop on UltraSPARC II running Solaris. Things might be * different on Linux... */ if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3) && sigsetjmp(common_jmp,1) == 0) { OPENSSL_sparcv9cap_P[1] = (unsigned int)_sparcv9_rdcfr(); } sigaction(SIGBUS,&bus_oact,NULL); sigaction(SIGILL,&ill_oact,NULL); sigprocmask(SIG_SETMASK,&oset,NULL); if (sizeof(size_t)==8) OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; #ifdef __linux else { /* NOTE(review): 340 is presumably the Linux/sparc kern_features syscall number; bit 0 of its result is what enables SPARCV9_64BIT_STACK here -- confirm against the kernel syscall table */ int ret = syscall(340); if (ret>=0 && ret&1) OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; } #endif } #endif