Loading Configurations/00-base-templates.conf +1 −1 Original line number Diff line number Diff line Loading @@ -278,7 +278,7 @@ ppc32_asm => { template => 1, cpuid_asm_src => "ppccpuid.s ppccap.c", bn_asm_src => "bn-ppc.s ppc-mont.s ppc64-mont.s", bn_asm_src => "bn-ppc.s ppc-mont.s", aes_asm_src => "aes_core.c aes_cbc.c aes-ppc.s vpaes-ppc.s aesp8-ppc.s", sha1_asm_src => "sha1-ppc.s sha256-ppc.s sha512-ppc.s sha256p8-ppc.s sha512p8-ppc.s", modes_asm_src => "ghashp8-ppc.s", Loading crypto/bn/asm/ppc-mont.pl +0 −3 Original line number Diff line number Diff line Loading @@ -134,10 +134,7 @@ $code=<<___; .globl .bn_mul_mont_int .align 5 .bn_mul_mont_int: cmpwi $num,4 mr $rp,r3 ; $rp is reassigned li r3,0 bltlr ___ $code.=<<___ if ($BNSZ==4); cmpwi $num,32 ; longer key performance is not better Loading crypto/ppccap.c +15 −29 Original line number Diff line number Diff line Loading @@ -35,38 +35,24 @@ static sigset_t all_masked; int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) { int bn_mul_mont_fpu64(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num); int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num); int bn_mul4x_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num); if (sizeof(size_t) == 4) { # if 1 || (defined(__APPLE__) && defined(__MACH__)) if (num >= 8 && (num & 3) == 0 && (OPENSSL_ppccap_P & PPC_FPU64)) return bn_mul_mont_fpu64(rp, ap, bp, np, n0, num); # else /* * boundary of 32 was experimentally determined on Linux 2.6.22, * might have to be adjusted on AIX... */ if (num >= 32 && (num & 3) == 0 && (OPENSSL_ppccap_P & PPC_FPU64)) { sigset_t oset; int ret; if (num < 4) return 0; sigprocmask(SIG_SETMASK, &all_masked, &oset); ret = bn_mul_mont_fpu64(rp, ap, bp, np, n0, num); sigprocmask(SIG_SETMASK, &oset, NULL); if ((num & 3) == 0) return bn_mul4x_mont_int(rp, ap, bp, np, n0, num); return ret; } # endif } else if ((OPENSSL_ppccap_P & PPC_FPU64)) /* * this is a "must" on POWER6, but run-time detection is not * implemented yet... * There used to be [optional] call to bn_mul_mont_fpu64 here, * but above subroutine is faster on contemporary processors. * Formulation means that there might be old processors where * FPU code path would be faster, POWER6 perhaps, but there was * no opportunity to figure it out... */ return bn_mul_mont_fpu64(rp, ap, bp, np, n0, num); return bn_mul_mont_int(rp, ap, bp, np, n0, num); } Loading Loading
Configurations/00-base-templates.conf +1 −1 Original line number Diff line number Diff line Loading @@ -278,7 +278,7 @@ ppc32_asm => { template => 1, cpuid_asm_src => "ppccpuid.s ppccap.c", bn_asm_src => "bn-ppc.s ppc-mont.s ppc64-mont.s", bn_asm_src => "bn-ppc.s ppc-mont.s", aes_asm_src => "aes_core.c aes_cbc.c aes-ppc.s vpaes-ppc.s aesp8-ppc.s", sha1_asm_src => "sha1-ppc.s sha256-ppc.s sha512-ppc.s sha256p8-ppc.s sha512p8-ppc.s", modes_asm_src => "ghashp8-ppc.s", Loading
crypto/bn/asm/ppc-mont.pl +0 −3 Original line number Diff line number Diff line Loading @@ -134,10 +134,7 @@ $code=<<___; .globl .bn_mul_mont_int .align 5 .bn_mul_mont_int: cmpwi $num,4 mr $rp,r3 ; $rp is reassigned li r3,0 bltlr ___ $code.=<<___ if ($BNSZ==4); cmpwi $num,32 ; longer key performance is not better Loading
crypto/ppccap.c +15 −29 Original line number Diff line number Diff line Loading @@ -35,38 +35,24 @@ static sigset_t all_masked; int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) { int bn_mul_mont_fpu64(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num); int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num); int bn_mul4x_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num); if (sizeof(size_t) == 4) { # if 1 || (defined(__APPLE__) && defined(__MACH__)) if (num >= 8 && (num & 3) == 0 && (OPENSSL_ppccap_P & PPC_FPU64)) return bn_mul_mont_fpu64(rp, ap, bp, np, n0, num); # else /* * boundary of 32 was experimentally determined on Linux 2.6.22, * might have to be adjusted on AIX... */ if (num >= 32 && (num & 3) == 0 && (OPENSSL_ppccap_P & PPC_FPU64)) { sigset_t oset; int ret; if (num < 4) return 0; sigprocmask(SIG_SETMASK, &all_masked, &oset); ret = bn_mul_mont_fpu64(rp, ap, bp, np, n0, num); sigprocmask(SIG_SETMASK, &oset, NULL); if ((num & 3) == 0) return bn_mul4x_mont_int(rp, ap, bp, np, n0, num); return ret; } # endif } else if ((OPENSSL_ppccap_P & PPC_FPU64)) /* * this is a "must" on POWER6, but run-time detection is not * implemented yet... * There used to be [optional] call to bn_mul_mont_fpu64 here, * but above subroutine is faster on contemporary processors. * Formulation means that there might be old processors where * FPU code path would be faster, POWER6 perhaps, but there was * no opportunity to figure it out... */ return bn_mul_mont_fpu64(rp, ap, bp, np, n0, num); return bn_mul_mont_int(rp, ap, bp, np, n0, num); } Loading