Commit 80d27cdb authored by Andy Polyakov's avatar Andy Polyakov
Browse files

ppccap.c: engage new multipplication and squaring subroutines.



[And remove FPU mutiplication subroutine.]

Reviewed-by: default avatarRich Salz <rsalz@openssl.org>
parent 68f6d2a0
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -278,7 +278,7 @@
    ppc32_asm => {
	template	=> 1,
	cpuid_asm_src   => "ppccpuid.s ppccap.c",
	bn_asm_src      => "bn-ppc.s ppc-mont.s ppc64-mont.s",
	bn_asm_src      => "bn-ppc.s ppc-mont.s",
	aes_asm_src     => "aes_core.c aes_cbc.c aes-ppc.s vpaes-ppc.s aesp8-ppc.s",
	sha1_asm_src    => "sha1-ppc.s sha256-ppc.s sha512-ppc.s sha256p8-ppc.s sha512p8-ppc.s",
	modes_asm_src   => "ghashp8-ppc.s",
+0 −3
Original line number Diff line number Diff line
@@ -134,10 +134,7 @@ $code=<<___;
.globl	.bn_mul_mont_int
.align	5
.bn_mul_mont_int:
	cmpwi	$num,4
	mr	$rp,r3		; $rp is reassigned
	li	r3,0
	bltlr
___
$code.=<<___ if ($BNSZ==4);
	cmpwi	$num,32		; longer key performance is not better
+15 −29
Original line number Diff line number Diff line
@@ -35,38 +35,24 @@ static sigset_t all_masked;
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                const BN_ULONG *np, const BN_ULONG *n0, int num)
{
    int bn_mul_mont_fpu64(BN_ULONG *rp, const BN_ULONG *ap,
                          const BN_ULONG *bp, const BN_ULONG *np,
                          const BN_ULONG *n0, int num);
    int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                        const BN_ULONG *np, const BN_ULONG *n0, int num);
    int bn_mul4x_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                          const BN_ULONG *np, const BN_ULONG *n0, int num);

    if (sizeof(size_t) == 4) {
# if 1 || (defined(__APPLE__) && defined(__MACH__))
        if (num >= 8 && (num & 3) == 0 && (OPENSSL_ppccap_P & PPC_FPU64))
            return bn_mul_mont_fpu64(rp, ap, bp, np, n0, num);
# else
        /*
         * boundary of 32 was experimentally determined on Linux 2.6.22,
         * might have to be adjusted on AIX...
         */
        if (num >= 32 && (num & 3) == 0 && (OPENSSL_ppccap_P & PPC_FPU64)) {
            sigset_t oset;
            int ret;
    if (num < 4)
        return 0;

            sigprocmask(SIG_SETMASK, &all_masked, &oset);
            ret = bn_mul_mont_fpu64(rp, ap, bp, np, n0, num);
            sigprocmask(SIG_SETMASK, &oset, NULL);
    if ((num & 3) == 0)
        return bn_mul4x_mont_int(rp, ap, bp, np, n0, num);

            return ret;
        }
# endif
    } else if ((OPENSSL_ppccap_P & PPC_FPU64))
    /*
         * this is a "must" on POWER6, but run-time detection is not
         * implemented yet...
     * There used to be [optional] call to bn_mul_mont_fpu64 here,
     * but above subroutine is faster on contemporary processors.
     * Formulation means that there might be old processors where
     * FPU code path would be faster, POWER6 perhaps, but there was
     * no opportunity to figure it out...
     */
        return bn_mul_mont_fpu64(rp, ap, bp, np, n0, num);

    return bn_mul_mont_int(rp, ap, bp, np, n0, num);
}