Loading Configure +2 −1 Original line number Diff line number Diff line Loading @@ -135,7 +135,7 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-a my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o:::::::"; my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o:::::::"; my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o::aes_ctr.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o"; my $armv4_asm=":bn_asm.o armv4-mont.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o:void"; my $armv4_asm=":bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o:void"; my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:32"; my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:64"; my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o:::::::"; Loading Loading @@ -1493,6 +1493,7 @@ $cflags.=" -DOPENSSL_BN_ASM_PART_WORDS" if ($bn_obj =~ /bn-586/); $cflags.=" -DOPENSSL_IA32_SSE2" if (!$no_sse2 && $bn_obj =~ /86/); $cflags.=" -DOPENSSL_BN_ASM_MONT" if ($bn_obj =~ /-mont/); $cflags.=" -DOPENSSL_BN_ASM_GF2m" if ($bn_obj =~ /-gf2m/); if ($fips) { Loading TABLE +2 −2 Original line number Diff line number Diff line Loading @@ -1033,7 +1033,7 @@ $sys_id = $lflags = -ldl $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR $cpuid_obj = $bn_obj = bn_asm.o armv4-mont.o $bn_obj = bn_asm.o armv4-mont.o armv4-gf2m.o $des_obj = $aes_obj = aes_cbc.o aes-armv4.o $bf_obj = Loading Loading @@ -3689,7 +3689,7 @@ $sys_id = $lflags = -ldl $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR $cpuid_obj = $bn_obj = bn_asm.o armv4-mont.o $bn_obj = bn_asm.o armv4-mont.o armv4-gf2m.o $des_obj = $aes_obj = aes_cbc.o aes-armv4.o $bf_obj = Loading crypto/bn/Makefile +3 −0 Original line number Diff line number Diff line Loading @@ -120,6 +120,9 @@ alpha-mont.s: asm/alpha-mont.pl # GNU make "catch all" %-mont.s: asm/%-mont.pl; $(PERL) $< $(PERLASM_SCHEME) $@ %-gf2m.S: asm/%-gf2m.pl; $(PERL) $< $(PERLASM_SCHEME) $@ armv4-gf2m.o: armv4-gf2m.S files: $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO Loading crypto/bn/asm/armv4-gf2m.pl +24 −11 Original line number Diff line number Diff line Loading @@ -21,13 +21,8 @@ # runs in even less cycles, ~30, improvement is measurable only on # longer keys. One has to optimize code elsewhere to get NEON glow... $a="r1"; $b="r0"; ($a0,$a1,$a2,$a12,$a4,$a14)= ($hi,$lo,$t0,$t1, $i0,$i1 )=map("r$_",(4..9),12); $mask="r12"; while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } Loading Loading @@ -67,9 +62,21 @@ mul_1x1_neon: bx lr .size mul_1x1_neon,.-mul_1x1_neon #endif ___ ################ # private interface to mul_1x1_ialu # $a="r1"; $b="r0"; .align 5 ($a0,$a1,$a2,$a12,$a4,$a14)= ($hi,$lo,$t0,$t1, $i0,$i1 )=map("r$_",(4..9),12); $mask="r12"; $code.=<<___; .type mul_1x1_ialu,%function .align 5 mul_1x1_ialu: mov $a0,#0 bic $a1,$a,#3<<30 @ a1=a&0x3fffffff Loading Loading @@ -147,7 +154,15 @@ mul_1x1_ialu: mov pc,lr .size mul_1x1_ialu,.-mul_1x1_ialu ___ ################ # void bn_GF2m_mul_2x2(BN_ULONG *r, # BN_ULONG a1,BN_ULONG a0, # BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0b1b0 ($A1,$B1,$A0,$B0,$A1B1,$A0B0)=map("d$_",(18..23)); $code.=<<___; .global bn_GF2m_mul_2x2 .type bn_GF2m_mul_2x2,%function .align 5 Loading @@ -157,9 +172,7 @@ bn_GF2m_mul_2x2: .Lpic: ldr r12,[pc,r12] tst r12,#1 beq .Lialu ___ ($A1,$B1,$A0,$B0,$A0B0,$A1B1)=map("d$_",(18..23)); $code.=<<___; veor $A1,$A1 vmov.32 $B1,r3,r3 @ two copies of b1 vmov.32 ${A1}[0],r1 @ a1 Loading crypto/bn/bn_gf2m.c +4 −1 Original line number Diff line number Diff line Loading @@ -126,6 +126,7 @@ static const BN_ULONG SQR_tb[16] = SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] #endif #if !defined(OPENSSL_BN_ASM_GF2m) /* Product of two polynomials a, b each with degree < BN_BITS2 - 1, * result is a polynomial r with degree < 2 * BN_BITS - 1 * The caller MUST ensure that the variables have the right amount Loading Loading @@ -220,7 +221,9 @@ static void bn_GF2m_mul_2x2(BN_ULONG *r, const BN_ULONG a1, const BN_ULONG a0, c r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */ r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */ } #else void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0); #endif /* Add polynomials a and b and store result in r; r could be a or b, a and b * could be equal; r is the bitwise XOR of a and b. Loading Loading
Configure +2 −1 Original line number Diff line number Diff line Loading @@ -135,7 +135,7 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-a my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o:::::::"; my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o:::::::"; my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o::aes_ctr.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o"; my $armv4_asm=":bn_asm.o armv4-mont.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o:void"; my $armv4_asm=":bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o:void"; my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:32"; my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:64"; my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o:::::::"; Loading Loading @@ -1493,6 +1493,7 @@ $cflags.=" -DOPENSSL_BN_ASM_PART_WORDS" if ($bn_obj =~ /bn-586/); $cflags.=" -DOPENSSL_IA32_SSE2" if (!$no_sse2 && $bn_obj =~ /86/); $cflags.=" -DOPENSSL_BN_ASM_MONT" if ($bn_obj =~ /-mont/); $cflags.=" -DOPENSSL_BN_ASM_GF2m" if ($bn_obj =~ /-gf2m/); if ($fips) { Loading
TABLE +2 −2 Original line number Diff line number Diff line Loading @@ -1033,7 +1033,7 @@ $sys_id = $lflags = -ldl $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR $cpuid_obj = $bn_obj = bn_asm.o armv4-mont.o $bn_obj = bn_asm.o armv4-mont.o armv4-gf2m.o $des_obj = $aes_obj = aes_cbc.o aes-armv4.o $bf_obj = Loading Loading @@ -3689,7 +3689,7 @@ $sys_id = $lflags = -ldl $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR $cpuid_obj = $bn_obj = bn_asm.o armv4-mont.o $bn_obj = bn_asm.o armv4-mont.o armv4-gf2m.o $des_obj = $aes_obj = aes_cbc.o aes-armv4.o $bf_obj = Loading
crypto/bn/Makefile +3 −0 Original line number Diff line number Diff line Loading @@ -120,6 +120,9 @@ alpha-mont.s: asm/alpha-mont.pl # GNU make "catch all" %-mont.s: asm/%-mont.pl; $(PERL) $< $(PERLASM_SCHEME) $@ %-gf2m.S: asm/%-gf2m.pl; $(PERL) $< $(PERLASM_SCHEME) $@ armv4-gf2m.o: armv4-gf2m.S files: $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO Loading
crypto/bn/asm/armv4-gf2m.pl +24 −11 Original line number Diff line number Diff line Loading @@ -21,13 +21,8 @@ # runs in even less cycles, ~30, improvement is measurable only on # longer keys. One has to optimize code elsewhere to get NEON glow... $a="r1"; $b="r0"; ($a0,$a1,$a2,$a12,$a4,$a14)= ($hi,$lo,$t0,$t1, $i0,$i1 )=map("r$_",(4..9),12); $mask="r12"; while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } Loading Loading @@ -67,9 +62,21 @@ mul_1x1_neon: bx lr .size mul_1x1_neon,.-mul_1x1_neon #endif ___ ################ # private interface to mul_1x1_ialu # $a="r1"; $b="r0"; .align 5 ($a0,$a1,$a2,$a12,$a4,$a14)= ($hi,$lo,$t0,$t1, $i0,$i1 )=map("r$_",(4..9),12); $mask="r12"; $code.=<<___; .type mul_1x1_ialu,%function .align 5 mul_1x1_ialu: mov $a0,#0 bic $a1,$a,#3<<30 @ a1=a&0x3fffffff Loading Loading @@ -147,7 +154,15 @@ mul_1x1_ialu: mov pc,lr .size mul_1x1_ialu,.-mul_1x1_ialu ___ ################ # void bn_GF2m_mul_2x2(BN_ULONG *r, # BN_ULONG a1,BN_ULONG a0, # BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0b1b0 ($A1,$B1,$A0,$B0,$A1B1,$A0B0)=map("d$_",(18..23)); $code.=<<___; .global bn_GF2m_mul_2x2 .type bn_GF2m_mul_2x2,%function .align 5 Loading @@ -157,9 +172,7 @@ bn_GF2m_mul_2x2: .Lpic: ldr r12,[pc,r12] tst r12,#1 beq .Lialu ___ ($A1,$B1,$A0,$B0,$A0B0,$A1B1)=map("d$_",(18..23)); $code.=<<___; veor $A1,$A1 vmov.32 $B1,r3,r3 @ two copies of b1 vmov.32 ${A1}[0],r1 @ a1 Loading
crypto/bn/bn_gf2m.c +4 −1 Original line number Diff line number Diff line Loading @@ -126,6 +126,7 @@ static const BN_ULONG SQR_tb[16] = SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] #endif #if !defined(OPENSSL_BN_ASM_GF2m) /* Product of two polynomials a, b each with degree < BN_BITS2 - 1, * result is a polynomial r with degree < 2 * BN_BITS - 1 * The caller MUST ensure that the variables have the right amount Loading Loading @@ -220,7 +221,9 @@ static void bn_GF2m_mul_2x2(BN_ULONG *r, const BN_ULONG a1, const BN_ULONG a0, c r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */ r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */ } #else void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0); #endif /* Add polynomials a and b and store result in r; r could be a or b, a and b * could be equal; r is the bitwise XOR of a and b. Loading