Commit 0c832ec5 authored by Andy Polyakov's avatar Andy Polyakov
Browse files

Add VIS3-capable sparcv9-gf2m module.

parent 947d7827
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -130,7 +130,7 @@ my $x86_elf_asm="$x86_asm:elf";

my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:e_padlock-x86_64.o";
my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void";
my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::camellia.o cmll_misc.o cmll_cbc.o cmllt4-sparcv9.o:ghash-sparcv9.o::void";
my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::camellia.o cmll_misc.o cmll_cbc.o cmllt4-sparcv9.o:ghash-sparcv9.o::void";
my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void";
my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void";
my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::";
+9 −9
Original line number Diff line number Diff line
@@ -174,7 +174,7 @@ $sys_id =
$lflags       = 
$bn_ops       = BN_LLONG RC2_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC2 BF_PTR
$cpuid_obj    = sparcv9cap.o sparccpuid.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$des_obj      = des_enc-sparc.o fcrypt_b.o
$aes_obj      = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj       = 
@@ -2616,7 +2616,7 @@ $sys_id = ULTRASPARC
$lflags       = -lsocket -lnsl -ldl
$bn_ops       = BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj    = sparcv9cap.o sparccpuid.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$des_obj      = des_enc-sparc.o fcrypt_b.o
$aes_obj      = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj       = 
@@ -2649,7 +2649,7 @@ $sys_id = ULTRASPARC
$lflags       = -lsocket -lnsl -ldl
$bn_ops       = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj    = sparcv9cap.o sparccpuid.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$des_obj      = des_enc-sparc.o fcrypt_b.o
$aes_obj      = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj       = 
@@ -4398,7 +4398,7 @@ $sys_id = ULTRASPARC
$lflags       = -ldl
$bn_ops       = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj    = sparcv9cap.o sparccpuid.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$des_obj      = des_enc-sparc.o fcrypt_b.o
$aes_obj      = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj       = 
@@ -4596,7 +4596,7 @@ $sys_id = ULTRASPARC
$lflags       = -ldl
$bn_ops       = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj    = sparcv9cap.o sparccpuid.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$des_obj      = des_enc-sparc.o fcrypt_b.o
$aes_obj      = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj       = 
@@ -5454,7 +5454,7 @@ $sys_id = ULTRASPARC
$lflags       = -lsocket -lnsl -ldl
$bn_ops       = BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj    = sparcv9cap.o sparccpuid.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$des_obj      = des_enc-sparc.o fcrypt_b.o
$aes_obj      = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj       = 
@@ -5487,7 +5487,7 @@ $sys_id = ULTRASPARC
$lflags       = -lsocket -lnsl -ldl
$bn_ops       = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj    = sparcv9cap.o sparccpuid.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$des_obj      = des_enc-sparc.o fcrypt_b.o
$aes_obj      = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj       = 
@@ -5586,7 +5586,7 @@ $sys_id = ULTRASPARC
$lflags       = -lsocket -lnsl -ldl
$bn_ops       = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj    = sparcv9cap.o sparccpuid.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$des_obj      = des_enc-sparc.o fcrypt_b.o
$aes_obj      = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj       = 
@@ -5619,7 +5619,7 @@ $sys_id = ULTRASPARC
$lflags       = -lsocket -lnsl -ldl
$bn_ops       = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj    = sparcv9cap.o sparccpuid.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o
$bn_obj       = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$des_obj      = des_enc-sparc.o fcrypt_b.o
$aes_obj      = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj       = 
+2 −0
Original line number Diff line number Diff line
@@ -79,6 +79,8 @@ sparcv9-mont.s: asm/sparcv9-mont.pl
	$(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@
vis3-mont.s:		asm/vis3-mont.pl
	$(PERL) asm/vis3-mont.pl $(CFLAGS) > $@
sparcv9-gf2m.S:	asm/sparcv9-gf2m.pl
	$(PERL) asm/sparcv9-gf2m.pl $(CFLAGS) > $@

bn-mips3.o:	asm/mips3.s
	@if [ "$(CC)" = "gcc" ]; then \
+198 −0
Original line number Diff line number Diff line
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# October 2012
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
# the time being... Except that it has two code paths: one suitable
# for all SPARCv9 processors and one for VIS3-capable ones. Former
# delivers ~25-45% more, more for longer keys, heaviest DH and DSA
# verify operations on venerable UltraSPARC II. On T4 VIS3 code is
# ~100-230% faster than gcc-generated code and ~35-90% faster than
# the pure SPARCv9 code path.

$bits=32;
for (@ARGV)     { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
if ($bits==64)  { $bias=2047; $frame=192; }
else            { $bias=0;    $frame=112; }

$locals=16*8;

$code.=<<___;
#include <sparc_arch.h>

.section        ".text",#alloc,#execinstr
___
$code.=<<___ if ($bits==64);
.register       %g2,#scratch
.register       %g3,#scratch
___

$tab="%l0";

@T=("%g2","%g3");
@i=("%g4","%g5");

($a1,$a2,$a4,$a8,$a12,$a48)=map("%o$_",(0..5));
($lo,$hi,$b)=("%g1",$a8,"%o7"); $a=$lo;

$code.=<<___;
#ifdef __PIC__
SPARC_PIC_THUNK(%g1)
#endif

.globl	bn_GF2m_mul_2x2
.align	16
bn_GF2m_mul_2x2:
        SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
        ld	[%g1+0],%g1             	! OPENSSL_sparcv9cap_P[0]

        andcc	%g1, SPARCV9_VIS3, %g0
        bz,pn	%icc,.Lsoftware
        nop

	sllx	%o1, 32, %o1
	sllx	%o3, 32, %o3
	or	%o2, %o1, %o1
	or	%o4, %o3, %o3
	.word	0x95b262ab			! xmulx   %o1, %o3, %o2
	.word	0x99b262cb			! xmulxhi %o1, %o3, %o4
	srlx	%o2, 32, %o1			! 13 cycles later
	st	%o2, [%o0+0]
	st	%o1, [%o0+4]
	srlx	%o4, 32, %o3
	st	%o4, [%o0+8]
	retl
	st	%o3, [%o0+12]

.align	16
.Lsoftware:
	save	%sp,-$frame-$locals,%sp

	sllx	%i1,32,$a
	mov	-1,$a12
	sllx	%i3,32,$b
	or	%i2,$a,$a
	srlx	$a12,1,$a48			! 0x7fff...
	or	%i4,$b,$b
	srlx	$a12,2,$a12			! 0x3fff...
	add	%sp,$bias+$frame,$tab

	sllx	$a,2,$a4
	mov	$a,$a1
	sllx	$a,1,$a2

	srax	$a4,63,@i[1]			! broadcast 61st bit
	and	$a48,$a4,$a4			! (a<<2)&0x7fff...
	srlx	$a48,2,$a48
	srax	$a2,63,@i[0]			! broadcast 62nd bit
	and	$a12,$a2,$a2			! (a<<1)&0x3fff...
	srax	$a1,63,$lo			! broadcast 63rd bit
	and	$a48,$a1,$a1			! (a<<0)&0x1fff...

	sllx	$a1,3,$a8
	and	$b,$lo,$lo
	and	$b,@i[0],@i[0]
	and	$b,@i[1],@i[1]

	stx	%g0,[$tab+0*8]			! tab[0]=0
	xor	$a1,$a2,$a12
	stx	$a1,[$tab+1*8]			! tab[1]=a1
	stx	$a2,[$tab+2*8]			! tab[2]=a2
	 xor	$a4,$a8,$a48
	stx	$a12,[$tab+3*8]			! tab[3]=a1^a2
	 xor	$a4,$a1,$a1

	stx	$a4,[$tab+4*8]			! tab[4]=a4
	xor	$a4,$a2,$a2
	stx	$a1,[$tab+5*8]			! tab[5]=a1^a4
	xor	$a4,$a12,$a12
	stx	$a2,[$tab+6*8]			! tab[6]=a2^a4
	 xor	$a48,$a1,$a1
	stx	$a12,[$tab+7*8]			! tab[7]=a1^a2^a4
	 xor	$a48,$a2,$a2

	stx	$a8,[$tab+8*8]			! tab[8]=a8
	xor	$a48,$a12,$a12
	stx	$a1,[$tab+9*8]			! tab[9]=a1^a8
	 xor	$a4,$a1,$a1
	stx	$a2,[$tab+10*8]			! tab[10]=a2^a8
	 xor	$a4,$a2,$a2
	stx	$a12,[$tab+11*8]		! tab[11]=a1^a2^a8

	xor	$a4,$a12,$a12
	stx	$a48,[$tab+12*8]		! tab[12]=a4^a8
	 srlx	$lo,1,$hi
	stx	$a1,[$tab+13*8]			! tab[13]=a1^a4^a8
	 sllx	$lo,63,$lo
	stx	$a2,[$tab+14*8]			! tab[14]=a2^a4^a8
	 srlx	@i[0],2,@T[0]
	stx	$a12,[$tab+15*8]		! tab[15]=a1^a2^a4^a8

	sllx	@i[0],62,$a1
	 sllx	$b,3,@i[0]
	srlx	@i[1],3,@T[1]
	 and	@i[0],`0xf<<3`,@i[0]
	sllx	@i[1],61,$a2
	 ldx	[$tab+@i[0]],@i[0]
	 srlx	$b,4-3,@i[1]
	xor	@T[0],$hi,$hi
	 and	@i[1],`0xf<<3`,@i[1]
	xor	$a1,$lo,$lo
	 ldx	[$tab+@i[1]],@i[1]
	xor	@T[1],$hi,$hi

	xor	@i[0],$lo,$lo
	srlx	$b,8-3,@i[0]
	 xor	$a2,$lo,$lo
	and	@i[0],`0xf<<3`,@i[0]
___
for($n=1;$n<14;$n++) {
$code.=<<___;
	sllx	@i[1],`$n*4`,@T[0]
	ldx	[$tab+@i[0]],@i[0]
	srlx	@i[1],`64-$n*4`,@T[1]
	xor	@T[0],$lo,$lo
	srlx	$b,`($n+2)*4`-3,@i[1]
	xor	@T[1],$hi,$hi
	and	@i[1],`0xf<<3`,@i[1]
___
	push(@i,shift(@i)); push(@T,shift(@T));
}
$code.=<<___;
	sllx	@i[1],`$n*4`,@T[0]
	ldx	[$tab+@i[0]],@i[0]
	srlx	@i[1],`64-$n*4`,@T[1]
	xor	@T[0],$lo,$lo

	sllx	@i[0],`($n+1)*4`,@T[0]
	 xor	@T[1],$hi,$hi
	srlx	@i[0],`64-($n+1)*4`,@T[1]
	xor	@T[0],$lo,$lo
	xor	@T[1],$hi,$hi

	srlx	$lo,32,%i1
	st	$lo,[%i0+0]
	st	%i1,[%i0+4]
	srlx	$hi,32,%i2
	st	$hi,[%i0+8]
	st	%i2,[%i0+12]

	ret
	restore
.type	bn_GF2m_mul_2x2,#function
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.asciz	"GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
.align	4
___

$code =~ s/\`([^\`]*)\`/eval($1)/gem;
print $code;
close STDOUT;