Loading Configure +3 −2 Original line number Diff line number Diff line Loading @@ -135,8 +135,8 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-a my $mips3_asm=":bn-mips3.o:::::::::::::void"; my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o::aes_ctr.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o::::::void"; my $armv4_asm=":bn_asm.o armv4-mont.o::aes_cbc.o aes_ctr.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o::::::::void"; my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o::::::32"; my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o::::::64"; my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:32"; my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:64"; my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o:::::::"; my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o:::::::"; my $no_asm="::::::::::::::void"; Loading Loading @@ -292,6 +292,7 @@ my %table=( # Since there is mention of this in shlib/hpux10-cc.sh "hpux-parisc-cc-o4","cc:-Ae +O4 +ESlit -z -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY::-D_REENTRANT::-ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${no_asm}:dl:hpux-shared:+Z:-b:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "hpux-parisc-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-Wl,+s -ldld:BN_LLONG DES_PTR DES_UNROLL 
DES_RISC1:${no_asm}:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "hpux-parisc1_1-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-Wl,+s -ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${parisc11_asm}:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "hpux-parisc2-gcc","gcc:-march=2.0 -O3 -DB_ENDIAN -D_REENTRANT::::-Wl,+s -ldld:SIXTY_FOUR_BIT RC4_CHAR RC4_CHUNK DES_PTR DES_UNROLL DES_RISC1::pa-risc2.o:::::::::::::void:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "hpux64-parisc2-gcc","gcc:-O3 -DB_ENDIAN -D_REENTRANT::::-ldl:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::pa-risc2W.o:::::::::::::void:dlfcn:hpux-shared:-fpic:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa20_64", Loading TABLE +34 −2 Original line number Diff line number Diff line Loading @@ -2933,7 +2933,7 @@ $rmd160_obj = $rc5_obj = $wp_obj = $cmll_obj = $modes_obj = $modes_obj = ghash-parisc.o $perlasm_scheme = 32 $dso_scheme = dl $shared_target= hpux-shared Loading @@ -2944,6 +2944,38 @@ $ranlib = $arflags = $multilib = /pa1.1 *** hpux-parisc1_1-gcc $cc = gcc $cflags = -O3 -DB_ENDIAN -DBN_DIV2W $unistd = $thread_cflag = -D_REENTRANT $sys_id = $lflags = -Wl,+s -ldld $bn_ops = BN_LLONG DES_PTR DES_UNROLL DES_RISC1 $cpuid_obj = pariscid.o $bn_obj = bn_asm.o parisc-mont.o $des_obj = $aes_obj = aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o $bf_obj = $md5_obj = $sha1_obj = sha1-parisc.o sha256-parisc.o sha512-parisc.o $cast_obj = $rc4_obj = rc4-parisc.o $rmd160_obj = $rc5_obj = $wp_obj = $cmll_obj = $modes_obj = ghash-parisc.o $perlasm_scheme = 32 $dso_scheme = dl $shared_target= hpux-shared $shared_cflag = -fPIC $shared_ldflag = -shared $shared_extension = .sl.$(SHLIB_MAJOR).$(SHLIB_MINOR) $ranlib = $arflags = $multilib = *** hpux-parisc2-cc $cc = cc $cflags = +DA2.0 +DS2.0 +O3 +Optrs_strongly_typed -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY -D_REENTRANT Loading Loading @@ -3093,7 +3125,7 @@ $rmd160_obj = $rc5_obj = 
$wp_obj = $cmll_obj = $modes_obj = $modes_obj = ghash-parisc.o $perlasm_scheme = 64 $dso_scheme = dlfcn $shared_target= hpux-shared Loading crypto/modes/Makefile +3 −1 Original line number Diff line number Diff line Loading @@ -50,9 +50,11 @@ ghash-x86.s: asm/ghash-x86.pl ghash-x86_64.s: asm/ghash-x86_64.pl $(PERL) asm/ghash-x86_64.pl $(PERLASM_SCHEME) > $@ ghash-sparcv9.s: asm/ghash-sparcv9.pl $(PERL) asm/ghash-sparcv8.pl $(CFLAGS) > $@ $(PERL) asm/ghash-sparcv9.pl $(CFLAGS) > $@ ghash-alpha.s: asm/ghash-alpha.pl $(PERL) $< | $(CC) -E - | tee $@ > /dev/null ghash-parisc.s: asm/ghash-parisc.pl $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@ # GNU make "catch all" ghash-%.s: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $(CFLAGS) > $@ Loading crypto/modes/asm/ghash-parisc.pl 0 → 100644 +730 −0 Original line number Diff line number Diff line #!/usr/bin/env perl # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. # ==================================================================== # # April 2010 # # The module implements "4-bit" GCM GHASH function and underlying # single multiplication operation in GF(2^128). "4-bit" means that it # uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC # it processes one byte in 19 cycles, which is more than twice as fast # as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for 8 # cycles, but measured performance on PA-8600 system is ~9 cycles per # processed byte. This is ~2.2x faster than 64-bit code generated by # vendor compiler (which used to be very hard to beat:-). # # Special thanks to polarhome.com for providing HP-UX account. 
$flavour = shift; $output = shift; open STDOUT,">$output"; if ($flavour =~ /64/) { $LEVEL ="2.0W"; $SIZE_T =8; $FRAME_MARKER =80; $SAVED_RP =16; $PUSH ="std"; $PUSHMA ="std,ma"; $POP ="ldd"; $POPMB ="ldd,mb"; $NREGS =6; } else { $LEVEL ="1.0"; #"\n\t.ALLOW\t2.0"; $SIZE_T =4; $FRAME_MARKER =48; $SAVED_RP =20; $PUSH ="stw"; $PUSHMA ="stwm"; $POP ="ldw"; $POPMB ="ldwm"; $NREGS =11; } $FRAME=10*$SIZE_T+$FRAME_MARKER;# NREGS saved regs + frame marker # [+ argument transfer] ################# volatile registers $Xi="%r26"; # argument block $Htbl="%r25"; $inp="%r24"; $len="%r23"; $Hhh=$Htbl; # variables $Hll="%r22"; $Zhh="%r21"; $Zll="%r20"; $cnt="%r19"; $rem_4bit="%r28"; $rem="%r29"; $mask0xf0="%r31"; ################# preserved registers $Thh="%r1"; $Tll="%r2"; $nlo="%r3"; $nhi="%r4"; $byte="%r5"; if ($SIZE_T==4) { $Zhl="%r6"; $Zlh="%r7"; $Hhl="%r8"; $Hlh="%r9"; $Thl="%r10"; $Tlh="%r11"; } $rem2="%r6"; # used in PA-RISC 2.0 code $code.=<<___; .LEVEL $LEVEL .SPACE \$TEXT\$ .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY .EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR .ALIGN 64 gcm_gmult_4bit .PROC .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS .ENTRY $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue $PUSHMA %r3,$FRAME(%sp) $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) ___ $code.=<<___ if ($SIZE_T==4); $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) ___ $code.=<<___; blr %r0,$rem_4bit ldi 3,$rem L\$pic_gmult andcm $rem_4bit,$rem,$rem_4bit addl $inp,$len,$len ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit ldi 0xf0,$mask0xf0 ___ $code.=<<___ if ($SIZE_T==4); ldi 31,$rem mtctl $rem,%cr11 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0 b L\$parisc1_gmult nop ___ $code.=<<___; ldb 15($Xi),$nlo ldo 8($Htbl),$Hll and $mask0xf0,$nlo,$nhi depd,z $nlo,59,4,$nlo ldd 
$nlo($Hll),$Zll ldd $nlo($Hhh),$Zhh depd,z $Zll,60,4,$rem shrpd $Zhh,$Zll,4,$Zll extrd,u $Zhh,59,60,$Zhh ldb 14($Xi),$nlo ldd $nhi($Hll),$Tll ldd $nhi($Hhh),$Thh and $mask0xf0,$nlo,$nhi depd,z $nlo,59,4,$nlo xor $Tll,$Zll,$Zll xor $Thh,$Zhh,$Zhh ldd $rem($rem_4bit),$rem b L\$oop_gmult_pa2 ldi 13,$cnt .ALIGN 8 L\$oop_gmult_pa2 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug depd,z $Zll,60,4,$rem shrpd $Zhh,$Zll,4,$Zll extrd,u $Zhh,59,60,$Zhh ldd $nlo($Hll),$Tll ldd $nlo($Hhh),$Thh xor $Tll,$Zll,$Zll xor $Thh,$Zhh,$Zhh ldd $rem($rem_4bit),$rem xor $rem,$Zhh,$Zhh depd,z $Zll,60,4,$rem ldbx $cnt($Xi),$nlo shrpd $Zhh,$Zll,4,$Zll extrd,u $Zhh,59,60,$Zhh ldd $nhi($Hll),$Tll ldd $nhi($Hhh),$Thh and $mask0xf0,$nlo,$nhi depd,z $nlo,59,4,$nlo ldd $rem($rem_4bit),$rem xor $Tll,$Zll,$Zll addib,uv -1,$cnt,L\$oop_gmult_pa2 xor $Thh,$Zhh,$Zhh xor $rem,$Zhh,$Zhh depd,z $Zll,60,4,$rem shrpd $Zhh,$Zll,4,$Zll extrd,u $Zhh,59,60,$Zhh ldd $nlo($Hll),$Tll ldd $nlo($Hhh),$Thh xor $Tll,$Zll,$Zll xor $Thh,$Zhh,$Zhh ldd $rem($rem_4bit),$rem xor $rem,$Zhh,$Zhh depd,z $Zll,60,4,$rem shrpd $Zhh,$Zll,4,$Zll extrd,u $Zhh,59,60,$Zhh ldd $nhi($Hll),$Tll ldd $nhi($Hhh),$Thh xor $Tll,$Zll,$Zll xor $Thh,$Zhh,$Zhh ldd $rem($rem_4bit),$rem xor $rem,$Zhh,$Zhh std $Zll,8($Xi) std $Zhh,0($Xi) ___ $code.=<<___ if ($SIZE_T==4); b L\$done_gmult nop L\$parisc1_gmult ldb 15($Xi),$nlo ldo 12($Htbl),$Hll ldo 8($Htbl),$Hlh ldo 4($Htbl),$Hhl and $mask0xf0,$nlo,$nhi zdep $nlo,27,4,$nlo ldwx $nlo($Hll),$Zll ldwx $nlo($Hlh),$Zlh ldwx $nlo($Hhl),$Zhl ldwx $nlo($Hhh),$Zhh zdep $Zll,28,4,$rem ldb 14($Xi),$nlo ldwx $rem($rem_4bit),$rem shrpw $Zlh,$Zll,4,$Zll ldwx $nhi($Hll),$Tll shrpw $Zhl,$Zlh,4,$Zlh ldwx $nhi($Hlh),$Tlh shrpw $Zhh,$Zhl,4,$Zhl ldwx $nhi($Hhl),$Thl extru $Zhh,27,28,$Zhh ldwx $nhi($Hhh),$Thh xor $rem,$Zhh,$Zhh and $mask0xf0,$nlo,$nhi zdep $nlo,27,4,$nlo xor $Tll,$Zll,$Zll ldwx $nlo($Hll),$Tll xor $Tlh,$Zlh,$Zlh ldwx $nlo($Hlh),$Tlh xor $Thl,$Zhl,$Zhl b L\$oop_gmult_pa1 ldi 13,$cnt .ALIGN 8 
L\$oop_gmult_pa1 zdep $Zll,28,4,$rem ldwx $nlo($Hhl),$Thl xor $Thh,$Zhh,$Zhh ldwx $rem($rem_4bit),$rem shrpw $Zlh,$Zll,4,$Zll ldwx $nlo($Hhh),$Thh shrpw $Zhl,$Zlh,4,$Zlh ldbx $cnt($Xi),$nlo xor $Tll,$Zll,$Zll ldwx $nhi($Hll),$Tll shrpw $Zhh,$Zhl,4,$Zhl xor $Tlh,$Zlh,$Zlh ldwx $nhi($Hlh),$Tlh extru $Zhh,27,28,$Zhh xor $Thl,$Zhl,$Zhl ldwx $nhi($Hhl),$Thl xor $rem,$Zhh,$Zhh zdep $Zll,28,4,$rem xor $Thh,$Zhh,$Zhh ldwx $nhi($Hhh),$Thh shrpw $Zlh,$Zll,4,$Zll ldwx $rem($rem_4bit),$rem shrpw $Zhl,$Zlh,4,$Zlh shrpw $Zhh,$Zhl,4,$Zhl and $mask0xf0,$nlo,$nhi extru $Zhh,27,28,$Zhh zdep $nlo,27,4,$nlo xor $Tll,$Zll,$Zll ldwx $nlo($Hll),$Tll xor $Tlh,$Zlh,$Zlh ldwx $nlo($Hlh),$Tlh xor $rem,$Zhh,$Zhh addib,uv -1,$cnt,L\$oop_gmult_pa1 xor $Thl,$Zhl,$Zhl zdep $Zll,28,4,$rem ldwx $nlo($Hhl),$Thl xor $Thh,$Zhh,$Zhh ldwx $rem($rem_4bit),$rem shrpw $Zlh,$Zll,4,$Zll ldwx $nlo($Hhh),$Thh shrpw $Zhl,$Zlh,4,$Zlh xor $Tll,$Zll,$Zll ldwx $nhi($Hll),$Tll shrpw $Zhh,$Zhl,4,$Zhl xor $Tlh,$Zlh,$Zlh ldwx $nhi($Hlh),$Tlh extru $Zhh,27,28,$Zhh xor $rem,$Zhh,$Zhh xor $Thl,$Zhl,$Zhl ldwx $nhi($Hhl),$Thl xor $Thh,$Zhh,$Zhh ldwx $nhi($Hhh),$Thh zdep $Zll,28,4,$rem ldwx $rem($rem_4bit),$rem shrpw $Zlh,$Zll,4,$Zll shrpw $Zhl,$Zlh,4,$Zlh shrpw $Zhh,$Zhl,4,$Zhl extru $Zhh,27,28,$Zhh xor $Tll,$Zll,$Zll xor $Tlh,$Zlh,$Zlh xor $rem,$Zhh,$Zhh stw $Zll,12($Xi) xor $Thl,$Zhl,$Zhl stw $Zlh,8($Xi) xor $Thh,$Zhh,$Zhh stw $Zhl,4($Xi) stw $Zhh,0($Xi) ___ $code.=<<___; L\$done_gmult $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 ___ $code.=<<___ if ($SIZE_T==4); $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 ___ $code.=<<___; bv (%r2) .EXIT $POPMB -$FRAME(%sp),%r3 .PROCEND .EXPORT gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR .ALIGN 64 gcm_ghash_4bit .PROC .CALLINFO 
FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=11 .ENTRY $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue $PUSHMA %r3,$FRAME(%sp) $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) ___ $code.=<<___ if ($SIZE_T==4); $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) ___ $code.=<<___; blr %r0,$rem_4bit ldi 3,$rem L\$pic_ghash andcm $rem_4bit,$rem,$rem_4bit addl $inp,$len,$len ldo L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit ldi 0xf0,$mask0xf0 ___ $code.=<<___ if ($SIZE_T==4); ldi 31,$rem mtctl $rem,%cr11 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0 b L\$parisc1_ghash nop ___ $code.=<<___; ldb 15($Xi),$nlo ldo 8($Htbl),$Hll L\$outer_ghash_pa2 ldb 15($inp),$nhi xor $nhi,$nlo,$nlo and $mask0xf0,$nlo,$nhi depd,z $nlo,59,4,$nlo ldd $nlo($Hll),$Zll ldd $nlo($Hhh),$Zhh depd,z $Zll,60,4,$rem shrpd $Zhh,$Zll,4,$Zll extrd,u $Zhh,59,60,$Zhh ldb 14($Xi),$nlo ldb 14($inp),$byte ldd $nhi($Hll),$Tll ldd $nhi($Hhh),$Thh xor $byte,$nlo,$nlo and $mask0xf0,$nlo,$nhi depd,z $nlo,59,4,$nlo xor $Tll,$Zll,$Zll xor $Thh,$Zhh,$Zhh ldd $rem($rem_4bit),$rem b L\$oop_ghash_pa2 ldi 13,$cnt .ALIGN 8 L\$oop_ghash_pa2 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug depd,z $Zll,60,4,$rem2 shrpd $Zhh,$Zll,4,$Zll extrd,u $Zhh,59,60,$Zhh ldd $nlo($Hll),$Tll ldd $nlo($Hhh),$Thh xor $Tll,$Zll,$Zll xor $Thh,$Zhh,$Zhh ldbx $cnt($Xi),$nlo ldbx $cnt($inp),$byte depd,z $Zll,60,4,$rem shrpd $Zhh,$Zll,4,$Zll ldd $rem2($rem_4bit),$rem2 xor $rem2,$Zhh,$Zhh xor $byte,$nlo,$nlo ldd $nhi($Hll),$Tll ldd $nhi($Hhh),$Thh and $mask0xf0,$nlo,$nhi depd,z $nlo,59,4,$nlo extrd,u $Zhh,59,60,$Zhh xor $Tll,$Zll,$Zll ldd $rem($rem_4bit),$rem addib,uv -1,$cnt,L\$oop_ghash_pa2 xor $Thh,$Zhh,$Zhh xor $rem,$Zhh,$Zhh depd,z $Zll,60,4,$rem2 shrpd $Zhh,$Zll,4,$Zll extrd,u $Zhh,59,60,$Zhh ldd $nlo($Hll),$Tll ldd $nlo($Hhh),$Thh xor 
$Tll,$Zll,$Zll xor $Thh,$Zhh,$Zhh depd,z $Zll,60,4,$rem shrpd $Zhh,$Zll,4,$Zll ldd $rem2($rem_4bit),$rem2 xor $rem2,$Zhh,$Zhh ldd $nhi($Hll),$Tll ldd $nhi($Hhh),$Thh extrd,u $Zhh,59,60,$Zhh xor $Tll,$Zll,$Zll xor $Thh,$Zhh,$Zhh ldd $rem($rem_4bit),$rem xor $rem,$Zhh,$Zhh std $Zll,8($Xi) ldo 16($inp),$inp std $Zhh,0($Xi) cmpb,*<> $inp,$len,L\$outer_ghash_pa2 copy $Zll,$nlo ___ $code.=<<___ if ($SIZE_T==4); b L\$done_ghash nop L\$parisc1_ghash ldb 15($Xi),$nlo ldo 12($Htbl),$Hll ldo 8($Htbl),$Hlh ldo 4($Htbl),$Hhl L\$outer_ghash_pa1 ldb 15($inp),$byte xor $byte,$nlo,$nlo and $mask0xf0,$nlo,$nhi zdep $nlo,27,4,$nlo ldwx $nlo($Hll),$Zll ldwx $nlo($Hlh),$Zlh ldwx $nlo($Hhl),$Zhl ldwx $nlo($Hhh),$Zhh zdep $Zll,28,4,$rem ldb 14($Xi),$nlo ldb 14($inp),$byte ldwx $rem($rem_4bit),$rem shrpw $Zlh,$Zll,4,$Zll ldwx $nhi($Hll),$Tll shrpw $Zhl,$Zlh,4,$Zlh ldwx $nhi($Hlh),$Tlh shrpw $Zhh,$Zhl,4,$Zhl ldwx $nhi($Hhl),$Thl extru $Zhh,27,28,$Zhh ldwx $nhi($Hhh),$Thh xor $byte,$nlo,$nlo xor $rem,$Zhh,$Zhh and $mask0xf0,$nlo,$nhi zdep $nlo,27,4,$nlo xor $Tll,$Zll,$Zll ldwx $nlo($Hll),$Tll xor $Tlh,$Zlh,$Zlh ldwx $nlo($Hlh),$Tlh xor $Thl,$Zhl,$Zhl b L\$oop_ghash_pa1 ldi 13,$cnt .ALIGN 8 L\$oop_ghash_pa1 zdep $Zll,28,4,$rem ldwx $nlo($Hhl),$Thl xor $Thh,$Zhh,$Zhh ldwx $rem($rem_4bit),$rem shrpw $Zlh,$Zll,4,$Zll ldwx $nlo($Hhh),$Thh shrpw $Zhl,$Zlh,4,$Zlh ldbx $cnt($Xi),$nlo xor $Tll,$Zll,$Zll ldwx $nhi($Hll),$Tll shrpw $Zhh,$Zhl,4,$Zhl ldbx $cnt($inp),$byte xor $Tlh,$Zlh,$Zlh ldwx $nhi($Hlh),$Tlh extru $Zhh,27,28,$Zhh xor $Thl,$Zhl,$Zhl ldwx $nhi($Hhl),$Thl xor $rem,$Zhh,$Zhh zdep $Zll,28,4,$rem xor $Thh,$Zhh,$Zhh ldwx $nhi($Hhh),$Thh shrpw $Zlh,$Zll,4,$Zll ldwx $rem($rem_4bit),$rem shrpw $Zhl,$Zlh,4,$Zlh xor $byte,$nlo,$nlo shrpw $Zhh,$Zhl,4,$Zhl and $mask0xf0,$nlo,$nhi extru $Zhh,27,28,$Zhh zdep $nlo,27,4,$nlo xor $Tll,$Zll,$Zll ldwx $nlo($Hll),$Tll xor $Tlh,$Zlh,$Zlh ldwx $nlo($Hlh),$Tlh xor $rem,$Zhh,$Zhh addib,uv -1,$cnt,L\$oop_ghash_pa1 xor $Thl,$Zhl,$Zhl zdep $Zll,28,4,$rem ldwx 
$nlo($Hhl),$Thl xor $Thh,$Zhh,$Zhh ldwx $rem($rem_4bit),$rem shrpw $Zlh,$Zll,4,$Zll ldwx $nlo($Hhh),$Thh shrpw $Zhl,$Zlh,4,$Zlh xor $Tll,$Zll,$Zll ldwx $nhi($Hll),$Tll shrpw $Zhh,$Zhl,4,$Zhl xor $Tlh,$Zlh,$Zlh ldwx $nhi($Hlh),$Tlh extru $Zhh,27,28,$Zhh xor $rem,$Zhh,$Zhh xor $Thl,$Zhl,$Zhl ldwx $nhi($Hhl),$Thl xor $Thh,$Zhh,$Zhh ldwx $nhi($Hhh),$Thh zdep $Zll,28,4,$rem ldwx $rem($rem_4bit),$rem shrpw $Zlh,$Zll,4,$Zll shrpw $Zhl,$Zlh,4,$Zlh shrpw $Zhh,$Zhl,4,$Zhl extru $Zhh,27,28,$Zhh xor $Tll,$Zll,$Zll xor $Tlh,$Zlh,$Zlh xor $rem,$Zhh,$Zhh stw $Zll,12($Xi) xor $Thl,$Zhl,$Zhl stw $Zlh,8($Xi) xor $Thh,$Zhh,$Zhh stw $Zhl,4($Xi) ldo 16($inp),$inp stw $Zhh,0($Xi) comb,<> $inp,$len,L\$outer_ghash_pa1 copy $Zll,$nlo ___ $code.=<<___; L\$done_ghash $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 ___ $code.=<<___ if ($SIZE_T==4); $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 ___ $code.=<<___; bv (%r2) .EXIT $POPMB -$FRAME(%sp),%r3 .PROCEND .ALIGN 64 L\$rem_4bit .WORD `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0 .WORD `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0 .WORD `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0 .WORD `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0 .STRINGZ "GHASH for PA-RISC, GRYPTOGAMS by <appro\@openssl.org>" .ALIGN 64 ___ # Explicitly encode PA-RISC 2.0 instructions used in this module, so # that it can be compiled with .LEVEL 1.0. It should be noted that I # wouldn't have to do this, if GNU assembler understood .ALLOW 2.0 # directive... 
# Hand encoders for the PA-RISC 2.0 instructions this module emits.
#
# Every encoder is a code reference called as $encoder->($mod, $args):
# $mod is whatever followed the mnemonic without whitespace (the
# completer, e.g. ",z"; may be empty) and $args is the operand string.
# When the operands match a form the encoder recognises, it returns a
# "\t.WORD\t0x<8 hex digits>\t; <original instruction>" line holding the
# instruction word; otherwise it returns the instruction as plain text.

# ldd with a register index ("%rX(%rB),%rT") or an immediate offset
# ("disp(%rB),%rT").
my $ldd = sub {
    my ($mod, $args) = @_;
    my $orig = "ldd$mod\t$args";

    if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {           # format 4
        my $opcode = (0x03<<26)|($2<<21)|($1<<16)|(3<<6)|$3;
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }
    elsif ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {       # format 5
        my $opcode = (0x03<<26)|($2<<21)|(1<<12)|(3<<6)|$3;
        $opcode |= (($1&0xF)<<17)|(($1&0x10)<<12);                  # encode offset
        # The captures must be consumed before the completer tests below,
        # because a successful match on $mod resets $1..$3.
        $opcode |= (1<<5)  if ($mod =~ /^,m/);      # any ",m..." completer
        $opcode |= (1<<13) if ($mod =~ /^,mb/);     # ",mb" sets one more bit
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    return "\t".$orig;
};

# std with an immediate displacement ("%rS,disp(%rB)").
my $std = sub {
    my ($mod, $args) = @_;
    my $orig = "std$mod\t$args";

    if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) {  # format 3 suffices
        my $opcode = (0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    return "\t".$orig;
};

# extrd with an immediate position ("%rS,pos,len,%rT") or with the
# position taken from %sar ("%rS,%sar,len,%rT").
my $extrd = sub {
    my ($mod, $args) = @_;
    my $orig = "extrd$mod\t$args";

    # I only have ",u" completer, it's implicitly encoded...
    if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {       # format 15
        my $opcode = (0x36<<26)|($1<<21)|($4<<16);
        my $clen   = 32-$3;
        $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5);                   # encode pos
        $opcode |= (($clen&0x20)<<7)|($clen&0x1f);                  # encode len
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }
    elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {        # format 12
        my $opcode = (0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
        my $clen   = 32-$2;
        $opcode |= (($clen&0x20)<<3)|($clen&0x1f);                  # encode len
        $opcode |= (1<<13) if ($mod =~ /,\**=/);    # "=" / "*=" condition
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    return "\t".$orig;
};

# shrpd with an immediate shift amount ("%rA,%rB,sa,%rT") or with the
# shift amount taken from %sar ("%rA,%rB,%sar,%rT").
my $shrpd = sub {
    my ($mod, $args) = @_;
    my $orig = "shrpd$mod\t$args";

    if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {     # format 14
        my $opcode = (0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
        my $cpos   = 63-$3;
        $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);             # encode sa
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }
    elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {      # format 11
        return sprintf "\t.WORD\t0x%08x\t; %s",
                       (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3, $orig;
    }

    return "\t".$orig;
};

# depd with immediate position and length ("%rS,pos,len,%rT").
my $depd = sub {
    my ($mod, $args) = @_;
    my $orig = "depd$mod\t$args";

    # I only have ",z" completer, it's implicitly encoded...
    if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {       # format 16
        my $opcode = (0x3c<<26)|($4<<21)|($1<<16);
        my $cpos   = 63-$2;
        my $clen   = 32-$3;
        $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);             # encode pos
        $opcode |= (($clen&0x20)<<7)|($clen&0x1f);                  # encode len
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    return "\t".$orig;
};

# Mnemonic -> encoder. An explicit table replaces the former string eval
# of "\$$mnemonic", so only these five mnemonics can ever reach an encoder.
my %encoder_for = (
    ldd   => $ldd,
    std   => $std,
    extrd => $extrd,
    shrpd => $shrpd,
    depd  => $depd,
);

# assemble($mnemonic, $mod, $args)
#
# Returns the text to emit for one instruction: the matching encoder's
# result when $mnemonic has an entry in %encoder_for, otherwise the
# instruction re-joined as "\t$mnemonic$mod\t$args".
sub assemble {
    my ($mnemonic, $mod, $args) = @_;
    my $encoder = $encoder_for{$mnemonic};

    return ref($encoder) eq 'CODE'
        ? $encoder->($mod, $args)
        : "\t$mnemonic$mod\t$args";
}

# Post-process the generated text line by line and write it to STDOUT.
foreach my $line (split("\n", $code)) {
    # Evaluate every `...` span and substitute its value.
    $line =~ s/\`([^\`]*)\`/eval $1/ge;

    if ($SIZE_T==4) {
        # Only in the $SIZE_T==4 build: pass each instruction line
        # (leading whitespace, lowercase mnemonic) through assemble(),
        # then rewrite "cmpb,*" to "comb," and drop the "*" from the
        # first remaining ",*" completer.
        $line =~ s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
        $line =~ s/cmpb,\*/comb,/;
        $line =~ s/,\*/,/;
    }

    print $line, "\n";
}

# Buffered write errors (e.g. disk full) only surface at close; fail loudly
# rather than leave a truncated output file behind.
close STDOUT or die "error closing STDOUT: $!";
Configure +3 −2 Original line number Diff line number Diff line Loading @@ -135,8 +135,8 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-a my $mips3_asm=":bn-mips3.o:::::::::::::void"; my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o::aes_ctr.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o::::::void"; my $armv4_asm=":bn_asm.o armv4-mont.o::aes_cbc.o aes_ctr.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o::::::::void"; my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o::::::32"; my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o::::::64"; my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:32"; my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:64"; my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o:::::::"; my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o:::::::"; my $no_asm="::::::::::::::void"; Loading Loading @@ -292,6 +292,7 @@ my %table=( # Since there is mention of this in shlib/hpux10-cc.sh "hpux-parisc-cc-o4","cc:-Ae +O4 +ESlit -z -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY::-D_REENTRANT::-ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${no_asm}:dl:hpux-shared:+Z:-b:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "hpux-parisc-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-Wl,+s -ldld:BN_LLONG DES_PTR DES_UNROLL 
DES_RISC1:${no_asm}:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "hpux-parisc1_1-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-Wl,+s -ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${parisc11_asm}:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "hpux-parisc2-gcc","gcc:-march=2.0 -O3 -DB_ENDIAN -D_REENTRANT::::-Wl,+s -ldld:SIXTY_FOUR_BIT RC4_CHAR RC4_CHUNK DES_PTR DES_UNROLL DES_RISC1::pa-risc2.o:::::::::::::void:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "hpux64-parisc2-gcc","gcc:-O3 -DB_ENDIAN -D_REENTRANT::::-ldl:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::pa-risc2W.o:::::::::::::void:dlfcn:hpux-shared:-fpic:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa20_64", Loading
TABLE +34 −2 Original line number Diff line number Diff line Loading @@ -2933,7 +2933,7 @@ $rmd160_obj = $rc5_obj = $wp_obj = $cmll_obj = $modes_obj = $modes_obj = ghash-parisc.o $perlasm_scheme = 32 $dso_scheme = dl $shared_target= hpux-shared Loading @@ -2944,6 +2944,38 @@ $ranlib = $arflags = $multilib = /pa1.1 *** hpux-parisc1_1-gcc $cc = gcc $cflags = -O3 -DB_ENDIAN -DBN_DIV2W $unistd = $thread_cflag = -D_REENTRANT $sys_id = $lflags = -Wl,+s -ldld $bn_ops = BN_LLONG DES_PTR DES_UNROLL DES_RISC1 $cpuid_obj = pariscid.o $bn_obj = bn_asm.o parisc-mont.o $des_obj = $aes_obj = aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o $bf_obj = $md5_obj = $sha1_obj = sha1-parisc.o sha256-parisc.o sha512-parisc.o $cast_obj = $rc4_obj = rc4-parisc.o $rmd160_obj = $rc5_obj = $wp_obj = $cmll_obj = $modes_obj = ghash-parisc.o $perlasm_scheme = 32 $dso_scheme = dl $shared_target= hpux-shared $shared_cflag = -fPIC $shared_ldflag = -shared $shared_extension = .sl.$(SHLIB_MAJOR).$(SHLIB_MINOR) $ranlib = $arflags = $multilib = *** hpux-parisc2-cc $cc = cc $cflags = +DA2.0 +DS2.0 +O3 +Optrs_strongly_typed -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY -D_REENTRANT Loading Loading @@ -3093,7 +3125,7 @@ $rmd160_obj = $rc5_obj = $wp_obj = $cmll_obj = $modes_obj = $modes_obj = ghash-parisc.o $perlasm_scheme = 64 $dso_scheme = dlfcn $shared_target= hpux-shared Loading
crypto/modes/Makefile +3 −1 Original line number Diff line number Diff line Loading @@ -50,9 +50,11 @@ ghash-x86.s: asm/ghash-x86.pl ghash-x86_64.s: asm/ghash-x86_64.pl $(PERL) asm/ghash-x86_64.pl $(PERLASM_SCHEME) > $@ ghash-sparcv9.s: asm/ghash-sparcv9.pl $(PERL) asm/ghash-sparcv8.pl $(CFLAGS) > $@ $(PERL) asm/ghash-sparcv9.pl $(CFLAGS) > $@ ghash-alpha.s: asm/ghash-alpha.pl $(PERL) $< | $(CC) -E - | tee $@ > /dev/null ghash-parisc.s: asm/ghash-parisc.pl $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@ # GNU make "catch all" ghash-%.s: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $(CFLAGS) > $@ Loading
crypto/modes/asm/ghash-parisc.pl 0 → 100644 +730 −0 Original line number Diff line number Diff line #!/usr/bin/env perl # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. # ==================================================================== # # April 2010 # # The module implements "4-bit" GCM GHASH function and underlying # single multiplication operation in GF(2^128). "4-bit" means that it # uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC # it processes one byte in 19 cycles, which is more than twice as fast # as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for 8 # cycles, but measured performance on PA-8600 system is ~9 cycles per # processed byte. This is ~2.2x faster than 64-bit code generated by # vendor compiler (which used to be very hard to beat:-). # # Special thanks to polarhome.com for providing HP-UX account. 
$flavour = shift; $output = shift; open STDOUT,">$output"; if ($flavour =~ /64/) { $LEVEL ="2.0W"; $SIZE_T =8; $FRAME_MARKER =80; $SAVED_RP =16; $PUSH ="std"; $PUSHMA ="std,ma"; $POP ="ldd"; $POPMB ="ldd,mb"; $NREGS =6; } else { $LEVEL ="1.0"; #"\n\t.ALLOW\t2.0"; $SIZE_T =4; $FRAME_MARKER =48; $SAVED_RP =20; $PUSH ="stw"; $PUSHMA ="stwm"; $POP ="ldw"; $POPMB ="ldwm"; $NREGS =11; } $FRAME=10*$SIZE_T+$FRAME_MARKER;# NREGS saved regs + frame marker # [+ argument transfer] ################# volatile registers $Xi="%r26"; # argument block $Htbl="%r25"; $inp="%r24"; $len="%r23"; $Hhh=$Htbl; # variables $Hll="%r22"; $Zhh="%r21"; $Zll="%r20"; $cnt="%r19"; $rem_4bit="%r28"; $rem="%r29"; $mask0xf0="%r31"; ################# preserved registers $Thh="%r1"; $Tll="%r2"; $nlo="%r3"; $nhi="%r4"; $byte="%r5"; if ($SIZE_T==4) { $Zhl="%r6"; $Zlh="%r7"; $Hhl="%r8"; $Hlh="%r9"; $Thl="%r10"; $Tlh="%r11"; } $rem2="%r6"; # used in PA-RISC 2.0 code $code.=<<___; .LEVEL $LEVEL .SPACE \$TEXT\$ .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY .EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR .ALIGN 64 gcm_gmult_4bit .PROC .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS .ENTRY $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue $PUSHMA %r3,$FRAME(%sp) $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) ___ $code.=<<___ if ($SIZE_T==4); $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) ___ $code.=<<___; blr %r0,$rem_4bit ldi 3,$rem L\$pic_gmult andcm $rem_4bit,$rem,$rem_4bit addl $inp,$len,$len ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit ldi 0xf0,$mask0xf0 ___ $code.=<<___ if ($SIZE_T==4); ldi 31,$rem mtctl $rem,%cr11 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0 b L\$parisc1_gmult nop ___ $code.=<<___; ldb 15($Xi),$nlo ldo 8($Htbl),$Hll and $mask0xf0,$nlo,$nhi depd,z $nlo,59,4,$nlo ldd 
$nlo($Hll),$Zll ldd $nlo($Hhh),$Zhh depd,z $Zll,60,4,$rem shrpd $Zhh,$Zll,4,$Zll extrd,u $Zhh,59,60,$Zhh ldb 14($Xi),$nlo ldd $nhi($Hll),$Tll ldd $nhi($Hhh),$Thh and $mask0xf0,$nlo,$nhi depd,z $nlo,59,4,$nlo xor $Tll,$Zll,$Zll xor $Thh,$Zhh,$Zhh ldd $rem($rem_4bit),$rem b L\$oop_gmult_pa2 ldi 13,$cnt .ALIGN 8 L\$oop_gmult_pa2 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug depd,z $Zll,60,4,$rem shrpd $Zhh,$Zll,4,$Zll extrd,u $Zhh,59,60,$Zhh ldd $nlo($Hll),$Tll ldd $nlo($Hhh),$Thh xor $Tll,$Zll,$Zll xor $Thh,$Zhh,$Zhh ldd $rem($rem_4bit),$rem xor $rem,$Zhh,$Zhh depd,z $Zll,60,4,$rem ldbx $cnt($Xi),$nlo shrpd $Zhh,$Zll,4,$Zll extrd,u $Zhh,59,60,$Zhh ldd $nhi($Hll),$Tll ldd $nhi($Hhh),$Thh and $mask0xf0,$nlo,$nhi depd,z $nlo,59,4,$nlo ldd $rem($rem_4bit),$rem xor $Tll,$Zll,$Zll addib,uv -1,$cnt,L\$oop_gmult_pa2 xor $Thh,$Zhh,$Zhh xor $rem,$Zhh,$Zhh depd,z $Zll,60,4,$rem shrpd $Zhh,$Zll,4,$Zll extrd,u $Zhh,59,60,$Zhh ldd $nlo($Hll),$Tll ldd $nlo($Hhh),$Thh xor $Tll,$Zll,$Zll xor $Thh,$Zhh,$Zhh ldd $rem($rem_4bit),$rem xor $rem,$Zhh,$Zhh depd,z $Zll,60,4,$rem shrpd $Zhh,$Zll,4,$Zll extrd,u $Zhh,59,60,$Zhh ldd $nhi($Hll),$Tll ldd $nhi($Hhh),$Thh xor $Tll,$Zll,$Zll xor $Thh,$Zhh,$Zhh ldd $rem($rem_4bit),$rem xor $rem,$Zhh,$Zhh std $Zll,8($Xi) std $Zhh,0($Xi) ___ $code.=<<___ if ($SIZE_T==4); b L\$done_gmult nop L\$parisc1_gmult ldb 15($Xi),$nlo ldo 12($Htbl),$Hll ldo 8($Htbl),$Hlh ldo 4($Htbl),$Hhl and $mask0xf0,$nlo,$nhi zdep $nlo,27,4,$nlo ldwx $nlo($Hll),$Zll ldwx $nlo($Hlh),$Zlh ldwx $nlo($Hhl),$Zhl ldwx $nlo($Hhh),$Zhh zdep $Zll,28,4,$rem ldb 14($Xi),$nlo ldwx $rem($rem_4bit),$rem shrpw $Zlh,$Zll,4,$Zll ldwx $nhi($Hll),$Tll shrpw $Zhl,$Zlh,4,$Zlh ldwx $nhi($Hlh),$Tlh shrpw $Zhh,$Zhl,4,$Zhl ldwx $nhi($Hhl),$Thl extru $Zhh,27,28,$Zhh ldwx $nhi($Hhh),$Thh xor $rem,$Zhh,$Zhh and $mask0xf0,$nlo,$nhi zdep $nlo,27,4,$nlo xor $Tll,$Zll,$Zll ldwx $nlo($Hll),$Tll xor $Tlh,$Zlh,$Zlh ldwx $nlo($Hlh),$Tlh xor $Thl,$Zhl,$Zhl b L\$oop_gmult_pa1 ldi 13,$cnt .ALIGN 8 
L\$oop_gmult_pa1 zdep $Zll,28,4,$rem ldwx $nlo($Hhl),$Thl xor $Thh,$Zhh,$Zhh ldwx $rem($rem_4bit),$rem shrpw $Zlh,$Zll,4,$Zll ldwx $nlo($Hhh),$Thh shrpw $Zhl,$Zlh,4,$Zlh ldbx $cnt($Xi),$nlo xor $Tll,$Zll,$Zll ldwx $nhi($Hll),$Tll shrpw $Zhh,$Zhl,4,$Zhl xor $Tlh,$Zlh,$Zlh ldwx $nhi($Hlh),$Tlh extru $Zhh,27,28,$Zhh xor $Thl,$Zhl,$Zhl ldwx $nhi($Hhl),$Thl xor $rem,$Zhh,$Zhh zdep $Zll,28,4,$rem xor $Thh,$Zhh,$Zhh ldwx $nhi($Hhh),$Thh shrpw $Zlh,$Zll,4,$Zll ldwx $rem($rem_4bit),$rem shrpw $Zhl,$Zlh,4,$Zlh shrpw $Zhh,$Zhl,4,$Zhl and $mask0xf0,$nlo,$nhi extru $Zhh,27,28,$Zhh zdep $nlo,27,4,$nlo xor $Tll,$Zll,$Zll ldwx $nlo($Hll),$Tll xor $Tlh,$Zlh,$Zlh ldwx $nlo($Hlh),$Tlh xor $rem,$Zhh,$Zhh addib,uv -1,$cnt,L\$oop_gmult_pa1 xor $Thl,$Zhl,$Zhl zdep $Zll,28,4,$rem ldwx $nlo($Hhl),$Thl xor $Thh,$Zhh,$Zhh ldwx $rem($rem_4bit),$rem shrpw $Zlh,$Zll,4,$Zll ldwx $nlo($Hhh),$Thh shrpw $Zhl,$Zlh,4,$Zlh xor $Tll,$Zll,$Zll ldwx $nhi($Hll),$Tll shrpw $Zhh,$Zhl,4,$Zhl xor $Tlh,$Zlh,$Zlh ldwx $nhi($Hlh),$Tlh extru $Zhh,27,28,$Zhh xor $rem,$Zhh,$Zhh xor $Thl,$Zhl,$Zhl ldwx $nhi($Hhl),$Thl xor $Thh,$Zhh,$Zhh ldwx $nhi($Hhh),$Thh zdep $Zll,28,4,$rem ldwx $rem($rem_4bit),$rem shrpw $Zlh,$Zll,4,$Zll shrpw $Zhl,$Zlh,4,$Zlh shrpw $Zhh,$Zhl,4,$Zhl extru $Zhh,27,28,$Zhh xor $Tll,$Zll,$Zll xor $Tlh,$Zlh,$Zlh xor $rem,$Zhh,$Zhh stw $Zll,12($Xi) xor $Thl,$Zhl,$Zhl stw $Zlh,8($Xi) xor $Thh,$Zhh,$Zhh stw $Zhl,4($Xi) stw $Zhh,0($Xi) ___ $code.=<<___; L\$done_gmult $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 ___ $code.=<<___ if ($SIZE_T==4); $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 ___ $code.=<<___; bv (%r2) .EXIT $POPMB -$FRAME(%sp),%r3 .PROCEND .EXPORT gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR .ALIGN 64 gcm_ghash_4bit .PROC .CALLINFO 
FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=11 .ENTRY $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue $PUSHMA %r3,$FRAME(%sp) $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) ___ $code.=<<___ if ($SIZE_T==4); $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) ___ $code.=<<___; blr %r0,$rem_4bit ldi 3,$rem L\$pic_ghash andcm $rem_4bit,$rem,$rem_4bit addl $inp,$len,$len ldo L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit ldi 0xf0,$mask0xf0 ___ $code.=<<___ if ($SIZE_T==4); ldi 31,$rem mtctl $rem,%cr11 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0 b L\$parisc1_ghash nop ___ $code.=<<___; ldb 15($Xi),$nlo ldo 8($Htbl),$Hll L\$outer_ghash_pa2 ldb 15($inp),$nhi xor $nhi,$nlo,$nlo and $mask0xf0,$nlo,$nhi depd,z $nlo,59,4,$nlo ldd $nlo($Hll),$Zll ldd $nlo($Hhh),$Zhh depd,z $Zll,60,4,$rem shrpd $Zhh,$Zll,4,$Zll extrd,u $Zhh,59,60,$Zhh ldb 14($Xi),$nlo ldb 14($inp),$byte ldd $nhi($Hll),$Tll ldd $nhi($Hhh),$Thh xor $byte,$nlo,$nlo and $mask0xf0,$nlo,$nhi depd,z $nlo,59,4,$nlo xor $Tll,$Zll,$Zll xor $Thh,$Zhh,$Zhh ldd $rem($rem_4bit),$rem b L\$oop_ghash_pa2 ldi 13,$cnt .ALIGN 8 L\$oop_ghash_pa2 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug depd,z $Zll,60,4,$rem2 shrpd $Zhh,$Zll,4,$Zll extrd,u $Zhh,59,60,$Zhh ldd $nlo($Hll),$Tll ldd $nlo($Hhh),$Thh xor $Tll,$Zll,$Zll xor $Thh,$Zhh,$Zhh ldbx $cnt($Xi),$nlo ldbx $cnt($inp),$byte depd,z $Zll,60,4,$rem shrpd $Zhh,$Zll,4,$Zll ldd $rem2($rem_4bit),$rem2 xor $rem2,$Zhh,$Zhh xor $byte,$nlo,$nlo ldd $nhi($Hll),$Tll ldd $nhi($Hhh),$Thh and $mask0xf0,$nlo,$nhi depd,z $nlo,59,4,$nlo extrd,u $Zhh,59,60,$Zhh xor $Tll,$Zll,$Zll ldd $rem($rem_4bit),$rem addib,uv -1,$cnt,L\$oop_ghash_pa2 xor $Thh,$Zhh,$Zhh xor $rem,$Zhh,$Zhh depd,z $Zll,60,4,$rem2 shrpd $Zhh,$Zll,4,$Zll extrd,u $Zhh,59,60,$Zhh ldd $nlo($Hll),$Tll ldd $nlo($Hhh),$Thh xor 
$Tll,$Zll,$Zll xor $Thh,$Zhh,$Zhh depd,z $Zll,60,4,$rem shrpd $Zhh,$Zll,4,$Zll ldd $rem2($rem_4bit),$rem2 xor $rem2,$Zhh,$Zhh ldd $nhi($Hll),$Tll ldd $nhi($Hhh),$Thh extrd,u $Zhh,59,60,$Zhh xor $Tll,$Zll,$Zll xor $Thh,$Zhh,$Zhh ldd $rem($rem_4bit),$rem xor $rem,$Zhh,$Zhh std $Zll,8($Xi) ldo 16($inp),$inp std $Zhh,0($Xi) cmpb,*<> $inp,$len,L\$outer_ghash_pa2 copy $Zll,$nlo ___ $code.=<<___ if ($SIZE_T==4); b L\$done_ghash nop L\$parisc1_ghash ldb 15($Xi),$nlo ldo 12($Htbl),$Hll ldo 8($Htbl),$Hlh ldo 4($Htbl),$Hhl L\$outer_ghash_pa1 ldb 15($inp),$byte xor $byte,$nlo,$nlo and $mask0xf0,$nlo,$nhi zdep $nlo,27,4,$nlo ldwx $nlo($Hll),$Zll ldwx $nlo($Hlh),$Zlh ldwx $nlo($Hhl),$Zhl ldwx $nlo($Hhh),$Zhh zdep $Zll,28,4,$rem ldb 14($Xi),$nlo ldb 14($inp),$byte ldwx $rem($rem_4bit),$rem shrpw $Zlh,$Zll,4,$Zll ldwx $nhi($Hll),$Tll shrpw $Zhl,$Zlh,4,$Zlh ldwx $nhi($Hlh),$Tlh shrpw $Zhh,$Zhl,4,$Zhl ldwx $nhi($Hhl),$Thl extru $Zhh,27,28,$Zhh ldwx $nhi($Hhh),$Thh xor $byte,$nlo,$nlo xor $rem,$Zhh,$Zhh and $mask0xf0,$nlo,$nhi zdep $nlo,27,4,$nlo xor $Tll,$Zll,$Zll ldwx $nlo($Hll),$Tll xor $Tlh,$Zlh,$Zlh ldwx $nlo($Hlh),$Tlh xor $Thl,$Zhl,$Zhl b L\$oop_ghash_pa1 ldi 13,$cnt .ALIGN 8 L\$oop_ghash_pa1 zdep $Zll,28,4,$rem ldwx $nlo($Hhl),$Thl xor $Thh,$Zhh,$Zhh ldwx $rem($rem_4bit),$rem shrpw $Zlh,$Zll,4,$Zll ldwx $nlo($Hhh),$Thh shrpw $Zhl,$Zlh,4,$Zlh ldbx $cnt($Xi),$nlo xor $Tll,$Zll,$Zll ldwx $nhi($Hll),$Tll shrpw $Zhh,$Zhl,4,$Zhl ldbx $cnt($inp),$byte xor $Tlh,$Zlh,$Zlh ldwx $nhi($Hlh),$Tlh extru $Zhh,27,28,$Zhh xor $Thl,$Zhl,$Zhl ldwx $nhi($Hhl),$Thl xor $rem,$Zhh,$Zhh zdep $Zll,28,4,$rem xor $Thh,$Zhh,$Zhh ldwx $nhi($Hhh),$Thh shrpw $Zlh,$Zll,4,$Zll ldwx $rem($rem_4bit),$rem shrpw $Zhl,$Zlh,4,$Zlh xor $byte,$nlo,$nlo shrpw $Zhh,$Zhl,4,$Zhl and $mask0xf0,$nlo,$nhi extru $Zhh,27,28,$Zhh zdep $nlo,27,4,$nlo xor $Tll,$Zll,$Zll ldwx $nlo($Hll),$Tll xor $Tlh,$Zlh,$Zlh ldwx $nlo($Hlh),$Tlh xor $rem,$Zhh,$Zhh addib,uv -1,$cnt,L\$oop_ghash_pa1 xor $Thl,$Zhl,$Zhl zdep $Zll,28,4,$rem ldwx 
$nlo($Hhl),$Thl xor $Thh,$Zhh,$Zhh ldwx $rem($rem_4bit),$rem shrpw $Zlh,$Zll,4,$Zll ldwx $nlo($Hhh),$Thh shrpw $Zhl,$Zlh,4,$Zlh xor $Tll,$Zll,$Zll ldwx $nhi($Hll),$Tll shrpw $Zhh,$Zhl,4,$Zhl xor $Tlh,$Zlh,$Zlh ldwx $nhi($Hlh),$Tlh extru $Zhh,27,28,$Zhh xor $rem,$Zhh,$Zhh xor $Thl,$Zhl,$Zhl ldwx $nhi($Hhl),$Thl xor $Thh,$Zhh,$Zhh ldwx $nhi($Hhh),$Thh zdep $Zll,28,4,$rem ldwx $rem($rem_4bit),$rem shrpw $Zlh,$Zll,4,$Zll shrpw $Zhl,$Zlh,4,$Zlh shrpw $Zhh,$Zhl,4,$Zhl extru $Zhh,27,28,$Zhh xor $Tll,$Zll,$Zll xor $Tlh,$Zlh,$Zlh xor $rem,$Zhh,$Zhh stw $Zll,12($Xi) xor $Thl,$Zhl,$Zhl stw $Zlh,8($Xi) xor $Thh,$Zhh,$Zhh stw $Zhl,4($Xi) ldo 16($inp),$inp stw $Zhh,0($Xi) comb,<> $inp,$len,L\$outer_ghash_pa1 copy $Zll,$nlo ___ $code.=<<___; L\$done_ghash $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 ___ $code.=<<___ if ($SIZE_T==4); $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 ___ $code.=<<___; bv (%r2) .EXIT $POPMB -$FRAME(%sp),%r3 .PROCEND .ALIGN 64 L\$rem_4bit .WORD `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0 .WORD `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0 .WORD `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0 .WORD `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0 .STRINGZ "GHASH for PA-RISC, GRYPTOGAMS by <appro\@openssl.org>" .ALIGN 64 ___ # Explicitly encode PA-RISC 2.0 instructions used in this module, so # that it can be compiled with .LEVEL 1.0. It should be noted that I # wouldn't have to do this, if GNU assembler understood .ALLOW 2.0 # directive... 
# Hand-assemblers for the PA-RISC 2.0 instructions used in this module.
#
# Every encoder receives the completer string ($mod, e.g. ",u" or "") and
# the operand string ($args, with register names already substituted, e.g.
# "%r1(%r2),%r3") of a single instruction.  It returns a ".WORD" directive
# carrying the computed opcode, with the original instruction kept as a
# trailing assembler comment.  When the operands match none of the operand
# patterns the encoder knows, the instruction text is returned unchanged.
#
# Captures are copied into named lexicals right away so that later pattern
# matches (on $mod) cannot clobber them.

my $ldd = sub {
    my ($mod,$args) = @_;
    my $orig = "ldd$mod\t$args";

    # format 4: ldd %rX(%rB),%rT
    if (my ($rx,$rb,$rt) = $args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {
        my $opcode = (0x03<<26)|($rb<<21)|($rx<<16)|(3<<6)|$rt;
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    # format 5: ldd disp(%rB),%rT
    if (my ($disp,$rb,$rt) = $args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {
        my $opcode = (0x03<<26)|($rb<<21)|(1<<12)|(3<<6)|$rt;
        $opcode |= (($disp&0xF)<<17)|(($disp&0x10)<<12);   # encode offset
        $opcode |= (1<<5)  if ($mod =~ /^,m/);
        $opcode |= (1<<13) if ($mod =~ /^,mb/);
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    return "\t".$orig;
};

my $std = sub {
    my ($mod,$args) = @_;
    my $orig = "std$mod\t$args";

    # format 3 suffices: std %rS,disp(%rB)
    if (my ($rs,$disp,$rb) = $args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) {
        my $opcode = (0x1c<<26)|($rb<<21)|($rs<<16)
                   | (($disp&0x1FF8)<<1)|(($disp>>13)&1);
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    return "\t".$orig;
};

my $extrd = sub {
    my ($mod,$args) = @_;
    my $orig = "extrd$mod\t$args";

    # I only have ",u" completer, it's implicitly encoded...

    # format 15: extrd %rS,pos,len,%rT
    if (my ($rs,$pos,$width,$rt) =
            $args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {
        my $opcode = (0x36<<26)|($rs<<21)|($rt<<16);
        my $len = 32-$width;
        $opcode |= (($pos&0x20)<<6)|(($pos&0x1f)<<5);      # encode pos
        $opcode |= (($len&0x20)<<7)|($len&0x1f);           # encode len
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    # format 12: extrd %rS,%sar,len,%rT
    if (my ($rs,$width,$rt) = $args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {
        my $opcode = (0x34<<26)|($rs<<21)|($rt<<16)|(2<<11)|(1<<9);
        my $len = 32-$width;
        $opcode |= (($len&0x20)<<3)|($len&0x1f);           # encode len
        $opcode |= (1<<13) if ($mod =~ /,\**=/);
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    return "\t".$orig;
};

my $shrpd = sub {
    my ($mod,$args) = @_;
    my $orig = "shrpd$mod\t$args";

    # format 14: shrpd %r1,%r2,sa,%rT
    if (my ($r1,$r2,$sa,$rt) =
            $args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {
        my $opcode = (0x34<<26)|($r2<<21)|($r1<<16)|(1<<10)|$rt;
        my $cpos = 63-$sa;
        $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);    # encode sa
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    # format 11: shrpd %r1,%r2,%sar,%rT
    if (my ($r1,$r2,$rt) = $args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {
        return sprintf "\t.WORD\t0x%08x\t; %s",
                       (0x34<<26)|($r2<<21)|($r1<<16)|(1<<9)|$rt,$orig;
    }

    return "\t".$orig;
};

my $depd = sub {
    my ($mod,$args) = @_;
    my $orig = "depd$mod\t$args";

    # I only have ",z" completer, it's implicitly encoded...

    # format 16: depd %rS,pos,len,%rT
    if (my ($rs,$pos,$width,$rt) =
            $args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {
        my $opcode = (0x3c<<26)|($rt<<21)|($rs<<16);
        my $cpos = 63-$pos;
        my $len  = 32-$width;
        $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);    # encode pos
        $opcode |= (($len&0x20)<<7)|($len&0x1f);           # encode len
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    return "\t".$orig;
};

# Mnemonic -> encoder lookup used by assemble().  An explicit table replaces
# the former string-eval lookup of a lexical named after the mnemonic.
my %encoder_for = (
    ldd   => $ldd,
    std   => $std,
    extrd => $extrd,
    shrpd => $shrpd,
    depd  => $depd,
);

# Re-emit one instruction.  $mnemonic, $mod and $args are the three pieces
# captured by the instruction pattern in the output loop below.  Mnemonics
# with an encoder are replaced by that encoder's output; all others are
# re-emitted as "\t<mnemonic><mod>\t<args>".
sub assemble {
    my ($mnemonic,$mod,$args) = @_;
    my $encoder = $encoder_for{$mnemonic};

    return defined $encoder ? $encoder->($mod,$args)
                            : "\t$mnemonic$mod\t$args";
}

# Post-process the accumulated $code line by line and write it to STDOUT.
foreach (split("\n",$code)) {
    # Replace every `...` span with the result of evaluating it as Perl
    # (used for the offset arithmetic embedded in the templates).
    s/\`([^\`]*)\`/eval $1/ge;

    if ($SIZE_T==4) {   # presumably the 32-bit build -- confirm against file head
        # Run "<ws>mnemonic[completers]<ws>operands" lines through assemble().
        s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
        # Rewrite "cmpb,*" to "comb,", then drop the "*" from the first
        # remaining ",*" on the line.
        s/cmpb,\*/comb,/;
        s/,\*/,/;
    }

    print $_,"\n";
}

# Buffered write errors only surface at close time; fail loudly so that a
# truncated output file is never mistaken for a successful run.
close STDOUT or die "error closing STDOUT: $!";