Loading Configure +9 −9 Original line number Diff line number Diff line Loading @@ -314,7 +314,7 @@ my %table=( # *-generic* is endian-neutral target, but ./config is free to # throw in -D[BL]_ENDIAN, whichever appropriate... "linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc32.o linux_ppc32-mont.o:::::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc32.o linux_ppc32-mont.o:::::sha1-ppc_linux32.o::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", #### IA-32 targets... "linux-ia32-icc", "icc:-DL_ENDIAN -DTERMIO -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-elf", "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", Loading @@ -322,7 +322,7 @@ my %table=( #### "linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", # -bpowerpc64-linux is transient option, -m64 should be the one to use... 
"linux-ppc64", "gcc:-bpowerpc64-linux -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc64.o linux_ppc64-mont.o:::::::::::dlfcn:linux-shared:-fPIC:-bpowerpc64-linux:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ppc64", "gcc:-bpowerpc64-linux -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc64.o linux_ppc64-mont.o:::::sha1-ppc_linux64.o::::::dlfcn:linux-shared:-fPIC:-bpowerpc64-linux:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", Loading Loading @@ -407,12 +407,12 @@ my %table=( #### IBM's AIX. "aix3-cc", "cc:-O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::", "aix-gcc", "gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::::::::dlfcn:", "aix64-gcc","gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::::::::dlfcn::::::-X64", "aix-gcc", "gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::sha1-ppc_aix32.o::::::dlfcn:", "aix64-gcc","gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::sha1-ppc_aix64.o::::::dlfcn::::::-X64", # Below targets assume AIX 5. Idea is to effectively disregard $OBJECT_MODE # at build time. $OBJECT_MODE is respected at ./config stage! 
"aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384::-qthreaded:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::::::::dlfcn:aix-shared::-q32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32", "aix64-cc", "cc:-q64 -O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::::::::dlfcn:aix-shared::-q64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 64", "aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384::-qthreaded:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::sha1-ppc_aix32.o::::::dlfcn:aix-shared::-q32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32", "aix64-cc", "cc:-q64 -O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::sha1-ppc_aix64.o::::::dlfcn:aix-shared::-q64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 64", # # Cray T90 and similar (SDSC) Loading Loading @@ -504,10 +504,10 @@ my %table=( ##### MacOS X (a.k.a. Rhapsody or Darwin) setup "rhapsody-ppc-cc","cc:-O3 -DB_ENDIAN::(unknown):MACOSX_RHAPSODY::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}::", "darwin-ppc-cc","cc:-O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::::::::dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "darwin64-ppc-cc","cc:-m64 -O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc64.o osx_ppc64-mont.o:::::::::::dlfcn:darwin-shared:-fPIC -fno-common:-m64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "darwin-ppc-cc","cc:-O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::sha1-ppc_osx32.o::::::dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "darwin64-ppc-cc","cc:-m64 -O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:SIXTY_FOUR_BIT_LONG 
RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc64.o osx_ppc64-mont.o:::::sha1-ppc_osx64.o::::::dlfcn:darwin-shared:-fPIC -fno-common:-m64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "darwin-i386-cc","cc:-O3 -fomit-frame-pointer -DL_ENDIAN::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::::::::dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::sha1-ppc_osx32.o::::::dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", ##### A/UX "aux3-gcc","gcc:-O2 -DTERMIO::(unknown):AUX:-lbsd:RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:::", Loading crypto/md32_common.h +2 −1 Original line number Diff line number Diff line Loading @@ -206,7 +206,8 @@ : "cc"); \ ret; \ }) # elif defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__) # elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__) # define ROTATE(a,n) ({ register unsigned int ret; \ asm ( \ "rlwinm %0,%1,%2,0,31" \ Loading crypto/sha/Makefile +5 −0 Original line number Diff line number Diff line Loading @@ -71,6 +71,11 @@ sha256-x86_64.s: asm/sha512-x86_64.pl sha512-x86_64.s: asm/sha512-x86_64.pl $(PERL) asm/sha512-x86_64.pl $@ sha1-ppc_aix32.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $@ sha1-ppc_aix64.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $@ # non-AIX targets are believed to be armed with GNU make sha1-ppc_%.s: asm/sha1-ppc.pl; $(PERL) $< $@ files: $(PERL) $(TOP)/util/files.pl 
Makefile >> $(TOP)/MINFO Loading crypto/sha/asm/sha1-ppc.pl 0 → 100755 +309 −0 Original line number Diff line number Diff line #!/usr/bin/env perl # ==================================================================== # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL # project. Rights for redistribution and usage in source and binary # forms are granted according to the OpenSSL license. # ==================================================================== # I let hardware handle unaligned input, except on page boundaries # (see below for details). Otherwise straightforward implementation # with X vector in register bank. The module is big-endian [which is # not big deal as there're no little-endian targets left around]. # gcc-4.0.0 -m64 -m32 # -------------------------- # sha1 +76% +59% $output = shift; if ($output =~ /64\.s/) { $SIZE_T =8; $RZONE =288; $UCMP ="cmpld"; $STU ="stdu"; $POP ="ld"; $PUSH ="std"; } elsif ($output =~ /32\.s/) { $SIZE_T =4; $RZONE =224; $UCMP ="cmplw"; $STU ="stwu"; $POP ="lwz"; $PUSH ="stw"; } else { die "nonsense $output"; } ( defined shift || open STDOUT,"| $^X ../perlasm/ppc-xlate.pl $output" ) || die "can't call ../perlasm/ppc-xlate.pl: $!"; $FRAME=24*$SIZE_T; $K ="r0"; $sp ="r1"; $toc="r2"; $ctx="r3"; $inp="r4"; $num="r5"; $t0 ="r15"; $t1 ="r6"; $A ="r7"; $B ="r8"; $C ="r9"; $D ="r10"; $E ="r11"; $T ="r12"; @V=($A,$B,$C,$D,$E,$T); @X=("r16","r17","r18","r19","r20","r21","r22","r23", "r24","r25","r26","r27","r28","r29","r30","r31"); sub BODY_00_19 { my ($i,$a,$b,$c,$d,$e,$f)=@_; my $j=$i+1; $code.=<<___ if ($i==0); lwz @X[$i],$i*4($inp) ___ $code.=<<___ if ($i<15); lwz @X[$j],$j*4($inp) add $f,$K,$e rotlwi $e,$a,5 add $f,$f,@X[$i] and $t0,$c,$b add $f,$f,$e andc $t1,$d,$b rotlwi $b,$b,30 or $t0,$t0,$t1 add $f,$f,$t0 ___ $code.=<<___ if ($i>=15); add $f,$K,$e rotlwi $e,$a,5 xor @X[$j%16],@X[$j%16],@X[($j+2)%16] add $f,$f,@X[$i%16] and $t0,$c,$b xor @X[$j%16],@X[$j%16],@X[($j+8)%16] add $f,$f,$e andc $t1,$d,$b rotlwi 
$b,$b,30 or $t0,$t0,$t1 xor @X[$j%16],@X[$j%16],@X[($j+13)%16] add $f,$f,$t0 rotlwi @X[$j%16],@X[$j%16],1 ___ } sub BODY_20_39 { my ($i,$a,$b,$c,$d,$e,$f)=@_; my $j=$i+1; $code.=<<___ if ($i<79); add $f,$K,$e rotlwi $e,$a,5 xor @X[$j%16],@X[$j%16],@X[($j+2)%16] add $f,$f,@X[$i%16] xor $t0,$b,$c xor @X[$j%16],@X[$j%16],@X[($j+8)%16] add $f,$f,$e rotlwi $b,$b,30 xor $t0,$t0,$d xor @X[$j%16],@X[$j%16],@X[($j+13)%16] add $f,$f,$t0 rotlwi @X[$j%16],@X[$j%16],1 ___ $code.=<<___ if ($i==79); add $f,$K,$e rotlwi $e,$a,5 lwz r16,0($ctx) add $f,$f,@X[$i%16] xor $t0,$b,$c lwz r17,4($ctx) add $f,$f,$e rotlwi $b,$b,30 lwz r18,8($ctx) xor $t0,$t0,$d lwz r19,12($ctx) add $f,$f,$t0 lwz r20,16($ctx) ___ } sub BODY_40_59 { my ($i,$a,$b,$c,$d,$e,$f)=@_; my $j=$i+1; $code.=<<___; add $f,$K,$e rotlwi $e,$a,5 xor @X[$j%16],@X[$j%16],@X[($j+2)%16] add $f,$f,@X[$i%16] and $t0,$b,$c xor @X[$j%16],@X[$j%16],@X[($j+8)%16] add $f,$f,$e or $t1,$b,$c rotlwi $b,$b,30 xor @X[$j%16],@X[$j%16],@X[($j+13)%16] and $t1,$t1,$d or $t0,$t0,$t1 rotlwi @X[$j%16],@X[$j%16],1 add $f,$f,$t0 ___ } $code=<<___; .text .globl .sha1_block_asm_data_order .align 4 .sha1_block_asm_data_order: mflr r0 $STU $sp,`-($FRAME+64+$RZONE)`($sp) $PUSH r0,`$FRAME-$SIZE_T*18`($sp) $PUSH r15,`$FRAME-$SIZE_T*17`($sp) $PUSH r16,`$FRAME-$SIZE_T*16`($sp) $PUSH r17,`$FRAME-$SIZE_T*15`($sp) $PUSH r18,`$FRAME-$SIZE_T*14`($sp) $PUSH r19,`$FRAME-$SIZE_T*13`($sp) $PUSH r20,`$FRAME-$SIZE_T*12`($sp) $PUSH r21,`$FRAME-$SIZE_T*11`($sp) $PUSH r22,`$FRAME-$SIZE_T*10`($sp) $PUSH r23,`$FRAME-$SIZE_T*9`($sp) $PUSH r24,`$FRAME-$SIZE_T*8`($sp) $PUSH r25,`$FRAME-$SIZE_T*7`($sp) $PUSH r26,`$FRAME-$SIZE_T*6`($sp) $PUSH r27,`$FRAME-$SIZE_T*5`($sp) $PUSH r28,`$FRAME-$SIZE_T*4`($sp) $PUSH r29,`$FRAME-$SIZE_T*3`($sp) $PUSH r30,`$FRAME-$SIZE_T*2`($sp) $PUSH r31,`$FRAME-$SIZE_T*1`($sp) lwz $A,0($ctx) lwz $B,4($ctx) lwz $C,8($ctx) lwz $D,12($ctx) lwz $E,16($ctx) andi. 
r0,$inp,3 bne Lunaligned Laligned: mtctr $num bl Lsha1_block_private Ldone: $POP r0,`$FRAME-$SIZE_T*18`($sp) $POP r15,`$FRAME-$SIZE_T*17`($sp) $POP r16,`$FRAME-$SIZE_T*16`($sp) $POP r17,`$FRAME-$SIZE_T*15`($sp) $POP r18,`$FRAME-$SIZE_T*14`($sp) $POP r19,`$FRAME-$SIZE_T*13`($sp) $POP r20,`$FRAME-$SIZE_T*12`($sp) $POP r21,`$FRAME-$SIZE_T*11`($sp) $POP r22,`$FRAME-$SIZE_T*10`($sp) $POP r23,`$FRAME-$SIZE_T*9`($sp) $POP r24,`$FRAME-$SIZE_T*8`($sp) $POP r25,`$FRAME-$SIZE_T*7`($sp) $POP r26,`$FRAME-$SIZE_T*6`($sp) $POP r27,`$FRAME-$SIZE_T*5`($sp) $POP r28,`$FRAME-$SIZE_T*4`($sp) $POP r29,`$FRAME-$SIZE_T*3`($sp) $POP r30,`$FRAME-$SIZE_T*2`($sp) $POP r31,`$FRAME-$SIZE_T*1`($sp) mtlr r0 addi $sp,$sp,`$FRAME+64+$RZONE` blr ___ # PowerPC specification allows an implementation to be ill-behaved # upon unaligned access which crosses page boundary. "Better safe # than sorry" principle makes me treat it specially. But I don't # look for particular offending word, but rather for 64-byte input # block which crosses the boundary. Once found that block is aligned # and hashed separately... $code.=<<___; .align 4 Lunaligned: li $t1,4096 subf $t1,$inp,$t1 andi. $t1,$t1,4095 ; distance to closest page boundary srwi. $t1,$t1,6 ; t1/=64 beq Lcross_page $UCMP $num,$t1 ble- Laligned ; didn't cross the page boundary mtctr $t1 subf $num,$t1,$num bl Lsha1_block_private Lcross_page: li $t1,16 mtctr $t1 addi r20,$sp,$FRAME ; spot below the frame Lmemcpy: lbz r16,0($inp) lbz r17,1($inp) lbz r18,2($inp) lbz r19,3($inp) addi $inp,$inp,4 stb r16,0(r20) stb r17,1(r20) stb r18,2(r20) stb r19,3(r20) addi r20,r20,4 bdnz Lmemcpy $PUSH $inp,`$FRAME-$SIZE_T*19`($sp) li $t1,1 addi $inp,$sp,$FRAME mtctr $t1 bl Lsha1_block_private $POP $inp,`$FRAME-$SIZE_T*19`($sp) addic. $num,$num,-1 bne- Lunaligned b Ldone ___ # This is private block function, which uses tailored calling # interface, namely upon entry SHA_CTX is pre-loaded to given # registers and counter register contains amount of chunks to # digest... 
$code.=<<___; .align 4 Lsha1_block_private: ___ $code.=<<___; # load K_00_19 lis $K,0x5a82 ori $K,$K,0x7999 ___ for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } $code.=<<___; # load K_20_39 lis $K,0x6ed9 ori $K,$K,0xeba1 ___ for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } $code.=<<___; # load K_40_59 lis $K,0x8f1b ori $K,$K,0xbcdc ___ for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } $code.=<<___; # load K_60_79 lis $K,0xca62 ori $K,$K,0xc1d6 ___ for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } $code.=<<___; add r16,r16,$E add r17,r17,$T add r18,r18,$A add r19,r19,$B add r20,r20,$C stw r16,0($ctx) mr $A,r16 stw r17,4($ctx) mr $B,r17 stw r18,8($ctx) mr $C,r18 stw r19,12($ctx) mr $D,r19 stw r20,16($ctx) mr $E,r20 addi $inp,$inp,`16*4` bdnz- Lsha1_block_private blr ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; print $code; close STDOUT; crypto/sha/sha512.c +1 −1 Original line number Diff line number Diff line Loading @@ -330,7 +330,7 @@ static const SHA_LONG64 K512[80] = { : "0"(p[1]),"1"(p[0])); \ ((SHA_LONG64)hi)<<32|lo; }) # endif # elif defined(_ARCH_PPC) && defined(__64BIT__) # elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64) # define ROTR(a,n) ({ unsigned long ret; \ asm ("rotrdi %0,%1,%2" \ : "=r"(ret) \ Loading Loading
Configure +9 −9 Original line number Diff line number Diff line Loading @@ -314,7 +314,7 @@ my %table=( # *-generic* is endian-neutral target, but ./config is free to # throw in -D[BL]_ENDIAN, whichever appropriate... "linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc32.o linux_ppc32-mont.o:::::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc32.o linux_ppc32-mont.o:::::sha1-ppc_linux32.o::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", #### IA-32 targets... "linux-ia32-icc", "icc:-DL_ENDIAN -DTERMIO -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-elf", "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", Loading @@ -322,7 +322,7 @@ my %table=( #### "linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", # -bpowerpc64-linux is transient option, -m64 should be the one to use... 
"linux-ppc64", "gcc:-bpowerpc64-linux -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc64.o linux_ppc64-mont.o:::::::::::dlfcn:linux-shared:-fPIC:-bpowerpc64-linux:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ppc64", "gcc:-bpowerpc64-linux -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc64.o linux_ppc64-mont.o:::::sha1-ppc_linux64.o::::::dlfcn:linux-shared:-fPIC:-bpowerpc64-linux:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", Loading Loading @@ -407,12 +407,12 @@ my %table=( #### IBM's AIX. "aix3-cc", "cc:-O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::", "aix-gcc", "gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::::::::dlfcn:", "aix64-gcc","gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::::::::dlfcn::::::-X64", "aix-gcc", "gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::sha1-ppc_aix32.o::::::dlfcn:", "aix64-gcc","gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::sha1-ppc_aix64.o::::::dlfcn::::::-X64", # Below targets assume AIX 5. Idea is to effectively disregard $OBJECT_MODE # at build time. $OBJECT_MODE is respected at ./config stage! 
"aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384::-qthreaded:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::::::::dlfcn:aix-shared::-q32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32", "aix64-cc", "cc:-q64 -O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::::::::dlfcn:aix-shared::-q64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 64", "aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384::-qthreaded:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::sha1-ppc_aix32.o::::::dlfcn:aix-shared::-q32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32", "aix64-cc", "cc:-q64 -O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::sha1-ppc_aix64.o::::::dlfcn:aix-shared::-q64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 64", # # Cray T90 and similar (SDSC) Loading Loading @@ -504,10 +504,10 @@ my %table=( ##### MacOS X (a.k.a. Rhapsody or Darwin) setup "rhapsody-ppc-cc","cc:-O3 -DB_ENDIAN::(unknown):MACOSX_RHAPSODY::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}::", "darwin-ppc-cc","cc:-O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::::::::dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "darwin64-ppc-cc","cc:-m64 -O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc64.o osx_ppc64-mont.o:::::::::::dlfcn:darwin-shared:-fPIC -fno-common:-m64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "darwin-ppc-cc","cc:-O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::sha1-ppc_osx32.o::::::dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "darwin64-ppc-cc","cc:-m64 -O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:SIXTY_FOUR_BIT_LONG 
RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc64.o osx_ppc64-mont.o:::::sha1-ppc_osx64.o::::::dlfcn:darwin-shared:-fPIC -fno-common:-m64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "darwin-i386-cc","cc:-O3 -fomit-frame-pointer -DL_ENDIAN::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::::::::dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::sha1-ppc_osx32.o::::::dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", ##### A/UX "aux3-gcc","gcc:-O2 -DTERMIO::(unknown):AUX:-lbsd:RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:::", Loading
crypto/md32_common.h +2 −1 Original line number Diff line number Diff line Loading @@ -206,7 +206,8 @@ : "cc"); \ ret; \ }) # elif defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__) # elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__) # define ROTATE(a,n) ({ register unsigned int ret; \ asm ( \ "rlwinm %0,%1,%2,0,31" \ Loading
crypto/sha/Makefile +5 −0 Original line number Diff line number Diff line Loading @@ -71,6 +71,11 @@ sha256-x86_64.s: asm/sha512-x86_64.pl sha512-x86_64.s: asm/sha512-x86_64.pl $(PERL) asm/sha512-x86_64.pl $@ sha1-ppc_aix32.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $@ sha1-ppc_aix64.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $@ # non-AIX targets are believed to be armed with GNU make sha1-ppc_%.s: asm/sha1-ppc.pl; $(PERL) $< $@ files: $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO Loading
#!/usr/bin/env perl
# crypto/sha/asm/sha1-ppc.pl (new file, mode 100755)

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. Rights for redistribution and usage in source and binary
# forms are granted according to the OpenSSL license.
# ====================================================================

# I let hardware handle unaligned input, except on page boundaries
# (see below for details). Otherwise straightforward implementation
# with X vector in register bank. The module is big-endian [which is
# not big deal as there're no little-endian targets left around].

#		gcc-4.0.0	-m64	-m32
#		--------------------------
#		sha1		+76%	+59%

# Usage: sha1-ppc.pl <output> [<anything>]
#
# <output> selects the flavour: a name matching /64\.s/ produces the
# 64-bit variant, one matching /32\.s/ the 32-bit variant, anything
# else is rejected. Unless a second argument is given, STDOUT is
# re-opened as a pipe to ../perlasm/ppc-xlate.pl, which receives
# <output> as its argument; with a second argument the generated code
# is printed to the existing STDOUT.

$output = shift;

# Per-flavour parameters: pointer size and the mnemonics used for
# pointer-sized compare/store-with-update/load/store.
# NOTE(review): $RZONE looks like the ABI red zone size - confirm.
if ($output =~ /64\.s/) {
	$SIZE_T	=8;
	$RZONE	=288;
	$UCMP	="cmpld";
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($output =~ /32\.s/) {
	$SIZE_T	=4;
	$RZONE	=224;
	$UCMP	="cmplw";
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $output"; }

( defined shift || open STDOUT,"| $^X ../perlasm/ppc-xlate.pl $output" ) ||
	die "can't call ../perlasm/ppc-xlate.pl: $!";

# Register save area. The prologue stores 18 registers at offsets
# $FRAME-$SIZE_T*18 .. $FRAME-$SIZE_T*1, the unaligned path spills $inp
# at $FRAME-$SIZE_T*19, and the 64 bytes starting at $FRAME serve as
# the bounce buffer for a block that crosses a page boundary.
$FRAME=24*$SIZE_T;

$K  ="r0";
$sp ="r1";
$toc="r2";
$ctx="r3";
$inp="r4";
$num="r5";
$t0 ="r15";
$t1 ="r6";

$A  ="r7";
$B  ="r8";
$C  ="r9";
$D  ="r10";
$E  ="r11";
$T  ="r12";

# Working variables. The list is rotated by one position after every
# round, so the register that received the round result becomes "a"
# of the next round.
@V=($A,$B,$C,$D,$E,$T);
# 16-word message schedule kept entirely in registers.
@X=("r16","r17","r18","r19","r20","r21","r22","r23",
    "r24","r25","r26","r27","r28","r29","r30","r31");

# Append round $i of rounds 0..19 to $code.
#   $i            round number
#   $a..$e        registers holding the current working variables
#   $f            register receiving the round result
# The round function emitted is (b&c)|(~b&d). For $i<15 the next input
# word X[$i+1] is loaded from $inp; from $i==15 on the next schedule
# word is computed in place as
# rotl(X[j]^X[j+2]^X[j+8]^X[j+13],1), indices taken modulo 16.
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
# Very first round also has to fetch its own input word.
$code.=<<___ if ($i==0);
	lwz	@X[$i],$i*4($inp)
___
$code.=<<___ if ($i<15);
	lwz	@X[$j],$j*4($inp)
	add	$f,$K,$e
	rotlwi	$e,$a,5
	add	$f,$f,@X[$i]
	and	$t0,$c,$b
	add	$f,$f,$e
	andc	$t1,$d,$b
	rotlwi	$b,$b,30
	or	$t0,$t0,$t1
	add	$f,$f,$t0
___
$code.=<<___ if ($i>=15);
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
	add	$f,$f,@X[$i%16]
	and	$t0,$c,$b
	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
	add	$f,$f,$e
	andc	$t1,$d,$b
	rotlwi	$b,$b,30
	or	$t0,$t0,$t1
	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
	add	$f,$f,$t0
	rotlwi	@X[$j%16],@X[$j%16],1
___
}

# Append one round with round function b^c^d to $code; the driver loop
# uses it for rounds 20..39 and again for rounds 60..79. Arguments are
# the same as for BODY_00_19. The last round ($i==79) needs no further
# schedule word; instead it reloads the five chaining words from $ctx
# into r16..r20, where the final summation expects them.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
	add	$f,$f,@X[$i%16]
	xor	$t0,$b,$c
	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
	add	$f,$f,$e
	rotlwi	$b,$b,30
	xor	$t0,$t0,$d
	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
	add	$f,$f,$t0
	rotlwi	@X[$j%16],@X[$j%16],1
___
$code.=<<___ if ($i==79);
	add	$f,$K,$e
	rotlwi	$e,$a,5
	lwz	r16,0($ctx)
	add	$f,$f,@X[$i%16]
	xor	$t0,$b,$c
	lwz	r17,4($ctx)
	add	$f,$f,$e
	rotlwi	$b,$b,30
	lwz	r18,8($ctx)
	xor	$t0,$t0,$d
	lwz	r19,12($ctx)
	add	$f,$f,$t0
	lwz	r20,16($ctx)
___
}

# Append round $i of rounds 40..59 to $code. Arguments are the same as
# for BODY_00_19; the round function emitted is (b&c)|((b|c)&d).
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
$code.=<<___;
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
	add	$f,$f,@X[$i%16]
	and	$t0,$b,$c
	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
	add	$f,$f,$e
	or	$t1,$b,$c
	rotlwi	$b,$b,30
	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
	and	$t1,$t1,$d
	or	$t0,$t0,$t1
	rotlwi	@X[$j%16],@X[$j%16],1
	add	$f,$f,$t0
___
}

# Public entry point: allocate the frame, save the link register
# (fetched into r0) and r15..r31, load the five chaining words from
# $ctx and dispatch on the alignment of $inp. Expressions in backticks
# are evaluated by the substitution at the very end of this script.
$code=<<___;
.text

.globl	.sha1_block_asm_data_order
.align	4
.sha1_block_asm_data_order:
	mflr	r0
	$STU	$sp,`-($FRAME+64+$RZONE)`($sp)
	$PUSH	r0,`$FRAME-$SIZE_T*18`($sp)
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	lwz	$A,0($ctx)
	lwz	$B,4($ctx)
	lwz	$C,8($ctx)
	lwz	$D,12($ctx)
	lwz	$E,16($ctx)
	andi.	r0,$inp,3
	bne	Lunaligned
Laligned:
	mtctr	$num
	bl	Lsha1_block_private
Ldone:
	$POP	r0,`$FRAME-$SIZE_T*18`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,`$FRAME+64+$RZONE`
	blr
___

# PowerPC specification allows an implementation to be ill-behaved
# upon unaligned access which crosses page boundary. "Better safe
# than sorry" principle makes me treat it specially. But I don't
# look for particular offending word, but rather for 64-byte input
# block which crosses the boundary. Once found that block is aligned
# and hashed separately...
$code.=<<___;
.align	4
Lunaligned:
	li	$t1,4096
	subf	$t1,$inp,$t1
	andi.	$t1,$t1,4095	; distance to closest page boundary
	srwi.	$t1,$t1,6	; t1/=64
	beq	Lcross_page
	$UCMP	$num,$t1
	ble-	Laligned	; didn't cross the page boundary
	mtctr	$t1
	subf	$num,$t1,$num
	bl	Lsha1_block_private
Lcross_page:
	li	$t1,16
	mtctr	$t1
	addi	r20,$sp,$FRAME	; spot below the frame
Lmemcpy:
	lbz	r16,0($inp)
	lbz	r17,1($inp)
	lbz	r18,2($inp)
	lbz	r19,3($inp)
	addi	$inp,$inp,4
	stb	r16,0(r20)
	stb	r17,1(r20)
	stb	r18,2(r20)
	stb	r19,3(r20)
	addi	r20,r20,4
	bdnz	Lmemcpy

	$PUSH	$inp,`$FRAME-$SIZE_T*19`($sp)
	li	$t1,1
	addi	$inp,$sp,$FRAME
	mtctr	$t1
	bl	Lsha1_block_private
	$POP	$inp,`$FRAME-$SIZE_T*19`($sp)
	addic.	$num,$num,-1
	bne-	Lunaligned
	b	Ldone
___

# This is private block function, which uses tailored calling
# interface, namely upon entry SHA_CTX is pre-loaded to given
# registers and counter register contains amount of chunks to
# digest...
$code.=<<___;
.align	4
Lsha1_block_private:
___
$code.=<<___;	# load K_00_19
	lis	$K,0x5a82
	ori	$K,$K,0x7999
___
# $i deliberately carries over from one loop to the next, so the four
# loops together emit rounds 0..79.
for($i=0;$i<20;$i++)	{ &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_20_39
	lis	$K,0x6ed9
	ori	$K,$K,0xeba1
___
for(;$i<40;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_40_59
	lis	$K,0x8f1b
	ori	$K,$K,0xbcdc
___
for(;$i<60;$i++)	{ &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_60_79
	lis	$K,0xca62
	ori	$K,$K,0xc1d6
___
for(;$i<80;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
# 80 rotations of the six-entry @V leave the working variables a..e in
# $E,$T,$A,$B,$C. They are added to the chaining words that round 79
# loaded into r16..r20, stored back to $ctx and moved into $A..$E for
# the next block.
$code.=<<___;
	add	r16,r16,$E
	add	r17,r17,$T
	add	r18,r18,$A
	add	r19,r19,$B
	add	r20,r20,$C
	stw	r16,0($ctx)
	mr	$A,r16
	stw	r17,4($ctx)
	mr	$B,r17
	stw	r18,8($ctx)
	mr	$C,r18
	stw	r19,12($ctx)
	mr	$D,r19
	stw	r20,16($ctx)
	mr	$E,r20
	addi	$inp,$inp,`16*4`
	bdnz-	Lsha1_block_private
	blr
___

# Replace every `expression` with its evaluated value.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
# STDOUT is normally a pipe to ppc-xlate.pl: close() is what flushes
# the buffered output and reports a failed translator, so a false
# return must abort instead of leaving a truncated output file behind.
close STDOUT or die "error closing STDOUT: $! (child status $?)";
crypto/sha/sha512.c +1 −1 Original line number Diff line number Diff line Loading @@ -330,7 +330,7 @@ static const SHA_LONG64 K512[80] = { : "0"(p[1]),"1"(p[0])); \ ((SHA_LONG64)hi)<<32|lo; }) # endif # elif defined(_ARCH_PPC) && defined(__64BIT__) # elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64) # define ROTR(a,n) ({ unsigned long ret; \ asm ("rotrdi %0,%1,%2" \ : "=r"(ret) \ Loading