Loading crypto/aes/asm/aes-586.pl +9 −9 Original line number Diff line number Diff line Loading @@ -242,7 +242,7 @@ $vertical_spin=0; # shift "verticaly" defaults to 0, because of sub encvert() { my ($te,@s) = @_; my $v0 = $acc, $v1 = $key; my ($v0,$v1) = ($acc,$key); &mov ($v0,$s[3]); # copy s3 &mov (&DWP(4,"esp"),$s[2]); # save s2 Loading Loading @@ -299,7 +299,7 @@ sub encvert() # Another experimental routine, which features "horizontal spin," but # eliminates one reference to stack. Strangely enough runs slower... sub enchoriz() { my $v0 = $key, $v1 = $acc; { my ($v0,$v1) = ($key,$acc); &movz ($v0,&LB($s0)); # 3, 2, 1, 0* &rotr ($s2,8); # 8,11,10, 9 Loading Loading @@ -427,7 +427,7 @@ sub sse_encbody() ###################################################################### sub enccompact() { my $Fn = mov; { my $Fn = \&mov; while ($#_>5) { pop(@_); $Fn=sub{}; } my ($i,$te,@s)=@_; my $tmp = $key; Loading Loading @@ -489,7 +489,7 @@ sub enctransform() &xor ($s[$i],$acc); # r0 ^ r2 &rotl ($s[$i],24); &xor ($s[$i],$acc) # ROTATE(r2^r0,24) ^ r2 &xor ($s[$i],$acc); # ROTATE(r2^r0,24) ^ r2 &rotr ($tmp,16); &xor ($s[$i],$tmp); &rotr ($tmp,8); Loading Loading @@ -1222,7 +1222,7 @@ sub enclast() ###################################################################### sub deccompact() { my $Fn = mov; { my $Fn = \&mov; while ($#_>5) { pop(@_); $Fn=sub{}; } my ($i,$td,@s)=@_; my $tmp = $key; Loading Loading @@ -2182,7 +2182,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds &xor ("eax","eax"); &align (4); &data_word(0xABF3F689); # rep stosd &set_label("skip_ezero") &set_label("skip_ezero"); &mov ("esp",$_esp); &popf (); &set_label("drop_out"); Loading Loading @@ -2302,7 +2302,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds &xor ("eax","eax"); &align (4); &data_word(0xABF3F689); # rep stosd &set_label("skip_dzero") &set_label("skip_dzero"); &mov ("esp",$_esp); &popf (); &function_end_A(); Loading crypto/aes/asm/aesni-x86.pl +5 −5 Original line number Diff line number Diff line Loading @@ -54,8 +54,8 @@ require "x86asm.pl"; &asm_init($ARGV[0],$0); if ($PREFIX eq "aesni") { $movekey=*movups; } else { $movekey=*movups; } if ($PREFIX eq "aesni") { $movekey=\&movups; } else { $movekey=\&movups; } $len="eax"; $rounds="ecx"; Loading Loading @@ -1816,7 +1816,7 @@ if ($PREFIX eq "aesni") { &movups (&QWP(0x10,$out),$inout1); &lea ($inp,&DWP(0x60,$inp)); &movups (&QWP(0x20,$out),$inout2); &mov ($rounds,$rounds_) # restore $rounds &mov ($rounds,$rounds_); # restore $rounds &movups (&QWP(0x30,$out),$inout3); &mov ($key,$key_); # restore $key &movups (&QWP(0x40,$out),$inout4); Loading Loading @@ -2015,7 +2015,7 @@ if ($PREFIX eq "aesni") { &set_label("12rounds",16); &movq ("xmm2",&QWP(16,"eax")); # remaining 1/3 of *userKey &mov ($rounds,11); &$movekey (&QWP(-16,$key),"xmm0") # round 0 &$movekey (&QWP(-16,$key),"xmm0"); # round 0 &aeskeygenassist("xmm1","xmm2",0x01); # round 1,2 &call (&label("key_192a_cold")); &aeskeygenassist("xmm1","xmm2",0x02); # round 2,3 Loading Loading @@ -2152,7 +2152,7 @@ if ($PREFIX eq "aesni") { &mov ($key,&wparam(2)); &call ("_aesni_set_encrypt_key"); &mov ($key,&wparam(2)); &shl ($rounds,4) # rounds-1 after _aesni_set_encrypt_key &shl ($rounds,4); # rounds-1 after _aesni_set_encrypt_key &test ("eax","eax"); &jnz (&label("dec_key_ret")); &lea ("eax",&DWP(16,$key,$rounds)); # end of key schedule Loading crypto/camellia/asm/cmll-x86_64.pl +2 −2 Original line number Diff line number Diff line Loading @@ -71,7 +71,7 @@ my $i=@_[0]; my $seed=defined(@_[1])?@_[1]:0; my $scale=$seed<0?-8:8; my $j=($i&1)*2; my $s0=@S[($j)%4],$s1=@S[($j+1)%4],$s2=@S[($j+2)%4],$s3=@S[($j+3)%4]; my ($s0,$s1,$s2,$s3)=(@S[($j)%4],@S[($j+1)%4],@S[($j+2)%4],@S[($j+3)%4]); $code.=<<___; xor $s0,$t0 # t0^=key[0] Loading Loading @@ -408,7 +408,7 @@ Camellia_Ekeygen: push %r15 .Lkey_prologue: mov %rdi,$keyend # put away arguments, keyBitLength mov %edi,${keyend}d # put away arguments, keyBitLength mov %rdx,$out # keyTable mov 0(%rsi),@S[0] # load 0-127 bits Loading crypto/modes/asm/ghash-x86.pl +3 −3 Original line number Diff line number Diff line Loading @@ -635,7 +635,7 @@ sub mmx_loop() { { my @lo = ("mm0","mm1","mm2"); my @hi = ("mm3","mm4","mm5"); my @tmp = ("mm6","mm7"); my $off1=0,$off2=0,$i; my ($off1,$off2,$i) = (0,0,); &add ($Htbl,128); # optimize for size &lea ("edi",&DWP(16+128,"esp")); Loading Loading @@ -883,7 +883,7 @@ sub reduction_alg9 { # 17/13 times faster than Intel version my ($Xhi,$Xi) = @_; # 1st phase &movdqa ($T1,$Xi) # &movdqa ($T1,$Xi); # &psllq ($Xi,1); &pxor ($Xi,$T1); # &psllq ($Xi,5); # Loading Loading @@ -1019,7 +1019,7 @@ my ($Xhi,$Xi) = @_; &movdqa ($Xhn,$Xn); &pxor ($Xhi,$T1); # "Ii+Xi", consume early &movdqa ($T1,$Xi) #&reduction_alg9($Xhi,$Xi); 1st phase &movdqa ($T1,$Xi); #&reduction_alg9($Xhi,$Xi); 1st phase &psllq ($Xi,1); &pxor ($Xi,$T1); # &psllq ($Xi,5); # Loading crypto/perlasm/cbc.pl +1 −1 Original line number Diff line number Diff line Loading @@ -150,7 +150,7 @@ sub cbc &set_label("PIC_point"); &blindpop("edx"); &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx")); &mov($count,&DWP(0,"ecx",$count,4)) &mov($count,&DWP(0,"ecx",$count,4)); &add($count,"edx"); &xor("ecx","ecx"); &xor("edx","edx"); Loading Loading
crypto/aes/asm/aes-586.pl +9 −9 Original line number Diff line number Diff line Loading @@ -242,7 +242,7 @@ $vertical_spin=0; # shift "verticaly" defaults to 0, because of sub encvert() { my ($te,@s) = @_; my $v0 = $acc, $v1 = $key; my ($v0,$v1) = ($acc,$key); &mov ($v0,$s[3]); # copy s3 &mov (&DWP(4,"esp"),$s[2]); # save s2 Loading Loading @@ -299,7 +299,7 @@ sub encvert() # Another experimental routine, which features "horizontal spin," but # eliminates one reference to stack. Strangely enough runs slower... sub enchoriz() { my $v0 = $key, $v1 = $acc; { my ($v0,$v1) = ($key,$acc); &movz ($v0,&LB($s0)); # 3, 2, 1, 0* &rotr ($s2,8); # 8,11,10, 9 Loading Loading @@ -427,7 +427,7 @@ sub sse_encbody() ###################################################################### sub enccompact() { my $Fn = mov; { my $Fn = \&mov; while ($#_>5) { pop(@_); $Fn=sub{}; } my ($i,$te,@s)=@_; my $tmp = $key; Loading Loading @@ -489,7 +489,7 @@ sub enctransform() &xor ($s[$i],$acc); # r0 ^ r2 &rotl ($s[$i],24); &xor ($s[$i],$acc) # ROTATE(r2^r0,24) ^ r2 &xor ($s[$i],$acc); # ROTATE(r2^r0,24) ^ r2 &rotr ($tmp,16); &xor ($s[$i],$tmp); &rotr ($tmp,8); Loading Loading @@ -1222,7 +1222,7 @@ sub enclast() ###################################################################### sub deccompact() { my $Fn = mov; { my $Fn = \&mov; while ($#_>5) { pop(@_); $Fn=sub{}; } my ($i,$td,@s)=@_; my $tmp = $key; Loading Loading @@ -2182,7 +2182,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds &xor ("eax","eax"); &align (4); &data_word(0xABF3F689); # rep stosd &set_label("skip_ezero") &set_label("skip_ezero"); &mov ("esp",$_esp); &popf (); &set_label("drop_out"); Loading Loading @@ -2302,7 +2302,7 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds &xor ("eax","eax"); &align (4); &data_word(0xABF3F689); # rep stosd &set_label("skip_dzero") &set_label("skip_dzero"); &mov ("esp",$_esp); &popf (); &function_end_A(); Loading
crypto/aes/asm/aesni-x86.pl +5 −5 Original line number Diff line number Diff line Loading @@ -54,8 +54,8 @@ require "x86asm.pl"; &asm_init($ARGV[0],$0); if ($PREFIX eq "aesni") { $movekey=*movups; } else { $movekey=*movups; } if ($PREFIX eq "aesni") { $movekey=\&movups; } else { $movekey=\&movups; } $len="eax"; $rounds="ecx"; Loading Loading @@ -1816,7 +1816,7 @@ if ($PREFIX eq "aesni") { &movups (&QWP(0x10,$out),$inout1); &lea ($inp,&DWP(0x60,$inp)); &movups (&QWP(0x20,$out),$inout2); &mov ($rounds,$rounds_) # restore $rounds &mov ($rounds,$rounds_); # restore $rounds &movups (&QWP(0x30,$out),$inout3); &mov ($key,$key_); # restore $key &movups (&QWP(0x40,$out),$inout4); Loading Loading @@ -2015,7 +2015,7 @@ if ($PREFIX eq "aesni") { &set_label("12rounds",16); &movq ("xmm2",&QWP(16,"eax")); # remaining 1/3 of *userKey &mov ($rounds,11); &$movekey (&QWP(-16,$key),"xmm0") # round 0 &$movekey (&QWP(-16,$key),"xmm0"); # round 0 &aeskeygenassist("xmm1","xmm2",0x01); # round 1,2 &call (&label("key_192a_cold")); &aeskeygenassist("xmm1","xmm2",0x02); # round 2,3 Loading Loading @@ -2152,7 +2152,7 @@ if ($PREFIX eq "aesni") { &mov ($key,&wparam(2)); &call ("_aesni_set_encrypt_key"); &mov ($key,&wparam(2)); &shl ($rounds,4) # rounds-1 after _aesni_set_encrypt_key &shl ($rounds,4); # rounds-1 after _aesni_set_encrypt_key &test ("eax","eax"); &jnz (&label("dec_key_ret")); &lea ("eax",&DWP(16,$key,$rounds)); # end of key schedule Loading
crypto/camellia/asm/cmll-x86_64.pl +2 −2 Original line number Diff line number Diff line Loading @@ -71,7 +71,7 @@ my $i=@_[0]; my $seed=defined(@_[1])?@_[1]:0; my $scale=$seed<0?-8:8; my $j=($i&1)*2; my $s0=@S[($j)%4],$s1=@S[($j+1)%4],$s2=@S[($j+2)%4],$s3=@S[($j+3)%4]; my ($s0,$s1,$s2,$s3)=(@S[($j)%4],@S[($j+1)%4],@S[($j+2)%4],@S[($j+3)%4]); $code.=<<___; xor $s0,$t0 # t0^=key[0] Loading Loading @@ -408,7 +408,7 @@ Camellia_Ekeygen: push %r15 .Lkey_prologue: mov %rdi,$keyend # put away arguments, keyBitLength mov %edi,${keyend}d # put away arguments, keyBitLength mov %rdx,$out # keyTable mov 0(%rsi),@S[0] # load 0-127 bits Loading
crypto/modes/asm/ghash-x86.pl +3 −3 Original line number Diff line number Diff line Loading @@ -635,7 +635,7 @@ sub mmx_loop() { { my @lo = ("mm0","mm1","mm2"); my @hi = ("mm3","mm4","mm5"); my @tmp = ("mm6","mm7"); my $off1=0,$off2=0,$i; my ($off1,$off2,$i) = (0,0,); &add ($Htbl,128); # optimize for size &lea ("edi",&DWP(16+128,"esp")); Loading Loading @@ -883,7 +883,7 @@ sub reduction_alg9 { # 17/13 times faster than Intel version my ($Xhi,$Xi) = @_; # 1st phase &movdqa ($T1,$Xi) # &movdqa ($T1,$Xi); # &psllq ($Xi,1); &pxor ($Xi,$T1); # &psllq ($Xi,5); # Loading Loading @@ -1019,7 +1019,7 @@ my ($Xhi,$Xi) = @_; &movdqa ($Xhn,$Xn); &pxor ($Xhi,$T1); # "Ii+Xi", consume early &movdqa ($T1,$Xi) #&reduction_alg9($Xhi,$Xi); 1st phase &movdqa ($T1,$Xi); #&reduction_alg9($Xhi,$Xi); 1st phase &psllq ($Xi,1); &pxor ($Xi,$T1); # &psllq ($Xi,5); # Loading
crypto/perlasm/cbc.pl +1 −1 Original line number Diff line number Diff line Loading @@ -150,7 +150,7 @@ sub cbc &set_label("PIC_point"); &blindpop("edx"); &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx")); &mov($count,&DWP(0,"ecx",$count,4)) &mov($count,&DWP(0,"ecx",$count,4)); &add($count,"edx"); &xor("ecx","ecx"); &xor("edx","edx"); Loading