Loading crypto/md5/asm/md5-586.pl +8 −9 Original line number Diff line number Diff line Loading @@ -56,14 +56,14 @@ sub R0 &lea($a,&DWP($t,$a,$tmp2,1)); &xor($tmp1,$d); # F function - part 4 &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2); &add($a,$tmp1); &mov($tmp1,&Np($c)) if $pos < 1; # next tmp1 for R0 &mov($tmp1,&Np($c)) if $pos == 1; # next tmp1 for R1 &rotl($a,$s); &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2); &mov($tmp1,&Np($c)) if $pos < 1; # next tmp1 for R0 &mov($tmp1,&Np($c)) if $pos == 1; # next tmp1 for R1 &add($a,$b); } Loading @@ -74,13 +74,12 @@ sub R1 &comment("R1 $ki"); &lea($a,&DWP($t,$a,$tmp2,1)); &xor($tmp1,$b); # G function - part 2 &and($tmp1,$d); # G function - part 3 &lea($a,&DWP($t,$a,$tmp2,1)); &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2); &xor($tmp1,$c); # G function - part 4 &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2); &add($a,$tmp1); &mov($tmp1,&Np($c)) if $pos < 1; # G function - part 1 Loading Loading @@ -108,10 +107,10 @@ if (($n & 1) == 0) &lea($a,&DWP($t,$a,$tmp2,1)); &add($a,$tmp1); &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)); &rotl($a,$s); &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)); &mov($tmp1,&Np($c)); } else Loading @@ -120,11 +119,11 @@ else # make sure to do 'D' first, not 'B', else we clash with # the last add from the previous round. &lea($a,&DWP($t,$a,$tmp2,1)); &add($b,$c); # MOVED FORWARD &xor($tmp1,$d); # H function - part 2 &lea($a,&DWP($t,$a,$tmp2,1)); &xor($tmp1,$b); # H function - part 3 &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2); Loading crypto/md5/asm/md5-x86_64.pl +12 −1 Original line number Diff line number Diff line Loading @@ -47,8 +47,8 @@ sub round2_step $code .= " mov %edx, %r12d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1); $code .= <<EOF; not %r11d /* not z */ lea $T_i($dst,%r10d),$dst /* Const + dst + ... */ and $x, %r12d /* x & z */ lea $T_i($dst,%r10d),$dst /* Const + dst + ... */ and $y, %r11d /* y & (not z) */ mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ or %r11d, %r12d /* (y & (not z)) | (x & z) */ Loading @@ -65,6 +65,7 @@ EOF # %r10d = X[k_next] # %r11d = y' (copy of y for the next step) # Each round3_step() takes about 4.2 clocks (8 instructions, 1.9 IPC) { my $round3_alter=0; sub round3_step { my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; Loading @@ -75,10 +76,20 @@ sub round3_step mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ xor $x, %r11d /* x ^ ... */ add %r11d, $dst /* dst += ... */ EOF $code .= <<EOF if ($round3_alter); rol \$$s, $dst /* dst <<< s */ mov $x, %r11d /* (NEXT STEP) y' = $x */ EOF $code .= <<EOF if (!$round3_alter); mov $x, %r11d /* (NEXT STEP) y' = $x */ rol \$$s, $dst /* dst <<< s */ EOF $code .= <<EOF; add $x, $dst /* dst += x */ EOF $round3_alter^=1; } } # round4_step() does: Loading Loading
crypto/md5/asm/md5-586.pl +8 −9 Original line number Diff line number Diff line Loading @@ -56,14 +56,14 @@ sub R0 &lea($a,&DWP($t,$a,$tmp2,1)); &xor($tmp1,$d); # F function - part 4 &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2); &add($a,$tmp1); &mov($tmp1,&Np($c)) if $pos < 1; # next tmp1 for R0 &mov($tmp1,&Np($c)) if $pos == 1; # next tmp1 for R1 &rotl($a,$s); &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2); &mov($tmp1,&Np($c)) if $pos < 1; # next tmp1 for R0 &mov($tmp1,&Np($c)) if $pos == 1; # next tmp1 for R1 &add($a,$b); } Loading @@ -74,13 +74,12 @@ sub R1 &comment("R1 $ki"); &lea($a,&DWP($t,$a,$tmp2,1)); &xor($tmp1,$b); # G function - part 2 &and($tmp1,$d); # G function - part 3 &lea($a,&DWP($t,$a,$tmp2,1)); &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2); &xor($tmp1,$c); # G function - part 4 &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2); &add($a,$tmp1); &mov($tmp1,&Np($c)) if $pos < 1; # G function - part 1 Loading Loading @@ -108,10 +107,10 @@ if (($n & 1) == 0) &lea($a,&DWP($t,$a,$tmp2,1)); &add($a,$tmp1); &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)); &rotl($a,$s); &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)); &mov($tmp1,&Np($c)); } else Loading @@ -120,11 +119,11 @@ else # make sure to do 'D' first, not 'B', else we clash with # the last add from the previous round. &lea($a,&DWP($t,$a,$tmp2,1)); &add($b,$c); # MOVED FORWARD &xor($tmp1,$d); # H function - part 2 &lea($a,&DWP($t,$a,$tmp2,1)); &xor($tmp1,$b); # H function - part 3 &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2); Loading
crypto/md5/asm/md5-x86_64.pl +12 −1 Original line number Diff line number Diff line Loading @@ -47,8 +47,8 @@ sub round2_step $code .= " mov %edx, %r12d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1); $code .= <<EOF; not %r11d /* not z */ lea $T_i($dst,%r10d),$dst /* Const + dst + ... */ and $x, %r12d /* x & z */ lea $T_i($dst,%r10d),$dst /* Const + dst + ... */ and $y, %r11d /* y & (not z) */ mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ or %r11d, %r12d /* (y & (not z)) | (x & z) */ Loading @@ -65,6 +65,7 @@ EOF # %r10d = X[k_next] # %r11d = y' (copy of y for the next step) # Each round3_step() takes about 4.2 clocks (8 instructions, 1.9 IPC) { my $round3_alter=0; sub round3_step { my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; Loading @@ -75,10 +76,20 @@ sub round3_step mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ xor $x, %r11d /* x ^ ... */ add %r11d, $dst /* dst += ... */ EOF $code .= <<EOF if ($round3_alter); rol \$$s, $dst /* dst <<< s */ mov $x, %r11d /* (NEXT STEP) y' = $x */ EOF $code .= <<EOF if (!$round3_alter); mov $x, %r11d /* (NEXT STEP) y' = $x */ rol \$$s, $dst /* dst <<< s */ EOF $code .= <<EOF; add $x, $dst /* dst += x */ EOF $round3_alter^=1; } } # round4_step() does: Loading