Commit addb6e16 authored by Andy Polyakov
Browse files

Throw in AES CBC assembler, up to +40% on aes-128-cbc benchmark.

parent 4d27c4c9
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -114,9 +114,9 @@ my $tlib="-lnsl -lsocket";
# Word-size macro names.  Note that each string carries a trailing space.
my $bits1="THIRTY_TWO_BIT ";
my $bits2="SIXTY_FOUR_BIT ";

# x86 assembler object lists, one per object-file flavour (-elf, -cof, -out).
# Each string is a colon-separated list of fields, and a single field may
# hold several space-separated object files.  Matching these strings against
# the $cpuid_obj ... $sha1_obj listings shown for the same targets, the
# leading fields are, in order: cpuid, bn, des, aes, bf, md5, sha1.
# NOTE(review): the meaning of the fields after sha1 is not visible in this
# excerpt -- confirm against the code that splits these strings.
#
# Each of the three variables appears twice because this text is a rendered
# diff without +/- markers.  The two sets differ only in the aes field: the
# first set lists aes_cbc.o ahead of the ax86-* object, the second set lists
# the ax86-* object alone.
my $x86_elf_asm="x86cpuid-elf.o:asm/bn86-elf.o asm/co86-elf.o:asm/dx86-elf.o asm/yx86-elf.o:aes_cbc.o asm/ax86-elf.o:asm/bx86-elf.o:asm/mx86-elf.o:asm/sx86-elf.o asm/s512sse2-elf.o:asm/cx86-elf.o:asm/rx86-elf.o:asm/rm86-elf.o:asm/r586-elf.o";
my $x86_coff_asm="x86cpuid-cof.o:asm/bn86-cof.o asm/co86-cof.o:asm/dx86-cof.o asm/yx86-cof.o:aes_cbc.o asm/ax86-cof.o:asm/bx86-cof.o:asm/mx86-cof.o:asm/sx86-cof.o asm/s512sse2-cof.o:asm/cx86-cof.o:asm/rx86-cof.o:asm/rm86-cof.o:asm/r586-cof.o";
my $x86_out_asm="x86cpuid-out.o:bn86-out.o co86-out.o:dx86-out.o yx86-out.o:aes_cbc.o ax86-out.o:bx86-out.o:mx86-out.o:sx86-out.o s512sse2-out.o:cx86-out.o:rx86-out.o:rm86-out.o:r586-out.o";
my $x86_elf_asm="x86cpuid-elf.o:asm/bn86-elf.o asm/co86-elf.o:asm/dx86-elf.o asm/yx86-elf.o:asm/ax86-elf.o:asm/bx86-elf.o:asm/mx86-elf.o:asm/sx86-elf.o asm/s512sse2-elf.o:asm/cx86-elf.o:asm/rx86-elf.o:asm/rm86-elf.o:asm/r586-elf.o";
my $x86_coff_asm="x86cpuid-cof.o:asm/bn86-cof.o asm/co86-cof.o:asm/dx86-cof.o asm/yx86-cof.o:asm/ax86-cof.o:asm/bx86-cof.o:asm/mx86-cof.o:asm/sx86-cof.o asm/s512sse2-cof.o:asm/cx86-cof.o:asm/rx86-cof.o:asm/rm86-cof.o:asm/r586-cof.o";
my $x86_out_asm="x86cpuid-out.o:bn86-out.o co86-out.o:dx86-out.o yx86-out.o:ax86-out.o:bx86-out.o:mx86-out.o:sx86-out.o s512sse2-out.o:cx86-out.o:rx86-out.o:rm86-out.o:r586-out.o";

# ia64 list in the same colon-separated layout.  The first and third fields
# are empty, and the fourth field lists aes_core.o and aes_cbc.o alongside
# asm/aes-ia64.o.
my $ia64_asm=":asm/ia64.o::aes_core.o aes_cbc.o asm/aes-ia64.o:::asm/sha1-ia64.o asm/sha256-ia64.o asm/sha512-ia64.o::asm/rc4-ia64.o::";

+31 −31
Original line number Diff line number Diff line
@@ -92,7 +92,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-cof.o
$bn_obj       = asm/bn86-cof.o asm/co86-cof.o
$des_obj      = asm/dx86-cof.o asm/yx86-cof.o
$aes_obj      = aes_cbc.o asm/ax86-cof.o
$aes_obj      = asm/ax86-cof.o
$bf_obj       = asm/bx86-cof.o
$md5_obj      = asm/mx86-cof.o
$sha1_obj     = asm/sx86-cof.o asm/s512sse2-cof.o
@@ -146,7 +146,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-out.o
$bn_obj       = bn86-out.o co86-out.o
$des_obj      = dx86-out.o yx86-out.o
$aes_obj      = ase_cbc.o ax86-out.o
$aes_obj      = ax86-out.o
$bf_obj       = bx86-out.o
$md5_obj      = mx86-out.o
$sha1_obj     = sx86-out.o s512sse2-out.o
@@ -173,7 +173,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-out.o
$bn_obj       = bn86-out.o co86-out.o
$des_obj      = dx86-out.o yx86-out.o
$aes_obj      = ase_cbc.o ax86-out.o
$aes_obj      = ax86-out.o
$bf_obj       = bx86-out.o
$md5_obj      = mx86-out.o
$sha1_obj     = sx86-out.o s512sse2-out.o
@@ -227,7 +227,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -551,7 +551,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-out.o
$bn_obj       = bn86-out.o co86-out.o
$des_obj      = dx86-out.o yx86-out.o
$aes_obj      = ase_cbc.o ax86-out.o
$aes_obj      = ax86-out.o
$bf_obj       = bx86-out.o
$md5_obj      = mx86-out.o
$sha1_obj     = sx86-out.o s512sse2-out.o
@@ -767,7 +767,7 @@ $bn_ops = BN_LLONG MD2_CHAR RC4_INDEX DES_PTR DES_RISC1 DES_UNROLL
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -794,7 +794,7 @@ $bn_ops = BN_LLONG MD2_CHAR RC4_INDEX DES_PTR DES_RISC1 DES_UNROLL
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -1334,7 +1334,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -1685,7 +1685,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -1739,7 +1739,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -1766,7 +1766,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -1847,7 +1847,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -1874,7 +1874,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -1901,7 +1901,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -1928,7 +1928,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -1955,7 +1955,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -2090,7 +2090,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -2279,7 +2279,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -2738,7 +2738,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -3035,7 +3035,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-out.o
$bn_obj       = bn86-out.o co86-out.o
$des_obj      = dx86-out.o yx86-out.o
$aes_obj      = ase_cbc.o ax86-out.o
$aes_obj      = ax86-out.o
$bf_obj       = bx86-out.o
$md5_obj      = mx86-out.o
$sha1_obj     = sx86-out.o s512sse2-out.o
@@ -3062,7 +3062,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -3116,7 +3116,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -3197,7 +3197,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -3332,7 +3332,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -3413,7 +3413,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -3629,7 +3629,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-cof.o
$bn_obj       = asm/bn86-cof.o asm/co86-cof.o
$des_obj      = asm/dx86-cof.o asm/yx86-cof.o
$aes_obj      = aes_cbc.o asm/ax86-cof.o
$aes_obj      = asm/ax86-cof.o
$bf_obj       = asm/bx86-cof.o
$md5_obj      = asm/mx86-cof.o
$sha1_obj     = asm/sx86-cof.o asm/s512sse2-cof.o
@@ -4034,7 +4034,7 @@ $bn_ops = DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -4061,7 +4061,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -4277,7 +4277,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -4547,7 +4547,7 @@ $bn_ops = BN_LLONG MD2_CHAR RC4_INDEX DES_PTR DES_RISC1 DES_UNROLL
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
@@ -4574,7 +4574,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj    = x86cpuid-elf.o
$bn_obj       = asm/bn86-elf.o asm/co86-elf.o
$des_obj      = asm/dx86-elf.o asm/yx86-elf.o
$aes_obj      = aes_cbc.o asm/ax86-elf.o
$aes_obj      = asm/ax86-elf.o
$bf_obj       = asm/bx86-elf.o
$md5_obj      = asm/mx86-elf.o
$sha1_obj     = asm/sx86-elf.o asm/s512sse2-elf.o
+334 −90

File changed.

Preview size limit exceeded, changes collapsed.

+1 −0
Original line number Diff line number Diff line
@@ -175,6 +175,7 @@ sub main'cpuid { &out0("DW\t0A20Fh"); }
# Instruction wrappers installed into package main.  Each one forwards a
# fixed instruction string -- plus the caller's @_ where operands are
# expected -- to one of the outN helpers, which are defined outside this
# excerpt.  The helper's return value is passed straight back to the caller.

# rdtsc is not passed by mnemonic here: the literal text "DW\t0310Fh" is
# handed to out0 instead.
sub main'rdtsc
{
	return &out0("DW\t0310Fh");
}

sub main'halt
{
	return &out0("hlt");
}

# movz goes out under the name "movzx"; the operands are untouched.
sub main'movz
{
	return &out2("movzx", @_);
}

sub main'neg
{
	return &out1("neg", @_);
}

# SSE2
sub main'emms
{
	return &out0("emms");
}
+1 −0
Original line number Diff line number Diff line
@@ -193,6 +193,7 @@ sub main'cpuid { &out0("cpuid"); }
# Instruction wrappers installed into package main.  Each one forwards a
# fixed mnemonic string -- plus the caller's @_ where operands are
# expected -- to one of the outN helpers, which are defined outside this
# excerpt.  The helper's return value is passed straight back to the caller.

sub main'rdtsc
{
	return &out0("rdtsc");
}

sub main'halt
{
	return &out0("hlt");
}

# movz goes out under the name "movzx"; the operands are untouched.
sub main'movz
{
	return &out2("movzx", @_);
}

sub main'neg
{
	return &out1("neg", @_);
}

# SSE2
sub main'emms
{
	return &out0("emms");
}
Loading