Loading crypto/aes/asm/aes-x86_64.pl +8 −15 Original line number Diff line number Diff line Loading @@ -617,8 +617,7 @@ AES_encrypt: push $key # pick Te4 copy which can't "overlap" with stack frame or key schedule .picmeup $sbox lea AES_Te+2048-.($sbox),$sbox lea .LAES_Te+2048(%rip),$sbox lea 768(%rsp),%rbp sub $sbox,%rbp and \$0x300,%rbp Loading Loading @@ -1210,8 +1209,7 @@ AES_decrypt: push $key # pick Td4 copy which can't "overlap" with stack frame or key schedule .picmeup $sbox lea AES_Td+2048-.($sbox),$sbox lea .LAES_Td+2048(%rip),$sbox lea 768(%rsp),%rbp sub $sbox,%rbp and \$0x300,%rbp Loading Loading @@ -1292,8 +1290,7 @@ _x86_64_AES_set_encrypt_key: test \$-1,%rdi jz .Lbadpointer .picmeup %rbp lea AES_Te-.(%rbp),%rbp lea .LAES_Te(%rip),%rbp lea 2048+128(%rbp),%rbp # prefetch Te4 Loading Loading @@ -1564,8 +1561,7 @@ AES_set_decrypt_key: cmp %rsi,%rdi jne .Linvert .picmeup %rax lea AES_Te+2048+1024-.(%rax),%rax # rcon lea .LAES_Te+2048+1024(%rip),%rax # rcon mov 40(%rax),$mask80 mov 48(%rax),$maskfe Loading Loading @@ -1636,11 +1632,10 @@ AES_cbc_encrypt: cld mov %r9d,%r9d # clear upper half of enc .picmeup $sbox lea AES_Te-.($sbox),$sbox lea .LAES_Te(%rip),$sbox cmp \$0,%r9 jne .Lcbc_picked_te lea AES_Td-AES_Te($sbox),$sbox lea .LAES_Td(%rip),$sbox .Lcbc_picked_te: mov OPENSSL_ia32cap_P(%rip),%eax Loading Loading @@ -2066,9 +2061,8 @@ ___ } $code.=<<___; .globl AES_Te .align 64 AES_Te: .LAES_Te: ___ &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6); &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591); Loading Loading @@ -2275,9 +2269,8 @@ $code.=<<___; .long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b ___ $code.=<<___; .globl AES_Td .align 64 AES_Td: .LAES_Td: ___ &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a); &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b); Loading crypto/bn/asm/x86_64-gcc.c +2 −2 Original line number Diff line number Diff line Loading @@ -182,7 +182,7 @@ BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) asm ( " subq %2,%2 \n" ".align 16 \n" ".p2align 4 \n" "1: movq (%4,%2,8),%0 \n" " adcq (%5,%2,8),%0 \n" " movq %0,(%3,%2,8) \n" Loading @@ -205,7 +205,7 @@ BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) asm ( " subq %2,%2 \n" ".align 16 \n" ".p2align 4 \n" "1: movq (%4,%2,8),%0 \n" " sbbq (%5,%2,8),%0 \n" " movq %0,(%3,%2,8) \n" Loading crypto/rc4/asm/rc4-x86_64.pl +1 −2 Original line number Diff line number Diff line Loading @@ -336,8 +336,7 @@ RC4_set_key: .type RC4_options,\@function,0 .align 16 RC4_options: .picmeup %rax lea .Lopts-.(%rax),%rax lea .Lopts(%rip),%rax mov OPENSSL_ia32cap_P(%rip),%edx bt \$20,%edx jnc .Ldone Loading crypto/sha/asm/sha512-x86_64.pl +5 −4 Original line number Diff line number Diff line Loading @@ -40,14 +40,16 @@ # sha256_block:-( This is presumably because 64-bit shifts/rotates # apparently are not atomic instructions, but implemented in microcode. $flavour = shift; $output = shift; if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; open STDOUT,"| $^X $xlate $output"; open STDOUT,"| $^X $xlate $flavour $output"; if ($output =~ /512/) { $func="sha512_block_data_order"; Loading Loading @@ -196,8 +198,7 @@ $func: mov %rdx,$_end # save end pointer, "3rd" arg mov %rbp,$_rsp # save copy of %rsp .picmeup $Tbl lea $TABLE-.($Tbl),$Tbl lea $TABLE(%rip),$Tbl mov $SZ*0($ctx),$A mov $SZ*1($ctx),$B Loading crypto/whrlpool/asm/wp-x86_64.pl +1 −2 Original line number Diff line number Diff line Loading @@ -71,8 +71,7 @@ $func: mov %rdx,16(%rbx) mov %rax,32(%rbx) # saved stack pointer .picmeup %rbp lea $table-.(%rbp),%rbp lea $table(%rip),%rbp xor %rcx,%rcx xor %rdx,%rdx Loading Loading
crypto/aes/asm/aes-x86_64.pl +8 −15 Original line number Diff line number Diff line Loading @@ -617,8 +617,7 @@ AES_encrypt: push $key # pick Te4 copy which can't "overlap" with stack frame or key schedule .picmeup $sbox lea AES_Te+2048-.($sbox),$sbox lea .LAES_Te+2048(%rip),$sbox lea 768(%rsp),%rbp sub $sbox,%rbp and \$0x300,%rbp Loading Loading @@ -1210,8 +1209,7 @@ AES_decrypt: push $key # pick Td4 copy which can't "overlap" with stack frame or key schedule .picmeup $sbox lea AES_Td+2048-.($sbox),$sbox lea .LAES_Td+2048(%rip),$sbox lea 768(%rsp),%rbp sub $sbox,%rbp and \$0x300,%rbp Loading Loading @@ -1292,8 +1290,7 @@ _x86_64_AES_set_encrypt_key: test \$-1,%rdi jz .Lbadpointer .picmeup %rbp lea AES_Te-.(%rbp),%rbp lea .LAES_Te(%rip),%rbp lea 2048+128(%rbp),%rbp # prefetch Te4 Loading Loading @@ -1564,8 +1561,7 @@ AES_set_decrypt_key: cmp %rsi,%rdi jne .Linvert .picmeup %rax lea AES_Te+2048+1024-.(%rax),%rax # rcon lea .LAES_Te+2048+1024(%rip),%rax # rcon mov 40(%rax),$mask80 mov 48(%rax),$maskfe Loading Loading @@ -1636,11 +1632,10 @@ AES_cbc_encrypt: cld mov %r9d,%r9d # clear upper half of enc .picmeup $sbox lea AES_Te-.($sbox),$sbox lea .LAES_Te(%rip),$sbox cmp \$0,%r9 jne .Lcbc_picked_te lea AES_Td-AES_Te($sbox),$sbox lea .LAES_Td(%rip),$sbox .Lcbc_picked_te: mov OPENSSL_ia32cap_P(%rip),%eax Loading Loading @@ -2066,9 +2061,8 @@ ___ } $code.=<<___; .globl AES_Te .align 64 AES_Te: .LAES_Te: ___ &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6); &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591); Loading Loading @@ -2275,9 +2269,8 @@ $code.=<<___; .long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b ___ $code.=<<___; .globl AES_Td .align 64 AES_Td: .LAES_Td: ___ &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a); &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b); Loading
crypto/bn/asm/x86_64-gcc.c +2 −2 Original line number Diff line number Diff line Loading @@ -182,7 +182,7 @@ BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) asm ( " subq %2,%2 \n" ".align 16 \n" ".p2align 4 \n" "1: movq (%4,%2,8),%0 \n" " adcq (%5,%2,8),%0 \n" " movq %0,(%3,%2,8) \n" Loading @@ -205,7 +205,7 @@ BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) asm ( " subq %2,%2 \n" ".align 16 \n" ".p2align 4 \n" "1: movq (%4,%2,8),%0 \n" " sbbq (%5,%2,8),%0 \n" " movq %0,(%3,%2,8) \n" Loading
crypto/rc4/asm/rc4-x86_64.pl +1 −2 Original line number Diff line number Diff line Loading @@ -336,8 +336,7 @@ RC4_set_key: .type RC4_options,\@function,0 .align 16 RC4_options: .picmeup %rax lea .Lopts-.(%rax),%rax lea .Lopts(%rip),%rax mov OPENSSL_ia32cap_P(%rip),%edx bt \$20,%edx jnc .Ldone Loading
crypto/sha/asm/sha512-x86_64.pl +5 −4 Original line number Diff line number Diff line Loading @@ -40,14 +40,16 @@ # sha256_block:-( This is presumably because 64-bit shifts/rotates # apparently are not atomic instructions, but implemented in microcode. $flavour = shift; $output = shift; if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; open STDOUT,"| $^X $xlate $output"; open STDOUT,"| $^X $xlate $flavour $output"; if ($output =~ /512/) { $func="sha512_block_data_order"; Loading Loading @@ -196,8 +198,7 @@ $func: mov %rdx,$_end # save end pointer, "3rd" arg mov %rbp,$_rsp # save copy of %rsp .picmeup $Tbl lea $TABLE-.($Tbl),$Tbl lea $TABLE(%rip),$Tbl mov $SZ*0($ctx),$A mov $SZ*1($ctx),$B Loading
crypto/whrlpool/asm/wp-x86_64.pl +1 −2 Original line number Diff line number Diff line Loading @@ -71,8 +71,7 @@ $func: mov %rdx,16(%rbx) mov %rax,32(%rbx) # saved stack pointer .picmeup %rbp lea $table-.(%rbp),%rbp lea $table(%rip),%rbp xor %rcx,%rcx xor %rdx,%rdx Loading