Loading crypto/aes/asm/aes-x86_64.pl +84 −0 Original line number Diff line number Diff line Loading @@ -599,13 +599,21 @@ $code.=<<___; .hidden asm_AES_encrypt asm_AES_encrypt: AES_encrypt: .cfi_startproc mov %rsp,%rax .cfi_def_cfa_register %rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 # allocate frame "above" key schedule lea -63(%rdx),%rcx # %rdx is key argument Loading @@ -618,6 +626,7 @@ AES_encrypt: mov %rsi,16(%rsp) # save out mov %rax,24(%rsp) # save original stack pointer .cfi_cfa_expression %rsp+24,deref,+8 .Lenc_prologue: mov %rdx,$key Loading @@ -644,20 +653,29 @@ AES_encrypt: mov 16(%rsp),$out # restore out mov 24(%rsp),%rsi # restore saved stack pointer .cfi_def_cfa %rsi,8 mov $s0,0($out) # write output vector mov $s1,4($out) mov $s2,8($out) mov $s3,12($out) mov -48(%rsi),%r15 .cfi_restore %r15 mov -40(%rsi),%r14 .cfi_restore %r14 mov -32(%rsi),%r13 .cfi_restore %r13 mov -24(%rsi),%r12 .cfi_restore %r12 mov -16(%rsi),%rbp .cfi_restore %rbp mov -8(%rsi),%rbx .cfi_restore %rbx lea (%rsi),%rsp .cfi_def_cfa_register %rsp .Lenc_epilogue: ret .cfi_endproc .size AES_encrypt,.-AES_encrypt ___ Loading Loading @@ -1197,13 +1215,21 @@ $code.=<<___; .hidden asm_AES_decrypt asm_AES_decrypt: AES_decrypt: .cfi_startproc mov %rsp,%rax .cfi_def_cfa_register %rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 # allocate frame "above" key schedule lea -63(%rdx),%rcx # %rdx is key argument Loading @@ -1216,6 +1242,7 @@ AES_decrypt: mov %rsi,16(%rsp) # save out mov %rax,24(%rsp) # save original stack pointer .cfi_cfa_expression %rsp+24,deref,+8 .Ldec_prologue: mov %rdx,$key Loading Loading @@ -1244,20 +1271,29 @@ AES_decrypt: mov 16(%rsp),$out # restore out mov 24(%rsp),%rsi # restore saved stack pointer .cfi_def_cfa %rsi,8 mov $s0,0($out) # write output vector mov 
$s1,4($out) mov $s2,8($out) mov $s3,12($out) mov -48(%rsi),%r15 .cfi_restore %r15 mov -40(%rsi),%r14 .cfi_restore %r14 mov -32(%rsi),%r13 .cfi_restore %r13 mov -24(%rsi),%r12 .cfi_restore %r12 mov -16(%rsi),%rbp .cfi_restore %rbp mov -8(%rsi),%rbx .cfi_restore %rbx lea (%rsi),%rsp .cfi_def_cfa_register %rsp .Ldec_epilogue: ret .cfi_endproc .size AES_decrypt,.-AES_decrypt ___ #------------------------------------------------------------------# Loading Loading @@ -1296,22 +1332,34 @@ $code.=<<___; .type AES_set_encrypt_key,\@function,3 .align 16 AES_set_encrypt_key: .cfi_startproc push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 # redundant, but allows to share .cfi_push %r12 push %r13 # exception handler... .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 sub \$8,%rsp .cfi_adjust_cfa_offset 8 .Lenc_key_prologue: call _x86_64_AES_set_encrypt_key mov 40(%rsp),%rbp .cfi_restore %rbp mov 48(%rsp),%rbx .cfi_restore %rbx add \$56,%rsp .cfi_adjust_cfa_offset -56 .Lenc_key_epilogue: ret .cfi_endproc .size AES_set_encrypt_key,.-AES_set_encrypt_key .type _x86_64_AES_set_encrypt_key,\@abi-omnipotent Loading Loading @@ -1562,13 +1610,21 @@ $code.=<<___; .type AES_set_decrypt_key,\@function,3 .align 16 AES_set_decrypt_key: .cfi_startproc push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 push %rdx # save key schedule .cfi_adjust_cfa_offset 8 .Ldec_key_prologue: call _x86_64_AES_set_encrypt_key Loading Loading @@ -1622,14 +1678,22 @@ $code.=<<___; xor %rax,%rax .Labort: mov 8(%rsp),%r15 .cfi_restore %r15 mov 16(%rsp),%r14 .cfi_restore %r14 mov 24(%rsp),%r13 .cfi_restore %r13 mov 32(%rsp),%r12 .cfi_restore %r12 mov 40(%rsp),%rbp .cfi_restore %rbp mov 48(%rsp),%rbx .cfi_restore %rbx add \$56,%rsp .cfi_adjust_cfa_offset -56 .Ldec_key_epilogue: ret .cfi_endproc .size AES_set_decrypt_key,.-AES_set_decrypt_key ___ Loading Loading @@ -1660,15 +1724,23 @@ 
$code.=<<___; .hidden asm_AES_cbc_encrypt asm_AES_cbc_encrypt: AES_cbc_encrypt: .cfi_startproc cmp \$0,%rdx # check length je .Lcbc_epilogue pushfq .cfi_push 49 # %rflags push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 .Lcbc_prologue: cld Loading Loading @@ -1713,8 +1785,10 @@ AES_cbc_encrypt: .Lcbc_te_ok: xchg %rsp,$key .cfi_def_cfa_register $key #add \$8,%rsp # reserve for return address! mov $key,$_rsp # save %rsp .cfi_cfa_expression $_rsp,deref,+64 .Lcbc_fast_body: mov %rdi,$_inp # save copy of inp mov %rsi,$_out # save copy of out Loading Loading @@ -2096,17 +2170,27 @@ AES_cbc_encrypt: .align 16 .Lcbc_exit: mov $_rsp,%rsi .cfi_def_cfa %rsi,64 mov (%rsi),%r15 .cfi_restore %r15 mov 8(%rsi),%r14 .cfi_restore %r14 mov 16(%rsi),%r13 .cfi_restore %r13 mov 24(%rsi),%r12 .cfi_restore %r12 mov 32(%rsi),%rbp .cfi_restore %rbp mov 40(%rsi),%rbx .cfi_restore %rbx lea 48(%rsi),%rsp .cfi_def_cfa %rsp,16 .Lcbc_popfq: popfq .cfi_pop 49 # %rflags .Lcbc_epilogue: ret .cfi_endproc .size AES_cbc_encrypt,.-AES_cbc_encrypt ___ } Loading crypto/aes/asm/aesni-mb-x86_64.pl +72 −0 Original line number Diff line number Diff line Loading @@ -105,6 +105,7 @@ $code.=<<___; .type aesni_multi_cbc_encrypt,\@function,3 .align 32 aesni_multi_cbc_encrypt: .cfi_startproc ___ $code.=<<___ if ($avx); cmp \$2,$num Loading @@ -118,12 +119,19 @@ $code.=<<___ if ($avx); ___ $code.=<<___; mov %rsp,%rax .cfi_def_cfa_register %rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp Loading @@ -148,6 +156,7 @@ $code.=<<___; sub \$48,%rsp and \$-64,%rsp mov %rax,16(%rsp) # original %rsp .cfi_cfa_expression %rsp+16,deref,+8 .Lenc4x_body: movdqu ($key),$zero # 0-round key Loading Loading @@ -319,6 +328,7 @@ $code.=<<___; jnz .Loop_enc4x mov 16(%rsp),%rax # 
original %rsp .cfi_def_cfa %rax,8 mov 24(%rsp),$num #pxor @inp[0],@out[0] Loading Loading @@ -350,20 +360,29 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 .cfi_restore %r15 mov -40(%rax),%r14 .cfi_restore %r14 mov -32(%rax),%r13 .cfi_restore %r13 mov -24(%rax),%r12 .cfi_restore %r12 mov -16(%rax),%rbp .cfi_restore %rbp mov -8(%rax),%rbx .cfi_restore %rbx lea (%rax),%rsp .cfi_def_cfa_register %rsp .Lenc4x_epilogue: ret .cfi_endproc .size aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt .globl aesni_multi_cbc_decrypt .type aesni_multi_cbc_decrypt,\@function,3 .align 32 aesni_multi_cbc_decrypt: .cfi_startproc ___ $code.=<<___ if ($avx); cmp \$2,$num Loading @@ -377,12 +396,19 @@ $code.=<<___ if ($avx); ___ $code.=<<___; mov %rsp,%rax .cfi_def_cfa_register %rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp Loading @@ -407,6 +433,7 @@ $code.=<<___; sub \$48,%rsp and \$-64,%rsp mov %rax,16(%rsp) # original %rsp .cfi_cfa_expression %rsp+16,deref,+8 .Ldec4x_body: movdqu ($key),$zero # 0-round key Loading Loading @@ -578,6 +605,7 @@ $code.=<<___; jnz .Loop_dec4x mov 16(%rsp),%rax # original %rsp .cfi_def_cfa %rax,8 mov 24(%rsp),$num lea `40*4`($inp),$inp Loading @@ -600,14 +628,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 .cfi_restore %r15 mov -40(%rax),%r14 .cfi_restore %r14 mov -32(%rax),%r13 .cfi_restore %r13 mov -24(%rax),%r12 .cfi_restore %r12 mov -16(%rax),%rbp .cfi_restore %rbp mov -8(%rax),%rbx .cfi_restore %rbx lea (%rax),%rsp .cfi_def_cfa_register %rsp .Ldec4x_epilogue: ret .cfi_endproc .size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt ___ Loading @@ -623,14 +659,22 @@ $code.=<<___; .type aesni_multi_cbc_encrypt_avx,\@function,3 .align 32 aesni_multi_cbc_encrypt_avx: .cfi_startproc _avx_cbc_enc_shortcut: mov %rsp,%rax .cfi_def_cfa_register %rax push %rbx 
.cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp Loading @@ -657,6 +701,7 @@ $code.=<<___; sub \$192,%rsp and \$-128,%rsp mov %rax,16(%rsp) # original %rsp .cfi_cfa_expression %rsp+16,deref,+8 .Lenc8x_body: vzeroupper Loading Loading @@ -861,6 +906,7 @@ $code.=<<___; jnz .Loop_enc8x mov 16(%rsp),%rax # original %rsp .cfi_def_cfa %rax,8 #mov 24(%rsp),$num #lea `40*8`($inp),$inp #dec $num Loading @@ -883,27 +929,43 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 .cfi_restore %r15 mov -40(%rax),%r14 .cfi_restore %r14 mov -32(%rax),%r13 .cfi_restore %r13 mov -24(%rax),%r12 .cfi_restore %r12 mov -16(%rax),%rbp .cfi_restore %rbp mov -8(%rax),%rbx .cfi_restore %rbx lea (%rax),%rsp .cfi_def_cfa_register %rsp .Lenc8x_epilogue: ret .cfi_endproc .size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx .type aesni_multi_cbc_decrypt_avx,\@function,3 .align 32 aesni_multi_cbc_decrypt_avx: .cfi_startproc _avx_cbc_dec_shortcut: mov %rsp,%rax .cfi_def_cfa_register %rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp Loading Loading @@ -932,6 +994,7 @@ $code.=<<___; and \$-256,%rsp sub \$192,%rsp mov %rax,16(%rsp) # original %rsp .cfi_cfa_expression %rsp+16,deref,+8 .Ldec8x_body: vzeroupper Loading Loading @@ -1167,6 +1230,7 @@ $code.=<<___; jnz .Loop_dec8x mov 16(%rsp),%rax # original %rsp .cfi_def_cfa %rax,8 #mov 24(%rsp),$num #lea `40*8`($inp),$inp #dec $num Loading @@ -1189,14 +1253,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 .cfi_restore %r15 mov -40(%rax),%r14 .cfi_restore %r14 mov -32(%rax),%r13 .cfi_restore %r13 mov -24(%rax),%r12 .cfi_restore %r12 mov -16(%rax),%rbp .cfi_restore %rbp mov -8(%rax),%rbx .cfi_restore %rbx lea (%rax),%rsp 
.cfi_def_cfa_register %rsp .Ldec8x_epilogue: ret .cfi_endproc .size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx ___ }}} Loading crypto/aes/asm/aesni-sha1-x86_64.pl +68 −0 Original line number Diff line number Diff line Loading @@ -186,16 +186,24 @@ $code.=<<___; .type aesni_cbc_sha1_enc_ssse3,\@function,6 .align 32 aesni_cbc_sha1_enc_ssse3: .cfi_startproc mov `($win64?56:8)`(%rsp),$inp # load 7th argument #shr \$6,$len # debugging artefact #jz .Lepilogue_ssse3 # debugging artefact push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 lea `-104-($win64?10*16:0)`(%rsp),%rsp .cfi_adjust_cfa_offset `104+($win64?10*16:0)` #mov $in0,$inp # debugging artefact #lea 64(%rsp),$ctx # debugging artefact ___ Loading Loading @@ -721,15 +729,24 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea `104+($win64?10*16:0)`(%rsp),%rsi .cfi_def_cfa %rsi,56 mov 0(%rsi),%r15 .cfi_restore %r15 mov 8(%rsi),%r14 .cfi_restore %r14 mov 16(%rsi),%r13 .cfi_restore %r13 mov 24(%rsi),%r12 .cfi_restore %r12 mov 32(%rsi),%rbp .cfi_restore %rbp mov 40(%rsi),%rbx .cfi_restore %rbx lea 48(%rsi),%rsp .cfi_def_cfa %rsp,8 .Lepilogue_ssse3: ret .cfi_endproc .size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3 ___ Loading Loading @@ -837,14 +854,22 @@ $code.=<<___; .type aesni256_cbc_sha1_dec_ssse3,\@function,6 .align 32 aesni256_cbc_sha1_dec_ssse3: .cfi_startproc mov `($win64?56:8)`(%rsp),$inp # load 7th argument push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 lea `-104-($win64?10*16:0)`(%rsp),%rsp .cfi_adjust_cfa_offset `104+($win64?10*16:0)` ___ $code.=<<___ if ($win64); movaps %xmm6,96+0(%rsp) Loading Loading @@ -992,15 +1017,24 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea `104+($win64?10*16:0)`(%rsp),%rsi .cfi_def_cfa %rsi,56 mov 0(%rsi),%r15 .cfi_restore %r15 mov 8(%rsi),%r14
.cfi_restore %r14 mov 16(%rsi),%r13 .cfi_restore %r13 mov 24(%rsi),%r12 .cfi_restore %r12 mov 32(%rsi),%rbp .cfi_restore %rbp mov 40(%rsi),%rbx .cfi_restore %rbx lea 48(%rsi),%rsp .cfi_def_cfa %rsp,8 .Lepilogue_dec_ssse3: ret .cfi_endproc .size aesni256_cbc_sha1_dec_ssse3,.-aesni256_cbc_sha1_dec_ssse3 ___ }}} Loading @@ -1026,16 +1060,24 @@ $code.=<<___; .type aesni_cbc_sha1_enc_avx,\@function,6 .align 32 aesni_cbc_sha1_enc_avx: .cfi_startproc mov `($win64?56:8)`(%rsp),$inp # load 7th argument #shr \$6,$len # debugging artefact #jz .Lepilogue_avx # debugging artefact push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 lea `-104-($win64?10*16:0)`(%rsp),%rsp .cfi_adjust_cfa_offset `104+($win64?10*16:0)` #mov $in0,$inp # debugging artefact #lea 64(%rsp),$ctx # debugging artefact ___ Loading Loading @@ -1434,15 +1476,24 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea `104+($win64?10*16:0)`(%rsp),%rsi .cfi_def_cfa %rsi,56 mov 0(%rsi),%r15 .cfi_restore %r15 mov 8(%rsi),%r14 .cfi_restore %r14 mov 16(%rsi),%r13 .cfi_restore %r13 mov 24(%rsi),%r12 .cfi_restore %r12 mov 32(%rsi),%rbp .cfi_restore %rbp mov 40(%rsi),%rbx .cfi_restore %rbx lea 48(%rsi),%rsp .cfi_def_cfa %rsp,8 .Lepilogue_avx: ret .cfi_endproc .size aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx ___ Loading Loading @@ -1491,14 +1542,22 @@ $code.=<<___; .type aesni256_cbc_sha1_dec_avx,\@function,6 .align 32 aesni256_cbc_sha1_dec_avx: .cfi_startproc mov `($win64?56:8)`(%rsp),$inp # load 7th argument push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 lea `-104-($win64?10*16:0)`(%rsp),%rsp .cfi_adjust_cfa_offset `104+($win64?10*16:0)` ___ $code.=<<___ if ($win64); movaps %xmm6,96+0(%rsp) Loading Loading @@ -1645,15 +1704,24 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea `104+($win64?10*16:0)`(%rsp),%rsi
.cfi_def_cfa %rsi,56 mov 0(%rsi),%r15 .cfi_restore %r15 mov 8(%rsi),%r14 .cfi_restore %r14 mov 16(%rsi),%r13 .cfi_restore %r13 mov 24(%rsi),%r12 .cfi_restore %r12 mov 32(%rsi),%rbp .cfi_restore %rbp mov 40(%rsi),%rbx .cfi_restore %rbx lea 48(%rsi),%rsp .cfi_def_cfa %rsp,8 .Lepilogue_dec_avx: ret .cfi_endproc .size aesni256_cbc_sha1_dec_avx,.-aesni256_cbc_sha1_dec_avx ___ }}} Loading crypto/aes/asm/aesni-sha256-x86_64.pl +55 −1 Original line number Diff line number Diff line Loading @@ -109,7 +109,7 @@ $_key="16*$SZ+3*8(%rsp)"; $_ivp="16*$SZ+4*8(%rsp)"; $_ctx="16*$SZ+5*8(%rsp)"; $_in0="16*$SZ+6*8(%rsp)"; $_rsp="16*$SZ+7*8(%rsp)"; $_rsp="`16*$SZ+7*8`(%rsp)"; $framesz=16*$SZ+8*8; $code=<<___; Loading Loading @@ -339,15 +339,23 @@ $code.=<<___; .type ${func}_xop,\@function,6 .align 64 ${func}_xop: .cfi_startproc .Lxop_shortcut: mov `($win64?56:8)`(%rsp),$in0 # load 7th parameter mov %rsp,%rax # copy %rsp .cfi_def_cfa_register %rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 sub \$`$framesz+$win64*16*10`,%rsp and \$-64,%rsp # align stack frame Loading @@ -364,6 +372,7 @@ ${func}_xop: mov $ctx,$_ctx mov $in0,$_in0 mov %rax,$_rsp .cfi_cfa_expression $_rsp,deref,+8 ___ $code.=<<___ if ($win64); movaps %xmm6,`$framesz+16*0`(%rsp) Loading Loading @@ -601,6 +610,7 @@ $code.=<<___; mov $_ivp,$ivp mov $_rsp,%rsi .cfi_def_cfa %rsi,8 vmovdqu $iv,($ivp) # output IV vzeroall ___ Loading @@ -618,14 +628,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rsi),%r15 .cfi_restore %r15 mov -40(%rsi),%r14 .cfi_restore %r14 mov -32(%rsi),%r13 .cfi_restore %r13 mov -24(%rsi),%r12 .cfi_restore %r12 mov -16(%rsi),%rbp .cfi_restore %rbp mov -8(%rsi),%rbx .cfi_restore %rbx lea (%rsi),%rsp .cfi_def_cfa_register %rsp .Lepilogue_xop: ret .cfi_endproc .size ${func}_xop,.-${func}_xop ___ ###################################################################### Loading @@ -637,15 +655,23 @@ 
$code.=<<___; .type ${func}_avx,\@function,6 .align 64 ${func}_avx: .cfi_startproc .Lavx_shortcut: mov `($win64?56:8)`(%rsp),$in0 # load 7th parameter mov %rsp,%rax # copy %rsp .cfi_def_cfa_register %rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 sub \$`$framesz+$win64*16*10`,%rsp and \$-64,%rsp # align stack frame Loading @@ -662,6 +688,7 @@ ${func}_avx: mov $ctx,$_ctx mov $in0,$_in0 mov %rax,$_rsp .cfi_cfa_expression $_rsp,deref,+8 ___ $code.=<<___ if ($win64); movaps %xmm6,`$framesz+16*0`(%rsp) Loading Loading @@ -852,6 +879,7 @@ $code.=<<___; mov $_ivp,$ivp mov $_rsp,%rsi .cfi_def_cfa %rsi,8 vmovdqu $iv,($ivp) # output IV vzeroall ___ Loading @@ -869,14 +897,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rsi),%r15 .cfi_restore %r15 mov -40(%rsi),%r14 .cfi_restore %r14 mov -32(%rsi),%r13 .cfi_restore %r13 mov -24(%rsi),%r12 .cfi_restore %r12 mov -16(%rsi),%rbp .cfi_restore %rbp mov -8(%rsi),%rbx .cfi_restore %rbx lea (%rsi),%rsp .cfi_def_cfa_register %rsp .Lepilogue_avx: ret .cfi_endproc .size ${func}_avx,.-${func}_avx ___ Loading Loading @@ -933,15 +969,23 @@ $code.=<<___; .type ${func}_avx2,\@function,6 .align 64 ${func}_avx2: .cfi_startproc .Lavx2_shortcut: mov `($win64?56:8)`(%rsp),$in0 # load 7th parameter mov %rsp,%rax # copy %rsp .cfi_def_cfa_register %rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 sub \$`2*$SZ*$rounds+8*8+$win64*16*10`,%rsp and \$-256*$SZ,%rsp # align stack frame add \$`2*$SZ*($rounds-8)`,%rsp Loading @@ -959,6 +1003,7 @@ ${func}_avx2: mov $ctx,$_ctx mov $in0,$_in0 mov %rax,$_rsp .cfi_cfa_expression $_rsp,deref,+8 ___ $code.=<<___ if ($win64); movaps %xmm6,`$framesz+16*0`(%rsp) Loading Loading @@ -1189,6 +1234,7 @@ $code.=<<___; lea ($Tbl),%rsp mov $_ivp,$ivp mov $_rsp,%rsi .cfi_def_cfa %rsi,8 vmovdqu $iv,($ivp) # 
output IV vzeroall ___ Loading @@ -1206,14 +1252,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rsi),%r15 .cfi_restore %r15 mov -40(%rsi),%r14 .cfi_restore %r14 mov -32(%rsi),%r13 .cfi_restore %r13 mov -24(%rsi),%r12 .cfi_restore %r12 mov -16(%rsi),%rbp .cfi_restore %rbp mov -8(%rsi),%rbx .cfi_restore %rbx lea (%rsi),%rsp .cfi_def_cfa_register %rsp .Lepilogue_avx2: ret .cfi_endproc .size ${func}_avx2,.-${func}_avx2 ___ }} Loading crypto/aes/asm/aesni-x86_64.pl +60 −0 Original line number Diff line number Diff line Loading @@ -1180,6 +1180,7 @@ $code.=<<___; .type aesni_ctr32_encrypt_blocks,\@function,5 .align 16 aesni_ctr32_encrypt_blocks: .cfi_startproc cmp \$1,$len jne .Lctr32_bulk Loading @@ -1202,7 +1203,9 @@ $code.=<<___; .align 16 .Lctr32_bulk: lea (%rsp),$key_ # use $key_ as frame pointer .cfi_def_cfa_register $key_ push %rbp .cfi_push %rbp sub \$$frame_size,%rsp and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ Loading Loading @@ -1722,9 +1725,12 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -8($key_),%rbp .cfi_restore %rbp lea ($key_),%rsp .cfi_def_cfa_register %rsp .Lctr32_epilogue: ret .cfi_endproc .size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks ___ } Loading @@ -1746,8 +1752,11 @@ $code.=<<___; .type aesni_xts_encrypt,\@function,6 .align 16 aesni_xts_encrypt: .cfi_startproc lea (%rsp),%r11 # frame pointer .cfi_def_cfa_register %r11 push %rbp .cfi_push %rbp sub \$$frame_size,%rsp and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ Loading Loading @@ -2212,9 +2221,12 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -8(%r11),%rbp .cfi_restore %rbp lea (%r11),%rsp .cfi_def_cfa_register %rsp .Lxts_enc_epilogue: ret .cfi_endproc .size aesni_xts_encrypt,.-aesni_xts_encrypt ___ Loading @@ -2223,8 +2235,11 @@ $code.=<<___; .type aesni_xts_decrypt,\@function,6 .align 16 aesni_xts_decrypt: .cfi_startproc lea (%rsp),%r11 # frame pointer .cfi_def_cfa_register %r11 push %rbp .cfi_push %rbp sub 
\$$frame_size,%rsp and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ Loading Loading @@ -2715,9 +2730,12 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -8(%r11),%rbp .cfi_restore %rbp lea (%r11),%rsp .cfi_def_cfa_register %rsp .Lxts_dec_epilogue: ret .cfi_endproc .size aesni_xts_decrypt,.-aesni_xts_decrypt ___ } Loading @@ -2742,12 +2760,18 @@ $code.=<<___; .type aesni_ocb_encrypt,\@function,6 .align 32 aesni_ocb_encrypt: .cfi_startproc lea (%rsp),%rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 ___ $code.=<<___ if ($win64); lea -0xa0(%rsp),%rsp Loading Loading @@ -2942,6 +2966,7 @@ $code.=<<___ if (!$win64); pxor %xmm14,%xmm14 pxor %xmm15,%xmm15 lea 0x28(%rsp),%rax .cfi_def_cfa %rax,8 ___ $code.=<<___ if ($win64); movaps 0x00(%rsp),%xmm6 Loading Loading @@ -2969,13 +2994,20 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -40(%rax),%r14 .cfi_restore %r14 mov -32(%rax),%r13 .cfi_restore %r13 mov -24(%rax),%r12 .cfi_restore %r12 mov -16(%rax),%rbp .cfi_restore %rbp mov -8(%rax),%rbx .cfi_restore %rbx lea (%rax),%rsp .cfi_def_cfa_register %rsp .Locb_enc_epilogue: ret .cfi_endproc .size aesni_ocb_encrypt,.-aesni_ocb_encrypt .type __ocb_encrypt6,\@abi-omnipotent Loading Loading @@ -3188,12 +3220,18 @@ __ocb_encrypt1: .type aesni_ocb_decrypt,\@function,6 .align 32 aesni_ocb_decrypt: .cfi_startproc lea (%rsp),%rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 ___ $code.=<<___ if ($win64); lea -0xa0(%rsp),%rsp Loading Loading @@ -3410,6 +3448,7 @@ $code.=<<___ if (!$win64); pxor %xmm14,%xmm14 pxor %xmm15,%xmm15 lea 0x28(%rsp),%rax .cfi_def_cfa %rax,8 ___ $code.=<<___ if ($win64); movaps 0x00(%rsp),%xmm6 Loading Loading @@ -3437,13 +3476,20 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -40(%rax),%r14 .cfi_restore %r14 mov -32(%rax),%r13 .cfi_restore %r13 mov -24(%rax),%r12 .cfi_restore 
%r12 mov -16(%rax),%rbp .cfi_restore %rbp mov -8(%rax),%rbx .cfi_restore %rbx lea (%rax),%rsp .cfi_def_cfa_register %rsp .Locb_dec_epilogue: ret .cfi_endproc .size aesni_ocb_decrypt,.-aesni_ocb_decrypt .type __ocb_decrypt6,\@abi-omnipotent Loading Loading @@ -3656,6 +3702,7 @@ $code.=<<___; .type ${PREFIX}_cbc_encrypt,\@function,6 .align 16 ${PREFIX}_cbc_encrypt: .cfi_startproc test $len,$len # check length jz .Lcbc_ret Loading Loading @@ -3732,7 +3779,9 @@ $code.=<<___; .align 16 .Lcbc_decrypt_bulk: lea (%rsp),%r11 # frame pointer .cfi_def_cfa_register %r11 push %rbp .cfi_push %rbp sub \$$frame_size,%rsp and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ Loading Loading @@ -4175,9 +4224,12 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -8(%r11),%rbp .cfi_restore %rbp lea (%r11),%rsp .cfi_def_cfa_register %rsp .Lcbc_ret: ret .cfi_endproc .size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt ___ } Loading @@ -4198,7 +4250,9 @@ $code.=<<___; .type ${PREFIX}_set_decrypt_key,\@abi-omnipotent .align 16 ${PREFIX}_set_decrypt_key: .cfi_startproc .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 .cfi_adjust_cfa_offset 8 call __aesni_set_encrypt_key shl \$4,$bits # rounds-1 after _aesni_set_encrypt_key test %eax,%eax Loading Loading @@ -4231,7 +4285,9 @@ ${PREFIX}_set_decrypt_key: pxor %xmm0,%xmm0 .Ldec_key_ret: add \$8,%rsp .cfi_adjust_cfa_offset -8 ret .cfi_endproc .LSEH_end_set_decrypt_key: .size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key ___ Loading Loading @@ -4267,7 +4323,9 @@ $code.=<<___; .align 16 ${PREFIX}_set_encrypt_key: __aesni_set_encrypt_key: .cfi_startproc .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 .cfi_adjust_cfa_offset 8 mov \$-1,%rax test $inp,$inp jz .Lenc_key_ret Loading Loading @@ -4560,7 +4618,9 @@ __aesni_set_encrypt_key: pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 add \$8,%rsp .cfi_adjust_cfa_offset -8 ret .cfi_endproc .LSEH_end_set_encrypt_key: .align 16 Loading Loading
crypto/aes/asm/aes-x86_64.pl +84 −0 Original line number Diff line number Diff line Loading @@ -599,13 +599,21 @@ $code.=<<___; .hidden asm_AES_encrypt asm_AES_encrypt: AES_encrypt: .cfi_startproc mov %rsp,%rax .cfi_def_cfa_register %rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 # allocate frame "above" key schedule lea -63(%rdx),%rcx # %rdx is key argument Loading @@ -618,6 +626,7 @@ AES_encrypt: mov %rsi,16(%rsp) # save out mov %rax,24(%rsp) # save original stack pointer .cfi_cfa_expression %rsp+24,deref,+8 .Lenc_prologue: mov %rdx,$key Loading @@ -644,20 +653,29 @@ AES_encrypt: mov 16(%rsp),$out # restore out mov 24(%rsp),%rsi # restore saved stack pointer .cfi_def_cfa %rsi,8 mov $s0,0($out) # write output vector mov $s1,4($out) mov $s2,8($out) mov $s3,12($out) mov -48(%rsi),%r15 .cfi_restore %r15 mov -40(%rsi),%r14 .cfi_restore %r14 mov -32(%rsi),%r13 .cfi_restore %r13 mov -24(%rsi),%r12 .cfi_restore %r12 mov -16(%rsi),%rbp .cfi_restore %rbp mov -8(%rsi),%rbx .cfi_restore %rbx lea (%rsi),%rsp .cfi_def_cfa_register %rsp .Lenc_epilogue: ret .cfi_endproc .size AES_encrypt,.-AES_encrypt ___ Loading Loading @@ -1197,13 +1215,21 @@ $code.=<<___; .hidden asm_AES_decrypt asm_AES_decrypt: AES_decrypt: .cfi_startproc mov %rsp,%rax .cfi_def_cfa_register %rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 # allocate frame "above" key schedule lea -63(%rdx),%rcx # %rdx is key argument Loading @@ -1216,6 +1242,7 @@ AES_decrypt: mov %rsi,16(%rsp) # save out mov %rax,24(%rsp) # save original stack pointer .cfi_cfa_expression %rsp+24,deref,+8 .Ldec_prologue: mov %rdx,$key Loading Loading @@ -1244,20 +1271,29 @@ AES_decrypt: mov 16(%rsp),$out # restore out mov 24(%rsp),%rsi # restore saved stack pointer .cfi_def_cfa %rsi,8 mov $s0,0($out) # write output vector mov 
$s1,4($out) mov $s2,8($out) mov $s3,12($out) mov -48(%rsi),%r15 .cfi_restore %r15 mov -40(%rsi),%r14 .cfi_restore %r14 mov -32(%rsi),%r13 .cfi_restore %r13 mov -24(%rsi),%r12 .cfi_restore %r12 mov -16(%rsi),%rbp .cfi_restore %rbp mov -8(%rsi),%rbx .cfi_restore %rbx lea (%rsi),%rsp .cfi_def_cfa_register %rsp .Ldec_epilogue: ret .cfi_endproc .size AES_decrypt,.-AES_decrypt ___ #------------------------------------------------------------------# Loading Loading @@ -1296,22 +1332,34 @@ $code.=<<___; .type AES_set_encrypt_key,\@function,3 .align 16 AES_set_encrypt_key: .cfi_startproc push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 # redundant, but allows to share .cfi_push %r12 push %r13 # exception handler... .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 sub \$8,%rsp .cfi_adjust_cfa_offset 8 .Lenc_key_prologue: call _x86_64_AES_set_encrypt_key mov 40(%rsp),%rbp .cfi_restore %rbp mov 48(%rsp),%rbx .cfi_restore %rbx add \$56,%rsp .cfi_adjust_cfa_offset -56 .Lenc_key_epilogue: ret .cfi_endproc .size AES_set_encrypt_key,.-AES_set_encrypt_key .type _x86_64_AES_set_encrypt_key,\@abi-omnipotent Loading Loading @@ -1562,13 +1610,21 @@ $code.=<<___; .type AES_set_decrypt_key,\@function,3 .align 16 AES_set_decrypt_key: .cfi_startproc push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 push %rdx # save key schedule .cfi_adjust_cfa_offset 8 .Ldec_key_prologue: call _x86_64_AES_set_encrypt_key Loading Loading @@ -1622,14 +1678,22 @@ $code.=<<___; xor %rax,%rax .Labort: mov 8(%rsp),%r15 .cfi_restore %r15 mov 16(%rsp),%r14 .cfi_restore %r14 mov 24(%rsp),%r13 .cfi_restore %r13 mov 32(%rsp),%r12 .cfi_restore %r12 mov 40(%rsp),%rbp .cfi_restore %rbp mov 48(%rsp),%rbx .cfi_restore %rbx add \$56,%rsp .cfi_adjust_cfa_offset -56 .Ldec_key_epilogue: ret .cfi_endproc .size AES_set_decrypt_key,.-AES_set_decrypt_key ___ Loading Loading @@ -1660,15 +1724,23 @@ 
$code.=<<___; .hidden asm_AES_cbc_encrypt asm_AES_cbc_encrypt: AES_cbc_encrypt: .cfi_startproc cmp \$0,%rdx # check length je .Lcbc_epilogue pushfq .cfi_push 49 # %rflags push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 .Lcbc_prologue: cld Loading Loading @@ -1713,8 +1785,10 @@ AES_cbc_encrypt: .Lcbc_te_ok: xchg %rsp,$key .cfi_def_cfa_register $key #add \$8,%rsp # reserve for return address! mov $key,$_rsp # save %rsp .cfi_cfa_expression $_rsp,deref,+64 .Lcbc_fast_body: mov %rdi,$_inp # save copy of inp mov %rsi,$_out # save copy of out Loading Loading @@ -2096,17 +2170,27 @@ AES_cbc_encrypt: .align 16 .Lcbc_exit: mov $_rsp,%rsi .cfi_def_cfa %rsi,64 mov (%rsi),%r15 .cfi_restore %r15 mov 8(%rsi),%r14 .cfi_restore %r14 mov 16(%rsi),%r13 .cfi_restore %r13 mov 24(%rsi),%r12 .cfi_restore %r12 mov 32(%rsi),%rbp .cfi_restore %rbp mov 40(%rsi),%rbx .cfi_restore %rbx lea 48(%rsi),%rsp .cfi_def_cfa %rsp,16 .Lcbc_popfq: popfq .cfi_pop 49 # %rflags .Lcbc_epilogue: ret .cfi_endproc .size AES_cbc_encrypt,.-AES_cbc_encrypt ___ } Loading
crypto/aes/asm/aesni-mb-x86_64.pl +72 −0 Original line number Diff line number Diff line Loading @@ -105,6 +105,7 @@ $code.=<<___; .type aesni_multi_cbc_encrypt,\@function,3 .align 32 aesni_multi_cbc_encrypt: .cfi_startproc ___ $code.=<<___ if ($avx); cmp \$2,$num Loading @@ -118,12 +119,19 @@ $code.=<<___ if ($avx); ___ $code.=<<___; mov %rsp,%rax .cfi_def_cfa_register %rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp Loading @@ -148,6 +156,7 @@ $code.=<<___; sub \$48,%rsp and \$-64,%rsp mov %rax,16(%rsp) # original %rsp .cfi_cfa_expression %rsp+16,deref,+8 .Lenc4x_body: movdqu ($key),$zero # 0-round key Loading Loading @@ -319,6 +328,7 @@ $code.=<<___; jnz .Loop_enc4x mov 16(%rsp),%rax # original %rsp .cfi_def_cfa %rax,8 mov 24(%rsp),$num #pxor @inp[0],@out[0] Loading Loading @@ -350,20 +360,29 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 .cfi_restore %r15 mov -40(%rax),%r14 .cfi_restore %r14 mov -32(%rax),%r13 .cfi_restore %r13 mov -24(%rax),%r12 .cfi_restore %r12 mov -16(%rax),%rbp .cfi_restore %rbp mov -8(%rax),%rbx .cfi_restore %rbx lea (%rax),%rsp .cfi_def_cfa_register %rsp .Lenc4x_epilogue: ret .cfi_endproc .size aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt .globl aesni_multi_cbc_decrypt .type aesni_multi_cbc_decrypt,\@function,3 .align 32 aesni_multi_cbc_decrypt: .cfi_startproc ___ $code.=<<___ if ($avx); cmp \$2,$num Loading @@ -377,12 +396,19 @@ $code.=<<___ if ($avx); ___ $code.=<<___; mov %rsp,%rax .cfi_def_cfa_register %rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp Loading @@ -407,6 +433,7 @@ $code.=<<___; sub \$48,%rsp and \$-64,%rsp mov %rax,16(%rsp) # original %rsp .cfi_cfa_expression %rsp+16,deref,+8 
.Ldec4x_body: movdqu ($key),$zero # 0-round key Loading Loading @@ -578,6 +605,7 @@ $code.=<<___; jnz .Loop_dec4x mov 16(%rsp),%rax # original %rsp .cfi_def_cfa %rax,8 mov 24(%rsp),$num lea `40*4`($inp),$inp Loading @@ -600,14 +628,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 .cfi_restore %r15 mov -40(%rax),%r14 .cfi_restore %r14 mov -32(%rax),%r13 .cfi_restore %r13 mov -24(%rax),%r12 .cfi_restore %r12 mov -16(%rax),%rbp .cfi_restore %rbp mov -8(%rax),%rbx .cfi_restore %rbx lea (%rax),%rsp .cfi_def_cfa_register %rsp .Ldec4x_epilogue: ret .cfi_endproc .size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt ___ Loading @@ -623,14 +659,22 @@ $code.=<<___; .type aesni_multi_cbc_encrypt_avx,\@function,3 .align 32 aesni_multi_cbc_encrypt_avx: .cfi_startproc _avx_cbc_enc_shortcut: mov %rsp,%rax .cfi_def_cfa_register %rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp Loading @@ -657,6 +701,7 @@ $code.=<<___; sub \$192,%rsp and \$-128,%rsp mov %rax,16(%rsp) # original %rsp .cfi_cfa_expression %rsp+16,deref,+8 .Lenc8x_body: vzeroupper Loading Loading @@ -861,6 +906,7 @@ $code.=<<___; jnz .Loop_enc8x mov 16(%rsp),%rax # original %rsp .cfi_def_cfa %rax,8 #mov 24(%rsp),$num #lea `40*8`($inp),$inp #dec $num Loading @@ -883,27 +929,43 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 .cfi_restore %r15 mov -40(%rax),%r14 .cfi_restore %r14 mov -32(%rax),%r13 .cfi_restore %r13 mov -24(%rax),%r12 .cfi_restore %r12 mov -16(%rax),%rbp .cfi_restore %rbp mov -8(%rax),%rbx .cfi_restore %rbx lea (%rax),%rsp .cfi_def_cfa_register %rsp .Lenc8x_epilogue: ret .cfi_endproc .size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx .type aesni_multi_cbc_decrypt_avx,\@function,3 .align 32 aesni_multi_cbc_decrypt_avx: .cfi_startproc _avx_cbc_dec_shortcut: mov %rsp,%rax .cfi_def_cfa_register %rax push 
%rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp Loading Loading @@ -932,6 +994,7 @@ $code.=<<___; and \$-256,%rsp sub \$192,%rsp mov %rax,16(%rsp) # original %rsp .cfi_cfa_expression %rsp+16,deref,+8 .Ldec8x_body: vzeroupper Loading Loading @@ -1167,6 +1230,7 @@ $code.=<<___; jnz .Loop_dec8x mov 16(%rsp),%rax # original %rsp .cfi_def_cfa %rax,8 #mov 24(%rsp),$num #lea `40*8`($inp),$inp #dec $num Loading @@ -1189,14 +1253,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 .cfi_restore %r15 mov -40(%rax),%r14 .cfi_restore %r14 mov -32(%rax),%r13 .cfi_restore %r13 mov -24(%rax),%r12 .cfi_restore %r12 mov -16(%rax),%rbp .cfi_restore %rbp mov -8(%rax),%rbx .cfi_restore %rbx lea (%rax),%rsp .cfi_def_cfa_register %rsp .Ldec8x_epilogue: ret .cfi_endproc .size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx ___ }}} Loading
crypto/aes/asm/aesni-sha1-x86_64.pl +68 −0 Original line number Diff line number Diff line Loading @@ -186,16 +186,24 @@ $code.=<<___; .type aesni_cbc_sha1_enc_ssse3,\@function,6 .align 32 aesni_cbc_sha1_enc_ssse3: .cfi_startproc mov `($win64?56:8)`(%rsp),$inp # load 7th argument #shr \$6,$len # debugging artefact #jz .Lepilogue_ssse3 # debugging artefact push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 lea `-104-($win64?10*16:0)`(%rsp),%rsp .cfi_adjust_cfa_offset `104+($win64?10*16:0)` #mov $in0,$inp # debugging artefact #lea 64(%rsp),$ctx # debugging artefact ___ Loading Loading @@ -721,15 +729,24 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea `104+($win64?10*16:0)`(%rsp),%rsi .cfi_def_cfa %rsi,56 mov 0(%rsi),%r15 .cfi_restore %r15 mov 8(%rsi),%r14 .cfi_restore %r14 mov 16(%rsi),%r13 .cfi_restore %r13 mov 24(%rsi),%r12 .cfi_restore %r12 mov 32(%rsi),%rbp .cfi_restore %rbp mov 40(%rsi),%rbx .cfi_restore %rbx lea 48(%rsi),%rsp .cfi_def_cfa %rsp,8 .Lepilogue_ssse3: ret .cfi_endproc .size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3 ___ Loading Loading @@ -837,14 +854,22 @@ $code.=<<___; .type aesni256_cbc_sha1_dec_ssse3,\@function,6 .align 32 aesni256_cbc_sha1_dec_ssse3: .cfi_startproc mov `($win64?56:8)`(%rsp),$inp # load 7th argument push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 lea `-104-($win64?10*16:0)`(%rsp),%rsp .cfi_adjust_cfa_offset `104+($win64?10*16:0)` ___ $code.=<<___ if ($win64); movaps %xmm6,96+0(%rsp) Loading Loading @@ -992,15 +1017,24 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea `104+($win64?10*16:0)`(%rsp),%rsi .cfi_def_cfa %rsi,56 mov 0(%rsi),%r15 .cfi_restore %r15 mov 8(%rsi),%r14 .cfi_restore %r14 mov 16(%rsi),%r13 .cfi_restore %r13 mov 24(%rsi),%r12 .cfi_restore %r12 mov 32(%rsi),%rbp .cfi_restore %rbp mov 40(%rsi),%rbx
.cfi_restore %rbx lea 48(%rsi),%rsp .cfi_def_cfa %rsp,8 .Lepilogue_dec_ssse3: ret .cfi_endproc .size aesni256_cbc_sha1_dec_ssse3,.-aesni256_cbc_sha1_dec_ssse3 ___ }}} Loading @@ -1026,16 +1060,24 @@ $code.=<<___; .type aesni_cbc_sha1_enc_avx,\@function,6 .align 32 aesni_cbc_sha1_enc_avx: .cfi_startproc mov `($win64?56:8)`(%rsp),$inp # load 7th argument #shr \$6,$len # debugging artefact #jz .Lepilogue_avx # debugging artefact push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 lea `-104-($win64?10*16:0)`(%rsp),%rsp .cfi_adjust_cfa_offset `104+($win64?10*16:0)` #mov $in0,$inp # debugging artefact #lea 64(%rsp),$ctx # debugging artefact ___ Loading Loading @@ -1434,15 +1476,24 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea `104+($win64?10*16:0)`(%rsp),%rsi .cfi_def_cfa %rsi,56 mov 0(%rsi),%r15 .cfi_restore %r15 mov 8(%rsi),%r14 .cfi_restore %r14 mov 16(%rsi),%r13 .cfi_restore %r13 mov 24(%rsi),%r12 .cfi_restore %r12 mov 32(%rsi),%rbp .cfi_restore %rbp mov 40(%rsi),%rbx .cfi_restore %rbx lea 48(%rsi),%rsp .cfi_def_cfa %rsp,8 .Lepilogue_avx: ret .cfi_endproc .size aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx ___ Loading Loading @@ -1491,14 +1542,22 @@ $code.=<<___; .type aesni256_cbc_sha1_dec_avx,\@function,6 .align 32 aesni256_cbc_sha1_dec_avx: .cfi_startproc mov `($win64?56:8)`(%rsp),$inp # load 7th argument push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 lea `-104-($win64?10*16:0)`(%rsp),%rsp .cfi_adjust_cfa_offset `104+($win64?10*16:0)` ___ $code.=<<___ if ($win64); movaps %xmm6,96+0(%rsp) Loading Loading @@ -1645,15 +1704,24 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea `104+($win64?10*16:0)`(%rsp),%rsi .cfi_def_cfa %rsi,56 mov 0(%rsi),%r15 .cfi_restore %r15 mov 8(%rsi),%r14 .cfi_restore %r14 mov 16(%rsi),%r13 .cfi_restore %r13 mov 24(%rsi),%r12
.cfi_restore %r12 mov 32(%rsi),%rbp .cfi_restore %rbp mov 40(%rsi),%rbx .cfi_restore %rbx lea 48(%rsi),%rsp .cfi_def_cfa %rsp,8 .Lepilogue_dec_avx: ret .cfi_endproc .size aesni256_cbc_sha1_dec_avx,.-aesni256_cbc_sha1_dec_avx ___ }}} Loading
crypto/aes/asm/aesni-sha256-x86_64.pl +55 −1 Original line number Diff line number Diff line Loading @@ -109,7 +109,7 @@ $_key="16*$SZ+3*8(%rsp)"; $_ivp="16*$SZ+4*8(%rsp)"; $_ctx="16*$SZ+5*8(%rsp)"; $_in0="16*$SZ+6*8(%rsp)"; $_rsp="16*$SZ+7*8(%rsp)"; $_rsp="`16*$SZ+7*8`(%rsp)"; $framesz=16*$SZ+8*8; $code=<<___; Loading Loading @@ -339,15 +339,23 @@ $code.=<<___; .type ${func}_xop,\@function,6 .align 64 ${func}_xop: .cfi_startproc .Lxop_shortcut: mov `($win64?56:8)`(%rsp),$in0 # load 7th parameter mov %rsp,%rax # copy %rsp .cfi_def_cfa_register %rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 sub \$`$framesz+$win64*16*10`,%rsp and \$-64,%rsp # align stack frame Loading @@ -364,6 +372,7 @@ ${func}_xop: mov $ctx,$_ctx mov $in0,$_in0 mov %rax,$_rsp .cfi_cfa_expression $_rsp,deref,+8 ___ $code.=<<___ if ($win64); movaps %xmm6,`$framesz+16*0`(%rsp) Loading Loading @@ -601,6 +610,7 @@ $code.=<<___; mov $_ivp,$ivp mov $_rsp,%rsi .cfi_def_cfa %rsi,8 vmovdqu $iv,($ivp) # output IV vzeroall ___ Loading @@ -618,14 +628,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rsi),%r15 .cfi_restore %r15 mov -40(%rsi),%r14 .cfi_restore %r14 mov -32(%rsi),%r13 .cfi_restore %r13 mov -24(%rsi),%r12 .cfi_restore %r12 mov -16(%rsi),%rbp .cfi_restore %rbp mov -8(%rsi),%rbx .cfi_restore %rbx lea (%rsi),%rsp .cfi_def_cfa_register %rsp .Lepilogue_xop: ret .cfi_endproc .size ${func}_xop,.-${func}_xop ___ ###################################################################### Loading @@ -637,15 +655,23 @@ $code.=<<___; .type ${func}_avx,\@function,6 .align 64 ${func}_avx: .cfi_startproc .Lavx_shortcut: mov `($win64?56:8)`(%rsp),$in0 # load 7th parameter mov %rsp,%rax # copy %rsp .cfi_def_cfa_register %rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 sub 
\$`$framesz+$win64*16*10`,%rsp and \$-64,%rsp # align stack frame Loading @@ -662,6 +688,7 @@ ${func}_avx: mov $ctx,$_ctx mov $in0,$_in0 mov %rax,$_rsp .cfi_cfa_expression $_rsp,deref,+8 ___ $code.=<<___ if ($win64); movaps %xmm6,`$framesz+16*0`(%rsp) Loading Loading @@ -852,6 +879,7 @@ $code.=<<___; mov $_ivp,$ivp mov $_rsp,%rsi .cfi_def_cfa %rsi,8 vmovdqu $iv,($ivp) # output IV vzeroall ___ Loading @@ -869,14 +897,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rsi),%r15 .cfi_restore %r15 mov -40(%rsi),%r14 .cfi_restore %r14 mov -32(%rsi),%r13 .cfi_restore %r13 mov -24(%rsi),%r12 .cfi_restore %r12 mov -16(%rsi),%rbp .cfi_restore %rbp mov -8(%rsi),%rbx .cfi_restore %rbx lea (%rsi),%rsp .cfi_def_cfa_register %rsp .Lepilogue_avx: ret .cfi_endproc .size ${func}_avx,.-${func}_avx ___ Loading Loading @@ -933,15 +969,23 @@ $code.=<<___; .type ${func}_avx2,\@function,6 .align 64 ${func}_avx2: .cfi_startproc .Lavx2_shortcut: mov `($win64?56:8)`(%rsp),$in0 # load 7th parameter mov %rsp,%rax # copy %rsp .cfi_def_cfa_register %rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 sub \$`2*$SZ*$rounds+8*8+$win64*16*10`,%rsp and \$-256*$SZ,%rsp # align stack frame add \$`2*$SZ*($rounds-8)`,%rsp Loading @@ -959,6 +1003,7 @@ ${func}_avx2: mov $ctx,$_ctx mov $in0,$_in0 mov %rax,$_rsp .cfi_cfa_expression $_rsp,deref,+8 ___ $code.=<<___ if ($win64); movaps %xmm6,`$framesz+16*0`(%rsp) Loading Loading @@ -1189,6 +1234,7 @@ $code.=<<___; lea ($Tbl),%rsp mov $_ivp,$ivp mov $_rsp,%rsi .cfi_def_cfa %rsi,8 vmovdqu $iv,($ivp) # output IV vzeroall ___ Loading @@ -1206,14 +1252,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rsi),%r15 .cfi_restore %r15 mov -40(%rsi),%r14 .cfi_restore %r14 mov -32(%rsi),%r13 .cfi_restore %r13 mov -24(%rsi),%r12 .cfi_restore %r12 mov -16(%rsi),%rbp .cfi_restore %rbp mov -8(%rsi),%rbx .cfi_restore %rbx lea (%rsi),%rsp .cfi_def_cfa_register 
%rsp .Lepilogue_avx2: ret .cfi_endproc .size ${func}_avx2,.-${func}_avx2 ___ }} Loading
crypto/aes/asm/aesni-x86_64.pl +60 −0 Original line number Diff line number Diff line Loading @@ -1180,6 +1180,7 @@ $code.=<<___; .type aesni_ctr32_encrypt_blocks,\@function,5 .align 16 aesni_ctr32_encrypt_blocks: .cfi_startproc cmp \$1,$len jne .Lctr32_bulk Loading @@ -1202,7 +1203,9 @@ $code.=<<___; .align 16 .Lctr32_bulk: lea (%rsp),$key_ # use $key_ as frame pointer .cfi_def_cfa_register $key_ push %rbp .cfi_push %rbp sub \$$frame_size,%rsp and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ Loading Loading @@ -1722,9 +1725,12 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -8($key_),%rbp .cfi_restore %rbp lea ($key_),%rsp .cfi_def_cfa_register %rsp .Lctr32_epilogue: ret .cfi_endproc .size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks ___ } Loading @@ -1746,8 +1752,11 @@ $code.=<<___; .type aesni_xts_encrypt,\@function,6 .align 16 aesni_xts_encrypt: .cfi_startproc lea (%rsp),%r11 # frame pointer .cfi_def_cfa_register %r11 push %rbp .cfi_push %rbp sub \$$frame_size,%rsp and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ Loading Loading @@ -2212,9 +2221,12 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -8(%r11),%rbp .cfi_restore %rbp lea (%r11),%rsp .cfi_def_cfa_register %rsp .Lxts_enc_epilogue: ret .cfi_endproc .size aesni_xts_encrypt,.-aesni_xts_encrypt ___ Loading @@ -2223,8 +2235,11 @@ $code.=<<___; .type aesni_xts_decrypt,\@function,6 .align 16 aesni_xts_decrypt: .cfi_startproc lea (%rsp),%r11 # frame pointer .cfi_def_cfa_register %r11 push %rbp .cfi_push %rbp sub \$$frame_size,%rsp and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ Loading Loading @@ -2715,9 +2730,12 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -8(%r11),%rbp .cfi_restore %rbp lea (%r11),%rsp .cfi_def_cfa_register %rsp .Lxts_dec_epilogue: ret .cfi_endproc .size aesni_xts_decrypt,.-aesni_xts_decrypt ___ } Loading @@ -2742,12 +2760,18 @@ $code.=<<___; .type aesni_ocb_encrypt,\@function,6 .align 32 aesni_ocb_encrypt: 
.cfi_startproc lea (%rsp),%rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 ___ $code.=<<___ if ($win64); lea -0xa0(%rsp),%rsp Loading Loading @@ -2942,6 +2966,7 @@ $code.=<<___ if (!$win64); pxor %xmm14,%xmm14 pxor %xmm15,%xmm15 lea 0x28(%rsp),%rax .cfi_def_cfa %rax,8 ___ $code.=<<___ if ($win64); movaps 0x00(%rsp),%xmm6 Loading Loading @@ -2969,13 +2994,20 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -40(%rax),%r14 .cfi_restore %r14 mov -32(%rax),%r13 .cfi_restore %r13 mov -24(%rax),%r12 .cfi_restore %r12 mov -16(%rax),%rbp .cfi_restore %rbp mov -8(%rax),%rbx .cfi_restore %rbx lea (%rax),%rsp .cfi_def_cfa_register %rsp .Locb_enc_epilogue: ret .cfi_endproc .size aesni_ocb_encrypt,.-aesni_ocb_encrypt .type __ocb_encrypt6,\@abi-omnipotent Loading Loading @@ -3188,12 +3220,18 @@ __ocb_encrypt1: .type aesni_ocb_decrypt,\@function,6 .align 32 aesni_ocb_decrypt: .cfi_startproc lea (%rsp),%rax push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 ___ $code.=<<___ if ($win64); lea -0xa0(%rsp),%rsp Loading Loading @@ -3410,6 +3448,7 @@ $code.=<<___ if (!$win64); pxor %xmm14,%xmm14 pxor %xmm15,%xmm15 lea 0x28(%rsp),%rax .cfi_def_cfa %rax,8 ___ $code.=<<___ if ($win64); movaps 0x00(%rsp),%xmm6 Loading Loading @@ -3437,13 +3476,20 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -40(%rax),%r14 .cfi_restore %r14 mov -32(%rax),%r13 .cfi_restore %r13 mov -24(%rax),%r12 .cfi_restore %r12 mov -16(%rax),%rbp .cfi_restore %rbp mov -8(%rax),%rbx .cfi_restore %rbx lea (%rax),%rsp .cfi_def_cfa_register %rsp .Locb_dec_epilogue: ret .cfi_endproc .size aesni_ocb_decrypt,.-aesni_ocb_decrypt .type __ocb_decrypt6,\@abi-omnipotent Loading Loading @@ -3656,6 +3702,7 @@ $code.=<<___; .type ${PREFIX}_cbc_encrypt,\@function,6 .align 16 ${PREFIX}_cbc_encrypt: .cfi_startproc test $len,$len # check length jz .Lcbc_ret Loading Loading @@ 
-3732,7 +3779,9 @@ $code.=<<___; .align 16 .Lcbc_decrypt_bulk: lea (%rsp),%r11 # frame pointer .cfi_def_cfa_register %r11 push %rbp .cfi_push %rbp sub \$$frame_size,%rsp and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ Loading Loading @@ -4175,9 +4224,12 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -8(%r11),%rbp .cfi_restore %rbp lea (%r11),%rsp .cfi_def_cfa_register %rsp .Lcbc_ret: ret .cfi_endproc .size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt ___ } Loading @@ -4198,7 +4250,9 @@ $code.=<<___; .type ${PREFIX}_set_decrypt_key,\@abi-omnipotent .align 16 ${PREFIX}_set_decrypt_key: .cfi_startproc .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 .cfi_adjust_cfa_offset 8 call __aesni_set_encrypt_key shl \$4,$bits # rounds-1 after _aesni_set_encrypt_key test %eax,%eax Loading Loading @@ -4231,7 +4285,9 @@ ${PREFIX}_set_decrypt_key: pxor %xmm0,%xmm0 .Ldec_key_ret: add \$8,%rsp .cfi_adjust_cfa_offset -8 ret .cfi_endproc .LSEH_end_set_decrypt_key: .size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key ___ Loading Loading @@ -4267,7 +4323,9 @@ $code.=<<___; .align 16 ${PREFIX}_set_encrypt_key: __aesni_set_encrypt_key: .cfi_startproc .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 .cfi_adjust_cfa_offset 8 mov \$-1,%rax test $inp,$inp jz .Lenc_key_ret Loading Loading @@ -4560,7 +4618,9 @@ __aesni_set_encrypt_key: pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 add \$8,%rsp .cfi_adjust_cfa_offset -8 ret .cfi_endproc .LSEH_end_set_encrypt_key: .align 16 Loading