Loading crypto/chacha/asm/chacha-x86_64.pl +41 −0 Original line number Diff line number Diff line Loading @@ -242,6 +242,7 @@ $code.=<<___; .type ChaCha20_ctr32,\@function,5 .align 64 ChaCha20_ctr32: .cfi_startproc cmp \$0,$len je .Lno_data mov OPENSSL_ia32cap_P+4(%rip),%r10 Loading @@ -255,12 +256,19 @@ $code.=<<___; jnz .LChaCha20_ssse3 push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 sub \$64+24,%rsp .cfi_adjust_cfa_offset 64+24 .Lctr32_body: #movdqa .Lsigma(%rip),%xmm0 Loading Loading @@ -401,15 +409,24 @@ $code.=<<___; .Ldone: lea 64+24+48(%rsp),%rsi .cfi_def_cfa %rsi,8 mov -48(%rsi),%r15 .cfi_restore %r15 mov -40(%rsi),%r14 .cfi_restore %r14 mov -32(%rsi),%r13 .cfi_restore %r13 mov -24(%rsi),%r12 .cfi_restore %r12 mov -16(%rsi),%rbp .cfi_restore %rbp mov -8(%rsi),%rbx .cfi_restore %rbx lea (%rsi),%rsp .cfi_def_cfa_register %rsp .Lno_data: ret .cfi_endproc .size ChaCha20_ctr32,.-ChaCha20_ctr32 ___ Loading Loading @@ -448,8 +465,10 @@ $code.=<<___; .type ChaCha20_ssse3,\@function,5 .align 32 ChaCha20_ssse3: .cfi_startproc .LChaCha20_ssse3: mov %rsp,%r9 # frame pointer .cfi_def_cfa_register %r9 ___ $code.=<<___ if ($avx); test \$`1<<(43-32)`,%r10d Loading Loading @@ -565,8 +584,10 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea (%r9),%rsp .cfi_def_cfa_register %rsp .Lssse3_epilogue: ret .cfi_endproc .size ChaCha20_ssse3,.-ChaCha20_ssse3 ___ } Loading Loading @@ -708,8 +729,10 @@ $code.=<<___; .type ChaCha20_4x,\@function,5 .align 32 ChaCha20_4x: .cfi_startproc .LChaCha20_4x: mov %rsp,%r9 # frame pointer .cfi_def_cfa_register %r9 mov %r10,%r11 ___ $code.=<<___ if ($avx>1); Loading Loading @@ -1149,8 +1172,10 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea (%r9),%rsp .cfi_def_cfa_register %rsp .L4x_epilogue: ret .cfi_endproc .size ChaCha20_4x,.-ChaCha20_4x ___ } Loading Loading @@ -1237,8 +1262,10 @@ $code.=<<___; .type ChaCha20_4xop,\@function,5 .align 32 ChaCha20_4xop: .cfi_startproc .LChaCha20_4xop: mov %rsp,%r9 # frame pointer .cfi_def_cfa_register %r9 sub \$0x140+$xframe,%rsp ___ ################ stack layout Loading Loading @@ -1601,8 +1628,10 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea (%r9),%rsp .cfi_def_cfa_register %rsp .L4xop_epilogue: ret .cfi_endproc .size ChaCha20_4xop,.-ChaCha20_4xop ___ } Loading Loading @@ -1735,8 +1764,10 @@ $code.=<<___; .type ChaCha20_8x,\@function,5 .align 32 ChaCha20_8x: .cfi_startproc .LChaCha20_8x: mov %rsp,%r9 # frame register .cfi_def_cfa_register %r9 sub \$0x280+$xframe,%rsp and \$-32,%rsp ___ Loading Loading @@ -2242,8 +2273,10 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea (%r9),%rsp .cfi_def_cfa_register %rsp .L8x_epilogue: ret .cfi_endproc .size ChaCha20_8x,.-ChaCha20_8x ___ } Loading Loading @@ -2280,8 +2313,10 @@ $code.=<<___; .type ChaCha20_avx512,\@function,5 .align 32 ChaCha20_avx512: .cfi_startproc .LChaCha20_avx512: mov %rsp,%r9 # frame pointer .cfi_def_cfa_register %r9 cmp \$512,$len ja .LChaCha20_16x Loading Loading @@ -2461,8 +2496,10 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea (%r9),%rsp .cfi_def_cfa_register %rsp .Lavx512_epilogue: ret .cfi_endproc .size ChaCha20_avx512,.-ChaCha20_avx512 ___ } Loading Loading @@ -2544,8 +2581,10 @@ $code.=<<___; .type ChaCha20_16x,\@function,5 .align 32 ChaCha20_16x: .cfi_startproc .LChaCha20_16x: mov %rsp,%r9 # frame register .cfi_def_cfa_register %r9 sub \$64+$xframe,%rsp and \$-64,%rsp ___ Loading Loading @@ -2963,8 +3002,10 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea (%r9),%rsp .cfi_def_cfa_register %rsp .L16x_epilogue: ret .cfi_endproc .size ChaCha20_16x,.-ChaCha20_16x ___ } Loading Loading
crypto/chacha/asm/chacha-x86_64.pl +41 −0 Original line number Diff line number Diff line Loading @@ -242,6 +242,7 @@ $code.=<<___; .type ChaCha20_ctr32,\@function,5 .align 64 ChaCha20_ctr32: .cfi_startproc cmp \$0,$len je .Lno_data mov OPENSSL_ia32cap_P+4(%rip),%r10 Loading @@ -255,12 +256,19 @@ $code.=<<___; jnz .LChaCha20_ssse3 push %rbx .cfi_push %rbx push %rbp .cfi_push %rbp push %r12 .cfi_push %r12 push %r13 .cfi_push %r13 push %r14 .cfi_push %r14 push %r15 .cfi_push %r15 sub \$64+24,%rsp .cfi_adjust_cfa_offset 64+24 .Lctr32_body: #movdqa .Lsigma(%rip),%xmm0 Loading Loading @@ -401,15 +409,24 @@ $code.=<<___; .Ldone: lea 64+24+48(%rsp),%rsi .cfi_def_cfa %rsi,8 mov -48(%rsi),%r15 .cfi_restore %r15 mov -40(%rsi),%r14 .cfi_restore %r14 mov -32(%rsi),%r13 .cfi_restore %r13 mov -24(%rsi),%r12 .cfi_restore %r12 mov -16(%rsi),%rbp .cfi_restore %rbp mov -8(%rsi),%rbx .cfi_restore %rbx lea (%rsi),%rsp .cfi_def_cfa_register %rsp .Lno_data: ret .cfi_endproc .size ChaCha20_ctr32,.-ChaCha20_ctr32 ___ Loading Loading @@ -448,8 +465,10 @@ $code.=<<___; .type ChaCha20_ssse3,\@function,5 .align 32 ChaCha20_ssse3: .cfi_startproc .LChaCha20_ssse3: mov %rsp,%r9 # frame pointer .cfi_def_cfa_register %r9 ___ $code.=<<___ if ($avx); test \$`1<<(43-32)`,%r10d Loading Loading @@ -565,8 +584,10 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea (%r9),%rsp .cfi_def_cfa_register %rsp .Lssse3_epilogue: ret .cfi_endproc .size ChaCha20_ssse3,.-ChaCha20_ssse3 ___ } Loading Loading @@ -708,8 +729,10 @@ $code.=<<___; .type ChaCha20_4x,\@function,5 .align 32 ChaCha20_4x: .cfi_startproc .LChaCha20_4x: mov %rsp,%r9 # frame pointer .cfi_def_cfa_register %r9 mov %r10,%r11 ___ $code.=<<___ if ($avx>1); Loading Loading @@ -1149,8 +1172,10 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea (%r9),%rsp .cfi_def_cfa_register %rsp .L4x_epilogue: ret .cfi_endproc .size ChaCha20_4x,.-ChaCha20_4x ___ } Loading Loading @@ -1237,8 +1262,10 @@ $code.=<<___; .type ChaCha20_4xop,\@function,5 .align 32 ChaCha20_4xop: .cfi_startproc .LChaCha20_4xop: mov %rsp,%r9 # frame pointer .cfi_def_cfa_register %r9 sub \$0x140+$xframe,%rsp ___ ################ stack layout Loading Loading @@ -1601,8 +1628,10 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea (%r9),%rsp .cfi_def_cfa_register %rsp .L4xop_epilogue: ret .cfi_endproc .size ChaCha20_4xop,.-ChaCha20_4xop ___ } Loading Loading @@ -1735,8 +1764,10 @@ $code.=<<___; .type ChaCha20_8x,\@function,5 .align 32 ChaCha20_8x: .cfi_startproc .LChaCha20_8x: mov %rsp,%r9 # frame register .cfi_def_cfa_register %r9 sub \$0x280+$xframe,%rsp and \$-32,%rsp ___ Loading Loading @@ -2242,8 +2273,10 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea (%r9),%rsp .cfi_def_cfa_register %rsp .L8x_epilogue: ret .cfi_endproc .size ChaCha20_8x,.-ChaCha20_8x ___ } Loading Loading @@ -2280,8 +2313,10 @@ $code.=<<___; .type ChaCha20_avx512,\@function,5 .align 32 ChaCha20_avx512: .cfi_startproc .LChaCha20_avx512: mov %rsp,%r9 # frame pointer .cfi_def_cfa_register %r9 cmp \$512,$len ja .LChaCha20_16x Loading Loading @@ -2461,8 +2496,10 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea (%r9),%rsp .cfi_def_cfa_register %rsp .Lavx512_epilogue: ret .cfi_endproc .size ChaCha20_avx512,.-ChaCha20_avx512 ___ } Loading Loading @@ -2544,8 +2581,10 @@ $code.=<<___; .type ChaCha20_16x,\@function,5 .align 32 ChaCha20_16x: .cfi_startproc .LChaCha20_16x: mov %rsp,%r9 # frame register .cfi_def_cfa_register %r9 sub \$64+$xframe,%rsp and \$-64,%rsp ___ Loading Loading @@ -2963,8 +3002,10 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea (%r9),%rsp .cfi_def_cfa_register %rsp .L16x_epilogue: ret .cfi_endproc .size ChaCha20_16x,.-ChaCha20_16x ___ } Loading