Loading engines/asm/e_padlock-x86.pl +89 −9 Original line number Diff line number Diff line Loading @@ -352,19 +352,34 @@ my ($mode,$opcode) = @_; &push ("edi"); &push ("esi"); &xor ("eax","eax"); &mov ("edi",&wparam(0)); &mov ("esi",&wparam(1)); &mov ("ecx",&wparam(2)); if ($::win32 or $::coff) { &push (&::islabel("_win32_segv_handler")); &data_byte(0x64,0xff,0x30); # push %fs:(%eax) &data_byte(0x64,0x89,0x20); # mov %esp,%fs:(%eax) } &mov ("edi",&wparam(0)); &mov ("esi",&wparam(1)); &mov ("ecx",&wparam(2)); &mov ("edx","esp"); # put aside %esp &add ("esp",-128); # 32 is enough but spec says 128 &movups ("xmm0",&QWP(0,"edi")); # copy-in context &and ("esp",-16); &mov ("eax",&DWP(16,"edi")); &movaps (&QWP(0,"esp"),"xmm0"); &mov ("edi","esp"); &mov (&DWP(16,"esp"),"eax"); &xor ("eax","eax"); &data_byte(0xf3,0x0f,0xa6,0xc8); # rep xsha1 &movaps ("xmm0",&QWP(0,"esp")); &mov ("eax",&DWP(16,"esp")); &mov ("esp","edx"); # restore %esp if ($::win32 or $::coff) { &data_byte(0x64,0x8f,0x05,0,0,0,0); # pop %fs:0 &lea ("esp",&DWP(4,"esp")); } &mov ("edi",&wparam(0)); &movups (&QWP(0,"edi"),"xmm0"); # copy-out context &mov (&DWP(16,"edi"),"eax"); &pop ("esi"); &pop ("edi"); &ret (); Loading @@ -373,11 +388,25 @@ my ($mode,$opcode) = @_; &function_begin_B("padlock_sha1_blocks"); &push ("edi"); &push ("esi"); &mov ("eax",-1); &mov ("edi",&wparam(0)); &mov ("esi",&wparam(1)); &mov ("edx","esp"); # put aside %esp &mov ("ecx",&wparam(2)); &add ("esp",-128); &movups ("xmm0",&QWP(0,"edi")); # copy-in context &and ("esp",-16); &mov ("eax",&DWP(16,"edi")); &movaps (&QWP(0,"esp"),"xmm0"); &mov ("edi","esp"); &mov (&DWP(16,"esp"),"eax"); &mov ("eax",-1); &data_byte(0xf3,0x0f,0xa6,0xc8); # rep xsha1 &movaps ("xmm0",&QWP(0,"esp")); &mov ("eax",&DWP(16,"esp")); &mov ("esp","edx"); # restore %esp &mov ("edi",&wparam(0)); &movups (&QWP(0,"edi"),"xmm0"); # copy-out context &mov (&DWP(16,"edi"),"eax"); &pop ("esi"); &pop ("edi"); &ret (); Loading @@ -387,19 +416,34 @@ my ($mode,$opcode) = @_; &push ("edi"); &push ("esi"); &xor ("eax","eax"); &mov ("edi",&wparam(0)); &mov ("esi",&wparam(1)); &mov ("ecx",&wparam(2)); if ($::win32 or $::coff) { &push (&::islabel("_win32_segv_handler")); &data_byte(0x64,0xff,0x30); # push %fs:(%eax) &data_byte(0x64,0x89,0x20); # mov %esp,%fs:(%eax) } &mov ("edi",&wparam(0)); &mov ("esi",&wparam(1)); &mov ("ecx",&wparam(2)); &mov ("edx","esp"); # put aside %esp &add ("esp",-128); &movups ("xmm0",&QWP(0,"edi")); # copy-in context &and ("esp",-16); &movups ("xmm1",&QWP(16,"edi")); &movaps (&QWP(0,"esp"),"xmm0"); &mov ("edi","esp"); &movaps (&QWP(16,"esp"),"xmm1"); &xor ("eax","eax"); &data_byte(0xf3,0x0f,0xa6,0xd0); # rep xsha256 &movaps ("xmm0",&QWP(0,"esp")); &movaps ("xmm1",&QWP(16,"esp")); &mov ("esp","edx"); # restore %esp if ($::win32 or $::coff) { &data_byte(0x64,0x8f,0x05,0,0,0,0); # pop %fs:0 &lea ("esp",&DWP(4,"esp")); } &mov ("edi",&wparam(0)); &movups (&QWP(0,"edi"),"xmm0"); # copy-out context &movups (&QWP(16,"edi"),"xmm1"); &pop ("esi"); &pop ("edi"); &ret (); Loading @@ -408,11 +452,25 @@ my ($mode,$opcode) = @_; &function_begin_B("padlock_sha256_blocks"); &push ("edi"); &push ("esi"); &mov ("eax",-1); &mov ("edi",&wparam(0)); &mov ("esi",&wparam(1)); &mov ("ecx",&wparam(2)); &mov ("edx","esp"); # put aside %esp &add ("esp",-128); &movups ("xmm0",&QWP(0,"edi")); # copy-in context &and ("esp",-16); &movups ("xmm1",&QWP(16,"edi")); &movaps (&QWP(0,"esp"),"xmm0"); &mov ("edi","esp"); &movaps (&QWP(16,"esp"),"xmm1"); &mov ("eax",-1); &data_byte(0xf3,0x0f,0xa6,0xd0); # rep xsha256 &movaps ("xmm0",&QWP(0,"esp")); &movaps ("xmm1",&QWP(16,"esp")); &mov ("esp","edx"); # restore %esp &mov ("edi",&wparam(0)); &movups (&QWP(0,"edi"),"xmm0"); # copy-out context &movups (&QWP(16,"edi"),"xmm1"); &pop ("esi"); &pop ("edi"); &ret (); Loading @@ -424,7 +482,29 @@ my ($mode,$opcode) = @_; &mov ("edi",&wparam(0)); &mov ("esi",&wparam(1)); &mov ("ecx",&wparam(2)); &mov ("edx","esp"); # put aside %esp &add ("esp",-128); &movups ("xmm0",&QWP(0,"edi")); # copy-in context &and ("esp",-16); &movups ("xmm1",&QWP(16,"edi")); &movups ("xmm2",&QWP(32,"edi")); &movups ("xmm3",&QWP(48,"edi")); &movaps (&QWP(0,"esp"),"xmm0"); &mov ("edi","esp"); &movaps (&QWP(16,"esp"),"xmm1"); &movaps (&QWP(32,"esp"),"xmm2"); &movaps (&QWP(48,"esp"),"xmm3"); &data_byte(0xf3,0x0f,0xa6,0xe0); # rep xsha512 &movaps ("xmm0",&QWP(0,"esp")); &movaps ("xmm1",&QWP(16,"esp")); &movaps ("xmm2",&QWP(32,"esp")); &movaps ("xmm3",&QWP(48,"esp")); &mov ("esp","edx"); # restore %esp &mov ("edi",&wparam(0)); &movups (&QWP(0,"edi"),"xmm0"); # copy-out context &movups (&QWP(16,"edi"),"xmm1"); &movups (&QWP(32,"edi"),"xmm2"); &movups (&QWP(48,"edi"),"xmm3"); &pop ("esi"); &pop ("edi"); &ret (); Loading engines/asm/e_padlock-x86_64.pl +67 −4 Original line number Diff line number Diff line Loading @@ -146,9 +146,20 @@ padlock_xstore: .type padlock_sha1_oneshot,\@function,3 .align 16 padlock_sha1_oneshot: xor %rax,%rax mov %rdx,%rcx mov %rdi,%rdx # put aside %rdi movups (%rdi),%xmm0 # copy-in context sub \$128+8,%rsp mov 16(%rdi),%eax movaps %xmm0,(%rsp) mov %rsp,%rdi mov %eax,16(%rsp) xor %rax,%rax .byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1 movaps (%rsp),%xmm0 mov 16(%rsp),%eax movups %xmm0,(%rdx) # copy-out context mov %eax,16(%rdx) ret .size padlock_sha1_oneshot,.-padlock_sha1_oneshot Loading @@ -156,9 +167,20 @@ padlock_sha1_oneshot: .type padlock_sha1_blocks,\@function,3 .align 16 padlock_sha1_blocks: mov \$-1,%rax mov %rdx,%rcx mov %rdi,%rdx # put aside %rdi movups (%rdi),%xmm0 # copy-in context sub \$128+8,%rsp mov 16(%rdi),%eax movaps %xmm0,(%rsp) mov %rsp,%rdi mov %eax,16(%rsp) mov \$-1,%rax .byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1 movaps (%rsp),%xmm0 mov 16(%rsp),%eax movups %xmm0,(%rdx) # copy-out context mov %eax,16(%rdx) ret .size padlock_sha1_blocks,.-padlock_sha1_blocks Loading @@ -166,9 +188,20 @@ padlock_sha1_blocks: .type padlock_sha256_oneshot,\@function,3 .align 16 padlock_sha256_oneshot: xor %rax,%rax mov %rdx,%rcx mov %rdi,%rdx # put aside %rdi movups (%rdi),%xmm0 # copy-in context sub \$128+8,%rsp movups 16(%rdi),%xmm1 movaps %xmm0,(%rsp) mov %rsp,%rdi movaps %xmm1,16(%rsp) xor %rax,%rax .byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 movups %xmm0,(%rdx) # copy-out context movups %xmm1,16(%rdx) ret .size padlock_sha256_oneshot,.-padlock_sha256_oneshot Loading @@ -176,9 +209,20 @@ padlock_sha256_oneshot: .type padlock_sha256_blocks,\@function,3 .align 16 padlock_sha256_blocks: mov \$-1,%rax mov %rdx,%rcx mov %rdi,%rdx # put aside %rdi movups (%rdi),%xmm0 # copy-in context sub \$128+8,%rsp movups 16(%rdi),%xmm1 movaps %xmm0,(%rsp) mov %rsp,%rdi movaps %xmm1,16(%rsp) mov \$-1,%rax .byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 movups %xmm0,(%rdx) # copy-out context movups %xmm1,16(%rdx) ret .size padlock_sha256_blocks,.-padlock_sha256_blocks Loading @@ -187,7 +231,26 @@ padlock_sha256_blocks: .align 16 padlock_sha512_blocks: mov %rdx,%rcx mov %rdi,%rdx # put aside %rdi movups (%rdi),%xmm0 # copy-in context sub \$128+8,%rsp movups 16(%rdi),%xmm1 movups 32(%rdi),%xmm2 movups 48(%rdi),%xmm3 movaps %xmm0,(%rsp) mov %rsp,%rdi movaps %xmm1,16(%rsp) movaps %xmm2,32(%rsp) movaps %xmm3,48(%rsp) .byte 0xf3,0x0f,0xa6,0xe0 # rep xha512 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 movaps 32(%rsp),%xmm2 movaps 48(%rsp),%xmm3 movups %xmm0,(%rdx) # copy-out context movups %xmm1,16(%rdx) movups %xmm2,32(%rdx) movups %xmm3,48(%rdx) ret .size padlock_sha512_blocks,.-padlock_sha512_blocks ___ Loading Loading
engines/asm/e_padlock-x86.pl +89 −9 Original line number Diff line number Diff line Loading @@ -352,19 +352,34 @@ my ($mode,$opcode) = @_; &push ("edi"); &push ("esi"); &xor ("eax","eax"); &mov ("edi",&wparam(0)); &mov ("esi",&wparam(1)); &mov ("ecx",&wparam(2)); if ($::win32 or $::coff) { &push (&::islabel("_win32_segv_handler")); &data_byte(0x64,0xff,0x30); # push %fs:(%eax) &data_byte(0x64,0x89,0x20); # mov %esp,%fs:(%eax) } &mov ("edi",&wparam(0)); &mov ("esi",&wparam(1)); &mov ("ecx",&wparam(2)); &mov ("edx","esp"); # put aside %esp &add ("esp",-128); # 32 is enough but spec says 128 &movups ("xmm0",&QWP(0,"edi")); # copy-in context &and ("esp",-16); &mov ("eax",&DWP(16,"edi")); &movaps (&QWP(0,"esp"),"xmm0"); &mov ("edi","esp"); &mov (&DWP(16,"esp"),"eax"); &xor ("eax","eax"); &data_byte(0xf3,0x0f,0xa6,0xc8); # rep xsha1 &movaps ("xmm0",&QWP(0,"esp")); &mov ("eax",&DWP(16,"esp")); &mov ("esp","edx"); # restore %esp if ($::win32 or $::coff) { &data_byte(0x64,0x8f,0x05,0,0,0,0); # pop %fs:0 &lea ("esp",&DWP(4,"esp")); } &mov ("edi",&wparam(0)); &movups (&QWP(0,"edi"),"xmm0"); # copy-out context &mov (&DWP(16,"edi"),"eax"); &pop ("esi"); &pop ("edi"); &ret (); Loading @@ -373,11 +388,25 @@ my ($mode,$opcode) = @_; &function_begin_B("padlock_sha1_blocks"); &push ("edi"); &push ("esi"); &mov ("eax",-1); &mov ("edi",&wparam(0)); &mov ("esi",&wparam(1)); &mov ("edx","esp"); # put aside %esp &mov ("ecx",&wparam(2)); &add ("esp",-128); &movups ("xmm0",&QWP(0,"edi")); # copy-in context &and ("esp",-16); &mov ("eax",&DWP(16,"edi")); &movaps (&QWP(0,"esp"),"xmm0"); &mov ("edi","esp"); &mov (&DWP(16,"esp"),"eax"); &mov ("eax",-1); &data_byte(0xf3,0x0f,0xa6,0xc8); # rep xsha1 &movaps ("xmm0",&QWP(0,"esp")); &mov ("eax",&DWP(16,"esp")); &mov ("esp","edx"); # restore %esp &mov ("edi",&wparam(0)); &movups (&QWP(0,"edi"),"xmm0"); # copy-out context &mov (&DWP(16,"edi"),"eax"); &pop ("esi"); &pop ("edi"); &ret (); Loading @@ -387,19 +416,34 @@ my ($mode,$opcode) = @_; &push ("edi"); &push ("esi"); &xor ("eax","eax"); &mov ("edi",&wparam(0)); &mov ("esi",&wparam(1)); &mov ("ecx",&wparam(2)); if ($::win32 or $::coff) { &push (&::islabel("_win32_segv_handler")); &data_byte(0x64,0xff,0x30); # push %fs:(%eax) &data_byte(0x64,0x89,0x20); # mov %esp,%fs:(%eax) } &mov ("edi",&wparam(0)); &mov ("esi",&wparam(1)); &mov ("ecx",&wparam(2)); &mov ("edx","esp"); # put aside %esp &add ("esp",-128); &movups ("xmm0",&QWP(0,"edi")); # copy-in context &and ("esp",-16); &movups ("xmm1",&QWP(16,"edi")); &movaps (&QWP(0,"esp"),"xmm0"); &mov ("edi","esp"); &movaps (&QWP(16,"esp"),"xmm1"); &xor ("eax","eax"); &data_byte(0xf3,0x0f,0xa6,0xd0); # rep xsha256 &movaps ("xmm0",&QWP(0,"esp")); &movaps ("xmm1",&QWP(16,"esp")); &mov ("esp","edx"); # restore %esp if ($::win32 or $::coff) { &data_byte(0x64,0x8f,0x05,0,0,0,0); # pop %fs:0 &lea ("esp",&DWP(4,"esp")); } &mov ("edi",&wparam(0)); &movups (&QWP(0,"edi"),"xmm0"); # copy-out context &movups (&QWP(16,"edi"),"xmm1"); &pop ("esi"); &pop ("edi"); &ret (); Loading @@ -408,11 +452,25 @@ my ($mode,$opcode) = @_; &function_begin_B("padlock_sha256_blocks"); &push ("edi"); &push ("esi"); &mov ("eax",-1); &mov ("edi",&wparam(0)); &mov ("esi",&wparam(1)); &mov ("ecx",&wparam(2)); &mov ("edx","esp"); # put aside %esp &add ("esp",-128); &movups ("xmm0",&QWP(0,"edi")); # copy-in context &and ("esp",-16); &movups ("xmm1",&QWP(16,"edi")); &movaps (&QWP(0,"esp"),"xmm0"); &mov ("edi","esp"); &movaps (&QWP(16,"esp"),"xmm1"); &mov ("eax",-1); &data_byte(0xf3,0x0f,0xa6,0xd0); # rep xsha256 &movaps ("xmm0",&QWP(0,"esp")); &movaps ("xmm1",&QWP(16,"esp")); &mov ("esp","edx"); # restore %esp &mov ("edi",&wparam(0)); &movups (&QWP(0,"edi"),"xmm0"); # copy-out context &movups (&QWP(16,"edi"),"xmm1"); &pop ("esi"); &pop ("edi"); &ret (); Loading @@ -424,7 +482,29 @@ my ($mode,$opcode) = @_; &mov ("edi",&wparam(0)); &mov ("esi",&wparam(1)); &mov ("ecx",&wparam(2)); &mov ("edx","esp"); # put aside %esp &add ("esp",-128); &movups ("xmm0",&QWP(0,"edi")); # copy-in context &and ("esp",-16); &movups ("xmm1",&QWP(16,"edi")); &movups ("xmm2",&QWP(32,"edi")); &movups ("xmm3",&QWP(48,"edi")); &movaps (&QWP(0,"esp"),"xmm0"); &mov ("edi","esp"); &movaps (&QWP(16,"esp"),"xmm1"); &movaps (&QWP(32,"esp"),"xmm2"); &movaps (&QWP(48,"esp"),"xmm3"); &data_byte(0xf3,0x0f,0xa6,0xe0); # rep xsha512 &movaps ("xmm0",&QWP(0,"esp")); &movaps ("xmm1",&QWP(16,"esp")); &movaps ("xmm2",&QWP(32,"esp")); &movaps ("xmm3",&QWP(48,"esp")); &mov ("esp","edx"); # restore %esp &mov ("edi",&wparam(0)); &movups (&QWP(0,"edi"),"xmm0"); # copy-out context &movups (&QWP(16,"edi"),"xmm1"); &movups (&QWP(32,"edi"),"xmm2"); &movups (&QWP(48,"edi"),"xmm3"); &pop ("esi"); &pop ("edi"); &ret (); Loading
engines/asm/e_padlock-x86_64.pl +67 −4 Original line number Diff line number Diff line Loading @@ -146,9 +146,20 @@ padlock_xstore: .type padlock_sha1_oneshot,\@function,3 .align 16 padlock_sha1_oneshot: xor %rax,%rax mov %rdx,%rcx mov %rdi,%rdx # put aside %rdi movups (%rdi),%xmm0 # copy-in context sub \$128+8,%rsp mov 16(%rdi),%eax movaps %xmm0,(%rsp) mov %rsp,%rdi mov %eax,16(%rsp) xor %rax,%rax .byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1 movaps (%rsp),%xmm0 mov 16(%rsp),%eax movups %xmm0,(%rdx) # copy-out context mov %eax,16(%rdx) ret .size padlock_sha1_oneshot,.-padlock_sha1_oneshot Loading @@ -156,9 +167,20 @@ padlock_sha1_oneshot: .type padlock_sha1_blocks,\@function,3 .align 16 padlock_sha1_blocks: mov \$-1,%rax mov %rdx,%rcx mov %rdi,%rdx # put aside %rdi movups (%rdi),%xmm0 # copy-in context sub \$128+8,%rsp mov 16(%rdi),%eax movaps %xmm0,(%rsp) mov %rsp,%rdi mov %eax,16(%rsp) mov \$-1,%rax .byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1 movaps (%rsp),%xmm0 mov 16(%rsp),%eax movups %xmm0,(%rdx) # copy-out context mov %eax,16(%rdx) ret .size padlock_sha1_blocks,.-padlock_sha1_blocks Loading @@ -166,9 +188,20 @@ padlock_sha1_blocks: .type padlock_sha256_oneshot,\@function,3 .align 16 padlock_sha256_oneshot: xor %rax,%rax mov %rdx,%rcx mov %rdi,%rdx # put aside %rdi movups (%rdi),%xmm0 # copy-in context sub \$128+8,%rsp movups 16(%rdi),%xmm1 movaps %xmm0,(%rsp) mov %rsp,%rdi movaps %xmm1,16(%rsp) xor %rax,%rax .byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 movups %xmm0,(%rdx) # copy-out context movups %xmm1,16(%rdx) ret .size padlock_sha256_oneshot,.-padlock_sha256_oneshot Loading @@ -176,9 +209,20 @@ padlock_sha256_oneshot: .type padlock_sha256_blocks,\@function,3 .align 16 padlock_sha256_blocks: mov \$-1,%rax mov %rdx,%rcx mov %rdi,%rdx # put aside %rdi movups (%rdi),%xmm0 # copy-in context sub \$128+8,%rsp movups 16(%rdi),%xmm1 movaps %xmm0,(%rsp) mov %rsp,%rdi movaps %xmm1,16(%rsp) mov \$-1,%rax .byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 movups %xmm0,(%rdx) # copy-out context movups %xmm1,16(%rdx) ret .size padlock_sha256_blocks,.-padlock_sha256_blocks Loading @@ -187,7 +231,26 @@ padlock_sha256_blocks: .align 16 padlock_sha512_blocks: mov %rdx,%rcx mov %rdi,%rdx # put aside %rdi movups (%rdi),%xmm0 # copy-in context sub \$128+8,%rsp movups 16(%rdi),%xmm1 movups 32(%rdi),%xmm2 movups 48(%rdi),%xmm3 movaps %xmm0,(%rsp) mov %rsp,%rdi movaps %xmm1,16(%rsp) movaps %xmm2,32(%rsp) movaps %xmm3,48(%rsp) .byte 0xf3,0x0f,0xa6,0xe0 # rep xha512 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 movaps 32(%rsp),%xmm2 movaps 48(%rsp),%xmm3 movups %xmm0,(%rdx) # copy-out context movups %xmm1,16(%rdx) movups %xmm2,32(%rdx) movups %xmm3,48(%rdx) ret .size padlock_sha512_blocks,.-padlock_sha512_blocks ___ Loading