crypto/ec/asm/ecp_nistz256-x86_64.pl +10 −1

@@ -2044,6 +2044,7 @@ $code.=<<___;
     push %r15
     sub \$32*5+8, %rsp
 
+.Lpoint_double_shortcut$x:
     movdqu 0x00($a_ptr), %xmm0 # copy *(P256_POINT *)$a_ptr.x
     mov $a_ptr, $b_ptr # backup copy
     movdqu 0x10($a_ptr), %xmm1

@@ -2334,6 +2335,7 @@ $code.=<<___;
     mov 0x40+8*1($b_ptr), $acc6
     mov 0x40+8*2($b_ptr), $acc7
     mov 0x40+8*3($b_ptr), $acc0
+    movq $b_ptr, %xmm1
 
     lea 0x40-$bias($b_ptr), $a_ptr
     lea $Z1sqr(%rsp), $r_ptr # Z1^2

@@ -2389,7 +2391,7 @@ $code.=<<___;
     test $acc0, $acc0
     jnz .Ladd_proceed$x # (in1infty || in2infty)?
     test $acc1, $acc1
-    jz .Ladd_proceed$x # is_equal(S1,S2)?
+    jz .Ladd_double$x # is_equal(S1,S2)?
 
     movq %xmm0, $r_ptr # restore $r_ptr
     pxor %xmm0, %xmm0

@@ -2401,6 +2403,13 @@ $code.=<<___;
     movdqu %xmm0, 0x50($r_ptr)
     jmp .Ladd_done$x
 
+.align 32
+.Ladd_double$x:
+    movq %xmm1, $a_ptr # restore $a_ptr
+    movq %xmm0, $r_ptr # restore $r_ptr
+    add \$`32*(18-5)`, %rsp # difference in frame sizes
+    jmp .Lpoint_double_shortcut$x
+
 .align 32
 .Ladd_proceed$x:
     `&load_for_sqr("$R(%rsp)", "$src0")`
crypto/ec/asm/ecp_nistz256-x86_64.pl +10 −1

@@ -2044,6 +2044,7 @@ $code.=<<___;
     push %r15
     sub \$32*5+8, %rsp
 
+.Lpoint_double_shortcut$x:
     movdqu 0x00($a_ptr), %xmm0 # copy *(P256_POINT *)$a_ptr.x
     mov $a_ptr, $b_ptr # backup copy
     movdqu 0x10($a_ptr), %xmm1

@@ -2334,6 +2335,7 @@ $code.=<<___;
     mov 0x40+8*1($b_ptr), $acc6
     mov 0x40+8*2($b_ptr), $acc7
     mov 0x40+8*3($b_ptr), $acc0
+    movq $b_ptr, %xmm1
 
     lea 0x40-$bias($b_ptr), $a_ptr
     lea $Z1sqr(%rsp), $r_ptr # Z1^2

@@ -2389,7 +2391,7 @@ $code.=<<___;
     test $acc0, $acc0
     jnz .Ladd_proceed$x # (in1infty || in2infty)?
     test $acc1, $acc1
-    jz .Ladd_proceed$x # is_equal(S1,S2)?
+    jz .Ladd_double$x # is_equal(S1,S2)?
 
     movq %xmm0, $r_ptr # restore $r_ptr
     pxor %xmm0, %xmm0

@@ -2401,6 +2403,13 @@ $code.=<<___;
     movdqu %xmm0, 0x50($r_ptr)
     jmp .Ladd_done$x
 
+.align 32
+.Ladd_double$x:
+    movq %xmm1, $a_ptr # restore $a_ptr
+    movq %xmm0, $r_ptr # restore $r_ptr
+    add \$`32*(18-5)`, %rsp # difference in frame sizes
+    jmp .Lpoint_double_shortcut$x
+
 .align 32
 .Ladd_proceed$x:
     `&load_for_sqr("$R(%rsp)", "$src0")`