Loading crypto/ec/asm/ecp_nistz256-armv4.pl +12 −5 Original line number Diff line number Diff line Loading @@ -1252,6 +1252,7 @@ ecp_nistz256_point_double: stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional sub sp,sp,#32*5 .Lpoint_double_shortcut: add r3,sp,#$in_x ldmia $a_ptr!,{r4-r11} @ copy in_x stmia r3,{r4-r11} Loading Loading @@ -1371,7 +1372,7 @@ $code.=<<___; .align 5 ecp_nistz256_point_add: stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional sub sp,sp,#32*18 sub sp,sp,#32*18+16 ldmia $b_ptr!,{r4-r11} @ copy in2 add r3,sp,#$in2_x Loading Loading @@ -1504,9 +1505,9 @@ ecp_nistz256_point_add: tst $t0,$t1 beq .Ladd_proceed @ (in1infty || in2infty)? tst $t2,$t2 beq .Ladd_proceed @ is_equal(S1,S2)? beq .Ladd_double @ is_equal(S1,S2)? ldr $r_ptr,[sp,#32*18] ldr $r_ptr,[sp,#32*18+16] eor r4,r4,r4 eor r5,r5,r5 eor r6,r6,r6 Loading @@ -1520,6 +1521,12 @@ ecp_nistz256_point_add: stmia $r_ptr!,{r4-r11} b .Ladd_done .align 4 .Ladd_double: ldr $a_ptr,[sp,#32*18+20] add sp,sp,#32*(18-5)+16 @ difference in frame sizes b .Lpoint_double_shortcut .align 4 .Ladd_proceed: add $a_ptr,sp,#$R Loading Loading @@ -1588,7 +1595,7 @@ ecp_nistz256_point_add: add r3,sp,#$in1_x and r11,r11,r12 mvn r12,r12 ldr $r_ptr,[sp,#32*18] ldr $r_ptr,[sp,#32*18+16] ___ for($i=0;$i<96;$i+=8) { # conditional moves $code.=<<___; Loading @@ -1610,7 +1617,7 @@ ___ } $code.=<<___; .Ladd_done: add sp,sp,#32*18+16 @ +16 means "skip even over saved r0-r3" add sp,sp,#32*18+16+16 @ +16 means "skip even over saved r0-r3" #if __ARM_ARCH__>=5 || defined(__thumb__) ldmia sp!,{r4-r12,pc} #else Loading crypto/ec/asm/ecp_nistz256-armv8.pl +13 −3 Original line number Diff line number Diff line Loading @@ -691,12 +691,13 @@ $code.=<<___; .type ecp_nistz256_point_double,%function .align 5 ecp_nistz256_point_double: stp x29,x30,[sp,#-48]! stp x29,x30,[sp,#-80]! add x29,sp,#0 stp x19,x20,[sp,#16] stp x21,x22,[sp,#32] sub sp,sp,#32*4 .Ldouble_shortcut: ldp $acc0,$acc1,[$ap,#32] mov $rp_real,$rp ldp $acc2,$acc3,[$ap,#48] Loading Loading @@ -823,7 +824,7 @@ ecp_nistz256_point_double: add sp,x29,#0 // destroy frame ldp x19,x20,[x29,#16] ldp x21,x22,[x29,#32] ldp x29,x30,[sp],#48 ldp x29,x30,[sp],#80 ret .size ecp_nistz256_point_double,.-ecp_nistz256_point_double ___ Loading Loading @@ -963,7 +964,7 @@ ecp_nistz256_point_add: b.eq .Ladd_proceed // (in1infty || in2infty)? tst $temp,$temp b.eq .Ladd_proceed // is_equal(S1,S2)? b.eq .Ladd_double // is_equal(S1,S2)? eor $a0,$a0,$a0 eor $a1,$a1,$a1 Loading @@ -975,6 +976,15 @@ ecp_nistz256_point_add: stp $a0,$a1,[$rp_real,#80] b .Ladd_done .align 4 .Ladd_double: mov $ap,$ap_real mov $rp,$rp_real ldp x23,x24,[x29,#48] ldp x25,x26,[x29,#64] add sp,sp,#32*(12-4) // difference in stack frames b .Ldouble_shortcut .align 4 .Ladd_proceed: add $rp,sp,#$Rsqr Loading crypto/ec/asm/ecp_nistz256-x86.pl +9 −1 Original line number Diff line number Diff line Loading @@ -1197,6 +1197,7 @@ for ($i=0;$i<7;$i++) { ######################################################################## # void ecp_nistz256_point_double(P256_POINT *out,const P256_POINT *inp); # &static_label("point_double_shortcut"); &function_begin("ecp_nistz256_point_double"); { my ($S,$M,$Zsqr,$in_x,$tmp0)=map(32*$_,(0..4)); Loading @@ -1212,6 +1213,7 @@ for ($i=0;$i<7;$i++) { &picmeup("edx","OPENSSL_ia32cap_P","eax",&label("pic")); &mov ("ebp",&DWP(0,"edx")); } &set_label("point_double_shortcut"); &mov ("eax",&DWP(0,"esi")); # copy in_x &mov ("ebx",&DWP(4,"esi")); &mov ("ecx",&DWP(8,"esi")); Loading Loading @@ -1491,7 +1493,7 @@ for ($i=0;$i<7;$i++) { &mov ("ebx",&DWP(32*18+8,"esp")); &jz (&label("add_proceed")); # (in1infty || in2infty)? &test ("ebx","ebx"); &jz (&label("add_proceed")); # is_equal(S1,S2)? &jz (&label("add_double")); # is_equal(S1,S2)? &mov ("edi",&wparam(0)); &xor ("eax","eax"); Loading @@ -1499,6 +1501,12 @@ for ($i=0;$i<7;$i++) { &data_byte(0xfc,0xf3,0xab); # cld; stosd &jmp (&label("add_done")); &set_label("add_double",16); &mov ("esi",&wparam(1)); &mov ("ebp",&DWP(32*18+12,"esp")); # OPENSSL_ia32cap_P copy &add ("esp",4*((8*18+5)-(8*5+1))); # difference in frame sizes &jmp (&label("point_double_shortcut")); &set_label("add_proceed",16); &mov ("eax",&DWP(32*18+12,"esp")); # OPENSSL_ia32cap_P copy &lea ("esi",&DWP($R,"esp")); Loading Loading
crypto/ec/asm/ecp_nistz256-armv4.pl +12 −5 Original line number Diff line number Diff line Loading @@ -1252,6 +1252,7 @@ ecp_nistz256_point_double: stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional sub sp,sp,#32*5 .Lpoint_double_shortcut: add r3,sp,#$in_x ldmia $a_ptr!,{r4-r11} @ copy in_x stmia r3,{r4-r11} Loading Loading @@ -1371,7 +1372,7 @@ $code.=<<___; .align 5 ecp_nistz256_point_add: stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional sub sp,sp,#32*18 sub sp,sp,#32*18+16 ldmia $b_ptr!,{r4-r11} @ copy in2 add r3,sp,#$in2_x Loading Loading @@ -1504,9 +1505,9 @@ ecp_nistz256_point_add: tst $t0,$t1 beq .Ladd_proceed @ (in1infty || in2infty)? tst $t2,$t2 beq .Ladd_proceed @ is_equal(S1,S2)? beq .Ladd_double @ is_equal(S1,S2)? ldr $r_ptr,[sp,#32*18] ldr $r_ptr,[sp,#32*18+16] eor r4,r4,r4 eor r5,r5,r5 eor r6,r6,r6 Loading @@ -1520,6 +1521,12 @@ ecp_nistz256_point_add: stmia $r_ptr!,{r4-r11} b .Ladd_done .align 4 .Ladd_double: ldr $a_ptr,[sp,#32*18+20] add sp,sp,#32*(18-5)+16 @ difference in frame sizes b .Lpoint_double_shortcut .align 4 .Ladd_proceed: add $a_ptr,sp,#$R Loading Loading @@ -1588,7 +1595,7 @@ ecp_nistz256_point_add: add r3,sp,#$in1_x and r11,r11,r12 mvn r12,r12 ldr $r_ptr,[sp,#32*18] ldr $r_ptr,[sp,#32*18+16] ___ for($i=0;$i<96;$i+=8) { # conditional moves $code.=<<___; Loading @@ -1610,7 +1617,7 @@ ___ } $code.=<<___; .Ladd_done: add sp,sp,#32*18+16 @ +16 means "skip even over saved r0-r3" add sp,sp,#32*18+16+16 @ +16 means "skip even over saved r0-r3" #if __ARM_ARCH__>=5 || defined(__thumb__) ldmia sp!,{r4-r12,pc} #else Loading
crypto/ec/asm/ecp_nistz256-armv8.pl +13 −3 Original line number Diff line number Diff line Loading @@ -691,12 +691,13 @@ $code.=<<___; .type ecp_nistz256_point_double,%function .align 5 ecp_nistz256_point_double: stp x29,x30,[sp,#-48]! stp x29,x30,[sp,#-80]! add x29,sp,#0 stp x19,x20,[sp,#16] stp x21,x22,[sp,#32] sub sp,sp,#32*4 .Ldouble_shortcut: ldp $acc0,$acc1,[$ap,#32] mov $rp_real,$rp ldp $acc2,$acc3,[$ap,#48] Loading Loading @@ -823,7 +824,7 @@ ecp_nistz256_point_double: add sp,x29,#0 // destroy frame ldp x19,x20,[x29,#16] ldp x21,x22,[x29,#32] ldp x29,x30,[sp],#48 ldp x29,x30,[sp],#80 ret .size ecp_nistz256_point_double,.-ecp_nistz256_point_double ___ Loading Loading @@ -963,7 +964,7 @@ ecp_nistz256_point_add: b.eq .Ladd_proceed // (in1infty || in2infty)? tst $temp,$temp b.eq .Ladd_proceed // is_equal(S1,S2)? b.eq .Ladd_double // is_equal(S1,S2)? eor $a0,$a0,$a0 eor $a1,$a1,$a1 Loading @@ -975,6 +976,15 @@ ecp_nistz256_point_add: stp $a0,$a1,[$rp_real,#80] b .Ladd_done .align 4 .Ladd_double: mov $ap,$ap_real mov $rp,$rp_real ldp x23,x24,[x29,#48] ldp x25,x26,[x29,#64] add sp,sp,#32*(12-4) // difference in stack frames b .Ldouble_shortcut .align 4 .Ladd_proceed: add $rp,sp,#$Rsqr Loading
crypto/ec/asm/ecp_nistz256-x86.pl +9 −1 Original line number Diff line number Diff line Loading @@ -1197,6 +1197,7 @@ for ($i=0;$i<7;$i++) { ######################################################################## # void ecp_nistz256_point_double(P256_POINT *out,const P256_POINT *inp); # &static_label("point_double_shortcut"); &function_begin("ecp_nistz256_point_double"); { my ($S,$M,$Zsqr,$in_x,$tmp0)=map(32*$_,(0..4)); Loading @@ -1212,6 +1213,7 @@ for ($i=0;$i<7;$i++) { &picmeup("edx","OPENSSL_ia32cap_P","eax",&label("pic")); &mov ("ebp",&DWP(0,"edx")); } &set_label("point_double_shortcut"); &mov ("eax",&DWP(0,"esi")); # copy in_x &mov ("ebx",&DWP(4,"esi")); &mov ("ecx",&DWP(8,"esi")); Loading Loading @@ -1491,7 +1493,7 @@ for ($i=0;$i<7;$i++) { &mov ("ebx",&DWP(32*18+8,"esp")); &jz (&label("add_proceed")); # (in1infty || in2infty)? &test ("ebx","ebx"); &jz (&label("add_proceed")); # is_equal(S1,S2)? &jz (&label("add_double")); # is_equal(S1,S2)? &mov ("edi",&wparam(0)); &xor ("eax","eax"); Loading @@ -1499,6 +1501,12 @@ for ($i=0;$i<7;$i++) { &data_byte(0xfc,0xf3,0xab); # cld; stosd &jmp (&label("add_done")); &set_label("add_double",16); &mov ("esi",&wparam(1)); &mov ("ebp",&DWP(32*18+12,"esp")); # OPENSSL_ia32cap_P copy &add ("esp",4*((8*18+5)-(8*5+1))); # difference in frame sizes &jmp (&label("point_double_shortcut")); &set_label("add_proceed",16); &mov ("eax",&DWP(32*18+12,"esp")); # OPENSSL_ia32cap_P copy &lea ("esi",&DWP($R,"esp")); Loading