Loading crypto/ec/asm/ecp_nistz256-armv4.pl +17 −41 Original line number Diff line number Diff line Loading @@ -1405,27 +1405,19 @@ ecp_nistz256_point_add: stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional sub sp,sp,#32*18+16 ldmia $b_ptr!,{r4-r11} @ copy in2 ldmia $b_ptr!,{r4-r11} @ copy in2_x add r3,sp,#$in2_x orr r12,r4,r5 orr r12,r12,r6 orr r12,r12,r7 orr r12,r12,r8 orr r12,r12,r9 orr r12,r12,r10 orr r12,r12,r11 stmia r3!,{r4-r11} ldmia $b_ptr!,{r4-r11} orr r12,r12,r4 orr r12,r12,r5 ldmia $b_ptr!,{r4-r11} @ copy in2_y stmia r3!,{r4-r11} ldmia $b_ptr,{r4-r11} @ copy in2_z orr r12,r4,r5 orr r12,r12,r6 orr r12,r12,r7 orr r12,r12,r8 orr r12,r12,r9 orr r12,r12,r10 orr r12,r12,r11 stmia r3!,{r4-r11} ldmia $b_ptr,{r4-r11} cmp r12,#0 #ifdef __thumb2__ it ne Loading @@ -1434,27 +1426,19 @@ ecp_nistz256_point_add: stmia r3,{r4-r11} str r12,[sp,#32*18+8] @ !in2infty ldmia $a_ptr!,{r4-r11} @ copy in1 ldmia $a_ptr!,{r4-r11} @ copy in1_x add r3,sp,#$in1_x orr r12,r4,r5 orr r12,r12,r6 orr r12,r12,r7 orr r12,r12,r8 orr r12,r12,r9 orr r12,r12,r10 orr r12,r12,r11 stmia r3!,{r4-r11} ldmia $a_ptr!,{r4-r11} orr r12,r12,r4 orr r12,r12,r5 ldmia $a_ptr!,{r4-r11} @ copy in1_y stmia r3!,{r4-r11} ldmia $a_ptr,{r4-r11} @ copy in1_z orr r12,r4,r5 orr r12,r12,r6 orr r12,r12,r7 orr r12,r12,r8 orr r12,r12,r9 orr r12,r12,r10 orr r12,r12,r11 stmia r3!,{r4-r11} ldmia $a_ptr,{r4-r11} cmp r12,#0 #ifdef __thumb2__ it ne Loading Loading @@ -1684,27 +1668,19 @@ ecp_nistz256_point_add_affine: stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional sub sp,sp,#32*15 ldmia $a_ptr!,{r4-r11} @ copy in1 ldmia $a_ptr!,{r4-r11} @ copy in1_x add r3,sp,#$in1_x orr r12,r4,r5 orr r12,r12,r6 orr r12,r12,r7 orr r12,r12,r8 orr r12,r12,r9 orr r12,r12,r10 orr r12,r12,r11 stmia r3!,{r4-r11} ldmia $a_ptr!,{r4-r11} orr r12,r12,r4 orr r12,r12,r5 ldmia $a_ptr!,{r4-r11} @ copy in1_y stmia r3!,{r4-r11} ldmia $a_ptr,{r4-r11} @ copy in1_z orr r12,r4,r5 orr r12,r12,r6 orr r12,r12,r7 orr r12,r12,r8 orr r12,r12,r9 orr r12,r12,r10 orr r12,r12,r11 stmia r3!,{r4-r11} ldmia $a_ptr,{r4-r11} cmp r12,#0 #ifdef __thumb2__ it ne Loading @@ -1713,7 +1689,7 @@ ecp_nistz256_point_add_affine: stmia r3,{r4-r11} str r12,[sp,#32*15+4] @ !in1infty ldmia $b_ptr!,{r4-r11} @ copy in2 ldmia $b_ptr!,{r4-r11} @ copy in2_x add r3,sp,#$in2_x orr r12,r4,r5 orr r12,r12,r6 Loading @@ -1723,7 +1699,7 @@ ecp_nistz256_point_add_affine: orr r12,r12,r10 orr r12,r12,r11 stmia r3!,{r4-r11} ldmia $b_ptr!,{r4-r11} ldmia $b_ptr!,{r4-r11} @ copy in2_y orr r12,r12,r4 orr r12,r12,r5 orr r12,r12,r6 Loading crypto/ec/asm/ecp_nistz256-armv8.pl +25 −51 Original line number Diff line number Diff line Loading @@ -862,46 +862,28 @@ ecp_nistz256_point_add: stp x25,x26,[sp,#64] sub sp,sp,#32*12 ldp $a0,$a1,[$bp] ldp $a2,$a3,[$bp,#16] ldp $t0,$t1,[$bp,#32] ldp $t2,$t3,[$bp,#48] ldp $a0,$a1,[$bp,#64] // in2_z ldp $a2,$a3,[$bp,#64+16] mov $rp_real,$rp mov $ap_real,$ap mov $bp_real,$bp orr $a0,$a0,$a1 orr $a2,$a2,$a3 ldp $acc0,$acc1,[$ap] orr $t0,$t0,$t1 orr $t2,$t2,$t3 ldp $acc2,$acc3,[$ap,#16] orr $a0,$a0,$a2 orr $t2,$t0,$t2 ldp $t0,$t1,[$ap,#32] orr $in2infty,$a0,$t2 cmp $in2infty,#0 ldp $t2,$t3,[$ap,#48] csetm $in2infty,ne // !in2infty ldp $a0,$a1,[$bp_real,#64] // forward load for p256_sqr_mont orr $acc0,$acc0,$acc1 orr $acc2,$acc2,$acc3 ldp $a2,$a3,[$bp_real,#64+16] orr $t0,$t0,$t1 orr $t2,$t2,$t3 orr $acc0,$acc0,$acc2 orr $t0,$t0,$t2 orr $in1infty,$acc0,$t0 cmp $in1infty,#0 ldr $poly1,.Lpoly+8 ldr $poly3,.Lpoly+24 csetm $in1infty,ne // !in1infty orr $t0,$a0,$a1 orr $t2,$a2,$a3 orr $in2infty,$t0,$t2 cmp $in2infty,#0 csetm $in2infty,ne // !in2infty add $rp,sp,#$Z2sqr bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z2sqr, in2_z); ldp $a0,$a1,[$ap_real,#64] ldp $a0,$a1,[$ap_real,#64] // in1_z ldp $a2,$a3,[$ap_real,#64+16] orr $t0,$a0,$a1 orr $t2,$a2,$a3 orr $in1infty,$t0,$t2 cmp $in1infty,#0 csetm $in1infty,ne // !in1infty add $rp,sp,#$Z1sqr bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z); Loading Loading @@ -1150,36 +1132,28 @@ ecp_nistz256_point_add_affine: ldr $poly1,.Lpoly+8 ldr $poly3,.Lpoly+24 ldp $a0,$a1,[$ap] ldp $a2,$a3,[$ap,#16] ldp $t0,$t1,[$ap,#32] ldp $t2,$t3,[$ap,#48] orr $a0,$a0,$a1 orr $a2,$a2,$a3 orr $t0,$t0,$t1 orr $t2,$t2,$t3 orr $a0,$a0,$a2 orr $t0,$t0,$t2 orr $in1infty,$a0,$t0 ldp $a0,$a1,[$ap,#64] // in1_z ldp $a2,$a3,[$ap,#64+16] orr $t0,$a0,$a1 orr $t2,$a2,$a3 orr $in1infty,$t0,$t2 cmp $in1infty,#0 csetm $in1infty,ne // !in1infty ldp $a0,$a1,[$bp] ldp $a2,$a3,[$bp,#16] ldp $t0,$t1,[$bp,#32] ldp $acc0,$acc1,[$bp] // in2_x ldp $acc2,$acc3,[$bp,#16] ldp $t0,$t1,[$bp,#32] // in2_y ldp $t2,$t3,[$bp,#48] orr $a0,$a0,$a1 orr $a2,$a2,$a3 orr $acc0,$acc0,$acc1 orr $acc2,$acc2,$acc3 orr $t0,$t0,$t1 orr $t2,$t2,$t3 orr $a0,$a0,$a2 orr $acc0,$acc0,$acc2 orr $t0,$t0,$t2 orr $in2infty,$a0,$t0 orr $in2infty,$acc0,$t0 cmp $in2infty,#0 csetm $in2infty,ne // !in2infty ldp $a0,$a1,[$ap_real,#64] ldp $a2,$a3,[$ap_real,#64+16] add $rp,sp,#$Z1sqr bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z); Loading crypto/ec/asm/ecp_nistz256-sparcv9.pl +45 −105 Original line number Diff line number Diff line Loading @@ -899,71 +899,39 @@ ecp_nistz256_point_add: mov $ap,$ap_real mov $bp,$bp_real ld [$bp],@acc[0] ! in2_x ld [$bp+4],@acc[1] ld [$bp+8],@acc[2] ld [$bp+12],@acc[3] ld [$bp+16],@acc[4] ld [$bp+20],@acc[5] ld [$bp+24],@acc[6] ld [$bp+28],@acc[7] ld [$bp+32],$t0 ! in2_y ld [$bp+32+4],$t1 ld [$bp+32+8],$t2 ld [$bp+32+12],$t3 ld [$bp+32+16],$t4 ld [$bp+32+20],$t5 ld [$bp+32+24],$t6 ld [$bp+32+28],$t7 or @acc[1],@acc[0],@acc[0] or @acc[3],@acc[2],@acc[2] or @acc[5],@acc[4],@acc[4] or @acc[7],@acc[6],@acc[6] or @acc[2],@acc[0],@acc[0] or @acc[6],@acc[4],@acc[4] or @acc[4],@acc[0],@acc[0] ld [$bp+64],$t0 ! in2_z ld [$bp+64+4],$t1 ld [$bp+64+8],$t2 ld [$bp+64+12],$t3 ld [$bp+64+16],$t4 ld [$bp+64+20],$t5 ld [$bp+64+24],$t6 ld [$bp+64+28],$t7 or $t1,$t0,$t0 or $t3,$t2,$t2 or $t5,$t4,$t4 or $t7,$t6,$t6 or $t2,$t0,$t0 or $t6,$t4,$t4 or $t4,$t0,$t0 or @acc[0],$t0,$t0 ! !in2infty or $t4,$t0,$t0 ! !in2infty movrnz $t0,-1,$t0 st $t0,[%fp+STACK_BIAS-12] ld [$ap],@acc[0] ! in1_x ld [$ap+4],@acc[1] ld [$ap+8],@acc[2] ld [$ap+12],@acc[3] ld [$ap+16],@acc[4] ld [$ap+20],@acc[5] ld [$ap+24],@acc[6] ld [$ap+28],@acc[7] ld [$ap+32],$t0 ! in1_y ld [$ap+32+4],$t1 ld [$ap+32+8],$t2 ld [$ap+32+12],$t3 ld [$ap+32+16],$t4 ld [$ap+32+20],$t5 ld [$ap+32+24],$t6 ld [$ap+32+28],$t7 or @acc[1],@acc[0],@acc[0] or @acc[3],@acc[2],@acc[2] or @acc[5],@acc[4],@acc[4] or @acc[7],@acc[6],@acc[6] or @acc[2],@acc[0],@acc[0] or @acc[6],@acc[4],@acc[4] or @acc[4],@acc[0],@acc[0] ld [$ap+64],$t0 ! in1_z ld [$ap+64+4],$t1 ld [$ap+64+8],$t2 ld [$ap+64+12],$t3 ld [$ap+64+16],$t4 ld [$ap+64+20],$t5 ld [$ap+64+24],$t6 ld [$ap+64+28],$t7 or $t1,$t0,$t0 or $t3,$t2,$t2 or $t5,$t4,$t4 or $t7,$t6,$t6 or $t2,$t0,$t0 or $t6,$t4,$t4 or $t4,$t0,$t0 or @acc[0],$t0,$t0 ! !in1infty or $t4,$t0,$t0 ! !in1infty movrnz $t0,-1,$t0 st $t0,[%fp+STACK_BIAS-16] Loading Loading @@ -1201,37 +1169,21 @@ ecp_nistz256_point_add_affine: mov $ap,$ap_real mov $bp,$bp_real ld [$ap],@acc[0] ! in1_x ld [$ap+4],@acc[1] ld [$ap+8],@acc[2] ld [$ap+12],@acc[3] ld [$ap+16],@acc[4] ld [$ap+20],@acc[5] ld [$ap+24],@acc[6] ld [$ap+28],@acc[7] ld [$ap+32],$t0 ! in1_y ld [$ap+32+4],$t1 ld [$ap+32+8],$t2 ld [$ap+32+12],$t3 ld [$ap+32+16],$t4 ld [$ap+32+20],$t5 ld [$ap+32+24],$t6 ld [$ap+32+28],$t7 or @acc[1],@acc[0],@acc[0] or @acc[3],@acc[2],@acc[2] or @acc[5],@acc[4],@acc[4] or @acc[7],@acc[6],@acc[6] or @acc[2],@acc[0],@acc[0] or @acc[6],@acc[4],@acc[4] or @acc[4],@acc[0],@acc[0] ld [$ap+64],$t0 ! in1_z ld [$ap+64+4],$t1 ld [$ap+64+8],$t2 ld [$ap+64+12],$t3 ld [$ap+64+16],$t4 ld [$ap+64+20],$t5 ld [$ap+64+24],$t6 ld [$ap+64+28],$t7 or $t1,$t0,$t0 or $t3,$t2,$t2 or $t5,$t4,$t4 or $t7,$t6,$t6 or $t2,$t0,$t0 or $t6,$t4,$t4 or $t4,$t0,$t0 or @acc[0],$t0,$t0 ! !in1infty or $t4,$t0,$t0 ! !in1infty movrnz $t0,-1,$t0 st $t0,[%fp+STACK_BIAS-16] Loading Loading @@ -2402,16 +2354,6 @@ ecp_nistz256_point_add_vis3: stx $acc2,[%sp+LOCALS64+$in2_y+16] stx $acc3,[%sp+LOCALS64+$in2_y+24] or $a1,$a0,$a0 or $a3,$a2,$a2 or $acc1,$acc0,$acc0 or $acc3,$acc2,$acc2 or $a2,$a0,$a0 or $acc2,$acc0,$acc0 or $acc0,$a0,$a0 movrnz $a0,-1,$a0 ! !in2infty stx $a0,[%fp+STACK_BIAS-8] ld [$bp+64],$acc0 ! in2_z ld [$bp+64+4],$t0 ld [$bp+64+8],$acc1 Loading Loading @@ -2445,6 +2387,12 @@ ecp_nistz256_point_add_vis3: stx $acc2,[%sp+LOCALS64+$in2_z+16] stx $acc3,[%sp+LOCALS64+$in2_z+24] or $acc1,$acc0,$acc0 or $acc3,$acc2,$acc2 or $acc2,$acc0,$acc0 movrnz $acc0,-1,$acc0 ! !in2infty stx $acc0,[%fp+STACK_BIAS-8] or $a0,$t0,$a0 ld [$ap+32],$acc0 ! in1_y or $a1,$t1,$a1 Loading Loading @@ -2474,16 +2422,6 @@ ecp_nistz256_point_add_vis3: stx $acc2,[%sp+LOCALS64+$in1_y+16] stx $acc3,[%sp+LOCALS64+$in1_y+24] or $a1,$a0,$a0 or $a3,$a2,$a2 or $acc1,$acc0,$acc0 or $acc3,$acc2,$acc2 or $a2,$a0,$a0 or $acc2,$acc0,$acc0 or $acc0,$a0,$a0 movrnz $a0,-1,$a0 ! !in1infty stx $a0,[%fp+STACK_BIAS-16] ldx [%sp+LOCALS64+$in2_z],$a0 ! forward load ldx [%sp+LOCALS64+$in2_z+8],$a1 ldx [%sp+LOCALS64+$in2_z+16],$a2 Loading @@ -2510,6 +2448,12 @@ ecp_nistz256_point_add_vis3: stx $acc2,[%sp+LOCALS64+$in1_z+16] stx $acc3,[%sp+LOCALS64+$in1_z+24] or $acc1,$acc0,$acc0 or $acc3,$acc2,$acc2 or $acc2,$acc0,$acc0 movrnz $acc0,-1,$acc0 ! !in1infty stx $acc0,[%fp+STACK_BIAS-16] call __ecp_nistz256_sqr_mont_vis3 ! p256_sqr_mont(Z2sqr, in2_z); add %sp,LOCALS64+$Z2sqr,$rp Loading Loading @@ -2871,16 +2815,6 @@ ecp_nistz256_point_add_affine_vis3: stx $acc2,[%sp+LOCALS64+$in1_y+16] stx $acc3,[%sp+LOCALS64+$in1_y+24] or $a1,$a0,$a0 or $a3,$a2,$a2 or $acc1,$acc0,$acc0 or $acc3,$acc2,$acc2 or $a2,$a0,$a0 or $acc2,$acc0,$acc0 or $acc0,$a0,$a0 movrnz $a0,-1,$a0 ! !in1infty stx $a0,[%fp+STACK_BIAS-16] ld [$ap+64],$a0 ! in1_z ld [$ap+64+4],$t0 ld [$ap+64+8],$a1 Loading @@ -2902,6 +2836,12 @@ ecp_nistz256_point_add_affine_vis3: stx $a2,[%sp+LOCALS64+$in1_z+16] stx $a3,[%sp+LOCALS64+$in1_z+24] or $a1,$a0,$t0 or $a3,$a2,$t2 or $t2,$t0,$t0 movrnz $t0,-1,$t0 ! !in1infty stx $t0,[%fp+STACK_BIAS-16] call __ecp_nistz256_sqr_mont_vis3 ! p256_sqr_mont(Z1sqr, in1_z); add %sp,LOCALS64+$Z1sqr,$rp Loading crypto/ec/asm/ecp_nistz256-x86.pl +15 −15 Original line number Diff line number Diff line Loading @@ -1405,14 +1405,14 @@ for ($i=0;$i<7;$i++) { &mov ("edx",&DWP($i+12,"esi")); &mov (&DWP($i+0,"edi"),"eax"); &mov (&DWP(32*18+12,"esp"),"ebp") if ($i==0); &mov ("ebp","eax") if ($i==0); &or ("ebp","eax") if ($i!=0 && $i<64); &mov ("ebp","eax") if ($i==64); &or ("ebp","eax") if ($i>64); &mov (&DWP($i+4,"edi"),"ebx"); &or ("ebp","ebx") if ($i<64); &or ("ebp","ebx") if ($i>=64); &mov (&DWP($i+8,"edi"),"ecx"); &or ("ebp","ecx") if ($i<64); &or ("ebp","ecx") if ($i>=64); &mov (&DWP($i+12,"edi"),"edx"); &or ("ebp","edx") if ($i<64); &or ("ebp","edx") if ($i>=64); } &xor ("eax","eax"); &mov ("esi",&wparam(1)); Loading @@ -1428,14 +1428,14 @@ for ($i=0;$i<7;$i++) { &mov ("ecx",&DWP($i+8,"esi")); &mov ("edx",&DWP($i+12,"esi")); &mov (&DWP($i+0,"edi"),"eax"); &mov ("ebp","eax") if ($i==0); &or ("ebp","eax") if ($i!=0 && $i<64); &mov ("ebp","eax") if ($i==64); &or ("ebp","eax") if ($i>64); &mov (&DWP($i+4,"edi"),"ebx"); &or ("ebp","ebx") if ($i<64); &or ("ebp","ebx") if ($i>=64); &mov (&DWP($i+8,"edi"),"ecx"); &or ("ebp","ecx") if ($i<64); &or ("ebp","ecx") if ($i>=64); &mov (&DWP($i+12,"edi"),"edx"); &or ("ebp","edx") if ($i<64); &or ("ebp","edx") if ($i>=64); } &xor ("eax","eax"); &sub ("eax","ebp"); Loading Loading @@ -1684,14 +1684,14 @@ for ($i=0;$i<7;$i++) { &mov ("edx",&DWP($i+12,"esi")); &mov (&DWP($i+0,"edi"),"eax"); &mov (&DWP(32*15+8,"esp"),"ebp") if ($i==0); &mov ("ebp","eax") if ($i==0); &or ("ebp","eax") if ($i!=0 && $i<64); &mov ("ebp","eax") if ($i==64); &or ("ebp","eax") if ($i>64); &mov (&DWP($i+4,"edi"),"ebx"); &or ("ebp","ebx") if ($i<64); &or ("ebp","ebx") if ($i>=64); &mov (&DWP($i+8,"edi"),"ecx"); &or ("ebp","ecx") if ($i<64); &or ("ebp","ecx") if ($i>=64); &mov (&DWP($i+12,"edi"),"edx"); &or ("ebp","edx") if ($i<64); &or ("ebp","edx") if ($i>=64); } &xor ("eax","eax"); &mov ("esi",&wparam(2)); Loading Loading
crypto/ec/asm/ecp_nistz256-armv4.pl +17 −41 Original line number Diff line number Diff line Loading @@ -1405,27 +1405,19 @@ ecp_nistz256_point_add: stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional sub sp,sp,#32*18+16 ldmia $b_ptr!,{r4-r11} @ copy in2 ldmia $b_ptr!,{r4-r11} @ copy in2_x add r3,sp,#$in2_x orr r12,r4,r5 orr r12,r12,r6 orr r12,r12,r7 orr r12,r12,r8 orr r12,r12,r9 orr r12,r12,r10 orr r12,r12,r11 stmia r3!,{r4-r11} ldmia $b_ptr!,{r4-r11} orr r12,r12,r4 orr r12,r12,r5 ldmia $b_ptr!,{r4-r11} @ copy in2_y stmia r3!,{r4-r11} ldmia $b_ptr,{r4-r11} @ copy in2_z orr r12,r4,r5 orr r12,r12,r6 orr r12,r12,r7 orr r12,r12,r8 orr r12,r12,r9 orr r12,r12,r10 orr r12,r12,r11 stmia r3!,{r4-r11} ldmia $b_ptr,{r4-r11} cmp r12,#0 #ifdef __thumb2__ it ne Loading @@ -1434,27 +1426,19 @@ ecp_nistz256_point_add: stmia r3,{r4-r11} str r12,[sp,#32*18+8] @ !in2infty ldmia $a_ptr!,{r4-r11} @ copy in1 ldmia $a_ptr!,{r4-r11} @ copy in1_x add r3,sp,#$in1_x orr r12,r4,r5 orr r12,r12,r6 orr r12,r12,r7 orr r12,r12,r8 orr r12,r12,r9 orr r12,r12,r10 orr r12,r12,r11 stmia r3!,{r4-r11} ldmia $a_ptr!,{r4-r11} orr r12,r12,r4 orr r12,r12,r5 ldmia $a_ptr!,{r4-r11} @ copy in1_y stmia r3!,{r4-r11} ldmia $a_ptr,{r4-r11} @ copy in1_z orr r12,r4,r5 orr r12,r12,r6 orr r12,r12,r7 orr r12,r12,r8 orr r12,r12,r9 orr r12,r12,r10 orr r12,r12,r11 stmia r3!,{r4-r11} ldmia $a_ptr,{r4-r11} cmp r12,#0 #ifdef __thumb2__ it ne Loading Loading @@ -1684,27 +1668,19 @@ ecp_nistz256_point_add_affine: stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional sub sp,sp,#32*15 ldmia $a_ptr!,{r4-r11} @ copy in1 ldmia $a_ptr!,{r4-r11} @ copy in1_x add r3,sp,#$in1_x orr r12,r4,r5 orr r12,r12,r6 orr r12,r12,r7 orr r12,r12,r8 orr r12,r12,r9 orr r12,r12,r10 orr r12,r12,r11 stmia r3!,{r4-r11} ldmia $a_ptr!,{r4-r11} orr r12,r12,r4 orr r12,r12,r5 ldmia $a_ptr!,{r4-r11} @ copy in1_y stmia r3!,{r4-r11} ldmia $a_ptr,{r4-r11} @ copy in1_z orr r12,r4,r5 orr r12,r12,r6 orr r12,r12,r7 orr r12,r12,r8 orr r12,r12,r9 orr r12,r12,r10 orr r12,r12,r11 stmia r3!,{r4-r11} ldmia $a_ptr,{r4-r11} cmp r12,#0 #ifdef __thumb2__ it ne Loading @@ -1713,7 +1689,7 @@ ecp_nistz256_point_add_affine: stmia r3,{r4-r11} str r12,[sp,#32*15+4] @ !in1infty ldmia $b_ptr!,{r4-r11} @ copy in2 ldmia $b_ptr!,{r4-r11} @ copy in2_x add r3,sp,#$in2_x orr r12,r4,r5 orr r12,r12,r6 Loading @@ -1723,7 +1699,7 @@ ecp_nistz256_point_add_affine: orr r12,r12,r10 orr r12,r12,r11 stmia r3!,{r4-r11} ldmia $b_ptr!,{r4-r11} ldmia $b_ptr!,{r4-r11} @ copy in2_y orr r12,r12,r4 orr r12,r12,r5 orr r12,r12,r6 Loading
crypto/ec/asm/ecp_nistz256-armv8.pl +25 −51 Original line number Diff line number Diff line Loading @@ -862,46 +862,28 @@ ecp_nistz256_point_add: stp x25,x26,[sp,#64] sub sp,sp,#32*12 ldp $a0,$a1,[$bp] ldp $a2,$a3,[$bp,#16] ldp $t0,$t1,[$bp,#32] ldp $t2,$t3,[$bp,#48] ldp $a0,$a1,[$bp,#64] // in2_z ldp $a2,$a3,[$bp,#64+16] mov $rp_real,$rp mov $ap_real,$ap mov $bp_real,$bp orr $a0,$a0,$a1 orr $a2,$a2,$a3 ldp $acc0,$acc1,[$ap] orr $t0,$t0,$t1 orr $t2,$t2,$t3 ldp $acc2,$acc3,[$ap,#16] orr $a0,$a0,$a2 orr $t2,$t0,$t2 ldp $t0,$t1,[$ap,#32] orr $in2infty,$a0,$t2 cmp $in2infty,#0 ldp $t2,$t3,[$ap,#48] csetm $in2infty,ne // !in2infty ldp $a0,$a1,[$bp_real,#64] // forward load for p256_sqr_mont orr $acc0,$acc0,$acc1 orr $acc2,$acc2,$acc3 ldp $a2,$a3,[$bp_real,#64+16] orr $t0,$t0,$t1 orr $t2,$t2,$t3 orr $acc0,$acc0,$acc2 orr $t0,$t0,$t2 orr $in1infty,$acc0,$t0 cmp $in1infty,#0 ldr $poly1,.Lpoly+8 ldr $poly3,.Lpoly+24 csetm $in1infty,ne // !in1infty orr $t0,$a0,$a1 orr $t2,$a2,$a3 orr $in2infty,$t0,$t2 cmp $in2infty,#0 csetm $in2infty,ne // !in2infty add $rp,sp,#$Z2sqr bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z2sqr, in2_z); ldp $a0,$a1,[$ap_real,#64] ldp $a0,$a1,[$ap_real,#64] // in1_z ldp $a2,$a3,[$ap_real,#64+16] orr $t0,$a0,$a1 orr $t2,$a2,$a3 orr $in1infty,$t0,$t2 cmp $in1infty,#0 csetm $in1infty,ne // !in1infty add $rp,sp,#$Z1sqr bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z); Loading Loading @@ -1150,36 +1132,28 @@ ecp_nistz256_point_add_affine: ldr $poly1,.Lpoly+8 ldr $poly3,.Lpoly+24 ldp $a0,$a1,[$ap] ldp $a2,$a3,[$ap,#16] ldp $t0,$t1,[$ap,#32] ldp $t2,$t3,[$ap,#48] orr $a0,$a0,$a1 orr $a2,$a2,$a3 orr $t0,$t0,$t1 orr $t2,$t2,$t3 orr $a0,$a0,$a2 orr $t0,$t0,$t2 orr $in1infty,$a0,$t0 ldp $a0,$a1,[$ap,#64] // in1_z ldp $a2,$a3,[$ap,#64+16] orr $t0,$a0,$a1 orr $t2,$a2,$a3 orr $in1infty,$t0,$t2 cmp $in1infty,#0 csetm $in1infty,ne // !in1infty ldp $a0,$a1,[$bp] ldp $a2,$a3,[$bp,#16] ldp $t0,$t1,[$bp,#32] ldp $acc0,$acc1,[$bp] // in2_x ldp $acc2,$acc3,[$bp,#16] ldp $t0,$t1,[$bp,#32] // in2_y ldp $t2,$t3,[$bp,#48] orr $a0,$a0,$a1 orr $a2,$a2,$a3 orr $acc0,$acc0,$acc1 orr $acc2,$acc2,$acc3 orr $t0,$t0,$t1 orr $t2,$t2,$t3 orr $a0,$a0,$a2 orr $acc0,$acc0,$acc2 orr $t0,$t0,$t2 orr $in2infty,$a0,$t0 orr $in2infty,$acc0,$t0 cmp $in2infty,#0 csetm $in2infty,ne // !in2infty ldp $a0,$a1,[$ap_real,#64] ldp $a2,$a3,[$ap_real,#64+16] add $rp,sp,#$Z1sqr bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z); Loading
crypto/ec/asm/ecp_nistz256-sparcv9.pl +45 −105 Original line number Diff line number Diff line Loading @@ -899,71 +899,39 @@ ecp_nistz256_point_add: mov $ap,$ap_real mov $bp,$bp_real ld [$bp],@acc[0] ! in2_x ld [$bp+4],@acc[1] ld [$bp+8],@acc[2] ld [$bp+12],@acc[3] ld [$bp+16],@acc[4] ld [$bp+20],@acc[5] ld [$bp+24],@acc[6] ld [$bp+28],@acc[7] ld [$bp+32],$t0 ! in2_y ld [$bp+32+4],$t1 ld [$bp+32+8],$t2 ld [$bp+32+12],$t3 ld [$bp+32+16],$t4 ld [$bp+32+20],$t5 ld [$bp+32+24],$t6 ld [$bp+32+28],$t7 or @acc[1],@acc[0],@acc[0] or @acc[3],@acc[2],@acc[2] or @acc[5],@acc[4],@acc[4] or @acc[7],@acc[6],@acc[6] or @acc[2],@acc[0],@acc[0] or @acc[6],@acc[4],@acc[4] or @acc[4],@acc[0],@acc[0] ld [$bp+64],$t0 ! in2_z ld [$bp+64+4],$t1 ld [$bp+64+8],$t2 ld [$bp+64+12],$t3 ld [$bp+64+16],$t4 ld [$bp+64+20],$t5 ld [$bp+64+24],$t6 ld [$bp+64+28],$t7 or $t1,$t0,$t0 or $t3,$t2,$t2 or $t5,$t4,$t4 or $t7,$t6,$t6 or $t2,$t0,$t0 or $t6,$t4,$t4 or $t4,$t0,$t0 or @acc[0],$t0,$t0 ! !in2infty or $t4,$t0,$t0 ! !in2infty movrnz $t0,-1,$t0 st $t0,[%fp+STACK_BIAS-12] ld [$ap],@acc[0] ! in1_x ld [$ap+4],@acc[1] ld [$ap+8],@acc[2] ld [$ap+12],@acc[3] ld [$ap+16],@acc[4] ld [$ap+20],@acc[5] ld [$ap+24],@acc[6] ld [$ap+28],@acc[7] ld [$ap+32],$t0 ! in1_y ld [$ap+32+4],$t1 ld [$ap+32+8],$t2 ld [$ap+32+12],$t3 ld [$ap+32+16],$t4 ld [$ap+32+20],$t5 ld [$ap+32+24],$t6 ld [$ap+32+28],$t7 or @acc[1],@acc[0],@acc[0] or @acc[3],@acc[2],@acc[2] or @acc[5],@acc[4],@acc[4] or @acc[7],@acc[6],@acc[6] or @acc[2],@acc[0],@acc[0] or @acc[6],@acc[4],@acc[4] or @acc[4],@acc[0],@acc[0] ld [$ap+64],$t0 ! in1_z ld [$ap+64+4],$t1 ld [$ap+64+8],$t2 ld [$ap+64+12],$t3 ld [$ap+64+16],$t4 ld [$ap+64+20],$t5 ld [$ap+64+24],$t6 ld [$ap+64+28],$t7 or $t1,$t0,$t0 or $t3,$t2,$t2 or $t5,$t4,$t4 or $t7,$t6,$t6 or $t2,$t0,$t0 or $t6,$t4,$t4 or $t4,$t0,$t0 or @acc[0],$t0,$t0 ! !in1infty or $t4,$t0,$t0 ! !in1infty movrnz $t0,-1,$t0 st $t0,[%fp+STACK_BIAS-16] Loading Loading @@ -1201,37 +1169,21 @@ ecp_nistz256_point_add_affine: mov $ap,$ap_real mov $bp,$bp_real ld [$ap],@acc[0] ! in1_x ld [$ap+4],@acc[1] ld [$ap+8],@acc[2] ld [$ap+12],@acc[3] ld [$ap+16],@acc[4] ld [$ap+20],@acc[5] ld [$ap+24],@acc[6] ld [$ap+28],@acc[7] ld [$ap+32],$t0 ! in1_y ld [$ap+32+4],$t1 ld [$ap+32+8],$t2 ld [$ap+32+12],$t3 ld [$ap+32+16],$t4 ld [$ap+32+20],$t5 ld [$ap+32+24],$t6 ld [$ap+32+28],$t7 or @acc[1],@acc[0],@acc[0] or @acc[3],@acc[2],@acc[2] or @acc[5],@acc[4],@acc[4] or @acc[7],@acc[6],@acc[6] or @acc[2],@acc[0],@acc[0] or @acc[6],@acc[4],@acc[4] or @acc[4],@acc[0],@acc[0] ld [$ap+64],$t0 ! in1_z ld [$ap+64+4],$t1 ld [$ap+64+8],$t2 ld [$ap+64+12],$t3 ld [$ap+64+16],$t4 ld [$ap+64+20],$t5 ld [$ap+64+24],$t6 ld [$ap+64+28],$t7 or $t1,$t0,$t0 or $t3,$t2,$t2 or $t5,$t4,$t4 or $t7,$t6,$t6 or $t2,$t0,$t0 or $t6,$t4,$t4 or $t4,$t0,$t0 or @acc[0],$t0,$t0 ! !in1infty or $t4,$t0,$t0 ! !in1infty movrnz $t0,-1,$t0 st $t0,[%fp+STACK_BIAS-16] Loading Loading @@ -2402,16 +2354,6 @@ ecp_nistz256_point_add_vis3: stx $acc2,[%sp+LOCALS64+$in2_y+16] stx $acc3,[%sp+LOCALS64+$in2_y+24] or $a1,$a0,$a0 or $a3,$a2,$a2 or $acc1,$acc0,$acc0 or $acc3,$acc2,$acc2 or $a2,$a0,$a0 or $acc2,$acc0,$acc0 or $acc0,$a0,$a0 movrnz $a0,-1,$a0 ! !in2infty stx $a0,[%fp+STACK_BIAS-8] ld [$bp+64],$acc0 ! in2_z ld [$bp+64+4],$t0 ld [$bp+64+8],$acc1 Loading Loading @@ -2445,6 +2387,12 @@ ecp_nistz256_point_add_vis3: stx $acc2,[%sp+LOCALS64+$in2_z+16] stx $acc3,[%sp+LOCALS64+$in2_z+24] or $acc1,$acc0,$acc0 or $acc3,$acc2,$acc2 or $acc2,$acc0,$acc0 movrnz $acc0,-1,$acc0 ! !in2infty stx $acc0,[%fp+STACK_BIAS-8] or $a0,$t0,$a0 ld [$ap+32],$acc0 ! in1_y or $a1,$t1,$a1 Loading Loading @@ -2474,16 +2422,6 @@ ecp_nistz256_point_add_vis3: stx $acc2,[%sp+LOCALS64+$in1_y+16] stx $acc3,[%sp+LOCALS64+$in1_y+24] or $a1,$a0,$a0 or $a3,$a2,$a2 or $acc1,$acc0,$acc0 or $acc3,$acc2,$acc2 or $a2,$a0,$a0 or $acc2,$acc0,$acc0 or $acc0,$a0,$a0 movrnz $a0,-1,$a0 ! !in1infty stx $a0,[%fp+STACK_BIAS-16] ldx [%sp+LOCALS64+$in2_z],$a0 ! forward load ldx [%sp+LOCALS64+$in2_z+8],$a1 ldx [%sp+LOCALS64+$in2_z+16],$a2 Loading @@ -2510,6 +2448,12 @@ ecp_nistz256_point_add_vis3: stx $acc2,[%sp+LOCALS64+$in1_z+16] stx $acc3,[%sp+LOCALS64+$in1_z+24] or $acc1,$acc0,$acc0 or $acc3,$acc2,$acc2 or $acc2,$acc0,$acc0 movrnz $acc0,-1,$acc0 ! !in1infty stx $acc0,[%fp+STACK_BIAS-16] call __ecp_nistz256_sqr_mont_vis3 ! p256_sqr_mont(Z2sqr, in2_z); add %sp,LOCALS64+$Z2sqr,$rp Loading Loading @@ -2871,16 +2815,6 @@ ecp_nistz256_point_add_affine_vis3: stx $acc2,[%sp+LOCALS64+$in1_y+16] stx $acc3,[%sp+LOCALS64+$in1_y+24] or $a1,$a0,$a0 or $a3,$a2,$a2 or $acc1,$acc0,$acc0 or $acc3,$acc2,$acc2 or $a2,$a0,$a0 or $acc2,$acc0,$acc0 or $acc0,$a0,$a0 movrnz $a0,-1,$a0 ! !in1infty stx $a0,[%fp+STACK_BIAS-16] ld [$ap+64],$a0 ! in1_z ld [$ap+64+4],$t0 ld [$ap+64+8],$a1 Loading @@ -2902,6 +2836,12 @@ ecp_nistz256_point_add_affine_vis3: stx $a2,[%sp+LOCALS64+$in1_z+16] stx $a3,[%sp+LOCALS64+$in1_z+24] or $a1,$a0,$t0 or $a3,$a2,$t2 or $t2,$t0,$t0 movrnz $t0,-1,$t0 ! !in1infty stx $t0,[%fp+STACK_BIAS-16] call __ecp_nistz256_sqr_mont_vis3 ! p256_sqr_mont(Z1sqr, in1_z); add %sp,LOCALS64+$Z1sqr,$rp Loading
crypto/ec/asm/ecp_nistz256-x86.pl +15 −15 Original line number Diff line number Diff line Loading @@ -1405,14 +1405,14 @@ for ($i=0;$i<7;$i++) { &mov ("edx",&DWP($i+12,"esi")); &mov (&DWP($i+0,"edi"),"eax"); &mov (&DWP(32*18+12,"esp"),"ebp") if ($i==0); &mov ("ebp","eax") if ($i==0); &or ("ebp","eax") if ($i!=0 && $i<64); &mov ("ebp","eax") if ($i==64); &or ("ebp","eax") if ($i>64); &mov (&DWP($i+4,"edi"),"ebx"); &or ("ebp","ebx") if ($i<64); &or ("ebp","ebx") if ($i>=64); &mov (&DWP($i+8,"edi"),"ecx"); &or ("ebp","ecx") if ($i<64); &or ("ebp","ecx") if ($i>=64); &mov (&DWP($i+12,"edi"),"edx"); &or ("ebp","edx") if ($i<64); &or ("ebp","edx") if ($i>=64); } &xor ("eax","eax"); &mov ("esi",&wparam(1)); Loading @@ -1428,14 +1428,14 @@ for ($i=0;$i<7;$i++) { &mov ("ecx",&DWP($i+8,"esi")); &mov ("edx",&DWP($i+12,"esi")); &mov (&DWP($i+0,"edi"),"eax"); &mov ("ebp","eax") if ($i==0); &or ("ebp","eax") if ($i!=0 && $i<64); &mov ("ebp","eax") if ($i==64); &or ("ebp","eax") if ($i>64); &mov (&DWP($i+4,"edi"),"ebx"); &or ("ebp","ebx") if ($i<64); &or ("ebp","ebx") if ($i>=64); &mov (&DWP($i+8,"edi"),"ecx"); &or ("ebp","ecx") if ($i<64); &or ("ebp","ecx") if ($i>=64); &mov (&DWP($i+12,"edi"),"edx"); &or ("ebp","edx") if ($i<64); &or ("ebp","edx") if ($i>=64); } &xor ("eax","eax"); &sub ("eax","ebp"); Loading Loading @@ -1684,14 +1684,14 @@ for ($i=0;$i<7;$i++) { &mov ("edx",&DWP($i+12,"esi")); &mov (&DWP($i+0,"edi"),"eax"); &mov (&DWP(32*15+8,"esp"),"ebp") if ($i==0); &mov ("ebp","eax") if ($i==0); &or ("ebp","eax") if ($i!=0 && $i<64); &mov ("ebp","eax") if ($i==64); &or ("ebp","eax") if ($i>64); &mov (&DWP($i+4,"edi"),"ebx"); &or ("ebp","ebx") if ($i<64); &or ("ebp","ebx") if ($i>=64); &mov (&DWP($i+8,"edi"),"ecx"); &or ("ebp","ecx") if ($i<64); &or ("ebp","ecx") if ($i>=64); &mov (&DWP($i+12,"edi"),"edx"); &or ("ebp","edx") if ($i<64); &or ("ebp","edx") if ($i>=64); } &xor ("eax","eax"); &mov ("esi",&wparam(2)); Loading