Loading crypto/bn/asm/mips3.s +257 −257 Original line number Diff line number Diff line Loading @@ -1584,17 +1584,17 @@ LEAF(bn_sqr_comba8) dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ mflo t_1 mfhi t_2 slt c_2,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu c_2,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ mflo t_1 mfhi t_2 Loading @@ -1609,63 +1609,63 @@ LEAF(bn_sqr_comba8) dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ mflo t_1 mfhi t_2 slt c_3,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu c_3,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_3,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu AT,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 sd c_1,24(a0) dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */ mflo t_1 mfhi t_2 slt c_1,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu c_1,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_1,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu AT,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ mflo t_1 mfhi t_2 Loading @@ -1680,93 +1680,93 @@ LEAF(bn_sqr_comba8) dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */ mflo t_1 mfhi t_2 slt c_2,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu c_2,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_2,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu AT,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_2,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu AT,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 sd c_3,40(a0) dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */ mflo t_1 mfhi t_2 slt c_3,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu c_3,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_3,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu AT,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_3,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu AT,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ mflo t_1 mfhi t_2 Loading @@ -1781,108 +1781,108 @@ LEAF(bn_sqr_comba8) dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */ mflo t_1 mfhi t_2 slt c_1,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu c_1,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_1,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu AT,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_1,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu AT,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_1,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu AT,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 sd c_2,56(a0) dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */ mflo t_1 mfhi t_2 slt c_2,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu c_2,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_2,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu AT,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_2,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu AT,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ mflo t_1 mfhi t_2 Loading @@ -1897,78 +1897,78 @@ LEAF(bn_sqr_comba8) dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */ mflo t_1 mfhi t_2 slt c_3,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu c_3,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_3,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu AT,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_3,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu AT,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 sd c_1,72(a0) dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */ mflo t_1 mfhi t_2 slt c_1,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu c_1,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_1,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu AT,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ mflo t_1 mfhi t_2 Loading @@ -1983,48 +1983,48 @@ LEAF(bn_sqr_comba8) dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */ mflo t_1 mfhi t_2 slt c_2,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu c_2,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_2,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu AT,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 sd c_3,88(a0) dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */ mflo t_1 mfhi t_2 slt c_3,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu c_3,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ mflo t_1 mfhi t_2 Loading @@ -2039,17 +2039,17 @@ LEAF(bn_sqr_comba8) dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */ mflo t_1 mfhi t_2 slt c_1,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu c_1,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 sd c_2,104(a0) dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ Loading @@ -2070,9 +2070,9 @@ LEAF(bn_sqr_comba4) .set reorder ld a_0,0(a1) ld a_1,8(a1) dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ ld a_2,16(a1) ld a_3,24(a1) dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ mflo c_1 mfhi c_2 sd c_1,0(a0) Loading @@ -2093,17 +2093,17 @@ LEAF(bn_sqr_comba4) dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ mflo t_1 mfhi t_2 slt c_2,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu c_2,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ mflo t_1 mfhi t_2 Loading @@ -2118,48 +2118,48 @@ LEAF(bn_sqr_comba4) dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ mflo t_1 mfhi t_2 slt c_3,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu c_3,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 dmultu a_1,a_2 /* mul_add_c(a2[1],b[2],c1,c2,c3); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_3,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu AT,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 sd c_1,24(a0) dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ mflo t_1 mfhi t_2 slt c_1,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu c_1,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ mflo t_1 mfhi t_2 Loading @@ -2174,17 +2174,17 @@ LEAF(bn_sqr_comba4) dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ mflo t_1 mfhi t_2 slt c_2,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu c_2,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 sd c_3,40(a0) dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ Loading crypto/bn/asm/x86_64-gcc.c +15 −19 Original line number Diff line number Diff line Loading @@ -269,6 +269,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ /* * Keep in mind that carrying into high part of multiplication result * can not overflow, because it cannot be all-ones. */ #if 0 /* original macros are kept for reference purposes */ #define mul_add_c(a,b,c0,c1,c2) { \ Loading @@ -283,10 +287,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) BN_ULONG ta=(a),tb=(b),t0; \ t1 = BN_UMULT_HIGH(ta,tb); \ t0 = ta * tb; \ t2 = t1+t1; c2 += (t2<t1)?1:0; \ t1 = t0+t0; t2 += (t1<t0)?1:0; \ c0 += t1; t2 += (c0<t1)?1:0; \ c0 += t0; t2 = t1+((c0<t0)?1:0);\ c1 += t2; c2 += (c1<t2)?1:0; \ c0 += t0; t1 += (c0<t0)?1:0; \ c1 += t1; c2 += (c1<t1)?1:0; \ } #else #define mul_add_c(a,b,c0,c1,c2) do { \ Loading Loading @@ -324,21 +328,13 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) : "=a"(t1),"=d"(t2) \ : "a"(a),"m"(b) \ : "cc"); \ asm ("addq %0,%0; adcq %2,%1" \ : "+d"(t2),"+r"(c2) \ : "g"(0) \ asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ : "+r"(c0),"+r"(c1),"+r"(c2) \ : "r"(t1),"r"(t2),"g"(0) \ : "cc"); \ asm ("addq %0,%0; adcq %2,%1" \ : "+a"(t1),"+d"(t2) \ : "g"(0) \ : "cc"); \ asm ("addq %2,%0; adcq %3,%1" \ : "+r"(c0),"+d"(t2) \ : "a"(t1),"g"(0) \ : "cc"); \ asm ("addq %2,%0; adcq %3,%1" \ : "+r"(c1),"+r"(c2) \ : "d"(t2),"g"(0) \ asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ : "+r"(c0),"+r"(c1),"+r"(c2) \ : "r"(t1),"r"(t2),"g"(0) \ : "cc"); \ } while (0) #endif Loading crypto/bn/bn_asm.c +10 −6 Original line number Diff line number Diff line Loading @@ -431,6 +431,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ /* * Keep in mind that carrying into high part of multiplication result * can not overflow, because it cannot be all-ones. */ #ifdef BN_LLONG #define mul_add_c(a,b,c0,c1,c2) \ t=(BN_ULLONG)a*b; \ Loading Loading @@ -471,10 +475,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) #define mul_add_c2(a,b,c0,c1,c2) { \ BN_ULONG ta=(a),tb=(b),t0; \ BN_UMULT_LOHI(t0,t1,ta,tb); \ t2 = t1+t1; c2 += (t2<t1)?1:0; \ t1 = t0+t0; t2 += (t1<t0)?1:0; \ c0 += t1; t2 += (c0<t1)?1:0; \ c0 += t0; t2 = t1+((c0<t0)?1:0);\ c1 += t2; c2 += (c1<t2)?1:0; \ c0 += t0; t1 += (c0<t0)?1:0; \ c1 += t1; c2 += (c1<t1)?1:0; \ } #define sqr_add_c(a,i,c0,c1,c2) { \ Loading @@ -501,10 +505,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) BN_ULONG ta=(a),tb=(b),t0; \ t1 = BN_UMULT_HIGH(ta,tb); \ t0 = ta * tb; \ t2 = t1+t1; c2 += (t2<t1)?1:0; \ t1 = t0+t0; t2 += (t1<t0)?1:0; \ c0 += t1; t2 += (c0<t1)?1:0; \ c0 += t0; t2 = t1+((c0<t0)?1:0);\ c1 += t2; c2 += (c1<t2)?1:0; \ c0 += t0; t1 += (c0<t0)?1:0; \ c1 += t1; c2 += (c1<t1)?1:0; \ } #define sqr_add_c(a,i,c0,c1,c2) { \ Loading crypto/bn/bntest.c +78 −24 Original line number Diff line number Diff line Loading @@ -676,44 +676,98 @@ int test_mul(BIO *bp) int test_sqr(BIO *bp, BN_CTX *ctx) { BIGNUM a,c,d,e; int i; BIGNUM *a,*c,*d,*e; int i, ret = 0; BN_init(&a); BN_init(&c); BN_init(&d); BN_init(&e); a = BN_new(); c = BN_new(); d = BN_new(); e = BN_new(); if (a == NULL || c == NULL || d == NULL || e == NULL) { goto err; } for (i=0; i<num0; i++) { BN_bntest_rand(&a,40+i*10,0,0); a.neg=rand_neg(); BN_sqr(&c,&a,ctx); BN_bntest_rand(a,40+i*10,0,0); a->neg=rand_neg(); BN_sqr(c,a,ctx); if (bp != NULL) { if (!results) { BN_print(bp,&a); BN_print(bp,a); BIO_puts(bp," * "); BN_print(bp,&a); BN_print(bp,a); BIO_puts(bp," - "); } BN_print(bp,&c); BN_print(bp,c); BIO_puts(bp,"\n"); } BN_div(&d,&e,&c,&a,ctx); BN_sub(&d,&d,&a); if(!BN_is_zero(&d) || !BN_is_zero(&e)) BN_div(d,e,c,a,ctx); BN_sub(d,d,a); if(!BN_is_zero(d) || !BN_is_zero(e)) { fprintf(stderr,"Square test failed!\n"); return 0; goto err; } } BN_free(&a); BN_free(&c); BN_free(&d); BN_free(&e); return(1); /* Regression test for a BN_sqr overflow bug. */ BN_hex2bn(&a, "80000000000000008000000000000001FFFFFFFFFFFFFFFE0000000000000000"); BN_sqr(c, a, ctx); if (bp != NULL) { if (!results) { BN_print(bp,a); BIO_puts(bp," * "); BN_print(bp,a); BIO_puts(bp," - "); } BN_print(bp,c); BIO_puts(bp,"\n"); } BN_mul(d, a, a, ctx); if (BN_cmp(c, d)) { fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce " "different results!\n"); goto err; } /* Regression test for a BN_sqr overflow bug. */ BN_hex2bn(&a, "80000000000000000000000080000001FFFFFFFE000000000000000000000000"); BN_sqr(c, a, ctx); if (bp != NULL) { if (!results) { BN_print(bp,a); BIO_puts(bp," * "); BN_print(bp,a); BIO_puts(bp," - "); } BN_print(bp,c); BIO_puts(bp,"\n"); } BN_mul(d, a, a, ctx); if (BN_cmp(c, d)) { fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce " "different results!\n"); goto err; } ret = 1; err: if (a != NULL) BN_free(a); if (c != NULL) BN_free(c); if (d != NULL) BN_free(d); if (e != NULL) BN_free(e); return ret; } int test_mont(BIO *bp, BN_CTX *ctx) Loading Loading
crypto/bn/asm/mips3.s +257 −257 Original line number Diff line number Diff line Loading @@ -1584,17 +1584,17 @@ LEAF(bn_sqr_comba8) dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ mflo t_1 mfhi t_2 slt c_2,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu c_2,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ mflo t_1 mfhi t_2 Loading @@ -1609,63 +1609,63 @@ LEAF(bn_sqr_comba8) dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ mflo t_1 mfhi t_2 slt c_3,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu c_3,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_3,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu AT,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 sd c_1,24(a0) dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */ mflo t_1 mfhi t_2 slt c_1,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu c_1,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_1,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu AT,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ mflo t_1 mfhi t_2 Loading @@ -1680,93 +1680,93 @@ LEAF(bn_sqr_comba8) dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */ mflo t_1 mfhi t_2 slt c_2,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu c_2,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_2,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu AT,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_2,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu AT,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 sd c_3,40(a0) dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */ mflo t_1 mfhi t_2 slt c_3,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu c_3,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_3,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu AT,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_3,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu AT,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ mflo t_1 mfhi t_2 Loading @@ -1781,108 +1781,108 @@ LEAF(bn_sqr_comba8) dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */ mflo t_1 mfhi t_2 slt c_1,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu c_1,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_1,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu AT,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_1,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu AT,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_1,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu AT,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 sd c_2,56(a0) dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */ mflo t_1 mfhi t_2 slt c_2,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu c_2,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_2,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu AT,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_2,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu AT,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ mflo t_1 mfhi t_2 Loading @@ -1897,78 +1897,78 @@ LEAF(bn_sqr_comba8) dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */ mflo t_1 mfhi t_2 slt c_3,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu c_3,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_3,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu AT,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_3,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu AT,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 sd c_1,72(a0) dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */ mflo t_1 mfhi t_2 slt c_1,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu c_1,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_1,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu AT,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ mflo t_1 mfhi t_2 Loading @@ -1983,48 +1983,48 @@ LEAF(bn_sqr_comba8) dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */ mflo t_1 mfhi t_2 slt c_2,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu c_2,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_2,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu AT,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 sd c_3,88(a0) dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */ mflo t_1 mfhi t_2 slt c_3,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu c_3,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ mflo t_1 mfhi t_2 Loading @@ -2039,17 +2039,17 @@ LEAF(bn_sqr_comba8) dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */ mflo t_1 mfhi t_2 slt c_1,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu c_1,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 sd c_2,104(a0) dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ Loading @@ -2070,9 +2070,9 @@ LEAF(bn_sqr_comba4) .set reorder ld a_0,0(a1) ld a_1,8(a1) dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ ld a_2,16(a1) ld a_3,24(a1) dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ mflo c_1 mfhi c_2 sd c_1,0(a0) Loading @@ -2093,17 +2093,17 @@ LEAF(bn_sqr_comba4) dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ mflo t_1 mfhi t_2 slt c_2,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu c_2,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ mflo t_1 mfhi t_2 Loading @@ -2118,48 +2118,48 @@ LEAF(bn_sqr_comba4) dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ mflo t_1 mfhi t_2 slt c_3,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu c_3,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 dmultu a_1,a_2 /* mul_add_c(a2[1],b[2],c1,c2,c3); */ mflo t_1 mfhi t_2 slt AT,t_2,zero daddu c_3,AT dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_1,t_1 sltu AT,c_1,t_1 daddu t_2,AT daddu c_1,t_1 daddu AT,t_2 sltu t_1,c_1,t_1 daddu c_2,AT daddu t_2,t_1 sltu AT,c_2,AT daddu c_2,t_2 sltu AT,c_2,t_2 daddu c_3,AT sltu t_2,c_2,t_2 daddu c_3,t_2 sd c_1,24(a0) dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ mflo t_1 mfhi t_2 slt c_1,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_2,t_1 sltu AT,c_2,t_1 daddu t_2,AT daddu c_2,t_1 daddu AT,t_2 sltu t_1,c_2,t_1 daddu c_3,AT daddu t_2,t_1 sltu c_1,c_3,AT daddu c_3,t_2 sltu AT,c_3,t_2 daddu c_1,AT sltu t_2,c_3,t_2 daddu c_1,t_2 dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ mflo t_1 mfhi t_2 Loading @@ -2174,17 +2174,17 @@ LEAF(bn_sqr_comba4) dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ mflo t_1 mfhi t_2 slt c_2,t_2,zero dsll t_2,1 slt a2,t_1,zero daddu t_2,a2 dsll t_1,1 daddu c_3,t_1 sltu AT,c_3,t_1 daddu t_2,AT daddu c_3,t_1 daddu AT,t_2 sltu t_1,c_3,t_1 daddu c_1,AT daddu t_2,t_1 sltu c_2,c_1,AT daddu c_1,t_2 sltu AT,c_1,t_2 daddu c_2,AT sltu t_2,c_1,t_2 daddu c_2,t_2 sd c_3,40(a0) dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ Loading
crypto/bn/asm/x86_64-gcc.c +15 −19 Original line number Diff line number Diff line Loading @@ -269,6 +269,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ /* * Keep in mind that carrying into high part of multiplication result * can not overflow, because it cannot be all-ones. */ #if 0 /* original macros are kept for reference purposes */ #define mul_add_c(a,b,c0,c1,c2) { \ Loading @@ -283,10 +287,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) BN_ULONG ta=(a),tb=(b),t0; \ t1 = BN_UMULT_HIGH(ta,tb); \ t0 = ta * tb; \ t2 = t1+t1; c2 += (t2<t1)?1:0; \ t1 = t0+t0; t2 += (t1<t0)?1:0; \ c0 += t1; t2 += (c0<t1)?1:0; \ c0 += t0; t2 = t1+((c0<t0)?1:0);\ c1 += t2; c2 += (c1<t2)?1:0; \ c0 += t0; t1 += (c0<t0)?1:0; \ c1 += t1; c2 += (c1<t1)?1:0; \ } #else #define mul_add_c(a,b,c0,c1,c2) do { \ Loading Loading @@ -324,21 +328,13 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) : "=a"(t1),"=d"(t2) \ : "a"(a),"m"(b) \ : "cc"); \ asm ("addq %0,%0; adcq %2,%1" \ : "+d"(t2),"+r"(c2) \ : "g"(0) \ asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ : "+r"(c0),"+r"(c1),"+r"(c2) \ : "r"(t1),"r"(t2),"g"(0) \ : "cc"); \ asm ("addq %0,%0; adcq %2,%1" \ : "+a"(t1),"+d"(t2) \ : "g"(0) \ : "cc"); \ asm ("addq %2,%0; adcq %3,%1" \ : "+r"(c0),"+d"(t2) \ : "a"(t1),"g"(0) \ : "cc"); \ asm ("addq %2,%0; adcq %3,%1" \ : "+r"(c1),"+r"(c2) \ : "d"(t2),"g"(0) \ asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ : "+r"(c0),"+r"(c1),"+r"(c2) \ : "r"(t1),"r"(t2),"g"(0) \ : "cc"); \ } while (0) #endif Loading
crypto/bn/bn_asm.c +10 −6 Original line number Diff line number Diff line Loading @@ -431,6 +431,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ /* * Keep in mind that carrying into high part of multiplication result * can not overflow, because it cannot be all-ones. */ #ifdef BN_LLONG #define mul_add_c(a,b,c0,c1,c2) \ t=(BN_ULLONG)a*b; \ Loading Loading @@ -471,10 +475,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) #define mul_add_c2(a,b,c0,c1,c2) { \ BN_ULONG ta=(a),tb=(b),t0; \ BN_UMULT_LOHI(t0,t1,ta,tb); \ t2 = t1+t1; c2 += (t2<t1)?1:0; \ t1 = t0+t0; t2 += (t1<t0)?1:0; \ c0 += t1; t2 += (c0<t1)?1:0; \ c0 += t0; t2 = t1+((c0<t0)?1:0);\ c1 += t2; c2 += (c1<t2)?1:0; \ c0 += t0; t1 += (c0<t0)?1:0; \ c1 += t1; c2 += (c1<t1)?1:0; \ } #define sqr_add_c(a,i,c0,c1,c2) { \ Loading @@ -501,10 +505,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) BN_ULONG ta=(a),tb=(b),t0; \ t1 = BN_UMULT_HIGH(ta,tb); \ t0 = ta * tb; \ t2 = t1+t1; c2 += (t2<t1)?1:0; \ t1 = t0+t0; t2 += (t1<t0)?1:0; \ c0 += t1; t2 += (c0<t1)?1:0; \ c0 += t0; t2 = t1+((c0<t0)?1:0);\ c1 += t2; c2 += (c1<t2)?1:0; \ c0 += t0; t1 += (c0<t0)?1:0; \ c1 += t1; c2 += (c1<t1)?1:0; \ } #define sqr_add_c(a,i,c0,c1,c2) { \ Loading
crypto/bn/bntest.c +78 −24 Original line number Diff line number Diff line Loading @@ -676,44 +676,98 @@ int test_mul(BIO *bp) int test_sqr(BIO *bp, BN_CTX *ctx) { BIGNUM a,c,d,e; int i; BIGNUM *a,*c,*d,*e; int i, ret = 0; BN_init(&a); BN_init(&c); BN_init(&d); BN_init(&e); a = BN_new(); c = BN_new(); d = BN_new(); e = BN_new(); if (a == NULL || c == NULL || d == NULL || e == NULL) { goto err; } for (i=0; i<num0; i++) { BN_bntest_rand(&a,40+i*10,0,0); a.neg=rand_neg(); BN_sqr(&c,&a,ctx); BN_bntest_rand(a,40+i*10,0,0); a->neg=rand_neg(); BN_sqr(c,a,ctx); if (bp != NULL) { if (!results) { BN_print(bp,&a); BN_print(bp,a); BIO_puts(bp," * "); BN_print(bp,&a); BN_print(bp,a); BIO_puts(bp," - "); } BN_print(bp,&c); BN_print(bp,c); BIO_puts(bp,"\n"); } BN_div(&d,&e,&c,&a,ctx); BN_sub(&d,&d,&a); if(!BN_is_zero(&d) || !BN_is_zero(&e)) BN_div(d,e,c,a,ctx); BN_sub(d,d,a); if(!BN_is_zero(d) || !BN_is_zero(e)) { fprintf(stderr,"Square test failed!\n"); return 0; goto err; } } BN_free(&a); BN_free(&c); BN_free(&d); BN_free(&e); return(1); /* Regression test for a BN_sqr overflow bug. */ BN_hex2bn(&a, "80000000000000008000000000000001FFFFFFFFFFFFFFFE0000000000000000"); BN_sqr(c, a, ctx); if (bp != NULL) { if (!results) { BN_print(bp,a); BIO_puts(bp," * "); BN_print(bp,a); BIO_puts(bp," - "); } BN_print(bp,c); BIO_puts(bp,"\n"); } BN_mul(d, a, a, ctx); if (BN_cmp(c, d)) { fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce " "different results!\n"); goto err; } /* Regression test for a BN_sqr overflow bug. */ BN_hex2bn(&a, "80000000000000000000000080000001FFFFFFFE000000000000000000000000"); BN_sqr(c, a, ctx); if (bp != NULL) { if (!results) { BN_print(bp,a); BIO_puts(bp," * "); BN_print(bp,a); BIO_puts(bp," - "); } BN_print(bp,c); BIO_puts(bp,"\n"); } BN_mul(d, a, a, ctx); if (BN_cmp(c, d)) { fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce " "different results!\n"); goto err; } ret = 1; err: if (a != NULL) BN_free(a); if (c != NULL) BN_free(c); if (d != NULL) BN_free(d); if (e != NULL) BN_free(e); return ret; } int test_mont(BIO *bp, BN_CTX *ctx) Loading