Commit 425dde5d authored by Dr. Matthias St. Pierre
Browse files

curve25519.c: improve formula alignment

parent 3a17b9a4
Loading
Loading
Loading
Loading
+513 −523
Original line number Diff line number Diff line
@@ -759,6 +759,7 @@ static void x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32],
 */
typedef int32_t fe[10];

/*
 * Bit masks shared by the arithmetic in this file.  Each kBottomNBits
 * constant has exactly its N least-significant bits set (2^N - 1).
 */
/* Low 21 bits set: 0x1fffff == 2097151, the decimal literal it replaces. */
static const int64_t kBottom21Bits =  0x1fffffLL;
/* Low 25 bits set (2^25 - 1). */
static const int64_t kBottom25Bits = 0x1ffffffLL;
/* Low 26 bits set (2^26 - 1). */
static const int64_t kBottom26Bits = 0x3ffffffLL;
/*
 * Bits 25..63 set, i.e. the 64-bit complement of kBottom25Bits.
 * NOTE(review): this hex literal does not fit in a signed long long, so it
 * has an unsigned type and is converted to int64_t on initialization;
 * the result is implementation-defined in ISO C -- confirm this is
 * acceptable on all supported compilers.
 */
static const int64_t kTop39Bits = 0xfffffffffe000000LL;
@@ -768,7 +769,7 @@ static uint64_t load_3(const uint8_t *in)
{
    uint64_t result;

    result = (uint64_t)in[0];
    result  = ((uint64_t)in[0]);
    result |= ((uint64_t)in[1]) << 8;
    result |= ((uint64_t)in[2]) << 16;
    return result;
@@ -778,7 +779,7 @@ static uint64_t load_4(const uint8_t *in)
{
    uint64_t result;

    result = (uint64_t)in[0];
    result  = ((uint64_t)in[0]);
    result |= ((uint64_t)in[1]) << 8;
    result |= ((uint64_t)in[2]) << 16;
    result |= ((uint64_t)in[3]) << 24;
@@ -797,7 +798,7 @@ static void fe_frombytes(fe h, const uint8_t *s)
    int64_t h6 =  load_3(s + 20) << 7;
    int64_t h7 =  load_3(s + 23) << 5;
    int64_t h8 =  load_3(s + 26) << 4;
    int64_t h9 = (load_3(s + 29) & 8388607) << 2;
    int64_t h9 = (load_3(s + 29) & 0x7fffff) << 2;
    int64_t carry0;
    int64_t carry1;
    int64_t carry2;
@@ -4612,29 +4613,29 @@ static void ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a,
*/
static void x25519_sc_reduce(uint8_t *s)
{
    int64_t s0 = 2097151 & load_3(s);
    int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
    int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
    int64_t s3 = 2097151 & (load_4(s + 7) >> 7);
    int64_t s4 = 2097151 & (load_4(s + 10) >> 4);
    int64_t s5 = 2097151 & (load_3(s + 13) >> 1);
    int64_t s6 = 2097151 & (load_4(s + 15) >> 6);
    int64_t s7 = 2097151 & (load_3(s + 18) >> 3);
    int64_t s8 = 2097151 & load_3(s + 21);
    int64_t s9 = 2097151 & (load_4(s + 23) >> 5);
    int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
    int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
    int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
    int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
    int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
    int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
    int64_t s16 = 2097151 & load_3(s + 42);
    int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
    int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
    int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
    int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
    int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
    int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
    int64_t s0  = kBottom21Bits &  load_3(s);
    int64_t s1  = kBottom21Bits & (load_4(s +  2) >> 5);
    int64_t s2  = kBottom21Bits & (load_3(s +  5) >> 2);
    int64_t s3  = kBottom21Bits & (load_4(s +  7) >> 7);
    int64_t s4  = kBottom21Bits & (load_4(s + 10) >> 4);
    int64_t s5  = kBottom21Bits & (load_3(s + 13) >> 1);
    int64_t s6  = kBottom21Bits & (load_4(s + 15) >> 6);
    int64_t s7  = kBottom21Bits & (load_3(s + 18) >> 3);
    int64_t s8  = kBottom21Bits &  load_3(s + 21);
    int64_t s9  = kBottom21Bits & (load_4(s + 23) >> 5);
    int64_t s10 = kBottom21Bits & (load_3(s + 26) >> 2);
    int64_t s11 = kBottom21Bits & (load_4(s + 28) >> 7);
    int64_t s12 = kBottom21Bits & (load_4(s + 31) >> 4);
    int64_t s13 = kBottom21Bits & (load_3(s + 34) >> 1);
    int64_t s14 = kBottom21Bits & (load_4(s + 36) >> 6);
    int64_t s15 = kBottom21Bits & (load_3(s + 39) >> 3);
    int64_t s16 = kBottom21Bits &  load_3(s + 42);
    int64_t s17 = kBottom21Bits & (load_4(s + 44) >> 5);
    int64_t s18 = kBottom21Bits & (load_3(s + 47) >> 2);
    int64_t s19 = kBottom21Bits & (load_4(s + 49) >> 7);
    int64_t s20 = kBottom21Bits & (load_4(s + 52) >> 4);
    int64_t s21 = kBottom21Bits & (load_3(s + 55) >> 1);
    int64_t s22 = kBottom21Bits & (load_4(s + 57) >> 6);
    int64_t s23 =                 (load_4(s + 60) >> 3);
    int64_t carry0;
    int64_t carry1;
@@ -4957,41 +4958,41 @@ static void x25519_sc_reduce(uint8_t *s)
static void sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b,
                      const uint8_t *c)
{
    int64_t a0 = 2097151 & load_3(a);
    int64_t a1 = 2097151 & (load_4(a + 2) >> 5);
    int64_t a2 = 2097151 & (load_3(a + 5) >> 2);
    int64_t a3 = 2097151 & (load_4(a + 7) >> 7);
    int64_t a4 = 2097151 & (load_4(a + 10) >> 4);
    int64_t a5 = 2097151 & (load_3(a + 13) >> 1);
    int64_t a6 = 2097151 & (load_4(a + 15) >> 6);
    int64_t a7 = 2097151 & (load_3(a + 18) >> 3);
    int64_t a8 = 2097151 & load_3(a + 21);
    int64_t a9 = 2097151 & (load_4(a + 23) >> 5);
    int64_t a10 = 2097151 & (load_3(a + 26) >> 2);
    int64_t a0  = kBottom21Bits &  load_3(a);
    int64_t a1  = kBottom21Bits & (load_4(a +  2) >> 5);
    int64_t a2  = kBottom21Bits & (load_3(a +  5) >> 2);
    int64_t a3  = kBottom21Bits & (load_4(a +  7) >> 7);
    int64_t a4  = kBottom21Bits & (load_4(a + 10) >> 4);
    int64_t a5  = kBottom21Bits & (load_3(a + 13) >> 1);
    int64_t a6  = kBottom21Bits & (load_4(a + 15) >> 6);
    int64_t a7  = kBottom21Bits & (load_3(a + 18) >> 3);
    int64_t a8  = kBottom21Bits &  load_3(a + 21);
    int64_t a9  = kBottom21Bits & (load_4(a + 23) >> 5);
    int64_t a10 = kBottom21Bits & (load_3(a + 26) >> 2);
    int64_t a11 =                 (load_4(a + 28) >> 7);
    int64_t b0 = 2097151 & load_3(b);
    int64_t b1 = 2097151 & (load_4(b + 2) >> 5);
    int64_t b2 = 2097151 & (load_3(b + 5) >> 2);
    int64_t b3 = 2097151 & (load_4(b + 7) >> 7);
    int64_t b4 = 2097151 & (load_4(b + 10) >> 4);
    int64_t b5 = 2097151 & (load_3(b + 13) >> 1);
    int64_t b6 = 2097151 & (load_4(b + 15) >> 6);
    int64_t b7 = 2097151 & (load_3(b + 18) >> 3);
    int64_t b8 = 2097151 & load_3(b + 21);
    int64_t b9 = 2097151 & (load_4(b + 23) >> 5);
    int64_t b10 = 2097151 & (load_3(b + 26) >> 2);
    int64_t b0  = kBottom21Bits &  load_3(b);
    int64_t b1  = kBottom21Bits & (load_4(b +  2) >> 5);
    int64_t b2  = kBottom21Bits & (load_3(b +  5) >> 2);
    int64_t b3  = kBottom21Bits & (load_4(b +  7) >> 7);
    int64_t b4  = kBottom21Bits & (load_4(b + 10) >> 4);
    int64_t b5  = kBottom21Bits & (load_3(b + 13) >> 1);
    int64_t b6  = kBottom21Bits & (load_4(b + 15) >> 6);
    int64_t b7  = kBottom21Bits & (load_3(b + 18) >> 3);
    int64_t b8  = kBottom21Bits &  load_3(b + 21);
    int64_t b9  = kBottom21Bits & (load_4(b + 23) >> 5);
    int64_t b10 = kBottom21Bits & (load_3(b + 26) >> 2);
    int64_t b11 =                 (load_4(b + 28) >> 7);
    int64_t c0 = 2097151 & load_3(c);
    int64_t c1 = 2097151 & (load_4(c + 2) >> 5);
    int64_t c2 = 2097151 & (load_3(c + 5) >> 2);
    int64_t c3 = 2097151 & (load_4(c + 7) >> 7);
    int64_t c4 = 2097151 & (load_4(c + 10) >> 4);
    int64_t c5 = 2097151 & (load_3(c + 13) >> 1);
    int64_t c6 = 2097151 & (load_4(c + 15) >> 6);
    int64_t c7 = 2097151 & (load_3(c + 18) >> 3);
    int64_t c8 = 2097151 & load_3(c + 21);
    int64_t c9 = 2097151 & (load_4(c + 23) >> 5);
    int64_t c10 = 2097151 & (load_3(c + 26) >> 2);
    int64_t c0  = kBottom21Bits &  load_3(c);
    int64_t c1  = kBottom21Bits & (load_4(c +  2) >> 5);
    int64_t c2  = kBottom21Bits & (load_3(c +  5) >> 2);
    int64_t c3  = kBottom21Bits & (load_4(c +  7) >> 7);
    int64_t c4  = kBottom21Bits & (load_4(c + 10) >> 4);
    int64_t c5  = kBottom21Bits & (load_3(c + 13) >> 1);
    int64_t c6  = kBottom21Bits & (load_4(c + 15) >> 6);
    int64_t c7  = kBottom21Bits & (load_3(c + 18) >> 3);
    int64_t c8  = kBottom21Bits &  load_3(c + 21);
    int64_t c9  = kBottom21Bits & (load_4(c + 23) >> 5);
    int64_t c10 = kBottom21Bits & (load_3(c + 26) >> 2);
    int64_t c11 =                 (load_4(c + 28) >> 7);
    int64_t s0;
    int64_t s1;
@@ -5047,28 +5048,17 @@ static void sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b,
    s3  = c3   +   a0 * b3   +   a1 * b2   +   a2 * b1  +   a3 * b0;
    s4  = c4   +   a0 * b4   +   a1 * b3   +   a2 * b2  +   a3 * b1  +   a4 * b0;
    s5  = c5   +   a0 * b5   +   a1 * b4   +   a2 * b3  +   a3 * b2  +   a4 * b1  +   a5 * b0;
    s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 +
        a6 * b0;
    s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 +
        a6 * b1 + a7 * b0;
    s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 +
        a6 * b2 + a7 * b1 + a8 * b0;
    s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 +
        a6 * b3 + a7 * b2 + a8 * b1 + a9 * b0;
    s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 +
        a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1 + a10 * b0;
    s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 +
        a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0;
    s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 +
        a7 * b5 + a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1;
    s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 +
        a8 * b5 + a9 * b4 + a10 * b3 + a11 * b2;
    s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 +
        a9 * b5 + a10 * b4 + a11 * b3;
    s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 +
        a10 * b5 + a11 * b4;
    s16 = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 +
        a11 * b5;
    s6  = c6   +   a0 * b6   +   a1 * b5   +   a2 * b4  +   a3 * b3  +   a4 * b2  +   a5 * b1 +   a6 * b0;
    s7  = c7   +   a0 * b7   +   a1 * b6   +   a2 * b5  +   a3 * b4  +   a4 * b3  +   a5 * b2 +   a6 * b1   +   a7 * b0;
    s8  = c8   +   a0 * b8   +   a1 * b7   +   a2 * b6  +   a3 * b5  +   a4 * b4  +   a5 * b3 +   a6 * b2   +   a7 * b1   +   a8 * b0;
    s9  = c9   +   a0 * b9   +   a1 * b8   +   a2 * b7  +   a3 * b6  +   a4 * b5  +   a5 * b4 +   a6 * b3   +   a7 * b2   +   a8 * b1  +   a9 * b0;
    s10 = c10  +   a0 * b10  +   a1 * b9   +   a2 * b8  +   a3 * b7  +   a4 * b6  +   a5 * b5 +   a6 * b4   +   a7 * b3   +   a8 * b2  +   a9 * b1  +  a10 * b0;
    s11 = c11  +   a0 * b11  +   a1 * b10  +   a2 * b9  +   a3 * b8  +   a4 * b7  +   a5 * b6 +   a6 * b5   +   a7 * b4   +   a8 * b3  +   a9 * b2  +  a10 * b1  +  a11 * b0;
    s12 =          a1 * b11  +   a2 * b10  +   a3 * b9  +   a4 * b8  +   a5 * b7  +   a6 * b6 +   a7 * b5   +   a8 * b4   +   a9 * b3  +  a10 * b2  +  a11 * b1;
    s13 =          a2 * b11  +   a3 * b10  +   a4 * b9  +   a5 * b8  +   a6 * b7  +   a7 * b6 +   a8 * b5   +   a9 * b4   +  a10 * b3  +  a11 * b2;
    s14 =          a3 * b11  +   a4 * b10  +   a5 * b9  +   a6 * b8  +   a7 * b7  +   a8 * b6 +   a9 * b5   +  a10 * b4   +  a11 * b3;
    s15 =          a4 * b11  +   a5 * b10  +   a6 * b9  +   a7 * b8  +   a8 * b7  +   a9 * b6 +  a10 * b5   +  a11 * b4;
    s16 =          a5 * b11  +   a6 * b10  +   a7 * b9  +   a8 * b8  +   a9 * b7  +  a10 * b6 +  a11 * b5;
    s17 =          a6 * b11  +   a7 * b10  +   a8 * b9  +   a9 * b8  +  a10 * b7  +  a11 * b6;
    s18 =          a7 * b11  +   a8 * b10  +   a9 * b9  +  a10 * b8  +  a11 * b7;
    s19 =          a8 * b11  +   a9 * b10  +  a10 * b9  +  a11 * b8;