Commit 6a1ed7a3 authored by Dr. Matthias St. Pierre
Browse files

curve25519.c: reformat code to follow coding guidelines



Fixes #7698

Reviewed-by: Matt Caswell <matt@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/7750)

(cherry picked from commit 3a17b9a4)
parent db860ea3
Loading
Loading
Loading
Loading
+2344 −2228
Original line number Diff line number Diff line
@@ -744,18 +744,19 @@ static void x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32],

/*
 * Reference base 2^25.5 implementation.
 */
/*
 *
 * This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP
 * 20141124 (http://bench.cr.yp.to/supercop.html).
 *
 * The field functions are shared by Ed25519 and X25519 where possible.
 */

/* fe means field element. Here the field is \Z/(2^255-19). An element t,
/*
 * fe means field element. Here the field is \Z/(2^255-19). An element t,
 * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
 * t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
 * context.  */
 * context.
 */
typedef int32_t fe[10];

static const int64_t kBottom25Bits = 0x1ffffffLL;
@@ -763,16 +764,20 @@ static const int64_t kBottom26Bits = 0x3ffffffLL;
static const int64_t kTop39Bits = 0xfffffffffe000000LL;
static const int64_t kTop38Bits = 0xfffffffffc000000LL;

static uint64_t load_3(const uint8_t *in) {
static uint64_t load_3(const uint8_t *in)
{
    /*
     * Assemble three consecutive bytes, least significant byte first, into
     * the low 24 bits of the result.  Each byte is widened to 64 bits before
     * it is shifted, so the shifts are performed in unsigned 64-bit
     * arithmetic.
     */
    return (uint64_t)in[0]
        | ((uint64_t)in[1] << 8)
        | ((uint64_t)in[2] << 16);
}

static uint64_t load_4(const uint8_t *in) {
static uint64_t load_4(const uint8_t *in)
{
    uint64_t result;

    result = (uint64_t)in[0];
    result |= ((uint64_t)in[1]) << 8;
    result |= ((uint64_t)in[2]) << 16;
@@ -780,7 +785,8 @@ static uint64_t load_4(const uint8_t *in) {
    return result;
}

static void fe_frombytes(fe h, const uint8_t *s) {
static void fe_frombytes(fe h, const uint8_t *s)
{
    /* Ignores top bit of h. */
    int64_t h0 = load_4(s);
    int64_t h1 = load_3(s + 4) << 6;
@@ -827,7 +833,8 @@ static void fe_frombytes(fe h, const uint8_t *s) {
    h[9] = (int32_t)h9;
}

/* Preconditions:
/*
 * Preconditions:
 *   |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
 *
 * Write p=2^255-19; q=floor(h/p).
@@ -848,8 +855,10 @@ static void fe_frombytes(fe h, const uint8_t *s) {
 *   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
 *
 *   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
 *   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. */
static void fe_tobytes(uint8_t *s, const fe h) {
 *   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
 */
static void fe_tobytes(uint8_t *s, const fe h)
{
    int32_t h0 = h[0];
    int32_t h1 = h[1];
    int32_t h2 = h[2];
@@ -890,11 +899,12 @@ static void fe_tobytes(uint8_t *s, const fe h) {
                    h9 &= kBottom25Bits;
    /* h10 = carry9 */

  /* Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
    /*
     * Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
     * Have h0+...+2^230 h9 between 0 and 2^255-1;
     * evidently 2^255 h10-2^255 q = 0.
   * Goal: Output h0+...+2^230 h9.  */

     * Goal: Output h0+...+2^230 h9.
     */
    s[0] = (uint8_t)(h0 >> 0);
    s[1] = (uint8_t)(h0 >> 8);
    s[2] = (uint8_t)(h0 >> 16);
@@ -930,20 +940,27 @@ static void fe_tobytes(uint8_t *s, const fe h) {
}

/* h = f */
static void fe_copy(fe h, const fe f) {
static void fe_copy(fe h, const fe f)
{
    /*
     * Copy all ten 32-bit limbs of f into h.  memmove is used rather than
     * memcpy, so the call stays well defined when h and f overlap.
     */
    memmove(h, f, 10 * sizeof(int32_t));
}

/* h = 0 */
static void fe_0(fe h) { memset(h, 0, sizeof(int32_t) * 10); }
static void fe_0(fe h)
{
    unsigned limb;

    /* Clear every one of the ten limbs so that h represents zero. */
    for (limb = 0; limb < 10; limb++) {
        h[limb] = 0;
    }
}

/* h = 1 */
static void fe_1(fe h) {
static void fe_1(fe h)
{
    unsigned limb;

    /* The lowest limb carries the value 1; every higher limb is zero. */
    h[0] = 1;
    for (limb = 1; limb < 10; limb++) {
        h[limb] = 0;
    }
}

/* h = f + g
/*
 * h = f + g
 *
 * Can overlap h with f or g.
 *
 * Preconditions:
@@ -951,15 +968,20 @@ static void fe_1(fe h) {
 *    |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
 *
 * Postconditions:
 *    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */
static void fe_add(fe h, const fe f, const fe g) {
 *    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
 */
static void fe_add(fe h, const fe f, const fe g)
{
    unsigned limb = 0;

    /*
     * Limb-wise sum with no carry propagation.  Each output limb depends
     * only on the input limbs at the same index.
     */
    while (limb < 10) {
        h[limb] = f[limb] + g[limb];
        limb++;
    }
}

/* h = f - g
/*
 * h = f - g
 *
 * Can overlap h with f or g.
 *
 * Preconditions:
@@ -967,15 +989,20 @@ static void fe_add(fe h, const fe f, const fe g) {
 *    |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
 *
 * Postconditions:
 *    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */
static void fe_sub(fe h, const fe f, const fe g) {
 *    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
 */
static void fe_sub(fe h, const fe f, const fe g)
{
    unsigned limb = 0;

    /*
     * Limb-wise difference with no borrow propagation.  Individual limbs
     * of the result may therefore be negative.
     */
    while (limb < 10) {
        h[limb] = f[limb] - g[limb];
        limb++;
    }
}

/* h = f * g
/*
 * h = f * g
 *
 * Can overlap h with f or g.
 *
 * Preconditions:
@@ -1001,8 +1028,10 @@ static void fe_sub(fe h, const fe f, const fe g) {
 * 10 of them are 2-way parallelizable and vectorizable.
 * Can get away with 11 carries, but then data flow is much deeper.
 *
 * With tighter constraints on inputs can squeeze carries into int32. */
static void fe_mul(fe h, const fe f, const fe g) {
 * With tighter constraints on inputs can squeeze carries into int32.
 */
static void fe_mul(fe h, const fe f, const fe g)
{
    int32_t f0 = f[0];
    int32_t f1 = f[1];
    int32_t f2 = f[2];
@@ -1218,7 +1247,9 @@ static void fe_mul(fe h, const fe f, const fe g) {
    h[9] = (int32_t)h9;
}

/* h = f * f
/*
 * h = f * f
 *
 * Can overlap h with f.
 *
 * Preconditions:
@@ -1227,8 +1258,10 @@ static void fe_mul(fe h, const fe f, const fe g) {
 * Postconditions:
 *    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
 *
 * See fe_mul.c for discussion of implementation strategy. */
static void fe_sq(fe h, const fe f) {
 * See the comment on fe_mul() for discussion of implementation strategy.
 */
static void fe_sq(fe h, const fe f)
{
    int32_t f0 = f[0];
    int32_t f1 = f[1];
    int32_t f2 = f[2];
@@ -1359,7 +1392,8 @@ static void fe_sq(fe h, const fe f) {
    h[9] = (int32_t)h9;
}

static void fe_invert(fe out, const fe z) {
static void fe_invert(fe out, const fe z)
{
    fe t0;
    fe t1;
    fe t2;
@@ -1454,26 +1488,34 @@ static void fe_invert(fe out, const fe z) {
    fe_mul(out, t1, t0);
}

/* h = -f
/*
 * h = -f
 *
 * Preconditions:
 *    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
 *
 * Postconditions:
 *    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. */
static void fe_neg(fe h, const fe f) {
 *    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
 */
static void fe_neg(fe h, const fe f)
{
    unsigned limb = 0;

    /* Negate each of the ten limbs independently; no carries are needed. */
    while (limb < 10) {
        h[limb] = -f[limb];
        limb++;
    }
}

/* Replace (f,g) with (g,g) if b == 1;
/*
 * Replace (f,g) with (g,g) if b == 1;
 * replace (f,g) with (f,g) if b == 0.
 *
 * Preconditions: b in {0,1}. */
static void fe_cmov(fe f, const fe g, unsigned b) {
 * Preconditions: b in {0,1}.
 */
static void fe_cmov(fe f, const fe g, unsigned b)
{
    size_t i;

    b = 0-b;
    for (i = 0; i < 10; i++) {
        int32_t x = f[i] ^ g[i];
@@ -1482,31 +1524,41 @@ static void fe_cmov(fe f, const fe g, unsigned b) {
    }
}

/* return 0 if f == 0
/*
 * return 0 if f == 0
 * return 1 if f != 0
 *
 * Preconditions:
 *    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */
static int fe_isnonzero(const fe f) {
 *    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
 */
static int fe_isnonzero(const fe f)
{
    static const uint8_t all_zero[32] = {0};
    uint8_t encoded[32];

    /*
     * Serialise f to its 32-byte encoding and compare that against 32 zero
     * bytes.  The comparison goes through CRYPTO_memcmp rather than memcmp
     * (documented by OpenSSL as constant-time).
     */
    fe_tobytes(encoded, f);

    /* Collapse the comparison result to exactly 0 or 1. */
    return CRYPTO_memcmp(encoded, all_zero, sizeof(all_zero)) != 0;
}

/* return 1 if f is in {1,3,5,...,q-2}
/*
 * return 1 if f is in {1,3,5,...,q-2}
 * return 0 if f is in {0,2,4,...,q-1}
 *
 * Preconditions:
 *    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */
static int fe_isnegative(const fe f) {
 *    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
 */
static int fe_isnegative(const fe f)
{
    uint8_t encoded[32];

    /*
     * The "sign" of a field element is the parity of its 32-byte encoding:
     * the least significant bit of the first output byte.
     */
    fe_tobytes(encoded, f);
    return encoded[0] & 1;
}

/* h = 2 * f * f
/*
 * h = 2 * f * f
 *
 * Can overlap h with f.
 *
 * Preconditions:
@@ -1515,8 +1567,10 @@ static int fe_isnegative(const fe f) {
 * Postconditions:
 *    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
 *
 * See fe_mul.c for discussion of implementation strategy. */
static void fe_sq2(fe h, const fe f) {
 * See the comment on fe_mul() for discussion of implementation strategy.
 */
static void fe_sq2(fe h, const fe f)
{
    int32_t f0 = f[0];
    int32_t f1 = f[1];
    int32_t f2 = f[2];
@@ -1658,7 +1712,8 @@ static void fe_sq2(fe h, const fe f) {
    h[9] = (int32_t)h9;
}

static void fe_pow22523(fe out, const fe z) {
static void fe_pow22523(fe out, const fe z)
{
    fe t0;
    fe t1;
    fe t2;
@@ -1715,8 +1770,9 @@ static void fe_pow22523(fe out, const fe z) {
    fe_mul(out, t0, z);
}

/* ge means group element.

/*
 * ge means group element.
 *
 * Here the group is the set of pairs (x,y) of field elements (see fe above)
 * satisfying -x^2 + y^2 = 1 + d x^2y^2
 * where d = -121665/121666.
@@ -1725,8 +1781,8 @@ static void fe_pow22523(fe out, const fe z) {
 *   ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z
 *   ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
 *   ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
 *   ge_precomp (Duif): (y+x,y-x,2dxy) */

 *   ge_precomp (Duif): (y+x,y-x,2dxy)
 */
typedef struct {
    fe X;
    fe Y;
@@ -1760,7 +1816,8 @@ typedef struct {
    fe T2d;
} ge_cached;

static void ge_tobytes(uint8_t *s, const ge_p2 *h) {
static void ge_tobytes(uint8_t *s, const ge_p2 *h)
{
    fe recip;
    fe x;
    fe y;
@@ -1772,7 +1829,8 @@ static void ge_tobytes(uint8_t *s, const ge_p2 *h) {
    s[31] ^= fe_isnegative(x) << 7;
}

static void ge_p3_tobytes(uint8_t *s, const ge_p3 *h) {
static void ge_p3_tobytes(uint8_t *s, const ge_p3 *h)
{
    fe recip;
    fe x;
    fe y;
@@ -1784,13 +1842,18 @@ static void ge_p3_tobytes(uint8_t *s, const ge_p3 *h) {
    s[31] ^= fe_isnegative(x) << 7;
}

static const fe d = {-10913610, 13857413, -15372611, 6949391,   114729,
                     -8787816,  -6275908, -3247719,  -18696448, -12055116};
/*
 * Presumably the curve constant d = -121665/121666 from the group equation
 * -x^2 + y^2 = 1 + d x^2y^2, stored as ten limbs in the fe representation.
 * NOTE(review): the limb values have not been re-derived here -- confirm.
 */
static const fe d = {
    -10913610, 13857413, -15372611, 6949391,   114729,
    -8787816,  -6275908, -3247719,  -18696448, -12055116
};

static const fe sqrtm1 = {-32595792, -7943725,  9377950,  3500415, 12389472,
                          -272473,   -25146209, -2005654, 326686,  11406482};
/*
 * Judging by the name, a square root of -1 in the field, stored as ten
 * limbs in the fe representation.
 * NOTE(review): the value has not been re-derived here -- confirm.
 */
static const fe sqrtm1 = {
    -32595792, -7943725,  9377950,  3500415, 12389472,
    -272473,   -25146209, -2005654, 326686,  11406482
};

static int ge_frombytes_vartime(ge_p3 *h, const uint8_t *s) {
static int ge_frombytes_vartime(ge_p3 *h, const uint8_t *s)
{
    fe u;
    fe v;
    fe v3;
@@ -1833,37 +1896,44 @@ static int ge_frombytes_vartime(ge_p3 *h, const uint8_t *s) {
    return 0;
}

static void ge_p2_0(ge_p2 *h) {
static void ge_p2_0(ge_p2 *h)
{
    /*
     * Projective coordinates (X:Y:Z) = (0:1:1), i.e. the affine point
     * (x, y) = (X/Z, Y/Z) = (0, 1).  The three stores are independent.
     */
    fe_1(h->Z);
    fe_1(h->Y);
    fe_0(h->X);
}

static void ge_p3_0(ge_p3 *h) {
static void ge_p3_0(ge_p3 *h)
{
    /*
     * Extended coordinates (X:Y:Z:T) = (0:1:1:0), i.e. the affine point
     * (x, y) = (0, 1).  T = 0 keeps the invariant XY = ZT.
     */
    fe_1(h->Z);
    fe_1(h->Y);
    fe_0(h->X);
    fe_0(h->T);
}

static void ge_precomp_0(ge_precomp *h) {
static void ge_precomp_0(ge_precomp *h)
{
    /*
     * Precomputed form (y+x, y-x, 2dxy) evaluated at (x, y) = (0, 1):
     * both the sum and the difference are 1, the product term is 0.
     */
    fe_0(h->xy2d);
    fe_1(h->yminusx);
    fe_1(h->yplusx);
}

/* r = p */
static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p)
{
    /*
     * Extended (X:Y:Z:T) to projective (X:Y:Z): the three shared
     * coordinates are copied unchanged and T is simply not carried over.
     */
    fe_copy(r->Z, p->Z);
    fe_copy(r->Y, p->Y);
    fe_copy(r->X, p->X);
}

static const fe d2 = {-21827239, -5839606,  -30745221, 13898782, 229458,
                      15978800,  -12551817, -6495438,  29715968, 9444199};
/*
 * Presumably 2*d in the fe representation: limbs 3 and 4 are exactly twice
 * the corresponding limbs of d, the others differ by what looks like
 * carry/reduction adjustments.
 * NOTE(review): the value has not been re-derived here -- confirm.
 */
static const fe d2 = {
    -21827239, -5839606,  -30745221, 13898782, 229458,
    15978800,  -12551817, -6495438,  29715968, 9444199
};

/* r = p */
static void ge_p3_to_cached(ge_cached *r, const ge_p3 *p) {
static void ge_p3_to_cached(ge_cached *r, const ge_p3 *p)
{
    fe_add(r->YplusX, p->Y, p->X);
    fe_sub(r->YminusX, p->Y, p->X);
    fe_copy(r->Z, p->Z);
@@ -1871,14 +1941,16 @@ static void ge_p3_to_cached(ge_cached *r, const ge_p3 *p) {
}

/* r = p */
static void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) {
static void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p)
{
    /*
     * Completed form ((X:Z),(Y:T)) has x = X/Z and y = Y/T.  Putting both
     * over the common denominator Z*T gives the projective point
     * (X*T : Y*Z : Z*T).
     */
    fe_mul(r->X, p->X, p->T);   /* X' = X * T */
    fe_mul(r->Y, p->Y, p->Z);   /* Y' = Y * Z */
    fe_mul(r->Z, p->Z, p->T);   /* Z' = Z * T */
}

/* r = p */
static void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) {
static void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p)
{
    fe_mul(r->X, p->X, p->T);
    fe_mul(r->Y, p->Y, p->Z);
    fe_mul(r->Z, p->Z, p->T);
@@ -1886,7 +1958,8 @@ static void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) {
}

/* r = 2 * p */
static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p)
{
    fe t0;

    fe_sq(r->X, p->X);
@@ -1901,14 +1974,16 @@ static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
}

/* r = 2 * p */
static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) {
static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p)
{
    ge_p2 projective;

    /*
     * Doubling is delegated to ge_p2_dbl, which takes a projective point,
     * so p is first converted from extended to projective form.
     */
    ge_p3_to_p2(&projective, p);
    ge_p2_dbl(r, &projective);
}

/* r = p + q */
static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q)
{
    fe t0;

    fe_add(r->X, p->Y, p->X);
@@ -1924,7 +1999,8 @@ static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
}

/* r = p - q */
static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q)
{
    fe t0;

    fe_add(r->X, p->Y, p->X);
@@ -1940,7 +2016,8 @@ static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
}

/* r = p + q */
static void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
static void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q)
{
    fe t0;

    fe_add(r->X, p->Y, p->X);
@@ -1957,7 +2034,8 @@ static void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
}

/* r = p - q */
static void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
static void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q)
{
    fe t0;

    fe_add(r->X, p->Y, p->X);
@@ -1973,7 +2051,8 @@ static void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
    fe_add(r->T, t0, r->T);
}

static uint8_t equal(signed char b, signed char c) {
static uint8_t equal(signed char b, signed char c)
{
    uint8_t ub = b;
    uint8_t uc = c;
    uint8_t x = ub ^ uc; /* 0: yes; 1..255: no */
@@ -1983,7 +2062,8 @@ static uint8_t equal(signed char b, signed char c) {
    return y;
}

static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b) {
static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b)
{
    fe_cmov(t->yplusx, u->yplusx, b);
    fe_cmov(t->yminusx, u->yminusx, b);
    fe_cmov(t->xy2d, u->xy2d, b);
@@ -4105,13 +4185,16 @@ static const ge_precomp k25519Precomp[32][8] = {
    },
};

static uint8_t negative(signed char b) {
static uint8_t negative(signed char b)
{
    /*
     * Converting a negative signed char to uint32_t wraps modulo 2^32,
     * which sets bit 31; non-negative values leave it clear.  Extracting
     * that bit yields 1 for b < 0 and 0 otherwise, without a branch.
     */
    const uint32_t widened = (uint32_t)b;

    return (uint8_t)(widened >> 31);
}

static void table_select(ge_precomp *t, int pos, signed char b) {
static void table_select(ge_precomp *t, int pos, signed char b)
{
    ge_precomp minust;
    uint8_t bnegative = negative(b);
    uint8_t babs = b - ((uint8_t)((-bnegative) & b) << 1);
@@ -4131,13 +4214,17 @@ static void table_select(ge_precomp *t, int pos, signed char b) {
    cmov(t, &minust, bnegative);
}

/* h = a * B
/*
 * h = a * B
 *
 * where a = a[0]+256*a[1]+...+256^31 a[31]
 * B is the Ed25519 base point (x,4/5) with x positive.
 *
 * Preconditions:
 *   a[31] <= 127 */
static void ge_scalarmult_base(ge_p3 *h, const uint8_t *a) {
 *   a[31] <= 127
 */
static void ge_scalarmult_base(ge_p3 *h, const uint8_t *a)
{
    signed char e[64];
    signed char carry;
    ge_p1p1 r;
@@ -4188,12 +4275,16 @@ static void ge_scalarmult_base(ge_p3 *h, const uint8_t *a) {
}

#if !defined(BASE_2_51_IMPLEMENTED)
/* Replace (f,g) with (g,f) if b == 1;
/*
 * Replace (f,g) with (g,f) if b == 1;
 * replace (f,g) with (f,g) if b == 0.
 *
 * Preconditions: b in {0,1}. */
static void fe_cswap(fe f, fe g, unsigned int b) {
 * Preconditions: b in {0,1}.
 */
static void fe_cswap(fe f, fe g, unsigned int b)
{
    size_t i;

    b = 0-b;
    for (i = 0; i < 10; i++) {
        int32_t x = f[i] ^ g[i];
@@ -4203,15 +4294,19 @@ static void fe_cswap(fe f, fe g, unsigned int b) {
    }
}

/* h = f * 121666
/*
 * h = f * 121666
 *
 * Can overlap h with f.
 *
 * Preconditions:
 *    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
 *
 * Postconditions:
 *    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. */
static void fe_mul121666(fe h, fe f) {
 *    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
 */
static void fe_mul121666(fe h, fe f)
{
    int32_t f0 = f[0];
    int32_t f1 = f[1];
    int32_t f2 = f[2];
@@ -4324,7 +4419,8 @@ static void x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32],
}
#endif

static void slide(signed char *r, const uint8_t *a) {
static void slide(signed char *r, const uint8_t *a)
{
    int i;
    int b;
    int k;
@@ -4425,12 +4521,16 @@ static const ge_precomp Bi[8] = {
    },
};

/* r = a * A + b * B
/*
 * r = a * A + b * B
 *
 * where a = a[0]+256*a[1]+...+256^31 a[31].
 * and b = b[0]+256*b[1]+...+256^31 b[31].
 * B is the Ed25519 base point (x,4/5) with x positive. */
 * B is the Ed25519 base point (x,4/5) with x positive.
 */
static void ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a,
                                         const ge_p3 *A, const uint8_t *b) {
                                         const ge_p3 *A, const uint8_t *b)
{
    signed char aslide[256];
    signed char bslide[256];
    ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
@@ -4498,17 +4598,20 @@ static void ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a,
    }
}

/* The set of scalars is \Z/l
 * where l = 2^252 + 27742317777372353535851937790883648493. */

/* Input:
/*
 * The set of scalars is \Z/l
 * where l = 2^252 + 27742317777372353535851937790883648493.
 *
 * Input:
 *   s[0]+256*s[1]+...+256^63*s[63] = s
 *
 * Output:
 *   s[0]+256*s[1]+...+256^31*s[31] = s mod l
 *   where l = 2^252 + 27742317777372353535851937790883648493.
 *   Overwrites s in place. */
static void x25519_sc_reduce(uint8_t *s) {
 *   Overwrites s in place.
 */
static void x25519_sc_reduce(uint8_t *s)
{
    int64_t s0 = 2097151 & load_3(s);
    int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
    int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
@@ -4841,16 +4944,19 @@ static void x25519_sc_reduce(uint8_t *s) {
    s[31] = (uint8_t)(s11 >> 17);
}

/* Input:
/*
 * Input:
 *   a[0]+256*a[1]+...+256^31*a[31] = a
 *   b[0]+256*b[1]+...+256^31*b[31] = b
 *   c[0]+256*c[1]+...+256^31*c[31] = c
 *
 * Output:
 *   s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
 *   where l = 2^252 + 27742317777372353535851937790883648493. */
 *   where l = 2^252 + 27742317777372353535851937790883648493.
 */
static void sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b,
                      const uint8_t *c) {
                      const uint8_t *c)
{
    int64_t a0 = 2097151 & load_3(a);
    int64_t a1 = 2097151 & (load_4(a + 2) >> 5);
    int64_t a2 = 2097151 & (load_3(a + 5) >> 2);
@@ -4941,7 +5047,8 @@ static void sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b,
    s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
    s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;
    s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0;
  s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0;
    s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 +
        a6 * b0;
    s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 +
        a6 * b1 + a7 * b0;
    s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 +
@@ -4952,15 +5059,16 @@ static void sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b,
        a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1 + a10 * b0;
    s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 +
        a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0;
  s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 + a7 * b5 +
        a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1;
  s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 + a8 * b5 +
        a9 * b4 + a10 * b3 + a11 * b2;
  s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5 +
        a10 * b4 + a11 * b3;
  s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 + a10 * b5 +
        a11 * b4;
  s16 = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5;
    s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 +
        a7 * b5 + a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1;
    s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 +
        a8 * b5 + a9 * b4 + a10 * b3 + a11 * b2;
    s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 +
        a9 * b5 + a10 * b4 + a11 * b3;
    s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 +
        a10 * b5 + a11 * b4;
    s16 = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 +
        a11 * b5;
    s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6;
    s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7;
    s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8;
@@ -5331,7 +5439,8 @@ static void sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b,
}

int ED25519_sign(uint8_t *out_sig, const uint8_t *message, size_t message_len,
                 const uint8_t public_key[32], const uint8_t private_key[32]) {
                 const uint8_t public_key[32], const uint8_t private_key[32])
{
    uint8_t az[SHA512_DIGEST_LENGTH];
    uint8_t nonce[SHA512_DIGEST_LENGTH];
    ge_p3 R;
@@ -5374,7 +5483,8 @@ int ED25519_sign(uint8_t *out_sig, const uint8_t *message, size_t message_len,
static const char allzeroes[15];

int ED25519_verify(const uint8_t *message, size_t message_len,
                   const uint8_t signature[64], const uint8_t public_key[32]) {
                   const uint8_t signature[64], const uint8_t public_key[32])
{
    int i;
    ge_p3 A;
    const uint8_t *r, *s;
@@ -5441,7 +5551,8 @@ int ED25519_verify(const uint8_t *message, size_t message_len,
}

void ED25519_public_from_private(uint8_t out_public_key[32],
                                 const uint8_t private_key[32]) {
                                 const uint8_t private_key[32])
{
    uint8_t az[SHA512_DIGEST_LENGTH];
    ge_p3 A;

@@ -5458,7 +5569,8 @@ void ED25519_public_from_private(uint8_t out_public_key[32],
}

int X25519(uint8_t out_shared_key[32], const uint8_t private_key[32],
           const uint8_t peer_public_value[32]) {
           const uint8_t peer_public_value[32])
{
    static const uint8_t kZeros[32] = {0};
    x25519_scalar_mult(out_shared_key, private_key, peer_public_value);
    /* The all-zero output results when the input is a point of small order. */
@@ -5466,7 +5578,8 @@ int X25519(uint8_t out_shared_key[32], const uint8_t private_key[32],
}

void X25519_public_from_private(uint8_t out_public_value[32],
                                const uint8_t private_key[32]) {
                                const uint8_t private_key[32])
{
    uint8_t e[32];
    ge_p3 A;
    fe zplusy, zminusy, zminusy_inv;
@@ -5478,8 +5591,11 @@ void X25519_public_from_private(uint8_t out_public_value[32],

    ge_scalarmult_base(&A, e);

  /* We only need the u-coordinate of the curve25519 point. The map is
   * u=(y+1)/(1-y). Since y=Y/Z, this gives u=(Z+Y)/(Z-Y). */
    /*
     * We only need the u-coordinate of the curve25519 point.
     * The map is u=(y+1)/(1-y). Since y=Y/Z, this gives
     * u=(Z+Y)/(Z-Y).
     */
    fe_add(zplusy, A.Z, A.Y);
    fe_sub(zminusy, A.Z, A.Y);
    fe_invert(zminusy_inv, zminusy);