Commit 52a9587c authored by Matt Caswell
Browse files

Fix build errors for Curve448 code on Windows (VC-WIN32 and VC-WIN64A)

parent 575d5afc
Loading
Loading
Loading
Loading
+2 −4
Original line number Diff line number Diff line
@@ -15,15 +15,13 @@

# define ARCH_WORD_BITS 32

static __inline__ __attribute((always_inline, unused))
uint32_t word_is_zero(uint32_t a)
/*
 * Returns an all-ones word (0xFFFFFFFF) when |a| is zero and 0 otherwise.
 */
static ossl_inline uint32_t word_is_zero(uint32_t a)
{
    /*
     * Widen to 64 bits before subtracting one: only a == 0 wraps around and
     * sets the upper 32 bits, every other input leaves them clear.  This
     * relies on the compiler not replacing the arithmetic with something
     * cleverer.
     */
    const uint64_t widened = (uint64_t)a - 1;

    return (uint32_t)(widened >> 32);
}

static __inline__ __attribute((always_inline, unused))
uint64_t widemul(uint32_t a, uint32_t b)
/*
 * Multiplies two 32-bit words and returns the full 64-bit product.
 */
static ossl_inline uint64_t widemul(uint32_t a, uint32_t b)
{
    /* Widen one operand first so the multiplication is done in 64 bits. */
    const uint64_t wide_a = a;

    return wide_a * b;
}
+7 −7
Original line number Diff line number Diff line
@@ -20,7 +20,7 @@
# define FOR_LIMB(_i,_start,_end,_x) do { for (_i=_start; _i<_end; _i++) _x; } while (0)
#endif

void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs)
void gf_mul(gf_s * RESTRICT cs, const gf as, const gf bs)
{
    const uint32_t *a = as->limb, *b = bs->limb;
    uint32_t *c = cs->limb;
@@ -71,7 +71,7 @@ void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs)
    c[1] += ((uint32_t)(accum1));
}

void gf_mulw_unsigned(gf_s * __restrict__ cs, const gf as, uint32_t b)
void gf_mulw_unsigned(gf_s * RESTRICT cs, const gf as, uint32_t b)
{
    const uint32_t *a = as->limb;
    uint32_t *c = cs->limb;
@@ -88,15 +88,15 @@ void gf_mulw_unsigned(gf_s * __restrict__ cs, const gf as, uint32_t b)
             });

    accum0 += accum8 + c[8];
    c[8] = accum0 & mask;
    c[9] += accum0 >> 28;
    c[8] = ((uint32_t)accum0) & mask;
    c[9] += (uint32_t)(accum0 >> 28);

    accum8 += c[0];
    c[0] = accum8 & mask;
    c[1] += accum8 >> 28;
    c[0] = ((uint32_t)accum8) & mask;
    c[1] += (uint32_t)(accum8 >> 28);
}

void gf_sqr(gf_s * __restrict__ cs, const gf as)
/*
 * Squares |as|: multiplies |as| by itself using gf_mul() and stores the
 * result in |cs|.
 */
void gf_sqr(gf_s * RESTRICT cs, const gf as)
{
    /*
     * A dedicated squaring routine would perform better; for now the general
     * multiplication is reused with |as| as both operands.
     */
    gf_mul(cs, as, as);
}
+90 −6
Original line number Diff line number Diff line
@@ -36,19 +36,26 @@
 * Instead, we're putting our trust in the loop unroller and unswitcher.
 */

# if defined(__GNUC__) || defined(__clang__)
/*
 * Unaligned big (vector?) register.
 */
typedef struct {
    big_register_t unaligned;
} __attribute__ ((packed)) unaligned_br_t;
} __attribute((packed)) unaligned_br_t;

/*
 * Unaligned word register, for architectures where that matters.
 */
typedef struct {
    word_t unaligned;
} __attribute__ ((packed)) unaligned_word_t;
} __attribute((packed)) unaligned_word_t;

#  define HAS_UNALIGNED_STRUCTS
#  define RESTRICT __restrict__
#else
#  define RESTRICT
# endif

/*
 * Constant-time conditional swap.
@@ -58,26 +65,41 @@ typedef struct {
 * *a and *b must not alias.  Also, they must be at least as aligned
 * as their sizes, if the CPU cares about that sort of thing.
 */
static ossl_inline void constant_time_cond_swap(void *__restrict__ a_,
                                                void *__restrict__ b_,
static ossl_inline void constant_time_cond_swap(void *RESTRICT a_,
                                                void *RESTRICT b_,
                                                word_t elem_bytes,
                                                mask_t doswap)
{
    word_t k;
    unsigned char *a = (unsigned char *)a_;
    unsigned char *b = (unsigned char *)b_;

    big_register_t br_mask = br_set_to_mask(doswap);
# ifndef HAS_UNALIGNED_STRUCTS
    unsigned char doswapc = (unsigned char)(doswap & 0xFF);
# endif

    for (k = 0; k <= elem_bytes - sizeof(big_register_t);
         k += sizeof(big_register_t)) {
        if (elem_bytes % sizeof(big_register_t)) {
            /* unaligned */
# ifdef HAS_UNALIGNED_STRUCTS
            big_register_t xor = ((unaligned_br_t *) (&a[k]))->unaligned
                                 ^ ((unaligned_br_t *) (&b[k]))->unaligned;

            xor &= br_mask;
            ((unaligned_br_t *)(&a[k]))->unaligned ^= xor;
            ((unaligned_br_t *)(&b[k]))->unaligned ^= xor;
# else
            size_t i;

            for (i = 0; i < sizeof(big_register_t); i++) {
                unsigned char xor = a[k + i] ^ b[k + i];

                xor &= doswapc;
                a[k + i] ^= xor;
                b[k + i] ^= xor;
            }
# endif
        } else {
            /* aligned */
            big_register_t xor = *((big_register_t *) (&a[k]))
@@ -92,12 +114,24 @@ static ossl_inline void constant_time_cond_swap(void *__restrict__ a_,
        for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
            if (elem_bytes % sizeof(word_t)) {
                /* unaligned */
# ifdef HAS_UNALIGNED_STRUCTS
                word_t xor = ((unaligned_word_t *)(&a[k]))->unaligned
                             ^ ((unaligned_word_t *)(&b[k]))->unaligned;

                xor &= doswap;
                ((unaligned_word_t *)(&a[k]))->unaligned ^= xor;
                ((unaligned_word_t *)(&b[k]))->unaligned ^= xor;
# else
                size_t i;

                for (i = 0; i < sizeof(word_t); i++) {
                    unsigned char xor = a[k + i] ^ b[k + i];

                    xor &= doswapc;
                    a[k + i] ^= xor;
                    b[k + i] ^= xor;
                }
# endif
            } else {
                /* aligned */
                word_t xor = *((word_t *) (&a[k])) ^ *((word_t *) (&b[k]));
@@ -127,7 +161,7 @@ static ossl_inline void constant_time_cond_swap(void *__restrict__ a_,
 *
 * The table and output must not alias.
 */
static ossl_inline void constant_time_lookup(void *__restrict__ out_,
static ossl_inline void constant_time_lookup(void *RESTRICT out_,
                                             const void *table_,
                                             word_t elem_bytes,
                                             word_t n_table,
@@ -139,20 +173,36 @@ static ossl_inline void constant_time_lookup(void *__restrict__ out_,
    unsigned char *out = (unsigned char *)out_;
    const unsigned char *table = (const unsigned char *)table_;
    word_t j, k;
# ifndef HAS_UNALIGNED_STRUCTS
    unsigned char maskc;
# endif

    memset(out, 0, elem_bytes);
    for (j = 0; j < n_table; j++, big_i -= big_one) {
        big_register_t br_mask = br_is_zero(big_i);
        word_t mask;

# ifndef HAS_UNALIGNED_STRUCTS
        maskc = (unsigned char)br_mask;
# endif

        for (k = 0; k <= elem_bytes - sizeof(big_register_t);
             k += sizeof(big_register_t)) {
            if (elem_bytes % sizeof(big_register_t)) {
                /* unaligned */
# ifdef HAS_UNALIGNED_STRUCTS
                ((unaligned_br_t *)(out + k))->unaligned |=
                        br_mask
                        & ((const unaligned_br_t *)
                           (&table[k + j * elem_bytes]))->unaligned;
# else
                size_t i;

                for (i = 0; i < sizeof(big_register_t); i++)
                    out[k + i] |= maskc
                                  & ((unsigned char *) table)
                                    [k + (j * elem_bytes) + i];
# endif
            } else {
                /* aligned */
                *(big_register_t *)(out + k) |=
@@ -162,14 +212,26 @@ static ossl_inline void constant_time_lookup(void *__restrict__ out_,
        }

        mask = word_is_zero(idx ^ j);
# ifndef HAS_UNALIGNED_STRUCTS
        maskc = (unsigned char)mask;
# endif
        if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
            for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
                if (elem_bytes % sizeof(word_t)) {
                    /* input unaligned, output aligned */
# ifdef HAS_UNALIGNED_STRUCTS
                    *(word_t *)(out + k) |=
                            mask
                            & ((const unaligned_word_t *)
                               (&table[k + j * elem_bytes]))->unaligned;
# else
                    size_t i;

                    for (i = 0; i < sizeof(word_t); i++)
                        out[k + i] |= maskc
                                      & ((unsigned char *)table)
                                         [k + (j * elem_bytes) + i];
# endif
                } else {
                    /* aligned */
                    *(word_t *)(out + k) |=
@@ -208,6 +270,9 @@ static ossl_inline void constant_time_select(void *a_,
    const unsigned char *bFalse = (const unsigned char *)bFalse_;
    word_t k;
    big_register_t br_mask = br_set_to_mask(mask);
# ifndef HAS_UNALIGNED_STRUCTS
    unsigned char maskc = (unsigned char)mask;
# endif

    alignment_bytes |= elem_bytes;

@@ -215,10 +280,18 @@ static ossl_inline void constant_time_select(void *a_,
         k += sizeof(big_register_t)) {
        if (alignment_bytes % sizeof(big_register_t)) {
            /* unaligned */
# ifdef HAS_UNALIGNED_STRUCTS
            ((unaligned_br_t *)(&a[k]))->unaligned =
                    (br_mask & ((const unaligned_br_t *)(&bTrue[k]))->unaligned)
                    | (~br_mask
                       & ((const unaligned_br_t *)(&bFalse[k]))->unaligned);
# else
                    size_t i;

                    for (i = 0; i < sizeof(big_register_t); i++)
                        a[k + i] = (maskc & ((unsigned char *)bTrue)[k + i])
                                   | (~maskc & ((unsigned char *)bFalse)[k + i]);
# endif
        } else {
            /* aligned */
            *(big_register_t *) (a + k) =
@@ -231,10 +304,18 @@ static ossl_inline void constant_time_select(void *a_,
        for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
            if (alignment_bytes % sizeof(word_t)) {
                /* unaligned */
# ifdef HAS_UNALIGNED_STRUCTS
                ((unaligned_word_t *) (&a[k]))->unaligned =
                    (mask & ((const unaligned_word_t *)(&bTrue[k]))->unaligned)
                    | (~mask &
                       ((const unaligned_word_t *)(&bFalse[k]))->unaligned);
# else
                size_t i;

                for (i = 0; i < sizeof(word_t); i++)
                    a[k + i] = (maskc & ((unsigned char *)bTrue)[k + i])
                               | (~maskc & ((unsigned char *)bFalse)[k + i]);
# endif
            } else {
                /* aligned */
                *(word_t *) (a + k) = (mask & *(const word_t *)(&bTrue[k]))
@@ -250,4 +331,7 @@ static ossl_inline void constant_time_select(void *a_,
    }
}

#undef RESTRICT
#undef HAS_UNALIGNED_STRUCTS

#endif                          /* __CONSTANT_TIME_H__ */
+43 −4
Original line number Diff line number Diff line
@@ -237,7 +237,7 @@ c448_bool_t curve448_point_valid(const curve448_point_t p)
    return mask_to_bool(out);
}

static ossl_inline void constant_time_lookup_niels(niels_s * __restrict__ ni,
static ossl_inline void constant_time_lookup_niels(niels_s * RESTRICT ni,
                                                   const niels_t * table,
                                                   int nelts, int idx)
{
@@ -425,7 +425,7 @@ c448_error_t x448_int(uint8_t out[X_PUBLIC_BYTES],
            sb = -1;

        k_t = (sb >> (t % 8)) & 1;
        k_t = -k_t;             /* set to all 0s or all 1s */
        k_t = 0 - k_t;             /* set to all 0s or all 1s */

        swap ^= k_t;
        gf_cond_swap(x2, x3, swap);
@@ -498,7 +498,7 @@ void x448_derive_public_key(uint8_t out[X_PUBLIC_BYTES],
    memcpy(scalar2, scalar, sizeof(scalar2));
    scalar2[0] &= -(uint8_t)COFACTOR;

    scalar2[X_PRIVATE_BYTES - 1] &= ~(-1u << ((X_PRIVATE_BITS + 7) % 8));
    scalar2[X_PRIVATE_BYTES - 1] &= ~((0u - 1u) << ((X_PRIVATE_BITS + 7) % 8));
    scalar2[X_PRIVATE_BYTES - 1] |= 1 << ((X_PRIVATE_BITS + 7) % 8);

    curve448_scalar_decode_long(the_scalar, scalar2, sizeof(scalar2));
@@ -517,6 +517,45 @@ struct smvt_control {
    int power, addend;
};

#if defined(__GNUC__) || defined(__clang__)
# define NUMTRAILINGZEROS	__builtin_ctz
#else
# define NUMTRAILINGZEROS	numtrailingzeros
/*
 * Portable replacement for __builtin_ctz on compilers that lack it.
 *
 * Returns the number of trailing zero bits in |i|, or 32 when |i| is 0.
 *
 * The search halves the window at every step (16, 8, 4, 2, 1): if shifting
 * |i| left by the step width still leaves a non-zero 32-bit value, the lowest
 * set bit sits in the lower part of the window, so the shifted value is kept
 * and the step width is subtracted from the running count.
 */
static uint32_t numtrailingzeros(uint32_t i)
{
    /*
     * The scratch value must be exactly 32 bits wide so that bits shifted
     * past bit 31 are discarded regardless of the width of int; every step,
     * including the last, goes through it.
     */
    uint32_t tmp;
    uint32_t num = 31;

    if (i == 0)
        return 32;

    tmp = i << 16;
    if (tmp != 0) {
        i = tmp;
        num -= 16;
    }
    tmp = i << 8;
    if (tmp != 0) {
        i = tmp;
        num -= 8;
    }
    tmp = i << 4;
    if (tmp != 0) {
        i = tmp;
        num -= 4;
    }
    tmp = i << 2;
    if (tmp != 0) {
        i = tmp;
        num -= 2;
    }
    tmp = i << 1;
    if (tmp != 0)
        num--;

    return num;
}
#endif

static int recode_wnaf(struct smvt_control *control,
                       /* [nbits/(table_bits + 1) + 3] */
                       const curve448_scalar_t scalar,
@@ -549,7 +588,7 @@ static int recode_wnaf(struct smvt_control *control,
        }

        while (current & 0xFFFF) {
            uint32_t pos = __builtin_ctz((uint32_t)current);
            uint32_t pos = NUMTRAILINGZEROS((uint32_t)current);
            uint32_t odd = (uint32_t)current >> pos;
            int32_t delta = odd & mask;

+1 −1
Original line number Diff line number Diff line
@@ -62,7 +62,7 @@ typedef int64_t c448_dsword_t;
# endif

/* C448_TRUE = -1 so that C448_TRUE & x = x */
static const c448_bool_t C448_TRUE = -(c448_bool_t) 1;
static const c448_bool_t C448_TRUE = 0 - (c448_bool_t)1;

/* C448_FALSE = 0 so that C448_FALSE & x = 0 */
static const c448_bool_t C448_FALSE = 0;
Loading