Commit f918504f authored by Matt Caswell
Browse files

Remove the curve448 specific constant time implementation



Instead we should use the standard OpenSSL constant time routines.

Reviewed-by: Bernd Edlinger <bernd.edlinger@hotmail.de>
(Merged from https://github.com/openssl/openssl/pull/5105)
parent e0fa6324
Loading
Loading
Loading
Loading
+3 −5
Original line number Diff line number Diff line
@@ -10,16 +10,14 @@
 * Originally written by Mike Hamburg
 */

#include "internal/constant_time_locl.h"

#ifndef __ARCH_ARCH_32_ARCH_INTRINSICS_H__
# define __ARCH_ARCH_32_ARCH_INTRINSICS_H__

# define ARCH_WORD_BITS 32

/*
 * Branch-free zero test: yields 0xffffffff when a == 0 and 0 otherwise.
 *
 * a is widened to 64 bits before the decrement, so the subtraction borrows
 * into the upper 32 bits only when a is zero; those upper bits are the result.
 */
static ossl_inline uint32_t word_is_zero(uint32_t a)
{
    const uint64_t widened = a;
    const uint64_t decremented = widened - 1;

    /* Relies on the compiler not rewriting this as a data-dependent branch. */
    return (uint32_t)(decremented >> 32);
}
/*
 * Zero test expressed through constant_time_is_zero_32() rather than a
 * curve448-local helper (presumably declared by the
 * "internal/constant_time_locl.h" include above - confirm).
 */
#define word_is_zero(a)     constant_time_is_zero_32(a)

static ossl_inline uint64_t widemul(uint32_t a, uint32_t b)
{
+0 −337
Original line number Diff line number Diff line
/*
 * Copyright 2017-2018 The OpenSSL Project Authors. All Rights Reserved.
 * Copyright 2014 Cryptography Research, Inc.
 *
 * Licensed under the OpenSSL license (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 *
 * Originally written by Mike Hamburg
 */

#ifndef __CONSTANT_TIME_H__
# define __CONSTANT_TIME_H__ 1

# include "word.h"
# include <string.h>

/*
 * Constant-time operations on hopefully-compile-time-sized memory
 * regions.  Needed for flexibility / demagication: not all fields
 * have sizes which are multiples of the vector width, necessitating
 * a change from the Ed448 versions.
 *
 * These routines would be much simpler to define at the byte level,
 * but if not vectorized they would be a significant fraction of the
 * runtime.  Eg on NEON-less ARM, constant_time_lookup is like 15% of
 * signing time, vs 6% on Haswell with its fancy AVX2 vectors.
 *
 * If the compiler could do a good job of autovectorizing the code,
 * we could just leave it with the byte definition.  But that's unlikely
 * on most deployed compilers, especially if you consider that pcmpeq[size]
 * is much faster than moving a scalar to the vector unit (which is what
 * a naive autovectorizer will do with constant_time_lookup on Intel).
 *
 * Instead, we're putting our trust in the loop unroller and unswitcher.
 */

# if defined(__GNUC__) || defined(__clang__)
/*
 * A big_register_t wrapped in a packed struct, so that it can be read and
 * written through a pointer that is not aligned for big_register_t itself.
 */
typedef struct {
    big_register_t unaligned;
} __attribute((packed)) unaligned_br_t;

/*
 * The same packed wrapper for a single word_t, for architectures where
 * unaligned word access matters.
 */
typedef struct {
    word_t unaligned;
} __attribute((packed)) unaligned_word_t;

/* Tells the routines in this header that the packed wrappers exist. */
#  define HAS_UNALIGNED_STRUCTS
/* GNU spelling of the restrict qualifier. */
#  define RESTRICT __restrict__
#else
/*
 * Other compilers: no packed wrappers are defined (the routines fall back
 * to byte-wise loops) and RESTRICT expands to nothing.
 */
#  define RESTRICT
# endif

/*
 * Constant-time conditional swap.
 *
 * Exchanges the elem_bytes bytes at *a_ and *b_ in exactly the bit positions
 * that are set in doswap: an all-ones mask swaps the two regions, a zero
 * mask leaves both unchanged.  The same loads, XORs and stores are executed
 * in either case.
 *
 * a_, b_     - the two regions; they must not alias.  They must be at least
 *              as aligned as their sizes, if the CPU cares about that sort
 *              of thing.
 * elem_bytes - number of bytes in each region; any value, including values
 *              smaller than sizeof(big_register_t), is handled.
 * doswap     - swap mask (see above).
 */
static ossl_inline void constant_time_cond_swap(void *RESTRICT a_,
                                                void *RESTRICT b_,
                                                word_t elem_bytes,
                                                mask_t doswap)
{
    word_t k;
    unsigned char *a = (unsigned char *)a_;
    unsigned char *b = (unsigned char *)b_;
    big_register_t br_mask = br_set_to_mask(doswap);
# ifndef HAS_UNALIGNED_STRUCTS
    unsigned char doswapc = (unsigned char)(doswap & 0xFF);
# endif

    /*
     * Bulk pass, one big_register_t at a time.  The bound is written as
     * "k + size <= elem_bytes": the form "k <= elem_bytes - size" wraps
     * around in unsigned arithmetic when elem_bytes is smaller than the
     * register and would run far past the end of both buffers.
     */
    for (k = 0; k + sizeof(big_register_t) <= elem_bytes;
         k += sizeof(big_register_t)) {
        if (elem_bytes % sizeof(big_register_t)) {
            /* unaligned */
# ifdef HAS_UNALIGNED_STRUCTS
            big_register_t diff = ((unaligned_br_t *)(&a[k]))->unaligned
                                  ^ ((unaligned_br_t *)(&b[k]))->unaligned;

            diff &= br_mask;
            ((unaligned_br_t *)(&a[k]))->unaligned ^= diff;
            ((unaligned_br_t *)(&b[k]))->unaligned ^= diff;
# else
            size_t i;

            for (i = 0; i < sizeof(big_register_t); i++) {
                unsigned char diff = a[k + i] ^ b[k + i];

                diff &= doswapc;
                a[k + i] ^= diff;
                b[k + i] ^= diff;
            }
# endif
        } else {
            /* aligned */
            big_register_t diff = *((big_register_t *)(&a[k]))
                                  ^ *((big_register_t *)(&b[k]));

            diff &= br_mask;
            *((big_register_t *)(&a[k])) ^= diff;
            *((big_register_t *)(&b[k])) ^= diff;
        }
    }

    /* Word-sized pass over whatever the bulk pass left behind. */
    if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
        for (; k + sizeof(word_t) <= elem_bytes; k += sizeof(word_t)) {
            if (elem_bytes % sizeof(word_t)) {
                /* unaligned */
# ifdef HAS_UNALIGNED_STRUCTS
                word_t diff = ((unaligned_word_t *)(&a[k]))->unaligned
                              ^ ((unaligned_word_t *)(&b[k]))->unaligned;

                diff &= doswap;
                ((unaligned_word_t *)(&a[k]))->unaligned ^= diff;
                ((unaligned_word_t *)(&b[k]))->unaligned ^= diff;
# else
                size_t i;

                for (i = 0; i < sizeof(word_t); i++) {
                    unsigned char diff = a[k + i] ^ b[k + i];

                    diff &= doswapc;
                    a[k + i] ^= diff;
                    b[k + i] ^= diff;
                }
# endif
            } else {
                /* aligned */
                word_t diff = *((word_t *)(&a[k])) ^ *((word_t *)(&b[k]));

                diff &= doswap;
                *((word_t *)(&a[k])) ^= diff;
                *((word_t *)(&b[k])) ^= diff;
            }
        }
    }

    /* Byte-sized tail; only the low eight bits of the mask take part. */
    if (elem_bytes % sizeof(word_t)) {
        for (; k < elem_bytes; k += 1) {
            unsigned char diff = a[k] ^ b[k];

            diff &= doswap;
            a[k] ^= diff;
            b[k] ^= diff;
        }
    }
}

/*
 * Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
 *
 * Every one of the n_table entries is read and ANDed with a mask that is
 * non-zero only for entry idx, then ORed into the output, so the sequence
 * of memory accesses does not depend on idx.  The output is zeroed first;
 * if idx >= n_table no entry matches and the output stays all zero.
 *
 * out_       - destination, elem_bytes long.  It must be word aligned, and
 *              if the input size is vector aligned it must also be vector
 *              aligned.  It must not alias the table.
 * table_     - n_table consecutive entries of elem_bytes each; must be at
 *              least as aligned as elem_bytes.
 * elem_bytes - size of one entry; any value, including values smaller than
 *              sizeof(big_register_t), is handled.
 * n_table    - number of entries in the table.
 * idx        - index of the entry to copy out.
 */
static ossl_inline void constant_time_lookup(void *RESTRICT out_,
                                             const void *table_,
                                             word_t elem_bytes,
                                             word_t n_table,
                                             word_t idx)
{
    /* big_i counts down from idx in every lane and hits zero when j == idx */
    big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);

    /* Can't do pointer arithmetic on void * */
    unsigned char *out = (unsigned char *)out_;
    const unsigned char *table = (const unsigned char *)table_;
    word_t j, k;
# ifndef HAS_UNALIGNED_STRUCTS
    unsigned char maskc;
# endif

    memset(out, 0, elem_bytes);
    for (j = 0; j < n_table; j++, big_i -= big_one) {
        const unsigned char *entry = table + j * elem_bytes;
        big_register_t br_mask = br_is_zero(big_i);
        word_t mask;

# ifndef HAS_UNALIGNED_STRUCTS
        maskc = (unsigned char)br_mask;
# endif

        /*
         * Bulk pass, one big_register_t at a time.  The bound is written as
         * "k + size <= elem_bytes": the form "k <= elem_bytes - size" wraps
         * around in unsigned arithmetic when elem_bytes is smaller than the
         * register and would read and write out of bounds.
         */
        for (k = 0; k + sizeof(big_register_t) <= elem_bytes;
             k += sizeof(big_register_t)) {
            if (elem_bytes % sizeof(big_register_t)) {
                /* unaligned */
# ifdef HAS_UNALIGNED_STRUCTS
                ((unaligned_br_t *)(out + k))->unaligned |=
                        br_mask
                        & ((const unaligned_br_t *)(&entry[k]))->unaligned;
# else
                size_t i;

                for (i = 0; i < sizeof(big_register_t); i++)
                    out[k + i] |= maskc & entry[k + i];
# endif
            } else {
                /* aligned */
                *(big_register_t *)(out + k) |=
                        br_mask & *(const big_register_t *)(&entry[k]);
            }
        }

        /* Word-wide mask for the remainder: set only when j == idx. */
        mask = word_is_zero(idx ^ j);
# ifndef HAS_UNALIGNED_STRUCTS
        maskc = (unsigned char)mask;
# endif
        if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
            for (; k + sizeof(word_t) <= elem_bytes; k += sizeof(word_t)) {
                if (elem_bytes % sizeof(word_t)) {
                    /* input unaligned, output aligned */
# ifdef HAS_UNALIGNED_STRUCTS
                    *(word_t *)(out + k) |=
                            mask
                            & ((const unaligned_word_t *)
                               (&entry[k]))->unaligned;
# else
                    size_t i;

                    for (i = 0; i < sizeof(word_t); i++)
                        out[k + i] |= maskc & entry[k + i];
# endif
                } else {
                    /* aligned */
                    *(word_t *)(out + k) |=
                            mask & *(const word_t *)(&entry[k]);
                }
            }
        }

        /* Byte-sized tail. */
        if (elem_bytes % sizeof(word_t)) {
            for (; k < elem_bytes; k += 1) {
                out[k] |= mask & entry[k];
            }
        }
    }
}

/*
 * Constant-time a = mask ? bTrue : bFalse.
 *
 * Each output bit is taken from bTrue_ where the corresponding mask bit is
 * set and from bFalse_ where it is clear; both inputs are always read in
 * full.
 *
 * a_              - destination, elem_bytes long.
 * bFalse_, bTrue_ - the two candidate inputs, elem_bytes long each.
 * elem_bytes      - number of bytes to select; any value, including values
 *                   smaller than sizeof(big_register_t), is handled.
 * mask            - selection mask (see above).
 * alignment_bytes - alignment the caller guarantees; the inputs and output
 *                   must be at least as aligned as alignment_bytes or their
 *                   size, whichever is smaller.
 *
 * Note that the output is not __restrict__, but if it overlaps either
 * input, it must be equal and not partially overlap.
 */
static ossl_inline void constant_time_select_c448(void *a_,
                                                  const void *bFalse_,
                                                  const void *bTrue_,
                                                  word_t elem_bytes,
                                                  mask_t mask,
                                                  size_t alignment_bytes)
{
    unsigned char *a = (unsigned char *)a_;
    const unsigned char *bTrue = (const unsigned char *)bTrue_;
    const unsigned char *bFalse = (const unsigned char *)bFalse_;
    word_t k;
    big_register_t br_mask = br_set_to_mask(mask);
# ifndef HAS_UNALIGNED_STRUCTS
    unsigned char maskc = (unsigned char)mask;
# endif

    /*
     * Fold the element size into the alignment: a low bit set in either
     * value makes the "% sizeof(...)" tests below pick the unaligned path.
     */
    alignment_bytes |= elem_bytes;

    /*
     * Bulk pass, one big_register_t at a time.  The bound is written as
     * "k + size <= elem_bytes": the form "k <= elem_bytes - size" wraps
     * around in unsigned arithmetic when elem_bytes is smaller than the
     * register and would read and write out of bounds.
     */
    for (k = 0; k + sizeof(big_register_t) <= elem_bytes;
         k += sizeof(big_register_t)) {
        if (alignment_bytes % sizeof(big_register_t)) {
            /* unaligned */
# ifdef HAS_UNALIGNED_STRUCTS
            ((unaligned_br_t *)(&a[k]))->unaligned =
                    (br_mask & ((const unaligned_br_t *)(&bTrue[k]))->unaligned)
                    | (~br_mask
                       & ((const unaligned_br_t *)(&bFalse[k]))->unaligned);
# else
            size_t i;

            for (i = 0; i < sizeof(big_register_t); i++)
                a[k + i] = (maskc & bTrue[k + i]) | (~maskc & bFalse[k + i]);
# endif
        } else {
            /* aligned */
            *(big_register_t *)(a + k) =
                    (br_mask & *(const big_register_t *)(&bTrue[k]))
                    | (~br_mask & *(const big_register_t *)(&bFalse[k]));
        }
    }

    /* Word-sized pass over whatever the bulk pass left behind. */
    if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
        for (; k + sizeof(word_t) <= elem_bytes; k += sizeof(word_t)) {
            if (alignment_bytes % sizeof(word_t)) {
                /* unaligned */
# ifdef HAS_UNALIGNED_STRUCTS
                ((unaligned_word_t *)(&a[k]))->unaligned =
                    (mask & ((const unaligned_word_t *)(&bTrue[k]))->unaligned)
                    | (~mask
                       & ((const unaligned_word_t *)(&bFalse[k]))->unaligned);
# else
                size_t i;

                for (i = 0; i < sizeof(word_t); i++)
                    a[k + i] = (maskc & bTrue[k + i])
                               | (~maskc & bFalse[k + i]);
# endif
            } else {
                /* aligned */
                *(word_t *)(a + k) = (mask & *(const word_t *)(&bTrue[k]))
                    | (~mask & *(const word_t *)(&bFalse[k]));
            }
        }
    }

    /* Byte-sized tail. */
    if (elem_bytes % sizeof(word_t)) {
        for (; k < elem_bytes; k += 1) {
            a[k] = (mask & bTrue[k]) | (~mask & bFalse[k]);
        }
    }
}

#undef RESTRICT
#undef HAS_UNALIGNED_STRUCTS

#endif                          /* __CONSTANT_TIME_H__ */
+27 −3
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@
#ifndef __GF_H__
# define __GF_H__

# include "constant_time.h"
# include "internal/constant_time_locl.h"
# include <string.h>
# include <assert.h>
# include "word.h"
@@ -128,7 +128,20 @@ static ossl_inline void gf_mulw(gf c, const gf a, int32_t w)
/*
 * Constant time, x = is_z ? z : y
 *
 * Selects between two field elements limb by limb: for each of the NLIMBS
 * limbs, x receives the result of constant_time_select_32/_64 applied to
 * is_z and the matching limbs of z and y.  The helper width follows
 * ARCH_WORD_BITS.
 *
 * NOTE(review): this listing is a diff rendered without +/- markers.  The
 * constant_time_select_c448() call below looks like the removed pre-change
 * line and the limb loop like its replacement - confirm against the
 * repository before relying on both being present.
 */
static ossl_inline void gf_cond_sel(gf x, const gf y, const gf z, mask_t is_z)
{
    constant_time_select_c448(x, y, z, sizeof(gf), is_z, 0);
    size_t i;

    for (i = 0; i < NLIMBS; i++) {
#if ARCH_WORD_BITS == 32
        /* 32-bit limbs: z's limb is passed first, y's limb second */
        x[0].limb[i] = constant_time_select_32((uint32_t)is_z,
                                               (uint32_t)(z[0].limb[i]),
                                               (uint32_t)(y[0].limb[i]));
#else
        /* Must be 64 bit */
        x[0].limb[i] = constant_time_select_64((uint64_t)is_z,
                                               (uint64_t)(z[0].limb[i]),
                                               (uint64_t)(y[0].limb[i]));
#endif
    }
}

/* Constant time, if (neg) x=-x; */
@@ -142,7 +155,18 @@ static ossl_inline void gf_cond_neg(gf x, mask_t neg)
/*
 * Constant time, if (swap) (x,y) = (y,x);
 *
 * Conditionally exchanges two field elements limb by limb: each of the
 * NLIMBS limb pairs is handed to constant_time_cond_swap_32/_64 together
 * with the swap mask.  The helper width follows ARCH_WORD_BITS.
 *
 * NOTE(review): this listing is a diff rendered without +/- markers.  The
 * constant_time_cond_swap() call below looks like the removed pre-change
 * line and the limb loop like its replacement - confirm against the
 * repository before relying on both being present.
 */
static ossl_inline void gf_cond_swap(gf x, gf_s * RESTRICT y, mask_t swap)
{
    constant_time_cond_swap(x, y, sizeof(gf_s), swap);
    size_t i;

    for (i = 0; i < NLIMBS; i++) {
#if ARCH_WORD_BITS == 32
        /* 32-bit limbs are swapped in place through pointers to each limb */
        constant_time_cond_swap_32((uint32_t)swap, (uint32_t *)&(x[0].limb[i]),
                                   (uint32_t *)&(y->limb[i]));
#else
        /* Must be 64 bit */
        constant_time_cond_swap_64((uint64_t)swap, (uint64_t *)&(x[0].limb[i]),
                                   (uint64_t *)&(y->limb[i]));
#endif
    }
}

#endif                          /* __GF_H__ */
+0 −1
Original line number Diff line number Diff line
@@ -12,7 +12,6 @@
#include <openssl/crypto.h>

#include "word.h"
#include "constant_time.h"
#include "point_448.h"

static const c448_word_t MONTGOMERY_FACTOR = (c448_word_t) 0x3bd440fae918bc5;
+0 −52
Original line number Diff line number Diff line
@@ -92,49 +92,21 @@ typedef word_t vecmask_t __attribute__ ((vector_size(32)));

# if defined(__AVX2__)
#  define VECTOR_ALIGNED __attribute__((aligned(32)))
typedef uint32x8_t big_register_t;
typedef uint64x4_t uint64xn_t;
typedef uint32x8_t uint32xn_t;

/*
 * AVX2 variant: converts x to 32 bits and replicates that value into all
 * eight lanes of the returned vector.
 */
static ossl_inline big_register_t br_set_to_mask(mask_t x)
{
    const uint32_t lane = (uint32_t)x;
    big_register_t splat = {
        lane, lane, lane, lane, lane, lane, lane, lane
    };

    return splat;
}
# elif defined(__SSE2__)
#  define VECTOR_ALIGNED __attribute__((aligned(16)))
typedef uint32x4_t big_register_t;
typedef uint64x2_t uint64xn_t;
typedef uint32x4_t uint32xn_t;

/*
 * SSE2 variant: converts x to 32 bits and replicates that value into all
 * four lanes of the returned vector.
 */
static ossl_inline big_register_t br_set_to_mask(mask_t x)
{
    const uint32_t lane = x;
    big_register_t splat = { lane, lane, lane, lane };

    return splat;
}
# elif defined(__ARM_NEON__)
#  define VECTOR_ALIGNED __attribute__((aligned(16)))
typedef uint32x4_t big_register_t;
typedef uint64x2_t uint64xn_t;
typedef uint32x4_t uint32xn_t;

/* NEON variant: builds the register by passing x to vdupq_n_u32(). */
static ossl_inline big_register_t br_set_to_mask(mask_t x)
{
    const big_register_t splat = vdupq_n_u32(x);

    return splat;
}
# elif !defined(_MSC_VER) \
       && (defined(_WIN64) || defined(__amd64__) || defined(__X86_64__) \
           || defined(__aarch64__))
#  define VECTOR_ALIGNED __attribute__((aligned(8)))
typedef uint64_t big_register_t, uint64xn_t;

typedef uint32_t uint32xn_t;
/*
 * Scalar 64-bit variant: big_register_t is a plain uint64_t here, so the
 * mask is simply converted to that type.
 */
static ossl_inline big_register_t br_set_to_mask(mask_t x)
{
    const big_register_t widened = (big_register_t)x;

    return widened;
}
# else
#  ifdef __GNUC__
#   define VECTOR_ALIGNED __attribute__((aligned(4)))
@@ -147,32 +119,8 @@ static ossl_inline big_register_t br_set_to_mask(mask_t x)
#  endif
typedef uint64_t uint64xn_t;
typedef uint32_t uint32xn_t;
typedef uint32_t big_register_t;

/*
 * Scalar 32-bit fallback: big_register_t is a plain uint32_t here, so the
 * mask is simply converted to that type.
 */
static ossl_inline big_register_t br_set_to_mask(mask_t x)
{
    const big_register_t converted = (big_register_t)x;

    return converted;
}
# endif

# if defined(__AVX2__)
/*
 * AVX2 variant: compares x lane by lane against an all-zero register and
 * returns the comparison result reinterpreted as a big_register_t.
 */
static ossl_inline big_register_t br_is_zero(big_register_t x)
{
    const big_register_t zero = br_set_to_mask(0);

    return (big_register_t)(x == zero);
}
# elif defined(__SSE2__)
/*
 * SSE2 variant: compares the 32-bit lanes of x against zero with
 * _mm_cmpeq_epi32() and returns the result as a big_register_t.
 */
static ossl_inline big_register_t br_is_zero(big_register_t x)
{
    const __m128i lanes = (__m128i)x;
    const __m128i zero = _mm_setzero_si128();

    return (big_register_t)_mm_cmpeq_epi32(lanes, zero);
}
# elif defined(__ARM_NEON__)
/*
 * NEON variant: compares x against an all-zero register (formed as x ^ x)
 * with vceqq_u32().
 */
static ossl_inline big_register_t br_is_zero(big_register_t x)
{
    const big_register_t zero = x ^ x;

    return vceqq_u32(x, zero);
}
# else
#  define br_is_zero word_is_zero
# endif

/* PERF: vectorize vs unroll */
# ifdef __clang__