Commit a091e212 authored by Andy Polyakov's avatar Andy Polyakov
Browse files

evp/e_chacha20_poly1305.c: improve performance for short TLS records.



Inputs not longer than 64 bytes are processed ~10% faster, longer
lengths not divisble by 64, e.g. 255, up to ~20%. Unfortunately it's
impossible to measure with apps/speed.c, -aead benchmarks TLS-like
call sequence, but not exact. It took specially crafted code path...

Reviewed-by: default avatarRich Salz <rsalz@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6597)
parent 249330de
Loading
Loading
Loading
Loading
+148 −10
Original line number Diff line number Diff line
@@ -150,6 +150,7 @@ typedef struct {
    EVP_CHACHA_KEY key;
    unsigned int nonce[12/4];
    unsigned char tag[POLY1305_BLOCK_SIZE];
    unsigned char tls_aad[POLY1305_BLOCK_SIZE];
    struct { uint64_t aad, text; } len;
    int aad, mac_inited, tag_len, nonce_len;
    size_t tls_payload_length;
@@ -179,7 +180,8 @@ static int chacha20_poly1305_init_key(EVP_CIPHER_CTX *ctx,

        /* pad on the left */
        if (actx->nonce_len <= CHACHA_CTR_SIZE)
            memcpy(temp + CHACHA_CTR_SIZE - actx->nonce_len, iv, actx->nonce_len);
            memcpy(temp + CHACHA_CTR_SIZE - actx->nonce_len, iv,
                   actx->nonce_len);

        chacha_init_key(ctx, inkey, temp, enc);

@@ -193,23 +195,158 @@ static int chacha20_poly1305_init_key(EVP_CIPHER_CTX *ctx,
    return 1;
}

#  if !defined(OPENSSL_SMALL_FOOTPRINT)
static const unsigned char zero[2 * CHACHA_BLK_SIZE] = { 0 };

static int chacha20_poly1305_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                                        const unsigned char *in, size_t len)
{
    EVP_CHACHA_AEAD_CTX *actx = aead_data(ctx);
    size_t i, tail, tohash_len, plen = actx->tls_payload_length;
    unsigned char *buf, *tohash, *ctr, storage[2 * CHACHA_BLK_SIZE + 32];

    if (len != plen + POLY1305_BLOCK_SIZE)
        return -1;

    buf = storage + ((0 - (size_t)storage) & 15);   /* align */
    ctr = buf + CHACHA_BLK_SIZE;
    tohash = buf + CHACHA_BLK_SIZE - POLY1305_BLOCK_SIZE;

    if (plen <= CHACHA_BLK_SIZE) {
        actx->key.counter[0] = 0;
        ChaCha20_ctr32(buf, zero, 2 * CHACHA_BLK_SIZE, actx->key.key.d,
                       actx->key.counter);
        Poly1305_Init(POLY1305_ctx(actx), buf);
        actx->key.partial_len = 0;
        memcpy(tohash, actx->tls_aad, POLY1305_BLOCK_SIZE);
        tohash_len = POLY1305_BLOCK_SIZE;
        actx->len.aad = EVP_AEAD_TLS1_AAD_LEN;
        actx->len.text = plen;

        if (ctx->encrypt) {
            for (i = 0; i < plen; i++) {
                out[i] = ctr[i] ^= in[i];
            }
        } else {
            for (i = 0; i < plen; i++) {
                unsigned char c = in[i];
                out[i] = ctr[i] ^ c;
                ctr[i] = c;
            }
        }

        in += i;
        out += i;

        tail = (0 - i) & (POLY1305_BLOCK_SIZE - 1);
        memset(ctr + i, 0, tail);
        ctr += i + tail;
        tohash_len += i + tail;
    } else {
        actx->key.counter[0] = 0;
        ChaCha20_ctr32(buf, zero, CHACHA_BLK_SIZE, actx->key.key.d,
                       actx->key.counter);
        Poly1305_Init(POLY1305_ctx(actx), buf);
        actx->key.counter[0] = 1;
        actx->key.partial_len = 0;
        Poly1305_Update(POLY1305_ctx(actx), actx->tls_aad, POLY1305_BLOCK_SIZE);
        tohash = ctr;
        tohash_len = 0;
        actx->len.aad = EVP_AEAD_TLS1_AAD_LEN;
        actx->len.text = plen;

        if (ctx->encrypt) {
            ChaCha20_ctr32(out, in, plen, actx->key.key.d, actx->key.counter);
            Poly1305_Update(POLY1305_ctx(actx), out, plen);
        } else {
            Poly1305_Update(POLY1305_ctx(actx), in, plen);
            ChaCha20_ctr32(out, in, plen, actx->key.key.d, actx->key.counter);
        }

        in += plen;
        out += plen;
        tail = (0 - plen) & (POLY1305_BLOCK_SIZE - 1);
        Poly1305_Update(POLY1305_ctx(actx), zero, tail);
    }

    {
        const union {
            long one;
            char little;
        } is_endian = { 1 };

        if (is_endian.little) {
            memcpy(ctr, (unsigned char *)&actx->len, POLY1305_BLOCK_SIZE);
        } else {
            ctr[0]  = (unsigned char)(actx->len.aad);
            ctr[1]  = (unsigned char)(actx->len.aad>>8);
            ctr[2]  = (unsigned char)(actx->len.aad>>16);
            ctr[3]  = (unsigned char)(actx->len.aad>>24);
            ctr[4]  = (unsigned char)(actx->len.aad>>32);
            ctr[5]  = (unsigned char)(actx->len.aad>>40);
            ctr[6]  = (unsigned char)(actx->len.aad>>48);
            ctr[7]  = (unsigned char)(actx->len.aad>>56);

            ctr[8]  = (unsigned char)(actx->len.text);
            ctr[9]  = (unsigned char)(actx->len.text>>8);
            ctr[10] = (unsigned char)(actx->len.text>>16);
            ctr[11] = (unsigned char)(actx->len.text>>24);
            ctr[12] = (unsigned char)(actx->len.text>>32);
            ctr[13] = (unsigned char)(actx->len.text>>40);
            ctr[14] = (unsigned char)(actx->len.text>>48);
            ctr[15] = (unsigned char)(actx->len.text>>56);
        }
        tohash_len += POLY1305_BLOCK_SIZE;
    }

    Poly1305_Update(POLY1305_ctx(actx), tohash, tohash_len);
    OPENSSL_cleanse(buf, 2 * CHACHA_BLK_SIZE);
    Poly1305_Final(POLY1305_ctx(actx), ctx->encrypt ? actx->tag
                                                    : tohash);

    actx->tls_payload_length = NO_TLS_PAYLOAD_LENGTH;

    if (ctx->encrypt) {
        memcpy(out, actx->tag, POLY1305_BLOCK_SIZE);
    } else {
        if (CRYPTO_memcmp(tohash, in, POLY1305_BLOCK_SIZE)) {
            memset(out - (len - POLY1305_BLOCK_SIZE), 0,
                   len - POLY1305_BLOCK_SIZE);
            return -1;
        }
    }

    return len;
}
#  else
static const unsigned char zero[CHACHA_BLK_SIZE] = { 0 };
#  endif

static int chacha20_poly1305_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                                    const unsigned char *in, size_t len)
{
    EVP_CHACHA_AEAD_CTX *actx = aead_data(ctx);
    size_t rem, plen = actx->tls_payload_length;
    static const unsigned char zero[POLY1305_BLOCK_SIZE] = { 0 };

    if (!actx->mac_inited) {
#  if !defined(OPENSSL_SMALL_FOOTPRINT)
        if (plen != NO_TLS_PAYLOAD_LENGTH && out != NULL)
            return chacha20_poly1305_tls_cipher(ctx, out, in, len);
#  endif
        actx->key.counter[0] = 0;
        memset(actx->key.buf, 0, sizeof(actx->key.buf));
        ChaCha20_ctr32(actx->key.buf, actx->key.buf, CHACHA_BLK_SIZE,
        ChaCha20_ctr32(actx->key.buf, zero, CHACHA_BLK_SIZE,
                       actx->key.key.d, actx->key.counter);
        Poly1305_Init(POLY1305_ctx(actx), actx->key.buf);
        actx->key.counter[0] = 1;
        actx->key.partial_len = 0;
        actx->len.aad = actx->len.text = 0;
        actx->mac_inited = 1;
        if (plen != NO_TLS_PAYLOAD_LENGTH) {
            Poly1305_Update(POLY1305_ctx(actx), actx->tls_aad,
                            EVP_AEAD_TLS1_AAD_LEN);
            actx->len.aad = EVP_AEAD_TLS1_AAD_LEN;
            actx->aad = 1;
        }
    }

    if (in) {                                   /* aad or text */
@@ -341,6 +478,7 @@ static int chacha20_poly1305_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg,
        actx->tag_len = 0;
        actx->nonce_len = 12;
        actx->tls_payload_length = NO_TLS_PAYLOAD_LENGTH;
        memset(actx->tls_aad, 0, POLY1305_BLOCK_SIZE);
        return 1;

    case EVP_CTRL_COPY:
@@ -393,18 +531,18 @@ static int chacha20_poly1305_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg,
            return 0;
        {
            unsigned int len;
            unsigned char *aad = ptr, temp[POLY1305_BLOCK_SIZE];
            unsigned char *aad = ptr;

            memcpy(actx->tls_aad, ptr, EVP_AEAD_TLS1_AAD_LEN);
            len = aad[EVP_AEAD_TLS1_AAD_LEN - 2] << 8 |
                  aad[EVP_AEAD_TLS1_AAD_LEN - 1];
            aad = actx->tls_aad;
            if (!ctx->encrypt) {
                if (len < POLY1305_BLOCK_SIZE)
                    return 0;
                len -= POLY1305_BLOCK_SIZE;     /* discount attached tag */
                memcpy(temp, aad, EVP_AEAD_TLS1_AAD_LEN - 2);
                aad = temp;
                temp[EVP_AEAD_TLS1_AAD_LEN - 2] = (unsigned char)(len >> 8);
                temp[EVP_AEAD_TLS1_AAD_LEN - 1] = (unsigned char)len;
                aad[EVP_AEAD_TLS1_AAD_LEN - 2] = (unsigned char)(len >> 8);
                aad[EVP_AEAD_TLS1_AAD_LEN - 1] = (unsigned char)len;
            }
            actx->tls_payload_length = len;

@@ -415,7 +553,7 @@ static int chacha20_poly1305_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg,
            actx->key.counter[2] = actx->nonce[1] ^ CHACHA_U8TOU32(aad);
            actx->key.counter[3] = actx->nonce[2] ^ CHACHA_U8TOU32(aad+4);
            actx->mac_inited = 0;
            chacha20_poly1305_cipher(ctx, NULL, aad, EVP_AEAD_TLS1_AAD_LEN);

            return POLY1305_BLOCK_SIZE;         /* tag length */
        }