Commit 41cf2d25 authored by Andy Polyakov's avatar Andy Polyakov
Browse files

evp/e_aes_cbc_hmac_sha[1|256].c: add multi-block implementations [from master].

parent e0d4272a
Loading
Loading
Loading
Loading
+361 −28
Original line number Diff line number Diff line
@@ -58,7 +58,8 @@
#include <openssl/objects.h>
#include <openssl/aes.h>
#include <openssl/sha.h>
#include "evp_locl.h"
#include <openssl/rand.h>
#include "modes_lcl.h"

#ifndef EVP_CIPH_FLAG_AEAD_CIPHER
#define EVP_CIPH_FLAG_AEAD_CIPHER	0x200000
@@ -70,6 +71,10 @@
#define EVP_CIPH_FLAG_DEFAULT_ASN1 0
#endif

#if !defined(EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK)
#define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0
#endif

#define TLS1_1_VERSION 0x0302

typedef struct
@@ -90,11 +95,7 @@ typedef struct
	defined(_M_AMD64)	|| defined(_M_X64)	|| \
	defined(__INTEL__)	)

#if defined(__GNUC__) && __GNUC__>=2 && !defined(PEDANTIC)
# define BSWAP(x) ({ unsigned int r=(x); asm ("bswapl %0":"=r"(r):"0"(r)); r; })
#endif

extern unsigned int OPENSSL_ia32cap_P[2];
extern unsigned int OPENSSL_ia32cap_P[3];
#define AESNI_CAPABLE   (1<<(57-32))

int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
@@ -112,6 +113,10 @@ void aesni_cbc_sha1_enc (const void *inp, void *out, size_t blocks,
		const AES_KEY *key, unsigned char iv[16],
		SHA_CTX *ctx,const void *in0);

void aesni256_cbc_sha1_dec (const void *inp, void *out, size_t blocks,
		const AES_KEY *key, unsigned char iv[16],
		SHA_CTX *ctx,const void *in0);

#define data(ctx) ((EVP_AES_HMAC_SHA1 *)(ctx)->cipher_data)

static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx,
@@ -136,6 +141,7 @@ static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx,
	}

#define	STITCHED_CALL
#undef	STITCHED_DECRYPT_CALL

#if !defined(STITCHED_CALL)
#define	aes_off 0
@@ -176,6 +182,198 @@ static void sha1_update(SHA_CTX *c,const void *data,size_t len)
#endif
#define SHA1_Update sha1_update

#if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK

typedef struct { unsigned int A[8],B[8],C[8],D[8],E[8]; } SHA1_MB_CTX;
typedef struct { const unsigned char *ptr; int blocks;  } HASH_DESC;

void sha1_multi_block(SHA1_MB_CTX *,const HASH_DESC *,int);

typedef struct { const unsigned char *inp; unsigned char *out;
		 int blocks; u64 iv[2]; } CIPH_DESC; 

void aesni_multi_cbc_encrypt(CIPH_DESC *,void *,int);

static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key,
	unsigned char *out, const unsigned char *inp, size_t inp_len,
	int n4x)	/* n4x is 1 or 2 */
{
	HASH_DESC	hash_d[8], edges[8];
	CIPH_DESC	ciph_d[8];
	unsigned char	storage[sizeof(SHA1_MB_CTX)+32];
	union {	u64	q[16];
		u32	d[32];
		u8	c[128];	} blocks[8];
	SHA1_MB_CTX	*ctx;
	unsigned int	frag, last, packlen, i, x4=4*n4x;
	size_t		ret = 0;
	u8		*IVs;
#if defined(BSWAP8)
	u64		seqnum;
#endif

	ctx = (SHA1_MB_CTX *)(storage+32-((size_t)storage%32));	/* align */

	frag = (unsigned int)inp_len>>(1+n4x);
	last = (unsigned int)inp_len+frag-(frag<<(1+n4x));
	if (last>frag && ((last+13+9)%64)<(x4-1)) {
		frag++;
		last -= x4-1;
	}

	hash_d[0].ptr = inp;
	for (i=1;i<x4;i++)	hash_d[i].ptr = hash_d[i-1].ptr+frag;

#if defined(BSWAP8)
	memcpy(blocks[0].c,key->md.data,8);
	seqnum = BSWAP8(blocks[0].q[0]);
#endif
	for (i=0;i<x4;i++) {
		unsigned int len = (i==(x4-1)?last:frag);

		ctx->A[i] = key->md.h0;
		ctx->B[i] = key->md.h1;
		ctx->C[i] = key->md.h2;
		ctx->D[i] = key->md.h3;
		ctx->E[i] = key->md.h4;

		/* fix seqnum */
#if defined(BSWAP8)
		blocks[i].q[0] = BSWAP8(seqnum+i);
#else
		blocks[i].c[7] += ((u8*)key->md.data)[7]+i;
		if (blocks[i].c[7] < i) {
			int j;

			for (j=6;j>=0;j--) {
				if (blocks[i].c[j]=((u8*)key->md.data)[j]+1) break;
			}
		}
#endif
		blocks[i].c[8] = ((u8*)key->md.data)[8];
		blocks[i].c[9] = ((u8*)key->md.data)[9];
		blocks[i].c[10] = ((u8*)key->md.data)[10];
		/* fix length */
		blocks[i].c[11] = (u8)(len>>8);
		blocks[i].c[12] = (u8)(len);

		memcpy(blocks[i].c+13,hash_d[i].ptr,64-13);
		hash_d[i].ptr += 64-13;
		hash_d[i].blocks = (len-(64-13))/64;

		edges[i].ptr = blocks[i].c;
		edges[i].blocks = 1;
	}

	/* hash 13-byte headers and first 64-13 bytes of inputs */
	sha1_multi_block(ctx,edges,n4x);
	/* hash bulk inputs */
	sha1_multi_block(ctx,hash_d,n4x);

	memset(blocks,0,sizeof(blocks));
	for (i=0;i<x4;i++) {
		unsigned int		len = (i==(x4-1)?last:frag),
					off = hash_d[i].blocks*64;
		const unsigned char    *ptr = hash_d[i].ptr+off;

		off = len-(64-13)-off;	/* remainder actually */
		memcpy(blocks[i].c,ptr,off);
		blocks[i].c[off]=0x80;
		len += 64+13;		/* 64 is HMAC header */
		len *= 8;		/* convert to bits */
		if (off<(64-8)) {
			blocks[i].d[15] = BSWAP4(len);
			edges[i].blocks = 1;			
		} else {
			blocks[i].d[31] = BSWAP4(len);
			edges[i].blocks = 2;
		}
		edges[i].ptr = blocks[i].c;
	}

	/* hash input tails and finalize */
	sha1_multi_block(ctx,edges,n4x);

	memset(blocks,0,sizeof(blocks));
	for (i=0;i<x4;i++) {
		blocks[i].d[0] = BSWAP4(ctx->A[i]);	ctx->A[i] = key->tail.h0;
		blocks[i].d[1] = BSWAP4(ctx->B[i]);	ctx->B[i] = key->tail.h1;
		blocks[i].d[2] = BSWAP4(ctx->C[i]);	ctx->C[i] = key->tail.h2;
		blocks[i].d[3] = BSWAP4(ctx->D[i]);	ctx->D[i] = key->tail.h3;
		blocks[i].d[4] = BSWAP4(ctx->E[i]);	ctx->E[i] = key->tail.h4;
		blocks[i].c[20] = 0x80;
		blocks[i].d[15] = BSWAP4((64+20)*8);
		edges[i].ptr = blocks[i].c;
		edges[i].blocks = 1;
	}

	/* finalize MACs */
	sha1_multi_block(ctx,edges,n4x);

	packlen = 5+16+((frag+20+16)&-16);

	out += (packlen<<(1+n4x))-packlen;
	inp += (frag<<(1+n4x))-frag;

	RAND_bytes((IVs=blocks[0].c),16*x4);	/* ask for IVs in bulk */

	for (i=x4-1;;i--) {
		unsigned int len = (i==(x4-1)?last:frag), pad, j;
		unsigned char *out0 = out;

		out += 5+16;		/* place for header and explicit IV */
		ciph_d[i].inp = out;
		ciph_d[i].out = out;

		memmove(out,inp,len);
		out += len;

		/* write MAC */
		((u32 *)out)[0] = BSWAP4(ctx->A[i]);
		((u32 *)out)[1] = BSWAP4(ctx->B[i]);
		((u32 *)out)[2] = BSWAP4(ctx->C[i]);
		((u32 *)out)[3] = BSWAP4(ctx->D[i]);
		((u32 *)out)[4] = BSWAP4(ctx->E[i]);
		out += 20;
		len += 20;

		/* pad */
		pad = 15-len%16;
		for (j=0;j<=pad;j++) *(out++) = pad;
		len += pad+1;

		ciph_d[i].blocks = len/16;
		len += 16;	/* account for explicit iv */

		/* arrange header */
		out0[0] = ((u8*)key->md.data)[8];
		out0[1] = ((u8*)key->md.data)[9];
		out0[2] = ((u8*)key->md.data)[10];
		out0[3] = (u8)(len>>8);
		out0[4] = (u8)(len);

		/* explicit iv */
		memcpy(ciph_d[i].iv, IVs, 16);
		memcpy(&out0[5],     IVs, 16);

		ret += len+5;

		if (i==0) break;

		out = out0-packlen;
		inp -= frag;
		IVs += 16;
	}

	aesni_multi_cbc_encrypt(ciph_d,&key->ks,n4x);

	OPENSSL_cleanse(blocks,sizeof(blocks));
	OPENSSL_cleanse(ctx,sizeof(*ctx));

	return ret;
}
#endif

static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
		      const unsigned char *in, size_t len)
	{
@@ -249,28 +447,45 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
		/* arrange cache line alignment */
		pmac = (void *)(((size_t)mac.c+31)&((size_t)0-32));

		/* decrypt HMAC|padding at once */
		aesni_cbc_encrypt(in,out,len,
				&key->ks,ctx->iv,0);

		if (plen) {	/* "TLS" mode of operation */
		if (plen != NO_PAYLOAD_LENGTH) {	/* "TLS" mode of operation */
			size_t inp_len, mask, j, i;
			unsigned int res, maxpad, pad, bitlen;
			int ret = 1;
			union {	unsigned int  u[SHA_LBLOCK];
				unsigned char c[SHA_CBLOCK]; }
				*data = (void *)key->md.data;
#if defined(STITCHED_DECRYPT_CALL)
			unsigned char tail_iv[AES_BLOCK_SIZE];
			int stitch=0;
#endif

			if ((key->aux.tls_aad[plen-4]<<8|key->aux.tls_aad[plen-3])
			    >= TLS1_1_VERSION)
				iv = AES_BLOCK_SIZE;

			if (len<(iv+SHA_DIGEST_LENGTH+1))
			    >= TLS1_1_VERSION) {
			    	if (len<(AES_BLOCK_SIZE+SHA_DIGEST_LENGTH+1))
					return 0;

				/* omit explicit iv */
			out += iv;
			len -= iv;
				memcpy(ctx->iv,in,AES_BLOCK_SIZE);
				in  += AES_BLOCK_SIZE;
				out += AES_BLOCK_SIZE;
				len -= AES_BLOCK_SIZE;
			}
			else if (len<(SHA_DIGEST_LENGTH+1))
				return 0;

#if defined(STITCHED_DECRYPT_CALL)
			if (len>=1024 && ctx->key_len==32) {
				/* decrypt last block */
				memcpy(tail_iv,in+len-2*AES_BLOCK_SIZE,AES_BLOCK_SIZE);
				aesni_cbc_encrypt(in+len-AES_BLOCK_SIZE,
						out+len-AES_BLOCK_SIZE,AES_BLOCK_SIZE,
						&key->ks,tail_iv,0);
				stitch=1;
			} else
#endif
			/* decrypt HMAC|padding at once */
			aesni_cbc_encrypt(in,out,len,
					&key->ks,ctx->iv,0);

			/* figure out payload length */
			pad = out[len-1];
@@ -290,6 +505,30 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
			key->md = key->head;
			SHA1_Update(&key->md,key->aux.tls_aad,plen);

#if defined(STITCHED_DECRYPT_CALL)
			if (stitch) {
				blocks = (len-(256+32+SHA_CBLOCK))/SHA_CBLOCK;
				aes_off = len-AES_BLOCK_SIZE-blocks*SHA_CBLOCK;
				sha_off = SHA_CBLOCK-plen;

				aesni_cbc_encrypt(in,out,aes_off,
					&key->ks,ctx->iv,0);

				SHA1_Update(&key->md,out,sha_off);
				aesni256_cbc_sha1_dec(in+aes_off,
					out+aes_off,blocks,&key->ks,ctx->iv,
					&key->md,out+sha_off);

				sha_off += blocks*=SHA_CBLOCK;
				out += sha_off;
				len -= sha_off;
				inp_len -= sha_off;

				key->md.Nl += (blocks<<3);	/* at most 18 bits */
				memcpy(ctx->iv,tail_iv,AES_BLOCK_SIZE);
			}
#endif

#if 1
			len -= SHA_DIGEST_LENGTH;		/* amend mac */
			if (len>=(256+SHA_CBLOCK)) {
@@ -303,8 +542,8 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,

			/* but pretend as if we hashed padded payload */
			bitlen = key->md.Nl+(inp_len<<3);	/* at most 18 bits */
#ifdef BSWAP
			bitlen = BSWAP(bitlen);
#ifdef BSWAP4
			bitlen = BSWAP4(bitlen);
#else
			mac.c[0] = 0;
			mac.c[1] = (unsigned char)(bitlen>>16);
@@ -366,12 +605,12 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
			pmac->u[3] |= key->md.h3 & mask;
			pmac->u[4] |= key->md.h4 & mask;

#ifdef BSWAP
			pmac->u[0] = BSWAP(pmac->u[0]);
			pmac->u[1] = BSWAP(pmac->u[1]);
			pmac->u[2] = BSWAP(pmac->u[2]);
			pmac->u[3] = BSWAP(pmac->u[3]);
			pmac->u[4] = BSWAP(pmac->u[4]);
#ifdef BSWAP4
			pmac->u[0] = BSWAP4(pmac->u[0]);
			pmac->u[1] = BSWAP4(pmac->u[1]);
			pmac->u[2] = BSWAP4(pmac->u[2]);
			pmac->u[3] = BSWAP4(pmac->u[3]);
			pmac->u[4] = BSWAP4(pmac->u[4]);
#else
			for (i=0;i<5;i++) {
				res = pmac->u[i];
@@ -444,6 +683,34 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
#endif
			return ret;
		} else {
#if defined(STITCHED_DECRYPT_CALL)
			if (len>=1024 && ctx->key_len==32) {
				if (sha_off%=SHA_CBLOCK)
					blocks = (len-3*SHA_CBLOCK)/SHA_CBLOCK;
				else
					blocks = (len-2*SHA_CBLOCK)/SHA_CBLOCK;
				aes_off = len-blocks*SHA_CBLOCK;

				aesni_cbc_encrypt(in,out,aes_off,
					&key->ks,ctx->iv,0);
				SHA1_Update(&key->md,out,sha_off);
				aesni256_cbc_sha1_dec(in+aes_off,
					out+aes_off,blocks,&key->ks,ctx->iv,
					&key->md,out+sha_off);

				sha_off += blocks*=SHA_CBLOCK;
				out += sha_off;
				len -= sha_off;

				key->md.Nh += blocks>>29;
				key->md.Nl += blocks<<=3;
				if (key->md.Nl<(unsigned int)blocks) key->md.Nh++;
			} else
#endif
			/* decrypt HMAC|padding at once */
			aesni_cbc_encrypt(in,out,len,
					&key->ks,ctx->iv,0);

			SHA1_Update(&key->md,out,len);
		}
	}
@@ -514,6 +781,70 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void
			return SHA_DIGEST_LENGTH;
			}
		}
#if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
	case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE:
		return (int)(5+16+((arg+20+16)&-16));
	case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD:
		{
		EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
			(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *)ptr;
		unsigned int n4x=1, x4;
		unsigned int frag, last, packlen, inp_len;

		if (arg<sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM)) return -1;

		inp_len = param->inp[11]<<8|param->inp[12];

		if (ctx->encrypt)
			{
			if ((param->inp[9]<<8|param->inp[10]) < TLS1_1_VERSION)
				return -1;

			if (inp_len)
				{
				if (inp_len<4096) return 0;	/* too short */

				if (inp_len>=8192 && OPENSSL_ia32cap_P[2]&(1<<5))
					n4x=2;	/* AVX2 */
				}
			else if ((n4x=param->interleave/4) && n4x<=2)
				inp_len = param->len;
			else
				return -1;

			key->md = key->head;
			SHA1_Update(&key->md,param->inp,13);

			x4 = 4*n4x; n4x += 1;

			frag = inp_len>>n4x;
			last = inp_len+frag-(frag<<n4x);
			if (last>frag && ((last+13+9)%64<(x4-1))) {
				frag++;
				last -= x4-1;
			}

			packlen = 5+16+((frag+20+16)&-16);
			packlen = (packlen<<n4x)-packlen;
			packlen += 5+16+((last+20+16)&-16);

			param->interleave = x4;

			return (int)packlen;
			}
		else
			return -1;	/* not yet */
		}
	case EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT:
		{
		EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
			(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *)ptr;

		return (int)tls1_1_multi_block_encrypt(key,param->out,param->inp,
						param->len,param->interleave/4);
		}
	case EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT:
#endif
	default:
		return -1;
		}
@@ -527,7 +858,8 @@ static EVP_CIPHER aesni_128_cbc_hmac_sha1_cipher =
	NID_undef,
#endif
	16,16,16,
	EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|EVP_CIPH_FLAG_AEAD_CIPHER,
	EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|
	EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
	aesni_cbc_hmac_sha1_init_key,
	aesni_cbc_hmac_sha1_cipher,
	NULL,
@@ -546,7 +878,8 @@ static EVP_CIPHER aesni_256_cbc_hmac_sha1_cipher =
	NID_undef,
#endif
	16,32,16,
	EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|EVP_CIPH_FLAG_AEAD_CIPHER,
	EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|
	EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
	aesni_cbc_hmac_sha1_init_key,
	aesni_cbc_hmac_sha1_cipher,
	NULL,
+293 −22
Original line number Diff line number Diff line
@@ -58,6 +58,8 @@
#include <openssl/objects.h>
#include <openssl/aes.h>
#include <openssl/sha.h>
#include <openssl/rand.h>
#include "modes_lcl.h"

#ifndef EVP_CIPH_FLAG_AEAD_CIPHER
#define EVP_CIPH_FLAG_AEAD_CIPHER	0x200000
@@ -69,6 +71,10 @@
#define EVP_CIPH_FLAG_DEFAULT_ASN1 0
#endif

#if !defined(EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK)
#define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0
#endif

#define TLS1_1_VERSION 0x0302

typedef struct
@@ -89,12 +95,8 @@ typedef struct
	defined(_M_AMD64)	|| defined(_M_X64)	|| \
	defined(__INTEL__)	)

#if defined(__GNUC__) && __GNUC__>=2 && !defined(PEDANTIC)
# define BSWAP(x) ({ unsigned int r=(x); asm ("bswapl %0":"=r"(r):"0"(r)); r; })
#endif

extern unsigned int OPENSSL_ia32cap_P[3];
#define AESNI_AVX_CAPABLE   (1<<(57-32)|1<<(60-32))
#define AESNI_CAPABLE   (1<<(57-32))

int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
			      AES_KEY *key);
@@ -176,6 +178,207 @@ static void sha256_update(SHA256_CTX *c,const void *data,size_t len)
#endif
#define SHA256_Update sha256_update

#if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK

typedef struct { unsigned int A[8],B[8],C[8],D[8],E[8],F[8],G[8],H[8]; } SHA256_MB_CTX;
typedef struct { const unsigned char *ptr; int blocks;  } HASH_DESC;

void sha256_multi_block(SHA256_MB_CTX *,const HASH_DESC *,int);

typedef struct { const unsigned char *inp; unsigned char *out;
		 int blocks; u64 iv[2]; } CIPH_DESC; 

void aesni_multi_cbc_encrypt(CIPH_DESC *,void *,int);

static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key,
	unsigned char *out, const unsigned char *inp, size_t inp_len,
	int n4x)	/* n4x is 1 or 2 */
{
	HASH_DESC	hash_d[8], edges[8];
	CIPH_DESC	ciph_d[8];
	unsigned char	storage[sizeof(SHA256_MB_CTX)+32];
	union {	u64	q[16];
		u32	d[32];
		u8	c[128];	} blocks[8];
	SHA256_MB_CTX	*ctx;
	unsigned int	frag, last, packlen, i, x4=4*n4x;
	size_t		ret = 0;
	u8		*IVs;
#if defined(BSWAP8)
	u64		seqnum;
#endif

	ctx = (SHA256_MB_CTX *)(storage+32-((size_t)storage%32));	/* align */

	frag = (unsigned int)inp_len>>(1+n4x);
	last = (unsigned int)inp_len+frag-(frag<<(1+n4x));
	if (last>frag && ((last+13+9)%64)<(x4-1)) {
		frag++;
		last -= x4-1;
	}

	hash_d[0].ptr = inp;
	for (i=1;i<x4;i++)	hash_d[i].ptr = hash_d[i-1].ptr+frag;

#if defined(BSWAP8)
	memcpy(blocks[0].c,key->md.data,8);
	seqnum = BSWAP8(blocks[0].q[0]);
#endif
	for (i=0;i<x4;i++) {
		unsigned int len = (i==(x4-1)?last:frag);

		ctx->A[i] = key->md.h[0];
		ctx->B[i] = key->md.h[1];
		ctx->C[i] = key->md.h[2];
		ctx->D[i] = key->md.h[3];
		ctx->E[i] = key->md.h[4];
		ctx->F[i] = key->md.h[5];
		ctx->G[i] = key->md.h[6];
		ctx->H[i] = key->md.h[7];

		/* fix seqnum */
#if defined(BSWAP8)
		blocks[i].q[0] = BSWAP8(seqnum+i);
#else
		blocks[i].c[7] += ((u8*)key->md.data)[7]+i;
		if (blocks[i].c[7] < i) {
			int j;

			for (j=6;j>=0;j--) {
				if (blocks[i].c[j]=((u8*)key->md.data)[j]+1) break;
			}
		}
#endif
		blocks[i].c[8] = ((u8*)key->md.data)[8];
		blocks[i].c[9] = ((u8*)key->md.data)[9];
		blocks[i].c[10] = ((u8*)key->md.data)[10];
		/* fix length */
		blocks[i].c[11] = (u8)(len>>8);
		blocks[i].c[12] = (u8)(len);

		memcpy(blocks[i].c+13,hash_d[i].ptr,64-13);
		hash_d[i].ptr += 64-13;
		hash_d[i].blocks = (len-(64-13))/64;

		edges[i].ptr = blocks[i].c;
		edges[i].blocks = 1;
	}

	/* hash 13-byte headers and first 64-13 bytes of inputs */
	sha256_multi_block(ctx,edges,n4x);
	/* hash bulk inputs */
	sha256_multi_block(ctx,hash_d,n4x);

	memset(blocks,0,sizeof(blocks));
	for (i=0;i<x4;i++) {
		unsigned int		len = (i==(x4-1)?last:frag),
					off = hash_d[i].blocks*64;
		const unsigned char    *ptr = hash_d[i].ptr+off;

		off = len-(64-13)-off;	/* remainder actually */
		memcpy(blocks[i].c,ptr,off);
		blocks[i].c[off]=0x80;
		len += 64+13;		/* 64 is HMAC header */
		len *= 8;		/* convert to bits */
		if (off<(64-8)) {
			blocks[i].d[15] = BSWAP4(len);
			edges[i].blocks = 1;			
		} else {
			blocks[i].d[31] = BSWAP4(len);
			edges[i].blocks = 2;
		}
		edges[i].ptr = blocks[i].c;
	}

	/* hash input tails and finalize */
	sha256_multi_block(ctx,edges,n4x);

	memset(blocks,0,sizeof(blocks));
	for (i=0;i<x4;i++) {
		blocks[i].d[0] = BSWAP4(ctx->A[i]);	ctx->A[i] = key->tail.h[0];
		blocks[i].d[1] = BSWAP4(ctx->B[i]);	ctx->B[i] = key->tail.h[1];
		blocks[i].d[2] = BSWAP4(ctx->C[i]);	ctx->C[i] = key->tail.h[2];
		blocks[i].d[3] = BSWAP4(ctx->D[i]);	ctx->D[i] = key->tail.h[3];
		blocks[i].d[4] = BSWAP4(ctx->E[i]);	ctx->E[i] = key->tail.h[4];
		blocks[i].d[5] = BSWAP4(ctx->F[i]);	ctx->F[i] = key->tail.h[5];
		blocks[i].d[6] = BSWAP4(ctx->G[i]);	ctx->G[i] = key->tail.h[6];
		blocks[i].d[7] = BSWAP4(ctx->H[i]);	ctx->H[i] = key->tail.h[7];
		blocks[i].c[32] = 0x80;
		blocks[i].d[15] = BSWAP4((64+32)*8);
		edges[i].ptr = blocks[i].c;
		edges[i].blocks = 1;
	}

	/* finalize MACs */
	sha256_multi_block(ctx,edges,n4x);

	packlen = 5+16+((frag+32+16)&-16);

	out += (packlen<<(1+n4x))-packlen;
	inp += (frag<<(1+n4x))-frag;

	RAND_bytes((IVs=blocks[0].c),16*x4);	/* ask for IVs in bulk */

	for (i=x4-1;;i--) {
		unsigned int len = (i==(x4-1)?last:frag), pad, j;
		unsigned char *out0 = out;

		out += 5+16;		/* place for header and explicit IV */
		ciph_d[i].inp = out;
		ciph_d[i].out = out;

		memmove(out,inp,len);
		out += len;

		/* write MAC */
		((u32 *)out)[0] = BSWAP4(ctx->A[i]);
		((u32 *)out)[1] = BSWAP4(ctx->B[i]);
		((u32 *)out)[2] = BSWAP4(ctx->C[i]);
		((u32 *)out)[3] = BSWAP4(ctx->D[i]);
		((u32 *)out)[4] = BSWAP4(ctx->E[i]);
		((u32 *)out)[5] = BSWAP4(ctx->F[i]);
		((u32 *)out)[6] = BSWAP4(ctx->G[i]);
		((u32 *)out)[7] = BSWAP4(ctx->H[i]);
		out += 32;
		len += 32;

		/* pad */
		pad = 15-len%16;
		for (j=0;j<=pad;j++) *(out++) = pad;
		len += pad+1;

		ciph_d[i].blocks = len/16;
		len += 16;	/* account for explicit iv */

		/* arrange header */
		out0[0] = ((u8*)key->md.data)[8];
		out0[1] = ((u8*)key->md.data)[9];
		out0[2] = ((u8*)key->md.data)[10];
		out0[3] = (u8)(len>>8);
		out0[4] = (u8)(len);

		/* explicit iv */
		memcpy(ciph_d[i].iv, IVs, 16);
		memcpy(&out0[5],     IVs, 16);

		ret += len+5;

		if (i==0) break;

		out = out0-packlen;
		inp -= frag;
		IVs += 16;
	}

	aesni_multi_cbc_encrypt(ciph_d,&key->ks,n4x);

	OPENSSL_cleanse(blocks,sizeof(blocks));
	OPENSSL_cleanse(ctx,sizeof(*ctx));

	return ret;
}
#endif

static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
		      const unsigned char *in, size_t len)
	{
@@ -204,7 +407,9 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
			iv = AES_BLOCK_SIZE;

#if defined(STITCHED_CALL)
		if (plen>(sha_off+iv) && (blocks=(plen-(sha_off+iv))/SHA256_CBLOCK)) {
		if (OPENSSL_ia32cap_P[1]&(1<<(60-32)) && /* AVX? */
		    plen>(sha_off+iv) &&
		    (blocks=(plen-(sha_off+iv))/SHA256_CBLOCK)) {
			SHA256_Update(&key->md,in+iv,sha_off);

			(void)aesni_cbc_sha256_enc(in,out,blocks,&key->ks,
@@ -253,7 +458,7 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
		aesni_cbc_encrypt(in,out,len,
				&key->ks,ctx->iv,0);

		if (plen) {	/* "TLS" mode of operation */
		if (plen != NO_PAYLOAD_LENGTH) {	/* "TLS" mode of operation */
			size_t inp_len, mask, j, i;
			unsigned int res, maxpad, pad, bitlen;
			int ret = 1;
@@ -303,8 +508,8 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,

			/* but pretend as if we hashed padded payload */
			bitlen = key->md.Nl+(inp_len<<3);	/* at most 18 bits */
#ifdef BSWAP
			bitlen = BSWAP(bitlen);
#ifdef BSWAP4
			bitlen = BSWAP4(bitlen);
#else
			mac.c[0] = 0;
			mac.c[1] = (unsigned char)(bitlen>>16);
@@ -378,15 +583,15 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
			pmac->u[6] |= key->md.h[6] & mask;
			pmac->u[7] |= key->md.h[7] & mask;

#ifdef BSWAP
			pmac->u[0] = BSWAP(pmac->u[0]);
			pmac->u[1] = BSWAP(pmac->u[1]);
			pmac->u[2] = BSWAP(pmac->u[2]);
			pmac->u[3] = BSWAP(pmac->u[3]);
			pmac->u[4] = BSWAP(pmac->u[4]);
			pmac->u[5] = BSWAP(pmac->u[5]);
			pmac->u[6] = BSWAP(pmac->u[6]);
			pmac->u[7] = BSWAP(pmac->u[7]);
#ifdef BSWAP4
			pmac->u[0] = BSWAP4(pmac->u[0]);
			pmac->u[1] = BSWAP4(pmac->u[1]);
			pmac->u[2] = BSWAP4(pmac->u[2]);
			pmac->u[3] = BSWAP4(pmac->u[3]);
			pmac->u[4] = BSWAP4(pmac->u[4]);
			pmac->u[5] = BSWAP4(pmac->u[5]);
			pmac->u[6] = BSWAP4(pmac->u[6]);
			pmac->u[7] = BSWAP4(pmac->u[7]);
#else
			for (i=0;i<8;i++) {
				res = pmac->u[i];
@@ -529,6 +734,70 @@ static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, vo
			return SHA256_DIGEST_LENGTH;
			}
		}
#if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
	case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE:
		return (int)(5+16+((arg+32+16)&-16));
	case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD:
		{
		EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
			(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *)ptr;
		unsigned int n4x=1, x4;
		unsigned int frag, last, packlen, inp_len;

		if (arg<sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM)) return -1;

		inp_len = param->inp[11]<<8|param->inp[12];

		if (ctx->encrypt)
			{
			if ((param->inp[9]<<8|param->inp[10]) < TLS1_1_VERSION)
				return -1;

			if (inp_len)
				{
				if (inp_len<4096) return 0;	/* too short */

				if (inp_len>=8192 && OPENSSL_ia32cap_P[2]&(1<<5))
					n4x=2;	/* AVX2 */
				}
			else if ((n4x=param->interleave/4) && n4x<=2)
				inp_len = param->len;
			else
				return -1;

			key->md = key->head;
			SHA256_Update(&key->md,param->inp,13);

			x4 = 4*n4x; n4x += 1;

			frag = inp_len>>n4x;
			last = inp_len+frag-(frag<<n4x);
			if (last>frag && ((last+13+9)%64<(x4-1))) {
				frag++;
				last -= x4-1;
			}

			packlen = 5+16+((frag+32+16)&-16);
			packlen = (packlen<<n4x)-packlen;
			packlen += 5+16+((last+32+16)&-16);

			param->interleave = x4;

			return (int)packlen;
			}
		else
			return -1;	/* not yet */
		}
	case EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT:
		{
		EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
			(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *)ptr;

		return (int)tls1_1_multi_block_encrypt(key,param->out,param->inp,
						param->len,param->interleave/4);
		}
	case EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT:
#endif
	default:
		return -1;
		}
@@ -542,7 +811,8 @@ static EVP_CIPHER aesni_128_cbc_hmac_sha256_cipher =
	NID_undef,
#endif
	16,16,16,
	EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|EVP_CIPH_FLAG_AEAD_CIPHER,
	EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|
	EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
	aesni_cbc_hmac_sha256_init_key,
	aesni_cbc_hmac_sha256_cipher,
	NULL,
@@ -561,7 +831,8 @@ static EVP_CIPHER aesni_256_cbc_hmac_sha256_cipher =
	NID_undef,
#endif
	16,32,16,
	EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|EVP_CIPH_FLAG_AEAD_CIPHER,
	EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|
	EVP_CIPH_FLAG_AEAD_CIPHER|EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
	aesni_cbc_hmac_sha256_init_key,
	aesni_cbc_hmac_sha256_cipher,
	NULL,
@@ -574,14 +845,14 @@ static EVP_CIPHER aesni_256_cbc_hmac_sha256_cipher =

const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha256(void)
	{
	return((OPENSSL_ia32cap_P[1]&AESNI_AVX_CAPABLE)==AESNI_AVX_CAPABLE &&
	return((OPENSSL_ia32cap_P[1]&AESNI_CAPABLE) &&
		aesni_cbc_sha256_enc(NULL,NULL,0,NULL,NULL,NULL,NULL) ?
		&aesni_128_cbc_hmac_sha256_cipher:NULL);
	}

const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha256(void)
	{
	return((OPENSSL_ia32cap_P[1]&AESNI_AVX_CAPABLE)==AESNI_AVX_CAPABLE &&
	return((OPENSSL_ia32cap_P[1]&AESNI_CAPABLE) &&
		aesni_cbc_sha256_enc(NULL,NULL,0,NULL,NULL,NULL,NULL)?
		&aesni_256_cbc_hmac_sha256_cipher:NULL);
	}
+13 −0
Original line number Diff line number Diff line
@@ -364,6 +364,7 @@ struct evp_cipher_st
 */
#define 	EVP_CIPH_FLAG_CUSTOM_CIPHER	0x100000
#define		EVP_CIPH_FLAG_AEAD_CIPHER	0x200000
#define		EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK	0x400000

/* Cipher context flag to indicate we can handle
 * wrap mode: if allowed in older applications it could
@@ -403,6 +404,18 @@ struct evp_cipher_st
/* Set the GCM invocation field, decrypt only */
#define		EVP_CTRL_GCM_SET_IV_INV		0x18

#define		EVP_CTRL_TLS1_1_MULTIBLOCK_AAD	0x19
#define		EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT	0x1a
#define		EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT	0x1b
#define		EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE	0x1c

typedef struct {
	unsigned char *out;
	const unsigned char *inp;
	size_t len;
	unsigned int interleave;
} EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM;

/* GCM TLS constants */
/* Length of fixed part of IV derived from PRF */
#define EVP_GCM_TLS_FIXED_IV_LEN			4