Commit 50452b2e authored by Andy Polyakov
Browse files

e_padlock: add CTR mode.

parent d18762f7
Loading
Loading
Loading
Loading
+11 −10
Original line number Original line Diff line number Diff line
@@ -183,7 +183,7 @@ my ($mode,$opcode) = @_;
&set_label("${mode}_pic_point");
&set_label("${mode}_pic_point");
	&lea	($ctx,&DWP(16,$ctx));	# control word
	&lea	($ctx,&DWP(16,$ctx));	# control word
	&xor	("eax","eax");
	&xor	("eax","eax");
					if ($mode eq "ctr16") {
					if ($mode eq "ctr32") {
	&movq	("mm0",&QWP(-16,$ctx));	# load [upper part of] counter
	&movq	("mm0",&QWP(-16,$ctx));	# load [upper part of] counter
					} else {
					} else {
	&xor	("ebx","ebx");
	&xor	("ebx","ebx");
@@ -216,7 +216,7 @@ my ($mode,$opcode) = @_;
	&mov	(&DWP(8,"ebp"),$len);
	&mov	(&DWP(8,"ebp"),$len);
	&mov	($len,$chunk);
	&mov	($len,$chunk);
	&mov	(&DWP(12,"ebp"),$chunk);	# chunk
	&mov	(&DWP(12,"ebp"),$chunk);	# chunk
						if ($mode eq "ctr16") {
						if ($mode eq "ctr32") {
	&mov	("ecx",&DWP(-4,$ctx));
	&mov	("ecx",&DWP(-4,$ctx));
	&xor	($out,$out);
	&xor	($out,$out);
	&mov	("eax",&DWP(-8,$ctx));		# borrow $len
	&mov	("eax",&DWP(-8,$ctx));		# borrow $len
@@ -257,7 +257,7 @@ my ($mode,$opcode) = @_;
						}
						}
	&mov	($out,&DWP(0,"ebp"));		# restore parameters
	&mov	($out,&DWP(0,"ebp"));		# restore parameters
	&mov	($chunk,&DWP(12,"ebp"));
	&mov	($chunk,&DWP(12,"ebp"));
						if ($mode eq "ctr16") {
						if ($mode eq "ctr32") {
	&mov	($inp,&DWP(4,"ebp"));
	&mov	($inp,&DWP(4,"ebp"));
	&xor	($len,$len);
	&xor	($len,$len);
&set_label("${mode}_xor");
&set_label("${mode}_xor");
@@ -284,7 +284,7 @@ my ($mode,$opcode) = @_;
	&sub	($len,$chunk);
	&sub	($len,$chunk);
	&mov	($chunk,$PADLOCK_CHUNK);
	&mov	($chunk,$PADLOCK_CHUNK);
	&jnz	(&label("${mode}_loop"));
	&jnz	(&label("${mode}_loop"));
						if ($mode ne "ctr16") {
						if ($mode ne "ctr32") {
	&test	($out,0x0f);			# out_misaligned
	&test	($out,0x0f);			# out_misaligned
	&jz	(&label("${mode}_done"));
	&jz	(&label("${mode}_done"));
						}
						}
@@ -296,7 +296,7 @@ my ($mode,$opcode) = @_;
	&data_byte(0xf3,0xab);			# rep stosl
	&data_byte(0xf3,0xab);			# rep stosl
&set_label("${mode}_done");
&set_label("${mode}_done");
	&lea	("esp",&DWP(24,"ebp"));
	&lea	("esp",&DWP(24,"ebp"));
						if ($mode ne "ctr16") {
						if ($mode ne "ctr32") {
	&jmp	(&label("${mode}_exit"));
	&jmp	(&label("${mode}_exit"));


&set_label("${mode}_aligned",16);
&set_label("${mode}_aligned",16);
@@ -311,7 +311,7 @@ my ($mode,$opcode) = @_;
&set_label("${mode}_exit");			}
&set_label("${mode}_exit");			}
	&mov	("eax",1);
	&mov	("eax",1);
	&lea	("esp",&DWP(4,"esp"));		# popf
	&lea	("esp",&DWP(4,"esp"));		# popf
	&emms	()				if ($mode eq "ctr16");
	&emms	()				if ($mode eq "ctr32");
&set_label("${mode}_abort");
&set_label("${mode}_abort");
&function_end("padlock_${mode}_encrypt");
&function_end("padlock_${mode}_encrypt");
}
}
@@ -320,10 +320,11 @@ my ($mode,$opcode) = @_;
&generate_mode("cbc",0xd0);
&generate_mode("cbc",0xd0);
&generate_mode("cfb",0xe0);
&generate_mode("cfb",0xe0);
&generate_mode("ofb",0xe8);
&generate_mode("ofb",0xe8);
&generate_mode("ctr16",0xc8);	# yes, it implements own ctr with ecb opcode,
&generate_mode("ctr32",0xc8);	# yes, it implements own CTR with ECB opcode,
				# because hardware ctr was introduced later
				# because hardware CTR was introduced later
				# and even has errata on certain CPU stepping.
				# and even has errata on certain C7 stepping.
				# own implementation *always* works...
				# own implementation *always* works, though
				# ~15% slower than dedicated hardware...


&function_begin_B("padlock_xstore");
&function_begin_B("padlock_xstore");
	&push	("edi");
	&push	("edi");
+64 −3
Original line number Original line Diff line number Diff line
@@ -9,7 +9,8 @@


# September 2011
# September 2011
#
#
# Assembler helpers for Padlock engine.
# Assembler helpers for Padlock engine. See also e_padlock-x86.pl for
# details.


$flavour = shift;
$flavour = shift;
$output  = shift;
$output  = shift;
@@ -26,7 +27,7 @@ open STDOUT,"| $^X $xlate $flavour $output";


$code=".text\n";
$code=".text\n";


$PADLOCK_CHUNK=512;	# Must be a power of 2 larger than 16
$PADLOCK_CHUNK=512;	# Must be a power of 2 between 32 and 2^20


$ctx="%rdx";
$ctx="%rdx";
$out="%rdi";
$out="%rdi";
@@ -234,9 +235,23 @@ padlock_${mode}_encrypt:
	neg	%rax
	neg	%rax
	and	\$$PADLOCK_CHUNK-1,$chunk	# chunk%=PADLOCK_CHUNK
	and	\$$PADLOCK_CHUNK-1,$chunk	# chunk%=PADLOCK_CHUNK
	lea	(%rax,%rbp),%rsp
	lea	(%rax,%rbp),%rsp
___
$code.=<<___				if ($mode eq "ctr32");
	mov	-4($ctx),%eax		# pull 32-bit counter
	bswap	%eax
	neg	%eax
	and	\$`$PADLOCK_CHUNK/16-1`,%eax
	jz	.L${mode}_loop
	shl	\$4,%eax
	cmp	%rax,$len
	cmova	%rax,$chunk		# don't let counter cross PADLOCK_CHUNK
___
$code.=<<___;
	jmp	.L${mode}_loop
	jmp	.L${mode}_loop
.align	16
.align	16
.L${mode}_loop:
.L${mode}_loop:
	cmp	$len,$chunk		# ctr32 artefact
	cmova	$len,$chunk		# ctr32 artefact
	mov	$out,%r8		# save parameters
	mov	$out,%r8		# save parameters
	mov	$inp,%r9
	mov	$inp,%r9
	mov	$len,%r10
	mov	$len,%r10
@@ -261,6 +276,16 @@ $code.=<<___ if ($mode !~ /ecb|ctr/);
	movdqa	(%rax),%xmm0
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16($ctx)		# copy [or refresh] iv
	movdqa	%xmm0,-16($ctx)		# copy [or refresh] iv
___
___
$code.=<<___				if ($mode eq "ctr32");
	mov	-4($ctx),%eax		# pull 32-bit counter
	test	\$0xffff0000,%eax
	jnz	.L${mode}_no_corr
	bswap	%eax
	add	\$0x10000,%eax
	bswap	%eax
	mov	%eax,-4($ctx)
.L${mode}_no_corr:
___
$code.=<<___;
$code.=<<___;
	mov	%r8,$out		# restore paramters
	mov	%r8,$out		# restore paramters
	mov	%r11,$chunk
	mov	%r11,$chunk
@@ -295,6 +320,29 @@ $code.=<<___;


.align	16
.align	16
.L${mode}_aligned:
.L${mode}_aligned:
___
$code.=<<___				if ($mode eq "ctr32");
	mov	-4($ctx),%eax		# pull 32-bit counter
	mov	\$`16*0x10000`,$chunk
	bswap	%eax
	cmp	$len,$chunk
	cmova	$len,$chunk
	neg	%eax
	and	\$0xffff,%eax
	jz	.L${mode}_aligned_loop
	shl	\$4,%eax
	cmp	%rax,$len
	cmova	%rax,$chunk		# don't let counter cross 2^16
	jmp	.L${mode}_aligned_loop
.align	16
.L${mode}_aligned_loop:
	cmp	$len,$chunk
	cmova	$len,$chunk
	mov	$len,%r10		# save parameters
	mov	$chunk,$len
	mov	$chunk,%r11
___
$code.=<<___;
	lea	-16($ctx),%rax		# ivp
	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	lea	16($ctx),%rbx		# key
	shr	\$4,$len		# len/=AES_BLOCK_SIZE
	shr	\$4,$len		# len/=AES_BLOCK_SIZE
@@ -304,6 +352,19 @@ $code.=<<___ if ($mode !~ /ecb|ctr/);
	movdqa	(%rax),%xmm0
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16($ctx)		# copy [or refresh] iv
	movdqa	%xmm0,-16($ctx)		# copy [or refresh] iv
___
___
$code.=<<___				if ($mode eq "ctr32");
	mov	-4($ctx),%eax		# pull 32-bit counter
	bswap	%eax
	add	\$0x10000,%eax
	bswap	%eax
	mov	%eax,-4($ctx)

	mov	%r11,$chunk		# restore paramters
	mov	%r10,$len
	sub	$chunk,$len
	mov	\$`16*0x10000`,$chunk
	jnz	.L${mode}_aligned_loop
___
$code.=<<___;
$code.=<<___;
.L${mode}_exit:
.L${mode}_exit:
	mov	\$1,%eax
	mov	\$1,%eax
@@ -320,7 +381,7 @@ ___
&generate_mode("cbc",0xd0);
&generate_mode("cbc",0xd0);
&generate_mode("cfb",0xe0);
&generate_mode("cfb",0xe0);
&generate_mode("ofb",0xe8);
&generate_mode("ofb",0xe8);
&generate_mode("ctr16",0xd8);
&generate_mode("ctr32",0xd8);	# all 64-bit CPUs have working CTR...


$code.=<<___;
$code.=<<___;
.asciz	"VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>"
.asciz	"VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>"
+45 −3
Original line number Original line Diff line number Diff line
@@ -76,6 +76,7 @@
#endif
#endif
#include <openssl/rand.h>
#include <openssl/rand.h>
#include <openssl/err.h>
#include <openssl/err.h>
#include <openssl/modes.h>


#ifndef OPENSSL_NO_HW
#ifndef OPENSSL_NO_HW
#ifndef OPENSSL_NO_HW_PADLOCK
#ifndef OPENSSL_NO_HW_PADLOCK
@@ -337,16 +338,19 @@ static int padlock_cipher_nids[] = {
	NID_aes_128_cbc,
	NID_aes_128_cbc,
	NID_aes_128_cfb,
	NID_aes_128_cfb,
	NID_aes_128_ofb,
	NID_aes_128_ofb,
	NID_aes_128_ctr,


	NID_aes_192_ecb,
	NID_aes_192_ecb,
	NID_aes_192_cbc,
	NID_aes_192_cbc,
	NID_aes_192_cfb,
	NID_aes_192_cfb,
	NID_aes_192_ofb,
	NID_aes_192_ofb,
	NID_aes_192_ctr,


	NID_aes_256_ecb,
	NID_aes_256_ecb,
	NID_aes_256_cbc,
	NID_aes_256_cbc,
	NID_aes_256_cfb,
	NID_aes_256_cfb,
	NID_aes_256_ofb,
	NID_aes_256_ofb,
	NID_aes_256_ctr
};
};
static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
				      sizeof(padlock_cipher_nids[0]));
				      sizeof(padlock_cipher_nids[0]));
@@ -505,10 +509,35 @@ padlock_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
	return 1;
	return 1;
}
}


/*
 * Adapter that lets padlock_ctr32_encrypt() serve as the block function
 * handed to CRYPTO_ctr128_encrypt_ctr32().
 *
 * The parameter list deliberately matches the ctr128_f prototype from
 * <openssl/modes.h> (the key is an opaque "const void *"): this function
 * is only ever invoked through a ctr128_f pointer, and calling a function
 * through a pointer of an incompatible type is undefined behaviour.
 *
 *   in     - input data, "blocks" whole AES blocks long
 *   out    - output buffer of the same size
 *   blocks - number of AES_BLOCK_SIZE-byte blocks to process
 *   ctx    - really a struct padlock_cipher_data *; it is written to
 *            (its iv field is refreshed), so the object it points at
 *            must be writable
 *   ivec   - counter block, AES_BLOCK_SIZE bytes
 */
static void padlock_ctr32_encrypt_glue(const unsigned char *in,
			unsigned char *out, size_t blocks,
			const void *ctx,
			const unsigned char *ivec)
{
	/* Recover the real type; const is dropped because iv is updated. */
	struct padlock_cipher_data *cdata = (struct padlock_cipher_data *)ctx;

	/* Seed the cipher data with the caller's current counter block. */
	memcpy(cdata->iv,ivec,AES_BLOCK_SIZE);
	/* padlock_ctr32_encrypt() takes a byte count, not a block count. */
	padlock_ctr32_encrypt(out,in,cdata,AES_BLOCK_SIZE*blocks);
}

/*
 * CTR-mode cipher callback.
 *
 * Delegates the whole request to CRYPTO_ctr128_encrypt_ctr32(), supplying
 * the aligned Padlock cipher data as the key and
 * padlock_ctr32_encrypt_glue() as the block function.  The counter block
 * (ctx->iv), the scratch buffer (ctx->buf) and the running "num" value
 * all live in the EVP context.
 *
 * Always returns 1.
 */
static int
padlock_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
		   const unsigned char *in_arg, size_t nbytes)
{
	/* The glue routine is passed to the generic code as a ctr128_f. */
	ctr128_f block_fn = (ctr128_f)padlock_ctr32_encrypt_glue;
	struct padlock_cipher_data *key_data = ALIGNED_CIPHER_DATA(ctx);
	/* Local copy: the helper wants an unsigned int it can update. */
	unsigned int used = ctx->num;

	CRYPTO_ctr128_encrypt_ctr32(in_arg, out_arg, nbytes, key_data,
				    ctx->iv, ctx->buf, &used, block_fn);

	/* Store the updated value back for the next call. */
	ctx->num = (size_t)used;
	return 1;
}

#define EVP_CIPHER_block_size_ECB	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_ECB	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_CBC	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_CBC	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_OFB	1
#define EVP_CIPHER_block_size_OFB	1
#define EVP_CIPHER_block_size_CFB	1
#define EVP_CIPHER_block_size_CFB	1
#define EVP_CIPHER_block_size_CTR	1


/* Declaring so many ciphers by hand would be a pain.
/* Declaring so many ciphers by hand would be a pain.
   Instead introduce a bit of preprocessor magic :-) */
   Instead introduce a bit of preprocessor magic :-) */
@@ -533,16 +562,19 @@ DECLARE_AES_EVP(128,ecb,ECB);
DECLARE_AES_EVP(128,cbc,CBC);
DECLARE_AES_EVP(128,cbc,CBC);
DECLARE_AES_EVP(128,cfb,CFB);
DECLARE_AES_EVP(128,cfb,CFB);
DECLARE_AES_EVP(128,ofb,OFB);
DECLARE_AES_EVP(128,ofb,OFB);
DECLARE_AES_EVP(128,ctr,CTR);


DECLARE_AES_EVP(192,ecb,ECB);
DECLARE_AES_EVP(192,ecb,ECB);
DECLARE_AES_EVP(192,cbc,CBC);
DECLARE_AES_EVP(192,cbc,CBC);
DECLARE_AES_EVP(192,cfb,CFB);
DECLARE_AES_EVP(192,cfb,CFB);
DECLARE_AES_EVP(192,ofb,OFB);
DECLARE_AES_EVP(192,ofb,OFB);
DECLARE_AES_EVP(192,ctr,CTR);


DECLARE_AES_EVP(256,ecb,ECB);
DECLARE_AES_EVP(256,ecb,ECB);
DECLARE_AES_EVP(256,cbc,CBC);
DECLARE_AES_EVP(256,cbc,CBC);
DECLARE_AES_EVP(256,cfb,CFB);
DECLARE_AES_EVP(256,cfb,CFB);
DECLARE_AES_EVP(256,ofb,OFB);
DECLARE_AES_EVP(256,ofb,OFB);
DECLARE_AES_EVP(256,ctr,CTR);


static int
static int
padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid)
padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid)
@@ -567,6 +599,9 @@ padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid
	  case NID_aes_128_ofb:
	  case NID_aes_128_ofb:
	    *cipher = &padlock_aes_128_ofb;
	    *cipher = &padlock_aes_128_ofb;
	    break;
	    break;
	  case NID_aes_128_ctr:
	    *cipher = &padlock_aes_128_ctr;
	    break;


	  case NID_aes_192_ecb:
	  case NID_aes_192_ecb:
	    *cipher = &padlock_aes_192_ecb;
	    *cipher = &padlock_aes_192_ecb;
@@ -580,6 +615,9 @@ padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid
	  case NID_aes_192_ofb:
	  case NID_aes_192_ofb:
	    *cipher = &padlock_aes_192_ofb;
	    *cipher = &padlock_aes_192_ofb;
	    break;
	    break;
	  case NID_aes_192_ctr:
	    *cipher = &padlock_aes_192_ctr;
	    break;


	  case NID_aes_256_ecb:
	  case NID_aes_256_ecb:
	    *cipher = &padlock_aes_256_ecb;
	    *cipher = &padlock_aes_256_ecb;
@@ -593,6 +631,9 @@ padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid
	  case NID_aes_256_ofb:
	  case NID_aes_256_ofb:
	    *cipher = &padlock_aes_256_ofb;
	    *cipher = &padlock_aes_256_ofb;
	    break;
	    break;
	  case NID_aes_256_ctr:
	    *cipher = &padlock_aes_256_ctr;
	    break;


	  default:
	  default:
	    /* Sorry, we don't support this NID */
	    /* Sorry, we don't support this NID */
@@ -610,6 +651,7 @@ padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
{
{
	struct padlock_cipher_data *cdata;
	struct padlock_cipher_data *cdata;
	int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;
	int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;
	unsigned long mode = EVP_CIPHER_CTX_mode(ctx);


	if (key==NULL) return 0;	/* ERROR */
	if (key==NULL) return 0;	/* ERROR */


@@ -617,7 +659,7 @@ padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
	memset(cdata, 0, sizeof(struct padlock_cipher_data));
	memset(cdata, 0, sizeof(struct padlock_cipher_data));


	/* Prepare Control word. */
	/* Prepare Control word. */
	if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
	if (mode == EVP_CIPH_OFB_MODE || mode == EVP_CIPH_CTR_MODE)
		cdata->cword.b.encdec = 0;
		cdata->cword.b.encdec = 0;
	else
	else
		cdata->cword.b.encdec = (ctx->encrypt == 0);
		cdata->cword.b.encdec = (ctx->encrypt == 0);
@@ -640,8 +682,8 @@ padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
			   and is listed as hardware errata. They most
			   and is listed as hardware errata. They most
			   likely will fix it at some point and then
			   likely will fix it at some point and then
			   a check for stepping would be due here. */
			   a check for stepping would be due here. */
			if ((EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_ECB_MODE ||
			if ((mode == EVP_CIPH_ECB_MODE ||
			     EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CBC_MODE)
			     mode == EVP_CIPH_CBC_MODE)
			    && !enc)
			    && !enc)
				AES_set_decrypt_key(key, key_len, &cdata->ks);
				AES_set_decrypt_key(key, key_len, &cdata->ks);
			else
			else