Commit d608b4d6 authored by Andy Polyakov's avatar Andy Polyakov
Browse files

AES-NI engine jumbo update.

parent b3f6fe91
Loading
Loading
Loading
Loading
+186 −84
Original line number Diff line number Diff line
@@ -29,8 +29,8 @@ $rounds="ecx";
$key="edx";
$inp="esi";
$out="edi";
$rounds_="ebx";
$key_="ebp";
$rounds_="ebx";	# backup copy for $rounds
$key_="ebp";	# backup copy for $key

$inout0="xmm0";
$inout1="xmm1";
@@ -39,26 +39,23 @@ $rndkey0="xmm3";
$rndkey1="xmm4";
$ivec="xmm5";
$in0="xmm6";
$in1="xmm7";
$in1="xmm7";	$inout3="xmm7";

sub _aesni_generate1	# folded loop
# Inline version of internal aesni_[en|de]crypt1
sub aesni_inline_generate1
{ my $p=shift;

    &function_begin_B("_aesni_${p}rypt1");
    &$movekey		($rndkey0,&QWP(0,$key));
    &$movekey		($rndkey1,&QWP(16,$key));
	&lea		($key,&DWP(16,$key));
    &lea		($key,&DWP(32,$key));
    &pxor		($inout0,$rndkey0);
	&dec		($rounds);
    &set_label("${p}1_loop",16);
    &set_label("${p}1_loop");
	eval"&aes${p}	($inout0,$rndkey1)";
	&dec		($rounds);
	&lea		($key,&DWP(16,$key));
	&$movekey	($rndkey1,&QWP(0,$key));
	&lea		($key,&DWP(16,$key));
    &jnz		(&label("${p}1_loop"));
    eval"&aes${p}last	($inout0,$rndkey1)";
    &ret();
    &function_end_B("_aesni_${p}rypt1");
}

sub aesni_generate1	# fully unrolled loop
@@ -67,7 +64,7 @@ sub aesni_generate1 # fully unrolled loop
    &function_begin_B("_aesni_${p}rypt1");
	&$movekey	($rndkey0,&QWP(0,$key));
	&$movekey	($rndkey1,&QWP(0x10,$key));
	&cmp		($rounds,12);
	&cmp		($rounds,11);
	&pxor		($inout0,$rndkey0);
	&$movekey	($rndkey0,&QWP(0x20,$key));
	&lea		($key,&DWP(0x30,$key));
@@ -107,52 +104,52 @@ sub aesni_generate1 # fully unrolled loop
    &function_end_B("_aesni_${p}rypt1");
}

&aesni_generate1("enc");
# void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key);
# &aesni_generate1("dec");
&function_begin_B("${PREFIX}_encrypt");
	&mov	("eax",&wparam(0));
	&mov	($key,&wparam(2));
	&movups	($inout0,&QWP(0,"eax"));
	&mov	($rounds,&DWP(240,$key));
	&mov	("eax",&wparam(1));
	&call	("_aesni_encrypt1");
	&aesni_inline_generate1("enc");	# &call	("_aesni_encrypt1");
	&movups	(&QWP(0,"eax"),$inout0);
	&ret	();
&function_end_B("${PREFIX}_encrypt");

&aesni_generate1("dec");
# void $PREFIX_decrypt (const void *inp,void *out,const AES_KEY *key);
# &aesni_generate1("dec");
&function_begin_B("${PREFIX}_decrypt");
	&mov	("eax",&wparam(0));
	&mov	($key,&wparam(2));
	&movups	($inout0,&QWP(0,"eax"));
	&mov	($rounds,&DWP(240,$key));
	&mov	("eax",&wparam(1));
	&call	("_aesni_decrypt1");
	&aesni_inline_generate1("dec");	# &call	("_aesni_decrypt1");
	&movups	(&QWP(0,"eax"),$inout0);
	&ret	();
&function_end_B("${PREFIX}_decrypt");

# _aesni_[en|de]crypt3 are private interfaces, 3 denotes interleave
# factor. Why 3x? Even though aes[enc|dec] latency is 6, it turned
# out that it can be scheduled only every *second* cycle. Thus 3x
# interleave is the one providing optimal utilization, i.e. when
# subroutine's throughput is virtually same as of non-interleaved
# subroutine for number of input blocks up to 3. This is why it
# handles even double-block inputs. Larger interleave factor would
# perform suboptimally on shorter inputs... 


# _aesni_[en|de]crypt[34] are private interfaces, N denotes interleave
# factor. Why 3x subroutine is used in loops? Even though aes[enc|dec]
# latency is 6, it turned out that it can be scheduled only every
# *second* cycle. Thus 3x interleave is the one providing optimal
# utilization, i.e. when subroutine's throughput is virtually same as
# of non-interleaved subroutine [for number of input blocks up to 3].
# This is why it makes no sense to implement 2x subroutine. As soon
# as/if Intel improves throughput by making it possible to schedule
# the instructions in question *every* cycles I would have to
# implement 6x interleave and use it in loop...
sub aesni_generate3
{ my $p=shift;

    &function_begin_B("_aesni_${p}rypt3");
	&$movekey	($rndkey0,&QWP(0,$key));
	&$movekey	($rndkey1,&QWP(16,$key));
	&shr		($rounds,1);
	&$movekey	($rndkey1,&QWP(16,$key));
	&lea		($key,&DWP(32,$key));
	&pxor		($inout0,$rndkey0);
	&pxor		($inout1,$rndkey0);
	&dec		($rounds);
	&pxor		($inout2,$rndkey0);
	&jmp		(&label("${p}3_loop"));
    &set_label("${p}3_loop",16);
@@ -177,14 +174,59 @@ sub aesni_generate3
    &ret();
    &function_end_B("_aesni_${p}rypt3");
}

# 4x interleave is implemented to improve small block performance,
# most notably [and naturally] 4 block by ~30%. One can argue that one
# should have implemented 5x as well, but improvement  would be <20%,
# so it's not worth it...
sub aesni_generate4
{ my $p=shift;

    &function_begin_B("_aesni_${p}rypt4");
	&$movekey	($rndkey0,&QWP(0,$key));
	&$movekey	($rndkey1,&QWP(16,$key));
	&shr		($rounds,1);
	&lea		($key,&DWP(32,$key));
	&pxor		($inout0,$rndkey0);
	&pxor		($inout1,$rndkey0);
	&pxor		($inout2,$rndkey0);
	&pxor		($inout3,$rndkey0);
	&jmp		(&label("${p}3_loop"));
    &set_label("${p}3_loop",16);
	eval"&aes${p}	($inout0,$rndkey1)";
	&$movekey	($rndkey0,&QWP(0,$key));
	eval"&aes${p}	($inout1,$rndkey1)";
	&dec		($rounds);
	eval"&aes${p}	($inout2,$rndkey1)";
	eval"&aes${p}	($inout3,$rndkey1)";
	&$movekey	($rndkey1,&QWP(16,$key));
	eval"&aes${p}	($inout0,$rndkey0)";
	&lea		($key,&DWP(32,$key));
	eval"&aes${p}	($inout1,$rndkey0)";
	eval"&aes${p}	($inout2,$rndkey0)";
	eval"&aes${p}	($inout3,$rndkey0)";
	&jnz		(&label("${p}3_loop"));
    eval"&aes${p}	($inout0,$rndkey1)";
    &$movekey		($rndkey0,&QWP(0,$key));
    eval"&aes${p}	($inout1,$rndkey1)";
    eval"&aes${p}	($inout2,$rndkey1)";
    eval"&aes${p}	($inout3,$rndkey1)";
    eval"&aes${p}last	($inout0,$rndkey0)";
    eval"&aes${p}last	($inout1,$rndkey0)";
    eval"&aes${p}last	($inout2,$rndkey0)";
    eval"&aes${p}last	($inout3,$rndkey0)";
    &ret();
    &function_end_B("_aesni_${p}rypt4");
}
&aesni_generate3("enc") if ($PREFIX eq "aesni");
&aesni_generate3("dec");
&aesni_generate4("enc") if ($PREFIX eq "aesni");
&aesni_generate4("dec");

if ($PREFIX eq "aesni") {
# void aesni_ecb_encrypt (const void *in, void *out,
#                         size_t length, const AES_KEY *key,
#                         int enc);

&function_begin("aesni_ecb_encrypt");
	&mov	($inp,&wparam(0));
	&mov	($out,&wparam(1));
@@ -200,79 +242,121 @@ if ($PREFIX eq "aesni") {
	&mov	($rounds_,$rounds);	# backup $rounds
	&jz	(&label("ecb_decrypt"));

	&sub	($len,0x30);
	&jc	(&label("ecb_enc_tail"));
	jmp	(&label("ecb_enc_loop3"));
	&sub	($len,0x40);
	&jbe	(&label("ecb_enc_tail"));
	&jmp	(&label("ecb_enc_loop3"));

&set_label("ecb_enc_loop3",16);
	&movups	($inout0,&QWP(0,$inp));
	&movups	($inout1,&QWP(0x10,$inp));
	&movups	($inout2,&QWP(0x20,$inp));
	&lea	($inp,&DWP(0x30,$inp));
	&call	("_aesni_encrypt3");
	&movups	(&QWP(0,$out),$inout0);
	&sub	($len,0x30);
	&movups	(&QWP(0x10,$out),$inout1);
	&lea	($inp,&DWP(0x30,$inp));
	&lea	($out,&DWP(0x30,$out));
	&movups	(&QWP(-0x30,$out),$inout0);
	&mov	($key,$key_);		# restore $key
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(-0x20,$out),$inout1);
	&mov	($rounds,$rounds_);	# restore $rounds
	&lea	($out,&DWP(0x30,$out));
	&jnc	(&label("ecb_enc_loop3"));
	&movups	(&QWP(-0x10,$out),$inout2);
	&ja	(&label("ecb_enc_loop3"));

&set_label("ecb_enc_tail");
	&add	($len,0x30);
	&add	($len,0x40);
	&jz	(&label("ecb_ret"));

	&cmp	($len,0x10);
	&movups	($inout0,&QWP(0,$inp));
	je	(&label("ecb_enc_one"));
	&je	(&label("ecb_enc_one"));
	&cmp	($len,0x20);
	&movups	($inout1,&QWP(0x10,$inp));
	&call	("_aesni_encrypt3");
	&je	(&label("ecb_enc_two"));
	&cmp	($len,0x30);
	&movups	($inout2,&QWP(0x20,$inp));
	&je	(&label("ecb_enc_three"));
	&movups	($inout3,&QWP(0x30,$inp));
	&call	("_aesni_encrypt4");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(0x30,$out),$inout3);
	jmp	(&label("ecb_ret"));

&set_label("ecb_enc_one",16);
	&call	("_aesni_encrypt1");
	&aesni_inline_generate1("enc");	# &call	("_aesni_encrypt1");
	&movups	(&QWP(0,$out),$inout0);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_enc_two",16);
	&call	("_aesni_encrypt3");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_enc_three",16);
	&call	("_aesni_encrypt3");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_decrypt",16);
	&sub	($len,0x30);
	&jc	(&label("ecb_dec_tail"));
	jmp	(&label("ecb_dec_loop3"));
	&sub	($len,0x40);
	&jbe	(&label("ecb_dec_tail"));
	&jmp	(&label("ecb_dec_loop3"));

&set_label("ecb_dec_loop3",16);
	&movups	($inout0,&QWP(0,$inp));
	&movups	($inout1,&QWP(0x10,$inp));
	&movups	($inout2,&QWP(0x20,$inp));
	&call	("_aesni_decrypt3");
	&movups	(&QWP(0,$out),$inout0);
	&sub	($len,0x30);
	&lea	($inp,&DWP(0x30,$inp));
	&movups	(&QWP(0x10,$out),$inout1);
	&lea	($out,&DWP(0x30,$out));
	&movups	(&QWP(-0x30,$out),$inout0);
	&mov	($key,$key_);		# restore $key
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(-0x20,$out),$inout1);
	&mov	($rounds,$rounds_);	# restore $rounds
	&lea	($out,&DWP(0x30,$out));
	&jnc	(&label("ecb_dec_loop3"));
	&movups	(&QWP(-0x10,$out),$inout2);
	&ja	(&label("ecb_dec_loop3"));

&set_label("ecb_dec_tail");
	&add	($len,0x30);
	&add	($len,0x40);
	&jz	(&label("ecb_ret"));

	&cmp	($len,0x10);
	&movups	($inout0,&QWP(0,$inp));
	je	(&label("ecb_dec_one"));
	&je	(&label("ecb_dec_one"));
	&cmp	($len,0x20);
	&movups	($inout1,&QWP(0x10,$inp));
	&call	("_aesni_decrypt3");
	&je	(&label("ecb_dec_two"));
	&cmp	($len,0x30);
	&movups	($inout2,&QWP(0x20,$inp));
	&je	(&label("ecb_dec_three"));
	&movups	($inout3,&QWP(0x30,$inp));
	&call	("_aesni_decrypt4");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	jmp	(&label("ecb_ret"));
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(0x30,$out),$inout3);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_dec_one",16);
	&call	("_aesni_decrypt1");
	&aesni_inline_generate1("dec");	# &call	("_aesni_decrypt3");
	&movups	(&QWP(0,$out),$inout0);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_dec_two",16);
	&call	("_aesni_decrypt3");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_dec_three",16);
	&call	("_aesni_decrypt3");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);

&set_label("ecb_ret");
&function_end("aesni_ecb_encrypt");
@@ -288,7 +372,7 @@ if ($PREFIX eq "aesni") {
	&mov	($key,&wparam(3));
	&test	($len,$len);
	&mov	($key_,&wparam(4));
	&je	(&label("cbc_ret"));
	&jz	(&label("cbc_ret"));

	&cmp	(&wparam(5),0);
	&movups	($ivec,&QWP(0,$key_));	# load IV
@@ -307,12 +391,12 @@ if ($PREFIX eq "aesni") {
	&movups	($ivec,&QWP(0,$inp));
	&lea	($inp,&DWP(16,$inp));
	&pxor	($inout0,$ivec);
	&call	("_aesni_encrypt1");
	&aesni_inline_generate1("enc");	# &call	("_aesni_encrypt3");
	&sub	($len,16);
	&lea	($out,&DWP(16,$out));
	&mov	($rounds,$rounds_);	# restore $rounds
	&mov	($key,$key_);		# restore $key
	&movups	(&QWP(0,$out),$inout0);
	&lea	($out,&DWP(16,$out));
	&movups	(&QWP(-16,$out),$inout0);
	&jnc	(&label("cbc_enc_loop"));
	&add	($len,16);
	&jnz	(&label("cbc_enc_tail"));
@@ -333,8 +417,8 @@ if ($PREFIX eq "aesni") {
	&jmp	(&label("cbc_enc_loop"));

&set_label("cbc_decrypt",16);
	&sub	($len,0x30);
	&jc	(&label("cbc_dec_tail"));
	&sub	($len,0x40);
	&jbe	(&label("cbc_dec_tail"));
	&jmp	(&label("cbc_dec_loop3"));

&set_label("cbc_dec_loop3",16);
@@ -346,20 +430,20 @@ if ($PREFIX eq "aesni") {
	&call	("_aesni_decrypt3");
	&sub	($len,0x30);
	&lea	($inp,&DWP(0x30,$inp));
	&lea	($out,&DWP(0x30,$out));
	&pxor	($inout0,$ivec);
	&pxor	($inout1,$in0);
	&movups	($ivec,&QWP(-0x10,$inp));
	&pxor	($inout2,$in1);
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(-0x30,$out),$inout0);
	&mov	($rounds,$rounds_)	# restore $rounds
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(-0x20,$out),$inout1);
	&mov	($key,$key_);		# restore $key
	&movups	(&QWP(0x20,$out),$inout2);
	&lea	($out,&DWP(0x30,$out));
	&jnc	(&label("cbc_dec_loop3"));
	&movups	(&QWP(-0x10,$out),$inout2);
	&ja	(&label("cbc_dec_loop3"));

&set_label("cbc_dec_tail");
	&add	($len,0x30);
	&add	($len,0x40);
	&jz	(&label("cbc_ret"));

	&movups	($inout0,&QWP(0,$inp));
@@ -371,19 +455,26 @@ if ($PREFIX eq "aesni") {
	&movaps	($in1,$inout1);
	&jbe	(&label("cbc_dec_two"));
	&movups	($inout2,&QWP(0x20,$inp));
	&call	("_aesni_decrypt3");
	&cmp	($len,0x30);
	&jbe	(&label("cbc_dec_three"));
	&movups	($inout3,&QWP(0x30,$inp));
	&call	("_aesni_decrypt4");
	&movups	($rndkey0,&QWP(0x10,$inp));
	&movups	($rndkey1,&QWP(0x20,$inp));
	&pxor	($inout0,$ivec);
	&movups	($ivec,&QWP(0x20,$inp));
	&pxor	($inout1,$in0);
	&pxor	($inout2,$in1);
	&movups	($ivec,&QWP(0x30,$inp));
	&movups	(&QWP(0,$out),$inout0);
	&pxor	($inout2,$rndkey0);
	&pxor	($inout3,$rndkey1);
	&movups	(&QWP(0x10,$out),$inout1);
	&movaps	($inout0,$inout2);
	&lea	($out,&DWP(0x20,$out));
	&movups	(&QWP(0x20,$out),$inout2);
	&movaps	($inout0,$inout3);
	&lea	($out,&DWP(0x30,$out));
	&jmp	(&label("cbc_dec_tail_collected"));

&set_label("cbc_dec_one");
	&call	("_aesni_decrypt1");
	&aesni_inline_generate1("dec");	# &call	("_aesni_decrypt3");
	&pxor	($inout0,$ivec);
	&movaps	($ivec,$in0);
	&jmp	(&label("cbc_dec_tail_collected"));
@@ -396,6 +487,18 @@ if ($PREFIX eq "aesni") {
	&movaps	($inout0,$inout1);
	&movaps	($ivec,$in1);
	&lea	($out,&DWP(0x10,$out));
	&jmp	(&label("cbc_dec_tail_collected"));

&set_label("cbc_dec_three");
	&call	("_aesni_decrypt3");
	&pxor	($inout0,$ivec);
	&pxor	($inout1,$in0);
	&pxor	($inout2,$in1);
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movaps	($inout0,$inout2);
	&movups	($ivec,&QWP(0x20,$inp));
	&lea	($out,&DWP(0x20,$out));

&set_label("cbc_dec_tail_collected");
	&and	($len,15);
@@ -446,7 +549,7 @@ if ($PREFIX eq "aesni") {
	&jne	(&label("bad_keybits"));

&set_label("10rounds",16);
	&mov		($rounds,10);
	&mov		($rounds,9);
	&$movekey	(&QWP(-16,$key),"xmm0");	# round 0
	&aeskeygenassist("xmm1","xmm0",0x01);		# round 1
	&call		(&label("key_128_cold"));
@@ -487,7 +590,7 @@ if ($PREFIX eq "aesni") {

&set_label("12rounds",16);
	&movq		("xmm2",&QWP(16,"eax"));	# remaining 1/3 of *userKey
	&mov		($rounds,12);
	&mov		($rounds,11);
	&$movekey	(&QWP(-16,$key),"xmm0")		# round 0
	&aeskeygenassist("xmm1","xmm2",0x01);		# round 1,2
	&call		(&label("key_192a_cold"));
@@ -540,7 +643,7 @@ if ($PREFIX eq "aesni") {

&set_label("14rounds",16);
	&movups		("xmm2",&QWP(16,"eax"));	# remaining half of *userKey
	&mov		($rounds,14);
	&mov		($rounds,13);
	&lea		($key,&DWP(16,$key));
	&$movekey	(&QWP(-32,$key),"xmm0");	# round 0
	&$movekey	(&QWP(-16,$key),"xmm2");	# round 1
@@ -625,10 +728,10 @@ if ($PREFIX eq "aesni") {
	&mov	($key,&wparam(2));
	&call	("_aesni_set_encrypt_key");
	&mov	($key,&wparam(2));
	&shl	($rounds,4)	# actually rounds after _aesni_set_encrypt_key
	&shl	($rounds,4)	# rounds-1 after _aesni_set_encrypt_key
	&test	("eax","eax");
	&jnz	(&label("dec_key_ret"));
	&lea	("eax",&DWP(0,$key,$rounds));	# end of key schedule
	&lea	("eax",&DWP(16,$key,$rounds));	# end of key schedule

	&$movekey	("xmm0",&QWP(0,$key));	# just swap
	&$movekey	("xmm1",&QWP(0,"eax"));
@@ -636,9 +739,8 @@ if ($PREFIX eq "aesni") {
	&$movekey	(&QWP(0,$key),"xmm1");
	&lea		($key,&DWP(16,$key));
	&lea		("eax",&DWP(-16,"eax"));
	&jmp		(&label("dec_key_inverse"));

&set_label("dec_key_inverse",16);
&set_label("dec_key_inverse");
	&$movekey	("xmm0",&QWP(0,$key));	# swap and inverse
	&$movekey	("xmm1",&QWP(0,"eax"));
	&aesimc		("xmm0","xmm0");
+432 −404

File changed.

Preview size limit exceeded, changes collapsed.

+9 −8
Original line number Diff line number Diff line
@@ -147,8 +147,9 @@ static int aesni_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
	((void *)(((unsigned long)(x)+AESNI_MIN_ALIGN-1)&~(AESNI_MIN_ALIGN-1)))

/* Engine names */
static const char *aesni_id = "aesni";
static const char *aesni_name = "Intel AES-NI engine";
static const char   aesni_id[] = "aesni",
		    aesni_name[] = "Intel AES-NI engine",
		    no_aesni_name[] = "Intel AES-NI engine (no-aesni)";

/* ===== Engine "management" functions ===== */

@@ -156,15 +157,15 @@ static const char *aesni_name = "Intel AES-NI engine";
static int
aesni_bind_helper(ENGINE *e)
{
	if (!(OPENSSL_ia32cap_P[1] & (1UL << (57-32))))
		return 0;
	int engage = (OPENSSL_ia32cap_P[1] & (1 << (57-32))) != 0;

	/* Register everything or return with an error */
	if (!ENGINE_set_id(e, aesni_id) ||
	    !ENGINE_set_name(e, aesni_name) ||
	    !ENGINE_set_name(e, engage ? aesni_name : no_aesni_name) ||

	    !ENGINE_set_init_function(e, aesni_init) ||
	    !ENGINE_set_ciphers (e, aesni_ciphers))
	    (engage && !ENGINE_set_ciphers (e, aesni_ciphers))
	    )
		return 0;

	/* Everything looks good */
@@ -286,14 +287,14 @@ static int aesni_cipher_cfb(EVP_CIPHER_CTX *ctx, unsigned char *out,
{	AES_KEY *key = AESNI_ALIGN(ctx->cipher_data);
	CRYPTO_cfb128_encrypt(in, out, inl, key, ctx->iv,
				&ctx->num, ctx->encrypt,
				aesni_encrypt);
				(block128_f)aesni_encrypt);
	return 1;
}
static int aesni_cipher_ofb(EVP_CIPHER_CTX *ctx, unsigned char *out,
		 const unsigned char *in, size_t inl)
{	AES_KEY *key = AESNI_ALIGN(ctx->cipher_data);
	CRYPTO_ofb128_encrypt(in, out, inl, key, ctx->iv,
				&ctx->num, aesni_encrypt);
				&ctx->num, (block128_f)aesni_encrypt);
	return 1;
}

+4 −8
Original line number Diff line number Diff line
@@ -14,23 +14,19 @@ else
    exit 1;
fi

if $PROG engine aesni | grep aesni; then
if $PROG engine aesni | grep -v no-aesni; then

    HASH=`cat $PROG | $PROG dgst -hex`

    ACE_ALGS="	aes-128-ecb aes-192-ecb aes-256-ecb \
    AES_ALGS="	aes-128-ecb aes-192-ecb aes-256-ecb \
		aes-128-cbc aes-192-cbc aes-256-cbc \
		aes-128-cfb aes-192-cfb aes-256-cfb \
		aes-128-ofb aes-192-ofb aes-256-ofb"
    BUFSIZE="16 32 48 64 80 96 128 999"
    ACE_ALGS="	aes-128-cbc aes-192-cbc aes-256-cbc \
		aes-128-cfb aes-192-cfb aes-256-cfb \
		aes-128-ofb aes-192-ofb aes-256-ofb"
    BUFSIZE="48 64 80 96 128 999"
    BUFSIZE="16 32 48 64 80 96 128 144 999"

    nerr=0

    for alg in $ACE_ALGS; do
    for alg in $AES_ALGS; do
	echo $alg
	for bufsize in $BUFSIZE; do
	    TEST=`(	cat $PROG | \