Commit 6c8ce3c2 authored by Andy Polyakov's avatar Andy Polyakov
Browse files

e_padlock-x86[_64].pl: protection against prefetch errata.

parent 3231e42d
Loading
Loading
Loading
Loading
+32 −8
Original line number Diff line number Diff line
@@ -37,6 +37,7 @@ require "x86asm.pl";

&asm_init($ARGV[0],$0);

%PADLOCK_MARGIN=(ecb=>128, cbc=>64); # prefetch errata
$PADLOCK_CHUNK=512;	# Must be a power of 2 larger than 16

$ctx="edx";
@@ -187,6 +188,10 @@ my ($mode,$opcode) = @_;
	&movq	("mm0",&QWP(-16,$ctx));	# load [upper part of] counter
					} else {
	&xor	("ebx","ebx");
    if ($PADLOCK_MARGIN{$mode}) {
	&cmp	($len,$PADLOCK_MARGIN{$mode});
	&jbe	(&label("${mode}_short"));
    }
	&test	(&DWP(0,$ctx),1<<5);	# align bit in control word
	&jnz	(&label("${mode}_aligned"));
	&test	($out,0x0f);
@@ -285,20 +290,39 @@ my ($mode,$opcode) = @_;
	&mov	($chunk,$PADLOCK_CHUNK);
	&jnz	(&label("${mode}_loop"));
						if ($mode ne "ctr32") {
	&test	($out,0x0f);			# out_misaligned
	&jz	(&label("${mode}_done"));
	&cmp	("esp","ebp");
	&je	(&label("${mode}_done"));
						}
	&mov	($len,"ebp");
	&mov	($out,"esp");
	&sub	($len,"esp");
	&xor	("eax","eax");
	&shr	($len,2);
	&data_byte(0xf3,0xab);			# rep stosl
	&pxor	("xmm0","xmm0");
	&lea	("eax",&DWP(0,"esp"));
&set_label("${mode}_bzero");
	&movaps	(&QWP(0,"eax"),"xmm0");
	&lea	("eax",&DWP(16,"eax"));
	&cmp	("ebp","eax");
	&ja	(&label("${mode}_bzero"));

&set_label("${mode}_done");
	&lea	("esp",&DWP(24,"ebp"));
						if ($mode ne "ctr32") {
	&jmp	(&label("${mode}_exit"));

&set_label("${mode}_short",16);
	&xor	("eax","eax");
	&lea	("ebp",&DWP(-24,"esp"));
	&sub	("eax",$len);
	&lea	("esp",&DWP(0,"eax","ebp"));
	&and	("esp",-16);
	&xor	($chunk,$chunk);
&set_label("${mode}_short_copy");
	&movups	("xmm0",&QWP(0,$inp,$chunk));
	&lea	($chunk,&DWP(16,$chunk));
	&cmp	($len,$chunk);
	&movaps	(&QWP(-16,"esp",$chunk),"xmm0");
	&ja	(&label("${mode}_short_copy"));
	&mov	($inp,"esp");
	&mov	($chunk,$len);
	&jmp	(&label("${mode}_loop"));

&set_label("${mode}_aligned",16);
	&lea	("eax",&DWP(-16,$ctx));		# ivp
	&lea	("ebx",&DWP(16,$ctx));		# key
+41 −9
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@ open STDOUT,"| $^X $xlate $flavour $output";

$code=".text\n";

%PADLOCK_MARGIN=(ecb=>128, cbc=>64, ctr32=>64);	# prefetch errata
$PADLOCK_CHUNK=512;	# Must be a power of 2 between 32 and 2^20

$ctx="%rdx";
@@ -284,6 +285,17 @@ padlock_${mode}_encrypt:
	lea	16($ctx),$ctx		# control word
	xor	%eax,%eax
	xor	%ebx,%ebx
___
# Formally speaking, the correct condition is $len<=$margin and $inp+$margin
# crosses a page boundary [and the next page is unreadable]. But $inp can
# be unaligned, in which case data can be copied to $out if the latter is
# aligned, in which case $out+$margin has to be checked. Covering all
# cases appears more complicated than just copying short input...
$code.=<<___	if ($PADLOCK_MARGIN{$mode});
	cmp	\$$PADLOCK_MARGIN{$mode},$len
	jbe	.L${mode}_short
___
$code.=<<___;
	testl	\$`1<<5`,($ctx)		# align bit in control word
	jnz	.L${mode}_aligned
	test	\$0x0f,$out
@@ -305,6 +317,7 @@ padlock_${mode}_encrypt:
	lea	(%rax,%rbp),%rsp
___
$code.=<<___				if ($mode eq "ctr32");
.L${mode}_reenter:
	mov	-4($ctx),%eax		# pull 32-bit counter
	bswap	%eax
	neg	%eax
@@ -373,19 +386,38 @@ $code.=<<___;
	mov	\$$PADLOCK_CHUNK,$chunk
	jnz	.L${mode}_loop

	test	\$0x0f,$out
	jz	.L${mode}_done
	cmp	%rsp,%rbp
	je	.L${mode}_done

	pxor	%xmm0,%xmm0
	lea	(%rsp),%rax
.L${mode}_bzero:
	movaps	%xmm0,(%rax)
	lea	16(%rax),%rax
	cmp	%rax,%rbp
	ja	.L${mode}_bzero

	mov	%rbp,$len
	mov	%rsp,$out
	sub	%rsp,$len
	xor	%rax,%rax
	shr	\$3,$len
	.byte	0xf3,0x48,0xab		# rep stosq
.L${mode}_done:
	lea	(%rbp),%rsp
	jmp	.L${mode}_exit

___
$code.=<<___ if ($PADLOCK_MARGIN{$mode});
.align	16
.L${mode}_short:
	mov	%rsp,%rbp
	sub	$len,%rsp
	xor	$chunk,$chunk
.L${mode}_short_copy:
	movups	($inp,$chunk),%xmm0
	lea	16($chunk),$chunk
	cmp	$chunk,$len
	movaps	%xmm0,-16(%rsp,$chunk)
	ja	.L${mode}_short_copy
	mov	%rsp,$inp
	mov	$len,$chunk
	jmp	.L${mode}_`${mode} eq "ctr32"?"reenter":"loop"`
___
$code.=<<___;
.align	16
.L${mode}_aligned:
___