Commit ae007d4d authored by Andy Polyakov
Browse files

wp-mmx.pl: ~10% performance improvement.

parent 660164a9
Loading
Loading
Loading
Loading
+22 −20
Original line number Diff line number Diff line
@@ -118,34 +118,36 @@ $tbl="ebp";
	&movq	(@mm[0],&QWP(2048*$SCALE,$tbl,"esi",8));	# rc[r]
	&mov	("eax",&DWP(0,"esp"));
	&mov	("ebx",&DWP(4,"esp"));
	&movz	("ecx",&LB("eax"));
	&movz	("edx",&HB("eax"));
for($i=0;$i<8;$i++) {
    my $func = ($i==0)? \&movq : \&pxor;
	&movb	(&LB("ecx"),&LB("eax"));
	&movb	(&LB("edx"),&HB("eax"));
	&shr	("eax",16);
	&scale	("esi","ecx");
	&movz	("ecx",&LB("eax"));
	&scale	("edi","edx");
	&shr	("eax",16);
	&movz	("edx",&HB("eax"));
	&pxor	(@mm[0],&QWP(&row(0),$tbl,"esi",8));
	&$func	(@mm[1],&QWP(&row(1),$tbl,"edi",8));
	&movb	(&LB("ecx"),&LB("eax"));
	&movb	(&LB("edx"),&HB("eax"));
	&mov	("eax",&DWP(($i+1)*8,"esp"));
	&scale	("esi","ecx");
	&movz	("ecx",&LB("ebx"));
	&scale	("edi","edx");
	&movz	("edx",&HB("ebx"));
	&$func	(@mm[2],&QWP(&row(2),$tbl,"esi",8));
	&$func	(@mm[3],&QWP(&row(3),$tbl,"edi",8));
	&movb	(&LB("ecx"),&LB("ebx"));
	&movb	(&LB("edx"),&HB("ebx"));
	&shr	("ebx",16);
	&scale	("esi","ecx");
	&movz	("ecx",&LB("ebx"));
	&scale	("edi","edx");
	&shr	("ebx",16);
	&movz	("edx",&HB("ebx"));
	&$func	(@mm[4],&QWP(&row(4),$tbl,"esi",8));
	&$func	(@mm[5],&QWP(&row(5),$tbl,"edi",8));
	&movb	(&LB("ecx"),&LB("ebx"));
	&movb	(&LB("edx"),&HB("ebx"));
	&mov	("ebx",&DWP(($i+1)*8+4,"esp"));
	&scale	("esi","ecx");
	&movz	("ecx",&LB("eax"));
	&scale	("edi","edx");
	&movz	("edx",&HB("eax"));
	&$func	(@mm[6],&QWP(&row(6),$tbl,"esi",8));
	&$func	(@mm[7],&QWP(&row(7),$tbl,"edi",8));
    push(@mm,shift(@mm));
@@ -154,32 +156,32 @@ for($i=0;$i<8;$i++) {
	for($i=0;$i<8;$i++) { &movq(&QWP($i*8,"esp"),@mm[$i]); }    # K=L

for($i=0;$i<8;$i++) {
	&movb	(&LB("ecx"),&LB("eax"));
	&movb	(&LB("edx"),&HB("eax"));
	&shr	("eax",16);
	&scale	("esi","ecx");
	&movz	("ecx",&LB("eax"));
	&scale	("edi","edx");
	&shr	("eax",16);
	&movz	("edx",&HB("eax"));
	&pxor	(@mm[0],&QWP(&row(0),$tbl,"esi",8));
	&pxor	(@mm[1],&QWP(&row(1),$tbl,"edi",8));
	&movb	(&LB("ecx"),&LB("eax"));
	&movb	(&LB("edx"),&HB("eax"));
	&mov	("eax",&DWP(64+($i+1)*8,"esp"))		if ($i<7);
	&scale	("esi","ecx");
	&movz	("ecx",&LB("ebx"));
	&scale	("edi","edx");
	&movz	("edx",&HB("ebx"));
	&pxor	(@mm[2],&QWP(&row(2),$tbl,"esi",8));
	&pxor	(@mm[3],&QWP(&row(3),$tbl,"edi",8));
	&movb	(&LB("ecx"),&LB("ebx"));
	&movb	(&LB("edx"),&HB("ebx"));
	&shr	("ebx",16);
	&scale	("esi","ecx");
	&movz	("ecx",&LB("ebx"));
	&scale	("edi","edx");
	&shr	("ebx",16);
	&movz	("edx",&HB("ebx"));
	&pxor	(@mm[4],&QWP(&row(4),$tbl,"esi",8));
	&pxor	(@mm[5],&QWP(&row(5),$tbl,"edi",8));
	&movb	(&LB("ecx"),&LB("ebx"));
	&movb	(&LB("edx"),&HB("ebx"));
	&mov	("ebx",&DWP(64+($i+1)*8+4,"esp"))	if ($i<7);
	&scale	("esi","ecx");
	&movz	("ecx",&LB("eax"));
	&scale	("edi","edx");
	&movz	("edx",&HB("eax"));
	&pxor	(@mm[6],&QWP(&row(6),$tbl,"esi",8));
	&pxor	(@mm[7],&QWP(&row(7),$tbl,"edi",8));
    push(@mm,shift(@mm));