Commit 28754624 authored by Andy Polyakov's avatar Andy Polyakov
Browse files

Reserve for assembler implementation of RC4_set_key and implement x86 one.

parent a1d91599
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -120,8 +120,8 @@ my $x86_elf_asm="x86cpuid-elf.o:bn86-elf.o co86-elf.o mo86-elf.o:dx86-elf.o yx86
my $x86_coff_asm="x86cpuid-cof.o:bn86-cof.o co86-cof.o mo86-cof.o:dx86-cof.o yx86-cof.o:ax86-cof.o:bx86-cof.o:mx86-cof.o:sx86-cof.o s512sse2-cof.o:cx86-cof.o:rx86-cof.o:rm86-cof.o:r586-cof.o:wp_block.o w86mmx-cof.o";
my $x86_out_asm="x86cpuid-out.o:bn86-out.o co86-out.o mo86-out.o:dx86-out.o yx86-out.o:ax86-out.o:bx86-out.o:mx86-out.o:sx86-out.o s512sse2-out.o:cx86-out.o:rx86-out.o:rm86-out.o:r586-out.o:wp_block.o w86mmx-out.o";

my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o";
my $ia64_asm="ia64cpuid.o:bn-ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o:::";
my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4_skey.o:::wp-x86_64.o";
my $ia64_asm="ia64cpuid.o:bn-ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::";
my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::::::::";
my $alpha_asm=":bn_asm.o alpha-mont.o::::::::::";
my $no_asm=":::::::::::";
@@ -599,7 +599,7 @@ my $des_enc="des_enc.o fcrypt_b.o";
my $aes_enc="aes_core.o aes_cbc.o";
my $bf_enc	="bf_enc.o";
my $cast_enc="c_enc.o";
my $rc4_enc="rc4_enc.o";
my $rc4_enc="rc4_enc.o rc4_skey.o";
my $rc5_enc="rc5_enc.o";
my $md5_obj="";
my $sha1_obj="";
+12 −12
Original line number Diff line number Diff line
@@ -128,7 +128,7 @@ $bf_obj =
$md5_obj      = md5-ia64.o
$sha1_obj     = sha1-ia64.o sha256-ia64.o sha512-ia64.o
$cast_obj     = 
$rc4_obj      = rc4-ia64.o
$rc4_obj      = rc4-ia64.o rc4_skey.o
$rmd160_obj   = 
$rc5_obj      = 
$wp_obj       = 
@@ -268,7 +268,7 @@ $bf_obj =
$md5_obj      = md5-x86_64.o
$sha1_obj     = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
$cast_obj     = 
$rc4_obj      = rc4-x86_64.o
$rc4_obj      = rc4-x86_64.o rc4_skey.o
$rmd160_obj   = 
$rc5_obj      = 
$wp_obj       = wp-x86_64.o
@@ -2172,7 +2172,7 @@ $bf_obj =
$md5_obj      = md5-ia64.o
$sha1_obj     = sha1-ia64.o sha256-ia64.o sha512-ia64.o
$cast_obj     = 
$rc4_obj      = rc4-ia64.o
$rc4_obj      = rc4-ia64.o rc4_skey.o
$rmd160_obj   = 
$rc5_obj      = 
$wp_obj       = 
@@ -2200,7 +2200,7 @@ $bf_obj =
$md5_obj      = md5-ia64.o
$sha1_obj     = sha1-ia64.o sha256-ia64.o sha512-ia64.o
$cast_obj     = 
$rc4_obj      = rc4-ia64.o
$rc4_obj      = rc4-ia64.o rc4_skey.o
$rmd160_obj   = 
$rc5_obj      = 
$wp_obj       = 
@@ -2396,7 +2396,7 @@ $bf_obj =
$md5_obj      = md5-ia64.o
$sha1_obj     = sha1-ia64.o sha256-ia64.o sha512-ia64.o
$cast_obj     = 
$rc4_obj      = rc4-ia64.o
$rc4_obj      = rc4-ia64.o rc4_skey.o
$rmd160_obj   = 
$rc5_obj      = 
$wp_obj       = 
@@ -2424,7 +2424,7 @@ $bf_obj =
$md5_obj      = md5-ia64.o
$sha1_obj     = sha1-ia64.o sha256-ia64.o sha512-ia64.o
$cast_obj     = 
$rc4_obj      = rc4-ia64.o
$rc4_obj      = rc4-ia64.o rc4_skey.o
$rmd160_obj   = 
$rc5_obj      = 
$wp_obj       = 
@@ -2956,7 +2956,7 @@ $bf_obj =
$md5_obj      = md5-ia64.o
$sha1_obj     = sha1-ia64.o sha256-ia64.o sha512-ia64.o
$cast_obj     = 
$rc4_obj      = rc4-ia64.o
$rc4_obj      = rc4-ia64.o rc4_skey.o
$rmd160_obj   = 
$rc5_obj      = 
$wp_obj       = 
@@ -2984,7 +2984,7 @@ $bf_obj =
$md5_obj      = md5-ia64.o
$sha1_obj     = sha1-ia64.o sha256-ia64.o sha512-ia64.o
$cast_obj     = 
$rc4_obj      = rc4-ia64.o
$rc4_obj      = rc4-ia64.o rc4_skey.o
$rmd160_obj   = 
$rc5_obj      = 
$wp_obj       = 
@@ -3012,7 +3012,7 @@ $bf_obj =
$md5_obj      = md5-ia64.o
$sha1_obj     = sha1-ia64.o sha256-ia64.o sha512-ia64.o
$cast_obj     = 
$rc4_obj      = rc4-ia64.o
$rc4_obj      = rc4-ia64.o rc4_skey.o
$rmd160_obj   = 
$rc5_obj      = 
$wp_obj       = 
@@ -3152,7 +3152,7 @@ $bf_obj =
$md5_obj      = md5-x86_64.o
$sha1_obj     = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
$cast_obj     = 
$rc4_obj      = rc4-x86_64.o
$rc4_obj      = rc4-x86_64.o rc4_skey.o
$rmd160_obj   = 
$rc5_obj      = 
$wp_obj       = wp-x86_64.o
@@ -3964,7 +3964,7 @@ $bf_obj =
$md5_obj      = md5-x86_64.o
$sha1_obj     = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
$cast_obj     = 
$rc4_obj      = rc4-x86_64.o
$rc4_obj      = rc4-x86_64.o rc4_skey.o
$rmd160_obj   = 
$rc5_obj      = 
$wp_obj       = wp-x86_64.o
@@ -3992,7 +3992,7 @@ $bf_obj =
$md5_obj      = md5-x86_64.o
$sha1_obj     = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
$cast_obj     = 
$rc4_obj      = rc4-x86_64.o
$rc4_obj      = rc4-x86_64.o rc4_skey.o
$rmd160_obj   = 
$rc5_obj      = 
$wp_obj       = wp-x86_64.o
+2 −2
Original line number Diff line number Diff line
@@ -10,7 +10,7 @@ INCLUDES=
CFLAG=-g
AR=		ar r

RC4_ENC=rc4_enc.o
RC4_ENC=rc4_enc.o rc4_skey.o

CFLAGS= $(INCLUDES) $(CFLAG)
ASFLAGS= $(INCLUDES) $(ASFLAG)
@@ -22,7 +22,7 @@ APPS=

LIB=$(TOP)/libcrypto.a
LIBSRC=rc4_skey.c rc4_enc.c
LIBOBJ=rc4_skey.o $(RC4_ENC)
LIBOBJ=$(RC4_ENC)

SRC= $(LIBSRC)

+103 −8
Original line number Diff line number Diff line
@@ -36,10 +36,6 @@ $in="esi";
$out="edi";
$d="ebp";

&RC4("RC4");

&asm_finish();

sub RC4_loop
	{
	local($n,$p,$char)=@_;
@@ -99,11 +95,10 @@ sub RC4_loop
	}


sub RC4
&function_begin_B("RC4");
	{
	local($name)=@_;

	&function_begin_B($name,"");

	&mov($ty,&wparam(1));		# len
	&cmp($ty,0);
@@ -224,7 +219,107 @@ sub RC4
	 &stack_pop(3);
	&movb(	&BP(-4,$d,"",0),&LB($y));
	 &movb(	&BP(-8,$d,"",0),&LB($x));

	&function_end($name);
}
&function_end("RC4");

########################################################################

# Symbolic register names used by the RC4_set_key generator below:
#   $inp - pointer just past the end of the user key bytes
#   $out - pointer to key->data
#   $idi - negative byte offset into the user key (runs from -len up to 0)
#   $ido - outer loop index i over the 256 table entries
#   $idx - first the address of OPENSSL_ia32cap_P, later the running index j
$inp="esi";
$out="edi";
$idi="ebp";
$ido="ecx";
$idx="edx";

# NOTE(review): external_label is defined outside this view - presumably it
# declares OPENSSL_ia32cap_P as a symbol provided by another object; confirm.
&external_label("OPENSSL_ia32cap_P");

# void RC4_set_key(RC4_KEY *key,int len,const unsigned char *data);
#
# Key schedule setup.  Fills key->data with the identity table 0..255,
# then walks i=0..255 doing j+=data[k]+key->data[i] (mod 256) and swapping
# key->data[i] with key->data[j], where k cycles through the len key bytes.
# Finally key->x and key->y are zeroed.
#
# Two table layouts are produced, selected by bit#20 of the first dword of
# OPENSSL_ia32cap_P:
#   bit clear - "w" loops: one 32-bit word per table entry;
#   bit set   - "c" loops: one byte per table entry, and -1 is stored at
#               byte offset 256 of key->data to mark the schedule as
#               compressed.
# NOTE(review): what bit#20 stands for is not visible in this file - confirm
# against the OPENSSL_ia32cap documentation.
# NOTE(review): len==0 is not special-cased; the key offset then never hits
# zero on the first pass - confirm callers never pass an empty key.
&function_begin("RC4_set_key");
	&mov	($out,&wparam(0));		# load key
	&mov	($idi,&wparam(1));		# load len
	&mov	($inp,&wparam(2));		# load data
	&picmeup($idx,"OPENSSL_ia32cap_P");

	&lea	($out,&DWP(2*4,$out));		# &key->data
	&lea	($inp,&DWP(0,$inp,$idi));	# $inp to point at the end
	# With $idi negated to -len, $inp+$idi addresses data[0] and the key is
	# exhausted exactly when $idi counts up to zero.
	&neg	($idi);
	&xor	("eax","eax");
	# key->y sits 4 bytes below key->data; use it as scratch for -len so the
	# 2nd loop can reload $idi when the key wraps.  It is cleared at "exit".
	&mov	(&DWP(-4,$out),$idi);		# borrow key->y

	&bt	(&DWP(0,$idx),20);		# check for bit#20
	&jc	(&label("c1stloop"));

# Word-per-entry variant.
&set_label("w1stloop",16);
	&mov	(&DWP(0,$out,"eax",4),"eax");	# key->data[i]=i;
	# 8-bit add: carry is raised only when al wraps 255->0, which ends the
	# loop after 256 stores and leaves eax equal to zero.
	&add	(&LB("eax"),1);			# i++;
	&jnc	(&label("w1stloop"));

	# i=0, j=0 ($idx no longer needs to hold the capability vector address).
	&xor	($ido,$ido);
	&xor	($idx,$idx);

&set_label("w2ndloop",16);
	# eax=key->data[i]; j+=data[k]; j+=key->data[i] (both adds are 8-bit,
	# so j wraps modulo 256).
	&mov	("eax",&DWP(0,$out,$ido,4));
	&add	(&LB($idx),&BP(0,$inp,$idi));
	&add	(&LB($idx),&LB("eax"));
	# Advance the key offset; ZF from this add is what the jnz below tests,
	# the intervening mov leaves flags untouched.
	&add	($idi,1);
	&mov	("ebx",&DWP(0,$out,$idx,4));
	&jnz	(&label("wnowrap"));
	  # Key exhausted: reload -len saved in the key->y slot.
	  &mov	($idi,&DWP(-4,$out));
	&set_label("wnowrap");
	# Swap key->data[i] and key->data[j].
	&mov	(&DWP(0,$out,$idx,4),"eax");
	&mov	(&DWP(0,$out,$ido,4),"ebx");
	# 8-bit increment of i; carry after the 256th iteration ends the loop.
	&add	(&LB($ido),1);
	&jnc	(&label("w2ndloop"));
&jmp	(&label("exit"));

# Byte-per-entry ("compressed") variant; same algorithm with byte accesses.
&set_label("c1stloop",16);
	&mov	(&BP(0,$out,"eax"),&LB("eax"));	# key->data[i]=i;
	&add	(&LB("eax"),1);			# i++;
	&jnc	(&label("c1stloop"));

	# i=0, j=0; ebx is cleared as well because only its low byte is loaded
	# in the loop below.
	&xor	($ido,$ido);
	&xor	($idx,$idx);
	&xor	("ebx","ebx");

&set_label("c2ndloop",16);
	&mov	(&LB("eax"),&BP(0,$out,$ido));
	&add	(&LB($idx),&BP(0,$inp,$idi));
	&add	(&LB($idx),&LB("eax"));
	# ZF from this add is consumed by the jnz after the next mov.
	&add	($idi,1);
	&mov	(&LB("ebx"),&BP(0,$out,$idx));
	&jnz	(&label("cnowrap"));
	  # Key exhausted: reload -len saved in the key->y slot.
	  &mov	($idi,&DWP(-4,$out));
	&set_label("cnowrap");
	# Swap the two byte entries.
	&mov	(&BP(0,$out,$idx),&LB("eax"));
	&mov	(&BP(0,$out,$ido),&LB("ebx"));
	&add	(&LB($ido),1);
	&jnc	(&label("c2ndloop"));

	&mov	(&DWP(256,$out),-1);		# mark schedule as compressed

# Common tail: key->x and key->y live 8 and 4 bytes below key->data.
&set_label("exit");
	&xor	("eax","eax");
	&mov	(&DWP(-8,$out),"eax");		# key->x=0;
	&mov	(&DWP(-4,$out),"eax");		# key->y=0;
&function_end("RC4_set_key");

# const char *RC4_options(void);
#
# Leaves in eax the address of a description string stored at label "opts"
# right after the code: "rc4(8x,int)" when bit#20 of the first dword of
# OPENSSL_ia32cap_P is clear, "rc4(1x,char)" when it is set.  This is the
# same bit RC4_set_key tests to choose between its "w" and "c" loops.
&function_begin_B("RC4_options");
	# Position-independent addressing: the call pushes the address of
	# pic_point, which is then taken off the stack into eax (presumably
	# what blindpop does - confirm), and the opts-pic_point distance is
	# added to it.
	&call	(&label("pic_point"));
&set_label("pic_point");
	&blindpop("eax");
	&lea	("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax"));
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),20);
	&jnc	(&label("skip"));
	  # Step over the first string: 11 characters of "rc4(8x,int)" plus one
	  # terminator byte, assuming &asciz appends a single NUL - TODO confirm.
	  &add	("eax",12);
	&set_label("skip");
	&ret	();
# String data; the order of the first two entries must match the offset
# of 12 used above.
&set_label("opts",64);
&asciz	("rc4(8x,int)");
&asciz	("rc4(1x,char)");
&asciz	("RC4 for x86, OpenSSL project");	# RC4_version
&align	(64);
&function_end_B("RC4_options");

# Called once, after all routines above have been described.
# NOTE(review): asm_finish is defined outside this view - presumably it
# makes the perlasm layer emit the accumulated assembly; confirm.
&asm_finish();