gcm128.c and assembler modules: change argument order for gcm_ghash_4bit. (4f39edbf) · Commits · CYBER - Cyber Security / TS 103 523 MSP / TLMSP / TLMSP OpenSSL

crypto/modes/asm/ghash-alpha.pl

+3 −9

Original line number	Diff line number	Diff line
		@@ -31,10 +31,10 @@ $Thi1="t5";
		$Tlo1="t6";
		$rem="t7"; # $8
		#################
		-$Xi="a0"; # $16
		+$Xi="a0"; # $16, input argument block
		$Htbl="a1";
		-
		-
		+$inp="a2";
		+$len="a3";
		$nlo="a4"; # $20
		$nhi="a5";
		$Zhi="t8";
		@@ -314,12 +314,6 @@ $code.=<<___;
		.end gcm_gmult_4bit
		___

		-# argument block for gcm_ghash_4bit
		-$inp="a0"; # $16
		-$len="a1";
		-$Xi ="a2";
		-$Htbl="a3";
		-
		$inhi="s0";
		$inlo="s1";

crypto/modes/asm/ghash-ia64.pl

+4 −4

Original line number	Diff line number	Diff line
		@@ -142,13 +142,13 @@ gcm_ghash_4bit:
		.prologue
		{ .mmi; .save ar.pfs,prevfs
		alloc prevfs=ar.pfs,4,4,0,8
		-$ADDP inp=15,in0 // &inp[15]
		+$ADDP inp=15,in2 // &inp[15]
		mov rem_4bitp=ip }
		-{ .mmi; $ADDP end=in1,in0 // &inp[len]
		-$ADDP Xi=15,in2 // &Xi[15]
		+{ .mmi; $ADDP end=in3,in2 // &inp[len]
		+$ADDP Xi=15,in0 // &Xi[15]
		.save ar.lc,prevlc
		mov prevlc=ar.lc };;
		-{ .mmi; $ADDP Htbl=8,in3 // &Htbl[0].lo
		+{ .mmi; $ADDP Htbl=8,in1 // &Htbl[0].lo
		mov mask0xf0=0xf0
		.save pr,prevpr
		mov prevpr=pr }

crypto/modes/asm/ghash-sparcv9.pl

+4 −6

Original line number	Diff line number	Diff line
		@@ -54,10 +54,10 @@ $remi="%l5";
		$Htblo="%l6";
		$cnt="%l7";

		-$inp="%i0"; # input arguments for gcm_ghash_4bit
		-$len="%i1";
		-$Xi="%i2";
		-$Htbl="%i3";
		+$Xi="%i0"; # input argument block
		+$Htbl="%i1";
		+$inp="%i2";
		+$len="%i3";

		$code.=<<___;
		.section ".text",#alloc,#execinstr
		@@ -208,8 +208,6 @@ gcm_ghash_4bit:
		.size gcm_ghash_4bit,(.-gcm_ghash_4bit)
		___

		-$Xi="%i0"; # input arguments for gcm_gmult_4bit
		-$Htbl="%i1";
		undef $inp;
		undef $len;

crypto/modes/asm/ghash-x86.pl

+17 −17

Original line number	Diff line number	Diff line
		@@ -23,7 +23,7 @@
		# PIII 63 /77 16 24
		# P4 96 /122 30 84(***)
		# Opteron 50 /71 21 30
		-# Core2 63 /102 19 28
		+# Core2 54 /68 13 18
		#
		# (*) gcc 3.4.x was observed to generate few percent slower code,
		# which is one of reasons why 2.95.3 results were chosen,
		@@ -317,12 +317,12 @@ if ($unroll) {

		&lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax"));

		-&mov ($inp,&wparam(0)); # load in
		-&mov ($Zlh,&wparam(1)); # load len
		-&mov ($Zhh,&wparam(2)); # load Xi
		-&mov ($Htbl,&wparam(3)); # load Htable
		+&mov ($Zhh,&wparam(0)); # load Xi
		+&mov ($Htbl,&wparam(1)); # load Htable
		+&mov ($inp,&wparam(2)); # load in
		+&mov ($Zlh,&wparam(3)); # load len
		&add ($Zlh,$inp);
		-&mov (&wparam(1),$Zlh); # len to point at the end of input
		+&mov (&wparam(3),$Zlh); # len to point at the end of input
		&stack_push(4+1); # +1 for stack alignment
		&mov ($Zll,&DWP(12,$Zhh)); # load Xi[16]
		&mov ($Zhl,&DWP(4,$Zhh));
		@@ -344,10 +344,10 @@ if ($unroll) {
		&mmx_loop("esp","eax");

		&lea ($inp,&DWP(16,$inp));
		-&cmp ($inp,&wparam(1));
		+&cmp ($inp,&wparam(3));
		&jb (&label("mmx_outer_loop"));

		-&mov ($inp,&wparam(2)); # load Xi
		+&mov ($inp,&wparam(0)); # load Xi
		&emms ();
		&mov (&DWP(12,$inp),$Zll);
		&mov (&DWP(4,$inp),$Zhl);
		@@ -359,12 +359,12 @@ if ($unroll) {
		&set_label("x86",16);
		}
		&stack_push(16+4+1); # +1 for 64-bit alignment
		-&mov ($inp,&wparam(0)); # load in
		-&mov ("ecx",&wparam(1)); # load len
		-&mov ($Zll,&wparam(2)); # load Xi
		-&mov ($Htbl,&wparam(3)); # load Htable
		+&mov ($Zll,&wparam(0)); # load Xi
		+&mov ($Htbl,&wparam(1)); # load Htable
		+&mov ($inp,&wparam(2)); # load in
		+&mov ("ecx",&wparam(3)); # load len
		&add ("ecx",$inp);
		-&mov (&wparam(1),"ecx");
		+&mov (&wparam(3),"ecx");

		&mov ($Zhh,&DWP(0,$Zll)); # load Xi[16]
		&mov ($Zhl,&DWP(4,$Zll));
		@@ -390,14 +390,14 @@ if ($unroll) {
		&call ("_x86_gmult_4bit_inner");
		} else {
		&x86_loop(0);
		-&mov ($inp,&wparam(0));
		+&mov ($inp,&wparam(2));
		}
		&lea ($inp,&DWP(16,$inp));
		-&cmp ($inp,&wparam(1));
		-&mov (&wparam(0),$inp) if (!$unroll);
		+&cmp ($inp,&wparam(3));
		+&mov (&wparam(2),$inp) if (!$unroll);
		&jb (&label("x86_outer_loop"));

		-&mov ($inp,&wparam(2)); # load Xi
		+&mov ($inp,&wparam(0)); # load Xi
		&mov (&DWP(12,$inp),$Zll);
		&mov (&DWP(8,$inp),$Zlh);
		&mov (&DWP(4,$inp),$Zhl);

crypto/modes/asm/ghash-x86_64.pl

+4 −6

Original line number	Diff line number	Diff line
		@@ -18,7 +18,7 @@
		# gcc 3.4.x assembler
		#
		# Opteron 18.5 10.2 +80%
		-# Core2 26.0 16.4 +58%
		+# Core2 17.5 11.0 +59%

		$flavour = shift;
		$output = shift;
		@@ -41,10 +41,10 @@ $Zhi="%r9";
		$tmp="%r10";
		$rem_4bit = "%r11";

		-# per-function register layout
		$Xi="%rdi";
		$Htbl="%rsi";

		+# per-function register layout
		$cnt="%rcx";
		$rem="%rdx";

		@@ -159,10 +159,8 @@ ___


		# per-function register layout
		-$inp="%rdi";
		-$len="%rsi";
		-$Xi="%rdx";
		-$Htbl="%rcx";
		+$inp="%rdx";
		+$len="%rcx";

		$cnt="%rbp";
		$rem="%r12";