Commit 31439046 authored by Andy Polyakov
Browse files

bn/asm/ppc.pl to use ppc-xlate.pl.

parent 11d0ebc8
Loading
Loading
Loading
Loading
+64 −160
Original line number Diff line number Diff line
@@ -151,91 +151,15 @@ if ($opf =~ /32\.s/) {
	$TR=	"td";		# conditional trap
} else { die "nonsense $opf"; }

# Unless an extra command-line argument is present, send everything we
# print to the output file named by $opf.
defined(shift) or open(STDOUT, ">$opf") or die "can't open $opf: $!";

# Function entry points, taken from the AIX code.
#
# There are other, more elegant, ways to handle this. We (IBM) chose
# this approach because it plays well with the scripts we run to
# 'namespace' OpenSSL, i.e. we add a prefix to all the public symbols
# so we can co-exist in the same process with other implementations of
# OpenSSL. 'Cleverer' ways of doing these substitutions tend to hide
# data we need to be obvious.
#
my @items = qw(
    bn_sqr_comba4
    bn_sqr_comba8
    bn_mul_comba4
    bn_mul_comba8
    bn_sub_words
    bn_add_words
    bn_div_words
    bn_sqr_words
    bn_mul_words
    bn_mul_add_words
);

# Choose the emitter from the output file name; anything that is not
# recognised as linux, aix or osx falls through to the BSD flavour.
my $emit = $opf =~ /linux/ ? \&do_linux
         : $opf =~ /aix/   ? \&do_aix
         : $opf =~ /osx/   ? \&do_osx
         :                   \&do_bsd;
$emit->();

# Emit the assembly for Linux targets.
#
# Takes the assembly text returned by data(), rewrites the '.'-prefixed
# entry points listed in @items, prefixes the Lppcasm_ labels with '.'
# and prints the result to STDOUT.  Reads the file-level $BITS and
# @items; takes no arguments and returns whatever print returns.
sub do_linux {
    # Lexical working copy: the text and the loop variable are private to
    # this sub instead of leaking into package globals.
    my $asm = data();

    if ($BITS==64) {
      # 64-bit: each ".name:" label is replaced by an entry for "name" in
      # the ".opd" section (presumably the PPC64 ELF function descriptor
      # -- confirm against the ABI) followed by the original ".name" label.
      foreach my $sym (@items) {
        $asm =~ s/\.$sym:/\
\t.section\t".opd","aw"\
\t.align\t3\
\t.globl\t$sym\
$sym:\
\t.quad\t.$sym,.TOC.\@tocbase,0\
\t.size\t$sym,24\
\t.previous\n\
\t.type\t.$sym,\@function\
\t.globl\t.$sym\
.$sym:/g;
      }
    }
    else {
      # 32-bit: just drop the leading '.' from every entry point.
      foreach my $sym (@items) {
        $asm =~ s/\.$sym/$sym/g;
      }
    }
    # hide internal labels to avoid pollution of name table...
    $asm =~ s/Lppcasm_/.Lppcasm_/gm;
    print $asm;
}

# Emit the assembly for AIX: the text from data() is printed unchanged.
sub do_aix {
    # The AIX assembler is smart enough to please the linker without
    # any symbol rewriting on our side.
    my $asm = data();
    print $asm;
}

# MacOSX 32 bit
#
# Takes the assembly text returned by data(), renames the entry points
# listed in @items, adapts two pieces of syntax for the OS X assembler
# and prints the result to STDOUT.  Takes no arguments.
sub do_osx {
    # Lexical working copy: nothing here leaks into package globals.
    my $asm = data();
    # Change the bn symbol prefix from '.' to '_'
    foreach my $sym (@items) {
      $asm =~ s/\.$sym/_$sym/g;
    }
    # Change .machine to something OS X asm will accept
    $asm =~ s/\.machine.*/.text/g;
    $asm =~ s/\#/;/g; # change comment from '#' to ';'
    print $asm;
}

# BSD (Untested)
#
# Takes the assembly text returned by data(), changes the prefix of every
# entry point listed in @items from '.' to '_' and prints the result to
# STDOUT.  Takes no arguments.
sub do_bsd {
    # Lexical working copy: nothing here leaks into package globals.
    my $asm = data();
    foreach my $sym (@items) {
      $asm =~ s/\.$sym/_$sym/g;
    }
    print $asm;
}

sub data {
	local($data)=<<EOF;
# Directory part of this script's path: everything in $0 up to and
# including the last '/' or '\'.
$0 =~ m/(.*[\/\\])[^\/\\]+$/;
$dir = $1;

# Look for ppc-xlate.pl next to this script first, then in the perlasm
# directory two levels up; give up if neither file exists.
$xlate = "${dir}ppc-xlate.pl";
unless (-f $xlate) {
    $xlate = "${dir}../../perlasm/ppc-xlate.pl";
    -f $xlate or die "can't locate ppc-xlate.pl";
}

# Unless an extra command-line argument is present, pipe everything we
# print through ppc-xlate.pl, run with the current perl ($^X) and given
# $opf as its argument.
defined(shift) or open(STDOUT, "| $^X $xlate $opf")
	or die "can't call $xlate: $!";

$data=<<EOF;
#--------------------------------------------------------------------
#
#
@@ -297,33 +221,20 @@ sub data {
#
#	Defines to be used in the assembly code.
#	
.set r0,0	# we use it as storage for value of 0
.set SP,1	# preserved
.set RTOC,2	# preserved 
.set r3,3	# 1st argument/return value
.set r4,4	# 2nd argument/volatile register
.set r5,5	# 3rd argument/volatile register
.set r6,6	# ...
.set r7,7
.set r8,8
.set r9,9
.set r10,10
.set r11,11
.set r12,12
.set r13,13	# not used, nor any other "below" it...

.set BO_IF_NOT,4
.set BO_IF,12
.set BO_dCTR_NZERO,16
.set BO_dCTR_ZERO,18
.set BO_ALWAYS,20
.set CR0_LT,0;
.set CR0_GT,1;
.set CR0_EQ,2
.set CR1_FX,4;
.set CR1_FEX,5;
.set CR1_VX,6
.set LR,8
#.set r0,0	# we use it as storage for value of 0
#.set SP,1	# preserved
#.set RTOC,2	# preserved 
#.set r3,3	# 1st argument/return value
#.set r4,4	# 2nd argument/volatile register
#.set r5,5	# 3rd argument/volatile register
#.set r6,6	# ...
#.set r7,7
#.set r8,8
#.set r9,9
#.set r10,10
#.set r11,11
#.set r12,12
#.set r13,13	# not used, nor any other "below" it...

#	Declare function names to be global
#	NOTE:	For gcc these names MUST be changed to remove
@@ -478,7 +389,7 @@ sub data {

	$ST		r9,`6*$BNSZ`(r3)	#r[6]=c1
	$ST		r10,`7*$BNSZ`(r3)	#r[7]=c2
	bclr	BO_ALWAYS,CR0_LT
	blr
	.long	0x00000000

#
@@ -903,7 +814,7 @@ sub data {
	$ST		r9, `15*$BNSZ`(r3)	#r[15]=c1;


	bclr	BO_ALWAYS,CR0_LT
	blr

	.long	0x00000000

@@ -1055,7 +966,7 @@ sub data {

	$ST	r10,`6*$BNSZ`(r3)	#r[6]=c1
	$ST	r11,`7*$BNSZ`(r3)	#r[7]=c2
	bclr	BO_ALWAYS,CR0_LT
	blr
	.long	0x00000000

#
@@ -1591,7 +1502,7 @@ sub data {
	adde	r10,r10,r9
	$ST	r12,`14*$BNSZ`(r3)	#r[14]=c3;
	$ST	r10,`15*$BNSZ`(r3)	#r[15]=c1;
	bclr	BO_ALWAYS,CR0_LT
	blr
	.long	0x00000000

#
@@ -1623,7 +1534,7 @@ sub data {
	subfc.	r7,r0,r6        # If r6 is 0 then result is 0.
				# if r6 > 0 then result !=0
				# In either case carry bit is set.
	bc	BO_IF,CR0_EQ,Lppcasm_sub_adios
	beq	Lppcasm_sub_adios
	addi	r4,r4,-$BNSZ
	addi	r3,r3,-$BNSZ
	addi	r5,r5,-$BNSZ
@@ -1635,11 +1546,11 @@ Lppcasm_sub_mainloop:
				# if carry = 1 this is r7-r8. Else it
				# is r7-r8 -1 as we need.
	$STU	r6,$BNSZ(r3)
	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_sub_mainloop
	bdnz-	Lppcasm_sub_mainloop
Lppcasm_sub_adios:	
	subfze	r3,r0		# if carry bit is set then r3 = 0 else -1
	andi.	r3,r3,1         # keep only last bit.
	bclr	BO_ALWAYS,CR0_LT
	blr
	.long	0x00000000


@@ -1670,7 +1581,7 @@ Lppcasm_sub_adios:
#	check for r6 = 0. Is this needed?
#
	addic.	r6,r6,0		#test r6 and clear carry bit.
	bc	BO_IF,CR0_EQ,Lppcasm_add_adios
	beq	Lppcasm_add_adios
	addi	r4,r4,-$BNSZ
	addi	r3,r3,-$BNSZ
	addi	r5,r5,-$BNSZ
@@ -1680,10 +1591,10 @@ Lppcasm_add_mainloop:
	$LDU	r8,$BNSZ(r5)
	adde	r8,r7,r8
	$STU	r8,$BNSZ(r3)
	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_add_mainloop
	bdnz-	Lppcasm_add_mainloop
Lppcasm_add_adios:	
	addze	r3,r0			#return carry bit.
	bclr	BO_ALWAYS,CR0_LT
	blr
	.long	0x00000000

#
@@ -1707,24 +1618,24 @@ Lppcasm_add_adios:
#	r5 = d
	
	$UCMPI	0,r5,0			# compare r5 and 0
	bc	BO_IF_NOT,CR0_EQ,Lppcasm_div1	# proceed if d!=0
	bne	Lppcasm_div1		# proceed if d!=0
	li	r3,-1			# d=0 return -1
	bclr	BO_ALWAYS,CR0_LT	
	blr
Lppcasm_div1:
	xor	r0,r0,r0		#r0=0
	li	r8,$BITS
	$CNTLZ.	r7,r5			#r7 = num leading 0s in d.
	bc	BO_IF,CR0_EQ,Lppcasm_div2	#proceed if no leading zeros
	beq	Lppcasm_div2		#proceed if no leading zeros
	subf	r8,r7,r8		#r8 = BN_num_bits_word(d)
	$SHR.	r9,r3,r8		#are there any bits above r8'th?
	$TR	16,r9,r0		#if there're, signal to dump core...
Lppcasm_div2:
	$UCMP	0,r3,r5			#h>=d?
	bc	BO_IF,CR0_LT,Lppcasm_div3	#goto Lppcasm_div3 if not
	blt	Lppcasm_div3		#goto Lppcasm_div3 if not
	subf	r3,r5,r3		#h-=d ; 
Lppcasm_div3:				#r7 = BN_BITS2-i. so r7=i
	cmpi	0,0,r7,0		# is (i == 0)?
	bc	BO_IF,CR0_EQ,Lppcasm_div4
	beq	Lppcasm_div4
	$SHL	r3,r3,r7		# h = (h<< i)
	$SHR	r8,r4,r8		# r8 = (l >> BN_BITS2 -i)
	$SHL	r5,r5,r7		# d<<=i
@@ -1741,7 +1652,7 @@ Lppcasm_divouterloop:
	$SHRI	r11,r4,`$BITS/2`	#r11= (l&BN_MASK2h)>>BN_BITS4
					# compute here for innerloop.
	$UCMP	0,r8,r9			# is (h>>BN_BITS4)==dh
	bc	BO_IF_NOT,CR0_EQ,Lppcasm_div5	# goto Lppcasm_div5 if not
	bne	Lppcasm_div5		# goto Lppcasm_div5 if not

	li	r8,-1
	$CLRU	r8,r8,`$BITS/2`		#q = BN_MASK2l 
@@ -1762,9 +1673,9 @@ Lppcasm_divinnerloop:
					# the following 2 instructions do that
	$SHLI	r7,r10,`$BITS/2`	# r7 = (t<<BN_BITS4)
	or	r7,r7,r11		# r7|=((l&BN_MASK2h)>>BN_BITS4)
	$UCMP	1,r6,r7			# compare (tl <= r7)
	bc	BO_IF_NOT,CR0_EQ,Lppcasm_divinnerexit
	bc	BO_IF_NOT,CR1_FEX,Lppcasm_divinnerexit
	$UCMP	cr1,r6,r7		# compare (tl <= r7)
	bne	Lppcasm_divinnerexit
	ble	cr1,Lppcasm_divinnerexit
	addi	r8,r8,-1		#q--
	subf	r12,r9,r12		#th -=dh
	$CLRU	r10,r5,`$BITS/2`	#r10=dl. t is no longer needed in loop.
@@ -1773,14 +1684,14 @@ Lppcasm_divinnerloop:
Lppcasm_divinnerexit:
	$SHRI	r10,r6,`$BITS/2`	#t=(tl>>BN_BITS4)
	$SHLI	r11,r6,`$BITS/2`	#tl=(tl<<BN_BITS4)&BN_MASK2h;
	$UCMP	1,r4,r11		# compare l and tl
	$UCMP	cr1,r4,r11		# compare l and tl
	add	r12,r12,r10		# th+=t
	bc	BO_IF_NOT,CR1_FX,Lppcasm_div7  # if (l>=tl) goto Lppcasm_div7
	bge	cr1,Lppcasm_div7	# if (l>=tl) goto Lppcasm_div7
	addi	r12,r12,1		# th++
Lppcasm_div7:
	subf	r11,r11,r4		#r11=l-tl
	$UCMP	1,r3,r12		#compare h and th
	bc	BO_IF_NOT,CR1_FX,Lppcasm_div8	#if (h>=th) goto Lppcasm_div8
	$UCMP	cr1,r3,r12		#compare h and th
	bge	cr1,Lppcasm_div8	#if (h>=th) goto Lppcasm_div8
	addi	r8,r8,-1		# q--
	add	r3,r5,r3		# h+=d
Lppcasm_div8:
@@ -1791,12 +1702,12 @@ Lppcasm_div8:
					# the following 2 instructions will do this.
	$INSR	r11,r12,`$BITS/2`,`$BITS/2`	# r11 is the value we want rotated $BITS/2.
	$ROTL	r3,r11,`$BITS/2`	# rotate by $BITS/2 and store in r3
	bc	BO_dCTR_ZERO,CR0_EQ,Lppcasm_div9#if (count==0) break ;
	bdz	Lppcasm_div9		#if (count==0) break ;
	$SHLI	r0,r8,`$BITS/2`		#ret =q<<BN_BITS4
	b	Lppcasm_divouterloop
Lppcasm_div9:
	or	r3,r8,r0
	bclr	BO_ALWAYS,CR0_LT
	blr
	.long	0x00000000

#
@@ -1822,7 +1733,7 @@ Lppcasm_div9:
#	No unrolling done here. Not performance critical.

	addic.	r5,r5,0			#test r5.
	bc	BO_IF,CR0_EQ,Lppcasm_sqr_adios
	beq	Lppcasm_sqr_adios
	addi	r4,r4,-$BNSZ
	addi	r3,r3,-$BNSZ
	mtctr	r5
@@ -1833,9 +1744,9 @@ Lppcasm_sqr_mainloop:
	$UMULH  r8,r6,r6
	$STU	r7,$BNSZ(r3)
	$STU	r8,$BNSZ(r3)
	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_sqr_mainloop
	bdnz-	Lppcasm_sqr_mainloop
Lppcasm_sqr_adios:	
	bclr	BO_ALWAYS,CR0_LT
	blr
	.long	0x00000000


@@ -1858,7 +1769,7 @@ Lppcasm_sqr_adios:
	xor	r0,r0,r0
	xor	r12,r12,r12		# used for carry
	rlwinm.	r7,r5,30,2,31		# num >> 2
	bc	BO_IF,CR0_EQ,Lppcasm_mw_REM
	beq	Lppcasm_mw_REM
	mtctr	r7
Lppcasm_mw_LOOP:	
					#mul(rp[0],ap[0],w,c1);
@@ -1896,11 +1807,11 @@ Lppcasm_mw_LOOP:
	
	addi	r3,r3,`4*$BNSZ`
	addi	r4,r4,`4*$BNSZ`
	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_mw_LOOP
	bdnz-	Lppcasm_mw_LOOP

Lppcasm_mw_REM:
	andi.	r5,r5,0x3
	bc	BO_IF,CR0_EQ,Lppcasm_mw_OVER
	beq	Lppcasm_mw_OVER
					#mul(rp[0],ap[0],w,c1);
	$LD	r8,`0*$BNSZ`(r4)
	$UMULL	r9,r6,r8
@@ -1912,7 +1823,7 @@ Lppcasm_mw_REM:
	
	addi	r5,r5,-1
	cmpli	0,0,r5,0
	bc	BO_IF,CR0_EQ,Lppcasm_mw_OVER
	beq	Lppcasm_mw_OVER

	
					#mul(rp[1],ap[1],w,c1);
@@ -1926,7 +1837,7 @@ Lppcasm_mw_REM:
	
	addi	r5,r5,-1
	cmpli	0,0,r5,0
	bc	BO_IF,CR0_EQ,Lppcasm_mw_OVER
	beq	Lppcasm_mw_OVER
	
					#mul_add(rp[2],ap[2],w,c1);
	$LD	r8,`2*$BNSZ`(r4)
@@ -1939,7 +1850,7 @@ Lppcasm_mw_REM:
		
Lppcasm_mw_OVER:	
	addi	r3,r12,0
	bclr	BO_ALWAYS,CR0_LT
	blr
	.long	0x00000000

#
@@ -1964,7 +1875,7 @@ Lppcasm_mw_OVER:
	xor	r0,r0,r0		#r0 = 0
	xor	r12,r12,r12  		#r12 = 0 . used for carry		
	rlwinm.	r7,r5,30,2,31		# num >> 2
	bc	BO_IF,CR0_EQ,Lppcasm_maw_leftover	# if (num < 4) go LPPCASM_maw_leftover
	beq	Lppcasm_maw_leftover	# if (num < 4) go LPPCASM_maw_leftover
	mtctr	r7
Lppcasm_maw_mainloop:	
					#mul_add(rp[0],ap[0],w,c1);
@@ -2017,11 +1928,11 @@ Lppcasm_maw_mainloop:
	$ST	r11,`3*$BNSZ`(r3)
	addi	r3,r3,`4*$BNSZ`
	addi	r4,r4,`4*$BNSZ`
	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_maw_mainloop
	bdnz-	Lppcasm_maw_mainloop
	
Lppcasm_maw_leftover:
	andi.	r5,r5,0x3
	bc	BO_IF,CR0_EQ,Lppcasm_maw_adios
	beq	Lppcasm_maw_adios
	addi	r3,r3,-$BNSZ
	addi	r4,r4,-$BNSZ
					#mul_add(rp[0],ap[0],w,c1);
@@ -2036,7 +1947,7 @@ Lppcasm_maw_leftover:
	addze	r12,r10
	$ST	r9,0(r3)
	
	bc	BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios
	bdz	Lppcasm_maw_adios
					#mul_add(rp[1],ap[1],w,c1);
	$LDU	r8,$BNSZ(r4)	
	$UMULL	r9,r6,r8
@@ -2048,7 +1959,7 @@ Lppcasm_maw_leftover:
	addze	r12,r10
	$ST	r9,0(r3)
	
	bc	BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios
	bdz	Lppcasm_maw_adios
					#mul_add(rp[2],ap[2],w,c1);
	$LDU	r8,$BNSZ(r4)
	$UMULL	r9,r6,r8
@@ -2062,17 +1973,10 @@ Lppcasm_maw_leftover:
		
Lppcasm_maw_adios:	
	addi	r3,r12,0
	bclr	BO_ALWAYS,CR0_LT
	blr
	.long	0x00000000
	.align	4
EOF
$data =~ s/\`([^\`]*)\`/eval $1/gem;

	# if some assembler chokes on some simplified mnemonic,
	# this is the spot to fix it up, e.g.:
	# GNU as doesn't seem to accept cmplw, 32-bit unsigned compare
	$data =~ s/^(\s*)cmplw(\s+)([^,]+),(.*)/$1cmpl$2$3,0,$4/gm;
	# assembler X doesn't accept li, load immediate value
	#$data =~ s/^(\s*)li(\s+)([^,]+),(.*)/$1addi$2$3,0,$4/gm;
	return($data);
}
print $data;
# STDOUT may be a pipe into ppc-xlate.pl.  close() on a piped handle waits
# for the child and returns false if it exited non-zero, with $! set to 0
# when that is the only problem; without this check a failed translation
# would go unnoticed and leave a truncated or empty output behind.
close STDOUT
	or die $! ? "error closing STDOUT: $!"
		  : "ppc-xlate.pl exited with status $?";