Commit 904732f6 authored by Andy Polyakov's avatar Andy Polyakov
Browse files

C64x+ assembly pack: improve EABI support.

parent cf5ecc3e
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -410,7 +410,7 @@ my %table=(
"linux-alpha+bwx-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}",
#
# TI_CGT_C6000_7.3.x is a requirement
"linux-c64xplus","cl6x:--linux --strip_coff_underscore -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT::-D_REENTRANT:::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:dlfcn:linux-shared:--pic:-z --sysv --shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):true",
"linux-c64xplus","cl6x:--linux -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT::-D_REENTRANT:::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:dlfcn:linux-shared:--pic:-z --sysv --shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):true",

# Android: linux-* but without -DTERMIO and pointers to headers and libs.
"android","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+1 −1
Original line number Diff line number Diff line
@@ -3995,7 +3995,7 @@ $multilib =

*** linux-c64xplus
$cc           = cl6x
$cflags       = --linux --strip_coff_underscore -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT
$cflags       = --linux -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT
$unistd       = 
$thread_cflag = -D_REENTRANT
$sys_id       = 
+10 −0
Original line number Diff line number Diff line
@@ -46,6 +46,11 @@ $code=<<___;
	.text
	.if	__TI_EABI__
	.nocmp
	.asg	AES_encrypt,_AES_encrypt
	.asg	AES_decrypt,_AES_decrypt
	.asg	AES_set_encrypt_key,_AES_set_encrypt_key
	.asg	AES_set_decrypt_key,_AES_set_decrypt_key
	.asg	AES_ctr32_encrypt,_AES_ctr32_encrypt
	.endif

	.asg	B3,RA
@@ -1021,7 +1026,11 @@ ___
}
# Tables are kept in endian-neutral manner
$code.=<<___;
	.if	__TI_EABI__
	.sect	".text:aes_asm.const"
	.else
	.sect	".const:aes_asm"
	.endif
	.align	128
AES_Te:
	.byte	0xc6,0x63,0x63,0xa5,	0xf8,0x7c,0x7c,0x84
@@ -1359,3 +1368,4 @@ AES_Td4:
___

print $code;
close STDOUT;
+44 −7
Original line number Diff line number Diff line
@@ -12,6 +12,18 @@
;; SPLOOPs spin at ... 2*n cycles [plus epilogue].
;;====================================================================
	.text
	.if	__TI_EABI__
	.asg	bn_mul_add_words,_bn_mul_add_words
	.asg	bn_mul_words,_bn_mul_words
	.asg	bn_sqr_words,_bn_sqr_words
	.asg	bn_add_words,_bn_add_words
	.asg	bn_sub_words,_bn_sub_words
	.asg	bn_div_words,_bn_div_words
	.asg	bn_sqr_comba8,_bn_sqr_comba8
	.asg	bn_mul_comba8,_bn_mul_comba8
	.asg	bn_sqr_comba4,_bn_sqr_comba4
	.asg	bn_mul_comba4,_bn_mul_comba4
	.endif

	.asg	B3,RA
	.asg	A4,ARG0
@@ -158,14 +170,39 @@ _bn_sub_words:
	.endasmfunc

	.global	_bn_div_words
	.global	__divull
_bn_div_words:
	.asmfunc
;;--------------------------------------------------------------------
;; NOTE(review): this span was scraped from a unified diff with the
;; +/- markers stripped.  The next five lines (the CALLP to __divull
;; and its parallel argument moves) appear to be the REMOVED pre-EABI
;; implementation, which tail-called the RTS library's 64/32 divide;
;; everything from the first LMBD onward appears to be the ADDED
;; inline replacement.  Do not read this as one coherent function —
;; confirm against the actual repository before reuse.
;;
;; Presumed C contract (TODO confirm against bn.h):
;;   BN_ULONG bn_div_words(BN_ULONG hi, BN_ULONG lo, BN_ULONG dv)
;;   in A4=hi, B4=lo, A6=dv; quotient returned in A4.
;;--------------------------------------------------------------------
	CALLP	__divull,A3	; jump to rts64plus.lib
||	MV	ARG0,A5
||	MV	ARG1,ARG0
||	MV	ARG2,ARG1
||	ZERO	B5
;; Inline path: classic restoring shift-subtract division.  LMBD
;; counts leading bits; if hi has fewer leading zeros than dv
;; (A1<A0), the quotient cannot fit in 32 bits -> overflow.
	LMBD	1,A6,A0		; leading zero bits in dv
	LMBD	1,A4,A1		; leading zero bits in hi
||	MVK	32,B0
	CMPLTU	A1,A0,A2	; A2 = overflow predicate (hi >= dv<<k)
||	ADD	A0,B0,B0	; B0 = 32 + lzc(dv) = iteration count
  [ A2]	BNOP	RA
||[ A2]	MVK	-1,A4		; return overflow
||[!A2]	MV	A4,A3		; reassign hi
  [!A2]	MV	B4,A4		; reassign lo, will be quotient
||[!A2]	MVC	B0,ILC		; inner-loop count for SPLOOP below
  [!A2]	SHL	A6,A0,A6	; normalize dv
||	MVK	1,A1
;; One pipelined-loop iteration peeled ahead of SPLOOP (software
;; pipelining prologue): A3:A4 act as a 64-bit remainder:quotient
;; pair shifted left one bit per step, subtracting dv on fit.
  [!A2]	CMPLTU	A3,A6,A1	; hi<dv?
||[!A2]	SHL	A4,1,A5:A4	; lo<<1
  [!A1]	SUB	A3,A6,A3	; hi-=dv
||[!A1]	OR	1,A4,A4		; set quotient bit
  [!A2]	SHRU	A3,31,A1	; upper bit
||[!A2]	ADDAH	A5,A3,A3	; hi<<1|lo>>31
;; Hardware loop buffer: 3-cycle kernel repeated ILC times.
	SPLOOP	3
  [!A1]	CMPLTU	A3,A6,A1	; hi<dv?
||[ A1]	ZERO	A1
||	SHL	A4,1,A5:A4	; lo<<1
  [!A1]	SUB	A3,A6,A3	; hi-=dv
||[!A1]	OR	1,A4,A4		; quotient
	SHRU	A3,31,A1	; upper bit
||	ADDAH	A5,A3,A3	; hi<<1|lo>>31
	SPKERNEL
;; Return; 5 delay-slot NOPs folded into BNOP.  Quotient is in A4.
	BNOP	RA,5
	.endasmfunc

;;====================================================================
@@ -256,7 +293,7 @@ _bn_mul_comba4:
||	LDW	*A5++,B6	; ap[0]
||	MV	A0,A3		; const A3=M
	.else
	;; This alternative is exercise in fully unrolled Comba
	;; This alternative is an exercise in fully unrolled Comba
	;; algorithm implementation that operates at n*(n+1)+12, or
	;; as little as 32 cycles...
	LDW	*ARG1[0],B16	; a[0]
+3 −0
Original line number Diff line number Diff line
@@ -107,6 +107,9 @@ ___
}
$code.=<<___;
	.text
	.if	__TI_EABI__
	.asg	bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2
	.endif

	.global	_bn_GF2m_mul_2x2
_bn_GF2m_mul_2x2:
Loading