Loading Configure +1 −1 Original line number Diff line number Diff line Loading @@ -410,7 +410,7 @@ my %table=( "linux-alpha+bwx-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}", # # TI_CGT_C6000_7.3.x is a requirement "linux-c64xplus","cl6x:--linux --strip_coff_underscore -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT::-D_REENTRANT:::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:dlfcn:linux-shared:--pic:-z --sysv --shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):true", "linux-c64xplus","cl6x:--linux -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT::-D_REENTRANT:::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:dlfcn:linux-shared:--pic:-z --sysv --shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):true", # Android: linux-* but without -DTERMIO and pointers to headers and libs. "android","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", Loading TABLE +1 −1 Original line number Diff line number Diff line Loading @@ -3995,7 +3995,7 @@ $multilib = *** linux-c64xplus $cc = cl6x $cflags = --linux --strip_coff_underscore -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT $cflags = --linux -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT $unistd = $thread_cflag = -D_REENTRANT $sys_id = Loading crypto/aes/asm/aes-c64xplus.pl +10 −0 Original line number Diff line number Diff line Loading @@ -46,6 +46,11 @@ $code=<<___; .text .if __TI_EABI__ .nocmp .asg AES_encrypt,_AES_encrypt .asg AES_decrypt,_AES_decrypt .asg AES_set_encrypt_key,_AES_set_encrypt_key .asg AES_set_decrypt_key,_AES_set_decrypt_key .asg AES_ctr32_encrypt,_AES_ctr32_encrypt .endif .asg B3,RA Loading Loading @@ -1021,7 +1026,11 @@ ___ } # Tables are kept in endian-neutral manner $code.=<<___; .if __TI_EABI__ .sect ".text:aes_asm.const" .else .sect ".const:aes_asm" .endif .align 128 AES_Te: .byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 Loading Loading @@ -1359,3 +1368,4 @@ AES_Td4: ___ print $code; close STDOUT; crypto/bn/asm/bn-c64xplus.asm +44 −7 Original line number Diff line number Diff line Loading @@ -12,6 +12,18 @@ ;; SPLOOPs spin at ... 2*n cycles [plus epilogue]. ;;==================================================================== .text .if __TI_EABI__ .asg bn_mul_add_words,_bn_mul_add_words .asg bn_mul_words,_bn_mul_words .asg bn_sqr_words,_bn_sqr_words .asg bn_add_words,_bn_add_words .asg bn_sub_words,_bn_sub_words .asg bn_div_words,_bn_div_words .asg bn_sqr_comba8,_bn_sqr_comba8 .asg bn_mul_comba8,_bn_mul_comba8 .asg bn_sqr_comba4,_bn_sqr_comba4 .asg bn_mul_comba4,_bn_mul_comba4 .endif .asg B3,RA .asg A4,ARG0 Loading Loading @@ -158,14 +170,39 @@ _bn_sub_words: .endasmfunc .global _bn_div_words .global __divull _bn_div_words: .asmfunc CALLP __divull,A3 ; jump to rts64plus.lib || MV ARG0,A5 || MV ARG1,ARG0 || MV ARG2,ARG1 || ZERO B5 LMBD 1,A6,A0 ; leading zero bits in dv LMBD 1,A4,A1 ; leading zero bits in hi || MVK 32,B0 CMPLTU A1,A0,A2 || ADD A0,B0,B0 [ A2] BNOP RA ||[ A2] MVK -1,A4 ; return overflow ||[!A2] MV A4,A3 ; reassign hi [!A2] MV B4,A4 ; reassign lo, will be quotient ||[!A2] MVC B0,ILC [!A2] SHL A6,A0,A6 ; normalize dv || MVK 1,A1 [!A2] CMPLTU A3,A6,A1 ; hi<dv? ||[!A2] SHL A4,1,A5:A4 ; lo<<1 [!A1] SUB A3,A6,A3 ; hi-=dv ||[!A1] OR 1,A4,A4 [!A2] SHRU A3,31,A1 ; upper bit ||[!A2] ADDAH A5,A3,A3 ; hi<<1|lo>>31 SPLOOP 3 [!A1] CMPLTU A3,A6,A1 ; hi<dv? ||[ A1] ZERO A1 || SHL A4,1,A5:A4 ; lo<<1 [!A1] SUB A3,A6,A3 ; hi-=dv ||[!A1] OR 1,A4,A4 ; quotient SHRU A3,31,A1 ; upper bit || ADDAH A5,A3,A3 ; hi<<1|lo>>31 SPKERNEL BNOP RA,5 .endasmfunc ;;==================================================================== Loading Loading @@ -256,7 +293,7 @@ _bn_mul_comba4: || LDW *A5++,B6 ; ap[0] || MV A0,A3 ; const A3=M .else ;; This alternative is exercise in fully unrolled Comba ;; This alternative is an exercise in fully unrolled Comba ;; algorithm implementation that operates at n*(n+1)+12, or ;; as little as 32 cycles... LDW *ARG1[0],B16 ; a[0] Loading crypto/bn/asm/c64xplus-gf2m.pl +3 −0 Original line number Diff line number Diff line Loading @@ -107,6 +107,9 @@ ___ } $code.=<<___; .text .if __TI_EABI__ .asg bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2 .endif .global _bn_GF2m_mul_2x2 _bn_GF2m_mul_2x2: Loading Loading
Configure +1 −1 Original line number Diff line number Diff line Loading @@ -410,7 +410,7 @@ my %table=( "linux-alpha+bwx-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}", # # TI_CGT_C6000_7.3.x is a requirement "linux-c64xplus","cl6x:--linux --strip_coff_underscore -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT::-D_REENTRANT:::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:dlfcn:linux-shared:--pic:-z --sysv --shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):true", "linux-c64xplus","cl6x:--linux -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT::-D_REENTRANT:::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:dlfcn:linux-shared:--pic:-z --sysv --shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):true", # Android: linux-* but without -DTERMIO and pointers to headers and libs. "android","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", Loading
TABLE +1 −1 Original line number Diff line number Diff line Loading @@ -3995,7 +3995,7 @@ $multilib = *** linux-c64xplus $cc = cl6x $cflags = --linux --strip_coff_underscore -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT $cflags = --linux -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT $unistd = $thread_cflag = -D_REENTRANT $sys_id = Loading
crypto/aes/asm/aes-c64xplus.pl +10 −0 Original line number Diff line number Diff line Loading @@ -46,6 +46,11 @@ $code=<<___; .text .if __TI_EABI__ .nocmp .asg AES_encrypt,_AES_encrypt .asg AES_decrypt,_AES_decrypt .asg AES_set_encrypt_key,_AES_set_encrypt_key .asg AES_set_decrypt_key,_AES_set_decrypt_key .asg AES_ctr32_encrypt,_AES_ctr32_encrypt .endif .asg B3,RA Loading Loading @@ -1021,7 +1026,11 @@ ___ } # Tables are kept in endian-neutral manner $code.=<<___; .if __TI_EABI__ .sect ".text:aes_asm.const" .else .sect ".const:aes_asm" .endif .align 128 AES_Te: .byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 Loading Loading @@ -1359,3 +1368,4 @@ AES_Td4: ___ print $code; close STDOUT;
crypto/bn/asm/bn-c64xplus.asm +44 −7 Original line number Diff line number Diff line Loading @@ -12,6 +12,18 @@ ;; SPLOOPs spin at ... 2*n cycles [plus epilogue]. ;;==================================================================== .text .if __TI_EABI__ .asg bn_mul_add_words,_bn_mul_add_words .asg bn_mul_words,_bn_mul_words .asg bn_sqr_words,_bn_sqr_words .asg bn_add_words,_bn_add_words .asg bn_sub_words,_bn_sub_words .asg bn_div_words,_bn_div_words .asg bn_sqr_comba8,_bn_sqr_comba8 .asg bn_mul_comba8,_bn_mul_comba8 .asg bn_sqr_comba4,_bn_sqr_comba4 .asg bn_mul_comba4,_bn_mul_comba4 .endif .asg B3,RA .asg A4,ARG0 Loading Loading @@ -158,14 +170,39 @@ _bn_sub_words: .endasmfunc .global _bn_div_words .global __divull _bn_div_words: .asmfunc CALLP __divull,A3 ; jump to rts64plus.lib || MV ARG0,A5 || MV ARG1,ARG0 || MV ARG2,ARG1 || ZERO B5 LMBD 1,A6,A0 ; leading zero bits in dv LMBD 1,A4,A1 ; leading zero bits in hi || MVK 32,B0 CMPLTU A1,A0,A2 || ADD A0,B0,B0 [ A2] BNOP RA ||[ A2] MVK -1,A4 ; return overflow ||[!A2] MV A4,A3 ; reassign hi [!A2] MV B4,A4 ; reassign lo, will be quotient ||[!A2] MVC B0,ILC [!A2] SHL A6,A0,A6 ; normalize dv || MVK 1,A1 [!A2] CMPLTU A3,A6,A1 ; hi<dv? ||[!A2] SHL A4,1,A5:A4 ; lo<<1 [!A1] SUB A3,A6,A3 ; hi-=dv ||[!A1] OR 1,A4,A4 [!A2] SHRU A3,31,A1 ; upper bit ||[!A2] ADDAH A5,A3,A3 ; hi<<1|lo>>31 SPLOOP 3 [!A1] CMPLTU A3,A6,A1 ; hi<dv? ||[ A1] ZERO A1 || SHL A4,1,A5:A4 ; lo<<1 [!A1] SUB A3,A6,A3 ; hi-=dv ||[!A1] OR 1,A4,A4 ; quotient SHRU A3,31,A1 ; upper bit || ADDAH A5,A3,A3 ; hi<<1|lo>>31 SPKERNEL BNOP RA,5 .endasmfunc ;;==================================================================== Loading Loading @@ -256,7 +293,7 @@ _bn_mul_comba4: || LDW *A5++,B6 ; ap[0] || MV A0,A3 ; const A3=M .else ;; This alternative is exercise in fully unrolled Comba ;; This alternative is an exercise in fully unrolled Comba ;; algorithm implementation that operates at n*(n+1)+12, or ;; as little as 32 cycles... LDW *ARG1[0],B16 ; a[0] Loading
crypto/bn/asm/c64xplus-gf2m.pl +3 −0 Original line number Diff line number Diff line Loading @@ -107,6 +107,9 @@ ___ } $code.=<<___; .text .if __TI_EABI__ .asg bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2 .endif .global _bn_GF2m_mul_2x2 _bn_GF2m_mul_2x2: Loading