Commit 496f2b14 authored by Andy Polyakov
Browse files

C64x+ assembly pack: add RC4 module.

parent bd227733
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -414,7 +414,7 @@ my %table=(
"linux-alpha+bwx-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}",
#
# TI_CGT_C6000_7.3.x is a requirement
"linux-c64xplus","cl6x:--linux -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT::-D_REENTRANT:::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:dlfcn:linux-shared:--pic:-z --sysv --shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):true",
"linux-c64xplus","cl6x:--linux -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT::-D_REENTRANT:::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o::rc4-c64xplus.o:::::ghash-c64xplus.o::void:dlfcn:linux-shared:--pic:-z --sysv --shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):true",

# Android: linux-* but without -DTERMIO and pointers to headers and libs.
"android","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+2 −2
Original line number Diff line number Diff line
@@ -1652,7 +1652,7 @@ $multilib =

*** debug-VC-WIN32
$cc           = cl
$cflags       = -W3 -WX -Gs0 -GF -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -D_CRT_SECURE_NO_DEPRECATE
$cflags       = -W3 -Gs0 -GF -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -D_CRT_SECURE_NO_DEPRECATE
$unistd       = 
$thread_cflag = 
$sys_id       = WIN32
@@ -4174,7 +4174,7 @@ $bf_obj =
$md5_obj      = 
$sha1_obj     = sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o
$cast_obj     = 
$rc4_obj      = 
$rc4_obj      = rc4-c64xplus.o
$rmd160_obj   = 
$rc5_obj      = 
$wp_obj       = 
+183 −0
Original line number Diff line number Diff line
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# RC4 for C64x+.
#
# April 2014
#
# RC4 subroutine processes one byte in 7.0 cycles, which is 3x faster
# than TI CGT-generated code. Loop is scheduled in such a way that
# there is only one reference to memory in each cycle. This is done
# to avoid L1D memory banking conflicts, see SPRU871 TI publication
# for further details. Otherwise it should be possible to schedule
# the loop for iteration interval of 6...

# Registers the routines below read their arguments from.
($KEY, $LEN, $INP, $OUT) = qw(A4 B4 A6 B6);

# Working registers, allocated pairwise: each A-side name on the first line
# has a B-side partner with the same register number on the second line.
($KEYA, $XX, $TY, $xx, $ONE, $ret) = map { "A$_" } (5, 7, 8, 9, 1, 2);
($KEYB, $YY, $TX, $tx, $SUM, $dat) = map { "B$_" } (5, 7, 8, 9, 1, 2);

$code.=<<___;
	.text

;; Assemblers older than 7.0 get __TI_EABI__ defined here as 0
;; (presumably they do not predefine it - confirm against the tools guide).
	.if	.ASSEMBLER_VERSION<7000000
	.asg	0,__TI_EABI__
	.endif
;; Under EABI the exported names carry no leading underscore: every later
;; occurrence of _RC4, _RC4_set_key and _RC4_options is substituted with
;; the plain name.
;; NOTE(review): .nocmp looks like it disables compact instruction
;; encoding for this section - confirm against the tools guide.
	.if	__TI_EABI__
	.nocmp
	.asg	RC4,_RC4
	.asg	RC4_set_key,_RC4_set_key
	.asg	RC4_options,_RC4_options
	.endif

	.global	_RC4
	.align	16
;;
;; RC4: XOR LEN bytes read through INP with the RC4 keystream and write
;; them through OUT, updating the state at KEY in place.
;;
;; State layout as accessed here: byte x at KEY+0, byte y at KEY+1 and
;; the byte-wide table S[] starting at KEY+2; both table base registers
;; are therefore set to KEY+2. LEN of zero returns without touching
;; anything.
;;
;; All index arithmetic uses ADD4/SUB4 (packed 8-bit add/subtract), so
;; the low byte wraps at 256 without an explicit mask.
;;
_RC4:
	.asmfunc
	MV	$LEN,B0			; B0 = len, predicate for the setup below
  [!B0]	BNOP	B3			; if (len==0) return;
||[B0]	ADD	$KEY,2,$KEYA
||[B0]	ADD	$KEY,2,$KEYB
  [B0]	MVK	1,$ONE
||[B0]	LDBU	*${KEYA}[-2],$XX	; key->x
  [B0]	LDBU	*${KEYB}[-1],$YY	; key->y
||	NOP	4

	ADD4	$ONE,$XX,$XX		; x = x+1 for the first byte
	LDBU	*${KEYA}[$XX],$TX	; TX = S[x]
||	MVC	$LEN,ILC		; loop count = len
	NOP	4
;;==================================================
;; Software-pipelined loop, iteration interval 7, one byte per iteration.
;; Each iteration starts with TX = S[x] and with S[x] already added into
;; y: the ADD4 paired with SPLOOP does that for the first byte, the
;; predicated ADD4s in the body do it for every following byte.
	SPLOOP	7
||	ADD4	$TX,$YY,$YY

	LDBU	*${KEYB}[$YY],$TY	; TY = S[y]
||	MVD	$XX,$xx			; delayed copy of x for the S[x] store
||	ADD4	$ONE,$XX,$XX		; x+1, index for the next byte
	LDBU	*${KEYA}[$XX],$tx	; prefetch S[x+1] ahead of the swap
	CMPEQ	$YY,$XX,B0		; y == x+1: the swap rewrites S[x+1]
||	NOP	3
	STB	$TX,*${KEYB}[$YY]	; S[y] = old S[x]
||[B0]	ADD4	$TX,$YY,$YY		; prefetch is stale: next S[x] is the value just stored
	STB	$TY,*${KEYA}[$xx]	; S[x] = old S[y]
||[!B0]	ADD4	$tx,$YY,$YY		; prefetch is valid: fold it into next y
||[!B0]	MVD	$tx,$TX			; and make it the next TX (delayed move)
	ADD4	$TY,$TX,$SUM		; [0,0] $TX is not replaced by $tx yet!
||	NOP	2
	LDBU	*$INP++,$dat		; input byte
||	NOP	2
	LDBU	*${KEYB}[$SUM],$ret	; keystream byte S[(S[x]+S[y]) & 0xff]
||	NOP	5
	XOR.L	$dat,$ret,$ret
	SPKERNEL
||	STB	$ret,*$OUT++		; output byte
;;==================================================
;; The loop leaves x one step ahead and y with the next S[x] already
;; added; undo both before writing the state back.
	SUB4	$XX,$ONE,$XX
||	NOP	5
	STB	$XX,*${KEYA}[-2]	; key->x
||	SUB4	$YY,$TX,$YY
||	BNOP	B3				; return; the packet below still executes
	STB	$YY,*${KEYB}[-1]	; key->y
||	NOP	5
	.endasmfunc

	.global	_RC4_set_key
	.align	16
;;
;; RC4_set_key: initialise the state at KEY from LEN bytes of key
;; material read through INP.
;;   1. key->x = key->y = 0 and S[i] = i, written four bytes at a time;
;;   2. key-schedule loop: y += S[x] + next key byte, swap S[x] and S[y],
;;      with the key pointer rewound whenever it reaches the end of input.
;; State layout as accessed here: x at KEY+0, y at KEY+1, S[] at KEY+2.
;;
_RC4_set_key:
	.asmfunc
;; ONE temporarily holds 0x04040404 (per-byte step of 4) and B0 the
;; bytes 0,1,2,3 arranged so that they land in that order in memory for
;; the selected endianness.
	.if	.BIG_ENDIAN
	MVK	0x00000404,$ONE
||	MVK	0x00000203,B0
	MVKH	0x04040000,$ONE
||	MVKH	0x00010000,B0
	.else
	MVK	0x00000404,$ONE
||	MVK	0x00000100,B0
	MVKH	0x04040000,$ONE
||	MVKH	0x03020000,B0
	.endif
	ADD	$KEY,2,$KEYA
||	ADD	$KEY,2,$KEYB
||	ADD	$INP,$LEN,$ret		; end of input
	LDBU	*${INP}++,$dat		; first key byte
||	MVK	0,$TX			; zero for x/y; also equals S[0] for the first round
	STH	$TX,*${KEY}++		; key->x=key->y=0
||	MV	B0,A0
||	MVK	64-4,B0

;;==================================================
;; Identity fill of S[]. KEY now points at S[] (advanced by the halfword
;; store above); STNW is the non-aligned word store, so that address
;; does not need to be word aligned.
;; NOTE(review): ILC is 64-4 rather than 64, which appears to account
;; for the delayed termination test of SPLOOPD at interval 1 - confirm
;; against the SPLOOPD description in the CPU reference guide.
	SPLOOPD	1
||	MVC	B0,ILC

	STNW	A0,*${KEY}++
||	ADD4	$ONE,A0,A0
	SPKERNEL
;;==================================================

	MVK	0,$YY			; y = 0
||	MVK	0,$XX			; x = 0
	MVK	1,$ONE			; back to a step of 1
||	MVK	256-1,B0

;;==================================================
;; Key-schedule loop, iteration interval 8. As in the RC4 loop, TX holds
;; S[x] for the current round and has already been added into y by the
;; previous round; only the key byte is added at the top.
;; NOTE(review): ILC is 256-1 rather than 256, presumably for the same
;; SPLOOPD reason as above - confirm.
	SPLOOPD	8
||	MVC	B0,ILC

	ADD4	$dat,$YY,$YY		; y += key byte
||	CMPEQ	$INP,$ret,A0		; end of input?
	LDBU	*${KEYB}[$YY],$TY	; TY = S[y]
||	MVD	$XX,$xx			; delayed copy of x for the S[x] store
||	ADD4	$ONE,$XX,$XX		; x+1, index for the next round
	LDBU	*${KEYA}[$XX],$tx	; prefetch S[x+1] ahead of the swap
||[A0]	SUB	$INP,$LEN,$INP		; rewind
	LDBU	*${INP}++,$dat		; key byte for the next round
||	CMPEQ	$YY,$XX,B0		; y == x+1: the swap rewrites S[x+1]
||	NOP	3
	STB	$TX,*${KEYB}[$YY]	; S[y] = old S[x]
||[B0]	ADD4	$TX,$YY,$YY		; prefetch is stale: next S[x] is the value just stored
	STB	$TY,*${KEYA}[$xx]	; S[x] = old S[y]
||[!B0]	ADD4	$tx,$YY,$YY		; prefetch is valid: fold it into next y
||[!B0]	MV	$tx,$TX			; and make it the next TX
	SPKERNEL
;;==================================================

	BNOP	B3,5			; return
	.endasmfunc

	.global	_RC4_options
	.align	16
;;
;; RC4_options: leave the address of the string labelled rc4_options in
;; A4. The address is formed as (run-time address of this function) +
;; (link-time distance from this function to the string), so no absolute
;; address is encoded. The lower-case label _rc4_options is the anchor
;; used for both terms.
;;
_RC4_options:
_rc4_options:
	.asmfunc
	BNOP	B3,1			; return; the instructions below still execute
	ADDKPC	_rc4_options,B4		; B4 = address of _rc4_options
	.if	__TI_EABI__
	MVKL	\$PCR_OFFSET(rc4_options,_rc4_options),A4
	MVKH	\$PCR_OFFSET(rc4_options,_rc4_options),A4
	.else
	MVKL	(rc4_options-_rc4_options),A4
	MVKH	(rc4_options-_rc4_options),A4
	.endif
	ADD	B4,A4,A4		; A4 = anchor + offset = address of the string
	.endasmfunc

	.if	__TI_EABI__
	.sect	".text:rc4_options.const"
	.else
	.sect	".const:rc4_options"
	.endif
	.align	4
;; String data addressed by RC4_options: the options string first,
;; followed by an attribution string. The section is a .text subsection
;; under EABI and a .const subsection otherwise.
rc4_options:
	.cstring "rc4(sploop,char)"
	.cstring "RC4 for C64+, CRYPTOGAMS by <appro\@openssl.org>"
	.align	4
___

# Write the generated assembly to STDOUT. Buffered write errors (full disk,
# closed pipe) only surface when the handle is closed, so a failed close must
# abort with a non-zero status instead of leaving a silently truncated file.
print $code;
close STDOUT or die "error closing STDOUT: $!";