Commit de504945 authored by Andy Polyakov
Browse files

Two extra instructions in the RC4 character loop give an 80% performance
improvement on Core2. I still need to detect Core2 and choose this
path...
parent 3d1def01
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -221,6 +221,8 @@ $code.=<<___;
	movb	$TY#b,($dat,$XX[0])
	add	$TX[0]#b,$TY#b
	add	\$1,$XX[0]#b
	movzb	$TY#b,$TY#d
	movzb	$XX[0]#b,$XX[0]#d
	movzb	($dat,$TY),$TY#d
	movzb	($dat,$XX[0]),$TX[0]#d
	xorb	($inp),$TY#b