Commit 5fabb88a authored by Andy Polyakov's avatar Andy Polyakov
Browse files

Multiple assembler packs: add experimental memory bus instrumentation.

parent 764ef439
Loading
Loading
Loading
Loading
+90 −0
Original line number Diff line number Diff line
@@ -126,3 +126,93 @@ OPENSSL_cleanse:
.Ldone: ret	($26)
.end	OPENSSL_cleanse
___
{
my ($out,$cnt,$max)=("\$16","\$17","\$18");
my ($tick,$lasttick)=("\$19","\$20");
my ($diff,$lastdiff)=("\$21","\$22");
my ($v0,$ra,$sp,$zero)=("\$0","\$26","\$30","\$31");

print <<___;
.globl	OPENSSL_instrument_bus
.ent	OPENSSL_instrument_bus
OPENSSL_instrument_bus:
	.frame	$sp,0,$ra
	.prologue 0
	mov	$cnt,$v0

	rpcc	$lasttick
	mov	0,$diff

	ecb	($out)
	ldl_l	$tick,0($out)
	addl	$diff,$tick,$tick
	mov	$tick,$diff
	stl_c	$tick,0($out)
	stl	$diff,0($out)

.Loop:	rpcc	$tick
	subq	$tick,$lasttick,$diff
	mov	$tick,$lasttick

	ecb	($out)
	ldl_l	$tick,0($out)
	addl	$diff,$tick,$tick
	mov	$tick,$diff
	stl_c	$tick,0($out)
	stl	$diff,0($out)

	subl	$cnt,1,$cnt
	lda	$out,4($out)
	bne	$cnt,.Loop

	ret	($ra)
.end	OPENSSL_instrument_bus

.globl	OPENSSL_instrument_bus2
.ent	OPENSSL_instrument_bus2
OPENSSL_instrument_bus2:
	.frame	$sp,0,$ra
	.prologue 0
	mov	$cnt,$v0

	rpcc	$lasttick
	mov	0,$diff

	ecb	($out)
	ldl_l	$tick,0($out)
	addl	$diff,$tick,$tick
	mov	$tick,$diff
	stl_c	$tick,0($out)
	stl	$diff,0($out)

	rpcc	$tick
	subq	$tick,$lasttick,$diff
	mov	$tick,$lasttick
	mov	$diff,$lastdiff
.Loop2:
	ecb	($out)
	ldl_l	$tick,0($out)
	addl	$diff,$tick,$tick
	mov	$tick,$diff
	stl_c	$tick,0($out)
	stl	$diff,0($out)

	subl	$max,1,$max
	beq	$max,.Ldone2

	rpcc	$tick
	subq	$tick,$lasttick,$diff
	mov	$tick,$lasttick
	subq	$lastdiff,$diff,$tick
	mov	$diff,$lastdiff
	cmovne	$tick,1,$tick
	subl	$cnt,$tick,$cnt
	s4addq	$tick,$out,$out
	bne	$cnt,.Loop2

.Ldone2:
	subl	$v0,$cnt,$v0
	ret	($ra)
.end	OPENSSL_instrument_bus2
___
}
+87 −1
Original line number Diff line number Diff line
@@ -26,7 +26,7 @@ OPENSSL_atomic_add:
{ .mii;	mov		ar.ccv=r2
	add		r8=r2,r33
	mov		r3=r2		};;
{ .mmi;	mf
{ .mmi;	mf;;
	cmpxchg4.acq	r2=[r32],r8,ar.ccv
	nop.i		0		};;
{ .mib;	cmp.ne		p6,p0=r2,r3
@@ -165,3 +165,89 @@ OPENSSL_cleanse:
(p7)	br.cond.dpnt	.Little
(p6)	br.ret.sptk.many	b0	};;
.endp	OPENSSL_cleanse#

.global	OPENSSL_instrument_bus#
.proc	OPENSSL_instrument_bus#
OPENSSL_instrument_cache:
{ .mmi;	mov		r2=r33
#if defined(_HPUX_SOURCE) && !defined(_LP64)
	addp4		r32=0,r32
#endif
					}
{ .mmi;	mov		r8=ar.itc;;
	mov		r10=r0
	mov		r9=r8		};;

{ .mmi;	fc		r32;;
	ld4		r8=[r32]	};;
{ .mmi;	mf
	mov		ar.ccv=r8
	add		r8=r8,r10	};;
{ .mmi;	cmpxchg4.acq	r3=[r32],r8,ar.ccv
					};;
.Loop:
{ .mmi;	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8		};;	// lasttick=tick
{ .mmi;	fc		r32;;
	ld4		r8=[r32]	};;
{ .mmi;	mf
	mov		ar.ccv=r8
	add		r8=r8,r10	};;
{ .mmi;	cmpxchg4.acq	r3=[r32],r8,ar.ccv
	add		r33=-1,r33
	add		r32=4,r32	};;
{ .mib;	cmp4.ne		p6,p0=0,r33
(p6)	br.cond.dptk	.Loop		};;

{ .mib;	sub		r8=r2,r33
	br.ret.sptk.many	b0	};;
.endp	OPENSSL_instrument_bus#

.global	OPENSSL_instrument_bus2#
.proc	OPENSSL_instrument_bus2#
OPENSSL_instrument_cache2:
{ .mmi;	mov		r2=r33			// put aside cnt
#if defined(_HPUX_SOURCE) && !defined(_LP64)
	addp4		r32=0,r32
#endif
					}
{ .mmi;	mov		r8=ar.itc;;
	mov		r10=r0
	mov		r9=r8		};;

{ .mmi;	fc		r32;;
	ld4		r8=[r32]	};;
{ .mmi;	mf
	mov		ar.ccv=r8
	add		r8=r8,r10	};;
{ .mmi;	cmpxchg4.acq	r3=[r32],r8,ar.ccv
					};;

{ .mmi;	mov		r8=ar.itc;;
	sub		r10=r8,r9
	mov		r9=r8		};;
.Loop2:
{ .mmi;	mov		r11=r10			// lastdiff=diff
	add		r34=-1,r34	};;	// --max
{ .mmi;	fc		r32;;
	ld4		r8=[r32]
	cmp4.eq		p6,p0=0,r34	};;
{ .mmi;	mf
	mov		ar.ccv=r8
	add		r8=r8,r10	};;
{ .mmb;	cmpxchg4.acq	r3=[r32],r8,ar.ccv
(p6)	br.cond.spnt	.Ldone2		};;

{ .mmi;	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8		};;	// lasttick=tick
{ .mmi;	cmp.ne		p6,p0=r10,r11;;		// diff!=lastdiff
(p6)	add		r33=-1,r33	};;	// conditional --cnt
{ .mib;	cmp4.ne		p7,p0=0,r33
(p6)	add		r32=4,r32		// conditional ++out
(p7)	br.cond.dptk	.Loop2		};;
.Ldone2:
{ .mib;	sub		r8=r2,r33
	br.ret.sptk.many	b0	};;
.endp	OPENSSL_instrument_bus2#
+90 −4
Original line number Diff line number Diff line
@@ -87,8 +87,8 @@ OPENSSL_wipe_cpu
	.PROCEND
___
{
$inp="%r26";
$len="%r25";
my $inp="%r26";
my $len="%r25";

$code.=<<___;
	.EXPORT	OPENSSL_cleanse,ENTRY,ARGW0=GR,ARGW1=GR
@@ -112,9 +112,9 @@ Lalign

Laligned
	andcm		$len,%r1,%r28
Loop
Lot
	$ST		%r0,0($inp)
	addib,*<>	-$SIZE_T,%r28,Loop
	addib,*<>	-$SIZE_T,%r28,Lot
	ldo		$SIZE_T($inp),$inp

	and,*<>		$len,%r1,$len
@@ -130,7 +130,93 @@ Ldone
	.PROCEND
___
}
{
my ($out,$cnt,$max)=("%r26","%r25","%r24");
my ($tick,$lasttick)=("%r23","%r22");
my ($diff,$lastdiff)=("%r21","%r20");

$code.=<<___;
	.EXPORT	OPENSSL_instrument_bus,ENTRY,ARGW0=GR,ARGW1=GR
	.ALIGN	8
OPENSSL_instrument_bus
	.PROC
	.CALLINFO	NO_CALLS
	.ENTRY
	copy		$cnt,$rv
	mfctl		%cr16,$tick
	copy		$tick,$lasttick
	ldi		0,$diff

	fdc		0($out)
	ldw		0($out),$tick
	add		$diff,$tick,$tick
	stw		$tick,0($out)
Loop
	mfctl		%cr16,$tick
	sub		$tick,$lasttick,$diff
	copy		$tick,$lasttick

	fdc		0($out)
	ldw		0($out),$tick
	add		$diff,$tick,$tick
	stw		$tick,0($out)

	addib,<>	-1,$cnt,Loop
	addi		4,$out,$out

	bv		($rp)
	.EXIT
	sub		$rv,$cnt,$rv
	.PROCEND

	.EXPORT	OPENSSL_instrument_bus2,ENTRY,ARGW0=GR,ARGW1=GR
	.ALIGN	8
OPENSSL_instrument_bus2
	.PROC
	.CALLINFO	NO_CALLS
	.ENTRY
	copy		$cnt,$rv
	sub		%r0,$cnt,$cnt

	mfctl		%cr16,$tick
	copy		$tick,$lasttick
	ldi		0,$diff

	fdc		0($out)
	ldw		0($out),$tick
	add		$diff,$tick,$tick
	stw		$tick,0($out)

	mfctl		%cr16,$tick
	sub		$tick,$lasttick,$diff
	copy		$tick,$lasttick
Loop2
	copy		$diff,$lastdiff
	fdc		0($out)
	ldw		0($out),$tick
	add		$diff,$tick,$tick
	stw		$tick,0($out)

	addib,=		-1,$max,Ldone2
	nop

	mfctl		%cr16,$tick
	sub		$tick,$lasttick,$diff
	copy		$tick,$lasttick
	cmpclr,<>	$lastdiff,$diff,$tick
	ldi		1,$tick

	ldi		1,%r1
	xor		%r1,$tick,$tick
	addb,<>		$tick,$cnt,Loop2
	shladd,l	$tick,2,$out,$out
Ldone2
	bv		($rp)
	.EXIT
	add		$rv,$cnt,$rv
	.PROCEND
___
}
$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4);
$code =~ s/,\*/,/gm if ($SIZE_T==4);
print $code;
+85 −2
Original line number Diff line number Diff line
@@ -69,10 +69,10 @@ $code=<<___;
.globl	.OPENSSL_atomic_add
.align	4
.OPENSSL_atomic_add:
Loop:	lwarx	r5,0,r3
Ladd:	lwarx	r5,0,r3
	add	r0,r4,r5
	stwcx.	r0,0,r3
	bne-	Loop
	bne-	Ladd
	$SIGNX	r3,r0
	blr

@@ -112,6 +112,89 @@ Laligned:
	bne	Little
	blr
___
{
my ($out,$cnt,$max)=("r3","r4","r5");
my ($tick,$lasttick)=("r6","r7");
my ($diff,$lastdiff)=("r8","r9");

$code.=<<___;
.globl	.OPENSSL_instrument_bus
.align	4
.OPENSSL_instrument_bus:
	mtctr	$cnt

	mftb	$lasttick		# collect 1st tick
	li	$diff,0

	dcbf	0,$out			# flush cache line
	lwarx	$tick,0,$out		# load and lock
	add	$tick,$tick,$diff
	stwcx.	$tick,0,$out
	stwx	$tick,0,$out

Loop:	mftb	$tick
	sub	$diff,$tick,$lasttick
	mr	$lasttick,$tick
	dcbf	0,$out			# flush cache line
	lwarx	$tick,0,$out		# load and lock
	add	$tick,$tick,$diff
	stwcx.	$tick,0,$out
	stwx	$tick,0,$out
	addi	$out,$out,4		# ++$out
	bdnz	Loop

	mr	r3,$cnt
	blr

.globl	.OPENSSL_instrument_bus2
.align	4
.OPENSSL_instrument_bus2:
	mr	r0,$cnt
	slwi	$cnt,$cnt,2

	mftb	$lasttick		# collect 1st tick
	li	$diff,0

	dcbf	0,$out			# flush cache line
	lwarx	$tick,0,$out		# load and lock
	add	$tick,$tick,$diff
	stwcx.	$tick,0,$out
	stwx	$tick,0,$out

	mftb	$tick			# collect 1st diff
	sub	$diff,$tick,$lasttick
	mr	$lasttick,$tick
	mr	$lastdiff,$diff
Loop2:
	dcbf	0,$out			# flush cache line
	lwarx	$tick,0,$out		# load and lock
	add	$tick,$tick,$diff
	stwcx.	$tick,0,$out
	stwx	$tick,0,$out

	addic.	$max,$max,-1
	beq	Ldone2

	mftb	$tick
	sub	$diff,$tick,$lasttick
	mr	$lasttick,$tick
	cmplw	7,$diff,$lastdiff
	mr	$lastdiff,$diff

	mfcr	$tick			# pull cr
	not	$tick,$tick		# flip bits
	rlwinm	$tick,$tick,1,29,29	# isolate flipped eq bit and scale

	sub.	$cnt,$cnt,$tick		# conditional --$cnt
	add	$out,$out,$tick		# conditional ++$out
	bne	Loop2

Ldone2:
	srwi	$cnt,$cnt,2
	sub	r3,r0,$cnt
	blr
___
}

$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
+16 −0
Original line number Diff line number Diff line
@@ -93,6 +93,22 @@ OPENSSL_cleanse:
	br	%r14
.size	OPENSSL_cleanse,.-OPENSSL_cleanse

.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,@function
.align	16
OPENSSL_instrument_bus:
	lghi	%r2,0
	br	%r14
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,@function
.align	16
OPENSSL_instrument_bus2:
	lghi	%r2,0
	br	%r14
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2

.section	.init
	brasl	%r14,OPENSSL_cpuid_setup

Loading