Commit 5dcf70a1 authored by Andy Polyakov
Browse files

ARM assembly pack: get ARMv7 instruction endianness right.

Pointed out and suggested by: Ard Biesheuvel.
parent cd91fd7c
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -715,8 +715,8 @@ _armv4_AES_set_encrypt_key:
.Ldone:	mov	r0,#0
	ldmia   sp!,{r4-r12,lr}
.Labrt:
#if defined(__thumb2__) && __ARM_ARCH__>=7
	.short	0x4770			@ bx lr in Thumb2 encoding
#if __ARM_ARCH__>=5
	ret				@ bx lr
#else
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
@@ -1203,6 +1203,7 @@ _armv4_AES_decrypt:
___

$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
$code =~ s/\bret\b/bx\tlr/gm;

open SELF,$0;
while(<SELF>) {
+45 −25
Original line number Diff line number Diff line
@@ -7,42 +7,46 @@
.global	_armv7_neon_probe
.type	_armv7_neon_probe,%function
_armv7_neon_probe:
	.word	0xf26ee1fe	@ vorr	q15,q15,q15
	.word	0xe12fff1e	@ bx	lr
	.byte	0xf0,0x01,0x60,0xf2	@ vorr	q8,q8,q8
	.byte	0x1e,0xff,0x2f,0xe1	@ bx	lr
.size	_armv7_neon_probe,.-_armv7_neon_probe

.global	_armv7_tick
.type	_armv7_tick,%function
_armv7_tick:
	mrrc	p15,1,r0,r1,c14		@ CNTVCT
#if __ARM_ARCH__>=5
	bx	lr
#else
	.word	0xe12fff1e		@ bx	lr
#endif
.size	_armv7_tick,.-_armv7_tick

.global	_armv8_aes_probe
.type	_armv8_aes_probe,%function
_armv8_aes_probe:
	.word	0xf3b00300	@ aese.8	q0,q0
	.word	0xe12fff1e	@ bx	lr
	.byte	0x00,0x03,0xb0,0xf3	@ aese.8	q0,q0
	.byte	0x1e,0xff,0x2f,0xe1	@ bx	lr
.size	_armv8_aes_probe,.-_armv8_aes_probe

.global	_armv8_sha1_probe
.type	_armv8_sha1_probe,%function
_armv8_sha1_probe:
	.word	0xf2000c40	@ sha1c.32	q0,q0,q0
	.word	0xe12fff1e	@ bx	lr
	.byte	0x40,0x0c,0x00,0xf2	@ sha1c.32	q0,q0,q0
	.byte	0x1e,0xff,0x2f,0xe1	@ bx	lr
.size	_armv8_sha1_probe,.-_armv8_sha1_probe

.global	_armv8_sha256_probe
.type	_armv8_sha256_probe,%function
_armv8_sha256_probe:
	.word	0xf3000c40	@ sha256h.32	q0,q0,q0
	.word	0xe12fff1e	@ bx	lr
	.byte	0x40,0x0c,0x00,0xf3	@ sha256h.32	q0,q0,q0
	.byte	0x1e,0xff,0x2f,0xe1	@ bx lr
.size	_armv8_sha256_probe,.-_armv8_sha256_probe
.global	_armv8_pmull_probe
.type	_armv8_pmull_probe,%function
_armv8_pmull_probe:
	.word	0xf2a00e00	@ vmull.p64	q0,d0,d0
	.word	0xe12fff1e	@ bx	lr
	.byte	0x00,0x0e,0xa0,0xf2	@ vmull.p64	q0,d0,d0
	.byte	0x1e,0xff,0x2f,0xe1	@ bx	lr
.size	_armv8_pmull_probe,.-_armv8_pmull_probe

.align	5
@@ -56,7 +60,7 @@ OPENSSL_atomic_add:
	cmp	r2,#0
	bne	.Ladd
	mov	r0,r3
	.word	0xe12fff1e	@ bx	lr
	bx	lr
#else
	stmdb	sp!,{r4-r6,lr}
	ldr	r2,.Lspinlock
@@ -109,9 +113,13 @@ OPENSSL_cleanse:
	adds	r1,r1,#4
	bne	.Little
.Lcleanse_done:
#if __ARM_ARCH__>=5
	bx	lr
#else
	tst	lr,#1
	moveq	pc,lr
	.word	0xe12fff1e	@ bx	lr
#endif
.size	OPENSSL_cleanse,.-OPENSSL_cleanse

.global	OPENSSL_wipe_cpu
@@ -125,41 +133,53 @@ OPENSSL_wipe_cpu:
	eor	ip,ip,ip
	tst	r0,#1
	beq	.Lwipe_done
	.word	0xf3000150	@ veor    q0, q0, q0
	.word	0xf3022152	@ veor    q1, q1, q1
	.word	0xf3044154	@ veor    q2, q2, q2
	.word	0xf3066156	@ veor    q3, q3, q3
	.word	0xf34001f0	@ veor    q8, q8, q8
	.word	0xf34221f2	@ veor    q9, q9, q9
	.word	0xf34441f4	@ veor    q10, q10, q10
	.word	0xf34661f6	@ veor    q11, q11, q11
	.word	0xf34881f8	@ veor    q12, q12, q12
	.word	0xf34aa1fa	@ veor    q13, q13, q13
	.word	0xf34cc1fc	@ veor    q14, q14, q14
	.word	0xf34ee1fe	@ veor    q15, q15, q15
	.byte	0x50,0x01,0x00,0xf3	@ veor	q0, q0, q0
	.byte	0x52,0x21,0x02,0xf3	@ veor	q1, q1, q1
	.byte	0x54,0x41,0x04,0xf3	@ veor	q2, q2, q2
	.byte	0x56,0x61,0x06,0xf3	@ veor	q3, q3, q3
	.byte	0xf0,0x01,0x40,0xf3	@ veor	q8, q8, q8
	.byte	0xf2,0x21,0x42,0xf3	@ veor	q9, q9, q9
	.byte	0xf4,0x41,0x44,0xf3	@ veor	q10, q10, q10
	.byte	0xf6,0x61,0x46,0xf3	@ veor	q11, q11, q11
	.byte	0xf8,0x81,0x48,0xf3	@ veor	q12, q12, q12
	.byte	0xfa,0xa1,0x4a,0xf3	@ veor	q13, q13, q13
	.byte	0xfc,0xc1,0x4c,0xf3	@ veor	q14, q14, q14
	.byte	0xfe,0xe1,0x4e,0xf3	@ veor	q15, q15, q15
.Lwipe_done:
	mov	r0,sp
#if __ARM_ARCH__>=5
	bx	lr
#else
	tst	lr,#1
	moveq	pc,lr
	.word	0xe12fff1e	@ bx	lr
#endif
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu

.global	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,%function
OPENSSL_instrument_bus:
	eor	r0,r0,r0
#if __ARM_ARCH__>=5
	bx	lr
#else
	tst	lr,#1
	moveq	pc,lr
	.word	0xe12fff1e	@ bx	lr
#endif
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

.global	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,%function
OPENSSL_instrument_bus2:
	eor	r0,r0,r0
#if __ARM_ARCH__>=5
	bx	lr
#else
	tst	lr,#1
	moveq	pc,lr
	.word	0xe12fff1e	@ bx	lr
#endif
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2

.align	5
+2 −1
Original line number Diff line number Diff line
@@ -202,7 +202,7 @@ bn_GF2m_mul_2x2:
	veor		$r, $r, $t2

	vst1.32		{$r}, [r0]
	bx	lr
	ret		@ bx lr
.align	4
.Lialu:
#endif
@@ -273,6 +273,7 @@ foreach (split("\n",$code)) {
	s/\`([^\`]*)\`/eval $1/geo;

	s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo	or
	s/\bret\b/bx	lr/go		or
	s/\bbx\s+lr\b/.word\t0xe12fff1e/go;    # make it possible to compile with -march=armv4

	print $_,"\n";
+8 −2
Original line number Diff line number Diff line
@@ -230,9 +230,14 @@ bn_mul_mont:
	ldmia	sp!,{r4-r12,lr}		@ restore registers
	add	sp,sp,#2*4		@ skip over {r0,r2}
	mov	r0,#1
.Labrt:	tst	lr,#1
.Labrt:
#if __ARM_ARCH__>=5
	ret				@ bx lr
#else
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	bn_mul_mont,.-bn_mul_mont
___
{
@@ -650,7 +655,7 @@ bn_mul8x_mont_neon:
	sub	sp,ip,#96
        vldmia  sp!,{d8-d15}
        ldmia   sp!,{r4-r11}
	bx	lr
	ret						@ bx lr
.size	bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
#endif
___
@@ -665,5 +670,6 @@ ___

$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
$code =~ s/\bret\b/bx	lr/gm;
print $code;
close STDOUT;
+3 −2
Original line number Diff line number Diff line
@@ -386,7 +386,7 @@ gcm_init_neon:
	veor		$IN,$IN,$t0		@ twisted H
	vstmia		r0,{$IN}

	bx	lr
	ret					@ bx lr
.size	gcm_init_neon,.-gcm_init_neon

.global	gcm_gmult_neon
@@ -470,7 +470,7 @@ $code.=<<___;
	vst1.64		$Xl#hi,[$Xi,:64]!	@ write out Xi
	vst1.64		$Xl#lo,[$Xi,:64]

	bx	lr
	ret					@ bx lr
.size	gcm_ghash_neon,.-gcm_ghash_neon
#endif
___
@@ -484,6 +484,7 @@ foreach (split("\n",$code)) {
	s/\`([^\`]*)\`/eval $1/geo;

	s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo	or
	s/\bret\b/bx	lr/go		or
	s/\bbx\s+lr\b/.word\t0xe12fff1e/go;    # make it possible to compile with -march=armv4

	print $_,"\n";
Loading