Commit b1a5d1c6 authored by Andy Polyakov's avatar Andy Polyakov
Browse files

sha/asm/sha512-armv4.pl: adapt for use in Linux kernel context.



Follow-up to sha256-armv4.pl in cooperation with Ard Biesheuvel
(Linaro) and Sami Tolvanen (Google).

Reviewed-by: default avatarRich Salz <rsalz@openssl.org>
parent 51f8d095
Loading
Loading
Loading
Loading
+56 −9
Original line number Diff line number Diff line
@@ -5,6 +5,8 @@
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
#
# Permission to use under GPL terms is granted.
# ====================================================================

# SHA512 block procedure for ARMv4. September 2007.
@@ -136,6 +138,9 @@ $code.=<<___;
	teq	$t0,#$magic

	ldr	$t3,[sp,#$Coff+0]	@ c.lo
#if __ARM_ARCH__>=7
	it	eq			@ Thumb2 thing, sanity check in ARM
#endif
	orreq	$Ktbl,$Ktbl,#1
	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
@@ -173,7 +178,17 @@ $code.=<<___;
___
}
$code=<<___;
#ifndef __KERNEL__
# include "arm_arch.h"
# define VFP_ABI_PUSH	vstmdb	sp!,{d8-d15}
# define VFP_ABI_POP	vldmia	sp!,{d8-d15}
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
# define VFP_ABI_PUSH
# define VFP_ABI_POP
#endif

#ifdef __ARMEL__
# define LO 0
# define HI 4
@@ -185,7 +200,18 @@ $code=<<___;
#endif

.text
#if __ARM_ARCH__<7
.code	32
#else
.syntax unified
# ifdef __thumb2__
#  define adrl adr
.thumb
# else
.code   32
# endif
#endif

.type	K512,%object
.align	5
K512:
@@ -230,7 +256,7 @@ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.size	K512,.-K512
#if __ARM_MAX_ARCH__>=7
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-sha512_block_data_order
.skip	32-4
@@ -241,14 +267,18 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.global	sha512_block_data_order
.type	sha512_block_data_order,%function
sha512_block_data_order:
#if __ARM_ARCH__<7
	sub	r3,pc,#8		@ sha512_block_data_order
	add	$len,$inp,$len,lsl#7	@ len to point at the end of inp
#if __ARM_MAX_ARCH__>=7
#else
	adr	r3,sha512_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
	tst	r12,#1
	bne	.LNEON
#endif
	add	$len,$inp,$len,lsl#7	@ len to point at the end of inp
	stmdb	sp!,{r4-r12,lr}
	sub	$Ktbl,r3,#672		@ K512
	sub	sp,sp,#9*8
@@ -362,6 +392,9 @@ $code.=<<___;
___
	&BODY_00_15(0x17);
$code.=<<___;
#if __ARM_ARCH__>=7
	ittt	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	$t0,[sp,#`$Xoff+8*(16-1)`+0]
	ldreq	$t1,[sp,#`$Xoff+8*(16-1)`+4]
	beq	.L16_79
@@ -446,6 +479,7 @@ $code.=<<___;
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	sha512_block_data_order,.-sha512_block_data_order
___

{
@@ -552,11 +586,15 @@ $code.=<<___;
.arch	armv7-a
.fpu	neon

.global	sha512_block_data_order_neon
.type	sha512_block_data_order_neon,%function
.align	4
sha512_block_data_order_neon:
.LNEON:
	dmb				@ errata #451034 on early Cortex A8
	vstmdb	sp!,{d8-d15}		@ ABI specification says so
	sub	$Ktbl,r3,#672		@ K512
	add	$len,$inp,$len,lsl#7	@ len to point at the end of inp
	VFP_ABI_PUSH
	adrl	$Ktbl,K512
	vldmia	$ctx,{$A-$H}		@ load context
.Loop_neon:
___
@@ -581,16 +619,16 @@ $code.=<<___;
	sub		$Ktbl,#640	@ rewind K512
	bne		.Loop_neon

	vldmia	sp!,{d8-d15}		@ epilogue
	VFP_ABI_POP
	ret				@ bx lr
.size	sha512_block_data_order_neon,.-sha512_block_data_order_neon
#endif
___
}
$code.=<<___;
.size	sha512_block_data_order,.-sha512_block_data_order
.asciz	"SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
#if __ARM_MAX_ARCH__>=7
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm	OPENSSL_armcap_P,4,4
#endif
___
@@ -598,5 +636,14 @@ ___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
$code =~ s/\bret\b/bx	lr/gm;

open SELF,$0;
while(<SELF>) {
	next if (/^#!/);
	last if (!s/^#/@/ and !/^$/);
	print;
}
close SELF;

print $code;
close STDOUT; # enforce flush