Loading crypto/aes/asm/aes-armv4.pl +105 −28 Original line number Diff line number Diff line #!/usr/bin/env perl # ==================================================================== # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. Loading Loading @@ -51,9 +51,18 @@ $key="r11"; $rounds="r12"; $code=<<___; #ifndef __KERNEL__ # include "arm_arch.h" #else # define __ARM_ARCH__ __LINUX_ARM_ARCH__ #endif .text #if __ARM_ARCH__<7 .code 32 #else .syntax unified #endif .type AES_Te,%object .align 5 Loading Loading @@ -167,7 +176,11 @@ AES_Te: .type AES_encrypt,%function .align 5 AES_encrypt: #if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_encrypt #else adr r3,AES_encrypt #endif stmdb sp!,{r1,r4-r12,lr} mov $rounds,r0 @ inp mov $key,r2 Loading Loading @@ -409,11 +422,21 @@ _armv4_AES_encrypt: .align 5 AES_set_encrypt_key: _armv4_AES_set_encrypt_key: #if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_set_encrypt_key #else adr r3,AES_set_encrypt_key #endif teq r0,#0 #if __ARM_ARCH__>=7 itt eq @ Thumb2 thing, sanity check in ARM #endif moveq r0,#-1 beq .Labrt teq r2,#0 #if __ARM_ARCH__>=7 itt eq @ Thumb2 thing, sanity check in ARM #endif moveq r0,#-1 beq .Labrt Loading @@ -422,6 +445,9 @@ _armv4_AES_set_encrypt_key: teq r1,#192 beq .Lok teq r1,#256 #if __ARM_ARCH__>=7 itt ne @ Thumb2 thing, sanity check in ARM #endif movne r0,#-1 bne .Labrt Loading Loading @@ -576,6 +602,9 @@ _armv4_AES_set_encrypt_key: str $s2,[$key,#-16] subs $rounds,$rounds,#1 str $s3,[$key,#-12] #if __ARM_ARCH__>=7 itt eq @ Thumb2 thing, sanity check in ARM #endif subeq r2,$key,#216 beq .Ldone Loading Loading @@ -645,6 +674,9 @@ _armv4_AES_set_encrypt_key: str $s2,[$key,#-24] subs $rounds,$rounds,#1 str $s3,[$key,#-20] #if __ARM_ARCH__>=7 itt eq @ Thumb2 thing, sanity check in ARM #endif subeq r2,$key,#256 beq .Ldone Loading Loading @@ -674,11 +706,17 @@ _armv4_AES_set_encrypt_key: str $i3,[$key,#-4] b .L256_loop .align 2 .Ldone: mov r0,#0 ldmia sp!,{r4-r12,lr} .Labrt: tst lr,#1 .Labrt: #if defined(__thumb2__) && __ARM_ARCH__>=7 .short 0x4770 @ bx lr in Thumb2 encoding #else tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif .size AES_set_encrypt_key,.-AES_set_encrypt_key .global AES_set_decrypt_key Loading @@ -688,34 +726,57 @@ AES_set_decrypt_key: str lr,[sp,#-4]! @ push lr bl _armv4_AES_set_encrypt_key teq r0,#0 ldrne lr,[sp],#4 @ pop lr ldr lr,[sp],#4 @ pop lr bne .Labrt stmdb sp!,{r4-r12} mov r0,r2 @ AES_set_encrypt_key preserves r2, mov r1,r2 @ which is AES_KEY *key b _armv4_AES_set_enc2dec_key .size AES_set_decrypt_key,.-AES_set_decrypt_key ldr $rounds,[r2,#240] @ AES_set_encrypt_key preserves r2, mov $key,r2 @ which is AES_KEY *key mov $i1,r2 add $i2,r2,$rounds,lsl#4 @ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out) .global AES_set_enc2dec_key .type AES_set_enc2dec_key,%function .align 5 AES_set_enc2dec_key: _armv4_AES_set_enc2dec_key: stmdb sp!,{r4-r12,lr} ldr $rounds,[r0,#240] mov $i1,r0 @ input add $i2,r0,$rounds,lsl#4 mov $key,r1 @ ouput add $tbl,r1,$rounds,lsl#4 str $rounds,[r1,#240] .Linv: ldr $s0,[$i1],#16 ldr $s1,[$i1,#-12] ldr $s2,[$i1,#-8] ldr $s3,[$i1,#-4] ldr $t1,[$i2],#-16 ldr $t2,[$i2,#16+4] ldr $t3,[$i2,#16+8] ldr $i3,[$i2,#16+12] str $s0,[$tbl],#-16 str $s1,[$tbl,#16+4] str $s2,[$tbl,#16+8] str $s3,[$tbl,#16+12] str $t1,[$key],#16 str $t2,[$key,#-12] str $t3,[$key,#-8] str $i3,[$key,#-4] teq $i1,$i2 bne .Linv .Linv: ldr $s0,[$i1] ldr $s0,[$i1] ldr $s1,[$i1,#4] ldr $s2,[$i1,#8] ldr $s3,[$i1,#12] ldr $t1,[$i2] ldr $t2,[$i2,#4] ldr $t3,[$i2,#8] ldr $i3,[$i2,#12] str $s0,[$i2],#-16 str $s1,[$i2,#16+4] str $s2,[$i2,#16+8] str $s3,[$i2,#16+12] str $t1,[$i1],#16 str $t2,[$i1,#-12] str $t3,[$i1,#-8] str $i3,[$i1,#-4] teq $i1,$i2 bne .Linv str $s0,[$key] str $s1,[$key,#4] str $s2,[$key,#8] str $s3,[$key,#12] sub $key,$key,$rounds,lsl#3 ___ $mask80=$i1; $mask1b=$i2; Loading Loading @@ -773,7 +834,7 @@ $code.=<<___; moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif .size AES_set_decrypt_key,.-AES_set_decrypt_key .size AES_set_enc2dec_key,.-AES_set_enc2dec_key .type AES_Td,%object .align 5 Loading Loading @@ -883,7 +944,11 @@ AES_Td: .type AES_decrypt,%function .align 5 AES_decrypt: #if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_decrypt #else adr r3,AES_decrypt #endif stmdb sp!,{r1,r4-r12,lr} mov $rounds,r0 @ inp mov $key,r2 Loading Loading @@ -1080,8 +1145,9 @@ _armv4_AES_decrypt: ldrb $t3,[$tbl,$i3] @ Td4[s0>>0] and $i3,lr,$s1,lsr#8 add $s1,$tbl,$s1,lsr#24 ldrb $i1,[$tbl,$i1] @ Td4[s1>>0] ldrb $s1,[$tbl,$s1,lsr#24] @ Td4[s1>>24] ldrb $s1,[$s1] @ Td4[s1>>24] ldrb $i2,[$tbl,$i2] @ Td4[s1>>16] eor $s0,$i1,$s0,lsl#24 ldrb $i3,[$tbl,$i3] @ Td4[s1>>8] Loading @@ -1094,7 +1160,8 @@ _armv4_AES_decrypt: ldrb $i2,[$tbl,$i2] @ Td4[s2>>0] and $i3,lr,$s2,lsr#16 ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24] add $s2,$tbl,$s2,lsr#24 ldrb $s2,[$s2] @ Td4[s2>>24] eor $s0,$s0,$i1,lsl#8 ldrb $i3,[$tbl,$i3] @ Td4[s2>>16] eor $s1,$i2,$s1,lsl#16 Loading @@ -1106,8 +1173,9 @@ _armv4_AES_decrypt: ldrb $i2,[$tbl,$i2] @ Td4[s3>>8] and $i3,lr,$s3 @ i2 add $s3,$tbl,$s3,lsr#24 ldrb $i3,[$tbl,$i3] @ Td4[s3>>0] ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24] ldrb $s3,[$s3] @ Td4[s3>>24] eor $s0,$s0,$i1,lsl#16 ldr $i1,[$key,#0] eor $s1,$s1,$i2,lsl#8 Loading @@ -1130,5 +1198,14 @@ _armv4_AES_decrypt: ___ $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 open SELF,$0; while(<SELF>) { next if (/^#!/); last if (!s/^#/@/ and !/^$/); print; } close SELF; print $code; close STDOUT; # enforce flush crypto/aes/asm/bsaes-armv7.pl +297 −55 Original line number Diff line number Diff line Loading @@ -5,6 +5,10 @@ # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. # # Specific modes and adaptation for Linux kernel by Ard Biesheuvel # <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is # granted. # ==================================================================== # Bit-sliced AES for ARM NEON Loading Loading @@ -37,6 +41,12 @@ # # <appro@openssl.org> # April-August 2013 # # Add CBC, CTR and XTS subroutines, adapt for kernel use. # # <ard.biesheuvel@linaro.org> while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; Loading Loading @@ -620,17 +630,34 @@ ___ } $code.=<<___; #ifndef __KERNEL__ # include "arm_arch.h" # define VFP_ABI_PUSH vstmdb sp!,{d8-d15} # define VFP_ABI_POP vldmia sp!,{d8-d15} # define VFP_ABI_FRAME 0x40 #else # define VFP_ABI_PUSH # define VFP_ABI_POP # define VFP_ABI_FRAME 0 # define BSAES_ASM_EXTENDED_KEY # define XTS_CHAIN_TWEAK # define __ARM_ARCH__ __LINUX_ARM_ARCH__ #endif #ifdef __thumb__ # define adrl adr #endif #if __ARM_ARCH__>=7 .text .code 32 .syntax unified @ ARMv7-capable assembler is expected to handle this .fpu neon .type _bsaes_decrypt8,%function .align 4 _bsaes_decrypt8: sub $const,pc,#8 @ _bsaes_decrypt8 adr $const,_bsaes_decrypt8 vldmia $key!, {@XMM[9]} @ round 0 key add $const,$const,#.LM0ISR-_bsaes_decrypt8 Loading Loading @@ -677,6 +704,7 @@ ___ &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]); $code.=<<___; vldmia $const, {@XMM[12]} @ .LISR ite eq @ Thumb2 thing, sanity check in ARM addeq $const,$const,#0x10 bne .Ldec_loop vldmia $const, {@XMM[12]} @ .LISRM0 Loading Loading @@ -717,8 +745,6 @@ _bsaes_const: .quad 0x02060a0e03070b0f, 0x0004080c0105090d .LREVM0SR: .quad 0x090d01050c000408, 0x03070b0f060a0e02 .Lxts_magic: .quad 1, 0x87 .asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro\@openssl.org>" .align 6 .size _bsaes_const,.-_bsaes_const Loading @@ -726,7 +752,7 @@ _bsaes_const: .type _bsaes_encrypt8,%function .align 4 _bsaes_encrypt8: sub $const,pc,#8 @ _bsaes_encrypt8 adr $const,_bsaes_encrypt8 vldmia $key!, {@XMM[9]} @ round 0 key sub $const,$const,#_bsaes_encrypt8-.LM0SR Loading Loading @@ -775,6 +801,7 @@ ___ &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]); $code.=<<___; vldmia $const, {@XMM[12]} @ .LSR ite eq @ Thumb2 thing, samity check in ARM addeq $const,$const,#0x10 bne .Lenc_loop vldmia $const, {@XMM[12]} @ .LSRM0 Loading Loading @@ -829,7 +856,7 @@ $code.=<<___; .type _bsaes_key_convert,%function .align 4 _bsaes_key_convert: sub $const,pc,#8 @ _bsaes_key_convert adr $const,_bsaes_key_convert vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key sub $const,$const,#_bsaes_key_convert-.LM0 vld1.8 {@XMM[15]}, [$inp]! @ load round 1 key Loading Loading @@ -998,32 +1025,62 @@ $code.=<<___; .type bsaes_cbc_encrypt,%function .align 5 bsaes_cbc_encrypt: #ifndef __KERNEL__ cmp $len, #128 #ifndef __thumb__ blo AES_cbc_encrypt #else bhs 1f b AES_cbc_encrypt 1: #endif #endif @ it is up to the caller to make sure we are called with enc == 0 mov ip, sp stmdb sp!, {r4-r10, lr} vstmdb sp!, {d8-d15} @ ABI specification says so ldr $ivp, [sp, #0x60] @ IV is 1st arg on the stack VFP_ABI_PUSH ldr $ivp, [ip] @ IV is 1st arg on the stack mov $len, $len, lsr#4 @ len in 16 byte blocks sub sp, #0x10 @ scratch space to carry over the IV mov $fp, sp @ save sp @ allocate the key schedule on the stack ldr $rounds, [$key, #240] @ get # of rounds sub sp, sp, $rounds, lsl#7 @ 128 bytes per inner round key add sp, sp, #`128-32` @ size of bit-sliced key schedule #ifndef BSAES_ASM_EXTENDED_KEY @ allocate the key schedule on the stack sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key add r12, #`128-32` @ sifze of bit-slices key schedule @ populate the key schedule mov r4, $key @ pass key mov r5, $rounds @ pass # of rounds mov r12, $keysched @ pass key schedule mov sp, r12 @ sp is $keysched bl _bsaes_key_convert vldmia $keysched, {@XMM[6]} vstmia r12, {@XMM[15]} @ save last round key veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key vstmia $keysched, {@XMM[7]} #else ldr r12, [$key, #244] eors r12, #1 beq 0f @ populate the key schedule str r12, [$key, #244] mov r4, $key @ pass key mov r5, $rounds @ pass # of rounds add r12, $key, #248 @ pass key schedule bl _bsaes_key_convert add r4, $key, #248 vldmia r4, {@XMM[6]} vstmia r12, {@XMM[15]} @ save last round key veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key vstmia r4, {@XMM[7]} .align 2 0: #endif vld1.8 {@XMM[15]}, [$ivp] @ load IV b .Lcbc_dec_loop Loading @@ -1035,7 +1092,11 @@ bsaes_cbc_encrypt: vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! #ifndef BSAES_ASM_EXTENDED_KEY mov r4, $keysched @ pass the key #else add r4, $key, #248 #endif vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! mov r5, $rounds vld1.8 {@XMM[6]-@XMM[7]}, [$inp] Loading Loading @@ -1075,7 +1136,11 @@ bsaes_cbc_encrypt: cmp $len, #2 blo .Lcbc_dec_one vld1.8 {@XMM[1]}, [$inp]! #ifndef BSAES_ASM_EXTENDED_KEY mov r4, $keysched @ pass the key #else add r4, $key, #248 #endif mov r5, $rounds vstmia $fp, {@XMM[15]} @ put aside IV beq .Lcbc_dec_two Loading Loading @@ -1207,16 +1272,19 @@ bsaes_cbc_encrypt: vst1.8 {@XMM[0]}, [$rounds] @ write output .Lcbc_dec_done: #ifndef BSAES_ASM_EXTENDED_KEY vmov.i32 q0, #0 vmov.i32 q1, #0 .Lcbc_dec_bzero: @ wipe key schedule [if any] vstmia $keysched!, {q0-q1} teq $keysched, $fp cmp $keysched, $fp bne .Lcbc_dec_bzero #endif add sp, $fp, #0x10 mov sp, $fp add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb vst1.8 {@XMM[15]}, [$ivp] @ return IV vldmia sp!, {d8-d15} VFP_ABI_POP ldmia sp!, {r4-r10, pc} .size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt ___ Loading @@ -1235,21 +1303,23 @@ bsaes_ctr32_encrypt_blocks: cmp $len, #8 @ use plain AES for blo .Lctr_enc_short @ small sizes mov ip, sp stmdb sp!, {r4-r10, lr} vstmdb sp!, {d8-d15} @ ABI specification says so ldr $ctr, [sp, #0x60] @ ctr is 1st arg on the stack VFP_ABI_PUSH ldr $ctr, [ip] @ ctr is 1st arg on the stack sub sp, sp, #0x10 @ scratch space to carry over the ctr mov $fp, sp @ save sp @ allocate the key schedule on the stack ldr $rounds, [$key, #240] @ get # of rounds sub sp, sp, $rounds, lsl#7 @ 128 bytes per inner round key add sp, sp, #`128-32` @ size of bit-sliced key schedule #ifndef BSAES_ASM_EXTENDED_KEY @ allocate the key schedule on the stack sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key add r12, #`128-32` @ size of bit-sliced key schedule @ populate the key schedule mov r4, $key @ pass key mov r5, $rounds @ pass # of rounds mov r12, $keysched @ pass key schedule mov sp, r12 @ sp is $keysched bl _bsaes_key_convert veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key vstmia r12, {@XMM[7]} @ save last round key Loading @@ -1257,6 +1327,27 @@ bsaes_ctr32_encrypt_blocks: vld1.8 {@XMM[0]}, [$ctr] @ load counter add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr vldmia $keysched, {@XMM[4]} @ load round0 key #else ldr r12, [$key, #244] eors r12, #1 beq 0f @ populate the key schedule str r12, [$key, #244] mov r4, $key @ pass key mov r5, $rounds @ pass # of rounds add r12, $key, #248 @ pass key schedule bl _bsaes_key_convert veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key vstmia r12, {@XMM[7]} @ save last round key .align 2 0: add r12, $key, #248 vld1.8 {@XMM[0]}, [$ctr] @ load counter adrl $ctr, .LREVM0SR @ borrow $ctr vldmia r12, {@XMM[4]} @ load round0 key sub sp, #0x10 @ place for adjusted round0 key #endif vmov.i32 @XMM[8],#1 @ compose 1<<96 veor @XMM[9],@XMM[9],@XMM[9] Loading @@ -1283,7 +1374,11 @@ bsaes_ctr32_encrypt_blocks: @ to flip byte order in 32-bit counter vldmia $keysched, {@XMM[9]} @ load round0 key #ifndef BSAES_ASM_EXTENDED_KEY add r4, $keysched, #0x10 @ pass next round key #else add r4, $key, #`248+16` #endif vldmia $ctr, {@XMM[8]} @ .LREVM0SR mov r5, $rounds @ pass rounds vstmia $fp, {@XMM[10]} @ save next counter Loading Loading @@ -1359,13 +1454,18 @@ bsaes_ctr32_encrypt_blocks: .Lctr_enc_done: vmov.i32 q0, #0 vmov.i32 q1, #0 #ifndef BSAES_ASM_EXTENDED_KEY .Lctr_enc_bzero: @ wipe key schedule [if any] vstmia $keysched!, {q0-q1} teq $keysched, $fp cmp $keysched, $fp bne .Lctr_enc_bzero #else vstmia $keysched, {q0-q1} #endif add sp, $fp, #0x10 vldmia sp!, {d8-d15} mov sp, $fp add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb VFP_ABI_POP ldmia sp!, {r4-r10, pc} @ return .align 4 Loading Loading @@ -1407,7 +1507,10 @@ bsaes_ctr32_encrypt_blocks: subs r6, r6, #1 bne .Lctr_enc_short_loop add sp, sp, #0x20 vmov.i32 q0, #0 vmov.i32 q1, #0 vstmia sp!, {q0-q1} ldmia sp!, {r4-r8, pc} .size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks ___ Loading @@ -1428,41 +1531,66 @@ $code.=<<___; .type bsaes_xts_encrypt,%function .align 4 bsaes_xts_encrypt: mov ip, sp stmdb sp!, {r4-r10, lr} @ 0x20 vstmdb sp!, {d8-d15} @ 0x40 VFP_ABI_PUSH mov r6, sp @ future $fp sub sp, #0x10 @ 0x10 mov $inp, r0 mov $out, r1 mov $len, r2 mov $key, r3 bic sp, #0xf @ align at 16 bytes sub r0, sp, #0x10 @ 0x10 bic r0, #0xf @ align at 16 bytes mov sp, r0 #ifdef XTS_CHAIN_TWEAK ldr r0, [ip] @ pointer to input tweak #else @ generate initial tweak ldr r0, [r6, #0x64] @ iv[] ldr r0, [ip, #4] @ iv[] mov r1, sp ldr r2, [r6, #0x60] @ key2 ldr r2, [ip, #0] @ key2 bl AES_encrypt mov r0,sp @ pointer to initial tweak #endif @ allocate the key schedule on the stack ldr $rounds, [$key, #240] @ get # of rounds mov $fp, r6 mov r0, sp @ pointer to initial tweak sub sp, sp, $rounds, lsl#7 @ 128 bytes per inner round key @ add sp, sp, #`128-32` @ size of bit-sliced key schedule sub sp, sp, #`32+16` @ place for tweak[9] #ifndef BSAES_ASM_EXTENDED_KEY @ allocate the key schedule on the stack sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key @ add r12, #`128-32` @ size of bit-sliced key schedule sub r12, #`32+16` @ place for tweak[9] @ populate the key schedule mov r4, $key @ pass key mov r5, $rounds @ pass # of rounds add r12, sp, #0x90 @ pass key schedule mov sp, r12 add r12, #0x90 @ pass key schedule bl _bsaes_key_convert veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key vstmia r12, {@XMM[7]} @ save last round key #else ldr r12, [$key, #244] eors r12, #1 beq 0f str r12, [$key, #244] mov r4, $key @ pass key mov r5, $rounds @ pass # of rounds add r12, $key, #248 @ pass key schedule bl _bsaes_key_convert veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key vstmia r12, {@XMM[7]} .align 2 0: sub sp, #0x90 @ place for tweak[9] #endif vld1.8 {@XMM[8]}, [r0] @ initial tweak add $magic, $const, #.Lxts_magic-.LM0 adr $magic, .Lxts_magic subs $len, #0x80 blo .Lxts_enc_short Loading Loading @@ -1502,7 +1630,11 @@ $code.=<<___; vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! veor @XMM[5], @XMM[5], @XMM[13] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[6], @XMM[6], @XMM[14] mov r5, $rounds @ pass rounds veor @XMM[7], @XMM[7], @XMM[15] Loading Loading @@ -1567,7 +1699,11 @@ $code.=<<___; vld1.8 {@XMM[6]}, [$inp]! veor @XMM[5], @XMM[5], @XMM[13] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[6], @XMM[6], @XMM[14] mov r5, $rounds @ pass rounds mov r0, sp Loading Loading @@ -1597,7 +1733,11 @@ $code.=<<___; vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak veor @XMM[4], @XMM[4], @XMM[12] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[5], @XMM[5], @XMM[13] mov r5, $rounds @ pass rounds mov r0, sp Loading @@ -1619,12 +1759,22 @@ $code.=<<___; vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak b .Lxts_enc_done .align 4 @ put this in range for both ARM and Thumb mode adr instructions .align 5 .Lxts_magic: .quad 1, 0x87 .align 5 .Lxts_enc_5: vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak veor @XMM[3], @XMM[3], @XMM[11] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[4], @XMM[4], @XMM[12] mov r5, $rounds @ pass rounds mov r0, sp Loading @@ -1650,7 +1800,11 @@ $code.=<<___; vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak veor @XMM[2], @XMM[2], @XMM[10] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[3], @XMM[3], @XMM[11] mov r5, $rounds @ pass rounds mov r0, sp Loading @@ -1673,7 +1827,11 @@ $code.=<<___; vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak veor @XMM[1], @XMM[1], @XMM[9] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[2], @XMM[2], @XMM[10] mov r5, $rounds @ pass rounds mov r0, sp Loading @@ -1695,7 +1853,11 @@ $code.=<<___; vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak veor @XMM[0], @XMM[0], @XMM[8] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[1], @XMM[1], @XMM[9] mov r5, $rounds @ pass rounds mov r0, sp Loading Loading @@ -1728,6 +1890,7 @@ $code.=<<___; vmov @XMM[8], @XMM[9] @ next round tweak .Lxts_enc_done: #ifndef XTS_CHAIN_TWEAK adds $len, #0x10 beq .Lxts_enc_ret sub r6, $out, #0x10 Loading Loading @@ -1755,18 +1918,25 @@ $code.=<<___; veor @XMM[0], @XMM[0], @XMM[8] vst1.8 {@XMM[0]}, [r6] mov $fp, r4 #endif .Lxts_enc_ret: bic r0, $fp, #0xf vmov.i32 q0, #0 vmov.i32 q1, #0 #ifdef XTS_CHAIN_TWEAK ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak #endif .Lxts_enc_bzero: @ wipe key schedule [if any] vstmia sp!, {q0-q1} teq sp, r0 cmp sp, r0 bne .Lxts_enc_bzero mov sp, $fp vldmia sp!, {d8-d15} #ifdef XTS_CHAIN_TWEAK vst1.8 {@XMM[8]}, [r1] #endif VFP_ABI_POP ldmia sp!, {r4-r10, pc} @ return .size bsaes_xts_encrypt,.-bsaes_xts_encrypt Loading @@ -1775,46 +1945,74 @@ $code.=<<___; .type bsaes_xts_decrypt,%function .align 4 bsaes_xts_decrypt: mov ip, sp stmdb sp!, {r4-r10, lr} @ 0x20 vstmdb sp!, {d8-d15} @ 0x40 VFP_ABI_PUSH mov r6, sp @ future $fp sub sp, #0x10 @ 0x10 mov $inp, r0 mov $out, r1 mov $len, r2 mov $key, r3 bic sp, #0xf @ align at 16 bytes sub r0, sp, #0x10 @ 0x10 bic r0, #0xf @ align at 16 bytes mov sp, r0 #ifdef XTS_CHAIN_TWEAK ldr r0, [ip] @ pointer to input tweak #else @ generate initial tweak ldr r0, [r6, #0x64] @ iv[] ldr r0, [ip, #4] @ iv[] mov r1, sp ldr r2, [r6, #0x60] @ key2 ldr r2, [ip, #0] @ key2 bl AES_encrypt mov r0, sp @ pointer to initial tweak #endif @ allocate the key schedule on the stack ldr $rounds, [$key, #240] @ get # of rounds mov $fp, r6 mov r0, sp @ pointer to initial tweak sub sp, sp, $rounds, lsl#7 @ 128 bytes per inner round key @ add sp, sp, #`128-32` @ size of bit-sliced key schedule sub sp, sp, #`32+16` @ place for tweak[9] #ifndef BSAES_ASM_EXTENDED_KEY @ allocate the key schedule on the stack sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key @ add r12, #`128-32` @ size of bit-sliced key schedule sub r12, #`32+16` @ place for tweak[9] @ populate the key schedule mov r4, $key @ pass key mov r5, $rounds @ pass # of rounds add r12, sp, #0x90 @ pass key schedule mov sp, r12 add r12, #0x90 @ pass key schedule bl _bsaes_key_convert add r4, sp, #0x90 vldmia r4, {@XMM[6]} vstmia r12, {@XMM[15]} @ save last round key veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key vstmia r4, {@XMM[7]} #else ldr r12, [$key, #244] eors r12, #1 beq 0f str r12, [$key, #244] mov r4, $key @ pass key mov r5, $rounds @ pass # of rounds add r12, $key, #248 @ pass key schedule bl _bsaes_key_convert add r4, $key, #248 vldmia r4, {@XMM[6]} vstmia r12, {@XMM[15]} @ save last round key veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key vstmia r4, {@XMM[7]} .align 2 0: sub sp, #0x90 @ place for tweak[9] #endif vld1.8 {@XMM[8]}, [r0] @ initial tweak add $magic, $const, #.Lxts_magic-.LM0 adr $magic, .Lxts_magic tst $len, #0xf @ if not multiple of 16 it ne @ Thumb2 thing, sanity check in ARM subne $len, #0x10 @ subtract another 16 bytes subs $len, #0x80 Loading Loading @@ -1855,7 +2053,11 @@ $code.=<<___; vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! veor @XMM[5], @XMM[5], @XMM[13] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[6], @XMM[6], @XMM[14] mov r5, $rounds @ pass rounds veor @XMM[7], @XMM[7], @XMM[15] Loading Loading @@ -1920,7 +2122,11 @@ $code.=<<___; vld1.8 {@XMM[6]}, [$inp]! veor @XMM[5], @XMM[5], @XMM[13] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[6], @XMM[6], @XMM[14] mov r5, $rounds @ pass rounds mov r0, sp Loading Loading @@ -1950,7 +2156,11 @@ $code.=<<___; vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak veor @XMM[4], @XMM[4], @XMM[12] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[5], @XMM[5], @XMM[13] mov r5, $rounds @ pass rounds mov r0, sp Loading @@ -1977,7 +2187,11 @@ $code.=<<___; vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak veor @XMM[3], @XMM[3], @XMM[11] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[4], @XMM[4], @XMM[12] mov r5, $rounds @ pass rounds mov r0, sp Loading @@ -2003,7 +2217,11 @@ $code.=<<___; vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak veor @XMM[2], @XMM[2], @XMM[10] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[3], @XMM[3], @XMM[11] mov r5, $rounds @ pass rounds mov r0, sp Loading @@ -2026,7 +2244,11 @@ $code.=<<___; vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak veor @XMM[1], @XMM[1], @XMM[9] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[2], @XMM[2], @XMM[10] mov r5, $rounds @ pass rounds mov r0, sp Loading @@ -2048,7 +2270,11 @@ $code.=<<___; vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak veor @XMM[0], @XMM[0], @XMM[8] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[1], @XMM[1], @XMM[9] mov r5, $rounds @ pass rounds mov r0, sp Loading Loading @@ -2083,6 +2309,7 @@ $code.=<<___; vmov @XMM[8], @XMM[9] @ next round tweak .Lxts_dec_done: #ifndef XTS_CHAIN_TWEAK adds $len, #0x10 beq .Lxts_dec_ret Loading Loading @@ -2132,18 +2359,25 @@ $code.=<<___; veor @XMM[0], @XMM[0], @XMM[8] vst1.8 {@XMM[0]}, [r6] mov $fp, r4 #endif .Lxts_dec_ret: bic r0, $fp, #0xf vmov.i32 q0, #0 vmov.i32 q1, #0 #ifdef XTS_CHAIN_TWEAK ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak #endif .Lxts_dec_bzero: @ wipe key schedule [if any] vstmia sp!, {q0-q1} teq sp, r0 cmp sp, r0 bne .Lxts_dec_bzero mov sp, $fp vldmia sp!, {d8-d15} #ifdef XTS_CHAIN_TWEAK vst1.8 {@XMM[8]}, [r1] #endif VFP_ABI_POP ldmia sp!, {r4-r10, pc} @ return .size bsaes_xts_decrypt,.-bsaes_xts_decrypt Loading @@ -2155,6 +2389,14 @@ ___ $code =~ s/\`([^\`]*)\`/eval($1)/gem; open SELF,$0; while(<SELF>) { next if (/^#!/); last if (!s/^#/@/ and !/^$/); print; } close SELF; print $code; close STDOUT; Loading
crypto/aes/asm/aes-armv4.pl +105 −28 Original line number Diff line number Diff line #!/usr/bin/env perl # ==================================================================== # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. Loading Loading @@ -51,9 +51,18 @@ $key="r11"; $rounds="r12"; $code=<<___; #ifndef __KERNEL__ # include "arm_arch.h" #else # define __ARM_ARCH__ __LINUX_ARM_ARCH__ #endif .text #if __ARM_ARCH__<7 .code 32 #else .syntax unified #endif .type AES_Te,%object .align 5 Loading Loading @@ -167,7 +176,11 @@ AES_Te: .type AES_encrypt,%function .align 5 AES_encrypt: #if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_encrypt #else adr r3,AES_encrypt #endif stmdb sp!,{r1,r4-r12,lr} mov $rounds,r0 @ inp mov $key,r2 Loading Loading @@ -409,11 +422,21 @@ _armv4_AES_encrypt: .align 5 AES_set_encrypt_key: _armv4_AES_set_encrypt_key: #if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_set_encrypt_key #else adr r3,AES_set_encrypt_key #endif teq r0,#0 #if __ARM_ARCH__>=7 itt eq @ Thumb2 thing, sanity check in ARM #endif moveq r0,#-1 beq .Labrt teq r2,#0 #if __ARM_ARCH__>=7 itt eq @ Thumb2 thing, sanity check in ARM #endif moveq r0,#-1 beq .Labrt Loading @@ -422,6 +445,9 @@ _armv4_AES_set_encrypt_key: teq r1,#192 beq .Lok teq r1,#256 #if __ARM_ARCH__>=7 itt ne @ Thumb2 thing, sanity check in ARM #endif movne r0,#-1 bne .Labrt Loading Loading @@ -576,6 +602,9 @@ _armv4_AES_set_encrypt_key: str $s2,[$key,#-16] subs $rounds,$rounds,#1 str $s3,[$key,#-12] #if __ARM_ARCH__>=7 itt eq @ Thumb2 thing, sanity check in ARM #endif subeq r2,$key,#216 beq .Ldone Loading Loading @@ -645,6 +674,9 @@ _armv4_AES_set_encrypt_key: str $s2,[$key,#-24] subs $rounds,$rounds,#1 str $s3,[$key,#-20] #if __ARM_ARCH__>=7 itt eq @ Thumb2 thing, sanity check in ARM #endif subeq r2,$key,#256 beq .Ldone Loading Loading @@ -674,11 +706,17 @@ _armv4_AES_set_encrypt_key: str $i3,[$key,#-4] b .L256_loop .align 2 .Ldone: mov r0,#0 ldmia sp!,{r4-r12,lr} .Labrt: tst lr,#1 .Labrt: #if defined(__thumb2__) && __ARM_ARCH__>=7 .short 0x4770 @ bx lr in Thumb2 encoding #else tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif .size AES_set_encrypt_key,.-AES_set_encrypt_key .global AES_set_decrypt_key Loading @@ -688,34 +726,57 @@ AES_set_decrypt_key: str lr,[sp,#-4]! @ push lr bl _armv4_AES_set_encrypt_key teq r0,#0 ldrne lr,[sp],#4 @ pop lr ldr lr,[sp],#4 @ pop lr bne .Labrt stmdb sp!,{r4-r12} mov r0,r2 @ AES_set_encrypt_key preserves r2, mov r1,r2 @ which is AES_KEY *key b _armv4_AES_set_enc2dec_key .size AES_set_decrypt_key,.-AES_set_decrypt_key ldr $rounds,[r2,#240] @ AES_set_encrypt_key preserves r2, mov $key,r2 @ which is AES_KEY *key mov $i1,r2 add $i2,r2,$rounds,lsl#4 @ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out) .global AES_set_enc2dec_key .type AES_set_enc2dec_key,%function .align 5 AES_set_enc2dec_key: _armv4_AES_set_enc2dec_key: stmdb sp!,{r4-r12,lr} ldr $rounds,[r0,#240] mov $i1,r0 @ input add $i2,r0,$rounds,lsl#4 mov $key,r1 @ ouput add $tbl,r1,$rounds,lsl#4 str $rounds,[r1,#240] .Linv: ldr $s0,[$i1],#16 ldr $s1,[$i1,#-12] ldr $s2,[$i1,#-8] ldr $s3,[$i1,#-4] ldr $t1,[$i2],#-16 ldr $t2,[$i2,#16+4] ldr $t3,[$i2,#16+8] ldr $i3,[$i2,#16+12] str $s0,[$tbl],#-16 str $s1,[$tbl,#16+4] str $s2,[$tbl,#16+8] str $s3,[$tbl,#16+12] str $t1,[$key],#16 str $t2,[$key,#-12] str $t3,[$key,#-8] str $i3,[$key,#-4] teq $i1,$i2 bne .Linv .Linv: ldr $s0,[$i1] ldr $s0,[$i1] ldr $s1,[$i1,#4] ldr $s2,[$i1,#8] ldr $s3,[$i1,#12] ldr $t1,[$i2] ldr $t2,[$i2,#4] ldr $t3,[$i2,#8] ldr $i3,[$i2,#12] str $s0,[$i2],#-16 str $s1,[$i2,#16+4] str $s2,[$i2,#16+8] str $s3,[$i2,#16+12] str $t1,[$i1],#16 str $t2,[$i1,#-12] str $t3,[$i1,#-8] str $i3,[$i1,#-4] teq $i1,$i2 bne .Linv str $s0,[$key] str $s1,[$key,#4] str $s2,[$key,#8] str $s3,[$key,#12] sub $key,$key,$rounds,lsl#3 ___ $mask80=$i1; $mask1b=$i2; Loading Loading @@ -773,7 +834,7 @@ $code.=<<___; moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif .size AES_set_decrypt_key,.-AES_set_decrypt_key .size AES_set_enc2dec_key,.-AES_set_enc2dec_key .type AES_Td,%object .align 5 Loading Loading @@ -883,7 +944,11 @@ AES_Td: .type AES_decrypt,%function .align 5 AES_decrypt: #if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_decrypt #else adr r3,AES_decrypt #endif stmdb sp!,{r1,r4-r12,lr} mov $rounds,r0 @ inp mov $key,r2 Loading Loading @@ -1080,8 +1145,9 @@ _armv4_AES_decrypt: ldrb $t3,[$tbl,$i3] @ Td4[s0>>0] and $i3,lr,$s1,lsr#8 add $s1,$tbl,$s1,lsr#24 ldrb $i1,[$tbl,$i1] @ Td4[s1>>0] ldrb $s1,[$tbl,$s1,lsr#24] @ Td4[s1>>24] ldrb $s1,[$s1] @ Td4[s1>>24] ldrb $i2,[$tbl,$i2] @ Td4[s1>>16] eor $s0,$i1,$s0,lsl#24 ldrb $i3,[$tbl,$i3] @ Td4[s1>>8] Loading @@ -1094,7 +1160,8 @@ _armv4_AES_decrypt: ldrb $i2,[$tbl,$i2] @ Td4[s2>>0] and $i3,lr,$s2,lsr#16 ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24] add $s2,$tbl,$s2,lsr#24 ldrb $s2,[$s2] @ Td4[s2>>24] eor $s0,$s0,$i1,lsl#8 ldrb $i3,[$tbl,$i3] @ Td4[s2>>16] eor $s1,$i2,$s1,lsl#16 Loading @@ -1106,8 +1173,9 @@ _armv4_AES_decrypt: ldrb $i2,[$tbl,$i2] @ Td4[s3>>8] and $i3,lr,$s3 @ i2 add $s3,$tbl,$s3,lsr#24 ldrb $i3,[$tbl,$i3] @ Td4[s3>>0] ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24] ldrb $s3,[$s3] @ Td4[s3>>24] eor $s0,$s0,$i1,lsl#16 ldr $i1,[$key,#0] eor $s1,$s1,$i2,lsl#8 Loading @@ -1130,5 +1198,14 @@ _armv4_AES_decrypt: ___ $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 open SELF,$0; while(<SELF>) { next if (/^#!/); last if (!s/^#/@/ and !/^$/); print; } close SELF; print $code; close STDOUT; # enforce flush
crypto/aes/asm/bsaes-armv7.pl +297 −55 Original line number Diff line number Diff line Loading @@ -5,6 +5,10 @@ # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. # # Specific modes and adaptation for Linux kernel by Ard Biesheuvel # <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is # granted. # ==================================================================== # Bit-sliced AES for ARM NEON Loading Loading @@ -37,6 +41,12 @@ # # <appro@openssl.org> # April-August 2013 # # Add CBC, CTR and XTS subroutines, adapt for kernel use. # # <ard.biesheuvel@linaro.org> while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; Loading Loading @@ -620,17 +630,34 @@ ___ } $code.=<<___; #ifndef __KERNEL__ # include "arm_arch.h" # define VFP_ABI_PUSH vstmdb sp!,{d8-d15} # define VFP_ABI_POP vldmia sp!,{d8-d15} # define VFP_ABI_FRAME 0x40 #else # define VFP_ABI_PUSH # define VFP_ABI_POP # define VFP_ABI_FRAME 0 # define BSAES_ASM_EXTENDED_KEY # define XTS_CHAIN_TWEAK # define __ARM_ARCH__ __LINUX_ARM_ARCH__ #endif #ifdef __thumb__ # define adrl adr #endif #if __ARM_ARCH__>=7 .text .code 32 .syntax unified @ ARMv7-capable assembler is expected to handle this .fpu neon .type _bsaes_decrypt8,%function .align 4 _bsaes_decrypt8: sub $const,pc,#8 @ _bsaes_decrypt8 adr $const,_bsaes_decrypt8 vldmia $key!, {@XMM[9]} @ round 0 key add $const,$const,#.LM0ISR-_bsaes_decrypt8 Loading Loading @@ -677,6 +704,7 @@ ___ &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]); $code.=<<___; vldmia $const, {@XMM[12]} @ .LISR ite eq @ Thumb2 thing, sanity check in ARM addeq $const,$const,#0x10 bne .Ldec_loop vldmia $const, {@XMM[12]} @ .LISRM0 Loading Loading @@ -717,8 +745,6 @@ _bsaes_const: .quad 0x02060a0e03070b0f, 0x0004080c0105090d .LREVM0SR: .quad 0x090d01050c000408, 0x03070b0f060a0e02 .Lxts_magic: .quad 1, 0x87 .asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro\@openssl.org>" .align 6 .size _bsaes_const,.-_bsaes_const Loading @@ -726,7 +752,7 @@ _bsaes_const: .type _bsaes_encrypt8,%function .align 4 _bsaes_encrypt8: sub $const,pc,#8 @ _bsaes_encrypt8 adr $const,_bsaes_encrypt8 vldmia $key!, {@XMM[9]} @ round 0 key sub $const,$const,#_bsaes_encrypt8-.LM0SR Loading Loading @@ -775,6 +801,7 @@ ___ &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]); $code.=<<___; vldmia $const, {@XMM[12]} @ .LSR ite eq @ Thumb2 thing, samity check in ARM addeq $const,$const,#0x10 bne .Lenc_loop vldmia $const, {@XMM[12]} @ .LSRM0 Loading Loading @@ -829,7 +856,7 @@ $code.=<<___; .type _bsaes_key_convert,%function .align 4 _bsaes_key_convert: sub $const,pc,#8 @ _bsaes_key_convert adr $const,_bsaes_key_convert vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key sub $const,$const,#_bsaes_key_convert-.LM0 vld1.8 {@XMM[15]}, [$inp]! @ load round 1 key Loading Loading @@ -998,32 +1025,62 @@ $code.=<<___; .type bsaes_cbc_encrypt,%function .align 5 bsaes_cbc_encrypt: #ifndef __KERNEL__ cmp $len, #128 #ifndef __thumb__ blo AES_cbc_encrypt #else bhs 1f b AES_cbc_encrypt 1: #endif #endif @ it is up to the caller to make sure we are called with enc == 0 mov ip, sp stmdb sp!, {r4-r10, lr} vstmdb sp!, {d8-d15} @ ABI specification says so ldr $ivp, [sp, #0x60] @ IV is 1st arg on the stack VFP_ABI_PUSH ldr $ivp, [ip] @ IV is 1st arg on the stack mov $len, $len, lsr#4 @ len in 16 byte blocks sub sp, #0x10 @ scratch space to carry over the IV mov $fp, sp @ save sp @ allocate the key schedule on the stack ldr $rounds, [$key, #240] @ get # of rounds sub sp, sp, $rounds, lsl#7 @ 128 bytes per inner round key add sp, sp, #`128-32` @ size of bit-sliced key schedule #ifndef BSAES_ASM_EXTENDED_KEY @ allocate the key schedule on the stack sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key add r12, #`128-32` @ sifze of bit-slices key schedule @ populate the key schedule mov r4, $key @ pass key mov r5, $rounds @ pass # of rounds mov r12, $keysched @ pass key schedule mov sp, r12 @ sp is $keysched bl _bsaes_key_convert vldmia $keysched, {@XMM[6]} vstmia r12, {@XMM[15]} @ save last round key veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key vstmia $keysched, {@XMM[7]} #else ldr r12, [$key, #244] eors r12, #1 beq 0f @ populate the key schedule str r12, [$key, #244] mov r4, $key @ pass key mov r5, $rounds @ pass # of rounds add r12, $key, #248 @ pass key schedule bl _bsaes_key_convert add r4, $key, #248 vldmia r4, {@XMM[6]} vstmia r12, {@XMM[15]} @ save last round key veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key vstmia r4, {@XMM[7]} .align 2 0: #endif vld1.8 {@XMM[15]}, [$ivp] @ load IV b .Lcbc_dec_loop Loading @@ -1035,7 +1092,11 @@ bsaes_cbc_encrypt: vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! #ifndef BSAES_ASM_EXTENDED_KEY mov r4, $keysched @ pass the key #else add r4, $key, #248 #endif vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! mov r5, $rounds vld1.8 {@XMM[6]-@XMM[7]}, [$inp] Loading Loading @@ -1075,7 +1136,11 @@ bsaes_cbc_encrypt: cmp $len, #2 blo .Lcbc_dec_one vld1.8 {@XMM[1]}, [$inp]! #ifndef BSAES_ASM_EXTENDED_KEY mov r4, $keysched @ pass the key #else add r4, $key, #248 #endif mov r5, $rounds vstmia $fp, {@XMM[15]} @ put aside IV beq .Lcbc_dec_two Loading Loading @@ -1207,16 +1272,19 @@ bsaes_cbc_encrypt: vst1.8 {@XMM[0]}, [$rounds] @ write output .Lcbc_dec_done: #ifndef BSAES_ASM_EXTENDED_KEY vmov.i32 q0, #0 vmov.i32 q1, #0 .Lcbc_dec_bzero: @ wipe key schedule [if any] vstmia $keysched!, {q0-q1} teq $keysched, $fp cmp $keysched, $fp bne .Lcbc_dec_bzero #endif add sp, $fp, #0x10 mov sp, $fp add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb vst1.8 {@XMM[15]}, [$ivp] @ return IV vldmia sp!, {d8-d15} VFP_ABI_POP ldmia sp!, {r4-r10, pc} .size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt ___ Loading @@ -1235,21 +1303,23 @@ bsaes_ctr32_encrypt_blocks: cmp $len, #8 @ use plain AES for blo .Lctr_enc_short @ small sizes mov ip, sp stmdb sp!, {r4-r10, lr} vstmdb sp!, {d8-d15} @ ABI specification says so ldr $ctr, [sp, #0x60] @ ctr is 1st arg on the stack VFP_ABI_PUSH ldr $ctr, [ip] @ ctr is 1st arg on the stack sub sp, sp, #0x10 @ scratch space to carry over the ctr mov $fp, sp @ save sp @ allocate the key schedule on the stack ldr $rounds, [$key, #240] @ get # of rounds sub sp, sp, $rounds, lsl#7 @ 128 bytes per inner round key add sp, sp, #`128-32` @ size of bit-sliced key schedule #ifndef BSAES_ASM_EXTENDED_KEY @ allocate the key schedule on the stack sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key add r12, #`128-32` @ size of bit-sliced key schedule @ populate the key schedule mov r4, $key @ pass key mov r5, $rounds @ pass # of rounds mov r12, $keysched @ pass key schedule mov sp, r12 @ sp is $keysched bl _bsaes_key_convert veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key vstmia r12, {@XMM[7]} @ save last round key Loading @@ -1257,6 +1327,27 @@ bsaes_ctr32_encrypt_blocks: vld1.8 {@XMM[0]}, [$ctr] @ load counter add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr vldmia $keysched, {@XMM[4]} @ load round0 key #else ldr r12, [$key, #244] eors r12, #1 beq 0f @ populate the key schedule str r12, [$key, #244] mov r4, $key @ pass key mov r5, $rounds @ pass # of rounds add r12, $key, #248 @ pass key schedule bl _bsaes_key_convert veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key vstmia r12, {@XMM[7]} @ save last round key .align 2 0: add r12, $key, #248 vld1.8 {@XMM[0]}, [$ctr] @ load counter adrl $ctr, .LREVM0SR @ borrow $ctr vldmia r12, {@XMM[4]} @ load round0 key sub sp, #0x10 @ place for adjusted round0 key #endif vmov.i32 @XMM[8],#1 @ compose 1<<96 veor @XMM[9],@XMM[9],@XMM[9] Loading @@ -1283,7 +1374,11 @@ bsaes_ctr32_encrypt_blocks: @ to flip byte order in 32-bit counter vldmia $keysched, {@XMM[9]} @ load round0 key #ifndef BSAES_ASM_EXTENDED_KEY add r4, $keysched, #0x10 @ pass next round key #else add r4, $key, #`248+16` #endif vldmia $ctr, {@XMM[8]} @ .LREVM0SR mov r5, $rounds @ pass rounds vstmia $fp, {@XMM[10]} @ save next counter Loading Loading @@ -1359,13 +1454,18 @@ bsaes_ctr32_encrypt_blocks: .Lctr_enc_done: vmov.i32 q0, #0 vmov.i32 q1, #0 #ifndef BSAES_ASM_EXTENDED_KEY .Lctr_enc_bzero: @ wipe key schedule [if any] vstmia $keysched!, {q0-q1} teq $keysched, $fp cmp $keysched, $fp bne .Lctr_enc_bzero #else vstmia $keysched, {q0-q1} #endif add sp, $fp, #0x10 vldmia sp!, {d8-d15} mov sp, $fp add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb VFP_ABI_POP ldmia sp!, {r4-r10, pc} @ return .align 4 Loading Loading @@ -1407,7 +1507,10 @@ bsaes_ctr32_encrypt_blocks: subs r6, r6, #1 bne .Lctr_enc_short_loop add sp, sp, #0x20 vmov.i32 q0, #0 vmov.i32 q1, #0 vstmia sp!, {q0-q1} ldmia sp!, {r4-r8, pc} .size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks ___ Loading @@ -1428,41 +1531,66 @@ $code.=<<___; .type bsaes_xts_encrypt,%function .align 4 bsaes_xts_encrypt: mov ip, sp stmdb sp!, {r4-r10, lr} @ 0x20 vstmdb sp!, {d8-d15} @ 0x40 VFP_ABI_PUSH mov r6, sp @ future $fp sub sp, #0x10 @ 0x10 mov $inp, r0 mov $out, r1 mov $len, r2 mov $key, r3 bic sp, #0xf @ align at 16 bytes sub r0, sp, #0x10 @ 0x10 bic r0, #0xf @ align at 16 bytes mov sp, r0 #ifdef XTS_CHAIN_TWEAK ldr r0, [ip] @ pointer to input tweak #else @ generate initial tweak ldr r0, [r6, #0x64] @ iv[] ldr r0, [ip, #4] @ iv[] mov r1, sp ldr r2, [r6, #0x60] @ key2 ldr r2, [ip, #0] @ key2 bl AES_encrypt mov r0,sp @ pointer to initial tweak #endif @ allocate the key schedule on the stack ldr $rounds, [$key, #240] @ get # of rounds mov $fp, r6 mov r0, sp @ pointer to initial tweak sub sp, sp, $rounds, lsl#7 @ 128 bytes per inner round key @ add sp, sp, #`128-32` @ size of bit-sliced key schedule sub sp, sp, #`32+16` @ place for tweak[9] #ifndef BSAES_ASM_EXTENDED_KEY @ allocate the key schedule on the stack sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key @ add r12, #`128-32` @ size of bit-sliced key schedule sub r12, #`32+16` @ place for tweak[9] @ populate the key schedule mov r4, $key @ pass key mov r5, $rounds @ pass # of rounds add r12, sp, #0x90 @ pass key schedule mov sp, r12 add r12, #0x90 @ pass key schedule bl _bsaes_key_convert veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key vstmia r12, {@XMM[7]} @ save last round key #else ldr r12, [$key, #244] eors r12, #1 beq 0f str r12, [$key, #244] mov r4, $key @ pass key mov r5, $rounds @ pass # of rounds add r12, $key, #248 @ pass key schedule bl _bsaes_key_convert veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key vstmia r12, {@XMM[7]} .align 2 0: sub sp, #0x90 @ place for tweak[9] #endif vld1.8 {@XMM[8]}, [r0] @ initial tweak add $magic, $const, #.Lxts_magic-.LM0 adr $magic, .Lxts_magic subs $len, #0x80 blo .Lxts_enc_short Loading Loading @@ -1502,7 +1630,11 @@ $code.=<<___; vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! veor @XMM[5], @XMM[5], @XMM[13] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[6], @XMM[6], @XMM[14] mov r5, $rounds @ pass rounds veor @XMM[7], @XMM[7], @XMM[15] Loading Loading @@ -1567,7 +1699,11 @@ $code.=<<___; vld1.8 {@XMM[6]}, [$inp]! veor @XMM[5], @XMM[5], @XMM[13] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[6], @XMM[6], @XMM[14] mov r5, $rounds @ pass rounds mov r0, sp Loading Loading @@ -1597,7 +1733,11 @@ $code.=<<___; vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak veor @XMM[4], @XMM[4], @XMM[12] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[5], @XMM[5], @XMM[13] mov r5, $rounds @ pass rounds mov r0, sp Loading @@ -1619,12 +1759,22 @@ $code.=<<___; vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak b .Lxts_enc_done .align 4 @ put this in range for both ARM and Thumb mode adr instructions .align 5 .Lxts_magic: .quad 1, 0x87 .align 5 .Lxts_enc_5: vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak veor @XMM[3], @XMM[3], @XMM[11] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[4], @XMM[4], @XMM[12] mov r5, $rounds @ pass rounds mov r0, sp Loading @@ -1650,7 +1800,11 @@ $code.=<<___; vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak veor @XMM[2], @XMM[2], @XMM[10] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[3], @XMM[3], @XMM[11] mov r5, $rounds @ pass rounds mov r0, sp Loading @@ -1673,7 +1827,11 @@ $code.=<<___; vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak veor @XMM[1], @XMM[1], @XMM[9] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[2], @XMM[2], @XMM[10] mov r5, $rounds @ pass rounds mov r0, sp Loading @@ -1695,7 +1853,11 @@ $code.=<<___; vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak veor @XMM[0], @XMM[0], @XMM[8] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[1], @XMM[1], @XMM[9] mov r5, $rounds @ pass rounds mov r0, sp Loading Loading @@ -1728,6 +1890,7 @@ $code.=<<___; vmov @XMM[8], @XMM[9] @ next round tweak .Lxts_enc_done: #ifndef XTS_CHAIN_TWEAK adds $len, #0x10 beq .Lxts_enc_ret sub r6, $out, #0x10 Loading Loading @@ -1755,18 +1918,25 @@ $code.=<<___; veor @XMM[0], @XMM[0], @XMM[8] vst1.8 {@XMM[0]}, [r6] mov $fp, r4 #endif .Lxts_enc_ret: bic r0, $fp, #0xf vmov.i32 q0, #0 vmov.i32 q1, #0 #ifdef XTS_CHAIN_TWEAK ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak #endif .Lxts_enc_bzero: @ wipe key schedule [if any] vstmia sp!, {q0-q1} teq sp, r0 cmp sp, r0 bne .Lxts_enc_bzero mov sp, $fp vldmia sp!, {d8-d15} #ifdef XTS_CHAIN_TWEAK vst1.8 {@XMM[8]}, [r1] #endif VFP_ABI_POP ldmia sp!, {r4-r10, pc} @ return .size bsaes_xts_encrypt,.-bsaes_xts_encrypt Loading @@ -1775,46 +1945,74 @@ $code.=<<___; .type bsaes_xts_decrypt,%function .align 4 bsaes_xts_decrypt: mov ip, sp stmdb sp!, {r4-r10, lr} @ 0x20 vstmdb sp!, {d8-d15} @ 0x40 VFP_ABI_PUSH mov r6, sp @ future $fp sub sp, #0x10 @ 0x10 mov $inp, r0 mov $out, r1 mov $len, r2 mov $key, r3 bic sp, #0xf @ align at 16 bytes sub r0, sp, #0x10 @ 0x10 bic r0, #0xf @ align at 16 bytes mov sp, r0 #ifdef XTS_CHAIN_TWEAK ldr r0, [ip] @ pointer to input tweak #else @ generate initial tweak ldr r0, [r6, #0x64] @ iv[] ldr r0, [ip, #4] @ iv[] mov r1, sp ldr r2, [r6, #0x60] @ key2 ldr r2, [ip, #0] @ key2 bl AES_encrypt mov r0, sp @ pointer to initial tweak #endif @ allocate the key schedule on the stack ldr $rounds, [$key, #240] @ get # of rounds mov $fp, r6 mov r0, sp @ pointer to initial tweak sub sp, sp, $rounds, lsl#7 @ 128 bytes per inner round key @ add sp, sp, #`128-32` @ size of bit-sliced key schedule sub sp, sp, #`32+16` @ place for tweak[9] #ifndef BSAES_ASM_EXTENDED_KEY @ allocate the key schedule on the stack sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key @ add r12, #`128-32` @ size of bit-sliced key schedule sub r12, #`32+16` @ place for tweak[9] @ populate the key schedule mov r4, $key @ pass key mov r5, $rounds @ pass # of rounds add r12, sp, #0x90 @ pass key schedule mov sp, r12 add r12, #0x90 @ pass key schedule bl _bsaes_key_convert add r4, sp, #0x90 vldmia r4, {@XMM[6]} vstmia r12, {@XMM[15]} @ save last round key veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key vstmia r4, {@XMM[7]} #else ldr r12, [$key, #244] eors r12, #1 beq 0f str r12, [$key, #244] mov r4, $key @ pass key mov r5, $rounds @ pass # of rounds add r12, $key, #248 @ pass key schedule bl _bsaes_key_convert add r4, $key, #248 vldmia r4, {@XMM[6]} vstmia r12, {@XMM[15]} @ save last round key veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key vstmia r4, {@XMM[7]} .align 2 0: sub sp, #0x90 @ place for tweak[9] #endif vld1.8 {@XMM[8]}, [r0] @ initial tweak add $magic, $const, #.Lxts_magic-.LM0 adr $magic, .Lxts_magic tst $len, #0xf @ if not multiple of 16 it ne @ Thumb2 thing, sanity check in ARM subne $len, #0x10 @ subtract another 16 bytes subs $len, #0x80 Loading Loading @@ -1855,7 +2053,11 @@ $code.=<<___; vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! veor @XMM[5], @XMM[5], @XMM[13] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[6], @XMM[6], @XMM[14] mov r5, $rounds @ pass rounds veor @XMM[7], @XMM[7], @XMM[15] Loading Loading @@ -1920,7 +2122,11 @@ $code.=<<___; vld1.8 {@XMM[6]}, [$inp]! veor @XMM[5], @XMM[5], @XMM[13] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[6], @XMM[6], @XMM[14] mov r5, $rounds @ pass rounds mov r0, sp Loading Loading @@ -1950,7 +2156,11 @@ $code.=<<___; vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak veor @XMM[4], @XMM[4], @XMM[12] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[5], @XMM[5], @XMM[13] mov r5, $rounds @ pass rounds mov r0, sp Loading @@ -1977,7 +2187,11 @@ $code.=<<___; vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak veor @XMM[3], @XMM[3], @XMM[11] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[4], @XMM[4], @XMM[12] mov r5, $rounds @ pass rounds mov r0, sp Loading @@ -2003,7 +2217,11 @@ $code.=<<___; vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak veor @XMM[2], @XMM[2], @XMM[10] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[3], @XMM[3], @XMM[11] mov r5, $rounds @ pass rounds mov r0, sp Loading @@ -2026,7 +2244,11 @@ $code.=<<___; vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak veor @XMM[1], @XMM[1], @XMM[9] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[2], @XMM[2], @XMM[10] mov r5, $rounds @ pass rounds mov r0, sp Loading @@ -2048,7 +2270,11 @@ $code.=<<___; vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak veor @XMM[0], @XMM[0], @XMM[8] #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule #else add r4, $key, #248 @ pass key schedule #endif veor @XMM[1], @XMM[1], @XMM[9] mov r5, $rounds @ pass rounds mov r0, sp Loading Loading @@ -2083,6 +2309,7 @@ $code.=<<___; vmov @XMM[8], @XMM[9] @ next round tweak .Lxts_dec_done: #ifndef XTS_CHAIN_TWEAK adds $len, #0x10 beq .Lxts_dec_ret Loading Loading @@ -2132,18 +2359,25 @@ $code.=<<___; veor @XMM[0], @XMM[0], @XMM[8] vst1.8 {@XMM[0]}, [r6] mov $fp, r4 #endif .Lxts_dec_ret: bic r0, $fp, #0xf vmov.i32 q0, #0 vmov.i32 q1, #0 #ifdef XTS_CHAIN_TWEAK ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak #endif .Lxts_dec_bzero: @ wipe key schedule [if any] vstmia sp!, {q0-q1} teq sp, r0 cmp sp, r0 bne .Lxts_dec_bzero mov sp, $fp vldmia sp!, {d8-d15} #ifdef XTS_CHAIN_TWEAK vst1.8 {@XMM[8]}, [r1] #endif VFP_ABI_POP ldmia sp!, {r4-r10, pc} @ return .size bsaes_xts_decrypt,.-bsaes_xts_decrypt Loading @@ -2155,6 +2389,14 @@ ___ $code =~ s/\`([^\`]*)\`/eval($1)/gem; open SELF,$0; while(<SELF>) { next if (/^#!/); last if (!s/^#/@/ and !/^$/); print; } close SELF; print $code; close STDOUT;