Loading crypto/aes/asm/aes-armv4.pl +10 −14 Original line number Diff line number Diff line Loading @@ -70,16 +70,12 @@ $code=<<___; #endif .text #if __ARM_ARCH__<7 .code 32 #else .syntax unified #if defined(__thumb2__) && !defined(__APPLE__) .syntax unified .thumb #else .code 32 #endif #endif .type AES_Te,%object .align 5 Loading Loading @@ -193,7 +189,7 @@ AES_Te: .type AES_encrypt,%function .align 5 AES_encrypt: #if __ARM_ARCH__<7 #ifndef __thumb2__ sub r3,pc,#8 @ AES_encrypt #else adr r3,AES_encrypt Loading Loading @@ -443,19 +439,19 @@ _armv4_AES_encrypt: .align 5 AES_set_encrypt_key: _armv4_AES_set_encrypt_key: #if __ARM_ARCH__<7 #ifndef __thumb2__ sub r3,pc,#8 @ AES_set_encrypt_key #else adr r3,AES_set_encrypt_key #endif teq r0,#0 #if __ARM_ARCH__>=7 #ifdef __thumb2__ itt eq @ Thumb2 thing, sanity check in ARM #endif moveq r0,#-1 beq .Labrt teq r2,#0 #if __ARM_ARCH__>=7 #ifdef __thumb2__ itt eq @ Thumb2 thing, sanity check in ARM #endif moveq r0,#-1 Loading @@ -466,7 +462,7 @@ _armv4_AES_set_encrypt_key: teq r1,#192 beq .Lok teq r1,#256 #if __ARM_ARCH__>=7 #ifdef __thumb2__ itt ne @ Thumb2 thing, sanity check in ARM #endif movne r0,#-1 Loading Loading @@ -627,7 +623,7 @@ _armv4_AES_set_encrypt_key: str $s2,[$key,#-16] subs $rounds,$rounds,#1 str $s3,[$key,#-12] #if __ARM_ARCH__>=7 #ifdef __thumb2__ itt eq @ Thumb2 thing, sanity check in ARM #endif subeq r2,$key,#216 Loading Loading @@ -699,7 +695,7 @@ _armv4_AES_set_encrypt_key: str $s2,[$key,#-24] subs $rounds,$rounds,#1 str $s3,[$key,#-20] #if __ARM_ARCH__>=7 #ifdef __thumb2__ itt eq @ Thumb2 thing, sanity check in ARM #endif subeq r2,$key,#256 Loading Loading @@ -969,7 +965,7 @@ AES_Td: .type AES_decrypt,%function .align 5 AES_decrypt: #if __ARM_ARCH__<7 #ifndef __thumb2__ sub r3,pc,#8 @ AES_decrypt #else adr r3,AES_decrypt Loading crypto/armv4cpuid.pl +24 −0 Original line number Diff line number Diff line Loading @@ -15,7 +15,12 @@ $code.=<<___; #include "arm_arch.h" .text #if defined(__thumb2__) && !defined(__APPLE__) .syntax unified .thumb #else .code 32 #endif .align 5 .global OPENSSL_atomic_add Loading Loading @@ -59,6 +64,9 @@ OPENSSL_atomic_add: OPENSSL_cleanse: eor ip,ip,ip cmp r1,#7 #ifdef __thumb2__ itt hs #endif subhs r1,r1,#4 bhs .Lot cmp r1,#0 Loading Loading @@ -116,27 +124,43 @@ _armv7_tick: .global _armv8_aes_probe .type _armv8_aes_probe,%function _armv8_aes_probe: #if defined(__thumb2__) && !defined(__APPLE__) .byte 0xb0,0xff,0x00,0x03 @ aese.8 q0,q0 #else .byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0 #endif bx lr .size _armv8_aes_probe,.-_armv8_aes_probe .global _armv8_sha1_probe .type _armv8_sha1_probe,%function _armv8_sha1_probe: #if defined(__thumb2__) && !defined(__APPLE__) .byte 0x00,0xef,0x40,0x0c @ sha1c.32 q0,q0,q0 #else .byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0 #endif bx lr .size _armv8_sha1_probe,.-_armv8_sha1_probe .global _armv8_sha256_probe .type _armv8_sha256_probe,%function _armv8_sha256_probe: #if defined(__thumb2__) && !defined(__APPLE__) .byte 0x00,0xff,0x40,0x0c @ sha256h.32 q0,q0,q0 #else .byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0 #endif bx lr .size _armv8_sha256_probe,.-_armv8_sha256_probe .global _armv8_pmull_probe .type _armv8_pmull_probe,%function _armv8_pmull_probe: #if defined(__thumb2__) && !defined(__APPLE__) .byte 0xa0,0xef,0x00,0x0e @ vmull.p64 q0,d0,d0 #else .byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0 #endif bx lr .size _armv8_pmull_probe,.-_armv8_pmull_probe #endif Loading crypto/bn/asm/armv4-gf2m.pl +30 −6 Original line number Diff line number Diff line Loading @@ -51,7 +51,12 @@ $code=<<___; #include "arm_arch.h" .text #if defined(__thumb2__) && !defined(__APPLE__) .syntax unified .thumb #else .code 32 #endif ___ ################ # private interface to mul_1x1_ialu Loading Loading @@ -132,11 +137,17 @@ mul_1x1_ialu: eor $hi,$hi,$t0,lsr#8 ldr $t0,[sp,$i0] @ tab[b >> 30 ] #ifdef __thumb2__ itt ne #endif eorne $lo,$lo,$b,lsl#30 eorne $hi,$hi,$b,lsr#2 tst $a,#1<<31 eor $lo,$lo,$t1,lsl#27 eor $hi,$hi,$t1,lsr#5 #ifdef __thumb2__ itt ne #endif eorne $lo,$lo,$b,lsl#31 eorne $hi,$hi,$b,lsr#1 eor $lo,$lo,$t0,lsl#30 Loading @@ -156,20 +167,33 @@ $code.=<<___; .align 5 bn_GF2m_mul_2x2: #if __ARM_MAX_ARCH__>=7 stmdb sp!,{r10,lr} ldr r12,.LOPENSSL_armcap .Lpic: ldr r12,[pc,r12] tst r12,#1 adr r10,.LOPENSSL_armcap ldr r12,[r12,r10] #ifdef __APPLE__ ldr r12,[r12] #endif tst r12,#ARMV7_NEON itt ne ldrne r10,[sp],#8 bne .LNEON stmdb sp!,{r4-r9} #else stmdb sp!,{r4-r10,lr} #endif ___ $ret="r10"; # reassigned 1st argument $code.=<<___; stmdb sp!,{r4-r10,lr} mov $ret,r0 @ reassign 1st argument mov $b,r3 @ $b=b1 sub r7,sp,#36 mov r8,sp and r7,r7,#-32 ldr r3,[sp,#32] @ load b0 mov $mask,#7<<2 sub sp,sp,#32 @ allocate tab[8] mov sp,r7 @ allocate tab[8] str r8,[r7,#32] bl mul_1x1_ialu @ a1·b1 str $lo,[$ret,#8] Loading @@ -193,6 +217,7 @@ ___ $code.=<<___; ldmia $ret,{@r[0]-@r[3]} eor $lo,$lo,$hi ldr sp,[sp,#32] @ destroy tab[8] eor $hi,$hi,@r[1] eor $lo,$lo,@r[0] eor $hi,$hi,@r[2] Loading @@ -200,7 +225,6 @@ $code.=<<___; eor $hi,$hi,@r[3] str $hi,[$ret,#8] eor $lo,$lo,$hi add sp,sp,#32 @ destroy tab[8] str $lo,[$ret,#4] #if __ARM_ARCH__>=5 Loading Loading @@ -279,7 +303,7 @@ $code.=<<___; #if __ARM_MAX_ARCH__>=7 .align 5 .LOPENSSL_armcap: .word OPENSSL_armcap_P-(.Lpic+8) .word OPENSSL_armcap_P-. #endif .asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>" .align 5 Loading crypto/bn/asm/armv4-mont.pl +29 −6 Original line number Diff line number Diff line Loading @@ -82,7 +82,12 @@ $code=<<___; #include "arm_arch.h" .text #if defined(__thumb2__) && !defined(__APPLE__) .syntax unified .thumb #else .code 32 #endif #if __ARM_MAX_ARCH__>=7 .align 5 Loading @@ -101,7 +106,7 @@ bn_mul_mont: #if __ARM_MAX_ARCH__>=7 tst ip,#7 bne .Lialu adr r0,bn_mul_mont adr r0,.Lbn_mul_mont ldr r2,.LOPENSSL_armcap ldr r0,[r0,r2] #ifdef __APPLE__ Loading @@ -117,6 +122,9 @@ bn_mul_mont: #endif cmp ip,#2 mov $num,ip @ load num #ifdef __thumb2__ ittt lt #endif movlt r0,#0 addlt sp,sp,#2*4 blt .Labrt Loading Loading @@ -164,10 +172,11 @@ bn_mul_mont: ldr $n0,[$_n0] @ restore n0 adc $nhi,$nhi,#0 str $nlo,[$num] @ tp[num-1]= mov $tj,sp str $nhi,[$num,#4] @ tp[num]= .Louter: sub $tj,$num,sp @ "original" $num-1 value sub $tj,$num,$tj @ "original" $num-1 value sub $ap,$ap,$tj @ "rewind" ap to &ap[1] ldr $bi,[$tp,#4]! @ *(++bp) sub $np,$np,$tj @ "rewind" np to &np[1] Loading Loading @@ -212,11 +221,16 @@ bn_mul_mont: str $nhi,[$num,#4] @ tp[num]= cmp $tp,$tj #ifdef __thumb2__ itt ne #endif movne $tj,sp bne .Louter ldr $rp,[$_rp] @ pull rp mov $aj,sp add $num,$num,#4 @ $num to point at &tp[num] sub $aj,$num,sp @ "original" num value sub $aj,$num,$aj @ "original" num value mov $tp,sp @ "rewind" $tp mov $ap,$tp @ "borrow" $ap sub $np,$np,$aj @ "rewind" $np to &np[0] Loading @@ -242,7 +256,8 @@ bn_mul_mont: cmp $tp,$num bne .Lcopy add sp,$num,#4 @ skip over tp[num+1] mov sp,$num add sp,sp,#4 @ skip over tp[num+1] ldmia sp!,{r4-r12,lr} @ restore registers add sp,sp,#2*4 @ skip over {r0,r2} mov r0,#1 Loading Loading @@ -283,6 +298,7 @@ bn_mul8x_mont_neon: stmdb sp!,{r4-r11} vstmdb sp!,{d8-d15} @ ABI specification says so ldmia ip,{r4-r5} @ load rest of parameter block mov ip,sp sub $toutptr,sp,#16 vld1.32 {${Bi}[0]}, [$bptr,:32]! Loading Loading @@ -638,8 +654,9 @@ bn_mul8x_mont_neon: bne .LNEON_sub ldr r10, [$aptr] @ load top-most bit mov r11,sp veor q0,q0,q0 sub r11,$bptr,sp @ this is num*4 sub r11,$bptr,r11 @ this is num*4 veor q1,q1,q1 mov $aptr,sp sub $rptr,$rptr,r11 @ rewind $rptr Loading @@ -649,27 +666,33 @@ bn_mul8x_mont_neon: .LNEON_copy_n_zap: ldmia $aptr!, {r4-r7} ldmia $rptr, {r8-r11} it cc movcc r8, r4 vst1.64 {q0-q1}, [$nptr,:256]! @ wipe itt cc movcc r9, r5 movcc r10,r6 vst1.64 {q0-q1}, [$nptr,:256]! @ wipe it cc movcc r11,r7 ldmia $aptr, {r4-r7} stmia $rptr!, {r8-r11} sub $aptr,$aptr,#16 ldmia $rptr, {r8-r11} it cc movcc r8, r4 vst1.64 {q0-q1}, [$aptr,:256]! @ wipe itt cc movcc r9, r5 movcc r10,r6 vst1.64 {q0-q1}, [$nptr,:256]! @ wipe it cc movcc r11,r7 teq $aptr,$bptr @ preserves carry stmia $rptr!, {r8-r11} bne .LNEON_copy_n_zap sub sp,ip,#96 mov sp,ip vldmia sp!,{d8-d15} ldmia sp!,{r4-r11} ret @ bx lr Loading crypto/ec/asm/ecp_nistz256-armv4.pl +38 −0 Original line number Diff line number Diff line Loading @@ -45,7 +45,12 @@ $code.=<<___; #include "arm_arch.h" .text #if defined(__thumb2__) && !defined(__APPLE__) .syntax unified .thumb #else .code 32 #endif ___ ######################################################################## # Convert ecp_nistz256_table.c to layout expected by ecp_nistz_gather_w7 Loading Loading @@ -162,6 +167,9 @@ __ecp_nistz256_mul_by_2: adcs $a6,$a6,$a6 mov $ff,#0 adcs $a7,$a7,$a7 #ifdef __thumb2__ it cs #endif movcs $ff,#-1 @ $ff = carry ? -1 : 0 b .Lreduce_by_sub Loading Loading @@ -213,6 +221,9 @@ __ecp_nistz256_add: adcs $a6,$a6,$t2 mov $ff,#0 adcs $a7,$a7,$t3 #ifdef __thumb2__ it cs #endif movcs $ff,#-1 @ $ff = carry ? -1 : 0, "broadcast" carry ldr lr,[sp],#4 @ pop lr Loading Loading @@ -286,6 +297,9 @@ __ecp_nistz256_mul_by_3: adcs $a6,$a6,$a6 mov $ff,#0 adcs $a7,$a7,$a7 #ifdef __thumb2__ it cs #endif movcs $ff,#-1 @ $ff = carry ? -1 : 0, "broadcast" carry subs $a0,$a0,$ff @ subtract synthesized modulus, see Loading Loading @@ -318,6 +332,9 @@ __ecp_nistz256_mul_by_3: adcs $a6,$a6,$t2 mov $ff,#0 adcs $a7,$a7,$t3 #ifdef __thumb2__ it cs #endif movcs $ff,#-1 @ $ff = carry ? -1 : 0, "broadcast" carry ldr lr,[sp],#4 @ pop lr Loading Loading @@ -781,6 +798,9 @@ ecp_nistz256_gather_w5: cmp $index,#0 mov $mask,#0 #ifdef __thumb2__ itt ne #endif subne $index,$index,#1 movne $mask,#-1 add $inp,$inp,$index,lsl#2 Loading Loading @@ -887,6 +907,9 @@ ecp_nistz256_gather_w7: cmp $index,#0 mov $mask,#0 #ifdef __thumb2__ itt ne #endif subne $index,$index,#1 movne $mask,#-1 add $inp,$inp,$index Loading Loading @@ -1180,6 +1203,9 @@ __ecp_nistz256_add_self: adcs $a6,$a6,$a6 mov $ff,#0 adcs $a7,$a7,$a7 #ifdef __thumb2__ it cs #endif movcs $ff,#-1 @ $ff = carry ? -1 : 0 subs $a0,$a0,$ff @ subtract synthesized modulus Loading Loading @@ -1369,6 +1395,9 @@ ecp_nistz256_point_add: stmia r3!,{r4-r11} ldmia $b_ptr,{r4-r11} cmp r12,#0 #ifdef __thumb2__ it ne #endif movne r12,#-1 stmia r3,{r4-r11} str r12,[sp,#32*18+8] @ !in2infty Loading @@ -1395,6 +1424,9 @@ ecp_nistz256_point_add: stmia r3!,{r4-r11} ldmia $a_ptr,{r4-r11} cmp r12,#0 #ifdef __thumb2__ it ne #endif movne r12,#-1 stmia r3,{r4-r11} str r12,[sp,#32*18+4] @ !in1infty Loading Loading @@ -1636,6 +1668,9 @@ ecp_nistz256_point_add_affine: stmia r3!,{r4-r11} ldmia $a_ptr,{r4-r11} cmp r12,#0 #ifdef __thumb2__ it ne #endif movne r12,#-1 stmia r3,{r4-r11} str r12,[sp,#32*15+4] @ !in1infty Loading @@ -1661,6 +1696,9 @@ ecp_nistz256_point_add_affine: orr r12,r12,r11 stmia r3!,{r4-r11} cmp r12,#0 #ifdef __thumb2__ it ne #endif movne r12,#-1 str r12,[sp,#32*15+8] @ !in2infty Loading Loading
crypto/aes/asm/aes-armv4.pl +10 −14 Original line number Diff line number Diff line Loading @@ -70,16 +70,12 @@ $code=<<___; #endif .text #if __ARM_ARCH__<7 .code 32 #else .syntax unified #if defined(__thumb2__) && !defined(__APPLE__) .syntax unified .thumb #else .code 32 #endif #endif .type AES_Te,%object .align 5 Loading Loading @@ -193,7 +189,7 @@ AES_Te: .type AES_encrypt,%function .align 5 AES_encrypt: #if __ARM_ARCH__<7 #ifndef __thumb2__ sub r3,pc,#8 @ AES_encrypt #else adr r3,AES_encrypt Loading Loading @@ -443,19 +439,19 @@ _armv4_AES_encrypt: .align 5 AES_set_encrypt_key: _armv4_AES_set_encrypt_key: #if __ARM_ARCH__<7 #ifndef __thumb2__ sub r3,pc,#8 @ AES_set_encrypt_key #else adr r3,AES_set_encrypt_key #endif teq r0,#0 #if __ARM_ARCH__>=7 #ifdef __thumb2__ itt eq @ Thumb2 thing, sanity check in ARM #endif moveq r0,#-1 beq .Labrt teq r2,#0 #if __ARM_ARCH__>=7 #ifdef __thumb2__ itt eq @ Thumb2 thing, sanity check in ARM #endif moveq r0,#-1 Loading @@ -466,7 +462,7 @@ _armv4_AES_set_encrypt_key: teq r1,#192 beq .Lok teq r1,#256 #if __ARM_ARCH__>=7 #ifdef __thumb2__ itt ne @ Thumb2 thing, sanity check in ARM #endif movne r0,#-1 Loading Loading @@ -627,7 +623,7 @@ _armv4_AES_set_encrypt_key: str $s2,[$key,#-16] subs $rounds,$rounds,#1 str $s3,[$key,#-12] #if __ARM_ARCH__>=7 #ifdef __thumb2__ itt eq @ Thumb2 thing, sanity check in ARM #endif subeq r2,$key,#216 Loading Loading @@ -699,7 +695,7 @@ _armv4_AES_set_encrypt_key: str $s2,[$key,#-24] subs $rounds,$rounds,#1 str $s3,[$key,#-20] #if __ARM_ARCH__>=7 #ifdef __thumb2__ itt eq @ Thumb2 thing, sanity check in ARM #endif subeq r2,$key,#256 Loading Loading @@ -969,7 +965,7 @@ AES_Td: .type AES_decrypt,%function .align 5 AES_decrypt: #if __ARM_ARCH__<7 #ifndef __thumb2__ sub r3,pc,#8 @ AES_decrypt #else adr r3,AES_decrypt Loading
crypto/armv4cpuid.pl +24 −0 Original line number Diff line number Diff line Loading @@ -15,7 +15,12 @@ $code.=<<___; #include "arm_arch.h" .text #if defined(__thumb2__) && !defined(__APPLE__) .syntax unified .thumb #else .code 32 #endif .align 5 .global OPENSSL_atomic_add Loading Loading @@ -59,6 +64,9 @@ OPENSSL_atomic_add: OPENSSL_cleanse: eor ip,ip,ip cmp r1,#7 #ifdef __thumb2__ itt hs #endif subhs r1,r1,#4 bhs .Lot cmp r1,#0 Loading Loading @@ -116,27 +124,43 @@ _armv7_tick: .global _armv8_aes_probe .type _armv8_aes_probe,%function _armv8_aes_probe: #if defined(__thumb2__) && !defined(__APPLE__) .byte 0xb0,0xff,0x00,0x03 @ aese.8 q0,q0 #else .byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0 #endif bx lr .size _armv8_aes_probe,.-_armv8_aes_probe .global _armv8_sha1_probe .type _armv8_sha1_probe,%function _armv8_sha1_probe: #if defined(__thumb2__) && !defined(__APPLE__) .byte 0x00,0xef,0x40,0x0c @ sha1c.32 q0,q0,q0 #else .byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0 #endif bx lr .size _armv8_sha1_probe,.-_armv8_sha1_probe .global _armv8_sha256_probe .type _armv8_sha256_probe,%function _armv8_sha256_probe: #if defined(__thumb2__) && !defined(__APPLE__) .byte 0x00,0xff,0x40,0x0c @ sha256h.32 q0,q0,q0 #else .byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0 #endif bx lr .size _armv8_sha256_probe,.-_armv8_sha256_probe .global _armv8_pmull_probe .type _armv8_pmull_probe,%function _armv8_pmull_probe: #if defined(__thumb2__) && !defined(__APPLE__) .byte 0xa0,0xef,0x00,0x0e @ vmull.p64 q0,d0,d0 #else .byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0 #endif bx lr .size _armv8_pmull_probe,.-_armv8_pmull_probe #endif Loading
crypto/bn/asm/armv4-gf2m.pl +30 −6 Original line number Diff line number Diff line Loading @@ -51,7 +51,12 @@ $code=<<___; #include "arm_arch.h" .text #if defined(__thumb2__) && !defined(__APPLE__) .syntax unified .thumb #else .code 32 #endif ___ ################ # private interface to mul_1x1_ialu Loading Loading @@ -132,11 +137,17 @@ mul_1x1_ialu: eor $hi,$hi,$t0,lsr#8 ldr $t0,[sp,$i0] @ tab[b >> 30 ] #ifdef __thumb2__ itt ne #endif eorne $lo,$lo,$b,lsl#30 eorne $hi,$hi,$b,lsr#2 tst $a,#1<<31 eor $lo,$lo,$t1,lsl#27 eor $hi,$hi,$t1,lsr#5 #ifdef __thumb2__ itt ne #endif eorne $lo,$lo,$b,lsl#31 eorne $hi,$hi,$b,lsr#1 eor $lo,$lo,$t0,lsl#30 Loading @@ -156,20 +167,33 @@ $code.=<<___; .align 5 bn_GF2m_mul_2x2: #if __ARM_MAX_ARCH__>=7 stmdb sp!,{r10,lr} ldr r12,.LOPENSSL_armcap .Lpic: ldr r12,[pc,r12] tst r12,#1 adr r10,.LOPENSSL_armcap ldr r12,[r12,r10] #ifdef __APPLE__ ldr r12,[r12] #endif tst r12,#ARMV7_NEON itt ne ldrne r10,[sp],#8 bne .LNEON stmdb sp!,{r4-r9} #else stmdb sp!,{r4-r10,lr} #endif ___ $ret="r10"; # reassigned 1st argument $code.=<<___; stmdb sp!,{r4-r10,lr} mov $ret,r0 @ reassign 1st argument mov $b,r3 @ $b=b1 sub r7,sp,#36 mov r8,sp and r7,r7,#-32 ldr r3,[sp,#32] @ load b0 mov $mask,#7<<2 sub sp,sp,#32 @ allocate tab[8] mov sp,r7 @ allocate tab[8] str r8,[r7,#32] bl mul_1x1_ialu @ a1·b1 str $lo,[$ret,#8] Loading @@ -193,6 +217,7 @@ ___ $code.=<<___; ldmia $ret,{@r[0]-@r[3]} eor $lo,$lo,$hi ldr sp,[sp,#32] @ destroy tab[8] eor $hi,$hi,@r[1] eor $lo,$lo,@r[0] eor $hi,$hi,@r[2] Loading @@ -200,7 +225,6 @@ $code.=<<___; eor $hi,$hi,@r[3] str $hi,[$ret,#8] eor $lo,$lo,$hi add sp,sp,#32 @ destroy tab[8] str $lo,[$ret,#4] #if __ARM_ARCH__>=5 Loading Loading @@ -279,7 +303,7 @@ $code.=<<___; #if __ARM_MAX_ARCH__>=7 .align 5 .LOPENSSL_armcap: .word OPENSSL_armcap_P-(.Lpic+8) .word OPENSSL_armcap_P-. #endif .asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>" .align 5 Loading
crypto/bn/asm/armv4-mont.pl +29 −6 Original line number Diff line number Diff line Loading @@ -82,7 +82,12 @@ $code=<<___; #include "arm_arch.h" .text #if defined(__thumb2__) && !defined(__APPLE__) .syntax unified .thumb #else .code 32 #endif #if __ARM_MAX_ARCH__>=7 .align 5 Loading @@ -101,7 +106,7 @@ bn_mul_mont: #if __ARM_MAX_ARCH__>=7 tst ip,#7 bne .Lialu adr r0,bn_mul_mont adr r0,.Lbn_mul_mont ldr r2,.LOPENSSL_armcap ldr r0,[r0,r2] #ifdef __APPLE__ Loading @@ -117,6 +122,9 @@ bn_mul_mont: #endif cmp ip,#2 mov $num,ip @ load num #ifdef __thumb2__ ittt lt #endif movlt r0,#0 addlt sp,sp,#2*4 blt .Labrt Loading Loading @@ -164,10 +172,11 @@ bn_mul_mont: ldr $n0,[$_n0] @ restore n0 adc $nhi,$nhi,#0 str $nlo,[$num] @ tp[num-1]= mov $tj,sp str $nhi,[$num,#4] @ tp[num]= .Louter: sub $tj,$num,sp @ "original" $num-1 value sub $tj,$num,$tj @ "original" $num-1 value sub $ap,$ap,$tj @ "rewind" ap to &ap[1] ldr $bi,[$tp,#4]! @ *(++bp) sub $np,$np,$tj @ "rewind" np to &np[1] Loading Loading @@ -212,11 +221,16 @@ bn_mul_mont: str $nhi,[$num,#4] @ tp[num]= cmp $tp,$tj #ifdef __thumb2__ itt ne #endif movne $tj,sp bne .Louter ldr $rp,[$_rp] @ pull rp mov $aj,sp add $num,$num,#4 @ $num to point at &tp[num] sub $aj,$num,sp @ "original" num value sub $aj,$num,$aj @ "original" num value mov $tp,sp @ "rewind" $tp mov $ap,$tp @ "borrow" $ap sub $np,$np,$aj @ "rewind" $np to &np[0] Loading @@ -242,7 +256,8 @@ bn_mul_mont: cmp $tp,$num bne .Lcopy add sp,$num,#4 @ skip over tp[num+1] mov sp,$num add sp,sp,#4 @ skip over tp[num+1] ldmia sp!,{r4-r12,lr} @ restore registers add sp,sp,#2*4 @ skip over {r0,r2} mov r0,#1 Loading Loading @@ -283,6 +298,7 @@ bn_mul8x_mont_neon: stmdb sp!,{r4-r11} vstmdb sp!,{d8-d15} @ ABI specification says so ldmia ip,{r4-r5} @ load rest of parameter block mov ip,sp sub $toutptr,sp,#16 vld1.32 {${Bi}[0]}, [$bptr,:32]! Loading Loading @@ -638,8 +654,9 @@ bn_mul8x_mont_neon: bne .LNEON_sub ldr r10, [$aptr] @ load top-most bit mov r11,sp veor q0,q0,q0 sub r11,$bptr,sp @ this is num*4 sub r11,$bptr,r11 @ this is num*4 veor q1,q1,q1 mov $aptr,sp sub $rptr,$rptr,r11 @ rewind $rptr Loading @@ -649,27 +666,33 @@ bn_mul8x_mont_neon: .LNEON_copy_n_zap: ldmia $aptr!, {r4-r7} ldmia $rptr, {r8-r11} it cc movcc r8, r4 vst1.64 {q0-q1}, [$nptr,:256]! @ wipe itt cc movcc r9, r5 movcc r10,r6 vst1.64 {q0-q1}, [$nptr,:256]! @ wipe it cc movcc r11,r7 ldmia $aptr, {r4-r7} stmia $rptr!, {r8-r11} sub $aptr,$aptr,#16 ldmia $rptr, {r8-r11} it cc movcc r8, r4 vst1.64 {q0-q1}, [$aptr,:256]! @ wipe itt cc movcc r9, r5 movcc r10,r6 vst1.64 {q0-q1}, [$nptr,:256]! @ wipe it cc movcc r11,r7 teq $aptr,$bptr @ preserves carry stmia $rptr!, {r8-r11} bne .LNEON_copy_n_zap sub sp,ip,#96 mov sp,ip vldmia sp!,{d8-d15} ldmia sp!,{r4-r11} ret @ bx lr Loading
crypto/ec/asm/ecp_nistz256-armv4.pl +38 −0 Original line number Diff line number Diff line Loading @@ -45,7 +45,12 @@ $code.=<<___; #include "arm_arch.h" .text #if defined(__thumb2__) && !defined(__APPLE__) .syntax unified .thumb #else .code 32 #endif ___ ######################################################################## # Convert ecp_nistz256_table.c to layout expected by ecp_nistz_gather_w7 Loading Loading @@ -162,6 +167,9 @@ __ecp_nistz256_mul_by_2: adcs $a6,$a6,$a6 mov $ff,#0 adcs $a7,$a7,$a7 #ifdef __thumb2__ it cs #endif movcs $ff,#-1 @ $ff = carry ? -1 : 0 b .Lreduce_by_sub Loading Loading @@ -213,6 +221,9 @@ __ecp_nistz256_add: adcs $a6,$a6,$t2 mov $ff,#0 adcs $a7,$a7,$t3 #ifdef __thumb2__ it cs #endif movcs $ff,#-1 @ $ff = carry ? -1 : 0, "broadcast" carry ldr lr,[sp],#4 @ pop lr Loading Loading @@ -286,6 +297,9 @@ __ecp_nistz256_mul_by_3: adcs $a6,$a6,$a6 mov $ff,#0 adcs $a7,$a7,$a7 #ifdef __thumb2__ it cs #endif movcs $ff,#-1 @ $ff = carry ? -1 : 0, "broadcast" carry subs $a0,$a0,$ff @ subtract synthesized modulus, see Loading Loading @@ -318,6 +332,9 @@ __ecp_nistz256_mul_by_3: adcs $a6,$a6,$t2 mov $ff,#0 adcs $a7,$a7,$t3 #ifdef __thumb2__ it cs #endif movcs $ff,#-1 @ $ff = carry ? -1 : 0, "broadcast" carry ldr lr,[sp],#4 @ pop lr Loading Loading @@ -781,6 +798,9 @@ ecp_nistz256_gather_w5: cmp $index,#0 mov $mask,#0 #ifdef __thumb2__ itt ne #endif subne $index,$index,#1 movne $mask,#-1 add $inp,$inp,$index,lsl#2 Loading Loading @@ -887,6 +907,9 @@ ecp_nistz256_gather_w7: cmp $index,#0 mov $mask,#0 #ifdef __thumb2__ itt ne #endif subne $index,$index,#1 movne $mask,#-1 add $inp,$inp,$index Loading Loading @@ -1180,6 +1203,9 @@ __ecp_nistz256_add_self: adcs $a6,$a6,$a6 mov $ff,#0 adcs $a7,$a7,$a7 #ifdef __thumb2__ it cs #endif movcs $ff,#-1 @ $ff = carry ? -1 : 0 subs $a0,$a0,$ff @ subtract synthesized modulus Loading Loading @@ -1369,6 +1395,9 @@ ecp_nistz256_point_add: stmia r3!,{r4-r11} ldmia $b_ptr,{r4-r11} cmp r12,#0 #ifdef __thumb2__ it ne #endif movne r12,#-1 stmia r3,{r4-r11} str r12,[sp,#32*18+8] @ !in2infty Loading @@ -1395,6 +1424,9 @@ ecp_nistz256_point_add: stmia r3!,{r4-r11} ldmia $a_ptr,{r4-r11} cmp r12,#0 #ifdef __thumb2__ it ne #endif movne r12,#-1 stmia r3,{r4-r11} str r12,[sp,#32*18+4] @ !in1infty Loading Loading @@ -1636,6 +1668,9 @@ ecp_nistz256_point_add_affine: stmia r3!,{r4-r11} ldmia $a_ptr,{r4-r11} cmp r12,#0 #ifdef __thumb2__ it ne #endif movne r12,#-1 stmia r3,{r4-r11} str r12,[sp,#32*15+4] @ !in1infty Loading @@ -1661,6 +1696,9 @@ ecp_nistz256_point_add_affine: orr r12,r12,r11 stmia r3!,{r4-r11} cmp r12,#0 #ifdef __thumb2__ it ne #endif movne r12,#-1 str r12,[sp,#32*15+8] @ !in2infty Loading