Commit 56274078, authored by Omid Najafi, committed by Bernd Edlinger
Browse files

Fix syntax error for the armv4 assembler

The error was caused by the alignment-specifier syntax in the NEON vld1/vst1
instructions: operands written as [rN:64] are rewritten as [rN,:64].
More details:
https://stackoverflow.com/questions/57316823/arm-assembly-syntax-in-vst-vld-commands?noredirect=1#comment101133590_57316823



CLA: trivial

Fixes: #9518

Reviewed-by: Paul Dale <paul.dale@oracle.com>
Reviewed-by: Bernd Edlinger <bernd.edlinger@hotmail.de>
(Merged from https://github.com/openssl/openssl/pull/9518)

(cherry picked from commit 2a17758940657cc3a97b032104a92f0aa304f863)
parent 7b72912c
Loading
Loading
Loading
Loading
+111 −111
Original line number Original line Diff line number Diff line
@@ -1104,9 +1104,9 @@ KeccakF1600_neon:
.align	4
.align	4
.Loop_neon:
.Loop_neon:
	@ Theta
	@ Theta
	vst1.64		{q4},  [r0:64]		@ offload A[0..1][4]
	vst1.64		{q4},  [r0,:64]		@ offload A[0..1][4]
	veor		q13, q0,  q5		@ A[0..1][0]^A[2..3][0]
	veor		q13, q0,  q5		@ A[0..1][0]^A[2..3][0]
	vst1.64		{d18}, [r1:64]		@ offload A[2][4]
	vst1.64		{d18}, [r1,:64]		@ offload A[2][4]
	veor		q14, q1,  q6		@ A[0..1][1]^A[2..3][1]
	veor		q14, q1,  q6		@ A[0..1][1]^A[2..3][1]
	veor		q15, q2,  q7		@ A[0..1][2]^A[2..3][2]
	veor		q15, q2,  q7		@ A[0..1][2]^A[2..3][2]
	veor		d26, d26, d27		@ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0]
	veor		d26, d26, d27		@ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0]
@@ -1149,10 +1149,10 @@ KeccakF1600_neon:
	veor		d16, d16, d28		@ A[2][3] ^= C[2]
	veor		d16, d16, d28		@ A[2][3] ^= C[2]
	veor		d17, d17, d28		@ A[3][3] ^= C[2]
	veor		d17, d17, d28		@ A[3][3] ^= C[2]
	veor		d23, d23, d28		@ A[4][3] ^= C[2]
	veor		d23, d23, d28		@ A[4][3] ^= C[2]
	vld1.64		{q4},  [r0:64]		@ restore A[0..1][4]
	vld1.64		{q4},  [r0,:64]		@ restore A[0..1][4]
	vmov		d28, d29
	vmov		d28, d29


	vld1.64		{d18}, [r1:64]		@ restore A[2][4]
	vld1.64		{d18}, [r1,:64]		@ restore A[2][4]
	veor		q2,  q2,  q13		@ A[0..1][2] ^= D[2]
	veor		q2,  q2,  q13		@ A[0..1][2] ^= D[2]
	veor		q7,  q7,  q13		@ A[2..3][2] ^= D[2]
	veor		q7,  q7,  q13		@ A[2..3][2] ^= D[2]
	veor		d22, d22, d27		@ A[4][2]    ^= D[2]
	veor		d22, d22, d27		@ A[4][2]    ^= D[2]
@@ -1227,7 +1227,7 @@ KeccakF1600_neon:
	veor		q13, q13, q0		@ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2])
	veor		q13, q13, q0		@ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2])
	veor		q14, q14, q1		@ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3])
	veor		q14, q14, q1		@ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3])
	veor		q2,  q2,  q15		@ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
	veor		q2,  q2,  q15		@ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
	vst1.64		{q13}, [r0:64]		@ offload A[0..1][0]
	vst1.64		{q13}, [r0,:64]		@ offload A[0..1][0]
	vbic		q13, q0,  q4
	vbic		q13, q0,  q4
	vbic		q15, q1,  q0
	vbic		q15, q1,  q0
	vmov		q1,  q14		@ A[0..1][1]
	vmov		q1,  q14		@ A[0..1][1]
@@ -1248,10 +1248,10 @@ KeccakF1600_neon:
	vmov		q14, q10		@ A[4][0..1]
	vmov		q14, q10		@ A[4][0..1]
	veor		q9,  q9,  q13		@ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])
	veor		q9,  q9,  q13		@ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])


	vld1.64		d25, [r2:64]!		@ Iota[i++]
	vld1.64		d25, [r2,:64]!		@ Iota[i++]
	vbic		d26, d22, d21
	vbic		d26, d22, d21
	vbic		d27, d23, d22
	vbic		d27, d23, d22
	vld1.64		{q0}, [r0:64]		@ restore A[0..1][0]
	vld1.64		{q0}, [r0,:64]		@ restore A[0..1][0]
	veor		d20, d20, d26		@ A[4][0] ^= (~A[4][1] & A[4][2])
	veor		d20, d20, d26		@ A[4][0] ^= (~A[4][1] & A[4][2])
	vbic		d26, d24, d23
	vbic		d26, d24, d23
	veor		d21, d21, d27		@ A[4][1] ^= (~A[4][2] & A[4][3])
	veor		d21, d21, d27		@ A[4][1] ^= (~A[4][2] & A[4][3])
@@ -1279,32 +1279,32 @@ SHA3_absorb_neon:
	mov	r5, r2			@ len
	mov	r5, r2			@ len
	mov	r6, r3			@ bsz
	mov	r6, r3			@ bsz


	vld1.32	{d0}, [r0:64]!		@ A[0][0]
	vld1.32	{d0}, [r0,:64]!		@ A[0][0]
	vld1.32	{d2}, [r0:64]!		@ A[0][1]
	vld1.32	{d2}, [r0,:64]!		@ A[0][1]
	vld1.32	{d4}, [r0:64]!		@ A[0][2]
	vld1.32	{d4}, [r0,:64]!		@ A[0][2]
	vld1.32	{d6}, [r0:64]!		@ A[0][3]
	vld1.32	{d6}, [r0,:64]!		@ A[0][3]
	vld1.32	{d8}, [r0:64]!		@ A[0][4]
	vld1.32	{d8}, [r0,:64]!		@ A[0][4]


	vld1.32	{d1}, [r0:64]!		@ A[1][0]
	vld1.32	{d1}, [r0,:64]!		@ A[1][0]
	vld1.32	{d3}, [r0:64]!		@ A[1][1]
	vld1.32	{d3}, [r0,:64]!		@ A[1][1]
	vld1.32	{d5}, [r0:64]!		@ A[1][2]
	vld1.32	{d5}, [r0,:64]!		@ A[1][2]
	vld1.32	{d7}, [r0:64]!		@ A[1][3]
	vld1.32	{d7}, [r0,:64]!		@ A[1][3]
	vld1.32	{d9}, [r0:64]!		@ A[1][4]
	vld1.32	{d9}, [r0,:64]!		@ A[1][4]


	vld1.32	{d10}, [r0:64]!		@ A[2][0]
	vld1.32	{d10}, [r0,:64]!		@ A[2][0]
	vld1.32	{d12}, [r0:64]!		@ A[2][1]
	vld1.32	{d12}, [r0,:64]!		@ A[2][1]
	vld1.32	{d14}, [r0:64]!		@ A[2][2]
	vld1.32	{d14}, [r0,:64]!		@ A[2][2]
	vld1.32	{d16}, [r0:64]!		@ A[2][3]
	vld1.32	{d16}, [r0,:64]!		@ A[2][3]
	vld1.32	{d18}, [r0:64]!		@ A[2][4]
	vld1.32	{d18}, [r0,:64]!		@ A[2][4]


	vld1.32	{d11}, [r0:64]!		@ A[3][0]
	vld1.32	{d11}, [r0,:64]!		@ A[3][0]
	vld1.32	{d13}, [r0:64]!		@ A[3][1]
	vld1.32	{d13}, [r0,:64]!		@ A[3][1]
	vld1.32	{d15}, [r0:64]!		@ A[3][2]
	vld1.32	{d15}, [r0,:64]!		@ A[3][2]
	vld1.32	{d17}, [r0:64]!		@ A[3][3]
	vld1.32	{d17}, [r0,:64]!		@ A[3][3]
	vld1.32	{d19}, [r0:64]!		@ A[3][4]
	vld1.32	{d19}, [r0,:64]!		@ A[3][4]


	vld1.32	{d20-d23}, [r0:64]!	@ A[4][0..3]
	vld1.32	{d20-d23}, [r0,:64]!	@ A[4][0..3]
	vld1.32	{d24}, [r0:64]		@ A[4][4]
	vld1.32	{d24}, [r0,:64]		@ A[4][4]
	sub	r0, r0, #24*8		@ rewind
	sub	r0, r0, #24*8		@ rewind
	b	.Loop_absorb_neon
	b	.Loop_absorb_neon


@@ -1411,32 +1411,32 @@ SHA3_absorb_neon:


.align	4
.align	4
.Labsorbed_neon:
.Labsorbed_neon:
	vst1.32	{d0}, [r0:64]!		@ A[0][0..4]
	vst1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vst1.32	{d2}, [r0:64]!
	vst1.32	{d2}, [r0,:64]!
	vst1.32	{d4}, [r0:64]!
	vst1.32	{d4}, [r0,:64]!
	vst1.32	{d6}, [r0:64]!
	vst1.32	{d6}, [r0,:64]!
	vst1.32	{d8}, [r0:64]!
	vst1.32	{d8}, [r0,:64]!


	vst1.32	{d1}, [r0:64]!		@ A[1][0..4]
	vst1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vst1.32	{d3}, [r0:64]!
	vst1.32	{d3}, [r0,:64]!
	vst1.32	{d5}, [r0:64]!
	vst1.32	{d5}, [r0,:64]!
	vst1.32	{d7}, [r0:64]!
	vst1.32	{d7}, [r0,:64]!
	vst1.32	{d9}, [r0:64]!
	vst1.32	{d9}, [r0,:64]!


	vst1.32	{d10}, [r0:64]!		@ A[2][0..4]
	vst1.32	{d10}, [r0,:64]!		@ A[2][0..4]
	vst1.32	{d12}, [r0:64]!
	vst1.32	{d12}, [r0,:64]!
	vst1.32	{d14}, [r0:64]!
	vst1.32	{d14}, [r0,:64]!
	vst1.32	{d16}, [r0:64]!
	vst1.32	{d16}, [r0,:64]!
	vst1.32	{d18}, [r0:64]!
	vst1.32	{d18}, [r0,:64]!


	vst1.32	{d11}, [r0:64]!		@ A[3][0..4]
	vst1.32	{d11}, [r0,:64]!		@ A[3][0..4]
	vst1.32	{d13}, [r0:64]!
	vst1.32	{d13}, [r0,:64]!
	vst1.32	{d15}, [r0:64]!
	vst1.32	{d15}, [r0,:64]!
	vst1.32	{d17}, [r0:64]!
	vst1.32	{d17}, [r0,:64]!
	vst1.32	{d19}, [r0:64]!
	vst1.32	{d19}, [r0,:64]!


	vst1.32	{d20-d23}, [r0:64]!	@ A[4][0..4]
	vst1.32	{d20-d23}, [r0,:64]!	@ A[4][0..4]
	vst1.32	{d24}, [r0:64]
	vst1.32	{d24}, [r0,:64]


	mov	r0, r5			@ return value
	mov	r0, r5			@ return value
	vldmia	sp!, {d8-d15}
	vldmia	sp!, {d8-d15}
@@ -1471,64 +1471,64 @@ SHA3_squeeze_neon:


	vstmdb	sp!,  {d8-d15}
	vstmdb	sp!,  {d8-d15}


	vld1.32	{d0}, [r0:64]!		@ A[0][0..4]
	vld1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vld1.32	{d2}, [r0:64]!
	vld1.32	{d2}, [r0,:64]!
	vld1.32	{d4}, [r0:64]!
	vld1.32	{d4}, [r0,:64]!
	vld1.32	{d6}, [r0:64]!
	vld1.32	{d6}, [r0,:64]!
	vld1.32	{d8}, [r0:64]!
	vld1.32	{d8}, [r0,:64]!


	vld1.32	{d1}, [r0:64]!		@ A[1][0..4]
	vld1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vld1.32	{d3}, [r0:64]!
	vld1.32	{d3}, [r0,:64]!
	vld1.32	{d5}, [r0:64]!
	vld1.32	{d5}, [r0,:64]!
	vld1.32	{d7}, [r0:64]!
	vld1.32	{d7}, [r0,:64]!
	vld1.32	{d9}, [r0:64]!
	vld1.32	{d9}, [r0,:64]!


	vld1.32	{d10}, [r0:64]!		@ A[2][0..4]
	vld1.32	{d10}, [r0,:64]!		@ A[2][0..4]
	vld1.32	{d12}, [r0:64]!
	vld1.32	{d12}, [r0,:64]!
	vld1.32	{d14}, [r0:64]!
	vld1.32	{d14}, [r0,:64]!
	vld1.32	{d16}, [r0:64]!
	vld1.32	{d16}, [r0,:64]!
	vld1.32	{d18}, [r0:64]!
	vld1.32	{d18}, [r0,:64]!


	vld1.32	{d11}, [r0:64]!		@ A[3][0..4]
	vld1.32	{d11}, [r0,:64]!		@ A[3][0..4]
	vld1.32	{d13}, [r0:64]!
	vld1.32	{d13}, [r0,:64]!
	vld1.32	{d15}, [r0:64]!
	vld1.32	{d15}, [r0,:64]!
	vld1.32	{d17}, [r0:64]!
	vld1.32	{d17}, [r0,:64]!
	vld1.32	{d19}, [r0:64]!
	vld1.32	{d19}, [r0,:64]!


	vld1.32	{d20-d23}, [r0:64]!	@ A[4][0..4]
	vld1.32	{d20-d23}, [r0,:64]!	@ A[4][0..4]
	vld1.32	{d24}, [r0:64]
	vld1.32	{d24}, [r0,:64]
	sub	r0, r0, #24*8		@ rewind
	sub	r0, r0, #24*8		@ rewind


	bl	KeccakF1600_neon
	bl	KeccakF1600_neon


	mov	r12, r0			@ A_flat
	mov	r12, r0			@ A_flat
	vst1.32	{d0}, [r0:64]!		@ A[0][0..4]
	vst1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vst1.32	{d2}, [r0:64]!
	vst1.32	{d2}, [r0,:64]!
	vst1.32	{d4}, [r0:64]!
	vst1.32	{d4}, [r0,:64]!
	vst1.32	{d6}, [r0:64]!
	vst1.32	{d6}, [r0,:64]!
	vst1.32	{d8}, [r0:64]!
	vst1.32	{d8}, [r0,:64]!


	vst1.32	{d1}, [r0:64]!		@ A[1][0..4]
	vst1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vst1.32	{d3}, [r0:64]!
	vst1.32	{d3}, [r0,:64]!
	vst1.32	{d5}, [r0:64]!
	vst1.32	{d5}, [r0,:64]!
	vst1.32	{d7}, [r0:64]!
	vst1.32	{d7}, [r0,:64]!
	vst1.32	{d9}, [r0:64]!
	vst1.32	{d9}, [r0,:64]!


	vst1.32	{d10}, [r0:64]!		@ A[2][0..4]
	vst1.32	{d10}, [r0,:64]!		@ A[2][0..4]
	vst1.32	{d12}, [r0:64]!
	vst1.32	{d12}, [r0,:64]!
	vst1.32	{d14}, [r0:64]!
	vst1.32	{d14}, [r0,:64]!
	vst1.32	{d16}, [r0:64]!
	vst1.32	{d16}, [r0,:64]!
	vst1.32	{d18}, [r0:64]!
	vst1.32	{d18}, [r0,:64]!


	vst1.32	{d11}, [r0:64]!		@ A[3][0..4]
	vst1.32	{d11}, [r0,:64]!		@ A[3][0..4]
	vst1.32	{d13}, [r0:64]!
	vst1.32	{d13}, [r0,:64]!
	vst1.32	{d15}, [r0:64]!
	vst1.32	{d15}, [r0,:64]!
	vst1.32	{d17}, [r0:64]!
	vst1.32	{d17}, [r0,:64]!
	vst1.32	{d19}, [r0:64]!
	vst1.32	{d19}, [r0,:64]!


	vst1.32	{d20-d23}, [r0:64]!	@ A[4][0..4]
	vst1.32	{d20-d23}, [r0,:64]!	@ A[4][0..4]
	mov	r14, r6			@ bsz
	mov	r14, r6			@ bsz
	vst1.32	{d24}, [r0:64]
	vst1.32	{d24}, [r0,:64]
	mov	r0,  r12		@ rewind
	mov	r0,  r12		@ rewind


	vldmia	sp!, {d8-d15}
	vldmia	sp!, {d8-d15}