Commit 45921723 authored by Markus Stockhausen, committed by Richard Levitte
Browse files

MIPS32R2 provides the EXT instruction to extract bits from
registers. As the AES table is already 1K aligned we can
use it everywhere and speed up table address calculation by
10%. Performance numbers:

decryption         16B       64B      256B     1024B     8192B
-------------------------------------------------------------------
aes-256-cbc   5636.84k  6443.26k  6689.02k  6752.94k  6766.59k bef.
aes-256-cbc   6200.31k  7195.71k  7504.30k  7585.11k  7599.45k aft.
-------------------------------------------------------------------
aes-128-cbc   7313.85k  8653.67k  9079.55k  9188.35k  9205.08k bef.
aes-128-cbc   7925.38k  9557.99k 10092.37k 10232.15k 10272.77k aft.

encryption         16B       64B      256B     1024B     8192B
-------------------------------------------------------------------
aes-256-cbc   6009.65k  6592.70k  6766.59k  6806.87k  6815.74k bef.
aes-256-cbc   6643.93k  7388.69k  7605.33k  7657.81k  7675.90k aft.
-------------------------------------------------------------------
aes-128-cbc   7862.09k  8892.48k  9214.04k  9291.78k  9311.57k bef.
aes-128-cbc   8639.29k  9881.17k 10265.86k 10363.56k 10392.92k aft.

Reviewed-by: Paul Dale <paul.dale@oracle.com>
Reviewed-by: Richard Levitte <levitte@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/8206)
parent 54d00677
Loading
Loading
Loading
Loading
+85 −49
Original line number Diff line number Diff line
#! /usr/bin/env perl
# Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
# Copyright 2010-2019 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
@@ -34,6 +34,11 @@
# instead, code path is chosen upon pre-process time, pass -mips32r2
# or/and -msmartmips.

# February 2019
#
# Normalize MIPS32R2 AES table address calculation by always using the EXT
# instruction. This reduces the standard codebase by another 10%.

######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if
@@ -223,6 +228,33 @@ _mips_AES_encrypt:
	ext	$i0,$s1,16,8

	_xtr	$i0,$s1,16-2
#else
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	move	$i0,$Tbl
	move	$i1,$Tbl
	move	$i2,$Tbl
	move	$i3,$Tbl
	ext	$t0,$s1,16,8
.Loop_enc:
	ext	$t1,$s2,16,8
	ext	$t2,$s3,16,8
	ext	$t3,$s0,16,8
	$PTR_INS $i0,$t0,2,8
	$PTR_INS $i1,$t1,2,8
	$PTR_INS $i2,$t2,2,8
	$PTR_INS $i3,$t3,2,8
	lw	$t0,0($i0)		# Te1[s1>>16]
	ext	$t4,$s2,8,8
	lw	$t1,0($i1)		# Te1[s2>>16]
	ext	$t5,$s3,8,8
	lw	$t2,0($i2)		# Te1[s3>>16]
	ext	$t6,$s0,8,8
	lw	$t3,0($i3)		# Te1[s0>>16]
	ext	$t7,$s1,8,8
	$PTR_INS $i0,$t4,2,8
	$PTR_INS $i1,$t5,2,8
	$PTR_INS $i2,$t6,2,8
	$PTR_INS $i3,$t7,2,8
#else
	_xtr	$i0,$s1,16-2
.Loop_enc:
@@ -237,16 +269,6 @@ _mips_AES_encrypt:
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	lw	$t0,0($i0)		# Te1[s1>>16]
	_xtr	$i0,$s2,8-2
	lw	$t1,0($i1)		# Te1[s2>>16]
	_xtr	$i1,$s3,8-2
	lw	$t2,0($i2)		# Te1[s3>>16]
	_xtr	$i2,$s0,8-2
	lw	$t3,0($i3)		# Te1[s0>>16]
	_xtr	$i3,$s1,8-2
#else
	lwl	$t0,3($i0)		# Te1[s1>>16]
	lwl	$t1,3($i1)		# Te1[s2>>16]
	lwl	$t2,3($i2)		# Te1[s3>>16]
@@ -259,7 +281,6 @@ _mips_AES_encrypt:
	_xtr	$i2,$s0,8-2
	lwr	$t3,2($i3)		# Te1[s0>>16]
	_xtr	$i3,$s1,8-2
#endif
	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
@@ -268,6 +289,7 @@ _mips_AES_encrypt:
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
#endif
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	rotr	$t0,$t0,8
	rotr	$t1,$t1,8
@@ -275,22 +297,18 @@ _mips_AES_encrypt:
	rotr	$t3,$t3,8
# if defined(_MIPSEL)
	lw	$t4,0($i0)		# Te2[s2>>8]
	_xtr	$i0,$s3,0-2
	ext	$t8,$s3,0,8
	lw	$t5,0($i1)		# Te2[s3>>8]
	_xtr	$i1,$s0,0-2
	ext	$t9,$s0,0,8
	lw	$t6,0($i2)		# Te2[s0>>8]
	_xtr	$i2,$s1,0-2
	ext	$t10,$s1,0,8
	lw	$t7,0($i3)		# Te2[s1>>8]
	_xtr	$i3,$s2,0-2
	ext	$t11,$s2,0,8
	$PTR_INS $i0,$t8,2,8
	$PTR_INS $i1,$t9,2,8
	$PTR_INS $i2,$t10,2,8
	$PTR_INS $i3,$t11,2,8

	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lw	$t8,0($i0)		# Te3[s3]
	$PTR_INS $i0,$s0,2,8
	lw	$t9,0($i1)		# Te3[s0]
@@ -411,6 +429,9 @@ _mips_AES_encrypt:
	xor	$s3,$t3
	.set	noreorder
	bnez	$cnt,.Loop_enc
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	ext	$t0,$s1,16,8
#endif
	_xtr	$i0,$s1,16-2
#endif

@@ -811,6 +832,33 @@ _mips_AES_decrypt:
	ext	$i0,$s3,16,8

	_xtr	$i0,$s3,16-2
#else
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	move	$i0,$Tbl
	move	$i1,$Tbl
	move	$i2,$Tbl
	move	$i3,$Tbl
	ext	$t0,$s3,16,8
.Loop_dec:
	ext	$t1,$s0,16,8
	ext	$t2,$s1,16,8
	ext	$t3,$s2,16,8
	$PTR_INS $i0,$t0,2,8
	$PTR_INS $i1,$t1,2,8
	$PTR_INS $i2,$t2,2,8
	$PTR_INS $i3,$t3,2,8
	lw	$t0,0($i0)		# Td1[s3>>16]
	ext	$t4,$s2,8,8
	lw	$t1,0($i1)		# Td1[s0>>16]
	ext	$t5,$s3,8,8
	lw	$t2,0($i2)		# Td1[s1>>16]
	ext	$t6,$s0,8,8
	lw	$t3,0($i3)		# Td1[s2>>16]
	ext	$t7,$s1,8,8
	$PTR_INS $i0,$t4,2,8
	$PTR_INS $i1,$t5,2,8
	$PTR_INS $i2,$t6,2,8
	$PTR_INS $i3,$t7,2,8
#else
	_xtr	$i0,$s3,16-2
.Loop_dec:
@@ -825,16 +873,6 @@ _mips_AES_decrypt:
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	lw	$t0,0($i0)		# Td1[s3>>16]
	_xtr	$i0,$s2,8-2
	lw	$t1,0($i1)		# Td1[s0>>16]
	_xtr	$i1,$s3,8-2
	lw	$t2,0($i2)		# Td1[s1>>16]
	_xtr	$i2,$s0,8-2
	lw	$t3,0($i3)		# Td1[s2>>16]
	_xtr	$i3,$s1,8-2
#else
	lwl	$t0,3($i0)		# Td1[s3>>16]
	lwl	$t1,3($i1)		# Td1[s0>>16]
	lwl	$t2,3($i2)		# Td1[s1>>16]
@@ -847,8 +885,6 @@ _mips_AES_decrypt:
	_xtr	$i2,$s0,8-2
	lwr	$t3,2($i3)		# Td1[s2>>16]
	_xtr	$i3,$s1,8-2
#endif

	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
@@ -857,6 +893,7 @@ _mips_AES_decrypt:
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
#endif
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	rotr	$t0,$t0,8
	rotr	$t1,$t1,8
@@ -864,22 +901,17 @@ _mips_AES_decrypt:
	rotr	$t3,$t3,8
# if defined(_MIPSEL)
	lw	$t4,0($i0)		# Td2[s2>>8]
	_xtr	$i0,$s1,0-2
	ext	$t8,$s1,0,8
	lw	$t5,0($i1)		# Td2[s3>>8]
	_xtr	$i1,$s2,0-2
	ext	$t9,$s2,0,8
	lw	$t6,0($i2)		# Td2[s0>>8]
	_xtr	$i2,$s3,0-2
	ext	$t10,$s3,0,8
	lw	$t7,0($i3)		# Td2[s1>>8]
	_xtr	$i3,$s0,0-2

	and	$i0,0x3fc
	and	$i1,0x3fc
	and	$i2,0x3fc
	and	$i3,0x3fc
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	ext	$t11,$s0,0,8
	$PTR_INS $i0,$t8,2,8
	$PTR_INS $i1,$t9,2,8
	$PTR_INS $i2,$t10,2,8
	$PTR_INS $i3,$t11,2,8
	lw	$t8,0($i0)		# Td3[s1]
	$PTR_INS $i0,$s0,2,8
	lw	$t9,0($i1)		# Td3[s2]
@@ -1001,6 +1033,10 @@ _mips_AES_decrypt:
	xor	$s3,$t3
	.set	noreorder
	bnez	$cnt,.Loop_dec
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	ext	$t0,$s3,16,8
#endif

	_xtr	$i0,$s3,16-2
#endif