Commit a5fd24d1 authored by Andy Polyakov
Browse files

aesni-sha256-x86_64.pl: fix crash on AMD Jaguar.



It was also found that the stitch performs suboptimally on AMD Jaguar, hence
execution is limited to XOP-capable and Intel processors.

Reviewed-by: Kurt Roeckx <kurt@openssl.org>
parent 39e46af6
Loading
Loading
Loading
Loading
+2 −5
Original line number Diff line number Diff line
@@ -140,11 +140,8 @@ $code.=<<___ if ($avx>1);
	je	${func}_avx2
___
$code.=<<___;
	and	\$`1<<30`,%eax			# mask "Intel CPU" bit
	and	\$`1<<28|1<<9`,%r10d		# mask AVX+SSSE3 bits
	or	%eax,%r10d
	cmp	\$`1<<28|1<<9|1<<30`,%r10d
	je	${func}_avx
	and	\$`1<<28`,%r10d			# check for AVX
	jnz	${func}_avx
	ud2
___
						}
+11 −0
Original line number Diff line number Diff line
@@ -498,7 +498,18 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx,
            iv = AES_BLOCK_SIZE;

#  if defined(STITCHED_CALL)
        /*
         * Assembly stitch handles AVX-capable processors, but its
         * performance is not optimal on AMD Jaguar, ~40% worse, for
         * unknown reasons. Incidentally processor in question supports
         * AVX, but not AMD-specific XOP extension, which can be used
         * to identify it and avoid stitch invocation. So once we
         * establish that the current CPU supports AVX, we also check
         * that it is either XOP-capable (Bulldozer-based) or GenuineIntel.
         */
        if (OPENSSL_ia32cap_P[1] & (1 << (60 - 32)) && /* AVX? */
            ((OPENSSL_ia32cap_P[1] & (1 << (43 - 32))) /* XOP? */
             | (OPENSSL_ia32cap_P[0] & (1<<30))) &&    /* "Intel CPU"? */
            plen > (sha_off + iv) &&
            (blocks = (plen - (sha_off + iv)) / SHA256_CBLOCK)) {
            SHA256_Update(&key->md, in + iv, sha_off);