Loading crypto/aes/asm/aes-s390x.pl +1 −1 Original line number Diff line number Diff line Loading @@ -23,7 +23,7 @@ # for CBC is not utilized, nor multiple blocks are ever processed. # Then software key schedule can be postponed till hardware support # detection... Performance improvement over assembler is reportedly # ~2.5x, but can reach >15x [naturally on larger chunks] if proper # ~2.5x, but can reach >8x [naturally on larger chunks] if proper # support is implemented. $t1="%r0"; Loading crypto/sha/asm/sha1-s390x.pl +2 −1 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ # # Performance is >30% better than gcc 3.3 generated code. But the real # twist is that SHA1 hardware support is detected and utilized. In # which case performance can reach further >8x for larger chunks. # which case performance can reach further >4.5x for larger chunks. $kimdfunc=1; # magic function code for kimd instruction Loading Loading @@ -160,6 +160,7 @@ $code.=<<___ if ($kimdfunc); lgr %r2,$inp sllg %r3,$len,6 .long 0xb93e0002 # kimd %r0,%r2 brc 1,.-4 # pay attention to "partial completion" br %r14 .Lsoftware: ___ Loading crypto/sha/asm/sha512-s390x.pl +2 −1 Original line number Diff line number Diff line Loading @@ -16,7 +16,7 @@ # "pathologically" high, in particular in comparison to other SHA # modules). But the real twist is that it detects if hardware support # for SHA256 is available and in such case utilizes it. Then the # performance can reach >12x of assembler one for larger chunks. # performance can reach >6.5x of assembler one for larger chunks. # # sha512_block_data_order is ~70% faster than gcc 3.3 generated code. Loading Loading @@ -219,6 +219,7 @@ $code.=<<___ if ($kimdfunc); lgr %r2,$inp sllg %r3,$len,`log(16*$SZ)/log(2)` .long 0xb93e0002 # kimd %r0,%r2 brc 1,.-4 # pay attention to "partial completion" br %r14 .Lsoftware: ___ Loading Loading
crypto/aes/asm/aes-s390x.pl +1 −1 Original line number Diff line number Diff line Loading @@ -23,7 +23,7 @@ # for CBC is not utilized, nor multiple blocks are ever processed. # Then software key schedule can be postponed till hardware support # detection... Performance improvement over assembler is reportedly # ~2.5x, but can reach >15x [naturally on larger chunks] if proper # ~2.5x, but can reach >8x [naturally on larger chunks] if proper # support is implemented. $t1="%r0"; Loading
crypto/sha/asm/sha1-s390x.pl +2 −1 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ # # Performance is >30% better than gcc 3.3 generated code. But the real # twist is that SHA1 hardware support is detected and utilized. In # which case performance can reach further >8x for larger chunks. # which case performance can reach further >4.5x for larger chunks. $kimdfunc=1; # magic function code for kimd instruction Loading Loading @@ -160,6 +160,7 @@ $code.=<<___ if ($kimdfunc); lgr %r2,$inp sllg %r3,$len,6 .long 0xb93e0002 # kimd %r0,%r2 brc 1,.-4 # pay attention to "partial completion" br %r14 .Lsoftware: ___ Loading
crypto/sha/asm/sha512-s390x.pl +2 −1 Original line number Diff line number Diff line Loading @@ -16,7 +16,7 @@ # "pathologically" high, in particular in comparison to other SHA # modules). But the real twist is that it detects if hardware support # for SHA256 is available and in such case utilizes it. Then the # performance can reach >12x of assembler one for larger chunks. # performance can reach >6.5x of assembler one for larger chunks. # # sha512_block_data_order is ~70% faster than gcc 3.3 generated code. Loading Loading @@ -219,6 +219,7 @@ $code.=<<___ if ($kimdfunc); lgr %r2,$inp sllg %r3,$len,`log(16*$SZ)/log(2)` .long 0xb93e0002 # kimd %r0,%r2 brc 1,.-4 # pay attention to "partial completion" br %r14 .Lsoftware: ___ Loading