Loading crypto/aes/asm/aesni-x86.pl +124 −123 Original line number Original line Diff line number Diff line #!/usr/bin/env perl #!/usr/bin/env perl # ==================================================================== # ==================================================================== # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. # details see http://www.openssl.org/~appro/cryptogams/. Loading Loading @@ -208,25 +208,26 @@ sub aesni_generate3 &function_begin_B("_aesni_${p}rypt3"); &function_begin_B("_aesni_${p}rypt3"); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(0,$key)); &shr ($rounds,1); &shl ($rounds,4); &$movekey ($rndkey1,&QWP(16,$key)); &$movekey ($rndkey1,&QWP(16,$key)); &lea ($key,&DWP(32,$key)); &xorps ($inout0,$rndkey0); &xorps ($inout0,$rndkey0); &pxor ($inout1,$rndkey0); &pxor ($inout1,$rndkey0); &pxor ($inout2,$rndkey0); &pxor ($inout2,$rndkey0); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(32,$key)); &lea ($key,&DWP(32,$key,$rounds)); &neg ($rounds); &add ($rounds,16); &set_label("${p}3_loop"); &set_label("${p}3_loop"); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; &dec ($rounds); eval"&aes${p} ($inout2,$rndkey1)"; eval"&aes${p} ($inout2,$rndkey1)"; &$movekey ($rndkey1,&QWP(16,$key)); &$movekey ($rndkey1,&QWP(0,$key,$rounds)); &add ($rounds,32); eval"&aes${p} ($inout0,$rndkey0)"; eval"&aes${p} ($inout0,$rndkey0)"; eval"&aes${p} ($inout1,$rndkey0)"; eval"&aes${p} ($inout1,$rndkey0)"; &lea ($key,&DWP(32,$key)); eval"&aes${p} ($inout2,$rndkey0)"; eval"&aes${p} ($inout2,$rndkey0)"; &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jnz (&label("${p}3_loop")); &jnz (&label("${p}3_loop")); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; Loading @@ -248,27 +249,29 @@ sub aesni_generate4 &function_begin_B("_aesni_${p}rypt4"); &function_begin_B("_aesni_${p}rypt4"); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey1,&QWP(16,$key)); &$movekey ($rndkey1,&QWP(16,$key)); &shr ($rounds,1); &shl ($rounds,4); &lea ($key,&DWP(32,$key)); &xorps ($inout0,$rndkey0); &xorps ($inout0,$rndkey0); &pxor ($inout1,$rndkey0); &pxor ($inout1,$rndkey0); &pxor ($inout2,$rndkey0); &pxor ($inout2,$rndkey0); &pxor ($inout3,$rndkey0); &pxor ($inout3,$rndkey0); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(32,$key)); &lea ($key,&DWP(32,$key,$rounds)); &neg ($rounds); &data_byte (0x0f,0x1f,0x40,0x00); &add ($rounds,16); &set_label("${p}4_loop"); &set_label("${p}4_loop"); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; &dec ($rounds); eval"&aes${p} ($inout2,$rndkey1)"; eval"&aes${p} ($inout2,$rndkey1)"; eval"&aes${p} ($inout3,$rndkey1)"; eval"&aes${p} ($inout3,$rndkey1)"; &$movekey ($rndkey1,&QWP(16,$key)); &$movekey ($rndkey1,&QWP(0,$key,$rounds)); &add ($rounds,32); eval"&aes${p} ($inout0,$rndkey0)"; eval"&aes${p} ($inout0,$rndkey0)"; eval"&aes${p} ($inout1,$rndkey0)"; eval"&aes${p} ($inout1,$rndkey0)"; &lea ($key,&DWP(32,$key)); eval"&aes${p} ($inout2,$rndkey0)"; eval"&aes${p} ($inout2,$rndkey0)"; eval"&aes${p} ($inout3,$rndkey0)"; eval"&aes${p} ($inout3,$rndkey0)"; &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jnz (&label("${p}4_loop")); &jnz (&label("${p}4_loop")); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout0,$rndkey1)"; Loading @@ -289,43 +292,43 @@ sub aesni_generate6 &function_begin_B("_aesni_${p}rypt6"); &function_begin_B("_aesni_${p}rypt6"); &static_label("_aesni_${p}rypt6_enter"); &static_label("_aesni_${p}rypt6_enter"); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(0,$key)); &shr ($rounds,1); &shl ($rounds,4); &$movekey ($rndkey1,&QWP(16,$key)); &$movekey ($rndkey1,&QWP(16,$key)); &lea ($key,&DWP(32,$key)); &xorps ($inout0,$rndkey0); &xorps ($inout0,$rndkey0); &pxor ($inout1,$rndkey0); # pxor does better here &pxor ($inout1,$rndkey0); # pxor does better here eval"&aes${p} ($inout0,$rndkey1)"; &pxor ($inout2,$rndkey0); &pxor ($inout2,$rndkey0); eval"&aes${p} ($inout1,$rndkey1)"; eval"&aes${p} ($inout0,$rndkey1)"; &pxor ($inout3,$rndkey0); &pxor ($inout3,$rndkey0); &dec ($rounds); eval"&aes${p} ($inout2,$rndkey1)"; &pxor ($inout4,$rndkey0); &pxor ($inout4,$rndkey0); eval"&aes${p} ($inout3,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; &lea ($key,&DWP(32,$key,$rounds)); &neg ($rounds); eval"&aes${p} ($inout2,$rndkey1)"; &pxor ($inout5,$rndkey0); &pxor ($inout5,$rndkey0); &add ($rounds,16); eval"&aes${p} ($inout3,$rndkey1)"; eval"&aes${p} ($inout4,$rndkey1)"; eval"&aes${p} ($inout4,$rndkey1)"; &$movekey ($rndkey0,&QWP(0,$key)); eval"&aes${p} ($inout5,$rndkey1)"; eval"&aes${p} ($inout5,$rndkey1)"; &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jmp (&label("_aesni_${p}rypt6_enter")); &jmp (&label("_aesni_${p}rypt6_enter")); &set_label("${p}6_loop",16); &set_label("${p}6_loop",16); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; &dec ($rounds); eval"&aes${p} ($inout2,$rndkey1)"; eval"&aes${p} ($inout2,$rndkey1)"; eval"&aes${p} ($inout3,$rndkey1)"; eval"&aes${p} ($inout3,$rndkey1)"; eval"&aes${p} ($inout4,$rndkey1)"; eval"&aes${p} ($inout4,$rndkey1)"; eval"&aes${p} ($inout5,$rndkey1)"; eval"&aes${p} ($inout5,$rndkey1)"; &set_label("_aesni_${p}rypt6_enter",16); &set_label("_aesni_${p}rypt6_enter"); &$movekey ($rndkey1,&QWP(16,$key)); &$movekey ($rndkey1,&QWP(0,$key,$rounds)); &add ($rounds,32); eval"&aes${p} ($inout0,$rndkey0)"; eval"&aes${p} ($inout0,$rndkey0)"; eval"&aes${p} ($inout1,$rndkey0)"; eval"&aes${p} ($inout1,$rndkey0)"; &lea ($key,&DWP(32,$key)); eval"&aes${p} ($inout2,$rndkey0)"; eval"&aes${p} ($inout2,$rndkey0)"; eval"&aes${p} ($inout3,$rndkey0)"; eval"&aes${p} ($inout3,$rndkey0)"; eval"&aes${p} ($inout4,$rndkey0)"; eval"&aes${p} ($inout4,$rndkey0)"; eval"&aes${p} ($inout5,$rndkey0)"; eval"&aes${p} ($inout5,$rndkey0)"; &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jnz (&label("${p}6_loop")); &jnz (&label("${p}6_loop")); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout0,$rndkey1)"; Loading Loading @@ -610,11 +613,13 @@ if ($PREFIX eq "aesni") { &mov (&DWP(24,"esp"),$key_); &mov (&DWP(24,"esp"),$key_); &mov (&DWP(28,"esp"),$key_); &mov (&DWP(28,"esp"),$key_); &shr ($rounds,1); &shl ($rounds,4); &mov ($rounds_,16); &lea ($key_,&DWP(0,$key)); &lea ($key_,&DWP(0,$key)); &movdqa ($inout3,&QWP(0,"esp")); &movdqa ($inout3,&QWP(0,"esp")); &movdqa ($inout0,$ivec); &movdqa ($inout0,$ivec); &mov ($rounds_,$rounds); &lea ($key,&DWP(32,$key,$rounds)); &sub ($rounds_,$rounds); &pshufb ($ivec,$inout3); &pshufb ($ivec,$inout3); &set_label("ccm64_enc_outer"); &set_label("ccm64_enc_outer"); Loading @@ -625,33 +630,31 @@ if ($PREFIX eq "aesni") { &xorps ($inout0,$rndkey0); &xorps ($inout0,$rndkey0); &$movekey ($rndkey1,&QWP(16,$key_)); &$movekey ($rndkey1,&QWP(16,$key_)); &xorps ($rndkey0,$in0); &xorps ($rndkey0,$in0); &lea ($key,&DWP(32,$key_)); &xorps ($cmac,$rndkey0); # cmac^=inp &xorps ($cmac,$rndkey0); # cmac^=inp &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(32,$key_)); &set_label("ccm64_enc2_loop"); &set_label("ccm64_enc2_loop"); &aesenc ($inout0,$rndkey1); &aesenc ($inout0,$rndkey1); &dec ($rounds); &aesenc ($cmac,$rndkey1); &aesenc ($cmac,$rndkey1); &$movekey ($rndkey1,&QWP(16,$key)); &$movekey ($rndkey1,&QWP(0,$key,$rounds)); &add ($rounds,32); &aesenc ($inout0,$rndkey0); &aesenc ($inout0,$rndkey0); &lea ($key,&DWP(32,$key)); &aesenc ($cmac,$rndkey0); &aesenc ($cmac,$rndkey0); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jnz (&label("ccm64_enc2_loop")); &jnz (&label("ccm64_enc2_loop")); &aesenc ($inout0,$rndkey1); &aesenc ($inout0,$rndkey1); &aesenc ($cmac,$rndkey1); &aesenc ($cmac,$rndkey1); &paddq ($ivec,&QWP(16,"esp")); &paddq ($ivec,&QWP(16,"esp")); &dec ($len); &aesenclast ($inout0,$rndkey0); &aesenclast ($inout0,$rndkey0); &aesenclast ($cmac,$rndkey0); &aesenclast ($cmac,$rndkey0); &dec ($len); &lea ($inp,&DWP(16,$inp)); &lea ($inp,&DWP(16,$inp)); &xorps ($in0,$inout0); # inp^=E(ivec) &xorps ($in0,$inout0); # inp^=E(ivec) &movdqa ($inout0,$ivec); &movdqa ($inout0,$ivec); &movups (&QWP(0,$out),$in0); # save output &movups (&QWP(0,$out),$in0); # save output &lea ($out,&DWP(16,$out)); &pshufb ($inout0,$inout3); &pshufb ($inout0,$inout3); &lea ($out,&DWP(16,$out)); &jnz (&label("ccm64_enc_outer")); &jnz (&label("ccm64_enc_outer")); &mov ("esp",&DWP(48,"esp")); &mov ("esp",&DWP(48,"esp")); Loading Loading @@ -700,15 +703,19 @@ if ($PREFIX eq "aesni") { { &aesni_inline_generate1("enc"); } { &aesni_inline_generate1("enc"); } else else { &call ("_aesni_encrypt1"); } { &call ("_aesni_encrypt1"); } &shl ($rounds_,4); &mov ($rounds,16); &movups ($in0,&QWP(0,$inp)); # load inp &movups ($in0,&QWP(0,$inp)); # load inp &paddq ($ivec,&QWP(16,"esp")); &paddq ($ivec,&QWP(16,"esp")); &lea ($inp,&QWP(16,$inp)); &lea ($inp,&QWP(16,$inp)); &sub ($rounds,$rounds_); &lea ($key,&DWP(32,$key_,$rounds_)); &mov ($rounds_,$rounds); &jmp (&label("ccm64_dec_outer")); &jmp (&label("ccm64_dec_outer")); &set_label("ccm64_dec_outer",16); &set_label("ccm64_dec_outer",16); &xorps ($in0,$inout0); # inp ^= E(ivec) &xorps ($in0,$inout0); # inp ^= E(ivec) &movdqa ($inout0,$ivec); &movdqa ($inout0,$ivec); &mov ($rounds,$rounds_); &movups (&QWP(0,$out),$in0); # save output &movups (&QWP(0,$out),$in0); # save output &lea ($out,&DWP(16,$out)); &lea ($out,&DWP(16,$out)); &pshufb ($inout0,$inout3); &pshufb ($inout0,$inout3); Loading @@ -717,34 +724,33 @@ if ($PREFIX eq "aesni") { &jz (&label("ccm64_dec_break")); &jz (&label("ccm64_dec_break")); &$movekey ($rndkey0,&QWP(0,$key_)); &$movekey ($rndkey0,&QWP(0,$key_)); &shr ($rounds,1); &mov ($rounds,$rounds_); &$movekey ($rndkey1,&QWP(16,$key_)); &$movekey ($rndkey1,&QWP(16,$key_)); &xorps ($in0,$rndkey0); &xorps ($in0,$rndkey0); &lea ($key,&DWP(32,$key_)); &xorps ($inout0,$rndkey0); &xorps ($inout0,$rndkey0); &xorps ($cmac,$in0); # cmac^=out &xorps ($cmac,$in0); # cmac^=out &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(32,$key_)); &set_label("ccm64_dec2_loop"); &set_label("ccm64_dec2_loop"); &aesenc ($inout0,$rndkey1); &aesenc ($inout0,$rndkey1); &dec ($rounds); &aesenc ($cmac,$rndkey1); &aesenc ($cmac,$rndkey1); &$movekey ($rndkey1,&QWP(16,$key)); &$movekey ($rndkey1,&QWP(0,$key,$rounds)); &add ($rounds,32); &aesenc ($inout0,$rndkey0); &aesenc ($inout0,$rndkey0); &lea ($key,&DWP(32,$key)); &aesenc ($cmac,$rndkey0); &aesenc ($cmac,$rndkey0); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jnz (&label("ccm64_dec2_loop")); &jnz (&label("ccm64_dec2_loop")); &movups ($in0,&QWP(0,$inp)); # load inp &movups ($in0,&QWP(0,$inp)); # load inp &paddq ($ivec,&QWP(16,"esp")); &paddq ($ivec,&QWP(16,"esp")); &aesenc ($inout0,$rndkey1); &aesenc ($inout0,$rndkey1); &aesenc ($cmac,$rndkey1); &aesenc ($cmac,$rndkey1); &lea ($inp,&QWP(16,$inp)); &aesenclast ($inout0,$rndkey0); &aesenclast ($inout0,$rndkey0); &aesenclast ($cmac,$rndkey0); &aesenclast ($cmac,$rndkey0); &lea ($inp,&QWP(16,$inp)); &jmp (&label("ccm64_dec_outer")); &jmp (&label("ccm64_dec_outer")); &set_label("ccm64_dec_break",16); &set_label("ccm64_dec_break",16); &mov ($rounds,&DWP(240,$key_)); &mov ($key,$key_); &mov ($key,$key_); if ($inline) if ($inline) { &aesni_inline_generate1("enc",$cmac,$in0); } { &aesni_inline_generate1("enc",$cmac,$in0); } Loading @@ -763,7 +769,7 @@ if ($PREFIX eq "aesni") { # const char *ivec); # const char *ivec); # # # Handles only complete blocks, operates on 32-bit counter and # Handles only complete blocks, operates on 32-bit counter and # does not update *ivec! (see engine/eng_aesni.c for details) # does not update *ivec! (see crypto/modes/ctr128.c for details) # # # stack layout: # stack layout: # 0 pshufb mask # 0 pshufb mask Loading Loading @@ -810,66 +816,61 @@ if ($PREFIX eq "aesni") { # compose 2 vectors of 3x32-bit counters # compose 2 vectors of 3x32-bit counters &bswap ($rounds_); &bswap ($rounds_); &pxor ($rndkey1,$rndkey1); &pxor ($rndkey0,$rndkey0); &pxor ($rndkey0,$rndkey0); &pxor ($rndkey1,$rndkey1); &movdqa ($inout0,&QWP(0,"esp")); # load byte-swap mask &movdqa ($inout0,&QWP(0,"esp")); # load byte-swap mask &pinsrd ($rndkey1,$rounds_,0); &pinsrd ($rndkey0,$rounds_,0); &lea ($key_,&DWP(3,$rounds_)); &lea ($key_,&DWP(3,$rounds_)); &pinsrd ($rndkey0,$key_,0); &pinsrd ($rndkey1,$key_,0); &inc ($rounds_); &inc ($rounds_); &pinsrd ($rndkey1,$rounds_,1); &pinsrd ($rndkey0,$rounds_,1); &inc ($key_); &inc ($key_); &pinsrd ($rndkey0,$key_,1); &pinsrd ($rndkey1,$key_,1); &inc ($rounds_); &inc ($rounds_); &pinsrd ($rndkey1,$rounds_,2); &pinsrd ($rndkey0,$rounds_,2); &inc ($key_); &inc ($key_); &pinsrd ($rndkey0,$key_,2); &pinsrd ($rndkey1,$key_,2); &movdqa (&QWP(48,"esp"),$rndkey1); # save 1st triplet &movdqa (&QWP(48,"esp"),$rndkey0); # save 1st triplet &pshufb ($rndkey1,$inout0); # byte swap &movdqa (&QWP(64,"esp"),$rndkey0); # save 2nd triplet &pshufb ($rndkey0,$inout0); # byte swap &pshufb ($rndkey0,$inout0); # byte swap &movdqu ($inout4,&QWP(0,$key)); # key[0] &movdqa (&QWP(64,"esp"),$rndkey1); # save 2nd triplet &pshufb ($rndkey1,$inout0); # byte swap &pshufd ($inout0,$rndkey1,3<<6); # place counter to upper dword &pshufd ($inout0,$rndkey0,3<<6); # place counter to upper dword &pshufd ($inout1,$rndkey1,2<<6); &pshufd ($inout1,$rndkey0,2<<6); &cmp ($len,6); &cmp ($len,6); &jb (&label("ctr32_tail")); &jb (&label("ctr32_tail")); &movdqa (&QWP(32,"esp"),$inout5); # save counter-less ivec &pxor ($inout5,$inout4); # counter-less ivec^key[0] &shr ($rounds,1); &shl ($rounds,4); &mov ($rounds_,16); &movdqa (&QWP(32,"esp"),$inout5); # save counter-less ivec^key[0] &mov ($key_,$key); # backup $key &mov ($key_,$key); # backup $key &mov ($rounds_,$rounds); # backup $rounds &sub ($rounds_,$rounds); # backup twisted $rounds &lea ($key,&DWP(32,$key,$rounds)); &sub ($len,6); &sub ($len,6); &jmp (&label("ctr32_loop6")); &jmp (&label("ctr32_loop6")); &set_label("ctr32_loop6",16); &set_label("ctr32_loop6",16); &pshufd ($inout2,$rndkey1,1<<6); # inlining _aesni_encrypt6's prologue gives ~6% improvement... &movdqa ($rndkey1,&QWP(32,"esp")); # pull counter-less ivec &pshufd ($inout2,$rndkey0,1<<6); &pshufd ($inout3,$rndkey0,3<<6); &movdqa ($rndkey0,&QWP(32,"esp")); # pull counter-less ivec &por ($inout0,$rndkey1); # merge counter-less ivec &pshufd ($inout3,$rndkey1,3<<6); &pshufd ($inout4,$rndkey0,2<<6); &pxor ($inout0,$rndkey0); # merge counter-less ivec &por ($inout1,$rndkey1); &pshufd ($inout4,$rndkey1,2<<6); &pshufd ($inout5,$rndkey0,1<<6); &por ($inout2,$rndkey1); &por ($inout3,$rndkey1); &por ($inout4,$rndkey1); &por ($inout5,$rndkey1); # inlining _aesni_encrypt6's prologue gives ~4% improvement... &$movekey ($rndkey0,&QWP(0,$key_)); &$movekey ($rndkey1,&QWP(16,$key_)); &lea ($key,&DWP(32,$key_)); &dec ($rounds); &pxor ($inout0,$rndkey0); &pxor ($inout1,$rndkey0); &pxor ($inout1,$rndkey0); &aesenc ($inout0,$rndkey1); &pshufd ($inout5,$rndkey1,1<<6); &$movekey ($rndkey1,&QWP(16,$key_)); &pxor ($inout2,$rndkey0); &pxor ($inout2,$rndkey0); &aesenc ($inout1,$rndkey1); &pxor ($inout3,$rndkey0); &pxor ($inout3,$rndkey0); &aesenc ($inout2,$rndkey1); &aesenc ($inout0,$rndkey1); &pxor ($inout4,$rndkey0); &pxor ($inout4,$rndkey0); &aesenc ($inout3,$rndkey1); &pxor ($inout5,$rndkey0); &pxor ($inout5,$rndkey0); &aesenc ($inout1,$rndkey1); &$movekey ($rndkey0,&QWP(32,$key_)); &mov ($rounds,$rounds_); &aesenc ($inout2,$rndkey1); &aesenc ($inout3,$rndkey1); &aesenc ($inout4,$rndkey1); &aesenc ($inout4,$rndkey1); &$movekey ($rndkey0,&QWP(0,$key)); &aesenc ($inout5,$rndkey1); &aesenc ($inout5,$rndkey1); &call (&label("_aesni_encrypt6_enter")); &call (&label("_aesni_encrypt6_enter")); Loading @@ -882,12 +883,12 @@ if ($PREFIX eq "aesni") { &movups (&QWP(0,$out),$inout0); &movups (&QWP(0,$out),$inout0); &movdqa ($rndkey0,&QWP(16,"esp")); # load increment &movdqa ($rndkey0,&QWP(16,"esp")); # load increment &xorps ($inout2,$rndkey1); &xorps ($inout2,$rndkey1); &movdqa ($rndkey1,&QWP(48,"esp")); # load 1st triplet &movdqa ($rndkey1,&QWP(64,"esp")); # load 2nd triplet &movups (&QWP(0x10,$out),$inout1); &movups (&QWP(0x10,$out),$inout1); &movups (&QWP(0x20,$out),$inout2); &movups (&QWP(0x20,$out),$inout2); &paddd ($rndkey1,$rndkey0); # 1st triplet increment &paddd ($rndkey1,$rndkey0); # 2nd triplet increment &paddd ($rndkey0,&QWP(64,"esp")); # 2nd triplet increment &paddd ($rndkey0,&QWP(48,"esp")); # 1st triplet increment &movdqa ($inout0,&QWP(0,"esp")); # load byte swap mask &movdqa ($inout0,&QWP(0,"esp")); # load byte swap mask &movups ($inout1,&QWP(0x30,$inp)); &movups ($inout1,&QWP(0x30,$inp)); Loading @@ -895,44 +896,44 @@ if ($PREFIX eq "aesni") { &xorps ($inout3,$inout1); &xorps ($inout3,$inout1); &movups ($inout1,&QWP(0x50,$inp)); &movups ($inout1,&QWP(0x50,$inp)); &lea ($inp,&DWP(0x60,$inp)); &lea ($inp,&DWP(0x60,$inp)); &movdqa (&QWP(48,"esp"),$rndkey1); # save 1st triplet &movdqa (&QWP(48,"esp"),$rndkey0); # save 1st triplet &pshufb ($rndkey1,$inout0); # byte swap &pshufb ($rndkey0,$inout0); # byte swap &xorps ($inout4,$inout2); &xorps ($inout4,$inout2); &movups (&QWP(0x30,$out),$inout3); &movups (&QWP(0x30,$out),$inout3); &xorps ($inout5,$inout1); &xorps ($inout5,$inout1); &movdqa (&QWP(64,"esp"),$rndkey0); # save 2nd triplet &movdqa (&QWP(64,"esp"),$rndkey1); # save 2nd triplet &pshufb ($rndkey0,$inout0); # byte swap &pshufb ($rndkey1,$inout0); # byte swap &movups (&QWP(0x40,$out),$inout4); &movups (&QWP(0x40,$out),$inout4); &pshufd ($inout0,$rndkey1,3<<6); &pshufd ($inout0,$rndkey0,3<<6); &movups (&QWP(0x50,$out),$inout5); &movups (&QWP(0x50,$out),$inout5); &lea ($out,&DWP(0x60,$out)); &lea ($out,&DWP(0x60,$out)); &mov ($rounds,$rounds_); &pshufd ($inout1,$rndkey0,2<<6); &pshufd ($inout1,$rndkey1,2<<6); &sub ($len,6); &sub ($len,6); &jnc (&label("ctr32_loop6")); &jnc (&label("ctr32_loop6")); &add ($len,6); &add ($len,6); &jz (&label("ctr32_ret")); &jz (&label("ctr32_ret")); &movdqu ($inout5,&QWP(0,$key_)); &mov ($key,$key_); &mov ($key,$key_); &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds &pxor ($inout5,&QWP(32,"esp")); # restore count-less ivec &movdqa ($inout5,&QWP(32,"esp")); # pull count-less ivec &mov ($rounds,&DWP(240,$key_)); # restore $rounds &set_label("ctr32_tail"); &set_label("ctr32_tail"); &por ($inout0,$inout5); &por ($inout0,$inout5); &cmp ($len,2); &cmp ($len,2); &jb (&label("ctr32_one")); &jb (&label("ctr32_one")); &pshufd ($inout2,$rndkey1,1<<6); &pshufd ($inout2,$rndkey0,1<<6); &por ($inout1,$inout5); &por ($inout1,$inout5); &je (&label("ctr32_two")); &je (&label("ctr32_two")); &pshufd ($inout3,$rndkey0,3<<6); &pshufd ($inout3,$rndkey1,3<<6); &por ($inout2,$inout5); &por ($inout2,$inout5); &cmp ($len,4); &cmp ($len,4); &jb (&label("ctr32_three")); &jb (&label("ctr32_three")); &pshufd ($inout4,$rndkey0,2<<6); &pshufd ($inout4,$rndkey1,2<<6); &por ($inout3,$inout5); &por ($inout3,$inout5); &je (&label("ctr32_four")); &je (&label("ctr32_four")); Loading Loading @@ -1057,8 +1058,10 @@ if ($PREFIX eq "aesni") { &sub ($len,16*6); &sub ($len,16*6); &jc (&label("xts_enc_short")); &jc (&label("xts_enc_short")); &shr ($rounds,1); &shl ($rounds,4); &mov ($rounds_,$rounds); &mov ($rounds_,16); &sub ($rounds_,$rounds); &lea ($key,&DWP(32,$key,$rounds)); &jmp (&label("xts_enc_loop6")); &jmp (&label("xts_enc_loop6")); &set_label("xts_enc_loop6",16); &set_label("xts_enc_loop6",16); Loading @@ -1080,6 +1083,7 @@ if ($PREFIX eq "aesni") { &pxor ($inout5,$tweak); &pxor ($inout5,$tweak); # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0] # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0] &mov ($rounds,$rounds_); # restore $rounds &movdqu ($inout1,&QWP(16*1,$inp)); &movdqu ($inout1,&QWP(16*1,$inp)); &xorps ($inout0,$rndkey0); # input^=rndkey[0] &xorps ($inout0,$rndkey0); # input^=rndkey[0] &movdqu ($inout2,&QWP(16*2,$inp)); &movdqu ($inout2,&QWP(16*2,$inp)); Loading @@ -1096,19 +1100,17 @@ if ($PREFIX eq "aesni") { &pxor ($inout5,$rndkey1); &pxor ($inout5,$rndkey1); &$movekey ($rndkey1,&QWP(16,$key_)); &$movekey ($rndkey1,&QWP(16,$key_)); &lea ($key,&DWP(32,$key_)); &pxor ($inout1,&QWP(16*1,"esp")); &pxor ($inout1,&QWP(16*1,"esp")); &aesenc ($inout0,$rndkey1); &pxor ($inout2,&QWP(16*2,"esp")); &pxor ($inout2,&QWP(16*2,"esp")); &aesenc ($inout1,$rndkey1); &aesenc ($inout0,$rndkey1); &pxor ($inout3,&QWP(16*3,"esp")); &pxor ($inout3,&QWP(16*3,"esp")); &dec ($rounds); &aesenc ($inout2,$rndkey1); &pxor ($inout4,&QWP(16*4,"esp")); &pxor ($inout4,&QWP(16*4,"esp")); &aesenc ($inout3,$rndkey1); &aesenc ($inout1,$rndkey1); &pxor ($inout5,$rndkey0); &pxor ($inout5,$rndkey0); &$movekey ($rndkey0,&QWP(32,$key_)); &aesenc ($inout2,$rndkey1); &aesenc ($inout3,$rndkey1); &aesenc ($inout4,$rndkey1); &aesenc ($inout4,$rndkey1); &$movekey ($rndkey0,&QWP(0,$key)); &aesenc ($inout5,$rndkey1); &aesenc ($inout5,$rndkey1); &call (&label("_aesni_encrypt6_enter")); &call (&label("_aesni_encrypt6_enter")); Loading @@ -1135,13 +1137,12 @@ if ($PREFIX eq "aesni") { &paddq ($tweak,$tweak); # &psllq($tweak,1); &paddq ($tweak,$tweak); # &psllq($tweak,1); &pand ($twres,$twmask); # isolate carry and residue &pand ($twres,$twmask); # isolate carry and residue &pcmpgtd($twtmp,$tweak); # broadcast upper bits &pcmpgtd($twtmp,$tweak); # broadcast upper bits &mov ($rounds,$rounds_); # restore $rounds &pxor ($tweak,$twres); &pxor ($tweak,$twres); &sub ($len,16*6); &sub ($len,16*6); &jnc (&label("xts_enc_loop6")); &jnc (&label("xts_enc_loop6")); &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds &mov ($rounds,&DWP(240,$key_)); # restore $rounds &mov ($key,$key_); # restore $key &mov ($key,$key_); # restore $key &mov ($rounds_,$rounds); &mov ($rounds_,$rounds); Loading Loading @@ -1399,8 +1400,10 @@ if ($PREFIX eq "aesni") { &sub ($len,16*6); &sub ($len,16*6); &jc (&label("xts_dec_short")); &jc (&label("xts_dec_short")); &shr ($rounds,1); &shl ($rounds,4); &mov ($rounds_,$rounds); &mov ($rounds_,16); &sub ($rounds_,$rounds); &lea ($key,&DWP(32,$key,$rounds)); &jmp (&label("xts_dec_loop6")); &jmp (&label("xts_dec_loop6")); &set_label("xts_dec_loop6",16); &set_label("xts_dec_loop6",16); Loading @@ -1422,6 +1425,7 @@ if ($PREFIX eq "aesni") { &pxor ($inout5,$tweak); &pxor ($inout5,$tweak); # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0] # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0] &mov ($rounds,$rounds_); &movdqu ($inout1,&QWP(16*1,$inp)); &movdqu ($inout1,&QWP(16*1,$inp)); &xorps ($inout0,$rndkey0); # input^=rndkey[0] &xorps ($inout0,$rndkey0); # input^=rndkey[0] &movdqu ($inout2,&QWP(16*2,$inp)); &movdqu ($inout2,&QWP(16*2,$inp)); Loading @@ -1438,19 +1442,17 @@ if ($PREFIX eq "aesni") { &pxor ($inout5,$rndkey1); &pxor ($inout5,$rndkey1); &$movekey ($rndkey1,&QWP(16,$key_)); &$movekey ($rndkey1,&QWP(16,$key_)); &lea ($key,&DWP(32,$key_)); &pxor ($inout1,&QWP(16*1,"esp")); &pxor ($inout1,&QWP(16*1,"esp")); &aesdec ($inout0,$rndkey1); &pxor ($inout2,&QWP(16*2,"esp")); &pxor ($inout2,&QWP(16*2,"esp")); &aesdec ($inout1,$rndkey1); &aesdec ($inout0,$rndkey1); &pxor ($inout3,&QWP(16*3,"esp")); &pxor ($inout3,&QWP(16*3,"esp")); &dec ($rounds); &aesdec ($inout2,$rndkey1); &pxor ($inout4,&QWP(16*4,"esp")); &pxor ($inout4,&QWP(16*4,"esp")); &aesdec ($inout3,$rndkey1); &aesdec ($inout1,$rndkey1); &pxor ($inout5,$rndkey0); &pxor ($inout5,$rndkey0); &$movekey ($rndkey0,&QWP(32,$key_)); &aesdec ($inout2,$rndkey1); &aesdec ($inout3,$rndkey1); &aesdec ($inout4,$rndkey1); &aesdec ($inout4,$rndkey1); &$movekey ($rndkey0,&QWP(0,$key)); &aesdec ($inout5,$rndkey1); &aesdec ($inout5,$rndkey1); &call (&label("_aesni_decrypt6_enter")); &call (&label("_aesni_decrypt6_enter")); Loading @@ -1477,13 +1479,12 @@ if ($PREFIX eq "aesni") { &paddq ($tweak,$tweak); # &psllq($tweak,1); &paddq ($tweak,$tweak); # &psllq($tweak,1); &pand ($twres,$twmask); # isolate carry and residue &pand ($twres,$twmask); # isolate carry and residue &pcmpgtd($twtmp,$tweak); # broadcast upper bits &pcmpgtd($twtmp,$tweak); # broadcast upper bits &mov ($rounds,$rounds_); # restore $rounds &pxor ($tweak,$twres); &pxor ($tweak,$twres); &sub ($len,16*6); &sub ($len,16*6); &jnc (&label("xts_dec_loop6")); &jnc (&label("xts_dec_loop6")); &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds &mov ($rounds,&DWP(240,$key_)); # restore $rounds &mov ($key,$key_); # restore $key &mov ($key,$key_); # restore $key &mov ($rounds_,$rounds); &mov ($rounds_,$rounds); Loading crypto/aes/asm/aesni-x86_64.pl +215 −178 File changed.Preview size limit exceeded, changes collapsed. Show changes Loading
crypto/aes/asm/aesni-x86.pl +124 −123 Original line number Original line Diff line number Diff line #!/usr/bin/env perl #!/usr/bin/env perl # ==================================================================== # ==================================================================== # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. # details see http://www.openssl.org/~appro/cryptogams/. Loading Loading @@ -208,25 +208,26 @@ sub aesni_generate3 &function_begin_B("_aesni_${p}rypt3"); &function_begin_B("_aesni_${p}rypt3"); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(0,$key)); &shr ($rounds,1); &shl ($rounds,4); &$movekey ($rndkey1,&QWP(16,$key)); &$movekey ($rndkey1,&QWP(16,$key)); &lea ($key,&DWP(32,$key)); &xorps ($inout0,$rndkey0); &xorps ($inout0,$rndkey0); &pxor ($inout1,$rndkey0); &pxor ($inout1,$rndkey0); &pxor ($inout2,$rndkey0); &pxor ($inout2,$rndkey0); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(32,$key)); &lea ($key,&DWP(32,$key,$rounds)); &neg ($rounds); &add ($rounds,16); &set_label("${p}3_loop"); &set_label("${p}3_loop"); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; &dec ($rounds); eval"&aes${p} ($inout2,$rndkey1)"; eval"&aes${p} ($inout2,$rndkey1)"; &$movekey ($rndkey1,&QWP(16,$key)); &$movekey ($rndkey1,&QWP(0,$key,$rounds)); &add ($rounds,32); eval"&aes${p} ($inout0,$rndkey0)"; eval"&aes${p} ($inout0,$rndkey0)"; eval"&aes${p} ($inout1,$rndkey0)"; eval"&aes${p} ($inout1,$rndkey0)"; &lea ($key,&DWP(32,$key)); eval"&aes${p} ($inout2,$rndkey0)"; eval"&aes${p} ($inout2,$rndkey0)"; &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jnz (&label("${p}3_loop")); &jnz (&label("${p}3_loop")); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; Loading @@ -248,27 +249,29 @@ sub aesni_generate4 &function_begin_B("_aesni_${p}rypt4"); &function_begin_B("_aesni_${p}rypt4"); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey1,&QWP(16,$key)); &$movekey ($rndkey1,&QWP(16,$key)); &shr ($rounds,1); &shl ($rounds,4); &lea ($key,&DWP(32,$key)); &xorps ($inout0,$rndkey0); &xorps ($inout0,$rndkey0); &pxor ($inout1,$rndkey0); &pxor ($inout1,$rndkey0); &pxor ($inout2,$rndkey0); &pxor ($inout2,$rndkey0); &pxor ($inout3,$rndkey0); &pxor ($inout3,$rndkey0); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(32,$key)); &lea ($key,&DWP(32,$key,$rounds)); &neg ($rounds); &data_byte (0x0f,0x1f,0x40,0x00); &add ($rounds,16); &set_label("${p}4_loop"); &set_label("${p}4_loop"); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; &dec ($rounds); eval"&aes${p} ($inout2,$rndkey1)"; eval"&aes${p} ($inout2,$rndkey1)"; eval"&aes${p} ($inout3,$rndkey1)"; eval"&aes${p} ($inout3,$rndkey1)"; &$movekey ($rndkey1,&QWP(16,$key)); &$movekey ($rndkey1,&QWP(0,$key,$rounds)); &add ($rounds,32); eval"&aes${p} ($inout0,$rndkey0)"; eval"&aes${p} ($inout0,$rndkey0)"; eval"&aes${p} ($inout1,$rndkey0)"; eval"&aes${p} ($inout1,$rndkey0)"; &lea ($key,&DWP(32,$key)); eval"&aes${p} ($inout2,$rndkey0)"; eval"&aes${p} ($inout2,$rndkey0)"; eval"&aes${p} ($inout3,$rndkey0)"; eval"&aes${p} ($inout3,$rndkey0)"; &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jnz (&label("${p}4_loop")); &jnz (&label("${p}4_loop")); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout0,$rndkey1)"; Loading @@ -289,43 +292,43 @@ sub aesni_generate6 &function_begin_B("_aesni_${p}rypt6"); &function_begin_B("_aesni_${p}rypt6"); &static_label("_aesni_${p}rypt6_enter"); &static_label("_aesni_${p}rypt6_enter"); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(0,$key)); &shr ($rounds,1); &shl ($rounds,4); &$movekey ($rndkey1,&QWP(16,$key)); &$movekey ($rndkey1,&QWP(16,$key)); &lea ($key,&DWP(32,$key)); &xorps ($inout0,$rndkey0); &xorps ($inout0,$rndkey0); &pxor ($inout1,$rndkey0); # pxor does better here &pxor ($inout1,$rndkey0); # pxor does better here eval"&aes${p} ($inout0,$rndkey1)"; &pxor ($inout2,$rndkey0); &pxor ($inout2,$rndkey0); eval"&aes${p} ($inout1,$rndkey1)"; eval"&aes${p} ($inout0,$rndkey1)"; &pxor ($inout3,$rndkey0); &pxor ($inout3,$rndkey0); &dec ($rounds); eval"&aes${p} ($inout2,$rndkey1)"; &pxor ($inout4,$rndkey0); &pxor ($inout4,$rndkey0); eval"&aes${p} ($inout3,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; &lea ($key,&DWP(32,$key,$rounds)); &neg ($rounds); eval"&aes${p} ($inout2,$rndkey1)"; &pxor ($inout5,$rndkey0); &pxor ($inout5,$rndkey0); &add ($rounds,16); eval"&aes${p} ($inout3,$rndkey1)"; eval"&aes${p} ($inout4,$rndkey1)"; eval"&aes${p} ($inout4,$rndkey1)"; &$movekey ($rndkey0,&QWP(0,$key)); eval"&aes${p} ($inout5,$rndkey1)"; eval"&aes${p} ($inout5,$rndkey1)"; &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jmp (&label("_aesni_${p}rypt6_enter")); &jmp (&label("_aesni_${p}rypt6_enter")); &set_label("${p}6_loop",16); &set_label("${p}6_loop",16); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; &dec ($rounds); eval"&aes${p} ($inout2,$rndkey1)"; eval"&aes${p} ($inout2,$rndkey1)"; eval"&aes${p} ($inout3,$rndkey1)"; eval"&aes${p} ($inout3,$rndkey1)"; eval"&aes${p} ($inout4,$rndkey1)"; eval"&aes${p} ($inout4,$rndkey1)"; eval"&aes${p} ($inout5,$rndkey1)"; eval"&aes${p} ($inout5,$rndkey1)"; &set_label("_aesni_${p}rypt6_enter",16); &set_label("_aesni_${p}rypt6_enter"); &$movekey ($rndkey1,&QWP(16,$key)); &$movekey ($rndkey1,&QWP(0,$key,$rounds)); &add ($rounds,32); eval"&aes${p} ($inout0,$rndkey0)"; eval"&aes${p} ($inout0,$rndkey0)"; eval"&aes${p} ($inout1,$rndkey0)"; eval"&aes${p} ($inout1,$rndkey0)"; &lea ($key,&DWP(32,$key)); eval"&aes${p} ($inout2,$rndkey0)"; eval"&aes${p} ($inout2,$rndkey0)"; eval"&aes${p} ($inout3,$rndkey0)"; eval"&aes${p} ($inout3,$rndkey0)"; eval"&aes${p} ($inout4,$rndkey0)"; eval"&aes${p} ($inout4,$rndkey0)"; eval"&aes${p} ($inout5,$rndkey0)"; eval"&aes${p} ($inout5,$rndkey0)"; &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jnz (&label("${p}6_loop")); &jnz (&label("${p}6_loop")); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout0,$rndkey1)"; Loading Loading @@ -610,11 +613,13 @@ if ($PREFIX eq "aesni") { &mov (&DWP(24,"esp"),$key_); &mov (&DWP(24,"esp"),$key_); &mov (&DWP(28,"esp"),$key_); &mov (&DWP(28,"esp"),$key_); &shr ($rounds,1); &shl ($rounds,4); &mov ($rounds_,16); &lea ($key_,&DWP(0,$key)); &lea ($key_,&DWP(0,$key)); &movdqa ($inout3,&QWP(0,"esp")); &movdqa ($inout3,&QWP(0,"esp")); &movdqa ($inout0,$ivec); &movdqa ($inout0,$ivec); &mov ($rounds_,$rounds); &lea ($key,&DWP(32,$key,$rounds)); &sub ($rounds_,$rounds); &pshufb ($ivec,$inout3); &pshufb ($ivec,$inout3); &set_label("ccm64_enc_outer"); &set_label("ccm64_enc_outer"); Loading @@ -625,33 +630,31 @@ if ($PREFIX eq "aesni") { &xorps ($inout0,$rndkey0); &xorps ($inout0,$rndkey0); &$movekey ($rndkey1,&QWP(16,$key_)); &$movekey ($rndkey1,&QWP(16,$key_)); &xorps ($rndkey0,$in0); &xorps ($rndkey0,$in0); &lea ($key,&DWP(32,$key_)); &xorps ($cmac,$rndkey0); # cmac^=inp &xorps ($cmac,$rndkey0); # cmac^=inp &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(32,$key_)); &set_label("ccm64_enc2_loop"); &set_label("ccm64_enc2_loop"); &aesenc ($inout0,$rndkey1); &aesenc ($inout0,$rndkey1); &dec ($rounds); &aesenc ($cmac,$rndkey1); &aesenc ($cmac,$rndkey1); &$movekey ($rndkey1,&QWP(16,$key)); &$movekey ($rndkey1,&QWP(0,$key,$rounds)); &add ($rounds,32); &aesenc ($inout0,$rndkey0); &aesenc ($inout0,$rndkey0); &lea ($key,&DWP(32,$key)); &aesenc ($cmac,$rndkey0); &aesenc ($cmac,$rndkey0); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jnz (&label("ccm64_enc2_loop")); &jnz (&label("ccm64_enc2_loop")); &aesenc ($inout0,$rndkey1); &aesenc ($inout0,$rndkey1); &aesenc ($cmac,$rndkey1); &aesenc ($cmac,$rndkey1); &paddq ($ivec,&QWP(16,"esp")); &paddq ($ivec,&QWP(16,"esp")); &dec ($len); &aesenclast ($inout0,$rndkey0); &aesenclast ($inout0,$rndkey0); &aesenclast ($cmac,$rndkey0); &aesenclast ($cmac,$rndkey0); &dec ($len); &lea ($inp,&DWP(16,$inp)); &lea ($inp,&DWP(16,$inp)); &xorps ($in0,$inout0); # inp^=E(ivec) &xorps ($in0,$inout0); # inp^=E(ivec) &movdqa ($inout0,$ivec); &movdqa ($inout0,$ivec); &movups (&QWP(0,$out),$in0); # save output &movups (&QWP(0,$out),$in0); # save output &lea ($out,&DWP(16,$out)); &pshufb ($inout0,$inout3); &pshufb ($inout0,$inout3); &lea ($out,&DWP(16,$out)); &jnz (&label("ccm64_enc_outer")); &jnz (&label("ccm64_enc_outer")); &mov ("esp",&DWP(48,"esp")); &mov ("esp",&DWP(48,"esp")); Loading Loading @@ -700,15 +703,19 @@ if ($PREFIX eq "aesni") { { &aesni_inline_generate1("enc"); } { &aesni_inline_generate1("enc"); } else else { &call ("_aesni_encrypt1"); } { &call ("_aesni_encrypt1"); } &shl ($rounds_,4); &mov ($rounds,16); &movups ($in0,&QWP(0,$inp)); # load inp &movups ($in0,&QWP(0,$inp)); # load inp &paddq ($ivec,&QWP(16,"esp")); &paddq ($ivec,&QWP(16,"esp")); &lea ($inp,&QWP(16,$inp)); &lea ($inp,&QWP(16,$inp)); &sub ($rounds,$rounds_); &lea ($key,&DWP(32,$key_,$rounds_)); &mov ($rounds_,$rounds); &jmp (&label("ccm64_dec_outer")); &jmp (&label("ccm64_dec_outer")); &set_label("ccm64_dec_outer",16); &set_label("ccm64_dec_outer",16); &xorps ($in0,$inout0); # inp ^= E(ivec) &xorps ($in0,$inout0); # inp ^= E(ivec) &movdqa ($inout0,$ivec); &movdqa ($inout0,$ivec); &mov ($rounds,$rounds_); &movups (&QWP(0,$out),$in0); # save output &movups (&QWP(0,$out),$in0); # save output &lea ($out,&DWP(16,$out)); &lea ($out,&DWP(16,$out)); &pshufb ($inout0,$inout3); &pshufb ($inout0,$inout3); Loading @@ -717,34 +724,33 @@ if ($PREFIX eq "aesni") { &jz (&label("ccm64_dec_break")); &jz (&label("ccm64_dec_break")); &$movekey ($rndkey0,&QWP(0,$key_)); &$movekey ($rndkey0,&QWP(0,$key_)); &shr ($rounds,1); &mov ($rounds,$rounds_); &$movekey ($rndkey1,&QWP(16,$key_)); &$movekey ($rndkey1,&QWP(16,$key_)); &xorps ($in0,$rndkey0); &xorps ($in0,$rndkey0); &lea ($key,&DWP(32,$key_)); &xorps ($inout0,$rndkey0); &xorps ($inout0,$rndkey0); &xorps ($cmac,$in0); # cmac^=out &xorps ($cmac,$in0); # cmac^=out &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(32,$key_)); &set_label("ccm64_dec2_loop"); &set_label("ccm64_dec2_loop"); &aesenc ($inout0,$rndkey1); &aesenc ($inout0,$rndkey1); &dec ($rounds); &aesenc ($cmac,$rndkey1); &aesenc ($cmac,$rndkey1); &$movekey ($rndkey1,&QWP(16,$key)); &$movekey ($rndkey1,&QWP(0,$key,$rounds)); &add ($rounds,32); &aesenc ($inout0,$rndkey0); &aesenc ($inout0,$rndkey0); &lea ($key,&DWP(32,$key)); &aesenc ($cmac,$rndkey0); &aesenc ($cmac,$rndkey0); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jnz (&label("ccm64_dec2_loop")); &jnz (&label("ccm64_dec2_loop")); &movups ($in0,&QWP(0,$inp)); # load inp &movups ($in0,&QWP(0,$inp)); # load inp &paddq ($ivec,&QWP(16,"esp")); &paddq ($ivec,&QWP(16,"esp")); &aesenc ($inout0,$rndkey1); &aesenc ($inout0,$rndkey1); &aesenc ($cmac,$rndkey1); &aesenc ($cmac,$rndkey1); &lea ($inp,&QWP(16,$inp)); &aesenclast ($inout0,$rndkey0); &aesenclast ($inout0,$rndkey0); &aesenclast ($cmac,$rndkey0); &aesenclast ($cmac,$rndkey0); &lea ($inp,&QWP(16,$inp)); &jmp (&label("ccm64_dec_outer")); &jmp (&label("ccm64_dec_outer")); &set_label("ccm64_dec_break",16); &set_label("ccm64_dec_break",16); &mov ($rounds,&DWP(240,$key_)); &mov ($key,$key_); &mov ($key,$key_); if ($inline) if ($inline) { &aesni_inline_generate1("enc",$cmac,$in0); } { &aesni_inline_generate1("enc",$cmac,$in0); } Loading @@ -763,7 +769,7 @@ if ($PREFIX eq "aesni") { # const char *ivec); # const char *ivec); # # # Handles only complete blocks, operates on 32-bit counter and # Handles only complete blocks, operates on 32-bit counter and # does not update *ivec! (see engine/eng_aesni.c for details) # does not update *ivec! (see crypto/modes/ctr128.c for details) # # # stack layout: # stack layout: # 0 pshufb mask # 0 pshufb mask Loading Loading @@ -810,66 +816,61 @@ if ($PREFIX eq "aesni") { # compose 2 vectors of 3x32-bit counters # compose 2 vectors of 3x32-bit counters &bswap ($rounds_); &bswap ($rounds_); &pxor ($rndkey1,$rndkey1); &pxor ($rndkey0,$rndkey0); &pxor ($rndkey0,$rndkey0); &pxor ($rndkey1,$rndkey1); &movdqa ($inout0,&QWP(0,"esp")); # load byte-swap mask &movdqa ($inout0,&QWP(0,"esp")); # load byte-swap mask &pinsrd ($rndkey1,$rounds_,0); &pinsrd ($rndkey0,$rounds_,0); &lea ($key_,&DWP(3,$rounds_)); &lea ($key_,&DWP(3,$rounds_)); &pinsrd ($rndkey0,$key_,0); &pinsrd ($rndkey1,$key_,0); &inc ($rounds_); &inc ($rounds_); &pinsrd ($rndkey1,$rounds_,1); &pinsrd ($rndkey0,$rounds_,1); &inc ($key_); &inc ($key_); &pinsrd ($rndkey0,$key_,1); &pinsrd ($rndkey1,$key_,1); &inc ($rounds_); &inc ($rounds_); &pinsrd ($rndkey1,$rounds_,2); &pinsrd ($rndkey0,$rounds_,2); &inc ($key_); &inc ($key_); &pinsrd ($rndkey0,$key_,2); &pinsrd ($rndkey1,$key_,2); &movdqa (&QWP(48,"esp"),$rndkey1); # save 1st triplet &movdqa (&QWP(48,"esp"),$rndkey0); # save 1st triplet &pshufb ($rndkey1,$inout0); # byte swap &movdqa (&QWP(64,"esp"),$rndkey0); # save 2nd triplet &pshufb ($rndkey0,$inout0); # byte swap &pshufb ($rndkey0,$inout0); # byte swap &movdqu ($inout4,&QWP(0,$key)); # key[0] &movdqa (&QWP(64,"esp"),$rndkey1); # save 2nd triplet &pshufb ($rndkey1,$inout0); # byte swap &pshufd ($inout0,$rndkey1,3<<6); # place counter to upper dword &pshufd ($inout0,$rndkey0,3<<6); # place counter to upper dword &pshufd ($inout1,$rndkey1,2<<6); &pshufd ($inout1,$rndkey0,2<<6); &cmp ($len,6); &cmp ($len,6); &jb (&label("ctr32_tail")); &jb (&label("ctr32_tail")); &movdqa (&QWP(32,"esp"),$inout5); # save counter-less ivec &pxor ($inout5,$inout4); # counter-less ivec^key[0] &shr ($rounds,1); &shl ($rounds,4); &mov ($rounds_,16); &movdqa (&QWP(32,"esp"),$inout5); # save counter-less ivec^key[0] &mov ($key_,$key); # backup $key &mov ($key_,$key); # backup $key &mov ($rounds_,$rounds); # backup $rounds &sub ($rounds_,$rounds); # backup twisted $rounds &lea ($key,&DWP(32,$key,$rounds)); &sub ($len,6); &sub ($len,6); &jmp (&label("ctr32_loop6")); &jmp (&label("ctr32_loop6")); &set_label("ctr32_loop6",16); &set_label("ctr32_loop6",16); &pshufd ($inout2,$rndkey1,1<<6); # inlining _aesni_encrypt6's prologue gives ~6% improvement... &movdqa ($rndkey1,&QWP(32,"esp")); # pull counter-less ivec &pshufd ($inout2,$rndkey0,1<<6); &pshufd ($inout3,$rndkey0,3<<6); &movdqa ($rndkey0,&QWP(32,"esp")); # pull counter-less ivec &por ($inout0,$rndkey1); # merge counter-less ivec &pshufd ($inout3,$rndkey1,3<<6); &pshufd ($inout4,$rndkey0,2<<6); &pxor ($inout0,$rndkey0); # merge counter-less ivec &por ($inout1,$rndkey1); &pshufd ($inout4,$rndkey1,2<<6); &pshufd ($inout5,$rndkey0,1<<6); &por ($inout2,$rndkey1); &por ($inout3,$rndkey1); &por ($inout4,$rndkey1); &por ($inout5,$rndkey1); # inlining _aesni_encrypt6's prologue gives ~4% improvement... &$movekey ($rndkey0,&QWP(0,$key_)); &$movekey ($rndkey1,&QWP(16,$key_)); &lea ($key,&DWP(32,$key_)); &dec ($rounds); &pxor ($inout0,$rndkey0); &pxor ($inout1,$rndkey0); &pxor ($inout1,$rndkey0); &aesenc ($inout0,$rndkey1); &pshufd ($inout5,$rndkey1,1<<6); &$movekey ($rndkey1,&QWP(16,$key_)); &pxor ($inout2,$rndkey0); &pxor ($inout2,$rndkey0); &aesenc ($inout1,$rndkey1); &pxor ($inout3,$rndkey0); &pxor ($inout3,$rndkey0); &aesenc ($inout2,$rndkey1); &aesenc ($inout0,$rndkey1); &pxor ($inout4,$rndkey0); &pxor ($inout4,$rndkey0); &aesenc ($inout3,$rndkey1); &pxor ($inout5,$rndkey0); &pxor ($inout5,$rndkey0); &aesenc ($inout1,$rndkey1); &$movekey ($rndkey0,&QWP(32,$key_)); &mov ($rounds,$rounds_); &aesenc ($inout2,$rndkey1); &aesenc ($inout3,$rndkey1); &aesenc ($inout4,$rndkey1); &aesenc ($inout4,$rndkey1); &$movekey ($rndkey0,&QWP(0,$key)); &aesenc ($inout5,$rndkey1); &aesenc ($inout5,$rndkey1); &call (&label("_aesni_encrypt6_enter")); &call (&label("_aesni_encrypt6_enter")); Loading @@ -882,12 +883,12 @@ if ($PREFIX eq "aesni") { &movups (&QWP(0,$out),$inout0); &movups (&QWP(0,$out),$inout0); &movdqa ($rndkey0,&QWP(16,"esp")); # load increment &movdqa ($rndkey0,&QWP(16,"esp")); # load increment &xorps ($inout2,$rndkey1); &xorps ($inout2,$rndkey1); &movdqa ($rndkey1,&QWP(48,"esp")); # load 1st triplet &movdqa ($rndkey1,&QWP(64,"esp")); # load 2nd triplet &movups (&QWP(0x10,$out),$inout1); &movups (&QWP(0x10,$out),$inout1); &movups (&QWP(0x20,$out),$inout2); &movups (&QWP(0x20,$out),$inout2); &paddd ($rndkey1,$rndkey0); # 1st triplet increment &paddd ($rndkey1,$rndkey0); # 2nd triplet increment &paddd ($rndkey0,&QWP(64,"esp")); # 2nd triplet increment &paddd ($rndkey0,&QWP(48,"esp")); # 1st triplet increment &movdqa ($inout0,&QWP(0,"esp")); # load byte swap mask &movdqa ($inout0,&QWP(0,"esp")); # load byte swap mask &movups ($inout1,&QWP(0x30,$inp)); &movups ($inout1,&QWP(0x30,$inp)); Loading @@ -895,44 +896,44 @@ if ($PREFIX eq "aesni") { &xorps ($inout3,$inout1); &xorps ($inout3,$inout1); &movups ($inout1,&QWP(0x50,$inp)); &movups ($inout1,&QWP(0x50,$inp)); &lea ($inp,&DWP(0x60,$inp)); &lea ($inp,&DWP(0x60,$inp)); &movdqa (&QWP(48,"esp"),$rndkey1); # save 1st triplet &movdqa (&QWP(48,"esp"),$rndkey0); # save 1st triplet &pshufb ($rndkey1,$inout0); # byte swap &pshufb ($rndkey0,$inout0); # byte swap &xorps ($inout4,$inout2); &xorps ($inout4,$inout2); &movups (&QWP(0x30,$out),$inout3); &movups (&QWP(0x30,$out),$inout3); &xorps ($inout5,$inout1); &xorps ($inout5,$inout1); &movdqa (&QWP(64,"esp"),$rndkey0); # save 2nd triplet &movdqa (&QWP(64,"esp"),$rndkey1); # save 2nd triplet &pshufb ($rndkey0,$inout0); # byte swap &pshufb ($rndkey1,$inout0); # byte swap &movups (&QWP(0x40,$out),$inout4); &movups (&QWP(0x40,$out),$inout4); &pshufd ($inout0,$rndkey1,3<<6); &pshufd ($inout0,$rndkey0,3<<6); &movups (&QWP(0x50,$out),$inout5); &movups (&QWP(0x50,$out),$inout5); &lea ($out,&DWP(0x60,$out)); &lea ($out,&DWP(0x60,$out)); &mov ($rounds,$rounds_); &pshufd ($inout1,$rndkey0,2<<6); &pshufd ($inout1,$rndkey1,2<<6); &sub ($len,6); &sub ($len,6); &jnc (&label("ctr32_loop6")); &jnc (&label("ctr32_loop6")); &add ($len,6); &add ($len,6); &jz (&label("ctr32_ret")); &jz (&label("ctr32_ret")); &movdqu ($inout5,&QWP(0,$key_)); &mov ($key,$key_); &mov ($key,$key_); &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds &pxor ($inout5,&QWP(32,"esp")); # restore count-less ivec &movdqa ($inout5,&QWP(32,"esp")); # pull count-less ivec &mov ($rounds,&DWP(240,$key_)); # restore $rounds &set_label("ctr32_tail"); &set_label("ctr32_tail"); &por ($inout0,$inout5); &por ($inout0,$inout5); &cmp ($len,2); &cmp ($len,2); &jb (&label("ctr32_one")); &jb (&label("ctr32_one")); &pshufd ($inout2,$rndkey1,1<<6); &pshufd ($inout2,$rndkey0,1<<6); &por ($inout1,$inout5); &por ($inout1,$inout5); &je (&label("ctr32_two")); &je (&label("ctr32_two")); &pshufd ($inout3,$rndkey0,3<<6); &pshufd ($inout3,$rndkey1,3<<6); &por ($inout2,$inout5); &por ($inout2,$inout5); &cmp ($len,4); &cmp ($len,4); &jb (&label("ctr32_three")); &jb (&label("ctr32_three")); &pshufd ($inout4,$rndkey0,2<<6); &pshufd ($inout4,$rndkey1,2<<6); &por ($inout3,$inout5); &por ($inout3,$inout5); &je (&label("ctr32_four")); &je (&label("ctr32_four")); Loading Loading @@ -1057,8 +1058,10 @@ if ($PREFIX eq "aesni") { &sub ($len,16*6); &sub ($len,16*6); &jc (&label("xts_enc_short")); &jc (&label("xts_enc_short")); &shr ($rounds,1); &shl ($rounds,4); &mov ($rounds_,$rounds); &mov ($rounds_,16); &sub ($rounds_,$rounds); &lea ($key,&DWP(32,$key,$rounds)); &jmp (&label("xts_enc_loop6")); &jmp (&label("xts_enc_loop6")); &set_label("xts_enc_loop6",16); &set_label("xts_enc_loop6",16); Loading @@ -1080,6 +1083,7 @@ if ($PREFIX eq "aesni") { &pxor ($inout5,$tweak); &pxor ($inout5,$tweak); # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0] # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0] &mov ($rounds,$rounds_); # restore $rounds &movdqu ($inout1,&QWP(16*1,$inp)); &movdqu ($inout1,&QWP(16*1,$inp)); &xorps ($inout0,$rndkey0); # input^=rndkey[0] &xorps ($inout0,$rndkey0); # input^=rndkey[0] &movdqu ($inout2,&QWP(16*2,$inp)); &movdqu ($inout2,&QWP(16*2,$inp)); Loading @@ -1096,19 +1100,17 @@ if ($PREFIX eq "aesni") { &pxor ($inout5,$rndkey1); &pxor ($inout5,$rndkey1); &$movekey ($rndkey1,&QWP(16,$key_)); &$movekey ($rndkey1,&QWP(16,$key_)); &lea ($key,&DWP(32,$key_)); &pxor ($inout1,&QWP(16*1,"esp")); &pxor ($inout1,&QWP(16*1,"esp")); &aesenc ($inout0,$rndkey1); &pxor ($inout2,&QWP(16*2,"esp")); &pxor ($inout2,&QWP(16*2,"esp")); &aesenc ($inout1,$rndkey1); &aesenc ($inout0,$rndkey1); &pxor ($inout3,&QWP(16*3,"esp")); &pxor ($inout3,&QWP(16*3,"esp")); &dec ($rounds); &aesenc ($inout2,$rndkey1); &pxor ($inout4,&QWP(16*4,"esp")); &pxor ($inout4,&QWP(16*4,"esp")); &aesenc ($inout3,$rndkey1); &aesenc ($inout1,$rndkey1); &pxor ($inout5,$rndkey0); &pxor ($inout5,$rndkey0); &$movekey ($rndkey0,&QWP(32,$key_)); &aesenc ($inout2,$rndkey1); &aesenc ($inout3,$rndkey1); &aesenc ($inout4,$rndkey1); &aesenc ($inout4,$rndkey1); &$movekey ($rndkey0,&QWP(0,$key)); &aesenc ($inout5,$rndkey1); &aesenc ($inout5,$rndkey1); &call (&label("_aesni_encrypt6_enter")); &call (&label("_aesni_encrypt6_enter")); Loading @@ -1135,13 +1137,12 @@ if ($PREFIX eq "aesni") { &paddq ($tweak,$tweak); # &psllq($tweak,1); &paddq ($tweak,$tweak); # &psllq($tweak,1); &pand ($twres,$twmask); # isolate carry and residue &pand ($twres,$twmask); # isolate carry and residue &pcmpgtd($twtmp,$tweak); # broadcast upper bits &pcmpgtd($twtmp,$tweak); # broadcast upper bits &mov ($rounds,$rounds_); # restore $rounds &pxor ($tweak,$twres); &pxor ($tweak,$twres); &sub ($len,16*6); &sub ($len,16*6); &jnc (&label("xts_enc_loop6")); &jnc (&label("xts_enc_loop6")); &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds &mov ($rounds,&DWP(240,$key_)); # restore $rounds &mov ($key,$key_); # restore $key &mov ($key,$key_); # restore $key &mov ($rounds_,$rounds); &mov ($rounds_,$rounds); Loading Loading @@ -1399,8 +1400,10 @@ if ($PREFIX eq "aesni") { &sub ($len,16*6); &sub ($len,16*6); &jc (&label("xts_dec_short")); &jc (&label("xts_dec_short")); &shr ($rounds,1); &shl ($rounds,4); &mov ($rounds_,$rounds); &mov ($rounds_,16); &sub ($rounds_,$rounds); &lea ($key,&DWP(32,$key,$rounds)); &jmp (&label("xts_dec_loop6")); &jmp (&label("xts_dec_loop6")); &set_label("xts_dec_loop6",16); &set_label("xts_dec_loop6",16); Loading @@ -1422,6 +1425,7 @@ if ($PREFIX eq "aesni") { &pxor ($inout5,$tweak); &pxor ($inout5,$tweak); # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0] # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0] &mov ($rounds,$rounds_); &movdqu ($inout1,&QWP(16*1,$inp)); &movdqu ($inout1,&QWP(16*1,$inp)); &xorps ($inout0,$rndkey0); # input^=rndkey[0] &xorps ($inout0,$rndkey0); # input^=rndkey[0] &movdqu ($inout2,&QWP(16*2,$inp)); &movdqu ($inout2,&QWP(16*2,$inp)); Loading @@ -1438,19 +1442,17 @@ if ($PREFIX eq "aesni") { &pxor ($inout5,$rndkey1); &pxor ($inout5,$rndkey1); &$movekey ($rndkey1,&QWP(16,$key_)); &$movekey ($rndkey1,&QWP(16,$key_)); &lea ($key,&DWP(32,$key_)); &pxor ($inout1,&QWP(16*1,"esp")); &pxor ($inout1,&QWP(16*1,"esp")); &aesdec ($inout0,$rndkey1); &pxor ($inout2,&QWP(16*2,"esp")); &pxor ($inout2,&QWP(16*2,"esp")); &aesdec ($inout1,$rndkey1); &aesdec ($inout0,$rndkey1); &pxor ($inout3,&QWP(16*3,"esp")); &pxor ($inout3,&QWP(16*3,"esp")); &dec ($rounds); &aesdec ($inout2,$rndkey1); &pxor ($inout4,&QWP(16*4,"esp")); &pxor ($inout4,&QWP(16*4,"esp")); &aesdec ($inout3,$rndkey1); &aesdec ($inout1,$rndkey1); &pxor ($inout5,$rndkey0); &pxor ($inout5,$rndkey0); &$movekey ($rndkey0,&QWP(32,$key_)); &aesdec ($inout2,$rndkey1); &aesdec ($inout3,$rndkey1); &aesdec ($inout4,$rndkey1); &aesdec ($inout4,$rndkey1); &$movekey ($rndkey0,&QWP(0,$key)); &aesdec ($inout5,$rndkey1); &aesdec ($inout5,$rndkey1); &call (&label("_aesni_decrypt6_enter")); &call (&label("_aesni_decrypt6_enter")); Loading @@ -1477,13 +1479,12 @@ if ($PREFIX eq "aesni") { &paddq ($tweak,$tweak); # &psllq($tweak,1); &paddq ($tweak,$tweak); # &psllq($tweak,1); &pand ($twres,$twmask); # isolate carry and residue &pand ($twres,$twmask); # isolate carry and residue &pcmpgtd($twtmp,$tweak); # broadcast upper bits &pcmpgtd($twtmp,$tweak); # broadcast upper bits &mov ($rounds,$rounds_); # restore $rounds &pxor ($tweak,$twres); &pxor ($tweak,$twres); &sub ($len,16*6); &sub ($len,16*6); &jnc (&label("xts_dec_loop6")); &jnc (&label("xts_dec_loop6")); &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds &mov ($rounds,&DWP(240,$key_)); # restore $rounds &mov ($key,$key_); # restore $key &mov ($key,$key_); # restore $key &mov ($rounds_,$rounds); &mov ($rounds_,$rounds); Loading
crypto/aes/asm/aesni-x86_64.pl +215 −178 File changed.Preview size limit exceeded, changes collapsed. Show changes