Loading engines/asm/e_padlock-x86.pl +11 −10 Original line number Original line Diff line number Diff line Loading @@ -183,7 +183,7 @@ my ($mode,$opcode) = @_; &set_label("${mode}_pic_point"); &set_label("${mode}_pic_point"); &lea ($ctx,&DWP(16,$ctx)); # control word &lea ($ctx,&DWP(16,$ctx)); # control word &xor ("eax","eax"); &xor ("eax","eax"); if ($mode eq "ctr16") { if ($mode eq "ctr32") { &movq ("mm0",&QWP(-16,$ctx)); # load [upper part of] counter &movq ("mm0",&QWP(-16,$ctx)); # load [upper part of] counter } else { } else { &xor ("ebx","ebx"); &xor ("ebx","ebx"); Loading Loading @@ -216,7 +216,7 @@ my ($mode,$opcode) = @_; &mov (&DWP(8,"ebp"),$len); &mov (&DWP(8,"ebp"),$len); &mov ($len,$chunk); &mov ($len,$chunk); &mov (&DWP(12,"ebp"),$chunk); # chunk &mov (&DWP(12,"ebp"),$chunk); # chunk if ($mode eq "ctr16") { if ($mode eq "ctr32") { &mov ("ecx",&DWP(-4,$ctx)); &mov ("ecx",&DWP(-4,$ctx)); &xor ($out,$out); &xor ($out,$out); &mov ("eax",&DWP(-8,$ctx)); # borrow $len &mov ("eax",&DWP(-8,$ctx)); # borrow $len Loading Loading @@ -257,7 +257,7 @@ my ($mode,$opcode) = @_; } } &mov ($out,&DWP(0,"ebp")); # restore parameters &mov ($out,&DWP(0,"ebp")); # restore parameters &mov ($chunk,&DWP(12,"ebp")); &mov ($chunk,&DWP(12,"ebp")); if ($mode eq "ctr16") { if ($mode eq "ctr32") { &mov ($inp,&DWP(4,"ebp")); &mov ($inp,&DWP(4,"ebp")); &xor ($len,$len); &xor ($len,$len); &set_label("${mode}_xor"); &set_label("${mode}_xor"); Loading @@ -284,7 +284,7 @@ my ($mode,$opcode) = @_; &sub ($len,$chunk); &sub ($len,$chunk); &mov ($chunk,$PADLOCK_CHUNK); &mov ($chunk,$PADLOCK_CHUNK); &jnz (&label("${mode}_loop")); &jnz (&label("${mode}_loop")); if ($mode ne "ctr16") { if ($mode ne "ctr32") { &test ($out,0x0f); # out_misaligned &test ($out,0x0f); # out_misaligned &jz (&label("${mode}_done")); &jz (&label("${mode}_done")); } } Loading @@ -296,7 +296,7 @@ my ($mode,$opcode) = @_; &data_byte(0xf3,0xab); # rep stosl &data_byte(0xf3,0xab); # rep stosl &set_label("${mode}_done"); &set_label("${mode}_done"); &lea ("esp",&DWP(24,"ebp")); &lea ("esp",&DWP(24,"ebp")); if ($mode ne "ctr16") { if ($mode ne "ctr32") { &jmp (&label("${mode}_exit")); &jmp (&label("${mode}_exit")); &set_label("${mode}_aligned",16); &set_label("${mode}_aligned",16); Loading @@ -311,7 +311,7 @@ my ($mode,$opcode) = @_; &set_label("${mode}_exit"); } &set_label("${mode}_exit"); } &mov ("eax",1); &mov ("eax",1); &lea ("esp",&DWP(4,"esp")); # popf &lea ("esp",&DWP(4,"esp")); # popf &emms () if ($mode eq "ctr16"); &emms () if ($mode eq "ctr32"); &set_label("${mode}_abort"); &set_label("${mode}_abort"); &function_end("padlock_${mode}_encrypt"); &function_end("padlock_${mode}_encrypt"); } } Loading @@ -320,10 +320,11 @@ my ($mode,$opcode) = @_; &generate_mode("cbc",0xd0); &generate_mode("cbc",0xd0); &generate_mode("cfb",0xe0); &generate_mode("cfb",0xe0); &generate_mode("ofb",0xe8); &generate_mode("ofb",0xe8); &generate_mode("ctr16",0xc8); # yes, it implements own ctr with ecb opcode, &generate_mode("ctr32",0xc8); # yes, it implements own CTR with ECB opcode, # because hardware ctr was introduced later # because hardware CTR was introduced later # and even has errata on certain CPU stepping. # and even has errata on certain C7 stepping. # own implementation *always* works... # own implementation *always* works, though # ~15% slower than dedicated hardware... &function_begin_B("padlock_xstore"); &function_begin_B("padlock_xstore"); &push ("edi"); &push ("edi"); Loading engines/asm/e_padlock-x86_64.pl +64 −3 Original line number Original line Diff line number Diff line Loading @@ -9,7 +9,8 @@ # September 2011 # September 2011 # # # Assembler helpers for Padlock engine. # Assembler helpers for Padlock engine. See even e_padlock-x86.pl for # details. $flavour = shift; $flavour = shift; $output = shift; $output = shift; Loading @@ -26,7 +27,7 @@ open STDOUT,"| $^X $xlate $flavour $output"; $code=".text\n"; $code=".text\n"; $PADLOCK_CHUNK=512; # Must be a power of 2 larger than 16 $PADLOCK_CHUNK=512; # Must be a power of 2 between 32 and 2^20 $ctx="%rdx"; $ctx="%rdx"; $out="%rdi"; $out="%rdi"; Loading Loading @@ -234,9 +235,23 @@ padlock_${mode}_encrypt: neg %rax neg %rax and \$$PADLOCK_CHUNK-1,$chunk # chunk%=PADLOCK_CHUNK and \$$PADLOCK_CHUNK-1,$chunk # chunk%=PADLOCK_CHUNK lea (%rax,%rbp),%rsp lea (%rax,%rbp),%rsp ___ $code.=<<___ if ($mode eq "ctr32"); mov -4($ctx),%eax # pull 32-bit counter bswap %eax neg %eax and \$`$PADLOCK_CHUNK/16-1`,%eax jz .L${mode}_loop shl \$4,%eax cmp %rax,$len cmova %rax,$chunk # don't let counter cross PADLOCK_CHUNK ___ $code.=<<___; jmp .L${mode}_loop jmp .L${mode}_loop .align 16 .align 16 .L${mode}_loop: .L${mode}_loop: cmp $len,$chunk # ctr32 artefact cmova $len,$chunk # ctr32 artefact mov $out,%r8 # save parameters mov $out,%r8 # save parameters mov $inp,%r9 mov $inp,%r9 mov $len,%r10 mov $len,%r10 Loading @@ -261,6 +276,16 @@ $code.=<<___ if ($mode !~ /ecb|ctr/); movdqa (%rax),%xmm0 movdqa (%rax),%xmm0 movdqa %xmm0,-16($ctx) # copy [or refresh] iv movdqa %xmm0,-16($ctx) # copy [or refresh] iv ___ ___ $code.=<<___ if ($mode eq "ctr32"); mov -4($ctx),%eax # pull 32-bit counter test \$0xffff0000,%eax jnz .L${mode}_no_corr bswap %eax add \$0x10000,%eax bswap %eax mov %eax,-4($ctx) .L${mode}_no_corr: ___ $code.=<<___; $code.=<<___; mov %r8,$out # restore paramters mov %r8,$out # restore paramters mov %r11,$chunk mov %r11,$chunk Loading Loading @@ -295,6 +320,29 @@ $code.=<<___; .align 16 .align 16 .L${mode}_aligned: .L${mode}_aligned: ___ $code.=<<___ if ($mode eq "ctr32"); mov -4($ctx),%eax # pull 32-bit counter mov \$`16*0x10000`,$chunk bswap %eax cmp $len,$chunk cmova $len,$chunk neg %eax and \$0xffff,%eax jz .L${mode}_aligned_loop shl \$4,%eax cmp %rax,$len cmova %rax,$chunk # don't let counter cross 2^16 jmp .L${mode}_aligned_loop .align 16 .L${mode}_aligned_loop: cmp $len,$chunk cmova $len,$chunk mov $len,%r10 # save parameters mov $chunk,$len mov $chunk,%r11 ___ $code.=<<___; lea -16($ctx),%rax # ivp lea -16($ctx),%rax # ivp lea 16($ctx),%rbx # key lea 16($ctx),%rbx # key shr \$4,$len # len/=AES_BLOCK_SIZE shr \$4,$len # len/=AES_BLOCK_SIZE Loading @@ -304,6 +352,19 @@ $code.=<<___ if ($mode !~ /ecb|ctr/); movdqa (%rax),%xmm0 movdqa (%rax),%xmm0 movdqa %xmm0,-16($ctx) # copy [or refresh] iv movdqa %xmm0,-16($ctx) # copy [or refresh] iv ___ ___ $code.=<<___ if ($mode eq "ctr32"); mov -4($ctx),%eax # pull 32-bit counter bswap %eax add \$0x10000,%eax bswap %eax mov %eax,-4($ctx) mov %r11,$chunk # restore paramters mov %r10,$len sub $chunk,$len mov \$`16*0x10000`,$chunk jnz .L${mode}_aligned_loop ___ $code.=<<___; $code.=<<___; .L${mode}_exit: .L${mode}_exit: mov \$1,%eax mov \$1,%eax Loading @@ -320,7 +381,7 @@ ___ &generate_mode("cbc",0xd0); &generate_mode("cbc",0xd0); &generate_mode("cfb",0xe0); &generate_mode("cfb",0xe0); &generate_mode("ofb",0xe8); &generate_mode("ofb",0xe8); &generate_mode("ctr16",0xd8); &generate_mode("ctr32",0xd8); # all 64-bit CPUs have working CTR... $code.=<<___; $code.=<<___; .asciz "VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>" .asciz "VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>" Loading engines/e_padlock.c +45 −3 Original line number Original line Diff line number Diff line Loading @@ -76,6 +76,7 @@ #endif #endif #include <openssl/rand.h> #include <openssl/rand.h> #include <openssl/err.h> #include <openssl/err.h> #include <openssl/modes.h> #ifndef OPENSSL_NO_HW #ifndef OPENSSL_NO_HW #ifndef OPENSSL_NO_HW_PADLOCK #ifndef OPENSSL_NO_HW_PADLOCK Loading Loading @@ -337,16 +338,19 @@ static int padlock_cipher_nids[] = { NID_aes_128_cbc, NID_aes_128_cbc, NID_aes_128_cfb, NID_aes_128_cfb, NID_aes_128_ofb, NID_aes_128_ofb, NID_aes_128_ctr, NID_aes_192_ecb, NID_aes_192_ecb, NID_aes_192_cbc, NID_aes_192_cbc, NID_aes_192_cfb, NID_aes_192_cfb, NID_aes_192_ofb, NID_aes_192_ofb, NID_aes_192_ctr, NID_aes_256_ecb, NID_aes_256_ecb, NID_aes_256_cbc, NID_aes_256_cbc, NID_aes_256_cfb, NID_aes_256_cfb, NID_aes_256_ofb, NID_aes_256_ofb, NID_aes_256_ctr }; }; static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/ static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/ sizeof(padlock_cipher_nids[0])); sizeof(padlock_cipher_nids[0])); Loading Loading @@ -505,10 +509,35 @@ padlock_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, return 1; return 1; } } static void padlock_ctr32_encrypt_glue(const unsigned char *in, unsigned char *out, size_t blocks, struct padlock_cipher_data *ctx, const unsigned char *ivec) { memcpy(ctx->iv,ivec,AES_BLOCK_SIZE); padlock_ctr32_encrypt(out,in,ctx,AES_BLOCK_SIZE*blocks); } static int padlock_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, const unsigned char *in_arg, size_t nbytes) { struct padlock_cipher_data *cdata = ALIGNED_CIPHER_DATA(ctx); unsigned int num = ctx->num; CRYPTO_ctr128_encrypt_ctr32(in_arg,out_arg,nbytes, cdata,ctx->iv,ctx->buf,&num, (ctr128_f)padlock_ctr32_encrypt_glue); ctx->num = (size_t)num; return 1; } #define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE #define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE #define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE #define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE #define EVP_CIPHER_block_size_OFB 1 #define EVP_CIPHER_block_size_OFB 1 #define EVP_CIPHER_block_size_CFB 1 #define EVP_CIPHER_block_size_CFB 1 #define EVP_CIPHER_block_size_CTR 1 /* Declaring so many ciphers by hand would be a pain. /* Declaring so many ciphers by hand would be a pain. Instead introduce a bit of preprocessor magic :-) */ Instead introduce a bit of preprocessor magic :-) */ Loading @@ -533,16 +562,19 @@ DECLARE_AES_EVP(128,ecb,ECB); DECLARE_AES_EVP(128,cbc,CBC); DECLARE_AES_EVP(128,cbc,CBC); DECLARE_AES_EVP(128,cfb,CFB); DECLARE_AES_EVP(128,cfb,CFB); DECLARE_AES_EVP(128,ofb,OFB); DECLARE_AES_EVP(128,ofb,OFB); DECLARE_AES_EVP(128,ctr,CTR); DECLARE_AES_EVP(192,ecb,ECB); DECLARE_AES_EVP(192,ecb,ECB); DECLARE_AES_EVP(192,cbc,CBC); DECLARE_AES_EVP(192,cbc,CBC); DECLARE_AES_EVP(192,cfb,CFB); DECLARE_AES_EVP(192,cfb,CFB); DECLARE_AES_EVP(192,ofb,OFB); DECLARE_AES_EVP(192,ofb,OFB); DECLARE_AES_EVP(192,ctr,CTR); DECLARE_AES_EVP(256,ecb,ECB); DECLARE_AES_EVP(256,ecb,ECB); DECLARE_AES_EVP(256,cbc,CBC); DECLARE_AES_EVP(256,cbc,CBC); DECLARE_AES_EVP(256,cfb,CFB); DECLARE_AES_EVP(256,cfb,CFB); DECLARE_AES_EVP(256,ofb,OFB); DECLARE_AES_EVP(256,ofb,OFB); DECLARE_AES_EVP(256,ctr,CTR); static int static int padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid) padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid) Loading @@ -567,6 +599,9 @@ padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid case NID_aes_128_ofb: case NID_aes_128_ofb: *cipher = &padlock_aes_128_ofb; *cipher = &padlock_aes_128_ofb; break; break; case NID_aes_128_ctr: *cipher = &padlock_aes_128_ctr; break; case NID_aes_192_ecb: case NID_aes_192_ecb: *cipher = &padlock_aes_192_ecb; *cipher = &padlock_aes_192_ecb; Loading @@ -580,6 +615,9 @@ padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid case NID_aes_192_ofb: case NID_aes_192_ofb: *cipher = &padlock_aes_192_ofb; *cipher = &padlock_aes_192_ofb; break; break; case NID_aes_192_ctr: *cipher = &padlock_aes_192_ctr; break; case NID_aes_256_ecb: case NID_aes_256_ecb: *cipher = &padlock_aes_256_ecb; *cipher = &padlock_aes_256_ecb; Loading @@ -593,6 +631,9 @@ padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid case NID_aes_256_ofb: case NID_aes_256_ofb: *cipher = &padlock_aes_256_ofb; *cipher = &padlock_aes_256_ofb; break; break; case NID_aes_256_ctr: *cipher = &padlock_aes_256_ctr; break; default: default: /* Sorry, we don't support this NID */ /* Sorry, we don't support this NID */ Loading @@ -610,6 +651,7 @@ padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key, { { struct padlock_cipher_data *cdata; struct padlock_cipher_data *cdata; int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8; int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8; unsigned long mode = EVP_CIPHER_CTX_mode(ctx); if (key==NULL) return 0; /* ERROR */ if (key==NULL) return 0; /* ERROR */ Loading @@ -617,7 +659,7 @@ padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key, memset(cdata, 0, sizeof(struct padlock_cipher_data)); memset(cdata, 0, sizeof(struct padlock_cipher_data)); /* Prepare Control word. */ /* Prepare Control word. */ if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE) if (mode == EVP_CIPH_OFB_MODE || mode == EVP_CIPH_CTR_MODE) cdata->cword.b.encdec = 0; cdata->cword.b.encdec = 0; else else cdata->cword.b.encdec = (ctx->encrypt == 0); cdata->cword.b.encdec = (ctx->encrypt == 0); Loading @@ -640,8 +682,8 @@ padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key, and is listed as hardware errata. They most and is listed as hardware errata. They most likely will fix it at some point and then likely will fix it at some point and then a check for stepping would be due here. */ a check for stepping would be due here. */ if ((EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_ECB_MODE || if ((mode == EVP_CIPH_ECB_MODE || EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CBC_MODE) mode == EVP_CIPH_CBC_MODE) && !enc) && !enc) AES_set_decrypt_key(key, key_len, &cdata->ks); AES_set_decrypt_key(key, key_len, &cdata->ks); else else Loading Loading
engines/asm/e_padlock-x86.pl +11 −10 Original line number Original line Diff line number Diff line Loading @@ -183,7 +183,7 @@ my ($mode,$opcode) = @_; &set_label("${mode}_pic_point"); &set_label("${mode}_pic_point"); &lea ($ctx,&DWP(16,$ctx)); # control word &lea ($ctx,&DWP(16,$ctx)); # control word &xor ("eax","eax"); &xor ("eax","eax"); if ($mode eq "ctr16") { if ($mode eq "ctr32") { &movq ("mm0",&QWP(-16,$ctx)); # load [upper part of] counter &movq ("mm0",&QWP(-16,$ctx)); # load [upper part of] counter } else { } else { &xor ("ebx","ebx"); &xor ("ebx","ebx"); Loading Loading @@ -216,7 +216,7 @@ my ($mode,$opcode) = @_; &mov (&DWP(8,"ebp"),$len); &mov (&DWP(8,"ebp"),$len); &mov ($len,$chunk); &mov ($len,$chunk); &mov (&DWP(12,"ebp"),$chunk); # chunk &mov (&DWP(12,"ebp"),$chunk); # chunk if ($mode eq "ctr16") { if ($mode eq "ctr32") { &mov ("ecx",&DWP(-4,$ctx)); &mov ("ecx",&DWP(-4,$ctx)); &xor ($out,$out); &xor ($out,$out); &mov ("eax",&DWP(-8,$ctx)); # borrow $len &mov ("eax",&DWP(-8,$ctx)); # borrow $len Loading Loading @@ -257,7 +257,7 @@ my ($mode,$opcode) = @_; } } &mov ($out,&DWP(0,"ebp")); # restore parameters &mov ($out,&DWP(0,"ebp")); # restore parameters &mov ($chunk,&DWP(12,"ebp")); &mov ($chunk,&DWP(12,"ebp")); if ($mode eq "ctr16") { if ($mode eq "ctr32") { &mov ($inp,&DWP(4,"ebp")); &mov ($inp,&DWP(4,"ebp")); &xor ($len,$len); &xor ($len,$len); &set_label("${mode}_xor"); &set_label("${mode}_xor"); Loading @@ -284,7 +284,7 @@ my ($mode,$opcode) = @_; &sub ($len,$chunk); &sub ($len,$chunk); &mov ($chunk,$PADLOCK_CHUNK); &mov ($chunk,$PADLOCK_CHUNK); &jnz (&label("${mode}_loop")); &jnz (&label("${mode}_loop")); if ($mode ne "ctr16") { if ($mode ne "ctr32") { &test ($out,0x0f); # out_misaligned &test ($out,0x0f); # out_misaligned &jz (&label("${mode}_done")); &jz (&label("${mode}_done")); } } Loading @@ -296,7 +296,7 @@ my ($mode,$opcode) = @_; &data_byte(0xf3,0xab); # rep stosl &data_byte(0xf3,0xab); # rep stosl &set_label("${mode}_done"); &set_label("${mode}_done"); &lea ("esp",&DWP(24,"ebp")); &lea ("esp",&DWP(24,"ebp")); if ($mode ne "ctr16") { if ($mode ne "ctr32") { &jmp (&label("${mode}_exit")); &jmp (&label("${mode}_exit")); &set_label("${mode}_aligned",16); &set_label("${mode}_aligned",16); Loading @@ -311,7 +311,7 @@ my ($mode,$opcode) = @_; &set_label("${mode}_exit"); } &set_label("${mode}_exit"); } &mov ("eax",1); &mov ("eax",1); &lea ("esp",&DWP(4,"esp")); # popf &lea ("esp",&DWP(4,"esp")); # popf &emms () if ($mode eq "ctr16"); &emms () if ($mode eq "ctr32"); &set_label("${mode}_abort"); &set_label("${mode}_abort"); &function_end("padlock_${mode}_encrypt"); &function_end("padlock_${mode}_encrypt"); } } Loading @@ -320,10 +320,11 @@ my ($mode,$opcode) = @_; &generate_mode("cbc",0xd0); &generate_mode("cbc",0xd0); &generate_mode("cfb",0xe0); &generate_mode("cfb",0xe0); &generate_mode("ofb",0xe8); &generate_mode("ofb",0xe8); &generate_mode("ctr16",0xc8); # yes, it implements own ctr with ecb opcode, &generate_mode("ctr32",0xc8); # yes, it implements own CTR with ECB opcode, # because hardware ctr was introduced later # because hardware CTR was introduced later # and even has errata on certain CPU stepping. # and even has errata on certain C7 stepping. # own implementation *always* works... # own implementation *always* works, though # ~15% slower than dedicated hardware... &function_begin_B("padlock_xstore"); &function_begin_B("padlock_xstore"); &push ("edi"); &push ("edi"); Loading
engines/asm/e_padlock-x86_64.pl +64 −3 Original line number Original line Diff line number Diff line Loading @@ -9,7 +9,8 @@ # September 2011 # September 2011 # # # Assembler helpers for Padlock engine. # Assembler helpers for Padlock engine. See even e_padlock-x86.pl for # details. $flavour = shift; $flavour = shift; $output = shift; $output = shift; Loading @@ -26,7 +27,7 @@ open STDOUT,"| $^X $xlate $flavour $output"; $code=".text\n"; $code=".text\n"; $PADLOCK_CHUNK=512; # Must be a power of 2 larger than 16 $PADLOCK_CHUNK=512; # Must be a power of 2 between 32 and 2^20 $ctx="%rdx"; $ctx="%rdx"; $out="%rdi"; $out="%rdi"; Loading Loading @@ -234,9 +235,23 @@ padlock_${mode}_encrypt: neg %rax neg %rax and \$$PADLOCK_CHUNK-1,$chunk # chunk%=PADLOCK_CHUNK and \$$PADLOCK_CHUNK-1,$chunk # chunk%=PADLOCK_CHUNK lea (%rax,%rbp),%rsp lea (%rax,%rbp),%rsp ___ $code.=<<___ if ($mode eq "ctr32"); mov -4($ctx),%eax # pull 32-bit counter bswap %eax neg %eax and \$`$PADLOCK_CHUNK/16-1`,%eax jz .L${mode}_loop shl \$4,%eax cmp %rax,$len cmova %rax,$chunk # don't let counter cross PADLOCK_CHUNK ___ $code.=<<___; jmp .L${mode}_loop jmp .L${mode}_loop .align 16 .align 16 .L${mode}_loop: .L${mode}_loop: cmp $len,$chunk # ctr32 artefact cmova $len,$chunk # ctr32 artefact mov $out,%r8 # save parameters mov $out,%r8 # save parameters mov $inp,%r9 mov $inp,%r9 mov $len,%r10 mov $len,%r10 Loading @@ -261,6 +276,16 @@ $code.=<<___ if ($mode !~ /ecb|ctr/); movdqa (%rax),%xmm0 movdqa (%rax),%xmm0 movdqa %xmm0,-16($ctx) # copy [or refresh] iv movdqa %xmm0,-16($ctx) # copy [or refresh] iv ___ ___ $code.=<<___ if ($mode eq "ctr32"); mov -4($ctx),%eax # pull 32-bit counter test \$0xffff0000,%eax jnz .L${mode}_no_corr bswap %eax add \$0x10000,%eax bswap %eax mov %eax,-4($ctx) .L${mode}_no_corr: ___ $code.=<<___; $code.=<<___; mov %r8,$out # restore paramters mov %r8,$out # restore paramters mov %r11,$chunk mov %r11,$chunk Loading Loading @@ -295,6 +320,29 @@ $code.=<<___; .align 16 .align 16 .L${mode}_aligned: .L${mode}_aligned: ___ $code.=<<___ if ($mode eq "ctr32"); mov -4($ctx),%eax # pull 32-bit counter mov \$`16*0x10000`,$chunk bswap %eax cmp $len,$chunk cmova $len,$chunk neg %eax and \$0xffff,%eax jz .L${mode}_aligned_loop shl \$4,%eax cmp %rax,$len cmova %rax,$chunk # don't let counter cross 2^16 jmp .L${mode}_aligned_loop .align 16 .L${mode}_aligned_loop: cmp $len,$chunk cmova $len,$chunk mov $len,%r10 # save parameters mov $chunk,$len mov $chunk,%r11 ___ $code.=<<___; lea -16($ctx),%rax # ivp lea -16($ctx),%rax # ivp lea 16($ctx),%rbx # key lea 16($ctx),%rbx # key shr \$4,$len # len/=AES_BLOCK_SIZE shr \$4,$len # len/=AES_BLOCK_SIZE Loading @@ -304,6 +352,19 @@ $code.=<<___ if ($mode !~ /ecb|ctr/); movdqa (%rax),%xmm0 movdqa (%rax),%xmm0 movdqa %xmm0,-16($ctx) # copy [or refresh] iv movdqa %xmm0,-16($ctx) # copy [or refresh] iv ___ ___ $code.=<<___ if ($mode eq "ctr32"); mov -4($ctx),%eax # pull 32-bit counter bswap %eax add \$0x10000,%eax bswap %eax mov %eax,-4($ctx) mov %r11,$chunk # restore paramters mov %r10,$len sub $chunk,$len mov \$`16*0x10000`,$chunk jnz .L${mode}_aligned_loop ___ $code.=<<___; $code.=<<___; .L${mode}_exit: .L${mode}_exit: mov \$1,%eax mov \$1,%eax Loading @@ -320,7 +381,7 @@ ___ &generate_mode("cbc",0xd0); &generate_mode("cbc",0xd0); &generate_mode("cfb",0xe0); &generate_mode("cfb",0xe0); &generate_mode("ofb",0xe8); &generate_mode("ofb",0xe8); &generate_mode("ctr16",0xd8); &generate_mode("ctr32",0xd8); # all 64-bit CPUs have working CTR... $code.=<<___; $code.=<<___; .asciz "VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>" .asciz "VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>" Loading
engines/e_padlock.c +45 −3 Original line number Original line Diff line number Diff line Loading @@ -76,6 +76,7 @@ #endif #endif #include <openssl/rand.h> #include <openssl/rand.h> #include <openssl/err.h> #include <openssl/err.h> #include <openssl/modes.h> #ifndef OPENSSL_NO_HW #ifndef OPENSSL_NO_HW #ifndef OPENSSL_NO_HW_PADLOCK #ifndef OPENSSL_NO_HW_PADLOCK Loading Loading @@ -337,16 +338,19 @@ static int padlock_cipher_nids[] = { NID_aes_128_cbc, NID_aes_128_cbc, NID_aes_128_cfb, NID_aes_128_cfb, NID_aes_128_ofb, NID_aes_128_ofb, NID_aes_128_ctr, NID_aes_192_ecb, NID_aes_192_ecb, NID_aes_192_cbc, NID_aes_192_cbc, NID_aes_192_cfb, NID_aes_192_cfb, NID_aes_192_ofb, NID_aes_192_ofb, NID_aes_192_ctr, NID_aes_256_ecb, NID_aes_256_ecb, NID_aes_256_cbc, NID_aes_256_cbc, NID_aes_256_cfb, NID_aes_256_cfb, NID_aes_256_ofb, NID_aes_256_ofb, NID_aes_256_ctr }; }; static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/ static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/ sizeof(padlock_cipher_nids[0])); sizeof(padlock_cipher_nids[0])); Loading Loading @@ -505,10 +509,35 @@ padlock_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, return 1; return 1; } } static void padlock_ctr32_encrypt_glue(const unsigned char *in, unsigned char *out, size_t blocks, struct padlock_cipher_data *ctx, const unsigned char *ivec) { memcpy(ctx->iv,ivec,AES_BLOCK_SIZE); padlock_ctr32_encrypt(out,in,ctx,AES_BLOCK_SIZE*blocks); } static int padlock_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, const unsigned char *in_arg, size_t nbytes) { struct padlock_cipher_data *cdata = ALIGNED_CIPHER_DATA(ctx); unsigned int num = ctx->num; CRYPTO_ctr128_encrypt_ctr32(in_arg,out_arg,nbytes, cdata,ctx->iv,ctx->buf,&num, (ctr128_f)padlock_ctr32_encrypt_glue); ctx->num = (size_t)num; return 1; } #define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE #define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE #define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE #define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE #define EVP_CIPHER_block_size_OFB 1 #define EVP_CIPHER_block_size_OFB 1 #define EVP_CIPHER_block_size_CFB 1 #define EVP_CIPHER_block_size_CFB 1 #define EVP_CIPHER_block_size_CTR 1 /* Declaring so many ciphers by hand would be a pain. /* Declaring so many ciphers by hand would be a pain. Instead introduce a bit of preprocessor magic :-) */ Instead introduce a bit of preprocessor magic :-) */ Loading @@ -533,16 +562,19 @@ DECLARE_AES_EVP(128,ecb,ECB); DECLARE_AES_EVP(128,cbc,CBC); DECLARE_AES_EVP(128,cbc,CBC); DECLARE_AES_EVP(128,cfb,CFB); DECLARE_AES_EVP(128,cfb,CFB); DECLARE_AES_EVP(128,ofb,OFB); DECLARE_AES_EVP(128,ofb,OFB); DECLARE_AES_EVP(128,ctr,CTR); DECLARE_AES_EVP(192,ecb,ECB); DECLARE_AES_EVP(192,ecb,ECB); DECLARE_AES_EVP(192,cbc,CBC); DECLARE_AES_EVP(192,cbc,CBC); DECLARE_AES_EVP(192,cfb,CFB); DECLARE_AES_EVP(192,cfb,CFB); DECLARE_AES_EVP(192,ofb,OFB); DECLARE_AES_EVP(192,ofb,OFB); DECLARE_AES_EVP(192,ctr,CTR); DECLARE_AES_EVP(256,ecb,ECB); DECLARE_AES_EVP(256,ecb,ECB); DECLARE_AES_EVP(256,cbc,CBC); DECLARE_AES_EVP(256,cbc,CBC); DECLARE_AES_EVP(256,cfb,CFB); DECLARE_AES_EVP(256,cfb,CFB); DECLARE_AES_EVP(256,ofb,OFB); DECLARE_AES_EVP(256,ofb,OFB); DECLARE_AES_EVP(256,ctr,CTR); static int static int padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid) padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid) Loading @@ -567,6 +599,9 @@ padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid case NID_aes_128_ofb: case NID_aes_128_ofb: *cipher = &padlock_aes_128_ofb; *cipher = &padlock_aes_128_ofb; break; break; case NID_aes_128_ctr: *cipher = &padlock_aes_128_ctr; break; case NID_aes_192_ecb: case NID_aes_192_ecb: *cipher = &padlock_aes_192_ecb; *cipher = &padlock_aes_192_ecb; Loading @@ -580,6 +615,9 @@ padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid case NID_aes_192_ofb: case NID_aes_192_ofb: *cipher = &padlock_aes_192_ofb; *cipher = &padlock_aes_192_ofb; break; break; case NID_aes_192_ctr: *cipher = &padlock_aes_192_ctr; break; case NID_aes_256_ecb: case NID_aes_256_ecb: *cipher = &padlock_aes_256_ecb; *cipher = &padlock_aes_256_ecb; Loading @@ -593,6 +631,9 @@ padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid case NID_aes_256_ofb: case NID_aes_256_ofb: *cipher = &padlock_aes_256_ofb; *cipher = &padlock_aes_256_ofb; break; break; case NID_aes_256_ctr: *cipher = &padlock_aes_256_ctr; break; default: default: /* Sorry, we don't support this NID */ /* Sorry, we don't support this NID */ Loading @@ -610,6 +651,7 @@ padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key, { { struct padlock_cipher_data *cdata; struct padlock_cipher_data *cdata; int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8; int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8; unsigned long mode = EVP_CIPHER_CTX_mode(ctx); if (key==NULL) return 0; /* ERROR */ if (key==NULL) return 0; /* ERROR */ Loading @@ -617,7 +659,7 @@ padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key, memset(cdata, 0, sizeof(struct padlock_cipher_data)); memset(cdata, 0, sizeof(struct padlock_cipher_data)); /* Prepare Control word. */ /* Prepare Control word. */ if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE) if (mode == EVP_CIPH_OFB_MODE || mode == EVP_CIPH_CTR_MODE) cdata->cword.b.encdec = 0; cdata->cword.b.encdec = 0; else else cdata->cword.b.encdec = (ctx->encrypt == 0); cdata->cword.b.encdec = (ctx->encrypt == 0); Loading @@ -640,8 +682,8 @@ padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key, and is listed as hardware errata. They most and is listed as hardware errata. They most likely will fix it at some point and then likely will fix it at some point and then a check for stepping would be due here. */ a check for stepping would be due here. */ if ((EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_ECB_MODE || if ((mode == EVP_CIPH_ECB_MODE || EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CBC_MODE) mode == EVP_CIPH_CBC_MODE) && !enc) && !enc) AES_set_decrypt_key(key, key_len, &cdata->ks); AES_set_decrypt_key(key, key_len, &cdata->ks); else else Loading