Loading crypto/aes/asm/aes-ppc.pl +157 −1 Original line number Diff line number Diff line Loading @@ -7,7 +7,7 @@ # details see http://www.openssl.org/~appro/cryptogams/. # ==================================================================== # Needs more work: key setup, page boundaries, CBC routine... # Needs more work: key setup, CBC routine... # # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with # 128-bit key, which is ~40% better than 64-bit code generated by gcc Loading Loading @@ -359,6 +359,12 @@ $code.=<<___; $PUSH r31,`$FRAME-$SIZE_T*1`($sp) $PUSH r0,`$FRAME+$LRSAVE`($sp) andi. $t0,$inp,3 andi. $t1,$out,3 or. $t0,$t0,$t1 bne Lenc_unaligned Lenc_unaligned_ok: lwz $s0,0($inp) lwz $s1,4($inp) lwz $s2,8($inp) Loading @@ -369,7 +375,79 @@ $code.=<<___; stw $s1,4($out) stw $s2,8($out) stw $s3,12($out) b Lenc_done Lenc_unaligned: subfic $t0,$inp,4096 subfic $t1,$out,4096 andi. $t0,$t0,4096-16 beq Lenc_xpage andi. $t1,$t1,4096-16 bne Lenc_unaligned_ok Lenc_xpage: lbz $acc00,0($inp) lbz $acc01,1($inp) lbz $acc02,2($inp) lbz $s0,3($inp) lbz $acc04,4($inp) lbz $acc05,5($inp) lbz $acc06,6($inp) lbz $s1,7($inp) lbz $acc08,8($inp) lbz $acc09,9($inp) lbz $acc10,10($inp) lbz $s2,11($inp) lbz $acc12,12($inp) lbz $acc13,13($inp) lbz $acc14,14($inp) lbz $s3,15($inp) insrwi $s0,$acc00,8,0 insrwi $s1,$acc04,8,0 insrwi $s0,$acc01,8,8 insrwi $s1,$acc05,8,8 insrwi $s0,$acc02,8,16 insrwi $s1,$acc06,8,16 insrwi $s2,$acc08,8,0 insrwi $s3,$acc12,8,0 insrwi $s2,$acc09,8,8 insrwi $s3,$acc13,8,8 insrwi $s2,$acc10,8,16 insrwi $s3,$acc14,8,16 bl LAES_Te bl Lppc_AES_encrypt_compact extrwi $acc00,$s0,8,0 extrwi $acc01,$s0,8,8 stb $acc00,0($out) extrwi $acc02,$s0,8,16 stb $acc01,1($out) stb $acc02,2($out) extrwi $acc04,$s1,8,0 stb $s0,3($out) extrwi $acc05,$s1,8,8 stb $acc04,4($out) extrwi $acc06,$s1,8,16 stb $acc05,5($out) stb $acc06,6($out) extrwi $acc08,$s2,8,0 stb $s1,7($out) extrwi $acc09,$s2,8,8 stb $acc08,8($out) extrwi $acc10,$s2,8,16 stb $acc09,9($out) stb $acc10,10($out) extrwi $acc12,$s3,8,0 stb $s2,11($out) extrwi $acc13,$s3,8,8 stb $acc12,12($out) extrwi $acc14,$s3,8,16 stb $acc13,13($out) stb $acc14,14($out) stb $s3,15($out) Lenc_done: $POP r0,`$FRAME+$LRSAVE`($sp) $POP $toc,`$FRAME-$SIZE_T*20`($sp) $POP r13,`$FRAME-$SIZE_T*19`($sp) Loading Loading @@ -715,6 +793,12 @@ Lenc_compact_done: $PUSH r31,`$FRAME-$SIZE_T*1`($sp) $PUSH r0,`$FRAME+$LRSAVE`($sp) andi. $t0,$inp,3 andi. $t1,$out,3 or. $t0,$t0,$t1 bne Ldec_unaligned Ldec_unaligned_ok: lwz $s0,0($inp) lwz $s1,4($inp) lwz $s2,8($inp) Loading @@ -725,7 +809,79 @@ Lenc_compact_done: stw $s1,4($out) stw $s2,8($out) stw $s3,12($out) b Ldec_done Ldec_unaligned: subfic $t0,$inp,4096 subfic $t1,$out,4096 andi. $t0,$t0,4096-16 beq Ldec_xpage andi. $t1,$t1,4096-16 bne Ldec_unaligned_ok Ldec_xpage: lbz $acc00,0($inp) lbz $acc01,1($inp) lbz $acc02,2($inp) lbz $s0,3($inp) lbz $acc04,4($inp) lbz $acc05,5($inp) lbz $acc06,6($inp) lbz $s1,7($inp) lbz $acc08,8($inp) lbz $acc09,9($inp) lbz $acc10,10($inp) lbz $s2,11($inp) lbz $acc12,12($inp) lbz $acc13,13($inp) lbz $acc14,14($inp) lbz $s3,15($inp) insrwi $s0,$acc00,8,0 insrwi $s1,$acc04,8,0 insrwi $s0,$acc01,8,8 insrwi $s1,$acc05,8,8 insrwi $s0,$acc02,8,16 insrwi $s1,$acc06,8,16 insrwi $s2,$acc08,8,0 insrwi $s3,$acc12,8,0 insrwi $s2,$acc09,8,8 insrwi $s3,$acc13,8,8 insrwi $s2,$acc10,8,16 insrwi $s3,$acc14,8,16 bl LAES_Td bl Lppc_AES_decrypt_compact extrwi $acc00,$s0,8,0 extrwi $acc01,$s0,8,8 stb $acc00,0($out) extrwi $acc02,$s0,8,16 stb $acc01,1($out) stb $acc02,2($out) extrwi $acc04,$s1,8,0 stb $s0,3($out) extrwi $acc05,$s1,8,8 stb $acc04,4($out) extrwi $acc06,$s1,8,16 stb $acc05,5($out) stb $acc06,6($out) extrwi $acc08,$s2,8,0 stb $s1,7($out) extrwi $acc09,$s2,8,8 stb $acc08,8($out) extrwi $acc10,$s2,8,16 stb $acc09,9($out) stb $acc10,10($out) extrwi $acc12,$s3,8,0 stb $s2,11($out) extrwi $acc13,$s3,8,8 stb $acc12,12($out) extrwi $acc14,$s3,8,16 stb $acc13,13($out) stb $acc14,14($out) stb $s3,15($out) Ldec_done: $POP r0,`$FRAME+$LRSAVE`($sp) $POP $toc,`$FRAME-$SIZE_T*20`($sp) $POP r13,`$FRAME-$SIZE_T*19`($sp) Loading Loading
crypto/aes/asm/aes-ppc.pl +157 −1 Original line number Diff line number Diff line Loading @@ -7,7 +7,7 @@ # details see http://www.openssl.org/~appro/cryptogams/. # ==================================================================== # Needs more work: key setup, page boundaries, CBC routine... # Needs more work: key setup, CBC routine... # # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with # 128-bit key, which is ~40% better than 64-bit code generated by gcc Loading Loading @@ -359,6 +359,12 @@ $code.=<<___; $PUSH r31,`$FRAME-$SIZE_T*1`($sp) $PUSH r0,`$FRAME+$LRSAVE`($sp) andi. $t0,$inp,3 andi. $t1,$out,3 or. $t0,$t0,$t1 bne Lenc_unaligned Lenc_unaligned_ok: lwz $s0,0($inp) lwz $s1,4($inp) lwz $s2,8($inp) Loading @@ -369,7 +375,79 @@ $code.=<<___; stw $s1,4($out) stw $s2,8($out) stw $s3,12($out) b Lenc_done Lenc_unaligned: subfic $t0,$inp,4096 subfic $t1,$out,4096 andi. $t0,$t0,4096-16 beq Lenc_xpage andi. $t1,$t1,4096-16 bne Lenc_unaligned_ok Lenc_xpage: lbz $acc00,0($inp) lbz $acc01,1($inp) lbz $acc02,2($inp) lbz $s0,3($inp) lbz $acc04,4($inp) lbz $acc05,5($inp) lbz $acc06,6($inp) lbz $s1,7($inp) lbz $acc08,8($inp) lbz $acc09,9($inp) lbz $acc10,10($inp) lbz $s2,11($inp) lbz $acc12,12($inp) lbz $acc13,13($inp) lbz $acc14,14($inp) lbz $s3,15($inp) insrwi $s0,$acc00,8,0 insrwi $s1,$acc04,8,0 insrwi $s0,$acc01,8,8 insrwi $s1,$acc05,8,8 insrwi $s0,$acc02,8,16 insrwi $s1,$acc06,8,16 insrwi $s2,$acc08,8,0 insrwi $s3,$acc12,8,0 insrwi $s2,$acc09,8,8 insrwi $s3,$acc13,8,8 insrwi $s2,$acc10,8,16 insrwi $s3,$acc14,8,16 bl LAES_Te bl Lppc_AES_encrypt_compact extrwi $acc00,$s0,8,0 extrwi $acc01,$s0,8,8 stb $acc00,0($out) extrwi $acc02,$s0,8,16 stb $acc01,1($out) stb $acc02,2($out) extrwi $acc04,$s1,8,0 stb $s0,3($out) extrwi $acc05,$s1,8,8 stb $acc04,4($out) extrwi $acc06,$s1,8,16 stb $acc05,5($out) stb $acc06,6($out) extrwi $acc08,$s2,8,0 stb $s1,7($out) extrwi $acc09,$s2,8,8 stb $acc08,8($out) extrwi $acc10,$s2,8,16 stb $acc09,9($out) stb $acc10,10($out) extrwi $acc12,$s3,8,0 stb $s2,11($out) extrwi $acc13,$s3,8,8 stb $acc12,12($out) extrwi $acc14,$s3,8,16 stb $acc13,13($out) stb $acc14,14($out) stb $s3,15($out) Lenc_done: $POP r0,`$FRAME+$LRSAVE`($sp) $POP $toc,`$FRAME-$SIZE_T*20`($sp) $POP r13,`$FRAME-$SIZE_T*19`($sp) Loading Loading @@ -715,6 +793,12 @@ Lenc_compact_done: $PUSH r31,`$FRAME-$SIZE_T*1`($sp) $PUSH r0,`$FRAME+$LRSAVE`($sp) andi. $t0,$inp,3 andi. $t1,$out,3 or. $t0,$t0,$t1 bne Ldec_unaligned Ldec_unaligned_ok: lwz $s0,0($inp) lwz $s1,4($inp) lwz $s2,8($inp) Loading @@ -725,7 +809,79 @@ Lenc_compact_done: stw $s1,4($out) stw $s2,8($out) stw $s3,12($out) b Ldec_done Ldec_unaligned: subfic $t0,$inp,4096 subfic $t1,$out,4096 andi. $t0,$t0,4096-16 beq Ldec_xpage andi. $t1,$t1,4096-16 bne Ldec_unaligned_ok Ldec_xpage: lbz $acc00,0($inp) lbz $acc01,1($inp) lbz $acc02,2($inp) lbz $s0,3($inp) lbz $acc04,4($inp) lbz $acc05,5($inp) lbz $acc06,6($inp) lbz $s1,7($inp) lbz $acc08,8($inp) lbz $acc09,9($inp) lbz $acc10,10($inp) lbz $s2,11($inp) lbz $acc12,12($inp) lbz $acc13,13($inp) lbz $acc14,14($inp) lbz $s3,15($inp) insrwi $s0,$acc00,8,0 insrwi $s1,$acc04,8,0 insrwi $s0,$acc01,8,8 insrwi $s1,$acc05,8,8 insrwi $s0,$acc02,8,16 insrwi $s1,$acc06,8,16 insrwi $s2,$acc08,8,0 insrwi $s3,$acc12,8,0 insrwi $s2,$acc09,8,8 insrwi $s3,$acc13,8,8 insrwi $s2,$acc10,8,16 insrwi $s3,$acc14,8,16 bl LAES_Td bl Lppc_AES_decrypt_compact extrwi $acc00,$s0,8,0 extrwi $acc01,$s0,8,8 stb $acc00,0($out) extrwi $acc02,$s0,8,16 stb $acc01,1($out) stb $acc02,2($out) extrwi $acc04,$s1,8,0 stb $s0,3($out) extrwi $acc05,$s1,8,8 stb $acc04,4($out) extrwi $acc06,$s1,8,16 stb $acc05,5($out) stb $acc06,6($out) extrwi $acc08,$s2,8,0 stb $s1,7($out) extrwi $acc09,$s2,8,8 stb $acc08,8($out) extrwi $acc10,$s2,8,16 stb $acc09,9($out) stb $acc10,10($out) extrwi $acc12,$s3,8,0 stb $s2,11($out) extrwi $acc13,$s3,8,8 stb $acc12,12($out) extrwi $acc14,$s3,8,16 stb $acc13,13($out) stb $acc14,14($out) stb $s3,15($out) Ldec_done: $POP r0,`$FRAME+$LRSAVE`($sp) $POP $toc,`$FRAME-$SIZE_T*20`($sp) $POP r13,`$FRAME-$SIZE_T*19`($sp) Loading