Loading crypto/aes/asm/aes-ia64.S +41 −31 Original line number Diff line number Diff line Loading @@ -24,7 +24,9 @@ rk0=r8; rk1=r9; prsave=r10; pfssave=r2; lcsave=r10; prsave=r3; maskff=r11; twenty4=r14; sixteen=r15; Loading Loading @@ -67,6 +69,9 @@ te0=r40; te1=r41; te2=r42; te3=r43; // Clobber: r16-r31,rk0-rk1,r32-r43 .align 32 _ia64_AES_encrypt: .prologue .altrp b6 .body { .mmi; alloc r16=ar.pfs,12,0,0,8 LDKEY t0=[rk0],2*KSZ mov pr.rot=1<<16 } Loading Loading @@ -179,20 +184,21 @@ _ia64_AES_encrypt: .skip 16 AES_encrypt: .prologue .save ar.pfs,r2 { .mmi; alloc r2=ar.pfs,3,0,12,0 addl out8=@ltoff(AES_Te#),gp .save ar.lc,r3 mov r3=ar.lc } { .mmi; and out0=3,in0 ADDP in0=0,in0 ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds .save ar.pfs,pfssave { .mmi; alloc pfssave=ar.pfs,3,0,12,0 and out0=3,in0 mov r3=ip } { .mmi; ADDP in0=0,in0 ADDP out11=KSZ*60,in2 // &AES_KEY->rounds .save ar.lc,lcsave mov lcsave=ar.lc };; .body { .mmi; ld8 out8=[out8] // Te0 ld4 out11=[out11] // AES_KEY->rounds { .mmi; ld4 out11=[out11] // AES_KEY->rounds add out8=(AES_Te#-AES_encrypt#),r3 // Te0 .save pr,prsave mov prsave=pr } .body #if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles... 
{ .mib; cmp.ne p6,p0=out0,r0 add out0=4,in0 Loading @@ -215,8 +221,8 @@ AES_encrypt: ADDP in1=0,in1 (p6) br.spnt .Le_o_unaligned };; { .mii; mov ar.pfs=r2 mov ar.lc=r3 } { .mii; mov ar.pfs=pfssave mov ar.lc=lcsave } { .mmi; st4 [in1]=r16,8 // s0 st4 [in0]=r20,8 // s1 mov pr=prsave,0x1ffff };; Loading Loading @@ -299,10 +305,10 @@ AES_encrypt: mov pr=prsave,0x1ffff }//;; { .mmi; st1 [out1]=r26,4 st1 [out0]=r27,4 mov ar.pfs=r2 };; mov ar.pfs=pfssave };; { .mmi; st1 [out3]=r28 st1 [out2]=r29 mov ar.lc=r3 }//;; mov ar.lc=lcsave }//;; { .mmb; st1 [out1]=r30 st1 [out0]=r31 br.ret.sptk.many b0 };; Loading Loading @@ -359,6 +365,9 @@ while(<>) { // Clobber: r16-r31,rk0-rk1,r32-r43 .align 32 _ia64_AES_decrypt: .prologue .altrp b6 .body { .mmi; alloc r16=ar.pfs,12,0,0,8 LDKEY t0=[rk0],2*KSZ mov pr.rot=1<<16 } Loading Loading @@ -471,20 +480,21 @@ _ia64_AES_decrypt: .skip 16 AES_decrypt: .prologue .save ar.pfs,r2 { .mmi; alloc r2=ar.pfs,3,0,12,0 addl out8=@ltoff(AES_Td#),gp .save ar.lc,r3 mov r3=ar.lc } { .mmi; and out0=3,in0 ADDP in0=0,in0 ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds .save ar.pfs,pfssave { .mmi; alloc pfssave=ar.pfs,3,0,12,0 and out0=3,in0 mov r3=ip } { .mmi; ADDP in0=0,in0 ADDP out11=KSZ*60,in2 // &AES_KEY->rounds .save ar.lc,lcsave mov lcsave=ar.lc };; .body { .mmi; ld8 out8=[out8] // Te0 ld4 out11=[out11] // AES_KEY->rounds { .mmi; ld4 out11=[out11] // AES_KEY->rounds add out8=(AES_Td#-AES_decrypt#),r3 // Td0 .save pr,prsave mov prsave=pr } .body #if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles... 
{ .mib; cmp.ne p6,p0=out0,r0 add out0=4,in0 Loading @@ -507,8 +517,8 @@ AES_decrypt: ADDP in1=0,in1 (p6) br.spnt .Ld_o_unaligned };; { .mii; mov ar.pfs=r2 mov ar.lc=r3 } { .mii; mov ar.pfs=pfssave mov ar.lc=lcsave } { .mmi; st4 [in1]=r16,8 // s0 st4 [in0]=r20,8 // s1 mov pr=prsave,0x1ffff };; Loading Loading @@ -591,10 +601,10 @@ AES_decrypt: mov pr=prsave,0x1ffff }//;; { .mmi; st1 [out1]=r26,4 st1 [out0]=r27,4 mov ar.pfs=r2 };; mov ar.pfs=pfssave };; { .mmi; st1 [out3]=r28 st1 [out2]=r29 mov ar.lc=r3 }//;; mov ar.lc=lcsave }//;; { .mmb; st1 [out1]=r30 st1 [out0]=r31 br.ret.sptk.many b0 };; Loading crypto/sha/asm/sha512-ia64.pl +15 −13 Original line number Diff line number Diff line Loading @@ -110,6 +110,8 @@ $code=<<___; .explicit .text pfssave=r2; lcsave=r3; prsave=r14; K=r15; A=r16; B=r17; C=r18; D=r19; Loading @@ -128,20 +130,17 @@ sgm0=r50; sgm1=r51; // small constants .align 32 $func: .prologue .save ar.pfs,r2 { .mmi; alloc r2=ar.pfs,3,17,0,16 .save ar.pfs,pfssave { .mmi; alloc pfssave=ar.pfs,3,17,0,16 $ADDP ctx=0,r32 // 1st arg .save ar.lc,r3 mov r3=ar.lc } .save ar.lc,lcsave mov lcsave=ar.lc } { .mmi; $ADDP input=0,r33 // 2nd arg addl Ktbl=\@ltoff($TABLE#),gp mov num=r34 // 3rd arg .save pr,prsave mov prsave=pr };; .body { .mii; ld8 Ktbl=[Ktbl] mov num=r34 };; // 3rd arg { .mib; add r8=0*$SZ,ctx add r9=1*$SZ,ctx brp.loop.imp .L_first16,.L_first16_ctop Loading @@ -151,20 +150,23 @@ $func: brp.loop.imp .L_rest,.L_rest_ctop };; // load A-H .Lpic_point: { .mmi; $LDW A=[r8],4*$SZ $LDW B=[r9],4*$SZ mov sgm0=$sigma0[2] } mov Ktbl=ip } { .mmi; $LDW C=[r10],4*$SZ $LDW D=[r11],4*$SZ mov sgm1=$sigma1[2] };; mov sgm0=$sigma0[2] };; { .mmi; $LDW E=[r8] $LDW F=[r9] } $LDW F=[r9] add Ktbl=($TABLE#-.Lpic_point),Ktbl } { .mmi; $LDW G=[r10] $LDW H=[r11] cmp.ne p15,p14=0,r35 };; // used in sha256_block .L_outer: { .mii; mov ar.lc=15 { .mii; mov sgm1=$sigma1[2] mov ar.lc=15 mov ar.ec=1 };; .align 32 .L_first16: Loading Loading @@ -329,7 +331,7 @@ $code.=<<___; (p6) add 
Ktbl=-$SZ*$rounds,Ktbl } { .mmi; $LDW r38=[r10],-4*$SZ $LDW r39=[r11],-4*$SZ (p7) mov ar.lc=r3 };; (p7) mov ar.lc=lcsave };; { .mmi; add A=A,r32 add B=B,r33 add C=C,r34 } Loading Loading
crypto/aes/asm/aes-ia64.S +41 −31 Original line number Diff line number Diff line Loading @@ -24,7 +24,9 @@ rk0=r8; rk1=r9; prsave=r10; pfssave=r2; lcsave=r10; prsave=r3; maskff=r11; twenty4=r14; sixteen=r15; Loading Loading @@ -67,6 +69,9 @@ te0=r40; te1=r41; te2=r42; te3=r43; // Clobber: r16-r31,rk0-rk1,r32-r43 .align 32 _ia64_AES_encrypt: .prologue .altrp b6 .body { .mmi; alloc r16=ar.pfs,12,0,0,8 LDKEY t0=[rk0],2*KSZ mov pr.rot=1<<16 } Loading Loading @@ -179,20 +184,21 @@ _ia64_AES_encrypt: .skip 16 AES_encrypt: .prologue .save ar.pfs,r2 { .mmi; alloc r2=ar.pfs,3,0,12,0 addl out8=@ltoff(AES_Te#),gp .save ar.lc,r3 mov r3=ar.lc } { .mmi; and out0=3,in0 ADDP in0=0,in0 ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds .save ar.pfs,pfssave { .mmi; alloc pfssave=ar.pfs,3,0,12,0 and out0=3,in0 mov r3=ip } { .mmi; ADDP in0=0,in0 ADDP out11=KSZ*60,in2 // &AES_KEY->rounds .save ar.lc,lcsave mov lcsave=ar.lc };; .body { .mmi; ld8 out8=[out8] // Te0 ld4 out11=[out11] // AES_KEY->rounds { .mmi; ld4 out11=[out11] // AES_KEY->rounds add out8=(AES_Te#-AES_encrypt#),r3 // Te0 .save pr,prsave mov prsave=pr } .body #if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles... 
{ .mib; cmp.ne p6,p0=out0,r0 add out0=4,in0 Loading @@ -215,8 +221,8 @@ AES_encrypt: ADDP in1=0,in1 (p6) br.spnt .Le_o_unaligned };; { .mii; mov ar.pfs=r2 mov ar.lc=r3 } { .mii; mov ar.pfs=pfssave mov ar.lc=lcsave } { .mmi; st4 [in1]=r16,8 // s0 st4 [in0]=r20,8 // s1 mov pr=prsave,0x1ffff };; Loading Loading @@ -299,10 +305,10 @@ AES_encrypt: mov pr=prsave,0x1ffff }//;; { .mmi; st1 [out1]=r26,4 st1 [out0]=r27,4 mov ar.pfs=r2 };; mov ar.pfs=pfssave };; { .mmi; st1 [out3]=r28 st1 [out2]=r29 mov ar.lc=r3 }//;; mov ar.lc=lcsave }//;; { .mmb; st1 [out1]=r30 st1 [out0]=r31 br.ret.sptk.many b0 };; Loading Loading @@ -359,6 +365,9 @@ while(<>) { // Clobber: r16-r31,rk0-rk1,r32-r43 .align 32 _ia64_AES_decrypt: .prologue .altrp b6 .body { .mmi; alloc r16=ar.pfs,12,0,0,8 LDKEY t0=[rk0],2*KSZ mov pr.rot=1<<16 } Loading Loading @@ -471,20 +480,21 @@ _ia64_AES_decrypt: .skip 16 AES_decrypt: .prologue .save ar.pfs,r2 { .mmi; alloc r2=ar.pfs,3,0,12,0 addl out8=@ltoff(AES_Td#),gp .save ar.lc,r3 mov r3=ar.lc } { .mmi; and out0=3,in0 ADDP in0=0,in0 ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds .save ar.pfs,pfssave { .mmi; alloc pfssave=ar.pfs,3,0,12,0 and out0=3,in0 mov r3=ip } { .mmi; ADDP in0=0,in0 ADDP out11=KSZ*60,in2 // &AES_KEY->rounds .save ar.lc,lcsave mov lcsave=ar.lc };; .body { .mmi; ld8 out8=[out8] // Te0 ld4 out11=[out11] // AES_KEY->rounds { .mmi; ld4 out11=[out11] // AES_KEY->rounds add out8=(AES_Td#-AES_decrypt#),r3 // Td0 .save pr,prsave mov prsave=pr } .body #if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles... 
{ .mib; cmp.ne p6,p0=out0,r0 add out0=4,in0 Loading @@ -507,8 +517,8 @@ AES_decrypt: ADDP in1=0,in1 (p6) br.spnt .Ld_o_unaligned };; { .mii; mov ar.pfs=r2 mov ar.lc=r3 } { .mii; mov ar.pfs=pfssave mov ar.lc=lcsave } { .mmi; st4 [in1]=r16,8 // s0 st4 [in0]=r20,8 // s1 mov pr=prsave,0x1ffff };; Loading Loading @@ -591,10 +601,10 @@ AES_decrypt: mov pr=prsave,0x1ffff }//;; { .mmi; st1 [out1]=r26,4 st1 [out0]=r27,4 mov ar.pfs=r2 };; mov ar.pfs=pfssave };; { .mmi; st1 [out3]=r28 st1 [out2]=r29 mov ar.lc=r3 }//;; mov ar.lc=lcsave }//;; { .mmb; st1 [out1]=r30 st1 [out0]=r31 br.ret.sptk.many b0 };; Loading
crypto/sha/asm/sha512-ia64.pl +15 −13 Original line number Diff line number Diff line Loading @@ -110,6 +110,8 @@ $code=<<___; .explicit .text pfssave=r2; lcsave=r3; prsave=r14; K=r15; A=r16; B=r17; C=r18; D=r19; Loading @@ -128,20 +130,17 @@ sgm0=r50; sgm1=r51; // small constants .align 32 $func: .prologue .save ar.pfs,r2 { .mmi; alloc r2=ar.pfs,3,17,0,16 .save ar.pfs,pfssave { .mmi; alloc pfssave=ar.pfs,3,17,0,16 $ADDP ctx=0,r32 // 1st arg .save ar.lc,r3 mov r3=ar.lc } .save ar.lc,lcsave mov lcsave=ar.lc } { .mmi; $ADDP input=0,r33 // 2nd arg addl Ktbl=\@ltoff($TABLE#),gp mov num=r34 // 3rd arg .save pr,prsave mov prsave=pr };; .body { .mii; ld8 Ktbl=[Ktbl] mov num=r34 };; // 3rd arg { .mib; add r8=0*$SZ,ctx add r9=1*$SZ,ctx brp.loop.imp .L_first16,.L_first16_ctop Loading @@ -151,20 +150,23 @@ $func: brp.loop.imp .L_rest,.L_rest_ctop };; // load A-H .Lpic_point: { .mmi; $LDW A=[r8],4*$SZ $LDW B=[r9],4*$SZ mov sgm0=$sigma0[2] } mov Ktbl=ip } { .mmi; $LDW C=[r10],4*$SZ $LDW D=[r11],4*$SZ mov sgm1=$sigma1[2] };; mov sgm0=$sigma0[2] };; { .mmi; $LDW E=[r8] $LDW F=[r9] } $LDW F=[r9] add Ktbl=($TABLE#-.Lpic_point),Ktbl } { .mmi; $LDW G=[r10] $LDW H=[r11] cmp.ne p15,p14=0,r35 };; // used in sha256_block .L_outer: { .mii; mov ar.lc=15 { .mii; mov sgm1=$sigma1[2] mov ar.lc=15 mov ar.ec=1 };; .align 32 .L_first16: Loading Loading @@ -329,7 +331,7 @@ $code.=<<___; (p6) add Ktbl=-$SZ*$rounds,Ktbl } { .mmi; $LDW r38=[r10],-4*$SZ $LDW r39=[r11],-4*$SZ (p7) mov ar.lc=r3 };; (p7) mov ar.lc=lcsave };; { .mmi; add A=A,r32 add B=B,r33 add C=C,r34 } Loading