Loading crypto/md5/asm/md5-sparcv9.pl +14 −13 Original line number Diff line number Diff line Loading @@ -12,7 +12,7 @@ # MD5 for SPARCv9, 6.9 cycles per byte on UltraSPARC, >40% faster than # code generated by Sun C 5.2. # SPARC T4 MD5 hardware achieves 3.24 cycles per byte, which is 2.1x # SPARC T4 MD5 hardware achieves 3.20 cycles per byte, which is 2.1x # faster than software. Multi-process benchmark saturates at 12x # single-process result on 8-core processor, or ~11GBps per 2.85GHz # socket. Loading Loading @@ -221,15 +221,15 @@ md5_block_asm_data_order: be .Lsoftware nop rd %asi, %g5 wr %g0, 0x88, %asi ! ASI_PRIMARY_LITTLE lda [%o0 + 0x00] %asi, %f0 ! load context lda [%o0 + 0x04] %asi, %f1 mov 4, %g1 andcc %o1, 0x7, %g0 lda [%o0 + 0x08] %asi, %f2 lda [%o0 + %g0]0x88, %f0 ! load context lda [%o0 + %g1]0x88, %f1 add %o0, 8, %o0 lda [%o0 + %g0]0x88, %f2 lda [%o0 + %g1]0x88, %f3 bne,pn %icc, .Lhwunaligned lda [%o0 + 0x0c] %asi, %f3 sub %o0, 8, %o0 .Lhw_loop: ldd [%o1 + 0x00], %f8 Loading @@ -250,12 +250,13 @@ md5_block_asm_data_order: nop .Lhwfinish: sta %f0, [%o0 + 0x00] %asi ! store context sta %f1, [%o0 + 0x04] %asi sta %f2, [%o0 + 0x08] %asi sta %f3, [%o0 + 0x0c] %asi sta %f0, [%o0 + %g0]0x88 ! store context sta %f1, [%o0 + %g1]0x88 add %o0, 8, %o0 sta %f2, [%o0 + %g0]0x88 sta %f3, [%o0 + %g1]0x88 retl wr %g5, 0x0, %asi ! restore %asi nop .align 8 .Lhwunaligned: Loading Loading
crypto/md5/asm/md5-sparcv9.pl +14 −13 Original line number Diff line number Diff line Loading @@ -12,7 +12,7 @@ # MD5 for SPARCv9, 6.9 cycles per byte on UltraSPARC, >40% faster than # code generated by Sun C 5.2. # SPARC T4 MD5 hardware achieves 3.24 cycles per byte, which is 2.1x # SPARC T4 MD5 hardware achieves 3.20 cycles per byte, which is 2.1x # faster than software. Multi-process benchmark saturates at 12x # single-process result on 8-core processor, or ~11GBps per 2.85GHz # socket. Loading Loading @@ -221,15 +221,15 @@ md5_block_asm_data_order: be .Lsoftware nop rd %asi, %g5 wr %g0, 0x88, %asi ! ASI_PRIMARY_LITTLE lda [%o0 + 0x00] %asi, %f0 ! load context lda [%o0 + 0x04] %asi, %f1 mov 4, %g1 andcc %o1, 0x7, %g0 lda [%o0 + 0x08] %asi, %f2 lda [%o0 + %g0]0x88, %f0 ! load context lda [%o0 + %g1]0x88, %f1 add %o0, 8, %o0 lda [%o0 + %g0]0x88, %f2 lda [%o0 + %g1]0x88, %f3 bne,pn %icc, .Lhwunaligned lda [%o0 + 0x0c] %asi, %f3 sub %o0, 8, %o0 .Lhw_loop: ldd [%o1 + 0x00], %f8 Loading @@ -250,12 +250,13 @@ md5_block_asm_data_order: nop .Lhwfinish: sta %f0, [%o0 + 0x00] %asi ! store context sta %f1, [%o0 + 0x04] %asi sta %f2, [%o0 + 0x08] %asi sta %f3, [%o0 + 0x0c] %asi sta %f0, [%o0 + %g0]0x88 ! store context sta %f1, [%o0 + %g1]0x88 add %o0, 8, %o0 sta %f2, [%o0 + %g0]0x88 sta %f3, [%o0 + %g1]0x88 retl wr %g5, 0x0, %asi ! restore %asi nop .align 8 .Lhwunaligned: Loading