Loading crypto/bn/asm/alpha-mont.pl +23 −31 Original line number Diff line number Diff line Loading @@ -258,56 +258,48 @@ bn_mul_mont: stq $hi1,16($tp) bne $tj,.Louter s8addq $num,sp,$ap mov $rp,$bp s8addq $num,sp,$tj # &tp[num] mov $rp,$bp # put rp aside mov sp,$tp mov 0,$hi0 bne $hi1,.Lsub cmpult $nj,$lo1,AT bne AT,.Lsub .align 4 .Lcopy: ldq AT,($tp) lda $tp,8($tp) stq AT,($rp) cmpult $tp,$ap,AT stq zero,-8($tp) nop lda $rp,8($rp) bne AT,.Lcopy mov 1,v0 br .Lexit mov sp,$ap srl $nj,62,AT # boundary condition... beq AT,.Lcopy # ... is met mov 0,$hi0 # clear borrow bit .align 4 .Lsub: ldq $lo0,($tp) ldq $lo1,($np) subq $lo0,$lo1,$lo1 lda $tp,8($tp) lda $np,8($np) subq $lo0,$lo1,$lo1 # tp[i]-np[i] cmpult $lo0,$lo1,AT subq $lo1,$hi0,$lo0 cmpult $lo1,$lo0,$hi0 lda $tp,8($tp) or $hi0,AT,$hi0 lda $np,8($np) stq $lo0,($rp) cmpult $tp,$ap,v0 cmpult $tp,$tj,v0 lda $rp,8($rp) bne v0,.Lsub subq $hi1,$hi0,$hi0 subq $hi1,$hi0,$hi0 # handle upmost overflow bit mov sp,$tp cmpule $hi1,$hi0,AT mov $bp,$rp bne AT,.Lcopy mov $bp,$rp # restore rp and sp,$hi0,$ap bic $bp,$hi0,$bp bis $bp,$ap,$ap # ap=borrow?tp:rp .align 4 .Lzap: stq zero,($tp) cmpult $tp,$ap,AT .Lcopy: ldq $aj,($ap) # copy or in-place refresh lda $tp,8($tp) bne AT,.Lzap lda $rp,8($rp) lda $ap,8($ap) stq zero,-8($tp) # zap tp cmpult $tp,$tj,AT stq $aj,-8($rp) bne AT,.Lcopy mov 1,v0 .align 4 .Lexit: .set noreorder mov fp,sp Loading crypto/bn/asm/armv4-mont.pl +21 −23 Original line number Diff line number Diff line Loading @@ -61,7 +61,7 @@ bn_mul_mont: cmp $num,#2 movlt r0,#0 addlt sp,sp,#2*4 blt .Labort blt .Labrt stmdb sp!,{r4-r12,lr} @ save 10 registers Loading Loading @@ -160,27 +160,13 @@ bn_mul_mont: add $num,$num,#4 @ $num to point at &tp[num] sub $aj,$num,sp @ "original" num value mov $tp,sp @ "rewind" $tp mov $ap,$tp @ "borrow" $ap sub $np,$np,$aj @ "rewind" $np to &np[0] cmp $nhi,#0 @ upmost carry bne .Lsub cmp $nlo,$nj @ tp[num-1]-np[num-1] bhs .Lsub .Lcopy: ldr $tj,[$tp] str sp,[$tp],#4 @ zap tp str $tj,[$rp],#4 cmp $tp,$num bne .Lcopy .Lexit: add sp,$num,#4 @ skip over tp[num+1] ldmia sp!,{r4-r12,lr} @ restore registers add sp,sp,#2*4 @ skip over {r0,r2} mov r0,#1 .Labort:tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) movs $tj,$nj,lsr#30 @ boundary condition... beq .Lcopy @ ... is met subs $tj,$tj,$tj @ "clear" carry flag .Lsub: ldr $tj,[$tp],#4 ldr $nj,[$np],#4 sbcs $tj,$tj,$nj @ tp[j]-np[j] Loading @@ -190,12 +176,24 @@ bn_mul_mont: sbcs $nhi,$nhi,#0 @ upmost carry mov $tp,sp @ "rewind" $tp sub $rp,$rp,$aj @ "rewind" $rp blo .Lcopy @ tp was less after all .Lzap: str sp,[$tp],#4 and $ap,$tp,$nhi bic $np,$rp,$nhi orr $ap,$ap,$np @ ap=borrow?tp:rp .Lcopy: ldr $tj,[$ap],#4 @ copy or in-place refresh str sp,[$tp],#4 @ zap tp str $tj,[$rp],#4 cmp $tp,$num bne .Lzap bal .Lexit bne .Lcopy add sp,$num,#4 @ skip over tp[num+1] ldmia sp!,{r4-r12,lr} @ restore registers add sp,sp,#2*4 @ skip over {r0,r2} mov r0,#1 .Labrt: tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) .size bn_mul_mont,.-bn_mul_mont .asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>" ___ Loading crypto/bn/asm/mips3-mont.pl +34 −39 Original line number Diff line number Diff line Loading @@ -265,27 +265,50 @@ bn_mul_mont: addu $i,8 sltu s7,$i,$num bnez s7,.Louter .set noreorder PTR_ADD $ap,sp,$num PTR_ADD $tj,sp,$num # &tp[num] move $tp,sp move $ap,sp bnez $hi1,.Lsub li $hi0,0 sgeu AT,$lo1,$nj beqz AT,.Lsub nop dsrl AT,$nj,62 # boundary condition... beqz AT,.Lcopy # ... is met li $hi0,0 # clear borrow bit .align 4 .Lcopy: ld AT,($tp) .Lsub: ld $lo0,($tp) ld $lo1,($np) PTR_ADD $tp,8 PTR_ADD $np,8 dsubu $lo1,$lo0,$lo1 # tp[i]-np[i] sgtu AT,$lo1,$lo0 dsubu $lo0,$lo1,$hi0 sgtu $hi0,$lo0,$lo1 sd $lo0,($rp) or $hi0,AT sltu AT,$tp,$tj bnez AT,.Lsub PTR_ADD $rp,8 dsubu $hi0,$hi1,$hi0 # handle upmost overflow bit move $tp,sp PTR_SUB $rp,$num # restore rp not $hi1,$hi0 and $ap,$hi0,sp and $bp,$hi1,$rp or $ap,$ap,$bp # ap=borrow?tp:rp .align 4 .Lcopy: ld $aj,($ap) PTR_ADD $ap,8 PTR_ADD $tp,8 sd AT,($rp) sltu AT,$tp,$ap sd zero,-8($tp) sltu AT,$tp,$tj sd $aj,($rp) bnez AT,.Lcopy PTR_ADD $rp,8 .Lexit: ld s0,0($fp) ld s1,8($fp) ld s2,16($fp) Loading @@ -297,34 +320,6 @@ bn_mul_mont: li v0,1 jr ra PTR_ADD sp,$fp,64 .align 4 .Lsub: ld $lo0,($tp) ld $lo1,($np) dsubu $lo1,$lo0,$lo1 sgtu AT,$lo1,$lo0 dsubu $lo0,$lo1,$hi0 sgtu $hi0,$lo0,$lo1 PTR_ADD $tp,8 or $hi0,AT PTR_ADD $np,8 sd $lo0,($rp) sltu AT,$tp,$ap bnez AT,.Lsub PTR_ADD $rp,8 dsubu $hi0,$hi1,$hi0 move $tp,sp sgtu AT,$hi0,$hi1 bnez AT,.Lcopy PTR_SUB $rp,$num .align 4 .Lzap: sd zero,($tp) sltu AT,$tp,$ap bnez AT,.Lzap PTR_ADD $tp,8 b .Lexit nop .set reorder END(bn_mul_mont) .rdata Loading crypto/bn/asm/ppc-mont.pl +26 −25 Original line number Diff line number Diff line Loading @@ -2,8 +2,9 @@ # ==================================================================== # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL # project. Rights for redistribution and usage in source and binary # forms are granted according to the OpenSSL license. # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. # ==================================================================== # April 2006 Loading Loading @@ -42,6 +43,7 @@ if ($output =~ /32\-mont\.s/) { $UMULL= "mullw"; # unsigned multiply low $UMULH= "mulhwu"; # unsigned multiply high $UCMP= "cmplw"; # unsigned compare $SHRI= "srwi"; # unsigned shift right by immediate $PUSH= $ST; $POP= $LD; } elsif ($output =~ /64\-mont\.s/) { Loading @@ -62,6 +64,7 @@ if ($output =~ /32\-mont\.s/) { $UMULL= "mulld"; # unsigned multiply low $UMULH= "mulhdu"; # unsigned multiply high $UCMP= "cmpld"; # unsigned compare $SHRI= "srdi"; # unsigned shift right by immediate $PUSH= $ST; $POP= $LD; } else { die "nonsense $output"; } Loading Loading @@ -264,24 +267,37 @@ Linner: addi $i,$i,$BNSZ ble- Louter $SHRI. $nj,$nj,$BITS-2 ; check boundary condition addi $num,$num,2 ; restore $num subfc $j,$j,$j ; j=0 and "clear" XER[CA] addi $tp,$sp,$FRAME addi $ap,$sp,$FRAME mtctr $num beq Lcopy ; boundary condition is met .align 4 Lsub: $LDX $tj,$tp,$j $LDX $nj,$np,$j subfe $aj,$nj,$tj ; tp[j]-np[j] $STX $aj,$rp,$j addi $j,$j,$BNSZ bdnz- Lsub li $j,0 mtctr $num subfe $ovf,$j,$ovf ; handle upmost overflow bit and $ap,$tp,$ovf andc $np,$rp,$ovf or $ap,$ap,$np ; ap=borrow?tp:rp subfc. $ovf,$j,$ovf ; sets XER[CA] bne Lsub $UCMP $hi1,$nj bge Lsub .align 4 Lcopy: $LDX $tj,$tp,$j Lcopy: ; copy or in-place refresh $LDX $tj,$ap,$j $STX $tj,$rp,$j $STX $j,$tp,$j ; zap at once addi $j,$j,$BNSZ bdnz- Lcopy Lexit: $POP r14,`4*$SIZE_T`($sp) $POP r15,`5*$SIZE_T`($sp) $POP r16,`6*$SIZE_T`($sp) Loading @@ -298,22 +314,7 @@ Lexit: li r3,1 blr .long 0 .align 4 Lsub: $LDX $tj,$tp,$j $LDX $nj,$np,$j subfe $tj,$nj,$tj ; tp[j]-np[j] $STX $tj,$rp,$j addi $j,$j,$BNSZ bdnz- Lsub li $j,0 subfe. $ovf,$j,$ovf mtctr $num bne Lcopy .align 4 Lzap: $STX $j,$tp,$j addi $j,$j,$BNSZ bdnz- Lzap b Lexit .asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>" ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; Loading crypto/bn/asm/s390x-mont.pl +25 −25 Original line number Diff line number Diff line Loading @@ -176,45 +176,45 @@ bn_mul_mont: ___ undef $bi; $count=$ap; undef $ap; $count=$bp; undef $bp; $code.=<<___; lg $rp,16+16($fp) # reincarnate rp la $ap,8($fp) lgr $j,$num ltgr $AHI,$AHI jnz .Lsub # upmost overflow bit is not zero #slg $NHI,-8($np) # tp[num-1]-np[num-1] lghi $count,-8 # buggy assembler slg $NHI,0($count,$np) # buggy assembler jnle .Lsub # branch if not borrow .Lcopy: lg $alo,8($j,$fp) stg $j,8($j,$fp) stg $alo,0($j,$rp) aghi $j,8 jnz .Lcopy .Lexit: lmg %r6,%r15,16+48($fp) lghi %r2,1 # signal "processed" br %r14 #lg $nhi,-8($np) # buggy assembler lghi $count,-8 # buggy assembler lg $nhi,0($count,$np) # buggy assembler srag $nhi,$nhi,62 # boundary condition... jz .Lcopy # ... is met .Lsub: lcgr $count,$num lcgr $count,$num sra $count,3 # incidentally clears "borrow" .Lsubloop: lg $alo,8($j,$fp) .Lsub: lg $alo,0($j,$ap) slbg $alo,0($j,$np) stg $alo,0($j,$rp) la $j,8($j) brct $count,.Lsubloop brct $count,.Lsub lghi $ahi,0 slbgr $AHI,$ahi slbgr $AHI,$ahi # handle upmost carry ngr $ap,$AHI lghi $np,-1 xgr $np,$AHI ngr $np,$rp ogr $ap,$np # ap=borrow?tp:rp lgr $j,$num jle .Lcopy # branch if borrow .Lzap: stg $j,8($j,$fp) .Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh stg $j,8($j,$fp) # zap tp stg $alo,0($j,$rp) aghi $j,8 jnz .Lzap j .Lexit jnz .Lcopy lmg %r6,%r15,16+48($fp) lghi %r2,1 # signal "processed" br %r14 .size bn_mul_mont,.-bn_mul_mont .string "Montgomery Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>" ___ Loading Loading
crypto/bn/asm/alpha-mont.pl +23 −31 Original line number Diff line number Diff line Loading @@ -258,56 +258,48 @@ bn_mul_mont: stq $hi1,16($tp) bne $tj,.Louter s8addq $num,sp,$ap mov $rp,$bp s8addq $num,sp,$tj # &tp[num] mov $rp,$bp # put rp aside mov sp,$tp mov 0,$hi0 bne $hi1,.Lsub cmpult $nj,$lo1,AT bne AT,.Lsub .align 4 .Lcopy: ldq AT,($tp) lda $tp,8($tp) stq AT,($rp) cmpult $tp,$ap,AT stq zero,-8($tp) nop lda $rp,8($rp) bne AT,.Lcopy mov 1,v0 br .Lexit mov sp,$ap srl $nj,62,AT # boundary condition... beq AT,.Lcopy # ... is met mov 0,$hi0 # clear borrow bit .align 4 .Lsub: ldq $lo0,($tp) ldq $lo1,($np) subq $lo0,$lo1,$lo1 lda $tp,8($tp) lda $np,8($np) subq $lo0,$lo1,$lo1 # tp[i]-np[i] cmpult $lo0,$lo1,AT subq $lo1,$hi0,$lo0 cmpult $lo1,$lo0,$hi0 lda $tp,8($tp) or $hi0,AT,$hi0 lda $np,8($np) stq $lo0,($rp) cmpult $tp,$ap,v0 cmpult $tp,$tj,v0 lda $rp,8($rp) bne v0,.Lsub subq $hi1,$hi0,$hi0 subq $hi1,$hi0,$hi0 # handle upmost overflow bit mov sp,$tp cmpule $hi1,$hi0,AT mov $bp,$rp bne AT,.Lcopy mov $bp,$rp # restore rp and sp,$hi0,$ap bic $bp,$hi0,$bp bis $bp,$ap,$ap # ap=borrow?tp:rp .align 4 .Lzap: stq zero,($tp) cmpult $tp,$ap,AT .Lcopy: ldq $aj,($ap) # copy or in-place refresh lda $tp,8($tp) bne AT,.Lzap lda $rp,8($rp) lda $ap,8($ap) stq zero,-8($tp) # zap tp cmpult $tp,$tj,AT stq $aj,-8($rp) bne AT,.Lcopy mov 1,v0 .align 4 .Lexit: .set noreorder mov fp,sp Loading
crypto/bn/asm/armv4-mont.pl +21 −23 Original line number Diff line number Diff line Loading @@ -61,7 +61,7 @@ bn_mul_mont: cmp $num,#2 movlt r0,#0 addlt sp,sp,#2*4 blt .Labort blt .Labrt stmdb sp!,{r4-r12,lr} @ save 10 registers Loading Loading @@ -160,27 +160,13 @@ bn_mul_mont: add $num,$num,#4 @ $num to point at &tp[num] sub $aj,$num,sp @ "original" num value mov $tp,sp @ "rewind" $tp mov $ap,$tp @ "borrow" $ap sub $np,$np,$aj @ "rewind" $np to &np[0] cmp $nhi,#0 @ upmost carry bne .Lsub cmp $nlo,$nj @ tp[num-1]-np[num-1] bhs .Lsub .Lcopy: ldr $tj,[$tp] str sp,[$tp],#4 @ zap tp str $tj,[$rp],#4 cmp $tp,$num bne .Lcopy .Lexit: add sp,$num,#4 @ skip over tp[num+1] ldmia sp!,{r4-r12,lr} @ restore registers add sp,sp,#2*4 @ skip over {r0,r2} mov r0,#1 .Labort:tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) movs $tj,$nj,lsr#30 @ boundary condition... beq .Lcopy @ ... is met subs $tj,$tj,$tj @ "clear" carry flag .Lsub: ldr $tj,[$tp],#4 ldr $nj,[$np],#4 sbcs $tj,$tj,$nj @ tp[j]-np[j] Loading @@ -190,12 +176,24 @@ bn_mul_mont: sbcs $nhi,$nhi,#0 @ upmost carry mov $tp,sp @ "rewind" $tp sub $rp,$rp,$aj @ "rewind" $rp blo .Lcopy @ tp was less after all .Lzap: str sp,[$tp],#4 and $ap,$tp,$nhi bic $np,$rp,$nhi orr $ap,$ap,$np @ ap=borrow?tp:rp .Lcopy: ldr $tj,[$ap],#4 @ copy or in-place refresh str sp,[$tp],#4 @ zap tp str $tj,[$rp],#4 cmp $tp,$num bne .Lzap bal .Lexit bne .Lcopy add sp,$num,#4 @ skip over tp[num+1] ldmia sp!,{r4-r12,lr} @ restore registers add sp,sp,#2*4 @ skip over {r0,r2} mov r0,#1 .Labrt: tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) .size bn_mul_mont,.-bn_mul_mont .asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>" ___ Loading
crypto/bn/asm/mips3-mont.pl +34 −39 Original line number Diff line number Diff line Loading @@ -265,27 +265,50 @@ bn_mul_mont: addu $i,8 sltu s7,$i,$num bnez s7,.Louter .set noreorder PTR_ADD $ap,sp,$num PTR_ADD $tj,sp,$num # &tp[num] move $tp,sp move $ap,sp bnez $hi1,.Lsub li $hi0,0 sgeu AT,$lo1,$nj beqz AT,.Lsub nop dsrl AT,$nj,62 # boundary condition... beqz AT,.Lcopy # ... is met li $hi0,0 # clear borrow bit .align 4 .Lcopy: ld AT,($tp) .Lsub: ld $lo0,($tp) ld $lo1,($np) PTR_ADD $tp,8 PTR_ADD $np,8 dsubu $lo1,$lo0,$lo1 # tp[i]-np[i] sgtu AT,$lo1,$lo0 dsubu $lo0,$lo1,$hi0 sgtu $hi0,$lo0,$lo1 sd $lo0,($rp) or $hi0,AT sltu AT,$tp,$tj bnez AT,.Lsub PTR_ADD $rp,8 dsubu $hi0,$hi1,$hi0 # handle upmost overflow bit move $tp,sp PTR_SUB $rp,$num # restore rp not $hi1,$hi0 and $ap,$hi0,sp and $bp,$hi1,$rp or $ap,$ap,$bp # ap=borrow?tp:rp .align 4 .Lcopy: ld $aj,($ap) PTR_ADD $ap,8 PTR_ADD $tp,8 sd AT,($rp) sltu AT,$tp,$ap sd zero,-8($tp) sltu AT,$tp,$tj sd $aj,($rp) bnez AT,.Lcopy PTR_ADD $rp,8 .Lexit: ld s0,0($fp) ld s1,8($fp) ld s2,16($fp) Loading @@ -297,34 +320,6 @@ bn_mul_mont: li v0,1 jr ra PTR_ADD sp,$fp,64 .align 4 .Lsub: ld $lo0,($tp) ld $lo1,($np) dsubu $lo1,$lo0,$lo1 sgtu AT,$lo1,$lo0 dsubu $lo0,$lo1,$hi0 sgtu $hi0,$lo0,$lo1 PTR_ADD $tp,8 or $hi0,AT PTR_ADD $np,8 sd $lo0,($rp) sltu AT,$tp,$ap bnez AT,.Lsub PTR_ADD $rp,8 dsubu $hi0,$hi1,$hi0 move $tp,sp sgtu AT,$hi0,$hi1 bnez AT,.Lcopy PTR_SUB $rp,$num .align 4 .Lzap: sd zero,($tp) sltu AT,$tp,$ap bnez AT,.Lzap PTR_ADD $tp,8 b .Lexit nop .set reorder END(bn_mul_mont) .rdata Loading
crypto/bn/asm/ppc-mont.pl +26 −25 Original line number Diff line number Diff line Loading @@ -2,8 +2,9 @@ # ==================================================================== # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL # project. Rights for redistribution and usage in source and binary # forms are granted according to the OpenSSL license. # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. # ==================================================================== # April 2006 Loading Loading @@ -42,6 +43,7 @@ if ($output =~ /32\-mont\.s/) { $UMULL= "mullw"; # unsigned multiply low $UMULH= "mulhwu"; # unsigned multiply high $UCMP= "cmplw"; # unsigned compare $SHRI= "srwi"; # unsigned shift right by immediate $PUSH= $ST; $POP= $LD; } elsif ($output =~ /64\-mont\.s/) { Loading @@ -62,6 +64,7 @@ if ($output =~ /32\-mont\.s/) { $UMULL= "mulld"; # unsigned multiply low $UMULH= "mulhdu"; # unsigned multiply high $UCMP= "cmpld"; # unsigned compare $SHRI= "srdi"; # unsigned shift right by immediate $PUSH= $ST; $POP= $LD; } else { die "nonsense $output"; } Loading Loading @@ -264,24 +267,37 @@ Linner: addi $i,$i,$BNSZ ble- Louter $SHRI. $nj,$nj,$BITS-2 ; check boundary condition addi $num,$num,2 ; restore $num subfc $j,$j,$j ; j=0 and "clear" XER[CA] addi $tp,$sp,$FRAME addi $ap,$sp,$FRAME mtctr $num beq Lcopy ; boundary condition is met .align 4 Lsub: $LDX $tj,$tp,$j $LDX $nj,$np,$j subfe $aj,$nj,$tj ; tp[j]-np[j] $STX $aj,$rp,$j addi $j,$j,$BNSZ bdnz- Lsub li $j,0 mtctr $num subfe $ovf,$j,$ovf ; handle upmost overflow bit and $ap,$tp,$ovf andc $np,$rp,$ovf or $ap,$ap,$np ; ap=borrow?tp:rp subfc. $ovf,$j,$ovf ; sets XER[CA] bne Lsub $UCMP $hi1,$nj bge Lsub .align 4 Lcopy: $LDX $tj,$tp,$j Lcopy: ; copy or in-place refresh $LDX $tj,$ap,$j $STX $tj,$rp,$j $STX $j,$tp,$j ; zap at once addi $j,$j,$BNSZ bdnz- Lcopy Lexit: $POP r14,`4*$SIZE_T`($sp) $POP r15,`5*$SIZE_T`($sp) $POP r16,`6*$SIZE_T`($sp) Loading @@ -298,22 +314,7 @@ Lexit: li r3,1 blr .long 0 .align 4 Lsub: $LDX $tj,$tp,$j $LDX $nj,$np,$j subfe $tj,$nj,$tj ; tp[j]-np[j] $STX $tj,$rp,$j addi $j,$j,$BNSZ bdnz- Lsub li $j,0 subfe. $ovf,$j,$ovf mtctr $num bne Lcopy .align 4 Lzap: $STX $j,$tp,$j addi $j,$j,$BNSZ bdnz- Lzap b Lexit .asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>" ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; Loading
crypto/bn/asm/s390x-mont.pl +25 −25 Original line number Diff line number Diff line Loading @@ -176,45 +176,45 @@ bn_mul_mont: ___ undef $bi; $count=$ap; undef $ap; $count=$bp; undef $bp; $code.=<<___; lg $rp,16+16($fp) # reincarnate rp la $ap,8($fp) lgr $j,$num ltgr $AHI,$AHI jnz .Lsub # upmost overflow bit is not zero #slg $NHI,-8($np) # tp[num-1]-np[num-1] lghi $count,-8 # buggy assembler slg $NHI,0($count,$np) # buggy assembler jnle .Lsub # branch if not borrow .Lcopy: lg $alo,8($j,$fp) stg $j,8($j,$fp) stg $alo,0($j,$rp) aghi $j,8 jnz .Lcopy .Lexit: lmg %r6,%r15,16+48($fp) lghi %r2,1 # signal "processed" br %r14 #lg $nhi,-8($np) # buggy assembler lghi $count,-8 # buggy assembler lg $nhi,0($count,$np) # buggy assembler srag $nhi,$nhi,62 # boundary condition... jz .Lcopy # ... is met .Lsub: lcgr $count,$num lcgr $count,$num sra $count,3 # incidentally clears "borrow" .Lsubloop: lg $alo,8($j,$fp) .Lsub: lg $alo,0($j,$ap) slbg $alo,0($j,$np) stg $alo,0($j,$rp) la $j,8($j) brct $count,.Lsubloop brct $count,.Lsub lghi $ahi,0 slbgr $AHI,$ahi slbgr $AHI,$ahi # handle upmost carry ngr $ap,$AHI lghi $np,-1 xgr $np,$AHI ngr $np,$rp ogr $ap,$np # ap=borrow?tp:rp lgr $j,$num jle .Lcopy # branch if borrow .Lzap: stg $j,8($j,$fp) .Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh stg $j,8($j,$fp) # zap tp stg $alo,0($j,$rp) aghi $j,8 jnz .Lzap j .Lexit jnz .Lcopy lmg %r6,%r15,16+48($fp) lghi %r2,1 # signal "processed" br %r14 .size bn_mul_mont,.-bn_mul_mont .string "Montgomery Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>" ___ Loading