#-----------------------------------------------------------------------
# bn_mul_mont -- word-serial Montgomery multiplication, s390x code using
# 64-bit instructions with 4-byte pointer/save slots (31-bit style frame).
#
# Argument names below are the ones the code's own comments use
# (rp, ap, bp, np, n0, tp); "num" is the length argument.
#
# In:    %r2       = rp   result vector (written in .Lsub/.Lcopy)
#        %r3       = ap   multiplicand vector
#        %r4       = bp   multiplier vector
#        %r5       = np   modulus vector
#        %r6       = pointer to the 8-byte n0 value
#        96(%r15)  = num  vector length in 4-byte words (signed 32-bit)
# Out:   %r2 = 1  result stored at rp
#        %r2 = 0  nothing done: num*4 < 16, or num is odd
#
# Data layout: every vector element is fetched with an 8-byte load and
# then rotated by 32 bits, i.e. the two 4-byte halves are swapped.
# NOTE(review): presumably the vectors are arrays of 32-bit words stored
# least-significant word first -- confirm against the caller.
#
# Stack: allocates 96+8+num*4 bytes below the incoming %r15:
#        0(%r15)            back chain
#        96(%r15) ...       tp[], num*4 bytes of scratch
#        96+num*4(%r15)     8-byte topmost overflow word
#        tp[] is overwritten on exit (see .Lcopy).
#        %r2 and %r3-%r15 are saved at 8(%r15).. of the incoming frame
#        with 32-bit stores; only %r6-%r15 are reloaded, with 32-bit
#        loads, so the high halves of %r6-%r14 are not restored.
#
# Register roles inside the loops:
#        %r0      m1 = tp[0]*n0 (low 64 bits), multiplier for np[]
#        %r1      number of 8-byte limbs minus one (inner loop count)
#        %r2      current bp limb (rp is parked in the save area)
#        %r6      n0 value
#        %r7      j, byte offset into ap/np/tp
#        %r8:%r9  128-bit product ap[j]*bp[i]   (even:odd pair of mlgr)
#        %r10:%r11 128-bit product np[j]*m1     (even:odd pair of mlgr)
#        %r12     carry limb of the ap*bp column sum
#        %r13     carry limb of the np*m1 column sum
#        %r14     loop counter for brct
# Clobbers: %r0-%r5, condition code.
#-----------------------------------------------------------------------
.text
.globl	bn_mul_mont
.type	bn_mul_mont,@function
bn_mul_mont:
	lgf	%r1,96(%r15)	# pull num (stack argument, sign-extended)
	sla	%r1,2		# num to enumerate bytes
	la	%r4,0(%r1,%r4)	# %r4 = &bp[num]; saved below as the end marker

	st	%r2,2*4(%r15)	# park rp, %r2 is reused as return value/bp limb

	cghi	%r1,16		#
	lghi	%r2,0		# (lghi leaves the condition code alone)
	blr	%r14		# if (num bytes < 16) return 0;
	tmll	%r1,4
	bnzr	%r14		# if (num & 1) return 0; whole 8-byte limbs only
	stm	%r3,%r15,3*4(%r15)	# save %r3-%r15, incl. &bp[num] in the %r4 slot

	lghi	%r2,-96-8	# frame + room for the carry word
	lcgr	%r7,%r1		# -num bytes
	lgr	%r0,%r15	# keep incoming stack pointer for the back chain
	la	%r2,0(%r2,%r15)
	la	%r15,0(%r7,%r2)	# alloca: %r15 -= 96+8+num bytes
	st	%r0,0(%r15)	# back chain

	sra	%r1,3		# num bytes -> number of 8-byte limbs
	la	%r4,0(%r7,%r4)	# restore bp (undo the +num bytes above)
	ahi	%r1,-1		# adjust limb count for inner loop
	lg	%r6,0(%r6)	# pull n0
	rllg	%r6,%r6,32	# swap 32-bit halves

	# ---- first pass, i=0: tp = (ap*bp[0] + np*m1) >> 64 ----
	lg	%r2,0(%r4)	# bp[0]
	rllg	%r2,%r2,32
	lg	%r9,0(%r3)	# ap[0]
	rllg	%r9,%r9,32
	mlgr	%r8,%r2		# ap[0]*bp[0] -> %r8:%r9
	lgr	%r12,%r8	# high limb becomes the running carry

	lgr	%r0,%r9		# "tp[0]"*n0
	msgr	%r0,%r6		# m1, low 64 bits of the product

	lg	%r11,0(%r5)	# np[0]
	rllg	%r11,%r11,32
	mlgr	%r10,%r0	# np[0]*m1 -> %r10:%r11
	algr	%r11,%r9	# +="tp[0]"; sum is discarded, only the carry is kept
	lghi	%r13,0
	alcgr	%r13,%r10	# carry limb = high(np[0]*m1) + carry

	la	%r7,8		# j=1 (byte offset 8)
	lr	%r14,%r1	# loop count = limbs-1

.align	16
.L1st:
	lg	%r9,0(%r7,%r3)	# ap[j]
	rllg	%r9,%r9,32
	mlgr	%r8,%r2		# ap[j]*bp[0]
	algr	%r9,%r12	# + carry limb from column j-1
	lghi	%r12,0
	alcgr	%r12,%r8	# new ap*bp carry limb

	lg	%r11,0(%r7,%r5)	# np[j]
	rllg	%r11,%r11,32
	mlgr	%r10,%r0	# np[j]*m1
	algr	%r11,%r13	# + carry limb from column j-1
	lghi	%r13,0
	alcgr	%r10,%r13	# fold that carry into the high limb
	algr	%r11,%r9	# +="tp[j]"
	alcgr	%r13,%r10	# new np*m1 carry limb

	stg	%r11,96-8(%r7,%r15)	# tp[j-1]=
	la	%r7,8(%r7)	# j++
	brct	%r14,.L1st

	algr	%r13,%r12	# add the two carry limbs
	lghi	%r12,0
	alcgr	%r12,%r12	# upmost overflow bit
	stg	%r13,96-8(%r7,%r15)	# tp[last]
	stg	%r12,96(%r7,%r15)	# overflow word just above tp[]
	la	%r4,8(%r4)	# bp++

	# ---- remaining passes: tp = (tp + ap*bp[i] + np*m1) >> 64 ----
.Louter:
	lg	%r2,0(%r4)	# bp[i]
	rllg	%r2,%r2,32
	lg	%r9,0(%r3)	# ap[0]
	rllg	%r9,%r9,32
	mlgr	%r8,%r2		# ap[0]*bp[i]
	alg	%r9,96(%r15)	# +=tp[0]
	lghi	%r12,0
	alcgr	%r12,%r8	# ap*bp carry limb

	lgr	%r0,%r9
	msgr	%r0,%r6		# tp[0]*n0 = m1

	lg	%r11,0(%r5)	# np[0]
	rllg	%r11,%r11,32
	mlgr	%r10,%r0	# np[0]*m1
	algr	%r11,%r9	# +="tp[0]"; sum is discarded, only the carry is kept
	lghi	%r13,0
	alcgr	%r13,%r10	# np*m1 carry limb

	la	%r7,8		# j=1 (byte offset 8)
	lr	%r14,%r1	# loop count = limbs-1

.align	16
.Linner:
	lg	%r9,0(%r7,%r3)	# ap[j]
	rllg	%r9,%r9,32
	mlgr	%r8,%r2		# ap[j]*bp[i]
	algr	%r9,%r12	# + carry limb from column j-1
	lghi	%r12,0
	alcgr	%r8,%r12	# fold that carry into the high limb
	alg	%r9,96(%r7,%r15)# +=tp[j]
	alcgr	%r12,%r8	# new ap*bp carry limb

	lg	%r11,0(%r7,%r5)	# np[j]
	rllg	%r11,%r11,32
	mlgr	%r10,%r0	# np[j]*m1
	algr	%r11,%r13	# + carry limb from column j-1
	lghi	%r13,0
	alcgr	%r10,%r13	# fold that carry into the high limb
	algr	%r11,%r9	# +="tp[j]"
	alcgr	%r13,%r10	# new np*m1 carry limb

	stg	%r11,96-8(%r7,%r15)	# tp[j-1]=
	la	%r7,8(%r7)	# j++
	brct	%r14,.Linner

	algr	%r13,%r12	# add the two carry limbs
	lghi	%r12,0
	alcgr	%r12,%r12	# carry out of that addition
	alg	%r13,96(%r7,%r15)# accumulate previous upmost overflow bit
	lghi	%r8,0
	alcgr	%r12,%r8	# new upmost overflow bit
	stg	%r13,96-8(%r7,%r15)	# tp[last]
	stg	%r12,96(%r7,%r15)	# overflow word just above tp[]

	la	%r4,8(%r4)	# bp++
	cl	%r4,120(%r7,%r15)	# compare to &bp[num] (saved %r4 slot; %r7 = num bytes here)
	jne	.Louter

	# ---- final reduction: rp = tp - np, then pick tp or rp ----
	l	%r2,112(%r7,%r15)	# reincarnate rp (saved %r2 slot)
	la	%r3,96(%r15)	# %r3 = tp
	ahi	%r1,1		# restore limb count, incidentally clears "borrow"

	la	%r7,0		# j=0; nothing from here to slbgr touches the cc
	lr	%r14,%r1
.Lsub:	lg	%r9,0(%r7,%r3)	# tp[j]
	lg	%r11,0(%r7,%r5)	# np[j]
	rllg	%r11,%r11,32
	slbgr	%r9,%r11	# tp[j]-np[j]-borrow
	stg	%r9,0(%r7,%r2)	# rp[j]= (halves not yet swapped back)
	la	%r7,8(%r7)
	brct	%r14,.Lsub
	lghi	%r8,0
	slbgr	%r12,%r8	# handle upmost carry: %r12 = 0 keep rp, -1 keep tp
	lghi	%r13,-1
	xgr	%r13,%r12	# %r13 = ~%r12, the complementary mask

	la	%r7,0
	lgr	%r14,%r1
.Lcopy:	lg	%r8,96(%r7,%r15)	# conditional copy, branch-free via masks
	lg	%r9,0(%r7,%r2)
	ngr	%r8,%r12	# tp[j] if the subtraction borrowed
	ngr	%r9,%r13	# rp[j] otherwise
	ogr	%r9,%r8
	rllg	%r9,%r9,32	# swap halves back into memory order
	stg	%r7,96(%r7,%r15)	# zap tp (overwritten with the offset j)
	stg	%r9,0(%r7,%r2)
	la	%r7,8(%r7)
	brct	%r14,.Lcopy

	la	%r1,128(%r7,%r15)	# address of the saved %r6 slot (%r7 = num bytes here)
	lm	%r6,%r15,0(%r1)	# restore %r6-%r15, which also pops the frame
	lghi	%r2,1		# signal "processed"
	br	%r14
.size	bn_mul_mont,.-bn_mul_mont
.string	"Montgomery Multiplication for s390x, CRYPTOGAMS by <appro@openssl.org>"