.text

.set	noat
.set	noreorder

# bn_mul_mont -- public entry point of the Montgomery multiplication.
#
# Register roles, as used by the code in this file:
#   $4  rp   destination words (written in .Lsub and .Lcopy)
#   $5  ap   first operand words
#   $6  bp   second operand words, one word consumed per outer pass
#   $7  np   modulus words (subtracted from tp in .Lsub)
#   $8  address of a single word; that word is loaded and used as the
#       multiplier that derives m in every pass (presumably the
#       Montgomery n0 constant -- confirm against the caller)
#   $9  num  operand length in 32-bit words
#
# Result: 0 in both $2 and $4, with no memory touched, when num < 4 or
# num >= 17.  Otherwise control continues in bn_mul_mont_internal, which
# leaves 1 in both $2 and $4.
.align	5
.globl	bn_mul_mont
.ent	bn_mul_mont
bn_mul_mont:
	slt	$1,$9,4
	bnez	$1,1f			# num < 4: give up, return 0
	li	$2,0			# (delay slot) $2 = 0
	slt	$1,$9,17	# on in-order CPU
	# Plain bnez replaces the former branch-likely form.  The delay slot
	# holds a nop, so both forms behave the same here, and branch-likely
	# encodings are deprecated and absent from MIPS Release 6.
	bnez	$1,bn_mul_mont_internal	# 4 <= num < 17: run the assembly path
	nop
1:	jr	$31
	li	$4,0			# (delay slot) $4 = 0
.end	bn_mul_mont

# bn_mul_mont_internal -- the multiplication proper; same register roles
# as bn_mul_mont.
#
# Working registers:
#   $10/$11  low/high running sum for the ap[j]*bp[i] column
#   $24/$25  low/high running sum for the np[j]*m column
#   $12,$14  ap[j], np[j]          $13  bp[i]
#   $16/$17  low/high of the pending ap product
#   $18/$19  low/high of the pending np product
#   $20      tp word being folded in (later: &tp[num])
#   $21,$22  byte offsets i and j   $23  m for the current pass
#   $15      moving pointer into tp
#   $1,$2    carry/borrow flags and loop conditions
#
# Stack: a fixed frame of 14 words holds $30 and $23..$16; below it a
# scratch vector tp of num+2 words is carved out and the stack pointer is
# rounded down to a 4096-byte boundary.  $30 keeps the frame address so
# the epilogue can undo both adjustments.
#
# NOTE(review): $12..$15 are overwritten without being saved.  That is
# only safe under calling conventions where they are caller-saved --
# confirm for the ABI this file is built for.
.align	5
.ent	bn_mul_mont_internal
bn_mul_mont_internal:
	.frame	$30,14*4,$31
	.mask	0x40000000|16711680,-4
	sub	$29,14*4
	sw	$30,(14-1)*4($29)
	sw	$23,(14-2)*4($29)
	sw	$22,(14-3)*4($29)
	sw	$21,(14-4)*4($29)
	sw	$20,(14-5)*4($29)
	sw	$19,(14-6)*4($29)
	sw	$18,(14-7)*4($29)
	sw	$17,(14-8)*4($29)
	sw	$16,(14-9)*4($29)
	move	$30,$29			# remember frame base for the epilogue

	.set	reorder
	lw	$8,0($8)		# replace the pointer in $8 by the word it addresses
	lw	$13,0($6)	# bp[0]
	lw	$12,0($5)	# ap[0]
	lw	$14,0($7)	# np[0]

	sub	$29,2*4		# place for two extra words
	sll	$9,2			# num is a byte count from here on
	li	$1,-4096
	sub	$29,$9
	and	$29,$1			# $29 = tp, rounded down to 4096 bytes

	multu	$12,$13			# ap[0]*bp[0]
	lw	$16,4($5)		# ap[1]
	lw	$18,4($7)		# np[1]
	mflo	$10
	mfhi	$11
	multu	$10,$8
	mflo	$23			# m = low word of (low product * $8)

	multu	$16,$13			# ap[1]*bp[0]
	mflo	$16
	mfhi	$17

	multu	$14,$23			# np[0]*m
	mflo	$24
	mfhi	$25
	multu	$18,$23			# np[1]*m
	addu	$24,$10
	sltu	$1,$24,$10		# carry out of the low-word addition
	addu	$25,$1
	mflo	$18
	mfhi	$19

	move	$15,$29
	li	$22,2*4			# j starts at word 2
.align	4
.L1st:
	# First pass (bp[0]): each iteration stores one word of
	# ap[]*bp[0] + np[]*m into tp while the next two products run.
	.set	noreorder
	add	$12,$5,$22
	add	$14,$7,$22
	lw	$12,($12)		# ap[j]
	lw	$14,($14)		# np[j]

	multu	$12,$13
	addu	$10,$16,$11
	addu	$24,$18,$25
	sltu	$1,$10,$11		# carries of the two additions above
	sltu	$2,$24,$25
	addu	$11,$17,$1
	addu	$25,$19,$2
	mflo	$16
	mfhi	$17

	addu	$24,$10
	sltu	$1,$24,$10
	multu	$14,$23
	addu	$25,$1
	addu	$22,4
	sw	$24,($15)
	sltu	$2,$22,$9		# more words left?
	mflo	$18
	mfhi	$19

	bnez	$2,.L1st
	add	$15,4			# (delay slot) advance tp pointer
	.set	reorder

	# Drain the products still pending after the loop.
	addu	$10,$16,$11
	sltu	$1,$10,$11
	addu	$11,$17,$1

	addu	$24,$18,$25
	sltu	$2,$24,$25
	addu	$25,$19,$2
	addu	$24,$10
	sltu	$1,$24,$10
	addu	$25,$1

	sw	$24,($15)

	addu	$25,$11
	sltu	$1,$25,$11
	sw	$25,4($15)		# top word of this pass
	sw	$1,2*4($15)		# carry out of the top word

	li	$21,4			# i starts at word 1
.align	4
.Louter:
	# One pass per remaining word of bp: tp = (tp + ap[]*bp[i] + np[]*m),
	# stored shifted down by one word.
	add	$13,$6,$21
	lw	$13,($13)		# bp[i]
	lw	$12,($5)		# ap[0]
	lw	$16,4($5)		# ap[1]
	lw	$20,($29)		# tp[0]

	multu	$12,$13
	lw	$14,($7)		# np[0]
	lw	$18,4($7)		# np[1]
	mflo	$10
	mfhi	$11
	addu	$10,$20
	multu	$10,$8
	sltu	$1,$10,$20
	addu	$11,$1
	mflo	$23			# m for this pass

	multu	$16,$13
	mflo	$16
	mfhi	$17

	multu	$14,$23
	mflo	$24
	mfhi	$25

	multu	$18,$23
	addu	$24,$10
	sltu	$1,$24,$10
	addu	$25,$1
	mflo	$18
	mfhi	$19

	move	$15,$29
	li	$22,2*4
	lw	$20,4($15)		# tp[1]
.align	4
.Linner:
	.set	noreorder
	add	$12,$5,$22
	add	$14,$7,$22
	lw	$12,($12)		# ap[j]
	lw	$14,($14)		# np[j]

	multu	$12,$13
	addu	$10,$16,$11
	addu	$24,$18,$25
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	addu	$11,$17,$1
	addu	$25,$19,$2
	mflo	$16
	mfhi	$17

	addu	$10,$20			# fold in the previous tp word
	addu	$22,4
	multu	$14,$23
	sltu	$1,$10,$20
	addu	$24,$10
	addu	$11,$1
	sltu	$2,$24,$10
	lw	$20,2*4($15)		# fetch the tp word for the next round
	addu	$25,$2
	sltu	$1,$22,$9		# more words left?
	mflo	$18
	mfhi	$19
	sw	$24,($15)
	bnez	$1,.Linner
	add	$15,4			# (delay slot) advance tp pointer
	.set	reorder

	# Drain the products still pending after the inner loop.
	addu	$10,$16,$11
	sltu	$1,$10,$11
	addu	$11,$17,$1
	addu	$10,$20
	sltu	$2,$10,$20
	addu	$11,$2

	lw	$20,2*4($15)		# carry word left by the previous pass
	addu	$24,$18,$25
	sltu	$1,$24,$25
	addu	$25,$19,$1
	addu	$24,$10
	sltu	$2,$24,$10
	addu	$25,$2
	sw	$24,($15)

	addu	$24,$25,$11
	sltu	$25,$24,$11
	addu	$24,$20
	sltu	$1,$24,$20
	addu	$25,$1
	sw	$24,4($15)		# top word of this pass
	sw	$25,2*4($15)		# carry out of the top word (kept in $25)

	addu	$21,4
	sltu	$2,$21,$9
	bnez	$2,.Louter

	.set	noreorder
	add	$20,$29,$9	# &tp[num]
	move	$15,$29
	move	$5,$29
	li	$11,0		# clear borrow bit

.align	4
.Lsub:	lw	$10,($15)
	lw	$24,($7)
	add	$15,4
	add	$7,4
	subu	$24,$10,$24	# tp[i]-np[i]
	sgtu	$1,$24,$10		# borrow from tp[i]-np[i]
	subu	$10,$24,$11		# subtract the incoming borrow
	sgtu	$11,$10,$24		# borrow from that second subtraction
	sw	$10,($4)
	or	$11,$1			# combined outgoing borrow
	sltu	$1,$15,$20
	bnez	$1,.Lsub
	add	$4,4			# (delay slot) advance rp

	subu	$11,$25,$11	# handle upmost overflow bit
	move	$15,$29
	sub	$4,$9		# restore rp
	not	$25,$11

	# $11 and its complement $25 act as masks that pick the copy source.
	# NOTE(review): the load address in .Lcopy therefore depends on the
	# borrow; check whether that matters for constant-time requirements.
	and	$5,$11,$29
	and	$6,$25,$4
	or	$5,$5,$6	# ap=borrow?tp:rp

.align	4
.Lcopy:	lw	$12,($5)
	add	$5,4
	sw	$0,($15)		# wipe the tp word
	add	$15,4
	sltu	$1,$15,$20
	sw	$12,($4)
	bnez	$1,.Lcopy
	add	$4,4			# (delay slot) advance rp

	li	$4,1			# return 1 in both result registers
	li	$2,1

	.set	noreorder
	move	$29,$30			# drop tp, back to the fixed frame
	lw	$30,(14-1)*4($29)
	lw	$23,(14-2)*4($29)
	lw	$22,(14-3)*4($29)
	lw	$21,(14-4)*4($29)
	lw	$20,(14-5)*4($29)
	lw	$19,(14-6)*4($29)
	lw	$18,(14-7)*4($29)
	lw	$17,(14-8)*4($29)
	lw	$16,(14-9)*4($29)
	jr	$31
	add	$29,14*4		# (delay slot) release the frame
.end	bn_mul_mont_internal
.rdata
.asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"