#include "mips_arch.h"

/*
 * Word-serial Montgomery multiplication for 64-bit MIPS (see the
 * identification string at the end of this file).
 *
 * Syntax : GNU as for MIPS, run through the C preprocessor first.
 *          New block comments use C style so the preprocessor strips
 *          them; trailing comments use the assembler comment character.
 * Macros : the parenthesised forms dmultu (a,b), mflo (d,a,b) and
 *          mfhi (d,a,b) are not plain instructions.
 *          NOTE(review): presumably macros from mips_arch.h - confirm.
 *
 * Entry points
 *   bn_mul_mont           exported; checks the word count and either
 *                         returns 0 or branches to the worker.
 *   bn_mul_mont_internal  worker; returns 1.
 *
 * Inputs, as read by the code below
 *   $4  rp   result pointer (written in .Lsub and .Lcopy)
 *   $5  ap   first operand
 *   $6  bp   second operand
 *   $7  np   modulus words
 *   $8  pointer to one word, loaded once and used as the multiplier
 *       that produces m1 (called n0 in the comments below)
 *   $9  num  word count; converted to a byte count in the worker
 *   NOTE(review): this matches $4-$9 being the first six argument
 *   registers; the C prototype is presumably
 *     int bn_mul_mont(rp, ap, bp, np, n0, num)
 *   - confirm against the declaring header.
 *
 * Outputs
 *   $2 = 0 (and $4 = 0) when num < 4 or num >= 17, nothing written.
 *   $2 = 1 (and $4 = 1) after the worker has stored num words to rp.
 *   NOTE(review): the duplicate value in $4 is presumably for an ABI
 *   flavour that returns there - confirm.
 *
 * Register roles inside the worker
 *   $1,$2    carry / borrow / loop-condition scratch ($1 needs noat)
 *   $10,$11  lo0,hi0  running sum of the ap[j]*bp[i] column
 *   $12      ap[j]
 *   $13      bp[i]
 *   $14      np[j]
 *   $15      tp write pointer
 *   $16,$17  low,high word of the pending ap[j]*bp[i] product
 *   $18,$19  low,high word of the pending np[j]*m1 product
 *   $20      tp[j] read ahead in the outer passes; later &tp[num]
 *   $21      outer byte offset into bp
 *   $22      inner byte offset into ap and np
 *   $23      m1, low word of lo0*n0
 *   $24,$25  lo1,hi1  running sum of the np[j]*m1 column
 *   $29      stack pointer; also base of the scratch vector tp
 *   $30      copy of $29 taken after the register save area is built
 */

.text

.set	noat
.set	noreorder

.align	5
.globl	bn_mul_mont
.ent	bn_mul_mont
bn_mul_mont:
	slt	$1,$9,4
	bnez	$1,1f			# num < 4: return 0
	li	$2,0			# delay slot: $2 = 0 on both paths
	slt	$1,$9,17		# on in-order CPU
	bnez	$1,bn_mul_mont_internal	# 4 <= num < 17: run the worker
	nop
1:	jr	$31
	li	$4,0			# delay slot
.end	bn_mul_mont

.align	5
.ent	bn_mul_mont_internal
bn_mul_mont_internal:
	.frame	$30,14*8,$31
	.mask	0x40000000|16711680,-8
	/* 14*8 byte frame; $30 and $23..$16 are saved in its top nine slots. */
	dsubu $29,14*8
	sd $30,(14-1)*8($29)
	sd $23,(14-2)*8($29)
	sd $22,(14-3)*8($29)
	sd $21,(14-4)*8($29)
	sd $20,(14-5)*8($29)
	sd $19,(14-6)*8($29)
	sd $18,(14-7)*8($29)
	sd $17,(14-8)*8($29)
	sd $16,(14-9)*8($29)
	move	$30,$29			# remembered so the epilogue can undo the alloca

	.set	reorder
	ld	$8,0($8)		# n0 = word pointed to by $8
	ld	$13,0($6)	# bp[0]
	ld	$12,0($5)	# ap[0]
	ld	$14,0($7)	# np[0]

	/* Carve the scratch vector tp out of the stack. */
	dsubu	$29,2*8	# place for two extra words
	sll	$9,3			# num is a byte count from here on
	li	$1,-4096
	dsubu $29,$9
	and	$29,$1			# round $29 down to a 4096-byte boundary

	/* First pass (bp[0]): prime the two product pipelines. */
	dmultu ($12,$13)		# ap[0]*bp[0]
	ld	$17,8($5)		# ap[1]
	ld	$19,8($7)		# np[1]
	mflo	($10,$12,$13)
	mfhi	($11,$12,$13)
	dmultu ($10,$8)			# lo0*n0
	mflo	($23,$10,$8)		# m1 = low word only

	dmultu ($17,$13)		# ap[1]*bp[0]
	mflo	($16,$17,$13)
	mfhi	($17,$17,$13)

	dmultu ($14,$23)		# np[0]*m1
	mflo	($24,$14,$23)
	mfhi	($25,$14,$23)
	dmultu ($19,$23)		# np[1]*m1
	/*
	 * Only the carry out of lo1+lo0 is kept: $24 is overwritten at the
	 * top of .L1st before anything stores it.
	 */
	daddu $24,$10
	sltu	$1,$24,$10
	daddu $25,$1
	mflo	($18,$19,$23)
	mfhi	($19,$19,$23)

	move	$15,$29			# tp write pointer = &tp[0]
	li	$22,2*8			# inner offset starts at word 2
.align	4
.L1st:
	.set	noreorder
	daddu $12,$5,$22
	daddu $14,$7,$22
	ld	$12,($12)		# ap[j]
	ld	$14,($14)		# np[j]

	dmultu ($12,$13)		# start ap[j]*bp[0]
	daddu $10,$16,$11		# lo0 = pending low + hi0
	daddu $24,$18,$25		# lo1 = pending low + hi1
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	daddu $11,$17,$1		# hi0 = pending high + carry
	daddu $25,$19,$2		# hi1 = pending high + carry
	mflo	($16,$12,$13)
	mfhi	($17,$12,$13)

	daddu $24,$10			# lo1 += lo0
	sltu	$1,$24,$10
	dmultu ($14,$23)		# start np[j]*m1
	daddu $25,$1
	addu	$22,8
	sd	$24,($15)		# store one result word through tp
	sltu	$2,$22,$9		# more words left?
	mflo	($18,$14,$23)
	mfhi	($19,$14,$23)

	bnez	$2,.L1st
	daddu $15,8			# delay slot: advance tp
	.set	reorder

	/* Drain the pipelines: fold in the last pending products. */
	daddu $10,$16,$11
	sltu	$1,$10,$11
	daddu $11,$17,$1

	daddu $24,$18,$25
	sltu	$2,$24,$25
	daddu $25,$19,$2
	daddu $24,$10
	sltu	$1,$24,$10
	daddu $25,$1

	sd	$24,($15)		# tp[num-2]

	daddu $25,$11
	sltu	$1,$25,$11
	sd	$25,8($15)		# tp[num-1] = hi1 + hi0
	sd	$1,2*8($15)		# tp[num]   = carry out of that sum

	li	$21,8			# outer offset starts at bp[1]
.align	4
.Louter:
	daddu $13,$6,$21
	ld	$13,($13)		# bp[i]
	ld	$12,($5)		# ap[0]
	ld	$17,8($5)		# ap[1]
	ld	$20,($29)		# tp[0]

	dmultu ($12,$13)		# ap[0]*bp[i]
	ld	$14,($7)		# np[0]
	ld	$19,8($7)		# np[1]
	mflo	($10,$12,$13)
	mfhi	($11,$12,$13)
	daddu $10,$20			# lo0 += tp[0]
	dmultu ($10,$8)			# lo0*n0
	sltu	$1,$10,$20
	daddu $11,$1
	mflo	($23,$10,$8)		# m1 for this pass

	dmultu ($17,$13)		# ap[1]*bp[i]
	mflo	($16,$17,$13)
	mfhi	($17,$17,$13)

	dmultu ($14,$23)		# np[0]*m1
	mflo	($24,$14,$23)
	mfhi	($25,$14,$23)

	dmultu ($19,$23)		# np[1]*m1
	daddu $24,$10			# as in the first pass, only the carry is kept
	sltu	$1,$24,$10
	daddu $25,$1
	mflo	($18,$19,$23)
	mfhi	($19,$19,$23)

	move	$15,$29			# tp write pointer = &tp[0]
	li	$22,2*8
	ld	$20,8($15)		# tp[1], read ahead of the write pointer
.align	4
.Linner:
	.set	noreorder
	daddu $12,$5,$22
	daddu $14,$7,$22
	ld	$12,($12)		# ap[j]
	ld	$14,($14)		# np[j]

	dmultu ($12,$13)		# start ap[j]*bp[i]
	daddu $10,$16,$11
	daddu $24,$18,$25
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	daddu $11,$17,$1
	daddu $25,$19,$2
	mflo	($16,$12,$13)
	mfhi	($17,$12,$13)

	daddu $10,$20			# lo0 += previously loaded tp word
	addu	$22,8
	dmultu ($14,$23)		# start np[j]*m1
	sltu	$1,$10,$20
	daddu $24,$10			# lo1 += lo0
	daddu $11,$1
	sltu	$2,$24,$10
	ld	$20,2*8($15)		# next tp word, two ahead of the store below
	daddu $25,$2
	sltu	$1,$22,$9		# more words left?
	mflo	($18,$14,$23)
	mfhi	($19,$14,$23)
	sd	$24,($15)		# result lands one word below where it was read
	bnez	$1,.Linner
	daddu $15,8			# delay slot: advance tp
	.set	reorder

	/* Drain the pipelines for this pass. */
	daddu $10,$16,$11
	sltu	$1,$10,$11
	daddu $11,$17,$1
	daddu $10,$20
	sltu	$2,$10,$20
	daddu $11,$2

	ld	$20,2*8($15)		# tp[num], top word left by the previous pass
	daddu $24,$18,$25
	sltu	$1,$24,$25
	daddu $25,$19,$1
	daddu $24,$10
	sltu	$2,$24,$10
	daddu $25,$2
	sd	$24,($15)		# tp[num-2]

	daddu $24,$25,$11		# hi1 + hi0 ...
	sltu	$25,$24,$11
	daddu $24,$20			# ... + old tp[num]
	sltu	$1,$24,$20
	daddu $25,$1
	sd	$24,8($15)		# tp[num-1]
	sd	$25,2*8($15)		# tp[num] = combined carry (also left in $25)

	addu	$21,8
	sltu	$2,$21,$9
	bnez	$2,.Louter

	/*
	 * Final reduction: rp = tp - np with borrow propagation, then pick
	 * either tp or that difference with a mask instead of a branch.
	 */
	.set	noreorder
	daddu $20,$29,$9	# &tp[num]
	move	$15,$29
	move	$5,$29			# NOTE(review): $5 is not read again below
	li	$11,0		# clear borrow bit

.align	4
.Lsub:	ld	$10,($15)
	ld	$24,($7)
	daddu $15,8
	daddu $7,8
	dsubu $24,$10,$24	# tp[i]-np[i]
	sgtu	$1,$24,$10		# borrow from the subtraction itself
	dsubu $10,$24,$11		# minus incoming borrow
	sgtu	$11,$10,$24		# borrow from that step
	sd	$10,($4)
	or	$11,$1			# outgoing borrow
	sltu	$1,$15,$20
	bnez	$1,.Lsub
	daddu $4,8			# delay slot: advance rp

	dsubu $11,$25,$11	# handle upmost overflow bit
	move	$15,$29
	dsubu $4,$9	# restore rp
	not	$25,$11			# $11 selects tp[i], $25 selects rp[i]

.Lcopy:	ld	$14,($15)	# conditional move
	ld	$12,($4)
	sd	$0,($15)		# clear the scratch word just read
	daddu $15,8
	and	$14,$11
	and	$12,$25
	or	$12,$14
	sltu	$1,$15,$20
	sd	$12,($4)
	bnez	$1,.Lcopy
	daddu $4,8			# delay slot: advance rp

	li	$4,1
	li	$2,1			# return value 1

	.set	noreorder
	move	$29,$30			# drop tp, back to the register save frame
	ld $30,(14-1)*8($29)
	ld $23,(14-2)*8($29)
	ld $22,(14-3)*8($29)
	ld $21,(14-4)*8($29)
	ld $20,(14-5)*8($29)
	ld $19,(14-6)*8($29)
	ld $18,(14-7)*8($29)
	ld $17,(14-8)*8($29)
	ld $16,(14-9)*8($29)
	jr	$31
	daddu $29,14*8			# delay slot: release the frame
.end	bn_mul_mont_internal
.rdata
.asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"