#if defined(__i386__)
/*
 * bn_mul_mont -- word-serial Montgomery multiplication for 32-bit x86
 * (GNU as, AT&T syntax, ELF).  The trailing .byte string identifies the
 * code as "Montgomery Multiplication for x86, CRYPTOGAMS".
 *
 * Calling convention: all six arguments are read from the caller stack
 * (cdecl style).  Assumed C prototype -- TODO confirm against the header
 * that declares it:
 *
 *     int bn_mul_mont(uint32_t *rp, const uint32_t *ap, const uint32_t *bp,
 *                     const uint32_t *np, const uint32_t *n0, int num);
 *
 * What the code below demonstrably does with them:
 *     arg0 (rp)  destination of every store in the common tail
 *     arg1 (ap)  multiplicand words, indexed by the inner loops
 *     arg2 (bp)  multiplier words, one consumed per outer iteration
 *     arg3 (np)  words subtracted from the scratch vector in the tail
 *     arg4 (n0)  pointer; only its first word is loaded and multiplied
 *                with tp[0] at the top of each reduction pass
 *     arg5 (num) word count
 *
 * Returns: eax = 0 without touching memory when num < 4 (signed compare),
 *          eax = 1 after rp[0..num-1] has been written.
 * Preserves ebp, ebx, esi, edi (pushed on entry, popped on exit) and
 * restores esp from the saved copy.  Clobbers eax, ecx, edx and flags;
 * the SSE2 path also uses mm0-mm7 and executes emms before leaving.
 *
 * Private frame, relative to the relocated esp:
 *      0(%esp)  num-1           (written on the squaring path only)
 *      4(%esp)  rp
 *      8(%esp)  ap
 *     12(%esp)  bp              (mul path: running pointer into bp;
 *                                squaring path: outer index)
 *     16(%esp)  np
 *     20(%esp)  first word read through n0
 *     24(%esp)  caller esp
 *     28(%esp)  &bp[num]        (mul path only: end-of-bp sentinel)
 *     32(%esp)  tp[0..]         scratch vector
 *
 * Change relative to the previous revision: the final select between the
 * scratch vector and the subtracted result used to be done by masking a
 * *pointer* and copying from it, so the addresses read by the copy loop
 * depended on the borrow.  The tail now reads both candidates on every
 * iteration and masks the *data*, keeping the access pattern independent
 * of the operand values.  The value stored to rp is unchanged.
 */
.text
.globl	bn_mul_mont
.hidden	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax			/* return value 0 for the early exit */
	movl	40(%esp),%edi			/* edi = num (sixth argument) */
	cmpl	$4,%edi
	jl	.L000just_leave			/* num < 4: leave with eax = 0 */

	/*
	 * Carve out the private frame: room for num+2 words plus 32 bytes
	 * below the current esp.
	 * NOTE(review): the 2047/2048 arithmetic offsets the frame relative
	 * to the argument block; presumably this limits cache aliasing
	 * between the scratch vector and the inputs -- confirm against the
	 * script that generates this file.
	 */
	leal	20(%esp),%esi			/* esi = address of first argument */
	leal	24(%esp),%edx			/* edx = address of second argument */
	addl	$2,%edi
	negl	%edi
	leal	-32(%esp,%edi,4),%ebp		/* ebp = esp - 32 - 4*(num+2) */
	negl	%edi				/* edi = num+2 again */
	movl	%ebp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%ebp
	xorl	%ebp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%ebp
	andl	$-64,%ebp			/* round the frame down to 64 bytes */

	/*
	 * Move esp down to the new frame one 4096-byte step at a time,
	 * reading a word at each step so every page in between is touched.
	 */
	movl	%esp,%eax
	subl	%ebp,%eax
	andl	$-4096,%eax
	movl	%esp,%edx			/* edx = caller esp, saved below */
	leal	(%ebp,%eax,1),%esp
	movl	(%esp),%eax
	cmpl	%ebp,%esp
	ja	.L001page_walk
	jmp	.L002page_walk_done
.align	16
.L001page_walk:
	leal	-4096(%esp),%esp
	movl	(%esp),%eax
	cmpl	%ebp,%esp
	ja	.L001page_walk
.L002page_walk_done:
	/* Copy the arguments into the private frame (layout in the header). */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%ebp
	movl	16(%esi),%esi
	movl	(%esi),%esi			/* first word behind the n0 pointer */
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%ebp,16(%esp)
	movl	%esi,20(%esp)
	leal	-3(%edi),%ebx			/* ebx = (num+2)-3 = num-1 */
	movl	%edx,24(%esp)			/* remember caller esp */

	/* Position-independent address of OPENSSL_ia32cap_P via call/pop. */
	call	.L003PIC_me_up
.L003PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
	btl	$26,(%eax)			/* bit 26 of the first capability word */
	jnc	.L004non_sse2			/* clear: take the integer-only path */

	/*
	 * pmuludq path.  mm7 = 0x00000000ffffffff mask, mm4 = current bp
	 * word, mm5 = reduction multiplier for this pass, mm2/mm3 = the two
	 * running 64-bit accumulators, edx = outer index, ecx = inner index.
	 */
	movl	$-1,%eax
	movd	%eax,%mm7
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx
	xorl	%ecx,%ecx
	movd	(%edi),%mm4
	movd	(%esi),%mm5
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	incl	%ecx
.align	16
.L0051st:
	/* First pass: tp is written without reading previous contents. */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx
	cmpl	%ebx,%ecx
	jl	.L0051st
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)		/* top two scratch words in one store */
	incl	%edx
.L006outer:
	/* Remaining passes: each one also adds in the previous tp contents. */
	xorl	%ecx,%ecx
	movd	(%edi,%edx,4),%mm4
	movd	(%esi),%mm5
	movd	32(%esp),%mm6
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	incl	%ecx
	decl	%ebx
.L007inner:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx				/* ebx counts down; sets ZF for jnz */
	leal	1(%ecx),%ecx			/* lea leaves the flags from decl intact */
	jnz	.L007inner
	movl	%ecx,%ebx			/* ebx back to num-1 */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	leal	1(%edx),%edx
	cmpl	%ebx,%edx
	jle	.L006outer
	emms					/* leave MMX state before returning */
	jmp	.L008common_tail

.align	16
.L004non_sse2:
	/*
	 * Integer path.  Takes the dedicated squaring code when num is even
	 * and ap == bp (ebp = (num & 1) | (ap - bp) is zero), otherwise the
	 * general multiply below.
	 */
	movl	8(%esp),%esi
	leal	1(%ebx),%ebp			/* ebp = num */
	movl	12(%esp),%edi
	xorl	%ecx,%ecx
	movl	%esi,%edx
	andl	$1,%ebp
	subl	%edi,%edx
	leal	4(%edi,%ebx,4),%eax		/* eax = &bp[num] */
	orl	%edx,%ebp
	movl	(%edi),%edi			/* edi = bp[0]; mov keeps ZF from orl */
	jz	.L009bn_sqr_mont
	movl	%eax,28(%esp)			/* end-of-bp sentinel */
	movl	(%esi),%eax
	xorl	%edx,%edx
.align	16
.L010mull:
	/* tp = ap * bp[0]; edx carries the high word between iterations. */
	movl	%edx,%ebp
	mull	%edi
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L010mull
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi
	addl	%ebp,%eax
	movl	16(%esp),%esi
	adcl	$0,%edx
	imull	32(%esp),%edi			/* edi = tp[0] * n0 word (low 32 bits) */
	movl	%eax,32(%esp,%ebx,4)
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	movl	(%esi),%eax
	mull	%edi
	addl	32(%esp),%eax			/* only the carry out of this add is kept */
	movl	4(%esi),%eax
	adcl	$0,%edx
	incl	%ecx
	jmp	.L0112ndmadd
.align	16
.L0121stmadd:
	/* tp += ap * (current bp word). */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L0121stmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)
	adcl	$0,%ecx
	movl	(%esi),%eax
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	movl	$1,%ecx
.align	16
.L0112ndmadd:
	/* tp = (tp + np * edi) shifted down one word (stores go to index-1). */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0112ndmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	xorl	%eax,%eax
	movl	12(%esp),%ecx
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx			/* advance the bp pointer */
	movl	%edx,32(%esp,%ebx,4)
	cmpl	28(%esp),%ecx			/* reached &bp[num]? */
	movl	%eax,36(%esp,%ebx,4)
	je	.L008common_tail
	movl	(%ecx),%edi
	movl	8(%esp),%esi
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax
	jmp	.L0121stmadd

.align	16
.L009bn_sqr_mont:
	/*
	 * Squaring path (ap == bp, num even).  0(%esp) holds num-1 and
	 * 12(%esp) is reused as the outer index.  Cross products are
	 * doubled on the fly; ebx carries the bit shifted out of each
	 * doubled word.
	 */
	movl	%ebx,(%esp)
	movl	%ecx,12(%esp)			/* outer index = 0 */
	movl	%edi,%eax
	mull	%edi				/* ap[0] * ap[0] */
	movl	%eax,32(%esp)
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
	incl	%ecx
.align	16
.L013sqr:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp		/* 2*low + carried-in bit */
	shrl	$31,%eax			/* bit shifted out by the doubling */
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L013sqr
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)
	movl	%edx,40(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	%ecx,%ebx			/* ebx = num-1 */
	adcl	$0,%edx
	movl	4(%esi),%eax
	movl	$1,%ecx
.align	16
.L0143rdmadd:
	/* Reduction pass for the squaring path, two words per iteration. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0143rdmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	movl	12(%esp),%ecx			/* ecx = outer index */
	xorl	%eax,%eax
	movl	8(%esp),%esi
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)
	cmpl	%ebx,%ecx			/* outer index reached num-1? */
	movl	%eax,36(%esp,%ebx,4)
	je	.L008common_tail
	movl	4(%esi,%ecx,4),%edi		/* next ap word to square */
	leal	1(%ecx),%ecx
	movl	%edi,%eax
	movl	%ecx,12(%esp)
	mull	%edi
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx			/* lea keeps the flags from cmpl */
	je	.L015sqrlast
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
.align	16
.L016sqradd:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp
	adcl	$0,%edx
	shrl	$31,%eax
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx
	jle	.L016sqradd
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
.L015sqrlast:
	movl	20(%esp),%edi
	movl	16(%esp),%esi
	imull	32(%esp),%edi
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	leal	-1(%ecx),%ebx
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax
	jmp	.L0143rdmadd

.align	16
.L008common_tail:
	/*
	 * Final step, shared by all paths.  On entry ebx = num-1 and the
	 * scratch vector holds num words plus one top word at tp[num].
	 * First rp = tp - np with the borrow chained through CF.
	 */
	movl	16(%esp),%ebp			/* ebp = np */
	movl	4(%esp),%edi			/* edi = rp */
	leal	32(%esp),%esi			/* esi = &tp[0] */
	movl	(%esi),%eax			/* eax = tp[0] */
	movl	%ebx,%ecx			/* ecx = num-1, loop counter */
	xorl	%edx,%edx			/* index = 0, and CF cleared for sbbl */
.align	16
.L017sub:
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)		/* rp[i] = tp[i] - np[i] - borrow */
	decl	%ecx				/* dec leaves CF alone; sets SF/OF for jge */
	movl	4(%esi,%edx,4),%eax		/* eax = tp[i+1] */
	leal	1(%edx),%edx			/* index++ without touching flags */
	jge	.L017sub
	sbbl	$0,%eax				/* eax = tp[num] - borrow: select mask */
	movl	$-1,%edx
	xorl	%eax,%edx			/* edx = complement of the mask */
	jmp	.L018copy			/* skip the alignment padding */
.align	16
.L018copy:
	/*
	 * rp[i] = (tp[i] & mask) | (rp[i] & ~mask), walking i from num-1
	 * down to 0.  Both words are read on every iteration so the
	 * addresses touched do not depend on the mask.  The scratch word is
	 * overwritten right after it is read, with the leftover counter in
	 * ecx (-1 once the subtraction loop has finished).
	 */
	movl	32(%esp,%ebx,4),%esi		/* esi = tp[i] */
	movl	(%edi,%ebx,4),%ebp		/* ebp = rp[i] (the difference) */
	movl	%ecx,32(%esp,%ebx,4)		/* wipe tp[i] */
	andl	%eax,%esi
	andl	%edx,%ebp
	orl	%esi,%ebp
	movl	%ebp,(%edi,%ebx,4)
	decl	%ebx
	jge	.L018copy
	movl	24(%esp),%esp			/* back to the caller frame */
	movl	$1,%eax				/* report success */
.L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/* ASCII: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
#endif