/*
 * bn_mul_mont -- word-serial Montgomery multiplication, 32-bit x86,
 * GNU as (AT&T syntax), cdecl stack arguments.
 *
 * The embedded identification string at the end of this file reads
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".
 *
 * Stack arguments as this code reads them (offsets are after the four
 * register pushes below; the C-level names are an assumption -- confirm
 * against the C prototype, presumably
 *   int bn_mul_mont(rp, ap, bp, np, n0, num)):
 *   20(%esp)  arg1  vector written by the final subtract/select step
 *   24(%esp)  arg2  first multiplicand vector
 *   28(%esp)  arg3  second multiplicand vector, consumed one word per pass
 *   32(%esp)  arg4  vector subtracted in the tail (presumably the modulus)
 *   36(%esp)  arg5  pointer to a single word; the word itself is loaded
 *   40(%esp)  arg6  num, the word count (signed)
 *
 * Returns in %eax: 0 without touching any vector when num < 4,
 * 1 after the computation otherwise.
 * Saves and restores %ebp, %ebx, %esi, %edi; the SSE2 path uses
 * %mm0-%mm7 and executes emms before leaving.
 *
 * Private frame, relative to the relocated %esp:
 *    0  num-1 (square path only)
 *    4  arg1      8  arg2     12  arg3 (pointer or index, see paths)
 *   16  arg4     20  *arg5    24  caller %esp
 *   28  end-of-arg3 sentinel (generic multiply path)
 *   32  start of the temporary vector
 */
.text
.globl	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.L_bn_mul_mont_begin:
	/* bytes 243,15,30,251 = F3 0F 1E FB (endbr32), kept only when the
	   C preprocessor defines __CET__; without preprocessing the two
	   guard lines are plain comments and the bytes are always emitted */
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax		/* return value 0 for the early exit */
	movl	40(%esp),%edi		/* edi = num */
	cmpl	$4,%edi
	jl	.L000just_leave		/* num < 4 (signed): nothing is done */
	leal	20(%esp),%esi		/* esi = address of arg1 slot */
	leal	24(%esp),%edx		/* edx = address of arg2 slot */
	addl	$2,%edi			/* edi = num+2 */
	negl	%edi
	leal	-32(%esp,%edi,4),%ebp	/* ebp = esp - 32 - 4*(num+2) */
	negl	%edi			/* edi = num+2 again */
	/* adjust the frame address relative to the argument area modulo
	   2048/4096 -- presumably to avoid cache aliasing; TODO confirm */
	movl	%ebp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%ebp
	xorl	%ebp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%ebp
	andl	$-64,%ebp		/* 64-byte align the frame */
	movl	%esp,%eax
	subl	%ebp,%eax
	andl	$-4096,%eax		/* whole pages between esp and frame */
	movl	%esp,%edx		/* edx = caller esp, saved at 24(%esp) below */
	leal	(%ebp,%eax,1),%esp
	movl	(%esp),%eax		/* touch the page */
	cmpl	%ebp,%esp
	ja	.L001page_walk
	jmp	.L002page_walk_done
.align	16
.L001page_walk:
	/* step down one 4096-byte page at a time, reading each one, until
	   esp reaches the frame address in ebp */
	leal	-4096(%esp),%esp
	movl	(%esp),%eax
	cmpl	%ebp,%esp
	ja	.L001page_walk
.L002page_walk_done:
	/* copy the arguments into the private frame */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%ebp
	movl	16(%esi),%esi
	movl	(%esi),%esi		/* dereference arg5 */
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%ebp,16(%esp)
	movl	%esi,20(%esp)
	leal	-3(%edi),%ebx		/* ebx = (num+2)-3 = num-1 */
	movl	%edx,24(%esp)		/* caller esp, restored on exit */
	/* position-independent address of OPENSSL_ia32cap_P */
	call	.L003PIC_me_up
.L003PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
	btl	$26,(%eax)		/* bit 26 of the first capability word;
					   presumably the CPUID SSE2 flag */
	jnc	.L004non_sse2

	/* ---- path using pmuludq on MMX registers ----
	   mm7 = 0x00000000ffffffff mask, mm4 = current arg3 word,
	   mm5 = per-pass multiplier for the arg4 vector,
	   mm2/mm3 = the two running 64-bit carry chains */
	movl	$-1,%eax
	movd	%eax,%mm7
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx		/* edx = outer index i */
	xorl	%ecx,%ecx		/* ecx = inner index j */
	movd	(%edi),%mm4
	movd	(%esi),%mm5
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5		/* times the word loaded from arg5 */
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	incl	%ecx
.align	16
.L0051st:
	/* first pass: temporary vector is written, not accumulated */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx
	cmpl	%ebx,%ecx
	jl	.L0051st
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)	/* top two words of the temporary */
	incl	%edx
.L006outer:
	/* passes 1..num-1: same shape, adding the previous temporary (mm6) */
	xorl	%ecx,%ecx
	movd	(%edi,%edx,4),%mm4
	movd	(%esi),%mm5
	movd	32(%esp),%mm6
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	incl	%ecx
	decl	%ebx			/* ebx counts the inner loop down */
.L007inner:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx
	leal	1(%ecx),%ecx		/* lea keeps the flags set by decl */
	jnz	.L007inner
	movl	%ecx,%ebx		/* ebx = num-1 again */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	leal	1(%edx),%edx
	cmpl	%ebx,%edx
	jle	.L006outer
	emms				/* leave MMX state before returning */
	jmp	.L008common_tail
.align	16
.L004non_sse2:
	/* ---- integer-only paths ----
	   ebp becomes zero only when num is even and arg2 == arg3, which
	   selects the dedicated squaring code */
	movl	8(%esp),%esi
	leal	1(%ebx),%ebp		/* ebp = num */
	movl	12(%esp),%edi
	xorl	%ecx,%ecx
	movl	%esi,%edx
	andl	$1,%ebp			/* num & 1 */
	subl	%edi,%edx		/* arg2 - arg3 */
	leal	4(%edi,%ebx,4),%eax	/* eax = arg3 + 4*num, end sentinel */
	orl	%edx,%ebp
	movl	(%edi),%edi		/* first arg3 word; mov keeps the flags */
	jz	.L009bn_sqr_mont
	movl	%eax,28(%esp)
	movl	(%esi),%eax
	xorl	%edx,%edx
.align	16
.L010mull:
	/* temporary = arg2 vector * first arg3 word */
	movl	%edx,%ebp
	mull	%edi
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L010mull
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi
	addl	%ebp,%eax
	movl	16(%esp),%esi
	adcl	$0,%edx
	imull	32(%esp),%edi		/* edi = temporary[0] * word from arg5 */
	movl	%eax,32(%esp,%ebx,4)
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	movl	(%esi),%eax
	mull	%edi
	addl	32(%esp),%eax		/* only the carry of this add is used */
	movl	4(%esi),%eax
	adcl	$0,%edx
	incl	%ecx
	jmp	.L0112ndmadd
.align	16
.L0121stmadd:
	/* temporary += arg2 vector * current arg3 word */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L0121stmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)
	adcl	$0,%ecx
	movl	(%esi),%eax
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	movl	$1,%ecx
.align	16
.L0112ndmadd:
	/* temporary = (temporary + arg4 vector * edi) shifted down one word:
	   results are stored one slot lower than they are read */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0112ndmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	xorl	%eax,%eax
	movl	12(%esp),%ecx		/* current arg3 pointer */
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx		/* advance to the next arg3 word */
	movl	%edx,32(%esp,%ebx,4)
	cmpl	28(%esp),%ecx		/* reached the end sentinel? */
	movl	%eax,36(%esp,%ebx,4)
	je	.L008common_tail
	movl	(%ecx),%edi
	movl	8(%esp),%esi
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax
	jmp	.L0121stmadd
.align	16
.L009bn_sqr_mont:
	/* ---- squaring path (arg2 == arg3, num even) ----
	   (%esp) = num-1, 12(%esp) = index i of the current word (reuses
	   the arg3 slot); cross products are doubled as they are formed */
	movl	%ebx,(%esp)
	movl	%ecx,12(%esp)		/* i = 0 */
	movl	%edi,%eax
	mull	%edi
	movl	%eax,32(%esp)
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
	incl	%ecx
.align	16
.L013sqr:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp	/* 2*product + bit carried from before */
	shrl	$31,%eax		/* bit shifted out by the doubling */
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L013sqr
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)
	movl	%edx,40(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	%ecx,%ebx
	adcl	$0,%edx
	movl	4(%esi),%eax
	movl	$1,%ecx
.align	16
.L0143rdmadd:
	/* same step as .L0112ndmadd, unrolled to two words per iteration */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0143rdmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	movl	12(%esp),%ecx		/* i */
	xorl	%eax,%eax
	movl	8(%esp),%esi
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)
	cmpl	%ebx,%ecx		/* i == num-1: all words processed */
	movl	%eax,36(%esp,%ebx,4)
	je	.L008common_tail
	movl	4(%esi,%ecx,4),%edi	/* next word, index i+1 */
	leal	1(%ecx),%ecx
	movl	%edi,%eax
	movl	%ecx,12(%esp)
	mull	%edi			/* square of that word */
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx		/* lea keeps the flags from cmpl */
	je	.L015sqrlast
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
.align	16
.L016sqradd:
	/* add the doubled cross products of the current word */
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp
	adcl	$0,%edx
	shrl	$31,%eax
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx
	jle	.L016sqradd
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
.L015sqrlast:
	movl	20(%esp),%edi
	movl	16(%esp),%esi
	imull	32(%esp),%edi
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	leal	-1(%ecx),%ebx
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax
	jmp	.L0143rdmadd
.align	16
.L008common_tail:
	/* ---- shared epilogue; ebx = num-1 on every path into here ----
	   arg1 = temporary - arg4 vector, then a branch-free select keeps
	   either that difference or the unreduced temporary */
	movl	16(%esp),%ebp
	movl	4(%esp),%edi
	leal	32(%esp),%esi
	movl	(%esi),%eax
	movl	%ebx,%ecx
	xorl	%edx,%edx		/* index 0; also clears CF for the first sbbl */
.align	16
.L017sub:
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)
	decl	%ecx			/* dec leaves CF alone, so the borrow survives */
	movl	4(%esi,%edx,4),%eax
	leal	1(%edx),%edx
	jge	.L017sub
	sbbl	$0,%eax			/* top temporary word minus the final borrow */
	movl	$-1,%edx
	xorl	%eax,%edx		/* edx = ~eax: complementary select masks */
	jmp	.L018copy
.align	16
.L018copy:
	/* arg1[i] = (temporary[i] & eax) | (arg1[i] & edx), from the top
	   word down; each temporary word is overwritten with the value
	   left in ecx */
	movl	32(%esp,%ebx,4),%esi
	movl	(%edi,%ebx,4),%ebp
	movl	%ecx,32(%esp,%ebx,4)
	andl	%eax,%esi
	andl	%edx,%ebp
	orl	%esi,%ebp
	movl	%ebp,(%edi,%ebx,4)
	decl	%ebx
	jge	.L018copy
	movl	24(%esp),%esp		/* back to the caller-side stack */
	movl	$1,%eax			/* return value 1 */
.L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/* NUL-terminated ASCII:
   "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
/* 16-byte, 4-byte-aligned common symbol read by the bit test above */
.comm	OPENSSL_ia32cap_P,16,4

	/* ELF property note: name "GNU", note type 5, one property of
	   type 0xc0000002 with value 3 (presumably the x86 feature bits
	   for IBT and shadow stack -- confirm against the ABI document) */
	.section ".note.gnu.property", "a"
	.p2align 2
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	.asciz "GNU"
1:
	.p2align 2
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 2
4: