/*
 * _bn_mul_mont -- word-serial Montgomery multiplication, i386, Mach-O,
 * AT&T (GAS) syntax.
 *
 * Calling convention: 32-bit cdecl, six stack arguments.  After the four
 * register pushes in the prologue they sit at 20..40(%esp):
 *
 *   20(%esp)  arg1  pointer, destination of the final subtract/select
 *   24(%esp)  arg2  pointer, first multiplicand words
 *   28(%esp)  arg3  pointer, second multiplicand words
 *   32(%esp)  arg4  pointer, words subtracted in the tail (the modulus)
 *   36(%esp)  arg5  pointer; only the first 32-bit word behind it is read
 *   40(%esp)  arg6  signed word count
 *
 * NOTE(review): the conventional names for these are rp, ap, bp, np, n0
 * and num; the comments below use them -- confirm against the C prototype.
 *
 * Returns (eax): 0 without touching memory when num < 4, otherwise 1.
 * ebp/ebx/esi/edi are saved and restored; eax/ecx/edx and, on the MMX
 * path, mm0-mm7 are clobbered (emms is executed before leaving that path).
 *
 * Private frame after relocation of esp (tp = scratch vector):
 *    0(%esp)  num-1             (squaring path only)
 *    4(%esp)  rp
 *    8(%esp)  ap
 *   12(%esp)  bp  (mul path: running pointer; squaring path: index i)
 *   16(%esp)  np
 *   20(%esp)  n0 value (first word behind arg5)
 *   24(%esp)  caller esp, restored in the epilogue
 *   28(%esp)  address of bp[num]   (mul path loop end marker)
 *   32(%esp)  tp[0] ... tp[num+1]
 *
 * The CET guard below uses C-preprocessor conditionals.  The NASM-style
 * percent-prefixed form is not an assembler statement in GAS syntax.
 * When this file is assembled without the C preprocessor the two guard
 * lines are plain comments and the endbr32 bytes are always emitted.
 */
.text
.globl	_bn_mul_mont
.align	4
_bn_mul_mont:
L_bn_mul_mont_begin:
#ifdef __CET__

.byte	243,15,30,251
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax			/* return value 0 for the early exit */
	movl	40(%esp),%edi			/* edi = num */
	cmpl	$4,%edi
	jl	L000just_leave			/* signed compare: num < 4 -> return 0 */

	/* ---- carve out the scratch frame below the current stack ---- */
	leal	20(%esp),%esi			/* esi = address of arg1 slot */
	leal	24(%esp),%edx			/* edx = address of arg2 slot */
	addl	$2,%edi				/* edi = num+2 */
	negl	%edi
	leal	-32(%esp,%edi,4),%ebp		/* ebp = esp - 32 - 4*(num+2) */
	negl	%edi				/* edi = num+2 again */
	movl	%ebp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%ebp			/* low 11 bits of ebp now equal those of edx */
	xorl	%ebp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%ebp			/* drop 2048 more when bit 11 of ebp and edx agree */
	andl	$-64,%ebp			/* 64-byte align the frame base */
	movl	%esp,%eax
	subl	%ebp,%eax
	andl	$-4096,%eax			/* whole pages between old esp and new base */
	movl	%esp,%edx			/* edx = caller esp, saved into the frame below */
	leal	(%ebp,%eax,1),%esp
	movl	(%esp),%eax			/* touch the page */
	cmpl	%ebp,%esp
	ja	L001page_walk
	jmp	L002page_walk_done
.align	4,0x90
L001page_walk:
	/* step down one 4096-byte page at a time, reading each, until esp <= ebp */
	leal	-4096(%esp),%esp
	movl	(%esp),%eax
	cmpl	%ebp,%esp
	ja	L001page_walk
L002page_walk_done:
	/* ---- copy the arguments into the private frame ---- */
	movl	(%esi),%eax			/* rp */
	movl	4(%esi),%ebx			/* ap */
	movl	8(%esi),%ecx			/* bp */
	movl	12(%esi),%ebp			/* np */
	movl	16(%esi),%esi			/* pointer arg5 ... */
	movl	(%esi),%esi			/* ... dereferenced once: n0 value */
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%ebp,16(%esp)
	movl	%esi,20(%esp)
	leal	-3(%edi),%ebx			/* ebx = (num+2)-3 = num-1 */
	movl	%edx,24(%esp)			/* caller esp */

	/* ---- PIC load of the capability vector, then dispatch ---- */
	call	L003PIC_me_up
L003PIC_me_up:
	popl	%eax				/* eax = address of L003PIC_me_up */
	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax
	btl	$26,(%eax)			/* bit 26 of the first capability word */
	jnc	L004non_sse2			/* clear -> integer-only code below */

	/* ================= MMX-register / pmuludq path ================= */
	movl	$-1,%eax
	movd	%eax,%mm7			/* mm7 = 0x00000000ffffffff, low-word mask */
	movl	8(%esp),%esi			/* esi = ap */
	movl	12(%esp),%edi			/* edi = bp */
	movl	16(%esp),%ebp			/* ebp = np */
	xorl	%edx,%edx			/* edx = i (outer index) */
	xorl	%ecx,%ecx			/* ecx = j (inner index) */
	movd	(%edi),%mm4			/* mm4 = bp[0] */
	movd	(%esi),%mm5			/* mm5 = ap[0] */
	movd	(%ebp),%mm3			/* mm3 = np[0] */
	pmuludq	%mm4,%mm5			/* ap[0]*bp[0] */
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0			/* low word of the product */
	pmuludq	20(%esp),%mm5			/* mm5 = low word * n0 (reduction multiplier) */
	pmuludq	%mm5,%mm3			/* np[0] * multiplier */
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1			/* np[1] */
	movd	4(%esi),%mm0			/* ap[1] */
	psrlq	$32,%mm2			/* carries: keep high halves */
	psrlq	$32,%mm3
	incl	%ecx
.align	4,0x90
L0051st:
	/* first pass: no previous tp contents are added */
	pmuludq	%mm4,%mm0			/* ap[j]*bp[0] */
	pmuludq	%mm5,%mm1			/* np[j]*multiplier */
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1		/* preload np[j+1] */
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0		/* preload ap[j+1] */
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)		/* tp[j-1] */
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx
	cmpl	%ebx,%ecx
	jl	L0051st				/* while j < num-1 */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)		/* 64-bit store: tp[num-1], tp[num] */
	incl	%edx				/* i = 1 */
L006outer:
	xorl	%ecx,%ecx
	movd	(%edi,%edx,4),%mm4		/* mm4 = bp[i] */
	movd	(%esi),%mm5			/* ap[0] */
	movd	32(%esp),%mm6			/* tp[0] */
	movd	(%ebp),%mm3			/* np[0] */
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5			/* ap[0]*bp[i] + tp[0] */
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5			/* new reduction multiplier */
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6			/* tp[1] */
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	incl	%ecx
	decl	%ebx				/* ebx doubles as the inner down-counter */
L007inner:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6		/* tp[j+1] */
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)		/* tp[j-1] */
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx
	leal	1(%ecx),%ecx			/* lea keeps the flags set by decl */
	jnz	L007inner
	movl	%ecx,%ebx			/* restore ebx = num-1 */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6		/* tp[num] */
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)		/* tp[num-1], tp[num] */
	leal	1(%edx),%edx
	cmpl	%ebx,%edx
	jle	L006outer			/* while i <= num-1 */
	emms					/* leave MMX state clean */
	jmp	L008common_tail

	/* ================= integer-only path ================= */
.align	4,0x90
L004non_sse2:
	movl	8(%esp),%esi			/* esi = ap */
	leal	1(%ebx),%ebp			/* ebp = num */
	movl	12(%esp),%edi			/* edi = bp */
	xorl	%ecx,%ecx			/* j = 0 */
	movl	%esi,%edx
	andl	$1,%ebp				/* num & 1 */
	subl	%edi,%edx			/* ap - bp */
	leal	4(%edi,%ebx,4),%eax		/* eax = address of bp[num] */
	orl	%edx,%ebp			/* ZF set iff ap == bp and num is even */
	movl	(%edi),%edi			/* edi = bp[0]; mov leaves flags alone */
	jz	L009bn_sqr_mont
	movl	%eax,28(%esp)			/* loop end marker for the bp pointer */
	movl	(%esi),%eax			/* ap[0] */
	xorl	%edx,%edx
.align	4,0x90
L010mull:
	/* tp[j] = ap[j]*bp[0] + carry */
	movl	%edx,%ebp
	mull	%edi
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	L010mull
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi			/* n0 */
	addl	%ebp,%eax
	movl	16(%esp),%esi			/* esi = np */
	adcl	$0,%edx
	imull	32(%esp),%edi			/* edi = tp[0]*n0 (reduction multiplier) */
	movl	%eax,32(%esp,%ebx,4)		/* tp[num-1] */
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)		/* tp[num] */
	movl	%ecx,40(%esp,%ebx,4)		/* tp[num+1] = 0 */
	movl	(%esi),%eax
	mull	%edi
	addl	32(%esp),%eax			/* only the carry out of this add is kept */
	movl	4(%esi),%eax
	adcl	$0,%edx
	incl	%ecx
	jmp	L0112ndmadd
.align	4,0x90
L0121stmadd:
	/* tp[j] += ap[j]*bp[i] */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	L0121stmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi			/* n0 */
	adcl	$0,%edx
	movl	16(%esp),%esi			/* esi = np */
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi			/* edi = tp[0]*n0 */
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)
	adcl	$0,%ecx
	movl	(%esi),%eax
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)		/* top carry word */
	mull	%edi
	addl	32(%esp),%eax			/* only the carry out of this add is kept */
	movl	4(%esi),%eax
	adcl	$0,%edx
	movl	$1,%ecx
.align	4,0x90
L0112ndmadd:
	/* tp[j-1] = tp[j] + np[j]*multiplier + carry (shifts tp down one word) */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	L0112ndmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	xorl	%eax,%eax
	movl	12(%esp),%ecx			/* running bp pointer */
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx			/* advance to the next bp word */
	movl	%edx,32(%esp,%ebx,4)
	cmpl	28(%esp),%ecx			/* reached bp[num]? */
	movl	%eax,36(%esp,%ebx,4)
	je	L008common_tail
	movl	(%ecx),%edi			/* edi = next bp word */
	movl	8(%esp),%esi			/* esi = ap */
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax
	jmp	L0121stmadd

	/* ---- squaring variant: entered only when ap == bp and num is even ---- */
.align	4,0x90
L009bn_sqr_mont:
	movl	%ebx,(%esp)			/* 0(%esp) = num-1 */
	movl	%ecx,12(%esp)			/* 12(%esp) = i = 0 */
	movl	%edi,%eax
	mull	%edi				/* ap[0]*ap[0] */
	movl	%eax,32(%esp)			/* tp[0] */
	movl	%edx,%ebx
	shrl	$1,%edx				/* high word is halved here, doubled again below */
	andl	$1,%ebx				/* ebx = the bit shifted out */
	incl	%ecx
.align	4,0x90
L013sqr:
	/* tp[j] = 2*(ap[j]*ap[0] + carry) + bit carried in ebx */
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp
	shrl	$31,%eax			/* bit lost by the doubling */
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)
	jl	L013sqr
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi			/* n0 */
	adcl	$0,%edx
	movl	16(%esp),%esi			/* esi = np */
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi			/* edi = tp[0]*n0 */
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)
	movl	%edx,40(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax			/* only the carry out of this add is kept */
	movl	%ecx,%ebx			/* ebx = num-1 */
	adcl	$0,%edx
	movl	4(%esi),%eax
	movl	$1,%ecx
.align	4,0x90
L0143rdmadd:
	/* reduction step, two words per iteration */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	L0143rdmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	movl	12(%esp),%ecx			/* ecx = i */
	xorl	%eax,%eax
	movl	8(%esp),%esi			/* esi = ap */
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)
	cmpl	%ebx,%ecx			/* i == num-1 -> all words consumed */
	movl	%eax,36(%esp,%ebx,4)
	je	L008common_tail
	movl	4(%esi,%ecx,4),%edi		/* edi = ap[i+1] */
	leal	1(%ecx),%ecx
	movl	%edi,%eax
	movl	%ecx,12(%esp)			/* store the advanced i */
	mull	%edi				/* ap[i]*ap[i] */
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx			/* lea keeps the flags set by cmpl */
	je	L015sqrlast
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
.align	4,0x90
L016sqradd:
	/* tp[j] += 2*(ap[j]*ap[i] + carry) + bit carried in ebx */
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp
	adcl	$0,%edx
	shrl	$31,%eax
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx
	jle	L016sqradd
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
L015sqrlast:
	movl	20(%esp),%edi			/* n0 */
	movl	16(%esp),%esi			/* esi = np */
	imull	32(%esp),%edi			/* edi = tp[0]*n0 */
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax			/* only the carry out of this add is kept */
	leal	-1(%ecx),%ebx			/* ebx = num-1 */
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax
	jmp	L0143rdmadd

	/* ================= shared tail: rp = tp - np, then select ================= */
.align	4,0x90
L008common_tail:
	movl	16(%esp),%ebp			/* ebp = np */
	movl	4(%esp),%edi			/* edi = rp */
	leal	32(%esp),%esi			/* esi = tp */
	movl	(%esi),%eax			/* tp[0] */
	movl	%ebx,%ecx			/* ecx = num-1, counts down */
	xorl	%edx,%edx			/* j = 0; also clears CF for the first sbbl */
.align	4,0x90
L017sub:
	sbbl	(%ebp,%edx,4),%eax		/* tp[j] - np[j] - borrow */
	movl	%eax,(%edi,%edx,4)		/* rp[j] */
	decl	%ecx				/* decl does not modify CF, so the borrow survives */
	movl	4(%esi,%edx,4),%eax		/* next tp word */
	leal	1(%edx),%edx
	jge	L017sub				/* uses the flags from decl */
	sbbl	$0,%eax				/* eax = tp[num] minus the final borrow: select mask */
	movl	$-1,%edx
	xorl	%eax,%edx			/* edx = complement of the mask */
	jmp	L018copy
.align	4,0x90
L018copy:
	/* rp[j] = (tp[j] & eax) | (rp[j] & edx), branch-free, from the top word down */
	movl	32(%esp,%ebx,4),%esi
	movl	(%edi,%ebx,4),%ebp
	movl	%ecx,32(%esp,%ebx,4)		/* overwrite the scratch word (ecx is -1 here) */
	andl	%eax,%esi
	andl	%edx,%ebp
	orl	%esi,%ebp
	movl	%ebp,(%edi,%ebx,4)
	decl	%ebx
	jge	L018copy
	movl	24(%esp),%esp			/* back to the caller stack */
	movl	$1,%eax				/* return value 1 */
L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/* "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
/* Non-lazy pointer through which the PIC sequence above reaches _OPENSSL_ia32cap_P */
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol	_OPENSSL_ia32cap_P
.long	0
.comm	_OPENSSL_ia32cap_P,16,2