# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#include "ring_core_generated/prefix_symbols_asm.h"
.text

.extern	OPENSSL_ia32cap_P
.hidden	OPENSSL_ia32cap_P

// bn_mul_mont_gather5 -- Montgomery multiplication with the multiplicand
// fetched from a scattered table via masked (branch- and address-
// independent) selection.
// NOTE(review): generated by perlasm (x86_64-mont5.pl family); the C-level
// signature is presumably the usual bn_mul_mont one (rp, ap, bp, np, n0,
// num[, power]) -- confirm against the generating Perl source.
// %r9d = number of limbs on entry; %rax snapshots the caller %rsp for the
// CFI-tracked epilogue.
.globl	bn_mul_mont_gather5
.hidden	bn_mul_mont_gather5
.type	bn_mul_mont_gather5,@function
.align	64
bn_mul_mont_gather5:
.cfi_startproc
	movl	%r9d,%r9d			// zero-extend the limb count
	movq	%rsp,%rax			// keep original %rsp for unwinding/restore
.cfi_def_cfa_register	%rax
	testl	$7,%r9d				// limb count a multiple of 8?
	jnz	.Lmul_enter			// no: generic 1x path below
	leaq	OPENSSL_ia32cap_P(%rip),%r11
	movl	8(%r11),%r11d			// capability word 2, tested in .Lmul4x_enter
	jmp	.Lmul4x_enter			// yes: 4x-unrolled path

.align	16
.Lmul_enter:
	movd	8(%rsp),%xmm5			// xmm5 = table index ("power"), 7th stack arg
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	negq	%r9
	movq	%rsp,%r11
	leaq	-280(%rsp,%r9,8),%r10		// scratch frame sized by limb count
	negq	%r9
	andq	$-1024,%r10			// 1 KB-align the new stack

	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11			// probe the new top of stack
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
	jmp	.Lmul_page_walk_done

// Touch the stack one page at a time so guard pages are hit in order.
.Lmul_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
.Lmul_page_walk_done:

	leaq	.Linc(%rip),%r10		// .Linc defined elsewhere in this file
	movq	%rax,8(%rsp,%r9,8)		// save caller %rsp in the frame
.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:

	// Build 16 two-lane equality masks (one per pair of table entries):
	// xmm5 holds the requested index broadcast to all lanes; counters
	// derived from .Linc are compared with pcmpeqd and the all-ones /
	// all-zeros masks are parked at 112..352(%r10).  The entry is later
	// selected with pand/por only -- no secret-dependent load address.
	leaq	128(%rdx),%r12			// r12 = &table[0] biased by 128
	movdqa	0(%r10),%xmm0
	movdqa	16(%r10),%xmm1
	leaq	24-112(%rsp,%r9,8),%r10
	andq	$-16,%r10

	pshufd	$0,%xmm5,%xmm5			// broadcast index to all 4 dwords
	movdqa	%xmm1,%xmm4
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
.byte	0x67					// addr-size prefix pad (generator-emitted)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,112(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	// Continue filling the mask table at 112..352(%r10); the repeating
	// paddd/pcmpeqd/movdqa pattern generates one equality mask per
	// counter value (incremented via xmm4 = .Linc step).
	movdqa	%xmm1,128(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,144(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,160(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,176(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,192(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,208(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,224(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,240(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,256(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,272(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,288(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,304(%r10)

	paddd	%xmm2,%xmm3
.byte	0x67					// generator-emitted pad prefix
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,320(%r10)

	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,336(%r10)

	// First gather: AND every table stripe with its mask and OR the
	// results together; exactly one mask is all-ones, so xmm0 ends up
	// holding the selected b[power] limb without an indexed load.
	pand	64(%r12),%xmm0

	pand	80(%r12),%xmm1
	pand	96(%r12),%xmm2
	movdqa	%xmm3,352(%r10)
	pand	112(%r12),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-128(%r12),%xmm4
	movdqa	-112(%r12),%xmm5
	movdqa	-96(%r12),%xmm2
	pand	112(%r10),%xmm4
	movdqa	-80(%r12),%xmm3
	pand	128(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	144(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	160(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-64(%r12),%xmm4
	movdqa	-48(%r12),%xmm5
	movdqa	-32(%r12),%xmm2
	pand	176(%r10),%xmm4
	movdqa	-16(%r12),%xmm3
	pand	192(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	208(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	224(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	0(%r12),%xmm4
	movdqa	16(%r12),%xmm5
	movdqa	32(%r12),%xmm2
	pand	240(%r10),%xmm4
	movdqa	48(%r12),%xmm3
	pand	256(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	272(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	288(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	por	%xmm1,%xmm0
	pshufd	$0x4e,%xmm0,%xmm1		// fold high qword into low qword
	por	%xmm1,%xmm0
	leaq	256(%r12),%r12			// advance to next table column
.byte	102,72,15,126,195			// movq %xmm0,%rbx (selected b limb)

	// First Montgomery iteration: t = a * b[0]; m = t[0] * n0;
	// accumulate t + m*n into the scratch area at (%rsp).
	movq	(%r8),%r8			// r8 = n0
	movq	(%rsi),%rax			// rax = a[0]

	xorq	%r14,%r14			// r14 = outer index i = 0
	xorq	%r15,%r15			// r15 = inner index j = 0

	movq	%r8,%rbp
	mulq	%rbx				// a[0] * b[i]
	movq	%rax,%r10
	movq	(%rcx),%rax			// rax = n[0]

	imulq	%r10,%rbp			// m = lo * n0 (mod 2^64)
	movq	%rdx,%r11

	mulq	%rbp				// n[0] * m
	addq	%rax,%r10			// discards to carry; lo is 0 mod 2^64
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.L1st_enter

// Inner loop of the first outer iteration (no previous tp[] to add).
.align	16
.L1st:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	movq	%r10,%r11
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.L1st_enter:
	mulq	%rbx				// a[j] * b[i]
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax		// rax = n[j]
	adcq	$0,%rdx
	leaq	1(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp				// n[j] * m
	cmpq	%r9,%r15
	jne	.L1st

	// Tail of the first iteration: flush last two limbs + carry word.
	addq	%rax,%r13
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r9,8)
	movq	%rdx,%r13
	movq	%r10,%r11

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)		// top carry limb

	leaq	1(%r14),%r14			// i = 1
	jmp	.Louter
// Outer loop: gather b[i] with masks (as above, masks live at
// 24+128(%rsp,%r9,8)), then accumulate a*b[i] + m*n + tp into tp.
.align	16
.Louter:
	leaq	24+128(%rsp,%r9,8),%rdx
	andq	$-16,%rdx
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	movdqa	-128(%r12),%xmm0
	movdqa	-112(%r12),%xmm1
	movdqa	-96(%r12),%xmm2
	movdqa	-80(%r12),%xmm3
	pand	-128(%rdx),%xmm0
	pand	-112(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-80(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%r12),%xmm0
	movdqa	-48(%r12),%xmm1
	movdqa	-32(%r12),%xmm2
	movdqa	-16(%r12),%xmm3
	pand	-64(%rdx),%xmm0
	// Remainder of the masked gather for b[i] (see .Louter above),
	// then the inner multiply-accumulate loop .Linner.
	pand	-48(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-16(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%r12),%xmm0
	movdqa	16(%r12),%xmm1
	movdqa	32(%r12),%xmm2
	movdqa	48(%r12),%xmm3
	pand	0(%rdx),%xmm0
	pand	16(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	48(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%r12),%xmm0
	movdqa	80(%r12),%xmm1
	movdqa	96(%r12),%xmm2
	movdqa	112(%r12),%xmm3
	pand	64(%rdx),%xmm0
	pand	80(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	112(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4
	pshufd	$0x4e,%xmm4,%xmm0		// fold high qword into low
	por	%xmm4,%xmm0
	leaq	256(%r12),%r12			// next table column

	movq	(%rsi),%rax			// rax = a[0]
.byte	102,72,15,126,195			// movq %xmm0,%rbx (b[i])

	xorq	%r15,%r15			// j = 0
	movq	%r8,%rbp
	movq	(%rsp),%r10			// tp[0]

	mulq	%rbx				// a[0] * b[i]
	addq	%rax,%r10
	movq	(%rcx),%rax			// n[0]
	adcq	$0,%rdx

	imulq	%r10,%rbp			// m = (tp[0]+lo) * n0
	movq	%rdx,%r11

	mulq	%rbp				// n[0] * m
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	8(%rsp),%r10			// tp[1]
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.Linner_enter

// Inner loop: tp[j-1] = tp[j] + a[j]*b[i] + n[j]*m, carries in r11/r13.
.align	16
.Linner:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.Linner_enter:
	mulq	%rbx				// a[j] * b[i]
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax		// n[j]
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	leaq	1(%r15),%r15

	mulq	%rbp				// n[j] * m
	cmpq	%r9,%r15
	jne	.Linner

	// Inner-loop tail: fold final products and the running top carry.
	addq	%rax,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r9,8),%r10		// previous top carry
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r9,8)
	movq	%rdx,%r13

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)		// new top carry

	leaq	1(%r14),%r14			// next outer iteration
	cmpq	%r9,%r14
	jb	.Louter
	// Final reduction: conditionally subtract the modulus, then copy the
	// result out with a branchless mask select (constant-time).
	xorq	%r14,%r14			// index = 0; also clears CF for sbbq chain
	movq	(%rsp),%rax			// tp[0]
	leaq	(%rsp),%rsi
	movq	%r9,%r15			// limb counter
	jmp	.Lsub
.align	16
.Lsub:
	sbbq	(%rcx,%r14,8),%rax		// tp[] - n[] with running borrow
	movq	%rax,(%rdi,%r14,8)		// tentative result into rp[]
	movq	8(%rsi,%r14,8),%rax
	leaq	1(%r14),%r14
	decq	%r15
	jnz	.Lsub

	sbbq	$0,%rax				// pull in borrow from top carry limb
	movq	$-1,%rbx
	xorq	%rax,%rbx			// rbx/rax = complementary select masks
	xorq	%r14,%r14
	movq	%r9,%r15

// Branchless select between (tp - n) already in rp[] and tp[], and scrub
// the scratch slot with the loop counter as it goes.
.Lcopy:
	movq	(%rdi,%r14,8),%rcx
	movq	(%rsp,%r14,8),%rdx
	andq	%rbx,%rcx
	andq	%rax,%rdx
	movq	%r14,(%rsp,%r14,8)		// overwrite scratch limb
	orq	%rcx,%rdx
	movq	%rdx,(%rdi,%r14,8)
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	.Lcopy

	// Epilogue: recover saved registers from the frame, return 1.
	movq	8(%rsp,%r9,8),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax

	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3			// rep ret
.cfi_endproc
.size	bn_mul_mont_gather5,.-bn_mul_mont_gather5

// bn_mul4x_mont_gather5 -- 4x-unrolled variant, used when the limb count
// is a multiple of 8 (dispatched from bn_mul_mont_gather5).  %r11d holds
// OPENSSL_ia32cap_P word 2 on entry; the 0x80108 test routes to the
// MULX/ADX path (.Lmulx4x_enter, defined elsewhere in this file) --
// NOTE(review): bit meanings per cpuid, confirm against the Perl source.
.type	bn_mul4x_mont_gather5,@function
.align	32
bn_mul4x_mont_gather5:
.cfi_startproc
.byte	0x67					// generator-emitted pad prefix
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lmul4x_enter:
	andl	$0x80108,%r11d
	cmpl	$0x80108,%r11d
	je	.Lmulx4x_enter
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lmul4x_prologue:

.byte	0x67
	shll	$3,%r9d				// r9 = byte length of the operands
	leaq	(%r9,%r9,2),%r10		// r10 = 3*len
	negq	%r9

	// Pick a scratch frame that does not alias rp modulo 4 KB
	// (cache-bank considerations in the original source).
	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lmul4xsp_alt
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp
	jmp	.Lmul4xsp_done

.align	32
.Lmul4xsp_alt:
	leaq	4096-320(,%r9,2),%r10
	leaq	-320(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lmul4xsp_done:
	andq	$-64,%rbp			// 64-byte align the frame
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10			// probe
	cmpq	%rbp,%rsp
	ja	.Lmul4x_page_walk
	jmp	.Lmul4x_page_walk_done

// Page-at-a-time stack probe, as in the 1x path.
.Lmul4x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmul4x_page_walk
.Lmul4x_page_walk_done:

	negq	%r9

	movq	%rax,40(%rsp)			// saved caller %rsp
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lmul4x_body:

	call	mul4x_internal

	// Epilogue: restore and return 1.
	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax

	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul4x_epilogue:
	.byte	0xf3,0xc3			// rep ret
.cfi_endproc
.size	bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5

// mul4x_internal -- body of the 4x Montgomery multiply.  Shared with
// bn_power5.  On entry r9 = byte length, rax points at the argument block
// holding the table index at 8(%rax).  Builds the same mask table as the
// 1x path, then runs a 4-way unrolled CIOS loop.  Exits by falling into
// .Lsqr4x_sub_entry (defined elsewhere in this file).
.type	mul4x_internal,@function
.align	32
mul4x_internal:
.cfi_startproc
	shlq	$5,%r9
	movd	8(%rax),%xmm5			// xmm5 = table index ("power")
	leaq	.Linc(%rip),%rax
	leaq	128(%rdx,%r9,1),%r13		// end-of-table sentinel, saved below
	shrq	$5,%r9
	movdqa	0(%rax),%xmm0
	movdqa	16(%rax),%xmm1
	leaq	88-112(%rsp,%r9,1),%r10		// mask table base
	leaq	128(%rdx),%r12			// biased table pointer

	// Mask construction, identical in structure to the 1x path above.
	pshufd	$0,%xmm5,%xmm5
	movdqa	%xmm1,%xmm4
.byte	0x67,0x67
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
.byte	0x67
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,112(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,128(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,144(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,160(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,176(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,192(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,208(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,224(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,240(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,256(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,272(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,288(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,304(%r10)

	paddd	%xmm2,%xmm3
.byte	0x67
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,320(%r10)

	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,336(%r10)

	// Masked gather of b[power] (pand/por select, no secret index).
	pand	64(%r12),%xmm0

	pand	80(%r12),%xmm1
	pand	96(%r12),%xmm2
	movdqa	%xmm3,352(%r10)
	pand	112(%r12),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-128(%r12),%xmm4
	movdqa	-112(%r12),%xmm5
	movdqa	-96(%r12),%xmm2
	pand	112(%r10),%xmm4
	movdqa	-80(%r12),%xmm3
	pand	128(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	144(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	160(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-64(%r12),%xmm4
	movdqa	-48(%r12),%xmm5
	movdqa	-32(%r12),%xmm2
	pand	176(%r10),%xmm4
	movdqa	-16(%r12),%xmm3
	pand	192(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	208(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	224(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	0(%r12),%xmm4
	movdqa	16(%r12),%xmm5
	movdqa	32(%r12),%xmm2
	pand	240(%r10),%xmm4
	movdqa	48(%r12),%xmm3
	pand	256(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	272(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	288(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	por	%xmm1,%xmm0
	pshufd	$0x4e,%xmm0,%xmm1
	por	%xmm1,%xmm0
	leaq	256(%r12),%r12
.byte	102,72,15,126,195			// movq %xmm0,%rbx (b[power])

	movq	%r13,16+8(%rsp)			// table end sentinel
	movq	%rdi,56+8(%rsp)			// rp saved for the tail

	// First iteration: t = a * b; m = t[0]*n0; accumulate into the
	// scratch at 64+8(%rsp).  rdi doubles as a carry register here.
	movq	(%r8),%r8			// n0
	movq	(%rsi),%rax			// a[0]
	leaq	(%rsi,%r9,1),%rsi		// bias ap to its end
	negq	%r9

	movq	%r8,%rbp
	mulq	%rbx
	movq	%rax,%r10
	movq	(%rcx),%rax			// n[0]

	imulq	%r10,%rbp			// m
	leaq	64+8(%rsp),%r14			// tp
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	32(%r9),%r15			// j, counts up to 0 in steps of 32
	leaq	32(%rcx),%rcx
	adcq	$0,%rdx
	movq	%rdi,(%r14)
	movq	%rdx,%r13
	jmp	.L1st4x

// First-pass loop, 4 limbs per iteration (a[j]*b + n[j]*m interleaved).
.align	32
.L1st4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx),%rax
	leaq	32(%r14),%r14
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%r14)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	0(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	32(%rcx),%rcx
	adcq	$0,%rdx
	movq	%rdi,(%r14)
	movq	%rdx,%r13

	addq	$32,%r15
	jnz	.L1st4x

	// First-pass tail: final two limb pairs and top carry.
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx),%rax
	leaq	32(%r14),%r14
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%r14)
	movq	%rdx,%r13

	leaq	(%rcx,%r9,1),%rcx		// rewind np

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi				// rdi = top carry
	movq	%r13,-8(%r14)

	jmp	.Louter4x

// Outer loop: masked gather of the next b limb, then the 4x inner loop
// accumulating into the previous tp contents.
.align	32
.Louter4x:
	leaq	16+128(%r14),%rdx		// mask table, relative to current tp
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	movdqa	-128(%r12),%xmm0
	movdqa	-112(%r12),%xmm1
	movdqa	-96(%r12),%xmm2
	movdqa	-80(%r12),%xmm3
	pand	-128(%rdx),%xmm0
	pand	-112(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-80(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%r12),%xmm0
	movdqa	-48(%r12),%xmm1
	movdqa	-32(%r12),%xmm2
	movdqa	-16(%r12),%xmm3
	pand	-64(%rdx),%xmm0
	pand	-48(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-16(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%r12),%xmm0
	movdqa	16(%r12),%xmm1
	movdqa	32(%r12),%xmm2
	movdqa	48(%r12),%xmm3
	pand	0(%rdx),%xmm0
	pand	16(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	48(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%r12),%xmm0
	movdqa	80(%r12),%xmm1
	movdqa	96(%r12),%xmm2
	movdqa	112(%r12),%xmm3
	pand	64(%rdx),%xmm0
	pand	80(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	112(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4
	pshufd	$0x4e,%xmm4,%xmm0
	por	%xmm4,%xmm0
	leaq	256(%r12),%r12
.byte	102,72,15,126,195			// movq %xmm0,%rbx (next b limb)

	movq	(%r14,%r9,1),%r10		// tp[0]
	movq	%r8,%rbp
	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp			// m for this iteration
	movq	%rdx,%r11
	movq	%rdi,(%r14)			// flush previous top carry

	leaq	(%r14,%r9,1),%r14		// rewind tp

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%r14),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	32(%r9),%r15
	leaq	32(%rcx),%rcx
	adcq	$0,%rdx
	movq	%rdx,%r13
	jmp	.Linner4x

// 4x inner loop: tp[j] += a[j]*b + n[j]*m, carries in r11/r13/rdi.
.align	32
.Linner4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx),%rax
	adcq	$0,%rdx
	addq	16(%r14),%r10
	leaq	32(%r14),%r14
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-32(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx),%rax
	adcq	$0,%rdx
	addq	-8(%r14),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	0(%rcx),%rax
	adcq	$0,%rdx
	addq	(%r14),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-16(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%r14),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	32(%rcx),%rcx
	adcq	$0,%rdx
	movq	%r13,-8(%r14)
	movq	%rdx,%r13

	addq	$32,%r15
	jnz	.Linner4x
	// Tail of the 4x outer iteration: last limb pair; note the
	// rbp/rax swap so the final n[-1]*m product uses the right operand.
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx),%rax
	adcq	$0,%rdx
	addq	16(%r14),%r10
	leaq	32(%r14),%r14
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-32(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	%rbp,%rax			// rax = m
	movq	-8(%rcx),%rbp			// rbp = n[last]
	adcq	$0,%rdx
	addq	-8(%r14),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp				// n[last] * m
	addq	%rax,%rdi
	movq	(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%r13

	movq	%rdi,-16(%r14)
	leaq	(%rcx,%r9,1),%rcx		// rewind np

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	addq	(%r14),%r13
	adcq	$0,%rdi				// rdi = new top carry
	movq	%r13,-8(%r14)

	cmpq	16+8(%rsp),%r12			// reached the table end sentinel?
	jb	.Louter4x
	// Done multiplying: compute the borrow/carry select mask in %rax,
	// then fall into the shared subtract-and-copy tail
	// (.Lsqr4x_sub_entry, defined elsewhere in this file).
	xorq	%rax,%rax
	subq	%r13,%rbp
	adcq	%r15,%r15			// r15 is 0 here; captures CF
	orq	%r15,%rdi
	subq	%rdi,%rax			// rax = 0 or -1 select mask
	leaq	(%r14,%r9,1),%rbx		// rbx = tp
	movq	(%rcx),%r12			// n[0]
	leaq	(%rcx),%rbp			// rbp = np
	movq	%r9,%rcx
	sarq	$3+2,%rcx			// limb count / 4
	movq	56+8(%rsp),%rdi			// rp saved earlier
	decq	%r12				// pre-decrement for the sbb chain
	xorq	%r10,%r10
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	.Lsqr4x_sub_entry
.cfi_endproc
.size	mul4x_internal,.-mul4x_internal

// bn_power5 -- five successive Montgomery squarings followed by one
// Montgomery multiplication (r = a^(2^5) * b style kernel, as used for
// fixed-window exponentiation).  Dispatches to the MULX/ADX variant
// (.Lpowerx5_enter, defined elsewhere) when the 0x80108 capability bits
// are all set -- NOTE(review): bit meanings per cpuid word 2, confirm
// against the generating Perl source.
.globl	bn_power5
.hidden	bn_power5
.type	bn_power5,@function
.align	32
bn_power5:
.cfi_startproc
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	leaq	OPENSSL_ia32cap_P(%rip),%r11
	movl	8(%r11),%r11d
	andl	$0x80108,%r11d
	cmpl	$0x80108,%r11d
	je	.Lpowerx5_enter
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lpower5_prologue:

	shll	$3,%r9d				// byte length
	leal	(%r9,%r9,2),%r10d		// 3*len
	negq	%r9
	movq	(%r8),%r8			// n0
	// Frame selection avoiding a 4 KB alias with rp, then page-walk
	// probing -- same scheme as bn_mul4x_mont_gather5.
	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lpwr_sp_alt
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp
	jmp	.Lpwr_sp_done

.align	32
.Lpwr_sp_alt:
	leaq	4096-320(,%r9,2),%r10
	leaq	-320(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lpwr_sp_done:
	andq	$-64,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10			// probe
	cmpq	%rbp,%rsp
	ja	.Lpwr_page_walk
	jmp	.Lpwr_page_walk_done

.Lpwr_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lpwr_page_walk
.Lpwr_page_walk_done:

	movq	%r9,%r10
	negq	%r9

	movq	%r8,32(%rsp)			// n0 for mul4x_internal
	movq	%rax,40(%rsp)			// saved caller %rsp
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lpower5_body:
	// Park rp/np/r10/bp in xmm registers across the internal calls:
.byte	102,72,15,110,207			// movq %rdi,%xmm1
.byte	102,72,15,110,209			// movq %rcx,%xmm2
.byte	102,73,15,110,218			// movq %r10,%xmm3
.byte	102,72,15,110,226			// movq %rdx,%xmm4

	// Five Montgomery squarings (square + reduction tail each time).
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal

	// Recover rcx/rdx and finish with a Montgomery multiplication.
.byte	102,72,15,126,209			// movq %xmm2,%rcx
.byte	102,72,15,126,226			// movq %xmm4,%rdx
	movq	%rsi,%rdi
	movq	40(%rsp),%rax
	leaq	32(%rsp),%r8

	call	mul4x_internal

	// Epilogue: restore callee-saved registers, return 1.
	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
1223.Lpower5_epilogue: 1224 .byte 0xf3,0xc3 1225.cfi_endproc 1226.size bn_power5,.-bn_power5 1227 1228.globl bn_sqr8x_internal 1229.hidden bn_sqr8x_internal 1230.hidden bn_sqr8x_internal 1231.type bn_sqr8x_internal,@function 1232.align 32 1233bn_sqr8x_internal: 1234__bn_sqr8x_internal: 1235.cfi_startproc 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 leaq 32(%r10),%rbp 1310 leaq (%rsi,%r9,1),%rsi 1311 1312 movq %r9,%rcx 1313 1314 1315 movq -32(%rsi,%rbp,1),%r14 1316 leaq 48+8(%rsp,%r9,2),%rdi 1317 movq -24(%rsi,%rbp,1),%rax 1318 leaq -32(%rdi,%rbp,1),%rdi 1319 movq -16(%rsi,%rbp,1),%rbx 1320 movq %rax,%r15 1321 1322 mulq %r14 1323 movq %rax,%r10 1324 movq %rbx,%rax 1325 movq %rdx,%r11 1326 movq %r10,-24(%rdi,%rbp,1) 1327 1328 mulq %r14 1329 addq %rax,%r11 1330 movq %rbx,%rax 1331 adcq $0,%rdx 1332 movq %r11,-16(%rdi,%rbp,1) 1333 movq %rdx,%r10 1334 1335 1336 movq -8(%rsi,%rbp,1),%rbx 1337 mulq %r15 1338 movq %rax,%r12 1339 movq %rbx,%rax 1340 movq %rdx,%r13 1341 1342 leaq (%rbp),%rcx 1343 mulq %r14 1344 addq %rax,%r10 1345 movq %rbx,%rax 1346 movq %rdx,%r11 1347 adcq $0,%r11 1348 addq %r12,%r10 1349 adcq $0,%r11 1350 movq %r10,-8(%rdi,%rcx,1) 1351 jmp .Lsqr4x_1st 1352 1353.align 32 1354.Lsqr4x_1st: 1355 movq (%rsi,%rcx,1),%rbx 1356 mulq %r15 1357 addq %rax,%r13 1358 movq %rbx,%rax 1359 movq %rdx,%r12 1360 adcq $0,%r12 1361 1362 mulq %r14 1363 addq %rax,%r11 1364 movq %rbx,%rax 1365 movq 8(%rsi,%rcx,1),%rbx 1366 movq %rdx,%r10 1367 adcq $0,%r10 1368 addq %r13,%r11 1369 adcq $0,%r10 1370 1371 1372 mulq %r15 1373 addq %rax,%r12 1374 movq %rbx,%rax 1375 movq %r11,(%rdi,%rcx,1) 1376 movq %rdx,%r13 1377 adcq $0,%r13 1378 1379 mulq %r14 1380 addq 
%rax,%r10 1381 movq %rbx,%rax 1382 movq 16(%rsi,%rcx,1),%rbx 1383 movq %rdx,%r11 1384 adcq $0,%r11 1385 addq %r12,%r10 1386 adcq $0,%r11 1387 1388 mulq %r15 1389 addq %rax,%r13 1390 movq %rbx,%rax 1391 movq %r10,8(%rdi,%rcx,1) 1392 movq %rdx,%r12 1393 adcq $0,%r12 1394 1395 mulq %r14 1396 addq %rax,%r11 1397 movq %rbx,%rax 1398 movq 24(%rsi,%rcx,1),%rbx 1399 movq %rdx,%r10 1400 adcq $0,%r10 1401 addq %r13,%r11 1402 adcq $0,%r10 1403 1404 1405 mulq %r15 1406 addq %rax,%r12 1407 movq %rbx,%rax 1408 movq %r11,16(%rdi,%rcx,1) 1409 movq %rdx,%r13 1410 adcq $0,%r13 1411 leaq 32(%rcx),%rcx 1412 1413 mulq %r14 1414 addq %rax,%r10 1415 movq %rbx,%rax 1416 movq %rdx,%r11 1417 adcq $0,%r11 1418 addq %r12,%r10 1419 adcq $0,%r11 1420 movq %r10,-8(%rdi,%rcx,1) 1421 1422 cmpq $0,%rcx 1423 jne .Lsqr4x_1st 1424 1425 mulq %r15 1426 addq %rax,%r13 1427 leaq 16(%rbp),%rbp 1428 adcq $0,%rdx 1429 addq %r11,%r13 1430 adcq $0,%rdx 1431 1432 movq %r13,(%rdi) 1433 movq %rdx,%r12 1434 movq %rdx,8(%rdi) 1435 jmp .Lsqr4x_outer 1436 1437.align 32 1438.Lsqr4x_outer: 1439 movq -32(%rsi,%rbp,1),%r14 1440 leaq 48+8(%rsp,%r9,2),%rdi 1441 movq -24(%rsi,%rbp,1),%rax 1442 leaq -32(%rdi,%rbp,1),%rdi 1443 movq -16(%rsi,%rbp,1),%rbx 1444 movq %rax,%r15 1445 1446 mulq %r14 1447 movq -24(%rdi,%rbp,1),%r10 1448 addq %rax,%r10 1449 movq %rbx,%rax 1450 adcq $0,%rdx 1451 movq %r10,-24(%rdi,%rbp,1) 1452 movq %rdx,%r11 1453 1454 mulq %r14 1455 addq %rax,%r11 1456 movq %rbx,%rax 1457 adcq $0,%rdx 1458 addq -16(%rdi,%rbp,1),%r11 1459 movq %rdx,%r10 1460 adcq $0,%r10 1461 movq %r11,-16(%rdi,%rbp,1) 1462 1463 xorq %r12,%r12 1464 1465 movq -8(%rsi,%rbp,1),%rbx 1466 mulq %r15 1467 addq %rax,%r12 1468 movq %rbx,%rax 1469 adcq $0,%rdx 1470 addq -8(%rdi,%rbp,1),%r12 1471 movq %rdx,%r13 1472 adcq $0,%r13 1473 1474 mulq %r14 1475 addq %rax,%r10 1476 movq %rbx,%rax 1477 adcq $0,%rdx 1478 addq %r12,%r10 1479 movq %rdx,%r11 1480 adcq $0,%r11 1481 movq %r10,-8(%rdi,%rbp,1) 1482 1483 leaq (%rbp),%rcx 1484 jmp .Lsqr4x_inner 1485 
1486.align 32 1487.Lsqr4x_inner: 1488 movq (%rsi,%rcx,1),%rbx 1489 mulq %r15 1490 addq %rax,%r13 1491 movq %rbx,%rax 1492 movq %rdx,%r12 1493 adcq $0,%r12 1494 addq (%rdi,%rcx,1),%r13 1495 adcq $0,%r12 1496 1497.byte 0x67 1498 mulq %r14 1499 addq %rax,%r11 1500 movq %rbx,%rax 1501 movq 8(%rsi,%rcx,1),%rbx 1502 movq %rdx,%r10 1503 adcq $0,%r10 1504 addq %r13,%r11 1505 adcq $0,%r10 1506 1507 mulq %r15 1508 addq %rax,%r12 1509 movq %r11,(%rdi,%rcx,1) 1510 movq %rbx,%rax 1511 movq %rdx,%r13 1512 adcq $0,%r13 1513 addq 8(%rdi,%rcx,1),%r12 1514 leaq 16(%rcx),%rcx 1515 adcq $0,%r13 1516 1517 mulq %r14 1518 addq %rax,%r10 1519 movq %rbx,%rax 1520 adcq $0,%rdx 1521 addq %r12,%r10 1522 movq %rdx,%r11 1523 adcq $0,%r11 1524 movq %r10,-8(%rdi,%rcx,1) 1525 1526 cmpq $0,%rcx 1527 jne .Lsqr4x_inner 1528 1529.byte 0x67 1530 mulq %r15 1531 addq %rax,%r13 1532 adcq $0,%rdx 1533 addq %r11,%r13 1534 adcq $0,%rdx 1535 1536 movq %r13,(%rdi) 1537 movq %rdx,%r12 1538 movq %rdx,8(%rdi) 1539 1540 addq $16,%rbp 1541 jnz .Lsqr4x_outer 1542 1543 1544 movq -32(%rsi),%r14 1545 leaq 48+8(%rsp,%r9,2),%rdi 1546 movq -24(%rsi),%rax 1547 leaq -32(%rdi,%rbp,1),%rdi 1548 movq -16(%rsi),%rbx 1549 movq %rax,%r15 1550 1551 mulq %r14 1552 addq %rax,%r10 1553 movq %rbx,%rax 1554 movq %rdx,%r11 1555 adcq $0,%r11 1556 1557 mulq %r14 1558 addq %rax,%r11 1559 movq %rbx,%rax 1560 movq %r10,-24(%rdi) 1561 movq %rdx,%r10 1562 adcq $0,%r10 1563 addq %r13,%r11 1564 movq -8(%rsi),%rbx 1565 adcq $0,%r10 1566 1567 mulq %r15 1568 addq %rax,%r12 1569 movq %rbx,%rax 1570 movq %r11,-16(%rdi) 1571 movq %rdx,%r13 1572 adcq $0,%r13 1573 1574 mulq %r14 1575 addq %rax,%r10 1576 movq %rbx,%rax 1577 movq %rdx,%r11 1578 adcq $0,%r11 1579 addq %r12,%r10 1580 adcq $0,%r11 1581 movq %r10,-8(%rdi) 1582 1583 mulq %r15 1584 addq %rax,%r13 1585 movq -16(%rsi),%rax 1586 adcq $0,%rdx 1587 addq %r11,%r13 1588 adcq $0,%rdx 1589 1590 movq %r13,(%rdi) 1591 movq %rdx,%r12 1592 movq %rdx,8(%rdi) 1593 1594 mulq %rbx 1595 addq $16,%rbp 1596 xorq 
%r14,%r14 1597 subq %r9,%rbp 1598 xorq %r15,%r15 1599 1600 addq %r12,%rax 1601 adcq $0,%rdx 1602 movq %rax,8(%rdi) 1603 movq %rdx,16(%rdi) 1604 movq %r15,24(%rdi) 1605 1606 movq -16(%rsi,%rbp,1),%rax 1607 leaq 48+8(%rsp),%rdi 1608 xorq %r10,%r10 1609 movq 8(%rdi),%r11 1610 1611 leaq (%r14,%r10,2),%r12 1612 shrq $63,%r10 1613 leaq (%rcx,%r11,2),%r13 1614 shrq $63,%r11 1615 orq %r10,%r13 1616 movq 16(%rdi),%r10 1617 movq %r11,%r14 1618 mulq %rax 1619 negq %r15 1620 movq 24(%rdi),%r11 1621 adcq %rax,%r12 1622 movq -8(%rsi,%rbp,1),%rax 1623 movq %r12,(%rdi) 1624 adcq %rdx,%r13 1625 1626 leaq (%r14,%r10,2),%rbx 1627 movq %r13,8(%rdi) 1628 sbbq %r15,%r15 1629 shrq $63,%r10 1630 leaq (%rcx,%r11,2),%r8 1631 shrq $63,%r11 1632 orq %r10,%r8 1633 movq 32(%rdi),%r10 1634 movq %r11,%r14 1635 mulq %rax 1636 negq %r15 1637 movq 40(%rdi),%r11 1638 adcq %rax,%rbx 1639 movq 0(%rsi,%rbp,1),%rax 1640 movq %rbx,16(%rdi) 1641 adcq %rdx,%r8 1642 leaq 16(%rbp),%rbp 1643 movq %r8,24(%rdi) 1644 sbbq %r15,%r15 1645 leaq 64(%rdi),%rdi 1646 jmp .Lsqr4x_shift_n_add 1647 1648.align 32 1649.Lsqr4x_shift_n_add: 1650 leaq (%r14,%r10,2),%r12 1651 shrq $63,%r10 1652 leaq (%rcx,%r11,2),%r13 1653 shrq $63,%r11 1654 orq %r10,%r13 1655 movq -16(%rdi),%r10 1656 movq %r11,%r14 1657 mulq %rax 1658 negq %r15 1659 movq -8(%rdi),%r11 1660 adcq %rax,%r12 1661 movq -8(%rsi,%rbp,1),%rax 1662 movq %r12,-32(%rdi) 1663 adcq %rdx,%r13 1664 1665 leaq (%r14,%r10,2),%rbx 1666 movq %r13,-24(%rdi) 1667 sbbq %r15,%r15 1668 shrq $63,%r10 1669 leaq (%rcx,%r11,2),%r8 1670 shrq $63,%r11 1671 orq %r10,%r8 1672 movq 0(%rdi),%r10 1673 movq %r11,%r14 1674 mulq %rax 1675 negq %r15 1676 movq 8(%rdi),%r11 1677 adcq %rax,%rbx 1678 movq 0(%rsi,%rbp,1),%rax 1679 movq %rbx,-16(%rdi) 1680 adcq %rdx,%r8 1681 1682 leaq (%r14,%r10,2),%r12 1683 movq %r8,-8(%rdi) 1684 sbbq %r15,%r15 1685 shrq $63,%r10 1686 leaq (%rcx,%r11,2),%r13 1687 shrq $63,%r11 1688 orq %r10,%r13 1689 movq 16(%rdi),%r10 1690 movq %r11,%r14 1691 mulq %rax 1692 negq %r15 
1693 movq 24(%rdi),%r11 1694 adcq %rax,%r12 1695 movq 8(%rsi,%rbp,1),%rax 1696 movq %r12,0(%rdi) 1697 adcq %rdx,%r13 1698 1699 leaq (%r14,%r10,2),%rbx 1700 movq %r13,8(%rdi) 1701 sbbq %r15,%r15 1702 shrq $63,%r10 1703 leaq (%rcx,%r11,2),%r8 1704 shrq $63,%r11 1705 orq %r10,%r8 1706 movq 32(%rdi),%r10 1707 movq %r11,%r14 1708 mulq %rax 1709 negq %r15 1710 movq 40(%rdi),%r11 1711 adcq %rax,%rbx 1712 movq 16(%rsi,%rbp,1),%rax 1713 movq %rbx,16(%rdi) 1714 adcq %rdx,%r8 1715 movq %r8,24(%rdi) 1716 sbbq %r15,%r15 1717 leaq 64(%rdi),%rdi 1718 addq $32,%rbp 1719 jnz .Lsqr4x_shift_n_add 1720 1721 leaq (%r14,%r10,2),%r12 1722.byte 0x67 1723 shrq $63,%r10 1724 leaq (%rcx,%r11,2),%r13 1725 shrq $63,%r11 1726 orq %r10,%r13 1727 movq -16(%rdi),%r10 1728 movq %r11,%r14 1729 mulq %rax 1730 negq %r15 1731 movq -8(%rdi),%r11 1732 adcq %rax,%r12 1733 movq -8(%rsi),%rax 1734 movq %r12,-32(%rdi) 1735 adcq %rdx,%r13 1736 1737 leaq (%r14,%r10,2),%rbx 1738 movq %r13,-24(%rdi) 1739 sbbq %r15,%r15 1740 shrq $63,%r10 1741 leaq (%rcx,%r11,2),%r8 1742 shrq $63,%r11 1743 orq %r10,%r8 1744 mulq %rax 1745 negq %r15 1746 adcq %rax,%rbx 1747 adcq %rdx,%r8 1748 movq %rbx,-16(%rdi) 1749 movq %r8,-8(%rdi) 1750.byte 102,72,15,126,213 1751__bn_sqr8x_reduction: 1752 xorq %rax,%rax 1753 leaq (%r9,%rbp,1),%rcx 1754 leaq 48+8(%rsp,%r9,2),%rdx 1755 movq %rcx,0+8(%rsp) 1756 leaq 48+8(%rsp,%r9,1),%rdi 1757 movq %rdx,8+8(%rsp) 1758 negq %r9 1759 jmp .L8x_reduction_loop 1760 1761.align 32 1762.L8x_reduction_loop: 1763 leaq (%rdi,%r9,1),%rdi 1764.byte 0x66 1765 movq 0(%rdi),%rbx 1766 movq 8(%rdi),%r9 1767 movq 16(%rdi),%r10 1768 movq 24(%rdi),%r11 1769 movq 32(%rdi),%r12 1770 movq 40(%rdi),%r13 1771 movq 48(%rdi),%r14 1772 movq 56(%rdi),%r15 1773 movq %rax,(%rdx) 1774 leaq 64(%rdi),%rdi 1775 1776.byte 0x67 1777 movq %rbx,%r8 1778 imulq 32+8(%rsp),%rbx 1779 movq 0(%rbp),%rax 1780 movl $8,%ecx 1781 jmp .L8x_reduce 1782 1783.align 32 1784.L8x_reduce: 1785 mulq %rbx 1786 movq 8(%rbp),%rax 1787 negq %r8 1788 movq 
%rdx,%r8 1789 adcq $0,%r8 1790 1791 mulq %rbx 1792 addq %rax,%r9 1793 movq 16(%rbp),%rax 1794 adcq $0,%rdx 1795 addq %r9,%r8 1796 movq %rbx,48-8+8(%rsp,%rcx,8) 1797 movq %rdx,%r9 1798 adcq $0,%r9 1799 1800 mulq %rbx 1801 addq %rax,%r10 1802 movq 24(%rbp),%rax 1803 adcq $0,%rdx 1804 addq %r10,%r9 1805 movq 32+8(%rsp),%rsi 1806 movq %rdx,%r10 1807 adcq $0,%r10 1808 1809 mulq %rbx 1810 addq %rax,%r11 1811 movq 32(%rbp),%rax 1812 adcq $0,%rdx 1813 imulq %r8,%rsi 1814 addq %r11,%r10 1815 movq %rdx,%r11 1816 adcq $0,%r11 1817 1818 mulq %rbx 1819 addq %rax,%r12 1820 movq 40(%rbp),%rax 1821 adcq $0,%rdx 1822 addq %r12,%r11 1823 movq %rdx,%r12 1824 adcq $0,%r12 1825 1826 mulq %rbx 1827 addq %rax,%r13 1828 movq 48(%rbp),%rax 1829 adcq $0,%rdx 1830 addq %r13,%r12 1831 movq %rdx,%r13 1832 adcq $0,%r13 1833 1834 mulq %rbx 1835 addq %rax,%r14 1836 movq 56(%rbp),%rax 1837 adcq $0,%rdx 1838 addq %r14,%r13 1839 movq %rdx,%r14 1840 adcq $0,%r14 1841 1842 mulq %rbx 1843 movq %rsi,%rbx 1844 addq %rax,%r15 1845 movq 0(%rbp),%rax 1846 adcq $0,%rdx 1847 addq %r15,%r14 1848 movq %rdx,%r15 1849 adcq $0,%r15 1850 1851 decl %ecx 1852 jnz .L8x_reduce 1853 1854 leaq 64(%rbp),%rbp 1855 xorq %rax,%rax 1856 movq 8+8(%rsp),%rdx 1857 cmpq 0+8(%rsp),%rbp 1858 jae .L8x_no_tail 1859 1860.byte 0x66 1861 addq 0(%rdi),%r8 1862 adcq 8(%rdi),%r9 1863 adcq 16(%rdi),%r10 1864 adcq 24(%rdi),%r11 1865 adcq 32(%rdi),%r12 1866 adcq 40(%rdi),%r13 1867 adcq 48(%rdi),%r14 1868 adcq 56(%rdi),%r15 1869 sbbq %rsi,%rsi 1870 1871 movq 48+56+8(%rsp),%rbx 1872 movl $8,%ecx 1873 movq 0(%rbp),%rax 1874 jmp .L8x_tail 1875 1876.align 32 1877.L8x_tail: 1878 mulq %rbx 1879 addq %rax,%r8 1880 movq 8(%rbp),%rax 1881 movq %r8,(%rdi) 1882 movq %rdx,%r8 1883 adcq $0,%r8 1884 1885 mulq %rbx 1886 addq %rax,%r9 1887 movq 16(%rbp),%rax 1888 adcq $0,%rdx 1889 addq %r9,%r8 1890 leaq 8(%rdi),%rdi 1891 movq %rdx,%r9 1892 adcq $0,%r9 1893 1894 mulq %rbx 1895 addq %rax,%r10 1896 movq 24(%rbp),%rax 1897 adcq $0,%rdx 1898 addq %r10,%r9 1899 
movq %rdx,%r10 1900 adcq $0,%r10 1901 1902 mulq %rbx 1903 addq %rax,%r11 1904 movq 32(%rbp),%rax 1905 adcq $0,%rdx 1906 addq %r11,%r10 1907 movq %rdx,%r11 1908 adcq $0,%r11 1909 1910 mulq %rbx 1911 addq %rax,%r12 1912 movq 40(%rbp),%rax 1913 adcq $0,%rdx 1914 addq %r12,%r11 1915 movq %rdx,%r12 1916 adcq $0,%r12 1917 1918 mulq %rbx 1919 addq %rax,%r13 1920 movq 48(%rbp),%rax 1921 adcq $0,%rdx 1922 addq %r13,%r12 1923 movq %rdx,%r13 1924 adcq $0,%r13 1925 1926 mulq %rbx 1927 addq %rax,%r14 1928 movq 56(%rbp),%rax 1929 adcq $0,%rdx 1930 addq %r14,%r13 1931 movq %rdx,%r14 1932 adcq $0,%r14 1933 1934 mulq %rbx 1935 movq 48-16+8(%rsp,%rcx,8),%rbx 1936 addq %rax,%r15 1937 adcq $0,%rdx 1938 addq %r15,%r14 1939 movq 0(%rbp),%rax 1940 movq %rdx,%r15 1941 adcq $0,%r15 1942 1943 decl %ecx 1944 jnz .L8x_tail 1945 1946 leaq 64(%rbp),%rbp 1947 movq 8+8(%rsp),%rdx 1948 cmpq 0+8(%rsp),%rbp 1949 jae .L8x_tail_done 1950 1951 movq 48+56+8(%rsp),%rbx 1952 negq %rsi 1953 movq 0(%rbp),%rax 1954 adcq 0(%rdi),%r8 1955 adcq 8(%rdi),%r9 1956 adcq 16(%rdi),%r10 1957 adcq 24(%rdi),%r11 1958 adcq 32(%rdi),%r12 1959 adcq 40(%rdi),%r13 1960 adcq 48(%rdi),%r14 1961 adcq 56(%rdi),%r15 1962 sbbq %rsi,%rsi 1963 1964 movl $8,%ecx 1965 jmp .L8x_tail 1966 1967.align 32 1968.L8x_tail_done: 1969 xorq %rax,%rax 1970 addq (%rdx),%r8 1971 adcq $0,%r9 1972 adcq $0,%r10 1973 adcq $0,%r11 1974 adcq $0,%r12 1975 adcq $0,%r13 1976 adcq $0,%r14 1977 adcq $0,%r15 1978 adcq $0,%rax 1979 1980 negq %rsi 1981.L8x_no_tail: 1982 adcq 0(%rdi),%r8 1983 adcq 8(%rdi),%r9 1984 adcq 16(%rdi),%r10 1985 adcq 24(%rdi),%r11 1986 adcq 32(%rdi),%r12 1987 adcq 40(%rdi),%r13 1988 adcq 48(%rdi),%r14 1989 adcq 56(%rdi),%r15 1990 adcq $0,%rax 1991 movq -8(%rbp),%rcx 1992 xorq %rsi,%rsi 1993 1994.byte 102,72,15,126,213 1995 1996 movq %r8,0(%rdi) 1997 movq %r9,8(%rdi) 1998.byte 102,73,15,126,217 1999 movq %r10,16(%rdi) 2000 movq %r11,24(%rdi) 2001 movq %r12,32(%rdi) 2002 movq %r13,40(%rdi) 2003 movq %r14,48(%rdi) 2004 movq %r15,56(%rdi) 
# --- tail of bn_sqr8x_internal: advance output pointer and loop until the
# --- reduction has walked the whole tp[] area.
	leaq	64(%rdi),%rdi

	cmpq	%rdx,%rdi
	jb	.L8x_reduction_loop
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	bn_sqr8x_internal,.-bn_sqr8x_internal

# __bn_post4x_internal: post-reduction fixup used by the non-MULX path.
# Walks the modulus at %rbp and the candidate result in 4-limb groups and
# adds (~n[i] & mask) with carry chained through %r10, i.e. a branch-free
# (constant-time) conditional subtraction of the modulus, keyed off the
# all-ones/all-zero mask derived from %rax.
# NOTE(review): mask/borrow semantics inferred from the not/and/adc pattern
# and from decq of n[0]; confirm against the perlasm source.
.type	__bn_post4x_internal,@function
.align	32
__bn_post4x_internal:
.cfi_startproc
	movq	0(%rbp),%r12
	leaq	(%rdi,%r9,1),%rbx
	movq	%r9,%rcx
.byte	102,72,15,126,207			# movq %xmm1,%rdi
	negq	%rax
.byte	102,72,15,126,206			# movq %xmm1,%rsi
	sarq	$3+2,%rcx			# iterations = num/32 (4 limbs per pass)
	decq	%r12				# n[0]-1 so the first adc re-adds the borrow
	xorq	%r10,%r10
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	.Lsqr4x_sub_entry

.align	16
.Lsqr4x_sub:
	movq	0(%rbp),%r12
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
.Lsqr4x_sub_entry:
	leaq	32(%rbp),%rbp
	notq	%r12
	notq	%r13
	notq	%r14
	notq	%r15
	andq	%rax,%r12			# (~n & mask): subtract n only when mask is all-ones
	andq	%rax,%r13
	andq	%rax,%r14
	andq	%rax,%r15

	negq	%r10				# restore carry from previous group
	adcq	0(%rbx),%r12
	adcq	8(%rbx),%r13
	adcq	16(%rbx),%r14
	adcq	24(%rbx),%r15
	movq	%r12,0(%rdi)
	leaq	32(%rbx),%rbx
	movq	%r13,8(%rdi)
	sbbq	%r10,%r10			# save carry for the next group
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	leaq	32(%rdi),%rdi

	incq	%rcx
	jnz	.Lsqr4x_sub

	movq	%r9,%r10
	negq	%r9				# restore %r9 (byte count)
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	__bn_post4x_internal,.-__bn_post4x_internal

# bn_from_montgomery: public entry.  Only the num%8 == 0 case is handled by
# the 8x kernel; otherwise return 0 and let the caller fall back.
.globl	bn_from_montgomery
.hidden	bn_from_montgomery
.type	bn_from_montgomery,@function
.align	32
bn_from_montgomery:
.cfi_startproc
	testl	$7,%r9d
	jz	bn_from_mont8x			# tail-jump, same argument registers
	xorl	%eax,%eax			# unsupported size: return 0
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	bn_from_montgomery,.-bn_from_montgomery

# bn_from_mont8x: Montgomery "from" conversion for num%8==0.
# NOTE(review): argument layout inferred from use below -- %rdi=rp, %rsi=ap,
# %rcx=np, %r8=&n0, %r9=num (words); confirm against the C prototype.
.type	bn_from_mont8x,@function
.align	32
bn_from_mont8x:
.cfi_startproc
.byte	0x67					# addr-size pad (alignment cosmetics)
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
# (bn_from_mont8x continued: finish saving callee-saved registers, then
# carve an ALIGNED scratch frame below %rsp and probe it page by page.)
	pushq	%r15
.cfi_offset	%r15,-56
.Lfrom_prologue:

	shll	$3,%r9d				# num words -> num bytes
	leaq	(%r9,%r9,2),%r10		# %r10 = 3*num bytes
	negq	%r9
	movq	(%r8),%r8			# load n0 from *n0 pointer

	# Pick a stack position for the 2*num temporary so that its distance
	# from rp (%rdi) modulo 4K is "randomized" -- avoids cache-bank/page
	# aliasing between tp[] and the output.
	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lfrom_sp_alt
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp
	jmp	.Lfrom_sp_done

.align	32
.Lfrom_sp_alt:
	leaq	4096-320(,%r9,2),%r10
	leaq	-320(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lfrom_sp_done:
	andq	$-64,%rbp			# 64-byte align the frame
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10			# probe current page
	cmpq	%rbp,%rsp
	ja	.Lfrom_page_walk
	jmp	.Lfrom_page_walk_done

# Touch every 4K page of the newly reserved region in order so the guard
# page is always hit first (stack-clash hardening).
.Lfrom_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lfrom_page_walk
.Lfrom_page_walk_done:

	movq	%r9,%r10
	negq	%r9

	movq	%r8,32(%rsp)			# save n0
	movq	%rax,40(%rsp)			# save original %rsp for epilogue/unwind
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lfrom_body:
	movq	%r9,%r11
	leaq	48(%rsp),%rax
	pxor	%xmm0,%xmm0
	jmp	.Lmul_by_1

# Copy ap into the low half of tp[] and zero the high half, 64 bytes per
# iteration -- this is the "multiply by 1" before the Montgomery reduction.
.align	32
.Lmul_by_1:
	movdqu	(%rsi),%xmm1
	movdqu	16(%rsi),%xmm2
	movdqu	32(%rsi),%xmm3
	movdqa	%xmm0,(%rax,%r9,1)		# zero upper half
	movdqu	48(%rsi),%xmm4
	movdqa	%xmm0,16(%rax,%r9,1)
.byte	0x48,0x8d,0xb6,0x40,0x00,0x00,0x00	# leaq 64(%rsi),%rsi (fixed-length encoding)
	movdqa	%xmm1,(%rax)
	movdqa	%xmm0,32(%rax,%r9,1)
	movdqa	%xmm2,16(%rax)
	movdqa	%xmm0,48(%rax,%r9,1)
	movdqa	%xmm3,32(%rax)
	movdqa	%xmm4,48(%rax)
	leaq	64(%rax),%rax
	subq	$64,%r11
	jnz	.Lmul_by_1

# Stash rp/num/np in xmm registers (callee helpers retrieve them the same
# way), then dispatch on CPU features.
.byte	102,72,15,110,207			# movq %rdi,%xmm1
.byte	102,72,15,110,209			# movq %rcx,%xmm2
.byte	0x67
	movq	%rcx,%rbp
.byte	102,73,15,110,218			# movq %r10,%xmm3
	leaq	OPENSSL_ia32cap_P(%rip),%r11
	movl	8(%r11),%r11d
	andl	$0x80108,%r11d			# NOTE(review): BMI1|BMI2|ADX bits -- confirm
	cmpl	$0x80108,%r11d			# against OPENSSL_ia32cap_P word layout
	jne	.Lfrom_mont_nox			# take MULX/ADX path only if all present
# (bn_from_mont8x continued: MULX/ADX reduction path.)
	leaq	(%rax,%r9,1),%rdi
	call	__bn_sqrx8x_reduction
	call	__bn_postx4x_internal

	pxor	%xmm0,%xmm0
	leaq	48(%rsp),%rax
	jmp	.Lfrom_mont_zero

# Plain path (no MULX/ADX).
.align	32
.Lfrom_mont_nox:
	call	__bn_sqr8x_reduction
	call	__bn_post4x_internal

	pxor	%xmm0,%xmm0
	leaq	48(%rsp),%rax
	jmp	.Lfrom_mont_zero

# Wipe the on-stack temporary (crypto hygiene) and restore registers.
.align	32
.Lfrom_mont_zero:
	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movdqa	%xmm0,0(%rax)
	movdqa	%xmm0,16(%rax)
	movdqa	%xmm0,32(%rax)
	movdqa	%xmm0,48(%rax)
	leaq	64(%rax),%rax
	subq	$32,%r9
	jnz	.Lfrom_mont_zero

	movq	$1,%rax				# return 1 (success)
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lfrom_epilogue:
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	bn_from_mont8x,.-bn_from_mont8x

# bn_mulx4x_mont_gather5: Montgomery multiplication with a 5-bit gather
# index, MULX/ADX flavour.  Thin wrapper: save registers, carve/probe the
# scratch frame (same scheme as bn_from_mont8x), then call mulx4x_internal.
.type	bn_mulx4x_mont_gather5,@function
.align	32
bn_mulx4x_mont_gather5:
.cfi_startproc
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lmulx4x_enter:
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lmulx4x_prologue:

	shll	$3,%r9d				# num words -> bytes
	leaq	(%r9,%r9,2),%r10		# 3*num
	negq	%r9
	movq	(%r8),%r8			# n0

	# Frame placement with rp-relative 4K offset scrambling, as above.
	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lmulx4xsp_alt
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp
	jmp	.Lmulx4xsp_done

.Lmulx4xsp_alt:
	leaq	4096-320(,%r9,2),%r10
	leaq	-320(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lmulx4xsp_done:
	andq	$-64,%rbp			# 64-byte aligned frame
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmulx4x_page_walk
	jmp	.Lmulx4x_page_walk_done

# Page-ordered stack probe (stack-clash hardening).
.Lmulx4x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmulx4x_page_walk
.Lmulx4x_page_walk_done:

	movq	%r8,32(%rsp)			# save n0
	movq	%rax,40(%rsp)			# save original %rsp
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lmulx4x_body:
	call	mulx4x_internal

	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax				# return 1

	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmulx4x_epilogue:
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5

# mulx4x_internal: MULX/ADX Montgomery multiply core.  Builds sixteen
# 128-bit equality masks against the 5-bit power index (in %xmm5) from the
# .Linc counter table, then gathers the selected bp[] window with pand/por
# -- every table element is touched, so the lookup is constant-time.
.type	mulx4x_internal,@function
.align	32
mulx4x_internal:
.cfi_startproc
	movq	%r9,8(%rsp)			# save num (bytes)
	movq	%r9,%r10
	negq	%r9
	shlq	$5,%r9
	negq	%r10
	leaq	128(%rdx,%r9,1),%r13
	shrq	$5+5,%r9
	movd	8(%rax),%xmm5			# power index argument
	subq	$1,%r9
	leaq	.Linc(%rip),%rax
	movq	%r13,16+8(%rsp)			# end-of-table sentinel
	movq	%r9,24+8(%rsp)			# inner loop counter
	movq	%rdi,56+8(%rsp)			# save rp
	movdqa	0(%rax),%xmm0			# {0,1}
	movdqa	16(%rax),%xmm1			# {2,2}
	leaq	88-112(%rsp,%r10,1),%r10	# mask scratch area
	leaq	128(%rdx),%rdi			# &bp[64] (middle of 256-byte row)

	pshufd	$0,%xmm5,%xmm5			# broadcast index
	movdqa	%xmm1,%xmm4
.byte	0x67
	movdqa	%xmm1,%xmm2
.byte	0x67
	# Counter ladder: compare running counters {0,1},{2,3},... against the
	# broadcast index; each pcmpeqd leaves an all-ones lane only at the
	# selected position.  Masks are spilled to 112..352(%r10).
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,112(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,128(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,144(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,160(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,176(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,192(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,208(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,224(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,240(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,256(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,272(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,288(%r10)
	movdqa	%xmm4,%xmm3
.byte	0x67
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,304(%r10)

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,320(%r10)

	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,336(%r10)

	# Constant-time gather: AND every table row with its mask, OR the
	# results together; only the selected row survives.
	pand	64(%rdi),%xmm0
	pand	80(%rdi),%xmm1
	pand	96(%rdi),%xmm2
	movdqa	%xmm3,352(%r10)
	pand	112(%rdi),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-128(%rdi),%xmm4
	movdqa	-112(%rdi),%xmm5
	movdqa	-96(%rdi),%xmm2
	pand	112(%r10),%xmm4
	movdqa	-80(%rdi),%xmm3
	pand	128(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	144(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	160(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-64(%rdi),%xmm4
	movdqa	-48(%rdi),%xmm5
	movdqa	-32(%rdi),%xmm2
	pand	176(%r10),%xmm4
	movdqa	-16(%rdi),%xmm3
	pand	192(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	208(%r10),%xmm2
# (mulx4x_internal continued: finish the constant-time gather, then run the
# first multiply pass b[0]*a[] fused with one Montgomery reduction step.)
	por	%xmm5,%xmm1
	pand	224(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	0(%rdi),%xmm4
	movdqa	16(%rdi),%xmm5
	movdqa	32(%rdi),%xmm2
	pand	240(%r10),%xmm4
	movdqa	48(%rdi),%xmm3
	pand	256(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	272(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	288(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	pxor	%xmm1,%xmm0
	pshufd	$0x4e,%xmm0,%xmm1		# fold high qword onto low
	por	%xmm1,%xmm0
	leaq	256(%rdi),%rdi
.byte	102,72,15,126,194			# movq %xmm0,%rdx = gathered b[i]
	leaq	64+32+8(%rsp),%rbx

	# tp[] = a[0..3] * b[0]  (mulx keeps %rdx as the implicit multiplier)
	movq	%rdx,%r9
	mulxq	0(%rsi),%r8,%rax
	mulxq	8(%rsi),%r11,%r12
	addq	%rax,%r11
	mulxq	16(%rsi),%rax,%r13
	adcq	%rax,%r12
	adcq	$0,%r13
	mulxq	24(%rsi),%rax,%r14

	movq	%r8,%r15
	imulq	32+8(%rsp),%r8			# m = tp[0]*n0 mod 2^64
	xorq	%rbp,%rbp			# %rbp = 0, also clears CF/OF
	movq	%r8,%rdx

	movq	%rdi,8+8(%rsp)

	leaq	32(%rsi),%rsi
	adcxq	%rax,%r13
	adcxq	%rbp,%r14

	# Add m*n[0..3]; two independent carry chains via adcx (CF) / adox (OF).
	mulxq	0(%rcx),%rax,%r10
	adcxq	%rax,%r15			# discards tp[0] (becomes zero)
	adoxq	%r11,%r10
	mulxq	8(%rcx),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11
	mulxq	16(%rcx),%rax,%r12
	movq	24+8(%rsp),%rdi
	movq	%r10,-32(%rbx)
	adcxq	%rax,%r11
	adoxq	%r13,%r12
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	%r11,-24(%rbx)
	adcxq	%rax,%r12
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r12,-16(%rbx)
	jmp	.Lmulx4x_1st

# First-pass loop over the remaining limbs of a[] and n[].
.align	32
.Lmulx4x_1st:
	adcxq	%rbp,%r15
	mulxq	0(%rsi),%r10,%rax
	adcxq	%r14,%r10
	mulxq	8(%rsi),%r11,%r14
	adcxq	%rax,%r11
	mulxq	16(%rsi),%r12,%rax
	adcxq	%r14,%r12
	mulxq	24(%rsi),%r13,%r14
.byte	0x67,0x67
	movq	%r8,%rdx			# switch multiplier back to m
	adcxq	%rax,%r13
	adcxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	leaq	32(%rbx),%rbx

	adoxq	%r15,%r10
	mulxq	0(%rcx),%rax,%r15
	adcxq	%rax,%r10
	adoxq	%r15,%r11
	mulxq	8(%rcx),%rax,%r15
	adcxq	%rax,%r11
	adoxq	%r15,%r12
	mulxq	16(%rcx),%rax,%r15
	movq	%r10,-40(%rbx)
	adcxq	%rax,%r12
# (mulx4x_internal continued: tail of the first pass, then the outer loop's
# constant-time gather of the next b[i] window.)
	movq	%r11,-32(%rbx)
	adoxq	%r15,%r13
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx			# restore b[i] as multiplier
	movq	%r12,-24(%rbx)
	adcxq	%rax,%r13
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r13,-16(%rbx)

	decq	%rdi				# inner counter (from 24+8(%rsp))
	jnz	.Lmulx4x_1st

	movq	8(%rsp),%rax			# num (bytes)
	adcq	%rbp,%r15
	leaq	(%rsi,%rax,1),%rsi		# rewind ap
	addq	%r15,%r14
	movq	8+8(%rsp),%rdi			# current bp window pointer
	adcq	%rbp,%rbp			# capture top carry in %rbp
	movq	%r14,-8(%rbx)
	jmp	.Lmulx4x_outer

.align	32
.Lmulx4x_outer:
	leaq	16-256(%rbx),%r10		# rebase mask area relative to tp
	pxor	%xmm4,%xmm4
.byte	0x67,0x67
	pxor	%xmm5,%xmm5
	# Same mask-and-merge gather as the first pass; masks were precomputed
	# at 256..496(%r10).  All rows are read -> constant-time.
	movdqa	-128(%rdi),%xmm0
	movdqa	-112(%rdi),%xmm1
	movdqa	-96(%rdi),%xmm2
	pand	256(%r10),%xmm0
	movdqa	-80(%rdi),%xmm3
	pand	272(%r10),%xmm1
	por	%xmm0,%xmm4
	pand	288(%r10),%xmm2
	por	%xmm1,%xmm5
	pand	304(%r10),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%rdi),%xmm0
	movdqa	-48(%rdi),%xmm1
	movdqa	-32(%rdi),%xmm2
	pand	320(%r10),%xmm0
	movdqa	-16(%rdi),%xmm3
	pand	336(%r10),%xmm1
	por	%xmm0,%xmm4
	pand	352(%r10),%xmm2
	por	%xmm1,%xmm5
	pand	368(%r10),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%rdi),%xmm0
	movdqa	16(%rdi),%xmm1
	movdqa	32(%rdi),%xmm2
	pand	384(%r10),%xmm0
	movdqa	48(%rdi),%xmm3
	pand	400(%r10),%xmm1
	por	%xmm0,%xmm4
	pand	416(%r10),%xmm2
	por	%xmm1,%xmm5
	pand	432(%r10),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%rdi),%xmm0
	movdqa	80(%rdi),%xmm1
	movdqa	96(%rdi),%xmm2
	pand	448(%r10),%xmm0
	movdqa	112(%rdi),%xmm3
	pand	464(%r10),%xmm1
	por	%xmm0,%xmm4
	pand	480(%r10),%xmm2
	por	%xmm1,%xmm5
	pand	496(%r10),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4
	pshufd	$0x4e,%xmm4,%xmm0		# fold high qword onto low
	por	%xmm4,%xmm0
	leaq	256(%rdi),%rdi
.byte	102,72,15,126,194			# movq %xmm0,%rdx = gathered b[i]

	movq	%rbp,(%rbx)			# store previous top carry
	leaq	32(%rbx,%rax,1),%rbx		# rewind tp
	mulxq	0(%rsi),%r8,%r11
	xorq	%rbp,%rbp			# zero + clear CF/OF
# (mulx4x_internal continued: outer pass -- multiply-accumulate onto the
# existing tp[], fused with one reduction step, then the inner loop.)
	movq	%rdx,%r9
	mulxq	8(%rsi),%r14,%r12
	adoxq	-32(%rbx),%r8			# accumulate previous tp limbs (OF chain)
	adcxq	%r14,%r11
	mulxq	16(%rsi),%r15,%r13
	adoxq	-24(%rbx),%r11
	adcxq	%r15,%r12
	mulxq	24(%rsi),%rdx,%r14
	adoxq	-16(%rbx),%r12
	adcxq	%rdx,%r13
	leaq	(%rcx,%rax,1),%rcx		# rewind np
	leaq	32(%rsi),%rsi
	adoxq	-8(%rbx),%r13
	adcxq	%rbp,%r14
	adoxq	%rbp,%r14

	movq	%r8,%r15
	imulq	32+8(%rsp),%r8			# m = tp[0]*n0

	movq	%r8,%rdx
	xorq	%rbp,%rbp			# zero + clear CF/OF
	movq	%rdi,8+8(%rsp)

	mulxq	0(%rcx),%rax,%r10
	adcxq	%rax,%r15			# kills tp[0]
	adoxq	%r11,%r10
	mulxq	8(%rcx),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11
	mulxq	16(%rcx),%rax,%r12
	adcxq	%rax,%r11
	adoxq	%r13,%r12
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	24+8(%rsp),%rdi			# inner loop counter
	movq	%r10,-32(%rbx)
	adcxq	%rax,%r12
	movq	%r11,-24(%rbx)
	adoxq	%rbp,%r15
	movq	%r12,-16(%rbx)
	leaq	32(%rcx),%rcx
	jmp	.Lmulx4x_inner

.align	32
.Lmulx4x_inner:
	mulxq	0(%rsi),%r10,%rax
	adcxq	%rbp,%r15
	adoxq	%r14,%r10
	mulxq	8(%rsi),%r11,%r14
	adcxq	0(%rbx),%r10			# + existing tp[]
	adoxq	%rax,%r11
	mulxq	16(%rsi),%r12,%rax
	adcxq	8(%rbx),%r11
	adoxq	%r14,%r12
	mulxq	24(%rsi),%r13,%r14
	movq	%r8,%rdx			# switch to m
	adcxq	16(%rbx),%r12
	adoxq	%rax,%r13
	adcxq	24(%rbx),%r13
	adoxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	leaq	32(%rbx),%rbx
	adcxq	%rbp,%r14

	adoxq	%r15,%r10
	mulxq	0(%rcx),%rax,%r15
	adcxq	%rax,%r10
	adoxq	%r15,%r11
	mulxq	8(%rcx),%rax,%r15
	adcxq	%rax,%r11
	adoxq	%r15,%r12
	mulxq	16(%rcx),%rax,%r15
	movq	%r10,-40(%rbx)
	adcxq	%rax,%r12
	adoxq	%r15,%r13
	movq	%r11,-32(%rbx)
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx			# restore b[i]
	leaq	32(%rcx),%rcx
	movq	%r12,-24(%rbx)
	adcxq	%rax,%r13
	adoxq	%rbp,%r15
	movq	%r13,-16(%rbx)

	decq	%rdi
	jnz	.Lmulx4x_inner

	movq	0+8(%rsp),%rax			# num (bytes)
	adcq	%rbp,%r15
	subq	0(%rbx),%rdi			# %rdi was 0: capture CF from tp top
	movq	8+8(%rsp),%rdi
	movq	16+8(%rsp),%r10			# table end sentinel
	adcq	%r15,%r14
	leaq	(%rsi,%rax,1),%rsi		# rewind ap
	adcq	%rbp,%rbp			# top carry
	movq	%r14,-8(%rbx)

	cmpq	%r10,%rdi
	jb	.Lmulx4x_outer			# more b[] windows to process

	# All passes done: compute the final borrow/carry mask in %rax and
	# fall into the shared conditional-subtract tail (.Lsqrx4x_sub_entry).
	movq	-8(%rcx),%r10
	movq	%rbp,%r8
	movq	(%rcx,%rax,1),%r12
	leaq	(%rcx,%rax,1),%rbp		# rewind np
	movq	%rax,%rcx
	leaq	(%rbx,%rax,1),%rdi		# rewind tp
	xorl	%eax,%eax
	xorq	%r15,%r15
	subq	%r14,%r10			# compare top limbs
	adcq	%r15,%r15
	orq	%r15,%r8
	sarq	$3+2,%rcx			# group count
	subq	%r8,%rax			# %rax = 0 or -1 subtraction mask
	movq	56+8(%rsp),%rdx			# rp
	decq	%r12				# as in __bn_post4x_internal
	movq	8(%rbp),%r13
	xorq	%r8,%r8
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	.Lsqrx4x_sub_entry
.cfi_endproc
.size	mulx4x_internal,.-mulx4x_internal

# bn_powerx5: r = a^(2^5) * b mod n (five 8x squarings followed by one
# multiply), MULX/ADX flavour.  Same prologue/frame scheme as the other
# gather5 entry points.
.type	bn_powerx5,@function
.align	32
bn_powerx5:
.cfi_startproc
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lpowerx5_enter:
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lpowerx5_prologue:

	shll	$3,%r9d				# num words -> bytes
	leaq	(%r9,%r9,2),%r10		# 3*num
	negq	%r9
	movq	(%r8),%r8			# n0

	# Frame placement with rp-relative 4K scrambling.
	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lpwrx_sp_alt
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp
	jmp	.Lpwrx_sp_done

.align	32
.Lpwrx_sp_alt:
	leaq	4096-320(,%r9,2),%r10
	leaq	-320(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lpwrx_sp_done:
	andq	$-64,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lpwrx_page_walk
	jmp	.Lpwrx_page_walk_done

# Page-ordered stack probe.
.Lpwrx_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lpwrx_page_walk
.Lpwrx_page_walk_done:

	movq	%r9,%r10
	negq	%r9

	# Stash rp/np/index/num in xmm registers for the internal helpers.
	pxor	%xmm0,%xmm0
.byte	102,72,15,110,207			# movq %rdi,%xmm1
.byte	102,72,15,110,209			# movq %rcx,%xmm2
.byte	102,73,15,110,218			# movq %r10,%xmm3
.byte	102,72,15,110,226			# movq %rdx,%xmm4
	movq	%r8,32(%rsp)			# save n0
	movq	%rax,40(%rsp)			# save original %rsp
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lpowerx5_body:

	# Five square+reduce rounds: a^(2^5) mod n.
	call	__bn_sqrx8x_internal
	call	__bn_postx4x_internal
	call	__bn_sqrx8x_internal
	call	__bn_postx4x_internal
	call	__bn_sqrx8x_internal
	call	__bn_postx4x_internal
	call	__bn_sqrx8x_internal
	call	__bn_postx4x_internal
	call	__bn_sqrx8x_internal
	call	__bn_postx4x_internal

	movq	%r10,%r9
	movq	%rsi,%rdi
.byte	102,72,15,126,209			# movq %xmm2,%rcx (np)
.byte	102,72,15,126,226			# movq %xmm4,%rdx (bp)
	movq	40(%rsp),%rax

	call	mulx4x_internal			# final multiply by the gathered power

	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax				# return 1

	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpowerx5_epilogue:
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	bn_powerx5,.-bn_powerx5

# bn_sqrx8x_internal / __bn_sqrx8x_internal: 8-limb-wide squaring core,
# MULX/ADX flavour.  Expects %rsi = a, %r9 = num bytes, tp scratch at
# 48+8(%rsp); helper register contract is shared with __bn_postx4x_internal.
# NOTE(review): full register contract lives in the perlasm source comments
# (not visible here) -- confirm before changing any register use.
.globl	bn_sqrx8x_internal
.hidden	bn_sqrx8x_internal
.type	bn_sqrx8x_internal,@function
.align	32
bn_sqrx8x_internal:
__bn_sqrx8x_internal:
.cfi_startproc

	leaq	48+8(%rsp),%rdi			# tp
	leaq	(%rsi,%r9,1),%rbp		# end of a[]
	movq	%r9,0+8(%rsp)
	movq	%rbp,8+8(%rsp)
	jmp	.Lsqr8x_zero_start

.align	32
.byte	0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00	# multi-byte nop padding
.Lsqrx8x_zero:
.byte	0x3e					# DS-prefix pad
# (bn_sqrx8x_internal continued: zero the 2*num tp[] area 128 bytes per
# iteration, then start the cross-product outer loop.)
	movdqa	%xmm0,0(%rdi)
	movdqa	%xmm0,16(%rdi)
	movdqa	%xmm0,32(%rdi)
	movdqa	%xmm0,48(%rdi)
.Lsqr8x_zero_start:
	movdqa	%xmm0,64(%rdi)
	movdqa	%xmm0,80(%rdi)
	movdqa	%xmm0,96(%rdi)
	movdqa	%xmm0,112(%rdi)
	leaq	128(%rdi),%rdi
	subq	$64,%r9
	jnz	.Lsqrx8x_zero

	movq	0(%rsi),%rdx			# a[0] as implicit mulx multiplier

	xorq	%r10,%r10
	xorq	%r11,%r11
	xorq	%r12,%r12
	xorq	%r13,%r13
	xorq	%r14,%r14
	xorq	%r15,%r15
	leaq	48+8(%rsp),%rdi
	xorq	%rbp,%rbp			# zero + clear CF/OF

# Off-diagonal products a[i]*a[j] (i<j) for one 8-limb stripe; the
# hand-encoded .byte sequences below are fixed-length mulx forms.
.align	32
.Lsqrx8x_outer_loop:
	mulxq	8(%rsi),%r8,%rax
	adcxq	%r9,%r8
	adoxq	%rax,%r10
	mulxq	16(%rsi),%r9,%rax
	adcxq	%r10,%r9
	adoxq	%rax,%r11
.byte	0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00	# mulxq 24(%rsi),%r10,%rax
	adcxq	%r11,%r10
	adoxq	%rax,%r12
.byte	0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00	# mulxq 32(%rsi),%r11,%rax
	adcxq	%r12,%r11
	adoxq	%rax,%r13
	mulxq	40(%rsi),%r12,%rax
	adcxq	%r13,%r12
	adoxq	%rax,%r14
	mulxq	48(%rsi),%r13,%rax
	adcxq	%r14,%r13
	adoxq	%r15,%rax
	mulxq	56(%rsi),%r14,%r15
	movq	8(%rsi),%rdx			# next multiplier a[1]
	adcxq	%rax,%r14
	adoxq	%rbp,%r15
	adcq	64(%rdi),%r15
	movq	%r8,8(%rdi)
	movq	%r9,16(%rdi)
	sbbq	%rcx,%rcx			# keep carry
	xorq	%rbp,%rbp			# zero + clear CF/OF

	mulxq	16(%rsi),%r8,%rbx
	mulxq	24(%rsi),%r9,%rax
	adcxq	%r10,%r8
	adoxq	%rbx,%r9
	mulxq	32(%rsi),%r10,%rbx
	adcxq	%r11,%r9
	adoxq	%rax,%r10
.byte	0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00	# mulxq 40(%rsi),%r11,%rax
	adcxq	%r12,%r10
	adoxq	%rbx,%r11
.byte	0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00	# mulxq 48(%rsi),%r12,%rbx
	adcxq	%r13,%r11
	adoxq	%r14,%r12
.byte	0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00	# mulxq 56(%rsi),%r14,%rsi-form (hand-encoded)
	movq	16(%rsi),%rdx			# next multiplier a[2]
	adcxq	%rax,%r12
	adoxq	%rbx,%r13
	adcxq	%r15,%r13
	adoxq	%rbp,%r14
	adcxq	%rbp,%r14

	movq	%r8,24(%rdi)
	movq	%r9,32(%rdi)

	mulxq	24(%rsi),%r8,%rbx
	mulxq	32(%rsi),%r9,%rax
	adcxq	%r10,%r8
	adoxq	%rbx,%r9
	mulxq	40(%rsi),%r10,%rbx
# (bn_sqrx8x_internal continued: remaining cross products of the stripe,
# then the inner accumulation loop over earlier limbs.)
	adcxq	%r11,%r9
	adoxq	%rax,%r10
.byte	0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00	# mulxq 48(%rsi),%r11,%rax
	adcxq	%r12,%r10
	adoxq	%r13,%r11
.byte	0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00	# mulxq 56(%rsi),%r13,%rbp-form (hand-encoded)
.byte	0x3e					# DS-prefix pad
	movq	24(%rsi),%rdx			# next multiplier a[3]
	adcxq	%rbx,%r11
	adoxq	%rax,%r12
	adcxq	%r14,%r12
	movq	%r8,40(%rdi)
	movq	%r9,48(%rdi)
	mulxq	32(%rsi),%r8,%rax
	adoxq	%rbp,%r13
	adcxq	%rbp,%r13

	mulxq	40(%rsi),%r9,%rbx
	adcxq	%r10,%r8
	adoxq	%rax,%r9
	mulxq	48(%rsi),%r10,%rax
	adcxq	%r11,%r9
	adoxq	%r12,%r10
	mulxq	56(%rsi),%r11,%r12
	movq	32(%rsi),%rdx			# next multiplier a[4]
	movq	40(%rsi),%r14
	adcxq	%rbx,%r10
	adoxq	%rax,%r11
	movq	48(%rsi),%r15
	adcxq	%r13,%r11
	adoxq	%rbp,%r12
	adcxq	%rbp,%r12

	movq	%r8,56(%rdi)
	movq	%r9,64(%rdi)

	mulxq	%r14,%r9,%rax			# a[4]*a[5]
	movq	56(%rsi),%r8
	adcxq	%r10,%r9
	mulxq	%r15,%r10,%rbx			# a[4]*a[6]
	adoxq	%rax,%r10
	adcxq	%r11,%r10
	mulxq	%r8,%r11,%rax			# a[4]*a[7]
	movq	%r14,%rdx
	adoxq	%rbx,%r11
	adcxq	%r12,%r11

	adcxq	%rbp,%rax

	mulxq	%r15,%r14,%rbx			# a[5]*a[6]
	mulxq	%r8,%r12,%r13			# a[5]*a[7]
	movq	%r15,%rdx
	leaq	64(%rsi),%rsi
	adcxq	%r14,%r11
	adoxq	%rbx,%r12
	adcxq	%rax,%r12
	adoxq	%rbp,%r13

.byte	0x67,0x67				# addr-size pads
	mulxq	%r8,%r8,%r14			# a[6]*a[7]
	adcxq	%r8,%r13
	adcxq	%rbp,%r14

	cmpq	8+8(%rsp),%rsi			# reached end of a[]?
	je	.Lsqrx8x_outer_break

	negq	%rcx				# restore saved carry
	movq	$-8,%rcx
	movq	%rbp,%r15
	movq	64(%rdi),%r8
	adcxq	72(%rdi),%r9			# merge with previously stored tp[]
	adcxq	80(%rdi),%r10
	adcxq	88(%rdi),%r11
	adcq	96(%rdi),%r12
	adcq	104(%rdi),%r13
	adcq	112(%rdi),%r14
	adcq	120(%rdi),%r15
	leaq	(%rsi),%rbp
	leaq	128(%rdi),%rdi
	sbbq	%rax,%rax			# save carry mask

	movq	-64(%rsi),%rdx
	movq	%rax,16+8(%rsp)
	movq	%rdi,24+8(%rsp)

	xorl	%eax,%eax			# clears CF/OF for the adcx/adox chains
	jmp	.Lsqrx8x_loop

# Inner loop: multiply the current stripe multiplier against 8 earlier
# limbs at a time, dual carry chains (adcx=CF, adox=OF).
.align	32
.Lsqrx8x_loop:
	movq	%r8,%rbx
	mulxq	0(%rbp),%rax,%r8
	adcxq	%rax,%rbx
	adoxq	%r9,%r8

	mulxq	8(%rbp),%rax,%r9
	adcxq	%rax,%r8
	adoxq	%r10,%r9

	mulxq	16(%rbp),%rax,%r10
	adcxq	%rax,%r9
	adoxq	%r11,%r10

	mulxq	24(%rbp),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11

.byte	0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00	# mulxq 32(%rbp),%rax,%r12
	adcxq	%rax,%r11
	adoxq	%r13,%r12

	mulxq	40(%rbp),%rax,%r13
	adcxq	%rax,%r12
	adoxq	%r14,%r13

	mulxq	48(%rbp),%rax,%r14
	movq	%rbx,(%rdi,%rcx,8)
	movl	$0,%ebx
	adcxq	%rax,%r13
	adoxq	%r15,%r14

.byte	0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00	# mulxq 56(%rbp),%rax,%r15
	movq	8(%rsi,%rcx,8),%rdx
	adcxq	%rax,%r14
	adoxq	%rbx,%r15
	adcxq	%rbx,%r15

.byte	0x67					# pad
	incq	%rcx
	jnz	.Lsqrx8x_loop

	leaq	64(%rbp),%rbp
	movq	$-8,%rcx
	cmpq	8+8(%rsp),%rbp
	je	.Lsqrx8x_break

	subq	16+8(%rsp),%rbx			# reload saved carry into CF
.byte	0x66					# pad
	movq	-64(%rsi),%rdx
	adcxq	0(%rdi),%r8
	adcxq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	leaq	64(%rdi),%rdi
.byte	0x67					# pad
	sbbq	%rax,%rax			# save carry mask
	xorl	%ebx,%ebx			# clears CF/OF
	movq	%rax,16+8(%rsp)
	jmp	.Lsqrx8x_loop

# End of a stripe: fold in the saved carry, push the register window back
# to tp[], and either start the next outer stripe or finish.
.align	32
.Lsqrx8x_break:
	xorq	%rbp,%rbp
	subq	16+8(%rsp),%rbx			# reload saved carry
	adcxq	%rbp,%r8
	movq	24+8(%rsp),%rcx
	adcxq	%rbp,%r9
	movq	0(%rsi),%rdx
	adcq	$0,%r10
	movq	%r8,0(%rdi)
	adcq	$0,%r11
	adcq	$0,%r12
	adcq	$0,%r13
	adcq	$0,%r14
	adcq	$0,%r15
	cmpq	%rcx,%rdi
	je	.Lsqrx8x_outer_loop

	movq	%r9,8(%rdi)
	movq	8(%rcx),%r9
	movq	%r10,16(%rdi)
	movq	16(%rcx),%r10
	movq	%r11,24(%rdi)
	movq	24(%rcx),%r11
	movq	%r12,32(%rdi)
	movq	32(%rcx),%r12
	movq	%r13,40(%rdi)
	movq	40(%rcx),%r13
	movq	%r14,48(%rdi)
	movq	48(%rcx),%r14
	movq	%r15,56(%rdi)
	movq	56(%rcx),%r15
	movq	%rcx,%rdi
	jmp	.Lsqrx8x_outer_loop

.align	32
.Lsqrx8x_outer_break:
# (bn_sqrx8x_internal continued: flush the last stripe, then the
# shift-and-add pass: double every off-diagonal limb while adding the
# diagonal squares a[i]^2 produced by mulx %rdx,%rdx.)
	movq	%r9,72(%rdi)
.byte	102,72,15,126,217			# movq %xmm3,%rcx
	movq	%r10,80(%rdi)
	movq	%r11,88(%rdi)
	movq	%r12,96(%rdi)
	movq	%r13,104(%rdi)
	movq	%r14,112(%rdi)
	leaq	48+8(%rsp),%rdi			# rewind tp
	movq	(%rsi,%rcx,1),%rdx		# a[0]

	movq	8(%rdi),%r11
	xorq	%r10,%r10			# zero + clear CF/OF
	movq	0+8(%rsp),%r9			# num
	adoxq	%r11,%r11			# start doubling chain (OF)
	movq	16(%rdi),%r12
	movq	24(%rdi),%r13

.align	32
.Lsqrx4x_shift_n_add:
	mulxq	%rdx,%rax,%rbx			# a[i]^2
	adoxq	%r12,%r12			# 2*tp (adox chain)
	adcxq	%r10,%rax			# + square (adcx chain)
.byte	0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00	# movq 8(%rsi,%rcx,1),%rdx
.byte	0x4c,0x8b,0x97,0x20,0x00,0x00,0x00	# movq 32(%rdi),%r10
	adoxq	%r13,%r13
	adcxq	%r11,%rbx
	movq	40(%rdi),%r11
	movq	%rax,0(%rdi)
	movq	%rbx,8(%rdi)

	mulxq	%rdx,%rax,%rbx
	adoxq	%r10,%r10
	adcxq	%r12,%rax
	movq	16(%rsi,%rcx,1),%rdx
	movq	48(%rdi),%r12
	adoxq	%r11,%r11
	adcxq	%r13,%rbx
	movq	56(%rdi),%r13
	movq	%rax,16(%rdi)
	movq	%rbx,24(%rdi)

	mulxq	%rdx,%rax,%rbx
	adoxq	%r12,%r12
	adcxq	%r10,%rax
	movq	24(%rsi,%rcx,1),%rdx
	leaq	32(%rcx),%rcx
	movq	64(%rdi),%r10
	adoxq	%r13,%r13
	adcxq	%r11,%rbx
	movq	72(%rdi),%r11
	movq	%rax,32(%rdi)
	movq	%rbx,40(%rdi)

	mulxq	%rdx,%rax,%rbx
	adoxq	%r10,%r10
	adcxq	%r12,%rax
	jrcxz	.Lsqrx4x_shift_n_add_break	# %rcx==0: last group
.byte	0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00	# movq 0(%rsi,%rcx,1),%rdx
	adoxq	%r11,%r11
	adcxq	%r13,%rbx
	movq	80(%rdi),%r12
	movq	88(%rdi),%r13
	movq	%rax,48(%rdi)
	movq	%rbx,56(%rdi)
	leaq	64(%rdi),%rdi
	nop
	jmp	.Lsqrx4x_shift_n_add

.align	32
.Lsqrx4x_shift_n_add_break:
	adcxq	%r13,%rbx
	movq	%rax,48(%rdi)
	movq	%rbx,56(%rdi)
	leaq	64(%rdi),%rdi
.byte	102,72,15,126,213			# movq %xmm2,%rbp (np)
# __bn_sqrx8x_reduction: Montgomery reduction of the 2*num-limb square in
# tp[], MULX/ADX flavour; falls through from the squaring above and is also
# called directly (e.g. from bn_from_mont8x).  n0 is at 32+8(%rsp).
__bn_sqrx8x_reduction:
	xorl	%eax,%eax
	movq	32+8(%rsp),%rbx			# n0
	movq	48+8(%rsp),%rdx			# tp[0]
	leaq	-64(%rbp,%r9,1),%rcx		# end of np

	movq	%rcx,0+8(%rsp)
	movq	%rdi,8+8(%rsp)

	leaq	48+8(%rsp),%rdi			# rewind tp
	jmp	.Lsqrx8x_reduction_loop

.align	32
# (__bn_sqrx8x_reduction continued: per-8-limb reduction round.  Each pass
# computes m = tp[0]*n0 mod 2^64 and adds m*n[] with dual adcx/adox chains.)
.Lsqrx8x_reduction_loop:
	movq	8(%rdi),%r9
	movq	16(%rdi),%r10
	movq	24(%rdi),%r11
	movq	32(%rdi),%r12
	movq	%rdx,%r8
	imulq	%rbx,%rdx			# m = tp[0]*n0
	movq	40(%rdi),%r13
	movq	48(%rdi),%r14
	movq	56(%rdi),%r15
	movq	%rax,24+8(%rsp)			# store top-most carry

	leaq	64(%rdi),%rdi
	xorq	%rsi,%rsi			# zero + clear CF/OF
	movq	$-8,%rcx
	jmp	.Lsqrx8x_reduce

.align	32
.Lsqrx8x_reduce:
	movq	%r8,%rbx
	mulxq	0(%rbp),%rax,%r8
	adcxq	%rbx,%rax			# low limb annihilated
	adoxq	%r9,%r8

	mulxq	8(%rbp),%rbx,%r9
	adcxq	%rbx,%r8
	adoxq	%r10,%r9

	mulxq	16(%rbp),%rbx,%r10
	adcxq	%rbx,%r9
	adoxq	%r11,%r10

	mulxq	24(%rbp),%rbx,%r11
	adcxq	%rbx,%r10
	adoxq	%r12,%r11

.byte	0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00	# mulxq 32(%rbp),%rbx,%r12
	movq	%rdx,%rax
	movq	%r8,%rdx
	adcxq	%rbx,%r11
	adoxq	%r13,%r12

	mulxq	32+8(%rsp),%rbx,%rdx		# next m = tp[i]*n0
	movq	%rax,%rdx
	movq	%rax,64+48+8(%rsp,%rcx,8)	# save m for the tail pass

	mulxq	40(%rbp),%rax,%r13
	adcxq	%rax,%r12
	adoxq	%r14,%r13

	mulxq	48(%rbp),%rax,%r14
	adcxq	%rax,%r13
	adoxq	%r15,%r14

	mulxq	56(%rbp),%rax,%r15
	movq	%rbx,%rdx			# switch to the new m
	adcxq	%rax,%r14
	adoxq	%rsi,%r15
	adcxq	%rsi,%r15

.byte	0x67,0x67,0x67				# pads
	incq	%rcx
	jnz	.Lsqrx8x_reduce

	movq	%rsi,%rax
	cmpq	0+8(%rsp),%rbp			# end of np reached?
	jae	.Lsqrx8x_no_tail

	# More modulus limbs: absorb the next tp[] window and continue with
	# the saved m values in the tail loop.
	movq	48+8(%rsp),%rdx
	addq	0(%rdi),%r8
	leaq	64(%rbp),%rbp
	movq	$-8,%rcx
	adcxq	8(%rdi),%r9
	adcxq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	leaq	64(%rdi),%rdi
	sbbq	%rax,%rax			# carry mask

	xorq	%rsi,%rsi			# zero + clear CF/OF
	movq	%rax,16+8(%rsp)
	jmp	.Lsqrx8x_tail

.align	32
.Lsqrx8x_tail:
	movq	%r8,%rbx
	mulxq	0(%rbp),%rax,%r8
	adcxq	%rax,%rbx
	adoxq	%r9,%r8

	mulxq	8(%rbp),%rax,%r9
	adcxq	%rax,%r8
	adoxq	%r10,%r9

	mulxq	16(%rbp),%rax,%r10
	adcxq	%rax,%r9
adoxq %r11,%r10 3442 3443 mulxq 24(%rbp),%rax,%r11 3444 adcxq %rax,%r10 3445 adoxq %r12,%r11 3446 3447.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3448 adcxq %rax,%r11 3449 adoxq %r13,%r12 3450 3451 mulxq 40(%rbp),%rax,%r13 3452 adcxq %rax,%r12 3453 adoxq %r14,%r13 3454 3455 mulxq 48(%rbp),%rax,%r14 3456 adcxq %rax,%r13 3457 adoxq %r15,%r14 3458 3459 mulxq 56(%rbp),%rax,%r15 3460 movq 72+48+8(%rsp,%rcx,8),%rdx 3461 adcxq %rax,%r14 3462 adoxq %rsi,%r15 3463 movq %rbx,(%rdi,%rcx,8) 3464 movq %r8,%rbx 3465 adcxq %rsi,%r15 3466 3467 incq %rcx 3468 jnz .Lsqrx8x_tail 3469 3470 cmpq 0+8(%rsp),%rbp 3471 jae .Lsqrx8x_tail_done 3472 3473 subq 16+8(%rsp),%rsi 3474 movq 48+8(%rsp),%rdx 3475 leaq 64(%rbp),%rbp 3476 adcq 0(%rdi),%r8 3477 adcq 8(%rdi),%r9 3478 adcq 16(%rdi),%r10 3479 adcq 24(%rdi),%r11 3480 adcq 32(%rdi),%r12 3481 adcq 40(%rdi),%r13 3482 adcq 48(%rdi),%r14 3483 adcq 56(%rdi),%r15 3484 leaq 64(%rdi),%rdi 3485 sbbq %rax,%rax 3486 subq $8,%rcx 3487 3488 xorq %rsi,%rsi 3489 movq %rax,16+8(%rsp) 3490 jmp .Lsqrx8x_tail 3491 3492.align 32 3493.Lsqrx8x_tail_done: 3494 xorq %rax,%rax 3495 addq 24+8(%rsp),%r8 3496 adcq $0,%r9 3497 adcq $0,%r10 3498 adcq $0,%r11 3499 adcq $0,%r12 3500 adcq $0,%r13 3501 adcq $0,%r14 3502 adcq $0,%r15 3503 adcq $0,%rax 3504 3505 subq 16+8(%rsp),%rsi 3506.Lsqrx8x_no_tail: 3507 adcq 0(%rdi),%r8 3508.byte 102,72,15,126,217 3509 adcq 8(%rdi),%r9 3510 movq 56(%rbp),%rsi 3511.byte 102,72,15,126,213 3512 adcq 16(%rdi),%r10 3513 adcq 24(%rdi),%r11 3514 adcq 32(%rdi),%r12 3515 adcq 40(%rdi),%r13 3516 adcq 48(%rdi),%r14 3517 adcq 56(%rdi),%r15 3518 adcq $0,%rax 3519 3520 movq 32+8(%rsp),%rbx 3521 movq 64(%rdi,%rcx,1),%rdx 3522 3523 movq %r8,0(%rdi) 3524 leaq 64(%rdi),%r8 3525 movq %r9,8(%rdi) 3526 movq %r10,16(%rdi) 3527 movq %r11,24(%rdi) 3528 movq %r12,32(%rdi) 3529 movq %r13,40(%rdi) 3530 movq %r14,48(%rdi) 3531 movq %r15,56(%rdi) 3532 3533 leaq 64(%rdi,%rcx,1),%rdi 3534 cmpq 8+8(%rsp),%r8 3535 jb .Lsqrx8x_reduction_loop 3536 .byte 0xf3,0xc3 
.cfi_endproc
.size	bn_sqrx8x_internal,.-bn_sqrx8x_internal

/*
 * __bn_postx4x_internal — conditional final subtraction after MULX-path
 * Montgomery squaring: computes result - (modulus & mask) with borrow
 * propagation, 4 limbs per iteration, in constant time (ANDN masks the
 * modulus instead of branching on the compare result).
 * NOTE(review): inferred from this tail-called helper's body; inputs are
 *   %rbp = modulus, %rdi = source, %rcx = limb count, %rax = select mask,
 *   %xmm1 = destination pointer (restored into %rdx and %rsi below) —
 *   confirm against the internal caller.
 */
.align	32
.type	__bn_postx4x_internal,@function
__bn_postx4x_internal:
.cfi_startproc
	movq	0(%rbp),%r12			# first modulus limb
	movq	%rcx,%r10			# preserve limb count
	movq	%rcx,%r9
	negq	%rax				# rax: 0 or -1 — all-ones/all-zero select mask
	sarq	$3+2,%rcx			# limbs -> negative count of 4-limb groups (divide by 32 bytes? see caller)

.byte	102,72,15,126,202			# movq %xmm1,%rdx (flag-preserving pointer restore)
.byte	102,72,15,126,206			# movq %xmm1,%rsi
	decq	%r12				# pre-bias first limb (entry path skips the reload below)
	movq	8(%rbp),%r13
	xorq	%r8,%r8				# r8 = 0: initial borrow
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	.Lsqrx4x_sub_entry

.align	16
.Lsqrx4x_sub:
	movq	0(%rbp),%r12			# next 4 modulus limbs
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
.Lsqrx4x_sub_entry:
	andnq	%rax,%r12,%r12			# r12 = ~limb & mask: subtrahend limbs, selected branchlessly
	leaq	32(%rbp),%rbp
	andnq	%rax,%r13,%r13
	andnq	%rax,%r14,%r14
	andnq	%rax,%r15,%r15

	negq	%r8				# reload CF from saved borrow (r8 is 0 or -1)
	adcq	0(%rdi),%r12			# result + ~limb + carry == result - limb (two's complement)
	adcq	8(%rdi),%r13
	adcq	16(%rdi),%r14
	adcq	24(%rdi),%r15
	movq	%r12,0(%rdx)			# store 4 output limbs
	leaq	32(%rdi),%rdi
	movq	%r13,8(%rdx)
	sbbq	%r8,%r8				# save borrow as mask for next group
	movq	%r14,16(%rdx)
	movq	%r15,24(%rdx)
	leaq	32(%rdx),%rdx

	incq	%rcx
	jnz	.Lsqrx4x_sub

	negq	%r9				# restore caller's count convention

	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	__bn_postx4x_internal,.-__bn_postx4x_internal

/*
 * void bn_scatter5(const BN_ULONG *inp, size_t num, void *table, size_t idx)
 * SysV: rdi = inp, esi = num (limbs), rdx = table, rcx = idx.
 * Writes limb i of inp to table + idx*8 + i*256: one limb per 256-byte
 * cache-line-spanning stride, so a later gather touches every line
 * regardless of idx (cache-timing defense).
 */
.globl	bn_scatter5
.hidden	bn_scatter5
.type	bn_scatter5,@function
.align	16
bn_scatter5:
.cfi_startproc
	cmpl	$0,%esi
	jz	.Lscatter_epilogue		# nothing to do for num == 0
	leaq	(%rdx,%rcx,8),%rdx		# start at table[idx]
.Lscatter:
	movq	(%rdi),%rax
	leaq	8(%rdi),%rdi
	movq	%rax,(%rdx)
	leaq	256(%rdx),%rdx			# stride of 256 bytes between limbs
	subl	$1,%esi
	jnz	.Lscatter
.Lscatter_epilogue:
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	bn_scatter5,.-bn_scatter5

/*
 * void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t idx)
 * SysV: rdi = out, esi = num (limbs), rdx = table, ecx = idx (0..31).
 * Constant-time gather: builds 32 16-byte compare masks on the stack
 * (one per possible idx, exactly one all-ones), then for each limb reads
 * ALL 32 table slots and ANDs each with its mask, ORing into the result.
 * Memory access pattern is independent of idx — no secret-dependent loads.
 */
.globl	bn_gather5
.hidden	bn_gather5
.type	bn_gather5,@function
.align	32
bn_gather5:
.cfi_startproc
.LSEH_begin_bn_gather5:

.byte	0x4c,0x8d,0x14,0x24			# lea (%rsp),%r10 — save stack pointer, fixed encoding for Win64 SEH matching
.cfi_def_cfa_register	%r10
.byte	0x48,0x81,0xec,0x08,0x01,0x00,0x00	# sub $0x108,%rsp — room for the 32 masks, fixed encoding
	leaq	.Linc(%rip),%rax
	andq	$-16,%rsp			# align for movdqa stores

	movd	%ecx,%xmm5
	movdqa	0(%rax),%xmm0			# {0,0,1,1}
	movdqa	16(%rax),%xmm1			# {2,2,2,2} increment
	leaq	128(%rdx),%r11			# bias table pointer to use signed disp8/disp32 both ways
	leaq	128(%rsp),%rax			# bias mask-area pointer likewise

	pshufd	$0,%xmm5,%xmm5			# broadcast idx to all 4 dwords

	/* Unrolled mask generation: xmm0/xmm1/xmm2/xmm3 cycle through
	   candidate indices {k,k,k+1,k+1}; pcmpeqd against broadcast idx
	   yields all-ones for the matching slot, zero elsewhere.  Sixteen
	   16-byte masks cover indices 0..31 (two per store). */
	movdqa	%xmm1,%xmm4
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,-128(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,-112(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,-96(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,-80(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,-64(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,-48(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,-32(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,-16(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,0(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,16(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,32(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,48(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,64(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,80(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,96(%rax)
	movdqa	%xmm4,%xmm2
	movdqa	%xmm3,112(%rax)			# last of the 16 mask stores
	jmp	.Lgather

.align	32
.Lgather:
	/* One output limb per iteration: AND all 32 table entries (256
	   bytes) with their masks, OR everything together — only the entry
	   whose mask is all-ones survives. */
	pxor	%xmm4,%xmm4			# accumulator A
	pxor	%xmm5,%xmm5			# accumulator B
	movdqa	-128(%r11),%xmm0
	movdqa	-112(%r11),%xmm1
	movdqa	-96(%r11),%xmm2
	pand	-128(%rax),%xmm0
	movdqa	-80(%r11),%xmm3
	pand	-112(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	-96(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	-80(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%r11),%xmm0
	movdqa	-48(%r11),%xmm1
	movdqa	-32(%r11),%xmm2
	pand	-64(%rax),%xmm0
	movdqa	-16(%r11),%xmm3
	pand	-48(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	-32(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	-16(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%r11),%xmm0
	movdqa	16(%r11),%xmm1
	movdqa	32(%r11),%xmm2
	pand	0(%rax),%xmm0
	movdqa	48(%r11),%xmm3
	pand	16(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	32(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	48(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%r11),%xmm0
	movdqa	80(%r11),%xmm1
	movdqa	96(%r11),%xmm2
	pand	64(%rax),%xmm0
	movdqa	112(%r11),%xmm3
	pand	80(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	96(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	112(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4			# merge both accumulators
	leaq	256(%r11),%r11			# next limb row of the table
	pshufd	$0x4e,%xmm4,%xmm0		# swap qwords
	por	%xmm4,%xmm0			# fold high qword into low (selected limb sits in one half)
	movq	%xmm0,(%rdi)			# emit one 64-bit limb
	leaq	8(%rdi),%rdi
	subl	$1,%esi
	jnz	.Lgather

	leaq	(%r10),%rsp			# restore original stack pointer
.cfi_def_cfa_register	%rsp
	.byte	0xf3,0xc3			# rep ret
.LSEH_end_bn_gather5:
.cfi_endproc
.size	bn_gather5,.-bn_gather5

/* Constants for mask generation: initial {0,0,1,1} and increment {2,2,2,2}. */
.align	64
.Linc:
.long	0,0, 1,1
.long	2,2, 2,2
/* CRYPTOGAMS identification string (NUL-terminated ASCII). */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
#endif
.section	.note.GNU-stack,"",@progbits