1// This file is generated from a similarly-named Perl script in the BoringSSL 2// source tree. Do not edit by hand. 3 4#if defined(__has_feature) 5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) 6#define OPENSSL_NO_ASM 7#endif 8#endif 9 10#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) && defined(__ELF__) 11#if defined(BORINGSSL_PREFIX) 12#include <boringssl_prefix_symbols_asm.h> 13#endif 14.text 15 16.extern OPENSSL_ia32cap_P 17.hidden OPENSSL_ia32cap_P 18 19.globl bn_mul_mont_gather5 20.hidden bn_mul_mont_gather5 21.type bn_mul_mont_gather5,@function 22.align 64 23bn_mul_mont_gather5: 24.cfi_startproc 25 movl %r9d,%r9d 26 movq %rsp,%rax 27.cfi_def_cfa_register %rax 28 testl $7,%r9d 29 jnz .Lmul_enter 30 leaq OPENSSL_ia32cap_P(%rip),%r11 31 movl 8(%r11),%r11d 32 jmp .Lmul4x_enter 33 34.align 16 35.Lmul_enter: 36 movd 8(%rsp),%xmm5 37 pushq %rbx 38.cfi_offset %rbx,-16 39 pushq %rbp 40.cfi_offset %rbp,-24 41 pushq %r12 42.cfi_offset %r12,-32 43 pushq %r13 44.cfi_offset %r13,-40 45 pushq %r14 46.cfi_offset %r14,-48 47 pushq %r15 48.cfi_offset %r15,-56 49 50 negq %r9 51 movq %rsp,%r11 52 leaq -280(%rsp,%r9,8),%r10 53 negq %r9 54 andq $-1024,%r10 55 56 57 58 59 60 61 62 63 64 subq %r10,%r11 65 andq $-4096,%r11 66 leaq (%r10,%r11,1),%rsp 67 movq (%rsp),%r11 68 cmpq %r10,%rsp 69 ja .Lmul_page_walk 70 jmp .Lmul_page_walk_done 71 72.Lmul_page_walk: 73 leaq -4096(%rsp),%rsp 74 movq (%rsp),%r11 75 cmpq %r10,%rsp 76 ja .Lmul_page_walk 77.Lmul_page_walk_done: 78 79 leaq .Linc(%rip),%r10 80 movq %rax,8(%rsp,%r9,8) 81.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08 82.Lmul_body: 83 84 leaq 128(%rdx),%r12 85 movdqa 0(%r10),%xmm0 86 movdqa 16(%r10),%xmm1 87 leaq 24-112(%rsp,%r9,8),%r10 88 andq $-16,%r10 89 90 pshufd $0,%xmm5,%xmm5 91 movdqa %xmm1,%xmm4 92 movdqa %xmm1,%xmm2 93 paddd %xmm0,%xmm1 94 pcmpeqd %xmm5,%xmm0 95.byte 0x67 96 movdqa %xmm4,%xmm3 97 paddd %xmm1,%xmm2 98 pcmpeqd %xmm5,%xmm1 99 movdqa %xmm0,112(%r10) 100 movdqa 
%xmm4,%xmm0 101 102 paddd %xmm2,%xmm3 103 pcmpeqd %xmm5,%xmm2 104 movdqa %xmm1,128(%r10) 105 movdqa %xmm4,%xmm1 106 107 paddd %xmm3,%xmm0 108 pcmpeqd %xmm5,%xmm3 109 movdqa %xmm2,144(%r10) 110 movdqa %xmm4,%xmm2 111 112 paddd %xmm0,%xmm1 113 pcmpeqd %xmm5,%xmm0 114 movdqa %xmm3,160(%r10) 115 movdqa %xmm4,%xmm3 116 paddd %xmm1,%xmm2 117 pcmpeqd %xmm5,%xmm1 118 movdqa %xmm0,176(%r10) 119 movdqa %xmm4,%xmm0 120 121 paddd %xmm2,%xmm3 122 pcmpeqd %xmm5,%xmm2 123 movdqa %xmm1,192(%r10) 124 movdqa %xmm4,%xmm1 125 126 paddd %xmm3,%xmm0 127 pcmpeqd %xmm5,%xmm3 128 movdqa %xmm2,208(%r10) 129 movdqa %xmm4,%xmm2 130 131 paddd %xmm0,%xmm1 132 pcmpeqd %xmm5,%xmm0 133 movdqa %xmm3,224(%r10) 134 movdqa %xmm4,%xmm3 135 paddd %xmm1,%xmm2 136 pcmpeqd %xmm5,%xmm1 137 movdqa %xmm0,240(%r10) 138 movdqa %xmm4,%xmm0 139 140 paddd %xmm2,%xmm3 141 pcmpeqd %xmm5,%xmm2 142 movdqa %xmm1,256(%r10) 143 movdqa %xmm4,%xmm1 144 145 paddd %xmm3,%xmm0 146 pcmpeqd %xmm5,%xmm3 147 movdqa %xmm2,272(%r10) 148 movdqa %xmm4,%xmm2 149 150 paddd %xmm0,%xmm1 151 pcmpeqd %xmm5,%xmm0 152 movdqa %xmm3,288(%r10) 153 movdqa %xmm4,%xmm3 154 paddd %xmm1,%xmm2 155 pcmpeqd %xmm5,%xmm1 156 movdqa %xmm0,304(%r10) 157 158 paddd %xmm2,%xmm3 159.byte 0x67 160 pcmpeqd %xmm5,%xmm2 161 movdqa %xmm1,320(%r10) 162 163 pcmpeqd %xmm5,%xmm3 164 movdqa %xmm2,336(%r10) 165 pand 64(%r12),%xmm0 166 167 pand 80(%r12),%xmm1 168 pand 96(%r12),%xmm2 169 movdqa %xmm3,352(%r10) 170 pand 112(%r12),%xmm3 171 por %xmm2,%xmm0 172 por %xmm3,%xmm1 173 movdqa -128(%r12),%xmm4 174 movdqa -112(%r12),%xmm5 175 movdqa -96(%r12),%xmm2 176 pand 112(%r10),%xmm4 177 movdqa -80(%r12),%xmm3 178 pand 128(%r10),%xmm5 179 por %xmm4,%xmm0 180 pand 144(%r10),%xmm2 181 por %xmm5,%xmm1 182 pand 160(%r10),%xmm3 183 por %xmm2,%xmm0 184 por %xmm3,%xmm1 185 movdqa -64(%r12),%xmm4 186 movdqa -48(%r12),%xmm5 187 movdqa -32(%r12),%xmm2 188 pand 176(%r10),%xmm4 189 movdqa -16(%r12),%xmm3 190 pand 192(%r10),%xmm5 191 por %xmm4,%xmm0 192 pand 208(%r10),%xmm2 193 por 
%xmm5,%xmm1 194 pand 224(%r10),%xmm3 195 por %xmm2,%xmm0 196 por %xmm3,%xmm1 197 movdqa 0(%r12),%xmm4 198 movdqa 16(%r12),%xmm5 199 movdqa 32(%r12),%xmm2 200 pand 240(%r10),%xmm4 201 movdqa 48(%r12),%xmm3 202 pand 256(%r10),%xmm5 203 por %xmm4,%xmm0 204 pand 272(%r10),%xmm2 205 por %xmm5,%xmm1 206 pand 288(%r10),%xmm3 207 por %xmm2,%xmm0 208 por %xmm3,%xmm1 209 por %xmm1,%xmm0 210 211 pshufd $0x4e,%xmm0,%xmm1 212 por %xmm1,%xmm0 213 leaq 256(%r12),%r12 214.byte 102,72,15,126,195 215 216 movq (%r8),%r8 217 movq (%rsi),%rax 218 219 xorq %r14,%r14 220 xorq %r15,%r15 221 222 movq %r8,%rbp 223 mulq %rbx 224 movq %rax,%r10 225 movq (%rcx),%rax 226 227 imulq %r10,%rbp 228 movq %rdx,%r11 229 230 mulq %rbp 231 addq %rax,%r10 232 movq 8(%rsi),%rax 233 adcq $0,%rdx 234 movq %rdx,%r13 235 236 leaq 1(%r15),%r15 237 jmp .L1st_enter 238 239.align 16 240.L1st: 241 addq %rax,%r13 242 movq (%rsi,%r15,8),%rax 243 adcq $0,%rdx 244 addq %r11,%r13 245 movq %r10,%r11 246 adcq $0,%rdx 247 movq %r13,-16(%rsp,%r15,8) 248 movq %rdx,%r13 249 250.L1st_enter: 251 mulq %rbx 252 addq %rax,%r11 253 movq (%rcx,%r15,8),%rax 254 adcq $0,%rdx 255 leaq 1(%r15),%r15 256 movq %rdx,%r10 257 258 mulq %rbp 259 cmpq %r9,%r15 260 jne .L1st 261 262 263 addq %rax,%r13 264 adcq $0,%rdx 265 addq %r11,%r13 266 adcq $0,%rdx 267 movq %r13,-16(%rsp,%r9,8) 268 movq %rdx,%r13 269 movq %r10,%r11 270 271 xorq %rdx,%rdx 272 addq %r11,%r13 273 adcq $0,%rdx 274 movq %r13,-8(%rsp,%r9,8) 275 movq %rdx,(%rsp,%r9,8) 276 277 leaq 1(%r14),%r14 278 jmp .Louter 279.align 16 280.Louter: 281 leaq 24+128(%rsp,%r9,8),%rdx 282 andq $-16,%rdx 283 pxor %xmm4,%xmm4 284 pxor %xmm5,%xmm5 285 movdqa -128(%r12),%xmm0 286 movdqa -112(%r12),%xmm1 287 movdqa -96(%r12),%xmm2 288 movdqa -80(%r12),%xmm3 289 pand -128(%rdx),%xmm0 290 pand -112(%rdx),%xmm1 291 por %xmm0,%xmm4 292 pand -96(%rdx),%xmm2 293 por %xmm1,%xmm5 294 pand -80(%rdx),%xmm3 295 por %xmm2,%xmm4 296 por %xmm3,%xmm5 297 movdqa -64(%r12),%xmm0 298 movdqa -48(%r12),%xmm1 299 movdqa 
-32(%r12),%xmm2 300 movdqa -16(%r12),%xmm3 301 pand -64(%rdx),%xmm0 302 pand -48(%rdx),%xmm1 303 por %xmm0,%xmm4 304 pand -32(%rdx),%xmm2 305 por %xmm1,%xmm5 306 pand -16(%rdx),%xmm3 307 por %xmm2,%xmm4 308 por %xmm3,%xmm5 309 movdqa 0(%r12),%xmm0 310 movdqa 16(%r12),%xmm1 311 movdqa 32(%r12),%xmm2 312 movdqa 48(%r12),%xmm3 313 pand 0(%rdx),%xmm0 314 pand 16(%rdx),%xmm1 315 por %xmm0,%xmm4 316 pand 32(%rdx),%xmm2 317 por %xmm1,%xmm5 318 pand 48(%rdx),%xmm3 319 por %xmm2,%xmm4 320 por %xmm3,%xmm5 321 movdqa 64(%r12),%xmm0 322 movdqa 80(%r12),%xmm1 323 movdqa 96(%r12),%xmm2 324 movdqa 112(%r12),%xmm3 325 pand 64(%rdx),%xmm0 326 pand 80(%rdx),%xmm1 327 por %xmm0,%xmm4 328 pand 96(%rdx),%xmm2 329 por %xmm1,%xmm5 330 pand 112(%rdx),%xmm3 331 por %xmm2,%xmm4 332 por %xmm3,%xmm5 333 por %xmm5,%xmm4 334 335 pshufd $0x4e,%xmm4,%xmm0 336 por %xmm4,%xmm0 337 leaq 256(%r12),%r12 338 339 movq (%rsi),%rax 340.byte 102,72,15,126,195 341 342 xorq %r15,%r15 343 movq %r8,%rbp 344 movq (%rsp),%r10 345 346 mulq %rbx 347 addq %rax,%r10 348 movq (%rcx),%rax 349 adcq $0,%rdx 350 351 imulq %r10,%rbp 352 movq %rdx,%r11 353 354 mulq %rbp 355 addq %rax,%r10 356 movq 8(%rsi),%rax 357 adcq $0,%rdx 358 movq 8(%rsp),%r10 359 movq %rdx,%r13 360 361 leaq 1(%r15),%r15 362 jmp .Linner_enter 363 364.align 16 365.Linner: 366 addq %rax,%r13 367 movq (%rsi,%r15,8),%rax 368 adcq $0,%rdx 369 addq %r10,%r13 370 movq (%rsp,%r15,8),%r10 371 adcq $0,%rdx 372 movq %r13,-16(%rsp,%r15,8) 373 movq %rdx,%r13 374 375.Linner_enter: 376 mulq %rbx 377 addq %rax,%r11 378 movq (%rcx,%r15,8),%rax 379 adcq $0,%rdx 380 addq %r11,%r10 381 movq %rdx,%r11 382 adcq $0,%r11 383 leaq 1(%r15),%r15 384 385 mulq %rbp 386 cmpq %r9,%r15 387 jne .Linner 388 389 addq %rax,%r13 390 adcq $0,%rdx 391 addq %r10,%r13 392 movq (%rsp,%r9,8),%r10 393 adcq $0,%rdx 394 movq %r13,-16(%rsp,%r9,8) 395 movq %rdx,%r13 396 397 xorq %rdx,%rdx 398 addq %r11,%r13 399 adcq $0,%rdx 400 addq %r10,%r13 401 adcq $0,%rdx 402 movq %r13,-8(%rsp,%r9,8) 403 movq 
%rdx,(%rsp,%r9,8) 404 405 leaq 1(%r14),%r14 406 cmpq %r9,%r14 407 jb .Louter 408 409 xorq %r14,%r14 410 movq (%rsp),%rax 411 leaq (%rsp),%rsi 412 movq %r9,%r15 413 jmp .Lsub 414.align 16 415.Lsub: sbbq (%rcx,%r14,8),%rax 416 movq %rax,(%rdi,%r14,8) 417 movq 8(%rsi,%r14,8),%rax 418 leaq 1(%r14),%r14 419 decq %r15 420 jnz .Lsub 421 422 sbbq $0,%rax 423 movq $-1,%rbx 424 xorq %rax,%rbx 425 xorq %r14,%r14 426 movq %r9,%r15 427 428.Lcopy: 429 movq (%rdi,%r14,8),%rcx 430 movq (%rsp,%r14,8),%rdx 431 andq %rbx,%rcx 432 andq %rax,%rdx 433 movq %r14,(%rsp,%r14,8) 434 orq %rcx,%rdx 435 movq %rdx,(%rdi,%r14,8) 436 leaq 1(%r14),%r14 437 subq $1,%r15 438 jnz .Lcopy 439 440 movq 8(%rsp,%r9,8),%rsi 441.cfi_def_cfa %rsi,8 442 movq $1,%rax 443 444 movq -48(%rsi),%r15 445.cfi_restore %r15 446 movq -40(%rsi),%r14 447.cfi_restore %r14 448 movq -32(%rsi),%r13 449.cfi_restore %r13 450 movq -24(%rsi),%r12 451.cfi_restore %r12 452 movq -16(%rsi),%rbp 453.cfi_restore %rbp 454 movq -8(%rsi),%rbx 455.cfi_restore %rbx 456 leaq (%rsi),%rsp 457.cfi_def_cfa_register %rsp 458.Lmul_epilogue: 459 .byte 0xf3,0xc3 460.cfi_endproc 461.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 462.type bn_mul4x_mont_gather5,@function 463.align 32 464bn_mul4x_mont_gather5: 465.cfi_startproc 466.byte 0x67 467 movq %rsp,%rax 468.cfi_def_cfa_register %rax 469.Lmul4x_enter: 470 andl $0x80108,%r11d 471 cmpl $0x80108,%r11d 472 je .Lmulx4x_enter 473 pushq %rbx 474.cfi_offset %rbx,-16 475 pushq %rbp 476.cfi_offset %rbp,-24 477 pushq %r12 478.cfi_offset %r12,-32 479 pushq %r13 480.cfi_offset %r13,-40 481 pushq %r14 482.cfi_offset %r14,-48 483 pushq %r15 484.cfi_offset %r15,-56 485.Lmul4x_prologue: 486 487.byte 0x67 488 shll $3,%r9d 489 leaq (%r9,%r9,2),%r10 490 negq %r9 491 492 493 494 495 496 497 498 499 500 501 leaq -320(%rsp,%r9,2),%r11 502 movq %rsp,%rbp 503 subq %rdi,%r11 504 andq $4095,%r11 505 cmpq %r11,%r10 506 jb .Lmul4xsp_alt 507 subq %r11,%rbp 508 leaq -320(%rbp,%r9,2),%rbp 509 jmp .Lmul4xsp_done 510 511.align 32 
512.Lmul4xsp_alt: 513 leaq 4096-320(,%r9,2),%r10 514 leaq -320(%rbp,%r9,2),%rbp 515 subq %r10,%r11 516 movq $0,%r10 517 cmovcq %r10,%r11 518 subq %r11,%rbp 519.Lmul4xsp_done: 520 andq $-64,%rbp 521 movq %rsp,%r11 522 subq %rbp,%r11 523 andq $-4096,%r11 524 leaq (%r11,%rbp,1),%rsp 525 movq (%rsp),%r10 526 cmpq %rbp,%rsp 527 ja .Lmul4x_page_walk 528 jmp .Lmul4x_page_walk_done 529 530.Lmul4x_page_walk: 531 leaq -4096(%rsp),%rsp 532 movq (%rsp),%r10 533 cmpq %rbp,%rsp 534 ja .Lmul4x_page_walk 535.Lmul4x_page_walk_done: 536 537 negq %r9 538 539 movq %rax,40(%rsp) 540.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 541.Lmul4x_body: 542 543 call mul4x_internal 544 545 movq 40(%rsp),%rsi 546.cfi_def_cfa %rsi,8 547 movq $1,%rax 548 549 movq -48(%rsi),%r15 550.cfi_restore %r15 551 movq -40(%rsi),%r14 552.cfi_restore %r14 553 movq -32(%rsi),%r13 554.cfi_restore %r13 555 movq -24(%rsi),%r12 556.cfi_restore %r12 557 movq -16(%rsi),%rbp 558.cfi_restore %rbp 559 movq -8(%rsi),%rbx 560.cfi_restore %rbx 561 leaq (%rsi),%rsp 562.cfi_def_cfa_register %rsp 563.Lmul4x_epilogue: 564 .byte 0xf3,0xc3 565.cfi_endproc 566.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 567 568.type mul4x_internal,@function 569.align 32 570mul4x_internal: 571.cfi_startproc 572 shlq $5,%r9 573 movd 8(%rax),%xmm5 574 leaq .Linc(%rip),%rax 575 leaq 128(%rdx,%r9,1),%r13 576 shrq $5,%r9 577 movdqa 0(%rax),%xmm0 578 movdqa 16(%rax),%xmm1 579 leaq 88-112(%rsp,%r9,1),%r10 580 leaq 128(%rdx),%r12 581 582 pshufd $0,%xmm5,%xmm5 583 movdqa %xmm1,%xmm4 584.byte 0x67,0x67 585 movdqa %xmm1,%xmm2 586 paddd %xmm0,%xmm1 587 pcmpeqd %xmm5,%xmm0 588.byte 0x67 589 movdqa %xmm4,%xmm3 590 paddd %xmm1,%xmm2 591 pcmpeqd %xmm5,%xmm1 592 movdqa %xmm0,112(%r10) 593 movdqa %xmm4,%xmm0 594 595 paddd %xmm2,%xmm3 596 pcmpeqd %xmm5,%xmm2 597 movdqa %xmm1,128(%r10) 598 movdqa %xmm4,%xmm1 599 600 paddd %xmm3,%xmm0 601 pcmpeqd %xmm5,%xmm3 602 movdqa %xmm2,144(%r10) 603 movdqa %xmm4,%xmm2 604 605 paddd %xmm0,%xmm1 606 pcmpeqd %xmm5,%xmm0 607 
movdqa %xmm3,160(%r10) 608 movdqa %xmm4,%xmm3 609 paddd %xmm1,%xmm2 610 pcmpeqd %xmm5,%xmm1 611 movdqa %xmm0,176(%r10) 612 movdqa %xmm4,%xmm0 613 614 paddd %xmm2,%xmm3 615 pcmpeqd %xmm5,%xmm2 616 movdqa %xmm1,192(%r10) 617 movdqa %xmm4,%xmm1 618 619 paddd %xmm3,%xmm0 620 pcmpeqd %xmm5,%xmm3 621 movdqa %xmm2,208(%r10) 622 movdqa %xmm4,%xmm2 623 624 paddd %xmm0,%xmm1 625 pcmpeqd %xmm5,%xmm0 626 movdqa %xmm3,224(%r10) 627 movdqa %xmm4,%xmm3 628 paddd %xmm1,%xmm2 629 pcmpeqd %xmm5,%xmm1 630 movdqa %xmm0,240(%r10) 631 movdqa %xmm4,%xmm0 632 633 paddd %xmm2,%xmm3 634 pcmpeqd %xmm5,%xmm2 635 movdqa %xmm1,256(%r10) 636 movdqa %xmm4,%xmm1 637 638 paddd %xmm3,%xmm0 639 pcmpeqd %xmm5,%xmm3 640 movdqa %xmm2,272(%r10) 641 movdqa %xmm4,%xmm2 642 643 paddd %xmm0,%xmm1 644 pcmpeqd %xmm5,%xmm0 645 movdqa %xmm3,288(%r10) 646 movdqa %xmm4,%xmm3 647 paddd %xmm1,%xmm2 648 pcmpeqd %xmm5,%xmm1 649 movdqa %xmm0,304(%r10) 650 651 paddd %xmm2,%xmm3 652.byte 0x67 653 pcmpeqd %xmm5,%xmm2 654 movdqa %xmm1,320(%r10) 655 656 pcmpeqd %xmm5,%xmm3 657 movdqa %xmm2,336(%r10) 658 pand 64(%r12),%xmm0 659 660 pand 80(%r12),%xmm1 661 pand 96(%r12),%xmm2 662 movdqa %xmm3,352(%r10) 663 pand 112(%r12),%xmm3 664 por %xmm2,%xmm0 665 por %xmm3,%xmm1 666 movdqa -128(%r12),%xmm4 667 movdqa -112(%r12),%xmm5 668 movdqa -96(%r12),%xmm2 669 pand 112(%r10),%xmm4 670 movdqa -80(%r12),%xmm3 671 pand 128(%r10),%xmm5 672 por %xmm4,%xmm0 673 pand 144(%r10),%xmm2 674 por %xmm5,%xmm1 675 pand 160(%r10),%xmm3 676 por %xmm2,%xmm0 677 por %xmm3,%xmm1 678 movdqa -64(%r12),%xmm4 679 movdqa -48(%r12),%xmm5 680 movdqa -32(%r12),%xmm2 681 pand 176(%r10),%xmm4 682 movdqa -16(%r12),%xmm3 683 pand 192(%r10),%xmm5 684 por %xmm4,%xmm0 685 pand 208(%r10),%xmm2 686 por %xmm5,%xmm1 687 pand 224(%r10),%xmm3 688 por %xmm2,%xmm0 689 por %xmm3,%xmm1 690 movdqa 0(%r12),%xmm4 691 movdqa 16(%r12),%xmm5 692 movdqa 32(%r12),%xmm2 693 pand 240(%r10),%xmm4 694 movdqa 48(%r12),%xmm3 695 pand 256(%r10),%xmm5 696 por %xmm4,%xmm0 697 pand 
272(%r10),%xmm2 698 por %xmm5,%xmm1 699 pand 288(%r10),%xmm3 700 por %xmm2,%xmm0 701 por %xmm3,%xmm1 702 por %xmm1,%xmm0 703 704 pshufd $0x4e,%xmm0,%xmm1 705 por %xmm1,%xmm0 706 leaq 256(%r12),%r12 707.byte 102,72,15,126,195 708 709 movq %r13,16+8(%rsp) 710 movq %rdi,56+8(%rsp) 711 712 movq (%r8),%r8 713 movq (%rsi),%rax 714 leaq (%rsi,%r9,1),%rsi 715 negq %r9 716 717 movq %r8,%rbp 718 mulq %rbx 719 movq %rax,%r10 720 movq (%rcx),%rax 721 722 imulq %r10,%rbp 723 leaq 64+8(%rsp),%r14 724 movq %rdx,%r11 725 726 mulq %rbp 727 addq %rax,%r10 728 movq 8(%rsi,%r9,1),%rax 729 adcq $0,%rdx 730 movq %rdx,%rdi 731 732 mulq %rbx 733 addq %rax,%r11 734 movq 8(%rcx),%rax 735 adcq $0,%rdx 736 movq %rdx,%r10 737 738 mulq %rbp 739 addq %rax,%rdi 740 movq 16(%rsi,%r9,1),%rax 741 adcq $0,%rdx 742 addq %r11,%rdi 743 leaq 32(%r9),%r15 744 leaq 32(%rcx),%rcx 745 adcq $0,%rdx 746 movq %rdi,(%r14) 747 movq %rdx,%r13 748 jmp .L1st4x 749 750.align 32 751.L1st4x: 752 mulq %rbx 753 addq %rax,%r10 754 movq -16(%rcx),%rax 755 leaq 32(%r14),%r14 756 adcq $0,%rdx 757 movq %rdx,%r11 758 759 mulq %rbp 760 addq %rax,%r13 761 movq -8(%rsi,%r15,1),%rax 762 adcq $0,%rdx 763 addq %r10,%r13 764 adcq $0,%rdx 765 movq %r13,-24(%r14) 766 movq %rdx,%rdi 767 768 mulq %rbx 769 addq %rax,%r11 770 movq -8(%rcx),%rax 771 adcq $0,%rdx 772 movq %rdx,%r10 773 774 mulq %rbp 775 addq %rax,%rdi 776 movq (%rsi,%r15,1),%rax 777 adcq $0,%rdx 778 addq %r11,%rdi 779 adcq $0,%rdx 780 movq %rdi,-16(%r14) 781 movq %rdx,%r13 782 783 mulq %rbx 784 addq %rax,%r10 785 movq 0(%rcx),%rax 786 adcq $0,%rdx 787 movq %rdx,%r11 788 789 mulq %rbp 790 addq %rax,%r13 791 movq 8(%rsi,%r15,1),%rax 792 adcq $0,%rdx 793 addq %r10,%r13 794 adcq $0,%rdx 795 movq %r13,-8(%r14) 796 movq %rdx,%rdi 797 798 mulq %rbx 799 addq %rax,%r11 800 movq 8(%rcx),%rax 801 adcq $0,%rdx 802 movq %rdx,%r10 803 804 mulq %rbp 805 addq %rax,%rdi 806 movq 16(%rsi,%r15,1),%rax 807 adcq $0,%rdx 808 addq %r11,%rdi 809 leaq 32(%rcx),%rcx 810 adcq $0,%rdx 811 movq 
%rdi,(%r14) 812 movq %rdx,%r13 813 814 addq $32,%r15 815 jnz .L1st4x 816 817 mulq %rbx 818 addq %rax,%r10 819 movq -16(%rcx),%rax 820 leaq 32(%r14),%r14 821 adcq $0,%rdx 822 movq %rdx,%r11 823 824 mulq %rbp 825 addq %rax,%r13 826 movq -8(%rsi),%rax 827 adcq $0,%rdx 828 addq %r10,%r13 829 adcq $0,%rdx 830 movq %r13,-24(%r14) 831 movq %rdx,%rdi 832 833 mulq %rbx 834 addq %rax,%r11 835 movq -8(%rcx),%rax 836 adcq $0,%rdx 837 movq %rdx,%r10 838 839 mulq %rbp 840 addq %rax,%rdi 841 movq (%rsi,%r9,1),%rax 842 adcq $0,%rdx 843 addq %r11,%rdi 844 adcq $0,%rdx 845 movq %rdi,-16(%r14) 846 movq %rdx,%r13 847 848 leaq (%rcx,%r9,1),%rcx 849 850 xorq %rdi,%rdi 851 addq %r10,%r13 852 adcq $0,%rdi 853 movq %r13,-8(%r14) 854 855 jmp .Louter4x 856 857.align 32 858.Louter4x: 859 leaq 16+128(%r14),%rdx 860 pxor %xmm4,%xmm4 861 pxor %xmm5,%xmm5 862 movdqa -128(%r12),%xmm0 863 movdqa -112(%r12),%xmm1 864 movdqa -96(%r12),%xmm2 865 movdqa -80(%r12),%xmm3 866 pand -128(%rdx),%xmm0 867 pand -112(%rdx),%xmm1 868 por %xmm0,%xmm4 869 pand -96(%rdx),%xmm2 870 por %xmm1,%xmm5 871 pand -80(%rdx),%xmm3 872 por %xmm2,%xmm4 873 por %xmm3,%xmm5 874 movdqa -64(%r12),%xmm0 875 movdqa -48(%r12),%xmm1 876 movdqa -32(%r12),%xmm2 877 movdqa -16(%r12),%xmm3 878 pand -64(%rdx),%xmm0 879 pand -48(%rdx),%xmm1 880 por %xmm0,%xmm4 881 pand -32(%rdx),%xmm2 882 por %xmm1,%xmm5 883 pand -16(%rdx),%xmm3 884 por %xmm2,%xmm4 885 por %xmm3,%xmm5 886 movdqa 0(%r12),%xmm0 887 movdqa 16(%r12),%xmm1 888 movdqa 32(%r12),%xmm2 889 movdqa 48(%r12),%xmm3 890 pand 0(%rdx),%xmm0 891 pand 16(%rdx),%xmm1 892 por %xmm0,%xmm4 893 pand 32(%rdx),%xmm2 894 por %xmm1,%xmm5 895 pand 48(%rdx),%xmm3 896 por %xmm2,%xmm4 897 por %xmm3,%xmm5 898 movdqa 64(%r12),%xmm0 899 movdqa 80(%r12),%xmm1 900 movdqa 96(%r12),%xmm2 901 movdqa 112(%r12),%xmm3 902 pand 64(%rdx),%xmm0 903 pand 80(%rdx),%xmm1 904 por %xmm0,%xmm4 905 pand 96(%rdx),%xmm2 906 por %xmm1,%xmm5 907 pand 112(%rdx),%xmm3 908 por %xmm2,%xmm4 909 por %xmm3,%xmm5 910 por %xmm5,%xmm4 911 
912 pshufd $0x4e,%xmm4,%xmm0 913 por %xmm4,%xmm0 914 leaq 256(%r12),%r12 915.byte 102,72,15,126,195 916 917 movq (%r14,%r9,1),%r10 918 movq %r8,%rbp 919 mulq %rbx 920 addq %rax,%r10 921 movq (%rcx),%rax 922 adcq $0,%rdx 923 924 imulq %r10,%rbp 925 movq %rdx,%r11 926 movq %rdi,(%r14) 927 928 leaq (%r14,%r9,1),%r14 929 930 mulq %rbp 931 addq %rax,%r10 932 movq 8(%rsi,%r9,1),%rax 933 adcq $0,%rdx 934 movq %rdx,%rdi 935 936 mulq %rbx 937 addq %rax,%r11 938 movq 8(%rcx),%rax 939 adcq $0,%rdx 940 addq 8(%r14),%r11 941 adcq $0,%rdx 942 movq %rdx,%r10 943 944 mulq %rbp 945 addq %rax,%rdi 946 movq 16(%rsi,%r9,1),%rax 947 adcq $0,%rdx 948 addq %r11,%rdi 949 leaq 32(%r9),%r15 950 leaq 32(%rcx),%rcx 951 adcq $0,%rdx 952 movq %rdx,%r13 953 jmp .Linner4x 954 955.align 32 956.Linner4x: 957 mulq %rbx 958 addq %rax,%r10 959 movq -16(%rcx),%rax 960 adcq $0,%rdx 961 addq 16(%r14),%r10 962 leaq 32(%r14),%r14 963 adcq $0,%rdx 964 movq %rdx,%r11 965 966 mulq %rbp 967 addq %rax,%r13 968 movq -8(%rsi,%r15,1),%rax 969 adcq $0,%rdx 970 addq %r10,%r13 971 adcq $0,%rdx 972 movq %rdi,-32(%r14) 973 movq %rdx,%rdi 974 975 mulq %rbx 976 addq %rax,%r11 977 movq -8(%rcx),%rax 978 adcq $0,%rdx 979 addq -8(%r14),%r11 980 adcq $0,%rdx 981 movq %rdx,%r10 982 983 mulq %rbp 984 addq %rax,%rdi 985 movq (%rsi,%r15,1),%rax 986 adcq $0,%rdx 987 addq %r11,%rdi 988 adcq $0,%rdx 989 movq %r13,-24(%r14) 990 movq %rdx,%r13 991 992 mulq %rbx 993 addq %rax,%r10 994 movq 0(%rcx),%rax 995 adcq $0,%rdx 996 addq (%r14),%r10 997 adcq $0,%rdx 998 movq %rdx,%r11 999 1000 mulq %rbp 1001 addq %rax,%r13 1002 movq 8(%rsi,%r15,1),%rax 1003 adcq $0,%rdx 1004 addq %r10,%r13 1005 adcq $0,%rdx 1006 movq %rdi,-16(%r14) 1007 movq %rdx,%rdi 1008 1009 mulq %rbx 1010 addq %rax,%r11 1011 movq 8(%rcx),%rax 1012 adcq $0,%rdx 1013 addq 8(%r14),%r11 1014 adcq $0,%rdx 1015 movq %rdx,%r10 1016 1017 mulq %rbp 1018 addq %rax,%rdi 1019 movq 16(%rsi,%r15,1),%rax 1020 adcq $0,%rdx 1021 addq %r11,%rdi 1022 leaq 32(%rcx),%rcx 1023 adcq $0,%rdx 1024 
movq %r13,-8(%r14) 1025 movq %rdx,%r13 1026 1027 addq $32,%r15 1028 jnz .Linner4x 1029 1030 mulq %rbx 1031 addq %rax,%r10 1032 movq -16(%rcx),%rax 1033 adcq $0,%rdx 1034 addq 16(%r14),%r10 1035 leaq 32(%r14),%r14 1036 adcq $0,%rdx 1037 movq %rdx,%r11 1038 1039 mulq %rbp 1040 addq %rax,%r13 1041 movq -8(%rsi),%rax 1042 adcq $0,%rdx 1043 addq %r10,%r13 1044 adcq $0,%rdx 1045 movq %rdi,-32(%r14) 1046 movq %rdx,%rdi 1047 1048 mulq %rbx 1049 addq %rax,%r11 1050 movq %rbp,%rax 1051 movq -8(%rcx),%rbp 1052 adcq $0,%rdx 1053 addq -8(%r14),%r11 1054 adcq $0,%rdx 1055 movq %rdx,%r10 1056 1057 mulq %rbp 1058 addq %rax,%rdi 1059 movq (%rsi,%r9,1),%rax 1060 adcq $0,%rdx 1061 addq %r11,%rdi 1062 adcq $0,%rdx 1063 movq %r13,-24(%r14) 1064 movq %rdx,%r13 1065 1066 movq %rdi,-16(%r14) 1067 leaq (%rcx,%r9,1),%rcx 1068 1069 xorq %rdi,%rdi 1070 addq %r10,%r13 1071 adcq $0,%rdi 1072 addq (%r14),%r13 1073 adcq $0,%rdi 1074 movq %r13,-8(%r14) 1075 1076 cmpq 16+8(%rsp),%r12 1077 jb .Louter4x 1078 xorq %rax,%rax 1079 subq %r13,%rbp 1080 adcq %r15,%r15 1081 orq %r15,%rdi 1082 subq %rdi,%rax 1083 leaq (%r14,%r9,1),%rbx 1084 movq (%rcx),%r12 1085 leaq (%rcx),%rbp 1086 movq %r9,%rcx 1087 sarq $3+2,%rcx 1088 movq 56+8(%rsp),%rdi 1089 decq %r12 1090 xorq %r10,%r10 1091 movq 8(%rbp),%r13 1092 movq 16(%rbp),%r14 1093 movq 24(%rbp),%r15 1094 jmp .Lsqr4x_sub_entry 1095.cfi_endproc 1096.size mul4x_internal,.-mul4x_internal 1097.globl bn_power5 1098.hidden bn_power5 1099.type bn_power5,@function 1100.align 32 1101bn_power5: 1102.cfi_startproc 1103 movq %rsp,%rax 1104.cfi_def_cfa_register %rax 1105 leaq OPENSSL_ia32cap_P(%rip),%r11 1106 movl 8(%r11),%r11d 1107 andl $0x80108,%r11d 1108 cmpl $0x80108,%r11d 1109 je .Lpowerx5_enter 1110 pushq %rbx 1111.cfi_offset %rbx,-16 1112 pushq %rbp 1113.cfi_offset %rbp,-24 1114 pushq %r12 1115.cfi_offset %r12,-32 1116 pushq %r13 1117.cfi_offset %r13,-40 1118 pushq %r14 1119.cfi_offset %r14,-48 1120 pushq %r15 1121.cfi_offset %r15,-56 1122.Lpower5_prologue: 1123 1124 
shll $3,%r9d 1125 leal (%r9,%r9,2),%r10d 1126 negq %r9 1127 movq (%r8),%r8 1128 1129 1130 1131 1132 1133 1134 1135 1136 leaq -320(%rsp,%r9,2),%r11 1137 movq %rsp,%rbp 1138 subq %rdi,%r11 1139 andq $4095,%r11 1140 cmpq %r11,%r10 1141 jb .Lpwr_sp_alt 1142 subq %r11,%rbp 1143 leaq -320(%rbp,%r9,2),%rbp 1144 jmp .Lpwr_sp_done 1145 1146.align 32 1147.Lpwr_sp_alt: 1148 leaq 4096-320(,%r9,2),%r10 1149 leaq -320(%rbp,%r9,2),%rbp 1150 subq %r10,%r11 1151 movq $0,%r10 1152 cmovcq %r10,%r11 1153 subq %r11,%rbp 1154.Lpwr_sp_done: 1155 andq $-64,%rbp 1156 movq %rsp,%r11 1157 subq %rbp,%r11 1158 andq $-4096,%r11 1159 leaq (%r11,%rbp,1),%rsp 1160 movq (%rsp),%r10 1161 cmpq %rbp,%rsp 1162 ja .Lpwr_page_walk 1163 jmp .Lpwr_page_walk_done 1164 1165.Lpwr_page_walk: 1166 leaq -4096(%rsp),%rsp 1167 movq (%rsp),%r10 1168 cmpq %rbp,%rsp 1169 ja .Lpwr_page_walk 1170.Lpwr_page_walk_done: 1171 1172 movq %r9,%r10 1173 negq %r9 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 movq %r8,32(%rsp) 1185 movq %rax,40(%rsp) 1186.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 1187.Lpower5_body: 1188.byte 102,72,15,110,207 1189.byte 102,72,15,110,209 1190.byte 102,73,15,110,218 1191.byte 102,72,15,110,226 1192 1193 call __bn_sqr8x_internal 1194 call __bn_post4x_internal 1195 call __bn_sqr8x_internal 1196 call __bn_post4x_internal 1197 call __bn_sqr8x_internal 1198 call __bn_post4x_internal 1199 call __bn_sqr8x_internal 1200 call __bn_post4x_internal 1201 call __bn_sqr8x_internal 1202 call __bn_post4x_internal 1203 1204.byte 102,72,15,126,209 1205.byte 102,72,15,126,226 1206 movq %rsi,%rdi 1207 movq 40(%rsp),%rax 1208 leaq 32(%rsp),%r8 1209 1210 call mul4x_internal 1211 1212 movq 40(%rsp),%rsi 1213.cfi_def_cfa %rsi,8 1214 movq $1,%rax 1215 movq -48(%rsi),%r15 1216.cfi_restore %r15 1217 movq -40(%rsi),%r14 1218.cfi_restore %r14 1219 movq -32(%rsi),%r13 1220.cfi_restore %r13 1221 movq -24(%rsi),%r12 1222.cfi_restore %r12 1223 movq -16(%rsi),%rbp 1224.cfi_restore %rbp 1225 movq -8(%rsi),%rbx 
1226.cfi_restore %rbx 1227 leaq (%rsi),%rsp 1228.cfi_def_cfa_register %rsp 1229.Lpower5_epilogue: 1230 .byte 0xf3,0xc3 1231.cfi_endproc 1232.size bn_power5,.-bn_power5 1233 1234.globl bn_sqr8x_internal 1235.hidden bn_sqr8x_internal 1236.hidden bn_sqr8x_internal 1237.type bn_sqr8x_internal,@function 1238.align 32 1239bn_sqr8x_internal: 1240__bn_sqr8x_internal: 1241.cfi_startproc 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 leaq 32(%r10),%rbp 1316 leaq (%rsi,%r9,1),%rsi 1317 1318 movq %r9,%rcx 1319 1320 1321 movq -32(%rsi,%rbp,1),%r14 1322 leaq 48+8(%rsp,%r9,2),%rdi 1323 movq -24(%rsi,%rbp,1),%rax 1324 leaq -32(%rdi,%rbp,1),%rdi 1325 movq -16(%rsi,%rbp,1),%rbx 1326 movq %rax,%r15 1327 1328 mulq %r14 1329 movq %rax,%r10 1330 movq %rbx,%rax 1331 movq %rdx,%r11 1332 movq %r10,-24(%rdi,%rbp,1) 1333 1334 mulq %r14 1335 addq %rax,%r11 1336 movq %rbx,%rax 1337 adcq $0,%rdx 1338 movq %r11,-16(%rdi,%rbp,1) 1339 movq %rdx,%r10 1340 1341 1342 movq -8(%rsi,%rbp,1),%rbx 1343 mulq %r15 1344 movq %rax,%r12 1345 movq %rbx,%rax 1346 movq %rdx,%r13 1347 1348 leaq (%rbp),%rcx 1349 mulq %r14 1350 addq %rax,%r10 1351 movq %rbx,%rax 1352 movq %rdx,%r11 1353 adcq $0,%r11 1354 addq %r12,%r10 1355 adcq $0,%r11 1356 movq %r10,-8(%rdi,%rcx,1) 1357 jmp .Lsqr4x_1st 1358 1359.align 32 1360.Lsqr4x_1st: 1361 movq (%rsi,%rcx,1),%rbx 1362 mulq %r15 1363 addq %rax,%r13 1364 movq %rbx,%rax 1365 movq %rdx,%r12 1366 adcq $0,%r12 1367 1368 mulq %r14 1369 addq %rax,%r11 1370 movq %rbx,%rax 1371 movq 8(%rsi,%rcx,1),%rbx 1372 movq %rdx,%r10 1373 adcq $0,%r10 1374 addq %r13,%r11 1375 adcq $0,%r10 1376 1377 1378 mulq %r15 1379 addq %rax,%r12 1380 movq %rbx,%rax 1381 movq %r11,(%rdi,%rcx,1) 
1382 movq %rdx,%r13 1383 adcq $0,%r13 1384 1385 mulq %r14 1386 addq %rax,%r10 1387 movq %rbx,%rax 1388 movq 16(%rsi,%rcx,1),%rbx 1389 movq %rdx,%r11 1390 adcq $0,%r11 1391 addq %r12,%r10 1392 adcq $0,%r11 1393 1394 mulq %r15 1395 addq %rax,%r13 1396 movq %rbx,%rax 1397 movq %r10,8(%rdi,%rcx,1) 1398 movq %rdx,%r12 1399 adcq $0,%r12 1400 1401 mulq %r14 1402 addq %rax,%r11 1403 movq %rbx,%rax 1404 movq 24(%rsi,%rcx,1),%rbx 1405 movq %rdx,%r10 1406 adcq $0,%r10 1407 addq %r13,%r11 1408 adcq $0,%r10 1409 1410 1411 mulq %r15 1412 addq %rax,%r12 1413 movq %rbx,%rax 1414 movq %r11,16(%rdi,%rcx,1) 1415 movq %rdx,%r13 1416 adcq $0,%r13 1417 leaq 32(%rcx),%rcx 1418 1419 mulq %r14 1420 addq %rax,%r10 1421 movq %rbx,%rax 1422 movq %rdx,%r11 1423 adcq $0,%r11 1424 addq %r12,%r10 1425 adcq $0,%r11 1426 movq %r10,-8(%rdi,%rcx,1) 1427 1428 cmpq $0,%rcx 1429 jne .Lsqr4x_1st 1430 1431 mulq %r15 1432 addq %rax,%r13 1433 leaq 16(%rbp),%rbp 1434 adcq $0,%rdx 1435 addq %r11,%r13 1436 adcq $0,%rdx 1437 1438 movq %r13,(%rdi) 1439 movq %rdx,%r12 1440 movq %rdx,8(%rdi) 1441 jmp .Lsqr4x_outer 1442 1443.align 32 1444.Lsqr4x_outer: 1445 movq -32(%rsi,%rbp,1),%r14 1446 leaq 48+8(%rsp,%r9,2),%rdi 1447 movq -24(%rsi,%rbp,1),%rax 1448 leaq -32(%rdi,%rbp,1),%rdi 1449 movq -16(%rsi,%rbp,1),%rbx 1450 movq %rax,%r15 1451 1452 mulq %r14 1453 movq -24(%rdi,%rbp,1),%r10 1454 addq %rax,%r10 1455 movq %rbx,%rax 1456 adcq $0,%rdx 1457 movq %r10,-24(%rdi,%rbp,1) 1458 movq %rdx,%r11 1459 1460 mulq %r14 1461 addq %rax,%r11 1462 movq %rbx,%rax 1463 adcq $0,%rdx 1464 addq -16(%rdi,%rbp,1),%r11 1465 movq %rdx,%r10 1466 adcq $0,%r10 1467 movq %r11,-16(%rdi,%rbp,1) 1468 1469 xorq %r12,%r12 1470 1471 movq -8(%rsi,%rbp,1),%rbx 1472 mulq %r15 1473 addq %rax,%r12 1474 movq %rbx,%rax 1475 adcq $0,%rdx 1476 addq -8(%rdi,%rbp,1),%r12 1477 movq %rdx,%r13 1478 adcq $0,%r13 1479 1480 mulq %r14 1481 addq %rax,%r10 1482 movq %rbx,%rax 1483 adcq $0,%rdx 1484 addq %r12,%r10 1485 movq %rdx,%r11 1486 adcq $0,%r11 1487 movq 
%r10,-8(%rdi,%rbp,1) 1488 1489 leaq (%rbp),%rcx 1490 jmp .Lsqr4x_inner 1491 1492.align 32 1493.Lsqr4x_inner: 1494 movq (%rsi,%rcx,1),%rbx 1495 mulq %r15 1496 addq %rax,%r13 1497 movq %rbx,%rax 1498 movq %rdx,%r12 1499 adcq $0,%r12 1500 addq (%rdi,%rcx,1),%r13 1501 adcq $0,%r12 1502 1503.byte 0x67 1504 mulq %r14 1505 addq %rax,%r11 1506 movq %rbx,%rax 1507 movq 8(%rsi,%rcx,1),%rbx 1508 movq %rdx,%r10 1509 adcq $0,%r10 1510 addq %r13,%r11 1511 adcq $0,%r10 1512 1513 mulq %r15 1514 addq %rax,%r12 1515 movq %r11,(%rdi,%rcx,1) 1516 movq %rbx,%rax 1517 movq %rdx,%r13 1518 adcq $0,%r13 1519 addq 8(%rdi,%rcx,1),%r12 1520 leaq 16(%rcx),%rcx 1521 adcq $0,%r13 1522 1523 mulq %r14 1524 addq %rax,%r10 1525 movq %rbx,%rax 1526 adcq $0,%rdx 1527 addq %r12,%r10 1528 movq %rdx,%r11 1529 adcq $0,%r11 1530 movq %r10,-8(%rdi,%rcx,1) 1531 1532 cmpq $0,%rcx 1533 jne .Lsqr4x_inner 1534 1535.byte 0x67 1536 mulq %r15 1537 addq %rax,%r13 1538 adcq $0,%rdx 1539 addq %r11,%r13 1540 adcq $0,%rdx 1541 1542 movq %r13,(%rdi) 1543 movq %rdx,%r12 1544 movq %rdx,8(%rdi) 1545 1546 addq $16,%rbp 1547 jnz .Lsqr4x_outer 1548 1549 1550 movq -32(%rsi),%r14 1551 leaq 48+8(%rsp,%r9,2),%rdi 1552 movq -24(%rsi),%rax 1553 leaq -32(%rdi,%rbp,1),%rdi 1554 movq -16(%rsi),%rbx 1555 movq %rax,%r15 1556 1557 mulq %r14 1558 addq %rax,%r10 1559 movq %rbx,%rax 1560 movq %rdx,%r11 1561 adcq $0,%r11 1562 1563 mulq %r14 1564 addq %rax,%r11 1565 movq %rbx,%rax 1566 movq %r10,-24(%rdi) 1567 movq %rdx,%r10 1568 adcq $0,%r10 1569 addq %r13,%r11 1570 movq -8(%rsi),%rbx 1571 adcq $0,%r10 1572 1573 mulq %r15 1574 addq %rax,%r12 1575 movq %rbx,%rax 1576 movq %r11,-16(%rdi) 1577 movq %rdx,%r13 1578 adcq $0,%r13 1579 1580 mulq %r14 1581 addq %rax,%r10 1582 movq %rbx,%rax 1583 movq %rdx,%r11 1584 adcq $0,%r11 1585 addq %r12,%r10 1586 adcq $0,%r11 1587 movq %r10,-8(%rdi) 1588 1589 mulq %r15 1590 addq %rax,%r13 1591 movq -16(%rsi),%rax 1592 adcq $0,%rdx 1593 addq %r11,%r13 1594 adcq $0,%rdx 1595 1596 movq %r13,(%rdi) 1597 movq 
%rdx,%r12 1598 movq %rdx,8(%rdi) 1599 1600 mulq %rbx 1601 addq $16,%rbp 1602 xorq %r14,%r14 1603 subq %r9,%rbp 1604 xorq %r15,%r15 1605 1606 addq %r12,%rax 1607 adcq $0,%rdx 1608 movq %rax,8(%rdi) 1609 movq %rdx,16(%rdi) 1610 movq %r15,24(%rdi) 1611 1612 movq -16(%rsi,%rbp,1),%rax 1613 leaq 48+8(%rsp),%rdi 1614 xorq %r10,%r10 1615 movq 8(%rdi),%r11 1616 1617 leaq (%r14,%r10,2),%r12 1618 shrq $63,%r10 1619 leaq (%rcx,%r11,2),%r13 1620 shrq $63,%r11 1621 orq %r10,%r13 1622 movq 16(%rdi),%r10 1623 movq %r11,%r14 1624 mulq %rax 1625 negq %r15 1626 movq 24(%rdi),%r11 1627 adcq %rax,%r12 1628 movq -8(%rsi,%rbp,1),%rax 1629 movq %r12,(%rdi) 1630 adcq %rdx,%r13 1631 1632 leaq (%r14,%r10,2),%rbx 1633 movq %r13,8(%rdi) 1634 sbbq %r15,%r15 1635 shrq $63,%r10 1636 leaq (%rcx,%r11,2),%r8 1637 shrq $63,%r11 1638 orq %r10,%r8 1639 movq 32(%rdi),%r10 1640 movq %r11,%r14 1641 mulq %rax 1642 negq %r15 1643 movq 40(%rdi),%r11 1644 adcq %rax,%rbx 1645 movq 0(%rsi,%rbp,1),%rax 1646 movq %rbx,16(%rdi) 1647 adcq %rdx,%r8 1648 leaq 16(%rbp),%rbp 1649 movq %r8,24(%rdi) 1650 sbbq %r15,%r15 1651 leaq 64(%rdi),%rdi 1652 jmp .Lsqr4x_shift_n_add 1653 1654.align 32 1655.Lsqr4x_shift_n_add: 1656 leaq (%r14,%r10,2),%r12 1657 shrq $63,%r10 1658 leaq (%rcx,%r11,2),%r13 1659 shrq $63,%r11 1660 orq %r10,%r13 1661 movq -16(%rdi),%r10 1662 movq %r11,%r14 1663 mulq %rax 1664 negq %r15 1665 movq -8(%rdi),%r11 1666 adcq %rax,%r12 1667 movq -8(%rsi,%rbp,1),%rax 1668 movq %r12,-32(%rdi) 1669 adcq %rdx,%r13 1670 1671 leaq (%r14,%r10,2),%rbx 1672 movq %r13,-24(%rdi) 1673 sbbq %r15,%r15 1674 shrq $63,%r10 1675 leaq (%rcx,%r11,2),%r8 1676 shrq $63,%r11 1677 orq %r10,%r8 1678 movq 0(%rdi),%r10 1679 movq %r11,%r14 1680 mulq %rax 1681 negq %r15 1682 movq 8(%rdi),%r11 1683 adcq %rax,%rbx 1684 movq 0(%rsi,%rbp,1),%rax 1685 movq %rbx,-16(%rdi) 1686 adcq %rdx,%r8 1687 1688 leaq (%r14,%r10,2),%r12 1689 movq %r8,-8(%rdi) 1690 sbbq %r15,%r15 1691 shrq $63,%r10 1692 leaq (%rcx,%r11,2),%r13 1693 shrq $63,%r11 1694 orq 
%r10,%r13 1695 movq 16(%rdi),%r10 1696 movq %r11,%r14 1697 mulq %rax 1698 negq %r15 1699 movq 24(%rdi),%r11 1700 adcq %rax,%r12 1701 movq 8(%rsi,%rbp,1),%rax 1702 movq %r12,0(%rdi) 1703 adcq %rdx,%r13 1704 1705 leaq (%r14,%r10,2),%rbx 1706 movq %r13,8(%rdi) 1707 sbbq %r15,%r15 1708 shrq $63,%r10 1709 leaq (%rcx,%r11,2),%r8 1710 shrq $63,%r11 1711 orq %r10,%r8 1712 movq 32(%rdi),%r10 1713 movq %r11,%r14 1714 mulq %rax 1715 negq %r15 1716 movq 40(%rdi),%r11 1717 adcq %rax,%rbx 1718 movq 16(%rsi,%rbp,1),%rax 1719 movq %rbx,16(%rdi) 1720 adcq %rdx,%r8 1721 movq %r8,24(%rdi) 1722 sbbq %r15,%r15 1723 leaq 64(%rdi),%rdi 1724 addq $32,%rbp 1725 jnz .Lsqr4x_shift_n_add 1726 1727 leaq (%r14,%r10,2),%r12 1728.byte 0x67 1729 shrq $63,%r10 1730 leaq (%rcx,%r11,2),%r13 1731 shrq $63,%r11 1732 orq %r10,%r13 1733 movq -16(%rdi),%r10 1734 movq %r11,%r14 1735 mulq %rax 1736 negq %r15 1737 movq -8(%rdi),%r11 1738 adcq %rax,%r12 1739 movq -8(%rsi),%rax 1740 movq %r12,-32(%rdi) 1741 adcq %rdx,%r13 1742 1743 leaq (%r14,%r10,2),%rbx 1744 movq %r13,-24(%rdi) 1745 sbbq %r15,%r15 1746 shrq $63,%r10 1747 leaq (%rcx,%r11,2),%r8 1748 shrq $63,%r11 1749 orq %r10,%r8 1750 mulq %rax 1751 negq %r15 1752 adcq %rax,%rbx 1753 adcq %rdx,%r8 1754 movq %rbx,-16(%rdi) 1755 movq %r8,-8(%rdi) 1756.byte 102,72,15,126,213 1757__bn_sqr8x_reduction: 1758 xorq %rax,%rax 1759 leaq (%r9,%rbp,1),%rcx 1760 leaq 48+8(%rsp,%r9,2),%rdx 1761 movq %rcx,0+8(%rsp) 1762 leaq 48+8(%rsp,%r9,1),%rdi 1763 movq %rdx,8+8(%rsp) 1764 negq %r9 1765 jmp .L8x_reduction_loop 1766 1767.align 32 1768.L8x_reduction_loop: 1769 leaq (%rdi,%r9,1),%rdi 1770.byte 0x66 1771 movq 0(%rdi),%rbx 1772 movq 8(%rdi),%r9 1773 movq 16(%rdi),%r10 1774 movq 24(%rdi),%r11 1775 movq 32(%rdi),%r12 1776 movq 40(%rdi),%r13 1777 movq 48(%rdi),%r14 1778 movq 56(%rdi),%r15 1779 movq %rax,(%rdx) 1780 leaq 64(%rdi),%rdi 1781 1782.byte 0x67 1783 movq %rbx,%r8 1784 imulq 32+8(%rsp),%rbx 1785 movq 0(%rbp),%rax 1786 movl $8,%ecx 1787 jmp .L8x_reduce 1788 1789.align 32 
1790.L8x_reduce: 1791 mulq %rbx 1792 movq 8(%rbp),%rax 1793 negq %r8 1794 movq %rdx,%r8 1795 adcq $0,%r8 1796 1797 mulq %rbx 1798 addq %rax,%r9 1799 movq 16(%rbp),%rax 1800 adcq $0,%rdx 1801 addq %r9,%r8 1802 movq %rbx,48-8+8(%rsp,%rcx,8) 1803 movq %rdx,%r9 1804 adcq $0,%r9 1805 1806 mulq %rbx 1807 addq %rax,%r10 1808 movq 24(%rbp),%rax 1809 adcq $0,%rdx 1810 addq %r10,%r9 1811 movq 32+8(%rsp),%rsi 1812 movq %rdx,%r10 1813 adcq $0,%r10 1814 1815 mulq %rbx 1816 addq %rax,%r11 1817 movq 32(%rbp),%rax 1818 adcq $0,%rdx 1819 imulq %r8,%rsi 1820 addq %r11,%r10 1821 movq %rdx,%r11 1822 adcq $0,%r11 1823 1824 mulq %rbx 1825 addq %rax,%r12 1826 movq 40(%rbp),%rax 1827 adcq $0,%rdx 1828 addq %r12,%r11 1829 movq %rdx,%r12 1830 adcq $0,%r12 1831 1832 mulq %rbx 1833 addq %rax,%r13 1834 movq 48(%rbp),%rax 1835 adcq $0,%rdx 1836 addq %r13,%r12 1837 movq %rdx,%r13 1838 adcq $0,%r13 1839 1840 mulq %rbx 1841 addq %rax,%r14 1842 movq 56(%rbp),%rax 1843 adcq $0,%rdx 1844 addq %r14,%r13 1845 movq %rdx,%r14 1846 adcq $0,%r14 1847 1848 mulq %rbx 1849 movq %rsi,%rbx 1850 addq %rax,%r15 1851 movq 0(%rbp),%rax 1852 adcq $0,%rdx 1853 addq %r15,%r14 1854 movq %rdx,%r15 1855 adcq $0,%r15 1856 1857 decl %ecx 1858 jnz .L8x_reduce 1859 1860 leaq 64(%rbp),%rbp 1861 xorq %rax,%rax 1862 movq 8+8(%rsp),%rdx 1863 cmpq 0+8(%rsp),%rbp 1864 jae .L8x_no_tail 1865 1866.byte 0x66 1867 addq 0(%rdi),%r8 1868 adcq 8(%rdi),%r9 1869 adcq 16(%rdi),%r10 1870 adcq 24(%rdi),%r11 1871 adcq 32(%rdi),%r12 1872 adcq 40(%rdi),%r13 1873 adcq 48(%rdi),%r14 1874 adcq 56(%rdi),%r15 1875 sbbq %rsi,%rsi 1876 1877 movq 48+56+8(%rsp),%rbx 1878 movl $8,%ecx 1879 movq 0(%rbp),%rax 1880 jmp .L8x_tail 1881 1882.align 32 1883.L8x_tail: 1884 mulq %rbx 1885 addq %rax,%r8 1886 movq 8(%rbp),%rax 1887 movq %r8,(%rdi) 1888 movq %rdx,%r8 1889 adcq $0,%r8 1890 1891 mulq %rbx 1892 addq %rax,%r9 1893 movq 16(%rbp),%rax 1894 adcq $0,%rdx 1895 addq %r9,%r8 1896 leaq 8(%rdi),%rdi 1897 movq %rdx,%r9 1898 adcq $0,%r9 1899 1900 mulq %rbx 1901 addq 
%rax,%r10 1902 movq 24(%rbp),%rax 1903 adcq $0,%rdx 1904 addq %r10,%r9 1905 movq %rdx,%r10 1906 adcq $0,%r10 1907 1908 mulq %rbx 1909 addq %rax,%r11 1910 movq 32(%rbp),%rax 1911 adcq $0,%rdx 1912 addq %r11,%r10 1913 movq %rdx,%r11 1914 adcq $0,%r11 1915 1916 mulq %rbx 1917 addq %rax,%r12 1918 movq 40(%rbp),%rax 1919 adcq $0,%rdx 1920 addq %r12,%r11 1921 movq %rdx,%r12 1922 adcq $0,%r12 1923 1924 mulq %rbx 1925 addq %rax,%r13 1926 movq 48(%rbp),%rax 1927 adcq $0,%rdx 1928 addq %r13,%r12 1929 movq %rdx,%r13 1930 adcq $0,%r13 1931 1932 mulq %rbx 1933 addq %rax,%r14 1934 movq 56(%rbp),%rax 1935 adcq $0,%rdx 1936 addq %r14,%r13 1937 movq %rdx,%r14 1938 adcq $0,%r14 1939 1940 mulq %rbx 1941 movq 48-16+8(%rsp,%rcx,8),%rbx 1942 addq %rax,%r15 1943 adcq $0,%rdx 1944 addq %r15,%r14 1945 movq 0(%rbp),%rax 1946 movq %rdx,%r15 1947 adcq $0,%r15 1948 1949 decl %ecx 1950 jnz .L8x_tail 1951 1952 leaq 64(%rbp),%rbp 1953 movq 8+8(%rsp),%rdx 1954 cmpq 0+8(%rsp),%rbp 1955 jae .L8x_tail_done 1956 1957 movq 48+56+8(%rsp),%rbx 1958 negq %rsi 1959 movq 0(%rbp),%rax 1960 adcq 0(%rdi),%r8 1961 adcq 8(%rdi),%r9 1962 adcq 16(%rdi),%r10 1963 adcq 24(%rdi),%r11 1964 adcq 32(%rdi),%r12 1965 adcq 40(%rdi),%r13 1966 adcq 48(%rdi),%r14 1967 adcq 56(%rdi),%r15 1968 sbbq %rsi,%rsi 1969 1970 movl $8,%ecx 1971 jmp .L8x_tail 1972 1973.align 32 1974.L8x_tail_done: 1975 xorq %rax,%rax 1976 addq (%rdx),%r8 1977 adcq $0,%r9 1978 adcq $0,%r10 1979 adcq $0,%r11 1980 adcq $0,%r12 1981 adcq $0,%r13 1982 adcq $0,%r14 1983 adcq $0,%r15 1984 adcq $0,%rax 1985 1986 negq %rsi 1987.L8x_no_tail: 1988 adcq 0(%rdi),%r8 1989 adcq 8(%rdi),%r9 1990 adcq 16(%rdi),%r10 1991 adcq 24(%rdi),%r11 1992 adcq 32(%rdi),%r12 1993 adcq 40(%rdi),%r13 1994 adcq 48(%rdi),%r14 1995 adcq 56(%rdi),%r15 1996 adcq $0,%rax 1997 movq -8(%rbp),%rcx 1998 xorq %rsi,%rsi 1999 2000.byte 102,72,15,126,213 2001 2002 movq %r8,0(%rdi) 2003 movq %r9,8(%rdi) 2004.byte 102,73,15,126,217 2005 movq %r10,16(%rdi) 2006 movq %r11,24(%rdi) 2007 movq 
%r12,32(%rdi)
# NOTE(review): machine-generated perlasm ("do not edit by hand").  Code tokens
# below are byte-identical to the stored text, only '#' comment lines added and
# statements re-wrapped one per line.  The decimal numbers fused to each
# statement are artifacts already present in the stored text and are preserved.
# The fragment above completes the "movq" split across the previous stored line.
#
# --- tail of bn_sqr8x_internal (entry lies above this chunk): write back the
# --- last eight limbs of this 64-byte window, advance %rdi, loop until it
# --- reaches the end pointer held in %rdx.
2008 movq %r13,40(%rdi)
2009 movq %r14,48(%rdi)
2010 movq %r15,56(%rdi)
2011 leaq 64(%rdi),%rdi
2012
2013 cmpq %rdx,%rdi
2014 jb .L8x_reduction_loop
2015 .byte 0xf3,0xc3
# ^ encoded "rep ret" (0xf3,0xc3)
2016.cfi_endproc
2017.size bn_sqr8x_internal,.-bn_sqr8x_internal
#
# __bn_post4x_internal: constant-time final conditional subtraction of the
# modulus after the 4x Montgomery tail.  Grounded in the uses below: %rbp
# points at the modulus n, %r9 holds the negated byte count (re-negated on
# exit), %rax is a 0/-1 selector from the preceding reduction (negated below),
# and the real output pointer arrives in %xmm1.  Each limb computes
# t[i] + (~n[i] & mask) + carry, i.e. subtracts n exactly when mask == -1,
# with no data-dependent branch or memory access.
2018.type __bn_post4x_internal,@function
2019.align 32
2020__bn_post4x_internal:
2021.cfi_startproc
2022 movq 0(%rbp),%r12
# preload n[0] for the first pass
2023 leaq (%rdi,%r9,1),%rbx
# %rbx = source (unreduced) window, %r9 being negative here
2024 movq %r9,%rcx
2025.byte 102,72,15,126,207
# ^ encoded: movq %xmm1,%rdi (66 48 0f 7e cf) -- recover destination pointer
2026 negq %rax
2027.byte 102,72,15,126,206
# ^ encoded: movq %xmm1,%rsi (66 48 0f 7e ce)
2028 sarq $3+2,%rcx
# loop count = num/32: four 8-byte limbs per pass (%rcx negative, counts to 0)
2029 decq %r12
# first limb uses n[0]-1 so that ~(n[0]-1) == -n[0] after the notq below,
# folding the +1 of the two's-complement subtraction into the masked add
2030 xorq %r10,%r10
# %r10 = rolling carry latch (0 or -1), refreshed by sbbq each pass
2031 movq 8(%rbp),%r13
2032 movq 16(%rbp),%r14
2033 movq 24(%rbp),%r15
2034 jmp .Lsqr4x_sub_entry
2035
2036.align 16
2037.Lsqr4x_sub:
2038 movq 0(%rbp),%r12
2039 movq 8(%rbp),%r13
2040 movq 16(%rbp),%r14
2041 movq 24(%rbp),%r15
2042.Lsqr4x_sub_entry:
2043 leaq 32(%rbp),%rbp
2044 notq %r12
2045 notq %r13
2046 notq %r14
2047 notq %r15
2048 andq %rax,%r12
# ~n[i] & mask  (%rax is 0 or all-ones)
2049 andq %rax,%r13
2050 andq %rax,%r14
2051 andq %rax,%r15
2052
2053 negq %r10
# reload CF from the latched carry (%r10 is 0 or -1; neg sets CF iff nonzero)
2054 adcq 0(%rbx),%r12
# r[i] = t[i] + (~n[i] & mask) + carry  ==  t[i] - n[i] when mask == -1
2055 adcq 8(%rbx),%r13
2056 adcq 16(%rbx),%r14
2057 adcq 24(%rbx),%r15
2058 movq %r12,0(%rdi)
2059 leaq 32(%rbx),%rbx
2060 movq %r13,8(%rdi)
2061 sbbq %r10,%r10
# latch carry-out: %r10 = CF ? -1 : 0 (only mov/lea in between, flags intact)
2062 movq %r14,16(%rdi)
2063 movq %r15,24(%rdi)
2064 leaq 32(%rdi),%rdi
2065
2066 incq %rcx
2067 jnz .Lsqr4x_sub
2068
2069 movq %r9,%r10
2070 negq %r9
# hand the positive length back to the caller
2071 .byte 0xf3,0xc3
# ^ encoded "rep ret"
2072.cfi_endproc
2073.size __bn_post4x_internal,.-__bn_post4x_internal
#
# bn_mulx4x_mont_gather5: MULX/ADX path of bn_mul_mont_gather5.  Standard
# prologue: keep the entry %rsp in %rax for the unwinder, save all six
# callee-saved GPRs, then size the scratch frame from the limb count in %r9d.
2074.type bn_mulx4x_mont_gather5,@function
2075.align 32
2076bn_mulx4x_mont_gather5:
2077.cfi_startproc
2078 movq %rsp,%rax
2079.cfi_def_cfa_register %rax
2080.Lmulx4x_enter:
2081 pushq %rbx
2082.cfi_offset %rbx,-16
2083 pushq %rbp
2084.cfi_offset %rbp,-24
2085 pushq %r12
2086.cfi_offset %r12,-32
2087 pushq %r13
2088.cfi_offset %r13,-40
2089 pushq %r14
2090.cfi_offset %r14,-48
2091 pushq %r15
2092.cfi_offset %r15,-56
2093.Lmulx4x_prologue:
2094
2095 shll $3,%r9d
# %r9d = num * 8 = operand byte length
2096 leaq (%r9,%r9,2),%r10
# %r10 = 3 * num bytes
2097 negq %r9
2098 movq (%r8),%r8
# dereference the sixth argument (presumably &n0, as in the other entry
# points of this module -- TODO confirm against the C prototype)
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109 leaq
-320(%rsp,%r9,2),%r11 2110 movq %rsp,%rbp 2111 subq %rdi,%r11 2112 andq $4095,%r11 2113 cmpq %r11,%r10 2114 jb .Lmulx4xsp_alt 2115 subq %r11,%rbp 2116 leaq -320(%rbp,%r9,2),%rbp 2117 jmp .Lmulx4xsp_done 2118 2119.Lmulx4xsp_alt: 2120 leaq 4096-320(,%r9,2),%r10 2121 leaq -320(%rbp,%r9,2),%rbp 2122 subq %r10,%r11 2123 movq $0,%r10 2124 cmovcq %r10,%r11 2125 subq %r11,%rbp 2126.Lmulx4xsp_done: 2127 andq $-64,%rbp 2128 movq %rsp,%r11 2129 subq %rbp,%r11 2130 andq $-4096,%r11 2131 leaq (%r11,%rbp,1),%rsp 2132 movq (%rsp),%r10 2133 cmpq %rbp,%rsp 2134 ja .Lmulx4x_page_walk 2135 jmp .Lmulx4x_page_walk_done 2136 2137.Lmulx4x_page_walk: 2138 leaq -4096(%rsp),%rsp 2139 movq (%rsp),%r10 2140 cmpq %rbp,%rsp 2141 ja .Lmulx4x_page_walk 2142.Lmulx4x_page_walk_done: 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 movq %r8,32(%rsp) 2157 movq %rax,40(%rsp) 2158.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 2159.Lmulx4x_body: 2160 call mulx4x_internal 2161 2162 movq 40(%rsp),%rsi 2163.cfi_def_cfa %rsi,8 2164 movq $1,%rax 2165 2166 movq -48(%rsi),%r15 2167.cfi_restore %r15 2168 movq -40(%rsi),%r14 2169.cfi_restore %r14 2170 movq -32(%rsi),%r13 2171.cfi_restore %r13 2172 movq -24(%rsi),%r12 2173.cfi_restore %r12 2174 movq -16(%rsi),%rbp 2175.cfi_restore %rbp 2176 movq -8(%rsi),%rbx 2177.cfi_restore %rbx 2178 leaq (%rsi),%rsp 2179.cfi_def_cfa_register %rsp 2180.Lmulx4x_epilogue: 2181 .byte 0xf3,0xc3 2182.cfi_endproc 2183.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 2184 2185.type mulx4x_internal,@function 2186.align 32 2187mulx4x_internal: 2188.cfi_startproc 2189 movq %r9,8(%rsp) 2190 movq %r9,%r10 2191 negq %r9 2192 shlq $5,%r9 2193 negq %r10 2194 leaq 128(%rdx,%r9,1),%r13 2195 shrq $5+5,%r9 2196 movd 8(%rax),%xmm5 2197 subq $1,%r9 2198 leaq .Linc(%rip),%rax 2199 movq %r13,16+8(%rsp) 2200 movq %r9,24+8(%rsp) 2201 movq %rdi,56+8(%rsp) 2202 movdqa 0(%rax),%xmm0 2203 movdqa 16(%rax),%xmm1 2204 leaq 88-112(%rsp,%r10,1),%r10 2205 leaq 128(%rdx),%rdi 2206 2207 
pshufd $0,%xmm5,%xmm5 2208 movdqa %xmm1,%xmm4 2209.byte 0x67 2210 movdqa %xmm1,%xmm2 2211.byte 0x67 2212 paddd %xmm0,%xmm1 2213 pcmpeqd %xmm5,%xmm0 2214 movdqa %xmm4,%xmm3 2215 paddd %xmm1,%xmm2 2216 pcmpeqd %xmm5,%xmm1 2217 movdqa %xmm0,112(%r10) 2218 movdqa %xmm4,%xmm0 2219 2220 paddd %xmm2,%xmm3 2221 pcmpeqd %xmm5,%xmm2 2222 movdqa %xmm1,128(%r10) 2223 movdqa %xmm4,%xmm1 2224 2225 paddd %xmm3,%xmm0 2226 pcmpeqd %xmm5,%xmm3 2227 movdqa %xmm2,144(%r10) 2228 movdqa %xmm4,%xmm2 2229 2230 paddd %xmm0,%xmm1 2231 pcmpeqd %xmm5,%xmm0 2232 movdqa %xmm3,160(%r10) 2233 movdqa %xmm4,%xmm3 2234 paddd %xmm1,%xmm2 2235 pcmpeqd %xmm5,%xmm1 2236 movdqa %xmm0,176(%r10) 2237 movdqa %xmm4,%xmm0 2238 2239 paddd %xmm2,%xmm3 2240 pcmpeqd %xmm5,%xmm2 2241 movdqa %xmm1,192(%r10) 2242 movdqa %xmm4,%xmm1 2243 2244 paddd %xmm3,%xmm0 2245 pcmpeqd %xmm5,%xmm3 2246 movdqa %xmm2,208(%r10) 2247 movdqa %xmm4,%xmm2 2248 2249 paddd %xmm0,%xmm1 2250 pcmpeqd %xmm5,%xmm0 2251 movdqa %xmm3,224(%r10) 2252 movdqa %xmm4,%xmm3 2253 paddd %xmm1,%xmm2 2254 pcmpeqd %xmm5,%xmm1 2255 movdqa %xmm0,240(%r10) 2256 movdqa %xmm4,%xmm0 2257 2258 paddd %xmm2,%xmm3 2259 pcmpeqd %xmm5,%xmm2 2260 movdqa %xmm1,256(%r10) 2261 movdqa %xmm4,%xmm1 2262 2263 paddd %xmm3,%xmm0 2264 pcmpeqd %xmm5,%xmm3 2265 movdqa %xmm2,272(%r10) 2266 movdqa %xmm4,%xmm2 2267 2268 paddd %xmm0,%xmm1 2269 pcmpeqd %xmm5,%xmm0 2270 movdqa %xmm3,288(%r10) 2271 movdqa %xmm4,%xmm3 2272.byte 0x67 2273 paddd %xmm1,%xmm2 2274 pcmpeqd %xmm5,%xmm1 2275 movdqa %xmm0,304(%r10) 2276 2277 paddd %xmm2,%xmm3 2278 pcmpeqd %xmm5,%xmm2 2279 movdqa %xmm1,320(%r10) 2280 2281 pcmpeqd %xmm5,%xmm3 2282 movdqa %xmm2,336(%r10) 2283 2284 pand 64(%rdi),%xmm0 2285 pand 80(%rdi),%xmm1 2286 pand 96(%rdi),%xmm2 2287 movdqa %xmm3,352(%r10) 2288 pand 112(%rdi),%xmm3 2289 por %xmm2,%xmm0 2290 por %xmm3,%xmm1 2291 movdqa -128(%rdi),%xmm4 2292 movdqa -112(%rdi),%xmm5 2293 movdqa -96(%rdi),%xmm2 2294 pand 112(%r10),%xmm4 2295 movdqa -80(%rdi),%xmm3 2296 pand 128(%r10),%xmm5 2297 por 
%xmm4,%xmm0 2298 pand 144(%r10),%xmm2 2299 por %xmm5,%xmm1 2300 pand 160(%r10),%xmm3 2301 por %xmm2,%xmm0 2302 por %xmm3,%xmm1 2303 movdqa -64(%rdi),%xmm4 2304 movdqa -48(%rdi),%xmm5 2305 movdqa -32(%rdi),%xmm2 2306 pand 176(%r10),%xmm4 2307 movdqa -16(%rdi),%xmm3 2308 pand 192(%r10),%xmm5 2309 por %xmm4,%xmm0 2310 pand 208(%r10),%xmm2 2311 por %xmm5,%xmm1 2312 pand 224(%r10),%xmm3 2313 por %xmm2,%xmm0 2314 por %xmm3,%xmm1 2315 movdqa 0(%rdi),%xmm4 2316 movdqa 16(%rdi),%xmm5 2317 movdqa 32(%rdi),%xmm2 2318 pand 240(%r10),%xmm4 2319 movdqa 48(%rdi),%xmm3 2320 pand 256(%r10),%xmm5 2321 por %xmm4,%xmm0 2322 pand 272(%r10),%xmm2 2323 por %xmm5,%xmm1 2324 pand 288(%r10),%xmm3 2325 por %xmm2,%xmm0 2326 por %xmm3,%xmm1 2327 pxor %xmm1,%xmm0 2328 2329 pshufd $0x4e,%xmm0,%xmm1 2330 por %xmm1,%xmm0 2331 leaq 256(%rdi),%rdi 2332.byte 102,72,15,126,194 2333 leaq 64+32+8(%rsp),%rbx 2334 2335 movq %rdx,%r9 2336 mulxq 0(%rsi),%r8,%rax 2337 mulxq 8(%rsi),%r11,%r12 2338 addq %rax,%r11 2339 mulxq 16(%rsi),%rax,%r13 2340 adcq %rax,%r12 2341 adcq $0,%r13 2342 mulxq 24(%rsi),%rax,%r14 2343 2344 movq %r8,%r15 2345 imulq 32+8(%rsp),%r8 2346 xorq %rbp,%rbp 2347 movq %r8,%rdx 2348 2349 movq %rdi,8+8(%rsp) 2350 2351 leaq 32(%rsi),%rsi 2352 adcxq %rax,%r13 2353 adcxq %rbp,%r14 2354 2355 mulxq 0(%rcx),%rax,%r10 2356 adcxq %rax,%r15 2357 adoxq %r11,%r10 2358 mulxq 8(%rcx),%rax,%r11 2359 adcxq %rax,%r10 2360 adoxq %r12,%r11 2361 mulxq 16(%rcx),%rax,%r12 2362 movq 24+8(%rsp),%rdi 2363 movq %r10,-32(%rbx) 2364 adcxq %rax,%r11 2365 adoxq %r13,%r12 2366 mulxq 24(%rcx),%rax,%r15 2367 movq %r9,%rdx 2368 movq %r11,-24(%rbx) 2369 adcxq %rax,%r12 2370 adoxq %rbp,%r15 2371 leaq 32(%rcx),%rcx 2372 movq %r12,-16(%rbx) 2373 jmp .Lmulx4x_1st 2374 2375.align 32 2376.Lmulx4x_1st: 2377 adcxq %rbp,%r15 2378 mulxq 0(%rsi),%r10,%rax 2379 adcxq %r14,%r10 2380 mulxq 8(%rsi),%r11,%r14 2381 adcxq %rax,%r11 2382 mulxq 16(%rsi),%r12,%rax 2383 adcxq %r14,%r12 2384 mulxq 24(%rsi),%r13,%r14 2385.byte 0x67,0x67 2386 movq 
%r8,%rdx 2387 adcxq %rax,%r13 2388 adcxq %rbp,%r14 2389 leaq 32(%rsi),%rsi 2390 leaq 32(%rbx),%rbx 2391 2392 adoxq %r15,%r10 2393 mulxq 0(%rcx),%rax,%r15 2394 adcxq %rax,%r10 2395 adoxq %r15,%r11 2396 mulxq 8(%rcx),%rax,%r15 2397 adcxq %rax,%r11 2398 adoxq %r15,%r12 2399 mulxq 16(%rcx),%rax,%r15 2400 movq %r10,-40(%rbx) 2401 adcxq %rax,%r12 2402 movq %r11,-32(%rbx) 2403 adoxq %r15,%r13 2404 mulxq 24(%rcx),%rax,%r15 2405 movq %r9,%rdx 2406 movq %r12,-24(%rbx) 2407 adcxq %rax,%r13 2408 adoxq %rbp,%r15 2409 leaq 32(%rcx),%rcx 2410 movq %r13,-16(%rbx) 2411 2412 decq %rdi 2413 jnz .Lmulx4x_1st 2414 2415 movq 8(%rsp),%rax 2416 adcq %rbp,%r15 2417 leaq (%rsi,%rax,1),%rsi 2418 addq %r15,%r14 2419 movq 8+8(%rsp),%rdi 2420 adcq %rbp,%rbp 2421 movq %r14,-8(%rbx) 2422 jmp .Lmulx4x_outer 2423 2424.align 32 2425.Lmulx4x_outer: 2426 leaq 16-256(%rbx),%r10 2427 pxor %xmm4,%xmm4 2428.byte 0x67,0x67 2429 pxor %xmm5,%xmm5 2430 movdqa -128(%rdi),%xmm0 2431 movdqa -112(%rdi),%xmm1 2432 movdqa -96(%rdi),%xmm2 2433 pand 256(%r10),%xmm0 2434 movdqa -80(%rdi),%xmm3 2435 pand 272(%r10),%xmm1 2436 por %xmm0,%xmm4 2437 pand 288(%r10),%xmm2 2438 por %xmm1,%xmm5 2439 pand 304(%r10),%xmm3 2440 por %xmm2,%xmm4 2441 por %xmm3,%xmm5 2442 movdqa -64(%rdi),%xmm0 2443 movdqa -48(%rdi),%xmm1 2444 movdqa -32(%rdi),%xmm2 2445 pand 320(%r10),%xmm0 2446 movdqa -16(%rdi),%xmm3 2447 pand 336(%r10),%xmm1 2448 por %xmm0,%xmm4 2449 pand 352(%r10),%xmm2 2450 por %xmm1,%xmm5 2451 pand 368(%r10),%xmm3 2452 por %xmm2,%xmm4 2453 por %xmm3,%xmm5 2454 movdqa 0(%rdi),%xmm0 2455 movdqa 16(%rdi),%xmm1 2456 movdqa 32(%rdi),%xmm2 2457 pand 384(%r10),%xmm0 2458 movdqa 48(%rdi),%xmm3 2459 pand 400(%r10),%xmm1 2460 por %xmm0,%xmm4 2461 pand 416(%r10),%xmm2 2462 por %xmm1,%xmm5 2463 pand 432(%r10),%xmm3 2464 por %xmm2,%xmm4 2465 por %xmm3,%xmm5 2466 movdqa 64(%rdi),%xmm0 2467 movdqa 80(%rdi),%xmm1 2468 movdqa 96(%rdi),%xmm2 2469 pand 448(%r10),%xmm0 2470 movdqa 112(%rdi),%xmm3 2471 pand 464(%r10),%xmm1 2472 por %xmm0,%xmm4 
2473 pand 480(%r10),%xmm2 2474 por %xmm1,%xmm5 2475 pand 496(%r10),%xmm3 2476 por %xmm2,%xmm4 2477 por %xmm3,%xmm5 2478 por %xmm5,%xmm4 2479 2480 pshufd $0x4e,%xmm4,%xmm0 2481 por %xmm4,%xmm0 2482 leaq 256(%rdi),%rdi 2483.byte 102,72,15,126,194 2484 2485 movq %rbp,(%rbx) 2486 leaq 32(%rbx,%rax,1),%rbx 2487 mulxq 0(%rsi),%r8,%r11 2488 xorq %rbp,%rbp 2489 movq %rdx,%r9 2490 mulxq 8(%rsi),%r14,%r12 2491 adoxq -32(%rbx),%r8 2492 adcxq %r14,%r11 2493 mulxq 16(%rsi),%r15,%r13 2494 adoxq -24(%rbx),%r11 2495 adcxq %r15,%r12 2496 mulxq 24(%rsi),%rdx,%r14 2497 adoxq -16(%rbx),%r12 2498 adcxq %rdx,%r13 2499 leaq (%rcx,%rax,1),%rcx 2500 leaq 32(%rsi),%rsi 2501 adoxq -8(%rbx),%r13 2502 adcxq %rbp,%r14 2503 adoxq %rbp,%r14 2504 2505 movq %r8,%r15 2506 imulq 32+8(%rsp),%r8 2507 2508 movq %r8,%rdx 2509 xorq %rbp,%rbp 2510 movq %rdi,8+8(%rsp) 2511 2512 mulxq 0(%rcx),%rax,%r10 2513 adcxq %rax,%r15 2514 adoxq %r11,%r10 2515 mulxq 8(%rcx),%rax,%r11 2516 adcxq %rax,%r10 2517 adoxq %r12,%r11 2518 mulxq 16(%rcx),%rax,%r12 2519 adcxq %rax,%r11 2520 adoxq %r13,%r12 2521 mulxq 24(%rcx),%rax,%r15 2522 movq %r9,%rdx 2523 movq 24+8(%rsp),%rdi 2524 movq %r10,-32(%rbx) 2525 adcxq %rax,%r12 2526 movq %r11,-24(%rbx) 2527 adoxq %rbp,%r15 2528 movq %r12,-16(%rbx) 2529 leaq 32(%rcx),%rcx 2530 jmp .Lmulx4x_inner 2531 2532.align 32 2533.Lmulx4x_inner: 2534 mulxq 0(%rsi),%r10,%rax 2535 adcxq %rbp,%r15 2536 adoxq %r14,%r10 2537 mulxq 8(%rsi),%r11,%r14 2538 adcxq 0(%rbx),%r10 2539 adoxq %rax,%r11 2540 mulxq 16(%rsi),%r12,%rax 2541 adcxq 8(%rbx),%r11 2542 adoxq %r14,%r12 2543 mulxq 24(%rsi),%r13,%r14 2544 movq %r8,%rdx 2545 adcxq 16(%rbx),%r12 2546 adoxq %rax,%r13 2547 adcxq 24(%rbx),%r13 2548 adoxq %rbp,%r14 2549 leaq 32(%rsi),%rsi 2550 leaq 32(%rbx),%rbx 2551 adcxq %rbp,%r14 2552 2553 adoxq %r15,%r10 2554 mulxq 0(%rcx),%rax,%r15 2555 adcxq %rax,%r10 2556 adoxq %r15,%r11 2557 mulxq 8(%rcx),%rax,%r15 2558 adcxq %rax,%r11 2559 adoxq %r15,%r12 2560 mulxq 16(%rcx),%rax,%r15 2561 movq %r10,-40(%rbx) 2562 adcxq 
%rax,%r12 2563 adoxq %r15,%r13 2564 movq %r11,-32(%rbx) 2565 mulxq 24(%rcx),%rax,%r15 2566 movq %r9,%rdx 2567 leaq 32(%rcx),%rcx 2568 movq %r12,-24(%rbx) 2569 adcxq %rax,%r13 2570 adoxq %rbp,%r15 2571 movq %r13,-16(%rbx) 2572 2573 decq %rdi 2574 jnz .Lmulx4x_inner 2575 2576 movq 0+8(%rsp),%rax 2577 adcq %rbp,%r15 2578 subq 0(%rbx),%rdi 2579 movq 8+8(%rsp),%rdi 2580 movq 16+8(%rsp),%r10 2581 adcq %r15,%r14 2582 leaq (%rsi,%rax,1),%rsi 2583 adcq %rbp,%rbp 2584 movq %r14,-8(%rbx) 2585 2586 cmpq %r10,%rdi 2587 jb .Lmulx4x_outer 2588 2589 movq -8(%rcx),%r10 2590 movq %rbp,%r8 2591 movq (%rcx,%rax,1),%r12 2592 leaq (%rcx,%rax,1),%rbp 2593 movq %rax,%rcx 2594 leaq (%rbx,%rax,1),%rdi 2595 xorl %eax,%eax 2596 xorq %r15,%r15 2597 subq %r14,%r10 2598 adcq %r15,%r15 2599 orq %r15,%r8 2600 sarq $3+2,%rcx 2601 subq %r8,%rax 2602 movq 56+8(%rsp),%rdx 2603 decq %r12 2604 movq 8(%rbp),%r13 2605 xorq %r8,%r8 2606 movq 16(%rbp),%r14 2607 movq 24(%rbp),%r15 2608 jmp .Lsqrx4x_sub_entry 2609.cfi_endproc 2610.size mulx4x_internal,.-mulx4x_internal 2611.type bn_powerx5,@function 2612.align 32 2613bn_powerx5: 2614.cfi_startproc 2615 movq %rsp,%rax 2616.cfi_def_cfa_register %rax 2617.Lpowerx5_enter: 2618 pushq %rbx 2619.cfi_offset %rbx,-16 2620 pushq %rbp 2621.cfi_offset %rbp,-24 2622 pushq %r12 2623.cfi_offset %r12,-32 2624 pushq %r13 2625.cfi_offset %r13,-40 2626 pushq %r14 2627.cfi_offset %r14,-48 2628 pushq %r15 2629.cfi_offset %r15,-56 2630.Lpowerx5_prologue: 2631 2632 shll $3,%r9d 2633 leaq (%r9,%r9,2),%r10 2634 negq %r9 2635 movq (%r8),%r8 2636 2637 2638 2639 2640 2641 2642 2643 2644 leaq -320(%rsp,%r9,2),%r11 2645 movq %rsp,%rbp 2646 subq %rdi,%r11 2647 andq $4095,%r11 2648 cmpq %r11,%r10 2649 jb .Lpwrx_sp_alt 2650 subq %r11,%rbp 2651 leaq -320(%rbp,%r9,2),%rbp 2652 jmp .Lpwrx_sp_done 2653 2654.align 32 2655.Lpwrx_sp_alt: 2656 leaq 4096-320(,%r9,2),%r10 2657 leaq -320(%rbp,%r9,2),%rbp 2658 subq %r10,%r11 2659 movq $0,%r10 2660 cmovcq %r10,%r11 2661 subq %r11,%rbp 2662.Lpwrx_sp_done: 
2663 andq $-64,%rbp 2664 movq %rsp,%r11 2665 subq %rbp,%r11 2666 andq $-4096,%r11 2667 leaq (%r11,%rbp,1),%rsp 2668 movq (%rsp),%r10 2669 cmpq %rbp,%rsp 2670 ja .Lpwrx_page_walk 2671 jmp .Lpwrx_page_walk_done 2672 2673.Lpwrx_page_walk: 2674 leaq -4096(%rsp),%rsp 2675 movq (%rsp),%r10 2676 cmpq %rbp,%rsp 2677 ja .Lpwrx_page_walk 2678.Lpwrx_page_walk_done: 2679 2680 movq %r9,%r10 2681 negq %r9 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 pxor %xmm0,%xmm0 2695.byte 102,72,15,110,207 2696.byte 102,72,15,110,209 2697.byte 102,73,15,110,218 2698.byte 102,72,15,110,226 2699 movq %r8,32(%rsp) 2700 movq %rax,40(%rsp) 2701.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 2702.Lpowerx5_body: 2703 2704 call __bn_sqrx8x_internal 2705 call __bn_postx4x_internal 2706 call __bn_sqrx8x_internal 2707 call __bn_postx4x_internal 2708 call __bn_sqrx8x_internal 2709 call __bn_postx4x_internal 2710 call __bn_sqrx8x_internal 2711 call __bn_postx4x_internal 2712 call __bn_sqrx8x_internal 2713 call __bn_postx4x_internal 2714 2715 movq %r10,%r9 2716 movq %rsi,%rdi 2717.byte 102,72,15,126,209 2718.byte 102,72,15,126,226 2719 movq 40(%rsp),%rax 2720 2721 call mulx4x_internal 2722 2723 movq 40(%rsp),%rsi 2724.cfi_def_cfa %rsi,8 2725 movq $1,%rax 2726 2727 movq -48(%rsi),%r15 2728.cfi_restore %r15 2729 movq -40(%rsi),%r14 2730.cfi_restore %r14 2731 movq -32(%rsi),%r13 2732.cfi_restore %r13 2733 movq -24(%rsi),%r12 2734.cfi_restore %r12 2735 movq -16(%rsi),%rbp 2736.cfi_restore %rbp 2737 movq -8(%rsi),%rbx 2738.cfi_restore %rbx 2739 leaq (%rsi),%rsp 2740.cfi_def_cfa_register %rsp 2741.Lpowerx5_epilogue: 2742 .byte 0xf3,0xc3 2743.cfi_endproc 2744.size bn_powerx5,.-bn_powerx5 2745 2746.globl bn_sqrx8x_internal 2747.hidden bn_sqrx8x_internal 2748.hidden bn_sqrx8x_internal 2749.type bn_sqrx8x_internal,@function 2750.align 32 2751bn_sqrx8x_internal: 2752__bn_sqrx8x_internal: 2753.cfi_startproc 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 
2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 leaq 48+8(%rsp),%rdi 2795 leaq (%rsi,%r9,1),%rbp 2796 movq %r9,0+8(%rsp) 2797 movq %rbp,8+8(%rsp) 2798 jmp .Lsqr8x_zero_start 2799 2800.align 32 2801.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 2802.Lsqrx8x_zero: 2803.byte 0x3e 2804 movdqa %xmm0,0(%rdi) 2805 movdqa %xmm0,16(%rdi) 2806 movdqa %xmm0,32(%rdi) 2807 movdqa %xmm0,48(%rdi) 2808.Lsqr8x_zero_start: 2809 movdqa %xmm0,64(%rdi) 2810 movdqa %xmm0,80(%rdi) 2811 movdqa %xmm0,96(%rdi) 2812 movdqa %xmm0,112(%rdi) 2813 leaq 128(%rdi),%rdi 2814 subq $64,%r9 2815 jnz .Lsqrx8x_zero 2816 2817 movq 0(%rsi),%rdx 2818 2819 xorq %r10,%r10 2820 xorq %r11,%r11 2821 xorq %r12,%r12 2822 xorq %r13,%r13 2823 xorq %r14,%r14 2824 xorq %r15,%r15 2825 leaq 48+8(%rsp),%rdi 2826 xorq %rbp,%rbp 2827 jmp .Lsqrx8x_outer_loop 2828 2829.align 32 2830.Lsqrx8x_outer_loop: 2831 mulxq 8(%rsi),%r8,%rax 2832 adcxq %r9,%r8 2833 adoxq %rax,%r10 2834 mulxq 16(%rsi),%r9,%rax 2835 adcxq %r10,%r9 2836 adoxq %rax,%r11 2837.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 2838 adcxq %r11,%r10 2839 adoxq %rax,%r12 2840.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 2841 adcxq %r12,%r11 2842 adoxq %rax,%r13 2843 mulxq 40(%rsi),%r12,%rax 2844 adcxq %r13,%r12 2845 adoxq %rax,%r14 2846 mulxq 48(%rsi),%r13,%rax 2847 adcxq %r14,%r13 2848 adoxq %r15,%rax 2849 mulxq 56(%rsi),%r14,%r15 2850 movq 8(%rsi),%rdx 2851 adcxq %rax,%r14 2852 adoxq %rbp,%r15 2853 adcq 64(%rdi),%r15 2854 movq %r8,8(%rdi) 2855 movq %r9,16(%rdi) 2856 sbbq %rcx,%rcx 2857 xorq %rbp,%rbp 2858 2859 2860 mulxq 16(%rsi),%r8,%rbx 2861 mulxq 24(%rsi),%r9,%rax 2862 adcxq %r10,%r8 2863 adoxq %rbx,%r9 2864 mulxq 32(%rsi),%r10,%rbx 2865 adcxq %r11,%r9 2866 adoxq %rax,%r10 2867.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 2868 adcxq %r12,%r10 2869 adoxq %rbx,%r11 2870.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 2871 adcxq %r13,%r11 2872 adoxq %r14,%r12 
2873.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 2874 movq 16(%rsi),%rdx 2875 adcxq %rax,%r12 2876 adoxq %rbx,%r13 2877 adcxq %r15,%r13 2878 adoxq %rbp,%r14 2879 adcxq %rbp,%r14 2880 2881 movq %r8,24(%rdi) 2882 movq %r9,32(%rdi) 2883 2884 mulxq 24(%rsi),%r8,%rbx 2885 mulxq 32(%rsi),%r9,%rax 2886 adcxq %r10,%r8 2887 adoxq %rbx,%r9 2888 mulxq 40(%rsi),%r10,%rbx 2889 adcxq %r11,%r9 2890 adoxq %rax,%r10 2891.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 2892 adcxq %r12,%r10 2893 adoxq %r13,%r11 2894.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 2895.byte 0x3e 2896 movq 24(%rsi),%rdx 2897 adcxq %rbx,%r11 2898 adoxq %rax,%r12 2899 adcxq %r14,%r12 2900 movq %r8,40(%rdi) 2901 movq %r9,48(%rdi) 2902 mulxq 32(%rsi),%r8,%rax 2903 adoxq %rbp,%r13 2904 adcxq %rbp,%r13 2905 2906 mulxq 40(%rsi),%r9,%rbx 2907 adcxq %r10,%r8 2908 adoxq %rax,%r9 2909 mulxq 48(%rsi),%r10,%rax 2910 adcxq %r11,%r9 2911 adoxq %r12,%r10 2912 mulxq 56(%rsi),%r11,%r12 2913 movq 32(%rsi),%rdx 2914 movq 40(%rsi),%r14 2915 adcxq %rbx,%r10 2916 adoxq %rax,%r11 2917 movq 48(%rsi),%r15 2918 adcxq %r13,%r11 2919 adoxq %rbp,%r12 2920 adcxq %rbp,%r12 2921 2922 movq %r8,56(%rdi) 2923 movq %r9,64(%rdi) 2924 2925 mulxq %r14,%r9,%rax 2926 movq 56(%rsi),%r8 2927 adcxq %r10,%r9 2928 mulxq %r15,%r10,%rbx 2929 adoxq %rax,%r10 2930 adcxq %r11,%r10 2931 mulxq %r8,%r11,%rax 2932 movq %r14,%rdx 2933 adoxq %rbx,%r11 2934 adcxq %r12,%r11 2935 2936 adcxq %rbp,%rax 2937 2938 mulxq %r15,%r14,%rbx 2939 mulxq %r8,%r12,%r13 2940 movq %r15,%rdx 2941 leaq 64(%rsi),%rsi 2942 adcxq %r14,%r11 2943 adoxq %rbx,%r12 2944 adcxq %rax,%r12 2945 adoxq %rbp,%r13 2946 2947.byte 0x67,0x67 2948 mulxq %r8,%r8,%r14 2949 adcxq %r8,%r13 2950 adcxq %rbp,%r14 2951 2952 cmpq 8+8(%rsp),%rsi 2953 je .Lsqrx8x_outer_break 2954 2955 negq %rcx 2956 movq $-8,%rcx 2957 movq %rbp,%r15 2958 movq 64(%rdi),%r8 2959 adcxq 72(%rdi),%r9 2960 adcxq 80(%rdi),%r10 2961 adcxq 88(%rdi),%r11 2962 adcq 96(%rdi),%r12 2963 adcq 104(%rdi),%r13 2964 adcq 112(%rdi),%r14 
2965 adcq 120(%rdi),%r15 2966 leaq (%rsi),%rbp 2967 leaq 128(%rdi),%rdi 2968 sbbq %rax,%rax 2969 2970 movq -64(%rsi),%rdx 2971 movq %rax,16+8(%rsp) 2972 movq %rdi,24+8(%rsp) 2973 2974 2975 xorl %eax,%eax 2976 jmp .Lsqrx8x_loop 2977 2978.align 32 2979.Lsqrx8x_loop: 2980 movq %r8,%rbx 2981 mulxq 0(%rbp),%rax,%r8 2982 adcxq %rax,%rbx 2983 adoxq %r9,%r8 2984 2985 mulxq 8(%rbp),%rax,%r9 2986 adcxq %rax,%r8 2987 adoxq %r10,%r9 2988 2989 mulxq 16(%rbp),%rax,%r10 2990 adcxq %rax,%r9 2991 adoxq %r11,%r10 2992 2993 mulxq 24(%rbp),%rax,%r11 2994 adcxq %rax,%r10 2995 adoxq %r12,%r11 2996 2997.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 2998 adcxq %rax,%r11 2999 adoxq %r13,%r12 3000 3001 mulxq 40(%rbp),%rax,%r13 3002 adcxq %rax,%r12 3003 adoxq %r14,%r13 3004 3005 mulxq 48(%rbp),%rax,%r14 3006 movq %rbx,(%rdi,%rcx,8) 3007 movl $0,%ebx 3008 adcxq %rax,%r13 3009 adoxq %r15,%r14 3010 3011.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 3012 movq 8(%rsi,%rcx,8),%rdx 3013 adcxq %rax,%r14 3014 adoxq %rbx,%r15 3015 adcxq %rbx,%r15 3016 3017.byte 0x67 3018 incq %rcx 3019 jnz .Lsqrx8x_loop 3020 3021 leaq 64(%rbp),%rbp 3022 movq $-8,%rcx 3023 cmpq 8+8(%rsp),%rbp 3024 je .Lsqrx8x_break 3025 3026 subq 16+8(%rsp),%rbx 3027.byte 0x66 3028 movq -64(%rsi),%rdx 3029 adcxq 0(%rdi),%r8 3030 adcxq 8(%rdi),%r9 3031 adcq 16(%rdi),%r10 3032 adcq 24(%rdi),%r11 3033 adcq 32(%rdi),%r12 3034 adcq 40(%rdi),%r13 3035 adcq 48(%rdi),%r14 3036 adcq 56(%rdi),%r15 3037 leaq 64(%rdi),%rdi 3038.byte 0x67 3039 sbbq %rax,%rax 3040 xorl %ebx,%ebx 3041 movq %rax,16+8(%rsp) 3042 jmp .Lsqrx8x_loop 3043 3044.align 32 3045.Lsqrx8x_break: 3046 xorq %rbp,%rbp 3047 subq 16+8(%rsp),%rbx 3048 adcxq %rbp,%r8 3049 movq 24+8(%rsp),%rcx 3050 adcxq %rbp,%r9 3051 movq 0(%rsi),%rdx 3052 adcq $0,%r10 3053 movq %r8,0(%rdi) 3054 adcq $0,%r11 3055 adcq $0,%r12 3056 adcq $0,%r13 3057 adcq $0,%r14 3058 adcq $0,%r15 3059 cmpq %rcx,%rdi 3060 je .Lsqrx8x_outer_loop 3061 3062 movq %r9,8(%rdi) 3063 movq 8(%rcx),%r9 3064 movq 
%r10,16(%rdi) 3065 movq 16(%rcx),%r10 3066 movq %r11,24(%rdi) 3067 movq 24(%rcx),%r11 3068 movq %r12,32(%rdi) 3069 movq 32(%rcx),%r12 3070 movq %r13,40(%rdi) 3071 movq 40(%rcx),%r13 3072 movq %r14,48(%rdi) 3073 movq 48(%rcx),%r14 3074 movq %r15,56(%rdi) 3075 movq 56(%rcx),%r15 3076 movq %rcx,%rdi 3077 jmp .Lsqrx8x_outer_loop 3078 3079.align 32 3080.Lsqrx8x_outer_break: 3081 movq %r9,72(%rdi) 3082.byte 102,72,15,126,217 3083 movq %r10,80(%rdi) 3084 movq %r11,88(%rdi) 3085 movq %r12,96(%rdi) 3086 movq %r13,104(%rdi) 3087 movq %r14,112(%rdi) 3088 leaq 48+8(%rsp),%rdi 3089 movq (%rsi,%rcx,1),%rdx 3090 3091 movq 8(%rdi),%r11 3092 xorq %r10,%r10 3093 movq 0+8(%rsp),%r9 3094 adoxq %r11,%r11 3095 movq 16(%rdi),%r12 3096 movq 24(%rdi),%r13 3097 3098 3099.align 32 3100.Lsqrx4x_shift_n_add: 3101 mulxq %rdx,%rax,%rbx 3102 adoxq %r12,%r12 3103 adcxq %r10,%rax 3104.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 3105.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 3106 adoxq %r13,%r13 3107 adcxq %r11,%rbx 3108 movq 40(%rdi),%r11 3109 movq %rax,0(%rdi) 3110 movq %rbx,8(%rdi) 3111 3112 mulxq %rdx,%rax,%rbx 3113 adoxq %r10,%r10 3114 adcxq %r12,%rax 3115 movq 16(%rsi,%rcx,1),%rdx 3116 movq 48(%rdi),%r12 3117 adoxq %r11,%r11 3118 adcxq %r13,%rbx 3119 movq 56(%rdi),%r13 3120 movq %rax,16(%rdi) 3121 movq %rbx,24(%rdi) 3122 3123 mulxq %rdx,%rax,%rbx 3124 adoxq %r12,%r12 3125 adcxq %r10,%rax 3126 movq 24(%rsi,%rcx,1),%rdx 3127 leaq 32(%rcx),%rcx 3128 movq 64(%rdi),%r10 3129 adoxq %r13,%r13 3130 adcxq %r11,%rbx 3131 movq 72(%rdi),%r11 3132 movq %rax,32(%rdi) 3133 movq %rbx,40(%rdi) 3134 3135 mulxq %rdx,%rax,%rbx 3136 adoxq %r10,%r10 3137 adcxq %r12,%rax 3138 jrcxz .Lsqrx4x_shift_n_add_break 3139.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 3140 adoxq %r11,%r11 3141 adcxq %r13,%rbx 3142 movq 80(%rdi),%r12 3143 movq 88(%rdi),%r13 3144 movq %rax,48(%rdi) 3145 movq %rbx,56(%rdi) 3146 leaq 64(%rdi),%rdi 3147 nop 3148 jmp .Lsqrx4x_shift_n_add 3149 3150.align 32 3151.Lsqrx4x_shift_n_add_break: 3152 adcxq 
%r13,%rbx 3153 movq %rax,48(%rdi) 3154 movq %rbx,56(%rdi) 3155 leaq 64(%rdi),%rdi 3156.byte 102,72,15,126,213 3157__bn_sqrx8x_reduction: 3158 xorl %eax,%eax 3159 movq 32+8(%rsp),%rbx 3160 movq 48+8(%rsp),%rdx 3161 leaq -64(%rbp,%r9,1),%rcx 3162 3163 movq %rcx,0+8(%rsp) 3164 movq %rdi,8+8(%rsp) 3165 3166 leaq 48+8(%rsp),%rdi 3167 jmp .Lsqrx8x_reduction_loop 3168 3169.align 32 3170.Lsqrx8x_reduction_loop: 3171 movq 8(%rdi),%r9 3172 movq 16(%rdi),%r10 3173 movq 24(%rdi),%r11 3174 movq 32(%rdi),%r12 3175 movq %rdx,%r8 3176 imulq %rbx,%rdx 3177 movq 40(%rdi),%r13 3178 movq 48(%rdi),%r14 3179 movq 56(%rdi),%r15 3180 movq %rax,24+8(%rsp) 3181 3182 leaq 64(%rdi),%rdi 3183 xorq %rsi,%rsi 3184 movq $-8,%rcx 3185 jmp .Lsqrx8x_reduce 3186 3187.align 32 3188.Lsqrx8x_reduce: 3189 movq %r8,%rbx 3190 mulxq 0(%rbp),%rax,%r8 3191 adcxq %rbx,%rax 3192 adoxq %r9,%r8 3193 3194 mulxq 8(%rbp),%rbx,%r9 3195 adcxq %rbx,%r8 3196 adoxq %r10,%r9 3197 3198 mulxq 16(%rbp),%rbx,%r10 3199 adcxq %rbx,%r9 3200 adoxq %r11,%r10 3201 3202 mulxq 24(%rbp),%rbx,%r11 3203 adcxq %rbx,%r10 3204 adoxq %r12,%r11 3205 3206.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 3207 movq %rdx,%rax 3208 movq %r8,%rdx 3209 adcxq %rbx,%r11 3210 adoxq %r13,%r12 3211 3212 mulxq 32+8(%rsp),%rbx,%rdx 3213 movq %rax,%rdx 3214 movq %rax,64+48+8(%rsp,%rcx,8) 3215 3216 mulxq 40(%rbp),%rax,%r13 3217 adcxq %rax,%r12 3218 adoxq %r14,%r13 3219 3220 mulxq 48(%rbp),%rax,%r14 3221 adcxq %rax,%r13 3222 adoxq %r15,%r14 3223 3224 mulxq 56(%rbp),%rax,%r15 3225 movq %rbx,%rdx 3226 adcxq %rax,%r14 3227 adoxq %rsi,%r15 3228 adcxq %rsi,%r15 3229 3230.byte 0x67,0x67,0x67 3231 incq %rcx 3232 jnz .Lsqrx8x_reduce 3233 3234 movq %rsi,%rax 3235 cmpq 0+8(%rsp),%rbp 3236 jae .Lsqrx8x_no_tail 3237 3238 movq 48+8(%rsp),%rdx 3239 addq 0(%rdi),%r8 3240 leaq 64(%rbp),%rbp 3241 movq $-8,%rcx 3242 adcxq 8(%rdi),%r9 3243 adcxq 16(%rdi),%r10 3244 adcq 24(%rdi),%r11 3245 adcq 32(%rdi),%r12 3246 adcq 40(%rdi),%r13 3247 adcq 48(%rdi),%r14 3248 adcq 
56(%rdi),%r15 3249 leaq 64(%rdi),%rdi 3250 sbbq %rax,%rax 3251 3252 xorq %rsi,%rsi 3253 movq %rax,16+8(%rsp) 3254 jmp .Lsqrx8x_tail 3255 3256.align 32 3257.Lsqrx8x_tail: 3258 movq %r8,%rbx 3259 mulxq 0(%rbp),%rax,%r8 3260 adcxq %rax,%rbx 3261 adoxq %r9,%r8 3262 3263 mulxq 8(%rbp),%rax,%r9 3264 adcxq %rax,%r8 3265 adoxq %r10,%r9 3266 3267 mulxq 16(%rbp),%rax,%r10 3268 adcxq %rax,%r9 3269 adoxq %r11,%r10 3270 3271 mulxq 24(%rbp),%rax,%r11 3272 adcxq %rax,%r10 3273 adoxq %r12,%r11 3274 3275.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3276 adcxq %rax,%r11 3277 adoxq %r13,%r12 3278 3279 mulxq 40(%rbp),%rax,%r13 3280 adcxq %rax,%r12 3281 adoxq %r14,%r13 3282 3283 mulxq 48(%rbp),%rax,%r14 3284 adcxq %rax,%r13 3285 adoxq %r15,%r14 3286 3287 mulxq 56(%rbp),%rax,%r15 3288 movq 72+48+8(%rsp,%rcx,8),%rdx 3289 adcxq %rax,%r14 3290 adoxq %rsi,%r15 3291 movq %rbx,(%rdi,%rcx,8) 3292 movq %r8,%rbx 3293 adcxq %rsi,%r15 3294 3295 incq %rcx 3296 jnz .Lsqrx8x_tail 3297 3298 cmpq 0+8(%rsp),%rbp 3299 jae .Lsqrx8x_tail_done 3300 3301 subq 16+8(%rsp),%rsi 3302 movq 48+8(%rsp),%rdx 3303 leaq 64(%rbp),%rbp 3304 adcq 0(%rdi),%r8 3305 adcq 8(%rdi),%r9 3306 adcq 16(%rdi),%r10 3307 adcq 24(%rdi),%r11 3308 adcq 32(%rdi),%r12 3309 adcq 40(%rdi),%r13 3310 adcq 48(%rdi),%r14 3311 adcq 56(%rdi),%r15 3312 leaq 64(%rdi),%rdi 3313 sbbq %rax,%rax 3314 subq $8,%rcx 3315 3316 xorq %rsi,%rsi 3317 movq %rax,16+8(%rsp) 3318 jmp .Lsqrx8x_tail 3319 3320.align 32 3321.Lsqrx8x_tail_done: 3322 xorq %rax,%rax 3323 addq 24+8(%rsp),%r8 3324 adcq $0,%r9 3325 adcq $0,%r10 3326 adcq $0,%r11 3327 adcq $0,%r12 3328 adcq $0,%r13 3329 adcq $0,%r14 3330 adcq $0,%r15 3331 adcq $0,%rax 3332 3333 subq 16+8(%rsp),%rsi 3334.Lsqrx8x_no_tail: 3335 adcq 0(%rdi),%r8 3336.byte 102,72,15,126,217 3337 adcq 8(%rdi),%r9 3338 movq 56(%rbp),%rsi 3339.byte 102,72,15,126,213 3340 adcq 16(%rdi),%r10 3341 adcq 24(%rdi),%r11 3342 adcq 32(%rdi),%r12 3343 adcq 40(%rdi),%r13 3344 adcq 48(%rdi),%r14 3345 adcq 56(%rdi),%r15 3346 adcq $0,%rax 
3347
# NOTE(review): machine-generated perlasm ("do not edit by hand").  Code tokens
# below are byte-identical to the stored text, only '#' comment lines added and
# statements re-wrapped one per line; the fused decimal numbers are artifacts
# of the stored text and are preserved.
#
# --- tail of bn_sqrx8x_internal (entry lies far above): write back the eight
# --- accumulated limbs and iterate the reduction over the next 64-byte window.
3348 movq 32+8(%rsp),%rbx
3349 movq 64(%rdi,%rcx,1),%rdx
3350
3351 movq %r8,0(%rdi)
3352 leaq 64(%rdi),%r8
3353 movq %r9,8(%rdi)
3354 movq %r10,16(%rdi)
3355 movq %r11,24(%rdi)
3356 movq %r12,32(%rdi)
3357 movq %r13,40(%rdi)
3358 movq %r14,48(%rdi)
3359 movq %r15,56(%rdi)
3360
3361 leaq 64(%rdi,%rcx,1),%rdi
3362 cmpq 8+8(%rsp),%r8
3363 jb .Lsqrx8x_reduction_loop
3364 .byte 0xf3,0xc3
# ^ encoded "rep ret"
3365.cfi_endproc
3366.size bn_sqrx8x_internal,.-bn_sqrx8x_internal
#
# __bn_postx4x_internal: BMI1-ANDN variant of __bn_post4x_internal -- the
# constant-time final conditional subtraction of the modulus.  Grounded in the
# uses below: %rbp points at n, %rcx holds the negated byte count (copied to
# %r10/%r9; %r9 re-negated on exit), %rax is the 0/-1 selector (negated
# below), and the destination pointer arrives in %xmm1.  Each limb computes
# t[i] + (~n[i] & mask) + carry with no data-dependent branch.
3367.align 32
3368.type __bn_postx4x_internal,@function
3369__bn_postx4x_internal:
3370.cfi_startproc
3371 movq 0(%rbp),%r12
# preload n[0]
3372 movq %rcx,%r10
3373 movq %rcx,%r9
3374 negq %rax
3375 sarq $3+2,%rcx
# loop count = num/32: four 64-bit limbs per pass (%rcx counts up to zero)
3376
3377.byte 102,72,15,126,202
# ^ encoded: movq %xmm1,%rdx (66 48 0f 7e ca) -- recover destination pointer
3378.byte 102,72,15,126,206
# ^ encoded: movq %xmm1,%rsi (66 48 0f 7e ce)
3379 decq %r12
# first limb becomes n[0]-1 so that ~(n[0]-1) & mask == -n[0] & mask,
# folding the +1 of the two's-complement subtraction into the masked add
3380 movq 8(%rbp),%r13
3381 xorq %r8,%r8
# %r8 = rolling carry latch (0 or -1)
3382 movq 16(%rbp),%r14
3383 movq 24(%rbp),%r15
3384 jmp .Lsqrx4x_sub_entry
3385
3386.align 16
3387.Lsqrx4x_sub:
3388 movq 0(%rbp),%r12
3389 movq 8(%rbp),%r13
3390 movq 16(%rbp),%r14
3391 movq 24(%rbp),%r15
3392.Lsqrx4x_sub_entry:
3393 andnq %rax,%r12,%r12
# BMI1 ANDN: %r12 = ~n[i] & mask, in one flagless step (vs. notq+andq)
3394 leaq 32(%rbp),%rbp
3395 andnq %rax,%r13,%r13
3396 andnq %rax,%r14,%r14
3397 andnq %rax,%r15,%r15
3398
3399 negq %r8
# reload CF from the latched carry
3400 adcq 0(%rdi),%r12
# r[i] = t[i] + (~n[i] & mask) + carry  ==  t[i] - n[i] when mask == -1
3401 adcq 8(%rdi),%r13
3402 adcq 16(%rdi),%r14
3403 adcq 24(%rdi),%r15
3404 movq %r12,0(%rdx)
# results go to %rdx, the destination recovered from %xmm1 above
3405 leaq 32(%rdi),%rdi
3406 movq %r13,8(%rdx)
3407 sbbq %r8,%r8
# latch carry-out: %r8 = CF ? -1 : 0
3408 movq %r14,16(%rdx)
3409 movq %r15,24(%rdx)
3410 leaq 32(%rdx),%rdx
3411
3412 incq %rcx
3413 jnz .Lsqrx4x_sub
3414
3415 negq %r9
# hand the positive length back to the caller
3416
3417 .byte 0xf3,0xc3
# ^ encoded "rep ret"
3418.cfi_endproc
3419.size __bn_postx4x_internal,.-__bn_postx4x_internal
#
# bn_scatter5(inp=%rdi, num=%esi, table=%rdx, idx=%rcx): store num 64-bit
# words of inp into the power table, starting at slot idx, with a 256-byte
# stride between consecutive words (the table interleaves its entries;
# bn_gather5 below reads the same layout).
3420.globl bn_scatter5
3421.hidden bn_scatter5
3422.type bn_scatter5,@function
3423.align 16
3424bn_scatter5:
3425.cfi_startproc
3426 cmpl $0,%esi
3427 jz .Lscatter_epilogue
# num == 0: nothing to store
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437 leaq (%rdx,%rcx,8),%rdx
# %rdx = &table[idx]
3438.Lscatter:
3439 movq (%rdi),%rax
3440 leaq 8(%rdi),%rdi
3441 movq %rax,(%rdx)
3442 leaq 256(%rdx),%rdx
# next word of this entry lives 256 bytes on (= 32 entries * 8 bytes)
3443 subl $1,%esi
3444 jnz .Lscatter
3445.Lscatter_epilogue:
3446 .byte
0xf3,0xc3 3447.cfi_endproc 3448.size bn_scatter5,.-bn_scatter5 3449 3450.globl bn_gather5 3451.hidden bn_gather5 3452.type bn_gather5,@function 3453.align 32 3454bn_gather5: 3455.cfi_startproc 3456.LSEH_begin_bn_gather5: 3457 3458.byte 0x4c,0x8d,0x14,0x24 3459.cfi_def_cfa_register %r10 3460.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 3461 leaq .Linc(%rip),%rax 3462 andq $-16,%rsp 3463 3464 movd %ecx,%xmm5 3465 movdqa 0(%rax),%xmm0 3466 movdqa 16(%rax),%xmm1 3467 leaq 128(%rdx),%r11 3468 leaq 128(%rsp),%rax 3469 3470 pshufd $0,%xmm5,%xmm5 3471 movdqa %xmm1,%xmm4 3472 movdqa %xmm1,%xmm2 3473 paddd %xmm0,%xmm1 3474 pcmpeqd %xmm5,%xmm0 3475 movdqa %xmm4,%xmm3 3476 3477 paddd %xmm1,%xmm2 3478 pcmpeqd %xmm5,%xmm1 3479 movdqa %xmm0,-128(%rax) 3480 movdqa %xmm4,%xmm0 3481 3482 paddd %xmm2,%xmm3 3483 pcmpeqd %xmm5,%xmm2 3484 movdqa %xmm1,-112(%rax) 3485 movdqa %xmm4,%xmm1 3486 3487 paddd %xmm3,%xmm0 3488 pcmpeqd %xmm5,%xmm3 3489 movdqa %xmm2,-96(%rax) 3490 movdqa %xmm4,%xmm2 3491 paddd %xmm0,%xmm1 3492 pcmpeqd %xmm5,%xmm0 3493 movdqa %xmm3,-80(%rax) 3494 movdqa %xmm4,%xmm3 3495 3496 paddd %xmm1,%xmm2 3497 pcmpeqd %xmm5,%xmm1 3498 movdqa %xmm0,-64(%rax) 3499 movdqa %xmm4,%xmm0 3500 3501 paddd %xmm2,%xmm3 3502 pcmpeqd %xmm5,%xmm2 3503 movdqa %xmm1,-48(%rax) 3504 movdqa %xmm4,%xmm1 3505 3506 paddd %xmm3,%xmm0 3507 pcmpeqd %xmm5,%xmm3 3508 movdqa %xmm2,-32(%rax) 3509 movdqa %xmm4,%xmm2 3510 paddd %xmm0,%xmm1 3511 pcmpeqd %xmm5,%xmm0 3512 movdqa %xmm3,-16(%rax) 3513 movdqa %xmm4,%xmm3 3514 3515 paddd %xmm1,%xmm2 3516 pcmpeqd %xmm5,%xmm1 3517 movdqa %xmm0,0(%rax) 3518 movdqa %xmm4,%xmm0 3519 3520 paddd %xmm2,%xmm3 3521 pcmpeqd %xmm5,%xmm2 3522 movdqa %xmm1,16(%rax) 3523 movdqa %xmm4,%xmm1 3524 3525 paddd %xmm3,%xmm0 3526 pcmpeqd %xmm5,%xmm3 3527 movdqa %xmm2,32(%rax) 3528 movdqa %xmm4,%xmm2 3529 paddd %xmm0,%xmm1 3530 pcmpeqd %xmm5,%xmm0 3531 movdqa %xmm3,48(%rax) 3532 movdqa %xmm4,%xmm3 3533 3534 paddd %xmm1,%xmm2 3535 pcmpeqd %xmm5,%xmm1 3536 movdqa %xmm0,64(%rax) 3537 movdqa 
%xmm4,%xmm0 3538 3539 paddd %xmm2,%xmm3 3540 pcmpeqd %xmm5,%xmm2 3541 movdqa %xmm1,80(%rax) 3542 movdqa %xmm4,%xmm1 3543 3544 paddd %xmm3,%xmm0 3545 pcmpeqd %xmm5,%xmm3 3546 movdqa %xmm2,96(%rax) 3547 movdqa %xmm4,%xmm2 3548 movdqa %xmm3,112(%rax) 3549 jmp .Lgather 3550 3551.align 32 3552.Lgather: 3553 pxor %xmm4,%xmm4 3554 pxor %xmm5,%xmm5 3555 movdqa -128(%r11),%xmm0 3556 movdqa -112(%r11),%xmm1 3557 movdqa -96(%r11),%xmm2 3558 pand -128(%rax),%xmm0 3559 movdqa -80(%r11),%xmm3 3560 pand -112(%rax),%xmm1 3561 por %xmm0,%xmm4 3562 pand -96(%rax),%xmm2 3563 por %xmm1,%xmm5 3564 pand -80(%rax),%xmm3 3565 por %xmm2,%xmm4 3566 por %xmm3,%xmm5 3567 movdqa -64(%r11),%xmm0 3568 movdqa -48(%r11),%xmm1 3569 movdqa -32(%r11),%xmm2 3570 pand -64(%rax),%xmm0 3571 movdqa -16(%r11),%xmm3 3572 pand -48(%rax),%xmm1 3573 por %xmm0,%xmm4 3574 pand -32(%rax),%xmm2 3575 por %xmm1,%xmm5 3576 pand -16(%rax),%xmm3 3577 por %xmm2,%xmm4 3578 por %xmm3,%xmm5 3579 movdqa 0(%r11),%xmm0 3580 movdqa 16(%r11),%xmm1 3581 movdqa 32(%r11),%xmm2 3582 pand 0(%rax),%xmm0 3583 movdqa 48(%r11),%xmm3 3584 pand 16(%rax),%xmm1 3585 por %xmm0,%xmm4 3586 pand 32(%rax),%xmm2 3587 por %xmm1,%xmm5 3588 pand 48(%rax),%xmm3 3589 por %xmm2,%xmm4 3590 por %xmm3,%xmm5 3591 movdqa 64(%r11),%xmm0 3592 movdqa 80(%r11),%xmm1 3593 movdqa 96(%r11),%xmm2 3594 pand 64(%rax),%xmm0 3595 movdqa 112(%r11),%xmm3 3596 pand 80(%rax),%xmm1 3597 por %xmm0,%xmm4 3598 pand 96(%rax),%xmm2 3599 por %xmm1,%xmm5 3600 pand 112(%rax),%xmm3 3601 por %xmm2,%xmm4 3602 por %xmm3,%xmm5 3603 por %xmm5,%xmm4 3604 leaq 256(%r11),%r11 3605 3606 pshufd $0x4e,%xmm4,%xmm0 3607 por %xmm4,%xmm0 3608 movq %xmm0,(%rdi) 3609 leaq 8(%rdi),%rdi 3610 subl $1,%esi 3611 jnz .Lgather 3612 3613 leaq (%r10),%rsp 3614.cfi_def_cfa_register %rsp 3615 .byte 0xf3,0xc3 3616.LSEH_end_bn_gather5: 3617.cfi_endproc 3618.size bn_gather5,.-bn_gather5 3619.section .rodata 3620.align 64 3621.Linc: 3622.long 0,0, 1,1 3623.long 2,2, 2,2 3624.byte 
77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 3625.text 3626#endif 3627#if defined(__ELF__) 3628// See https://www.airs.com/blog/archives/518. 3629.section .note.GNU-stack,"",%progbits 3630#endif 3631