# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#
# AT&T/GAS syntax, x86-64, System V AMD64 ABI, ELF.  Montgomery
# multiplication with a constant-time "gather" of the multiplier from a
# power table (cache-timing countermeasure: every table entry is touched
# and masked, never indexed by secret data).

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text

.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

#-----------------------------------------------------------------------
# bn_mul_mont_gather5(rp=%rdi, ap=%rsi, table=%rdx, np=%rcx,
#                     n0=%r8 (pointer), num=%r9d, power at 8(%rsp))
# Montgomery multiply: rp[] = ap[] * table[power] / R mod np[], num limbs.
# The 7th (stack) argument selects which entry of the pre-computed power
# table to use — presumably the exponent window index; confirm against the
# C prototype in the caller.  Clobbers all argument regs; callee-saved regs
# are pushed below.  .Linc is a constant defined elsewhere in this file.
#-----------------------------------------------------------------------
.globl	bn_mul_mont_gather5
.hidden bn_mul_mont_gather5
.type	bn_mul_mont_gather5,@function
.align	64
bn_mul_mont_gather5:
.cfi_startproc
	movl	%r9d,%r9d		# zero-extend num to 64 bits
	movq	%rsp,%rax		# keep original %rsp for CFI/epilogue
.cfi_def_cfa_register	%rax
	testl	$7,%r9d			# num % 8 != 0 -> scalar path
	jnz	.Lmul_enter
	leaq	OPENSSL_ia32cap_P(%rip),%r11
	movl	8(%r11),%r11d		# capability word for 4x dispatch
	jmp	.Lmul4x_enter

.align	16
.Lmul_enter:
	movd	8(%rsp),%xmm5		# grab stack arg (power) before pushes move %rsp
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	negq	%r9
	movq	%rsp,%r11
	leaq	-280(%rsp,%r9,8),%r10	# carve num*8 + slack bytes of scratch
	negq	%r9
	andq	$-1024,%r10		# minimize cache-bank aliasing with table

	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
	jmp	.Lmul_page_walk_done

.Lmul_page_walk:
	# probe the new stack region one page at a time (guard-page safety)
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
.Lmul_page_walk_done:

	leaq	.Linc(%rip),%r10
	movq	%rax,8(%rsp,%r9,8)	# save original %rsp above the scratch tp[]
.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:

	# Build sixteen 128-bit equality masks: lane i is all-ones iff
	# i == power (xmm5).  Masks land at 112..352(%r10).
	leaq	128(%rdx),%r12		# bias table pointer for +-128 offsets
	movdqa	0(%r10),%xmm0		# .Linc counters {0,1} / {2,2}
	movdqa	16(%r10),%xmm1
	leaq	24-112(%rsp,%r9,8),%r10
	andq	$-16,%r10

	pshufd	$0,%xmm5,%xmm5		# broadcast power to all 4 dwords
	movdqa	%xmm1,%xmm4
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
.byte	0x67				# addr-size prefix: alignment padding from perlasm
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,112(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,128(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,144(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,160(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,176(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,192(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,208(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,224(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,240(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,256(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,272(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,288(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,304(%r10)

	paddd	%xmm2,%xmm3
.byte	0x67
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,320(%r10)

	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,336(%r10)
	pand	64(%r12),%xmm0		# begin constant-time gather of b[power]:

	pand	80(%r12),%xmm1		# AND every table vector with its mask,
	pand	96(%r12),%xmm2		# OR everything together
	movdqa	%xmm3,352(%r10)
	pand	112(%r12),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-128(%r12),%xmm4
	movdqa	-112(%r12),%xmm5
	movdqa	-96(%r12),%xmm2
	pand	112(%r10),%xmm4
	movdqa	-80(%r12),%xmm3
	pand	128(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	144(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	160(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-64(%r12),%xmm4
	movdqa	-48(%r12),%xmm5
	movdqa	-32(%r12),%xmm2
	pand	176(%r10),%xmm4
	movdqa	-16(%r12),%xmm3
	pand	192(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	208(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	224(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	0(%r12),%xmm4
	movdqa	16(%r12),%xmm5
	movdqa	32(%r12),%xmm2
	pand	240(%r10),%xmm4
	movdqa	48(%r12),%xmm3
	pand	256(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	272(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	288(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	por	%xmm1,%xmm0
	pshufd	$0x4e,%xmm0,%xmm1	# fold high qword onto low
	por	%xmm1,%xmm0
	leaq	256(%r12),%r12		# advance to next table column
.byte	102,72,15,126,195		# movq %xmm0,%rbx  (gathered b[0])

	movq	(%r8),%r8		# n0 = *n0p
	movq	(%rsi),%rax		# ap[0]

	xorq	%r14,%r14		# i = 0 (outer index)
	xorq	%r15,%r15		# j = 0 (inner index)

	# ----- first outer iteration: tp[] = ap[] * b[0], reduced -----
	movq	%r8,%rbp
	mulq	%rbx			# ap[0]*bp[0]
	movq	%rax,%r10
	movq	(%rcx),%rax		# np[0]

	imulq	%r10,%rbp		# m = tp[0]*n0 (mod 2^64)
	movq	%rdx,%r11

	mulq	%rbp			# np[0]*m
	addq	%rax,%r10		# discarded low limb, keep carry
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%r13

	leaq	1(%r15),%r15		# j = 1
	jmp	.L1st_enter

.align	16
.L1st:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r13		# np[j]*m + ap[j]*bp[0]
	movq	%r10,%r11
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)	# tp[j-1]
	movq	%rdx,%r13

.L1st_enter:
	mulq	%rbx			# ap[j]*bp[0]
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	1(%r15),%r15		# j++
	movq	%rdx,%r10

	mulq	%rbp			# np[j]*m
	cmpq	%r9,%r15
	jne	.L1st

	addq	%rax,%r13
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r9,8)	# tp[num-1]
	movq	%rdx,%r13
	movq	%r10,%r11

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)	# store top-most carry

	leaq	1(%r14),%r14		# i = 1
	jmp	.Louter
.align	16
.Louter:
	# Gather b[i] for this outer pass: masks were cached on the stack,
	# so re-AND/OR them against all 16 table vectors.
	leaq	24+128(%rsp,%r9,8),%rdx
	andq	$-16,%rdx
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	movdqa	-128(%r12),%xmm0
	movdqa	-112(%r12),%xmm1
	movdqa	-96(%r12),%xmm2
	movdqa	-80(%r12),%xmm3
	pand	-128(%rdx),%xmm0
	pand	-112(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-80(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%r12),%xmm0
	movdqa	-48(%r12),%xmm1
	movdqa	-32(%r12),%xmm2
	movdqa	-16(%r12),%xmm3
	pand	-64(%rdx),%xmm0
	pand	-48(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-16(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%r12),%xmm0
	movdqa	16(%r12),%xmm1
	movdqa	32(%r12),%xmm2
	movdqa	48(%r12),%xmm3
	pand	0(%rdx),%xmm0
	pand	16(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	48(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%r12),%xmm0
	movdqa	80(%r12),%xmm1
	movdqa	96(%r12),%xmm2
	movdqa	112(%r12),%xmm3
	pand	64(%rdx),%xmm0
	pand	80(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	112(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4
	pshufd	$0x4e,%xmm4,%xmm0
	por	%xmm4,%xmm0
	leaq	256(%r12),%r12

	movq	(%rsi),%rax		# ap[0]
.byte	102,72,15,126,195		# movq %xmm0,%rbx  (b[i])

	xorq	%r15,%r15		# j = 0
	movq	%r8,%rbp
	movq	(%rsp),%r10		# tp[0]

	mulq	%rbx			# ap[0]*bp[i]
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp		# m = tp[0]*n0
	movq	%rdx,%r11

	mulq	%rbp			# np[0]*m
	addq	%rax,%r10		# annihilates tp[0]
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	8(%rsp),%r10		# tp[1]
	movq	%rdx,%r13

	leaq	1(%r15),%r15		# j = 1
	jmp	.Linner_enter

.align	16
.Linner:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13		# + tp[j]
	movq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)	# tp[j-1]
	movq	%rdx,%r13

.Linner_enter:
	mulq	%rbx			# ap[j]*bp[i]
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r10		# ap[j]*bp[i] + tp[j]
	movq	%rdx,%r11
	adcq	$0,%r11
	leaq	1(%r15),%r15		# j++

	mulq	%rbp			# np[j]*m
	cmpq	%r9,%r15
	jne	.Linner

	addq	%rax,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r9,8),%r10	# saved top carry
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r9,8)
	movq	%rdx,%r13

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)	# new top carry

	leaq	1(%r14),%r14		# i++
	cmpq	%r9,%r14
	jb	.Louter

	# ----- conditional final subtraction: rp = tp - np if tp >= np -----
	xorq	%r14,%r14		# clear CF and index
	movq	(%rsp),%rax		# tp[0]
	leaq	(%rsp),%rsi
	movq	%r9,%r15		# countdown = num
	jmp	.Lsub
.align	16
.Lsub:	sbbq	(%rcx,%r14,8),%rax	# tp[i] - np[i] - borrow
	movq	%rax,(%rdi,%r14,8)	# rp[i] = result (may be discarded below)
	movq	8(%rsi,%r14,8),%rax
	leaq	1(%r14),%r14
	decq	%r15
	jnz	.Lsub

	sbbq	$0,%rax			# include top carry limb in borrow
	movq	$-1,%rbx
	xorq	%rax,%rbx		# rbx = ~mask, rax = mask (all-0/all-1)
	xorq	%r14,%r14
	movq	%r9,%r15

.Lcopy:
	# Constant-time select between rp (subtracted) and tp (not),
	# wiping the stack copy as we go.
	movq	(%rdi,%r14,8),%rcx
	movq	(%rsp,%r14,8),%rdx
	andq	%rbx,%rcx
	andq	%rax,%rdx
	movq	%r14,(%rsp,%r14,8)	# zap tp[i] with a non-secret value
	orq	%rcx,%rdx
	movq	%rdx,(%rdi,%r14,8)
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	.Lcopy

	movq	8(%rsp,%r9,8),%rsi	# recover original %rsp
.cfi_def_cfa	%rsi,8
	movq	$1,%rax			# return 1

	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3		# rep ret (AMD branch-predictor friendly)
.cfi_endproc
.size	bn_mul_mont_gather5,.-bn_mul_mont_gather5

#-----------------------------------------------------------------------
# bn_mul4x_mont_gather5 — 4-way unrolled variant, taken when num % 8 == 0.
# %r11d arrives holding OPENSSL_ia32cap_P[2]; BMI2+ADX parts dispatch to
# .Lmulx4x_enter (defined elsewhere in this file).  Stack frame is placed
# to avoid a 4096-byte aliasing conflict with the output pointer, then
# probed page by page.  Heavy lifting is in mul4x_internal.
#-----------------------------------------------------------------------
.type	bn_mul4x_mont_gather5,@function
.align	32
bn_mul4x_mont_gather5:
.cfi_startproc
.byte	0x67
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lmul4x_enter:
	andl	$0x80108,%r11d		# BMI2|ADX|... feature mask
	cmpl	$0x80108,%r11d
	je	.Lmulx4x_enter		# all present -> MULX/ADX code path
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lmul4x_prologue:

.byte	0x67
	shll	$3,%r9d			# num in bytes
	leaq	(%r9,%r9,2),%r10	# 3*num bytes
	negq	%r9

	# Choose a frame that does not alias rp modulo 4096.
	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lmul4xsp_alt
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp	# frame = 2*num + 320 bytes
	jmp	.Lmul4xsp_done

.align	32
.Lmul4xsp_alt:
	leaq	4096-320(,%r9,2),%r10
	leaq	-320(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lmul4xsp_done:
	andq	$-64,%rbp		# 64-byte align the frame
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmul4x_page_walk
	jmp	.Lmul4x_page_walk_done

.Lmul4x_page_walk:
	# probe each page of the newly claimed stack
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmul4x_page_walk
.Lmul4x_page_walk_done:

	negq	%r9			# back to -num (bytes)

	movq	%rax,40(%rsp)		# save original %rsp
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lmul4x_body:

	call	mul4x_internal

	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax			# return 1

	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul4x_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5

#-----------------------------------------------------------------------
# mul4x_internal — body shared by bn_mul4x_mont_gather5 and bn_power5.
# On entry: %r9 = num (bytes/8 after shifts below), %rdx = power table,
# %rax points at the saved frame (power index read from 8(%rax)).
# Builds the same 16 equality masks as above, gathers b[0], then runs the
# 4-way unrolled Montgomery passes.  Falls through into the shared
# .Lsqr4x_sub tail (defined elsewhere in this file).
#-----------------------------------------------------------------------
.type	mul4x_internal,@function
.align	32
mul4x_internal:
.cfi_startproc
	shlq	$5,%r9			# num *= 32
	movd	8(%rax),%xmm5		# power index
	leaq	.Linc(%rip),%rax
	leaq	128(%rdx,%r9,1),%r13	# end-of-table sentinel
	shrq	$5,%r9			# num restored
	movdqa	0(%rax),%xmm0
	movdqa	16(%rax),%xmm1
	leaq	88-112(%rsp,%r9,1),%r10	# mask scratch area
	leaq	128(%rdx),%r12		# biased table pointer

	pshufd	$0,%xmm5,%xmm5		# broadcast power
	movdqa	%xmm1,%xmm4
.byte	0x67,0x67			# alignment padding
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
.byte	0x67
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,112(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,128(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,144(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,160(%r10)
	# (continuation of mul4x_internal's mask build: same pattern as the
	#  scalar path — 16 pcmpeqd masks stored at 112..352(%r10).)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,176(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,192(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,208(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,224(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,240(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,256(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,272(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,288(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,304(%r10)

	paddd	%xmm2,%xmm3
.byte	0x67				# alignment padding
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,320(%r10)

	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,336(%r10)
	pand	64(%r12),%xmm0		# constant-time gather of b[power]

	pand	80(%r12),%xmm1
	pand	96(%r12),%xmm2
	movdqa	%xmm3,352(%r10)
	pand	112(%r12),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-128(%r12),%xmm4
	movdqa	-112(%r12),%xmm5
	movdqa	-96(%r12),%xmm2
	pand	112(%r10),%xmm4
	movdqa	-80(%r12),%xmm3
	pand	128(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	144(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	160(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-64(%r12),%xmm4
	movdqa	-48(%r12),%xmm5
	movdqa	-32(%r12),%xmm2
	pand	176(%r10),%xmm4
	movdqa	-16(%r12),%xmm3
	pand	192(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	208(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	224(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	0(%r12),%xmm4
	movdqa	16(%r12),%xmm5
	movdqa	32(%r12),%xmm2
	pand	240(%r10),%xmm4
	movdqa	48(%r12),%xmm3
	pand	256(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	272(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	288(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	por	%xmm1,%xmm0
	pshufd	$0x4e,%xmm0,%xmm1	# fold high qword onto low
	por	%xmm1,%xmm0
	leaq	256(%r12),%r12
.byte	102,72,15,126,195		# movq %xmm0,%rbx  (b[0])

	movq	%r13,16+8(%rsp)		# stash end-of-table marker
	movq	%rdi,56+8(%rsp)		# stash rp

	movq	(%r8),%r8		# n0
	movq	(%rsi),%rax		# ap[0]
	leaq	(%rsi,%r9,1),%rsi	# point at end of ap; index from -num
	negq	%r9

	# ----- first pass: tp[] = ap[]*b[0] + m*np[], 4 limbs per round -----
	movq	%r8,%rbp
	mulq	%rbx			# ap[0]*b[0]
	movq	%rax,%r10
	movq	(%rcx),%rax		# np[0]

	imulq	%r10,%rbp		# m = tp[0]*n0
	leaq	64+8(%rsp),%r14		# tp
	movq	%rdx,%r11

	mulq	%rbp			# np[0]*m
	addq	%rax,%r10
	movq	8(%rsi,%r9,1),%rax	# ap[1]
	adcq	$0,%rdx
	movq	%rdx,%rdi		# %rdi doubles as carry inside loop

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	32(%r9),%r15		# j = 2 limbs in, counts up to 0
	leaq	32(%rcx),%rcx
	adcq	$0,%rdx
	movq	%rdi,(%r14)
	movq	%rdx,%r13
	jmp	.L1st4x

.align	32
.L1st4x:
	# four interleaved mul/accumulate pairs per iteration
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx),%rax
	leaq	32(%r14),%r14
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%r14)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	0(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	32(%rcx),%rcx
	adcq	$0,%rdx
	movq	%rdi,(%r14)
	movq	%rdx,%r13

	addq	$32,%r15		# j += 4 limbs
	jnz	.L1st4x

	# ----- first-pass tail: last two limbs + carry fold -----
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx),%rax
	leaq	32(%r14),%r14
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r9,1),%rax	# ap[0] again, for next outer pass
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%r14)
	movq	%rdx,%r13

	leaq	(%rcx,%r9,1),%rcx	# rewind np

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi			# top carry kept in %rdi
	movq	%r13,-8(%r14)

	jmp	.Louter4x

.align	32
.Louter4x:
	# gather b[i] for this pass (masks already on the stack)
	leaq	16+128(%r14),%rdx
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	movdqa	-128(%r12),%xmm0
	movdqa	-112(%r12),%xmm1
	movdqa	-96(%r12),%xmm2
	movdqa	-80(%r12),%xmm3
	pand	-128(%rdx),%xmm0
	pand	-112(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-80(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%r12),%xmm0
	movdqa	-48(%r12),%xmm1
	movdqa	-32(%r12),%xmm2
	movdqa	-16(%r12),%xmm3
	pand	-64(%rdx),%xmm0
	pand	-48(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-16(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%r12),%xmm0
	movdqa	16(%r12),%xmm1
	movdqa	32(%r12),%xmm2
	movdqa	48(%r12),%xmm3
	pand	0(%rdx),%xmm0
	pand	16(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	48(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%r12),%xmm0
	movdqa	80(%r12),%xmm1
	movdqa	96(%r12),%xmm2
	movdqa	112(%r12),%xmm3
	pand	64(%rdx),%xmm0
	pand	80(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	112(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4
	pshufd	$0x4e,%xmm4,%xmm0
	por	%xmm4,%xmm0
	leaq	256(%r12),%r12		# next table column
.byte	102,72,15,126,195		# movq %xmm0,%rbx  (b[i])

	# ----- outer pass i: tp[] = tp[] + ap[]*b[i] + m*np[] -----
	movq	(%r14,%r9,1),%r10	# tp[0]
	movq	%r8,%rbp
	mulq	%rbx			# ap[0]*b[i]
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp		# m = tp[0]*n0
	movq	%rdx,%r11
	movq	%rdi,(%r14)		# flush previous top carry

	leaq	(%r14,%r9,1),%r14	# rewind tp

	mulq	%rbp			# np[0]*m
	addq	%rax,%r10		# annihilates tp[0]
	movq	8(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%r14),%r11		# + tp[1]
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	32(%r9),%r15
	leaq	32(%rcx),%rcx
	adcq	$0,%rdx
	movq	%rdx,%r13
	jmp	.Linner4x

.align	32
.Linner4x:
	# 4 limbs per iteration, accumulating into existing tp[]
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx),%rax
	adcq	$0,%rdx
	addq	16(%r14),%r10
	leaq	32(%r14),%r14
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-32(%r14)		# store limb from previous round
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx),%rax
	adcq	$0,%rdx
	addq	-8(%r14),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	0(%rcx),%rax
	adcq	$0,%rdx
	addq	(%r14),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-16(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%r14),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	32(%rcx),%rcx
	adcq	$0,%rdx
	movq	%r13,-8(%r14)
	movq	%rdx,%r13

	addq	$32,%r15		# j += 4 limbs
	jnz	.Linner4x

	# ----- inner-loop tail -----
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx),%rax
	adcq	$0,%rdx
	addq	16(%r14),%r10
	leaq	32(%r14),%r14
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-32(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	%rbp,%rax		# recover m; np[-1] becomes multiplier
	movq	-8(%rcx),%rbp
	adcq	$0,%rdx
	addq	-8(%r14),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp			# m * np[num-1]
	addq	%rax,%rdi
	movq	(%rsi,%r9,1),%rax	# ap[0] for next pass
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%r13

	movq	%rdi,-16(%r14)
	leaq	(%rcx,%r9,1),%rcx	# rewind np

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	addq	(%r14),%r13		# + saved top carry
	adcq	$0,%rdi
	movq	%r13,-8(%r14)

	cmpq	16+8(%rsp),%r12		# consumed whole power table?
	jb	.Louter4x
	# Set up registers for the shared conditional-subtract tail
	# (.Lsqr4x_sub_entry lives elsewhere in this file).
	xorq	%rax,%rax
	subq	%r13,%rbp
	adcq	%r15,%r15
	orq	%r15,%rdi
	subq	%rdi,%rax		# rax = 0 or -1 select mask
	leaq	(%r14,%r9,1),%rbx	# tp
	movq	(%rcx),%r12		# np[0..3] preloaded for the tail
	leaq	(%rcx),%rbp
	movq	%r9,%rcx
	sarq	$3+2,%rcx		# limb count / 4
	movq	56+8(%rsp),%rdi		# rp
	decq	%r12
	xorq	%r10,%r10
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	.Lsqr4x_sub_entry
.cfi_endproc
.size	mul4x_internal,.-mul4x_internal

#-----------------------------------------------------------------------
# bn_power5(rp=%rdi, ap=%rsi, table=%rdx, np=%rcx, n0=%r8, num=%r9d, ...)
# Computes five successive Montgomery squarings of ap followed by one
# gathered Montgomery multiplication (a^32 * b[power] pattern used for
# fixed-window exponentiation — confirm exact contract against the C
# declaration).  Dispatches to .Lpowerx5_enter on BMI2+ADX hardware.
#-----------------------------------------------------------------------
.globl	bn_power5
.hidden bn_power5
.type	bn_power5,@function
.align	32
bn_power5:
.cfi_startproc
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	leaq	OPENSSL_ia32cap_P(%rip),%r11
	movl	8(%r11),%r11d
	andl	$0x80108,%r11d		# BMI2|ADX feature test
	cmpl	$0x80108,%r11d
	je	.Lpowerx5_enter		# MULX/ADX path (defined elsewhere)
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lpower5_prologue:

	shll	$3,%r9d			# num in bytes
	leal	(%r9,%r9,2),%r10d	# 3*num
	negq	%r9
	movq	(%r8),%r8		# n0

	# Frame placement avoiding 4096-aliasing with rp, as in mul4x.
	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lpwr_sp_alt
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp
	jmp	.Lpwr_sp_done

.align	32
.Lpwr_sp_alt:
	leaq	4096-320(,%r9,2),%r10
	leaq	-320(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lpwr_sp_done:
	andq	$-64,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lpwr_page_walk
	jmp	.Lpwr_page_walk_done

.Lpwr_page_walk:
	# probe new stack pages
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lpwr_page_walk
.Lpwr_page_walk_done:

	movq	%r9,%r10
	negq	%r9

	movq	%r8,32(%rsp)		# save n0 and original %rsp in frame
	movq	%rax,40(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lpower5_body:
.byte	102,72,15,110,207		# movq %rdi,%xmm1  (rp)
.byte	102,72,15,110,209		# movq %rcx,%xmm2  (np)
.byte	102,73,15,110,218		# movq %r10,%xmm3  (num)
.byte	102,72,15,110,226		# movq %rdx,%xmm4  (table)

	call	__bn_sqr8x_internal	# five square+reduce rounds: a^2, a^4,
	call	__bn_post4x_internal	# a^8, a^16, a^32
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal

.byte	102,72,15,126,209		# movq %xmm2,%rcx  (np)
.byte	102,72,15,126,226		# movq %xmm4,%rdx  (table)
	movq	%rsi,%rdi		# result of squarings becomes rp input
	movq	40(%rsp),%rax
	leaq	32(%rsp),%r8

	call	mul4x_internal		# final gathered multiplication

	movq	40(%rsp),%rsi		# restore and return 1
.cfi_def_cfa	%rsi,8
	movq	$1,%rax
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpower5_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	bn_power5,.-bn_power5

#-----------------------------------------------------------------------
# bn_sqr8x_internal / __bn_sqr8x_internal
# Montgomery squaring core.  Expects the register/stack setup established
# by bn_power5 above (%rsi=ap end-biased, %r9=-num bytes, frame scratch at
# 48+8(%rsp)); full contract not visible in this chunk — the body
# continues beyond this view.  The generated file carries a large blank
# region here where the perlasm commentary was stripped.
#-----------------------------------------------------------------------
.globl	bn_sqr8x_internal
.hidden	bn_sqr8x_internal
.hidden	bn_sqr8x_internal
.type	bn_sqr8x_internal,@function
.align	32
bn_sqr8x_internal:
__bn_sqr8x_internal:
.cfi_startproc

	# Off-diagonal products: a[i]*a[j] for i<j, two source limbs
	# (%r14/%r15) against the running window of a[].
	leaq	32(%r10),%rbp
	leaq	(%rsi,%r9,1),%rsi

	movq	%r9,%rcx

	movq	-32(%rsi,%rbp,1),%r14
	leaq	48+8(%rsp,%r9,2),%rdi
	movq	-24(%rsi,%rbp,1),%rax
	leaq	-32(%rdi,%rbp,1),%rdi
	movq	-16(%rsi,%rbp,1),%rbx
	movq	%rax,%r15

	mulq	%r14			# a[1]*a[0]
	movq	%rax,%r10
	movq	%rbx,%rax
	movq	%rdx,%r11
	movq	%r10,-24(%rdi,%rbp,1)

	mulq	%r14			# a[2]*a[0]
	addq	%rax,%r11
	movq	%rbx,%rax
	adcq	$0,%rdx
	movq	%r11,-16(%rdi,%rbp,1)
	movq	%rdx,%r10

	movq	-8(%rsi,%rbp,1),%rbx
	mulq	%r15			# a[2]*a[1]
	movq	%rax,%r12
	movq	%rbx,%rax
	movq	%rdx,%r13

	leaq	(%rbp),%rcx
	mulq	%r14			# a[3]*a[0]
	addq	%rax,%r10
	movq	%rbx,%rax
	movq	%rdx,%r11
	adcq	$0,%r11
	addq	%r12,%r10
	adcq	$0,%r11
	movq	%r10,-8(%rdi,%rcx,1)
	jmp	.Lsqr4x_1st

.align	32
.Lsqr4x_1st:
	# first squaring pass, 2 limbs of a[] against %r14/%r15 per round
	movq	(%rsi,%rcx,1),%rbx
	mulq	%r15
	addq	%rax,%r13
	movq	%rbx,%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%r14
	addq	%rax,%r11
	movq	%rbx,%rax
	movq	8(%rsi,%rcx,1),%rbx
	movq	%rdx,%r10
	adcq	$0,%r10
	addq	%r13,%r11
	adcq	$0,%r10

	mulq	%r15
	addq	%rax,%r12
	movq	%rbx,%rax
	movq	%r11,(%rdi,%rcx,1)
	movq	%rdx,%r13
	adcq	$0,%r13
1381 mulq %r14 1382 addq %rax,%r10 1383 movq %rbx,%rax 1384 movq 16(%rsi,%rcx,1),%rbx 1385 movq %rdx,%r11 1386 adcq $0,%r11 1387 addq %r12,%r10 1388 adcq $0,%r11 1389 1390 mulq %r15 1391 addq %rax,%r13 1392 movq %rbx,%rax 1393 movq %r10,8(%rdi,%rcx,1) 1394 movq %rdx,%r12 1395 adcq $0,%r12 1396 1397 mulq %r14 1398 addq %rax,%r11 1399 movq %rbx,%rax 1400 movq 24(%rsi,%rcx,1),%rbx 1401 movq %rdx,%r10 1402 adcq $0,%r10 1403 addq %r13,%r11 1404 adcq $0,%r10 1405 1406 1407 mulq %r15 1408 addq %rax,%r12 1409 movq %rbx,%rax 1410 movq %r11,16(%rdi,%rcx,1) 1411 movq %rdx,%r13 1412 adcq $0,%r13 1413 leaq 32(%rcx),%rcx 1414 1415 mulq %r14 1416 addq %rax,%r10 1417 movq %rbx,%rax 1418 movq %rdx,%r11 1419 adcq $0,%r11 1420 addq %r12,%r10 1421 adcq $0,%r11 1422 movq %r10,-8(%rdi,%rcx,1) 1423 1424 cmpq $0,%rcx 1425 jne .Lsqr4x_1st 1426 1427 mulq %r15 1428 addq %rax,%r13 1429 leaq 16(%rbp),%rbp 1430 adcq $0,%rdx 1431 addq %r11,%r13 1432 adcq $0,%rdx 1433 1434 movq %r13,(%rdi) 1435 movq %rdx,%r12 1436 movq %rdx,8(%rdi) 1437 jmp .Lsqr4x_outer 1438 1439.align 32 1440.Lsqr4x_outer: 1441 movq -32(%rsi,%rbp,1),%r14 1442 leaq 48+8(%rsp,%r9,2),%rdi 1443 movq -24(%rsi,%rbp,1),%rax 1444 leaq -32(%rdi,%rbp,1),%rdi 1445 movq -16(%rsi,%rbp,1),%rbx 1446 movq %rax,%r15 1447 1448 mulq %r14 1449 movq -24(%rdi,%rbp,1),%r10 1450 addq %rax,%r10 1451 movq %rbx,%rax 1452 adcq $0,%rdx 1453 movq %r10,-24(%rdi,%rbp,1) 1454 movq %rdx,%r11 1455 1456 mulq %r14 1457 addq %rax,%r11 1458 movq %rbx,%rax 1459 adcq $0,%rdx 1460 addq -16(%rdi,%rbp,1),%r11 1461 movq %rdx,%r10 1462 adcq $0,%r10 1463 movq %r11,-16(%rdi,%rbp,1) 1464 1465 xorq %r12,%r12 1466 1467 movq -8(%rsi,%rbp,1),%rbx 1468 mulq %r15 1469 addq %rax,%r12 1470 movq %rbx,%rax 1471 adcq $0,%rdx 1472 addq -8(%rdi,%rbp,1),%r12 1473 movq %rdx,%r13 1474 adcq $0,%r13 1475 1476 mulq %r14 1477 addq %rax,%r10 1478 movq %rbx,%rax 1479 adcq $0,%rdx 1480 addq %r12,%r10 1481 movq %rdx,%r11 1482 adcq $0,%r11 1483 movq %r10,-8(%rdi,%rbp,1) 1484 1485 leaq (%rbp),%rcx 
1486 jmp .Lsqr4x_inner 1487 1488.align 32 1489.Lsqr4x_inner: 1490 movq (%rsi,%rcx,1),%rbx 1491 mulq %r15 1492 addq %rax,%r13 1493 movq %rbx,%rax 1494 movq %rdx,%r12 1495 adcq $0,%r12 1496 addq (%rdi,%rcx,1),%r13 1497 adcq $0,%r12 1498 1499.byte 0x67 1500 mulq %r14 1501 addq %rax,%r11 1502 movq %rbx,%rax 1503 movq 8(%rsi,%rcx,1),%rbx 1504 movq %rdx,%r10 1505 adcq $0,%r10 1506 addq %r13,%r11 1507 adcq $0,%r10 1508 1509 mulq %r15 1510 addq %rax,%r12 1511 movq %r11,(%rdi,%rcx,1) 1512 movq %rbx,%rax 1513 movq %rdx,%r13 1514 adcq $0,%r13 1515 addq 8(%rdi,%rcx,1),%r12 1516 leaq 16(%rcx),%rcx 1517 adcq $0,%r13 1518 1519 mulq %r14 1520 addq %rax,%r10 1521 movq %rbx,%rax 1522 adcq $0,%rdx 1523 addq %r12,%r10 1524 movq %rdx,%r11 1525 adcq $0,%r11 1526 movq %r10,-8(%rdi,%rcx,1) 1527 1528 cmpq $0,%rcx 1529 jne .Lsqr4x_inner 1530 1531.byte 0x67 1532 mulq %r15 1533 addq %rax,%r13 1534 adcq $0,%rdx 1535 addq %r11,%r13 1536 adcq $0,%rdx 1537 1538 movq %r13,(%rdi) 1539 movq %rdx,%r12 1540 movq %rdx,8(%rdi) 1541 1542 addq $16,%rbp 1543 jnz .Lsqr4x_outer 1544 1545 1546 movq -32(%rsi),%r14 1547 leaq 48+8(%rsp,%r9,2),%rdi 1548 movq -24(%rsi),%rax 1549 leaq -32(%rdi,%rbp,1),%rdi 1550 movq -16(%rsi),%rbx 1551 movq %rax,%r15 1552 1553 mulq %r14 1554 addq %rax,%r10 1555 movq %rbx,%rax 1556 movq %rdx,%r11 1557 adcq $0,%r11 1558 1559 mulq %r14 1560 addq %rax,%r11 1561 movq %rbx,%rax 1562 movq %r10,-24(%rdi) 1563 movq %rdx,%r10 1564 adcq $0,%r10 1565 addq %r13,%r11 1566 movq -8(%rsi),%rbx 1567 adcq $0,%r10 1568 1569 mulq %r15 1570 addq %rax,%r12 1571 movq %rbx,%rax 1572 movq %r11,-16(%rdi) 1573 movq %rdx,%r13 1574 adcq $0,%r13 1575 1576 mulq %r14 1577 addq %rax,%r10 1578 movq %rbx,%rax 1579 movq %rdx,%r11 1580 adcq $0,%r11 1581 addq %r12,%r10 1582 adcq $0,%r11 1583 movq %r10,-8(%rdi) 1584 1585 mulq %r15 1586 addq %rax,%r13 1587 movq -16(%rsi),%rax 1588 adcq $0,%rdx 1589 addq %r11,%r13 1590 adcq $0,%rdx 1591 1592 movq %r13,(%rdi) 1593 movq %rdx,%r12 1594 movq %rdx,8(%rdi) 1595 1596 mulq %rbx 
1597 addq $16,%rbp 1598 xorq %r14,%r14 1599 subq %r9,%rbp 1600 xorq %r15,%r15 1601 1602 addq %r12,%rax 1603 adcq $0,%rdx 1604 movq %rax,8(%rdi) 1605 movq %rdx,16(%rdi) 1606 movq %r15,24(%rdi) 1607 1608 movq -16(%rsi,%rbp,1),%rax 1609 leaq 48+8(%rsp),%rdi 1610 xorq %r10,%r10 1611 movq 8(%rdi),%r11 1612 1613 leaq (%r14,%r10,2),%r12 1614 shrq $63,%r10 1615 leaq (%rcx,%r11,2),%r13 1616 shrq $63,%r11 1617 orq %r10,%r13 1618 movq 16(%rdi),%r10 1619 movq %r11,%r14 1620 mulq %rax 1621 negq %r15 1622 movq 24(%rdi),%r11 1623 adcq %rax,%r12 1624 movq -8(%rsi,%rbp,1),%rax 1625 movq %r12,(%rdi) 1626 adcq %rdx,%r13 1627 1628 leaq (%r14,%r10,2),%rbx 1629 movq %r13,8(%rdi) 1630 sbbq %r15,%r15 1631 shrq $63,%r10 1632 leaq (%rcx,%r11,2),%r8 1633 shrq $63,%r11 1634 orq %r10,%r8 1635 movq 32(%rdi),%r10 1636 movq %r11,%r14 1637 mulq %rax 1638 negq %r15 1639 movq 40(%rdi),%r11 1640 adcq %rax,%rbx 1641 movq 0(%rsi,%rbp,1),%rax 1642 movq %rbx,16(%rdi) 1643 adcq %rdx,%r8 1644 leaq 16(%rbp),%rbp 1645 movq %r8,24(%rdi) 1646 sbbq %r15,%r15 1647 leaq 64(%rdi),%rdi 1648 jmp .Lsqr4x_shift_n_add 1649 1650.align 32 1651.Lsqr4x_shift_n_add: 1652 leaq (%r14,%r10,2),%r12 1653 shrq $63,%r10 1654 leaq (%rcx,%r11,2),%r13 1655 shrq $63,%r11 1656 orq %r10,%r13 1657 movq -16(%rdi),%r10 1658 movq %r11,%r14 1659 mulq %rax 1660 negq %r15 1661 movq -8(%rdi),%r11 1662 adcq %rax,%r12 1663 movq -8(%rsi,%rbp,1),%rax 1664 movq %r12,-32(%rdi) 1665 adcq %rdx,%r13 1666 1667 leaq (%r14,%r10,2),%rbx 1668 movq %r13,-24(%rdi) 1669 sbbq %r15,%r15 1670 shrq $63,%r10 1671 leaq (%rcx,%r11,2),%r8 1672 shrq $63,%r11 1673 orq %r10,%r8 1674 movq 0(%rdi),%r10 1675 movq %r11,%r14 1676 mulq %rax 1677 negq %r15 1678 movq 8(%rdi),%r11 1679 adcq %rax,%rbx 1680 movq 0(%rsi,%rbp,1),%rax 1681 movq %rbx,-16(%rdi) 1682 adcq %rdx,%r8 1683 1684 leaq (%r14,%r10,2),%r12 1685 movq %r8,-8(%rdi) 1686 sbbq %r15,%r15 1687 shrq $63,%r10 1688 leaq (%rcx,%r11,2),%r13 1689 shrq $63,%r11 1690 orq %r10,%r13 1691 movq 16(%rdi),%r10 1692 movq %r11,%r14 1693 
mulq %rax 1694 negq %r15 1695 movq 24(%rdi),%r11 1696 adcq %rax,%r12 1697 movq 8(%rsi,%rbp,1),%rax 1698 movq %r12,0(%rdi) 1699 adcq %rdx,%r13 1700 1701 leaq (%r14,%r10,2),%rbx 1702 movq %r13,8(%rdi) 1703 sbbq %r15,%r15 1704 shrq $63,%r10 1705 leaq (%rcx,%r11,2),%r8 1706 shrq $63,%r11 1707 orq %r10,%r8 1708 movq 32(%rdi),%r10 1709 movq %r11,%r14 1710 mulq %rax 1711 negq %r15 1712 movq 40(%rdi),%r11 1713 adcq %rax,%rbx 1714 movq 16(%rsi,%rbp,1),%rax 1715 movq %rbx,16(%rdi) 1716 adcq %rdx,%r8 1717 movq %r8,24(%rdi) 1718 sbbq %r15,%r15 1719 leaq 64(%rdi),%rdi 1720 addq $32,%rbp 1721 jnz .Lsqr4x_shift_n_add 1722 1723 leaq (%r14,%r10,2),%r12 1724.byte 0x67 1725 shrq $63,%r10 1726 leaq (%rcx,%r11,2),%r13 1727 shrq $63,%r11 1728 orq %r10,%r13 1729 movq -16(%rdi),%r10 1730 movq %r11,%r14 1731 mulq %rax 1732 negq %r15 1733 movq -8(%rdi),%r11 1734 adcq %rax,%r12 1735 movq -8(%rsi),%rax 1736 movq %r12,-32(%rdi) 1737 adcq %rdx,%r13 1738 1739 leaq (%r14,%r10,2),%rbx 1740 movq %r13,-24(%rdi) 1741 sbbq %r15,%r15 1742 shrq $63,%r10 1743 leaq (%rcx,%r11,2),%r8 1744 shrq $63,%r11 1745 orq %r10,%r8 1746 mulq %rax 1747 negq %r15 1748 adcq %rax,%rbx 1749 adcq %rdx,%r8 1750 movq %rbx,-16(%rdi) 1751 movq %r8,-8(%rdi) 1752.byte 102,72,15,126,213 1753__bn_sqr8x_reduction: 1754 xorq %rax,%rax 1755 leaq (%r9,%rbp,1),%rcx 1756 leaq 48+8(%rsp,%r9,2),%rdx 1757 movq %rcx,0+8(%rsp) 1758 leaq 48+8(%rsp,%r9,1),%rdi 1759 movq %rdx,8+8(%rsp) 1760 negq %r9 1761 jmp .L8x_reduction_loop 1762 1763.align 32 1764.L8x_reduction_loop: 1765 leaq (%rdi,%r9,1),%rdi 1766.byte 0x66 1767 movq 0(%rdi),%rbx 1768 movq 8(%rdi),%r9 1769 movq 16(%rdi),%r10 1770 movq 24(%rdi),%r11 1771 movq 32(%rdi),%r12 1772 movq 40(%rdi),%r13 1773 movq 48(%rdi),%r14 1774 movq 56(%rdi),%r15 1775 movq %rax,(%rdx) 1776 leaq 64(%rdi),%rdi 1777 1778.byte 0x67 1779 movq %rbx,%r8 1780 imulq 32+8(%rsp),%rbx 1781 movq 0(%rbp),%rax 1782 movl $8,%ecx 1783 jmp .L8x_reduce 1784 1785.align 32 1786.L8x_reduce: 1787 mulq %rbx 1788 movq 8(%rbp),%rax 
1789 negq %r8 1790 movq %rdx,%r8 1791 adcq $0,%r8 1792 1793 mulq %rbx 1794 addq %rax,%r9 1795 movq 16(%rbp),%rax 1796 adcq $0,%rdx 1797 addq %r9,%r8 1798 movq %rbx,48-8+8(%rsp,%rcx,8) 1799 movq %rdx,%r9 1800 adcq $0,%r9 1801 1802 mulq %rbx 1803 addq %rax,%r10 1804 movq 24(%rbp),%rax 1805 adcq $0,%rdx 1806 addq %r10,%r9 1807 movq 32+8(%rsp),%rsi 1808 movq %rdx,%r10 1809 adcq $0,%r10 1810 1811 mulq %rbx 1812 addq %rax,%r11 1813 movq 32(%rbp),%rax 1814 adcq $0,%rdx 1815 imulq %r8,%rsi 1816 addq %r11,%r10 1817 movq %rdx,%r11 1818 adcq $0,%r11 1819 1820 mulq %rbx 1821 addq %rax,%r12 1822 movq 40(%rbp),%rax 1823 adcq $0,%rdx 1824 addq %r12,%r11 1825 movq %rdx,%r12 1826 adcq $0,%r12 1827 1828 mulq %rbx 1829 addq %rax,%r13 1830 movq 48(%rbp),%rax 1831 adcq $0,%rdx 1832 addq %r13,%r12 1833 movq %rdx,%r13 1834 adcq $0,%r13 1835 1836 mulq %rbx 1837 addq %rax,%r14 1838 movq 56(%rbp),%rax 1839 adcq $0,%rdx 1840 addq %r14,%r13 1841 movq %rdx,%r14 1842 adcq $0,%r14 1843 1844 mulq %rbx 1845 movq %rsi,%rbx 1846 addq %rax,%r15 1847 movq 0(%rbp),%rax 1848 adcq $0,%rdx 1849 addq %r15,%r14 1850 movq %rdx,%r15 1851 adcq $0,%r15 1852 1853 decl %ecx 1854 jnz .L8x_reduce 1855 1856 leaq 64(%rbp),%rbp 1857 xorq %rax,%rax 1858 movq 8+8(%rsp),%rdx 1859 cmpq 0+8(%rsp),%rbp 1860 jae .L8x_no_tail 1861 1862.byte 0x66 1863 addq 0(%rdi),%r8 1864 adcq 8(%rdi),%r9 1865 adcq 16(%rdi),%r10 1866 adcq 24(%rdi),%r11 1867 adcq 32(%rdi),%r12 1868 adcq 40(%rdi),%r13 1869 adcq 48(%rdi),%r14 1870 adcq 56(%rdi),%r15 1871 sbbq %rsi,%rsi 1872 1873 movq 48+56+8(%rsp),%rbx 1874 movl $8,%ecx 1875 movq 0(%rbp),%rax 1876 jmp .L8x_tail 1877 1878.align 32 1879.L8x_tail: 1880 mulq %rbx 1881 addq %rax,%r8 1882 movq 8(%rbp),%rax 1883 movq %r8,(%rdi) 1884 movq %rdx,%r8 1885 adcq $0,%r8 1886 1887 mulq %rbx 1888 addq %rax,%r9 1889 movq 16(%rbp),%rax 1890 adcq $0,%rdx 1891 addq %r9,%r8 1892 leaq 8(%rdi),%rdi 1893 movq %rdx,%r9 1894 adcq $0,%r9 1895 1896 mulq %rbx 1897 addq %rax,%r10 1898 movq 24(%rbp),%rax 1899 adcq $0,%rdx 
1900 addq %r10,%r9 1901 movq %rdx,%r10 1902 adcq $0,%r10 1903 1904 mulq %rbx 1905 addq %rax,%r11 1906 movq 32(%rbp),%rax 1907 adcq $0,%rdx 1908 addq %r11,%r10 1909 movq %rdx,%r11 1910 adcq $0,%r11 1911 1912 mulq %rbx 1913 addq %rax,%r12 1914 movq 40(%rbp),%rax 1915 adcq $0,%rdx 1916 addq %r12,%r11 1917 movq %rdx,%r12 1918 adcq $0,%r12 1919 1920 mulq %rbx 1921 addq %rax,%r13 1922 movq 48(%rbp),%rax 1923 adcq $0,%rdx 1924 addq %r13,%r12 1925 movq %rdx,%r13 1926 adcq $0,%r13 1927 1928 mulq %rbx 1929 addq %rax,%r14 1930 movq 56(%rbp),%rax 1931 adcq $0,%rdx 1932 addq %r14,%r13 1933 movq %rdx,%r14 1934 adcq $0,%r14 1935 1936 mulq %rbx 1937 movq 48-16+8(%rsp,%rcx,8),%rbx 1938 addq %rax,%r15 1939 adcq $0,%rdx 1940 addq %r15,%r14 1941 movq 0(%rbp),%rax 1942 movq %rdx,%r15 1943 adcq $0,%r15 1944 1945 decl %ecx 1946 jnz .L8x_tail 1947 1948 leaq 64(%rbp),%rbp 1949 movq 8+8(%rsp),%rdx 1950 cmpq 0+8(%rsp),%rbp 1951 jae .L8x_tail_done 1952 1953 movq 48+56+8(%rsp),%rbx 1954 negq %rsi 1955 movq 0(%rbp),%rax 1956 adcq 0(%rdi),%r8 1957 adcq 8(%rdi),%r9 1958 adcq 16(%rdi),%r10 1959 adcq 24(%rdi),%r11 1960 adcq 32(%rdi),%r12 1961 adcq 40(%rdi),%r13 1962 adcq 48(%rdi),%r14 1963 adcq 56(%rdi),%r15 1964 sbbq %rsi,%rsi 1965 1966 movl $8,%ecx 1967 jmp .L8x_tail 1968 1969.align 32 1970.L8x_tail_done: 1971 xorq %rax,%rax 1972 addq (%rdx),%r8 1973 adcq $0,%r9 1974 adcq $0,%r10 1975 adcq $0,%r11 1976 adcq $0,%r12 1977 adcq $0,%r13 1978 adcq $0,%r14 1979 adcq $0,%r15 1980 adcq $0,%rax 1981 1982 negq %rsi 1983.L8x_no_tail: 1984 adcq 0(%rdi),%r8 1985 adcq 8(%rdi),%r9 1986 adcq 16(%rdi),%r10 1987 adcq 24(%rdi),%r11 1988 adcq 32(%rdi),%r12 1989 adcq 40(%rdi),%r13 1990 adcq 48(%rdi),%r14 1991 adcq 56(%rdi),%r15 1992 adcq $0,%rax 1993 movq -8(%rbp),%rcx 1994 xorq %rsi,%rsi 1995 1996.byte 102,72,15,126,213 1997 1998 movq %r8,0(%rdi) 1999 movq %r9,8(%rdi) 2000.byte 102,73,15,126,217 2001 movq %r10,16(%rdi) 2002 movq %r11,24(%rdi) 2003 movq %r12,32(%rdi) 2004 movq %r13,40(%rdi) 2005 movq %r14,48(%rdi) 
2006 movq %r15,56(%rdi) 2007 leaq 64(%rdi),%rdi 2008 2009 cmpq %rdx,%rdi 2010 jb .L8x_reduction_loop 2011 .byte 0xf3,0xc3 2012.cfi_endproc 2013.size bn_sqr8x_internal,.-bn_sqr8x_internal 2014.type __bn_post4x_internal,@function 2015.align 32 2016__bn_post4x_internal: 2017.cfi_startproc 2018 movq 0(%rbp),%r12 2019 leaq (%rdi,%r9,1),%rbx 2020 movq %r9,%rcx 2021.byte 102,72,15,126,207 2022 negq %rax 2023.byte 102,72,15,126,206 2024 sarq $3+2,%rcx 2025 decq %r12 2026 xorq %r10,%r10 2027 movq 8(%rbp),%r13 2028 movq 16(%rbp),%r14 2029 movq 24(%rbp),%r15 2030 jmp .Lsqr4x_sub_entry 2031 2032.align 16 2033.Lsqr4x_sub: 2034 movq 0(%rbp),%r12 2035 movq 8(%rbp),%r13 2036 movq 16(%rbp),%r14 2037 movq 24(%rbp),%r15 2038.Lsqr4x_sub_entry: 2039 leaq 32(%rbp),%rbp 2040 notq %r12 2041 notq %r13 2042 notq %r14 2043 notq %r15 2044 andq %rax,%r12 2045 andq %rax,%r13 2046 andq %rax,%r14 2047 andq %rax,%r15 2048 2049 negq %r10 2050 adcq 0(%rbx),%r12 2051 adcq 8(%rbx),%r13 2052 adcq 16(%rbx),%r14 2053 adcq 24(%rbx),%r15 2054 movq %r12,0(%rdi) 2055 leaq 32(%rbx),%rbx 2056 movq %r13,8(%rdi) 2057 sbbq %r10,%r10 2058 movq %r14,16(%rdi) 2059 movq %r15,24(%rdi) 2060 leaq 32(%rdi),%rdi 2061 2062 incq %rcx 2063 jnz .Lsqr4x_sub 2064 2065 movq %r9,%r10 2066 negq %r9 2067 .byte 0xf3,0xc3 2068.cfi_endproc 2069.size __bn_post4x_internal,.-__bn_post4x_internal 2070.globl bn_from_montgomery 2071.hidden bn_from_montgomery 2072.type bn_from_montgomery,@function 2073.align 32 2074bn_from_montgomery: 2075.cfi_startproc 2076 testl $7,%r9d 2077 jz bn_from_mont8x 2078 xorl %eax,%eax 2079 .byte 0xf3,0xc3 2080.cfi_endproc 2081.size bn_from_montgomery,.-bn_from_montgomery 2082 2083.type bn_from_mont8x,@function 2084.align 32 2085bn_from_mont8x: 2086.cfi_startproc 2087.byte 0x67 2088 movq %rsp,%rax 2089.cfi_def_cfa_register %rax 2090 pushq %rbx 2091.cfi_offset %rbx,-16 2092 pushq %rbp 2093.cfi_offset %rbp,-24 2094 pushq %r12 2095.cfi_offset %r12,-32 2096 pushq %r13 2097.cfi_offset %r13,-40 2098 pushq %r14 
2099.cfi_offset %r14,-48 2100 pushq %r15 2101.cfi_offset %r15,-56 2102.Lfrom_prologue: 2103 2104 shll $3,%r9d 2105 leaq (%r9,%r9,2),%r10 2106 negq %r9 2107 movq (%r8),%r8 2108 2109 2110 2111 2112 2113 2114 2115 2116 leaq -320(%rsp,%r9,2),%r11 2117 movq %rsp,%rbp 2118 subq %rdi,%r11 2119 andq $4095,%r11 2120 cmpq %r11,%r10 2121 jb .Lfrom_sp_alt 2122 subq %r11,%rbp 2123 leaq -320(%rbp,%r9,2),%rbp 2124 jmp .Lfrom_sp_done 2125 2126.align 32 2127.Lfrom_sp_alt: 2128 leaq 4096-320(,%r9,2),%r10 2129 leaq -320(%rbp,%r9,2),%rbp 2130 subq %r10,%r11 2131 movq $0,%r10 2132 cmovcq %r10,%r11 2133 subq %r11,%rbp 2134.Lfrom_sp_done: 2135 andq $-64,%rbp 2136 movq %rsp,%r11 2137 subq %rbp,%r11 2138 andq $-4096,%r11 2139 leaq (%r11,%rbp,1),%rsp 2140 movq (%rsp),%r10 2141 cmpq %rbp,%rsp 2142 ja .Lfrom_page_walk 2143 jmp .Lfrom_page_walk_done 2144 2145.Lfrom_page_walk: 2146 leaq -4096(%rsp),%rsp 2147 movq (%rsp),%r10 2148 cmpq %rbp,%rsp 2149 ja .Lfrom_page_walk 2150.Lfrom_page_walk_done: 2151 2152 movq %r9,%r10 2153 negq %r9 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 movq %r8,32(%rsp) 2165 movq %rax,40(%rsp) 2166.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 2167.Lfrom_body: 2168 movq %r9,%r11 2169 leaq 48(%rsp),%rax 2170 pxor %xmm0,%xmm0 2171 jmp .Lmul_by_1 2172 2173.align 32 2174.Lmul_by_1: 2175 movdqu (%rsi),%xmm1 2176 movdqu 16(%rsi),%xmm2 2177 movdqu 32(%rsi),%xmm3 2178 movdqa %xmm0,(%rax,%r9,1) 2179 movdqu 48(%rsi),%xmm4 2180 movdqa %xmm0,16(%rax,%r9,1) 2181.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 2182 movdqa %xmm1,(%rax) 2183 movdqa %xmm0,32(%rax,%r9,1) 2184 movdqa %xmm2,16(%rax) 2185 movdqa %xmm0,48(%rax,%r9,1) 2186 movdqa %xmm3,32(%rax) 2187 movdqa %xmm4,48(%rax) 2188 leaq 64(%rax),%rax 2189 subq $64,%r11 2190 jnz .Lmul_by_1 2191 2192.byte 102,72,15,110,207 2193.byte 102,72,15,110,209 2194.byte 0x67 2195 movq %rcx,%rbp 2196.byte 102,73,15,110,218 2197 leaq OPENSSL_ia32cap_P(%rip),%r11 2198 movl 8(%r11),%r11d 2199 andl $0x80108,%r11d 2200 cmpl $0x80108,%r11d 2201 jne 
.Lfrom_mont_nox 2202 2203 leaq (%rax,%r9,1),%rdi 2204 call __bn_sqrx8x_reduction 2205 call __bn_postx4x_internal 2206 2207 pxor %xmm0,%xmm0 2208 leaq 48(%rsp),%rax 2209 jmp .Lfrom_mont_zero 2210 2211.align 32 2212.Lfrom_mont_nox: 2213 call __bn_sqr8x_reduction 2214 call __bn_post4x_internal 2215 2216 pxor %xmm0,%xmm0 2217 leaq 48(%rsp),%rax 2218 jmp .Lfrom_mont_zero 2219 2220.align 32 2221.Lfrom_mont_zero: 2222 movq 40(%rsp),%rsi 2223.cfi_def_cfa %rsi,8 2224 movdqa %xmm0,0(%rax) 2225 movdqa %xmm0,16(%rax) 2226 movdqa %xmm0,32(%rax) 2227 movdqa %xmm0,48(%rax) 2228 leaq 64(%rax),%rax 2229 subq $32,%r9 2230 jnz .Lfrom_mont_zero 2231 2232 movq $1,%rax 2233 movq -48(%rsi),%r15 2234.cfi_restore %r15 2235 movq -40(%rsi),%r14 2236.cfi_restore %r14 2237 movq -32(%rsi),%r13 2238.cfi_restore %r13 2239 movq -24(%rsi),%r12 2240.cfi_restore %r12 2241 movq -16(%rsi),%rbp 2242.cfi_restore %rbp 2243 movq -8(%rsi),%rbx 2244.cfi_restore %rbx 2245 leaq (%rsi),%rsp 2246.cfi_def_cfa_register %rsp 2247.Lfrom_epilogue: 2248 .byte 0xf3,0xc3 2249.cfi_endproc 2250.size bn_from_mont8x,.-bn_from_mont8x 2251.type bn_mulx4x_mont_gather5,@function 2252.align 32 2253bn_mulx4x_mont_gather5: 2254.cfi_startproc 2255 movq %rsp,%rax 2256.cfi_def_cfa_register %rax 2257.Lmulx4x_enter: 2258 pushq %rbx 2259.cfi_offset %rbx,-16 2260 pushq %rbp 2261.cfi_offset %rbp,-24 2262 pushq %r12 2263.cfi_offset %r12,-32 2264 pushq %r13 2265.cfi_offset %r13,-40 2266 pushq %r14 2267.cfi_offset %r14,-48 2268 pushq %r15 2269.cfi_offset %r15,-56 2270.Lmulx4x_prologue: 2271 2272 shll $3,%r9d 2273 leaq (%r9,%r9,2),%r10 2274 negq %r9 2275 movq (%r8),%r8 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 leaq -320(%rsp,%r9,2),%r11 2287 movq %rsp,%rbp 2288 subq %rdi,%r11 2289 andq $4095,%r11 2290 cmpq %r11,%r10 2291 jb .Lmulx4xsp_alt 2292 subq %r11,%rbp 2293 leaq -320(%rbp,%r9,2),%rbp 2294 jmp .Lmulx4xsp_done 2295 2296.Lmulx4xsp_alt: 2297 leaq 4096-320(,%r9,2),%r10 2298 leaq -320(%rbp,%r9,2),%rbp 2299 subq %r10,%r11 2300 
movq $0,%r10 2301 cmovcq %r10,%r11 2302 subq %r11,%rbp 2303.Lmulx4xsp_done: 2304 andq $-64,%rbp 2305 movq %rsp,%r11 2306 subq %rbp,%r11 2307 andq $-4096,%r11 2308 leaq (%r11,%rbp,1),%rsp 2309 movq (%rsp),%r10 2310 cmpq %rbp,%rsp 2311 ja .Lmulx4x_page_walk 2312 jmp .Lmulx4x_page_walk_done 2313 2314.Lmulx4x_page_walk: 2315 leaq -4096(%rsp),%rsp 2316 movq (%rsp),%r10 2317 cmpq %rbp,%rsp 2318 ja .Lmulx4x_page_walk 2319.Lmulx4x_page_walk_done: 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 movq %r8,32(%rsp) 2334 movq %rax,40(%rsp) 2335.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 2336.Lmulx4x_body: 2337 call mulx4x_internal 2338 2339 movq 40(%rsp),%rsi 2340.cfi_def_cfa %rsi,8 2341 movq $1,%rax 2342 2343 movq -48(%rsi),%r15 2344.cfi_restore %r15 2345 movq -40(%rsi),%r14 2346.cfi_restore %r14 2347 movq -32(%rsi),%r13 2348.cfi_restore %r13 2349 movq -24(%rsi),%r12 2350.cfi_restore %r12 2351 movq -16(%rsi),%rbp 2352.cfi_restore %rbp 2353 movq -8(%rsi),%rbx 2354.cfi_restore %rbx 2355 leaq (%rsi),%rsp 2356.cfi_def_cfa_register %rsp 2357.Lmulx4x_epilogue: 2358 .byte 0xf3,0xc3 2359.cfi_endproc 2360.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 2361 2362.type mulx4x_internal,@function 2363.align 32 2364mulx4x_internal: 2365.cfi_startproc 2366 movq %r9,8(%rsp) 2367 movq %r9,%r10 2368 negq %r9 2369 shlq $5,%r9 2370 negq %r10 2371 leaq 128(%rdx,%r9,1),%r13 2372 shrq $5+5,%r9 2373 movd 8(%rax),%xmm5 2374 subq $1,%r9 2375 leaq .Linc(%rip),%rax 2376 movq %r13,16+8(%rsp) 2377 movq %r9,24+8(%rsp) 2378 movq %rdi,56+8(%rsp) 2379 movdqa 0(%rax),%xmm0 2380 movdqa 16(%rax),%xmm1 2381 leaq 88-112(%rsp,%r10,1),%r10 2382 leaq 128(%rdx),%rdi 2383 2384 pshufd $0,%xmm5,%xmm5 2385 movdqa %xmm1,%xmm4 2386.byte 0x67 2387 movdqa %xmm1,%xmm2 2388.byte 0x67 2389 paddd %xmm0,%xmm1 2390 pcmpeqd %xmm5,%xmm0 2391 movdqa %xmm4,%xmm3 2392 paddd %xmm1,%xmm2 2393 pcmpeqd %xmm5,%xmm1 2394 movdqa %xmm0,112(%r10) 2395 movdqa %xmm4,%xmm0 2396 2397 paddd %xmm2,%xmm3 2398 pcmpeqd 
%xmm5,%xmm2 2399 movdqa %xmm1,128(%r10) 2400 movdqa %xmm4,%xmm1 2401 2402 paddd %xmm3,%xmm0 2403 pcmpeqd %xmm5,%xmm3 2404 movdqa %xmm2,144(%r10) 2405 movdqa %xmm4,%xmm2 2406 2407 paddd %xmm0,%xmm1 2408 pcmpeqd %xmm5,%xmm0 2409 movdqa %xmm3,160(%r10) 2410 movdqa %xmm4,%xmm3 2411 paddd %xmm1,%xmm2 2412 pcmpeqd %xmm5,%xmm1 2413 movdqa %xmm0,176(%r10) 2414 movdqa %xmm4,%xmm0 2415 2416 paddd %xmm2,%xmm3 2417 pcmpeqd %xmm5,%xmm2 2418 movdqa %xmm1,192(%r10) 2419 movdqa %xmm4,%xmm1 2420 2421 paddd %xmm3,%xmm0 2422 pcmpeqd %xmm5,%xmm3 2423 movdqa %xmm2,208(%r10) 2424 movdqa %xmm4,%xmm2 2425 2426 paddd %xmm0,%xmm1 2427 pcmpeqd %xmm5,%xmm0 2428 movdqa %xmm3,224(%r10) 2429 movdqa %xmm4,%xmm3 2430 paddd %xmm1,%xmm2 2431 pcmpeqd %xmm5,%xmm1 2432 movdqa %xmm0,240(%r10) 2433 movdqa %xmm4,%xmm0 2434 2435 paddd %xmm2,%xmm3 2436 pcmpeqd %xmm5,%xmm2 2437 movdqa %xmm1,256(%r10) 2438 movdqa %xmm4,%xmm1 2439 2440 paddd %xmm3,%xmm0 2441 pcmpeqd %xmm5,%xmm3 2442 movdqa %xmm2,272(%r10) 2443 movdqa %xmm4,%xmm2 2444 2445 paddd %xmm0,%xmm1 2446 pcmpeqd %xmm5,%xmm0 2447 movdqa %xmm3,288(%r10) 2448 movdqa %xmm4,%xmm3 2449.byte 0x67 2450 paddd %xmm1,%xmm2 2451 pcmpeqd %xmm5,%xmm1 2452 movdqa %xmm0,304(%r10) 2453 2454 paddd %xmm2,%xmm3 2455 pcmpeqd %xmm5,%xmm2 2456 movdqa %xmm1,320(%r10) 2457 2458 pcmpeqd %xmm5,%xmm3 2459 movdqa %xmm2,336(%r10) 2460 2461 pand 64(%rdi),%xmm0 2462 pand 80(%rdi),%xmm1 2463 pand 96(%rdi),%xmm2 2464 movdqa %xmm3,352(%r10) 2465 pand 112(%rdi),%xmm3 2466 por %xmm2,%xmm0 2467 por %xmm3,%xmm1 2468 movdqa -128(%rdi),%xmm4 2469 movdqa -112(%rdi),%xmm5 2470 movdqa -96(%rdi),%xmm2 2471 pand 112(%r10),%xmm4 2472 movdqa -80(%rdi),%xmm3 2473 pand 128(%r10),%xmm5 2474 por %xmm4,%xmm0 2475 pand 144(%r10),%xmm2 2476 por %xmm5,%xmm1 2477 pand 160(%r10),%xmm3 2478 por %xmm2,%xmm0 2479 por %xmm3,%xmm1 2480 movdqa -64(%rdi),%xmm4 2481 movdqa -48(%rdi),%xmm5 2482 movdqa -32(%rdi),%xmm2 2483 pand 176(%r10),%xmm4 2484 movdqa -16(%rdi),%xmm3 2485 pand 192(%r10),%xmm5 2486 por %xmm4,%xmm0 
2487 pand 208(%r10),%xmm2 2488 por %xmm5,%xmm1 2489 pand 224(%r10),%xmm3 2490 por %xmm2,%xmm0 2491 por %xmm3,%xmm1 2492 movdqa 0(%rdi),%xmm4 2493 movdqa 16(%rdi),%xmm5 2494 movdqa 32(%rdi),%xmm2 2495 pand 240(%r10),%xmm4 2496 movdqa 48(%rdi),%xmm3 2497 pand 256(%r10),%xmm5 2498 por %xmm4,%xmm0 2499 pand 272(%r10),%xmm2 2500 por %xmm5,%xmm1 2501 pand 288(%r10),%xmm3 2502 por %xmm2,%xmm0 2503 por %xmm3,%xmm1 2504 pxor %xmm1,%xmm0 2505 pshufd $0x4e,%xmm0,%xmm1 2506 por %xmm1,%xmm0 2507 leaq 256(%rdi),%rdi 2508.byte 102,72,15,126,194 2509 leaq 64+32+8(%rsp),%rbx 2510 2511 movq %rdx,%r9 2512 mulxq 0(%rsi),%r8,%rax 2513 mulxq 8(%rsi),%r11,%r12 2514 addq %rax,%r11 2515 mulxq 16(%rsi),%rax,%r13 2516 adcq %rax,%r12 2517 adcq $0,%r13 2518 mulxq 24(%rsi),%rax,%r14 2519 2520 movq %r8,%r15 2521 imulq 32+8(%rsp),%r8 2522 xorq %rbp,%rbp 2523 movq %r8,%rdx 2524 2525 movq %rdi,8+8(%rsp) 2526 2527 leaq 32(%rsi),%rsi 2528 adcxq %rax,%r13 2529 adcxq %rbp,%r14 2530 2531 mulxq 0(%rcx),%rax,%r10 2532 adcxq %rax,%r15 2533 adoxq %r11,%r10 2534 mulxq 8(%rcx),%rax,%r11 2535 adcxq %rax,%r10 2536 adoxq %r12,%r11 2537 mulxq 16(%rcx),%rax,%r12 2538 movq 24+8(%rsp),%rdi 2539 movq %r10,-32(%rbx) 2540 adcxq %rax,%r11 2541 adoxq %r13,%r12 2542 mulxq 24(%rcx),%rax,%r15 2543 movq %r9,%rdx 2544 movq %r11,-24(%rbx) 2545 adcxq %rax,%r12 2546 adoxq %rbp,%r15 2547 leaq 32(%rcx),%rcx 2548 movq %r12,-16(%rbx) 2549 jmp .Lmulx4x_1st 2550 2551.align 32 2552.Lmulx4x_1st: 2553 adcxq %rbp,%r15 2554 mulxq 0(%rsi),%r10,%rax 2555 adcxq %r14,%r10 2556 mulxq 8(%rsi),%r11,%r14 2557 adcxq %rax,%r11 2558 mulxq 16(%rsi),%r12,%rax 2559 adcxq %r14,%r12 2560 mulxq 24(%rsi),%r13,%r14 2561.byte 0x67,0x67 2562 movq %r8,%rdx 2563 adcxq %rax,%r13 2564 adcxq %rbp,%r14 2565 leaq 32(%rsi),%rsi 2566 leaq 32(%rbx),%rbx 2567 2568 adoxq %r15,%r10 2569 mulxq 0(%rcx),%rax,%r15 2570 adcxq %rax,%r10 2571 adoxq %r15,%r11 2572 mulxq 8(%rcx),%rax,%r15 2573 adcxq %rax,%r11 2574 adoxq %r15,%r12 2575 mulxq 16(%rcx),%rax,%r15 2576 movq 
%r10,-40(%rbx) 2577 adcxq %rax,%r12 2578 movq %r11,-32(%rbx) 2579 adoxq %r15,%r13 2580 mulxq 24(%rcx),%rax,%r15 2581 movq %r9,%rdx 2582 movq %r12,-24(%rbx) 2583 adcxq %rax,%r13 2584 adoxq %rbp,%r15 2585 leaq 32(%rcx),%rcx 2586 movq %r13,-16(%rbx) 2587 2588 decq %rdi 2589 jnz .Lmulx4x_1st 2590 2591 movq 8(%rsp),%rax 2592 adcq %rbp,%r15 2593 leaq (%rsi,%rax,1),%rsi 2594 addq %r15,%r14 2595 movq 8+8(%rsp),%rdi 2596 adcq %rbp,%rbp 2597 movq %r14,-8(%rbx) 2598 jmp .Lmulx4x_outer 2599 2600.align 32 2601.Lmulx4x_outer: 2602 leaq 16-256(%rbx),%r10 2603 pxor %xmm4,%xmm4 2604.byte 0x67,0x67 2605 pxor %xmm5,%xmm5 2606 movdqa -128(%rdi),%xmm0 2607 movdqa -112(%rdi),%xmm1 2608 movdqa -96(%rdi),%xmm2 2609 pand 256(%r10),%xmm0 2610 movdqa -80(%rdi),%xmm3 2611 pand 272(%r10),%xmm1 2612 por %xmm0,%xmm4 2613 pand 288(%r10),%xmm2 2614 por %xmm1,%xmm5 2615 pand 304(%r10),%xmm3 2616 por %xmm2,%xmm4 2617 por %xmm3,%xmm5 2618 movdqa -64(%rdi),%xmm0 2619 movdqa -48(%rdi),%xmm1 2620 movdqa -32(%rdi),%xmm2 2621 pand 320(%r10),%xmm0 2622 movdqa -16(%rdi),%xmm3 2623 pand 336(%r10),%xmm1 2624 por %xmm0,%xmm4 2625 pand 352(%r10),%xmm2 2626 por %xmm1,%xmm5 2627 pand 368(%r10),%xmm3 2628 por %xmm2,%xmm4 2629 por %xmm3,%xmm5 2630 movdqa 0(%rdi),%xmm0 2631 movdqa 16(%rdi),%xmm1 2632 movdqa 32(%rdi),%xmm2 2633 pand 384(%r10),%xmm0 2634 movdqa 48(%rdi),%xmm3 2635 pand 400(%r10),%xmm1 2636 por %xmm0,%xmm4 2637 pand 416(%r10),%xmm2 2638 por %xmm1,%xmm5 2639 pand 432(%r10),%xmm3 2640 por %xmm2,%xmm4 2641 por %xmm3,%xmm5 2642 movdqa 64(%rdi),%xmm0 2643 movdqa 80(%rdi),%xmm1 2644 movdqa 96(%rdi),%xmm2 2645 pand 448(%r10),%xmm0 2646 movdqa 112(%rdi),%xmm3 2647 pand 464(%r10),%xmm1 2648 por %xmm0,%xmm4 2649 pand 480(%r10),%xmm2 2650 por %xmm1,%xmm5 2651 pand 496(%r10),%xmm3 2652 por %xmm2,%xmm4 2653 por %xmm3,%xmm5 2654 por %xmm5,%xmm4 2655 pshufd $0x4e,%xmm4,%xmm0 2656 por %xmm4,%xmm0 2657 leaq 256(%rdi),%rdi 2658.byte 102,72,15,126,194 2659 2660 movq %rbp,(%rbx) 2661 leaq 32(%rbx,%rax,1),%rbx 2662 mulxq 
0(%rsi),%r8,%r11 2663 xorq %rbp,%rbp 2664 movq %rdx,%r9 2665 mulxq 8(%rsi),%r14,%r12 2666 adoxq -32(%rbx),%r8 2667 adcxq %r14,%r11 2668 mulxq 16(%rsi),%r15,%r13 2669 adoxq -24(%rbx),%r11 2670 adcxq %r15,%r12 2671 mulxq 24(%rsi),%rdx,%r14 2672 adoxq -16(%rbx),%r12 2673 adcxq %rdx,%r13 2674 leaq (%rcx,%rax,1),%rcx 2675 leaq 32(%rsi),%rsi 2676 adoxq -8(%rbx),%r13 2677 adcxq %rbp,%r14 2678 adoxq %rbp,%r14 2679 2680 movq %r8,%r15 2681 imulq 32+8(%rsp),%r8 2682 2683 movq %r8,%rdx 2684 xorq %rbp,%rbp 2685 movq %rdi,8+8(%rsp) 2686 2687 mulxq 0(%rcx),%rax,%r10 2688 adcxq %rax,%r15 2689 adoxq %r11,%r10 2690 mulxq 8(%rcx),%rax,%r11 2691 adcxq %rax,%r10 2692 adoxq %r12,%r11 2693 mulxq 16(%rcx),%rax,%r12 2694 adcxq %rax,%r11 2695 adoxq %r13,%r12 2696 mulxq 24(%rcx),%rax,%r15 2697 movq %r9,%rdx 2698 movq 24+8(%rsp),%rdi 2699 movq %r10,-32(%rbx) 2700 adcxq %rax,%r12 2701 movq %r11,-24(%rbx) 2702 adoxq %rbp,%r15 2703 movq %r12,-16(%rbx) 2704 leaq 32(%rcx),%rcx 2705 jmp .Lmulx4x_inner 2706 2707.align 32 2708.Lmulx4x_inner: 2709 mulxq 0(%rsi),%r10,%rax 2710 adcxq %rbp,%r15 2711 adoxq %r14,%r10 2712 mulxq 8(%rsi),%r11,%r14 2713 adcxq 0(%rbx),%r10 2714 adoxq %rax,%r11 2715 mulxq 16(%rsi),%r12,%rax 2716 adcxq 8(%rbx),%r11 2717 adoxq %r14,%r12 2718 mulxq 24(%rsi),%r13,%r14 2719 movq %r8,%rdx 2720 adcxq 16(%rbx),%r12 2721 adoxq %rax,%r13 2722 adcxq 24(%rbx),%r13 2723 adoxq %rbp,%r14 2724 leaq 32(%rsi),%rsi 2725 leaq 32(%rbx),%rbx 2726 adcxq %rbp,%r14 2727 2728 adoxq %r15,%r10 2729 mulxq 0(%rcx),%rax,%r15 2730 adcxq %rax,%r10 2731 adoxq %r15,%r11 2732 mulxq 8(%rcx),%rax,%r15 2733 adcxq %rax,%r11 2734 adoxq %r15,%r12 2735 mulxq 16(%rcx),%rax,%r15 2736 movq %r10,-40(%rbx) 2737 adcxq %rax,%r12 2738 adoxq %r15,%r13 2739 movq %r11,-32(%rbx) 2740 mulxq 24(%rcx),%rax,%r15 2741 movq %r9,%rdx 2742 leaq 32(%rcx),%rcx 2743 movq %r12,-24(%rbx) 2744 adcxq %rax,%r13 2745 adoxq %rbp,%r15 2746 movq %r13,-16(%rbx) 2747 2748 decq %rdi 2749 jnz .Lmulx4x_inner 2750 2751 movq 0+8(%rsp),%rax 2752 adcq 
%rbp,%r15 2753 subq 0(%rbx),%rdi 2754 movq 8+8(%rsp),%rdi 2755 movq 16+8(%rsp),%r10 2756 adcq %r15,%r14 2757 leaq (%rsi,%rax,1),%rsi 2758 adcq %rbp,%rbp 2759 movq %r14,-8(%rbx) 2760 2761 cmpq %r10,%rdi 2762 jb .Lmulx4x_outer 2763 2764 movq -8(%rcx),%r10 2765 movq %rbp,%r8 2766 movq (%rcx,%rax,1),%r12 2767 leaq (%rcx,%rax,1),%rbp 2768 movq %rax,%rcx 2769 leaq (%rbx,%rax,1),%rdi 2770 xorl %eax,%eax 2771 xorq %r15,%r15 2772 subq %r14,%r10 2773 adcq %r15,%r15 2774 orq %r15,%r8 2775 sarq $3+2,%rcx 2776 subq %r8,%rax 2777 movq 56+8(%rsp),%rdx 2778 decq %r12 2779 movq 8(%rbp),%r13 2780 xorq %r8,%r8 2781 movq 16(%rbp),%r14 2782 movq 24(%rbp),%r15 2783 jmp .Lsqrx4x_sub_entry 2784.cfi_endproc 2785.size mulx4x_internal,.-mulx4x_internal 2786.type bn_powerx5,@function 2787.align 32 2788bn_powerx5: 2789.cfi_startproc 2790 movq %rsp,%rax 2791.cfi_def_cfa_register %rax 2792.Lpowerx5_enter: 2793 pushq %rbx 2794.cfi_offset %rbx,-16 2795 pushq %rbp 2796.cfi_offset %rbp,-24 2797 pushq %r12 2798.cfi_offset %r12,-32 2799 pushq %r13 2800.cfi_offset %r13,-40 2801 pushq %r14 2802.cfi_offset %r14,-48 2803 pushq %r15 2804.cfi_offset %r15,-56 2805.Lpowerx5_prologue: 2806 2807 shll $3,%r9d 2808 leaq (%r9,%r9,2),%r10 2809 negq %r9 2810 movq (%r8),%r8 2811 2812 2813 2814 2815 2816 2817 2818 2819 leaq -320(%rsp,%r9,2),%r11 2820 movq %rsp,%rbp 2821 subq %rdi,%r11 2822 andq $4095,%r11 2823 cmpq %r11,%r10 2824 jb .Lpwrx_sp_alt 2825 subq %r11,%rbp 2826 leaq -320(%rbp,%r9,2),%rbp 2827 jmp .Lpwrx_sp_done 2828 2829.align 32 2830.Lpwrx_sp_alt: 2831 leaq 4096-320(,%r9,2),%r10 2832 leaq -320(%rbp,%r9,2),%rbp 2833 subq %r10,%r11 2834 movq $0,%r10 2835 cmovcq %r10,%r11 2836 subq %r11,%rbp 2837.Lpwrx_sp_done: 2838 andq $-64,%rbp 2839 movq %rsp,%r11 2840 subq %rbp,%r11 2841 andq $-4096,%r11 2842 leaq (%r11,%rbp,1),%rsp 2843 movq (%rsp),%r10 2844 cmpq %rbp,%rsp 2845 ja .Lpwrx_page_walk 2846 jmp .Lpwrx_page_walk_done 2847 2848.Lpwrx_page_walk: 2849 leaq -4096(%rsp),%rsp 2850 movq (%rsp),%r10 2851 cmpq %rbp,%rsp 
2852 ja .Lpwrx_page_walk 2853.Lpwrx_page_walk_done: 2854 2855 movq %r9,%r10 2856 negq %r9 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 pxor %xmm0,%xmm0 2870.byte 102,72,15,110,207 2871.byte 102,72,15,110,209 2872.byte 102,73,15,110,218 2873.byte 102,72,15,110,226 2874 movq %r8,32(%rsp) 2875 movq %rax,40(%rsp) 2876.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 2877.Lpowerx5_body: 2878 2879 call __bn_sqrx8x_internal 2880 call __bn_postx4x_internal 2881 call __bn_sqrx8x_internal 2882 call __bn_postx4x_internal 2883 call __bn_sqrx8x_internal 2884 call __bn_postx4x_internal 2885 call __bn_sqrx8x_internal 2886 call __bn_postx4x_internal 2887 call __bn_sqrx8x_internal 2888 call __bn_postx4x_internal 2889 2890 movq %r10,%r9 2891 movq %rsi,%rdi 2892.byte 102,72,15,126,209 2893.byte 102,72,15,126,226 2894 movq 40(%rsp),%rax 2895 2896 call mulx4x_internal 2897 2898 movq 40(%rsp),%rsi 2899.cfi_def_cfa %rsi,8 2900 movq $1,%rax 2901 2902 movq -48(%rsi),%r15 2903.cfi_restore %r15 2904 movq -40(%rsi),%r14 2905.cfi_restore %r14 2906 movq -32(%rsi),%r13 2907.cfi_restore %r13 2908 movq -24(%rsi),%r12 2909.cfi_restore %r12 2910 movq -16(%rsi),%rbp 2911.cfi_restore %rbp 2912 movq -8(%rsi),%rbx 2913.cfi_restore %rbx 2914 leaq (%rsi),%rsp 2915.cfi_def_cfa_register %rsp 2916.Lpowerx5_epilogue: 2917 .byte 0xf3,0xc3 2918.cfi_endproc 2919.size bn_powerx5,.-bn_powerx5 2920 2921.globl bn_sqrx8x_internal 2922.hidden bn_sqrx8x_internal 2923.hidden bn_sqrx8x_internal 2924.type bn_sqrx8x_internal,@function 2925.align 32 2926bn_sqrx8x_internal: 2927__bn_sqrx8x_internal: 2928.cfi_startproc 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 leaq 48+8(%rsp),%rdi 2970 leaq (%rsi,%r9,1),%rbp 2971 movq %r9,0+8(%rsp) 2972 movq %rbp,8+8(%rsp) 2973 jmp .Lsqr8x_zero_start 2974 2975.align 32 2976.byte 
0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 2977.Lsqrx8x_zero: 2978.byte 0x3e 2979 movdqa %xmm0,0(%rdi) 2980 movdqa %xmm0,16(%rdi) 2981 movdqa %xmm0,32(%rdi) 2982 movdqa %xmm0,48(%rdi) 2983.Lsqr8x_zero_start: 2984 movdqa %xmm0,64(%rdi) 2985 movdqa %xmm0,80(%rdi) 2986 movdqa %xmm0,96(%rdi) 2987 movdqa %xmm0,112(%rdi) 2988 leaq 128(%rdi),%rdi 2989 subq $64,%r9 2990 jnz .Lsqrx8x_zero 2991 2992 movq 0(%rsi),%rdx 2993 2994 xorq %r10,%r10 2995 xorq %r11,%r11 2996 xorq %r12,%r12 2997 xorq %r13,%r13 2998 xorq %r14,%r14 2999 xorq %r15,%r15 3000 leaq 48+8(%rsp),%rdi 3001 xorq %rbp,%rbp 3002 jmp .Lsqrx8x_outer_loop 3003 3004.align 32 3005.Lsqrx8x_outer_loop: 3006 mulxq 8(%rsi),%r8,%rax 3007 adcxq %r9,%r8 3008 adoxq %rax,%r10 3009 mulxq 16(%rsi),%r9,%rax 3010 adcxq %r10,%r9 3011 adoxq %rax,%r11 3012.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 3013 adcxq %r11,%r10 3014 adoxq %rax,%r12 3015.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 3016 adcxq %r12,%r11 3017 adoxq %rax,%r13 3018 mulxq 40(%rsi),%r12,%rax 3019 adcxq %r13,%r12 3020 adoxq %rax,%r14 3021 mulxq 48(%rsi),%r13,%rax 3022 adcxq %r14,%r13 3023 adoxq %r15,%rax 3024 mulxq 56(%rsi),%r14,%r15 3025 movq 8(%rsi),%rdx 3026 adcxq %rax,%r14 3027 adoxq %rbp,%r15 3028 adcq 64(%rdi),%r15 3029 movq %r8,8(%rdi) 3030 movq %r9,16(%rdi) 3031 sbbq %rcx,%rcx 3032 xorq %rbp,%rbp 3033 3034 3035 mulxq 16(%rsi),%r8,%rbx 3036 mulxq 24(%rsi),%r9,%rax 3037 adcxq %r10,%r8 3038 adoxq %rbx,%r9 3039 mulxq 32(%rsi),%r10,%rbx 3040 adcxq %r11,%r9 3041 adoxq %rax,%r10 3042.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 3043 adcxq %r12,%r10 3044 adoxq %rbx,%r11 3045.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 3046 adcxq %r13,%r11 3047 adoxq %r14,%r12 3048.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 3049 movq 16(%rsi),%rdx 3050 adcxq %rax,%r12 3051 adoxq %rbx,%r13 3052 adcxq %r15,%r13 3053 adoxq %rbp,%r14 3054 adcxq %rbp,%r14 3055 3056 movq %r8,24(%rdi) 3057 movq %r9,32(%rdi) 3058 3059 mulxq 24(%rsi),%r8,%rbx 3060 
mulxq 32(%rsi),%r9,%rax 3061 adcxq %r10,%r8 3062 adoxq %rbx,%r9 3063 mulxq 40(%rsi),%r10,%rbx 3064 adcxq %r11,%r9 3065 adoxq %rax,%r10 3066.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 3067 adcxq %r12,%r10 3068 adoxq %r13,%r11 3069.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 3070.byte 0x3e 3071 movq 24(%rsi),%rdx 3072 adcxq %rbx,%r11 3073 adoxq %rax,%r12 3074 adcxq %r14,%r12 3075 movq %r8,40(%rdi) 3076 movq %r9,48(%rdi) 3077 mulxq 32(%rsi),%r8,%rax 3078 adoxq %rbp,%r13 3079 adcxq %rbp,%r13 3080 3081 mulxq 40(%rsi),%r9,%rbx 3082 adcxq %r10,%r8 3083 adoxq %rax,%r9 3084 mulxq 48(%rsi),%r10,%rax 3085 adcxq %r11,%r9 3086 adoxq %r12,%r10 3087 mulxq 56(%rsi),%r11,%r12 3088 movq 32(%rsi),%rdx 3089 movq 40(%rsi),%r14 3090 adcxq %rbx,%r10 3091 adoxq %rax,%r11 3092 movq 48(%rsi),%r15 3093 adcxq %r13,%r11 3094 adoxq %rbp,%r12 3095 adcxq %rbp,%r12 3096 3097 movq %r8,56(%rdi) 3098 movq %r9,64(%rdi) 3099 3100 mulxq %r14,%r9,%rax 3101 movq 56(%rsi),%r8 3102 adcxq %r10,%r9 3103 mulxq %r15,%r10,%rbx 3104 adoxq %rax,%r10 3105 adcxq %r11,%r10 3106 mulxq %r8,%r11,%rax 3107 movq %r14,%rdx 3108 adoxq %rbx,%r11 3109 adcxq %r12,%r11 3110 3111 adcxq %rbp,%rax 3112 3113 mulxq %r15,%r14,%rbx 3114 mulxq %r8,%r12,%r13 3115 movq %r15,%rdx 3116 leaq 64(%rsi),%rsi 3117 adcxq %r14,%r11 3118 adoxq %rbx,%r12 3119 adcxq %rax,%r12 3120 adoxq %rbp,%r13 3121 3122.byte 0x67,0x67 3123 mulxq %r8,%r8,%r14 3124 adcxq %r8,%r13 3125 adcxq %rbp,%r14 3126 3127 cmpq 8+8(%rsp),%rsi 3128 je .Lsqrx8x_outer_break 3129 3130 negq %rcx 3131 movq $-8,%rcx 3132 movq %rbp,%r15 3133 movq 64(%rdi),%r8 3134 adcxq 72(%rdi),%r9 3135 adcxq 80(%rdi),%r10 3136 adcxq 88(%rdi),%r11 3137 adcq 96(%rdi),%r12 3138 adcq 104(%rdi),%r13 3139 adcq 112(%rdi),%r14 3140 adcq 120(%rdi),%r15 3141 leaq (%rsi),%rbp 3142 leaq 128(%rdi),%rdi 3143 sbbq %rax,%rax 3144 3145 movq -64(%rsi),%rdx 3146 movq %rax,16+8(%rsp) 3147 movq %rdi,24+8(%rsp) 3148 3149 3150 xorl %eax,%eax 3151 jmp .Lsqrx8x_loop 3152 3153.align 32 3154.Lsqrx8x_loop: 3155 
movq %r8,%rbx 3156 mulxq 0(%rbp),%rax,%r8 3157 adcxq %rax,%rbx 3158 adoxq %r9,%r8 3159 3160 mulxq 8(%rbp),%rax,%r9 3161 adcxq %rax,%r8 3162 adoxq %r10,%r9 3163 3164 mulxq 16(%rbp),%rax,%r10 3165 adcxq %rax,%r9 3166 adoxq %r11,%r10 3167 3168 mulxq 24(%rbp),%rax,%r11 3169 adcxq %rax,%r10 3170 adoxq %r12,%r11 3171 3172.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3173 adcxq %rax,%r11 3174 adoxq %r13,%r12 3175 3176 mulxq 40(%rbp),%rax,%r13 3177 adcxq %rax,%r12 3178 adoxq %r14,%r13 3179 3180 mulxq 48(%rbp),%rax,%r14 3181 movq %rbx,(%rdi,%rcx,8) 3182 movl $0,%ebx 3183 adcxq %rax,%r13 3184 adoxq %r15,%r14 3185 3186.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 3187 movq 8(%rsi,%rcx,8),%rdx 3188 adcxq %rax,%r14 3189 adoxq %rbx,%r15 3190 adcxq %rbx,%r15 3191 3192.byte 0x67 3193 incq %rcx 3194 jnz .Lsqrx8x_loop 3195 3196 leaq 64(%rbp),%rbp 3197 movq $-8,%rcx 3198 cmpq 8+8(%rsp),%rbp 3199 je .Lsqrx8x_break 3200 3201 subq 16+8(%rsp),%rbx 3202.byte 0x66 3203 movq -64(%rsi),%rdx 3204 adcxq 0(%rdi),%r8 3205 adcxq 8(%rdi),%r9 3206 adcq 16(%rdi),%r10 3207 adcq 24(%rdi),%r11 3208 adcq 32(%rdi),%r12 3209 adcq 40(%rdi),%r13 3210 adcq 48(%rdi),%r14 3211 adcq 56(%rdi),%r15 3212 leaq 64(%rdi),%rdi 3213.byte 0x67 3214 sbbq %rax,%rax 3215 xorl %ebx,%ebx 3216 movq %rax,16+8(%rsp) 3217 jmp .Lsqrx8x_loop 3218 3219.align 32 3220.Lsqrx8x_break: 3221 xorq %rbp,%rbp 3222 subq 16+8(%rsp),%rbx 3223 adcxq %rbp,%r8 3224 movq 24+8(%rsp),%rcx 3225 adcxq %rbp,%r9 3226 movq 0(%rsi),%rdx 3227 adcq $0,%r10 3228 movq %r8,0(%rdi) 3229 adcq $0,%r11 3230 adcq $0,%r12 3231 adcq $0,%r13 3232 adcq $0,%r14 3233 adcq $0,%r15 3234 cmpq %rcx,%rdi 3235 je .Lsqrx8x_outer_loop 3236 3237 movq %r9,8(%rdi) 3238 movq 8(%rcx),%r9 3239 movq %r10,16(%rdi) 3240 movq 16(%rcx),%r10 3241 movq %r11,24(%rdi) 3242 movq 24(%rcx),%r11 3243 movq %r12,32(%rdi) 3244 movq 32(%rcx),%r12 3245 movq %r13,40(%rdi) 3246 movq 40(%rcx),%r13 3247 movq %r14,48(%rdi) 3248 movq 48(%rcx),%r14 3249 movq %r15,56(%rdi) 3250 movq 56(%rcx),%r15 
3251 movq %rcx,%rdi 3252 jmp .Lsqrx8x_outer_loop 3253 3254.align 32 3255.Lsqrx8x_outer_break: 3256 movq %r9,72(%rdi) 3257.byte 102,72,15,126,217 3258 movq %r10,80(%rdi) 3259 movq %r11,88(%rdi) 3260 movq %r12,96(%rdi) 3261 movq %r13,104(%rdi) 3262 movq %r14,112(%rdi) 3263 leaq 48+8(%rsp),%rdi 3264 movq (%rsi,%rcx,1),%rdx 3265 3266 movq 8(%rdi),%r11 3267 xorq %r10,%r10 3268 movq 0+8(%rsp),%r9 3269 adoxq %r11,%r11 3270 movq 16(%rdi),%r12 3271 movq 24(%rdi),%r13 3272 3273 3274.align 32 3275.Lsqrx4x_shift_n_add: 3276 mulxq %rdx,%rax,%rbx 3277 adoxq %r12,%r12 3278 adcxq %r10,%rax 3279.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 3280.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 3281 adoxq %r13,%r13 3282 adcxq %r11,%rbx 3283 movq 40(%rdi),%r11 3284 movq %rax,0(%rdi) 3285 movq %rbx,8(%rdi) 3286 3287 mulxq %rdx,%rax,%rbx 3288 adoxq %r10,%r10 3289 adcxq %r12,%rax 3290 movq 16(%rsi,%rcx,1),%rdx 3291 movq 48(%rdi),%r12 3292 adoxq %r11,%r11 3293 adcxq %r13,%rbx 3294 movq 56(%rdi),%r13 3295 movq %rax,16(%rdi) 3296 movq %rbx,24(%rdi) 3297 3298 mulxq %rdx,%rax,%rbx 3299 adoxq %r12,%r12 3300 adcxq %r10,%rax 3301 movq 24(%rsi,%rcx,1),%rdx 3302 leaq 32(%rcx),%rcx 3303 movq 64(%rdi),%r10 3304 adoxq %r13,%r13 3305 adcxq %r11,%rbx 3306 movq 72(%rdi),%r11 3307 movq %rax,32(%rdi) 3308 movq %rbx,40(%rdi) 3309 3310 mulxq %rdx,%rax,%rbx 3311 adoxq %r10,%r10 3312 adcxq %r12,%rax 3313 jrcxz .Lsqrx4x_shift_n_add_break 3314.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 3315 adoxq %r11,%r11 3316 adcxq %r13,%rbx 3317 movq 80(%rdi),%r12 3318 movq 88(%rdi),%r13 3319 movq %rax,48(%rdi) 3320 movq %rbx,56(%rdi) 3321 leaq 64(%rdi),%rdi 3322 nop 3323 jmp .Lsqrx4x_shift_n_add 3324 3325.align 32 3326.Lsqrx4x_shift_n_add_break: 3327 adcxq %r13,%rbx 3328 movq %rax,48(%rdi) 3329 movq %rbx,56(%rdi) 3330 leaq 64(%rdi),%rdi 3331.byte 102,72,15,126,213 3332__bn_sqrx8x_reduction: 3333 xorl %eax,%eax 3334 movq 32+8(%rsp),%rbx 3335 movq 48+8(%rsp),%rdx 3336 leaq -64(%rbp,%r9,1),%rcx 3337 3338 movq %rcx,0+8(%rsp) 3339 movq 
%rdi,8+8(%rsp) 3340 3341 leaq 48+8(%rsp),%rdi 3342 jmp .Lsqrx8x_reduction_loop 3343 3344.align 32 3345.Lsqrx8x_reduction_loop: 3346 movq 8(%rdi),%r9 3347 movq 16(%rdi),%r10 3348 movq 24(%rdi),%r11 3349 movq 32(%rdi),%r12 3350 movq %rdx,%r8 3351 imulq %rbx,%rdx 3352 movq 40(%rdi),%r13 3353 movq 48(%rdi),%r14 3354 movq 56(%rdi),%r15 3355 movq %rax,24+8(%rsp) 3356 3357 leaq 64(%rdi),%rdi 3358 xorq %rsi,%rsi 3359 movq $-8,%rcx 3360 jmp .Lsqrx8x_reduce 3361 3362.align 32 3363.Lsqrx8x_reduce: 3364 movq %r8,%rbx 3365 mulxq 0(%rbp),%rax,%r8 3366 adcxq %rbx,%rax 3367 adoxq %r9,%r8 3368 3369 mulxq 8(%rbp),%rbx,%r9 3370 adcxq %rbx,%r8 3371 adoxq %r10,%r9 3372 3373 mulxq 16(%rbp),%rbx,%r10 3374 adcxq %rbx,%r9 3375 adoxq %r11,%r10 3376 3377 mulxq 24(%rbp),%rbx,%r11 3378 adcxq %rbx,%r10 3379 adoxq %r12,%r11 3380 3381.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 3382 movq %rdx,%rax 3383 movq %r8,%rdx 3384 adcxq %rbx,%r11 3385 adoxq %r13,%r12 3386 3387 mulxq 32+8(%rsp),%rbx,%rdx 3388 movq %rax,%rdx 3389 movq %rax,64+48+8(%rsp,%rcx,8) 3390 3391 mulxq 40(%rbp),%rax,%r13 3392 adcxq %rax,%r12 3393 adoxq %r14,%r13 3394 3395 mulxq 48(%rbp),%rax,%r14 3396 adcxq %rax,%r13 3397 adoxq %r15,%r14 3398 3399 mulxq 56(%rbp),%rax,%r15 3400 movq %rbx,%rdx 3401 adcxq %rax,%r14 3402 adoxq %rsi,%r15 3403 adcxq %rsi,%r15 3404 3405.byte 0x67,0x67,0x67 3406 incq %rcx 3407 jnz .Lsqrx8x_reduce 3408 3409 movq %rsi,%rax 3410 cmpq 0+8(%rsp),%rbp 3411 jae .Lsqrx8x_no_tail 3412 3413 movq 48+8(%rsp),%rdx 3414 addq 0(%rdi),%r8 3415 leaq 64(%rbp),%rbp 3416 movq $-8,%rcx 3417 adcxq 8(%rdi),%r9 3418 adcxq 16(%rdi),%r10 3419 adcq 24(%rdi),%r11 3420 adcq 32(%rdi),%r12 3421 adcq 40(%rdi),%r13 3422 adcq 48(%rdi),%r14 3423 adcq 56(%rdi),%r15 3424 leaq 64(%rdi),%rdi 3425 sbbq %rax,%rax 3426 3427 xorq %rsi,%rsi 3428 movq %rax,16+8(%rsp) 3429 jmp .Lsqrx8x_tail 3430 3431.align 32 3432.Lsqrx8x_tail: 3433 movq %r8,%rbx 3434 mulxq 0(%rbp),%rax,%r8 3435 adcxq %rax,%rbx 3436 adoxq %r9,%r8 3437 3438 mulxq 8(%rbp),%rax,%r9 
3439 adcxq %rax,%r8 3440 adoxq %r10,%r9 3441 3442 mulxq 16(%rbp),%rax,%r10 3443 adcxq %rax,%r9 3444 adoxq %r11,%r10 3445 3446 mulxq 24(%rbp),%rax,%r11 3447 adcxq %rax,%r10 3448 adoxq %r12,%r11 3449 3450.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3451 adcxq %rax,%r11 3452 adoxq %r13,%r12 3453 3454 mulxq 40(%rbp),%rax,%r13 3455 adcxq %rax,%r12 3456 adoxq %r14,%r13 3457 3458 mulxq 48(%rbp),%rax,%r14 3459 adcxq %rax,%r13 3460 adoxq %r15,%r14 3461 3462 mulxq 56(%rbp),%rax,%r15 3463 movq 72+48+8(%rsp,%rcx,8),%rdx 3464 adcxq %rax,%r14 3465 adoxq %rsi,%r15 3466 movq %rbx,(%rdi,%rcx,8) 3467 movq %r8,%rbx 3468 adcxq %rsi,%r15 3469 3470 incq %rcx 3471 jnz .Lsqrx8x_tail 3472 3473 cmpq 0+8(%rsp),%rbp 3474 jae .Lsqrx8x_tail_done 3475 3476 subq 16+8(%rsp),%rsi 3477 movq 48+8(%rsp),%rdx 3478 leaq 64(%rbp),%rbp 3479 adcq 0(%rdi),%r8 3480 adcq 8(%rdi),%r9 3481 adcq 16(%rdi),%r10 3482 adcq 24(%rdi),%r11 3483 adcq 32(%rdi),%r12 3484 adcq 40(%rdi),%r13 3485 adcq 48(%rdi),%r14 3486 adcq 56(%rdi),%r15 3487 leaq 64(%rdi),%rdi 3488 sbbq %rax,%rax 3489 subq $8,%rcx 3490 3491 xorq %rsi,%rsi 3492 movq %rax,16+8(%rsp) 3493 jmp .Lsqrx8x_tail 3494 3495.align 32 3496.Lsqrx8x_tail_done: 3497 xorq %rax,%rax 3498 addq 24+8(%rsp),%r8 3499 adcq $0,%r9 3500 adcq $0,%r10 3501 adcq $0,%r11 3502 adcq $0,%r12 3503 adcq $0,%r13 3504 adcq $0,%r14 3505 adcq $0,%r15 3506 adcq $0,%rax 3507 3508 subq 16+8(%rsp),%rsi 3509.Lsqrx8x_no_tail: 3510 adcq 0(%rdi),%r8 3511.byte 102,72,15,126,217 3512 adcq 8(%rdi),%r9 3513 movq 56(%rbp),%rsi 3514.byte 102,72,15,126,213 3515 adcq 16(%rdi),%r10 3516 adcq 24(%rdi),%r11 3517 adcq 32(%rdi),%r12 3518 adcq 40(%rdi),%r13 3519 adcq 48(%rdi),%r14 3520 adcq 56(%rdi),%r15 3521 adcq $0,%rax 3522 3523 movq 32+8(%rsp),%rbx 3524 movq 64(%rdi,%rcx,1),%rdx 3525 3526 movq %r8,0(%rdi) 3527 leaq 64(%rdi),%r8 3528 movq %r9,8(%rdi) 3529 movq %r10,16(%rdi) 3530 movq %r11,24(%rdi) 3531 movq %r12,32(%rdi) 3532 movq %r13,40(%rdi) 3533 movq %r14,48(%rdi) 3534 movq %r15,56(%rdi) 3535 3536 leaq 
64(%rdi,%rcx,1),%rdi 3537 cmpq 8+8(%rsp),%r8 3538 jb .Lsqrx8x_reduction_loop 3539 .byte 0xf3,0xc3 3540.cfi_endproc 3541.size bn_sqrx8x_internal,.-bn_sqrx8x_internal 3542.align 32 3543.type __bn_postx4x_internal,@function 3544__bn_postx4x_internal: 3545.cfi_startproc 3546 movq 0(%rbp),%r12 3547 movq %rcx,%r10 3548 movq %rcx,%r9 3549 negq %rax 3550 sarq $3+2,%rcx 3551 3552.byte 102,72,15,126,202 3553.byte 102,72,15,126,206 3554 decq %r12 3555 movq 8(%rbp),%r13 3556 xorq %r8,%r8 3557 movq 16(%rbp),%r14 3558 movq 24(%rbp),%r15 3559 jmp .Lsqrx4x_sub_entry 3560 3561.align 16 3562.Lsqrx4x_sub: 3563 movq 0(%rbp),%r12 3564 movq 8(%rbp),%r13 3565 movq 16(%rbp),%r14 3566 movq 24(%rbp),%r15 3567.Lsqrx4x_sub_entry: 3568 andnq %rax,%r12,%r12 3569 leaq 32(%rbp),%rbp 3570 andnq %rax,%r13,%r13 3571 andnq %rax,%r14,%r14 3572 andnq %rax,%r15,%r15 3573 3574 negq %r8 3575 adcq 0(%rdi),%r12 3576 adcq 8(%rdi),%r13 3577 adcq 16(%rdi),%r14 3578 adcq 24(%rdi),%r15 3579 movq %r12,0(%rdx) 3580 leaq 32(%rdi),%rdi 3581 movq %r13,8(%rdx) 3582 sbbq %r8,%r8 3583 movq %r14,16(%rdx) 3584 movq %r15,24(%rdx) 3585 leaq 32(%rdx),%rdx 3586 3587 incq %rcx 3588 jnz .Lsqrx4x_sub 3589 3590 negq %r9 3591 3592 .byte 0xf3,0xc3 3593.cfi_endproc 3594.size __bn_postx4x_internal,.-__bn_postx4x_internal 3595.globl bn_scatter5 3596.hidden bn_scatter5 3597.type bn_scatter5,@function 3598.align 16 3599bn_scatter5: 3600.cfi_startproc 3601 cmpl $0,%esi 3602 jz .Lscatter_epilogue 3603 leaq (%rdx,%rcx,8),%rdx 3604.Lscatter: 3605 movq (%rdi),%rax 3606 leaq 8(%rdi),%rdi 3607 movq %rax,(%rdx) 3608 leaq 256(%rdx),%rdx 3609 subl $1,%esi 3610 jnz .Lscatter 3611.Lscatter_epilogue: 3612 .byte 0xf3,0xc3 3613.cfi_endproc 3614.size bn_scatter5,.-bn_scatter5 3615 3616.globl bn_gather5 3617.hidden bn_gather5 3618.type bn_gather5,@function 3619.align 32 3620bn_gather5: 3621.cfi_startproc 3622.LSEH_begin_bn_gather5: 3623 3624.byte 0x4c,0x8d,0x14,0x24 3625.cfi_def_cfa_register %r10 3626.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 3627 leaq 
.Linc(%rip),%rax 3628 andq $-16,%rsp 3629 3630 movd %ecx,%xmm5 3631 movdqa 0(%rax),%xmm0 3632 movdqa 16(%rax),%xmm1 3633 leaq 128(%rdx),%r11 3634 leaq 128(%rsp),%rax 3635 3636 pshufd $0,%xmm5,%xmm5 3637 movdqa %xmm1,%xmm4 3638 movdqa %xmm1,%xmm2 3639 paddd %xmm0,%xmm1 3640 pcmpeqd %xmm5,%xmm0 3641 movdqa %xmm4,%xmm3 3642 3643 paddd %xmm1,%xmm2 3644 pcmpeqd %xmm5,%xmm1 3645 movdqa %xmm0,-128(%rax) 3646 movdqa %xmm4,%xmm0 3647 3648 paddd %xmm2,%xmm3 3649 pcmpeqd %xmm5,%xmm2 3650 movdqa %xmm1,-112(%rax) 3651 movdqa %xmm4,%xmm1 3652 3653 paddd %xmm3,%xmm0 3654 pcmpeqd %xmm5,%xmm3 3655 movdqa %xmm2,-96(%rax) 3656 movdqa %xmm4,%xmm2 3657 paddd %xmm0,%xmm1 3658 pcmpeqd %xmm5,%xmm0 3659 movdqa %xmm3,-80(%rax) 3660 movdqa %xmm4,%xmm3 3661 3662 paddd %xmm1,%xmm2 3663 pcmpeqd %xmm5,%xmm1 3664 movdqa %xmm0,-64(%rax) 3665 movdqa %xmm4,%xmm0 3666 3667 paddd %xmm2,%xmm3 3668 pcmpeqd %xmm5,%xmm2 3669 movdqa %xmm1,-48(%rax) 3670 movdqa %xmm4,%xmm1 3671 3672 paddd %xmm3,%xmm0 3673 pcmpeqd %xmm5,%xmm3 3674 movdqa %xmm2,-32(%rax) 3675 movdqa %xmm4,%xmm2 3676 paddd %xmm0,%xmm1 3677 pcmpeqd %xmm5,%xmm0 3678 movdqa %xmm3,-16(%rax) 3679 movdqa %xmm4,%xmm3 3680 3681 paddd %xmm1,%xmm2 3682 pcmpeqd %xmm5,%xmm1 3683 movdqa %xmm0,0(%rax) 3684 movdqa %xmm4,%xmm0 3685 3686 paddd %xmm2,%xmm3 3687 pcmpeqd %xmm5,%xmm2 3688 movdqa %xmm1,16(%rax) 3689 movdqa %xmm4,%xmm1 3690 3691 paddd %xmm3,%xmm0 3692 pcmpeqd %xmm5,%xmm3 3693 movdqa %xmm2,32(%rax) 3694 movdqa %xmm4,%xmm2 3695 paddd %xmm0,%xmm1 3696 pcmpeqd %xmm5,%xmm0 3697 movdqa %xmm3,48(%rax) 3698 movdqa %xmm4,%xmm3 3699 3700 paddd %xmm1,%xmm2 3701 pcmpeqd %xmm5,%xmm1 3702 movdqa %xmm0,64(%rax) 3703 movdqa %xmm4,%xmm0 3704 3705 paddd %xmm2,%xmm3 3706 pcmpeqd %xmm5,%xmm2 3707 movdqa %xmm1,80(%rax) 3708 movdqa %xmm4,%xmm1 3709 3710 paddd %xmm3,%xmm0 3711 pcmpeqd %xmm5,%xmm3 3712 movdqa %xmm2,96(%rax) 3713 movdqa %xmm4,%xmm2 3714 movdqa %xmm3,112(%rax) 3715 jmp .Lgather 3716 3717.align 32 3718.Lgather: 3719 pxor %xmm4,%xmm4 3720 pxor %xmm5,%xmm5 3721 
movdqa -128(%r11),%xmm0 3722 movdqa -112(%r11),%xmm1 3723 movdqa -96(%r11),%xmm2 3724 pand -128(%rax),%xmm0 3725 movdqa -80(%r11),%xmm3 3726 pand -112(%rax),%xmm1 3727 por %xmm0,%xmm4 3728 pand -96(%rax),%xmm2 3729 por %xmm1,%xmm5 3730 pand -80(%rax),%xmm3 3731 por %xmm2,%xmm4 3732 por %xmm3,%xmm5 3733 movdqa -64(%r11),%xmm0 3734 movdqa -48(%r11),%xmm1 3735 movdqa -32(%r11),%xmm2 3736 pand -64(%rax),%xmm0 3737 movdqa -16(%r11),%xmm3 3738 pand -48(%rax),%xmm1 3739 por %xmm0,%xmm4 3740 pand -32(%rax),%xmm2 3741 por %xmm1,%xmm5 3742 pand -16(%rax),%xmm3 3743 por %xmm2,%xmm4 3744 por %xmm3,%xmm5 3745 movdqa 0(%r11),%xmm0 3746 movdqa 16(%r11),%xmm1 3747 movdqa 32(%r11),%xmm2 3748 pand 0(%rax),%xmm0 3749 movdqa 48(%r11),%xmm3 3750 pand 16(%rax),%xmm1 3751 por %xmm0,%xmm4 3752 pand 32(%rax),%xmm2 3753 por %xmm1,%xmm5 3754 pand 48(%rax),%xmm3 3755 por %xmm2,%xmm4 3756 por %xmm3,%xmm5 3757 movdqa 64(%r11),%xmm0 3758 movdqa 80(%r11),%xmm1 3759 movdqa 96(%r11),%xmm2 3760 pand 64(%rax),%xmm0 3761 movdqa 112(%r11),%xmm3 3762 pand 80(%rax),%xmm1 3763 por %xmm0,%xmm4 3764 pand 96(%rax),%xmm2 3765 por %xmm1,%xmm5 3766 pand 112(%rax),%xmm3 3767 por %xmm2,%xmm4 3768 por %xmm3,%xmm5 3769 por %xmm5,%xmm4 3770 leaq 256(%r11),%r11 3771 pshufd $0x4e,%xmm4,%xmm0 3772 por %xmm4,%xmm0 3773 movq %xmm0,(%rdi) 3774 leaq 8(%rdi),%rdi 3775 subl $1,%esi 3776 jnz .Lgather 3777 3778 leaq (%r10),%rsp 3779.cfi_def_cfa_register %rsp 3780 .byte 0xf3,0xc3 3781.LSEH_end_bn_gather5: 3782.cfi_endproc 3783.size bn_gather5,.-bn_gather5 3784.align 64 3785.Linc: 3786.long 0,0, 1,1 3787.long 2,2, 2,2 3788.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 3789#endif 3790