# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text

.extern GFp_ia32cap_P
.hidden GFp_ia32cap_P

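# GFp_bn_mul_mont computes a Montgomery product, rp[] = ap[]*bp[]/2^(64*num)
# mod np[], using the usual OpenSSL bn_mul_mont argument order
# (rp, ap, bp, np, n0, num); under the System V AMD64 ABI those arrive in
# %rdi, %rsi, %rdx, %rcx, %r8 and %r9.  The entry code dispatches on the
# operand size: the generic loop below when num is not a multiple of 4 or is
# smaller than 8, the squaring path when ap == bp and num is a multiple of 8,
# and the 4x-unrolled path otherwise (which may in turn select the MULX/ADX
# code based on the GFp_ia32cap_P feature word).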
.globl GFp_bn_mul_mont
.hidden GFp_bn_mul_mont
.type GFp_bn_mul_mont,@function
.align 16
GFp_bn_mul_mont:
.cfi_startproc
    movl %r9d,%r9d
    movq %rsp,%rax
.cfi_def_cfa_register %rax
    testl $3,%r9d
    jnz .Lmul_enter
    cmpl $8,%r9d
    jb .Lmul_enter
    movl GFp_ia32cap_P+8(%rip),%r11d
    cmpq %rsi,%rdx
    jne .Lmul4x_enter
    testl $7,%r9d
    jz .Lsqr8x_enter
    jmp .Lmul4x_enter

.align 16
.Lmul_enter:
    pushq %rbx
.cfi_offset %rbx,-16
    pushq %rbp
.cfi_offset %rbp,-24
    pushq %r12
.cfi_offset %r12,-32
    pushq %r13
.cfi_offset %r13,-40
    pushq %r14
.cfi_offset %r14,-48
    pushq %r15
.cfi_offset %r15,-56

    negq %r9
    movq %rsp,%r11
    leaq -16(%rsp,%r9,8),%r10
    negq %r9
    andq $-1024,%r10

    subq %r10,%r11
    andq $-4096,%r11
    leaq (%r10,%r11,1),%rsp
    movq (%rsp),%r11
    cmpq %r10,%rsp
    ja .Lmul_page_walk
    jmp .Lmul_page_walk_done

.align 16
.Lmul_page_walk:
    leaq -4096(%rsp),%rsp
    movq (%rsp),%r11
    cmpq %r10,%rsp
    ja .Lmul_page_walk
.Lmul_page_walk_done:

    movq %rax,8(%rsp,%r9,8)
.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:
    movq %rdx,%r12
    movq (%r8),%r8
    movq (%r12),%rbx
    movq (%rsi),%rax

    xorq %r14,%r14
    xorq %r15,%r15

    movq %r8,%rbp
    mulq %rbx
    movq %rax,%r10
    movq (%rcx),%rax

    imulq %r10,%rbp
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r10
    movq 8(%rsi),%rax
    adcq $0,%rdx
    movq %rdx,%r13

    leaq 1(%r15),%r15
    jmp .L1st_enter

.align 16
.L1st:
    addq %rax,%r13
    movq (%rsi,%r15,8),%rax
    adcq $0,%rdx
    addq %r11,%r13
    movq %r10,%r11
    adcq $0,%rdx
    movq %r13,-16(%rsp,%r15,8)
    movq %rdx,%r13

.L1st_enter:
    mulq %rbx
    addq %rax,%r11
    movq (%rcx,%r15,8),%rax
    adcq $0,%rdx
    leaq 1(%r15),%r15
    movq %rdx,%r10

    mulq %rbp
    cmpq %r9,%r15
    jne .L1st

    addq %rax,%r13
    movq (%rsi),%rax
    adcq $0,%rdx
    addq %r11,%r13
    adcq $0,%rdx
    movq %r13,-16(%rsp,%r15,8)
    movq %rdx,%r13
    movq %r10,%r11

    xorq %rdx,%rdx
    addq %r11,%r13
    adcq $0,%rdx
    movq %r13,-8(%rsp,%r9,8)
    movq %rdx,(%rsp,%r9,8)

    leaq 1(%r14),%r14
    jmp .Louter
.align 16
.Louter:
    movq (%r12,%r14,8),%rbx
    xorq %r15,%r15
    movq %r8,%rbp
    movq (%rsp),%r10
    mulq %rbx
    addq %rax,%r10
    movq (%rcx),%rax
    adcq $0,%rdx

    imulq %r10,%rbp
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r10
    movq 8(%rsi),%rax
    adcq $0,%rdx
    movq 8(%rsp),%r10
    movq %rdx,%r13

    leaq 1(%r15),%r15
    jmp .Linner_enter

.align 16
.Linner:
    addq %rax,%r13
    movq (%rsi,%r15,8),%rax
    adcq $0,%rdx
    addq %r10,%r13
    movq (%rsp,%r15,8),%r10
    adcq $0,%rdx
    movq %r13,-16(%rsp,%r15,8)
    movq %rdx,%r13

.Linner_enter:
    mulq %rbx
    addq %rax,%r11
    movq (%rcx,%r15,8),%rax
    adcq $0,%rdx
    addq %r11,%r10
    movq %rdx,%r11
    adcq $0,%r11
    leaq 1(%r15),%r15

    mulq %rbp
    cmpq %r9,%r15
    jne .Linner

    addq %rax,%r13
    movq (%rsi),%rax
    adcq $0,%rdx
    addq %r10,%r13
    movq (%rsp,%r15,8),%r10
    adcq $0,%rdx
    movq %r13,-16(%rsp,%r15,8)
    movq %rdx,%r13

    xorq %rdx,%rdx
    addq %r11,%r13
    adcq $0,%rdx
    addq %r10,%r13
    adcq $0,%rdx
    movq %r13,-8(%rsp,%r9,8)
    movq %rdx,(%rsp,%r9,8)

    leaq 1(%r14),%r14
    cmpq %r9,%r14
    jb .Louter

    xorq %r14,%r14
    movq (%rsp),%rax
    movq %r9,%r15

.align 16
.Lsub: sbbq (%rcx,%r14,8),%rax
    movq %rax,(%rdi,%r14,8)
    movq 8(%rsp,%r14,8),%rax
    leaq 1(%r14),%r14
    decq %r15
    jnz .Lsub

    sbbq $0,%rax
    movq $-1,%rbx
    xorq %rax,%rbx
    xorq %r14,%r14
    movq %r9,%r15

.Lcopy:
    movq (%rdi,%r14,8),%rcx
    movq (%rsp,%r14,8),%rdx
    andq %rbx,%rcx
    andq %rax,%rdx
    movq %r9,(%rsp,%r14,8)
    orq %rcx,%rdx
    movq %rdx,(%rdi,%r14,8)
    leaq 1(%r14),%r14
    subq $1,%r15
    jnz .Lcopy

    movq 8(%rsp,%r9,8),%rsi
.cfi_def_cfa %rsi,8
    movq $1,%rax
    movq -48(%rsi),%r15
.cfi_restore %r15
    movq -40(%rsi),%r14
.cfi_restore %r14
    movq -32(%rsi),%r13
.cfi_restore %r13
    movq -24(%rsi),%r12
.cfi_restore %r12
    movq -16(%rsi),%rbp
.cfi_restore %rbp
    movq -8(%rsi),%rbx
.cfi_restore %rbx
    leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul_epilogue:
    .byte 0xf3,0xc3
.cfi_endproc
.size GFp_bn_mul_mont,.-GFp_bn_mul_mont
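
# bn_mul4x_mont: 4x-unrolled Montgomery multiplication, reached when num is a
# multiple of 4 and at least 8.  %r11d still holds the GFp_ia32cap_P+8 feature
# word loaded by the dispatcher above; if it reports both BMI2 and ADX
# (mask 0x80100), control transfers to the MULX/ADX variant at .Lmulx4x_enter.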
.type bn_mul4x_mont,@function
.align 16
bn_mul4x_mont:
.cfi_startproc
    movl %r9d,%r9d
    movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter:
    andl $0x80100,%r11d
    cmpl $0x80100,%r11d
    je .Lmulx4x_enter
    pushq %rbx
.cfi_offset %rbx,-16
    pushq %rbp
.cfi_offset %rbp,-24
    pushq %r12
.cfi_offset %r12,-32
    pushq %r13
.cfi_offset %r13,-40
    pushq %r14
.cfi_offset %r14,-48
    pushq %r15
.cfi_offset %r15,-56

    negq %r9
    movq %rsp,%r11
    leaq -32(%rsp,%r9,8),%r10
    negq %r9
    andq $-1024,%r10

    subq %r10,%r11
    andq $-4096,%r11
    leaq (%r10,%r11,1),%rsp
    movq (%rsp),%r11
    cmpq %r10,%rsp
    ja .Lmul4x_page_walk
    jmp .Lmul4x_page_walk_done

.Lmul4x_page_walk:
    leaq -4096(%rsp),%rsp
    movq (%rsp),%r11
    cmpq %r10,%rsp
    ja .Lmul4x_page_walk
.Lmul4x_page_walk_done:

    movq %rax,8(%rsp,%r9,8)
.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul4x_body:
    movq %rdi,16(%rsp,%r9,8)
    movq %rdx,%r12
    movq (%r8),%r8
    movq (%r12),%rbx
    movq (%rsi),%rax

    xorq %r14,%r14
    xorq %r15,%r15

    movq %r8,%rbp
    mulq %rbx
    movq %rax,%r10
    movq (%rcx),%rax

    imulq %r10,%rbp
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r10
    movq 8(%rsi),%rax
    adcq $0,%rdx
    movq %rdx,%rdi

    mulq %rbx
    addq %rax,%r11
    movq 8(%rcx),%rax
    adcq $0,%rdx
    movq %rdx,%r10

    mulq %rbp
    addq %rax,%rdi
    movq 16(%rsi),%rax
    adcq $0,%rdx
    addq %r11,%rdi
    leaq 4(%r15),%r15
    adcq $0,%rdx
    movq %rdi,(%rsp)
    movq %rdx,%r13
    jmp .L1st4x
.align 16
.L1st4x:
    mulq %rbx
    addq %rax,%r10
    movq -16(%rcx,%r15,8),%rax
    adcq $0,%rdx
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r13
    movq -8(%rsi,%r15,8),%rax
    adcq $0,%rdx
    addq %r10,%r13
    adcq $0,%rdx
    movq %r13,-24(%rsp,%r15,8)
    movq %rdx,%rdi

    mulq %rbx
    addq %rax,%r11
    movq -8(%rcx,%r15,8),%rax
    adcq $0,%rdx
    movq %rdx,%r10

    mulq %rbp
    addq %rax,%rdi
    movq (%rsi,%r15,8),%rax
    adcq $0,%rdx
    addq %r11,%rdi
    adcq $0,%rdx
    movq %rdi,-16(%rsp,%r15,8)
    movq %rdx,%r13

    mulq %rbx
    addq %rax,%r10
    movq (%rcx,%r15,8),%rax
    adcq $0,%rdx
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r13
    movq 8(%rsi,%r15,8),%rax
    adcq $0,%rdx
    addq %r10,%r13
    adcq $0,%rdx
    movq %r13,-8(%rsp,%r15,8)
    movq %rdx,%rdi

    mulq %rbx
    addq %rax,%r11
    movq 8(%rcx,%r15,8),%rax
    adcq $0,%rdx
    leaq 4(%r15),%r15
    movq %rdx,%r10

    mulq %rbp
    addq %rax,%rdi
    movq -16(%rsi,%r15,8),%rax
    adcq $0,%rdx
    addq %r11,%rdi
    adcq $0,%rdx
    movq %rdi,-32(%rsp,%r15,8)
    movq %rdx,%r13
    cmpq %r9,%r15
    jb .L1st4x

    mulq %rbx
    addq %rax,%r10
    movq -16(%rcx,%r15,8),%rax
    adcq $0,%rdx
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r13
    movq -8(%rsi,%r15,8),%rax
    adcq $0,%rdx
    addq %r10,%r13
    adcq $0,%rdx
    movq %r13,-24(%rsp,%r15,8)
    movq %rdx,%rdi

    mulq %rbx
    addq %rax,%r11
    movq -8(%rcx,%r15,8),%rax
    adcq $0,%rdx
    movq %rdx,%r10

    mulq %rbp
    addq %rax,%rdi
    movq (%rsi),%rax
    adcq $0,%rdx
    addq %r11,%rdi
    adcq $0,%rdx
    movq %rdi,-16(%rsp,%r15,8)
    movq %rdx,%r13

    xorq %rdi,%rdi
    addq %r10,%r13
    adcq $0,%rdi
    movq %r13,-8(%rsp,%r15,8)
    movq %rdi,(%rsp,%r15,8)

    leaq 1(%r14),%r14
.align 4
.Louter4x:
    movq (%r12,%r14,8),%rbx
    xorq %r15,%r15
    movq (%rsp),%r10
    movq %r8,%rbp
    mulq %rbx
    addq %rax,%r10
    movq (%rcx),%rax
    adcq $0,%rdx

    imulq %r10,%rbp
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r10
    movq 8(%rsi),%rax
    adcq $0,%rdx
    movq %rdx,%rdi

    mulq %rbx
    addq %rax,%r11
    movq 8(%rcx),%rax
    adcq $0,%rdx
    addq 8(%rsp),%r11
    adcq $0,%rdx
    movq %rdx,%r10

    mulq %rbp
    addq %rax,%rdi
    movq 16(%rsi),%rax
    adcq $0,%rdx
    addq %r11,%rdi
    leaq 4(%r15),%r15
    adcq $0,%rdx
    movq %rdi,(%rsp)
    movq %rdx,%r13
    jmp .Linner4x
.align 16
.Linner4x:
    mulq %rbx
    addq %rax,%r10
    movq -16(%rcx,%r15,8),%rax
    adcq $0,%rdx
    addq -16(%rsp,%r15,8),%r10
    adcq $0,%rdx
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r13
    movq -8(%rsi,%r15,8),%rax
    adcq $0,%rdx
    addq %r10,%r13
    adcq $0,%rdx
    movq %r13,-24(%rsp,%r15,8)
    movq %rdx,%rdi

    mulq %rbx
    addq %rax,%r11
    movq -8(%rcx,%r15,8),%rax
    adcq $0,%rdx
    addq -8(%rsp,%r15,8),%r11
    adcq $0,%rdx
    movq %rdx,%r10

    mulq %rbp
    addq %rax,%rdi
    movq (%rsi,%r15,8),%rax
    adcq $0,%rdx
    addq %r11,%rdi
    adcq $0,%rdx
    movq %rdi,-16(%rsp,%r15,8)
    movq %rdx,%r13

    mulq %rbx
    addq %rax,%r10
    movq (%rcx,%r15,8),%rax
    adcq $0,%rdx
    addq (%rsp,%r15,8),%r10
    adcq $0,%rdx
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r13
    movq 8(%rsi,%r15,8),%rax
    adcq $0,%rdx
    addq %r10,%r13
    adcq $0,%rdx
    movq %r13,-8(%rsp,%r15,8)
    movq %rdx,%rdi

    mulq %rbx
    addq %rax,%r11
    movq 8(%rcx,%r15,8),%rax
    adcq $0,%rdx
    addq 8(%rsp,%r15,8),%r11
    adcq $0,%rdx
    leaq 4(%r15),%r15
    movq %rdx,%r10

    mulq %rbp
    addq %rax,%rdi
    movq -16(%rsi,%r15,8),%rax
    adcq $0,%rdx
    addq %r11,%rdi
    adcq $0,%rdx
    movq %rdi,-32(%rsp,%r15,8)
    movq %rdx,%r13
    cmpq %r9,%r15
    jb .Linner4x

    mulq %rbx
    addq %rax,%r10
    movq -16(%rcx,%r15,8),%rax
    adcq $0,%rdx
    addq -16(%rsp,%r15,8),%r10
    adcq $0,%rdx
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r13
    movq -8(%rsi,%r15,8),%rax
    adcq $0,%rdx
    addq %r10,%r13
    adcq $0,%rdx
    movq %r13,-24(%rsp,%r15,8)
    movq %rdx,%rdi

    mulq %rbx
    addq %rax,%r11
    movq -8(%rcx,%r15,8),%rax
    adcq $0,%rdx
    addq -8(%rsp,%r15,8),%r11
    adcq $0,%rdx
    leaq 1(%r14),%r14
    movq %rdx,%r10

    mulq %rbp
    addq %rax,%rdi
    movq (%rsi),%rax
    adcq $0,%rdx
    addq %r11,%rdi
    adcq $0,%rdx
    movq %rdi,-16(%rsp,%r15,8)
    movq %rdx,%r13

    xorq %rdi,%rdi
    addq %r10,%r13
    adcq $0,%rdi
    addq (%rsp,%r9,8),%r13
    adcq $0,%rdi
    movq %r13,-8(%rsp,%r15,8)
    movq %rdi,(%rsp,%r15,8)

    cmpq %r9,%r14
    jb .Louter4x
    movq 16(%rsp,%r9,8),%rdi
    leaq -4(%r9),%r15
    movq 0(%rsp),%rax
    movq 8(%rsp),%rdx
    shrq $2,%r15
    leaq (%rsp),%rsi
    xorq %r14,%r14

    subq 0(%rcx),%rax
    movq 16(%rsi),%rbx
    movq 24(%rsi),%rbp
    sbbq 8(%rcx),%rdx

.Lsub4x:
    movq %rax,0(%rdi,%r14,8)
    movq %rdx,8(%rdi,%r14,8)
    sbbq 16(%rcx,%r14,8),%rbx
    movq 32(%rsi,%r14,8),%rax
    movq 40(%rsi,%r14,8),%rdx
    sbbq 24(%rcx,%r14,8),%rbp
    movq %rbx,16(%rdi,%r14,8)
    movq %rbp,24(%rdi,%r14,8)
    sbbq 32(%rcx,%r14,8),%rax
    movq 48(%rsi,%r14,8),%rbx
    movq 56(%rsi,%r14,8),%rbp
    sbbq 40(%rcx,%r14,8),%rdx
    leaq 4(%r14),%r14
    decq %r15
    jnz .Lsub4x

    movq %rax,0(%rdi,%r14,8)
    movq 32(%rsi,%r14,8),%rax
    sbbq 16(%rcx,%r14,8),%rbx
    movq %rdx,8(%rdi,%r14,8)
    sbbq 24(%rcx,%r14,8),%rbp
    movq %rbx,16(%rdi,%r14,8)

    sbbq $0,%rax
    movq %rbp,24(%rdi,%r14,8)
    pxor %xmm0,%xmm0
.byte 102,72,15,110,224
    pcmpeqd %xmm5,%xmm5
    pshufd $0,%xmm4,%xmm4
    movq %r9,%r15
    pxor %xmm4,%xmm5
    shrq $2,%r15
    xorl %eax,%eax

    jmp .Lcopy4x
.align 16
.Lcopy4x:
    movdqa (%rsp,%rax,1),%xmm1
    movdqu (%rdi,%rax,1),%xmm2
    pand %xmm4,%xmm1
    pand %xmm5,%xmm2
    movdqa 16(%rsp,%rax,1),%xmm3
    movdqa %xmm0,(%rsp,%rax,1)
    por %xmm2,%xmm1
    movdqu 16(%rdi,%rax,1),%xmm2
    movdqu %xmm1,(%rdi,%rax,1)
    pand %xmm4,%xmm3
    pand %xmm5,%xmm2
    movdqa %xmm0,16(%rsp,%rax,1)
    por %xmm2,%xmm3
    movdqu %xmm3,16(%rdi,%rax,1)
    leaq 32(%rax),%rax
    decq %r15
    jnz .Lcopy4x
    movq 8(%rsp,%r9,8),%rsi
.cfi_def_cfa %rsi, 8
    movq $1,%rax
    movq -48(%rsi),%r15
.cfi_restore %r15
    movq -40(%rsi),%r14
.cfi_restore %r14
    movq -32(%rsi),%r13
.cfi_restore %r13
    movq -24(%rsi),%r12
.cfi_restore %r12
    movq -16(%rsi),%rbp
.cfi_restore %rbp
    movq -8(%rsi),%rbx
.cfi_restore %rbx
    leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul4x_epilogue:
    .byte 0xf3,0xc3
.cfi_endproc
.size bn_mul4x_mont,.-bn_mul4x_mont
.extern GFp_bn_sqrx8x_internal
.hidden GFp_bn_sqrx8x_internal
.extern GFp_bn_sqr8x_internal
.hidden GFp_bn_sqr8x_internal

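# bn_sqr8x_mont: squaring path, reached when ap == bp and num is a multiple
# of 8.  The squaring itself is delegated to GFp_bn_sqr8x_internal, or to
# GFp_bn_sqrx8x_internal on CPUs with BMI2 and ADX; this wrapper sets up the
# stack frame and then reduces the result (subtract the modulus, then copy
# back the correct value with a branch-free mask).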
.type bn_sqr8x_mont,@function
.align 32
bn_sqr8x_mont:
.cfi_startproc
    movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lsqr8x_enter:
    pushq %rbx
.cfi_offset %rbx,-16
    pushq %rbp
.cfi_offset %rbp,-24
    pushq %r12
.cfi_offset %r12,-32
    pushq %r13
.cfi_offset %r13,-40
    pushq %r14
.cfi_offset %r14,-48
    pushq %r15
.cfi_offset %r15,-56
.Lsqr8x_prologue:

    movl %r9d,%r10d
    shll $3,%r9d
    shlq $3+2,%r10
    negq %r9

    leaq -64(%rsp,%r9,2),%r11
    movq %rsp,%rbp
    movq (%r8),%r8
    subq %rsi,%r11
    andq $4095,%r11
    cmpq %r11,%r10
    jb .Lsqr8x_sp_alt
    subq %r11,%rbp
    leaq -64(%rbp,%r9,2),%rbp
    jmp .Lsqr8x_sp_done

.align 32
.Lsqr8x_sp_alt:
    leaq 4096-64(,%r9,2),%r10
    leaq -64(%rbp,%r9,2),%rbp
    subq %r10,%r11
    movq $0,%r10
    cmovcq %r10,%r11
    subq %r11,%rbp
.Lsqr8x_sp_done:
    andq $-64,%rbp
    movq %rsp,%r11
    subq %rbp,%r11
    andq $-4096,%r11
    leaq (%r11,%rbp,1),%rsp
    movq (%rsp),%r10
    cmpq %rbp,%rsp
    ja .Lsqr8x_page_walk
    jmp .Lsqr8x_page_walk_done

.align 16
.Lsqr8x_page_walk:
    leaq -4096(%rsp),%rsp
    movq (%rsp),%r10
    cmpq %rbp,%rsp
    ja .Lsqr8x_page_walk
.Lsqr8x_page_walk_done:

    movq %r9,%r10
    negq %r9

    movq %r8,32(%rsp)
    movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lsqr8x_body:

.byte 102,72,15,110,209
    pxor %xmm0,%xmm0
.byte 102,72,15,110,207
.byte 102,73,15,110,218
    movl GFp_ia32cap_P+8(%rip),%eax
    andl $0x80100,%eax
    cmpl $0x80100,%eax
    jne .Lsqr8x_nox

    call GFp_bn_sqrx8x_internal

    leaq (%r8,%rcx,1),%rbx
    movq %rcx,%r9
    movq %rcx,%rdx
.byte 102,72,15,126,207
    sarq $3+2,%rcx
    jmp .Lsqr8x_sub

.align 32
.Lsqr8x_nox:
    call GFp_bn_sqr8x_internal

    leaq (%rdi,%r9,1),%rbx
    movq %r9,%rcx
    movq %r9,%rdx
.byte 102,72,15,126,207
    sarq $3+2,%rcx
    jmp .Lsqr8x_sub

.align 32
.Lsqr8x_sub:
    movq 0(%rbx),%r12
    movq 8(%rbx),%r13
    movq 16(%rbx),%r14
    movq 24(%rbx),%r15
    leaq 32(%rbx),%rbx
    sbbq 0(%rbp),%r12
    sbbq 8(%rbp),%r13
    sbbq 16(%rbp),%r14
    sbbq 24(%rbp),%r15
    leaq 32(%rbp),%rbp
    movq %r12,0(%rdi)
    movq %r13,8(%rdi)
    movq %r14,16(%rdi)
    movq %r15,24(%rdi)
    leaq 32(%rdi),%rdi
    incq %rcx
    jnz .Lsqr8x_sub

    sbbq $0,%rax
    leaq (%rbx,%r9,1),%rbx
    leaq (%rdi,%r9,1),%rdi

.byte 102,72,15,110,200
    pxor %xmm0,%xmm0
    pshufd $0,%xmm1,%xmm1
    movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
    jmp .Lsqr8x_cond_copy

.align 32
.Lsqr8x_cond_copy:
    movdqa 0(%rbx),%xmm2
    movdqa 16(%rbx),%xmm3
    leaq 32(%rbx),%rbx
    movdqu 0(%rdi),%xmm4
    movdqu 16(%rdi),%xmm5
    leaq 32(%rdi),%rdi
    movdqa %xmm0,-32(%rbx)
    movdqa %xmm0,-16(%rbx)
    movdqa %xmm0,-32(%rbx,%rdx,1)
    movdqa %xmm0,-16(%rbx,%rdx,1)
    pcmpeqd %xmm1,%xmm0
    pand %xmm1,%xmm2
    pand %xmm1,%xmm3
    pand %xmm0,%xmm4
    pand %xmm0,%xmm5
    pxor %xmm0,%xmm0
    por %xmm2,%xmm4
    por %xmm3,%xmm5
    movdqu %xmm4,-32(%rdi)
    movdqu %xmm5,-16(%rdi)
    addq $32,%r9
    jnz .Lsqr8x_cond_copy

    movq $1,%rax
    movq -48(%rsi),%r15
.cfi_restore %r15
    movq -40(%rsi),%r14
.cfi_restore %r14
    movq -32(%rsi),%r13
.cfi_restore %r13
    movq -24(%rsi),%r12
.cfi_restore %r12
    movq -16(%rsi),%rbp
.cfi_restore %rbp
    movq -8(%rsi),%rbx
.cfi_restore %rbx
    leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lsqr8x_epilogue:
    .byte 0xf3,0xc3
.cfi_endproc
.size bn_sqr8x_mont,.-bn_sqr8x_mont
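
# bn_mulx4x_mont: MULX/ADX (BMI2 + ADX) variant of the 4x-unrolled
# multiplication, reached only via .Lmulx4x_enter after the feature check in
# bn_mul4x_mont.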
.type bn_mulx4x_mont,@function
.align 32
bn_mulx4x_mont:
.cfi_startproc
    movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmulx4x_enter:
    pushq %rbx
.cfi_offset %rbx,-16
    pushq %rbp
.cfi_offset %rbp,-24
    pushq %r12
.cfi_offset %r12,-32
    pushq %r13
.cfi_offset %r13,-40
    pushq %r14
.cfi_offset %r14,-48
    pushq %r15
.cfi_offset %r15,-56
.Lmulx4x_prologue:

    shll $3,%r9d
    xorq %r10,%r10
    subq %r9,%r10
    movq (%r8),%r8
    leaq -72(%rsp,%r10,1),%rbp
    andq $-128,%rbp
    movq %rsp,%r11
    subq %rbp,%r11
    andq $-4096,%r11
    leaq (%r11,%rbp,1),%rsp
    movq (%rsp),%r10
    cmpq %rbp,%rsp
    ja .Lmulx4x_page_walk
    jmp .Lmulx4x_page_walk_done

.align 16
.Lmulx4x_page_walk:
    leaq -4096(%rsp),%rsp
    movq (%rsp),%r10
    cmpq %rbp,%rsp
    ja .Lmulx4x_page_walk
.Lmulx4x_page_walk_done:

    leaq (%rdx,%r9,1),%r10

    movq %r9,0(%rsp)
    shrq $5,%r9
    movq %r10,16(%rsp)
    subq $1,%r9
    movq %r8,24(%rsp)
    movq %rdi,32(%rsp)
    movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
    movq %r9,48(%rsp)
    jmp .Lmulx4x_body

.align 32
.Lmulx4x_body:
    leaq 8(%rdx),%rdi
    movq (%rdx),%rdx
    leaq 64+32(%rsp),%rbx
    movq %rdx,%r9

    mulxq 0(%rsi),%r8,%rax
    mulxq 8(%rsi),%r11,%r14
    addq %rax,%r11
    movq %rdi,8(%rsp)
    mulxq 16(%rsi),%r12,%r13
    adcq %r14,%r12
    adcq $0,%r13

    movq %r8,%rdi
    imulq 24(%rsp),%r8
    xorq %rbp,%rbp

    mulxq 24(%rsi),%rax,%r14
    movq %r8,%rdx
    leaq 32(%rsi),%rsi
    adcxq %rax,%r13
    adcxq %rbp,%r14

    mulxq 0(%rcx),%rax,%r10
    adcxq %rax,%rdi
    adoxq %r11,%r10
    mulxq 8(%rcx),%rax,%r11
    adcxq %rax,%r10
    adoxq %r12,%r11
.byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00
    movq 48(%rsp),%rdi
    movq %r10,-32(%rbx)
    adcxq %rax,%r11
    adoxq %r13,%r12
    mulxq 24(%rcx),%rax,%r15
    movq %r9,%rdx
    movq %r11,-24(%rbx)
    adcxq %rax,%r12
    adoxq %rbp,%r15
    leaq 32(%rcx),%rcx
    movq %r12,-16(%rbx)

    jmp .Lmulx4x_1st

.align 32
.Lmulx4x_1st:
    adcxq %rbp,%r15
    mulxq 0(%rsi),%r10,%rax
    adcxq %r14,%r10
    mulxq 8(%rsi),%r11,%r14
    adcxq %rax,%r11
    mulxq 16(%rsi),%r12,%rax
    adcxq %r14,%r12
    mulxq 24(%rsi),%r13,%r14
.byte 0x67,0x67
    movq %r8,%rdx
    adcxq %rax,%r13
    adcxq %rbp,%r14
    leaq 32(%rsi),%rsi
    leaq 32(%rbx),%rbx

    adoxq %r15,%r10
    mulxq 0(%rcx),%rax,%r15
    adcxq %rax,%r10
    adoxq %r15,%r11
    mulxq 8(%rcx),%rax,%r15
    adcxq %rax,%r11
    adoxq %r15,%r12
    mulxq 16(%rcx),%rax,%r15
    movq %r10,-40(%rbx)
    adcxq %rax,%r12
    movq %r11,-32(%rbx)
    adoxq %r15,%r13
    mulxq 24(%rcx),%rax,%r15
    movq %r9,%rdx
    movq %r12,-24(%rbx)
    adcxq %rax,%r13
    adoxq %rbp,%r15
    leaq 32(%rcx),%rcx
    movq %r13,-16(%rbx)

    decq %rdi
    jnz .Lmulx4x_1st

    movq 0(%rsp),%rax
    movq 8(%rsp),%rdi
    adcq %rbp,%r15
    addq %r15,%r14
    sbbq %r15,%r15
    movq %r14,-8(%rbx)
    jmp .Lmulx4x_outer

.align 32
.Lmulx4x_outer:
    movq (%rdi),%rdx
    leaq 8(%rdi),%rdi
    subq %rax,%rsi
    movq %r15,(%rbx)
    leaq 64+32(%rsp),%rbx
    subq %rax,%rcx

    mulxq 0(%rsi),%r8,%r11
    xorl %ebp,%ebp
    movq %rdx,%r9
    mulxq 8(%rsi),%r14,%r12
    adoxq -32(%rbx),%r8
    adcxq %r14,%r11
    mulxq 16(%rsi),%r15,%r13
    adoxq -24(%rbx),%r11
    adcxq %r15,%r12
    adoxq -16(%rbx),%r12
    adcxq %rbp,%r13
    adoxq %rbp,%r13

    movq %rdi,8(%rsp)
    movq %r8,%r15
    imulq 24(%rsp),%r8
    xorl %ebp,%ebp

    mulxq 24(%rsi),%rax,%r14
    movq %r8,%rdx
    adcxq %rax,%r13
    adoxq -8(%rbx),%r13
    adcxq %rbp,%r14
    leaq 32(%rsi),%rsi
    adoxq %rbp,%r14

    mulxq 0(%rcx),%rax,%r10
    adcxq %rax,%r15
    adoxq %r11,%r10
    mulxq 8(%rcx),%rax,%r11
    adcxq %rax,%r10
    adoxq %r12,%r11
    mulxq 16(%rcx),%rax,%r12
    movq %r10,-32(%rbx)
    adcxq %rax,%r11
    adoxq %r13,%r12
    mulxq 24(%rcx),%rax,%r15
    movq %r9,%rdx
    movq %r11,-24(%rbx)
    leaq 32(%rcx),%rcx
    adcxq %rax,%r12
    adoxq %rbp,%r15
    movq 48(%rsp),%rdi
    movq %r12,-16(%rbx)

    jmp .Lmulx4x_inner

.align 32
.Lmulx4x_inner:
    mulxq 0(%rsi),%r10,%rax
    adcxq %rbp,%r15
    adoxq %r14,%r10
    mulxq 8(%rsi),%r11,%r14
    adcxq 0(%rbx),%r10
    adoxq %rax,%r11
    mulxq 16(%rsi),%r12,%rax
    adcxq 8(%rbx),%r11
    adoxq %r14,%r12
    mulxq 24(%rsi),%r13,%r14
    movq %r8,%rdx
    adcxq 16(%rbx),%r12
    adoxq %rax,%r13
    adcxq 24(%rbx),%r13
    adoxq %rbp,%r14
    leaq 32(%rsi),%rsi
    leaq 32(%rbx),%rbx
    adcxq %rbp,%r14

    adoxq %r15,%r10
    mulxq 0(%rcx),%rax,%r15
    adcxq %rax,%r10
    adoxq %r15,%r11
    mulxq 8(%rcx),%rax,%r15
    adcxq %rax,%r11
    adoxq %r15,%r12
    mulxq 16(%rcx),%rax,%r15
    movq %r10,-40(%rbx)
    adcxq %rax,%r12
    adoxq %r15,%r13
    mulxq 24(%rcx),%rax,%r15
    movq %r9,%rdx
    movq %r11,-32(%rbx)
    movq %r12,-24(%rbx)
    adcxq %rax,%r13
    adoxq %rbp,%r15
    leaq 32(%rcx),%rcx
    movq %r13,-16(%rbx)

    decq %rdi
    jnz .Lmulx4x_inner

    movq 0(%rsp),%rax
    movq 8(%rsp),%rdi
    adcq %rbp,%r15
    subq 0(%rbx),%rbp
    adcq %r15,%r14
    sbbq %r15,%r15
    movq %r14,-8(%rbx)

    cmpq 16(%rsp),%rdi
    jne .Lmulx4x_outer

    leaq 64(%rsp),%rbx
    subq %rax,%rcx
    negq %r15
    movq %rax,%rdx
    shrq $3+2,%rax
    movq 32(%rsp),%rdi
    jmp .Lmulx4x_sub

.align 32
.Lmulx4x_sub:
    movq 0(%rbx),%r11
    movq 8(%rbx),%r12
    movq 16(%rbx),%r13
    movq 24(%rbx),%r14
    leaq 32(%rbx),%rbx
    sbbq 0(%rcx),%r11
    sbbq 8(%rcx),%r12
    sbbq 16(%rcx),%r13
    sbbq 24(%rcx),%r14
    leaq 32(%rcx),%rcx
    movq %r11,0(%rdi)
    movq %r12,8(%rdi)
    movq %r13,16(%rdi)
    movq %r14,24(%rdi)
    leaq 32(%rdi),%rdi
    decq %rax
    jnz .Lmulx4x_sub

    sbbq $0,%r15
    leaq 64(%rsp),%rbx
    subq %rdx,%rdi

.byte 102,73,15,110,207
    pxor %xmm0,%xmm0
    pshufd $0,%xmm1,%xmm1
    movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
    jmp .Lmulx4x_cond_copy

.align 32
.Lmulx4x_cond_copy:
    movdqa 0(%rbx),%xmm2
    movdqa 16(%rbx),%xmm3
    leaq 32(%rbx),%rbx
    movdqu 0(%rdi),%xmm4
    movdqu 16(%rdi),%xmm5
    leaq 32(%rdi),%rdi
    movdqa %xmm0,-32(%rbx)
    movdqa %xmm0,-16(%rbx)
    pcmpeqd %xmm1,%xmm0
    pand %xmm1,%xmm2
    pand %xmm1,%xmm3
    pand %xmm0,%xmm4
    pand %xmm0,%xmm5
    pxor %xmm0,%xmm0
    por %xmm2,%xmm4
    por %xmm3,%xmm5
    movdqu %xmm4,-32(%rdi)
    movdqu %xmm5,-16(%rdi)
    subq $32,%rdx
    jnz .Lmulx4x_cond_copy

    movq %rdx,(%rbx)

    movq $1,%rax
    movq -48(%rsi),%r15
.cfi_restore %r15
    movq -40(%rsi),%r14
.cfi_restore %r14
    movq -32(%rsi),%r13
.cfi_restore %r13
    movq -24(%rsi),%r12
.cfi_restore %r12
    movq -16(%rsi),%rbp
.cfi_restore %rbp
    movq -8(%rsi),%rbx
.cfi_restore %rbx
    leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmulx4x_epilogue:
    .byte 0xf3,0xc3
.cfi_endproc
.size bn_mulx4x_mont,.-bn_mulx4x_mont
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16
#endif
.section .note.GNU-stack,"",@progbits