# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#include "ring_core_generated/prefix_symbols_asm.h"
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

.align 64
.Lpoly:
.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001

.LOne:
.long 1,1,1,1,1,1,1,1
.LTwo:
.long 2,2,2,2,2,2,2,2
.LThree:
.long 3,3,3,3,3,3,3,3
.LONE_mont:
.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe

.Lord:
.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000
.LordK:
.quad 0xccd1c8aaee00bc4f

.globl nistz256_neg
.hidden nistz256_neg
.type nistz256_neg,@function
.align 32
nistz256_neg:
.cfi_startproc
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-16
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-24
.Lneg_body:

	xorq %r8,%r8
	xorq %r9,%r9
	xorq %r10,%r10
	xorq %r11,%r11
	xorq %r13,%r13

	subq 0(%rsi),%r8
	sbbq 8(%rsi),%r9
	sbbq 16(%rsi),%r10
	movq %r8,%rax
	sbbq 24(%rsi),%r11
	leaq .Lpoly(%rip),%rsi
	movq %r9,%rdx
	sbbq $0,%r13

	addq 0(%rsi),%r8
	movq %r10,%rcx
	adcq 8(%rsi),%r9
	adcq 16(%rsi),%r10
	movq %r11,%r12
	adcq 24(%rsi),%r11
	testq %r13,%r13

	cmovzq %rax,%r8
	cmovzq %rdx,%r9
	movq %r8,0(%rdi)
	cmovzq %rcx,%r10
	movq %r9,8(%rdi)
	cmovzq %r12,%r11
	movq %r10,16(%rdi)
	movq %r11,24(%rdi)

	movq 0(%rsp),%r13
.cfi_restore %r13
	movq 8(%rsp),%r12
.cfi_restore %r12
	leaq 16(%rsp),%rsp
.cfi_adjust_cfa_offset -16
.Lneg_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size nistz256_neg,.-nistz256_neg

.globl p256_scalar_mul_mont
.hidden p256_scalar_mul_mont
.type p256_scalar_mul_mont,@function
.align 32
p256_scalar_mul_mont:
.cfi_startproc
	leaq OPENSSL_ia32cap_P(%rip),%rcx
	movq 8(%rcx),%rcx
	andl $0x80100,%ecx
	cmpl $0x80100,%ecx
	je .Lecp_nistz256_ord_mul_montx
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.Lord_mul_body:

	movq 0(%rdx),%rax
	movq %rdx,%rbx
	leaq .Lord(%rip),%r14
	movq .LordK(%rip),%r15

	movq %rax,%rcx
	mulq 0(%rsi)
	movq %rax,%r8
	movq %rcx,%rax
	movq %rdx,%r9

	mulq 8(%rsi)
	addq %rax,%r9
	movq %rcx,%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq 16(%rsi)
	addq %rax,%r10
	movq %rcx,%rax
	adcq $0,%rdx

	movq %r8,%r13
	imulq %r15,%r8

	movq %rdx,%r11
	mulq 24(%rsi)
	addq %rax,%r11
	movq %r8,%rax
	adcq $0,%rdx
	movq %rdx,%r12

	mulq 0(%r14)
	movq %r8,%rbp
	addq %rax,%r13
	movq %r8,%rax
	adcq $0,%rdx
	movq %rdx,%rcx

	subq %r8,%r10
	sbbq $0,%r8

	mulq 8(%r14)
	addq %rcx,%r9
	adcq $0,%rdx
	addq %rax,%r9
	movq %rbp,%rax
	adcq %rdx,%r10
	movq %rbp,%rdx
	adcq $0,%r8

	shlq $32,%rax
	shrq $32,%rdx
	subq %rax,%r11
	movq 8(%rbx),%rax
	sbbq %rdx,%rbp

	addq %r8,%r11
	adcq %rbp,%r12
	adcq $0,%r13

	movq %rax,%rcx
	mulq 0(%rsi)
	addq %rax,%r9
	movq %rcx,%rax
	adcq $0,%rdx
	movq %rdx,%rbp

	mulq 8(%rsi)
	addq %rbp,%r10
	adcq $0,%rdx
	addq %rax,%r10
	movq %rcx,%rax
	adcq $0,%rdx
	movq %rdx,%rbp

	mulq 16(%rsi)
	addq %rbp,%r11
	adcq $0,%rdx
	addq %rax,%r11
	movq %rcx,%rax
	adcq $0,%rdx

	movq %r9,%rcx
	imulq %r15,%r9

	movq %rdx,%rbp
	mulq 24(%rsi)
	addq %rbp,%r12
	adcq $0,%rdx
	xorq %r8,%r8
	addq %rax,%r12
	movq %r9,%rax
	adcq %rdx,%r13
	adcq $0,%r8

	mulq 0(%r14)
	movq %r9,%rbp
	addq %rax,%rcx
	movq %r9,%rax
	adcq %rdx,%rcx

	subq %r9,%r11
	sbbq $0,%r9

	mulq 8(%r14)
	addq %rcx,%r10
	adcq $0,%rdx
	addq %rax,%r10
	movq %rbp,%rax
	adcq %rdx,%r11
	movq %rbp,%rdx
	adcq $0,%r9

	shlq $32,%rax
	shrq $32,%rdx
	subq %rax,%r12
	movq 16(%rbx),%rax
	sbbq %rdx,%rbp

	addq %r9,%r12
	adcq %rbp,%r13
	adcq $0,%r8

	movq %rax,%rcx
	mulq 0(%rsi)
	addq %rax,%r10
	movq %rcx,%rax
	adcq $0,%rdx
	movq %rdx,%rbp

	mulq 8(%rsi)
	addq %rbp,%r11
	adcq $0,%rdx
	addq %rax,%r11
	movq %rcx,%rax
	adcq $0,%rdx
	movq %rdx,%rbp

	mulq 16(%rsi)
	addq %rbp,%r12
	adcq $0,%rdx
	addq %rax,%r12
	movq %rcx,%rax
	adcq $0,%rdx

	movq %r10,%rcx
	imulq %r15,%r10

	movq %rdx,%rbp
	mulq 24(%rsi)
	addq %rbp,%r13
	adcq $0,%rdx
	xorq %r9,%r9
	addq %rax,%r13
	movq %r10,%rax
	adcq %rdx,%r8
	adcq $0,%r9

	mulq 0(%r14)
	movq %r10,%rbp
	addq %rax,%rcx
	movq %r10,%rax
	adcq %rdx,%rcx

	subq %r10,%r12
	sbbq $0,%r10

	mulq 8(%r14)
	addq %rcx,%r11
	adcq $0,%rdx
	addq %rax,%r11
	movq %rbp,%rax
	adcq %rdx,%r12
	movq %rbp,%rdx
	adcq $0,%r10

	shlq $32,%rax
	shrq $32,%rdx
	subq %rax,%r13
	movq 24(%rbx),%rax
	sbbq %rdx,%rbp

	addq %r10,%r13
	adcq %rbp,%r8
	adcq $0,%r9

	movq %rax,%rcx
	mulq 0(%rsi)
	addq %rax,%r11
	movq %rcx,%rax
	adcq $0,%rdx
	movq %rdx,%rbp

	mulq 8(%rsi)
	addq %rbp,%r12
	adcq $0,%rdx
	addq %rax,%r12
	movq %rcx,%rax
	adcq $0,%rdx
	movq %rdx,%rbp

	mulq 16(%rsi)
	addq %rbp,%r13
	adcq $0,%rdx
	addq %rax,%r13
	movq %rcx,%rax
	adcq $0,%rdx

	movq %r11,%rcx
	imulq %r15,%r11

	movq %rdx,%rbp
	mulq 24(%rsi)
	addq %rbp,%r8
	adcq $0,%rdx
	xorq %r10,%r10
	addq %rax,%r8
	movq %r11,%rax
	adcq %rdx,%r9
	adcq $0,%r10

	mulq 0(%r14)
	movq %r11,%rbp
	addq %rax,%rcx
	movq %r11,%rax
	adcq %rdx,%rcx

	subq %r11,%r13
	sbbq $0,%r11

	mulq 8(%r14)
	addq %rcx,%r12
	adcq $0,%rdx
	addq %rax,%r12
	movq %rbp,%rax
	adcq %rdx,%r13
	movq %rbp,%rdx
	adcq $0,%r11

	shlq $32,%rax
	shrq $32,%rdx
	subq %rax,%r8
	sbbq %rdx,%rbp

	addq %r11,%r8
	adcq %rbp,%r9
	adcq $0,%r10

	movq %r12,%rsi
	subq 0(%r14),%r12
	movq %r13,%r11
	sbbq 8(%r14),%r13
	movq %r8,%rcx
	sbbq 16(%r14),%r8
	movq %r9,%rbp
	sbbq 24(%r14),%r9
	sbbq $0,%r10

	cmovcq %rsi,%r12
	cmovcq %r11,%r13
	cmovcq %rcx,%r8
	cmovcq %rbp,%r9

	movq %r12,0(%rdi)
	movq %r13,8(%rdi)
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)

	movq 0(%rsp),%r15
.cfi_restore %r15
movq 8(%rsp),%r14 412.cfi_restore %r14 413 movq 16(%rsp),%r13 414.cfi_restore %r13 415 movq 24(%rsp),%r12 416.cfi_restore %r12 417 movq 32(%rsp),%rbx 418.cfi_restore %rbx 419 movq 40(%rsp),%rbp 420.cfi_restore %rbp 421 leaq 48(%rsp),%rsp 422.cfi_adjust_cfa_offset -48 423.Lord_mul_epilogue: 424 .byte 0xf3,0xc3 425.cfi_endproc 426.size p256_scalar_mul_mont,.-p256_scalar_mul_mont 427 428 429 430 431 432 433 434.globl p256_scalar_sqr_rep_mont 435.hidden p256_scalar_sqr_rep_mont 436.type p256_scalar_sqr_rep_mont,@function 437.align 32 438p256_scalar_sqr_rep_mont: 439.cfi_startproc 440 leaq OPENSSL_ia32cap_P(%rip),%rcx 441 movq 8(%rcx),%rcx 442 andl $0x80100,%ecx 443 cmpl $0x80100,%ecx 444 je .Lecp_nistz256_ord_sqr_montx 445 pushq %rbp 446.cfi_adjust_cfa_offset 8 447.cfi_offset %rbp,-16 448 pushq %rbx 449.cfi_adjust_cfa_offset 8 450.cfi_offset %rbx,-24 451 pushq %r12 452.cfi_adjust_cfa_offset 8 453.cfi_offset %r12,-32 454 pushq %r13 455.cfi_adjust_cfa_offset 8 456.cfi_offset %r13,-40 457 pushq %r14 458.cfi_adjust_cfa_offset 8 459.cfi_offset %r14,-48 460 pushq %r15 461.cfi_adjust_cfa_offset 8 462.cfi_offset %r15,-56 463.Lord_sqr_body: 464 465 movq 0(%rsi),%r8 466 movq 8(%rsi),%rax 467 movq 16(%rsi),%r14 468 movq 24(%rsi),%r15 469 leaq .Lord(%rip),%rsi 470 movq %rdx,%rbx 471 jmp .Loop_ord_sqr 472 473.align 32 474.Loop_ord_sqr: 475 476 movq %rax,%rbp 477 mulq %r8 478 movq %rax,%r9 479.byte 102,72,15,110,205 480 movq %r14,%rax 481 movq %rdx,%r10 482 483 mulq %r8 484 addq %rax,%r10 485 movq %r15,%rax 486.byte 102,73,15,110,214 487 adcq $0,%rdx 488 movq %rdx,%r11 489 490 mulq %r8 491 addq %rax,%r11 492 movq %r15,%rax 493.byte 102,73,15,110,223 494 adcq $0,%rdx 495 movq %rdx,%r12 496 497 498 mulq %r14 499 movq %rax,%r13 500 movq %r14,%rax 501 movq %rdx,%r14 502 503 504 mulq %rbp 505 addq %rax,%r11 506 movq %r15,%rax 507 adcq $0,%rdx 508 movq %rdx,%r15 509 510 mulq %rbp 511 addq %rax,%r12 512 adcq $0,%rdx 513 514 addq %r15,%r12 515 adcq %rdx,%r13 516 adcq $0,%r14 517 518 519 xorq %r15,%r15 520 movq %r8,%rax 521 addq %r9,%r9 522 adcq %r10,%r10 523 adcq %r11,%r11 524 adcq %r12,%r12 525 adcq %r13,%r13 526 adcq %r14,%r14 527 adcq $0,%r15 528 529 530 mulq %rax 531 movq %rax,%r8 532.byte 102,72,15,126,200 533 movq %rdx,%rbp 534 535 mulq %rax 536 addq %rbp,%r9 537 adcq %rax,%r10 538.byte 102,72,15,126,208 539 adcq $0,%rdx 540 movq %rdx,%rbp 541 542 mulq %rax 543 addq %rbp,%r11 544 adcq %rax,%r12 545.byte 102,72,15,126,216 546 adcq $0,%rdx 547 movq %rdx,%rbp 548 549 movq %r8,%rcx 550 imulq 32(%rsi),%r8 551 552 mulq %rax 553 addq %rbp,%r13 554 adcq %rax,%r14 555 movq 0(%rsi),%rax 556 adcq %rdx,%r15 557 558 559 mulq %r8 560 movq %r8,%rbp 561 addq %rax,%rcx 562 movq 8(%rsi),%rax 563 adcq %rdx,%rcx 564 565 subq %r8,%r10 566 sbbq $0,%rbp 567 568 mulq %r8 569 addq %rcx,%r9 570 adcq $0,%rdx 571 addq %rax,%r9 572 movq %r8,%rax 573 adcq %rdx,%r10 574 movq %r8,%rdx 575 adcq $0,%rbp 576 577 movq %r9,%rcx 578 imulq 32(%rsi),%r9 579 580 shlq $32,%rax 581 shrq $32,%rdx 582 subq %rax,%r11 583 movq 0(%rsi),%rax 584 sbbq %rdx,%r8 585 586 addq %rbp,%r11 587 adcq $0,%r8 588 589 590 mulq %r9 591 movq %r9,%rbp 592 addq %rax,%rcx 593 movq 8(%rsi),%rax 594 adcq %rdx,%rcx 595 596 subq %r9,%r11 597 sbbq $0,%rbp 598 599 mulq %r9 600 addq %rcx,%r10 601 adcq $0,%rdx 602 addq %rax,%r10 603 movq %r9,%rax 604 adcq %rdx,%r11 605 movq %r9,%rdx 606 adcq $0,%rbp 607 608 movq %r10,%rcx 609 imulq 32(%rsi),%r10 610 611 shlq $32,%rax 612 shrq $32,%rdx 613 subq %rax,%r8 614 movq 0(%rsi),%rax 615 sbbq %rdx,%r9 616 617 addq %rbp,%r8 618 adcq $0,%r9 619 
620 621 mulq %r10 622 movq %r10,%rbp 623 addq %rax,%rcx 624 movq 8(%rsi),%rax 625 adcq %rdx,%rcx 626 627 subq %r10,%r8 628 sbbq $0,%rbp 629 630 mulq %r10 631 addq %rcx,%r11 632 adcq $0,%rdx 633 addq %rax,%r11 634 movq %r10,%rax 635 adcq %rdx,%r8 636 movq %r10,%rdx 637 adcq $0,%rbp 638 639 movq %r11,%rcx 640 imulq 32(%rsi),%r11 641 642 shlq $32,%rax 643 shrq $32,%rdx 644 subq %rax,%r9 645 movq 0(%rsi),%rax 646 sbbq %rdx,%r10 647 648 addq %rbp,%r9 649 adcq $0,%r10 650 651 652 mulq %r11 653 movq %r11,%rbp 654 addq %rax,%rcx 655 movq 8(%rsi),%rax 656 adcq %rdx,%rcx 657 658 subq %r11,%r9 659 sbbq $0,%rbp 660 661 mulq %r11 662 addq %rcx,%r8 663 adcq $0,%rdx 664 addq %rax,%r8 665 movq %r11,%rax 666 adcq %rdx,%r9 667 movq %r11,%rdx 668 adcq $0,%rbp 669 670 shlq $32,%rax 671 shrq $32,%rdx 672 subq %rax,%r10 673 sbbq %rdx,%r11 674 675 addq %rbp,%r10 676 adcq $0,%r11 677 678 679 xorq %rdx,%rdx 680 addq %r12,%r8 681 adcq %r13,%r9 682 movq %r8,%r12 683 adcq %r14,%r10 684 adcq %r15,%r11 685 movq %r9,%rax 686 adcq $0,%rdx 687 688 689 subq 0(%rsi),%r8 690 movq %r10,%r14 691 sbbq 8(%rsi),%r9 692 sbbq 16(%rsi),%r10 693 movq %r11,%r15 694 sbbq 24(%rsi),%r11 695 sbbq $0,%rdx 696 697 cmovcq %r12,%r8 698 cmovncq %r9,%rax 699 cmovncq %r10,%r14 700 cmovncq %r11,%r15 701 702 decq %rbx 703 jnz .Loop_ord_sqr 704 705 movq %r8,0(%rdi) 706 movq %rax,8(%rdi) 707 pxor %xmm1,%xmm1 708 movq %r14,16(%rdi) 709 pxor %xmm2,%xmm2 710 movq %r15,24(%rdi) 711 pxor %xmm3,%xmm3 712 713 movq 0(%rsp),%r15 714.cfi_restore %r15 715 movq 8(%rsp),%r14 716.cfi_restore %r14 717 movq 16(%rsp),%r13 718.cfi_restore %r13 719 movq 24(%rsp),%r12 720.cfi_restore %r12 721 movq 32(%rsp),%rbx 722.cfi_restore %rbx 723 movq 40(%rsp),%rbp 724.cfi_restore %rbp 725 leaq 48(%rsp),%rsp 726.cfi_adjust_cfa_offset -48 727.Lord_sqr_epilogue: 728 .byte 0xf3,0xc3 729.cfi_endproc 730.size p256_scalar_sqr_rep_mont,.-p256_scalar_sqr_rep_mont 731 732.type ecp_nistz256_ord_mul_montx,@function 733.align 32 734ecp_nistz256_ord_mul_montx: 735.cfi_startproc 736.Lecp_nistz256_ord_mul_montx: 737 pushq %rbp 738.cfi_adjust_cfa_offset 8 739.cfi_offset %rbp,-16 740 pushq %rbx 741.cfi_adjust_cfa_offset 8 742.cfi_offset %rbx,-24 743 pushq %r12 744.cfi_adjust_cfa_offset 8 745.cfi_offset %r12,-32 746 pushq %r13 747.cfi_adjust_cfa_offset 8 748.cfi_offset %r13,-40 749 pushq %r14 750.cfi_adjust_cfa_offset 8 751.cfi_offset %r14,-48 752 pushq %r15 753.cfi_adjust_cfa_offset 8 754.cfi_offset %r15,-56 755.Lord_mulx_body: 756 757 movq %rdx,%rbx 758 movq 0(%rdx),%rdx 759 movq 0(%rsi),%r9 760 movq 8(%rsi),%r10 761 movq 16(%rsi),%r11 762 movq 24(%rsi),%r12 763 leaq -128(%rsi),%rsi 764 leaq .Lord-128(%rip),%r14 765 movq .LordK(%rip),%r15 766 767 768 mulxq %r9,%r8,%r9 769 mulxq %r10,%rcx,%r10 770 mulxq %r11,%rbp,%r11 771 addq %rcx,%r9 772 mulxq %r12,%rcx,%r12 773 movq %r8,%rdx 774 mulxq %r15,%rdx,%rax 775 adcq %rbp,%r10 776 adcq %rcx,%r11 777 adcq $0,%r12 778 779 780 xorq %r13,%r13 781 mulxq 0+128(%r14),%rcx,%rbp 782 adcxq %rcx,%r8 783 adoxq %rbp,%r9 784 785 mulxq 8+128(%r14),%rcx,%rbp 786 adcxq %rcx,%r9 787 adoxq %rbp,%r10 788 789 mulxq 16+128(%r14),%rcx,%rbp 790 adcxq %rcx,%r10 791 adoxq %rbp,%r11 792 793 mulxq 24+128(%r14),%rcx,%rbp 794 movq 8(%rbx),%rdx 795 adcxq %rcx,%r11 796 adoxq %rbp,%r12 797 adcxq %r8,%r12 798 adoxq %r8,%r13 799 adcq $0,%r13 800 801 802 mulxq 0+128(%rsi),%rcx,%rbp 803 adcxq %rcx,%r9 804 adoxq %rbp,%r10 805 806 mulxq 8+128(%rsi),%rcx,%rbp 807 adcxq %rcx,%r10 808 adoxq %rbp,%r11 809 810 mulxq 16+128(%rsi),%rcx,%rbp 811 adcxq %rcx,%r11 812 adoxq %rbp,%r12 813 814 mulxq 
24+128(%rsi),%rcx,%rbp 815 movq %r9,%rdx 816 mulxq %r15,%rdx,%rax 817 adcxq %rcx,%r12 818 adoxq %rbp,%r13 819 820 adcxq %r8,%r13 821 adoxq %r8,%r8 822 adcq $0,%r8 823 824 825 mulxq 0+128(%r14),%rcx,%rbp 826 adcxq %rcx,%r9 827 adoxq %rbp,%r10 828 829 mulxq 8+128(%r14),%rcx,%rbp 830 adcxq %rcx,%r10 831 adoxq %rbp,%r11 832 833 mulxq 16+128(%r14),%rcx,%rbp 834 adcxq %rcx,%r11 835 adoxq %rbp,%r12 836 837 mulxq 24+128(%r14),%rcx,%rbp 838 movq 16(%rbx),%rdx 839 adcxq %rcx,%r12 840 adoxq %rbp,%r13 841 adcxq %r9,%r13 842 adoxq %r9,%r8 843 adcq $0,%r8 844 845 846 mulxq 0+128(%rsi),%rcx,%rbp 847 adcxq %rcx,%r10 848 adoxq %rbp,%r11 849 850 mulxq 8+128(%rsi),%rcx,%rbp 851 adcxq %rcx,%r11 852 adoxq %rbp,%r12 853 854 mulxq 16+128(%rsi),%rcx,%rbp 855 adcxq %rcx,%r12 856 adoxq %rbp,%r13 857 858 mulxq 24+128(%rsi),%rcx,%rbp 859 movq %r10,%rdx 860 mulxq %r15,%rdx,%rax 861 adcxq %rcx,%r13 862 adoxq %rbp,%r8 863 864 adcxq %r9,%r8 865 adoxq %r9,%r9 866 adcq $0,%r9 867 868 869 mulxq 0+128(%r14),%rcx,%rbp 870 adcxq %rcx,%r10 871 adoxq %rbp,%r11 872 873 mulxq 8+128(%r14),%rcx,%rbp 874 adcxq %rcx,%r11 875 adoxq %rbp,%r12 876 877 mulxq 16+128(%r14),%rcx,%rbp 878 adcxq %rcx,%r12 879 adoxq %rbp,%r13 880 881 mulxq 24+128(%r14),%rcx,%rbp 882 movq 24(%rbx),%rdx 883 adcxq %rcx,%r13 884 adoxq %rbp,%r8 885 adcxq %r10,%r8 886 adoxq %r10,%r9 887 adcq $0,%r9 888 889 890 mulxq 0+128(%rsi),%rcx,%rbp 891 adcxq %rcx,%r11 892 adoxq %rbp,%r12 893 894 mulxq 8+128(%rsi),%rcx,%rbp 895 adcxq %rcx,%r12 896 adoxq %rbp,%r13 897 898 mulxq 16+128(%rsi),%rcx,%rbp 899 adcxq %rcx,%r13 900 adoxq %rbp,%r8 901 902 mulxq 24+128(%rsi),%rcx,%rbp 903 movq %r11,%rdx 904 mulxq %r15,%rdx,%rax 905 adcxq %rcx,%r8 906 adoxq %rbp,%r9 907 908 adcxq %r10,%r9 909 adoxq %r10,%r10 910 adcq $0,%r10 911 912 913 mulxq 0+128(%r14),%rcx,%rbp 914 adcxq %rcx,%r11 915 adoxq %rbp,%r12 916 917 mulxq 8+128(%r14),%rcx,%rbp 918 adcxq %rcx,%r12 919 adoxq %rbp,%r13 920 921 mulxq 16+128(%r14),%rcx,%rbp 922 adcxq %rcx,%r13 923 adoxq %rbp,%r8 924 925 mulxq 24+128(%r14),%rcx,%rbp 926 leaq 128(%r14),%r14 927 movq %r12,%rbx 928 adcxq %rcx,%r8 929 adoxq %rbp,%r9 930 movq %r13,%rdx 931 adcxq %r11,%r9 932 adoxq %r11,%r10 933 adcq $0,%r10 934 935 936 937 movq %r8,%rcx 938 subq 0(%r14),%r12 939 sbbq 8(%r14),%r13 940 sbbq 16(%r14),%r8 941 movq %r9,%rbp 942 sbbq 24(%r14),%r9 943 sbbq $0,%r10 944 945 cmovcq %rbx,%r12 946 cmovcq %rdx,%r13 947 cmovcq %rcx,%r8 948 cmovcq %rbp,%r9 949 950 movq %r12,0(%rdi) 951 movq %r13,8(%rdi) 952 movq %r8,16(%rdi) 953 movq %r9,24(%rdi) 954 955 movq 0(%rsp),%r15 956.cfi_restore %r15 957 movq 8(%rsp),%r14 958.cfi_restore %r14 959 movq 16(%rsp),%r13 960.cfi_restore %r13 961 movq 24(%rsp),%r12 962.cfi_restore %r12 963 movq 32(%rsp),%rbx 964.cfi_restore %rbx 965 movq 40(%rsp),%rbp 966.cfi_restore %rbp 967 leaq 48(%rsp),%rsp 968.cfi_adjust_cfa_offset -48 969.Lord_mulx_epilogue: 970 .byte 0xf3,0xc3 971.cfi_endproc 972.size ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx 973 974.type ecp_nistz256_ord_sqr_montx,@function 975.align 32 976ecp_nistz256_ord_sqr_montx: 977.cfi_startproc 978.Lecp_nistz256_ord_sqr_montx: 979 pushq %rbp 980.cfi_adjust_cfa_offset 8 981.cfi_offset %rbp,-16 982 pushq %rbx 983.cfi_adjust_cfa_offset 8 984.cfi_offset %rbx,-24 985 pushq %r12 986.cfi_adjust_cfa_offset 8 987.cfi_offset %r12,-32 988 pushq %r13 989.cfi_adjust_cfa_offset 8 990.cfi_offset %r13,-40 991 pushq %r14 992.cfi_adjust_cfa_offset 8 993.cfi_offset %r14,-48 994 pushq %r15 995.cfi_adjust_cfa_offset 8 996.cfi_offset %r15,-56 997.Lord_sqrx_body: 998 999 movq %rdx,%rbx 1000 movq 
0(%rsi),%rdx 1001 movq 8(%rsi),%r14 1002 movq 16(%rsi),%r15 1003 movq 24(%rsi),%r8 1004 leaq .Lord(%rip),%rsi 1005 jmp .Loop_ord_sqrx 1006 1007.align 32 1008.Loop_ord_sqrx: 1009 mulxq %r14,%r9,%r10 1010 mulxq %r15,%rcx,%r11 1011 movq %rdx,%rax 1012.byte 102,73,15,110,206 1013 mulxq %r8,%rbp,%r12 1014 movq %r14,%rdx 1015 addq %rcx,%r10 1016.byte 102,73,15,110,215 1017 adcq %rbp,%r11 1018 adcq $0,%r12 1019 xorq %r13,%r13 1020 1021 mulxq %r15,%rcx,%rbp 1022 adcxq %rcx,%r11 1023 adoxq %rbp,%r12 1024 1025 mulxq %r8,%rcx,%rbp 1026 movq %r15,%rdx 1027 adcxq %rcx,%r12 1028 adoxq %rbp,%r13 1029 adcq $0,%r13 1030 1031 mulxq %r8,%rcx,%r14 1032 movq %rax,%rdx 1033.byte 102,73,15,110,216 1034 xorq %r15,%r15 1035 adcxq %r9,%r9 1036 adoxq %rcx,%r13 1037 adcxq %r10,%r10 1038 adoxq %r15,%r14 1039 1040 1041 mulxq %rdx,%r8,%rbp 1042.byte 102,72,15,126,202 1043 adcxq %r11,%r11 1044 adoxq %rbp,%r9 1045 adcxq %r12,%r12 1046 mulxq %rdx,%rcx,%rax 1047.byte 102,72,15,126,210 1048 adcxq %r13,%r13 1049 adoxq %rcx,%r10 1050 adcxq %r14,%r14 1051 mulxq %rdx,%rcx,%rbp 1052.byte 0x67 1053.byte 102,72,15,126,218 1054 adoxq %rax,%r11 1055 adcxq %r15,%r15 1056 adoxq %rcx,%r12 1057 adoxq %rbp,%r13 1058 mulxq %rdx,%rcx,%rax 1059 adoxq %rcx,%r14 1060 adoxq %rax,%r15 1061 1062 1063 movq %r8,%rdx 1064 mulxq 32(%rsi),%rdx,%rcx 1065 1066 xorq %rax,%rax 1067 mulxq 0(%rsi),%rcx,%rbp 1068 adcxq %rcx,%r8 1069 adoxq %rbp,%r9 1070 mulxq 8(%rsi),%rcx,%rbp 1071 adcxq %rcx,%r9 1072 adoxq %rbp,%r10 1073 mulxq 16(%rsi),%rcx,%rbp 1074 adcxq %rcx,%r10 1075 adoxq %rbp,%r11 1076 mulxq 24(%rsi),%rcx,%rbp 1077 adcxq %rcx,%r11 1078 adoxq %rbp,%r8 1079 adcxq %rax,%r8 1080 1081 1082 movq %r9,%rdx 1083 mulxq 32(%rsi),%rdx,%rcx 1084 1085 mulxq 0(%rsi),%rcx,%rbp 1086 adoxq %rcx,%r9 1087 adcxq %rbp,%r10 1088 mulxq 8(%rsi),%rcx,%rbp 1089 adoxq %rcx,%r10 1090 adcxq %rbp,%r11 1091 mulxq 16(%rsi),%rcx,%rbp 1092 adoxq %rcx,%r11 1093 adcxq %rbp,%r8 1094 mulxq 24(%rsi),%rcx,%rbp 1095 adoxq %rcx,%r8 1096 adcxq %rbp,%r9 1097 adoxq %rax,%r9 1098 1099 1100 movq %r10,%rdx 1101 mulxq 32(%rsi),%rdx,%rcx 1102 1103 mulxq 0(%rsi),%rcx,%rbp 1104 adcxq %rcx,%r10 1105 adoxq %rbp,%r11 1106 mulxq 8(%rsi),%rcx,%rbp 1107 adcxq %rcx,%r11 1108 adoxq %rbp,%r8 1109 mulxq 16(%rsi),%rcx,%rbp 1110 adcxq %rcx,%r8 1111 adoxq %rbp,%r9 1112 mulxq 24(%rsi),%rcx,%rbp 1113 adcxq %rcx,%r9 1114 adoxq %rbp,%r10 1115 adcxq %rax,%r10 1116 1117 1118 movq %r11,%rdx 1119 mulxq 32(%rsi),%rdx,%rcx 1120 1121 mulxq 0(%rsi),%rcx,%rbp 1122 adoxq %rcx,%r11 1123 adcxq %rbp,%r8 1124 mulxq 8(%rsi),%rcx,%rbp 1125 adoxq %rcx,%r8 1126 adcxq %rbp,%r9 1127 mulxq 16(%rsi),%rcx,%rbp 1128 adoxq %rcx,%r9 1129 adcxq %rbp,%r10 1130 mulxq 24(%rsi),%rcx,%rbp 1131 adoxq %rcx,%r10 1132 adcxq %rbp,%r11 1133 adoxq %rax,%r11 1134 1135 1136 addq %r8,%r12 1137 adcq %r13,%r9 1138 movq %r12,%rdx 1139 adcq %r14,%r10 1140 adcq %r15,%r11 1141 movq %r9,%r14 1142 adcq $0,%rax 1143 1144 1145 subq 0(%rsi),%r12 1146 movq %r10,%r15 1147 sbbq 8(%rsi),%r9 1148 sbbq 16(%rsi),%r10 1149 movq %r11,%r8 1150 sbbq 24(%rsi),%r11 1151 sbbq $0,%rax 1152 1153 cmovncq %r12,%rdx 1154 cmovncq %r9,%r14 1155 cmovncq %r10,%r15 1156 cmovncq %r11,%r8 1157 1158 decq %rbx 1159 jnz .Loop_ord_sqrx 1160 1161 movq %rdx,0(%rdi) 1162 movq %r14,8(%rdi) 1163 pxor %xmm1,%xmm1 1164 movq %r15,16(%rdi) 1165 pxor %xmm2,%xmm2 1166 movq %r8,24(%rdi) 1167 pxor %xmm3,%xmm3 1168 1169 movq 0(%rsp),%r15 1170.cfi_restore %r15 1171 movq 8(%rsp),%r14 1172.cfi_restore %r14 1173 movq 16(%rsp),%r13 1174.cfi_restore %r13 1175 movq 24(%rsp),%r12 1176.cfi_restore %r12 1177 movq 
32(%rsp),%rbx 1178.cfi_restore %rbx 1179 movq 40(%rsp),%rbp 1180.cfi_restore %rbp 1181 leaq 48(%rsp),%rsp 1182.cfi_adjust_cfa_offset -48 1183.Lord_sqrx_epilogue: 1184 .byte 0xf3,0xc3 1185.cfi_endproc 1186.size ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx 1187 1188 1189 1190 1191 1192 1193.globl p256_mul_mont 1194.hidden p256_mul_mont 1195.type p256_mul_mont,@function 1196.align 32 1197p256_mul_mont: 1198.cfi_startproc 1199 leaq OPENSSL_ia32cap_P(%rip),%rcx 1200 movq 8(%rcx),%rcx 1201 andl $0x80100,%ecx 1202.Lmul_mont: 1203 pushq %rbp 1204.cfi_adjust_cfa_offset 8 1205.cfi_offset %rbp,-16 1206 pushq %rbx 1207.cfi_adjust_cfa_offset 8 1208.cfi_offset %rbx,-24 1209 pushq %r12 1210.cfi_adjust_cfa_offset 8 1211.cfi_offset %r12,-32 1212 pushq %r13 1213.cfi_adjust_cfa_offset 8 1214.cfi_offset %r13,-40 1215 pushq %r14 1216.cfi_adjust_cfa_offset 8 1217.cfi_offset %r14,-48 1218 pushq %r15 1219.cfi_adjust_cfa_offset 8 1220.cfi_offset %r15,-56 1221.Lmul_body: 1222 cmpl $0x80100,%ecx 1223 je .Lmul_montx 1224 movq %rdx,%rbx 1225 movq 0(%rdx),%rax 1226 movq 0(%rsi),%r9 1227 movq 8(%rsi),%r10 1228 movq 16(%rsi),%r11 1229 movq 24(%rsi),%r12 1230 1231 call __ecp_nistz256_mul_montq 1232 jmp .Lmul_mont_done 1233 1234.align 32 1235.Lmul_montx: 1236 movq %rdx,%rbx 1237 movq 0(%rdx),%rdx 1238 movq 0(%rsi),%r9 1239 movq 8(%rsi),%r10 1240 movq 16(%rsi),%r11 1241 movq 24(%rsi),%r12 1242 leaq -128(%rsi),%rsi 1243 1244 call __ecp_nistz256_mul_montx 1245.Lmul_mont_done: 1246 movq 0(%rsp),%r15 1247.cfi_restore %r15 1248 movq 8(%rsp),%r14 1249.cfi_restore %r14 1250 movq 16(%rsp),%r13 1251.cfi_restore %r13 1252 movq 24(%rsp),%r12 1253.cfi_restore %r12 1254 movq 32(%rsp),%rbx 1255.cfi_restore %rbx 1256 movq 40(%rsp),%rbp 1257.cfi_restore %rbp 1258 leaq 48(%rsp),%rsp 1259.cfi_adjust_cfa_offset -48 1260.Lmul_epilogue: 1261 .byte 0xf3,0xc3 1262.cfi_endproc 1263.size p256_mul_mont,.-p256_mul_mont 1264 1265.type __ecp_nistz256_mul_montq,@function 1266.align 32 1267__ecp_nistz256_mul_montq: 1268.cfi_startproc 1269 1270 1271 movq %rax,%rbp 1272 mulq %r9 1273 movq .Lpoly+8(%rip),%r14 1274 movq %rax,%r8 1275 movq %rbp,%rax 1276 movq %rdx,%r9 1277 1278 mulq %r10 1279 movq .Lpoly+24(%rip),%r15 1280 addq %rax,%r9 1281 movq %rbp,%rax 1282 adcq $0,%rdx 1283 movq %rdx,%r10 1284 1285 mulq %r11 1286 addq %rax,%r10 1287 movq %rbp,%rax 1288 adcq $0,%rdx 1289 movq %rdx,%r11 1290 1291 mulq %r12 1292 addq %rax,%r11 1293 movq %r8,%rax 1294 adcq $0,%rdx 1295 xorq %r13,%r13 1296 movq %rdx,%r12 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 movq %r8,%rbp 1308 shlq $32,%r8 1309 mulq %r15 1310 shrq $32,%rbp 1311 addq %r8,%r9 1312 adcq %rbp,%r10 1313 adcq %rax,%r11 1314 movq 8(%rbx),%rax 1315 adcq %rdx,%r12 1316 adcq $0,%r13 1317 xorq %r8,%r8 1318 1319 1320 1321 movq %rax,%rbp 1322 mulq 0(%rsi) 1323 addq %rax,%r9 1324 movq %rbp,%rax 1325 adcq $0,%rdx 1326 movq %rdx,%rcx 1327 1328 mulq 8(%rsi) 1329 addq %rcx,%r10 1330 adcq $0,%rdx 1331 addq %rax,%r10 1332 movq %rbp,%rax 1333 adcq $0,%rdx 1334 movq %rdx,%rcx 1335 1336 mulq 16(%rsi) 1337 addq %rcx,%r11 1338 adcq $0,%rdx 1339 addq %rax,%r11 1340 movq %rbp,%rax 1341 adcq $0,%rdx 1342 movq %rdx,%rcx 1343 1344 mulq 24(%rsi) 1345 addq %rcx,%r12 1346 adcq $0,%rdx 1347 addq %rax,%r12 1348 movq %r9,%rax 1349 adcq %rdx,%r13 1350 adcq $0,%r8 1351 1352 1353 1354 movq %r9,%rbp 1355 shlq $32,%r9 1356 mulq %r15 1357 shrq $32,%rbp 1358 addq %r9,%r10 1359 adcq %rbp,%r11 1360 adcq %rax,%r12 1361 movq 16(%rbx),%rax 1362 adcq %rdx,%r13 1363 adcq $0,%r8 1364 xorq %r9,%r9 1365 1366 1367 1368 movq %rax,%rbp 
1369 mulq 0(%rsi) 1370 addq %rax,%r10 1371 movq %rbp,%rax 1372 adcq $0,%rdx 1373 movq %rdx,%rcx 1374 1375 mulq 8(%rsi) 1376 addq %rcx,%r11 1377 adcq $0,%rdx 1378 addq %rax,%r11 1379 movq %rbp,%rax 1380 adcq $0,%rdx 1381 movq %rdx,%rcx 1382 1383 mulq 16(%rsi) 1384 addq %rcx,%r12 1385 adcq $0,%rdx 1386 addq %rax,%r12 1387 movq %rbp,%rax 1388 adcq $0,%rdx 1389 movq %rdx,%rcx 1390 1391 mulq 24(%rsi) 1392 addq %rcx,%r13 1393 adcq $0,%rdx 1394 addq %rax,%r13 1395 movq %r10,%rax 1396 adcq %rdx,%r8 1397 adcq $0,%r9 1398 1399 1400 1401 movq %r10,%rbp 1402 shlq $32,%r10 1403 mulq %r15 1404 shrq $32,%rbp 1405 addq %r10,%r11 1406 adcq %rbp,%r12 1407 adcq %rax,%r13 1408 movq 24(%rbx),%rax 1409 adcq %rdx,%r8 1410 adcq $0,%r9 1411 xorq %r10,%r10 1412 1413 1414 1415 movq %rax,%rbp 1416 mulq 0(%rsi) 1417 addq %rax,%r11 1418 movq %rbp,%rax 1419 adcq $0,%rdx 1420 movq %rdx,%rcx 1421 1422 mulq 8(%rsi) 1423 addq %rcx,%r12 1424 adcq $0,%rdx 1425 addq %rax,%r12 1426 movq %rbp,%rax 1427 adcq $0,%rdx 1428 movq %rdx,%rcx 1429 1430 mulq 16(%rsi) 1431 addq %rcx,%r13 1432 adcq $0,%rdx 1433 addq %rax,%r13 1434 movq %rbp,%rax 1435 adcq $0,%rdx 1436 movq %rdx,%rcx 1437 1438 mulq 24(%rsi) 1439 addq %rcx,%r8 1440 adcq $0,%rdx 1441 addq %rax,%r8 1442 movq %r11,%rax 1443 adcq %rdx,%r9 1444 adcq $0,%r10 1445 1446 1447 1448 movq %r11,%rbp 1449 shlq $32,%r11 1450 mulq %r15 1451 shrq $32,%rbp 1452 addq %r11,%r12 1453 adcq %rbp,%r13 1454 movq %r12,%rcx 1455 adcq %rax,%r8 1456 adcq %rdx,%r9 1457 movq %r13,%rbp 1458 adcq $0,%r10 1459 1460 1461 1462 subq $-1,%r12 1463 movq %r8,%rbx 1464 sbbq %r14,%r13 1465 sbbq $0,%r8 1466 movq %r9,%rdx 1467 sbbq %r15,%r9 1468 sbbq $0,%r10 1469 1470 cmovcq %rcx,%r12 1471 cmovcq %rbp,%r13 1472 movq %r12,0(%rdi) 1473 cmovcq %rbx,%r8 1474 movq %r13,8(%rdi) 1475 cmovcq %rdx,%r9 1476 movq %r8,16(%rdi) 1477 movq %r9,24(%rdi) 1478 1479 .byte 0xf3,0xc3 1480.cfi_endproc 1481.size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq 1482 1483 1484 1485 1486 1487 1488 1489 1490.globl p256_sqr_mont 1491.hidden p256_sqr_mont 1492.type p256_sqr_mont,@function 1493.align 32 1494p256_sqr_mont: 1495.cfi_startproc 1496 leaq OPENSSL_ia32cap_P(%rip),%rcx 1497 movq 8(%rcx),%rcx 1498 andl $0x80100,%ecx 1499 pushq %rbp 1500.cfi_adjust_cfa_offset 8 1501.cfi_offset %rbp,-16 1502 pushq %rbx 1503.cfi_adjust_cfa_offset 8 1504.cfi_offset %rbx,-24 1505 pushq %r12 1506.cfi_adjust_cfa_offset 8 1507.cfi_offset %r12,-32 1508 pushq %r13 1509.cfi_adjust_cfa_offset 8 1510.cfi_offset %r13,-40 1511 pushq %r14 1512.cfi_adjust_cfa_offset 8 1513.cfi_offset %r14,-48 1514 pushq %r15 1515.cfi_adjust_cfa_offset 8 1516.cfi_offset %r15,-56 1517.Lsqr_body: 1518 cmpl $0x80100,%ecx 1519 je .Lsqr_montx 1520 movq 0(%rsi),%rax 1521 movq 8(%rsi),%r14 1522 movq 16(%rsi),%r15 1523 movq 24(%rsi),%r8 1524 1525 call __ecp_nistz256_sqr_montq 1526 jmp .Lsqr_mont_done 1527 1528.align 32 1529.Lsqr_montx: 1530 movq 0(%rsi),%rdx 1531 movq 8(%rsi),%r14 1532 movq 16(%rsi),%r15 1533 movq 24(%rsi),%r8 1534 leaq -128(%rsi),%rsi 1535 1536 call __ecp_nistz256_sqr_montx 1537.Lsqr_mont_done: 1538 movq 0(%rsp),%r15 1539.cfi_restore %r15 1540 movq 8(%rsp),%r14 1541.cfi_restore %r14 1542 movq 16(%rsp),%r13 1543.cfi_restore %r13 1544 movq 24(%rsp),%r12 1545.cfi_restore %r12 1546 movq 32(%rsp),%rbx 1547.cfi_restore %rbx 1548 movq 40(%rsp),%rbp 1549.cfi_restore %rbp 1550 leaq 48(%rsp),%rsp 1551.cfi_adjust_cfa_offset -48 1552.Lsqr_epilogue: 1553 .byte 0xf3,0xc3 1554.cfi_endproc 1555.size p256_sqr_mont,.-p256_sqr_mont 1556 1557.type __ecp_nistz256_sqr_montq,@function 1558.align 32 
1559__ecp_nistz256_sqr_montq: 1560.cfi_startproc 1561 movq %rax,%r13 1562 mulq %r14 1563 movq %rax,%r9 1564 movq %r15,%rax 1565 movq %rdx,%r10 1566 1567 mulq %r13 1568 addq %rax,%r10 1569 movq %r8,%rax 1570 adcq $0,%rdx 1571 movq %rdx,%r11 1572 1573 mulq %r13 1574 addq %rax,%r11 1575 movq %r15,%rax 1576 adcq $0,%rdx 1577 movq %rdx,%r12 1578 1579 1580 mulq %r14 1581 addq %rax,%r11 1582 movq %r8,%rax 1583 adcq $0,%rdx 1584 movq %rdx,%rbp 1585 1586 mulq %r14 1587 addq %rax,%r12 1588 movq %r8,%rax 1589 adcq $0,%rdx 1590 addq %rbp,%r12 1591 movq %rdx,%r13 1592 adcq $0,%r13 1593 1594 1595 mulq %r15 1596 xorq %r15,%r15 1597 addq %rax,%r13 1598 movq 0(%rsi),%rax 1599 movq %rdx,%r14 1600 adcq $0,%r14 1601 1602 addq %r9,%r9 1603 adcq %r10,%r10 1604 adcq %r11,%r11 1605 adcq %r12,%r12 1606 adcq %r13,%r13 1607 adcq %r14,%r14 1608 adcq $0,%r15 1609 1610 mulq %rax 1611 movq %rax,%r8 1612 movq 8(%rsi),%rax 1613 movq %rdx,%rcx 1614 1615 mulq %rax 1616 addq %rcx,%r9 1617 adcq %rax,%r10 1618 movq 16(%rsi),%rax 1619 adcq $0,%rdx 1620 movq %rdx,%rcx 1621 1622 mulq %rax 1623 addq %rcx,%r11 1624 adcq %rax,%r12 1625 movq 24(%rsi),%rax 1626 adcq $0,%rdx 1627 movq %rdx,%rcx 1628 1629 mulq %rax 1630 addq %rcx,%r13 1631 adcq %rax,%r14 1632 movq %r8,%rax 1633 adcq %rdx,%r15 1634 1635 movq .Lpoly+8(%rip),%rsi 1636 movq .Lpoly+24(%rip),%rbp 1637 1638 1639 1640 1641 movq %r8,%rcx 1642 shlq $32,%r8 1643 mulq %rbp 1644 shrq $32,%rcx 1645 addq %r8,%r9 1646 adcq %rcx,%r10 1647 adcq %rax,%r11 1648 movq %r9,%rax 1649 adcq $0,%rdx 1650 1651 1652 1653 movq %r9,%rcx 1654 shlq $32,%r9 1655 movq %rdx,%r8 1656 mulq %rbp 1657 shrq $32,%rcx 1658 addq %r9,%r10 1659 adcq %rcx,%r11 1660 adcq %rax,%r8 1661 movq %r10,%rax 1662 adcq $0,%rdx 1663 1664 1665 1666 movq %r10,%rcx 1667 shlq $32,%r10 1668 movq %rdx,%r9 1669 mulq %rbp 1670 shrq $32,%rcx 1671 addq %r10,%r11 1672 adcq %rcx,%r8 1673 adcq %rax,%r9 1674 movq %r11,%rax 1675 adcq $0,%rdx 1676 1677 1678 1679 movq %r11,%rcx 1680 shlq $32,%r11 1681 movq %rdx,%r10 1682 mulq %rbp 1683 shrq $32,%rcx 1684 addq %r11,%r8 1685 adcq %rcx,%r9 1686 adcq %rax,%r10 1687 adcq $0,%rdx 1688 xorq %r11,%r11 1689 1690 1691 1692 addq %r8,%r12 1693 adcq %r9,%r13 1694 movq %r12,%r8 1695 adcq %r10,%r14 1696 adcq %rdx,%r15 1697 movq %r13,%r9 1698 adcq $0,%r11 1699 1700 subq $-1,%r12 1701 movq %r14,%r10 1702 sbbq %rsi,%r13 1703 sbbq $0,%r14 1704 movq %r15,%rcx 1705 sbbq %rbp,%r15 1706 sbbq $0,%r11 1707 1708 cmovcq %r8,%r12 1709 cmovcq %r9,%r13 1710 movq %r12,0(%rdi) 1711 cmovcq %r10,%r14 1712 movq %r13,8(%rdi) 1713 cmovcq %rcx,%r15 1714 movq %r14,16(%rdi) 1715 movq %r15,24(%rdi) 1716 1717 .byte 0xf3,0xc3 1718.cfi_endproc 1719.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq 1720.type __ecp_nistz256_mul_montx,@function 1721.align 32 1722__ecp_nistz256_mul_montx: 1723.cfi_startproc 1724 1725 1726 mulxq %r9,%r8,%r9 1727 mulxq %r10,%rcx,%r10 1728 movq $32,%r14 1729 xorq %r13,%r13 1730 mulxq %r11,%rbp,%r11 1731 movq .Lpoly+24(%rip),%r15 1732 adcq %rcx,%r9 1733 mulxq %r12,%rcx,%r12 1734 movq %r8,%rdx 1735 adcq %rbp,%r10 1736 shlxq %r14,%r8,%rbp 1737 adcq %rcx,%r11 1738 shrxq %r14,%r8,%rcx 1739 adcq $0,%r12 1740 1741 1742 1743 addq %rbp,%r9 1744 adcq %rcx,%r10 1745 1746 mulxq %r15,%rcx,%rbp 1747 movq 8(%rbx),%rdx 1748 adcq %rcx,%r11 1749 adcq %rbp,%r12 1750 adcq $0,%r13 1751 xorq %r8,%r8 1752 1753 1754 1755 mulxq 0+128(%rsi),%rcx,%rbp 1756 adcxq %rcx,%r9 1757 adoxq %rbp,%r10 1758 1759 mulxq 8+128(%rsi),%rcx,%rbp 1760 adcxq %rcx,%r10 1761 adoxq %rbp,%r11 1762 1763 mulxq 16+128(%rsi),%rcx,%rbp 1764 adcxq %rcx,%r11 
1765 adoxq %rbp,%r12 1766 1767 mulxq 24+128(%rsi),%rcx,%rbp 1768 movq %r9,%rdx 1769 adcxq %rcx,%r12 1770 shlxq %r14,%r9,%rcx 1771 adoxq %rbp,%r13 1772 shrxq %r14,%r9,%rbp 1773 1774 adcxq %r8,%r13 1775 adoxq %r8,%r8 1776 adcq $0,%r8 1777 1778 1779 1780 addq %rcx,%r10 1781 adcq %rbp,%r11 1782 1783 mulxq %r15,%rcx,%rbp 1784 movq 16(%rbx),%rdx 1785 adcq %rcx,%r12 1786 adcq %rbp,%r13 1787 adcq $0,%r8 1788 xorq %r9,%r9 1789 1790 1791 1792 mulxq 0+128(%rsi),%rcx,%rbp 1793 adcxq %rcx,%r10 1794 adoxq %rbp,%r11 1795 1796 mulxq 8+128(%rsi),%rcx,%rbp 1797 adcxq %rcx,%r11 1798 adoxq %rbp,%r12 1799 1800 mulxq 16+128(%rsi),%rcx,%rbp 1801 adcxq %rcx,%r12 1802 adoxq %rbp,%r13 1803 1804 mulxq 24+128(%rsi),%rcx,%rbp 1805 movq %r10,%rdx 1806 adcxq %rcx,%r13 1807 shlxq %r14,%r10,%rcx 1808 adoxq %rbp,%r8 1809 shrxq %r14,%r10,%rbp 1810 1811 adcxq %r9,%r8 1812 adoxq %r9,%r9 1813 adcq $0,%r9 1814 1815 1816 1817 addq %rcx,%r11 1818 adcq %rbp,%r12 1819 1820 mulxq %r15,%rcx,%rbp 1821 movq 24(%rbx),%rdx 1822 adcq %rcx,%r13 1823 adcq %rbp,%r8 1824 adcq $0,%r9 1825 xorq %r10,%r10 1826 1827 1828 1829 mulxq 0+128(%rsi),%rcx,%rbp 1830 adcxq %rcx,%r11 1831 adoxq %rbp,%r12 1832 1833 mulxq 8+128(%rsi),%rcx,%rbp 1834 adcxq %rcx,%r12 1835 adoxq %rbp,%r13 1836 1837 mulxq 16+128(%rsi),%rcx,%rbp 1838 adcxq %rcx,%r13 1839 adoxq %rbp,%r8 1840 1841 mulxq 24+128(%rsi),%rcx,%rbp 1842 movq %r11,%rdx 1843 adcxq %rcx,%r8 1844 shlxq %r14,%r11,%rcx 1845 adoxq %rbp,%r9 1846 shrxq %r14,%r11,%rbp 1847 1848 adcxq %r10,%r9 1849 adoxq %r10,%r10 1850 adcq $0,%r10 1851 1852 1853 1854 addq %rcx,%r12 1855 adcq %rbp,%r13 1856 1857 mulxq %r15,%rcx,%rbp 1858 movq %r12,%rbx 1859 movq .Lpoly+8(%rip),%r14 1860 adcq %rcx,%r8 1861 movq %r13,%rdx 1862 adcq %rbp,%r9 1863 adcq $0,%r10 1864 1865 1866 1867 xorl %eax,%eax 1868 movq %r8,%rcx 1869 sbbq $-1,%r12 1870 sbbq %r14,%r13 1871 sbbq $0,%r8 1872 movq %r9,%rbp 1873 sbbq %r15,%r9 1874 sbbq $0,%r10 1875 1876 cmovcq %rbx,%r12 1877 cmovcq %rdx,%r13 1878 movq %r12,0(%rdi) 1879 cmovcq %rcx,%r8 1880 movq %r13,8(%rdi) 1881 cmovcq %rbp,%r9 1882 movq %r8,16(%rdi) 1883 movq %r9,24(%rdi) 1884 1885 .byte 0xf3,0xc3 1886.cfi_endproc 1887.size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx 1888 1889.type __ecp_nistz256_sqr_montx,@function 1890.align 32 1891__ecp_nistz256_sqr_montx: 1892.cfi_startproc 1893 mulxq %r14,%r9,%r10 1894 mulxq %r15,%rcx,%r11 1895 xorl %eax,%eax 1896 adcq %rcx,%r10 1897 mulxq %r8,%rbp,%r12 1898 movq %r14,%rdx 1899 adcq %rbp,%r11 1900 adcq $0,%r12 1901 xorq %r13,%r13 1902 1903 1904 mulxq %r15,%rcx,%rbp 1905 adcxq %rcx,%r11 1906 adoxq %rbp,%r12 1907 1908 mulxq %r8,%rcx,%rbp 1909 movq %r15,%rdx 1910 adcxq %rcx,%r12 1911 adoxq %rbp,%r13 1912 adcq $0,%r13 1913 1914 1915 mulxq %r8,%rcx,%r14 1916 movq 0+128(%rsi),%rdx 1917 xorq %r15,%r15 1918 adcxq %r9,%r9 1919 adoxq %rcx,%r13 1920 adcxq %r10,%r10 1921 adoxq %r15,%r14 1922 1923 mulxq %rdx,%r8,%rbp 1924 movq 8+128(%rsi),%rdx 1925 adcxq %r11,%r11 1926 adoxq %rbp,%r9 1927 adcxq %r12,%r12 1928 mulxq %rdx,%rcx,%rax 1929 movq 16+128(%rsi),%rdx 1930 adcxq %r13,%r13 1931 adoxq %rcx,%r10 1932 adcxq %r14,%r14 1933.byte 0x67 1934 mulxq %rdx,%rcx,%rbp 1935 movq 24+128(%rsi),%rdx 1936 adoxq %rax,%r11 1937 adcxq %r15,%r15 1938 adoxq %rcx,%r12 1939 movq $32,%rsi 1940 adoxq %rbp,%r13 1941.byte 0x67,0x67 1942 mulxq %rdx,%rcx,%rax 1943 movq .Lpoly+24(%rip),%rdx 1944 adoxq %rcx,%r14 1945 shlxq %rsi,%r8,%rcx 1946 adoxq %rax,%r15 1947 shrxq %rsi,%r8,%rax 1948 movq %rdx,%rbp 1949 1950 1951 addq %rcx,%r9 1952 adcq %rax,%r10 1953 1954 mulxq %r8,%rcx,%r8 1955 adcq %rcx,%r11 
1956 shlxq %rsi,%r9,%rcx 1957 adcq $0,%r8 1958 shrxq %rsi,%r9,%rax 1959 1960 1961 addq %rcx,%r10 1962 adcq %rax,%r11 1963 1964 mulxq %r9,%rcx,%r9 1965 adcq %rcx,%r8 1966 shlxq %rsi,%r10,%rcx 1967 adcq $0,%r9 1968 shrxq %rsi,%r10,%rax 1969 1970 1971 addq %rcx,%r11 1972 adcq %rax,%r8 1973 1974 mulxq %r10,%rcx,%r10 1975 adcq %rcx,%r9 1976 shlxq %rsi,%r11,%rcx 1977 adcq $0,%r10 1978 shrxq %rsi,%r11,%rax 1979 1980 1981 addq %rcx,%r8 1982 adcq %rax,%r9 1983 1984 mulxq %r11,%rcx,%r11 1985 adcq %rcx,%r10 1986 adcq $0,%r11 1987 1988 xorq %rdx,%rdx 1989 addq %r8,%r12 1990 movq .Lpoly+8(%rip),%rsi 1991 adcq %r9,%r13 1992 movq %r12,%r8 1993 adcq %r10,%r14 1994 adcq %r11,%r15 1995 movq %r13,%r9 1996 adcq $0,%rdx 1997 1998 subq $-1,%r12 1999 movq %r14,%r10 2000 sbbq %rsi,%r13 2001 sbbq $0,%r14 2002 movq %r15,%r11 2003 sbbq %rbp,%r15 2004 sbbq $0,%rdx 2005 2006 cmovcq %r8,%r12 2007 cmovcq %r9,%r13 2008 movq %r12,0(%rdi) 2009 cmovcq %r10,%r14 2010 movq %r13,8(%rdi) 2011 cmovcq %r11,%r15 2012 movq %r14,16(%rdi) 2013 movq %r15,24(%rdi) 2014 2015 .byte 0xf3,0xc3 2016.cfi_endproc 2017.size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx 2018 2019 2020.globl nistz256_select_w5 2021.hidden nistz256_select_w5 2022.type nistz256_select_w5,@function 2023.align 32 2024nistz256_select_w5: 2025.cfi_startproc 2026 leaq OPENSSL_ia32cap_P(%rip),%rax 2027 movq 8(%rax),%rax 2028 testl $32,%eax 2029 jnz .Lavx2_select_w5 2030 movdqa .LOne(%rip),%xmm0 2031 movd %edx,%xmm1 2032 2033 pxor %xmm2,%xmm2 2034 pxor %xmm3,%xmm3 2035 pxor %xmm4,%xmm4 2036 pxor %xmm5,%xmm5 2037 pxor %xmm6,%xmm6 2038 pxor %xmm7,%xmm7 2039 2040 movdqa %xmm0,%xmm8 2041 pshufd $0,%xmm1,%xmm1 2042 2043 movq $16,%rax 2044.Lselect_loop_sse_w5: 2045 2046 movdqa %xmm8,%xmm15 2047 paddd %xmm0,%xmm8 2048 pcmpeqd %xmm1,%xmm15 2049 2050 movdqa 0(%rsi),%xmm9 2051 movdqa 16(%rsi),%xmm10 2052 movdqa 32(%rsi),%xmm11 2053 movdqa 48(%rsi),%xmm12 2054 movdqa 64(%rsi),%xmm13 2055 movdqa 80(%rsi),%xmm14 2056 leaq 96(%rsi),%rsi 2057 2058 pand %xmm15,%xmm9 2059 pand %xmm15,%xmm10 2060 por %xmm9,%xmm2 2061 pand %xmm15,%xmm11 2062 por %xmm10,%xmm3 2063 pand %xmm15,%xmm12 2064 por %xmm11,%xmm4 2065 pand %xmm15,%xmm13 2066 por %xmm12,%xmm5 2067 pand %xmm15,%xmm14 2068 por %xmm13,%xmm6 2069 por %xmm14,%xmm7 2070 2071 decq %rax 2072 jnz .Lselect_loop_sse_w5 2073 2074 movdqu %xmm2,0(%rdi) 2075 movdqu %xmm3,16(%rdi) 2076 movdqu %xmm4,32(%rdi) 2077 movdqu %xmm5,48(%rdi) 2078 movdqu %xmm6,64(%rdi) 2079 movdqu %xmm7,80(%rdi) 2080 .byte 0xf3,0xc3 2081.cfi_endproc 2082.LSEH_end_nistz256_select_w5: 2083.size nistz256_select_w5,.-nistz256_select_w5 2084 2085 2086 2087.globl nistz256_select_w7 2088.hidden nistz256_select_w7 2089.type nistz256_select_w7,@function 2090.align 32 2091nistz256_select_w7: 2092.cfi_startproc 2093 leaq OPENSSL_ia32cap_P(%rip),%rax 2094 movq 8(%rax),%rax 2095 testl $32,%eax 2096 jnz .Lavx2_select_w7 2097 movdqa .LOne(%rip),%xmm8 2098 movd %edx,%xmm1 2099 2100 pxor %xmm2,%xmm2 2101 pxor %xmm3,%xmm3 2102 pxor %xmm4,%xmm4 2103 pxor %xmm5,%xmm5 2104 2105 movdqa %xmm8,%xmm0 2106 pshufd $0,%xmm1,%xmm1 2107 movq $64,%rax 2108 2109.Lselect_loop_sse_w7: 2110 movdqa %xmm8,%xmm15 2111 paddd %xmm0,%xmm8 2112 movdqa 0(%rsi),%xmm9 2113 movdqa 16(%rsi),%xmm10 2114 pcmpeqd %xmm1,%xmm15 2115 movdqa 32(%rsi),%xmm11 2116 movdqa 48(%rsi),%xmm12 2117 leaq 64(%rsi),%rsi 2118 2119 pand %xmm15,%xmm9 2120 pand %xmm15,%xmm10 2121 por %xmm9,%xmm2 2122 pand %xmm15,%xmm11 2123 por %xmm10,%xmm3 2124 pand %xmm15,%xmm12 2125 por %xmm11,%xmm4 2126 prefetcht0 255(%rsi) 2127 por %xmm12,%xmm5 2128 
2129 decq %rax 2130 jnz .Lselect_loop_sse_w7 2131 2132 movdqu %xmm2,0(%rdi) 2133 movdqu %xmm3,16(%rdi) 2134 movdqu %xmm4,32(%rdi) 2135 movdqu %xmm5,48(%rdi) 2136 .byte 0xf3,0xc3 2137.cfi_endproc 2138.LSEH_end_nistz256_select_w7: 2139.size nistz256_select_w7,.-nistz256_select_w7 2140 2141 2142.type ecp_nistz256_avx2_select_w5,@function 2143.align 32 2144ecp_nistz256_avx2_select_w5: 2145.cfi_startproc 2146.Lavx2_select_w5: 2147 vzeroupper 2148 vmovdqa .LTwo(%rip),%ymm0 2149 2150 vpxor %ymm2,%ymm2,%ymm2 2151 vpxor %ymm3,%ymm3,%ymm3 2152 vpxor %ymm4,%ymm4,%ymm4 2153 2154 vmovdqa .LOne(%rip),%ymm5 2155 vmovdqa .LTwo(%rip),%ymm10 2156 2157 vmovd %edx,%xmm1 2158 vpermd %ymm1,%ymm2,%ymm1 2159 2160 movq $8,%rax 2161.Lselect_loop_avx2_w5: 2162 2163 vmovdqa 0(%rsi),%ymm6 2164 vmovdqa 32(%rsi),%ymm7 2165 vmovdqa 64(%rsi),%ymm8 2166 2167 vmovdqa 96(%rsi),%ymm11 2168 vmovdqa 128(%rsi),%ymm12 2169 vmovdqa 160(%rsi),%ymm13 2170 2171 vpcmpeqd %ymm1,%ymm5,%ymm9 2172 vpcmpeqd %ymm1,%ymm10,%ymm14 2173 2174 vpaddd %ymm0,%ymm5,%ymm5 2175 vpaddd %ymm0,%ymm10,%ymm10 2176 leaq 192(%rsi),%rsi 2177 2178 vpand %ymm9,%ymm6,%ymm6 2179 vpand %ymm9,%ymm7,%ymm7 2180 vpand %ymm9,%ymm8,%ymm8 2181 vpand %ymm14,%ymm11,%ymm11 2182 vpand %ymm14,%ymm12,%ymm12 2183 vpand %ymm14,%ymm13,%ymm13 2184 2185 vpxor %ymm6,%ymm2,%ymm2 2186 vpxor %ymm7,%ymm3,%ymm3 2187 vpxor %ymm8,%ymm4,%ymm4 2188 vpxor %ymm11,%ymm2,%ymm2 2189 vpxor %ymm12,%ymm3,%ymm3 2190 vpxor %ymm13,%ymm4,%ymm4 2191 2192 decq %rax 2193 jnz .Lselect_loop_avx2_w5 2194 2195 vmovdqu %ymm2,0(%rdi) 2196 vmovdqu %ymm3,32(%rdi) 2197 vmovdqu %ymm4,64(%rdi) 2198 vzeroupper 2199 .byte 0xf3,0xc3 2200.cfi_endproc 2201.LSEH_end_ecp_nistz256_avx2_select_w5: 2202.size ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5 2203 2204 2205 2206.type ecp_nistz256_avx2_select_w7,@function 2207.align 32 2208ecp_nistz256_avx2_select_w7: 2209.cfi_startproc 2210.Lavx2_select_w7: 2211 vzeroupper 2212 vmovdqa .LThree(%rip),%ymm0 2213 2214 vpxor %ymm2,%ymm2,%ymm2 2215 vpxor %ymm3,%ymm3,%ymm3 2216 2217 vmovdqa .LOne(%rip),%ymm4 2218 vmovdqa .LTwo(%rip),%ymm8 2219 vmovdqa .LThree(%rip),%ymm12 2220 2221 vmovd %edx,%xmm1 2222 vpermd %ymm1,%ymm2,%ymm1 2223 2224 2225 movq $21,%rax 2226.Lselect_loop_avx2_w7: 2227 2228 vmovdqa 0(%rsi),%ymm5 2229 vmovdqa 32(%rsi),%ymm6 2230 2231 vmovdqa 64(%rsi),%ymm9 2232 vmovdqa 96(%rsi),%ymm10 2233 2234 vmovdqa 128(%rsi),%ymm13 2235 vmovdqa 160(%rsi),%ymm14 2236 2237 vpcmpeqd %ymm1,%ymm4,%ymm7 2238 vpcmpeqd %ymm1,%ymm8,%ymm11 2239 vpcmpeqd %ymm1,%ymm12,%ymm15 2240 2241 vpaddd %ymm0,%ymm4,%ymm4 2242 vpaddd %ymm0,%ymm8,%ymm8 2243 vpaddd %ymm0,%ymm12,%ymm12 2244 leaq 192(%rsi),%rsi 2245 2246 vpand %ymm7,%ymm5,%ymm5 2247 vpand %ymm7,%ymm6,%ymm6 2248 vpand %ymm11,%ymm9,%ymm9 2249 vpand %ymm11,%ymm10,%ymm10 2250 vpand %ymm15,%ymm13,%ymm13 2251 vpand %ymm15,%ymm14,%ymm14 2252 2253 vpxor %ymm5,%ymm2,%ymm2 2254 vpxor %ymm6,%ymm3,%ymm3 2255 vpxor %ymm9,%ymm2,%ymm2 2256 vpxor %ymm10,%ymm3,%ymm3 2257 vpxor %ymm13,%ymm2,%ymm2 2258 vpxor %ymm14,%ymm3,%ymm3 2259 2260 decq %rax 2261 jnz .Lselect_loop_avx2_w7 2262 2263 2264 vmovdqa 0(%rsi),%ymm5 2265 vmovdqa 32(%rsi),%ymm6 2266 2267 vpcmpeqd %ymm1,%ymm4,%ymm7 2268 2269 vpand %ymm7,%ymm5,%ymm5 2270 vpand %ymm7,%ymm6,%ymm6 2271 2272 vpxor %ymm5,%ymm2,%ymm2 2273 vpxor %ymm6,%ymm3,%ymm3 2274 2275 vmovdqu %ymm2,0(%rdi) 2276 vmovdqu %ymm3,32(%rdi) 2277 vzeroupper 2278 .byte 0xf3,0xc3 2279.cfi_endproc 2280.LSEH_end_ecp_nistz256_avx2_select_w7: 2281.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7 2282.type 
__ecp_nistz256_add_toq,@function 2283.align 32 2284__ecp_nistz256_add_toq: 2285.cfi_startproc 2286 xorq %r11,%r11 2287 addq 0(%rbx),%r12 2288 adcq 8(%rbx),%r13 2289 movq %r12,%rax 2290 adcq 16(%rbx),%r8 2291 adcq 24(%rbx),%r9 2292 movq %r13,%rbp 2293 adcq $0,%r11 2294 2295 subq $-1,%r12 2296 movq %r8,%rcx 2297 sbbq %r14,%r13 2298 sbbq $0,%r8 2299 movq %r9,%r10 2300 sbbq %r15,%r9 2301 sbbq $0,%r11 2302 2303 cmovcq %rax,%r12 2304 cmovcq %rbp,%r13 2305 movq %r12,0(%rdi) 2306 cmovcq %rcx,%r8 2307 movq %r13,8(%rdi) 2308 cmovcq %r10,%r9 2309 movq %r8,16(%rdi) 2310 movq %r9,24(%rdi) 2311 2312 .byte 0xf3,0xc3 2313.cfi_endproc 2314.size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq 2315 2316.type __ecp_nistz256_sub_fromq,@function 2317.align 32 2318__ecp_nistz256_sub_fromq: 2319.cfi_startproc 2320 subq 0(%rbx),%r12 2321 sbbq 8(%rbx),%r13 2322 movq %r12,%rax 2323 sbbq 16(%rbx),%r8 2324 sbbq 24(%rbx),%r9 2325 movq %r13,%rbp 2326 sbbq %r11,%r11 2327 2328 addq $-1,%r12 2329 movq %r8,%rcx 2330 adcq %r14,%r13 2331 adcq $0,%r8 2332 movq %r9,%r10 2333 adcq %r15,%r9 2334 testq %r11,%r11 2335 2336 cmovzq %rax,%r12 2337 cmovzq %rbp,%r13 2338 movq %r12,0(%rdi) 2339 cmovzq %rcx,%r8 2340 movq %r13,8(%rdi) 2341 cmovzq %r10,%r9 2342 movq %r8,16(%rdi) 2343 movq %r9,24(%rdi) 2344 2345 .byte 0xf3,0xc3 2346.cfi_endproc 2347.size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq 2348 2349.type __ecp_nistz256_subq,@function 2350.align 32 2351__ecp_nistz256_subq: 2352.cfi_startproc 2353 subq %r12,%rax 2354 sbbq %r13,%rbp 2355 movq %rax,%r12 2356 sbbq %r8,%rcx 2357 sbbq %r9,%r10 2358 movq %rbp,%r13 2359 sbbq %r11,%r11 2360 2361 addq $-1,%rax 2362 movq %rcx,%r8 2363 adcq %r14,%rbp 2364 adcq $0,%rcx 2365 movq %r10,%r9 2366 adcq %r15,%r10 2367 testq %r11,%r11 2368 2369 cmovnzq %rax,%r12 2370 cmovnzq %rbp,%r13 2371 cmovnzq %rcx,%r8 2372 cmovnzq %r10,%r9 2373 2374 .byte 0xf3,0xc3 2375.cfi_endproc 2376.size __ecp_nistz256_subq,.-__ecp_nistz256_subq 2377 2378.type __ecp_nistz256_mul_by_2q,@function 2379.align 32 2380__ecp_nistz256_mul_by_2q: 2381.cfi_startproc 2382 xorq %r11,%r11 2383 addq %r12,%r12 2384 adcq %r13,%r13 2385 movq %r12,%rax 2386 adcq %r8,%r8 2387 adcq %r9,%r9 2388 movq %r13,%rbp 2389 adcq $0,%r11 2390 2391 subq $-1,%r12 2392 movq %r8,%rcx 2393 sbbq %r14,%r13 2394 sbbq $0,%r8 2395 movq %r9,%r10 2396 sbbq %r15,%r9 2397 sbbq $0,%r11 2398 2399 cmovcq %rax,%r12 2400 cmovcq %rbp,%r13 2401 movq %r12,0(%rdi) 2402 cmovcq %rcx,%r8 2403 movq %r13,8(%rdi) 2404 cmovcq %r10,%r9 2405 movq %r8,16(%rdi) 2406 movq %r9,24(%rdi) 2407 2408 .byte 0xf3,0xc3 2409.cfi_endproc 2410.size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q 2411.globl p256_point_double 2412.hidden p256_point_double 2413.type p256_point_double,@function 2414.align 32 2415p256_point_double: 2416.cfi_startproc 2417 leaq OPENSSL_ia32cap_P(%rip),%rcx 2418 movq 8(%rcx),%rcx 2419 andl $0x80100,%ecx 2420 cmpl $0x80100,%ecx 2421 je .Lpoint_doublex 2422 pushq %rbp 2423.cfi_adjust_cfa_offset 8 2424.cfi_offset %rbp,-16 2425 pushq %rbx 2426.cfi_adjust_cfa_offset 8 2427.cfi_offset %rbx,-24 2428 pushq %r12 2429.cfi_adjust_cfa_offset 8 2430.cfi_offset %r12,-32 2431 pushq %r13 2432.cfi_adjust_cfa_offset 8 2433.cfi_offset %r13,-40 2434 pushq %r14 2435.cfi_adjust_cfa_offset 8 2436.cfi_offset %r14,-48 2437 pushq %r15 2438.cfi_adjust_cfa_offset 8 2439.cfi_offset %r15,-56 2440 subq $160+8,%rsp 2441.cfi_adjust_cfa_offset 32*5+8 2442.Lpoint_doubleq_body: 2443 2444.Lpoint_double_shortcutq: 2445 movdqu 0(%rsi),%xmm0 2446 movq %rsi,%rbx 2447 movdqu 16(%rsi),%xmm1 2448 movq 
32+0(%rsi),%r12 2449 movq 32+8(%rsi),%r13 2450 movq 32+16(%rsi),%r8 2451 movq 32+24(%rsi),%r9 2452 movq .Lpoly+8(%rip),%r14 2453 movq .Lpoly+24(%rip),%r15 2454 movdqa %xmm0,96(%rsp) 2455 movdqa %xmm1,96+16(%rsp) 2456 leaq 32(%rdi),%r10 2457 leaq 64(%rdi),%r11 2458.byte 102,72,15,110,199 2459.byte 102,73,15,110,202 2460.byte 102,73,15,110,211 2461 2462 leaq 0(%rsp),%rdi 2463 call __ecp_nistz256_mul_by_2q 2464 2465 movq 64+0(%rsi),%rax 2466 movq 64+8(%rsi),%r14 2467 movq 64+16(%rsi),%r15 2468 movq 64+24(%rsi),%r8 2469 leaq 64-0(%rsi),%rsi 2470 leaq 64(%rsp),%rdi 2471 call __ecp_nistz256_sqr_montq 2472 2473 movq 0+0(%rsp),%rax 2474 movq 8+0(%rsp),%r14 2475 leaq 0+0(%rsp),%rsi 2476 movq 16+0(%rsp),%r15 2477 movq 24+0(%rsp),%r8 2478 leaq 0(%rsp),%rdi 2479 call __ecp_nistz256_sqr_montq 2480 2481 movq 32(%rbx),%rax 2482 movq 64+0(%rbx),%r9 2483 movq 64+8(%rbx),%r10 2484 movq 64+16(%rbx),%r11 2485 movq 64+24(%rbx),%r12 2486 leaq 64-0(%rbx),%rsi 2487 leaq 32(%rbx),%rbx 2488.byte 102,72,15,126,215 2489 call __ecp_nistz256_mul_montq 2490 call __ecp_nistz256_mul_by_2q 2491 2492 movq 96+0(%rsp),%r12 2493 movq 96+8(%rsp),%r13 2494 leaq 64(%rsp),%rbx 2495 movq 96+16(%rsp),%r8 2496 movq 96+24(%rsp),%r9 2497 leaq 32(%rsp),%rdi 2498 call __ecp_nistz256_add_toq 2499 2500 movq 96+0(%rsp),%r12 2501 movq 96+8(%rsp),%r13 2502 leaq 64(%rsp),%rbx 2503 movq 96+16(%rsp),%r8 2504 movq 96+24(%rsp),%r9 2505 leaq 64(%rsp),%rdi 2506 call __ecp_nistz256_sub_fromq 2507 2508 movq 0+0(%rsp),%rax 2509 movq 8+0(%rsp),%r14 2510 leaq 0+0(%rsp),%rsi 2511 movq 16+0(%rsp),%r15 2512 movq 24+0(%rsp),%r8 2513.byte 102,72,15,126,207 2514 call __ecp_nistz256_sqr_montq 2515 xorq %r9,%r9 2516 movq %r12,%rax 2517 addq $-1,%r12 2518 movq %r13,%r10 2519 adcq %rsi,%r13 2520 movq %r14,%rcx 2521 adcq $0,%r14 2522 movq %r15,%r8 2523 adcq %rbp,%r15 2524 adcq $0,%r9 2525 xorq %rsi,%rsi 2526 testq $1,%rax 2527 2528 cmovzq %rax,%r12 2529 cmovzq %r10,%r13 2530 cmovzq %rcx,%r14 2531 cmovzq %r8,%r15 2532 cmovzq %rsi,%r9 2533 2534 movq %r13,%rax 2535 shrq $1,%r12 2536 shlq $63,%rax 2537 movq %r14,%r10 2538 shrq $1,%r13 2539 orq %rax,%r12 2540 shlq $63,%r10 2541 movq %r15,%rcx 2542 shrq $1,%r14 2543 orq %r10,%r13 2544 shlq $63,%rcx 2545 movq %r12,0(%rdi) 2546 shrq $1,%r15 2547 movq %r13,8(%rdi) 2548 shlq $63,%r9 2549 orq %rcx,%r14 2550 orq %r9,%r15 2551 movq %r14,16(%rdi) 2552 movq %r15,24(%rdi) 2553 movq 64(%rsp),%rax 2554 leaq 64(%rsp),%rbx 2555 movq 0+32(%rsp),%r9 2556 movq 8+32(%rsp),%r10 2557 leaq 0+32(%rsp),%rsi 2558 movq 16+32(%rsp),%r11 2559 movq 24+32(%rsp),%r12 2560 leaq 32(%rsp),%rdi 2561 call __ecp_nistz256_mul_montq 2562 2563 leaq 128(%rsp),%rdi 2564 call __ecp_nistz256_mul_by_2q 2565 2566 leaq 32(%rsp),%rbx 2567 leaq 32(%rsp),%rdi 2568 call __ecp_nistz256_add_toq 2569 2570 movq 96(%rsp),%rax 2571 leaq 96(%rsp),%rbx 2572 movq 0+0(%rsp),%r9 2573 movq 8+0(%rsp),%r10 2574 leaq 0+0(%rsp),%rsi 2575 movq 16+0(%rsp),%r11 2576 movq 24+0(%rsp),%r12 2577 leaq 0(%rsp),%rdi 2578 call __ecp_nistz256_mul_montq 2579 2580 leaq 128(%rsp),%rdi 2581 call __ecp_nistz256_mul_by_2q 2582 2583 movq 0+32(%rsp),%rax 2584 movq 8+32(%rsp),%r14 2585 leaq 0+32(%rsp),%rsi 2586 movq 16+32(%rsp),%r15 2587 movq 24+32(%rsp),%r8 2588.byte 102,72,15,126,199 2589 call __ecp_nistz256_sqr_montq 2590 2591 leaq 128(%rsp),%rbx 2592 movq %r14,%r8 2593 movq %r15,%r9 2594 movq %rsi,%r14 2595 movq %rbp,%r15 2596 call __ecp_nistz256_sub_fromq 2597 2598 movq 0+0(%rsp),%rax 2599 movq 0+8(%rsp),%rbp 2600 movq 0+16(%rsp),%rcx 2601 movq 0+24(%rsp),%r10 2602 leaq 0(%rsp),%rdi 2603 call 
__ecp_nistz256_subq 2604 2605 movq 32(%rsp),%rax 2606 leaq 32(%rsp),%rbx 2607 movq %r12,%r14 2608 xorl %ecx,%ecx 2609 movq %r12,0+0(%rsp) 2610 movq %r13,%r10 2611 movq %r13,0+8(%rsp) 2612 cmovzq %r8,%r11 2613 movq %r8,0+16(%rsp) 2614 leaq 0-0(%rsp),%rsi 2615 cmovzq %r9,%r12 2616 movq %r9,0+24(%rsp) 2617 movq %r14,%r9 2618 leaq 0(%rsp),%rdi 2619 call __ecp_nistz256_mul_montq 2620 2621.byte 102,72,15,126,203 2622.byte 102,72,15,126,207 2623 call __ecp_nistz256_sub_fromq 2624 2625 leaq 160+56(%rsp),%rsi 2626.cfi_def_cfa %rsi,8 2627 movq -48(%rsi),%r15 2628.cfi_restore %r15 2629 movq -40(%rsi),%r14 2630.cfi_restore %r14 2631 movq -32(%rsi),%r13 2632.cfi_restore %r13 2633 movq -24(%rsi),%r12 2634.cfi_restore %r12 2635 movq -16(%rsi),%rbx 2636.cfi_restore %rbx 2637 movq -8(%rsi),%rbp 2638.cfi_restore %rbp 2639 leaq (%rsi),%rsp 2640.cfi_def_cfa_register %rsp 2641.Lpoint_doubleq_epilogue: 2642 .byte 0xf3,0xc3 2643.cfi_endproc 2644.size p256_point_double,.-p256_point_double 2645.globl p256_point_add 2646.hidden p256_point_add 2647.type p256_point_add,@function 2648.align 32 2649p256_point_add: 2650.cfi_startproc 2651 leaq OPENSSL_ia32cap_P(%rip),%rcx 2652 movq 8(%rcx),%rcx 2653 andl $0x80100,%ecx 2654 cmpl $0x80100,%ecx 2655 je .Lpoint_addx 2656 pushq %rbp 2657.cfi_adjust_cfa_offset 8 2658.cfi_offset %rbp,-16 2659 pushq %rbx 2660.cfi_adjust_cfa_offset 8 2661.cfi_offset %rbx,-24 2662 pushq %r12 2663.cfi_adjust_cfa_offset 8 2664.cfi_offset %r12,-32 2665 pushq %r13 2666.cfi_adjust_cfa_offset 8 2667.cfi_offset %r13,-40 2668 pushq %r14 2669.cfi_adjust_cfa_offset 8 2670.cfi_offset %r14,-48 2671 pushq %r15 2672.cfi_adjust_cfa_offset 8 2673.cfi_offset %r15,-56 2674 subq $576+8,%rsp 2675.cfi_adjust_cfa_offset 32*18+8 2676.Lpoint_addq_body: 2677 2678 movdqu 0(%rsi),%xmm0 2679 movdqu 16(%rsi),%xmm1 2680 movdqu 32(%rsi),%xmm2 2681 movdqu 48(%rsi),%xmm3 2682 movdqu 64(%rsi),%xmm4 2683 movdqu 80(%rsi),%xmm5 2684 movq %rsi,%rbx 2685 movq %rdx,%rsi 2686 movdqa %xmm0,384(%rsp) 2687 movdqa %xmm1,384+16(%rsp) 2688 movdqa %xmm2,416(%rsp) 2689 movdqa %xmm3,416+16(%rsp) 2690 movdqa %xmm4,448(%rsp) 2691 movdqa %xmm5,448+16(%rsp) 2692 por %xmm4,%xmm5 2693 2694 movdqu 0(%rsi),%xmm0 2695 pshufd $0xb1,%xmm5,%xmm3 2696 movdqu 16(%rsi),%xmm1 2697 movdqu 32(%rsi),%xmm2 2698 por %xmm3,%xmm5 2699 movdqu 48(%rsi),%xmm3 2700 movq 64+0(%rsi),%rax 2701 movq 64+8(%rsi),%r14 2702 movq 64+16(%rsi),%r15 2703 movq 64+24(%rsi),%r8 2704 movdqa %xmm0,480(%rsp) 2705 pshufd $0x1e,%xmm5,%xmm4 2706 movdqa %xmm1,480+16(%rsp) 2707 movdqu 64(%rsi),%xmm0 2708 movdqu 80(%rsi),%xmm1 2709 movdqa %xmm2,512(%rsp) 2710 movdqa %xmm3,512+16(%rsp) 2711 por %xmm4,%xmm5 2712 pxor %xmm4,%xmm4 2713 por %xmm0,%xmm1 2714.byte 102,72,15,110,199 2715 2716 leaq 64-0(%rsi),%rsi 2717 movq %rax,544+0(%rsp) 2718 movq %r14,544+8(%rsp) 2719 movq %r15,544+16(%rsp) 2720 movq %r8,544+24(%rsp) 2721 leaq 96(%rsp),%rdi 2722 call __ecp_nistz256_sqr_montq 2723 2724 pcmpeqd %xmm4,%xmm5 2725 pshufd $0xb1,%xmm1,%xmm4 2726 por %xmm1,%xmm4 2727 pshufd $0,%xmm5,%xmm5 2728 pshufd $0x1e,%xmm4,%xmm3 2729 por %xmm3,%xmm4 2730 pxor %xmm3,%xmm3 2731 pcmpeqd %xmm3,%xmm4 2732 pshufd $0,%xmm4,%xmm4 2733 movq 64+0(%rbx),%rax 2734 movq 64+8(%rbx),%r14 2735 movq 64+16(%rbx),%r15 2736 movq 64+24(%rbx),%r8 2737.byte 102,72,15,110,203 2738 2739 leaq 64-0(%rbx),%rsi 2740 leaq 32(%rsp),%rdi 2741 call __ecp_nistz256_sqr_montq 2742 2743 movq 544(%rsp),%rax 2744 leaq 544(%rsp),%rbx 2745 movq 0+96(%rsp),%r9 2746 movq 8+96(%rsp),%r10 2747 leaq 0+96(%rsp),%rsi 2748 movq 16+96(%rsp),%r11 2749 movq 
24+96(%rsp),%r12 2750 leaq 224(%rsp),%rdi 2751 call __ecp_nistz256_mul_montq 2752 2753 movq 448(%rsp),%rax 2754 leaq 448(%rsp),%rbx 2755 movq 0+32(%rsp),%r9 2756 movq 8+32(%rsp),%r10 2757 leaq 0+32(%rsp),%rsi 2758 movq 16+32(%rsp),%r11 2759 movq 24+32(%rsp),%r12 2760 leaq 256(%rsp),%rdi 2761 call __ecp_nistz256_mul_montq 2762 2763 movq 416(%rsp),%rax 2764 leaq 416(%rsp),%rbx 2765 movq 0+224(%rsp),%r9 2766 movq 8+224(%rsp),%r10 2767 leaq 0+224(%rsp),%rsi 2768 movq 16+224(%rsp),%r11 2769 movq 24+224(%rsp),%r12 2770 leaq 224(%rsp),%rdi 2771 call __ecp_nistz256_mul_montq 2772 2773 movq 512(%rsp),%rax 2774 leaq 512(%rsp),%rbx 2775 movq 0+256(%rsp),%r9 2776 movq 8+256(%rsp),%r10 2777 leaq 0+256(%rsp),%rsi 2778 movq 16+256(%rsp),%r11 2779 movq 24+256(%rsp),%r12 2780 leaq 256(%rsp),%rdi 2781 call __ecp_nistz256_mul_montq 2782 2783 leaq 224(%rsp),%rbx 2784 leaq 64(%rsp),%rdi 2785 call __ecp_nistz256_sub_fromq 2786 2787 orq %r13,%r12 2788 movdqa %xmm4,%xmm2 2789 orq %r8,%r12 2790 orq %r9,%r12 2791 por %xmm5,%xmm2 2792.byte 102,73,15,110,220 2793 2794 movq 384(%rsp),%rax 2795 leaq 384(%rsp),%rbx 2796 movq 0+96(%rsp),%r9 2797 movq 8+96(%rsp),%r10 2798 leaq 0+96(%rsp),%rsi 2799 movq 16+96(%rsp),%r11 2800 movq 24+96(%rsp),%r12 2801 leaq 160(%rsp),%rdi 2802 call __ecp_nistz256_mul_montq 2803 2804 movq 480(%rsp),%rax 2805 leaq 480(%rsp),%rbx 2806 movq 0+32(%rsp),%r9 2807 movq 8+32(%rsp),%r10 2808 leaq 0+32(%rsp),%rsi 2809 movq 16+32(%rsp),%r11 2810 movq 24+32(%rsp),%r12 2811 leaq 192(%rsp),%rdi 2812 call __ecp_nistz256_mul_montq 2813 2814 leaq 160(%rsp),%rbx 2815 leaq 0(%rsp),%rdi 2816 call __ecp_nistz256_sub_fromq 2817 2818 orq %r13,%r12 2819 orq %r8,%r12 2820 orq %r9,%r12 2821 2822.byte 102,73,15,126,208 2823.byte 102,73,15,126,217 2824 orq %r8,%r12 2825.byte 0x3e 2826 jnz .Ladd_proceedq 2827 2828 2829 2830 testq %r9,%r9 2831 jz .Ladd_doubleq 2832 2833 2834 2835 2836 2837 2838.byte 102,72,15,126,199 2839 pxor %xmm0,%xmm0 2840 movdqu %xmm0,0(%rdi) 2841 movdqu %xmm0,16(%rdi) 2842 movdqu %xmm0,32(%rdi) 2843 movdqu %xmm0,48(%rdi) 2844 movdqu %xmm0,64(%rdi) 2845 movdqu %xmm0,80(%rdi) 2846 jmp .Ladd_doneq 2847 2848.align 32 2849.Ladd_doubleq: 2850.byte 102,72,15,126,206 2851.byte 102,72,15,126,199 2852 addq $416,%rsp 2853.cfi_adjust_cfa_offset -416 2854 jmp .Lpoint_double_shortcutq 2855.cfi_adjust_cfa_offset 416 2856 2857.align 32 2858.Ladd_proceedq: 2859 movq 0+64(%rsp),%rax 2860 movq 8+64(%rsp),%r14 2861 leaq 0+64(%rsp),%rsi 2862 movq 16+64(%rsp),%r15 2863 movq 24+64(%rsp),%r8 2864 leaq 96(%rsp),%rdi 2865 call __ecp_nistz256_sqr_montq 2866 2867 movq 448(%rsp),%rax 2868 leaq 448(%rsp),%rbx 2869 movq 0+0(%rsp),%r9 2870 movq 8+0(%rsp),%r10 2871 leaq 0+0(%rsp),%rsi 2872 movq 16+0(%rsp),%r11 2873 movq 24+0(%rsp),%r12 2874 leaq 352(%rsp),%rdi 2875 call __ecp_nistz256_mul_montq 2876 2877 movq 0+0(%rsp),%rax 2878 movq 8+0(%rsp),%r14 2879 leaq 0+0(%rsp),%rsi 2880 movq 16+0(%rsp),%r15 2881 movq 24+0(%rsp),%r8 2882 leaq 32(%rsp),%rdi 2883 call __ecp_nistz256_sqr_montq 2884 2885 movq 544(%rsp),%rax 2886 leaq 544(%rsp),%rbx 2887 movq 0+352(%rsp),%r9 2888 movq 8+352(%rsp),%r10 2889 leaq 0+352(%rsp),%rsi 2890 movq 16+352(%rsp),%r11 2891 movq 24+352(%rsp),%r12 2892 leaq 352(%rsp),%rdi 2893 call __ecp_nistz256_mul_montq 2894 2895 movq 0(%rsp),%rax 2896 leaq 0(%rsp),%rbx 2897 movq 0+32(%rsp),%r9 2898 movq 8+32(%rsp),%r10 2899 leaq 0+32(%rsp),%rsi 2900 movq 16+32(%rsp),%r11 2901 movq 24+32(%rsp),%r12 2902 leaq 128(%rsp),%rdi 2903 call __ecp_nistz256_mul_montq 2904 2905 movq 160(%rsp),%rax 2906 leaq 160(%rsp),%rbx 2907 movq 
0+32(%rsp),%r9 2908 movq 8+32(%rsp),%r10 2909 leaq 0+32(%rsp),%rsi 2910 movq 16+32(%rsp),%r11 2911 movq 24+32(%rsp),%r12 2912 leaq 192(%rsp),%rdi 2913 call __ecp_nistz256_mul_montq 2914 2915 2916 2917 2918 xorq %r11,%r11 2919 addq %r12,%r12 2920 leaq 96(%rsp),%rsi 2921 adcq %r13,%r13 2922 movq %r12,%rax 2923 adcq %r8,%r8 2924 adcq %r9,%r9 2925 movq %r13,%rbp 2926 adcq $0,%r11 2927 2928 subq $-1,%r12 2929 movq %r8,%rcx 2930 sbbq %r14,%r13 2931 sbbq $0,%r8 2932 movq %r9,%r10 2933 sbbq %r15,%r9 2934 sbbq $0,%r11 2935 2936 cmovcq %rax,%r12 2937 movq 0(%rsi),%rax 2938 cmovcq %rbp,%r13 2939 movq 8(%rsi),%rbp 2940 cmovcq %rcx,%r8 2941 movq 16(%rsi),%rcx 2942 cmovcq %r10,%r9 2943 movq 24(%rsi),%r10 2944 2945 call __ecp_nistz256_subq 2946 2947 leaq 128(%rsp),%rbx 2948 leaq 288(%rsp),%rdi 2949 call __ecp_nistz256_sub_fromq 2950 2951 movq 192+0(%rsp),%rax 2952 movq 192+8(%rsp),%rbp 2953 movq 192+16(%rsp),%rcx 2954 movq 192+24(%rsp),%r10 2955 leaq 320(%rsp),%rdi 2956 2957 call __ecp_nistz256_subq 2958 2959 movq %r12,0(%rdi) 2960 movq %r13,8(%rdi) 2961 movq %r8,16(%rdi) 2962 movq %r9,24(%rdi) 2963 movq 128(%rsp),%rax 2964 leaq 128(%rsp),%rbx 2965 movq 0+224(%rsp),%r9 2966 movq 8+224(%rsp),%r10 2967 leaq 0+224(%rsp),%rsi 2968 movq 16+224(%rsp),%r11 2969 movq 24+224(%rsp),%r12 2970 leaq 256(%rsp),%rdi 2971 call __ecp_nistz256_mul_montq 2972 2973 movq 320(%rsp),%rax 2974 leaq 320(%rsp),%rbx 2975 movq 0+64(%rsp),%r9 2976 movq 8+64(%rsp),%r10 2977 leaq 0+64(%rsp),%rsi 2978 movq 16+64(%rsp),%r11 2979 movq 24+64(%rsp),%r12 2980 leaq 320(%rsp),%rdi 2981 call __ecp_nistz256_mul_montq 2982 2983 leaq 256(%rsp),%rbx 2984 leaq 320(%rsp),%rdi 2985 call __ecp_nistz256_sub_fromq 2986 2987.byte 102,72,15,126,199 2988 2989 movdqa %xmm5,%xmm0 2990 movdqa %xmm5,%xmm1 2991 pandn 352(%rsp),%xmm0 2992 movdqa %xmm5,%xmm2 2993 pandn 352+16(%rsp),%xmm1 2994 movdqa %xmm5,%xmm3 2995 pand 544(%rsp),%xmm2 2996 pand 544+16(%rsp),%xmm3 2997 por %xmm0,%xmm2 2998 por %xmm1,%xmm3 2999 3000 movdqa %xmm4,%xmm0 3001 movdqa %xmm4,%xmm1 3002 pandn %xmm2,%xmm0 3003 movdqa %xmm4,%xmm2 3004 pandn %xmm3,%xmm1 3005 movdqa %xmm4,%xmm3 3006 pand 448(%rsp),%xmm2 3007 pand 448+16(%rsp),%xmm3 3008 por %xmm0,%xmm2 3009 por %xmm1,%xmm3 3010 movdqu %xmm2,64(%rdi) 3011 movdqu %xmm3,80(%rdi) 3012 3013 movdqa %xmm5,%xmm0 3014 movdqa %xmm5,%xmm1 3015 pandn 288(%rsp),%xmm0 3016 movdqa %xmm5,%xmm2 3017 pandn 288+16(%rsp),%xmm1 3018 movdqa %xmm5,%xmm3 3019 pand 480(%rsp),%xmm2 3020 pand 480+16(%rsp),%xmm3 3021 por %xmm0,%xmm2 3022 por %xmm1,%xmm3 3023 3024 movdqa %xmm4,%xmm0 3025 movdqa %xmm4,%xmm1 3026 pandn %xmm2,%xmm0 3027 movdqa %xmm4,%xmm2 3028 pandn %xmm3,%xmm1 3029 movdqa %xmm4,%xmm3 3030 pand 384(%rsp),%xmm2 3031 pand 384+16(%rsp),%xmm3 3032 por %xmm0,%xmm2 3033 por %xmm1,%xmm3 3034 movdqu %xmm2,0(%rdi) 3035 movdqu %xmm3,16(%rdi) 3036 3037 movdqa %xmm5,%xmm0 3038 movdqa %xmm5,%xmm1 3039 pandn 320(%rsp),%xmm0 3040 movdqa %xmm5,%xmm2 3041 pandn 320+16(%rsp),%xmm1 3042 movdqa %xmm5,%xmm3 3043 pand 512(%rsp),%xmm2 3044 pand 512+16(%rsp),%xmm3 3045 por %xmm0,%xmm2 3046 por %xmm1,%xmm3 3047 3048 movdqa %xmm4,%xmm0 3049 movdqa %xmm4,%xmm1 3050 pandn %xmm2,%xmm0 3051 movdqa %xmm4,%xmm2 3052 pandn %xmm3,%xmm1 3053 movdqa %xmm4,%xmm3 3054 pand 416(%rsp),%xmm2 3055 pand 416+16(%rsp),%xmm3 3056 por %xmm0,%xmm2 3057 por %xmm1,%xmm3 3058 movdqu %xmm2,32(%rdi) 3059 movdqu %xmm3,48(%rdi) 3060 3061.Ladd_doneq: 3062 leaq 576+56(%rsp),%rsi 3063.cfi_def_cfa %rsi,8 3064 movq -48(%rsi),%r15 3065.cfi_restore %r15 3066 movq -40(%rsi),%r14 3067.cfi_restore %r14 3068 movq 
-32(%rsi),%r13 3069.cfi_restore %r13 3070 movq -24(%rsi),%r12 3071.cfi_restore %r12 3072 movq -16(%rsi),%rbx 3073.cfi_restore %rbx 3074 movq -8(%rsi),%rbp 3075.cfi_restore %rbp 3076 leaq (%rsi),%rsp 3077.cfi_def_cfa_register %rsp 3078.Lpoint_addq_epilogue: 3079 .byte 0xf3,0xc3 3080.cfi_endproc 3081.size p256_point_add,.-p256_point_add 3082.globl p256_point_add_affine 3083.hidden p256_point_add_affine 3084.type p256_point_add_affine,@function 3085.align 32 3086p256_point_add_affine: 3087.cfi_startproc 3088 leaq OPENSSL_ia32cap_P(%rip),%rcx 3089 movq 8(%rcx),%rcx 3090 andl $0x80100,%ecx 3091 cmpl $0x80100,%ecx 3092 je .Lpoint_add_affinex 3093 pushq %rbp 3094.cfi_adjust_cfa_offset 8 3095.cfi_offset %rbp,-16 3096 pushq %rbx 3097.cfi_adjust_cfa_offset 8 3098.cfi_offset %rbx,-24 3099 pushq %r12 3100.cfi_adjust_cfa_offset 8 3101.cfi_offset %r12,-32 3102 pushq %r13 3103.cfi_adjust_cfa_offset 8 3104.cfi_offset %r13,-40 3105 pushq %r14 3106.cfi_adjust_cfa_offset 8 3107.cfi_offset %r14,-48 3108 pushq %r15 3109.cfi_adjust_cfa_offset 8 3110.cfi_offset %r15,-56 3111 subq $480+8,%rsp 3112.cfi_adjust_cfa_offset 32*15+8 3113.Ladd_affineq_body: 3114 3115 movdqu 0(%rsi),%xmm0 3116 movq %rdx,%rbx 3117 movdqu 16(%rsi),%xmm1 3118 movdqu 32(%rsi),%xmm2 3119 movdqu 48(%rsi),%xmm3 3120 movdqu 64(%rsi),%xmm4 3121 movdqu 80(%rsi),%xmm5 3122 movq 64+0(%rsi),%rax 3123 movq 64+8(%rsi),%r14 3124 movq 64+16(%rsi),%r15 3125 movq 64+24(%rsi),%r8 3126 movdqa %xmm0,320(%rsp) 3127 movdqa %xmm1,320+16(%rsp) 3128 movdqa %xmm2,352(%rsp) 3129 movdqa %xmm3,352+16(%rsp) 3130 movdqa %xmm4,384(%rsp) 3131 movdqa %xmm5,384+16(%rsp) 3132 por %xmm4,%xmm5 3133 3134 movdqu 0(%rbx),%xmm0 3135 pshufd $0xb1,%xmm5,%xmm3 3136 movdqu 16(%rbx),%xmm1 3137 movdqu 32(%rbx),%xmm2 3138 por %xmm3,%xmm5 3139 movdqu 48(%rbx),%xmm3 3140 movdqa %xmm0,416(%rsp) 3141 pshufd $0x1e,%xmm5,%xmm4 3142 movdqa %xmm1,416+16(%rsp) 3143 por %xmm0,%xmm1 3144.byte 102,72,15,110,199 3145 movdqa %xmm2,448(%rsp) 3146 movdqa %xmm3,448+16(%rsp) 3147 por %xmm2,%xmm3 3148 por %xmm4,%xmm5 3149 pxor %xmm4,%xmm4 3150 por %xmm1,%xmm3 3151 3152 leaq 64-0(%rsi),%rsi 3153 leaq 32(%rsp),%rdi 3154 call __ecp_nistz256_sqr_montq 3155 3156 pcmpeqd %xmm4,%xmm5 3157 pshufd $0xb1,%xmm3,%xmm4 3158 movq 0(%rbx),%rax 3159 3160 movq %r12,%r9 3161 por %xmm3,%xmm4 3162 pshufd $0,%xmm5,%xmm5 3163 pshufd $0x1e,%xmm4,%xmm3 3164 movq %r13,%r10 3165 por %xmm3,%xmm4 3166 pxor %xmm3,%xmm3 3167 movq %r14,%r11 3168 pcmpeqd %xmm3,%xmm4 3169 pshufd $0,%xmm4,%xmm4 3170 3171 leaq 32-0(%rsp),%rsi 3172 movq %r15,%r12 3173 leaq 0(%rsp),%rdi 3174 call __ecp_nistz256_mul_montq 3175 3176 leaq 320(%rsp),%rbx 3177 leaq 64(%rsp),%rdi 3178 call __ecp_nistz256_sub_fromq 3179 3180 movq 384(%rsp),%rax 3181 leaq 384(%rsp),%rbx 3182 movq 0+32(%rsp),%r9 3183 movq 8+32(%rsp),%r10 3184 leaq 0+32(%rsp),%rsi 3185 movq 16+32(%rsp),%r11 3186 movq 24+32(%rsp),%r12 3187 leaq 32(%rsp),%rdi 3188 call __ecp_nistz256_mul_montq 3189 3190 movq 384(%rsp),%rax 3191 leaq 384(%rsp),%rbx 3192 movq 0+64(%rsp),%r9 3193 movq 8+64(%rsp),%r10 3194 leaq 0+64(%rsp),%rsi 3195 movq 16+64(%rsp),%r11 3196 movq 24+64(%rsp),%r12 3197 leaq 288(%rsp),%rdi 3198 call __ecp_nistz256_mul_montq 3199 3200 movq 448(%rsp),%rax 3201 leaq 448(%rsp),%rbx 3202 movq 0+32(%rsp),%r9 3203 movq 8+32(%rsp),%r10 3204 leaq 0+32(%rsp),%rsi 3205 movq 16+32(%rsp),%r11 3206 movq 24+32(%rsp),%r12 3207 leaq 32(%rsp),%rdi 3208 call __ecp_nistz256_mul_montq 3209 3210 leaq 352(%rsp),%rbx 3211 leaq 96(%rsp),%rdi 3212 call __ecp_nistz256_sub_fromq 3213 3214 movq 0+64(%rsp),%rax 3215 movq 
8+64(%rsp),%r14 3216 leaq 0+64(%rsp),%rsi 3217 movq 16+64(%rsp),%r15 3218 movq 24+64(%rsp),%r8 3219 leaq 128(%rsp),%rdi 3220 call __ecp_nistz256_sqr_montq 3221 3222 movq 0+96(%rsp),%rax 3223 movq 8+96(%rsp),%r14 3224 leaq 0+96(%rsp),%rsi 3225 movq 16+96(%rsp),%r15 3226 movq 24+96(%rsp),%r8 3227 leaq 192(%rsp),%rdi 3228 call __ecp_nistz256_sqr_montq 3229 3230 movq 128(%rsp),%rax 3231 leaq 128(%rsp),%rbx 3232 movq 0+64(%rsp),%r9 3233 movq 8+64(%rsp),%r10 3234 leaq 0+64(%rsp),%rsi 3235 movq 16+64(%rsp),%r11 3236 movq 24+64(%rsp),%r12 3237 leaq 160(%rsp),%rdi 3238 call __ecp_nistz256_mul_montq 3239 3240 movq 320(%rsp),%rax 3241 leaq 320(%rsp),%rbx 3242 movq 0+128(%rsp),%r9 3243 movq 8+128(%rsp),%r10 3244 leaq 0+128(%rsp),%rsi 3245 movq 16+128(%rsp),%r11 3246 movq 24+128(%rsp),%r12 3247 leaq 0(%rsp),%rdi 3248 call __ecp_nistz256_mul_montq 3249 3250 3251 3252 3253 xorq %r11,%r11 3254 addq %r12,%r12 3255 leaq 192(%rsp),%rsi 3256 adcq %r13,%r13 3257 movq %r12,%rax 3258 adcq %r8,%r8 3259 adcq %r9,%r9 3260 movq %r13,%rbp 3261 adcq $0,%r11 3262 3263 subq $-1,%r12 3264 movq %r8,%rcx 3265 sbbq %r14,%r13 3266 sbbq $0,%r8 3267 movq %r9,%r10 3268 sbbq %r15,%r9 3269 sbbq $0,%r11 3270 3271 cmovcq %rax,%r12 3272 movq 0(%rsi),%rax 3273 cmovcq %rbp,%r13 3274 movq 8(%rsi),%rbp 3275 cmovcq %rcx,%r8 3276 movq 16(%rsi),%rcx 3277 cmovcq %r10,%r9 3278 movq 24(%rsi),%r10 3279 3280 call __ecp_nistz256_subq 3281 3282 leaq 160(%rsp),%rbx 3283 leaq 224(%rsp),%rdi 3284 call __ecp_nistz256_sub_fromq 3285 3286 movq 0+0(%rsp),%rax 3287 movq 0+8(%rsp),%rbp 3288 movq 0+16(%rsp),%rcx 3289 movq 0+24(%rsp),%r10 3290 leaq 64(%rsp),%rdi 3291 3292 call __ecp_nistz256_subq 3293 3294 movq %r12,0(%rdi) 3295 movq %r13,8(%rdi) 3296 movq %r8,16(%rdi) 3297 movq %r9,24(%rdi) 3298 movq 352(%rsp),%rax 3299 leaq 352(%rsp),%rbx 3300 movq 0+160(%rsp),%r9 3301 movq 8+160(%rsp),%r10 3302 leaq 0+160(%rsp),%rsi 3303 movq 16+160(%rsp),%r11 3304 movq 24+160(%rsp),%r12 3305 leaq 32(%rsp),%rdi 3306 call __ecp_nistz256_mul_montq 3307 3308 movq 96(%rsp),%rax 3309 leaq 96(%rsp),%rbx 3310 movq 0+64(%rsp),%r9 3311 movq 8+64(%rsp),%r10 3312 leaq 0+64(%rsp),%rsi 3313 movq 16+64(%rsp),%r11 3314 movq 24+64(%rsp),%r12 3315 leaq 64(%rsp),%rdi 3316 call __ecp_nistz256_mul_montq 3317 3318 leaq 32(%rsp),%rbx 3319 leaq 256(%rsp),%rdi 3320 call __ecp_nistz256_sub_fromq 3321 3322.byte 102,72,15,126,199 3323 3324 movdqa %xmm5,%xmm0 3325 movdqa %xmm5,%xmm1 3326 pandn 288(%rsp),%xmm0 3327 movdqa %xmm5,%xmm2 3328 pandn 288+16(%rsp),%xmm1 3329 movdqa %xmm5,%xmm3 3330 pand .LONE_mont(%rip),%xmm2 3331 pand .LONE_mont+16(%rip),%xmm3 3332 por %xmm0,%xmm2 3333 por %xmm1,%xmm3 3334 3335 movdqa %xmm4,%xmm0 3336 movdqa %xmm4,%xmm1 3337 pandn %xmm2,%xmm0 3338 movdqa %xmm4,%xmm2 3339 pandn %xmm3,%xmm1 3340 movdqa %xmm4,%xmm3 3341 pand 384(%rsp),%xmm2 3342 pand 384+16(%rsp),%xmm3 3343 por %xmm0,%xmm2 3344 por %xmm1,%xmm3 3345 movdqu %xmm2,64(%rdi) 3346 movdqu %xmm3,80(%rdi) 3347 3348 movdqa %xmm5,%xmm0 3349 movdqa %xmm5,%xmm1 3350 pandn 224(%rsp),%xmm0 3351 movdqa %xmm5,%xmm2 3352 pandn 224+16(%rsp),%xmm1 3353 movdqa %xmm5,%xmm3 3354 pand 416(%rsp),%xmm2 3355 pand 416+16(%rsp),%xmm3 3356 por %xmm0,%xmm2 3357 por %xmm1,%xmm3 3358 3359 movdqa %xmm4,%xmm0 3360 movdqa %xmm4,%xmm1 3361 pandn %xmm2,%xmm0 3362 movdqa %xmm4,%xmm2 3363 pandn %xmm3,%xmm1 3364 movdqa %xmm4,%xmm3 3365 pand 320(%rsp),%xmm2 3366 pand 320+16(%rsp),%xmm3 3367 por %xmm0,%xmm2 3368 por %xmm1,%xmm3 3369 movdqu %xmm2,0(%rdi) 3370 movdqu %xmm3,16(%rdi) 3371 3372 movdqa %xmm5,%xmm0 3373 movdqa %xmm5,%xmm1 3374 pandn 
256(%rsp),%xmm0 3375 movdqa %xmm5,%xmm2 3376 pandn 256+16(%rsp),%xmm1 3377 movdqa %xmm5,%xmm3 3378 pand 448(%rsp),%xmm2 3379 pand 448+16(%rsp),%xmm3 3380 por %xmm0,%xmm2 3381 por %xmm1,%xmm3 3382 3383 movdqa %xmm4,%xmm0 3384 movdqa %xmm4,%xmm1 3385 pandn %xmm2,%xmm0 3386 movdqa %xmm4,%xmm2 3387 pandn %xmm3,%xmm1 3388 movdqa %xmm4,%xmm3 3389 pand 352(%rsp),%xmm2 3390 pand 352+16(%rsp),%xmm3 3391 por %xmm0,%xmm2 3392 por %xmm1,%xmm3 3393 movdqu %xmm2,32(%rdi) 3394 movdqu %xmm3,48(%rdi) 3395 3396 leaq 480+56(%rsp),%rsi 3397.cfi_def_cfa %rsi,8 3398 movq -48(%rsi),%r15 3399.cfi_restore %r15 3400 movq -40(%rsi),%r14 3401.cfi_restore %r14 3402 movq -32(%rsi),%r13 3403.cfi_restore %r13 3404 movq -24(%rsi),%r12 3405.cfi_restore %r12 3406 movq -16(%rsi),%rbx 3407.cfi_restore %rbx 3408 movq -8(%rsi),%rbp 3409.cfi_restore %rbp 3410 leaq (%rsi),%rsp 3411.cfi_def_cfa_register %rsp 3412.Ladd_affineq_epilogue: 3413 .byte 0xf3,0xc3 3414.cfi_endproc 3415.size p256_point_add_affine,.-p256_point_add_affine 3416.type __ecp_nistz256_add_tox,@function 3417.align 32 3418__ecp_nistz256_add_tox: 3419.cfi_startproc 3420 xorq %r11,%r11 3421 adcq 0(%rbx),%r12 3422 adcq 8(%rbx),%r13 3423 movq %r12,%rax 3424 adcq 16(%rbx),%r8 3425 adcq 24(%rbx),%r9 3426 movq %r13,%rbp 3427 adcq $0,%r11 3428 3429 xorq %r10,%r10 3430 sbbq $-1,%r12 3431 movq %r8,%rcx 3432 sbbq %r14,%r13 3433 sbbq $0,%r8 3434 movq %r9,%r10 3435 sbbq %r15,%r9 3436 sbbq $0,%r11 3437 3438 cmovcq %rax,%r12 3439 cmovcq %rbp,%r13 3440 movq %r12,0(%rdi) 3441 cmovcq %rcx,%r8 3442 movq %r13,8(%rdi) 3443 cmovcq %r10,%r9 3444 movq %r8,16(%rdi) 3445 movq %r9,24(%rdi) 3446 3447 .byte 0xf3,0xc3 3448.cfi_endproc 3449.size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox 3450 3451.type __ecp_nistz256_sub_fromx,@function 3452.align 32 3453__ecp_nistz256_sub_fromx: 3454.cfi_startproc 3455 xorq %r11,%r11 3456 sbbq 0(%rbx),%r12 3457 sbbq 8(%rbx),%r13 3458 movq %r12,%rax 3459 sbbq 16(%rbx),%r8 3460 sbbq 24(%rbx),%r9 3461 movq %r13,%rbp 3462 sbbq $0,%r11 3463 3464 xorq %r10,%r10 3465 adcq $-1,%r12 3466 movq %r8,%rcx 3467 adcq %r14,%r13 3468 adcq $0,%r8 3469 movq %r9,%r10 3470 adcq %r15,%r9 3471 3472 btq $0,%r11 3473 cmovncq %rax,%r12 3474 cmovncq %rbp,%r13 3475 movq %r12,0(%rdi) 3476 cmovncq %rcx,%r8 3477 movq %r13,8(%rdi) 3478 cmovncq %r10,%r9 3479 movq %r8,16(%rdi) 3480 movq %r9,24(%rdi) 3481 3482 .byte 0xf3,0xc3 3483.cfi_endproc 3484.size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx 3485 3486.type __ecp_nistz256_subx,@function 3487.align 32 3488__ecp_nistz256_subx: 3489.cfi_startproc 3490 xorq %r11,%r11 3491 sbbq %r12,%rax 3492 sbbq %r13,%rbp 3493 movq %rax,%r12 3494 sbbq %r8,%rcx 3495 sbbq %r9,%r10 3496 movq %rbp,%r13 3497 sbbq $0,%r11 3498 3499 xorq %r9,%r9 3500 adcq $-1,%rax 3501 movq %rcx,%r8 3502 adcq %r14,%rbp 3503 adcq $0,%rcx 3504 movq %r10,%r9 3505 adcq %r15,%r10 3506 3507 btq $0,%r11 3508 cmovcq %rax,%r12 3509 cmovcq %rbp,%r13 3510 cmovcq %rcx,%r8 3511 cmovcq %r10,%r9 3512 3513 .byte 0xf3,0xc3 3514.cfi_endproc 3515.size __ecp_nistz256_subx,.-__ecp_nistz256_subx 3516 3517.type __ecp_nistz256_mul_by_2x,@function 3518.align 32 3519__ecp_nistz256_mul_by_2x: 3520.cfi_startproc 3521 xorq %r11,%r11 3522 adcq %r12,%r12 3523 adcq %r13,%r13 3524 movq %r12,%rax 3525 adcq %r8,%r8 3526 adcq %r9,%r9 3527 movq %r13,%rbp 3528 adcq $0,%r11 3529 3530 xorq %r10,%r10 3531 sbbq $-1,%r12 3532 movq %r8,%rcx 3533 sbbq %r14,%r13 3534 sbbq $0,%r8 3535 movq %r9,%r10 3536 sbbq %r15,%r9 3537 sbbq $0,%r11 3538 3539 cmovcq %rax,%r12 3540 cmovcq %rbp,%r13 3541 movq %r12,0(%rdi) 3542 cmovcq 
%rcx,%r8 3543 movq %r13,8(%rdi) 3544 cmovcq %r10,%r9 3545 movq %r8,16(%rdi) 3546 movq %r9,24(%rdi) 3547 3548 .byte 0xf3,0xc3 3549.cfi_endproc 3550.size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x 3551.type p256_point_doublex,@function 3552.align 32 3553p256_point_doublex: 3554.cfi_startproc 3555.Lpoint_doublex: 3556 pushq %rbp 3557.cfi_adjust_cfa_offset 8 3558.cfi_offset %rbp,-16 3559 pushq %rbx 3560.cfi_adjust_cfa_offset 8 3561.cfi_offset %rbx,-24 3562 pushq %r12 3563.cfi_adjust_cfa_offset 8 3564.cfi_offset %r12,-32 3565 pushq %r13 3566.cfi_adjust_cfa_offset 8 3567.cfi_offset %r13,-40 3568 pushq %r14 3569.cfi_adjust_cfa_offset 8 3570.cfi_offset %r14,-48 3571 pushq %r15 3572.cfi_adjust_cfa_offset 8 3573.cfi_offset %r15,-56 3574 subq $160+8,%rsp 3575.cfi_adjust_cfa_offset 32*5+8 3576.Lpoint_doublex_body: 3577 3578.Lpoint_double_shortcutx: 3579 movdqu 0(%rsi),%xmm0 3580 movq %rsi,%rbx 3581 movdqu 16(%rsi),%xmm1 3582 movq 32+0(%rsi),%r12 3583 movq 32+8(%rsi),%r13 3584 movq 32+16(%rsi),%r8 3585 movq 32+24(%rsi),%r9 3586 movq .Lpoly+8(%rip),%r14 3587 movq .Lpoly+24(%rip),%r15 3588 movdqa %xmm0,96(%rsp) 3589 movdqa %xmm1,96+16(%rsp) 3590 leaq 32(%rdi),%r10 3591 leaq 64(%rdi),%r11 3592.byte 102,72,15,110,199 3593.byte 102,73,15,110,202 3594.byte 102,73,15,110,211 3595 3596 leaq 0(%rsp),%rdi 3597 call __ecp_nistz256_mul_by_2x 3598 3599 movq 64+0(%rsi),%rdx 3600 movq 64+8(%rsi),%r14 3601 movq 64+16(%rsi),%r15 3602 movq 64+24(%rsi),%r8 3603 leaq 64-128(%rsi),%rsi 3604 leaq 64(%rsp),%rdi 3605 call __ecp_nistz256_sqr_montx 3606 3607 movq 0+0(%rsp),%rdx 3608 movq 8+0(%rsp),%r14 3609 leaq -128+0(%rsp),%rsi 3610 movq 16+0(%rsp),%r15 3611 movq 24+0(%rsp),%r8 3612 leaq 0(%rsp),%rdi 3613 call __ecp_nistz256_sqr_montx 3614 3615 movq 32(%rbx),%rdx 3616 movq 64+0(%rbx),%r9 3617 movq 64+8(%rbx),%r10 3618 movq 64+16(%rbx),%r11 3619 movq 64+24(%rbx),%r12 3620 leaq 64-128(%rbx),%rsi 3621 leaq 32(%rbx),%rbx 3622.byte 102,72,15,126,215 3623 call __ecp_nistz256_mul_montx 3624 call __ecp_nistz256_mul_by_2x 3625 3626 movq 96+0(%rsp),%r12 3627 movq 96+8(%rsp),%r13 3628 leaq 64(%rsp),%rbx 3629 movq 96+16(%rsp),%r8 3630 movq 96+24(%rsp),%r9 3631 leaq 32(%rsp),%rdi 3632 call __ecp_nistz256_add_tox 3633 3634 movq 96+0(%rsp),%r12 3635 movq 96+8(%rsp),%r13 3636 leaq 64(%rsp),%rbx 3637 movq 96+16(%rsp),%r8 3638 movq 96+24(%rsp),%r9 3639 leaq 64(%rsp),%rdi 3640 call __ecp_nistz256_sub_fromx 3641 3642 movq 0+0(%rsp),%rdx 3643 movq 8+0(%rsp),%r14 3644 leaq -128+0(%rsp),%rsi 3645 movq 16+0(%rsp),%r15 3646 movq 24+0(%rsp),%r8 3647.byte 102,72,15,126,207 3648 call __ecp_nistz256_sqr_montx 3649 xorq %r9,%r9 3650 movq %r12,%rax 3651 addq $-1,%r12 3652 movq %r13,%r10 3653 adcq %rsi,%r13 3654 movq %r14,%rcx 3655 adcq $0,%r14 3656 movq %r15,%r8 3657 adcq %rbp,%r15 3658 adcq $0,%r9 3659 xorq %rsi,%rsi 3660 testq $1,%rax 3661 3662 cmovzq %rax,%r12 3663 cmovzq %r10,%r13 3664 cmovzq %rcx,%r14 3665 cmovzq %r8,%r15 3666 cmovzq %rsi,%r9 3667 3668 movq %r13,%rax 3669 shrq $1,%r12 3670 shlq $63,%rax 3671 movq %r14,%r10 3672 shrq $1,%r13 3673 orq %rax,%r12 3674 shlq $63,%r10 3675 movq %r15,%rcx 3676 shrq $1,%r14 3677 orq %r10,%r13 3678 shlq $63,%rcx 3679 movq %r12,0(%rdi) 3680 shrq $1,%r15 3681 movq %r13,8(%rdi) 3682 shlq $63,%r9 3683 orq %rcx,%r14 3684 orq %r9,%r15 3685 movq %r14,16(%rdi) 3686 movq %r15,24(%rdi) 3687 movq 64(%rsp),%rdx 3688 leaq 64(%rsp),%rbx 3689 movq 0+32(%rsp),%r9 3690 movq 8+32(%rsp),%r10 3691 leaq -128+32(%rsp),%rsi 3692 movq 16+32(%rsp),%r11 3693 movq 24+32(%rsp),%r12 3694 leaq 32(%rsp),%rdi 3695 call 
__ecp_nistz256_mul_montx 3696 3697 leaq 128(%rsp),%rdi 3698 call __ecp_nistz256_mul_by_2x 3699 3700 leaq 32(%rsp),%rbx 3701 leaq 32(%rsp),%rdi 3702 call __ecp_nistz256_add_tox 3703 3704 movq 96(%rsp),%rdx 3705 leaq 96(%rsp),%rbx 3706 movq 0+0(%rsp),%r9 3707 movq 8+0(%rsp),%r10 3708 leaq -128+0(%rsp),%rsi 3709 movq 16+0(%rsp),%r11 3710 movq 24+0(%rsp),%r12 3711 leaq 0(%rsp),%rdi 3712 call __ecp_nistz256_mul_montx 3713 3714 leaq 128(%rsp),%rdi 3715 call __ecp_nistz256_mul_by_2x 3716 3717 movq 0+32(%rsp),%rdx 3718 movq 8+32(%rsp),%r14 3719 leaq -128+32(%rsp),%rsi 3720 movq 16+32(%rsp),%r15 3721 movq 24+32(%rsp),%r8 3722.byte 102,72,15,126,199 3723 call __ecp_nistz256_sqr_montx 3724 3725 leaq 128(%rsp),%rbx 3726 movq %r14,%r8 3727 movq %r15,%r9 3728 movq %rsi,%r14 3729 movq %rbp,%r15 3730 call __ecp_nistz256_sub_fromx 3731 3732 movq 0+0(%rsp),%rax 3733 movq 0+8(%rsp),%rbp 3734 movq 0+16(%rsp),%rcx 3735 movq 0+24(%rsp),%r10 3736 leaq 0(%rsp),%rdi 3737 call __ecp_nistz256_subx 3738 3739 movq 32(%rsp),%rdx 3740 leaq 32(%rsp),%rbx 3741 movq %r12,%r14 3742 xorl %ecx,%ecx 3743 movq %r12,0+0(%rsp) 3744 movq %r13,%r10 3745 movq %r13,0+8(%rsp) 3746 cmovzq %r8,%r11 3747 movq %r8,0+16(%rsp) 3748 leaq 0-128(%rsp),%rsi 3749 cmovzq %r9,%r12 3750 movq %r9,0+24(%rsp) 3751 movq %r14,%r9 3752 leaq 0(%rsp),%rdi 3753 call __ecp_nistz256_mul_montx 3754 3755.byte 102,72,15,126,203 3756.byte 102,72,15,126,207 3757 call __ecp_nistz256_sub_fromx 3758 3759 leaq 160+56(%rsp),%rsi 3760.cfi_def_cfa %rsi,8 3761 movq -48(%rsi),%r15 3762.cfi_restore %r15 3763 movq -40(%rsi),%r14 3764.cfi_restore %r14 3765 movq -32(%rsi),%r13 3766.cfi_restore %r13 3767 movq -24(%rsi),%r12 3768.cfi_restore %r12 3769 movq -16(%rsi),%rbx 3770.cfi_restore %rbx 3771 movq -8(%rsi),%rbp 3772.cfi_restore %rbp 3773 leaq (%rsi),%rsp 3774.cfi_def_cfa_register %rsp 3775.Lpoint_doublex_epilogue: 3776 .byte 0xf3,0xc3 3777.cfi_endproc 3778.size p256_point_doublex,.-p256_point_doublex 3779.type p256_point_addx,@function 3780.align 32 3781p256_point_addx: 3782.cfi_startproc 3783.Lpoint_addx: 3784 pushq %rbp 3785.cfi_adjust_cfa_offset 8 3786.cfi_offset %rbp,-16 3787 pushq %rbx 3788.cfi_adjust_cfa_offset 8 3789.cfi_offset %rbx,-24 3790 pushq %r12 3791.cfi_adjust_cfa_offset 8 3792.cfi_offset %r12,-32 3793 pushq %r13 3794.cfi_adjust_cfa_offset 8 3795.cfi_offset %r13,-40 3796 pushq %r14 3797.cfi_adjust_cfa_offset 8 3798.cfi_offset %r14,-48 3799 pushq %r15 3800.cfi_adjust_cfa_offset 8 3801.cfi_offset %r15,-56 3802 subq $576+8,%rsp 3803.cfi_adjust_cfa_offset 32*18+8 3804.Lpoint_addx_body: 3805 3806 movdqu 0(%rsi),%xmm0 3807 movdqu 16(%rsi),%xmm1 3808 movdqu 32(%rsi),%xmm2 3809 movdqu 48(%rsi),%xmm3 3810 movdqu 64(%rsi),%xmm4 3811 movdqu 80(%rsi),%xmm5 3812 movq %rsi,%rbx 3813 movq %rdx,%rsi 3814 movdqa %xmm0,384(%rsp) 3815 movdqa %xmm1,384+16(%rsp) 3816 movdqa %xmm2,416(%rsp) 3817 movdqa %xmm3,416+16(%rsp) 3818 movdqa %xmm4,448(%rsp) 3819 movdqa %xmm5,448+16(%rsp) 3820 por %xmm4,%xmm5 3821 3822 movdqu 0(%rsi),%xmm0 3823 pshufd $0xb1,%xmm5,%xmm3 3824 movdqu 16(%rsi),%xmm1 3825 movdqu 32(%rsi),%xmm2 3826 por %xmm3,%xmm5 3827 movdqu 48(%rsi),%xmm3 3828 movq 64+0(%rsi),%rdx 3829 movq 64+8(%rsi),%r14 3830 movq 64+16(%rsi),%r15 3831 movq 64+24(%rsi),%r8 3832 movdqa %xmm0,480(%rsp) 3833 pshufd $0x1e,%xmm5,%xmm4 3834 movdqa %xmm1,480+16(%rsp) 3835 movdqu 64(%rsi),%xmm0 3836 movdqu 80(%rsi),%xmm1 3837 movdqa %xmm2,512(%rsp) 3838 movdqa %xmm3,512+16(%rsp) 3839 por %xmm4,%xmm5 3840 pxor %xmm4,%xmm4 3841 por %xmm0,%xmm1 3842.byte 102,72,15,110,199 3843 3844 leaq 
64-128(%rsi),%rsi 3845 movq %rdx,544+0(%rsp) 3846 movq %r14,544+8(%rsp) 3847 movq %r15,544+16(%rsp) 3848 movq %r8,544+24(%rsp) 3849 leaq 96(%rsp),%rdi 3850 call __ecp_nistz256_sqr_montx 3851 3852 pcmpeqd %xmm4,%xmm5 3853 pshufd $0xb1,%xmm1,%xmm4 3854 por %xmm1,%xmm4 3855 pshufd $0,%xmm5,%xmm5 3856 pshufd $0x1e,%xmm4,%xmm3 3857 por %xmm3,%xmm4 3858 pxor %xmm3,%xmm3 3859 pcmpeqd %xmm3,%xmm4 3860 pshufd $0,%xmm4,%xmm4 3861 movq 64+0(%rbx),%rdx 3862 movq 64+8(%rbx),%r14 3863 movq 64+16(%rbx),%r15 3864 movq 64+24(%rbx),%r8 3865.byte 102,72,15,110,203 3866 3867 leaq 64-128(%rbx),%rsi 3868 leaq 32(%rsp),%rdi 3869 call __ecp_nistz256_sqr_montx 3870 3871 movq 544(%rsp),%rdx 3872 leaq 544(%rsp),%rbx 3873 movq 0+96(%rsp),%r9 3874 movq 8+96(%rsp),%r10 3875 leaq -128+96(%rsp),%rsi 3876 movq 16+96(%rsp),%r11 3877 movq 24+96(%rsp),%r12 3878 leaq 224(%rsp),%rdi 3879 call __ecp_nistz256_mul_montx 3880 3881 movq 448(%rsp),%rdx 3882 leaq 448(%rsp),%rbx 3883 movq 0+32(%rsp),%r9 3884 movq 8+32(%rsp),%r10 3885 leaq -128+32(%rsp),%rsi 3886 movq 16+32(%rsp),%r11 3887 movq 24+32(%rsp),%r12 3888 leaq 256(%rsp),%rdi 3889 call __ecp_nistz256_mul_montx 3890 3891 movq 416(%rsp),%rdx 3892 leaq 416(%rsp),%rbx 3893 movq 0+224(%rsp),%r9 3894 movq 8+224(%rsp),%r10 3895 leaq -128+224(%rsp),%rsi 3896 movq 16+224(%rsp),%r11 3897 movq 24+224(%rsp),%r12 3898 leaq 224(%rsp),%rdi 3899 call __ecp_nistz256_mul_montx 3900 3901 movq 512(%rsp),%rdx 3902 leaq 512(%rsp),%rbx 3903 movq 0+256(%rsp),%r9 3904 movq 8+256(%rsp),%r10 3905 leaq -128+256(%rsp),%rsi 3906 movq 16+256(%rsp),%r11 3907 movq 24+256(%rsp),%r12 3908 leaq 256(%rsp),%rdi 3909 call __ecp_nistz256_mul_montx 3910 3911 leaq 224(%rsp),%rbx 3912 leaq 64(%rsp),%rdi 3913 call __ecp_nistz256_sub_fromx 3914 3915 orq %r13,%r12 3916 movdqa %xmm4,%xmm2 3917 orq %r8,%r12 3918 orq %r9,%r12 3919 por %xmm5,%xmm2 3920.byte 102,73,15,110,220 3921 3922 movq 384(%rsp),%rdx 3923 leaq 384(%rsp),%rbx 3924 movq 0+96(%rsp),%r9 3925 movq 8+96(%rsp),%r10 3926 leaq -128+96(%rsp),%rsi 3927 movq 16+96(%rsp),%r11 3928 movq 24+96(%rsp),%r12 3929 leaq 160(%rsp),%rdi 3930 call __ecp_nistz256_mul_montx 3931 3932 movq 480(%rsp),%rdx 3933 leaq 480(%rsp),%rbx 3934 movq 0+32(%rsp),%r9 3935 movq 8+32(%rsp),%r10 3936 leaq -128+32(%rsp),%rsi 3937 movq 16+32(%rsp),%r11 3938 movq 24+32(%rsp),%r12 3939 leaq 192(%rsp),%rdi 3940 call __ecp_nistz256_mul_montx 3941 3942 leaq 160(%rsp),%rbx 3943 leaq 0(%rsp),%rdi 3944 call __ecp_nistz256_sub_fromx 3945 3946 orq %r13,%r12 3947 orq %r8,%r12 3948 orq %r9,%r12 3949 3950.byte 102,73,15,126,208 3951.byte 102,73,15,126,217 3952 orq %r8,%r12 3953.byte 0x3e 3954 jnz .Ladd_proceedx 3955 3956 3957 3958 testq %r9,%r9 3959 jz .Ladd_doublex 3960 3961 3962 3963 3964 3965 3966.byte 102,72,15,126,199 3967 pxor %xmm0,%xmm0 3968 movdqu %xmm0,0(%rdi) 3969 movdqu %xmm0,16(%rdi) 3970 movdqu %xmm0,32(%rdi) 3971 movdqu %xmm0,48(%rdi) 3972 movdqu %xmm0,64(%rdi) 3973 movdqu %xmm0,80(%rdi) 3974 jmp .Ladd_donex 3975 3976.align 32 3977.Ladd_doublex: 3978.byte 102,72,15,126,206 3979.byte 102,72,15,126,199 3980 addq $416,%rsp 3981.cfi_adjust_cfa_offset -416 3982 jmp .Lpoint_double_shortcutx 3983.cfi_adjust_cfa_offset 416 3984 3985.align 32 3986.Ladd_proceedx: 3987 movq 0+64(%rsp),%rdx 3988 movq 8+64(%rsp),%r14 3989 leaq -128+64(%rsp),%rsi 3990 movq 16+64(%rsp),%r15 3991 movq 24+64(%rsp),%r8 3992 leaq 96(%rsp),%rdi 3993 call __ecp_nistz256_sqr_montx 3994 3995 movq 448(%rsp),%rdx 3996 leaq 448(%rsp),%rbx 3997 movq 0+0(%rsp),%r9 3998 movq 8+0(%rsp),%r10 3999 leaq -128+0(%rsp),%rsi 4000 movq 16+0(%rsp),%r11 
4001 movq 24+0(%rsp),%r12 4002 leaq 352(%rsp),%rdi 4003 call __ecp_nistz256_mul_montx 4004 4005 movq 0+0(%rsp),%rdx 4006 movq 8+0(%rsp),%r14 4007 leaq -128+0(%rsp),%rsi 4008 movq 16+0(%rsp),%r15 4009 movq 24+0(%rsp),%r8 4010 leaq 32(%rsp),%rdi 4011 call __ecp_nistz256_sqr_montx 4012 4013 movq 544(%rsp),%rdx 4014 leaq 544(%rsp),%rbx 4015 movq 0+352(%rsp),%r9 4016 movq 8+352(%rsp),%r10 4017 leaq -128+352(%rsp),%rsi 4018 movq 16+352(%rsp),%r11 4019 movq 24+352(%rsp),%r12 4020 leaq 352(%rsp),%rdi 4021 call __ecp_nistz256_mul_montx 4022 4023 movq 0(%rsp),%rdx 4024 leaq 0(%rsp),%rbx 4025 movq 0+32(%rsp),%r9 4026 movq 8+32(%rsp),%r10 4027 leaq -128+32(%rsp),%rsi 4028 movq 16+32(%rsp),%r11 4029 movq 24+32(%rsp),%r12 4030 leaq 128(%rsp),%rdi 4031 call __ecp_nistz256_mul_montx 4032 4033 movq 160(%rsp),%rdx 4034 leaq 160(%rsp),%rbx 4035 movq 0+32(%rsp),%r9 4036 movq 8+32(%rsp),%r10 4037 leaq -128+32(%rsp),%rsi 4038 movq 16+32(%rsp),%r11 4039 movq 24+32(%rsp),%r12 4040 leaq 192(%rsp),%rdi 4041 call __ecp_nistz256_mul_montx 4042 4043 4044 4045 4046 xorq %r11,%r11 4047 addq %r12,%r12 4048 leaq 96(%rsp),%rsi 4049 adcq %r13,%r13 4050 movq %r12,%rax 4051 adcq %r8,%r8 4052 adcq %r9,%r9 4053 movq %r13,%rbp 4054 adcq $0,%r11 4055 4056 subq $-1,%r12 4057 movq %r8,%rcx 4058 sbbq %r14,%r13 4059 sbbq $0,%r8 4060 movq %r9,%r10 4061 sbbq %r15,%r9 4062 sbbq $0,%r11 4063 4064 cmovcq %rax,%r12 4065 movq 0(%rsi),%rax 4066 cmovcq %rbp,%r13 4067 movq 8(%rsi),%rbp 4068 cmovcq %rcx,%r8 4069 movq 16(%rsi),%rcx 4070 cmovcq %r10,%r9 4071 movq 24(%rsi),%r10 4072 4073 call __ecp_nistz256_subx 4074 4075 leaq 128(%rsp),%rbx 4076 leaq 288(%rsp),%rdi 4077 call __ecp_nistz256_sub_fromx 4078 4079 movq 192+0(%rsp),%rax 4080 movq 192+8(%rsp),%rbp 4081 movq 192+16(%rsp),%rcx 4082 movq 192+24(%rsp),%r10 4083 leaq 320(%rsp),%rdi 4084 4085 call __ecp_nistz256_subx 4086 4087 movq %r12,0(%rdi) 4088 movq %r13,8(%rdi) 4089 movq %r8,16(%rdi) 4090 movq %r9,24(%rdi) 4091 movq 128(%rsp),%rdx 4092 leaq 128(%rsp),%rbx 4093 movq 0+224(%rsp),%r9 4094 movq 8+224(%rsp),%r10 4095 leaq -128+224(%rsp),%rsi 4096 movq 16+224(%rsp),%r11 4097 movq 24+224(%rsp),%r12 4098 leaq 256(%rsp),%rdi 4099 call __ecp_nistz256_mul_montx 4100 4101 movq 320(%rsp),%rdx 4102 leaq 320(%rsp),%rbx 4103 movq 0+64(%rsp),%r9 4104 movq 8+64(%rsp),%r10 4105 leaq -128+64(%rsp),%rsi 4106 movq 16+64(%rsp),%r11 4107 movq 24+64(%rsp),%r12 4108 leaq 320(%rsp),%rdi 4109 call __ecp_nistz256_mul_montx 4110 4111 leaq 256(%rsp),%rbx 4112 leaq 320(%rsp),%rdi 4113 call __ecp_nistz256_sub_fromx 4114 4115.byte 102,72,15,126,199 4116 4117 movdqa %xmm5,%xmm0 4118 movdqa %xmm5,%xmm1 4119 pandn 352(%rsp),%xmm0 4120 movdqa %xmm5,%xmm2 4121 pandn 352+16(%rsp),%xmm1 4122 movdqa %xmm5,%xmm3 4123 pand 544(%rsp),%xmm2 4124 pand 544+16(%rsp),%xmm3 4125 por %xmm0,%xmm2 4126 por %xmm1,%xmm3 4127 4128 movdqa %xmm4,%xmm0 4129 movdqa %xmm4,%xmm1 4130 pandn %xmm2,%xmm0 4131 movdqa %xmm4,%xmm2 4132 pandn %xmm3,%xmm1 4133 movdqa %xmm4,%xmm3 4134 pand 448(%rsp),%xmm2 4135 pand 448+16(%rsp),%xmm3 4136 por %xmm0,%xmm2 4137 por %xmm1,%xmm3 4138 movdqu %xmm2,64(%rdi) 4139 movdqu %xmm3,80(%rdi) 4140 4141 movdqa %xmm5,%xmm0 4142 movdqa %xmm5,%xmm1 4143 pandn 288(%rsp),%xmm0 4144 movdqa %xmm5,%xmm2 4145 pandn 288+16(%rsp),%xmm1 4146 movdqa %xmm5,%xmm3 4147 pand 480(%rsp),%xmm2 4148 pand 480+16(%rsp),%xmm3 4149 por %xmm0,%xmm2 4150 por %xmm1,%xmm3 4151 4152 movdqa %xmm4,%xmm0 4153 movdqa %xmm4,%xmm1 4154 pandn %xmm2,%xmm0 4155 movdqa %xmm4,%xmm2 4156 pandn %xmm3,%xmm1 4157 movdqa %xmm4,%xmm3 4158 pand 384(%rsp),%xmm2 4159 pand 
384+16(%rsp),%xmm3 4160 por %xmm0,%xmm2 4161 por %xmm1,%xmm3 4162 movdqu %xmm2,0(%rdi) 4163 movdqu %xmm3,16(%rdi) 4164 4165 movdqa %xmm5,%xmm0 4166 movdqa %xmm5,%xmm1 4167 pandn 320(%rsp),%xmm0 4168 movdqa %xmm5,%xmm2 4169 pandn 320+16(%rsp),%xmm1 4170 movdqa %xmm5,%xmm3 4171 pand 512(%rsp),%xmm2 4172 pand 512+16(%rsp),%xmm3 4173 por %xmm0,%xmm2 4174 por %xmm1,%xmm3 4175 4176 movdqa %xmm4,%xmm0 4177 movdqa %xmm4,%xmm1 4178 pandn %xmm2,%xmm0 4179 movdqa %xmm4,%xmm2 4180 pandn %xmm3,%xmm1 4181 movdqa %xmm4,%xmm3 4182 pand 416(%rsp),%xmm2 4183 pand 416+16(%rsp),%xmm3 4184 por %xmm0,%xmm2 4185 por %xmm1,%xmm3 4186 movdqu %xmm2,32(%rdi) 4187 movdqu %xmm3,48(%rdi) 4188 4189.Ladd_donex: 4190 leaq 576+56(%rsp),%rsi 4191.cfi_def_cfa %rsi,8 4192 movq -48(%rsi),%r15 4193.cfi_restore %r15 4194 movq -40(%rsi),%r14 4195.cfi_restore %r14 4196 movq -32(%rsi),%r13 4197.cfi_restore %r13 4198 movq -24(%rsi),%r12 4199.cfi_restore %r12 4200 movq -16(%rsi),%rbx 4201.cfi_restore %rbx 4202 movq -8(%rsi),%rbp 4203.cfi_restore %rbp 4204 leaq (%rsi),%rsp 4205.cfi_def_cfa_register %rsp 4206.Lpoint_addx_epilogue: 4207 .byte 0xf3,0xc3 4208.cfi_endproc 4209.size p256_point_addx,.-p256_point_addx 4210.type p256_point_add_affinex,@function 4211.align 32 4212p256_point_add_affinex: 4213.cfi_startproc 4214.Lpoint_add_affinex: 4215 pushq %rbp 4216.cfi_adjust_cfa_offset 8 4217.cfi_offset %rbp,-16 4218 pushq %rbx 4219.cfi_adjust_cfa_offset 8 4220.cfi_offset %rbx,-24 4221 pushq %r12 4222.cfi_adjust_cfa_offset 8 4223.cfi_offset %r12,-32 4224 pushq %r13 4225.cfi_adjust_cfa_offset 8 4226.cfi_offset %r13,-40 4227 pushq %r14 4228.cfi_adjust_cfa_offset 8 4229.cfi_offset %r14,-48 4230 pushq %r15 4231.cfi_adjust_cfa_offset 8 4232.cfi_offset %r15,-56 4233 subq $480+8,%rsp 4234.cfi_adjust_cfa_offset 32*15+8 4235.Ladd_affinex_body: 4236 4237 movdqu 0(%rsi),%xmm0 4238 movq %rdx,%rbx 4239 movdqu 16(%rsi),%xmm1 4240 movdqu 32(%rsi),%xmm2 4241 movdqu 48(%rsi),%xmm3 4242 movdqu 64(%rsi),%xmm4 4243 movdqu 80(%rsi),%xmm5 4244 movq 64+0(%rsi),%rdx 4245 movq 64+8(%rsi),%r14 4246 movq 64+16(%rsi),%r15 4247 movq 64+24(%rsi),%r8 4248 movdqa %xmm0,320(%rsp) 4249 movdqa %xmm1,320+16(%rsp) 4250 movdqa %xmm2,352(%rsp) 4251 movdqa %xmm3,352+16(%rsp) 4252 movdqa %xmm4,384(%rsp) 4253 movdqa %xmm5,384+16(%rsp) 4254 por %xmm4,%xmm5 4255 4256 movdqu 0(%rbx),%xmm0 4257 pshufd $0xb1,%xmm5,%xmm3 4258 movdqu 16(%rbx),%xmm1 4259 movdqu 32(%rbx),%xmm2 4260 por %xmm3,%xmm5 4261 movdqu 48(%rbx),%xmm3 4262 movdqa %xmm0,416(%rsp) 4263 pshufd $0x1e,%xmm5,%xmm4 4264 movdqa %xmm1,416+16(%rsp) 4265 por %xmm0,%xmm1 4266.byte 102,72,15,110,199 4267 movdqa %xmm2,448(%rsp) 4268 movdqa %xmm3,448+16(%rsp) 4269 por %xmm2,%xmm3 4270 por %xmm4,%xmm5 4271 pxor %xmm4,%xmm4 4272 por %xmm1,%xmm3 4273 4274 leaq 64-128(%rsi),%rsi 4275 leaq 32(%rsp),%rdi 4276 call __ecp_nistz256_sqr_montx 4277 4278 pcmpeqd %xmm4,%xmm5 4279 pshufd $0xb1,%xmm3,%xmm4 4280 movq 0(%rbx),%rdx 4281 4282 movq %r12,%r9 4283 por %xmm3,%xmm4 4284 pshufd $0,%xmm5,%xmm5 4285 pshufd $0x1e,%xmm4,%xmm3 4286 movq %r13,%r10 4287 por %xmm3,%xmm4 4288 pxor %xmm3,%xmm3 4289 movq %r14,%r11 4290 pcmpeqd %xmm3,%xmm4 4291 pshufd $0,%xmm4,%xmm4 4292 4293 leaq 32-128(%rsp),%rsi 4294 movq %r15,%r12 4295 leaq 0(%rsp),%rdi 4296 call __ecp_nistz256_mul_montx 4297 4298 leaq 320(%rsp),%rbx 4299 leaq 64(%rsp),%rdi 4300 call __ecp_nistz256_sub_fromx 4301 4302 movq 384(%rsp),%rdx 4303 leaq 384(%rsp),%rbx 4304 movq 0+32(%rsp),%r9 4305 movq 8+32(%rsp),%r10 4306 leaq -128+32(%rsp),%rsi 4307 movq 16+32(%rsp),%r11 4308 movq 24+32(%rsp),%r12 4309 
leaq 32(%rsp),%rdi 4310 call __ecp_nistz256_mul_montx 4311 4312 movq 384(%rsp),%rdx 4313 leaq 384(%rsp),%rbx 4314 movq 0+64(%rsp),%r9 4315 movq 8+64(%rsp),%r10 4316 leaq -128+64(%rsp),%rsi 4317 movq 16+64(%rsp),%r11 4318 movq 24+64(%rsp),%r12 4319 leaq 288(%rsp),%rdi 4320 call __ecp_nistz256_mul_montx 4321 4322 movq 448(%rsp),%rdx 4323 leaq 448(%rsp),%rbx 4324 movq 0+32(%rsp),%r9 4325 movq 8+32(%rsp),%r10 4326 leaq -128+32(%rsp),%rsi 4327 movq 16+32(%rsp),%r11 4328 movq 24+32(%rsp),%r12 4329 leaq 32(%rsp),%rdi 4330 call __ecp_nistz256_mul_montx 4331 4332 leaq 352(%rsp),%rbx 4333 leaq 96(%rsp),%rdi 4334 call __ecp_nistz256_sub_fromx 4335 4336 movq 0+64(%rsp),%rdx 4337 movq 8+64(%rsp),%r14 4338 leaq -128+64(%rsp),%rsi 4339 movq 16+64(%rsp),%r15 4340 movq 24+64(%rsp),%r8 4341 leaq 128(%rsp),%rdi 4342 call __ecp_nistz256_sqr_montx 4343 4344 movq 0+96(%rsp),%rdx 4345 movq 8+96(%rsp),%r14 4346 leaq -128+96(%rsp),%rsi 4347 movq 16+96(%rsp),%r15 4348 movq 24+96(%rsp),%r8 4349 leaq 192(%rsp),%rdi 4350 call __ecp_nistz256_sqr_montx 4351 4352 movq 128(%rsp),%rdx 4353 leaq 128(%rsp),%rbx 4354 movq 0+64(%rsp),%r9 4355 movq 8+64(%rsp),%r10 4356 leaq -128+64(%rsp),%rsi 4357 movq 16+64(%rsp),%r11 4358 movq 24+64(%rsp),%r12 4359 leaq 160(%rsp),%rdi 4360 call __ecp_nistz256_mul_montx 4361 4362 movq 320(%rsp),%rdx 4363 leaq 320(%rsp),%rbx 4364 movq 0+128(%rsp),%r9 4365 movq 8+128(%rsp),%r10 4366 leaq -128+128(%rsp),%rsi 4367 movq 16+128(%rsp),%r11 4368 movq 24+128(%rsp),%r12 4369 leaq 0(%rsp),%rdi 4370 call __ecp_nistz256_mul_montx 4371 4372 4373 4374 4375 xorq %r11,%r11 4376 addq %r12,%r12 4377 leaq 192(%rsp),%rsi 4378 adcq %r13,%r13 4379 movq %r12,%rax 4380 adcq %r8,%r8 4381 adcq %r9,%r9 4382 movq %r13,%rbp 4383 adcq $0,%r11 4384 4385 subq $-1,%r12 4386 movq %r8,%rcx 4387 sbbq %r14,%r13 4388 sbbq $0,%r8 4389 movq %r9,%r10 4390 sbbq %r15,%r9 4391 sbbq $0,%r11 4392 4393 cmovcq %rax,%r12 4394 movq 0(%rsi),%rax 4395 cmovcq %rbp,%r13 4396 movq 8(%rsi),%rbp 4397 cmovcq %rcx,%r8 4398 movq 16(%rsi),%rcx 4399 cmovcq %r10,%r9 4400 movq 24(%rsi),%r10 4401 4402 call __ecp_nistz256_subx 4403 4404 leaq 160(%rsp),%rbx 4405 leaq 224(%rsp),%rdi 4406 call __ecp_nistz256_sub_fromx 4407 4408 movq 0+0(%rsp),%rax 4409 movq 0+8(%rsp),%rbp 4410 movq 0+16(%rsp),%rcx 4411 movq 0+24(%rsp),%r10 4412 leaq 64(%rsp),%rdi 4413 4414 call __ecp_nistz256_subx 4415 4416 movq %r12,0(%rdi) 4417 movq %r13,8(%rdi) 4418 movq %r8,16(%rdi) 4419 movq %r9,24(%rdi) 4420 movq 352(%rsp),%rdx 4421 leaq 352(%rsp),%rbx 4422 movq 0+160(%rsp),%r9 4423 movq 8+160(%rsp),%r10 4424 leaq -128+160(%rsp),%rsi 4425 movq 16+160(%rsp),%r11 4426 movq 24+160(%rsp),%r12 4427 leaq 32(%rsp),%rdi 4428 call __ecp_nistz256_mul_montx 4429 4430 movq 96(%rsp),%rdx 4431 leaq 96(%rsp),%rbx 4432 movq 0+64(%rsp),%r9 4433 movq 8+64(%rsp),%r10 4434 leaq -128+64(%rsp),%rsi 4435 movq 16+64(%rsp),%r11 4436 movq 24+64(%rsp),%r12 4437 leaq 64(%rsp),%rdi 4438 call __ecp_nistz256_mul_montx 4439 4440 leaq 32(%rsp),%rbx 4441 leaq 256(%rsp),%rdi 4442 call __ecp_nistz256_sub_fromx 4443 4444.byte 102,72,15,126,199 4445 4446 movdqa %xmm5,%xmm0 4447 movdqa %xmm5,%xmm1 4448 pandn 288(%rsp),%xmm0 4449 movdqa %xmm5,%xmm2 4450 pandn 288+16(%rsp),%xmm1 4451 movdqa %xmm5,%xmm3 4452 pand .LONE_mont(%rip),%xmm2 4453 pand .LONE_mont+16(%rip),%xmm3 4454 por %xmm0,%xmm2 4455 por %xmm1,%xmm3 4456 4457 movdqa %xmm4,%xmm0 4458 movdqa %xmm4,%xmm1 4459 pandn %xmm2,%xmm0 4460 movdqa %xmm4,%xmm2 4461 pandn %xmm3,%xmm1 4462 movdqa %xmm4,%xmm3 4463 pand 384(%rsp),%xmm2 4464 pand 384+16(%rsp),%xmm3 4465 por %xmm0,%xmm2 
4466 por %xmm1,%xmm3 4467 movdqu %xmm2,64(%rdi) 4468 movdqu %xmm3,80(%rdi) 4469 4470 movdqa %xmm5,%xmm0 4471 movdqa %xmm5,%xmm1 4472 pandn 224(%rsp),%xmm0 4473 movdqa %xmm5,%xmm2 4474 pandn 224+16(%rsp),%xmm1 4475 movdqa %xmm5,%xmm3 4476 pand 416(%rsp),%xmm2 4477 pand 416+16(%rsp),%xmm3 4478 por %xmm0,%xmm2 4479 por %xmm1,%xmm3 4480 4481 movdqa %xmm4,%xmm0 4482 movdqa %xmm4,%xmm1 4483 pandn %xmm2,%xmm0 4484 movdqa %xmm4,%xmm2 4485 pandn %xmm3,%xmm1 4486 movdqa %xmm4,%xmm3 4487 pand 320(%rsp),%xmm2 4488 pand 320+16(%rsp),%xmm3 4489 por %xmm0,%xmm2 4490 por %xmm1,%xmm3 4491 movdqu %xmm2,0(%rdi) 4492 movdqu %xmm3,16(%rdi) 4493 4494 movdqa %xmm5,%xmm0 4495 movdqa %xmm5,%xmm1 4496 pandn 256(%rsp),%xmm0 4497 movdqa %xmm5,%xmm2 4498 pandn 256+16(%rsp),%xmm1 4499 movdqa %xmm5,%xmm3 4500 pand 448(%rsp),%xmm2 4501 pand 448+16(%rsp),%xmm3 4502 por %xmm0,%xmm2 4503 por %xmm1,%xmm3 4504 4505 movdqa %xmm4,%xmm0 4506 movdqa %xmm4,%xmm1 4507 pandn %xmm2,%xmm0 4508 movdqa %xmm4,%xmm2 4509 pandn %xmm3,%xmm1 4510 movdqa %xmm4,%xmm3 4511 pand 352(%rsp),%xmm2 4512 pand 352+16(%rsp),%xmm3 4513 por %xmm0,%xmm2 4514 por %xmm1,%xmm3 4515 movdqu %xmm2,32(%rdi) 4516 movdqu %xmm3,48(%rdi) 4517 4518 leaq 480+56(%rsp),%rsi 4519.cfi_def_cfa %rsi,8 4520 movq -48(%rsi),%r15 4521.cfi_restore %r15 4522 movq -40(%rsi),%r14 4523.cfi_restore %r14 4524 movq -32(%rsi),%r13 4525.cfi_restore %r13 4526 movq -24(%rsi),%r12 4527.cfi_restore %r12 4528 movq -16(%rsi),%rbx 4529.cfi_restore %rbx 4530 movq -8(%rsi),%rbp 4531.cfi_restore %rbp 4532 leaq (%rsi),%rsp 4533.cfi_def_cfa_register %rsp 4534.Ladd_affinex_epilogue: 4535 .byte 0xf3,0xc3 4536.cfi_endproc 4537.size p256_point_add_affinex,.-p256_point_add_affinex 4538#endif 4539.section .note.GNU-stack,"",@progbits 4540