;-----------------------------------------------------------------------
; NASM, x86-64, Microsoft x64 calling convention.
; 256-bit modular field arithmetic and point routines; the modulus is the
; four-limb constant at $L$poly (little-endian limb order).
;
; Layout restored to one statement per line; instruction order and operand
; text are unchanged.
;
; Conventions visible in the code below:
;   * Public entry points copy the Win64 argument registers rcx/rdx/r8 into
;     rdi/rsi/rdx, after spilling the caller's rdi/rsi to [8+rsp]/[16+rsp]
;     (both are callee-saved on Win64).
;   * "DB 0F3h,0C3h" is an encoded "rep ret".
;   * The "q" helpers use a private register convention described at each
;     helper; they are reached only via "call" from this file.
;-----------------------------------------------------------------------
default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64

EXTERN  OPENSSL_ia32cap_P

; Constant pool.
;   $L$poly      : modulus limbs [0..3]. Limb 0 is all-ones and limb 2 is zero,
;                  which is why the helpers use the immediates -1 and 0 for
;                  those limbs and keep only limbs 1 and 3 in registers.
;   $L$One/Two/Three : eight dwords each, used as lane counters/increments by
;                  the table-select routines.
;   $L$ONE_mont  : not referenced in the visible part of the file; presumably
;                  the value 1 in Montgomery form (per its label) - TODO confirm.
ALIGN   64
$L$poly:
        DQ      0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001

$L$One:
        DD      1,1,1,1,1,1,1,1
$L$Two:
        DD      2,2,2,2,2,2,2,2
$L$Three:
        DD      3,3,3,3,3,3,3,3
$L$ONE_mont:
        DQ      0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe

;-----------------------------------------------------------------------
; ecp_nistz256_neg
; In (Win64): rcx -> 4-limb result, rdx -> 4-limb input a.
; Computes 0 - a with borrow, then adds $L$poly; if the subtraction did not
; borrow (r13 == 0, i.e. a == 0) the un-adjusted value is kept.
; Branch-free: the choice is made with cmovz.
; Saves/restores r12, r13 and the caller's rdi/rsi.
;-----------------------------------------------------------------------
global  ecp_nistz256_neg

ALIGN   32
ecp_nistz256_neg:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_ecp_nistz256_neg:
        mov     rdi,rcx
        mov     rsi,rdx

        push    r12
        push    r13

        xor     r8,r8
        xor     r9,r9
        xor     r10,r10
        xor     r11,r11
        xor     r13,r13

        ; r11:r10:r9:r8 = 0 - a, r13 = 0 - borrow
        sub     r8,QWORD[rsi]
        sbb     r9,QWORD[8+rsi]
        sbb     r10,QWORD[16+rsi]
        mov     rax,r8
        sbb     r11,QWORD[24+rsi]
        lea     rsi,[$L$poly]
        mov     rdx,r9
        sbb     r13,0

        ; add the modulus; copies of the un-adjusted limbs sit in rax,rdx,rcx,r12
        add     r8,QWORD[rsi]
        mov     rcx,r10
        adc     r9,QWORD[8+rsi]
        adc     r10,QWORD[16+rsi]
        mov     r12,r11
        adc     r11,QWORD[24+rsi]
        test    r13,r13

        cmovz   r8,rax
        cmovz   r9,rdx
        mov     QWORD[rdi],r8
        cmovz   r10,rcx
        mov     QWORD[8+rdi],r9
        cmovz   r11,r12
        mov     QWORD[16+rdi],r10
        mov     QWORD[24+rdi],r11

        pop     r13
        pop     r12
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret
$L$SEH_end_ecp_nistz256_neg:

;-----------------------------------------------------------------------
; ecp_nistz256_mul_mont
; In (Win64): rcx -> result, rdx -> a, r8 -> b (4 limbs each).
; Thin wrapper: saves the callee-saved registers, loads a[0..3] into
; r9..r12 and b[0] into rax, then calls __ecp_nistz256_mul_montq.
;-----------------------------------------------------------------------
global  ecp_nistz256_mul_mont

ALIGN   32
ecp_nistz256_mul_mont:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_ecp_nistz256_mul_mont:
        mov     rdi,rcx
        mov     rsi,rdx
        mov     rdx,r8

$L$mul_mont:
        push    rbp
        push    rbx
        push    r12
        push    r13
        push    r14
        push    r15
        mov     rbx,rdx
        mov     rax,QWORD[rdx]
        mov     r9,QWORD[rsi]
        mov     r10,QWORD[8+rsi]
        mov     r11,QWORD[16+rsi]
        mov     r12,QWORD[24+rsi]

        call    __ecp_nistz256_mul_montq
$L$mul_mont_done:
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbx
        pop     rbp
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret
$L$SEH_end_ecp_nistz256_mul_mont:

;-----------------------------------------------------------------------
; __ecp_nistz256_mul_montq  (internal, private register convention)
; In:   rax = b[0], rbx -> b, rsi -> a, r9..r12 = a[0..3], rdi -> result
; Out:  result limbs in r12,r13,r8,r9 and stored to [rdi..rdi+31]
;       r14 = $L$poly[1], r15 = $L$poly[3]  (callers in this file rely on
;       these staying loaded for the add/sub helpers)
; Clobbers: rax, rbx, rcx, rdx, rbp, r10, r11, flags
; Structure: four "multiply a by b[i]" passes, each followed by a reduction
; step that folds the lowest accumulator limb back in using shifts by 32 and
; one multiply by r15; finishes with a conditional subtraction of the
; modulus selected by cmovc (no branches).
;-----------------------------------------------------------------------
ALIGN   32
__ecp_nistz256_mul_montq:
        ; pass 0: acc = a * b[0]
        mov     rbp,rax
        mul     r9
        mov     r14,QWORD[(($L$poly+8))]
        mov     r8,rax
        mov     rax,rbp
        mov     r9,rdx

        mul     r10
        mov     r15,QWORD[(($L$poly+24))]
        add     r9,rax
        mov     rax,rbp
        adc     rdx,0
        mov     r10,rdx

        mul     r11
        add     r10,rax
        mov     rax,rbp
        adc     rdx,0
        mov     r11,rdx

        mul     r12
        add     r11,rax
        mov     rax,r8
        adc     rdx,0
        xor     r13,r13
        mov     r12,rdx

        ; reduction step 0 (folds r8)
        mov     rbp,r8
        shl     r8,32
        mul     r15
        shr     rbp,32
        add     r9,r8
        adc     r10,rbp
        adc     r11,rax
        mov     rax,QWORD[8+rbx]
        adc     r12,rdx
        adc     r13,0
        xor     r8,r8

        ; pass 1: acc += a * b[1]
        mov     rbp,rax
        mul     QWORD[rsi]
        add     r9,rax
        mov     rax,rbp
        adc     rdx,0
        mov     rcx,rdx

        mul     QWORD[8+rsi]
        add     r10,rcx
        adc     rdx,0
        add     r10,rax
        mov     rax,rbp
        adc     rdx,0
        mov     rcx,rdx

        mul     QWORD[16+rsi]
        add     r11,rcx
        adc     rdx,0
        add     r11,rax
        mov     rax,rbp
        adc     rdx,0
        mov     rcx,rdx

        mul     QWORD[24+rsi]
        add     r12,rcx
        adc     rdx,0
        add     r12,rax
        mov     rax,r9
        adc     r13,rdx
        adc     r8,0

        ; reduction step 1 (folds r9)
        mov     rbp,r9
        shl     r9,32
        mul     r15
        shr     rbp,32
        add     r10,r9
        adc     r11,rbp
        adc     r12,rax
        mov     rax,QWORD[16+rbx]
        adc     r13,rdx
        adc     r8,0
        xor     r9,r9

        ; pass 2: acc += a * b[2]
        mov     rbp,rax
        mul     QWORD[rsi]
        add     r10,rax
        mov     rax,rbp
        adc     rdx,0
        mov     rcx,rdx

        mul     QWORD[8+rsi]
        add     r11,rcx
        adc     rdx,0
        add     r11,rax
        mov     rax,rbp
        adc     rdx,0
        mov     rcx,rdx

        mul     QWORD[16+rsi]
        add     r12,rcx
        adc     rdx,0
        add     r12,rax
        mov     rax,rbp
        adc     rdx,0
        mov     rcx,rdx

        mul     QWORD[24+rsi]
        add     r13,rcx
        adc     rdx,0
        add     r13,rax
        mov     rax,r10
        adc     r8,rdx
        adc     r9,0

        ; reduction step 2 (folds r10)
        mov     rbp,r10
        shl     r10,32
        mul     r15
        shr     rbp,32
        add     r11,r10
        adc     r12,rbp
        adc     r13,rax
        mov     rax,QWORD[24+rbx]
        adc     r8,rdx
        adc     r9,0
        xor     r10,r10

        ; pass 3: acc += a * b[3]
        mov     rbp,rax
        mul     QWORD[rsi]
        add     r11,rax
        mov     rax,rbp
        adc     rdx,0
        mov     rcx,rdx

        mul     QWORD[8+rsi]
        add     r12,rcx
        adc     rdx,0
        add     r12,rax
        mov     rax,rbp
        adc     rdx,0
        mov     rcx,rdx

        mul     QWORD[16+rsi]
        add     r13,rcx
        adc     rdx,0
        add     r13,rax
        mov     rax,rbp
        adc     rdx,0
        mov     rcx,rdx

        mul     QWORD[24+rsi]
        add     r8,rcx
        adc     rdx,0
        add     r8,rax
        mov     rax,r11
        adc     r9,rdx
        adc     r10,0

        ; reduction step 3 (folds r11); keep copies for the final select
        mov     rbp,r11
        shl     r11,32
        mul     r15
        shr     rbp,32
        add     r12,r11
        adc     r13,rbp
        mov     rcx,r12
        adc     r8,rax
        adc     r9,rdx
        mov     rbp,r13
        adc     r10,0

        ; trial subtraction of the modulus (-1, r14, 0, r15)
        sub     r12,-1
        mov     rbx,r8
        sbb     r13,r14
        sbb     r8,0
        mov     rdx,r9
        sbb     r9,r15
        sbb     r10,0

        ; borrow => keep the un-subtracted value
        cmovc   r12,rcx
        cmovc   r13,rbp
        mov     QWORD[rdi],r12
        cmovc   r8,rbx
        mov     QWORD[8+rdi],r13
        cmovc   r9,rdx
        mov     QWORD[16+rdi],r8
        mov     QWORD[24+rdi],r9

        DB      0F3h,0C3h               ;repret

;-----------------------------------------------------------------------
; ecp_nistz256_sqr_mont
; In (Win64): rcx -> result, rdx -> a (4 limbs each).
; Thin wrapper: saves callee-saved registers, loads a[0..3] into
; rax, r14, r15, r8 and calls __ecp_nistz256_sqr_montq.
;-----------------------------------------------------------------------
global  ecp_nistz256_sqr_mont

ALIGN   32
ecp_nistz256_sqr_mont:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_ecp_nistz256_sqr_mont:
        mov     rdi,rcx
        mov     rsi,rdx

        push    rbp
        push    rbx
        push    r12
        push    r13
        push    r14
        push    r15
        mov     rax,QWORD[rsi]
        mov     r14,QWORD[8+rsi]
        mov     r15,QWORD[16+rsi]
        mov     r8,QWORD[24+rsi]

        call    __ecp_nistz256_sqr_montq
$L$sqr_mont_done:
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbx
        pop     rbp
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret
$L$SEH_end_ecp_nistz256_sqr_mont:

;-----------------------------------------------------------------------
; __ecp_nistz256_sqr_montq  (internal, private register convention)
; In:   rax = a[0], r14 = a[1], r15 = a[2], r8 = a[3], rsi -> a, rdi -> result
; Out:  result limbs in r12,r13,r14,r15 and stored to [rdi..rdi+31]
;       rsi = $L$poly[1], rbp = $L$poly[3]  (note: rsi no longer points at a)
; Clobbers: rax, rcx, rdx, r8..r11, flags
; Structure: off-diagonal products, doubled; diagonal squares added; four
; reduction steps; final conditional subtraction selected by cmovc.
;-----------------------------------------------------------------------
ALIGN   32
__ecp_nistz256_sqr_montq:
        ; off-diagonal products a[i]*a[j], i<j
        mov     r13,rax
        mul     r14
        mov     r9,rax
        mov     rax,r15
        mov     r10,rdx

        mul     r13
        add     r10,rax
        mov     rax,r8
        adc     rdx,0
        mov     r11,rdx

        mul     r13
        add     r11,rax
        mov     rax,r15
        adc     rdx,0
        mov     r12,rdx

        mul     r14
        add     r11,rax
        mov     rax,r8
        adc     rdx,0
        mov     rbp,rdx

        mul     r14
        add     r12,rax
        mov     rax,r8
        adc     rdx,0
        add     r12,rbp
        mov     r13,rdx
        adc     r13,0

        mul     r15
        xor     r15,r15
        add     r13,rax
        mov     rax,QWORD[rsi]
        mov     r14,rdx
        adc     r14,0

        ; double the off-diagonal sum
        add     r9,r9
        adc     r10,r10
        adc     r11,r11
        adc     r12,r12
        adc     r13,r13
        adc     r14,r14
        adc     r15,0

        ; add the diagonal squares a[i]^2
        mul     rax
        mov     r8,rax
        mov     rax,QWORD[8+rsi]
        mov     rcx,rdx

        mul     rax
        add     r9,rcx
        adc     r10,rax
        mov     rax,QWORD[16+rsi]
        adc     rdx,0
        mov     rcx,rdx

        mul     rax
        add     r11,rcx
        adc     r12,rax
        mov     rax,QWORD[24+rsi]
        adc     rdx,0
        mov     rcx,rdx

        mul     rax
        add     r13,rcx
        adc     r14,rax
        mov     rax,r8
        adc     r15,rdx

        mov     rsi,QWORD[(($L$poly+8))]
        mov     rbp,QWORD[(($L$poly+24))]

        ; reduction step 0 (folds r8)
        mov     rcx,r8
        shl     r8,32
        mul     rbp
        shr     rcx,32
        add     r9,r8
        adc     r10,rcx
        adc     r11,rax
        mov     rax,r9
        adc     rdx,0

        ; reduction step 1 (folds r9)
        mov     rcx,r9
        shl     r9,32
        mov     r8,rdx
        mul     rbp
        shr     rcx,32
        add     r10,r9
        adc     r11,rcx
        adc     r8,rax
        mov     rax,r10
        adc     rdx,0

        ; reduction step 2 (folds r10)
        mov     rcx,r10
        shl     r10,32
        mov     r9,rdx
        mul     rbp
        shr     rcx,32
        add     r11,r10
        adc     r8,rcx
        adc     r9,rax
        mov     rax,r11
        adc     rdx,0

        ; reduction step 3 (folds r11)
        mov     rcx,r11
        shl     r11,32
        mov     r10,rdx
        mul     rbp
        shr     rcx,32
        add     r8,r11
        adc     r9,rcx
        adc     r10,rax
        adc     rdx,0
        xor     r11,r11

        ; add the reduced low half to the high half
        add     r12,r8
        adc     r13,r9
        mov     r8,r12
        adc     r14,r10
        adc     r15,rdx
        mov     r9,r13
        adc     r11,0

        ; trial subtraction of the modulus (-1, rsi, 0, rbp)
        sub     r12,-1
        mov     r10,r14
        sbb     r13,rsi
        sbb     r14,0
        mov     rcx,r15
        sbb     r15,rbp
        sbb     r11,0

        cmovc   r12,r8
        cmovc   r13,r9
        mov     QWORD[rdi],r12
        cmovc   r14,r10
        mov     QWORD[8+rdi],r13
        cmovc   r15,rcx
        mov     QWORD[16+rdi],r14
        mov     QWORD[24+rdi],r15

        DB      0F3h,0C3h               ;repret

;-----------------------------------------------------------------------
; ecp_nistz256_select_w5
; In (Win64): rcx -> 96-byte output, rdx -> table, r8d = index.
; Reads all 16 entries of 96 bytes each and ORs in the one whose 1-based
; position equals the index (mask from pcmpeqd), so the memory access
; pattern does not depend on the index. Index 0 matches no entry and
; yields an all-zero output.
; Dispatch: if bit 5 (value 32) of the dword at OPENSSL_ia32cap_P+8 is set,
; control goes to the AVX2 variant (presumably the AVX2 feature flag -
; TODO confirm against the capability-vector definition).
; The table is read with movdqa, so it must be 16-byte aligned.
; The DB block is a raw-encoded prologue: lea rsp,[rax-0x20] followed by
; movaps stores of xmm6..xmm15 to [rax-0x20]..[rax+0x70]; with
; rax = rsp-136 this allocates 168 bytes and saves the Win64 callee-saved
; XMM registers at [rsp]..[rsp+144], matching the restores at the end.
;-----------------------------------------------------------------------
global  ecp_nistz256_select_w5

ALIGN   32
ecp_nistz256_select_w5:
        lea     rax,[OPENSSL_ia32cap_P]
        mov     rax,QWORD[8+rax]
        test    eax,32
        jnz     NEAR $L$avx2_select_w5
        lea     rax,[((-136))+rsp]
$L$SEH_begin_ecp_nistz256_select_w5:
DB      0x48,0x8d,0x60,0xe0
DB      0x0f,0x29,0x70,0xe0
DB      0x0f,0x29,0x78,0xf0
DB      0x44,0x0f,0x29,0x00
DB      0x44,0x0f,0x29,0x48,0x10
DB      0x44,0x0f,0x29,0x50,0x20
DB      0x44,0x0f,0x29,0x58,0x30
DB      0x44,0x0f,0x29,0x60,0x40
DB      0x44,0x0f,0x29,0x68,0x50
DB      0x44,0x0f,0x29,0x70,0x60
DB      0x44,0x0f,0x29,0x78,0x70
        movdqa  xmm0,XMMWORD[$L$One]
        movd    xmm1,r8d

        ; xmm2..xmm7 accumulate the selected entry
        pxor    xmm2,xmm2
        pxor    xmm3,xmm3
        pxor    xmm4,xmm4
        pxor    xmm5,xmm5
        pxor    xmm6,xmm6
        pxor    xmm7,xmm7

        movdqa  xmm8,xmm0               ; running position counter, starts at 1
        pshufd  xmm1,xmm1,0             ; broadcast index to all lanes

        mov     rax,16
$L$select_loop_sse_w5:

        movdqa  xmm15,xmm8
        paddd   xmm8,xmm0
        pcmpeqd xmm15,xmm1              ; all-ones iff position == index

        movdqa  xmm9,XMMWORD[rdx]
        movdqa  xmm10,XMMWORD[16+rdx]
        movdqa  xmm11,XMMWORD[32+rdx]
        movdqa  xmm12,XMMWORD[48+rdx]
        movdqa  xmm13,XMMWORD[64+rdx]
        movdqa  xmm14,XMMWORD[80+rdx]
        lea     rdx,[96+rdx]

        pand    xmm9,xmm15
        pand    xmm10,xmm15
        por     xmm2,xmm9
        pand    xmm11,xmm15
        por     xmm3,xmm10
        pand    xmm12,xmm15
        por     xmm4,xmm11
        pand    xmm13,xmm15
        por     xmm5,xmm12
        pand    xmm14,xmm15
        por     xmm6,xmm13
        por     xmm7,xmm14

        dec     rax
        jnz     NEAR $L$select_loop_sse_w5

        movdqu  XMMWORD[rcx],xmm2
        movdqu  XMMWORD[16+rcx],xmm3
        movdqu  XMMWORD[32+rcx],xmm4
        movdqu  XMMWORD[48+rcx],xmm5
        movdqu  XMMWORD[64+rcx],xmm6
        movdqu  XMMWORD[80+rcx],xmm7
        movaps  xmm6,XMMWORD[rsp]
        movaps  xmm7,XMMWORD[16+rsp]
        movaps  xmm8,XMMWORD[32+rsp]
        movaps  xmm9,XMMWORD[48+rsp]
        movaps  xmm10,XMMWORD[64+rsp]
        movaps  xmm11,XMMWORD[80+rsp]
        movaps  xmm12,XMMWORD[96+rsp]
        movaps  xmm13,XMMWORD[112+rsp]
        movaps  xmm14,XMMWORD[128+rsp]
        movaps  xmm15,XMMWORD[144+rsp]
        lea     rsp,[168+rsp]
$L$SEH_end_ecp_nistz256_select_w5:
        DB      0F3h,0C3h               ;repret

;-----------------------------------------------------------------------
; ecp_nistz256_select_w7
; In (Win64): rcx -> 64-byte output, rdx -> table, r8d = index.
; Same scheme as ecp_nistz256_select_w5 but over 64 entries of 64 bytes.
; Index 0 yields an all-zero output. Dispatches to the AVX2 variant on the
; same capability bit. The table must be 16-byte aligned (movdqa loads).
; Prologue DB block: same raw-encoded xmm6..xmm15 save as in select_w5.
;-----------------------------------------------------------------------
global  ecp_nistz256_select_w7

ALIGN   32
ecp_nistz256_select_w7:
        lea     rax,[OPENSSL_ia32cap_P]
        mov     rax,QWORD[8+rax]
        test    eax,32
        jnz     NEAR $L$avx2_select_w7
        lea     rax,[((-136))+rsp]
$L$SEH_begin_ecp_nistz256_select_w7:
DB      0x48,0x8d,0x60,0xe0
DB      0x0f,0x29,0x70,0xe0
DB      0x0f,0x29,0x78,0xf0
DB      0x44,0x0f,0x29,0x00
DB      0x44,0x0f,0x29,0x48,0x10
DB      0x44,0x0f,0x29,0x50,0x20
DB      0x44,0x0f,0x29,0x58,0x30
DB      0x44,0x0f,0x29,0x60,0x40
DB      0x44,0x0f,0x29,0x68,0x50
DB      0x44,0x0f,0x29,0x70,0x60
DB      0x44,0x0f,0x29,0x78,0x70
        movdqa  xmm8,XMMWORD[$L$One]
        movd    xmm1,r8d

        pxor    xmm2,xmm2
        pxor    xmm3,xmm3
        pxor    xmm4,xmm4
        pxor    xmm5,xmm5

        movdqa  xmm0,xmm8
        pshufd  xmm1,xmm1,0
        mov     rax,64

$L$select_loop_sse_w7:
        movdqa  xmm15,xmm8
        paddd   xmm8,xmm0
        movdqa  xmm9,XMMWORD[rdx]
        movdqa  xmm10,XMMWORD[16+rdx]
        pcmpeqd xmm15,xmm1
        movdqa  xmm11,XMMWORD[32+rdx]
        movdqa  xmm12,XMMWORD[48+rdx]
        lea     rdx,[64+rdx]

        pand    xmm9,xmm15
        pand    xmm10,xmm15
        por     xmm2,xmm9
        pand    xmm11,xmm15
        por     xmm3,xmm10
        pand    xmm12,xmm15
        por     xmm4,xmm11
        prefetcht0 [255+rdx]
        por     xmm5,xmm12

        dec     rax
        jnz     NEAR $L$select_loop_sse_w7

        movdqu  XMMWORD[rcx],xmm2
        movdqu  XMMWORD[16+rcx],xmm3
        movdqu  XMMWORD[32+rcx],xmm4
        movdqu  XMMWORD[48+rcx],xmm5
        movaps  xmm6,XMMWORD[rsp]
        movaps  xmm7,XMMWORD[16+rsp]
        movaps  xmm8,XMMWORD[32+rsp]
        movaps  xmm9,XMMWORD[48+rsp]
        movaps  xmm10,XMMWORD[64+rsp]
        movaps  xmm11,XMMWORD[80+rsp]
        movaps  xmm12,XMMWORD[96+rsp]
        movaps  xmm13,XMMWORD[112+rsp]
        movaps  xmm14,XMMWORD[128+rsp]
        movaps  xmm15,XMMWORD[144+rsp]
        lea     rsp,[168+rsp]
$L$SEH_end_ecp_nistz256_select_w7:
        DB      0F3h,0C3h               ;repret

;-----------------------------------------------------------------------
; ecp_nistz256_avx2_select_w5  (not exported; reached via $L$avx2_select_w5)
; Same contract as ecp_nistz256_select_w5: rcx -> 96-byte output,
; rdx -> table, r8d = index. Handles two 96-byte entries per iteration
; (counters ymm5 = 1,3,5,... and ymm10 = 2,4,6,...), 8 iterations.
; The table is read with vmovdqa ymm loads, so it must be 32-byte aligned.
; Prologue DB block: lea rsp,[rax-0x20] then VEX-encoded vmovaps stores of
; xmm6..xmm15 to [rax-0x20]..[rax+0x70].
;-----------------------------------------------------------------------
ALIGN   32
ecp_nistz256_avx2_select_w5:
$L$avx2_select_w5:
        vzeroupper
        lea     rax,[((-136))+rsp]
$L$SEH_begin_ecp_nistz256_avx2_select_w5:
DB      0x48,0x8d,0x60,0xe0
DB      0xc5,0xf8,0x29,0x70,0xe0
DB      0xc5,0xf8,0x29,0x78,0xf0
DB      0xc5,0x78,0x29,0x40,0x00
DB      0xc5,0x78,0x29,0x48,0x10
DB      0xc5,0x78,0x29,0x50,0x20
DB      0xc5,0x78,0x29,0x58,0x30
DB      0xc5,0x78,0x29,0x60,0x40
DB      0xc5,0x78,0x29,0x68,0x50
DB      0xc5,0x78,0x29,0x70,0x60
DB      0xc5,0x78,0x29,0x78,0x70
        vmovdqa ymm0,YMMWORD[$L$Two]

        vpxor   ymm2,ymm2,ymm2
        vpxor   ymm3,ymm3,ymm3
        vpxor   ymm4,ymm4,ymm4

        vmovdqa ymm5,YMMWORD[$L$One]
        vmovdqa ymm10,YMMWORD[$L$Two]

        vmovd   xmm1,r8d
        vpermd  ymm1,ymm2,ymm1          ; ymm2 is zero: broadcast dword 0 (index)

        mov     rax,8
$L$select_loop_avx2_w5:

        vmovdqa ymm6,YMMWORD[rdx]
        vmovdqa ymm7,YMMWORD[32+rdx]
        vmovdqa ymm8,YMMWORD[64+rdx]

        vmovdqa ymm11,YMMWORD[96+rdx]
        vmovdqa ymm12,YMMWORD[128+rdx]
        vmovdqa ymm13,YMMWORD[160+rdx]

        vpcmpeqd ymm9,ymm5,ymm1
        vpcmpeqd ymm14,ymm10,ymm1

        vpaddd  ymm5,ymm5,ymm0
        vpaddd  ymm10,ymm10,ymm0
        lea     rdx,[192+rdx]

        vpand   ymm6,ymm6,ymm9
        vpand   ymm7,ymm7,ymm9
        vpand   ymm8,ymm8,ymm9
        vpand   ymm11,ymm11,ymm14
        vpand   ymm12,ymm12,ymm14
        vpand   ymm13,ymm13,ymm14

        vpxor   ymm2,ymm2,ymm6
        vpxor   ymm3,ymm3,ymm7
        vpxor   ymm4,ymm4,ymm8
        vpxor   ymm2,ymm2,ymm11
        vpxor   ymm3,ymm3,ymm12
        vpxor   ymm4,ymm4,ymm13

        dec     rax
        jnz     NEAR $L$select_loop_avx2_w5

        vmovdqu YMMWORD[rcx],ymm2
        vmovdqu YMMWORD[32+rcx],ymm3
        vmovdqu YMMWORD[64+rcx],ymm4
        vzeroupper
        movaps  xmm6,XMMWORD[rsp]
        movaps  xmm7,XMMWORD[16+rsp]
        movaps  xmm8,XMMWORD[32+rsp]
        movaps  xmm9,XMMWORD[48+rsp]
        movaps  xmm10,XMMWORD[64+rsp]
        movaps  xmm11,XMMWORD[80+rsp]
        movaps  xmm12,XMMWORD[96+rsp]
        movaps  xmm13,XMMWORD[112+rsp]
        movaps  xmm14,XMMWORD[128+rsp]
        movaps  xmm15,XMMWORD[144+rsp]
        lea     rsp,[168+rsp]
$L$SEH_end_ecp_nistz256_avx2_select_w5:
        DB      0F3h,0C3h               ;repret

;-----------------------------------------------------------------------
; ecp_nistz256_avx2_select_w7
; In (Win64): rcx -> 64-byte output, rdx -> table, r8d = index.
; Handles three 64-byte entries per iteration for 21 iterations (63
; entries), then one trailing entry after the loop, 64 in total.
; The table must be 32-byte aligned (vmovdqa ymm loads).
; Prologue DB block: same VEX-encoded xmm6..xmm15 save as in
; ecp_nistz256_avx2_select_w5.
;-----------------------------------------------------------------------
global  ecp_nistz256_avx2_select_w7

ALIGN   32
ecp_nistz256_avx2_select_w7:
$L$avx2_select_w7:
        vzeroupper
        lea     rax,[((-136))+rsp]
$L$SEH_begin_ecp_nistz256_avx2_select_w7:
DB      0x48,0x8d,0x60,0xe0
DB      0xc5,0xf8,0x29,0x70,0xe0
DB      0xc5,0xf8,0x29,0x78,0xf0
DB      0xc5,0x78,0x29,0x40,0x00
DB      0xc5,0x78,0x29,0x48,0x10
DB      0xc5,0x78,0x29,0x50,0x20
DB      0xc5,0x78,0x29,0x58,0x30
DB      0xc5,0x78,0x29,0x60,0x40
DB      0xc5,0x78,0x29,0x68,0x50
DB      0xc5,0x78,0x29,0x70,0x60
DB      0xc5,0x78,0x29,0x78,0x70
        vmovdqa ymm0,YMMWORD[$L$Three]

        vpxor   ymm2,ymm2,ymm2
        vpxor   ymm3,ymm3,ymm3

        vmovdqa ymm4,YMMWORD[$L$One]
        vmovdqa ymm8,YMMWORD[$L$Two]
        vmovdqa ymm12,YMMWORD[$L$Three]

        vmovd   xmm1,r8d
        vpermd  ymm1,ymm2,ymm1          ; ymm2 is zero: broadcast dword 0 (index)

        mov     rax,21
$L$select_loop_avx2_w7:

        vmovdqa ymm5,YMMWORD[rdx]
        vmovdqa ymm6,YMMWORD[32+rdx]

        vmovdqa ymm9,YMMWORD[64+rdx]
        vmovdqa ymm10,YMMWORD[96+rdx]

        vmovdqa ymm13,YMMWORD[128+rdx]
        vmovdqa ymm14,YMMWORD[160+rdx]

        vpcmpeqd ymm7,ymm4,ymm1
        vpcmpeqd ymm11,ymm8,ymm1
        vpcmpeqd ymm15,ymm12,ymm1

        vpaddd  ymm4,ymm4,ymm0
        vpaddd  ymm8,ymm8,ymm0
        vpaddd  ymm12,ymm12,ymm0
        lea     rdx,[192+rdx]

        vpand   ymm5,ymm5,ymm7
        vpand   ymm6,ymm6,ymm7
        vpand   ymm9,ymm9,ymm11
        vpand   ymm10,ymm10,ymm11
        vpand   ymm13,ymm13,ymm15
        vpand   ymm14,ymm14,ymm15

        vpxor   ymm2,ymm2,ymm5
        vpxor   ymm3,ymm3,ymm6
        vpxor   ymm2,ymm2,ymm9
        vpxor   ymm3,ymm3,ymm10
        vpxor   ymm2,ymm2,ymm13
        vpxor   ymm3,ymm3,ymm14

        dec     rax
        jnz     NEAR $L$select_loop_avx2_w7

        ; trailing 64th entry (counter ymm4 now holds 64)
        vmovdqa ymm5,YMMWORD[rdx]
        vmovdqa ymm6,YMMWORD[32+rdx]

        vpcmpeqd ymm7,ymm4,ymm1

        vpand   ymm5,ymm5,ymm7
        vpand   ymm6,ymm6,ymm7

        vpxor   ymm2,ymm2,ymm5
        vpxor   ymm3,ymm3,ymm6

        vmovdqu YMMWORD[rcx],ymm2
        vmovdqu YMMWORD[32+rcx],ymm3
        vzeroupper
        movaps  xmm6,XMMWORD[rsp]
        movaps  xmm7,XMMWORD[16+rsp]
        movaps  xmm8,XMMWORD[32+rsp]
        movaps  xmm9,XMMWORD[48+rsp]
        movaps  xmm10,XMMWORD[64+rsp]
        movaps  xmm11,XMMWORD[80+rsp]
        movaps  xmm12,XMMWORD[96+rsp]
        movaps  xmm13,XMMWORD[112+rsp]
        movaps  xmm14,XMMWORD[128+rsp]
        movaps  xmm15,XMMWORD[144+rsp]
        lea     rsp,[168+rsp]
$L$SEH_end_ecp_nistz256_avx2_select_w7:
        DB      0F3h,0C3h               ;repret

;-----------------------------------------------------------------------
; __ecp_nistz256_add_toq  (internal)
; In:   r12,r13,r8,r9 = a[0..3], rbx -> b, rdi -> result,
;       r14 = $L$poly[1], r15 = $L$poly[3]
; Out:  (a + b) with one conditional subtraction of the modulus, in
;       r12,r13,r8,r9 and stored to [rdi]
; Clobbers: rax, rbp, rcx, r10, r11, flags
;-----------------------------------------------------------------------
ALIGN   32
__ecp_nistz256_add_toq:
        xor     r11,r11
        add     r12,QWORD[rbx]
        adc     r13,QWORD[8+rbx]
        mov     rax,r12
        adc     r8,QWORD[16+rbx]
        adc     r9,QWORD[24+rbx]
        mov     rbp,r13
        adc     r11,0

        sub     r12,-1
        mov     rcx,r8
        sbb     r13,r14
        sbb     r8,0
        mov     r10,r9
        sbb     r9,r15
        sbb     r11,0

        cmovc   r12,rax
        cmovc   r13,rbp
        mov     QWORD[rdi],r12
        cmovc   r8,rcx
        mov     QWORD[8+rdi],r13
        cmovc   r9,r10
        mov     QWORD[16+rdi],r8
        mov     QWORD[24+rdi],r9

        DB      0F3h,0C3h               ;repret

;-----------------------------------------------------------------------
; __ecp_nistz256_sub_fromq  (internal)
; In:   r12,r13,r8,r9 = a[0..3], rbx -> b, rdi -> result,
;       r14 = $L$poly[1], r15 = $L$poly[3]
; Out:  (a - b), with the modulus added back when the subtraction borrowed
;       (r11 != 0), in r12,r13,r8,r9 and stored to [rdi]
; Clobbers: rax, rbp, rcx, r10, r11, flags
;-----------------------------------------------------------------------
ALIGN   32
__ecp_nistz256_sub_fromq:
        sub     r12,QWORD[rbx]
        sbb     r13,QWORD[8+rbx]
        mov     rax,r12
        sbb     r8,QWORD[16+rbx]
        sbb     r9,QWORD[24+rbx]
        mov     rbp,r13
        sbb     r11,r11                 ; r11 = borrow ? -1 : 0

        add     r12,-1
        mov     rcx,r8
        adc     r13,r14
        adc     r8,0
        mov     r10,r9
        adc     r9,r15
        test    r11,r11

        cmovz   r12,rax
        cmovz   r13,rbp
        mov     QWORD[rdi],r12
        cmovz   r8,rcx
        mov     QWORD[8+rdi],r13
        cmovz   r9,r10
        mov     QWORD[16+rdi],r8
        mov     QWORD[24+rdi],r9

        DB      0F3h,0C3h               ;repret

;-----------------------------------------------------------------------
; __ecp_nistz256_subq  (internal)
; In:   rax,rbp,rcx,r10 = a[0..3], r12,r13,r8,r9 = b[0..3],
;       r14 = $L$poly[1], r15 = $L$poly[3]
; Out:  (a - b), with the modulus added back on borrow, in r12,r13,r8,r9.
;       Nothing is stored to memory; rdi is not used.
; Clobbers: rax, rbp, rcx, r10, r11, flags
;-----------------------------------------------------------------------
ALIGN   32
__ecp_nistz256_subq:
        sub     rax,r12
        sbb     rbp,r13
        mov     r12,rax
        sbb     rcx,r8
        sbb     r10,r9
        mov     r13,rbp
        sbb     r11,r11                 ; r11 = borrow ? -1 : 0

        add     rax,-1
        mov     r8,rcx
        adc     rbp,r14
        adc     rcx,0
        mov     r9,r10
        adc     r10,r15
        test    r11,r11

        cmovnz  r12,rax
        cmovnz  r13,rbp
        cmovnz  r8,rcx
        cmovnz  r9,r10

        DB      0F3h,0C3h               ;repret

;-----------------------------------------------------------------------
; __ecp_nistz256_mul_by_2q  (internal)
; In:   r12,r13,r8,r9 = a[0..3], rdi -> result,
;       r14 = $L$poly[1], r15 = $L$poly[3]
; Out:  (a + a) with one conditional subtraction of the modulus, in
;       r12,r13,r8,r9 and stored to [rdi]
; Clobbers: rax, rbp, rcx, r10, r11, flags
;-----------------------------------------------------------------------
ALIGN   32
__ecp_nistz256_mul_by_2q:
        xor     r11,r11
        add     r12,r12
        adc     r13,r13
        mov     rax,r12
        adc     r8,r8
        adc     r9,r9
        mov     rbp,r13
        adc     r11,0

        sub     r12,-1
        mov     rcx,r8
        sbb     r13,r14
        sbb     r8,0
        mov     r10,r9
        sbb     r9,r15
        sbb     r11,0

        cmovc   r12,rax
        cmovc   r13,rbp
        mov     QWORD[rdi],r12
        cmovc   r8,rcx
        mov     QWORD[8+rdi],r13
        cmovc   r9,r10
        mov     QWORD[16+rdi],r8
        mov     QWORD[24+rdi],r9

        DB      0F3h,0C3h               ;repret

;-----------------------------------------------------------------------
; ecp_nistz256_point_double
; In (Win64): rcx -> 96-byte result, rdx -> 96-byte input point, read as
; three 32-byte coordinates at offsets 0, 32 and 64 (presumably Jacobian
; X, Y, Z - TODO confirm against the C-side point type).
; Frame: 32*5+8 bytes; five 32-byte temporaries at [rsp+0], +32, +64, +96
; (copy of the first input coordinate) and +128.
; The three output coordinate pointers (rdi, rdi+32, rdi+64) are parked in
; xmm0, xmm1, xmm2 and fetched back with raw-encoded movq instructions
; (decoded in the comments on each DB line).
; $L$point_double_shortcutq is also entered from ecp_nistz256_point_add
; with rsi -> point, rdi -> result and the frame already adjusted.
;-----------------------------------------------------------------------
global  ecp_nistz256_point_double

ALIGN   32
ecp_nistz256_point_double:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_ecp_nistz256_point_double:
        mov     rdi,rcx
        mov     rsi,rdx

        push    rbp
        push    rbx
        push    r12
        push    r13
        push    r14
        push    r15
        sub     rsp,32*5+8

$L$point_double_shortcutq:
        movdqu  xmm0,XMMWORD[rsi]
        mov     rbx,rsi
        movdqu  xmm1,XMMWORD[16+rsi]
        mov     r12,QWORD[((32+0))+rsi]
        mov     r13,QWORD[((32+8))+rsi]
        mov     r8,QWORD[((32+16))+rsi]
        mov     r9,QWORD[((32+24))+rsi]
        mov     r14,QWORD[(($L$poly+8))]
        mov     r15,QWORD[(($L$poly+24))]
        movdqa  XMMWORD[96+rsp],xmm0
        movdqa  XMMWORD[(96+16)+rsp],xmm1
        lea     r10,[32+rdi]
        lea     r11,[64+rdi]
DB      102,72,15,110,199               ; movq xmm0,rdi  (result + 0)
DB      102,73,15,110,202               ; movq xmm1,r10  (result + 32)
DB      102,73,15,110,211               ; movq xmm2,r11  (result + 64)

        lea     rdi,[rsp]
        call    __ecp_nistz256_mul_by_2q

        mov     rax,QWORD[((64+0))+rsi]
        mov     r14,QWORD[((64+8))+rsi]
        mov     r15,QWORD[((64+16))+rsi]
        mov     r8,QWORD[((64+24))+rsi]
        lea     rsi,[((64-0))+rsi]
        lea     rdi,[64+rsp]
        call    __ecp_nistz256_sqr_montq

        mov     rax,QWORD[((0+0))+rsp]
        mov     r14,QWORD[((8+0))+rsp]
        lea     rsi,[((0+0))+rsp]
        mov     r15,QWORD[((16+0))+rsp]
        mov     r8,QWORD[((24+0))+rsp]
        lea     rdi,[rsp]
        call    __ecp_nistz256_sqr_montq

        mov     rax,QWORD[32+rbx]
        mov     r9,QWORD[((64+0))+rbx]
        mov     r10,QWORD[((64+8))+rbx]
        mov     r11,QWORD[((64+16))+rbx]
        mov     r12,QWORD[((64+24))+rbx]
        lea     rsi,[((64-0))+rbx]
        lea     rbx,[32+rbx]
DB      102,72,15,126,215               ; movq rdi,xmm2  (result + 64)
        call    __ecp_nistz256_mul_montq
        call    __ecp_nistz256_mul_by_2q

        mov     r12,QWORD[((96+0))+rsp]
        mov     r13,QWORD[((96+8))+rsp]
        lea     rbx,[64+rsp]
        mov     r8,QWORD[((96+16))+rsp]
        mov     r9,QWORD[((96+24))+rsp]
        lea     rdi,[32+rsp]
        call    __ecp_nistz256_add_toq

        mov     r12,QWORD[((96+0))+rsp]
        mov     r13,QWORD[((96+8))+rsp]
        lea     rbx,[64+rsp]
        mov     r8,QWORD[((96+16))+rsp]
        mov     r9,QWORD[((96+24))+rsp]
        lea     rdi,[64+rsp]
        call    __ecp_nistz256_sub_fromq

        mov     rax,QWORD[((0+0))+rsp]
        mov     r14,QWORD[((8+0))+rsp]
        lea     rsi,[((0+0))+rsp]
        mov     r15,QWORD[((16+0))+rsp]
        mov     r8,QWORD[((24+0))+rsp]
DB      102,72,15,126,207               ; movq rdi,xmm1  (result + 32)
        call    __ecp_nistz256_sqr_montq

        ; Halve the value in r12..r15: if it is odd, add the modulus first
        ; (rsi/rbp still hold $L$poly[1]/[3] from the squaring helper), then
        ; shift the 257-bit quantity r9:r15:r14:r13:r12 right by one.
        xor     r9,r9
        mov     rax,r12
        add     r12,-1
        mov     r10,r13
        adc     r13,rsi
        mov     rcx,r14
        adc     r14,0
        mov     r8,r15
        adc     r15,rbp
        adc     r9,0
        xor     rsi,rsi
        test    rax,1

        cmovz   r12,rax
        cmovz   r13,r10
        cmovz   r14,rcx
        cmovz   r15,r8
        cmovz   r9,rsi

        mov     rax,r13
        shr     r12,1
        shl     rax,63
        mov     r10,r14
        shr     r13,1
        or      r12,rax
        shl     r10,63
        mov     rcx,r15
        shr     r14,1
        or      r13,r10
        shl     rcx,63
        mov     QWORD[rdi],r12
        shr     r15,1
        mov     QWORD[8+rdi],r13
        shl     r9,63
        or      r14,rcx
        or      r15,r9
        mov     QWORD[16+rdi],r14
        mov     QWORD[24+rdi],r15
        mov     rax,QWORD[64+rsp]
        lea     rbx,[64+rsp]
        mov     r9,QWORD[((0+32))+rsp]
        mov     r10,QWORD[((8+32))+rsp]
        lea     rsi,[((0+32))+rsp]
        mov     r11,QWORD[((16+32))+rsp]
        mov     r12,QWORD[((24+32))+rsp]
        lea     rdi,[32+rsp]
        call    __ecp_nistz256_mul_montq

        lea     rdi,[128+rsp]
        call    __ecp_nistz256_mul_by_2q

        lea     rbx,[32+rsp]
        lea     rdi,[32+rsp]
        call    __ecp_nistz256_add_toq

        mov     rax,QWORD[96+rsp]
        lea     rbx,[96+rsp]
        mov     r9,QWORD[((0+0))+rsp]
        mov     r10,QWORD[((8+0))+rsp]
        lea     rsi,[((0+0))+rsp]
        mov     r11,QWORD[((16+0))+rsp]
        mov     r12,QWORD[((24+0))+rsp]
        lea     rdi,[rsp]
        call    __ecp_nistz256_mul_montq

        lea     rdi,[128+rsp]
        call    __ecp_nistz256_mul_by_2q

        mov     rax,QWORD[((0+32))+rsp]
        mov     r14,QWORD[((8+32))+rsp]
        lea     rsi,[((0+32))+rsp]
        mov     r15,QWORD[((16+32))+rsp]
        mov     r8,QWORD[((24+32))+rsp]
DB      102,72,15,126,199               ; movq rdi,xmm0  (result + 0)
        call    __ecp_nistz256_sqr_montq

        ; Re-map the squaring helper's outputs (value in r12,r13,r14,r15;
        ; modulus limbs in rsi,rbp) to the add/sub helper convention.
        lea     rbx,[128+rsp]
        mov     r8,r14
        mov     r9,r15
        mov     r14,rsi
        mov     r15,rbp
        call    __ecp_nistz256_sub_fromq

        mov     rax,QWORD[((0+0))+rsp]
        mov     rbp,QWORD[((0+8))+rsp]
        mov     rcx,QWORD[((0+16))+rsp]
        mov     r10,QWORD[((0+24))+rsp]
        lea     rdi,[rsp]
        call    __ecp_nistz256_subq

        mov     rax,QWORD[32+rsp]
        lea     rbx,[32+rsp]
        mov     r14,r12
        xor     ecx,ecx                 ; sets ZF, so both cmovz below always move
        mov     QWORD[((0+0))+rsp],r12
        mov     r10,r13
        mov     QWORD[((0+8))+rsp],r13
        cmovz   r11,r8
        mov     QWORD[((0+16))+rsp],r8
        lea     rsi,[((0-0))+rsp]
        cmovz   r12,r9
        mov     QWORD[((0+24))+rsp],r9
        mov     r9,r14
        lea     rdi,[rsp]
        call    __ecp_nistz256_mul_montq

DB      102,72,15,126,203               ; movq rbx,xmm1  (result + 32)
DB      102,72,15,126,207               ; movq rdi,xmm1  (result + 32)
        call    __ecp_nistz256_sub_fromq

        add     rsp,32*5+8
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbx
        pop     rbp
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret
$L$SEH_end_ecp_nistz256_point_double:

;-----------------------------------------------------------------------
; ecp_nistz256_point_add
; In (Win64): rcx -> 96-byte result, rdx -> first input point,
;             r8 -> second input point (three 32-byte coordinates each).
; Frame: 32*18+8 bytes of 32-byte slots. Slots 384/416/448 hold a copy of
; the first input, 480/512/544 a copy of the second; the lower slots are
; temporaries and the computed result (288/320/352).
; Masks:  xmm5 = all-ones iff the first input's third coordinate is zero,
;         xmm4 = all-ones iff the second input's third coordinate is zero.
; They drive the final branch-free copy: the computed result is replaced by
; the second input when xmm5 is set, and by the first when xmm4 is set.
; When both difference values computed below are zero and neither mask is
; set, control jumps into ecp_nistz256_point_double; when the first is zero
; but the second is not, the result is written as all zeros.
; xmm0 carries the result pointer, xmm1 the first-input pointer.
;-----------------------------------------------------------------------
global  ecp_nistz256_point_add

ALIGN   32
ecp_nistz256_point_add:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_ecp_nistz256_point_add:
        mov     rdi,rcx
        mov     rsi,rdx
        mov     rdx,r8

        push    rbp
        push    rbx
        push    r12
        push    r13
        push    r14
        push    r15
        sub     rsp,32*18+8

        movdqu  xmm0,XMMWORD[rsi]
        movdqu  xmm1,XMMWORD[16+rsi]
        movdqu  xmm2,XMMWORD[32+rsi]
        movdqu  xmm3,XMMWORD[48+rsi]
        movdqu  xmm4,XMMWORD[64+rsi]
        movdqu  xmm5,XMMWORD[80+rsi]
        mov     rbx,rsi
        mov     rsi,rdx
        movdqa  XMMWORD[384+rsp],xmm0
        movdqa  XMMWORD[(384+16)+rsp],xmm1
        movdqa  XMMWORD[416+rsp],xmm2
        movdqa  XMMWORD[(416+16)+rsp],xmm3
        movdqa  XMMWORD[448+rsp],xmm4
        movdqa  XMMWORD[(448+16)+rsp],xmm5
        por     xmm5,xmm4

        movdqu  xmm0,XMMWORD[rsi]
        pshufd  xmm3,xmm5,0xb1
        movdqu  xmm1,XMMWORD[16+rsi]
        movdqu  xmm2,XMMWORD[32+rsi]
        por     xmm5,xmm3
        movdqu  xmm3,XMMWORD[48+rsi]
        mov     rax,QWORD[((64+0))+rsi]
        mov     r14,QWORD[((64+8))+rsi]
        mov     r15,QWORD[((64+16))+rsi]
        mov     r8,QWORD[((64+24))+rsi]
        movdqa  XMMWORD[480+rsp],xmm0
        pshufd  xmm4,xmm5,0x1e
        movdqa  XMMWORD[(480+16)+rsp],xmm1
        movdqu  xmm0,XMMWORD[64+rsi]
        movdqu  xmm1,XMMWORD[80+rsi]
        movdqa  XMMWORD[512+rsp],xmm2
        movdqa  XMMWORD[(512+16)+rsp],xmm3
        por     xmm5,xmm4
        pxor    xmm4,xmm4
        por     xmm1,xmm0
DB      102,72,15,110,199               ; movq xmm0,rdi  (result pointer)

        lea     rsi,[((64-0))+rsi]
        mov     QWORD[((544+0))+rsp],rax
        mov     QWORD[((544+8))+rsp],r14
        mov     QWORD[((544+16))+rsp],r15
        mov     QWORD[((544+24))+rsp],r8
        lea     rdi,[96+rsp]
        call    __ecp_nistz256_sqr_montq

        pcmpeqd xmm5,xmm4
        pshufd  xmm4,xmm1,0xb1
        por     xmm4,xmm1
        pshufd  xmm5,xmm5,0
        pshufd  xmm3,xmm4,0x1e
        por     xmm4,xmm3
        pxor    xmm3,xmm3
        pcmpeqd xmm4,xmm3
        pshufd  xmm4,xmm4,0
        mov     rax,QWORD[((64+0))+rbx]
        mov     r14,QWORD[((64+8))+rbx]
        mov     r15,QWORD[((64+16))+rbx]
        mov     r8,QWORD[((64+24))+rbx]
DB      102,72,15,110,203               ; movq xmm1,rbx  (first-input pointer)

        lea     rsi,[((64-0))+rbx]
        lea     rdi,[32+rsp]
        call    __ecp_nistz256_sqr_montq

        mov     rax,QWORD[544+rsp]
        lea     rbx,[544+rsp]
        mov     r9,QWORD[((0+96))+rsp]
        mov     r10,QWORD[((8+96))+rsp]
        lea     rsi,[((0+96))+rsp]
        mov     r11,QWORD[((16+96))+rsp]
        mov     r12,QWORD[((24+96))+rsp]
        lea     rdi,[224+rsp]
        call    __ecp_nistz256_mul_montq

        mov     rax,QWORD[448+rsp]
        lea     rbx,[448+rsp]
        mov     r9,QWORD[((0+32))+rsp]
        mov     r10,QWORD[((8+32))+rsp]
        lea     rsi,[((0+32))+rsp]
        mov     r11,QWORD[((16+32))+rsp]
        mov     r12,QWORD[((24+32))+rsp]
        lea     rdi,[256+rsp]
        call    __ecp_nistz256_mul_montq

        mov     rax,QWORD[416+rsp]
        lea     rbx,[416+rsp]
        mov     r9,QWORD[((0+224))+rsp]
        mov     r10,QWORD[((8+224))+rsp]
        lea     rsi,[((0+224))+rsp]
        mov     r11,QWORD[((16+224))+rsp]
        mov     r12,QWORD[((24+224))+rsp]
        lea     rdi,[224+rsp]
        call    __ecp_nistz256_mul_montq

        mov     rax,QWORD[512+rsp]
        lea     rbx,[512+rsp]
        mov     r9,QWORD[((0+256))+rsp]
        mov     r10,QWORD[((8+256))+rsp]
        lea     rsi,[((0+256))+rsp]
        mov     r11,QWORD[((16+256))+rsp]
        mov     r12,QWORD[((24+256))+rsp]
        lea     rdi,[256+rsp]
        call    __ecp_nistz256_mul_montq

        lea     rbx,[224+rsp]
        lea     rdi,[64+rsp]
        call    __ecp_nistz256_sub_fromq

        ; r12 = OR of the limbs of the difference just stored at [64+rsp];
        ; parked in xmm3 for the special-case test below.
        or      r12,r13
        movdqa  xmm2,xmm4
        or      r12,r8
        or      r12,r9
        por     xmm2,xmm5               ; xmm2 = either input mask set
DB      102,73,15,110,220               ; movq xmm3,r12

        mov     rax,QWORD[384+rsp]
        lea     rbx,[384+rsp]
        mov     r9,QWORD[((0+96))+rsp]
        mov     r10,QWORD[((8+96))+rsp]
        lea     rsi,[((0+96))+rsp]
        mov     r11,QWORD[((16+96))+rsp]
        mov     r12,QWORD[((24+96))+rsp]
        lea     rdi,[160+rsp]
        call    __ecp_nistz256_mul_montq

        mov     rax,QWORD[480+rsp]
        lea     rbx,[480+rsp]
        mov     r9,QWORD[((0+32))+rsp]
        mov     r10,QWORD[((8+32))+rsp]
        lea     rsi,[((0+32))+rsp]
        mov     r11,QWORD[((16+32))+rsp]
        mov     r12,QWORD[((24+32))+rsp]
        lea     rdi,[192+rsp]
        call    __ecp_nistz256_mul_montq

        lea     rbx,[160+rsp]
        lea     rdi,[rsp]
        call    __ecp_nistz256_sub_fromq

        ; OR of the limbs of the difference stored at [rsp]
        or      r12,r13
        or      r12,r8
        or      r12,r9

DB      0x3e                            ; prefix byte applied to the jnz below
        jnz     NEAR $L$add_proceedq
DB      102,73,15,126,208               ; movq r8,xmm2  (input masks)
DB      102,73,15,126,217               ; movq r9,xmm3  (OR of first difference)
        test    r8,r8
        jnz     NEAR $L$add_proceedq
        test    r9,r9
        jz      NEAR $L$add_doubleq

        ; first difference zero, second non-zero: write an all-zero result
DB      102,72,15,126,199               ; movq rdi,xmm0  (result pointer)
        pxor    xmm0,xmm0
        movdqu  XMMWORD[rdi],xmm0
        movdqu  XMMWORD[16+rdi],xmm0
        movdqu  XMMWORD[32+rdi],xmm0
        movdqu  XMMWORD[48+rdi],xmm0
        movdqu  XMMWORD[64+rdi],xmm0
        movdqu  XMMWORD[80+rdi],xmm0
        jmp     NEAR $L$add_doneq

ALIGN   32
$L$add_doubleq:
        ; both differences zero: double the first input instead.
        ; 416 = (32*18+8) - (32*5+8), i.e. shrink this frame to the size
        ; ecp_nistz256_point_double's epilogue will release.
DB      102,72,15,126,206               ; movq rsi,xmm1  (first-input pointer)
DB      102,72,15,126,199               ; movq rdi,xmm0  (result pointer)
        add     rsp,416
        jmp     NEAR $L$point_double_shortcutq

ALIGN   32
$L$add_proceedq:
        mov     rax,QWORD[((0+64))+rsp]
        mov     r14,QWORD[((8+64))+rsp]
        lea     rsi,[((0+64))+rsp]
        mov     r15,QWORD[((16+64))+rsp]
        mov     r8,QWORD[((24+64))+rsp]
        lea     rdi,[96+rsp]
        call    __ecp_nistz256_sqr_montq

        mov     rax,QWORD[448+rsp]
        lea     rbx,[448+rsp]
        mov     r9,QWORD[((0+0))+rsp]
        mov     r10,QWORD[((8+0))+rsp]
        lea     rsi,[((0+0))+rsp]
        mov     r11,QWORD[((16+0))+rsp]
        mov     r12,QWORD[((24+0))+rsp]
        lea     rdi,[352+rsp]
        call    __ecp_nistz256_mul_montq

        mov     rax,QWORD[((0+0))+rsp]
        mov     r14,QWORD[((8+0))+rsp]
        lea     rsi,[((0+0))+rsp]
        mov     r15,QWORD[((16+0))+rsp]
        mov     r8,QWORD[((24+0))+rsp]
        lea     rdi,[32+rsp]
        call    __ecp_nistz256_sqr_montq

        mov     rax,QWORD[544+rsp]
        lea     rbx,[544+rsp]
        mov     r9,QWORD[((0+352))+rsp]
        mov     r10,QWORD[((8+352))+rsp]
        lea     rsi,[((0+352))+rsp]
        mov     r11,QWORD[((16+352))+rsp]
        mov     r12,QWORD[((24+352))+rsp]
        lea     rdi,[352+rsp]
        call    __ecp_nistz256_mul_montq

        mov     rax,QWORD[rsp]
        lea     rbx,[rsp]
        mov     r9,QWORD[((0+32))+rsp]
        mov     r10,QWORD[((8+32))+rsp]
        lea     rsi,[((0+32))+rsp]
        mov     r11,QWORD[((16+32))+rsp]
        mov     r12,QWORD[((24+32))+rsp]
        lea     rdi,[128+rsp]
        call    __ecp_nistz256_mul_montq

        mov     rax,QWORD[160+rsp]
        lea     rbx,[160+rsp]
        mov     r9,QWORD[((0+32))+rsp]
        mov     r10,QWORD[((8+32))+rsp]
        lea     rsi,[((0+32))+rsp]
        mov     r11,QWORD[((16+32))+rsp]
        mov     r12,QWORD[((24+32))+rsp]
        lea     rdi,[192+rsp]
        call    __ecp_nistz256_mul_montq

        ; Inline doubling of the product just computed (same sequence as
        ; __ecp_nistz256_mul_by_2q but without the store), interleaved with
        ; loading the slot at [96+rsp] as the minuend for the subq below.
        xor     r11,r11
        add     r12,r12
        lea     rsi,[96+rsp]
        adc     r13,r13
        mov     rax,r12
        adc     r8,r8
        adc     r9,r9
        mov     rbp,r13
        adc     r11,0

        sub     r12,-1
        mov     rcx,r8
        sbb     r13,r14
        sbb     r8,0
        mov     r10,r9
        sbb     r9,r15
        sbb     r11,0

        cmovc   r12,rax
        mov     rax,QWORD[rsi]
        cmovc   r13,rbp
        mov     rbp,QWORD[8+rsi]
        cmovc   r8,rcx
        mov     rcx,QWORD[16+rsi]
        cmovc   r9,r10
        mov     r10,QWORD[24+rsi]

        call    __ecp_nistz256_subq

        lea     rbx,[128+rsp]
        lea     rdi,[288+rsp]
        call    __ecp_nistz256_sub_fromq

        mov     rax,QWORD[((192+0))+rsp]
        mov     rbp,QWORD[((192+8))+rsp]
        mov     rcx,QWORD[((192+16))+rsp]
        mov     r10,QWORD[((192+24))+rsp]
        lea     rdi,[320+rsp]

        call    __ecp_nistz256_subq

        ; __ecp_nistz256_subq leaves its result in registers only
        mov     QWORD[rdi],r12
        mov     QWORD[8+rdi],r13
        mov     QWORD[16+rdi],r8
        mov     QWORD[24+rdi],r9
        mov     rax,QWORD[128+rsp]
        lea     rbx,[128+rsp]
        mov     r9,QWORD[((0+224))+rsp]
        mov     r10,QWORD[((8+224))+rsp]
        lea     rsi,[((0+224))+rsp]
        mov     r11,QWORD[((16+224))+rsp]
        mov     r12,QWORD[((24+224))+rsp]
        lea     rdi,[256+rsp]
        call    __ecp_nistz256_mul_montq

        mov     rax,QWORD[320+rsp]
        lea     rbx,[320+rsp]
        mov     r9,QWORD[((0+64))+rsp]
        mov     r10,QWORD[((8+64))+rsp]
        lea     rsi,[((0+64))+rsp]
        mov     r11,QWORD[((16+64))+rsp]
        mov     r12,QWORD[((24+64))+rsp]
        lea     rdi,[320+rsp]
        call    __ecp_nistz256_mul_montq

        lea     rbx,[256+rsp]
        lea     rdi,[320+rsp]
        call    __ecp_nistz256_sub_fromq

DB      102,72,15,126,199               ; movq rdi,xmm0  (result pointer)

        ; third coordinate: computed [352] / second input [544] / first input [448]
        movdqa  xmm0,xmm5
        movdqa  xmm1,xmm5
        pandn   xmm0,XMMWORD[352+rsp]
        movdqa  xmm2,xmm5
        pandn   xmm1,XMMWORD[((352+16))+rsp]
        movdqa  xmm3,xmm5
        pand    xmm2,XMMWORD[544+rsp]
        pand    xmm3,XMMWORD[((544+16))+rsp]
        por     xmm2,xmm0
        por     xmm3,xmm1

        movdqa  xmm0,xmm4
        movdqa  xmm1,xmm4
        pandn   xmm0,xmm2
        movdqa  xmm2,xmm4
        pandn   xmm1,xmm3
        movdqa  xmm3,xmm4
        pand    xmm2,XMMWORD[448+rsp]
        pand    xmm3,XMMWORD[((448+16))+rsp]
        por     xmm2,xmm0
        por     xmm3,xmm1
        movdqu  XMMWORD[64+rdi],xmm2
        movdqu  XMMWORD[80+rdi],xmm3

        ; first coordinate: computed [288] / second input [480] / first input [384]
        movdqa  xmm0,xmm5
        movdqa  xmm1,xmm5
        pandn   xmm0,XMMWORD[288+rsp]
        movdqa  xmm2,xmm5
        pandn   xmm1,XMMWORD[((288+16))+rsp]
        movdqa  xmm3,xmm5
        pand    xmm2,XMMWORD[480+rsp]
        pand    xmm3,XMMWORD[((480+16))+rsp]
        por     xmm2,xmm0
        por     xmm3,xmm1

        movdqa  xmm0,xmm4
        movdqa  xmm1,xmm4
        pandn   xmm0,xmm2
        movdqa  xmm2,xmm4
        pandn   xmm1,xmm3
        movdqa  xmm3,xmm4
        pand    xmm2,XMMWORD[384+rsp]
        pand    xmm3,XMMWORD[((384+16))+rsp]
        por     xmm2,xmm0
        por     xmm3,xmm1
        movdqu  XMMWORD[rdi],xmm2
        movdqu  XMMWORD[16+rdi],xmm3

        ; second coordinate: computed [320] / second input [512] / first input [416]
        movdqa  xmm0,xmm5
        movdqa  xmm1,xmm5
        pandn   xmm0,XMMWORD[320+rsp]
        movdqa  xmm2,xmm5
        pandn   xmm1,XMMWORD[((320+16))+rsp]
        movdqa  xmm3,xmm5
        pand    xmm2,XMMWORD[512+rsp]
        pand    xmm3,XMMWORD[((512+16))+rsp]
        por     xmm2,xmm0
        por     xmm3,xmm1

        movdqa  xmm0,xmm4
        movdqa  xmm1,xmm4
        pandn   xmm0,xmm2
        movdqa  xmm2,xmm4
        pandn   xmm1,xmm3
        movdqa  xmm3,xmm4
        pand    xmm2,XMMWORD[416+rsp]
        pand    xmm3,XMMWORD[((416+16))+rsp]
        por     xmm2,xmm0
        por     xmm3,xmm1
        movdqu  XMMWORD[32+rdi],xmm2
        movdqu  XMMWORD[48+rdi],xmm3

$L$add_doneq:
        add     rsp,32*18+8
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbx
        pop     rbp
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret
$L$SEH_end_ecp_nistz256_point_add:
; NOTE(review): the text below is the still-collapsed beginning of ecp_nistz256_point_add_affine. That routine continues past the reviewed chunk, so it is left exactly as found and still needs the same one-statement-per-line restoration.
1635global ecp_nistz256_point_add_affine 1636 1637ALIGN 32 1638ecp_nistz256_point_add_affine: 1639 mov QWORD[8+rsp],rdi ;WIN64 prologue 1640 mov QWORD[16+rsp],rsi 1641 mov rax,rsp 1642$L$SEH_begin_ecp_nistz256_point_add_affine: 1643 mov rdi,rcx 1644 mov rsi,rdx 1645 mov rdx,r8 1646 1647 1648 push rbp 1649 push rbx 1650 push r12 1651 push r13 1652 push r14 1653 push r15 1654 sub rsp,32*15+8 1655 1656 movdqu xmm0,XMMWORD[rsi] 1657 mov rbx,rdx 1658 movdqu xmm1,XMMWORD[16+rsi] 1659 movdqu xmm2,XMMWORD[32+rsi] 1660 movdqu xmm3,XMMWORD[48+rsi] 1661 movdqu xmm4,XMMWORD[64+rsi] 1662 movdqu xmm5,XMMWORD[80+rsi] 1663 mov rax,QWORD[((64+0))+rsi] 1664 mov r14,QWORD[((64+8))+rsi] 1665 mov r15,QWORD[((64+16))+rsi] 1666 mov r8,QWORD[((64+24))+rsi] 1667 movdqa XMMWORD[320+rsp],xmm0 1668 movdqa XMMWORD[(320+16)+rsp],xmm1 1669 movdqa XMMWORD[352+rsp],xmm2 1670 movdqa XMMWORD[(352+16)+rsp],xmm3 1671 movdqa XMMWORD[384+rsp],xmm4 1672 movdqa XMMWORD[(384+16)+rsp],xmm5 1673 por xmm5,xmm4 1674 1675 movdqu xmm0,XMMWORD[rbx] 1676 pshufd xmm3,xmm5,0xb1 1677 movdqu xmm1,XMMWORD[16+rbx] 1678 
movdqu xmm2,XMMWORD[32+rbx] 1679 por xmm5,xmm3 1680 movdqu xmm3,XMMWORD[48+rbx] 1681 movdqa XMMWORD[416+rsp],xmm0 1682 pshufd xmm4,xmm5,0x1e 1683 movdqa XMMWORD[(416+16)+rsp],xmm1 1684 por xmm1,xmm0 1685DB 102,72,15,110,199 1686 movdqa XMMWORD[448+rsp],xmm2 1687 movdqa XMMWORD[(448+16)+rsp],xmm3 1688 por xmm3,xmm2 1689 por xmm5,xmm4 1690 pxor xmm4,xmm4 1691 por xmm3,xmm1 1692 1693 lea rsi,[((64-0))+rsi] 1694 lea rdi,[32+rsp] 1695 call __ecp_nistz256_sqr_montq 1696 1697 pcmpeqd xmm5,xmm4 1698 pshufd xmm4,xmm3,0xb1 1699 mov rax,QWORD[rbx] 1700 1701 mov r9,r12 1702 por xmm4,xmm3 1703 pshufd xmm5,xmm5,0 1704 pshufd xmm3,xmm4,0x1e 1705 mov r10,r13 1706 por xmm4,xmm3 1707 pxor xmm3,xmm3 1708 mov r11,r14 1709 pcmpeqd xmm4,xmm3 1710 pshufd xmm4,xmm4,0 1711 1712 lea rsi,[((32-0))+rsp] 1713 mov r12,r15 1714 lea rdi,[rsp] 1715 call __ecp_nistz256_mul_montq 1716 1717 lea rbx,[320+rsp] 1718 lea rdi,[64+rsp] 1719 call __ecp_nistz256_sub_fromq 1720 1721 mov rax,QWORD[384+rsp] 1722 lea rbx,[384+rsp] 1723 mov r9,QWORD[((0+32))+rsp] 1724 mov r10,QWORD[((8+32))+rsp] 1725 lea rsi,[((0+32))+rsp] 1726 mov r11,QWORD[((16+32))+rsp] 1727 mov r12,QWORD[((24+32))+rsp] 1728 lea rdi,[32+rsp] 1729 call __ecp_nistz256_mul_montq 1730 1731 mov rax,QWORD[384+rsp] 1732 lea rbx,[384+rsp] 1733 mov r9,QWORD[((0+64))+rsp] 1734 mov r10,QWORD[((8+64))+rsp] 1735 lea rsi,[((0+64))+rsp] 1736 mov r11,QWORD[((16+64))+rsp] 1737 mov r12,QWORD[((24+64))+rsp] 1738 lea rdi,[288+rsp] 1739 call __ecp_nistz256_mul_montq 1740 1741 mov rax,QWORD[448+rsp] 1742 lea rbx,[448+rsp] 1743 mov r9,QWORD[((0+32))+rsp] 1744 mov r10,QWORD[((8+32))+rsp] 1745 lea rsi,[((0+32))+rsp] 1746 mov r11,QWORD[((16+32))+rsp] 1747 mov r12,QWORD[((24+32))+rsp] 1748 lea rdi,[32+rsp] 1749 call __ecp_nistz256_mul_montq 1750 1751 lea rbx,[352+rsp] 1752 lea rdi,[96+rsp] 1753 call __ecp_nistz256_sub_fromq 1754 1755 mov rax,QWORD[((0+64))+rsp] 1756 mov r14,QWORD[((8+64))+rsp] 1757 lea rsi,[((0+64))+rsp] 1758 mov r15,QWORD[((16+64))+rsp] 1759 mov 
r8,QWORD[((24+64))+rsp] 1760 lea rdi,[128+rsp] 1761 call __ecp_nistz256_sqr_montq 1762 1763 mov rax,QWORD[((0+96))+rsp] 1764 mov r14,QWORD[((8+96))+rsp] 1765 lea rsi,[((0+96))+rsp] 1766 mov r15,QWORD[((16+96))+rsp] 1767 mov r8,QWORD[((24+96))+rsp] 1768 lea rdi,[192+rsp] 1769 call __ecp_nistz256_sqr_montq 1770 1771 mov rax,QWORD[128+rsp] 1772 lea rbx,[128+rsp] 1773 mov r9,QWORD[((0+64))+rsp] 1774 mov r10,QWORD[((8+64))+rsp] 1775 lea rsi,[((0+64))+rsp] 1776 mov r11,QWORD[((16+64))+rsp] 1777 mov r12,QWORD[((24+64))+rsp] 1778 lea rdi,[160+rsp] 1779 call __ecp_nistz256_mul_montq 1780 1781 mov rax,QWORD[320+rsp] 1782 lea rbx,[320+rsp] 1783 mov r9,QWORD[((0+128))+rsp] 1784 mov r10,QWORD[((8+128))+rsp] 1785 lea rsi,[((0+128))+rsp] 1786 mov r11,QWORD[((16+128))+rsp] 1787 mov r12,QWORD[((24+128))+rsp] 1788 lea rdi,[rsp] 1789 call __ecp_nistz256_mul_montq 1790 1791 1792 1793 1794 xor r11,r11 1795 add r12,r12 1796 lea rsi,[192+rsp] 1797 adc r13,r13 1798 mov rax,r12 1799 adc r8,r8 1800 adc r9,r9 1801 mov rbp,r13 1802 adc r11,0 1803 1804 sub r12,-1 1805 mov rcx,r8 1806 sbb r13,r14 1807 sbb r8,0 1808 mov r10,r9 1809 sbb r9,r15 1810 sbb r11,0 1811 1812 cmovc r12,rax 1813 mov rax,QWORD[rsi] 1814 cmovc r13,rbp 1815 mov rbp,QWORD[8+rsi] 1816 cmovc r8,rcx 1817 mov rcx,QWORD[16+rsi] 1818 cmovc r9,r10 1819 mov r10,QWORD[24+rsi] 1820 1821 call __ecp_nistz256_subq 1822 1823 lea rbx,[160+rsp] 1824 lea rdi,[224+rsp] 1825 call __ecp_nistz256_sub_fromq 1826 1827 mov rax,QWORD[((0+0))+rsp] 1828 mov rbp,QWORD[((0+8))+rsp] 1829 mov rcx,QWORD[((0+16))+rsp] 1830 mov r10,QWORD[((0+24))+rsp] 1831 lea rdi,[64+rsp] 1832 1833 call __ecp_nistz256_subq 1834 1835 mov QWORD[rdi],r12 1836 mov QWORD[8+rdi],r13 1837 mov QWORD[16+rdi],r8 1838 mov QWORD[24+rdi],r9 1839 mov rax,QWORD[352+rsp] 1840 lea rbx,[352+rsp] 1841 mov r9,QWORD[((0+160))+rsp] 1842 mov r10,QWORD[((8+160))+rsp] 1843 lea rsi,[((0+160))+rsp] 1844 mov r11,QWORD[((16+160))+rsp] 1845 mov r12,QWORD[((24+160))+rsp] 1846 lea rdi,[32+rsp] 1847 call 
__ecp_nistz256_mul_montq 1848 1849 mov rax,QWORD[96+rsp] 1850 lea rbx,[96+rsp] 1851 mov r9,QWORD[((0+64))+rsp] 1852 mov r10,QWORD[((8+64))+rsp] 1853 lea rsi,[((0+64))+rsp] 1854 mov r11,QWORD[((16+64))+rsp] 1855 mov r12,QWORD[((24+64))+rsp] 1856 lea rdi,[64+rsp] 1857 call __ecp_nistz256_mul_montq 1858 1859 lea rbx,[32+rsp] 1860 lea rdi,[256+rsp] 1861 call __ecp_nistz256_sub_fromq 1862 1863DB 102,72,15,126,199 1864 1865 movdqa xmm0,xmm5 1866 movdqa xmm1,xmm5 1867 pandn xmm0,XMMWORD[288+rsp] 1868 movdqa xmm2,xmm5 1869 pandn xmm1,XMMWORD[((288+16))+rsp] 1870 movdqa xmm3,xmm5 1871 pand xmm2,XMMWORD[$L$ONE_mont] 1872 pand xmm3,XMMWORD[(($L$ONE_mont+16))] 1873 por xmm2,xmm0 1874 por xmm3,xmm1 1875 1876 movdqa xmm0,xmm4 1877 movdqa xmm1,xmm4 1878 pandn xmm0,xmm2 1879 movdqa xmm2,xmm4 1880 pandn xmm1,xmm3 1881 movdqa xmm3,xmm4 1882 pand xmm2,XMMWORD[384+rsp] 1883 pand xmm3,XMMWORD[((384+16))+rsp] 1884 por xmm2,xmm0 1885 por xmm3,xmm1 1886 movdqu XMMWORD[64+rdi],xmm2 1887 movdqu XMMWORD[80+rdi],xmm3 1888 1889 movdqa xmm0,xmm5 1890 movdqa xmm1,xmm5 1891 pandn xmm0,XMMWORD[224+rsp] 1892 movdqa xmm2,xmm5 1893 pandn xmm1,XMMWORD[((224+16))+rsp] 1894 movdqa xmm3,xmm5 1895 pand xmm2,XMMWORD[416+rsp] 1896 pand xmm3,XMMWORD[((416+16))+rsp] 1897 por xmm2,xmm0 1898 por xmm3,xmm1 1899 1900 movdqa xmm0,xmm4 1901 movdqa xmm1,xmm4 1902 pandn xmm0,xmm2 1903 movdqa xmm2,xmm4 1904 pandn xmm1,xmm3 1905 movdqa xmm3,xmm4 1906 pand xmm2,XMMWORD[320+rsp] 1907 pand xmm3,XMMWORD[((320+16))+rsp] 1908 por xmm2,xmm0 1909 por xmm3,xmm1 1910 movdqu XMMWORD[rdi],xmm2 1911 movdqu XMMWORD[16+rdi],xmm3 1912 1913 movdqa xmm0,xmm5 1914 movdqa xmm1,xmm5 1915 pandn xmm0,XMMWORD[256+rsp] 1916 movdqa xmm2,xmm5 1917 pandn xmm1,XMMWORD[((256+16))+rsp] 1918 movdqa xmm3,xmm5 1919 pand xmm2,XMMWORD[448+rsp] 1920 pand xmm3,XMMWORD[((448+16))+rsp] 1921 por xmm2,xmm0 1922 por xmm3,xmm1 1923 1924 movdqa xmm0,xmm4 1925 movdqa xmm1,xmm4 1926 pandn xmm0,xmm2 1927 movdqa xmm2,xmm4 1928 pandn xmm1,xmm3 1929 movdqa xmm3,xmm4 
1930 pand xmm2,XMMWORD[352+rsp] 1931 pand xmm3,XMMWORD[((352+16))+rsp] 1932 por xmm2,xmm0 1933 por xmm3,xmm1 1934 movdqu XMMWORD[32+rdi],xmm2 1935 movdqu XMMWORD[48+rdi],xmm3 1936 1937 add rsp,32*15+8 1938 pop r15 1939 pop r14 1940 pop r13 1941 pop r12 1942 pop rbx 1943 pop rbp 1944 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1945 mov rsi,QWORD[16+rsp] 1946 DB 0F3h,0C3h ;repret 1947$L$SEH_end_ecp_nistz256_point_add_affine: 1948