; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; Overview (reviewer notes, derived from the code below):
;   * NASM syntax, Win64 build. The identification string embedded at the end
;     of the text section reads
;     "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>".
;   * bn_mul_mont is the only symbol declared global here. bn_mul4x_mont,
;     bn_sqr8x_mont and bn_mulx4x_mont are reached from it through their
;     $L$*_enter labels.
;   * Every routine starts by saving rdi/rsi at [8+rsp]/[16+rsp] and copying
;     the Win64 arguments (rcx, rdx, r8, r9, [40+rsp], [48+rsp]) into
;     rdi, rsi, rdx, rcx, r8, r9.
;   * Register roles after that copy, as used by the code:
;       rdi = output vector (written by the final subtract/copy loops)
;       rsi = first input vector
;       rdx = second input vector (one word is consumed per outer iteration)
;       rcx = vector that is multiplied by the per-row factor and subtracted
;             at the end (NOTE(review): presumably the modulus -- confirm)
;       r8  = pointer to a single qword that is loaded once and multiplied
;             into the low word of each row (NOTE(review): presumably the
;             Montgomery constant n0 -- confirm against the C prototype)
;       r9  = vector length in 64-bit words
;   * Every path returns 1 in rax.
;   * mul_handler/sqr_handler and the .pdata/.xdata tables at the end provide
;     Win64 structured-exception unwind information for the four routines.

default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
section .text code align=64


EXTERN OPENSSL_ia32cap_P

global bn_mul_mont

; ---------------------------------------------------------------------------
; bn_mul_mont: exported entry point. Selects one of four code paths based on
; the word count in r9d, whether both input pointers are equal, and the
; capability word at OPENSSL_ia32cap_P+8.
; ---------------------------------------------------------------------------
ALIGN 16
bn_mul_mont:
    mov QWORD[8+rsp],rdi ;WIN64 prologue
    mov QWORD[16+rsp],rsi
    mov rax,rsp
$L$SEH_begin_bn_mul_mont:
    ; Copy the Win64 arguments into rdi, rsi, rdx, rcx, r8, r9.
    mov rdi,rcx
    mov rsi,rdx
    mov rdx,r8
    mov rcx,r9
    mov r8,QWORD[40+rsp]
    mov r9,QWORD[48+rsp]

    mov r9d,r9d                     ; zero-extend the 32-bit word count
    mov rax,rsp                     ; rax = stack pointer at entry, saved in the frame later

    test r9d,3
    jnz NEAR $L$mul_enter           ; count not a multiple of 4 -> generic loop
    cmp r9d,8
    jb NEAR $L$mul_enter            ; count below 8 -> generic loop
    lea r11,[OPENSSL_ia32cap_P]
    mov r11d,DWORD[8+r11]           ; capability word, tested at $L$mul4x_enter
    cmp rdx,rsi
    jne NEAR $L$mul4x_enter         ; distinct input pointers -> 4x multiply
    test r9d,7
    jz NEAR $L$sqr8x_enter          ; same pointer and count multiple of 8 -> squaring
    jmp NEAR $L$mul4x_enter

; Generic path: one word per inner-loop iteration.
ALIGN 16
$L$mul_enter:
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15

    ; r10 = (rsp - 8*r9 - 16) rounded down to a multiple of 1024: the new
    ; stack pointer, leaving room for the scratch vector.
    neg r9
    mov r11,rsp
    lea r10,[((-16))+r9*8+rsp]
    neg r9
    and r10,-1024

    ; Lower rsp to r10 in steps of at most 4096 bytes, reading one qword at
    ; each step so that every page in between is touched in order.
    sub r11,r10
    and r11,-4096
    lea rsp,[r11*1+r10]
    mov r11,QWORD[rsp]
    cmp rsp,r10
    ja NEAR $L$mul_page_walk
    jmp NEAR $L$mul_page_walk_done

ALIGN 16
$L$mul_page_walk:
    lea rsp,[((-4096))+rsp]
    mov r11,QWORD[rsp]
    cmp rsp,r10
    ja NEAR $L$mul_page_walk
$L$mul_page_walk_done:

    mov QWORD[8+r9*8+rsp],rax       ; remember the entry-time rsp above the scratch vector

$L$mul_body:
    mov r12,rdx                     ; r12 = second input vector (rdx is clobbered by mul)
    mov r8,QWORD[r8]                ; r8 = the qword the sixth-argument pointer refers to
    mov rbx,QWORD[r12]              ; rbx = word 0 of the second input
    mov rax,QWORD[rsi]              ; rax = word 0 of the first input

    xor r14,r14                     ; r14 = outer index
    xor r15,r15                     ; r15 = inner index

    mov rbp,r8
    mul rbx
    mov r10,rax
    mov rax,QWORD[rcx]

    imul rbp,r10                    ; rbp = low product word * r8 (per-row factor for the rcx vector)
    mov r11,rdx

    mul rbp
    add r10,rax
    mov rax,QWORD[8+rsi]
    adc rdx,0
    mov r13,rdx

    lea r15,[1+r15]
    jmp NEAR $L$1st_enter

; First row: no previous scratch contents are added in.
ALIGN 16
$L$1st:
    add r13,rax
    mov rax,QWORD[r15*8+rsi]
    adc rdx,0
    add r13,r11
    mov r11,r10
    adc rdx,0
    mov QWORD[((-16))+r15*8+rsp],r13
    mov r13,rdx

$L$1st_enter:
    mul rbx
    add r11,rax
    mov rax,QWORD[r15*8+rcx]
    adc rdx,0
    lea r15,[1+r15]
    mov r10,rdx

    mul rbp
    cmp r15,r9
    jne NEAR $L$1st

    add r13,rax
    mov rax,QWORD[rsi]
    adc rdx,0
    add r13,r11
    adc rdx,0
    mov QWORD[((-16))+r15*8+rsp],r13
    mov r13,rdx
    mov r11,r10

    xor rdx,rdx
    add r13,r11
    adc rdx,0
    mov QWORD[((-8))+r9*8+rsp],r13
    mov QWORD[r9*8+rsp],rdx         ; top carry word of the scratch vector

    lea r14,[1+r14]
    jmp NEAR $L$outer
; Remaining rows: same as the first row, plus the scratch vector from the
; previous row is accumulated.
ALIGN 16
$L$outer:
    mov rbx,QWORD[r14*8+r12]
    xor r15,r15
    mov rbp,r8
    mov r10,QWORD[rsp]
    mul rbx
    add r10,rax
    mov rax,QWORD[rcx]
    adc rdx,0

    imul rbp,r10
    mov r11,rdx

    mul rbp
    add r10,rax
    mov rax,QWORD[8+rsi]
    adc rdx,0
    mov r10,QWORD[8+rsp]
    mov r13,rdx

    lea r15,[1+r15]
    jmp NEAR $L$inner_enter

ALIGN 16
$L$inner:
    add r13,rax
    mov rax,QWORD[r15*8+rsi]
    adc rdx,0
    add r13,r10
    mov r10,QWORD[r15*8+rsp]
    adc rdx,0
    mov QWORD[((-16))+r15*8+rsp],r13
    mov r13,rdx

$L$inner_enter:
    mul rbx
    add r11,rax
    mov rax,QWORD[r15*8+rcx]
    adc rdx,0
    add r10,r11
    mov r11,rdx
    adc r11,0
    lea r15,[1+r15]

    mul rbp
    cmp r15,r9
    jne NEAR $L$inner

    add r13,rax
    mov rax,QWORD[rsi]
    adc rdx,0
    add r13,r10
    mov r10,QWORD[r15*8+rsp]
    adc rdx,0
    mov QWORD[((-16))+r15*8+rsp],r13
    mov r13,rdx

    xor rdx,rdx
    add r13,r11
    adc rdx,0
    add r13,r10
    adc rdx,0
    mov QWORD[((-8))+r9*8+rsp],r13
    mov QWORD[r9*8+rsp],rdx

    lea r14,[1+r14]
    cmp r14,r9
    jb NEAR $L$outer

    ; Final step: output = scratch - rcx vector, keeping the borrow.
    xor r14,r14                     ; also clears CF for the first sbb below
    mov rax,QWORD[rsp]
    mov r15,r9

ALIGN 16
$L$sub: sbb rax,QWORD[r14*8+rcx]
    mov QWORD[r14*8+rdi],rax
    mov rax,QWORD[8+r14*8+rsp]
    lea r14,[1+r14]
    dec r15                         ; dec leaves CF intact for the next sbb
    jnz NEAR $L$sub

    sbb rax,0                       ; fold the final borrow into the top scratch word
    mov rbx,-1
    xor rbx,rax                     ; rbx = ~rax; rax and rbx serve as complementary AND-masks
    xor r14,r14
    mov r15,r9

    ; Branch-free select between the difference (in the output vector) and
    ; the scratch vector; each scratch word is overwritten with r9 after use.
$L$copy:
    mov rcx,QWORD[r14*8+rdi]
    mov rdx,QWORD[r14*8+rsp]
    and rcx,rbx
    and rdx,rax
    mov QWORD[r14*8+rsp],r9
    or rdx,rcx
    mov QWORD[r14*8+rdi],rdx
    lea r14,[1+r14]
    sub r15,1
    jnz NEAR $L$copy

    mov rsi,QWORD[8+r9*8+rsp]       ; rsi = entry-time rsp saved before $L$mul_body

    mov rax,1                       ; return value
    mov r15,QWORD[((-48))+rsi]
    mov r14,QWORD[((-40))+rsi]
    mov r13,QWORD[((-32))+rsi]
    mov r12,QWORD[((-24))+rsi]
    mov rbp,QWORD[((-16))+rsi]
    mov rbx,QWORD[((-8))+rsi]
    lea rsp,[rsi]

$L$mul_epilogue:
    mov rdi,QWORD[8+rsp] ;WIN64 epilogue
    mov rsi,QWORD[16+rsp]
    DB 0F3h,0C3h ;repret

$L$SEH_end_bn_mul_mont:

; ---------------------------------------------------------------------------
; bn_mul4x_mont: same computation as the generic path with the inner loops
; unrolled four words per iteration. Entered from bn_mul_mont at
; $L$mul4x_enter with r11d already holding the capability word.
; ---------------------------------------------------------------------------
ALIGN 16
bn_mul4x_mont:
    mov QWORD[8+rsp],rdi ;WIN64 prologue
    mov QWORD[16+rsp],rsi
    mov rax,rsp
$L$SEH_begin_bn_mul4x_mont:
    mov rdi,rcx
    mov rsi,rdx
    mov rdx,r8
    mov rcx,r9
    mov r8,QWORD[40+rsp]
    mov r9,QWORD[48+rsp]

    mov r9d,r9d
    mov rax,rsp

$L$mul4x_enter:
    ; If both bits of mask 0x80100 are set, use the mulx/adcx/adox path.
    ; NOTE(review): presumably these are the BMI2 and ADX feature bits -- confirm.
    and r11d,0x80100
    cmp r11d,0x80100
    je NEAR $L$mulx4x_enter
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15

    ; Same frame set-up as the generic path, with 32 instead of 16 bytes of
    ; slack above the scratch vector.
    neg r9
    mov r11,rsp
    lea r10,[((-32))+r9*8+rsp]
    neg r9
    and r10,-1024

    sub r11,r10
    and r11,-4096
    lea rsp,[r11*1+r10]
    mov r11,QWORD[rsp]
    cmp rsp,r10
    ja NEAR $L$mul4x_page_walk
    jmp NEAR $L$mul4x_page_walk_done

$L$mul4x_page_walk:
    lea rsp,[((-4096))+rsp]
    mov r11,QWORD[rsp]
    cmp rsp,r10
    ja NEAR $L$mul4x_page_walk
$L$mul4x_page_walk_done:

    mov QWORD[8+r9*8+rsp],rax       ; entry-time rsp

$L$mul4x_body:
    mov QWORD[16+r9*8+rsp],rdi      ; output pointer is spilled: rdi is used as an accumulator below
    mov r12,rdx
    mov r8,QWORD[r8]
    mov rbx,QWORD[r12]
    mov rax,QWORD[rsi]

    xor r14,r14                     ; r14 = outer index
    xor r15,r15                     ; r15 = inner index

    mov rbp,r8
    mul rbx
    mov r10,rax
    mov rax,QWORD[rcx]

    imul rbp,r10
    mov r11,rdx

    mul rbp
    add r10,rax
    mov rax,QWORD[8+rsi]
    adc rdx,0
    mov rdi,rdx

    mul rbx
    add r11,rax
    mov rax,QWORD[8+rcx]
    adc rdx,0
    mov r10,rdx

    mul rbp
    add rdi,rax
    mov rax,QWORD[16+rsi]
    adc rdx,0
    add rdi,r11
    lea r15,[4+r15]
    adc rdx,0
    mov QWORD[rsp],rdi
    mov r13,rdx
    jmp NEAR $L$1st4x
; First row, four words per iteration.
ALIGN 16
$L$1st4x:
    mul rbx
    add r10,rax
    mov rax,QWORD[((-16))+r15*8+rcx]
    adc rdx,0
    mov r11,rdx

    mul rbp
    add r13,rax
    mov rax,QWORD[((-8))+r15*8+rsi]
    adc rdx,0
    add r13,r10
    adc rdx,0
    mov QWORD[((-24))+r15*8+rsp],r13
    mov rdi,rdx

    mul rbx
    add r11,rax
    mov rax,QWORD[((-8))+r15*8+rcx]
    adc rdx,0
    mov r10,rdx

    mul rbp
    add rdi,rax
    mov rax,QWORD[r15*8+rsi]
    adc rdx,0
    add rdi,r11
    adc rdx,0
    mov QWORD[((-16))+r15*8+rsp],rdi
    mov r13,rdx

    mul rbx
    add r10,rax
    mov rax,QWORD[r15*8+rcx]
    adc rdx,0
    mov r11,rdx

    mul rbp
    add r13,rax
    mov rax,QWORD[8+r15*8+rsi]
    adc rdx,0
    add r13,r10
    adc rdx,0
    mov QWORD[((-8))+r15*8+rsp],r13
    mov rdi,rdx

    mul rbx
    add r11,rax
    mov rax,QWORD[8+r15*8+rcx]
    adc rdx,0
    lea r15,[4+r15]
    mov r10,rdx

    mul rbp
    add rdi,rax
    mov rax,QWORD[((-16))+r15*8+rsi]
    adc rdx,0
    add rdi,r11
    adc rdx,0
    mov QWORD[((-32))+r15*8+rsp],rdi
    mov r13,rdx
    cmp r15,r9
    jb NEAR $L$1st4x

    ; Tail of the first row (last two words and the carry word).
    mul rbx
    add r10,rax
    mov rax,QWORD[((-16))+r15*8+rcx]
    adc rdx,0
    mov r11,rdx

    mul rbp
    add r13,rax
    mov rax,QWORD[((-8))+r15*8+rsi]
    adc rdx,0
    add r13,r10
    adc rdx,0
    mov QWORD[((-24))+r15*8+rsp],r13
    mov rdi,rdx

    mul rbx
    add r11,rax
    mov rax,QWORD[((-8))+r15*8+rcx]
    adc rdx,0
    mov r10,rdx

    mul rbp
    add rdi,rax
    mov rax,QWORD[rsi]
    adc rdx,0
    add rdi,r11
    adc rdx,0
    mov QWORD[((-16))+r15*8+rsp],rdi
    mov r13,rdx

    xor rdi,rdi
    add r13,r10
    adc rdi,0
    mov QWORD[((-8))+r15*8+rsp],r13
    mov QWORD[r15*8+rsp],rdi

    lea r14,[1+r14]
; Remaining rows: as above, additionally accumulating the scratch vector.
ALIGN 4
$L$outer4x:
    mov rbx,QWORD[r14*8+r12]
    xor r15,r15
    mov r10,QWORD[rsp]
    mov rbp,r8
    mul rbx
    add r10,rax
    mov rax,QWORD[rcx]
    adc rdx,0

    imul rbp,r10
    mov r11,rdx

    mul rbp
    add r10,rax
    mov rax,QWORD[8+rsi]
    adc rdx,0
    mov rdi,rdx

    mul rbx
    add r11,rax
    mov rax,QWORD[8+rcx]
    adc rdx,0
    add r11,QWORD[8+rsp]
    adc rdx,0
    mov r10,rdx

    mul rbp
    add rdi,rax
    mov rax,QWORD[16+rsi]
    adc rdx,0
    add rdi,r11
    lea r15,[4+r15]
    adc rdx,0
    mov QWORD[rsp],rdi
    mov r13,rdx
    jmp NEAR $L$inner4x
ALIGN 16
$L$inner4x:
    mul rbx
    add r10,rax
    mov rax,QWORD[((-16))+r15*8+rcx]
    adc rdx,0
    add r10,QWORD[((-16))+r15*8+rsp]
    adc rdx,0
    mov r11,rdx

    mul rbp
    add r13,rax
    mov rax,QWORD[((-8))+r15*8+rsi]
    adc rdx,0
    add r13,r10
    adc rdx,0
    mov QWORD[((-24))+r15*8+rsp],r13
    mov rdi,rdx

    mul rbx
    add r11,rax
    mov rax,QWORD[((-8))+r15*8+rcx]
    adc rdx,0
    add r11,QWORD[((-8))+r15*8+rsp]
    adc rdx,0
    mov r10,rdx

    mul rbp
    add rdi,rax
    mov rax,QWORD[r15*8+rsi]
    adc rdx,0
    add rdi,r11
    adc rdx,0
    mov QWORD[((-16))+r15*8+rsp],rdi
    mov r13,rdx

    mul rbx
    add r10,rax
    mov rax,QWORD[r15*8+rcx]
    adc rdx,0
    add r10,QWORD[r15*8+rsp]
    adc rdx,0
    mov r11,rdx

    mul rbp
    add r13,rax
    mov rax,QWORD[8+r15*8+rsi]
    adc rdx,0
    add r13,r10
    adc rdx,0
    mov QWORD[((-8))+r15*8+rsp],r13
    mov rdi,rdx

    mul rbx
    add r11,rax
    mov rax,QWORD[8+r15*8+rcx]
    adc rdx,0
    add r11,QWORD[8+r15*8+rsp]
    adc rdx,0
    lea r15,[4+r15]
    mov r10,rdx

    mul rbp
    add rdi,rax
    mov rax,QWORD[((-16))+r15*8+rsi]
    adc rdx,0
    add rdi,r11
    adc rdx,0
    mov QWORD[((-32))+r15*8+rsp],rdi
    mov r13,rdx
    cmp r15,r9
    jb NEAR $L$inner4x

    ; Tail of the row; the outer index is advanced here.
    mul rbx
    add r10,rax
    mov rax,QWORD[((-16))+r15*8+rcx]
    adc rdx,0
    add r10,QWORD[((-16))+r15*8+rsp]
    adc rdx,0
    mov r11,rdx

    mul rbp
    add r13,rax
    mov rax,QWORD[((-8))+r15*8+rsi]
    adc rdx,0
    add r13,r10
    adc rdx,0
    mov QWORD[((-24))+r15*8+rsp],r13
    mov rdi,rdx

    mul rbx
    add r11,rax
    mov rax,QWORD[((-8))+r15*8+rcx]
    adc rdx,0
    add r11,QWORD[((-8))+r15*8+rsp]
    adc rdx,0
    lea r14,[1+r14]
    mov r10,rdx

    mul rbp
    add rdi,rax
    mov rax,QWORD[rsi]
    adc rdx,0
    add rdi,r11
    adc rdx,0
    mov QWORD[((-16))+r15*8+rsp],rdi
    mov r13,rdx

    xor rdi,rdi
    add r13,r10
    adc rdi,0
    add r13,QWORD[r9*8+rsp]         ; add the previous row's top carry word
    adc rdi,0
    mov QWORD[((-8))+r15*8+rsp],r13
    mov QWORD[r15*8+rsp],rdi

    cmp r14,r9
    jb NEAR $L$outer4x
    ; Final subtraction, four words per iteration; rdi = spilled output pointer.
    mov rdi,QWORD[16+r9*8+rsp]
    lea r15,[((-4))+r9]
    mov rax,QWORD[rsp]
    mov rdx,QWORD[8+rsp]
    shr r15,2                       ; r15 = (count-4)/4 loop iterations
    lea rsi,[rsp]
    xor r14,r14

    sub rax,QWORD[rcx]              ; starts the borrow chain continued by the sbb's below
    mov rbx,QWORD[16+rsi]
    mov rbp,QWORD[24+rsi]
    sbb rdx,QWORD[8+rcx]

$L$sub4x:
    mov QWORD[r14*8+rdi],rax
    mov QWORD[8+r14*8+rdi],rdx
    sbb rbx,QWORD[16+r14*8+rcx]
    mov rax,QWORD[32+r14*8+rsi]
    mov rdx,QWORD[40+r14*8+rsi]
    sbb rbp,QWORD[24+r14*8+rcx]
    mov QWORD[16+r14*8+rdi],rbx
    mov QWORD[24+r14*8+rdi],rbp
    sbb rax,QWORD[32+r14*8+rcx]
    mov rbx,QWORD[48+r14*8+rsi]
    mov rbp,QWORD[56+r14*8+rsi]
    sbb rdx,QWORD[40+r14*8+rcx]
    lea r14,[4+r14]
    dec r15
    jnz NEAR $L$sub4x

    mov QWORD[r14*8+rdi],rax
    mov rax,QWORD[32+r14*8+rsi]
    sbb rbx,QWORD[16+r14*8+rcx]
    mov QWORD[8+r14*8+rdi],rdx
    sbb rbp,QWORD[24+r14*8+rcx]
    mov QWORD[16+r14*8+rdi],rbx

    sbb rax,0                       ; fold the final borrow into the top scratch word
    mov QWORD[24+r14*8+rdi],rbp
    pxor xmm0,xmm0                  ; xmm0 = 0, used to wipe the scratch vector
; The bytes below encode: movq xmm4,rax
DB 102,72,15,110,224
    pcmpeqd xmm5,xmm5               ; xmm5 = all ones
    pshufd xmm4,xmm4,0              ; broadcast the low dword of the mask
    mov r15,r9
    pxor xmm5,xmm4                  ; xmm5 = ~xmm4
    shr r15,2                       ; count/4 iterations of 32 bytes each
    xor eax,eax                     ; rax = byte offset

    jmp NEAR $L$copy4x
; Branch-free select between scratch (masked by xmm4) and the output
; vector (masked by xmm5); the scratch is zeroed as it is read.
ALIGN 16
$L$copy4x:
    movdqa xmm1,XMMWORD[rax*1+rsp]
    movdqu xmm2,XMMWORD[rax*1+rdi]
    pand xmm1,xmm4
    pand xmm2,xmm5
    movdqa xmm3,XMMWORD[16+rax*1+rsp]
    movdqa XMMWORD[rax*1+rsp],xmm0
    por xmm1,xmm2
    movdqu xmm2,XMMWORD[16+rax*1+rdi]
    movdqu XMMWORD[rax*1+rdi],xmm1
    pand xmm3,xmm4
    pand xmm2,xmm5
    movdqa XMMWORD[16+rax*1+rsp],xmm0
    por xmm3,xmm2
    movdqu XMMWORD[16+rax*1+rdi],xmm3
    lea rax,[32+rax]
    dec r15
    jnz NEAR $L$copy4x
    mov rsi,QWORD[8+r9*8+rsp]       ; entry-time rsp

    mov rax,1                       ; return value
    mov r15,QWORD[((-48))+rsi]
    mov r14,QWORD[((-40))+rsi]
    mov r13,QWORD[((-32))+rsi]
    mov r12,QWORD[((-24))+rsi]
    mov rbp,QWORD[((-16))+rsi]
    mov rbx,QWORD[((-8))+rsi]
    lea rsp,[rsi]

$L$mul4x_epilogue:
    mov rdi,QWORD[8+rsp] ;WIN64 epilogue
    mov rsi,QWORD[16+rsp]
    DB 0F3h,0C3h ;repret

$L$SEH_end_bn_mul4x_mont:
EXTERN bn_sqrx8x_internal
EXTERN bn_sqr8x_internal

; ---------------------------------------------------------------------------
; bn_sqr8x_mont: squaring path. Builds the frame, calls one of the two
; external kernels (bn_sqrx8x_internal or bn_sqr8x_internal, neither defined
; in this file), then performs the final subtract and masked copy.
; ---------------------------------------------------------------------------
ALIGN 32
bn_sqr8x_mont:
    mov QWORD[8+rsp],rdi ;WIN64 prologue
    mov QWORD[16+rsp],rsi
    mov rax,rsp
$L$SEH_begin_bn_sqr8x_mont:
    mov rdi,rcx
    mov rsi,rdx
    mov rdx,r8
    mov rcx,r9
    mov r8,QWORD[40+rsp]
    mov r9,QWORD[48+rsp]

    mov rax,rsp

$L$sqr8x_enter:
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15

$L$sqr8x_prologue:

    mov r10d,r9d
    shl r9d,3                       ; r9 = length in bytes
    shl r10,3+2                     ; r10 = 32 * word count
    neg r9                          ; r9 = -bytes

    ; Choose the frame address (rbp) from the low 12 bits of the distance
    ; between the tentative frame and the input vector at rsi.
    ; NOTE(review): looks like a page-offset aliasing avoidance scheme; the
    ; rationale is not stated in this file -- confirm against the Perl source.
    lea r11,[((-64))+r9*2+rsp]
    mov rbp,rsp
    mov r8,QWORD[r8]
    sub r11,rsi
    and r11,4095
    cmp r10,r11
    jb NEAR $L$sqr8x_sp_alt
    sub rbp,r11
    lea rbp,[((-64))+r9*2+rbp]
    jmp NEAR $L$sqr8x_sp_done

ALIGN 32
$L$sqr8x_sp_alt:
    lea r10,[((4096-64))+r9*2]
    lea rbp,[((-64))+r9*2+rbp]
    sub r11,r10
    mov r10,0                       ; mov (not xor) so the flags from sub survive for cmovc
    cmovc r11,r10
    sub rbp,r11
$L$sqr8x_sp_done:
    and rbp,-64
    ; Lower rsp to rbp in steps of at most 4096 bytes, touching each step.
    mov r11,rsp
    sub r11,rbp
    and r11,-4096
    lea rsp,[rbp*1+r11]
    mov r10,QWORD[rsp]
    cmp rsp,rbp
    ja NEAR $L$sqr8x_page_walk
    jmp NEAR $L$sqr8x_page_walk_done

ALIGN 16
$L$sqr8x_page_walk:
    lea rsp,[((-4096))+rsp]
    mov r10,QWORD[rsp]
    cmp rsp,rbp
    ja NEAR $L$sqr8x_page_walk
$L$sqr8x_page_walk_done:

    mov r10,r9                      ; r10 = -bytes
    neg r9                          ; r9 = +bytes

    mov QWORD[32+rsp],r8            ; frame slot: the qword loaded through r8
    mov QWORD[40+rsp],rax           ; frame slot: entry-time rsp

$L$sqr8x_body:

; The bytes below encode: movq xmm2,rcx
DB 102,72,15,110,209
    pxor xmm0,xmm0
; The bytes below encode: movq xmm1,rdi
DB 102,72,15,110,207
; The bytes below encode: movq xmm3,r10
DB 102,73,15,110,218
    lea rax,[OPENSSL_ia32cap_P]
    mov eax,DWORD[8+rax]
    and eax,0x80100
    cmp eax,0x80100
    jne NEAR $L$sqr8x_nox           ; capability bits not both set -> bn_sqr8x_internal

    call bn_sqrx8x_internal

    ; NOTE(review): the register state expected here is defined by the
    ; external kernel, which is not visible in this file.
    lea rbx,[rcx*1+r8]
    mov r9,rcx
    mov rdx,rcx
; The bytes below encode: movq rdi,xmm1 (reload the output pointer)
DB 102,72,15,126,207
    sar rcx,3+2
    jmp NEAR $L$sqr8x_sub

ALIGN 32
$L$sqr8x_nox:
    call bn_sqr8x_internal

    ; NOTE(review): as above, the post-call register contract comes from the
    ; external kernel.
    lea rbx,[r9*1+rdi]
    mov rcx,r9
    mov rdx,r9
; The bytes below encode: movq rdi,xmm1 (reload the output pointer)
DB 102,72,15,126,207
    sar rcx,3+2
    jmp NEAR $L$sqr8x_sub

; Subtract the vector at rbp from the vector at rbx into the output, four
; words per iteration; rcx counts up towards zero.
ALIGN 32
$L$sqr8x_sub:
    mov r12,QWORD[rbx]
    mov r13,QWORD[8+rbx]
    mov r14,QWORD[16+rbx]
    mov r15,QWORD[24+rbx]
    lea rbx,[32+rbx]
    sbb r12,QWORD[rbp]
    sbb r13,QWORD[8+rbp]
    sbb r14,QWORD[16+rbp]
    sbb r15,QWORD[24+rbp]
    lea rbp,[32+rbp]
    mov QWORD[rdi],r12
    mov QWORD[8+rdi],r13
    mov QWORD[16+rdi],r14
    mov QWORD[24+rdi],r15
    lea rdi,[32+rdi]
    inc rcx                         ; inc leaves CF intact for the next sbb
    jnz NEAR $L$sqr8x_sub

    sbb rax,0
    lea rbx,[r9*1+rbx]
    lea rdi,[r9*1+rdi]

; The bytes below encode: movq xmm1,rax
DB 102,72,15,110,200
    pxor xmm0,xmm0
    pshufd xmm1,xmm1,0              ; broadcast the low dword of the mask
    mov rsi,QWORD[40+rsp]           ; entry-time rsp

    jmp NEAR $L$sqr8x_cond_copy

; Masked select between the vector at rbx and the output vector; the words
; at rbx and at rbx+rdx are overwritten with zero as the loop advances.
ALIGN 32
$L$sqr8x_cond_copy:
    movdqa xmm2,XMMWORD[rbx]
    movdqa xmm3,XMMWORD[16+rbx]
    lea rbx,[32+rbx]
    movdqu xmm4,XMMWORD[rdi]
    movdqu xmm5,XMMWORD[16+rdi]
    lea rdi,[32+rdi]
    movdqa XMMWORD[(-32)+rbx],xmm0
    movdqa XMMWORD[(-16)+rbx],xmm0
    movdqa XMMWORD[(-32)+rdx*1+rbx],xmm0
    movdqa XMMWORD[(-16)+rdx*1+rbx],xmm0
    pcmpeqd xmm0,xmm1               ; xmm0 = lanes where the mask is zero
    pand xmm2,xmm1
    pand xmm3,xmm1
    pand xmm4,xmm0
    pand xmm5,xmm0
    pxor xmm0,xmm0
    por xmm4,xmm2
    por xmm5,xmm3
    movdqu XMMWORD[(-32)+rdi],xmm4
    movdqu XMMWORD[(-16)+rdi],xmm5
    add r9,32
    jnz NEAR $L$sqr8x_cond_copy

    mov rax,1                       ; return value
    mov r15,QWORD[((-48))+rsi]
    mov r14,QWORD[((-40))+rsi]
    mov r13,QWORD[((-32))+rsi]
    mov r12,QWORD[((-24))+rsi]
    mov rbp,QWORD[((-16))+rsi]
    mov rbx,QWORD[((-8))+rsi]
    lea rsp,[rsi]

$L$sqr8x_epilogue:
    mov rdi,QWORD[8+rsp] ;WIN64 epilogue
    mov rsi,QWORD[16+rsp]
    DB 0F3h,0C3h ;repret

$L$SEH_end_bn_sqr8x_mont:

; ---------------------------------------------------------------------------
; bn_mulx4x_mont: multiply path built on mulx/adcx/adox, four words per
; inner iteration. Reached from $L$mul4x_enter.
;
; Frame slots written before $L$mulx4x_body:
;   [rsp]     length in bytes
;   [8+rsp]   pointer to the next word of the second input (updated per row)
;   [16+rsp]  end pointer of the second input (rdx + bytes)
;   [24+rsp]  the qword loaded through r8
;   [32+rsp]  output pointer
;   [40+rsp]  entry-time rsp
;   [48+rsp]  inner-loop iteration count (bytes/32 - 1)
;   [64+rsp]  start of the scratch vector
; ---------------------------------------------------------------------------
ALIGN 32
bn_mulx4x_mont:
    mov QWORD[8+rsp],rdi ;WIN64 prologue
    mov QWORD[16+rsp],rsi
    mov rax,rsp
$L$SEH_begin_bn_mulx4x_mont:
    mov rdi,rcx
    mov rsi,rdx
    mov rdx,r8
    mov rcx,r9
    mov r8,QWORD[40+rsp]
    mov r9,QWORD[48+rsp]

    mov rax,rsp

$L$mulx4x_enter:
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15

$L$mulx4x_prologue:

    shl r9d,3                       ; r9 = length in bytes
    xor r10,r10
    sub r10,r9                      ; r10 = -bytes
    mov r8,QWORD[r8]
    lea rbp,[((-72))+r10*1+rsp]
    and rbp,-128                    ; frame is 128-byte aligned
    ; Lower rsp to rbp in steps of at most 4096 bytes, touching each step.
    mov r11,rsp
    sub r11,rbp
    and r11,-4096
    lea rsp,[rbp*1+r11]
    mov r10,QWORD[rsp]
    cmp rsp,rbp
    ja NEAR $L$mulx4x_page_walk
    jmp NEAR $L$mulx4x_page_walk_done

ALIGN 16
$L$mulx4x_page_walk:
    lea rsp,[((-4096))+rsp]
    mov r10,QWORD[rsp]
    cmp rsp,rbp
    ja NEAR $L$mulx4x_page_walk
$L$mulx4x_page_walk_done:

    lea r10,[r9*1+rdx]              ; r10 = end of the second input vector

    mov QWORD[rsp],r9
    shr r9,5
    mov QWORD[16+rsp],r10
    sub r9,1
    mov QWORD[24+rsp],r8
    mov QWORD[32+rsp],rdi
    mov QWORD[40+rsp],rax

    mov QWORD[48+rsp],r9
    jmp NEAR $L$mulx4x_body

ALIGN 32
$L$mulx4x_body:
    lea rdi,[8+rdx]
    mov rdx,QWORD[rdx]              ; rdx = word 0 of the second input (implicit mulx operand)
    lea rbx,[((64+32))+rsp]
    mov r9,rdx

    mulx rax,r8,QWORD[rsi]
    mulx r14,r11,QWORD[8+rsi]
    add r11,rax
    mov QWORD[8+rsp],rdi
    mulx r13,r12,QWORD[16+rsi]
    adc r12,r14
    adc r13,0

    mov rdi,r8
    imul r8,QWORD[24+rsp]           ; r8 = per-row factor for the rcx vector
    xor rbp,rbp                     ; rbp = 0; also clears CF and OF for the adcx/adox chains

    mulx r14,rax,QWORD[24+rsi]
    mov rdx,r8
    lea rsi,[32+rsi]
    adcx r13,rax
    adcx r14,rbp

    mulx r10,rax,QWORD[rcx]
    adcx rdi,rax
    adox r10,r11
    mulx r11,rax,QWORD[8+rcx]
    adcx r10,rax
    adox r11,r12
; NOTE(review): the bytes below appear to encode mulx r12,rax,QWORD[16+rcx]
; with a forced 32-bit displacement -- confirm with a disassembler.
DB 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00
    mov rdi,QWORD[48+rsp]           ; rdi = inner-loop iteration count
    mov QWORD[((-32))+rbx],r10
    adcx r11,rax
    adox r12,r13
    mulx r15,rax,QWORD[24+rcx]
    mov rdx,r9
    mov QWORD[((-24))+rbx],r11
    adcx r12,rax
    adox r15,rbp
    lea rcx,[32+rcx]
    mov QWORD[((-16))+rbx],r12

    jmp NEAR $L$mulx4x_1st

; First row, four words per iteration.
ALIGN 32
$L$mulx4x_1st:
    adcx r15,rbp
    mulx rax,r10,QWORD[rsi]
    adcx r10,r14
    mulx r14,r11,QWORD[8+rsi]
    adcx r11,rax
    mulx rax,r12,QWORD[16+rsi]
    adcx r12,r14
    mulx r14,r13,QWORD[24+rsi]
; Two 0x67 prefix bytes ahead of the next instruction.
; NOTE(review): presumably padding -- confirm against the Perl source.
DB 0x67,0x67
    mov rdx,r8
    adcx r13,rax
    adcx r14,rbp
    lea rsi,[32+rsi]
    lea rbx,[32+rbx]

    adox r10,r15
    mulx r15,rax,QWORD[rcx]
    adcx r10,rax
    adox r11,r15
    mulx r15,rax,QWORD[8+rcx]
    adcx r11,rax
    adox r12,r15
    mulx r15,rax,QWORD[16+rcx]
    mov QWORD[((-40))+rbx],r10
    adcx r12,rax
    mov QWORD[((-32))+rbx],r11
    adox r13,r15
    mulx r15,rax,QWORD[24+rcx]
    mov rdx,r9
    mov QWORD[((-24))+rbx],r12
    adcx r13,rax
    adox r15,rbp
    lea rcx,[32+rcx]
    mov QWORD[((-16))+rbx],r13

    dec rdi
    jnz NEAR $L$mulx4x_1st

    mov rax,QWORD[rsp]              ; rax = length in bytes
    mov rdi,QWORD[8+rsp]            ; rdi = next word of the second input
    adc r15,rbp
    add r14,r15
    sbb r15,r15                     ; r15 = 0 or -1 from the carry out of the add
    mov QWORD[((-8))+rbx],r14
    jmp NEAR $L$mulx4x_outer

; Remaining rows.
ALIGN 32
$L$mulx4x_outer:
    mov rdx,QWORD[rdi]
    lea rdi,[8+rdi]
    sub rsi,rax                     ; rewind the first-input pointer
    mov QWORD[rbx],r15              ; store the carry word from the previous row
    lea rbx,[((64+32))+rsp]
    sub rcx,rax                     ; rewind the rcx-vector pointer

    mulx r11,r8,QWORD[rsi]
    xor ebp,ebp                     ; rbp = 0; clears CF and OF
    mov r9,rdx
    mulx r12,r14,QWORD[8+rsi]
    adox r8,QWORD[((-32))+rbx]
    adcx r11,r14
    mulx r13,r15,QWORD[16+rsi]
    adox r11,QWORD[((-24))+rbx]
    adcx r12,r15
    adox r12,QWORD[((-16))+rbx]
    adcx r13,rbp
    adox r13,rbp

    mov QWORD[8+rsp],rdi
    mov r15,r8
    imul r8,QWORD[24+rsp]
    xor ebp,ebp

    mulx r14,rax,QWORD[24+rsi]
    mov rdx,r8
    adcx r13,rax
    adox r13,QWORD[((-8))+rbx]
    adcx r14,rbp
    lea rsi,[32+rsi]
    adox r14,rbp

    mulx r10,rax,QWORD[rcx]
    adcx r15,rax
    adox r10,r11
    mulx r11,rax,QWORD[8+rcx]
    adcx r10,rax
    adox r11,r12
    mulx r12,rax,QWORD[16+rcx]
    mov QWORD[((-32))+rbx],r10
    adcx r11,rax
    adox r12,r13
    mulx r15,rax,QWORD[24+rcx]
    mov rdx,r9
    mov QWORD[((-24))+rbx],r11
    lea rcx,[32+rcx]
    adcx r12,rax
    adox r15,rbp
    mov rdi,QWORD[48+rsp]           ; rdi = inner-loop iteration count
    mov QWORD[((-16))+rbx],r12

    jmp NEAR $L$mulx4x_inner

ALIGN 32
$L$mulx4x_inner:
    mulx rax,r10,QWORD[rsi]
    adcx r15,rbp
    adox r10,r14
    mulx r14,r11,QWORD[8+rsi]
    adcx r10,QWORD[rbx]
    adox r11,rax
    mulx rax,r12,QWORD[16+rsi]
    adcx r11,QWORD[8+rbx]
    adox r12,r14
    mulx r14,r13,QWORD[24+rsi]
    mov rdx,r8
    adcx r12,QWORD[16+rbx]
    adox r13,rax
    adcx r13,QWORD[24+rbx]
    adox r14,rbp
    lea rsi,[32+rsi]
    lea rbx,[32+rbx]
    adcx r14,rbp

    adox r10,r15
    mulx r15,rax,QWORD[rcx]
    adcx r10,rax
    adox r11,r15
    mulx r15,rax,QWORD[8+rcx]
    adcx r11,rax
    adox r12,r15
    mulx r15,rax,QWORD[16+rcx]
    mov QWORD[((-40))+rbx],r10
    adcx r12,rax
    adox r13,r15
    mulx r15,rax,QWORD[24+rcx]
    mov rdx,r9
    mov QWORD[((-32))+rbx],r11
    mov QWORD[((-24))+rbx],r12
    adcx r13,rax
    adox r15,rbp
    lea rcx,[32+rcx]
    mov QWORD[((-16))+rbx],r13

    dec rdi
    jnz NEAR $L$mulx4x_inner

    mov rax,QWORD[rsp]
    mov rdi,QWORD[8+rsp]
    adc r15,rbp
    sub rbp,QWORD[rbx]              ; 0 - stored carry word: sets CF from the previous row's carry
    adc r14,r15
    sbb r15,r15
    mov QWORD[((-8))+rbx],r14

    cmp rdi,QWORD[16+rsp]           ; all words of the second input consumed?
    jne NEAR $L$mulx4x_outer

    lea rbx,[64+rsp]                ; rbx = start of the scratch vector
    sub rcx,rax
    neg r15
    mov rdx,rax                     ; rdx = length in bytes
    shr rax,3+2                     ; rax = bytes/32 iterations
    mov rdi,QWORD[32+rsp]           ; rdi = output pointer
    jmp NEAR $L$mulx4x_sub

; Output = scratch - rcx vector, four words per iteration.
ALIGN 32
$L$mulx4x_sub:
    mov r11,QWORD[rbx]
    mov r12,QWORD[8+rbx]
    mov r13,QWORD[16+rbx]
    mov r14,QWORD[24+rbx]
    lea rbx,[32+rbx]
    sbb r11,QWORD[rcx]
    sbb r12,QWORD[8+rcx]
    sbb r13,QWORD[16+rcx]
    sbb r14,QWORD[24+rcx]
    lea rcx,[32+rcx]
    mov QWORD[rdi],r11
    mov QWORD[8+rdi],r12
    mov QWORD[16+rdi],r13
    mov QWORD[24+rdi],r14
    lea rdi,[32+rdi]
    dec rax                         ; dec leaves CF intact for the next sbb
    jnz NEAR $L$mulx4x_sub

    sbb r15,0
    lea rbx,[64+rsp]
    sub rdi,rdx                     ; rewind the output pointer

; The bytes below encode: movq xmm1,r15
DB 102,73,15,110,207
    pxor xmm0,xmm0
    pshufd xmm1,xmm1,0              ; broadcast the low dword of the mask
    mov rsi,QWORD[40+rsp]           ; entry-time rsp

    jmp NEAR $L$mulx4x_cond_copy

; Masked select between the scratch vector and the output vector; the
; scratch is zeroed as it is read.
ALIGN 32
$L$mulx4x_cond_copy:
    movdqa xmm2,XMMWORD[rbx]
    movdqa xmm3,XMMWORD[16+rbx]
    lea rbx,[32+rbx]
    movdqu xmm4,XMMWORD[rdi]
    movdqu xmm5,XMMWORD[16+rdi]
    lea rdi,[32+rdi]
    movdqa XMMWORD[(-32)+rbx],xmm0
    movdqa XMMWORD[(-16)+rbx],xmm0
    pcmpeqd xmm0,xmm1               ; xmm0 = lanes where the mask is zero
    pand xmm2,xmm1
    pand xmm3,xmm1
    pand xmm4,xmm0
    pand xmm5,xmm0
    pxor xmm0,xmm0
    por xmm4,xmm2
    por xmm5,xmm3
    movdqu XMMWORD[(-32)+rdi],xmm4
    movdqu XMMWORD[(-16)+rdi],xmm5
    sub rdx,32
    jnz NEAR $L$mulx4x_cond_copy

    mov QWORD[rbx],rdx              ; rdx is zero here: clears the word after the scratch vector

    mov rax,1                       ; return value
    mov r15,QWORD[((-48))+rsi]
    mov r14,QWORD[((-40))+rsi]
    mov r13,QWORD[((-32))+rsi]
    mov r12,QWORD[((-24))+rsi]
    mov rbp,QWORD[((-16))+rsi]
    mov rbx,QWORD[((-8))+rsi]
    lea rsp,[rsi]

$L$mulx4x_epilogue:
    mov rdi,QWORD[8+rsp] ;WIN64 epilogue
    mov rsi,QWORD[16+rsp]
    DB 0F3h,0C3h ;repret

$L$SEH_end_bn_mulx4x_mont:
; NUL-terminated ASCII identification string:
; "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83
DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
DB 115,108,46,111,114,103,62,0
ALIGN 16
EXTERN __imp_RtlVirtualUnwind

; ---------------------------------------------------------------------------
; mul_handler: exception handler referenced from the .xdata entries of
; bn_mul_mont and bn_mul4x_mont.
; NOTE(review): r8 and r9 are presumably the CONTEXT and DISPATCHER_CONTEXT
; pointers of the Win64 language-specific handler convention; the numeric
; offsets below are not named in this file -- confirm against the SDK headers.
; The two DWORDs read through r11 match the two label RVAs stored after the
; handler address in .xdata (body label, epilogue label).
; ---------------------------------------------------------------------------
ALIGN 16
mul_handler:
    push rsi
    push rdi
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15
    pushfq
    sub rsp,64

    mov rax,QWORD[120+r8]
    mov rbx,QWORD[248+r8]           ; rbx = address compared against the label bounds below

    mov rsi,QWORD[8+r9]
    mov r11,QWORD[56+r9]

    mov r10d,DWORD[r11]
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jb NEAR $L$common_seh_tail      ; below the first label: skip the register restore

    mov rax,QWORD[152+r8]

    mov r10d,DWORD[4+r11]
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jae NEAR $L$common_seh_tail     ; at or past the second label: skip the register restore

    mov r10,QWORD[192+r8]
    mov rax,QWORD[8+r10*8+rax]      ; same [8+r9*8+rsp] slot the function bodies use for the entry-time rsp

    jmp NEAR $L$common_pop_regs

; ---------------------------------------------------------------------------
; sqr_handler: exception handler referenced from the .xdata entries of
; bn_sqr8x_mont and bn_mulx4x_mont. Reads three label RVAs through r11
; (prologue, body, epilogue), matching those .xdata entries.
; ---------------------------------------------------------------------------
ALIGN 16
sqr_handler:
    push rsi
    push rdi
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15
    pushfq
    sub rsp,64

    mov rax,QWORD[120+r8]
    mov rbx,QWORD[248+r8]

    mov rsi,QWORD[8+r9]
    mov r11,QWORD[56+r9]

    mov r10d,DWORD[r11]
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jb NEAR $L$common_seh_tail      ; below the first label

    mov r10d,DWORD[4+r11]
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jb NEAR $L$common_pop_regs      ; between first and second label: rax from [120+r8] is used as-is

    mov rax,QWORD[152+r8]

    mov r10d,DWORD[8+r11]
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jae NEAR $L$common_seh_tail     ; at or past the third label

    mov rax,QWORD[40+rax]           ; same [40+rsp] slot the function bodies use for the entry-time rsp

; Shared by both handlers: reload the six registers pushed by the function
; prologues from just below rax and store them into the record at r8.
$L$common_pop_regs:
    mov rbx,QWORD[((-8))+rax]
    mov rbp,QWORD[((-16))+rax]
    mov r12,QWORD[((-24))+rax]
    mov r13,QWORD[((-32))+rax]
    mov r14,QWORD[((-40))+rax]
    mov r15,QWORD[((-48))+rax]
    mov QWORD[144+r8],rbx
    mov QWORD[160+r8],rbp
    mov QWORD[216+r8],r12
    mov QWORD[224+r8],r13
    mov QWORD[232+r8],r14
    mov QWORD[240+r8],r15

; Shared tail: recover the rdi/rsi values the WIN64 prologue stored at
; [8+rax]/[16+rax], update the record at r8, copy it, and call
; RtlVirtualUnwind.
$L$common_seh_tail:
    mov rdi,QWORD[8+rax]
    mov rsi,QWORD[16+rax]
    mov QWORD[152+r8],rax
    mov QWORD[168+r8],rsi
    mov QWORD[176+r8],rdi

    mov rdi,QWORD[40+r9]
    mov rsi,r8
    mov ecx,154
    ; The dword below is the byte sequence FC F3 48 A5 = cld; rep movsq,
    ; i.e. copy ecx = 154 qwords from [rsi] to [rdi].
    DD 0xa548f3fc

    mov rsi,r9
    xor rcx,rcx
    mov rdx,QWORD[8+rsi]
    mov r8,QWORD[rsi]
    mov r9,QWORD[16+rsi]
    mov r10,QWORD[40+rsi]
    lea r11,[56+rsi]
    lea r12,[24+rsi]
    ; Arguments five to eight go on the stack above the 32-byte home space.
    mov QWORD[32+rsp],r10
    mov QWORD[40+rsp],r11
    mov QWORD[48+rsp],r12
    mov QWORD[56+rsp],rcx
    call QWORD[__imp_RtlVirtualUnwind]

    mov eax,1                       ; NOTE(review): presumably ExceptionContinueSearch -- confirm
    add rsp,64
    popfq
    pop r15
    pop r14
    pop r13
    pop r12
    pop rbp
    pop rbx
    pop rdi
    pop rsi
    DB 0F3h,0C3h ;repret


; Function table: one (begin, end, unwind-info) triple of image-relative
; addresses per routine.
section .pdata rdata align=4
ALIGN 4
    DD $L$SEH_begin_bn_mul_mont wrt ..imagebase
    DD $L$SEH_end_bn_mul_mont wrt ..imagebase
    DD $L$SEH_info_bn_mul_mont wrt ..imagebase

    DD $L$SEH_begin_bn_mul4x_mont wrt ..imagebase
    DD $L$SEH_end_bn_mul4x_mont wrt ..imagebase
    DD $L$SEH_info_bn_mul4x_mont wrt ..imagebase

    DD $L$SEH_begin_bn_sqr8x_mont wrt ..imagebase
    DD $L$SEH_end_bn_sqr8x_mont wrt ..imagebase
    DD $L$SEH_info_bn_sqr8x_mont wrt ..imagebase
    DD $L$SEH_begin_bn_mulx4x_mont wrt ..imagebase
    DD $L$SEH_end_bn_mulx4x_mont wrt ..imagebase
    DD $L$SEH_info_bn_mulx4x_mont wrt ..imagebase
; Unwind info: a 4-byte header (9,0,0,0), the handler address, then the
; label addresses the handler reads (two for mul_handler, three for
; sqr_handler).
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_bn_mul_mont:
DB 9,0,0,0
    DD mul_handler wrt ..imagebase
    DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
$L$SEH_info_bn_mul4x_mont:
DB 9,0,0,0
    DD mul_handler wrt ..imagebase
    DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
$L$SEH_info_bn_sqr8x_mont:
DB 9,0,0,0
    DD sqr_handler wrt ..imagebase
    DD $L$sqr8x_prologue wrt ..imagebase,$L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase
ALIGN 8
$L$SEH_info_bn_mulx4x_mont:
DB 9,0,0,0
    DD sqr_handler wrt ..imagebase
    DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase
ALIGN 8