; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD

%include "ring_core_generated/prefix_symbols_nasm.inc"
section	.text code align=64


EXTERN	OPENSSL_ia32cap_P

global	bn_mul_mont

;-----------------------------------------------------------------------
; bn_mul_mont -- the only symbol declared global in this file.
; The identification string stored near the end of .text reads
; "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>".
;
; ABI: Microsoft x64. The incoming arguments rcx, rdx, r8, r9, [40+rsp] and
; [48+rsp] are moved into rdi, rsi, rdx, rcx, r8, r9. As used below:
;   rdi      destination vector (written by the final subtract/copy loops)
;   rsi, rdx the two source vectors that are multiplied word by word
;   rcx      vector multiplied by the per-round factor and subtracted at
;            the end (presumably the modulus -- confirm against the C
;            prototype)
;   r8       pointer to one word that is loaded once and used to derive the
;            per-round factor (presumably n0 -- confirm)
;   r9       word count; only the low 32 bits are used
; Returns 1 in rax. rbx, rbp, r12-r15 are pushed on entry and reloaded
; before returning; rdi and rsi are restored from [8+rsp] / [16+rsp].
;
; Dispatch: a count that is not a multiple of 4, or is below 8, takes the
; one-word-at-a-time code at $L$mul_enter. Otherwise, if rdx == rsi and the
; count is a multiple of 8, control goes to $L$sqr8x_enter; everything else
; goes to $L$mul4x_enter (with r11d preloaded from OPENSSL_ia32cap_P+8).
;-----------------------------------------------------------------------
ALIGN	16
bn_mul_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_mul_mont:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

; Writing r9d clears the upper 32 bits of r9.
	mov	r9d,r9d
; rax keeps the entry rsp; it is stored in the frame once that is set up.
	mov	rax,rsp

	test	r9d,3
	jnz	NEAR $L$mul_enter
	cmp	r9d,8
	jb	NEAR $L$mul_enter
	mov	r11d,DWORD[((OPENSSL_ia32cap_P+8))]
	cmp	rdx,rsi
	jne	NEAR $L$mul4x_enter
	test	r9d,7
	jz	NEAR $L$sqr8x_enter
	jmp	NEAR $L$mul4x_enter

ALIGN	16
$L$mul_enter:
	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15

; r10 = rsp - 8*r9 - 16, rounded down to a multiple of 1024: the frame base.
	neg	r9
	mov	r11,rsp
	lea	r10,[((-16))+r9*8+rsp]
	neg	r9
	and	r10,-1024

; Move rsp down to r10 in steps of at most 4096 bytes, reading one word at
; each step so that every page between the old and new rsp is touched.
	sub	r11,r10
	and	r11,-4096
	lea	rsp,[r11*1+r10]
	mov	r11,QWORD[rsp]
	cmp	rsp,r10
	ja	NEAR $L$mul_page_walk
	jmp	NEAR $L$mul_page_walk_done

ALIGN	16
$L$mul_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r11,QWORD[rsp]
	cmp	rsp,r10
	ja	NEAR $L$mul_page_walk
$L$mul_page_walk_done:

; Frame: [rsp .. rsp+8*r9] scratch vector (r9+1 words),
;        [rsp+8*r9+8]      entry rsp (read back by the epilogue and by
;                          mul_handler).
	mov	QWORD[8+r9*8+rsp],rax

$L$mul_body:
; First pass. r12 = second source vector, r8 = word loaded from [r8],
; rbx = first word of r12, r14 = outer index, r15 = inner index.
	mov	r12,rdx
	mov	r8,QWORD[r8]
	mov	rbx,QWORD[r12]
	mov	rax,QWORD[rsi]

	xor	r14,r14
	xor	r15,r15

	mov	rbp,r8
	mul	rbx
	mov	r10,rax
	mov	rax,QWORD[rcx]

; rbp = (low word of the product) * r8: the factor applied to [rcx] in this
; round.
	imul	rbp,r10
	mov	r11,rdx

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	r13,rdx

	lea	r15,[1+r15]
	jmp	NEAR $L$1st_enter

ALIGN	16
$L$1st:
	add	r13,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	r13,r11
	mov	r11,r10
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx

$L$1st_enter:
	mul	rbx
	add	r11,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	lea	r15,[1+r15]
	mov	r10,rdx

	mul	rbp
	cmp	r15,r9
	jne	NEAR $L$1st

	add	r13,rax
	mov	rax,QWORD[rsi]
	adc	rdx,0
	add	r13,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx
	mov	r11,r10

; Store the two top words of the scratch vector.
	xor	rdx,rdx
	add	r13,r11
	adc	rdx,0
	mov	QWORD[((-8))+r9*8+rsp],r13
	mov	QWORD[r9*8+rsp],rdx

	lea	r14,[1+r14]
	jmp	NEAR $L$outer
ALIGN	16
$L$outer:
; Remaining passes: same shape as the first pass, but each step also adds
; the word already held in the scratch vector.
	mov	rbx,QWORD[r14*8+r12]
	xor	r15,r15
	mov	rbp,r8
	mov	r10,QWORD[rsp]
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0

	imul	rbp,r10
	mov	r11,rdx

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	r10,QWORD[8+rsp]
	mov	r13,rdx

	lea	r15,[1+r15]
	jmp	NEAR $L$inner_enter

ALIGN	16
$L$inner:
	add	r13,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	mov	r10,QWORD[r15*8+rsp]
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx

$L$inner_enter:
	mul	rbx
	add	r11,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	add	r10,r11
	mov	r11,rdx
	adc	r11,0
	lea	r15,[1+r15]

	mul	rbp
	cmp	r15,r9
	jne	NEAR $L$inner

	add	r13,rax
	mov	rax,QWORD[rsi]
	adc	rdx,0
	add	r13,r10
	mov	r10,QWORD[r15*8+rsp]
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx

	xor	rdx,rdx
	add	r13,r11
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r9*8+rsp],r13
	mov	QWORD[r9*8+rsp],rdx

	lea	r14,[1+r14]
	cmp	r14,r9
	jb	NEAR $L$outer

; The xor below also clears CF, so the first sbb in $L$sub starts without a
; borrow. lea/mov/dec inside the loop leave CF untouched.
	xor	r14,r14
	mov	rax,QWORD[rsp]
	mov	r15,r9

ALIGN	16
; [rdi] = scratch vector - [rcx], one word per iteration with borrow.
$L$sub:	sbb	rax,QWORD[r14*8+rcx]
	mov	QWORD[r14*8+rdi],rax
	mov	rax,QWORD[8+r14*8+rsp]
	lea	r14,[1+r14]
	dec	r15
	jnz	NEAR $L$sub

; rax = top scratch word minus the final borrow, used as the mask for the
; scratch value; rbx = its bitwise complement, the mask for the value just
; written to [rdi].
	sbb	rax,0
	mov	rbx,-1
	xor	rbx,rax
	xor	r14,r14
	mov	r15,r9

$L$copy:
; Branch-free select: [rdi] = ([rdi] & rbx) | (scratch & rax). Each scratch
; word is overwritten (with the value of r9) after it has been read.
	mov	rcx,QWORD[r14*8+rdi]
	mov	rdx,QWORD[r14*8+rsp]
	and	rcx,rbx
	and	rdx,rax
	mov	QWORD[r14*8+rsp],r9
	or	rdx,rcx
	mov	QWORD[r14*8+rdi],rdx
	lea	r14,[1+r14]
	sub	r15,1
	jnz	NEAR $L$copy

; rsi = entry rsp saved in the frame; the six pushed registers sit just
; below it.
	mov	rsi,QWORD[8+r9*8+rsp]

	mov	rax,1
	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$mul_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_bn_mul_mont:

;-----------------------------------------------------------------------
; bn_mul4x_mont -- four-words-per-iteration variant of the loop above.
; No global directive for this label appears in this file; bn_mul_mont jumps
; to $L$mul4x_enter with r11d already loaded from OPENSSL_ia32cap_P+8 and
; with the same register assignment (rdi, rsi, rdx, rcx, r8, r9, rax).
; If bits 8 and 19 of r11d are both set (mask 0x80100) control moves on to
; $L$mulx4x_enter, whose code uses mulx/adcx/adox.
; Returns 1 in rax.
;-----------------------------------------------------------------------
ALIGN	16
bn_mul4x_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_mul4x_mont:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

	mov	r9d,r9d
	mov	rax,rsp

$L$mul4x_enter:
	and	r11d,0x80100
	cmp	r11d,0x80100
	je	NEAR $L$mulx4x_enter
	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15

; r10 = rsp - 8*r9 - 32, rounded down to a multiple of 1024: the frame base.
	neg	r9
	mov	r11,rsp
	lea	r10,[((-32))+r9*8+rsp]
	neg	r9
	and	r10,-1024

; Same page-by-page descent of rsp as in bn_mul_mont.
	sub	r11,r10
	and	r11,-4096
	lea	rsp,[r11*1+r10]
	mov	r11,QWORD[rsp]
	cmp	rsp,r10
	ja	NEAR $L$mul4x_page_walk
	jmp	NEAR $L$mul4x_page_walk_done

$L$mul4x_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r11,QWORD[rsp]
	cmp	rsp,r10
	ja	NEAR $L$mul4x_page_walk
$L$mul4x_page_walk_done:

; Frame: [rsp .. rsp+8*r9] scratch vector, [rsp+8*r9+8] entry rsp,
;        [rsp+8*r9+16] destination pointer (rdi is reused as a temporary).
	mov	QWORD[8+r9*8+rsp],rax

$L$mul4x_body:
	mov	QWORD[16+r9*8+rsp],rdi
	mov	r12,rdx
	mov	r8,QWORD[r8]
	mov	rbx,QWORD[r12]
	mov	rax,QWORD[rsi]

	xor	r14,r14
	xor	r15,r15

	mov	rbp,r8
	mul	rbx
	mov	r10,rax
	mov	rax,QWORD[rcx]

	imul	rbp,r10
	mov	r11,rdx

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	r15,[4+r15]
	adc	rdx,0
	mov	QWORD[rsp],rdi
	mov	r13,rdx
	jmp	NEAR $L$1st4x
ALIGN	16
$L$1st4x:
; First pass, four word positions per iteration; r15 advances by 4.
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+r15*8+rcx]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r15*8+rsp],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+r15*8+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],rdi
	mov	r13,rdx

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[8+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r15*8+rsp],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+r15*8+rcx]
	adc	rdx,0
	lea	r15,[4+r15]
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[((-16))+r15*8+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-32))+r15*8+rsp],rdi
	mov	r13,rdx
	cmp	r15,r9
	jb	NEAR $L$1st4x

; Tail of the first pass: the last two word positions and the top words.
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+r15*8+rcx]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r15*8+rsp],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+r15*8+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],rdi
	mov	r13,rdx

	xor	rdi,rdi
	add	r13,r10
	adc	rdi,0
	mov	QWORD[((-8))+r15*8+rsp],r13
	mov	QWORD[r15*8+rsp],rdi

	lea	r14,[1+r14]
ALIGN	4
$L$outer4x:
; Remaining passes: as above, plus the word already in the scratch vector.
	mov	rbx,QWORD[r14*8+r12]
	xor	r15,r15
	mov	r10,QWORD[rsp]
	mov	rbp,r8
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0

	imul	rbp,r10
	mov	r11,rdx

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	add	r11,QWORD[8+rsp]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	r15,[4+r15]
	adc	rdx,0
	mov	QWORD[rsp],rdi
	mov	r13,rdx
	jmp	NEAR $L$inner4x
ALIGN	16
$L$inner4x:
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+r15*8+rcx]
	adc	rdx,0
	add	r10,QWORD[((-16))+r15*8+rsp]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r15*8+rsp],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+r15*8+rcx]
	adc	rdx,0
	add	r11,QWORD[((-8))+r15*8+rsp]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],rdi
	mov	r13,rdx

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	add	r10,QWORD[r15*8+rsp]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[8+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r15*8+rsp],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+r15*8+rcx]
	adc	rdx,0
	add	r11,QWORD[8+r15*8+rsp]
	adc	rdx,0
	lea	r15,[4+r15]
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[((-16))+r15*8+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-32))+r15*8+rsp],rdi
	mov	r13,rdx
	cmp	r15,r9
	jb	NEAR $L$inner4x

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+r15*8+rcx]
	adc	rdx,0
	add	r10,QWORD[((-16))+r15*8+rsp]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r15*8+rsp],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+r15*8+rcx]
	adc	rdx,0
	add	r11,QWORD[((-8))+r15*8+rsp]
	adc	rdx,0
	lea	r14,[1+r14]
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],rdi
	mov	r13,rdx

	xor	rdi,rdi
	add	r13,r10
	adc	rdi,0
	add	r13,QWORD[r9*8+rsp]
	adc	rdi,0
	mov	QWORD[((-8))+r15*8+rsp],r13
	mov	QWORD[r15*8+rsp],rdi

	cmp	r14,r9
	jb	NEAR $L$outer4x
; Reload the destination pointer and subtract [rcx] from the scratch vector
; into [rdi], four words per iteration. r15 = (r9 - 4) / 4 loop iterations;
; the last four words are handled after the loop.
	mov	rdi,QWORD[16+r9*8+rsp]
	lea	r15,[((-4))+r9]
	mov	rax,QWORD[rsp]
	mov	rdx,QWORD[8+rsp]
	shr	r15,2
	lea	rsi,[rsp]
	xor	r14,r14

	sub	rax,QWORD[rcx]
	mov	rbx,QWORD[16+rsi]
	mov	rbp,QWORD[24+rsi]
	sbb	rdx,QWORD[8+rcx]

$L$sub4x:
	mov	QWORD[r14*8+rdi],rax
	mov	QWORD[8+r14*8+rdi],rdx
	sbb	rbx,QWORD[16+r14*8+rcx]
	mov	rax,QWORD[32+r14*8+rsi]
	mov	rdx,QWORD[40+r14*8+rsi]
	sbb	rbp,QWORD[24+r14*8+rcx]
	mov	QWORD[16+r14*8+rdi],rbx
	mov	QWORD[24+r14*8+rdi],rbp
	sbb	rax,QWORD[32+r14*8+rcx]
	mov	rbx,QWORD[48+r14*8+rsi]
	mov	rbp,QWORD[56+r14*8+rsi]
	sbb	rdx,QWORD[40+r14*8+rcx]
	lea	r14,[4+r14]
	dec	r15
	jnz	NEAR $L$sub4x

	mov	QWORD[r14*8+rdi],rax
	mov	rax,QWORD[32+r14*8+rsi]
	sbb	rbx,QWORD[16+r14*8+rcx]
	mov	QWORD[8+r14*8+rdi],rdx
	sbb	rbp,QWORD[24+r14*8+rcx]
	mov	QWORD[16+r14*8+rdi],rbx

; rax = top scratch word minus the final borrow. It is broadcast into xmm4
; as the mask for the scratch value; xmm5 = ~xmm4 is the mask for [rdi].
	sbb	rax,0
	mov	QWORD[24+r14*8+rdi],rbp
	pxor	xmm0,xmm0
; The bytes below encode: movq xmm4,rax
DB	102,72,15,110,224
	pcmpeqd	xmm5,xmm5
	pshufd	xmm4,xmm4,0
	mov	r15,r9
	pxor	xmm5,xmm4
	shr	r15,2
	xor	eax,eax

	jmp	NEAR $L$copy4x
ALIGN	16
$L$copy4x:
; 32 bytes per iteration: [rdi] = (scratch & xmm4) | ([rdi] & xmm5), and the
; scratch words are overwritten with zero (xmm0).
	movdqa	xmm1,XMMWORD[rax*1+rsp]
	movdqu	xmm2,XMMWORD[rax*1+rdi]
	pand	xmm1,xmm4
	pand	xmm2,xmm5
	movdqa	xmm3,XMMWORD[16+rax*1+rsp]
	movdqa	XMMWORD[rax*1+rsp],xmm0
	por	xmm1,xmm2
	movdqu	xmm2,XMMWORD[16+rax*1+rdi]
	movdqu	XMMWORD[rax*1+rdi],xmm1
	pand	xmm3,xmm4
	pand	xmm2,xmm5
	movdqa	XMMWORD[16+rax*1+rsp],xmm0
	por	xmm3,xmm2
	movdqu	XMMWORD[16+rax*1+rdi],xmm3
	lea	rax,[32+rax]
	dec	r15
	jnz	NEAR $L$copy4x
	mov	rsi,QWORD[8+r9*8+rsp]

	mov	rax,1
	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$mul4x_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_bn_mul4x_mont:
EXTERN	bn_sqrx8x_internal
EXTERN	bn_sqr8x_internal

;-----------------------------------------------------------------------
; bn_sqr8x_mont -- path taken by bn_mul_mont when rdx == rsi and the count
; is a multiple of 8 (entered at $L$sqr8x_enter). No global directive for
; this label appears in this file.
; The heavy lifting is done by the external routines bn_sqrx8x_internal
; (when bits 8 and 19 of OPENSSL_ia32cap_P+8 are both set) or
; bn_sqr8x_internal; their register contracts are not visible here, so the
; notes below describe only what this file does around the calls.
; Returns 1 in rax.
;-----------------------------------------------------------------------
ALIGN	32
bn_sqr8x_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_sqr8x_mont:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

	mov	rax,rsp

$L$sqr8x_enter:
	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15

$L$sqr8x_prologue:

; r9 = -(8 * count) (negated byte length), r10 = 32 * count.
	mov	r10d,r9d
	shl	r9d,3
	shl	r10,3+2
	neg	r9

; Choose the frame base rbp: room for 2*count words plus 64 bytes below rsp,
; shifted by an amount derived from the low 12 bits of the distance between
; that candidate and rsi, then aligned down to 64 bytes.
; NOTE(review): the reason for the rsi-relative adjustment is not stated in
; this file.
	lea	r11,[((-64))+r9*2+rsp]
	mov	rbp,rsp
	mov	r8,QWORD[r8]
	sub	r11,rsi
	and	r11,4095
	cmp	r10,r11
	jb	NEAR $L$sqr8x_sp_alt
	sub	rbp,r11
	lea	rbp,[((-64))+r9*2+rbp]
	jmp	NEAR $L$sqr8x_sp_done

ALIGN	32
$L$sqr8x_sp_alt:
	lea	r10,[((4096-64))+r9*2]
	lea	rbp,[((-64))+r9*2+rbp]
	sub	r11,r10
	mov	r10,0
	cmovc	r11,r10
	sub	rbp,r11
$L$sqr8x_sp_done:
	and	rbp,-64
; Page-by-page descent of rsp to rbp, reading one word at each step.
	mov	r11,rsp
	sub	r11,rbp
	and	r11,-4096
	lea	rsp,[rbp*1+r11]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$sqr8x_page_walk
	jmp	NEAR $L$sqr8x_page_walk_done

ALIGN	16
$L$sqr8x_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$sqr8x_page_walk
$L$sqr8x_page_walk_done:

; r10 = negated byte length, r9 = positive byte length.
	mov	r10,r9
	neg	r9

; Frame: [32+rsp] = word loaded from [r8], [40+rsp] = entry rsp (read back
; by the epilogue and by sqr_handler).
	mov	QWORD[32+rsp],r8
	mov	QWORD[40+rsp],rax

$L$sqr8x_body:

; The bytes below encode: movq xmm2,rcx
DB	102,72,15,110,209
	pxor	xmm0,xmm0
; The bytes below encode: movq xmm1,rdi
DB	102,72,15,110,207
; The bytes below encode: movq xmm3,r10
DB	102,73,15,110,218
	mov	eax,DWORD[((OPENSSL_ia32cap_P+8))]
	and	eax,0x80100
	cmp	eax,0x80100
	jne	NEAR $L$sqr8x_nox

	call	bn_sqrx8x_internal

; Register values used here come from bn_sqrx8x_internal.
	lea	rbx,[rcx*1+r8]
	mov	r9,rcx
	mov	rdx,rcx
; The bytes below encode: movq rdi,xmm1  (reload the destination pointer)
DB	102,72,15,126,207
	sar	rcx,3+2
	jmp	NEAR $L$sqr8x_sub

ALIGN	32
$L$sqr8x_nox:
	call	bn_sqr8x_internal

; Register values used here come from bn_sqr8x_internal.
	lea	rbx,[r9*1+rdi]
	mov	rcx,r9
	mov	rdx,r9
; The bytes below encode: movq rdi,xmm1  (reload the destination pointer)
DB	102,72,15,126,207
	sar	rcx,3+2
	jmp	NEAR $L$sqr8x_sub

ALIGN	32
$L$sqr8x_sub:
; [rdi] = [rbx] - [rbp], four words per iteration with borrow; rcx counts up
; to zero.
	mov	r12,QWORD[rbx]
	mov	r13,QWORD[8+rbx]
	mov	r14,QWORD[16+rbx]
	mov	r15,QWORD[24+rbx]
	lea	rbx,[32+rbx]
	sbb	r12,QWORD[rbp]
	sbb	r13,QWORD[8+rbp]
	sbb	r14,QWORD[16+rbp]
	sbb	r15,QWORD[24+rbp]
	lea	rbp,[32+rbp]
	mov	QWORD[rdi],r12
	mov	QWORD[8+rdi],r13
	mov	QWORD[16+rdi],r14
	mov	QWORD[24+rdi],r15
	lea	rdi,[32+rdi]
	inc	rcx
	jnz	NEAR $L$sqr8x_sub

; Fold the final borrow into rax, rewind rbx and rdi by adding r9, and
; broadcast the low 32 bits of rax into xmm1 as the selection mask.
	sbb	rax,0
	lea	rbx,[r9*1+rbx]
	lea	rdi,[r9*1+rdi]

; The bytes below encode: movq xmm1,rax
DB	102,72,15,110,200
	pxor	xmm0,xmm0
	pshufd	xmm1,xmm1,0
	mov	rsi,QWORD[40+rsp]

	jmp	NEAR $L$sqr8x_cond_copy

ALIGN	32
$L$sqr8x_cond_copy:
; 32 bytes per iteration: [rdi] = ([rbx] & xmm1) | ([rdi] & mask), where
; mask is all-ones in lanes where xmm1 is zero. The source words at [rbx]
; and at [rbx+rdx] are overwritten with zero. r9 counts up to zero.
	movdqa	xmm2,XMMWORD[rbx]
	movdqa	xmm3,XMMWORD[16+rbx]
	lea	rbx,[32+rbx]
	movdqu	xmm4,XMMWORD[rdi]
	movdqu	xmm5,XMMWORD[16+rdi]
	lea	rdi,[32+rdi]
	movdqa	XMMWORD[(-32)+rbx],xmm0
	movdqa	XMMWORD[(-16)+rbx],xmm0
	movdqa	XMMWORD[(-32)+rdx*1+rbx],xmm0
	movdqa	XMMWORD[(-16)+rdx*1+rbx],xmm0
	pcmpeqd	xmm0,xmm1
	pand	xmm2,xmm1
	pand	xmm3,xmm1
	pand	xmm4,xmm0
	pand	xmm5,xmm0
	pxor	xmm0,xmm0
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqu	XMMWORD[(-32)+rdi],xmm4
	movdqu	XMMWORD[(-16)+rdi],xmm5
	add	r9,32
	jnz	NEAR $L$sqr8x_cond_copy

	mov	rax,1
	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$sqr8x_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_bn_sqr8x_mont:

;-----------------------------------------------------------------------
; bn_mulx4x_mont -- mulx/adcx/adox variant of the 4x loop, entered from
; $L$mul4x_enter at $L$mulx4x_enter when bits 8 and 19 of
; OPENSSL_ia32cap_P+8 are both set. No global directive for this label
; appears in this file. Register assignment on entry is the same as for
; bn_mul_mont. Returns 1 in rax.
;
; Frame (offsets from the new rsp):
;   [0]   byte length (8 * count)
;   [8]   pointer to the next word of the second source vector
;   [16]  end pointer of the second source vector (rdx + byte length)
;   [24]  word loaded from [r8]
;   [32]  destination pointer
;   [40]  entry rsp
;   [48]  inner-loop iteration count (byte length / 32 - 1)
;   [64+] scratch vector
;-----------------------------------------------------------------------
ALIGN	32
bn_mulx4x_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_mulx4x_mont:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

	mov	rax,rsp

$L$mulx4x_enter:
	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15

$L$mulx4x_prologue:

; r9 = byte length; rbp = rsp - 72 - byte length, aligned down to 128.
	shl	r9d,3
	xor	r10,r10
	sub	r10,r9
	mov	r8,QWORD[r8]
	lea	rbp,[((-72))+r10*1+rsp]
	and	rbp,-128
; Page-by-page descent of rsp to rbp, reading one word at each step.
	mov	r11,rsp
	sub	r11,rbp
	and	r11,-4096
	lea	rsp,[rbp*1+r11]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$mulx4x_page_walk
	jmp	NEAR $L$mulx4x_page_walk_done

ALIGN	16
$L$mulx4x_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$mulx4x_page_walk
$L$mulx4x_page_walk_done:

	lea	r10,[r9*1+rdx]

	mov	QWORD[rsp],r9
	shr	r9,5
	mov	QWORD[16+rsp],r10
	sub	r9,1
	mov	QWORD[24+rsp],r8
	mov	QWORD[32+rsp],rdi
	mov	QWORD[40+rsp],rax

	mov	QWORD[48+rsp],r9
	jmp	NEAR $L$mulx4x_body

ALIGN	32
$L$mulx4x_body:
; First pass. rdx (mulx's implicit operand) alternates between the current
; word of the second source vector (kept in r9) and the per-round factor
; (kept in r8). rbx walks the scratch vector; rbp is kept at zero.
	lea	rdi,[8+rdx]
	mov	rdx,QWORD[rdx]
	lea	rbx,[((64+32))+rsp]
	mov	r9,rdx

	mulx	rax,r8,QWORD[rsi]
	mulx	r14,r11,QWORD[8+rsi]
	add	r11,rax
	mov	QWORD[8+rsp],rdi
	mulx	r13,r12,QWORD[16+rsi]
	adc	r12,r14
	adc	r13,0

	mov	rdi,r8
	imul	r8,QWORD[24+rsp]
	xor	rbp,rbp

	mulx	r14,rax,QWORD[24+rsi]
	mov	rdx,r8
	lea	rsi,[32+rsi]
	adcx	r13,rax
	adcx	r14,rbp

	mulx	r10,rax,QWORD[rcx]
	adcx	rdi,rax
	adox	r10,r11
	mulx	r11,rax,QWORD[8+rcx]
	adcx	r10,rax
	adox	r11,r12
; The bytes below encode: mulx r12,rax,QWORD[16+rcx]  (with a 32-bit disp)
DB	0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00
	mov	rdi,QWORD[48+rsp]
	mov	QWORD[((-32))+rbx],r10
	adcx	r11,rax
	adox	r12,r13
	mulx	r15,rax,QWORD[24+rcx]
	mov	rdx,r9
	mov	QWORD[((-24))+rbx],r11
	adcx	r12,rax
	adox	r15,rbp
	lea	rcx,[32+rcx]
	mov	QWORD[((-16))+rbx],r12

	jmp	NEAR $L$mulx4x_1st

ALIGN	32
$L$mulx4x_1st:
	adcx	r15,rbp
	mulx	rax,r10,QWORD[rsi]
	adcx	r10,r14
	mulx	r14,r11,QWORD[8+rsi]
	adcx	r11,rax
	mulx	rax,r12,QWORD[16+rsi]
	adcx	r12,r14
	mulx	r14,r13,QWORD[24+rsi]
; Two 0x67 prefix bytes emitted in front of the next instruction.
DB	0x67,0x67
	mov	rdx,r8
	adcx	r13,rax
	adcx	r14,rbp
	lea	rsi,[32+rsi]
	lea	rbx,[32+rbx]

	adox	r10,r15
	mulx	r15,rax,QWORD[rcx]
	adcx	r10,rax
	adox	r11,r15
	mulx	r15,rax,QWORD[8+rcx]
	adcx	r11,rax
	adox	r12,r15
	mulx	r15,rax,QWORD[16+rcx]
	mov	QWORD[((-40))+rbx],r10
	adcx	r12,rax
	mov	QWORD[((-32))+rbx],r11
	adox	r13,r15
	mulx	r15,rax,QWORD[24+rcx]
	mov	rdx,r9
	mov	QWORD[((-24))+rbx],r12
	adcx	r13,rax
	adox	r15,rbp
	lea	rcx,[32+rcx]
	mov	QWORD[((-16))+rbx],r13

	dec	rdi
	jnz	NEAR $L$mulx4x_1st

; rax = byte length, rdi = saved pointer into the second source vector;
; r15 becomes 0 or -1 from the final carry.
	mov	rax,QWORD[rsp]
	mov	rdi,QWORD[8+rsp]
	adc	r15,rbp
	add	r14,r15
	sbb	r15,r15
	mov	QWORD[((-8))+rbx],r14
	jmp	NEAR $L$mulx4x_outer

ALIGN	32
$L$mulx4x_outer:
; Remaining passes: rewind rsi and rcx by the byte length, store the carry
; word at the top of the scratch vector and restart rbx at its base.
	mov	rdx,QWORD[rdi]
	lea	rdi,[8+rdi]
	sub	rsi,rax
	mov	QWORD[rbx],r15
	lea	rbx,[((64+32))+rsp]
	sub	rcx,rax

	mulx	r11,r8,QWORD[rsi]
	xor	ebp,ebp
	mov	r9,rdx
	mulx	r12,r14,QWORD[8+rsi]
	adox	r8,QWORD[((-32))+rbx]
	adcx	r11,r14
	mulx	r13,r15,QWORD[16+rsi]
	adox	r11,QWORD[((-24))+rbx]
	adcx	r12,r15
	adox	r12,QWORD[((-16))+rbx]
	adcx	r13,rbp
	adox	r13,rbp

	mov	QWORD[8+rsp],rdi
	mov	r15,r8
	imul	r8,QWORD[24+rsp]
	xor	ebp,ebp

	mulx	r14,rax,QWORD[24+rsi]
	mov	rdx,r8
	adcx	r13,rax
	adox	r13,QWORD[((-8))+rbx]
	adcx	r14,rbp
	lea	rsi,[32+rsi]
	adox	r14,rbp

	mulx	r10,rax,QWORD[rcx]
	adcx	r15,rax
	adox	r10,r11
	mulx	r11,rax,QWORD[8+rcx]
	adcx	r10,rax
	adox	r11,r12
	mulx	r12,rax,QWORD[16+rcx]
	mov	QWORD[((-32))+rbx],r10
	adcx	r11,rax
	adox	r12,r13
	mulx	r15,rax,QWORD[24+rcx]
	mov	rdx,r9
	mov	QWORD[((-24))+rbx],r11
	lea	rcx,[32+rcx]
	adcx	r12,rax
	adox	r15,rbp
	mov	rdi,QWORD[48+rsp]
	mov	QWORD[((-16))+rbx],r12

	jmp	NEAR $L$mulx4x_inner

ALIGN	32
$L$mulx4x_inner:
	mulx	rax,r10,QWORD[rsi]
	adcx	r15,rbp
	adox	r10,r14
	mulx	r14,r11,QWORD[8+rsi]
	adcx	r10,QWORD[rbx]
	adox	r11,rax
	mulx	rax,r12,QWORD[16+rsi]
	adcx	r11,QWORD[8+rbx]
	adox	r12,r14
	mulx	r14,r13,QWORD[24+rsi]
	mov	rdx,r8
	adcx	r12,QWORD[16+rbx]
	adox	r13,rax
	adcx	r13,QWORD[24+rbx]
	adox	r14,rbp
	lea	rsi,[32+rsi]
	lea	rbx,[32+rbx]
	adcx	r14,rbp

	adox	r10,r15
	mulx	r15,rax,QWORD[rcx]
	adcx	r10,rax
	adox	r11,r15
	mulx	r15,rax,QWORD[8+rcx]
	adcx	r11,rax
	adox	r12,r15
	mulx	r15,rax,QWORD[16+rcx]
	mov	QWORD[((-40))+rbx],r10
	adcx	r12,rax
	adox	r13,r15
	mulx	r15,rax,QWORD[24+rcx]
	mov	rdx,r9
	mov	QWORD[((-32))+rbx],r11
	mov	QWORD[((-24))+rbx],r12
	adcx	r13,rax
	adox	r15,rbp
	lea	rcx,[32+rcx]
	mov	QWORD[((-16))+rbx],r13

	dec	rdi
	jnz	NEAR $L$mulx4x_inner

	mov	rax,QWORD[rsp]
	mov	rdi,QWORD[8+rsp]
	adc	r15,rbp
; rbp is zero here, so this sets CF when the stored carry word is non-zero.
	sub	rbp,QWORD[rbx]
	adc	r14,r15
	sbb	r15,r15
	mov	QWORD[((-8))+rbx],r14

; Loop until the second-source pointer reaches the saved end pointer.
	cmp	rdi,QWORD[16+rsp]
	jne	NEAR $L$mulx4x_outer

; Set up the final subtraction: rbx = scratch base, rcx rewound by the byte
; length, rdx = byte length, rax = byte length / 32, rdi = destination.
	lea	rbx,[64+rsp]
	sub	rcx,rax
	neg	r15
	mov	rdx,rax
	shr	rax,3+2
	mov	rdi,QWORD[32+rsp]
	jmp	NEAR $L$mulx4x_sub

ALIGN	32
$L$mulx4x_sub:
; [rdi] = scratch - [rcx], four words per iteration with borrow.
	mov	r11,QWORD[rbx]
	mov	r12,QWORD[8+rbx]
	mov	r13,QWORD[16+rbx]
	mov	r14,QWORD[24+rbx]
	lea	rbx,[32+rbx]
	sbb	r11,QWORD[rcx]
	sbb	r12,QWORD[8+rcx]
	sbb	r13,QWORD[16+rcx]
	sbb	r14,QWORD[24+rcx]
	lea	rcx,[32+rcx]
	mov	QWORD[rdi],r11
	mov	QWORD[8+rdi],r12
	mov	QWORD[16+rdi],r13
	mov	QWORD[24+rdi],r14
	lea	rdi,[32+rdi]
	dec	rax
	jnz	NEAR $L$mulx4x_sub

; Fold the final borrow into r15, rewind rbx and rdi, and broadcast the low
; 32 bits of r15 into xmm1 as the selection mask.
	sbb	r15,0
	lea	rbx,[64+rsp]
	sub	rdi,rdx

; The bytes below encode: movq xmm1,r15
DB	102,73,15,110,207
	pxor	xmm0,xmm0
	pshufd	xmm1,xmm1,0
	mov	rsi,QWORD[40+rsp]

	jmp	NEAR $L$mulx4x_cond_copy

ALIGN	32
$L$mulx4x_cond_copy:
; 32 bytes per iteration: [rdi] = (scratch & xmm1) | ([rdi] & mask), where
; mask is all-ones in lanes where xmm1 is zero. The scratch words are
; overwritten with zero. rdx counts the remaining bytes down to zero.
	movdqa	xmm2,XMMWORD[rbx]
	movdqa	xmm3,XMMWORD[16+rbx]
	lea	rbx,[32+rbx]
	movdqu	xmm4,XMMWORD[rdi]
	movdqu	xmm5,XMMWORD[16+rdi]
	lea	rdi,[32+rdi]
	movdqa	XMMWORD[(-32)+rbx],xmm0
	movdqa	XMMWORD[(-16)+rbx],xmm0
	pcmpeqd	xmm0,xmm1
	pand	xmm2,xmm1
	pand	xmm3,xmm1
	pand	xmm4,xmm0
	pand	xmm5,xmm0
	pxor	xmm0,xmm0
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqu	XMMWORD[(-32)+rdi],xmm4
	movdqu	XMMWORD[(-16)+rdi],xmm5
	sub	rdx,32
	jnz	NEAR $L$mulx4x_cond_copy

; rdx is zero here: clear the word just past the copied scratch area.
	mov	QWORD[rbx],rdx

	mov	rax,1
	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$mulx4x_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_bn_mulx4x_mont:
; NUL-terminated ASCII:
; "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
DB	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
DB	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
DB	54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83
DB	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
DB	115,108,46,111,114,103,62,0
ALIGN	16
EXTERN	__imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; mul_handler -- exception handler named in the .xdata entries for
; bn_mul_mont and bn_mul4x_mont. Arguments used: r8 (a context record) and
; r9 (a dispatcher record). Its handler data ([56+r9]) is two image-relative
; offsets: the function's *_body and *_epilogue labels.
; NOTE(review): the field names in the comments below (Rax, Rip, Rsp, ...)
; are the standard Win64 CONTEXT / DISPATCHER_CONTEXT members at these
; offsets; confirm against the Windows SDK headers.
;
;   fault address < body      frame not yet set up: use [120+r8] (Rax, which
;                             holds the entry rsp) and skip the register
;                             reload.
;   fault address >= epilogue rsp already restored: use [152+r8] (Rsp) and
;                             skip the register reload.
;   otherwise                 fetch the entry rsp stored in the frame at
;                             [Rsp + 8*R9 + 8], then reload the pushed
;                             registers from below it.
;-----------------------------------------------------------------------
ALIGN	16
mul_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64

; rax = context Rax, rbx = context Rip.
	mov	rax,QWORD[120+r8]
	mov	rbx,QWORD[248+r8]

; rsi = image base, r11 = handler data.
	mov	rsi,QWORD[8+r9]
	mov	r11,QWORD[56+r9]

	mov	r10d,DWORD[r11]
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10
	jb	NEAR $L$common_seh_tail

; rax = context Rsp.
	mov	rax,QWORD[152+r8]

	mov	r10d,DWORD[4+r11]
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10
	jae	NEAR $L$common_seh_tail

; r10 = context R9; rax = entry rsp stored in the frame.
	mov	r10,QWORD[192+r8]
	mov	rax,QWORD[8+r10*8+rax]

	jmp	NEAR $L$common_pop_regs

;-----------------------------------------------------------------------
; sqr_handler -- exception handler named in the .xdata entries for
; bn_sqr8x_mont and bn_mulx4x_mont. Same arguments as mul_handler, but the
; handler data holds three offsets: *_prologue, *_body and *_epilogue.
;
;   fault address < prologue  nothing pushed yet: use Rax, skip the reload.
;   fault address < body      registers pushed, frame not set up: Rax still
;                             holds the entry rsp, reload from below it.
;   fault address >= epilogue use Rsp, skip the reload.
;   otherwise                 fetch the entry rsp stored at [Rsp + 40].
;-----------------------------------------------------------------------
ALIGN	16
sqr_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64

	mov	rax,QWORD[120+r8]
	mov	rbx,QWORD[248+r8]

	mov	rsi,QWORD[8+r9]
	mov	r11,QWORD[56+r9]

	mov	r10d,DWORD[r11]
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10
	jb	NEAR $L$common_seh_tail

	mov	r10d,DWORD[4+r11]
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10
	jb	NEAR $L$common_pop_regs

	mov	rax,QWORD[152+r8]

	mov	r10d,DWORD[8+r11]
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10
	jae	NEAR $L$common_seh_tail

	mov	rax,QWORD[40+rax]

$L$common_pop_regs:
; rax = entry rsp of the interrupted function. Copy the six registers it
; pushed into the context record (Rbx, Rbp, R12-R15).
	mov	rbx,QWORD[((-8))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r15,QWORD[((-48))+rax]
	mov	QWORD[144+r8],rbx
	mov	QWORD[160+r8],rbp
	mov	QWORD[216+r8],r12
	mov	QWORD[224+r8],r13
	mov	QWORD[232+r8],r14
	mov	QWORD[240+r8],r15

$L$common_seh_tail:
; Restore Rsp, Rsi and Rdi in the context record from the entry rsp and the
; two slots the WIN64 prologue filled.
	mov	rdi,QWORD[8+rax]
	mov	rsi,QWORD[16+rax]
	mov	QWORD[152+r8],rax
	mov	QWORD[168+r8],rsi
	mov	QWORD[176+r8],rdi

; Copy 154 qwords from the context record (r8) to the record at [40+r9].
	mov	rdi,QWORD[40+r9]
	mov	rsi,r8
	mov	ecx,154
; The dword below is the byte sequence FC F3 48 A5: cld; rep movsq
	DD	0xa548f3fc

; Call RtlVirtualUnwind: rcx = 0, rdx = [8+r9], r8 = [0+r9], r9 = [16+r9],
; and four stack arguments at [32+rsp]..[56+rsp].
	mov	rsi,r9
	xor	rcx,rcx
	mov	rdx,QWORD[8+rsi]
	mov	r8,QWORD[rsi]
	mov	r9,QWORD[16+rsi]
	mov	r10,QWORD[40+rsi]
	lea	r11,[56+rsi]
	lea	r12,[24+rsi]
	mov	QWORD[32+rsp],r10
	mov	QWORD[40+rsp],r11
	mov	QWORD[48+rsp],r12
	mov	QWORD[56+rsp],rcx
	call	QWORD[__imp_RtlVirtualUnwind]

; Handler result: 1.
	mov	eax,1
	add	rsp,64
	popfq
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp
	pop	rbx
	pop	rdi
	pop	rsi
	DB	0F3h,0C3h		;repret


; Function table: one (begin, end, unwind-info) triple of image-relative
; offsets per routine.
section	.pdata rdata align=4
ALIGN	4
	DD	$L$SEH_begin_bn_mul_mont wrt ..imagebase
	DD	$L$SEH_end_bn_mul_mont wrt ..imagebase
	DD	$L$SEH_info_bn_mul_mont wrt ..imagebase

	DD	$L$SEH_begin_bn_mul4x_mont wrt ..imagebase
	DD	$L$SEH_end_bn_mul4x_mont wrt ..imagebase
	DD	$L$SEH_info_bn_mul4x_mont wrt ..imagebase

	DD	$L$SEH_begin_bn_sqr8x_mont wrt ..imagebase
	DD	$L$SEH_end_bn_sqr8x_mont wrt ..imagebase
	DD	$L$SEH_info_bn_sqr8x_mont wrt ..imagebase
	DD	$L$SEH_begin_bn_mulx4x_mont wrt ..imagebase
	DD	$L$SEH_end_bn_mulx4x_mont wrt ..imagebase
	DD	$L$SEH_info_bn_mulx4x_mont wrt ..imagebase
; Unwind info: a 4-byte header (9,0,0,0), the handler's image-relative
; offset, then the handler data read through [56+r9] by the handlers above.
section	.xdata rdata align=8
ALIGN	8
$L$SEH_info_bn_mul_mont:
DB	9,0,0,0
	DD	mul_handler wrt ..imagebase
	DD	$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
$L$SEH_info_bn_mul4x_mont:
DB	9,0,0,0
	DD	mul_handler wrt ..imagebase
	DD	$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
$L$SEH_info_bn_sqr8x_mont:
DB	9,0,0,0
	DD	sqr_handler wrt ..imagebase
	DD	$L$sqr8x_prologue wrt ..imagebase,$L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase
ALIGN	8
$L$SEH_info_bn_mulx4x_mont:
DB	9,0,0,0
	DD	sqr_handler wrt ..imagebase
	DD	$L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase
ALIGN	8