;-----------------------------------------------------------------------
; Montgomery multiplication for x86_64 -- NASM syntax, Win64 ABI.
;
; Exported:  bn_mul_mont
; Internal:  bn_mul4x_mont, bn_sqr8x_mont (entered through the
;            $L$mul4x_enter / $L$sqr8x_enter labels), plus the
;            structured-exception handlers mul_handler / sqr_handler
;            referenced from the .xdata records at the end of the file.
;
; Every entry point first spills rdi/rsi into the caller's home area
; ([rsp+8], [rsp+16]) and then shuffles the Win64 argument registers
; (rcx, rdx, r8, r9, [rsp+40], [rsp+48]) into rdi, rsi, rdx, rcx, r8, r9.
; After the shuffle the code uses them as follows:
;     rdi  destination vector (result words are stored through it)
;     rsi  first multiplicand vector
;     rdx  second multiplicand vector (copied to r12)
;     rcx  modulus vector (multiplied by the per-row reduction factor,
;          subtracted at the end)
;     r8   pointer to one qword, loaded and used as the reduction constant
;     r9   word count (treated as a 32-bit value)
; All paths return rax = 1.
;
; NOTE(review): this presumably matches the OpenSSL-style prototype
;   int bn_mul_mont(rp, ap, bp, np, n0, num) -- confirm against the C side.
; NOTE(review): the $L$ label style suggests this file is generated from
;   another source; confirm before hand-editing.
; NOTE(review): OPENSSL_ia32cap_P is declared EXTERN but never referenced
;   in the code shown here.
;-----------------------------------------------------------------------
default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64


EXTERN  OPENSSL_ia32cap_P

global  bn_mul_mont

;-----------------------------------------------------------------------
; bn_mul_mont -- dispatcher + generic (one word per step) path.
;
; Dispatch on the word count and operands:
;     count not a multiple of 4, or count < 8   -> generic path below
;     second operand pointer != first operand   -> $L$mul4x_enter
;     count a multiple of 8 (same operand)      -> $L$sqr8x_enter
;     otherwise                                 -> $L$mul4x_enter
;
; Generic-path frame (after the stack is lowered):
;     [rsp .. rsp+8*count]     temporary vector (count words + top carry)
;     [rsp+8*count+8]          original rsp (value of rax on entry)
; Register roles in the loops: rbx = current word of the second operand,
; rbp = reduction factor for the current row, r14 = outer index,
; r15 = inner index, r10/r11/r13 = running partial sums / carries.
;-----------------------------------------------------------------------
ALIGN   16
bn_mul_mont:
        mov     qword [rsp+8], rdi              ; WIN64 prologue
        mov     qword [rsp+16], rsi
        mov     rax, rsp
$L$SEH_begin_bn_mul_mont:
        mov     rdi, rcx
        mov     rsi, rdx
        mov     rdx, r8
        mov     rcx, r9
        mov     r8, qword [rsp+40]
        mov     r9, qword [rsp+48]



        mov     r9d, r9d                        ; zero-extend the 32-bit count
        mov     rax, rsp                        ; rax = rsp at entry, saved in the frame later

        test    r9d, 3
        jnz     near $L$mul_enter               ; count % 4 != 0
        cmp     r9d, 8
        jb      near $L$mul_enter               ; count < 8
        cmp     rdx, rsi
        jne     near $L$mul4x_enter             ; distinct operands
        test    r9d, 7
        jz      near $L$sqr8x_enter             ; same operand, count % 8 == 0
        jmp     near $L$mul4x_enter

ALIGN   16
$L$mul_enter:
        push    rbx

        push    rbp

        push    r12

        push    r13

        push    r14

        push    r15


        ; r10 = (rsp - 8*count - 16) rounded down to a 1024-byte boundary
        neg     r9
        mov     r11, rsp
        lea     r10, [rsp+r9*8-16]
        neg     r9
        and     r10, -1024









        ; Lower rsp to r10 in steps of at most 4096 bytes, reading one
        ; qword at every step so each page on the way down is touched.
        sub     r11, r10
        and     r11, -4096
        lea     rsp, [r10+r11*1]
        mov     r11, qword [rsp]
        cmp     rsp, r10
        ja      near $L$mul_page_walk
        jmp     near $L$mul_page_walk_done

ALIGN   16
$L$mul_page_walk:
        lea     rsp, [rsp-4096]
        mov     r11, qword [rsp]
        cmp     rsp, r10
        ja      near $L$mul_page_walk
$L$mul_page_walk_done:

        mov     qword [rsp+r9*8+8], rax         ; remember the original rsp

$L$mul_body:
        mov     r12, rdx                        ; rdx is clobbered by mul from here on
        mov     r8, qword [r8]                  ; r8 = reduction constant
        mov     rbx, qword [r12]                ; first word of the second operand
        mov     rax, qword [rsi]

        xor     r14, r14                        ; outer index = 0
        xor     r15, r15                        ; inner index = 0

        mov     rbp, r8
        mul     rbx
        mov     r10, rax
        mov     rax, qword [rcx]

        imul    rbp, r10                        ; rbp = low product word * constant
        mov     r11, rdx

        mul     rbp
        add     r10, rax                        ; low word is discarded, only the carry matters
        mov     rax, qword [rsi+8]
        adc     rdx, 0
        mov     r13, rdx

        lea     r15, [r15+1]
        jmp     near $L$1st_enter

        ; ---- first row: no previous temporary vector to add in ----
ALIGN   16
$L$1st:
        add     r13, rax
        mov     rax, qword [rsi+r15*8]
        adc     rdx, 0
        add     r13, r11
        mov     r11, r10
        adc     rdx, 0
        mov     qword [rsp+r15*8-16], r13
        mov     r13, rdx

$L$1st_enter:
        mul     rbx
        add     r11, rax
        mov     rax, qword [rcx+r15*8]
        adc     rdx, 0
        lea     r15, [r15+1]
        mov     r10, rdx

        mul     rbp
        cmp     r15, r9
        jne     near $L$1st

        add     r13, rax
        mov     rax, qword [rsi]
        adc     rdx, 0
        add     r13, r11
        adc     rdx, 0
        mov     qword [rsp+r15*8-16], r13
        mov     r13, rdx
        mov     r11, r10

        xor     rdx, rdx
        add     r13, r11
        adc     rdx, 0
        mov     qword [rsp+r9*8-8], r13
        mov     qword [rsp+r9*8], rdx           ; top carry word

        lea     r14, [r14+1]
        jmp     near $L$outer

        ; ---- remaining rows: accumulate into the temporary vector ----
ALIGN   16
$L$outer:
        mov     rbx, qword [r12+r14*8]
        xor     r15, r15
        mov     rbp, r8
        mov     r10, qword [rsp]
        mul     rbx
        add     r10, rax
        mov     rax, qword [rcx]
        adc     rdx, 0

        imul    rbp, r10
        mov     r11, rdx

        mul     rbp
        add     r10, rax
        mov     rax, qword [rsi+8]
        adc     rdx, 0
        mov     r10, qword [rsp+8]
        mov     r13, rdx

        lea     r15, [r15+1]
        jmp     near $L$inner_enter

ALIGN   16
$L$inner:
        add     r13, rax
        mov     rax, qword [rsi+r15*8]
        adc     rdx, 0
        add     r13, r10
        mov     r10, qword [rsp+r15*8]
        adc     rdx, 0
        mov     qword [rsp+r15*8-16], r13
        mov     r13, rdx

$L$inner_enter:
        mul     rbx
        add     r11, rax
        mov     rax, qword [rcx+r15*8]
        adc     rdx, 0
        add     r10, r11
        mov     r11, rdx
        adc     r11, 0
        lea     r15, [r15+1]

        mul     rbp
        cmp     r15, r9
        jne     near $L$inner

        add     r13, rax
        mov     rax, qword [rsi]
        adc     rdx, 0
        add     r13, r10
        mov     r10, qword [rsp+r15*8]
        adc     rdx, 0
        mov     qword [rsp+r15*8-16], r13
        mov     r13, rdx

        xor     rdx, rdx
        add     r13, r11
        adc     rdx, 0
        add     r13, r10
        adc     rdx, 0
        mov     qword [rsp+r9*8-8], r13
        mov     qword [rsp+r9*8], rdx

        lea     r14, [r14+1]
        cmp     r14, r9
        jb      near $L$outer

        ; ---- destination = temporary vector - modulus ----
        ; xor clears CF; the mov/lea/dec/jnz in the loop leave CF alone,
        ; so the borrow chains through every sbb.
        xor     r14, r14
        mov     rax, qword [rsp]
        lea     rsi, [rsp]
        mov     r15, r9
        jmp     near $L$sub
ALIGN   16
$L$sub:
        sbb     rax, qword [rcx+r14*8]
        mov     qword [rdi+r14*8], rax
        mov     rax, qword [rsi+r14*8+8]
        lea     r14, [r14+1]
        dec     r15
        jnz     near $L$sub

        ; rax (top carry word) minus the final borrow becomes an all-ones
        ; or all-zeros mask; use it to pick the copy source without a
        ; branch: rsi = mask ? temporary vector : destination.
        sbb     rax, 0
        xor     r14, r14
        and     rsi, rax
        not     rax
        mov     rcx, rdi
        and     rcx, rax
        mov     r15, r9
        or      rsi, rcx
ALIGN   16
$L$copy:
        mov     rax, qword [rsi+r14*8]
        mov     qword [rsp+r14*8], r14          ; overwrite the temporary word with the index
        mov     qword [rdi+r14*8], rax
        lea     r14, [r14+1]
        sub     r15, 1
        jnz     near $L$copy

        mov     rsi, qword [rsp+r9*8+8]         ; rsi = original rsp

        mov     rax, 1                          ; return value
        mov     r15, qword [rsi-48]

        mov     r14, qword [rsi-40]

        mov     r13, qword [rsi-32]

        mov     r12, qword [rsi-24]

        mov     rbp, qword [rsi-16]

        mov     rbx, qword [rsi-8]

        lea     rsp, [rsi]

$L$mul_epilogue:
        mov     rdi, qword [rsp+8]              ; WIN64 epilogue
        mov     rsi, qword [rsp+16]
        DB      0F3h,0C3h                       ; repret

$L$SEH_end_bn_mul_mont:

;-----------------------------------------------------------------------
; bn_mul4x_mont -- same computation as the generic path, with the inner
; loops unrolled four words per iteration.  Normally reached through
; $L$mul4x_enter from bn_mul_mont (count >= 8 and a multiple of 4).
;
; Frame: temporary vector at [rsp], original rsp at [rsp+8*count+8],
; destination pointer parked at [rsp+8*count+16] because rdi is reused
; as an accumulator inside the loops.
;-----------------------------------------------------------------------
ALIGN   16
bn_mul4x_mont:
        mov     qword [rsp+8], rdi              ; WIN64 prologue
        mov     qword [rsp+16], rsi
        mov     rax, rsp
$L$SEH_begin_bn_mul4x_mont:
        mov     rdi, rcx
        mov     rsi, rdx
        mov     rdx, r8
        mov     rcx, r9
        mov     r8, qword [rsp+40]
        mov     r9, qword [rsp+48]



        mov     r9d, r9d
        mov     rax, rsp

$L$mul4x_enter:
        push    rbx

        push    rbp

        push    r12

        push    r13

        push    r14

        push    r15


        ; r10 = (rsp - 8*count - 32) rounded down to a 1024-byte boundary
        neg     r9
        mov     r11, rsp
        lea     r10, [rsp+r9*8-32]
        neg     r9
        and     r10, -1024

        ; lower rsp to r10, touching one qword per 4096-byte step
        sub     r11, r10
        and     r11, -4096
        lea     rsp, [r10+r11*1]
        mov     r11, qword [rsp]
        cmp     rsp, r10
        ja      near $L$mul4x_page_walk
        jmp     near $L$mul4x_page_walk_done

$L$mul4x_page_walk:
        lea     rsp, [rsp-4096]
        mov     r11, qword [rsp]
        cmp     rsp, r10
        ja      near $L$mul4x_page_walk
$L$mul4x_page_walk_done:

        mov     qword [rsp+r9*8+8], rax         ; remember the original rsp

$L$mul4x_body:
        mov     qword [rsp+r9*8+16], rdi        ; park the destination pointer
        mov     r12, rdx
        mov     r8, qword [r8]
        mov     rbx, qword [r12]
        mov     rax, qword [rsi]

        xor     r14, r14
        xor     r15, r15

        mov     rbp, r8
        mul     rbx
        mov     r10, rax
        mov     rax, qword [rcx]

        imul    rbp, r10
        mov     r11, rdx

        mul     rbp
        add     r10, rax
        mov     rax, qword [rsi+8]
        adc     rdx, 0
        mov     rdi, rdx

        mul     rbx
        add     r11, rax
        mov     rax, qword [rcx+8]
        adc     rdx, 0
        mov     r10, rdx

        mul     rbp
        add     rdi, rax
        mov     rax, qword [rsi+16]
        adc     rdx, 0
        add     rdi, r11
        lea     r15, [r15+4]
        adc     rdx, 0
        mov     qword [rsp], rdi
        mov     r13, rdx
        jmp     near $L$1st4x

        ; ---- first row, four words per iteration ----
ALIGN   16
$L$1st4x:
        mul     rbx
        add     r10, rax
        mov     rax, qword [rcx+r15*8-16]
        adc     rdx, 0
        mov     r11, rdx

        mul     rbp
        add     r13, rax
        mov     rax, qword [rsi+r15*8-8]
        adc     rdx, 0
        add     r13, r10
        adc     rdx, 0
        mov     qword [rsp+r15*8-24], r13
        mov     rdi, rdx

        mul     rbx
        add     r11, rax
        mov     rax, qword [rcx+r15*8-8]
        adc     rdx, 0
        mov     r10, rdx

        mul     rbp
        add     rdi, rax
        mov     rax, qword [rsi+r15*8]
        adc     rdx, 0
        add     rdi, r11
        adc     rdx, 0
        mov     qword [rsp+r15*8-16], rdi
        mov     r13, rdx

        mul     rbx
        add     r10, rax
        mov     rax, qword [rcx+r15*8]
        adc     rdx, 0
        mov     r11, rdx

        mul     rbp
        add     r13, rax
        mov     rax, qword [rsi+r15*8+8]
        adc     rdx, 0
        add     r13, r10
        adc     rdx, 0
        mov     qword [rsp+r15*8-8], r13
        mov     rdi, rdx

        mul     rbx
        add     r11, rax
        mov     rax, qword [rcx+r15*8+8]
        adc     rdx, 0
        lea     r15, [r15+4]
        mov     r10, rdx

        mul     rbp
        add     rdi, rax
        mov     rax, qword [rsi+r15*8-16]
        adc     rdx, 0
        add     rdi, r11
        adc     rdx, 0
        mov     qword [rsp+r15*8-32], rdi
        mov     r13, rdx
        cmp     r15, r9
        jb      near $L$1st4x

        ; tail of the first row (last two words + top carry)
        mul     rbx
        add     r10, rax
        mov     rax, qword [rcx+r15*8-16]
        adc     rdx, 0
        mov     r11, rdx

        mul     rbp
        add     r13, rax
        mov     rax, qword [rsi+r15*8-8]
        adc     rdx, 0
        add     r13, r10
        adc     rdx, 0
        mov     qword [rsp+r15*8-24], r13
        mov     rdi, rdx

        mul     rbx
        add     r11, rax
        mov     rax, qword [rcx+r15*8-8]
        adc     rdx, 0
        mov     r10, rdx

        mul     rbp
        add     rdi, rax
        mov     rax, qword [rsi]
        adc     rdx, 0
        add     rdi, r11
        adc     rdx, 0
        mov     qword [rsp+r15*8-16], rdi
        mov     r13, rdx

        xor     rdi, rdi
        add     r13, r10
        adc     rdi, 0
        mov     qword [rsp+r15*8-8], r13
        mov     qword [rsp+r15*8], rdi

        lea     r14, [r14+1]

        ; ---- remaining rows: accumulate into the temporary vector ----
ALIGN   4
$L$outer4x:
        mov     rbx, qword [r12+r14*8]
        xor     r15, r15
        mov     r10, qword [rsp]
        mov     rbp, r8
        mul     rbx
        add     r10, rax
        mov     rax, qword [rcx]
        adc     rdx, 0

        imul    rbp, r10
        mov     r11, rdx

        mul     rbp
        add     r10, rax
        mov     rax, qword [rsi+8]
        adc     rdx, 0
        mov     rdi, rdx

        mul     rbx
        add     r11, rax
        mov     rax, qword [rcx+8]
        adc     rdx, 0
        add     r11, qword [rsp+8]
        adc     rdx, 0
        mov     r10, rdx

        mul     rbp
        add     rdi, rax
        mov     rax, qword [rsi+16]
        adc     rdx, 0
        add     rdi, r11
        lea     r15, [r15+4]
        adc     rdx, 0
        mov     qword [rsp], rdi
        mov     r13, rdx
        jmp     near $L$inner4x
ALIGN   16
$L$inner4x:
        mul     rbx
        add     r10, rax
        mov     rax, qword [rcx+r15*8-16]
        adc     rdx, 0
        add     r10, qword [rsp+r15*8-16]
        adc     rdx, 0
        mov     r11, rdx

        mul     rbp
        add     r13, rax
        mov     rax, qword [rsi+r15*8-8]
        adc     rdx, 0
        add     r13, r10
        adc     rdx, 0
        mov     qword [rsp+r15*8-24], r13
        mov     rdi, rdx

        mul     rbx
        add     r11, rax
        mov     rax, qword [rcx+r15*8-8]
        adc     rdx, 0
        add     r11, qword [rsp+r15*8-8]
        adc     rdx, 0
        mov     r10, rdx

        mul     rbp
        add     rdi, rax
        mov     rax, qword [rsi+r15*8]
        adc     rdx, 0
        add     rdi, r11
        adc     rdx, 0
        mov     qword [rsp+r15*8-16], rdi
        mov     r13, rdx

        mul     rbx
        add     r10, rax
        mov     rax, qword [rcx+r15*8]
        adc     rdx, 0
        add     r10, qword [rsp+r15*8]
        adc     rdx, 0
        mov     r11, rdx

        mul     rbp
        add     r13, rax
        mov     rax, qword [rsi+r15*8+8]
        adc     rdx, 0
        add     r13, r10
        adc     rdx, 0
        mov     qword [rsp+r15*8-8], r13
        mov     rdi, rdx

        mul     rbx
        add     r11, rax
        mov     rax, qword [rcx+r15*8+8]
        adc     rdx, 0
        add     r11, qword [rsp+r15*8+8]
        adc     rdx, 0
        lea     r15, [r15+4]
        mov     r10, rdx

        mul     rbp
        add     rdi, rax
        mov     rax, qword [rsi+r15*8-16]
        adc     rdx, 0
        add     rdi, r11
        adc     rdx, 0
        mov     qword [rsp+r15*8-32], rdi
        mov     r13, rdx
        cmp     r15, r9
        jb      near $L$inner4x

        ; tail of the row (last two words + top carry)
        mul     rbx
        add     r10, rax
        mov     rax, qword [rcx+r15*8-16]
        adc     rdx, 0
        add     r10, qword [rsp+r15*8-16]
        adc     rdx, 0
        mov     r11, rdx

        mul     rbp
        add     r13, rax
        mov     rax, qword [rsi+r15*8-8]
        adc     rdx, 0
        add     r13, r10
        adc     rdx, 0
        mov     qword [rsp+r15*8-24], r13
        mov     rdi, rdx

        mul     rbx
        add     r11, rax
        mov     rax, qword [rcx+r15*8-8]
        adc     rdx, 0
        add     r11, qword [rsp+r15*8-8]
        adc     rdx, 0
        lea     r14, [r14+1]                    ; next row (lea keeps the flags intact)
        mov     r10, rdx

        mul     rbp
        add     rdi, rax
        mov     rax, qword [rsi]
        adc     rdx, 0
        add     rdi, r11
        adc     rdx, 0
        mov     qword [rsp+r15*8-16], rdi
        mov     r13, rdx

        xor     rdi, rdi
        add     r13, r10
        adc     rdi, 0
        add     r13, qword [rsp+r9*8]           ; fold in the previous top carry word
        adc     rdi, 0
        mov     qword [rsp+r15*8-8], r13
        mov     qword [rsp+r15*8], rdi

        cmp     r14, r9
        jb      near $L$outer4x

        ; ---- destination = temporary vector - modulus, 4 words/iteration ----
        mov     rdi, qword [rsp+r9*8+16]        ; reload the parked destination pointer
        lea     r15, [r9-4]
        mov     rax, qword [rsp]
        pxor    xmm0, xmm0                      ; zero, used below to wipe the temporary vector
        mov     rdx, qword [rsp+8]
        shr     r15, 2                          ; r15 = (count - 4) / 4 iterations
        lea     rsi, [rsp]
        xor     r14, r14

        sub     rax, qword [rcx]                ; starts the borrow chain
        mov     rbx, qword [rsi+16]
        mov     rbp, qword [rsi+24]
        sbb     rdx, qword [rcx+8]
        jmp     near $L$sub4x
ALIGN   16
$L$sub4x:
        mov     qword [rdi+r14*8], rax
        mov     qword [rdi+r14*8+8], rdx
        sbb     rbx, qword [rcx+r14*8+16]
        mov     rax, qword [rsi+r14*8+32]
        mov     rdx, qword [rsi+r14*8+40]
        sbb     rbp, qword [rcx+r14*8+24]
        mov     qword [rdi+r14*8+16], rbx
        mov     qword [rdi+r14*8+24], rbp
        sbb     rax, qword [rcx+r14*8+32]
        mov     rbx, qword [rsi+r14*8+48]
        mov     rbp, qword [rsi+r14*8+56]
        sbb     rdx, qword [rcx+r14*8+40]
        lea     r14, [r14+4]
        dec     r15
        jnz     near $L$sub4x

        mov     qword [rdi+r14*8], rax
        mov     rax, qword [rsi+r14*8+32]       ; top carry word
        sbb     rbx, qword [rcx+r14*8+16]
        mov     qword [rdi+r14*8+8], rdx
        sbb     rbp, qword [rcx+r14*8+24]
        mov     qword [rdi+r14*8+16], rbx

        ; mask from (top carry - borrow); branch-free choice of copy source:
        ; rsi = mask ? temporary vector : destination
        sbb     rax, 0
        mov     qword [rdi+r14*8+24], rbp
        xor     r14, r14
        and     rsi, rax
        not     rax
        mov     rcx, rdi
        and     rcx, rax
        lea     r15, [r9-4]
        or      rsi, rcx
        shr     r15, 2

        ; copy the selected vector to the destination, zeroing the
        ; temporary vector as we go (r14 is a byte offset here)
        movdqu  xmm1, XMMWORD [rsi]
        movdqa  XMMWORD [rsp], xmm0
        movdqu  XMMWORD [rdi], xmm1
        jmp     near $L$copy4x
ALIGN   16
$L$copy4x:
        movdqu  xmm2, XMMWORD [rsi+r14*1+16]
        movdqu  xmm1, XMMWORD [rsi+r14*1+32]
        movdqa  XMMWORD [rsp+r14*1+16], xmm0
        movdqu  XMMWORD [rdi+r14*1+16], xmm2
        movdqa  XMMWORD [rsp+r14*1+32], xmm0
        movdqu  XMMWORD [rdi+r14*1+32], xmm1
        lea     r14, [r14+32]
        dec     r15
        jnz     near $L$copy4x

        movdqu  xmm2, XMMWORD [rsi+r14*1+16]
        movdqa  XMMWORD [rsp+r14*1+16], xmm0
        movdqu  XMMWORD [rdi+r14*1+16], xmm2
        mov     rsi, qword [rsp+r9*8+8]         ; rsi = original rsp

        mov     rax, 1                          ; return value
        mov     r15, qword [rsi-48]

        mov     r14, qword [rsi-40]

        mov     r13, qword [rsi-32]

        mov     r12, qword [rsi-24]

        mov     rbp, qword [rsi-16]

        mov     rbx, qword [rsi-8]

        lea     rsp, [rsi]

$L$mul4x_epilogue:
        mov     rdi, qword [rsp+8]              ; WIN64 epilogue
        mov     rsi, qword [rsp+16]
        DB      0F3h,0C3h                       ; repret

$L$SEH_end_bn_mul4x_mont:
EXTERN  bn_sqr8x_internal


;-----------------------------------------------------------------------
; bn_sqr8x_mont -- squaring path.  Normally reached through
; $L$sqr8x_enter from bn_mul_mont (both operands identical, count a
; multiple of 8).  Sets up a 64-byte-aligned frame, calls the external
; bn_sqr8x_internal, then performs the final subtraction and a masked
; copy into the destination.
;
; Frame slots written here: [rsp+32] = reduction constant,
; [rsp+40] = original rsp.
; NOTE(review): the register/flag state consumed after the call (rax,
; rbp, r9 negative, the contents at [rdi+r9]) is produced by
; bn_sqr8x_internal, which is not visible in this file -- confirm there.
;-----------------------------------------------------------------------
ALIGN   32
bn_sqr8x_mont:
        mov     qword [rsp+8], rdi              ; WIN64 prologue
        mov     qword [rsp+16], rsi
        mov     rax, rsp
$L$SEH_begin_bn_sqr8x_mont:
        mov     rdi, rcx
        mov     rsi, rdx
        mov     rdx, r8
        mov     rcx, r9
        mov     r8, qword [rsp+40]
        mov     r9, qword [rsp+48]



        mov     rax, rsp

$L$sqr8x_enter:
        push    rbx

        push    rbp

        push    r12

        push    r13

        push    r14

        push    r15

$L$sqr8x_prologue:

        mov     r10d, r9d
        shl     r9d, 3                          ; r9  = count * 8 (bytes)
        shl     r10, 3+2                        ; r10 = count * 32
        neg     r9                              ; r9  = -bytes






        ; Choose the frame address from the low 12 bits of the distance
        ; between the candidate frame and the input vector (rsi).
        ; NOTE(review): presumably this avoids page-offset aliasing
        ; between the frame and the input -- confirm the intent.
        lea     r11, [rsp+r9*2-64]
        mov     rbp, rsp
        mov     r8, qword [r8]                  ; r8 = reduction constant
        sub     r11, rsi
        and     r11, 4095
        cmp     r10, r11
        jb      near $L$sqr8x_sp_alt
        sub     rbp, r11
        lea     rbp, [rbp+r9*2-64]
        jmp     near $L$sqr8x_sp_done

ALIGN   32
$L$sqr8x_sp_alt:
        lea     r10, [r9*2+(4096-64)]
        lea     rbp, [rbp+r9*2-64]
        sub     r11, r10
        mov     r10, 0                          ; mov, not xor: CF from the sub must survive
        cmovc   r11, r10                        ; clamp the adjustment to 0 on borrow
        sub     rbp, r11
$L$sqr8x_sp_done:
        and     rbp, -64                        ; 64-byte align the new frame

        ; lower rsp to rbp, touching one qword per 4096-byte step
        mov     r11, rsp
        sub     r11, rbp
        and     r11, -4096
        lea     rsp, [r11+rbp*1]
        mov     r10, qword [rsp]
        cmp     rsp, rbp
        ja      near $L$sqr8x_page_walk
        jmp     near $L$sqr8x_page_walk_done

ALIGN   16
$L$sqr8x_page_walk:
        lea     rsp, [rsp-4096]
        mov     r10, qword [rsp]
        cmp     rsp, rbp
        ja      near $L$sqr8x_page_walk
$L$sqr8x_page_walk_done:

        mov     r10, r9                         ; r10 = -bytes
        neg     r9                              ; r9  = +bytes

        mov     qword [rsp+32], r8
        mov     qword [rsp+40], rax             ; remember the original rsp

$L$sqr8x_body:

DB      102,72,15,110,209                       ; movq xmm2, rcx  (stash modulus pointer)
        pxor    xmm0, xmm0
DB      102,72,15,110,207                       ; movq xmm1, rdi  (stash destination pointer)
DB      102,73,15,110,218                       ; movq xmm3, r10  (stash -bytes)
        call    bn_sqr8x_internal




        lea     rbx, [rdi+r9*1]
        mov     rcx, r9
        mov     rdx, r9
DB      102,72,15,126,207                       ; movq rdi, xmm1  (restore destination pointer)
        sar     rcx, 3+2                        ; loop counter in units of 4 words
        jmp     near $L$sqr8x_sub

        ; ---- destination = [rbx...] - [rbp...], four words per iteration ----
        ; the counter runs up to zero; inc and lea leave CF untouched
ALIGN   32
$L$sqr8x_sub:
        mov     r12, qword [rbx]
        mov     r13, qword [rbx+8]
        mov     r14, qword [rbx+16]
        mov     r15, qword [rbx+24]
        lea     rbx, [rbx+32]
        sbb     r12, qword [rbp]
        sbb     r13, qword [rbp+8]
        sbb     r14, qword [rbp+16]
        sbb     r15, qword [rbp+24]
        lea     rbp, [rbp+32]
        mov     qword [rdi], r12
        mov     qword [rdi+8], r13
        mov     qword [rdi+16], r14
        mov     qword [rdi+24], r15
        lea     rdi, [rdi+32]
        inc     rcx
        jnz     near $L$sqr8x_sub

        sbb     rax, 0                          ; rax = select mask after the final borrow
        lea     rbx, [rbx+r9*1]                 ; rewind both pointers by adding r9
        lea     rdi, [rdi+r9*1]

DB      102,72,15,110,200                       ; movq xmm1, rax
        pxor    xmm0, xmm0
        pshufd  xmm1, xmm1, 0                   ; broadcast the low dword of the mask
        mov     rsi, qword [rsp+40]             ; rsi = original rsp

        jmp     near $L$sqr8x_cond_copy

        ; ---- masked merge into the destination, wiping the frame ----
        ; destination = (source & mask) | (destination & ~mask); the source
        ; block and the block rdx bytes away from it are zeroed.
ALIGN   32
$L$sqr8x_cond_copy:
        movdqa  xmm2, XMMWORD [rbx]
        movdqa  xmm3, XMMWORD [rbx+16]
        lea     rbx, [rbx+32]
        movdqu  xmm4, XMMWORD [rdi]
        movdqu  xmm5, XMMWORD [rdi+16]
        lea     rdi, [rdi+32]
        movdqa  XMMWORD [rbx-32], xmm0
        movdqa  XMMWORD [rbx-16], xmm0
        movdqa  XMMWORD [rbx+rdx*1-32], xmm0
        movdqa  XMMWORD [rbx+rdx*1-16], xmm0
        pcmpeqd xmm0, xmm1                      ; xmm0 = ~mask (all ones iff mask == 0)
        pand    xmm2, xmm1
        pand    xmm3, xmm1
        pand    xmm4, xmm0
        pand    xmm5, xmm0
        pxor    xmm0, xmm0
        por     xmm4, xmm2
        por     xmm5, xmm3
        movdqu  XMMWORD [rdi-32], xmm4
        movdqu  XMMWORD [rdi-16], xmm5
        add     r9, 32
        jnz     near $L$sqr8x_cond_copy

        mov     rax, 1                          ; return value
        mov     r15, qword [rsi-48]

        mov     r14, qword [rsi-40]

        mov     r13, qword [rsi-32]

        mov     r12, qword [rsi-24]

        mov     rbp, qword [rsi-16]

        mov     rbx, qword [rsi-8]

        lea     rsp, [rsi]

$L$sqr8x_epilogue:
        mov     rdi, qword [rsp+8]              ; WIN64 epilogue
        mov     rsi, qword [rsp+16]
        DB      0F3h,0C3h                       ; repret

$L$SEH_end_bn_sqr8x_mont:
        ; identification banner, NUL-terminated
DB      "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>",0
ALIGN   16
EXTERN  __imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; mul_handler -- language-specific exception handler registered for
; bn_mul_mont and bn_mul4x_mont (see the .xdata records below).
;
; In:  r8 = context record, r9 = dispatcher context (rcx/rdx unused).
; The handler data at [r9+56] holds two image-relative offsets: the
; function's *_body and *_epilogue labels.
;     fault address < body      -> frame not built yet, use [r8+120]
;     fault address >= epilogue -> frame already torn down, use [r8+152]
;     otherwise                 -> reload the original rsp that the body
;                                  stored at [rsp + 8*count + 8]
; NOTE(review): offsets 120/152/192/248 into the context record and
; 8/40/56 into the dispatcher context are presumably CONTEXT.Rax/Rsp/R9/Rip
; and DISPATCHER_CONTEXT.ImageBase/ContextRecord/HandlerData -- confirm
; against the Windows SDK headers.
;-----------------------------------------------------------------------
ALIGN   16
mul_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp, 64

        mov     rax, qword [r8+120]
        mov     rbx, qword [r8+248]             ; rbx = faulting instruction address

        mov     rsi, qword [r9+8]               ; rsi = image base
        mov     r11, qword [r9+56]              ; r11 = handler data

        mov     r10d, dword [r11]
        lea     r10, [rsi+r10*1]                ; absolute address of the body label
        cmp     rbx, r10
        jb      near $L$common_seh_tail

        mov     rax, qword [r8+152]

        mov     r10d, dword [r11+4]
        lea     r10, [rsi+r10*1]                ; absolute address of the epilogue label
        cmp     rbx, r10
        jae     near $L$common_seh_tail

        mov     r10, qword [r8+192]             ; word count held in the faulting context
        mov     rax, qword [rax+r10*8+8]        ; original rsp saved by the body

        jmp     near $L$common_pop_regs



;-----------------------------------------------------------------------
; sqr_handler -- exception handler registered for bn_sqr8x_mont.
; Handler data holds three offsets: prologue, body, epilogue.
;     fault < prologue            -> nothing pushed yet, use [r8+120]
;     prologue <= fault < body    -> registers pushed, frame not final:
;                                    recover them relative to [r8+120]
;     body <= fault < epilogue    -> reload original rsp from [rsp+40]
;     fault >= epilogue           -> use [r8+152] as is
; Falls through into the shared register-recovery and unwind tail.
;-----------------------------------------------------------------------
ALIGN   16
sqr_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp, 64

        mov     rax, qword [r8+120]
        mov     rbx, qword [r8+248]             ; rbx = faulting instruction address

        mov     rsi, qword [r9+8]               ; rsi = image base
        mov     r11, qword [r9+56]              ; r11 = handler data

        mov     r10d, dword [r11]
        lea     r10, [rsi+r10*1]
        cmp     rbx, r10
        jb      near $L$common_seh_tail

        mov     r10d, dword [r11+4]
        lea     r10, [rsi+r10*1]
        cmp     rbx, r10
        jb      near $L$common_pop_regs

        mov     rax, qword [r8+152]

        mov     r10d, dword [r11+8]
        lea     r10, [rsi+r10*1]
        cmp     rbx, r10
        jae     near $L$common_seh_tail

        mov     rax, qword [rax+40]             ; original rsp saved by the body

        ; rax = original rsp: the six pushed registers sit just below it.
        ; Write them back into the context record.
$L$common_pop_regs:
        mov     rbx, qword [rax-8]
        mov     rbp, qword [rax-16]
        mov     r12, qword [rax-24]
        mov     r13, qword [rax-32]
        mov     r14, qword [rax-40]
        mov     r15, qword [rax-48]
        mov     qword [r8+144], rbx
        mov     qword [r8+160], rbp
        mov     qword [r8+216], r12
        mov     qword [r8+224], r13
        mov     qword [r8+232], r14
        mov     qword [r8+240], r15

        ; rax = stack pointer at function entry: restore the rdi/rsi that
        ; the WIN64 prologue spilled at [rax+8]/[rax+16], and the rsp itself.
$L$common_seh_tail:
        mov     rdi, qword [rax+8]
        mov     rsi, qword [rax+16]
        mov     qword [r8+152], rax
        mov     qword [r8+168], rsi
        mov     qword [r8+176], rdi

        mov     rdi, qword [r9+40]
        mov     rsi, r8
        mov     ecx, 154
        DD      0xa548f3fc                      ; bytes FC F3 48 A5 = cld; rep movsq (copy 154 qwords)

        ; Call RtlVirtualUnwind: four register arguments, four more in
        ; the stack slots above the 32-byte shadow area.
        mov     rsi, r9
        xor     rcx, rcx
        mov     rdx, qword [rsi+8]
        mov     r8, qword [rsi]
        mov     r9, qword [rsi+16]
        mov     r10, qword [rsi+40]
        lea     r11, [rsi+56]
        lea     r12, [rsi+24]
        mov     qword [rsp+32], r10
        mov     qword [rsp+40], r11
        mov     qword [rsp+48], r12
        mov     qword [rsp+56], rcx
        call    qword [__imp_RtlVirtualUnwind]

        mov     eax, 1
        add     rsp, 64
        popfq
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        pop     rdi
        pop     rsi
        DB      0F3h,0C3h                       ; repret


; Function table: begin / end / unwind-info triplets, image-relative.
section .pdata rdata align=4
ALIGN   4
        DD      $L$SEH_begin_bn_mul_mont wrt ..imagebase
        DD      $L$SEH_end_bn_mul_mont wrt ..imagebase
        DD      $L$SEH_info_bn_mul_mont wrt ..imagebase

        DD      $L$SEH_begin_bn_mul4x_mont wrt ..imagebase
        DD      $L$SEH_end_bn_mul4x_mont wrt ..imagebase
        DD      $L$SEH_info_bn_mul4x_mont wrt ..imagebase

        DD      $L$SEH_begin_bn_sqr8x_mont wrt ..imagebase
        DD      $L$SEH_end_bn_sqr8x_mont wrt ..imagebase
        DD      $L$SEH_info_bn_sqr8x_mont wrt ..imagebase

; Unwind info: 4-byte header, handler address, then the label offsets
; the handler reads as its handler data.
section .xdata rdata align=8
ALIGN   8
$L$SEH_info_bn_mul_mont:
DB      9,0,0,0
        DD      mul_handler wrt ..imagebase
        DD      $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
$L$SEH_info_bn_mul4x_mont:
DB      9,0,0,0
        DD      mul_handler wrt ..imagebase
        DD      $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
$L$SEH_info_bn_sqr8x_mont:
DB      9,0,0,0
        DD      sqr_handler wrt ..imagebase
        DD      $L$sqr8x_prologue wrt ..imagebase,$L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase
ALIGN   8