; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; NOTE(review): the line structure of this section was restored (one
; statement per line) and explanatory comments were added during review.
; The instruction stream itself is unchanged.  Comments added here are not
; produced by the generator and will presumably be lost on regeneration.
;
; Syntax: NASM (Intel operand order).  Target: x86-64, Win64 entry points.

default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD

%include "ring_core_generated/prefix_symbols_nasm.inc"
section	.text code align=64


EXTERN	OPENSSL_ia32cap_P

global	bn_mul_mont_gather5

;-----------------------------------------------------------------------
; bn_mul_mont_gather5
;
; Entry ABI: Microsoft x64.  The prologue stores rdi/rsi in the caller's
; home slots and re-maps the arguments into the registers the body uses:
;   rdi  <- rcx       arg 1: destination words (written in $L$sub/$L$copy)
;   rsi  <- rdx       arg 2: operand read one word at a time by the loops
;   rdx  <- r8        arg 3: table base; 256-byte rows are gathered from it
;   rcx  <- r9        arg 4: words multiplied by m1 (rbp) and subtracted
;                            from the result at the end
;   r8   <- [40+rsp]  arg 5: pointer, dereferenced once into r8
;   r9   <- [48+rsp]  arg 6: word count (only the low 32 bits are kept)
;   xmm5 <- [56+rsp]  arg 7: 32-bit index selecting the table entry
; NOTE(review): the roles above are read off the code; the C prototype is
; not visible in this file -- confirm against the BoringSSL declaration.
;
; Returns: rax = 1.
; Preserves rbx, rbp, r12-r15 (pushed) and rdi/rsi (home slots).
;
; Dispatch: if the word count is not a multiple of 8 the one-word-at-a-time
; path at $L$mul_enter is used; otherwise dword 2 of OPENSSL_ia32cap_P is
; loaded into r11d and control jumps to $L$mul4x_enter.
;-----------------------------------------------------------------------
ALIGN	64
bn_mul_mont_gather5:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_mul_mont_gather5:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

	mov	r9d,r9d			; zero-extend the 32-bit word count
	mov	rax,rsp			; rax = entry rsp, saved in the frame later

	test	r9d,7
	jnz	NEAR $L$mul_enter	; count % 8 != 0 -> generic path
	lea	r11,[OPENSSL_ia32cap_P]
	mov	r11d,DWORD[8+r11]	; capability dword tested at $L$mul4x_enter
	jmp	NEAR $L$mul4x_enter

ALIGN	16
$L$mul_enter:
	movd	xmm5,DWORD[56+rsp]	; arg 7 (index); read before rsp moves
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15

	; Carve the frame: r10 = (rsp - 280 - 8*count) rounded down to 1024.
	neg	r9
	mov	r11,rsp
	lea	r10,[((-280))+r9*8+rsp]
	neg	r9			; restore count
	and	r10,-1024

	; Move rsp down to r10 touching one word in every 4096-byte page on
	; the way, so each page between the old and new rsp is accessed in
	; descending order.
	sub	r11,r10
	and	r11,-4096
	lea	rsp,[r11*1+r10]
	mov	r11,QWORD[rsp]
	cmp	rsp,r10
	ja	NEAR $L$mul_page_walk
	jmp	NEAR $L$mul_page_walk_done

$L$mul_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r11,QWORD[rsp]
	cmp	rsp,r10
	ja	NEAR $L$mul_page_walk
$L$mul_page_walk_done:

	lea	r10,[$L$inc]		; constant table defined outside this section
	mov	QWORD[8+r9*8+rsp],rax	; tp[count+1] = entry rsp

$L$mul_body:

	; Build sixteen 16-byte lane masks at [112+r10 .. 352+r10]: counters
	; are stepped with paddd and compared (pcmpeqd) with the broadcast
	; index, so only the lane whose counter equals the index is all-ones.
	; NOTE(review): the starting counter values come from $L$inc, which is
	; not visible here.
	lea	r12,[128+rdx]		; r12 = table base biased by +128
	movdqa	xmm0,XMMWORD[r10]
	movdqa	xmm1,XMMWORD[16+r10]
	lea	r10,[((24-112))+r9*8+rsp]	; mask area sits above tp[]
	and	r10,-16			; movdqa needs 16-byte alignment

	pshufd	xmm5,xmm5,0		; broadcast index to all four dwords
	movdqa	xmm4,xmm1
	movdqa	xmm2,xmm1
	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
DB	0x67				; stray prefix byte (presumably padding)
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[112+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[128+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[144+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[160+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[176+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[192+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[208+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[224+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[240+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[256+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[272+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[288+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[304+r10],xmm0

	paddd	xmm3,xmm2
DB	0x67
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[320+r10],xmm1

	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[336+r10],xmm2

	; Gather row 0: every 16-byte chunk of the 256-byte row is loaded,
	; ANDed with its mask and ORed together, so the addresses touched do
	; not depend on the index.  The last four masks are still in registers.
	pand	xmm0,XMMWORD[64+r12]

	pand	xmm1,XMMWORD[80+r12]
	pand	xmm2,XMMWORD[96+r12]
	movdqa	XMMWORD[352+r10],xmm3
	pand	xmm3,XMMWORD[112+r12]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[((-128))+r12]
	movdqa	xmm5,XMMWORD[((-112))+r12]
	movdqa	xmm2,XMMWORD[((-96))+r12]
	pand	xmm4,XMMWORD[112+r10]
	movdqa	xmm3,XMMWORD[((-80))+r12]
	pand	xmm5,XMMWORD[128+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[144+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[160+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[((-64))+r12]
	movdqa	xmm5,XMMWORD[((-48))+r12]
	movdqa	xmm2,XMMWORD[((-32))+r12]
	pand	xmm4,XMMWORD[176+r10]
	movdqa	xmm3,XMMWORD[((-16))+r12]
	pand	xmm5,XMMWORD[192+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[208+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[224+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[r12]
	movdqa	xmm5,XMMWORD[16+r12]
	movdqa	xmm2,XMMWORD[32+r12]
	pand	xmm4,XMMWORD[240+r10]
	movdqa	xmm3,XMMWORD[48+r12]
	pand	xmm5,XMMWORD[256+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[272+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[288+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	por	xmm0,xmm1
	pshufd	xmm1,xmm0,0x4e		; swap 64-bit halves
	por	xmm0,xmm1		; fold both halves into the low qword
	lea	r12,[256+r12]		; advance to the next 256-byte row
DB	102,72,15,126,195		; movq rbx,xmm0 -> m0 = gathered word

	mov	r8,QWORD[r8]		; r8 = word behind arg 5 (n0)
	mov	rax,QWORD[rsi]		; ap[0]

	xor	r14,r14			; i = 0
	xor	r15,r15			; j = 0

	; First pass (i = 0): tp[] = ap[]*m0 + np[]*m1, shifted down one word.
	mov	rbp,r8
	mul	rbx			; ap[0]*m0
	mov	r10,rax
	mov	rax,QWORD[rcx]		; np[0]

	imul	rbp,r10			; m1 = low(ap[0]*m0) * n0
	mov	r11,rdx

	mul	rbp			; np[0]*m1
	add	r10,rax			; low word only feeds the carry
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	r13,rdx

	lea	r15,[1+r15]		; j++ (lea keeps flags untouched)
	jmp	NEAR $L$1st_enter

ALIGN	16
$L$1st:
	add	r13,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	r13,r11
	mov	r11,r10
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13	; tp[j-1]
	mov	r13,rdx

$L$1st_enter:
	mul	rbx			; ap[j]*m0
	add	r11,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	lea	r15,[1+r15]		; j++
	mov	r10,rdx

	mul	rbp			; np[j]*m1
	cmp	r15,r9
	jne	NEAR $L$1st		; on exit j == count


	add	r13,rax
	adc	rdx,0
	add	r13,r11
	adc	rdx,0
	mov	QWORD[((-16))+r9*8+rsp],r13	; tp[count-2]
	mov	r13,rdx
	mov	r11,r10

	xor	rdx,rdx
	add	r13,r11
	adc	rdx,0
	mov	QWORD[((-8))+r9*8+rsp],r13	; tp[count-1]
	mov	QWORD[r9*8+rsp],rdx		; tp[count] = top carry

	lea	r14,[1+r14]		; i++
	jmp	NEAR $L$outer
ALIGN	16
$L$outer:
	; Gather the next table row using the masks saved on the stack
	; (rdx = mask base biased by +128, mirroring the bias on r12).
	lea	rdx,[((24+128))+r9*8+rsp]
	and	rdx,-16
	pxor	xmm4,xmm4
	pxor	xmm5,xmm5
	movdqa	xmm0,XMMWORD[((-128))+r12]
	movdqa	xmm1,XMMWORD[((-112))+r12]
	movdqa	xmm2,XMMWORD[((-96))+r12]
	movdqa	xmm3,XMMWORD[((-80))+r12]
	pand	xmm0,XMMWORD[((-128))+rdx]
	pand	xmm1,XMMWORD[((-112))+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[((-96))+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[((-80))+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[((-64))+r12]
	movdqa	xmm1,XMMWORD[((-48))+r12]
	movdqa	xmm2,XMMWORD[((-32))+r12]
	movdqa	xmm3,XMMWORD[((-16))+r12]
	pand	xmm0,XMMWORD[((-64))+rdx]
	pand	xmm1,XMMWORD[((-48))+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[((-32))+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[((-16))+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[r12]
	movdqa	xmm1,XMMWORD[16+r12]
	movdqa	xmm2,XMMWORD[32+r12]
	movdqa	xmm3,XMMWORD[48+r12]
	pand	xmm0,XMMWORD[rdx]
	pand	xmm1,XMMWORD[16+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[32+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[48+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[64+r12]
	movdqa	xmm1,XMMWORD[80+r12]
	movdqa	xmm2,XMMWORD[96+r12]
	movdqa	xmm3,XMMWORD[112+r12]
	pand	xmm0,XMMWORD[64+rdx]
	pand	xmm1,XMMWORD[80+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[96+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[112+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	por	xmm4,xmm5
	pshufd	xmm0,xmm4,0x4e
	por	xmm0,xmm4
	lea	r12,[256+r12]

	mov	rax,QWORD[rsi]		; ap[0]
DB	102,72,15,126,195		; movq rbx,xmm0 -> m0 = gathered word

	xor	r15,r15			; j = 0
	mov	rbp,r8
	mov	r10,QWORD[rsp]		; tp[0]

	mul	rbx			; ap[0]*m0
	add	r10,rax			; + tp[0]
	mov	rax,QWORD[rcx]
	adc	rdx,0

	imul	rbp,r10			; m1 = low word * n0
	mov	r11,rdx

	mul	rbp			; np[0]*m1
	add	r10,rax			; low word only feeds the carry
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	r10,QWORD[8+rsp]	; tp[1]
	mov	r13,rdx

	lea	r15,[1+r15]		; j++
	jmp	NEAR $L$inner_enter

ALIGN	16
$L$inner:
	add	r13,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	r13,r10			; np[j]*m1 + ap[j]*m0 + tp[j]
	mov	r10,QWORD[r15*8+rsp]
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13	; tp[j-1]
	mov	r13,rdx

$L$inner_enter:
	mul	rbx			; ap[j]*m0
	add	r11,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	add	r10,r11			; + tp[j]
	mov	r11,rdx
	adc	r11,0
	lea	r15,[1+r15]		; j++

	mul	rbp			; np[j]*m1
	cmp	r15,r9
	jne	NEAR $L$inner		; on exit j == count

	add	r13,rax
	adc	rdx,0
	add	r13,r10
	mov	r10,QWORD[r9*8+rsp]	; previous top carry word
	adc	rdx,0
	mov	QWORD[((-16))+r9*8+rsp],r13
	mov	r13,rdx

	xor	rdx,rdx
	add	r13,r11
	adc	rdx,0
	add	r13,r10			; fold in previous top carry
	adc	rdx,0
	mov	QWORD[((-8))+r9*8+rsp],r13
	mov	QWORD[r9*8+rsp],rdx	; new top carry

	lea	r14,[1+r14]		; i++
	cmp	r14,r9
	jb	NEAR $L$outer

	; Final step: rp[] = tp[] - np[] with a borrow chain.  The xor clears
	; CF, and mov/lea/jmp/dec below leave CF alone, so the first sbb sees
	; CF = 0 and later ones see the borrow of the previous word.
	xor	r14,r14			; i = 0, CF = 0
	mov	rax,QWORD[rsp]		; tp[0]
	lea	rsi,[rsp]		; rsi now points at tp[]
	mov	r15,r9			; j = count
	jmp	NEAR $L$sub
ALIGN	16
$L$sub:	sbb	rax,QWORD[r14*8+rcx]
	mov	QWORD[r14*8+rdi],rax	; rp[i] = tp[i] - np[i]
	mov	rax,QWORD[8+r14*8+rsi]	; tp[i+1]
	lea	r14,[1+r14]		; i++
	dec	r15			; dec does not modify CF
	jnz	NEAR $L$sub

	; rax = top carry word minus final borrow; used as a select mask below
	; (expected to be 0 or all-ones), rbx = its complement.
	sbb	rax,0
	mov	rbx,-1
	xor	rbx,rax
	xor	r14,r14
	mov	r15,r9			; j = count

$L$copy:
	; Branch-free select: rp[i] = (rp[i] & ~mask) | (tp[i] & mask);
	; tp[i] is overwritten with i so the temporary no longer holds data.
	mov	rcx,QWORD[r14*8+rdi]
	mov	rdx,QWORD[r14*8+rsp]
	and	rcx,rbx
	and	rdx,rax
	mov	QWORD[r14*8+rsp],r14
	or	rdx,rcx
	mov	QWORD[r14*8+rdi],rdx
	lea	r14,[1+r14]
	sub	r15,1
	jnz	NEAR $L$copy

	mov	rsi,QWORD[8+r9*8+rsp]	; rsi = entry rsp saved at $L$mul_body

	mov	rax,1			; return value

	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$mul_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_bn_mul_mont_gather5:

;-----------------------------------------------------------------------
; bn_mul4x_mont_gather5
;
; Same Win64 argument re-mapping as bn_mul_mont_gather5.  The body starts
; at $L$mul4x_enter, which bn_mul_mont_gather5 jumps to with r11d already
; holding dword 2 of OPENSSL_ia32cap_P.
; NOTE(review): on the fall-through path from this symbol's own prologue
; r11d is not initialised; the symbol is not declared global in this
; section, so presumably it is only reached via $L$mul4x_enter -- confirm.
;
; If bits 0x80108 are all set in r11d, control goes to $L$mulx4x_enter
; (defined outside this section).  NOTE(review): presumably BMI1 (bit 3),
; BMI2 (bit 8) and ADX (bit 19) of CPUID.(EAX=7).EBX -- confirm against the
; OPENSSL_ia32cap documentation.
;
; Returns: rax = 1.  Preserves rbx, rbp, r12-r15, rdi, rsi.
;-----------------------------------------------------------------------
ALIGN	32
bn_mul4x_mont_gather5:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_mul4x_mont_gather5:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]



DB	0x67
	mov	rax,rsp

$L$mul4x_enter:
	and	r11d,0x80108
	cmp	r11d,0x80108
	je	NEAR $L$mulx4x_enter
	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15

$L$mul4x_prologue:

DB	0x67
	shl	r9d,3			; count -> bytes
	lea	r10,[r9*2+r9]		; r10 = 3 * bytes
	neg	r9			; r9 = -bytes

	; Pick the frame base in rbp.  r11 = ((rsp - 320 - 2*bytes) - rdi)
	; mod 4096, i.e. the page-offset distance between the candidate frame
	; and the destination buffer; the two branches adjust rbp by it.
	lea	r11,[((-320))+r9*2+rsp]
	mov	rbp,rsp
	sub	r11,rdi
	and	r11,4095
	cmp	r10,r11
	jb	NEAR $L$mul4xsp_alt
	sub	rbp,r11
	lea	rbp,[((-320))+r9*2+rbp]
	jmp	NEAR $L$mul4xsp_done

ALIGN	32
$L$mul4xsp_alt:
	lea	r10,[((4096-320))+r9*2]
	lea	rbp,[((-320))+r9*2+rbp]
	sub	r11,r10
	mov	r10,0			; mov, not xor: CF from the sub is needed
	cmovc	r11,r10			; clamp the adjustment at 0 on borrow
	sub	rbp,r11
$L$mul4xsp_done:
	and	rbp,-64			; 64-byte align the frame
	; Walk rsp down to rbp touching one word per 4096-byte page.
	mov	r11,rsp
	sub	r11,rbp
	and	r11,-4096
	lea	rsp,[rbp*1+r11]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$mul4x_page_walk
	jmp	NEAR $L$mul4x_page_walk_done

$L$mul4x_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$mul4x_page_walk
$L$mul4x_page_walk_done:

	neg	r9			; r9 = +bytes again

	mov	QWORD[40+rsp],rax	; save entry rsp in the frame

$L$mul4x_body:

	call	mul4x_internal

	mov	rsi,QWORD[40+rsp]	; rsi = entry rsp

	mov	rax,1			; return value

	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$mul4x_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_bn_mul4x_mont_gather5:


;-----------------------------------------------------------------------
; mul4x_internal  (file-local helper; register-based convention)
;
; In:   rax = caller's entry rsp (the index is read from [56+rax])
;       rdi = destination, rsi = operand words, rdx = table base,
;       rcx = modulus words, r8 = pointer to n0, r9 = size in bytes
; Frame slots (offsets include +8 for the return address pushed by call):
;       [16+8+rsp] = end-of-table sentinel (r13)
;       [56+8+rsp] = saved destination pointer
;       [64+8+rsp] = start of tp[]
; Processes four words per loop iteration, then tail-jumps to
; $L$sqr4x_sub_entry (defined outside this section) with
;       rax = 0 - (carry | borrow), rbx = tp, rbp = modulus, rdi = dest,
;       rcx = r9 >> 5, r12 = np[0]-1, r13..r15 = np[1..3], r10 = 0.
; NOTE(review): no `ret` is visible here; the return presumably happens in
; the code at $L$sqr4x_sub_entry.
;-----------------------------------------------------------------------
ALIGN	32
mul4x_internal:

	shl	r9,5
	movd	xmm5,DWORD[56+rax]	; index argument from the caller's frame
	lea	rax,[$L$inc]
	lea	r13,[128+r9*1+rdx]	; end of table (+128 bias, as on r12)
	shr	r9,5			; restore r9
	movdqa	xmm0,XMMWORD[rax]
	movdqa	xmm1,XMMWORD[16+rax]
	lea	r10,[((88-112))+r9*1+rsp]	; mask area sits above tp[]
	lea	r12,[128+rdx]

	; Build the sixteen lane masks (same scheme as in bn_mul_mont_gather5).
	pshufd	xmm5,xmm5,0
	movdqa	xmm4,xmm1
DB	0x67,0x67
	movdqa	xmm2,xmm1
	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
DB	0x67
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[112+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[128+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[144+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[160+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[176+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[192+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[208+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[224+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[240+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[256+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[272+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[288+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[304+r10],xmm0

	paddd	xmm3,xmm2
DB	0x67
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[320+r10],xmm1

	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[336+r10],xmm2

	; Gather row 0 of the table (all sixteen chunks are always read).
	pand	xmm0,XMMWORD[64+r12]

	pand	xmm1,XMMWORD[80+r12]
	pand	xmm2,XMMWORD[96+r12]
	movdqa	XMMWORD[352+r10],xmm3
	pand	xmm3,XMMWORD[112+r12]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[((-128))+r12]
	movdqa	xmm5,XMMWORD[((-112))+r12]
	movdqa	xmm2,XMMWORD[((-96))+r12]
	pand	xmm4,XMMWORD[112+r10]
	movdqa	xmm3,XMMWORD[((-80))+r12]
	pand	xmm5,XMMWORD[128+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[144+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[160+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[((-64))+r12]
	movdqa	xmm5,XMMWORD[((-48))+r12]
	movdqa	xmm2,XMMWORD[((-32))+r12]
	pand	xmm4,XMMWORD[176+r10]
	movdqa	xmm3,XMMWORD[((-16))+r12]
	pand	xmm5,XMMWORD[192+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[208+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[224+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[r12]
	movdqa	xmm5,XMMWORD[16+r12]
	movdqa	xmm2,XMMWORD[32+r12]
	pand	xmm4,XMMWORD[240+r10]
	movdqa	xmm3,XMMWORD[48+r12]
	pand	xmm5,XMMWORD[256+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[272+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[288+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	por	xmm0,xmm1
	pshufd	xmm1,xmm0,0x4e
	por	xmm0,xmm1
	lea	r12,[256+r12]
DB	102,72,15,126,195		; movq rbx,xmm0 -> m0 = gathered word

	mov	QWORD[((16+8))+rsp],r13	; save end-of-table sentinel
	mov	QWORD[((56+8))+rsp],rdi	; save destination (rdi is reused below)

	mov	r8,QWORD[r8]		; n0
	mov	rax,QWORD[rsi]		; ap[0]
	lea	rsi,[r9*1+rsi]		; rsi = end of ap[]; indexed with r9 < 0
	neg	r9

	mov	rbp,r8
	mul	rbx			; ap[0]*m0
	mov	r10,rax
	mov	rax,QWORD[rcx]

	imul	rbp,r10			; m1 = low word * n0
	lea	r14,[((64+8))+rsp]	; r14 = tp
	mov	r11,rdx

	mul	rbp			; np[0]*m1
	add	r10,rax			; low word only feeds the carry
	mov	rax,QWORD[8+r9*1+rsi]
	adc	rdx,0
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	r15,[32+r9]		; j = 4 words (byte offset, negative)
	lea	rcx,[32+rcx]
	adc	rdx,0
	mov	QWORD[r14],rdi
	mov	r13,rdx
	jmp	NEAR $L$1st4x

ALIGN	32
$L$1st4x:
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+rcx]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r14],rdi
	mov	r13,rdx

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[8+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r14],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	rcx,[32+rcx]
	adc	rdx,0
	mov	QWORD[r14],rdi
	mov	r13,rdx

	add	r15,32			; j += 4 words
	jnz	NEAR $L$1st4x		; loop until the offset reaches 0

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+rcx]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r9*1+rsi]	; ap[0] for the next pass
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r14],rdi
	mov	r13,rdx

	lea	rcx,[r9*1+rcx]		; rewind np pointer

	xor	rdi,rdi
	add	r13,r10
	adc	rdi,0			; rdi = top carry
	mov	QWORD[((-8))+r14],r13

	jmp	NEAR $L$outer4x

ALIGN	32
$L$outer4x:
	; Gather the next table row; the masks live just above tp[].
	lea	rdx,[((16+128))+r14]
	pxor	xmm4,xmm4
	pxor	xmm5,xmm5
	movdqa	xmm0,XMMWORD[((-128))+r12]
	movdqa	xmm1,XMMWORD[((-112))+r12]
	movdqa	xmm2,XMMWORD[((-96))+r12]
	movdqa	xmm3,XMMWORD[((-80))+r12]
	pand	xmm0,XMMWORD[((-128))+rdx]
	pand	xmm1,XMMWORD[((-112))+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[((-96))+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[((-80))+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[((-64))+r12]
	movdqa	xmm1,XMMWORD[((-48))+r12]
	movdqa	xmm2,XMMWORD[((-32))+r12]
	movdqa	xmm3,XMMWORD[((-16))+r12]
	pand	xmm0,XMMWORD[((-64))+rdx]
	pand	xmm1,XMMWORD[((-48))+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[((-32))+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[((-16))+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[r12]
	movdqa	xmm1,XMMWORD[16+r12]
	movdqa	xmm2,XMMWORD[32+r12]
	movdqa	xmm3,XMMWORD[48+r12]
	pand	xmm0,XMMWORD[rdx]
	pand	xmm1,XMMWORD[16+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[32+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[48+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[64+r12]
	movdqa	xmm1,XMMWORD[80+r12]
	movdqa	xmm2,XMMWORD[96+r12]
	movdqa	xmm3,XMMWORD[112+r12]
	pand	xmm0,XMMWORD[64+rdx]
	pand	xmm1,XMMWORD[80+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[96+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[112+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	por	xmm4,xmm5
	pshufd	xmm0,xmm4,0x4e
	por	xmm0,xmm4
	lea	r12,[256+r12]
DB	102,72,15,126,195		; movq rbx,xmm0 -> m0 = gathered word

	mov	r10,QWORD[r9*1+r14]	; tp[0]
	mov	rbp,r8
	mul	rbx			; ap[0]*m0
	add	r10,rax			; + tp[0]
	mov	rax,QWORD[rcx]
	adc	rdx,0

	imul	rbp,r10			; m1 = low word * n0
	mov	r11,rdx
	mov	QWORD[r14],rdi		; store top carry of the previous pass

	lea	r14,[r9*1+r14]		; rewind tp

	mul	rbp			; np[0]*m1
	add	r10,rax			; low word only feeds the carry
	mov	rax,QWORD[8+r9*1+rsi]
	adc	rdx,0
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	add	r11,QWORD[8+r14]	; + tp[1]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	r15,[32+r9]		; j = 4 words (byte offset, negative)
	lea	rcx,[32+rcx]
	adc	rdx,0
	mov	r13,rdx
	jmp	NEAR $L$inner4x

ALIGN	32
$L$inner4x:
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+rcx]
	adc	rdx,0
	add	r10,QWORD[16+r14]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-32))+r14],rdi
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+rcx]
	adc	rdx,0
	add	r11,QWORD[((-8))+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	r13,rdx

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0
	add	r10,QWORD[r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[8+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-16))+r14],rdi
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	add	r11,QWORD[8+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	rcx,[32+rcx]
	adc	rdx,0
	mov	QWORD[((-8))+r14],r13
	mov	r13,rdx

	add	r15,32			; j += 4 words
	jnz	NEAR $L$inner4x

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+rcx]
	adc	rdx,0
	add	r10,QWORD[16+r14]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-32))+r14],rdi
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,rbp			; rax = m1; rbp is reloaded with the top
	mov	rbp,QWORD[((-8))+rcx]	; modulus word (compared after the loop)
	adc	rdx,0
	add	r11,QWORD[((-8))+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp			; top modulus word * m1
	add	rdi,rax
	mov	rax,QWORD[r9*1+rsi]	; ap[0] for the next pass
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	r13,rdx

	mov	QWORD[((-16))+r14],rdi
	lea	rcx,[r9*1+rcx]		; rewind np pointer

	xor	rdi,rdi
	add	r13,r10
	adc	rdi,0
	add	r13,QWORD[r14]		; fold in previous top carry
	adc	rdi,0			; rdi = new top carry
	mov	QWORD[((-8))+r14],r13

	cmp	r12,QWORD[((16+8))+rsp]	; reached the end-of-table sentinel?
	jb	NEAR $L$outer4x
	; Set up the registers consumed at $L$sqr4x_sub_entry.
	xor	rax,rax
	sub	rbp,r13			; compare top words; CF = borrow
	adc	r15,r15			; r15 is 0 after the loop -> r15 = CF
	or	rdi,r15
	sub	rax,rdi			; rax = 0 - (carry | borrow)
	lea	rbx,[r9*1+r14]		; rbx = start of tp[]
	mov	r12,QWORD[rcx]
	lea	rbp,[rcx]		; rbp = modulus pointer
	mov	rcx,r9
	sar	rcx,3+2			; signed shift of r9 (bytes) by 5
	mov	rdi,QWORD[((56+8))+rsp]	; reload destination pointer
	dec	r12
	xor	r10,r10
	mov	r13,QWORD[8+rbp]
	mov	r14,QWORD[16+rbp]
	mov	r15,QWORD[24+rbp]
	jmp	NEAR $L$sqr4x_sub_entry


global	bn_power5

;-----------------------------------------------------------------------
; bn_power5
;
; Entry ABI: Microsoft x64, same argument re-mapping as
; bn_mul_mont_gather5 (rdi, rsi, rdx, rcx <- rcx, rdx, r8, r9; r8, r9 <-
; [40+rsp], [48+rsp]; the index at [56+entry rsp] is read later by
; mul4x_internal through rax).
;
; If bits 0x80108 are all set in dword 2 of OPENSSL_ia32cap_P, control
; goes to $L$powerx5_enter (defined outside this section).  Otherwise the
; body runs five __bn_sqr8x_internal / __bn_post4x_internal pairs followed
; by one mul4x_internal.
; NOTE(review): i.e. presumably five Montgomery squarings then one
; multiplication by the gathered table entry -- confirm against the
; helpers, which are only partly visible from here.
;
; Frame slots: [32+rsp] = n0 value, [40+rsp] = entry rsp.
; Returns: rax = 1.  Preserves rbx, rbp, r12-r15, rdi, rsi.
;-----------------------------------------------------------------------
ALIGN	32
bn_power5:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_power5:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]



	mov	rax,rsp

	lea	r11,[OPENSSL_ia32cap_P]
	mov	r11d,DWORD[8+r11]
	and	r11d,0x80108
	cmp	r11d,0x80108
	je	NEAR $L$powerx5_enter
	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15

$L$power5_prologue:

	shl	r9d,3			; count -> bytes
	lea	r10d,[r9*2+r9]		; r10 = 3 * bytes (32-bit write zero-extends)
	neg	r9
	mov	r8,QWORD[r8]		; r8 = n0 value

	; Frame placement: same scheme as at $L$mul4x_prologue.
	lea	r11,[((-320))+r9*2+rsp]
	mov	rbp,rsp
	sub	r11,rdi
	and	r11,4095
	cmp	r10,r11
	jb	NEAR $L$pwr_sp_alt
	sub	rbp,r11
	lea	rbp,[((-320))+r9*2+rbp]
	jmp	NEAR $L$pwr_sp_done

ALIGN	32
$L$pwr_sp_alt:
	lea	r10,[((4096-320))+r9*2]
	lea	rbp,[((-320))+r9*2+rbp]
	sub	r11,r10
	mov	r10,0			; mov, not xor: CF from the sub is needed
	cmovc	r11,r10
	sub	rbp,r11
$L$pwr_sp_done:
	and	rbp,-64
	; Walk rsp down to rbp touching one word per 4096-byte page.
	mov	r11,rsp
	sub	r11,rbp
	and	r11,-4096
	lea	rsp,[rbp*1+r11]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$pwr_page_walk
	jmp	NEAR $L$pwr_page_walk_done

$L$pwr_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$pwr_page_walk
$L$pwr_page_walk_done:

	mov	r10,r9			; r10 = -bytes
	neg	r9			; r9  = +bytes

	mov	QWORD[32+rsp],r8	; n0 value
	mov	QWORD[40+rsp],rax	; entry rsp

$L$power5_body:
	; Park pointers in xmm registers across the helper calls.
DB	102,72,15,110,207		; movq xmm1,rdi
DB	102,72,15,110,209		; movq xmm2,rcx
DB	102,73,15,110,218		; movq xmm3,r10
DB	102,72,15,110,226		; movq xmm4,rdx

	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal

DB	102,72,15,126,209		; movq rcx,xmm2
DB	102,72,15,126,226		; movq rdx,xmm4
	mov	rdi,rsi			; destination = rsi for the final multiply
	mov	rax,QWORD[40+rsp]	; entry rsp, so [56+rax] is the index arg
	lea	r8,[32+rsp]		; pointer to the saved n0 value

	call	mul4x_internal

	mov	rsi,QWORD[40+rsp]	; rsi = entry rsp

	mov	rax,1			; return value
	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$power5_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_bn_power5:

global	bn_sqr8x_internal

1269ALIGN 32 1270bn_sqr8x_internal: 1271__bn_sqr8x_internal: 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 lea rbp,[32+r10] 1347 lea rsi,[r9*1+rsi] 1348 1349 mov rcx,r9 1350 1351 1352 mov r14,QWORD[((-32))+rbp*1+rsi] 1353 lea rdi,[((48+8))+r9*2+rsp] 1354 mov rax,QWORD[((-24))+rbp*1+rsi] 1355 lea rdi,[((-32))+rbp*1+rdi] 1356 mov rbx,QWORD[((-16))+rbp*1+rsi] 1357 mov r15,rax 1358 1359 mul r14 1360 mov r10,rax 1361 mov rax,rbx 1362 mov r11,rdx 1363 mov QWORD[((-24))+rbp*1+rdi],r10 1364 1365 mul r14 1366 add r11,rax 1367 mov rax,rbx 1368 adc rdx,0 1369 mov QWORD[((-16))+rbp*1+rdi],r11 1370 mov r10,rdx 1371 1372 1373 mov rbx,QWORD[((-8))+rbp*1+rsi] 1374 mul r15 1375 mov r12,rax 1376 mov rax,rbx 1377 mov r13,rdx 1378 1379 lea rcx,[rbp] 1380 mul r14 1381 add r10,rax 1382 mov rax,rbx 1383 mov r11,rdx 1384 adc r11,0 1385 add r10,r12 1386 adc r11,0 1387 mov QWORD[((-8))+rcx*1+rdi],r10 1388 jmp NEAR $L$sqr4x_1st 1389 1390ALIGN 32 1391$L$sqr4x_1st: 1392 mov rbx,QWORD[rcx*1+rsi] 1393 mul r15 1394 add r13,rax 1395 mov rax,rbx 1396 mov r12,rdx 1397 adc r12,0 1398 1399 mul r14 1400 add r11,rax 1401 mov rax,rbx 1402 mov rbx,QWORD[8+rcx*1+rsi] 1403 mov r10,rdx 1404 adc r10,0 1405 add r11,r13 1406 adc r10,0 1407 1408 1409 mul r15 1410 add r12,rax 1411 mov rax,rbx 1412 mov QWORD[rcx*1+rdi],r11 1413 mov r13,rdx 1414 adc r13,0 1415 1416 mul r14 1417 add r10,rax 1418 mov rax,rbx 1419 mov rbx,QWORD[16+rcx*1+rsi] 1420 mov r11,rdx 1421 adc r11,0 1422 add r10,r12 1423 adc r11,0 1424 1425 mul r15 1426 add r13,rax 1427 mov rax,rbx 1428 mov QWORD[8+rcx*1+rdi],r10 1429 mov r12,rdx 1430 adc r12,0 1431 1432 mul r14 1433 add r11,rax 1434 mov rax,rbx 1435 mov 
rbx,QWORD[24+rcx*1+rsi] 1436 mov r10,rdx 1437 adc r10,0 1438 add r11,r13 1439 adc r10,0 1440 1441 1442 mul r15 1443 add r12,rax 1444 mov rax,rbx 1445 mov QWORD[16+rcx*1+rdi],r11 1446 mov r13,rdx 1447 adc r13,0 1448 lea rcx,[32+rcx] 1449 1450 mul r14 1451 add r10,rax 1452 mov rax,rbx 1453 mov r11,rdx 1454 adc r11,0 1455 add r10,r12 1456 adc r11,0 1457 mov QWORD[((-8))+rcx*1+rdi],r10 1458 1459 cmp rcx,0 1460 jne NEAR $L$sqr4x_1st 1461 1462 mul r15 1463 add r13,rax 1464 lea rbp,[16+rbp] 1465 adc rdx,0 1466 add r13,r11 1467 adc rdx,0 1468 1469 mov QWORD[rdi],r13 1470 mov r12,rdx 1471 mov QWORD[8+rdi],rdx 1472 jmp NEAR $L$sqr4x_outer 1473 1474ALIGN 32 1475$L$sqr4x_outer: 1476 mov r14,QWORD[((-32))+rbp*1+rsi] 1477 lea rdi,[((48+8))+r9*2+rsp] 1478 mov rax,QWORD[((-24))+rbp*1+rsi] 1479 lea rdi,[((-32))+rbp*1+rdi] 1480 mov rbx,QWORD[((-16))+rbp*1+rsi] 1481 mov r15,rax 1482 1483 mul r14 1484 mov r10,QWORD[((-24))+rbp*1+rdi] 1485 add r10,rax 1486 mov rax,rbx 1487 adc rdx,0 1488 mov QWORD[((-24))+rbp*1+rdi],r10 1489 mov r11,rdx 1490 1491 mul r14 1492 add r11,rax 1493 mov rax,rbx 1494 adc rdx,0 1495 add r11,QWORD[((-16))+rbp*1+rdi] 1496 mov r10,rdx 1497 adc r10,0 1498 mov QWORD[((-16))+rbp*1+rdi],r11 1499 1500 xor r12,r12 1501 1502 mov rbx,QWORD[((-8))+rbp*1+rsi] 1503 mul r15 1504 add r12,rax 1505 mov rax,rbx 1506 adc rdx,0 1507 add r12,QWORD[((-8))+rbp*1+rdi] 1508 mov r13,rdx 1509 adc r13,0 1510 1511 mul r14 1512 add r10,rax 1513 mov rax,rbx 1514 adc rdx,0 1515 add r10,r12 1516 mov r11,rdx 1517 adc r11,0 1518 mov QWORD[((-8))+rbp*1+rdi],r10 1519 1520 lea rcx,[rbp] 1521 jmp NEAR $L$sqr4x_inner 1522 1523ALIGN 32 1524$L$sqr4x_inner: 1525 mov rbx,QWORD[rcx*1+rsi] 1526 mul r15 1527 add r13,rax 1528 mov rax,rbx 1529 mov r12,rdx 1530 adc r12,0 1531 add r13,QWORD[rcx*1+rdi] 1532 adc r12,0 1533 1534DB 0x67 1535 mul r14 1536 add r11,rax 1537 mov rax,rbx 1538 mov rbx,QWORD[8+rcx*1+rsi] 1539 mov r10,rdx 1540 adc r10,0 1541 add r11,r13 1542 adc r10,0 1543 1544 mul r15 1545 add r12,rax 1546 
mov QWORD[rcx*1+rdi],r11 1547 mov rax,rbx 1548 mov r13,rdx 1549 adc r13,0 1550 add r12,QWORD[8+rcx*1+rdi] 1551 lea rcx,[16+rcx] 1552 adc r13,0 1553 1554 mul r14 1555 add r10,rax 1556 mov rax,rbx 1557 adc rdx,0 1558 add r10,r12 1559 mov r11,rdx 1560 adc r11,0 1561 mov QWORD[((-8))+rcx*1+rdi],r10 1562 1563 cmp rcx,0 1564 jne NEAR $L$sqr4x_inner 1565 1566DB 0x67 1567 mul r15 1568 add r13,rax 1569 adc rdx,0 1570 add r13,r11 1571 adc rdx,0 1572 1573 mov QWORD[rdi],r13 1574 mov r12,rdx 1575 mov QWORD[8+rdi],rdx 1576 1577 add rbp,16 1578 jnz NEAR $L$sqr4x_outer 1579 1580 1581 mov r14,QWORD[((-32))+rsi] 1582 lea rdi,[((48+8))+r9*2+rsp] 1583 mov rax,QWORD[((-24))+rsi] 1584 lea rdi,[((-32))+rbp*1+rdi] 1585 mov rbx,QWORD[((-16))+rsi] 1586 mov r15,rax 1587 1588 mul r14 1589 add r10,rax 1590 mov rax,rbx 1591 mov r11,rdx 1592 adc r11,0 1593 1594 mul r14 1595 add r11,rax 1596 mov rax,rbx 1597 mov QWORD[((-24))+rdi],r10 1598 mov r10,rdx 1599 adc r10,0 1600 add r11,r13 1601 mov rbx,QWORD[((-8))+rsi] 1602 adc r10,0 1603 1604 mul r15 1605 add r12,rax 1606 mov rax,rbx 1607 mov QWORD[((-16))+rdi],r11 1608 mov r13,rdx 1609 adc r13,0 1610 1611 mul r14 1612 add r10,rax 1613 mov rax,rbx 1614 mov r11,rdx 1615 adc r11,0 1616 add r10,r12 1617 adc r11,0 1618 mov QWORD[((-8))+rdi],r10 1619 1620 mul r15 1621 add r13,rax 1622 mov rax,QWORD[((-16))+rsi] 1623 adc rdx,0 1624 add r13,r11 1625 adc rdx,0 1626 1627 mov QWORD[rdi],r13 1628 mov r12,rdx 1629 mov QWORD[8+rdi],rdx 1630 1631 mul rbx 1632 add rbp,16 1633 xor r14,r14 1634 sub rbp,r9 1635 xor r15,r15 1636 1637 add rax,r12 1638 adc rdx,0 1639 mov QWORD[8+rdi],rax 1640 mov QWORD[16+rdi],rdx 1641 mov QWORD[24+rdi],r15 1642 1643 mov rax,QWORD[((-16))+rbp*1+rsi] 1644 lea rdi,[((48+8))+rsp] 1645 xor r10,r10 1646 mov r11,QWORD[8+rdi] 1647 1648 lea r12,[r10*2+r14] 1649 shr r10,63 1650 lea r13,[r11*2+rcx] 1651 shr r11,63 1652 or r13,r10 1653 mov r10,QWORD[16+rdi] 1654 mov r14,r11 1655 mul rax 1656 neg r15 1657 mov r11,QWORD[24+rdi] 1658 adc r12,rax 1659 
mov rax,QWORD[((-8))+rbp*1+rsi] 1660 mov QWORD[rdi],r12 1661 adc r13,rdx 1662 1663 lea rbx,[r10*2+r14] 1664 mov QWORD[8+rdi],r13 1665 sbb r15,r15 1666 shr r10,63 1667 lea r8,[r11*2+rcx] 1668 shr r11,63 1669 or r8,r10 1670 mov r10,QWORD[32+rdi] 1671 mov r14,r11 1672 mul rax 1673 neg r15 1674 mov r11,QWORD[40+rdi] 1675 adc rbx,rax 1676 mov rax,QWORD[rbp*1+rsi] 1677 mov QWORD[16+rdi],rbx 1678 adc r8,rdx 1679 lea rbp,[16+rbp] 1680 mov QWORD[24+rdi],r8 1681 sbb r15,r15 1682 lea rdi,[64+rdi] 1683 jmp NEAR $L$sqr4x_shift_n_add 1684 1685ALIGN 32 1686$L$sqr4x_shift_n_add: 1687 lea r12,[r10*2+r14] 1688 shr r10,63 1689 lea r13,[r11*2+rcx] 1690 shr r11,63 1691 or r13,r10 1692 mov r10,QWORD[((-16))+rdi] 1693 mov r14,r11 1694 mul rax 1695 neg r15 1696 mov r11,QWORD[((-8))+rdi] 1697 adc r12,rax 1698 mov rax,QWORD[((-8))+rbp*1+rsi] 1699 mov QWORD[((-32))+rdi],r12 1700 adc r13,rdx 1701 1702 lea rbx,[r10*2+r14] 1703 mov QWORD[((-24))+rdi],r13 1704 sbb r15,r15 1705 shr r10,63 1706 lea r8,[r11*2+rcx] 1707 shr r11,63 1708 or r8,r10 1709 mov r10,QWORD[rdi] 1710 mov r14,r11 1711 mul rax 1712 neg r15 1713 mov r11,QWORD[8+rdi] 1714 adc rbx,rax 1715 mov rax,QWORD[rbp*1+rsi] 1716 mov QWORD[((-16))+rdi],rbx 1717 adc r8,rdx 1718 1719 lea r12,[r10*2+r14] 1720 mov QWORD[((-8))+rdi],r8 1721 sbb r15,r15 1722 shr r10,63 1723 lea r13,[r11*2+rcx] 1724 shr r11,63 1725 or r13,r10 1726 mov r10,QWORD[16+rdi] 1727 mov r14,r11 1728 mul rax 1729 neg r15 1730 mov r11,QWORD[24+rdi] 1731 adc r12,rax 1732 mov rax,QWORD[8+rbp*1+rsi] 1733 mov QWORD[rdi],r12 1734 adc r13,rdx 1735 1736 lea rbx,[r10*2+r14] 1737 mov QWORD[8+rdi],r13 1738 sbb r15,r15 1739 shr r10,63 1740 lea r8,[r11*2+rcx] 1741 shr r11,63 1742 or r8,r10 1743 mov r10,QWORD[32+rdi] 1744 mov r14,r11 1745 mul rax 1746 neg r15 1747 mov r11,QWORD[40+rdi] 1748 adc rbx,rax 1749 mov rax,QWORD[16+rbp*1+rsi] 1750 mov QWORD[16+rdi],rbx 1751 adc r8,rdx 1752 mov QWORD[24+rdi],r8 1753 sbb r15,r15 1754 lea rdi,[64+rdi] 1755 add rbp,32 1756 jnz NEAR 
$L$sqr4x_shift_n_add 1757 1758 lea r12,[r10*2+r14] 1759DB 0x67 1760 shr r10,63 1761 lea r13,[r11*2+rcx] 1762 shr r11,63 1763 or r13,r10 1764 mov r10,QWORD[((-16))+rdi] 1765 mov r14,r11 1766 mul rax 1767 neg r15 1768 mov r11,QWORD[((-8))+rdi] 1769 adc r12,rax 1770 mov rax,QWORD[((-8))+rsi] 1771 mov QWORD[((-32))+rdi],r12 1772 adc r13,rdx 1773 1774 lea rbx,[r10*2+r14] 1775 mov QWORD[((-24))+rdi],r13 1776 sbb r15,r15 1777 shr r10,63 1778 lea r8,[r11*2+rcx] 1779 shr r11,63 1780 or r8,r10 1781 mul rax 1782 neg r15 1783 adc rbx,rax 1784 adc r8,rdx 1785 mov QWORD[((-16))+rdi],rbx 1786 mov QWORD[((-8))+rdi],r8 1787DB 102,72,15,126,213 1788__bn_sqr8x_reduction: 1789 xor rax,rax 1790 lea rcx,[rbp*1+r9] 1791 lea rdx,[((48+8))+r9*2+rsp] 1792 mov QWORD[((0+8))+rsp],rcx 1793 lea rdi,[((48+8))+r9*1+rsp] 1794 mov QWORD[((8+8))+rsp],rdx 1795 neg r9 1796 jmp NEAR $L$8x_reduction_loop 1797 1798ALIGN 32 1799$L$8x_reduction_loop: 1800 lea rdi,[r9*1+rdi] 1801DB 0x66 1802 mov rbx,QWORD[rdi] 1803 mov r9,QWORD[8+rdi] 1804 mov r10,QWORD[16+rdi] 1805 mov r11,QWORD[24+rdi] 1806 mov r12,QWORD[32+rdi] 1807 mov r13,QWORD[40+rdi] 1808 mov r14,QWORD[48+rdi] 1809 mov r15,QWORD[56+rdi] 1810 mov QWORD[rdx],rax 1811 lea rdi,[64+rdi] 1812 1813DB 0x67 1814 mov r8,rbx 1815 imul rbx,QWORD[((32+8))+rsp] 1816 mov rax,QWORD[rbp] 1817 mov ecx,8 1818 jmp NEAR $L$8x_reduce 1819 1820ALIGN 32 1821$L$8x_reduce: 1822 mul rbx 1823 mov rax,QWORD[8+rbp] 1824 neg r8 1825 mov r8,rdx 1826 adc r8,0 1827 1828 mul rbx 1829 add r9,rax 1830 mov rax,QWORD[16+rbp] 1831 adc rdx,0 1832 add r8,r9 1833 mov QWORD[((48-8+8))+rcx*8+rsp],rbx 1834 mov r9,rdx 1835 adc r9,0 1836 1837 mul rbx 1838 add r10,rax 1839 mov rax,QWORD[24+rbp] 1840 adc rdx,0 1841 add r9,r10 1842 mov rsi,QWORD[((32+8))+rsp] 1843 mov r10,rdx 1844 adc r10,0 1845 1846 mul rbx 1847 add r11,rax 1848 mov rax,QWORD[32+rbp] 1849 adc rdx,0 1850 imul rsi,r8 1851 add r10,r11 1852 mov r11,rdx 1853 adc r11,0 1854 1855 mul rbx 1856 add r12,rax 1857 mov rax,QWORD[40+rbp] 1858 adc 
rdx,0 1859 add r11,r12 1860 mov r12,rdx 1861 adc r12,0 1862 1863 mul rbx 1864 add r13,rax 1865 mov rax,QWORD[48+rbp] 1866 adc rdx,0 1867 add r12,r13 1868 mov r13,rdx 1869 adc r13,0 1870 1871 mul rbx 1872 add r14,rax 1873 mov rax,QWORD[56+rbp] 1874 adc rdx,0 1875 add r13,r14 1876 mov r14,rdx 1877 adc r14,0 1878 1879 mul rbx 1880 mov rbx,rsi 1881 add r15,rax 1882 mov rax,QWORD[rbp] 1883 adc rdx,0 1884 add r14,r15 1885 mov r15,rdx 1886 adc r15,0 1887 1888 dec ecx 1889 jnz NEAR $L$8x_reduce 1890 1891 lea rbp,[64+rbp] 1892 xor rax,rax 1893 mov rdx,QWORD[((8+8))+rsp] 1894 cmp rbp,QWORD[((0+8))+rsp] 1895 jae NEAR $L$8x_no_tail 1896 1897DB 0x66 1898 add r8,QWORD[rdi] 1899 adc r9,QWORD[8+rdi] 1900 adc r10,QWORD[16+rdi] 1901 adc r11,QWORD[24+rdi] 1902 adc r12,QWORD[32+rdi] 1903 adc r13,QWORD[40+rdi] 1904 adc r14,QWORD[48+rdi] 1905 adc r15,QWORD[56+rdi] 1906 sbb rsi,rsi 1907 1908 mov rbx,QWORD[((48+56+8))+rsp] 1909 mov ecx,8 1910 mov rax,QWORD[rbp] 1911 jmp NEAR $L$8x_tail 1912 1913ALIGN 32 1914$L$8x_tail: 1915 mul rbx 1916 add r8,rax 1917 mov rax,QWORD[8+rbp] 1918 mov QWORD[rdi],r8 1919 mov r8,rdx 1920 adc r8,0 1921 1922 mul rbx 1923 add r9,rax 1924 mov rax,QWORD[16+rbp] 1925 adc rdx,0 1926 add r8,r9 1927 lea rdi,[8+rdi] 1928 mov r9,rdx 1929 adc r9,0 1930 1931 mul rbx 1932 add r10,rax 1933 mov rax,QWORD[24+rbp] 1934 adc rdx,0 1935 add r9,r10 1936 mov r10,rdx 1937 adc r10,0 1938 1939 mul rbx 1940 add r11,rax 1941 mov rax,QWORD[32+rbp] 1942 adc rdx,0 1943 add r10,r11 1944 mov r11,rdx 1945 adc r11,0 1946 1947 mul rbx 1948 add r12,rax 1949 mov rax,QWORD[40+rbp] 1950 adc rdx,0 1951 add r11,r12 1952 mov r12,rdx 1953 adc r12,0 1954 1955 mul rbx 1956 add r13,rax 1957 mov rax,QWORD[48+rbp] 1958 adc rdx,0 1959 add r12,r13 1960 mov r13,rdx 1961 adc r13,0 1962 1963 mul rbx 1964 add r14,rax 1965 mov rax,QWORD[56+rbp] 1966 adc rdx,0 1967 add r13,r14 1968 mov r14,rdx 1969 adc r14,0 1970 1971 mul rbx 1972 mov rbx,QWORD[((48-16+8))+rcx*8+rsp] 1973 add r15,rax 1974 adc rdx,0 1975 add r14,r15 
; NOTE: this span was stored as one collapsed physical line with listing
; line numbers embedded between statements. It is restored here to one
; statement per line; the instruction sequence itself is unchanged.
;
; Continuation of __bn_sqr8x_reduction: end of the $L$8x_tail loop body.
	mov rax,QWORD[rbp]
	mov r15,rdx
	adc r15,0

	dec ecx				; ecx is loaded with 8 before each entry to $L$8x_tail
	jnz NEAR $L$8x_tail

	lea rbp,[64+rbp]		; advance rbp by 64 bytes
	mov rdx,QWORD[((8+8))+rsp]
	cmp rbp,QWORD[((0+8))+rsp]	; stop once rbp reaches the limit kept at [8+rsp]
	jae NEAR $L$8x_tail_done

	mov rbx,QWORD[((48+56+8))+rsp]
	neg rsi				; rsi is 0 or -1 (from sbb): neg turns it back into CF
	mov rax,QWORD[rbp]
	adc r8,QWORD[rdi]		; fold the next 8 words at [rdi] into r8..r15
	adc r9,QWORD[8+rdi]
	adc r10,QWORD[16+rdi]
	adc r11,QWORD[24+rdi]
	adc r12,QWORD[32+rdi]
	adc r13,QWORD[40+rdi]
	adc r14,QWORD[48+rdi]
	adc r15,QWORD[56+rdi]
	sbb rsi,rsi			; keep the carry-out as a 0/-1 mask

	mov ecx,8
	jmp NEAR $L$8x_tail

ALIGN 32
$L$8x_tail_done:
	xor rax,rax
	add r8,QWORD[rdx]		; add the word stored at [rdx] and ripple the carry
	adc r9,0
	adc r10,0
	adc r11,0
	adc r12,0
	adc r13,0
	adc r14,0
	adc r15,0
	adc rax,0			; rax collects the carry out of the top word

	neg rsi				; restore CF from the saved 0/-1 mask
$L$8x_no_tail:
	adc r8,QWORD[rdi]
	adc r9,QWORD[8+rdi]
	adc r10,QWORD[16+rdi]
	adc r11,QWORD[24+rdi]
	adc r12,QWORD[32+rdi]
	adc r13,QWORD[40+rdi]
	adc r14,QWORD[48+rdi]
	adc r15,QWORD[56+rdi]
	adc rax,0
	mov rcx,QWORD[((-8))+rbp]
	xor rsi,rsi

	DB 102,72,15,126,213		; movq rbp,xmm2

	mov QWORD[rdi],r8		; write the 8 accumulated words back
	mov QWORD[8+rdi],r9
	DB 102,73,15,126,217		; movq r9,xmm3
	mov QWORD[16+rdi],r10
	mov QWORD[24+rdi],r11
	mov QWORD[32+rdi],r12
	mov QWORD[40+rdi],r13
	mov QWORD[48+rdi],r14
	mov QWORD[56+rdi],r15
	lea rdi,[64+rdi]

	cmp rdi,rdx			; unsigned compare: loop while rdi is below rdx
	jb NEAR $L$8x_reduction_loop
	DB 0F3h,0C3h		;repret


; __bn_post4x_internal (file-internal helper, not declared global)
; As written: processes four 64-bit words per iteration. Each word read
; from [rbp] is complemented and ANDed with rax (after `neg rax`), then
; added with carry to the word at [rbx], and the sum is stored at [rdi].
; The first word is decremented before the complement, which turns the
; complement-and-add into a two's-complement subtraction when the mask is
; all ones.
; In:   rbp = source of the words to be masked; r9 = length, rdi = base
;       used for rbx = rdi + r9; xmm1 = destination pointer (moved to rdi
;       and rsi); rax = value negated into the mask.
;       NOTE(review): presumably rax is 0 or 1 and r9 is a negative byte
;       count on entry (rcx = r9 >> 5 is incremented up to zero) - confirm
;       against the callers.
; Out:  r10 = r9 as seen on entry, r9 negated.
; Clobbers: rax, rbx, rcx, rsi, rdi, rbp, r10, r12-r15, flags.
ALIGN 32
__bn_post4x_internal:

	mov r12,QWORD[rbp]
	lea rbx,[r9*1+rdi]
	mov rcx,r9
	DB 102,72,15,126,207		; movq rdi,xmm1
	neg rax				; rax -> mask
	DB 102,72,15,126,206		; movq rsi,xmm1
	sar rcx,3+2			; rcx = r9 / 32: one count per 4-word group
	dec r12				; ~(x-1) == -x: supplies the +1 of the negation
	xor r10,r10			; r10 = carry mask between iterations, starts clear
	mov r13,QWORD[8+rbp]
	mov r14,QWORD[16+rbp]
	mov r15,QWORD[24+rbp]
	jmp NEAR $L$sqr4x_sub_entry

ALIGN 16
$L$sqr4x_sub:
	mov r12,QWORD[rbp]
	mov r13,QWORD[8+rbp]
	mov r14,QWORD[16+rbp]
	mov r15,QWORD[24+rbp]
$L$sqr4x_sub_entry:
	lea rbp,[32+rbp]
	not r12
; NOTE: this span was stored as two collapsed physical lines with listing
; line numbers embedded between statements. It is restored here to one
; statement per line; the instruction sequence itself is unchanged.
;
; Continuation of the __bn_post4x_internal loop body (the routine and the
; $L$sqr4x_sub / $L$sqr4x_sub_entry labels start on the preceding line).
	not r13
	not r14
	not r15
	and r12,rax			; keep the complemented words only where the mask is set
	and r13,rax
	and r14,rax
	and r15,rax

	neg r10				; r10 is 0 or -1: neg restores CF from the last iteration
	adc r12,QWORD[rbx]
	adc r13,QWORD[8+rbx]
	adc r14,QWORD[16+rbx]
	adc r15,QWORD[24+rbx]
	mov QWORD[rdi],r12
	lea rbx,[32+rbx]		; lea/mov leave CF untouched until the sbb below
	mov QWORD[8+rdi],r13
	sbb r10,r10			; save the carry-out as a 0/-1 mask
	mov QWORD[16+rdi],r14
	mov QWORD[24+rdi],r15
	lea rdi,[32+rdi]

	inc rcx				; rcx counts up towards zero
	jnz NEAR $L$sqr4x_sub

	mov r10,r9
	neg r9
	DB 0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; bn_from_montgomery (global, Win64 calling convention)
; Checks the low three bits of the dword at [48+rsp], which at entry is
; the sixth argument slot under the Win64 convention. If any of them is
; set the routine returns 0 in eax; otherwise it jumps to bn_from_mont8x,
; which returns 1.
;-----------------------------------------------------------------------
global bn_from_montgomery

ALIGN 32
bn_from_montgomery:

	test DWORD[48+rsp],7
	jz NEAR bn_from_mont8x
	xor eax,eax
	DB 0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; bn_from_mont8x (reached only through the jump in bn_from_montgomery)
; Win64 entry: rdi/rsi are saved in the caller-provided home slots, then
; rcx,rdx,r8,r9,[40+rsp],[48+rsp] are moved into rdi,rsi,rdx,rcx,r8,r9.
; Builds a 64-byte-aligned frame below rsp, copies the input at [rsi] into
; it with the upper half zeroed, runs a reduction + post-subtraction pair
; (the mulx/adx variant when the capability bits allow), wipes the frame
; and returns 1 in rax.
; The original rsp is kept in rax and stored at [40+rsp] of the new frame;
; callee-saved registers are reloaded relative to it on exit.
;-----------------------------------------------------------------------
ALIGN 32
bn_from_mont8x:
	mov QWORD[8+rsp],rdi	;WIN64 prologue
	mov QWORD[16+rsp],rsi
	mov rax,rsp
$L$SEH_begin_bn_from_mont8x:
	mov rdi,rcx
	mov rsi,rdx
	mov rdx,r8
	mov rcx,r9
	mov r8,QWORD[40+rsp]
	mov r9,QWORD[48+rsp]

	DB 0x67
	mov rax,rsp			; rax = rsp before the pushes; used to unwind at exit

	push rbx
	push rbp
	push r12
	push r13
	push r14
	push r15

$L$from_prologue:

	shl r9d,3			; r9 = count * 8 (32-bit write zero-extends)
	lea r10,[r9*2+r9]		; r10 = 3 * r9
	neg r9
	mov r8,QWORD[r8]		; replace the pointer in r8 by the qword it points to

; Choose the frame base in rbp from (rsp - 320 - 2*size - rdi) mod 4096.
; NOTE(review): presumably this keeps the frame from aliasing the buffer at
; rdi modulo the page size - confirm against the generator's commentary.
	lea r11,[((-320))+r9*2+rsp]
	mov rbp,rsp
	sub r11,rdi
	and r11,4095
	cmp r10,r11
	jb NEAR $L$from_sp_alt
	sub rbp,r11
	lea rbp,[((-320))+r9*2+rbp]
	jmp NEAR $L$from_sp_done

ALIGN 32
$L$from_sp_alt:
	lea r10,[((4096-320))+r9*2]
	lea rbp,[((-320))+r9*2+rbp]
	sub r11,r10
	mov r10,0			; mov, not xor: CF from the sub must reach cmovc
	cmovc r11,r10			; clamp the adjustment to 0 when the sub borrowed
	sub rbp,r11
$L$from_sp_done:
	and rbp,-64			; 64-byte align the frame base
	mov r11,rsp
	sub r11,rbp
	and r11,-4096
	lea rsp,[rbp*1+r11]
	mov r10,QWORD[rsp]		; touch the page
	cmp rsp,rbp
	ja NEAR $L$from_page_walk
	jmp NEAR $L$from_page_walk_done

; Step rsp down one 4096-byte page at a time, reading each page, until it
; reaches rbp.
$L$from_page_walk:
	lea rsp,[((-4096))+rsp]
	mov r10,QWORD[rsp]
	cmp rsp,rbp
	ja NEAR $L$from_page_walk
$L$from_page_walk_done:

	mov r10,r9
	neg r9				; r9 back to the positive byte size

	mov QWORD[32+rsp],r8
	mov QWORD[40+rsp],rax		; original rsp, reloaded in $L$from_mont_zero

$L$from_body:
	mov r11,r9
	lea rax,[48+rsp]
	pxor xmm0,xmm0
	jmp NEAR $L$mul_by_1

; Per iteration: copy 64 bytes from [rsi] to [rax] and zero the 64 bytes at
; [rax+r9]; r11 counts the remaining bytes down in steps of 64.
ALIGN 32
$L$mul_by_1:
	movdqu xmm1,XMMWORD[rsi]
	movdqu xmm2,XMMWORD[16+rsi]
	movdqu xmm3,XMMWORD[32+rsi]
	movdqa XMMWORD[r9*1+rax],xmm0
	movdqu xmm4,XMMWORD[48+rsi]
	movdqa XMMWORD[16+r9*1+rax],xmm0
	DB 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00	; lea rsi,[64+rsi] (disp32 form)
	movdqa XMMWORD[rax],xmm1
	movdqa XMMWORD[32+r9*1+rax],xmm0
	movdqa XMMWORD[16+rax],xmm2
	movdqa XMMWORD[48+r9*1+rax],xmm0
	movdqa XMMWORD[32+rax],xmm3
	movdqa XMMWORD[48+rax],xmm4
	lea rax,[64+rax]
	sub r11,64
	jnz NEAR $L$mul_by_1

	DB 102,72,15,110,207		; movq xmm1,rdi
	DB 102,72,15,110,209		; movq xmm2,rcx
	DB 0x67
	mov rbp,rcx
	DB 102,73,15,110,218		; movq xmm3,r10
; NOTE(review): 0x80108 selects bits 3, 8 and 19 of the third capability
; dword - presumably BMI1, BMI2 and ADX, matching the mulx/adcx/adox/andn
; use in the x-suffixed helpers; confirm against the OPENSSL_ia32cap_P layout.
	lea r11,[OPENSSL_ia32cap_P]
	mov r11d,DWORD[8+r11]
	and r11d,0x80108
	cmp r11d,0x80108
	jne NEAR $L$from_mont_nox

	lea rdi,[r9*1+rax]
	call __bn_sqrx8x_reduction
	call __bn_postx4x_internal

	pxor xmm0,xmm0
	lea rax,[48+rsp]
	jmp NEAR $L$from_mont_zero

ALIGN 32
$L$from_mont_nox:
	call __bn_sqr8x_reduction
	call __bn_post4x_internal

	pxor xmm0,xmm0
	lea rax,[48+rsp]
	jmp NEAR $L$from_mont_zero

; Wipe the frame: 64 bytes are zeroed for every 32 subtracted from r9.
ALIGN 32
$L$from_mont_zero:
	mov rsi,QWORD[40+rsp]		; saved original rsp (reloaded every iteration)

	movdqa XMMWORD[rax],xmm0
	movdqa XMMWORD[16+rax],xmm0
	movdqa XMMWORD[32+rax],xmm0
	movdqa XMMWORD[48+rax],xmm0
	lea rax,[64+rax]
	sub r9,32
	jnz NEAR $L$from_mont_zero

	mov rax,1			; return value
	mov r15,QWORD[((-48))+rsi]	; reload the registers pushed in the prologue
	mov r14,QWORD[((-40))+rsi]
	mov r13,QWORD[((-32))+rsi]
	mov r12,QWORD[((-24))+rsi]
	mov rbp,QWORD[((-16))+rsi]
	mov rbx,QWORD[((-8))+rsi]
	lea rsp,[rsi]

$L$from_epilogue:
; NOTE: this span was stored as one collapsed physical line with listing
; line numbers embedded between statements. It is restored here to one
; statement per line; the instruction sequence itself is unchanged.
;
; Tail of bn_from_mont8x (after $L$from_epilogue): reload rdi/rsi from the
; home slots written by the Win64 prologue, then return.
	mov rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov rsi,QWORD[16+rsp]
	DB 0F3h,0C3h		;repret

$L$SEH_end_bn_from_mont8x:

;-----------------------------------------------------------------------
; bn_mulx4x_mont_gather5 (not declared global in this span)
; Win64 entry: rdi/rsi are saved in the caller-provided home slots, then
; rcx,rdx,r8,r9,[40+rsp],[48+rsp] are moved into rdi,rsi,rdx,rcx,r8,r9.
; Builds a 64-byte-aligned frame below rsp, stores the qword loaded
; through r8 at [32+rsp] and the original rsp at [40+rsp], calls
; mulx4x_internal, then restores the callee-saved registers and returns 1
; in rax.
; $L$mulx4x_enter is a second entry label placed after the argument
; shuffle; code jumping there must already have rax = rsp and the
; arguments in rdi,rsi,rdx,rcx,r8,r9.
;-----------------------------------------------------------------------
ALIGN 32
bn_mulx4x_mont_gather5:
	mov QWORD[8+rsp],rdi	;WIN64 prologue
	mov QWORD[16+rsp],rsi
	mov rax,rsp
$L$SEH_begin_bn_mulx4x_mont_gather5:
	mov rdi,rcx
	mov rsi,rdx
	mov rdx,r8
	mov rcx,r9
	mov r8,QWORD[40+rsp]
	mov r9,QWORD[48+rsp]

	mov rax,rsp			; rax = rsp before the pushes; used to unwind at exit

$L$mulx4x_enter:
	push rbx
	push rbp
	push r12
	push r13
	push r14
	push r15

$L$mulx4x_prologue:

	shl r9d,3			; r9 = count * 8 (32-bit write zero-extends)
	lea r10,[r9*2+r9]		; r10 = 3 * r9
	neg r9
	mov r8,QWORD[r8]		; replace the pointer in r8 by the qword it points to

; Choose the frame base in rbp from (rsp - 320 - 2*size - rdi) mod 4096.
; NOTE(review): presumably this keeps the frame from aliasing the buffer at
; rdi modulo the page size - confirm against the generator's commentary.
	lea r11,[((-320))+r9*2+rsp]
	mov rbp,rsp
	sub r11,rdi
	and r11,4095
	cmp r10,r11
	jb NEAR $L$mulx4xsp_alt
	sub rbp,r11
	lea rbp,[((-320))+r9*2+rbp]
	jmp NEAR $L$mulx4xsp_done

$L$mulx4xsp_alt:
	lea r10,[((4096-320))+r9*2]
	lea rbp,[((-320))+r9*2+rbp]
	sub r11,r10
	mov r10,0			; mov, not xor: CF from the sub must reach cmovc
	cmovc r11,r10			; clamp the adjustment to 0 when the sub borrowed
	sub rbp,r11
$L$mulx4xsp_done:
	and rbp,-64			; 64-byte align the frame base
	mov r11,rsp
	sub r11,rbp
	and r11,-4096
	lea rsp,[rbp*1+r11]
	mov r10,QWORD[rsp]		; touch the page
	cmp rsp,rbp
	ja NEAR $L$mulx4x_page_walk
	jmp NEAR $L$mulx4x_page_walk_done

; Step rsp down one 4096-byte page at a time, reading each page, until it
; reaches rbp.
$L$mulx4x_page_walk:
	lea rsp,[((-4096))+rsp]
	mov r10,QWORD[rsp]
	cmp rsp,rbp
	ja NEAR $L$mulx4x_page_walk
$L$mulx4x_page_walk_done:

	mov QWORD[32+rsp],r8
	mov QWORD[40+rsp],rax		; original rsp, reloaded after the call

$L$mulx4x_body:
	call mulx4x_internal

	mov rsi,QWORD[40+rsp]		; saved original rsp

	mov rax,1			; return value

	mov r15,QWORD[((-48))+rsi]	; reload the registers pushed in the prologue
	mov r14,QWORD[((-40))+rsi]
	mov r13,QWORD[((-32))+rsi]
	mov r12,QWORD[((-24))+rsi]
	mov rbp,QWORD[((-16))+rsi]
	mov rbx,QWORD[((-8))+rsi]
	lea rsp,[rsi]

$L$mulx4x_epilogue:
2418 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2419 mov rsi,QWORD[16+rsp] 2420 DB 0F3h,0C3h ;repret 2421 2422$L$SEH_end_bn_mulx4x_mont_gather5: 2423 2424 2425ALIGN 32 2426mulx4x_internal: 2427 2428 mov QWORD[8+rsp],r9 2429 mov r10,r9 2430 neg r9 2431 shl r9,5 2432 neg r10 2433 lea r13,[128+r9*1+rdx] 2434 shr r9,5+5 2435 movd xmm5,DWORD[56+rax] 2436 sub r9,1 2437 lea rax,[$L$inc] 2438 mov QWORD[((16+8))+rsp],r13 2439 mov QWORD[((24+8))+rsp],r9 2440 mov QWORD[((56+8))+rsp],rdi 2441 movdqa xmm0,XMMWORD[rax] 2442 movdqa xmm1,XMMWORD[16+rax] 2443 lea r10,[((88-112))+r10*1+rsp] 2444 lea rdi,[128+rdx] 2445 2446 pshufd xmm5,xmm5,0 2447 movdqa xmm4,xmm1 2448DB 0x67 2449 movdqa xmm2,xmm1 2450DB 0x67 2451 paddd xmm1,xmm0 2452 pcmpeqd xmm0,xmm5 2453 movdqa xmm3,xmm4 2454 paddd xmm2,xmm1 2455 pcmpeqd xmm1,xmm5 2456 movdqa XMMWORD[112+r10],xmm0 2457 movdqa xmm0,xmm4 2458 2459 paddd xmm3,xmm2 2460 pcmpeqd xmm2,xmm5 2461 movdqa XMMWORD[128+r10],xmm1 2462 movdqa xmm1,xmm4 2463 2464 paddd xmm0,xmm3 2465 pcmpeqd xmm3,xmm5 2466 movdqa XMMWORD[144+r10],xmm2 2467 movdqa xmm2,xmm4 2468 2469 paddd xmm1,xmm0 2470 pcmpeqd xmm0,xmm5 2471 movdqa XMMWORD[160+r10],xmm3 2472 movdqa xmm3,xmm4 2473 paddd xmm2,xmm1 2474 pcmpeqd xmm1,xmm5 2475 movdqa XMMWORD[176+r10],xmm0 2476 movdqa xmm0,xmm4 2477 2478 paddd xmm3,xmm2 2479 pcmpeqd xmm2,xmm5 2480 movdqa XMMWORD[192+r10],xmm1 2481 movdqa xmm1,xmm4 2482 2483 paddd xmm0,xmm3 2484 pcmpeqd xmm3,xmm5 2485 movdqa XMMWORD[208+r10],xmm2 2486 movdqa xmm2,xmm4 2487 2488 paddd xmm1,xmm0 2489 pcmpeqd xmm0,xmm5 2490 movdqa XMMWORD[224+r10],xmm3 2491 movdqa xmm3,xmm4 2492 paddd xmm2,xmm1 2493 pcmpeqd xmm1,xmm5 2494 movdqa XMMWORD[240+r10],xmm0 2495 movdqa xmm0,xmm4 2496 2497 paddd xmm3,xmm2 2498 pcmpeqd xmm2,xmm5 2499 movdqa XMMWORD[256+r10],xmm1 2500 movdqa xmm1,xmm4 2501 2502 paddd xmm0,xmm3 2503 pcmpeqd xmm3,xmm5 2504 movdqa XMMWORD[272+r10],xmm2 2505 movdqa xmm2,xmm4 2506 2507 paddd xmm1,xmm0 2508 pcmpeqd xmm0,xmm5 2509 movdqa XMMWORD[288+r10],xmm3 2510 movdqa 
xmm3,xmm4 2511DB 0x67 2512 paddd xmm2,xmm1 2513 pcmpeqd xmm1,xmm5 2514 movdqa XMMWORD[304+r10],xmm0 2515 2516 paddd xmm3,xmm2 2517 pcmpeqd xmm2,xmm5 2518 movdqa XMMWORD[320+r10],xmm1 2519 2520 pcmpeqd xmm3,xmm5 2521 movdqa XMMWORD[336+r10],xmm2 2522 2523 pand xmm0,XMMWORD[64+rdi] 2524 pand xmm1,XMMWORD[80+rdi] 2525 pand xmm2,XMMWORD[96+rdi] 2526 movdqa XMMWORD[352+r10],xmm3 2527 pand xmm3,XMMWORD[112+rdi] 2528 por xmm0,xmm2 2529 por xmm1,xmm3 2530 movdqa xmm4,XMMWORD[((-128))+rdi] 2531 movdqa xmm5,XMMWORD[((-112))+rdi] 2532 movdqa xmm2,XMMWORD[((-96))+rdi] 2533 pand xmm4,XMMWORD[112+r10] 2534 movdqa xmm3,XMMWORD[((-80))+rdi] 2535 pand xmm5,XMMWORD[128+r10] 2536 por xmm0,xmm4 2537 pand xmm2,XMMWORD[144+r10] 2538 por xmm1,xmm5 2539 pand xmm3,XMMWORD[160+r10] 2540 por xmm0,xmm2 2541 por xmm1,xmm3 2542 movdqa xmm4,XMMWORD[((-64))+rdi] 2543 movdqa xmm5,XMMWORD[((-48))+rdi] 2544 movdqa xmm2,XMMWORD[((-32))+rdi] 2545 pand xmm4,XMMWORD[176+r10] 2546 movdqa xmm3,XMMWORD[((-16))+rdi] 2547 pand xmm5,XMMWORD[192+r10] 2548 por xmm0,xmm4 2549 pand xmm2,XMMWORD[208+r10] 2550 por xmm1,xmm5 2551 pand xmm3,XMMWORD[224+r10] 2552 por xmm0,xmm2 2553 por xmm1,xmm3 2554 movdqa xmm4,XMMWORD[rdi] 2555 movdqa xmm5,XMMWORD[16+rdi] 2556 movdqa xmm2,XMMWORD[32+rdi] 2557 pand xmm4,XMMWORD[240+r10] 2558 movdqa xmm3,XMMWORD[48+rdi] 2559 pand xmm5,XMMWORD[256+r10] 2560 por xmm0,xmm4 2561 pand xmm2,XMMWORD[272+r10] 2562 por xmm1,xmm5 2563 pand xmm3,XMMWORD[288+r10] 2564 por xmm0,xmm2 2565 por xmm1,xmm3 2566 pxor xmm0,xmm1 2567 pshufd xmm1,xmm0,0x4e 2568 por xmm0,xmm1 2569 lea rdi,[256+rdi] 2570DB 102,72,15,126,194 2571 lea rbx,[((64+32+8))+rsp] 2572 2573 mov r9,rdx 2574 mulx rax,r8,QWORD[rsi] 2575 mulx r12,r11,QWORD[8+rsi] 2576 add r11,rax 2577 mulx r13,rax,QWORD[16+rsi] 2578 adc r12,rax 2579 adc r13,0 2580 mulx r14,rax,QWORD[24+rsi] 2581 2582 mov r15,r8 2583 imul r8,QWORD[((32+8))+rsp] 2584 xor rbp,rbp 2585 mov rdx,r8 2586 2587 mov QWORD[((8+8))+rsp],rdi 2588 2589 lea rsi,[32+rsi] 2590 adcx 
r13,rax 2591 adcx r14,rbp 2592 2593 mulx r10,rax,QWORD[rcx] 2594 adcx r15,rax 2595 adox r10,r11 2596 mulx r11,rax,QWORD[8+rcx] 2597 adcx r10,rax 2598 adox r11,r12 2599 mulx r12,rax,QWORD[16+rcx] 2600 mov rdi,QWORD[((24+8))+rsp] 2601 mov QWORD[((-32))+rbx],r10 2602 adcx r11,rax 2603 adox r12,r13 2604 mulx r15,rax,QWORD[24+rcx] 2605 mov rdx,r9 2606 mov QWORD[((-24))+rbx],r11 2607 adcx r12,rax 2608 adox r15,rbp 2609 lea rcx,[32+rcx] 2610 mov QWORD[((-16))+rbx],r12 2611 jmp NEAR $L$mulx4x_1st 2612 2613ALIGN 32 2614$L$mulx4x_1st: 2615 adcx r15,rbp 2616 mulx rax,r10,QWORD[rsi] 2617 adcx r10,r14 2618 mulx r14,r11,QWORD[8+rsi] 2619 adcx r11,rax 2620 mulx rax,r12,QWORD[16+rsi] 2621 adcx r12,r14 2622 mulx r14,r13,QWORD[24+rsi] 2623DB 0x67,0x67 2624 mov rdx,r8 2625 adcx r13,rax 2626 adcx r14,rbp 2627 lea rsi,[32+rsi] 2628 lea rbx,[32+rbx] 2629 2630 adox r10,r15 2631 mulx r15,rax,QWORD[rcx] 2632 adcx r10,rax 2633 adox r11,r15 2634 mulx r15,rax,QWORD[8+rcx] 2635 adcx r11,rax 2636 adox r12,r15 2637 mulx r15,rax,QWORD[16+rcx] 2638 mov QWORD[((-40))+rbx],r10 2639 adcx r12,rax 2640 mov QWORD[((-32))+rbx],r11 2641 adox r13,r15 2642 mulx r15,rax,QWORD[24+rcx] 2643 mov rdx,r9 2644 mov QWORD[((-24))+rbx],r12 2645 adcx r13,rax 2646 adox r15,rbp 2647 lea rcx,[32+rcx] 2648 mov QWORD[((-16))+rbx],r13 2649 2650 dec rdi 2651 jnz NEAR $L$mulx4x_1st 2652 2653 mov rax,QWORD[8+rsp] 2654 adc r15,rbp 2655 lea rsi,[rax*1+rsi] 2656 add r14,r15 2657 mov rdi,QWORD[((8+8))+rsp] 2658 adc rbp,rbp 2659 mov QWORD[((-8))+rbx],r14 2660 jmp NEAR $L$mulx4x_outer 2661 2662ALIGN 32 2663$L$mulx4x_outer: 2664 lea r10,[((16-256))+rbx] 2665 pxor xmm4,xmm4 2666DB 0x67,0x67 2667 pxor xmm5,xmm5 2668 movdqa xmm0,XMMWORD[((-128))+rdi] 2669 movdqa xmm1,XMMWORD[((-112))+rdi] 2670 movdqa xmm2,XMMWORD[((-96))+rdi] 2671 pand xmm0,XMMWORD[256+r10] 2672 movdqa xmm3,XMMWORD[((-80))+rdi] 2673 pand xmm1,XMMWORD[272+r10] 2674 por xmm4,xmm0 2675 pand xmm2,XMMWORD[288+r10] 2676 por xmm5,xmm1 2677 pand xmm3,XMMWORD[304+r10] 2678 por 
xmm4,xmm2 2679 por xmm5,xmm3 2680 movdqa xmm0,XMMWORD[((-64))+rdi] 2681 movdqa xmm1,XMMWORD[((-48))+rdi] 2682 movdqa xmm2,XMMWORD[((-32))+rdi] 2683 pand xmm0,XMMWORD[320+r10] 2684 movdqa xmm3,XMMWORD[((-16))+rdi] 2685 pand xmm1,XMMWORD[336+r10] 2686 por xmm4,xmm0 2687 pand xmm2,XMMWORD[352+r10] 2688 por xmm5,xmm1 2689 pand xmm3,XMMWORD[368+r10] 2690 por xmm4,xmm2 2691 por xmm5,xmm3 2692 movdqa xmm0,XMMWORD[rdi] 2693 movdqa xmm1,XMMWORD[16+rdi] 2694 movdqa xmm2,XMMWORD[32+rdi] 2695 pand xmm0,XMMWORD[384+r10] 2696 movdqa xmm3,XMMWORD[48+rdi] 2697 pand xmm1,XMMWORD[400+r10] 2698 por xmm4,xmm0 2699 pand xmm2,XMMWORD[416+r10] 2700 por xmm5,xmm1 2701 pand xmm3,XMMWORD[432+r10] 2702 por xmm4,xmm2 2703 por xmm5,xmm3 2704 movdqa xmm0,XMMWORD[64+rdi] 2705 movdqa xmm1,XMMWORD[80+rdi] 2706 movdqa xmm2,XMMWORD[96+rdi] 2707 pand xmm0,XMMWORD[448+r10] 2708 movdqa xmm3,XMMWORD[112+rdi] 2709 pand xmm1,XMMWORD[464+r10] 2710 por xmm4,xmm0 2711 pand xmm2,XMMWORD[480+r10] 2712 por xmm5,xmm1 2713 pand xmm3,XMMWORD[496+r10] 2714 por xmm4,xmm2 2715 por xmm5,xmm3 2716 por xmm4,xmm5 2717 pshufd xmm0,xmm4,0x4e 2718 por xmm0,xmm4 2719 lea rdi,[256+rdi] 2720DB 102,72,15,126,194 2721 2722 mov QWORD[rbx],rbp 2723 lea rbx,[32+rax*1+rbx] 2724 mulx r11,r8,QWORD[rsi] 2725 xor rbp,rbp 2726 mov r9,rdx 2727 mulx r12,r14,QWORD[8+rsi] 2728 adox r8,QWORD[((-32))+rbx] 2729 adcx r11,r14 2730 mulx r13,r15,QWORD[16+rsi] 2731 adox r11,QWORD[((-24))+rbx] 2732 adcx r12,r15 2733 mulx r14,rdx,QWORD[24+rsi] 2734 adox r12,QWORD[((-16))+rbx] 2735 adcx r13,rdx 2736 lea rcx,[rax*1+rcx] 2737 lea rsi,[32+rsi] 2738 adox r13,QWORD[((-8))+rbx] 2739 adcx r14,rbp 2740 adox r14,rbp 2741 2742 mov r15,r8 2743 imul r8,QWORD[((32+8))+rsp] 2744 2745 mov rdx,r8 2746 xor rbp,rbp 2747 mov QWORD[((8+8))+rsp],rdi 2748 2749 mulx r10,rax,QWORD[rcx] 2750 adcx r15,rax 2751 adox r10,r11 2752 mulx r11,rax,QWORD[8+rcx] 2753 adcx r10,rax 2754 adox r11,r12 2755 mulx r12,rax,QWORD[16+rcx] 2756 adcx r11,rax 2757 adox r12,r13 2758 mulx 
r15,rax,QWORD[24+rcx] 2759 mov rdx,r9 2760 mov rdi,QWORD[((24+8))+rsp] 2761 mov QWORD[((-32))+rbx],r10 2762 adcx r12,rax 2763 mov QWORD[((-24))+rbx],r11 2764 adox r15,rbp 2765 mov QWORD[((-16))+rbx],r12 2766 lea rcx,[32+rcx] 2767 jmp NEAR $L$mulx4x_inner 2768 2769ALIGN 32 2770$L$mulx4x_inner: 2771 mulx rax,r10,QWORD[rsi] 2772 adcx r15,rbp 2773 adox r10,r14 2774 mulx r14,r11,QWORD[8+rsi] 2775 adcx r10,QWORD[rbx] 2776 adox r11,rax 2777 mulx rax,r12,QWORD[16+rsi] 2778 adcx r11,QWORD[8+rbx] 2779 adox r12,r14 2780 mulx r14,r13,QWORD[24+rsi] 2781 mov rdx,r8 2782 adcx r12,QWORD[16+rbx] 2783 adox r13,rax 2784 adcx r13,QWORD[24+rbx] 2785 adox r14,rbp 2786 lea rsi,[32+rsi] 2787 lea rbx,[32+rbx] 2788 adcx r14,rbp 2789 2790 adox r10,r15 2791 mulx r15,rax,QWORD[rcx] 2792 adcx r10,rax 2793 adox r11,r15 2794 mulx r15,rax,QWORD[8+rcx] 2795 adcx r11,rax 2796 adox r12,r15 2797 mulx r15,rax,QWORD[16+rcx] 2798 mov QWORD[((-40))+rbx],r10 2799 adcx r12,rax 2800 adox r13,r15 2801 mov QWORD[((-32))+rbx],r11 2802 mulx r15,rax,QWORD[24+rcx] 2803 mov rdx,r9 2804 lea rcx,[32+rcx] 2805 mov QWORD[((-24))+rbx],r12 2806 adcx r13,rax 2807 adox r15,rbp 2808 mov QWORD[((-16))+rbx],r13 2809 2810 dec rdi 2811 jnz NEAR $L$mulx4x_inner 2812 2813 mov rax,QWORD[((0+8))+rsp] 2814 adc r15,rbp 2815 sub rdi,QWORD[rbx] 2816 mov rdi,QWORD[((8+8))+rsp] 2817 mov r10,QWORD[((16+8))+rsp] 2818 adc r14,r15 2819 lea rsi,[rax*1+rsi] 2820 adc rbp,rbp 2821 mov QWORD[((-8))+rbx],r14 2822 2823 cmp rdi,r10 2824 jb NEAR $L$mulx4x_outer 2825 2826 mov r10,QWORD[((-8))+rcx] 2827 mov r8,rbp 2828 mov r12,QWORD[rax*1+rcx] 2829 lea rbp,[rax*1+rcx] 2830 mov rcx,rax 2831 lea rdi,[rax*1+rbx] 2832 xor eax,eax 2833 xor r15,r15 2834 sub r10,r14 2835 adc r15,r15 2836 or r8,r15 2837 sar rcx,3+2 2838 sub rax,r8 2839 mov rdx,QWORD[((56+8))+rsp] 2840 dec r12 2841 mov r13,QWORD[8+rbp] 2842 xor r8,r8 2843 mov r14,QWORD[16+rbp] 2844 mov r15,QWORD[24+rbp] 2845 jmp NEAR $L$sqrx4x_sub_entry 2846 2847 2848 2849ALIGN 32 2850bn_powerx5: 2851 mov 
QWORD[8+rsp],rdi ;WIN64 prologue 2852 mov QWORD[16+rsp],rsi 2853 mov rax,rsp 2854$L$SEH_begin_bn_powerx5: 2855 mov rdi,rcx 2856 mov rsi,rdx 2857 mov rdx,r8 2858 mov rcx,r9 2859 mov r8,QWORD[40+rsp] 2860 mov r9,QWORD[48+rsp] 2861 2862 2863 2864 mov rax,rsp 2865 2866$L$powerx5_enter: 2867 push rbx 2868 2869 push rbp 2870 2871 push r12 2872 2873 push r13 2874 2875 push r14 2876 2877 push r15 2878 2879$L$powerx5_prologue: 2880 2881 shl r9d,3 2882 lea r10,[r9*2+r9] 2883 neg r9 2884 mov r8,QWORD[r8] 2885 2886 2887 2888 2889 2890 2891 2892 2893 lea r11,[((-320))+r9*2+rsp] 2894 mov rbp,rsp 2895 sub r11,rdi 2896 and r11,4095 2897 cmp r10,r11 2898 jb NEAR $L$pwrx_sp_alt 2899 sub rbp,r11 2900 lea rbp,[((-320))+r9*2+rbp] 2901 jmp NEAR $L$pwrx_sp_done 2902 2903ALIGN 32 2904$L$pwrx_sp_alt: 2905 lea r10,[((4096-320))+r9*2] 2906 lea rbp,[((-320))+r9*2+rbp] 2907 sub r11,r10 2908 mov r10,0 2909 cmovc r11,r10 2910 sub rbp,r11 2911$L$pwrx_sp_done: 2912 and rbp,-64 2913 mov r11,rsp 2914 sub r11,rbp 2915 and r11,-4096 2916 lea rsp,[rbp*1+r11] 2917 mov r10,QWORD[rsp] 2918 cmp rsp,rbp 2919 ja NEAR $L$pwrx_page_walk 2920 jmp NEAR $L$pwrx_page_walk_done 2921 2922$L$pwrx_page_walk: 2923 lea rsp,[((-4096))+rsp] 2924 mov r10,QWORD[rsp] 2925 cmp rsp,rbp 2926 ja NEAR $L$pwrx_page_walk 2927$L$pwrx_page_walk_done: 2928 2929 mov r10,r9 2930 neg r9 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 pxor xmm0,xmm0 2944DB 102,72,15,110,207 2945DB 102,72,15,110,209 2946DB 102,73,15,110,218 2947DB 102,72,15,110,226 2948 mov QWORD[32+rsp],r8 2949 mov QWORD[40+rsp],rax 2950 2951$L$powerx5_body: 2952 2953 call __bn_sqrx8x_internal 2954 call __bn_postx4x_internal 2955 call __bn_sqrx8x_internal 2956 call __bn_postx4x_internal 2957 call __bn_sqrx8x_internal 2958 call __bn_postx4x_internal 2959 call __bn_sqrx8x_internal 2960 call __bn_postx4x_internal 2961 call __bn_sqrx8x_internal 2962 call __bn_postx4x_internal 2963 2964 mov r9,r10 2965 mov rdi,rsi 2966DB 102,72,15,126,209 2967DB 
102,72,15,126,226 2968 mov rax,QWORD[40+rsp] 2969 2970 call mulx4x_internal 2971 2972 mov rsi,QWORD[40+rsp] 2973 2974 mov rax,1 2975 2976 mov r15,QWORD[((-48))+rsi] 2977 2978 mov r14,QWORD[((-40))+rsi] 2979 2980 mov r13,QWORD[((-32))+rsi] 2981 2982 mov r12,QWORD[((-24))+rsi] 2983 2984 mov rbp,QWORD[((-16))+rsi] 2985 2986 mov rbx,QWORD[((-8))+rsi] 2987 2988 lea rsp,[rsi] 2989 2990$L$powerx5_epilogue: 2991 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2992 mov rsi,QWORD[16+rsp] 2993 DB 0F3h,0C3h ;repret 2994 2995$L$SEH_end_bn_powerx5: 2996 2997global bn_sqrx8x_internal 2998 2999ALIGN 32 3000bn_sqrx8x_internal: 3001__bn_sqrx8x_internal: 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 lea rdi,[((48+8))+rsp] 3044 lea rbp,[r9*1+rsi] 3045 mov QWORD[((0+8))+rsp],r9 3046 mov QWORD[((8+8))+rsp],rbp 3047 jmp NEAR $L$sqr8x_zero_start 3048 3049ALIGN 32 3050DB 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 3051$L$sqrx8x_zero: 3052DB 0x3e 3053 movdqa XMMWORD[rdi],xmm0 3054 movdqa XMMWORD[16+rdi],xmm0 3055 movdqa XMMWORD[32+rdi],xmm0 3056 movdqa XMMWORD[48+rdi],xmm0 3057$L$sqr8x_zero_start: 3058 movdqa XMMWORD[64+rdi],xmm0 3059 movdqa XMMWORD[80+rdi],xmm0 3060 movdqa XMMWORD[96+rdi],xmm0 3061 movdqa XMMWORD[112+rdi],xmm0 3062 lea rdi,[128+rdi] 3063 sub r9,64 3064 jnz NEAR $L$sqrx8x_zero 3065 3066 mov rdx,QWORD[rsi] 3067 3068 xor r10,r10 3069 xor r11,r11 3070 xor r12,r12 3071 xor r13,r13 3072 xor r14,r14 3073 xor r15,r15 3074 lea rdi,[((48+8))+rsp] 3075 xor rbp,rbp 3076 jmp NEAR $L$sqrx8x_outer_loop 3077 3078ALIGN 32 3079$L$sqrx8x_outer_loop: 3080 mulx rax,r8,QWORD[8+rsi] 3081 adcx r8,r9 3082 adox r10,rax 3083 mulx rax,r9,QWORD[16+rsi] 3084 adcx r9,r10 3085 adox r11,rax 3086DB 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 3087 adcx r10,r11 3088 adox r12,rax 3089DB 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 3090 adcx 
r11,r12 3091 adox r13,rax 3092 mulx rax,r12,QWORD[40+rsi] 3093 adcx r12,r13 3094 adox r14,rax 3095 mulx rax,r13,QWORD[48+rsi] 3096 adcx r13,r14 3097 adox rax,r15 3098 mulx r15,r14,QWORD[56+rsi] 3099 mov rdx,QWORD[8+rsi] 3100 adcx r14,rax 3101 adox r15,rbp 3102 adc r15,QWORD[64+rdi] 3103 mov QWORD[8+rdi],r8 3104 mov QWORD[16+rdi],r9 3105 sbb rcx,rcx 3106 xor rbp,rbp 3107 3108 3109 mulx rbx,r8,QWORD[16+rsi] 3110 mulx rax,r9,QWORD[24+rsi] 3111 adcx r8,r10 3112 adox r9,rbx 3113 mulx rbx,r10,QWORD[32+rsi] 3114 adcx r9,r11 3115 adox r10,rax 3116DB 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 3117 adcx r10,r12 3118 adox r11,rbx 3119DB 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 3120 adcx r11,r13 3121 adox r12,r14 3122DB 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 3123 mov rdx,QWORD[16+rsi] 3124 adcx r12,rax 3125 adox r13,rbx 3126 adcx r13,r15 3127 adox r14,rbp 3128 adcx r14,rbp 3129 3130 mov QWORD[24+rdi],r8 3131 mov QWORD[32+rdi],r9 3132 3133 mulx rbx,r8,QWORD[24+rsi] 3134 mulx rax,r9,QWORD[32+rsi] 3135 adcx r8,r10 3136 adox r9,rbx 3137 mulx rbx,r10,QWORD[40+rsi] 3138 adcx r9,r11 3139 adox r10,rax 3140DB 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 3141 adcx r10,r12 3142 adox r11,r13 3143DB 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 3144DB 0x3e 3145 mov rdx,QWORD[24+rsi] 3146 adcx r11,rbx 3147 adox r12,rax 3148 adcx r12,r14 3149 mov QWORD[40+rdi],r8 3150 mov QWORD[48+rdi],r9 3151 mulx rax,r8,QWORD[32+rsi] 3152 adox r13,rbp 3153 adcx r13,rbp 3154 3155 mulx rbx,r9,QWORD[40+rsi] 3156 adcx r8,r10 3157 adox r9,rax 3158 mulx rax,r10,QWORD[48+rsi] 3159 adcx r9,r11 3160 adox r10,r12 3161 mulx r12,r11,QWORD[56+rsi] 3162 mov rdx,QWORD[32+rsi] 3163 mov r14,QWORD[40+rsi] 3164 adcx r10,rbx 3165 adox r11,rax 3166 mov r15,QWORD[48+rsi] 3167 adcx r11,r13 3168 adox r12,rbp 3169 adcx r12,rbp 3170 3171 mov QWORD[56+rdi],r8 3172 mov QWORD[64+rdi],r9 3173 3174 mulx rax,r9,r14 3175 mov r8,QWORD[56+rsi] 3176 adcx r9,r10 3177 mulx rbx,r10,r15 3178 adox r10,rax 3179 adcx r10,r11 3180 mulx 
rax,r11,r8 3181 mov rdx,r14 3182 adox r11,rbx 3183 adcx r11,r12 3184 3185 adcx rax,rbp 3186 3187 mulx rbx,r14,r15 3188 mulx r13,r12,r8 3189 mov rdx,r15 3190 lea rsi,[64+rsi] 3191 adcx r11,r14 3192 adox r12,rbx 3193 adcx r12,rax 3194 adox r13,rbp 3195 3196DB 0x67,0x67 3197 mulx r14,r8,r8 3198 adcx r13,r8 3199 adcx r14,rbp 3200 3201 cmp rsi,QWORD[((8+8))+rsp] 3202 je NEAR $L$sqrx8x_outer_break 3203 3204 neg rcx 3205 mov rcx,-8 3206 mov r15,rbp 3207 mov r8,QWORD[64+rdi] 3208 adcx r9,QWORD[72+rdi] 3209 adcx r10,QWORD[80+rdi] 3210 adcx r11,QWORD[88+rdi] 3211 adc r12,QWORD[96+rdi] 3212 adc r13,QWORD[104+rdi] 3213 adc r14,QWORD[112+rdi] 3214 adc r15,QWORD[120+rdi] 3215 lea rbp,[rsi] 3216 lea rdi,[128+rdi] 3217 sbb rax,rax 3218 3219 mov rdx,QWORD[((-64))+rsi] 3220 mov QWORD[((16+8))+rsp],rax 3221 mov QWORD[((24+8))+rsp],rdi 3222 3223 3224 xor eax,eax 3225 jmp NEAR $L$sqrx8x_loop 3226 3227ALIGN 32 3228$L$sqrx8x_loop: 3229 mov rbx,r8 3230 mulx r8,rax,QWORD[rbp] 3231 adcx rbx,rax 3232 adox r8,r9 3233 3234 mulx r9,rax,QWORD[8+rbp] 3235 adcx r8,rax 3236 adox r9,r10 3237 3238 mulx r10,rax,QWORD[16+rbp] 3239 adcx r9,rax 3240 adox r10,r11 3241 3242 mulx r11,rax,QWORD[24+rbp] 3243 adcx r10,rax 3244 adox r11,r12 3245 3246DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3247 adcx r11,rax 3248 adox r12,r13 3249 3250 mulx r13,rax,QWORD[40+rbp] 3251 adcx r12,rax 3252 adox r13,r14 3253 3254 mulx r14,rax,QWORD[48+rbp] 3255 mov QWORD[rcx*8+rdi],rbx 3256 mov ebx,0 3257 adcx r13,rax 3258 adox r14,r15 3259 3260DB 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 3261 mov rdx,QWORD[8+rcx*8+rsi] 3262 adcx r14,rax 3263 adox r15,rbx 3264 adcx r15,rbx 3265 3266DB 0x67 3267 inc rcx 3268 jnz NEAR $L$sqrx8x_loop 3269 3270 lea rbp,[64+rbp] 3271 mov rcx,-8 3272 cmp rbp,QWORD[((8+8))+rsp] 3273 je NEAR $L$sqrx8x_break 3274 3275 sub rbx,QWORD[((16+8))+rsp] 3276DB 0x66 3277 mov rdx,QWORD[((-64))+rsi] 3278 adcx r8,QWORD[rdi] 3279 adcx r9,QWORD[8+rdi] 3280 adc r10,QWORD[16+rdi] 3281 adc r11,QWORD[24+rdi] 3282 adc 
r12,QWORD[32+rdi] 3283 adc r13,QWORD[40+rdi] 3284 adc r14,QWORD[48+rdi] 3285 adc r15,QWORD[56+rdi] 3286 lea rdi,[64+rdi] 3287DB 0x67 3288 sbb rax,rax 3289 xor ebx,ebx 3290 mov QWORD[((16+8))+rsp],rax 3291 jmp NEAR $L$sqrx8x_loop 3292 3293ALIGN 32 3294$L$sqrx8x_break: 3295 xor rbp,rbp 3296 sub rbx,QWORD[((16+8))+rsp] 3297 adcx r8,rbp 3298 mov rcx,QWORD[((24+8))+rsp] 3299 adcx r9,rbp 3300 mov rdx,QWORD[rsi] 3301 adc r10,0 3302 mov QWORD[rdi],r8 3303 adc r11,0 3304 adc r12,0 3305 adc r13,0 3306 adc r14,0 3307 adc r15,0 3308 cmp rdi,rcx 3309 je NEAR $L$sqrx8x_outer_loop 3310 3311 mov QWORD[8+rdi],r9 3312 mov r9,QWORD[8+rcx] 3313 mov QWORD[16+rdi],r10 3314 mov r10,QWORD[16+rcx] 3315 mov QWORD[24+rdi],r11 3316 mov r11,QWORD[24+rcx] 3317 mov QWORD[32+rdi],r12 3318 mov r12,QWORD[32+rcx] 3319 mov QWORD[40+rdi],r13 3320 mov r13,QWORD[40+rcx] 3321 mov QWORD[48+rdi],r14 3322 mov r14,QWORD[48+rcx] 3323 mov QWORD[56+rdi],r15 3324 mov r15,QWORD[56+rcx] 3325 mov rdi,rcx 3326 jmp NEAR $L$sqrx8x_outer_loop 3327 3328ALIGN 32 3329$L$sqrx8x_outer_break: 3330 mov QWORD[72+rdi],r9 3331DB 102,72,15,126,217 3332 mov QWORD[80+rdi],r10 3333 mov QWORD[88+rdi],r11 3334 mov QWORD[96+rdi],r12 3335 mov QWORD[104+rdi],r13 3336 mov QWORD[112+rdi],r14 3337 lea rdi,[((48+8))+rsp] 3338 mov rdx,QWORD[rcx*1+rsi] 3339 3340 mov r11,QWORD[8+rdi] 3341 xor r10,r10 3342 mov r9,QWORD[((0+8))+rsp] 3343 adox r11,r11 3344 mov r12,QWORD[16+rdi] 3345 mov r13,QWORD[24+rdi] 3346 3347 3348ALIGN 32 3349$L$sqrx4x_shift_n_add: 3350 mulx rbx,rax,rdx 3351 adox r12,r12 3352 adcx rax,r10 3353DB 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 3354DB 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 3355 adox r13,r13 3356 adcx rbx,r11 3357 mov r11,QWORD[40+rdi] 3358 mov QWORD[rdi],rax 3359 mov QWORD[8+rdi],rbx 3360 3361 mulx rbx,rax,rdx 3362 adox r10,r10 3363 adcx rax,r12 3364 mov rdx,QWORD[16+rcx*1+rsi] 3365 mov r12,QWORD[48+rdi] 3366 adox r11,r11 3367 adcx rbx,r13 3368 mov r13,QWORD[56+rdi] 3369 mov QWORD[16+rdi],rax 3370 mov QWORD[24+rdi],rbx 
;-----------------------------------------------------------------------
; Tail of the $L$sqrx4x_shift_n_add loop.  The loop label and the entry of
; the enclosing routine are earlier in the file, so only what is visible
; here is described.
;
; Each group squares one source qword (mulx with rdx as both the implicit
; and the explicit operand -> rbx:rax) and adds it to a pair of result
; words that are doubled in place:
;   adox reg,reg   doubles a word on the OF carry chain
;   adcx reg,reg   adds on the CF carry chain
; mov, lea and jrcxz do not modify flags, so both chains stay live across
; the loads, stores and the loop test.
;-----------------------------------------------------------------------

        mulx    rbx,rax,rdx
        adox    r12,r12
        adcx    rax,r10
        mov     rdx,QWORD[24+rcx*1+rsi]
        lea     rcx,[32+rcx]                    ; advance source index (flags untouched)
        mov     r10,QWORD[64+rdi]
        adox    r13,r13
        adcx    rbx,r11
        mov     r11,QWORD[72+rdi]
        mov     QWORD[32+rdi],rax
        mov     QWORD[40+rdi],rbx

        mulx    rbx,rax,rdx
        adox    r10,r10
        adcx    rax,r12
        jrcxz   $L$sqrx4x_shift_n_add_break     ; rcx == 0 -> leave the loop
DB      0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 ; mov rdx,QWORD[0+rcx*1+rsi], hand-encoded with a 32-bit displacement
        adox    r11,r11
        adcx    rbx,r13
        mov     r12,QWORD[80+rdi]
        mov     r13,QWORD[88+rdi]
        mov     QWORD[48+rdi],rax
        mov     QWORD[56+rdi],rbx
        lea     rdi,[64+rdi]
        nop
        jmp     NEAR $L$sqrx4x_shift_n_add

ALIGN   32
$L$sqrx4x_shift_n_add_break:
        ; Loop exit: finish the last pair and store it.
        adcx    rbx,r13
        mov     QWORD[48+rdi],rax
        mov     QWORD[56+rdi],rbx
        lea     rdi,[64+rdi]
DB      102,72,15,126,213                       ; movq rbp,xmm2
        ; Execution falls through into __bn_sqrx8x_reduction.

;-----------------------------------------------------------------------
; __bn_sqrx8x_reduction  (internal label, not declared global)
;
; Reached by fall-through from the code above; it ends in a return, so it
; is presumably also called directly from elsewhere in the file - confirm.
; The ident string near the end of this file describes the module as
; Montgomery multiplication; the comments below use that vocabulary.
;
; Values read on entry (as used by the code below):
;   rbp            base of the 8-qword blocks used as mulx multiplicands
;                  (presumably the modulus)
;   r9             byte length; rbp+r9-64 is saved as the last-block pointer
;   rdi            saved to [8+8+rsp]; the outer loop runs while the
;                  write pointer is below it
;   [32+8+rsp]     multiplier for the per-word factor (presumably n0)
;   [48+8+rsp]...  working vector, processed 8 qwords per outer pass
;   xmm2, xmm3     copied back to rbp and rcx at $L$sqrx8x_no_tail
;
; Stack scratch written here:
;   [0+8+rsp]      pointer to the last 8-qword block of the rbp array
;   [8+8+rsp]      rdi as it was on entry
;   [16+8+rsp]     carry saved as 0 / -1 between 8-word tail passes
;   [24+8+rsp]     top carry from the previous outer pass
;   [48+8+rsp]..   the eight per-word factors of the current outer pass
;                  (overwrites the first 8 words of the working vector)
;
; Uses MULX/ADCX/ADOX and modifies rax, rbx, rcx, rdx, rsi, rdi, rbp,
; r8-r15 and flags.
;-----------------------------------------------------------------------
__bn_sqrx8x_reduction:
        xor     eax,eax                         ; top carry = 0
        mov     rbx,QWORD[((32+8))+rsp]
        mov     rdx,QWORD[((48+8))+rsp]         ; lowest word of the working vector
        lea     rcx,[((-64))+r9*1+rbp]          ; address of the last 8-qword block

        mov     QWORD[((0+8))+rsp],rcx
        mov     QWORD[((8+8))+rsp],rdi

        lea     rdi,[((48+8))+rsp]
        jmp     NEAR $L$sqrx8x_reduction_loop

ALIGN   32
$L$sqrx8x_reduction_loop:
        ; Load the next 8 words into r8..r15 (r8 comes from rdx) and form
        ; the first per-word factor: rdx = low word * rbx.
        mov     r9,QWORD[8+rdi]
        mov     r10,QWORD[16+rdi]
        mov     r11,QWORD[24+rdi]
        mov     r12,QWORD[32+rdi]
        mov     r8,rdx
        imul    rdx,rbx
        mov     r13,QWORD[40+rdi]
        mov     r14,QWORD[48+rdi]
        mov     r15,QWORD[56+rdi]
        mov     QWORD[((24+8))+rsp],rax         ; remember the incoming top carry

        lea     rdi,[64+rdi]
        xor     rsi,rsi                         ; rsi = 0; also clears CF and OF
        mov     rcx,-8                          ; 8 passes: rcx = -8 .. -1
        jmp     NEAR $L$sqrx8x_reduce

ALIGN   32
$L$sqrx8x_reduce:
        ; One pass: add rdx * [rbp+0..56] into r8..r15 and slide the window
        ; down one word (the new r8 is the old second word plus products).
        ; Low halves travel on the CF chain, high halves on the OF chain.
        mov     rbx,r8
        mulx    r8,rax,QWORD[rbp]
        adcx    rax,rbx                         ; sum is kept only for its carry; rax is overwritten below
        adox    r8,r9

        mulx    r9,rbx,QWORD[8+rbp]
        adcx    r8,rbx
        adox    r9,r10

        mulx    r10,rbx,QWORD[16+rbp]
        adcx    r9,rbx
        adox    r10,r11

        mulx    r11,rbx,QWORD[24+rbp]
        adcx    r10,rbx
        adox    r11,r12

DB      0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00   ; mulx r12,rbx,QWORD[32+rbp] (hand-encoded, 32-bit displacement)
        mov     rax,rdx                         ; keep the current factor
        mov     rdx,r8                          ; r8 is already final for this pass
        adcx    r11,rbx
        adox    r12,r13

        mulx    rdx,rbx,QWORD[((32+8))+rsp]     ; rbx = factor for the next pass (low half; high half discarded)
        mov     rdx,rax                         ; restore the current factor
        mov     QWORD[((64+48+8))+rcx*8+rsp],rax ; record it for the tail passes

        mulx    r13,rax,QWORD[40+rbp]
        adcx    r12,rax
        adox    r13,r14

        mulx    r14,rax,QWORD[48+rbp]
        adcx    r13,rax
        adox    r14,r15

        mulx    r15,rax,QWORD[56+rbp]
        mov     rdx,rbx                         ; switch to the next factor
        adcx    r14,rax
        adox    r15,rsi                         ; rsi = 0: fold the OF chain into r15
        adcx    r15,rsi                         ; rsi = 0: fold the CF chain into r15

DB      0x67,0x67,0x67                          ; three address-size prefixes on the following inc (padding bytes)
        inc     rcx
        jnz     NEAR $L$sqrx8x_reduce

        mov     rax,rsi                         ; rax = 0
        cmp     rbp,QWORD[((0+8))+rsp]          ; was that the last 8-qword block?
        jae     NEAR $L$sqrx8x_no_tail          ; taken with CF = 0

        ; More blocks follow: add the next 8 working words into r8..r15,
        ; save the carry-out as 0 / -1 and run the tail with the recorded
        ; factors.
        mov     rdx,QWORD[((48+8))+rsp]         ; first recorded factor
        add     r8,QWORD[rdi]
        lea     rbp,[64+rbp]
        mov     rcx,-8
        adcx    r9,QWORD[8+rdi]
        adcx    r10,QWORD[16+rdi]
        adc     r11,QWORD[24+rdi]
        adc     r12,QWORD[32+rdi]
        adc     r13,QWORD[40+rdi]
        adc     r14,QWORD[48+rdi]
        adc     r15,QWORD[56+rdi]
        lea     rdi,[64+rdi]
        sbb     rax,rax                         ; rax = 0 or -1 from CF

        xor     rsi,rsi                         ; rsi = 0, CF = OF = 0
        mov     QWORD[((16+8))+rsp],rax
        jmp     NEAR $L$sqrx8x_tail

ALIGN   32
$L$sqrx8x_tail:
        ; Same multiply-accumulate as $L$sqrx8x_reduce, but the factor for
        ; each pass is reloaded from the stack and the completed low word
        ; is stored to [rdi+rcx*8].
        mov     rbx,r8
        mulx    r8,rax,QWORD[rbp]
        adcx    rbx,rax
        adox    r8,r9

        mulx    r9,rax,QWORD[8+rbp]
        adcx    r8,rax
        adox    r9,r10

        mulx    r10,rax,QWORD[16+rbp]
        adcx    r9,rax
        adox    r10,r11

        mulx    r11,rax,QWORD[24+rbp]
        adcx    r10,rax
        adox    r11,r12

DB      0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00   ; mulx r12,rax,QWORD[32+rbp] (hand-encoded, 32-bit displacement)
        adcx    r11,rax
        adox    r12,r13

        mulx    r13,rax,QWORD[40+rbp]
        adcx    r12,rax
        adox    r13,r14

        mulx    r14,rax,QWORD[48+rbp]
        adcx    r13,rax
        adox    r14,r15

        mulx    r15,rax,QWORD[56+rbp]
        mov     rdx,QWORD[((72+48+8))+rcx*8+rsp] ; recorded factor for the next pass
        adcx    r14,rax
        adox    r15,rsi
        mov     QWORD[rcx*8+rdi],rbx            ; store the finished word
        mov     rbx,r8
        adcx    r15,rsi

        inc     rcx
        jnz     NEAR $L$sqrx8x_tail

        cmp     rbp,QWORD[((0+8))+rsp]
        jae     NEAR $L$sqrx8x_tail_done

        ; Another block remains: restore the saved carry into CF
        ; (rsi is 0, the slot holds 0 or -1), add the next 8 working
        ; words, save the new carry and go round again.
        sub     rsi,QWORD[((16+8))+rsp]
        mov     rdx,QWORD[((48+8))+rsp]
        lea     rbp,[64+rbp]
        adc     r8,QWORD[rdi]
        adc     r9,QWORD[8+rdi]
        adc     r10,QWORD[16+rdi]
        adc     r11,QWORD[24+rdi]
        adc     r12,QWORD[32+rdi]
        adc     r13,QWORD[40+rdi]
        adc     r14,QWORD[48+rdi]
        adc     r15,QWORD[56+rdi]
        lea     rdi,[64+rdi]
        sbb     rax,rax
        sub     rcx,8                           ; rcx was 0 -> -8

        xor     rsi,rsi
        mov     QWORD[((16+8))+rsp],rax
        jmp     NEAR $L$sqrx8x_tail

ALIGN   32
$L$sqrx8x_tail_done:
        ; Add the top carry remembered at the start of this outer pass and
        ; ripple it through; rax collects the carry-out.
        xor     rax,rax
        add     r8,QWORD[((24+8))+rsp]
        adc     r9,0
        adc     r10,0
        adc     r11,0
        adc     r12,0
        adc     r13,0
        adc     r14,0
        adc     r15,0
        adc     rax,0

        sub     rsi,QWORD[((16+8))+rsp]         ; CF = carry saved by the last tail pass
$L$sqrx8x_no_tail:
        ; Add the next 8 working words (with CF as left by the code that
        ; got us here), write the 8 result words back and set up the next
        ; outer pass.  The interleaved mov/movq do not touch flags.
        adc     r8,QWORD[rdi]
DB      102,72,15,126,217                       ; movq rcx,xmm3
        adc     r9,QWORD[8+rdi]
        mov     rsi,QWORD[56+rbp]
DB      102,72,15,126,213                       ; movq rbp,xmm2
        adc     r10,QWORD[16+rdi]
        adc     r11,QWORD[24+rdi]
        adc     r12,QWORD[32+rdi]
        adc     r13,QWORD[40+rdi]
        adc     r14,QWORD[48+rdi]
        adc     r15,QWORD[56+rdi]
        adc     rax,0                           ; top carry for the next outer pass

        mov     rbx,QWORD[((32+8))+rsp]
        mov     rdx,QWORD[64+rcx*1+rdi]         ; low word for the next outer pass

        mov     QWORD[rdi],r8
        lea     r8,[64+rdi]
        mov     QWORD[8+rdi],r9
        mov     QWORD[16+rdi],r10
        mov     QWORD[24+rdi],r11
        mov     QWORD[32+rdi],r12
        mov     QWORD[40+rdi],r13
        mov     QWORD[48+rdi],r14
        mov     QWORD[56+rdi],r15

        lea     rdi,[64+rcx*1+rdi]
        cmp     r8,QWORD[((8+8))+rsp]           ; stop once the write pointer reaches the saved rdi
        jb      NEAR $L$sqrx8x_reduction_loop
        DB      0F3h,0C3h               ;repret


ALIGN   32

;-----------------------------------------------------------------------
; __bn_postx4x_internal  (internal label, not declared global)
;
; Masked add of the complemented words at rbp to the words at rdi,
; four qwords per iteration, writing the sums to the address held in xmm1.
;
; As used by the code below:
;   rax    negated on entry and used as an AND mask; presumably 0 or 1 on
;          entry so the mask is 0 or all-ones - confirm against callers
;   rbp    source of the words that are complemented and masked
;   rdi    source of the words that are added to
;   xmm1   destination pointer (copied into rdx and rsi)
;   rcx    shifted right arithmetically by 5 and counted up to zero, so it
;          is presumably a negative byte count
;
; The first word from rbp is decremented before being complemented, so
; ~(w-1) = -w supplies the +1 of a two's-complement negation; with an
; all-ones mask the loop therefore computes [rdi] - [rbp], with a zero
; mask it copies [rdi].  The carry between iterations is kept in r8 as
; 0 / -1.  On return r9 holds the negated entry value of rcx and r10 the
; entry value of rcx.
;-----------------------------------------------------------------------
__bn_postx4x_internal:

        mov     r12,QWORD[rbp]
        mov     r10,rcx
        mov     r9,rcx
        neg     rax                             ; build the mask
        sar     rcx,3+2                         ; bytes -> number of 4-qword groups

DB      102,72,15,126,202                       ; movq rdx,xmm1
DB      102,72,15,126,206                       ; movq rsi,xmm1
        dec     r12                             ; see header: turns ~ into two's-complement negation
        mov     r13,QWORD[8+rbp]
        xor     r8,r8                           ; no carry yet
        mov     r14,QWORD[16+rbp]
        mov     r15,QWORD[24+rbp]
        jmp     NEAR $L$sqrx4x_sub_entry

ALIGN   16
$L$sqrx4x_sub:
        mov     r12,QWORD[rbp]
        mov     r13,QWORD[8+rbp]
        mov     r14,QWORD[16+rbp]
        mov     r15,QWORD[24+rbp]
$L$sqrx4x_sub_entry:
        andn    r12,r12,rax                     ; r12 = ~r12 & mask
        lea     rbp,[32+rbp]
        andn    r13,r13,rax
        andn    r14,r14,rax
        andn    r15,r15,rax

        neg     r8                              ; CF = saved carry (r8 is 0 or -1)
        adc     r12,QWORD[rdi]
        adc     r13,QWORD[8+rdi]
        adc     r14,QWORD[16+rdi]
        adc     r15,QWORD[24+rdi]
        mov     QWORD[rdx],r12
        lea     rdi,[32+rdi]
        mov     QWORD[8+rdx],r13
        sbb     r8,r8                           ; save CF as 0 / -1
        mov     QWORD[16+rdx],r14
        mov     QWORD[24+rdx],r15
        lea     rdx,[32+rdx]

        inc     rcx
        jnz     NEAR $L$sqrx4x_sub

        neg     r9

        DB      0F3h,0C3h               ;repret


;-----------------------------------------------------------------------
; bn_scatter5
;
; Uses the Win64 argument registers directly (no register shuffling
; prologue):
;   rcx = source qwords
;   edx = number of qwords to copy (0 returns immediately)
;   r8  = table base
;   r9  = slot index
; Stores source word i at r8 + r9*8 + i*256, i.e. consecutive words of
; one value are 256 bytes apart.  Leaf routine: touches only rax, rcx,
; edx, r8 and flags.
;-----------------------------------------------------------------------
global  bn_scatter5

ALIGN   16
bn_scatter5:

        cmp     edx,0
        jz      NEAR $L$scatter_epilogue
        lea     r8,[r9*8+r8]                    ; first destination slot
$L$scatter:
        mov     rax,QWORD[rcx]
        lea     rcx,[8+rcx]
        mov     QWORD[r8],rax
        lea     r8,[256+r8]                     ; next 256-byte row
        sub     edx,1
        jnz     NEAR $L$scatter
$L$scatter_epilogue:
        DB      0F3h,0C3h               ;repret



;-----------------------------------------------------------------------
; bn_gather5
;
; Inverse of bn_scatter5.  Uses the Win64 argument registers directly:
;   rcx = destination qwords
;   edx = number of qwords to produce (the loop is do-while: it runs at
;         least once, so edx is expected to be non-zero)
;   r8  = table base (read with movdqa, so it must be 16-byte aligned)
;   r9d = slot index
;
; Builds sixteen 16-byte compare masks on the stack, one per pair of slot
; numbers 0..31, then for every output word reads the whole 256-byte row,
; ANDs it with the masks and ORs everything together.  The addresses
; touched depend only on r8 and the loop count, not on the index.
;
; Prologue bytes are hand-encoded so that their lengths match the unwind
; codes in $L$SEH_info_bn_gather5 at the end of this file.
;-----------------------------------------------------------------------
global  bn_gather5

ALIGN   32
bn_gather5:

$L$SEH_begin_bn_gather5:

DB      0x4c,0x8d,0x14,0x24                     ; lea r10,[rsp]      - r10 keeps the entry rsp

DB      0x48,0x81,0xec,0x08,0x01,0x00,0x00      ; sub rsp,0x108      - 264 bytes for the masks
        lea     rax,[$L$inc]
        and     rsp,-16                         ; movdqa stores below need 16-byte alignment

        movd    xmm5,r9d
        movdqa  xmm0,XMMWORD[rax]               ; dwords 0,0,1,1
        movdqa  xmm1,XMMWORD[16+rax]            ; dwords 2,2,2,2
        lea     r11,[128+r8]                    ; bias table pointer by 128
        lea     rax,[128+rsp]                   ; bias mask pointer by 128

        ; xmm5 = index in all four dwords.  xmm0..xmm3 rotate as counters
        ; (k,k,k+1,k+1), each advanced by xmm4 = 2,2,2,2 and then replaced
        ; by its compare result against xmm5, which is stored as a mask.
        pshufd  xmm5,xmm5,0
        movdqa  xmm4,xmm1
        movdqa  xmm2,xmm1
        paddd   xmm1,xmm0
        pcmpeqd xmm0,xmm5
        movdqa  xmm3,xmm4

        paddd   xmm2,xmm1
        pcmpeqd xmm1,xmm5
        movdqa  XMMWORD[(-128)+rax],xmm0
        movdqa  xmm0,xmm4

        paddd   xmm3,xmm2
        pcmpeqd xmm2,xmm5
        movdqa  XMMWORD[(-112)+rax],xmm1
        movdqa  xmm1,xmm4

        paddd   xmm0,xmm3
        pcmpeqd xmm3,xmm5
        movdqa  XMMWORD[(-96)+rax],xmm2
        movdqa  xmm2,xmm4
        paddd   xmm1,xmm0
        pcmpeqd xmm0,xmm5
        movdqa  XMMWORD[(-80)+rax],xmm3
        movdqa  xmm3,xmm4

        paddd   xmm2,xmm1
        pcmpeqd xmm1,xmm5
        movdqa  XMMWORD[(-64)+rax],xmm0
        movdqa  xmm0,xmm4

        paddd   xmm3,xmm2
        pcmpeqd xmm2,xmm5
        movdqa  XMMWORD[(-48)+rax],xmm1
        movdqa  xmm1,xmm4

        paddd   xmm0,xmm3
        pcmpeqd xmm3,xmm5
        movdqa  XMMWORD[(-32)+rax],xmm2
        movdqa  xmm2,xmm4
        paddd   xmm1,xmm0
        pcmpeqd xmm0,xmm5
        movdqa  XMMWORD[(-16)+rax],xmm3
        movdqa  xmm3,xmm4

        paddd   xmm2,xmm1
        pcmpeqd xmm1,xmm5
        movdqa  XMMWORD[rax],xmm0
        movdqa  xmm0,xmm4

        paddd   xmm3,xmm2
        pcmpeqd xmm2,xmm5
        movdqa  XMMWORD[16+rax],xmm1
        movdqa  xmm1,xmm4

        paddd   xmm0,xmm3
        pcmpeqd xmm3,xmm5
        movdqa  XMMWORD[32+rax],xmm2
        movdqa  xmm2,xmm4
        paddd   xmm1,xmm0
        pcmpeqd xmm0,xmm5
        movdqa  XMMWORD[48+rax],xmm3
        movdqa  xmm3,xmm4

        paddd   xmm2,xmm1
        pcmpeqd xmm1,xmm5
        movdqa  XMMWORD[64+rax],xmm0
        movdqa  xmm0,xmm4

        paddd   xmm3,xmm2
        pcmpeqd xmm2,xmm5
        movdqa  XMMWORD[80+rax],xmm1
        movdqa  xmm1,xmm4

        paddd   xmm0,xmm3
        pcmpeqd xmm3,xmm5
        movdqa  XMMWORD[96+rax],xmm2
        movdqa  xmm2,xmm4
        movdqa  XMMWORD[112+rax],xmm3
        jmp     NEAR $L$gather

ALIGN   32
$L$gather:
        ; One output qword per iteration: OR together (row[i] AND mask[i])
        ; for all sixteen 16-byte pieces of the current 256-byte row,
        ; accumulating alternately into xmm4 and xmm5.
        pxor    xmm4,xmm4
        pxor    xmm5,xmm5
        movdqa  xmm0,XMMWORD[((-128))+r11]
        movdqa  xmm1,XMMWORD[((-112))+r11]
        movdqa  xmm2,XMMWORD[((-96))+r11]
        pand    xmm0,XMMWORD[((-128))+rax]
        movdqa  xmm3,XMMWORD[((-80))+r11]
        pand    xmm1,XMMWORD[((-112))+rax]
        por     xmm4,xmm0
        pand    xmm2,XMMWORD[((-96))+rax]
        por     xmm5,xmm1
        pand    xmm3,XMMWORD[((-80))+rax]
        por     xmm4,xmm2
        por     xmm5,xmm3
        movdqa  xmm0,XMMWORD[((-64))+r11]
        movdqa  xmm1,XMMWORD[((-48))+r11]
        movdqa  xmm2,XMMWORD[((-32))+r11]
        pand    xmm0,XMMWORD[((-64))+rax]
        movdqa  xmm3,XMMWORD[((-16))+r11]
        pand    xmm1,XMMWORD[((-48))+rax]
        por     xmm4,xmm0
        pand    xmm2,XMMWORD[((-32))+rax]
        por     xmm5,xmm1
        pand    xmm3,XMMWORD[((-16))+rax]
        por     xmm4,xmm2
        por     xmm5,xmm3
        movdqa  xmm0,XMMWORD[r11]
        movdqa  xmm1,XMMWORD[16+r11]
        movdqa  xmm2,XMMWORD[32+r11]
        pand    xmm0,XMMWORD[rax]
        movdqa  xmm3,XMMWORD[48+r11]
        pand    xmm1,XMMWORD[16+rax]
        por     xmm4,xmm0
        pand    xmm2,XMMWORD[32+rax]
        por     xmm5,xmm1
        pand    xmm3,XMMWORD[48+rax]
        por     xmm4,xmm2
        por     xmm5,xmm3
        movdqa  xmm0,XMMWORD[64+r11]
        movdqa  xmm1,XMMWORD[80+r11]
        movdqa  xmm2,XMMWORD[96+r11]
        pand    xmm0,XMMWORD[64+rax]
        movdqa  xmm3,XMMWORD[112+r11]
        pand    xmm1,XMMWORD[80+rax]
        por     xmm4,xmm0
        pand    xmm2,XMMWORD[96+rax]
        por     xmm5,xmm1
        pand    xmm3,XMMWORD[112+rax]
        por     xmm4,xmm2
        por     xmm5,xmm3
        por     xmm4,xmm5
        lea     r11,[256+r11]                   ; next row
        pshufd  xmm0,xmm4,0x4e                  ; swap the two 64-bit halves
        por     xmm0,xmm4                       ; selected qword is now in the low half
        movq    QWORD[rcx],xmm0
        lea     rcx,[8+rcx]
        sub     edx,1
        jnz     NEAR $L$gather

        lea     rsp,[r10]                       ; restore the entry rsp

        DB      0F3h,0C3h               ;repret
$L$SEH_end_bn_gather5:


ALIGN   64
$L$inc:
        ; Counter seed and step for the compare masks: first vector is the
        ; initial counter, second is the per-step increment.
        DD      0,0,1,1
        DD      2,2,2,2
; ASCII, NUL-terminated:
; "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>"
DB      77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
DB      112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
DB      99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
DB      114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
DB      71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
DB      112,101,110,115,115,108,46,111,114,103,62,0
EXTERN  __imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; mul_handler
;
; Language-specific exception handler named by the .xdata entries below.
; Register use follows the Win64 handler signature; the structure offsets
; match the Windows x64 CONTEXT and DISPATCHER_CONTEXT layouts:
;   r8 = CONTEXT*             120 Rax, 144 Rbx, 152 Rsp, 160 Rbp, 168 Rsi,
;                             176 Rdi, 192 R9, 216..240 R12..R15, 248 Rip
;   r9 = DISPATCHER_CONTEXT*  0 ControlPc, 8 ImageBase, 16 FunctionEntry,
;                             24 EstablisherFrame, 40 ContextRecord,
;                             56 HandlerData
; HandlerData is the three-RVA record that follows the handler RVA in
; each .xdata entry: [0] prologue label, [4] body label, [8] epilogue label.
;
; It locates the interrupted routine's original stack pointer, restores
; the saved non-volatile registers into the CONTEXT, copies the CONTEXT
; to the dispatcher's ContextRecord, calls RtlVirtualUnwind and returns 1.
;-----------------------------------------------------------------------
ALIGN   16
mul_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64                          ; home space + 4 stack arguments for the call below

        mov     rax,QWORD[120+r8]               ; context->Rax
        mov     rbx,QWORD[248+r8]               ; context->Rip

        mov     rsi,QWORD[8+r9]                 ; disp->ImageBase
        mov     r11,QWORD[56+r9]                ; disp->HandlerData

        mov     r10d,DWORD[r11]                 ; HandlerData[0]
        lea     r10,[r10*1+rsi]
        cmp     rbx,r10                         ; Rip before the first label?
        jb      NEAR $L$common_seh_tail         ; yes: use context->Rax as the frame, nothing pushed yet

        mov     r10d,DWORD[4+r11]               ; HandlerData[1]
        lea     r10,[r10*1+rsi]
        cmp     rbx,r10                         ; Rip before the body label?
        jb      NEAR $L$common_pop_regs         ; yes: frame is still in context->Rax

        mov     rax,QWORD[152+r8]               ; context->Rsp

        mov     r10d,DWORD[8+r11]               ; HandlerData[2]
        lea     r10,[r10*1+rsi]
        cmp     rbx,r10                         ; Rip at or past the epilogue label?
        jae     NEAR $L$common_seh_tail         ; yes: registers already restored

        lea     r10,[$L$mul_epilogue]
        cmp     rbx,r10
        ja      NEAR $L$body_40                 ; routines placed after $L$mul_epilogue keep the frame at [40+rsp]

        mov     r10,QWORD[192+r8]               ; context->R9
        mov     rax,QWORD[8+r10*8+rax]          ; frame saved at [8+r9*8+rsp]

        jmp     NEAR $L$common_pop_regs

$L$body_40:
        mov     rax,QWORD[40+rax]
$L$common_pop_regs:
        ; rax = original stack pointer; the six pushed registers sit just below it.
        mov     rbx,QWORD[((-8))+rax]
        mov     rbp,QWORD[((-16))+rax]
        mov     r12,QWORD[((-24))+rax]
        mov     r13,QWORD[((-32))+rax]
        mov     r14,QWORD[((-40))+rax]
        mov     r15,QWORD[((-48))+rax]
        mov     QWORD[144+r8],rbx
        mov     QWORD[160+r8],rbp
        mov     QWORD[216+r8],r12
        mov     QWORD[224+r8],r13
        mov     QWORD[232+r8],r14
        mov     QWORD[240+r8],r15

$L$common_seh_tail:
        mov     rdi,QWORD[8+rax]                ; rdi / rsi as stored by the WIN64 prologue
        mov     rsi,QWORD[16+rax]
        mov     QWORD[152+r8],rax               ; context->Rsp
        mov     QWORD[168+r8],rsi               ; context->Rsi
        mov     QWORD[176+r8],rdi               ; context->Rdi

        mov     rdi,QWORD[40+r9]                ; disp->ContextRecord
        mov     rsi,r8
        mov     ecx,154                         ; 154 qwords = 1232 bytes
        DD      0xa548f3fc                      ; bytes FC F3 48 A5: cld ; rep movsq

        ; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
        ;                  ContextRecord, &HandlerData, &EstablisherFrame, NULL)
        mov     rsi,r9
        xor     rcx,rcx
        mov     rdx,QWORD[8+rsi]
        mov     r8,QWORD[rsi]
        mov     r9,QWORD[16+rsi]
        mov     r10,QWORD[40+rsi]
        lea     r11,[56+rsi]
        lea     r12,[24+rsi]
        mov     QWORD[32+rsp],r10
        mov     QWORD[40+rsp],r11
        mov     QWORD[48+rsp],r12
        mov     QWORD[56+rsp],rcx
        call    QWORD[__imp_RtlVirtualUnwind]

        mov     eax,1                           ; ExceptionContinueSearch
        add     rsp,64
        popfq
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        pop     rdi
        pop     rsi
        DB      0F3h,0C3h               ;repret


; Function table: one (begin RVA, end RVA, unwind-info RVA) triple per routine.
section .pdata rdata align=4
ALIGN   4
        DD      $L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase
        DD      $L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase
        DD      $L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase

        DD      $L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase
        DD      $L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase
        DD      $L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase

        DD      $L$SEH_begin_bn_power5 wrt ..imagebase
        DD      $L$SEH_end_bn_power5 wrt ..imagebase
        DD      $L$SEH_info_bn_power5 wrt ..imagebase

        DD      $L$SEH_begin_bn_from_mont8x wrt ..imagebase
        DD      $L$SEH_end_bn_from_mont8x wrt ..imagebase
        DD      $L$SEH_info_bn_from_mont8x wrt ..imagebase
        DD      $L$SEH_begin_bn_mulx4x_mont_gather5 wrt ..imagebase
        DD      $L$SEH_end_bn_mulx4x_mont_gather5 wrt ..imagebase
        DD      $L$SEH_info_bn_mulx4x_mont_gather5 wrt ..imagebase

        DD      $L$SEH_begin_bn_powerx5 wrt ..imagebase
        DD      $L$SEH_end_bn_powerx5 wrt ..imagebase
        DD      $L$SEH_info_bn_powerx5 wrt ..imagebase
        DD      $L$SEH_begin_bn_gather5 wrt ..imagebase
        DD      $L$SEH_end_bn_gather5 wrt ..imagebase
        DD      $L$SEH_info_bn_gather5 wrt ..imagebase

; Unwind information.  For the mul_handler entries the header bytes 9,0,0,0
; mean: version 1 with the exception-handler flag, no prologue bytes, no
; unwind codes, no frame register.  They are followed by the handler RVA
; and the three label RVAs that mul_handler reads as HandlerData.
section .xdata rdata align=8
ALIGN   8
$L$SEH_info_bn_mul_mont_gather5:
DB      9,0,0,0
        DD      mul_handler wrt ..imagebase
        DD      $L$mul_body wrt ..imagebase,$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
ALIGN   8
$L$SEH_info_bn_mul4x_mont_gather5:
DB      9,0,0,0
        DD      mul_handler wrt ..imagebase
        DD      $L$mul4x_prologue wrt ..imagebase,$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
ALIGN   8
$L$SEH_info_bn_power5:
DB      9,0,0,0
        DD      mul_handler wrt ..imagebase
        DD      $L$power5_prologue wrt ..imagebase,$L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase
ALIGN   8
$L$SEH_info_bn_from_mont8x:
DB      9,0,0,0
        DD      mul_handler wrt ..imagebase
        DD      $L$from_prologue wrt ..imagebase,$L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase
ALIGN   8
$L$SEH_info_bn_mulx4x_mont_gather5:
DB      9,0,0,0
        DD      mul_handler wrt ..imagebase
        DD      $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase
ALIGN   8
$L$SEH_info_bn_powerx5:
DB      9,0,0,0
        DD      mul_handler wrt ..imagebase
        DD      $L$powerx5_prologue wrt ..imagebase,$L$powerx5_body wrt ..imagebase,$L$powerx5_epilogue wrt ..imagebase
ALIGN   8
$L$SEH_info_bn_gather5:
; bn_gather5 uses plain unwind codes instead of a handler.
DB      0x01,0x0b,0x03,0x0a                     ; version 1, prologue 11 bytes, 3 code slots, frame register r10
DB      0x0b,0x01,0x21,0x00                     ; at offset 11: large stack allocation, 0x21*8 = 264 bytes
DB      0x04,0xa3,0x00,0x00                     ; at offset 4: set frame register (r10); then 2 padding bytes
ALIGN   8
