# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#include "ring_core_generated/prefix_symbols_asm.h"
.text

// NOTE(review): comments below were added during review; the instruction
// stream itself is byte-for-byte the generated output (AT&T syntax,
// SysV AMD64 ABI).  Per the trailing CRYPTOGAMS banner, this file
// implements Montgomery multiplication for x86_64.

.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

//-----------------------------------------------------------------------
// bn_mul_mont(rp, ap, bp, np, n0p, num)
// In:   %rdi = rp (result), %rsi = ap, %rdx = bp, %rcx = np (modulus),
//       %r8  = pointer to n0 (-np^-1 mod 2^64), %r9d = num (limb count)
// Out:  %rax = 1
// Entry point that also dispatches to the specialised variants:
//   num % 4 == 0 && num >= 8          -> .Lmul4x_enter (4-way unrolled)
//   additionally ap == bp, num%8 == 0 -> .Lsqr8x_enter (squaring path)
// %r11d is preloaded with OPENSSL_ia32cap_P[2] so the 4x path can pick
// the BMI2/ADX (mulx) variant.
//-----------------------------------------------------------------------
.globl	bn_mul_mont
.hidden bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.cfi_startproc
	movl	%r9d,%r9d		// zero-extend num to 64 bits
	movq	%rsp,%rax		// %rax = original %rsp (saved later)
.cfi_def_cfa_register	%rax
	testl	$3,%r9d
	jnz	.Lmul_enter		// num not a multiple of 4 -> scalar path
	cmpl	$8,%r9d
	jb	.Lmul_enter		// num < 8 -> scalar path
	movl	OPENSSL_ia32cap_P+8(%rip),%r11d	// CPU feature word for 4x path
	cmpq	%rsi,%rdx
	jne	.Lmul4x_enter		// ap != bp -> 4x multiply
	testl	$7,%r9d
	jz	.Lsqr8x_enter		// ap == bp, num%8==0 -> squaring
	jmp	.Lmul4x_enter

.align	16
.Lmul_enter:
	// Scalar (1-limb-at-a-time) path: save callee-saved registers.
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	negq	%r9
	movq	%rsp,%r11
	leaq	-16(%rsp,%r9,8),%r10	// carve num*8 (+16) bytes for tp[]
	negq	%r9			// restore num
	andq	$-1024,%r10		// align scratch to a 1KB boundary

	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11		// probe the new stack page
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
	jmp	.Lmul_page_walk_done

.align	16
.Lmul_page_walk:
	// Touch the stack one page at a time so the kernel's guard page is
	// hit in order (stack-clash safety).
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
.Lmul_page_walk_done:

	movq	%rax,8(%rsp,%r9,8)	// save original %rsp above tp[num]
// DWARF: CFA = *(rsp + 8 + r9*8) + 8 (recover caller frame via saved %rsp)
.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:
	movq	%rdx,%r12		// %r12 = bp
	movq	(%r8),%r8		// %r8 = n0 value
	movq	(%r12),%rbx		// %rbx = b[0]
	movq	(%rsi),%rax		// %rax = a[0]

	xorq	%r14,%r14		// i = 0 (outer index)
	xorq	%r15,%r15		// j = 0 (inner index)

	movq	%r8,%rbp
	mulq	%rbx			// a[0]*b[0]
	movq	%rax,%r10
	movq	(%rcx),%rax		// np[0]

	imulq	%r10,%rbp		// m = tp[0]*n0 (mod 2^64)
	movq	%rdx,%r11

	mulq	%rbp			// np[0]*m
	addq	%rax,%r10		// low word discarded (becomes 0 mod 2^64)
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%r13

	leaq	1(%r15),%r15		// j = 1
	jmp	.L1st_enter

.align	16
.L1st:
	// First pass, j-th step: tp[j-1] = a[j]*b[0] + np[j]*m + carries
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	movq	%r10,%r11
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.L1st_enter:
	mulq	%rbx			// a[j]*b[0]
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	1(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp			// np[j]*m
	cmpq	%r9,%r15
	jne	.L1st

	addq	%rax,%r13
	movq	(%rsi),%rax		// reload a[0] for next outer round
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13
	movq	%r10,%r11

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)	// tp[num-1]
	movq	%rdx,(%rsp,%r9,8)	// tp[num] = carry word

	leaq	1(%r14),%r14		// i = 1
	jmp	.Louter
.align	16
.Louter:
	// Outer loop over b[i], i = 1 .. num-1; accumulates into tp[].
	movq	(%r12,%r14,8),%rbx	// %rbx = b[i]
	xorq	%r15,%r15		// j = 0
	movq	%r8,%rbp
	movq	(%rsp),%r10		// tp[0]
	mulq	%rbx			// a[0]*b[i]
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp		// m = tp[0]*n0
	movq	%rdx,%r11

	mulq	%rbp			// np[0]*m
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	8(%rsp),%r10		// tp[1]
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.Linner_enter

.align	16
.Linner:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13		// + tp[j]
	movq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.Linner_enter:
	mulq	%rbx			// a[j]*b[i]
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	leaq	1(%r15),%r15

	mulq	%rbp			// np[j]*m
	cmpq	%r9,%r15
	jne	.Linner

	addq	%rax,%r13
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10	// old carry word tp[num]
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)	// new carry word

	leaq	1(%r14),%r14
	cmpq	%r9,%r14
	jb	.Louter

	// Final reduction: rp = tp - np (with borrow tracked in CF).
	xorq	%r14,%r14		// i = 0
	movq	(%rsp),%rax		// tp[0]
	movq	%r9,%r15		// loop counter = num

.align	16
.Lsub:	sbbq	(%rcx,%r14,8),%rax
	movq	%rax,(%rdi,%r14,8)	// rp[i] = tp[i] - np[i] - borrow
	movq	8(%rsp,%r14,8),%rax	// tp[i+1]
	leaq	1(%r14),%r14
	decq	%r15
	jnz	.Lsub

	sbbq	$0,%rax			// subtract final borrow from carry word
	// %rax = all-ones if tp < np (keep tp), 0 if the subtraction was
	// valid (keep rp = tp - np); %rbx = ~%rax.  Constant-time select.
	movq	$-1,%rbx
	xorq	%rax,%rbx
	xorq	%r14,%r14
	movq	%r9,%r15

.Lcopy:
	movq	(%rdi,%r14,8),%rcx	// candidate: tp - np (already in rp)
	movq	(%rsp,%r14,8),%rdx	// candidate: tp
	andq	%rbx,%rcx
	andq	%rax,%rdx
	movq	%r9,(%rsp,%r14,8)	// zap tp[i] (scrub the scratch area)
	orq	%rcx,%rdx
	movq	%rdx,(%rdi,%r14,8)
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	.Lcopy

	movq	8(%rsp,%r9,8),%rsi	// recover original %rsp
.cfi_def_cfa	%rsi,8
	movq	$1,%rax			// return value 1
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3		// rep ret (branch-predictor-friendly return)
.cfi_endproc
.size	bn_mul_mont,.-bn_mul_mont

//-----------------------------------------------------------------------
// bn_mul4x_mont — 4-way unrolled Montgomery multiplication.
// Same arguments/return as bn_mul_mont.  Usually entered at
// .Lmul4x_enter from bn_mul_mont, which has already zero-extended num
// and saved %rsp in %rax, and preloaded %r11d with the CPU feature word;
// if BMI2+ADX are available (mask 0x80100) it tail-dispatches to
// .Lmulx4x_enter instead.
//-----------------------------------------------------------------------
.type	bn_mul4x_mont,@function
.align	16
bn_mul4x_mont:
.cfi_startproc
	movl	%r9d,%r9d
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lmul4x_enter:
	andl	$0x80100,%r11d		// BMI2 | ADX feature bits
	cmpl	$0x80100,%r11d
	je	.Lmulx4x_enter
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	negq	%r9
	movq	%rsp,%r11
	leaq	-32(%rsp,%r9,8),%r10	// scratch: num*8 + 32 bytes
	negq	%r9
	andq	$-1024,%r10

	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul4x_page_walk
	jmp	.Lmul4x_page_walk_done

.Lmul4x_page_walk:
	// Page-ordered stack probing, as in the scalar path.
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul4x_page_walk
.Lmul4x_page_walk_done:

	movq	%rax,8(%rsp,%r9,8)	// save original %rsp
// DWARF: CFA = *(rsp + 8 + r9*8) + 8
.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul4x_body:
	movq	%rdi,16(%rsp,%r9,8)	// save rp (%rdi is repurposed below)
	movq	%rdx,%r12		// %r12 = bp
	movq	(%r8),%r8		// n0
	movq	(%r12),%rbx		// b[0]
	movq	(%rsi),%rax		// a[0]

	xorq	%r14,%r14		// i = 0
	xorq	%r15,%r15		// j = 0

	movq	%r8,%rbp
	mulq	%rbx			// a[0]*b[0]
	movq	%rax,%r10
	movq	(%rcx),%rax

	imulq	%r10,%rbp		// m = tp[0]*n0
	movq	%rdx,%r11

	mulq	%rbp			// np[0]*m
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi		// %rdi now a carry accumulator

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15		// j = 4
	adcq	$0,%rdx
	movq	%rdi,(%rsp)
	movq	%rdx,%r13
	jmp	.L1st4x
.align	16
.L1st4x:
	// First pass, 4 limbs per iteration (j-2 .. j+1).
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	4(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jb	.L1st4x

	// Tail: last two limbs of the first pass.
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax		// a[0] for next outer round
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdi,(%rsp,%r15,8)	// carry word

	leaq	1(%r14),%r14		// i = 1
.align	4
.Louter4x:
	// Outer loop over b[i], 4 limbs per inner iteration.
	movq	(%r12,%r14,8),%rbx	// b[i]
	xorq	%r15,%r15
	movq	(%rsp),%r10		// tp[0]
	movq	%r8,%rbp
	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp		// m = tp[0]*n0
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%rsp),%r11		// + tp[1]
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15
	adcq	$0,%rdx
	movq	%rdi,(%rsp)
	movq	%rdx,%r13
	jmp	.Linner4x
.align	16
.Linner4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10	// + tp[j-2]
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	4(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jb	.Linner4x

	// Tail of the inner loop for this b[i].
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	1(%r14),%r14		// i++
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	addq	(%rsp,%r9,8),%r13	// + previous carry word
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdi,(%rsp,%r15,8)

	cmpq	%r9,%r14
	jb	.Louter4x
	// Final reduction: rp = tp - np, 4 limbs per iteration.
	movq	16(%rsp,%r9,8),%rdi	// recover rp
	leaq	-4(%r9),%r15
	movq	0(%rsp),%rax
	movq	8(%rsp),%rdx
	shrq	$2,%r15			// iterations = num/4 - 1
	leaq	(%rsp),%rsi		// %rsi = tp
	xorq	%r14,%r14

	subq	0(%rcx),%rax
	movq	16(%rsi),%rbx
	movq	24(%rsi),%rbp
	sbbq	8(%rcx),%rdx

.Lsub4x:
	movq	%rax,0(%rdi,%r14,8)
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	16(%rcx,%r14,8),%rbx
	movq	32(%rsi,%r14,8),%rax
	movq	40(%rsi,%r14,8),%rdx
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)
	movq	%rbp,24(%rdi,%r14,8)
	sbbq	32(%rcx,%r14,8),%rax
	movq	48(%rsi,%r14,8),%rbx
	movq	56(%rsi,%r14,8),%rbp
	sbbq	40(%rcx,%r14,8),%rdx
	leaq	4(%r14),%r14
	decq	%r15
	jnz	.Lsub4x

	movq	%rax,0(%rdi,%r14,8)
	movq	32(%rsi,%r14,8),%rax	// carry word tp[num]
	sbbq	16(%rcx,%r14,8),%rbx
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)

	sbbq	$0,%rax			// %rax = borrow mask (see bn_mul_mont)
	movq	%rbp,24(%rdi,%r14,8)
	pxor	%xmm0,%xmm0
.byte	102,72,15,110,224		// movq %rax,%xmm4 (select mask)
	pcmpeqd	%xmm5,%xmm5		// %xmm5 = all ones
	pshufd	$0,%xmm4,%xmm4		// broadcast mask
	movq	%r9,%r15
	pxor	%xmm4,%xmm5		// %xmm5 = ~mask
	shrq	$2,%r15			// num/4 iterations, 32 bytes each
	xorl	%eax,%eax

	jmp	.Lcopy4x
.align	16
.Lcopy4x:
	// Constant-time select between tp (mask) and rp (=tp-np, ~mask),
	// zapping tp with zeros as we go.
	movdqa	(%rsp,%rax,1),%xmm1
	movdqu	(%rdi,%rax,1),%xmm2
	pand	%xmm4,%xmm1
	pand	%xmm5,%xmm2
	movdqa	16(%rsp,%rax,1),%xmm3
	movdqa	%xmm0,(%rsp,%rax,1)
	por	%xmm2,%xmm1
	movdqu	16(%rdi,%rax,1),%xmm2
	movdqu	%xmm1,(%rdi,%rax,1)
	pand	%xmm4,%xmm3
	pand	%xmm5,%xmm2
	movdqa	%xmm0,16(%rsp,%rax,1)
	por	%xmm2,%xmm3
	movdqu	%xmm3,16(%rdi,%rax,1)
	leaq	32(%rax),%rax
	decq	%r15
	jnz	.Lcopy4x
	movq	8(%rsp,%r9,8),%rsi	// recover original %rsp
.cfi_def_cfa	%rsi, 8
	movq	$1,%rax
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul4x_epilogue:
	.byte	0xf3,0xc3		// rep ret
.cfi_endproc
.size	bn_mul4x_mont,.-bn_mul4x_mont
.extern	bn_sqrx8x_internal
.hidden bn_sqrx8x_internal
.extern	bn_sqr8x_internal
.hidden bn_sqr8x_internal

//-----------------------------------------------------------------------
// bn_sqr8x_mont — squaring path (ap == bp, num % 8 == 0, num >= 8).
// Entered at .Lsqr8x_enter from bn_mul_mont with %rax = original %rsp.
// Delegates the squaring+reduction to the external bn_sqr8x_internal
// (or bn_sqrx8x_internal when BMI2+ADX are available), then performs
// the conditional final subtraction here.
// NOTE(review): the register contract of the *_internal helpers (e.g.
// %rbp pointing at np, %rcx/%r8 on return from the mulx variant) is
// established inside those routines and is not visible in this file.
//-----------------------------------------------------------------------
.type	bn_sqr8x_mont,@function
.align	32
bn_sqr8x_mont:
.cfi_startproc
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lsqr8x_enter:
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lsqr8x_prologue:

	movl	%r9d,%r10d
	shll	$3,%r9d			// %r9 = num*8 (byte length)
	shlq	$3+2,%r10		// %r10 = num*32
	negq	%r9

	// Allocate 2*num*8 + 64 bytes, biased so that the scratch area and
	// ap land in distinct 4KB cache/page slices where possible.
	leaq	-64(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	movq	(%r8),%r8		// n0
	subq	%rsi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lsqr8x_sp_alt
	subq	%r11,%rbp
	leaq	-64(%rbp,%r9,2),%rbp
	jmp	.Lsqr8x_sp_done

.align	32
.Lsqr8x_sp_alt:
	leaq	4096-64(,%r9,2),%r10
	leaq	-64(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lsqr8x_sp_done:
	andq	$-64,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lsqr8x_page_walk
	jmp	.Lsqr8x_page_walk_done

.align	16
.Lsqr8x_page_walk:
	// Page-ordered stack probing.
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lsqr8x_page_walk
.Lsqr8x_page_walk_done:

	movq	%r9,%r10		// -num*8
	negq	%r9			// +num*8

	movq	%r8,32(%rsp)		// stash n0
	movq	%rax,40(%rsp)		// stash original %rsp
// DWARF: CFA = *(rsp + 40) + 8
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lsqr8x_body:

.byte	102,72,15,110,209		// movq %rcx,%xmm2 (np)
	pxor	%xmm0,%xmm0
.byte	102,72,15,110,207		// movq %rdi,%xmm1 (rp)
.byte	102,73,15,110,218		// movq %r10,%xmm3 (-num*8)
	movl	OPENSSL_ia32cap_P+8(%rip),%eax
	andl	$0x80100,%eax		// BMI2 | ADX
	cmpl	$0x80100,%eax
	jne	.Lsqr8x_nox

	call	bn_sqrx8x_internal	// mulx/adx squaring + reduction

	leaq	(%r8,%rcx,1),%rbx	// end of reduced result
	movq	%rcx,%r9
	movq	%rcx,%rdx
.byte	102,72,15,126,207		// movq %xmm1,%rdi (recover rp)
	sarq	$3+2,%rcx		// -num/4 loop counter
	jmp	.Lsqr8x_sub

.align	32
.Lsqr8x_nox:
	call	bn_sqr8x_internal	// classic mulq squaring + reduction

	leaq	(%rdi,%r9,1),%rbx	// end of reduced result
	movq	%r9,%rcx
	movq	%r9,%rdx
.byte	102,72,15,126,207		// movq %xmm1,%rdi (recover rp)
	sarq	$3+2,%rcx		// -num/4 loop counter
	jmp	.Lsqr8x_sub

.align	32
.Lsqr8x_sub:
	// rp = result - np, 4 limbs per iteration; borrow left in CF.
	movq	0(%rbx),%r12
	movq	8(%rbx),%r13
	movq	16(%rbx),%r14
	movq	24(%rbx),%r15
	leaq	32(%rbx),%rbx
	sbbq	0(%rbp),%r12
	sbbq	8(%rbp),%r13
	sbbq	16(%rbp),%r14
	sbbq	24(%rbp),%r15
	leaq	32(%rbp),%rbp
	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	leaq	32(%rdi),%rdi
	incq	%rcx
	jnz	.Lsqr8x_sub

	sbbq	$0,%rax			// %rax = borrow mask
	leaq	(%rbx,%r9,1),%rbx	// rewind to start of result
	leaq	(%rdi,%r9,1),%rdi	// rewind rp

.byte	102,72,15,110,200		// movq %rax,%xmm1 (mask)
	pxor	%xmm0,%xmm0
	pshufd	$0,%xmm1,%xmm1		// broadcast mask
	movq	40(%rsp),%rsi		// recover original %rsp
.cfi_def_cfa	%rsi,8
	jmp	.Lsqr8x_cond_copy

.align	32
.Lsqr8x_cond_copy:
	// Constant-time select between unreduced (xmm2/3) and subtracted
	// (xmm4/5) values, while zapping the scratch area with zeros.
	movdqa	0(%rbx),%xmm2
	movdqa	16(%rbx),%xmm3
	leaq	32(%rbx),%rbx
	movdqu	0(%rdi),%xmm4
	movdqu	16(%rdi),%xmm5
	leaq	32(%rdi),%rdi
	movdqa	%xmm0,-32(%rbx)
	movdqa	%xmm0,-16(%rbx)
	movdqa	%xmm0,-32(%rbx,%rdx,1)
	movdqa	%xmm0,-16(%rbx,%rdx,1)
	pcmpeqd	%xmm1,%xmm0
	pand	%xmm1,%xmm2
	pand	%xmm1,%xmm3
	pand	%xmm0,%xmm4
	pand	%xmm0,%xmm5
	pxor	%xmm0,%xmm0
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqu	%xmm4,-32(%rdi)
	movdqu	%xmm5,-16(%rdi)
	addq	$32,%r9			// %r9 counts up from -num*8 to 0
	jnz	.Lsqr8x_cond_copy

	movq	$1,%rax
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lsqr8x_epilogue:
	.byte	0xf3,0xc3		// rep ret
.cfi_endproc
.size	bn_sqr8x_mont,.-bn_sqr8x_mont

//-----------------------------------------------------------------------
// bn_mulx4x_mont — BMI2/ADX variant using mulx with the dual adcx/adox
// carry chains.  Entered at .Lmulx4x_enter from the 4x dispatcher with
// %rax = original %rsp.  Frame layout at 0(%rsp):
//   0: num*8   8: &b[i]   16: end of bp   24: n0   32: rp
//   40: saved %rsp   48: inner-loop counter   64+: tp[]
//-----------------------------------------------------------------------
.type	bn_mulx4x_mont,@function
.align	32
bn_mulx4x_mont:
.cfi_startproc
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lmulx4x_enter:
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lmulx4x_prologue:

	shll	$3,%r9d			// num *= 8 (byte length)
	xorq	%r10,%r10
	subq	%r9,%r10		// -num*8
	movq	(%r8),%r8		// n0
	leaq	-72(%rsp,%r10,1),%rbp	// scratch: num*8 + 72 bytes
	andq	$-128,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmulx4x_page_walk
	jmp	.Lmulx4x_page_walk_done

.align	16
.Lmulx4x_page_walk:
	// Page-ordered stack probing.
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmulx4x_page_walk
.Lmulx4x_page_walk_done:

	leaq	(%rdx,%r9,1),%r10	// %r10 = &bp[num]

	movq	%r9,0(%rsp)		// num*8
	shrq	$5,%r9
	movq	%r10,16(%rsp)		// end of bp
	subq	$1,%r9
	movq	%r8,24(%rsp)		// n0
	movq	%rdi,32(%rsp)		// rp
	movq	%rax,40(%rsp)		// original %rsp
// DWARF: CFA = *(rsp + 40) + 8
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
	movq	%r9,48(%rsp)		// inner-loop count = num/4 - 1
	jmp	.Lmulx4x_body

.align	32
.Lmulx4x_body:
	leaq	8(%rdx),%rdi		// %rdi = &b[1]
	movq	(%rdx),%rdx		// %rdx = b[0] (implicit mulx operand)
	leaq	64+32(%rsp),%rbx	// %rbx = &tp[4]
	movq	%rdx,%r9		// keep b[0]

	mulxq	0(%rsi),%r8,%rax	// a[0]*b[0]
	mulxq	8(%rsi),%r11,%r14	// a[1]*b[0]
	addq	%rax,%r11
	movq	%rdi,8(%rsp)
	mulxq	16(%rsi),%r12,%r13	// a[2]*b[0]
	adcq	%r14,%r12
	adcq	$0,%r13

	movq	%r8,%rdi
	imulq	24(%rsp),%r8		// m = tp[0]*n0
	xorq	%rbp,%rbp		// %rbp = constant 0 for the carry chains

	mulxq	24(%rsi),%rax,%r14	// a[3]*b[0]
	movq	%r8,%rdx		// %rdx = m
	leaq	32(%rsi),%rsi
	adcxq	%rax,%r13
	adcxq	%rbp,%r14

	mulxq	0(%rcx),%rax,%r10	// np[0]*m
	adcxq	%rax,%rdi		// annihilates tp[0]
	adoxq	%r11,%r10
	mulxq	8(%rcx),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11
.byte	0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00	// mulxq 16(%rcx),%rax,%r12
	movq	48(%rsp),%rdi		// inner-loop counter
	movq	%r10,-32(%rbx)
	adcxq	%rax,%r11
	adoxq	%r13,%r12
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx		// back to b[0]
	movq	%r11,-24(%rbx)
	adcxq	%rax,%r12
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r12,-16(%rbx)

	jmp	.Lmulx4x_1st

.align	32
.Lmulx4x_1st:
	// First pass: adcx chain accumulates a[]*b[0], adox chain folds in
	// np[]*m; 4 limbs per iteration.
	adcxq	%rbp,%r15
	mulxq	0(%rsi),%r10,%rax
	adcxq	%r14,%r10
	mulxq	8(%rsi),%r11,%r14
	adcxq	%rax,%r11
	mulxq	16(%rsi),%r12,%rax
	adcxq	%r14,%r12
	mulxq	24(%rsi),%r13,%r14
.byte	0x67,0x67			// two addr-size prefixes (alignment padding)
	movq	%r8,%rdx		// %rdx = m
	adcxq	%rax,%r13
	adcxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	leaq	32(%rbx),%rbx

	adoxq	%r15,%r10
	mulxq	0(%rcx),%rax,%r15
	adcxq	%rax,%r10
	adoxq	%r15,%r11
	mulxq	8(%rcx),%rax,%r15
	adcxq	%rax,%r11
	adoxq	%r15,%r12
	mulxq	16(%rcx),%rax,%r15
	movq	%r10,-40(%rbx)
	adcxq	%rax,%r12
	movq	%r11,-32(%rbx)
	adoxq	%r15,%r13
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx		// back to b[0]
	movq	%r12,-24(%rbx)
	adcxq	%rax,%r13
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r13,-16(%rbx)

	decq	%rdi
	jnz	.Lmulx4x_1st

	movq	0(%rsp),%rax		// num*8
	movq	8(%rsp),%rdi		// &b[i]
	adcq	%rbp,%r15
	addq	%r15,%r14
	sbbq	%r15,%r15		// top-word borrow mask
	movq	%r14,-8(%rbx)
	jmp	.Lmulx4x_outer

.align	32
.Lmulx4x_outer:
	// Outer loop over b[i]: same dual-chain structure, additionally
	// folding in the previous tp[] contents via the adox chain.
	movq	(%rdi),%rdx		// b[i]
	leaq	8(%rdi),%rdi
	subq	%rax,%rsi		// rewind ap
	movq	%r15,(%rbx)		// store carry word
	leaq	64+32(%rsp),%rbx
	subq	%rax,%rcx		// rewind np

	mulxq	0(%rsi),%r8,%r11
	xorl	%ebp,%ebp		// reset both carry flags; %rbp = 0
	movq	%rdx,%r9
	mulxq	8(%rsi),%r14,%r12
	adoxq	-32(%rbx),%r8		// + tp[0]
	adcxq	%r14,%r11
	mulxq	16(%rsi),%r15,%r13
	adoxq	-24(%rbx),%r11
	adcxq	%r15,%r12
	adoxq	-16(%rbx),%r12
	adcxq	%rbp,%r13
	adoxq	%rbp,%r13

	movq	%rdi,8(%rsp)
	movq	%r8,%r15
	imulq	24(%rsp),%r8		// m = tp[0]*n0
	xorl	%ebp,%ebp

	mulxq	24(%rsi),%rax,%r14
	movq	%r8,%rdx		// %rdx = m
	adcxq	%rax,%r13
	adoxq	-8(%rbx),%r13
	adcxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	adoxq	%rbp,%r14

	mulxq	0(%rcx),%rax,%r10
	adcxq	%rax,%r15		// annihilates tp[0]
	adoxq	%r11,%r10
	mulxq	8(%rcx),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11
	mulxq	16(%rcx),%rax,%r12
	movq	%r10,-32(%rbx)
	adcxq	%rax,%r11
	adoxq	%r13,%r12
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx		// back to b[i]
	movq	%r11,-24(%rbx)
	leaq	32(%rcx),%rcx
	adcxq	%rax,%r12
	adoxq	%rbp,%r15
	movq	48(%rsp),%rdi		// inner-loop counter
	movq	%r12,-16(%rbx)

	jmp	.Lmulx4x_inner

.align	32
.Lmulx4x_inner:
	mulxq	0(%rsi),%r10,%rax
	adcxq	%rbp,%r15
	adoxq	%r14,%r10
	mulxq	8(%rsi),%r11,%r14
	adcxq	0(%rbx),%r10		// + tp[j]
	adoxq	%rax,%r11
	mulxq	16(%rsi),%r12,%rax
	adcxq	8(%rbx),%r11
	adoxq	%r14,%r12
	mulxq	24(%rsi),%r13,%r14
	movq	%r8,%rdx		// %rdx = m
	adcxq	16(%rbx),%r12
	adoxq	%rax,%r13
	adcxq	24(%rbx),%r13
	adoxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	leaq	32(%rbx),%rbx
	adcxq	%rbp,%r14

	adoxq	%r15,%r10
	mulxq	0(%rcx),%rax,%r15
	adcxq	%rax,%r10
	adoxq	%r15,%r11
	mulxq	8(%rcx),%rax,%r15
	adcxq	%rax,%r11
	adoxq	%r15,%r12
	mulxq	16(%rcx),%rax,%r15
	movq	%r10,-40(%rbx)
	adcxq	%rax,%r12
	adoxq	%r15,%r13
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx		// back to b[i]
	movq	%r11,-32(%rbx)
	movq	%r12,-24(%rbx)
	adcxq	%rax,%r13
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r13,-16(%rbx)

	decq	%rdi
	jnz	.Lmulx4x_inner

	movq	0(%rsp),%rax		// num*8
	movq	8(%rsp),%rdi		// &b[i+1]
	adcq	%rbp,%r15
	subq	0(%rbx),%rbp		// sets CF from stored carry word
	adcq	%r15,%r14
	sbbq	%r15,%r15		// new top-word borrow mask
	movq	%r14,-8(%rbx)

	cmpq	16(%rsp),%rdi		// reached end of bp?
	jne	.Lmulx4x_outer

	// Final reduction: rp = tp - np.
	leaq	64(%rsp),%rbx		// %rbx = tp
	subq	%rax,%rcx		// rewind np
	negq	%r15			// CF = top-word borrow
	movq	%rax,%rdx
	shrq	$3+2,%rax		// num/4 iterations
	movq	32(%rsp),%rdi		// rp
	jmp	.Lmulx4x_sub

.align	32
.Lmulx4x_sub:
	movq	0(%rbx),%r11
	movq	8(%rbx),%r12
	movq	16(%rbx),%r13
	movq	24(%rbx),%r14
	leaq	32(%rbx),%rbx
	sbbq	0(%rcx),%r11
	sbbq	8(%rcx),%r12
	sbbq	16(%rcx),%r13
	sbbq	24(%rcx),%r14
	leaq	32(%rcx),%rcx
	movq	%r11,0(%rdi)
	movq	%r12,8(%rdi)
	movq	%r13,16(%rdi)
	movq	%r14,24(%rdi)
	leaq	32(%rdi),%rdi
	decq	%rax
	jnz	.Lmulx4x_sub

	sbbq	$0,%r15			// %r15 = borrow mask
	leaq	64(%rsp),%rbx
	subq	%rdx,%rdi		// rewind rp

.byte	102,73,15,110,207		// movq %r15,%xmm1 (mask)
	pxor	%xmm0,%xmm0
	pshufd	$0,%xmm1,%xmm1		// broadcast mask
	movq	40(%rsp),%rsi		// recover original %rsp
.cfi_def_cfa	%rsi,8
	jmp	.Lmulx4x_cond_copy

.align	32
.Lmulx4x_cond_copy:
	// Constant-time select between tp and tp-np, zapping tp.
	movdqa	0(%rbx),%xmm2
	movdqa	16(%rbx),%xmm3
	leaq	32(%rbx),%rbx
	movdqu	0(%rdi),%xmm4
	movdqu	16(%rdi),%xmm5
	leaq	32(%rdi),%rdi
	movdqa	%xmm0,-32(%rbx)
	movdqa	%xmm0,-16(%rbx)
	pcmpeqd	%xmm1,%xmm0
	pand	%xmm1,%xmm2
	pand	%xmm1,%xmm3
	pand	%xmm0,%xmm4
	pand	%xmm0,%xmm5
	pxor	%xmm0,%xmm0
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqu	%xmm4,-32(%rdi)
	movdqu	%xmm5,-16(%rdi)
	subq	$32,%rdx		// byte count down to 0
	jnz	.Lmulx4x_cond_copy

	movq	%rdx,(%rbx)		// zap last tp word (rdx == 0 here)

	movq	$1,%rax
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmulx4x_epilogue:
	.byte	0xf3,0xc3		// rep ret
.cfi_endproc
.size	bn_mulx4x_mont,.-bn_mulx4x_mont
// ASCII banner: "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16
#endif
.section	.note.GNU-stack,"",@progbits