/*
 * Montgomery multiplication with scatter/gather for x86_64
 * (see the ASCII identification string at the end of this file).
 *
 * Syntax : AT&T / GNU as, ELF directives (.type ...,@function / .size).
 * ABI    : arguments are read from rdi, rsi, rdx, rcx, r8, r9 and 8(%rsp),
 *          i.e. the System V AMD64 register order.
 *
 * Exported symbols:
 *   bn_mul_mont_gather5  - multiply, second operand gathered from a table
 *   bn_scatter5          - store a vector into one slot of the table
 *   bn_gather5           - fetch a vector from one slot of the table
 * Local symbol:
 *   bn_mul4x_mont_gather5 - 4x-unrolled variant, reached only through
 *                           .Lmul4x_enter from bn_mul_mont_gather5
 *
 * Table layout, as implied by the address arithmetic below: word i of the
 * vector stored under index "power" lives at byte offset power*8 + i*256.
 * The gather code uses power bits 0-2 as a word offset and bits 3-4 to
 * pick one of four 64-byte columns, so it addresses 32 slots.
 *
 * NOTE(review): the exact C prototypes are not visible in this file;
 * the argument names used in the comments (rp, ap, table, np, n0, num,
 * power) are descriptive only - confirm against the C-side declarations.
 *
 * Raw .byte sequences used below:
 *   102,72,15,126,195 = 66 48 0F 7E C3 = movq %xmm0,%rbx
 *   0xf3,0xc3         = rep ret
 */
.text

/*
 * bn_mul_mont_gather5
 *   In : rdi = rp     result vector, num words (written)
 *        rsi = ap     first operand, num words (read)
 *        rdx = table  scatter/gather table holding the second operand
 *        rcx = np     vector subtracted in the final step (presumably the
 *                     modulus)
 *        r8  = n0     pointer; only the word at (r8) is used
 *        r9d = num    word count (zero-extended before use)
 *        8(%rsp) = power  table slot index (32-bit load)
 *   Out: rax = 1
 *   Saves and restores rbx, rbp, r12-r15.
 *   Stack: allocates num+2 words below the pushed registers and rounds
 *          rsp down to a 1024-byte boundary; tp[0..num] is the
 *          accumulator, tp[num+1] keeps the pre-allocation rsp.
 *   Dispatch: num a multiple of 4 and num >= 8 goes to the 4x path,
 *          everything else to the generic path.
 *   NOTE(review): the generic path loads 8(%rsi) unconditionally, so it
 *          appears to require num >= 2 - confirm with callers.
 */
.globl	bn_mul_mont_gather5
.type	bn_mul_mont_gather5,@function
.align	64
bn_mul_mont_gather5:
	testl	$3,%r9d			# num not a multiple of 4 -> generic path
	jnz	.Lmul_enter
	cmpl	$8,%r9d			# num < 8 (unsigned) -> generic path
	jb	.Lmul_enter
	jmp	.Lmul4x_enter

.align	16
.Lmul_enter:
	movl	%r9d,%r9d		# zero-extend num to 64 bits
	movl	8(%rsp),%r10d		# r10 = power (7th argument, on the stack)
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	movq	%rsp,%rax		# remember rsp as it is after the pushes
	leaq	2(%r9),%r11
	negq	%r11
	leaq	(%rsp,%r11,8),%rsp	# reserve num+2 words
	andq	$-1024,%rsp		# round the frame down to 1024 bytes

	movq	%rax,8(%rsp,%r9,8)	# tp[num+1] = saved rsp
.Lmul_body:
	/*
	 * Build the four select masks. r11 = power & 7 is the word offset
	 * inside a 64-byte column, r10 = ~(power >> 3) & 3 indexes
	 * .Lmagic_masks so that exactly one of xmm4..xmm7 is all-ones.
	 */
	movq	%rdx,%r12
	movq	%r10,%r11
	shrq	$3,%r10
	andq	$7,%r11
	notq	%r10
	leaq	.Lmagic_masks(%rip),%rax
	andq	$3,%r10
	leaq	96(%r12,%r11,8),%r12	# r12 = table + (power&7)*8 + 96
	movq	0(%rax,%r10,8),%xmm4
	movq	8(%rax,%r10,8),%xmm5
	movq	16(%rax,%r10,8),%xmm6
	movq	24(%rax,%r10,8),%xmm7

	/*
	 * Gather word 0 of the second operand: all four candidate columns
	 * are loaded, masked and OR-ed, so one of them survives in xmm0.
	 */
	movq	-96(%r12),%xmm0
	movq	-32(%r12),%xmm1
	pand	%xmm4,%xmm0
	movq	32(%r12),%xmm2
	pand	%xmm5,%xmm1
	movq	96(%r12),%xmm3
	pand	%xmm6,%xmm2
	por	%xmm1,%xmm0
	pand	%xmm7,%xmm3
	por	%xmm2,%xmm0
	leaq	256(%r12),%r12		# advance to the next table row
	por	%xmm3,%xmm0

.byte	102,72,15,126,195		# movq %xmm0,%rbx : rbx = gathered word

	movq	(%r8),%r8		# r8 = word at n0
	movq	(%rsi),%rax		# rax = ap[0]

	xorq	%r14,%r14		# r14 = outer index i
	xorq	%r15,%r15		# r15 = inner index j

	/* Start gathering the next word while the first products run. */
	movq	-96(%r12),%xmm0
	movq	-32(%r12),%xmm1
	pand	%xmm4,%xmm0
	movq	32(%r12),%xmm2
	pand	%xmm5,%xmm1

	movq	%r8,%rbp
	mulq	%rbx			# rdx:rax = ap[0] * rbx
	movq	%rax,%r10
	movq	(%rcx),%rax		# rax = np[0]

	movq	96(%r12),%xmm3
	pand	%xmm6,%xmm2
	por	%xmm1,%xmm0
	pand	%xmm7,%xmm3

	imulq	%r10,%rbp		# rbp = low product word * n0 word
	movq	%rdx,%r11

	por	%xmm2,%xmm0
	leaq	256(%r12),%r12
	por	%xmm3,%xmm0		# xmm0 = next gathered word

	mulq	%rbp			# rdx:rax = np[0] * rbp
	addq	%rax,%r10		# low word is only needed for its carry
	movq	8(%rsi),%rax		# rax = ap[1]
	adcq	$0,%rdx
	movq	%rdx,%r13

	leaq	1(%r15),%r15		# j = 1
	jmp	.L1st_enter

/*
 * First pass: tp[] = ap[] * rbx + np[] * rbp, shifted down one word.
 * r11/r10 carry the ap*rbx chain, r13 carries the np*rbp chain.
 */
.align	16
.L1st:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax	# rax = ap[j]
	adcq	$0,%rdx
	addq	%r11,%r13
	movq	%r10,%r11
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)	# tp[j-2]
	movq	%rdx,%r13

.L1st_enter:
	mulq	%rbx			# ap[j] * rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax	# rax = np[j]
	adcq	$0,%rdx
	leaq	1(%r15),%r15		# j++
	movq	%rdx,%r10

	mulq	%rbp			# np[j] * rbp
	cmpq	%r9,%r15
	jne	.L1st

.byte	102,72,15,126,195		# movq %xmm0,%rbx : rbx = next gathered word

	addq	%rax,%r13
	movq	(%rsi),%rax		# rax = ap[0] for the next pass
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)	# tp[num-2]
	movq	%rdx,%r13
	movq	%r10,%r11

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)	# tp[num-1]
	movq	%rdx,(%rsp,%r9,8)	# tp[num] = top carry

	leaq	1(%r14),%r14		# i = 1
	jmp	.Louter
/*
 * Remaining passes: same as the first pass, but the previous tp[] is
 * added in. Each pass also gathers the word used by the following pass.
 */
.align	16
.Louter:
	xorq	%r15,%r15		# j = 0
	movq	%r8,%rbp
	movq	(%rsp),%r10		# r10 = tp[0]

	movq	-96(%r12),%xmm0
	movq	-32(%r12),%xmm1
	pand	%xmm4,%xmm0
	movq	32(%r12),%xmm2
	pand	%xmm5,%xmm1

	mulq	%rbx			# ap[0] * rbx
	addq	%rax,%r10
	movq	(%rcx),%rax		# rax = np[0]
	adcq	$0,%rdx

	movq	96(%r12),%xmm3
	pand	%xmm6,%xmm2
	por	%xmm1,%xmm0
	pand	%xmm7,%xmm3

	imulq	%r10,%rbp		# rbp = low word * n0 word
	movq	%rdx,%r11

	por	%xmm2,%xmm0
	leaq	256(%r12),%r12
	por	%xmm3,%xmm0

	mulq	%rbp			# np[0] * rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax		# rax = ap[1]
	adcq	$0,%rdx
	movq	8(%rsp),%r10		# r10 = tp[1]
	movq	%rdx,%r13

	leaq	1(%r15),%r15		# j = 1
	jmp	.Linner_enter

.align	16
.Linner:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax	# rax = ap[j]
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10	# r10 = tp[j]
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)	# tp[j-2]
	movq	%rdx,%r13

.Linner_enter:
	mulq	%rbx			# ap[j] * rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax	# rax = np[j]
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	leaq	1(%r15),%r15		# j++

	mulq	%rbp			# np[j] * rbp
	cmpq	%r9,%r15
	jne	.Linner

.byte	102,72,15,126,195		# movq %xmm0,%rbx : rbx = next gathered word

	addq	%rax,%r13
	movq	(%rsi),%rax		# rax = ap[0] for the next pass
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10	# r10 = tp[num]
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)	# tp[num-2]
	movq	%rdx,%r13

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)	# tp[num-1]
	movq	%rdx,(%rsp,%r9,8)	# tp[num] = top carry

	leaq	1(%r14),%r14		# i++
	cmpq	%r9,%r14
	jl	.Louter

	/*
	 * rp[] = tp[] - np[] with borrow propagation. The xor below also
	 * clears CF for the first sbbq; leaq, movq and decq leave CF alone.
	 */
	xorq	%r14,%r14
	movq	(%rsp),%rax		# rax = tp[0]
	leaq	(%rsp),%rsi		# rsi = tp
	movq	%r9,%r15		# r15 = words left
	jmp	.Lsub
.align	16
.Lsub:	sbbq	(%rcx,%r14,8),%rax
	movq	%rax,(%rdi,%r14,8)	# rp[i] = tp[i] - np[i] - borrow
	movq	8(%rsi,%r14,8),%rax	# rax = tp[i+1]
	leaq	1(%r14),%r14
	decq	%r15
	jnz	.Lsub

	/*
	 * rax = tp[num] - borrow is used as a bit mask to pick the copy
	 * source without branching: rsi = (tp & rax) | (rp & ~rax).
	 * Presumably rax is either 0 or all-ones here.
	 */
	sbbq	$0,%rax
	xorq	%r14,%r14
	andq	%rax,%rsi
	notq	%rax
	movq	%rdi,%rcx
	andq	%rax,%rcx
	movq	%r9,%r15
	orq	%rcx,%rsi
.align	16
.Lcopy:
	movq	(%rsi,%r14,8),%rax	# read before tp[i] is overwritten
	movq	%r14,(%rsp,%r14,8)	# overwrite tp[i] with its index
	movq	%rax,(%rdi,%r14,8)	# rp[i] = selected word
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	.Lcopy

	movq	8(%rsp,%r9,8),%rsi	# rsi = rsp saved in the prologue
	movq	$1,%rax			# return value
	movq	(%rsi),%r15
	movq	8(%rsi),%r14
	movq	16(%rsi),%r13
	movq	24(%rsi),%r12
	movq	32(%rsi),%rbp
	movq	40(%rsi),%rbx
	leaq	48(%rsi),%rsp		# drop the six saved registers
.Lmul_epilogue:
	.byte	0xf3,0xc3		# rep ret
.size	bn_mul_mont_gather5,.-bn_mul_mont_gather5
/*
 * bn_mul4x_mont_gather5 (not exported; entered through .Lmul4x_enter)
 *   Same register arguments and return value as bn_mul_mont_gather5.
 *   The inner loops are unrolled four words per iteration.
 *   Stack: allocates num+4 words, rounded down to 1024 bytes;
 *          tp[num+1] keeps the saved rsp, tp[num+2] keeps rp because
 *          rdi is reused as an accumulator inside the loops.
 */
.type	bn_mul4x_mont_gather5,@function
.align	16
bn_mul4x_mont_gather5:
.Lmul4x_enter:
	movl	%r9d,%r9d		# zero-extend num to 64 bits
	movl	8(%rsp),%r10d		# r10 = power (7th argument, on the stack)
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	movq	%rsp,%rax		# remember rsp as it is after the pushes
	leaq	4(%r9),%r11
	negq	%r11
	leaq	(%rsp,%r11,8),%rsp	# reserve num+4 words
	andq	$-1024,%rsp		# round the frame down to 1024 bytes

	movq	%rax,8(%rsp,%r9,8)	# tp[num+1] = saved rsp
.Lmul4x_body:
	movq	%rdi,16(%rsp,%r9,8)	# tp[num+2] = rp
	/* Select masks and table pointer, as in the generic path. */
	movq	%rdx,%r12
	movq	%r10,%r11
	shrq	$3,%r10
	andq	$7,%r11
	notq	%r10
	leaq	.Lmagic_masks(%rip),%rax
	andq	$3,%r10
	leaq	96(%r12,%r11,8),%r12	# r12 = table + (power&7)*8 + 96
	movq	0(%rax,%r10,8),%xmm4
	movq	8(%rax,%r10,8),%xmm5
	movq	16(%rax,%r10,8),%xmm6
	movq	24(%rax,%r10,8),%xmm7

	/* Gather word 0 of the second operand into xmm0. */
	movq	-96(%r12),%xmm0
	movq	-32(%r12),%xmm1
	pand	%xmm4,%xmm0
	movq	32(%r12),%xmm2
	pand	%xmm5,%xmm1
	movq	96(%r12),%xmm3
	pand	%xmm6,%xmm2
	por	%xmm1,%xmm0
	pand	%xmm7,%xmm3
	por	%xmm2,%xmm0
	leaq	256(%r12),%r12
	por	%xmm3,%xmm0

.byte	102,72,15,126,195		# movq %xmm0,%rbx : rbx = gathered word
	movq	(%r8),%r8		# r8 = word at n0
	movq	(%rsi),%rax		# rax = ap[0]

	xorq	%r14,%r14		# r14 = outer index i
	xorq	%r15,%r15		# r15 = inner index j

	/* Start gathering the next word. */
	movq	-96(%r12),%xmm0
	movq	-32(%r12),%xmm1
	pand	%xmm4,%xmm0
	movq	32(%r12),%xmm2
	pand	%xmm5,%xmm1

	movq	%r8,%rbp
	mulq	%rbx			# ap[0] * rbx
	movq	%rax,%r10
	movq	(%rcx),%rax		# rax = np[0]

	movq	96(%r12),%xmm3
	pand	%xmm6,%xmm2
	por	%xmm1,%xmm0
	pand	%xmm7,%xmm3

	imulq	%r10,%rbp		# rbp = low product word * n0 word
	movq	%rdx,%r11

	por	%xmm2,%xmm0
	leaq	256(%r12),%r12
	por	%xmm3,%xmm0		# xmm0 = next gathered word

	mulq	%rbp			# np[0] * rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax		# rax = ap[1]
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx			# ap[1] * rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax		# rax = np[1]
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp			# np[1] * rbp
	addq	%rax,%rdi
	movq	16(%rsi),%rax		# rax = ap[2]
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15		# j = 4
	adcq	$0,%rdx
	movq	%rdi,(%rsp)		# tp[0]
	movq	%rdx,%r13
	jmp	.L1st4x
/*
 * First pass, four words per iteration. r10/r11 alternate as the
 * ap*rbx carry, r13/rdi alternate as the np*rbp carry.
 */
.align	16
.L1st4x:
	mulq	%rbx			# ap[j-2] * rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp			# np[j-2] * rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)	# tp[j-3]
	movq	%rdx,%rdi

	mulq	%rbx			# ap[j-1] * rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp			# np[j-1] * rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)	# tp[j-2]
	movq	%rdx,%r13

	mulq	%rbx			# ap[j] * rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp			# np[j] * rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)	# tp[j-1]
	movq	%rdx,%rdi

	mulq	%rbx			# ap[j+1] * rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	4(%r15),%r15		# j += 4
	movq	%rdx,%r10

	mulq	%rbp			# np[old j+1] * rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)	# tp[old j]
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jl	.L1st4x

	/* Tail of the first pass: the last two words. */
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax		# rax = ap[0] for the next pass
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.byte	102,72,15,126,195		# movq %xmm0,%rbx : rbx = next gathered word

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)	# tp[num-1]
	movq	%rdi,(%rsp,%r15,8)	# tp[num] = top carry

	leaq	1(%r14),%r14		# i = 1
/*
 * Remaining passes, four words per iteration, adding the previous tp[].
 * Stores lag one step behind the computation, hence the alternating
 * rdi/r13 stores at offsets -32, -24, -16 and -40.
 */
.align	4
.Louter4x:
	xorq	%r15,%r15		# j = 0
	movq	-96(%r12),%xmm0
	movq	-32(%r12),%xmm1
	pand	%xmm4,%xmm0
	movq	32(%r12),%xmm2
	pand	%xmm5,%xmm1

	movq	(%rsp),%r10		# r10 = tp[0]
	movq	%r8,%rbp
	mulq	%rbx			# ap[0] * rbx
	addq	%rax,%r10
	movq	(%rcx),%rax		# rax = np[0]
	adcq	$0,%rdx

	movq	96(%r12),%xmm3
	pand	%xmm6,%xmm2
	por	%xmm1,%xmm0
	pand	%xmm7,%xmm3

	imulq	%r10,%rbp		# rbp = low word * n0 word
	movq	%rdx,%r11

	por	%xmm2,%xmm0
	leaq	256(%r12),%r12
	por	%xmm3,%xmm0

	mulq	%rbp			# np[0] * rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax		# rax = ap[1]
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx			# ap[1] * rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax		# rax = np[1]
	adcq	$0,%rdx
	addq	8(%rsp),%r11		# + tp[1]
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp			# np[1] * rbp
	addq	%rax,%rdi
	movq	16(%rsi),%rax		# rax = ap[2]
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15		# j = 4
	adcq	$0,%rdx
	movq	%rdx,%r13
	jmp	.Linner4x
.align	16
.Linner4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10	# + tp[j-2]
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)	# tp[j-4]
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11	# + tp[j-1]
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)	# tp[j-3]
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	(%rsp,%r15,8),%r10	# + tp[j]
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)	# tp[j-2]
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	8(%rsp,%r15,8),%r11	# + tp[j+1]
	adcq	$0,%rdx
	leaq	4(%r15),%r15		# j += 4
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%r13,-40(%rsp,%r15,8)	# tp[old j-1]
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jl	.Linner4x

	/* Tail of the pass: the last two words plus the top carry. */
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	1(%r14),%r14		# i++
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax		# rax = ap[0] for the next pass
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%r13

.byte	102,72,15,126,195		# movq %xmm0,%rbx : rbx = next gathered word
	movq	%rdi,-16(%rsp,%r15,8)

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	addq	(%rsp,%r9,8),%r13	# + previous tp[num]
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)	# tp[num-1]
	movq	%rdi,(%rsp,%r15,8)	# tp[num] = top carry

	cmpq	%r9,%r14
	jl	.Louter4x
	/*
	 * rp[] = tp[] - np[], four words per iteration. r9 is temporarily
	 * divided by 4 to serve as the iteration count and is restored by
	 * the shlq before the epilogue.
	 */
	movq	16(%rsp,%r9,8),%rdi	# reload rp saved at tp[num+2]
	movq	0(%rsp),%rax
	pxor	%xmm0,%xmm0		# xmm0 = 0, used to clear tp[] below
	movq	8(%rsp),%rdx
	shrq	$2,%r9
	leaq	(%rsp),%rsi		# rsi = tp
	xorq	%r14,%r14

	subq	0(%rcx),%rax
	movq	16(%rsi),%rbx
	movq	24(%rsi),%rbp
	sbbq	8(%rcx),%rdx
	leaq	-1(%r9),%r15		# num/4 - 1 full iterations
	jmp	.Lsub4x
.align	16
.Lsub4x:
	movq	%rax,0(%rdi,%r14,8)
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	16(%rcx,%r14,8),%rbx
	movq	32(%rsi,%r14,8),%rax
	movq	40(%rsi,%r14,8),%rdx
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)
	movq	%rbp,24(%rdi,%r14,8)
	sbbq	32(%rcx,%r14,8),%rax
	movq	48(%rsi,%r14,8),%rbx
	movq	56(%rsi,%r14,8),%rbp
	sbbq	40(%rcx,%r14,8),%rdx
	leaq	4(%r14),%r14		# leaq and decq keep CF for the next sbbq
	decq	%r15
	jnz	.Lsub4x

	movq	%rax,0(%rdi,%r14,8)
	movq	32(%rsi,%r14,8),%rax	# rax = tp[num]
	sbbq	16(%rcx,%r14,8),%rbx
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)

	/*
	 * rax = tp[num] - borrow selects the copy source without
	 * branching: rsi = (tp & rax) | (rp & ~rax). Presumably rax is
	 * either 0 or all-ones here.
	 */
	sbbq	$0,%rax
	movq	%rbp,24(%rdi,%r14,8)
	xorq	%r14,%r14
	andq	%rax,%rsi
	notq	%rax
	movq	%rdi,%rcx
	andq	%rax,%rcx
	leaq	-1(%r9),%r15
	orq	%rcx,%rsi

	/*
	 * Copy the selected vector to rp 16 bytes at a time and zero tp[].
	 * movdqa is used only on the stack frame, which was rounded down
	 * to 1024 bytes in the prologue; rp and the source use movdqu.
	 */
	movdqu	(%rsi),%xmm1
	movdqa	%xmm0,(%rsp)
	movdqu	%xmm1,(%rdi)
	jmp	.Lcopy4x
.align	16
.Lcopy4x:
	movdqu	16(%rsi,%r14,1),%xmm2
	movdqu	32(%rsi,%r14,1),%xmm1
	movdqa	%xmm0,16(%rsp,%r14,1)
	movdqu	%xmm2,16(%rdi,%r14,1)
	movdqa	%xmm0,32(%rsp,%r14,1)
	movdqu	%xmm1,32(%rdi,%r14,1)
	leaq	32(%r14),%r14		# r14 is a byte offset in this loop
	decq	%r15
	jnz	.Lcopy4x

	shlq	$2,%r9			# restore r9 = num
	movdqu	16(%rsi,%r14,1),%xmm2
	movdqa	%xmm0,16(%rsp,%r14,1)
	movdqu	%xmm2,16(%rdi,%r14,1)
	movq	8(%rsp,%r9,8),%rsi	# rsi = rsp saved in the prologue
	movq	$1,%rax			# return value
	movq	(%rsi),%r15
	movq	8(%rsi),%r14
	movq	16(%rsi),%r13
	movq	24(%rsi),%r12
	movq	32(%rsi),%rbp
	movq	40(%rsi),%rbx
	leaq	48(%rsi),%rsp		# drop the six saved registers
.Lmul4x_epilogue:
	.byte	0xf3,0xc3		# rep ret
.size	bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
/*
 * bn_scatter5
 *   In : rdi = source vector (read), rsi = num words,
 *        rdx = table base, rcx = slot index
 *   Stores source word i at table + rcx*8 + i*256.
 *   Returns immediately when num is 0.
 *   Clobbers rax, rdx, rdi, rsi and flags.
 */
.globl	bn_scatter5
.type	bn_scatter5,@function
.align	16
bn_scatter5:
	testq	%rsi,%rsi		# nothing to do for num == 0
	jz	.Lscatter_epilogue
	leaq	(%rdx,%rcx,8),%rdx	# rdx = first destination word
.Lscatter:
	movq	(%rdi),%rax
	leaq	8(%rdi),%rdi
	movq	%rax,(%rdx)
	leaq	256(%rdx),%rdx		# next table row
	subq	$1,%rsi
	jnz	.Lscatter
.Lscatter_epilogue:
	.byte	0xf3,0xc3		# rep ret
.size	bn_scatter5,.-bn_scatter5

/*
 * bn_gather5
 *   In : rdi = destination vector (written), rsi = num words,
 *        rdx = table base, rcx = slot index
 *   Reads back the vector stored by bn_scatter5 under the same index.
 *   Each row is fetched by loading all four 64-byte columns and
 *   masking, so the set of addresses read depends only on rcx & 7.
 *   Returns immediately when num is 0, matching bn_scatter5; without
 *   the guard the subq/jnz loop would wrap the counter and run past
 *   both buffers.
 *   Clobbers rax, rcx, rdx, rdi, rsi, r11, xmm0-xmm7 and flags.
 */
.globl	bn_gather5
.type	bn_gather5,@function
.align	16
bn_gather5:
	testq	%rsi,%rsi		# nothing to do for num == 0
	jz	.Lgather_epilogue
	movq	%rcx,%r11
	shrq	$3,%rcx
	andq	$7,%r11			# r11 = word offset inside a column
	notq	%rcx
	leaq	.Lmagic_masks(%rip),%rax
	andq	$3,%rcx			# rcx = ~(index >> 3) & 3
	leaq	96(%rdx,%r11,8),%rdx
	movq	0(%rax,%rcx,8),%xmm4	# exactly one of xmm4..xmm7 is all-ones
	movq	8(%rax,%rcx,8),%xmm5
	movq	16(%rax,%rcx,8),%xmm6
	movq	24(%rax,%rcx,8),%xmm7
	jmp	.Lgather
.align	16
.Lgather:
	movq	-96(%rdx),%xmm0
	movq	-32(%rdx),%xmm1
	pand	%xmm4,%xmm0
	movq	32(%rdx),%xmm2
	pand	%xmm5,%xmm1
	movq	96(%rdx),%xmm3
	pand	%xmm6,%xmm2
	por	%xmm1,%xmm0
	pand	%xmm7,%xmm3
	por	%xmm2,%xmm0
	leaq	256(%rdx),%rdx		# next table row
	por	%xmm3,%xmm0

	movq	%xmm0,(%rdi)
	leaq	8(%rdi),%rdi
	subq	$1,%rsi
	jnz	.Lgather
.Lgather_epilogue:
	.byte	0xf3,0xc3		# rep ret
.LSEH_end_bn_gather5:
.size	bn_gather5,.-bn_gather5
/*
 * Eight 64-bit mask words, only the one at index 3 is all-ones. Loading
 * four consecutive words starting at index 0..3 therefore yields
 * exactly one all-ones mask among xmm4..xmm7.
 */
.align	64
.Lmagic_masks:
.long	0,0, 0,0, 0,0, -1,-1
.long	0,0, 0,0, 0,0, 0,0
/* ASCII: "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0