#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
/*
 * Vector Permutation AES for x86_64/SSSE3 (see the identification string
 * stored after the constant tables at the end of this file).
 *
 * Syntax:  AT&T / GAS.  Symbol naming follows Mach-O conventions
 *          (leading underscore on globals, "L$" local labels,
 *          .private_extern visibility).
 *
 * SSSE3 instructions are emitted as raw bytes; each one is annotated
 * with its decoded form:
 *   .byte 102,15,56,0,<modrm>            pshufb  (66 0F 38 00 /r)
 *   .byte 102,[REX,]15,58,15,<modrm>,<i> palignr (66 [REX] 0F 3A 0F /r ib)
 *   .byte 0xf3,0xc3                      rep ret
 *
 * Register roles set up by _vpaes_preheat and relied on by every core
 * routine below:
 *   %xmm9  = L$k_s0F            %xmm10 = L$k_inv      %xmm11 = L$k_inv+16
 *   %xmm13 = L$k_sb1            %xmm12 = L$k_sb1+16
 *   %xmm15 = L$k_sb2            %xmm14 = L$k_sb2+16
 *
 * Several routines address one table relative to the label of another
 * (for example -96(%r10) with %r10 = L$k_mc_backward reaches L$k_sbo).
 * The order and size of the tables under _vpaes_consts must therefore
 * not be changed.
 *
 * NOTE(review): the instruction sequences appear to be generated output;
 * the statement order is kept exactly as found.
 */
.text

/*
 * _vpaes_encrypt_core
 * In:    %xmm0 = 16-byte input block
 *        %rdx  = key schedule; 240(%rdx) holds the loop count
 *        %xmm9-%xmm15 as loaded by _vpaes_preheat
 * Out:   %xmm0 = 16-byte result
 * Clobb: %rax, %r9, %r10, %r11, %xmm1-%xmm5, flags
 */
.p2align 4
_vpaes_encrypt_core:
        movq    %rdx,%r9                        /* r9 walks the key schedule */
        movq    $16,%r11                        /* r11 = byte offset into 64-byte tables, kept in {0,16,32,48} */
        movl    240(%rdx),%eax                  /* loop count (32-bit load zero-extends into rax) */
        movdqa  %xmm9,%xmm1
        movdqa  L$k_ipt(%rip),%xmm2
        pandn   %xmm0,%xmm1                     /* xmm1 = input & ~0x0F.. (high nibbles) */
        movdqu  (%r9),%xmm5                     /* first 16 bytes of the key schedule */
        psrld   $4,%xmm1                        /* high nibbles moved down */
        pand    %xmm9,%xmm0                     /* xmm0 = low nibbles */
        .byte   102,15,56,0,208                 /* pshufb %xmm0,%xmm2 */
        movdqa  L$k_ipt+16(%rip),%xmm0
        .byte   102,15,56,0,193                 /* pshufb %xmm1,%xmm0 */
        pxor    %xmm5,%xmm2
        addq    $16,%r9                         /* last flag-setting op before the jmp: result is non-zero, so the first jnz below is taken */
        pxor    %xmm2,%xmm0
        leaq    L$k_mc_backward(%rip),%r10      /* base for the indexed table loads below */
        jmp     L$enc_entry

.p2align 4
L$enc_loop:
        /* Middle of a round: xmm2/xmm3 are the indices produced at L$enc_entry,
           xmm5 is the current 16 bytes of key schedule. */
        movdqa  %xmm13,%xmm4                    /* L$k_sb1 */
        movdqa  %xmm12,%xmm0                    /* L$k_sb1+16 */
        .byte   102,15,56,0,226                 /* pshufb %xmm2,%xmm4 */
        .byte   102,15,56,0,195                 /* pshufb %xmm3,%xmm0 */
        pxor    %xmm5,%xmm4
        movdqa  %xmm15,%xmm5                    /* L$k_sb2 */
        pxor    %xmm4,%xmm0
        movdqa  -64(%r11,%r10,1),%xmm1          /* L$k_mc_forward + r11 */
        .byte   102,15,56,0,234                 /* pshufb %xmm2,%xmm5 */
        movdqa  (%r11,%r10,1),%xmm4             /* L$k_mc_backward + r11 */
        movdqa  %xmm14,%xmm2                    /* L$k_sb2+16 */
        .byte   102,15,56,0,211                 /* pshufb %xmm3,%xmm2 */
        movdqa  %xmm0,%xmm3
        pxor    %xmm5,%xmm2
        .byte   102,15,56,0,193                 /* pshufb %xmm1,%xmm0 */
        addq    $16,%r9                         /* advance to the next 16 bytes of key schedule */
        pxor    %xmm2,%xmm0
        .byte   102,15,56,0,220                 /* pshufb %xmm4,%xmm3 */
        addq    $16,%r11
        pxor    %xmm0,%xmm3
        .byte   102,15,56,0,193                 /* pshufb %xmm1,%xmm0 */
        andq    $0x30,%r11                      /* wrap table offset to 0..48 */
        subq    $1,%rax                         /* sets ZF for the jnz at the end of L$enc_entry; nothing in between writes flags */
        pxor    %xmm3,%xmm0

L$enc_entry:
        /* Top of a round: split xmm0 into nibbles and derive the lookup
           indices xmm2/xmm3 through the L$k_inv tables (xmm10/xmm11). */
        movdqa  %xmm9,%xmm1
        movdqa  %xmm11,%xmm5
        pandn   %xmm0,%xmm1
        psrld   $4,%xmm1                        /* xmm1 = high nibbles */
        pand    %xmm9,%xmm0                     /* xmm0 = low nibbles */
        .byte   102,15,56,0,232                 /* pshufb %xmm0,%xmm5 */
        movdqa  %xmm10,%xmm3
        pxor    %xmm1,%xmm0
        .byte   102,15,56,0,217                 /* pshufb %xmm1,%xmm3 */
        movdqa  %xmm10,%xmm4
        pxor    %xmm5,%xmm3
        .byte   102,15,56,0,224                 /* pshufb %xmm0,%xmm4 */
        movdqa  %xmm10,%xmm2
        pxor    %xmm5,%xmm4
        .byte   102,15,56,0,211                 /* pshufb %xmm3,%xmm2 */
        movdqa  %xmm10,%xmm3
        pxor    %xmm0,%xmm2
        .byte   102,15,56,0,220                 /* pshufb %xmm4,%xmm3 */
        movdqu  (%r9),%xmm5                     /* key schedule bytes for this round */
        pxor    %xmm1,%xmm3
        jnz     L$enc_loop                      /* flags come from subq above (or addq on first entry) */

        /* Final round: uses L$k_sbo and a L$k_sr permutation instead of
           the L$k_sb1/L$k_sb2 and L$k_mc_* tables. */
        movdqa  -96(%r10),%xmm4                 /* L$k_sbo */
        movdqa  -80(%r10),%xmm0                 /* L$k_sbo+16 */
        .byte   102,15,56,0,226                 /* pshufb %xmm2,%xmm4 */
        pxor    %xmm5,%xmm4
        .byte   102,15,56,0,195                 /* pshufb %xmm3,%xmm0 */
        movdqa  64(%r11,%r10,1),%xmm1           /* L$k_sr + r11 */
        pxor    %xmm4,%xmm0
        .byte   102,15,56,0,193                 /* pshufb %xmm1,%xmm0 */
        .byte   0xf3,0xc3                       /* rep ret */

/*
 * _vpaes_decrypt_core
 * In:    %xmm0 = 16-byte input block
 *        %rdx  = key schedule; 240(%rdx) holds the loop count
 *        %xmm9-%xmm11 as loaded by _vpaes_preheat
 * Out:   %xmm0 = 16-byte result
 * Clobb: %rax, %r9, %r10, %r11, %xmm1-%xmm5, flags
 */
.p2align 4
_vpaes_decrypt_core:
        movq    %rdx,%r9                        /* r9 walks the key schedule */
        movl    240(%rdx),%eax                  /* loop count */
        movdqa  %xmm9,%xmm1
        movdqa  L$k_dipt(%rip),%xmm2
        pandn   %xmm0,%xmm1
        movq    %rax,%r11
        psrld   $4,%xmm1                        /* xmm1 = high nibbles */
        movdqu  (%r9),%xmm5                     /* first 16 bytes of the key schedule */
        shlq    $4,%r11
        pand    %xmm9,%xmm0                     /* xmm0 = low nibbles */
        .byte   102,15,56,0,208                 /* pshufb %xmm0,%xmm2 */
        movdqa  L$k_dipt+16(%rip),%xmm0
        xorq    $0x30,%r11
        leaq    L$k_dsbd(%rip),%r10             /* base for the L$k_dsb* table loads */
        .byte   102,15,56,0,193                 /* pshufb %xmm1,%xmm0 */
        andq    $0x30,%r11                      /* r11 = ((count << 4) ^ 0x30) & 0x30 */
        pxor    %xmm5,%xmm2
        movdqa  L$k_mc_forward+48(%rip),%xmm5   /* permutation rotated by palignr each round */
        pxor    %xmm2,%xmm0
        addq    $16,%r9
        addq    %r10,%r11                       /* r11 = L$k_dsbd + offset; the non-zero sum also leaves ZF clear for the first jnz */
        jmp     L$dec_entry

.p2align 4
L$dec_loop:
        /* Four table pairs applied in sequence; between pairs the running
           value in xmm0 is permuted by xmm5. */
        movdqa  -32(%r10),%xmm4                 /* L$k_dsb9 */
        movdqa  -16(%r10),%xmm1                 /* L$k_dsb9+16 */
        .byte   102,15,56,0,226                 /* pshufb %xmm2,%xmm4 */
        .byte   102,15,56,0,203                 /* pshufb %xmm3,%xmm1 */
        pxor    %xmm4,%xmm0
        movdqa  0(%r10),%xmm4                   /* L$k_dsbd */
        pxor    %xmm1,%xmm0
        movdqa  16(%r10),%xmm1                  /* L$k_dsbd+16 */

        .byte   102,15,56,0,226                 /* pshufb %xmm2,%xmm4 */
        .byte   102,15,56,0,197                 /* pshufb %xmm5,%xmm0 */
        .byte   102,15,56,0,203                 /* pshufb %xmm3,%xmm1 */
        pxor    %xmm4,%xmm0
        movdqa  32(%r10),%xmm4                  /* L$k_dsbb */
        pxor    %xmm1,%xmm0
        movdqa  48(%r10),%xmm1                  /* L$k_dsbb+16 */

        .byte   102,15,56,0,226                 /* pshufb %xmm2,%xmm4 */
        .byte   102,15,56,0,197                 /* pshufb %xmm5,%xmm0 */
        .byte   102,15,56,0,203                 /* pshufb %xmm3,%xmm1 */
        pxor    %xmm4,%xmm0
        movdqa  64(%r10),%xmm4                  /* L$k_dsbe */
        pxor    %xmm1,%xmm0
        movdqa  80(%r10),%xmm1                  /* L$k_dsbe+16 */

        .byte   102,15,56,0,226                 /* pshufb %xmm2,%xmm4 */
        .byte   102,15,56,0,197                 /* pshufb %xmm5,%xmm0 */
        .byte   102,15,56,0,203                 /* pshufb %xmm3,%xmm1 */
        pxor    %xmm4,%xmm0
        addq    $16,%r9                         /* advance to the next 16 bytes of key schedule */
        .byte   102,15,58,15,237,12             /* palignr $12,%xmm5,%xmm5 */
        pxor    %xmm1,%xmm0
        subq    $1,%rax                         /* sets ZF for the jnz at the end of L$dec_entry */

L$dec_entry:
        /* Top of a round: nibble split and index derivation through the
           L$k_inv tables; also folds in the round key loaded last time. */
        movdqa  %xmm9,%xmm1
        pandn   %xmm0,%xmm1
        movdqa  %xmm11,%xmm2
        psrld   $4,%xmm1                        /* xmm1 = high nibbles */
        pand    %xmm9,%xmm0                     /* xmm0 = low nibbles */
        .byte   102,15,56,0,208                 /* pshufb %xmm0,%xmm2 */
        movdqa  %xmm10,%xmm3
        pxor    %xmm1,%xmm0
        .byte   102,15,56,0,217                 /* pshufb %xmm1,%xmm3 */
        movdqa  %xmm10,%xmm4
        pxor    %xmm2,%xmm3
        .byte   102,15,56,0,224                 /* pshufb %xmm0,%xmm4 */
        pxor    %xmm2,%xmm4
        movdqa  %xmm10,%xmm2
        .byte   102,15,56,0,211                 /* pshufb %xmm3,%xmm2 */
        movdqa  %xmm10,%xmm3
        pxor    %xmm0,%xmm2
        .byte   102,15,56,0,220                 /* pshufb %xmm4,%xmm3 */
        movdqu  (%r9),%xmm0                     /* key schedule bytes for this round, accumulated into directly */
        pxor    %xmm1,%xmm3
        jnz     L$dec_loop                      /* flags come from subq above (or addq on first entry) */

        /* Final round: L$k_dsbo tables followed by a L$k_sr permutation. */
        movdqa  96(%r10),%xmm4                  /* L$k_dsbo */
        .byte   102,15,56,0,226                 /* pshufb %xmm2,%xmm4 */
        pxor    %xmm0,%xmm4
        movdqa  112(%r10),%xmm0                 /* L$k_dsbo+16 */
        movdqa  -352(%r11),%xmm2                /* L$k_sr + offset: L$k_sr sits 352 bytes before L$k_dsbd */
        .byte   102,15,56,0,195                 /* pshufb %xmm3,%xmm0 */
        pxor    %xmm4,%xmm0
        .byte   102,15,56,0,194                 /* pshufb %xmm2,%xmm0 */
        .byte   0xf3,0xc3                       /* rep ret */

/*
 * _vpaes_schedule_core
 * Key-schedule driver shared by _vpaes_set_encrypt_key and
 * _vpaes_set_decrypt_key.
 * In:    %rdi = user key bytes
 *        %esi = key length in bits (<192 takes the 128 path, ==192 the
 *               192 path, >192 the 256 path)
 *        %rdx = where the first 16 bytes of schedule are written; later
 *               writes move up (rcx == 0) or down (rcx != 0) by 16
 *        %rcx = 0 when called from set_encrypt_key, 1 from set_decrypt_key
 *        %r8  = starting byte offset into L$k_sr
 * Clobb: %rsi, %rdx, %r8, %r10, %r11, %xmm0-%xmm15 subset, flags.
 *        %xmm0-%xmm7 are zeroed before returning.
 */
.p2align 4
_vpaes_schedule_core:
        call    _vpaes_preheat                  /* load the shared constants into xmm9-xmm15 */
        movdqa  L$k_rcon(%rip),%xmm8            /* xmm8 is rotated by _vpaes_schedule_round */
        movdqu  (%rdi),%xmm0                    /* first 16 bytes of the user key */

        movdqa  %xmm0,%xmm3                     /* keep the untransformed bytes for the rcx != 0 path */
        leaq    L$k_ipt(%rip),%r11
        call    _vpaes_schedule_transform
        movdqa  %xmm0,%xmm7                     /* xmm7 = running schedule state */

        leaq    L$k_sr(%rip),%r10
        testq   %rcx,%rcx
        jnz     L$schedule_am_decrypting

        movdqu  %xmm0,(%rdx)                    /* rcx == 0: store the transformed key */
        jmp     L$schedule_go

L$schedule_am_decrypting:
        movdqa  (%r8,%r10,1),%xmm1              /* L$k_sr + r8 */
        .byte   102,15,56,0,217                 /* pshufb %xmm1,%xmm3 */
        movdqu  %xmm3,(%rdx)                    /* rcx != 0: store the permuted raw key */
        xorq    $0x30,%r8

L$schedule_go:
        cmpl    $192,%esi
        ja      L$schedule_256
        je      L$schedule_192
        /* fall through for anything below 192 bits */

L$schedule_128:
        movl    $10,%esi                        /* esi is reused as the iteration counter */

L$oop_schedule_128:
        call    _vpaes_schedule_round
        decq    %rsi
        jz      L$schedule_mangle_last
        call    _vpaes_schedule_mangle
        jmp     L$oop_schedule_128

.p2align 4
L$schedule_192:
        movdqu  8(%rdi),%xmm0                   /* key bytes 8..23 */
        call    _vpaes_schedule_transform       /* r11 still points at L$k_ipt */
        movdqa  %xmm0,%xmm6
        pxor    %xmm4,%xmm4
        movhlps %xmm4,%xmm6                     /* clear the low 8 bytes of xmm6 */
        movl    $4,%esi

L$oop_schedule_192:
        call    _vpaes_schedule_round
        .byte   102,15,58,15,198,8              /* palignr $8,%xmm6,%xmm0 */
        call    _vpaes_schedule_mangle
        call    _vpaes_schedule_192_smear
        call    _vpaes_schedule_mangle
        call    _vpaes_schedule_round
        decq    %rsi
        jz      L$schedule_mangle_last
        call    _vpaes_schedule_mangle
        call    _vpaes_schedule_192_smear
        jmp     L$oop_schedule_192

.p2align 4
L$schedule_256:
        movdqu  16(%rdi),%xmm0                  /* key bytes 16..31 */
        call    _vpaes_schedule_transform       /* r11 still points at L$k_ipt */
        movl    $7,%esi

L$oop_schedule_256:
        call    _vpaes_schedule_mangle
        movdqa  %xmm0,%xmm6                     /* keep this value for the low round below */

        call    _vpaes_schedule_round
        decq    %rsi
        jz      L$schedule_mangle_last
        call    _vpaes_schedule_mangle

        pshufd  $0xFF,%xmm0,%xmm0               /* broadcast the top dword */
        movdqa  %xmm7,%xmm5                     /* park xmm7; the low round does not write xmm5 */
        movdqa  %xmm6,%xmm7
        call    _vpaes_schedule_low_round
        movdqa  %xmm5,%xmm7                     /* restore xmm7 */

        jmp     L$oop_schedule_256

.p2align 4
L$schedule_mangle_last:
        /* Writes the final 16 bytes of schedule, then wipes xmm0-xmm7. */
        leaq    L$k_deskew(%rip),%r11           /* transform tables for rcx != 0 */
        testq   %rcx,%rcx
        jnz     L$schedule_mangle_last_dec

        movdqa  (%r8,%r10,1),%xmm1              /* L$k_sr + r8 */
        .byte   102,15,56,0,193                 /* pshufb %xmm1,%xmm0 */
        leaq    L$k_opt(%rip),%r11              /* transform tables for rcx == 0 */
        addq    $32,%rdx                        /* combined with the -16 below: net +16 */

L$schedule_mangle_last_dec:
        addq    $-16,%rdx
        pxor    L$k_s63(%rip),%xmm0
        call    _vpaes_schedule_transform
        movdqu  %xmm0,(%rdx)

        pxor    %xmm0,%xmm0                     /* clear key-dependent registers */
        pxor    %xmm1,%xmm1
        pxor    %xmm2,%xmm2
        pxor    %xmm3,%xmm3
        pxor    %xmm4,%xmm4
        pxor    %xmm5,%xmm5
        pxor    %xmm6,%xmm6
        pxor    %xmm7,%xmm7
        .byte   0xf3,0xc3                       /* rep ret */

/*
 * _vpaes_schedule_192_smear
 * In:    %xmm6, %xmm7
 * Out:   %xmm0 = xmm6 ^ pshufd(0x80, xmm6) ^ pshufd(0xFE, xmm7)
 *        %xmm6 = same value with its low 8 bytes cleared
 * Clobb: %xmm1
 */
.p2align 4
_vpaes_schedule_192_smear:
        pshufd  $0x80,%xmm6,%xmm1
        pshufd  $0xFE,%xmm7,%xmm0
        pxor    %xmm1,%xmm6
        pxor    %xmm1,%xmm1
        pxor    %xmm0,%xmm6
        movdqa  %xmm6,%xmm0
        movhlps %xmm1,%xmm6                     /* xmm1 is zero here: clears the low 8 bytes of xmm6 */
        .byte   0xf3,0xc3                       /* rep ret */

/*
 * _vpaes_schedule_round
 * Rotates %xmm8 by one byte, folds the byte shifted out into %xmm7,
 * rotates %xmm0, then falls through into _vpaes_schedule_low_round.
 * In:    %xmm0, %xmm7, %xmm8, plus %xmm9-%xmm13 from _vpaes_preheat
 * Out:   %xmm0 = %xmm7 = new schedule state
 * Clobb: %xmm1-%xmm4, %xmm8
 */
.p2align 4
_vpaes_schedule_round:
        pxor    %xmm1,%xmm1
        .byte   102,65,15,58,15,200,15          /* palignr $15,%xmm8,%xmm1 */
        .byte   102,69,15,58,15,192,15          /* palignr $15,%xmm8,%xmm8 */
        pxor    %xmm1,%xmm7

        pshufd  $0xFF,%xmm0,%xmm0               /* broadcast the top dword */
        .byte   102,15,58,15,192,1              /* palignr $1,%xmm0,%xmm0 */

        /* no return here: execution continues into the low round */

/*
 * _vpaes_schedule_low_round
 * Second entry point; also reached by fall-through from
 * _vpaes_schedule_round.
 */
_vpaes_schedule_low_round:
        /* xmm7 ^= xmm7 << 32; xmm7 ^= xmm7 << 64; xmm7 ^= L$k_s63 */
        movdqa  %xmm7,%xmm1
        pslldq  $4,%xmm7
        pxor    %xmm1,%xmm7
        movdqa  %xmm7,%xmm1
        pslldq  $8,%xmm7
        pxor    %xmm1,%xmm7
        pxor    L$k_s63(%rip),%xmm7

        /* Same nibble split / L$k_inv index derivation as L$enc_entry,
           applied to xmm0. */
        movdqa  %xmm9,%xmm1
        pandn   %xmm0,%xmm1
        psrld   $4,%xmm1
        pand    %xmm9,%xmm0
        movdqa  %xmm11,%xmm2
        .byte   102,15,56,0,208                 /* pshufb %xmm0,%xmm2 */
        pxor    %xmm1,%xmm0
        movdqa  %xmm10,%xmm3
        .byte   102,15,56,0,217                 /* pshufb %xmm1,%xmm3 */
        pxor    %xmm2,%xmm3
        movdqa  %xmm10,%xmm4
        .byte   102,15,56,0,224                 /* pshufb %xmm0,%xmm4 */
        pxor    %xmm2,%xmm4
        movdqa  %xmm10,%xmm2
        .byte   102,15,56,0,211                 /* pshufb %xmm3,%xmm2 */
        pxor    %xmm0,%xmm2
        movdqa  %xmm10,%xmm3
        .byte   102,15,56,0,220                 /* pshufb %xmm4,%xmm3 */
        pxor    %xmm1,%xmm3
        movdqa  %xmm13,%xmm4                    /* L$k_sb1 */
        .byte   102,15,56,0,226                 /* pshufb %xmm2,%xmm4 */
        movdqa  %xmm12,%xmm0                    /* L$k_sb1+16 */
        .byte   102,15,56,0,195                 /* pshufb %xmm3,%xmm0 */
        pxor    %xmm4,%xmm0

        pxor    %xmm7,%xmm0                     /* combine with the smeared xmm7 */
        movdqa  %xmm0,%xmm7
        .byte   0xf3,0xc3                       /* rep ret */

/*
 * _vpaes_schedule_transform
 * Applies the pair of 16-byte lookup tables at (%r11) and 16(%r11) to
 * the low and high nibbles of %xmm0 and XORs the two results.
 * In:    %xmm0, %r11 = table pair, %xmm9 = L$k_s0F
 * Out:   %xmm0
 * Clobb: %xmm1, %xmm2
 */
.p2align 4
_vpaes_schedule_transform:
        movdqa  %xmm9,%xmm1
        pandn   %xmm0,%xmm1
        psrld   $4,%xmm1                        /* xmm1 = high nibbles */
        pand    %xmm9,%xmm0                     /* xmm0 = low nibbles */
        movdqa  (%r11),%xmm2
        .byte   102,15,56,0,208                 /* pshufb %xmm0,%xmm2 */
        movdqa  16(%r11),%xmm0
        .byte   102,15,56,0,193                 /* pshufb %xmm1,%xmm0 */
        pxor    %xmm2,%xmm0
        .byte   0xf3,0xc3                       /* rep ret */

/*
 * _vpaes_schedule_mangle
 * Converts %xmm0 to its stored form and writes it to the schedule.
 * In:    %xmm0 = value to store (preserved)
 *        %rcx  = 0: advance %rdx by 16 before the store
 *                !=0: move %rdx back by 16 before the store
 *        %r8   = offset into L$k_sr, %r10 = L$k_sr
 * Out:   16 bytes written at the updated %rdx;
 *        %r8 = (%r8 - 16) & 0x30
 * Clobb: %xmm1-%xmm5, %r11 (rcx != 0 path), flags
 */
.p2align 4
_vpaes_schedule_mangle:
        movdqa  %xmm0,%xmm4                     /* work on a copy */
        movdqa  L$k_mc_forward(%rip),%xmm5
        testq   %rcx,%rcx
        jnz     L$schedule_mangle_dec

        addq    $16,%rdx
        pxor    L$k_s63(%rip),%xmm4
        .byte   102,15,56,0,229                 /* pshufb %xmm5,%xmm4 */
        movdqa  %xmm4,%xmm3
        .byte   102,15,56,0,229                 /* pshufb %xmm5,%xmm4 */
        pxor    %xmm4,%xmm3
        .byte   102,15,56,0,229                 /* pshufb %xmm5,%xmm4 */
        pxor    %xmm4,%xmm3                     /* xmm3 = XOR of the three successive permutations */

        jmp     L$schedule_mangle_both
.p2align 4
L$schedule_mangle_dec:
        /* Four table pairs (L$k_dksd, L$k_dksb, L$k_dkse, L$k_dks9) are
           contiguous, so one base register reaches all of them. */
        leaq    L$k_dksd(%rip),%r11
        movdqa  %xmm9,%xmm1
        pandn   %xmm4,%xmm1
        psrld   $4,%xmm1                        /* xmm1 = high nibbles */
        pand    %xmm9,%xmm4                     /* xmm4 = low nibbles */

        movdqa  0(%r11),%xmm2                   /* L$k_dksd */
        .byte   102,15,56,0,212                 /* pshufb %xmm4,%xmm2 */
        movdqa  16(%r11),%xmm3                  /* L$k_dksd+16 */
        .byte   102,15,56,0,217                 /* pshufb %xmm1,%xmm3 */
        pxor    %xmm2,%xmm3
        .byte   102,15,56,0,221                 /* pshufb %xmm5,%xmm3 */

        movdqa  32(%r11),%xmm2                  /* L$k_dksb */
        .byte   102,15,56,0,212                 /* pshufb %xmm4,%xmm2 */
        pxor    %xmm3,%xmm2
        movdqa  48(%r11),%xmm3                  /* L$k_dksb+16 */
        .byte   102,15,56,0,217                 /* pshufb %xmm1,%xmm3 */
        pxor    %xmm2,%xmm3
        .byte   102,15,56,0,221                 /* pshufb %xmm5,%xmm3 */

        movdqa  64(%r11),%xmm2                  /* L$k_dkse */
        .byte   102,15,56,0,212                 /* pshufb %xmm4,%xmm2 */
        pxor    %xmm3,%xmm2
        movdqa  80(%r11),%xmm3                  /* L$k_dkse+16 */
        .byte   102,15,56,0,217                 /* pshufb %xmm1,%xmm3 */
        pxor    %xmm2,%xmm3
        .byte   102,15,56,0,221                 /* pshufb %xmm5,%xmm3 */

        movdqa  96(%r11),%xmm2                  /* L$k_dks9 */
        .byte   102,15,56,0,212                 /* pshufb %xmm4,%xmm2 */
        pxor    %xmm3,%xmm2
        movdqa  112(%r11),%xmm3                 /* L$k_dks9+16 */
        .byte   102,15,56,0,217                 /* pshufb %xmm1,%xmm3 */
        pxor    %xmm2,%xmm3

        addq    $-16,%rdx

L$schedule_mangle_both:
        movdqa  (%r8,%r10,1),%xmm1              /* L$k_sr + r8 */
        .byte   102,15,56,0,217                 /* pshufb %xmm1,%xmm3 */
        addq    $-16,%r8
        andq    $0x30,%r8                       /* step r8 backwards through 48,32,16,0 */
        movdqu  %xmm3,(%rdx)
        .byte   0xf3,0xc3                       /* rep ret */

/*
 * _vpaes_set_encrypt_key
 * In:    %rdi = user key, %esi = key length in bits, %rdx = key schedule
 * Out:   %eax = 0; 240(%rdx) = (bits >> 5) + 5; schedule written from
 *        (%rdx) upward
 * Clobb: as _vpaes_schedule_core, plus %ecx, %r8d
 */
.globl  _vpaes_set_encrypt_key
.private_extern _vpaes_set_encrypt_key

.p2align 4
_vpaes_set_encrypt_key:
        movl    %esi,%eax
        shrl    $5,%eax
        addl    $5,%eax
        movl    %eax,240(%rdx)                  /* loop count read back by the encrypt/decrypt cores */

        movl    $0,%ecx                         /* rcx = 0 selects the upward-writing path */
        movl    $0x30,%r8d                      /* initial L$k_sr offset */
        call    _vpaes_schedule_core
        xorl    %eax,%eax                       /* always returns 0 */
        .byte   0xf3,0xc3                       /* rep ret */

/*
 * _vpaes_set_decrypt_key
 * In:    %rdi = user key, %esi = key length in bits, %rdx = key schedule
 * Out:   %eax = 0; 240(%rdx) = (bits >> 5) + 5; schedule written from
 *        the far end of the buffer downward
 * Clobb: as _vpaes_schedule_core, plus %ecx, %r8d
 */
.globl  _vpaes_set_decrypt_key
.private_extern _vpaes_set_decrypt_key

.p2align 4
_vpaes_set_decrypt_key:
        movl    %esi,%eax
        shrl    $5,%eax
        addl    $5,%eax
        movl    %eax,240(%rdx)                  /* loop count read back by the encrypt/decrypt cores */
        shll    $4,%eax
        leaq    16(%rdx,%rax,1),%rdx            /* rdx = schedule + 16 * (count + 1) */

        movl    $1,%ecx                         /* rcx != 0 selects the downward-writing path */
        movl    %esi,%r8d
        shrl    $1,%r8d
        andl    $32,%r8d
        xorl    $32,%r8d                        /* r8 = ((bits >> 1) & 32) ^ 32: 32 for 128/256 bits, 0 for 192 */
        call    _vpaes_schedule_core
        xorl    %eax,%eax                       /* always returns 0 */
        .byte   0xf3,0xc3                       /* rep ret */

/*
 * _vpaes_encrypt
 * In:    %rdi = 16-byte input (may be unaligned), %rsi = 16-byte output,
 *        %rdx = key schedule
 * Clobb: %rax, %r9-%r11, %xmm0-%xmm5, %xmm9-%xmm15, flags
 */
.globl  _vpaes_encrypt
.private_extern _vpaes_encrypt

.p2align 4
_vpaes_encrypt:
        movdqu  (%rdi),%xmm0
        call    _vpaes_preheat
        call    _vpaes_encrypt_core
        movdqu  %xmm0,(%rsi)
        .byte   0xf3,0xc3                       /* rep ret */

/*
 * _vpaes_decrypt
 * In:    %rdi = 16-byte input (may be unaligned), %rsi = 16-byte output,
 *        %rdx = key schedule
 * Clobb: %rax, %r9-%r11, %xmm0-%xmm5, %xmm9-%xmm15, flags
 */
.globl  _vpaes_decrypt
.private_extern _vpaes_decrypt

.p2align 4
_vpaes_decrypt:
        movdqu  (%rdi),%xmm0
        call    _vpaes_preheat
        call    _vpaes_decrypt_core
        movdqu  %xmm0,(%rsi)
        .byte   0xf3,0xc3                       /* rep ret */

/*
 * _vpaes_cbc_encrypt
 * In:    %rdi = input, %rsi = output, %rdx = length in bytes,
 *        %rcx = key schedule, %r8 = 16-byte chaining value (read and
 *        written back), %r9d = non-zero to encrypt, zero to decrypt
 * Behaviour visible in the code:
 *   - returns immediately, touching nothing, when length < 16;
 *   - processes whole 16-byte blocks only; a trailing partial block is
 *     ignored;
 *   - the updated chaining value is stored back through %r8.
 * Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %r9-%r11, %xmm0-%xmm7,
 *        %xmm9-%xmm15, flags
 */
.globl  _vpaes_cbc_encrypt
.private_extern _vpaes_cbc_encrypt

.p2align 4
_vpaes_cbc_encrypt:
        xchgq   %rcx,%rdx                       /* rdx = key (where the cores expect it), rcx = length */
        subq    $16,%rcx
        jc      L$cbc_abort                     /* fewer than 16 bytes: nothing to do */
        movdqu  (%r8),%xmm6                     /* xmm6 = chaining value */
        subq    %rdi,%rsi                       /* rsi = out - in, so (%rsi,%rdi) tracks the output as rdi advances */
        call    _vpaes_preheat
        cmpl    $0,%r9d
        je      L$cbc_dec_loop
        jmp     L$cbc_enc_loop
.p2align 4
L$cbc_enc_loop:
        movdqu  (%rdi),%xmm0
        pxor    %xmm6,%xmm0                     /* XOR the input with the previous output block */
        call    _vpaes_encrypt_core
        movdqa  %xmm0,%xmm6                     /* this output becomes the next chaining value */
        movdqu  %xmm0,(%rsi,%rdi,1)
        leaq    16(%rdi),%rdi                   /* lea keeps flags untouched */
        subq    $16,%rcx
        jnc     L$cbc_enc_loop                  /* continue while at least 16 bytes remained */
        jmp     L$cbc_done
.p2align 4
L$cbc_dec_loop:
        movdqu  (%rdi),%xmm0
        movdqa  %xmm0,%xmm7                     /* keep the input block: it is the next chaining value */
        call    _vpaes_decrypt_core
        pxor    %xmm6,%xmm0                     /* XOR with the previous input block */
        movdqa  %xmm7,%xmm6
        movdqu  %xmm0,(%rsi,%rdi,1)
        leaq    16(%rdi),%rdi
        subq    $16,%rcx
        jnc     L$cbc_dec_loop                  /* continue while at least 16 bytes remained */
L$cbc_done:
        movdqu  %xmm6,(%r8)                     /* hand the chaining value back to the caller */
L$cbc_abort:
        .byte   0xf3,0xc3                       /* rep ret */

/*
 * _vpaes_preheat
 * Loads the constants shared by all core routines.
 * Out:   %r10 = L$k_s0F, %xmm9-%xmm15 as listed in the file header
 */
.p2align 4
_vpaes_preheat:
        leaq    L$k_s0F(%rip),%r10
        movdqa  -32(%r10),%xmm10                /* L$k_inv */
        movdqa  -16(%r10),%xmm11                /* L$k_inv+16 */
        movdqa  0(%r10),%xmm9                   /* L$k_s0F */
        movdqa  48(%r10),%xmm13                 /* L$k_sb1 */
        movdqa  64(%r10),%xmm12                 /* L$k_sb1+16 */
        movdqa  80(%r10),%xmm15                 /* L$k_sb2 */
        movdqa  96(%r10),%xmm14                 /* L$k_sb2+16 */
        .byte   0xf3,0xc3                       /* rep ret */

/*
 * Constant tables.  64-byte aligned; all loads above use movdqa, so the
 * 16-byte alignment of every entry is required.  Tables are addressed
 * relative to one another, so keep their order and sizes unchanged.
 */
.p2align 6
_vpaes_consts:
L$k_inv:
.quad   0x0E05060F0D080180, 0x040703090A0B0C02
.quad   0x01040A060F0B0780, 0x030D0E0C02050809

L$k_s0F:
.quad   0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F

L$k_ipt:
.quad   0xC2B2E8985A2A7000, 0xCABAE09052227808
.quad   0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81

L$k_sb1:
.quad   0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
.quad   0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
L$k_sb2:
.quad   0xE27A93C60B712400, 0x5EB7E955BC982FCD
.quad   0x69EB88400AE12900, 0xC2A163C8AB82234A
L$k_sbo:
.quad   0xD0D26D176FBDC700, 0x15AABF7AC502A878
.quad   0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA

L$k_mc_forward:
.quad   0x0407060500030201, 0x0C0F0E0D080B0A09
.quad   0x080B0A0904070605, 0x000302010C0F0E0D
.quad   0x0C0F0E0D080B0A09, 0x0407060500030201
.quad   0x000302010C0F0E0D, 0x080B0A0904070605

L$k_mc_backward:
.quad   0x0605040702010003, 0x0E0D0C0F0A09080B
.quad   0x020100030E0D0C0F, 0x0A09080B06050407
.quad   0x0E0D0C0F0A09080B, 0x0605040702010003
.quad   0x0A09080B06050407, 0x020100030E0D0C0F

L$k_sr:
.quad   0x0706050403020100, 0x0F0E0D0C0B0A0908
.quad   0x030E09040F0A0500, 0x0B06010C07020D08
.quad   0x0F060D040B020900, 0x070E050C030A0108
.quad   0x0B0E0104070A0D00, 0x0306090C0F020508

L$k_rcon:
.quad   0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81

L$k_s63:
.quad   0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B

L$k_opt:
.quad   0xFF9F4929D6B66000, 0xF7974121DEBE6808
.quad   0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0

L$k_deskew:
.quad   0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
.quad   0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77

/* Table pairs used by L$schedule_mangle_dec. */
L$k_dksd:
.quad   0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
.quad   0x41C277F4B5368300, 0x5FDC69EAAB289D1E
L$k_dksb:
.quad   0x9A4FCA1F8550D500, 0x03D653861CC94C99
.quad   0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
L$k_dkse:
.quad   0xD5031CCA1FC9D600, 0x53859A4C994F5086
.quad   0xA23196054FDC7BE8, 0xCD5EF96A20B31487
L$k_dks9:
.quad   0xB6116FC87ED9A700, 0x4AED933482255BFC
.quad   0x4576516227143300, 0x8BB89FACE9DAFDCE

/* Tables used by _vpaes_decrypt_core. */
L$k_dipt:
.quad   0x0F505B040B545F00, 0x154A411E114E451A
.quad   0x86E383E660056500, 0x12771772F491F194

L$k_dsb9:
.quad   0x851C03539A86D600, 0xCAD51F504F994CC9
.quad   0xC03B1789ECD74900, 0x725E2C9EB2FBA565
L$k_dsbd:
.quad   0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
.quad   0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
L$k_dsbb:
.quad   0xD022649296B44200, 0x602646F6B0F2D404
.quad   0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
L$k_dsbe:
.quad   0x46F2929626D4D000, 0x2242600464B4F6B0
.quad   0x0C55A6CDFFAAC100, 0x9467F36B98593E32
L$k_dsbo:
.quad   0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
.quad   0x12D7560F93441D00, 0xCA4B8159D8C58E9C
/* NUL-terminated ASCII: "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" */
.byte   86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.p2align 6

#endif