# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#include "ring_core_generated/prefix_symbols_asm.h"
.text

# Syntax: GNU as, AT&T operand order (source first, destination last).
#
# SSSE3 instructions are emitted as raw .byte sequences. The decoded form is
# given in the trailing comment on each such line:
#   pshufb  src,dst       is  102,[REX,]15,56,0,modrm
#   palignr $n,src,dst    is  102,[REX,]15,58,15,modrm,n
#   rep ret               is  0xf3,0xc3
#
# Register roles set up by _vpaes_preheat and relied on below:
#   %xmm9          = .Lk_s0F (the 0x0F byte mask)
#   %xmm10, %xmm11 = .Lk_inv, .Lk_inv+16
#   %xmm13, %xmm12 = .Lk_sb1, .Lk_sb1+16
#   %xmm15, %xmm14 = .Lk_sb2, .Lk_sb2+16
#
# Tables in _vpaes_consts are reached through fixed displacements from a
# nearby label (for example -96(%r10) with %r10 = .Lk_mc_backward reads
# .Lk_sbo), so the order and size of the tables must stay as they are.
#
# None of the routines in this file touch the stack apart from call/ret.


# _vpaes_encrypt_core
#
# Encrypts the single 16-byte block held in %xmm0.
#
# In:       %xmm0 = input block
#           %rdx  = key schedule: 16-byte round keys starting at offset 0,
#                   32-bit loop count at offset 240
#           %xmm9-%xmm15 as loaded by _vpaes_preheat
# Out:      %xmm0 = result block
# Clobbers: %rax, %r9, %r10, %r11, %xmm1-%xmm5, flags
.type	_vpaes_encrypt_core,@function
.align	16
_vpaes_encrypt_core:
.cfi_startproc
	movq	%rdx,%r9	# %r9 walks the round keys
	movq	$16,%r11	# %r11 = rotating table offset, kept within 0..0x30
	movl	240(%rdx),%eax	# loop count; the 32-bit load zero-extends into %rax
	movdqa	%xmm9,%xmm1
	movdqa	.Lk_ipt(%rip),%xmm2
	pandn	%xmm0,%xmm1	# %xmm1 = high nibble of every byte
	movdqu	(%r9),%xmm5	# first round key
	psrld	$4,%xmm1	# shift the high nibbles down to bits 0..3
	pand	%xmm9,%xmm0	# %xmm0 = low nibble of every byte
.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2   (.Lk_ipt lookup by low nibble)
	movdqa	.Lk_ipt+16(%rip),%xmm0
.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0   (.Lk_ipt+16 lookup by high nibble)
	pxor	%xmm5,%xmm2
	addq	$16,%r9	# last flag-writing instruction before the first jnz below
	pxor	%xmm2,%xmm0
	leaq	.Lk_mc_backward(%rip),%r10
	jmp	.Lenc_entry

.align	16
.Lenc_loop:
	# Middle round. %xmm2 and %xmm3 are the two index vectors produced by
	# the .Lenc_entry section, %xmm5 is the current round key.
	movdqa	%xmm13,%xmm4	# .Lk_sb1
	movdqa	%xmm12,%xmm0	# .Lk_sb1+16
.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
	pxor	%xmm5,%xmm4
	movdqa	%xmm15,%xmm5	# .Lk_sb2
	pxor	%xmm4,%xmm0
	movdqa	-64(%r11,%r10,1),%xmm1	# .Lk_mc_forward + %r11
.byte	102,15,56,0,234	# pshufb %xmm2,%xmm5
	movdqa	(%r11,%r10,1),%xmm4	# .Lk_mc_backward + %r11
	movdqa	%xmm14,%xmm2	# .Lk_sb2+16
.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
	movdqa	%xmm0,%xmm3
	pxor	%xmm5,%xmm2
.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
	addq	$16,%r9	# advance to the next round key
	pxor	%xmm2,%xmm0
.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
	addq	$16,%r11
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
	andq	$0x30,%r11	# wrap the table offset
	subq	$1,%rax	# sets ZF for the jnz at the end of .Lenc_entry
	pxor	%xmm3,%xmm0

.Lenc_entry:
	# Split %xmm0 into nibbles and run the .Lk_inv lookups, leaving the
	# index vectors in %xmm2 and %xmm3. Nothing in this section writes
	# flags.
	movdqa	%xmm9,%xmm1
	movdqa	%xmm11,%xmm5	# .Lk_inv+16
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1
	pand	%xmm9,%xmm0
.byte	102,15,56,0,232	# pshufb %xmm0,%xmm5
	movdqa	%xmm10,%xmm3	# .Lk_inv
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
	movdqa	%xmm10,%xmm4
	pxor	%xmm5,%xmm3
.byte	102,15,56,0,224	# pshufb %xmm0,%xmm4
	movdqa	%xmm10,%xmm2
	pxor	%xmm5,%xmm4
.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
	movdqa	%xmm10,%xmm3
	pxor	%xmm0,%xmm2
.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
	movdqu	(%r9),%xmm5	# round key for the step that follows
	pxor	%xmm1,%xmm3
	# ZF comes from subq $1,%rax in the loop body, or from addq $16,%r9
	# on the first pass (reached by the jmp above).
	jnz	.Lenc_loop

	# Last round: .Lk_sbo lookups, round key, then the .Lk_sr permutation.
	movdqa	-96(%r10),%xmm4	# .Lk_sbo
	movdqa	-80(%r10),%xmm0	# .Lk_sbo+16
.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
	pxor	%xmm5,%xmm4
.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
	movdqa	64(%r11,%r10,1),%xmm1	# .Lk_sr + %r11
	pxor	%xmm4,%xmm0
.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core


# _vpaes_encrypt_core_2x
#
# Same instruction sequence as _vpaes_encrypt_core, interleaved for two
# blocks. The second block uses %xmm6, %xmm7, %xmm8, %xmm11, %xmm12 and
# %xmm13 where the first uses %xmm0, %xmm1, %xmm2, %xmm3, %xmm4 and %xmm5.
# Because %xmm11-%xmm13 serve as scratch here, .Lk_inv+16, .Lk_sb1 and
# .Lk_sb2 are read from memory instead of from the preheated registers.
#
# In:       %xmm0, %xmm6 = the two input blocks
#           %rdx         = key schedule (layout as for _vpaes_encrypt_core)
#           %xmm9, %xmm10 as loaded by _vpaes_preheat
# Out:      %xmm0, %xmm6 = the two result blocks
# Clobbers: %rax, %r9, %r10, %r11, %xmm1-%xmm5, %xmm7, %xmm8,
#           %xmm11-%xmm13, flags
.type	_vpaes_encrypt_core_2x,@function
.align	16
_vpaes_encrypt_core_2x:
.cfi_startproc
	movq	%rdx,%r9
	movq	$16,%r11
	movl	240(%rdx),%eax
	movdqa	%xmm9,%xmm1
	movdqa	%xmm9,%xmm7
	movdqa	.Lk_ipt(%rip),%xmm2
	movdqa	%xmm2,%xmm8
	pandn	%xmm0,%xmm1
	pandn	%xmm6,%xmm7
	movdqu	(%r9),%xmm5	# first round key, shared by both blocks

	psrld	$4,%xmm1
	psrld	$4,%xmm7
	pand	%xmm9,%xmm0
	pand	%xmm9,%xmm6
.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2
.byte	102,68,15,56,0,198	# pshufb %xmm6,%xmm8
	movdqa	.Lk_ipt+16(%rip),%xmm0
	movdqa	%xmm0,%xmm6
.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
.byte	102,15,56,0,247	# pshufb %xmm7,%xmm6
	pxor	%xmm5,%xmm2
	pxor	%xmm5,%xmm8
	addq	$16,%r9	# last flag-writing instruction before the first jnz below
	pxor	%xmm2,%xmm0
	pxor	%xmm8,%xmm6
	leaq	.Lk_mc_backward(%rip),%r10
	jmp	.Lenc2x_entry

.align	16
.Lenc2x_loop:
	# Middle round for both blocks.
	movdqa	.Lk_sb1(%rip),%xmm4
	movdqa	.Lk_sb1+16(%rip),%xmm0
	movdqa	%xmm4,%xmm12
	movdqa	%xmm0,%xmm6
.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
.byte	102,69,15,56,0,224	# pshufb %xmm8,%xmm12
.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
.byte	102,65,15,56,0,243	# pshufb %xmm11,%xmm6
	pxor	%xmm5,%xmm4
	pxor	%xmm5,%xmm12
	movdqa	.Lk_sb2(%rip),%xmm5
	movdqa	%xmm5,%xmm13
	pxor	%xmm4,%xmm0
	pxor	%xmm12,%xmm6
	movdqa	-64(%r11,%r10,1),%xmm1	# .Lk_mc_forward + %r11

.byte	102,15,56,0,234	# pshufb %xmm2,%xmm5
.byte	102,69,15,56,0,232	# pshufb %xmm8,%xmm13
	movdqa	(%r11,%r10,1),%xmm4	# .Lk_mc_backward + %r11

	movdqa	.Lk_sb2+16(%rip),%xmm2
	movdqa	%xmm2,%xmm8
.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
.byte	102,69,15,56,0,195	# pshufb %xmm11,%xmm8
	movdqa	%xmm0,%xmm3
	movdqa	%xmm6,%xmm11
	pxor	%xmm5,%xmm2
	pxor	%xmm13,%xmm8
.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
.byte	102,15,56,0,241	# pshufb %xmm1,%xmm6
	addq	$16,%r9
	pxor	%xmm2,%xmm0
	pxor	%xmm8,%xmm6
.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
.byte	102,68,15,56,0,220	# pshufb %xmm4,%xmm11
	addq	$16,%r11
	pxor	%xmm0,%xmm3
	pxor	%xmm6,%xmm11
.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
.byte	102,15,56,0,241	# pshufb %xmm1,%xmm6
	andq	$0x30,%r11
	subq	$1,%rax	# sets ZF for the jnz at the end of .Lenc2x_entry
	pxor	%xmm3,%xmm0
	pxor	%xmm11,%xmm6

.Lenc2x_entry:
	# Nibble split and .Lk_inv lookups for both blocks. Nothing in this
	# section writes flags.
	movdqa	%xmm9,%xmm1
	movdqa	%xmm9,%xmm7
	movdqa	.Lk_inv+16(%rip),%xmm5
	movdqa	%xmm5,%xmm13
	pandn	%xmm0,%xmm1
	pandn	%xmm6,%xmm7
	psrld	$4,%xmm1
	psrld	$4,%xmm7
	pand	%xmm9,%xmm0
	pand	%xmm9,%xmm6
.byte	102,15,56,0,232	# pshufb %xmm0,%xmm5
.byte	102,68,15,56,0,238	# pshufb %xmm6,%xmm13
	movdqa	%xmm10,%xmm3
	movdqa	%xmm10,%xmm11
	pxor	%xmm1,%xmm0
	pxor	%xmm7,%xmm6
.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
.byte	102,68,15,56,0,223	# pshufb %xmm7,%xmm11
	movdqa	%xmm10,%xmm4
	movdqa	%xmm10,%xmm12
	pxor	%xmm5,%xmm3
	pxor	%xmm13,%xmm11
.byte	102,15,56,0,224	# pshufb %xmm0,%xmm4
.byte	102,68,15,56,0,230	# pshufb %xmm6,%xmm12
	movdqa	%xmm10,%xmm2
	movdqa	%xmm10,%xmm8
	pxor	%xmm5,%xmm4
	pxor	%xmm13,%xmm12
.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
.byte	102,69,15,56,0,195	# pshufb %xmm11,%xmm8
	movdqa	%xmm10,%xmm3
	movdqa	%xmm10,%xmm11
	pxor	%xmm0,%xmm2
	pxor	%xmm6,%xmm8
.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
.byte	102,69,15,56,0,220	# pshufb %xmm12,%xmm11
	movdqu	(%r9),%xmm5	# round key for the step that follows

	pxor	%xmm1,%xmm3
	pxor	%xmm7,%xmm11
	# ZF comes from subq $1,%rax in the loop body, or from addq $16,%r9
	# on the first pass.
	jnz	.Lenc2x_loop

	# Last round for both blocks.
	movdqa	-96(%r10),%xmm4	# .Lk_sbo
	movdqa	-80(%r10),%xmm0	# .Lk_sbo+16
	movdqa	%xmm4,%xmm12
	movdqa	%xmm0,%xmm6
.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
.byte	102,69,15,56,0,224	# pshufb %xmm8,%xmm12
	pxor	%xmm5,%xmm4
	pxor	%xmm5,%xmm12
.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
.byte	102,65,15,56,0,243	# pshufb %xmm11,%xmm6
	movdqa	64(%r11,%r10,1),%xmm1	# .Lk_sr + %r11

	pxor	%xmm4,%xmm0
	pxor	%xmm12,%xmm6
.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
.byte	102,15,56,0,241	# pshufb %xmm1,%xmm6
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_vpaes_encrypt_core_2x,.-_vpaes_encrypt_core_2x


# _vpaes_schedule_core
#
# Expands a user key into the key schedule at (%rdx).
#
# In:       %rdi = user key bytes (16 bytes are read; 32 on the 256 path)
#           %esi = key size in bits; values above 192 take the 256 path,
#                  everything else takes the 128 path
#           %rdx = destination for the 16-byte round keys
#           %r8  = starting offset into .Lk_sr (the one caller in this
#                  file passes 0x30)
# Out:      round keys stored at (%rdx): 11 on the 128 path, 15 on the
#           256 path. %xmm0-%xmm7 are zeroed before returning.
# Clobbers: %rsi, %rdx, %r8, %r10, %r11, %xmm0-%xmm15, flags
.type	_vpaes_schedule_core,@function
.align	16
_vpaes_schedule_core:
.cfi_startproc
	call	_vpaes_preheat
	movdqa	.Lk_rcon(%rip),%xmm8
	movdqu	(%rdi),%xmm0	# first 16 key bytes

	movdqa	%xmm0,%xmm3
	leaq	.Lk_ipt(%rip),%r11
	call	_vpaes_schedule_transform	# .Lk_ipt transform of the key
	movdqa	%xmm0,%xmm7

	leaq	.Lk_sr(%rip),%r10

	movdqu	%xmm0,(%rdx)	# round key 0

.Lschedule_go:
	cmpl	$192,%esi
	ja	.Lschedule_256

.Lschedule_128:
	movl	$10,%esi	# ten schedule rounds

.Loop_schedule_128:
	call	_vpaes_schedule_round
	decq	%rsi
	jz	.Lschedule_mangle_last
	call	_vpaes_schedule_mangle	# advances %rdx and stores the round key
	jmp	.Loop_schedule_128

.align	16
.Lschedule_256:
	movdqu	16(%rdi),%xmm0	# second 16 key bytes
	call	_vpaes_schedule_transform	# %r11 still points at .Lk_ipt
	movl	$7,%esi	# seven passes, two round keys per pass

.Loop_schedule_256:
	call	_vpaes_schedule_mangle
	movdqa	%xmm0,%xmm6	# keep this value for the low round below

	call	_vpaes_schedule_round
	decq	%rsi
	jz	.Lschedule_mangle_last
	call	_vpaes_schedule_mangle

	pshufd	$0xFF,%xmm0,%xmm0	# broadcast the top dword
	movdqa	%xmm7,%xmm5	# park %xmm7 while the low round uses it
	movdqa	%xmm6,%xmm7
	call	_vpaes_schedule_low_round
	movdqa	%xmm5,%xmm7	# restore

	jmp	.Loop_schedule_256

.align	16
.Lschedule_mangle_last:
	# NOTE: %r11 is loaded with .Lk_deskew here and reloaded with .Lk_opt
	# three instructions later without being read in between.
	leaq	.Lk_deskew(%rip),%r11

	movdqa	(%r8,%r10,1),%xmm1	# .Lk_sr + %r8
.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
	leaq	.Lk_opt(%rip),%r11
	addq	$32,%rdx

.Lschedule_mangle_last_dec:
	addq	$-16,%rdx	# net effect with the addq above: %rdx += 16
	pxor	.Lk_s63(%rip),%xmm0
	call	_vpaes_schedule_transform	# .Lk_opt transform
	movdqu	%xmm0,(%rdx)	# final round key

	# Wipe the registers that held key material.
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_vpaes_schedule_core,.-_vpaes_schedule_core


# _vpaes_schedule_round and _vpaes_schedule_low_round
#
# One key-schedule step. Entry at _vpaes_schedule_round first folds the top
# byte of %xmm8 into %xmm7, rotates %xmm8 by one byte, and replaces %xmm0
# with its top dword broadcast and rotated by one byte. Entry at
# _vpaes_schedule_low_round skips that prefix.
#
# In:       %xmm0 = input value
#           %xmm7 = previous schedule value
#           %xmm8 = rotating constant (used by the full round only)
#           %xmm9-%xmm13 as loaded by _vpaes_preheat
# Out:      %xmm0 = %xmm7 = new schedule value
# Clobbers: %xmm1-%xmm4; the full round also rotates %xmm8
.type	_vpaes_schedule_round,@function
.align	16
_vpaes_schedule_round:
.cfi_startproc
	pxor	%xmm1,%xmm1
.byte	102,65,15,58,15,200,15	# palignr $15,%xmm8,%xmm1   (byte 15 of %xmm8 into byte 0 of %xmm1)
.byte	102,69,15,58,15,192,15	# palignr $15,%xmm8,%xmm8   (rotate %xmm8 by one byte)
	pxor	%xmm1,%xmm7

	pshufd	$0xFF,%xmm0,%xmm0	# broadcast the top dword
.byte	102,15,58,15,192,1	# palignr $1,%xmm0,%xmm0    (rotate %xmm0 by one byte)

_vpaes_schedule_low_round:
	# Smear %xmm7: x ^= x << 32, then x ^= x << 64, then XOR with .Lk_s63.
	movdqa	%xmm7,%xmm1
	pslldq	$4,%xmm7
	pxor	%xmm1,%xmm7
	movdqa	%xmm7,%xmm1
	pslldq	$8,%xmm7
	pxor	%xmm1,%xmm7
	pxor	.Lk_s63(%rip),%xmm7

	# Nibble split of %xmm0 followed by the same .Lk_inv lookup pattern as
	# .Lenc_entry, then the .Lk_sb1 output lookups.
	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1
	pand	%xmm9,%xmm0
	movdqa	%xmm11,%xmm2
.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2
	pxor	%xmm1,%xmm0
	movdqa	%xmm10,%xmm3
.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
	pxor	%xmm2,%xmm3
	movdqa	%xmm10,%xmm4
.byte	102,15,56,0,224	# pshufb %xmm0,%xmm4
	pxor	%xmm2,%xmm4
	movdqa	%xmm10,%xmm2
.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
	pxor	%xmm0,%xmm2
	movdqa	%xmm10,%xmm3
.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
	pxor	%xmm1,%xmm3
	movdqa	%xmm13,%xmm4
.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
	movdqa	%xmm12,%xmm0
.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
	pxor	%xmm4,%xmm0

	# Combine with the smeared value; the result is left in both registers.
	pxor	%xmm7,%xmm0
	movdqa	%xmm0,%xmm7
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_vpaes_schedule_round,.-_vpaes_schedule_round


# _vpaes_schedule_transform
#
# Applies a two-table nibble transform: each byte b of %xmm0 becomes
# table_lo[b & 0x0F] ^ table_hi[b >> 4].
#
# In:       %xmm0 = input
#           %r11  = address of the table pair (16 bytes each, 16-byte aligned
#                   because movdqa is used)
#           %xmm9 = 0x0F byte mask
# Out:      %xmm0 = transformed value
# Clobbers: %xmm1, %xmm2
.type	_vpaes_schedule_transform,@function
.align	16
_vpaes_schedule_transform:
.cfi_startproc
	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1	# %xmm1 = high nibbles
	pand	%xmm9,%xmm0	# %xmm0 = low nibbles
	movdqa	(%r11),%xmm2
.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2
	movdqa	16(%r11),%xmm0
.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
	pxor	%xmm2,%xmm0
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform


# _vpaes_schedule_mangle
#
# Converts the schedule value in %xmm0 into its stored form and writes it
# to the next 16-byte slot of the output.
#
# In:       %xmm0 = schedule value (left unchanged)
#           %rdx  = address of the previously written slot
#           %r8   = current offset into .Lk_sr
#           %r10  = address of .Lk_sr
# Out:      %rdx advanced by 16, 16 bytes stored at the new (%rdx),
#           %r8 = (%r8 - 16) & 0x30
# Clobbers: %xmm1, %xmm3, %xmm4, %xmm5, flags
.type	_vpaes_schedule_mangle,@function
.align	16
_vpaes_schedule_mangle:
.cfi_startproc
	movdqa	%xmm0,%xmm4
	movdqa	.Lk_mc_forward(%rip),%xmm5

	addq	$16,%rdx
	pxor	.Lk_s63(%rip),%xmm4
	# With p = the .Lk_mc_forward shuffle: %xmm3 = p(x) ^ p(p(x)) ^ p(p(p(x))).
.byte	102,15,56,0,229	# pshufb %xmm5,%xmm4
	movdqa	%xmm4,%xmm3
.byte	102,15,56,0,229	# pshufb %xmm5,%xmm4
	pxor	%xmm4,%xmm3
.byte	102,15,56,0,229	# pshufb %xmm5,%xmm4
	pxor	%xmm4,%xmm3

.Lschedule_mangle_both:
	movdqa	(%r8,%r10,1),%xmm1	# .Lk_sr + %r8
.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
	addq	$-16,%r8
	andq	$0x30,%r8	# step the .Lk_sr offset backwards, wrapping
	movdqu	%xmm3,(%rdx)
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle


# vpaes_set_encrypt_key
#
# Exported entry point that builds an encryption key schedule.
# NOTE(review): the register usage matches the first three System V AMD64
# integer arguments; confirm the C prototype against the header that
# declares this symbol.
#
# In:       %rdi = user key bytes
#           %esi = key size in bits
#           %rdx = key schedule to fill in
# Out:      %eax = 0
#           32-bit value (bits >> 5) + 5 stored at 240(%rdx), that is 9 for
#           128 and 13 for 256; this is the loop count read by the
#           encrypt cores
# Clobbers: everything _vpaes_schedule_core clobbers, plus %rcx
.globl	vpaes_set_encrypt_key
.hidden vpaes_set_encrypt_key
.type	vpaes_set_encrypt_key,@function
.align	16
vpaes_set_encrypt_key:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
.extern	BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
	movb	$1,BORINGSSL_function_hit+5(%rip)
#endif

	movl	%esi,%eax
	shrl	$5,%eax
	addl	$5,%eax
	movl	%eax,240(%rdx)

	movl	$0,%ecx	# not read by any routine visible in this file
	movl	$0x30,%r8d	# initial .Lk_sr offset for the schedule
	call	_vpaes_schedule_core
	xorl	%eax,%eax	# return 0
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	vpaes_set_encrypt_key,.-vpaes_set_encrypt_key

# vpaes_encrypt
#
# Exported entry point that encrypts one 16-byte block.
#
# In:       %rdi = 16 input bytes (unaligned access is used)
#           %rsi = 16 output bytes (unaligned access is used)
#           %rdx = key schedule
# Clobbers: everything _vpaes_preheat and _vpaes_encrypt_core clobber
.globl	vpaes_encrypt
.hidden vpaes_encrypt
.type	vpaes_encrypt,@function
.align	16
vpaes_encrypt:
.cfi_startproc
	movdqu	(%rdi),%xmm0
	call	_vpaes_preheat
	call	_vpaes_encrypt_core
	movdqu	%xmm0,(%rsi)
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	vpaes_encrypt,.-vpaes_encrypt

# vpaes_ctr32_encrypt_blocks
#
# Exported entry point: XORs the input with the encryption of successive
# counter blocks. Only the last four bytes of the counter block are
# incremented, as a big-endian 32-bit value that wraps.
#
# In:       %rdi = input bytes
#           %rsi = output bytes
#           %rdx = number of 16-byte blocks; zero returns immediately
#           %rcx = key schedule
#           %r8  = 16-byte initial counter block (read, never written)
# Clobbers: %rcx, %rdx, %rsi, %rdi, %rax, %r9-%r11, %xmm0-%xmm15, flags
#
# An odd block count is handled by encrypting one block with
# _vpaes_encrypt_core first; the remaining even count goes through
# _vpaes_encrypt_core_2x two blocks at a time.
.globl	vpaes_ctr32_encrypt_blocks
.hidden vpaes_ctr32_encrypt_blocks
.type	vpaes_ctr32_encrypt_blocks,@function
.align	16
vpaes_ctr32_encrypt_blocks:
.cfi_startproc

	xchgq	%rcx,%rdx	# %rdx = key (where the cores expect it), %rcx = block count
	testq	%rcx,%rcx
	jz	.Lctr32_abort
	movdqu	(%r8),%xmm0	# counter block in memory byte order
	movdqa	.Lctr_add_one(%rip),%xmm8
	subq	%rdi,%rsi	# output is addressed as (%rsi,%rdi) from here on
	call	_vpaes_preheat
	movdqa	%xmm0,%xmm6
	pshufb	.Lrev_ctr(%rip),%xmm6	# %xmm6 = counter with its last dword byte-swapped

	testq	$1,%rcx
	jz	.Lctr32_prep_loop

	# Odd count: handle one block on its own. %xmm0 still holds the
	# counter block exactly as loaded.
	movdqu	(%rdi),%xmm7
	call	_vpaes_encrypt_core
	pxor	%xmm7,%xmm0
	paddd	%xmm8,%xmm6	# counter += 1
	movdqu	%xmm0,(%rsi,%rdi,1)
	subq	$1,%rcx	# sets ZF for the jz below
	leaq	16(%rdi),%rdi	# lea leaves flags untouched
	jz	.Lctr32_done

.Lctr32_prep_loop:
	# %xmm14 and %xmm15 hold the next two counters. This overwrites the
	# .Lk_sb2 values from _vpaes_preheat, which the 2x core does not read.
	movdqa	%xmm6,%xmm14
	movdqa	%xmm6,%xmm15
	paddd	%xmm8,%xmm15

.Lctr32_loop:
	movdqa	.Lrev_ctr(%rip),%xmm1
	movdqa	%xmm14,%xmm0
	movdqa	%xmm15,%xmm6
.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0   (back to memory byte order)
.byte	102,15,56,0,241	# pshufb %xmm1,%xmm6
	call	_vpaes_encrypt_core_2x
	movdqu	(%rdi),%xmm1
	movdqu	16(%rdi),%xmm2
	movdqa	.Lctr_add_two(%rip),%xmm3
	pxor	%xmm1,%xmm0
	pxor	%xmm2,%xmm6
	paddd	%xmm3,%xmm14	# both counters += 2
	paddd	%xmm3,%xmm15
	movdqu	%xmm0,(%rsi,%rdi,1)
	movdqu	%xmm6,16(%rsi,%rdi,1)
	subq	$2,%rcx	# sets ZF for the jnz below
	leaq	32(%rdi),%rdi	# lea leaves flags untouched
	jnz	.Lctr32_loop

.Lctr32_done:
.Lctr32_abort:
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks


# _vpaes_preheat
#
# Loads the constant tables that the other routines expect in registers.
# All displacements are relative to .Lk_s0F and depend on the table order
# in _vpaes_consts.
#
# Out:      %xmm9-%xmm15 as listed on each line, %r10 = address of .Lk_s0F
# Clobbers: nothing else
.type	_vpaes_preheat,@function
.align	16
_vpaes_preheat:
.cfi_startproc
	leaq	.Lk_s0F(%rip),%r10
	movdqa	-32(%r10),%xmm10	# .Lk_inv
	movdqa	-16(%r10),%xmm11	# .Lk_inv+16
	movdqa	0(%r10),%xmm9	# .Lk_s0F
	movdqa	48(%r10),%xmm13	# .Lk_sb1
	movdqa	64(%r10),%xmm12	# .Lk_sb1+16
	movdqa	80(%r10),%xmm15	# .Lk_sb2
	movdqa	96(%r10),%xmm14	# .Lk_sb2+16
	.byte	0xf3,0xc3	# rep ret
.cfi_endproc
.size	_vpaes_preheat,.-_vpaes_preheat


# _vpaes_consts
#
# Constant tables. Code above reaches several of them through fixed
# displacements from a neighboring label, so entries must not be reordered,
# resized or separated.
.type	_vpaes_consts,@object
.align	64
_vpaes_consts:
.Lk_inv:
.quad	0x0E05060F0D080180, 0x040703090A0B0C02
.quad	0x01040A060F0B0780, 0x030D0E0C02050809

.Lk_s0F:
.quad	0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F

.Lk_ipt:
.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81

.Lk_sb1:
.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
.Lk_sb2:
.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
.Lk_sbo:
.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA

.Lk_mc_forward:
.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
.quad	0x080B0A0904070605, 0x000302010C0F0E0D
.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
.quad	0x000302010C0F0E0D, 0x080B0A0904070605

.Lk_mc_backward:
.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
.quad	0x020100030E0D0C0F, 0x0A09080B06050407
.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
.quad	0x0A09080B06050407, 0x020100030E0D0C0F

.Lk_sr:
.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
.quad	0x030E09040F0A0500, 0x0B06010C07020D08
.quad	0x0F060D040B020900, 0x070E050C030A0108
.quad	0x0B0E0104070A0D00, 0x0306090C0F020508

.Lk_rcon:
.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81

.Lk_s63:
.quad	0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B

.Lk_opt:
.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0

.Lk_deskew:
.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77

# Shuffle mask that byte-swaps the last dword and leaves bytes 0..11 alone.
.Lrev_ctr:
.quad	0x0706050403020100, 0x0c0d0e0f0b0a0908

# Values added with paddd to the byte-swapped counter: +1 and +2 in the
# last dword.
.Lctr_add_one:
.quad	0x0000000000000000, 0x0000000100000000
.Lctr_add_two:
.quad	0x0000000000000000, 0x0000000200000000

# ASCII, NUL-terminated:
# Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)
.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.align	64
.size	_vpaes_consts,.-_vpaes_consts
#endif
.section	.note.GNU-stack,"",@progbits