1# This file is generated from a similarly-named Perl script in the BoringSSL 2# source tree. Do not edit by hand. 3 4#if defined(__has_feature) 5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) 6#define OPENSSL_NO_ASM 7#endif 8#endif 9 10#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) 11#if defined(BORINGSSL_PREFIX) 12#include <boringssl_prefix_symbols_asm.h> 13#endif 14.text 15.extern OPENSSL_ia32cap_P 16.hidden OPENSSL_ia32cap_P 17.globl aes_hw_encrypt 18.hidden aes_hw_encrypt 19.type aes_hw_encrypt,@function 20.align 16 21aes_hw_encrypt: 22.cfi_startproc 23#ifdef BORINGSSL_DISPATCH_TEST 24.extern BORINGSSL_function_hit 25.hidden BORINGSSL_function_hit 26 movb $1,BORINGSSL_function_hit+1(%rip) 27#endif 28 movups (%rdi),%xmm2 29 movl 240(%rdx),%eax 30 movups (%rdx),%xmm0 31 movups 16(%rdx),%xmm1 32 leaq 32(%rdx),%rdx 33 xorps %xmm0,%xmm2 34.Loop_enc1_1: 35.byte 102,15,56,220,209 36 decl %eax 37 movups (%rdx),%xmm1 38 leaq 16(%rdx),%rdx 39 jnz .Loop_enc1_1 40.byte 102,15,56,221,209 41 pxor %xmm0,%xmm0 42 pxor %xmm1,%xmm1 43 movups %xmm2,(%rsi) 44 pxor %xmm2,%xmm2 45 .byte 0xf3,0xc3 46.cfi_endproc 47.size aes_hw_encrypt,.-aes_hw_encrypt 48 49.globl aes_hw_decrypt 50.hidden aes_hw_decrypt 51.type aes_hw_decrypt,@function 52.align 16 53aes_hw_decrypt: 54.cfi_startproc 55 movups (%rdi),%xmm2 56 movl 240(%rdx),%eax 57 movups (%rdx),%xmm0 58 movups 16(%rdx),%xmm1 59 leaq 32(%rdx),%rdx 60 xorps %xmm0,%xmm2 61.Loop_dec1_2: 62.byte 102,15,56,222,209 63 decl %eax 64 movups (%rdx),%xmm1 65 leaq 16(%rdx),%rdx 66 jnz .Loop_dec1_2 67.byte 102,15,56,223,209 68 pxor %xmm0,%xmm0 69 pxor %xmm1,%xmm1 70 movups %xmm2,(%rsi) 71 pxor %xmm2,%xmm2 72 .byte 0xf3,0xc3 73.cfi_endproc 74.size aes_hw_decrypt, .-aes_hw_decrypt 75.type _aesni_encrypt2,@function 76.align 16 77_aesni_encrypt2: 78.cfi_startproc 79 movups (%rcx),%xmm0 80 shll $4,%eax 81 movups 16(%rcx),%xmm1 82 xorps %xmm0,%xmm2 83 xorps %xmm0,%xmm3 84 movups 32(%rcx),%xmm0 85 leaq 32(%rcx,%rax,1),%rcx 86 negq 
%rax 87 addq $16,%rax 88 89.Lenc_loop2: 90.byte 102,15,56,220,209 91.byte 102,15,56,220,217 92 movups (%rcx,%rax,1),%xmm1 93 addq $32,%rax 94.byte 102,15,56,220,208 95.byte 102,15,56,220,216 96 movups -16(%rcx,%rax,1),%xmm0 97 jnz .Lenc_loop2 98 99.byte 102,15,56,220,209 100.byte 102,15,56,220,217 101.byte 102,15,56,221,208 102.byte 102,15,56,221,216 103 .byte 0xf3,0xc3 104.cfi_endproc 105.size _aesni_encrypt2,.-_aesni_encrypt2 106.type _aesni_decrypt2,@function 107.align 16 108_aesni_decrypt2: 109.cfi_startproc 110 movups (%rcx),%xmm0 111 shll $4,%eax 112 movups 16(%rcx),%xmm1 113 xorps %xmm0,%xmm2 114 xorps %xmm0,%xmm3 115 movups 32(%rcx),%xmm0 116 leaq 32(%rcx,%rax,1),%rcx 117 negq %rax 118 addq $16,%rax 119 120.Ldec_loop2: 121.byte 102,15,56,222,209 122.byte 102,15,56,222,217 123 movups (%rcx,%rax,1),%xmm1 124 addq $32,%rax 125.byte 102,15,56,222,208 126.byte 102,15,56,222,216 127 movups -16(%rcx,%rax,1),%xmm0 128 jnz .Ldec_loop2 129 130.byte 102,15,56,222,209 131.byte 102,15,56,222,217 132.byte 102,15,56,223,208 133.byte 102,15,56,223,216 134 .byte 0xf3,0xc3 135.cfi_endproc 136.size _aesni_decrypt2,.-_aesni_decrypt2 137.type _aesni_encrypt3,@function 138.align 16 139_aesni_encrypt3: 140.cfi_startproc 141 movups (%rcx),%xmm0 142 shll $4,%eax 143 movups 16(%rcx),%xmm1 144 xorps %xmm0,%xmm2 145 xorps %xmm0,%xmm3 146 xorps %xmm0,%xmm4 147 movups 32(%rcx),%xmm0 148 leaq 32(%rcx,%rax,1),%rcx 149 negq %rax 150 addq $16,%rax 151 152.Lenc_loop3: 153.byte 102,15,56,220,209 154.byte 102,15,56,220,217 155.byte 102,15,56,220,225 156 movups (%rcx,%rax,1),%xmm1 157 addq $32,%rax 158.byte 102,15,56,220,208 159.byte 102,15,56,220,216 160.byte 102,15,56,220,224 161 movups -16(%rcx,%rax,1),%xmm0 162 jnz .Lenc_loop3 163 164.byte 102,15,56,220,209 165.byte 102,15,56,220,217 166.byte 102,15,56,220,225 167.byte 102,15,56,221,208 168.byte 102,15,56,221,216 169.byte 102,15,56,221,224 170 .byte 0xf3,0xc3 171.cfi_endproc 172.size _aesni_encrypt3,.-_aesni_encrypt3 173.type 
_aesni_decrypt3,@function 174.align 16 175_aesni_decrypt3: 176.cfi_startproc 177 movups (%rcx),%xmm0 178 shll $4,%eax 179 movups 16(%rcx),%xmm1 180 xorps %xmm0,%xmm2 181 xorps %xmm0,%xmm3 182 xorps %xmm0,%xmm4 183 movups 32(%rcx),%xmm0 184 leaq 32(%rcx,%rax,1),%rcx 185 negq %rax 186 addq $16,%rax 187 188.Ldec_loop3: 189.byte 102,15,56,222,209 190.byte 102,15,56,222,217 191.byte 102,15,56,222,225 192 movups (%rcx,%rax,1),%xmm1 193 addq $32,%rax 194.byte 102,15,56,222,208 195.byte 102,15,56,222,216 196.byte 102,15,56,222,224 197 movups -16(%rcx,%rax,1),%xmm0 198 jnz .Ldec_loop3 199 200.byte 102,15,56,222,209 201.byte 102,15,56,222,217 202.byte 102,15,56,222,225 203.byte 102,15,56,223,208 204.byte 102,15,56,223,216 205.byte 102,15,56,223,224 206 .byte 0xf3,0xc3 207.cfi_endproc 208.size _aesni_decrypt3,.-_aesni_decrypt3 209.type _aesni_encrypt4,@function 210.align 16 211_aesni_encrypt4: 212.cfi_startproc 213 movups (%rcx),%xmm0 214 shll $4,%eax 215 movups 16(%rcx),%xmm1 216 xorps %xmm0,%xmm2 217 xorps %xmm0,%xmm3 218 xorps %xmm0,%xmm4 219 xorps %xmm0,%xmm5 220 movups 32(%rcx),%xmm0 221 leaq 32(%rcx,%rax,1),%rcx 222 negq %rax 223.byte 0x0f,0x1f,0x00 224 addq $16,%rax 225 226.Lenc_loop4: 227.byte 102,15,56,220,209 228.byte 102,15,56,220,217 229.byte 102,15,56,220,225 230.byte 102,15,56,220,233 231 movups (%rcx,%rax,1),%xmm1 232 addq $32,%rax 233.byte 102,15,56,220,208 234.byte 102,15,56,220,216 235.byte 102,15,56,220,224 236.byte 102,15,56,220,232 237 movups -16(%rcx,%rax,1),%xmm0 238 jnz .Lenc_loop4 239 240.byte 102,15,56,220,209 241.byte 102,15,56,220,217 242.byte 102,15,56,220,225 243.byte 102,15,56,220,233 244.byte 102,15,56,221,208 245.byte 102,15,56,221,216 246.byte 102,15,56,221,224 247.byte 102,15,56,221,232 248 .byte 0xf3,0xc3 249.cfi_endproc 250.size _aesni_encrypt4,.-_aesni_encrypt4 251.type _aesni_decrypt4,@function 252.align 16 253_aesni_decrypt4: 254.cfi_startproc 255 movups (%rcx),%xmm0 256 shll $4,%eax 257 movups 16(%rcx),%xmm1 258 xorps %xmm0,%xmm2 259 
xorps %xmm0,%xmm3 260 xorps %xmm0,%xmm4 261 xorps %xmm0,%xmm5 262 movups 32(%rcx),%xmm0 263 leaq 32(%rcx,%rax,1),%rcx 264 negq %rax 265.byte 0x0f,0x1f,0x00 266 addq $16,%rax 267 268.Ldec_loop4: 269.byte 102,15,56,222,209 270.byte 102,15,56,222,217 271.byte 102,15,56,222,225 272.byte 102,15,56,222,233 273 movups (%rcx,%rax,1),%xmm1 274 addq $32,%rax 275.byte 102,15,56,222,208 276.byte 102,15,56,222,216 277.byte 102,15,56,222,224 278.byte 102,15,56,222,232 279 movups -16(%rcx,%rax,1),%xmm0 280 jnz .Ldec_loop4 281 282.byte 102,15,56,222,209 283.byte 102,15,56,222,217 284.byte 102,15,56,222,225 285.byte 102,15,56,222,233 286.byte 102,15,56,223,208 287.byte 102,15,56,223,216 288.byte 102,15,56,223,224 289.byte 102,15,56,223,232 290 .byte 0xf3,0xc3 291.cfi_endproc 292.size _aesni_decrypt4,.-_aesni_decrypt4 293.type _aesni_encrypt6,@function 294.align 16 295_aesni_encrypt6: 296.cfi_startproc 297 movups (%rcx),%xmm0 298 shll $4,%eax 299 movups 16(%rcx),%xmm1 300 xorps %xmm0,%xmm2 301 pxor %xmm0,%xmm3 302 pxor %xmm0,%xmm4 303.byte 102,15,56,220,209 304 leaq 32(%rcx,%rax,1),%rcx 305 negq %rax 306.byte 102,15,56,220,217 307 pxor %xmm0,%xmm5 308 pxor %xmm0,%xmm6 309.byte 102,15,56,220,225 310 pxor %xmm0,%xmm7 311 movups (%rcx,%rax,1),%xmm0 312 addq $16,%rax 313 jmp .Lenc_loop6_enter 314.align 16 315.Lenc_loop6: 316.byte 102,15,56,220,209 317.byte 102,15,56,220,217 318.byte 102,15,56,220,225 319.Lenc_loop6_enter: 320.byte 102,15,56,220,233 321.byte 102,15,56,220,241 322.byte 102,15,56,220,249 323 movups (%rcx,%rax,1),%xmm1 324 addq $32,%rax 325.byte 102,15,56,220,208 326.byte 102,15,56,220,216 327.byte 102,15,56,220,224 328.byte 102,15,56,220,232 329.byte 102,15,56,220,240 330.byte 102,15,56,220,248 331 movups -16(%rcx,%rax,1),%xmm0 332 jnz .Lenc_loop6 333 334.byte 102,15,56,220,209 335.byte 102,15,56,220,217 336.byte 102,15,56,220,225 337.byte 102,15,56,220,233 338.byte 102,15,56,220,241 339.byte 102,15,56,220,249 340.byte 102,15,56,221,208 341.byte 102,15,56,221,216 342.byte 
102,15,56,221,224 343.byte 102,15,56,221,232 344.byte 102,15,56,221,240 345.byte 102,15,56,221,248 346 .byte 0xf3,0xc3 347.cfi_endproc 348.size _aesni_encrypt6,.-_aesni_encrypt6 349.type _aesni_decrypt6,@function 350.align 16 351_aesni_decrypt6: 352.cfi_startproc 353 movups (%rcx),%xmm0 354 shll $4,%eax 355 movups 16(%rcx),%xmm1 356 xorps %xmm0,%xmm2 357 pxor %xmm0,%xmm3 358 pxor %xmm0,%xmm4 359.byte 102,15,56,222,209 360 leaq 32(%rcx,%rax,1),%rcx 361 negq %rax 362.byte 102,15,56,222,217 363 pxor %xmm0,%xmm5 364 pxor %xmm0,%xmm6 365.byte 102,15,56,222,225 366 pxor %xmm0,%xmm7 367 movups (%rcx,%rax,1),%xmm0 368 addq $16,%rax 369 jmp .Ldec_loop6_enter 370.align 16 371.Ldec_loop6: 372.byte 102,15,56,222,209 373.byte 102,15,56,222,217 374.byte 102,15,56,222,225 375.Ldec_loop6_enter: 376.byte 102,15,56,222,233 377.byte 102,15,56,222,241 378.byte 102,15,56,222,249 379 movups (%rcx,%rax,1),%xmm1 380 addq $32,%rax 381.byte 102,15,56,222,208 382.byte 102,15,56,222,216 383.byte 102,15,56,222,224 384.byte 102,15,56,222,232 385.byte 102,15,56,222,240 386.byte 102,15,56,222,248 387 movups -16(%rcx,%rax,1),%xmm0 388 jnz .Ldec_loop6 389 390.byte 102,15,56,222,209 391.byte 102,15,56,222,217 392.byte 102,15,56,222,225 393.byte 102,15,56,222,233 394.byte 102,15,56,222,241 395.byte 102,15,56,222,249 396.byte 102,15,56,223,208 397.byte 102,15,56,223,216 398.byte 102,15,56,223,224 399.byte 102,15,56,223,232 400.byte 102,15,56,223,240 401.byte 102,15,56,223,248 402 .byte 0xf3,0xc3 403.cfi_endproc 404.size _aesni_decrypt6,.-_aesni_decrypt6 405.type _aesni_encrypt8,@function 406.align 16 407_aesni_encrypt8: 408.cfi_startproc 409 movups (%rcx),%xmm0 410 shll $4,%eax 411 movups 16(%rcx),%xmm1 412 xorps %xmm0,%xmm2 413 xorps %xmm0,%xmm3 414 pxor %xmm0,%xmm4 415 pxor %xmm0,%xmm5 416 pxor %xmm0,%xmm6 417 leaq 32(%rcx,%rax,1),%rcx 418 negq %rax 419.byte 102,15,56,220,209 420 pxor %xmm0,%xmm7 421 pxor %xmm0,%xmm8 422.byte 102,15,56,220,217 423 pxor %xmm0,%xmm9 424 movups (%rcx,%rax,1),%xmm0 425 
addq $16,%rax 426 jmp .Lenc_loop8_inner 427.align 16 428.Lenc_loop8: 429.byte 102,15,56,220,209 430.byte 102,15,56,220,217 431.Lenc_loop8_inner: 432.byte 102,15,56,220,225 433.byte 102,15,56,220,233 434.byte 102,15,56,220,241 435.byte 102,15,56,220,249 436.byte 102,68,15,56,220,193 437.byte 102,68,15,56,220,201 438.Lenc_loop8_enter: 439 movups (%rcx,%rax,1),%xmm1 440 addq $32,%rax 441.byte 102,15,56,220,208 442.byte 102,15,56,220,216 443.byte 102,15,56,220,224 444.byte 102,15,56,220,232 445.byte 102,15,56,220,240 446.byte 102,15,56,220,248 447.byte 102,68,15,56,220,192 448.byte 102,68,15,56,220,200 449 movups -16(%rcx,%rax,1),%xmm0 450 jnz .Lenc_loop8 451 452.byte 102,15,56,220,209 453.byte 102,15,56,220,217 454.byte 102,15,56,220,225 455.byte 102,15,56,220,233 456.byte 102,15,56,220,241 457.byte 102,15,56,220,249 458.byte 102,68,15,56,220,193 459.byte 102,68,15,56,220,201 460.byte 102,15,56,221,208 461.byte 102,15,56,221,216 462.byte 102,15,56,221,224 463.byte 102,15,56,221,232 464.byte 102,15,56,221,240 465.byte 102,15,56,221,248 466.byte 102,68,15,56,221,192 467.byte 102,68,15,56,221,200 468 .byte 0xf3,0xc3 469.cfi_endproc 470.size _aesni_encrypt8,.-_aesni_encrypt8 471.type _aesni_decrypt8,@function 472.align 16 473_aesni_decrypt8: 474.cfi_startproc 475 movups (%rcx),%xmm0 476 shll $4,%eax 477 movups 16(%rcx),%xmm1 478 xorps %xmm0,%xmm2 479 xorps %xmm0,%xmm3 480 pxor %xmm0,%xmm4 481 pxor %xmm0,%xmm5 482 pxor %xmm0,%xmm6 483 leaq 32(%rcx,%rax,1),%rcx 484 negq %rax 485.byte 102,15,56,222,209 486 pxor %xmm0,%xmm7 487 pxor %xmm0,%xmm8 488.byte 102,15,56,222,217 489 pxor %xmm0,%xmm9 490 movups (%rcx,%rax,1),%xmm0 491 addq $16,%rax 492 jmp .Ldec_loop8_inner 493.align 16 494.Ldec_loop8: 495.byte 102,15,56,222,209 496.byte 102,15,56,222,217 497.Ldec_loop8_inner: 498.byte 102,15,56,222,225 499.byte 102,15,56,222,233 500.byte 102,15,56,222,241 501.byte 102,15,56,222,249 502.byte 102,68,15,56,222,193 503.byte 102,68,15,56,222,201 504.Ldec_loop8_enter: 505 movups 
(%rcx,%rax,1),%xmm1 506 addq $32,%rax 507.byte 102,15,56,222,208 508.byte 102,15,56,222,216 509.byte 102,15,56,222,224 510.byte 102,15,56,222,232 511.byte 102,15,56,222,240 512.byte 102,15,56,222,248 513.byte 102,68,15,56,222,192 514.byte 102,68,15,56,222,200 515 movups -16(%rcx,%rax,1),%xmm0 516 jnz .Ldec_loop8 517 518.byte 102,15,56,222,209 519.byte 102,15,56,222,217 520.byte 102,15,56,222,225 521.byte 102,15,56,222,233 522.byte 102,15,56,222,241 523.byte 102,15,56,222,249 524.byte 102,68,15,56,222,193 525.byte 102,68,15,56,222,201 526.byte 102,15,56,223,208 527.byte 102,15,56,223,216 528.byte 102,15,56,223,224 529.byte 102,15,56,223,232 530.byte 102,15,56,223,240 531.byte 102,15,56,223,248 532.byte 102,68,15,56,223,192 533.byte 102,68,15,56,223,200 534 .byte 0xf3,0xc3 535.cfi_endproc 536.size _aesni_decrypt8,.-_aesni_decrypt8 537.globl aes_hw_ecb_encrypt 538.hidden aes_hw_ecb_encrypt 539.type aes_hw_ecb_encrypt,@function 540.align 16 541aes_hw_ecb_encrypt: 542.cfi_startproc 543 andq $-16,%rdx 544 jz .Lecb_ret 545 546 movl 240(%rcx),%eax 547 movups (%rcx),%xmm0 548 movq %rcx,%r11 549 movl %eax,%r10d 550 testl %r8d,%r8d 551 jz .Lecb_decrypt 552 553 cmpq $0x80,%rdx 554 jb .Lecb_enc_tail 555 556 movdqu (%rdi),%xmm2 557 movdqu 16(%rdi),%xmm3 558 movdqu 32(%rdi),%xmm4 559 movdqu 48(%rdi),%xmm5 560 movdqu 64(%rdi),%xmm6 561 movdqu 80(%rdi),%xmm7 562 movdqu 96(%rdi),%xmm8 563 movdqu 112(%rdi),%xmm9 564 leaq 128(%rdi),%rdi 565 subq $0x80,%rdx 566 jmp .Lecb_enc_loop8_enter 567.align 16 568.Lecb_enc_loop8: 569 movups %xmm2,(%rsi) 570 movq %r11,%rcx 571 movdqu (%rdi),%xmm2 572 movl %r10d,%eax 573 movups %xmm3,16(%rsi) 574 movdqu 16(%rdi),%xmm3 575 movups %xmm4,32(%rsi) 576 movdqu 32(%rdi),%xmm4 577 movups %xmm5,48(%rsi) 578 movdqu 48(%rdi),%xmm5 579 movups %xmm6,64(%rsi) 580 movdqu 64(%rdi),%xmm6 581 movups %xmm7,80(%rsi) 582 movdqu 80(%rdi),%xmm7 583 movups %xmm8,96(%rsi) 584 movdqu 96(%rdi),%xmm8 585 movups %xmm9,112(%rsi) 586 leaq 128(%rsi),%rsi 587 movdqu 
112(%rdi),%xmm9 588 leaq 128(%rdi),%rdi 589.Lecb_enc_loop8_enter: 590 591 call _aesni_encrypt8 592 593 subq $0x80,%rdx 594 jnc .Lecb_enc_loop8 595 596 movups %xmm2,(%rsi) 597 movq %r11,%rcx 598 movups %xmm3,16(%rsi) 599 movl %r10d,%eax 600 movups %xmm4,32(%rsi) 601 movups %xmm5,48(%rsi) 602 movups %xmm6,64(%rsi) 603 movups %xmm7,80(%rsi) 604 movups %xmm8,96(%rsi) 605 movups %xmm9,112(%rsi) 606 leaq 128(%rsi),%rsi 607 addq $0x80,%rdx 608 jz .Lecb_ret 609 610.Lecb_enc_tail: 611 movups (%rdi),%xmm2 612 cmpq $0x20,%rdx 613 jb .Lecb_enc_one 614 movups 16(%rdi),%xmm3 615 je .Lecb_enc_two 616 movups 32(%rdi),%xmm4 617 cmpq $0x40,%rdx 618 jb .Lecb_enc_three 619 movups 48(%rdi),%xmm5 620 je .Lecb_enc_four 621 movups 64(%rdi),%xmm6 622 cmpq $0x60,%rdx 623 jb .Lecb_enc_five 624 movups 80(%rdi),%xmm7 625 je .Lecb_enc_six 626 movdqu 96(%rdi),%xmm8 627 xorps %xmm9,%xmm9 628 call _aesni_encrypt8 629 movups %xmm2,(%rsi) 630 movups %xmm3,16(%rsi) 631 movups %xmm4,32(%rsi) 632 movups %xmm5,48(%rsi) 633 movups %xmm6,64(%rsi) 634 movups %xmm7,80(%rsi) 635 movups %xmm8,96(%rsi) 636 jmp .Lecb_ret 637.align 16 638.Lecb_enc_one: 639 movups (%rcx),%xmm0 640 movups 16(%rcx),%xmm1 641 leaq 32(%rcx),%rcx 642 xorps %xmm0,%xmm2 643.Loop_enc1_3: 644.byte 102,15,56,220,209 645 decl %eax 646 movups (%rcx),%xmm1 647 leaq 16(%rcx),%rcx 648 jnz .Loop_enc1_3 649.byte 102,15,56,221,209 650 movups %xmm2,(%rsi) 651 jmp .Lecb_ret 652.align 16 653.Lecb_enc_two: 654 call _aesni_encrypt2 655 movups %xmm2,(%rsi) 656 movups %xmm3,16(%rsi) 657 jmp .Lecb_ret 658.align 16 659.Lecb_enc_three: 660 call _aesni_encrypt3 661 movups %xmm2,(%rsi) 662 movups %xmm3,16(%rsi) 663 movups %xmm4,32(%rsi) 664 jmp .Lecb_ret 665.align 16 666.Lecb_enc_four: 667 call _aesni_encrypt4 668 movups %xmm2,(%rsi) 669 movups %xmm3,16(%rsi) 670 movups %xmm4,32(%rsi) 671 movups %xmm5,48(%rsi) 672 jmp .Lecb_ret 673.align 16 674.Lecb_enc_five: 675 xorps %xmm7,%xmm7 676 call _aesni_encrypt6 677 movups %xmm2,(%rsi) 678 movups %xmm3,16(%rsi) 679 
movups %xmm4,32(%rsi) 680 movups %xmm5,48(%rsi) 681 movups %xmm6,64(%rsi) 682 jmp .Lecb_ret 683.align 16 684.Lecb_enc_six: 685 call _aesni_encrypt6 686 movups %xmm2,(%rsi) 687 movups %xmm3,16(%rsi) 688 movups %xmm4,32(%rsi) 689 movups %xmm5,48(%rsi) 690 movups %xmm6,64(%rsi) 691 movups %xmm7,80(%rsi) 692 jmp .Lecb_ret 693 694.align 16 695.Lecb_decrypt: 696 cmpq $0x80,%rdx 697 jb .Lecb_dec_tail 698 699 movdqu (%rdi),%xmm2 700 movdqu 16(%rdi),%xmm3 701 movdqu 32(%rdi),%xmm4 702 movdqu 48(%rdi),%xmm5 703 movdqu 64(%rdi),%xmm6 704 movdqu 80(%rdi),%xmm7 705 movdqu 96(%rdi),%xmm8 706 movdqu 112(%rdi),%xmm9 707 leaq 128(%rdi),%rdi 708 subq $0x80,%rdx 709 jmp .Lecb_dec_loop8_enter 710.align 16 711.Lecb_dec_loop8: 712 movups %xmm2,(%rsi) 713 movq %r11,%rcx 714 movdqu (%rdi),%xmm2 715 movl %r10d,%eax 716 movups %xmm3,16(%rsi) 717 movdqu 16(%rdi),%xmm3 718 movups %xmm4,32(%rsi) 719 movdqu 32(%rdi),%xmm4 720 movups %xmm5,48(%rsi) 721 movdqu 48(%rdi),%xmm5 722 movups %xmm6,64(%rsi) 723 movdqu 64(%rdi),%xmm6 724 movups %xmm7,80(%rsi) 725 movdqu 80(%rdi),%xmm7 726 movups %xmm8,96(%rsi) 727 movdqu 96(%rdi),%xmm8 728 movups %xmm9,112(%rsi) 729 leaq 128(%rsi),%rsi 730 movdqu 112(%rdi),%xmm9 731 leaq 128(%rdi),%rdi 732.Lecb_dec_loop8_enter: 733 734 call _aesni_decrypt8 735 736 movups (%r11),%xmm0 737 subq $0x80,%rdx 738 jnc .Lecb_dec_loop8 739 740 movups %xmm2,(%rsi) 741 pxor %xmm2,%xmm2 742 movq %r11,%rcx 743 movups %xmm3,16(%rsi) 744 pxor %xmm3,%xmm3 745 movl %r10d,%eax 746 movups %xmm4,32(%rsi) 747 pxor %xmm4,%xmm4 748 movups %xmm5,48(%rsi) 749 pxor %xmm5,%xmm5 750 movups %xmm6,64(%rsi) 751 pxor %xmm6,%xmm6 752 movups %xmm7,80(%rsi) 753 pxor %xmm7,%xmm7 754 movups %xmm8,96(%rsi) 755 pxor %xmm8,%xmm8 756 movups %xmm9,112(%rsi) 757 pxor %xmm9,%xmm9 758 leaq 128(%rsi),%rsi 759 addq $0x80,%rdx 760 jz .Lecb_ret 761 762.Lecb_dec_tail: 763 movups (%rdi),%xmm2 764 cmpq $0x20,%rdx 765 jb .Lecb_dec_one 766 movups 16(%rdi),%xmm3 767 je .Lecb_dec_two 768 movups 32(%rdi),%xmm4 769 cmpq 
$0x40,%rdx 770 jb .Lecb_dec_three 771 movups 48(%rdi),%xmm5 772 je .Lecb_dec_four 773 movups 64(%rdi),%xmm6 774 cmpq $0x60,%rdx 775 jb .Lecb_dec_five 776 movups 80(%rdi),%xmm7 777 je .Lecb_dec_six 778 movups 96(%rdi),%xmm8 779 movups (%rcx),%xmm0 780 xorps %xmm9,%xmm9 781 call _aesni_decrypt8 782 movups %xmm2,(%rsi) 783 pxor %xmm2,%xmm2 784 movups %xmm3,16(%rsi) 785 pxor %xmm3,%xmm3 786 movups %xmm4,32(%rsi) 787 pxor %xmm4,%xmm4 788 movups %xmm5,48(%rsi) 789 pxor %xmm5,%xmm5 790 movups %xmm6,64(%rsi) 791 pxor %xmm6,%xmm6 792 movups %xmm7,80(%rsi) 793 pxor %xmm7,%xmm7 794 movups %xmm8,96(%rsi) 795 pxor %xmm8,%xmm8 796 pxor %xmm9,%xmm9 797 jmp .Lecb_ret 798.align 16 799.Lecb_dec_one: 800 movups (%rcx),%xmm0 801 movups 16(%rcx),%xmm1 802 leaq 32(%rcx),%rcx 803 xorps %xmm0,%xmm2 804.Loop_dec1_4: 805.byte 102,15,56,222,209 806 decl %eax 807 movups (%rcx),%xmm1 808 leaq 16(%rcx),%rcx 809 jnz .Loop_dec1_4 810.byte 102,15,56,223,209 811 movups %xmm2,(%rsi) 812 pxor %xmm2,%xmm2 813 jmp .Lecb_ret 814.align 16 815.Lecb_dec_two: 816 call _aesni_decrypt2 817 movups %xmm2,(%rsi) 818 pxor %xmm2,%xmm2 819 movups %xmm3,16(%rsi) 820 pxor %xmm3,%xmm3 821 jmp .Lecb_ret 822.align 16 823.Lecb_dec_three: 824 call _aesni_decrypt3 825 movups %xmm2,(%rsi) 826 pxor %xmm2,%xmm2 827 movups %xmm3,16(%rsi) 828 pxor %xmm3,%xmm3 829 movups %xmm4,32(%rsi) 830 pxor %xmm4,%xmm4 831 jmp .Lecb_ret 832.align 16 833.Lecb_dec_four: 834 call _aesni_decrypt4 835 movups %xmm2,(%rsi) 836 pxor %xmm2,%xmm2 837 movups %xmm3,16(%rsi) 838 pxor %xmm3,%xmm3 839 movups %xmm4,32(%rsi) 840 pxor %xmm4,%xmm4 841 movups %xmm5,48(%rsi) 842 pxor %xmm5,%xmm5 843 jmp .Lecb_ret 844.align 16 845.Lecb_dec_five: 846 xorps %xmm7,%xmm7 847 call _aesni_decrypt6 848 movups %xmm2,(%rsi) 849 pxor %xmm2,%xmm2 850 movups %xmm3,16(%rsi) 851 pxor %xmm3,%xmm3 852 movups %xmm4,32(%rsi) 853 pxor %xmm4,%xmm4 854 movups %xmm5,48(%rsi) 855 pxor %xmm5,%xmm5 856 movups %xmm6,64(%rsi) 857 pxor %xmm6,%xmm6 858 pxor %xmm7,%xmm7 859 jmp .Lecb_ret 
860.align 16 861.Lecb_dec_six: 862 call _aesni_decrypt6 863 movups %xmm2,(%rsi) 864 pxor %xmm2,%xmm2 865 movups %xmm3,16(%rsi) 866 pxor %xmm3,%xmm3 867 movups %xmm4,32(%rsi) 868 pxor %xmm4,%xmm4 869 movups %xmm5,48(%rsi) 870 pxor %xmm5,%xmm5 871 movups %xmm6,64(%rsi) 872 pxor %xmm6,%xmm6 873 movups %xmm7,80(%rsi) 874 pxor %xmm7,%xmm7 875 876.Lecb_ret: 877 xorps %xmm0,%xmm0 878 pxor %xmm1,%xmm1 879 .byte 0xf3,0xc3 880.cfi_endproc 881.size aes_hw_ecb_encrypt,.-aes_hw_ecb_encrypt 882.globl aes_hw_ctr32_encrypt_blocks 883.hidden aes_hw_ctr32_encrypt_blocks 884.type aes_hw_ctr32_encrypt_blocks,@function 885.align 16 886aes_hw_ctr32_encrypt_blocks: 887.cfi_startproc 888#ifdef BORINGSSL_DISPATCH_TEST 889 movb $1,BORINGSSL_function_hit(%rip) 890#endif 891 cmpq $1,%rdx 892 jne .Lctr32_bulk 893 894 895 896 movups (%r8),%xmm2 897 movups (%rdi),%xmm3 898 movl 240(%rcx),%edx 899 movups (%rcx),%xmm0 900 movups 16(%rcx),%xmm1 901 leaq 32(%rcx),%rcx 902 xorps %xmm0,%xmm2 903.Loop_enc1_5: 904.byte 102,15,56,220,209 905 decl %edx 906 movups (%rcx),%xmm1 907 leaq 16(%rcx),%rcx 908 jnz .Loop_enc1_5 909.byte 102,15,56,221,209 910 pxor %xmm0,%xmm0 911 pxor %xmm1,%xmm1 912 xorps %xmm3,%xmm2 913 pxor %xmm3,%xmm3 914 movups %xmm2,(%rsi) 915 xorps %xmm2,%xmm2 916 jmp .Lctr32_epilogue 917 918.align 16 919.Lctr32_bulk: 920 leaq (%rsp),%r11 921.cfi_def_cfa_register %r11 922 pushq %rbp 923.cfi_offset %rbp,-16 924 subq $128,%rsp 925 andq $-16,%rsp 926 927 928 929 930 movdqu (%r8),%xmm2 931 movdqu (%rcx),%xmm0 932 movl 12(%r8),%r8d 933 pxor %xmm0,%xmm2 934 movl 12(%rcx),%ebp 935 movdqa %xmm2,0(%rsp) 936 bswapl %r8d 937 movdqa %xmm2,%xmm3 938 movdqa %xmm2,%xmm4 939 movdqa %xmm2,%xmm5 940 movdqa %xmm2,64(%rsp) 941 movdqa %xmm2,80(%rsp) 942 movdqa %xmm2,96(%rsp) 943 movq %rdx,%r10 944 movdqa %xmm2,112(%rsp) 945 946 leaq 1(%r8),%rax 947 leaq 2(%r8),%rdx 948 bswapl %eax 949 bswapl %edx 950 xorl %ebp,%eax 951 xorl %ebp,%edx 952.byte 102,15,58,34,216,3 953 leaq 3(%r8),%rax 954 movdqa %xmm3,16(%rsp) 
955.byte 102,15,58,34,226,3 956 bswapl %eax 957 movq %r10,%rdx 958 leaq 4(%r8),%r10 959 movdqa %xmm4,32(%rsp) 960 xorl %ebp,%eax 961 bswapl %r10d 962.byte 102,15,58,34,232,3 963 xorl %ebp,%r10d 964 movdqa %xmm5,48(%rsp) 965 leaq 5(%r8),%r9 966 movl %r10d,64+12(%rsp) 967 bswapl %r9d 968 leaq 6(%r8),%r10 969 movl 240(%rcx),%eax 970 xorl %ebp,%r9d 971 bswapl %r10d 972 movl %r9d,80+12(%rsp) 973 xorl %ebp,%r10d 974 leaq 7(%r8),%r9 975 movl %r10d,96+12(%rsp) 976 bswapl %r9d 977 leaq OPENSSL_ia32cap_P(%rip),%r10 978 movl 4(%r10),%r10d 979 xorl %ebp,%r9d 980 andl $71303168,%r10d 981 movl %r9d,112+12(%rsp) 982 983 movups 16(%rcx),%xmm1 984 985 movdqa 64(%rsp),%xmm6 986 movdqa 80(%rsp),%xmm7 987 988 cmpq $8,%rdx 989 jb .Lctr32_tail 990 991 subq $6,%rdx 992 cmpl $4194304,%r10d 993 je .Lctr32_6x 994 995 leaq 128(%rcx),%rcx 996 subq $2,%rdx 997 jmp .Lctr32_loop8 998 999.align 16 1000.Lctr32_6x: 1001 shll $4,%eax 1002 movl $48,%r10d 1003 bswapl %ebp 1004 leaq 32(%rcx,%rax,1),%rcx 1005 subq %rax,%r10 1006 jmp .Lctr32_loop6 1007 1008.align 16 1009.Lctr32_loop6: 1010 addl $6,%r8d 1011 movups -48(%rcx,%r10,1),%xmm0 1012.byte 102,15,56,220,209 1013 movl %r8d,%eax 1014 xorl %ebp,%eax 1015.byte 102,15,56,220,217 1016.byte 0x0f,0x38,0xf1,0x44,0x24,12 1017 leal 1(%r8),%eax 1018.byte 102,15,56,220,225 1019 xorl %ebp,%eax 1020.byte 0x0f,0x38,0xf1,0x44,0x24,28 1021.byte 102,15,56,220,233 1022 leal 2(%r8),%eax 1023 xorl %ebp,%eax 1024.byte 102,15,56,220,241 1025.byte 0x0f,0x38,0xf1,0x44,0x24,44 1026 leal 3(%r8),%eax 1027.byte 102,15,56,220,249 1028 movups -32(%rcx,%r10,1),%xmm1 1029 xorl %ebp,%eax 1030 1031.byte 102,15,56,220,208 1032.byte 0x0f,0x38,0xf1,0x44,0x24,60 1033 leal 4(%r8),%eax 1034.byte 102,15,56,220,216 1035 xorl %ebp,%eax 1036.byte 0x0f,0x38,0xf1,0x44,0x24,76 1037.byte 102,15,56,220,224 1038 leal 5(%r8),%eax 1039 xorl %ebp,%eax 1040.byte 102,15,56,220,232 1041.byte 0x0f,0x38,0xf1,0x44,0x24,92 1042 movq %r10,%rax 1043.byte 102,15,56,220,240 1044.byte 102,15,56,220,248 1045 
movups -16(%rcx,%r10,1),%xmm0 1046 1047 call .Lenc_loop6 1048 1049 movdqu (%rdi),%xmm8 1050 movdqu 16(%rdi),%xmm9 1051 movdqu 32(%rdi),%xmm10 1052 movdqu 48(%rdi),%xmm11 1053 movdqu 64(%rdi),%xmm12 1054 movdqu 80(%rdi),%xmm13 1055 leaq 96(%rdi),%rdi 1056 movups -64(%rcx,%r10,1),%xmm1 1057 pxor %xmm2,%xmm8 1058 movaps 0(%rsp),%xmm2 1059 pxor %xmm3,%xmm9 1060 movaps 16(%rsp),%xmm3 1061 pxor %xmm4,%xmm10 1062 movaps 32(%rsp),%xmm4 1063 pxor %xmm5,%xmm11 1064 movaps 48(%rsp),%xmm5 1065 pxor %xmm6,%xmm12 1066 movaps 64(%rsp),%xmm6 1067 pxor %xmm7,%xmm13 1068 movaps 80(%rsp),%xmm7 1069 movdqu %xmm8,(%rsi) 1070 movdqu %xmm9,16(%rsi) 1071 movdqu %xmm10,32(%rsi) 1072 movdqu %xmm11,48(%rsi) 1073 movdqu %xmm12,64(%rsi) 1074 movdqu %xmm13,80(%rsi) 1075 leaq 96(%rsi),%rsi 1076 1077 subq $6,%rdx 1078 jnc .Lctr32_loop6 1079 1080 addq $6,%rdx 1081 jz .Lctr32_done 1082 1083 leal -48(%r10),%eax 1084 leaq -80(%rcx,%r10,1),%rcx 1085 negl %eax 1086 shrl $4,%eax 1087 jmp .Lctr32_tail 1088 1089.align 32 1090.Lctr32_loop8: 1091 addl $8,%r8d 1092 movdqa 96(%rsp),%xmm8 1093.byte 102,15,56,220,209 1094 movl %r8d,%r9d 1095 movdqa 112(%rsp),%xmm9 1096.byte 102,15,56,220,217 1097 bswapl %r9d 1098 movups 32-128(%rcx),%xmm0 1099.byte 102,15,56,220,225 1100 xorl %ebp,%r9d 1101 nop 1102.byte 102,15,56,220,233 1103 movl %r9d,0+12(%rsp) 1104 leaq 1(%r8),%r9 1105.byte 102,15,56,220,241 1106.byte 102,15,56,220,249 1107.byte 102,68,15,56,220,193 1108.byte 102,68,15,56,220,201 1109 movups 48-128(%rcx),%xmm1 1110 bswapl %r9d 1111.byte 102,15,56,220,208 1112.byte 102,15,56,220,216 1113 xorl %ebp,%r9d 1114.byte 0x66,0x90 1115.byte 102,15,56,220,224 1116.byte 102,15,56,220,232 1117 movl %r9d,16+12(%rsp) 1118 leaq 2(%r8),%r9 1119.byte 102,15,56,220,240 1120.byte 102,15,56,220,248 1121.byte 102,68,15,56,220,192 1122.byte 102,68,15,56,220,200 1123 movups 64-128(%rcx),%xmm0 1124 bswapl %r9d 1125.byte 102,15,56,220,209 1126.byte 102,15,56,220,217 1127 xorl %ebp,%r9d 1128.byte 0x66,0x90 1129.byte 102,15,56,220,225 
1130.byte 102,15,56,220,233 1131 movl %r9d,32+12(%rsp) 1132 leaq 3(%r8),%r9 1133.byte 102,15,56,220,241 1134.byte 102,15,56,220,249 1135.byte 102,68,15,56,220,193 1136.byte 102,68,15,56,220,201 1137 movups 80-128(%rcx),%xmm1 1138 bswapl %r9d 1139.byte 102,15,56,220,208 1140.byte 102,15,56,220,216 1141 xorl %ebp,%r9d 1142.byte 0x66,0x90 1143.byte 102,15,56,220,224 1144.byte 102,15,56,220,232 1145 movl %r9d,48+12(%rsp) 1146 leaq 4(%r8),%r9 1147.byte 102,15,56,220,240 1148.byte 102,15,56,220,248 1149.byte 102,68,15,56,220,192 1150.byte 102,68,15,56,220,200 1151 movups 96-128(%rcx),%xmm0 1152 bswapl %r9d 1153.byte 102,15,56,220,209 1154.byte 102,15,56,220,217 1155 xorl %ebp,%r9d 1156.byte 0x66,0x90 1157.byte 102,15,56,220,225 1158.byte 102,15,56,220,233 1159 movl %r9d,64+12(%rsp) 1160 leaq 5(%r8),%r9 1161.byte 102,15,56,220,241 1162.byte 102,15,56,220,249 1163.byte 102,68,15,56,220,193 1164.byte 102,68,15,56,220,201 1165 movups 112-128(%rcx),%xmm1 1166 bswapl %r9d 1167.byte 102,15,56,220,208 1168.byte 102,15,56,220,216 1169 xorl %ebp,%r9d 1170.byte 0x66,0x90 1171.byte 102,15,56,220,224 1172.byte 102,15,56,220,232 1173 movl %r9d,80+12(%rsp) 1174 leaq 6(%r8),%r9 1175.byte 102,15,56,220,240 1176.byte 102,15,56,220,248 1177.byte 102,68,15,56,220,192 1178.byte 102,68,15,56,220,200 1179 movups 128-128(%rcx),%xmm0 1180 bswapl %r9d 1181.byte 102,15,56,220,209 1182.byte 102,15,56,220,217 1183 xorl %ebp,%r9d 1184.byte 0x66,0x90 1185.byte 102,15,56,220,225 1186.byte 102,15,56,220,233 1187 movl %r9d,96+12(%rsp) 1188 leaq 7(%r8),%r9 1189.byte 102,15,56,220,241 1190.byte 102,15,56,220,249 1191.byte 102,68,15,56,220,193 1192.byte 102,68,15,56,220,201 1193 movups 144-128(%rcx),%xmm1 1194 bswapl %r9d 1195.byte 102,15,56,220,208 1196.byte 102,15,56,220,216 1197.byte 102,15,56,220,224 1198 xorl %ebp,%r9d 1199 movdqu 0(%rdi),%xmm10 1200.byte 102,15,56,220,232 1201 movl %r9d,112+12(%rsp) 1202 cmpl $11,%eax 1203.byte 102,15,56,220,240 1204.byte 102,15,56,220,248 1205.byte 
102,68,15,56,220,192 1206.byte 102,68,15,56,220,200 1207 movups 160-128(%rcx),%xmm0 1208 1209 jb .Lctr32_enc_done 1210 1211.byte 102,15,56,220,209 1212.byte 102,15,56,220,217 1213.byte 102,15,56,220,225 1214.byte 102,15,56,220,233 1215.byte 102,15,56,220,241 1216.byte 102,15,56,220,249 1217.byte 102,68,15,56,220,193 1218.byte 102,68,15,56,220,201 1219 movups 176-128(%rcx),%xmm1 1220 1221.byte 102,15,56,220,208 1222.byte 102,15,56,220,216 1223.byte 102,15,56,220,224 1224.byte 102,15,56,220,232 1225.byte 102,15,56,220,240 1226.byte 102,15,56,220,248 1227.byte 102,68,15,56,220,192 1228.byte 102,68,15,56,220,200 1229 movups 192-128(%rcx),%xmm0 1230 je .Lctr32_enc_done 1231 1232.byte 102,15,56,220,209 1233.byte 102,15,56,220,217 1234.byte 102,15,56,220,225 1235.byte 102,15,56,220,233 1236.byte 102,15,56,220,241 1237.byte 102,15,56,220,249 1238.byte 102,68,15,56,220,193 1239.byte 102,68,15,56,220,201 1240 movups 208-128(%rcx),%xmm1 1241 1242.byte 102,15,56,220,208 1243.byte 102,15,56,220,216 1244.byte 102,15,56,220,224 1245.byte 102,15,56,220,232 1246.byte 102,15,56,220,240 1247.byte 102,15,56,220,248 1248.byte 102,68,15,56,220,192 1249.byte 102,68,15,56,220,200 1250 movups 224-128(%rcx),%xmm0 1251 jmp .Lctr32_enc_done 1252 1253.align 16 1254.Lctr32_enc_done: 1255 movdqu 16(%rdi),%xmm11 1256 pxor %xmm0,%xmm10 1257 movdqu 32(%rdi),%xmm12 1258 pxor %xmm0,%xmm11 1259 movdqu 48(%rdi),%xmm13 1260 pxor %xmm0,%xmm12 1261 movdqu 64(%rdi),%xmm14 1262 pxor %xmm0,%xmm13 1263 movdqu 80(%rdi),%xmm15 1264 pxor %xmm0,%xmm14 1265 pxor %xmm0,%xmm15 1266.byte 102,15,56,220,209 1267.byte 102,15,56,220,217 1268.byte 102,15,56,220,225 1269.byte 102,15,56,220,233 1270.byte 102,15,56,220,241 1271.byte 102,15,56,220,249 1272.byte 102,68,15,56,220,193 1273.byte 102,68,15,56,220,201 1274 movdqu 96(%rdi),%xmm1 1275 leaq 128(%rdi),%rdi 1276 1277.byte 102,65,15,56,221,210 1278 pxor %xmm0,%xmm1 1279 movdqu 112-128(%rdi),%xmm10 1280.byte 102,65,15,56,221,219 1281 pxor %xmm0,%xmm10 1282 movdqa 
0(%rsp),%xmm11 1283.byte 102,65,15,56,221,228 1284.byte 102,65,15,56,221,237 1285 movdqa 16(%rsp),%xmm12 1286 movdqa 32(%rsp),%xmm13 1287.byte 102,65,15,56,221,246 1288.byte 102,65,15,56,221,255 1289 movdqa 48(%rsp),%xmm14 1290 movdqa 64(%rsp),%xmm15 1291.byte 102,68,15,56,221,193 1292 movdqa 80(%rsp),%xmm0 1293 movups 16-128(%rcx),%xmm1 1294.byte 102,69,15,56,221,202 1295 1296 movups %xmm2,(%rsi) 1297 movdqa %xmm11,%xmm2 1298 movups %xmm3,16(%rsi) 1299 movdqa %xmm12,%xmm3 1300 movups %xmm4,32(%rsi) 1301 movdqa %xmm13,%xmm4 1302 movups %xmm5,48(%rsi) 1303 movdqa %xmm14,%xmm5 1304 movups %xmm6,64(%rsi) 1305 movdqa %xmm15,%xmm6 1306 movups %xmm7,80(%rsi) 1307 movdqa %xmm0,%xmm7 1308 movups %xmm8,96(%rsi) 1309 movups %xmm9,112(%rsi) 1310 leaq 128(%rsi),%rsi 1311 1312 subq $8,%rdx 1313 jnc .Lctr32_loop8 1314 1315 addq $8,%rdx 1316 jz .Lctr32_done 1317 leaq -128(%rcx),%rcx 1318 1319.Lctr32_tail: 1320 1321 1322 leaq 16(%rcx),%rcx 1323 cmpq $4,%rdx 1324 jb .Lctr32_loop3 1325 je .Lctr32_loop4 1326 1327 1328 shll $4,%eax 1329 movdqa 96(%rsp),%xmm8 1330 pxor %xmm9,%xmm9 1331 1332 movups 16(%rcx),%xmm0 1333.byte 102,15,56,220,209 1334.byte 102,15,56,220,217 1335 leaq 32-16(%rcx,%rax,1),%rcx 1336 negq %rax 1337.byte 102,15,56,220,225 1338 addq $16,%rax 1339 movups (%rdi),%xmm10 1340.byte 102,15,56,220,233 1341.byte 102,15,56,220,241 1342 movups 16(%rdi),%xmm11 1343 movups 32(%rdi),%xmm12 1344.byte 102,15,56,220,249 1345.byte 102,68,15,56,220,193 1346 1347 call .Lenc_loop8_enter 1348 1349 movdqu 48(%rdi),%xmm13 1350 pxor %xmm10,%xmm2 1351 movdqu 64(%rdi),%xmm10 1352 pxor %xmm11,%xmm3 1353 movdqu %xmm2,(%rsi) 1354 pxor %xmm12,%xmm4 1355 movdqu %xmm3,16(%rsi) 1356 pxor %xmm13,%xmm5 1357 movdqu %xmm4,32(%rsi) 1358 pxor %xmm10,%xmm6 1359 movdqu %xmm5,48(%rsi) 1360 movdqu %xmm6,64(%rsi) 1361 cmpq $6,%rdx 1362 jb .Lctr32_done 1363 1364 movups 80(%rdi),%xmm11 1365 xorps %xmm11,%xmm7 1366 movups %xmm7,80(%rsi) 1367 je .Lctr32_done 1368 1369 movups 96(%rdi),%xmm12 1370 xorps 
%xmm12,%xmm8 1371 movups %xmm8,96(%rsi) 1372 jmp .Lctr32_done 1373 1374.align 32 1375.Lctr32_loop4: 1376.byte 102,15,56,220,209 1377 leaq 16(%rcx),%rcx 1378 decl %eax 1379.byte 102,15,56,220,217 1380.byte 102,15,56,220,225 1381.byte 102,15,56,220,233 1382 movups (%rcx),%xmm1 1383 jnz .Lctr32_loop4 1384.byte 102,15,56,221,209 1385.byte 102,15,56,221,217 1386 movups (%rdi),%xmm10 1387 movups 16(%rdi),%xmm11 1388.byte 102,15,56,221,225 1389.byte 102,15,56,221,233 1390 movups 32(%rdi),%xmm12 1391 movups 48(%rdi),%xmm13 1392 1393 xorps %xmm10,%xmm2 1394 movups %xmm2,(%rsi) 1395 xorps %xmm11,%xmm3 1396 movups %xmm3,16(%rsi) 1397 pxor %xmm12,%xmm4 1398 movdqu %xmm4,32(%rsi) 1399 pxor %xmm13,%xmm5 1400 movdqu %xmm5,48(%rsi) 1401 jmp .Lctr32_done 1402 1403.align 32 1404.Lctr32_loop3: 1405.byte 102,15,56,220,209 1406 leaq 16(%rcx),%rcx 1407 decl %eax 1408.byte 102,15,56,220,217 1409.byte 102,15,56,220,225 1410 movups (%rcx),%xmm1 1411 jnz .Lctr32_loop3 1412.byte 102,15,56,221,209 1413.byte 102,15,56,221,217 1414.byte 102,15,56,221,225 1415 1416 movups (%rdi),%xmm10 1417 xorps %xmm10,%xmm2 1418 movups %xmm2,(%rsi) 1419 cmpq $2,%rdx 1420 jb .Lctr32_done 1421 1422 movups 16(%rdi),%xmm11 1423 xorps %xmm11,%xmm3 1424 movups %xmm3,16(%rsi) 1425 je .Lctr32_done 1426 1427 movups 32(%rdi),%xmm12 1428 xorps %xmm12,%xmm4 1429 movups %xmm4,32(%rsi) 1430 1431.Lctr32_done: 1432 xorps %xmm0,%xmm0 1433 xorl %ebp,%ebp 1434 pxor %xmm1,%xmm1 1435 pxor %xmm2,%xmm2 1436 pxor %xmm3,%xmm3 1437 pxor %xmm4,%xmm4 1438 pxor %xmm5,%xmm5 1439 pxor %xmm6,%xmm6 1440 pxor %xmm7,%xmm7 1441 movaps %xmm0,0(%rsp) 1442 pxor %xmm8,%xmm8 1443 movaps %xmm0,16(%rsp) 1444 pxor %xmm9,%xmm9 1445 movaps %xmm0,32(%rsp) 1446 pxor %xmm10,%xmm10 1447 movaps %xmm0,48(%rsp) 1448 pxor %xmm11,%xmm11 1449 movaps %xmm0,64(%rsp) 1450 pxor %xmm12,%xmm12 1451 movaps %xmm0,80(%rsp) 1452 pxor %xmm13,%xmm13 1453 movaps %xmm0,96(%rsp) 1454 pxor %xmm14,%xmm14 1455 movaps %xmm0,112(%rsp) 1456 pxor %xmm15,%xmm15 1457 movq -8(%r11),%rbp 
# ----------------------------------------------------------------------
# Reading guide for the listing that follows (code text is unchanged).
#
# The statements up to ".size aes_hw_ctr32_encrypt_blocks" close the
# preceding routine: CFI restore of %rbp, %rsp reloaded from %r11, and a
# "rep ret" spelled as the raw bytes 0xf3,0xc3.
#
# aes_hw_cbc_encrypt: AES-CBC encryption and decryption with AES-NI.
# Register roles, as used by the code:
#   %rdi  source pointer; input blocks are loaded from it
#   %rsi  destination pointer; output blocks are stored to it
#   %rdx  remaining length in bytes; zero returns at once via .Lcbc_ret
#   %rcx  key schedule; the round counter is read from 240(%rcx) into
#         %r10d and %rcx is copied to %r11 at entry
#   %r8   16-byte IV, read on entry and overwritten with the next
#         chaining value before returning
#   %r9d  direction; non-zero encrypts, zero jumps to .Lcbc_decrypt
# NOTE(review): these match the SysV AMD64 integer argument registers;
# the C prototype is not visible here, so confirm the parameter order.
#
# Raw encodings used below:
#   .byte 102,15,56,220,modrm   aesenc       (221 = aesenclast)
#   .byte 102,15,56,222,modrm   aesdec       (223 = aesdeclast)
#   .long 0x9066A4F3            rep movsb, then a two-byte nop
#   .long 0x9066AAF3            rep stosb, then a two-byte nop
#   A 65/68/69 byte after the 102 is a REX prefix selecting %xmm8-%xmm15.
#
# Encrypt path (.Lcbc_enc_loop): %xmm2 carries the chaining value,
# starting from the IV. Each pass loads one block into %xmm3, xors round
# key 0 into it, folds it into %xmm2, runs the .Loop_enc1_6 round loop
# (%eax counts down, then is reloaded from %r10d; %rcx is rewound from
# %r11) and stores %xmm2. "subq $16,%rdx / jnc" repeats while whole
# blocks remain; "addq $16,%rdx" then recovers the leftover byte count.
# With no leftover, %xmm2 is written to (%r8) and %xmm0-%xmm3 are zeroed.
#
# .Lcbc_enc_tail (fewer than 16 bytes left): swaps %rdi and %rsi, copies
# the leftover bytes to the destination with rep movsb, zero-fills the
# rest of that 16-byte block with rep stosb (%eax = 0, count is 16 minus
# %rdx), points both %rdi and %rsi at the padded block, clears %rdx and
# re-enters .Lcbc_enc_loop so the block is encrypted in place.
#
# .Lcbc_decrypt: a length of exactly 16 takes a one-block fast path
# (%xmm4 keeps the ciphertext and is written to (%r8) as the next
# chaining value; %r10d itself is the round counter) and leaves through
# .Lcbc_ret without touching the stack. Any other length goes to
# .Lcbc_decrypt_bulk.
#
# .Lcbc_decrypt_bulk: saves the incoming %rsp in %r11, pushes %rbp and
# carves out a 16-byte aligned scratch slot at (%rsp). %rbp initially
# keeps a copy of the key schedule pointer (the eight-block loop later
# reuses %rbp as a lookahead pointer), %xmm10 holds the chaining value
# and %eax the round counter. Dispatch on %rdx:
#   - at most 0x50 bytes: .Lcbc_dec_tail
#   - otherwise six blocks are loaded into %xmm2-%xmm7, copies of the
#     first five are kept in %xmm11-%xmm15 for the CBC xor, and the
#     dword at OPENSSL_ia32cap_P+4 is fetched into %r9d
#   - at most 0x70 bytes: .Lcbc_dec_six_or_seven
#   - (%r9d & 71303168) == 4194304, that is bit 22 set and bit 26
#     clear: six-block loop at .Lcbc_dec_loop6_enter, %rdx less 0x50
#   - anything else: eight-block loop at .Lcbc_dec_loop8_enter, with
#     %rdx less 0x70 in total and %rcx biased by +112
# NOTE(review): which CPU features bits 22 and 26 stand for depends on
# the OPENSSL_ia32cap_P layout, which is defined outside this file.
#
# .Lcbc_dec_loop8 keeps eight blocks in flight in %xmm2-%xmm9 and
# alternates round keys between %xmm1 and %xmm0, addressed as
# N-112(%rcx) because of the bias. "cmpq $0x70,%rdx" together with the
# later "adcq $0,%rbp / andq $128,%rbp / addq %rdi,%rbp" leaves %rbp at
# %rdi+128 when %rdx is at least 0x70 at that point (no carry from the
# unsigned compare) and at %rdi otherwise; no instruction between the
# cmpq and the adcq writes the flags.
# ----------------------------------------------------------------------
1458.cfi_restore %rbp 1459 leaq (%r11),%rsp 1460.cfi_def_cfa_register %rsp 1461.Lctr32_epilogue: 1462 .byte 0xf3,0xc3 1463.cfi_endproc 1464.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks 1465.globl aes_hw_cbc_encrypt 1466.hidden aes_hw_cbc_encrypt 1467.type aes_hw_cbc_encrypt,@function 1468.align 16 1469aes_hw_cbc_encrypt: 1470.cfi_startproc 1471 testq %rdx,%rdx 1472 jz .Lcbc_ret 1473 1474 movl 240(%rcx),%r10d 1475 movq %rcx,%r11 1476 testl %r9d,%r9d 1477 jz .Lcbc_decrypt 1478 1479 movups (%r8),%xmm2 1480 movl %r10d,%eax 1481 cmpq $16,%rdx 1482 jb .Lcbc_enc_tail 1483 subq $16,%rdx 1484 jmp .Lcbc_enc_loop 1485.align 16 1486.Lcbc_enc_loop: 1487 movups (%rdi),%xmm3 1488 leaq 16(%rdi),%rdi 1489 1490 movups (%rcx),%xmm0 1491 movups 16(%rcx),%xmm1 1492 xorps %xmm0,%xmm3 1493 leaq 32(%rcx),%rcx 1494 xorps %xmm3,%xmm2 1495.Loop_enc1_6: 1496.byte 102,15,56,220,209 1497 decl %eax 1498 movups (%rcx),%xmm1 1499 leaq 16(%rcx),%rcx 1500 jnz .Loop_enc1_6 1501.byte 102,15,56,221,209 1502 movl %r10d,%eax 1503 movq %r11,%rcx 1504 movups %xmm2,0(%rsi) 1505 leaq 16(%rsi),%rsi 1506 subq $16,%rdx 1507 jnc .Lcbc_enc_loop 1508 addq $16,%rdx 1509 jnz .Lcbc_enc_tail 1510 pxor %xmm0,%xmm0 1511 pxor %xmm1,%xmm1 1512 movups %xmm2,(%r8) 1513 pxor %xmm2,%xmm2 1514 pxor %xmm3,%xmm3 1515 jmp .Lcbc_ret 1516 1517.Lcbc_enc_tail: 1518 movq %rdx,%rcx 1519 xchgq %rdi,%rsi 1520.long 0x9066A4F3 1521 movl $16,%ecx 1522 subq %rdx,%rcx 1523 xorl %eax,%eax 1524.long 0x9066AAF3 1525 leaq -16(%rdi),%rdi 1526 movl %r10d,%eax 1527 movq %rdi,%rsi 1528 movq %r11,%rcx 1529 xorq %rdx,%rdx 1530 jmp .Lcbc_enc_loop 1531 1532.align 16 1533.Lcbc_decrypt: 1534 cmpq $16,%rdx 1535 jne .Lcbc_decrypt_bulk 1536 1537 1538 1539 movdqu (%rdi),%xmm2 1540 movdqu (%r8),%xmm3 1541 movdqa %xmm2,%xmm4 1542 movups (%rcx),%xmm0 1543 movups 16(%rcx),%xmm1 1544 leaq 32(%rcx),%rcx 1545 xorps %xmm0,%xmm2 1546.Loop_dec1_7: 1547.byte 102,15,56,222,209 1548 decl %r10d 1549 movups (%rcx),%xmm1 1550 leaq 16(%rcx),%rcx 1551 jnz 
.Loop_dec1_7 1552.byte 102,15,56,223,209 1553 pxor %xmm0,%xmm0 1554 pxor %xmm1,%xmm1 1555 movdqu %xmm4,(%r8) 1556 xorps %xmm3,%xmm2 1557 pxor %xmm3,%xmm3 1558 movups %xmm2,(%rsi) 1559 pxor %xmm2,%xmm2 1560 jmp .Lcbc_ret 1561.align 16 1562.Lcbc_decrypt_bulk: 1563 leaq (%rsp),%r11 1564.cfi_def_cfa_register %r11 1565 pushq %rbp 1566.cfi_offset %rbp,-16 1567 subq $16,%rsp 1568 andq $-16,%rsp 1569 movq %rcx,%rbp 1570 movups (%r8),%xmm10 1571 movl %r10d,%eax 1572 cmpq $0x50,%rdx 1573 jbe .Lcbc_dec_tail 1574 1575 movups (%rcx),%xmm0 1576 movdqu 0(%rdi),%xmm2 1577 movdqu 16(%rdi),%xmm3 1578 movdqa %xmm2,%xmm11 1579 movdqu 32(%rdi),%xmm4 1580 movdqa %xmm3,%xmm12 1581 movdqu 48(%rdi),%xmm5 1582 movdqa %xmm4,%xmm13 1583 movdqu 64(%rdi),%xmm6 1584 movdqa %xmm5,%xmm14 1585 movdqu 80(%rdi),%xmm7 1586 movdqa %xmm6,%xmm15 1587 leaq OPENSSL_ia32cap_P(%rip),%r9 1588 movl 4(%r9),%r9d 1589 cmpq $0x70,%rdx 1590 jbe .Lcbc_dec_six_or_seven 1591 1592 andl $71303168,%r9d 1593 subq $0x50,%rdx 1594 cmpl $4194304,%r9d 1595 je .Lcbc_dec_loop6_enter 1596 subq $0x20,%rdx 1597 leaq 112(%rcx),%rcx 1598 jmp .Lcbc_dec_loop8_enter 1599.align 16 1600.Lcbc_dec_loop8: 1601 movups %xmm9,(%rsi) 1602 leaq 16(%rsi),%rsi 1603.Lcbc_dec_loop8_enter: 1604 movdqu 96(%rdi),%xmm8 1605 pxor %xmm0,%xmm2 1606 movdqu 112(%rdi),%xmm9 1607 pxor %xmm0,%xmm3 1608 movups 16-112(%rcx),%xmm1 1609 pxor %xmm0,%xmm4 1610 movq $-1,%rbp 1611 cmpq $0x70,%rdx 1612 pxor %xmm0,%xmm5 1613 pxor %xmm0,%xmm6 1614 pxor %xmm0,%xmm7 1615 pxor %xmm0,%xmm8 1616 1617.byte 102,15,56,222,209 1618 pxor %xmm0,%xmm9 1619 movups 32-112(%rcx),%xmm0 1620.byte 102,15,56,222,217 1621.byte 102,15,56,222,225 1622.byte 102,15,56,222,233 1623.byte 102,15,56,222,241 1624.byte 102,15,56,222,249 1625.byte 102,68,15,56,222,193 1626 adcq $0,%rbp 1627 andq $128,%rbp 1628.byte 102,68,15,56,222,201 1629 addq %rdi,%rbp 1630 movups 48-112(%rcx),%xmm1 1631.byte 102,15,56,222,208 1632.byte 102,15,56,222,216 1633.byte 102,15,56,222,224 1634.byte 102,15,56,222,232 
# ----------------------------------------------------------------------
# .Lcbc_dec_loop8, continued. Each straight-line group of eight aesdec
# encodings applies one round key to %xmm2-%xmm9. "cmpl $11,%eax" sets
# the flags once: "jb .Lcbc_dec_done" leaves when %eax is below 11,
# "je .Lcbc_dec_done" leaves two round keys later when it equals 11, and
# larger values run two more round keys before the plain jmp. None of
# the instructions in between writes the flags.
#
# .Lcbc_dec_done: runs one more aesdec round with %xmm1 while xoring the
# final round key (%xmm0) into the chaining value (%xmm10) and into the
# saved ciphertext (%xmm11-%xmm15, then the blocks at offsets 80 and 96
# from %rdi, held in %xmm1 and %xmm10). The aesdeclast encodings take
# those registers as their key operand, so the last round and the CBC
# xor are one instruction per block. Afterwards the block at 112(%rdi)
# becomes the next chaining value, %rdi advances by 128, the next
# ciphertext copies are loaded through %rbp and %xmm0 is reloaded from
# -112(%rcx). Seven plaintext blocks are stored here; the eighth
# (%xmm9) is stored at .Lcbc_dec_loop8 on the next pass or right after
# the loop.
#
# After the loop %xmm9 moves to %xmm2, %rcx drops the +112 bias and the
# value 0x70 is added back to %rdx. A result that is zero or negative
# (jle) goes to .Lcbc_dec_clear_tail_collected with the last plaintext
# block still in %xmm2. Otherwise %xmm9 is stored; at most 0x50 bytes
# left goes to .Lcbc_dec_tail, anything more moves the preloaded %xmm11
# into %xmm2 and falls into .Lcbc_dec_six_or_seven.
#
# .Lcbc_dec_six_or_seven: more than 0x60 bytes selects .Lcbc_dec_seven.
# The six-block case keeps the sixth ciphertext block (%xmm7) in %xmm8
# as the next chaining value, calls _aesni_decrypt6, xors the results
# with %xmm10-%xmm15, stores five blocks and leaves the sixth in %xmm2
# for .Lcbc_dec_tail_collected. .Lcbc_dec_seven loads a seventh block
# into %xmm8, zeroes %xmm9, calls _aesni_decrypt8, reloads the blocks at
# offsets 80 and 96 from %rdi for the xor and the next chaining value,
# stores six blocks and leaves the seventh in %xmm2. Registers that held
# plaintext are zeroed once stored. _aesni_decrypt6 and _aesni_decrypt8
# are not defined in the lines covered by this guide.
# ----------------------------------------------------------------------
1635.byte 102,15,56,222,240 1636.byte 102,15,56,222,248 1637.byte 102,68,15,56,222,192 1638.byte 102,68,15,56,222,200 1639 movups 64-112(%rcx),%xmm0 1640 nop 1641.byte 102,15,56,222,209 1642.byte 102,15,56,222,217 1643.byte 102,15,56,222,225 1644.byte 102,15,56,222,233 1645.byte 102,15,56,222,241 1646.byte 102,15,56,222,249 1647.byte 102,68,15,56,222,193 1648.byte 102,68,15,56,222,201 1649 movups 80-112(%rcx),%xmm1 1650 nop 1651.byte 102,15,56,222,208 1652.byte 102,15,56,222,216 1653.byte 102,15,56,222,224 1654.byte 102,15,56,222,232 1655.byte 102,15,56,222,240 1656.byte 102,15,56,222,248 1657.byte 102,68,15,56,222,192 1658.byte 102,68,15,56,222,200 1659 movups 96-112(%rcx),%xmm0 1660 nop 1661.byte 102,15,56,222,209 1662.byte 102,15,56,222,217 1663.byte 102,15,56,222,225 1664.byte 102,15,56,222,233 1665.byte 102,15,56,222,241 1666.byte 102,15,56,222,249 1667.byte 102,68,15,56,222,193 1668.byte 102,68,15,56,222,201 1669 movups 112-112(%rcx),%xmm1 1670 nop 1671.byte 102,15,56,222,208 1672.byte 102,15,56,222,216 1673.byte 102,15,56,222,224 1674.byte 102,15,56,222,232 1675.byte 102,15,56,222,240 1676.byte 102,15,56,222,248 1677.byte 102,68,15,56,222,192 1678.byte 102,68,15,56,222,200 1679 movups 128-112(%rcx),%xmm0 1680 nop 1681.byte 102,15,56,222,209 1682.byte 102,15,56,222,217 1683.byte 102,15,56,222,225 1684.byte 102,15,56,222,233 1685.byte 102,15,56,222,241 1686.byte 102,15,56,222,249 1687.byte 102,68,15,56,222,193 1688.byte 102,68,15,56,222,201 1689 movups 144-112(%rcx),%xmm1 1690 cmpl $11,%eax 1691.byte 102,15,56,222,208 1692.byte 102,15,56,222,216 1693.byte 102,15,56,222,224 1694.byte 102,15,56,222,232 1695.byte 102,15,56,222,240 1696.byte 102,15,56,222,248 1697.byte 102,68,15,56,222,192 1698.byte 102,68,15,56,222,200 1699 movups 160-112(%rcx),%xmm0 1700 jb .Lcbc_dec_done 1701.byte 102,15,56,222,209 1702.byte 102,15,56,222,217 1703.byte 102,15,56,222,225 1704.byte 102,15,56,222,233 1705.byte 102,15,56,222,241 1706.byte 102,15,56,222,249 1707.byte 
102,68,15,56,222,193 1708.byte 102,68,15,56,222,201 1709 movups 176-112(%rcx),%xmm1 1710 nop 1711.byte 102,15,56,222,208 1712.byte 102,15,56,222,216 1713.byte 102,15,56,222,224 1714.byte 102,15,56,222,232 1715.byte 102,15,56,222,240 1716.byte 102,15,56,222,248 1717.byte 102,68,15,56,222,192 1718.byte 102,68,15,56,222,200 1719 movups 192-112(%rcx),%xmm0 1720 je .Lcbc_dec_done 1721.byte 102,15,56,222,209 1722.byte 102,15,56,222,217 1723.byte 102,15,56,222,225 1724.byte 102,15,56,222,233 1725.byte 102,15,56,222,241 1726.byte 102,15,56,222,249 1727.byte 102,68,15,56,222,193 1728.byte 102,68,15,56,222,201 1729 movups 208-112(%rcx),%xmm1 1730 nop 1731.byte 102,15,56,222,208 1732.byte 102,15,56,222,216 1733.byte 102,15,56,222,224 1734.byte 102,15,56,222,232 1735.byte 102,15,56,222,240 1736.byte 102,15,56,222,248 1737.byte 102,68,15,56,222,192 1738.byte 102,68,15,56,222,200 1739 movups 224-112(%rcx),%xmm0 1740 jmp .Lcbc_dec_done 1741.align 16 1742.Lcbc_dec_done: 1743.byte 102,15,56,222,209 1744.byte 102,15,56,222,217 1745 pxor %xmm0,%xmm10 1746 pxor %xmm0,%xmm11 1747.byte 102,15,56,222,225 1748.byte 102,15,56,222,233 1749 pxor %xmm0,%xmm12 1750 pxor %xmm0,%xmm13 1751.byte 102,15,56,222,241 1752.byte 102,15,56,222,249 1753 pxor %xmm0,%xmm14 1754 pxor %xmm0,%xmm15 1755.byte 102,68,15,56,222,193 1756.byte 102,68,15,56,222,201 1757 movdqu 80(%rdi),%xmm1 1758 1759.byte 102,65,15,56,223,210 1760 movdqu 96(%rdi),%xmm10 1761 pxor %xmm0,%xmm1 1762.byte 102,65,15,56,223,219 1763 pxor %xmm0,%xmm10 1764 movdqu 112(%rdi),%xmm0 1765.byte 102,65,15,56,223,228 1766 leaq 128(%rdi),%rdi 1767 movdqu 0(%rbp),%xmm11 1768.byte 102,65,15,56,223,237 1769.byte 102,65,15,56,223,246 1770 movdqu 16(%rbp),%xmm12 1771 movdqu 32(%rbp),%xmm13 1772.byte 102,65,15,56,223,255 1773.byte 102,68,15,56,223,193 1774 movdqu 48(%rbp),%xmm14 1775 movdqu 64(%rbp),%xmm15 1776.byte 102,69,15,56,223,202 1777 movdqa %xmm0,%xmm10 1778 movdqu 80(%rbp),%xmm1 1779 movups -112(%rcx),%xmm0 1780 1781 movups %xmm2,(%rsi) 1782 
movdqa %xmm11,%xmm2 1783 movups %xmm3,16(%rsi) 1784 movdqa %xmm12,%xmm3 1785 movups %xmm4,32(%rsi) 1786 movdqa %xmm13,%xmm4 1787 movups %xmm5,48(%rsi) 1788 movdqa %xmm14,%xmm5 1789 movups %xmm6,64(%rsi) 1790 movdqa %xmm15,%xmm6 1791 movups %xmm7,80(%rsi) 1792 movdqa %xmm1,%xmm7 1793 movups %xmm8,96(%rsi) 1794 leaq 112(%rsi),%rsi 1795 1796 subq $0x80,%rdx 1797 ja .Lcbc_dec_loop8 1798 1799 movaps %xmm9,%xmm2 1800 leaq -112(%rcx),%rcx 1801 addq $0x70,%rdx 1802 jle .Lcbc_dec_clear_tail_collected 1803 movups %xmm9,(%rsi) 1804 leaq 16(%rsi),%rsi 1805 cmpq $0x50,%rdx 1806 jbe .Lcbc_dec_tail 1807 1808 movaps %xmm11,%xmm2 1809.Lcbc_dec_six_or_seven: 1810 cmpq $0x60,%rdx 1811 ja .Lcbc_dec_seven 1812 1813 movaps %xmm7,%xmm8 1814 call _aesni_decrypt6 1815 pxor %xmm10,%xmm2 1816 movaps %xmm8,%xmm10 1817 pxor %xmm11,%xmm3 1818 movdqu %xmm2,(%rsi) 1819 pxor %xmm12,%xmm4 1820 movdqu %xmm3,16(%rsi) 1821 pxor %xmm3,%xmm3 1822 pxor %xmm13,%xmm5 1823 movdqu %xmm4,32(%rsi) 1824 pxor %xmm4,%xmm4 1825 pxor %xmm14,%xmm6 1826 movdqu %xmm5,48(%rsi) 1827 pxor %xmm5,%xmm5 1828 pxor %xmm15,%xmm7 1829 movdqu %xmm6,64(%rsi) 1830 pxor %xmm6,%xmm6 1831 leaq 80(%rsi),%rsi 1832 movdqa %xmm7,%xmm2 1833 pxor %xmm7,%xmm7 1834 jmp .Lcbc_dec_tail_collected 1835 1836.align 16 1837.Lcbc_dec_seven: 1838 movups 96(%rdi),%xmm8 1839 xorps %xmm9,%xmm9 1840 call _aesni_decrypt8 1841 movups 80(%rdi),%xmm9 1842 pxor %xmm10,%xmm2 1843 movups 96(%rdi),%xmm10 1844 pxor %xmm11,%xmm3 1845 movdqu %xmm2,(%rsi) 1846 pxor %xmm12,%xmm4 1847 movdqu %xmm3,16(%rsi) 1848 pxor %xmm3,%xmm3 1849 pxor %xmm13,%xmm5 1850 movdqu %xmm4,32(%rsi) 1851 pxor %xmm4,%xmm4 1852 pxor %xmm14,%xmm6 1853 movdqu %xmm5,48(%rsi) 1854 pxor %xmm5,%xmm5 1855 pxor %xmm15,%xmm7 1856 movdqu %xmm6,64(%rsi) 1857 pxor %xmm6,%xmm6 1858 pxor %xmm9,%xmm8 1859 movdqu %xmm7,80(%rsi) 1860 pxor %xmm7,%xmm7 1861 leaq 96(%rsi),%rsi 1862 movdqa %xmm8,%xmm2 1863 pxor %xmm8,%xmm8 1864 pxor %xmm9,%xmm9 1865 jmp .Lcbc_dec_tail_collected 1866 1867.align 16 
# ----------------------------------------------------------------------
# .Lcbc_dec_loop6: six blocks per pass. The sixth plaintext block of the
# previous pass (%xmm7) is stored first, then six blocks are loaded into
# %xmm2-%xmm7 with copies of the first five in %xmm11-%xmm15. From
# .Lcbc_dec_loop6_enter onward: %rdi advances by 96, %xmm8 keeps the
# sixth ciphertext block as the next chaining value, _aesni_decrypt6
# runs, the results are xored with %xmm10-%xmm15, five blocks are
# stored, and %rcx and %eax are reloaded from %rbp and %r10d before the
# next pass. On exit %xmm7 moves to %xmm2 and 0x50 is added back to
# %rdx; a result that is zero or negative (jle) goes to
# .Lcbc_dec_clear_tail_collected, otherwise %xmm7 is stored and control
# falls into .Lcbc_dec_tail.
#
# .Lcbc_dec_tail: handles the last one to five blocks. Blocks are loaded
# one at a time, 0x10 is subtracted from %rdx after each, and jbe
# dispatches to .Lcbc_dec_one, _two, _three or _four; the fall-through
# case decrypts five blocks with _aesni_decrypt6 after zeroing %xmm7.
# Every case ends with the next chaining value in %xmm10, the final
# plaintext block in %xmm2 and %rsi pointing at the place for that
# block, then jumps to .Lcbc_dec_tail_collected.
#
# .Lcbc_dec_clear_tail_collected zeroes %xmm3-%xmm9 and falls into
# .Lcbc_dec_tail_collected, which writes %xmm10 to (%r8) and tests
# %rdx & 15: zero stores %xmm2 to (%rsi) and clears it; non-zero goes to
# .Lcbc_dec_tail_partial, which parks %xmm2 in the aligned stack slot.
# ----------------------------------------------------------------------
1868.Lcbc_dec_loop6: 1869 movups %xmm7,(%rsi) 1870 leaq 16(%rsi),%rsi 1871 movdqu 0(%rdi),%xmm2 1872 movdqu 16(%rdi),%xmm3 1873 movdqa %xmm2,%xmm11 1874 movdqu 32(%rdi),%xmm4 1875 movdqa %xmm3,%xmm12 1876 movdqu 48(%rdi),%xmm5 1877 movdqa %xmm4,%xmm13 1878 movdqu 64(%rdi),%xmm6 1879 movdqa %xmm5,%xmm14 1880 movdqu 80(%rdi),%xmm7 1881 movdqa %xmm6,%xmm15 1882.Lcbc_dec_loop6_enter: 1883 leaq 96(%rdi),%rdi 1884 movdqa %xmm7,%xmm8 1885 1886 call _aesni_decrypt6 1887 1888 pxor %xmm10,%xmm2 1889 movdqa %xmm8,%xmm10 1890 pxor %xmm11,%xmm3 1891 movdqu %xmm2,(%rsi) 1892 pxor %xmm12,%xmm4 1893 movdqu %xmm3,16(%rsi) 1894 pxor %xmm13,%xmm5 1895 movdqu %xmm4,32(%rsi) 1896 pxor %xmm14,%xmm6 1897 movq %rbp,%rcx 1898 movdqu %xmm5,48(%rsi) 1899 pxor %xmm15,%xmm7 1900 movl %r10d,%eax 1901 movdqu %xmm6,64(%rsi) 1902 leaq 80(%rsi),%rsi 1903 subq $0x60,%rdx 1904 ja .Lcbc_dec_loop6 1905 1906 movdqa %xmm7,%xmm2 1907 addq $0x50,%rdx 1908 jle .Lcbc_dec_clear_tail_collected 1909 movups %xmm7,(%rsi) 1910 leaq 16(%rsi),%rsi 1911 1912.Lcbc_dec_tail: 1913 movups (%rdi),%xmm2 1914 subq $0x10,%rdx 1915 jbe .Lcbc_dec_one 1916 1917 movups 16(%rdi),%xmm3 1918 movaps %xmm2,%xmm11 1919 subq $0x10,%rdx 1920 jbe .Lcbc_dec_two 1921 1922 movups 32(%rdi),%xmm4 1923 movaps %xmm3,%xmm12 1924 subq $0x10,%rdx 1925 jbe .Lcbc_dec_three 1926 1927 movups 48(%rdi),%xmm5 1928 movaps %xmm4,%xmm13 1929 subq $0x10,%rdx 1930 jbe .Lcbc_dec_four 1931 1932 movups 64(%rdi),%xmm6 1933 movaps %xmm5,%xmm14 1934 movaps %xmm6,%xmm15 1935 xorps %xmm7,%xmm7 1936 call _aesni_decrypt6 1937 pxor %xmm10,%xmm2 1938 movaps %xmm15,%xmm10 1939 pxor %xmm11,%xmm3 1940 movdqu %xmm2,(%rsi) 1941 pxor %xmm12,%xmm4 1942 movdqu %xmm3,16(%rsi) 1943 pxor %xmm3,%xmm3 1944 pxor %xmm13,%xmm5 1945 movdqu %xmm4,32(%rsi) 1946 pxor %xmm4,%xmm4 1947 pxor %xmm14,%xmm6 1948 movdqu %xmm5,48(%rsi) 1949 pxor %xmm5,%xmm5 1950 leaq 64(%rsi),%rsi 1951 movdqa %xmm6,%xmm2 1952 pxor %xmm6,%xmm6 1953 pxor %xmm7,%xmm7 1954 subq $0x10,%rdx 1955 jmp 
.Lcbc_dec_tail_collected 1956 1957.align 16 1958.Lcbc_dec_one: 1959 movaps %xmm2,%xmm11 1960 movups (%rcx),%xmm0 1961 movups 16(%rcx),%xmm1 1962 leaq 32(%rcx),%rcx 1963 xorps %xmm0,%xmm2 1964.Loop_dec1_8: 1965.byte 102,15,56,222,209 1966 decl %eax 1967 movups (%rcx),%xmm1 1968 leaq 16(%rcx),%rcx 1969 jnz .Loop_dec1_8 1970.byte 102,15,56,223,209 1971 xorps %xmm10,%xmm2 1972 movaps %xmm11,%xmm10 1973 jmp .Lcbc_dec_tail_collected 1974.align 16 1975.Lcbc_dec_two: 1976 movaps %xmm3,%xmm12 1977 call _aesni_decrypt2 1978 pxor %xmm10,%xmm2 1979 movaps %xmm12,%xmm10 1980 pxor %xmm11,%xmm3 1981 movdqu %xmm2,(%rsi) 1982 movdqa %xmm3,%xmm2 1983 pxor %xmm3,%xmm3 1984 leaq 16(%rsi),%rsi 1985 jmp .Lcbc_dec_tail_collected 1986.align 16 1987.Lcbc_dec_three: 1988 movaps %xmm4,%xmm13 1989 call _aesni_decrypt3 1990 pxor %xmm10,%xmm2 1991 movaps %xmm13,%xmm10 1992 pxor %xmm11,%xmm3 1993 movdqu %xmm2,(%rsi) 1994 pxor %xmm12,%xmm4 1995 movdqu %xmm3,16(%rsi) 1996 pxor %xmm3,%xmm3 1997 movdqa %xmm4,%xmm2 1998 pxor %xmm4,%xmm4 1999 leaq 32(%rsi),%rsi 2000 jmp .Lcbc_dec_tail_collected 2001.align 16 2002.Lcbc_dec_four: 2003 movaps %xmm5,%xmm14 2004 call _aesni_decrypt4 2005 pxor %xmm10,%xmm2 2006 movaps %xmm14,%xmm10 2007 pxor %xmm11,%xmm3 2008 movdqu %xmm2,(%rsi) 2009 pxor %xmm12,%xmm4 2010 movdqu %xmm3,16(%rsi) 2011 pxor %xmm3,%xmm3 2012 pxor %xmm13,%xmm5 2013 movdqu %xmm4,32(%rsi) 2014 pxor %xmm4,%xmm4 2015 movdqa %xmm5,%xmm2 2016 pxor %xmm5,%xmm5 2017 leaq 48(%rsi),%rsi 2018 jmp .Lcbc_dec_tail_collected 2019 2020.align 16 2021.Lcbc_dec_clear_tail_collected: 2022 pxor %xmm3,%xmm3 2023 pxor %xmm4,%xmm4 2024 pxor %xmm5,%xmm5 2025 pxor %xmm6,%xmm6 2026 pxor %xmm7,%xmm7 2027 pxor %xmm8,%xmm8 2028 pxor %xmm9,%xmm9 2029.Lcbc_dec_tail_collected: 2030 movups %xmm10,(%r8) 2031 andq $15,%rdx 2032 jnz .Lcbc_dec_tail_partial 2033 movups %xmm2,(%rsi) 2034 pxor %xmm2,%xmm2 2035 jmp .Lcbc_dec_ret 2036.align 16 2037.Lcbc_dec_tail_partial: 2038 movaps %xmm2,(%rsp) 2039 pxor %xmm2,%xmm2 2040 movq $16,%rcx 
# ----------------------------------------------------------------------
# End of aes_hw_cbc_encrypt. .Lcbc_dec_tail_partial finishes by copying
# (16 minus %rdx & 15) bytes from the stack slot to the destination with
# rep movsb and then overwriting the slot with the zeroed %xmm2.
# .Lcbc_dec_ret zeroes %xmm0 and %xmm1 and restores %rbp and %rsp from
# the frame anchored at %r11; .Lcbc_ret is the shared "rep ret".
#
# aes_hw_set_decrypt_key: builds a decryption key schedule.
# It calls __aesni_set_encrypt_key with the incoming arguments untouched
# and returns the %eax of that routine unchanged; a non-zero value skips
# the conversion. On success %rdi = %rdx + 16*%esi + 16 addresses the
# last round key (%esi holds the round value left by the callee). The
# first and last round keys are swapped as they are. .Ldec_key_inverse
# then walks inward from both ends, applying aesimc
# (.byte 102,15,56,219,modrm) to each pair and swapping it, while %rdi
# stays above %rdx; the key at the meeting point is transformed last.
# The bytes 0x48,0x83,0xEC,0x08 are "subq $8,%rsp" in raw form.
#
# aes_hw_set_encrypt_key (local alias __aesni_set_encrypt_key): expands
# a user key into a round-key schedule.
#   %rdi  user key bytes
#   %esi  key size in bits: 128, 192 or 256
#   %rdx  schedule to fill
#   %rax  result: 0 on success, -1 when %rdi or %rdx is zero, -2 for
#         any other key size
# When BORINGSSL_DISPATCH_TEST is defined the entry first writes 1 to
# BORINGSSL_function_hit+3. Every exit goes through .Lenc_key_ret.
# Every success path stores the round value (%esi = 9, 11 or 13) at
# offset 240 from %rdx, the offset the cipher routines read it from.
# %rax serves as the write cursor (starting at %rdx+16, or %rdx+32 for
# 256-bit keys) until it is overwritten with the return value.
#
# Each key size has two implementations. The default pairs
# aeskeygenassist (.byte 102,15,58,223,modrm,rcon) with the
# .Lkey_expansion_* helpers. The *_alt variant is chosen when the dword
# at OPENSSL_ia32cap_P+4, masked with 268437504, equals 268435456 (bit
# number 28 set, bit number 11 clear); it uses pshufb
# (.byte 102,15,56,0,modrm) and aesenclast (.byte 102,15,56,221,modrm)
# with the .Lkey_rotate* and .Lkey_rcon* tables, doubling the round
# constant with "pslld $1".
# NOTE(review): the feature meaning of bits 28 and 11 is defined with
# OPENSSL_ia32cap_P, outside this file.
# ----------------------------------------------------------------------
2041 movq %rsi,%rdi 2042 subq %rdx,%rcx 2043 leaq (%rsp),%rsi 2044.long 0x9066A4F3 2045 movdqa %xmm2,(%rsp) 2046 2047.Lcbc_dec_ret: 2048 xorps %xmm0,%xmm0 2049 pxor %xmm1,%xmm1 2050 movq -8(%r11),%rbp 2051.cfi_restore %rbp 2052 leaq (%r11),%rsp 2053.cfi_def_cfa_register %rsp 2054.Lcbc_ret: 2055 .byte 0xf3,0xc3 2056.cfi_endproc 2057.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt 2058.globl aes_hw_set_decrypt_key 2059.hidden aes_hw_set_decrypt_key 2060.type aes_hw_set_decrypt_key,@function 2061.align 16 2062aes_hw_set_decrypt_key: 2063.cfi_startproc 2064.byte 0x48,0x83,0xEC,0x08 2065.cfi_adjust_cfa_offset 8 2066 call __aesni_set_encrypt_key 2067 shll $4,%esi 2068 testl %eax,%eax 2069 jnz .Ldec_key_ret 2070 leaq 16(%rdx,%rsi,1),%rdi 2071 2072 movups (%rdx),%xmm0 2073 movups (%rdi),%xmm1 2074 movups %xmm0,(%rdi) 2075 movups %xmm1,(%rdx) 2076 leaq 16(%rdx),%rdx 2077 leaq -16(%rdi),%rdi 2078 2079.Ldec_key_inverse: 2080 movups (%rdx),%xmm0 2081 movups (%rdi),%xmm1 2082.byte 102,15,56,219,192 2083.byte 102,15,56,219,201 2084 leaq 16(%rdx),%rdx 2085 leaq -16(%rdi),%rdi 2086 movups %xmm0,16(%rdi) 2087 movups %xmm1,-16(%rdx) 2088 cmpq %rdx,%rdi 2089 ja .Ldec_key_inverse 2090 2091 movups (%rdx),%xmm0 2092.byte 102,15,56,219,192 2093 pxor %xmm1,%xmm1 2094 movups %xmm0,(%rdi) 2095 pxor %xmm0,%xmm0 2096.Ldec_key_ret: 2097 addq $8,%rsp 2098.cfi_adjust_cfa_offset -8 2099 .byte 0xf3,0xc3 2100.cfi_endproc 2101.LSEH_end_set_decrypt_key: 2102.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key 2103.globl aes_hw_set_encrypt_key 2104.hidden aes_hw_set_encrypt_key 2105.type aes_hw_set_encrypt_key,@function 2106.align 16 2107aes_hw_set_encrypt_key: 2108__aesni_set_encrypt_key: 2109.cfi_startproc 2110#ifdef BORINGSSL_DISPATCH_TEST 2111 movb $1,BORINGSSL_function_hit+3(%rip) 2112#endif 2113.byte 0x48,0x83,0xEC,0x08 2114.cfi_adjust_cfa_offset 8 2115 movq $-1,%rax 2116 testq %rdi,%rdi 2117 jz .Lenc_key_ret 2118 testq %rdx,%rdx 2119 jz .Lenc_key_ret 2120 2121 movups (%rdi),%xmm0 2122 xorps 
%xmm4,%xmm4 2123 leaq OPENSSL_ia32cap_P(%rip),%r10 2124 movl 4(%r10),%r10d 2125 andl $268437504,%r10d 2126 leaq 16(%rdx),%rax 2127 cmpl $256,%esi 2128 je .L14rounds 2129 cmpl $192,%esi 2130 je .L12rounds 2131 cmpl $128,%esi 2132 jne .Lbad_keybits 2133 2134.L10rounds: 2135 movl $9,%esi 2136 cmpl $268435456,%r10d 2137 je .L10rounds_alt 2138 2139 movups %xmm0,(%rdx) 2140.byte 102,15,58,223,200,1 2141 call .Lkey_expansion_128_cold 2142.byte 102,15,58,223,200,2 2143 call .Lkey_expansion_128 2144.byte 102,15,58,223,200,4 2145 call .Lkey_expansion_128 2146.byte 102,15,58,223,200,8 2147 call .Lkey_expansion_128 2148.byte 102,15,58,223,200,16 2149 call .Lkey_expansion_128 2150.byte 102,15,58,223,200,32 2151 call .Lkey_expansion_128 2152.byte 102,15,58,223,200,64 2153 call .Lkey_expansion_128 2154.byte 102,15,58,223,200,128 2155 call .Lkey_expansion_128 2156.byte 102,15,58,223,200,27 2157 call .Lkey_expansion_128 2158.byte 102,15,58,223,200,54 2159 call .Lkey_expansion_128 2160 movups %xmm0,(%rax) 2161 movl %esi,80(%rax) 2162 xorl %eax,%eax 2163 jmp .Lenc_key_ret 2164 2165.align 16 2166.L10rounds_alt: 2167 movdqa .Lkey_rotate(%rip),%xmm5 2168 movl $8,%r10d 2169 movdqa .Lkey_rcon1(%rip),%xmm4 2170 movdqa %xmm0,%xmm2 2171 movdqu %xmm0,(%rdx) 2172 jmp .Loop_key128 2173 2174.align 16 2175.Loop_key128: 2176.byte 102,15,56,0,197 2177.byte 102,15,56,221,196 2178 pslld $1,%xmm4 2179 leaq 16(%rax),%rax 2180 2181 movdqa %xmm2,%xmm3 2182 pslldq $4,%xmm2 2183 pxor %xmm2,%xmm3 2184 pslldq $4,%xmm2 2185 pxor %xmm2,%xmm3 2186 pslldq $4,%xmm2 2187 pxor %xmm3,%xmm2 2188 2189 pxor %xmm2,%xmm0 2190 movdqu %xmm0,-16(%rax) 2191 movdqa %xmm0,%xmm2 2192 2193 decl %r10d 2194 jnz .Loop_key128 2195 2196 movdqa .Lkey_rcon1b(%rip),%xmm4 2197 2198.byte 102,15,56,0,197 2199.byte 102,15,56,221,196 2200 pslld $1,%xmm4 2201 2202 movdqa %xmm2,%xmm3 2203 pslldq $4,%xmm2 2204 pxor %xmm2,%xmm3 2205 pslldq $4,%xmm2 2206 pxor %xmm2,%xmm3 2207 pslldq $4,%xmm2 2208 pxor %xmm3,%xmm2 2209 2210 pxor %xmm2,%xmm0 2211 
movdqu %xmm0,(%rax) 2212 2213 movdqa %xmm0,%xmm2 2214.byte 102,15,56,0,197 2215.byte 102,15,56,221,196 2216 2217 movdqa %xmm2,%xmm3 2218 pslldq $4,%xmm2 2219 pxor %xmm2,%xmm3 2220 pslldq $4,%xmm2 2221 pxor %xmm2,%xmm3 2222 pslldq $4,%xmm2 2223 pxor %xmm3,%xmm2 2224 2225 pxor %xmm2,%xmm0 2226 movdqu %xmm0,16(%rax) 2227 2228 movl %esi,96(%rax) 2229 xorl %eax,%eax 2230 jmp .Lenc_key_ret 2231 2232.align 16 2233.L12rounds: 2234 movq 16(%rdi),%xmm2 2235 movl $11,%esi 2236 cmpl $268435456,%r10d 2237 je .L12rounds_alt 2238 2239 movups %xmm0,(%rdx) 2240.byte 102,15,58,223,202,1 2241 call .Lkey_expansion_192a_cold 2242.byte 102,15,58,223,202,2 2243 call .Lkey_expansion_192b 2244.byte 102,15,58,223,202,4 2245 call .Lkey_expansion_192a 2246.byte 102,15,58,223,202,8 2247 call .Lkey_expansion_192b 2248.byte 102,15,58,223,202,16 2249 call .Lkey_expansion_192a 2250.byte 102,15,58,223,202,32 2251 call .Lkey_expansion_192b 2252.byte 102,15,58,223,202,64 2253 call .Lkey_expansion_192a 2254.byte 102,15,58,223,202,128 2255 call .Lkey_expansion_192b 2256 movups %xmm0,(%rax) 2257 movl %esi,48(%rax) 2258 xorq %rax,%rax 2259 jmp .Lenc_key_ret 2260 2261.align 16 2262.L12rounds_alt: 2263 movdqa .Lkey_rotate192(%rip),%xmm5 2264 movdqa .Lkey_rcon1(%rip),%xmm4 2265 movl $8,%r10d 2266 movdqu %xmm0,(%rdx) 2267 jmp .Loop_key192 2268 2269.align 16 2270.Loop_key192: 2271 movq %xmm2,0(%rax) 2272 movdqa %xmm2,%xmm1 2273.byte 102,15,56,0,213 2274.byte 102,15,56,221,212 2275 pslld $1,%xmm4 2276 leaq 24(%rax),%rax 2277 2278 movdqa %xmm0,%xmm3 2279 pslldq $4,%xmm0 2280 pxor %xmm0,%xmm3 2281 pslldq $4,%xmm0 2282 pxor %xmm0,%xmm3 2283 pslldq $4,%xmm0 2284 pxor %xmm3,%xmm0 2285 2286 pshufd $0xff,%xmm0,%xmm3 2287 pxor %xmm1,%xmm3 2288 pslldq $4,%xmm1 2289 pxor %xmm1,%xmm3 2290 2291 pxor %xmm2,%xmm0 2292 pxor %xmm3,%xmm2 2293 movdqu %xmm0,-16(%rax) 2294 2295 decl %r10d 2296 jnz .Loop_key192 2297 2298 movl %esi,32(%rax) 2299 xorl %eax,%eax 2300 jmp .Lenc_key_ret 2301 2302.align 16 2303.L14rounds: 2304 movups 
16(%rdi),%xmm2 2305 movl $13,%esi 2306 leaq 16(%rax),%rax 2307 cmpl $268435456,%r10d 2308 je .L14rounds_alt 2309 2310 movups %xmm0,(%rdx) 2311 movups %xmm2,16(%rdx) 2312.byte 102,15,58,223,202,1 2313 call .Lkey_expansion_256a_cold 2314.byte 102,15,58,223,200,1 2315 call .Lkey_expansion_256b 2316.byte 102,15,58,223,202,2 2317 call .Lkey_expansion_256a 2318.byte 102,15,58,223,200,2 2319 call .Lkey_expansion_256b 2320.byte 102,15,58,223,202,4 2321 call .Lkey_expansion_256a 2322.byte 102,15,58,223,200,4 2323 call .Lkey_expansion_256b 2324.byte 102,15,58,223,202,8 2325 call .Lkey_expansion_256a 2326.byte 102,15,58,223,200,8 2327 call .Lkey_expansion_256b 2328.byte 102,15,58,223,202,16 2329 call .Lkey_expansion_256a 2330.byte 102,15,58,223,200,16 2331 call .Lkey_expansion_256b 2332.byte 102,15,58,223,202,32 2333 call .Lkey_expansion_256a 2334.byte 102,15,58,223,200,32 2335 call .Lkey_expansion_256b 2336.byte 102,15,58,223,202,64 2337 call .Lkey_expansion_256a 2338 movups %xmm0,(%rax) 2339 movl %esi,16(%rax) 2340 xorq %rax,%rax 2341 jmp .Lenc_key_ret 2342 2343.align 16 2344.L14rounds_alt: 2345 movdqa .Lkey_rotate(%rip),%xmm5 2346 movdqa .Lkey_rcon1(%rip),%xmm4 2347 movl $7,%r10d 2348 movdqu %xmm0,0(%rdx) 2349 movdqa %xmm2,%xmm1 2350 movdqu %xmm2,16(%rdx) 2351 jmp .Loop_key256 2352 2353.align 16 2354.Loop_key256: 2355.byte 102,15,56,0,213 2356.byte 102,15,56,221,212 2357 2358 movdqa %xmm0,%xmm3 2359 pslldq $4,%xmm0 2360 pxor %xmm0,%xmm3 2361 pslldq $4,%xmm0 2362 pxor %xmm0,%xmm3 2363 pslldq $4,%xmm0 2364 pxor %xmm3,%xmm0 2365 pslld $1,%xmm4 2366 2367 pxor %xmm2,%xmm0 2368 movdqu %xmm0,(%rax) 2369 2370 decl %r10d 2371 jz .Ldone_key256 2372 2373 pshufd $0xff,%xmm0,%xmm2 2374 pxor %xmm3,%xmm3 2375.byte 102,15,56,221,211 2376 2377 movdqa %xmm1,%xmm3 2378 pslldq $4,%xmm1 2379 pxor %xmm1,%xmm3 2380 pslldq $4,%xmm1 2381 pxor %xmm1,%xmm3 2382 pslldq $4,%xmm1 2383 pxor %xmm3,%xmm1 2384 2385 pxor %xmm1,%xmm2 2386 movdqu %xmm2,16(%rax) 2387 leaq 32(%rax),%rax 2388 movdqa %xmm2,%xmm1 
# ----------------------------------------------------------------------
# Remainder of aes_hw_set_encrypt_key: the jump back to .Loop_key256,
# then
#   .Ldone_key256   stores the round value and returns 0
#   .Lbad_keybits   returns -2
#   .Lenc_key_ret   common exit: zeroes %xmm0-%xmm5, releases the 8
#                   bytes of stack and ends with "rep ret"
#
# Key expansion helpers, reached by call from the default paths. Each
# plain entry first stores a 16-byte register at (%rax) and advances
# %rax; the *_cold entries skip that store and are used for the first
# step of each key size.
#   .Lkey_expansion_128 and .Lkey_expansion_256a (with their _cold
#       entries): combine %xmm0 with %xmm4 through two shufps/xorps
#       steps, then xor in dword 3 of %xmm1 broadcast by shufps $255.
#   .Lkey_expansion_256b: the same shape applied to %xmm2, using dword
#       2 of %xmm1 (shufps $170).
#   .Lkey_expansion_192a (with _192a_cold) and .Lkey_expansion_192b:
#       192-bit variant sharing the body at .Lkey_expansion_192b_warm,
#       which updates both %xmm0 and %xmm2; the 192b entry first writes
#       two 16-byte round keys assembled from %xmm5, %xmm0 and %xmm2.
#
# Constant pool (64-byte aligned). .Lkey_rotate, .Lkey_rotate192,
# .Lkey_rcon1 and .Lkey_rcon1b are loaded by the *_alt key paths; the
# other tables are not referenced in the lines covered by this guide.
# The final .byte string is the NUL-terminated ASCII text
# "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>".
# ----------------------------------------------------------------------
2389 2390 jmp .Loop_key256 2391 2392.Ldone_key256: 2393 movl %esi,16(%rax) 2394 xorl %eax,%eax 2395 jmp .Lenc_key_ret 2396 2397.align 16 2398.Lbad_keybits: 2399 movq $-2,%rax 2400.Lenc_key_ret: 2401 pxor %xmm0,%xmm0 2402 pxor %xmm1,%xmm1 2403 pxor %xmm2,%xmm2 2404 pxor %xmm3,%xmm3 2405 pxor %xmm4,%xmm4 2406 pxor %xmm5,%xmm5 2407 addq $8,%rsp 2408.cfi_adjust_cfa_offset -8 2409 .byte 0xf3,0xc3 2410.cfi_endproc 2411.LSEH_end_set_encrypt_key: 2412 2413.align 16 2414.Lkey_expansion_128: 2415 movups %xmm0,(%rax) 2416 leaq 16(%rax),%rax 2417.Lkey_expansion_128_cold: 2418 shufps $16,%xmm0,%xmm4 2419 xorps %xmm4,%xmm0 2420 shufps $140,%xmm0,%xmm4 2421 xorps %xmm4,%xmm0 2422 shufps $255,%xmm1,%xmm1 2423 xorps %xmm1,%xmm0 2424 .byte 0xf3,0xc3 2425 2426.align 16 2427.Lkey_expansion_192a: 2428 movups %xmm0,(%rax) 2429 leaq 16(%rax),%rax 2430.Lkey_expansion_192a_cold: 2431 movaps %xmm2,%xmm5 2432.Lkey_expansion_192b_warm: 2433 shufps $16,%xmm0,%xmm4 2434 movdqa %xmm2,%xmm3 2435 xorps %xmm4,%xmm0 2436 shufps $140,%xmm0,%xmm4 2437 pslldq $4,%xmm3 2438 xorps %xmm4,%xmm0 2439 pshufd $85,%xmm1,%xmm1 2440 pxor %xmm3,%xmm2 2441 pxor %xmm1,%xmm0 2442 pshufd $255,%xmm0,%xmm3 2443 pxor %xmm3,%xmm2 2444 .byte 0xf3,0xc3 2445 2446.align 16 2447.Lkey_expansion_192b: 2448 movaps %xmm0,%xmm3 2449 shufps $68,%xmm0,%xmm5 2450 movups %xmm5,(%rax) 2451 shufps $78,%xmm2,%xmm3 2452 movups %xmm3,16(%rax) 2453 leaq 32(%rax),%rax 2454 jmp .Lkey_expansion_192b_warm 2455 2456.align 16 2457.Lkey_expansion_256a: 2458 movups %xmm2,(%rax) 2459 leaq 16(%rax),%rax 2460.Lkey_expansion_256a_cold: 2461 shufps $16,%xmm0,%xmm4 2462 xorps %xmm4,%xmm0 2463 shufps $140,%xmm0,%xmm4 2464 xorps %xmm4,%xmm0 2465 shufps $255,%xmm1,%xmm1 2466 xorps %xmm1,%xmm0 2467 .byte 0xf3,0xc3 2468 2469.align 16 2470.Lkey_expansion_256b: 2471 movups %xmm0,(%rax) 2472 leaq 16(%rax),%rax 2473 2474 shufps $16,%xmm2,%xmm4 2475 xorps %xmm4,%xmm2 2476 shufps $140,%xmm2,%xmm4 2477 xorps %xmm4,%xmm2 2478 shufps $170,%xmm1,%xmm1 2479 xorps 
%xmm1,%xmm2 2480 .byte 0xf3,0xc3 2481.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key 2482.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key 2483.align 64 2484.Lbswap_mask: 2485.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 2486.Lincrement32: 2487.long 6,6,6,0 2488.Lincrement64: 2489.long 1,0,0,0 2490.Lxts_magic: 2491.long 0x87,0,1,0 2492.Lincrement1: 2493.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 2494.Lkey_rotate: 2495.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d 2496.Lkey_rotate192: 2497.long 0x04070605,0x04070605,0x04070605,0x04070605 2498.Lkey_rcon1: 2499.long 1,1,1,1 2500.Lkey_rcon1b: 2501.long 0x1b,0x1b,0x1b,0x1b 2502 2503.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 2504.align 64 2505#endif 2506.section .note.GNU-stack,"",@progbits 2507