1# This file is generated from a similarly-named Perl script in the BoringSSL 2# source tree. Do not edit by hand. 3 4#if defined(__has_feature) 5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) 6#define OPENSSL_NO_ASM 7#endif 8#endif 9 10#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) 11#if defined(BORINGSSL_PREFIX) 12#include <boringssl_prefix_symbols_asm.h> 13#endif 14.text 15.extern OPENSSL_ia32cap_P 16.hidden OPENSSL_ia32cap_P 17.globl aes_hw_encrypt 18.hidden aes_hw_encrypt 19.type aes_hw_encrypt,@function 20.align 16 21aes_hw_encrypt: 22.cfi_startproc 23#ifndef NDEBUG 24#ifndef BORINGSSL_FIPS 25.extern BORINGSSL_function_hit 26.hidden BORINGSSL_function_hit 27 movb $1,BORINGSSL_function_hit+1(%rip) 28#endif 29#endif 30 movups (%rdi),%xmm2 31 movl 240(%rdx),%eax 32 movups (%rdx),%xmm0 33 movups 16(%rdx),%xmm1 34 leaq 32(%rdx),%rdx 35 xorps %xmm0,%xmm2 36.Loop_enc1_1: 37.byte 102,15,56,220,209 38 decl %eax 39 movups (%rdx),%xmm1 40 leaq 16(%rdx),%rdx 41 jnz .Loop_enc1_1 42.byte 102,15,56,221,209 43 pxor %xmm0,%xmm0 44 pxor %xmm1,%xmm1 45 movups %xmm2,(%rsi) 46 pxor %xmm2,%xmm2 47 .byte 0xf3,0xc3 48.cfi_endproc 49.size aes_hw_encrypt,.-aes_hw_encrypt 50 51.globl aes_hw_decrypt 52.hidden aes_hw_decrypt 53.type aes_hw_decrypt,@function 54.align 16 55aes_hw_decrypt: 56.cfi_startproc 57 movups (%rdi),%xmm2 58 movl 240(%rdx),%eax 59 movups (%rdx),%xmm0 60 movups 16(%rdx),%xmm1 61 leaq 32(%rdx),%rdx 62 xorps %xmm0,%xmm2 63.Loop_dec1_2: 64.byte 102,15,56,222,209 65 decl %eax 66 movups (%rdx),%xmm1 67 leaq 16(%rdx),%rdx 68 jnz .Loop_dec1_2 69.byte 102,15,56,223,209 70 pxor %xmm0,%xmm0 71 pxor %xmm1,%xmm1 72 movups %xmm2,(%rsi) 73 pxor %xmm2,%xmm2 74 .byte 0xf3,0xc3 75.cfi_endproc 76.size aes_hw_decrypt, .-aes_hw_decrypt 77.type _aesni_encrypt2,@function 78.align 16 79_aesni_encrypt2: 80.cfi_startproc 81 movups (%rcx),%xmm0 82 shll $4,%eax 83 movups 16(%rcx),%xmm1 84 xorps %xmm0,%xmm2 85 xorps %xmm0,%xmm3 86 movups 32(%rcx),%xmm0 87 leaq 
32(%rcx,%rax,1),%rcx 88 negq %rax 89 addq $16,%rax 90 91.Lenc_loop2: 92.byte 102,15,56,220,209 93.byte 102,15,56,220,217 94 movups (%rcx,%rax,1),%xmm1 95 addq $32,%rax 96.byte 102,15,56,220,208 97.byte 102,15,56,220,216 98 movups -16(%rcx,%rax,1),%xmm0 99 jnz .Lenc_loop2 100 101.byte 102,15,56,220,209 102.byte 102,15,56,220,217 103.byte 102,15,56,221,208 104.byte 102,15,56,221,216 105 .byte 0xf3,0xc3 106.cfi_endproc 107.size _aesni_encrypt2,.-_aesni_encrypt2 108.type _aesni_decrypt2,@function 109.align 16 110_aesni_decrypt2: 111.cfi_startproc 112 movups (%rcx),%xmm0 113 shll $4,%eax 114 movups 16(%rcx),%xmm1 115 xorps %xmm0,%xmm2 116 xorps %xmm0,%xmm3 117 movups 32(%rcx),%xmm0 118 leaq 32(%rcx,%rax,1),%rcx 119 negq %rax 120 addq $16,%rax 121 122.Ldec_loop2: 123.byte 102,15,56,222,209 124.byte 102,15,56,222,217 125 movups (%rcx,%rax,1),%xmm1 126 addq $32,%rax 127.byte 102,15,56,222,208 128.byte 102,15,56,222,216 129 movups -16(%rcx,%rax,1),%xmm0 130 jnz .Ldec_loop2 131 132.byte 102,15,56,222,209 133.byte 102,15,56,222,217 134.byte 102,15,56,223,208 135.byte 102,15,56,223,216 136 .byte 0xf3,0xc3 137.cfi_endproc 138.size _aesni_decrypt2,.-_aesni_decrypt2 139.type _aesni_encrypt3,@function 140.align 16 141_aesni_encrypt3: 142.cfi_startproc 143 movups (%rcx),%xmm0 144 shll $4,%eax 145 movups 16(%rcx),%xmm1 146 xorps %xmm0,%xmm2 147 xorps %xmm0,%xmm3 148 xorps %xmm0,%xmm4 149 movups 32(%rcx),%xmm0 150 leaq 32(%rcx,%rax,1),%rcx 151 negq %rax 152 addq $16,%rax 153 154.Lenc_loop3: 155.byte 102,15,56,220,209 156.byte 102,15,56,220,217 157.byte 102,15,56,220,225 158 movups (%rcx,%rax,1),%xmm1 159 addq $32,%rax 160.byte 102,15,56,220,208 161.byte 102,15,56,220,216 162.byte 102,15,56,220,224 163 movups -16(%rcx,%rax,1),%xmm0 164 jnz .Lenc_loop3 165 166.byte 102,15,56,220,209 167.byte 102,15,56,220,217 168.byte 102,15,56,220,225 169.byte 102,15,56,221,208 170.byte 102,15,56,221,216 171.byte 102,15,56,221,224 172 .byte 0xf3,0xc3 173.cfi_endproc 174.size 
_aesni_encrypt3,.-_aesni_encrypt3 175.type _aesni_decrypt3,@function 176.align 16 177_aesni_decrypt3: 178.cfi_startproc 179 movups (%rcx),%xmm0 180 shll $4,%eax 181 movups 16(%rcx),%xmm1 182 xorps %xmm0,%xmm2 183 xorps %xmm0,%xmm3 184 xorps %xmm0,%xmm4 185 movups 32(%rcx),%xmm0 186 leaq 32(%rcx,%rax,1),%rcx 187 negq %rax 188 addq $16,%rax 189 190.Ldec_loop3: 191.byte 102,15,56,222,209 192.byte 102,15,56,222,217 193.byte 102,15,56,222,225 194 movups (%rcx,%rax,1),%xmm1 195 addq $32,%rax 196.byte 102,15,56,222,208 197.byte 102,15,56,222,216 198.byte 102,15,56,222,224 199 movups -16(%rcx,%rax,1),%xmm0 200 jnz .Ldec_loop3 201 202.byte 102,15,56,222,209 203.byte 102,15,56,222,217 204.byte 102,15,56,222,225 205.byte 102,15,56,223,208 206.byte 102,15,56,223,216 207.byte 102,15,56,223,224 208 .byte 0xf3,0xc3 209.cfi_endproc 210.size _aesni_decrypt3,.-_aesni_decrypt3 211.type _aesni_encrypt4,@function 212.align 16 213_aesni_encrypt4: 214.cfi_startproc 215 movups (%rcx),%xmm0 216 shll $4,%eax 217 movups 16(%rcx),%xmm1 218 xorps %xmm0,%xmm2 219 xorps %xmm0,%xmm3 220 xorps %xmm0,%xmm4 221 xorps %xmm0,%xmm5 222 movups 32(%rcx),%xmm0 223 leaq 32(%rcx,%rax,1),%rcx 224 negq %rax 225.byte 0x0f,0x1f,0x00 226 addq $16,%rax 227 228.Lenc_loop4: 229.byte 102,15,56,220,209 230.byte 102,15,56,220,217 231.byte 102,15,56,220,225 232.byte 102,15,56,220,233 233 movups (%rcx,%rax,1),%xmm1 234 addq $32,%rax 235.byte 102,15,56,220,208 236.byte 102,15,56,220,216 237.byte 102,15,56,220,224 238.byte 102,15,56,220,232 239 movups -16(%rcx,%rax,1),%xmm0 240 jnz .Lenc_loop4 241 242.byte 102,15,56,220,209 243.byte 102,15,56,220,217 244.byte 102,15,56,220,225 245.byte 102,15,56,220,233 246.byte 102,15,56,221,208 247.byte 102,15,56,221,216 248.byte 102,15,56,221,224 249.byte 102,15,56,221,232 250 .byte 0xf3,0xc3 251.cfi_endproc 252.size _aesni_encrypt4,.-_aesni_encrypt4 253.type _aesni_decrypt4,@function 254.align 16 255_aesni_decrypt4: 256.cfi_startproc 257 movups (%rcx),%xmm0 258 shll $4,%eax 259 movups 
16(%rcx),%xmm1 260 xorps %xmm0,%xmm2 261 xorps %xmm0,%xmm3 262 xorps %xmm0,%xmm4 263 xorps %xmm0,%xmm5 264 movups 32(%rcx),%xmm0 265 leaq 32(%rcx,%rax,1),%rcx 266 negq %rax 267.byte 0x0f,0x1f,0x00 268 addq $16,%rax 269 270.Ldec_loop4: 271.byte 102,15,56,222,209 272.byte 102,15,56,222,217 273.byte 102,15,56,222,225 274.byte 102,15,56,222,233 275 movups (%rcx,%rax,1),%xmm1 276 addq $32,%rax 277.byte 102,15,56,222,208 278.byte 102,15,56,222,216 279.byte 102,15,56,222,224 280.byte 102,15,56,222,232 281 movups -16(%rcx,%rax,1),%xmm0 282 jnz .Ldec_loop4 283 284.byte 102,15,56,222,209 285.byte 102,15,56,222,217 286.byte 102,15,56,222,225 287.byte 102,15,56,222,233 288.byte 102,15,56,223,208 289.byte 102,15,56,223,216 290.byte 102,15,56,223,224 291.byte 102,15,56,223,232 292 .byte 0xf3,0xc3 293.cfi_endproc 294.size _aesni_decrypt4,.-_aesni_decrypt4 295.type _aesni_encrypt6,@function 296.align 16 297_aesni_encrypt6: 298.cfi_startproc 299 movups (%rcx),%xmm0 300 shll $4,%eax 301 movups 16(%rcx),%xmm1 302 xorps %xmm0,%xmm2 303 pxor %xmm0,%xmm3 304 pxor %xmm0,%xmm4 305.byte 102,15,56,220,209 306 leaq 32(%rcx,%rax,1),%rcx 307 negq %rax 308.byte 102,15,56,220,217 309 pxor %xmm0,%xmm5 310 pxor %xmm0,%xmm6 311.byte 102,15,56,220,225 312 pxor %xmm0,%xmm7 313 movups (%rcx,%rax,1),%xmm0 314 addq $16,%rax 315 jmp .Lenc_loop6_enter 316.align 16 317.Lenc_loop6: 318.byte 102,15,56,220,209 319.byte 102,15,56,220,217 320.byte 102,15,56,220,225 321.Lenc_loop6_enter: 322.byte 102,15,56,220,233 323.byte 102,15,56,220,241 324.byte 102,15,56,220,249 325 movups (%rcx,%rax,1),%xmm1 326 addq $32,%rax 327.byte 102,15,56,220,208 328.byte 102,15,56,220,216 329.byte 102,15,56,220,224 330.byte 102,15,56,220,232 331.byte 102,15,56,220,240 332.byte 102,15,56,220,248 333 movups -16(%rcx,%rax,1),%xmm0 334 jnz .Lenc_loop6 335 336.byte 102,15,56,220,209 337.byte 102,15,56,220,217 338.byte 102,15,56,220,225 339.byte 102,15,56,220,233 340.byte 102,15,56,220,241 341.byte 102,15,56,220,249 342.byte 
102,15,56,221,208 343.byte 102,15,56,221,216 344.byte 102,15,56,221,224 345.byte 102,15,56,221,232 346.byte 102,15,56,221,240 347.byte 102,15,56,221,248 348 .byte 0xf3,0xc3 349.cfi_endproc 350.size _aesni_encrypt6,.-_aesni_encrypt6 351.type _aesni_decrypt6,@function 352.align 16 353_aesni_decrypt6: 354.cfi_startproc 355 movups (%rcx),%xmm0 356 shll $4,%eax 357 movups 16(%rcx),%xmm1 358 xorps %xmm0,%xmm2 359 pxor %xmm0,%xmm3 360 pxor %xmm0,%xmm4 361.byte 102,15,56,222,209 362 leaq 32(%rcx,%rax,1),%rcx 363 negq %rax 364.byte 102,15,56,222,217 365 pxor %xmm0,%xmm5 366 pxor %xmm0,%xmm6 367.byte 102,15,56,222,225 368 pxor %xmm0,%xmm7 369 movups (%rcx,%rax,1),%xmm0 370 addq $16,%rax 371 jmp .Ldec_loop6_enter 372.align 16 373.Ldec_loop6: 374.byte 102,15,56,222,209 375.byte 102,15,56,222,217 376.byte 102,15,56,222,225 377.Ldec_loop6_enter: 378.byte 102,15,56,222,233 379.byte 102,15,56,222,241 380.byte 102,15,56,222,249 381 movups (%rcx,%rax,1),%xmm1 382 addq $32,%rax 383.byte 102,15,56,222,208 384.byte 102,15,56,222,216 385.byte 102,15,56,222,224 386.byte 102,15,56,222,232 387.byte 102,15,56,222,240 388.byte 102,15,56,222,248 389 movups -16(%rcx,%rax,1),%xmm0 390 jnz .Ldec_loop6 391 392.byte 102,15,56,222,209 393.byte 102,15,56,222,217 394.byte 102,15,56,222,225 395.byte 102,15,56,222,233 396.byte 102,15,56,222,241 397.byte 102,15,56,222,249 398.byte 102,15,56,223,208 399.byte 102,15,56,223,216 400.byte 102,15,56,223,224 401.byte 102,15,56,223,232 402.byte 102,15,56,223,240 403.byte 102,15,56,223,248 404 .byte 0xf3,0xc3 405.cfi_endproc 406.size _aesni_decrypt6,.-_aesni_decrypt6 407.type _aesni_encrypt8,@function 408.align 16 409_aesni_encrypt8: 410.cfi_startproc 411 movups (%rcx),%xmm0 412 shll $4,%eax 413 movups 16(%rcx),%xmm1 414 xorps %xmm0,%xmm2 415 xorps %xmm0,%xmm3 416 pxor %xmm0,%xmm4 417 pxor %xmm0,%xmm5 418 pxor %xmm0,%xmm6 419 leaq 32(%rcx,%rax,1),%rcx 420 negq %rax 421.byte 102,15,56,220,209 422 pxor %xmm0,%xmm7 423 pxor %xmm0,%xmm8 424.byte 102,15,56,220,217 
425 pxor %xmm0,%xmm9 426 movups (%rcx,%rax,1),%xmm0 427 addq $16,%rax 428 jmp .Lenc_loop8_inner 429.align 16 430.Lenc_loop8: 431.byte 102,15,56,220,209 432.byte 102,15,56,220,217 433.Lenc_loop8_inner: 434.byte 102,15,56,220,225 435.byte 102,15,56,220,233 436.byte 102,15,56,220,241 437.byte 102,15,56,220,249 438.byte 102,68,15,56,220,193 439.byte 102,68,15,56,220,201 440.Lenc_loop8_enter: 441 movups (%rcx,%rax,1),%xmm1 442 addq $32,%rax 443.byte 102,15,56,220,208 444.byte 102,15,56,220,216 445.byte 102,15,56,220,224 446.byte 102,15,56,220,232 447.byte 102,15,56,220,240 448.byte 102,15,56,220,248 449.byte 102,68,15,56,220,192 450.byte 102,68,15,56,220,200 451 movups -16(%rcx,%rax,1),%xmm0 452 jnz .Lenc_loop8 453 454.byte 102,15,56,220,209 455.byte 102,15,56,220,217 456.byte 102,15,56,220,225 457.byte 102,15,56,220,233 458.byte 102,15,56,220,241 459.byte 102,15,56,220,249 460.byte 102,68,15,56,220,193 461.byte 102,68,15,56,220,201 462.byte 102,15,56,221,208 463.byte 102,15,56,221,216 464.byte 102,15,56,221,224 465.byte 102,15,56,221,232 466.byte 102,15,56,221,240 467.byte 102,15,56,221,248 468.byte 102,68,15,56,221,192 469.byte 102,68,15,56,221,200 470 .byte 0xf3,0xc3 471.cfi_endproc 472.size _aesni_encrypt8,.-_aesni_encrypt8 473.type _aesni_decrypt8,@function 474.align 16 475_aesni_decrypt8: 476.cfi_startproc 477 movups (%rcx),%xmm0 478 shll $4,%eax 479 movups 16(%rcx),%xmm1 480 xorps %xmm0,%xmm2 481 xorps %xmm0,%xmm3 482 pxor %xmm0,%xmm4 483 pxor %xmm0,%xmm5 484 pxor %xmm0,%xmm6 485 leaq 32(%rcx,%rax,1),%rcx 486 negq %rax 487.byte 102,15,56,222,209 488 pxor %xmm0,%xmm7 489 pxor %xmm0,%xmm8 490.byte 102,15,56,222,217 491 pxor %xmm0,%xmm9 492 movups (%rcx,%rax,1),%xmm0 493 addq $16,%rax 494 jmp .Ldec_loop8_inner 495.align 16 496.Ldec_loop8: 497.byte 102,15,56,222,209 498.byte 102,15,56,222,217 499.Ldec_loop8_inner: 500.byte 102,15,56,222,225 501.byte 102,15,56,222,233 502.byte 102,15,56,222,241 503.byte 102,15,56,222,249 504.byte 102,68,15,56,222,193 505.byte 
102,68,15,56,222,201 506.Ldec_loop8_enter: 507 movups (%rcx,%rax,1),%xmm1 508 addq $32,%rax 509.byte 102,15,56,222,208 510.byte 102,15,56,222,216 511.byte 102,15,56,222,224 512.byte 102,15,56,222,232 513.byte 102,15,56,222,240 514.byte 102,15,56,222,248 515.byte 102,68,15,56,222,192 516.byte 102,68,15,56,222,200 517 movups -16(%rcx,%rax,1),%xmm0 518 jnz .Ldec_loop8 519 520.byte 102,15,56,222,209 521.byte 102,15,56,222,217 522.byte 102,15,56,222,225 523.byte 102,15,56,222,233 524.byte 102,15,56,222,241 525.byte 102,15,56,222,249 526.byte 102,68,15,56,222,193 527.byte 102,68,15,56,222,201 528.byte 102,15,56,223,208 529.byte 102,15,56,223,216 530.byte 102,15,56,223,224 531.byte 102,15,56,223,232 532.byte 102,15,56,223,240 533.byte 102,15,56,223,248 534.byte 102,68,15,56,223,192 535.byte 102,68,15,56,223,200 536 .byte 0xf3,0xc3 537.cfi_endproc 538.size _aesni_decrypt8,.-_aesni_decrypt8 539.globl aes_hw_ecb_encrypt 540.hidden aes_hw_ecb_encrypt 541.type aes_hw_ecb_encrypt,@function 542.align 16 543aes_hw_ecb_encrypt: 544.cfi_startproc 545 andq $-16,%rdx 546 jz .Lecb_ret 547 548 movl 240(%rcx),%eax 549 movups (%rcx),%xmm0 550 movq %rcx,%r11 551 movl %eax,%r10d 552 testl %r8d,%r8d 553 jz .Lecb_decrypt 554 555 cmpq $0x80,%rdx 556 jb .Lecb_enc_tail 557 558 movdqu (%rdi),%xmm2 559 movdqu 16(%rdi),%xmm3 560 movdqu 32(%rdi),%xmm4 561 movdqu 48(%rdi),%xmm5 562 movdqu 64(%rdi),%xmm6 563 movdqu 80(%rdi),%xmm7 564 movdqu 96(%rdi),%xmm8 565 movdqu 112(%rdi),%xmm9 566 leaq 128(%rdi),%rdi 567 subq $0x80,%rdx 568 jmp .Lecb_enc_loop8_enter 569.align 16 570.Lecb_enc_loop8: 571 movups %xmm2,(%rsi) 572 movq %r11,%rcx 573 movdqu (%rdi),%xmm2 574 movl %r10d,%eax 575 movups %xmm3,16(%rsi) 576 movdqu 16(%rdi),%xmm3 577 movups %xmm4,32(%rsi) 578 movdqu 32(%rdi),%xmm4 579 movups %xmm5,48(%rsi) 580 movdqu 48(%rdi),%xmm5 581 movups %xmm6,64(%rsi) 582 movdqu 64(%rdi),%xmm6 583 movups %xmm7,80(%rsi) 584 movdqu 80(%rdi),%xmm7 585 movups %xmm8,96(%rsi) 586 movdqu 96(%rdi),%xmm8 587 movups 
%xmm9,112(%rsi) 588 leaq 128(%rsi),%rsi 589 movdqu 112(%rdi),%xmm9 590 leaq 128(%rdi),%rdi 591.Lecb_enc_loop8_enter: 592 593 call _aesni_encrypt8 594 595 subq $0x80,%rdx 596 jnc .Lecb_enc_loop8 597 598 movups %xmm2,(%rsi) 599 movq %r11,%rcx 600 movups %xmm3,16(%rsi) 601 movl %r10d,%eax 602 movups %xmm4,32(%rsi) 603 movups %xmm5,48(%rsi) 604 movups %xmm6,64(%rsi) 605 movups %xmm7,80(%rsi) 606 movups %xmm8,96(%rsi) 607 movups %xmm9,112(%rsi) 608 leaq 128(%rsi),%rsi 609 addq $0x80,%rdx 610 jz .Lecb_ret 611 612.Lecb_enc_tail: 613 movups (%rdi),%xmm2 614 cmpq $0x20,%rdx 615 jb .Lecb_enc_one 616 movups 16(%rdi),%xmm3 617 je .Lecb_enc_two 618 movups 32(%rdi),%xmm4 619 cmpq $0x40,%rdx 620 jb .Lecb_enc_three 621 movups 48(%rdi),%xmm5 622 je .Lecb_enc_four 623 movups 64(%rdi),%xmm6 624 cmpq $0x60,%rdx 625 jb .Lecb_enc_five 626 movups 80(%rdi),%xmm7 627 je .Lecb_enc_six 628 movdqu 96(%rdi),%xmm8 629 xorps %xmm9,%xmm9 630 call _aesni_encrypt8 631 movups %xmm2,(%rsi) 632 movups %xmm3,16(%rsi) 633 movups %xmm4,32(%rsi) 634 movups %xmm5,48(%rsi) 635 movups %xmm6,64(%rsi) 636 movups %xmm7,80(%rsi) 637 movups %xmm8,96(%rsi) 638 jmp .Lecb_ret 639.align 16 640.Lecb_enc_one: 641 movups (%rcx),%xmm0 642 movups 16(%rcx),%xmm1 643 leaq 32(%rcx),%rcx 644 xorps %xmm0,%xmm2 645.Loop_enc1_3: 646.byte 102,15,56,220,209 647 decl %eax 648 movups (%rcx),%xmm1 649 leaq 16(%rcx),%rcx 650 jnz .Loop_enc1_3 651.byte 102,15,56,221,209 652 movups %xmm2,(%rsi) 653 jmp .Lecb_ret 654.align 16 655.Lecb_enc_two: 656 call _aesni_encrypt2 657 movups %xmm2,(%rsi) 658 movups %xmm3,16(%rsi) 659 jmp .Lecb_ret 660.align 16 661.Lecb_enc_three: 662 call _aesni_encrypt3 663 movups %xmm2,(%rsi) 664 movups %xmm3,16(%rsi) 665 movups %xmm4,32(%rsi) 666 jmp .Lecb_ret 667.align 16 668.Lecb_enc_four: 669 call _aesni_encrypt4 670 movups %xmm2,(%rsi) 671 movups %xmm3,16(%rsi) 672 movups %xmm4,32(%rsi) 673 movups %xmm5,48(%rsi) 674 jmp .Lecb_ret 675.align 16 676.Lecb_enc_five: 677 xorps %xmm7,%xmm7 678 call _aesni_encrypt6 679 
movups %xmm2,(%rsi) 680 movups %xmm3,16(%rsi) 681 movups %xmm4,32(%rsi) 682 movups %xmm5,48(%rsi) 683 movups %xmm6,64(%rsi) 684 jmp .Lecb_ret 685.align 16 686.Lecb_enc_six: 687 call _aesni_encrypt6 688 movups %xmm2,(%rsi) 689 movups %xmm3,16(%rsi) 690 movups %xmm4,32(%rsi) 691 movups %xmm5,48(%rsi) 692 movups %xmm6,64(%rsi) 693 movups %xmm7,80(%rsi) 694 jmp .Lecb_ret 695 696.align 16 697.Lecb_decrypt: 698 cmpq $0x80,%rdx 699 jb .Lecb_dec_tail 700 701 movdqu (%rdi),%xmm2 702 movdqu 16(%rdi),%xmm3 703 movdqu 32(%rdi),%xmm4 704 movdqu 48(%rdi),%xmm5 705 movdqu 64(%rdi),%xmm6 706 movdqu 80(%rdi),%xmm7 707 movdqu 96(%rdi),%xmm8 708 movdqu 112(%rdi),%xmm9 709 leaq 128(%rdi),%rdi 710 subq $0x80,%rdx 711 jmp .Lecb_dec_loop8_enter 712.align 16 713.Lecb_dec_loop8: 714 movups %xmm2,(%rsi) 715 movq %r11,%rcx 716 movdqu (%rdi),%xmm2 717 movl %r10d,%eax 718 movups %xmm3,16(%rsi) 719 movdqu 16(%rdi),%xmm3 720 movups %xmm4,32(%rsi) 721 movdqu 32(%rdi),%xmm4 722 movups %xmm5,48(%rsi) 723 movdqu 48(%rdi),%xmm5 724 movups %xmm6,64(%rsi) 725 movdqu 64(%rdi),%xmm6 726 movups %xmm7,80(%rsi) 727 movdqu 80(%rdi),%xmm7 728 movups %xmm8,96(%rsi) 729 movdqu 96(%rdi),%xmm8 730 movups %xmm9,112(%rsi) 731 leaq 128(%rsi),%rsi 732 movdqu 112(%rdi),%xmm9 733 leaq 128(%rdi),%rdi 734.Lecb_dec_loop8_enter: 735 736 call _aesni_decrypt8 737 738 movups (%r11),%xmm0 739 subq $0x80,%rdx 740 jnc .Lecb_dec_loop8 741 742 movups %xmm2,(%rsi) 743 pxor %xmm2,%xmm2 744 movq %r11,%rcx 745 movups %xmm3,16(%rsi) 746 pxor %xmm3,%xmm3 747 movl %r10d,%eax 748 movups %xmm4,32(%rsi) 749 pxor %xmm4,%xmm4 750 movups %xmm5,48(%rsi) 751 pxor %xmm5,%xmm5 752 movups %xmm6,64(%rsi) 753 pxor %xmm6,%xmm6 754 movups %xmm7,80(%rsi) 755 pxor %xmm7,%xmm7 756 movups %xmm8,96(%rsi) 757 pxor %xmm8,%xmm8 758 movups %xmm9,112(%rsi) 759 pxor %xmm9,%xmm9 760 leaq 128(%rsi),%rsi 761 addq $0x80,%rdx 762 jz .Lecb_ret 763 764.Lecb_dec_tail: 765 movups (%rdi),%xmm2 766 cmpq $0x20,%rdx 767 jb .Lecb_dec_one 768 movups 16(%rdi),%xmm3 769 je 
.Lecb_dec_two 770 movups 32(%rdi),%xmm4 771 cmpq $0x40,%rdx 772 jb .Lecb_dec_three 773 movups 48(%rdi),%xmm5 774 je .Lecb_dec_four 775 movups 64(%rdi),%xmm6 776 cmpq $0x60,%rdx 777 jb .Lecb_dec_five 778 movups 80(%rdi),%xmm7 779 je .Lecb_dec_six 780 movups 96(%rdi),%xmm8 781 movups (%rcx),%xmm0 782 xorps %xmm9,%xmm9 783 call _aesni_decrypt8 784 movups %xmm2,(%rsi) 785 pxor %xmm2,%xmm2 786 movups %xmm3,16(%rsi) 787 pxor %xmm3,%xmm3 788 movups %xmm4,32(%rsi) 789 pxor %xmm4,%xmm4 790 movups %xmm5,48(%rsi) 791 pxor %xmm5,%xmm5 792 movups %xmm6,64(%rsi) 793 pxor %xmm6,%xmm6 794 movups %xmm7,80(%rsi) 795 pxor %xmm7,%xmm7 796 movups %xmm8,96(%rsi) 797 pxor %xmm8,%xmm8 798 pxor %xmm9,%xmm9 799 jmp .Lecb_ret 800.align 16 801.Lecb_dec_one: 802 movups (%rcx),%xmm0 803 movups 16(%rcx),%xmm1 804 leaq 32(%rcx),%rcx 805 xorps %xmm0,%xmm2 806.Loop_dec1_4: 807.byte 102,15,56,222,209 808 decl %eax 809 movups (%rcx),%xmm1 810 leaq 16(%rcx),%rcx 811 jnz .Loop_dec1_4 812.byte 102,15,56,223,209 813 movups %xmm2,(%rsi) 814 pxor %xmm2,%xmm2 815 jmp .Lecb_ret 816.align 16 817.Lecb_dec_two: 818 call _aesni_decrypt2 819 movups %xmm2,(%rsi) 820 pxor %xmm2,%xmm2 821 movups %xmm3,16(%rsi) 822 pxor %xmm3,%xmm3 823 jmp .Lecb_ret 824.align 16 825.Lecb_dec_three: 826 call _aesni_decrypt3 827 movups %xmm2,(%rsi) 828 pxor %xmm2,%xmm2 829 movups %xmm3,16(%rsi) 830 pxor %xmm3,%xmm3 831 movups %xmm4,32(%rsi) 832 pxor %xmm4,%xmm4 833 jmp .Lecb_ret 834.align 16 835.Lecb_dec_four: 836 call _aesni_decrypt4 837 movups %xmm2,(%rsi) 838 pxor %xmm2,%xmm2 839 movups %xmm3,16(%rsi) 840 pxor %xmm3,%xmm3 841 movups %xmm4,32(%rsi) 842 pxor %xmm4,%xmm4 843 movups %xmm5,48(%rsi) 844 pxor %xmm5,%xmm5 845 jmp .Lecb_ret 846.align 16 847.Lecb_dec_five: 848 xorps %xmm7,%xmm7 849 call _aesni_decrypt6 850 movups %xmm2,(%rsi) 851 pxor %xmm2,%xmm2 852 movups %xmm3,16(%rsi) 853 pxor %xmm3,%xmm3 854 movups %xmm4,32(%rsi) 855 pxor %xmm4,%xmm4 856 movups %xmm5,48(%rsi) 857 pxor %xmm5,%xmm5 858 movups %xmm6,64(%rsi) 859 pxor 
%xmm6,%xmm6 860 pxor %xmm7,%xmm7 861 jmp .Lecb_ret 862.align 16 863.Lecb_dec_six: 864 call _aesni_decrypt6 865 movups %xmm2,(%rsi) 866 pxor %xmm2,%xmm2 867 movups %xmm3,16(%rsi) 868 pxor %xmm3,%xmm3 869 movups %xmm4,32(%rsi) 870 pxor %xmm4,%xmm4 871 movups %xmm5,48(%rsi) 872 pxor %xmm5,%xmm5 873 movups %xmm6,64(%rsi) 874 pxor %xmm6,%xmm6 875 movups %xmm7,80(%rsi) 876 pxor %xmm7,%xmm7 877 878.Lecb_ret: 879 xorps %xmm0,%xmm0 880 pxor %xmm1,%xmm1 881 .byte 0xf3,0xc3 882.cfi_endproc 883.size aes_hw_ecb_encrypt,.-aes_hw_ecb_encrypt 884.globl aes_hw_ctr32_encrypt_blocks 885.hidden aes_hw_ctr32_encrypt_blocks 886.type aes_hw_ctr32_encrypt_blocks,@function 887.align 16 888aes_hw_ctr32_encrypt_blocks: 889.cfi_startproc 890#ifndef NDEBUG 891#ifndef BORINGSSL_FIPS 892 movb $1,BORINGSSL_function_hit(%rip) 893#endif 894#endif 895 cmpq $1,%rdx 896 jne .Lctr32_bulk 897 898 899 900 movups (%r8),%xmm2 901 movups (%rdi),%xmm3 902 movl 240(%rcx),%edx 903 movups (%rcx),%xmm0 904 movups 16(%rcx),%xmm1 905 leaq 32(%rcx),%rcx 906 xorps %xmm0,%xmm2 907.Loop_enc1_5: 908.byte 102,15,56,220,209 909 decl %edx 910 movups (%rcx),%xmm1 911 leaq 16(%rcx),%rcx 912 jnz .Loop_enc1_5 913.byte 102,15,56,221,209 914 pxor %xmm0,%xmm0 915 pxor %xmm1,%xmm1 916 xorps %xmm3,%xmm2 917 pxor %xmm3,%xmm3 918 movups %xmm2,(%rsi) 919 xorps %xmm2,%xmm2 920 jmp .Lctr32_epilogue 921 922.align 16 923.Lctr32_bulk: 924 leaq (%rsp),%r11 925.cfi_def_cfa_register %r11 926 pushq %rbp 927.cfi_offset %rbp,-16 928 subq $128,%rsp 929 andq $-16,%rsp 930 931 932 933 934 movdqu (%r8),%xmm2 935 movdqu (%rcx),%xmm0 936 movl 12(%r8),%r8d 937 pxor %xmm0,%xmm2 938 movl 12(%rcx),%ebp 939 movdqa %xmm2,0(%rsp) 940 bswapl %r8d 941 movdqa %xmm2,%xmm3 942 movdqa %xmm2,%xmm4 943 movdqa %xmm2,%xmm5 944 movdqa %xmm2,64(%rsp) 945 movdqa %xmm2,80(%rsp) 946 movdqa %xmm2,96(%rsp) 947 movq %rdx,%r10 948 movdqa %xmm2,112(%rsp) 949 950 leaq 1(%r8),%rax 951 leaq 2(%r8),%rdx 952 bswapl %eax 953 bswapl %edx 954 xorl %ebp,%eax 955 xorl %ebp,%edx 956.byte 
102,15,58,34,216,3 957 leaq 3(%r8),%rax 958 movdqa %xmm3,16(%rsp) 959.byte 102,15,58,34,226,3 960 bswapl %eax 961 movq %r10,%rdx 962 leaq 4(%r8),%r10 963 movdqa %xmm4,32(%rsp) 964 xorl %ebp,%eax 965 bswapl %r10d 966.byte 102,15,58,34,232,3 967 xorl %ebp,%r10d 968 movdqa %xmm5,48(%rsp) 969 leaq 5(%r8),%r9 970 movl %r10d,64+12(%rsp) 971 bswapl %r9d 972 leaq 6(%r8),%r10 973 movl 240(%rcx),%eax 974 xorl %ebp,%r9d 975 bswapl %r10d 976 movl %r9d,80+12(%rsp) 977 xorl %ebp,%r10d 978 leaq 7(%r8),%r9 979 movl %r10d,96+12(%rsp) 980 bswapl %r9d 981 leaq OPENSSL_ia32cap_P(%rip),%r10 982 movl 4(%r10),%r10d 983 xorl %ebp,%r9d 984 andl $71303168,%r10d 985 movl %r9d,112+12(%rsp) 986 987 movups 16(%rcx),%xmm1 988 989 movdqa 64(%rsp),%xmm6 990 movdqa 80(%rsp),%xmm7 991 992 cmpq $8,%rdx 993 jb .Lctr32_tail 994 995 subq $6,%rdx 996 cmpl $4194304,%r10d 997 je .Lctr32_6x 998 999 leaq 128(%rcx),%rcx 1000 subq $2,%rdx 1001 jmp .Lctr32_loop8 1002 1003.align 16 1004.Lctr32_6x: 1005 shll $4,%eax 1006 movl $48,%r10d 1007 bswapl %ebp 1008 leaq 32(%rcx,%rax,1),%rcx 1009 subq %rax,%r10 1010 jmp .Lctr32_loop6 1011 1012.align 16 1013.Lctr32_loop6: 1014 addl $6,%r8d 1015 movups -48(%rcx,%r10,1),%xmm0 1016.byte 102,15,56,220,209 1017 movl %r8d,%eax 1018 xorl %ebp,%eax 1019.byte 102,15,56,220,217 1020.byte 0x0f,0x38,0xf1,0x44,0x24,12 1021 leal 1(%r8),%eax 1022.byte 102,15,56,220,225 1023 xorl %ebp,%eax 1024.byte 0x0f,0x38,0xf1,0x44,0x24,28 1025.byte 102,15,56,220,233 1026 leal 2(%r8),%eax 1027 xorl %ebp,%eax 1028.byte 102,15,56,220,241 1029.byte 0x0f,0x38,0xf1,0x44,0x24,44 1030 leal 3(%r8),%eax 1031.byte 102,15,56,220,249 1032 movups -32(%rcx,%r10,1),%xmm1 1033 xorl %ebp,%eax 1034 1035.byte 102,15,56,220,208 1036.byte 0x0f,0x38,0xf1,0x44,0x24,60 1037 leal 4(%r8),%eax 1038.byte 102,15,56,220,216 1039 xorl %ebp,%eax 1040.byte 0x0f,0x38,0xf1,0x44,0x24,76 1041.byte 102,15,56,220,224 1042 leal 5(%r8),%eax 1043 xorl %ebp,%eax 1044.byte 102,15,56,220,232 1045.byte 0x0f,0x38,0xf1,0x44,0x24,92 1046 movq 
%r10,%rax 1047.byte 102,15,56,220,240 1048.byte 102,15,56,220,248 1049 movups -16(%rcx,%r10,1),%xmm0 1050 1051 call .Lenc_loop6 1052 1053 movdqu (%rdi),%xmm8 1054 movdqu 16(%rdi),%xmm9 1055 movdqu 32(%rdi),%xmm10 1056 movdqu 48(%rdi),%xmm11 1057 movdqu 64(%rdi),%xmm12 1058 movdqu 80(%rdi),%xmm13 1059 leaq 96(%rdi),%rdi 1060 movups -64(%rcx,%r10,1),%xmm1 1061 pxor %xmm2,%xmm8 1062 movaps 0(%rsp),%xmm2 1063 pxor %xmm3,%xmm9 1064 movaps 16(%rsp),%xmm3 1065 pxor %xmm4,%xmm10 1066 movaps 32(%rsp),%xmm4 1067 pxor %xmm5,%xmm11 1068 movaps 48(%rsp),%xmm5 1069 pxor %xmm6,%xmm12 1070 movaps 64(%rsp),%xmm6 1071 pxor %xmm7,%xmm13 1072 movaps 80(%rsp),%xmm7 1073 movdqu %xmm8,(%rsi) 1074 movdqu %xmm9,16(%rsi) 1075 movdqu %xmm10,32(%rsi) 1076 movdqu %xmm11,48(%rsi) 1077 movdqu %xmm12,64(%rsi) 1078 movdqu %xmm13,80(%rsi) 1079 leaq 96(%rsi),%rsi 1080 1081 subq $6,%rdx 1082 jnc .Lctr32_loop6 1083 1084 addq $6,%rdx 1085 jz .Lctr32_done 1086 1087 leal -48(%r10),%eax 1088 leaq -80(%rcx,%r10,1),%rcx 1089 negl %eax 1090 shrl $4,%eax 1091 jmp .Lctr32_tail 1092 1093.align 32 1094.Lctr32_loop8: 1095 addl $8,%r8d 1096 movdqa 96(%rsp),%xmm8 1097.byte 102,15,56,220,209 1098 movl %r8d,%r9d 1099 movdqa 112(%rsp),%xmm9 1100.byte 102,15,56,220,217 1101 bswapl %r9d 1102 movups 32-128(%rcx),%xmm0 1103.byte 102,15,56,220,225 1104 xorl %ebp,%r9d 1105 nop 1106.byte 102,15,56,220,233 1107 movl %r9d,0+12(%rsp) 1108 leaq 1(%r8),%r9 1109.byte 102,15,56,220,241 1110.byte 102,15,56,220,249 1111.byte 102,68,15,56,220,193 1112.byte 102,68,15,56,220,201 1113 movups 48-128(%rcx),%xmm1 1114 bswapl %r9d 1115.byte 102,15,56,220,208 1116.byte 102,15,56,220,216 1117 xorl %ebp,%r9d 1118.byte 0x66,0x90 1119.byte 102,15,56,220,224 1120.byte 102,15,56,220,232 1121 movl %r9d,16+12(%rsp) 1122 leaq 2(%r8),%r9 1123.byte 102,15,56,220,240 1124.byte 102,15,56,220,248 1125.byte 102,68,15,56,220,192 1126.byte 102,68,15,56,220,200 1127 movups 64-128(%rcx),%xmm0 1128 bswapl %r9d 1129.byte 102,15,56,220,209 1130.byte 
102,15,56,220,217 1131 xorl %ebp,%r9d 1132.byte 0x66,0x90 1133.byte 102,15,56,220,225 1134.byte 102,15,56,220,233 1135 movl %r9d,32+12(%rsp) 1136 leaq 3(%r8),%r9 1137.byte 102,15,56,220,241 1138.byte 102,15,56,220,249 1139.byte 102,68,15,56,220,193 1140.byte 102,68,15,56,220,201 1141 movups 80-128(%rcx),%xmm1 1142 bswapl %r9d 1143.byte 102,15,56,220,208 1144.byte 102,15,56,220,216 1145 xorl %ebp,%r9d 1146.byte 0x66,0x90 1147.byte 102,15,56,220,224 1148.byte 102,15,56,220,232 1149 movl %r9d,48+12(%rsp) 1150 leaq 4(%r8),%r9 1151.byte 102,15,56,220,240 1152.byte 102,15,56,220,248 1153.byte 102,68,15,56,220,192 1154.byte 102,68,15,56,220,200 1155 movups 96-128(%rcx),%xmm0 1156 bswapl %r9d 1157.byte 102,15,56,220,209 1158.byte 102,15,56,220,217 1159 xorl %ebp,%r9d 1160.byte 0x66,0x90 1161.byte 102,15,56,220,225 1162.byte 102,15,56,220,233 1163 movl %r9d,64+12(%rsp) 1164 leaq 5(%r8),%r9 1165.byte 102,15,56,220,241 1166.byte 102,15,56,220,249 1167.byte 102,68,15,56,220,193 1168.byte 102,68,15,56,220,201 1169 movups 112-128(%rcx),%xmm1 1170 bswapl %r9d 1171.byte 102,15,56,220,208 1172.byte 102,15,56,220,216 1173 xorl %ebp,%r9d 1174.byte 0x66,0x90 1175.byte 102,15,56,220,224 1176.byte 102,15,56,220,232 1177 movl %r9d,80+12(%rsp) 1178 leaq 6(%r8),%r9 1179.byte 102,15,56,220,240 1180.byte 102,15,56,220,248 1181.byte 102,68,15,56,220,192 1182.byte 102,68,15,56,220,200 1183 movups 128-128(%rcx),%xmm0 1184 bswapl %r9d 1185.byte 102,15,56,220,209 1186.byte 102,15,56,220,217 1187 xorl %ebp,%r9d 1188.byte 0x66,0x90 1189.byte 102,15,56,220,225 1190.byte 102,15,56,220,233 1191 movl %r9d,96+12(%rsp) 1192 leaq 7(%r8),%r9 1193.byte 102,15,56,220,241 1194.byte 102,15,56,220,249 1195.byte 102,68,15,56,220,193 1196.byte 102,68,15,56,220,201 1197 movups 144-128(%rcx),%xmm1 1198 bswapl %r9d 1199.byte 102,15,56,220,208 1200.byte 102,15,56,220,216 1201.byte 102,15,56,220,224 1202 xorl %ebp,%r9d 1203 movdqu 0(%rdi),%xmm10 1204.byte 102,15,56,220,232 1205 movl %r9d,112+12(%rsp) 1206 cmpl 
$11,%eax 1207.byte 102,15,56,220,240 1208.byte 102,15,56,220,248 1209.byte 102,68,15,56,220,192 1210.byte 102,68,15,56,220,200 1211 movups 160-128(%rcx),%xmm0 1212 1213 jb .Lctr32_enc_done 1214 1215.byte 102,15,56,220,209 1216.byte 102,15,56,220,217 1217.byte 102,15,56,220,225 1218.byte 102,15,56,220,233 1219.byte 102,15,56,220,241 1220.byte 102,15,56,220,249 1221.byte 102,68,15,56,220,193 1222.byte 102,68,15,56,220,201 1223 movups 176-128(%rcx),%xmm1 1224 1225.byte 102,15,56,220,208 1226.byte 102,15,56,220,216 1227.byte 102,15,56,220,224 1228.byte 102,15,56,220,232 1229.byte 102,15,56,220,240 1230.byte 102,15,56,220,248 1231.byte 102,68,15,56,220,192 1232.byte 102,68,15,56,220,200 1233 movups 192-128(%rcx),%xmm0 1234 je .Lctr32_enc_done 1235 1236.byte 102,15,56,220,209 1237.byte 102,15,56,220,217 1238.byte 102,15,56,220,225 1239.byte 102,15,56,220,233 1240.byte 102,15,56,220,241 1241.byte 102,15,56,220,249 1242.byte 102,68,15,56,220,193 1243.byte 102,68,15,56,220,201 1244 movups 208-128(%rcx),%xmm1 1245 1246.byte 102,15,56,220,208 1247.byte 102,15,56,220,216 1248.byte 102,15,56,220,224 1249.byte 102,15,56,220,232 1250.byte 102,15,56,220,240 1251.byte 102,15,56,220,248 1252.byte 102,68,15,56,220,192 1253.byte 102,68,15,56,220,200 1254 movups 224-128(%rcx),%xmm0 1255 jmp .Lctr32_enc_done 1256 1257.align 16 1258.Lctr32_enc_done: 1259 movdqu 16(%rdi),%xmm11 1260 pxor %xmm0,%xmm10 1261 movdqu 32(%rdi),%xmm12 1262 pxor %xmm0,%xmm11 1263 movdqu 48(%rdi),%xmm13 1264 pxor %xmm0,%xmm12 1265 movdqu 64(%rdi),%xmm14 1266 pxor %xmm0,%xmm13 1267 movdqu 80(%rdi),%xmm15 1268 pxor %xmm0,%xmm14 1269 pxor %xmm0,%xmm15 1270.byte 102,15,56,220,209 1271.byte 102,15,56,220,217 1272.byte 102,15,56,220,225 1273.byte 102,15,56,220,233 1274.byte 102,15,56,220,241 1275.byte 102,15,56,220,249 1276.byte 102,68,15,56,220,193 1277.byte 102,68,15,56,220,201 1278 movdqu 96(%rdi),%xmm1 1279 leaq 128(%rdi),%rdi 1280 1281.byte 102,65,15,56,221,210 1282 pxor %xmm0,%xmm1 1283 movdqu 112-128(%rdi),%xmm10 
1284.byte 102,65,15,56,221,219 1285 pxor %xmm0,%xmm10 1286 movdqa 0(%rsp),%xmm11 1287.byte 102,65,15,56,221,228 1288.byte 102,65,15,56,221,237 1289 movdqa 16(%rsp),%xmm12 1290 movdqa 32(%rsp),%xmm13 1291.byte 102,65,15,56,221,246 1292.byte 102,65,15,56,221,255 1293 movdqa 48(%rsp),%xmm14 1294 movdqa 64(%rsp),%xmm15 1295.byte 102,68,15,56,221,193 1296 movdqa 80(%rsp),%xmm0 1297 movups 16-128(%rcx),%xmm1 1298.byte 102,69,15,56,221,202 1299 1300 movups %xmm2,(%rsi) 1301 movdqa %xmm11,%xmm2 1302 movups %xmm3,16(%rsi) 1303 movdqa %xmm12,%xmm3 1304 movups %xmm4,32(%rsi) 1305 movdqa %xmm13,%xmm4 1306 movups %xmm5,48(%rsi) 1307 movdqa %xmm14,%xmm5 1308 movups %xmm6,64(%rsi) 1309 movdqa %xmm15,%xmm6 1310 movups %xmm7,80(%rsi) 1311 movdqa %xmm0,%xmm7 1312 movups %xmm8,96(%rsi) 1313 movups %xmm9,112(%rsi) 1314 leaq 128(%rsi),%rsi 1315 1316 subq $8,%rdx 1317 jnc .Lctr32_loop8 1318 1319 addq $8,%rdx 1320 jz .Lctr32_done 1321 leaq -128(%rcx),%rcx 1322 1323.Lctr32_tail: 1324 1325 1326 leaq 16(%rcx),%rcx 1327 cmpq $4,%rdx 1328 jb .Lctr32_loop3 1329 je .Lctr32_loop4 1330 1331 1332 shll $4,%eax 1333 movdqa 96(%rsp),%xmm8 1334 pxor %xmm9,%xmm9 1335 1336 movups 16(%rcx),%xmm0 1337.byte 102,15,56,220,209 1338.byte 102,15,56,220,217 1339 leaq 32-16(%rcx,%rax,1),%rcx 1340 negq %rax 1341.byte 102,15,56,220,225 1342 addq $16,%rax 1343 movups (%rdi),%xmm10 1344.byte 102,15,56,220,233 1345.byte 102,15,56,220,241 1346 movups 16(%rdi),%xmm11 1347 movups 32(%rdi),%xmm12 1348.byte 102,15,56,220,249 1349.byte 102,68,15,56,220,193 1350 1351 call .Lenc_loop8_enter 1352 1353 movdqu 48(%rdi),%xmm13 1354 pxor %xmm10,%xmm2 1355 movdqu 64(%rdi),%xmm10 1356 pxor %xmm11,%xmm3 1357 movdqu %xmm2,(%rsi) 1358 pxor %xmm12,%xmm4 1359 movdqu %xmm3,16(%rsi) 1360 pxor %xmm13,%xmm5 1361 movdqu %xmm4,32(%rsi) 1362 pxor %xmm10,%xmm6 1363 movdqu %xmm5,48(%rsi) 1364 movdqu %xmm6,64(%rsi) 1365 cmpq $6,%rdx 1366 jb .Lctr32_done 1367 1368 movups 80(%rdi),%xmm11 1369 xorps %xmm11,%xmm7 1370 movups %xmm7,80(%rsi) 1371 je 
.Lctr32_done 1372 1373 movups 96(%rdi),%xmm12 1374 xorps %xmm12,%xmm8 1375 movups %xmm8,96(%rsi) 1376 jmp .Lctr32_done 1377 1378.align 32 1379.Lctr32_loop4: 1380.byte 102,15,56,220,209 1381 leaq 16(%rcx),%rcx 1382 decl %eax 1383.byte 102,15,56,220,217 1384.byte 102,15,56,220,225 1385.byte 102,15,56,220,233 1386 movups (%rcx),%xmm1 1387 jnz .Lctr32_loop4 1388.byte 102,15,56,221,209 1389.byte 102,15,56,221,217 1390 movups (%rdi),%xmm10 1391 movups 16(%rdi),%xmm11 1392.byte 102,15,56,221,225 1393.byte 102,15,56,221,233 1394 movups 32(%rdi),%xmm12 1395 movups 48(%rdi),%xmm13 1396 1397 xorps %xmm10,%xmm2 1398 movups %xmm2,(%rsi) 1399 xorps %xmm11,%xmm3 1400 movups %xmm3,16(%rsi) 1401 pxor %xmm12,%xmm4 1402 movdqu %xmm4,32(%rsi) 1403 pxor %xmm13,%xmm5 1404 movdqu %xmm5,48(%rsi) 1405 jmp .Lctr32_done 1406 1407.align 32 1408.Lctr32_loop3: 1409.byte 102,15,56,220,209 1410 leaq 16(%rcx),%rcx 1411 decl %eax 1412.byte 102,15,56,220,217 1413.byte 102,15,56,220,225 1414 movups (%rcx),%xmm1 1415 jnz .Lctr32_loop3 1416.byte 102,15,56,221,209 1417.byte 102,15,56,221,217 1418.byte 102,15,56,221,225 1419 1420 movups (%rdi),%xmm10 1421 xorps %xmm10,%xmm2 1422 movups %xmm2,(%rsi) 1423 cmpq $2,%rdx 1424 jb .Lctr32_done 1425 1426 movups 16(%rdi),%xmm11 1427 xorps %xmm11,%xmm3 1428 movups %xmm3,16(%rsi) 1429 je .Lctr32_done 1430 1431 movups 32(%rdi),%xmm12 1432 xorps %xmm12,%xmm4 1433 movups %xmm4,32(%rsi) 1434 1435.Lctr32_done: 1436 xorps %xmm0,%xmm0 1437 xorl %ebp,%ebp 1438 pxor %xmm1,%xmm1 1439 pxor %xmm2,%xmm2 1440 pxor %xmm3,%xmm3 1441 pxor %xmm4,%xmm4 1442 pxor %xmm5,%xmm5 1443 pxor %xmm6,%xmm6 1444 pxor %xmm7,%xmm7 1445 movaps %xmm0,0(%rsp) 1446 pxor %xmm8,%xmm8 1447 movaps %xmm0,16(%rsp) 1448 pxor %xmm9,%xmm9 1449 movaps %xmm0,32(%rsp) 1450 pxor %xmm10,%xmm10 1451 movaps %xmm0,48(%rsp) 1452 pxor %xmm11,%xmm11 1453 movaps %xmm0,64(%rsp) 1454 pxor %xmm12,%xmm12 1455 movaps %xmm0,80(%rsp) 1456 pxor %xmm13,%xmm13 1457 movaps %xmm0,96(%rsp) 1458 pxor %xmm14,%xmm14 1459 movaps 
%xmm0,112(%rsp)
1460 pxor %xmm15,%xmm15
1461 movq -8(%r11),%rbp
1462.cfi_restore %rbp
1463 leaq (%r11),%rsp
1464.cfi_def_cfa_register %rsp
1465.Lctr32_epilogue:
1466 .byte 0xf3,0xc3
1467.cfi_endproc
1468.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
# ----------------------------------------------------------------------
# aes_hw_cbc_encrypt -- CBC-mode AES built on the AES-NI instructions.
# Register roles, as read off the code below (System V argument order):
#   rdi = input pointer            rsi = output pointer
#   rdx = length in bytes          rcx = key schedule (loop count at 240(%rcx))
#   r8  = IV block, read on entry and rewritten before returning
#   r9d = direction flag: nonzero takes the encrypt path, zero decrypts
# A zero length returns immediately without touching memory.
# The raw .byte sequences are hand-encoded AES-NI opcodes:
#   aesenc = 102,15,56,220         aesenclast = 102,15,56,221
#   aesdec = 102,15,56,222         aesdeclast = 102,15,56,223
# An extra 65/68/69 after the 102 is a REX prefix selecting xmm8..xmm15.
# ----------------------------------------------------------------------
1469.globl aes_hw_cbc_encrypt
1470.hidden aes_hw_cbc_encrypt
1471.type aes_hw_cbc_encrypt,@function
1472.align 16
1473aes_hw_cbc_encrypt:
1474.cfi_startproc
1475 testq %rdx,%rdx
1476 jz .Lcbc_ret
1477
# r10d and r11 keep the loop count and key pointer so eax and rcx can be
# reloaded after each block.
1478 movl 240(%rcx),%r10d
1479 movq %rcx,%r11
1480 testl %r9d,%r9d
1481 jz .Lcbc_decrypt
1482
# Encrypt path: xmm2 carries the chaining value, seeded from the IV.
1483 movups (%r8),%xmm2
1484 movl %r10d,%eax
1485 cmpq $16,%rdx
1486 jb .Lcbc_enc_tail
1487 subq $16,%rdx
1488 jmp .Lcbc_enc_loop
1489.align 16
# Per block: fold the plaintext and round key 0 into xmm2, run the round
# loop (eax counts down), store the ciphertext. xmm2 stays live as the
# next chaining value.
1490.Lcbc_enc_loop:
1491 movups (%rdi),%xmm3
1492 leaq 16(%rdi),%rdi
1493
1494 movups (%rcx),%xmm0
1495 movups 16(%rcx),%xmm1
1496 xorps %xmm0,%xmm3
1497 leaq 32(%rcx),%rcx
1498 xorps %xmm3,%xmm2
1499.Loop_enc1_6:
1500.byte 102,15,56,220,209
1501 decl %eax
1502 movups (%rcx),%xmm1
1503 leaq 16(%rcx),%rcx
1504 jnz .Loop_enc1_6
1505.byte 102,15,56,221,209
1506 movl %r10d,%eax
1507 movq %r11,%rcx
1508 movups %xmm2,0(%rsi)
1509 leaq 16(%rsi),%rsi
1510 subq $16,%rdx
1511 jnc .Lcbc_enc_loop
1512 addq $16,%rdx
1513 jnz .Lcbc_enc_tail
# Input fully consumed: the last ciphertext block is written back as the
# IV and xmm0-xmm3 are scrubbed.
1514 pxor %xmm0,%xmm0
1515 pxor %xmm1,%xmm1
1516 movups %xmm2,(%r8)
1517 pxor %xmm2,%xmm2
1518 pxor %xmm3,%xmm3
1519 jmp .Lcbc_ret
1520
# Fewer than 16 bytes left: copy the rdx remaining bytes to the output,
# zero-fill up to 16 bytes, then point both rsi and rdi at that padded
# block and re-enter the loop with rdx = 0 so it is encrypted in place.
# The .long values encode rep movsb (0xF3,0xA4) and rep stosb (0xF3,0xAA),
# each followed by a two-byte nop (0x66,0x90).
1521.Lcbc_enc_tail:
1522 movq %rdx,%rcx
1523 xchgq %rdi,%rsi
1524.long 0x9066A4F3
1525 movl $16,%ecx
1526 subq %rdx,%rcx
1527 xorl %eax,%eax
1528.long 0x9066AAF3
1529 leaq -16(%rdi),%rdi
1530 movl %r10d,%eax
1531 movq %rdi,%rsi
1532 movq %r11,%rcx
1533 xorq %rdx,%rdx
1534 jmp .Lcbc_enc_loop
1535
1536.align 16
# Decrypt path. A length of exactly 16 bytes is handled inline; any other
# length goes to the bulk code.
1537.Lcbc_decrypt:
1538 cmpq $16,%rdx
1539 jne .Lcbc_decrypt_bulk
1540
1541
1542
# Single block: xmm3 = old IV, xmm4 = copy of the ciphertext that becomes
# the new IV. This path counts rounds down in r10d directly.
1543 movdqu (%rdi),%xmm2
1544 movdqu (%r8),%xmm3
1545 movdqa %xmm2,%xmm4
1546 movups (%rcx),%xmm0
1547 movups 16(%rcx),%xmm1
1548 leaq 32(%rcx),%rcx
1549 xorps %xmm0,%xmm2
1550.Loop_dec1_7:
1551.byte 102,15,56,222,209
1552 decl %r10d
1553 movups (%rcx),%xmm1
1554 leaq 16(%rcx),%rcx
1555 jnz .Loop_dec1_7
1556.byte 102,15,56,223,209
1557 pxor %xmm0,%xmm0
1558 pxor %xmm1,%xmm1
1559 movdqu %xmm4,(%r8)
1560 xorps %xmm3,%xmm2
1561 pxor %xmm3,%xmm3
1562 movups %xmm2,(%rsi)
1563 pxor %xmm2,%xmm2
1564 jmp .Lcbc_ret
1565.align 16
# Bulk decrypt. r11 remembers the incoming rsp, rbp is pushed, and a
# 16-byte aligned scratch slot is carved out below it. rbp then holds the
# key schedule base and xmm10 the running IV.
1566.Lcbc_decrypt_bulk:
1567 leaq (%rsp),%r11
1568.cfi_def_cfa_register %r11
1569 pushq %rbp
1570.cfi_offset %rbp,-16
1571 subq $16,%rsp
1572 andq $-16,%rsp
1573 movq %rcx,%rbp
1574 movups (%r8),%xmm10
1575 movl %r10d,%eax
1576 cmpq $0x50,%rdx
1577 jbe .Lcbc_dec_tail
1578
# More than 0x50 bytes: preload six ciphertext blocks into xmm2-xmm7 and
# keep copies of the first five in xmm11-xmm15 for the CBC xor.
1579 movups (%rcx),%xmm0
1580 movdqu 0(%rdi),%xmm2
1581 movdqu 16(%rdi),%xmm3
1582 movdqa %xmm2,%xmm11
1583 movdqu 32(%rdi),%xmm4
1584 movdqa %xmm3,%xmm12
1585 movdqu 48(%rdi),%xmm5
1586 movdqa %xmm4,%xmm13
1587 movdqu 64(%rdi),%xmm6
1588 movdqa %xmm5,%xmm14
1589 movdqu 80(%rdi),%xmm7
1590 movdqa %xmm6,%xmm15
1591 leaq OPENSSL_ia32cap_P(%rip),%r9
1592 movl 4(%r9),%r9d
1593 cmpq $0x70,%rdx
1594 jbe .Lcbc_dec_six_or_seven
1595
# r9d = second dword of OPENSSL_ia32cap_P masked with 71303168
# (0x04400000, bits 22 and 26). A result of 4194304 (bit 22 alone) picks
# the 6-block loop; anything else uses the 8-block loop.
# NOTE(review): bits 22/26 look like the CPUID MOVBE and XSAVE flags --
# confirm against the capability-vector definition.
1596 andl $71303168,%r9d
1597 subq $0x50,%rdx
1598 cmpl $4194304,%r9d
1599 je .Lcbc_dec_loop6_enter
1600 subq $0x20,%rdx
1601 leaq 112(%rcx),%rcx
1602 jmp .Lcbc_dec_loop8_enter
1603.align 16
# Eight-block loop. The eighth result of the previous pass (xmm9) is
# stored at the top. rcx is biased by +112, so round keys are addressed
# as N-112(%rcx).
1604.Lcbc_dec_loop8:
1605 movups %xmm9,(%rsi)
1606 leaq 16(%rsi),%rsi
1607.Lcbc_dec_loop8_enter:
1608 movdqu 96(%rdi),%xmm8
1609 pxor %xmm0,%xmm2
1610 movdqu 112(%rdi),%xmm9
1611 pxor %xmm0,%xmm3
1612 movups 16-112(%rcx),%xmm1
1613 pxor %xmm0,%xmm4
# rbp ends up as rdi+128 when rdx >= 0x70 and as rdi otherwise: cmpq sets
# CF, adcq turns -1 into 0 or leaves -1, andq maps that to 0 or 128. The
# next batch of ciphertext is loaded through rbp further down.
1614 movq $-1,%rbp
1615 cmpq $0x70,%rdx
1616 pxor %xmm0,%xmm5
1617 pxor %xmm0,%xmm6
1618 pxor %xmm0,%xmm7
1619 pxor %xmm0,%xmm8
1620
1621.byte 102,15,56,222,209
1622 pxor %xmm0,%xmm9
1623 movups 32-112(%rcx),%xmm0
1624.byte 102,15,56,222,217
1625.byte 102,15,56,222,225
1626.byte 102,15,56,222,233
1627.byte 102,15,56,222,241
1628.byte 102,15,56,222,249
1629.byte 102,68,15,56,222,193
1630 adcq $0,%rbp
1631 andq $128,%rbp
1632.byte 102,68,15,56,222,201
1633 addq %rdi,%rbp
1634 movups 48-112(%rcx),%xmm1
1635.byte 102,15,56,222,208
1636.byte 102,15,56,222,216
1637.byte 102,15,56,222,224
1638.byte 102,15,56,222,232
1639.byte 102,15,56,222,240
1640.byte 102,15,56,222,248
1641.byte 102,68,15,56,222,192
1642.byte 102,68,15,56,222,200
1643 movups 64-112(%rcx),%xmm0
1644 nop
1645.byte 102,15,56,222,209
1646.byte 102,15,56,222,217
1647.byte 102,15,56,222,225
1648.byte 102,15,56,222,233
1649.byte 102,15,56,222,241
1650.byte 102,15,56,222,249
1651.byte 102,68,15,56,222,193
1652.byte 102,68,15,56,222,201
1653 movups 80-112(%rcx),%xmm1
1654 nop
1655.byte 102,15,56,222,208
1656.byte 102,15,56,222,216
1657.byte 102,15,56,222,224
1658.byte 102,15,56,222,232
1659.byte 102,15,56,222,240
1660.byte 102,15,56,222,248
1661.byte 102,68,15,56,222,192
1662.byte 102,68,15,56,222,200
1663 movups 96-112(%rcx),%xmm0
1664 nop
1665.byte 102,15,56,222,209
1666.byte 102,15,56,222,217
1667.byte 102,15,56,222,225
1668.byte 102,15,56,222,233
1669.byte 102,15,56,222,241
1670.byte 102,15,56,222,249
1671.byte 102,68,15,56,222,193
1672.byte 102,68,15,56,222,201
1673 movups 112-112(%rcx),%xmm1
1674 nop
1675.byte 102,15,56,222,208
1676.byte 102,15,56,222,216
1677.byte 102,15,56,222,224
1678.byte 102,15,56,222,232
1679.byte 102,15,56,222,240
1680.byte 102,15,56,222,248
1681.byte 102,68,15,56,222,192
1682.byte 102,68,15,56,222,200
1683 movups 128-112(%rcx),%xmm0
1684 nop
1685.byte 102,15,56,222,209
1686.byte 102,15,56,222,217
1687.byte 102,15,56,222,225
1688.byte 102,15,56,222,233
1689.byte 102,15,56,222,241
1690.byte 102,15,56,222,249
1691.byte 102,68,15,56,222,193
1692.byte 102,68,15,56,222,201
1693 movups 144-112(%rcx),%xmm1
# Dispatch on the loop count in eax: below 11 finishes at the jb, exactly
# 11 runs two more rounds and leaves at the je, above 11 runs four more.
# The flags from this cmpl stay valid because only AES and movups
# instructions sit between it and the two branches.
1694 cmpl $11,%eax
1695.byte 102,15,56,222,208
1696.byte 102,15,56,222,216
1697.byte 102,15,56,222,224
1698.byte 102,15,56,222,232
1699.byte 102,15,56,222,240
1700.byte 102,15,56,222,248
1701.byte 102,68,15,56,222,192
1702.byte 102,68,15,56,222,200
1703 movups 160-112(%rcx),%xmm0
1704 jb .Lcbc_dec_done
1705.byte 102,15,56,222,209
1706.byte 102,15,56,222,217
1707.byte 102,15,56,222,225
1708.byte 102,15,56,222,233
1709.byte 102,15,56,222,241
1710.byte 102,15,56,222,249
1711.byte 102,68,15,56,222,193
1712.byte 102,68,15,56,222,201
1713 movups 176-112(%rcx),%xmm1
1714 nop
1715.byte 102,15,56,222,208
1716.byte 102,15,56,222,216
1717.byte 102,15,56,222,224
1718.byte 102,15,56,222,232
1719.byte 102,15,56,222,240
1720.byte 102,15,56,222,248
1721.byte 102,68,15,56,222,192
1722.byte 102,68,15,56,222,200
1723 movups 192-112(%rcx),%xmm0
1724 je .Lcbc_dec_done
1725.byte 102,15,56,222,209
1726.byte 102,15,56,222,217
1727.byte 102,15,56,222,225
1728.byte 102,15,56,222,233
1729.byte 102,15,56,222,241
1730.byte 102,15,56,222,249
1731.byte 102,68,15,56,222,193
1732.byte 102,68,15,56,222,201
1733 movups 208-112(%rcx),%xmm1
1734 nop
1735.byte 102,15,56,222,208
1736.byte 102,15,56,222,216
1737.byte 102,15,56,222,224
1738.byte 102,15,56,222,232
1739.byte 102,15,56,222,240
1740.byte 102,15,56,222,248
1741.byte 102,68,15,56,222,192
1742.byte 102,68,15,56,222,200
1743 movups 224-112(%rcx),%xmm0
1744 jmp .Lcbc_dec_done
1745.align 16
# Final rounds. The round key loaded last (xmm0) is xored into the saved
# IV/ciphertext values, so each aesdeclast applies that key and the CBC
# xor in one step. The next batch of input is loaded through rbp while
# the last rounds retire, and xmm10 becomes the eighth ciphertext block
# (the new running IV).
1746.Lcbc_dec_done:
1747.byte 102,15,56,222,209
1748.byte 102,15,56,222,217
1749 pxor %xmm0,%xmm10
1750 pxor %xmm0,%xmm11
1751.byte 102,15,56,222,225
1752.byte 102,15,56,222,233
1753 pxor %xmm0,%xmm12
1754 pxor %xmm0,%xmm13
1755.byte 102,15,56,222,241
1756.byte 102,15,56,222,249
1757 pxor %xmm0,%xmm14
1758 pxor %xmm0,%xmm15
1759.byte 102,68,15,56,222,193
1760.byte 102,68,15,56,222,201
1761 movdqu 80(%rdi),%xmm1
1762
1763.byte 102,65,15,56,223,210
1764 movdqu 96(%rdi),%xmm10
1765 pxor %xmm0,%xmm1
1766.byte 102,65,15,56,223,219
1767 pxor %xmm0,%xmm10
1768 movdqu 112(%rdi),%xmm0
1769.byte 102,65,15,56,223,228
1770 leaq 128(%rdi),%rdi
1771 movdqu 0(%rbp),%xmm11
1772.byte 102,65,15,56,223,237
1773.byte 102,65,15,56,223,246
1774 movdqu 16(%rbp),%xmm12
1775 movdqu 32(%rbp),%xmm13
1776.byte 102,65,15,56,223,255
1777.byte 102,68,15,56,223,193
1778 movdqu 48(%rbp),%xmm14
1779 movdqu 64(%rbp),%xmm15
1780.byte 102,69,15,56,223,202
1781 movdqa %xmm0,%xmm10
1782 movdqu 80(%rbp),%xmm1
1783 movups -112(%rcx),%xmm0
1784
# Seven results are stored here while xmm2-xmm7 are refilled with the
# preloaded next batch. The eighth result (xmm9) is written either at the
# top of the next pass or in the exit code just below.
1785 movups %xmm2,(%rsi)
1786 movdqa %xmm11,%xmm2
1787 movups %xmm3,16(%rsi)
1788 movdqa %xmm12,%xmm3
1789 movups %xmm4,32(%rsi)
1790 movdqa %xmm13,%xmm4
1791 movups %xmm5,48(%rsi)
1792 movdqa %xmm14,%xmm5
1793 movups %xmm6,64(%rsi)
1794 movdqa %xmm15,%xmm6
1795 movups %xmm7,80(%rsi)
1796 movdqa %xmm1,%xmm7
1797 movups %xmm8,96(%rsi)
1798 leaq 112(%rsi),%rsi
1799
1800 subq $0x80,%rdx
1801 ja .Lcbc_dec_loop8
1802
# Loop exit: undo the +112 bias on rcx and decide from the remaining
# length whether to finish, use the short tail, or fall into the six or
# seven block handler (xmm2 is restored from xmm11 first).
1803 movaps %xmm9,%xmm2
1804 leaq -112(%rcx),%rcx
1805 addq $0x70,%rdx
1806 jle .Lcbc_dec_clear_tail_collected
1807 movups %xmm9,(%rsi)
1808 leaq 16(%rsi),%rsi
1809 cmpq $0x50,%rdx
1810 jbe .Lcbc_dec_tail
1811
1812 movaps %xmm11,%xmm2
# Between 0x50 and 0x70 bytes left, with six blocks already in xmm2-xmm7.
# Up to 0x60 bytes go through _aesni_decrypt6; xmm8 saves the sixth
# ciphertext block, which becomes the new IV in xmm10.
1813.Lcbc_dec_six_or_seven:
1814 cmpq $0x60,%rdx
1815 ja .Lcbc_dec_seven
1816
1817 movaps %xmm7,%xmm8
1818 call _aesni_decrypt6
1819 pxor %xmm10,%xmm2
1820 movaps %xmm8,%xmm10
1821 pxor %xmm11,%xmm3
1822 movdqu %xmm2,(%rsi)
1823 pxor %xmm12,%xmm4
1824 movdqu %xmm3,16(%rsi)
1825 pxor %xmm3,%xmm3
1826 pxor %xmm13,%xmm5
1827 movdqu %xmm4,32(%rsi)
1828 pxor %xmm4,%xmm4
1829 pxor %xmm14,%xmm6
1830 movdqu %xmm5,48(%rsi)
1831 pxor %xmm5,%xmm5
1832 pxor %xmm15,%xmm7
1833 movdqu %xmm6,64(%rsi)
1834 pxor %xmm6,%xmm6
1835 leaq 80(%rsi),%rsi
1836 movdqa %xmm7,%xmm2
1837 pxor %xmm7,%xmm7
1838 jmp .Lcbc_dec_tail_collected
1839
1840.align 16
# Seven blocks: the seventh is loaded into xmm8, xmm9 is zeroed, and
# _aesni_decrypt8 is called. The sixth and seventh ciphertext blocks are
# reloaded afterwards for the xor and for the new IV.
1841.Lcbc_dec_seven:
1842 movups 96(%rdi),%xmm8
1843 xorps %xmm9,%xmm9
1844 call _aesni_decrypt8
1845 movups 80(%rdi),%xmm9
1846 pxor %xmm10,%xmm2
1847 movups 96(%rdi),%xmm10
1848 pxor %xmm11,%xmm3
1849 movdqu %xmm2,(%rsi)
1850 pxor %xmm12,%xmm4
1851 movdqu %xmm3,16(%rsi)
1852 pxor %xmm3,%xmm3
1853 pxor %xmm13,%xmm5
1854 movdqu %xmm4,32(%rsi)
1855 pxor %xmm4,%xmm4
1856 pxor %xmm14,%xmm6
1857 movdqu %xmm5,48(%rsi)
1858 pxor %xmm5,%xmm5
1859 pxor %xmm15,%xmm7
1860 movdqu %xmm6,64(%rsi)
1861 pxor %xmm6,%xmm6
1862 pxor %xmm9,%xmm8
1863 movdqu %xmm7,80(%rsi)
1864 pxor %xmm7,%xmm7
1865 leaq 96(%rsi),%rsi
1866 movdqa %xmm8,%xmm2
1867 pxor %xmm8,%xmm8
1868 pxor %xmm9,%xmm9
1869 jmp .Lcbc_dec_tail_collected
1870
1871.align 16
# Six-block loop, selected by the capability test above. The sixth result
# of the previous pass (xmm7) is stored at the top. rcx and eax are
# reloaded from rbp and r10d after each _aesni_decrypt6 call.
1872.Lcbc_dec_loop6:
1873 movups %xmm7,(%rsi)
1874 leaq 16(%rsi),%rsi
1875 movdqu 0(%rdi),%xmm2
1876 movdqu 16(%rdi),%xmm3
1877 movdqa %xmm2,%xmm11
1878 movdqu 32(%rdi),%xmm4
1879 movdqa %xmm3,%xmm12
1880 movdqu 48(%rdi),%xmm5
1881 movdqa %xmm4,%xmm13
1882 movdqu 64(%rdi),%xmm6
1883 movdqa %xmm5,%xmm14
1884 movdqu 80(%rdi),%xmm7
1885 movdqa %xmm6,%xmm15
1886.Lcbc_dec_loop6_enter:
1887 leaq 96(%rdi),%rdi
1888 movdqa %xmm7,%xmm8
1889
1890 call _aesni_decrypt6
1891
1892 pxor %xmm10,%xmm2
1893 movdqa %xmm8,%xmm10
1894 pxor %xmm11,%xmm3
1895 movdqu %xmm2,(%rsi)
1896 pxor %xmm12,%xmm4
1897 movdqu %xmm3,16(%rsi)
1898 pxor %xmm13,%xmm5
1899 movdqu %xmm4,32(%rsi)
1900 pxor %xmm14,%xmm6
1901 movq %rbp,%rcx
1902 movdqu %xmm5,48(%rsi)
1903 pxor %xmm15,%xmm7
1904 movl %r10d,%eax
1905 movdqu %xmm6,64(%rsi)
1906 leaq 80(%rsi),%rsi
1907 subq $0x60,%rdx
1908 ja .Lcbc_dec_loop6
1909
1910 movdqa %xmm7,%xmm2
1911 addq $0x50,%rdx
1912 jle .Lcbc_dec_clear_tail_collected
1913 movups %xmm7,(%rsi)
1914 leaq 16(%rsi),%rsi
1915
# Short tail, entered with at most 0x50 bytes left. Blocks are loaded one
# at a time and rdx is reduced by 0x10 per block, branching to the
# matching handler once it reaches zero or wraps. The fall-through case
# loads a fifth block and uses _aesni_decrypt6 with xmm7 zeroed.
1916.Lcbc_dec_tail:
1917 movups (%rdi),%xmm2
1918 subq $0x10,%rdx
1919 jbe .Lcbc_dec_one
1920
1921 movups 16(%rdi),%xmm3
1922 movaps %xmm2,%xmm11
1923 subq $0x10,%rdx
1924 jbe .Lcbc_dec_two
1925
1926 movups 32(%rdi),%xmm4
1927 movaps %xmm3,%xmm12
1928 subq $0x10,%rdx
1929 jbe .Lcbc_dec_three
1930
1931 movups 48(%rdi),%xmm5
1932 movaps %xmm4,%xmm13
1933 subq $0x10,%rdx
1934 jbe .Lcbc_dec_four
1935
1936 movups 64(%rdi),%xmm6
1937 movaps %xmm5,%xmm14
1938 movaps %xmm6,%xmm15
1939 xorps %xmm7,%xmm7
1940 call _aesni_decrypt6
1941 pxor %xmm10,%xmm2
1942 movaps %xmm15,%xmm10
1943 pxor %xmm11,%xmm3
1944 movdqu %xmm2,(%rsi)
1945 pxor %xmm12,%xmm4
1946 movdqu %xmm3,16(%rsi)
1947 pxor %xmm3,%xmm3
1948 pxor %xmm13,%xmm5
1949 movdqu %xmm4,32(%rsi)
1950 pxor %xmm4,%xmm4
1951 pxor %xmm14,%xmm6
1952 movdqu %xmm5,48(%rsi)
1953 pxor %xmm5,%xmm5
1954 leaq 64(%rsi),%rsi
1955 movdqa %xmm6,%xmm2
1956 pxor %xmm6,%xmm6
1957 pxor %xmm7,%xmm7
1958 subq $0x10,%rdx
1959 jmp .Lcbc_dec_tail_collected
1960
1961.align 16
# Handlers for one to four blocks. Each leaves the last plaintext block
# in xmm2 (stored by the common exit), moves the last ciphertext block
# into xmm10 as the new IV, and scrubs the other data registers it used.
1962.Lcbc_dec_one:
1963 movaps %xmm2,%xmm11
1964 movups (%rcx),%xmm0
1965 movups 16(%rcx),%xmm1
1966 leaq 32(%rcx),%rcx
1967 xorps %xmm0,%xmm2
1968.Loop_dec1_8:
1969.byte 102,15,56,222,209
1970 decl %eax
1971 movups (%rcx),%xmm1
1972 leaq 16(%rcx),%rcx
1973 jnz .Loop_dec1_8
1974.byte 102,15,56,223,209
1975 xorps %xmm10,%xmm2
1976 movaps %xmm11,%xmm10
1977 jmp .Lcbc_dec_tail_collected
1978.align 16
1979.Lcbc_dec_two:
1980 movaps %xmm3,%xmm12
1981 call _aesni_decrypt2
1982 pxor %xmm10,%xmm2
1983 movaps %xmm12,%xmm10
1984 pxor %xmm11,%xmm3
1985 movdqu %xmm2,(%rsi)
1986 movdqa %xmm3,%xmm2
1987 pxor %xmm3,%xmm3
1988 leaq 16(%rsi),%rsi
1989 jmp .Lcbc_dec_tail_collected
1990.align 16
1991.Lcbc_dec_three:
1992 movaps %xmm4,%xmm13
1993 call _aesni_decrypt3
1994 pxor %xmm10,%xmm2
1995 movaps %xmm13,%xmm10
1996 pxor %xmm11,%xmm3
1997 movdqu %xmm2,(%rsi)
1998 pxor %xmm12,%xmm4
1999 movdqu %xmm3,16(%rsi)
2000 pxor %xmm3,%xmm3
2001 movdqa %xmm4,%xmm2
2002 pxor %xmm4,%xmm4
2003 leaq 32(%rsi),%rsi
2004 jmp .Lcbc_dec_tail_collected
2005.align 16
2006.Lcbc_dec_four:
2007 movaps %xmm5,%xmm14
2008 call _aesni_decrypt4
2009 pxor %xmm10,%xmm2
2010 movaps %xmm14,%xmm10
2011 pxor %xmm11,%xmm3
2012 movdqu %xmm2,(%rsi)
2013 pxor %xmm12,%xmm4
2014 movdqu %xmm3,16(%rsi)
2015 pxor %xmm3,%xmm3
2016 pxor %xmm13,%xmm5
2017 movdqu %xmm4,32(%rsi)
2018 pxor %xmm4,%xmm4
2019 movdqa %xmm5,%xmm2
2020 pxor %xmm5,%xmm5
2021 leaq 48(%rsi),%rsi
2022 jmp .Lcbc_dec_tail_collected
2023
2024.align 16
# Common exit. The _clear entry first scrubs xmm3-xmm9 for the loop exits
# that did not do so themselves. xmm10 is written back as the IV. When
# rdx & 15 is zero the last block in xmm2 is stored whole; otherwise it is
# spilled to the aligned scratch slot for a byte-wise copy.
2025.Lcbc_dec_clear_tail_collected:
2026 pxor %xmm3,%xmm3
2027 pxor %xmm4,%xmm4
2028 pxor %xmm5,%xmm5
2029 pxor %xmm6,%xmm6
2030 pxor %xmm7,%xmm7
2031 pxor %xmm8,%xmm8
2032 pxor %xmm9,%xmm9
2033.Lcbc_dec_tail_collected:
2034 movups %xmm10,(%r8)
2035 andq $15,%rdx
2036 jnz .Lcbc_dec_tail_partial
2037 movups %xmm2,(%rsi)
2038 pxor %xmm2,%xmm2
2039 jmp .Lcbc_dec_ret
2040.align 16
2041.Lcbc_dec_tail_partial:
2042 movaps %xmm2,(%rsp)
2043 pxor
%xmm2,%xmm2
# Rest of .Lcbc_dec_tail_partial in aes_hw_cbc_encrypt: the last block
# was spilled to the scratch slot at (%rsp) and xmm2 zeroed. rep movsb
# (the .long encodes 0xF3,0xA4 plus a two-byte nop) copies rcx = 16 - rdx
# bytes from the slot to the output, then the zeroed xmm2 wipes the slot.
# NOTE(review): rdx was reduced to rdx & 15 before the branch here, so the
# count is 16 - (rdx & 15) -- confirm this is the intended length.
2044 movq $16,%rcx
2045 movq %rsi,%rdi
2046 subq %rdx,%rcx
2047 leaq (%rsp),%rsi
2048.long 0x9066A4F3
2049 movdqa %xmm2,(%rsp)
2050
# Bulk-decrypt epilogue: scrub xmm0/xmm1, reload rbp from just below the
# saved stack pointer in r11 and restore rsp. .Lcbc_ret is the exit for
# every path of the function; the bytes 0xf3,0xc3 are a rep-prefixed ret.
2051.Lcbc_dec_ret:
2052 xorps %xmm0,%xmm0
2053 pxor %xmm1,%xmm1
2054 movq -8(%r11),%rbp
2055.cfi_restore %rbp
2056 leaq (%r11),%rsp
2057.cfi_def_cfa_register %rsp
2058.Lcbc_ret:
2059 .byte 0xf3,0xc3
2060.cfi_endproc
2061.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
# ----------------------------------------------------------------------
# aes_hw_set_decrypt_key -- build a decryption key schedule.
# Calls __aesni_set_encrypt_key with the incoming argument registers
# untouched. When that returns 0 in eax the schedule at rdx is converted
# in place: the first and last round keys are swapped unchanged, the keys
# in between are swapped pairwise after aesimc, and the middle key gets
# aesimc where it sits. A nonzero result is returned as is, with the
# schedule left alone.
# Relies on the callee leaving the stored loop count in esi and rdx still
# pointing at the schedule.
# Opcode bytes: 0x48,0x83,0xEC,0x08 is sub $8,%rsp; aesimc is
# 102,15,56,219 followed by the register byte.
# ----------------------------------------------------------------------
2062.globl aes_hw_set_decrypt_key
2063.hidden aes_hw_set_decrypt_key
2064.type aes_hw_set_decrypt_key,@function
2065.align 16
2066aes_hw_set_decrypt_key:
2067.cfi_startproc
2068.byte 0x48,0x83,0xEC,0x08
2069.cfi_adjust_cfa_offset 8
2070 call __aesni_set_encrypt_key
2071 shll $4,%esi
2072 testl %eax,%eax
2073 jnz .Ldec_key_ret
# rdi = address of the last round key (schedule + 16*count + 16).
2074 leaq 16(%rdx,%rsi,1),%rdi
2075
2076 movups (%rdx),%xmm0
2077 movups (%rdi),%xmm1
2078 movups %xmm0,(%rdi)
2079 movups %xmm1,(%rdx)
2080 leaq 16(%rdx),%rdx
2081 leaq -16(%rdi),%rdi
2082
# rdx walks up and rdi walks down until they meet at the middle key.
2083.Ldec_key_inverse:
2084 movups (%rdx),%xmm0
2085 movups (%rdi),%xmm1
2086.byte 102,15,56,219,192
2087.byte 102,15,56,219,201
2088 leaq 16(%rdx),%rdx
2089 leaq -16(%rdi),%rdi
2090 movups %xmm0,16(%rdi)
2091 movups %xmm1,-16(%rdx)
2092 cmpq %rdx,%rdi
2093 ja .Ldec_key_inverse
2094
2095 movups (%rdx),%xmm0
2096.byte 102,15,56,219,192
2097 pxor %xmm1,%xmm1
2098 movups %xmm0,(%rdi)
2099 pxor %xmm0,%xmm0
2100.Ldec_key_ret:
2101 addq $8,%rsp
2102.cfi_adjust_cfa_offset -8
2103 .byte 0xf3,0xc3
2104.cfi_endproc
2105.LSEH_end_set_decrypt_key:
2106.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
# ----------------------------------------------------------------------
# aes_hw_set_encrypt_key / __aesni_set_encrypt_key -- expand a user key
# into a round-key schedule. Both labels name the same entry point.
#   rdi = user key bytes    esi = key size in bits    rdx = schedule out
# Result in rax/eax: 0 on success, -1 when rdi or rdx is null, -2 when the
# bit count is not 128, 192 or 256.
# Builds without NDEBUG and without BORINGSSL_FIPS also set byte 3 of
# BORINGSSL_function_hit on entry.
# ----------------------------------------------------------------------
2107.globl aes_hw_set_encrypt_key
2108.hidden aes_hw_set_encrypt_key
2109.type aes_hw_set_encrypt_key,@function
2110.align 16
2111aes_hw_set_encrypt_key:
2112__aesni_set_encrypt_key:
2113.cfi_startproc
2114#ifndef NDEBUG
2115#ifndef BORINGSSL_FIPS
2116 movb $1,BORINGSSL_function_hit+3(%rip)
2117#endif
2118#endif
2119.byte 0x48,0x83,0xEC,0x08
2120.cfi_adjust_cfa_offset 8
2121 movq $-1,%rax
2122 testq %rdi,%rdi
2123 jz .Lenc_key_ret
2124 testq %rdx,%rdx
2125 jz
.Lenc_key_ret
2126
# Body of aes_hw_set_encrypt_key (rdi = user key, esi = bits, rdx =
# schedule). xmm0 = first 16 key bytes, rax = write cursor starting at
# schedule + 16. r10d keeps bits 11 and 28 of the second
# OPENSSL_ia32cap_P dword (mask 268437504 = 0x10000800); a value of
# 268435456 (bit 28 alone) selects the _alt expansion paths below.
# NOTE(review): bits 28/11 look like the AVX and XOP capability flags --
# confirm against the capability-vector definition.
2127 movups (%rdi),%xmm0
2128 xorps %xmm4,%xmm4
2129 leaq OPENSSL_ia32cap_P(%rip),%r10
2130 movl 4(%r10),%r10d
2131 andl $268437504,%r10d
2132 leaq 16(%rdx),%rax
2133 cmpl $256,%esi
2134 je .L14rounds
2135 cmpl $192,%esi
2136 je .L12rounds
2137 cmpl $128,%esi
2138 jne .Lbad_keybits
2139
# AES-128: esi = 9 is the count stored next to the schedule. The default
# path issues aeskeygenassist (102,15,58,223,regs,rcon) with the constants
# 1,2,4,...,128,27,54. Each is followed by a helper call that stores the
# previous round key and derives the next one.
2140.L10rounds:
2141 movl $9,%esi
2142 cmpl $268435456,%r10d
2143 je .L10rounds_alt
2144
2145 movups %xmm0,(%rdx)
2146.byte 102,15,58,223,200,1
2147 call .Lkey_expansion_128_cold
2148.byte 102,15,58,223,200,2
2149 call .Lkey_expansion_128
2150.byte 102,15,58,223,200,4
2151 call .Lkey_expansion_128
2152.byte 102,15,58,223,200,8
2153 call .Lkey_expansion_128
2154.byte 102,15,58,223,200,16
2155 call .Lkey_expansion_128
2156.byte 102,15,58,223,200,32
2157 call .Lkey_expansion_128
2158.byte 102,15,58,223,200,64
2159 call .Lkey_expansion_128
2160.byte 102,15,58,223,200,128
2161 call .Lkey_expansion_128
2162.byte 102,15,58,223,200,27
2163 call .Lkey_expansion_128
2164.byte 102,15,58,223,200,54
2165 call .Lkey_expansion_128
# rax is schedule + 160 here, so 80(%rax) is schedule + 240.
2166 movups %xmm0,(%rax)
2167 movl %esi,80(%rax)
2168 xorl %eax,%eax
2169 jmp .Lenc_key_ret
2170
2171.align 16
# Alternative AES-128 expansion without aeskeygenassist: pshufb with
# .Lkey_rotate (102,15,56,0,197), then aesenclast against the constant
# vector in xmm4 (102,15,56,221,196), which pslld doubles after each use.
# The loop produces eight keys; the last two are unrolled and start from
# .Lkey_rcon1b (0x1b, doubled to 0x36 for the final key).
2172.L10rounds_alt:
2173 movdqa .Lkey_rotate(%rip),%xmm5
2174 movl $8,%r10d
2175 movdqa .Lkey_rcon1(%rip),%xmm4
2176 movdqa %xmm0,%xmm2
2177 movdqu %xmm0,(%rdx)
2178 jmp .Loop_key128
2179
2180.align 16
2181.Loop_key128:
2182.byte 102,15,56,0,197
2183.byte 102,15,56,221,196
2184 pslld $1,%xmm4
2185 leaq 16(%rax),%rax
2186
2187 movdqa %xmm2,%xmm3
2188 pslldq $4,%xmm2
2189 pxor %xmm2,%xmm3
2190 pslldq $4,%xmm2
2191 pxor %xmm2,%xmm3
2192 pslldq $4,%xmm2
2193 pxor %xmm3,%xmm2
2194
2195 pxor %xmm2,%xmm0
2196 movdqu %xmm0,-16(%rax)
2197 movdqa %xmm0,%xmm2
2198
2199 decl %r10d
2200 jnz .Loop_key128
2201
2202 movdqa .Lkey_rcon1b(%rip),%xmm4
2203
2204.byte 102,15,56,0,197
2205.byte 102,15,56,221,196
2206 pslld $1,%xmm4
2207
2208 movdqa %xmm2,%xmm3
2209 pslldq $4,%xmm2
2210 pxor %xmm2,%xmm3
2211 pslldq $4,%xmm2
2212 pxor %xmm2,%xmm3
2213 pslldq $4,%xmm2
2214 pxor %xmm3,%xmm2
2215
2216 pxor %xmm2,%xmm0
2217 movdqu %xmm0,(%rax)
2218
2219 movdqa %xmm0,%xmm2
2220.byte 102,15,56,0,197
2221.byte 102,15,56,221,196
2222
2223 movdqa %xmm2,%xmm3
2224 pslldq $4,%xmm2
2225 pxor %xmm2,%xmm3
2226 pslldq $4,%xmm2
2227 pxor %xmm2,%xmm3
2228 pslldq $4,%xmm2
2229 pxor %xmm3,%xmm2
2230
2231 pxor %xmm2,%xmm0
2232 movdqu %xmm0,16(%rax)
2233
2234 movl %esi,96(%rax)
2235 xorl %eax,%eax
2236 jmp .Lenc_key_ret
2237
2238.align 16
# AES-192: xmm2 receives key bytes 16..23 (movq loads 8 bytes), esi = 11.
# The default path alternates the 192a and 192b helpers.
2239.L12rounds:
2240 movq 16(%rdi),%xmm2
2241 movl $11,%esi
2242 cmpl $268435456,%r10d
2243 je .L12rounds_alt
2244
2245 movups %xmm0,(%rdx)
2246.byte 102,15,58,223,202,1
2247 call .Lkey_expansion_192a_cold
2248.byte 102,15,58,223,202,2
2249 call .Lkey_expansion_192b
2250.byte 102,15,58,223,202,4
2251 call .Lkey_expansion_192a
2252.byte 102,15,58,223,202,8
2253 call .Lkey_expansion_192b
2254.byte 102,15,58,223,202,16
2255 call .Lkey_expansion_192a
2256.byte 102,15,58,223,202,32
2257 call .Lkey_expansion_192b
2258.byte 102,15,58,223,202,64
2259 call .Lkey_expansion_192a
2260.byte 102,15,58,223,202,128
2261 call .Lkey_expansion_192b
2262 movups %xmm0,(%rax)
2263 movl %esi,48(%rax)
2264 xorq %rax,%rax
2265 jmp .Lenc_key_ret
2266
2267.align 16
# Alternative AES-192 expansion: eight passes of pshufb with
# .Lkey_rotate192 plus aesenclast, each writing 24 bytes of schedule
# (8 bytes at 0(%rax) and 16 bytes just before the advanced cursor).
2268.L12rounds_alt:
2269 movdqa .Lkey_rotate192(%rip),%xmm5
2270 movdqa .Lkey_rcon1(%rip),%xmm4
2271 movl $8,%r10d
2272 movdqu %xmm0,(%rdx)
2273 jmp .Loop_key192
2274
2275.align 16
2276.Loop_key192:
2277 movq %xmm2,0(%rax)
2278 movdqa %xmm2,%xmm1
2279.byte 102,15,56,0,213
2280.byte 102,15,56,221,212
2281 pslld $1,%xmm4
2282 leaq 24(%rax),%rax
2283
2284 movdqa %xmm0,%xmm3
2285 pslldq $4,%xmm0
2286 pxor %xmm0,%xmm3
2287 pslldq $4,%xmm0
2288 pxor %xmm0,%xmm3
2289 pslldq $4,%xmm0
2290 pxor %xmm3,%xmm0
2291
2292 pshufd $0xff,%xmm0,%xmm3
2293 pxor %xmm1,%xmm3
2294 pslldq $4,%xmm1
2295 pxor %xmm1,%xmm3
2296
2297 pxor %xmm2,%xmm0
2298 pxor %xmm3,%xmm2
2299 movdqu %xmm0,-16(%rax)
2300
2301 decl %r10d
2302 jnz .Loop_key192
2303
2304 movl %esi,32(%rax)
2305 xorl %eax,%eax
2306 jmp .Lenc_key_ret
2307
2308.align 16
# AES-256: xmm2 receives key bytes 16..31, esi = 13. rax is advanced by
# another 16 because both key halves are written directly to 0(%rdx) and
# 16(%rdx). The default path alternates the 256a and 256b helpers.
2309.L14rounds:
2310 movups 16(%rdi),%xmm2
2311 movl $13,%esi
2312 leaq 16(%rax),%rax
2313 cmpl $268435456,%r10d
2314 je .L14rounds_alt
2315
2316 movups %xmm0,(%rdx)
2317 movups %xmm2,16(%rdx)
2318.byte 102,15,58,223,202,1
2319 call .Lkey_expansion_256a_cold
2320.byte 102,15,58,223,200,1
2321 call .Lkey_expansion_256b
2322.byte 102,15,58,223,202,2
2323 call .Lkey_expansion_256a
2324.byte 102,15,58,223,200,2
2325 call .Lkey_expansion_256b
2326.byte 102,15,58,223,202,4
2327 call .Lkey_expansion_256a
2328.byte 102,15,58,223,200,4
2329 call .Lkey_expansion_256b
2330.byte 102,15,58,223,202,8
2331 call .Lkey_expansion_256a
2332.byte 102,15,58,223,200,8
2333 call .Lkey_expansion_256b
2334.byte 102,15,58,223,202,16
2335 call .Lkey_expansion_256a
2336.byte 102,15,58,223,200,16
2337 call .Lkey_expansion_256b
2338.byte 102,15,58,223,202,32
2339 call .Lkey_expansion_256a
2340.byte 102,15,58,223,200,32
2341 call .Lkey_expansion_256b
2342.byte 102,15,58,223,202,64
2343 call .Lkey_expansion_256a
2344 movups %xmm0,(%rax)
2345 movl %esi,16(%rax)
2346 xorq %rax,%rax
2347 jmp .Lenc_key_ret
2348
2349.align 16
# Alternative AES-256 expansion: r10d counts seven passes. Each pass
# derives one key from xmm0 and, except on the last pass, a second key
# from xmm1 using aesenclast against an all-zero xmm3 (102,15,56,221,211).
2350.L14rounds_alt:
2351 movdqa .Lkey_rotate(%rip),%xmm5
2352 movdqa .Lkey_rcon1(%rip),%xmm4
2353 movl $7,%r10d
2354 movdqu %xmm0,0(%rdx)
2355 movdqa %xmm2,%xmm1
2356 movdqu %xmm2,16(%rdx)
2357 jmp .Loop_key256
2358
2359.align 16
2360.Loop_key256:
2361.byte 102,15,56,0,213
2362.byte 102,15,56,221,212
2363
2364 movdqa %xmm0,%xmm3
2365 pslldq $4,%xmm0
2366 pxor %xmm0,%xmm3
2367 pslldq $4,%xmm0
2368 pxor %xmm0,%xmm3
2369 pslldq $4,%xmm0
2370 pxor %xmm3,%xmm0
2371 pslld $1,%xmm4
2372
2373 pxor %xmm2,%xmm0
2374 movdqu %xmm0,(%rax)
2375
2376 decl %r10d
2377 jz .Ldone_key256
2378
2379 pshufd $0xff,%xmm0,%xmm2
2380 pxor %xmm3,%xmm3
2381.byte 102,15,56,221,211
2382
2383 movdqa %xmm1,%xmm3
2384 pslldq $4,%xmm1
2385 pxor %xmm1,%xmm3
2386 pslldq $4,%xmm1
2387 pxor %xmm1,%xmm3
2388 pslldq $4,%xmm1
2389 pxor %xmm3,%xmm1
2390
2391 pxor %xmm1,%xmm2
2392 movdqu %xmm2,16(%rax)
2393 leaq 32(%rax),%rax
2394 movdqa %xmm2,%xmm1
2395
2396 jmp .Loop_key256
2397
2398.Ldone_key256:
2399 movl %esi,16(%rax)
2400 xorl %eax,%eax
2401 jmp .Lenc_key_ret
2402
2403.align 16
# Unsupported key size returns -2. Every exit scrubs xmm0-xmm5 and
# releases the 8 bytes reserved at entry; 0xf3,0xc3 is a rep-prefixed ret.
2404.Lbad_keybits:
2405 movq $-2,%rax
2406.Lenc_key_ret:
2407 pxor %xmm0,%xmm0
2408 pxor %xmm1,%xmm1
2409 pxor %xmm2,%xmm2
2410 pxor %xmm3,%xmm3
2411 pxor %xmm4,%xmm4
2412 pxor %xmm5,%xmm5
2413 addq $8,%rsp
2414.cfi_adjust_cfa_offset -8
2415 .byte 0xf3,0xc3
2416.cfi_endproc
2417.LSEH_end_set_encrypt_key:
2418
2419.align 16
# Key-expansion helpers, reached by call from the default (non _alt)
# paths above. The plain entry points first store a finished round key at
# (%rax) and advance rax; the _cold entries skip that store for the first
# step. They sit after .cfi_endproc but before the .size directives of
# the function.
2420.Lkey_expansion_128:
2421 movups %xmm0,(%rax)
2422 leaq 16(%rax),%rax
2423.Lkey_expansion_128_cold:
2424 shufps $16,%xmm0,%xmm4
2425 xorps %xmm4,%xmm0
2426 shufps $140,%xmm0,%xmm4
2427 xorps %xmm4,%xmm0
2428 shufps $255,%xmm1,%xmm1
2429 xorps %xmm1,%xmm0
2430 .byte 0xf3,0xc3
2431
2432.align 16
2433.Lkey_expansion_192a:
2434 movups %xmm0,(%rax)
2435 leaq 16(%rax),%rax
2436.Lkey_expansion_192a_cold:
2437 movaps %xmm2,%xmm5
2438.Lkey_expansion_192b_warm:
2439 shufps $16,%xmm0,%xmm4
2440 movdqa %xmm2,%xmm3
2441 xorps %xmm4,%xmm0
2442 shufps $140,%xmm0,%xmm4
2443 pslldq $4,%xmm3
2444 xorps %xmm4,%xmm0
2445 pshufd $85,%xmm1,%xmm1
2446 pxor %xmm3,%xmm2
2447 pxor %xmm1,%xmm0
2448 pshufd $255,%xmm0,%xmm3
2449 pxor %xmm3,%xmm2
2450 .byte 0xf3,0xc3
2451
2452.align 16
# The 192b helper writes 32 bytes assembled from xmm5, xmm0 and xmm2,
# then shares the tail of the 192a helper.
2453.Lkey_expansion_192b:
2454 movaps %xmm0,%xmm3
2455 shufps $68,%xmm0,%xmm5
2456 movups %xmm5,(%rax)
2457 shufps $78,%xmm2,%xmm3
2458 movups %xmm3,16(%rax)
2459 leaq 32(%rax),%rax
2460 jmp .Lkey_expansion_192b_warm
2461
2462.align 16
2463.Lkey_expansion_256a:
2464 movups %xmm2,(%rax)
2465 leaq 16(%rax),%rax
2466.Lkey_expansion_256a_cold:
2467 shufps $16,%xmm0,%xmm4
2468 xorps %xmm4,%xmm0
2469 shufps $140,%xmm0,%xmm4
2470 xorps %xmm4,%xmm0
2471 shufps $255,%xmm1,%xmm1
2472 xorps %xmm1,%xmm0
2473 .byte 0xf3,0xc3
2474
2475.align 16
2476.Lkey_expansion_256b:
2477 movups %xmm0,(%rax)
2478 leaq 16(%rax),%rax
2479
2480 shufps $16,%xmm2,%xmm4
2481 xorps %xmm4,%xmm2
2482 shufps $140,%xmm2,%xmm4
2483 xorps %xmm4,%xmm2
2484 shufps $170,%xmm1,%xmm1
2485 xorps %xmm1,%xmm2
2486 .byte 0xf3,0xc3
2487.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
2488.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
# Constant pool, aligned to 64 bytes. Only .Lkey_rotate, .Lkey_rotate192,
# .Lkey_rcon1 and .Lkey_rcon1b are referenced by the key-setup code
# above; the other labels are presumably used by routines outside this
# part of the file.
2489.align 64
2490.Lbswap_mask:
2491.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
2492.Lincrement32:
2493.long 6,6,6,0
2494.Lincrement64:
2495.long 1,0,0,0
2496.Lxts_magic:
2497.long 0x87,0,1,0
2498.Lincrement1:
2499.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2500.Lkey_rotate:
2501.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
2502.Lkey_rotate192:
2503.long 0x04070605,0x04070605,0x04070605,0x04070605
2504.Lkey_rcon1:
2505.long 1,1,1,1
2506.Lkey_rcon1b:
2507.long 0x1b,0x1b,0x1b,0x1b
2508
# NUL-terminated ASCII banner. The bytes spell:
# AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>
2509.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2510.align 64
2511#endif
2512