# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#
# Syntax: AT&T operand order (source first), GNU as, run through the C
# preprocessor. Arguments are read from %rdi, %rsi, %rdx, %rcx, %r8.
#
# AES-NI and a few other opcodes are emitted as raw .byte sequences. Decoding
# used in the comments below (modrm selects the registers):
#   aesenc           = .byte 102,15,56,220,modrm
#   aesenclast       = .byte 102,15,56,221,modrm
#   aeskeygenassist  = .byte 102,15,58,223,modrm,imm8
#   pshufb           = .byte 102,15,56,0,modrm
#   pinsrd           = .byte 102,15,58,34,modrm,imm8
#   movbe %eax,N(%rsp) = .byte 0x0f,0x38,0xf1,0x44,0x24,N
#   rep ret          = .byte 0xf3,0xc3
# An extra 68 / 65 / 69 after the leading 102 is a REX prefix selecting
# %xmm8..%xmm15 for the destination / source / both.

# MemorySanitizer builds get OPENSSL_NO_ASM, which compiles this file out.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#include "ring_core_generated/prefix_symbols_asm.h"
.text
.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

# ---------------------------------------------------------------------------
# aes_hw_encrypt: encrypt one 16-byte block.
#   In:  %rdi = input block, %rsi = output block, %rdx = key schedule
#        (round keys from offset 0, loop count at offset 240).
#   Clobbers: %eax, %rdx, %xmm0-%xmm2 (all three xmm registers are zeroed
#   before returning), flags.
# ---------------------------------------------------------------------------
.globl	aes_hw_encrypt
.hidden aes_hw_encrypt
.type	aes_hw_encrypt,@function
.align	16
aes_hw_encrypt:
.cfi_startproc
	movups	(%rdi),%xmm2		# xmm2 = input block
	movl	240(%rdx),%eax		# eax = number of aesenc iterations
	movups	(%rdx),%xmm0		# round key 0
	movups	16(%rdx),%xmm1		# round key 1
	leaq	32(%rdx),%rdx		# rdx -> round key 2
	xorps	%xmm0,%xmm2		# initial whitening
.Loop_enc1_1:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%eax			# sets ZF for the jnz below
	movups	(%rdx),%xmm1		# next round key (flags untouched)
	leaq	16(%rdx),%rdx
	jnz	.Loop_enc1_1
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0		# scrub key material
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)		# store result
	pxor	%xmm2,%xmm2
	.byte	0xf3,0xc3
.cfi_endproc
.size	aes_hw_encrypt,.-aes_hw_encrypt

# ---------------------------------------------------------------------------
# _aesni_encrypt2: encrypt the two blocks in %xmm2,%xmm3 in parallel.
#   In:  %rcx = key schedule, %eax = loop count as stored at offset 240.
#   Out: %xmm2,%xmm3 encrypted.
#   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
# The round keys are indexed as (%rcx,%rax) with %rcx moved to the end of the
# schedule and %rax a negative byte offset counting up to zero; the addq
# that advances it also produces the ZF tested by jnz (the movups in between
# does not change flags).
# ---------------------------------------------------------------------------
.type	_aesni_encrypt2,@function
.align	16
_aesni_encrypt2:
.cfi_startproc
	movups	(%rcx),%xmm0		# round key 0
	shll	$4,%eax			# count -> byte length
	movups	16(%rcx),%xmm1		# round key 1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0		# round key 2
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Lenc_loop2:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop2

.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
	.byte	0xf3,0xc3
.cfi_endproc
.size	_aesni_encrypt2,.-_aesni_encrypt2

# ---------------------------------------------------------------------------
# _aesni_encrypt3: same scheme as _aesni_encrypt2 for three blocks in
# %xmm2-%xmm4. In: %rcx = key schedule, %eax = loop count.
# Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
# ---------------------------------------------------------------------------
.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Lenc_loop3:
.byte	102,15,56,220,209		# aesenc %xmm1 into %xmm2..%xmm4
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc %xmm0 into %xmm2..%xmm4
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop3

.byte	102,15,56,220,209		# aesenc %xmm1 into %xmm2..%xmm4
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,221,208		# aesenclast %xmm0 into %xmm2..%xmm4
.byte	102,15,56,221,216
.byte	102,15,56,221,224
	.byte	0xf3,0xc3
.cfi_endproc
.size	_aesni_encrypt3,.-_aesni_encrypt3

# ---------------------------------------------------------------------------
# _aesni_encrypt4: same scheme for four blocks in %xmm2-%xmm5.
# In: %rcx = key schedule, %eax = loop count.
# Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
# ---------------------------------------------------------------------------
.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	0x0f,0x1f,0x00			# 3-byte nop (nopl (%rax)), padding
	addq	$16,%rax

.Lenc_loop4:
.byte	102,15,56,220,209		# aesenc %xmm1 into %xmm2..%xmm5
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc %xmm0 into %xmm2..%xmm5
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop4

.byte	102,15,56,220,209		# aesenc %xmm1 into %xmm2..%xmm5
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,221,208		# aesenclast %xmm0 into %xmm2..%xmm5
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
	.byte	0xf3,0xc3
.cfi_endproc
.size	_aesni_encrypt4,.-_aesni_encrypt4

# ---------------------------------------------------------------------------
# _aesni_encrypt6: same scheme for six blocks in %xmm2-%xmm7. The first
# round is started while the remaining blocks are still being whitened.
# In: %rcx = key schedule, %eax = loop count.
# Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
# .Lenc_loop6 is also entered by a call from aes_hw_ctr32_encrypt_blocks,
# which sets up %rcx, %rax, %xmm0 and %xmm1 itself and returns through the
# rep ret at the end of this routine.
# ---------------------------------------------------------------------------
.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Lenc_loop6_enter	# %xmm2..%xmm4 already did this round
.align	16
.Lenc_loop6:
.byte	102,15,56,220,209		# aesenc %xmm1 into %xmm2..%xmm4
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.Lenc_loop6_enter:
.byte	102,15,56,220,233		# aesenc %xmm1 into %xmm5..%xmm7
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc %xmm0 into %xmm2..%xmm7
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop6

.byte	102,15,56,220,209		# aesenc %xmm1 into %xmm2..%xmm7
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,15,56,221,208		# aesenclast %xmm0 into %xmm2..%xmm7
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
	.byte	0xf3,0xc3
.cfi_endproc
.size	_aesni_encrypt6,.-_aesni_encrypt6

# ---------------------------------------------------------------------------
# _aesni_encrypt8: same scheme for eight blocks in %xmm2-%xmm9.
# In: %rcx = key schedule, %eax = loop count.
# Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
# .Lenc_loop8_enter is also entered by a call from the tail path of
# aes_hw_ctr32_encrypt_blocks.
# ---------------------------------------------------------------------------
.type	_aesni_encrypt8,@function
.align	16
_aesni_encrypt8:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Lenc_loop8_inner	# %xmm2,%xmm3 already did this round
.align	16
.Lenc_loop8:
.byte	102,15,56,220,209		# aesenc %xmm1 into %xmm2,%xmm3
.byte	102,15,56,220,217
.Lenc_loop8_inner:
.byte	102,15,56,220,225		# aesenc %xmm1 into %xmm4..%xmm9
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
.Lenc_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		# aesenc %xmm0 into %xmm2..%xmm9
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop8

.byte	102,15,56,220,209		# aesenc %xmm1 into %xmm2..%xmm9
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
.byte	102,15,56,221,208		# aesenclast %xmm0 into %xmm2..%xmm9
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
.byte	102,68,15,56,221,192
.byte	102,68,15,56,221,200
	.byte	0xf3,0xc3
.cfi_endproc
.size	_aesni_encrypt8,.-_aesni_encrypt8

# ---------------------------------------------------------------------------
# aes_hw_ctr32_encrypt_blocks: counter-mode encryption of whole blocks.
#   In:  %rdi = input, %rsi = output, %rdx = number of 16-byte blocks,
#        %rcx = key schedule (loop count at offset 240),
#        %r8  = 16-byte counter block; its last 4 bytes are byte-swapped and
#               incremented as a 32-bit counter.
#   A count of exactly 1 takes a short path with no stack frame. Otherwise a
#   16-byte aligned 128-byte scratch area holds eight counter blocks, each
#   already XORed with round key 0.
#   Register roles in the bulk path: %r11 = caller %rsp, %ebp = last dword of
#   round key 0, %r8d = current counter value, %rdx = blocks remaining.
#   %rbp is saved and restored; xmm registers and the scratch area are zeroed
#   before returning.
#   NOTE(review): a block count of 0 is not special-cased; it reaches the
#   tail path. Presumably callers never pass 0 - confirm.
# ---------------------------------------------------------------------------
.globl	aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,@function
.align	16
aes_hw_ctr32_encrypt_blocks:
.cfi_startproc
	cmpq	$1,%rdx
	jne	.Lctr32_bulk

# Single block: encrypt the counter block and XOR it with the input.
	movups	(%r8),%xmm2		# counter block
	movups	(%rdi),%xmm3		# input block
	movl	240(%rcx),%edx
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_2:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%edx
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_2
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	xorps	%xmm3,%xmm2		# keystream ^ input
	pxor	%xmm3,%xmm3
	movups	%xmm2,(%rsi)
	xorps	%xmm2,%xmm2
	jmp	.Lctr32_epilogue

.align	16
.Lctr32_bulk:
	leaq	(%rsp),%r11		# r11 = caller stack pointer
.cfi_def_cfa_register	%r11
	pushq	%rbp
.cfi_offset	%rbp,-16
	subq	$128,%rsp		# scratch for 8 counter blocks
	andq	$-16,%rsp		# align for the movdqa accesses below

# Build the eight counter blocks. Blocks 0..3 live in %xmm2..%xmm5 and are
# mirrored at 0..48(%rsp); blocks 4..7 are kept at 64..112(%rsp). The counter
# dword of block i is bswap(counter + i) ^ %ebp.
	movdqu	(%r8),%xmm2
	movdqu	(%rcx),%xmm0		# round key 0
	movl	12(%r8),%r8d		# 32-bit counter as stored
	pxor	%xmm0,%xmm2
	movl	12(%rcx),%ebp		# last dword of round key 0
	movdqa	%xmm2,0(%rsp)
	bswapl	%r8d			# counter in host byte order
	movdqa	%xmm2,%xmm3
	movdqa	%xmm2,%xmm4
	movdqa	%xmm2,%xmm5
	movdqa	%xmm2,64(%rsp)
	movdqa	%xmm2,80(%rsp)
	movdqa	%xmm2,96(%rsp)
	movq	%rdx,%r10		# park block count; rdx is used as scratch
	movdqa	%xmm2,112(%rsp)

	leaq	1(%r8),%rax
	leaq	2(%r8),%rdx
	bswapl	%eax
	bswapl	%edx
	xorl	%ebp,%eax
	xorl	%ebp,%edx
.byte	102,15,58,34,216,3		# pinsrd $3,%eax,%xmm3
	leaq	3(%r8),%rax
	movdqa	%xmm3,16(%rsp)
.byte	102,15,58,34,226,3		# pinsrd $3,%edx,%xmm4
	bswapl	%eax
	movq	%r10,%rdx		# rdx = block count again
	leaq	4(%r8),%r10
	movdqa	%xmm4,32(%rsp)
	xorl	%ebp,%eax
	bswapl	%r10d
.byte	102,15,58,34,232,3		# pinsrd $3,%eax,%xmm5
	xorl	%ebp,%r10d
	movdqa	%xmm5,48(%rsp)
	leaq	5(%r8),%r9
	movl	%r10d,64+12(%rsp)
	bswapl	%r9d
	leaq	6(%r8),%r10
	movl	240(%rcx),%eax		# eax = loop count from key schedule
	xorl	%ebp,%r9d
	bswapl	%r10d
	movl	%r9d,80+12(%rsp)
	xorl	%ebp,%r10d
	leaq	7(%r8),%r9
	movl	%r10d,96+12(%rsp)
	bswapl	%r9d
	leaq	OPENSSL_ia32cap_P(%rip),%r10
	movl	4(%r10),%r10d		# second capability word
	xorl	%ebp,%r9d
	andl	$71303168,%r10d		# mask 0x04400000 (bits 22 and 26)
	movl	%r9d,112+12(%rsp)

	movups	16(%rcx),%xmm1		# round key 1

	movdqa	64(%rsp),%xmm6
	movdqa	80(%rsp),%xmm7

	cmpq	$8,%rdx
	jb	.Lctr32_tail

# Only bit 22 set (0x00400000) selects the 6-block loop, which relies on
# movbe. NOTE(review): presumably bit 22 = MOVBE and bit 26 = XSAVE of
# CPUID.1:ECX - confirm against the OPENSSL_ia32cap_P layout.
	subq	$6,%rdx
	cmpl	$4194304,%r10d
	je	.Lctr32_6x

	leaq	128(%rcx),%rcx		# bias key pointer; loop8 uses N-128(%rcx)
	subq	$2,%rdx
	jmp	.Lctr32_loop8

.align	16
.Lctr32_6x:
	shll	$4,%eax
	movl	$48,%r10d
	bswapl	%ebp			# movbe will swap the stored dword back
	leaq	32(%rcx,%rax,1),%rcx	# rcx -> end of key schedule
	subq	%rax,%r10		# r10 = 48 - 16*count, negative key offset
	jmp	.Lctr32_loop6

# Six blocks per iteration. The first two rounds are done inline while the
# next six counter dwords are written to the scratch area with movbe; the
# remaining rounds run in .Lenc_loop6.
.align	16
.Lctr32_loop6:
	addl	$6,%r8d
	movups	-48(%rcx,%r10,1),%xmm0
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	movl	%r8d,%eax
	xorl	%ebp,%eax
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	0x0f,0x38,0xf1,0x44,0x24,12	# movbe %eax,12(%rsp)
	leal	1(%r8),%eax
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	xorl	%ebp,%eax
.byte	0x0f,0x38,0xf1,0x44,0x24,28	# movbe %eax,28(%rsp)
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
	leal	2(%r8),%eax
	xorl	%ebp,%eax
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
.byte	0x0f,0x38,0xf1,0x44,0x24,44	# movbe %eax,44(%rsp)
	leal	3(%r8),%eax
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
	movups	-32(%rcx,%r10,1),%xmm1
	xorl	%ebp,%eax

.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	0x0f,0x38,0xf1,0x44,0x24,60	# movbe %eax,60(%rsp)
	leal	4(%r8),%eax
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	xorl	%ebp,%eax
.byte	0x0f,0x38,0xf1,0x44,0x24,76	# movbe %eax,76(%rsp)
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
	leal	5(%r8),%eax
	xorl	%ebp,%eax
.byte	102,15,56,220,232		# aesenc %xmm0,%xmm5
.byte	0x0f,0x38,0xf1,0x44,0x24,92	# movbe %eax,92(%rsp)
	movq	%r10,%rax		# rax = key offset expected by .Lenc_loop6
.byte	102,15,56,220,240		# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		# aesenc %xmm0,%xmm7
	movups	-16(%rcx,%r10,1),%xmm0

	call	.Lenc_loop6		# finish the rounds; returns via its rep ret

# XOR keystream with input, reload the next six counter blocks, store output.
	movdqu	(%rdi),%xmm8
	movdqu	16(%rdi),%xmm9
	movdqu	32(%rdi),%xmm10
	movdqu	48(%rdi),%xmm11
	movdqu	64(%rdi),%xmm12
	movdqu	80(%rdi),%xmm13
	leaq	96(%rdi),%rdi
	movups	-64(%rcx,%r10,1),%xmm1	# round key 1 for the next pass
	pxor	%xmm2,%xmm8
	movaps	0(%rsp),%xmm2
	pxor	%xmm3,%xmm9
	movaps	16(%rsp),%xmm3
	pxor	%xmm4,%xmm10
	movaps	32(%rsp),%xmm4
	pxor	%xmm5,%xmm11
	movaps	48(%rsp),%xmm5
	pxor	%xmm6,%xmm12
	movaps	64(%rsp),%xmm6
	pxor	%xmm7,%xmm13
	movaps	80(%rsp),%xmm7
	movdqu	%xmm8,(%rsi)
	movdqu	%xmm9,16(%rsi)
	movdqu	%xmm10,32(%rsi)
	movdqu	%xmm11,48(%rsi)
	movdqu	%xmm12,64(%rsi)
	movdqu	%xmm13,80(%rsi)
	leaq	96(%rsi),%rsi

	subq	$6,%rdx
	jnc	.Lctr32_loop6

	addq	$6,%rdx			# rdx = leftover blocks (0..5)
	jz	.Lctr32_done

# Undo the 6x setup so the tail sees %eax = loop count, %rcx = key schedule.
	leal	-48(%r10),%eax
	leaq	-80(%rcx,%r10,1),%rcx
	negl	%eax
	shrl	$4,%eax
	jmp	.Lctr32_tail

# Eight blocks per iteration, fully unrolled. %rcx is biased by +128, so
# N-128(%rcx) is the round key at byte offset N. Each round is interleaved
# with computing the counter dword for the next pass (bswap(counter+i) ^ %ebp
# stored at 16*i+12(%rsp)) and loading the next round key.
.align	32
.Lctr32_loop8:
	addl	$8,%r8d
	movdqa	96(%rsp),%xmm8		# counter block 6
.byte	102,15,56,220,209		# round with %xmm1 on %xmm2..%xmm9
	movl	%r8d,%r9d
	movdqa	112(%rsp),%xmm9		# counter block 7
.byte	102,15,56,220,217
	bswapl	%r9d
	movups	32-128(%rcx),%xmm0
.byte	102,15,56,220,225
	xorl	%ebp,%r9d
	nop
.byte	102,15,56,220,233
	movl	%r9d,0+12(%rsp)
	leaq	1(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	48-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208		# round with %xmm0 on %xmm2..%xmm9
.byte	102,15,56,220,216
	xorl	%ebp,%r9d
.byte	0x66,0x90			# 2-byte nop, padding
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movl	%r9d,16+12(%rsp)
	leaq	2(%r8),%r9
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	64-128(%rcx),%xmm0
	bswapl	%r9d
.byte	102,15,56,220,209		# round with %xmm1
.byte	102,15,56,220,217
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movl	%r9d,32+12(%rsp)
	leaq	3(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	80-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208		# round with %xmm0
.byte	102,15,56,220,216
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movl	%r9d,48+12(%rsp)
	leaq	4(%r8),%r9
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	96-128(%rcx),%xmm0
	bswapl	%r9d
.byte	102,15,56,220,209		# round with %xmm1
.byte	102,15,56,220,217
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movl	%r9d,64+12(%rsp)
	leaq	5(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	112-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208		# round with %xmm0
.byte	102,15,56,220,216
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movl	%r9d,80+12(%rsp)
	leaq	6(%r8),%r9
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	128-128(%rcx),%xmm0
	bswapl	%r9d
.byte	102,15,56,220,209		# round with %xmm1
.byte	102,15,56,220,217
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movl	%r9d,96+12(%rsp)
	leaq	7(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	144-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208		# round with %xmm0
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	xorl	%ebp,%r9d
	movdqu	0(%rdi),%xmm10		# start fetching input block 0
.byte	102,15,56,220,232
	movl	%r9d,112+12(%rsp)
	cmpl	$11,%eax		# flags survive until the jb below
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	160-128(%rcx),%xmm0

	jb	.Lctr32_enc_done	# loop count below 11: no extra rounds

# Four additional rounds for the longer key schedule.
.byte	102,15,56,220,209		# round with %xmm1
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	176-128(%rcx),%xmm1

.byte	102,15,56,220,208		# round with %xmm0
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	192-128(%rcx),%xmm0



.byte	102,15,56,220,209		# round with %xmm1
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	208-128(%rcx),%xmm1

.byte	102,15,56,220,208		# round with %xmm0
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	224-128(%rcx),%xmm0
	jmp	.Lctr32_enc_done

# Here %xmm0 = last round key and %xmm1 = the key before it. Each input block
# is XORed with the last round key and used as the aesenclast operand, so the
# final round produces keystream ^ input directly.
.align	16
.Lctr32_enc_done:
	movdqu	16(%rdi),%xmm11
	pxor	%xmm0,%xmm10
	movdqu	32(%rdi),%xmm12
	pxor	%xmm0,%xmm11
	movdqu	48(%rdi),%xmm13
	pxor	%xmm0,%xmm12
	movdqu	64(%rdi),%xmm14
	pxor	%xmm0,%xmm13
	movdqu	80(%rdi),%xmm15
	pxor	%xmm0,%xmm14
	pxor	%xmm0,%xmm15
.byte	102,15,56,220,209		# penultimate round with %xmm1
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movdqu	96(%rdi),%xmm1		# input block 6
	leaq	128(%rdi),%rdi

.byte	102,65,15,56,221,210		# aesenclast %xmm10,%xmm2
	pxor	%xmm0,%xmm1
	movdqu	112-128(%rdi),%xmm10	# input block 7
.byte	102,65,15,56,221,219		# aesenclast %xmm11,%xmm3
	pxor	%xmm0,%xmm10
	movdqa	0(%rsp),%xmm11		# next counter blocks 0..5
.byte	102,65,15,56,221,228		# aesenclast %xmm12,%xmm4
.byte	102,65,15,56,221,237		# aesenclast %xmm13,%xmm5
	movdqa	16(%rsp),%xmm12
	movdqa	32(%rsp),%xmm13
.byte	102,65,15,56,221,246		# aesenclast %xmm14,%xmm6
.byte	102,65,15,56,221,255		# aesenclast %xmm15,%xmm7
	movdqa	48(%rsp),%xmm14
	movdqa	64(%rsp),%xmm15
.byte	102,68,15,56,221,193		# aesenclast %xmm1,%xmm8
	movdqa	80(%rsp),%xmm0
	movups	16-128(%rcx),%xmm1	# round key 1 for the next pass
.byte	102,69,15,56,221,202		# aesenclast %xmm10,%xmm9

	movups	%xmm2,(%rsi)
	movdqa	%xmm11,%xmm2
	movups	%xmm3,16(%rsi)
	movdqa	%xmm12,%xmm3
	movups	%xmm4,32(%rsi)
	movdqa	%xmm13,%xmm4
	movups	%xmm5,48(%rsi)
	movdqa	%xmm14,%xmm5
	movups	%xmm6,64(%rsi)
	movdqa	%xmm15,%xmm6
	movups	%xmm7,80(%rsi)
	movdqa	%xmm0,%xmm7
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi

	subq	$8,%rdx
	jnc	.Lctr32_loop8

	addq	$8,%rdx			# rdx = leftover blocks (0..7)
	jz	.Lctr32_done
	leaq	-128(%rcx),%rcx		# remove the +128 bias

# Tail: fewer than 8 blocks left. On entry %rcx = key schedule, %eax = loop
# count, %xmm1 = round key 1 and %xmm2..%xmm7 hold counter blocks 0..5.
.Lctr32_tail:


	leaq	16(%rcx),%rcx
	cmpq	$4,%rdx
	jb	.Lctr32_loop3		# fewer than 4 blocks
	je	.Lctr32_loop4		# exactly 4 blocks

# More than 4 blocks: run the first round inline and finish in
# _aesni_encrypt8. %xmm9 is a zeroed dummy lane whose result is not stored.
	shll	$4,%eax
	movdqa	96(%rsp),%xmm8		# counter block 6
	pxor	%xmm9,%xmm9

	movups	16(%rcx),%xmm0
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	leaq	32-16(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	addq	$16,%rax
	movups	(%rdi),%xmm10
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
	movups	16(%rdi),%xmm11
	movups	32(%rdi),%xmm12
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193		# aesenc %xmm1,%xmm8

	call	.Lenc_loop8_enter

	movdqu	48(%rdi),%xmm13
	pxor	%xmm10,%xmm2
	movdqu	64(%rdi),%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm10,%xmm6
	movdqu	%xmm5,48(%rsi)
	movdqu	%xmm6,64(%rsi)
	cmpq	$6,%rdx
	jb	.Lctr32_done		# exactly 5 blocks

	movups	80(%rdi),%xmm11
	xorps	%xmm11,%xmm7
	movups	%xmm7,80(%rsi)
	je	.Lctr32_done		# exactly 6 (flags still from cmpq $6)

	movups	96(%rdi),%xmm12
	xorps	%xmm12,%xmm8
	movups	%xmm8,96(%rsi)
	jmp	.Lctr32_done

# Exactly four blocks in %xmm2..%xmm5.
.align	32
.Lctr32_loop4:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	leaq	16(%rcx),%rcx
	decl	%eax			# ZF consumed by the jnz below
.byte	102,15,56,220,217		# aesenc %xmm1 into %xmm3..%xmm5
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%rcx),%xmm1
	jnz	.Lctr32_loop4
.byte	102,15,56,221,209		# aesenclast %xmm1 into %xmm2,%xmm3
.byte	102,15,56,221,217
	movups	(%rdi),%xmm10
	movups	16(%rdi),%xmm11
.byte	102,15,56,221,225		# aesenclast %xmm1 into %xmm4,%xmm5
.byte	102,15,56,221,233
	movups	32(%rdi),%xmm12
	movups	48(%rdi),%xmm13

	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)
	xorps	%xmm11,%xmm3
	movups	%xmm3,16(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm5,48(%rsi)
	jmp	.Lctr32_done

# Fewer than four blocks: three lanes are always encrypted, and only as many
# results as %rdx asks for are XORed with input and stored.
.align	32
.Lctr32_loop3:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	leaq	16(%rcx),%rcx
	decl	%eax
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	movups	(%rcx),%xmm1
	jnz	.Lctr32_loop3
.byte	102,15,56,221,209		# aesenclast %xmm1 into %xmm2..%xmm4
.byte	102,15,56,221,217
.byte	102,15,56,221,225

	movups	(%rdi),%xmm10
	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)
	cmpq	$2,%rdx
	jb	.Lctr32_done

	movups	16(%rdi),%xmm11
	xorps	%xmm11,%xmm3
	movups	%xmm3,16(%rsi)
	je	.Lctr32_done		# exactly 2 (flags still from cmpq $2)

	movups	32(%rdi),%xmm12
	xorps	%xmm12,%xmm4
	movups	%xmm4,32(%rsi)

# Common exit of the bulk path: zero %xmm0..%xmm15 and the 128-byte scratch
# area, then restore %rbp and the caller stack pointer from %r11.
.Lctr32_done:
	xorps	%xmm0,%xmm0
	xorl	%ebp,%ebp
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	movaps	%xmm0,112(%rsp)
	pxor	%xmm15,%xmm15
	movq	-8(%r11),%rbp		# slot written by the pushq in the prologue
.cfi_restore	%rbp
	leaq	(%r11),%rsp
.cfi_def_cfa_register	%rsp
.Lctr32_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks

# ---------------------------------------------------------------------------
# aes_hw_set_encrypt_key: expand a 128- or 256-bit key.
#   In:  %rdi = user key, %esi = key length in bits, %rdx = key schedule out.
#   Out: %rax = 0 on success, -1 when %rdi or %rdx is zero, -2 when %esi is
#        neither 128 nor 256.
#   Round keys are written from offset 0 of the schedule; the value 9
#   (128-bit) or 13 (256-bit) is written at offset 240.
#   Two implementations exist per key size: one based on aeskeygenassist and
#   an alternative based on pshufb + aesenclast, selected when the masked
#   capability word equals 0x10000000.
#   NOTE(review): mask 0x10000800 presumably tests the AVX bit (28) and a
#   synthetic bit 11 of OPENSSL_ia32cap_P[1] - confirm.
#   %xmm0..%xmm5 are zeroed before returning.
# ---------------------------------------------------------------------------
.globl	aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,@function
.align	16
aes_hw_set_encrypt_key:
__aesni_set_encrypt_key:
.cfi_startproc
.byte	0x48,0x83,0xEC,0x08		# subq $8,%rsp
.cfi_adjust_cfa_offset	8
	movq	$-1,%rax
	testq	%rdi,%rdi
	jz	.Lenc_key_ret
	testq	%rdx,%rdx
	jz	.Lenc_key_ret

	movups	(%rdi),%xmm0		# first 16 key bytes
	xorps	%xmm4,%xmm4		# scratch used by the expansion helpers
	leaq	OPENSSL_ia32cap_P(%rip),%r10
	movl	4(%r10),%r10d
	andl	$268437504,%r10d	# mask 0x10000800
	leaq	16(%rdx),%rax		# rax = write cursor, starts at round key 1
	cmpl	$256,%esi
	je	.L14rounds

	cmpl	$128,%esi
	jne	.Lbad_keybits

.L10rounds:
	movl	$9,%esi			# value stored at offset 240
	cmpl	$268435456,%r10d	# masked word == 0x10000000
	je	.L10rounds_alt

# Each .byte below is aeskeygenassist $rcon,%xmm0,%xmm1; the helper stores
# the previous round key and derives the next one into %xmm0.
	movups	%xmm0,(%rdx)
.byte	102,15,58,223,200,1
	call	.Lkey_expansion_128_cold
.byte	102,15,58,223,200,2
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,4
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,8
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,16
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,32
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,64
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,128
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,27
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,54
	call	.Lkey_expansion_128
	movups	%xmm0,(%rax)		# last round key (offset 160)
	movl	%esi,80(%rax)		# offset 240 of the schedule
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

# Alternative 128-bit expansion: pshufb with .Lkey_rotate followed by
# aesenclast against the round constant in %xmm4, which is doubled each
# iteration with pslld.
.align	16
.L10rounds_alt:
	movdqa	.Lkey_rotate(%rip),%xmm5
	movl	$8,%r10d		# eight iterations in the loop
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movdqa	%xmm0,%xmm2
	movdqu	%xmm0,(%rdx)
	jmp	.Loop_key128

.align	16
.Loop_key128:
.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0
.byte	102,15,56,221,196		# aesenclast %xmm4,%xmm0
	pslld	$1,%xmm4
	leaq	16(%rax),%rax

	movdqa	%xmm2,%xmm3		# fold the previous key: running XOR of
	pslldq	$4,%xmm2		# its dwords shifted by 4, 8, 12 bytes
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,-16(%rax)
	movdqa	%xmm0,%xmm2

	decl	%r10d
	jnz	.Loop_key128

# Last two round keys use the constants 0x1b and 0x1b<<1 from .Lkey_rcon1b.
	movdqa	.Lkey_rcon1b(%rip),%xmm4

.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0
.byte	102,15,56,221,196		# aesenclast %xmm4,%xmm0
	pslld	$1,%xmm4

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	movdqa	%xmm0,%xmm2
.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0
.byte	102,15,56,221,196		# aesenclast %xmm4,%xmm0

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,16(%rax)

	movl	%esi,96(%rax)		# offset 240 of the schedule
	xorl	%eax,%eax
	jmp	.Lenc_key_ret



.align	16
.L14rounds:
	movups	16(%rdi),%xmm2		# second 16 key bytes
	movl	$13,%esi		# value stored at offset 240
	leaq	16(%rax),%rax		# write cursor starts at round key 2
	cmpl	$268435456,%r10d
	je	.L14rounds_alt

# The .byte lines alternate aeskeygenassist $rcon,%xmm2,%xmm1 (before the
# 256a helper) and aeskeygenassist $rcon,%xmm0,%xmm1 (before 256b).
	movups	%xmm0,(%rdx)
	movups	%xmm2,16(%rdx)
.byte	102,15,58,223,202,1
	call	.Lkey_expansion_256a_cold
.byte	102,15,58,223,200,1
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,2
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,2
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,4
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,4
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,8
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,8
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,16
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,16
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,32
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,32
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,64
	call	.Lkey_expansion_256a
	movups	%xmm0,(%rax)		# last round key (offset 224)
	movl	%esi,16(%rax)		# offset 240 of the schedule
	xorq	%rax,%rax
	jmp	.Lenc_key_ret

# Alternative 256-bit expansion; each loop iteration emits up to two round
# keys and exits after the seventh even key.
.align	16
.L14rounds_alt:
	movdqa	.Lkey_rotate(%rip),%xmm5
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movl	$7,%r10d
	movdqu	%xmm0,0(%rdx)
	movdqa	%xmm2,%xmm1
	movdqu	%xmm2,16(%rdx)
	jmp	.Loop_key256

.align	16
.Loop_key256:
.byte	102,15,56,0,213			# pshufb %xmm5,%xmm2
.byte	102,15,56,221,212		# aesenclast %xmm4,%xmm2

	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pslld	$1,%xmm4		# next round constant

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	decl	%r10d
	jz	.Ldone_key256

	pshufd	$0xff,%xmm0,%xmm2	# broadcast the top dword of the new key
	pxor	%xmm3,%xmm3
.byte	102,15,56,221,211		# aesenclast %xmm3,%xmm2 (zero round key)

	movdqa	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm3,%xmm1

	pxor	%xmm1,%xmm2
	movdqu	%xmm2,16(%rax)
	leaq	32(%rax),%rax
	movdqa	%xmm2,%xmm1

	jmp	.Loop_key256

.Ldone_key256:
	movl	%esi,16(%rax)		# offset 240 of the schedule
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.Lbad_keybits:
	movq	$-2,%rax
.Lenc_key_ret:
	pxor	%xmm0,%xmm0		# scrub key material
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	addq	$8,%rsp			# undo the subq at entry
.cfi_adjust_cfa_offset	-8
	.byte	0xf3,0xc3
.cfi_endproc
.LSEH_end_set_encrypt_key:

# Key expansion helpers, reached only by call from the routine above.
# Common contract: %xmm1 holds the aeskeygenassist result, %xmm4 is scratch,
# %rax is the write cursor into the key schedule.

# Store the current round key (%xmm0), then derive the next one into %xmm0.
# The _cold entry skips the store.
.align	16
.Lkey_expansion_128:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_128_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1	# broadcast dword 3 of the assist result
	xorps	%xmm1,%xmm0
	.byte	0xf3,0xc3

# The 192-bit helpers below have no call site visible in this file.
.align	16
.Lkey_expansion_192a:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_192a_cold:
	movaps	%xmm2,%xmm5
.Lkey_expansion_192b_warm:
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0
	pshufd	$255,%xmm0,%xmm3
	pxor	%xmm3,%xmm2
	.byte	0xf3,0xc3

.align	16
.Lkey_expansion_192b:
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5
	movups	%xmm5,(%rax)
	shufps	$78,%xmm2,%xmm3
	movups	%xmm3,16(%rax)
	leaq	32(%rax),%rax
	jmp	.Lkey_expansion_192b_warm

# Store %xmm2, then derive the next even round key into %xmm0.
# The _cold entry skips the store.
.align	16
.Lkey_expansion_256a:
	movups	%xmm2,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_256a_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1	# broadcast dword 3 of the assist result
	xorps	%xmm1,%xmm0
	.byte	0xf3,0xc3

# Store %xmm0, then derive the next odd round key into %xmm2.
.align	16
.Lkey_expansion_256b:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax

	shufps	$16,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1	# broadcast dword 2 of the assist result
	xorps	%xmm1,%xmm2
	.byte	0xf3,0xc3
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key

# Constant pool. Only .Lkey_rotate, .Lkey_rcon1 and .Lkey_rcon1b are
# referenced by the code visible in this file.
.align	64
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lincrement32:
.long	6,6,6,0
.Lincrement64:
.long	1,0,0,0
.Lincrement1:
.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Lkey_rotate:
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
.Lkey_rotate192:
.long	0x04070605,0x04070605,0x04070605,0x04070605
.Lkey_rcon1:
.long	1,1,1,1
.Lkey_rcon1b:
.long	0x1b,0x1b,0x1b,0x1b

# NUL-terminated ASCII: AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>
.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
#endif
.section	.note.GNU-stack,"",@progbits