# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#
# Operands are written source first, destination last. The AES instructions
# are emitted as raw .byte sequences. They decode as follows, where the final
# byte is M = 192 + 8*dst + src:
#   bytes 102,15,56,220,M     aesenc      %xmm(src),%xmm(dst)
#   bytes 102,15,56,221,M     aesenclast  %xmm(src),%xmm(dst)
#   bytes 102,68,15,56,22x,M  the same, with 8 added to the dst register number
#   bytes 0xf3,0xc3           rep ret
#   bytes 0x0f,0x1f,0x00      three-byte nop

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#include "ring_core_generated/prefix_symbols_asm.h"
.text

# _aes_hw_encrypt: encrypts a single 16-byte block.
#   %rdi  address of the 16-byte input block
#   %rsi  address that receives the 16-byte result
#   %rdx  key schedule: 16-byte round keys from offset 0, loop count at offset 240
# Clobbers %eax, %rdx and flags. %xmm0, %xmm1 and %xmm2 are zeroed on exit.
.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt

.p2align 4
_aes_hw_encrypt:

        movups (%rdi),%xmm2  # xmm2 = input block
        movl 240(%rdx),%eax  # eax = loop count kept in the key schedule
        movups (%rdx),%xmm0  # xmm0 = round key 0
        movups 16(%rdx),%xmm1  # xmm1 = round key 1
        leaq 32(%rdx),%rdx  # rdx now points at round key 2
        xorps %xmm0,%xmm2  # whitening with round key 0
L$oop_enc1_1:
.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
        decl %eax
        movups (%rdx),%xmm1  # next round key; movups and leaq leave flags alone
        leaq 16(%rdx),%rdx
        jnz L$oop_enc1_1  # tests the flags produced by decl
.byte 102,15,56,221,209  # aesenclast %xmm1,%xmm2
        pxor %xmm0,%xmm0  # wipe the round keys held in registers
        pxor %xmm1,%xmm1
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2  # wipe the result after it has been stored
        .byte 0xf3,0xc3  # rep ret


# _aesni_encrypt2 .. _aesni_encrypt8: file-local helpers that push 2, 3, 4, 6
# or 8 blocks through the cipher side by side.
#   %rcx  key schedule (round keys from offset 0)
#   %eax  loop count n; it is scaled by 16 and used as a byte index
#   %xmm2 upward  the blocks, transformed in place
# Each helper points %rcx at offset 32 + 16*n of the schedule and lets %rax run
# from a negative byte offset up to zero in steps of 32, so the jnz at the
# bottom of the loop tests the result of addq $32,%rax (the movups in between
# does not touch flags). Two round keys are consumed per iteration, alternating
# between %xmm1 and %xmm0.
# Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.

.p2align 4
_aesni_encrypt2:

        movups (%rcx),%xmm0  # round key 0
        shll $4,%eax  # eax = 16*n
        movups 16(%rcx),%xmm1  # round key 1
        xorps %xmm0,%xmm2  # whitening of both blocks
        xorps %xmm0,%xmm3
        movups 32(%rcx),%xmm0  # round key 2
        leaq 32(%rcx,%rax,1),%rcx
        negq %rax
        addq $16,%rax  # rax = 16 - 16*n

L$enc_loop2:
# aesenc with %xmm1 on %xmm2 and %xmm3
.byte 102,15,56,220,209
.byte 102,15,56,220,217
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
# aesenc with %xmm0 on %xmm2 and %xmm3
.byte 102,15,56,220,208
.byte 102,15,56,220,216
        movups -16(%rcx,%rax,1),%xmm0
        jnz L$enc_loop2

# last aesenc with %xmm1, then aesenclast with %xmm0
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,221,208
.byte 102,15,56,221,216
        .byte 0xf3,0xc3  # rep ret


# Three-block variant: blocks in %xmm2, %xmm3, %xmm4.
.p2align 4
_aesni_encrypt3:

        movups (%rcx),%xmm0
        shll $4,%eax
        movups 16(%rcx),%xmm1
        xorps %xmm0,%xmm2
        xorps %xmm0,%xmm3
        xorps %xmm0,%xmm4
        movups 32(%rcx),%xmm0
        leaq 32(%rcx,%rax,1),%rcx
        negq %rax
        addq $16,%rax

L$enc_loop3:
# aesenc with %xmm1 on %xmm2..%xmm4
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
# aesenc with %xmm0 on %xmm2..%xmm4
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
        movups -16(%rcx,%rax,1),%xmm0
        jnz L$enc_loop3

# last aesenc with %xmm1, then aesenclast with %xmm0
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
        .byte 0xf3,0xc3  # rep ret


# Four-block variant: blocks in %xmm2..%xmm5.
.p2align 4
_aesni_encrypt4:

        movups (%rcx),%xmm0
        shll $4,%eax
        movups 16(%rcx),%xmm1
        xorps %xmm0,%xmm2
        xorps %xmm0,%xmm3
        xorps %xmm0,%xmm4
        xorps %xmm0,%xmm5
        movups 32(%rcx),%xmm0
        leaq 32(%rcx,%rax,1),%rcx
        negq %rax
.byte 0x0f,0x1f,0x00  # three-byte nop (padding)
        addq $16,%rax

L$enc_loop4:
# aesenc with %xmm1 on %xmm2..%xmm5
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
# aesenc with %xmm0 on %xmm2..%xmm5
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
        movups -16(%rcx,%rax,1),%xmm0
        jnz L$enc_loop4

# last aesenc with %xmm1, then aesenclast with %xmm0
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 102,15,56,221,232
        .byte 0xf3,0xc3  # rep ret


# Six-block variant: blocks in %xmm2..%xmm7. The whitening of the later blocks
# is interleaved with the first aesenc of the earlier ones.
# L$enc_loop6 is also a direct call target of L$ctr32_loop6 further down, which
# performs the equivalent setup itself before calling.
.p2align 4
_aesni_encrypt6:

        movups (%rcx),%xmm0
        shll $4,%eax
        movups 16(%rcx),%xmm1
        xorps %xmm0,%xmm2
        pxor %xmm0,%xmm3
        pxor %xmm0,%xmm4
.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
        leaq 32(%rcx,%rax,1),%rcx
        negq %rax
.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
        pxor %xmm0,%xmm5
        pxor %xmm0,%xmm6
.byte 102,15,56,220,225  # aesenc %xmm1,%xmm4
        pxor %xmm0,%xmm7
        movups (%rcx,%rax,1),%xmm0  # round key 2 (rax is -16*n here)
        addq $16,%rax
        jmp L$enc_loop6_enter  # %xmm2..%xmm4 already had their %xmm1 round
.p2align 4
L$enc_loop6:
# aesenc with %xmm1 on %xmm2..%xmm4
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
L$enc_loop6_enter:
# aesenc with %xmm1 on %xmm5..%xmm7
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
# aesenc with %xmm0 on %xmm2..%xmm7
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
        movups -16(%rcx,%rax,1),%xmm0
        jnz L$enc_loop6

# last aesenc with %xmm1, then aesenclast with %xmm0
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 102,15,56,221,232
.byte 102,15,56,221,240
.byte 102,15,56,221,248
        .byte 0xf3,0xc3  # rep ret


# Eight-block variant: blocks in %xmm2..%xmm9.
# L$enc_loop8_enter is also a direct call target of L$ctr32_tail further down.
.p2align 4
_aesni_encrypt8:

        movups (%rcx),%xmm0
        shll $4,%eax
        movups 16(%rcx),%xmm1
        xorps %xmm0,%xmm2
        xorps %xmm0,%xmm3
        pxor %xmm0,%xmm4
        pxor %xmm0,%xmm5
        pxor %xmm0,%xmm6
        leaq 32(%rcx,%rax,1),%rcx
        negq %rax
.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
        pxor %xmm0,%xmm7
        pxor %xmm0,%xmm8
.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
        pxor %xmm0,%xmm9
        movups (%rcx,%rax,1),%xmm0  # round key 2 (rax is -16*n here)
        addq $16,%rax
        jmp L$enc_loop8_inner  # %xmm2 and %xmm3 already had their %xmm1 round
.p2align 4
L$enc_loop8:
# aesenc with %xmm1 on %xmm2 and %xmm3
.byte 102,15,56,220,209
.byte 102,15,56,220,217
L$enc_loop8_inner:
# aesenc with %xmm1 on %xmm4..%xmm9
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
L$enc_loop8_enter:
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
# aesenc with %xmm0 on %xmm2..%xmm9
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
        movups -16(%rcx,%rax,1),%xmm0
        jnz L$enc_loop8

# last aesenc with %xmm1, then aesenclast with %xmm0
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 102,15,56,221,232
.byte 102,15,56,221,240
.byte 102,15,56,221,248
.byte 102,68,15,56,221,192
.byte 102,68,15,56,221,200
        .byte 0xf3,0xc3  # rep ret


.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks

.p2align 4
# Decoding of the raw .byte sequences used below. Operands are written source
# first, destination last; the final opcode byte is M = 192 + 8*dst + src.
#   bytes 102,15,56,220,M      aesenc      %xmm(src),%xmm(dst)
#   bytes 102,15,56,221,M      aesenclast  %xmm(src),%xmm(dst)
#   bytes 102,15,56,0,M        pshufb      %xmm(src),%xmm(dst)
#   bytes 102,15,58,223,M,imm  aeskeygenassist $imm,%xmm(src),%xmm(dst)
#   bytes 102,15,58,34,M,3     pinsrd $3,(32-bit register number src),%xmm(dst)
#   a 68 after the 102 adds 8 to dst, a 65 adds 8 to src, a 69 adds 8 to both
#   bytes 0x0f,0x38,0xf1,0x44,0x24,D   movbe %eax,D(%rsp)
#   bytes 0x48,0x83,0xEC,0x08  subq $8,%rsp
#   bytes 0x66,0x90            two-byte nop
#   bytes 0xf3,0xc3            rep ret
#
# _aes_hw_ctr32_encrypt_blocks: counter-mode encryption. The counter is the
# big-endian 32-bit word in bytes 12..15 of the counter block; it is advanced
# in a register only, nothing is written back through %r8.
#   %rdi  input data
#   %rsi  output data
#   %rdx  number of 16-byte blocks
#   %rcx  key schedule (round keys from offset 0, loop count n at offset 240)
#   %r8   16-byte counter block
# A count of zero returns immediately. Without that guard a zero count is
# neither equal to 1 nor handled by the bulk path: it would fall through
# L$ctr32_tail into L$ctr32_loop3, which always loads 16 bytes from (%rdi) and
# stores 16 bytes to (%rsi) before it looks at the count.
# The bulk path builds a 16-byte aligned, 128-byte stack frame that holds eight
# counter blocks already xored with round key 0. %r11 keeps the entry %rsp and
# %rbp is saved just below it. %ebp holds word 3 of round key 0 so that a new
# counter word can be whitened with one xorl before it is patched into a slot
# at offset 12. Registers and the frame are wiped at L$ctr32_done.
_aes_hw_ctr32_encrypt_blocks:

        testq %rdx,%rdx  # nothing to do for an empty request
        jz L$ctr32_epilogue
        cmpq $1,%rdx
        jne L$ctr32_bulk

# Exactly one block: encrypt the counter block, xor with the input, no frame.
        movups (%r8),%xmm2  # xmm2 = counter block
        movups (%rdi),%xmm3  # xmm3 = input block
        movl 240(%rcx),%edx  # edx = loop count
        movups (%rcx),%xmm0
        movups 16(%rcx),%xmm1
        leaq 32(%rcx),%rcx
        xorps %xmm0,%xmm2
L$oop_enc1_2:
.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
        decl %edx
        movups (%rcx),%xmm1
        leaq 16(%rcx),%rcx
        jnz L$oop_enc1_2  # flags come from decl
.byte 102,15,56,221,209  # aesenclast %xmm1,%xmm2
        pxor %xmm0,%xmm0  # wipe round keys
        pxor %xmm1,%xmm1
        xorps %xmm3,%xmm2  # keystream xor input
        pxor %xmm3,%xmm3
        movups %xmm2,(%rsi)
        xorps %xmm2,%xmm2
        jmp L$ctr32_epilogue

.p2align 4
L$ctr32_bulk:
        leaq (%rsp),%r11  # remember the entry stack pointer

        pushq %rbp  # lands at -8(%r11)

        subq $128,%rsp
        andq $-16,%rsp  # movdqa and movaps below need 16-byte alignment

        movdqu (%r8),%xmm2
        movdqu (%rcx),%xmm0
        movl 12(%r8),%r8d  # counter word, still big-endian
        pxor %xmm0,%xmm2  # xmm2 = counter block xor round key 0
        movl 12(%rcx),%ebp  # ebp = word 3 of round key 0
        movdqa %xmm2,0(%rsp)
        bswapl %r8d  # r8d = counter as a native integer
        movdqa %xmm2,%xmm3
        movdqa %xmm2,%xmm4
        movdqa %xmm2,%xmm5
        movdqa %xmm2,64(%rsp)
        movdqa %xmm2,80(%rsp)
        movdqa %xmm2,96(%rsp)
        movq %rdx,%r10  # park the block count, rdx is used as scratch
        movdqa %xmm2,112(%rsp)

# Build counter+1 .. counter+7, whitened with ebp, in xmm3..xmm5 and in the
# frame slots at 64, 80, 96 and 112.
        leaq 1(%r8),%rax
        leaq 2(%r8),%rdx
        bswapl %eax
        bswapl %edx
        xorl %ebp,%eax
        xorl %ebp,%edx
.byte 102,15,58,34,216,3  # pinsrd $3,%eax,%xmm3
        leaq 3(%r8),%rax
        movdqa %xmm3,16(%rsp)
.byte 102,15,58,34,226,3  # pinsrd $3,%edx,%xmm4
        bswapl %eax
        movq %r10,%rdx  # rdx = block count again
        leaq 4(%r8),%r10
        movdqa %xmm4,32(%rsp)
        xorl %ebp,%eax
        bswapl %r10d
.byte 102,15,58,34,232,3  # pinsrd $3,%eax,%xmm5
        xorl %ebp,%r10d
        movdqa %xmm5,48(%rsp)
        leaq 5(%r8),%r9
        movl %r10d,64+12(%rsp)
        bswapl %r9d
        leaq 6(%r8),%r10
        movl 240(%rcx),%eax  # eax = loop count n
        xorl %ebp,%r9d
        bswapl %r10d
        movl %r9d,80+12(%rsp)
        xorl %ebp,%r10d
        leaq 7(%r8),%r9
        movl %r10d,96+12(%rsp)
        bswapl %r9d
        leaq _OPENSSL_ia32cap_P(%rip),%r10
        movl 4(%r10),%r10d  # second 32-bit word of the capability vector
        xorl %ebp,%r9d
        andl $71303168,%r10d  # keep bits 26 and 22 (0x4400000)
        movl %r9d,112+12(%rsp)

        movups 16(%rcx),%xmm1  # round key 1

        movdqa 64(%rsp),%xmm6
        movdqa 80(%rsp),%xmm7

        cmpq $8,%rdx
        jb L$ctr32_tail  # fewer than 8 blocks

        subq $6,%rdx
# Taken when bit 22 is set and bit 26 is clear. The 6x loop relies on movbe.
# NOTE(review): presumably bit 22 is the MOVBE capability and bit 26 XSAVE,
# confirm against the OPENSSL_ia32cap_P documentation.
        cmpl $4194304,%r10d
        je L$ctr32_6x

        leaq 128(%rcx),%rcx  # bias rcx so round keys are reachable as K-128(%rcx)
        subq $2,%rdx  # total bias is now 8 for the 8x loop
        jmp L$ctr32_loop8

.p2align 4
L$ctr32_6x:
        shll $4,%eax  # eax = 16*n
        movl $48,%r10d
        bswapl %ebp  # movbe swaps on store, so the whitening word is pre-swapped
        leaq 32(%rcx,%rax,1),%rcx  # rcx = key schedule + 32 + 16*n
        subq %rax,%r10  # r10 = 48 - 16*n
        jmp L$ctr32_loop6

# Six blocks per iteration. Entered with xmm2..xmm7 holding whitened counter
# blocks and xmm1 holding round key 1. While the first rounds run, the six
# frame slots are refreshed with the next counter values through movbe.
.p2align 4
L$ctr32_loop6:
        addl $6,%r8d
        movups -48(%rcx,%r10,1),%xmm0  # round key 2
.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
        movl %r8d,%eax
        xorl %ebp,%eax
.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
.byte 0x0f,0x38,0xf1,0x44,0x24,12  # movbe %eax,12(%rsp)
        leal 1(%r8),%eax
.byte 102,15,56,220,225  # aesenc %xmm1,%xmm4
        xorl %ebp,%eax
.byte 0x0f,0x38,0xf1,0x44,0x24,28  # movbe %eax,28(%rsp)
.byte 102,15,56,220,233  # aesenc %xmm1,%xmm5
        leal 2(%r8),%eax
        xorl %ebp,%eax
.byte 102,15,56,220,241  # aesenc %xmm1,%xmm6
.byte 0x0f,0x38,0xf1,0x44,0x24,44  # movbe %eax,44(%rsp)
        leal 3(%r8),%eax
.byte 102,15,56,220,249  # aesenc %xmm1,%xmm7
        movups -32(%rcx,%r10,1),%xmm1  # round key 3
        xorl %ebp,%eax

.byte 102,15,56,220,208  # aesenc %xmm0,%xmm2
.byte 0x0f,0x38,0xf1,0x44,0x24,60  # movbe %eax,60(%rsp)
        leal 4(%r8),%eax
.byte 102,15,56,220,216  # aesenc %xmm0,%xmm3
        xorl %ebp,%eax
.byte 0x0f,0x38,0xf1,0x44,0x24,76  # movbe %eax,76(%rsp)
.byte 102,15,56,220,224  # aesenc %xmm0,%xmm4
        leal 5(%r8),%eax
        xorl %ebp,%eax
.byte 102,15,56,220,232  # aesenc %xmm0,%xmm5
.byte 0x0f,0x38,0xf1,0x44,0x24,92  # movbe %eax,92(%rsp)
        movq %r10,%rax  # rax = 48 - 16*n, the index L$enc_loop6 continues from
.byte 102,15,56,220,240  # aesenc %xmm0,%xmm6
.byte 102,15,56,220,248  # aesenc %xmm0,%xmm7
        movups -16(%rcx,%r10,1),%xmm0  # round key 4

        call L$enc_loop6  # remaining rounds, shared with _aesni_encrypt6

# xor the keystream with the input while reloading the next six counter blocks.
        movdqu (%rdi),%xmm8
        movdqu 16(%rdi),%xmm9
        movdqu 32(%rdi),%xmm10
        movdqu 48(%rdi),%xmm11
        movdqu 64(%rdi),%xmm12
        movdqu 80(%rdi),%xmm13
        leaq 96(%rdi),%rdi
        movups -64(%rcx,%r10,1),%xmm1  # round key 1 for the next pass
        pxor %xmm2,%xmm8
        movaps 0(%rsp),%xmm2
        pxor %xmm3,%xmm9
        movaps 16(%rsp),%xmm3
        pxor %xmm4,%xmm10
        movaps 32(%rsp),%xmm4
        pxor %xmm5,%xmm11
        movaps 48(%rsp),%xmm5
        pxor %xmm6,%xmm12
        movaps 64(%rsp),%xmm6
        pxor %xmm7,%xmm13
        movaps 80(%rsp),%xmm7
        movdqu %xmm8,(%rsi)
        movdqu %xmm9,16(%rsi)
        movdqu %xmm10,32(%rsi)
        movdqu %xmm11,48(%rsi)
        movdqu %xmm12,64(%rsi)
        movdqu %xmm13,80(%rsi)
        leaq 96(%rsi),%rsi

        subq $6,%rdx
        jnc L$ctr32_loop6  # at least six more blocks

        addq $6,%rdx  # undo the bias: rdx = blocks left (0..5)
        jz L$ctr32_done

        leal -48(%r10),%eax
        leaq -80(%rcx,%r10,1),%rcx  # rcx = start of the key schedule again
        negl %eax
        shrl $4,%eax  # eax = loop count n again
        jmp L$ctr32_tail

# Eight blocks per iteration, blocks in xmm2..xmm9. rcx is biased by 128, so
# K-128(%rcx) is the round key at byte offset K. Each group below applies one
# round key to all eight blocks and, interleaved with that, writes one new
# whitened counter word (via %r9d) into a frame slot for the next iteration.
.p2align 5
L$ctr32_loop8:
        addl $8,%r8d
        movdqa 96(%rsp),%xmm8
.byte 102,15,56,220,209
        movl %r8d,%r9d
        movdqa 112(%rsp),%xmm9
.byte 102,15,56,220,217
        bswapl %r9d
        movups 32-128(%rcx),%xmm0
.byte 102,15,56,220,225
        xorl %ebp,%r9d
        nop
.byte 102,15,56,220,233
        movl %r9d,0+12(%rsp)
        leaq 1(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
        movups 48-128(%rcx),%xmm1
        bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
        xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
        movl %r9d,16+12(%rsp)
        leaq 2(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
        movups 64-128(%rcx),%xmm0
        bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
        xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
        movl %r9d,32+12(%rsp)
        leaq 3(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
        movups 80-128(%rcx),%xmm1
        bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
        xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
        movl %r9d,48+12(%rsp)
        leaq 4(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
        movups 96-128(%rcx),%xmm0
        bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
        xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
        movl %r9d,64+12(%rsp)
        leaq 5(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
        movups 112-128(%rcx),%xmm1
        bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
        xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
        movl %r9d,80+12(%rsp)
        leaq 6(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
        movups 128-128(%rcx),%xmm0
        bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
        xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
        movl %r9d,96+12(%rsp)
        leaq 7(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
        movups 144-128(%rcx),%xmm1
        bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
        xorl %ebp,%r9d
        movdqu 0(%rdi),%xmm10  # start fetching input
.byte 102,15,56,220,232
        movl %r9d,112+12(%rsp)
        cmpl $11,%eax  # compare the loop count; the flags survive until the jb
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
        movups 160-128(%rcx),%xmm0

        jb L$ctr32_enc_done  # loop count below 11: xmm0 already is the last key

# Four more rounds for the longer schedule.
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
        movups 176-128(%rcx),%xmm1

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
        movups 192-128(%rcx),%xmm0

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
        movups 208-128(%rcx),%xmm1

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
        movups 224-128(%rcx),%xmm0
        jmp L$ctr32_enc_done

# Here xmm1 is the second-to-last round key and xmm0 the last one. Each input
# block is xored with xmm0 first, so the aesenclast that uses it as its key
# operand yields the final output directly.
.p2align 4
L$ctr32_enc_done:
        movdqu 16(%rdi),%xmm11
        pxor %xmm0,%xmm10
        movdqu 32(%rdi),%xmm12
        pxor %xmm0,%xmm11
        movdqu 48(%rdi),%xmm13
        pxor %xmm0,%xmm12
        movdqu 64(%rdi),%xmm14
        pxor %xmm0,%xmm13
        movdqu 80(%rdi),%xmm15
        pxor %xmm0,%xmm14
        pxor %xmm0,%xmm15
# aesenc with %xmm1 on %xmm2..%xmm9
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
        movdqu 96(%rdi),%xmm1
        leaq 128(%rdi),%rdi

.byte 102,65,15,56,221,210  # aesenclast %xmm10,%xmm2
        pxor %xmm0,%xmm1
        movdqu 112-128(%rdi),%xmm10
.byte 102,65,15,56,221,219  # aesenclast %xmm11,%xmm3
        pxor %xmm0,%xmm10
        movdqa 0(%rsp),%xmm11  # next counter blocks come back from the frame
.byte 102,65,15,56,221,228  # aesenclast %xmm12,%xmm4
.byte 102,65,15,56,221,237  # aesenclast %xmm13,%xmm5
        movdqa 16(%rsp),%xmm12
        movdqa 32(%rsp),%xmm13
.byte 102,65,15,56,221,246  # aesenclast %xmm14,%xmm6
.byte 102,65,15,56,221,255  # aesenclast %xmm15,%xmm7
        movdqa 48(%rsp),%xmm14
        movdqa 64(%rsp),%xmm15
.byte 102,68,15,56,221,193  # aesenclast %xmm1,%xmm8
        movdqa 80(%rsp),%xmm0
        movups 16-128(%rcx),%xmm1  # round key 1 for the next pass
.byte 102,69,15,56,221,202  # aesenclast %xmm10,%xmm9

        movups %xmm2,(%rsi)
        movdqa %xmm11,%xmm2
        movups %xmm3,16(%rsi)
        movdqa %xmm12,%xmm3
        movups %xmm4,32(%rsi)
        movdqa %xmm13,%xmm4
        movups %xmm5,48(%rsi)
        movdqa %xmm14,%xmm5
        movups %xmm6,64(%rsi)
        movdqa %xmm15,%xmm6
        movups %xmm7,80(%rsi)
        movdqa %xmm0,%xmm7
        movups %xmm8,96(%rsi)
        movups %xmm9,112(%rsi)
        leaq 128(%rsi),%rsi

        subq $8,%rdx
        jnc L$ctr32_loop8  # at least eight more blocks

        addq $8,%rdx  # undo the bias: rdx = blocks left (0..7)
        jz L$ctr32_done
        leaq -128(%rcx),%rcx  # remove the bias from rcx

# Fewer than eight blocks left. Here rcx points at the start of the key
# schedule, eax is the loop count, xmm1 is round key 1 and xmm2..xmm7 hold
# whitened counter blocks.
L$ctr32_tail:

        leaq 16(%rcx),%rcx
        cmpq $4,%rdx
        jb L$ctr32_loop3  # up to 3 blocks
        je L$ctr32_loop4  # exactly 4 blocks

# 5, 6 or 7 blocks: run the eight-lane code; xmm9 is a dummy lane.
        shll $4,%eax
        movdqa 96(%rsp),%xmm8
        pxor %xmm9,%xmm9

        movups 16(%rcx),%xmm0  # round key 2
.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
        leaq 32-16(%rcx,%rax,1),%rcx
        negq %rax
.byte 102,15,56,220,225  # aesenc %xmm1,%xmm4
        addq $16,%rax
        movups (%rdi),%xmm10
.byte 102,15,56,220,233  # aesenc %xmm1,%xmm5
.byte 102,15,56,220,241  # aesenc %xmm1,%xmm6
        movups 16(%rdi),%xmm11
        movups 32(%rdi),%xmm12
.byte 102,15,56,220,249  # aesenc %xmm1,%xmm7
.byte 102,68,15,56,220,193  # aesenc %xmm1,%xmm8

        call L$enc_loop8_enter  # remaining rounds, shared with _aesni_encrypt8

        movdqu 48(%rdi),%xmm13
        pxor %xmm10,%xmm2
        movdqu 64(%rdi),%xmm10
        pxor %xmm11,%xmm3
        movdqu %xmm2,(%rsi)
        pxor %xmm12,%xmm4
        movdqu %xmm3,16(%rsi)
        pxor %xmm13,%xmm5
        movdqu %xmm4,32(%rsi)
        pxor %xmm10,%xmm6
        movdqu %xmm5,48(%rsi)
        movdqu %xmm6,64(%rsi)
        cmpq $6,%rdx
        jb L$ctr32_done  # five blocks

        movups 80(%rdi),%xmm11
        xorps %xmm11,%xmm7
        movups %xmm7,80(%rsi)
        je L$ctr32_done  # six blocks; flags still come from cmpq $6

        movups 96(%rdi),%xmm12
        xorps %xmm12,%xmm8
        movups %xmm8,96(%rsi)
        jmp L$ctr32_done

# Exactly four blocks, one round key per iteration.
.p2align 5
L$ctr32_loop4:
.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
        leaq 16(%rcx),%rcx
        decl %eax
.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
.byte 102,15,56,220,225  # aesenc %xmm1,%xmm4
.byte 102,15,56,220,233  # aesenc %xmm1,%xmm5
        movups (%rcx),%xmm1
        jnz L$ctr32_loop4  # flags come from decl
.byte 102,15,56,221,209  # aesenclast %xmm1,%xmm2
.byte 102,15,56,221,217  # aesenclast %xmm1,%xmm3
        movups (%rdi),%xmm10
        movups 16(%rdi),%xmm11
.byte 102,15,56,221,225  # aesenclast %xmm1,%xmm4
.byte 102,15,56,221,233  # aesenclast %xmm1,%xmm5
        movups 32(%rdi),%xmm12
        movups 48(%rdi),%xmm13

        xorps %xmm10,%xmm2
        movups %xmm2,(%rsi)
        xorps %xmm11,%xmm3
        movups %xmm3,16(%rsi)
        pxor %xmm12,%xmm4
        movdqu %xmm4,32(%rsi)
        pxor %xmm13,%xmm5
        movdqu %xmm5,48(%rsi)
        jmp L$ctr32_done

# One to three blocks. Three lanes are always computed and the first block is
# always stored; the count only gates the second and third stores. The entry
# guard keeps a zero count away from this code.
.p2align 5
L$ctr32_loop3:
.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
        leaq 16(%rcx),%rcx
        decl %eax
.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
.byte 102,15,56,220,225  # aesenc %xmm1,%xmm4
        movups (%rcx),%xmm1
        jnz L$ctr32_loop3  # flags come from decl
.byte 102,15,56,221,209  # aesenclast %xmm1,%xmm2
.byte 102,15,56,221,217  # aesenclast %xmm1,%xmm3
.byte 102,15,56,221,225  # aesenclast %xmm1,%xmm4

        movups (%rdi),%xmm10
        xorps %xmm10,%xmm2
        movups %xmm2,(%rsi)
        cmpq $2,%rdx
        jb L$ctr32_done  # one block

        movups 16(%rdi),%xmm11
        xorps %xmm11,%xmm3
        movups %xmm3,16(%rsi)
        je L$ctr32_done  # two blocks; flags still come from cmpq $2

        movups 32(%rdi),%xmm12
        xorps %xmm12,%xmm4
        movups %xmm4,32(%rsi)

# Wipe every vector register and the whole frame, then unwind.
L$ctr32_done:
        xorps %xmm0,%xmm0
        xorl %ebp,%ebp
        pxor %xmm1,%xmm1
        pxor %xmm2,%xmm2
        pxor %xmm3,%xmm3
        pxor %xmm4,%xmm4
        pxor %xmm5,%xmm5
        pxor %xmm6,%xmm6
        pxor %xmm7,%xmm7
        movaps %xmm0,0(%rsp)
        pxor %xmm8,%xmm8
        movaps %xmm0,16(%rsp)
        pxor %xmm9,%xmm9
        movaps %xmm0,32(%rsp)
        pxor %xmm10,%xmm10
        movaps %xmm0,48(%rsp)
        pxor %xmm11,%xmm11
        movaps %xmm0,64(%rsp)
        pxor %xmm12,%xmm12
        movaps %xmm0,80(%rsp)
        pxor %xmm13,%xmm13
        movaps %xmm0,96(%rsp)
        pxor %xmm14,%xmm14
        movaps %xmm0,112(%rsp)
        pxor %xmm15,%xmm15
        movq -8(%r11),%rbp  # restore the saved rbp

        leaq (%r11),%rsp  # restore the entry stack pointer

L$ctr32_epilogue:
        .byte 0xf3,0xc3  # rep ret


# _aes_hw_set_encrypt_key: expands a 128-bit or 256-bit key.
#   %rdi  user key bytes
#   %esi  key length in bits (128 or 256)
#   %rdx  output key schedule; the loop count (9 or 13) is stored at offset 240
# Returns in %rax: 0 on success, -1 when %rdi or %rdx is zero, -2 for any other
# key length. %xmm0..%xmm5 are zeroed on every exit path.
# Two code paths exist per key size: one based on aeskeygenassist and an "alt"
# path based on pshufb plus aesenclast, selected from the capability vector.
.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key

.p2align 4
_aes_hw_set_encrypt_key:
__aesni_set_encrypt_key:

.byte 0x48,0x83,0xEC,0x08  # subq $8,%rsp, undone by addq $8,%rsp at L$enc_key_ret

        movq $-1,%rax
        testq %rdi,%rdi
        jz L$enc_key_ret
        testq %rdx,%rdx
        jz L$enc_key_ret

        movups (%rdi),%xmm0  # first 16 key bytes
        xorps %xmm4,%xmm4  # the shufps sequences in the helpers start from zero
        leaq _OPENSSL_ia32cap_P(%rip),%r10
        movl 4(%r10),%r10d  # second 32-bit word of the capability vector
        andl $268437504,%r10d  # keep bits 28 and 11 (0x10000800)
        leaq 16(%rdx),%rax  # rax = write cursor, starts at round key 1
        cmpl $256,%esi
        je L$14rounds

        cmpl $128,%esi
        jne L$bad_keybits

L$10rounds:
        movl $9,%esi  # loop count stored for 128-bit keys
# Alt path when bit 28 is set and bit 11 is clear.
# NOTE(review): presumably bit 28 is AVX and bit 11 XOP, confirm against the
# OPENSSL_ia32cap_P documentation.
        cmpl $268435456,%r10d
        je L$10rounds_alt

        movups %xmm0,(%rdx)  # round key 0 is the user key itself
# Each pair below is aeskeygenassist $rcon,%xmm0,%xmm1 followed by a helper
# call that stores the previous round key and derives the next one in xmm0.
.byte 102,15,58,223,200,1
        call L$key_expansion_128_cold
.byte 102,15,58,223,200,2
        call L$key_expansion_128
.byte 102,15,58,223,200,4
        call L$key_expansion_128
.byte 102,15,58,223,200,8
        call L$key_expansion_128
.byte 102,15,58,223,200,16
        call L$key_expansion_128
.byte 102,15,58,223,200,32
        call L$key_expansion_128
.byte 102,15,58,223,200,64
        call L$key_expansion_128
.byte 102,15,58,223,200,128
        call L$key_expansion_128
.byte 102,15,58,223,200,27
        call L$key_expansion_128
.byte 102,15,58,223,200,54
        call L$key_expansion_128
        movups %xmm0,(%rax)  # round key 10 at offset 160
        movl %esi,80(%rax)  # loop count at offset 240
        xorl %eax,%eax  # success
        jmp L$enc_key_ret

.p2align 4
L$10rounds_alt:
        movdqa L$key_rotate(%rip),%xmm5  # pshufb mask
        movl $8,%r10d  # eight iterations with a doubling round constant
        movdqa L$key_rcon1(%rip),%xmm4  # round constant, starts at 1
        movdqa %xmm0,%xmm2
        movdqu %xmm0,(%rdx)  # round key 0
        jmp L$oop_key128

.p2align 4
L$oop_key128:
.byte 102,15,56,0,197  # pshufb %xmm5,%xmm0
.byte 102,15,56,221,196  # aesenclast %xmm4,%xmm0
        pslld $1,%xmm4  # double the round constant
        leaq 16(%rax),%rax

# xmm2 = previous key xored with itself shifted left by 4, 8 and 12 bytes
        movdqa %xmm2,%xmm3
        pslldq $4,%xmm2
        pxor %xmm2,%xmm3
        pslldq $4,%xmm2
        pxor %xmm2,%xmm3
        pslldq $4,%xmm2
        pxor %xmm3,%xmm2

        pxor %xmm2,%xmm0
        movdqu %xmm0,-16(%rax)
        movdqa %xmm0,%xmm2

        decl %r10d
        jnz L$oop_key128

# The last two round keys use the constants 0x1b and its double.
        movdqa L$key_rcon1b(%rip),%xmm4

.byte 102,15,56,0,197  # pshufb %xmm5,%xmm0
.byte 102,15,56,221,196  # aesenclast %xmm4,%xmm0
        pslld $1,%xmm4

        movdqa %xmm2,%xmm3
        pslldq $4,%xmm2
        pxor %xmm2,%xmm3
        pslldq $4,%xmm2
        pxor %xmm2,%xmm3
        pslldq $4,%xmm2
        pxor %xmm3,%xmm2

        pxor %xmm2,%xmm0
        movdqu %xmm0,(%rax)

        movdqa %xmm0,%xmm2
.byte 102,15,56,0,197  # pshufb %xmm5,%xmm0
.byte 102,15,56,221,196  # aesenclast %xmm4,%xmm0

        movdqa %xmm2,%xmm3
        pslldq $4,%xmm2
        pxor %xmm2,%xmm3
        pslldq $4,%xmm2
        pxor %xmm2,%xmm3
        pslldq $4,%xmm2
        pxor %xmm3,%xmm2

        pxor %xmm2,%xmm0
        movdqu %xmm0,16(%rax)

        movl %esi,96(%rax)  # loop count at offset 240
        xorl %eax,%eax  # success
        jmp L$enc_key_ret


.p2align 4
L$14rounds:
        movups 16(%rdi),%xmm2  # second 16 key bytes
        movl $13,%esi  # loop count stored for 256-bit keys
        leaq 16(%rax),%rax  # write cursor starts at round key 2
        cmpl $268435456,%r10d
        je L$14rounds_alt

        movups %xmm0,(%rdx)
        movups %xmm2,16(%rdx)
# Alternating aeskeygenassist from xmm2 (helper 256a updates xmm0) and from
# xmm0 (helper 256b updates xmm2).
.byte 102,15,58,223,202,1
        call L$key_expansion_256a_cold
.byte 102,15,58,223,200,1
        call L$key_expansion_256b
.byte 102,15,58,223,202,2
        call L$key_expansion_256a
.byte 102,15,58,223,200,2
        call L$key_expansion_256b
.byte 102,15,58,223,202,4
        call L$key_expansion_256a
.byte 102,15,58,223,200,4
        call L$key_expansion_256b
.byte 102,15,58,223,202,8
        call L$key_expansion_256a
.byte 102,15,58,223,200,8
        call L$key_expansion_256b
.byte 102,15,58,223,202,16
        call L$key_expansion_256a
.byte 102,15,58,223,200,16
        call L$key_expansion_256b
.byte 102,15,58,223,202,32
        call L$key_expansion_256a
.byte 102,15,58,223,200,32
        call L$key_expansion_256b
.byte 102,15,58,223,202,64
        call L$key_expansion_256a
        movups %xmm0,(%rax)  # round key 14 at offset 224
        movl %esi,16(%rax)  # loop count at offset 240
        xorq %rax,%rax  # success
        jmp L$enc_key_ret

.p2align 4
L$14rounds_alt:
        movdqa L$key_rotate(%rip),%xmm5
        movdqa L$key_rcon1(%rip),%xmm4
        movl $7,%r10d  # seven iterations; the last one stores a single key
        movdqu %xmm0,0(%rdx)
        movdqa %xmm2,%xmm1
        movdqu %xmm2,16(%rdx)
        jmp L$oop_key256

.p2align 4
L$oop_key256:
.byte 102,15,56,0,213  # pshufb %xmm5,%xmm2
.byte 102,15,56,221,212  # aesenclast %xmm4,%xmm2

        movdqa %xmm0,%xmm3
        pslldq $4,%xmm0
        pxor %xmm0,%xmm3
        pslldq $4,%xmm0
        pxor %xmm0,%xmm3
        pslldq $4,%xmm0
        pxor %xmm3,%xmm0
        pslld $1,%xmm4  # double the round constant

        pxor %xmm2,%xmm0
        movdqu %xmm0,(%rax)

        decl %r10d
        jz L$done_key256

        pshufd $0xff,%xmm0,%xmm2  # broadcast the top word of the new key
        pxor %xmm3,%xmm3
.byte 102,15,56,221,211  # aesenclast %xmm3,%xmm2 with an all-zero key operand

        movdqa %xmm1,%xmm3
        pslldq $4,%xmm1
        pxor %xmm1,%xmm3
        pslldq $4,%xmm1
        pxor %xmm1,%xmm3
        pslldq $4,%xmm1
        pxor %xmm3,%xmm1

        pxor %xmm1,%xmm2
        movdqu %xmm2,16(%rax)
        leaq 32(%rax),%rax
        movdqa %xmm2,%xmm1

        jmp L$oop_key256

L$done_key256:
        movl %esi,16(%rax)  # loop count at offset 240
        xorl %eax,%eax  # success
        jmp L$enc_key_ret

.p2align 4
L$bad_keybits:
        movq $-2,%rax
L$enc_key_ret:
        pxor %xmm0,%xmm0  # wipe key material from registers
        pxor %xmm1,%xmm1
        pxor %xmm2,%xmm2
        pxor %xmm3,%xmm3
        pxor %xmm4,%xmm4
        pxor %xmm5,%xmm5
        addq $8,%rsp

        .byte 0xf3,0xc3  # rep ret

L$SEH_end_set_encrypt_key:

# Key-expansion helpers, reached through call from the key setup code above.
# They expect %rax as the write cursor, %xmm1 as the aeskeygenassist result and
# %xmm4 as scratch for the shufps sequence. The _cold entry points skip the
# store of the previous round key.
.p2align 4
L$key_expansion_128:
        movups %xmm0,(%rax)
        leaq 16(%rax),%rax
L$key_expansion_128_cold:
        shufps $16,%xmm0,%xmm4
        xorps %xmm4,%xmm0
        shufps $140,%xmm0,%xmm4
        xorps %xmm4,%xmm0
        shufps $255,%xmm1,%xmm1  # broadcast word 3 of the aeskeygenassist result
        xorps %xmm1,%xmm0
        .byte 0xf3,0xc3  # rep ret

# The two 192-bit helpers have no visible caller in this file; the key setup
# code above only accepts 128 and 256 bits.
.p2align 4
L$key_expansion_192a:
        movups %xmm0,(%rax)
        leaq 16(%rax),%rax
L$key_expansion_192a_cold:
        movaps %xmm2,%xmm5
L$key_expansion_192b_warm:
        shufps $16,%xmm0,%xmm4
        movdqa %xmm2,%xmm3
        xorps %xmm4,%xmm0
        shufps $140,%xmm0,%xmm4
        pslldq $4,%xmm3
        xorps %xmm4,%xmm0
        pshufd $85,%xmm1,%xmm1
        pxor %xmm3,%xmm2
        pxor %xmm1,%xmm0
        pshufd $255,%xmm0,%xmm3
        pxor %xmm3,%xmm2
        .byte 0xf3,0xc3  # rep ret

.p2align 4
L$key_expansion_192b:
        movaps %xmm0,%xmm3
        shufps $68,%xmm0,%xmm5
        movups %xmm5,(%rax)
        shufps $78,%xmm2,%xmm3
        movups %xmm3,16(%rax)
        leaq 32(%rax),%rax
        jmp L$key_expansion_192b_warm

.p2align 4
L$key_expansion_256a:
        movups %xmm2,(%rax)
        leaq 16(%rax),%rax
L$key_expansion_256a_cold:
        shufps $16,%xmm0,%xmm4
        xorps %xmm4,%xmm0
        shufps $140,%xmm0,%xmm4
        xorps %xmm4,%xmm0
        shufps $255,%xmm1,%xmm1  # broadcast word 3 of the aeskeygenassist result
        xorps %xmm1,%xmm0
        .byte 0xf3,0xc3  # rep ret

.p2align 4
L$key_expansion_256b:
        movups %xmm0,(%rax)
        leaq 16(%rax),%rax

        shufps $16,%xmm2,%xmm4
        xorps %xmm4,%xmm2
        shufps $140,%xmm2,%xmm4
        xorps %xmm4,%xmm2
        shufps $170,%xmm1,%xmm1  # broadcast word 2 of the aeskeygenassist result
        xorps %xmm1,%xmm2
        .byte 0xf3,0xc3  # rep ret


# Constant tables. Only L$key_rotate, L$key_rcon1 and L$key_rcon1b are
# referenced by the code visible in this file.
.p2align 6
L$bswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
L$increment32:
.long 6,6,6,0
L$increment64:
.long 1,0,0,0
L$increment1:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
L$key_rotate:
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
L$key_rotate192:
.long 0x04070605,0x04070605,0x04070605,0x04070605
L$key_rcon1:
.long 1,1,1,1
L$key_rcon1b:
.long 0x1b,0x1b,0x1b,0x1b

# NUL-terminated ASCII attribution string beginning "AES for Intel AES-NI, CRYPTOGAMS by".
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
#endif