# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#
# AES-NI routines for x86-64 (AT&T syntax, Mach-O symbol naming): single
# block encryption, CTR mode with a 32-bit big-endian counter, and
# encryption key schedule setup for 128-bit and 256-bit keys.
# The exported routines read their arguments from %rdi, %rsi, %rdx, %rcx
# and %r8, in that order.
#
# Several instructions are emitted as raw .byte sequences. Decoded from
# their encodings:
#   .byte 102,15,56,220,M      aesenc                (66 0F 38 DC /r)
#   .byte 102,15,56,221,M      aesenclast            (66 0F 38 DD /r)
#   .byte 102,15,56,0,M        pshufb                (66 0F 38 00 /r)
#   .byte 102,15,58,34,M,I     pinsrd with imm I     (66 0F 3A 22 /r ib)
#   .byte 102,15,58,223,M,I    aeskeygenassist imm I (66 0F 3A DF /r ib)
#   .byte 0x0f,0x38,0xf1,0x44,0x24,D    movbe %eax,D(%rsp)
#   .byte 0xf3,0xc3            rep ret
#   .byte 0x0f,0x1f,0x00 and .byte 0x66,0x90 are multi-byte nops.
# M is a register-direct ModRM byte equal to 192 + 8*dst + src, so 209 means
# source %xmm1, destination %xmm2 and 208 means source %xmm0, destination
# %xmm2. A 68, 65 or 69 right after the leading 102 is a REX prefix that
# adds 8 to the destination, the source, or both register numbers.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text

# _GFp_aes_hw_encrypt: encrypt one 16-byte block.
#   In:    %rdi = input block, %rsi = output block, %rdx = key schedule
#          (round keys from offset 0, loop count at offset 240).
#   Out:   16 bytes written to (%rsi).
#   Clobb: %eax, %rdx, flags; %xmm0-%xmm2 are zeroed before returning.
.globl _GFp_aes_hw_encrypt
.private_extern _GFp_aes_hw_encrypt

.p2align 4
_GFp_aes_hw_encrypt:

	movups (%rdi),%xmm2
	movl 240(%rdx),%eax	# number of aesenc rounds to run
	movups (%rdx),%xmm0
	movups 16(%rdx),%xmm1
	leaq 32(%rdx),%rdx
	xorps %xmm0,%xmm2	# whitening with round key 0
L$oop_enc1_1:
.byte 102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl %eax
	movups (%rdx),%xmm1	# next round key; movups/leaq keep the decl flags
	leaq 16(%rdx),%rdx
	jnz L$oop_enc1_1
.byte 102,15,56,221,209	# aesenclast %xmm1,%xmm2
	pxor %xmm0,%xmm0	# wipe round keys from registers
	pxor %xmm1,%xmm1
	movups %xmm2,(%rsi)
	pxor %xmm2,%xmm2
	.byte 0xf3,0xc3



# _aesni_encrypt2/3/4/6/8: internal helpers that encrypt 2, 3, 4, 6 or 8
# blocks in parallel. They share one register convention:
#   In:    %rcx = key schedule, %eax = loop count as stored at offset 240,
#          blocks in %xmm2 upward (%xmm2-%xmm9 for the 8-block variant).
#   Out:   blocks encrypted in place; %rax = 0 and %rcx = key + 32 + 16*count.
#   Clobb: %xmm0, %xmm1 (round keys), flags.
# Round keys are addressed as (%rcx,%rax) with %rax negative and counting
# up by 32 per iteration, so the addq result doubles as the loop condition.
# No call to _aesni_encrypt2, _aesni_encrypt3 or _aesni_encrypt4 is visible
# in this file.
.p2align 4
_aesni_encrypt2:

	movups (%rcx),%xmm0
	shll $4,%eax	# count * 16 = bytes of round keys
	movups 16(%rcx),%xmm1
	xorps %xmm0,%xmm2
	xorps %xmm0,%xmm3
	movups 32(%rcx),%xmm0
	leaq 32(%rcx,%rax,1),%rcx
	negq %rax
	addq $16,%rax

L$enc_loop2:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
	movups (%rcx,%rax,1),%xmm1
	addq $32,%rax
.byte 102,15,56,220,208
.byte 102,15,56,220,216
	movups -16(%rcx,%rax,1),%xmm0
	jnz L$enc_loop2	# flags still come from the addq above

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,221,208
.byte 102,15,56,221,216
	.byte 0xf3,0xc3



# Three-block variant; blocks in %xmm2-%xmm4.
.p2align 4
_aesni_encrypt3:

	movups (%rcx),%xmm0
	shll $4,%eax
	movups 16(%rcx),%xmm1
	xorps %xmm0,%xmm2
	xorps %xmm0,%xmm3
	xorps %xmm0,%xmm4
	movups 32(%rcx),%xmm0
	leaq 32(%rcx,%rax,1),%rcx
	negq %rax
	addq $16,%rax

L$enc_loop3:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
	movups (%rcx,%rax,1),%xmm1
	addq $32,%rax
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
	movups -16(%rcx,%rax,1),%xmm0
	jnz L$enc_loop3

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
	.byte 0xf3,0xc3



# Four-block variant; blocks in %xmm2-%xmm5.
.p2align 4
_aesni_encrypt4:

	movups (%rcx),%xmm0
	shll $4,%eax
	movups 16(%rcx),%xmm1
	xorps %xmm0,%xmm2
	xorps %xmm0,%xmm3
	xorps %xmm0,%xmm4
	xorps %xmm0,%xmm5
	movups 32(%rcx),%xmm0
	leaq 32(%rcx,%rax,1),%rcx
	negq %rax
.byte 0x0f,0x1f,0x00	# 3-byte nop
	addq $16,%rax

L$enc_loop4:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	movups (%rcx,%rax,1),%xmm1
	addq $32,%rax
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
	movups -16(%rcx,%rax,1),%xmm0
	jnz L$enc_loop4

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 102,15,56,221,232
	.byte 0xf3,0xc3



# Six-block variant; blocks in %xmm2-%xmm7. The first round is interleaved
# with the whitening xors. L$enc_loop6 is also called directly from
# L$ctr32_loop6, which performs the opening rounds itself.
.p2align 4
_aesni_encrypt6:

	movups (%rcx),%xmm0
	shll $4,%eax
	movups 16(%rcx),%xmm1
	xorps %xmm0,%xmm2
	pxor %xmm0,%xmm3
	pxor %xmm0,%xmm4
.byte 102,15,56,220,209
	leaq 32(%rcx,%rax,1),%rcx
	negq %rax
.byte 102,15,56,220,217
	pxor %xmm0,%xmm5
	pxor %xmm0,%xmm6
.byte 102,15,56,220,225
	pxor %xmm0,%xmm7
	movups (%rcx,%rax,1),%xmm0
	addq $16,%rax
	jmp L$enc_loop6_enter	# first three aesenc of this round already done
.p2align 4
L$enc_loop6:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
L$enc_loop6_enter:
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
	movups (%rcx,%rax,1),%xmm1
	addq $32,%rax
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
	movups -16(%rcx,%rax,1),%xmm0
	jnz L$enc_loop6

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 102,15,56,221,232
.byte 102,15,56,221,240
.byte 102,15,56,221,248
	.byte 0xf3,0xc3



# Eight-block variant; blocks in %xmm2-%xmm9. L$enc_loop8_enter is also
# called directly from L$ctr32_tail, which performs the first round itself.
.p2align 4
_aesni_encrypt8:

	movups (%rcx),%xmm0
	shll $4,%eax
	movups 16(%rcx),%xmm1
	xorps %xmm0,%xmm2
	xorps %xmm0,%xmm3
	pxor %xmm0,%xmm4
	pxor %xmm0,%xmm5
	pxor %xmm0,%xmm6
	leaq 32(%rcx,%rax,1),%rcx
	negq %rax
.byte 102,15,56,220,209
	pxor %xmm0,%xmm7
	pxor %xmm0,%xmm8
.byte 102,15,56,220,217
	pxor %xmm0,%xmm9
	movups (%rcx,%rax,1),%xmm0
	addq $16,%rax
	jmp L$enc_loop8_inner	# first two aesenc of this round already done
.p2align 4
L$enc_loop8:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
L$enc_loop8_inner:
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193	# aesenc %xmm1,%xmm8
.byte 102,68,15,56,220,201	# aesenc %xmm1,%xmm9
L$enc_loop8_enter:
	movups (%rcx,%rax,1),%xmm1
	addq $32,%rax
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192	# aesenc %xmm0,%xmm8
.byte 102,68,15,56,220,200	# aesenc %xmm0,%xmm9
	movups -16(%rcx,%rax,1),%xmm0
	jnz L$enc_loop8

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 102,15,56,221,232
.byte 102,15,56,221,240
.byte 102,15,56,221,248
.byte 102,68,15,56,221,192	# aesenclast %xmm0,%xmm8
.byte 102,68,15,56,221,200	# aesenclast %xmm0,%xmm9
	.byte 0xf3,0xc3


# _GFp_aes_hw_ctr32_encrypt_blocks: CTR mode over whole 16-byte blocks.
#   In:    %rdi = input, %rsi = output, %rdx = number of 16-byte blocks,
#          %rcx = key schedule, %r8 = 16-byte counter block whose last four
#          bytes are a big-endian 32-bit counter.
#   Out:   output blocks written to (%rsi). The counter block at (%r8) is
#          only read, never written back.
#   Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, flags, %xmm0-%xmm15.
#          %rbp is saved and restored on the bulk path.
# Bulk path stack frame (16-byte aligned, 128 bytes): eight counter blocks
# at 0,16,...,112(%rsp). Each holds the counter block already xored with
# round key 0, so only the last dword (offset +12) changes per block.
.globl _GFp_aes_hw_ctr32_encrypt_blocks
.private_extern _GFp_aes_hw_ctr32_encrypt_blocks

.p2align 4
_GFp_aes_hw_ctr32_encrypt_blocks:

	cmpq $1,%rdx
	jne L$ctr32_bulk

# Exactly one block: encrypt the counter block and xor it with the input.
# No stack frame is set up on this path.
	movups (%r8),%xmm2
	movups (%rdi),%xmm3
	movl 240(%rcx),%edx
	movups (%rcx),%xmm0
	movups 16(%rcx),%xmm1
	leaq 32(%rcx),%rcx
	xorps %xmm0,%xmm2
L$oop_enc1_2:
.byte 102,15,56,220,209
	decl %edx
	movups (%rcx),%xmm1
	leaq 16(%rcx),%rcx
	jnz L$oop_enc1_2
.byte 102,15,56,221,209
	pxor %xmm0,%xmm0
	pxor %xmm1,%xmm1
	xorps %xmm3,%xmm2
	pxor %xmm3,%xmm3
	movups %xmm2,(%rsi)
	xorps %xmm2,%xmm2
	jmp L$ctr32_epilogue

.p2align 4
L$ctr32_bulk:
	leaq (%rsp),%r11	# remember the incoming %rsp for the epilogue

	pushq %rbp	# saved at -8(%r11)

	subq $128,%rsp
	andq $-16,%rsp	# align so movdqa/movaps on the frame are legal

# Build the first eight counter blocks. %r8d = counter in host byte order,
# %ebp = last dword of round key 0, which is folded into every counter dword.
	movdqu (%r8),%xmm2
	movdqu (%rcx),%xmm0
	movl 12(%r8),%r8d
	pxor %xmm0,%xmm2
	movl 12(%rcx),%ebp
	movdqa %xmm2,0(%rsp)
	bswapl %r8d
	movdqa %xmm2,%xmm3
	movdqa %xmm2,%xmm4
	movdqa %xmm2,%xmm5
	movdqa %xmm2,64(%rsp)
	movdqa %xmm2,80(%rsp)
	movdqa %xmm2,96(%rsp)
	movq %rdx,%r10	# park the block count while %rdx is used as scratch
	movdqa %xmm2,112(%rsp)

	leaq 1(%r8),%rax
	leaq 2(%r8),%rdx
	bswapl %eax
	bswapl %edx
	xorl %ebp,%eax
	xorl %ebp,%edx
.byte 102,15,58,34,216,3	# pinsrd $3,%eax,%xmm3
	leaq 3(%r8),%rax
	movdqa %xmm3,16(%rsp)
.byte 102,15,58,34,226,3	# pinsrd $3,%edx,%xmm4
	bswapl %eax
	movq %r10,%rdx	# restore the block count
	leaq 4(%r8),%r10
	movdqa %xmm4,32(%rsp)
	xorl %ebp,%eax
	bswapl %r10d
.byte 102,15,58,34,232,3	# pinsrd $3,%eax,%xmm5
	xorl %ebp,%r10d
	movdqa %xmm5,48(%rsp)
	leaq 5(%r8),%r9
	movl %r10d,64+12(%rsp)
	bswapl %r9d
	leaq 6(%r8),%r10
	movl 240(%rcx),%eax	# loop count from the key schedule
	xorl %ebp,%r9d
	bswapl %r10d
	movl %r9d,80+12(%rsp)
	xorl %ebp,%r10d
	leaq 7(%r8),%r9
	movl %r10d,96+12(%rsp)
	bswapl %r9d
	leaq _GFp_ia32cap_P(%rip),%r10
	movl 4(%r10),%r10d	# second dword of the capability vector
	xorl %ebp,%r9d
	andl $71303168,%r10d	# mask 0x04400000 (bits 22 and 26)
	movl %r9d,112+12(%rsp)

	movups 16(%rcx),%xmm1

	movdqa 64(%rsp),%xmm6
	movdqa 80(%rsp),%xmm7

	cmpq $8,%rdx
	jb L$ctr32_tail

# Choose between the 6-block and the 8-block loop. The 6-block loop is taken
# when only bit 22 of the masked value is set; that loop uses movbe.
# NOTE(review): presumably bit 22 = MOVBE and bit 26 = XSAVE in CPUID.1:ECX,
# singling out a specific CPU family - confirm against the perlasm source.
	subq $6,%rdx
	cmpl $4194304,%r10d	# 0x00400000
	je L$ctr32_6x

	leaq 128(%rcx),%rcx	# bias key pointer; loop8 uses N-128(%rcx)
	subq $2,%rdx
	jmp L$ctr32_loop8

.p2align 4
L$ctr32_6x:
	shll $4,%eax
	movl $48,%r10d
	bswapl %ebp	# movbe swaps on store, so keep the key dword pre-swapped
	leaq 32(%rcx,%rax,1),%rcx
	subq %rax,%r10	# %r10 = 48 - 16*count, negative index for round keys
	jmp L$ctr32_loop6

# Six blocks per iteration. The first rounds are issued here, interleaved
# with writing the next six counter dwords to the frame, then the remaining
# rounds run in L$enc_loop6.
.p2align 4
L$ctr32_loop6:
	addl $6,%r8d
	movups -48(%rcx,%r10,1),%xmm0
.byte 102,15,56,220,209
	movl %r8d,%eax
	xorl %ebp,%eax
.byte 102,15,56,220,217
.byte 0x0f,0x38,0xf1,0x44,0x24,12	# movbe %eax,12(%rsp)
	leal 1(%r8),%eax
.byte 102,15,56,220,225
	xorl %ebp,%eax
.byte 0x0f,0x38,0xf1,0x44,0x24,28	# movbe %eax,28(%rsp)
.byte 102,15,56,220,233
	leal 2(%r8),%eax
	xorl %ebp,%eax
.byte 102,15,56,220,241
.byte 0x0f,0x38,0xf1,0x44,0x24,44	# movbe %eax,44(%rsp)
	leal 3(%r8),%eax
.byte 102,15,56,220,249
	movups -32(%rcx,%r10,1),%xmm1
	xorl %ebp,%eax

.byte 102,15,56,220,208
.byte 0x0f,0x38,0xf1,0x44,0x24,60	# movbe %eax,60(%rsp)
	leal 4(%r8),%eax
.byte 102,15,56,220,216
	xorl %ebp,%eax
.byte 0x0f,0x38,0xf1,0x44,0x24,76	# movbe %eax,76(%rsp)
.byte 102,15,56,220,224
	leal 5(%r8),%eax
	xorl %ebp,%eax
.byte 102,15,56,220,232
.byte 0x0f,0x38,0xf1,0x44,0x24,92	# movbe %eax,92(%rsp)
	movq %r10,%rax	# round key index expected by L$enc_loop6
.byte 102,15,56,220,240
.byte 102,15,56,220,248
	movups -16(%rcx,%r10,1),%xmm0

	call L$enc_loop6

# Xor the six keystream blocks with the input and reload the next six
# counter blocks from the frame.
	movdqu (%rdi),%xmm8
	movdqu 16(%rdi),%xmm9
	movdqu 32(%rdi),%xmm10
	movdqu 48(%rdi),%xmm11
	movdqu 64(%rdi),%xmm12
	movdqu 80(%rdi),%xmm13
	leaq 96(%rdi),%rdi
	movups -64(%rcx,%r10,1),%xmm1
	pxor %xmm2,%xmm8
	movaps 0(%rsp),%xmm2
	pxor %xmm3,%xmm9
	movaps 16(%rsp),%xmm3
	pxor %xmm4,%xmm10
	movaps 32(%rsp),%xmm4
	pxor %xmm5,%xmm11
	movaps 48(%rsp),%xmm5
	pxor %xmm6,%xmm12
	movaps 64(%rsp),%xmm6
	pxor %xmm7,%xmm13
	movaps 80(%rsp),%xmm7
	movdqu %xmm8,(%rsi)
	movdqu %xmm9,16(%rsi)
	movdqu %xmm10,32(%rsi)
	movdqu %xmm11,48(%rsi)
	movdqu %xmm12,64(%rsi)
	movdqu %xmm13,80(%rsi)
	leaq 96(%rsi),%rsi

	subq $6,%rdx
	jnc L$ctr32_loop6

	addq $6,%rdx	# undo the bias; %rdx = blocks left (0 to 5)
	jz L$ctr32_done

# Convert %r10 and %rcx back to the plain loop count and key pointer that
# L$ctr32_tail expects.
	leal -48(%r10),%eax
	leaq -80(%rcx,%r10,1),%rcx
	negl %eax
	shrl $4,%eax
	jmp L$ctr32_tail

# Eight blocks per iteration, fully unrolled. Each round is interleaved with
# computing one of the next eight counter dwords (%r8d + n, byte-swapped and
# xored with %ebp) and storing it at n*16+12(%rsp).
.p2align 5
L$ctr32_loop8:
	addl $8,%r8d
	movdqa 96(%rsp),%xmm8
.byte 102,15,56,220,209
	movl %r8d,%r9d
	movdqa 112(%rsp),%xmm9
.byte 102,15,56,220,217
	bswapl %r9d
	movups 32-128(%rcx),%xmm0
.byte 102,15,56,220,225
	xorl %ebp,%r9d
	nop
.byte 102,15,56,220,233
	movl %r9d,0+12(%rsp)
	leaq 1(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
	movups 48-128(%rcx),%xmm1
	bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
	xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
	movl %r9d,16+12(%rsp)
	leaq 2(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
	movups 64-128(%rcx),%xmm0
	bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
	xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	movl %r9d,32+12(%rsp)
	leaq 3(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
	movups 80-128(%rcx),%xmm1
	bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
	xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
	movl %r9d,48+12(%rsp)
	leaq 4(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
	movups 96-128(%rcx),%xmm0
	bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
	xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	movl %r9d,64+12(%rsp)
	leaq 5(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
	movups 112-128(%rcx),%xmm1
	bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
	xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
	movl %r9d,80+12(%rsp)
	leaq 6(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
	movups 128-128(%rcx),%xmm0
	bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
	xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	movl %r9d,96+12(%rsp)
	leaq 7(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
	movups 144-128(%rcx),%xmm1
	bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
	xorl %ebp,%r9d
	movdqu 0(%rdi),%xmm10
.byte 102,15,56,220,232
	movl %r9d,112+12(%rsp)
	cmpl $11,%eax	# flags consumed by the jb below; nothing between alters them
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
	movups 160-128(%rcx),%xmm0

	jb L$ctr32_enc_done	# loop count below 11: skip the four extra rounds

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
	movups 176-128(%rcx),%xmm1

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
	movups 192-128(%rcx),%xmm0



.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
	movups 208-128(%rcx),%xmm1

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
	movups 224-128(%rcx),%xmm0
	jmp L$ctr32_enc_done

# Final two rounds of the 8-block loop. The last round key (%xmm0) is xored
# into the input blocks first, so that aesenclast with that value as its key
# operand produces the output block directly.
.p2align 4
L$ctr32_enc_done:
	movdqu 16(%rdi),%xmm11
	pxor %xmm0,%xmm10
	movdqu 32(%rdi),%xmm12
	pxor %xmm0,%xmm11
	movdqu 48(%rdi),%xmm13
	pxor %xmm0,%xmm12
	movdqu 64(%rdi),%xmm14
	pxor %xmm0,%xmm13
	movdqu 80(%rdi),%xmm15
	pxor %xmm0,%xmm14
	pxor %xmm0,%xmm15
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
	movdqu 96(%rdi),%xmm1
	leaq 128(%rdi),%rdi

.byte 102,65,15,56,221,210	# aesenclast %xmm10,%xmm2
	pxor %xmm0,%xmm1
	movdqu 112-128(%rdi),%xmm10
.byte 102,65,15,56,221,219	# aesenclast %xmm11,%xmm3
	pxor %xmm0,%xmm10
	movdqa 0(%rsp),%xmm11
.byte 102,65,15,56,221,228	# aesenclast %xmm12,%xmm4
.byte 102,65,15,56,221,237	# aesenclast %xmm13,%xmm5
	movdqa 16(%rsp),%xmm12
	movdqa 32(%rsp),%xmm13
.byte 102,65,15,56,221,246	# aesenclast %xmm14,%xmm6
.byte 102,65,15,56,221,255	# aesenclast %xmm15,%xmm7
	movdqa 48(%rsp),%xmm14
	movdqa 64(%rsp),%xmm15
.byte 102,68,15,56,221,193	# aesenclast %xmm1,%xmm8
	movdqa 80(%rsp),%xmm0
	movups 16-128(%rcx),%xmm1	# round key 1 for the next iteration
.byte 102,69,15,56,221,202	# aesenclast %xmm10,%xmm9

# Store eight output blocks and move the next counter blocks into place.
	movups %xmm2,(%rsi)
	movdqa %xmm11,%xmm2
	movups %xmm3,16(%rsi)
	movdqa %xmm12,%xmm3
	movups %xmm4,32(%rsi)
	movdqa %xmm13,%xmm4
	movups %xmm5,48(%rsi)
	movdqa %xmm14,%xmm5
	movups %xmm6,64(%rsi)
	movdqa %xmm15,%xmm6
	movups %xmm7,80(%rsi)
	movdqa %xmm0,%xmm7
	movups %xmm8,96(%rsi)
	movups %xmm9,112(%rsi)
	leaq 128(%rsi),%rsi

	subq $8,%rdx
	jnc L$ctr32_loop8

	addq $8,%rdx	# undo the bias; %rdx = blocks left (0 to 7)
	jz L$ctr32_done
	leaq -128(%rcx),%rcx	# remove the key pointer bias

# Tail: 1 to 7 blocks remain. %rcx = key schedule, %eax = loop count,
# %xmm1 = round key 1, counter blocks already xored with round key 0.
# Fewer than 4 blocks go to loop3, exactly 4 to loop4, 5 to 7 fall through
# and reuse the 8-block round loop.
L$ctr32_tail:


	leaq 16(%rcx),%rcx
	cmpq $4,%rdx
	jb L$ctr32_loop3
	je L$ctr32_loop4


	shll $4,%eax
	movdqa 96(%rsp),%xmm8
	pxor %xmm9,%xmm9

	movups 16(%rcx),%xmm0
.byte 102,15,56,220,209
.byte 102,15,56,220,217
	leaq 32-16(%rcx,%rax,1),%rcx
	negq %rax
.byte 102,15,56,220,225
	addq $16,%rax
	movups (%rdi),%xmm10
.byte 102,15,56,220,233
.byte 102,15,56,220,241
	movups 16(%rdi),%xmm11
	movups 32(%rdi),%xmm12
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193

	call L$enc_loop8_enter

	movdqu 48(%rdi),%xmm13
	pxor %xmm10,%xmm2
	movdqu 64(%rdi),%xmm10
	pxor %xmm11,%xmm3
	movdqu %xmm2,(%rsi)
	pxor %xmm12,%xmm4
	movdqu %xmm3,16(%rsi)
	pxor %xmm13,%xmm5
	movdqu %xmm4,32(%rsi)
	pxor %xmm10,%xmm6
	movdqu %xmm5,48(%rsi)
	movdqu %xmm6,64(%rsi)
	cmpq $6,%rdx
	jb L$ctr32_done	# exactly 5 blocks

	movups 80(%rdi),%xmm11
	xorps %xmm11,%xmm7
	movups %xmm7,80(%rsi)
	je L$ctr32_done	# exactly 6 blocks; flags still from the cmpq above

	movups 96(%rdi),%xmm12
	xorps %xmm12,%xmm8
	movups %xmm8,96(%rsi)
	jmp L$ctr32_done

# Exactly four blocks left.
.p2align 5
L$ctr32_loop4:
.byte 102,15,56,220,209
	leaq 16(%rcx),%rcx
	decl %eax
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	movups (%rcx),%xmm1
	jnz L$ctr32_loop4
.byte 102,15,56,221,209
.byte 102,15,56,221,217
	movups (%rdi),%xmm10
	movups 16(%rdi),%xmm11
.byte 102,15,56,221,225
.byte 102,15,56,221,233
	movups 32(%rdi),%xmm12
	movups 48(%rdi),%xmm13

	xorps %xmm10,%xmm2
	movups %xmm2,(%rsi)
	xorps %xmm11,%xmm3
	movups %xmm3,16(%rsi)
	pxor %xmm12,%xmm4
	movdqu %xmm4,32(%rsi)
	pxor %xmm13,%xmm5
	movdqu %xmm5,48(%rsi)
	jmp L$ctr32_done

# One to three blocks left. Three blocks are always encrypted; only as many
# outputs as %rdx asks for are stored.
.p2align 5
L$ctr32_loop3:
.byte 102,15,56,220,209
	leaq 16(%rcx),%rcx
	decl %eax
.byte 102,15,56,220,217
.byte 102,15,56,220,225
	movups (%rcx),%xmm1
	jnz L$ctr32_loop3
.byte 102,15,56,221,209
.byte 102,15,56,221,217
.byte 102,15,56,221,225

	movups (%rdi),%xmm10
	xorps %xmm10,%xmm2
	movups %xmm2,(%rsi)
	cmpq $2,%rdx
	jb L$ctr32_done

	movups 16(%rdi),%xmm11
	xorps %xmm11,%xmm3
	movups %xmm3,16(%rsi)
	je L$ctr32_done

	movups 32(%rdi),%xmm12
	xorps %xmm12,%xmm4
	movups %xmm4,32(%rsi)

# Common exit of the bulk path: wipe every vector register and the counter
# blocks on the stack, then restore %rbp and %rsp from the values saved via
# %r11.
L$ctr32_done:
	xorps %xmm0,%xmm0
	xorl %ebp,%ebp
	pxor %xmm1,%xmm1
	pxor %xmm2,%xmm2
	pxor %xmm3,%xmm3
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5
	pxor %xmm6,%xmm6
	pxor %xmm7,%xmm7
	movaps %xmm0,0(%rsp)
	pxor %xmm8,%xmm8
	movaps %xmm0,16(%rsp)
	pxor %xmm9,%xmm9
	movaps %xmm0,32(%rsp)
	pxor %xmm10,%xmm10
	movaps %xmm0,48(%rsp)
	pxor %xmm11,%xmm11
	movaps %xmm0,64(%rsp)
	pxor %xmm12,%xmm12
	movaps %xmm0,80(%rsp)
	pxor %xmm13,%xmm13
	movaps %xmm0,96(%rsp)
	pxor %xmm14,%xmm14
	movaps %xmm0,112(%rsp)
	pxor %xmm15,%xmm15
	movq -8(%r11),%rbp

	leaq (%r11),%rsp

L$ctr32_epilogue:
	.byte 0xf3,0xc3


# _GFp_aes_hw_set_encrypt_key: expand a user key into an encryption key
# schedule.
#   In:    %rdi = user key bytes, %esi = key size in bits (128 or 256),
#          %rdx = key schedule to fill.
#   Out:   %rax = 0 on success, -1 when %rdi or %rdx is null, -2 when %esi
#          is neither 128 nor 256. On success the round keys are stored from
#          offset 0 and the loop count (9 for 128-bit keys, 13 for 256-bit
#          keys) is stored at offset 240 of the schedule.
#   Clobb: %rax, %esi, %r10, flags; %xmm0-%xmm5 are zeroed before returning.
# Two implementations exist per key size: one based on aeskeygenassist and
# an alternative built from pshufb and aesenclast. The alternative is used
# when the second capability dword masked with 0x10000800 equals 0x10000000.
# NOTE(review): presumably bit 28 = AVX and bit 11 = an XOP marker, meaning
# AVX-capable CPUs without XOP - confirm against the perlasm source.
.globl _GFp_aes_hw_set_encrypt_key
.private_extern _GFp_aes_hw_set_encrypt_key

.p2align 4
_GFp_aes_hw_set_encrypt_key:
__aesni_set_encrypt_key:

.byte 0x48,0x83,0xEC,0x08	# subq $8,%rsp; undone by addq $8,%rsp at exit

	movq $-1,%rax
	testq %rdi,%rdi
	jz L$enc_key_ret
	testq %rdx,%rdx
	jz L$enc_key_ret

	movups (%rdi),%xmm0
	xorps %xmm4,%xmm4
	leaq _GFp_ia32cap_P(%rip),%r10
	movl 4(%r10),%r10d
	andl $268437504,%r10d	# mask 0x10000800
	leaq 16(%rdx),%rax	# %rax = write cursor, starting at round key 1
	cmpl $256,%esi
	je L$14rounds

	cmpl $128,%esi
	jne L$bad_keybits

# 128-bit key. The immediates 1,2,4,...,128,27,54 are the round constants.
L$10rounds:
	movl $9,%esi
	cmpl $268435456,%r10d	# 0x10000000
	je L$10rounds_alt

	movups %xmm0,(%rdx)
.byte 102,15,58,223,200,1	# aeskeygenassist $1,%xmm0,%xmm1
	call L$key_expansion_128_cold
.byte 102,15,58,223,200,2
	call L$key_expansion_128
.byte 102,15,58,223,200,4
	call L$key_expansion_128
.byte 102,15,58,223,200,8
	call L$key_expansion_128
.byte 102,15,58,223,200,16
	call L$key_expansion_128
.byte 102,15,58,223,200,32
	call L$key_expansion_128
.byte 102,15,58,223,200,64
	call L$key_expansion_128
.byte 102,15,58,223,200,128
	call L$key_expansion_128
.byte 102,15,58,223,200,27
	call L$key_expansion_128
.byte 102,15,58,223,200,54
	call L$key_expansion_128
	movups %xmm0,(%rax)	# last round key, at offset 160
	movl %esi,80(%rax)	# loop count at offset 240 of the schedule
	xorl %eax,%eax
	jmp L$enc_key_ret

# 128-bit key, alternative path: eight loop iterations with the constant in
# %xmm4 doubling each time, then two more steps starting from L$key_rcon1b.
.p2align 4
L$10rounds_alt:
	movdqa L$key_rotate(%rip),%xmm5
	movl $8,%r10d
	movdqa L$key_rcon1(%rip),%xmm4
	movdqa %xmm0,%xmm2
	movdqu %xmm0,(%rdx)
	jmp L$oop_key128

.p2align 4
L$oop_key128:
.byte 102,15,56,0,197	# pshufb %xmm5,%xmm0
.byte 102,15,56,221,196	# aesenclast %xmm4,%xmm0
	pslld $1,%xmm4
	leaq 16(%rax),%rax

	movdqa %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm3,%xmm2

	pxor %xmm2,%xmm0
	movdqu %xmm0,-16(%rax)
	movdqa %xmm0,%xmm2

	decl %r10d
	jnz L$oop_key128

	movdqa L$key_rcon1b(%rip),%xmm4

.byte 102,15,56,0,197
.byte 102,15,56,221,196
	pslld $1,%xmm4

	movdqa %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm3,%xmm2

	pxor %xmm2,%xmm0
	movdqu %xmm0,(%rax)

	movdqa %xmm0,%xmm2
.byte 102,15,56,0,197
.byte 102,15,56,221,196

	movdqa %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm3,%xmm2

	pxor %xmm2,%xmm0
	movdqu %xmm0,16(%rax)

	movl %esi,96(%rax)	# loop count at offset 240 of the schedule
	xorl %eax,%eax
	jmp L$enc_key_ret



# 256-bit key: %xmm0 = first half, %xmm2 = second half of the user key.
.p2align 4
L$14rounds:
	movups 16(%rdi),%xmm2
	movl $13,%esi
	leaq 16(%rax),%rax	# write cursor now at round key 2
	cmpl $268435456,%r10d	# 0x10000000
	je L$14rounds_alt

	movups %xmm0,(%rdx)
	movups %xmm2,16(%rdx)
.byte 102,15,58,223,202,1	# aeskeygenassist $1,%xmm2,%xmm1
	call L$key_expansion_256a_cold
.byte 102,15,58,223,200,1	# aeskeygenassist $1,%xmm0,%xmm1
	call L$key_expansion_256b
.byte 102,15,58,223,202,2
	call L$key_expansion_256a
.byte 102,15,58,223,200,2
	call L$key_expansion_256b
.byte 102,15,58,223,202,4
	call L$key_expansion_256a
.byte 102,15,58,223,200,4
	call L$key_expansion_256b
.byte 102,15,58,223,202,8
	call L$key_expansion_256a
.byte 102,15,58,223,200,8
	call L$key_expansion_256b
.byte 102,15,58,223,202,16
	call L$key_expansion_256a
.byte 102,15,58,223,200,16
	call L$key_expansion_256b
.byte 102,15,58,223,202,32
	call L$key_expansion_256a
.byte 102,15,58,223,200,32
	call L$key_expansion_256b
.byte 102,15,58,223,202,64
	call L$key_expansion_256a
	movups %xmm0,(%rax)	# last round key, at offset 224
	movl %esi,16(%rax)	# loop count at offset 240 of the schedule
	xorq %rax,%rax
	jmp L$enc_key_ret

# 256-bit key, alternative path: each iteration derives one even round key
# and, unless it is the last of the seven iterations, one odd round key.
.p2align 4
L$14rounds_alt:
	movdqa L$key_rotate(%rip),%xmm5
	movdqa L$key_rcon1(%rip),%xmm4
	movl $7,%r10d
	movdqu %xmm0,0(%rdx)
	movdqa %xmm2,%xmm1
	movdqu %xmm2,16(%rdx)
	jmp L$oop_key256

.p2align 4
L$oop_key256:
.byte 102,15,56,0,213	# pshufb %xmm5,%xmm2
.byte 102,15,56,221,212	# aesenclast %xmm4,%xmm2

	movdqa %xmm0,%xmm3
	pslldq $4,%xmm0
	pxor %xmm0,%xmm3
	pslldq $4,%xmm0
	pxor %xmm0,%xmm3
	pslldq $4,%xmm0
	pxor %xmm3,%xmm0
	pslld $1,%xmm4

	pxor %xmm2,%xmm0
	movdqu %xmm0,(%rax)

	decl %r10d
	jz L$done_key256

	pshufd $0xff,%xmm0,%xmm2
	pxor %xmm3,%xmm3
.byte 102,15,56,221,211	# aesenclast %xmm3,%xmm2 with an all-zero key operand

	movdqa %xmm1,%xmm3
	pslldq $4,%xmm1
	pxor %xmm1,%xmm3
	pslldq $4,%xmm1
	pxor %xmm1,%xmm3
	pslldq $4,%xmm1
	pxor %xmm3,%xmm1

	pxor %xmm1,%xmm2
	movdqu %xmm2,16(%rax)
	leaq 32(%rax),%rax
	movdqa %xmm2,%xmm1

	jmp L$oop_key256

L$done_key256:
	movl %esi,16(%rax)	# loop count at offset 240 of the schedule
	xorl %eax,%eax
	jmp L$enc_key_ret

.p2align 4
L$bad_keybits:
	movq $-2,%rax
# Common exit: wipe key material from the vector registers; %rax already
# holds the return value.
L$enc_key_ret:
	pxor %xmm0,%xmm0
	pxor %xmm1,%xmm1
	pxor %xmm2,%xmm2
	pxor %xmm3,%xmm3
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5
	addq $8,%rsp

	.byte 0xf3,0xc3

L$SEH_end_GFp_set_encrypt_key:

# Key expansion helpers, reached with call from the key setup code above.
# On entry %xmm1 holds the aeskeygenassist result and %rax the write cursor.
# The plain entry points first store the previous round key and advance
# %rax by 16; the _cold entry points skip that store.
.p2align 4
L$key_expansion_128:
	movups %xmm0,(%rax)
	leaq 16(%rax),%rax
L$key_expansion_128_cold:
	shufps $16,%xmm0,%xmm4
	xorps %xmm4,%xmm0
	shufps $140,%xmm0,%xmm4
	xorps %xmm4,%xmm0
	shufps $255,%xmm1,%xmm1	# broadcast dword 3 of the keygenassist result
	xorps %xmm1,%xmm0
	.byte 0xf3,0xc3

# The 192-bit helpers below are not called from any code in this file.
.p2align 4
L$key_expansion_192a:
	movups %xmm0,(%rax)
	leaq 16(%rax),%rax
L$key_expansion_192a_cold:
	movaps %xmm2,%xmm5
L$key_expansion_192b_warm:
	shufps $16,%xmm0,%xmm4
	movdqa %xmm2,%xmm3
	xorps %xmm4,%xmm0
	shufps $140,%xmm0,%xmm4
	pslldq $4,%xmm3
	xorps %xmm4,%xmm0
	pshufd $85,%xmm1,%xmm1
	pxor %xmm3,%xmm2
	pxor %xmm1,%xmm0
	pshufd $255,%xmm0,%xmm3
	pxor %xmm3,%xmm2
	.byte 0xf3,0xc3

.p2align 4
L$key_expansion_192b:
	movaps %xmm0,%xmm3
	shufps $68,%xmm0,%xmm5
	movups %xmm5,(%rax)
	shufps $78,%xmm2,%xmm3
	movups %xmm3,16(%rax)
	leaq 32(%rax),%rax
	jmp L$key_expansion_192b_warm

# 256a stores %xmm2 and then updates %xmm0; 256b stores %xmm0 and then
# updates %xmm2.
.p2align 4
L$key_expansion_256a:
	movups %xmm2,(%rax)
	leaq 16(%rax),%rax
L$key_expansion_256a_cold:
	shufps $16,%xmm0,%xmm4
	xorps %xmm4,%xmm0
	shufps $140,%xmm0,%xmm4
	xorps %xmm4,%xmm0
	shufps $255,%xmm1,%xmm1	# broadcast dword 3 of the keygenassist result
	xorps %xmm1,%xmm0
	.byte 0xf3,0xc3

.p2align 4
L$key_expansion_256b:
	movups %xmm0,(%rax)
	leaq 16(%rax),%rax

	shufps $16,%xmm2,%xmm4
	xorps %xmm4,%xmm2
	shufps $140,%xmm2,%xmm4
	xorps %xmm4,%xmm2
	shufps $170,%xmm1,%xmm1	# broadcast dword 2 of the keygenassist result
	xorps %xmm1,%xmm2
	.byte 0xf3,0xc3


# Constant pool, 64-byte aligned. Only L$key_rotate, L$key_rcon1 and
# L$key_rcon1b are referenced by the code in this file.
.p2align 6
L$bswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
L$increment32:
.long 6,6,6,0
L$increment64:
.long 1,0,0,0
L$increment1:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
L$key_rotate:
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
L$key_rotate192:
.long 0x04070605,0x04070605,0x04070605,0x04070605
L$key_rcon1:
.long 1,1,1,1
L$key_rcon1b:
.long 0x1b,0x1b,0x1b,0x1b

# NUL-terminated ASCII text: AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
#endif