1#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) 2.text 3 4.globl _aesni_encrypt 5.private_extern _aesni_encrypt 6 7.p2align 4 8_aesni_encrypt: 9 movups (%rdi),%xmm2 10 movl 240(%rdx),%eax 11 movups (%rdx),%xmm0 12 movups 16(%rdx),%xmm1 13 leaq 32(%rdx),%rdx 14 xorps %xmm0,%xmm2 15L$oop_enc1_1: 16.byte 102,15,56,220,209 17 decl %eax 18 movups (%rdx),%xmm1 19 leaq 16(%rdx),%rdx 20 jnz L$oop_enc1_1 21.byte 102,15,56,221,209 22 pxor %xmm0,%xmm0 23 pxor %xmm1,%xmm1 24 movups %xmm2,(%rsi) 25 pxor %xmm2,%xmm2 26 .byte 0xf3,0xc3 27 28 29.globl _aesni_decrypt 30.private_extern _aesni_decrypt 31 32.p2align 4 33_aesni_decrypt: 34 movups (%rdi),%xmm2 35 movl 240(%rdx),%eax 36 movups (%rdx),%xmm0 37 movups 16(%rdx),%xmm1 38 leaq 32(%rdx),%rdx 39 xorps %xmm0,%xmm2 40L$oop_dec1_2: 41.byte 102,15,56,222,209 42 decl %eax 43 movups (%rdx),%xmm1 44 leaq 16(%rdx),%rdx 45 jnz L$oop_dec1_2 46.byte 102,15,56,223,209 47 pxor %xmm0,%xmm0 48 pxor %xmm1,%xmm1 49 movups %xmm2,(%rsi) 50 pxor %xmm2,%xmm2 51 .byte 0xf3,0xc3 52 53 54.p2align 4 55_aesni_encrypt2: 56 movups (%rcx),%xmm0 57 shll $4,%eax 58 movups 16(%rcx),%xmm1 59 xorps %xmm0,%xmm2 60 xorps %xmm0,%xmm3 61 movups 32(%rcx),%xmm0 62 leaq 32(%rcx,%rax,1),%rcx 63 negq %rax 64 addq $16,%rax 65 66L$enc_loop2: 67.byte 102,15,56,220,209 68.byte 102,15,56,220,217 69 movups (%rcx,%rax,1),%xmm1 70 addq $32,%rax 71.byte 102,15,56,220,208 72.byte 102,15,56,220,216 73 movups -16(%rcx,%rax,1),%xmm0 74 jnz L$enc_loop2 75 76.byte 102,15,56,220,209 77.byte 102,15,56,220,217 78.byte 102,15,56,221,208 79.byte 102,15,56,221,216 80 .byte 0xf3,0xc3 81 82 83.p2align 4 84_aesni_decrypt2: 85 movups (%rcx),%xmm0 86 shll $4,%eax 87 movups 16(%rcx),%xmm1 88 xorps %xmm0,%xmm2 89 xorps %xmm0,%xmm3 90 movups 32(%rcx),%xmm0 91 leaq 32(%rcx,%rax,1),%rcx 92 negq %rax 93 addq $16,%rax 94 95L$dec_loop2: 96.byte 102,15,56,222,209 97.byte 102,15,56,222,217 98 movups (%rcx,%rax,1),%xmm1 99 addq $32,%rax 100.byte 102,15,56,222,208 101.byte 102,15,56,222,216 102 movups 
-16(%rcx,%rax,1),%xmm0 103 jnz L$dec_loop2 104 105.byte 102,15,56,222,209 106.byte 102,15,56,222,217 107.byte 102,15,56,223,208 108.byte 102,15,56,223,216 109 .byte 0xf3,0xc3 110 111 112.p2align 4 113_aesni_encrypt3: 114 movups (%rcx),%xmm0 115 shll $4,%eax 116 movups 16(%rcx),%xmm1 117 xorps %xmm0,%xmm2 118 xorps %xmm0,%xmm3 119 xorps %xmm0,%xmm4 120 movups 32(%rcx),%xmm0 121 leaq 32(%rcx,%rax,1),%rcx 122 negq %rax 123 addq $16,%rax 124 125L$enc_loop3: 126.byte 102,15,56,220,209 127.byte 102,15,56,220,217 128.byte 102,15,56,220,225 129 movups (%rcx,%rax,1),%xmm1 130 addq $32,%rax 131.byte 102,15,56,220,208 132.byte 102,15,56,220,216 133.byte 102,15,56,220,224 134 movups -16(%rcx,%rax,1),%xmm0 135 jnz L$enc_loop3 136 137.byte 102,15,56,220,209 138.byte 102,15,56,220,217 139.byte 102,15,56,220,225 140.byte 102,15,56,221,208 141.byte 102,15,56,221,216 142.byte 102,15,56,221,224 143 .byte 0xf3,0xc3 144 145 146.p2align 4 147_aesni_decrypt3: 148 movups (%rcx),%xmm0 149 shll $4,%eax 150 movups 16(%rcx),%xmm1 151 xorps %xmm0,%xmm2 152 xorps %xmm0,%xmm3 153 xorps %xmm0,%xmm4 154 movups 32(%rcx),%xmm0 155 leaq 32(%rcx,%rax,1),%rcx 156 negq %rax 157 addq $16,%rax 158 159L$dec_loop3: 160.byte 102,15,56,222,209 161.byte 102,15,56,222,217 162.byte 102,15,56,222,225 163 movups (%rcx,%rax,1),%xmm1 164 addq $32,%rax 165.byte 102,15,56,222,208 166.byte 102,15,56,222,216 167.byte 102,15,56,222,224 168 movups -16(%rcx,%rax,1),%xmm0 169 jnz L$dec_loop3 170 171.byte 102,15,56,222,209 172.byte 102,15,56,222,217 173.byte 102,15,56,222,225 174.byte 102,15,56,223,208 175.byte 102,15,56,223,216 176.byte 102,15,56,223,224 177 .byte 0xf3,0xc3 178 179 180.p2align 4 181_aesni_encrypt4: 182 movups (%rcx),%xmm0 183 shll $4,%eax 184 movups 16(%rcx),%xmm1 185 xorps %xmm0,%xmm2 186 xorps %xmm0,%xmm3 187 xorps %xmm0,%xmm4 188 xorps %xmm0,%xmm5 189 movups 32(%rcx),%xmm0 190 leaq 32(%rcx,%rax,1),%rcx 191 negq %rax 192.byte 0x0f,0x1f,0x00 193 addq $16,%rax 194 195L$enc_loop4: 196.byte 102,15,56,220,209 
197.byte 102,15,56,220,217 198.byte 102,15,56,220,225 199.byte 102,15,56,220,233 200 movups (%rcx,%rax,1),%xmm1 201 addq $32,%rax 202.byte 102,15,56,220,208 203.byte 102,15,56,220,216 204.byte 102,15,56,220,224 205.byte 102,15,56,220,232 206 movups -16(%rcx,%rax,1),%xmm0 207 jnz L$enc_loop4 208 209.byte 102,15,56,220,209 210.byte 102,15,56,220,217 211.byte 102,15,56,220,225 212.byte 102,15,56,220,233 213.byte 102,15,56,221,208 214.byte 102,15,56,221,216 215.byte 102,15,56,221,224 216.byte 102,15,56,221,232 217 .byte 0xf3,0xc3 218 219 220.p2align 4 221_aesni_decrypt4: 222 movups (%rcx),%xmm0 223 shll $4,%eax 224 movups 16(%rcx),%xmm1 225 xorps %xmm0,%xmm2 226 xorps %xmm0,%xmm3 227 xorps %xmm0,%xmm4 228 xorps %xmm0,%xmm5 229 movups 32(%rcx),%xmm0 230 leaq 32(%rcx,%rax,1),%rcx 231 negq %rax 232.byte 0x0f,0x1f,0x00 233 addq $16,%rax 234 235L$dec_loop4: 236.byte 102,15,56,222,209 237.byte 102,15,56,222,217 238.byte 102,15,56,222,225 239.byte 102,15,56,222,233 240 movups (%rcx,%rax,1),%xmm1 241 addq $32,%rax 242.byte 102,15,56,222,208 243.byte 102,15,56,222,216 244.byte 102,15,56,222,224 245.byte 102,15,56,222,232 246 movups -16(%rcx,%rax,1),%xmm0 247 jnz L$dec_loop4 248 249.byte 102,15,56,222,209 250.byte 102,15,56,222,217 251.byte 102,15,56,222,225 252.byte 102,15,56,222,233 253.byte 102,15,56,223,208 254.byte 102,15,56,223,216 255.byte 102,15,56,223,224 256.byte 102,15,56,223,232 257 .byte 0xf3,0xc3 258 259 260.p2align 4 261_aesni_encrypt6: 262 movups (%rcx),%xmm0 263 shll $4,%eax 264 movups 16(%rcx),%xmm1 265 xorps %xmm0,%xmm2 266 pxor %xmm0,%xmm3 267 pxor %xmm0,%xmm4 268.byte 102,15,56,220,209 269 leaq 32(%rcx,%rax,1),%rcx 270 negq %rax 271.byte 102,15,56,220,217 272 pxor %xmm0,%xmm5 273 pxor %xmm0,%xmm6 274.byte 102,15,56,220,225 275 pxor %xmm0,%xmm7 276 movups (%rcx,%rax,1),%xmm0 277 addq $16,%rax 278 jmp L$enc_loop6_enter 279.p2align 4 280L$enc_loop6: 281.byte 102,15,56,220,209 282.byte 102,15,56,220,217 283.byte 102,15,56,220,225 284L$enc_loop6_enter: 285.byte 
102,15,56,220,233 286.byte 102,15,56,220,241 287.byte 102,15,56,220,249 288 movups (%rcx,%rax,1),%xmm1 289 addq $32,%rax 290.byte 102,15,56,220,208 291.byte 102,15,56,220,216 292.byte 102,15,56,220,224 293.byte 102,15,56,220,232 294.byte 102,15,56,220,240 295.byte 102,15,56,220,248 296 movups -16(%rcx,%rax,1),%xmm0 297 jnz L$enc_loop6 298 299.byte 102,15,56,220,209 300.byte 102,15,56,220,217 301.byte 102,15,56,220,225 302.byte 102,15,56,220,233 303.byte 102,15,56,220,241 304.byte 102,15,56,220,249 305.byte 102,15,56,221,208 306.byte 102,15,56,221,216 307.byte 102,15,56,221,224 308.byte 102,15,56,221,232 309.byte 102,15,56,221,240 310.byte 102,15,56,221,248 311 .byte 0xf3,0xc3 312 313 314.p2align 4 315_aesni_decrypt6: 316 movups (%rcx),%xmm0 317 shll $4,%eax 318 movups 16(%rcx),%xmm1 319 xorps %xmm0,%xmm2 320 pxor %xmm0,%xmm3 321 pxor %xmm0,%xmm4 322.byte 102,15,56,222,209 323 leaq 32(%rcx,%rax,1),%rcx 324 negq %rax 325.byte 102,15,56,222,217 326 pxor %xmm0,%xmm5 327 pxor %xmm0,%xmm6 328.byte 102,15,56,222,225 329 pxor %xmm0,%xmm7 330 movups (%rcx,%rax,1),%xmm0 331 addq $16,%rax 332 jmp L$dec_loop6_enter 333.p2align 4 334L$dec_loop6: 335.byte 102,15,56,222,209 336.byte 102,15,56,222,217 337.byte 102,15,56,222,225 338L$dec_loop6_enter: 339.byte 102,15,56,222,233 340.byte 102,15,56,222,241 341.byte 102,15,56,222,249 342 movups (%rcx,%rax,1),%xmm1 343 addq $32,%rax 344.byte 102,15,56,222,208 345.byte 102,15,56,222,216 346.byte 102,15,56,222,224 347.byte 102,15,56,222,232 348.byte 102,15,56,222,240 349.byte 102,15,56,222,248 350 movups -16(%rcx,%rax,1),%xmm0 351 jnz L$dec_loop6 352 353.byte 102,15,56,222,209 354.byte 102,15,56,222,217 355.byte 102,15,56,222,225 356.byte 102,15,56,222,233 357.byte 102,15,56,222,241 358.byte 102,15,56,222,249 359.byte 102,15,56,223,208 360.byte 102,15,56,223,216 361.byte 102,15,56,223,224 362.byte 102,15,56,223,232 363.byte 102,15,56,223,240 364.byte 102,15,56,223,248 365 .byte 0xf3,0xc3 366 367 368.p2align 4 369_aesni_encrypt8: 370 
movups (%rcx),%xmm0 371 shll $4,%eax 372 movups 16(%rcx),%xmm1 373 xorps %xmm0,%xmm2 374 xorps %xmm0,%xmm3 375 pxor %xmm0,%xmm4 376 pxor %xmm0,%xmm5 377 pxor %xmm0,%xmm6 378 leaq 32(%rcx,%rax,1),%rcx 379 negq %rax 380.byte 102,15,56,220,209 381 pxor %xmm0,%xmm7 382 pxor %xmm0,%xmm8 383.byte 102,15,56,220,217 384 pxor %xmm0,%xmm9 385 movups (%rcx,%rax,1),%xmm0 386 addq $16,%rax 387 jmp L$enc_loop8_inner 388.p2align 4 389L$enc_loop8: 390.byte 102,15,56,220,209 391.byte 102,15,56,220,217 392L$enc_loop8_inner: 393.byte 102,15,56,220,225 394.byte 102,15,56,220,233 395.byte 102,15,56,220,241 396.byte 102,15,56,220,249 397.byte 102,68,15,56,220,193 398.byte 102,68,15,56,220,201 399L$enc_loop8_enter: 400 movups (%rcx,%rax,1),%xmm1 401 addq $32,%rax 402.byte 102,15,56,220,208 403.byte 102,15,56,220,216 404.byte 102,15,56,220,224 405.byte 102,15,56,220,232 406.byte 102,15,56,220,240 407.byte 102,15,56,220,248 408.byte 102,68,15,56,220,192 409.byte 102,68,15,56,220,200 410 movups -16(%rcx,%rax,1),%xmm0 411 jnz L$enc_loop8 412 413.byte 102,15,56,220,209 414.byte 102,15,56,220,217 415.byte 102,15,56,220,225 416.byte 102,15,56,220,233 417.byte 102,15,56,220,241 418.byte 102,15,56,220,249 419.byte 102,68,15,56,220,193 420.byte 102,68,15,56,220,201 421.byte 102,15,56,221,208 422.byte 102,15,56,221,216 423.byte 102,15,56,221,224 424.byte 102,15,56,221,232 425.byte 102,15,56,221,240 426.byte 102,15,56,221,248 427.byte 102,68,15,56,221,192 428.byte 102,68,15,56,221,200 429 .byte 0xf3,0xc3 430 431 432.p2align 4 433_aesni_decrypt8: 434 movups (%rcx),%xmm0 435 shll $4,%eax 436 movups 16(%rcx),%xmm1 437 xorps %xmm0,%xmm2 438 xorps %xmm0,%xmm3 439 pxor %xmm0,%xmm4 440 pxor %xmm0,%xmm5 441 pxor %xmm0,%xmm6 442 leaq 32(%rcx,%rax,1),%rcx 443 negq %rax 444.byte 102,15,56,222,209 445 pxor %xmm0,%xmm7 446 pxor %xmm0,%xmm8 447.byte 102,15,56,222,217 448 pxor %xmm0,%xmm9 449 movups (%rcx,%rax,1),%xmm0 450 addq $16,%rax 451 jmp L$dec_loop8_inner 452.p2align 4 453L$dec_loop8: 454.byte 
102,15,56,222,209 455.byte 102,15,56,222,217 456L$dec_loop8_inner: 457.byte 102,15,56,222,225 458.byte 102,15,56,222,233 459.byte 102,15,56,222,241 460.byte 102,15,56,222,249 461.byte 102,68,15,56,222,193 462.byte 102,68,15,56,222,201 463L$dec_loop8_enter: 464 movups (%rcx,%rax,1),%xmm1 465 addq $32,%rax 466.byte 102,15,56,222,208 467.byte 102,15,56,222,216 468.byte 102,15,56,222,224 469.byte 102,15,56,222,232 470.byte 102,15,56,222,240 471.byte 102,15,56,222,248 472.byte 102,68,15,56,222,192 473.byte 102,68,15,56,222,200 474 movups -16(%rcx,%rax,1),%xmm0 475 jnz L$dec_loop8 476 477.byte 102,15,56,222,209 478.byte 102,15,56,222,217 479.byte 102,15,56,222,225 480.byte 102,15,56,222,233 481.byte 102,15,56,222,241 482.byte 102,15,56,222,249 483.byte 102,68,15,56,222,193 484.byte 102,68,15,56,222,201 485.byte 102,15,56,223,208 486.byte 102,15,56,223,216 487.byte 102,15,56,223,224 488.byte 102,15,56,223,232 489.byte 102,15,56,223,240 490.byte 102,15,56,223,248 491.byte 102,68,15,56,223,192 492.byte 102,68,15,56,223,200 493 .byte 0xf3,0xc3 494 495.globl _aesni_ecb_encrypt 496.private_extern _aesni_ecb_encrypt 497 498.p2align 4 499_aesni_ecb_encrypt: 500 andq $-16,%rdx 501 jz L$ecb_ret 502 503 movl 240(%rcx),%eax 504 movups (%rcx),%xmm0 505 movq %rcx,%r11 506 movl %eax,%r10d 507 testl %r8d,%r8d 508 jz L$ecb_decrypt 509 510 cmpq $0x80,%rdx 511 jb L$ecb_enc_tail 512 513 movdqu (%rdi),%xmm2 514 movdqu 16(%rdi),%xmm3 515 movdqu 32(%rdi),%xmm4 516 movdqu 48(%rdi),%xmm5 517 movdqu 64(%rdi),%xmm6 518 movdqu 80(%rdi),%xmm7 519 movdqu 96(%rdi),%xmm8 520 movdqu 112(%rdi),%xmm9 521 leaq 128(%rdi),%rdi 522 subq $0x80,%rdx 523 jmp L$ecb_enc_loop8_enter 524.p2align 4 525L$ecb_enc_loop8: 526 movups %xmm2,(%rsi) 527 movq %r11,%rcx 528 movdqu (%rdi),%xmm2 529 movl %r10d,%eax 530 movups %xmm3,16(%rsi) 531 movdqu 16(%rdi),%xmm3 532 movups %xmm4,32(%rsi) 533 movdqu 32(%rdi),%xmm4 534 movups %xmm5,48(%rsi) 535 movdqu 48(%rdi),%xmm5 536 movups %xmm6,64(%rsi) 537 movdqu 64(%rdi),%xmm6 538 
movups %xmm7,80(%rsi) 539 movdqu 80(%rdi),%xmm7 540 movups %xmm8,96(%rsi) 541 movdqu 96(%rdi),%xmm8 542 movups %xmm9,112(%rsi) 543 leaq 128(%rsi),%rsi 544 movdqu 112(%rdi),%xmm9 545 leaq 128(%rdi),%rdi 546L$ecb_enc_loop8_enter: 547 548 call _aesni_encrypt8 549 550 subq $0x80,%rdx 551 jnc L$ecb_enc_loop8 552 553 movups %xmm2,(%rsi) 554 movq %r11,%rcx 555 movups %xmm3,16(%rsi) 556 movl %r10d,%eax 557 movups %xmm4,32(%rsi) 558 movups %xmm5,48(%rsi) 559 movups %xmm6,64(%rsi) 560 movups %xmm7,80(%rsi) 561 movups %xmm8,96(%rsi) 562 movups %xmm9,112(%rsi) 563 leaq 128(%rsi),%rsi 564 addq $0x80,%rdx 565 jz L$ecb_ret 566 567L$ecb_enc_tail: 568 movups (%rdi),%xmm2 569 cmpq $0x20,%rdx 570 jb L$ecb_enc_one 571 movups 16(%rdi),%xmm3 572 je L$ecb_enc_two 573 movups 32(%rdi),%xmm4 574 cmpq $0x40,%rdx 575 jb L$ecb_enc_three 576 movups 48(%rdi),%xmm5 577 je L$ecb_enc_four 578 movups 64(%rdi),%xmm6 579 cmpq $0x60,%rdx 580 jb L$ecb_enc_five 581 movups 80(%rdi),%xmm7 582 je L$ecb_enc_six 583 movdqu 96(%rdi),%xmm8 584 xorps %xmm9,%xmm9 585 call _aesni_encrypt8 586 movups %xmm2,(%rsi) 587 movups %xmm3,16(%rsi) 588 movups %xmm4,32(%rsi) 589 movups %xmm5,48(%rsi) 590 movups %xmm6,64(%rsi) 591 movups %xmm7,80(%rsi) 592 movups %xmm8,96(%rsi) 593 jmp L$ecb_ret 594.p2align 4 595L$ecb_enc_one: 596 movups (%rcx),%xmm0 597 movups 16(%rcx),%xmm1 598 leaq 32(%rcx),%rcx 599 xorps %xmm0,%xmm2 600L$oop_enc1_3: 601.byte 102,15,56,220,209 602 decl %eax 603 movups (%rcx),%xmm1 604 leaq 16(%rcx),%rcx 605 jnz L$oop_enc1_3 606.byte 102,15,56,221,209 607 movups %xmm2,(%rsi) 608 jmp L$ecb_ret 609.p2align 4 610L$ecb_enc_two: 611 call _aesni_encrypt2 612 movups %xmm2,(%rsi) 613 movups %xmm3,16(%rsi) 614 jmp L$ecb_ret 615.p2align 4 616L$ecb_enc_three: 617 call _aesni_encrypt3 618 movups %xmm2,(%rsi) 619 movups %xmm3,16(%rsi) 620 movups %xmm4,32(%rsi) 621 jmp L$ecb_ret 622.p2align 4 623L$ecb_enc_four: 624 call _aesni_encrypt4 625 movups %xmm2,(%rsi) 626 movups %xmm3,16(%rsi) 627 movups %xmm4,32(%rsi) 628 movups 
%xmm5,48(%rsi) 629 jmp L$ecb_ret 630.p2align 4 631L$ecb_enc_five: 632 xorps %xmm7,%xmm7 633 call _aesni_encrypt6 634 movups %xmm2,(%rsi) 635 movups %xmm3,16(%rsi) 636 movups %xmm4,32(%rsi) 637 movups %xmm5,48(%rsi) 638 movups %xmm6,64(%rsi) 639 jmp L$ecb_ret 640.p2align 4 641L$ecb_enc_six: 642 call _aesni_encrypt6 643 movups %xmm2,(%rsi) 644 movups %xmm3,16(%rsi) 645 movups %xmm4,32(%rsi) 646 movups %xmm5,48(%rsi) 647 movups %xmm6,64(%rsi) 648 movups %xmm7,80(%rsi) 649 jmp L$ecb_ret 650 651.p2align 4 652L$ecb_decrypt: 653 cmpq $0x80,%rdx 654 jb L$ecb_dec_tail 655 656 movdqu (%rdi),%xmm2 657 movdqu 16(%rdi),%xmm3 658 movdqu 32(%rdi),%xmm4 659 movdqu 48(%rdi),%xmm5 660 movdqu 64(%rdi),%xmm6 661 movdqu 80(%rdi),%xmm7 662 movdqu 96(%rdi),%xmm8 663 movdqu 112(%rdi),%xmm9 664 leaq 128(%rdi),%rdi 665 subq $0x80,%rdx 666 jmp L$ecb_dec_loop8_enter 667.p2align 4 668L$ecb_dec_loop8: 669 movups %xmm2,(%rsi) 670 movq %r11,%rcx 671 movdqu (%rdi),%xmm2 672 movl %r10d,%eax 673 movups %xmm3,16(%rsi) 674 movdqu 16(%rdi),%xmm3 675 movups %xmm4,32(%rsi) 676 movdqu 32(%rdi),%xmm4 677 movups %xmm5,48(%rsi) 678 movdqu 48(%rdi),%xmm5 679 movups %xmm6,64(%rsi) 680 movdqu 64(%rdi),%xmm6 681 movups %xmm7,80(%rsi) 682 movdqu 80(%rdi),%xmm7 683 movups %xmm8,96(%rsi) 684 movdqu 96(%rdi),%xmm8 685 movups %xmm9,112(%rsi) 686 leaq 128(%rsi),%rsi 687 movdqu 112(%rdi),%xmm9 688 leaq 128(%rdi),%rdi 689L$ecb_dec_loop8_enter: 690 691 call _aesni_decrypt8 692 693 movups (%r11),%xmm0 694 subq $0x80,%rdx 695 jnc L$ecb_dec_loop8 696 697 movups %xmm2,(%rsi) 698 pxor %xmm2,%xmm2 699 movq %r11,%rcx 700 movups %xmm3,16(%rsi) 701 pxor %xmm3,%xmm3 702 movl %r10d,%eax 703 movups %xmm4,32(%rsi) 704 pxor %xmm4,%xmm4 705 movups %xmm5,48(%rsi) 706 pxor %xmm5,%xmm5 707 movups %xmm6,64(%rsi) 708 pxor %xmm6,%xmm6 709 movups %xmm7,80(%rsi) 710 pxor %xmm7,%xmm7 711 movups %xmm8,96(%rsi) 712 pxor %xmm8,%xmm8 713 movups %xmm9,112(%rsi) 714 pxor %xmm9,%xmm9 715 leaq 128(%rsi),%rsi 716 addq $0x80,%rdx 717 jz L$ecb_ret 718 
719L$ecb_dec_tail: 720 movups (%rdi),%xmm2 721 cmpq $0x20,%rdx 722 jb L$ecb_dec_one 723 movups 16(%rdi),%xmm3 724 je L$ecb_dec_two 725 movups 32(%rdi),%xmm4 726 cmpq $0x40,%rdx 727 jb L$ecb_dec_three 728 movups 48(%rdi),%xmm5 729 je L$ecb_dec_four 730 movups 64(%rdi),%xmm6 731 cmpq $0x60,%rdx 732 jb L$ecb_dec_five 733 movups 80(%rdi),%xmm7 734 je L$ecb_dec_six 735 movups 96(%rdi),%xmm8 736 movups (%rcx),%xmm0 737 xorps %xmm9,%xmm9 738 call _aesni_decrypt8 739 movups %xmm2,(%rsi) 740 pxor %xmm2,%xmm2 741 movups %xmm3,16(%rsi) 742 pxor %xmm3,%xmm3 743 movups %xmm4,32(%rsi) 744 pxor %xmm4,%xmm4 745 movups %xmm5,48(%rsi) 746 pxor %xmm5,%xmm5 747 movups %xmm6,64(%rsi) 748 pxor %xmm6,%xmm6 749 movups %xmm7,80(%rsi) 750 pxor %xmm7,%xmm7 751 movups %xmm8,96(%rsi) 752 pxor %xmm8,%xmm8 753 pxor %xmm9,%xmm9 754 jmp L$ecb_ret 755.p2align 4 756L$ecb_dec_one: 757 movups (%rcx),%xmm0 758 movups 16(%rcx),%xmm1 759 leaq 32(%rcx),%rcx 760 xorps %xmm0,%xmm2 761L$oop_dec1_4: 762.byte 102,15,56,222,209 763 decl %eax 764 movups (%rcx),%xmm1 765 leaq 16(%rcx),%rcx 766 jnz L$oop_dec1_4 767.byte 102,15,56,223,209 768 movups %xmm2,(%rsi) 769 pxor %xmm2,%xmm2 770 jmp L$ecb_ret 771.p2align 4 772L$ecb_dec_two: 773 call _aesni_decrypt2 774 movups %xmm2,(%rsi) 775 pxor %xmm2,%xmm2 776 movups %xmm3,16(%rsi) 777 pxor %xmm3,%xmm3 778 jmp L$ecb_ret 779.p2align 4 780L$ecb_dec_three: 781 call _aesni_decrypt3 782 movups %xmm2,(%rsi) 783 pxor %xmm2,%xmm2 784 movups %xmm3,16(%rsi) 785 pxor %xmm3,%xmm3 786 movups %xmm4,32(%rsi) 787 pxor %xmm4,%xmm4 788 jmp L$ecb_ret 789.p2align 4 790L$ecb_dec_four: 791 call _aesni_decrypt4 792 movups %xmm2,(%rsi) 793 pxor %xmm2,%xmm2 794 movups %xmm3,16(%rsi) 795 pxor %xmm3,%xmm3 796 movups %xmm4,32(%rsi) 797 pxor %xmm4,%xmm4 798 movups %xmm5,48(%rsi) 799 pxor %xmm5,%xmm5 800 jmp L$ecb_ret 801.p2align 4 802L$ecb_dec_five: 803 xorps %xmm7,%xmm7 804 call _aesni_decrypt6 805 movups %xmm2,(%rsi) 806 pxor %xmm2,%xmm2 807 movups %xmm3,16(%rsi) 808 pxor %xmm3,%xmm3 809 movups 
%xmm4,32(%rsi) 810 pxor %xmm4,%xmm4 811 movups %xmm5,48(%rsi) 812 pxor %xmm5,%xmm5 813 movups %xmm6,64(%rsi) 814 pxor %xmm6,%xmm6 815 pxor %xmm7,%xmm7 816 jmp L$ecb_ret 817.p2align 4 818L$ecb_dec_six: 819 call _aesni_decrypt6 820 movups %xmm2,(%rsi) 821 pxor %xmm2,%xmm2 822 movups %xmm3,16(%rsi) 823 pxor %xmm3,%xmm3 824 movups %xmm4,32(%rsi) 825 pxor %xmm4,%xmm4 826 movups %xmm5,48(%rsi) 827 pxor %xmm5,%xmm5 828 movups %xmm6,64(%rsi) 829 pxor %xmm6,%xmm6 830 movups %xmm7,80(%rsi) 831 pxor %xmm7,%xmm7 832 833L$ecb_ret: 834 xorps %xmm0,%xmm0 835 pxor %xmm1,%xmm1 836 .byte 0xf3,0xc3 837 838.globl _aesni_ccm64_encrypt_blocks 839.private_extern _aesni_ccm64_encrypt_blocks 840 841.p2align 4 842_aesni_ccm64_encrypt_blocks: 843 movl 240(%rcx),%eax 844 movdqu (%r8),%xmm6 845 movdqa L$increment64(%rip),%xmm9 846 movdqa L$bswap_mask(%rip),%xmm7 847 848 shll $4,%eax 849 movl $16,%r10d 850 leaq 0(%rcx),%r11 851 movdqu (%r9),%xmm3 852 movdqa %xmm6,%xmm2 853 leaq 32(%rcx,%rax,1),%rcx 854.byte 102,15,56,0,247 855 subq %rax,%r10 856 jmp L$ccm64_enc_outer 857.p2align 4 858L$ccm64_enc_outer: 859 movups (%r11),%xmm0 860 movq %r10,%rax 861 movups (%rdi),%xmm8 862 863 xorps %xmm0,%xmm2 864 movups 16(%r11),%xmm1 865 xorps %xmm8,%xmm0 866 xorps %xmm0,%xmm3 867 movups 32(%r11),%xmm0 868 869L$ccm64_enc2_loop: 870.byte 102,15,56,220,209 871.byte 102,15,56,220,217 872 movups (%rcx,%rax,1),%xmm1 873 addq $32,%rax 874.byte 102,15,56,220,208 875.byte 102,15,56,220,216 876 movups -16(%rcx,%rax,1),%xmm0 877 jnz L$ccm64_enc2_loop 878.byte 102,15,56,220,209 879.byte 102,15,56,220,217 880 paddq %xmm9,%xmm6 881 decq %rdx 882.byte 102,15,56,221,208 883.byte 102,15,56,221,216 884 885 leaq 16(%rdi),%rdi 886 xorps %xmm2,%xmm8 887 movdqa %xmm6,%xmm2 888 movups %xmm8,(%rsi) 889.byte 102,15,56,0,215 890 leaq 16(%rsi),%rsi 891 jnz L$ccm64_enc_outer 892 893 pxor %xmm0,%xmm0 894 pxor %xmm1,%xmm1 895 pxor %xmm2,%xmm2 896 movups %xmm3,(%r9) 897 pxor %xmm3,%xmm3 898 pxor %xmm8,%xmm8 899 pxor %xmm6,%xmm6 900 .byte 
0xf3,0xc3 901 902.globl _aesni_ccm64_decrypt_blocks 903.private_extern _aesni_ccm64_decrypt_blocks 904 905.p2align 4 906_aesni_ccm64_decrypt_blocks: 907 movl 240(%rcx),%eax 908 movups (%r8),%xmm6 909 movdqu (%r9),%xmm3 910 movdqa L$increment64(%rip),%xmm9 911 movdqa L$bswap_mask(%rip),%xmm7 912 913 movaps %xmm6,%xmm2 914 movl %eax,%r10d 915 movq %rcx,%r11 916.byte 102,15,56,0,247 917 movups (%rcx),%xmm0 918 movups 16(%rcx),%xmm1 919 leaq 32(%rcx),%rcx 920 xorps %xmm0,%xmm2 921L$oop_enc1_5: 922.byte 102,15,56,220,209 923 decl %eax 924 movups (%rcx),%xmm1 925 leaq 16(%rcx),%rcx 926 jnz L$oop_enc1_5 927.byte 102,15,56,221,209 928 shll $4,%r10d 929 movl $16,%eax 930 movups (%rdi),%xmm8 931 paddq %xmm9,%xmm6 932 leaq 16(%rdi),%rdi 933 subq %r10,%rax 934 leaq 32(%r11,%r10,1),%rcx 935 movq %rax,%r10 936 jmp L$ccm64_dec_outer 937.p2align 4 938L$ccm64_dec_outer: 939 xorps %xmm2,%xmm8 940 movdqa %xmm6,%xmm2 941 movups %xmm8,(%rsi) 942 leaq 16(%rsi),%rsi 943.byte 102,15,56,0,215 944 945 subq $1,%rdx 946 jz L$ccm64_dec_break 947 948 movups (%r11),%xmm0 949 movq %r10,%rax 950 movups 16(%r11),%xmm1 951 xorps %xmm0,%xmm8 952 xorps %xmm0,%xmm2 953 xorps %xmm8,%xmm3 954 movups 32(%r11),%xmm0 955 jmp L$ccm64_dec2_loop 956.p2align 4 957L$ccm64_dec2_loop: 958.byte 102,15,56,220,209 959.byte 102,15,56,220,217 960 movups (%rcx,%rax,1),%xmm1 961 addq $32,%rax 962.byte 102,15,56,220,208 963.byte 102,15,56,220,216 964 movups -16(%rcx,%rax,1),%xmm0 965 jnz L$ccm64_dec2_loop 966 movups (%rdi),%xmm8 967 paddq %xmm9,%xmm6 968.byte 102,15,56,220,209 969.byte 102,15,56,220,217 970.byte 102,15,56,221,208 971.byte 102,15,56,221,216 972 leaq 16(%rdi),%rdi 973 jmp L$ccm64_dec_outer 974 975.p2align 4 976L$ccm64_dec_break: 977 978 movl 240(%r11),%eax 979 movups (%r11),%xmm0 980 movups 16(%r11),%xmm1 981 xorps %xmm0,%xmm8 982 leaq 32(%r11),%r11 983 xorps %xmm8,%xmm3 984L$oop_enc1_6: 985.byte 102,15,56,220,217 986 decl %eax 987 movups (%r11),%xmm1 988 leaq 16(%r11),%r11 989 jnz L$oop_enc1_6 990.byte 
102,15,56,221,217 991 pxor %xmm0,%xmm0 992 pxor %xmm1,%xmm1 993 pxor %xmm2,%xmm2 994 movups %xmm3,(%r9) 995 pxor %xmm3,%xmm3 996 pxor %xmm8,%xmm8 997 pxor %xmm6,%xmm6 998 .byte 0xf3,0xc3 999 1000.globl _aesni_ctr32_encrypt_blocks 1001.private_extern _aesni_ctr32_encrypt_blocks 1002 1003.p2align 4 1004_aesni_ctr32_encrypt_blocks: 1005 cmpq $1,%rdx 1006 jne L$ctr32_bulk 1007 1008 1009 1010 movups (%r8),%xmm2 1011 movups (%rdi),%xmm3 1012 movl 240(%rcx),%edx 1013 movups (%rcx),%xmm0 1014 movups 16(%rcx),%xmm1 1015 leaq 32(%rcx),%rcx 1016 xorps %xmm0,%xmm2 1017L$oop_enc1_7: 1018.byte 102,15,56,220,209 1019 decl %edx 1020 movups (%rcx),%xmm1 1021 leaq 16(%rcx),%rcx 1022 jnz L$oop_enc1_7 1023.byte 102,15,56,221,209 1024 pxor %xmm0,%xmm0 1025 pxor %xmm1,%xmm1 1026 xorps %xmm3,%xmm2 1027 pxor %xmm3,%xmm3 1028 movups %xmm2,(%rsi) 1029 xorps %xmm2,%xmm2 1030 jmp L$ctr32_epilogue 1031 1032.p2align 4 1033L$ctr32_bulk: 1034 leaq (%rsp),%r11 1035 pushq %rbp 1036 subq $128,%rsp 1037 andq $-16,%rsp 1038 1039 1040 1041 1042 movdqu (%r8),%xmm2 1043 movdqu (%rcx),%xmm0 1044 movl 12(%r8),%r8d 1045 pxor %xmm0,%xmm2 1046 movl 12(%rcx),%ebp 1047 movdqa %xmm2,0(%rsp) 1048 bswapl %r8d 1049 movdqa %xmm2,%xmm3 1050 movdqa %xmm2,%xmm4 1051 movdqa %xmm2,%xmm5 1052 movdqa %xmm2,64(%rsp) 1053 movdqa %xmm2,80(%rsp) 1054 movdqa %xmm2,96(%rsp) 1055 movq %rdx,%r10 1056 movdqa %xmm2,112(%rsp) 1057 1058 leaq 1(%r8),%rax 1059 leaq 2(%r8),%rdx 1060 bswapl %eax 1061 bswapl %edx 1062 xorl %ebp,%eax 1063 xorl %ebp,%edx 1064.byte 102,15,58,34,216,3 1065 leaq 3(%r8),%rax 1066 movdqa %xmm3,16(%rsp) 1067.byte 102,15,58,34,226,3 1068 bswapl %eax 1069 movq %r10,%rdx 1070 leaq 4(%r8),%r10 1071 movdqa %xmm4,32(%rsp) 1072 xorl %ebp,%eax 1073 bswapl %r10d 1074.byte 102,15,58,34,232,3 1075 xorl %ebp,%r10d 1076 movdqa %xmm5,48(%rsp) 1077 leaq 5(%r8),%r9 1078 movl %r10d,64+12(%rsp) 1079 bswapl %r9d 1080 leaq 6(%r8),%r10 1081 movl 240(%rcx),%eax 1082 xorl %ebp,%r9d 1083 bswapl %r10d 1084 movl %r9d,80+12(%rsp) 1085 xorl 
%ebp,%r10d 1086 leaq 7(%r8),%r9 1087 movl %r10d,96+12(%rsp) 1088 bswapl %r9d 1089 leaq _OPENSSL_ia32cap_P(%rip),%r10 1090 movl 4(%r10),%r10d 1091 xorl %ebp,%r9d 1092 andl $71303168,%r10d 1093 movl %r9d,112+12(%rsp) 1094 1095 movups 16(%rcx),%xmm1 1096 1097 movdqa 64(%rsp),%xmm6 1098 movdqa 80(%rsp),%xmm7 1099 1100 cmpq $8,%rdx 1101 jb L$ctr32_tail 1102 1103 subq $6,%rdx 1104 cmpl $4194304,%r10d 1105 je L$ctr32_6x 1106 1107 leaq 128(%rcx),%rcx 1108 subq $2,%rdx 1109 jmp L$ctr32_loop8 1110 1111.p2align 4 1112L$ctr32_6x: 1113 shll $4,%eax 1114 movl $48,%r10d 1115 bswapl %ebp 1116 leaq 32(%rcx,%rax,1),%rcx 1117 subq %rax,%r10 1118 jmp L$ctr32_loop6 1119 1120.p2align 4 1121L$ctr32_loop6: 1122 addl $6,%r8d 1123 movups -48(%rcx,%r10,1),%xmm0 1124.byte 102,15,56,220,209 1125 movl %r8d,%eax 1126 xorl %ebp,%eax 1127.byte 102,15,56,220,217 1128.byte 0x0f,0x38,0xf1,0x44,0x24,12 1129 leal 1(%r8),%eax 1130.byte 102,15,56,220,225 1131 xorl %ebp,%eax 1132.byte 0x0f,0x38,0xf1,0x44,0x24,28 1133.byte 102,15,56,220,233 1134 leal 2(%r8),%eax 1135 xorl %ebp,%eax 1136.byte 102,15,56,220,241 1137.byte 0x0f,0x38,0xf1,0x44,0x24,44 1138 leal 3(%r8),%eax 1139.byte 102,15,56,220,249 1140 movups -32(%rcx,%r10,1),%xmm1 1141 xorl %ebp,%eax 1142 1143.byte 102,15,56,220,208 1144.byte 0x0f,0x38,0xf1,0x44,0x24,60 1145 leal 4(%r8),%eax 1146.byte 102,15,56,220,216 1147 xorl %ebp,%eax 1148.byte 0x0f,0x38,0xf1,0x44,0x24,76 1149.byte 102,15,56,220,224 1150 leal 5(%r8),%eax 1151 xorl %ebp,%eax 1152.byte 102,15,56,220,232 1153.byte 0x0f,0x38,0xf1,0x44,0x24,92 1154 movq %r10,%rax 1155.byte 102,15,56,220,240 1156.byte 102,15,56,220,248 1157 movups -16(%rcx,%r10,1),%xmm0 1158 1159 call L$enc_loop6 1160 1161 movdqu (%rdi),%xmm8 1162 movdqu 16(%rdi),%xmm9 1163 movdqu 32(%rdi),%xmm10 1164 movdqu 48(%rdi),%xmm11 1165 movdqu 64(%rdi),%xmm12 1166 movdqu 80(%rdi),%xmm13 1167 leaq 96(%rdi),%rdi 1168 movups -64(%rcx,%r10,1),%xmm1 1169 pxor %xmm2,%xmm8 1170 movaps 0(%rsp),%xmm2 1171 pxor %xmm3,%xmm9 1172 movaps 
16(%rsp),%xmm3 1173 pxor %xmm4,%xmm10 1174 movaps 32(%rsp),%xmm4 1175 pxor %xmm5,%xmm11 1176 movaps 48(%rsp),%xmm5 1177 pxor %xmm6,%xmm12 1178 movaps 64(%rsp),%xmm6 1179 pxor %xmm7,%xmm13 1180 movaps 80(%rsp),%xmm7 1181 movdqu %xmm8,(%rsi) 1182 movdqu %xmm9,16(%rsi) 1183 movdqu %xmm10,32(%rsi) 1184 movdqu %xmm11,48(%rsi) 1185 movdqu %xmm12,64(%rsi) 1186 movdqu %xmm13,80(%rsi) 1187 leaq 96(%rsi),%rsi 1188 1189 subq $6,%rdx 1190 jnc L$ctr32_loop6 1191 1192 addq $6,%rdx 1193 jz L$ctr32_done 1194 1195 leal -48(%r10),%eax 1196 leaq -80(%rcx,%r10,1),%rcx 1197 negl %eax 1198 shrl $4,%eax 1199 jmp L$ctr32_tail 1200 1201.p2align 5 1202L$ctr32_loop8: 1203 addl $8,%r8d 1204 movdqa 96(%rsp),%xmm8 1205.byte 102,15,56,220,209 1206 movl %r8d,%r9d 1207 movdqa 112(%rsp),%xmm9 1208.byte 102,15,56,220,217 1209 bswapl %r9d 1210 movups 32-128(%rcx),%xmm0 1211.byte 102,15,56,220,225 1212 xorl %ebp,%r9d 1213 nop 1214.byte 102,15,56,220,233 1215 movl %r9d,0+12(%rsp) 1216 leaq 1(%r8),%r9 1217.byte 102,15,56,220,241 1218.byte 102,15,56,220,249 1219.byte 102,68,15,56,220,193 1220.byte 102,68,15,56,220,201 1221 movups 48-128(%rcx),%xmm1 1222 bswapl %r9d 1223.byte 102,15,56,220,208 1224.byte 102,15,56,220,216 1225 xorl %ebp,%r9d 1226.byte 0x66,0x90 1227.byte 102,15,56,220,224 1228.byte 102,15,56,220,232 1229 movl %r9d,16+12(%rsp) 1230 leaq 2(%r8),%r9 1231.byte 102,15,56,220,240 1232.byte 102,15,56,220,248 1233.byte 102,68,15,56,220,192 1234.byte 102,68,15,56,220,200 1235 movups 64-128(%rcx),%xmm0 1236 bswapl %r9d 1237.byte 102,15,56,220,209 1238.byte 102,15,56,220,217 1239 xorl %ebp,%r9d 1240.byte 0x66,0x90 1241.byte 102,15,56,220,225 1242.byte 102,15,56,220,233 1243 movl %r9d,32+12(%rsp) 1244 leaq 3(%r8),%r9 1245.byte 102,15,56,220,241 1246.byte 102,15,56,220,249 1247.byte 102,68,15,56,220,193 1248.byte 102,68,15,56,220,201 1249 movups 80-128(%rcx),%xmm1 1250 bswapl %r9d 1251.byte 102,15,56,220,208 1252.byte 102,15,56,220,216 1253 xorl %ebp,%r9d 1254.byte 0x66,0x90 1255.byte 102,15,56,220,224 
1256.byte 102,15,56,220,232 1257 movl %r9d,48+12(%rsp) 1258 leaq 4(%r8),%r9 1259.byte 102,15,56,220,240 1260.byte 102,15,56,220,248 1261.byte 102,68,15,56,220,192 1262.byte 102,68,15,56,220,200 1263 movups 96-128(%rcx),%xmm0 1264 bswapl %r9d 1265.byte 102,15,56,220,209 1266.byte 102,15,56,220,217 1267 xorl %ebp,%r9d 1268.byte 0x66,0x90 1269.byte 102,15,56,220,225 1270.byte 102,15,56,220,233 1271 movl %r9d,64+12(%rsp) 1272 leaq 5(%r8),%r9 1273.byte 102,15,56,220,241 1274.byte 102,15,56,220,249 1275.byte 102,68,15,56,220,193 1276.byte 102,68,15,56,220,201 1277 movups 112-128(%rcx),%xmm1 1278 bswapl %r9d 1279.byte 102,15,56,220,208 1280.byte 102,15,56,220,216 1281 xorl %ebp,%r9d 1282.byte 0x66,0x90 1283.byte 102,15,56,220,224 1284.byte 102,15,56,220,232 1285 movl %r9d,80+12(%rsp) 1286 leaq 6(%r8),%r9 1287.byte 102,15,56,220,240 1288.byte 102,15,56,220,248 1289.byte 102,68,15,56,220,192 1290.byte 102,68,15,56,220,200 1291 movups 128-128(%rcx),%xmm0 1292 bswapl %r9d 1293.byte 102,15,56,220,209 1294.byte 102,15,56,220,217 1295 xorl %ebp,%r9d 1296.byte 0x66,0x90 1297.byte 102,15,56,220,225 1298.byte 102,15,56,220,233 1299 movl %r9d,96+12(%rsp) 1300 leaq 7(%r8),%r9 1301.byte 102,15,56,220,241 1302.byte 102,15,56,220,249 1303.byte 102,68,15,56,220,193 1304.byte 102,68,15,56,220,201 1305 movups 144-128(%rcx),%xmm1 1306 bswapl %r9d 1307.byte 102,15,56,220,208 1308.byte 102,15,56,220,216 1309.byte 102,15,56,220,224 1310 xorl %ebp,%r9d 1311 movdqu 0(%rdi),%xmm10 1312.byte 102,15,56,220,232 1313 movl %r9d,112+12(%rsp) 1314 cmpl $11,%eax 1315.byte 102,15,56,220,240 1316.byte 102,15,56,220,248 1317.byte 102,68,15,56,220,192 1318.byte 102,68,15,56,220,200 1319 movups 160-128(%rcx),%xmm0 1320 1321 jb L$ctr32_enc_done 1322 1323.byte 102,15,56,220,209 1324.byte 102,15,56,220,217 1325.byte 102,15,56,220,225 1326.byte 102,15,56,220,233 1327.byte 102,15,56,220,241 1328.byte 102,15,56,220,249 1329.byte 102,68,15,56,220,193 1330.byte 102,68,15,56,220,201 1331 movups 176-128(%rcx),%xmm1 
1332 1333.byte 102,15,56,220,208 1334.byte 102,15,56,220,216 1335.byte 102,15,56,220,224 1336.byte 102,15,56,220,232 1337.byte 102,15,56,220,240 1338.byte 102,15,56,220,248 1339.byte 102,68,15,56,220,192 1340.byte 102,68,15,56,220,200 1341 movups 192-128(%rcx),%xmm0 1342 je L$ctr32_enc_done 1343 1344.byte 102,15,56,220,209 1345.byte 102,15,56,220,217 1346.byte 102,15,56,220,225 1347.byte 102,15,56,220,233 1348.byte 102,15,56,220,241 1349.byte 102,15,56,220,249 1350.byte 102,68,15,56,220,193 1351.byte 102,68,15,56,220,201 1352 movups 208-128(%rcx),%xmm1 1353 1354.byte 102,15,56,220,208 1355.byte 102,15,56,220,216 1356.byte 102,15,56,220,224 1357.byte 102,15,56,220,232 1358.byte 102,15,56,220,240 1359.byte 102,15,56,220,248 1360.byte 102,68,15,56,220,192 1361.byte 102,68,15,56,220,200 1362 movups 224-128(%rcx),%xmm0 1363 jmp L$ctr32_enc_done 1364 1365.p2align 4 1366L$ctr32_enc_done: 1367 movdqu 16(%rdi),%xmm11 1368 pxor %xmm0,%xmm10 1369 movdqu 32(%rdi),%xmm12 1370 pxor %xmm0,%xmm11 1371 movdqu 48(%rdi),%xmm13 1372 pxor %xmm0,%xmm12 1373 movdqu 64(%rdi),%xmm14 1374 pxor %xmm0,%xmm13 1375 movdqu 80(%rdi),%xmm15 1376 pxor %xmm0,%xmm14 1377 pxor %xmm0,%xmm15 1378.byte 102,15,56,220,209 1379.byte 102,15,56,220,217 1380.byte 102,15,56,220,225 1381.byte 102,15,56,220,233 1382.byte 102,15,56,220,241 1383.byte 102,15,56,220,249 1384.byte 102,68,15,56,220,193 1385.byte 102,68,15,56,220,201 1386 movdqu 96(%rdi),%xmm1 1387 leaq 128(%rdi),%rdi 1388 1389.byte 102,65,15,56,221,210 1390 pxor %xmm0,%xmm1 1391 movdqu 112-128(%rdi),%xmm10 1392.byte 102,65,15,56,221,219 1393 pxor %xmm0,%xmm10 1394 movdqa 0(%rsp),%xmm11 1395.byte 102,65,15,56,221,228 1396.byte 102,65,15,56,221,237 1397 movdqa 16(%rsp),%xmm12 1398 movdqa 32(%rsp),%xmm13 1399.byte 102,65,15,56,221,246 1400.byte 102,65,15,56,221,255 1401 movdqa 48(%rsp),%xmm14 1402 movdqa 64(%rsp),%xmm15 1403.byte 102,68,15,56,221,193 1404 movdqa 80(%rsp),%xmm0 1405 movups 16-128(%rcx),%xmm1 1406.byte 102,69,15,56,221,202 1407 1408 movups 
%xmm2,(%rsi) 1409 movdqa %xmm11,%xmm2 1410 movups %xmm3,16(%rsi) 1411 movdqa %xmm12,%xmm3 1412 movups %xmm4,32(%rsi) 1413 movdqa %xmm13,%xmm4 1414 movups %xmm5,48(%rsi) 1415 movdqa %xmm14,%xmm5 1416 movups %xmm6,64(%rsi) 1417 movdqa %xmm15,%xmm6 1418 movups %xmm7,80(%rsi) 1419 movdqa %xmm0,%xmm7 1420 movups %xmm8,96(%rsi) 1421 movups %xmm9,112(%rsi) 1422 leaq 128(%rsi),%rsi 1423 1424 subq $8,%rdx 1425 jnc L$ctr32_loop8 1426 1427 addq $8,%rdx 1428 jz L$ctr32_done 1429 leaq -128(%rcx),%rcx 1430 1431L$ctr32_tail: 1432 1433 1434 leaq 16(%rcx),%rcx 1435 cmpq $4,%rdx 1436 jb L$ctr32_loop3 1437 je L$ctr32_loop4 1438 1439 1440 shll $4,%eax 1441 movdqa 96(%rsp),%xmm8 1442 pxor %xmm9,%xmm9 1443 1444 movups 16(%rcx),%xmm0 1445.byte 102,15,56,220,209 1446.byte 102,15,56,220,217 1447 leaq 32-16(%rcx,%rax,1),%rcx 1448 negq %rax 1449.byte 102,15,56,220,225 1450 addq $16,%rax 1451 movups (%rdi),%xmm10 1452.byte 102,15,56,220,233 1453.byte 102,15,56,220,241 1454 movups 16(%rdi),%xmm11 1455 movups 32(%rdi),%xmm12 1456.byte 102,15,56,220,249 1457.byte 102,68,15,56,220,193 1458 1459 call L$enc_loop8_enter 1460 1461 movdqu 48(%rdi),%xmm13 1462 pxor %xmm10,%xmm2 1463 movdqu 64(%rdi),%xmm10 1464 pxor %xmm11,%xmm3 1465 movdqu %xmm2,(%rsi) 1466 pxor %xmm12,%xmm4 1467 movdqu %xmm3,16(%rsi) 1468 pxor %xmm13,%xmm5 1469 movdqu %xmm4,32(%rsi) 1470 pxor %xmm10,%xmm6 1471 movdqu %xmm5,48(%rsi) 1472 movdqu %xmm6,64(%rsi) 1473 cmpq $6,%rdx 1474 jb L$ctr32_done 1475 1476 movups 80(%rdi),%xmm11 1477 xorps %xmm11,%xmm7 1478 movups %xmm7,80(%rsi) 1479 je L$ctr32_done 1480 1481 movups 96(%rdi),%xmm12 1482 xorps %xmm12,%xmm8 1483 movups %xmm8,96(%rsi) 1484 jmp L$ctr32_done 1485 1486.p2align 5 1487L$ctr32_loop4: 1488.byte 102,15,56,220,209 1489 leaq 16(%rcx),%rcx 1490 decl %eax 1491.byte 102,15,56,220,217 1492.byte 102,15,56,220,225 1493.byte 102,15,56,220,233 1494 movups (%rcx),%xmm1 1495 jnz L$ctr32_loop4 1496.byte 102,15,56,221,209 1497.byte 102,15,56,221,217 1498 movups (%rdi),%xmm10 1499 movups 
16(%rdi),%xmm11 1500.byte 102,15,56,221,225 1501.byte 102,15,56,221,233 1502 movups 32(%rdi),%xmm12 1503 movups 48(%rdi),%xmm13 1504 1505 xorps %xmm10,%xmm2 1506 movups %xmm2,(%rsi) 1507 xorps %xmm11,%xmm3 1508 movups %xmm3,16(%rsi) 1509 pxor %xmm12,%xmm4 1510 movdqu %xmm4,32(%rsi) 1511 pxor %xmm13,%xmm5 1512 movdqu %xmm5,48(%rsi) 1513 jmp L$ctr32_done 1514 1515.p2align 5 1516L$ctr32_loop3: 1517.byte 102,15,56,220,209 1518 leaq 16(%rcx),%rcx 1519 decl %eax 1520.byte 102,15,56,220,217 1521.byte 102,15,56,220,225 1522 movups (%rcx),%xmm1 1523 jnz L$ctr32_loop3 1524.byte 102,15,56,221,209 1525.byte 102,15,56,221,217 1526.byte 102,15,56,221,225 1527 1528 movups (%rdi),%xmm10 1529 xorps %xmm10,%xmm2 1530 movups %xmm2,(%rsi) 1531 cmpq $2,%rdx 1532 jb L$ctr32_done 1533 1534 movups 16(%rdi),%xmm11 1535 xorps %xmm11,%xmm3 1536 movups %xmm3,16(%rsi) 1537 je L$ctr32_done 1538 1539 movups 32(%rdi),%xmm12 1540 xorps %xmm12,%xmm4 1541 movups %xmm4,32(%rsi) 1542 1543L$ctr32_done: 1544 xorps %xmm0,%xmm0 1545 xorl %ebp,%ebp 1546 pxor %xmm1,%xmm1 1547 pxor %xmm2,%xmm2 1548 pxor %xmm3,%xmm3 1549 pxor %xmm4,%xmm4 1550 pxor %xmm5,%xmm5 1551 pxor %xmm6,%xmm6 1552 pxor %xmm7,%xmm7 1553 movaps %xmm0,0(%rsp) 1554 pxor %xmm8,%xmm8 1555 movaps %xmm0,16(%rsp) 1556 pxor %xmm9,%xmm9 1557 movaps %xmm0,32(%rsp) 1558 pxor %xmm10,%xmm10 1559 movaps %xmm0,48(%rsp) 1560 pxor %xmm11,%xmm11 1561 movaps %xmm0,64(%rsp) 1562 pxor %xmm12,%xmm12 1563 movaps %xmm0,80(%rsp) 1564 pxor %xmm13,%xmm13 1565 movaps %xmm0,96(%rsp) 1566 pxor %xmm14,%xmm14 1567 movaps %xmm0,112(%rsp) 1568 pxor %xmm15,%xmm15 1569 movq -8(%r11),%rbp 1570 leaq (%r11),%rsp 1571L$ctr32_epilogue: 1572 .byte 0xf3,0xc3 1573 /*
 * _aesni_xts_encrypt -- AES-NI XTS encryption, six blocks per main-loop pass.
 *
 * Registers as the code reads them on entry:
 *   %rdi  input pointer (read in 16-byte blocks)
 *   %rsi  output pointer
 *   %rdx  length in bytes; rounded down to a multiple of 16 for the block
 *         loops, while the unrounded value is kept in %r9 for the tail
 *   %rcx  key schedule applied to the data blocks, loop count at offset 240
 *   %r8   key schedule used only to encrypt the starting tweak
 *   %r9   pointer to the 16-byte starting tweak
 * NOTE(review): presumably the C prototype is (in, out, len, key1, key2, iv);
 * confirm against the header that declares this symbol.
 *
 * Outline:
 *   1. The block at (%r9) is encrypted with the %r8 schedule (L$oop_enc1_8).
 *   2. Five tweak values are derived into %xmm10..%xmm14, each XORed with
 *      the first round key from (%rcx), and the running tweak stays in
 *      %xmm15. paddq doubles each 64-bit half and the psrad/pand/pxor
 *      sequence folds in bits selected by L$xts_magic -- presumably the
 *      GF(2^128) multiply-by-x of XTS; the constant is defined outside this
 *      view.
 *   3. L$xts_enc_grandloop consumes 96 bytes per pass; L$xts_enc_short
 *      dispatches the remaining one to five blocks.
 *   4. If the length is not a multiple of 16, L$xts_enc_steal swaps the
 *      trailing input bytes with bytes of the last output block, and that
 *      block is then encrypted once more with the mask in %xmm10.
 *   5. L$xts_enc_ret zeroes %xmm0..%xmm15 and the stack scratch area, then
 *      restores %rbp and %rsp through %r11.
 *
 * The .byte sequences are pre-encoded AES-NI instructions:
 * 102,15,56,220,209 is aesenc %xmm1,%xmm2 and 102,15,56,221,209 is
 * aesenclast %xmm1,%xmm2.
 */1574.globl _aesni_xts_encrypt 1575.private_extern _aesni_xts_encrypt 1576 1577.p2align 4 1578_aesni_xts_encrypt: 1579 leaq (%rsp),%r11 /* %r11 = entry %rsp, used to unwind at L$xts_enc_ret */1580 pushq %rbp 1581 subq $112,%rsp 1582 andq $-16,%rsp /* 16-byte aligned scratch area, required by the movdqa/movaps stack accesses */1583 movups (%r9),%xmm2 1584 movl 240(%r8),%eax 1585 movl 240(%rcx),%r10d /* loop counts of both schedules are read from offset 240 */1586 movups (%r8),%xmm0 1587 movups 16(%r8),%xmm1 1588 leaq 
32(%r8),%r8 1589 xorps %xmm0,%xmm2 1590L$oop_enc1_8: 1591.byte 102,15,56,220,209 1592 decl %eax 1593 movups (%r8),%xmm1 1594 leaq 16(%r8),%r8 1595 jnz L$oop_enc1_8 1596.byte 102,15,56,221,209 1597 movups (%rcx),%xmm0 1598 movq %rcx,%rbp 1599 movl %r10d,%eax 1600 shll $4,%r10d 1601 movq %rdx,%r9 1602 andq $-16,%rdx 1603 1604 movups 16(%rcx,%r10,1),%xmm1 1605 1606 movdqa L$xts_magic(%rip),%xmm8 1607 movdqa %xmm2,%xmm15 1608 pshufd $0x5f,%xmm2,%xmm9 1609 pxor %xmm0,%xmm1 1610 movdqa %xmm9,%xmm14 1611 paddd %xmm9,%xmm9 1612 movdqa %xmm15,%xmm10 1613 psrad $31,%xmm14 1614 paddq %xmm15,%xmm15 1615 pand %xmm8,%xmm14 1616 pxor %xmm0,%xmm10 1617 pxor %xmm14,%xmm15 1618 movdqa %xmm9,%xmm14 1619 paddd %xmm9,%xmm9 1620 movdqa %xmm15,%xmm11 1621 psrad $31,%xmm14 1622 paddq %xmm15,%xmm15 1623 pand %xmm8,%xmm14 1624 pxor %xmm0,%xmm11 1625 pxor %xmm14,%xmm15 1626 movdqa %xmm9,%xmm14 1627 paddd %xmm9,%xmm9 1628 movdqa %xmm15,%xmm12 1629 psrad $31,%xmm14 1630 paddq %xmm15,%xmm15 1631 pand %xmm8,%xmm14 1632 pxor %xmm0,%xmm12 1633 pxor %xmm14,%xmm15 1634 movdqa %xmm9,%xmm14 1635 paddd %xmm9,%xmm9 1636 movdqa %xmm15,%xmm13 1637 psrad $31,%xmm14 1638 paddq %xmm15,%xmm15 1639 pand %xmm8,%xmm14 1640 pxor %xmm0,%xmm13 1641 pxor %xmm14,%xmm15 1642 movdqa %xmm15,%xmm14 1643 psrad $31,%xmm9 1644 paddq %xmm15,%xmm15 1645 pand %xmm8,%xmm9 1646 pxor %xmm0,%xmm14 1647 pxor %xmm9,%xmm15 1648 movaps %xmm1,96(%rsp) 1649 1650 subq $96,%rdx 1651 jc L$xts_enc_short 1652 1653 movl $16+96,%eax 1654 leaq 32(%rbp,%r10,1),%rcx 1655 subq %r10,%rax 1656 movups 16(%rbp),%xmm1 1657 movq %rax,%r10 1658 leaq L$xts_magic(%rip),%r8 1659 jmp L$xts_enc_grandloop 1660 1661.p2align 5 1662L$xts_enc_grandloop: 1663 movdqu 0(%rdi),%xmm2 1664 movdqa %xmm0,%xmm8 1665 movdqu 16(%rdi),%xmm3 1666 pxor %xmm10,%xmm2 1667 movdqu 32(%rdi),%xmm4 1668 pxor %xmm11,%xmm3 1669.byte 102,15,56,220,209 1670 movdqu 48(%rdi),%xmm5 1671 pxor %xmm12,%xmm4 1672.byte 102,15,56,220,217 1673 movdqu 64(%rdi),%xmm6 1674 pxor %xmm13,%xmm5 1675.byte 
102,15,56,220,225 1676 movdqu 80(%rdi),%xmm7 1677 pxor %xmm15,%xmm8 1678 movdqa 96(%rsp),%xmm9 1679 pxor %xmm14,%xmm6 1680.byte 102,15,56,220,233 1681 movups 32(%rbp),%xmm0 1682 leaq 96(%rdi),%rdi 1683 pxor %xmm8,%xmm7 1684 1685 pxor %xmm9,%xmm10 1686.byte 102,15,56,220,241 1687 pxor %xmm9,%xmm11 1688 movdqa %xmm10,0(%rsp) 1689.byte 102,15,56,220,249 1690 movups 48(%rbp),%xmm1 1691 pxor %xmm9,%xmm12 1692 1693.byte 102,15,56,220,208 1694 pxor %xmm9,%xmm13 1695 movdqa %xmm11,16(%rsp) 1696.byte 102,15,56,220,216 1697 pxor %xmm9,%xmm14 1698 movdqa %xmm12,32(%rsp) 1699.byte 102,15,56,220,224 1700.byte 102,15,56,220,232 1701 pxor %xmm9,%xmm8 1702 movdqa %xmm14,64(%rsp) 1703.byte 102,15,56,220,240 1704.byte 102,15,56,220,248 1705 movups 64(%rbp),%xmm0 1706 movdqa %xmm8,80(%rsp) 1707 pshufd $0x5f,%xmm15,%xmm9 1708 jmp L$xts_enc_loop6 1709.p2align 5 1710L$xts_enc_loop6: 1711.byte 102,15,56,220,209 1712.byte 102,15,56,220,217 1713.byte 102,15,56,220,225 1714.byte 102,15,56,220,233 1715.byte 102,15,56,220,241 1716.byte 102,15,56,220,249 1717 movups -64(%rcx,%rax,1),%xmm1 1718 addq $32,%rax 1719 1720.byte 102,15,56,220,208 1721.byte 102,15,56,220,216 1722.byte 102,15,56,220,224 1723.byte 102,15,56,220,232 1724.byte 102,15,56,220,240 1725.byte 102,15,56,220,248 1726 movups -80(%rcx,%rax,1),%xmm0 1727 jnz L$xts_enc_loop6 1728 1729 movdqa (%r8),%xmm8 1730 movdqa %xmm9,%xmm14 1731 paddd %xmm9,%xmm9 1732.byte 102,15,56,220,209 1733 paddq %xmm15,%xmm15 1734 psrad $31,%xmm14 1735.byte 102,15,56,220,217 1736 pand %xmm8,%xmm14 1737 movups (%rbp),%xmm10 1738.byte 102,15,56,220,225 1739.byte 102,15,56,220,233 1740.byte 102,15,56,220,241 1741 pxor %xmm14,%xmm15 1742 movaps %xmm10,%xmm11 1743.byte 102,15,56,220,249 1744 movups -64(%rcx),%xmm1 1745 1746 movdqa %xmm9,%xmm14 1747.byte 102,15,56,220,208 1748 paddd %xmm9,%xmm9 1749 pxor %xmm15,%xmm10 1750.byte 102,15,56,220,216 1751 psrad $31,%xmm14 1752 paddq %xmm15,%xmm15 1753.byte 102,15,56,220,224 1754.byte 102,15,56,220,232 1755 pand 
%xmm8,%xmm14 1756 movaps %xmm11,%xmm12 1757.byte 102,15,56,220,240 1758 pxor %xmm14,%xmm15 1759 movdqa %xmm9,%xmm14 1760.byte 102,15,56,220,248 1761 movups -48(%rcx),%xmm0 1762 1763 paddd %xmm9,%xmm9 1764.byte 102,15,56,220,209 1765 pxor %xmm15,%xmm11 1766 psrad $31,%xmm14 1767.byte 102,15,56,220,217 1768 paddq %xmm15,%xmm15 1769 pand %xmm8,%xmm14 1770.byte 102,15,56,220,225 1771.byte 102,15,56,220,233 1772 movdqa %xmm13,48(%rsp) 1773 pxor %xmm14,%xmm15 1774.byte 102,15,56,220,241 1775 movaps %xmm12,%xmm13 1776 movdqa %xmm9,%xmm14 1777.byte 102,15,56,220,249 1778 movups -32(%rcx),%xmm1 1779 1780 paddd %xmm9,%xmm9 1781.byte 102,15,56,220,208 1782 pxor %xmm15,%xmm12 1783 psrad $31,%xmm14 1784.byte 102,15,56,220,216 1785 paddq %xmm15,%xmm15 1786 pand %xmm8,%xmm14 1787.byte 102,15,56,220,224 1788.byte 102,15,56,220,232 1789.byte 102,15,56,220,240 1790 pxor %xmm14,%xmm15 1791 movaps %xmm13,%xmm14 1792.byte 102,15,56,220,248 1793 1794 movdqa %xmm9,%xmm0 1795 paddd %xmm9,%xmm9 1796.byte 102,15,56,220,209 1797 pxor %xmm15,%xmm13 1798 psrad $31,%xmm0 1799.byte 102,15,56,220,217 1800 paddq %xmm15,%xmm15 1801 pand %xmm8,%xmm0 1802.byte 102,15,56,220,225 1803.byte 102,15,56,220,233 1804 pxor %xmm0,%xmm15 1805 movups (%rbp),%xmm0 1806.byte 102,15,56,220,241 1807.byte 102,15,56,220,249 1808 movups 16(%rbp),%xmm1 1809 1810 pxor %xmm15,%xmm14 1811.byte 102,15,56,221,84,36,0 1812 psrad $31,%xmm9 1813 paddq %xmm15,%xmm15 1814.byte 102,15,56,221,92,36,16 1815.byte 102,15,56,221,100,36,32 1816 pand %xmm8,%xmm9 1817 movq %r10,%rax 1818.byte 102,15,56,221,108,36,48 1819.byte 102,15,56,221,116,36,64 1820.byte 102,15,56,221,124,36,80 1821 pxor %xmm9,%xmm15 1822 1823 leaq 96(%rsi),%rsi 1824 movups %xmm2,-96(%rsi) 1825 movups %xmm3,-80(%rsi) 1826 movups %xmm4,-64(%rsi) 1827 movups %xmm5,-48(%rsi) 1828 movups %xmm6,-32(%rsi) 1829 movups %xmm7,-16(%rsi) 1830 subq $96,%rdx 1831 jnc L$xts_enc_grandloop 1832 1833 movl $16+96,%eax 1834 subl %r10d,%eax 1835 movq %rbp,%rcx 1836 shrl $4,%eax 1837 
1838L$xts_enc_short: 1839 1840 movl %eax,%r10d 1841 pxor %xmm0,%xmm10 1842 addq $96,%rdx 1843 jz L$xts_enc_done 1844 1845 pxor %xmm0,%xmm11 1846 cmpq $0x20,%rdx 1847 jb L$xts_enc_one 1848 pxor %xmm0,%xmm12 1849 je L$xts_enc_two 1850 1851 pxor %xmm0,%xmm13 1852 cmpq $0x40,%rdx 1853 jb L$xts_enc_three 1854 pxor %xmm0,%xmm14 1855 je L$xts_enc_four 1856 1857 movdqu (%rdi),%xmm2 1858 movdqu 16(%rdi),%xmm3 1859 movdqu 32(%rdi),%xmm4 1860 pxor %xmm10,%xmm2 1861 movdqu 48(%rdi),%xmm5 1862 pxor %xmm11,%xmm3 1863 movdqu 64(%rdi),%xmm6 1864 leaq 80(%rdi),%rdi 1865 pxor %xmm12,%xmm4 1866 pxor %xmm13,%xmm5 1867 pxor %xmm14,%xmm6 1868 pxor %xmm7,%xmm7 1869 1870 call _aesni_encrypt6 1871 1872 xorps %xmm10,%xmm2 1873 movdqa %xmm15,%xmm10 1874 xorps %xmm11,%xmm3 1875 xorps %xmm12,%xmm4 1876 movdqu %xmm2,(%rsi) 1877 xorps %xmm13,%xmm5 1878 movdqu %xmm3,16(%rsi) 1879 xorps %xmm14,%xmm6 1880 movdqu %xmm4,32(%rsi) 1881 movdqu %xmm5,48(%rsi) 1882 movdqu %xmm6,64(%rsi) 1883 leaq 80(%rsi),%rsi 1884 jmp L$xts_enc_done 1885 1886.p2align 4 1887L$xts_enc_one: 1888 movups (%rdi),%xmm2 1889 leaq 16(%rdi),%rdi 1890 xorps %xmm10,%xmm2 1891 movups (%rcx),%xmm0 1892 movups 16(%rcx),%xmm1 1893 leaq 32(%rcx),%rcx 1894 xorps %xmm0,%xmm2 1895L$oop_enc1_9: 1896.byte 102,15,56,220,209 1897 decl %eax 1898 movups (%rcx),%xmm1 1899 leaq 16(%rcx),%rcx 1900 jnz L$oop_enc1_9 1901.byte 102,15,56,221,209 1902 xorps %xmm10,%xmm2 1903 movdqa %xmm11,%xmm10 1904 movups %xmm2,(%rsi) 1905 leaq 16(%rsi),%rsi 1906 jmp L$xts_enc_done 1907 1908.p2align 4 1909L$xts_enc_two: 1910 movups (%rdi),%xmm2 1911 movups 16(%rdi),%xmm3 1912 leaq 32(%rdi),%rdi 1913 xorps %xmm10,%xmm2 1914 xorps %xmm11,%xmm3 1915 1916 call _aesni_encrypt2 1917 1918 xorps %xmm10,%xmm2 1919 movdqa %xmm12,%xmm10 1920 xorps %xmm11,%xmm3 1921 movups %xmm2,(%rsi) 1922 movups %xmm3,16(%rsi) 1923 leaq 32(%rsi),%rsi 1924 jmp L$xts_enc_done 1925 1926.p2align 4 1927L$xts_enc_three: 1928 movups (%rdi),%xmm2 1929 movups 16(%rdi),%xmm3 1930 movups 32(%rdi),%xmm4 
1931 leaq 48(%rdi),%rdi 1932 xorps %xmm10,%xmm2 1933 xorps %xmm11,%xmm3 1934 xorps %xmm12,%xmm4 1935 1936 call _aesni_encrypt3 1937 1938 xorps %xmm10,%xmm2 1939 movdqa %xmm13,%xmm10 1940 xorps %xmm11,%xmm3 1941 xorps %xmm12,%xmm4 1942 movups %xmm2,(%rsi) 1943 movups %xmm3,16(%rsi) 1944 movups %xmm4,32(%rsi) 1945 leaq 48(%rsi),%rsi 1946 jmp L$xts_enc_done 1947 1948.p2align 4 1949L$xts_enc_four: 1950 movups (%rdi),%xmm2 1951 movups 16(%rdi),%xmm3 1952 movups 32(%rdi),%xmm4 1953 xorps %xmm10,%xmm2 1954 movups 48(%rdi),%xmm5 1955 leaq 64(%rdi),%rdi 1956 xorps %xmm11,%xmm3 1957 xorps %xmm12,%xmm4 1958 xorps %xmm13,%xmm5 1959 1960 call _aesni_encrypt4 1961 1962 pxor %xmm10,%xmm2 1963 movdqa %xmm14,%xmm10 1964 pxor %xmm11,%xmm3 1965 pxor %xmm12,%xmm4 1966 movdqu %xmm2,(%rsi) 1967 pxor %xmm13,%xmm5 1968 movdqu %xmm3,16(%rsi) 1969 movdqu %xmm4,32(%rsi) 1970 movdqu %xmm5,48(%rsi) 1971 leaq 64(%rsi),%rsi 1972 jmp L$xts_enc_done 1973 1974.p2align 4 1975L$xts_enc_done: 1976 andq $15,%r9 1977 jz L$xts_enc_ret 1978 movq %r9,%rdx 1979 1980L$xts_enc_steal: 1981 movzbl (%rdi),%eax 1982 movzbl -16(%rsi),%ecx 1983 leaq 1(%rdi),%rdi 1984 movb %al,-16(%rsi) 1985 movb %cl,0(%rsi) 1986 leaq 1(%rsi),%rsi 1987 subq $1,%rdx 1988 jnz L$xts_enc_steal 1989 1990 subq %r9,%rsi 1991 movq %rbp,%rcx 1992 movl %r10d,%eax 1993 1994 movups -16(%rsi),%xmm2 1995 xorps %xmm10,%xmm2 1996 movups (%rcx),%xmm0 1997 movups 16(%rcx),%xmm1 1998 leaq 32(%rcx),%rcx 1999 xorps %xmm0,%xmm2 2000L$oop_enc1_10: 2001.byte 102,15,56,220,209 2002 decl %eax 2003 movups (%rcx),%xmm1 2004 leaq 16(%rcx),%rcx 2005 jnz L$oop_enc1_10 2006.byte 102,15,56,221,209 2007 xorps %xmm10,%xmm2 2008 movups %xmm2,-16(%rsi) 2009 2010L$xts_enc_ret: 2011 xorps %xmm0,%xmm0 2012 pxor %xmm1,%xmm1 2013 pxor %xmm2,%xmm2 2014 pxor %xmm3,%xmm3 2015 pxor %xmm4,%xmm4 2016 pxor %xmm5,%xmm5 2017 pxor %xmm6,%xmm6 2018 pxor %xmm7,%xmm7 2019 movaps %xmm0,0(%rsp) 2020 pxor %xmm8,%xmm8 2021 movaps %xmm0,16(%rsp) 2022 pxor %xmm9,%xmm9 2023 movaps 
%xmm0,32(%rsp) 2024 pxor %xmm10,%xmm10 2025 movaps %xmm0,48(%rsp) 2026 pxor %xmm11,%xmm11 2027 movaps %xmm0,64(%rsp) 2028 pxor %xmm12,%xmm12 2029 movaps %xmm0,80(%rsp) 2030 pxor %xmm13,%xmm13 2031 movaps %xmm0,96(%rsp) 2032 pxor %xmm14,%xmm14 2033 pxor %xmm15,%xmm15 2034 movq -8(%r11),%rbp 2035 leaq (%r11),%rsp 2036L$xts_enc_epilogue: 2037 .byte 0xf3,0xc3 2038 /*
 * _aesni_xts_decrypt -- AES-NI XTS decryption, six blocks per main-loop pass.
 *
 * Registers as the code reads them on entry:
 *   %rdi  input pointer (read in 16-byte blocks)
 *   %rsi  output pointer
 *   %rdx  length in bytes
 *   %rcx  key schedule applied to the data blocks, loop count at offset 240
 *   %r8   key schedule used only to encrypt the starting tweak
 *   %r9   pointer to the 16-byte starting tweak
 * NOTE(review): presumably the C prototype is (in, out, len, key1, key2, iv);
 * confirm against the header that declares this symbol.
 *
 * Outline:
 *   1. The block at (%r9) is encrypted (aesenc/aesenclast, L$oop_enc1_11)
 *      with the %r8 schedule; only the data blocks use the decrypt opcodes.
 *   2. If the length is not a multiple of 16, 16 is subtracted from %rdx so
 *      that one full block is held back for the tail; the adjusted length is
 *      kept in %r9 and %rdx is rounded down to a multiple of 16.
 *   3. Tweak values are derived into %xmm10..%xmm14 with the running tweak
 *      in %xmm15 (paddq doubling plus a fix-up masked by L$xts_magic, which
 *      is defined outside this view).
 *   4. L$xts_dec_grandloop consumes 96 bytes per pass; L$xts_dec_short
 *      dispatches the remaining blocks.
 *   5. Tail (L$xts_dec_done2): the held-back block is decrypted with the
 *      mask in %xmm11, L$xts_dec_steal swaps the trailing bytes, and the
 *      merged block at (%rsi) is decrypted with the mask in %xmm10.
 *   6. L$xts_dec_ret zeroes %xmm0..%xmm15 and the stack scratch area, then
 *      restores %rbp and %rsp through %r11.
 *
 * Pre-encoded instructions: 102,15,56,222,209 is aesdec %xmm1,%xmm2 and
 * 102,15,56,223,209 is aesdeclast %xmm1,%xmm2.
 */2039.globl _aesni_xts_decrypt 2040.private_extern _aesni_xts_decrypt 2041 2042.p2align 4 2043_aesni_xts_decrypt: 2044 leaq (%rsp),%r11 2045 pushq %rbp 2046 subq $112,%rsp 2047 andq $-16,%rsp 2048 movups (%r9),%xmm2 2049 movl 240(%r8),%eax 2050 movl 240(%rcx),%r10d 2051 movups (%r8),%xmm0 2052 movups 16(%r8),%xmm1 2053 leaq 32(%r8),%r8 2054 xorps %xmm0,%xmm2 2055L$oop_enc1_11: 2056.byte 102,15,56,220,209 2057 decl %eax 2058 movups (%r8),%xmm1 2059 leaq 16(%r8),%r8 2060 jnz L$oop_enc1_11 2061.byte 102,15,56,221,209 /* %xmm2 = starting tweak encrypted with the %r8 schedule */2062 xorl %eax,%eax 2063 testq $15,%rdx 2064 setnz %al 2065 shlq $4,%rax 2066 subq %rax,%rdx /* length not a multiple of 16: hold one extra 16-byte block back for the tail */2067 2068 movups (%rcx),%xmm0 2069 movq %rcx,%rbp 2070 movl %r10d,%eax 2071 shll $4,%r10d 2072 movq %rdx,%r9 2073 andq $-16,%rdx 2074 2075 movups 16(%rcx,%r10,1),%xmm1 2076 2077 movdqa L$xts_magic(%rip),%xmm8 2078 movdqa %xmm2,%xmm15 2079 pshufd $0x5f,%xmm2,%xmm9 2080 pxor %xmm0,%xmm1 2081 movdqa %xmm9,%xmm14 2082 paddd %xmm9,%xmm9 2083 movdqa %xmm15,%xmm10 2084 psrad $31,%xmm14 2085 paddq %xmm15,%xmm15 2086 pand %xmm8,%xmm14 2087 pxor %xmm0,%xmm10 2088 pxor %xmm14,%xmm15 2089 movdqa %xmm9,%xmm14 2090 paddd %xmm9,%xmm9 2091 movdqa %xmm15,%xmm11 2092 psrad $31,%xmm14 2093 paddq %xmm15,%xmm15 2094 pand %xmm8,%xmm14 2095 pxor %xmm0,%xmm11 2096 pxor %xmm14,%xmm15 2097 movdqa %xmm9,%xmm14 2098 paddd %xmm9,%xmm9 2099 movdqa %xmm15,%xmm12 2100 psrad $31,%xmm14 2101 paddq %xmm15,%xmm15 2102 pand %xmm8,%xmm14 2103 pxor %xmm0,%xmm12 2104 pxor %xmm14,%xmm15 2105 movdqa %xmm9,%xmm14 2106 paddd %xmm9,%xmm9 2107 movdqa %xmm15,%xmm13 2108 psrad $31,%xmm14 2109 paddq %xmm15,%xmm15 2110 pand %xmm8,%xmm14 2111 pxor 
%xmm0,%xmm13 2112 pxor %xmm14,%xmm15 2113 movdqa %xmm15,%xmm14 2114 psrad $31,%xmm9 2115 paddq %xmm15,%xmm15 2116 pand %xmm8,%xmm9 2117 pxor %xmm0,%xmm14 2118 pxor %xmm9,%xmm15 2119 movaps %xmm1,96(%rsp) 2120 2121 subq $96,%rdx 2122 jc L$xts_dec_short 2123 2124 movl $16+96,%eax 2125 leaq 32(%rbp,%r10,1),%rcx 2126 subq %r10,%rax 2127 movups 16(%rbp),%xmm1 2128 movq %rax,%r10 2129 leaq L$xts_magic(%rip),%r8 2130 jmp L$xts_dec_grandloop 2131 2132.p2align 5 2133L$xts_dec_grandloop: 2134 movdqu 0(%rdi),%xmm2 2135 movdqa %xmm0,%xmm8 2136 movdqu 16(%rdi),%xmm3 2137 pxor %xmm10,%xmm2 2138 movdqu 32(%rdi),%xmm4 2139 pxor %xmm11,%xmm3 2140.byte 102,15,56,222,209 2141 movdqu 48(%rdi),%xmm5 2142 pxor %xmm12,%xmm4 2143.byte 102,15,56,222,217 2144 movdqu 64(%rdi),%xmm6 2145 pxor %xmm13,%xmm5 2146.byte 102,15,56,222,225 2147 movdqu 80(%rdi),%xmm7 2148 pxor %xmm15,%xmm8 2149 movdqa 96(%rsp),%xmm9 2150 pxor %xmm14,%xmm6 2151.byte 102,15,56,222,233 2152 movups 32(%rbp),%xmm0 2153 leaq 96(%rdi),%rdi 2154 pxor %xmm8,%xmm7 2155 2156 pxor %xmm9,%xmm10 2157.byte 102,15,56,222,241 2158 pxor %xmm9,%xmm11 2159 movdqa %xmm10,0(%rsp) 2160.byte 102,15,56,222,249 2161 movups 48(%rbp),%xmm1 2162 pxor %xmm9,%xmm12 2163 2164.byte 102,15,56,222,208 2165 pxor %xmm9,%xmm13 2166 movdqa %xmm11,16(%rsp) 2167.byte 102,15,56,222,216 2168 pxor %xmm9,%xmm14 2169 movdqa %xmm12,32(%rsp) 2170.byte 102,15,56,222,224 2171.byte 102,15,56,222,232 2172 pxor %xmm9,%xmm8 2173 movdqa %xmm14,64(%rsp) 2174.byte 102,15,56,222,240 2175.byte 102,15,56,222,248 2176 movups 64(%rbp),%xmm0 2177 movdqa %xmm8,80(%rsp) 2178 pshufd $0x5f,%xmm15,%xmm9 2179 jmp L$xts_dec_loop6 2180.p2align 5 2181L$xts_dec_loop6: 2182.byte 102,15,56,222,209 2183.byte 102,15,56,222,217 2184.byte 102,15,56,222,225 2185.byte 102,15,56,222,233 2186.byte 102,15,56,222,241 2187.byte 102,15,56,222,249 2188 movups -64(%rcx,%rax,1),%xmm1 2189 addq $32,%rax 2190 2191.byte 102,15,56,222,208 2192.byte 102,15,56,222,216 2193.byte 102,15,56,222,224 2194.byte 
102,15,56,222,232 2195.byte 102,15,56,222,240 2196.byte 102,15,56,222,248 2197 movups -80(%rcx,%rax,1),%xmm0 2198 jnz L$xts_dec_loop6 2199 2200 movdqa (%r8),%xmm8 2201 movdqa %xmm9,%xmm14 2202 paddd %xmm9,%xmm9 2203.byte 102,15,56,222,209 2204 paddq %xmm15,%xmm15 2205 psrad $31,%xmm14 2206.byte 102,15,56,222,217 2207 pand %xmm8,%xmm14 2208 movups (%rbp),%xmm10 2209.byte 102,15,56,222,225 2210.byte 102,15,56,222,233 2211.byte 102,15,56,222,241 2212 pxor %xmm14,%xmm15 2213 movaps %xmm10,%xmm11 2214.byte 102,15,56,222,249 2215 movups -64(%rcx),%xmm1 2216 2217 movdqa %xmm9,%xmm14 2218.byte 102,15,56,222,208 2219 paddd %xmm9,%xmm9 2220 pxor %xmm15,%xmm10 2221.byte 102,15,56,222,216 2222 psrad $31,%xmm14 2223 paddq %xmm15,%xmm15 2224.byte 102,15,56,222,224 2225.byte 102,15,56,222,232 2226 pand %xmm8,%xmm14 2227 movaps %xmm11,%xmm12 2228.byte 102,15,56,222,240 2229 pxor %xmm14,%xmm15 2230 movdqa %xmm9,%xmm14 2231.byte 102,15,56,222,248 2232 movups -48(%rcx),%xmm0 2233 2234 paddd %xmm9,%xmm9 2235.byte 102,15,56,222,209 2236 pxor %xmm15,%xmm11 2237 psrad $31,%xmm14 2238.byte 102,15,56,222,217 2239 paddq %xmm15,%xmm15 2240 pand %xmm8,%xmm14 2241.byte 102,15,56,222,225 2242.byte 102,15,56,222,233 2243 movdqa %xmm13,48(%rsp) 2244 pxor %xmm14,%xmm15 2245.byte 102,15,56,222,241 2246 movaps %xmm12,%xmm13 2247 movdqa %xmm9,%xmm14 2248.byte 102,15,56,222,249 2249 movups -32(%rcx),%xmm1 2250 2251 paddd %xmm9,%xmm9 2252.byte 102,15,56,222,208 2253 pxor %xmm15,%xmm12 2254 psrad $31,%xmm14 2255.byte 102,15,56,222,216 2256 paddq %xmm15,%xmm15 2257 pand %xmm8,%xmm14 2258.byte 102,15,56,222,224 2259.byte 102,15,56,222,232 2260.byte 102,15,56,222,240 2261 pxor %xmm14,%xmm15 2262 movaps %xmm13,%xmm14 2263.byte 102,15,56,222,248 2264 2265 movdqa %xmm9,%xmm0 2266 paddd %xmm9,%xmm9 2267.byte 102,15,56,222,209 2268 pxor %xmm15,%xmm13 2269 psrad $31,%xmm0 2270.byte 102,15,56,222,217 2271 paddq %xmm15,%xmm15 2272 pand %xmm8,%xmm0 2273.byte 102,15,56,222,225 2274.byte 102,15,56,222,233 2275 pxor 
%xmm0,%xmm15 2276 movups (%rbp),%xmm0 2277.byte 102,15,56,222,241 2278.byte 102,15,56,222,249 2279 movups 16(%rbp),%xmm1 2280 2281 pxor %xmm15,%xmm14 2282.byte 102,15,56,223,84,36,0 2283 psrad $31,%xmm9 2284 paddq %xmm15,%xmm15 2285.byte 102,15,56,223,92,36,16 2286.byte 102,15,56,223,100,36,32 2287 pand %xmm8,%xmm9 2288 movq %r10,%rax 2289.byte 102,15,56,223,108,36,48 2290.byte 102,15,56,223,116,36,64 2291.byte 102,15,56,223,124,36,80 2292 pxor %xmm9,%xmm15 2293 2294 leaq 96(%rsi),%rsi 2295 movups %xmm2,-96(%rsi) 2296 movups %xmm3,-80(%rsi) 2297 movups %xmm4,-64(%rsi) 2298 movups %xmm5,-48(%rsi) 2299 movups %xmm6,-32(%rsi) 2300 movups %xmm7,-16(%rsi) 2301 subq $96,%rdx 2302 jnc L$xts_dec_grandloop 2303 2304 movl $16+96,%eax 2305 subl %r10d,%eax 2306 movq %rbp,%rcx 2307 shrl $4,%eax 2308 2309L$xts_dec_short: 2310 2311 movl %eax,%r10d 2312 pxor %xmm0,%xmm10 2313 pxor %xmm0,%xmm11 2314 addq $96,%rdx 2315 jz L$xts_dec_done 2316 2317 pxor %xmm0,%xmm12 2318 cmpq $0x20,%rdx 2319 jb L$xts_dec_one 2320 pxor %xmm0,%xmm13 2321 je L$xts_dec_two 2322 2323 pxor %xmm0,%xmm14 2324 cmpq $0x40,%rdx 2325 jb L$xts_dec_three 2326 je L$xts_dec_four 2327 2328 movdqu (%rdi),%xmm2 2329 movdqu 16(%rdi),%xmm3 2330 movdqu 32(%rdi),%xmm4 2331 pxor %xmm10,%xmm2 2332 movdqu 48(%rdi),%xmm5 2333 pxor %xmm11,%xmm3 2334 movdqu 64(%rdi),%xmm6 2335 leaq 80(%rdi),%rdi 2336 pxor %xmm12,%xmm4 2337 pxor %xmm13,%xmm5 2338 pxor %xmm14,%xmm6 2339 2340 call _aesni_decrypt6 2341 2342 xorps %xmm10,%xmm2 2343 xorps %xmm11,%xmm3 2344 xorps %xmm12,%xmm4 2345 movdqu %xmm2,(%rsi) 2346 xorps %xmm13,%xmm5 2347 movdqu %xmm3,16(%rsi) 2348 xorps %xmm14,%xmm6 2349 movdqu %xmm4,32(%rsi) 2350 pxor %xmm14,%xmm14 2351 movdqu %xmm5,48(%rsi) 2352 pcmpgtd %xmm15,%xmm14 2353 movdqu %xmm6,64(%rsi) 2354 leaq 80(%rsi),%rsi 2355 pshufd $0x13,%xmm14,%xmm11 2356 andq $15,%r9 2357 jz L$xts_dec_ret 2358 2359 movdqa %xmm15,%xmm10 2360 paddq %xmm15,%xmm15 2361 pand %xmm8,%xmm11 2362 pxor %xmm15,%xmm11 2363 jmp L$xts_dec_done2 2364 
2365.p2align 4 2366L$xts_dec_one: 2367 movups (%rdi),%xmm2 2368 leaq 16(%rdi),%rdi 2369 xorps %xmm10,%xmm2 2370 movups (%rcx),%xmm0 2371 movups 16(%rcx),%xmm1 2372 leaq 32(%rcx),%rcx 2373 xorps %xmm0,%xmm2 2374L$oop_dec1_12: 2375.byte 102,15,56,222,209 2376 decl %eax 2377 movups (%rcx),%xmm1 2378 leaq 16(%rcx),%rcx 2379 jnz L$oop_dec1_12 2380.byte 102,15,56,223,209 2381 xorps %xmm10,%xmm2 2382 movdqa %xmm11,%xmm10 2383 movups %xmm2,(%rsi) 2384 movdqa %xmm12,%xmm11 2385 leaq 16(%rsi),%rsi 2386 jmp L$xts_dec_done 2387 2388.p2align 4 2389L$xts_dec_two: 2390 movups (%rdi),%xmm2 2391 movups 16(%rdi),%xmm3 2392 leaq 32(%rdi),%rdi 2393 xorps %xmm10,%xmm2 2394 xorps %xmm11,%xmm3 2395 2396 call _aesni_decrypt2 2397 2398 xorps %xmm10,%xmm2 2399 movdqa %xmm12,%xmm10 2400 xorps %xmm11,%xmm3 2401 movdqa %xmm13,%xmm11 2402 movups %xmm2,(%rsi) 2403 movups %xmm3,16(%rsi) 2404 leaq 32(%rsi),%rsi 2405 jmp L$xts_dec_done 2406 2407.p2align 4 2408L$xts_dec_three: 2409 movups (%rdi),%xmm2 2410 movups 16(%rdi),%xmm3 2411 movups 32(%rdi),%xmm4 2412 leaq 48(%rdi),%rdi 2413 xorps %xmm10,%xmm2 2414 xorps %xmm11,%xmm3 2415 xorps %xmm12,%xmm4 2416 2417 call _aesni_decrypt3 2418 2419 xorps %xmm10,%xmm2 2420 movdqa %xmm13,%xmm10 2421 xorps %xmm11,%xmm3 2422 movdqa %xmm14,%xmm11 2423 xorps %xmm12,%xmm4 2424 movups %xmm2,(%rsi) 2425 movups %xmm3,16(%rsi) 2426 movups %xmm4,32(%rsi) 2427 leaq 48(%rsi),%rsi 2428 jmp L$xts_dec_done 2429 2430.p2align 4 2431L$xts_dec_four: 2432 movups (%rdi),%xmm2 2433 movups 16(%rdi),%xmm3 2434 movups 32(%rdi),%xmm4 2435 xorps %xmm10,%xmm2 2436 movups 48(%rdi),%xmm5 2437 leaq 64(%rdi),%rdi 2438 xorps %xmm11,%xmm3 2439 xorps %xmm12,%xmm4 2440 xorps %xmm13,%xmm5 2441 2442 call _aesni_decrypt4 2443 2444 pxor %xmm10,%xmm2 2445 movdqa %xmm14,%xmm10 2446 pxor %xmm11,%xmm3 2447 movdqa %xmm15,%xmm11 2448 pxor %xmm12,%xmm4 2449 movdqu %xmm2,(%rsi) 2450 pxor %xmm13,%xmm5 2451 movdqu %xmm3,16(%rsi) 2452 movdqu %xmm4,32(%rsi) 2453 movdqu %xmm5,48(%rsi) 2454 leaq 64(%rsi),%rsi 2455 
jmp L$xts_dec_done 2456 2457.p2align 4 2458L$xts_dec_done: 2459 andq $15,%r9 2460 jz L$xts_dec_ret 2461L$xts_dec_done2: /* partial final block: decrypt the block at (%rdi) with the mask in %xmm11 first */2462 movq %r9,%rdx 2463 movq %rbp,%rcx 2464 movl %r10d,%eax 2465 2466 movups (%rdi),%xmm2 2467 xorps %xmm11,%xmm2 2468 movups (%rcx),%xmm0 2469 movups 16(%rcx),%xmm1 2470 leaq 32(%rcx),%rcx 2471 xorps %xmm0,%xmm2 2472L$oop_dec1_13: 2473.byte 102,15,56,222,209 2474 decl %eax 2475 movups (%rcx),%xmm1 2476 leaq 16(%rcx),%rcx 2477 jnz L$oop_dec1_13 2478.byte 102,15,56,223,209 2479 xorps %xmm11,%xmm2 2480 movups %xmm2,(%rsi) 2481 2482L$xts_dec_steal: /* per byte: input byte at 16(%rdi) goes to (%rsi), the old (%rsi) byte moves to 16(%rsi); %rdx counts the %r9 trailing bytes */2483 movzbl 16(%rdi),%eax 2484 movzbl (%rsi),%ecx 2485 leaq 1(%rdi),%rdi 2486 movb %al,(%rsi) 2487 movb %cl,16(%rsi) 2488 leaq 1(%rsi),%rsi 2489 subq $1,%rdx 2490 jnz L$xts_dec_steal 2491 2492 subq %r9,%rsi 2493 movq %rbp,%rcx 2494 movl %r10d,%eax 2495 2496 movups (%rsi),%xmm2 2497 xorps %xmm10,%xmm2 2498 movups (%rcx),%xmm0 2499 movups 16(%rcx),%xmm1 2500 leaq 32(%rcx),%rcx 2501 xorps %xmm0,%xmm2 2502L$oop_dec1_14: 2503.byte 102,15,56,222,209 2504 decl %eax 2505 movups (%rcx),%xmm1 2506 leaq 16(%rcx),%rcx 2507 jnz L$oop_dec1_14 2508.byte 102,15,56,223,209 2509 xorps %xmm10,%xmm2 2510 movups %xmm2,(%rsi) 2511 2512L$xts_dec_ret: 2513 xorps %xmm0,%xmm0 2514 pxor %xmm1,%xmm1 2515 pxor %xmm2,%xmm2 2516 pxor %xmm3,%xmm3 2517 pxor %xmm4,%xmm4 2518 pxor %xmm5,%xmm5 2519 pxor %xmm6,%xmm6 2520 pxor %xmm7,%xmm7 2521 movaps %xmm0,0(%rsp) 2522 pxor %xmm8,%xmm8 2523 movaps %xmm0,16(%rsp) 2524 pxor %xmm9,%xmm9 2525 movaps %xmm0,32(%rsp) 2526 pxor %xmm10,%xmm10 2527 movaps %xmm0,48(%rsp) 2528 pxor %xmm11,%xmm11 2529 movaps %xmm0,64(%rsp) 2530 pxor %xmm12,%xmm12 2531 movaps %xmm0,80(%rsp) 2532 pxor %xmm13,%xmm13 2533 movaps %xmm0,96(%rsp) 2534 pxor %xmm14,%xmm14 2535 pxor %xmm15,%xmm15 2536 movq -8(%r11),%rbp 2537 leaq (%r11),%rsp 2538L$xts_dec_epilogue: 2539 .byte 0xf3,0xc3 2540 /*
 * _aesni_ocb_encrypt -- AES-NI OCB bulk encryption of whole 16-byte blocks.
 *
 * Inputs as the code reads them:
 *   %rdi      input pointer
 *   %rsi      output pointer
 *   %rdx      number of 16-byte blocks (counted down by 1 and by 6)
 *   %rcx      key schedule, loop count at offset 240
 *   %r8       number of the first block; bsf of a block number, times 16,
 *             selects the entry of the table at (%rbx) used for that block
 *   %r9       16-byte running offset: read on entry, written back at
 *             L$ocb_enc_done
 *   8(%rsp)   pointer loaded into %rbx: table of 16-byte entries
 *   16(%rsp)  pointer loaded into %rbp: 16-byte accumulator; every input
 *             block is XORed into it and it is written back at
 *             L$ocb_enc_done
 * NOTE(review): presumably these are OCB's offset, L table and checksum as
 * in the OpenSSL C prototype; confirm against the declaring header.
 *
 * When the starting block number is even, one block is processed alone
 * through __ocb_encrypt1 so that the six-block loop starts on an odd number.
 * L$ocb_enc_grandloop then calls __ocb_encrypt6 per 96 bytes. The remainder
 * uses __ocb_encrypt1 (one block), __ocb_encrypt4 (two to four blocks, with
 * unused registers zeroed) or __ocb_encrypt6 (five blocks, %xmm7 zeroed).
 *
 * %rbx, %rbp and %r12..%r14 are pushed on entry and reloaded at
 * L$ocb_enc_done, which also zeroes %xmm0..%xmm15.
 */2541.globl _aesni_ocb_encrypt 2542.private_extern _aesni_ocb_encrypt 2543 2544.p2align 5 2545_aesni_ocb_encrypt: 2546 leaq (%rsp),%rax 2547 
pushq %rbx 2548 pushq %rbp 2549 pushq %r12 2550 pushq %r13 2551 pushq %r14 2552 movq 8(%rax),%rbx 2553 movq 8+8(%rax),%rbp 2554 2555 movl 240(%rcx),%r10d 2556 movq %rcx,%r11 2557 shll $4,%r10d 2558 movups (%rcx),%xmm9 2559 movups 16(%rcx,%r10,1),%xmm1 2560 2561 movdqu (%r9),%xmm15 2562 pxor %xmm1,%xmm9 2563 pxor %xmm1,%xmm15 2564 2565 movl $16+32,%eax 2566 leaq 32(%r11,%r10,1),%rcx 2567 movups 16(%r11),%xmm1 2568 subq %r10,%rax 2569 movq %rax,%r10 2570 2571 movdqu (%rbx),%xmm10 2572 movdqu (%rbp),%xmm8 2573 2574 testq $1,%r8 2575 jnz L$ocb_enc_odd 2576 2577 bsfq %r8,%r12 2578 addq $1,%r8 2579 shlq $4,%r12 2580 movdqu (%rbx,%r12,1),%xmm7 2581 movdqu (%rdi),%xmm2 2582 leaq 16(%rdi),%rdi 2583 2584 call __ocb_encrypt1 2585 2586 movdqa %xmm7,%xmm15 2587 movups %xmm2,(%rsi) 2588 leaq 16(%rsi),%rsi 2589 subq $1,%rdx 2590 jz L$ocb_enc_done 2591 2592L$ocb_enc_odd: 2593 leaq 1(%r8),%r12 2594 leaq 3(%r8),%r13 2595 leaq 5(%r8),%r14 2596 leaq 6(%r8),%r8 2597 bsfq %r12,%r12 2598 bsfq %r13,%r13 2599 bsfq %r14,%r14 2600 shlq $4,%r12 2601 shlq $4,%r13 2602 shlq $4,%r14 2603 2604 subq $6,%rdx 2605 jc L$ocb_enc_short 2606 jmp L$ocb_enc_grandloop 2607 2608.p2align 5 2609L$ocb_enc_grandloop: 2610 movdqu 0(%rdi),%xmm2 2611 movdqu 16(%rdi),%xmm3 2612 movdqu 32(%rdi),%xmm4 2613 movdqu 48(%rdi),%xmm5 2614 movdqu 64(%rdi),%xmm6 2615 movdqu 80(%rdi),%xmm7 2616 leaq 96(%rdi),%rdi 2617 2618 call __ocb_encrypt6 2619 2620 movups %xmm2,0(%rsi) 2621 movups %xmm3,16(%rsi) 2622 movups %xmm4,32(%rsi) 2623 movups %xmm5,48(%rsi) 2624 movups %xmm6,64(%rsi) 2625 movups %xmm7,80(%rsi) 2626 leaq 96(%rsi),%rsi 2627 subq $6,%rdx 2628 jnc L$ocb_enc_grandloop 2629 2630L$ocb_enc_short: 2631 addq $6,%rdx 2632 jz L$ocb_enc_done 2633 2634 movdqu 0(%rdi),%xmm2 2635 cmpq $2,%rdx 2636 jb L$ocb_enc_one 2637 movdqu 16(%rdi),%xmm3 2638 je L$ocb_enc_two 2639 2640 movdqu 32(%rdi),%xmm4 2641 cmpq $4,%rdx 2642 jb L$ocb_enc_three 2643 movdqu 48(%rdi),%xmm5 2644 je L$ocb_enc_four 2645 2646 movdqu 64(%rdi),%xmm6 2647 pxor 
%xmm7,%xmm7 2648 2649 call __ocb_encrypt6 2650 2651 movdqa %xmm14,%xmm15 /* five-block case: the fifth block's mask becomes the running offset */2652 movups %xmm2,0(%rsi) 2653 movups %xmm3,16(%rsi) 2654 movups %xmm4,32(%rsi) 2655 movups %xmm5,48(%rsi) 2656 movups %xmm6,64(%rsi) 2657 2658 jmp L$ocb_enc_done 2659 2660.p2align 4 2661L$ocb_enc_one: 2662 movdqa %xmm10,%xmm7 2663 2664 call __ocb_encrypt1 2665 2666 movdqa %xmm7,%xmm15 2667 movups %xmm2,0(%rsi) 2668 jmp L$ocb_enc_done 2669 2670.p2align 4 2671L$ocb_enc_two: 2672 pxor %xmm4,%xmm4 2673 pxor %xmm5,%xmm5 2674 2675 call __ocb_encrypt4 2676 2677 movdqa %xmm11,%xmm15 2678 movups %xmm2,0(%rsi) 2679 movups %xmm3,16(%rsi) 2680 2681 jmp L$ocb_enc_done 2682 2683.p2align 4 2684L$ocb_enc_three: 2685 pxor %xmm5,%xmm5 2686 2687 call __ocb_encrypt4 2688 2689 movdqa %xmm12,%xmm15 2690 movups %xmm2,0(%rsi) 2691 movups %xmm3,16(%rsi) 2692 movups %xmm4,32(%rsi) 2693 2694 jmp L$ocb_enc_done 2695 2696.p2align 4 2697L$ocb_enc_four: 2698 call __ocb_encrypt4 2699 2700 movdqa %xmm13,%xmm15 2701 movups %xmm2,0(%rsi) 2702 movups %xmm3,16(%rsi) 2703 movups %xmm4,32(%rsi) 2704 movups %xmm5,48(%rsi) 2705 2706L$ocb_enc_done: 2707 pxor %xmm0,%xmm15 2708 movdqu %xmm8,(%rbp) 2709 movdqu %xmm15,(%r9) /* accumulator %xmm8 and running offset %xmm15 go back to the caller's buffers before the registers are wiped */2710 2711 xorps %xmm0,%xmm0 2712 pxor %xmm1,%xmm1 2713 pxor %xmm2,%xmm2 2714 pxor %xmm3,%xmm3 2715 pxor %xmm4,%xmm4 2716 pxor %xmm5,%xmm5 2717 pxor %xmm6,%xmm6 2718 pxor %xmm7,%xmm7 2719 pxor %xmm8,%xmm8 2720 pxor %xmm9,%xmm9 2721 pxor %xmm10,%xmm10 2722 pxor %xmm11,%xmm11 2723 pxor %xmm12,%xmm12 2724 pxor %xmm13,%xmm13 2725 pxor %xmm14,%xmm14 2726 pxor %xmm15,%xmm15 2727 leaq 40(%rsp),%rax 2728 movq -40(%rax),%r14 2729 movq -32(%rax),%r13 2730 movq -24(%rax),%r12 2731 movq -16(%rax),%rbp 2732 movq -8(%rax),%rbx 2733 leaq (%rax),%rsp 2734L$ocb_enc_epilogue: 2735 .byte 0xf3,0xc3 2736 2737 2738 /*
 * __ocb_encrypt6 -- internal helper: encrypt the six blocks in %xmm2..%xmm7.
 *
 * State shared with _aesni_ocb_encrypt:
 *   %xmm15          running offset on entry; the sixth block's mask on
 *                   return
 *   %xmm10          entry 0 of the table at (%rbx); reloaded before return
 *   %xmm9           XORed into the offset on entry and into every per-block
 *                   mask afterwards
 *   %xmm8           accumulator; each input block is XORed into it
 *   %r12,%r13,%r14  byte offsets of the table entries used by this call;
 *                   recomputed for the next call as bsf(%r8+1), bsf(%r8+3),
 *                   bsf(%r8+5), each times 16, while %r8 advances by 6
 *   %r11, %rcx      key schedule base and the pointer the round loop
 *                   indexes from
 *   %rax            round-loop index, stepped by 32 until zero and restored
 *                   from %r10 before return
 *
 * The six masks are built in %xmm10..%xmm15 by chaining XORs of the table
 * entries onto the running offset. Each mask is XORed into its block before
 * the first round and is the operand of the final aesenclast
 * (102,65,15,56,221,210 is aesenclast %xmm10,%xmm2). Returns through the
 * pre-encoded rep ret (.byte 0xf3,0xc3).
 */2739.p2align 5 2740__ocb_encrypt6: 2741 pxor %xmm9,%xmm15 2742 movdqu (%rbx,%r12,1),%xmm11 2743 movdqa %xmm10,%xmm12 2744 movdqu (%rbx,%r13,1),%xmm13 2745 movdqa %xmm10,%xmm14 2746 pxor %xmm15,%xmm10 2747 movdqu 
(%rbx,%r14,1),%xmm15 2748 pxor %xmm10,%xmm11 2749 pxor %xmm2,%xmm8 2750 pxor %xmm10,%xmm2 2751 pxor %xmm11,%xmm12 2752 pxor %xmm3,%xmm8 2753 pxor %xmm11,%xmm3 2754 pxor %xmm12,%xmm13 2755 pxor %xmm4,%xmm8 2756 pxor %xmm12,%xmm4 2757 pxor %xmm13,%xmm14 2758 pxor %xmm5,%xmm8 2759 pxor %xmm13,%xmm5 2760 pxor %xmm14,%xmm15 2761 pxor %xmm6,%xmm8 2762 pxor %xmm14,%xmm6 2763 pxor %xmm7,%xmm8 2764 pxor %xmm15,%xmm7 2765 movups 32(%r11),%xmm0 2766 2767 leaq 1(%r8),%r12 2768 leaq 3(%r8),%r13 2769 leaq 5(%r8),%r14 2770 addq $6,%r8 2771 pxor %xmm9,%xmm10 2772 bsfq %r12,%r12 2773 bsfq %r13,%r13 2774 bsfq %r14,%r14 2775 2776.byte 102,15,56,220,209 2777.byte 102,15,56,220,217 2778.byte 102,15,56,220,225 2779.byte 102,15,56,220,233 2780 pxor %xmm9,%xmm11 2781 pxor %xmm9,%xmm12 2782.byte 102,15,56,220,241 2783 pxor %xmm9,%xmm13 2784 pxor %xmm9,%xmm14 2785.byte 102,15,56,220,249 2786 movups 48(%r11),%xmm1 2787 pxor %xmm9,%xmm15 2788 2789.byte 102,15,56,220,208 2790.byte 102,15,56,220,216 2791.byte 102,15,56,220,224 2792.byte 102,15,56,220,232 2793.byte 102,15,56,220,240 2794.byte 102,15,56,220,248 2795 movups 64(%r11),%xmm0 2796 shlq $4,%r12 2797 shlq $4,%r13 2798 jmp L$ocb_enc_loop6 2799 2800.p2align 5 2801L$ocb_enc_loop6: 2802.byte 102,15,56,220,209 2803.byte 102,15,56,220,217 2804.byte 102,15,56,220,225 2805.byte 102,15,56,220,233 2806.byte 102,15,56,220,241 2807.byte 102,15,56,220,249 2808 movups (%rcx,%rax,1),%xmm1 2809 addq $32,%rax 2810 2811.byte 102,15,56,220,208 2812.byte 102,15,56,220,216 2813.byte 102,15,56,220,224 2814.byte 102,15,56,220,232 2815.byte 102,15,56,220,240 2816.byte 102,15,56,220,248 2817 movups -16(%rcx,%rax,1),%xmm0 2818 jnz L$ocb_enc_loop6 2819 2820.byte 102,15,56,220,209 2821.byte 102,15,56,220,217 2822.byte 102,15,56,220,225 2823.byte 102,15,56,220,233 2824.byte 102,15,56,220,241 2825.byte 102,15,56,220,249 2826 movups 16(%r11),%xmm1 2827 shlq $4,%r14 2828 2829.byte 102,65,15,56,221,210 2830 movdqu (%rbx),%xmm10 2831 movq %r10,%rax 2832.byte 
102,65,15,56,221,219 2833.byte 102,65,15,56,221,228 2834.byte 102,65,15,56,221,237 2835.byte 102,65,15,56,221,246 2836.byte 102,65,15,56,221,255 2837 .byte 0xf3,0xc3 2838 2839 2840 /*
 * __ocb_encrypt4 -- internal helper: encrypt the four blocks in
 * %xmm2..%xmm5.
 *
 * Builds the masks in %xmm10..%xmm13 from the running offset %xmm15, the
 * table entry held in %xmm10 and the table entries at byte offsets %r12 and
 * %r13 from %rbx. Every input block is XORed into the accumulator %xmm8
 * before it is masked. Each mask is the operand of the final aesenclast
 * (102,65,15,56,221,210 is aesenclast %xmm10,%xmm2). %rax is restored from
 * %r10 before return.
 *
 * This helper does not advance %r8, does not recompute %r12..%r14 and does
 * not reload %xmm10 from the table; its call sites in _aesni_ocb_encrypt are
 * all on the final short path.
 */2841.p2align 5 2842__ocb_encrypt4: 2843 pxor %xmm9,%xmm15 2844 movdqu (%rbx,%r12,1),%xmm11 2845 movdqa %xmm10,%xmm12 2846 movdqu (%rbx,%r13,1),%xmm13 2847 pxor %xmm15,%xmm10 2848 pxor %xmm10,%xmm11 2849 pxor %xmm2,%xmm8 2850 pxor %xmm10,%xmm2 2851 pxor %xmm11,%xmm12 2852 pxor %xmm3,%xmm8 2853 pxor %xmm11,%xmm3 2854 pxor %xmm12,%xmm13 2855 pxor %xmm4,%xmm8 2856 pxor %xmm12,%xmm4 2857 pxor %xmm5,%xmm8 2858 pxor %xmm13,%xmm5 2859 movups 32(%r11),%xmm0 2860 2861 pxor %xmm9,%xmm10 2862 pxor %xmm9,%xmm11 2863 pxor %xmm9,%xmm12 2864 pxor %xmm9,%xmm13 2865 2866.byte 102,15,56,220,209 2867.byte 102,15,56,220,217 2868.byte 102,15,56,220,225 2869.byte 102,15,56,220,233 2870 movups 48(%r11),%xmm1 2871 2872.byte 102,15,56,220,208 2873.byte 102,15,56,220,216 2874.byte 102,15,56,220,224 2875.byte 102,15,56,220,232 2876 movups 64(%r11),%xmm0 2877 jmp L$ocb_enc_loop4 2878 2879.p2align 5 2880L$ocb_enc_loop4: /* two rounds per pass, alternating round keys in %xmm1 and %xmm0; exits when %rax reaches zero */2881.byte 102,15,56,220,209 2882.byte 102,15,56,220,217 2883.byte 102,15,56,220,225 2884.byte 102,15,56,220,233 2885 movups (%rcx,%rax,1),%xmm1 2886 addq $32,%rax 2887 2888.byte 102,15,56,220,208 2889.byte 102,15,56,220,216 2890.byte 102,15,56,220,224 2891.byte 102,15,56,220,232 2892 movups -16(%rcx,%rax,1),%xmm0 2893 jnz L$ocb_enc_loop4 2894 2895.byte 102,15,56,220,209 2896.byte 102,15,56,220,217 2897.byte 102,15,56,220,225 2898.byte 102,15,56,220,233 2899 movups 16(%r11),%xmm1 2900 movq %r10,%rax 2901 2902.byte 102,65,15,56,221,210 2903.byte 102,65,15,56,221,219 2904.byte 102,65,15,56,221,228 2905.byte 102,65,15,56,221,237 2906 .byte 0xf3,0xc3 2907 2908 2909 /*
 * __ocb_encrypt1 -- internal helper: encrypt the single block in %xmm2.
 *
 * On entry %xmm7 holds a table entry. It is XORed with the running offset
 * %xmm15 (and with %xmm9) to form the block mask, and the input block is
 * XORed into the accumulator %xmm8 before it is masked. %xmm7 is then XORed
 * with %xmm9 a second time and serves as the operand of the final
 * aesenclast. Both call sites in _aesni_ocb_encrypt copy the returned %xmm7
 * into %xmm15 as the new running offset.
 */2910.p2align 5 2911__ocb_encrypt1: 2912 pxor %xmm15,%xmm7 2913 pxor %xmm9,%xmm7 2914 pxor %xmm2,%xmm8 2915 pxor %xmm7,%xmm2 2916 movups 32(%r11),%xmm0 2917 2918.byte 102,15,56,220,209 2919 movups 48(%r11),%xmm1 2920 pxor %xmm9,%xmm7 2921 
2922.byte 102,15,56,220,208 2923 movups 64(%r11),%xmm0 2924 jmp L$ocb_enc_loop1 2925 2926.p2align 5 2927L$ocb_enc_loop1: 2928.byte 102,15,56,220,209 2929 movups (%rcx,%rax,1),%xmm1 2930 addq $32,%rax 2931 2932.byte 102,15,56,220,208 2933 movups -16(%rcx,%rax,1),%xmm0 2934 jnz L$ocb_enc_loop1 2935 2936.byte 102,15,56,220,209 2937 movups 16(%r11),%xmm1 2938 movq %r10,%rax 2939 2940.byte 102,15,56,221,215 /* aesenclast %xmm7,%xmm2 */2941 .byte 0xf3,0xc3 2942 2943 /*
 * _aesni_ocb_decrypt -- AES-NI OCB bulk decryption of whole 16-byte blocks.
 *
 * Inputs as the code reads them:
 *   %rdi      input pointer
 *   %rsi      output pointer
 *   %rdx      number of 16-byte blocks (counted down by 1 and by 6)
 *   %rcx      key schedule, loop count at offset 240
 *   %r8       number of the first block; bsf of a block number, times 16,
 *             selects the entry of the table at (%rbx) used for that block
 *   %r9       16-byte running offset, read on entry and stored back on exit
 *   8(%rsp)   pointer loaded into %rbx: table of 16-byte entries
 *   16(%rsp)  pointer loaded into %rbp: 16-byte accumulator, stored back on
 *             exit
 * NOTE(review): presumably these are OCB's offset, L table and checksum as
 * in the OpenSSL C prototype; confirm against the declaring header.
 *
 * Unlike the encrypt path, the accumulator %xmm8 is updated here, after each
 * helper call, with the decrypted output blocks.
 *
 * When the starting block number is even, one block is processed alone
 * through __ocb_decrypt1 so that the six-block loop starts on an odd number.
 * L$ocb_dec_grandloop then calls __ocb_decrypt6 per 96 bytes, and
 * L$ocb_dec_short handles the remaining one to five blocks.
 *
 * %rbx, %rbp and %r12..%r14 are pushed on entry; five pushes after the
 * return address leave %rsp 16-byte aligned at the helper calls.
 */2944.globl _aesni_ocb_decrypt 2945.private_extern _aesni_ocb_decrypt 2946 2947.p2align 5 2948_aesni_ocb_decrypt: 2949 leaq (%rsp),%rax 2950 pushq %rbx 2951 pushq %rbp 2952 pushq %r12 2953 pushq %r13 2954 pushq %r14 2955 movq 8(%rax),%rbx 2956 movq 8+8(%rax),%rbp /* the two stack-passed pointers, addressed from the entry %rsp saved in %rax */2957 2958 movl 240(%rcx),%r10d 2959 movq %rcx,%r11 2960 shll $4,%r10d 2961 movups (%rcx),%xmm9 2962 movups 16(%rcx,%r10,1),%xmm1 2963 2964 movdqu (%r9),%xmm15 2965 pxor %xmm1,%xmm9 2966 pxor %xmm1,%xmm15 /* the schedule entry at 16 + 16*count is folded into the first round key and into the running offset */2967 2968 movl $16+32,%eax 2969 leaq 32(%r11,%r10,1),%rcx 2970 movups 16(%r11),%xmm1 2971 subq %r10,%rax 2972 movq %rax,%r10 /* %rax = %r10 = 48 - 16*count; presumably the index the __ocb_decrypt round loops step through -- confirm against the helpers */2973 2974 movdqu (%rbx),%xmm10 2975 movdqu (%rbp),%xmm8 2976 2977 testq $1,%r8 2978 jnz L$ocb_dec_odd /* even block number: handle one block alone first */2979 2980 bsfq %r8,%r12 2981 addq $1,%r8 2982 shlq $4,%r12 2983 movdqu (%rbx,%r12,1),%xmm7 2984 movdqu (%rdi),%xmm2 2985 leaq 16(%rdi),%rdi 2986 2987 call __ocb_decrypt1 2988 2989 movdqa %xmm7,%xmm15 2990 movups %xmm2,(%rsi) 2991 xorps %xmm2,%xmm8 2992 leaq 16(%rsi),%rsi 2993 subq $1,%rdx 2994 jz L$ocb_dec_done 2995 2996L$ocb_dec_odd: /* table byte offsets for blocks %r8+1, %r8+3 and %r8+5: bsf of the block number, times 16 */2997 leaq 1(%r8),%r12 2998 leaq 3(%r8),%r13 2999 leaq 5(%r8),%r14 3000 leaq 6(%r8),%r8 3001 bsfq %r12,%r12 3002 bsfq %r13,%r13 3003 bsfq %r14,%r14 3004 shlq $4,%r12 3005 shlq $4,%r13 3006 shlq $4,%r14 3007 3008 subq $6,%rdx 3009 jc L$ocb_dec_short 3010 jmp L$ocb_dec_grandloop 3011 3012.p2align 5 3013L$ocb_dec_grandloop: 3014 movdqu 0(%rdi),%xmm2 3015 movdqu 16(%rdi),%xmm3 3016 movdqu 32(%rdi),%xmm4 3017 movdqu 48(%rdi),%xmm5 3018 movdqu 64(%rdi),%xmm6 3019 movdqu 80(%rdi),%xmm7 3020 leaq 96(%rdi),%rdi 3021 3022 call 
__ocb_decrypt6 3023 3024 movups %xmm2,0(%rsi) 3025 pxor %xmm2,%xmm8 3026 movups %xmm3,16(%rsi) 3027 pxor %xmm3,%xmm8 3028 movups %xmm4,32(%rsi) 3029 pxor %xmm4,%xmm8 3030 movups %xmm5,48(%rsi) 3031 pxor %xmm5,%xmm8 3032 movups %xmm6,64(%rsi) 3033 pxor %xmm6,%xmm8 3034 movups %xmm7,80(%rsi) 3035 pxor %xmm7,%xmm8 3036 leaq 96(%rsi),%rsi 3037 subq $6,%rdx 3038 jnc L$ocb_dec_grandloop 3039 3040L$ocb_dec_short: 3041 addq $6,%rdx 3042 jz L$ocb_dec_done 3043 3044 movdqu 0(%rdi),%xmm2 3045 cmpq $2,%rdx 3046 jb L$ocb_dec_one 3047 movdqu 16(%rdi),%xmm3 3048 je L$ocb_dec_two 3049 3050 movdqu 32(%rdi),%xmm4 3051 cmpq $4,%rdx 3052 jb L$ocb_dec_three 3053 movdqu 48(%rdi),%xmm5 3054 je L$ocb_dec_four 3055 3056 movdqu 64(%rdi),%xmm6 3057 pxor %xmm7,%xmm7 3058 3059 call __ocb_decrypt6 3060 3061 movdqa %xmm14,%xmm15 3062 movups %xmm2,0(%rsi) 3063 pxor %xmm2,%xmm8 3064 movups %xmm3,16(%rsi) 3065 pxor %xmm3,%xmm8 3066 movups %xmm4,32(%rsi) 3067 pxor %xmm4,%xmm8 3068 movups %xmm5,48(%rsi) 3069 pxor %xmm5,%xmm8 3070 movups %xmm6,64(%rsi) 3071 pxor %xmm6,%xmm8 3072 3073 jmp L$ocb_dec_done 3074 3075.p2align 4 3076L$ocb_dec_one: 3077 movdqa %xmm10,%xmm7 3078 3079 call __ocb_decrypt1 3080 3081 movdqa %xmm7,%xmm15 3082 movups %xmm2,0(%rsi) 3083 xorps %xmm2,%xmm8 3084 jmp L$ocb_dec_done 3085 3086.p2align 4 3087L$ocb_dec_two: 3088 pxor %xmm4,%xmm4 3089 pxor %xmm5,%xmm5 3090 3091 call __ocb_decrypt4 3092 3093 movdqa %xmm11,%xmm15 3094 movups %xmm2,0(%rsi) 3095 xorps %xmm2,%xmm8 3096 movups %xmm3,16(%rsi) 3097 xorps %xmm3,%xmm8 3098 3099 jmp L$ocb_dec_done 3100 3101.p2align 4 3102L$ocb_dec_three: 3103 pxor %xmm5,%xmm5 3104 3105 call __ocb_decrypt4 3106 3107 movdqa %xmm12,%xmm15 3108 movups %xmm2,0(%rsi) 3109 xorps %xmm2,%xmm8 3110 movups %xmm3,16(%rsi) 3111 xorps %xmm3,%xmm8 3112 movups %xmm4,32(%rsi) 3113 xorps %xmm4,%xmm8 3114 3115 jmp L$ocb_dec_done 3116 3117.p2align 4 3118L$ocb_dec_four: 3119 call __ocb_decrypt4 3120 3121 movdqa %xmm13,%xmm15 3122 movups %xmm2,0(%rsi) 3123 pxor %xmm2,%xmm8 
3124 movups %xmm3,16(%rsi) 3125 pxor %xmm3,%xmm8 3126 movups %xmm4,32(%rsi) 3127 pxor %xmm4,%xmm8 3128 movups %xmm5,48(%rsi) 3129 pxor %xmm5,%xmm8 3130 3131L$ocb_dec_done: 3132 pxor %xmm0,%xmm15 3133 movdqu %xmm8,(%rbp) 3134 movdqu %xmm15,(%r9) 3135 3136 xorps %xmm0,%xmm0 3137 pxor %xmm1,%xmm1 3138 pxor %xmm2,%xmm2 3139 pxor %xmm3,%xmm3 3140 pxor %xmm4,%xmm4 3141 pxor %xmm5,%xmm5 3142 pxor %xmm6,%xmm6 3143 pxor %xmm7,%xmm7 3144 pxor %xmm8,%xmm8 3145 pxor %xmm9,%xmm9 3146 pxor %xmm10,%xmm10 3147 pxor %xmm11,%xmm11 3148 pxor %xmm12,%xmm12 3149 pxor %xmm13,%xmm13 3150 pxor %xmm14,%xmm14 3151 pxor %xmm15,%xmm15 3152 leaq 40(%rsp),%rax 3153 movq -40(%rax),%r14 3154 movq -32(%rax),%r13 3155 movq -24(%rax),%r12 3156 movq -16(%rax),%rbp 3157 movq -8(%rax),%rbx 3158 leaq (%rax),%rsp 3159L$ocb_dec_epilogue: 3160 .byte 0xf3,0xc3 3161 3162 3163 3164.p2align 5 3165__ocb_decrypt6: 3166 pxor %xmm9,%xmm15 3167 movdqu (%rbx,%r12,1),%xmm11 3168 movdqa %xmm10,%xmm12 3169 movdqu (%rbx,%r13,1),%xmm13 3170 movdqa %xmm10,%xmm14 3171 pxor %xmm15,%xmm10 3172 movdqu (%rbx,%r14,1),%xmm15 3173 pxor %xmm10,%xmm11 3174 pxor %xmm10,%xmm2 3175 pxor %xmm11,%xmm12 3176 pxor %xmm11,%xmm3 3177 pxor %xmm12,%xmm13 3178 pxor %xmm12,%xmm4 3179 pxor %xmm13,%xmm14 3180 pxor %xmm13,%xmm5 3181 pxor %xmm14,%xmm15 3182 pxor %xmm14,%xmm6 3183 pxor %xmm15,%xmm7 3184 movups 32(%r11),%xmm0 3185 3186 leaq 1(%r8),%r12 3187 leaq 3(%r8),%r13 3188 leaq 5(%r8),%r14 3189 addq $6,%r8 3190 pxor %xmm9,%xmm10 3191 bsfq %r12,%r12 3192 bsfq %r13,%r13 3193 bsfq %r14,%r14 3194 3195.byte 102,15,56,222,209 3196.byte 102,15,56,222,217 3197.byte 102,15,56,222,225 3198.byte 102,15,56,222,233 3199 pxor %xmm9,%xmm11 3200 pxor %xmm9,%xmm12 3201.byte 102,15,56,222,241 3202 pxor %xmm9,%xmm13 3203 pxor %xmm9,%xmm14 3204.byte 102,15,56,222,249 3205 movups 48(%r11),%xmm1 3206 pxor %xmm9,%xmm15 3207 3208.byte 102,15,56,222,208 3209.byte 102,15,56,222,216 3210.byte 102,15,56,222,224 3211.byte 102,15,56,222,232 3212.byte 102,15,56,222,240 
3213.byte 102,15,56,222,248 3214 movups 64(%r11),%xmm0 3215 shlq $4,%r12 3216 shlq $4,%r13 3217 jmp L$ocb_dec_loop6 3218 3219.p2align 5 3220L$ocb_dec_loop6: 3221.byte 102,15,56,222,209 3222.byte 102,15,56,222,217 3223.byte 102,15,56,222,225 3224.byte 102,15,56,222,233 3225.byte 102,15,56,222,241 3226.byte 102,15,56,222,249 3227 movups (%rcx,%rax,1),%xmm1 3228 addq $32,%rax 3229 3230.byte 102,15,56,222,208 3231.byte 102,15,56,222,216 3232.byte 102,15,56,222,224 3233.byte 102,15,56,222,232 3234.byte 102,15,56,222,240 3235.byte 102,15,56,222,248 3236 movups -16(%rcx,%rax,1),%xmm0 3237 jnz L$ocb_dec_loop6 3238 3239.byte 102,15,56,222,209 3240.byte 102,15,56,222,217 3241.byte 102,15,56,222,225 3242.byte 102,15,56,222,233 3243.byte 102,15,56,222,241 3244.byte 102,15,56,222,249 3245 movups 16(%r11),%xmm1 3246 shlq $4,%r14 3247 3248.byte 102,65,15,56,223,210 3249 movdqu (%rbx),%xmm10 3250 movq %r10,%rax 3251.byte 102,65,15,56,223,219 3252.byte 102,65,15,56,223,228 3253.byte 102,65,15,56,223,237 3254.byte 102,65,15,56,223,246 3255.byte 102,65,15,56,223,255 3256 .byte 0xf3,0xc3 3257 3258 3259 3260.p2align 5 3261__ocb_decrypt4: 3262 pxor %xmm9,%xmm15 3263 movdqu (%rbx,%r12,1),%xmm11 3264 movdqa %xmm10,%xmm12 3265 movdqu (%rbx,%r13,1),%xmm13 3266 pxor %xmm15,%xmm10 3267 pxor %xmm10,%xmm11 3268 pxor %xmm10,%xmm2 3269 pxor %xmm11,%xmm12 3270 pxor %xmm11,%xmm3 3271 pxor %xmm12,%xmm13 3272 pxor %xmm12,%xmm4 3273 pxor %xmm13,%xmm5 3274 movups 32(%r11),%xmm0 3275 3276 pxor %xmm9,%xmm10 3277 pxor %xmm9,%xmm11 3278 pxor %xmm9,%xmm12 3279 pxor %xmm9,%xmm13 3280 3281.byte 102,15,56,222,209 3282.byte 102,15,56,222,217 3283.byte 102,15,56,222,225 3284.byte 102,15,56,222,233 3285 movups 48(%r11),%xmm1 3286 3287.byte 102,15,56,222,208 3288.byte 102,15,56,222,216 3289.byte 102,15,56,222,224 3290.byte 102,15,56,222,232 3291 movups 64(%r11),%xmm0 3292 jmp L$ocb_dec_loop4 3293 3294.p2align 5 3295L$ocb_dec_loop4: 3296.byte 102,15,56,222,209 3297.byte 102,15,56,222,217 3298.byte 102,15,56,222,225 
3299.byte 102,15,56,222,233 3300 movups (%rcx,%rax,1),%xmm1 3301 addq $32,%rax 3302 3303.byte 102,15,56,222,208 3304.byte 102,15,56,222,216 3305.byte 102,15,56,222,224 3306.byte 102,15,56,222,232 3307 movups -16(%rcx,%rax,1),%xmm0 3308 jnz L$ocb_dec_loop4 3309 3310.byte 102,15,56,222,209 3311.byte 102,15,56,222,217 3312.byte 102,15,56,222,225 3313.byte 102,15,56,222,233 3314 movups 16(%r11),%xmm1 3315 movq %r10,%rax 3316 3317.byte 102,65,15,56,223,210 3318.byte 102,65,15,56,223,219 3319.byte 102,65,15,56,223,228 3320.byte 102,65,15,56,223,237 3321 .byte 0xf3,0xc3 3322 3323 3324 3325.p2align 5 3326__ocb_decrypt1: 3327 pxor %xmm15,%xmm7 3328 pxor %xmm9,%xmm7 3329 pxor %xmm7,%xmm2 3330 movups 32(%r11),%xmm0 3331 3332.byte 102,15,56,222,209 3333 movups 48(%r11),%xmm1 3334 pxor %xmm9,%xmm7 3335 3336.byte 102,15,56,222,208 3337 movups 64(%r11),%xmm0 3338 jmp L$ocb_dec_loop1 3339 3340.p2align 5 3341L$ocb_dec_loop1: 3342.byte 102,15,56,222,209 3343 movups (%rcx,%rax,1),%xmm1 3344 addq $32,%rax 3345 3346.byte 102,15,56,222,208 3347 movups -16(%rcx,%rax,1),%xmm0 3348 jnz L$ocb_dec_loop1 3349 3350.byte 102,15,56,222,209 3351 movups 16(%r11),%xmm1 3352 movq %r10,%rax 3353 3354.byte 102,15,56,223,215 3355 .byte 0xf3,0xc3 3356 3357.globl _aesni_cbc_encrypt 3358.private_extern _aesni_cbc_encrypt 3359 3360.p2align 4 3361_aesni_cbc_encrypt: 3362 testq %rdx,%rdx 3363 jz L$cbc_ret 3364 3365 movl 240(%rcx),%r10d 3366 movq %rcx,%r11 3367 testl %r9d,%r9d 3368 jz L$cbc_decrypt 3369 3370 movups (%r8),%xmm2 3371 movl %r10d,%eax 3372 cmpq $16,%rdx 3373 jb L$cbc_enc_tail 3374 subq $16,%rdx 3375 jmp L$cbc_enc_loop 3376.p2align 4 3377L$cbc_enc_loop: 3378 movups (%rdi),%xmm3 3379 leaq 16(%rdi),%rdi 3380 3381 movups (%rcx),%xmm0 3382 movups 16(%rcx),%xmm1 3383 xorps %xmm0,%xmm3 3384 leaq 32(%rcx),%rcx 3385 xorps %xmm3,%xmm2 3386L$oop_enc1_15: 3387.byte 102,15,56,220,209 3388 decl %eax 3389 movups (%rcx),%xmm1 3390 leaq 16(%rcx),%rcx 3391 jnz L$oop_enc1_15 3392.byte 102,15,56,221,209 3393 movl 
%r10d,%eax 3394 movq %r11,%rcx 3395 movups %xmm2,0(%rsi) 3396 leaq 16(%rsi),%rsi 3397 subq $16,%rdx 3398 jnc L$cbc_enc_loop 3399 addq $16,%rdx 3400 jnz L$cbc_enc_tail 3401 pxor %xmm0,%xmm0 3402 pxor %xmm1,%xmm1 3403 movups %xmm2,(%r8) 3404 pxor %xmm2,%xmm2 3405 pxor %xmm3,%xmm3 3406 jmp L$cbc_ret 3407 3408L$cbc_enc_tail: 3409 movq %rdx,%rcx 3410 xchgq %rdi,%rsi 3411.long 0x9066A4F3 3412 movl $16,%ecx 3413 subq %rdx,%rcx 3414 xorl %eax,%eax 3415.long 0x9066AAF3 3416 leaq -16(%rdi),%rdi 3417 movl %r10d,%eax 3418 movq %rdi,%rsi 3419 movq %r11,%rcx 3420 xorq %rdx,%rdx 3421 jmp L$cbc_enc_loop 3422 3423.p2align 4 3424L$cbc_decrypt: 3425 cmpq $16,%rdx 3426 jne L$cbc_decrypt_bulk 3427 3428 3429 3430 movdqu (%rdi),%xmm2 3431 movdqu (%r8),%xmm3 3432 movdqa %xmm2,%xmm4 3433 movups (%rcx),%xmm0 3434 movups 16(%rcx),%xmm1 3435 leaq 32(%rcx),%rcx 3436 xorps %xmm0,%xmm2 3437L$oop_dec1_16: 3438.byte 102,15,56,222,209 3439 decl %r10d 3440 movups (%rcx),%xmm1 3441 leaq 16(%rcx),%rcx 3442 jnz L$oop_dec1_16 3443.byte 102,15,56,223,209 3444 pxor %xmm0,%xmm0 3445 pxor %xmm1,%xmm1 3446 movdqu %xmm4,(%r8) 3447 xorps %xmm3,%xmm2 3448 pxor %xmm3,%xmm3 3449 movups %xmm2,(%rsi) 3450 pxor %xmm2,%xmm2 3451 jmp L$cbc_ret 3452.p2align 4 3453L$cbc_decrypt_bulk: 3454 leaq (%rsp),%r11 3455 pushq %rbp 3456 subq $16,%rsp 3457 andq $-16,%rsp 3458 movq %rcx,%rbp 3459 movups (%r8),%xmm10 3460 movl %r10d,%eax 3461 cmpq $0x50,%rdx 3462 jbe L$cbc_dec_tail 3463 3464 movups (%rcx),%xmm0 3465 movdqu 0(%rdi),%xmm2 3466 movdqu 16(%rdi),%xmm3 3467 movdqa %xmm2,%xmm11 3468 movdqu 32(%rdi),%xmm4 3469 movdqa %xmm3,%xmm12 3470 movdqu 48(%rdi),%xmm5 3471 movdqa %xmm4,%xmm13 3472 movdqu 64(%rdi),%xmm6 3473 movdqa %xmm5,%xmm14 3474 movdqu 80(%rdi),%xmm7 3475 movdqa %xmm6,%xmm15 3476 leaq _OPENSSL_ia32cap_P(%rip),%r9 3477 movl 4(%r9),%r9d 3478 cmpq $0x70,%rdx 3479 jbe L$cbc_dec_six_or_seven 3480 3481 andl $71303168,%r9d 3482 subq $0x50,%rdx 3483 cmpl $4194304,%r9d 3484 je L$cbc_dec_loop6_enter 3485 subq $0x20,%rdx 3486 
leaq 112(%rcx),%rcx 3487 jmp L$cbc_dec_loop8_enter 3488.p2align 4 3489L$cbc_dec_loop8: 3490 movups %xmm9,(%rsi) 3491 leaq 16(%rsi),%rsi 3492L$cbc_dec_loop8_enter: 3493 movdqu 96(%rdi),%xmm8 3494 pxor %xmm0,%xmm2 3495 movdqu 112(%rdi),%xmm9 3496 pxor %xmm0,%xmm3 3497 movups 16-112(%rcx),%xmm1 3498 pxor %xmm0,%xmm4 3499 movq $-1,%rbp 3500 cmpq $0x70,%rdx 3501 pxor %xmm0,%xmm5 3502 pxor %xmm0,%xmm6 3503 pxor %xmm0,%xmm7 3504 pxor %xmm0,%xmm8 3505 3506.byte 102,15,56,222,209 3507 pxor %xmm0,%xmm9 3508 movups 32-112(%rcx),%xmm0 3509.byte 102,15,56,222,217 3510.byte 102,15,56,222,225 3511.byte 102,15,56,222,233 3512.byte 102,15,56,222,241 3513.byte 102,15,56,222,249 3514.byte 102,68,15,56,222,193 3515 adcq $0,%rbp 3516 andq $128,%rbp 3517.byte 102,68,15,56,222,201 3518 addq %rdi,%rbp 3519 movups 48-112(%rcx),%xmm1 3520.byte 102,15,56,222,208 3521.byte 102,15,56,222,216 3522.byte 102,15,56,222,224 3523.byte 102,15,56,222,232 3524.byte 102,15,56,222,240 3525.byte 102,15,56,222,248 3526.byte 102,68,15,56,222,192 3527.byte 102,68,15,56,222,200 3528 movups 64-112(%rcx),%xmm0 3529 nop 3530.byte 102,15,56,222,209 3531.byte 102,15,56,222,217 3532.byte 102,15,56,222,225 3533.byte 102,15,56,222,233 3534.byte 102,15,56,222,241 3535.byte 102,15,56,222,249 3536.byte 102,68,15,56,222,193 3537.byte 102,68,15,56,222,201 3538 movups 80-112(%rcx),%xmm1 3539 nop 3540.byte 102,15,56,222,208 3541.byte 102,15,56,222,216 3542.byte 102,15,56,222,224 3543.byte 102,15,56,222,232 3544.byte 102,15,56,222,240 3545.byte 102,15,56,222,248 3546.byte 102,68,15,56,222,192 3547.byte 102,68,15,56,222,200 3548 movups 96-112(%rcx),%xmm0 3549 nop 3550.byte 102,15,56,222,209 3551.byte 102,15,56,222,217 3552.byte 102,15,56,222,225 3553.byte 102,15,56,222,233 3554.byte 102,15,56,222,241 3555.byte 102,15,56,222,249 3556.byte 102,68,15,56,222,193 3557.byte 102,68,15,56,222,201 3558 movups 112-112(%rcx),%xmm1 3559 nop 3560.byte 102,15,56,222,208 3561.byte 102,15,56,222,216 3562.byte 102,15,56,222,224 3563.byte 
102,15,56,222,232 3564.byte 102,15,56,222,240 3565.byte 102,15,56,222,248 3566.byte 102,68,15,56,222,192 3567.byte 102,68,15,56,222,200 3568 movups 128-112(%rcx),%xmm0 3569 nop 3570.byte 102,15,56,222,209 3571.byte 102,15,56,222,217 3572.byte 102,15,56,222,225 3573.byte 102,15,56,222,233 3574.byte 102,15,56,222,241 3575.byte 102,15,56,222,249 3576.byte 102,68,15,56,222,193 3577.byte 102,68,15,56,222,201 3578 movups 144-112(%rcx),%xmm1 3579 cmpl $11,%eax 3580.byte 102,15,56,222,208 3581.byte 102,15,56,222,216 3582.byte 102,15,56,222,224 3583.byte 102,15,56,222,232 3584.byte 102,15,56,222,240 3585.byte 102,15,56,222,248 3586.byte 102,68,15,56,222,192 3587.byte 102,68,15,56,222,200 3588 movups 160-112(%rcx),%xmm0 3589 jb L$cbc_dec_done 3590.byte 102,15,56,222,209 3591.byte 102,15,56,222,217 3592.byte 102,15,56,222,225 3593.byte 102,15,56,222,233 3594.byte 102,15,56,222,241 3595.byte 102,15,56,222,249 3596.byte 102,68,15,56,222,193 3597.byte 102,68,15,56,222,201 3598 movups 176-112(%rcx),%xmm1 3599 nop 3600.byte 102,15,56,222,208 3601.byte 102,15,56,222,216 3602.byte 102,15,56,222,224 3603.byte 102,15,56,222,232 3604.byte 102,15,56,222,240 3605.byte 102,15,56,222,248 3606.byte 102,68,15,56,222,192 3607.byte 102,68,15,56,222,200 3608 movups 192-112(%rcx),%xmm0 3609 je L$cbc_dec_done 3610.byte 102,15,56,222,209 3611.byte 102,15,56,222,217 3612.byte 102,15,56,222,225 3613.byte 102,15,56,222,233 3614.byte 102,15,56,222,241 3615.byte 102,15,56,222,249 3616.byte 102,68,15,56,222,193 3617.byte 102,68,15,56,222,201 3618 movups 208-112(%rcx),%xmm1 3619 nop 3620.byte 102,15,56,222,208 3621.byte 102,15,56,222,216 3622.byte 102,15,56,222,224 3623.byte 102,15,56,222,232 3624.byte 102,15,56,222,240 3625.byte 102,15,56,222,248 3626.byte 102,68,15,56,222,192 3627.byte 102,68,15,56,222,200 3628 movups 224-112(%rcx),%xmm0 3629 jmp L$cbc_dec_done 3630.p2align 4 3631L$cbc_dec_done: 3632.byte 102,15,56,222,209 3633.byte 102,15,56,222,217 3634 pxor %xmm0,%xmm10 3635 pxor %xmm0,%xmm11 
3636.byte 102,15,56,222,225 3637.byte 102,15,56,222,233 3638 pxor %xmm0,%xmm12 3639 pxor %xmm0,%xmm13 3640.byte 102,15,56,222,241 3641.byte 102,15,56,222,249 3642 pxor %xmm0,%xmm14 3643 pxor %xmm0,%xmm15 3644.byte 102,68,15,56,222,193 3645.byte 102,68,15,56,222,201 3646 movdqu 80(%rdi),%xmm1 3647 3648.byte 102,65,15,56,223,210 3649 movdqu 96(%rdi),%xmm10 3650 pxor %xmm0,%xmm1 3651.byte 102,65,15,56,223,219 3652 pxor %xmm0,%xmm10 3653 movdqu 112(%rdi),%xmm0 3654.byte 102,65,15,56,223,228 3655 leaq 128(%rdi),%rdi 3656 movdqu 0(%rbp),%xmm11 3657.byte 102,65,15,56,223,237 3658.byte 102,65,15,56,223,246 3659 movdqu 16(%rbp),%xmm12 3660 movdqu 32(%rbp),%xmm13 3661.byte 102,65,15,56,223,255 3662.byte 102,68,15,56,223,193 3663 movdqu 48(%rbp),%xmm14 3664 movdqu 64(%rbp),%xmm15 3665.byte 102,69,15,56,223,202 3666 movdqa %xmm0,%xmm10 3667 movdqu 80(%rbp),%xmm1 3668 movups -112(%rcx),%xmm0 3669 3670 movups %xmm2,(%rsi) 3671 movdqa %xmm11,%xmm2 3672 movups %xmm3,16(%rsi) 3673 movdqa %xmm12,%xmm3 3674 movups %xmm4,32(%rsi) 3675 movdqa %xmm13,%xmm4 3676 movups %xmm5,48(%rsi) 3677 movdqa %xmm14,%xmm5 3678 movups %xmm6,64(%rsi) 3679 movdqa %xmm15,%xmm6 3680 movups %xmm7,80(%rsi) 3681 movdqa %xmm1,%xmm7 3682 movups %xmm8,96(%rsi) 3683 leaq 112(%rsi),%rsi 3684 3685 subq $0x80,%rdx 3686 ja L$cbc_dec_loop8 3687 3688 movaps %xmm9,%xmm2 3689 leaq -112(%rcx),%rcx 3690 addq $0x70,%rdx 3691 jle L$cbc_dec_clear_tail_collected 3692 movups %xmm9,(%rsi) 3693 leaq 16(%rsi),%rsi 3694 cmpq $0x50,%rdx 3695 jbe L$cbc_dec_tail 3696 3697 movaps %xmm11,%xmm2 3698L$cbc_dec_six_or_seven: 3699 cmpq $0x60,%rdx 3700 ja L$cbc_dec_seven 3701 3702 movaps %xmm7,%xmm8 3703 call _aesni_decrypt6 3704 pxor %xmm10,%xmm2 3705 movaps %xmm8,%xmm10 3706 pxor %xmm11,%xmm3 3707 movdqu %xmm2,(%rsi) 3708 pxor %xmm12,%xmm4 3709 movdqu %xmm3,16(%rsi) 3710 pxor %xmm3,%xmm3 3711 pxor %xmm13,%xmm5 3712 movdqu %xmm4,32(%rsi) 3713 pxor %xmm4,%xmm4 3714 pxor %xmm14,%xmm6 3715 movdqu %xmm5,48(%rsi) 3716 pxor %xmm5,%xmm5 3717 pxor 
%xmm15,%xmm7 3718 movdqu %xmm6,64(%rsi) 3719 pxor %xmm6,%xmm6 3720 leaq 80(%rsi),%rsi 3721 movdqa %xmm7,%xmm2 3722 pxor %xmm7,%xmm7 3723 jmp L$cbc_dec_tail_collected 3724 3725.p2align 4 3726L$cbc_dec_seven: 3727 movups 96(%rdi),%xmm8 3728 xorps %xmm9,%xmm9 3729 call _aesni_decrypt8 3730 movups 80(%rdi),%xmm9 3731 pxor %xmm10,%xmm2 3732 movups 96(%rdi),%xmm10 3733 pxor %xmm11,%xmm3 3734 movdqu %xmm2,(%rsi) 3735 pxor %xmm12,%xmm4 3736 movdqu %xmm3,16(%rsi) 3737 pxor %xmm3,%xmm3 3738 pxor %xmm13,%xmm5 3739 movdqu %xmm4,32(%rsi) 3740 pxor %xmm4,%xmm4 3741 pxor %xmm14,%xmm6 3742 movdqu %xmm5,48(%rsi) 3743 pxor %xmm5,%xmm5 3744 pxor %xmm15,%xmm7 3745 movdqu %xmm6,64(%rsi) 3746 pxor %xmm6,%xmm6 3747 pxor %xmm9,%xmm8 3748 movdqu %xmm7,80(%rsi) 3749 pxor %xmm7,%xmm7 3750 leaq 96(%rsi),%rsi 3751 movdqa %xmm8,%xmm2 3752 pxor %xmm8,%xmm8 3753 pxor %xmm9,%xmm9 3754 jmp L$cbc_dec_tail_collected 3755 3756.p2align 4 3757L$cbc_dec_loop6: 3758 movups %xmm7,(%rsi) 3759 leaq 16(%rsi),%rsi 3760 movdqu 0(%rdi),%xmm2 3761 movdqu 16(%rdi),%xmm3 3762 movdqa %xmm2,%xmm11 3763 movdqu 32(%rdi),%xmm4 3764 movdqa %xmm3,%xmm12 3765 movdqu 48(%rdi),%xmm5 3766 movdqa %xmm4,%xmm13 3767 movdqu 64(%rdi),%xmm6 3768 movdqa %xmm5,%xmm14 3769 movdqu 80(%rdi),%xmm7 3770 movdqa %xmm6,%xmm15 3771L$cbc_dec_loop6_enter: 3772 leaq 96(%rdi),%rdi 3773 movdqa %xmm7,%xmm8 3774 3775 call _aesni_decrypt6 3776 3777 pxor %xmm10,%xmm2 3778 movdqa %xmm8,%xmm10 3779 pxor %xmm11,%xmm3 3780 movdqu %xmm2,(%rsi) 3781 pxor %xmm12,%xmm4 3782 movdqu %xmm3,16(%rsi) 3783 pxor %xmm13,%xmm5 3784 movdqu %xmm4,32(%rsi) 3785 pxor %xmm14,%xmm6 3786 movq %rbp,%rcx 3787 movdqu %xmm5,48(%rsi) 3788 pxor %xmm15,%xmm7 3789 movl %r10d,%eax 3790 movdqu %xmm6,64(%rsi) 3791 leaq 80(%rsi),%rsi 3792 subq $0x60,%rdx 3793 ja L$cbc_dec_loop6 3794 3795 movdqa %xmm7,%xmm2 3796 addq $0x50,%rdx 3797 jle L$cbc_dec_clear_tail_collected 3798 movups %xmm7,(%rsi) 3799 leaq 16(%rsi),%rsi 3800 3801L$cbc_dec_tail: 3802 movups (%rdi),%xmm2 3803 subq $0x10,%rdx 
3804 jbe L$cbc_dec_one 3805 3806 movups 16(%rdi),%xmm3 3807 movaps %xmm2,%xmm11 3808 subq $0x10,%rdx 3809 jbe L$cbc_dec_two 3810 3811 movups 32(%rdi),%xmm4 3812 movaps %xmm3,%xmm12 3813 subq $0x10,%rdx 3814 jbe L$cbc_dec_three 3815 3816 movups 48(%rdi),%xmm5 3817 movaps %xmm4,%xmm13 3818 subq $0x10,%rdx 3819 jbe L$cbc_dec_four 3820 3821 movups 64(%rdi),%xmm6 3822 movaps %xmm5,%xmm14 3823 movaps %xmm6,%xmm15 3824 xorps %xmm7,%xmm7 3825 call _aesni_decrypt6 3826 pxor %xmm10,%xmm2 3827 movaps %xmm15,%xmm10 3828 pxor %xmm11,%xmm3 3829 movdqu %xmm2,(%rsi) 3830 pxor %xmm12,%xmm4 3831 movdqu %xmm3,16(%rsi) 3832 pxor %xmm3,%xmm3 3833 pxor %xmm13,%xmm5 3834 movdqu %xmm4,32(%rsi) 3835 pxor %xmm4,%xmm4 3836 pxor %xmm14,%xmm6 3837 movdqu %xmm5,48(%rsi) 3838 pxor %xmm5,%xmm5 3839 leaq 64(%rsi),%rsi 3840 movdqa %xmm6,%xmm2 3841 pxor %xmm6,%xmm6 3842 pxor %xmm7,%xmm7 3843 subq $0x10,%rdx 3844 jmp L$cbc_dec_tail_collected 3845 3846.p2align 4 3847L$cbc_dec_one: 3848 movaps %xmm2,%xmm11 3849 movups (%rcx),%xmm0 3850 movups 16(%rcx),%xmm1 3851 leaq 32(%rcx),%rcx 3852 xorps %xmm0,%xmm2 3853L$oop_dec1_17: 3854.byte 102,15,56,222,209 3855 decl %eax 3856 movups (%rcx),%xmm1 3857 leaq 16(%rcx),%rcx 3858 jnz L$oop_dec1_17 3859.byte 102,15,56,223,209 3860 xorps %xmm10,%xmm2 3861 movaps %xmm11,%xmm10 3862 jmp L$cbc_dec_tail_collected 3863.p2align 4 3864L$cbc_dec_two: 3865 movaps %xmm3,%xmm12 3866 call _aesni_decrypt2 3867 pxor %xmm10,%xmm2 3868 movaps %xmm12,%xmm10 3869 pxor %xmm11,%xmm3 3870 movdqu %xmm2,(%rsi) 3871 movdqa %xmm3,%xmm2 3872 pxor %xmm3,%xmm3 3873 leaq 16(%rsi),%rsi 3874 jmp L$cbc_dec_tail_collected 3875.p2align 4 3876L$cbc_dec_three: 3877 movaps %xmm4,%xmm13 3878 call _aesni_decrypt3 3879 pxor %xmm10,%xmm2 3880 movaps %xmm13,%xmm10 3881 pxor %xmm11,%xmm3 3882 movdqu %xmm2,(%rsi) 3883 pxor %xmm12,%xmm4 3884 movdqu %xmm3,16(%rsi) 3885 pxor %xmm3,%xmm3 3886 movdqa %xmm4,%xmm2 3887 pxor %xmm4,%xmm4 3888 leaq 32(%rsi),%rsi 3889 jmp L$cbc_dec_tail_collected 3890.p2align 4 
3891L$cbc_dec_four: 3892 movaps %xmm5,%xmm14 3893 call _aesni_decrypt4 3894 pxor %xmm10,%xmm2 3895 movaps %xmm14,%xmm10 3896 pxor %xmm11,%xmm3 3897 movdqu %xmm2,(%rsi) 3898 pxor %xmm12,%xmm4 3899 movdqu %xmm3,16(%rsi) 3900 pxor %xmm3,%xmm3 3901 pxor %xmm13,%xmm5 3902 movdqu %xmm4,32(%rsi) 3903 pxor %xmm4,%xmm4 3904 movdqa %xmm5,%xmm2 3905 pxor %xmm5,%xmm5 3906 leaq 48(%rsi),%rsi 3907 jmp L$cbc_dec_tail_collected 3908 3909.p2align 4 3910L$cbc_dec_clear_tail_collected: 3911 pxor %xmm3,%xmm3 3912 pxor %xmm4,%xmm4 3913 pxor %xmm5,%xmm5 3914 pxor %xmm6,%xmm6 3915 pxor %xmm7,%xmm7 3916 pxor %xmm8,%xmm8 3917 pxor %xmm9,%xmm9 3918L$cbc_dec_tail_collected: 3919 movups %xmm10,(%r8) 3920 andq $15,%rdx 3921 jnz L$cbc_dec_tail_partial 3922 movups %xmm2,(%rsi) 3923 pxor %xmm2,%xmm2 3924 jmp L$cbc_dec_ret 3925.p2align 4 3926L$cbc_dec_tail_partial: 3927 movaps %xmm2,(%rsp) 3928 pxor %xmm2,%xmm2 3929 movq $16,%rcx 3930 movq %rsi,%rdi 3931 subq %rdx,%rcx 3932 leaq (%rsp),%rsi 3933.long 0x9066A4F3 3934 movdqa %xmm2,(%rsp) 3935 3936L$cbc_dec_ret: 3937 xorps %xmm0,%xmm0 3938 pxor %xmm1,%xmm1 3939 movq -8(%r11),%rbp 3940 leaq (%r11),%rsp 3941L$cbc_ret: 3942 .byte 0xf3,0xc3 3943 3944.globl _aesni_set_decrypt_key 3945.private_extern _aesni_set_decrypt_key 3946 3947.p2align 4 3948_aesni_set_decrypt_key: 3949.byte 0x48,0x83,0xEC,0x08 3950 call __aesni_set_encrypt_key 3951 shll $4,%esi 3952 testl %eax,%eax 3953 jnz L$dec_key_ret 3954 leaq 16(%rdx,%rsi,1),%rdi 3955 3956 movups (%rdx),%xmm0 3957 movups (%rdi),%xmm1 3958 movups %xmm0,(%rdi) 3959 movups %xmm1,(%rdx) 3960 leaq 16(%rdx),%rdx 3961 leaq -16(%rdi),%rdi 3962 3963L$dec_key_inverse: 3964 movups (%rdx),%xmm0 3965 movups (%rdi),%xmm1 3966.byte 102,15,56,219,192 3967.byte 102,15,56,219,201 3968 leaq 16(%rdx),%rdx 3969 leaq -16(%rdi),%rdi 3970 movups %xmm0,16(%rdi) 3971 movups %xmm1,-16(%rdx) 3972 cmpq %rdx,%rdi 3973 ja L$dec_key_inverse 3974 3975 movups (%rdx),%xmm0 3976.byte 102,15,56,219,192 3977 pxor %xmm1,%xmm1 3978 movups %xmm0,(%rdi) 
3979 pxor %xmm0,%xmm0 3980L$dec_key_ret: 3981 addq $8,%rsp 3982 .byte 0xf3,0xc3 3983L$SEH_end_set_decrypt_key: 3984 3985.globl _aesni_set_encrypt_key 3986.private_extern _aesni_set_encrypt_key 3987 3988.p2align 4 3989_aesni_set_encrypt_key: 3990__aesni_set_encrypt_key: 3991.byte 0x48,0x83,0xEC,0x08 3992 movq $-1,%rax 3993 testq %rdi,%rdi 3994 jz L$enc_key_ret 3995 testq %rdx,%rdx 3996 jz L$enc_key_ret 3997 3998 movups (%rdi),%xmm0 3999 xorps %xmm4,%xmm4 4000 leaq _OPENSSL_ia32cap_P(%rip),%r10 4001 movl 4(%r10),%r10d 4002 andl $268437504,%r10d 4003 leaq 16(%rdx),%rax 4004 cmpl $256,%esi 4005 je L$14rounds 4006 cmpl $192,%esi 4007 je L$12rounds 4008 cmpl $128,%esi 4009 jne L$bad_keybits 4010 4011L$10rounds: 4012 movl $9,%esi 4013 cmpl $268435456,%r10d 4014 je L$10rounds_alt 4015 4016 movups %xmm0,(%rdx) 4017.byte 102,15,58,223,200,1 4018 call L$key_expansion_128_cold 4019.byte 102,15,58,223,200,2 4020 call L$key_expansion_128 4021.byte 102,15,58,223,200,4 4022 call L$key_expansion_128 4023.byte 102,15,58,223,200,8 4024 call L$key_expansion_128 4025.byte 102,15,58,223,200,16 4026 call L$key_expansion_128 4027.byte 102,15,58,223,200,32 4028 call L$key_expansion_128 4029.byte 102,15,58,223,200,64 4030 call L$key_expansion_128 4031.byte 102,15,58,223,200,128 4032 call L$key_expansion_128 4033.byte 102,15,58,223,200,27 4034 call L$key_expansion_128 4035.byte 102,15,58,223,200,54 4036 call L$key_expansion_128 4037 movups %xmm0,(%rax) 4038 movl %esi,80(%rax) 4039 xorl %eax,%eax 4040 jmp L$enc_key_ret 4041 4042.p2align 4 4043L$10rounds_alt: 4044 movdqa L$key_rotate(%rip),%xmm5 4045 movl $8,%r10d 4046 movdqa L$key_rcon1(%rip),%xmm4 4047 movdqa %xmm0,%xmm2 4048 movdqu %xmm0,(%rdx) 4049 jmp L$oop_key128 4050 4051.p2align 4 4052L$oop_key128: 4053.byte 102,15,56,0,197 4054.byte 102,15,56,221,196 4055 pslld $1,%xmm4 4056 leaq 16(%rax),%rax 4057 4058 movdqa %xmm2,%xmm3 4059 pslldq $4,%xmm2 4060 pxor %xmm2,%xmm3 4061 pslldq $4,%xmm2 4062 pxor %xmm2,%xmm3 4063 pslldq $4,%xmm2 4064 
pxor %xmm3,%xmm2 4065 4066 pxor %xmm2,%xmm0 4067 movdqu %xmm0,-16(%rax) 4068 movdqa %xmm0,%xmm2 4069 4070 decl %r10d 4071 jnz L$oop_key128 4072 4073 movdqa L$key_rcon1b(%rip),%xmm4 4074 4075.byte 102,15,56,0,197 4076.byte 102,15,56,221,196 4077 pslld $1,%xmm4 4078 4079 movdqa %xmm2,%xmm3 4080 pslldq $4,%xmm2 4081 pxor %xmm2,%xmm3 4082 pslldq $4,%xmm2 4083 pxor %xmm2,%xmm3 4084 pslldq $4,%xmm2 4085 pxor %xmm3,%xmm2 4086 4087 pxor %xmm2,%xmm0 4088 movdqu %xmm0,(%rax) 4089 4090 movdqa %xmm0,%xmm2 4091.byte 102,15,56,0,197 4092.byte 102,15,56,221,196 4093 4094 movdqa %xmm2,%xmm3 4095 pslldq $4,%xmm2 4096 pxor %xmm2,%xmm3 4097 pslldq $4,%xmm2 4098 pxor %xmm2,%xmm3 4099 pslldq $4,%xmm2 4100 pxor %xmm3,%xmm2 4101 4102 pxor %xmm2,%xmm0 4103 movdqu %xmm0,16(%rax) 4104 4105 movl %esi,96(%rax) 4106 xorl %eax,%eax 4107 jmp L$enc_key_ret 4108 4109.p2align 4 4110L$12rounds: 4111 movq 16(%rdi),%xmm2 4112 movl $11,%esi 4113 cmpl $268435456,%r10d 4114 je L$12rounds_alt 4115 4116 movups %xmm0,(%rdx) 4117.byte 102,15,58,223,202,1 4118 call L$key_expansion_192a_cold 4119.byte 102,15,58,223,202,2 4120 call L$key_expansion_192b 4121.byte 102,15,58,223,202,4 4122 call L$key_expansion_192a 4123.byte 102,15,58,223,202,8 4124 call L$key_expansion_192b 4125.byte 102,15,58,223,202,16 4126 call L$key_expansion_192a 4127.byte 102,15,58,223,202,32 4128 call L$key_expansion_192b 4129.byte 102,15,58,223,202,64 4130 call L$key_expansion_192a 4131.byte 102,15,58,223,202,128 4132 call L$key_expansion_192b 4133 movups %xmm0,(%rax) 4134 movl %esi,48(%rax) 4135 xorq %rax,%rax 4136 jmp L$enc_key_ret 4137 4138.p2align 4 4139L$12rounds_alt: 4140 movdqa L$key_rotate192(%rip),%xmm5 4141 movdqa L$key_rcon1(%rip),%xmm4 4142 movl $8,%r10d 4143 movdqu %xmm0,(%rdx) 4144 jmp L$oop_key192 4145 4146.p2align 4 4147L$oop_key192: 4148 movq %xmm2,0(%rax) 4149 movdqa %xmm2,%xmm1 4150.byte 102,15,56,0,213 4151.byte 102,15,56,221,212 4152 pslld $1,%xmm4 4153 leaq 24(%rax),%rax 4154 4155 movdqa %xmm0,%xmm3 4156 pslldq 
$4,%xmm0 4157 pxor %xmm0,%xmm3 4158 pslldq $4,%xmm0 4159 pxor %xmm0,%xmm3 4160 pslldq $4,%xmm0 4161 pxor %xmm3,%xmm0 4162 4163 pshufd $0xff,%xmm0,%xmm3 4164 pxor %xmm1,%xmm3 4165 pslldq $4,%xmm1 4166 pxor %xmm1,%xmm3 4167 4168 pxor %xmm2,%xmm0 4169 pxor %xmm3,%xmm2 4170 movdqu %xmm0,-16(%rax) 4171 4172 decl %r10d 4173 jnz L$oop_key192 4174 4175 movl %esi,32(%rax) 4176 xorl %eax,%eax 4177 jmp L$enc_key_ret 4178 4179.p2align 4 4180L$14rounds: 4181 movups 16(%rdi),%xmm2 4182 movl $13,%esi 4183 leaq 16(%rax),%rax 4184 cmpl $268435456,%r10d 4185 je L$14rounds_alt 4186 4187 movups %xmm0,(%rdx) 4188 movups %xmm2,16(%rdx) 4189.byte 102,15,58,223,202,1 4190 call L$key_expansion_256a_cold 4191.byte 102,15,58,223,200,1 4192 call L$key_expansion_256b 4193.byte 102,15,58,223,202,2 4194 call L$key_expansion_256a 4195.byte 102,15,58,223,200,2 4196 call L$key_expansion_256b 4197.byte 102,15,58,223,202,4 4198 call L$key_expansion_256a 4199.byte 102,15,58,223,200,4 4200 call L$key_expansion_256b 4201.byte 102,15,58,223,202,8 4202 call L$key_expansion_256a 4203.byte 102,15,58,223,200,8 4204 call L$key_expansion_256b 4205.byte 102,15,58,223,202,16 4206 call L$key_expansion_256a 4207.byte 102,15,58,223,200,16 4208 call L$key_expansion_256b 4209.byte 102,15,58,223,202,32 4210 call L$key_expansion_256a 4211.byte 102,15,58,223,200,32 4212 call L$key_expansion_256b 4213.byte 102,15,58,223,202,64 4214 call L$key_expansion_256a 4215 movups %xmm0,(%rax) 4216 movl %esi,16(%rax) 4217 xorq %rax,%rax 4218 jmp L$enc_key_ret 4219 4220.p2align 4 4221L$14rounds_alt: 4222 movdqa L$key_rotate(%rip),%xmm5 4223 movdqa L$key_rcon1(%rip),%xmm4 4224 movl $7,%r10d 4225 movdqu %xmm0,0(%rdx) 4226 movdqa %xmm2,%xmm1 4227 movdqu %xmm2,16(%rdx) 4228 jmp L$oop_key256 4229 4230.p2align 4 4231L$oop_key256: 4232.byte 102,15,56,0,213 4233.byte 102,15,56,221,212 4234 4235 movdqa %xmm0,%xmm3 4236 pslldq $4,%xmm0 4237 pxor %xmm0,%xmm3 4238 pslldq $4,%xmm0 4239 pxor %xmm0,%xmm3 4240 pslldq $4,%xmm0 4241 pxor %xmm3,%xmm0 
4242 pslld $1,%xmm4 4243 4244 pxor %xmm2,%xmm0 4245 movdqu %xmm0,(%rax) 4246 4247 decl %r10d 4248 jz L$done_key256 4249 4250 pshufd $0xff,%xmm0,%xmm2 4251 pxor %xmm3,%xmm3 4252.byte 102,15,56,221,211 4253 4254 movdqa %xmm1,%xmm3 4255 pslldq $4,%xmm1 4256 pxor %xmm1,%xmm3 4257 pslldq $4,%xmm1 4258 pxor %xmm1,%xmm3 4259 pslldq $4,%xmm1 4260 pxor %xmm3,%xmm1 4261 4262 pxor %xmm1,%xmm2 4263 movdqu %xmm2,16(%rax) 4264 leaq 32(%rax),%rax 4265 movdqa %xmm2,%xmm1 4266 4267 jmp L$oop_key256 4268 4269L$done_key256: 4270 movl %esi,16(%rax) 4271 xorl %eax,%eax 4272 jmp L$enc_key_ret 4273 4274.p2align 4 4275L$bad_keybits: 4276 movq $-2,%rax 4277L$enc_key_ret: 4278 pxor %xmm0,%xmm0 4279 pxor %xmm1,%xmm1 4280 pxor %xmm2,%xmm2 4281 pxor %xmm3,%xmm3 4282 pxor %xmm4,%xmm4 4283 pxor %xmm5,%xmm5 4284 addq $8,%rsp 4285 .byte 0xf3,0xc3 4286L$SEH_end_set_encrypt_key: 4287 4288.p2align 4 4289L$key_expansion_128: 4290 movups %xmm0,(%rax) 4291 leaq 16(%rax),%rax 4292L$key_expansion_128_cold: 4293 shufps $16,%xmm0,%xmm4 4294 xorps %xmm4,%xmm0 4295 shufps $140,%xmm0,%xmm4 4296 xorps %xmm4,%xmm0 4297 shufps $255,%xmm1,%xmm1 4298 xorps %xmm1,%xmm0 4299 .byte 0xf3,0xc3 4300 4301.p2align 4 4302L$key_expansion_192a: 4303 movups %xmm0,(%rax) 4304 leaq 16(%rax),%rax 4305L$key_expansion_192a_cold: 4306 movaps %xmm2,%xmm5 4307L$key_expansion_192b_warm: 4308 shufps $16,%xmm0,%xmm4 4309 movdqa %xmm2,%xmm3 4310 xorps %xmm4,%xmm0 4311 shufps $140,%xmm0,%xmm4 4312 pslldq $4,%xmm3 4313 xorps %xmm4,%xmm0 4314 pshufd $85,%xmm1,%xmm1 4315 pxor %xmm3,%xmm2 4316 pxor %xmm1,%xmm0 4317 pshufd $255,%xmm0,%xmm3 4318 pxor %xmm3,%xmm2 4319 .byte 0xf3,0xc3 4320 4321.p2align 4 4322L$key_expansion_192b: 4323 movaps %xmm0,%xmm3 4324 shufps $68,%xmm0,%xmm5 4325 movups %xmm5,(%rax) 4326 shufps $78,%xmm2,%xmm3 4327 movups %xmm3,16(%rax) 4328 leaq 32(%rax),%rax 4329 jmp L$key_expansion_192b_warm 4330 4331.p2align 4 4332L$key_expansion_256a: 4333 movups %xmm2,(%rax) 4334 leaq 16(%rax),%rax 4335L$key_expansion_256a_cold: 4336 
shufps $16,%xmm0,%xmm4 4337 xorps %xmm4,%xmm0 4338 shufps $140,%xmm0,%xmm4 4339 xorps %xmm4,%xmm0 4340 shufps $255,%xmm1,%xmm1 4341 xorps %xmm1,%xmm0 4342 .byte 0xf3,0xc3 4343 4344.p2align 4 4345L$key_expansion_256b: 4346 movups %xmm0,(%rax) 4347 leaq 16(%rax),%rax 4348 4349 shufps $16,%xmm2,%xmm4 4350 xorps %xmm4,%xmm2 4351 shufps $140,%xmm2,%xmm4 4352 xorps %xmm4,%xmm2 4353 shufps $170,%xmm1,%xmm1 4354 xorps %xmm1,%xmm2 4355 .byte 0xf3,0xc3 4356 4357 4358.p2align 6 4359L$bswap_mask: 4360.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 4361L$increment32: 4362.long 6,6,6,0 4363L$increment64: 4364.long 1,0,0,0 4365L$xts_magic: 4366.long 0x87,0,1,0 4367L$increment1: 4368.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 4369L$key_rotate: 4370.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d 4371L$key_rotate192: 4372.long 0x04070605,0x04070605,0x04070605,0x04070605 4373L$key_rcon1: 4374.long 1,1,1,1 4375L$key_rcon1b: 4376.long 0x1b,0x1b,0x1b,0x1b 4377 4378.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 4379.p2align 6 4380#endif 4381