.text
/*
 * AES-NI primitives for 32-bit x86 (AT&T syntax, Mach-O naming).
 * cdecl: all arguments on the stack; eax/ecx/edx are scratch.
 * AESENC/AESDEC etc. are emitted as raw .byte sequences so that old
 * assemblers accept the file; each is decoded in a comment.
 * FIX: the CET guards used NASM's %ifdef/%endif, which neither GAS nor
 * the C preprocessor understands; this file is preprocessed as a .S
 * file, so the guards must be cpp #ifdef/#endif.
 */
/*
 * void aesni_encrypt(const unsigned char *in, unsigned char *out,
 *                    const AES_KEY *key)
 * Encrypt a single 16-byte block.  4(%esp)=in, 8(%esp)=out,
 * 12(%esp)=key; 240(key) holds the round count used as loop counter.
 * Scrubs xmm0-xmm2 before returning.
 */
.globl	_aesni_encrypt
.align	4
_aesni_encrypt:
L_aesni_encrypt_begin:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movl	4(%esp),%eax
	movl	12(%esp),%edx
	movups	(%eax),%xmm2		# xmm2 = input block
	movl	240(%edx),%ecx		# ecx = number of rounds
	movl	8(%esp),%eax
	movups	(%edx),%xmm0		# round key 0
	movups	16(%edx),%xmm1		# round key 1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2		# whitening
L000enc1_loop_1:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L000enc1_loop_1
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0		# clear key material from registers
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%eax)
	pxor	%xmm2,%xmm2
	ret
/*
 * void aesni_decrypt(const unsigned char *in, unsigned char *out,
 *                    const AES_KEY *key)
 * Single-block decryption; mirror image of _aesni_encrypt.
 */
.globl	_aesni_decrypt
.align	4
_aesni_decrypt:
L_aesni_decrypt_begin:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movl	4(%esp),%eax
	movl	12(%esp),%edx
	movups	(%eax),%xmm2
	movl	240(%edx),%ecx
	movl	8(%esp),%eax
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L001dec1_loop_2:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L001dec1_loop_2
.byte	102,15,56,223,209		# aesdeclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%eax)
	pxor	%xmm2,%xmm2
	ret
/*
 * __aesni_encrypt2: internal helper, encrypts xmm2,xmm3 in parallel.
 * In: edx = key schedule, ecx = rounds.  Clobbers ecx, edx, xmm0, xmm1.
 * The loop runs the schedule via a negative index so the final addl
 * leaves ecx non-zero until the last round pair.
 */
.align	4
__aesni_encrypt2:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movups	(%edx),%xmm0
	shll	$4,%ecx			# rounds * 16 = key-schedule bytes
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx	# edx = end of schedule
	negl	%ecx
	addl	$16,%ecx
L002enc2_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L002enc2_loop
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
	ret
/*
 * __aesni_decrypt2: decrypts xmm2,xmm3 in parallel; see __aesni_encrypt2.
 */
.align	4
__aesni_decrypt2:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
	addl	$16,%ecx
L003dec2_loop:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L003dec2_loop
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216		# aesdeclast %xmm0,%xmm3
	ret
/*
 * __aesni_encrypt3: encrypts xmm2..xmm4 in parallel (same contract).
 */
.align	4
__aesni_encrypt3:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
	addl	$16,%ecx
L004enc3_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L004enc3_loop
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224		# aesenclast %xmm0,%xmm4
	ret
/*
 * __aesni_decrypt3: decrypts xmm2..xmm4 in parallel.
 */
.align	4
__aesni_decrypt3:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
	addl	$16,%ecx
L005dec3_loop:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224		# aesdec %xmm0,%xmm4
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L005dec3_loop
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216		# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224		# aesdeclast %xmm0,%xmm4
	ret
/*
 * __aesni_encrypt4: encrypts xmm2..xmm5 in parallel (body continues
 * in the next section of the file).
 */
.align	4
__aesni_encrypt4:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	shll	$4,%ecx			# rounds * 16
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	15,31,64,0			# nopl 0(%eax) - alignment padding
	addl	$16,%ecx
L006enc4_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		# aesenc %xmm0,%xmm5
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L006enc4_loop
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224		# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232		# aesenclast %xmm0,%xmm5
	ret
/*
 * __aesni_decrypt4: decrypts xmm2..xmm5 in parallel.
 * In: edx = key schedule, ecx = rounds.  Clobbers ecx, edx, xmm0, xmm1.
 */
.align	4
__aesni_decrypt4:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	shll	$4,%ecx
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	15,31,64,0			# nopl 0(%eax) - alignment padding
	addl	$16,%ecx
L007dec4_loop:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224		# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232		# aesdec %xmm0,%xmm5
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L007dec4_loop
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216		# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224		# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232		# aesdeclast %xmm0,%xmm5
	ret
/*
 * __aesni_encrypt6: encrypts xmm2..xmm7 in parallel.
 * In: edx = key schedule, ecx = rounds.  Clobbers ecx, edx, xmm0, xmm1.
 * The first round for xmm2..xmm4 is interleaved with the key whitening
 * of xmm5..xmm7; L_aesni_encrypt6_enter is a secondary entry used by
 * the CTR/XTS code, which performs that first round itself.
 */
.align	4
__aesni_encrypt6:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	pxor	%xmm0,%xmm7
	movups	(%edx,%ecx,1),%xmm0
	addl	$16,%ecx
	jmp	L008_aesni_encrypt6_inner
.align	4,0x90
L009enc6_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
L008_aesni_encrypt6_inner:
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
L_aesni_encrypt6_enter:
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		# aesenc %xmm0,%xmm5
.byte	102,15,56,220,240		# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		# aesenc %xmm0,%xmm7
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L009enc6_loop
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224		# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232		# aesenclast %xmm0,%xmm5
.byte	102,15,56,221,240		# aesenclast %xmm0,%xmm6
.byte	102,15,56,221,248		# aesenclast %xmm0,%xmm7
	ret
/*
 * __aesni_decrypt6: decrypts xmm2..xmm7 in parallel; mirror of
 * __aesni_encrypt6, with secondary entry L_aesni_decrypt6_enter.
 */
.align	4
__aesni_decrypt6:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
	pxor	%xmm0,%xmm7
	movups	(%edx,%ecx,1),%xmm0
	addl	$16,%ecx
	jmp	L010_aesni_decrypt6_inner
.align	4,0x90
L011dec6_loop:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
L010_aesni_decrypt6_inner:
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241		# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249		# aesdec %xmm1,%xmm7
L_aesni_decrypt6_enter:
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224		# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232		# aesdec %xmm0,%xmm5
.byte	102,15,56,222,240		# aesdec %xmm0,%xmm6
.byte	102,15,56,222,248		# aesdec %xmm0,%xmm7
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L011dec6_loop
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241		# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249		# aesdec %xmm1,%xmm7
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216		# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224		# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232		# aesdeclast %xmm0,%xmm5
.byte	102,15,56,223,240		# aesdeclast %xmm0,%xmm6
.byte	102,15,56,223,248		# aesdeclast %xmm0,%xmm7
	ret
/*
 * void aesni_ecb_encrypt(const unsigned char *in, unsigned char *out,
 *                        size_t len, const AES_KEY *key, int enc)
 * ECB mode over len bytes (rounded down to a multiple of 16).
 * 20(%esp)=in, 24(%esp)=out, 28(%esp)=len, 32(%esp)=key, 36(%esp)=enc
 * (after the four pushes).  Processes 6 blocks per iteration, with a
 * 1..5-block tail.  ebp caches the key pointer, ebx the round count,
 * since the __aesni_*6 helpers clobber edx/ecx.
 */
.globl	_aesni_ecb_encrypt
.align	4
_aesni_ecb_encrypt:
L_aesni_ecb_encrypt_begin:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	andl	$-16,%eax		# whole blocks only
	jz	L012ecb_ret
	movl	240(%edx),%ecx
	testl	%ebx,%ebx		# enc == 0 -> decrypt path
	jz	L013ecb_decrypt
	movl	%edx,%ebp		# ebp = key (preserved across helpers)
	movl	%ecx,%ebx		# ebx = rounds (preserved)
	cmpl	$96,%eax
	jb	L014ecb_enc_tail
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	subl	$96,%eax
	jmp	L015ecb_enc_loop6_enter
.align	4,0x90
L016ecb_enc_loop6:
	# store previous 6 results while loading the next 6 inputs
	movups	%xmm2,(%edi)
	movdqu	(%esi),%xmm2
	movups	%xmm3,16(%edi)
	movdqu	16(%esi),%xmm3
	movups	%xmm4,32(%edi)
	movdqu	32(%esi),%xmm4
	movups	%xmm5,48(%edi)
	movdqu	48(%esi),%xmm5
	movups	%xmm6,64(%edi)
	movdqu	64(%esi),%xmm6
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
L015ecb_enc_loop6_enter:
	call	__aesni_encrypt6
	movl	%ebp,%edx		# restore key/rounds clobbered by helper
	movl	%ebx,%ecx
	subl	$96,%eax
	jnc	L016ecb_enc_loop6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	addl	$96,%eax
	jz	L012ecb_ret
L014ecb_enc_tail:
	# ECB encrypt tail: eax = remaining bytes (16..80); dispatch on the
	# number of whole blocks left.
	movups	(%esi),%xmm2
	cmpl	$32,%eax
	jb	L017ecb_enc_one
	movups	16(%esi),%xmm3
	je	L018ecb_enc_two
	movups	32(%esi),%xmm4
	cmpl	$64,%eax
	jb	L019ecb_enc_three
	movups	48(%esi),%xmm5
	je	L020ecb_enc_four
	movups	64(%esi),%xmm6
	xorps	%xmm7,%xmm7		# 5 blocks: pad the 6th lane with zeros
	call	__aesni_encrypt6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L017ecb_enc_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L021enc1_loop_3:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L021enc1_loop_3
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	movups	%xmm2,(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L018ecb_enc_two:
	call	__aesni_encrypt2
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L019ecb_enc_three:
	call	__aesni_encrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L020ecb_enc_four:
	call	__aesni_encrypt4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L013ecb_decrypt:
	# ECB decrypt: same 6-block structure as the encrypt path.
	movl	%edx,%ebp		# ebp = key, ebx = rounds (helpers clobber edx/ecx)
	movl	%ecx,%ebx
	cmpl	$96,%eax
	jb	L022ecb_dec_tail
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	subl	$96,%eax
	jmp	L023ecb_dec_loop6_enter
.align	4,0x90
L024ecb_dec_loop6:
	movups	%xmm2,(%edi)
	movdqu	(%esi),%xmm2
	movups	%xmm3,16(%edi)
	movdqu	16(%esi),%xmm3
	movups	%xmm4,32(%edi)
	movdqu	32(%esi),%xmm4
	movups	%xmm5,48(%edi)
	movdqu	48(%esi),%xmm5
	movups	%xmm6,64(%edi)
	movdqu	64(%esi),%xmm6
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
L023ecb_dec_loop6_enter:
	call	__aesni_decrypt6
	movl	%ebp,%edx
	movl	%ebx,%ecx
	subl	$96,%eax
	jnc	L024ecb_dec_loop6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	addl	$96,%eax
	jz	L012ecb_ret
L022ecb_dec_tail:
	movups	(%esi),%xmm2
	cmpl	$32,%eax
	jb	L025ecb_dec_one
	movups	16(%esi),%xmm3
	je	L026ecb_dec_two
	movups	32(%esi),%xmm4
	cmpl	$64,%eax
	jb	L027ecb_dec_three
	movups	48(%esi),%xmm5
	je	L028ecb_dec_four
	movups	64(%esi),%xmm6
	xorps	%xmm7,%xmm7
	call	__aesni_decrypt6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L025ecb_dec_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L029dec1_loop_4:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L029dec1_loop_4
.byte	102,15,56,223,209		# aesdeclast %xmm1,%xmm2
	movups	%xmm2,(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L026ecb_dec_two:
	call	__aesni_decrypt2
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L027ecb_dec_three:
	call	__aesni_decrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L028ecb_dec_four:
	call	__aesni_decrypt4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
L012ecb_ret:
	# scrub all xmm registers (key/plaintext hygiene) and restore
	# callee-saved registers.
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/*
 * void aesni_ccm64_encrypt_blocks(const unsigned char *in,
 *         unsigned char *out, size_t blocks, const AES_KEY *key,
 *         const unsigned char ivec[16], unsigned char cmac[16])
 * CCM with 64-bit counter field; body follows.
 */
.globl	_aesni_ccm64_encrypt_blocks
.align	4
_aesni_ccm64_encrypt_blocks:
L_aesni_ccm64_encrypt_blocks_begin:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi		# in
	movl	24(%esp),%edi		# out
	movl	28(%esp),%eax		# block count
	movl	32(%esp),%edx		# key
	movl	36(%esp),%ebx		# ivec
	movl	40(%esp),%ecx		# cmac
	movl	%esp,%ebp
	subl	$60,%esp
	andl	$-16,%esp		# 16-byte aligned scratch frame
	movl	%ebp,48(%esp)		# saved original esp
	movdqu	(%ebx),%xmm7		# xmm7 = counter block
	movdqu	(%ecx),%xmm3		# xmm3 = running CMAC
	movl	240(%edx),%ecx
	# (%esp) = byte-swap mask {0x0c0d0e0f,0x08090a0b,0x04050607,0x00010203}
	movl	$202182159,(%esp)
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	# 16(%esp) = 64-bit constant 1, added to the counter with paddq
	movl	$1,%ebx
	xorl	%ebp,%ebp
	movl	%ebx,16(%esp)
	movl	%ebp,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ebp,28(%esp)
	shll	$4,%ecx
	movl	$16,%ebx
	leal	(%edx),%ebp		# ebp = key base
	movdqa	(%esp),%xmm5		# xmm5 = swap mask
	movdqa	%xmm7,%xmm2
	leal	32(%edx,%ecx,1),%edx	# edx = end of key schedule
	subl	%ecx,%ebx		# ebx = negative schedule offset
.byte	102,15,56,0,253			# pshufb %xmm5,%xmm7 (counter to big-endian)
L030ccm64_enc_outer:
	# encrypt counter (xmm2) and CMAC (xmm3) with a 2-lane pipeline
	movups	(%ebp),%xmm0
	movl	%ebx,%ecx
	movups	(%esi),%xmm6		# xmm6 = plaintext block
	xorps	%xmm0,%xmm2
	movups	16(%ebp),%xmm1
	xorps	%xmm6,%xmm0
	xorps	%xmm0,%xmm3		# fold plaintext into CMAC lane
	movups	32(%ebp),%xmm0
L031ccm64_enc2_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L031ccm64_enc2_loop
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	paddq	16(%esp),%xmm7		# bump counter
	decl	%eax
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
	leal	16(%esi),%esi
	xorps	%xmm2,%xmm6		# ciphertext = plaintext ^ E(counter)
	movdqa	%xmm7,%xmm2
	movups	%xmm6,(%edi)
.byte	102,15,56,0,213			# pshufb %xmm5,%xmm2 (byte-swap counter)
	leal	16(%edi),%edi
	jnz	L030ccm64_enc_outer
	movl	48(%esp),%esp		# unwind aligned frame
	movl	40(%esp),%edi
	movups	%xmm3,(%edi)		# store final CMAC
	pxor	%xmm0,%xmm0		# scrub registers
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/*
 * aesni_ccm64_decrypt_blocks: CCM decryption; label follows, body is in
 * the next section.
 */
.globl	_aesni_ccm64_decrypt_blocks
.align	4
_aesni_ccm64_decrypt_blocks:
L_aesni_ccm64_decrypt_blocks_begin:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi		# in
	movl	24(%esp),%edi		# out
	movl	28(%esp),%eax		# block count
	movl	32(%esp),%edx		# key
	movl	36(%esp),%ebx		# ivec
	movl	40(%esp),%ecx		# cmac
	movl	%esp,%ebp
	subl	$60,%esp
	andl	$-16,%esp
	movl	%ebp,48(%esp)		# saved original esp
	movdqu	(%ebx),%xmm7		# counter block
	movdqu	(%ecx),%xmm3		# running CMAC
	movl	240(%edx),%ecx
	# (%esp) = byte-swap mask, 16(%esp) = 64-bit constant 1 (see encrypt)
	movl	$202182159,(%esp)
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$1,%ebx
	xorl	%ebp,%ebp
	movl	%ebx,16(%esp)
	movl	%ebp,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ebp,28(%esp)
	movdqa	(%esp),%xmm5		# swap mask
	movdqa	%xmm7,%xmm2
	movl	%edx,%ebp		# preserve key/rounds
	movl	%ecx,%ebx
.byte	102,15,56,0,253			# pshufb %xmm5,%xmm7
	# encrypt the first counter block up front
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L032enc1_loop_5:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L032enc1_loop_5
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	shll	$4,%ebx
	movl	$16,%ecx
	movups	(%esi),%xmm6		# first ciphertext block
	paddq	16(%esp),%xmm7		# bump counter
	leal	16(%esi),%esi
	subl	%ebx,%ecx		# negative schedule offset
	leal	32(%ebp,%ebx,1),%edx	# end of key schedule
	movl	%ecx,%ebx
	jmp	L033ccm64_dec_outer
.align	4,0x90
L033ccm64_dec_outer:
	xorps	%xmm2,%xmm6		# plaintext = ciphertext ^ E(counter)
	movdqa	%xmm7,%xmm2
	movups	%xmm6,(%edi)
	leal	16(%edi),%edi
.byte	102,15,56,0,213			# pshufb %xmm5,%xmm2
	subl	$1,%eax
	jz	L034ccm64_dec_break
	# run next counter (xmm2) and CMAC-update (xmm3) in parallel
	movups	(%ebp),%xmm0
	movl	%ebx,%ecx
	movups	16(%ebp),%xmm1
	xorps	%xmm0,%xmm6
	xorps	%xmm0,%xmm2
	xorps	%xmm6,%xmm3		# fold plaintext into CMAC lane
	movups	32(%ebp),%xmm0
L035ccm64_dec2_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L035ccm64_dec2_loop
	movups	(%esi),%xmm6		# prefetch next ciphertext block
	paddq	16(%esp),%xmm7
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
	leal	16(%esi),%esi
	jmp	L033ccm64_dec_outer
.align	4,0x90
L034ccm64_dec_break:
	# final CMAC update for the last plaintext block
	movl	240(%ebp),%ecx
	movl	%ebp,%edx
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm6
	leal	32(%edx),%edx
	xorps	%xmm6,%xmm3
L036enc1_loop_6:
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L036enc1_loop_6
.byte	102,15,56,221,217		# aesenclast %xmm1,%xmm3
	movl	48(%esp),%esp		# unwind aligned frame
	movl	40(%esp),%edi
	movups	%xmm3,(%edi)		# store final CMAC
	pxor	%xmm0,%xmm0		# scrub registers
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/*
 * void aesni_ctr32_encrypt_blocks(const unsigned char *in,
 *         unsigned char *out, size_t blocks, const AES_KEY *key,
 *         const unsigned char ivec[16])
 * CTR mode with a 32-bit big-endian counter in the last IV word.
 * Keeps three counter pairs in 48/64(%esp) and processes 6 blocks per
 * iteration; counters are maintained little-endian and pshufb'd back.
 */
.globl	_aesni_ctr32_encrypt_blocks
.align	4
_aesni_ctr32_encrypt_blocks:
L_aesni_ctr32_encrypt_blocks_begin:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi		# in
	movl	24(%esp),%edi		# out
	movl	28(%esp),%eax		# block count
	movl	32(%esp),%edx		# key
	movl	36(%esp),%ebx		# ivec
	movl	%esp,%ebp
	subl	$88,%esp
	andl	$-16,%esp
	movl	%ebp,80(%esp)		# saved original esp
	cmpl	$1,%eax
	je	L037ctr32_one_shortcut
	movdqu	(%ebx),%xmm7		# xmm7 = IV
	# (%esp) = byte-swap mask; 16(%esp) = {6,6,6,0} counter increment
	movl	$202182159,(%esp)
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$6,%ecx
	xorl	%ebp,%ebp
	movl	%ecx,16(%esp)
	movl	%ecx,20(%esp)
	movl	%ecx,24(%esp)
	movl	%ebp,28(%esp)
.byte	102,15,58,22,251,3		# pextrd $3,%xmm7,%ebx (counter word)
.byte	102,15,58,34,253,3		# pinsrd $3,%ebp,%xmm7 (zero it in IV)
	movl	240(%edx),%ecx
	bswap	%ebx			# counter to little-endian arithmetic
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movdqa	(%esp),%xmm2		# swap mask
	# build counters n..n+2 in xmm0 and n+3..n+5 in xmm1
.byte	102,15,58,34,195,0		# pinsrd $0,%ebx,%xmm0
	leal	3(%ebx),%ebp
.byte	102,15,58,34,205,0		# pinsrd $0,%ebp,%xmm1
	incl	%ebx
.byte	102,15,58,34,195,1		# pinsrd $1,%ebx,%xmm0
	incl	%ebp
.byte	102,15,58,34,205,1		# pinsrd $1,%ebp,%xmm1
	incl	%ebx
.byte	102,15,58,34,195,2		# pinsrd $2,%ebx,%xmm0
	incl	%ebp
.byte	102,15,58,34,205,2		# pinsrd $2,%ebp,%xmm1
	movdqa	%xmm0,48(%esp)		# counters 0-2 (little-endian)
.byte	102,15,56,0,194			# pshufb %xmm2,%xmm0 (to big-endian)
	movdqu	(%edx),%xmm6		# round key 0
	movdqa	%xmm1,64(%esp)		# counters 3-5 (little-endian)
.byte	102,15,56,0,202			# pshufb %xmm2,%xmm1 (to big-endian)
	pshufd	$192,%xmm0,%xmm2	# replicate counter lanes
	pshufd	$128,%xmm0,%xmm3
	cmpl	$6,%eax
	jb	L038ctr32_tail
	pxor	%xmm6,%xmm7		# pre-whiten IV body with round key 0
	shll	$4,%ecx
	movl	$16,%ebx
	movdqa	%xmm7,32(%esp)		# whitened IV template
	movl	%edx,%ebp		# preserve key pointer
	subl	%ecx,%ebx		# negative schedule offset
	leal	32(%edx,%ecx,1),%edx	# end of key schedule
	subl	$6,%eax
	jmp	L039ctr32_loop6
.align	4,0x90
L039ctr32_loop6:
	# assemble 6 counter blocks, start round 1 here, then jump into
	# the shared 6-lane round loop via L_aesni_encrypt6_enter
	pshufd	$64,%xmm0,%xmm4
	movdqa	32(%esp),%xmm0
	pshufd	$192,%xmm1,%xmm5
	pxor	%xmm0,%xmm2
	pshufd	$128,%xmm1,%xmm6
	pxor	%xmm0,%xmm3
	pshufd	$64,%xmm1,%xmm7
	movups	16(%ebp),%xmm1
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	pxor	%xmm0,%xmm6
	pxor	%xmm0,%xmm7
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	movups	32(%ebp),%xmm0
	movl	%ebx,%ecx
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
	call	L_aesni_encrypt6_enter
	# XOR keystream with input, store, and advance both counters by 6
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	%xmm1,%xmm2
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm3
	movups	%xmm2,(%edi)
	movdqa	16(%esp),%xmm0		# increment constant {6,6,6,0}
	xorps	%xmm1,%xmm4
	movdqa	64(%esp),%xmm1
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	paddd	%xmm0,%xmm1		# counters 3-5 += 6
	paddd	48(%esp),%xmm0		# counters 0-2 += 6
	movdqa	(%esp),%xmm2		# swap mask
	movups	48(%esi),%xmm3
	movups	64(%esi),%xmm4
	xorps	%xmm3,%xmm5
	movups	80(%esi),%xmm3
	leal	96(%esi),%esi
	movdqa	%xmm0,48(%esp)
.byte	102,15,56,0,194			# pshufb %xmm2,%xmm0
	xorps	%xmm4,%xmm6
	movups	%xmm5,48(%edi)
	xorps	%xmm3,%xmm7
	movdqa	%xmm1,64(%esp)
.byte	102,15,56,0,202			# pshufb %xmm2,%xmm1
	movups	%xmm6,64(%edi)
	pshufd	$192,%xmm0,%xmm2
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	pshufd	$128,%xmm0,%xmm3
	subl	$6,%eax
	jnc	L039ctr32_loop6
	addl	$6,%eax
	jz	L040ctr32_ret
	movdqu	(%ebp),%xmm7
	movl	%ebp,%edx
	pxor	32(%esp),%xmm7		# recover IV body (undo whitening)
	movl	240(%ebp),%ecx
L038ctr32_tail:
	# 1..5 remaining blocks: merge IV body into counter lanes, dispatch
	por	%xmm7,%xmm2
	cmpl	$2,%eax
	jb	L041ctr32_one
	pshufd	$64,%xmm0,%xmm4
	por	%xmm7,%xmm3
	je	L042ctr32_two
	pshufd	$192,%xmm1,%xmm5
	por	%xmm7,%xmm4
	cmpl	$4,%eax
	jb	L043ctr32_three
	pshufd	$128,%xmm1,%xmm6
	por	%xmm7,%xmm5
	je	L044ctr32_four
	por	%xmm7,%xmm6
	call	__aesni_encrypt6
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	%xmm1,%xmm2
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm3
	movups	48(%esi),%xmm0
	xorps	%xmm1,%xmm4
	movups	64(%esi),%xmm1
	xorps	%xmm0,%xmm5
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm6
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	L040ctr32_ret
.align	4,0x90
L037ctr32_one_shortcut:
	movups	(%ebx),%xmm2		# single block: use IV as-is
	movl	240(%edx),%ecx
L041ctr32_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L045enc1_loop_7:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L045enc1_loop_7
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	movups	(%esi),%xmm6
	xorps	%xmm2,%xmm6
	movups	%xmm6,(%edi)
	jmp	L040ctr32_ret
.align	4,0x90
L042ctr32_two:
	call	__aesni_encrypt2
	movups	(%esi),%xmm5
	movups	16(%esi),%xmm6
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	L040ctr32_ret
.align	4,0x90
L043ctr32_three:
	call	__aesni_encrypt3
	movups	(%esi),%xmm5
	movups	16(%esi),%xmm6
	xorps	%xmm5,%xmm2
	movups	32(%esi),%xmm7
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm7,%xmm4
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	L040ctr32_ret
.align	4,0x90
L044ctr32_four:
	call	__aesni_encrypt4
	movups	(%esi),%xmm6
	movups	16(%esi),%xmm7
	movups	32(%esi),%xmm1
	xorps	%xmm6,%xmm2
	movups	48(%esi),%xmm0
	xorps	%xmm7,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm4
	movups	%xmm3,16(%edi)
	xorps	%xmm0,%xmm5
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
L040ctr32_ret:
	# scrub registers and the stack copies of counter state
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	movdqa	%xmm0,32(%esp)		# wipe whitened IV
	pxor	%xmm5,%xmm5
	movdqa	%xmm0,48(%esp)		# wipe counters
	pxor	%xmm6,%xmm6
	movdqa	%xmm0,64(%esp)
	pxor	%xmm7,%xmm7
	movl	80(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/*
 * void aesni_xts_encrypt(const unsigned char *in, unsigned char *out,
 *         size_t len, const AES_KEY *key1, const AES_KEY *key2,
 *         const unsigned char iv[16])
 * XTS encryption.  36(%esp)=key2, 40(%esp)=iv (after pushes): the IV is
 * first encrypted with key2 to form the initial tweak.  96(%esp) holds
 * {0x87,0,1,0}, the carry-fold constant for doubling the tweak in
 * GF(2^128); 112(%esp) = original len (for ciphertext stealing),
 * 116(%esp) = saved esp.
 */
.globl	_aesni_xts_encrypt
.align	4
_aesni_xts_encrypt:
L_aesni_xts_encrypt_begin:
#ifdef __CET__

.byte	243,15,30,251			# endbr32
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	36(%esp),%edx		# key2
	movl	40(%esp),%esi		# iv
	movl	240(%edx),%ecx
	movups	(%esi),%xmm2
	# tweak = Encrypt(key2, iv)
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L046enc1_loop_8:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L046enc1_loop_8
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	movl	20(%esp),%esi		# in
	movl	24(%esp),%edi		# out
	movl	28(%esp),%eax		# len
	movl	32(%esp),%edx		# key1
	movl	%esp,%ebp
	subl	$120,%esp
	movl	240(%edx),%ecx
	andl	$-16,%esp
	movl	$135,96(%esp)		# GF(2^128) reduction constant 0x87
	movl	$0,100(%esp)
	movl	$1,104(%esp)
	movl	$0,108(%esp)
	movl	%eax,112(%esp)		# original length (mod 16 used later)
	movl	%ebp,116(%esp)		# saved esp
	movdqa	%xmm2,%xmm1		# xmm1 = current tweak
	pxor	%xmm0,%xmm0
	movdqa	96(%esp),%xmm3		# xmm3 = carry-fold mask
	pcmpgtd	%xmm1,%xmm0		# sign bits for tweak doubling
	andl	$-16,%eax
	movl	%edx,%ebp		# preserve key1/rounds across helpers
	movl	%ecx,%ebx
	subl	$96,%eax
	jc	L047xts_enc_short
	shll	$4,%ecx
	movl	$16,%ebx
	subl	%ecx,%ebx
	leal	32(%edx,%ecx,1),%edx
	jmp	L048xts_enc_loop6
.align	4,0x90
L048xts_enc_loop6:
	# generate 6 consecutive tweaks (each = previous * 2 in GF(2^128)),
	# spilling the first five to (%esp)..64(%esp)
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,(%esp)
	paddq	%xmm1,%xmm1		# tweak <<= 1
	pand	%xmm3,%xmm2		# carry ? 0x87 : 0
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1		# fold carry
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,16(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,32(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,48(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,64(%esp)
	paddq	%xmm1,%xmm1
	movups	(%ebp),%xmm0		# round key 0
	pand	%xmm3,%xmm7
	movups	(%esi),%xmm2
	pxor	%xmm1,%xmm7		# xmm7 = 6th tweak
	# load 6 plaintext blocks, whiten with round key 0, XOR with tweaks,
	# run round 1 inline, then enter the shared 6-lane round loop
	movl	%ebx,%ecx
	movdqu	16(%esi),%xmm3
	xorps	%xmm0,%xmm2
	movdqu	32(%esi),%xmm4
	pxor	%xmm0,%xmm3
	movdqu	48(%esi),%xmm5
	pxor	%xmm0,%xmm4
	movdqu	64(%esi),%xmm6
	pxor	%xmm0,%xmm5
	movdqu	80(%esi),%xmm1
	pxor	%xmm0,%xmm6
	leal	96(%esi),%esi
	pxor	(%esp),%xmm2
	movdqa	%xmm7,80(%esp)
	pxor	%xmm1,%xmm7
	movups	16(%ebp),%xmm1
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	pxor	%xmm0,%xmm7
	movups	32(%ebp),%xmm0
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
	call	L_aesni_encrypt6_enter
	# un-tweak the ciphertexts and store; compute next tweak
	movdqa	80(%esp),%xmm1
	pxor	%xmm0,%xmm0
	xorps	(%esp),%xmm2
	pcmpgtd	%xmm1,%xmm0
	xorps	16(%esp),%xmm3
	movups	%xmm2,(%edi)
	xorps	32(%esp),%xmm4
	movups	%xmm3,16(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm4,32(%edi)
	xorps	64(%esp),%xmm6
	movups	%xmm5,48(%edi)
	xorps	%xmm1,%xmm7
	movups	%xmm6,64(%edi)
	pshufd	$19,%xmm0,%xmm2
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqa	96(%esp),%xmm3
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	subl	$96,%eax
	jnc	L048xts_enc_loop6
	movl	240(%ebp),%ecx
	movl	%ebp,%edx
	movl	%ecx,%ebx
L047xts_enc_short:
	addl	$96,%eax		# eax = remaining whole-block bytes
	jz	L049xts_enc_done6x
movdqa %xmm1,%xmm5 1238 cmpl $32,%eax 1239 jb L050xts_enc_one 1240 pshufd $19,%xmm0,%xmm2 1241 pxor %xmm0,%xmm0 1242 paddq %xmm1,%xmm1 1243 pand %xmm3,%xmm2 1244 pcmpgtd %xmm1,%xmm0 1245 pxor %xmm2,%xmm1 1246 je L051xts_enc_two 1247 pshufd $19,%xmm0,%xmm2 1248 pxor %xmm0,%xmm0 1249 movdqa %xmm1,%xmm6 1250 paddq %xmm1,%xmm1 1251 pand %xmm3,%xmm2 1252 pcmpgtd %xmm1,%xmm0 1253 pxor %xmm2,%xmm1 1254 cmpl $64,%eax 1255 jb L052xts_enc_three 1256 pshufd $19,%xmm0,%xmm2 1257 pxor %xmm0,%xmm0 1258 movdqa %xmm1,%xmm7 1259 paddq %xmm1,%xmm1 1260 pand %xmm3,%xmm2 1261 pcmpgtd %xmm1,%xmm0 1262 pxor %xmm2,%xmm1 1263 movdqa %xmm5,(%esp) 1264 movdqa %xmm6,16(%esp) 1265 je L053xts_enc_four 1266 movdqa %xmm7,32(%esp) 1267 pshufd $19,%xmm0,%xmm7 1268 movdqa %xmm1,48(%esp) 1269 paddq %xmm1,%xmm1 1270 pand %xmm3,%xmm7 1271 pxor %xmm1,%xmm7 1272 movdqu (%esi),%xmm2 1273 movdqu 16(%esi),%xmm3 1274 movdqu 32(%esi),%xmm4 1275 pxor (%esp),%xmm2 1276 movdqu 48(%esi),%xmm5 1277 pxor 16(%esp),%xmm3 1278 movdqu 64(%esi),%xmm6 1279 pxor 32(%esp),%xmm4 1280 leal 80(%esi),%esi 1281 pxor 48(%esp),%xmm5 1282 movdqa %xmm7,64(%esp) 1283 pxor %xmm7,%xmm6 1284 call __aesni_encrypt6 1285 movaps 64(%esp),%xmm1 1286 xorps (%esp),%xmm2 1287 xorps 16(%esp),%xmm3 1288 xorps 32(%esp),%xmm4 1289 movups %xmm2,(%edi) 1290 xorps 48(%esp),%xmm5 1291 movups %xmm3,16(%edi) 1292 xorps %xmm1,%xmm6 1293 movups %xmm4,32(%edi) 1294 movups %xmm5,48(%edi) 1295 movups %xmm6,64(%edi) 1296 leal 80(%edi),%edi 1297 jmp L054xts_enc_done 1298.align 4,0x90 1299L050xts_enc_one: 1300 movups (%esi),%xmm2 1301 leal 16(%esi),%esi 1302 xorps %xmm5,%xmm2 1303 movups (%edx),%xmm0 1304 movups 16(%edx),%xmm1 1305 leal 32(%edx),%edx 1306 xorps %xmm0,%xmm2 1307L055enc1_loop_9: 1308.byte 102,15,56,220,209 1309 decl %ecx 1310 movups (%edx),%xmm1 1311 leal 16(%edx),%edx 1312 jnz L055enc1_loop_9 1313.byte 102,15,56,221,209 1314 xorps %xmm5,%xmm2 1315 movups %xmm2,(%edi) 1316 leal 16(%edi),%edi 1317 movdqa %xmm5,%xmm1 1318 jmp L054xts_enc_done 
.align	4,0x90
/* ---- aesni_xts_encrypt short tails: handle the last 2/3/4 whole blocks.
   On entry %xmm5/%xmm6/%xmm7 (and 0/16(%esp)) hold the per-block XTS tweaks;
   %esi = src, %edi = dst. Each tail whitens, encrypts, un-whitens, stores,
   and leaves the last tweak in %xmm1 for the ciphertext-stealing code. */
L051xts_enc_two:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	leal	32(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	call	__aesni_encrypt2
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	leal	32(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	L054xts_enc_done
.align	4,0x90
L052xts_enc_three:
	movaps	%xmm1,%xmm7
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	leal	48(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	call	__aesni_encrypt3
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	leal	48(%edi),%edi
	movdqa	%xmm7,%xmm1
	jmp	L054xts_enc_done
.align	4,0x90
L053xts_enc_four:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	xorps	(%esp),%xmm2
	movups	48(%esi),%xmm5
	leal	64(%esi),%esi
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	xorps	%xmm6,%xmm5
	call	__aesni_encrypt4
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	xorps	%xmm6,%xmm5
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	leal	64(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	L054xts_enc_done
.align	4,0x90
L049xts_enc_done6x:
	/* after the 6-block main loop: %eax = remaining byte count (at 112(%esp)) */
	movl	112(%esp),%eax
	andl	$15,%eax
	jz	L056xts_enc_ret
	movdqa	%xmm1,%xmm5
	movl	%eax,112(%esp)
	jmp	L057xts_enc_steal
.align	4,0x90
L054xts_enc_done:
	movl	112(%esp),%eax
	pxor	%xmm0,%xmm0
	andl	$15,%eax
	jz	L056xts_enc_ret
	/* advance tweak once more: multiply by x in GF(2^128); carry mask is
	   built via pcmpgtd/pshufd and reduced with the 0x87 constant kept
	   at 96(%esp) */
	pcmpgtd	%xmm1,%xmm0
	movl	%eax,112(%esp)
	pshufd	$19,%xmm0,%xmm5
	paddq	%xmm1,%xmm1
	pand	96(%esp),%xmm5
	pxor	%xmm1,%xmm5
L057xts_enc_steal:
	/* ciphertext stealing: byte-swap loop between the tail of the last
	   full ciphertext block and the remaining partial plaintext */
	movzbl	(%esi),%ecx
	movzbl	-16(%edi),%edx
	leal	1(%esi),%esi
	movb	%cl,-16(%edi)
	movb	%dl,(%edi)
	leal	1(%edi),%edi
	subl	$1,%eax
	jnz	L057xts_enc_steal
	subl	112(%esp),%edi
	movl	%ebp,%edx
	movl	%ebx,%ecx
	/* re-encrypt the stolen block with tweak %xmm5 (one-block AES loop) */
	movups	-16(%edi),%xmm2
	xorps	%xmm5,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L058enc1_loop_10:
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L058enc1_loop_10
.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
	xorps	%xmm5,%xmm2
	movups	%xmm2,-16(%edi)
L056xts_enc_ret:
	/* scrub all xmm registers and the stack scratch area before return */
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movdqa	%xmm0,(%esp)
	pxor	%xmm3,%xmm3
	movdqa	%xmm0,16(%esp)
	pxor	%xmm4,%xmm4
	movdqa	%xmm0,32(%esp)
	pxor	%xmm5,%xmm5
	movdqa	%xmm0,48(%esp)
	pxor	%xmm6,%xmm6
	movdqa	%xmm0,64(%esp)
	pxor	%xmm7,%xmm7
	movdqa	%xmm0,80(%esp)
	movl	116(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/* ---------------------------------------------------------------------
   aesni_xts_decrypt — XTS-AES decryption (IA-32, cdecl).
   Stack args (after 4 saved regs): 20(%esp)=src, 24(%esp)=dst,
   28(%esp)=len, 32(%esp)=key1; 36(%esp) and 40(%esp) are read first as
   the tweak key and 16-byte IV — presumably (key2, iv); TODO confirm
   against the C prototype.  The IV is encrypted with key2 to form the
   initial tweak; tweaks advance by multiplication by x in GF(2^128)
   (0x87 reduction constant stored at 96(%esp)).
   --------------------------------------------------------------------- */
.globl	_aesni_xts_decrypt
.align	4
_aesni_xts_decrypt:
L_aesni_xts_decrypt_begin:
%ifdef __CET__
	/* NOTE(review): '%ifdef' looks like a garbled '#ifdef' (cpp guard) —
	   verify against the original file */
.byte	243,15,30,251	/* endbr32 */
%endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* tweak = AES-encrypt(iv) under key2 */
	movl	36(%esp),%edx
	movl	40(%esp),%esi
	movl	240(%edx),%ecx	/* rounds from AES_KEY->rounds */
	movups	(%esi),%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L059enc1_loop_11:
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L059enc1_loop_11
.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movl	%esp,%ebp	/* keep original %esp; restored via 116(%esp) */
	subl	$120,%esp
	andl	$-16,%esp	/* 16-align scratch for movdqa spills */
	/* if len is not block-aligned, hold one block back for stealing */
	xorl	%ebx,%ebx
	testl	$15,%eax
	setnz	%bl
	shll	$4,%ebx
	subl	%ebx,%eax
	movl	$135,96(%esp)	/* GF(2^128) reduction constant 0x87 */
	movl	$0,100(%esp)
	movl	$1,104(%esp)
	movl	$0,108(%esp)
	movl	%eax,112(%esp)
	movl	%ebp,116(%esp)
	movl	240(%edx),%ecx
	movl	%edx,%ebp	/* %ebp = key1 schedule */
	movl	%ecx,%ebx	/* %ebx = rounds */
	movdqa	%xmm2,%xmm1	/* %xmm1 = current tweak */
	pxor	%xmm0,%xmm0
	movdqa	96(%esp),%xmm3
	pcmpgtd	%xmm1,%xmm0
	andl	$-16,%eax
	subl	$96,%eax
	jc	L060xts_dec_short
	shll	$4,%ecx
	movl	$16,%ebx
	subl	%ecx,%ebx
	leal	32(%edx,%ecx,1),%edx	/* point past key schedule end */
	jmp	L061xts_dec_loop6
.align	4,0x90
L061xts_dec_loop6:
	/* derive 6 consecutive tweaks (each = previous * x mod GF poly),
	   spilling them to 0..64(%esp) / %xmm7 */
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,16(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,32(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,48(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,64(%esp)
	paddq	%xmm1,%xmm1
	movups	(%ebp),%xmm0	/* round-0 key */
	pand	%xmm3,%xmm7
	movups	(%esi),%xmm2
	pxor	%xmm1,%xmm7
	/* load 6 ciphertext blocks, whiten with round-0 key and tweaks,
	   run the first aesdec round inline, then enter the shared
	   6-block decrypt body */
	movl	%ebx,%ecx
	movdqu	16(%esi),%xmm3
	xorps	%xmm0,%xmm2
	movdqu	32(%esi),%xmm4
	pxor	%xmm0,%xmm3
	movdqu	48(%esi),%xmm5
	pxor	%xmm0,%xmm4
	movdqu	64(%esi),%xmm6
	pxor	%xmm0,%xmm5
	movdqu	80(%esi),%xmm1
	pxor	%xmm0,%xmm6
	leal	96(%esi),%esi
	pxor	(%esp),%xmm2
	movdqa	%xmm7,80(%esp)
	pxor	%xmm1,%xmm7
	movups	16(%ebp),%xmm1
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
	pxor	%xmm0,%xmm7
	movups	32(%ebp),%xmm0
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	call	L_aesni_decrypt6_enter
	/* un-whiten with the same 6 tweaks and store */
	movdqa	80(%esp),%xmm1
	pxor	%xmm0,%xmm0
	xorps	(%esp),%xmm2
	pcmpgtd	%xmm1,%xmm0
	xorps	16(%esp),%xmm3
	movups	%xmm2,(%edi)
	xorps	32(%esp),%xmm4
	movups	%xmm3,16(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm4,32(%edi)
	xorps	64(%esp),%xmm6
	movups	%xmm5,48(%edi)
	xorps	%xmm1,%xmm7
	movups	%xmm6,64(%edi)
	pshufd	$19,%xmm0,%xmm2
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqa	96(%esp),%xmm3
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	subl	$96,%eax
	jnc	L061xts_dec_loop6
	movl	240(%ebp),%ecx
	movl	%ebp,%edx
	movl	%ecx,%ebx
L060xts_dec_short:
	/* 1..5 remaining whole blocks: dispatch on count */
	addl	$96,%eax
	jz	L062xts_dec_done6x
	movdqa	%xmm1,%xmm5
	cmpl	$32,%eax
	jb	L063xts_dec_one
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	je	L064xts_dec_two
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,%xmm6
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	cmpl	$64,%eax
	jb	L065xts_dec_three
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,%xmm7
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	movdqa	%xmm5,(%esp)
	movdqa	%xmm6,16(%esp)
	je	L066xts_dec_four
	/* five blocks */
	movdqa	%xmm7,32(%esp)
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,48(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm7
	pxor	%xmm1,%xmm7
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	pxor	(%esp),%xmm2
	movdqu	48(%esi),%xmm5
	pxor	16(%esp),%xmm3
	movdqu	64(%esi),%xmm6
	pxor	32(%esp),%xmm4
	leal	80(%esi),%esi
	pxor	48(%esp),%xmm5
	movdqa	%xmm7,64(%esp)
	pxor	%xmm7,%xmm6
	call	__aesni_decrypt6
	movaps	64(%esp),%xmm1
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	32(%esp),%xmm4
	movups	%xmm2,(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm3,16(%edi)
	xorps	%xmm1,%xmm6
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	leal	80(%edi),%edi
	jmp	L067xts_dec_done
.align	4,0x90
L063xts_dec_one:
	movups	(%esi),%xmm2
	leal	16(%esi),%esi
	xorps	%xmm5,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L068dec1_loop_12:
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L068dec1_loop_12
.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
	xorps	%xmm5,%xmm2
	movups	%xmm2,(%edi)
	leal	16(%edi),%edi
	movdqa	%xmm5,%xmm1
	jmp	L067xts_dec_done
.align	4,0x90
L064xts_dec_two:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	leal	32(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	call	__aesni_decrypt2
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	leal	32(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	L067xts_dec_done
.align	4,0x90
L065xts_dec_three:
	movaps	%xmm1,%xmm7
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	leal	48(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	call	__aesni_decrypt3
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	leal	48(%edi),%edi
	movdqa	%xmm7,%xmm1
	jmp	L067xts_dec_done
.align	4,0x90
L066xts_dec_four:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	xorps	(%esp),%xmm2
	movups	48(%esi),%xmm5
	leal	64(%esi),%esi
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	xorps	%xmm6,%xmm5
	call	__aesni_decrypt4
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	xorps	%xmm6,%xmm5
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	leal	64(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	L067xts_dec_done
.align	4,0x90
L062xts_dec_done6x:
	movl	112(%esp),%eax
	andl	$15,%eax
	jz	L069xts_dec_ret
	movl	%eax,112(%esp)
	jmp	L070xts_dec_only_one_more
.align	4,0x90
L067xts_dec_done:
	movl	112(%esp),%eax
	pxor	%xmm0,%xmm0
	andl	$15,%eax
	jz	L069xts_dec_ret
	/* partial final block: advance the tweak once more before the
	   two-tweak stealing sequence below */
	pcmpgtd	%xmm1,%xmm0
	movl	%eax,112(%esp)
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	96(%esp),%xmm3
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
L070xts_dec_only_one_more:
	pshufd	$19,%xmm0,%xmm5
	movdqa	%xmm1,%xmm6
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm5
	pxor	%xmm1,%xmm5
	movl	%ebp,%edx
	movl	%ebx,%ecx
	/* decrypt the last full block with tweak %xmm5 */
	movups	(%esi),%xmm2
	xorps	%xmm5,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L071dec1_loop_13:
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L071dec1_loop_13
.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
	xorps	%xmm5,%xmm2
	movups	%xmm2,(%edi)
L072xts_dec_steal:
	/* ciphertext stealing byte swap, then re-decrypt with tweak %xmm6 */
	movzbl	16(%esi),%ecx
	movzbl	(%edi),%edx
	leal	1(%esi),%esi
	movb	%cl,(%edi)
	movb	%dl,16(%edi)
	leal	1(%edi),%edi
	subl	$1,%eax
	jnz	L072xts_dec_steal
	subl	112(%esp),%edi
	movl	%ebp,%edx
	movl	%ebx,%ecx
	movups	(%edi),%xmm2
	xorps	%xmm6,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L073dec1_loop_14:
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L073dec1_loop_14
.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
	xorps	%xmm6,%xmm2
	movups	%xmm2,(%edi)
L069xts_dec_ret:
	/* scrub registers and scratch (continues in the next section) */
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	/* tail of aesni_xts_decrypt: finish scrubbing xmm regs and the
	   16-aligned scratch area, restore the caller's %esp, and return */
	pxor	%xmm2,%xmm2
	movdqa	%xmm0,(%esp)
	pxor	%xmm3,%xmm3
	movdqa	%xmm0,16(%esp)
	pxor	%xmm4,%xmm4
	movdqa	%xmm0,32(%esp)
	pxor	%xmm5,%xmm5
	movdqa	%xmm0,48(%esp)
	pxor	%xmm6,%xmm6
	movdqa	%xmm0,64(%esp)
	pxor	%xmm7,%xmm7
	movdqa	%xmm0,80(%esp)
	movl	116(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/* ---------------------------------------------------------------------
   aesni_ocb_encrypt — OCB-mode bulk encryption (IA-32, cdecl).
   Observed stack args (after 4 saved regs): 20(%esp)=src, 24(%esp)=dst,
   28(%esp)=blocks(<<4 below), 32(%esp)=key, 36(%esp)=block index,
   40(%esp)=pointer read into %xmm0 (offset), 44(%esp)=L-table base,
   48(%esp)=pointer read into %xmm1 (checksum).  Names presumed from
   usage — TODO confirm against the C prototype.  %xmm0/%xmm1 are
   written back through 40/48(%esp) on exit.
   --------------------------------------------------------------------- */
.globl	_aesni_ocb_encrypt
.align	4
_aesni_ocb_encrypt:
L_aesni_ocb_encrypt_begin:
%ifdef __CET__
	/* NOTE(review): '%ifdef' looks like a garbled '#ifdef' — verify */
.byte	243,15,30,251	/* endbr32 */
%endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	40(%esp),%ecx
	movl	48(%esp),%ebx
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movdqu	(%ecx),%xmm0	/* running offset */
	movl	36(%esp),%ebp	/* block counter (for ntz-based L lookups) */
	movdqu	(%ebx),%xmm1	/* running checksum */
	movl	44(%esp),%ebx	/* L table */
	movl	%esp,%ecx
	subl	$132,%esp
	andl	$-16,%esp
	subl	%esi,%edi	/* dst kept as (dst - src) delta */
	shll	$4,%eax
	leal	-96(%esi,%eax,1),%eax	/* end-of-input - 96, loop bound */
	movl	%edi,120(%esp)
	movl	%eax,124(%esp)
	movl	%ecx,128(%esp)	/* saved original %esp */
	movl	240(%edx),%ecx
	testl	$1,%ebp
	jnz	L074odd
	/* even counter: process one block first so the main loop sees an
	   odd counter; offset ^= L[ntz(counter)], checksum ^= plaintext */
	bsfl	%ebp,%eax
	addl	$1,%ebp
	shll	$4,%eax
	movdqu	(%ebx,%eax,1),%xmm7
	movl	%edx,%eax
	movdqu	(%esi),%xmm2
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm7
	pxor	%xmm2,%xmm1
	pxor	%xmm7,%xmm2
	movdqa	%xmm1,%xmm6
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L075enc1_loop_15:
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L075enc1_loop_15
.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
	xorps	%xmm7,%xmm2
	movdqa	%xmm7,%xmm0
	movdqa	%xmm6,%xmm1
	movups	%xmm2,-16(%edi,%esi,1)
	movl	240(%eax),%ecx
	movl	%eax,%edx
	movl	124(%esp),%eax
L074odd:
	shll	$4,%ecx
	movl	$16,%edi
	subl	%ecx,%edi	/* 16 - 16*rounds: offset used to index key */
	movl	%edx,112(%esp)
	leal	32(%edx,%ecx,1),%edx
	movl	%edi,116(%esp)
	cmpl	%eax,%esi
	ja	L076short
	jmp	L077grandloop
.align	5,0x90
L077grandloop:
	/* 6 blocks per iteration: compute 6 offsets via L[ntz(i)] chain,
	   spill to 0..80(%esp), accumulate checksum, encrypt */
	leal	1(%ebp),%ecx
	leal	3(%ebp),%eax
	leal	5(%ebp),%edi
	addl	$6,%ebp
	bsfl	%ecx,%ecx
	bsfl	%eax,%eax
	bsfl	%edi,%edi
	shll	$4,%ecx
	shll	$4,%eax
	shll	$4,%edi
	movdqu	(%ebx),%xmm2
	movdqu	(%ebx,%ecx,1),%xmm3
	movl	116(%esp),%ecx
	movdqa	%xmm2,%xmm4
	movdqu	(%ebx,%eax,1),%xmm5
	movdqa	%xmm2,%xmm6
	movdqu	(%ebx,%edi,1),%xmm7
	pxor	%xmm0,%xmm2
	pxor	%xmm2,%xmm3
	movdqa	%xmm2,(%esp)
	pxor	%xmm3,%xmm4
	movdqa	%xmm3,16(%esp)
	pxor	%xmm4,%xmm5
	movdqa	%xmm4,32(%esp)
	pxor	%xmm5,%xmm6
	movdqa	%xmm5,48(%esp)
	pxor	%xmm6,%xmm7
	movdqa	%xmm6,64(%esp)
	movdqa	%xmm7,80(%esp)
	movups	-48(%edx,%ecx,1),%xmm0	/* round-0 key */
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	/* checksum ^= each plaintext block; whiten with round-0 key */
	pxor	%xmm2,%xmm1
	pxor	%xmm0,%xmm2
	pxor	%xmm3,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	pxor	%xmm5,%xmm1
	pxor	%xmm0,%xmm5
	pxor	%xmm6,%xmm1
	pxor	%xmm0,%xmm6
	pxor	%xmm7,%xmm1
	pxor	%xmm0,%xmm7
	movdqa	%xmm1,96(%esp)
	movups	-32(%edx,%ecx,1),%xmm1
	pxor	(%esp),%xmm2
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
	pxor	80(%esp),%xmm7
	movups	-16(%edx,%ecx,1),%xmm0
.byte	102,15,56,220,209	/* aesenc round on all six blocks */
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movl	120(%esp),%edi
	movl	124(%esp),%eax
	call	L_aesni_encrypt6_enter
	/* un-whiten with the per-block offsets and store via dst delta */
	movdqa	80(%esp),%xmm0
	pxor	(%esp),%xmm2
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
	pxor	%xmm0,%xmm7
	movdqa	96(%esp),%xmm1
	movdqu	%xmm2,-96(%edi,%esi,1)
	movdqu	%xmm3,-80(%edi,%esi,1)
	movdqu	%xmm4,-64(%edi,%esi,1)
	movdqu	%xmm5,-48(%edi,%esi,1)
	movdqu	%xmm6,-32(%edi,%esi,1)
	movdqu	%xmm7,-16(%edi,%esi,1)
	cmpl	%eax,%esi
	jbe	L077grandloop
L076short:
	/* 1..5 remaining blocks */
	addl	$96,%eax
	subl	%esi,%eax
	jz	L078done
	cmpl	$32,%eax
	jb	L079one
	je	L080two
	cmpl	$64,%eax
	jb	L081three
	je	L082four
	/* five blocks */
	leal	1(%ebp),%ecx
	leal	3(%ebp),%eax
	bsfl	%ecx,%ecx
	bsfl	%eax,%eax
	shll	$4,%ecx
	shll	$4,%eax
	movdqu	(%ebx),%xmm2
	movdqu	(%ebx,%ecx,1),%xmm3
	movl	116(%esp),%ecx
	movdqa	%xmm2,%xmm4
	movdqu	(%ebx,%eax,1),%xmm5
	movdqa	%xmm2,%xmm6
	pxor	%xmm0,%xmm2
	pxor	%xmm2,%xmm3
	movdqa	%xmm2,(%esp)
	pxor	%xmm3,%xmm4
	movdqa	%xmm3,16(%esp)
	pxor	%xmm4,%xmm5
	movdqa	%xmm4,32(%esp)
	pxor	%xmm5,%xmm6
	movdqa	%xmm5,48(%esp)
	pxor	%xmm6,%xmm7
	movdqa	%xmm6,64(%esp)
	movups	-48(%edx,%ecx,1),%xmm0
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	pxor	%xmm7,%xmm7	/* sixth lane runs as zeros */
	pxor	%xmm2,%xmm1
	pxor	%xmm0,%xmm2
	pxor	%xmm3,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	pxor	%xmm5,%xmm1
	pxor	%xmm0,%xmm5
	pxor	%xmm6,%xmm1
	pxor	%xmm0,%xmm6
	movdqa	%xmm1,96(%esp)
	movups	-32(%edx,%ecx,1),%xmm1
	pxor	(%esp),%xmm2
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
	movups	-16(%edx,%ecx,1),%xmm0
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movl	120(%esp),%edi
	call	L_aesni_encrypt6_enter
	movdqa	64(%esp),%xmm0
	pxor	(%esp),%xmm2
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
	pxor	48(%esp),%xmm5
	pxor	%xmm0,%xmm6
	movdqa	96(%esp),%xmm1
	movdqu	%xmm2,(%edi,%esi,1)
	movdqu	%xmm3,16(%edi,%esi,1)
	movdqu	%xmm4,32(%edi,%esi,1)
	movdqu	%xmm5,48(%edi,%esi,1)
	movdqu	%xmm6,64(%edi,%esi,1)
	jmp	L078done
.align	4,0x90
L079one:
	movdqu	(%ebx),%xmm7	/* offset ^= L[0] */
	movl	112(%esp),%edx
	movdqu	(%esi),%xmm2
	movl	240(%edx),%ecx
	pxor	%xmm0,%xmm7
	pxor	%xmm2,%xmm1
	pxor	%xmm7,%xmm2
	movdqa	%xmm1,%xmm6
	movl	120(%esp),%edi
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L083enc1_loop_16:
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L083enc1_loop_16
.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
	xorps	%xmm7,%xmm2
	movdqa	%xmm7,%xmm0
	movdqa	%xmm6,%xmm1
	movups	%xmm2,(%edi,%esi,1)
	jmp	L078done
.align	4,0x90
L080two:
	leal	1(%ebp),%ecx
	movl	112(%esp),%edx
	bsfl	%ecx,%ecx
	shll	$4,%ecx
	movdqu	(%ebx),%xmm6
	movdqu	(%ebx,%ecx,1),%xmm7
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movl	240(%edx),%ecx
	pxor	%xmm0,%xmm6
	pxor	%xmm6,%xmm7
	pxor	%xmm2,%xmm1
	pxor	%xmm6,%xmm2
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm3
	movdqa	%xmm1,%xmm5
	movl	120(%esp),%edi
	call	__aesni_encrypt2
	xorps	%xmm6,%xmm2
	xorps	%xmm7,%xmm3
	movdqa	%xmm7,%xmm0
	movdqa	%xmm5,%xmm1
	movups	%xmm2,(%edi,%esi,1)
	movups	%xmm3,16(%edi,%esi,1)
	jmp	L078done
.align	4,0x90
L081three:
	leal	1(%ebp),%ecx
	movl	112(%esp),%edx
	bsfl	%ecx,%ecx
	shll	$4,%ecx
	movdqu	(%ebx),%xmm5
	movdqu	(%ebx,%ecx,1),%xmm6
	movdqa	%xmm5,%xmm7
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movl	240(%edx),%ecx
	pxor	%xmm0,%xmm5
	pxor	%xmm5,%xmm6
	pxor	%xmm6,%xmm7
	pxor	%xmm2,%xmm1
	pxor	%xmm5,%xmm2
	pxor	%xmm3,%xmm1
	pxor	%xmm6,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm7,%xmm4
	movdqa	%xmm1,96(%esp)
	movl	120(%esp),%edi
	call	__aesni_encrypt3
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	movdqa	%xmm7,%xmm0
	movdqa	96(%esp),%xmm1
	movups	%xmm2,(%edi,%esi,1)
	movups	%xmm3,16(%edi,%esi,1)
	movups	%xmm4,32(%edi,%esi,1)
	jmp	L078done
.align	4,0x90
L082four:
	leal	1(%ebp),%ecx
	leal	3(%ebp),%eax
	bsfl	%ecx,%ecx
	bsfl	%eax,%eax
	movl	112(%esp),%edx
	shll	$4,%ecx
	shll	$4,%eax
	movdqu	(%ebx),%xmm4
	movdqu	(%ebx,%ecx,1),%xmm5
	movdqa	%xmm4,%xmm6
	movdqu	(%ebx,%eax,1),%xmm7
	pxor	%xmm0,%xmm4
	movdqu	(%esi),%xmm2
	pxor	%xmm4,%xmm5
	movdqu	16(%esi),%xmm3
	pxor	%xmm5,%xmm6
	movdqa	%xmm4,(%esp)
	pxor	%xmm6,%xmm7
	movdqa	%xmm5,16(%esp)
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movl	240(%edx),%ecx
	pxor	%xmm2,%xmm1
	pxor	(%esp),%xmm2
	pxor	%xmm3,%xmm1
	pxor	16(%esp),%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm6,%xmm4
	pxor	%xmm5,%xmm1
	pxor	%xmm7,%xmm5
	movdqa	%xmm1,96(%esp)
	movl	120(%esp),%edi
	call	__aesni_encrypt4
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	%xmm6,%xmm4
	movups	%xmm2,(%edi,%esi,1)
	xorps	%xmm7,%xmm5
	movups	%xmm3,16(%edi,%esi,1)
	movdqa	%xmm7,%xmm0
	movups	%xmm4,32(%edi,%esi,1)
	movdqa	96(%esp),%xmm1
	movups	%xmm5,48(%edi,%esi,1)
L078done:
	/* scrub scratch, restore %esp, write back offset/checksum, return */
	movl	128(%esp),%edx
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	movdqa	%xmm2,(%esp)
	pxor	%xmm4,%xmm4
	movdqa	%xmm2,16(%esp)
	pxor	%xmm5,%xmm5
	movdqa	%xmm2,32(%esp)
	pxor	%xmm6,%xmm6
	movdqa	%xmm2,48(%esp)
	pxor	%xmm7,%xmm7
	movdqa	%xmm2,64(%esp)
	movdqa	%xmm2,80(%esp)
	movdqa	%xmm2,96(%esp)
	leal	(%edx),%esp
	movl	40(%esp),%ecx
	movl	48(%esp),%ebx
	movdqu	%xmm0,(%ecx)	/* store updated offset */
	pxor	%xmm0,%xmm0
	movdqu	%xmm1,(%ebx)	/* store updated checksum */
	pxor	%xmm1,%xmm1
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/* aesni_ocb_decrypt — entry label; body follows in the next section */
.globl	_aesni_ocb_decrypt
.align	4
_aesni_ocb_decrypt:
/* ---------------------------------------------------------------------
   aesni_ocb_decrypt body — OCB-mode bulk decryption (IA-32, cdecl).
   Mirrors aesni_ocb_encrypt (same stack-argument layout, same L-table
   ntz lookups); the difference is that the checksum accumulates the
   decrypted PLAINTEXT after aesdec, rather than the input.
   --------------------------------------------------------------------- */
L_aesni_ocb_decrypt_begin:
%ifdef __CET__
	/* NOTE(review): '%ifdef' looks like a garbled '#ifdef' — verify */
.byte	243,15,30,251	/* endbr32 */
%endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	40(%esp),%ecx
	movl	48(%esp),%ebx
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movdqu	(%ecx),%xmm0	/* running offset */
	movl	36(%esp),%ebp	/* block counter */
	movdqu	(%ebx),%xmm1	/* running checksum */
	movl	44(%esp),%ebx	/* L table */
	movl	%esp,%ecx
	subl	$132,%esp
	andl	$-16,%esp
	subl	%esi,%edi
	shll	$4,%eax
	leal	-96(%esi,%eax,1),%eax
	movl	%edi,120(%esp)
	movl	%eax,124(%esp)
	movl	%ecx,128(%esp)
	movl	240(%edx),%ecx
	testl	$1,%ebp
	jnz	L084odd
	/* even counter: peel one block so main loop runs with odd counter */
	bsfl	%ebp,%eax
	addl	$1,%ebp
	shll	$4,%eax
	movdqu	(%ebx,%eax,1),%xmm7
	movl	%edx,%eax
	movdqu	(%esi),%xmm2
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm7
	pxor	%xmm7,%xmm2
	movdqa	%xmm1,%xmm6
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L085dec1_loop_17:
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L085dec1_loop_17
.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
	xorps	%xmm7,%xmm2
	movaps	%xmm6,%xmm1
	movdqa	%xmm7,%xmm0
	xorps	%xmm2,%xmm1	/* checksum ^= recovered plaintext */
	movups	%xmm2,-16(%edi,%esi,1)
	movl	240(%eax),%ecx
	movl	%eax,%edx
	movl	124(%esp),%eax
L084odd:
	shll	$4,%ecx
	movl	$16,%edi
	subl	%ecx,%edi
	movl	%edx,112(%esp)
	leal	32(%edx,%ecx,1),%edx
	movl	%edi,116(%esp)
	cmpl	%eax,%esi
	ja	L086short
	jmp	L087grandloop
.align	5,0x90
L087grandloop:
	/* 6 blocks per iteration; offsets chained via L[ntz(i)] and spilled
	   to 0..80(%esp), checksum parked at 96(%esp) during the rounds */
	leal	1(%ebp),%ecx
	leal	3(%ebp),%eax
	leal	5(%ebp),%edi
	addl	$6,%ebp
	bsfl	%ecx,%ecx
	bsfl	%eax,%eax
	bsfl	%edi,%edi
	shll	$4,%ecx
	shll	$4,%eax
	shll	$4,%edi
	movdqu	(%ebx),%xmm2
	movdqu	(%ebx,%ecx,1),%xmm3
	movl	116(%esp),%ecx
	movdqa	%xmm2,%xmm4
	movdqu	(%ebx,%eax,1),%xmm5
	movdqa	%xmm2,%xmm6
	movdqu	(%ebx,%edi,1),%xmm7
	pxor	%xmm0,%xmm2
	pxor	%xmm2,%xmm3
	movdqa	%xmm2,(%esp)
	pxor	%xmm3,%xmm4
	movdqa	%xmm3,16(%esp)
	pxor	%xmm4,%xmm5
	movdqa	%xmm4,32(%esp)
	pxor	%xmm5,%xmm6
	movdqa	%xmm5,48(%esp)
	pxor	%xmm6,%xmm7
	movdqa	%xmm6,64(%esp)
	movdqa	%xmm7,80(%esp)
	movups	-48(%edx,%ecx,1),%xmm0
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	movdqa	%xmm1,96(%esp)
	pxor	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	pxor	%xmm0,%xmm7
	movups	-32(%edx,%ecx,1),%xmm1
	pxor	(%esp),%xmm2
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
	pxor	80(%esp),%xmm7
	movups	-16(%edx,%ecx,1),%xmm0
.byte	102,15,56,222,209	/* aesdec round on all six blocks */
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	movl	120(%esp),%edi
	movl	124(%esp),%eax
	call	L_aesni_decrypt6_enter
	/* un-whiten, fold plaintext into checksum, store */
	movdqa	80(%esp),%xmm0
	pxor	(%esp),%xmm2
	movdqa	96(%esp),%xmm1
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
	pxor	%xmm0,%xmm7
	pxor	%xmm2,%xmm1
	movdqu	%xmm2,-96(%edi,%esi,1)
	pxor	%xmm3,%xmm1
	movdqu	%xmm3,-80(%edi,%esi,1)
	pxor	%xmm4,%xmm1
	movdqu	%xmm4,-64(%edi,%esi,1)
	pxor	%xmm5,%xmm1
	movdqu	%xmm5,-48(%edi,%esi,1)
	pxor	%xmm6,%xmm1
	movdqu	%xmm6,-32(%edi,%esi,1)
	pxor	%xmm7,%xmm1
	movdqu	%xmm7,-16(%edi,%esi,1)
	cmpl	%eax,%esi
	jbe	L087grandloop
L086short:
	addl	$96,%eax
	subl	%esi,%eax
	jz	L088done
	cmpl	$32,%eax
	jb	L089one
	je	L090two
	cmpl	$64,%eax
	jb	L091three
	je	L092four
	/* five blocks */
	leal	1(%ebp),%ecx
	leal	3(%ebp),%eax
	bsfl	%ecx,%ecx
	bsfl	%eax,%eax
	shll	$4,%ecx
	shll	$4,%eax
	movdqu	(%ebx),%xmm2
	movdqu	(%ebx,%ecx,1),%xmm3
	movl	116(%esp),%ecx
	movdqa	%xmm2,%xmm4
	movdqu	(%ebx,%eax,1),%xmm5
	movdqa	%xmm2,%xmm6
	pxor	%xmm0,%xmm2
	pxor	%xmm2,%xmm3
	movdqa	%xmm2,(%esp)
	pxor	%xmm3,%xmm4
	movdqa	%xmm3,16(%esp)
	pxor	%xmm4,%xmm5
	movdqa	%xmm4,32(%esp)
	pxor	%xmm5,%xmm6
	movdqa	%xmm5,48(%esp)
	pxor	%xmm6,%xmm7
	movdqa	%xmm6,64(%esp)
	movups	-48(%edx,%ecx,1),%xmm0
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	pxor	%xmm7,%xmm7
	movdqa	%xmm1,96(%esp)
	pxor	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	movups	-32(%edx,%ecx,1),%xmm1
	pxor	(%esp),%xmm2
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
	movups	-16(%edx,%ecx,1),%xmm0
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	movl	120(%esp),%edi
	call	L_aesni_decrypt6_enter
	movdqa	64(%esp),%xmm0
	pxor	(%esp),%xmm2
	movdqa	96(%esp),%xmm1
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
	pxor	48(%esp),%xmm5
	pxor	%xmm0,%xmm6
	pxor	%xmm2,%xmm1
	movdqu	%xmm2,(%edi,%esi,1)
	pxor	%xmm3,%xmm1
	movdqu	%xmm3,16(%edi,%esi,1)
	pxor	%xmm4,%xmm1
	movdqu	%xmm4,32(%edi,%esi,1)
	pxor	%xmm5,%xmm1
	movdqu	%xmm5,48(%edi,%esi,1)
	pxor	%xmm6,%xmm1
	movdqu	%xmm6,64(%edi,%esi,1)
	jmp	L088done
.align	4,0x90
L089one:
	movdqu	(%ebx),%xmm7
	movl	112(%esp),%edx
	movdqu	(%esi),%xmm2
	movl	240(%edx),%ecx
	pxor	%xmm0,%xmm7
	pxor	%xmm7,%xmm2
	movdqa	%xmm1,%xmm6
	movl	120(%esp),%edi
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L093dec1_loop_18:
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L093dec1_loop_18
.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
	xorps	%xmm7,%xmm2
	movaps	%xmm6,%xmm1
	movdqa	%xmm7,%xmm0
	xorps	%xmm2,%xmm1
	movups	%xmm2,(%edi,%esi,1)
	jmp	L088done
.align	4,0x90
L090two:
	leal	1(%ebp),%ecx
	movl	112(%esp),%edx
	bsfl	%ecx,%ecx
	shll	$4,%ecx
	movdqu	(%ebx),%xmm6
	movdqu	(%ebx,%ecx,1),%xmm7
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movl	240(%edx),%ecx
	movdqa	%xmm1,%xmm5
	pxor	%xmm0,%xmm6
	pxor	%xmm6,%xmm7
	pxor	%xmm6,%xmm2
	pxor	%xmm7,%xmm3
	movl	120(%esp),%edi
	call	__aesni_decrypt2
	xorps	%xmm6,%xmm2
	xorps	%xmm7,%xmm3
	movdqa	%xmm7,%xmm0
	xorps	%xmm2,%xmm5
	movups	%xmm2,(%edi,%esi,1)
	xorps	%xmm3,%xmm5
	movups	%xmm3,16(%edi,%esi,1)
	movaps	%xmm5,%xmm1
	jmp	L088done
.align	4,0x90
L091three:
	leal	1(%ebp),%ecx
	movl	112(%esp),%edx
	bsfl	%ecx,%ecx
	shll	$4,%ecx
	movdqu	(%ebx),%xmm5
	movdqu	(%ebx,%ecx,1),%xmm6
	movdqa	%xmm5,%xmm7
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movl	240(%edx),%ecx
	movdqa	%xmm1,96(%esp)
	pxor	%xmm0,%xmm5
	pxor	%xmm5,%xmm6
	pxor	%xmm6,%xmm7
	pxor	%xmm5,%xmm2
	pxor	%xmm6,%xmm3
	pxor	%xmm7,%xmm4
	movl	120(%esp),%edi
	call	__aesni_decrypt3
	movdqa	96(%esp),%xmm1
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi,%esi,1)
	pxor	%xmm2,%xmm1
	movdqa	%xmm7,%xmm0
	movups	%xmm3,16(%edi,%esi,1)
	pxor	%xmm3,%xmm1
	movups	%xmm4,32(%edi,%esi,1)
	pxor	%xmm4,%xmm1
	jmp	L088done
.align	4,0x90
L092four:
	leal	1(%ebp),%ecx
	leal	3(%ebp),%eax
	bsfl	%ecx,%ecx
	bsfl	%eax,%eax
	movl	112(%esp),%edx
	shll	$4,%ecx
	shll	$4,%eax
	movdqu	(%ebx),%xmm4
	movdqu	(%ebx,%ecx,1),%xmm5
	movdqa	%xmm4,%xmm6
	movdqu	(%ebx,%eax,1),%xmm7
	pxor	%xmm0,%xmm4
	movdqu	(%esi),%xmm2
	pxor	%xmm4,%xmm5
	movdqu	16(%esi),%xmm3
	pxor	%xmm5,%xmm6
	movdqa	%xmm4,(%esp)
	pxor	%xmm6,%xmm7
	movdqa	%xmm5,16(%esp)
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movl	240(%edx),%ecx
	movdqa	%xmm1,96(%esp)
	pxor	(%esp),%xmm2
	pxor	16(%esp),%xmm3
	pxor	%xmm6,%xmm4
	pxor	%xmm7,%xmm5
	movl	120(%esp),%edi
	call	__aesni_decrypt4
	movdqa	96(%esp),%xmm1
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	%xmm6,%xmm4
	movups	%xmm2,(%edi,%esi,1)
	pxor	%xmm2,%xmm1
	xorps	%xmm7,%xmm5
	movups	%xmm3,16(%edi,%esi,1)
	pxor	%xmm3,%xmm1
	movdqa	%xmm7,%xmm0
	movups	%xmm4,32(%edi,%esi,1)
	pxor	%xmm4,%xmm1
	movups	%xmm5,48(%edi,%esi,1)
	pxor	%xmm5,%xmm1
L088done:
	/* scrub scratch, restore %esp, write back offset/checksum, return */
	movl	128(%esp),%edx
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	movdqa	%xmm2,(%esp)
	pxor	%xmm4,%xmm4
	movdqa	%xmm2,16(%esp)
	pxor	%xmm5,%xmm5
	movdqa	%xmm2,32(%esp)
	pxor	%xmm6,%xmm6
	movdqa	%xmm2,48(%esp)
	pxor	%xmm7,%xmm7
	movdqa	%xmm2,64(%esp)
	movdqa	%xmm2,80(%esp)
	movdqa	%xmm2,96(%esp)
	leal	(%edx),%esp
	movl	40(%esp),%ecx
	movl	48(%esp),%ebx
	movdqu	%xmm0,(%ecx)
	pxor	%xmm0,%xmm0
	movdqu	%xmm1,(%ebx)
	pxor	%xmm1,%xmm1
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/* ---------------------------------------------------------------------
   aesni_cbc_encrypt — CBC mode, both directions (IA-32, cdecl).
   Stack args (after 4 saved regs): 20(%esp)=src, 24(%esp)=dst,
   28(%esp)=len, 32(%esp)=key, 36(%esp)=ivp, 40(%esp)=enc flag
   (0 selects the decrypt path below).  Prologue only here; body
   continues in the next section.
   --------------------------------------------------------------------- */
.globl	_aesni_cbc_encrypt
.align	4
_aesni_cbc_encrypt:
L_aesni_cbc_encrypt_begin:
%ifdef __CET__
	/* NOTE(review): '%ifdef' looks like a garbled '#ifdef' — verify */
.byte	243,15,30,251	/* endbr32 */
%endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
	movl	%esp,%ebx
	movl	24(%esp),%edi
	subl	$24,%ebx
	movl	28(%esp),%eax
	andl	$-16,%ebx	/* 16-aligned 24-byte scratch frame */
	movl	32(%esp),%edx
	movl	36(%esp),%ebp
	testl	%eax,%eax
	jz	L094cbc_abort	/* len == 0: nothing to do */
	cmpl	$0,40(%esp)	/* enc flag decides direction below */
	xchgl	%esp,%ebx
	movups	(%ebp),%xmm7	/* %xmm7 = IV */
	/* aesni_cbc_encrypt body (continued): %xmm7 holds the IV/chain,
	   %ebp = key schedule, %ebx = rounds, 16(%esp) = saved caller %esp */
	movl	240(%edx),%ecx
	movl	%edx,%ebp
	movl	%ebx,16(%esp)
	movl	%ecx,%ebx
	je	L095cbc_decrypt	/* enc flag was 0 -> decrypt path */
	/* ---- CBC encrypt: strictly serial, one block per iteration ---- */
	movaps	%xmm7,%xmm2
	cmpl	$16,%eax
	jb	L096cbc_enc_tail
	subl	$16,%eax
	jmp	L097cbc_enc_loop
.align	4,0x90
L097cbc_enc_loop:
	movups	(%esi),%xmm7	/* plaintext block */
	leal	16(%esi),%esi
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm7
	leal	32(%edx),%edx
	xorps	%xmm7,%xmm2	/* chain: xor with previous ciphertext */
L098enc1_loop_19:
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L098enc1_loop_19
.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
	movl	%ebx,%ecx
	movl	%ebp,%edx
	movups	%xmm2,(%edi)
	leal	16(%edi),%edi
	subl	$16,%eax
	jnc	L097cbc_enc_loop
	addl	$16,%eax
	jnz	L096cbc_enc_tail
	movaps	%xmm2,%xmm7
	pxor	%xmm2,%xmm2
	jmp	L099cbc_ret
L096cbc_enc_tail:
	/* partial final block: copy the tail in, zero-pad to 16 bytes,
	   then loop once more over the padded in-place block */
	movl	%eax,%ecx
.long	2767451785	/* 0xA4F3F689: mov %esi,%esi; rep movsb (encoded) */
	movl	$16,%ecx
	subl	%eax,%ecx
	xorl	%eax,%eax
.long	2868115081	/* 0xAAF3F689: mov %esi,%esi; rep stosb (encoded) */
	leal	-16(%edi),%edi
	movl	%ebx,%ecx
	movl	%edi,%esi
	movl	%ebp,%edx
	jmp	L097cbc_enc_loop
.align	4,0x90
L095cbc_decrypt:
	/* ---- CBC decrypt: 6-block parallel main loop ---- */
	cmpl	$80,%eax
	jbe	L100cbc_dec_tail
	movaps	%xmm7,(%esp)	/* park IV on scratch */
	subl	$80,%eax
	jmp	L101cbc_dec_loop6_enter
.align	4,0x90
L102cbc_dec_loop6:
	movaps	%xmm0,(%esp)
	movups	%xmm7,(%edi)
	leal	16(%edi),%edi
L101cbc_dec_loop6_enter:
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	call	__aesni_decrypt6
	/* xor each result with the preceding ciphertext block (re-read
	   from src, so in-place operation stays correct) */
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	(%esp),%xmm2
	xorps	%xmm1,%xmm3
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm4
	movups	48(%esi),%xmm0
	xorps	%xmm1,%xmm5
	movups	64(%esi),%xmm1
	xorps	%xmm0,%xmm6
	movups	80(%esi),%xmm0
	xorps	%xmm1,%xmm7
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	leal	96(%esi),%esi
	movups	%xmm4,32(%edi)
	movl	%ebx,%ecx
	movups	%xmm5,48(%edi)
	movl	%ebp,%edx
	movups	%xmm6,64(%edi)
	leal	80(%edi),%edi
	subl	$96,%eax
	ja	L102cbc_dec_loop6
	movaps	%xmm7,%xmm2
	movaps	%xmm0,%xmm7
	addl	$80,%eax
	jle	L103cbc_dec_clear_tail_collected
	movups	%xmm2,(%edi)
	leal	16(%edi),%edi
L100cbc_dec_tail:
	/* 1..5 remaining blocks (plus possible partial) */
	movups	(%esi),%xmm2
	movaps	%xmm2,%xmm6
	cmpl	$16,%eax
	jbe	L104cbc_dec_one
	movups	16(%esi),%xmm3
	movaps	%xmm3,%xmm5
	cmpl	$32,%eax
	jbe	L105cbc_dec_two
	movups	32(%esi),%xmm4
	cmpl	$48,%eax
	jbe	L106cbc_dec_three
	movups	48(%esi),%xmm5
	cmpl	$64,%eax
	jbe	L107cbc_dec_four
	/* five blocks */
	movups	64(%esi),%xmm6
	movaps	%xmm7,(%esp)
	movups	(%esi),%xmm2
	xorps	%xmm7,%xmm7
	call	__aesni_decrypt6
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	(%esp),%xmm2
	xorps	%xmm1,%xmm3
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm4
	movups	48(%esi),%xmm0
	xorps	%xmm1,%xmm5
	movups	64(%esi),%xmm7	/* next IV = last ciphertext block */
	xorps	%xmm0,%xmm6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%edi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%edi)
	pxor	%xmm5,%xmm5
	leal	64(%edi),%edi
	movaps	%xmm6,%xmm2
	pxor	%xmm6,%xmm6
	subl	$80,%eax
	jmp	L108cbc_dec_tail_collected
.align	4,0x90
L104cbc_dec_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L109dec1_loop_20:
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L109dec1_loop_20
.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
	xorps	%xmm7,%xmm2
	movaps	%xmm6,%xmm7
	subl	$16,%eax
	jmp	L108cbc_dec_tail_collected
.align	4,0x90
L105cbc_dec_two:
	call	__aesni_decrypt2
	xorps	%xmm7,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movaps	%xmm3,%xmm2
	pxor	%xmm3,%xmm3
	leal	16(%edi),%edi
	movaps	%xmm5,%xmm7
	subl	$32,%eax
	jmp	L108cbc_dec_tail_collected
.align	4,0x90
L106cbc_dec_three:
	call	__aesni_decrypt3
	xorps	%xmm7,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm5,%xmm4
	movups	%xmm2,(%edi)
	movaps	%xmm4,%xmm2
	pxor	%xmm4,%xmm4
	movups	%xmm3,16(%edi)
	pxor	%xmm3,%xmm3
	leal	32(%edi),%edi
	movups	32(%esi),%xmm7
	subl	$48,%eax
	jmp	L108cbc_dec_tail_collected
.align	4,0x90
L107cbc_dec_four:
	call	__aesni_decrypt4
	movups	16(%esi),%xmm1
	movups	32(%esi),%xmm0
	xorps	%xmm7,%xmm2
	movups	48(%esi),%xmm7
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm4
	movups	%xmm3,16(%edi)
	pxor	%xmm3,%xmm3
	xorps	%xmm0,%xmm5
	movups	%xmm4,32(%edi)
	pxor	%xmm4,%xmm4
	leal	48(%edi),%edi
	movaps	%xmm5,%xmm2
	pxor	%xmm5,%xmm5
	subl	$64,%eax
	jmp	L108cbc_dec_tail_collected
.align	4,0x90
L103cbc_dec_clear_tail_collected:
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
L108cbc_dec_tail_collected:
	andl	$15,%eax
	jnz	L110cbc_dec_tail_partial
	movups	%xmm2,(%edi)
	pxor	%xmm0,%xmm0
	jmp	L099cbc_ret
.align	4,0x90
L110cbc_dec_tail_partial:
	/* copy only the valid prefix of the final decrypted block */
	movaps	%xmm2,(%esp)
	pxor	%xmm0,%xmm0
	movl	$16,%ecx
	movl	%esp,%esi
	subl	%eax,%ecx
.long	2767451785	/* 0xA4F3F689: mov %esi,%esi; rep movsb (encoded) */
	movdqa	%xmm2,(%esp)
L099cbc_ret:
	/* restore %esp, write chain value back through ivp, scrub, return */
	movl	16(%esp),%esp
	movl	36(%esp),%ebp
	pxor	%xmm2,%xmm2
	pxor	%xmm1,%xmm1
	movups	%xmm7,(%ebp)
	pxor	%xmm7,%xmm7
L094cbc_abort:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/* ---------------------------------------------------------------------
   __aesni_set_encrypt_key — internal key-schedule generator.
   In: %eax = user key, %ecx = bits (128/192/256), %edx = AES_KEY out.
   Picks an AESKEYGENASSIST path or an "_alt" pshufb/aesenclast path
   based on an OPENSSL_ia32cap_P capability bit (mask 268437504).
   Definition continues past this chunk (only the head and the 128/192
   round expansion are visible here).
   --------------------------------------------------------------------- */
.align	4
__aesni_set_encrypt_key:
%ifdef __CET__
	/* NOTE(review): '%ifdef' looks like a garbled '#ifdef' — verify */
.byte	243,15,30,251	/* endbr32 */
%endif

	pushl	%ebp
	pushl	%ebx
	testl	%eax,%eax
	jz	L111bad_pointer
	testl	%edx,%edx
	jz	L111bad_pointer
	call	L112pic	/* PIC trampoline: get current address in %ebx */
L112pic:
	popl	%ebx
	leal	Lkey_const-L112pic(%ebx),%ebx
	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp
	movups	(%eax),%xmm0	/* first 16 key bytes */
	xorps	%xmm4,%xmm4
	movl	4(%ebp),%ebp	/* capability word */
	leal	16(%edx),%edx
	andl	$268437504,%ebp
	cmpl	$256,%ecx
	je	L11314rounds
	cmpl	$192,%ecx
	je	L11412rounds
	cmpl	$128,%ecx
	jne	L115bad_keybits
.align	4,0x90
L11610rounds:
	/* AES-128: 10 rounds */
	cmpl	$268435456,%ebp
	je	L11710rounds_alt
	movl	$9,%ecx
	movups	%xmm0,-16(%edx)
.byte	102,15,58,223,200,1	/* aeskeygenassist $1,%xmm0,%xmm1 */
	call	L118key_128_cold
.byte	102,15,58,223,200,2
	call	L119key_128
.byte	102,15,58,223,200,4
	call	L119key_128
.byte	102,15,58,223,200,8
	call	L119key_128
.byte	102,15,58,223,200,16
	call	L119key_128
.byte	102,15,58,223,200,32
	call	L119key_128
.byte	102,15,58,223,200,64
	call	L119key_128
.byte	102,15,58,223,200,128
	call	L119key_128
.byte	102,15,58,223,200,27
	call	L119key_128
.byte	102,15,58,223,200,54
	call	L119key_128
	movups	%xmm0,(%edx)
	movl	%ecx,80(%edx)
	jmp	L120good_key
.align	4,0x90
L119key_128:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
L118key_128_cold:
	/* fold previous round key into the keygenassist output */
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1
	xorps	%xmm1,%xmm0
	ret
.align	4,0x90
L11710rounds_alt:
	/* alternate AES-128 schedule via pshufb + aesenclast */
	movdqa	(%ebx),%xmm5	/* Lkey_const tables */
	movl	$8,%ecx
	movdqa	32(%ebx),%xmm4
	movdqa	%xmm0,%xmm2
	movdqu	%xmm0,-16(%edx)
L121loop_key128:
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
.byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
	pslld	$1,%xmm4	/* next round constant */
	leal	16(%edx),%edx
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,-16(%edx)
	movdqa	%xmm0,%xmm2
	decl	%ecx
	jnz	L121loop_key128
	movdqa	48(%ebx),%xmm4
.byte	102,15,56,0,197
.byte	102,15,56,221,196
	pslld	$1,%xmm4
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%edx)
	movdqa	%xmm0,%xmm2
.byte	102,15,56,0,197
.byte	102,15,56,221,196
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,16(%edx)
	movl	$9,%ecx
	movl	%ecx,96(%edx)
	jmp	L120good_key
.align	4,0x90
L11412rounds:
	/* AES-192: 12 rounds */
	movq	16(%eax),%xmm2	/* key bytes 16..23 */
	cmpl	$268435456,%ebp
	je	L12212rounds_alt
	movl	$11,%ecx
	movups	%xmm0,-16(%edx)
.byte	102,15,58,223,202,1	/* aeskeygenassist $1,%xmm2,%xmm1 */
	call	L123key_192a_cold
.byte	102,15,58,223,202,2
	call	L124key_192b
.byte	102,15,58,223,202,4
	call	L125key_192a
.byte	102,15,58,223,202,8
	call	L124key_192b
.byte	102,15,58,223,202,16
	call	L125key_192a
.byte	102,15,58,223,202,32
	call	L124key_192b
.byte	102,15,58,223,202,64
	call	L125key_192a
.byte	102,15,58,223,202,128
	call	L124key_192b
	movups	%xmm0,(%edx)
	movl	%ecx,48(%edx)
	jmp	L120good_key
.align	4,0x90
L125key_192a:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
.align	4,0x90
L123key_192a_cold:
	movaps	%xmm2,%xmm5
L126key_192b_warm:
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0
	pshufd	$255,%xmm0,%xmm3
	pxor	%xmm3,%xmm2
	ret
.align	4,0x90
L124key_192b:
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5
	movups	%xmm5,(%edx)
	shufps	$78,%xmm2,%xmm3
	movups	%xmm3,16(%edx)
	leal	32(%edx),%edx
	jmp	L126key_192b_warm
.align	4,0x90
L12212rounds_alt:
	/* alternate AES-192 schedule (continues past this chunk) */
	movdqa	16(%ebx),%xmm5
	movdqa	32(%ebx),%xmm4
	movl	$8,%ecx
	movdqu	%xmm0,-16(%edx)
3083L127loop_key192: 3084 movq %xmm2,(%edx) 3085 movdqa %xmm2,%xmm1 3086.byte 102,15,56,0,213 3087.byte 102,15,56,221,212 3088 pslld $1,%xmm4 3089 leal 24(%edx),%edx 3090 movdqa %xmm0,%xmm3 3091 pslldq $4,%xmm0 3092 pxor %xmm0,%xmm3 3093 pslldq $4,%xmm0 3094 pxor %xmm0,%xmm3 3095 pslldq $4,%xmm0 3096 pxor %xmm3,%xmm0 3097 pshufd $255,%xmm0,%xmm3 3098 pxor %xmm1,%xmm3 3099 pslldq $4,%xmm1 3100 pxor %xmm1,%xmm3 3101 pxor %xmm2,%xmm0 3102 pxor %xmm3,%xmm2 3103 movdqu %xmm0,-16(%edx) 3104 decl %ecx 3105 jnz L127loop_key192 3106 movl $11,%ecx 3107 movl %ecx,32(%edx) 3108 jmp L120good_key 3109.align 4,0x90 3110L11314rounds: 3111 movups 16(%eax),%xmm2 3112 leal 16(%edx),%edx 3113 cmpl $268435456,%ebp 3114 je L12814rounds_alt 3115 movl $13,%ecx 3116 movups %xmm0,-32(%edx) 3117 movups %xmm2,-16(%edx) 3118.byte 102,15,58,223,202,1 3119 call L129key_256a_cold 3120.byte 102,15,58,223,200,1 3121 call L130key_256b 3122.byte 102,15,58,223,202,2 3123 call L131key_256a 3124.byte 102,15,58,223,200,2 3125 call L130key_256b 3126.byte 102,15,58,223,202,4 3127 call L131key_256a 3128.byte 102,15,58,223,200,4 3129 call L130key_256b 3130.byte 102,15,58,223,202,8 3131 call L131key_256a 3132.byte 102,15,58,223,200,8 3133 call L130key_256b 3134.byte 102,15,58,223,202,16 3135 call L131key_256a 3136.byte 102,15,58,223,200,16 3137 call L130key_256b 3138.byte 102,15,58,223,202,32 3139 call L131key_256a 3140.byte 102,15,58,223,200,32 3141 call L130key_256b 3142.byte 102,15,58,223,202,64 3143 call L131key_256a 3144 movups %xmm0,(%edx) 3145 movl %ecx,16(%edx) 3146 xorl %eax,%eax 3147 jmp L120good_key 3148.align 4,0x90 3149L131key_256a: 3150 movups %xmm2,(%edx) 3151 leal 16(%edx),%edx 3152L129key_256a_cold: 3153 shufps $16,%xmm0,%xmm4 3154 xorps %xmm4,%xmm0 3155 shufps $140,%xmm0,%xmm4 3156 xorps %xmm4,%xmm0 3157 shufps $255,%xmm1,%xmm1 3158 xorps %xmm1,%xmm0 3159 ret 3160.align 4,0x90 3161L130key_256b: 3162 movups %xmm0,(%edx) 3163 leal 16(%edx),%edx 3164 shufps $16,%xmm2,%xmm4 3165 xorps 
%xmm4,%xmm2 3166 shufps $140,%xmm2,%xmm4 3167 xorps %xmm4,%xmm2 3168 shufps $170,%xmm1,%xmm1 3169 xorps %xmm1,%xmm2 3170 ret 3171.align 4,0x90 3172L12814rounds_alt: 3173 movdqa (%ebx),%xmm5 3174 movdqa 32(%ebx),%xmm4 3175 movl $7,%ecx 3176 movdqu %xmm0,-32(%edx) 3177 movdqa %xmm2,%xmm1 3178 movdqu %xmm2,-16(%edx) 3179L132loop_key256: 3180.byte 102,15,56,0,213 3181.byte 102,15,56,221,212 3182 movdqa %xmm0,%xmm3 3183 pslldq $4,%xmm0 3184 pxor %xmm0,%xmm3 3185 pslldq $4,%xmm0 3186 pxor %xmm0,%xmm3 3187 pslldq $4,%xmm0 3188 pxor %xmm3,%xmm0 3189 pslld $1,%xmm4 3190 pxor %xmm2,%xmm0 3191 movdqu %xmm0,(%edx) 3192 decl %ecx 3193 jz L133done_key256 3194 pshufd $255,%xmm0,%xmm2 3195 pxor %xmm3,%xmm3 3196.byte 102,15,56,221,211 3197 movdqa %xmm1,%xmm3 3198 pslldq $4,%xmm1 3199 pxor %xmm1,%xmm3 3200 pslldq $4,%xmm1 3201 pxor %xmm1,%xmm3 3202 pslldq $4,%xmm1 3203 pxor %xmm3,%xmm1 3204 pxor %xmm1,%xmm2 3205 movdqu %xmm2,16(%edx) 3206 leal 32(%edx),%edx 3207 movdqa %xmm2,%xmm1 3208 jmp L132loop_key256 3209L133done_key256: 3210 movl $13,%ecx 3211 movl %ecx,16(%edx) 3212L120good_key: 3213 pxor %xmm0,%xmm0 3214 pxor %xmm1,%xmm1 3215 pxor %xmm2,%xmm2 3216 pxor %xmm3,%xmm3 3217 pxor %xmm4,%xmm4 3218 pxor %xmm5,%xmm5 3219 xorl %eax,%eax 3220 popl %ebx 3221 popl %ebp 3222 ret 3223.align 2,0x90 3224L111bad_pointer: 3225 movl $-1,%eax 3226 popl %ebx 3227 popl %ebp 3228 ret 3229.align 2,0x90 3230L115bad_keybits: 3231 pxor %xmm0,%xmm0 3232 movl $-2,%eax 3233 popl %ebx 3234 popl %ebp 3235 ret 3236.globl _aesni_set_encrypt_key 3237.align 4 3238_aesni_set_encrypt_key: 3239L_aesni_set_encrypt_key_begin: 3240 %ifdef __CET__ 3241 3242.byte 243,15,30,251 3243 %endif 3244 3245 movl 4(%esp),%eax 3246 movl 8(%esp),%ecx 3247 movl 12(%esp),%edx 3248 call __aesni_set_encrypt_key 3249 ret 3250.globl _aesni_set_decrypt_key 3251.align 4 3252_aesni_set_decrypt_key: 3253L_aesni_set_decrypt_key_begin: 3254 %ifdef __CET__ 3255 3256.byte 243,15,30,251 3257 %endif 3258 3259 movl 4(%esp),%eax 3260 movl 
8(%esp),%ecx 3261 movl 12(%esp),%edx 3262 call __aesni_set_encrypt_key 3263 movl 12(%esp),%edx 3264 shll $4,%ecx 3265 testl %eax,%eax 3266 jnz L134dec_key_ret 3267 leal 16(%edx,%ecx,1),%eax 3268 movups (%edx),%xmm0 3269 movups (%eax),%xmm1 3270 movups %xmm0,(%eax) 3271 movups %xmm1,(%edx) 3272 leal 16(%edx),%edx 3273 leal -16(%eax),%eax 3274L135dec_key_inverse: 3275 movups (%edx),%xmm0 3276 movups (%eax),%xmm1 3277.byte 102,15,56,219,192 3278.byte 102,15,56,219,201 3279 leal 16(%edx),%edx 3280 leal -16(%eax),%eax 3281 movups %xmm0,16(%eax) 3282 movups %xmm1,-16(%edx) 3283 cmpl %edx,%eax 3284 ja L135dec_key_inverse 3285 movups (%edx),%xmm0 3286.byte 102,15,56,219,192 3287 movups %xmm0,(%edx) 3288 pxor %xmm0,%xmm0 3289 pxor %xmm1,%xmm1 3290 xorl %eax,%eax 3291L134dec_key_ret: 3292 ret 3293.align 6,0x90 3294Lkey_const: 3295.long 202313229,202313229,202313229,202313229 3296.long 67569157,67569157,67569157,67569157 3297.long 1,1,1,1 3298.long 27,27,27,27 3299.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 3300.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 3301.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 3302.byte 115,108,46,111,114,103,62,0 3303.section __IMPORT,__pointers,non_lazy_symbol_pointers 3304L_OPENSSL_ia32cap_P$non_lazy_ptr: 3305.indirect_symbol _OPENSSL_ia32cap_P 3306.long 0 3307.comm _OPENSSL_ia32cap_P,16,2 3308