# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#include "ring_core_generated/prefix_symbols_asm.h"
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.cfi_startproc
.L_init_clmul:
	movdqu (%rsi),%xmm2
	pshufd $78,%xmm2,%xmm2


	pshufd $255,%xmm2,%xmm4
	movdqa %xmm2,%xmm3
	psllq $1,%xmm2
	pxor %xmm5,%xmm5
	psrlq $63,%xmm3
	pcmpgtd %xmm4,%xmm5
	pslldq $8,%xmm3
	por %xmm3,%xmm2


	pand .L0x1c2_polynomial(%rip),%xmm5
	pxor %xmm5,%xmm2


	pshufd $78,%xmm2,%xmm6
	movdqa %xmm2,%xmm0
	pxor %xmm2,%xmm6
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	pshufd $78,%xmm2,%xmm3
	pshufd $78,%xmm0,%xmm4
	pxor %xmm2,%xmm3
	movdqu %xmm2,0(%rdi)
	pxor %xmm0,%xmm4
	movdqu %xmm0,16(%rdi)
.byte 102,15,58,15,227,8
	movdqu %xmm4,32(%rdi)
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	movdqa %xmm0,%xmm5
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	pshufd $78,%xmm5,%xmm3
	pshufd $78,%xmm0,%xmm4
	pxor %xmm5,%xmm3
	movdqu %xmm5,48(%rdi)
	pxor %xmm0,%xmm4
	movdqu %xmm0,64(%rdi)
.byte 102,15,58,15,227,8
	movdqu %xmm4,80(%rdi)
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_clmul,.-gcm_init_clmul
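/* gcm_gmult_clmul: one GHASH multiplication of the accumulator by H.
   As the code shows, %rdi points at the 128-bit accumulator Xi (read,
   then written back) and %rsi at the table written by gcm_init_clmul.
   The .byte runs are hand-encoded SSE instructions kept for old
   assemblers: 102,15,56,0,... encodes pshufb and 102,15,58,68,...
   encodes pclmulqdq. */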
.globl gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.cfi_startproc
.L_gmult_clmul:
	movdqu (%rdi),%xmm0
	movdqa .Lbswap_mask(%rip),%xmm5
	movdqu (%rsi),%xmm2
	movdqu 32(%rsi),%xmm4
.byte 102,15,56,0,197
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
.byte 102,15,56,0,197
	movdqu %xmm0,(%rdi)
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_clmul,.-gcm_gmult_clmul
.globl gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 32
gcm_ghash_clmul:
.cfi_startproc
.L_ghash_clmul:
	movdqa .Lbswap_mask(%rip),%xmm10

	movdqu (%rdi),%xmm0
	movdqu (%rsi),%xmm2
	movdqu 32(%rsi),%xmm7
.byte 102,65,15,56,0,194

	subq $0x10,%rcx
	jz .Lodd_tail

	movdqu 16(%rsi),%xmm6
	leaq OPENSSL_ia32cap_P(%rip),%rax
	movl 4(%rax),%eax
	cmpq $0x30,%rcx
	jb .Lskip4x

	andl $71303168,%eax
	cmpl $4194304,%eax
	je .Lskip4x

	subq $0x30,%rcx
	movq $0xA040608020C0E000,%rax
	movdqu 48(%rsi),%xmm14
	movdqu 64(%rsi),%xmm15




	movdqu 48(%rdx),%xmm3
	movdqu 32(%rdx),%xmm11
.byte 102,65,15,56,0,218
.byte 102,69,15,56,0,218
	movdqa %xmm3,%xmm5
	pshufd $78,%xmm3,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0

	movdqa %xmm11,%xmm13
	pshufd $78,%xmm11,%xmm12
	pxor %xmm11,%xmm12
.byte 102,68,15,58,68,222,0
.byte 102,68,15,58,68,238,17
.byte 102,68,15,58,68,231,16
	xorps %xmm11,%xmm3
	xorps %xmm13,%xmm5
	movups 80(%rsi),%xmm7
	xorps %xmm12,%xmm4

	movdqu 16(%rdx),%xmm11
	movdqu 0(%rdx),%xmm8
.byte 102,69,15,56,0,218
.byte 102,69,15,56,0,194
	movdqa %xmm11,%xmm13
	pshufd $78,%xmm11,%xmm12
	pxor %xmm8,%xmm0
	pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm8
	pxor %xmm0,%xmm8
.byte 102,69,15,58,68,238,17
.byte 102,68,15,58,68,231,0
	xorps %xmm11,%xmm3
	xorps %xmm13,%xmm5

	leaq 64(%rdx),%rdx
	subq $0x40,%rcx
	jc .Ltail4x

	jmp .Lmod4_loop
.align 32
.Lmod4_loop:
.byte 102,65,15,58,68,199,0
	xorps %xmm12,%xmm4
	movdqu 48(%rdx),%xmm11
.byte 102,69,15,56,0,218
.byte 102,65,15,58,68,207,17
	xorps %xmm3,%xmm0
	movdqu 32(%rdx),%xmm3
	movdqa %xmm11,%xmm13
.byte 102,68,15,58,68,199,16
	pshufd $78,%xmm11,%xmm12
	xorps %xmm5,%xmm1
	pxor %xmm11,%xmm12
.byte 102,65,15,56,0,218
	movups 32(%rsi),%xmm7
	xorps %xmm4,%xmm8
.byte 102,68,15,58,68,218,0
	pshufd $78,%xmm3,%xmm4

	pxor %xmm0,%xmm8
	movdqa %xmm3,%xmm5
	pxor %xmm1,%xmm8
	pxor %xmm3,%xmm4
	movdqa %xmm8,%xmm9
.byte 102,68,15,58,68,234,17
	pslldq $8,%xmm8
	psrldq $8,%xmm9
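/* Karatsuba fold: the middle product in %xmm8/%xmm9 is split across the
   128-bit lane boundary and xor-ed into the low (%xmm0) and high (%xmm1)
   halves of the 256-bit result. */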
	pxor %xmm8,%xmm0
	movdqa .L7_mask(%rip),%xmm8
	pxor %xmm9,%xmm1
.byte 102,76,15,110,200

	pand %xmm0,%xmm8
.byte 102,69,15,56,0,200
	pxor %xmm0,%xmm9
.byte 102,68,15,58,68,231,0
	psllq $57,%xmm9
	movdqa %xmm9,%xmm8
	pslldq $8,%xmm9
.byte 102,15,58,68,222,0
	psrldq $8,%xmm8
	pxor %xmm9,%xmm0
	pxor %xmm8,%xmm1
	movdqu 0(%rdx),%xmm8

	movdqa %xmm0,%xmm9
	psrlq $1,%xmm0
.byte 102,15,58,68,238,17
	xorps %xmm11,%xmm3
	movdqu 16(%rdx),%xmm11
.byte 102,69,15,56,0,218
.byte 102,15,58,68,231,16
	xorps %xmm13,%xmm5
	movups 80(%rsi),%xmm7
.byte 102,69,15,56,0,194
	pxor %xmm9,%xmm1
	pxor %xmm0,%xmm9
	psrlq $5,%xmm0

	movdqa %xmm11,%xmm13
	pxor %xmm12,%xmm4
	pshufd $78,%xmm11,%xmm12
	pxor %xmm9,%xmm0
	pxor %xmm8,%xmm1
	pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	movdqa %xmm0,%xmm1
.byte 102,69,15,58,68,238,17
	xorps %xmm11,%xmm3
	pshufd $78,%xmm0,%xmm8
	pxor %xmm0,%xmm8

.byte 102,68,15,58,68,231,0
	xorps %xmm13,%xmm5

	leaq 64(%rdx),%rdx
	subq $0x40,%rcx
	jnc .Lmod4_loop

.Ltail4x:
.byte 102,65,15,58,68,199,0
.byte 102,65,15,58,68,207,17
.byte 102,68,15,58,68,199,16
	xorps %xmm12,%xmm4
	xorps %xmm3,%xmm0
	xorps %xmm5,%xmm1
	pxor %xmm0,%xmm1
	pxor %xmm4,%xmm8

	pxor %xmm1,%xmm8
	pxor %xmm0,%xmm1

	movdqa %xmm8,%xmm9
	psrldq $8,%xmm8
	pslldq $8,%xmm9
	pxor %xmm8,%xmm1
	pxor %xmm9,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	addq $0x40,%rcx
	jz .Ldone
	movdqu 32(%rsi),%xmm7
	subq $0x10,%rcx
	jz .Lodd_tail
.Lskip4x:





	movdqu (%rdx),%xmm8
	movdqu 16(%rdx),%xmm3
.byte 102,69,15,56,0,194
.byte 102,65,15,56,0,218
	pxor %xmm8,%xmm0

	movdqa %xmm3,%xmm5
	pshufd $78,%xmm3,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0

	leaq 32(%rdx),%rdx
	nop
	subq $0x20,%rcx
	jbe .Leven_tail
	nop
	jmp .Lmod_loop

.align 32
.Lmod_loop:
	movdqa %xmm0,%xmm1
	movdqa %xmm4,%xmm8
	pshufd $78,%xmm0,%xmm4
	pxor %xmm0,%xmm4

.byte 102,15,58,68,198,0
.byte 102,15,58,68,206,17
.byte 102,15,58,68,231,16

	pxor %xmm3,%xmm0
	pxor %xmm5,%xmm1
	movdqu (%rdx),%xmm9
	pxor %xmm0,%xmm8
.byte 102,69,15,56,0,202
	movdqu 16(%rdx),%xmm3

	pxor %xmm1,%xmm8
	pxor %xmm9,%xmm1
	pxor %xmm8,%xmm4
.byte 102,65,15,56,0,218
	movdqa %xmm4,%xmm8
	psrldq $8,%xmm8
	pslldq $8,%xmm4
	pxor %xmm8,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm3,%xmm5

	movdqa %xmm0,%xmm9
	movdqa %xmm0,%xmm8
	psllq $5,%xmm0
	pxor %xmm0,%xmm8
.byte 102,15,58,68,218,0
	psllq $1,%xmm0
	pxor %xmm8,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm8
	pslldq $8,%xmm0
	psrldq $8,%xmm8
	pxor %xmm9,%xmm0
	pshufd $78,%xmm5,%xmm4
	pxor %xmm8,%xmm1
	pxor %xmm5,%xmm4

	movdqa %xmm0,%xmm9
	psrlq $1,%xmm0
.byte 102,15,58,68,234,17
	pxor %xmm9,%xmm1
	pxor %xmm0,%xmm9
	psrlq $5,%xmm0
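/* Second phase of the reduction modulo the GHASH polynomial
   x^128 + x^7 + x^2 + x + 1: a psrlq $1/$5/$1 and pxor chain. */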
	pxor %xmm9,%xmm0
	leaq 32(%rdx),%rdx
	psrlq $1,%xmm0
.byte 102,15,58,68,231,0
	pxor %xmm1,%xmm0

	subq $0x20,%rcx
	ja .Lmod_loop

.Leven_tail:
	movdqa %xmm0,%xmm1
	movdqa %xmm4,%xmm8
	pshufd $78,%xmm0,%xmm4
	pxor %xmm0,%xmm4

.byte 102,15,58,68,198,0
.byte 102,15,58,68,206,17
.byte 102,15,58,68,231,16

	pxor %xmm3,%xmm0
	pxor %xmm5,%xmm1
	pxor %xmm0,%xmm8
	pxor %xmm1,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm8
	psrldq $8,%xmm8
	pslldq $8,%xmm4
	pxor %xmm8,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	testq %rcx,%rcx
	jnz .Ldone

.Lodd_tail:
	movdqu (%rdx),%xmm8
.byte 102,69,15,56,0,194
	pxor %xmm8,%xmm0
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,223,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
.Ldone:
.byte 102,65,15,56,0,194
	movdqu %xmm0,(%rdi)
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_clmul,.-gcm_ghash_clmul
.globl gcm_init_avx
.hidden gcm_init_avx
.type gcm_init_avx,@function
.align 32
gcm_init_avx:
.cfi_startproc
	vzeroupper

	vmovdqu (%rsi),%xmm2
	vpshufd $78,%xmm2,%xmm2


	vpshufd $255,%xmm2,%xmm4
	vpsrlq $63,%xmm2,%xmm3
	vpsllq $1,%xmm2,%xmm2
	vpxor %xmm5,%xmm5,%xmm5
	vpcmpgtd %xmm4,%xmm5,%xmm5
	vpslldq $8,%xmm3,%xmm3
	vpor %xmm3,%xmm2,%xmm2


	vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5
	vpxor %xmm5,%xmm2,%xmm2

	vpunpckhqdq %xmm2,%xmm2,%xmm6
	vmovdqa %xmm2,%xmm0
	vpxor %xmm2,%xmm6,%xmm6
	movq $4,%r10
	jmp .Linit_start_avx
.align 32
.Linit_loop_avx:
	vpalignr $8,%xmm3,%xmm4,%xmm5
	vmovdqu %xmm5,-16(%rdi)
	vpunpckhqdq %xmm0,%xmm0,%xmm3
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
	vpxor %xmm0,%xmm1,%xmm4
	vpxor %xmm4,%xmm3,%xmm3

	vpslldq $8,%xmm3,%xmm4
	vpsrldq $8,%xmm3,%xmm3
	vpxor %xmm4,%xmm0,%xmm0
	vpxor %xmm3,%xmm1,%xmm1
	vpsllq $57,%xmm0,%xmm3
	vpsllq $62,%xmm0,%xmm4
	vpxor %xmm3,%xmm4,%xmm4
	vpsllq $63,%xmm0,%xmm3
	vpxor %xmm3,%xmm4,%xmm4
	vpslldq $8,%xmm4,%xmm3
	vpsrldq $8,%xmm4,%xmm4
	vpxor %xmm3,%xmm0,%xmm0
	vpxor %xmm4,%xmm1,%xmm1

	vpsrlq $1,%xmm0,%xmm4
	vpxor %xmm0,%xmm1,%xmm1
	vpxor %xmm4,%xmm0,%xmm0
	vpsrlq $5,%xmm4,%xmm4
	vpxor %xmm4,%xmm0,%xmm0
	vpsrlq $1,%xmm0,%xmm0
	vpxor %xmm1,%xmm0,%xmm0
.Linit_start_avx:
	vmovdqa %xmm0,%xmm5
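/* %xmm5 preserves the power of H computed so far; the Karatsuba multiply
   below (by H in %xmm2, with %xmm6 = H.lo xor H.hi) yields the next
   power for the table. */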
	vpunpckhqdq %xmm0,%xmm0,%xmm3
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
	vpxor %xmm0,%xmm1,%xmm4
	vpxor %xmm4,%xmm3,%xmm3

	vpslldq $8,%xmm3,%xmm4
	vpsrldq $8,%xmm3,%xmm3
	vpxor %xmm4,%xmm0,%xmm0
	vpxor %xmm3,%xmm1,%xmm1
	vpsllq $57,%xmm0,%xmm3
	vpsllq $62,%xmm0,%xmm4
	vpxor %xmm3,%xmm4,%xmm4
	vpsllq $63,%xmm0,%xmm3
	vpxor %xmm3,%xmm4,%xmm4
	vpslldq $8,%xmm4,%xmm3
	vpsrldq $8,%xmm4,%xmm4
	vpxor %xmm3,%xmm0,%xmm0
	vpxor %xmm4,%xmm1,%xmm1

	vpsrlq $1,%xmm0,%xmm4
	vpxor %xmm0,%xmm1,%xmm1
	vpxor %xmm4,%xmm0,%xmm0
	vpsrlq $5,%xmm4,%xmm4
	vpxor %xmm4,%xmm0,%xmm0
	vpsrlq $1,%xmm0,%xmm0
	vpxor %xmm1,%xmm0,%xmm0
	vpshufd $78,%xmm5,%xmm3
	vpshufd $78,%xmm0,%xmm4
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqu %xmm5,0(%rdi)
	vpxor %xmm0,%xmm4,%xmm4
	vmovdqu %xmm0,16(%rdi)
	leaq 48(%rdi),%rdi
	subq $1,%r10
	jnz .Linit_loop_avx

	vpalignr $8,%xmm4,%xmm3,%xmm5
	vmovdqu %xmm5,-16(%rdi)

	vzeroupper
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_avx,.-gcm_init_avx
.globl gcm_ghash_avx
.hidden gcm_ghash_avx
.type gcm_ghash_avx,@function
.align 32
gcm_ghash_avx:
.cfi_startproc
	vzeroupper

	vmovdqu (%rdi),%xmm10
	leaq .L0x1c2_polynomial(%rip),%r10
	leaq 64(%rsi),%rsi
	vmovdqu .Lbswap_mask(%rip),%xmm13
	vpshufb %xmm13,%xmm10,%xmm10
	cmpq $0x80,%rcx
	jb .Lshort_avx
	subq $0x80,%rcx

	vmovdqu 112(%rdx),%xmm14
	vmovdqu 0-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm14
	vmovdqu 32-64(%rsi),%xmm7

	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vmovdqu 96(%rdx),%xmm15
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpxor %xmm14,%xmm9,%xmm9
	vpshufb %xmm13,%xmm15,%xmm15
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 16-64(%rsi),%xmm6
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vmovdqu 80(%rdx),%xmm14
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vpxor %xmm15,%xmm8,%xmm8

	vpshufb %xmm13,%xmm14,%xmm14
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vmovdqu 48-64(%rsi),%xmm6
	vpxor %xmm14,%xmm9,%xmm9
	vmovdqu 64(%rdx),%xmm15
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 80-64(%rsi),%xmm7

	vpshufb %xmm13,%xmm15,%xmm15
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpxor %xmm1,%xmm4,%xmm4
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 64-64(%rsi),%xmm6
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vpxor %xmm15,%xmm8,%xmm8

	vmovdqu 48(%rdx),%xmm14
	vpxor %xmm3,%xmm0,%xmm0
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpxor %xmm4,%xmm1,%xmm1
	vpshufb %xmm13,%xmm14,%xmm14
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vmovdqu 96-64(%rsi),%xmm6
	vpxor %xmm5,%xmm2,%xmm2
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 128-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9

	vmovdqu 32(%rdx),%xmm15
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpxor %xmm1,%xmm4,%xmm4
	vpshufb %xmm13,%xmm15,%xmm15
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 112-64(%rsi),%xmm6
	vpxor %xmm2,%xmm5,%xmm5
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vpxor %xmm15,%xmm8,%xmm8

	vmovdqu 16(%rdx),%xmm14
	vpxor %xmm3,%xmm0,%xmm0
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
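/* Startup pass of gcm_ghash_avx continues: eight input blocks are each
   multiplied by the matching power of H from the table, and the partial
   lo/hi/middle products accumulate in %xmm0/%xmm1/%xmm2. */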
	vpxor %xmm4,%xmm1,%xmm1
	vpshufb %xmm13,%xmm14,%xmm14
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vmovdqu 144-64(%rsi),%xmm6
	vpxor %xmm5,%xmm2,%xmm2
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 176-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9

	vmovdqu (%rdx),%xmm15
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpxor %xmm1,%xmm4,%xmm4
	vpshufb %xmm13,%xmm15,%xmm15
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 160-64(%rsi),%xmm6
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2

	leaq 128(%rdx),%rdx
	cmpq $0x80,%rcx
	jb .Ltail_avx

	vpxor %xmm10,%xmm15,%xmm15
	subq $0x80,%rcx
	jmp .Loop8x_avx

.align 32
.Loop8x_avx:
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vmovdqu 112(%rdx),%xmm14
	vpxor %xmm0,%xmm3,%xmm3
	vpxor %xmm15,%xmm8,%xmm8
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10
	vpshufb %xmm13,%xmm14,%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11
	vmovdqu 0-64(%rsi),%xmm6
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12
	vmovdqu 32-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9

	vmovdqu 96(%rdx),%xmm15
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpxor %xmm3,%xmm10,%xmm10
	vpshufb %xmm13,%xmm15,%xmm15
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vxorps %xmm4,%xmm11,%xmm11
	vmovdqu 16-64(%rsi),%xmm6
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vpxor %xmm5,%xmm12,%xmm12
	vxorps %xmm15,%xmm8,%xmm8

	vmovdqu 80(%rdx),%xmm14
	vpxor %xmm10,%xmm12,%xmm12
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpxor %xmm11,%xmm12,%xmm12
	vpslldq $8,%xmm12,%xmm9
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vpsrldq $8,%xmm12,%xmm12
	vpxor %xmm9,%xmm10,%xmm10
	vmovdqu 48-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm14
	vxorps %xmm12,%xmm11,%xmm11
	vpxor %xmm1,%xmm4,%xmm4
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 80-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9
	vpxor %xmm2,%xmm5,%xmm5

	vmovdqu 64(%rdx),%xmm15
	vpalignr $8,%xmm10,%xmm10,%xmm12
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpshufb %xmm13,%xmm15,%xmm15
	vpxor %xmm3,%xmm0,%xmm0
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 64-64(%rsi),%xmm6
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm4,%xmm1,%xmm1
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vxorps %xmm15,%xmm8,%xmm8
	vpxor %xmm5,%xmm2,%xmm2

	vmovdqu 48(%rdx),%xmm14
	vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpshufb %xmm13,%xmm14,%xmm14
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vmovdqu 96-64(%rsi),%xmm6
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 128-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9
	vpxor %xmm2,%xmm5,%xmm5

	vmovdqu 32(%rdx),%xmm15
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpshufb %xmm13,%xmm15,%xmm15
	vpxor %xmm3,%xmm0,%xmm0
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 112-64(%rsi),%xmm6
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm4,%xmm1,%xmm1
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vpxor %xmm15,%xmm8,%xmm8
	vpxor %xmm5,%xmm2,%xmm2
	vxorps %xmm12,%xmm10,%xmm10

	vmovdqu 16(%rdx),%xmm14
	vpalignr $8,%xmm10,%xmm10,%xmm12
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpshufb %xmm13,%xmm14,%xmm14
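/* The block just loaded is byte-swapped into GHASH bit order; the loop
   interleaves these multiplies with the ongoing reduction of the running
   hash in %xmm10 against .L0x1c2_polynomial. */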
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vmovdqu 144-64(%rsi),%xmm6
	vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
	vxorps %xmm11,%xmm12,%xmm12
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 176-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9
	vpxor %xmm2,%xmm5,%xmm5

	vmovdqu (%rdx),%xmm15
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpshufb %xmm13,%xmm15,%xmm15
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 160-64(%rsi),%xmm6
	vpxor %xmm12,%xmm15,%xmm15
	vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
	vpxor %xmm10,%xmm15,%xmm15

	leaq 128(%rdx),%rdx
	subq $0x80,%rcx
	jnc .Loop8x_avx

	addq $0x80,%rcx
	jmp .Ltail_no_xor_avx

.align 32
.Lshort_avx:
	vmovdqu -16(%rdx,%rcx,1),%xmm14
	leaq (%rdx,%rcx,1),%rdx
	vmovdqu 0-64(%rsi),%xmm6
	vmovdqu 32-64(%rsi),%xmm7
	vpshufb %xmm13,%xmm14,%xmm15

	vmovdqa %xmm0,%xmm3
	vmovdqa %xmm1,%xmm4
	vmovdqa %xmm2,%xmm5
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -32(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 16-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vpsrldq $8,%xmm7,%xmm7
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -48(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 48-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vmovdqu 80-64(%rsi),%xmm7
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -64(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 64-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vpsrldq $8,%xmm7,%xmm7
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -80(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 96-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vmovdqu 128-64(%rsi),%xmm7
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -96(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 112-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vpsrldq $8,%xmm7,%xmm7
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -112(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 144-64(%rsi),%xmm6
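/* Seventh and last single-block step of the short-input path; %xmm6 now
   holds H^7, the highest table power this path uses. */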
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vmovq 184-64(%rsi),%xmm7
	subq $0x10,%rcx
	jmp .Ltail_avx

.align 32
.Ltail_avx:
	vpxor %xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2

	vmovdqu (%r10),%xmm12

	vpxor %xmm0,%xmm3,%xmm10
	vpxor %xmm1,%xmm4,%xmm11
	vpxor %xmm2,%xmm5,%xmm5

	vpxor %xmm10,%xmm5,%xmm5
	vpxor %xmm11,%xmm5,%xmm5
	vpslldq $8,%xmm5,%xmm9
	vpsrldq $8,%xmm5,%xmm5
	vpxor %xmm9,%xmm10,%xmm10
	vpxor %xmm5,%xmm11,%xmm11

	vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
	vpalignr $8,%xmm10,%xmm10,%xmm10
	vpxor %xmm9,%xmm10,%xmm10

	vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
	vpalignr $8,%xmm10,%xmm10,%xmm10
	vpxor %xmm11,%xmm10,%xmm10
	vpxor %xmm9,%xmm10,%xmm10

	cmpq $0,%rcx
	jne .Lshort_avx

	vpshufb %xmm13,%xmm10,%xmm10
	vmovdqu %xmm10,(%rdi)
	vzeroupper
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_avx,.-gcm_ghash_avx
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask:
.long 7,0,7,0
.align 64

.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits