# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

.globl gcm_gmult_4bit
.hidden gcm_gmult_4bit
.type gcm_gmult_4bit,@function
.align 16
gcm_gmult_4bit:
.cfi_startproc
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
	subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lgmult_prologue:

	movzbq 15(%rdi),%r8
	leaq .Lrem_4bit(%rip),%r11
	xorq %rax,%rax
	xorq %rbx,%rbx
	movb %r8b,%al
	movb %r8b,%bl
	shlb $4,%al
	movq $14,%rcx
	movq 8(%rsi,%rax,1),%r8
	movq (%rsi,%rax,1),%r9
	andb $0xf0,%bl
	movq %r8,%rdx
	jmp .Loop1

.align 16
.Loop1:
	shrq $4,%r8
	andq $0xf,%rdx
	movq %r9,%r10
	movb (%rdi,%rcx,1),%al
	shrq $4,%r9
	xorq 8(%rsi,%rbx,1),%r8
	shlq $60,%r10
	xorq (%rsi,%rbx,1),%r9
	movb %al,%bl
	xorq (%r11,%rdx,8),%r9
	movq %r8,%rdx
	shlb $4,%al
	xorq %r10,%r8
	decq %rcx
	js .Lbreak1

	shrq $4,%r8
	andq $0xf,%rdx
	movq %r9,%r10
	shrq $4,%r9
	xorq 8(%rsi,%rax,1),%r8
	shlq $60,%r10
	xorq (%rsi,%rax,1),%r9
	andb $0xf0,%bl
	xorq (%r11,%rdx,8),%r9
	movq %r8,%rdx
	xorq %r10,%r8
	jmp .Loop1

.align 16
.Lbreak1:
	shrq $4,%r8
	andq $0xf,%rdx
	movq %r9,%r10
	shrq $4,%r9
	xorq 8(%rsi,%rax,1),%r8
	shlq $60,%r10
	xorq (%rsi,%rax,1),%r9
	andb $0xf0,%bl
	xorq (%r11,%rdx,8),%r9
	movq %r8,%rdx
	xorq %r10,%r8

	shrq $4,%r8
	andq $0xf,%rdx
	movq %r9,%r10
	shrq $4,%r9
	xorq 8(%rsi,%rbx,1),%r8
	shlq $60,%r10
	xorq (%rsi,%rbx,1),%r9
	xorq %r10,%r8
	xorq (%r11,%rdx,8),%r9

	bswapq %r8
	bswapq %r9
	movq %r8,8(%rdi)
	movq %r9,(%rdi)

	leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
	movq -8(%rsi),%rbx
.cfi_restore %rbx
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lgmult_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_4bit,.-gcm_gmult_4bit
.globl gcm_ghash_4bit
.hidden gcm_ghash_4bit
.type gcm_ghash_4bit,@function
.align 16
gcm_ghash_4bit:
.cfi_startproc
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
	subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lghash_prologue:
	movq %rdx,%r14
	movq %rcx,%r15
	subq $-128,%rsi
	leaq 16+128(%rsp),%rbp
	xorl %edx,%edx
	movq 0+0-128(%rsi),%r8
	movq 0+8-128(%rsi),%rax
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq 16+0-128(%rsi),%r9
	shlb $4,%dl
	movq 16+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,0(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,0(%rbp)
	movq 32+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,0-128(%rbp)
	movq 32+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,1(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,8(%rbp)
	movq 48+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,8-128(%rbp)
	movq 48+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,2(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,16(%rbp)
	movq 64+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,16-128(%rbp)
	movq 64+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,3(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,24(%rbp)
	movq 80+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,24-128(%rbp)
	movq 80+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,4(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,32(%rbp)
	movq 96+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,32-128(%rbp)
	movq 96+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,5(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,40(%rbp)
	movq 112+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,40-128(%rbp)
	movq 112+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,6(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,48(%rbp)
	movq 128+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,48-128(%rbp)
	movq 128+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,7(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,56(%rbp)
	movq 144+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,56-128(%rbp)
	movq 144+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,8(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,64(%rbp)
	movq 160+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,64-128(%rbp)
	movq 160+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,9(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,72(%rbp)
	movq 176+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,72-128(%rbp)
	movq 176+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,10(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,80(%rbp)
	movq 192+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,80-128(%rbp)
	movq 192+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,11(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,88(%rbp)
	movq 208+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,88-128(%rbp)
	movq 208+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,12(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,96(%rbp)
	movq 224+0-128(%rsi),%r8
	shlb $4,%dl
	movq %rax,96-128(%rbp)
	movq 224+8-128(%rsi),%rax
	shlq $60,%r10
	movb %dl,13(%rsp)
	orq %r10,%rbx
	movb %al,%dl
	shrq $4,%rax
	movq %r8,%r10
	shrq $4,%r8
	movq %r9,104(%rbp)
	movq 240+0-128(%rsi),%r9
	shlb $4,%dl
	movq %rbx,104-128(%rbp)
	movq 240+8-128(%rsi),%rbx
	shlq $60,%r10
	movb %dl,14(%rsp)
	orq %r10,%rax
	movb %bl,%dl
	shrq $4,%rbx
	movq %r9,%r10
	shrq $4,%r9
	movq %r8,112(%rbp)
	shlb $4,%dl
	movq %rax,112-128(%rbp)
	shlq $60,%r10
	movb %dl,15(%rsp)
	orq %r10,%rbx
	movq %r9,120(%rbp)
	movq %rbx,120-128(%rbp)
	addq $-128,%rsi
	movq 8(%rdi),%r8
	movq 0(%rdi),%r9
	addq %r14,%r15
	leaq .Lrem_8bit(%rip),%r11
	jmp .Louter_loop
.align 16
.Louter_loop:
	xorq (%r14),%r9
	movq 8(%r14),%rdx
	leaq 16(%r14),%r14
	xorq %r8,%rdx
	movq %r9,(%rdi)
	movq %rdx,8(%rdi)
	shrq $32,%rdx
	xorq %rax,%rax
	roll $8,%edx
	movb %dl,%al
	movzbl %dl,%ebx
	shlb $4,%al
	shrl $4,%ebx
	roll $8,%edx
	movq 8(%rsi,%rax,1),%r8
	movq (%rsi,%rax,1),%r9
	movb %dl,%al
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	xorq %r8,%r12
	movq %r9,%r10
	shrq $8,%r8
	movzbq %r12b,%r12
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	movl 8(%rdi),%edx
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	movl 4(%rdi),%edx
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	movl 0(%rdi),%edx
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	shrl $4,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r12,2),%r12
	movzbl %dl,%ebx
	shlb $4,%al
	movzbq (%rsp,%rcx,1),%r13
	shrl $4,%ebx
	shlq $48,%r12
	xorq %r8,%r13
	movq %r9,%r10
	xorq %r12,%r9
	shrq $8,%r8
	movzbq %r13b,%r13
	shrq $8,%r9
	xorq -128(%rbp,%rcx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rcx,8),%r9
	roll $8,%edx
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	movb %dl,%al
	xorq %r10,%r8
	movzwq (%r11,%r13,2),%r13
	movzbl %dl,%ecx
	shlb $4,%al
	movzbq (%rsp,%rbx,1),%r12
	andl $240,%ecx
	shlq $48,%r13
	xorq %r8,%r12
	movq %r9,%r10
	xorq %r13,%r9
	shrq $8,%r8
	movzbq %r12b,%r12
	movl -4(%rdi),%edx
	shrq $8,%r9
	xorq -128(%rbp,%rbx,8),%r8
	shlq $56,%r10
	xorq (%rbp,%rbx,8),%r9
	movzwq (%r11,%r12,2),%r12
	xorq 8(%rsi,%rax,1),%r8
	xorq (%rsi,%rax,1),%r9
	shlq $48,%r12
	xorq %r10,%r8
	xorq %r12,%r9
	movzbq %r8b,%r13
	shrq $4,%r8
	movq %r9,%r10
	shlb $4,%r13b
	shrq $4,%r9
	xorq 8(%rsi,%rcx,1),%r8
	movzwq (%r11,%r13,2),%r13
	shlq $60,%r10
	xorq (%rsi,%rcx,1),%r9
	xorq %r10,%r8
	shlq $48,%r13
	bswapq %r8
	xorq %r13,%r9
	bswapq %r9
	cmpq %r15,%r14
	jb .Louter_loop
	movq %r8,8(%rdi)
	movq %r9,(%rdi)

	leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbp
.cfi_restore %rbp
	movq -8(%rsi),%rbx
.cfi_restore %rbx
	leaq 0(%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lghash_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_4bit,.-gcm_ghash_4bit
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.cfi_startproc
.L_init_clmul:
	movdqu (%rsi),%xmm2
	pshufd $78,%xmm2,%xmm2


	pshufd $255,%xmm2,%xmm4
	movdqa %xmm2,%xmm3
	psllq $1,%xmm2
	pxor %xmm5,%xmm5
	psrlq $63,%xmm3
	pcmpgtd %xmm4,%xmm5
	pslldq $8,%xmm3
	por %xmm3,%xmm2


	pand .L0x1c2_polynomial(%rip),%xmm5
	pxor %xmm5,%xmm2


	pshufd $78,%xmm2,%xmm6
	movdqa %xmm2,%xmm0
	pxor %xmm2,%xmm6
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	pshufd $78,%xmm2,%xmm3
	pshufd $78,%xmm0,%xmm4
	pxor %xmm2,%xmm3
	movdqu %xmm2,0(%rdi)
	pxor %xmm0,%xmm4
	movdqu %xmm0,16(%rdi)
.byte 102,15,58,15,227,8
	movdqu %xmm4,32(%rdi)
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	movdqa %xmm0,%xmm5
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	pshufd $78,%xmm5,%xmm3
	pshufd $78,%xmm0,%xmm4
	pxor %xmm5,%xmm3
	movdqu %xmm5,48(%rdi)
	pxor %xmm0,%xmm4
	movdqu %xmm0,64(%rdi)
.byte 102,15,58,15,227,8
	movdqu %xmm4,80(%rdi)
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_clmul,.-gcm_init_clmul
.globl gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.cfi_startproc
.L_gmult_clmul:
	movdqu (%rdi),%xmm0
	movdqa .Lbswap_mask(%rip),%xmm5
	movdqu (%rsi),%xmm2
	movdqu 32(%rsi),%xmm4
.byte 102,15,56,0,197
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
.byte 102,15,56,0,197
	movdqu %xmm0,(%rdi)
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_clmul,.-gcm_gmult_clmul
.globl gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 32
gcm_ghash_clmul:
.cfi_startproc
.L_ghash_clmul:
	movdqa .Lbswap_mask(%rip),%xmm10

	movdqu (%rdi),%xmm0
	movdqu (%rsi),%xmm2
	movdqu 32(%rsi),%xmm7
.byte 102,65,15,56,0,194

	subq $0x10,%rcx
	jz .Lodd_tail

	movdqu 16(%rsi),%xmm6
	leaq OPENSSL_ia32cap_P(%rip),%rax
	movl 4(%rax),%eax
	cmpq $0x30,%rcx
	jb .Lskip4x

	andl $71303168,%eax
	cmpl $4194304,%eax
	je .Lskip4x

	subq $0x30,%rcx
	movq $0xA040608020C0E000,%rax
	movdqu 48(%rsi),%xmm14
	movdqu 64(%rsi),%xmm15




	movdqu 48(%rdx),%xmm3
	movdqu 32(%rdx),%xmm11
.byte 102,65,15,56,0,218
.byte 102,69,15,56,0,218
	movdqa %xmm3,%xmm5
	pshufd $78,%xmm3,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0

	movdqa %xmm11,%xmm13
	pshufd $78,%xmm11,%xmm12
	pxor %xmm11,%xmm12
.byte 102,68,15,58,68,222,0
.byte 102,68,15,58,68,238,17
.byte 102,68,15,58,68,231,16
	xorps %xmm11,%xmm3
	xorps %xmm13,%xmm5
	movups 80(%rsi),%xmm7
	xorps %xmm12,%xmm4

	movdqu 16(%rdx),%xmm11
	movdqu 0(%rdx),%xmm8
.byte 102,69,15,56,0,218
.byte 102,69,15,56,0,194
	movdqa %xmm11,%xmm13
	pshufd $78,%xmm11,%xmm12
	pxor %xmm8,%xmm0
	pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm8
	pxor %xmm0,%xmm8
.byte 102,69,15,58,68,238,17
.byte 102,68,15,58,68,231,0
	xorps %xmm11,%xmm3
	xorps %xmm13,%xmm5

	leaq 64(%rdx),%rdx
	subq $0x40,%rcx
	jc .Ltail4x

	jmp .Lmod4_loop
.align 32
.Lmod4_loop:
.byte 102,65,15,58,68,199,0
	xorps %xmm12,%xmm4
	movdqu 48(%rdx),%xmm11
.byte 102,69,15,56,0,218
.byte 102,65,15,58,68,207,17
	xorps %xmm3,%xmm0
	movdqu 32(%rdx),%xmm3
	movdqa %xmm11,%xmm13
.byte 102,68,15,58,68,199,16
	pshufd $78,%xmm11,%xmm12
	xorps %xmm5,%xmm1
	pxor %xmm11,%xmm12
.byte 102,65,15,56,0,218
	movups 32(%rsi),%xmm7
	xorps %xmm4,%xmm8
.byte 102,68,15,58,68,218,0
	pshufd $78,%xmm3,%xmm4

	pxor %xmm0,%xmm8
	movdqa %xmm3,%xmm5
	pxor %xmm1,%xmm8
	pxor %xmm3,%xmm4
	movdqa %xmm8,%xmm9
.byte 102,68,15,58,68,234,17
	pslldq $8,%xmm8
	psrldq $8,%xmm9
	pxor %xmm8,%xmm0
	movdqa .L7_mask(%rip),%xmm8
	pxor %xmm9,%xmm1
.byte 102,76,15,110,200

	pand %xmm0,%xmm8
.byte 102,69,15,56,0,200
	pxor %xmm0,%xmm9
.byte 102,68,15,58,68,231,0
	psllq $57,%xmm9
	movdqa %xmm9,%xmm8
	pslldq $8,%xmm9
.byte 102,15,58,68,222,0
	psrldq $8,%xmm8
	pxor %xmm9,%xmm0
	pxor %xmm8,%xmm1
	movdqu 0(%rdx),%xmm8

	movdqa %xmm0,%xmm9
	psrlq $1,%xmm0
.byte 102,15,58,68,238,17
	xorps %xmm11,%xmm3
	movdqu 16(%rdx),%xmm11
.byte 102,69,15,56,0,218
.byte 102,15,58,68,231,16
	xorps %xmm13,%xmm5
	movups 80(%rsi),%xmm7
.byte 102,69,15,56,0,194
	pxor %xmm9,%xmm1
	pxor %xmm0,%xmm9
	psrlq $5,%xmm0

	movdqa %xmm11,%xmm13
	pxor %xmm12,%xmm4
	pshufd $78,%xmm11,%xmm12
	pxor %xmm9,%xmm0
	pxor %xmm8,%xmm1
	pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	movdqa %xmm0,%xmm1
.byte 102,69,15,58,68,238,17
	xorps %xmm11,%xmm3
	pshufd $78,%xmm0,%xmm8
	pxor %xmm0,%xmm8

.byte 102,68,15,58,68,231,0
	xorps %xmm13,%xmm5

	leaq 64(%rdx),%rdx
	subq $0x40,%rcx
	jnc .Lmod4_loop

.Ltail4x:
.byte 102,65,15,58,68,199,0
.byte 102,65,15,58,68,207,17
.byte 102,68,15,58,68,199,16
	xorps %xmm12,%xmm4
	xorps %xmm3,%xmm0
	xorps %xmm5,%xmm1
	pxor %xmm0,%xmm1
	pxor %xmm4,%xmm8

	pxor %xmm1,%xmm8
	pxor %xmm0,%xmm1

	movdqa %xmm8,%xmm9
	psrldq $8,%xmm8
	pslldq $8,%xmm9
	pxor %xmm8,%xmm1
	pxor %xmm9,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	addq $0x40,%rcx
	jz .Ldone
	movdqu 32(%rsi),%xmm7
	subq $0x10,%rcx
	jz .Lodd_tail
.Lskip4x:





	movdqu (%rdx),%xmm8
	movdqu 16(%rdx),%xmm3
.byte 102,69,15,56,0,194
.byte 102,65,15,56,0,218
	pxor %xmm8,%xmm0

	movdqa %xmm3,%xmm5
	pshufd $78,%xmm3,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0

	leaq 32(%rdx),%rdx
	nop
	subq $0x20,%rcx
	jbe .Leven_tail
	nop
	jmp .Lmod_loop

.align 32
.Lmod_loop:
	movdqa %xmm0,%xmm1
	movdqa %xmm4,%xmm8
	pshufd $78,%xmm0,%xmm4
	pxor %xmm0,%xmm4

.byte 102,15,58,68,198,0
.byte 102,15,58,68,206,17
.byte 102,15,58,68,231,16

	pxor %xmm3,%xmm0
	pxor %xmm5,%xmm1
	movdqu (%rdx),%xmm9
	pxor %xmm0,%xmm8
.byte 102,69,15,56,0,202
	movdqu 16(%rdx),%xmm3

	pxor %xmm1,%xmm8
	pxor %xmm9,%xmm1
	pxor %xmm8,%xmm4
.byte 102,65,15,56,0,218
	movdqa %xmm4,%xmm8
	psrldq $8,%xmm8
	pslldq $8,%xmm4
	pxor %xmm8,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm3,%xmm5

	movdqa %xmm0,%xmm9
	movdqa %xmm0,%xmm8
	psllq $5,%xmm0
	pxor %xmm0,%xmm8
.byte 102,15,58,68,218,0
	psllq $1,%xmm0
	pxor %xmm8,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm8
	pslldq $8,%xmm0
	psrldq $8,%xmm8
	pxor %xmm9,%xmm0
	pshufd $78,%xmm5,%xmm4
	pxor %xmm8,%xmm1
	pxor %xmm5,%xmm4

	movdqa %xmm0,%xmm9
	psrlq $1,%xmm0
.byte 102,15,58,68,234,17
	pxor %xmm9,%xmm1
	pxor %xmm0,%xmm9
	psrlq $5,%xmm0
	pxor %xmm9,%xmm0
	leaq 32(%rdx),%rdx
	psrlq $1,%xmm0
.byte 102,15,58,68,231,0
	pxor %xmm1,%xmm0

	subq $0x20,%rcx
	ja .Lmod_loop

.Leven_tail:
	movdqa %xmm0,%xmm1
	movdqa %xmm4,%xmm8
	pshufd $78,%xmm0,%xmm4
	pxor %xmm0,%xmm4

.byte 102,15,58,68,198,0
.byte 102,15,58,68,206,17
.byte 102,15,58,68,231,16

	pxor %xmm3,%xmm0
	pxor %xmm5,%xmm1
	pxor %xmm0,%xmm8
	pxor %xmm1,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm8
	psrldq $8,%xmm8
	pslldq $8,%xmm4
	pxor %xmm8,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	testq %rcx,%rcx
	jnz .Ldone

.Lodd_tail:
	movdqu (%rdx),%xmm8
.byte 102,69,15,56,0,194
	pxor %xmm8,%xmm0
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,223,0
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3

	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0

	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1


	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
.Ldone:
.byte 102,65,15,56,0,194
	movdqu %xmm0,(%rdi)
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_clmul,.-gcm_ghash_clmul
.globl gcm_init_avx
.hidden gcm_init_avx
.type gcm_init_avx,@function
.align 32
gcm_init_avx:
.cfi_startproc
	vzeroupper

	vmovdqu (%rsi),%xmm2
	vpshufd $78,%xmm2,%xmm2


	vpshufd $255,%xmm2,%xmm4
	vpsrlq $63,%xmm2,%xmm3
	vpsllq $1,%xmm2,%xmm2
	vpxor %xmm5,%xmm5,%xmm5
	vpcmpgtd %xmm4,%xmm5,%xmm5
	vpslldq $8,%xmm3,%xmm3
	vpor %xmm3,%xmm2,%xmm2


	vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5
	vpxor %xmm5,%xmm2,%xmm2

	vpunpckhqdq %xmm2,%xmm2,%xmm6
	vmovdqa %xmm2,%xmm0
	vpxor %xmm2,%xmm6,%xmm6
	movq $4,%r10
	jmp .Linit_start_avx
.align 32
.Linit_loop_avx:
	vpalignr $8,%xmm3,%xmm4,%xmm5
	vmovdqu %xmm5,-16(%rdi)
	vpunpckhqdq %xmm0,%xmm0,%xmm3
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
	vpxor %xmm0,%xmm1,%xmm4
	vpxor %xmm4,%xmm3,%xmm3

	vpslldq $8,%xmm3,%xmm4
	vpsrldq $8,%xmm3,%xmm3
	vpxor %xmm4,%xmm0,%xmm0
	vpxor %xmm3,%xmm1,%xmm1
	vpsllq $57,%xmm0,%xmm3
	vpsllq $62,%xmm0,%xmm4
	vpxor %xmm3,%xmm4,%xmm4
	vpsllq $63,%xmm0,%xmm3
	vpxor %xmm3,%xmm4,%xmm4
	vpslldq $8,%xmm4,%xmm3
	vpsrldq $8,%xmm4,%xmm4
	vpxor %xmm3,%xmm0,%xmm0
	vpxor %xmm4,%xmm1,%xmm1

	vpsrlq $1,%xmm0,%xmm4
	vpxor %xmm0,%xmm1,%xmm1
	vpxor %xmm4,%xmm0,%xmm0
	vpsrlq $5,%xmm4,%xmm4
	vpxor %xmm4,%xmm0,%xmm0
	vpsrlq $1,%xmm0,%xmm0
	vpxor %xmm1,%xmm0,%xmm0
.Linit_start_avx:
	vmovdqa %xmm0,%xmm5
	vpunpckhqdq %xmm0,%xmm0,%xmm3
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
	vpxor %xmm0,%xmm1,%xmm4
	vpxor %xmm4,%xmm3,%xmm3

	vpslldq $8,%xmm3,%xmm4
	vpsrldq $8,%xmm3,%xmm3
	vpxor %xmm4,%xmm0,%xmm0
	vpxor %xmm3,%xmm1,%xmm1
	vpsllq $57,%xmm0,%xmm3
	vpsllq $62,%xmm0,%xmm4
	vpxor %xmm3,%xmm4,%xmm4
	vpsllq $63,%xmm0,%xmm3
	vpxor %xmm3,%xmm4,%xmm4
	vpslldq $8,%xmm4,%xmm3
	vpsrldq $8,%xmm4,%xmm4
	vpxor %xmm3,%xmm0,%xmm0
	vpxor %xmm4,%xmm1,%xmm1

	vpsrlq $1,%xmm0,%xmm4
	vpxor %xmm0,%xmm1,%xmm1
	vpxor %xmm4,%xmm0,%xmm0
	vpsrlq $5,%xmm4,%xmm4
	vpxor %xmm4,%xmm0,%xmm0
	vpsrlq $1,%xmm0,%xmm0
	vpxor %xmm1,%xmm0,%xmm0
	vpshufd $78,%xmm5,%xmm3
	vpshufd $78,%xmm0,%xmm4
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqu %xmm5,0(%rdi)
	vpxor %xmm0,%xmm4,%xmm4
	vmovdqu %xmm0,16(%rdi)
	leaq 48(%rdi),%rdi
	subq $1,%r10
	jnz .Linit_loop_avx

	vpalignr $8,%xmm4,%xmm3,%xmm5
	vmovdqu %xmm5,-16(%rdi)

	vzeroupper
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_avx,.-gcm_init_avx
.globl gcm_gmult_avx
.hidden gcm_gmult_avx
.type gcm_gmult_avx,@function
.align 32
gcm_gmult_avx:
.cfi_startproc
	jmp .L_gmult_clmul
.cfi_endproc
.size gcm_gmult_avx,.-gcm_gmult_avx
.globl gcm_ghash_avx
.hidden gcm_ghash_avx
.type gcm_ghash_avx,@function
.align 32
gcm_ghash_avx:
.cfi_startproc
	vzeroupper

	vmovdqu (%rdi),%xmm10
	leaq .L0x1c2_polynomial(%rip),%r10
	leaq 64(%rsi),%rsi
	vmovdqu .Lbswap_mask(%rip),%xmm13
	vpshufb %xmm13,%xmm10,%xmm10
	cmpq $0x80,%rcx
	jb .Lshort_avx
	subq $0x80,%rcx

	vmovdqu 112(%rdx),%xmm14
	vmovdqu 0-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm14
	vmovdqu 32-64(%rsi),%xmm7

	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vmovdqu 96(%rdx),%xmm15
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpxor %xmm14,%xmm9,%xmm9
	vpshufb %xmm13,%xmm15,%xmm15
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 16-64(%rsi),%xmm6
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vmovdqu 80(%rdx),%xmm14
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vpxor %xmm15,%xmm8,%xmm8

	vpshufb %xmm13,%xmm14,%xmm14
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vmovdqu 48-64(%rsi),%xmm6
	vpxor %xmm14,%xmm9,%xmm9
	vmovdqu 64(%rdx),%xmm15
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 80-64(%rsi),%xmm7

	vpshufb %xmm13,%xmm15,%xmm15
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpxor %xmm1,%xmm4,%xmm4
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 64-64(%rsi),%xmm6
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vpxor %xmm15,%xmm8,%xmm8

	vmovdqu 48(%rdx),%xmm14
	vpxor %xmm3,%xmm0,%xmm0
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpxor %xmm4,%xmm1,%xmm1
	vpshufb %xmm13,%xmm14,%xmm14
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vmovdqu 96-64(%rsi),%xmm6
	vpxor %xmm5,%xmm2,%xmm2
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 128-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9

	vmovdqu 32(%rdx),%xmm15
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpxor %xmm1,%xmm4,%xmm4
	vpshufb %xmm13,%xmm15,%xmm15
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 112-64(%rsi),%xmm6
	vpxor %xmm2,%xmm5,%xmm5
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vpxor %xmm15,%xmm8,%xmm8

	vmovdqu 16(%rdx),%xmm14
	vpxor %xmm3,%xmm0,%xmm0
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpxor %xmm4,%xmm1,%xmm1
	vpshufb %xmm13,%xmm14,%xmm14
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vmovdqu 144-64(%rsi),%xmm6
	vpxor %xmm5,%xmm2,%xmm2
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 176-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9

	vmovdqu (%rdx),%xmm15
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpxor %xmm1,%xmm4,%xmm4
	vpshufb %xmm13,%xmm15,%xmm15
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 160-64(%rsi),%xmm6
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2

	leaq 128(%rdx),%rdx
	cmpq $0x80,%rcx
	jb .Ltail_avx

	vpxor %xmm10,%xmm15,%xmm15
	subq $0x80,%rcx
	jmp .Loop8x_avx

.align 32
.Loop8x_avx:
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vmovdqu 112(%rdx),%xmm14
	vpxor %xmm0,%xmm3,%xmm3
	vpxor %xmm15,%xmm8,%xmm8
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10
	vpshufb %xmm13,%xmm14,%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11
	vmovdqu 0-64(%rsi),%xmm6
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12
	vmovdqu 32-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9

	vmovdqu 96(%rdx),%xmm15
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpxor %xmm3,%xmm10,%xmm10
	vpshufb %xmm13,%xmm15,%xmm15
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vxorps %xmm4,%xmm11,%xmm11
	vmovdqu 16-64(%rsi),%xmm6
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vpxor %xmm5,%xmm12,%xmm12
	vxorps %xmm15,%xmm8,%xmm8

	vmovdqu 80(%rdx),%xmm14
	vpxor %xmm10,%xmm12,%xmm12
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpxor %xmm11,%xmm12,%xmm12
	vpslldq $8,%xmm12,%xmm9
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vpsrldq $8,%xmm12,%xmm12
	vpxor %xmm9,%xmm10,%xmm10
	vmovdqu 48-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm14
	vxorps %xmm12,%xmm11,%xmm11
	vpxor %xmm1,%xmm4,%xmm4
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 80-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9
	vpxor %xmm2,%xmm5,%xmm5

	vmovdqu 64(%rdx),%xmm15
	vpalignr $8,%xmm10,%xmm10,%xmm12
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpshufb %xmm13,%xmm15,%xmm15
	vpxor %xmm3,%xmm0,%xmm0
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 64-64(%rsi),%xmm6
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm4,%xmm1,%xmm1
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vxorps %xmm15,%xmm8,%xmm8
	vpxor %xmm5,%xmm2,%xmm2

	vmovdqu 48(%rdx),%xmm14
	vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpshufb %xmm13,%xmm14,%xmm14
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vmovdqu 96-64(%rsi),%xmm6
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 128-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9
	vpxor %xmm2,%xmm5,%xmm5

	vmovdqu 32(%rdx),%xmm15
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpshufb %xmm13,%xmm15,%xmm15
	vpxor %xmm3,%xmm0,%xmm0
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 112-64(%rsi),%xmm6
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm4,%xmm1,%xmm1
	vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
	vpxor %xmm15,%xmm8,%xmm8
	vpxor %xmm5,%xmm2,%xmm2
	vxorps %xmm12,%xmm10,%xmm10

	vmovdqu 16(%rdx),%xmm14
	vpalignr $8,%xmm10,%xmm10,%xmm12
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
	vpshufb %xmm13,%xmm14,%xmm14
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
	vmovdqu 144-64(%rsi),%xmm6
	vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
	vxorps %xmm11,%xmm12,%xmm12
	vpunpckhqdq %xmm14,%xmm14,%xmm9
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
	vmovdqu 176-64(%rsi),%xmm7
	vpxor %xmm14,%xmm9,%xmm9
	vpxor %xmm2,%xmm5,%xmm5

	vmovdqu (%rdx),%xmm15
	vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
	vpshufb %xmm13,%xmm15,%xmm15
	vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
	vmovdqu 160-64(%rsi),%xmm6
	vpxor %xmm12,%xmm15,%xmm15
	vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
	vpxor %xmm10,%xmm15,%xmm15

	leaq 128(%rdx),%rdx
	subq $0x80,%rcx
	jnc .Loop8x_avx

	addq $0x80,%rcx
	jmp .Ltail_no_xor_avx

.align 32
.Lshort_avx:
	vmovdqu -16(%rdx,%rcx,1),%xmm14
	leaq (%rdx,%rcx,1),%rdx
	vmovdqu 0-64(%rsi),%xmm6
	vmovdqu 32-64(%rsi),%xmm7
	vpshufb %xmm13,%xmm14,%xmm15

	vmovdqa %xmm0,%xmm3
	vmovdqa %xmm1,%xmm4
	vmovdqa %xmm2,%xmm5
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -32(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 16-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vpsrldq $8,%xmm7,%xmm7
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -48(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 48-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vmovdqu 80-64(%rsi),%xmm7
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -64(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 64-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vpsrldq $8,%xmm7,%xmm7
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -80(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 96-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vmovdqu 128-64(%rsi),%xmm7
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -96(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 112-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vpsrldq $8,%xmm7,%xmm7
	subq $0x10,%rcx
	jz .Ltail_avx

	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu -112(%rdx),%xmm14
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vmovdqu 144-64(%rsi),%xmm6
	vpshufb %xmm13,%xmm14,%xmm15
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
	vmovq 184-64(%rsi),%xmm7
	subq $0x10,%rcx
	jmp .Ltail_avx

.align 32
.Ltail_avx:
	vpxor %xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
	vpunpckhqdq %xmm15,%xmm15,%xmm8
	vpxor %xmm0,%xmm3,%xmm3
	vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
	vpxor %xmm15,%xmm8,%xmm8
	vpxor %xmm1,%xmm4,%xmm4
	vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
	vpxor %xmm2,%xmm5,%xmm5
	vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2

	vmovdqu (%r10),%xmm12

	vpxor %xmm0,%xmm3,%xmm10
	vpxor %xmm1,%xmm4,%xmm11
	vpxor %xmm2,%xmm5,%xmm5

	vpxor %xmm10,%xmm5,%xmm5
	vpxor %xmm11,%xmm5,%xmm5
	vpslldq $8,%xmm5,%xmm9
	vpsrldq $8,%xmm5,%xmm5
	vpxor %xmm9,%xmm10,%xmm10
	vpxor %xmm5,%xmm11,%xmm11

	vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
	vpalignr $8,%xmm10,%xmm10,%xmm10
	vpxor %xmm9,%xmm10,%xmm10

	vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
	vpalignr $8,%xmm10,%xmm10,%xmm10
	vpxor %xmm11,%xmm10,%xmm10
	vpxor %xmm9,%xmm10,%xmm10

	cmpq $0,%rcx
	jne .Lshort_avx

	vpshufb %xmm13,%xmm10,%xmm10
	vmovdqu %xmm10,(%rdi)
	vzeroupper
	.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_avx,.-gcm_ghash_avx
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask:
.long 7,0,7,0
.L7_mask_poly:
.long 7,0,450,0
.align 64
.type .Lrem_4bit,@object
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type .Lrem_8bit,@object
.Lrem_8bit:
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif