; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; Syntax: NASM (Intel operand order). The entry points below take their
; arguments in rcx, rdx, r8, r9 (see the ";WIN64 prologue" sequences).
;
; Note on the DB lines: several instructions are emitted as raw opcode bytes.
;   102,15,58,68,modrm,imm   = 66 0F 3A 44 /r ib  (PCLMULQDQ)
;   102,15,56,0,modrm        = 66 0F 38 00 /r     (PSHUFB)
;   102,15,58,15,modrm,imm   = 66 0F 3A 0F /r ib  (PALIGNR)
; An extra byte 65/68/69/76 after the 102 is a REX prefix selecting xmm8-15
; (and, for 76, a 64-bit general-purpose operand).

default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
section	.text code align=64

EXTERN	OPENSSL_ia32cap_P

;-----------------------------------------------------------------------
; gcm_gmult_4bit
; In:   rcx = pointer to a 16-byte block, updated in place
;       rdx = pointer to a table of 16-byte entries indexed by nibble*16
; Uses: $L$rem_4bit (defined outside this excerpt) indexed by the low four
;       bits shifted out on each step.
; The block is consumed from byte 15 down to byte 0, one nibble per step,
; and the byte-swapped 128-bit result is written back over the block.
; rdi/rsi are parked in the caller-provided slots at [8+rsp]/[16+rsp] and
; reloaded on exit; only rbx of the pushed registers is written by the body
; and only rbx is reloaded in the epilogue.
;-----------------------------------------------------------------------
global	gcm_gmult_4bit

ALIGN	16
gcm_gmult_4bit:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_gcm_gmult_4bit:
	mov	rdi,rcx
	mov	rsi,rdx

	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	sub	rsp,280
$L$gmult_prologue:

	movzx	r8,BYTE[15+rdi]		; start with the last byte of the block
	lea	r11,[$L$rem_4bit]
	xor	rax,rax
	xor	rbx,rbx
	mov	al,r8b
	mov	bl,r8b
	shl	al,4			; al = low nibble * 16 (table offset)
	mov	rcx,14			; rcx = index of the next byte to fetch
	mov	r8,QWORD[8+rax*1+rsi]
	mov	r9,QWORD[rax*1+rsi]
	and	bl,0xf0			; bl = high nibble * 16 (table offset)
	mov	rdx,r8
	jmp	NEAR $L$oop1

ALIGN	16
$L$oop1:
	shr	r8,4
	and	rdx,0xf
	mov	r10,r9
	mov	al,BYTE[rcx*1+rdi]
	shr	r9,4
	xor	r8,QWORD[8+rbx*1+rsi]
	shl	r10,60
	xor	r9,QWORD[rbx*1+rsi]
	mov	bl,al
	xor	r9,QWORD[rdx*8+r11]
	mov	rdx,r8
	shl	al,4
	xor	r8,r10
	dec	rcx
	js	NEAR $L$break1		; byte 0 has just been fetched

	shr	r8,4
	and	rdx,0xf
	mov	r10,r9
	shr	r9,4
	xor	r8,QWORD[8+rax*1+rsi]
	shl	r10,60
	xor	r9,QWORD[rax*1+rsi]
	and	bl,0xf0
	xor	r9,QWORD[rdx*8+r11]
	mov	rdx,r8
	xor	r8,r10
	jmp	NEAR $L$oop1

ALIGN	16
$L$break1:
	shr	r8,4
	and	rdx,0xf
	mov	r10,r9
	shr	r9,4
	xor	r8,QWORD[8+rax*1+rsi]
	shl	r10,60
	xor	r9,QWORD[rax*1+rsi]
	and	bl,0xf0
	xor	r9,QWORD[rdx*8+r11]
	mov	rdx,r8
	xor	r8,r10

	shr	r8,4
	and	rdx,0xf
	mov	r10,r9
	shr	r9,4
	xor	r8,QWORD[8+rbx*1+rsi]
	shl	r10,60
	xor	r9,QWORD[rbx*1+rsi]
	xor	r8,r10
	xor	r9,QWORD[rdx*8+r11]

	bswap	r8
	bswap	r9
	mov	QWORD[8+rdi],r8
	mov	QWORD[rdi],r9

	lea	rsi,[((280+48))+rsp]	; rsi = rsp as it was before the six pushes
	mov	rbx,QWORD[((-8))+rsi]
	lea	rsp,[rsi]
$L$gmult_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_gcm_gmult_4bit:

;-----------------------------------------------------------------------
; gcm_ghash_4bit
; In:   rcx = pointer to the 16-byte accumulator, updated in place
;       rdx = pointer to the same kind of table gcm_gmult_4bit reads
;       r8  = input pointer
;       r9  = input length in bytes; r8+r9 is used as the end pointer and
;             the loop consumes 16 bytes per iteration
; Uses: $L$rem_8bit (defined outside this excerpt).
; Stack scratch built in the prologue, for table entry i = 0..15:
;       BYTE [rsp+i]          = (low byte of the entry's second qword) << 4
;       QWORD[rbp+8*i]        = entry's first qword >> 4
;       QWORD[rbp-128+8*i]    = (second qword >> 4) | (first qword << 60)
; with rbp = rsp+16+128.
;-----------------------------------------------------------------------
global	gcm_ghash_4bit

ALIGN	16
gcm_ghash_4bit:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_gcm_ghash_4bit:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9

	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	sub	rsp,280
$L$ghash_prologue:
	mov	r14,rdx			; r14 = input cursor
	mov	r15,rcx			; r15 = length (turned into end pointer below)
	sub	rsi,-128		; bias rsi by +128; undone by "add rsi,-128" below
	lea	rbp,[((16+128))+rsp]
	xor	edx,edx
	mov	r8,QWORD[((0+0-128))+rsi]
	mov	rax,QWORD[((0+8-128))+rsi]
	mov	dl,al
	shr	rax,4
	mov	r10,r8
	shr	r8,4
	mov	r9,QWORD[((16+0-128))+rsi]
	shl	dl,4
	mov	rbx,QWORD[((16+8-128))+rsi]
	shl	r10,60
	mov	BYTE[rsp],dl
	or	rax,r10
	mov	dl,bl
	shr	rbx,4
	mov	r10,r9
	shr	r9,4
	mov	QWORD[rbp],r8
	mov	r8,QWORD[((32+0-128))+rsi]
	shl	dl,4
	mov	QWORD[((0-128))+rbp],rax
	mov	rax,QWORD[((32+8-128))+rsi]
	shl	r10,60
	mov	BYTE[1+rsp],dl
	or	rbx,r10
	mov	dl,al
	shr	rax,4
	mov	r10,r8
	shr	r8,4
	mov	QWORD[8+rbp],r9
	mov	r9,QWORD[((48+0-128))+rsi]
	shl	dl,4
	mov	QWORD[((8-128))+rbp],rbx
	mov	rbx,QWORD[((48+8-128))+rsi]
	shl	r10,60
	mov	BYTE[2+rsp],dl
	or	rax,r10
	mov	dl,bl
	shr	rbx,4
	mov	r10,r9
	shr	r9,4
	mov	QWORD[16+rbp],r8
	mov	r8,QWORD[((64+0-128))+rsi]
	shl	dl,4
	mov	QWORD[((16-128))+rbp],rax
	mov	rax,QWORD[((64+8-128))+rsi]
	shl	r10,60
	mov	BYTE[3+rsp],dl
	or	rbx,r10
	mov	dl,al
	shr	rax,4
	mov	r10,r8
	shr	r8,4
	mov	QWORD[24+rbp],r9
	mov	r9,QWORD[((80+0-128))+rsi]
	shl	dl,4
	mov	QWORD[((24-128))+rbp],rbx
	mov	rbx,QWORD[((80+8-128))+rsi]
	shl	r10,60
	mov	BYTE[4+rsp],dl
	or	rax,r10
	mov	dl,bl
	shr	rbx,4
	mov	r10,r9
	shr	r9,4
	mov	QWORD[32+rbp],r8
	mov	r8,QWORD[((96+0-128))+rsi]
	shl	dl,4
	mov	QWORD[((32-128))+rbp],rax
	mov	rax,QWORD[((96+8-128))+rsi]
	shl	r10,60
	mov	BYTE[5+rsp],dl
	or	rbx,r10
	mov	dl,al
	shr	rax,4
	mov	r10,r8
	shr	r8,4
	mov	QWORD[40+rbp],r9
	mov	r9,QWORD[((112+0-128))+rsi]
	shl	dl,4
	mov	QWORD[((40-128))+rbp],rbx
	mov	rbx,QWORD[((112+8-128))+rsi]
	shl	r10,60
	mov	BYTE[6+rsp],dl
	or	rax,r10
	mov	dl,bl
	shr	rbx,4
	mov	r10,r9
	shr	r9,4
	mov	QWORD[48+rbp],r8
	mov	r8,QWORD[((128+0-128))+rsi]
	shl	dl,4
	mov	QWORD[((48-128))+rbp],rax
	mov	rax,QWORD[((128+8-128))+rsi]
	shl	r10,60
	mov	BYTE[7+rsp],dl
	or	rbx,r10
	mov	dl,al
	shr	rax,4
	mov	r10,r8
	shr	r8,4
	mov	QWORD[56+rbp],r9
	mov	r9,QWORD[((144+0-128))+rsi]
	shl	dl,4
	mov	QWORD[((56-128))+rbp],rbx
	mov	rbx,QWORD[((144+8-128))+rsi]
	shl	r10,60
	mov	BYTE[8+rsp],dl
	or	rax,r10
	mov	dl,bl
	shr	rbx,4
	mov	r10,r9
	shr	r9,4
	mov	QWORD[64+rbp],r8
	mov	r8,QWORD[((160+0-128))+rsi]
	shl	dl,4
	mov	QWORD[((64-128))+rbp],rax
	mov	rax,QWORD[((160+8-128))+rsi]
	shl	r10,60
	mov	BYTE[9+rsp],dl
	or	rbx,r10
	mov	dl,al
	shr	rax,4
	mov	r10,r8
	shr	r8,4
	mov	QWORD[72+rbp],r9
	mov	r9,QWORD[((176+0-128))+rsi]
	shl	dl,4
	mov	QWORD[((72-128))+rbp],rbx
	mov	rbx,QWORD[((176+8-128))+rsi]
	shl	r10,60
	mov	BYTE[10+rsp],dl
	or	rax,r10
	mov	dl,bl
	shr	rbx,4
	mov	r10,r9
	shr	r9,4
	mov	QWORD[80+rbp],r8
	mov	r8,QWORD[((192+0-128))+rsi]
	shl	dl,4
	mov	QWORD[((80-128))+rbp],rax
	mov	rax,QWORD[((192+8-128))+rsi]
	shl	r10,60
	mov	BYTE[11+rsp],dl
	or	rbx,r10
	mov	dl,al
	shr	rax,4
	mov	r10,r8
	shr	r8,4
	mov	QWORD[88+rbp],r9
	mov	r9,QWORD[((208+0-128))+rsi]
	shl	dl,4
	mov	QWORD[((88-128))+rbp],rbx
	mov	rbx,QWORD[((208+8-128))+rsi]
	shl	r10,60
	mov	BYTE[12+rsp],dl
	or	rax,r10
	mov	dl,bl
	shr	rbx,4
	mov	r10,r9
	shr	r9,4
	mov	QWORD[96+rbp],r8
	mov	r8,QWORD[((224+0-128))+rsi]
	shl	dl,4
	mov	QWORD[((96-128))+rbp],rax
	mov	rax,QWORD[((224+8-128))+rsi]
	shl	r10,60
	mov	BYTE[13+rsp],dl
	or	rbx,r10
	mov	dl,al
	shr	rax,4
	mov	r10,r8
	shr	r8,4
	mov	QWORD[104+rbp],r9
	mov	r9,QWORD[((240+0-128))+rsi]
	shl	dl,4
	mov	QWORD[((104-128))+rbp],rbx
	mov	rbx,QWORD[((240+8-128))+rsi]
	shl	r10,60
	mov	BYTE[14+rsp],dl
	or	rax,r10
	mov	dl,bl
	shr	rbx,4
	mov	r10,r9
	shr	r9,4
	mov	QWORD[112+rbp],r8
	shl	dl,4
	mov	QWORD[((112-128))+rbp],rax
	shl	r10,60
	mov	BYTE[15+rsp],dl
	or	rbx,r10
	mov	QWORD[120+rbp],r9
	mov	QWORD[((120-128))+rbp],rbx
	add	rsi,-128		; rsi points at the table start again
	mov	r8,QWORD[8+rdi]
	mov	r9,QWORD[rdi]
	add	r15,r14			; r15 = end of input
	lea	r11,[$L$rem_8bit]
	jmp	NEAR $L$outer_loop
ALIGN	16
$L$outer_loop:
	; Fold the next 16 input bytes into the accumulator and store it
	; back so the body can re-read it 32 bits at a time.
	xor	r9,QWORD[r14]
	mov	rdx,QWORD[8+r14]
	lea	r14,[16+r14]
	xor	rdx,r8
	mov	QWORD[rdi],r9
	mov	QWORD[8+rdi],rdx
	shr	rdx,32
	xor	rax,rax
	rol	edx,8
	mov	al,dl
	movzx	ebx,dl
	shl	al,4
	shr	ebx,4
	rol	edx,8
	mov	r8,QWORD[8+rax*1+rsi]
	mov	r9,QWORD[rax*1+rsi]
	mov	al,dl
	movzx	ecx,dl
	shl	al,4
	movzx	r12,BYTE[rbx*1+rsp]
	shr	ecx,4
	xor	r12,r8
	mov	r10,r9
	shr	r8,8
	movzx	r12,r12b
	shr	r9,8
	xor	r8,QWORD[((-128))+rbx*8+rbp]
	shl	r10,56
	xor	r9,QWORD[rbx*8+rbp]
	rol	edx,8
	xor	r8,QWORD[8+rax*1+rsi]
	xor	r9,QWORD[rax*1+rsi]
	mov	al,dl
	xor	r8,r10
	movzx	r12,WORD[r12*2+r11]
	movzx	ebx,dl
	shl	al,4
	movzx	r13,BYTE[rcx*1+rsp]
	shr	ebx,4
	shl	r12,48
	xor	r13,r8
	mov	r10,r9
	xor	r9,r12
	shr	r8,8
	movzx	r13,r13b
	shr	r9,8
	xor	r8,QWORD[((-128))+rcx*8+rbp]
	shl	r10,56
	xor	r9,QWORD[rcx*8+rbp]
	rol	edx,8
	xor	r8,QWORD[8+rax*1+rsi]
	xor	r9,QWORD[rax*1+rsi]
	mov	al,dl
	xor	r8,r10
	movzx	r13,WORD[r13*2+r11]
	movzx	ecx,dl
	shl	al,4
	movzx	r12,BYTE[rbx*1+rsp]
	shr	ecx,4
	shl	r13,48
	xor	r12,r8
	mov	r10,r9
	xor	r9,r13
	shr	r8,8
	movzx	r12,r12b
	mov	edx,DWORD[8+rdi]
	shr	r9,8
	xor	r8,QWORD[((-128))+rbx*8+rbp]
	shl	r10,56
	xor	r9,QWORD[rbx*8+rbp]
	rol	edx,8
	xor	r8,QWORD[8+rax*1+rsi]
	xor	r9,QWORD[rax*1+rsi]
	mov	al,dl
	xor	r8,r10
	movzx	r12,WORD[r12*2+r11]
	movzx	ebx,dl
	shl	al,4
	movzx	r13,BYTE[rcx*1+rsp]
	shr	ebx,4
	shl	r12,48
	xor	r13,r8
	mov	r10,r9
	xor	r9,r12
	shr	r8,8
	movzx	r13,r13b
	shr	r9,8
	xor	r8,QWORD[((-128))+rcx*8+rbp]
	shl	r10,56
	xor	r9,QWORD[rcx*8+rbp]
	rol	edx,8
	xor	r8,QWORD[8+rax*1+rsi]
	xor	r9,QWORD[rax*1+rsi]
	mov	al,dl
	xor	r8,r10
	movzx	r13,WORD[r13*2+r11]
	movzx	ecx,dl
	shl	al,4
	movzx	r12,BYTE[rbx*1+rsp]
	shr	ecx,4
	shl	r13,48
	xor	r12,r8
	mov	r10,r9
	xor	r9,r13
	shr	r8,8
	movzx	r12,r12b
	shr	r9,8
	xor	r8,QWORD[((-128))+rbx*8+rbp]
	shl	r10,56
	xor	r9,QWORD[rbx*8+rbp]
	rol	edx,8
	xor	r8,QWORD[8+rax*1+rsi]
	xor	r9,QWORD[rax*1+rsi]
	mov	al,dl
	xor	r8,r10
	movzx	r12,WORD[r12*2+r11]
	movzx	ebx,dl
	shl	al,4
	movzx	r13,BYTE[rcx*1+rsp]
	shr	ebx,4
	shl	r12,48
	xor	r13,r8
	mov	r10,r9
	xor	r9,r12
	shr	r8,8
	movzx	r13,r13b
	shr	r9,8
	xor	r8,QWORD[((-128))+rcx*8+rbp]
	shl	r10,56
	xor	r9,QWORD[rcx*8+rbp]
	rol	edx,8
	xor	r8,QWORD[8+rax*1+rsi]
	xor	r9,QWORD[rax*1+rsi]
	mov	al,dl
	xor	r8,r10
	movzx	r13,WORD[r13*2+r11]
	movzx	ecx,dl
	shl	al,4
	movzx	r12,BYTE[rbx*1+rsp]
	shr	ecx,4
	shl	r13,48
	xor	r12,r8
	mov	r10,r9
	xor	r9,r13
	shr	r8,8
	movzx	r12,r12b
	mov	edx,DWORD[4+rdi]
	shr	r9,8
	xor	r8,QWORD[((-128))+rbx*8+rbp]
	shl	r10,56
	xor	r9,QWORD[rbx*8+rbp]
	rol	edx,8
	xor	r8,QWORD[8+rax*1+rsi]
	xor	r9,QWORD[rax*1+rsi]
	mov	al,dl
	xor	r8,r10
	movzx	r12,WORD[r12*2+r11]
	movzx	ebx,dl
	shl	al,4
	movzx	r13,BYTE[rcx*1+rsp]
	shr	ebx,4
	shl	r12,48
	xor	r13,r8
	mov	r10,r9
	xor	r9,r12
	shr	r8,8
	movzx	r13,r13b
	shr	r9,8
	xor	r8,QWORD[((-128))+rcx*8+rbp]
	shl	r10,56
	xor	r9,QWORD[rcx*8+rbp]
	rol	edx,8
	xor	r8,QWORD[8+rax*1+rsi]
	xor	r9,QWORD[rax*1+rsi]
	mov	al,dl
	xor	r8,r10
	movzx	r13,WORD[r13*2+r11]
	movzx	ecx,dl
	shl	al,4
	movzx	r12,BYTE[rbx*1+rsp]
	shr	ecx,4
	shl	r13,48
	xor	r12,r8
	mov	r10,r9
	xor	r9,r13
	shr	r8,8
	movzx	r12,r12b
	shr	r9,8
	xor	r8,QWORD[((-128))+rbx*8+rbp]
	shl	r10,56
	xor	r9,QWORD[rbx*8+rbp]
	rol	edx,8
	xor	r8,QWORD[8+rax*1+rsi]
	xor	r9,QWORD[rax*1+rsi]
	mov	al,dl
	xor	r8,r10
	movzx	r12,WORD[r12*2+r11]
	movzx	ebx,dl
	shl	al,4
	movzx	r13,BYTE[rcx*1+rsp]
	shr	ebx,4
	shl	r12,48
	xor	r13,r8
	mov	r10,r9
	xor	r9,r12
	shr	r8,8
	movzx	r13,r13b
	shr	r9,8
	xor	r8,QWORD[((-128))+rcx*8+rbp]
	shl	r10,56
	xor	r9,QWORD[rcx*8+rbp]
	rol	edx,8
	xor	r8,QWORD[8+rax*1+rsi]
	xor	r9,QWORD[rax*1+rsi]
	mov	al,dl
	xor	r8,r10
	movzx	r13,WORD[r13*2+r11]
	movzx	ecx,dl
	shl	al,4
	movzx	r12,BYTE[rbx*1+rsp]
	shr	ecx,4
	shl	r13,48
	xor	r12,r8
	mov	r10,r9
	xor	r9,r13
	shr	r8,8
	movzx	r12,r12b
	mov	edx,DWORD[rdi]
	shr	r9,8
	xor	r8,QWORD[((-128))+rbx*8+rbp]
	shl	r10,56
	xor	r9,QWORD[rbx*8+rbp]
	rol	edx,8
	xor	r8,QWORD[8+rax*1+rsi]
	xor	r9,QWORD[rax*1+rsi]
	mov	al,dl
	xor	r8,r10
	movzx	r12,WORD[r12*2+r11]
	movzx	ebx,dl
	shl	al,4
	movzx	r13,BYTE[rcx*1+rsp]
	shr	ebx,4
	shl	r12,48
	xor	r13,r8
	mov	r10,r9
	xor	r9,r12
	shr	r8,8
	movzx	r13,r13b
	shr	r9,8
	xor	r8,QWORD[((-128))+rcx*8+rbp]
	shl	r10,56
	xor	r9,QWORD[rcx*8+rbp]
	rol	edx,8
	xor	r8,QWORD[8+rax*1+rsi]
	xor	r9,QWORD[rax*1+rsi]
	mov	al,dl
	xor	r8,r10
	movzx	r13,WORD[r13*2+r11]
	movzx	ecx,dl
	shl	al,4
	movzx	r12,BYTE[rbx*1+rsp]
	shr	ecx,4
	shl	r13,48
	xor	r12,r8
	mov	r10,r9
	xor	r9,r13
	shr	r8,8
	movzx	r12,r12b
	shr	r9,8
	xor	r8,QWORD[((-128))+rbx*8+rbp]
	shl	r10,56
	xor	r9,QWORD[rbx*8+rbp]
	rol	edx,8
	xor	r8,QWORD[8+rax*1+rsi]
	xor	r9,QWORD[rax*1+rsi]
	mov	al,dl
	xor	r8,r10
	movzx	r12,WORD[r12*2+r11]
	movzx	ebx,dl
	shl	al,4
	movzx	r13,BYTE[rcx*1+rsp]
	shr	ebx,4
	shl	r12,48
	xor	r13,r8
	mov	r10,r9
	xor	r9,r12
	shr	r8,8
	movzx	r13,r13b
	shr	r9,8
	xor	r8,QWORD[((-128))+rcx*8+rbp]
	shl	r10,56
	xor	r9,QWORD[rcx*8+rbp]
	rol	edx,8
	xor	r8,QWORD[8+rax*1+rsi]
	xor	r9,QWORD[rax*1+rsi]
	mov	al,dl
	xor	r8,r10
	movzx	r13,WORD[r13*2+r11]
	movzx	ecx,dl
	shl	al,4
	movzx	r12,BYTE[rbx*1+rsp]
	and	ecx,240			; last byte: keep the high nibble as a *16 table offset
	shl	r13,48
	xor	r12,r8
	mov	r10,r9
	xor	r9,r13
	shr	r8,8
	movzx	r12,r12b
	mov	edx,DWORD[((-4))+rdi]
	shr	r9,8
	xor	r8,QWORD[((-128))+rbx*8+rbp]
	shl	r10,56
	xor	r9,QWORD[rbx*8+rbp]
	movzx	r12,WORD[r12*2+r11]
	xor	r8,QWORD[8+rax*1+rsi]
	xor	r9,QWORD[rax*1+rsi]
	shl	r12,48
	xor	r8,r10
	xor	r9,r12
	movzx	r13,r8b
	shr	r8,4
	mov	r10,r9
	shl	r13b,4
	shr	r9,4
	xor	r8,QWORD[8+rcx*1+rsi]
	movzx	r13,WORD[r13*2+r11]
	shl	r10,60
	xor	r9,QWORD[rcx*1+rsi]
	xor	r8,r10
	shl	r13,48
	bswap	r8
	xor	r9,r13
	bswap	r9
	cmp	r14,r15
	jb	NEAR $L$outer_loop	; more input remains
	mov	QWORD[8+rdi],r8
	mov	QWORD[rdi],r9

	lea	rsi,[((280+48))+rsp]	; rsi = rsp as it was before the six pushes
	mov	r15,QWORD[((-48))+rsi]
	mov	r14,QWORD[((-40))+rsi]
	mov	r13,QWORD[((-32))+rsi]
	mov	r12,QWORD[((-24))+rsi]
	mov	rbp,QWORD[((-16))+rsi]
	mov	rbx,QWORD[((-8))+rsi]
	lea	rsp,[rsi]
$L$ghash_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_gcm_ghash_4bit:

;-----------------------------------------------------------------------
; gcm_init_clmul
; In:   rcx = output table; six 16-byte entries are stored at offsets
;             0, 16, 32, 48, 64 and 80
;       rdx = pointer to the 16-byte input value
; Uses: $L$0x1c2_polynomial (defined outside this excerpt).
; The two leading DB lines encode "sub rsp,0x18" and "movaps [rsp],xmm6";
; the matching restore is the movaps/lea pair before the return.
;-----------------------------------------------------------------------
global	gcm_init_clmul

ALIGN	16
gcm_init_clmul:

$L$_init_clmul:
$L$SEH_begin_gcm_init_clmul:

DB	0x48,0x83,0xec,0x18
DB	0x0f,0x29,0x34,0x24
	movdqu	xmm2,XMMWORD[rdx]
	pshufd	xmm2,xmm2,78

	; Shift the 128-bit value left by one bit; if the top dword was
	; negative, fold in the polynomial constant.
	pshufd	xmm4,xmm2,255
	movdqa	xmm3,xmm2
	psllq	xmm2,1
	pxor	xmm5,xmm5
	psrlq	xmm3,63
	pcmpgtd	xmm5,xmm4
	pslldq	xmm3,8
	por	xmm2,xmm3

	pand	xmm5,XMMWORD[$L$0x1c2_polynomial]
	pxor	xmm2,xmm5

	pshufd	xmm6,xmm2,78
	movdqa	xmm0,xmm2
	pxor	xmm6,xmm2
	movdqa	xmm1,xmm0
	pshufd	xmm3,xmm0,78
	pxor	xmm3,xmm0
DB	102,15,58,68,194,0
DB	102,15,58,68,202,17
DB	102,15,58,68,222,0
	pxor	xmm3,xmm0
	pxor	xmm3,xmm1

	movdqa	xmm4,xmm3
	psrldq	xmm3,8
	pslldq	xmm4,8
	pxor	xmm1,xmm3
	pxor	xmm0,xmm4

	movdqa	xmm4,xmm0
	movdqa	xmm3,xmm0
	psllq	xmm0,5
	pxor	xmm3,xmm0
	psllq	xmm0,1
	pxor	xmm0,xmm3
	psllq	xmm0,57
	movdqa	xmm3,xmm0
	pslldq	xmm0,8
	psrldq	xmm3,8
	pxor	xmm0,xmm4
	pxor	xmm1,xmm3

	movdqa	xmm4,xmm0
	psrlq	xmm0,1
	pxor	xmm1,xmm4
	pxor	xmm4,xmm0
	psrlq	xmm0,5
	pxor	xmm0,xmm4
	psrlq	xmm0,1
	pxor	xmm0,xmm1
	pshufd	xmm3,xmm2,78
	pshufd	xmm4,xmm0,78
	pxor	xmm3,xmm2
	movdqu	XMMWORD[rcx],xmm2
	pxor	xmm4,xmm0
	movdqu	XMMWORD[16+rcx],xmm0
DB	102,15,58,15,227,8
	movdqu	XMMWORD[32+rcx],xmm4
	movdqa	xmm1,xmm0
	pshufd	xmm3,xmm0,78
	pxor	xmm3,xmm0
DB	102,15,58,68,194,0
DB	102,15,58,68,202,17
DB	102,15,58,68,222,0
	pxor	xmm3,xmm0
	pxor	xmm3,xmm1

	movdqa	xmm4,xmm3
	psrldq	xmm3,8
	pslldq	xmm4,8
	pxor	xmm1,xmm3
	pxor	xmm0,xmm4

	movdqa	xmm4,xmm0
	movdqa	xmm3,xmm0
	psllq	xmm0,5
	pxor	xmm3,xmm0
	psllq	xmm0,1
	pxor	xmm0,xmm3
	psllq	xmm0,57
	movdqa	xmm3,xmm0
	pslldq	xmm0,8
	psrldq	xmm3,8
	pxor	xmm0,xmm4
	pxor	xmm1,xmm3

	movdqa	xmm4,xmm0
	psrlq	xmm0,1
	pxor	xmm1,xmm4
	pxor	xmm4,xmm0
	psrlq	xmm0,5
	pxor	xmm0,xmm4
	psrlq	xmm0,1
	pxor	xmm0,xmm1
	movdqa	xmm5,xmm0
	movdqa	xmm1,xmm0
	pshufd	xmm3,xmm0,78
	pxor	xmm3,xmm0
DB	102,15,58,68,194,0
DB	102,15,58,68,202,17
DB	102,15,58,68,222,0
	pxor	xmm3,xmm0
	pxor	xmm3,xmm1

	movdqa	xmm4,xmm3
	psrldq	xmm3,8
	pslldq	xmm4,8
	pxor	xmm1,xmm3
	pxor	xmm0,xmm4

	movdqa	xmm4,xmm0
	movdqa	xmm3,xmm0
	psllq	xmm0,5
	pxor	xmm3,xmm0
	psllq	xmm0,1
	pxor	xmm0,xmm3
	psllq	xmm0,57
	movdqa	xmm3,xmm0
	pslldq	xmm0,8
	psrldq	xmm3,8
	pxor	xmm0,xmm4
	pxor	xmm1,xmm3

	movdqa	xmm4,xmm0
	psrlq	xmm0,1
	pxor	xmm1,xmm4
	pxor	xmm4,xmm0
	psrlq	xmm0,5
	pxor	xmm0,xmm4
	psrlq	xmm0,1
	pxor	xmm0,xmm1
	pshufd	xmm3,xmm5,78
	pshufd	xmm4,xmm0,78
	pxor	xmm3,xmm5
	movdqu	XMMWORD[48+rcx],xmm5
	pxor	xmm4,xmm0
	movdqu	XMMWORD[64+rcx],xmm0
DB	102,15,58,15,227,8
	movdqu	XMMWORD[80+rcx],xmm4
	movaps	xmm6,XMMWORD[rsp]
	lea	rsp,[24+rsp]
$L$SEH_end_gcm_init_clmul:
	DB	0F3h,0C3h		;repret

;-----------------------------------------------------------------------
; gcm_gmult_clmul
; In:   rcx = pointer to a 16-byte block, updated in place
;       rdx = table pointer; entries at offsets 0 and 32 are read
; Uses: $L$bswap_mask (defined outside this excerpt) as the PSHUFB control
;       applied to the block on load and again before the store.
; Touches only xmm0-xmm5 and does not adjust rsp.
; $L$_gmult_clmul is also the jump target of gcm_gmult_avx.
;-----------------------------------------------------------------------
global	gcm_gmult_clmul

ALIGN	16
gcm_gmult_clmul:

$L$_gmult_clmul:
	movdqu	xmm0,XMMWORD[rcx]
	movdqa	xmm5,XMMWORD[$L$bswap_mask]
	movdqu	xmm2,XMMWORD[rdx]
	movdqu	xmm4,XMMWORD[32+rdx]
DB	102,15,56,0,197
	movdqa	xmm1,xmm0
	pshufd	xmm3,xmm0,78
	pxor	xmm3,xmm0
DB	102,15,58,68,194,0
DB	102,15,58,68,202,17
DB	102,15,58,68,220,0
	pxor	xmm3,xmm0
	pxor	xmm3,xmm1

	movdqa	xmm4,xmm3
	psrldq	xmm3,8
	pslldq	xmm4,8
	pxor	xmm1,xmm3
	pxor	xmm0,xmm4

	movdqa	xmm4,xmm0
	movdqa	xmm3,xmm0
	psllq	xmm0,5
	pxor	xmm3,xmm0
	psllq	xmm0,1
	pxor	xmm0,xmm3
	psllq	xmm0,57
	movdqa	xmm3,xmm0
	pslldq	xmm0,8
	psrldq	xmm3,8
	pxor	xmm0,xmm4
	pxor	xmm1,xmm3

	movdqa	xmm4,xmm0
	psrlq	xmm0,1
	pxor	xmm1,xmm4
	pxor	xmm4,xmm0
	psrlq	xmm0,5
	pxor	xmm0,xmm4
	psrlq	xmm0,1
	pxor	xmm0,xmm1
DB	102,15,56,0,197
	movdqu	XMMWORD[rcx],xmm0
	DB	0F3h,0C3h		;repret

;-----------------------------------------------------------------------
; gcm_ghash_clmul
; In:   rcx = pointer to the 16-byte accumulator, updated in place
;       rdx = table pointer; entries at offsets 0, 16, 32 and, on the
;             4-block path, 48, 64 and 80 are read
;       r8  = input pointer
;       r9  = input length in bytes, consumed in 16-byte units
; Uses: $L$bswap_mask, $L$7_mask (defined outside this excerpt) and
;       OPENSSL_ia32cap_P (dword at offset 4 selects the 4-block path).
; The DB prologue is hand-encoded: it lowers rsp by 168 bytes relative to
; entry and stores xmm6-xmm15 there; the epilogue at $L$done reloads them
; from [rsp]..[144+rsp] and releases the 168 bytes.
;-----------------------------------------------------------------------
global	gcm_ghash_clmul

ALIGN	32
gcm_ghash_clmul:

$L$_ghash_clmul:
	lea	rax,[((-136))+rsp]
$L$SEH_begin_gcm_ghash_clmul:

DB	0x48,0x8d,0x60,0xe0
DB	0x0f,0x29,0x70,0xe0
DB	0x0f,0x29,0x78,0xf0
DB	0x44,0x0f,0x29,0x00
DB	0x44,0x0f,0x29,0x48,0x10
DB	0x44,0x0f,0x29,0x50,0x20
DB	0x44,0x0f,0x29,0x58,0x30
DB	0x44,0x0f,0x29,0x60,0x40
DB	0x44,0x0f,0x29,0x68,0x50
DB	0x44,0x0f,0x29,0x70,0x60
DB	0x44,0x0f,0x29,0x78,0x70
	movdqa	xmm10,XMMWORD[$L$bswap_mask]

	movdqu	xmm0,XMMWORD[rcx]
	movdqu	xmm2,XMMWORD[rdx]
	movdqu	xmm7,XMMWORD[32+rdx]
DB	102,65,15,56,0,194

	sub	r9,0x10
	jz	NEAR $L$odd_tail	; exactly one 16-byte block

	movdqu	xmm6,XMMWORD[16+rdx]
	lea	rax,[OPENSSL_ia32cap_P]
	mov	eax,DWORD[4+rax]
	cmp	r9,0x30
	jb	NEAR $L$skip4x		; fewer than four blocks in total

	and	eax,71303168
	cmp	eax,4194304
	je	NEAR $L$skip4x

	sub	r9,0x30
	mov	rax,0xA040608020C0E000
	movdqu	xmm14,XMMWORD[48+rdx]
	movdqu	xmm15,XMMWORD[64+rdx]

	movdqu	xmm3,XMMWORD[48+r8]
	movdqu	xmm11,XMMWORD[32+r8]
DB	102,65,15,56,0,218
DB	102,69,15,56,0,218
	movdqa	xmm5,xmm3
	pshufd	xmm4,xmm3,78
	pxor	xmm4,xmm3
DB	102,15,58,68,218,0
DB	102,15,58,68,234,17
DB	102,15,58,68,231,0

	movdqa	xmm13,xmm11
	pshufd	xmm12,xmm11,78
	pxor	xmm12,xmm11
DB	102,68,15,58,68,222,0
DB	102,68,15,58,68,238,17
DB	102,68,15,58,68,231,16
	xorps	xmm3,xmm11
	xorps	xmm5,xmm13
	movups	xmm7,XMMWORD[80+rdx]
	xorps	xmm4,xmm12

	movdqu	xmm11,XMMWORD[16+r8]
	movdqu	xmm8,XMMWORD[r8]
DB	102,69,15,56,0,218
DB	102,69,15,56,0,194
	movdqa	xmm13,xmm11
	pshufd	xmm12,xmm11,78
	pxor	xmm0,xmm8
	pxor	xmm12,xmm11
DB	102,69,15,58,68,222,0
	movdqa	xmm1,xmm0
	pshufd	xmm8,xmm0,78
	pxor	xmm8,xmm0
DB	102,69,15,58,68,238,17
DB	102,68,15,58,68,231,0
	xorps	xmm3,xmm11
	xorps	xmm5,xmm13

	lea	r8,[64+r8]
	sub	r9,0x40
	jc	NEAR $L$tail4x

	jmp	NEAR $L$mod4_loop
ALIGN	32
$L$mod4_loop:
DB	102,65,15,58,68,199,0
	xorps	xmm4,xmm12
	movdqu	xmm11,XMMWORD[48+r8]
DB	102,69,15,56,0,218
DB	102,65,15,58,68,207,17
	xorps	xmm0,xmm3
	movdqu	xmm3,XMMWORD[32+r8]
	movdqa	xmm13,xmm11
DB	102,68,15,58,68,199,16
	pshufd	xmm12,xmm11,78
	xorps	xmm1,xmm5
	pxor	xmm12,xmm11
DB	102,65,15,56,0,218
	movups	xmm7,XMMWORD[32+rdx]
	xorps	xmm8,xmm4
DB	102,68,15,58,68,218,0
	pshufd	xmm4,xmm3,78

	pxor	xmm8,xmm0
	movdqa	xmm5,xmm3
	pxor	xmm8,xmm1
	pxor	xmm4,xmm3
	movdqa	xmm9,xmm8
DB	102,68,15,58,68,234,17
	pslldq	xmm8,8
	psrldq	xmm9,8
	pxor	xmm0,xmm8
	movdqa	xmm8,XMMWORD[$L$7_mask]
	pxor	xmm1,xmm9
DB	102,76,15,110,200

	pand	xmm8,xmm0
DB	102,69,15,56,0,200
	pxor	xmm9,xmm0
DB	102,68,15,58,68,231,0
	psllq	xmm9,57
	movdqa	xmm8,xmm9
	pslldq	xmm9,8
DB	102,15,58,68,222,0
	psrldq	xmm8,8
	pxor	xmm0,xmm9
	pxor	xmm1,xmm8
	movdqu	xmm8,XMMWORD[r8]

	movdqa	xmm9,xmm0
	psrlq	xmm0,1
DB	102,15,58,68,238,17
	xorps	xmm3,xmm11
	movdqu	xmm11,XMMWORD[16+r8]
DB	102,69,15,56,0,218
DB	102,15,58,68,231,16
	xorps	xmm5,xmm13
	movups	xmm7,XMMWORD[80+rdx]
DB	102,69,15,56,0,194
	pxor	xmm1,xmm9
	pxor	xmm9,xmm0
	psrlq	xmm0,5

	movdqa	xmm13,xmm11
	pxor	xmm4,xmm12
	pshufd	xmm12,xmm11,78
	pxor	xmm0,xmm9
	pxor	xmm1,xmm8
	pxor	xmm12,xmm11
DB	102,69,15,58,68,222,0
	psrlq	xmm0,1
	pxor	xmm0,xmm1
	movdqa	xmm1,xmm0
DB	102,69,15,58,68,238,17
	xorps	xmm3,xmm11
	pshufd	xmm8,xmm0,78
	pxor	xmm8,xmm0

DB	102,68,15,58,68,231,0
	xorps	xmm5,xmm13

	lea	r8,[64+r8]
	sub	r9,0x40
	jnc	NEAR $L$mod4_loop

$L$tail4x:
DB	102,65,15,58,68,199,0
DB	102,65,15,58,68,207,17
DB	102,68,15,58,68,199,16
	xorps	xmm4,xmm12
	xorps	xmm0,xmm3
	xorps	xmm1,xmm5
	pxor	xmm1,xmm0
	pxor	xmm8,xmm4

	pxor	xmm8,xmm1
	pxor	xmm1,xmm0

	movdqa	xmm9,xmm8
	psrldq	xmm8,8
	pslldq	xmm9,8
	pxor	xmm1,xmm8
	pxor	xmm0,xmm9

	movdqa	xmm4,xmm0
	movdqa	xmm3,xmm0
	psllq	xmm0,5
	pxor	xmm3,xmm0
	psllq	xmm0,1
	pxor	xmm0,xmm3
	psllq	xmm0,57
	movdqa	xmm3,xmm0
	pslldq	xmm0,8
	psrldq	xmm3,8
	pxor	xmm0,xmm4
	pxor	xmm1,xmm3

	movdqa	xmm4,xmm0
	psrlq	xmm0,1
	pxor	xmm1,xmm4
	pxor	xmm4,xmm0
	psrlq	xmm0,5
	pxor	xmm0,xmm4
	psrlq	xmm0,1
	pxor	xmm0,xmm1
	add	r9,0x40
	jz	NEAR $L$done
	movdqu	xmm7,XMMWORD[32+rdx]
	sub	r9,0x10
	jz	NEAR $L$odd_tail
$L$skip4x:

	movdqu	xmm8,XMMWORD[r8]
	movdqu	xmm3,XMMWORD[16+r8]
DB	102,69,15,56,0,194
DB	102,65,15,56,0,218
	pxor	xmm0,xmm8

	movdqa	xmm5,xmm3
	pshufd	xmm4,xmm3,78
	pxor	xmm4,xmm3
DB	102,15,58,68,218,0
DB	102,15,58,68,234,17
DB	102,15,58,68,231,0

	lea	r8,[32+r8]
	nop
	sub	r9,0x20
	jbe	NEAR $L$even_tail
	nop
	jmp	NEAR $L$mod_loop

ALIGN	32
$L$mod_loop:
	movdqa	xmm1,xmm0
	movdqa	xmm8,xmm4
	pshufd	xmm4,xmm0,78
	pxor	xmm4,xmm0

DB	102,15,58,68,198,0
DB	102,15,58,68,206,17
DB	102,15,58,68,231,16

	pxor	xmm0,xmm3
	pxor	xmm1,xmm5
	movdqu	xmm9,XMMWORD[r8]
	pxor	xmm8,xmm0
DB	102,69,15,56,0,202
	movdqu	xmm3,XMMWORD[16+r8]

	pxor	xmm8,xmm1
	pxor	xmm1,xmm9
	pxor	xmm4,xmm8
DB	102,65,15,56,0,218
	movdqa	xmm8,xmm4
	psrldq	xmm8,8
	pslldq	xmm4,8
	pxor	xmm1,xmm8
	pxor	xmm0,xmm4

	movdqa	xmm5,xmm3

	movdqa	xmm9,xmm0
	movdqa	xmm8,xmm0
	psllq	xmm0,5
	pxor	xmm8,xmm0
DB	102,15,58,68,218,0
	psllq	xmm0,1
	pxor	xmm0,xmm8
	psllq	xmm0,57
	movdqa	xmm8,xmm0
	pslldq	xmm0,8
	psrldq	xmm8,8
	pxor	xmm0,xmm9
	pshufd	xmm4,xmm5,78
	pxor	xmm1,xmm8
	pxor	xmm4,xmm5

	movdqa	xmm9,xmm0
	psrlq	xmm0,1
DB	102,15,58,68,234,17
	pxor	xmm1,xmm9
	pxor	xmm9,xmm0
	psrlq	xmm0,5
	pxor	xmm0,xmm9
	lea	r8,[32+r8]
	psrlq	xmm0,1
DB	102,15,58,68,231,0
	pxor	xmm0,xmm1

	sub	r9,0x20
	ja	NEAR $L$mod_loop

$L$even_tail:
	movdqa	xmm1,xmm0
	movdqa	xmm8,xmm4
	pshufd	xmm4,xmm0,78
	pxor	xmm4,xmm0

DB	102,15,58,68,198,0
DB	102,15,58,68,206,17
DB	102,15,58,68,231,16

	pxor	xmm0,xmm3
	pxor	xmm1,xmm5
	pxor	xmm8,xmm0
	pxor	xmm8,xmm1
	pxor	xmm4,xmm8
	movdqa	xmm8,xmm4
	psrldq	xmm8,8
	pslldq	xmm4,8
	pxor	xmm1,xmm8
	pxor	xmm0,xmm4

	movdqa	xmm4,xmm0
	movdqa	xmm3,xmm0
	psllq	xmm0,5
	pxor	xmm3,xmm0
	psllq	xmm0,1
	pxor	xmm0,xmm3
	psllq	xmm0,57
	movdqa	xmm3,xmm0
	pslldq	xmm0,8
	psrldq	xmm3,8
	pxor	xmm0,xmm4
	pxor	xmm1,xmm3

	movdqa	xmm4,xmm0
	psrlq	xmm0,1
	pxor	xmm1,xmm4
	pxor	xmm4,xmm0
	psrlq	xmm0,5
	pxor	xmm0,xmm4
	psrlq	xmm0,1
	pxor	xmm0,xmm1
	test	r9,r9
	jnz	NEAR $L$done

$L$odd_tail:
	movdqu	xmm8,XMMWORD[r8]
DB	102,69,15,56,0,194
	pxor	xmm0,xmm8
	movdqa	xmm1,xmm0
	pshufd	xmm3,xmm0,78
	pxor	xmm3,xmm0
DB	102,15,58,68,194,0
DB	102,15,58,68,202,17
DB	102,15,58,68,223,0
	pxor	xmm3,xmm0
	pxor	xmm3,xmm1

	movdqa	xmm4,xmm3
	psrldq	xmm3,8
	pslldq	xmm4,8
	pxor	xmm1,xmm3
	pxor	xmm0,xmm4

	movdqa	xmm4,xmm0
	movdqa	xmm3,xmm0
	psllq	xmm0,5
	pxor	xmm3,xmm0
	psllq	xmm0,1
	pxor	xmm0,xmm3
	psllq	xmm0,57
	movdqa	xmm3,xmm0
	pslldq	xmm0,8
	psrldq	xmm3,8
	pxor	xmm0,xmm4
	pxor	xmm1,xmm3

	movdqa	xmm4,xmm0
	psrlq	xmm0,1
	pxor	xmm1,xmm4
	pxor	xmm4,xmm0
	psrlq	xmm0,5
	pxor	xmm0,xmm4
	psrlq	xmm0,1
	pxor	xmm0,xmm1
$L$done:
DB	102,65,15,56,0,194
	movdqu	XMMWORD[rcx],xmm0
	movaps	xmm6,XMMWORD[rsp]
	movaps	xmm7,XMMWORD[16+rsp]
	movaps	xmm8,XMMWORD[32+rsp]
	movaps	xmm9,XMMWORD[48+rsp]
	movaps	xmm10,XMMWORD[64+rsp]
	movaps	xmm11,XMMWORD[80+rsp]
	movaps	xmm12,XMMWORD[96+rsp]
	movaps	xmm13,XMMWORD[112+rsp]
	movaps	xmm14,XMMWORD[128+rsp]
	movaps	xmm15,XMMWORD[144+rsp]
	lea	rsp,[168+rsp]
$L$SEH_end_gcm_ghash_clmul:
	DB	0F3h,0C3h		;repret

;-----------------------------------------------------------------------
; gcm_init_avx
; In:   rcx = output table; the loop body runs four times and advances rcx
;             by 48 bytes each time, storing three 16-byte entries per pass
;       rdx = pointer to the 16-byte input value
; Uses: $L$0x1c2_polynomial (defined outside this excerpt).
; The two leading DB lines encode "sub rsp,0x18" and "movaps [rsp],xmm6";
; the matching restore is the movaps/lea pair before the return.
;-----------------------------------------------------------------------
global	gcm_init_avx

ALIGN	32
gcm_init_avx:

$L$SEH_begin_gcm_init_avx:

DB	0x48,0x83,0xec,0x18
DB	0x0f,0x29,0x34,0x24
	vzeroupper

	vmovdqu	xmm2,XMMWORD[rdx]
	vpshufd	xmm2,xmm2,78

	vpshufd	xmm4,xmm2,255
	vpsrlq	xmm3,xmm2,63
	vpsllq	xmm2,xmm2,1
	vpxor	xmm5,xmm5,xmm5
	vpcmpgtd	xmm5,xmm5,xmm4
	vpslldq	xmm3,xmm3,8
	vpor	xmm2,xmm2,xmm3

	vpand	xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial]
	vpxor	xmm2,xmm2,xmm5

	vpunpckhqdq	xmm6,xmm2,xmm2
	vmovdqa	xmm0,xmm2
	vpxor	xmm6,xmm6,xmm2
	mov	r10,4			; r10 = remaining loop passes
	jmp	NEAR $L$init_start_avx
ALIGN	32
$L$init_loop_avx:
	vpalignr	xmm5,xmm4,xmm3,8
	vmovdqu	XMMWORD[(-16)+rcx],xmm5	; third entry of the previous pass
	vpunpckhqdq	xmm3,xmm0,xmm0
	vpxor	xmm3,xmm3,xmm0
	vpclmulqdq	xmm1,xmm0,xmm2,0x11
	vpclmulqdq	xmm0,xmm0,xmm2,0x00
	vpclmulqdq	xmm3,xmm3,xmm6,0x00
	vpxor	xmm4,xmm1,xmm0
	vpxor	xmm3,xmm3,xmm4

	vpslldq	xmm4,xmm3,8
	vpsrldq	xmm3,xmm3,8
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm1,xmm1,xmm3
	vpsllq	xmm3,xmm0,57
	vpsllq	xmm4,xmm0,62
	vpxor	xmm4,xmm4,xmm3
	vpsllq	xmm3,xmm0,63
	vpxor	xmm4,xmm4,xmm3
	vpslldq	xmm3,xmm4,8
	vpsrldq	xmm4,xmm4,8
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm1,xmm1,xmm4

	vpsrlq	xmm4,xmm0,1
	vpxor	xmm1,xmm1,xmm0
	vpxor	xmm0,xmm0,xmm4
	vpsrlq	xmm4,xmm4,5
	vpxor	xmm0,xmm0,xmm4
	vpsrlq	xmm0,xmm0,1
	vpxor	xmm0,xmm0,xmm1
$L$init_start_avx:
	vmovdqa	xmm5,xmm0
	vpunpckhqdq	xmm3,xmm0,xmm0
	vpxor	xmm3,xmm3,xmm0
	vpclmulqdq	xmm1,xmm0,xmm2,0x11
	vpclmulqdq	xmm0,xmm0,xmm2,0x00
	vpclmulqdq	xmm3,xmm3,xmm6,0x00
	vpxor	xmm4,xmm1,xmm0
	vpxor	xmm3,xmm3,xmm4

	vpslldq	xmm4,xmm3,8
	vpsrldq	xmm3,xmm3,8
	vpxor	xmm0,xmm0,xmm4
	vpxor	xmm1,xmm1,xmm3
	vpsllq	xmm3,xmm0,57
	vpsllq	xmm4,xmm0,62
	vpxor	xmm4,xmm4,xmm3
	vpsllq	xmm3,xmm0,63
	vpxor	xmm4,xmm4,xmm3
	vpslldq	xmm3,xmm4,8
	vpsrldq	xmm4,xmm4,8
	vpxor	xmm0,xmm0,xmm3
	vpxor	xmm1,xmm1,xmm4

	vpsrlq	xmm4,xmm0,1
	vpxor	xmm1,xmm1,xmm0
	vpxor	xmm0,xmm0,xmm4
	vpsrlq	xmm4,xmm4,5
	vpxor	xmm0,xmm0,xmm4
	vpsrlq	xmm0,xmm0,1
	vpxor	xmm0,xmm0,xmm1
	vpshufd	xmm3,xmm5,78
	vpshufd	xmm4,xmm0,78
	vpxor	xmm3,xmm3,xmm5
	vmovdqu	XMMWORD[rcx],xmm5
	vpxor	xmm4,xmm4,xmm0
	vmovdqu	XMMWORD[16+rcx],xmm0
	lea	rcx,[48+rcx]
	sub	r10,1
	jnz	NEAR $L$init_loop_avx

	vpalignr	xmm5,xmm3,xmm4,8
	vmovdqu	XMMWORD[(-16)+rcx],xmm5

	vzeroupper
	movaps	xmm6,XMMWORD[rsp]
	lea	rsp,[24+rsp]
$L$SEH_end_gcm_init_avx:
	DB	0F3h,0C3h		;repret

;-----------------------------------------------------------------------
; gcm_gmult_avx
; Tail-jumps to $L$_gmult_clmul with the argument registers untouched.
;-----------------------------------------------------------------------
global	gcm_gmult_avx

ALIGN	32
gcm_gmult_avx:

	jmp	NEAR $L$_gmult_clmul

;-----------------------------------------------------------------------
; gcm_ghash_avx
; In:   rcx = pointer to the 16-byte accumulator
;       rdx = table pointer (biased by +64 right after entry)
;       r8  = input pointer
;       r9  = input length in bytes; 0x80-byte groups take the 8-block path
; Uses: $L$0x1c2_polynomial (address kept in r10), $L$bswap_mask.
; Same hand-encoded xmm6-xmm15 save sequence as gcm_ghash_clmul.
;-----------------------------------------------------------------------
global	gcm_ghash_avx

ALIGN	32
gcm_ghash_avx:

	lea	rax,[((-136))+rsp]
$L$SEH_begin_gcm_ghash_avx:

DB	0x48,0x8d,0x60,0xe0
DB	0x0f,0x29,0x70,0xe0
DB	0x0f,0x29,0x78,0xf0
DB	0x44,0x0f,0x29,0x00
DB	0x44,0x0f,0x29,0x48,0x10
DB	0x44,0x0f,0x29,0x50,0x20
DB	0x44,0x0f,0x29,0x58,0x30
DB	0x44,0x0f,0x29,0x60,0x40
DB	0x44,0x0f,0x29,0x68,0x50
DB	0x44,0x0f,0x29,0x70,0x60
DB	0x44,0x0f,0x29,0x78,0x70
	vzeroupper

	vmovdqu	xmm10,XMMWORD[rcx]
	lea	r10,[$L$0x1c2_polynomial]
	lea	rdx,[64+rdx]
	vmovdqu	xmm13,XMMWORD[$L$bswap_mask]
	vpshufb	xmm10,xmm10,xmm13
	cmp	r9,0x80
	jb	NEAR $L$short_avx
	sub	r9,0x80

	vmovdqu	xmm14,XMMWORD[112+r8]
	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
	vpshufb	xmm14,xmm14,xmm13
	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]

	vpunpckhqdq	xmm9,xmm14,xmm14
	vmovdqu	xmm15,XMMWORD[96+r8]
	vpclmulqdq	xmm0,xmm14,xmm6,0x00
	vpxor	xmm9,xmm9,xmm14
	vpshufb	xmm15,xmm15,xmm13
	vpclmulqdq	xmm1,xmm14,xmm6,0x11
	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
	vpunpckhqdq	xmm8,xmm15,xmm15
	vmovdqu	xmm14,XMMWORD[80+r8]
	vpclmulqdq	xmm2,xmm9,xmm7,0x00
	vpxor	xmm8,xmm8,xmm15

	vpshufb	xmm14,xmm14,xmm13
	vpclmulqdq	xmm3,xmm15,xmm6,0x00
	vpunpckhqdq	xmm9,xmm14,xmm14
	vpclmulqdq	xmm4,xmm15,xmm6,0x11
	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
	vpxor	xmm9,xmm9,xmm14
	vmovdqu	xmm15,XMMWORD[64+r8]
	vpclmulqdq	xmm5,xmm8,xmm7,0x10
	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]

	vpshufb	xmm15,xmm15,xmm13
	vpxor	xmm3,xmm3,xmm0
	vpclmulqdq	xmm0,xmm14,xmm6,0x00
	vpxor	xmm4,xmm4,xmm1
	vpunpckhqdq	xmm8,xmm15,xmm15
	vpclmulqdq	xmm1,xmm14,xmm6,0x11
	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
	vpxor	xmm5,xmm5,xmm2
	vpclmulqdq	xmm2,xmm9,xmm7,0x00
	vpxor	xmm8,xmm8,xmm15

	vmovdqu	xmm14,XMMWORD[48+r8]
	vpxor	xmm0,xmm0,xmm3
	vpclmulqdq	xmm3,xmm15,xmm6,0x00
	vpxor	xmm1,xmm1,xmm4
	vpshufb	xmm14,xmm14,xmm13
	vpclmulqdq	xmm4,xmm15,xmm6,0x11
	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
	vpxor	xmm2,xmm2,xmm5
	vpunpckhqdq	xmm9,xmm14,xmm14
	vpclmulqdq	xmm5,xmm8,xmm7,0x10
	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
	vpxor	xmm9,xmm9,xmm14

	vmovdqu	xmm15,XMMWORD[32+r8]
	vpxor	xmm3,xmm3,xmm0
	vpclmulqdq	xmm0,xmm14,xmm6,0x00
	vpxor	xmm4,xmm4,xmm1
	vpshufb	xmm15,xmm15,xmm13
	vpclmulqdq	xmm1,xmm14,xmm6,0x11
	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
	vpxor	xmm5,xmm5,xmm2
	vpunpckhqdq	xmm8,xmm15,xmm15
	vpclmulqdq	xmm2,xmm9,xmm7,0x00
	vpxor	xmm8,xmm8,xmm15

	vmovdqu	xmm14,XMMWORD[16+r8]
	vpxor	xmm0,xmm0,xmm3
	vpclmulqdq	xmm3,xmm15,xmm6,0x00
	vpxor	xmm1,xmm1,xmm4
	vpshufb	xmm14,xmm14,xmm13
	vpclmulqdq	xmm4,xmm15,xmm6,0x11
	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
	vpxor	xmm2,xmm2,xmm5
	vpunpckhqdq	xmm9,xmm14,xmm14
	vpclmulqdq	xmm5,xmm8,xmm7,0x10
	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
	vpxor	xmm9,xmm9,xmm14

	vmovdqu	xmm15,XMMWORD[r8]
	vpxor	xmm3,xmm3,xmm0
	vpclmulqdq	xmm0,xmm14,xmm6,0x00
	vpxor	xmm4,xmm4,xmm1
	vpshufb	xmm15,xmm15,xmm13
	vpclmulqdq	xmm1,xmm14,xmm6,0x11
	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
	vpxor	xmm5,xmm5,xmm2
	vpclmulqdq	xmm2,xmm9,xmm7,0x10

	lea	r8,[128+r8]
	cmp	r9,0x80
	jb	NEAR $L$tail_avx

	vpxor	xmm15,xmm15,xmm10
	sub	r9,0x80
	jmp	NEAR $L$oop8x_avx

ALIGN	32
$L$oop8x_avx:
	vpunpckhqdq	xmm8,xmm15,xmm15
	vmovdqu	xmm14,XMMWORD[112+r8]
	vpxor	xmm3,xmm3,xmm0
	vpxor	xmm8,xmm8,xmm15
	vpclmulqdq	xmm10,xmm15,xmm6,0x00
	vpshufb	xmm14,xmm14,xmm13
	vpxor	xmm4,xmm4,xmm1
	vpclmulqdq	xmm11,xmm15,xmm6,0x11
	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
	vpunpckhqdq	xmm9,xmm14,xmm14
	vpxor	xmm5,xmm5,xmm2
	vpclmulqdq	xmm12,xmm8,xmm7,0x00
	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
	vpxor	xmm9,xmm9,xmm14

	vmovdqu	xmm15,XMMWORD[96+r8]
	vpclmulqdq	xmm0,xmm14,xmm6,0x00
	vpxor	xmm10,xmm10,xmm3
	vpshufb	xmm15,xmm15,xmm13
	vpclmulqdq	xmm1,xmm14,xmm6,0x11
	vxorps	xmm11,xmm11,xmm4
	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
	vpunpckhqdq	xmm8,xmm15,xmm15
	vpclmulqdq	xmm2,xmm9,xmm7,0x00
	vpxor	xmm12,xmm12,xmm5
	vxorps	xmm8,xmm8,xmm15

	vmovdqu	xmm14,XMMWORD[80+r8]
	vpxor	xmm12,xmm12,xmm10
	vpclmulqdq	xmm3,xmm15,xmm6,0x00
	vpxor	xmm12,xmm12,xmm11
	vpslldq	xmm9,xmm12,8
	vpxor	xmm3,xmm3,xmm0
	vpclmulqdq	xmm4,xmm15,xmm6,0x11
	vpsrldq	xmm12,xmm12,8
	vpxor	xmm10,xmm10,xmm9
	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
	vpshufb	xmm14,xmm14,xmm13
	vxorps	xmm11,xmm11,xmm12
	vpxor	xmm4,xmm4,xmm1
	vpunpckhqdq	xmm9,xmm14,xmm14
	vpclmulqdq	xmm5,xmm8,xmm7,0x10
	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
	vpxor	xmm9,xmm9,xmm14
	vpxor	xmm5,xmm5,xmm2

	vmovdqu	xmm15,XMMWORD[64+r8]
	vpalignr	xmm12,xmm10,xmm10,8
	vpclmulqdq	xmm0,xmm14,xmm6,0x00
	vpshufb	xmm15,xmm15,xmm13
	vpxor	xmm0,xmm0,xmm3
	vpclmulqdq	xmm1,xmm14,xmm6,0x11
	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
	vpunpckhqdq	xmm8,xmm15,xmm15
	vpxor	xmm1,xmm1,xmm4
	vpclmulqdq	xmm2,xmm9,xmm7,0x00
	vxorps	xmm8,xmm8,xmm15
	vpxor	xmm2,xmm2,xmm5

	vmovdqu	xmm14,XMMWORD[48+r8]
	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
	vpclmulqdq	xmm3,xmm15,xmm6,0x00
	vpshufb	xmm14,xmm14,xmm13
	vpxor	xmm3,xmm3,xmm0
	vpclmulqdq	xmm4,xmm15,xmm6,0x11
	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
	vpunpckhqdq	xmm9,xmm14,xmm14
	vpxor	xmm4,xmm4,xmm1
	vpclmulqdq	xmm5,xmm8,xmm7,0x10
	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
	vpxor	xmm9,xmm9,xmm14
	vpxor	xmm5,xmm5,xmm2

	vmovdqu	xmm15,XMMWORD[32+r8]
	vpclmulqdq	xmm0,xmm14,xmm6,0x00
	vpshufb	xmm15,xmm15,xmm13
	vpxor	xmm0,xmm0,xmm3
	vpclmulqdq	xmm1,xmm14,xmm6,0x11
	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
	vpunpckhqdq	xmm8,xmm15,xmm15
	vpxor	xmm1,xmm1,xmm4
	vpclmulqdq	xmm2,xmm9,xmm7,0x00
	vpxor	xmm8,xmm8,xmm15
	vpxor	xmm2,xmm2,xmm5
	vxorps	xmm10,xmm10,xmm12

	vmovdqu	xmm14,XMMWORD[16+r8]
	vpalignr	xmm12,xmm10,xmm10,8
	vpclmulqdq	xmm3,xmm15,xmm6,0x00
	vpshufb	xmm14,xmm14,xmm13
	vpxor	xmm3,xmm3,xmm0
	vpclmulqdq	xmm4,xmm15,xmm6,0x11
	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
	vxorps	xmm12,xmm12,xmm11
	vpunpckhqdq	xmm9,xmm14,xmm14
	vpxor	xmm4,xmm4,xmm1
	vpclmulqdq	xmm5,xmm8,xmm7,0x10
	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
	vpxor	xmm9,xmm9,xmm14
	vpxor	xmm5,xmm5,xmm2

	vmovdqu	xmm15,XMMWORD[r8]
	vpclmulqdq	xmm0,xmm14,xmm6,0x00
1710 vpshufb xmm15,xmm15,xmm13 1711 vpclmulqdq xmm1,xmm14,xmm6,0x11 1712 vmovdqu xmm6,XMMWORD[((160-64))+rdx] 1713 vpxor xmm15,xmm15,xmm12 1714 vpclmulqdq xmm2,xmm9,xmm7,0x10 1715 vpxor xmm15,xmm15,xmm10 1716 1717 lea r8,[128+r8] 1718 sub r9,0x80 1719 jnc NEAR $L$oop8x_avx 1720 1721 add r9,0x80 1722 jmp NEAR $L$tail_no_xor_avx 1723 1724ALIGN 32 1725$L$short_avx: 1726 vmovdqu xmm14,XMMWORD[((-16))+r9*1+r8] 1727 lea r8,[r9*1+r8] 1728 vmovdqu xmm6,XMMWORD[((0-64))+rdx] 1729 vmovdqu xmm7,XMMWORD[((32-64))+rdx] 1730 vpshufb xmm15,xmm14,xmm13 1731 1732 vmovdqa xmm3,xmm0 1733 vmovdqa xmm4,xmm1 1734 vmovdqa xmm5,xmm2 1735 sub r9,0x10 1736 jz NEAR $L$tail_avx 1737 1738 vpunpckhqdq xmm8,xmm15,xmm15 1739 vpxor xmm3,xmm3,xmm0 1740 vpclmulqdq xmm0,xmm15,xmm6,0x00 1741 vpxor xmm8,xmm8,xmm15 1742 vmovdqu xmm14,XMMWORD[((-32))+r8] 1743 vpxor xmm4,xmm4,xmm1 1744 vpclmulqdq xmm1,xmm15,xmm6,0x11 1745 vmovdqu xmm6,XMMWORD[((16-64))+rdx] 1746 vpshufb xmm15,xmm14,xmm13 1747 vpxor xmm5,xmm5,xmm2 1748 vpclmulqdq xmm2,xmm8,xmm7,0x00 1749 vpsrldq xmm7,xmm7,8 1750 sub r9,0x10 1751 jz NEAR $L$tail_avx 1752 1753 vpunpckhqdq xmm8,xmm15,xmm15 1754 vpxor xmm3,xmm3,xmm0 1755 vpclmulqdq xmm0,xmm15,xmm6,0x00 1756 vpxor xmm8,xmm8,xmm15 1757 vmovdqu xmm14,XMMWORD[((-48))+r8] 1758 vpxor xmm4,xmm4,xmm1 1759 vpclmulqdq xmm1,xmm15,xmm6,0x11 1760 vmovdqu xmm6,XMMWORD[((48-64))+rdx] 1761 vpshufb xmm15,xmm14,xmm13 1762 vpxor xmm5,xmm5,xmm2 1763 vpclmulqdq xmm2,xmm8,xmm7,0x00 1764 vmovdqu xmm7,XMMWORD[((80-64))+rdx] 1765 sub r9,0x10 1766 jz NEAR $L$tail_avx 1767 1768 vpunpckhqdq xmm8,xmm15,xmm15 1769 vpxor xmm3,xmm3,xmm0 1770 vpclmulqdq xmm0,xmm15,xmm6,0x00 1771 vpxor xmm8,xmm8,xmm15 1772 vmovdqu xmm14,XMMWORD[((-64))+r8] 1773 vpxor xmm4,xmm4,xmm1 1774 vpclmulqdq xmm1,xmm15,xmm6,0x11 1775 vmovdqu xmm6,XMMWORD[((64-64))+rdx] 1776 vpshufb xmm15,xmm14,xmm13 1777 vpxor xmm5,xmm5,xmm2 1778 vpclmulqdq xmm2,xmm8,xmm7,0x00 1779 vpsrldq xmm7,xmm7,8 1780 sub r9,0x10 1781 jz NEAR $L$tail_avx 1782 1783 vpunpckhqdq 
xmm8,xmm15,xmm15 1784 vpxor xmm3,xmm3,xmm0 1785 vpclmulqdq xmm0,xmm15,xmm6,0x00 1786 vpxor xmm8,xmm8,xmm15 1787 vmovdqu xmm14,XMMWORD[((-80))+r8] 1788 vpxor xmm4,xmm4,xmm1 1789 vpclmulqdq xmm1,xmm15,xmm6,0x11 1790 vmovdqu xmm6,XMMWORD[((96-64))+rdx] 1791 vpshufb xmm15,xmm14,xmm13 1792 vpxor xmm5,xmm5,xmm2 1793 vpclmulqdq xmm2,xmm8,xmm7,0x00 1794 vmovdqu xmm7,XMMWORD[((128-64))+rdx] 1795 sub r9,0x10 1796 jz NEAR $L$tail_avx 1797 1798 vpunpckhqdq xmm8,xmm15,xmm15 1799 vpxor xmm3,xmm3,xmm0 1800 vpclmulqdq xmm0,xmm15,xmm6,0x00 1801 vpxor xmm8,xmm8,xmm15 1802 vmovdqu xmm14,XMMWORD[((-96))+r8] 1803 vpxor xmm4,xmm4,xmm1 1804 vpclmulqdq xmm1,xmm15,xmm6,0x11 1805 vmovdqu xmm6,XMMWORD[((112-64))+rdx] 1806 vpshufb xmm15,xmm14,xmm13 1807 vpxor xmm5,xmm5,xmm2 1808 vpclmulqdq xmm2,xmm8,xmm7,0x00 1809 vpsrldq xmm7,xmm7,8 1810 sub r9,0x10 1811 jz NEAR $L$tail_avx 1812 1813 vpunpckhqdq xmm8,xmm15,xmm15 1814 vpxor xmm3,xmm3,xmm0 1815 vpclmulqdq xmm0,xmm15,xmm6,0x00 1816 vpxor xmm8,xmm8,xmm15 1817 vmovdqu xmm14,XMMWORD[((-112))+r8] 1818 vpxor xmm4,xmm4,xmm1 1819 vpclmulqdq xmm1,xmm15,xmm6,0x11 1820 vmovdqu xmm6,XMMWORD[((144-64))+rdx] 1821 vpshufb xmm15,xmm14,xmm13 1822 vpxor xmm5,xmm5,xmm2 1823 vpclmulqdq xmm2,xmm8,xmm7,0x00 1824 vmovq xmm7,QWORD[((184-64))+rdx] 1825 sub r9,0x10 1826 jmp NEAR $L$tail_avx 1827 1828ALIGN 32 1829$L$tail_avx: 1830 vpxor xmm15,xmm15,xmm10 1831$L$tail_no_xor_avx: 1832 vpunpckhqdq xmm8,xmm15,xmm15 1833 vpxor xmm3,xmm3,xmm0 1834 vpclmulqdq xmm0,xmm15,xmm6,0x00 1835 vpxor xmm8,xmm8,xmm15 1836 vpxor xmm4,xmm4,xmm1 1837 vpclmulqdq xmm1,xmm15,xmm6,0x11 1838 vpxor xmm5,xmm5,xmm2 1839 vpclmulqdq xmm2,xmm8,xmm7,0x00 1840 1841 vmovdqu xmm12,XMMWORD[r10] 1842 1843 vpxor xmm10,xmm3,xmm0 1844 vpxor xmm11,xmm4,xmm1 1845 vpxor xmm5,xmm5,xmm2 1846 1847 vpxor xmm5,xmm5,xmm10 1848 vpxor xmm5,xmm5,xmm11 1849 vpslldq xmm9,xmm5,8 1850 vpsrldq xmm5,xmm5,8 1851 vpxor xmm10,xmm10,xmm9 1852 vpxor xmm11,xmm11,xmm5 1853 1854 vpclmulqdq xmm9,xmm10,xmm12,0x10 1855 vpalignr 
xmm10,xmm10,xmm10,8 1856 vpxor xmm10,xmm10,xmm9 1857 1858 vpclmulqdq xmm9,xmm10,xmm12,0x10 1859 vpalignr xmm10,xmm10,xmm10,8 1860 vpxor xmm10,xmm10,xmm11 1861 vpxor xmm10,xmm10,xmm9 1862 1863 cmp r9,0 1864 jne NEAR $L$short_avx 1865 1866 vpshufb xmm10,xmm10,xmm13 1867 vmovdqu XMMWORD[rcx],xmm10 1868 vzeroupper 1869 movaps xmm6,XMMWORD[rsp] 1870 movaps xmm7,XMMWORD[16+rsp] 1871 movaps xmm8,XMMWORD[32+rsp] 1872 movaps xmm9,XMMWORD[48+rsp] 1873 movaps xmm10,XMMWORD[64+rsp] 1874 movaps xmm11,XMMWORD[80+rsp] 1875 movaps xmm12,XMMWORD[96+rsp] 1876 movaps xmm13,XMMWORD[112+rsp] 1877 movaps xmm14,XMMWORD[128+rsp] 1878 movaps xmm15,XMMWORD[144+rsp] 1879 lea rsp,[168+rsp] 1880$L$SEH_end_gcm_ghash_avx: 1881 DB 0F3h,0C3h ;repret 1882 1883 1884ALIGN 64 1885$L$bswap_mask: 1886DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 1887$L$0x1c2_polynomial: 1888DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 1889$L$7_mask: 1890 DD 7,0,7,0 1891$L$7_mask_poly: 1892 DD 7,0,450,0 1893ALIGN 64 1894 1895$L$rem_4bit: 1896 DD 0,0,0,471859200,0,943718400,0,610271232 1897 DD 0,1887436800,0,1822425088,0,1220542464,0,1423966208 1898 DD 0,3774873600,0,4246732800,0,3644850176,0,3311403008 1899 DD 0,2441084928,0,2376073216,0,2847932416,0,3051356160 1900 1901$L$rem_8bit: 1902 DW 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E 1903 DW 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E 1904 DW 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E 1905 DW 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E 1906 DW 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E 1907 DW 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E 1908 DW 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E 1909 DW 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E 1910 DW 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE 1911 DW 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE 1912 DW 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE 1913 DW 
0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
        DW      0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
        DW      0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
        DW      0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
        DW      0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
        DW      0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
        DW      0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
        DW      0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
        DW      0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
        DW      0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
        DW      0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
        DW      0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
        DW      0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
        DW      0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
        DW      0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
        DW      0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
        DW      0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
        DW      0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
        DW      0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
        DW      0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
        DW      0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

; NUL-terminated ASCII ident string:
;   "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
        DB      71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
        DB      44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
        DB      60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
        DB      114,103,62,0
ALIGN   64
EXTERN  __imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; Byte offsets used by se_handler.  The field names follow the Win64
; CONTEXT and DISPATCHER_CONTEXT layouts from the Windows SDK headers.
;-----------------------------------------------------------------------
%define SEH_CTX_RAX             120     ; CONTEXT.Rax
%define SEH_CTX_RBX             144     ; CONTEXT.Rbx
%define SEH_CTX_RSP             152     ; CONTEXT.Rsp
%define SEH_CTX_RBP             160     ; CONTEXT.Rbp
%define SEH_CTX_RSI             168     ; CONTEXT.Rsi
%define SEH_CTX_RDI             176     ; CONTEXT.Rdi
%define SEH_CTX_R12             216     ; CONTEXT.R12
%define SEH_CTX_R13             224     ; CONTEXT.R13
%define SEH_CTX_R14             232     ; CONTEXT.R14
%define SEH_CTX_R15             240     ; CONTEXT.R15
%define SEH_CTX_RIP             248     ; CONTEXT.Rip
%define SEH_CTX_QWORDS          154     ; CONTEXT size in 8-byte words
%define SEH_DISP_CONTROLPC      0       ; DISPATCHER_CONTEXT.ControlPc
%define SEH_DISP_IMAGEBASE      8       ; DISPATCHER_CONTEXT.ImageBase
%define SEH_DISP_FUNCENTRY      16      ; DISPATCHER_CONTEXT.FunctionEntry
%define SEH_DISP_ESTABLISHER    24      ; DISPATCHER_CONTEXT.EstablisherFrame
%define SEH_DISP_CONTEXTRECORD  40      ; DISPATCHER_CONTEXT.ContextRecord
%define SEH_DISP_HANDLERDATA    56      ; DISPATCHER_CONTEXT.HandlerData

;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler referenced by
; the .xdata records of gcm_gmult_4bit and gcm_ghash_4bit.
;
; In (as used below):
;   r8  = CONTEXT* of the interrupted code
;   r9  = DISPATCHER_CONTEXT*
;         HandlerData[0] = RVA of the function's "prologue done" label
;         HandlerData[1] = RVA of the function's "epilogue" label
; Out:
;   eax = 1
;
; What it does:
;   1. Works out the frame base of the interrupted function in rax:
;        Rip <  prologue label : context->Rax is used unchanged
;        Rip >= epilogue label : context->Rsp is used
;        otherwise             : context->Rsp + 48 + 280, and the six
;                                saved registers rbx/rbp/r12-r15 are
;                                read back from just below that address
;                                and written into the CONTEXT.
;   2. Reloads rdi/rsi from [8+rax]/[16+rax] (the slots the WIN64
;      prologue stores them to) and records rax as context->Rsp.
;   3. Copies the whole CONTEXT into disp->ContextRecord.
;   4. Calls RtlVirtualUnwind with HandlerType = 0.
;
; Every register it touches is pushed on entry and popped on exit,
; including the flags.
;-----------------------------------------------------------------------
ALIGN   16
se_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64                          ; outgoing-argument area for the call below

        mov     rax,QWORD[SEH_CTX_RAX+r8]       ; candidate frame base #1
        mov     rbx,QWORD[SEH_CTX_RIP+r8]       ; faulting instruction pointer

        mov     rsi,QWORD[SEH_DISP_IMAGEBASE+r9]
        mov     r11,QWORD[SEH_DISP_HANDLERDATA+r9]

        mov     r10d,DWORD[r11]                 ; HandlerData[0] (RVA)
        lea     r10,[r10*1+rsi]                 ; -> absolute address
        cmp     rbx,r10
        jb      NEAR $L$in_prologue             ; fault before the frame was built

        mov     rax,QWORD[SEH_CTX_RSP+r8]       ; candidate frame base #2

        mov     r10d,DWORD[4+r11]               ; HandlerData[1] (RVA)
        lea     r10,[r10*1+rsi]                 ; -> absolute address
        cmp     rbx,r10
        jae     NEAR $L$in_prologue             ; fault at/after the epilogue label

        lea     rax,[48+280+rax]                ; step over 280-byte frame + 6 pushes

        mov     rbx,QWORD[-8+rax]
        mov     rbp,QWORD[-16+rax]
        mov     r12,QWORD[-24+rax]
        mov     r13,QWORD[-32+rax]
        mov     r14,QWORD[-40+rax]
        mov     r15,QWORD[-48+rax]
        mov     QWORD[SEH_CTX_RBX+r8],rbx
        mov     QWORD[SEH_CTX_RBP+r8],rbp
        mov     QWORD[SEH_CTX_R12+r8],r12
        mov     QWORD[SEH_CTX_R13+r8],r13
        mov     QWORD[SEH_CTX_R14+r8],r14
        mov     QWORD[SEH_CTX_R15+r8],r15

$L$in_prologue:
        mov     rdi,QWORD[8+rax]                ; slots written by the WIN64 prologue
        mov     rsi,QWORD[16+rax]
        mov     QWORD[SEH_CTX_RSP+r8],rax
        mov     QWORD[SEH_CTX_RSI+r8],rsi
        mov     QWORD[SEH_CTX_RDI+r8],rdi

        ; Copy the patched CONTEXT (r8) over disp->ContextRecord.
        mov     rdi,QWORD[SEH_DISP_CONTEXTRECORD+r9]
        mov     rsi,r8
        mov     ecx,SEH_CTX_QWORDS
        cld                                     ; these two instructions encode to the
        rep movsq                               ; same bytes as the raw DD 0xa548f3fc

        ; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
        ;                  ContextRecord, &HandlerData, &EstablisherFrame, NULL)
        mov     rsi,r9
        xor     rcx,rcx                         ; arg1: HandlerType = 0
        mov     rdx,QWORD[SEH_DISP_IMAGEBASE+rsi]       ; arg2
        mov     r8,QWORD[SEH_DISP_CONTROLPC+rsi]        ; arg3
        mov     r9,QWORD[SEH_DISP_FUNCENTRY+rsi]        ; arg4
        mov     r10,QWORD[SEH_DISP_CONTEXTRECORD+rsi]
        lea     r11,[SEH_DISP_HANDLERDATA+rsi]
        lea     r12,[SEH_DISP_ESTABLISHER+rsi]
        mov     QWORD[32+rsp],r10               ; arg5
        mov     QWORD[40+rsp],r11               ; arg6
        mov     QWORD[48+rsp],r12               ; arg7
        mov     QWORD[56+rsp],rcx               ; arg8 = 0
        call    QWORD[__imp_RtlVirtualUnwind]

        mov     eax,1                           ; handler's return value
        add     rsp,64
        popfq
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        pop     rdi
        pop     rsi
        DB      0F3h,0C3h                       ;repret


;-----------------------------------------------------------------------
; .pdata: one three-dword record per function --
;   begin RVA, end RVA, RVA of its unwind info in .xdata.
; The gcm_*_avx records point at the unwind info of the matching
; gcm_*_clmul function.
; NOTE(review): that sharing relies on the avx prologues having the
; same layout as the clmul ones -- those prologues are not visible here.
;-----------------------------------------------------------------------
section .pdata rdata align=4
ALIGN   4
        DD      $L$SEH_begin_gcm_gmult_4bit wrt ..imagebase
        DD      $L$SEH_end_gcm_gmult_4bit wrt ..imagebase
        DD      $L$SEH_info_gcm_gmult_4bit wrt ..imagebase

        DD      $L$SEH_begin_gcm_ghash_4bit wrt ..imagebase
        DD      $L$SEH_end_gcm_ghash_4bit wrt ..imagebase
        DD      $L$SEH_info_gcm_ghash_4bit wrt ..imagebase

        DD      $L$SEH_begin_gcm_init_clmul wrt ..imagebase
        DD      $L$SEH_end_gcm_init_clmul wrt ..imagebase
        DD      $L$SEH_info_gcm_init_clmul wrt ..imagebase

        DD      $L$SEH_begin_gcm_ghash_clmul wrt ..imagebase
        DD      $L$SEH_end_gcm_ghash_clmul wrt ..imagebase
        DD      $L$SEH_info_gcm_ghash_clmul wrt ..imagebase

        DD      $L$SEH_begin_gcm_init_avx wrt ..imagebase
        DD      $L$SEH_end_gcm_init_avx wrt ..imagebase
        DD      $L$SEH_info_gcm_init_clmul wrt ..imagebase

        DD      $L$SEH_begin_gcm_ghash_avx wrt ..imagebase
        DD      $L$SEH_end_gcm_ghash_avx wrt ..imagebase
        DD      $L$SEH_info_gcm_ghash_clmul wrt ..imagebase

;-----------------------------------------------------------------------
; .xdata: unwind information referenced from .pdata.
;-----------------------------------------------------------------------
section .xdata rdata align=8
ALIGN   8

; 4-bit routines: header byte 9 (version 1 with the exception-handler
; flag), no unwind codes; then the handler RVA and its two-dword
; handler data (prologue label RVA, epilogue label RVA).
$L$SEH_info_gcm_gmult_4bit:
        DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$gmult_prologue wrt ..imagebase,$L$gmult_epilogue wrt ..imagebase
$L$SEH_info_gcm_ghash_4bit:
        DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$ghash_prologue wrt ..imagebase,$L$ghash_epilogue wrt ..imagebase

; Table-driven unwind: version 1, 8-byte prologue, 3 code slots.
$L$SEH_info_gcm_init_clmul:
        DB      0x01,0x08,0x03,0x00
        DB      0x08,0x68,0x00,0x00             ; save xmm6 at [rsp+0]
        DB      0x04,0x22,0x00,0x00             ; small stack allocation of 24 bytes (+ padding slot)

; Table-driven unwind: version 1, 0x33-byte prologue, 0x16 code slots.
$L$SEH_info_gcm_ghash_clmul:
        DB      0x01,0x33,0x16,0x00
        DB      0x33,0xf8,0x09,0x00             ; save xmm15 at [rsp+0x90]
        DB      0x2e,0xe8,0x08,0x00             ; save xmm14 at [rsp+0x80]
        DB      0x29,0xd8,0x07,0x00             ; save xmm13 at [rsp+0x70]
        DB      0x24,0xc8,0x06,0x00             ; save xmm12 at [rsp+0x60]
        DB      0x1f,0xb8,0x05,0x00             ; save xmm11 at [rsp+0x50]
        DB      0x1a,0xa8,0x04,0x00             ; save xmm10 at [rsp+0x40]
        DB      0x15,0x98,0x03,0x00             ; save xmm9  at [rsp+0x30]
        DB      0x10,0x88,0x02,0x00             ; save xmm8  at [rsp+0x20]
        DB      0x0c,0x78,0x01,0x00             ; save xmm7  at [rsp+0x10]
        DB      0x08,0x68,0x00,0x00             ; save xmm6  at [rsp+0x00]
        DB      0x04,0x01,0x15,0x00             ; large stack allocation of 0x15*8 = 168 bytes