; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; Reading notes:
;   * NASM/Yasm syntax, 32-bit x86.
;   * Every routine reads its arguments from the stack (first argument at
;     [4+esp] on entry) and returns with a plain `ret`.
;   * Addresses of the constant tables at the end of the file are formed
;     position-independently: `call` to the next instruction, `pop` the
;     return address, then `lea reg,[(table-label)+reg]`.
;   * Some SSSE3/PCLMULQDQ instructions are emitted as raw `db` bytes; the
;     decoded mnemonic is given in the trailing comment on each such line.

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
%ifidn __OUTPUT_FORMAT__,obj
section code use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes .00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
$@feat.00 equ 1
%endif
section .text code align=64
%else
section .text code
%endif

;-----------------------------------------------------------------------
; _gcm_gmult_4bit_mmx
; Stack args (offsets valid after the four pushes below):
;   [20+esp] -> edi : 16-byte block; consumed from byte 15 down to byte 0
;                     and then overwritten with the 16-byte result.
;   [24+esp] -> esi : table addressed in 16-byte entries by (nibble << 4).
;   (presumably Xi and Htable of the C prototype -- confirm against the
;    C declaration)
; Register roles:
;   eax     = address of L$rem_4bit
;   ebp     = index of the next input byte, counting down from 14
;   mm1:mm0 = 128-bit accumulator, shifted right by 4 bits per nibble;
;             mm2 carries the 4 bits that move from mm1 into mm0
;   ebx     = low 4 bits leaving mm0, used to index L$rem_4bit
; Saves and restores ebp, ebx, esi, edi; executes emms before returning.
;-----------------------------------------------------------------------
global _gcm_gmult_4bit_mmx
align 16
_gcm_gmult_4bit_mmx:
L$_gcm_gmult_4bit_mmx_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi
        mov     edi,DWORD [20+esp]
        mov     esi,DWORD [24+esp]
        call    L$000pic_point
L$000pic_point:
        pop     eax
        lea     eax,[(L$rem_4bit-L$000pic_point)+eax]
        movzx   ebx,BYTE [15+edi]
        xor     ecx,ecx
        mov     edx,ebx
        mov     cl,dl
        mov     ebp,14
        shl     cl,4                    ; ecx = (low nibble) * 16
        and     edx,240                 ; edx = (high nibble) * 16
        movq    mm0,[8+ecx*1+esi]
        movq    mm1,[ecx*1+esi]
        movd    ebx,mm0
        jmp     NEAR L$001mmx_loop
align 16
L$001mmx_loop:
        ; First half: fold in the high nibble (edx) of the current byte and
        ; fetch the next byte into cl.
        psrlq   mm0,4
        and     ebx,15
        movq    mm2,mm1
        psrlq   mm1,4
        pxor    mm0,[8+edx*1+esi]
        mov     cl,BYTE [ebp*1+edi]
        psllq   mm2,60
        pxor    mm1,[ebx*8+eax]
        dec     ebp
        movd    ebx,mm0
        pxor    mm1,[edx*1+esi]
        mov     edx,ecx
        pxor    mm0,mm2
        js      NEAR L$002mmx_break     ; ebp went negative: byte 0 was just fetched
        ; Second half: fold in the low nibble (ecx) of the byte just fetched.
        shl     cl,4
        and     ebx,15
        psrlq   mm0,4
        and     edx,240
        movq    mm2,mm1
        psrlq   mm1,4
        pxor    mm0,[8+ecx*1+esi]
        psllq   mm2,60
        pxor    mm1,[ebx*8+eax]
        movd    ebx,mm0
        pxor    mm1,[ecx*1+esi]
        pxor    mm0,mm2
        jmp     NEAR L$001mmx_loop
align 16
L$002mmx_break:
        ; Last byte: low nibble (ecx) ...
        shl     cl,4
        and     ebx,15
        psrlq   mm0,4
        and     edx,240
        movq    mm2,mm1
        psrlq   mm1,4
        pxor    mm0,[8+ecx*1+esi]
        psllq   mm2,60
        pxor    mm1,[ebx*8+eax]
        movd    ebx,mm0
        pxor    mm1,[ecx*1+esi]
        pxor    mm0,mm2
        ; ... then its high nibble (edx).
        psrlq   mm0,4
        and     ebx,15
        movq    mm2,mm1
        psrlq   mm1,4
        pxor    mm0,[8+edx*1+esi]
        psllq   mm2,60
        pxor    mm1,[ebx*8+eax]
        movd    ebx,mm0
        pxor    mm1,[edx*1+esi]
        pxor    mm0,mm2
        ; Split the accumulator into four dwords, byte-swap each and store.
        psrlq   mm0,32
        movd    edx,mm1
        psrlq   mm1,32
        movd    ecx,mm0
        movd    ebp,mm1
        bswap   ebx
        bswap   edx
        bswap   ecx
        bswap   ebp
        emms
        mov     DWORD [12+edi],ebx
        mov     DWORD [4+edi],edx
        mov     DWORD [8+edi],ecx
        mov     DWORD [edi],ebp
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

;-----------------------------------------------------------------------
; _gcm_ghash_4bit_mmx
; Stack args (offsets valid after the four pushes below):
;   [20+esp] -> eax : 16-byte state; loaded before the loop, stored after it.
;   [24+esp] -> ebx : table of 16 entries x 16 bytes; preprocessed onto the
;                     stack before the loop.
;   [28+esp] -> ecx : input pointer, advanced by 16 per outer iteration.
;   [32+esp] -> edx : byte count; ecx+edx is kept as the end pointer.
;   (presumably Xi, Htable, inp, len of the C prototype -- confirm)
; The outer loop tests its exit condition at the bottom and leaves only
; when the input pointer equals the end pointer exactly, so the count is
; presumably a non-zero multiple of 16 -- confirm with the caller.
;
; Stack frame (esp is 64-byte aligned minus 16 after setup):
;   [  0+esp ..  15] 16 bytes: low byte of (dword at entry+8) << 4
;   [ 16+esp .. 143] 16 qwords: table qword at entry+8
;   [144+esp .. 271] 16 qwords: table qword at entry+0
;   [272+esp .. 399] 16 qwords: (entry+8 qword >> 4) | (entry+0 qword << 60)
;   [400+esp .. 527] 16 qwords: entry+0 qword >> 4
;   [528+esp .. 539] bytes 0..11 of (state XOR input block)
;   [544+esp]        arg0 (state pointer)
;   [548+esp]        current input pointer
;   [552+esp]        end pointer
;   [556+esp]        caller's esp (restored on exit)
; esi = address of L$rem_8bit throughout.
; Saves and restores ebp, ebx, esi, edi; executes emms before returning.
;-----------------------------------------------------------------------
global _gcm_ghash_4bit_mmx
align 16
_gcm_ghash_4bit_mmx:
L$_gcm_ghash_4bit_mmx_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi
        mov     eax,DWORD [20+esp]
        mov     ebx,DWORD [24+esp]
        mov     ecx,DWORD [28+esp]
        mov     edx,DWORD [32+esp]
        mov     ebp,esp                 ; remember caller's esp
        call    L$003pic_point
L$003pic_point:
        pop     esi
        lea     esi,[(L$rem_8bit-L$003pic_point)+esi]
        sub     esp,544
        and     esp,-64
        sub     esp,16
        add     edx,ecx                 ; edx = end of input
        mov     DWORD [544+esp],eax
        mov     DWORD [552+esp],edx
        mov     DWORD [556+esp],ebp
        add     ebx,128                 ; bias so all 256 table bytes fit in disp8
        lea     edi,[144+esp]
        lea     ebp,[400+esp]
        ; Build the four stack tables and the 16-byte nibble array, one
        ; table entry per group (software-pipelined across groups).
        mov     edx,DWORD [ebx-120]
        movq    mm0,[ebx-120]
        movq    mm3,[ebx-128]
        shl     edx,4
        mov     BYTE [esp],dl
        mov     edx,DWORD [ebx-104]
        movq    mm2,[ebx-104]
        movq    mm5,[ebx-112]
        movq    [edi-128],mm0
        psrlq   mm0,4
        movq    [edi],mm3
        movq    mm7,mm3
        psrlq   mm3,4
        shl     edx,4
        mov     BYTE [1+esp],dl
        mov     edx,DWORD [ebx-88]
        movq    mm1,[ebx-88]
        psllq   mm7,60
        movq    mm4,[ebx-96]
        por     mm0,mm7
        movq    [edi-120],mm2
        psrlq   mm2,4
        movq    [8+edi],mm5
        movq    mm6,mm5
        movq    [ebp-128],mm0
        psrlq   mm5,4
        movq    [ebp],mm3
        shl     edx,4
        mov     BYTE [2+esp],dl
        mov     edx,DWORD [ebx-72]
        movq    mm0,[ebx-72]
        psllq   mm6,60
        movq    mm3,[ebx-80]
        por     mm2,mm6
        movq    [edi-112],mm1
        psrlq   mm1,4
        movq    [16+edi],mm4
        movq    mm7,mm4
        movq    [ebp-120],mm2
        psrlq   mm4,4
        movq    [8+ebp],mm5
        shl     edx,4
        mov     BYTE [3+esp],dl
        mov     edx,DWORD [ebx-56]
        movq    mm2,[ebx-56]
        psllq   mm7,60
        movq    mm5,[ebx-64]
        por     mm1,mm7
        movq    [edi-104],mm0
        psrlq   mm0,4
        movq    [24+edi],mm3
        movq    mm6,mm3
        movq    [ebp-112],mm1
        psrlq   mm3,4
        movq    [16+ebp],mm4
        shl     edx,4
        mov     BYTE [4+esp],dl
        mov     edx,DWORD [ebx-40]
        movq    mm1,[ebx-40]
        psllq   mm6,60
        movq    mm4,[ebx-48]
        por     mm0,mm6
        movq    [edi-96],mm2
        psrlq   mm2,4
        movq    [32+edi],mm5
        movq    mm7,mm5
        movq    [ebp-104],mm0
        psrlq   mm5,4
        movq    [24+ebp],mm3
        shl     edx,4
        mov     BYTE [5+esp],dl
        mov     edx,DWORD [ebx-24]
        movq    mm0,[ebx-24]
        psllq   mm7,60
        movq    mm3,[ebx-32]
        por     mm2,mm7
        movq    [edi-88],mm1
        psrlq   mm1,4
        movq    [40+edi],mm4
        movq    mm6,mm4
        movq    [ebp-96],mm2
        psrlq   mm4,4
        movq    [32+ebp],mm5
        shl     edx,4
        mov     BYTE [6+esp],dl
        mov     edx,DWORD [ebx-8]
        movq    mm2,[ebx-8]
        psllq   mm6,60
        movq    mm5,[ebx-16]
        por     mm1,mm6
        movq    [edi-80],mm0
        psrlq   mm0,4
        movq    [48+edi],mm3
        movq    mm7,mm3
        movq    [ebp-88],mm1
        psrlq   mm3,4
        movq    [40+ebp],mm4
        shl     edx,4
        mov     BYTE [7+esp],dl
        mov     edx,DWORD [8+ebx]
        movq    mm1,[8+ebx]
        psllq   mm7,60
        movq    mm4,[ebx]
        por     mm0,mm7
        movq    [edi-72],mm2
        psrlq   mm2,4
        movq    [56+edi],mm5
        movq    mm6,mm5
        movq    [ebp-80],mm0
        psrlq   mm5,4
        movq    [48+ebp],mm3
        shl     edx,4
        mov     BYTE [8+esp],dl
        mov     edx,DWORD [24+ebx]
        movq    mm0,[24+ebx]
        psllq   mm6,60
        movq    mm3,[16+ebx]
        por     mm2,mm6
        movq    [edi-64],mm1
        psrlq   mm1,4
        movq    [64+edi],mm4
        movq    mm7,mm4
        movq    [ebp-72],mm2
        psrlq   mm4,4
        movq    [56+ebp],mm5
        shl     edx,4
        mov     BYTE [9+esp],dl
        mov     edx,DWORD [40+ebx]
        movq    mm2,[40+ebx]
        psllq   mm7,60
        movq    mm5,[32+ebx]
        por     mm1,mm7
        movq    [edi-56],mm0
        psrlq   mm0,4
        movq    [72+edi],mm3
        movq    mm6,mm3
        movq    [ebp-64],mm1
        psrlq   mm3,4
        movq    [64+ebp],mm4
        shl     edx,4
        mov     BYTE [10+esp],dl
        mov     edx,DWORD [56+ebx]
        movq    mm1,[56+ebx]
        psllq   mm6,60
        movq    mm4,[48+ebx]
        por     mm0,mm6
        movq    [edi-48],mm2
        psrlq   mm2,4
        movq    [80+edi],mm5
        movq    mm7,mm5
        movq    [ebp-56],mm0
        psrlq   mm5,4
        movq    [72+ebp],mm3
        shl     edx,4
        mov     BYTE [11+esp],dl
        mov     edx,DWORD [72+ebx]
        movq    mm0,[72+ebx]
        psllq   mm7,60
        movq    mm3,[64+ebx]
        por     mm2,mm7
        movq    [edi-40],mm1
        psrlq   mm1,4
        movq    [88+edi],mm4
        movq    mm6,mm4
        movq    [ebp-48],mm2
        psrlq   mm4,4
        movq    [80+ebp],mm5
        shl     edx,4
        mov     BYTE [12+esp],dl
        mov     edx,DWORD [88+ebx]
        movq    mm2,[88+ebx]
        psllq   mm6,60
        movq    mm5,[80+ebx]
        por     mm1,mm6
        movq    [edi-32],mm0
        psrlq   mm0,4
        movq    [96+edi],mm3
        movq    mm7,mm3
        movq    [ebp-40],mm1
        psrlq   mm3,4
        movq    [88+ebp],mm4
        shl     edx,4
        mov     BYTE [13+esp],dl
        mov     edx,DWORD [104+ebx]
        movq    mm1,[104+ebx]
        psllq   mm7,60
        movq    mm4,[96+ebx]
        por     mm0,mm7
        movq    [edi-24],mm2
        psrlq   mm2,4
        movq    [104+edi],mm5
        movq    mm6,mm5
        movq    [ebp-32],mm0
        psrlq   mm5,4
        movq    [96+ebp],mm3
        shl     edx,4
        mov     BYTE [14+esp],dl
        mov     edx,DWORD [120+ebx]
        movq    mm0,[120+ebx]
        psllq   mm6,60
        movq    mm3,[112+ebx]
        por     mm2,mm6
        movq    [edi-16],mm1
        psrlq   mm1,4
        movq    [112+edi],mm4
        movq    mm7,mm4
        movq    [ebp-24],mm2
        psrlq   mm4,4
        movq    [104+ebp],mm5
        shl     edx,4
        mov     BYTE [15+esp],dl
        psllq   mm7,60
        por     mm1,mm7
        movq    [edi-8],mm0
        psrlq   mm0,4
        movq    [120+edi],mm3
        movq    mm6,mm3
        movq    [ebp-16],mm1
        psrlq   mm3,4
        movq    [112+ebp],mm4
        psllq   mm6,60
        por     mm0,mm6
        movq    [ebp-8],mm0
        movq    [120+ebp],mm3
        ; Load the 16-byte state: bytes 0..7 in mm6, 8..11 in ebx, 12..15 in edx.
        movq    mm6,[eax]
        mov     ebx,DWORD [8+eax]
        mov     edx,DWORD [12+eax]
align 16
L$004outer:
        ; XOR the next 16 input bytes into the state and spill bytes 0..11.
        xor     edx,DWORD [12+ecx]
        xor     ebx,DWORD [8+ecx]
        pxor    mm6,[ecx]
        lea     ecx,[16+ecx]
        mov     DWORD [536+esp],ebx
        movq    [528+esp],mm6
        mov     DWORD [548+esp],ecx
        ; Bytes are consumed one at a time from the top of edx (rol 8 /
        ; mov al,dl); ebp and edi alternate as the high-nibble index, eax
        ; holds the low-nibble index, and mm0/mm1/mm2 rotate as holders of
        ; the L$rem_8bit word for the byte shifted out of mm7.
        xor     eax,eax
        rol     edx,8
        mov     al,dl
        mov     ebp,eax
        and     al,15
        shr     ebp,4
        pxor    mm0,mm0
        rol     edx,8
        pxor    mm1,mm1
        pxor    mm2,mm2
        movq    mm7,[16+eax*8+esp]
        movq    mm6,[144+eax*8+esp]
        mov     al,dl
        movd    ebx,mm7
        psrlq   mm7,8
        movq    mm3,mm6
        mov     edi,eax
        psrlq   mm6,8
        pxor    mm7,[272+ebp*8+esp]
        and     al,15
        psllq   mm3,56
        shr     edi,4
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+ebp*8+esp]
        xor     bl,BYTE [ebp*1+esp]
        mov     al,dl
        movd    ecx,mm7
        movzx   ebx,bl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     ebp,eax
        psrlq   mm6,8
        pxor    mm7,[272+edi*8+esp]
        and     al,15
        psllq   mm3,56
        shr     ebp,4
        pinsrw  mm2,WORD [ebx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+edi*8+esp]
        xor     cl,BYTE [edi*1+esp]
        mov     al,dl
        mov     edx,DWORD [536+esp]     ; next dword: bytes 8..11
        movd    ebx,mm7
        movzx   ecx,cl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     edi,eax
        psrlq   mm6,8
        pxor    mm7,[272+ebp*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm2
        shr     edi,4
        pinsrw  mm1,WORD [ecx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+ebp*8+esp]
        xor     bl,BYTE [ebp*1+esp]
        mov     al,dl
        movd    ecx,mm7
        movzx   ebx,bl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     ebp,eax
        psrlq   mm6,8
        pxor    mm7,[272+edi*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm1
        shr     ebp,4
        pinsrw  mm0,WORD [ebx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+edi*8+esp]
        xor     cl,BYTE [edi*1+esp]
        mov     al,dl
        movd    ebx,mm7
        movzx   ecx,cl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     edi,eax
        psrlq   mm6,8
        pxor    mm7,[272+ebp*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm0
        shr     edi,4
        pinsrw  mm2,WORD [ecx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+ebp*8+esp]
        xor     bl,BYTE [ebp*1+esp]
        mov     al,dl
        movd    ecx,mm7
        movzx   ebx,bl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     ebp,eax
        psrlq   mm6,8
        pxor    mm7,[272+edi*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm2
        shr     ebp,4
        pinsrw  mm1,WORD [ebx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+edi*8+esp]
        xor     cl,BYTE [edi*1+esp]
        mov     al,dl
        mov     edx,DWORD [532+esp]     ; next dword: bytes 4..7
        movd    ebx,mm7
        movzx   ecx,cl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     edi,eax
        psrlq   mm6,8
        pxor    mm7,[272+ebp*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm1
        shr     edi,4
        pinsrw  mm0,WORD [ecx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+ebp*8+esp]
        xor     bl,BYTE [ebp*1+esp]
        mov     al,dl
        movd    ecx,mm7
        movzx   ebx,bl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     ebp,eax
        psrlq   mm6,8
        pxor    mm7,[272+edi*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm0
        shr     ebp,4
        pinsrw  mm2,WORD [ebx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+edi*8+esp]
        xor     cl,BYTE [edi*1+esp]
        mov     al,dl
        movd    ebx,mm7
        movzx   ecx,cl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     edi,eax
        psrlq   mm6,8
        pxor    mm7,[272+ebp*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm2
        shr     edi,4
        pinsrw  mm1,WORD [ecx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+ebp*8+esp]
        xor     bl,BYTE [ebp*1+esp]
        mov     al,dl
        movd    ecx,mm7
        movzx   ebx,bl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     ebp,eax
        psrlq   mm6,8
        pxor    mm7,[272+edi*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm1
        shr     ebp,4
        pinsrw  mm0,WORD [ebx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+edi*8+esp]
        xor     cl,BYTE [edi*1+esp]
        mov     al,dl
        mov     edx,DWORD [528+esp]     ; next dword: bytes 0..3
        movd    ebx,mm7
        movzx   ecx,cl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     edi,eax
        psrlq   mm6,8
        pxor    mm7,[272+ebp*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm0
        shr     edi,4
        pinsrw  mm2,WORD [ecx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+ebp*8+esp]
        xor     bl,BYTE [ebp*1+esp]
        mov     al,dl
        movd    ecx,mm7
        movzx   ebx,bl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     ebp,eax
        psrlq   mm6,8
        pxor    mm7,[272+edi*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm2
        shr     ebp,4
        pinsrw  mm1,WORD [ebx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+edi*8+esp]
        xor     cl,BYTE [edi*1+esp]
        mov     al,dl
        movd    ebx,mm7
        movzx   ecx,cl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     edi,eax
        psrlq   mm6,8
        pxor    mm7,[272+ebp*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm1
        shr     edi,4
        pinsrw  mm0,WORD [ecx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+ebp*8+esp]
        xor     bl,BYTE [ebp*1+esp]
        mov     al,dl
        movd    ecx,mm7
        movzx   ebx,bl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     ebp,eax
        psrlq   mm6,8
        pxor    mm7,[272+edi*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm0
        shr     ebp,4
        pinsrw  mm2,WORD [ebx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+edi*8+esp]
        xor     cl,BYTE [edi*1+esp]
        mov     al,dl
        mov     edx,DWORD [524+esp]     ; value is not used: edx is overwritten by movd below
        movd    ebx,mm7
        movzx   ecx,cl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     edi,eax
        psrlq   mm6,8
        pxor    mm7,[272+ebp*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm2
        shr     edi,4
        pinsrw  mm1,WORD [ecx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        pxor    mm6,[144+eax*8+esp]
        xor     bl,BYTE [ebp*1+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+ebp*8+esp]
        movzx   ebx,bl
        ; Final step of the block: a 4-bit shift for the last high nibble
        ; (edi), then merge the outstanding L$rem_8bit words.
        pxor    mm2,mm2
        psllq   mm1,4
        movd    ecx,mm7
        psrlq   mm7,4
        movq    mm3,mm6
        psrlq   mm6,4
        shl     ecx,4
        pxor    mm7,[16+edi*8+esp]
        psllq   mm3,60
        movzx   ecx,cl
        pxor    mm7,mm3
        pxor    mm6,[144+edi*8+esp]
        pinsrw  mm0,WORD [ebx*2+esi],2
        pxor    mm6,mm1
        movd    edx,mm7
        pinsrw  mm2,WORD [ecx*2+esi],3
        psllq   mm0,12
        pxor    mm6,mm0
        psrlq   mm7,32
        pxor    mm6,mm2
        mov     ecx,DWORD [548+esp]     ; reload input pointer
        movd    ebx,mm7
        ; Byte-reverse mm6: swap the bytes in each word, then reverse the
        ; word order (pshufw 27); edx/ebx are byte-swapped with bswap.
        movq    mm3,mm6
        psllw   mm6,8
        psrlw   mm3,8
        por     mm6,mm3
        bswap   edx
        pshufw  mm6,mm6,27
        bswap   ebx
        cmp     ecx,DWORD [552+esp]
        jne     NEAR L$004outer
        mov     eax,DWORD [544+esp]
        mov     DWORD [12+eax],edx
        mov     DWORD [8+eax],ebx
        movq    [eax],mm6
        mov     esp,DWORD [556+esp]     ; drop the aligned frame
        emms
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

;-----------------------------------------------------------------------
; _gcm_init_clmul
; Stack args (no pushes, offsets as at entry):
;   [4+esp] -> edx : output; 48 bytes are written at [edx], [16+edx], [32+edx].
;   [8+esp] -> eax : 16-byte input, loaded with an unaligned load.
;   (presumably Htable and the hash key H -- confirm against the C
;    declaration)
; ecx = address of L$bswap; only its second 16 bytes are used here.
; Uses eax, ecx, edx and xmm0-xmm5 only; nothing is saved on the stack.
;-----------------------------------------------------------------------
global _gcm_init_clmul
align 16
_gcm_init_clmul:
L$_gcm_init_clmul_begin:
        mov     edx,DWORD [4+esp]
        mov     eax,DWORD [8+esp]
        call    L$005pic
L$005pic:
        pop     ecx
        lea     ecx,[(L$bswap-L$005pic)+ecx]
        movdqu  xmm2,[eax]
        pshufd  xmm2,xmm2,78            ; swap the two qwords
        ; Shift the 128-bit value left by one bit; if the top dword was
        ; negative, XOR in the constant at [16+ecx].
        pshufd  xmm4,xmm2,255
        movdqa  xmm3,xmm2
        psllq   xmm2,1
        pxor    xmm5,xmm5
        psrlq   xmm3,63
        pcmpgtd xmm5,xmm4
        pslldq  xmm3,8
        por     xmm2,xmm3
        pand    xmm5,[16+ecx]
        pxor    xmm2,xmm5
        ; Multiply xmm2 by itself: three carry-less multiplies (low halves,
        ; high halves, XOR-folded halves).
        movdqa  xmm0,xmm2
        movdqa  xmm1,xmm0
        pshufd  xmm3,xmm0,78
        pshufd  xmm4,xmm2,78
        pxor    xmm3,xmm0
        pxor    xmm4,xmm2
db 102,15,58,68,194,0                   ; pclmulqdq xmm0,xmm2,0x00
db 102,15,58,68,202,17                  ; pclmulqdq xmm1,xmm2,0x11
db 102,15,58,68,220,0                   ; pclmulqdq xmm3,xmm4,0x00
        xorps   xmm3,xmm0
        xorps   xmm3,xmm1
        movdqa  xmm4,xmm3
        psrldq  xmm3,8
        pslldq  xmm4,8
        pxor    xmm1,xmm3
        pxor    xmm0,xmm4
        ; Shift/XOR folding of xmm1:xmm0 down to 128 bits in xmm0
        ; (presumably the GF(2^128) reduction -- confirm in the Perl source).
        movdqa  xmm4,xmm0
        movdqa  xmm3,xmm0
        psllq   xmm0,5
        pxor    xmm3,xmm0
        psllq   xmm0,1
        pxor    xmm0,xmm3
        psllq   xmm0,57
        movdqa  xmm3,xmm0
        pslldq  xmm0,8
        psrldq  xmm3,8
        pxor    xmm0,xmm4
        pxor    xmm1,xmm3
        movdqa  xmm4,xmm0
        psrlq   xmm0,1
        pxor    xmm1,xmm4
        pxor    xmm4,xmm0
        psrlq   xmm0,5
        pxor    xmm0,xmm4
        psrlq   xmm0,1
        pxor    xmm0,xmm1
        ; Store the adjusted input, its square, and the XOR-folded halves of both.
        pshufd  xmm3,xmm2,78
        pshufd  xmm4,xmm0,78
        pxor    xmm3,xmm2
        movdqu  [edx],xmm2
        pxor    xmm4,xmm0
        movdqu  [16+edx],xmm0
db 102,15,58,15,227,8                   ; palignr xmm4,xmm3,8
        movdqu  [32+edx],xmm4
        ret

;-----------------------------------------------------------------------
; _gcm_gmult_clmul
; Stack args (no pushes, offsets as at entry):
;   [4+esp] -> eax : 16-byte state, read and overwritten in place.
;   [8+esp] -> edx : table as written by _gcm_init_clmul; [edx] and
;                    [32+edx] are read.
; ecx = address of L$bswap; xmm5 = byte-reversal mask from [ecx].
; Uses eax, ecx, edx and xmm0-xmm5 only; nothing is saved on the stack.
;-----------------------------------------------------------------------
global _gcm_gmult_clmul
align 16
_gcm_gmult_clmul:
L$_gcm_gmult_clmul_begin:
        mov     eax,DWORD [4+esp]
        mov     edx,DWORD [8+esp]
        call    L$006pic
L$006pic:
        pop     ecx
        lea     ecx,[(L$bswap-L$006pic)+ecx]
        movdqu  xmm0,[eax]
        movdqa  xmm5,[ecx]
        movups  xmm2,[edx]
db 102,15,56,0,197                      ; pshufb xmm0,xmm5 (byte-reverse state)
        movups  xmm4,[32+edx]
        movdqa  xmm1,xmm0
        pshufd  xmm3,xmm0,78
        pxor    xmm3,xmm0
db 102,15,58,68,194,0                   ; pclmulqdq xmm0,xmm2,0x00
db 102,15,58,68,202,17                  ; pclmulqdq xmm1,xmm2,0x11
db 102,15,58,68,220,0                   ; pclmulqdq xmm3,xmm4,0x00
        xorps   xmm3,xmm0
        xorps   xmm3,xmm1
        movdqa  xmm4,xmm3
        psrldq  xmm3,8
        pslldq  xmm4,8
        pxor    xmm1,xmm3
        pxor    xmm0,xmm4
        ; Same shift/XOR folding sequence as in _gcm_init_clmul.
        movdqa  xmm4,xmm0
        movdqa  xmm3,xmm0
        psllq   xmm0,5
        pxor    xmm3,xmm0
        psllq   xmm0,1
        pxor    xmm0,xmm3
        psllq   xmm0,57
        movdqa  xmm3,xmm0
        pslldq  xmm0,8
        psrldq  xmm3,8
        pxor    xmm0,xmm4
        pxor    xmm1,xmm3
        movdqa  xmm4,xmm0
        psrlq   xmm0,1
        pxor    xmm1,xmm4
        pxor    xmm4,xmm0
        psrlq   xmm0,5
        pxor    xmm0,xmm4
        psrlq   xmm0,1
        pxor    xmm0,xmm1
db 102,15,56,0,197                      ; pshufb xmm0,xmm5 (back to memory byte order)
        movdqu  [eax],xmm0
        ret

;-----------------------------------------------------------------------
; _gcm_ghash_clmul
; Stack args (offsets valid after the four pushes below):
;   [20+esp] -> eax : 16-byte state, read at entry and written at exit.
;   [24+esp] -> edx : table as written by _gcm_init_clmul ([edx], [16+edx]
;                     and [32+edx] are read).
;   [28+esp] -> esi : input pointer.
;   [32+esp] -> ebx : byte count.
;   (presumably Xi, Htable, inp, len -- confirm against the C declaration;
;    the count is presumably a non-zero multiple of 16)
; Flow: a count of exactly 16 goes straight to L$008odd_tail; otherwise
; blocks are taken two at a time (L$010mod_loop / L$009even_tail), and a
; single leftover block falls through into L$008odd_tail.
; ecx = address of L$bswap. Saves and restores ebp, ebx, esi, edi.
;-----------------------------------------------------------------------
global _gcm_ghash_clmul
align 16
_gcm_ghash_clmul:
L$_gcm_ghash_clmul_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi
        mov     eax,DWORD [20+esp]
        mov     edx,DWORD [24+esp]
        mov     esi,DWORD [28+esp]
        mov     ebx,DWORD [32+esp]
        call    L$007pic
L$007pic:
        pop     ecx
        lea     ecx,[(L$bswap-L$007pic)+ecx]
        movdqu  xmm0,[eax]
        movdqa  xmm5,[ecx]
        movdqu  xmm2,[edx]
db 102,15,56,0,197                      ; pshufb xmm0,xmm5
        sub     ebx,16
        jz      NEAR L$008odd_tail      ; exactly one block
        ; Load and byte-reverse the first two blocks; start the multiplies
        ; for the second one.
        movdqu  xmm3,[esi]
        movdqu  xmm6,[16+esi]
db 102,15,56,0,221                      ; pshufb xmm3,xmm5
db 102,15,56,0,245                      ; pshufb xmm6,xmm5
        movdqu  xmm5,[32+edx]
        pxor    xmm0,xmm3
        pshufd  xmm3,xmm6,78
        movdqa  xmm7,xmm6
        pxor    xmm3,xmm6
        lea     esi,[32+esi]
db 102,15,58,68,242,0                   ; pclmulqdq xmm6,xmm2,0x00
db 102,15,58,68,250,17                  ; pclmulqdq xmm7,xmm2,0x11
db 102,15,58,68,221,0                   ; pclmulqdq xmm3,xmm5,0x00
        movups  xmm2,[16+edx]
        nop
        sub     ebx,32
        jbe     NEAR L$009even_tail
        jmp     NEAR L$010mod_loop
align 32
L$010mod_loop:
        pshufd  xmm4,xmm0,78
        movdqa  xmm1,xmm0
        pxor    xmm4,xmm0
        nop
db 102,15,58,68,194,0                   ; pclmulqdq xmm0,xmm2,0x00
db 102,15,58,68,202,17                  ; pclmulqdq xmm1,xmm2,0x11
db 102,15,58,68,229,16                  ; pclmulqdq xmm4,xmm5,0x10
        movups  xmm2,[edx]
        xorps   xmm0,xmm6
        movdqa  xmm5,[ecx]
        xorps   xmm1,xmm7
        movdqu  xmm7,[esi]
        pxor    xmm3,xmm0
        movdqu  xmm6,[16+esi]
        pxor    xmm3,xmm1
db 102,15,56,0,253                      ; pshufb xmm7,xmm5
        pxor    xmm4,xmm3
        movdqa  xmm3,xmm4
        psrldq  xmm4,8
        pslldq  xmm3,8
        pxor    xmm1,xmm4
        pxor    xmm0,xmm3
db 102,15,56,0,245                      ; pshufb xmm6,xmm5
        pxor    xmm1,xmm7
        movdqa  xmm7,xmm6
        movdqa  xmm4,xmm0
        movdqa  xmm3,xmm0
        psllq   xmm0,5
        pxor    xmm3,xmm0
        psllq   xmm0,1
        pxor    xmm0,xmm3
db 102,15,58,68,242,0                   ; pclmulqdq xmm6,xmm2,0x00
        movups  xmm5,[32+edx]
        psllq   xmm0,57
        movdqa  xmm3,xmm0
        pslldq  xmm0,8
        psrldq  xmm3,8
        pxor    xmm0,xmm4
        pxor    xmm1,xmm3
        pshufd  xmm3,xmm7,78
        movdqa  xmm4,xmm0
        psrlq   xmm0,1
        pxor    xmm3,xmm7
        pxor    xmm1,xmm4
db 102,15,58,68,250,17                  ; pclmulqdq xmm7,xmm2,0x11
        movups  xmm2,[16+edx]
        pxor    xmm4,xmm0
        psrlq   xmm0,5
        pxor    xmm0,xmm4
        psrlq   xmm0,1
        pxor    xmm0,xmm1
db 102,15,58,68,221,0                   ; pclmulqdq xmm3,xmm5,0x00
        lea     esi,[32+esi]
        sub     ebx,32
        ja      NEAR L$010mod_loop
L$009even_tail:
        pshufd  xmm4,xmm0,78
        movdqa  xmm1,xmm0
        pxor    xmm4,xmm0
db 102,15,58,68,194,0                   ; pclmulqdq xmm0,xmm2,0x00
db 102,15,58,68,202,17                  ; pclmulqdq xmm1,xmm2,0x11
db 102,15,58,68,229,16                  ; pclmulqdq xmm4,xmm5,0x10
        movdqa  xmm5,[ecx]
        xorps   xmm0,xmm6
        xorps   xmm1,xmm7
        pxor    xmm3,xmm0
        pxor    xmm3,xmm1
        pxor    xmm4,xmm3
        movdqa  xmm3,xmm4
        psrldq  xmm4,8
        pslldq  xmm3,8
        pxor    xmm1,xmm4
        pxor    xmm0,xmm3
        movdqa  xmm4,xmm0
        movdqa  xmm3,xmm0
        psllq   xmm0,5
        pxor    xmm3,xmm0
        psllq   xmm0,1
        pxor    xmm0,xmm3
        psllq   xmm0,57
        movdqa  xmm3,xmm0
        pslldq  xmm0,8
        psrldq  xmm3,8
        pxor    xmm0,xmm4
        pxor    xmm1,xmm3
        movdqa  xmm4,xmm0
        psrlq   xmm0,1
        pxor    xmm1,xmm4
        pxor    xmm4,xmm0
        psrlq   xmm0,5
        pxor    xmm0,xmm4
        psrlq   xmm0,1
        pxor    xmm0,xmm1
        test    ebx,ebx
        jnz     NEAR L$011done          ; ebx != 0: no block left over
        movups  xmm2,[edx]              ; ebx == 0: one block still pending
L$008odd_tail:
        movdqu  xmm3,[esi]
db 102,15,56,0,221                      ; pshufb xmm3,xmm5
        pxor    xmm0,xmm3
        movdqa  xmm1,xmm0
        pshufd  xmm3,xmm0,78
        pshufd  xmm4,xmm2,78
        pxor    xmm3,xmm0
        pxor    xmm4,xmm2
db 102,15,58,68,194,0                   ; pclmulqdq xmm0,xmm2,0x00
db 102,15,58,68,202,17                  ; pclmulqdq xmm1,xmm2,0x11
db 102,15,58,68,220,0                   ; pclmulqdq xmm3,xmm4,0x00
        xorps   xmm3,xmm0
        xorps   xmm3,xmm1
        movdqa  xmm4,xmm3
        psrldq  xmm3,8
        pslldq  xmm4,8
        pxor    xmm1,xmm3
        pxor    xmm0,xmm4
        movdqa  xmm4,xmm0
        movdqa  xmm3,xmm0
        psllq   xmm0,5
        pxor    xmm3,xmm0
        psllq   xmm0,1
        pxor    xmm0,xmm3
        psllq   xmm0,57
        movdqa  xmm3,xmm0
        pslldq  xmm0,8
        psrldq  xmm3,8
        pxor    xmm0,xmm4
        pxor    xmm1,xmm3
        movdqa  xmm4,xmm0
        psrlq   xmm0,1
        pxor    xmm1,xmm4
        pxor    xmm4,xmm0
        psrlq   xmm0,5
        pxor    xmm0,xmm4
        psrlq   xmm0,1
        pxor    xmm0,xmm1
L$011done:
db 102,15,56,0,197                      ; pshufb xmm0,xmm5
        movdqu  [eax],xmm0
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

; L$bswap: first 16 bytes are the pshufb byte-reversal mask; the second
; 16 bytes (low byte 1, high byte 194 = 0xC2) are the constant ANDed in
; _gcm_init_clmul via [16+ecx].
align 64
L$bswap:
db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
db 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194

; L$rem_8bit: 256 words, indexed by a byte value (*2) from
; _gcm_ghash_4bit_mmx.
align 64
L$rem_8bit:
dw 0,450,900,582,1800,1738,1164,1358
dw 3600,4050,3476,3158,2328,2266,2716,2910
dw 7200,7650,8100,7782,6952,6890,6316,6510
dw 4656,5106,4532,4214,5432,5370,5820,6014
dw 14400,14722,15300,14854,16200,16010,15564,15630
dw 13904,14226,13780,13334,12632,12442,13020,13086
dw 9312,9634,10212,9766,9064,8874,8428,8494
dw 10864,11186,10740,10294,11640,11450,12028,12094
dw 28800,28994,29444,29382,30600,30282,29708,30158
dw 32400,32594,32020,31958,31128,30810,31260,31710
dw 27808,28002,28452,28390,27560,27242,26668,27118
dw 25264,25458,24884,24822,26040,25722,26172,26622
dw 18624,18690,19268,19078,20424,19978,19532,19854
dw 18128,18194,17748,17558,16856,16410,16988,17310
dw 21728,21794,22372,22182,21480,21034,20588,20910
dw 23280,23346,22900,22710,24056,23610,24188,24510
dw 57600,57538,57988,58182,58888,59338,58764,58446
dw 61200,61138,60564,60758,59416,59866,60316,59998
dw 64800,64738,65188,65382,64040,64490,63916,63598
dw 62256,62194,61620,61814,62520,62970,63420,63102
dw 55616,55426,56004,56070,56904,57226,56780,56334
dw 55120,54930,54484,54550,53336,53658,54236,53790
dw 50528,50338,50916,50982,49768,50090,49644,49198
dw 52080,51890,51444,51510,52344,52666,53244,52798
dw 37248,36930,37380,37830,38536,38730,38156,38094
dw 40848,40530,39956,40406,39064,39258,39708,39646
dw 36256,35938,36388,36838,35496,35690,35116,35054
dw 33712,33394,32820,33270,33976,34170,34620,34558
dw 43456,43010,43588,43910,44744,44810,44364,44174
dw 42960,42514,42068,42390,41176,41242,41820,41630
dw 46560,46114,46692,47014,45800,45866,45420,45230
dw 48112,47666,47220,47542,48376,48442,49020,48830

; L$rem_4bit: 16 qwords (each written as a dd pair, low dword first),
; indexed by a 4-bit value (*8) from _gcm_gmult_4bit_mmx.
align 64
L$rem_4bit:
dd 0,0,0,471859200,0,943718400,0,610271232
dd 0,1887436800,0,1822425088,0,1220542464,0,1423966208
dd 0,3774873600,0,4246732800,0,3644850176,0,3311403008
dd 0,2441084928,0,2376073216,0,2847932416,0,3051356160
; NUL-terminated ASCII: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
db 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
db 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
db 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
db 0