1%ifidn __OUTPUT_FORMAT__,obj 2section code use32 class=code align=64 3%elifidn __OUTPUT_FORMAT__,win32 4$@feat.00 equ 1 5section .text code align=64 6%else 7section .text code 8%endif 9;extern _OPENSSL_ia32cap_P 10align 64 11global _poly1305_init 12align 16 13_poly1305_init: 14L$_poly1305_init_begin: 15 push ebp 16 push ebx 17 push esi 18 push edi 19 mov edi,DWORD [20+esp] 20 mov esi,DWORD [24+esp] 21 mov ebp,DWORD [28+esp] 22 xor eax,eax 23 mov DWORD [edi],eax 24 mov DWORD [4+edi],eax 25 mov DWORD [8+edi],eax 26 mov DWORD [12+edi],eax 27 mov DWORD [16+edi],eax 28 mov DWORD [20+edi],eax 29 cmp esi,0 30 je NEAR L$000nokey 31 call L$001pic_point 32L$001pic_point: 33 pop ebx 34 lea eax,[(_poly1305_blocks-L$001pic_point)+ebx] 35 lea edx,[(_poly1305_emit-L$001pic_point)+ebx] 36 lea edi,[_OPENSSL_ia32cap_P] 37 mov ecx,DWORD [edi] 38 and ecx,83886080 39 cmp ecx,83886080 40 jne NEAR L$002no_sse2 41 lea eax,[(__poly1305_blocks_sse2-L$001pic_point)+ebx] 42 lea edx,[(__poly1305_emit_sse2-L$001pic_point)+ebx] 43 mov ecx,DWORD [8+edi] 44 test ecx,32 45 jz NEAR L$002no_sse2 46 lea eax,[(__poly1305_blocks_avx2-L$001pic_point)+ebx] 47L$002no_sse2: 48 mov edi,DWORD [20+esp] 49 mov DWORD [ebp],eax 50 mov DWORD [4+ebp],edx 51 mov eax,DWORD [esi] 52 mov ebx,DWORD [4+esi] 53 mov ecx,DWORD [8+esi] 54 mov edx,DWORD [12+esi] 55 and eax,268435455 56 and ebx,268435452 57 and ecx,268435452 58 and edx,268435452 59 mov DWORD [24+edi],eax 60 mov DWORD [28+edi],ebx 61 mov DWORD [32+edi],ecx 62 mov DWORD [36+edi],edx 63 mov eax,1 64L$000nokey: 65 pop edi 66 pop esi 67 pop ebx 68 pop ebp 69 ret 70global _poly1305_blocks 71align 16 72_poly1305_blocks: 73L$_poly1305_blocks_begin: 74 push ebp 75 push ebx 76 push esi 77 push edi 78 mov edi,DWORD [20+esp] 79 mov esi,DWORD [24+esp] 80 mov ecx,DWORD [28+esp] 81L$enter_blocks: 82 and ecx,-15 83 jz NEAR L$003nodata 84 sub esp,64 85 mov eax,DWORD [24+edi] 86 mov ebx,DWORD [28+edi] 87 lea ebp,[ecx*1+esi] 88 mov ecx,DWORD [32+edi] 89 mov edx,DWORD [36+edi] 90 mov DWORD [92+esp],ebp 91 mov ebp,esi 92 mov DWORD [36+esp],eax 93 mov eax,ebx 94 shr eax,2 95 mov DWORD [40+esp],ebx 96 add eax,ebx 97 mov ebx,ecx 98 shr ebx,2 99 mov DWORD [44+esp],ecx 100 add ebx,ecx 101 mov ecx,edx 102 shr ecx,2 103 mov DWORD [48+esp],edx 104 add ecx,edx 105 mov DWORD [52+esp],eax 106 mov DWORD [56+esp],ebx 107 mov DWORD [60+esp],ecx 108 mov eax,DWORD [edi] 109 mov ebx,DWORD [4+edi] 110 mov ecx,DWORD [8+edi] 111 mov esi,DWORD [12+edi] 112 mov edi,DWORD [16+edi] 113 jmp NEAR L$004loop 114align 32 115L$004loop: 116 add eax,DWORD [ebp] 117 adc ebx,DWORD [4+ebp] 118 adc ecx,DWORD [8+ebp] 119 adc esi,DWORD [12+ebp] 120 lea ebp,[16+ebp] 121 adc edi,DWORD [96+esp] 122 mov DWORD [esp],eax 123 mov DWORD [12+esp],esi 124 mul DWORD [36+esp] 125 mov DWORD [16+esp],edi 126 mov edi,eax 127 mov eax,ebx 128 mov esi,edx 129 mul DWORD [60+esp] 130 add edi,eax 131 mov eax,ecx 132 adc esi,edx 133 mul DWORD [56+esp] 134 add edi,eax 135 mov eax,DWORD [12+esp] 136 adc esi,edx 137 mul DWORD [52+esp] 138 add edi,eax 139 mov eax,DWORD [esp] 140 adc esi,edx 141 mul DWORD [40+esp] 142 mov DWORD [20+esp],edi 143 xor edi,edi 144 add esi,eax 145 mov eax,ebx 146 adc edi,edx 147 mul DWORD [36+esp] 148 add esi,eax 149 mov eax,ecx 150 adc edi,edx 151 mul DWORD [60+esp] 152 add esi,eax 153 mov eax,DWORD [12+esp] 154 adc edi,edx 155 mul DWORD [56+esp] 156 add esi,eax 157 mov eax,DWORD [16+esp] 158 adc edi,edx 159 imul eax,DWORD [52+esp] 160 add esi,eax 161 mov eax,DWORD [esp] 162 adc edi,0 163 mul DWORD [44+esp] 164 mov DWORD [24+esp],esi 165 xor esi,esi 166 add edi,eax 167 mov eax,ebx 168 adc esi,edx 169 mul DWORD [40+esp] 170 add edi,eax 171 mov eax,ecx 172 adc esi,edx 173 mul DWORD [36+esp] 174 add edi,eax 175 mov eax,DWORD [12+esp] 176 adc esi,edx 177 mul DWORD [60+esp] 178 add edi,eax 179 mov eax,DWORD [16+esp] 180 adc esi,edx 181 imul eax,DWORD [56+esp] 182 add edi,eax 183 mov eax,DWORD [esp] 184 adc esi,0 185 mul DWORD [48+esp] 186 mov DWORD [28+esp],edi 187 xor edi,edi 188 add esi,eax 189 mov eax,ebx 190 adc edi,edx 191 mul DWORD [44+esp] 192 add esi,eax 193 mov eax,ecx 194 adc edi,edx 195 mul DWORD [40+esp] 196 add esi,eax 197 mov eax,DWORD [12+esp] 198 adc edi,edx 199 mul DWORD [36+esp] 200 add esi,eax 201 mov ecx,DWORD [16+esp] 202 adc edi,edx 203 mov edx,ecx 204 imul ecx,DWORD [60+esp] 205 add esi,ecx 206 mov eax,DWORD [20+esp] 207 adc edi,0 208 imul edx,DWORD [36+esp] 209 add edx,edi 210 mov ebx,DWORD [24+esp] 211 mov ecx,DWORD [28+esp] 212 mov edi,edx 213 shr edx,2 214 and edi,3 215 lea edx,[edx*4+edx] 216 add eax,edx 217 adc ebx,0 218 adc ecx,0 219 adc esi,0 220 adc edi,0 221 cmp ebp,DWORD [92+esp] 222 jne NEAR L$004loop 223 mov edx,DWORD [84+esp] 224 add esp,64 225 mov DWORD [edx],eax 226 mov DWORD [4+edx],ebx 227 mov DWORD [8+edx],ecx 228 mov DWORD [12+edx],esi 229 mov DWORD [16+edx],edi 230L$003nodata: 231 pop edi 232 pop esi 233 pop ebx 234 pop ebp 235 ret 236global _poly1305_emit 237align 16 238_poly1305_emit: 239L$_poly1305_emit_begin: 240 push ebp 241 push ebx 242 push esi 243 push edi 244 mov ebp,DWORD [20+esp] 245L$enter_emit: 246 mov edi,DWORD [24+esp] 247 mov eax,DWORD [ebp] 248 mov ebx,DWORD [4+ebp] 249 mov ecx,DWORD [8+ebp] 250 mov edx,DWORD [12+ebp] 251 mov esi,DWORD [16+ebp] 252 add eax,5 253 adc ebx,0 254 adc ecx,0 255 adc edx,0 256 adc esi,0 257 shr esi,2 258 neg esi 259 and eax,esi 260 and ebx,esi 261 and ecx,esi 262 and edx,esi 263 mov DWORD [edi],eax 264 mov DWORD [4+edi],ebx 265 mov DWORD [8+edi],ecx 266 mov DWORD [12+edi],edx 267 not esi 268 mov eax,DWORD [ebp] 269 mov ebx,DWORD [4+ebp] 270 mov ecx,DWORD [8+ebp] 271 mov edx,DWORD [12+ebp] 272 mov ebp,DWORD [28+esp] 273 and eax,esi 274 and ebx,esi 275 and ecx,esi 276 and edx,esi 277 or eax,DWORD [edi] 278 or ebx,DWORD [4+edi] 279 or ecx,DWORD [8+edi] 280 or edx,DWORD [12+edi] 281 add eax,DWORD [ebp] 282 adc ebx,DWORD [4+ebp] 283 adc ecx,DWORD [8+ebp] 284 adc edx,DWORD [12+ebp] 285 mov DWORD [edi],eax 286 mov DWORD [4+edi],ebx 287 mov DWORD [8+edi],ecx 288 mov DWORD [12+edi],edx 289 pop edi 290 pop esi 291 pop ebx 292 pop ebp 293 ret 294align 32 295align 16 296__poly1305_init_sse2: 297 movdqu xmm4,[24+edi] 298 lea edi,[48+edi] 299 mov ebp,esp 300 sub esp,224 301 and esp,-16 302 movq xmm7,[64+ebx] 303 movdqa xmm0,xmm4 304 movdqa xmm1,xmm4 305 movdqa xmm2,xmm4 306 pand xmm0,xmm7 307 psrlq xmm1,26 308 psrldq xmm2,6 309 pand xmm1,xmm7 310 movdqa xmm3,xmm2 311 psrlq xmm2,4 312 psrlq xmm3,30 313 pand xmm2,xmm7 314 pand xmm3,xmm7 315 psrldq xmm4,13 316 lea edx,[144+esp] 317 mov ecx,2 318L$005square: 319 movdqa [esp],xmm0 320 movdqa [16+esp],xmm1 321 movdqa [32+esp],xmm2 322 movdqa [48+esp],xmm3 323 movdqa [64+esp],xmm4 324 movdqa xmm6,xmm1 325 movdqa xmm5,xmm2 326 pslld xmm6,2 327 pslld xmm5,2 328 paddd xmm6,xmm1 329 paddd xmm5,xmm2 330 movdqa [80+esp],xmm6 331 movdqa [96+esp],xmm5 332 movdqa xmm6,xmm3 333 movdqa xmm5,xmm4 334 pslld xmm6,2 335 pslld xmm5,2 336 paddd xmm6,xmm3 337 paddd xmm5,xmm4 338 movdqa [112+esp],xmm6 339 movdqa [128+esp],xmm5 340 pshufd xmm6,xmm0,68 341 movdqa xmm5,xmm1 342 pshufd xmm1,xmm1,68 343 pshufd xmm2,xmm2,68 344 pshufd xmm3,xmm3,68 345 pshufd xmm4,xmm4,68 346 movdqa [edx],xmm6 347 movdqa [16+edx],xmm1 348 movdqa [32+edx],xmm2 349 movdqa [48+edx],xmm3 350 movdqa [64+edx],xmm4 351 pmuludq xmm4,xmm0 352 pmuludq xmm3,xmm0 353 pmuludq xmm2,xmm0 354 pmuludq xmm1,xmm0 355 pmuludq xmm0,xmm6 356 movdqa xmm6,xmm5 357 pmuludq xmm5,[48+edx] 358 movdqa xmm7,xmm6 359 pmuludq xmm6,[32+edx] 360 paddq xmm4,xmm5 361 movdqa xmm5,xmm7 362 pmuludq xmm7,[16+edx] 363 paddq xmm3,xmm6 364 movdqa xmm6,[80+esp] 365 pmuludq xmm5,[edx] 366 paddq xmm2,xmm7 367 pmuludq xmm6,[64+edx] 368 movdqa xmm7,[32+esp] 369 paddq xmm1,xmm5 370 movdqa xmm5,xmm7 371 pmuludq xmm7,[32+edx] 372 paddq xmm0,xmm6 373 movdqa xmm6,xmm5 374 pmuludq xmm5,[16+edx] 375 paddq xmm4,xmm7 376 movdqa xmm7,[96+esp] 377 pmuludq xmm6,[edx] 378 paddq xmm3,xmm5 379 movdqa xmm5,xmm7 380 pmuludq xmm7,[64+edx] 381 paddq xmm2,xmm6 382 pmuludq xmm5,[48+edx] 383 movdqa xmm6,[48+esp] 384 paddq xmm1,xmm7 385 movdqa xmm7,xmm6 386 pmuludq xmm6,[16+edx] 387 paddq xmm0,xmm5 388 movdqa xmm5,[112+esp] 389 pmuludq xmm7,[edx] 390 paddq xmm4,xmm6 391 movdqa xmm6,xmm5 392 pmuludq xmm5,[64+edx] 393 paddq xmm3,xmm7 394 movdqa xmm7,xmm6 395 pmuludq xmm6,[48+edx] 396 paddq xmm2,xmm5 397 pmuludq xmm7,[32+edx] 398 movdqa xmm5,[64+esp] 399 paddq xmm1,xmm6 400 movdqa xmm6,[128+esp] 401 pmuludq xmm5,[edx] 402 paddq xmm0,xmm7 403 movdqa xmm7,xmm6 404 pmuludq xmm6,[64+edx] 405 paddq xmm4,xmm5 406 movdqa xmm5,xmm7 407 pmuludq xmm7,[16+edx] 408 paddq xmm3,xmm6 409 movdqa xmm6,xmm5 410 pmuludq xmm5,[32+edx] 411 paddq xmm0,xmm7 412 pmuludq xmm6,[48+edx] 413 movdqa xmm7,[64+ebx] 414 paddq xmm1,xmm5 415 paddq xmm2,xmm6 416 movdqa xmm5,xmm3 417 pand xmm3,xmm7 418 psrlq xmm5,26 419 paddq xmm5,xmm4 420 movdqa xmm6,xmm0 421 pand xmm0,xmm7 422 psrlq xmm6,26 423 movdqa xmm4,xmm5 424 paddq xmm6,xmm1 425 psrlq xmm5,26 426 pand xmm4,xmm7 427 movdqa xmm1,xmm6 428 psrlq xmm6,26 429 paddd xmm0,xmm5 430 psllq xmm5,2 431 paddq xmm6,xmm2 432 paddq xmm5,xmm0 433 pand xmm1,xmm7 434 movdqa xmm2,xmm6 435 psrlq xmm6,26 436 pand xmm2,xmm7 437 paddd xmm6,xmm3 438 movdqa xmm0,xmm5 439 psrlq xmm5,26 440 movdqa xmm3,xmm6 441 psrlq xmm6,26 442 pand xmm0,xmm7 443 paddd xmm1,xmm5 444 pand xmm3,xmm7 445 paddd xmm4,xmm6 446 dec ecx 447 jz NEAR L$006square_break 448 punpcklqdq xmm0,[esp] 449 punpcklqdq xmm1,[16+esp] 450 punpcklqdq xmm2,[32+esp] 451 punpcklqdq xmm3,[48+esp] 452 punpcklqdq xmm4,[64+esp] 453 jmp NEAR L$005square 454L$006square_break: 455 psllq xmm0,32 456 psllq xmm1,32 457 psllq xmm2,32 458 psllq xmm3,32 459 psllq xmm4,32 460 por xmm0,[esp] 461 por xmm1,[16+esp] 462 por xmm2,[32+esp] 463 por xmm3,[48+esp] 464 por xmm4,[64+esp] 465 pshufd xmm0,xmm0,141 466 pshufd xmm1,xmm1,141 467 pshufd xmm2,xmm2,141 468 pshufd xmm3,xmm3,141 469 pshufd xmm4,xmm4,141 470 movdqu [edi],xmm0 471 movdqu [16+edi],xmm1 472 movdqu [32+edi],xmm2 473 movdqu [48+edi],xmm3 474 movdqu [64+edi],xmm4 475 movdqa xmm6,xmm1 476 movdqa xmm5,xmm2 477 pslld xmm6,2 478 pslld xmm5,2 479 paddd xmm6,xmm1 480 paddd xmm5,xmm2 481 movdqu [80+edi],xmm6 482 movdqu [96+edi],xmm5 483 movdqa xmm6,xmm3 484 movdqa xmm5,xmm4 485 pslld xmm6,2 486 pslld xmm5,2 487 paddd xmm6,xmm3 488 paddd xmm5,xmm4 489 movdqu [112+edi],xmm6 490 movdqu [128+edi],xmm5 491 mov esp,ebp 492 lea edi,[edi-48] 493 ret 494align 32 495align 16 496__poly1305_blocks_sse2: 497 push ebp 498 push ebx 499 push esi 500 push edi 501 mov edi,DWORD [20+esp] 502 mov esi,DWORD [24+esp] 503 mov ecx,DWORD [28+esp] 504 mov eax,DWORD [20+edi] 505 and ecx,-16 506 jz NEAR L$007nodata 507 cmp ecx,64 508 jae NEAR L$008enter_sse2 509 test eax,eax 510 jz NEAR L$enter_blocks 511align 16 512L$008enter_sse2: 513 call L$009pic_point 514L$009pic_point: 515 pop ebx 516 lea ebx,[(L$const_sse2-L$009pic_point)+ebx] 517 test eax,eax 518 jnz NEAR L$010base2_26 519 call __poly1305_init_sse2 520 mov eax,DWORD [edi] 521 mov ecx,DWORD [3+edi] 522 mov edx,DWORD [6+edi] 523 mov esi,DWORD [9+edi] 524 mov ebp,DWORD [13+edi] 525 mov DWORD [20+edi],1 526 shr ecx,2 527 and eax,67108863 528 shr edx,4 529 and ecx,67108863 530 shr esi,6 531 and edx,67108863 532 movd xmm0,eax 533 movd xmm1,ecx 534 movd xmm2,edx 535 movd xmm3,esi 536 movd xmm4,ebp 537 mov esi,DWORD [24+esp] 538 mov ecx,DWORD [28+esp] 539 jmp NEAR L$011base2_32 540align 16 541L$010base2_26: 542 movd xmm0,DWORD [edi] 543 movd xmm1,DWORD [4+edi] 544 movd xmm2,DWORD [8+edi] 545 movd xmm3,DWORD [12+edi] 546 movd xmm4,DWORD [16+edi] 547 movdqa xmm7,[64+ebx] 548L$011base2_32: 549 mov eax,DWORD [32+esp] 550 mov ebp,esp 551 sub esp,528 552 and esp,-16 553 lea edi,[48+edi] 554 shl eax,24 555 test ecx,31 556 jz NEAR L$012even 557 movdqu xmm6,[esi] 558 lea esi,[16+esi] 559 movdqa xmm5,xmm6 560 pand xmm6,xmm7 561 paddd xmm0,xmm6 562 movdqa xmm6,xmm5 563 psrlq xmm5,26 564 psrldq xmm6,6 565 pand xmm5,xmm7 566 paddd xmm1,xmm5 567 movdqa xmm5,xmm6 568 psrlq xmm6,4 569 pand xmm6,xmm7 570 paddd xmm2,xmm6 571 movdqa xmm6,xmm5 572 psrlq xmm5,30 573 pand xmm5,xmm7 574 psrldq xmm6,7 575 paddd xmm3,xmm5 576 movd xmm5,eax 577 paddd xmm4,xmm6 578 movd xmm6,DWORD [12+edi] 579 paddd xmm4,xmm5 580 movdqa [esp],xmm0 581 movdqa [16+esp],xmm1 582 movdqa [32+esp],xmm2 583 movdqa [48+esp],xmm3 584 movdqa [64+esp],xmm4 585 pmuludq xmm0,xmm6 586 pmuludq xmm1,xmm6 587 pmuludq xmm2,xmm6 588 movd xmm5,DWORD [28+edi] 589 pmuludq xmm3,xmm6 590 pmuludq xmm4,xmm6 591 movdqa xmm6,xmm5 592 pmuludq xmm5,[48+esp] 593 movdqa xmm7,xmm6 594 pmuludq xmm6,[32+esp] 595 paddq xmm4,xmm5 596 movdqa xmm5,xmm7 597 pmuludq xmm7,[16+esp] 598 paddq xmm3,xmm6 599 movd xmm6,DWORD [92+edi] 600 pmuludq xmm5,[esp] 601 paddq xmm2,xmm7 602 pmuludq xmm6,[64+esp] 603 movd xmm7,DWORD [44+edi] 604 paddq xmm1,xmm5 605 movdqa xmm5,xmm7 606 pmuludq xmm7,[32+esp] 607 paddq xmm0,xmm6 608 movdqa xmm6,xmm5 609 pmuludq xmm5,[16+esp] 610 paddq xmm4,xmm7 611 movd xmm7,DWORD [108+edi] 612 pmuludq xmm6,[esp] 613 paddq xmm3,xmm5 614 movdqa xmm5,xmm7 615 pmuludq xmm7,[64+esp] 616 paddq xmm2,xmm6 617 pmuludq xmm5,[48+esp] 618 movd xmm6,DWORD [60+edi] 619 paddq xmm1,xmm7 620 movdqa xmm7,xmm6 621 pmuludq xmm6,[16+esp] 622 paddq xmm0,xmm5 623 movd xmm5,DWORD [124+edi] 624 pmuludq xmm7,[esp] 625 paddq xmm4,xmm6 626 movdqa xmm6,xmm5 627 pmuludq xmm5,[64+esp] 628 paddq xmm3,xmm7 629 movdqa xmm7,xmm6 630 pmuludq xmm6,[48+esp] 631 paddq xmm2,xmm5 632 pmuludq xmm7,[32+esp] 633 movd xmm5,DWORD [76+edi] 634 paddq xmm1,xmm6 635 movd xmm6,DWORD [140+edi] 636 pmuludq xmm5,[esp] 637 paddq xmm0,xmm7 638 movdqa xmm7,xmm6 639 pmuludq xmm6,[64+esp] 640 paddq xmm4,xmm5 641 movdqa xmm5,xmm7 642 pmuludq xmm7,[16+esp] 643 paddq xmm3,xmm6 644 movdqa xmm6,xmm5 645 pmuludq xmm5,[32+esp] 646 paddq xmm0,xmm7 647 pmuludq xmm6,[48+esp] 648 movdqa xmm7,[64+ebx] 649 paddq xmm1,xmm5 650 paddq xmm2,xmm6 651 movdqa xmm5,xmm3 652 pand xmm3,xmm7 653 psrlq xmm5,26 654 paddq xmm5,xmm4 655 movdqa xmm6,xmm0 656 pand xmm0,xmm7 657 psrlq xmm6,26 658 movdqa xmm4,xmm5 659 paddq xmm6,xmm1 660 psrlq xmm5,26 661 pand xmm4,xmm7 662 movdqa xmm1,xmm6 663 psrlq xmm6,26 664 paddd xmm0,xmm5 665 psllq xmm5,2 666 paddq xmm6,xmm2 667 paddq xmm5,xmm0 668 pand xmm1,xmm7 669 movdqa xmm2,xmm6 670 psrlq xmm6,26 671 pand xmm2,xmm7 672 paddd xmm6,xmm3 673 movdqa xmm0,xmm5 674 psrlq xmm5,26 675 movdqa xmm3,xmm6 676 psrlq xmm6,26 677 pand xmm0,xmm7 678 paddd xmm1,xmm5 679 pand xmm3,xmm7 680 paddd xmm4,xmm6 681 sub ecx,16 682 jz NEAR L$013done 683L$012even: 684 lea edx,[384+esp] 685 lea eax,[esi-32] 686 sub ecx,64 687 movdqu xmm5,[edi] 688 pshufd xmm6,xmm5,68 689 cmovb esi,eax 690 pshufd xmm5,xmm5,238 691 movdqa [edx],xmm6 692 lea eax,[160+esp] 693 movdqu xmm6,[16+edi] 694 movdqa [edx-144],xmm5 695 pshufd xmm5,xmm6,68 696 pshufd xmm6,xmm6,238 697 movdqa [16+edx],xmm5 698 movdqu xmm5,[32+edi] 699 movdqa [edx-128],xmm6 700 pshufd xmm6,xmm5,68 701 pshufd xmm5,xmm5,238 702 movdqa [32+edx],xmm6 703 movdqu xmm6,[48+edi] 704 movdqa [edx-112],xmm5 705 pshufd xmm5,xmm6,68 706 pshufd xmm6,xmm6,238 707 movdqa [48+edx],xmm5 708 movdqu xmm5,[64+edi] 709 movdqa [edx-96],xmm6 710 pshufd xmm6,xmm5,68 711 pshufd xmm5,xmm5,238 712 movdqa [64+edx],xmm6 713 movdqu xmm6,[80+edi] 714 movdqa [edx-80],xmm5 715 pshufd xmm5,xmm6,68 716 pshufd xmm6,xmm6,238 717 movdqa [80+edx],xmm5 718 movdqu xmm5,[96+edi] 719 movdqa [edx-64],xmm6 720 pshufd xmm6,xmm5,68 721 pshufd xmm5,xmm5,238 722 movdqa [96+edx],xmm6 723 movdqu xmm6,[112+edi] 724 movdqa [edx-48],xmm5 725 pshufd xmm5,xmm6,68 726 pshufd xmm6,xmm6,238 727 movdqa [112+edx],xmm5 728 movdqu xmm5,[128+edi] 729 movdqa [edx-32],xmm6 730 pshufd xmm6,xmm5,68 731 pshufd xmm5,xmm5,238 732 movdqa [128+edx],xmm6 733 movdqa [edx-16],xmm5 734 movdqu xmm5,[32+esi] 735 movdqu xmm6,[48+esi] 736 lea esi,[32+esi] 737 movdqa [112+esp],xmm2 738 movdqa [128+esp],xmm3 739 movdqa [144+esp],xmm4 740 movdqa xmm2,xmm5 741 movdqa xmm3,xmm6 742 psrldq xmm2,6 743 psrldq xmm3,6 744 movdqa xmm4,xmm5 745 punpcklqdq xmm2,xmm3 746 punpckhqdq xmm4,xmm6 747 punpcklqdq xmm5,xmm6 748 movdqa xmm3,xmm2 749 psrlq xmm2,4 750 psrlq xmm3,30 751 movdqa xmm6,xmm5 752 psrlq xmm4,40 753 psrlq xmm6,26 754 pand xmm5,xmm7 755 pand xmm6,xmm7 756 pand xmm2,xmm7 757 pand xmm3,xmm7 758 por xmm4,[ebx] 759 movdqa [80+esp],xmm0 760 movdqa [96+esp],xmm1 761 jbe NEAR L$014skip_loop 762 jmp NEAR L$015loop 763align 32 764L$015loop: 765 movdqa xmm7,[edx-144] 766 movdqa [16+eax],xmm6 767 movdqa [32+eax],xmm2 768 movdqa [48+eax],xmm3 769 movdqa [64+eax],xmm4 770 movdqa xmm1,xmm5 771 pmuludq xmm5,xmm7 772 movdqa xmm0,xmm6 773 pmuludq xmm6,xmm7 774 pmuludq xmm2,xmm7 775 pmuludq xmm3,xmm7 776 pmuludq xmm4,xmm7 777 pmuludq xmm0,[edx-16] 778 movdqa xmm7,xmm1 779 pmuludq xmm1,[edx-128] 780 paddq xmm0,xmm5 781 movdqa xmm5,xmm7 782 pmuludq xmm7,[edx-112] 783 paddq xmm1,xmm6 784 movdqa xmm6,xmm5 785 pmuludq xmm5,[edx-96] 786 paddq xmm2,xmm7 787 movdqa xmm7,[16+eax] 788 pmuludq xmm6,[edx-80] 789 paddq xmm3,xmm5 790 movdqa xmm5,xmm7 791 pmuludq xmm7,[edx-128] 792 paddq xmm4,xmm6 793 movdqa xmm6,xmm5 794 pmuludq xmm5,[edx-112] 795 paddq xmm2,xmm7 796 movdqa xmm7,[32+eax] 797 pmuludq xmm6,[edx-96] 798 paddq xmm3,xmm5 799 movdqa xmm5,xmm7 800 pmuludq xmm7,[edx-32] 801 paddq xmm4,xmm6 802 movdqa xmm6,xmm5 803 pmuludq xmm5,[edx-16] 804 paddq xmm0,xmm7 805 movdqa xmm7,xmm6 806 pmuludq xmm6,[edx-128] 807 paddq xmm1,xmm5 808 movdqa xmm5,[48+eax] 809 pmuludq xmm7,[edx-112] 810 paddq xmm3,xmm6 811 movdqa xmm6,xmm5 812 pmuludq xmm5,[edx-48] 813 paddq xmm4,xmm7 814 movdqa xmm7,xmm6 815 pmuludq xmm6,[edx-32] 816 paddq xmm0,xmm5 817 movdqa xmm5,xmm7 818 pmuludq xmm7,[edx-16] 819 paddq xmm1,xmm6 820 movdqa xmm6,[64+eax] 821 pmuludq xmm5,[edx-128] 822 paddq xmm2,xmm7 823 movdqa xmm7,xmm6 824 pmuludq xmm6,[edx-16] 825 paddq xmm4,xmm5 826 movdqa xmm5,xmm7 827 pmuludq xmm7,[edx-64] 828 paddq xmm3,xmm6 829 movdqa xmm6,xmm5 830 pmuludq xmm5,[edx-48] 831 paddq xmm0,xmm7 832 movdqa xmm7,[64+ebx] 833 pmuludq xmm6,[edx-32] 834 paddq xmm1,xmm5 835 paddq xmm2,xmm6 836 movdqu xmm5,[esi-32] 837 movdqu xmm6,[esi-16] 838 lea esi,[32+esi] 839 movdqa [32+esp],xmm2 840 movdqa [48+esp],xmm3 841 movdqa [64+esp],xmm4 842 movdqa xmm2,xmm5 843 movdqa xmm3,xmm6 844 psrldq xmm2,6 845 psrldq xmm3,6 846 movdqa xmm4,xmm5 847 punpcklqdq xmm2,xmm3 848 punpckhqdq xmm4,xmm6 849 punpcklqdq xmm5,xmm6 850 movdqa xmm3,xmm2 851 psrlq xmm2,4 852 psrlq xmm3,30 853 movdqa xmm6,xmm5 854 psrlq xmm4,40 855 psrlq xmm6,26 856 pand xmm5,xmm7 857 pand xmm6,xmm7 858 pand xmm2,xmm7 859 pand xmm3,xmm7 860 por xmm4,[ebx] 861 lea eax,[esi-32] 862 sub ecx,64 863 paddd xmm5,[80+esp] 864 paddd xmm6,[96+esp] 865 paddd xmm2,[112+esp] 866 paddd xmm3,[128+esp] 867 paddd xmm4,[144+esp] 868 cmovb esi,eax 869 lea eax,[160+esp] 870 movdqa xmm7,[edx] 871 movdqa [16+esp],xmm1 872 movdqa [16+eax],xmm6 873 movdqa [32+eax],xmm2 874 movdqa [48+eax],xmm3 875 movdqa [64+eax],xmm4 876 movdqa xmm1,xmm5 877 pmuludq xmm5,xmm7 878 paddq xmm5,xmm0 879 movdqa xmm0,xmm6 880 pmuludq xmm6,xmm7 881 pmuludq xmm2,xmm7 882 pmuludq xmm3,xmm7 883 pmuludq xmm4,xmm7 884 paddq xmm6,[16+esp] 885 paddq xmm2,[32+esp] 886 paddq xmm3,[48+esp] 887 paddq xmm4,[64+esp] 888 pmuludq xmm0,[128+edx] 889 movdqa xmm7,xmm1 890 pmuludq xmm1,[16+edx] 891 paddq xmm0,xmm5 892 movdqa xmm5,xmm7 893 pmuludq xmm7,[32+edx] 894 paddq xmm1,xmm6 895 movdqa xmm6,xmm5 896 pmuludq xmm5,[48+edx] 897 paddq xmm2,xmm7 898 movdqa xmm7,[16+eax] 899 pmuludq xmm6,[64+edx] 900 paddq xmm3,xmm5 901 movdqa xmm5,xmm7 902 pmuludq xmm7,[16+edx] 903 paddq xmm4,xmm6 904 movdqa xmm6,xmm5 905 pmuludq xmm5,[32+edx] 906 paddq xmm2,xmm7 907 movdqa xmm7,[32+eax] 908 pmuludq xmm6,[48+edx] 909 paddq xmm3,xmm5 910 movdqa xmm5,xmm7 911 pmuludq xmm7,[112+edx] 912 paddq xmm4,xmm6 913 movdqa xmm6,xmm5 914 pmuludq xmm5,[128+edx] 915 paddq xmm0,xmm7 916 movdqa xmm7,xmm6 917 pmuludq xmm6,[16+edx] 918 paddq xmm1,xmm5 919 movdqa xmm5,[48+eax] 920 pmuludq xmm7,[32+edx] 921 paddq xmm3,xmm6 922 movdqa xmm6,xmm5 923 pmuludq xmm5,[96+edx] 924 paddq xmm4,xmm7 925 movdqa xmm7,xmm6 926 pmuludq xmm6,[112+edx] 927 paddq xmm0,xmm5 928 movdqa xmm5,xmm7 929 pmuludq xmm7,[128+edx] 930 paddq xmm1,xmm6 931 movdqa xmm6,[64+eax] 932 pmuludq xmm5,[16+edx] 933 paddq xmm2,xmm7 934 movdqa xmm7,xmm6 935 pmuludq xmm6,[128+edx] 936 paddq xmm4,xmm5 937 movdqa xmm5,xmm7 938 pmuludq xmm7,[80+edx] 939 paddq xmm3,xmm6 940 movdqa xmm6,xmm5 941 pmuludq xmm5,[96+edx] 942 paddq xmm0,xmm7 943 movdqa xmm7,[64+ebx] 944 pmuludq xmm6,[112+edx] 945 paddq xmm1,xmm5 946 paddq xmm2,xmm6 947 movdqa xmm5,xmm3 948 pand xmm3,xmm7 949 psrlq xmm5,26 950 paddq xmm5,xmm4 951 movdqa xmm6,xmm0 952 pand xmm0,xmm7 953 psrlq xmm6,26 954 movdqa xmm4,xmm5 955 paddq xmm6,xmm1 956 psrlq xmm5,26 957 pand xmm4,xmm7 958 movdqa xmm1,xmm6 959 psrlq xmm6,26 960 paddd xmm0,xmm5 961 psllq xmm5,2 962 paddq xmm6,xmm2 963 paddq xmm5,xmm0 964 pand xmm1,xmm7 965 movdqa xmm2,xmm6 966 psrlq xmm6,26 967 pand xmm2,xmm7 968 paddd xmm6,xmm3 969 movdqa xmm0,xmm5 970 psrlq xmm5,26 971 movdqa xmm3,xmm6 972 psrlq xmm6,26 973 pand xmm0,xmm7 974 paddd xmm1,xmm5 975 pand xmm3,xmm7 976 paddd xmm4,xmm6 977 movdqu xmm5,[32+esi] 978 movdqu xmm6,[48+esi] 979 lea esi,[32+esi] 980 movdqa [112+esp],xmm2 981 movdqa [128+esp],xmm3 982 movdqa [144+esp],xmm4 983 movdqa xmm2,xmm5 984 movdqa xmm3,xmm6 985 psrldq xmm2,6 986 psrldq xmm3,6 987 movdqa xmm4,xmm5 988 punpcklqdq xmm2,xmm3 989 punpckhqdq xmm4,xmm6 990 punpcklqdq xmm5,xmm6 991 movdqa xmm3,xmm2 992 psrlq xmm2,4 993 psrlq xmm3,30 994 movdqa xmm6,xmm5 995 psrlq xmm4,40 996 psrlq xmm6,26 997 pand xmm5,xmm7 998 pand xmm6,xmm7 999 pand xmm2,xmm7 1000 pand xmm3,xmm7 1001 por xmm4,[ebx] 1002 movdqa [80+esp],xmm0 1003 movdqa [96+esp],xmm1 1004 ja NEAR L$015loop 1005L$014skip_loop: 1006 pshufd xmm7,[edx-144],16 1007 add ecx,32 1008 jnz NEAR L$016long_tail 1009 paddd xmm5,xmm0 1010 paddd xmm6,xmm1 1011 paddd xmm2,[112+esp] 1012 paddd xmm3,[128+esp] 1013 paddd xmm4,[144+esp] 1014L$016long_tail: 1015 movdqa [eax],xmm5 1016 movdqa [16+eax],xmm6 1017 movdqa [32+eax],xmm2 1018 movdqa [48+eax],xmm3 1019 movdqa [64+eax],xmm4 1020 pmuludq xmm5,xmm7 1021 pmuludq xmm6,xmm7 1022 pmuludq xmm2,xmm7 1023 movdqa xmm0,xmm5 1024 pshufd xmm5,[edx-128],16 1025 pmuludq xmm3,xmm7 1026 movdqa xmm1,xmm6 1027 pmuludq xmm4,xmm7 1028 movdqa xmm6,xmm5 1029 pmuludq xmm5,[48+eax] 1030 movdqa xmm7,xmm6 1031 pmuludq xmm6,[32+eax] 1032 paddq xmm4,xmm5 1033 movdqa xmm5,xmm7 1034 pmuludq xmm7,[16+eax] 1035 paddq xmm3,xmm6 1036 pshufd xmm6,[edx-64],16 1037 pmuludq xmm5,[eax] 1038 paddq xmm2,xmm7 1039 pmuludq xmm6,[64+eax] 1040 pshufd xmm7,[edx-112],16 1041 paddq xmm1,xmm5 1042 movdqa xmm5,xmm7 1043 pmuludq xmm7,[32+eax] 1044 paddq xmm0,xmm6 1045 movdqa xmm6,xmm5 1046 pmuludq xmm5,[16+eax] 1047 paddq xmm4,xmm7 1048 pshufd xmm7,[edx-48],16 1049 pmuludq xmm6,[eax] 1050 paddq xmm3,xmm5 1051 movdqa xmm5,xmm7 1052 pmuludq xmm7,[64+eax] 1053 paddq xmm2,xmm6 1054 pmuludq xmm5,[48+eax] 1055 pshufd xmm6,[edx-96],16 1056 paddq xmm1,xmm7 1057 movdqa xmm7,xmm6 1058 pmuludq xmm6,[16+eax] 1059 paddq xmm0,xmm5 1060 pshufd xmm5,[edx-32],16 1061 pmuludq xmm7,[eax] 1062 paddq xmm4,xmm6 1063 movdqa xmm6,xmm5 1064 pmuludq xmm5,[64+eax] 1065 paddq xmm3,xmm7 1066 movdqa xmm7,xmm6 1067 pmuludq xmm6,[48+eax] 1068 paddq xmm2,xmm5 1069 pmuludq xmm7,[32+eax] 1070 pshufd xmm5,[edx-80],16 1071 paddq xmm1,xmm6 1072 pshufd xmm6,[edx-16],16 1073 pmuludq xmm5,[eax] 1074 paddq xmm0,xmm7 1075 movdqa xmm7,xmm6 1076 pmuludq xmm6,[64+eax] 1077 paddq xmm4,xmm5 1078 movdqa xmm5,xmm7 1079 pmuludq xmm7,[16+eax] 1080 paddq xmm3,xmm6 1081 movdqa xmm6,xmm5 1082 pmuludq xmm5,[32+eax] 1083 paddq xmm0,xmm7 1084 pmuludq xmm6,[48+eax] 1085 movdqa xmm7,[64+ebx] 1086 paddq xmm1,xmm5 1087 paddq xmm2,xmm6 1088 jz NEAR L$017short_tail 1089 movdqu xmm5,[esi-32] 1090 movdqu xmm6,[esi-16] 1091 lea esi,[32+esi] 1092 movdqa [32+esp],xmm2 1093 movdqa [48+esp],xmm3 1094 movdqa [64+esp],xmm4 1095 movdqa xmm2,xmm5 1096 movdqa xmm3,xmm6 1097 psrldq xmm2,6 1098 psrldq xmm3,6 1099 movdqa xmm4,xmm5 1100 punpcklqdq xmm2,xmm3 1101 punpckhqdq xmm4,xmm6 1102 punpcklqdq xmm5,xmm6 1103 movdqa xmm3,xmm2 1104 psrlq xmm2,4 1105 psrlq xmm3,30 1106 movdqa xmm6,xmm5 1107 psrlq xmm4,40 1108 psrlq xmm6,26 1109 pand xmm5,xmm7 1110 pand xmm6,xmm7 1111 pand xmm2,xmm7 1112 pand xmm3,xmm7 1113 por xmm4,[ebx] 1114 pshufd xmm7,[edx],16 1115 paddd xmm5,[80+esp] 1116 paddd xmm6,[96+esp] 1117 paddd xmm2,[112+esp] 1118 paddd xmm3,[128+esp] 1119 paddd xmm4,[144+esp] 1120 movdqa [esp],xmm5 1121 pmuludq xmm5,xmm7 1122 movdqa [16+esp],xmm6 1123 pmuludq xmm6,xmm7 1124 paddq xmm0,xmm5 1125 movdqa xmm5,xmm2 1126 pmuludq xmm2,xmm7 1127 paddq xmm1,xmm6 1128 movdqa xmm6,xmm3 1129 pmuludq xmm3,xmm7 1130 paddq xmm2,[32+esp] 1131 movdqa [32+esp],xmm5 1132 pshufd xmm5,[16+edx],16 1133 paddq xmm3,[48+esp] 1134 movdqa [48+esp],xmm6 1135 movdqa xmm6,xmm4 1136 pmuludq xmm4,xmm7 1137 paddq xmm4,[64+esp] 1138 movdqa [64+esp],xmm6 1139 movdqa xmm6,xmm5 1140 pmuludq xmm5,[48+esp] 1141 movdqa xmm7,xmm6 1142 pmuludq xmm6,[32+esp] 1143 paddq xmm4,xmm5 1144 movdqa xmm5,xmm7 1145 pmuludq xmm7,[16+esp] 1146 paddq xmm3,xmm6 1147 pshufd xmm6,[80+edx],16 1148 pmuludq xmm5,[esp] 1149 paddq xmm2,xmm7 1150 pmuludq xmm6,[64+esp] 1151 pshufd xmm7,[32+edx],16 1152 paddq xmm1,xmm5 1153 movdqa xmm5,xmm7 1154 pmuludq xmm7,[32+esp] 1155 paddq xmm0,xmm6 1156 movdqa xmm6,xmm5 1157 pmuludq xmm5,[16+esp] 1158 paddq xmm4,xmm7 1159 pshufd xmm7,[96+edx],16 1160 pmuludq xmm6,[esp] 1161 paddq xmm3,xmm5 1162 movdqa xmm5,xmm7 1163 pmuludq xmm7,[64+esp] 1164 paddq xmm2,xmm6 1165 pmuludq xmm5,[48+esp] 1166 pshufd xmm6,[48+edx],16 1167 paddq xmm1,xmm7 1168 movdqa xmm7,xmm6 1169 pmuludq xmm6,[16+esp] 1170 paddq xmm0,xmm5 1171 pshufd xmm5,[112+edx],16 1172 pmuludq xmm7,[esp] 1173 paddq xmm4,xmm6 1174 movdqa xmm6,xmm5 1175 pmuludq xmm5,[64+esp] 1176 paddq xmm3,xmm7 1177 movdqa xmm7,xmm6 1178 pmuludq xmm6,[48+esp] 1179 paddq xmm2,xmm5 1180 pmuludq xmm7,[32+esp] 1181 pshufd xmm5,[64+edx],16 1182 paddq xmm1,xmm6 1183 pshufd xmm6,[128+edx],16 1184 pmuludq xmm5,[esp] 1185 paddq xmm0,xmm7 1186 movdqa xmm7,xmm6 1187 pmuludq xmm6,[64+esp] 1188 paddq xmm4,xmm5 1189 movdqa xmm5,xmm7 1190 pmuludq xmm7,[16+esp] 1191 paddq xmm3,xmm6 1192 movdqa xmm6,xmm5 1193 pmuludq xmm5,[32+esp] 1194 paddq xmm0,xmm7 1195 pmuludq xmm6,[48+esp] 1196 movdqa xmm7,[64+ebx] 1197 paddq xmm1,xmm5 1198 paddq xmm2,xmm6 1199L$017short_tail: 1200 pshufd xmm6,xmm4,78 1201 pshufd xmm5,xmm3,78 1202 paddq xmm4,xmm6 1203 paddq xmm3,xmm5 1204 pshufd xmm6,xmm0,78 1205 pshufd xmm5,xmm1,78 1206 paddq xmm0,xmm6 1207 paddq xmm1,xmm5 1208 pshufd xmm6,xmm2,78 1209 movdqa xmm5,xmm3 1210 pand xmm3,xmm7 1211 psrlq xmm5,26 1212 paddq xmm2,xmm6 1213 paddq xmm5,xmm4 1214 movdqa xmm6,xmm0 1215 pand xmm0,xmm7 1216 psrlq xmm6,26 1217 movdqa xmm4,xmm5 1218 paddq xmm6,xmm1 1219 psrlq xmm5,26 1220 pand xmm4,xmm7 1221 movdqa xmm1,xmm6 1222 psrlq xmm6,26 1223 paddd xmm0,xmm5 1224 psllq xmm5,2 1225 paddq xmm6,xmm2 1226 paddq xmm5,xmm0 1227 pand xmm1,xmm7 1228 movdqa xmm2,xmm6 1229 psrlq xmm6,26 1230 pand xmm2,xmm7 1231 paddd xmm6,xmm3 1232 movdqa xmm0,xmm5 1233 psrlq xmm5,26 1234 movdqa xmm3,xmm6 1235 psrlq xmm6,26 1236 pand xmm0,xmm7 1237 paddd xmm1,xmm5 1238 pand xmm3,xmm7 1239 paddd xmm4,xmm6 1240L$013done: 1241 movd DWORD [edi-48],xmm0 1242 movd DWORD [edi-44],xmm1 1243 movd DWORD [edi-40],xmm2 1244 movd DWORD [edi-36],xmm3 1245 movd DWORD [edi-32],xmm4 1246 mov esp,ebp 1247L$007nodata: 1248 pop edi 1249 pop esi 1250 pop ebx 1251 pop ebp 1252 ret 1253align 32 1254align 16 1255__poly1305_emit_sse2: 1256 push ebp 1257 push ebx 1258 push esi 1259 push edi 1260 mov ebp,DWORD [20+esp] 1261 cmp DWORD [20+ebp],0 1262 je NEAR L$enter_emit 1263 mov eax,DWORD [ebp] 1264 mov edi,DWORD [4+ebp] 1265 mov ecx,DWORD [8+ebp] 1266 mov edx,DWORD [12+ebp] 1267 mov esi,DWORD [16+ebp] 1268 mov ebx,edi 1269 shl edi,26 1270 shr ebx,6 1271 add eax,edi 1272 mov edi,ecx 1273 adc ebx,0 1274 shl edi,20 1275 shr ecx,12 1276 add ebx,edi 1277 mov edi,edx 1278 adc ecx,0 1279 shl edi,14 1280 shr edx,18 1281 add ecx,edi 1282 mov edi,esi 1283 adc edx,0 1284 shl edi,8 1285 shr esi,24 1286 add edx,edi 1287 adc esi,0 1288 mov edi,esi 1289 and esi,3 1290 shr edi,2 1291 lea ebp,[edi*4+edi] 1292 mov edi,DWORD [24+esp] 1293 add eax,ebp 1294 mov ebp,DWORD [28+esp] 1295 adc ebx,0 1296 adc ecx,0 1297 adc edx,0 1298 adc esi,0 1299 movd xmm0,eax 1300 add eax,5 1301 movd xmm1,ebx 1302 adc ebx,0 1303 movd xmm2,ecx 1304 adc ecx,0 1305 movd xmm3,edx 1306 adc edx,0 1307 adc esi,0 1308 shr esi,2 1309 neg esi 1310 and eax,esi 1311 and ebx,esi 1312 and ecx,esi 1313 and edx,esi 1314 mov DWORD [edi],eax 1315 movd eax,xmm0 1316 mov DWORD [4+edi],ebx 1317 movd ebx,xmm1 1318 mov DWORD [8+edi],ecx 1319 movd ecx,xmm2 1320 mov DWORD [12+edi],edx 1321 movd edx,xmm3 1322 not esi 1323 and eax,esi 1324 and ebx,esi 1325 or eax,DWORD [edi] 1326 and ecx,esi 1327 or ebx,DWORD [4+edi] 1328 and edx,esi 1329 or ecx,DWORD [8+edi] 1330 or edx,DWORD [12+edi] 1331 add eax,DWORD [ebp] 1332 adc ebx,DWORD [4+ebp] 1333 mov DWORD [edi],eax 1334 adc ecx,DWORD [8+ebp] 1335 mov DWORD [4+edi],ebx 1336 adc edx,DWORD [12+ebp] 1337 mov DWORD [8+edi],ecx 1338 mov DWORD [12+edi],edx 1339 pop edi 1340 pop esi 1341 pop ebx 1342 pop ebp 1343 ret 1344align 32 1345align 16 1346__poly1305_init_avx2: 1347 vmovdqu xmm4,[24+edi] 1348 lea edi,[48+edi] 1349 mov ebp,esp 1350 sub esp,224 1351 and esp,-16 1352 vmovdqa xmm7,[64+ebx] 1353 vpand xmm0,xmm4,xmm7 1354 vpsrlq xmm1,xmm4,26 1355 vpsrldq xmm3,xmm4,6 1356 vpand xmm1,xmm1,xmm7 1357 vpsrlq xmm2,xmm3,4 1358 vpsrlq xmm3,xmm3,30 1359 vpand xmm2,xmm2,xmm7 1360 vpand xmm3,xmm3,xmm7 1361 vpsrldq xmm4,xmm4,13 1362 lea edx,[144+esp] 1363 mov ecx,2 1364L$018square: 1365 vmovdqa [esp],xmm0 1366 vmovdqa [16+esp],xmm1 1367 vmovdqa [32+esp],xmm2 1368 vmovdqa [48+esp],xmm3 1369 vmovdqa [64+esp],xmm4 1370 vpslld xmm6,xmm1,2 1371 vpslld xmm5,xmm2,2 1372 vpaddd xmm6,xmm6,xmm1 1373 vpaddd xmm5,xmm5,xmm2 1374 vmovdqa [80+esp],xmm6 1375 vmovdqa [96+esp],xmm5 1376 vpslld xmm6,xmm3,2 1377 vpslld xmm5,xmm4,2 1378 vpaddd xmm6,xmm6,xmm3 1379 vpaddd xmm5,xmm5,xmm4 1380 vmovdqa [112+esp],xmm6 1381 vmovdqa [128+esp],xmm5 1382 vpshufd xmm5,xmm0,68 1383 vmovdqa xmm6,xmm1 1384 vpshufd xmm1,xmm1,68 1385 vpshufd xmm2,xmm2,68 1386 vpshufd xmm3,xmm3,68 1387 vpshufd xmm4,xmm4,68 1388 vmovdqa [edx],xmm5 1389 vmovdqa [16+edx],xmm1 1390 vmovdqa [32+edx],xmm2 1391 vmovdqa [48+edx],xmm3 1392 vmovdqa [64+edx],xmm4 1393 vpmuludq xmm4,xmm4,xmm0 1394 vpmuludq xmm3,xmm3,xmm0 1395 vpmuludq xmm2,xmm2,xmm0 1396 vpmuludq xmm1,xmm1,xmm0 1397 vpmuludq xmm0,xmm5,xmm0 1398 vpmuludq xmm5,xmm6,[48+edx] 1399 vpaddq xmm4,xmm4,xmm5 1400 vpmuludq xmm7,xmm6,[32+edx] 1401 vpaddq xmm3,xmm3,xmm7 1402 vpmuludq xmm5,xmm6,[16+edx] 1403 vpaddq xmm2,xmm2,xmm5 1404 vmovdqa xmm7,[80+esp] 1405 vpmuludq xmm6,xmm6,[edx] 1406 vpaddq xmm1,xmm1,xmm6 1407 vmovdqa xmm5,[32+esp] 1408 vpmuludq xmm7,xmm7,[64+edx] 1409 vpaddq xmm0,xmm0,xmm7 1410 vpmuludq xmm6,xmm5,[32+edx] 1411 vpaddq xmm4,xmm4,xmm6 1412 vpmuludq xmm7,xmm5,[16+edx] 1413 vpaddq xmm3,xmm3,xmm7 1414 vmovdqa xmm6,[96+esp] 1415 vpmuludq xmm5,xmm5,[edx] 1416 vpaddq xmm2,xmm2,xmm5 1417 vpmuludq xmm7,xmm6,[64+edx] 1418 vpaddq xmm1,xmm1,xmm7 1419 vmovdqa xmm5,[48+esp] 1420 vpmuludq xmm6,xmm6,[48+edx] 1421 vpaddq xmm0,xmm0,xmm6 1422 vpmuludq xmm7,xmm5,[16+edx] 1423 vpaddq xmm4,xmm4,xmm7 1424 vmovdqa xmm6,[112+esp] 1425 vpmuludq xmm5,xmm5,[edx] 1426 vpaddq xmm3,xmm3,xmm5 1427 vpmuludq xmm7,xmm6,[64+edx] 1428 vpaddq xmm2,xmm2,xmm7 1429 vpmuludq xmm5,xmm6,[48+edx] 1430 vpaddq xmm1,xmm1,xmm5 1431 vmovdqa xmm7,[64+esp] 1432 vpmuludq xmm6,xmm6,[32+edx] 1433 vpaddq xmm0,xmm0,xmm6 1434 vmovdqa xmm5,[128+esp] 1435 vpmuludq xmm7,xmm7,[edx] 1436 vpaddq xmm4,xmm4,xmm7 1437 vpmuludq xmm6,xmm5,[64+edx] 1438 vpaddq xmm3,xmm3,xmm6 1439 vpmuludq xmm7,xmm5,[16+edx] 1440 vpaddq xmm0,xmm0,xmm7 1441 vpmuludq xmm6,xmm5,[32+edx] 1442 vpaddq xmm1,xmm1,xmm6 1443 vmovdqa xmm7,[64+ebx] 1444 vpmuludq xmm5,xmm5,[48+edx] 1445 vpaddq xmm2,xmm2,xmm5 1446 vpsrlq xmm5,xmm3,26 1447 vpand xmm3,xmm3,xmm7 1448 vpsrlq xmm6,xmm0,26 1449 vpand xmm0,xmm0,xmm7 1450 vpaddq xmm4,xmm4,xmm5 1451 vpaddq xmm1,xmm1,xmm6 1452 vpsrlq xmm5,xmm4,26 1453 vpand xmm4,xmm4,xmm7 1454 vpsrlq xmm6,xmm1,26 1455 vpand xmm1,xmm1,xmm7 1456 vpaddq xmm2,xmm2,xmm6 1457 vpaddd xmm0,xmm0,xmm5 1458 vpsllq xmm5,xmm5,2 1459 vpsrlq xmm6,xmm2,26 1460 vpand xmm2,xmm2,xmm7 1461 vpaddd xmm0,xmm0,xmm5 1462 vpaddd xmm3,xmm3,xmm6 1463 vpsrlq xmm6,xmm3,26 1464 vpsrlq xmm5,xmm0,26 1465 vpand xmm0,xmm0,xmm7 1466 vpand xmm3,xmm3,xmm7 1467 vpaddd xmm1,xmm1,xmm5 1468 vpaddd xmm4,xmm4,xmm6 1469 dec ecx 1470 jz NEAR L$019square_break 1471 vpunpcklqdq xmm0,xmm0,[esp] 1472 vpunpcklqdq xmm1,xmm1,[16+esp] 1473 vpunpcklqdq xmm2,xmm2,[32+esp] 1474 vpunpcklqdq xmm3,xmm3,[48+esp] 1475 vpunpcklqdq xmm4,xmm4,[64+esp] 1476 jmp NEAR L$018square 1477L$019square_break: 1478 vpsllq xmm0,xmm0,32 1479 vpsllq xmm1,xmm1,32 1480 vpsllq xmm2,xmm2,32 1481 vpsllq xmm3,xmm3,32 1482 vpsllq xmm4,xmm4,32 1483 vpor xmm0,xmm0,[esp] 1484 vpor xmm1,xmm1,[16+esp] 1485 vpor xmm2,xmm2,[32+esp] 1486 vpor xmm3,xmm3,[48+esp] 1487 vpor xmm4,xmm4,[64+esp] 1488 vpshufd xmm0,xmm0,141 1489 vpshufd xmm1,xmm1,141 1490 vpshufd xmm2,xmm2,141 1491 vpshufd xmm3,xmm3,141 1492 vpshufd xmm4,xmm4,141 1493 vmovdqu [edi],xmm0 1494 vmovdqu [16+edi],xmm1 1495 vmovdqu [32+edi],xmm2 1496 vmovdqu [48+edi],xmm3 1497 vmovdqu [64+edi],xmm4 1498 vpslld xmm6,xmm1,2 1499 vpslld xmm5,xmm2,2 1500 vpaddd xmm6,xmm6,xmm1 1501 vpaddd xmm5,xmm5,xmm2 1502 vmovdqu [80+edi],xmm6 1503 vmovdqu [96+edi],xmm5 1504 vpslld xmm6,xmm3,2 1505 vpslld xmm5,xmm4,2 1506 vpaddd xmm6,xmm6,xmm3 1507 vpaddd xmm5,xmm5,xmm4 1508 vmovdqu [112+edi],xmm6 1509 vmovdqu [128+edi],xmm5 1510 mov esp,ebp 1511 lea edi,[edi-48] 1512 ret 1513align 32 1514align 16 1515__poly1305_blocks_avx2: 1516 push ebp 1517 push ebx 1518 push esi 1519 push edi 1520 mov edi,DWORD [20+esp] 1521 mov esi,DWORD [24+esp] 1522 mov ecx,DWORD [28+esp] 1523 mov eax,DWORD [20+edi] 1524 and ecx,-16 1525 jz NEAR L$020nodata 1526 cmp ecx,64 1527 jae NEAR L$021enter_avx2 1528 test eax,eax 1529 jz NEAR L$enter_blocks 1530L$021enter_avx2: 1531 vzeroupper 1532 call L$022pic_point 1533L$022pic_point: 1534 pop ebx 1535 lea ebx,[(L$const_sse2-L$022pic_point)+ebx] 1536 test eax,eax 1537 jnz NEAR L$023base2_26 1538 call __poly1305_init_avx2 1539 mov eax,DWORD [edi] 1540 mov ecx,DWORD [3+edi] 1541 mov edx,DWORD [6+edi] 1542 mov esi,DWORD [9+edi] 1543 mov ebp,DWORD [13+edi] 1544 shr ecx,2 1545 and eax,67108863 1546 shr edx,4 1547 and ecx,67108863 1548 shr esi,6 1549 and edx,67108863 1550 mov DWORD [edi],eax 1551 mov DWORD [4+edi],ecx 1552 mov DWORD [8+edi],edx 1553 mov DWORD [12+edi],esi 1554 mov DWORD [16+edi],ebp 1555 mov DWORD [20+edi],1 1556 mov esi,DWORD [24+esp] 1557 mov ecx,DWORD [28+esp] 1558L$023base2_26: 1559 mov eax,DWORD [32+esp] 1560 mov ebp,esp 1561 sub esp,448 1562 and esp,-512 1563 vmovdqu xmm0,[48+edi] 1564 lea edx,[288+esp] 1565 vmovdqu xmm1,[64+edi] 1566 vmovdqu xmm2,[80+edi] 1567 vmovdqu xmm3,[96+edi] 1568 vmovdqu xmm4,[112+edi] 1569 lea edi,[48+edi] 1570 vpermq ymm0,ymm0,64 1571 vpermq ymm1,ymm1,64 1572 vpermq ymm2,ymm2,64 1573 vpermq ymm3,ymm3,64 1574 vpermq ymm4,ymm4,64 1575 vpshufd ymm0,ymm0,200 1576 vpshufd ymm1,ymm1,200 1577 vpshufd ymm2,ymm2,200 1578 vpshufd ymm3,ymm3,200 1579 vpshufd ymm4,ymm4,200 1580 vmovdqa [edx-128],ymm0 1581 vmovdqu xmm0,[80+edi] 1582 vmovdqa [edx-96],ymm1 1583 vmovdqu xmm1,[96+edi] 1584 vmovdqa [edx-64],ymm2 1585 vmovdqu xmm2,[112+edi] 1586 vmovdqa [edx-32],ymm3 1587 vmovdqu xmm3,[128+edi] 1588 vmovdqa [edx],ymm4 1589 vpermq ymm0,ymm0,64 1590 vpermq ymm1,ymm1,64 1591 vpermq ymm2,ymm2,64 1592 vpermq ymm3,ymm3,64 1593 vpshufd ymm0,ymm0,200 1594 vpshufd ymm1,ymm1,200 1595 vpshufd ymm2,ymm2,200 1596 vpshufd ymm3,ymm3,200 1597 vmovdqa [32+edx],ymm0 1598 vmovd xmm0,DWORD [edi-48] 1599 vmovdqa [64+edx],ymm1 1600 vmovd xmm1,DWORD [edi-44] 1601 vmovdqa [96+edx],ymm2 1602 vmovd xmm2,DWORD [edi-40] 1603 vmovdqa [128+edx],ymm3 1604 vmovd xmm3,DWORD [edi-36] 1605 vmovd xmm4,DWORD [edi-32] 1606 vmovdqa ymm7,[64+ebx] 1607 neg eax 1608 test ecx,63 1609 jz NEAR L$024even 1610 mov edx,ecx 1611 and ecx,-64 1612 and edx,63 1613 vmovdqu xmm5,[esi] 1614 cmp edx,32 1615 jb NEAR L$025one 1616 vmovdqu xmm6,[16+esi] 1617 je NEAR L$026two 1618 vinserti128 ymm5,ymm5,[32+esi],1 1619 lea esi,[48+esi] 1620 lea ebx,[8+ebx] 1621 lea edx,[296+esp] 1622 jmp NEAR L$027tail 1623L$026two: 1624 lea esi,[32+esi] 1625 lea ebx,[16+ebx] 1626 lea edx,[304+esp] 1627 jmp NEAR L$027tail 1628L$025one: 1629 lea esi,[16+esi] 1630 vpxor ymm6,ymm6,ymm6 1631 lea ebx,[32+eax*8+ebx] 1632 lea edx,[312+esp] 1633 jmp NEAR L$027tail 1634align 32 1635L$024even: 1636 vmovdqu xmm5,[esi] 1637 vmovdqu xmm6,[16+esi] 1638 vinserti128 ymm5,ymm5,[32+esi],1 1639 vinserti128 ymm6,ymm6,[48+esi],1 1640 lea esi,[64+esi] 1641 sub ecx,64 1642 jz NEAR L$027tail 1643L$028loop: 1644 vmovdqa [64+esp],ymm2 1645 vpsrldq ymm2,ymm5,6 1646 vmovdqa [esp],ymm0 1647 vpsrldq ymm0,ymm6,6 1648 vmovdqa [32+esp],ymm1 1649 vpunpckhqdq ymm1,ymm5,ymm6 1650 vpunpcklqdq ymm5,ymm5,ymm6 1651 vpunpcklqdq ymm2,ymm2,ymm0 1652 vpsrlq ymm0,ymm2,30 1653 vpsrlq ymm2,ymm2,4 1654 vpsrlq ymm6,ymm5,26 1655 vpsrlq ymm1,ymm1,40 1656 vpand ymm2,ymm2,ymm7 1657 vpand ymm5,ymm5,ymm7 1658 vpand ymm6,ymm6,ymm7 1659 vpand ymm0,ymm0,ymm7 1660 vpor ymm1,ymm1,[ebx] 1661 vpaddq ymm2,ymm2,[64+esp] 1662 vpaddq ymm5,ymm5,[esp] 1663 vpaddq ymm6,ymm6,[32+esp] 1664 vpaddq ymm0,ymm0,ymm3 1665 vpaddq ymm1,ymm1,ymm4 1666 vpmuludq ymm3,ymm2,[edx-96] 1667 vmovdqa [32+esp],ymm6 1668 vpmuludq ymm4,ymm2,[edx-64] 1669 vmovdqa [96+esp],ymm0 1670 vpmuludq ymm0,ymm2,[96+edx] 1671 vmovdqa [128+esp],ymm1 1672 vpmuludq ymm1,ymm2,[128+edx] 1673 vpmuludq ymm2,ymm2,[edx-128] 1674 vpmuludq ymm7,ymm5,[edx-32] 1675 vpaddq ymm3,ymm3,ymm7 1676 vpmuludq ymm6,ymm5,[edx] 1677 vpaddq ymm4,ymm4,ymm6 1678 vpmuludq ymm7,ymm5,[edx-128] 1679 vpaddq ymm0,ymm0,ymm7 1680 vmovdqa ymm7,[32+esp] 1681 vpmuludq ymm6,ymm5,[edx-96] 1682 vpaddq ymm1,ymm1,ymm6 1683 vpmuludq ymm5,ymm5,[edx-64] 1684 vpaddq ymm2,ymm2,ymm5 1685 vpmuludq ymm6,ymm7,[edx-64] 1686 vpaddq ymm3,ymm3,ymm6 1687 vpmuludq ymm5,ymm7,[edx-32] 1688 vpaddq ymm4,ymm4,ymm5 1689 vpmuludq ymm6,ymm7,[128+edx] 1690 vpaddq ymm0,ymm0,ymm6 1691 vmovdqa ymm6,[96+esp] 1692 vpmuludq ymm5,ymm7,[edx-128] 1693 vpaddq ymm1,ymm1,ymm5 1694 vpmuludq ymm7,ymm7,[edx-96] 1695 vpaddq ymm2,ymm2,ymm7 1696 vpmuludq ymm5,ymm6,[edx-128] 1697 vpaddq ymm3,ymm3,ymm5 1698 vpmuludq ymm7,ymm6,[edx-96] 1699 vpaddq ymm4,ymm4,ymm7 1700 vpmuludq ymm5,ymm6,[64+edx] 1701 vpaddq ymm0,ymm0,ymm5 1702 vmovdqa ymm5,[128+esp] 1703 vpmuludq ymm7,ymm6,[96+edx] 1704 vpaddq ymm1,ymm1,ymm7 1705 vpmuludq ymm6,ymm6,[128+edx] 1706 vpaddq ymm2,ymm2,ymm6 1707 vpmuludq ymm7,ymm5,[128+edx] 1708 vpaddq ymm3,ymm3,ymm7 1709 vpmuludq ymm6,ymm5,[32+edx] 1710 vpaddq ymm0,ymm0,ymm6 1711 vpmuludq ymm7,ymm5,[edx-128] 1712 vpaddq ymm4,ymm4,ymm7 1713 vmovdqa ymm7,[64+ebx] 1714 vpmuludq ymm6,ymm5,[64+edx] 1715 vpaddq ymm1,ymm1,ymm6 1716 vpmuludq ymm5,ymm5,[96+edx] 1717 vpaddq ymm2,ymm2,ymm5 1718 vpsrlq ymm5,ymm3,26 1719 vpand ymm3,ymm3,ymm7 1720 vpsrlq ymm6,ymm0,26 1721 vpand ymm0,ymm0,ymm7 1722 vpaddq ymm4,ymm4,ymm5 1723 vpaddq ymm1,ymm1,ymm6 1724 vpsrlq ymm5,ymm4,26 1725 vpand ymm4,ymm4,ymm7 1726 vpsrlq ymm6,ymm1,26 1727 vpand ymm1,ymm1,ymm7 1728 vpaddq ymm2,ymm2,ymm6 1729 vpaddq ymm0,ymm0,ymm5 1730 vpsllq ymm5,ymm5,2 1731 vpsrlq ymm6,ymm2,26 1732 vpand ymm2,ymm2,ymm7 1733 vpaddq ymm0,ymm0,ymm5 1734 vpaddq ymm3,ymm3,ymm6 1735 vpsrlq ymm6,ymm3,26 1736 vpsrlq ymm5,ymm0,26 1737 vpand ymm0,ymm0,ymm7 1738 vpand ymm3,ymm3,ymm7 1739 vpaddq ymm1,ymm1,ymm5 1740 vpaddq ymm4,ymm4,ymm6 1741 vmovdqu xmm5,[esi] 1742 vmovdqu xmm6,[16+esi] 1743 vinserti128 ymm5,ymm5,[32+esi],1 1744 vinserti128 ymm6,ymm6,[48+esi],1 1745 lea esi,[64+esi] 1746 sub ecx,64 1747 jnz NEAR L$028loop 1748L$027tail: 1749 vmovdqa [64+esp],ymm2 1750 vpsrldq ymm2,ymm5,6 1751 vmovdqa [esp],ymm0 1752 vpsrldq ymm0,ymm6,6 1753 vmovdqa [32+esp],ymm1 1754 vpunpckhqdq ymm1,ymm5,ymm6 1755 vpunpcklqdq ymm5,ymm5,ymm6 1756 vpunpcklqdq ymm2,ymm2,ymm0 1757 vpsrlq ymm0,ymm2,30 1758 vpsrlq ymm2,ymm2,4 1759 vpsrlq ymm6,ymm5,26 1760 vpsrlq ymm1,ymm1,40 1761 vpand ymm2,ymm2,ymm7 1762 vpand ymm5,ymm5,ymm7 1763 vpand ymm6,ymm6,ymm7 1764 vpand ymm0,ymm0,ymm7 1765 vpor ymm1,ymm1,[ebx] 1766 and ebx,-64 1767 vpaddq ymm2,ymm2,[64+esp] 1768 vpaddq ymm5,ymm5,[esp] 1769 vpaddq ymm6,ymm6,[32+esp] 1770 vpaddq ymm0,ymm0,ymm3 1771 vpaddq ymm1,ymm1,ymm4 1772 vpmuludq ymm3,ymm2,[edx-92] 1773 vmovdqa [32+esp],ymm6 1774 vpmuludq ymm4,ymm2,[edx-60] 1775 vmovdqa [96+esp],ymm0 1776 vpmuludq ymm0,ymm2,[100+edx] 1777 vmovdqa [128+esp],ymm1 1778 vpmuludq ymm1,ymm2,[132+edx] 1779 vpmuludq ymm2,ymm2,[edx-124] 1780 vpmuludq ymm7,ymm5,[edx-28] 1781 vpaddq ymm3,ymm3,ymm7 1782 vpmuludq ymm6,ymm5,[4+edx] 1783 vpaddq ymm4,ymm4,ymm6 1784 vpmuludq ymm7,ymm5,[edx-124] 1785 vpaddq ymm0,ymm0,ymm7 1786 vmovdqa ymm7,[32+esp] 1787 vpmuludq ymm6,ymm5,[edx-92] 1788 vpaddq ymm1,ymm1,ymm6 1789 vpmuludq ymm5,ymm5,[edx-60] 1790 vpaddq ymm2,ymm2,ymm5 1791 vpmuludq ymm6,ymm7,[edx-60] 1792 vpaddq ymm3,ymm3,ymm6 1793 vpmuludq ymm5,ymm7,[edx-28] 1794 vpaddq ymm4,ymm4,ymm5 1795 vpmuludq ymm6,ymm7,[132+edx] 1796 vpaddq ymm0,ymm0,ymm6 1797 vmovdqa ymm6,[96+esp] 1798 vpmuludq ymm5,ymm7,[edx-124] 1799 vpaddq ymm1,ymm1,ymm5 1800 vpmuludq ymm7,ymm7,[edx-92] 1801 vpaddq ymm2,ymm2,ymm7 1802 vpmuludq ymm5,ymm6,[edx-124] 1803 vpaddq ymm3,ymm3,ymm5 1804 vpmuludq ymm7,ymm6,[edx-92] 1805 vpaddq ymm4,ymm4,ymm7 1806 vpmuludq ymm5,ymm6,[68+edx] 1807 vpaddq ymm0,ymm0,ymm5 1808 vmovdqa ymm5,[128+esp] 1809 vpmuludq ymm7,ymm6,[100+edx] 1810 vpaddq ymm1,ymm1,ymm7 1811 vpmuludq ymm6,ymm6,[132+edx] 1812 vpaddq ymm2,ymm2,ymm6 1813 vpmuludq ymm7,ymm5,[132+edx] 1814 vpaddq ymm3,ymm3,ymm7 1815 vpmuludq ymm6,ymm5,[36+edx] 1816 vpaddq ymm0,ymm0,ymm6 1817 vpmuludq ymm7,ymm5,[edx-124] 1818 vpaddq ymm4,ymm4,ymm7 1819 vmovdqa ymm7,[64+ebx] 1820 vpmuludq ymm6,ymm5,[68+edx] 1821 vpaddq ymm1,ymm1,ymm6 1822 vpmuludq ymm5,ymm5,[100+edx] 1823 vpaddq ymm2,ymm2,ymm5 1824 vpsrldq ymm5,ymm4,8 1825 vpsrldq ymm6,ymm3,8 1826 vpaddq ymm4,ymm4,ymm5 1827 vpsrldq ymm5,ymm0,8 1828 vpaddq ymm3,ymm3,ymm6 1829 vpsrldq ymm6,ymm1,8 1830 vpaddq ymm0,ymm0,ymm5 1831 vpsrldq ymm5,ymm2,8 1832 vpaddq ymm1,ymm1,ymm6 1833 vpermq ymm6,ymm4,2 1834 vpaddq ymm2,ymm2,ymm5 1835 vpermq ymm5,ymm3,2 1836 vpaddq ymm4,ymm4,ymm6 1837 vpermq ymm6,ymm0,2 1838 vpaddq ymm3,ymm3,ymm5 1839 vpermq ymm5,ymm1,2 1840 vpaddq ymm0,ymm0,ymm6 1841 vpermq ymm6,ymm2,2 1842 vpaddq ymm1,ymm1,ymm5 1843 vpaddq ymm2,ymm2,ymm6 1844 vpsrlq ymm5,ymm3,26 1845 vpand ymm3,ymm3,ymm7 1846 vpsrlq ymm6,ymm0,26 1847 vpand ymm0,ymm0,ymm7 1848 vpaddq ymm4,ymm4,ymm5 1849 vpaddq ymm1,ymm1,ymm6 1850 vpsrlq ymm5,ymm4,26 1851 vpand ymm4,ymm4,ymm7 1852 vpsrlq ymm6,ymm1,26 1853 vpand ymm1,ymm1,ymm7 1854 vpaddq ymm2,ymm2,ymm6 1855 vpaddq ymm0,ymm0,ymm5 1856 vpsllq ymm5,ymm5,2 1857 vpsrlq ymm6,ymm2,26 1858 vpand ymm2,ymm2,ymm7 1859 vpaddq ymm0,ymm0,ymm5 1860 vpaddq ymm3,ymm3,ymm6 1861 vpsrlq ymm6,ymm3,26 1862 vpsrlq ymm5,ymm0,26 1863 vpand ymm0,ymm0,ymm7 1864 vpand ymm3,ymm3,ymm7 1865 vpaddq ymm1,ymm1,ymm5 1866 vpaddq ymm4,ymm4,ymm6 1867 cmp ecx,0 1868 je NEAR L$029done 1869 vpshufd xmm0,xmm0,252 1870 lea edx,[288+esp] 1871 vpshufd xmm1,xmm1,252 1872 vpshufd xmm2,xmm2,252 1873 vpshufd xmm3,xmm3,252 1874 vpshufd xmm4,xmm4,252 1875 jmp NEAR L$024even 1876align 16 1877L$029done: 1878 vmovd DWORD [edi-48],xmm0 1879 vmovd DWORD [edi-44],xmm1 1880 vmovd DWORD [edi-40],xmm2 1881 vmovd DWORD [edi-36],xmm3 1882 vmovd DWORD [edi-32],xmm4 1883 vzeroupper 1884 mov esp,ebp 1885L$020nodata: 1886 pop edi 1887 pop esi 1888 pop ebx 1889 pop ebp 1890 ret 1891align 64 1892L$const_sse2: 1893dd 16777216,0,16777216,0,16777216,0,16777216,0 1894dd 0,0,0,0,0,0,0,0 1895dd 67108863,0,67108863,0,67108863,0,67108863,0 1896dd 268435455,268435452,268435452,268435452 1897db 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54 1898db 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 1899db 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 1900db 114,103,62,0 1901align 4 1902segment .bss 1903common _OPENSSL_ia32cap_P 16 1904