; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes .00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
$@feat.00 equ 1
%endif
section	.text	code align=64
%else
section	.text	code
%endif
;-----------------------------------------------------------------------
; _ChaCha20_ctr32
; Syntax: NASM, 32-bit x86. Arguments are read from the stack; after the
; four register pushes below they are found at:
;   [20+esp]  pointer that output bytes are stored through
;   [24+esp]  pointer that input bytes are loaded from
;   [28+esp]  byte count (the function returns at once when it is 0)
;   [32+esp]  pointer to eight dwords (copied to [80..108+esp])
;   [36+esp]  pointer to four dwords (copied to [112..124+esp]); the
;             first dword is used as a per-block counter
; ebp, ebx, esi and edi are saved and restored; eax, ecx, edx and flags
; are clobbered.
;
; Dispatch: when bit 24 of _OPENSSL_ia32cap_P[0] and bit 9 of
; _OPENSSL_ia32cap_P[1] are both set, control transfers to
; L$ssse3_shortcut with eax = address of L$pic_point.
; NOTE(review): these are presumably the FXSR and SSSE3 CPUID bits.
;
; Integer path frame (sub esp,132):
;   [0..60+esp]    sixteen-dword working state
;   [80..108+esp]  copy of the eight dwords from [32+esp]
;   [112..124+esp] copy of the four dwords from [36+esp]
;   [128+esp]      spilled loop counter
;   [152+esp], [156+esp], [160+esp]  the caller's output pointer, input
;                  pointer and byte count, updated in place per block
;-----------------------------------------------------------------------
global	_ChaCha20_ctr32
align	16
_ChaCha20_ctr32:
L$_ChaCha20_ctr32_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	xor	eax,eax
	cmp	eax,DWORD [28+esp]
	je	NEAR L$000no_data
	; eax = run-time address of L$pic_point; the SSSE3 code uses it to
	; locate L$ssse3_data.
	call	L$pic_point
L$pic_point:
	pop	eax
	lea	ebp,[_OPENSSL_ia32cap_P]
	test	DWORD [ebp],16777216
	jz	NEAR L$001x86
	test	DWORD [4+ebp],512
	jz	NEAR L$001x86
	jmp	NEAR L$ssse3_shortcut
L$001x86:
	mov	esi,DWORD [32+esp]
	mov	edi,DWORD [36+esp]
	sub	esp,132
	mov	eax,DWORD [esi]
	mov	ebx,DWORD [4+esi]
	mov	ecx,DWORD [8+esi]
	mov	edx,DWORD [12+esi]
	mov	DWORD [80+esp],eax
	mov	DWORD [84+esp],ebx
	mov	DWORD [88+esp],ecx
	mov	DWORD [92+esp],edx
	mov	eax,DWORD [16+esi]
	mov	ebx,DWORD [20+esi]
	mov	ecx,DWORD [24+esi]
	mov	edx,DWORD [28+esi]
	mov	DWORD [96+esp],eax
	mov	DWORD [100+esp],ebx
	mov	DWORD [104+esp],ecx
	mov	DWORD [108+esp],edx
	mov	eax,DWORD [edi]
	mov	ebx,DWORD [4+edi]
	mov	ecx,DWORD [8+edi]
	mov	edx,DWORD [12+edi]
	; Pre-decrement the counter: L$002entry adds 1 before every block.
	sub	eax,1
	mov	DWORD [112+esp],eax
	mov	DWORD [116+esp],ebx
	mov	DWORD [120+esp],ecx
	mov	DWORD [124+esp],edx
	jmp	NEAR L$002entry
align	16
L$003outer_loop:
	; Write back the advanced input pointer (ebx), output pointer (eax)
	; and remaining byte count (ecx).
	mov	DWORD [156+esp],ebx
	mov	DWORD [152+esp],eax
	mov	DWORD [160+esp],ecx
L$002entry:
	; Build the working state for one 64-byte block. The four immediates
	; are 0x61707865, 0x3320646e, 0x79622d32 and 0x6b206574.
	mov	eax,1634760805
	mov	DWORD [4+esp],857760878
	mov	DWORD [8+esp],2036477234
	mov	DWORD [12+esp],1797285236
	mov	ebx,DWORD [84+esp]
	mov	ebp,DWORD [88+esp]
	mov	ecx,DWORD [104+esp]
	mov	esi,DWORD [108+esp]
	mov	edx,DWORD [116+esp]
	mov	edi,DWORD [120+esp]
	mov	DWORD [20+esp],ebx
	mov	DWORD [24+esp],ebp
	mov	DWORD [40+esp],ecx
	mov	DWORD [44+esp],esi
	mov	DWORD [52+esp],edx
	mov	DWORD [56+esp],edi
	mov	ebx,DWORD [92+esp]
	mov	edi,DWORD [124+esp]
	mov	edx,DWORD [112+esp]
	mov	ebp,DWORD [80+esp]
	mov	ecx,DWORD [96+esp]
	mov	esi,DWORD [100+esp]
	add	edx,1
	mov	DWORD [28+esp],ebx
	mov	DWORD [60+esp],edi
	mov	DWORD [112+esp],edx
	; Ten passes through L$004loop; each pass applies the
	; add/xor/rotate(16,12,8,7) sequence eight times.
	mov	ebx,10
	jmp	NEAR L$004loop
align	16
L$004loop:
	add	eax,ebp
	mov	DWORD [128+esp],ebx
	mov	ebx,ebp
	xor	edx,eax
	rol	edx,16
	add	ecx,edx
	xor	ebx,ecx
	mov	edi,DWORD [52+esp]
	rol	ebx,12
	mov	ebp,DWORD [20+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [esp],eax
	rol	edx,8
	mov	eax,DWORD [4+esp]
	add	ecx,edx
	mov	DWORD [48+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	mov	DWORD [32+esp],ecx
	rol	edi,16
	mov	DWORD [16+esp],ebx
	add	esi,edi
	mov	ecx,DWORD [40+esp]
	xor	ebp,esi
	mov	edx,DWORD [56+esp]
	rol	ebp,12
	mov	ebx,DWORD [24+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [4+esp],eax
	rol	edi,8
	mov	eax,DWORD [8+esp]
	add	esi,edi
	mov	DWORD [52+esp],edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	mov	DWORD [36+esp],esi
	rol	edx,16
	mov	DWORD [20+esp],ebp
	add	ecx,edx
	mov	esi,DWORD [44+esp]
	xor	ebx,ecx
	mov	edi,DWORD [60+esp]
	rol	ebx,12
	mov	ebp,DWORD [28+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [8+esp],eax
	rol	edx,8
	mov	eax,DWORD [12+esp]
	add	ecx,edx
	mov	DWORD [56+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	rol	edi,16
	mov	DWORD [24+esp],ebx
	add	esi,edi
	xor	ebp,esi
	rol	ebp,12
	mov	ebx,DWORD [20+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [12+esp],eax
	rol	edi,8
	mov	eax,DWORD [esp]
	add	esi,edi
	mov	edx,edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	rol	edx,16
	mov	DWORD [28+esp],ebp
	add	ecx,edx
	xor	ebx,ecx
	mov	edi,DWORD [48+esp]
	rol	ebx,12
	mov	ebp,DWORD [24+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [esp],eax
	rol	edx,8
	mov	eax,DWORD [4+esp]
	add	ecx,edx
	mov	DWORD [60+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	mov	DWORD [40+esp],ecx
	rol	edi,16
	mov	DWORD [20+esp],ebx
	add	esi,edi
	mov	ecx,DWORD [32+esp]
	xor	ebp,esi
	mov	edx,DWORD [52+esp]
	rol	ebp,12
	mov	ebx,DWORD [28+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [4+esp],eax
	rol	edi,8
	mov	eax,DWORD [8+esp]
	add	esi,edi
	mov	DWORD [48+esp],edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	mov	DWORD [44+esp],esi
	rol	edx,16
	mov	DWORD [24+esp],ebp
	add	ecx,edx
	mov	esi,DWORD [36+esp]
	xor	ebx,ecx
	mov	edi,DWORD [56+esp]
	rol	ebx,12
	mov	ebp,DWORD [16+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [8+esp],eax
	rol	edx,8
	mov	eax,DWORD [12+esp]
	add	ecx,edx
	mov	DWORD [52+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	rol	edi,16
	mov	DWORD [28+esp],ebx
	add	esi,edi
	xor	ebp,esi
	mov	edx,DWORD [48+esp]
	rol	ebp,12
	mov	ebx,DWORD [128+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [12+esp],eax
	rol	edi,8
	mov	eax,DWORD [esp]
	add	esi,edi
	mov	DWORD [56+esp],edi
	xor	ebp,esi
	rol	ebp,7
	dec	ebx
	jnz	NEAR L$004loop
	; Add the block's initial state back in. A full 64-byte block is
	; XORed with the input and stored directly; fewer than 64 remaining
	; bytes go through L$005tail.
	mov	ebx,DWORD [160+esp]
	add	eax,1634760805
	add	ebp,DWORD [80+esp]
	add	ecx,DWORD [96+esp]
	add	esi,DWORD [100+esp]
	cmp	ebx,64
	jb	NEAR L$005tail
	mov	ebx,DWORD [156+esp]
	add	edx,DWORD [112+esp]
	add	edi,DWORD [120+esp]
	xor	eax,DWORD [ebx]
	xor	ebp,DWORD [16+ebx]
	mov	DWORD [esp],eax
	mov	eax,DWORD [152+esp]
	xor	ecx,DWORD [32+ebx]
	xor	esi,DWORD [36+ebx]
	xor	edx,DWORD [48+ebx]
	xor	edi,DWORD [56+ebx]
	mov	DWORD [16+eax],ebp
	mov	DWORD [32+eax],ecx
	mov	DWORD [36+eax],esi
	mov	DWORD [48+eax],edx
	mov	DWORD [56+eax],edi
	mov	ebp,DWORD [4+esp]
	mov	ecx,DWORD [8+esp]
	mov	esi,DWORD [12+esp]
	mov	edx,DWORD [20+esp]
	mov	edi,DWORD [24+esp]
	add	ebp,857760878
	add	ecx,2036477234
	add	esi,1797285236
	add	edx,DWORD [84+esp]
	add	edi,DWORD [88+esp]
	xor	ebp,DWORD [4+ebx]
	xor	ecx,DWORD [8+ebx]
	xor	esi,DWORD [12+ebx]
	xor	edx,DWORD [20+ebx]
	xor	edi,DWORD [24+ebx]
	mov	DWORD [4+eax],ebp
	mov	DWORD [8+eax],ecx
	mov	DWORD [12+eax],esi
	mov	DWORD [20+eax],edx
	mov	DWORD [24+eax],edi
	mov	ebp,DWORD [28+esp]
	mov	ecx,DWORD [40+esp]
	mov	esi,DWORD [44+esp]
	mov	edx,DWORD [52+esp]
	mov	edi,DWORD [60+esp]
	add	ebp,DWORD [92+esp]
	add	ecx,DWORD [104+esp]
	add	esi,DWORD [108+esp]
	add	edx,DWORD [116+esp]
	add	edi,DWORD [124+esp]
	xor	ebp,DWORD [28+ebx]
	xor	ecx,DWORD [40+ebx]
	xor	esi,DWORD [44+ebx]
	xor	edx,DWORD [52+ebx]
	xor	edi,DWORD [60+ebx]
	lea	ebx,[64+ebx]
	mov	DWORD [28+eax],ebp
	mov	ebp,DWORD [esp]
	mov	DWORD [40+eax],ecx
	mov	ecx,DWORD [160+esp]
	mov	DWORD [44+eax],esi
	mov	DWORD [52+eax],edx
	mov	DWORD [60+eax],edi
	mov	DWORD [eax],ebp
	lea	eax,[64+eax]
	sub	ecx,64
	jnz	NEAR L$003outer_loop
	jmp	NEAR L$006done
L$005tail:
	; Partial final block: finish the block in [0..60+esp], then XOR it
	; into the output one byte at a time (ebx = bytes remaining, 1..63).
	add	edx,DWORD [112+esp]
	add	edi,DWORD [120+esp]
	mov	DWORD [esp],eax
	mov	DWORD [16+esp],ebp
	mov	DWORD [32+esp],ecx
	mov	DWORD [36+esp],esi
	mov	DWORD [48+esp],edx
	mov	DWORD [56+esp],edi
	mov	ebp,DWORD [4+esp]
	mov	ecx,DWORD [8+esp]
	mov	esi,DWORD [12+esp]
	mov	edx,DWORD [20+esp]
	mov	edi,DWORD [24+esp]
	add	ebp,857760878
	add	ecx,2036477234
	add	esi,1797285236
	add	edx,DWORD [84+esp]
	add	edi,DWORD [88+esp]
	mov	DWORD [4+esp],ebp
	mov	DWORD [8+esp],ecx
	mov	DWORD [12+esp],esi
	mov	DWORD [20+esp],edx
	mov	DWORD [24+esp],edi
	mov	ebp,DWORD [28+esp]
	mov	ecx,DWORD [40+esp]
	mov	esi,DWORD [44+esp]
	mov	edx,DWORD [52+esp]
	mov	edi,DWORD [60+esp]
	add	ebp,DWORD [92+esp]
	add	ecx,DWORD [104+esp]
	add	esi,DWORD [108+esp]
	add	edx,DWORD [116+esp]
	add	edi,DWORD [124+esp]
	mov	DWORD [28+esp],ebp
	mov	ebp,DWORD [156+esp]
	mov	DWORD [40+esp],ecx
	mov	ecx,DWORD [152+esp]
	mov	DWORD [44+esp],esi
	xor	esi,esi
	mov	DWORD [52+esp],edx
	mov	DWORD [60+esp],edi
	xor	eax,eax
	xor	edx,edx
L$007tail_loop:
	mov	al,BYTE [ebp*1+esi]
	mov	dl,BYTE [esi*1+esp]
	lea	esi,[1+esi]
	xor	al,dl
	mov	BYTE [esi*1+ecx-1],al
	dec	ebx
	jnz	NEAR L$007tail_loop
L$006done:
	add	esp,132
L$000no_data:
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
;-----------------------------------------------------------------------
; _ChaCha20_ssse3
; Same stack argument layout as _ChaCha20_ctr32 (read at [20..36+esp]
; after the four pushes). Reached either by a direct call through the
; global symbol or by the jump to L$ssse3_shortcut in _ChaCha20_ctr32.
; Unlike _ChaCha20_ctr32 there is no zero-length check on this path.
;
; Contract at L$ssse3_shortcut: eax must hold the run-time address of
; L$pic_point, because the constant table is addressed as
; (L$ssse3_data - L$pic_point) + eax.
;
; Frame: esp is lowered by 524 and aligned down to 64 bytes.
;   [512+esp]  caller's esp (restored at L$011done)
;   [516+esp]  saved pointer from [32+esp] (4-block path only)
;   [520+esp]  saved pointer from [36+esp] (4-block path only)
;   ebp = esp+384  base of the broadcast input state (four lanes/word)
;   ebx = esp+128  base of the broadcast working state
; Inputs of 256 bytes or more are processed four 64-byte blocks at a
; time; any remainder, and inputs under 256 bytes, use the one-block
; loop at L$012loop1x.
;-----------------------------------------------------------------------
global	_ChaCha20_ssse3
align	16
_ChaCha20_ssse3:
L$_ChaCha20_ssse3_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	; Direct-entry fix: eax is not set by the caller, so derive the
	; address of L$pic_point here. The jump from _ChaCha20_ctr32 lands on
	; L$ssse3_shortcut below and skips this, since eax is already valid.
	; NOTE(review): mirror this change in the Perl generator, otherwise
	; regenerating the file will drop it.
	call	L$ssse3_pic_point
L$ssse3_pic_point:
	pop	eax
	lea	eax,[(L$pic_point-L$ssse3_pic_point)+eax]
L$ssse3_shortcut:
	mov	edi,DWORD [20+esp]
	mov	esi,DWORD [24+esp]
	mov	ecx,DWORD [28+esp]
	mov	edx,DWORD [32+esp]
	mov	ebx,DWORD [36+esp]
	mov	ebp,esp
	sub	esp,524
	and	esp,-64
	mov	DWORD [512+esp],ebp
	; eax = address of L$ssse3_data from here on.
	lea	eax,[(L$ssse3_data-L$pic_point)+eax]
	movdqu	xmm3,[ebx]
	cmp	ecx,256
	jb	NEAR L$0081x
	mov	DWORD [516+esp],edx
	mov	DWORD [520+esp],ebx
	sub	ecx,256
	lea	ebp,[384+esp]
	movdqu	xmm7,[edx]
	; Broadcast each input dword across a whole register so that lane i
	; of every register belongs to block i.
	pshufd	xmm0,xmm3,0
	pshufd	xmm1,xmm3,85
	pshufd	xmm2,xmm3,170
	pshufd	xmm3,xmm3,255
	; Counter lanes get +{0,1,2,3}, then -{4,4,4,4} because
	; L$009outer_loop adds {4,4,4,4} before every batch.
	paddd	xmm0,[48+eax]
	pshufd	xmm4,xmm7,0
	pshufd	xmm5,xmm7,85
	psubd	xmm0,[64+eax]
	pshufd	xmm6,xmm7,170
	pshufd	xmm7,xmm7,255
	movdqa	[64+ebp],xmm0
	movdqa	[80+ebp],xmm1
	movdqa	[96+ebp],xmm2
	movdqa	[112+ebp],xmm3
	movdqu	xmm3,[16+edx]
	movdqa	[ebp-64],xmm4
	movdqa	[ebp-48],xmm5
	movdqa	[ebp-32],xmm6
	movdqa	[ebp-16],xmm7
	movdqa	xmm7,[32+eax]
	lea	ebx,[128+esp]
	pshufd	xmm0,xmm3,0
	pshufd	xmm1,xmm3,85
	pshufd	xmm2,xmm3,170
	pshufd	xmm3,xmm3,255
	pshufd	xmm4,xmm7,0
	pshufd	xmm5,xmm7,85
	pshufd	xmm6,xmm7,170
	pshufd	xmm7,xmm7,255
	movdqa	[ebp],xmm0
	movdqa	[16+ebp],xmm1
	movdqa	[32+ebp],xmm2
	movdqa	[48+ebp],xmm3
	movdqa	[ebp-128],xmm4
	movdqa	[ebp-112],xmm5
	movdqa	[ebp-96],xmm6
	movdqa	[ebp-80],xmm7
	; Bias both data pointers by 128 so the batch can be addressed with
	; displacements from -128 to +64.
	lea	esi,[128+esi]
	lea	edi,[128+edi]
	jmp	NEAR L$009outer_loop
align	16
L$009outer_loop:
	; Copy the input state (ebp-relative) into the working area
	; (ebx-relative), advancing the counter lanes by {4,4,4,4}.
	movdqa	xmm1,[ebp-112]
	movdqa	xmm2,[ebp-96]
	movdqa	xmm3,[ebp-80]
	movdqa	xmm5,[ebp-48]
	movdqa	xmm6,[ebp-32]
	movdqa	xmm7,[ebp-16]
	movdqa	[ebx-112],xmm1
	movdqa	[ebx-96],xmm2
	movdqa	[ebx-80],xmm3
	movdqa	[ebx-48],xmm5
	movdqa	[ebx-32],xmm6
	movdqa	[ebx-16],xmm7
	movdqa	xmm2,[32+ebp]
	movdqa	xmm3,[48+ebp]
	movdqa	xmm4,[64+ebp]
	movdqa	xmm5,[80+ebp]
	movdqa	xmm6,[96+ebp]
	movdqa	xmm7,[112+ebp]
	paddd	xmm4,[64+eax]
	movdqa	[32+ebx],xmm2
	movdqa	[48+ebx],xmm3
	movdqa	[64+ebx],xmm4
	movdqa	[80+ebx],xmm5
	movdqa	[96+ebx],xmm6
	movdqa	[112+ebx],xmm7
	movdqa	[64+ebp],xmm4
	movdqa	xmm0,[ebp-128]
	movdqa	xmm6,xmm4
	movdqa	xmm3,[ebp-64]
	movdqa	xmm4,[ebp]
	movdqa	xmm5,[16+ebp]
	mov	edx,10
	nop
align	16
L$010loop:
	; Ten passes. Rotations by 16 and 8 use pshufb with the masks at
	; [eax] and [16+eax]; rotations by 12 and 7 use shift/shift/or.
	paddd	xmm0,xmm3
	movdqa	xmm2,xmm3
	pxor	xmm6,xmm0
	pshufb	xmm6,[eax]
	paddd	xmm4,xmm6
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-48]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-112]
	paddd	xmm0,xmm2
	movdqa	xmm7,[80+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-128],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[64+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	movdqa	[ebx],xmm4
	pshufb	xmm7,[eax]
	movdqa	[ebx-64],xmm2
	paddd	xmm5,xmm7
	movdqa	xmm4,[32+ebx]
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-32]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-96]
	paddd	xmm1,xmm3
	movdqa	xmm6,[96+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-112],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[80+ebx],xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	movdqa	[16+ebx],xmm5
	pshufb	xmm6,[eax]
	movdqa	[ebx-48],xmm3
	paddd	xmm4,xmm6
	movdqa	xmm5,[48+ebx]
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-16]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-80]
	paddd	xmm0,xmm2
	movdqa	xmm7,[112+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-96],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[96+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	pshufb	xmm7,[eax]
	movdqa	[ebx-32],xmm2
	paddd	xmm5,xmm7
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-48]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-128]
	paddd	xmm1,xmm3
	pxor	xmm7,xmm1
	movdqa	[ebx-80],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	xmm6,xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	pshufb	xmm6,[eax]
	movdqa	[ebx-16],xmm3
	paddd	xmm4,xmm6
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-32]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-112]
	paddd	xmm0,xmm2
	movdqa	xmm7,[64+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-128],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[112+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	movdqa	[32+ebx],xmm4
	pshufb	xmm7,[eax]
	movdqa	[ebx-48],xmm2
	paddd	xmm5,xmm7
	movdqa	xmm4,[ebx]
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-16]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-96]
	paddd	xmm1,xmm3
	movdqa	xmm6,[80+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-112],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[64+ebx],xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	movdqa	[48+ebx],xmm5
	pshufb	xmm6,[eax]
	movdqa	[ebx-32],xmm3
	paddd	xmm4,xmm6
	movdqa	xmm5,[16+ebx]
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-64]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-80]
	paddd	xmm0,xmm2
	movdqa	xmm7,[96+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-96],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[80+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	pshufb	xmm7,[eax]
	movdqa	[ebx-16],xmm2
	paddd	xmm5,xmm7
	pxor	xmm3,xmm5
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-128]
	paddd	xmm1,xmm3
	movdqa	xmm6,[64+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-80],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[96+ebx],xmm7
	pxor	xmm3,xmm5
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	por	xmm3,xmm1
	dec	edx
	jnz	NEAR L$010loop
	; Spill the words still held in registers, then handle the state four
	; words at a time: add the input state, transpose the 4x4 dword
	; matrix so each register holds 16 consecutive bytes of one block,
	; XOR with the input and store.
	movdqa	[ebx-64],xmm3
	movdqa	[ebx],xmm4
	movdqa	[16+ebx],xmm5
	movdqa	[64+ebx],xmm6
	movdqa	[96+ebx],xmm7
	movdqa	xmm1,[ebx-112]
	movdqa	xmm2,[ebx-96]
	movdqa	xmm3,[ebx-80]
	paddd	xmm0,[ebp-128]
	paddd	xmm1,[ebp-112]
	paddd	xmm2,[ebp-96]
	paddd	xmm3,[ebp-80]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[ebx-64]
	pxor	xmm5,xmm1
	movdqa	xmm1,[ebx-48]
	pxor	xmm6,xmm2
	movdqa	xmm2,[ebx-32]
	pxor	xmm7,xmm3
	movdqa	xmm3,[ebx-16]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[ebp-64]
	paddd	xmm1,[ebp-48]
	paddd	xmm2,[ebp-32]
	paddd	xmm3,[ebp-16]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[ebx]
	pxor	xmm5,xmm1
	movdqa	xmm1,[16+ebx]
	pxor	xmm6,xmm2
	movdqa	xmm2,[32+ebx]
	pxor	xmm7,xmm3
	movdqa	xmm3,[48+ebx]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[ebp]
	paddd	xmm1,[16+ebp]
	paddd	xmm2,[32+ebp]
	paddd	xmm3,[48+ebp]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[64+ebx]
	pxor	xmm5,xmm1
	movdqa	xmm1,[80+ebx]
	pxor	xmm6,xmm2
	movdqa	xmm2,[96+ebx]
	pxor	xmm7,xmm3
	movdqa	xmm3,[112+ebx]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[64+ebp]
	paddd	xmm1,[80+ebp]
	paddd	xmm2,[96+ebp]
	paddd	xmm3,[112+ebp]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	; 3*16 + 208 = 256: both pointers end up one full batch further on.
	lea	esi,[208+esi]
	pxor	xmm4,xmm0
	pxor	xmm5,xmm1
	pxor	xmm6,xmm2
	pxor	xmm7,xmm3
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[208+edi]
	sub	ecx,256
	jnc	NEAR L$009outer_loop
	add	ecx,256
	jz	NEAR L$011done
	; 1..255 bytes remain: undo the +128 pointer bias and rebuild a
	; single counter block for the one-block loop. Its first dword is
	; lane 0 of the last batch's counter plus 4; the other three dwords
	; come from the original block (mask {0,-1,-1,-1}).
	mov	ebx,DWORD [520+esp]
	lea	esi,[esi-128]
	mov	edx,DWORD [516+esp]
	lea	edi,[edi-128]
	movd	xmm2,DWORD [64+ebp]
	movdqu	xmm3,[ebx]
	paddd	xmm2,[96+eax]
	pand	xmm3,[112+eax]
	por	xmm3,xmm2
L$0081x:
	; One block per iteration. The four state rows live in xmm0..xmm3
	; with a copy at [0..63+esp]; xmm6/xmm7 hold the two pshufb masks.
	movdqa	xmm0,[32+eax]
	movdqu	xmm1,[edx]
	movdqu	xmm2,[16+edx]
	movdqa	xmm6,[eax]
	movdqa	xmm7,[16+eax]
	; This dword store is overwritten by the movdqa to [48+esp] below.
	mov	DWORD [48+esp],ebp
	movdqa	[esp],xmm0
	movdqa	[16+esp],xmm1
	movdqa	[32+esp],xmm2
	movdqa	[48+esp],xmm3
	mov	edx,10
	jmp	NEAR L$012loop1x
align	16
L$013outer1x:
	; Next block: reload the saved rows and add {1,0,0,0} to the counter.
	movdqa	xmm3,[80+eax]
	movdqa	xmm0,[esp]
	movdqa	xmm1,[16+esp]
	movdqa	xmm2,[32+esp]
	paddd	xmm3,[48+esp]
	mov	edx,10
	movdqa	[48+esp],xmm3
	jmp	NEAR L$012loop1x
align	16
L$012loop1x:
	; The db sequences encode "pshufb xmm3,xmm6" (…,222) and
	; "pshufb xmm3,xmm7" (…,223).
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,222
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,223
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,57
	pshufd	xmm3,xmm3,147
	nop
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,222
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,223
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,147
	pshufd	xmm3,xmm3,57
	dec	edx
	jnz	NEAR L$012loop1x
	paddd	xmm0,[esp]
	paddd	xmm1,[16+esp]
	paddd	xmm2,[32+esp]
	paddd	xmm3,[48+esp]
	cmp	ecx,64
	jb	NEAR L$014tail
	movdqu	xmm4,[esi]
	movdqu	xmm5,[16+esi]
	pxor	xmm0,xmm4
	movdqu	xmm4,[32+esi]
	pxor	xmm1,xmm5
	movdqu	xmm5,[48+esi]
	pxor	xmm2,xmm4
	pxor	xmm3,xmm5
	lea	esi,[64+esi]
	movdqu	[edi],xmm0
	movdqu	[16+edi],xmm1
	movdqu	[32+edi],xmm2
	movdqu	[48+edi],xmm3
	lea	edi,[64+edi]
	sub	ecx,64
	jnz	NEAR L$013outer1x
	jmp	NEAR L$011done
L$014tail:
	; Fewer than 64 bytes left: spill the block to the stack and XOR it
	; into the output byte by byte (ecx = bytes remaining).
	movdqa	[esp],xmm0
	movdqa	[16+esp],xmm1
	movdqa	[32+esp],xmm2
	movdqa	[48+esp],xmm3
	xor	eax,eax
	xor	edx,edx
	xor	ebp,ebp
L$015tail_loop:
	mov	al,BYTE [ebp*1+esp]
	mov	dl,BYTE [ebp*1+esi]
	lea	ebp,[1+ebp]
	xor	al,dl
	mov	BYTE [ebp*1+edi-1],al
	dec	ecx
	jnz	NEAR L$015tail_loop
L$011done:
	mov	esp,DWORD [512+esp]
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
; Constant table for the SSSE3 code, addressed through eax:
;   +0    pshufb mask: rotate each dword left by 16
;   +16   pshufb mask: rotate each dword left by 8
;   +32   the four constant dwords also used by the integer path
;   +48   {0,1,2,3}     per-lane counter offsets
;   +64   {4,4,4,4}     counter step per 4-block batch
;   +80   {1,0,0,0}     counter step per single block
;   +96   {4,0,0,0}     counter step when leaving the 4-block path
;   +112  {0,-1,-1,-1}  mask that clears the first dword
align	64
L$ssse3_data:
db	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
db	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
dd	1634760805,857760878,2036477234,1797285236
dd	0,1,2,3
dd	4,4,4,4
dd	1,0,0,0
dd	4,0,0,0
dd	0,-1,-1,-1
align	64
; ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated.
db	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
db	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
db	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
db	114,103,62,0
segment	.bss
common	_OPENSSL_ia32cap_P 16