; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; Syntax : NASM (Intel operand order), 32-bit x86.
; ABI    : arguments are read from the stack; ebp/ebx/esi/edi are saved and
;          restored, eax/ecx/edx (and xmm0-xmm7 on the SSSE3 path) are
;          clobbered.

%include "ring_core_generated/prefix_symbols_nasm.inc"
%ifidn __OUTPUT_FORMAT__,obj
section code use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
$@feat.00 equ 1
section .text code align=64
%else
section .text code
%endif

; The four constant words of the cipher state (state words 0..3).
; In hex: 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574.
%define CHACHA_SIGMA0 1634760805
%define CHACHA_SIGMA1 857760878
%define CHACHA_SIGMA2 2036477234
%define CHACHA_SIGMA3 1797285236

; Every pass through a round loop performs eight quarter-rounds (four on
; columns, four on diagonals); ten passes give the 20 rounds.
%define CHACHA_DOUBLE_ROUNDS 10

; Capability bits tested in _OPENSSL_ia32cap_P before taking the SSSE3 path.
; NOTE(review): presumably FXSR (word 0, bit 24) and SSSE3 (word 1, bit 9)
; per the OPENSSL_ia32cap layout -- confirm against its documentation.
%define IA32CAP_W0_BIT24 16777216
%define IA32CAP_W1_BIT9 512

;-----------------------------------------------------------------------
; _ChaCha20_ctr32
;
; Presumed C prototype (TODO confirm against the declaring header):
;   void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
;                       const uint32_t key[8], const uint32_t counter[4]);
;
; Stack arguments as seen after the four register pushes:
;   [20+esp] out      destination pointer (written)
;   [24+esp] in       source pointer (read and XORed with keystream)
;   [28+esp] len      byte count; zero returns immediately
;   [32+esp] key      pointer to 8 dwords  (state words 4..11)
;   [36+esp] counter  pointer to 4 dwords  (state words 12..15)
;
; Scalar-path frame after "sub esp,132":
;   [0..63+esp]     working state, word i at [4*i+esp] (several words live
;                   in registers inside the round loop)
;   [80..111+esp]   copy of the key
;   [112..127+esp]  copy of the counter block; word 12 is advanced by one
;                   for every 64-byte block
;   [128+esp]       spilled round-loop counter
;   [152+esp] out, [156+esp] in, [160+esp] len (the caller's argument slots)
;-----------------------------------------------------------------------
global _ChaCha20_ctr32
align 16
_ChaCha20_ctr32:
L$_ChaCha20_ctr32_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi

        ; Nothing to do for a zero length.
        xor     eax,eax
        cmp     eax,DWORD [28+esp]
        je      NEAR L$000no_data

        ; eax = run-time address of L$pic_point; the SSSE3 path uses it to
        ; locate L$ssse3_data.
        call    L$pic_point
L$pic_point:
        pop     eax

        ; Take the SSSE3 path only when both capability bits are set.
        lea     ebp,[_OPENSSL_ia32cap_P]
        test    DWORD [ebp],IA32CAP_W0_BIT24
        jz      NEAR L$001x86
        test    DWORD [4+ebp],IA32CAP_W1_BIT9
        jz      NEAR L$001x86
        jmp     NEAR L$ssse3_shortcut

        ; ---------------- scalar (integer-only) path ----------------
L$001x86:
        mov     esi,DWORD [32+esp]              ; esi = key
        mov     edi,DWORD [36+esp]              ; edi = counter
        sub     esp,132

        ; Copy key words 0..3 to [80..95+esp].
        mov     eax,DWORD [esi]
        mov     ebx,DWORD [4+esi]
        mov     ecx,DWORD [8+esi]
        mov     edx,DWORD [12+esi]
        mov     DWORD [80+esp],eax
        mov     DWORD [84+esp],ebx
        mov     DWORD [88+esp],ecx
        mov     DWORD [92+esp],edx

        ; Copy key words 4..7 to [96..111+esp].
        mov     eax,DWORD [16+esi]
        mov     ebx,DWORD [20+esi]
        mov     ecx,DWORD [24+esi]
        mov     edx,DWORD [28+esi]
        mov     DWORD [96+esp],eax
        mov     DWORD [100+esp],ebx
        mov     DWORD [104+esp],ecx
        mov     DWORD [108+esp],edx

        ; Copy the counter block to [112..127+esp]. Word 0 is stored minus
        ; one because L$002entry adds one before every block.
        mov     eax,DWORD [edi]
        mov     ebx,DWORD [4+edi]
        mov     ecx,DWORD [8+edi]
        mov     edx,DWORD [12+edi]
        sub     eax,1
        mov     DWORD [112+esp],eax
        mov     DWORD [116+esp],ebx
        mov     DWORD [120+esp],ecx
        mov     DWORD [124+esp],edx
        jmp     NEAR L$002entry

align 16
L$003outer_loop:
        ; Write back the advanced in/out pointers and the remaining length.
        mov     DWORD [156+esp],ebx
        mov     DWORD [152+esp],eax
        mov     DWORD [160+esp],ecx
L$002entry:
        ; Build the working state for one block. On entry to the round loop:
        ; eax=x0, ebp=x4, ecx=x8, esi=x9, edx=x12; the rest is on the stack.
        mov     eax,CHACHA_SIGMA0
        mov     DWORD [4+esp],CHACHA_SIGMA1
        mov     DWORD [8+esp],CHACHA_SIGMA2
        mov     DWORD [12+esp],CHACHA_SIGMA3
        mov     ebx,DWORD [84+esp]
        mov     ebp,DWORD [88+esp]
        mov     ecx,DWORD [104+esp]
        mov     esi,DWORD [108+esp]
        mov     edx,DWORD [116+esp]
        mov     edi,DWORD [120+esp]
        mov     DWORD [20+esp],ebx
        mov     DWORD [24+esp],ebp
        mov     DWORD [40+esp],ecx
        mov     DWORD [44+esp],esi
        mov     DWORD [52+esp],edx
        mov     DWORD [56+esp],edi
        mov     ebx,DWORD [92+esp]
        mov     edi,DWORD [124+esp]
        mov     edx,DWORD [112+esp]
        mov     ebp,DWORD [80+esp]
        mov     ecx,DWORD [96+esp]
        mov     esi,DWORD [100+esp]
        add     edx,1                           ; block counter for this block
        mov     DWORD [28+esp],ebx
        mov     DWORD [60+esp],edi
        mov     DWORD [112+esp],edx
        mov     ebx,CHACHA_DOUBLE_ROUNDS
        jmp     NEAR L$004loop

align 16
L$004loop:
        ; One pass = quarter-rounds on (0,4,8,12) (1,5,9,13) (2,6,10,14)
        ; (3,7,11,15), then (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14).
        ; Their instructions are interleaved; the order below is significant.
        add     eax,ebp
        mov     DWORD [128+esp],ebx             ; spill the loop counter
        mov     ebx,ebp
        xor     edx,eax
        rol     edx,16
        add     ecx,edx
        xor     ebx,ecx
        mov     edi,DWORD [52+esp]
        rol     ebx,12
        mov     ebp,DWORD [20+esp]
        add     eax,ebx
        xor     edx,eax
        mov     DWORD [esp],eax
        rol     edx,8
        mov     eax,DWORD [4+esp]
        add     ecx,edx
        mov     DWORD [48+esp],edx
        xor     ebx,ecx
        add     eax,ebp
        rol     ebx,7
        xor     edi,eax
        mov     DWORD [32+esp],ecx
        rol     edi,16
        mov     DWORD [16+esp],ebx
        add     esi,edi
        mov     ecx,DWORD [40+esp]
        xor     ebp,esi
        mov     edx,DWORD [56+esp]
        rol     ebp,12
        mov     ebx,DWORD [24+esp]
        add     eax,ebp
        xor     edi,eax
        mov     DWORD [4+esp],eax
        rol     edi,8
        mov     eax,DWORD [8+esp]
        add     esi,edi
        mov     DWORD [52+esp],edi
        xor     ebp,esi
        add     eax,ebx
        rol     ebp,7
        xor     edx,eax
        mov     DWORD [36+esp],esi
        rol     edx,16
        mov     DWORD [20+esp],ebp
        add     ecx,edx
        mov     esi,DWORD [44+esp]
        xor     ebx,ecx
        mov     edi,DWORD [60+esp]
        rol     ebx,12
        mov     ebp,DWORD [28+esp]
        add     eax,ebx
        xor     edx,eax
        mov     DWORD [8+esp],eax
        rol     edx,8
        mov     eax,DWORD [12+esp]
        add     ecx,edx
        mov     DWORD [56+esp],edx
        xor     ebx,ecx
        add     eax,ebp
        rol     ebx,7
        xor     edi,eax
        rol     edi,16
        mov     DWORD [24+esp],ebx
        add     esi,edi
        xor     ebp,esi
        rol     ebp,12
        mov     ebx,DWORD [20+esp]
        add     eax,ebp
        xor     edi,eax
        mov     DWORD [12+esp],eax
        rol     edi,8
        mov     eax,DWORD [esp]
        add     esi,edi
        mov     edx,edi
        xor     ebp,esi
        add     eax,ebx
        rol     ebp,7
        xor     edx,eax
        rol     edx,16
        mov     DWORD [28+esp],ebp
        add     ecx,edx
        xor     ebx,ecx
        mov     edi,DWORD [48+esp]
        rol     ebx,12
        mov     ebp,DWORD [24+esp]
        add     eax,ebx
        xor     edx,eax
        mov     DWORD [esp],eax
        rol     edx,8
        mov     eax,DWORD [4+esp]
        add     ecx,edx
        mov     DWORD [60+esp],edx
        xor     ebx,ecx
        add     eax,ebp
        rol     ebx,7
        xor     edi,eax
        mov     DWORD [40+esp],ecx
        rol     edi,16
        mov     DWORD [20+esp],ebx
        add     esi,edi
        mov     ecx,DWORD [32+esp]
        xor     ebp,esi
        mov     edx,DWORD [52+esp]
        rol     ebp,12
        mov     ebx,DWORD [28+esp]
        add     eax,ebp
        xor     edi,eax
        mov     DWORD [4+esp],eax
        rol     edi,8
        mov     eax,DWORD [8+esp]
        add     esi,edi
        mov     DWORD [48+esp],edi
        xor     ebp,esi
        add     eax,ebx
        rol     ebp,7
        xor     edx,eax
        mov     DWORD [44+esp],esi
        rol     edx,16
        mov     DWORD [24+esp],ebp
        add     ecx,edx
        mov     esi,DWORD [36+esp]
        xor     ebx,ecx
        mov     edi,DWORD [56+esp]
        rol     ebx,12
        mov     ebp,DWORD [16+esp]
        add     eax,ebx
        xor     edx,eax
        mov     DWORD [8+esp],eax
        rol     edx,8
        mov     eax,DWORD [12+esp]
        add     ecx,edx
        mov     DWORD [52+esp],edx
        xor     ebx,ecx
        add     eax,ebp
        rol     ebx,7
        xor     edi,eax
        rol     edi,16
        mov     DWORD [28+esp],ebx
        add     esi,edi
        xor     ebp,esi
        mov     edx,DWORD [48+esp]
        rol     ebp,12
        mov     ebx,DWORD [128+esp]             ; reload the loop counter
        add     eax,ebp
        xor     edi,eax
        mov     DWORD [12+esp],eax
        rol     edi,8
        mov     eax,DWORD [esp]
        add     esi,edi
        mov     DWORD [56+esp],edi
        xor     ebp,esi
        rol     ebp,7
        dec     ebx
        jnz     NEAR L$004loop

        ; Rounds done: eax=x0 ebp=x4 ecx=x8 esi=x9 edx=x12 edi=x14.
        ; Add the input state back in, then either XOR a full 64-byte block
        ; or fall to the byte-wise tail when fewer than 64 bytes remain.
        mov     ebx,DWORD [160+esp]             ; ebx = remaining length
        add     eax,CHACHA_SIGMA0
        add     ebp,DWORD [80+esp]
        add     ecx,DWORD [96+esp]
        add     esi,DWORD [100+esp]
        cmp     ebx,64
        jb      NEAR L$005tail

        mov     ebx,DWORD [156+esp]             ; ebx = in
        add     edx,DWORD [112+esp]
        add     edi,DWORD [120+esp]
        xor     eax,DWORD [ebx]
        xor     ebp,DWORD [16+ebx]
        mov     DWORD [esp],eax                 ; word 0 is written out last
        mov     eax,DWORD [152+esp]             ; eax = out
        xor     ecx,DWORD [32+ebx]
        xor     esi,DWORD [36+ebx]
        xor     edx,DWORD [48+ebx]
        xor     edi,DWORD [56+ebx]
        mov     DWORD [16+eax],ebp
        mov     DWORD [32+eax],ecx
        mov     DWORD [36+eax],esi
        mov     DWORD [48+eax],edx
        mov     DWORD [56+eax],edi

        ; Words 1, 2, 3, 5, 6.
        mov     ebp,DWORD [4+esp]
        mov     ecx,DWORD [8+esp]
        mov     esi,DWORD [12+esp]
        mov     edx,DWORD [20+esp]
        mov     edi,DWORD [24+esp]
        add     ebp,CHACHA_SIGMA1
        add     ecx,CHACHA_SIGMA2
        add     esi,CHACHA_SIGMA3
        add     edx,DWORD [84+esp]
        add     edi,DWORD [88+esp]
        xor     ebp,DWORD [4+ebx]
        xor     ecx,DWORD [8+ebx]
        xor     esi,DWORD [12+ebx]
        xor     edx,DWORD [20+ebx]
        xor     edi,DWORD [24+ebx]
        mov     DWORD [4+eax],ebp
        mov     DWORD [8+eax],ecx
        mov     DWORD [12+eax],esi
        mov     DWORD [20+eax],edx
        mov     DWORD [24+eax],edi

        ; Words 7, 10, 11, 13, 15.
        mov     ebp,DWORD [28+esp]
        mov     ecx,DWORD [40+esp]
        mov     esi,DWORD [44+esp]
        mov     edx,DWORD [52+esp]
        mov     edi,DWORD [60+esp]
        add     ebp,DWORD [92+esp]
        add     ecx,DWORD [104+esp]
        add     esi,DWORD [108+esp]
        add     edx,DWORD [116+esp]
        add     edi,DWORD [124+esp]
        xor     ebp,DWORD [28+ebx]
        xor     ecx,DWORD [40+ebx]
        xor     esi,DWORD [44+ebx]
        xor     edx,DWORD [52+ebx]
        xor     edi,DWORD [60+ebx]
        lea     ebx,[64+ebx]                    ; in += 64
        mov     DWORD [28+eax],ebp
        mov     ebp,DWORD [esp]
        mov     DWORD [40+eax],ecx
        mov     ecx,DWORD [160+esp]
        mov     DWORD [44+eax],esi
        mov     DWORD [52+eax],edx
        mov     DWORD [60+eax],edi
        mov     DWORD [eax],ebp
        lea     eax,[64+eax]                    ; out += 64
        sub     ecx,64                          ; len -= 64
        jnz     NEAR L$003outer_loop
        jmp     NEAR L$006done

L$005tail:
        ; Fewer than 64 bytes left (ebx = count): materialise the whole
        ; keystream block at [0..63+esp], then XOR it byte by byte.
        add     edx,DWORD [112+esp]
        add     edi,DWORD [120+esp]
        mov     DWORD [esp],eax
        mov     DWORD [16+esp],ebp
        mov     DWORD [32+esp],ecx
        mov     DWORD [36+esp],esi
        mov     DWORD [48+esp],edx
        mov     DWORD [56+esp],edi
        mov     ebp,DWORD [4+esp]
        mov     ecx,DWORD [8+esp]
        mov     esi,DWORD [12+esp]
        mov     edx,DWORD [20+esp]
        mov     edi,DWORD [24+esp]
        add     ebp,CHACHA_SIGMA1
        add     ecx,CHACHA_SIGMA2
        add     esi,CHACHA_SIGMA3
        add     edx,DWORD [84+esp]
        add     edi,DWORD [88+esp]
        mov     DWORD [4+esp],ebp
        mov     DWORD [8+esp],ecx
        mov     DWORD [12+esp],esi
        mov     DWORD [20+esp],edx
        mov     DWORD [24+esp],edi
        mov     ebp,DWORD [28+esp]
        mov     ecx,DWORD [40+esp]
        mov     esi,DWORD [44+esp]
        mov     edx,DWORD [52+esp]
        mov     edi,DWORD [60+esp]
        add     ebp,DWORD [92+esp]
        add     ecx,DWORD [104+esp]
        add     esi,DWORD [108+esp]
        add     edx,DWORD [116+esp]
        add     edi,DWORD [124+esp]
        mov     DWORD [28+esp],ebp
        mov     ebp,DWORD [156+esp]             ; ebp = in
        mov     DWORD [40+esp],ecx
        mov     ecx,DWORD [152+esp]             ; ecx = out
        mov     DWORD [44+esp],esi
        xor     esi,esi                         ; esi = byte index
        mov     DWORD [52+esp],edx
        mov     DWORD [60+esp],edi
        xor     eax,eax
        xor     edx,edx
L$007tail_loop:
        mov     al,BYTE [ebp*1+esi]
        mov     dl,BYTE [esi*1+esp]
        lea     esi,[1+esi]
        xor     al,dl
        mov     BYTE [esi*1+ecx-1],al
        dec     ebx
        jnz     NEAR L$007tail_loop
L$006done:
        add     esp,132
L$000no_data:
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

;-----------------------------------------------------------------------
; __ChaCha20_ssse3 / L$ssse3_shortcut
;
; SSSE3 path. _ChaCha20_ctr32 jumps to L$ssse3_shortcut with its four
; registers already pushed and eax = address of L$pic_point; the prologue
; at __ChaCha20_ssse3 mirrors those pushes. Arguments are the same five
; stack slots at [20..36+esp].
;
; Frame (esp is rounded down to a multiple of 64):
;   [0..255+esp]    working state for four blocks, addressed as
;                   [ebx-128 .. ebx+127] with ebx = esp+128
;   [256..511+esp]  input state with each word broadcast to four lanes,
;                   addressed as [ebp-128 .. ebp+127] with ebp = esp+384
;   [512+esp]       caller's esp, restored at L$011done
;   [516+esp] key pointer, [520+esp] counter pointer
; Inputs of 256 bytes or more go through the four-block loop; what is left
; (and any shorter input) is handled one block at a time from L$0081x.
;-----------------------------------------------------------------------
align 16
__ChaCha20_ssse3:
        push    ebp
        push    ebx
        push    esi
        push    edi
L$ssse3_shortcut:
        mov     edi,DWORD [20+esp]              ; edi = out
        mov     esi,DWORD [24+esp]              ; esi = in
        mov     ecx,DWORD [28+esp]              ; ecx = len
        mov     edx,DWORD [32+esp]              ; edx = key
        mov     ebx,DWORD [36+esp]              ; ebx = counter
        mov     ebp,esp
        sub     esp,524
        and     esp,-64
        mov     DWORD [512+esp],ebp
        lea     eax,[(L$ssse3_data-L$pic_point)+eax] ; eax = &L$ssse3_data
        movdqu  xmm3,[ebx]                      ; xmm3 = counter block
        cmp     ecx,256
        jb      NEAR L$0081x

        ; ---- four-block setup: broadcast every input word to four lanes ----
        mov     DWORD [516+esp],edx
        mov     DWORD [520+esp],ebx
        sub     ecx,256
        lea     ebp,[384+esp]
        movdqu  xmm7,[edx]
        pshufd  xmm0,xmm3,0
        pshufd  xmm1,xmm3,85
        pshufd  xmm2,xmm3,170
        pshufd  xmm3,xmm3,255
        paddd   xmm0,[48+eax]                   ; per-lane counters +0,+1,+2,+3
        pshufd  xmm4,xmm7,0
        pshufd  xmm5,xmm7,85
        psubd   xmm0,[64+eax]                   ; minus 4; the loop adds 4 first
        pshufd  xmm6,xmm7,170
        pshufd  xmm7,xmm7,255
        movdqa  [64+ebp],xmm0
        movdqa  [80+ebp],xmm1
        movdqa  [96+ebp],xmm2
        movdqa  [112+ebp],xmm3
        movdqu  xmm3,[16+edx]
        movdqa  [ebp-64],xmm4
        movdqa  [ebp-48],xmm5
        movdqa  [ebp-32],xmm6
        movdqa  [ebp-16],xmm7
        movdqa  xmm7,[32+eax]                   ; the four constant words
        lea     ebx,[128+esp]
        pshufd  xmm0,xmm3,0
        pshufd  xmm1,xmm3,85
        pshufd  xmm2,xmm3,170
        pshufd  xmm3,xmm3,255
        pshufd  xmm4,xmm7,0
        pshufd  xmm5,xmm7,85
        pshufd  xmm6,xmm7,170
        pshufd  xmm7,xmm7,255
        movdqa  [ebp],xmm0
        movdqa  [16+ebp],xmm1
        movdqa  [32+ebp],xmm2
        movdqa  [48+ebp],xmm3
        movdqa  [ebp-128],xmm4
        movdqa  [ebp-112],xmm5
        movdqa  [ebp-96],xmm6
        movdqa  [ebp-80],xmm7
        lea     esi,[128+esi]                   ; bias in/out by 128 so the
        lea     edi,[128+edi]                   ; accesses below use -128..+64
        jmp     NEAR L$009outer_loop

align 16
L$009outer_loop:
        ; Refresh the working state from the broadcast input and advance the
        ; four lane counters by 4.
        movdqa  xmm1,[ebp-112]
        movdqa  xmm2,[ebp-96]
        movdqa  xmm3,[ebp-80]
        movdqa  xmm5,[ebp-48]
        movdqa  xmm6,[ebp-32]
        movdqa  xmm7,[ebp-16]
        movdqa  [ebx-112],xmm1
        movdqa  [ebx-96],xmm2
        movdqa  [ebx-80],xmm3
        movdqa  [ebx-48],xmm5
        movdqa  [ebx-32],xmm6
        movdqa  [ebx-16],xmm7
        movdqa  xmm2,[32+ebp]
        movdqa  xmm3,[48+ebp]
        movdqa  xmm4,[64+ebp]
        movdqa  xmm5,[80+ebp]
        movdqa  xmm6,[96+ebp]
        movdqa  xmm7,[112+ebp]
        paddd   xmm4,[64+eax]
        movdqa  [32+ebx],xmm2
        movdqa  [48+ebx],xmm3
        movdqa  [64+ebx],xmm4
        movdqa  [80+ebx],xmm5
        movdqa  [96+ebx],xmm6
        movdqa  [112+ebx],xmm7
        movdqa  [64+ebp],xmm4
        movdqa  xmm0,[ebp-128]
        movdqa  xmm6,xmm4
        movdqa  xmm3,[ebp-64]
        movdqa  xmm4,[ebp]
        movdqa  xmm5,[16+ebp]
        mov     edx,CHACHA_DOUBLE_ROUNDS
        nop
align 16
L$010loop:
        ; Rotates by 16 and 8 use pshufb with the masks at [eax] and
        ; [16+eax]; rotates by 12 and 7 use a shift pair plus por.
        ; Quarter-rounds are interleaved; instruction order is significant.
        paddd   xmm0,xmm3
        movdqa  xmm2,xmm3
        pxor    xmm6,xmm0
        pshufb  xmm6,[eax]
        paddd   xmm4,xmm6
        pxor    xmm2,xmm4
        movdqa  xmm3,[ebx-48]
        movdqa  xmm1,xmm2
        pslld   xmm2,12
        psrld   xmm1,20
        por     xmm2,xmm1
        movdqa  xmm1,[ebx-112]
        paddd   xmm0,xmm2
        movdqa  xmm7,[80+ebx]
        pxor    xmm6,xmm0
        movdqa  [ebx-128],xmm0
        pshufb  xmm6,[16+eax]
        paddd   xmm4,xmm6
        movdqa  [64+ebx],xmm6
        pxor    xmm2,xmm4
        paddd   xmm1,xmm3
        movdqa  xmm0,xmm2
        pslld   xmm2,7
        psrld   xmm0,25
        pxor    xmm7,xmm1
        por     xmm2,xmm0
        movdqa  [ebx],xmm4
        pshufb  xmm7,[eax]
        movdqa  [ebx-64],xmm2
        paddd   xmm5,xmm7
        movdqa  xmm4,[32+ebx]
        pxor    xmm3,xmm5
        movdqa  xmm2,[ebx-32]
        movdqa  xmm0,xmm3
        pslld   xmm3,12
        psrld   xmm0,20
        por     xmm3,xmm0
        movdqa  xmm0,[ebx-96]
        paddd   xmm1,xmm3
        movdqa  xmm6,[96+ebx]
        pxor    xmm7,xmm1
        movdqa  [ebx-112],xmm1
        pshufb  xmm7,[16+eax]
        paddd   xmm5,xmm7
        movdqa  [80+ebx],xmm7
        pxor    xmm3,xmm5
        paddd   xmm0,xmm2
        movdqa  xmm1,xmm3
        pslld   xmm3,7
        psrld   xmm1,25
        pxor    xmm6,xmm0
        por     xmm3,xmm1
        movdqa  [16+ebx],xmm5
        pshufb  xmm6,[eax]
        movdqa  [ebx-48],xmm3
        paddd   xmm4,xmm6
        movdqa  xmm5,[48+ebx]
        pxor    xmm2,xmm4
        movdqa  xmm3,[ebx-16]
        movdqa  xmm1,xmm2
        pslld   xmm2,12
        psrld   xmm1,20
        por     xmm2,xmm1
        movdqa  xmm1,[ebx-80]
        paddd   xmm0,xmm2
        movdqa  xmm7,[112+ebx]
        pxor    xmm6,xmm0
        movdqa  [ebx-96],xmm0
        pshufb  xmm6,[16+eax]
        paddd   xmm4,xmm6
        movdqa  [96+ebx],xmm6
        pxor    xmm2,xmm4
        paddd   xmm1,xmm3
        movdqa  xmm0,xmm2
        pslld   xmm2,7
        psrld   xmm0,25
        pxor    xmm7,xmm1
        por     xmm2,xmm0
        pshufb  xmm7,[eax]
        movdqa  [ebx-32],xmm2
        paddd   xmm5,xmm7
        pxor    xmm3,xmm5
        movdqa  xmm2,[ebx-48]
        movdqa  xmm0,xmm3
        pslld   xmm3,12
        psrld   xmm0,20
        por     xmm3,xmm0
        movdqa  xmm0,[ebx-128]
        paddd   xmm1,xmm3
        pxor    xmm7,xmm1
        movdqa  [ebx-80],xmm1
        pshufb  xmm7,[16+eax]
        paddd   xmm5,xmm7
        movdqa  xmm6,xmm7
        pxor    xmm3,xmm5
        paddd   xmm0,xmm2
        movdqa  xmm1,xmm3
        pslld   xmm3,7
        psrld   xmm1,25
        pxor    xmm6,xmm0
        por     xmm3,xmm1
        pshufb  xmm6,[eax]
        movdqa  [ebx-16],xmm3
        paddd   xmm4,xmm6
        pxor    xmm2,xmm4
        movdqa  xmm3,[ebx-32]
        movdqa  xmm1,xmm2
        pslld   xmm2,12
        psrld   xmm1,20
        por     xmm2,xmm1
        movdqa  xmm1,[ebx-112]
        paddd   xmm0,xmm2
        movdqa  xmm7,[64+ebx]
        pxor    xmm6,xmm0
        movdqa  [ebx-128],xmm0
        pshufb  xmm6,[16+eax]
        paddd   xmm4,xmm6
        movdqa  [112+ebx],xmm6
        pxor    xmm2,xmm4
        paddd   xmm1,xmm3
        movdqa  xmm0,xmm2
        pslld   xmm2,7
        psrld   xmm0,25
        pxor    xmm7,xmm1
        por     xmm2,xmm0
        movdqa  [32+ebx],xmm4
        pshufb  xmm7,[eax]
        movdqa  [ebx-48],xmm2
        paddd   xmm5,xmm7
        movdqa  xmm4,[ebx]
        pxor    xmm3,xmm5
        movdqa  xmm2,[ebx-16]
        movdqa  xmm0,xmm3
        pslld   xmm3,12
        psrld   xmm0,20
        por     xmm3,xmm0
        movdqa  xmm0,[ebx-96]
        paddd   xmm1,xmm3
        movdqa  xmm6,[80+ebx]
        pxor    xmm7,xmm1
        movdqa  [ebx-112],xmm1
        pshufb  xmm7,[16+eax]
        paddd   xmm5,xmm7
        movdqa  [64+ebx],xmm7
        pxor    xmm3,xmm5
        paddd   xmm0,xmm2
        movdqa  xmm1,xmm3
        pslld   xmm3,7
        psrld   xmm1,25
        pxor    xmm6,xmm0
        por     xmm3,xmm1
        movdqa  [48+ebx],xmm5
        pshufb  xmm6,[eax]
        movdqa  [ebx-32],xmm3
        paddd   xmm4,xmm6
        movdqa  xmm5,[16+ebx]
        pxor    xmm2,xmm4
        movdqa  xmm3,[ebx-64]
        movdqa  xmm1,xmm2
        pslld   xmm2,12
        psrld   xmm1,20
        por     xmm2,xmm1
        movdqa  xmm1,[ebx-80]
        paddd   xmm0,xmm2
        movdqa  xmm7,[96+ebx]
        pxor    xmm6,xmm0
        movdqa  [ebx-96],xmm0
        pshufb  xmm6,[16+eax]
        paddd   xmm4,xmm6
        movdqa  [80+ebx],xmm6
        pxor    xmm2,xmm4
        paddd   xmm1,xmm3
        movdqa  xmm0,xmm2
        pslld   xmm2,7
        psrld   xmm0,25
        pxor    xmm7,xmm1
        por     xmm2,xmm0
        pshufb  xmm7,[eax]
        movdqa  [ebx-16],xmm2
        paddd   xmm5,xmm7
        pxor    xmm3,xmm5
        movdqa  xmm0,xmm3
        pslld   xmm3,12
        psrld   xmm0,20
        por     xmm3,xmm0
        movdqa  xmm0,[ebx-128]
        paddd   xmm1,xmm3
        movdqa  xmm6,[64+ebx]
        pxor    xmm7,xmm1
        movdqa  [ebx-80],xmm1
        pshufb  xmm7,[16+eax]
        paddd   xmm5,xmm7
        movdqa  [96+ebx],xmm7
        pxor    xmm3,xmm5
        movdqa  xmm1,xmm3
        pslld   xmm3,7
        psrld   xmm1,25
        por     xmm3,xmm1
        dec     edx
        jnz     NEAR L$010loop

        ; Flush the words still held in registers back to the working state.
        movdqa  [ebx-64],xmm3
        movdqa  [ebx],xmm4
        movdqa  [16+ebx],xmm5
        movdqa  [64+ebx],xmm6
        movdqa  [96+ebx],xmm7

        ; Output, four state words at a time: add the input state, transpose
        ; the 4x4 dword matrix so each register holds 16 consecutive bytes of
        ; one block, XOR with the input and store. Blocks sit 64 bytes apart.

        ; State words 0..3.
        movdqa  xmm1,[ebx-112]
        movdqa  xmm2,[ebx-96]
        movdqa  xmm3,[ebx-80]
        paddd   xmm0,[ebp-128]
        paddd   xmm1,[ebp-112]
        paddd   xmm2,[ebp-96]
        paddd   xmm3,[ebp-80]
        movdqa  xmm6,xmm0
        punpckldq xmm0,xmm1
        movdqa  xmm7,xmm2
        punpckldq xmm2,xmm3
        punpckhdq xmm6,xmm1
        punpckhdq xmm7,xmm3
        movdqa  xmm1,xmm0
        punpcklqdq xmm0,xmm2
        movdqa  xmm3,xmm6
        punpcklqdq xmm6,xmm7
        punpckhqdq xmm1,xmm2
        punpckhqdq xmm3,xmm7
        movdqu  xmm4,[esi-128]
        movdqu  xmm5,[esi-64]
        movdqu  xmm2,[esi]
        movdqu  xmm7,[64+esi]
        lea     esi,[16+esi]
        pxor    xmm4,xmm0
        movdqa  xmm0,[ebx-64]
        pxor    xmm5,xmm1
        movdqa  xmm1,[ebx-48]
        pxor    xmm6,xmm2
        movdqa  xmm2,[ebx-32]
        pxor    xmm7,xmm3
        movdqa  xmm3,[ebx-16]
        movdqu  [edi-128],xmm4
        movdqu  [edi-64],xmm5
        movdqu  [edi],xmm6
        movdqu  [64+edi],xmm7
        lea     edi,[16+edi]

        ; State words 4..7.
        paddd   xmm0,[ebp-64]
        paddd   xmm1,[ebp-48]
        paddd   xmm2,[ebp-32]
        paddd   xmm3,[ebp-16]
        movdqa  xmm6,xmm0
        punpckldq xmm0,xmm1
        movdqa  xmm7,xmm2
        punpckldq xmm2,xmm3
        punpckhdq xmm6,xmm1
        punpckhdq xmm7,xmm3
        movdqa  xmm1,xmm0
        punpcklqdq xmm0,xmm2
        movdqa  xmm3,xmm6
        punpcklqdq xmm6,xmm7
        punpckhqdq xmm1,xmm2
        punpckhqdq xmm3,xmm7
        movdqu  xmm4,[esi-128]
        movdqu  xmm5,[esi-64]
        movdqu  xmm2,[esi]
        movdqu  xmm7,[64+esi]
        lea     esi,[16+esi]
        pxor    xmm4,xmm0
        movdqa  xmm0,[ebx]
        pxor    xmm5,xmm1
        movdqa  xmm1,[16+ebx]
        pxor    xmm6,xmm2
        movdqa  xmm2,[32+ebx]
        pxor    xmm7,xmm3
        movdqa  xmm3,[48+ebx]
        movdqu  [edi-128],xmm4
        movdqu  [edi-64],xmm5
        movdqu  [edi],xmm6
        movdqu  [64+edi],xmm7
        lea     edi,[16+edi]

        ; State words 8..11.
        paddd   xmm0,[ebp]
        paddd   xmm1,[16+ebp]
        paddd   xmm2,[32+ebp]
        paddd   xmm3,[48+ebp]
        movdqa  xmm6,xmm0
        punpckldq xmm0,xmm1
        movdqa  xmm7,xmm2
        punpckldq xmm2,xmm3
        punpckhdq xmm6,xmm1
        punpckhdq xmm7,xmm3
        movdqa  xmm1,xmm0
        punpcklqdq xmm0,xmm2
        movdqa  xmm3,xmm6
        punpcklqdq xmm6,xmm7
        punpckhqdq xmm1,xmm2
        punpckhqdq xmm3,xmm7
        movdqu  xmm4,[esi-128]
        movdqu  xmm5,[esi-64]
        movdqu  xmm2,[esi]
        movdqu  xmm7,[64+esi]
        lea     esi,[16+esi]
        pxor    xmm4,xmm0
        movdqa  xmm0,[64+ebx]
        pxor    xmm5,xmm1
        movdqa  xmm1,[80+ebx]
        pxor    xmm6,xmm2
        movdqa  xmm2,[96+ebx]
        pxor    xmm7,xmm3
        movdqa  xmm3,[112+ebx]
        movdqu  [edi-128],xmm4
        movdqu  [edi-64],xmm5
        movdqu  [edi],xmm6
        movdqu  [64+edi],xmm7
        lea     edi,[16+edi]

        ; State words 12..15.
        paddd   xmm0,[64+ebp]
        paddd   xmm1,[80+ebp]
        paddd   xmm2,[96+ebp]
        paddd   xmm3,[112+ebp]
        movdqa  xmm6,xmm0
        punpckldq xmm0,xmm1
        movdqa  xmm7,xmm2
        punpckldq xmm2,xmm3
        punpckhdq xmm6,xmm1
        punpckhdq xmm7,xmm3
        movdqa  xmm1,xmm0
        punpcklqdq xmm0,xmm2
        movdqa  xmm3,xmm6
        punpcklqdq xmm6,xmm7
        punpckhqdq xmm1,xmm2
        punpckhqdq xmm3,xmm7
        movdqu  xmm4,[esi-128]
        movdqu  xmm5,[esi-64]
        movdqu  xmm2,[esi]
        movdqu  xmm7,[64+esi]
        lea     esi,[208+esi]                   ; 3*16 + 208 = 256 per pass
        pxor    xmm4,xmm0
        pxor    xmm5,xmm1
        pxor    xmm6,xmm2
        pxor    xmm7,xmm3
        movdqu  [edi-128],xmm4
        movdqu  [edi-64],xmm5
        movdqu  [edi],xmm6
        movdqu  [64+edi],xmm7
        lea     edi,[208+edi]
        sub     ecx,256
        jnc     NEAR L$009outer_loop

        ; Fewer than 256 bytes left: undo the last subtraction and the
        ; 128-byte pointer bias, then rebuild the counter block for the
        ; one-block path (lane-0 counter + 4, other words from the caller).
        add     ecx,256
        jz      NEAR L$011done
        mov     ebx,DWORD [520+esp]
        lea     esi,[esi-128]
        mov     edx,DWORD [516+esp]
        lea     edi,[edi-128]
        movd    xmm2,DWORD [64+ebp]
        movdqu  xmm3,[ebx]
        paddd   xmm2,[96+eax]
        pand    xmm3,[112+eax]
        por     xmm3,xmm2

        ; ---------------- one block at a time ----------------
        ; xmm0..xmm3 = state rows; xmm6/xmm7 = rotate-by-16/8 pshufb masks.
        ; The input state is kept at [0..63+esp].
L$0081x:
        movdqa  xmm0,[32+eax]
        movdqu  xmm1,[edx]
        movdqu  xmm2,[16+edx]
        movdqa  xmm6,[eax]
        movdqa  xmm7,[16+eax]
        mov     DWORD [48+esp],ebp              ; overwritten by the xmm3 store
        movdqa  [esp],xmm0
        movdqa  [16+esp],xmm1
        movdqa  [32+esp],xmm2
        movdqa  [48+esp],xmm3
        mov     edx,CHACHA_DOUBLE_ROUNDS
        jmp     NEAR L$012loop1x

align 16
L$013outer1x:
        ; Next block: reload the rows and bump the block counter by one.
        movdqa  xmm3,[80+eax]
        movdqa  xmm0,[esp]
        movdqa  xmm1,[16+esp]
        movdqa  xmm2,[32+esp]
        paddd   xmm3,[48+esp]
        mov     edx,CHACHA_DOUBLE_ROUNDS
        movdqa  [48+esp],xmm3
        jmp     NEAR L$012loop1x

align 16
L$012loop1x:
        ; Column round. The register-form pshufb below assembles to the
        ; bytes 102,15,56,0,222 (xmm3,xmm6) and 102,15,56,0,223 (xmm3,xmm7).
        paddd   xmm0,xmm1
        pxor    xmm3,xmm0
        pshufb  xmm3,xmm6
        paddd   xmm2,xmm3
        pxor    xmm1,xmm2
        movdqa  xmm4,xmm1
        psrld   xmm1,20
        pslld   xmm4,12
        por     xmm1,xmm4
        paddd   xmm0,xmm1
        pxor    xmm3,xmm0
        pshufb  xmm3,xmm7
        paddd   xmm2,xmm3
        pxor    xmm1,xmm2
        movdqa  xmm4,xmm1
        psrld   xmm1,25
        pslld   xmm4,7
        por     xmm1,xmm4

        ; Rotate rows 1..3 so the diagonals line up as columns.
        pshufd  xmm2,xmm2,78
        pshufd  xmm1,xmm1,57
        pshufd  xmm3,xmm3,147
        nop

        ; Diagonal round.
        paddd   xmm0,xmm1
        pxor    xmm3,xmm0
        pshufb  xmm3,xmm6
        paddd   xmm2,xmm3
        pxor    xmm1,xmm2
        movdqa  xmm4,xmm1
        psrld   xmm1,20
        pslld   xmm4,12
        por     xmm1,xmm4
        paddd   xmm0,xmm1
        pxor    xmm3,xmm0
        pshufb  xmm3,xmm7
        paddd   xmm2,xmm3
        pxor    xmm1,xmm2
        movdqa  xmm4,xmm1
        psrld   xmm1,25
        pslld   xmm4,7
        por     xmm1,xmm4

        ; Rotate the rows back.
        pshufd  xmm2,xmm2,78
        pshufd  xmm1,xmm1,147
        pshufd  xmm3,xmm3,57
        dec     edx
        jnz     NEAR L$012loop1x

        ; Add the input state; xmm0..xmm3 = 64 bytes of keystream.
        paddd   xmm0,[esp]
        paddd   xmm1,[16+esp]
        paddd   xmm2,[32+esp]
        paddd   xmm3,[48+esp]
        cmp     ecx,64
        jb      NEAR L$014tail
        movdqu  xmm4,[esi]
        movdqu  xmm5,[16+esi]
        pxor    xmm0,xmm4
        movdqu  xmm4,[32+esi]
        pxor    xmm1,xmm5
        movdqu  xmm5,[48+esi]
        pxor    xmm2,xmm4
        pxor    xmm3,xmm5
        lea     esi,[64+esi]
        movdqu  [edi],xmm0
        movdqu  [16+edi],xmm1
        movdqu  [32+edi],xmm2
        movdqu  [48+edi],xmm3
        lea     edi,[64+edi]
        sub     ecx,64
        jnz     NEAR L$013outer1x
        jmp     NEAR L$011done

L$014tail:
        ; Fewer than 64 bytes left (ecx = count): spill the keystream to
        ; [0..63+esp] and XOR byte by byte.
        movdqa  [esp],xmm0
        movdqa  [16+esp],xmm1
        movdqa  [32+esp],xmm2
        movdqa  [48+esp],xmm3
        xor     eax,eax
        xor     edx,edx
        xor     ebp,ebp                         ; ebp = byte index
L$015tail_loop:
        mov     al,BYTE [ebp*1+esp]
        mov     dl,BYTE [ebp*1+esi]
        lea     ebp,[1+ebp]
        xor     al,dl
        mov     BYTE [ebp*1+edi-1],al
        dec     ecx
        jnz     NEAR L$015tail_loop
L$011done:
        mov     esp,DWORD [512+esp]             ; restore the caller's esp
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

; Constant table, addressed relative to eax on the SSSE3 path.
align 64
L$ssse3_data:
; +0: pshufb mask rotating every dword left by 16 bits.
db 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
; +16: pshufb mask rotating every dword left by 8 bits.
db 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
; +32: the four constant state words.
dd CHACHA_SIGMA0,CHACHA_SIGMA1,CHACHA_SIGMA2,CHACHA_SIGMA3
; +48: per-lane counter offsets for four parallel blocks.
dd 0,1,2,3
; +64: counter step for one four-block pass.
dd 4,4,4,4
; +80: counter step for the one-block path.
dd 1,0,0,0
; +96: added to the lane-0 counter when leaving the four-block loop.
dd 4,0,0,0
; +112: mask that clears word 0 and keeps words 1..3 of the counter block.
dd 0,-1,-1,-1
align 64
; ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated.
db 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
db 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
db 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
db 114,103,62,0
segment .bss
common _OPENSSL_ia32cap_P 16