; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; NOTE(review): this copy differs from the generator output only in the
; direct-entry prologue of _ChaCha20_ssse3 (zero-length guard and PIC base
; setup) and in added comments. Mirror those two changes in the generator
; before regenerating.
;
; Syntax: NASM / Intel operand order, 32-bit x86.
; Both routines read five dword arguments from the stack and return with a
; plain `ret` (caller cleans up). ebp/ebx/esi/edi are saved and restored;
; eax/ecx/edx and xmm0-xmm7 are clobbered.

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
%ifidn __OUTPUT_FORMAT__, win32
%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
$@feat.00 equ 1
section	.text	code align=64
%else
section	.text	code
%endif

;-----------------------------------------------------------------------
; _ChaCha20_ctr32
;
; Stack arguments as seen after the four pushes below:
;   [20+esp]  output pointer
;   [24+esp]  input pointer (XORed with the generated key stream)
;   [28+esp]  length in bytes; zero returns immediately
;   [32+esp]  pointer to 8 key dwords
;   [36+esp]  pointer to 4 dwords; the first is a per-block counter
;             (presumably the remaining three are the nonce - confirm
;             against the C prototype)
;
; Scalar-path frame after `sub esp,132`:
;   [0..60+esp]     16-dword working state (partly kept in registers)
;   [80..108+esp]   copy of the 8 key dwords    (state words 4..11)
;   [112..124+esp]  copy of the counter block   (state words 12..15)
;   [128+esp]       remaining double-round count
;   [152+esp]/[156+esp]/[160+esp]  current out / in / remaining length
;                   (these alias the caller's argument slots 20/24/28)
;-----------------------------------------------------------------------
global	_ChaCha20_ctr32
align	16
_ChaCha20_ctr32:
L$_ChaCha20_ctr32_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	xor	eax,eax
	cmp	eax,DWORD [28+esp]		; len == 0 -> nothing to do
	je	NEAR L$000no_data
	call	L$pic_point			; eax = address of L$pic_point
L$pic_point:
	pop	eax
	lea	ebp,[_OPENSSL_ia32cap_P]
	test	DWORD [ebp],16777216		; bit 24 of capability dword 0
	jz	NEAR L$001x86
	test	DWORD [4+ebp],512		; bit 9 of capability dword 1
	jz	NEAR L$001x86
	jmp	NEAR L$ssse3_shortcut		; both bits set: use the SSSE3 code
L$001x86:
	mov	esi,DWORD [32+esp]		; esi = key
	mov	edi,DWORD [36+esp]		; edi = counter block
	sub	esp,132
	mov	eax,DWORD [esi]
	mov	ebx,DWORD [4+esi]
	mov	ecx,DWORD [8+esi]
	mov	edx,DWORD [12+esi]
	mov	DWORD [80+esp],eax
	mov	DWORD [84+esp],ebx
	mov	DWORD [88+esp],ecx
	mov	DWORD [92+esp],edx
	mov	eax,DWORD [16+esi]
	mov	ebx,DWORD [20+esi]
	mov	ecx,DWORD [24+esi]
	mov	edx,DWORD [28+esi]
	mov	DWORD [96+esp],eax
	mov	DWORD [100+esp],ebx
	mov	DWORD [104+esp],ecx
	mov	DWORD [108+esp],edx
	mov	eax,DWORD [edi]
	mov	ebx,DWORD [4+edi]
	mov	ecx,DWORD [8+edi]
	mov	edx,DWORD [12+edi]
	sub	eax,1				; pre-decrement: L$002entry adds 1 per block
	mov	DWORD [112+esp],eax
	mov	DWORD [116+esp],ebx
	mov	DWORD [120+esp],ecx
	mov	DWORD [124+esp],edx
	jmp	NEAR L$002entry
align	16
L$003outer_loop:
	; Reached after each full 64-byte block: save advanced in/out/len.
	mov	DWORD [156+esp],ebx
	mov	DWORD [152+esp],eax
	mov	DWORD [160+esp],ecx
L$002entry:
	; Load the block's initial state. The four constants are the
	; little-endian dwords of the ASCII string "expand 32-byte k".
	mov	eax,1634760805
	mov	DWORD [4+esp],857760878
	mov	DWORD [8+esp],2036477234
	mov	DWORD [12+esp],1797285236
	mov	ebx,DWORD [84+esp]
	mov	ebp,DWORD [88+esp]
	mov	ecx,DWORD [104+esp]
	mov	esi,DWORD [108+esp]
	mov	edx,DWORD [116+esp]
	mov	edi,DWORD [120+esp]
	mov	DWORD [20+esp],ebx
	mov	DWORD [24+esp],ebp
	mov	DWORD [40+esp],ecx
	mov	DWORD [44+esp],esi
	mov	DWORD [52+esp],edx
	mov	DWORD [56+esp],edi
	mov	ebx,DWORD [92+esp]
	mov	edi,DWORD [124+esp]
	mov	edx,DWORD [112+esp]
	mov	ebp,DWORD [80+esp]
	mov	ecx,DWORD [96+esp]
	mov	esi,DWORD [100+esp]
	add	edx,1				; advance the block counter
	mov	DWORD [28+esp],ebx
	mov	DWORD [60+esp],edi
	mov	DWORD [112+esp],edx
	mov	ebx,10				; 10 iterations of the loop below
	jmp	NEAR L$004loop
align	16
L$004loop:
	; One iteration = eight interleaved add/xor/rotate quarter-rounds
	; (rotations 16, 12, 8, 7). On entry and at the bottom of the loop:
	; eax = word 0, ebp = word 4, ecx = word 8, edx = word 12,
	; esi = word 9; ebx carries the iteration count and is parked at
	; [128+esp] while it is needed as scratch.
	add	eax,ebp
	mov	DWORD [128+esp],ebx
	mov	ebx,ebp
	xor	edx,eax
	rol	edx,16
	add	ecx,edx
	xor	ebx,ecx
	mov	edi,DWORD [52+esp]
	rol	ebx,12
	mov	ebp,DWORD [20+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [esp],eax
	rol	edx,8
	mov	eax,DWORD [4+esp]
	add	ecx,edx
	mov	DWORD [48+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	mov	DWORD [32+esp],ecx
	rol	edi,16
	mov	DWORD [16+esp],ebx
	add	esi,edi
	mov	ecx,DWORD [40+esp]
	xor	ebp,esi
	mov	edx,DWORD [56+esp]
	rol	ebp,12
	mov	ebx,DWORD [24+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [4+esp],eax
	rol	edi,8
	mov	eax,DWORD [8+esp]
	add	esi,edi
	mov	DWORD [52+esp],edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	mov	DWORD [36+esp],esi
	rol	edx,16
	mov	DWORD [20+esp],ebp
	add	ecx,edx
	mov	esi,DWORD [44+esp]
	xor	ebx,ecx
	mov	edi,DWORD [60+esp]
	rol	ebx,12
	mov	ebp,DWORD [28+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [8+esp],eax
	rol	edx,8
	mov	eax,DWORD [12+esp]
	add	ecx,edx
	mov	DWORD [56+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	rol	edi,16
	mov	DWORD [24+esp],ebx
	add	esi,edi
	xor	ebp,esi
	rol	ebp,12
	mov	ebx,DWORD [20+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [12+esp],eax
	rol	edi,8
	mov	eax,DWORD [esp]
	add	esi,edi
	mov	edx,edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	rol	edx,16
	mov	DWORD [28+esp],ebp
	add	ecx,edx
	xor	ebx,ecx
	mov	edi,DWORD [48+esp]
	rol	ebx,12
	mov	ebp,DWORD [24+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [esp],eax
	rol	edx,8
	mov	eax,DWORD [4+esp]
	add	ecx,edx
	mov	DWORD [60+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	mov	DWORD [40+esp],ecx
	rol	edi,16
	mov	DWORD [20+esp],ebx
	add	esi,edi
	mov	ecx,DWORD [32+esp]
	xor	ebp,esi
	mov	edx,DWORD [52+esp]
	rol	ebp,12
	mov	ebx,DWORD [28+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [4+esp],eax
	rol	edi,8
	mov	eax,DWORD [8+esp]
	add	esi,edi
	mov	DWORD [48+esp],edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	mov	DWORD [44+esp],esi
	rol	edx,16
	mov	DWORD [24+esp],ebp
	add	ecx,edx
	mov	esi,DWORD [36+esp]
	xor	ebx,ecx
	mov	edi,DWORD [56+esp]
	rol	ebx,12
	mov	ebp,DWORD [16+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [8+esp],eax
	rol	edx,8
	mov	eax,DWORD [12+esp]
	add	ecx,edx
	mov	DWORD [52+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	rol	edi,16
	mov	DWORD [28+esp],ebx
	add	esi,edi
	xor	ebp,esi
	mov	edx,DWORD [48+esp]
	rol	ebp,12
	mov	ebx,DWORD [128+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [12+esp],eax
	rol	edi,8
	mov	eax,DWORD [esp]
	add	esi,edi
	mov	DWORD [56+esp],edi
	xor	ebp,esi
	rol	ebp,7
	dec	ebx
	jnz	NEAR L$004loop
	; Feed-forward: add the initial state back in, then either XOR a
	; whole 64-byte block (len >= 64) or fall to the byte-wise tail.
	mov	ebx,DWORD [160+esp]		; ebx = remaining length
	add	eax,1634760805
	add	ebp,DWORD [80+esp]
	add	ecx,DWORD [96+esp]
	add	esi,DWORD [100+esp]
	cmp	ebx,64
	jb	NEAR L$005tail
	mov	ebx,DWORD [156+esp]		; ebx = input pointer
	add	edx,DWORD [112+esp]
	add	edi,DWORD [120+esp]
	xor	eax,DWORD [ebx]
	xor	ebp,DWORD [16+ebx]
	mov	DWORD [esp],eax			; word 0 is stored last (see below)
	mov	eax,DWORD [152+esp]		; eax = output pointer
	xor	ecx,DWORD [32+ebx]
	xor	esi,DWORD [36+ebx]
	xor	edx,DWORD [48+ebx]
	xor	edi,DWORD [56+ebx]
	mov	DWORD [16+eax],ebp
	mov	DWORD [32+eax],ecx
	mov	DWORD [36+eax],esi
	mov	DWORD [48+eax],edx
	mov	DWORD [56+eax],edi
	mov	ebp,DWORD [4+esp]
	mov	ecx,DWORD [8+esp]
	mov	esi,DWORD [12+esp]
	mov	edx,DWORD [20+esp]
	mov	edi,DWORD [24+esp]
	add	ebp,857760878
	add	ecx,2036477234
	add	esi,1797285236
	add	edx,DWORD [84+esp]
	add	edi,DWORD [88+esp]
	xor	ebp,DWORD [4+ebx]
	xor	ecx,DWORD [8+ebx]
	xor	esi,DWORD [12+ebx]
	xor	edx,DWORD [20+ebx]
	xor	edi,DWORD [24+ebx]
	mov	DWORD [4+eax],ebp
	mov	DWORD [8+eax],ecx
	mov	DWORD [12+eax],esi
	mov	DWORD [20+eax],edx
	mov	DWORD [24+eax],edi
	mov	ebp,DWORD [28+esp]
	mov	ecx,DWORD [40+esp]
	mov	esi,DWORD [44+esp]
	mov	edx,DWORD [52+esp]
	mov	edi,DWORD [60+esp]
	add	ebp,DWORD [92+esp]
	add	ecx,DWORD [104+esp]
	add	esi,DWORD [108+esp]
	add	edx,DWORD [116+esp]
	add	edi,DWORD [124+esp]
	xor	ebp,DWORD [28+ebx]
	xor	ecx,DWORD [40+ebx]
	xor	esi,DWORD [44+ebx]
	xor	edx,DWORD [52+ebx]
	xor	edi,DWORD [60+ebx]
	lea	ebx,[64+ebx]			; in += 64
	mov	DWORD [28+eax],ebp
	mov	ebp,DWORD [esp]
	mov	DWORD [40+eax],ecx
	mov	ecx,DWORD [160+esp]
	mov	DWORD [44+eax],esi
	mov	DWORD [52+eax],edx
	mov	DWORD [60+eax],edi
	mov	DWORD [eax],ebp
	lea	eax,[64+eax]			; out += 64
	sub	ecx,64				; len -= 64
	jnz	NEAR L$003outer_loop
	jmp	NEAR L$006done
L$005tail:
	; Fewer than 64 bytes left (ebx = remaining, non-zero): finish the
	; feed-forward into the stack copy, then XOR byte by byte.
	add	edx,DWORD [112+esp]
	add	edi,DWORD [120+esp]
	mov	DWORD [esp],eax
	mov	DWORD [16+esp],ebp
	mov	DWORD [32+esp],ecx
	mov	DWORD [36+esp],esi
	mov	DWORD [48+esp],edx
	mov	DWORD [56+esp],edi
	mov	ebp,DWORD [4+esp]
	mov	ecx,DWORD [8+esp]
	mov	esi,DWORD [12+esp]
	mov	edx,DWORD [20+esp]
	mov	edi,DWORD [24+esp]
	add	ebp,857760878
	add	ecx,2036477234
	add	esi,1797285236
	add	edx,DWORD [84+esp]
	add	edi,DWORD [88+esp]
	mov	DWORD [4+esp],ebp
	mov	DWORD [8+esp],ecx
	mov	DWORD [12+esp],esi
	mov	DWORD [20+esp],edx
	mov	DWORD [24+esp],edi
	mov	ebp,DWORD [28+esp]
	mov	ecx,DWORD [40+esp]
	mov	esi,DWORD [44+esp]
	mov	edx,DWORD [52+esp]
	mov	edi,DWORD [60+esp]
	add	ebp,DWORD [92+esp]
	add	ecx,DWORD [104+esp]
	add	esi,DWORD [108+esp]
	add	edx,DWORD [116+esp]
	add	edi,DWORD [124+esp]
	mov	DWORD [28+esp],ebp
	mov	ebp,DWORD [156+esp]		; ebp = input pointer
	mov	DWORD [40+esp],ecx
	mov	ecx,DWORD [152+esp]		; ecx = output pointer
	mov	DWORD [44+esp],esi
	xor	esi,esi				; esi = byte index
	mov	DWORD [52+esp],edx
	mov	DWORD [60+esp],edi
	xor	eax,eax
	xor	edx,edx
L$007tail_loop:
	mov	al,BYTE [ebp*1+esi]		; input byte
	mov	dl,BYTE [esi*1+esp]		; key-stream byte
	lea	esi,[1+esi]
	xor	al,dl
	mov	BYTE [esi*1+ecx-1],al
	dec	ebx
	jnz	NEAR L$007tail_loop
L$006done:
	add	esp,132
L$000no_data:
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret

;-----------------------------------------------------------------------
; _ChaCha20_ssse3
;
; Same five stack arguments as _ChaCha20_ctr32. Reached either by a
; direct call or from _ChaCha20_ctr32 through L$ssse3_shortcut.
;
; Invariant at L$ssse3_shortcut: the four callee-saved registers are
; already pushed, the length is non-zero, and eax = address of
; L$pic_point. The direct-entry prologue below establishes the same
; state for itself. Without it a direct caller would index the constant
; table through an undefined eax, and a zero length would wrap the
; dec/jnz byte loop at L$015tail_loop.
;
; Frame (esp is 64-byte aligned after the prologue):
;   [0..255+esp]    working state for the 4-block path (ebx = esp+128);
;                   [0..63+esp] doubles as the 1-block state
;   [256..511+esp]  initial state, each word broadcast to all four
;                   lanes (ebp = esp+384)
;   [512+esp]       caller's esp
;   [516+esp]       key pointer
;   [520+esp]       counter-block pointer
; eax points at L$ssse3_data for the whole routine.
;-----------------------------------------------------------------------
global	_ChaCha20_ssse3
align	16
_ChaCha20_ssse3:
L$_ChaCha20_ssse3_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	xor	eax,eax
	cmp	eax,DWORD [28+esp]		; len == 0 -> nothing to do
	je	NEAR L$ssse3_no_data
	call	L$ssse3_pic_point		; eax = address of L$ssse3_pic_point
L$ssse3_pic_point:
	pop	eax
	; Rebase so eax matches what the shortcut path passes in.
	lea	eax,[(L$pic_point-L$ssse3_pic_point)+eax]
L$ssse3_shortcut:
	mov	edi,DWORD [20+esp]		; edi = out
	mov	esi,DWORD [24+esp]		; esi = in
	mov	ecx,DWORD [28+esp]		; ecx = len
	mov	edx,DWORD [32+esp]		; edx = key
	mov	ebx,DWORD [36+esp]		; ebx = counter block
	mov	ebp,esp
	sub	esp,524
	and	esp,-64
	mov	DWORD [512+esp],ebp		; remember caller's esp
	lea	eax,[(L$ssse3_data-L$pic_point)+eax]	; eax = &L$ssse3_data
	movdqu	xmm3,[ebx]
	cmp	ecx,256
	jb	NEAR L$0081x			; < 256 bytes: one block at a time
	mov	DWORD [516+esp],edx
	mov	DWORD [520+esp],ebx
	sub	ecx,256
	lea	ebp,[384+esp]
	movdqu	xmm7,[edx]
	; Broadcast every state word across the four lanes. The counter
	; lanes become ctr+{0,1,2,3}-4, because the outer loop adds 4
	; before each use.
	pshufd	xmm0,xmm3,0
	pshufd	xmm1,xmm3,85
	pshufd	xmm2,xmm3,170
	pshufd	xmm3,xmm3,255
	paddd	xmm0,[48+eax]
	pshufd	xmm4,xmm7,0
	pshufd	xmm5,xmm7,85
	psubd	xmm0,[64+eax]
	pshufd	xmm6,xmm7,170
	pshufd	xmm7,xmm7,255
	movdqa	[64+ebp],xmm0
	movdqa	[80+ebp],xmm1
	movdqa	[96+ebp],xmm2
	movdqa	[112+ebp],xmm3
	movdqu	xmm3,[16+edx]
	movdqa	[ebp-64],xmm4
	movdqa	[ebp-48],xmm5
	movdqa	[ebp-32],xmm6
	movdqa	[ebp-16],xmm7
	movdqa	xmm7,[32+eax]
	lea	ebx,[128+esp]
	pshufd	xmm0,xmm3,0
	pshufd	xmm1,xmm3,85
	pshufd	xmm2,xmm3,170
	pshufd	xmm3,xmm3,255
	pshufd	xmm4,xmm7,0
	pshufd	xmm5,xmm7,85
	pshufd	xmm6,xmm7,170
	pshufd	xmm7,xmm7,255
	movdqa	[ebp],xmm0
	movdqa	[16+ebp],xmm1
	movdqa	[32+ebp],xmm2
	movdqa	[48+ebp],xmm3
	movdqa	[ebp-128],xmm4
	movdqa	[ebp-112],xmm5
	movdqa	[ebp-96],xmm6
	movdqa	[ebp-80],xmm7
	lea	esi,[128+esi]			; bias in/out by 128 so the
	lea	edi,[128+edi]			; -128/-64/0/+64 offsets below fit
	jmp	NEAR L$009outer_loop
align	16
L$009outer_loop:
	; Copy the broadcast initial state into the working area and bump
	; the per-lane block counters by 4.
	movdqa	xmm1,[ebp-112]
	movdqa	xmm2,[ebp-96]
	movdqa	xmm3,[ebp-80]
	movdqa	xmm5,[ebp-48]
	movdqa	xmm6,[ebp-32]
	movdqa	xmm7,[ebp-16]
	movdqa	[ebx-112],xmm1
	movdqa	[ebx-96],xmm2
	movdqa	[ebx-80],xmm3
	movdqa	[ebx-48],xmm5
	movdqa	[ebx-32],xmm6
	movdqa	[ebx-16],xmm7
	movdqa	xmm2,[32+ebp]
	movdqa	xmm3,[48+ebp]
	movdqa	xmm4,[64+ebp]
	movdqa	xmm5,[80+ebp]
	movdqa	xmm6,[96+ebp]
	movdqa	xmm7,[112+ebp]
	paddd	xmm4,[64+eax]
	movdqa	[32+ebx],xmm2
	movdqa	[48+ebx],xmm3
	movdqa	[64+ebx],xmm4
	movdqa	[80+ebx],xmm5
	movdqa	[96+ebx],xmm6
	movdqa	[112+ebx],xmm7
	movdqa	[64+ebp],xmm4
	movdqa	xmm0,[ebp-128]
	movdqa	xmm6,xmm4
	movdqa	xmm3,[ebp-64]
	movdqa	xmm4,[ebp]
	movdqa	xmm5,[16+ebp]
	mov	edx,10				; 10 iterations of the loop below
	nop
align	16
L$010loop:
	; Four blocks in parallel, one per 32-bit lane. Rotations by 16 and
	; 8 use pshufb with the masks at [eax] and [16+eax]; rotations by
	; 12 and 7 use shift/shift/or.
	paddd	xmm0,xmm3
	movdqa	xmm2,xmm3
	pxor	xmm6,xmm0
	pshufb	xmm6,[eax]
	paddd	xmm4,xmm6
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-48]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-112]
	paddd	xmm0,xmm2
	movdqa	xmm7,[80+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-128],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[64+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	movdqa	[ebx],xmm4
	pshufb	xmm7,[eax]
	movdqa	[ebx-64],xmm2
	paddd	xmm5,xmm7
	movdqa	xmm4,[32+ebx]
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-32]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-96]
	paddd	xmm1,xmm3
	movdqa	xmm6,[96+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-112],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[80+ebx],xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	movdqa	[16+ebx],xmm5
	pshufb	xmm6,[eax]
	movdqa	[ebx-48],xmm3
	paddd	xmm4,xmm6
	movdqa	xmm5,[48+ebx]
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-16]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-80]
	paddd	xmm0,xmm2
	movdqa	xmm7,[112+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-96],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[96+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	pshufb	xmm7,[eax]
	movdqa	[ebx-32],xmm2
	paddd	xmm5,xmm7
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-48]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-128]
	paddd	xmm1,xmm3
	pxor	xmm7,xmm1
	movdqa	[ebx-80],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	xmm6,xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	pshufb	xmm6,[eax]
	movdqa	[ebx-16],xmm3
	paddd	xmm4,xmm6
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-32]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-112]
	paddd	xmm0,xmm2
	movdqa	xmm7,[64+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-128],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[112+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	movdqa	[32+ebx],xmm4
	pshufb	xmm7,[eax]
	movdqa	[ebx-48],xmm2
	paddd	xmm5,xmm7
	movdqa	xmm4,[ebx]
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-16]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-96]
	paddd	xmm1,xmm3
	movdqa	xmm6,[80+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-112],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[64+ebx],xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	movdqa	[48+ebx],xmm5
	pshufb	xmm6,[eax]
	movdqa	[ebx-32],xmm3
	paddd	xmm4,xmm6
	movdqa	xmm5,[16+ebx]
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-64]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-80]
	paddd	xmm0,xmm2
	movdqa	xmm7,[96+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-96],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[80+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	pshufb	xmm7,[eax]
	movdqa	[ebx-16],xmm2
	paddd	xmm5,xmm7
	pxor	xmm3,xmm5
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-128]
	paddd	xmm1,xmm3
	movdqa	xmm6,[64+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-80],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[96+ebx],xmm7
	pxor	xmm3,xmm5
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	por	xmm3,xmm1
	dec	edx
	jnz	NEAR L$010loop
	; Spill the rows still held in registers, then for each group of
	; four state rows: add the initial state, transpose 4x4 so every
	; register holds 16 consecutive bytes of one block, and XOR with
	; the input at block strides of 64 bytes.
	movdqa	[ebx-64],xmm3
	movdqa	[ebx],xmm4
	movdqa	[16+ebx],xmm5
	movdqa	[64+ebx],xmm6
	movdqa	[96+ebx],xmm7
	movdqa	xmm1,[ebx-112]
	movdqa	xmm2,[ebx-96]
	movdqa	xmm3,[ebx-80]
	paddd	xmm0,[ebp-128]
	paddd	xmm1,[ebp-112]
	paddd	xmm2,[ebp-96]
	paddd	xmm3,[ebp-80]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[ebx-64]
	pxor	xmm5,xmm1
	movdqa	xmm1,[ebx-48]
	pxor	xmm6,xmm2
	movdqa	xmm2,[ebx-32]
	pxor	xmm7,xmm3
	movdqa	xmm3,[ebx-16]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[ebp-64]
	paddd	xmm1,[ebp-48]
	paddd	xmm2,[ebp-32]
	paddd	xmm3,[ebp-16]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[ebx]
	pxor	xmm5,xmm1
	movdqa	xmm1,[16+ebx]
	pxor	xmm6,xmm2
	movdqa	xmm2,[32+ebx]
	pxor	xmm7,xmm3
	movdqa	xmm3,[48+ebx]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[ebp]
	paddd	xmm1,[16+ebp]
	paddd	xmm2,[32+ebp]
	paddd	xmm3,[48+ebp]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[64+ebx]
	pxor	xmm5,xmm1
	movdqa	xmm1,[80+ebx]
	pxor	xmm6,xmm2
	movdqa	xmm2,[96+ebx]
	pxor	xmm7,xmm3
	movdqa	xmm3,[112+ebx]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[64+ebp]
	paddd	xmm1,[80+ebp]
	paddd	xmm2,[96+ebp]
	paddd	xmm3,[112+ebp]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[208+esi]			; 3*16 + 208 = 256 bytes consumed
	pxor	xmm4,xmm0
	pxor	xmm5,xmm1
	pxor	xmm6,xmm2
	pxor	xmm7,xmm3
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[208+edi]
	sub	ecx,256
	jnc	NEAR L$009outer_loop		; another full 256 bytes available
	add	ecx,256				; ecx = bytes left (0..255)
	jz	NEAR L$011done
	; Hand the remainder to the one-block path: undo the +128 bias and
	; rebuild the counter block with the counter advanced past the
	; blocks already produced.
	mov	ebx,DWORD [520+esp]
	lea	esi,[esi-128]
	mov	edx,DWORD [516+esp]
	lea	edi,[edi-128]
	movd	xmm2,DWORD [64+ebp]		; lane-0 counter of the last group
	movdqu	xmm3,[ebx]
	paddd	xmm2,[96+eax]			; + 4
	pand	xmm3,[112+eax]			; clear the old counter dword
	por	xmm3,xmm2
L$0081x:
	; One block at a time. xmm0..xmm3 are the four state rows; their
	; initial values are kept at [esp], [16+esp], [32+esp], [48+esp].
	; xmm6/xmm7 hold the rotate-by-16 and rotate-by-8 shuffle masks.
	movdqa	xmm0,[32+eax]
	movdqu	xmm1,[edx]
	movdqu	xmm2,[16+edx]
	movdqa	xmm6,[eax]
	movdqa	xmm7,[16+eax]
	mov	DWORD [48+esp],ebp		; overwritten by the xmm3 store below
	movdqa	[esp],xmm0
	movdqa	[16+esp],xmm1
	movdqa	[32+esp],xmm2
	movdqa	[48+esp],xmm3
	mov	edx,10
	jmp	NEAR L$012loop1x
align	16
L$013outer1x:
	movdqa	xmm3,[80+eax]
	movdqa	xmm0,[esp]
	movdqa	xmm1,[16+esp]
	movdqa	xmm2,[32+esp]
	paddd	xmm3,[48+esp]			; counter += 1
	mov	edx,10
	movdqa	[48+esp],xmm3
	jmp	NEAR L$012loop1x
align	16
L$012loop1x:
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,222				; pshufb xmm3,xmm6
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,223				; pshufb xmm3,xmm7
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	pshufd	xmm2,xmm2,78			; rotate rows so the next
	pshufd	xmm1,xmm1,57			; half-round works on diagonals
	pshufd	xmm3,xmm3,147
	nop
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,222				; pshufb xmm3,xmm6
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,223				; pshufb xmm3,xmm7
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	pshufd	xmm2,xmm2,78			; rotate rows back
	pshufd	xmm1,xmm1,147
	pshufd	xmm3,xmm3,57
	dec	edx
	jnz	NEAR L$012loop1x
	paddd	xmm0,[esp]			; feed-forward
	paddd	xmm1,[16+esp]
	paddd	xmm2,[32+esp]
	paddd	xmm3,[48+esp]
	cmp	ecx,64
	jb	NEAR L$014tail
	movdqu	xmm4,[esi]
	movdqu	xmm5,[16+esi]
	pxor	xmm0,xmm4
	movdqu	xmm4,[32+esi]
	pxor	xmm1,xmm5
	movdqu	xmm5,[48+esi]
	pxor	xmm2,xmm4
	pxor	xmm3,xmm5
	lea	esi,[64+esi]
	movdqu	[edi],xmm0
	movdqu	[16+edi],xmm1
	movdqu	[32+edi],xmm2
	movdqu	[48+edi],xmm3
	lea	edi,[64+edi]
	sub	ecx,64
	jnz	NEAR L$013outer1x
	jmp	NEAR L$011done
L$014tail:
	; 1..63 bytes left: dump the key stream to the stack and XOR
	; byte by byte. ecx must be non-zero here.
	movdqa	[esp],xmm0
	movdqa	[16+esp],xmm1
	movdqa	[32+esp],xmm2
	movdqa	[48+esp],xmm3
	xor	eax,eax
	xor	edx,edx
	xor	ebp,ebp				; ebp = byte index
L$015tail_loop:
	mov	al,BYTE [ebp*1+esp]		; key-stream byte
	mov	dl,BYTE [ebp*1+esi]		; input byte
	lea	ebp,[1+ebp]
	xor	al,dl
	mov	BYTE [ebp*1+edi-1],al
	dec	ecx
	jnz	NEAR L$015tail_loop
L$011done:
	mov	esp,DWORD [512+esp]		; restore caller's esp
L$ssse3_no_data:
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret

; Constant table for the SSSE3 code, addressed relative to eax:
;   +0    pshufb mask: rotate each dword left by 16
;   +16   pshufb mask: rotate each dword left by 8
;   +32   "expand 32-byte k" as four little-endian dwords
;   +48   per-lane counter offsets 0,1,2,3
;   +64   counter step for the 4-block path
;   +80   counter step for the 1-block path
;   +96   counter fix-up when leaving the 4-block path
;   +112  mask that keeps everything but the counter dword
align	64
L$ssse3_data:
db	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
db	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
dd	1634760805,857760878,2036477234,1797285236
dd	0,1,2,3
dd	4,4,4,4
dd	1,0,0,0
dd	4,0,0,0
dd	0,-1,-1,-1
align	64
; ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated.
db	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
db	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
db	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
db	114,103,62,0
segment	.bss
common	_OPENSSL_ia32cap_P 16
%else
; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
ret
%endif