# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

# Syntax: AT&T (GAS style), 32-bit x86, Mach-O symbol naming (leading underscore).
# Both entry points read five 32-bit arguments from the stack. After the four
# register pushes they sit at 20/24/28/32/36(%esp):
#   arg1 (20): pointer that results are stored through (output buffer)
#   arg2 (24): pointer that is loaded from and XORed (input buffer)
#   arg3 (28): byte count; zero returns immediately from _ChaCha20_ctr32
#   arg4 (32): pointer to eight dwords (state words 4..11; presumably the key)
#   arg5 (36): pointer to four dwords (state words 12..15; presumably the
#              block counter followed by the nonce - TODO confirm with caller)
# Preserved registers: ebp, ebx, esi, edi are pushed and popped by both paths.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl	_ChaCha20_ctr32
.private_extern	_ChaCha20_ctr32
.align	4
_ChaCha20_ctr32:
L_ChaCha20_ctr32_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	# Return at once when the byte count (arg3) is zero.
	xorl	%eax,%eax
	cmpl	28(%esp),%eax
	je	L000no_data
	# PIC idiom: call/pop leaves the address of Lpic_point in eax.
	call	Lpic_point
Lpic_point:
	popl	%eax
	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp
	# Take the SSSE3 path only when bit 24 of capability word 0 and bit 9 of
	# capability word 1 are both set (presumably FXSR and SSSE3 - TODO confirm).
	testl	$16777216,(%ebp)
	jz	L001x86
	testl	$512,4(%ebp)
	jz	L001x86
	# Lssse3_shortcut expects eax to still hold the Lpic_point address.
	jmp	Lssse3_shortcut
L001x86:
	# Scalar path. Frame layout after the 132-byte allocation:
	#   0..60(%esp)     working copy of the 16 state words
	#   80..108(%esp)   the eight dwords read through arg4
	#   112..124(%esp)  the four dwords read through arg5
	#   128(%esp)       inner loop counter
	#   152/156/160     arg1/arg2/arg3 (output ptr, input ptr, bytes left)
	movl	32(%esp),%esi
	movl	36(%esp),%edi
	subl	$132,%esp
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	# Word 12 is stored minus one because L002entry adds one before each block.
	subl	$1,%eax
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	L002entry
.align	4,0x90
L003outer_loop:
	# Write back advanced input ptr (ebx), output ptr (eax), bytes left (ecx).
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
L002entry:
	# Words 0..3 are the four constants below (little-endian ASCII
	# "expand 32-byte k"); word 0 stays in eax, the rest go to the frame.
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	# Advance word 12 by one for this block and persist it.
	addl	$1,%edx
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	# Ten passes of the loop body; each pass holds eight add/xor/rotate groups
	# with rotate counts 16, 12, 8, 7 (presumably one column round plus one
	# diagonal round, giving 20 rounds in total).
	movl	$10,%ebx
	jmp	L004loop
.align	4,0x90
L004loop:
	addl	%ebp,%eax
	# ebx is needed as a scratch register, so the pass counter is spilled.
	movl	%ebx,128(%esp)
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	# Reload the spilled pass counter.
	movl	128(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	L004loop
	# Rounds finished. ebx = bytes left. Begin adding the saved input words
	# back in: word 0 (eax), word 4 (ebp), word 8 (ecx), word 9 (esi).
	movl	160(%esp),%ebx
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	# Fewer than 64 bytes left (unsigned compare): finish byte by byte.
	cmpl	$64,%ebx
	jb	L005tail
	# Full block: ebx = input ptr, eax = output ptr. Words are added to the
	# saved input state, XORed with input and stored five at a time.
	movl	156(%esp),%ebx
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	# Output word 0 is parked in the frame because eax becomes the output ptr.
	movl	%eax,(%esp)
	movl	152(%esp),%eax
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	# Advance the input pointer by one 64-byte block.
	leal	64(%ebx),%ebx
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	# Store the parked output word 0, then advance the output pointer.
	movl	%ebp,(%eax)
	leal	64(%eax),%eax
	subl	$64,%ecx
	jnz	L003outer_loop
	jmp	L006done
L005tail:
	# Partial final block: finish adding the input state, write all 16
	# keystream words to 0..60(%esp), then XOR ebx bytes one at a time.
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	# ebp = input ptr, ecx = output ptr, esi = byte index (starts at zero).
	movl	156(%esp),%ebp
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx
	movl	%esi,44(%esp)
	xorl	%esi,%esi
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
L007tail_loop:
	movb	(%esi,%ebp,1),%al
	movb	(%esp,%esi,1),%dl
	leal	1(%esi),%esi
	xorb	%dl,%al
	# The index was already incremented, hence the -1 displacement.
	movb	%al,-1(%ecx,%esi,1)
	decl	%ebx
	jnz	L007tail_loop
L006done:
	addl	$132,%esp
L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.globl	_ChaCha20_ssse3
.private_extern	_ChaCha20_ssse3
.align	4
# SSSE3 path. Same five stack arguments as _ChaCha20_ctr32.
# NOTE(review): nothing between this entry label and the leal that forms the
# Lssse3_data address writes eax, so this code depends on eax already holding
# the address of Lpic_point. That holds when arriving through Lssse3_shortcut
# from _ChaCha20_ctr32; a direct call to _ChaCha20_ssse3 would appear to use
# whatever eax the caller left behind. Confirm that no direct callers exist.
_ChaCha20_ssse3:
L_ChaCha20_ssse3_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
Lssse3_shortcut:
	# edi = output ptr, esi = input ptr, ecx = byte count,
	# edx = arg4 pointer, ebx = arg5 pointer.
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	# Build a 64-byte aligned frame; the original esp is kept at 512(%esp)
	# and restored at L011done.
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	# eax = address of Lssse3_data (see the NOTE above about eax on entry).
	leal	Lssse3_data-Lpic_point(%eax),%eax
	movdqu	(%ebx),%xmm3
	# Under 256 bytes (unsigned compare): use the one-block-at-a-time path.
	cmpl	$256,%ecx
	jb	L0081x
	# Four-block path. arg4/arg5 pointers are saved at 516/520(%esp) for the
	# leftover handling. ebp = esp+384 addresses the broadcast input state
	# (esp+256..511); ebx = esp+128 (set below) addresses the working state
	# (esp+0..255). Each 16-byte slot holds one state word for four blocks.
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx
	leal	384(%esp),%ebp
	movdqu	(%edx),%xmm7
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	# Word 12 lanes get +0,+1,+2,+3 (table offset 48) and then -4 in every
	# lane (table offset 64), because the outer loop adds 4 before each use.
	paddd	48(%eax),%xmm0
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	psubd	64(%eax),%xmm0
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,64(%ebp)
	movdqa	%xmm1,80(%ebp)
	movdqa	%xmm2,96(%ebp)
	movdqa	%xmm3,112(%ebp)
	movdqu	16(%edx),%xmm3
	movdqa	%xmm4,-64(%ebp)
	movdqa	%xmm5,-48(%ebp)
	movdqa	%xmm6,-32(%ebp)
	movdqa	%xmm7,-16(%ebp)
	# Table offset 32 holds the four constant words (state words 0..3).
	movdqa	32(%eax),%xmm7
	leal	128(%esp),%ebx
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,(%ebp)
	movdqa	%xmm1,16(%ebp)
	movdqa	%xmm2,32(%ebp)
	movdqa	%xmm3,48(%ebp)
	movdqa	%xmm4,-128(%ebp)
	movdqa	%xmm5,-112(%ebp)
	movdqa	%xmm6,-96(%ebp)
	movdqa	%xmm7,-80(%ebp)
	# Bias both data pointers by 128 so the four 64-byte blocks are reached
	# with displacements -128, -64, 0 and 64.
	leal	128(%esi),%esi
	leal	128(%edi),%edi
	jmp	L009outer_loop
.align	4,0x90
L009outer_loop:
	# Copy the input state into the working area; some words stay in
	# registers only.
	movdqa	-112(%ebp),%xmm1
	movdqa	-96(%ebp),%xmm2
	movdqa	-80(%ebp),%xmm3
	movdqa	-48(%ebp),%xmm5
	movdqa	-32(%ebp),%xmm6
	movdqa	-16(%ebp),%xmm7
	movdqa	%xmm1,-112(%ebx)
	movdqa	%xmm2,-96(%ebx)
	movdqa	%xmm3,-80(%ebx)
	movdqa	%xmm5,-48(%ebx)
	movdqa	%xmm6,-32(%ebx)
	movdqa	%xmm7,-16(%ebx)
	movdqa	32(%ebp),%xmm2
	movdqa	48(%ebp),%xmm3
	movdqa	64(%ebp),%xmm4
	movdqa	80(%ebp),%xmm5
	movdqa	96(%ebp),%xmm6
	movdqa	112(%ebp),%xmm7
	# Advance the four word-12 lanes by 4 and persist them (last store below).
	paddd	64(%eax),%xmm4
	movdqa	%xmm2,32(%ebx)
	movdqa	%xmm3,48(%ebx)
	movdqa	%xmm4,64(%ebx)
	movdqa	%xmm5,80(%ebx)
	movdqa	%xmm6,96(%ebx)
	movdqa	%xmm7,112(%ebx)
	movdqa	%xmm4,64(%ebp)
	movdqa	-128(%ebp),%xmm0
	movdqa	%xmm4,%xmm6
	movdqa	-64(%ebp),%xmm3
	movdqa	(%ebp),%xmm4
	movdqa	16(%ebp),%xmm5
	# Ten passes of L010loop. Rotations by 16 and 8 use pshufb with the masks
	# at table offsets 0 and 16; rotations by 12 and 7 use shift/shift/or.
	movl	$10,%edx
	nop
.align	4,0x90
L010loop:
	paddd	%xmm3,%xmm0
	movdqa	%xmm3,%xmm2
	pxor	%xmm0,%xmm6
	pshufb	(%eax),%xmm6
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-48(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	80(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,64(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-64(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	32(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-32(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	96(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,80(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,16(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-48(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	48(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-16(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	112(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,96(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-32(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	-48(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,%xmm6
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-16(%ebx)
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-32(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	64(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,112(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,32(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-48(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-16(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	80(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,64(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,48(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-32(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	16(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-64(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	96(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,80(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-16(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	64(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,96(%ebx)
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	por	%xmm1,%xmm3
	decl	%edx
	jnz	L010loop
	# Flush the words still held in registers back to the working area.
	movdqa	%xmm3,-64(%ebx)
	movdqa	%xmm4,(%ebx)
	movdqa	%xmm5,16(%ebx)
	movdqa	%xmm6,64(%ebx)
	movdqa	%xmm7,96(%ebx)
	# Output stage, four state words at a time: add the input state,
	# transpose the 4x4 dword tile with punpck so each register holds 16
	# consecutive bytes of one block, XOR with input, store to output.
	movdqa	-112(%ebx),%xmm1
	movdqa	-96(%ebx),%xmm2
	movdqa	-80(%ebx),%xmm3
	paddd	-128(%ebp),%xmm0
	paddd	-112(%ebp),%xmm1
	paddd	-96(%ebp),%xmm2
	paddd	-80(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	-64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	-48(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	-32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	-16(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	-64(%ebp),%xmm0
	paddd	-48(%ebp),%xmm1
	paddd	-32(%ebp),%xmm2
	paddd	-16(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	16(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	48(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	(%ebp),%xmm0
	paddd	16(%ebp),%xmm1
	paddd	32(%ebp),%xmm2
	paddd	48(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	80(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	96(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	112(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	64(%ebp),%xmm0
	paddd	80(%ebp),%xmm1
	paddd	96(%ebp),%xmm2
	paddd	112(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	# Three earlier steps of 16 plus this 208 advance the pointer by 256.
	leal	208(%esi),%esi
	pxor	%xmm0,%xmm4
	pxor	%xmm1,%xmm5
	pxor	%xmm2,%xmm6
	pxor	%xmm3,%xmm7
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	# ecx was pre-decremented by 256; no borrow means another full
	# 256-byte group is available.
	subl	$256,%ecx
	jnc	L009outer_loop
	# Undo the pre-decrement: ecx = leftover bytes (0..255).
	addl	$256,%ecx
	jz	L011done
	# Leftover bytes go through the one-block path. Reload the arg5/arg4
	# pointers, remove the 128-byte pointer bias, and rebuild state row 3:
	# dword 0 = lane 0 of the stored word-12 vector plus 4 (table offset 96),
	# dwords 1..3 = original arg5 block masked by table offset 112.
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	movd	64(%ebp),%xmm2
	movdqu	(%ebx),%xmm3
	paddd	96(%eax),%xmm2
	pand	112(%eax),%xmm3
	por	%xmm2,%xmm3
L0081x:
	# One-block path. xmm0..xmm3 = state rows 0..3, copies kept at
	# 0/16/32/48(%esp). xmm6 and xmm7 hold the two byte-shuffle masks from
	# table offsets 0 and 16.
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6
	movdqa	16(%eax),%xmm7
	# NOTE(review): this dword store to 48(%esp) is overwritten by the
	# movdqa of xmm3 to the same slot below with no read in between; it
	# looks like a dead store. Left untouched because the file is generated.
	movl	%ebp,48(%esp)
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	L012loop1x
.align	4,0x90
L013outer1x:
	# Next block: reload rows 0..2, add the table entry at offset 80
	# (dwords 1,0,0,0) to row 3 and store the updated row 3 back.
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	L012loop1x
.align	4,0x90
L012loop1x:
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	# The byte sequence 66 0F 38 00 DE encodes pshufb %xmm6,%xmm3.
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	# The byte sequence 66 0F 38 00 DF encodes pshufb %xmm7,%xmm3.
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	# Rotate the lanes of rows 1..3 for the second half of the pass.
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	# Rotate the lanes back (the row 1 and row 3 immediates are swapped).
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	L012loop1x
	# Add the saved input rows to form the 64-byte keystream block.
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	L014tail
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	L013outer1x
	jmp	L011done
L014tail:
	# Under 64 bytes left: spill the keystream to 0..63(%esp) and XOR ecx
	# bytes one at a time, with ebp as the byte index.
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
L015tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	L015tail_loop
L011done:
	# Restore the stack pointer saved at frame setup.
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.align	6,0x90
# Constant table used by the SSSE3 path, addressed relative to eax:
#   +0    pshufb mask rotating each dword left by 16 bits
#   +16   pshufb mask rotating each dword left by 8 bits
#   +32   the four constant state words 0..3
#   +48   per-lane word-12 offsets 0,1,2,3
#   +64   word-12 step of 4 in every lane
#   +80   row-3 increment of 1 in dword 0 (one-block path)
#   +96   row-3 increment of 4 in dword 0 (four-block to one-block handoff)
#   +112  mask that clears dword 0 and keeps dwords 1..3
Lssse3_data:
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long	1634760805,857760878,2036477234,1797285236
.long	0,1,2,3
.long	4,4,4,4
.long	1,0,0,0
.long	4,0,0,0
.long	0,-1,-1,-1
.align	6,0x90
# NUL-terminated ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
# Non-lazy pointer slot through which _ChaCha20_ctr32 reads _OPENSSL_ia32cap_P.
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol	_OPENSSL_ia32cap_P
.long	0
#endif