/*
 * ChaCha20 for 32-bit x86, Mach-O flavour, AT&T syntax (operand order is
 * "op src, dst").  The file is run through the C preprocessor and is empty
 * unless __i386__ is defined.
 *
 * Entry points:
 *   _ChaCha20_ctr32  - checks the capability word and either runs the
 *                      integer-only code below or jumps into the SSSE3 code.
 *   _ChaCha20_ssse3  - SSSE3 code; see the NOTE(review) at its entry.
 *
 * Both take five stack arguments.  After the four register pushes in the
 * prologue they are found at:
 *   20(%esp)  destination pointer (bytes are stored through it)
 *   24(%esp)  source pointer      (bytes are loaded through it)
 *   28(%esp)  byte count
 *   32(%esp)  pointer to 32 bytes, loaded as eight dwords (state words 4..11)
 *   36(%esp)  pointer to 16 bytes, loaded as four dwords  (state words 12..15);
 *             the first dword is the per-block counter
 * Presumably the C prototype is
 *   void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t len,
 *                       const uint32_t key[8], const uint32_t counter[4]);
 * TODO confirm against the C header.
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx, %edx and
 * %xmm0..%xmm7 are clobbered.
 */
#if defined(__i386__)
.text
.globl	_ChaCha20_ctr32
.private_extern	_ChaCha20_ctr32
.align	4
_ChaCha20_ctr32:
L_ChaCha20_ctr32_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* Nothing to do when the byte count is zero. */
	xorl	%eax,%eax
	cmpl	28(%esp),%eax
	je	L000no_data
	/* call/pop leaves the address of Lpic_point in %eax (PIC base). */
	call	Lpic_point
Lpic_point:
	popl	%eax
	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp
	/*
	 * Take the SSSE3 code only when bit 24 of capability word 0 and bit 9
	 * of capability word 1 are both set (presumably FXSR and SSSE3 - see
	 * the OPENSSL_ia32cap documentation).  %eax still holds Lpic_point
	 * when the jump is taken.
	 */
	testl	$16777216,(%ebp)
	jz	L001x86
	testl	$512,4(%ebp)
	jz	L001x86
	jmp	Lssse3_shortcut
L001x86:
	/*
	 * Integer-only path.  After the 132-byte frame is reserved:
	 *     0.. 60(%esp)  working state, one dword per state word
	 *    80..124(%esp)  input state words 4..15 (word i at 64+4*i)
	 *       128(%esp)   saved round-loop counter
	 *   152/156/160(%esp)  destination / source / remaining bytes
	 *                      (the caller's first three arguments)
	 */
	movl	32(%esp),%esi
	movl	36(%esp),%edi
	subl	$132,%esp
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	/* Stored as counter-1 because L002entry adds 1 before every block. */
	subl	$1,%eax
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	L002entry
.align	4,0x90
L003outer_loop:
	/* Write back the advanced source, destination and remaining length. */
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
L002entry:
	/*
	 * State words 0..3 are the four immediates below, which are the
	 * little-endian dwords of the ASCII text "expand 32-byte k".
	 * Words 0, 4, 8, 9 and 12 start out in %eax, %ebp, %ecx, %esi and
	 * %edx; the other words start out in the working area.
	 */
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	/* Advance the block counter and remember it for the next block. */
	addl	$1,%edx
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	/* Ten passes through L004loop per 64-byte block. */
	movl	$10,%ebx
	jmp	L004loop
.align	4,0x90
L004loop:
	/*
	 * Each pass applies eight add / xor / rotate-left groups with rotate
	 * counts 16, 12, 8, 7, interleaved with loads and stores of the
	 * working area.  The pass counter lives in 128(%esp) because %ebx is
	 * needed as a scratch register.
	 */
	addl	%ebp,%eax
	movl	%ebx,128(%esp)
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	movl	128(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	L004loop
	/*
	 * Rounds done.  Add the input state back in; a full 64-byte block is
	 * XORed straight from source to destination, a shorter remainder goes
	 * through L005tail.
	 */
	movl	160(%esp),%ebx
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	L005tail
	movl	156(%esp),%ebx
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	/* Word 0 is parked in (%esp) so %eax can hold the destination. */
	movl	%eax,(%esp)
	movl	152(%esp),%eax
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	leal	64(%eax),%eax
	subl	$64,%ecx
	jnz	L003outer_loop
	jmp	L006done
L005tail:
	/*
	 * Fewer than 64 bytes remain (%ebx = remaining count).  Build the
	 * complete keystream block in 0..60(%esp), then XOR it into the
	 * destination one byte at a time.
	 */
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx
	movl	%esi,44(%esp)
	xorl	%esi,%esi
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
L007tail_loop:
	/* %ebp = source, %ecx = destination, %esi = byte index. */
	movb	(%esi,%ebp,1),%al
	movb	(%esp,%esi,1),%dl
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)
	decl	%ebx
	jnz	L007tail_loop
L006done:
	addl	$132,%esp
L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/*
 * SSSE3 code.  Normally reached through Lssse3_shortcut from
 * _ChaCha20_ctr32, with the caller's registers already pushed and %eax
 * holding the address of Lpic_point.
 *
 * NOTE(review): when entered at _ChaCha20_ssse3 itself, nothing in this file
 * loads %eax before the "leal Lssse3_data-Lpic_point(%eax)" below, and the
 * zero-length check made by _ChaCha20_ctr32 is skipped.  Confirm that no
 * caller uses this entry point directly.
 */
.globl	_ChaCha20_ssse3
.private_extern	_ChaCha20_ssse3
.align	4
_ChaCha20_ssse3:
L_ChaCha20_ssse3_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
Lssse3_shortcut:
	/* %edi = destination, %esi = source, %ecx = byte count,
	   %edx = 32-byte key pointer, %ebx = 16-byte counter block pointer. */
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	/* Reserve a 64-byte-aligned frame; the caller's %esp is kept at
	   512(%esp) and restored at L011done. */
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	/* %eax = address of Lssse3_data from here on. */
	leal	Lssse3_data-Lpic_point(%eax),%eax
	movdqu	(%ebx),%xmm3
	/* Fewer than 256 bytes: go straight to the one-block-at-a-time code. */
	cmpl	$256,%ecx
	jb	L0081x
	/*
	 * Four-blocks-at-a-time path.
	 *   516(%esp) / 520(%esp)  key and counter pointers, reloaded for the
	 *                          remainder after the last group
	 *   %ebp = %esp+384  input state:   16 slots at -128..112(%ebp)
	 *   %ebx = %esp+128  working state: 16 slots at -128..112(%ebx)
	 * Slot i lives at -128+16*i and holds state word i for four blocks,
	 * one block per dword lane.  All slots are broadcasts except slot 12
	 * at 64(%ebp), which holds four consecutive block counters.
	 */
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx
	leal	384(%esp),%ebp
	movdqu	(%edx),%xmm7
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	/* counter + {0,1,2,3} - {4,4,4,4}: the outer loop adds 4 before use. */
	paddd	48(%eax),%xmm0
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	psubd	64(%eax),%xmm0
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,64(%ebp)
	movdqa	%xmm1,80(%ebp)
	movdqa	%xmm2,96(%ebp)
	movdqa	%xmm3,112(%ebp)
	movdqu	16(%edx),%xmm3
	movdqa	%xmm4,-64(%ebp)
	movdqa	%xmm5,-48(%ebp)
	movdqa	%xmm6,-32(%ebp)
	movdqa	%xmm7,-16(%ebp)
	movdqa	32(%eax),%xmm7
	leal	128(%esp),%ebx
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,(%ebp)
	movdqa	%xmm1,16(%ebp)
	movdqa	%xmm2,32(%ebp)
	movdqa	%xmm3,48(%ebp)
	movdqa	%xmm4,-128(%ebp)
	movdqa	%xmm5,-112(%ebp)
	movdqa	%xmm6,-96(%ebp)
	movdqa	%xmm7,-80(%ebp)
	/* Bias both data pointers by 128 so the four blocks of a group are
	   addressed as -128, -64, 0 and 64. */
	leal	128(%esi),%esi
	leal	128(%edi),%edi
	jmp	L009outer_loop
.align	4,0x90
L009outer_loop:
	/* Copy the input state into the working area; slots 0, 4, 8, 9 and 12
	   start out in %xmm0, %xmm3, %xmm4, %xmm5 and %xmm6 instead. */
	movdqa	-112(%ebp),%xmm1
	movdqa	-96(%ebp),%xmm2
	movdqa	-80(%ebp),%xmm3
	movdqa	-48(%ebp),%xmm5
	movdqa	-32(%ebp),%xmm6
	movdqa	-16(%ebp),%xmm7
	movdqa	%xmm1,-112(%ebx)
	movdqa	%xmm2,-96(%ebx)
	movdqa	%xmm3,-80(%ebx)
	movdqa	%xmm5,-48(%ebx)
	movdqa	%xmm6,-32(%ebx)
	movdqa	%xmm7,-16(%ebx)
	movdqa	32(%ebp),%xmm2
	movdqa	48(%ebp),%xmm3
	movdqa	64(%ebp),%xmm4
	movdqa	80(%ebp),%xmm5
	movdqa	96(%ebp),%xmm6
	movdqa	112(%ebp),%xmm7
	/* Advance all four block counters by 4 and store them back. */
	paddd	64(%eax),%xmm4
	movdqa	%xmm2,32(%ebx)
	movdqa	%xmm3,48(%ebx)
	movdqa	%xmm4,64(%ebx)
	movdqa	%xmm5,80(%ebx)
	movdqa	%xmm6,96(%ebx)
	movdqa	%xmm7,112(%ebx)
	movdqa	%xmm4,64(%ebp)
	movdqa	-128(%ebp),%xmm0
	movdqa	%xmm4,%xmm6
	movdqa	-64(%ebp),%xmm3
	movdqa	(%ebp),%xmm4
	movdqa	16(%ebp),%xmm5
	movl	$10,%edx
	nop
.align	4,0x90
L010loop:
	/*
	 * Ten passes.  Rotations by 16 and 8 use pshufb with the byte-shuffle
	 * masks at 0(%eax) and 16(%eax); rotations by 12 and 7 use a
	 * pslld / psrld / por triple.
	 */
	paddd	%xmm3,%xmm0
	movdqa	%xmm3,%xmm2
	pxor	%xmm0,%xmm6
	pshufb	(%eax),%xmm6
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-48(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	80(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,64(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-64(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	32(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-32(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	96(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,80(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,16(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-48(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	48(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-16(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	112(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,96(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-32(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	-48(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,%xmm6
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-16(%ebx)
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-32(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	64(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,112(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,32(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-48(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-16(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	80(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,64(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,48(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-32(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	16(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-64(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	96(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,80(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-16(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	64(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,96(%ebx)
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	por	%xmm1,%xmm3
	decl	%edx
	jnz	L010loop
	/* Spill the slots still held in registers back to the working area. */
	movdqa	%xmm3,-64(%ebx)
	movdqa	%xmm4,(%ebx)
	movdqa	%xmm5,16(%ebx)
	movdqa	%xmm6,64(%ebx)
	movdqa	%xmm7,96(%ebx)
	movdqa	-112(%ebx),%xmm1
	movdqa	-96(%ebx),%xmm2
	movdqa	-80(%ebx),%xmm3
	/*
	 * Output, four state words at a time (words 0..3 here, then 4..7,
	 * 8..11 and 12..15): add the input state, transpose the 4x4 dword
	 * matrix so each register holds 16 consecutive keystream bytes of one
	 * block, XOR with the source and store.  The data pointers step by 16
	 * after each of the first three quarters and by 208 after the last,
	 * 256 bytes in total per group.
	 */
	paddd	-128(%ebp),%xmm0
	paddd	-112(%ebp),%xmm1
	paddd	-96(%ebp),%xmm2
	paddd	-80(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	-64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	-48(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	-32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	-16(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	-64(%ebp),%xmm0
	paddd	-48(%ebp),%xmm1
	paddd	-32(%ebp),%xmm2
	paddd	-16(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	16(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	48(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	(%ebp),%xmm0
	paddd	16(%ebp),%xmm1
	paddd	32(%ebp),%xmm2
	paddd	48(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	80(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	96(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	112(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	64(%ebp),%xmm0
	paddd	80(%ebp),%xmm1
	paddd	96(%ebp),%xmm2
	paddd	112(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	208(%esi),%esi
	pxor	%xmm0,%xmm4
	pxor	%xmm1,%xmm5
	pxor	%xmm2,%xmm6
	pxor	%xmm3,%xmm7
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	/* Loop while at least another 256 bytes remain (no borrow). */
	subl	$256,%ecx
	jnc	L009outer_loop
	addl	$256,%ecx
	jz	L011done
	/*
	 * 1..255 bytes remain.  Undo the pointer bias and rebuild the counter
	 * block for the one-block code: word 0 is lane 0 of the last group
	 * counter plus 4, words 1..3 come from the caller's counter block.
	 */
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	movd	64(%ebp),%xmm2
	movdqu	(%ebx),%xmm3
	paddd	96(%eax),%xmm2
	pand	112(%eax),%xmm3
	por	%xmm2,%xmm3
L0081x:
	/*
	 * One-block-at-a-time path.  State rows 0..3 live in %xmm0..%xmm3 with
	 * a copy of the input state at 0/16/32/48(%esp); %xmm6 and %xmm7 hold
	 * the two byte-shuffle masks.  On entry %xmm3 is the counter block and
	 * %edx the key pointer.
	 */
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6
	movdqa	16(%eax),%xmm7
	/* This dword is overwritten by the store of %xmm3 to 48(%esp) below. */
	movl	%ebp,48(%esp)
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	L012loop1x
.align	4,0x90
L013outer1x:
	/* Next block: reload the input rows and add {1,0,0,0} to the counter. */
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	L012loop1x
.align	4,0x90
L012loop1x:
	/*
	 * Ten passes of two half-rounds.  The .byte sequences 102,15,56,0,222
	 * and 102,15,56,0,223 encode "pshufb %xmm6,%xmm3" and
	 * "pshufb %xmm7,%xmm3".  The pshufd instructions between the halves
	 * rotate the lanes of rows 1..3, and the ones at the end rotate them
	 * back.
	 */
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	L012loop1x
	/* Add the input state to get the keystream block. */
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	L014tail
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	L013outer1x
	jmp	L011done
L014tail:
	/* Fewer than 64 bytes remain: spill the keystream block to the frame
	   and XOR the remaining %ecx bytes one at a time. */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
L015tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	L015tail_loop
L011done:
	/* Restore the caller's stack pointer saved at Lssse3_shortcut. */
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/*
 * Constants for the SSSE3 code, addressed relative to %eax:
 *     0  pshufb mask rotating every dword left by 16 bits
 *    16  pshufb mask rotating every dword left by 8 bits
 *    32  state words 0..3
 *    48  {0,1,2,3}     per-lane counter offsets
 *    64  {4,4,4,4}     counter step for a four-block group
 *    80  {1,0,0,0}     counter step for a single block
 *    96  {4,0,0,0}     added to the lane-0 counter after the last group
 *   112  {0,-1,-1,-1}  mask that clears word 0 of the counter block
 */
.align	6,0x90
Lssse3_data:
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long	1634760805,857760878,2036477234,1797285236
.long	0,1,2,3
.long	4,4,4,4
.long	1,0,0,0
.long	4,0,0,0
.long	0,-1,-1,-1
.align	6,0x90
/* NUL-terminated ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
/* Non-lazy pointer through which _OPENSSL_ia32cap_P is reached. */
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol	_OPENSSL_ia32cap_P
.long	0
#endif