# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
#
# ChaCha20_ctr32: ChaCha20 keystream XOR for i386, five stack arguments.
# After the four register pushes below the arguments are found at
#   arg 1, 20(%esp): output pointer (written 64 bytes at a time, then bytewise)
#   arg 2, 24(%esp): input pointer (XORed with the keystream)
#   arg 3, 28(%esp): length in bytes (zero returns immediately)
#   arg 4, 32(%esp): key pointer (eight 32-bit words are read)
#   arg 5, 36(%esp): counter pointer (four 32-bit words are read; word 0 is
#                    incremented once per 64-byte block)
# NOTE(review): presumably matches the C prototype
#   void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
#                       const uint32_t key[8], const uint32_t counter[4]);
# TODO confirm against the C declaration.
#
# Scalar path frame, relative to %esp after the subl of 132 bytes:
#   offsets 0..60     working state, 16 words (several stay in registers)
#   offsets 80..108   copy of the eight key words
#   offsets 112..124  copy of the four counter words
#   offset 128        round-loop counter spill
#   offsets 152..160  the caller argument slots for out, in and len, reused
#                     to hold the advancing pointers and remaining length
#
.globl	ChaCha20_ctr32
.hidden	ChaCha20_ctr32
.type	ChaCha20_ctr32,@function
.align	16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax
	cmpl	28(%esp),%eax		# length of zero: nothing to do
	je	.L000no_data
	call	.Lpic_point		# call/pop yields the run-time address of .Lpic_point
.Lpic_point:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
	# Take the SIMD path only when bit 24 of capability word 0 and bit 9 of
	# capability word 1 are both set.
	# NOTE(review): presumably FXSR and SSSE3 in the ia32cap layout - confirm.
	testl	$16777216,(%ebp)
	jz	.L001x86
	testl	$512,4(%ebp)
	jz	.L001x86
	jmp	.Lssse3_shortcut	# %eax still holds the address of .Lpic_point
.L001x86:
	movl	32(%esp),%esi		# key pointer
	movl	36(%esp),%edi		# counter pointer
	subl	$132,%esp
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	subl	$1,%eax			# pre-decrement: .L002entry adds 1 before each block
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	.L002entry
.align	16
.L003outer_loop:
	# Save the advanced input pointer, output pointer and remaining length.
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
.L002entry:
	# Load the four constant words (ASCII for the string expand 32-byte k),
	# then the key and counter words, into the working state.
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	addl	$1,%edx			# block counter for this block
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	movl	$10,%ebx		# ten passes through .L004loop
	jmp	.L004loop
.align	16
.L004loop:
	# One pass applies eight add/xor/rotate quarter-round groups (rotations
	# 16, 12, 8, 7).  The loop counter is spilled to 128(%esp) because every
	# general register is in use.  Instruction order is hand-scheduled.
	addl	%ebp,%eax
	movl	%ebx,128(%esp)
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	movl	128(%esp),%ebx		# reload the loop counter
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	.L004loop
	# Add the input state back in, then either XOR a whole 64-byte block or
	# fall to the byte-wise tail when fewer than 64 bytes remain.
	movl	160(%esp),%ebx		# remaining length
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	.L005tail
	movl	156(%esp),%ebx		# input pointer
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	movl	%eax,(%esp)		# park word 0; %eax is needed for the output pointer
	movl	152(%esp),%eax		# output pointer
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx		# advance the input pointer
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp		# word 0, parked above
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx		# remaining length
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	leal	64(%eax),%eax		# advance the output pointer
	subl	$64,%ecx
	jnz	.L003outer_loop
	jmp	.L006done
.L005tail:
	# Fewer than 64 bytes remain (%ebx holds the count): finish the keystream
	# block in 0..60(%esp), then XOR it into the output one byte at a time.
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp		# input pointer
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx		# output pointer
	movl	%esi,44(%esp)
	xorl	%esi,%esi		# byte index
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
.L007tail_loop:
	movb	(%esi,%ebp,1),%al
	movb	(%esp,%esi,1),%dl
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)
	decl	%ebx
	jnz	.L007tail_loop
.L006done:
	addl	$132,%esp
.L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
#
# ChaCha20_ssse3: SIMD variant reading the same five stack arguments.
# ChaCha20_ctr32 jumps to .Lssse3_shortcut with its registers already pushed
# and with %eax holding the run-time address of .Lpic_point; the shortcut
# code uses that value as the base for locating .Lssse3_data.
#
# The direct entry point used to fall into the shortcut without setting
# %eax, so a direct call computed the table address from whatever the caller
# had left in that register.  The prologue now derives the same base itself.
# NOTE(review): this file is generated; mirror the change in the Perl source.
#
# Unlike ChaCha20_ctr32, no zero-length check is made on this entry.
#
# Frame, relative to %esp after it is aligned down to 64 bytes:
#   offsets 0..255    working state of the 4-block path, via -128..112(%ebx);
#                     the 1-block path keeps its state in offsets 0..63
#   offsets 256..511  broadcast input state, via -128..112(%ebp)
#   offset 512        caller stack pointer, restored at .L011done
#   offset 516        saved key pointer
#   offset 520        saved counter pointer
#
.globl	ChaCha20_ssse3
.hidden	ChaCha20_ssse3
.type	ChaCha20_ssse3,@function
.align	16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	call	.Lssse3_pic_point
.Lssse3_pic_point:
	popl	%eax
	# Rebase to .Lpic_point so both entries reach the shortcut with the same %eax.
	leal	.Lpic_point-.Lssse3_pic_point(%eax),%eax
.Lssse3_shortcut:
	movl	20(%esp),%edi		# output pointer
	movl	24(%esp),%esi		# input pointer
	movl	28(%esp),%ecx		# length in bytes
	movl	32(%esp),%edx		# key pointer
	movl	36(%esp),%ebx		# counter pointer
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)		# remember the caller stack pointer
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	movdqu	(%ebx),%xmm3		# the four counter words
	cmpl	$256,%ecx
	jb	.L0081x			# under 256 bytes: one block at a time
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx
	leal	384(%esp),%ebp
	# Broadcast every state word across a whole xmm register so that four
	# blocks are computed in parallel, one per 32-bit lane.
	movdqu	(%edx),%xmm7
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	paddd	48(%eax),%xmm0		# per-lane counter offsets 0,1,2,3
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	psubd	64(%eax),%xmm0		# minus 4: the outer loop adds 4 before each pass
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,64(%ebp)
	movdqa	%xmm1,80(%ebp)
	movdqa	%xmm2,96(%ebp)
	movdqa	%xmm3,112(%ebp)
	movdqu	16(%edx),%xmm3
	movdqa	%xmm4,-64(%ebp)
	movdqa	%xmm5,-48(%ebp)
	movdqa	%xmm6,-32(%ebp)
	movdqa	%xmm7,-16(%ebp)
	movdqa	32(%eax),%xmm7		# the four constant words
	leal	128(%esp),%ebx
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,(%ebp)
	movdqa	%xmm1,16(%ebp)
	movdqa	%xmm2,32(%ebp)
	movdqa	%xmm3,48(%ebp)
	movdqa	%xmm4,-128(%ebp)
	movdqa	%xmm5,-112(%ebp)
	movdqa	%xmm6,-96(%ebp)
	movdqa	%xmm7,-80(%ebp)
	leal	128(%esi),%esi		# bias both pointers by 128 for the -128..64 offsets below
	leal	128(%edi),%edi
	jmp	.L009outer_loop
.align	16
.L009outer_loop:
	# Copy the input state into the working area, bumping the counters by 4.
	movdqa	-112(%ebp),%xmm1
	movdqa	-96(%ebp),%xmm2
	movdqa	-80(%ebp),%xmm3
	movdqa	-48(%ebp),%xmm5
	movdqa	-32(%ebp),%xmm6
	movdqa	-16(%ebp),%xmm7
	movdqa	%xmm1,-112(%ebx)
	movdqa	%xmm2,-96(%ebx)
	movdqa	%xmm3,-80(%ebx)
	movdqa	%xmm5,-48(%ebx)
	movdqa	%xmm6,-32(%ebx)
	movdqa	%xmm7,-16(%ebx)
	movdqa	32(%ebp),%xmm2
	movdqa	48(%ebp),%xmm3
	movdqa	64(%ebp),%xmm4
	movdqa	80(%ebp),%xmm5
	movdqa	96(%ebp),%xmm6
	movdqa	112(%ebp),%xmm7
	paddd	64(%eax),%xmm4
	movdqa	%xmm2,32(%ebx)
	movdqa	%xmm3,48(%ebx)
	movdqa	%xmm4,64(%ebx)
	movdqa	%xmm5,80(%ebx)
	movdqa	%xmm6,96(%ebx)
	movdqa	%xmm7,112(%ebx)
	movdqa	%xmm4,64(%ebp)
	movdqa	-128(%ebp),%xmm0
	movdqa	%xmm4,%xmm6
	movdqa	-64(%ebp),%xmm3
	movdqa	(%ebp),%xmm4
	movdqa	16(%ebp),%xmm5
	movl	$10,%edx		# ten passes through .L010loop
	nop
.align	16
.L010loop:
	# Rotations by 16 and 8 use pshufb with the masks at 0 and 16(%eax);
	# rotations by 12 and 7 use a shift pair.  Order is hand-scheduled.
	paddd	%xmm3,%xmm0
	movdqa	%xmm3,%xmm2
	pxor	%xmm0,%xmm6
	pshufb	(%eax),%xmm6
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-48(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	80(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,64(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-64(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	32(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-32(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	96(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,80(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,16(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-48(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	48(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-16(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	112(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,96(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-32(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	-48(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,%xmm6
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-16(%ebx)
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-32(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	64(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,112(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,32(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-48(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-16(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	80(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,64(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,48(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-32(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	16(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-64(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	96(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,80(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-16(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	64(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,96(%ebx)
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	por	%xmm1,%xmm3
	decl	%edx
	jnz	.L010loop
	# Spill the live registers, then for each group of four state words add
	# the input state, transpose the four lanes back into per-block order
	# (punpck* sequences) and XOR with the input.  Each group writes 16 bytes
	# into each of the four 64-byte blocks, at offsets -128, -64, 0 and 64
	# from the biased pointers.
	movdqa	%xmm3,-64(%ebx)
	movdqa	%xmm4,(%ebx)
	movdqa	%xmm5,16(%ebx)
	movdqa	%xmm6,64(%ebx)
	movdqa	%xmm7,96(%ebx)
	movdqa	-112(%ebx),%xmm1
	movdqa	-96(%ebx),%xmm2
	movdqa	-80(%ebx),%xmm3
	paddd	-128(%ebp),%xmm0
	paddd	-112(%ebp),%xmm1
	paddd	-96(%ebp),%xmm2
	paddd	-80(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	-64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	-48(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	-32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	-16(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	-64(%ebp),%xmm0
	paddd	-48(%ebp),%xmm1
	paddd	-32(%ebp),%xmm2
	paddd	-16(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	16(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	48(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	(%ebp),%xmm0
	paddd	16(%ebp),%xmm1
	paddd	32(%ebp),%xmm2
	paddd	48(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	80(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	96(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	112(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	64(%ebp),%xmm0
	paddd	80(%ebp),%xmm1
	paddd	96(%ebp),%xmm2
	paddd	112(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	208(%esi),%esi		# 16+16+16+208 = 256 bytes consumed per pass
	pxor	%xmm0,%xmm4
	pxor	%xmm1,%xmm5
	pxor	%xmm2,%xmm6
	pxor	%xmm3,%xmm7
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	subl	$256,%ecx
	jnc	.L009outer_loop
	addl	$256,%ecx		# undo the final subtraction; zero means finished
	jz	.L011done
	# Under 256 bytes left: undo the pointer bias and rebuild the counter
	# block for the 1-block path.  Lane 0 of the last counters plus 4 replaces
	# word 0; the other three words are reloaded from the caller block.
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	movd	64(%ebp),%xmm2
	movdqu	(%ebx),%xmm3
	paddd	96(%eax),%xmm2
	pand	112(%eax),%xmm3
	por	%xmm2,%xmm3
.L0081x:
	# One block per pass: xmm0..xmm3 hold the four state rows, xmm6 and xmm7
	# the byte-shuffle masks for the 16-bit and 8-bit rotations.
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6
	movdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)		# overwritten by the store of xmm3 below
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L012loop1x
.align	16
.L013outer1x:
	# Next block: reload the input state and add 1 to the counter word.
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	.L012loop1x
.align	16
.L012loop1x:
	# The .byte sequences are hand-encoded pshufb instructions:
	#   102,15,56,0,222 encodes pshufb %xmm6,%xmm3 (rotate by 16)
	#   102,15,56,0,223 encodes pshufb %xmm7,%xmm3 (rotate by 8)
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2		# rotate rows 1..3 by lanes for the second half
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2		# rotate the rows back
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L012loop1x
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	.L014tail
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L013outer1x
	jmp	.L011done
.L014tail:
	# Fewer than 64 bytes remain: park the keystream block on the stack and
	# XOR it into the output one byte at a time (%ecx holds the count).
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp		# byte index
.L015tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L015tail_loop
.L011done:
	movl	512(%esp),%esp		# back to the caller stack pointer saved at entry
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
#
# Constant table used by the SIMD path, addressed relative to .Lssse3_data:
#   offset 0    pshufb mask rotating each 32-bit lane left by 16 bits
#   offset 16   pshufb mask rotating each 32-bit lane left by 8 bits
#   offset 32   the four constant words of the state
#   offset 48   per-lane counter offsets 0,1,2,3
#   offset 64   counter step of 4 for the 4-block path
#   offset 80   counter step of 1 for the 1-block path
#   offset 96   the value 4 in lane 0, used when leaving the 4-block path
#   offset 112  mask clearing word 0 of the counter block
# The trailing bytes spell an ASCII identification string.
#
.align	64
.Lssse3_data:
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long	1634760805,857760878,2036477234,1797285236
.long	0,1,2,3
.long	4,4,4,4
.long	1,0,0,0
.long	4,0,0,0
.long	0,-1,-1,-1
.align	64
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
#endif
.section	.note.GNU-stack,"",@progbits