# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
#
# GFp_ChaCha20_ctr32 -- ChaCha20 keystream XOR for 32-bit x86 (AT&T syntax, ELF).
#
# Stack arguments, as the code below uses them. Offsets are relative to %esp
# after the four register pushes. NOTE(review): the roles are read off the
# instruction stream; confirm them against the C prototype.
#   Arg at 20(%esp): destination pointer.
#   Arg at 24(%esp): source pointer; its bytes are XORed with the keystream.
#   Arg at 28(%esp): byte length; zero returns immediately.
#   Arg at 32(%esp): pointer to 8 key dwords.
#   Arg at 36(%esp): pointer to 4 dwords. The first is the block counter, the
#                    other three are copied unchanged (presumably the nonce).
#
# Preserves %ebp, %ebx, %esi and %edi through push/pop. %eax, %ecx, %edx and,
# on the SSSE3 path, %xmm0-%xmm7 are overwritten. The scalar path also stores
# updated pointer and length values into the incoming argument slots.
#
# Dispatch: the SSSE3 code is used when bit 24 of dword 0 and bit 9 of dword 1
# of GFp_ia32cap_P are both set (presumably the FXSR and SSSE3 CPUID feature
# bits; verify against the definition of GFp_ia32cap_P). Otherwise the scalar
# code at .L001x86 runs.
#
.globl	GFp_ChaCha20_ctr32
.hidden	GFp_ChaCha20_ctr32
.type	GFp_ChaCha20_ctr32,@function
.align	16
GFp_ChaCha20_ctr32:
.L_GFp_ChaCha20_ctr32_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	# Return straight away when the length argument is zero.
	xorl	%eax,%eax
	cmpl	28(%esp),%eax
	je	.L000no_data
	# Position-independent addressing: call/pop leaves the address of
	# .Lpic_point in %eax. The SSSE3 path reuses this %eax value later.
	call	.Lpic_point
.Lpic_point:
	popl	%eax
	leal	GFp_ia32cap_P-.Lpic_point(%eax),%ebp
	# Mask 16777216 is 1<<24 and mask 512 is 1<<9.
	testl	$16777216,(%ebp)
	jz	.L001x86
	testl	$512,4(%ebp)
	jz	.L001x86
	jmp	.Lssse3_shortcut
.L001x86:
	# Scalar path. Frame layout after the 132-byte allocation:
	#   Offsets 0..60     working state, 16 dwords
	#   Offsets 80..108   copy of the 8 key dwords
	#   Offsets 112..124  copy of the 4 counter-block dwords
	#   Offset  128       spill slot for the round-loop counter
	#   Offsets 152,156,160  caller arguments: destination, source, length
	movl	32(%esp),%esi
	movl	36(%esp),%edi
	subl	$132,%esp
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	# The stored counter is biased by -1 because .L002entry adds 1 before
	# every block, including the first.
	subl	$1,%eax
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	.L002entry
.align	16
.L003outer_loop:
	# Write the advanced source, destination and remaining length back to
	# the argument slots for the next 64-byte block.
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
.L002entry:
	# Rebuild the working state for one block. The four immediates are
	# 0x61707865, 0x3320646e, 0x79622d32 and 0x6b206574, which spell the
	# ASCII text expand 32-byte k in little-endian order. Words 0, 4, 8, 9
	# and 12 start in registers; the rest live in the frame.
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	# Advance the block counter and keep the new value for the next block.
	addl	$1,%edx
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	# Ten loop iterations follow. Each one contains eight add/xor/rotate
	# groups with rotations 16, 12, 8 and 7 (presumably the four column and
	# four diagonal quarter-rounds of one ChaCha double round). %ebx is
	# needed as a data register, so the counter is spilled to 128(%esp).
	movl	$10,%ebx
	jmp	.L004loop
.align	16
.L004loop:
	addl	%ebp,%eax
	movl	%ebx,128(%esp)
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	movl	128(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	.L004loop
	# Rounds done. Add the original input words back in, starting with the
	# words still held in registers. %ebx now holds the remaining length.
	movl	160(%esp),%ebx
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	.L005tail
	# Full block: XOR 64 source bytes with the keystream and store them.
	# Words are handled in three batches: 0,4,8,9,12,14 then 1,2,3,5,6
	# then 7,10,11,13,15. Word 0 is parked at (%esp) because %eax becomes
	# the destination pointer.
	movl	156(%esp),%ebx
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	movl	%eax,(%esp)
	movl	152(%esp),%eax
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	leal	64(%eax),%eax
	# Loop while bytes remain after consuming this block.
	subl	$64,%ecx
	jnz	.L003outer_loop
	jmp	.L006done
.L005tail:
	# Fewer than 64 bytes remain. Materialise the whole keystream block in
	# the frame at offsets 0..60, then XOR byte by byte. %ebx still holds
	# the remaining byte count.
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx
	movl	%esi,44(%esp)
	xorl	%esi,%esi
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
.L007tail_loop:
	# Here %ebp is the source, %ecx the destination and %esi the byte
	# index. The index is bumped before the store, hence the -1
	# displacement on the destination.
	movb	(%esi,%ebp,1),%al
	movb	(%esp,%esi,1),%dl
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)
	decl	%ebx
	jnz	.L007tail_loop
.L006done:
	# Release the scalar frame.
	addl	$132,%esp
.L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	GFp_ChaCha20_ctr32,.-.L_GFp_ChaCha20_ctr32_begin
#
# _ChaCha20_ssse3 -- SSE2/SSSE3 implementation, same stack arguments as above.
#
# In this file it is reached through the jump to .Lssse3_shortcut, with %eax
# holding the address of .Lpic_point.
# NOTE(review): entering at the _ChaCha20_ssse3 label itself leaves %eax
# unset, although the code below uses it to locate .Lssse3_data. No direct
# call is visible in this file; confirm there is none elsewhere.
#
# Inputs of 256 bytes or more go through a loop that produces four blocks per
# iteration; whatever is left is handled one block at a time from .L0081x.
#
.hidden	_ChaCha20_ssse3
.type	_ChaCha20_ssse3,@function
.align	16
_ChaCha20_ssse3:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
.Lssse3_shortcut:
	# Register roles from here on: %edi destination, %esi source, %ecx
	# length, %edx key pointer, %ebx counter-block pointer.
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	# Build a 64-byte aligned frame; movdqa needs 16-byte alignment. The
	# entry %esp is saved at offset 512 and restored at .L011done.
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	# From here %eax is the address of the constant table .Lssse3_data.
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	movdqu	(%ebx),%xmm3
	cmpl	$256,%ecx
	jb	.L0081x
	# Four-block setup. The key and counter pointers are parked at offsets
	# 516 and 520 for the one-block code. %ebp is set to %esp+384 and
	# addresses the per-word input table, each entry one 16-byte row:
	#   Rows at -128..-80(%ebp): the four constant words, broadcast
	#   Rows at -64..48(%ebp):   the eight key words, broadcast
	#   Row  at 64(%ebp):        block counter, a different value per lane
	#   Rows at 80..112(%ebp):   remaining counter-block words, broadcast
	# %ebx is set to %esp+128 and addresses the working state rows.
	# The length is pre-decremented by 256, so the loop can test the carry
	# flag after its own subtraction.
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx
	leal	384(%esp),%ebp
	movdqu	(%edx),%xmm7
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	# Counter lanes become counter + {0,1,2,3} - 4. The loop adds 4 back
	# before every iteration, including the first.
	paddd	48(%eax),%xmm0
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	psubd	64(%eax),%xmm0
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,64(%ebp)
	movdqa	%xmm1,80(%ebp)
	movdqa	%xmm2,96(%ebp)
	movdqa	%xmm3,112(%ebp)
	movdqu	16(%edx),%xmm3
	movdqa	%xmm4,-64(%ebp)
	movdqa	%xmm5,-48(%ebp)
	movdqa	%xmm6,-32(%ebp)
	movdqa	%xmm7,-16(%ebp)
	movdqa	32(%eax),%xmm7
	leal	128(%esp),%ebx
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,(%ebp)
	movdqa	%xmm1,16(%ebp)
	movdqa	%xmm2,32(%ebp)
	movdqa	%xmm3,48(%ebp)
	movdqa	%xmm4,-128(%ebp)
	movdqa	%xmm5,-112(%ebp)
	movdqa	%xmm6,-96(%ebp)
	movdqa	%xmm7,-80(%ebp)
	# Bias both data pointers by 128 so the four blocks are addressed with
	# displacements -128, -64, 0 and 64.
	leal	128(%esi),%esi
	leal	128(%edi),%edi
	jmp	.L009outer_loop
.align	16
.L009outer_loop:
	# Copy the input rows into the working state. Rows 0, 4, 8, 9 and 12
	# go straight into %xmm0, %xmm3, %xmm4, %xmm5 and %xmm6 because the
	# first round group uses them immediately.
	movdqa	-112(%ebp),%xmm1
	movdqa	-96(%ebp),%xmm2
	movdqa	-80(%ebp),%xmm3
	movdqa	-48(%ebp),%xmm5
	movdqa	-32(%ebp),%xmm6
	movdqa	-16(%ebp),%xmm7
	movdqa	%xmm1,-112(%ebx)
	movdqa	%xmm2,-96(%ebx)
	movdqa	%xmm3,-80(%ebx)
	movdqa	%xmm5,-48(%ebx)
	movdqa	%xmm6,-32(%ebx)
	movdqa	%xmm7,-16(%ebx)
	movdqa	32(%ebp),%xmm2
	movdqa	48(%ebp),%xmm3
	movdqa	64(%ebp),%xmm4
	movdqa	80(%ebp),%xmm5
	movdqa	96(%ebp),%xmm6
	movdqa	112(%ebp),%xmm7
	# Advance all four lane counters by 4 and keep the result for the
	# next iteration.
	paddd	64(%eax),%xmm4
	movdqa	%xmm2,32(%ebx)
	movdqa	%xmm3,48(%ebx)
	movdqa	%xmm4,64(%ebx)
	movdqa	%xmm5,80(%ebx)
	movdqa	%xmm6,96(%ebx)
	movdqa	%xmm7,112(%ebx)
	movdqa	%xmm4,64(%ebp)
	movdqa	-128(%ebp),%xmm0
	movdqa	%xmm4,%xmm6
	movdqa	-64(%ebp),%xmm3
	movdqa	(%ebp),%xmm4
	movdqa	16(%ebp),%xmm5
	movl	$10,%edx
	nop
.align	16
.L010loop:
	# Ten iterations of eight add/xor/rotate groups on whole rows. The
	# rotations by 16 and 8 use pshufb with the byte-shuffle masks at
	# (%eax) and 16(%eax); the rotations by 12 and 7 use a shift pair
	# combined with por.
	paddd	%xmm3,%xmm0
	movdqa	%xmm3,%xmm2
	pxor	%xmm0,%xmm6
	pshufb	(%eax),%xmm6
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-48(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	80(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,64(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-64(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	32(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-32(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	96(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,80(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,16(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-48(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	48(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-16(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	112(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,96(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-32(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	-48(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,%xmm6
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-16(%ebx)
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-32(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	64(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,112(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,32(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-48(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-16(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	80(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,64(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,48(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-32(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	16(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-64(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	96(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,80(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-16(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	64(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,96(%ebx)
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	por	%xmm1,%xmm3
	decl	%edx
	jnz	.L010loop
	# Flush the rows still held in registers back to the working state.
	movdqa	%xmm3,-64(%ebx)
	movdqa	%xmm4,(%ebx)
	movdqa	%xmm5,16(%ebx)
	movdqa	%xmm6,64(%ebx)
	movdqa	%xmm7,96(%ebx)
	# Output stage, repeated four times for rows 0-3, 4-7, 8-11 and 12-15:
	# add the input rows, transpose the 4x4 dword matrix with the punpck
	# instructions so each register holds 16 consecutive bytes of one
	# block, XOR with the source and store. The four blocks sit 64 bytes
	# apart, and the pointers step by 16 between repetitions.
	movdqa	-112(%ebx),%xmm1
	movdqa	-96(%ebx),%xmm2
	movdqa	-80(%ebx),%xmm3
	paddd	-128(%ebp),%xmm0
	paddd	-112(%ebp),%xmm1
	paddd	-96(%ebp),%xmm2
	paddd	-80(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	-64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	-48(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	-32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	-16(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	-64(%ebp),%xmm0
	paddd	-48(%ebp),%xmm1
	paddd	-32(%ebp),%xmm2
	paddd	-16(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	16(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	48(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	(%ebp),%xmm0
	paddd	16(%ebp),%xmm1
	paddd	32(%ebp),%xmm2
	paddd	48(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	80(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	96(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	112(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	64(%ebp),%xmm0
	paddd	80(%ebp),%xmm1
	paddd	96(%ebp),%xmm2
	paddd	112(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	# Three steps of 16 plus this 208 advance the pointer by 256 in total.
	leal	208(%esi),%esi
	pxor	%xmm0,%xmm4
	pxor	%xmm1,%xmm5
	pxor	%xmm2,%xmm6
	pxor	%xmm3,%xmm7
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	# Continue while at least 256 more bytes remain, meaning the
	# subtraction did not borrow.
	subl	$256,%ecx
	jnc	.L009outer_loop
	# Undo the pre-decrement. A zero result means the length was a
	# multiple of 256 and nothing is left.
	addl	$256,%ecx
	jz	.L011done
	# Prepare for the one-block code: restore the counter and key
	# pointers, remove the 128-byte pointer bias, and build the next
	# counter block in %xmm3. Its low dword is the lane 0 counter plus 4;
	# the other three dwords come from the caller block, selected by the
	# mask at 112(%eax).
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	movd	64(%ebp),%xmm2
	movdqu	(%ebx),%xmm3
	paddd	96(%eax),%xmm2
	pand	112(%eax),%xmm3
	por	%xmm2,%xmm3
.L0081x:
	# One block at a time. %xmm0-%xmm3 hold the four state rows, %xmm6 and
	# %xmm7 the byte-shuffle masks for the rotations by 16 and 8. A copy of
	# the input rows is kept at offsets 0..48 of the frame.
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6
	movdqa	16(%eax),%xmm7
	# NOTE(review): this dword store is overwritten by the 16-byte store
	# to 48(%esp) four instructions below, so it looks like a dead store.
	movl	%ebp,48(%esp)
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L012loop1x
.align	16
.L013outer1x:
	# Reload the saved input rows and add 1 to the low dword of the
	# counter row for the next block.
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	.L012loop1x
.align	16
.L012loop1x:
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	# Encoded form of pshufb %xmm6,%xmm3, bytes 66 0F 38 00 DE.
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	# Encoded form of pshufb %xmm7,%xmm3, bytes 66 0F 38 00 DF.
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	# Rotate the dword lanes of rows 2, 1 and 3 so that the same
	# instruction sequence next operates on the diagonals.
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	# Encoded form of pshufb %xmm6,%xmm3.
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	# Encoded form of pshufb %xmm7,%xmm3.
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	# Inverse lane rotations: rows return to column order.
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L012loop1x
	# Add the saved input rows to obtain the keystream block.
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	.L014tail
	# Full block: XOR 64 source bytes and store them.
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L013outer1x
	jmp	.L011done
.L014tail:
	# Fewer than 64 bytes remain. Spill the keystream block over the saved
	# input rows (no longer needed) and XOR byte by byte. %ecx is the
	# remaining count and %ebp the byte index.
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L015tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L015tail_loop
.L011done:
	# Restore the stack pointer saved before the frame was aligned.
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_ChaCha20_ssse3,.-_ChaCha20_ssse3
# Constant table for the SSSE3 code, addressed through %eax. It is emitted in
# .text so that it can be located relative to .Lpic_point.
#   Offset 0:   pshufb mask rotating each dword left by 16 bits
#   Offset 16:  pshufb mask rotating each dword left by 8 bits
#   Offset 32:  the four constant words (ASCII expand 32-byte k)
#   Offset 48:  per-lane counter offsets 0,1,2,3
#   Offset 64:  counter step of 4 for every lane
#   Offset 80:  counter step of 1 in the low dword
#   Offset 96:  counter step of 4 in the low dword
#   Offset 112: mask that clears the low dword and keeps the other three
.align	64
.Lssse3_data:
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long	1634760805,857760878,2036477234,1797285236
.long	0,1,2,3
.long	4,4,4,4
.long	1,0,0,0
.long	4,0,0,0
.long	0,-1,-1,-1
.align	64
# NUL-terminated ASCII banner: ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
#endif
.section	.note.GNU-stack,"",@progbits