// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
// ChaCha20 keystream XOR for 32-bit x86 (ELF, AT&T syntax, stack arguments).
//
// Both entry points read five stack arguments. After the four register pushes
// in the prologue they sit at:
//   20(%esp)  output pointer   (result bytes are stored through it)
//   24(%esp)  input pointer    (bytes are loaded through it and XORed)
//   28(%esp)  byte count       (0 makes ChaCha20_ctr32 return immediately)
//   32(%esp)  key pointer      (32 bytes are read)
//   36(%esp)  counter block    (16 bytes are read; word 0 is the block counter)
// NOTE(review): presumably this matches the C prototype
//   void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
//                       const uint32_t key[8], const uint32_t counter[4]);
// confirm against the declaration used by the callers.
//
// %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx, %edx, the
// flags and (on the SSSE3 path) %xmm0-%xmm7 are clobbered.
.text
.globl ChaCha20_ctr32
.hidden ChaCha20_ctr32
.type ChaCha20_ctr32,@function
.align 16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    // Nothing to do when the byte count is zero.
    xorl %eax,%eax
    cmpl 28(%esp),%eax
    je .L000no_data
    // call/pop leaves the run-time address of .Lpic_point in %eax so that
    // data can be addressed position-independently.
    call .Lpic_point
.Lpic_point:
    popl %eax
    leal OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
    // Take the SSSE3 path only if bit 24 of capability word 0 and bit 9 of
    // capability word 1 are both set (presumably the FXSR and SSSE3 CPUID
    // bits; confirm against the OPENSSL_ia32cap_P definition).
    testl $16777216,(%ebp)
    jz .L001x86
    testl $512,4(%ebp)
    jz .L001x86
    // %eax still holds the .Lpic_point address; the SSSE3 code relies on it.
    jmp .Lssse3_shortcut
.L001x86:
    // Scalar path. Frame layout after the 132-byte allocation:
    //     0..60(%esp)   working state words x0..x15
    //    80..108(%esp)  copy of the 8 key words
    //   112..124(%esp)  copy of the 4 counter-block words
    //   128(%esp)       round-loop counter
    //   152/156/160(%esp)  caller argument slots: output, input, byte count
    //                      (updated in place by the outer loop)
    movl 32(%esp),%esi
    movl 36(%esp),%edi
    subl $132,%esp
    movl (%esi),%eax
    movl 4(%esi),%ebx
    movl 8(%esi),%ecx
    movl 12(%esi),%edx
    movl %eax,80(%esp)
    movl %ebx,84(%esp)
    movl %ecx,88(%esp)
    movl %edx,92(%esp)
    movl 16(%esi),%eax
    movl 20(%esi),%ebx
    movl 24(%esi),%ecx
    movl 28(%esi),%edx
    movl %eax,96(%esp)
    movl %ebx,100(%esp)
    movl %ecx,104(%esp)
    movl %edx,108(%esp)
    movl (%edi),%eax
    movl 4(%edi),%ebx
    movl 8(%edi),%ecx
    movl 12(%edi),%edx
    // The counter is stored minus one because every block, including the
    // first, increments it at .L002entry before use.
    subl $1,%eax
    movl %eax,112(%esp)
    movl %ebx,116(%esp)
    movl %ecx,120(%esp)
    movl %edx,124(%esp)
    jmp .L002entry
.align 16
.L003outer_loop:
    // Reached after a full 64-byte block: write the advanced input pointer,
    // output pointer and remaining byte count back to the argument slots.
    movl %ebx,156(%esp)
    movl %eax,152(%esp)
    movl %ecx,160(%esp)
.L002entry:
    // Build the initial state for this block. The four immediates are the
    // little-endian words of the ASCII string "expand 32-byte k".
    // x0, x4, x8, x9 and x12 start in %eax, %ebp, %ecx, %esi and %edx; the
    // remaining words are placed in their stack slots.
    movl $1634760805,%eax
    movl $857760878,4(%esp)
    movl $2036477234,8(%esp)
    movl $1797285236,12(%esp)
    movl 84(%esp),%ebx
    movl 88(%esp),%ebp
    movl 104(%esp),%ecx
    movl 108(%esp),%esi
    movl 116(%esp),%edx
    movl 120(%esp),%edi
    movl %ebx,20(%esp)
    movl %ebp,24(%esp)
    movl %ecx,40(%esp)
    movl %esi,44(%esp)
    movl %edx,52(%esp)
    movl %edi,56(%esp)
    movl 92(%esp),%ebx
    movl 124(%esp),%edi
    movl 112(%esp),%edx
    movl 80(%esp),%ebp
    movl 96(%esp),%ecx
    movl 100(%esp),%esi
    // Advance the block counter and remember it for the next block.
    addl $1,%edx
    movl %ebx,28(%esp)
    movl %edi,60(%esp)
    movl %edx,112(%esp)
    // 10 passes of the loop below; each pass performs eight quarter-rounds.
    movl $10,%ebx
    jmp .L004loop
.align 16
.L004loop:
    // One quarter-round is add/xor/rotate-left with rotation counts 16, 12,
    // 8 and 7. Loads and stores of neighbouring state words are interleaved
    // with the arithmetic, so statement order matters here.
    // The loop counter is parked at 128(%esp) because %ebx is needed as a
    // working register.
    addl %ebp,%eax
    movl %ebx,128(%esp)
    movl %ebp,%ebx
    xorl %eax,%edx
    roll $16,%edx
    addl %edx,%ecx
    xorl %ecx,%ebx
    movl 52(%esp),%edi
    roll $12,%ebx
    movl 20(%esp),%ebp
    addl %ebx,%eax
    xorl %eax,%edx
    movl %eax,(%esp)
    roll $8,%edx
    movl 4(%esp),%eax
    addl %edx,%ecx
    movl %edx,48(%esp)
    xorl %ecx,%ebx
    addl %ebp,%eax
    roll $7,%ebx
    xorl %eax,%edi
    movl %ecx,32(%esp)
    roll $16,%edi
    movl %ebx,16(%esp)
    addl %edi,%esi
    movl 40(%esp),%ecx
    xorl %esi,%ebp
    movl 56(%esp),%edx
    roll $12,%ebp
    movl 24(%esp),%ebx
    addl %ebp,%eax
    xorl %eax,%edi
    movl %eax,4(%esp)
    roll $8,%edi
    movl 8(%esp),%eax
    addl %edi,%esi
    movl %edi,52(%esp)
    xorl %esi,%ebp
    addl %ebx,%eax
    roll $7,%ebp
    xorl %eax,%edx
    movl %esi,36(%esp)
    roll $16,%edx
    movl %ebp,20(%esp)
    addl %edx,%ecx
    movl 44(%esp),%esi
    xorl %ecx,%ebx
    movl 60(%esp),%edi
    roll $12,%ebx
    movl 28(%esp),%ebp
    addl %ebx,%eax
    xorl %eax,%edx
    movl %eax,8(%esp)
    roll $8,%edx
    movl 12(%esp),%eax
    addl %edx,%ecx
    movl %edx,56(%esp)
    xorl %ecx,%ebx
    addl %ebp,%eax
    roll $7,%ebx
    xorl %eax,%edi
    roll $16,%edi
    movl %ebx,24(%esp)
    addl %edi,%esi
    xorl %esi,%ebp
    roll $12,%ebp
    movl 20(%esp),%ebx
    addl %ebp,%eax
    xorl %eax,%edi
    movl %eax,12(%esp)
    roll $8,%edi
    movl (%esp),%eax
    addl %edi,%esi
    movl %edi,%edx
    xorl %esi,%ebp
    // Second group of four quarter-rounds: same operations on a different
    // grouping of state words (x0 is now paired with the word from 20(%esp)).
    addl %ebx,%eax
    roll $7,%ebp
    xorl %eax,%edx
    roll $16,%edx
    movl %ebp,28(%esp)
    addl %edx,%ecx
    xorl %ecx,%ebx
    movl 48(%esp),%edi
    roll $12,%ebx
    movl 24(%esp),%ebp
    addl %ebx,%eax
    xorl %eax,%edx
    movl %eax,(%esp)
    roll $8,%edx
    movl 4(%esp),%eax
    addl %edx,%ecx
    movl %edx,60(%esp)
    xorl %ecx,%ebx
    addl %ebp,%eax
    roll $7,%ebx
    xorl %eax,%edi
    movl %ecx,40(%esp)
    roll $16,%edi
    movl %ebx,20(%esp)
    addl %edi,%esi
    movl 32(%esp),%ecx
    xorl %esi,%ebp
    movl 52(%esp),%edx
    roll $12,%ebp
    movl 28(%esp),%ebx
    addl %ebp,%eax
    xorl %eax,%edi
    movl %eax,4(%esp)
    roll $8,%edi
    movl 8(%esp),%eax
    addl %edi,%esi
    movl %edi,48(%esp)
    xorl %esi,%ebp
    addl %ebx,%eax
    roll $7,%ebp
    xorl %eax,%edx
    movl %esi,44(%esp)
    roll $16,%edx
    movl %ebp,24(%esp)
    addl %edx,%ecx
    movl 36(%esp),%esi
    xorl %ecx,%ebx
    movl 56(%esp),%edi
    roll $12,%ebx
    movl 16(%esp),%ebp
    addl %ebx,%eax
    xorl %eax,%edx
    movl %eax,8(%esp)
    roll $8,%edx
    movl 12(%esp),%eax
    addl %edx,%ecx
    movl %edx,52(%esp)
    xorl %ecx,%ebx
    addl %ebp,%eax
    roll $7,%ebx
    xorl %eax,%edi
    roll $16,%edi
    movl %ebx,28(%esp)
    addl %edi,%esi
    xorl %esi,%ebp
    movl 48(%esp),%edx
    roll $12,%ebp
    // Reload the loop counter parked at the top of the loop.
    movl 128(%esp),%ebx
    addl %ebp,%eax
    xorl %eax,%edi
    movl %eax,12(%esp)
    roll $8,%edi
    movl (%esp),%eax
    addl %edi,%esi
    movl %edi,56(%esp)
    xorl %esi,%ebp
    roll $7,%ebp
    decl %ebx
    jnz .L004loop
    // Rounds done. Add the initial state back in to form the keystream
    // block, then choose between a full 64-byte block and the tail.
    movl 160(%esp),%ebx
    addl $1634760805,%eax
    addl 80(%esp),%ebp
    addl 96(%esp),%ecx
    addl 100(%esp),%esi
    cmpl $64,%ebx
    jb .L005tail
    // Full block: XOR keystream words with the input (pointer in %ebx) and
    // store to the output (pointer in %eax), five words at a time.
    movl 156(%esp),%ebx
    addl 112(%esp),%edx
    addl 120(%esp),%edi
    xorl (%ebx),%eax
    xorl 16(%ebx),%ebp
    // Output word 0 is parked on the stack because %eax is about to be
    // reused as the output pointer; it is written out at the end.
    movl %eax,(%esp)
    movl 152(%esp),%eax
    xorl 32(%ebx),%ecx
    xorl 36(%ebx),%esi
    xorl 48(%ebx),%edx
    xorl 56(%ebx),%edi
    movl %ebp,16(%eax)
    movl %ecx,32(%eax)
    movl %esi,36(%eax)
    movl %edx,48(%eax)
    movl %edi,56(%eax)
    movl 4(%esp),%ebp
    movl 8(%esp),%ecx
    movl 12(%esp),%esi
    movl 20(%esp),%edx
    movl 24(%esp),%edi
    addl $857760878,%ebp
    addl $2036477234,%ecx
    addl $1797285236,%esi
    addl 84(%esp),%edx
    addl 88(%esp),%edi
    xorl 4(%ebx),%ebp
    xorl 8(%ebx),%ecx
    xorl 12(%ebx),%esi
    xorl 20(%ebx),%edx
    xorl 24(%ebx),%edi
    movl %ebp,4(%eax)
    movl %ecx,8(%eax)
    movl %esi,12(%eax)
    movl %edx,20(%eax)
    movl %edi,24(%eax)
    movl 28(%esp),%ebp
    movl 40(%esp),%ecx
    movl 44(%esp),%esi
    movl 52(%esp),%edx
    movl 60(%esp),%edi
    addl 92(%esp),%ebp
    addl 104(%esp),%ecx
    addl 108(%esp),%esi
    addl 116(%esp),%edx
    addl 124(%esp),%edi
    xorl 28(%ebx),%ebp
    xorl 40(%ebx),%ecx
    xorl 44(%ebx),%esi
    xorl 52(%ebx),%edx
    xorl 60(%ebx),%edi
    // Advance input and output by one block and loop while bytes remain.
    leal 64(%ebx),%ebx
    movl %ebp,28(%eax)
    movl (%esp),%ebp
    movl %ecx,40(%eax)
    movl 160(%esp),%ecx
    movl %esi,44(%eax)
    movl %edx,52(%eax)
    movl %edi,60(%eax)
    movl %ebp,(%eax)
    leal 64(%eax),%eax
    subl $64,%ecx
    jnz .L003outer_loop
    jmp .L006done
.L005tail:
    // Fewer than 64 bytes remain (%ebx = remaining count). Materialise the
    // whole keystream block at 0..60(%esp), then XOR byte by byte.
    addl 112(%esp),%edx
    addl 120(%esp),%edi
    movl %eax,(%esp)
    movl %ebp,16(%esp)
    movl %ecx,32(%esp)
    movl %esi,36(%esp)
    movl %edx,48(%esp)
    movl %edi,56(%esp)
    movl 4(%esp),%ebp
    movl 8(%esp),%ecx
    movl 12(%esp),%esi
    movl 20(%esp),%edx
    movl 24(%esp),%edi
    addl $857760878,%ebp
    addl $2036477234,%ecx
    addl $1797285236,%esi
    addl 84(%esp),%edx
    addl 88(%esp),%edi
    movl %ebp,4(%esp)
    movl %ecx,8(%esp)
    movl %esi,12(%esp)
    movl %edx,20(%esp)
    movl %edi,24(%esp)
    movl 28(%esp),%ebp
    movl 40(%esp),%ecx
    movl 44(%esp),%esi
    movl 52(%esp),%edx
    movl 60(%esp),%edi
    addl 92(%esp),%ebp
    addl 104(%esp),%ecx
    addl 108(%esp),%esi
    addl 116(%esp),%edx
    addl 124(%esp),%edi
    movl %ebp,28(%esp)
    // %ebp = input pointer, %ecx = output pointer, %esi = byte index.
    movl 156(%esp),%ebp
    movl %ecx,40(%esp)
    movl 152(%esp),%ecx
    movl %esi,44(%esp)
    xorl %esi,%esi
    movl %edx,52(%esp)
    movl %edi,60(%esp)
    xorl %eax,%eax
    xorl %edx,%edx
.L007tail_loop:
    movb (%esi,%ebp,1),%al
    movb (%esp,%esi,1),%dl
    leal 1(%esi),%esi
    xorb %dl,%al
    // -1 compensates for the index having already been incremented.
    movb %al,-1(%ecx,%esi,1)
    decl %ebx
    jnz .L007tail_loop
.L006done:
    addl $132,%esp
.L000no_data:
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
// SSSE3 implementation. It takes the same five stack arguments as
// ChaCha20_ctr32 and is entered from there through .Lssse3_shortcut.
// NOTE(review): the code after .Lssse3_shortcut uses %eax as the run-time
// address of .Lpic_point, which only ChaCha20_ctr32 establishes. The
// prologue below does not set %eax, so entering through the ChaCha20_ssse3
// symbol itself looks unsupported; confirm that nothing calls it directly.
.globl ChaCha20_ssse3
.hidden ChaCha20_ssse3
.type ChaCha20_ssse3,@function
.align 16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
.Lssse3_shortcut:
    // %edi = output, %esi = input, %ecx = byte count, %edx = key pointer,
    // %ebx = counter block pointer.
    movl 20(%esp),%edi
    movl 24(%esp),%esi
    movl 28(%esp),%ecx
    movl 32(%esp),%edx
    movl 36(%esp),%ebx
    // Allocate a 64-byte-aligned frame and keep the original %esp at
    // 512(%esp) so that the epilogue can restore it.
    movl %esp,%ebp
    subl $524,%esp
    andl $-64,%esp
    movl %ebp,512(%esp)
    // %eax now addresses the .Lssse3_data table.
    leal .Lssse3_data-.Lpic_point(%eax),%eax
    movdqu (%ebx),%xmm3
    // Fewer than 256 bytes: go straight to the one-block-at-a-time code.
    cmpl $256,%ecx
    jb .L0081x
    // Four-blocks-at-a-time path. Key and counter pointers are saved for the
    // later switch to the one-block code. The byte count is biased by -256
    // so that the loop can test the borrow.
    movl %edx,516(%esp)
    movl %ebx,520(%esp)
    subl $256,%ecx
    // %ebp addresses the initial state for the four blocks: sixteen 16-byte
    // slots at -128..112(%ebp), one per state word, with each word
    // replicated across the four lanes (lane i belongs to block i).
    leal 384(%esp),%ebp
    movdqu (%edx),%xmm7
    pshufd $0,%xmm3,%xmm0
    pshufd $85,%xmm3,%xmm1
    pshufd $170,%xmm3,%xmm2
    pshufd $255,%xmm3,%xmm3
    // Per-lane block counters: add {0,1,2,3}, then subtract {4,4,4,4}
    // because the outer loop adds 4 before every use.
    paddd 48(%eax),%xmm0
    pshufd $0,%xmm7,%xmm4
    pshufd $85,%xmm7,%xmm5
    psubd 64(%eax),%xmm0
    pshufd $170,%xmm7,%xmm6
    pshufd $255,%xmm7,%xmm7
    movdqa %xmm0,64(%ebp)
    movdqa %xmm1,80(%ebp)
    movdqa %xmm2,96(%ebp)
    movdqa %xmm3,112(%ebp)
    movdqu 16(%edx),%xmm3
    movdqa %xmm4,-64(%ebp)
    movdqa %xmm5,-48(%ebp)
    movdqa %xmm6,-32(%ebp)
    movdqa %xmm7,-16(%ebp)
    // 32(%eax) holds the four constant words.
    movdqa 32(%eax),%xmm7
    // %ebx addresses the working copy of the state, laid out like (%ebp).
    leal 128(%esp),%ebx
    pshufd $0,%xmm3,%xmm0
    pshufd $85,%xmm3,%xmm1
    pshufd $170,%xmm3,%xmm2
    pshufd $255,%xmm3,%xmm3
    pshufd $0,%xmm7,%xmm4
    pshufd $85,%xmm7,%xmm5
    pshufd $170,%xmm7,%xmm6
    pshufd $255,%xmm7,%xmm7
    movdqa %xmm0,(%ebp)
    movdqa %xmm1,16(%ebp)
    movdqa %xmm2,32(%ebp)
    movdqa %xmm3,48(%ebp)
    movdqa %xmm4,-128(%ebp)
    movdqa %xmm5,-112(%ebp)
    movdqa %xmm6,-96(%ebp)
    movdqa %xmm7,-80(%ebp)
    // Bias the data pointers by 128 so that the four 64-byte blocks can be
    // addressed with the displacements -128, -64, 0 and 64.
    leal 128(%esi),%esi
    leal 128(%edi),%edi
    jmp .L009outer_loop
.align 16
.L009outer_loop:
    // Copy the initial state into the working area. The words not copied
    // through memory here are loaded directly into registers further down.
    movdqa -112(%ebp),%xmm1
    movdqa -96(%ebp),%xmm2
    movdqa -80(%ebp),%xmm3
    movdqa -48(%ebp),%xmm5
    movdqa -32(%ebp),%xmm6
    movdqa -16(%ebp),%xmm7
    movdqa %xmm1,-112(%ebx)
    movdqa %xmm2,-96(%ebx)
    movdqa %xmm3,-80(%ebx)
    movdqa %xmm5,-48(%ebx)
    movdqa %xmm6,-32(%ebx)
    movdqa %xmm7,-16(%ebx)
    movdqa 32(%ebp),%xmm2
    movdqa 48(%ebp),%xmm3
    movdqa 64(%ebp),%xmm4
    movdqa 80(%ebp),%xmm5
    movdqa 96(%ebp),%xmm6
    movdqa 112(%ebp),%xmm7
    // Advance all four lane counters by 4 and keep the result as the new
    // initial counter vector.
    paddd 64(%eax),%xmm4
    movdqa %xmm2,32(%ebx)
    movdqa %xmm3,48(%ebx)
    movdqa %xmm4,64(%ebx)
    movdqa %xmm5,80(%ebx)
    movdqa %xmm6,96(%ebx)
    movdqa %xmm7,112(%ebx)
    movdqa %xmm4,64(%ebp)
    movdqa -128(%ebp),%xmm0
    movdqa %xmm4,%xmm6
    movdqa -64(%ebp),%xmm3
    movdqa (%ebp),%xmm4
    movdqa 16(%ebp),%xmm5
    // 10 passes of the round loop, eight vector quarter-rounds per pass.
    movl $10,%edx
    nop
.align 16
.L010loop:
    // Rotations: pshufb with the mask at (%eax) rotates each lane left by
    // 16 bits and the mask at 16(%eax) rotates left by 8; the pslld/psrld/por
    // triples rotate left by 12 and by 7. Spills and reloads of state words
    // are interleaved with the arithmetic, so statement order matters.
    paddd %xmm3,%xmm0
    movdqa %xmm3,%xmm2
    pxor %xmm0,%xmm6
    pshufb (%eax),%xmm6
    paddd %xmm6,%xmm4
    pxor %xmm4,%xmm2
    movdqa -48(%ebx),%xmm3
    movdqa %xmm2,%xmm1
    pslld $12,%xmm2
    psrld $20,%xmm1
    por %xmm1,%xmm2
    movdqa -112(%ebx),%xmm1
    paddd %xmm2,%xmm0
    movdqa 80(%ebx),%xmm7
    pxor %xmm0,%xmm6
    movdqa %xmm0,-128(%ebx)
    pshufb 16(%eax),%xmm6
    paddd %xmm6,%xmm4
    movdqa %xmm6,64(%ebx)
    pxor %xmm4,%xmm2
    paddd %xmm3,%xmm1
    movdqa %xmm2,%xmm0
    pslld $7,%xmm2
    psrld $25,%xmm0
    pxor %xmm1,%xmm7
    por %xmm0,%xmm2
    movdqa %xmm4,(%ebx)
    pshufb (%eax),%xmm7
    movdqa %xmm2,-64(%ebx)
    paddd %xmm7,%xmm5
    movdqa 32(%ebx),%xmm4
    pxor %xmm5,%xmm3
    movdqa -32(%ebx),%xmm2
    movdqa %xmm3,%xmm0
    pslld $12,%xmm3
    psrld $20,%xmm0
    por %xmm0,%xmm3
    movdqa -96(%ebx),%xmm0
    paddd %xmm3,%xmm1
    movdqa 96(%ebx),%xmm6
    pxor %xmm1,%xmm7
    movdqa %xmm1,-112(%ebx)
    pshufb 16(%eax),%xmm7
    paddd %xmm7,%xmm5
    movdqa %xmm7,80(%ebx)
    pxor %xmm5,%xmm3
    paddd %xmm2,%xmm0
    movdqa %xmm3,%xmm1
    pslld $7,%xmm3
    psrld $25,%xmm1
    pxor %xmm0,%xmm6
    por %xmm1,%xmm3
    movdqa %xmm5,16(%ebx)
    pshufb (%eax),%xmm6
    movdqa %xmm3,-48(%ebx)
    paddd %xmm6,%xmm4
    movdqa 48(%ebx),%xmm5
    pxor %xmm4,%xmm2
    movdqa -16(%ebx),%xmm3
    movdqa %xmm2,%xmm1
    pslld $12,%xmm2
    psrld $20,%xmm1
    por %xmm1,%xmm2
    movdqa -80(%ebx),%xmm1
    paddd %xmm2,%xmm0
    movdqa 112(%ebx),%xmm7
    pxor %xmm0,%xmm6
    movdqa %xmm0,-96(%ebx)
    pshufb 16(%eax),%xmm6
    paddd %xmm6,%xmm4
    movdqa %xmm6,96(%ebx)
    pxor %xmm4,%xmm2
    paddd %xmm3,%xmm1
    movdqa %xmm2,%xmm0
    pslld $7,%xmm2
    psrld $25,%xmm0
    pxor %xmm1,%xmm7
    por %xmm0,%xmm2
    pshufb (%eax),%xmm7
    movdqa %xmm2,-32(%ebx)
    paddd %xmm7,%xmm5
    pxor %xmm5,%xmm3
    movdqa -48(%ebx),%xmm2
    movdqa %xmm3,%xmm0
    pslld $12,%xmm3
    psrld $20,%xmm0
    por %xmm0,%xmm3
    movdqa -128(%ebx),%xmm0
    paddd %xmm3,%xmm1
    pxor %xmm1,%xmm7
    movdqa %xmm1,-80(%ebx)
    pshufb 16(%eax),%xmm7
    paddd %xmm7,%xmm5
    movdqa %xmm7,%xmm6
    pxor %xmm5,%xmm3
    // Second group of four quarter-rounds, on a different grouping of the
    // state words.
    paddd %xmm2,%xmm0
    movdqa %xmm3,%xmm1
    pslld $7,%xmm3
    psrld $25,%xmm1
    pxor %xmm0,%xmm6
    por %xmm1,%xmm3
    pshufb (%eax),%xmm6
    movdqa %xmm3,-16(%ebx)
    paddd %xmm6,%xmm4
    pxor %xmm4,%xmm2
    movdqa -32(%ebx),%xmm3
    movdqa %xmm2,%xmm1
    pslld $12,%xmm2
    psrld $20,%xmm1
    por %xmm1,%xmm2
    movdqa -112(%ebx),%xmm1
    paddd %xmm2,%xmm0
    movdqa 64(%ebx),%xmm7
    pxor %xmm0,%xmm6
    movdqa %xmm0,-128(%ebx)
    pshufb 16(%eax),%xmm6
    paddd %xmm6,%xmm4
    movdqa %xmm6,112(%ebx)
    pxor %xmm4,%xmm2
    paddd %xmm3,%xmm1
    movdqa %xmm2,%xmm0
    pslld $7,%xmm2
    psrld $25,%xmm0
    pxor %xmm1,%xmm7
    por %xmm0,%xmm2
    movdqa %xmm4,32(%ebx)
    pshufb (%eax),%xmm7
    movdqa %xmm2,-48(%ebx)
    paddd %xmm7,%xmm5
    movdqa (%ebx),%xmm4
    pxor %xmm5,%xmm3
    movdqa -16(%ebx),%xmm2
    movdqa %xmm3,%xmm0
    pslld $12,%xmm3
    psrld $20,%xmm0
    por %xmm0,%xmm3
    movdqa -96(%ebx),%xmm0
    paddd %xmm3,%xmm1
    movdqa 80(%ebx),%xmm6
    pxor %xmm1,%xmm7
    movdqa %xmm1,-112(%ebx)
    pshufb 16(%eax),%xmm7
    paddd %xmm7,%xmm5
    movdqa %xmm7,64(%ebx)
    pxor %xmm5,%xmm3
    paddd %xmm2,%xmm0
    movdqa %xmm3,%xmm1
    pslld $7,%xmm3
    psrld $25,%xmm1
    pxor %xmm0,%xmm6
    por %xmm1,%xmm3
    movdqa %xmm5,48(%ebx)
    pshufb (%eax),%xmm6
    movdqa %xmm3,-32(%ebx)
    paddd %xmm6,%xmm4
    movdqa 16(%ebx),%xmm5
    pxor %xmm4,%xmm2
    movdqa -64(%ebx),%xmm3
    movdqa %xmm2,%xmm1
    pslld $12,%xmm2
    psrld $20,%xmm1
    por %xmm1,%xmm2
    movdqa -80(%ebx),%xmm1
    paddd %xmm2,%xmm0
    movdqa 96(%ebx),%xmm7
    pxor %xmm0,%xmm6
    movdqa %xmm0,-96(%ebx)
    pshufb 16(%eax),%xmm6
    paddd %xmm6,%xmm4
    movdqa %xmm6,80(%ebx)
    pxor %xmm4,%xmm2
    paddd %xmm3,%xmm1
    movdqa %xmm2,%xmm0
    pslld $7,%xmm2
    psrld $25,%xmm0
    pxor %xmm1,%xmm7
    por %xmm0,%xmm2
    pshufb (%eax),%xmm7
    movdqa %xmm2,-16(%ebx)
    paddd %xmm7,%xmm5
    pxor %xmm5,%xmm3
    movdqa %xmm3,%xmm0
    pslld $12,%xmm3
    psrld $20,%xmm0
    por %xmm0,%xmm3
    movdqa -128(%ebx),%xmm0
    paddd %xmm3,%xmm1
    movdqa 64(%ebx),%xmm6
    pxor %xmm1,%xmm7
    movdqa %xmm1,-80(%ebx)
    pshufb 16(%eax),%xmm7
    paddd %xmm7,%xmm5
    movdqa %xmm7,96(%ebx)
    pxor %xmm5,%xmm3
    movdqa %xmm3,%xmm1
    pslld $7,%xmm3
    psrld $25,%xmm1
    por %xmm1,%xmm3
    decl %edx
    jnz .L010loop
    // Spill the state words still held in registers.
    movdqa %xmm3,-64(%ebx)
    movdqa %xmm4,(%ebx)
    movdqa %xmm5,16(%ebx)
    movdqa %xmm6,64(%ebx)
    movdqa %xmm7,96(%ebx)
    // Output stage, repeated four times (state words 0-3, 4-7, 8-11, 12-15):
    // add the initial state, transpose the 4x4 matrix of 32-bit lanes with
    // punpck* so that each register holds 16 contiguous keystream bytes of
    // one block, XOR with the input and store.
    movdqa -112(%ebx),%xmm1
    movdqa -96(%ebx),%xmm2
    movdqa -80(%ebx),%xmm3
    paddd -128(%ebp),%xmm0
    paddd -112(%ebp),%xmm1
    paddd -96(%ebp),%xmm2
    paddd -80(%ebp),%xmm3
    movdqa %xmm0,%xmm6
    punpckldq %xmm1,%xmm0
    movdqa %xmm2,%xmm7
    punpckldq %xmm3,%xmm2
    punpckhdq %xmm1,%xmm6
    punpckhdq %xmm3,%xmm7
    movdqa %xmm0,%xmm1
    punpcklqdq %xmm2,%xmm0
    movdqa %xmm6,%xmm3
    punpcklqdq %xmm7,%xmm6
    punpckhqdq %xmm2,%xmm1
    punpckhqdq %xmm7,%xmm3
    movdqu -128(%esi),%xmm4
    movdqu -64(%esi),%xmm5
    movdqu (%esi),%xmm2
    movdqu 64(%esi),%xmm7
    leal 16(%esi),%esi
    pxor %xmm0,%xmm4
    movdqa -64(%ebx),%xmm0
    pxor %xmm1,%xmm5
    movdqa -48(%ebx),%xmm1
    pxor %xmm2,%xmm6
    movdqa -32(%ebx),%xmm2
    pxor %xmm3,%xmm7
    movdqa -16(%ebx),%xmm3
    movdqu %xmm4,-128(%edi)
    movdqu %xmm5,-64(%edi)
    movdqu %xmm6,(%edi)
    movdqu %xmm7,64(%edi)
    leal 16(%edi),%edi
    paddd -64(%ebp),%xmm0
    paddd -48(%ebp),%xmm1
    paddd -32(%ebp),%xmm2
    paddd -16(%ebp),%xmm3
    movdqa %xmm0,%xmm6
    punpckldq %xmm1,%xmm0
    movdqa %xmm2,%xmm7
    punpckldq %xmm3,%xmm2
    punpckhdq %xmm1,%xmm6
    punpckhdq %xmm3,%xmm7
    movdqa %xmm0,%xmm1
    punpcklqdq %xmm2,%xmm0
    movdqa %xmm6,%xmm3
    punpcklqdq %xmm7,%xmm6
    punpckhqdq %xmm2,%xmm1
    punpckhqdq %xmm7,%xmm3
    movdqu -128(%esi),%xmm4
    movdqu -64(%esi),%xmm5
    movdqu (%esi),%xmm2
    movdqu 64(%esi),%xmm7
    leal 16(%esi),%esi
    pxor %xmm0,%xmm4
    movdqa (%ebx),%xmm0
    pxor %xmm1,%xmm5
    movdqa 16(%ebx),%xmm1
    pxor %xmm2,%xmm6
    movdqa 32(%ebx),%xmm2
    pxor %xmm3,%xmm7
    movdqa 48(%ebx),%xmm3
    movdqu %xmm4,-128(%edi)
    movdqu %xmm5,-64(%edi)
    movdqu %xmm6,(%edi)
    movdqu %xmm7,64(%edi)
    leal 16(%edi),%edi
    paddd (%ebp),%xmm0
    paddd 16(%ebp),%xmm1
    paddd 32(%ebp),%xmm2
    paddd 48(%ebp),%xmm3
    movdqa %xmm0,%xmm6
    punpckldq %xmm1,%xmm0
    movdqa %xmm2,%xmm7
    punpckldq %xmm3,%xmm2
    punpckhdq %xmm1,%xmm6
    punpckhdq %xmm3,%xmm7
    movdqa %xmm0,%xmm1
    punpcklqdq %xmm2,%xmm0
    movdqa %xmm6,%xmm3
    punpcklqdq %xmm7,%xmm6
    punpckhqdq %xmm2,%xmm1
    punpckhqdq %xmm7,%xmm3
    movdqu -128(%esi),%xmm4
    movdqu -64(%esi),%xmm5
    movdqu (%esi),%xmm2
    movdqu 64(%esi),%xmm7
    leal 16(%esi),%esi
    pxor %xmm0,%xmm4
    movdqa 64(%ebx),%xmm0
    pxor %xmm1,%xmm5
    movdqa 80(%ebx),%xmm1
    pxor %xmm2,%xmm6
    movdqa 96(%ebx),%xmm2
    pxor %xmm3,%xmm7
    movdqa 112(%ebx),%xmm3
    movdqu %xmm4,-128(%edi)
    movdqu %xmm5,-64(%edi)
    movdqu %xmm6,(%edi)
    movdqu %xmm7,64(%edi)
    leal 16(%edi),%edi
    paddd 64(%ebp),%xmm0
    paddd 80(%ebp),%xmm1
    paddd 96(%ebp),%xmm2
    paddd 112(%ebp),%xmm3
    movdqa %xmm0,%xmm6
    punpckldq %xmm1,%xmm0
    movdqa %xmm2,%xmm7
    punpckldq %xmm3,%xmm2
    punpckhdq %xmm1,%xmm6
    punpckhdq %xmm3,%xmm7
    movdqa %xmm0,%xmm1
    punpcklqdq %xmm2,%xmm0
    movdqa %xmm6,%xmm3
    punpcklqdq %xmm7,%xmm6
    punpckhqdq %xmm2,%xmm1
    punpckhqdq %xmm7,%xmm3
    movdqu -128(%esi),%xmm4
    movdqu -64(%esi),%xmm5
    movdqu (%esi),%xmm2
    movdqu 64(%esi),%xmm7
    // 3*16 + 208 = 256: the pointers end up one four-block group further on.
    leal 208(%esi),%esi
    pxor %xmm0,%xmm4
    pxor %xmm1,%xmm5
    pxor %xmm2,%xmm6
    pxor %xmm3,%xmm7
    movdqu %xmm4,-128(%edi)
    movdqu %xmm5,-64(%edi)
    movdqu %xmm6,(%edi)
    movdqu %xmm7,64(%edi)
    leal 208(%edi),%edi
    // Loop while at least another 256 bytes remain (no borrow).
    subl $256,%ecx
    jnc .L009outer_loop
    // Undo the bias; finish if the length was a multiple of 256.
    addl $256,%ecx
    jz .L011done
    // Switch to the one-block code for the remaining bytes: restore the key
    // and counter pointers, remove the +128 pointer bias, and rebuild the
    // counter block as (lane 0 counter of the last group + 4) combined with
    // words 1..3 of the caller's counter block.
    movl 520(%esp),%ebx
    leal -128(%esi),%esi
    movl 516(%esp),%edx
    leal -128(%edi),%edi
    movd 64(%ebp),%xmm2
    movdqu (%ebx),%xmm3
    paddd 96(%eax),%xmm2
    pand 112(%eax),%xmm3
    por %xmm2,%xmm3
.L0081x:
    // One block per iteration. The state is held as four rows:
    // %xmm0 = constants, %xmm1/%xmm2 = key, %xmm3 = counter block (already
    // loaded by the code above). %xmm6/%xmm7 hold the rotate-left-by-16 and
    // rotate-left-by-8 pshufb masks. The initial state is kept at
    // 0..63(%esp).
    movdqa 32(%eax),%xmm0
    movdqu (%edx),%xmm1
    movdqu 16(%edx),%xmm2
    movdqa (%eax),%xmm6
    movdqa 16(%eax),%xmm7
    // This dword is overwritten by the 16-byte store to 48(%esp) below.
    movl %ebp,48(%esp)
    movdqa %xmm0,(%esp)
    movdqa %xmm1,16(%esp)
    movdqa %xmm2,32(%esp)
    movdqa %xmm3,48(%esp)
    movl $10,%edx
    jmp .L012loop1x
.align 16
.L013outer1x:
    // Next block: reload the initial state and add {1,0,0,0} to the counter
    // row, storing the new counter row back.
    movdqa 80(%eax),%xmm3
    movdqa (%esp),%xmm0
    movdqa 16(%esp),%xmm1
    movdqa 32(%esp),%xmm2
    paddd 48(%esp),%xmm3
    movl $10,%edx
    movdqa %xmm3,48(%esp)
    jmp .L012loop1x
.align 16
.L012loop1x:
    paddd %xmm1,%xmm0
    pxor %xmm0,%xmm3
    // Encoded form of: pshufb %xmm6,%xmm3
.byte 102,15,56,0,222
    paddd %xmm3,%xmm2
    pxor %xmm2,%xmm1
    movdqa %xmm1,%xmm4
    psrld $20,%xmm1
    pslld $12,%xmm4
    por %xmm4,%xmm1
    paddd %xmm1,%xmm0
    pxor %xmm0,%xmm3
    // Encoded form of: pshufb %xmm7,%xmm3
.byte 102,15,56,0,223
    paddd %xmm3,%xmm2
    pxor %xmm2,%xmm1
    movdqa %xmm1,%xmm4
    psrld $25,%xmm1
    pslld $7,%xmm4
    por %xmm4,%xmm1
    // Rotate the lanes of rows 1..3 by one, two and three positions so that
    // the second half operates on a different grouping of state words.
    pshufd $78,%xmm2,%xmm2
    pshufd $57,%xmm1,%xmm1
    pshufd $147,%xmm3,%xmm3
    nop
    paddd %xmm1,%xmm0
    pxor %xmm0,%xmm3
    // Encoded form of: pshufb %xmm6,%xmm3
.byte 102,15,56,0,222
    paddd %xmm3,%xmm2
    pxor %xmm2,%xmm1
    movdqa %xmm1,%xmm4
    psrld $20,%xmm1
    pslld $12,%xmm4
    por %xmm4,%xmm1
    paddd %xmm1,%xmm0
    pxor %xmm0,%xmm3
    // Encoded form of: pshufb %xmm7,%xmm3
.byte 102,15,56,0,223
    paddd %xmm3,%xmm2
    pxor %xmm2,%xmm1
    movdqa %xmm1,%xmm4
    psrld $25,%xmm1
    pslld $7,%xmm4
    por %xmm4,%xmm1
    // Undo the lane rotation applied before the second half.
    pshufd $78,%xmm2,%xmm2
    pshufd $147,%xmm1,%xmm1
    pshufd $57,%xmm3,%xmm3
    decl %edx
    jnz .L012loop1x
    // Add the initial state to form the keystream block.
    paddd (%esp),%xmm0
    paddd 16(%esp),%xmm1
    paddd 32(%esp),%xmm2
    paddd 48(%esp),%xmm3
    cmpl $64,%ecx
    jb .L014tail
    // Full block: XOR 64 input bytes with the keystream and store them.
    movdqu (%esi),%xmm4
    movdqu 16(%esi),%xmm5
    pxor %xmm4,%xmm0
    movdqu 32(%esi),%xmm4
    pxor %xmm5,%xmm1
    movdqu 48(%esi),%xmm5
    pxor %xmm4,%xmm2
    pxor %xmm5,%xmm3
    leal 64(%esi),%esi
    movdqu %xmm0,(%edi)
    movdqu %xmm1,16(%edi)
    movdqu %xmm2,32(%edi)
    movdqu %xmm3,48(%edi)
    leal 64(%edi),%edi
    subl $64,%ecx
    jnz .L013outer1x
    jmp .L011done
.L014tail:
    // Fewer than 64 bytes remain (%ecx = remaining count): spill the
    // keystream block to 0..63(%esp) and XOR byte by byte, with %ebp as the
    // byte index.
    movdqa %xmm0,(%esp)
    movdqa %xmm1,16(%esp)
    movdqa %xmm2,32(%esp)
    movdqa %xmm3,48(%esp)
    xorl %eax,%eax
    xorl %edx,%edx
    xorl %ebp,%ebp
.L015tail_loop:
    movb (%esp,%ebp,1),%al
    movb (%esi,%ebp,1),%dl
    leal 1(%ebp),%ebp
    xorb %dl,%al
    // -1 compensates for the index having already been incremented.
    movb %al,-1(%edi,%ebp,1)
    decl %ecx
    jnz .L015tail_loop
.L011done:
    // Restore the stack pointer saved when the aligned frame was created.
    movl 512(%esp),%esp
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
// Constant table used by the SSSE3 code, addressed relative to .Lpic_point.
// Byte offsets from .Lssse3_data:
//     0  pshufb mask rotating every 32-bit lane left by 16 bits
//    16  pshufb mask rotating every 32-bit lane left by 8 bits
//    32  the four constant words ("expand 32-byte k")
//    48  {0,1,2,3}    per-lane counter offsets for the four-block path
//    64  {4,4,4,4}    counter step for the four-block path
//    80  {1,0,0,0}    counter step for the one-block path
//    96  {4,0,0,0}    counter adjustment when leaving the four-block path
//   112  {0,-1,-1,-1} mask that clears counter word 0
.align 64
.Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 64
// NUL-terminated ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)