# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#include "ring_core_generated/prefix_symbols_asm.h"
.text
/*
 * ChaCha20_ctr32 -- i386 ELF, AT&T syntax, all arguments passed on the stack.
 *
 * After the four register pushes below the arguments are found at:
 *   20(%esp)  destination pointer (written)
 *   24(%esp)  source pointer (read, XORed with the generated key stream)
 *   28(%esp)  byte count; a zero count returns immediately
 *   32(%esp)  pointer to 32 bytes loaded into state words 4..11
 *   36(%esp)  pointer to 16 bytes loaded into state words 12..15; word 12 is
 *             advanced by one for every 64-byte block produced
 * NOTE(review): this presumably matches the C prototype
 *   void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
 *                       const uint32_t key[8], const uint32_t counter[4]);
 * confirm against the C header that declares it.
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx, %edx and
 * (on the SSSE3 path) %xmm0-%xmm7 are clobbered.
 *
 * Scalar path frame, after subl $132,%esp:
 *     0..60(%esp)   working copy of the 16 state words
 *    80..108(%esp)  saved state words 4..11
 *   112..124(%esp)  saved state words 12..15 (word 12 is the running counter)
 *   128(%esp)       round-loop counter spill
 *   152/156/160(%esp) the caller argument slots for destination, source and
 *                   length, reused to hold the advancing pointers and the
 *                   remaining length
 *
 * The four immediates 1634760805, 857760878, 2036477234 and 1797285236 are
 * 0x61707865, 0x3320646e, 0x79622d32 and 0x6b206574, i.e. the little-endian
 * ASCII text "expand 32-byte k" used as state words 0..3.
 */
.globl	ChaCha20_ctr32
.hidden	ChaCha20_ctr32
.type	ChaCha20_ctr32,@function
.align	16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* Nothing to do when the byte count is zero. */
	xorl	%eax,%eax
	cmpl	28(%esp),%eax
	je	.L000no_data
	/* call/pop yields the address of .Lpic_point in %eax for PIC addressing. */
	call	.Lpic_point
.Lpic_point:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
	/*
	 * Take the SSSE3 path only when bit 24 of capability word 0 and bit 9 of
	 * capability word 1 are both set (presumably FXSR and SSSE3; the layout of
	 * OPENSSL_ia32cap_P is defined elsewhere). %eax must still hold the
	 * address of .Lpic_point when jumping to .Lssse3_shortcut.
	 */
	testl	$16777216,(%ebp)
	jz	.L001x86
	testl	$512,4(%ebp)
	jz	.L001x86
	jmp	.Lssse3_shortcut
.L001x86:
	/* Scalar path: copy state words 4..15 from the two input arrays. */
	movl	32(%esp),%esi
	movl	36(%esp),%edi
	subl	$132,%esp
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	/* Word 12 is stored minus one because .L002entry adds one before use. */
	subl	$1,%eax
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	.L002entry
.align	16
.L003outer_loop:
	/* Save advanced source pointer, destination pointer and remaining length. */
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
.L002entry:
	/* Rebuild the working state for the next 64-byte block. */
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	addl	$1,%edx
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	/* 10 iterations; each applies the 16/12/8/7 rotate sequence eight times. */
	movl	$10,%ebx
	jmp	.L004loop
.align	16
.L004loop:
	addl	%ebp,%eax
	movl	%ebx,128(%esp)
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	movl	128(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	.L004loop
	/*
	 * Rounds done: add the saved input state back in, then either XOR a full
	 * 64-byte block straight from source to destination or fall into the
	 * byte-wise tail when fewer than 64 bytes remain (%ebx = remaining length).
	 */
	movl	160(%esp),%ebx
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	.L005tail
	movl	156(%esp),%ebx
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	movl	%eax,(%esp)
	movl	152(%esp),%eax
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	leal	64(%eax),%eax
	subl	$64,%ecx
	jnz	.L003outer_loop
	jmp	.L006done
.L005tail:
	/* Materialise the final key-stream block at 0..63(%esp). */
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx
	movl	%esi,44(%esp)
	xorl	%esi,%esi
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
.L007tail_loop:
	/* %ebp = source, %ecx = destination, %esi = byte index, %ebx = bytes left. */
	movb	(%esi,%ebp,1),%al
	movb	(%esp,%esi,1),%dl
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)
	decl	%ebx
	jnz	.L007tail_loop
.L006done:
	addl	$132,%esp
.L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
/*
 * _ChaCha20_ssse3 -- SSSE3 implementation taking the same stack arguments as
 * ChaCha20_ctr32. Within this file it is entered at .Lssse3_shortcut from
 * ChaCha20_ctr32, after the registers have already been pushed and with %eax
 * holding the address of .Lpic_point (needed to locate .Lssse3_data).
 * NOTE(review): entering through the _ChaCha20_ssse3 label itself does not set
 * %eax; no such caller is visible in this file.
 *
 * Register roles after the argument loads:
 *   %edi destination, %esi source, %ecx byte count,
 *   %edx pointer to state words 4..11, %ebx pointer to state words 12..15,
 *   %eax address of .Lssse3_data.
 *
 * Frame: 524 bytes reserved, %esp then rounded down to a multiple of 64 so
 * that movdqa may be used on it.
 *   512(%esp) caller %esp, restored at .L011done
 *   516(%esp) saved %edx, 520(%esp) saved %ebx (four-block path only)
 *   %ebp = %esp+384: sixteen 16-byte rows, each one input state word
 *                    replicated into four lanes (row 12, at 64(%ebp), carries
 *                    four consecutive counter values)
 *   %ebx = %esp+128: sixteen 16-byte rows of working state for four blocks
 *
 * Inputs of 256 bytes or more are processed four blocks at a time; what is
 * left (or the whole input when it is shorter than 256 bytes) goes through
 * the one-block loop at .L0081x.
 */
.hidden	_ChaCha20_ssse3
.type	_ChaCha20_ssse3,@function
.align	16
_ChaCha20_ssse3:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
.Lssse3_shortcut:
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	movdqu	(%ebx),%xmm3
	cmpl	$256,%ecx
	jb	.L0081x
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx
	leal	384(%esp),%ebp
	movdqu	(%edx),%xmm7
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	/*
	 * Counter lanes become n+0..n+3, then 4 is subtracted because the outer
	 * loop adds 4 back before each use.
	 */
	paddd	48(%eax),%xmm0
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	psubd	64(%eax),%xmm0
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,64(%ebp)
	movdqa	%xmm1,80(%ebp)
	movdqa	%xmm2,96(%ebp)
	movdqa	%xmm3,112(%ebp)
	movdqu	16(%edx),%xmm3
	movdqa	%xmm4,-64(%ebp)
	movdqa	%xmm5,-48(%ebp)
	movdqa	%xmm6,-32(%ebp)
	movdqa	%xmm7,-16(%ebp)
	movdqa	32(%eax),%xmm7
	leal	128(%esp),%ebx
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,(%ebp)
	movdqa	%xmm1,16(%ebp)
	movdqa	%xmm2,32(%ebp)
	movdqa	%xmm3,48(%ebp)
	movdqa	%xmm4,-128(%ebp)
	movdqa	%xmm5,-112(%ebp)
	movdqa	%xmm6,-96(%ebp)
	movdqa	%xmm7,-80(%ebp)
	/* Bias both pointers by 128 so the -128/-64/0/64 displacements below work. */
	leal	128(%esi),%esi
	leal	128(%edi),%edi
	jmp	.L009outer_loop
.align	16
.L009outer_loop:
	/* Copy the replicated input rows into the working area, bumping counters by 4. */
	movdqa	-112(%ebp),%xmm1
	movdqa	-96(%ebp),%xmm2
	movdqa	-80(%ebp),%xmm3
	movdqa	-48(%ebp),%xmm5
	movdqa	-32(%ebp),%xmm6
	movdqa	-16(%ebp),%xmm7
	movdqa	%xmm1,-112(%ebx)
	movdqa	%xmm2,-96(%ebx)
	movdqa	%xmm3,-80(%ebx)
	movdqa	%xmm5,-48(%ebx)
	movdqa	%xmm6,-32(%ebx)
	movdqa	%xmm7,-16(%ebx)
	movdqa	32(%ebp),%xmm2
	movdqa	48(%ebp),%xmm3
	movdqa	64(%ebp),%xmm4
	movdqa	80(%ebp),%xmm5
	movdqa	96(%ebp),%xmm6
	movdqa	112(%ebp),%xmm7
	paddd	64(%eax),%xmm4
	movdqa	%xmm2,32(%ebx)
	movdqa	%xmm3,48(%ebx)
	movdqa	%xmm4,64(%ebx)
	movdqa	%xmm5,80(%ebx)
	movdqa	%xmm6,96(%ebx)
	movdqa	%xmm7,112(%ebx)
	movdqa	%xmm4,64(%ebp)
	movdqa	-128(%ebp),%xmm0
	movdqa	%xmm4,%xmm6
	movdqa	-64(%ebp),%xmm3
	movdqa	(%ebp),%xmm4
	movdqa	16(%ebp),%xmm5
	movl	$10,%edx
	nop
.align	16
.L010loop:
	/*
	 * One iteration of the four-block round loop. Rotates by 16 and 8 use
	 * pshufb with the masks at 0(%eax) and 16(%eax); rotates by 12 and 7 use
	 * a pslld/psrld/por triple.
	 */
	paddd	%xmm3,%xmm0
	movdqa	%xmm3,%xmm2
	pxor	%xmm0,%xmm6
	pshufb	(%eax),%xmm6
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-48(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	80(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,64(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-64(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	32(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-32(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	96(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,80(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,16(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-48(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	48(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-16(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	112(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,96(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-32(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	-48(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,%xmm6
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-16(%ebx)
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-32(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	64(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,112(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,32(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-48(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-16(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	80(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,64(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,48(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-32(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	16(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-64(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	96(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,80(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-16(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	64(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,96(%ebx)
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	por	%xmm1,%xmm3
	decl	%edx
	jnz	.L010loop
	/* Spill the rows still held in registers back to the working area. */
	movdqa	%xmm3,-64(%ebx)
	movdqa	%xmm4,(%ebx)
	movdqa	%xmm5,16(%ebx)
	movdqa	%xmm6,64(%ebx)
	movdqa	%xmm7,96(%ebx)
	movdqa	-112(%ebx),%xmm1
	movdqa	-96(%ebx),%xmm2
	movdqa	-80(%ebx),%xmm3
	/*
	 * Four passes follow, one per group of four state rows: add the input
	 * rows, transpose 4x4 with punpck*, XOR with 16 source bytes from each of
	 * the four blocks (64 bytes apart) and store to the destination.
	 */
	paddd	-128(%ebp),%xmm0
	paddd	-112(%ebp),%xmm1
	paddd	-96(%ebp),%xmm2
	paddd	-80(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	-64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	-48(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	-32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	-16(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	-64(%ebp),%xmm0
	paddd	-48(%ebp),%xmm1
	paddd	-32(%ebp),%xmm2
	paddd	-16(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	16(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	48(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	(%ebp),%xmm0
	paddd	16(%ebp),%xmm1
	paddd	32(%ebp),%xmm2
	paddd	48(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	80(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	96(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	112(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	64(%ebp),%xmm0
	paddd	80(%ebp),%xmm1
	paddd	96(%ebp),%xmm2
	paddd	112(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	/* 3*16 + 208 = 256: both pointers have now advanced by four blocks. */
	leal	208(%esi),%esi
	pxor	%xmm0,%xmm4
	pxor	%xmm1,%xmm5
	pxor	%xmm2,%xmm6
	pxor	%xmm3,%xmm7
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	subl	$256,%ecx
	jnc	.L009outer_loop
	/* Fewer than 256 bytes left: undo the last subtraction and test for zero. */
	addl	$256,%ecx
	jz	.L011done
	/*
	 * Set up the one-block path: reload the input pointers, remove the 128
	 * byte pointer bias, and rebuild state words 12..15 in %xmm3 with word 12
	 * replaced by lane 0 of the last counter row plus 4.
	 */
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	movd	64(%ebp),%xmm2
	movdqu	(%ebx),%xmm3
	paddd	96(%eax),%xmm2
	pand	112(%eax),%xmm3
	por	%xmm2,%xmm3
.L0081x:
	/*
	 * One block at a time. Input state is kept at 0..63(%esp); %xmm6 and
	 * %xmm7 hold the rotate-by-16 and rotate-by-8 pshufb masks.
	 */
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6
	movdqa	16(%eax),%xmm7
	/* This store is overwritten by the movdqa to 48(%esp) just below. */
	movl	%ebp,48(%esp)
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L012loop1x
.align	16
.L013outer1x:
	/* Next block: reload the input state and add (1,0,0,0) to words 12..15. */
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	.L012loop1x
.align	16
.L012loop1x:
	/*
	 * The .byte sequences encode pshufb: 102,15,56,0,222 is
	 * pshufb %xmm6,%xmm3 and 102,15,56,0,223 is pshufb %xmm7,%xmm3.
	 */
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	/* Rotate the lanes of rows 1..3 before the second half-iteration. */
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	/* Rotate the lanes back (inverse of the shuffles above). */
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L012loop1x
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	.L014tail
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L013outer1x
	jmp	.L011done
.L014tail:
	/* Fewer than 64 bytes left: park the key stream on the stack, XOR bytewise. */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L015tail_loop:
	/* %ebp = byte index, %ecx = bytes left. */
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L015tail_loop
.L011done:
	/* Restore the caller stack pointer saved at 512(%esp). */
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_ChaCha20_ssse3,.-_ChaCha20_ssse3
/*
 * Constant table addressed through %eax on the SSSE3 path:
 *     0  pshufb mask rotating every 32-bit lane left by 16
 *    16  pshufb mask rotating every 32-bit lane left by 8
 *    32  state words 0..3 ("expand 32-byte k")
 *    48  per-lane counter offsets 0,1,2,3
 *    64  per-lane counter step 4,4,4,4
 *    80  one-block counter step 1,0,0,0
 *    96  counter step 4,0,0,0 used when leaving the four-block loop
 *   112  mask 0,-1,-1,-1 clearing word 0 of the 16-byte counter block
 */
.align	64
.Lssse3_data:
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long	1634760805,857760878,2036477234,1797285236
.long	0,1,2,3
.long	4,4,4,4
.long	1,0,0,0
.long	4,0,0,0
.long	0,-1,-1,-1
.align	64
/* NUL-terminated ASCII attribution string: "ChaCha20 for x86, CRYPTOGAMS by" plus an e-mail address. */
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
#endif
.section	.note.GNU-stack,"",@progbits