# salsa20_pm.s version 20051229
# D. J. Bernstein
# Public domain.
#
# NOTE(review): qhasm-generated i386 (32-bit) assembly, AT&T/GAS syntax.
# All three entry points use the i386 cdecl convention: arguments are on
# the stack (read via N(%esp,%eax) before the displacement is folded in).
# Each function aligns %esp down to a 32-byte boundary and reserves at
# least 256 bytes; the total displacement is kept in %eax and saved in
# the frame ("eax_stack"), so the epilogue restores %esp with a single
# "add %eax,%esp" and no frame pointer is needed.

#include <linux/linkage.h>

.text

#-----------------------------------------------------------------------
# void salsa20_encrypt_bytes(u32 *x, const u8 *m, u8 *out, u32 bytes)
#   arg1 x     = 16-word (64-byte) Salsa20 state; words 8 and 9 (the
#                64-bit block counter) are incremented in place, once
#                per 64-byte block processed.
#   arg2 m     = input stream (message or ciphertext), 'bytes' long
#   arg3 out   = output stream, 'bytes' long (XOR of m with keystream)
#   arg4 bytes = byte count; returns immediately when 0
#
# Stack frame layout (offsets from the 32-aligned %esp):
#     0..63   tmp      staging buffer for a trailing partial block
#    64       x_backup  68  m_backup  72  out_backup  76  bytes_backup
#    80..96   saved eax/ebx/esi/edi/ebp
#   100..160  x0..x15  working state (one 20-round block computation)
#   164..224  j0..j15  copy of the input state (added back at the end)
#   228       ctarget  real 'out' while a partial block is staged in tmp
#-----------------------------------------------------------------------
# enter salsa20_encrypt_bytes
ENTRY(salsa20_encrypt_bytes)
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp
	# eax_stack = eax
	movl	%eax,80(%esp)
	# ebx_stack = ebx
	movl	%ebx,84(%esp)
	# esi_stack = esi
	movl	%esi,88(%esp)
	# edi_stack = edi
	movl	%edi,92(%esp)
	# ebp_stack = ebp
	movl	%ebp,96(%esp)
	# x = arg1
	movl	4(%esp,%eax),%edx
	# m = arg2
	movl	8(%esp,%eax),%esi
	# out = arg3
	movl	12(%esp,%eax),%edi
	# bytes = arg4
	movl	16(%esp,%eax),%ebx
	# bytes -= 0   (sets flags only: ZF when bytes == 0)
	sub	$0,%ebx
	# goto done if unsigned<=
	jbe	._done
._start:
	# Copy the 16 input state words x[0..15] into j0..j15.
	# in0 = *(uint32 *) (x + 0)
	movl	0(%edx),%eax
	# in1 = *(uint32 *) (x + 4)
	movl	4(%edx),%ecx
	# in2 = *(uint32 *) (x + 8)
	movl	8(%edx),%ebp
	# j0 = in0
	movl	%eax,164(%esp)
	# in3 = *(uint32 *) (x + 12)
	movl	12(%edx),%eax
	# j1 = in1
	movl	%ecx,168(%esp)
	# in4 = *(uint32 *) (x + 16)
	movl	16(%edx),%ecx
	# j2 = in2
	movl	%ebp,172(%esp)
	# in5 = *(uint32 *) (x + 20)
	movl	20(%edx),%ebp
	# j3 = in3
	movl	%eax,176(%esp)
	# in6 = *(uint32 *) (x + 24)
	movl	24(%edx),%eax
	# j4 = in4
	movl	%ecx,180(%esp)
	# in7 = *(uint32 *) (x + 28)
	movl	28(%edx),%ecx
	# j5 = in5
	movl	%ebp,184(%esp)
	# in8 = *(uint32 *) (x + 32)
	movl	32(%edx),%ebp
	# j6 = in6
	movl	%eax,188(%esp)
	# in9 = *(uint32 *) (x + 36)
	movl	36(%edx),%eax
	# j7 = in7
	movl	%ecx,192(%esp)
	# in10 = *(uint32 *) (x + 40)
	movl	40(%edx),%ecx
	# j8 = in8
	movl	%ebp,196(%esp)
	# in11 = *(uint32 *) (x + 44)
	movl	44(%edx),%ebp
	# j9 = in9
	movl	%eax,200(%esp)
	# in12 = *(uint32 *) (x + 48)
	movl	48(%edx),%eax
	# j10 = in10
	movl	%ecx,204(%esp)
	# in13 = *(uint32 *) (x + 52)
	movl	52(%edx),%ecx
	# j11 = in11
	movl	%ebp,208(%esp)
	# in14 = *(uint32 *) (x + 56)
	movl	56(%edx),%ebp
	# j12 = in12
	movl	%eax,212(%esp)
	# in15 = *(uint32 *) (x + 60)
	movl	60(%edx),%eax
	# j13 = in13
	movl	%ecx,216(%esp)
	# j14 = in14
	movl	%ebp,220(%esp)
	# j15 = in15
	movl	%eax,224(%esp)
	# x_backup = x
	movl	%edx,64(%esp)
._bytesatleast1:
	# Fewer than 64 bytes left?  Stage the partial input block in tmp
	# (so the full-block code below can read/write 64 bytes safely)
	# and remember the real destination in ctarget.
	# bytes - 64
	cmp	$64,%ebx
	# goto nocopy if unsigned>=
	jae	._nocopy
	# ctarget = out
	movl	%edi,228(%esp)
	# out = &tmp
	leal	0(%esp),%edi
	# i = bytes
	mov	%ebx,%ecx
	# while (i) { *out++ = *m++; --i }
	rep	movsb
	# out = &tmp
	leal	0(%esp),%edi
	# m = &tmp
	leal	0(%esp),%esi
._nocopy:
	# out_backup = out
	movl	%edi,72(%esp)
	# m_backup = m
	movl	%esi,68(%esp)
	# bytes_backup = bytes
	movl	%ebx,76(%esp)
	# Load the working state x0..x15 from j0..j15.
	# in0 = j0
	movl	164(%esp),%eax
	# in1 = j1
	movl	168(%esp),%ecx
	# in2 = j2
	movl	172(%esp),%edx
	# in3 = j3
	movl	176(%esp),%ebx
	# x0 = in0
	movl	%eax,100(%esp)
	# x1 = in1
	movl	%ecx,104(%esp)
	# x2 = in2
	movl	%edx,108(%esp)
	# x3 = in3
	movl	%ebx,112(%esp)
	# in4 = j4
	movl	180(%esp),%eax
	# in5 = j5
	movl	184(%esp),%ecx
	# in6 = j6
	movl	188(%esp),%edx
	# in7 = j7
	movl	192(%esp),%ebx
	# x4 = in4
	movl	%eax,116(%esp)
	# x5 = in5
	movl	%ecx,120(%esp)
	# x6 = in6
	movl	%edx,124(%esp)
	# x7 = in7
	movl	%ebx,128(%esp)
	# in8 = j8
	movl	196(%esp),%eax
	# in9 = j9
	movl	200(%esp),%ecx
	# in10 = j10
	movl	204(%esp),%edx
	# in11 = j11
	movl	208(%esp),%ebx
	# x8 = in8
	movl	%eax,132(%esp)
	# x9 = in9
	movl	%ecx,136(%esp)
	# x10 = in10
	movl	%edx,140(%esp)
	# x11 = in11
	movl	%ebx,144(%esp)
	# in12 = j12
	movl	212(%esp),%eax
	# in13 = j13
	movl	216(%esp),%ecx
	# in14 = j14
	movl	220(%esp),%edx
	# in15 = j15
	movl	224(%esp),%ebx
	# x12 = in12
	movl	%eax,148(%esp)
	# x13 = in13
	movl	%ecx,152(%esp)
	# x14 = in14
	movl	%edx,156(%esp)
	# x15 = in15
	movl	%ebx,160(%esp)
	# i = 20   (round counter; each loop pass below does 4 rounds)
	mov	$20,%ebp
	# Registers carry one word of each state column between rounds:
	#   eax = p (x0 diagonal), ecx = s (x5), edx = t (x10), ebx = w (x15);
	#   esi = r and edi = v are per-quarter-round scratch.
	# p = x0
	movl	100(%esp),%eax
	# s = x5
	movl	120(%esp),%ecx
	# t = x10
	movl	140(%esp),%edx
	# w = x15
	movl	160(%esp),%ebx
._mainloop:
	# x0 = p
	movl	%eax,100(%esp)
	# x10 = t
	movl	%edx,140(%esp)
	# p += x12
	addl	148(%esp),%eax
	# x5 = s
	movl	%ecx,120(%esp)
	# t += x6
	addl	124(%esp),%edx
	# x15 = w
	movl	%ebx,160(%esp)
	# r = x1
	movl	104(%esp),%esi
	# r += s
	add	%ecx,%esi
	# v = x11
	movl	144(%esp),%edi
	# v += w
	add	%ebx,%edi
	# p <<<= 7
	rol	$7,%eax
	# p ^= x4
	xorl	116(%esp),%eax
	# t <<<= 7
	rol	$7,%edx
	# t ^= x14
	xorl	156(%esp),%edx
	# r <<<= 7
	rol	$7,%esi
	# r ^= x9
	xorl	136(%esp),%esi
	# v <<<= 7
	rol	$7,%edi
	# v ^= x3
	xorl	112(%esp),%edi
	# x4 = p
	movl	%eax,116(%esp)
	# x14 = t
	movl	%edx,156(%esp)
	# p += x0
	addl	100(%esp),%eax
	# x9 = r
	movl	%esi,136(%esp)
	# t += x10
	addl	140(%esp),%edx
	# x3 = v
	movl	%edi,112(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x8
	xorl	132(%esp),%eax
	# t <<<= 9
	rol	$9,%edx
	# t ^= x2
	xorl	108(%esp),%edx
	# s += r
	add	%esi,%ecx
	# s <<<= 9
	rol	$9,%ecx
	# s ^= x13
	xorl	152(%esp),%ecx
	# w += v
	add	%edi,%ebx
	# w <<<= 9
	rol	$9,%ebx
	# w ^= x7
	xorl	128(%esp),%ebx
	# x8 = p
	movl	%eax,132(%esp)
	# x2 = t
	movl	%edx,108(%esp)
	# p += x4
	addl	116(%esp),%eax
	# x13 = s
	movl	%ecx,152(%esp)
	# t += x14
	addl	156(%esp),%edx
	# x7 = w
	movl	%ebx,128(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x12
	xorl	148(%esp),%eax
	# t <<<= 13
	rol	$13,%edx
	# t ^= x6
	xorl	124(%esp),%edx
	# r += s
	add	%ecx,%esi
	# r <<<= 13
	rol	$13,%esi
	# r ^= x1
	xorl	104(%esp),%esi
	# v += w
	add	%ebx,%edi
	# v <<<= 13
	rol	$13,%edi
	# v ^= x11
	xorl	144(%esp),%edi
	# x12 = p
	movl	%eax,148(%esp)
	# x6 = t
	movl	%edx,124(%esp)
	# p += x8
	addl	132(%esp),%eax
	# x1 = r
	movl	%esi,104(%esp)
	# t += x2
	addl	108(%esp),%edx
	# x11 = v
	movl	%edi,144(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# t <<<= 18
	rol	$18,%edx
	# t ^= x10
	xorl	140(%esp),%edx
	# s += r
	add	%esi,%ecx
	# s <<<= 18
	rol	$18,%ecx
	# s ^= x5
	xorl	120(%esp),%ecx
	# w += v
	add	%edi,%ebx
	# w <<<= 18
	rol	$18,%ebx
	# w ^= x15
	xorl	160(%esp),%ebx
	# x0 = p
	movl	%eax,100(%esp)
	# x10 = t
	movl	%edx,140(%esp)
	# p += x3
	addl	112(%esp),%eax
	# p <<<= 7
	rol	$7,%eax
	# x5 = s
	movl	%ecx,120(%esp)
	# t += x9
	addl	136(%esp),%edx
	# x15 = w
	movl	%ebx,160(%esp)
	# r = x4
	movl	116(%esp),%esi
	# r += s
	add	%ecx,%esi
	# v = x14
	movl	156(%esp),%edi
	# v += w
	add	%ebx,%edi
	# p ^= x1
	xorl	104(%esp),%eax
	# t <<<= 7
	rol	$7,%edx
	# t ^= x11
	xorl	144(%esp),%edx
	# r <<<= 7
	rol	$7,%esi
	# r ^= x6
	xorl	124(%esp),%esi
	# v <<<= 7
	rol	$7,%edi
	# v ^= x12
	xorl	148(%esp),%edi
	# x1 = p
	movl	%eax,104(%esp)
	# x11 = t
	movl	%edx,144(%esp)
	# p += x0
	addl	100(%esp),%eax
	# x6 = r
	movl	%esi,124(%esp)
	# t += x10
	addl	140(%esp),%edx
	# x12 = v
	movl	%edi,148(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x2
	xorl	108(%esp),%eax
	# t <<<= 9
	rol	$9,%edx
	# t ^= x8
	xorl	132(%esp),%edx
	# s += r
	add	%esi,%ecx
	# s <<<= 9
	rol	$9,%ecx
	# s ^= x7
	xorl	128(%esp),%ecx
	# w += v
	add	%edi,%ebx
	# w <<<= 9
	rol	$9,%ebx
	# w ^= x13
	xorl	152(%esp),%ebx
	# x2 = p
	movl	%eax,108(%esp)
	# x8 = t
	movl	%edx,132(%esp)
	# p += x1
	addl	104(%esp),%eax
	# x7 = s
	movl	%ecx,128(%esp)
	# t += x11
	addl	144(%esp),%edx
	# x13 = w
	movl	%ebx,152(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x3
	xorl	112(%esp),%eax
	# t <<<= 13
	rol	$13,%edx
	# t ^= x9
	xorl	136(%esp),%edx
	# r += s
	add	%ecx,%esi
	# r <<<= 13
	rol	$13,%esi
	# r ^= x4
	xorl	116(%esp),%esi
	# v += w
	add	%ebx,%edi
	# v <<<= 13
	rol	$13,%edi
	# v ^= x14
	xorl	156(%esp),%edi
	# x3 = p
	movl	%eax,112(%esp)
	# x9 = t
	movl	%edx,136(%esp)
	# p += x2
	addl	108(%esp),%eax
	# x4 = r
	movl	%esi,116(%esp)
	# t += x8
	addl	132(%esp),%edx
	# x14 = v
	movl	%edi,156(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# t <<<= 18
	rol	$18,%edx
	# t ^= x10
	xorl	140(%esp),%edx
	# s += r
	add	%esi,%ecx
	# s <<<= 18
	rol	$18,%ecx
	# s ^= x5
	xorl	120(%esp),%ecx
	# w += v
	add	%edi,%ebx
	# w <<<= 18
	rol	$18,%ebx
	# w ^= x15
	xorl	160(%esp),%ebx
	# x0 = p
	movl	%eax,100(%esp)
	# x10 = t
	movl	%edx,140(%esp)
	# p += x12
	addl	148(%esp),%eax
	# x5 = s
	movl	%ecx,120(%esp)
	# t += x6
	addl	124(%esp),%edx
	# x15 = w
	movl	%ebx,160(%esp)
	# r = x1
	movl	104(%esp),%esi
	# r += s
	add	%ecx,%esi
	# v = x11
	movl	144(%esp),%edi
	# v += w
	add	%ebx,%edi
	# p <<<= 7
	rol	$7,%eax
	# p ^= x4
	xorl	116(%esp),%eax
	# t <<<= 7
	rol	$7,%edx
	# t ^= x14
	xorl	156(%esp),%edx
	# r <<<= 7
	rol	$7,%esi
	# r ^= x9
	xorl	136(%esp),%esi
	# v <<<= 7
	rol	$7,%edi
	# v ^= x3
	xorl	112(%esp),%edi
	# x4 = p
	movl	%eax,116(%esp)
	# x14 = t
	movl	%edx,156(%esp)
	# p += x0
	addl	100(%esp),%eax
	# x9 = r
	movl	%esi,136(%esp)
	# t += x10
	addl	140(%esp),%edx
	# x3 = v
	movl	%edi,112(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x8
	xorl	132(%esp),%eax
	# t <<<= 9
	rol	$9,%edx
	# t ^= x2
	xorl	108(%esp),%edx
	# s += r
	add	%esi,%ecx
	# s <<<= 9
	rol	$9,%ecx
	# s ^= x13
	xorl	152(%esp),%ecx
	# w += v
	add	%edi,%ebx
	# w <<<= 9
	rol	$9,%ebx
	# w ^= x7
	xorl	128(%esp),%ebx
	# x8 = p
	movl	%eax,132(%esp)
	# x2 = t
	movl	%edx,108(%esp)
	# p += x4
	addl	116(%esp),%eax
	# x13 = s
	movl	%ecx,152(%esp)
	# t += x14
	addl	156(%esp),%edx
	# x7 = w
	movl	%ebx,128(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x12
	xorl	148(%esp),%eax
	# t <<<= 13
	rol	$13,%edx
	# t ^= x6
	xorl	124(%esp),%edx
	# r += s
	add	%ecx,%esi
	# r <<<= 13
	rol	$13,%esi
	# r ^= x1
	xorl	104(%esp),%esi
	# v += w
	add	%ebx,%edi
	# v <<<= 13
	rol	$13,%edi
	# v ^= x11
	xorl	144(%esp),%edi
	# x12 = p
	movl	%eax,148(%esp)
	# x6 = t
	movl	%edx,124(%esp)
	# p += x8
	addl	132(%esp),%eax
	# x1 = r
	movl	%esi,104(%esp)
	# t += x2
	addl	108(%esp),%edx
	# x11 = v
	movl	%edi,144(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# t <<<= 18
	rol	$18,%edx
	# t ^= x10
	xorl	140(%esp),%edx
	# s += r
	add	%esi,%ecx
	# s <<<= 18
	rol	$18,%ecx
	# s ^= x5
	xorl	120(%esp),%ecx
	# w += v
	add	%edi,%ebx
	# w <<<= 18
	rol	$18,%ebx
	# w ^= x15
	xorl	160(%esp),%ebx
	# x0 = p
	movl	%eax,100(%esp)
	# x10 = t
	movl	%edx,140(%esp)
	# p += x3
	addl	112(%esp),%eax
	# p <<<= 7
	rol	$7,%eax
	# x5 = s
	movl	%ecx,120(%esp)
	# t += x9
	addl	136(%esp),%edx
	# x15 = w
	movl	%ebx,160(%esp)
	# r = x4
	movl	116(%esp),%esi
	# r += s
	add	%ecx,%esi
	# v = x14
	movl	156(%esp),%edi
	# v += w
	add	%ebx,%edi
	# p ^= x1
	xorl	104(%esp),%eax
	# t <<<= 7
	rol	$7,%edx
	# t ^= x11
	xorl	144(%esp),%edx
	# r <<<= 7
	rol	$7,%esi
	# r ^= x6
	xorl	124(%esp),%esi
	# v <<<= 7
	rol	$7,%edi
	# v ^= x12
	xorl	148(%esp),%edi
	# x1 = p
	movl	%eax,104(%esp)
	# x11 = t
	movl	%edx,144(%esp)
	# p += x0
	addl	100(%esp),%eax
	# x6 = r
	movl	%esi,124(%esp)
	# t += x10
	addl	140(%esp),%edx
	# x12 = v
	movl	%edi,148(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x2
	xorl	108(%esp),%eax
	# t <<<= 9
	rol	$9,%edx
	# t ^= x8
	xorl	132(%esp),%edx
	# s += r
	add	%esi,%ecx
	# s <<<= 9
	rol	$9,%ecx
	# s ^= x7
	xorl	128(%esp),%ecx
	# w += v
	add	%edi,%ebx
	# w <<<= 9
	rol	$9,%ebx
	# w ^= x13
	xorl	152(%esp),%ebx
	# x2 = p
	movl	%eax,108(%esp)
	# x8 = t
	movl	%edx,132(%esp)
	# p += x1
	addl	104(%esp),%eax
	# x7 = s
	movl	%ecx,128(%esp)
	# t += x11
	addl	144(%esp),%edx
	# x13 = w
	movl	%ebx,152(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x3
	xorl	112(%esp),%eax
	# t <<<= 13
	rol	$13,%edx
	# t ^= x9
	xorl	136(%esp),%edx
	# r += s
	add	%ecx,%esi
	# r <<<= 13
	rol	$13,%esi
	# r ^= x4
	xorl	116(%esp),%esi
	# v += w
	add	%ebx,%edi
	# v <<<= 13
	rol	$13,%edi
	# v ^= x14
	xorl	156(%esp),%edi
	# x3 = p
	movl	%eax,112(%esp)
	# x9 = t
	movl	%edx,136(%esp)
	# p += x2
	addl	108(%esp),%eax
	# x4 = r
	movl	%esi,116(%esp)
	# t += x8
	addl	132(%esp),%edx
	# x14 = v
	movl	%edi,156(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# t <<<= 18
	rol	$18,%edx
	# t ^= x10
	xorl	140(%esp),%edx
	# s += r
	add	%esi,%ecx
	# s <<<= 18
	rol	$18,%ecx
	# s ^= x5
	xorl	120(%esp),%ecx
	# w += v
	add	%edi,%ebx
	# w <<<= 18
	rol	$18,%ebx
	# w ^= x15
	xorl	160(%esp),%ebx
	# i -= 4   (each pass = 4 rounds; 20 -> 0 gives the 20 Salsa20 rounds)
	sub	$4,%ebp
	# goto mainloop if unsigned >
	ja	._mainloop
	# x0 = p
	movl	%eax,100(%esp)
	# x5 = s
	movl	%ecx,120(%esp)
	# x10 = t
	movl	%edx,140(%esp)
	# x15 = w
	movl	%ebx,160(%esp)
	# out = out_backup
	movl	72(%esp),%edi
	# m = m_backup
	movl	68(%esp),%esi
	# Feed-forward: out[i] = m[i] ^ (x[i] + j[i]), two words at a time.
	# in0 = x0
	movl	100(%esp),%eax
	# in1 = x1
	movl	104(%esp),%ecx
	# in0 += j0
	addl	164(%esp),%eax
	# in1 += j1
	addl	168(%esp),%ecx
	# in0 ^= *(uint32 *) (m + 0)
	xorl	0(%esi),%eax
	# in1 ^= *(uint32 *) (m + 4)
	xorl	4(%esi),%ecx
	# *(uint32 *) (out + 0) = in0
	movl	%eax,0(%edi)
	# *(uint32 *) (out + 4) = in1
	movl	%ecx,4(%edi)
	# in2 = x2
	movl	108(%esp),%eax
	# in3 = x3
	movl	112(%esp),%ecx
	# in2 += j2
	addl	172(%esp),%eax
	# in3 += j3
	addl	176(%esp),%ecx
	# in2 ^= *(uint32 *) (m + 8)
	xorl	8(%esi),%eax
	# in3 ^= *(uint32 *) (m + 12)
	xorl	12(%esi),%ecx
	# *(uint32 *) (out + 8) = in2
	movl	%eax,8(%edi)
	# *(uint32 *) (out + 12) = in3
	movl	%ecx,12(%edi)
	# in4 = x4
	movl	116(%esp),%eax
	# in5 = x5
	movl	120(%esp),%ecx
	# in4 += j4
	addl	180(%esp),%eax
	# in5 += j5
	addl	184(%esp),%ecx
	# in4 ^= *(uint32 *) (m + 16)
	xorl	16(%esi),%eax
	# in5 ^= *(uint32 *) (m + 20)
	xorl	20(%esi),%ecx
	# *(uint32 *) (out + 16) = in4
	movl	%eax,16(%edi)
	# *(uint32 *) (out + 20) = in5
	movl	%ecx,20(%edi)
	# in6 = x6
	movl	124(%esp),%eax
	# in7 = x7
	movl	128(%esp),%ecx
	# in6 += j6
	addl	188(%esp),%eax
	# in7 += j7
	addl	192(%esp),%ecx
	# in6 ^= *(uint32 *) (m + 24)
	xorl	24(%esi),%eax
	# in7 ^= *(uint32 *) (m + 28)
	xorl	28(%esi),%ecx
	# *(uint32 *) (out + 24) = in6
	movl	%eax,24(%edi)
	# *(uint32 *) (out + 28) = in7
	movl	%ecx,28(%edi)
	# in8 = x8
	movl	132(%esp),%eax
	# in9 = x9
	movl	136(%esp),%ecx
	# in8 += j8
	addl	196(%esp),%eax
	# in9 += j9
	addl	200(%esp),%ecx
	# in8 ^= *(uint32 *) (m + 32)
	xorl	32(%esi),%eax
	# in9 ^= *(uint32 *) (m + 36)
	xorl	36(%esi),%ecx
	# *(uint32 *) (out + 32) = in8
	movl	%eax,32(%edi)
	# *(uint32 *) (out + 36) = in9
	movl	%ecx,36(%edi)
	# in10 = x10
	movl	140(%esp),%eax
	# in11 = x11
	movl	144(%esp),%ecx
	# in10 += j10
	addl	204(%esp),%eax
	# in11 += j11
	addl	208(%esp),%ecx
	# in10 ^= *(uint32 *) (m + 40)
	xorl	40(%esi),%eax
	# in11 ^= *(uint32 *) (m + 44)
	xorl	44(%esi),%ecx
	# *(uint32 *) (out + 40) = in10
	movl	%eax,40(%edi)
	# *(uint32 *) (out + 44) = in11
	movl	%ecx,44(%edi)
	# in12 = x12
	movl	148(%esp),%eax
	# in13 = x13
	movl	152(%esp),%ecx
	# in12 += j12
	addl	212(%esp),%eax
	# in13 += j13
	addl	216(%esp),%ecx
	# in12 ^= *(uint32 *) (m + 48)
	xorl	48(%esi),%eax
	# in13 ^= *(uint32 *) (m + 52)
	xorl	52(%esi),%ecx
	# *(uint32 *) (out + 48) = in12
	movl	%eax,48(%edi)
	# *(uint32 *) (out + 52) = in13
	movl	%ecx,52(%edi)
	# in14 = x14
	movl	156(%esp),%eax
	# in15 = x15
	movl	160(%esp),%ecx
	# in14 += j14
	addl	220(%esp),%eax
	# in15 += j15
	addl	224(%esp),%ecx
	# in14 ^= *(uint32 *) (m + 56)
	xorl	56(%esi),%eax
	# in15 ^= *(uint32 *) (m + 60)
	xorl	60(%esi),%ecx
	# *(uint32 *) (out + 56) = in14
	movl	%eax,56(%edi)
	# *(uint32 *) (out + 60) = in15
	movl	%ecx,60(%edi)
	# bytes = bytes_backup
	movl	76(%esp),%ebx
	# Advance the 64-bit block counter held in j8:j9 (add/adc carry chain).
	# in8 = j8
	movl	196(%esp),%eax
	# in9 = j9
	movl	200(%esp),%ecx
	# in8 += 1
	add	$1,%eax
	# in9 += 0 + carry
	adc	$0,%ecx
	# j8 = in8
	movl	%eax,196(%esp)
	# j9 = in9
	movl	%ecx,200(%esp)
	# bytes - 64
	cmp	$64,%ebx
	# goto bytesatleast65 if unsigned>
	ja	._bytesatleast65
	# goto bytesatleast64 if unsigned>=
	jae	._bytesatleast64
	# Partial final block: copy the 'bytes' valid output bytes from the
	# tmp staging buffer to the real destination saved in ctarget.
	# m = out
	mov	%edi,%esi
	# out = ctarget
	movl	228(%esp),%edi
	# i = bytes
	mov	%ebx,%ecx
	# while (i) { *out++ = *m++; --i }
	rep	movsb
._bytesatleast64:
	# Write the updated block counter back to the caller's state.
	# x = x_backup
	movl	64(%esp),%eax
	# in8 = j8
	movl	196(%esp),%ecx
	# in9 = j9
	movl	200(%esp),%edx
	# *(uint32 *) (x + 32) = in8
	movl	%ecx,32(%eax)
	# *(uint32 *) (x + 36) = in9
	movl	%edx,36(%eax)
._done:
	# eax = eax_stack
	movl	80(%esp),%eax
	# ebx = ebx_stack
	movl	84(%esp),%ebx
	# esi = esi_stack
	movl	88(%esp),%esi
	# edi = edi_stack
	movl	92(%esp),%edi
	# ebp = ebp_stack
	movl	96(%esp),%ebp
	# leave
	add	%eax,%esp
	ret
._bytesatleast65:
	# More than one block remains: advance pointers and loop.
	# bytes -= 64
	sub	$64,%ebx
	# out += 64
	add	$64,%edi
	# m += 64
	add	$64,%esi
	# goto bytesatleast1
	jmp	._bytesatleast1
ENDPROC(salsa20_encrypt_bytes)

#-----------------------------------------------------------------------
# void salsa20_keysetup(u32 *x, const u8 *k, u32 kbits)
#   arg1 x     = 16-word state to initialize (key + constant words)
#   arg2 k     = key bytes: 32 bytes when kbits >= 256, else 16 bytes
#   arg3 kbits = key length in bits; selects the 256- or 128-bit layout
# Writes x[1..4] and x[11..14] from the key, and x[0], x[5], x[10],
# x[15] with the Salsa20 diagonal constants ("expand 32-byte k" /
# "expand 16-byte k" for 256-/128-bit keys respectively — see the
# Salsa20 specification).  Does not touch x[6..9] (IV and counter).
#-----------------------------------------------------------------------
# enter salsa20_keysetup
ENTRY(salsa20_keysetup)
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp
	# eax_stack = eax
	movl	%eax,64(%esp)
	# ebx_stack = ebx
	movl	%ebx,68(%esp)
	# esi_stack = esi
	movl	%esi,72(%esp)
	# edi_stack = edi
	movl	%edi,76(%esp)
	# ebp_stack = ebp
	movl	%ebp,80(%esp)
	# k = arg2
	movl	8(%esp,%eax),%ecx
	# kbits = arg3
	movl	12(%esp,%eax),%edx
	# x = arg1
	movl	4(%esp,%eax),%eax
	# in1 = *(uint32 *) (k + 0)
	movl	0(%ecx),%ebx
	# in2 = *(uint32 *) (k + 4)
	movl	4(%ecx),%esi
	# in3 = *(uint32 *) (k + 8)
	movl	8(%ecx),%edi
	# in4 = *(uint32 *) (k + 12)
	movl	12(%ecx),%ebp
	# *(uint32 *) (x + 4) = in1
	movl	%ebx,4(%eax)
	# *(uint32 *) (x + 8) = in2
	movl	%esi,8(%eax)
	# *(uint32 *) (x + 12) = in3
	movl	%edi,12(%eax)
	# *(uint32 *) (x + 16) = in4
	movl	%ebp,16(%eax)
	# kbits - 256
	cmp	$256,%edx
	# goto kbits128 if unsigned<
	jb	._kbits128
._kbits256:
	# 256-bit key: second half of the key fills x[11..14].
	# in11 = *(uint32 *) (k + 16)
	movl	16(%ecx),%edx
	# in12 = *(uint32 *) (k + 20)
	movl	20(%ecx),%ebx
	# in13 = *(uint32 *) (k + 24)
	movl	24(%ecx),%esi
	# in14 = *(uint32 *) (k + 28)
	movl	28(%ecx),%ecx
	# *(uint32 *) (x + 44) = in11
	movl	%edx,44(%eax)
	# *(uint32 *) (x + 48) = in12
	movl	%ebx,48(%eax)
	# *(uint32 *) (x + 52) = in13
	movl	%esi,52(%eax)
	# *(uint32 *) (x + 56) = in14
	movl	%ecx,56(%eax)
	# sigma constants, "expand 32-byte k" as four little-endian words
	# in0 = 1634760805
	mov	$1634760805,%ecx
	# in5 = 857760878
	mov	$857760878,%edx
	# in10 = 2036477234
	mov	$2036477234,%ebx
	# in15 = 1797285236
	mov	$1797285236,%esi
	# *(uint32 *) (x + 0) = in0
	movl	%ecx,0(%eax)
	# *(uint32 *) (x + 20) = in5
	movl	%edx,20(%eax)
	# *(uint32 *) (x + 40) = in10
	movl	%ebx,40(%eax)
	# *(uint32 *) (x + 60) = in15
	movl	%esi,60(%eax)
	# goto keysetupdone
	jmp	._keysetupdone
._kbits128:
	# 128-bit key: the same 16 key bytes fill x[11..14] again.
	# in11 = *(uint32 *) (k + 0)
	movl	0(%ecx),%edx
	# in12 = *(uint32 *) (k + 4)
	movl	4(%ecx),%ebx
	# in13 = *(uint32 *) (k + 8)
	movl	8(%ecx),%esi
	# in14 = *(uint32 *) (k + 12)
	movl	12(%ecx),%ecx
	# *(uint32 *) (x + 44) = in11
	movl	%edx,44(%eax)
	# *(uint32 *) (x + 48) = in12
	movl	%ebx,48(%eax)
	# *(uint32 *) (x + 52) = in13
	movl	%esi,52(%eax)
	# *(uint32 *) (x + 56) = in14
	movl	%ecx,56(%eax)
	# tau constants, "expand 16-byte k" as four little-endian words
	# in0 = 1634760805
	mov	$1634760805,%ecx
	# in5 = 824206446
	mov	$824206446,%edx
	# in10 = 2036477238
	mov	$2036477238,%ebx
	# in15 = 1797285236
	mov	$1797285236,%esi
	# *(uint32 *) (x + 0) = in0
	movl	%ecx,0(%eax)
	# *(uint32 *) (x + 20) = in5
	movl	%edx,20(%eax)
	# *(uint32 *) (x + 40) = in10
	movl	%ebx,40(%eax)
	# *(uint32 *) (x + 60) = in15
	movl	%esi,60(%eax)
._keysetupdone:
	# eax = eax_stack
	movl	64(%esp),%eax
	# ebx = ebx_stack
	movl	68(%esp),%ebx
	# esi = esi_stack
	movl	72(%esp),%esi
	# edi = edi_stack
	movl	76(%esp),%edi
	# ebp = ebp_stack
	movl	80(%esp),%ebp
	# leave
	add	%eax,%esp
	ret
ENDPROC(salsa20_keysetup)

#-----------------------------------------------------------------------
# void salsa20_ivsetup(u32 *x, const u8 *iv)
#   arg1 x  = 16-word state
#   arg2 iv = 8-byte nonce
# Stores the nonce into x[6..7] and zeroes the 64-bit block counter in
# x[8..9], so the next salsa20_encrypt_bytes call starts at block 0.
#-----------------------------------------------------------------------
# enter salsa20_ivsetup
ENTRY(salsa20_ivsetup)
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp
	# eax_stack = eax
	movl	%eax,64(%esp)
	# ebx_stack = ebx
	movl	%ebx,68(%esp)
	# esi_stack = esi
	movl	%esi,72(%esp)
	# edi_stack = edi
	movl	%edi,76(%esp)
	# ebp_stack = ebp
	movl	%ebp,80(%esp)
	# iv = arg2
	movl	8(%esp,%eax),%ecx
	# x = arg1
	movl	4(%esp,%eax),%eax
	# in6 = *(uint32 *) (iv + 0)
	movl	0(%ecx),%edx
	# in7 = *(uint32 *) (iv + 4)
	movl	4(%ecx),%ecx
	# in8 = 0
	mov	$0,%ebx
	# in9 = 0
	mov	$0,%esi
	# *(uint32 *) (x + 24) = in6
	movl	%edx,24(%eax)
	# *(uint32 *) (x + 28) = in7
	movl	%ecx,28(%eax)
	# *(uint32 *) (x + 32) = in8
	movl	%ebx,32(%eax)
	# *(uint32 *) (x + 36) = in9
	movl	%esi,36(%eax)
	# eax = eax_stack
	movl	64(%esp),%eax
	# ebx = ebx_stack
	movl	68(%esp),%ebx
	# esi = esi_stack
	movl	72(%esp),%esi
	# edi = edi_stack
	movl	76(%esp),%edi
	# ebp = ebp_stack
	movl	80(%esp),%ebp
	# leave
	add	%eax,%esp
	ret
ENDPROC(salsa20_ivsetup)