; ---------------------------------------------------------------------------
; 32-bit x86 multi-word integer primitives, NASM syntax.
; Calling convention as used by the code below: arguments on the stack at
; [4+esp].., result in eax, ebp/ebx/esi/edi saved and restored by the callee.
; NOTE(review): the numbered L$nnn labels suggest this is generator output --
; confirm whether changes belong in a generator script rather than here.
; ---------------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__,obj
section code    use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes .00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
$@feat.00 equ 1
%endif
section .text   code align=64
%else
section .text   code
%endif
; _OPENSSL_ia32cap_P is not imported here; it is declared as a common symbol
; in the .bss segment at the end of this file.
;extern _OPENSSL_ia32cap_P

; Words handled by one pass of an unrolled loop, and the same in bytes.
%define BN_UNROLL       8
%define BN_UNROLL_BYTES 32
; Word count rounded down to a multiple of BN_UNROLL / word count modulo it.
%define BN_UNROLL_MASK  0FFFFFFF8h
%define BN_TAIL_MASK    7
; Bit of the first dword of _OPENSSL_ia32cap_P that selects the paths the
; labels below call "sse2" (they use pmuludq/paddq on MMX registers).
%define BN_CAP_SSE2_BIT 26

; One multiply-accumulate word: [%1+edi] += [%1+ebx] * ebp + esi.
; esi receives the new carry word.  %1 = byte offset.  Clobbers eax, edx.
%macro MAW_ROUND 1
        mov     eax, DWORD [%1+ebx]
        mul     ebp
        add     eax, esi
        adc     edx, 0
        add     eax, DWORD [%1+edi]
        adc     edx, 0
        mov     DWORD [%1+edi], eax
        mov     esi, edx
%endmacro

; MAW_ROUND followed by "one fewer word left in ecx; leave via %2 when done".
%macro MAW_TAIL_ROUND 2
        MAW_ROUND %1
        dec     ecx
        jz      NEAR %2
%endmacro

; One multiply word: [%1+edi] = low32([%1+ebx] * ecx + esi); esi = high32.
; %1 = byte offset.  Clobbers eax, edx.
%macro MW_ROUND 1
        mov     eax, DWORD [%1+ebx]
        mul     ecx
        add     eax, esi
        adc     edx, 0
        mov     DWORD [%1+edi], eax
        mov     esi, edx
%endmacro

; MW_ROUND followed by "one fewer word left in ebp; leave via %2 when done".
%macro MW_TAIL_ROUND 2
        MW_ROUND %1
        dec     ebp
        jz      NEAR %2
%endmacro

; One squaring: the 64-bit square of [%1+edi] is stored as two dwords at
; byte offset 2*%1 from esi (low dword first).  Clobbers eax, edx.
%macro SQR_ROUND 1
        mov     eax, DWORD [%1+edi]
        mul     eax
        mov     DWORD [(%1)*2+esi], eax
        mov     DWORD [(%1)*2+4+esi], edx
%endmacro

; SQR_ROUND followed by "one fewer word left in ebx; leave via %2 when done".
%macro SQR_TAIL_ROUND 2
        SQR_ROUND %1
        dec     ebx
        jz      NEAR %2
%endmacro

;-----------------------------------------------------------------------
; _bn_mul_add_words(dst, src, count, mult)
;   For each of `count` 32-bit words: dst[i] = low32(dst[i] + src[i]*mult
;   + carry), carry = high32 of that sum.  Returns the final carry.
; In:    [4+esp]  = dst   (words are read and overwritten)
;        [8+esp]  = src
;        [12+esp] = count (in words)
;        [16+esp] = mult
; Out:   eax = carry out of the last word (0 when count == 0)
; Clobb: ecx, edx, flags; mm0-mm7 on the sse2 path (emms before return)
;-----------------------------------------------------------------------
global  _bn_mul_add_words
align   16
_bn_mul_add_words:
L$_bn_mul_add_words_begin:
        lea     eax, [_OPENSSL_ia32cap_P]
        bt      DWORD [eax], BN_CAP_SSE2_BIT
        jnc     NEAR L$000maw_non_sse2
        mov     eax, DWORD [4+esp]
        mov     edx, DWORD [8+esp]
        mov     ecx, DWORD [12+esp]
        movd    mm0, DWORD [16+esp]
        pxor    mm1, mm1
        ; The one-word loop below is a do-while: entered with count == 0 it
        ; would wrap ecx and run ~2^32 iterations.  Return carry 0 instead,
        ; which is what the non-sse2 path already does.
        test    ecx, ecx
        jz      NEAR L$003maw_sse2_exit
        jmp     NEAR L$001maw_sse2_entry
align   16
        ; Eight words per pass.  mm1 is the running 64-bit accumulator whose
        ; high half carries into the next word; mm0 is the multiplier.
L$002maw_sse2_unrolled:
        movd    mm3, DWORD [eax]
        paddq   mm1, mm3
        movd    mm2, DWORD [edx]
        pmuludq mm2, mm0
        movd    mm4, DWORD [4+edx]
        pmuludq mm4, mm0
        movd    mm6, DWORD [8+edx]
        pmuludq mm6, mm0
        movd    mm7, DWORD [12+edx]
        pmuludq mm7, mm0
        paddq   mm1, mm2
        movd    mm3, DWORD [4+eax]
        paddq   mm3, mm4
        movd    mm5, DWORD [8+eax]
        paddq   mm5, mm6
        movd    mm4, DWORD [12+eax]
        paddq   mm7, mm4
        movd    DWORD [eax], mm1
        movd    mm2, DWORD [16+edx]
        pmuludq mm2, mm0
        psrlq   mm1, 32
        movd    mm4, DWORD [20+edx]
        pmuludq mm4, mm0
        paddq   mm1, mm3
        movd    mm6, DWORD [24+edx]
        pmuludq mm6, mm0
        movd    DWORD [4+eax], mm1
        psrlq   mm1, 32
        movd    mm3, DWORD [28+edx]
        add     edx, 32
        pmuludq mm3, mm0
        paddq   mm1, mm5
        movd    mm5, DWORD [16+eax]
        paddq   mm2, mm5
        movd    DWORD [8+eax], mm1
        psrlq   mm1, 32
        paddq   mm1, mm7
        movd    mm5, DWORD [20+eax]
        paddq   mm4, mm5
        movd    DWORD [12+eax], mm1
        psrlq   mm1, 32
        paddq   mm1, mm2
        movd    mm5, DWORD [24+eax]
        paddq   mm6, mm5
        movd    DWORD [16+eax], mm1
        psrlq   mm1, 32
        paddq   mm1, mm4
        movd    mm5, DWORD [28+eax]
        paddq   mm3, mm5
        movd    DWORD [20+eax], mm1
        psrlq   mm1, 32
        paddq   mm1, mm6
        movd    DWORD [24+eax], mm1
        psrlq   mm1, 32
        paddq   mm1, mm3
        movd    DWORD [28+eax], mm1
        lea     eax, [32+eax]
        psrlq   mm1, 32
        sub     ecx, BN_UNROLL
        jz      NEAR L$003maw_sse2_exit
L$001maw_sse2_entry:
        test    ecx, BN_UNROLL_MASK
        jnz     NEAR L$002maw_sse2_unrolled
align   4
        ; Fewer than eight words left (and at least one): one word per pass.
L$004maw_sse2_loop:
        movd    mm2, DWORD [edx]
        movd    mm3, DWORD [eax]
        pmuludq mm2, mm0
        lea     edx, [4+edx]
        paddq   mm1, mm3
        paddq   mm1, mm2
        movd    DWORD [eax], mm1
        sub     ecx, 1
        ; psrlq and lea leave EFLAGS alone, so jnz still tests the sub.
        psrlq   mm1, 32
        lea     eax, [4+eax]
        jnz     NEAR L$004maw_sse2_loop
L$003maw_sse2_exit:
        movd    eax, mm1
        emms
        ret
align   16
        ; Integer-only path: edi = dst, ebx = src, ebp = mult, esi = carry,
        ; ecx = words left in the current phase.
L$000maw_non_sse2:
        push    ebp
        push    ebx
        push    esi
        push    edi

        xor     esi, esi
        mov     edi, DWORD [20+esp]
        mov     ecx, DWORD [28+esp]
        mov     ebx, DWORD [24+esp]
        and     ecx, BN_UNROLL_MASK
        mov     ebp, DWORD [32+esp]
        ; mov/push do not modify flags: the jz below tests the `and`.
        ; The pushed value is never read back; it only shifts the argument
        ; offsets by 4 until the matching `pop ecx` in the epilogue.
        push    ecx
        jz      NEAR L$005maw_finish
align   16
L$006maw_loop:
        MAW_ROUND 0
        MAW_ROUND 4
        MAW_ROUND 8
        MAW_ROUND 12
        MAW_ROUND 16
        MAW_ROUND 20
        MAW_ROUND 24
        MAW_ROUND 28

        sub     ecx, BN_UNROLL
        ; lea advances the pointers without disturbing ZF from the sub.
        lea     ebx, [BN_UNROLL_BYTES+ebx]
        lea     edi, [BN_UNROLL_BYTES+edi]
        jnz     NEAR L$006maw_loop
L$005maw_finish:
        ; count is at [32+esp] here because of the extra push above.
        mov     ecx, DWORD [32+esp]
        and     ecx, BN_TAIL_MASK
        jz      NEAR L$008maw_end
L$007maw_finish2:
        ; 1..7 leftover words; the 7th needs no countdown.
        MAW_TAIL_ROUND 0, L$008maw_end
        MAW_TAIL_ROUND 4, L$008maw_end
        MAW_TAIL_ROUND 8, L$008maw_end
        MAW_TAIL_ROUND 12, L$008maw_end
        MAW_TAIL_ROUND 16, L$008maw_end
        MAW_TAIL_ROUND 20, L$008maw_end
        MAW_ROUND 24
L$008maw_end:
        mov     eax, esi
        pop     ecx
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

;-----------------------------------------------------------------------
; _bn_mul_words(dst, src, count, mult)
;   For each of `count` 32-bit words: dst[i] = low32(src[i]*mult + carry),
;   carry = high32 of that sum.  Returns the final carry.
; In:    [4+esp]  = dst
;        [8+esp]  = src
;        [12+esp] = count (in words)
;        [16+esp] = mult
; Out:   eax = carry out of the last word (0 when count == 0)
; Clobb: ecx, edx, flags; mm0-mm2 on the sse2 path (emms before return)
;-----------------------------------------------------------------------
global  _bn_mul_words
align   16
_bn_mul_words:
L$_bn_mul_words_begin:
        lea     eax, [_OPENSSL_ia32cap_P]
        bt      DWORD [eax], BN_CAP_SSE2_BIT
        jnc     NEAR L$009mw_non_sse2
        mov     eax, DWORD [4+esp]
        mov     edx, DWORD [8+esp]
        mov     ecx, DWORD [12+esp]
        movd    mm0, DWORD [16+esp]
        pxor    mm1, mm1
        ; Do-while loop below: skip it for count == 0 (would otherwise wrap
        ; ecx and overrun both buffers).
        test    ecx, ecx
        jz      NEAR L$mw_sse2_exit
align   16
L$010mw_sse2_loop:
        movd    mm2, DWORD [edx]
        pmuludq mm2, mm0
        lea     edx, [4+edx]
        paddq   mm1, mm2
        movd    DWORD [eax], mm1
        sub     ecx, 1
        ; psrlq and lea leave EFLAGS alone, so jnz still tests the sub.
        psrlq   mm1, 32
        lea     eax, [4+eax]
        jnz     NEAR L$010mw_sse2_loop
L$mw_sse2_exit:
        movd    eax, mm1
        emms
        ret
align   16
        ; Integer-only path: edi = dst, ebx = src, ecx = mult, esi = carry,
        ; ebp = words left in the current phase.
L$009mw_non_sse2:
        push    ebp
        push    ebx
        push    esi
        push    edi

        xor     esi, esi
        mov     edi, DWORD [20+esp]
        mov     ebx, DWORD [24+esp]
        mov     ebp, DWORD [28+esp]
        mov     ecx, DWORD [32+esp]
        and     ebp, BN_UNROLL_MASK
        jz      NEAR L$011mw_finish
L$012mw_loop:
        MW_ROUND 0
        MW_ROUND 4
        MW_ROUND 8
        MW_ROUND 12
        MW_ROUND 16
        MW_ROUND 20
        MW_ROUND 24
        MW_ROUND 28

        add     ebx, BN_UNROLL_BYTES
        add     edi, BN_UNROLL_BYTES
        sub     ebp, BN_UNROLL
        jnz     NEAR L$012mw_loop
L$011mw_finish:
        mov     ebp, DWORD [28+esp]
        and     ebp, BN_TAIL_MASK
        jz      NEAR L$014mw_end
L$013mw_finish2:
        ; 1..7 leftover words; the 7th needs no countdown.
        MW_TAIL_ROUND 0, L$014mw_end
        MW_TAIL_ROUND 4, L$014mw_end
        MW_TAIL_ROUND 8, L$014mw_end
        MW_TAIL_ROUND 12, L$014mw_end
        MW_TAIL_ROUND 16, L$014mw_end
        MW_TAIL_ROUND 20, L$014mw_end
        MW_ROUND 24
L$014mw_end:
        mov     eax, esi
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

;-----------------------------------------------------------------------
; _bn_sqr_words(dst, src, count)
;   For each of `count` 32-bit words: (dst[2i], dst[2i+1]) = low and high
;   dwords of src[i]*src[i].  dst therefore receives 2*count words.
; In:    [4+esp]  = dst
;        [8+esp]  = src
;        [12+esp] = count (in source words)
; Out:   nothing is deliberately placed in eax
; Clobb: eax, ecx, edx, flags; mm0 on the sse2 path (emms before return)
;-----------------------------------------------------------------------
global  _bn_sqr_words
align   16
_bn_sqr_words:
L$_bn_sqr_words_begin:
        lea     eax, [_OPENSSL_ia32cap_P]
        bt      DWORD [eax], BN_CAP_SSE2_BIT
        jnc     NEAR L$015sqr_non_sse2
        mov     eax, DWORD [4+esp]
        mov     edx, DWORD [8+esp]
        mov     ecx, DWORD [12+esp]
        ; Do-while loop below: skip it for count == 0 (would otherwise wrap
        ; ecx and overrun both buffers).
        test    ecx, ecx
        jz      NEAR L$sqr_sse2_exit
align   16
L$016sqr_sse2_loop:
        movd    mm0, DWORD [edx]
        pmuludq mm0, mm0
        lea     edx, [4+edx]
        movq    [eax], mm0
        sub     ecx, 1
        ; lea leaves EFLAGS alone, so jnz still tests the sub.
        lea     eax, [8+eax]
        jnz     NEAR L$016sqr_sse2_loop
L$sqr_sse2_exit:
        emms
        ret
align   16
        ; Integer-only path: esi = dst, edi = src, ebx = words left.
L$015sqr_non_sse2:
        push    ebp
        push    ebx
        push    esi
        push    edi

        mov     esi, DWORD [20+esp]
        mov     edi, DWORD [24+esp]
        mov     ebx, DWORD [28+esp]
        and     ebx, BN_UNROLL_MASK
        jz      NEAR L$017sw_finish
L$018sw_loop:
        SQR_ROUND 0
        SQR_ROUND 4
        SQR_ROUND 8
        SQR_ROUND 12
        SQR_ROUND 16
        SQR_ROUND 20
        SQR_ROUND 24
        SQR_ROUND 28

        ; dst advances twice as fast as src: two output words per input word.
        add     edi, BN_UNROLL_BYTES
        add     esi, 2*BN_UNROLL_BYTES
        sub     ebx, BN_UNROLL
        jnz     NEAR L$018sw_loop
L$017sw_finish:
        mov     ebx, DWORD [28+esp]
        and     ebx, BN_TAIL_MASK
        jz      NEAR L$019sw_end
        ; 1..7 leftover words; the 7th needs no countdown.
        SQR_TAIL_ROUND 0, L$019sw_end
        SQR_TAIL_ROUND 4, L$019sw_end
        SQR_TAIL_ROUND 8, L$019sw_end
        SQR_TAIL_ROUND 12, L$019sw_end
        SQR_TAIL_ROUND 16, L$019sw_end
        SQR_TAIL_ROUND 20, L$019sw_end
        SQR_ROUND 24
L$019sw_end:
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret
global  _bn_div_words
align   16
;-----------------------------------------------------------------------
; _bn_div_words(hi, lo, divisor)
;   Unsigned divide of the 64-bit value hi:lo by a 32-bit divisor.
; In:    [4+esp]  = hi  (loaded into edx)
;        [8+esp]  = lo  (loaded into eax)
;        [12+esp] = divisor
; Out:   eax = quotient (edx is left holding the remainder)
; Clobb: ecx, edx, flags
; Note:  `div` raises a divide error when divisor == 0 or when the quotient
;        does not fit in 32 bits (hi >= divisor); nothing here checks for it.
;-----------------------------------------------------------------------
_bn_div_words:
L$_bn_div_words_begin:
        mov     edx, DWORD [4+esp]
        mov     eax, DWORD [8+esp]
        mov     ecx, DWORD [12+esp]
        div     ecx
        ret

; Words handled by one pass of an unrolled loop, and the same in bytes.
%define BN_UNROLL       8
%define BN_UNROLL_BYTES 32
; Word count rounded down to a multiple of BN_UNROLL / word count modulo it.
%define BN_UNROLL_MASK  0FFFFFFF8h
%define BN_TAIL_MASK    7

; One add-with-carry word: [%1+ebx] = [%1+esi] + [%1+edi] + eax, then
; eax = carry out (0 or 1).  %1 = byte offset.  Clobbers ecx, edx.
%macro ADD_ROUND 1
        mov     ecx, DWORD [%1+esi]
        mov     edx, DWORD [%1+edi]
        add     ecx, eax
        ; Must stay `mov` (not xor): CF from the add above is consumed by
        ; the adc on the next line, and mov does not touch flags.
        mov     eax, 0
        adc     eax, eax
        add     ecx, edx
        adc     eax, 0
        mov     DWORD [%1+ebx], ecx
%endmacro

; ADD_ROUND followed by "one fewer word left in ebp; leave via %2 when done".
%macro ADD_TAIL_ROUND 2
        ADD_ROUND %1
        dec     ebp
        jz      NEAR %2
%endmacro

; One subtract-with-borrow word: [%1+ebx] = [%1+esi] - [%1+edi] - eax, then
; eax = borrow out (0 or 1).  %1 = byte offset.  Clobbers ecx, edx.
%macro SUB_ROUND 1
        mov     ecx, DWORD [%1+esi]
        mov     edx, DWORD [%1+edi]
        sub     ecx, eax
        ; Must stay `mov` (not xor): CF from the sub above is consumed by
        ; the adc on the next line, and mov does not touch flags.
        mov     eax, 0
        adc     eax, eax
        sub     ecx, edx
        adc     eax, 0
        mov     DWORD [%1+ebx], ecx
%endmacro

; SUB_ROUND followed by "one fewer word left in ebp; leave via %2 when done".
%macro SUB_TAIL_ROUND 2
        SUB_ROUND %1
        dec     ebp
        jz      NEAR %2
%endmacro

; Tail step of _bn_sub_part_words: one SUB_ROUND at offset 0, then advance
; all three pointers so they stay valid for the code that follows the tail.
%macro SUB_PART_TAIL 0
        SUB_ROUND 0
        add     esi, 4
        add     edi, 4
        add     ebx, 4
%endmacro

; One word of the "minuend exhausted" continuation:
; [%1+ebx] = 0 - [%1+edi] - eax, then eax = borrow out.  Clobbers ecx, edx.
%macro PW_NEG_ROUND 1
        ; Flags are dead here, so xor is a safe way to zero ecx.
        xor     ecx, ecx
        mov     edx, DWORD [%1+edi]
        sub     ecx, eax
        ; Must stay `mov`: CF from the sub is consumed by the next adc.
        mov     eax, 0
        adc     eax, eax
        sub     ecx, edx
        adc     eax, 0
        mov     DWORD [%1+ebx], ecx
%endmacro

; PW_NEG_ROUND followed by "one fewer word left in ebp; leave via %2".
%macro PW_NEG_TAIL_ROUND 2
        PW_NEG_ROUND %1
        dec     ebp
        jz      NEAR %2
%endmacro

; One word of the "subtrahend exhausted" continuation:
; [%1+ebx] = [%1+esi] - eax.  If that did not borrow, jump to %2, from where
; the remaining words are plain copies.  eax is left unchanged: falling
; through means the borrow persists.  Clobbers ecx.
%macro PW_POS_ROUND 2
        mov     ecx, DWORD [%1+esi]
        sub     ecx, eax
        mov     DWORD [%1+ebx], ecx
        jnc     NEAR %2
%endmacro

; PW_POS_ROUND (borrow-free exit %2) followed by "one fewer word left in
; ebp; leave via %3 when done".
%macro PW_POS_TAIL_ROUND 3
        PW_POS_ROUND %1, %2
        dec     ebp
        jz      NEAR %3
%endmacro

; Copy one word: [%1+ebx] = [%1+esi].  Clobbers ecx.
%macro PW_COPY 1
        mov     ecx, DWORD [%1+esi]
        mov     DWORD [%1+ebx], ecx
%endmacro

;-----------------------------------------------------------------------
; _bn_add_words(dst, a, b, count)
;   dst[i] = a[i] + b[i] + carry for `count` 32-bit words.
; In:    [4+esp] = dst, [8+esp] = a, [12+esp] = b, [16+esp] = count
; Out:   eax = carry out of the last word (0 when count == 0)
; Clobb: ecx, edx, flags
;-----------------------------------------------------------------------
global  _bn_add_words
align   16
_bn_add_words:
L$_bn_add_words_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi

        ; ebx = dst, esi = a, edi = b, ebp = words left, eax = carry.
        mov     ebx, DWORD [20+esp]
        mov     esi, DWORD [24+esp]
        mov     edi, DWORD [28+esp]
        mov     ebp, DWORD [32+esp]
        xor     eax, eax
        and     ebp, BN_UNROLL_MASK
        jz      NEAR L$020aw_finish
L$021aw_loop:
        ADD_ROUND 0
        ADD_ROUND 4
        ADD_ROUND 8
        ADD_ROUND 12
        ADD_ROUND 16
        ADD_ROUND 20
        ADD_ROUND 24
        ADD_ROUND 28

        add     esi, BN_UNROLL_BYTES
        add     edi, BN_UNROLL_BYTES
        add     ebx, BN_UNROLL_BYTES
        sub     ebp, BN_UNROLL
        jnz     NEAR L$021aw_loop
L$020aw_finish:
        mov     ebp, DWORD [32+esp]
        and     ebp, BN_TAIL_MASK
        jz      NEAR L$022aw_end
        ; 1..7 leftover words; the 7th needs no countdown.
        ADD_TAIL_ROUND 0, L$022aw_end
        ADD_TAIL_ROUND 4, L$022aw_end
        ADD_TAIL_ROUND 8, L$022aw_end
        ADD_TAIL_ROUND 12, L$022aw_end
        ADD_TAIL_ROUND 16, L$022aw_end
        ADD_TAIL_ROUND 20, L$022aw_end
        ADD_ROUND 24
L$022aw_end:
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

;-----------------------------------------------------------------------
; _bn_sub_words(dst, a, b, count)
;   dst[i] = a[i] - b[i] - borrow for `count` 32-bit words.
; In:    [4+esp] = dst, [8+esp] = a, [12+esp] = b, [16+esp] = count
; Out:   eax = borrow out of the last word (0 when count == 0)
; Clobb: ecx, edx, flags
;-----------------------------------------------------------------------
global  _bn_sub_words
align   16
_bn_sub_words:
L$_bn_sub_words_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi

        ; ebx = dst, esi = a, edi = b, ebp = words left, eax = borrow.
        mov     ebx, DWORD [20+esp]
        mov     esi, DWORD [24+esp]
        mov     edi, DWORD [28+esp]
        mov     ebp, DWORD [32+esp]
        xor     eax, eax
        and     ebp, BN_UNROLL_MASK
        jz      NEAR L$023aw_finish
L$024aw_loop:
        SUB_ROUND 0
        SUB_ROUND 4
        SUB_ROUND 8
        SUB_ROUND 12
        SUB_ROUND 16
        SUB_ROUND 20
        SUB_ROUND 24
        SUB_ROUND 28

        add     esi, BN_UNROLL_BYTES
        add     edi, BN_UNROLL_BYTES
        add     ebx, BN_UNROLL_BYTES
        sub     ebp, BN_UNROLL
        jnz     NEAR L$024aw_loop
L$023aw_finish:
        mov     ebp, DWORD [32+esp]
        and     ebp, BN_TAIL_MASK
        jz      NEAR L$025aw_end
        ; 1..7 leftover words; the 7th needs no countdown.
        SUB_TAIL_ROUND 0, L$025aw_end
        SUB_TAIL_ROUND 4, L$025aw_end
        SUB_TAIL_ROUND 8, L$025aw_end
        SUB_TAIL_ROUND 12, L$025aw_end
        SUB_TAIL_ROUND 16, L$025aw_end
        SUB_TAIL_ROUND 20, L$025aw_end
        SUB_ROUND 24
L$025aw_end:
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

;-----------------------------------------------------------------------
; _bn_sub_part_words(dst, a, b, common, delta)
;   Phase 1: dst[i] = a[i] - b[i] - borrow for `common` words.
;   Phase 2 continues at the next word according to the sign of `delta`:
;     delta < 0 : dst[i] = 0 - b[i] - borrow   for -delta more words
;     delta > 0 : dst[i] = a[i] - borrow       for  delta more words; once a
;                 word does not borrow, the rest are copied from a and the
;                 result is 0
;     delta = 0 : nothing further
; In:    [4+esp]  = dst
;        [8+esp]  = a
;        [12+esp] = b
;        [16+esp] = common (word count of phase 1)
;        [20+esp] = delta  (signed word count of phase 2)
; Out:   eax = borrow out (0 or 1)
; Clobb: ecx, edx, flags
;-----------------------------------------------------------------------
global  _bn_sub_part_words
align   16
_bn_sub_part_words:
L$_bn_sub_part_words_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi

        ; ebx = dst, esi = a, edi = b, ebp = words left, eax = borrow.
        mov     ebx, DWORD [20+esp]
        mov     esi, DWORD [24+esp]
        mov     edi, DWORD [28+esp]
        mov     ebp, DWORD [32+esp]
        xor     eax, eax
        and     ebp, BN_UNROLL_MASK
        jz      NEAR L$026aw_finish
L$027aw_loop:
        SUB_ROUND 0
        SUB_ROUND 4
        SUB_ROUND 8
        SUB_ROUND 12
        SUB_ROUND 16
        SUB_ROUND 20
        SUB_ROUND 24
        SUB_ROUND 28

        add     esi, BN_UNROLL_BYTES
        add     edi, BN_UNROLL_BYTES
        add     ebx, BN_UNROLL_BYTES
        sub     ebp, BN_UNROLL
        jnz     NEAR L$027aw_loop
L$026aw_finish:
        mov     ebp, DWORD [32+esp]
        and     ebp, BN_TAIL_MASK
        jz      NEAR L$028aw_end
        ; 1..7 leftover words.  Unlike the plain add/sub routines the
        ; pointers are advanced after every word, because phase 2 below
        ; continues from them.
        SUB_PART_TAIL
        dec     ebp
        jz      NEAR L$028aw_end
        SUB_PART_TAIL
        dec     ebp
        jz      NEAR L$028aw_end
        SUB_PART_TAIL
        dec     ebp
        jz      NEAR L$028aw_end
        SUB_PART_TAIL
        dec     ebp
        jz      NEAR L$028aw_end
        SUB_PART_TAIL
        dec     ebp
        jz      NEAR L$028aw_end
        SUB_PART_TAIL
        dec     ebp
        jz      NEAR L$028aw_end
        SUB_PART_TAIL
L$028aw_end:
        ; Dispatch on the sign of delta (signed compare is intended here).
        mov     ebp, DWORD [36+esp]
        test    ebp, ebp
        jz      NEAR L$029pw_end
        jge     NEAR L$030pw_pos

        ; ---- delta < 0: a is exhausted, keep subtracting b from zero ----
        neg     ebp
        and     ebp, BN_UNROLL_MASK
        jz      NEAR L$031pw_neg_finish
L$032pw_neg_loop:
        PW_NEG_ROUND 0
        PW_NEG_ROUND 4
        PW_NEG_ROUND 8
        PW_NEG_ROUND 12
        PW_NEG_ROUND 16
        PW_NEG_ROUND 20
        PW_NEG_ROUND 24
        PW_NEG_ROUND 28

        add     edi, BN_UNROLL_BYTES
        add     ebx, BN_UNROLL_BYTES
        sub     ebp, BN_UNROLL
        jnz     NEAR L$032pw_neg_loop
L$031pw_neg_finish:
        mov     ebp, DWORD [36+esp]
        neg     ebp
        and     ebp, BN_TAIL_MASK
        jz      NEAR L$029pw_end
        PW_NEG_TAIL_ROUND 0, L$029pw_end
        PW_NEG_TAIL_ROUND 4, L$029pw_end
        PW_NEG_TAIL_ROUND 8, L$029pw_end
        PW_NEG_TAIL_ROUND 12, L$029pw_end
        PW_NEG_TAIL_ROUND 16, L$029pw_end
        PW_NEG_TAIL_ROUND 20, L$029pw_end
        PW_NEG_ROUND 24
        jmp     NEAR L$029pw_end

        ; ---- delta > 0: b is exhausted, propagate the borrow through a ----
L$030pw_pos:
        and     ebp, BN_UNROLL_MASK
        jz      NEAR L$033pw_pos_finish
L$034pw_pos_loop:
        ; Each round leaves to the copy code at the matching position as
        ; soon as the borrow has been absorbed.
        PW_POS_ROUND 0, L$035pw_nc0
        PW_POS_ROUND 4, L$036pw_nc1
        PW_POS_ROUND 8, L$037pw_nc2
        PW_POS_ROUND 12, L$038pw_nc3
        PW_POS_ROUND 16, L$039pw_nc4
        PW_POS_ROUND 20, L$040pw_nc5
        PW_POS_ROUND 24, L$041pw_nc6
        PW_POS_ROUND 28, L$042pw_nc7

        add     esi, BN_UNROLL_BYTES
        add     ebx, BN_UNROLL_BYTES
        sub     ebp, BN_UNROLL
        jnz     NEAR L$034pw_pos_loop
L$033pw_pos_finish:
        mov     ebp, DWORD [36+esp]
        and     ebp, BN_TAIL_MASK
        jz      NEAR L$029pw_end
        PW_POS_TAIL_ROUND 0, L$043pw_tail_nc0, L$029pw_end
        PW_POS_TAIL_ROUND 4, L$044pw_tail_nc1, L$029pw_end
        PW_POS_TAIL_ROUND 8, L$045pw_tail_nc2, L$029pw_end
        PW_POS_TAIL_ROUND 12, L$046pw_tail_nc3, L$029pw_end
        PW_POS_TAIL_ROUND 16, L$047pw_tail_nc4, L$029pw_end
        PW_POS_TAIL_ROUND 20, L$048pw_tail_nc5, L$029pw_end
        PW_POS_ROUND 24, L$049pw_tail_nc6
        ; Every word borrowed: report the borrow.
        mov     eax, 1
        jmp     NEAR L$029pw_end

        ; ---- borrow absorbed: the remaining words are copied from a ----
        ; L$0xxpw_ncK is entered after word K of the current group has been
        ; written, so each label sits just before the copy of word K+1.
L$050pw_nc_loop:
        PW_COPY 0
L$035pw_nc0:
        PW_COPY 4
L$036pw_nc1:
        PW_COPY 8
L$037pw_nc2:
        PW_COPY 12
L$038pw_nc3:
        PW_COPY 16
L$039pw_nc4:
        PW_COPY 20
L$040pw_nc5:
        PW_COPY 24
L$041pw_nc6:
        PW_COPY 28
L$042pw_nc7:
        add     esi, BN_UNROLL_BYTES
        add     ebx, BN_UNROLL_BYTES
        sub     ebp, BN_UNROLL
        jnz     NEAR L$050pw_nc_loop
        mov     ebp, DWORD [36+esp]
        and     ebp, BN_TAIL_MASK
        jz      NEAR L$051pw_nc_end
        PW_COPY 0
L$043pw_tail_nc0:
        dec     ebp
        jz      NEAR L$051pw_nc_end
        PW_COPY 4
L$044pw_tail_nc1:
        dec     ebp
        jz      NEAR L$051pw_nc_end
        PW_COPY 8
L$045pw_tail_nc2:
        dec     ebp
        jz      NEAR L$051pw_nc_end
        PW_COPY 12
L$046pw_tail_nc3:
        dec     ebp
        jz      NEAR L$051pw_nc_end
        PW_COPY 16
L$047pw_tail_nc4:
        dec     ebp
        jz      NEAR L$051pw_nc_end
        PW_COPY 20
L$048pw_tail_nc5:
        dec     ebp
        jz      NEAR L$051pw_nc_end
        PW_COPY 24
L$049pw_tail_nc6:
L$051pw_nc_end:
        ; No borrow left to report.
        xor     eax, eax
L$029pw_end:
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret
segment .bss
common  _OPENSSL_ia32cap_P 16