; 32-bit x86 code in NASM/Yasm syntax.
; Exports four fixed-size multi-word routines operating on 32-bit words:
;   _bn_mul_comba8, _bn_mul_comba4, _bn_sqr_comba8, _bn_sqr_comba4
; Each routine reads its arguments from the stack, saves and restores
; esi/edi/ebp/ebx, and overwrites eax/ecx/edx and the flags.

; Select the code section according to the assembler's output format.
%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes .00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
; Not assembling with Yasm: define the @feat.00 symbol here (see link above).
$@feat.00 equ 1
%endif
section	.text	code align=64
%else
section	.text	code
%endif

;-----------------------------------------------------------------------
; _bn_mul_comba8: r[0..15] = a[0..7] * b[0..7]  (32-bit words)
; In (stack, at entry): [esp+4] = r, [esp+8] = a, [esp+12] = b
; Regs:  esi = a, edi = b. r is reloaded from the stack into eax every
;        time a result word is about to be stored.
;        ebx/ecx/ebp form a three-word accumulator whose roles rotate
;        from one result word to the next; each "Calculate word k"
;        section adds every product a[i]*b[j] with i+j == k.
; Saves: esi, edi, ebp, ebx.  Overwrites: eax, ecx, edx, flags.
; Note:  the loads placed between add/adc pairs are plain mov
;        instructions, which leave the carry flag untouched, so they
;        fetch the next operands without breaking the carry chain.
;        Do not reorder anything that sits between an add and its adc.
;-----------------------------------------------------------------------
global	_bn_mul_comba8
align	16
_bn_mul_comba8:
L$_bn_mul_comba8_begin:
	push	esi
	mov	esi,DWORD [12+esp]	; esi = a (one register pushed so far)
	push	edi
	mov	edi,DWORD [20+esp]	; edi = b (two registers pushed so far)
	push	ebp
	push	ebx
	xor	ebx,ebx
	mov	eax,DWORD [esi]
	xor	ecx,ecx
	mov	edx,DWORD [edi]
	; ################## Calculate word 0
	xor	ebp,ebp
	; mul a[0]*b[0]
	mul	edx			; edx:eax = eax * edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]	; eax = r (four registers pushed so far)
	adc	ecx,edx
	mov	edx,DWORD [edi]		; preload b[0] for the next product
	adc	ebp,0
	mov	DWORD [eax],ebx
	mov	eax,DWORD [4+esi]	; preload a[1] for the next product
	; saved r[0]
	; ################## Calculate word 1
	xor	ebx,ebx
	; mul a[1]*b[0]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [esi]
	adc	ebp,edx
	mov	edx,DWORD [4+edi]
	adc	ebx,0
	; mul a[0]*b[1]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esp]
	adc	ebp,edx
	mov	edx,DWORD [edi]
	adc	ebx,0
	mov	DWORD [4+eax],ecx
	mov	eax,DWORD [8+esi]
	; saved r[1]
	; ################## Calculate word 2
	xor	ecx,ecx
	; mul a[2]*b[0]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [4+esi]
	adc	ebx,edx
	mov	edx,DWORD [4+edi]
	adc	ecx,0
	; mul a[1]*b[1]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [esi]
	adc	ebx,edx
	mov	edx,DWORD [8+edi]
	adc	ecx,0
	; mul a[0]*b[2]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esp]
	adc	ebx,edx
	mov	edx,DWORD [edi]
	adc	ecx,0
	mov	DWORD [8+eax],ebp
	mov	eax,DWORD [12+esi]
	; saved r[2]
	; ################## Calculate word 3
	xor	ebp,ebp
	; mul a[3]*b[0]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [8+esi]
	adc	ecx,edx
	mov	edx,DWORD [4+edi]
	adc	ebp,0
	; mul a[2]*b[1]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [4+esi]
	adc	ecx,edx
	mov	edx,DWORD [8+edi]
	adc	ebp,0
	; mul a[1]*b[2]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [esi]
	adc	ecx,edx
	mov	edx,DWORD [12+edi]
	adc	ebp,0
	; mul a[0]*b[3]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]
	adc	ecx,edx
	mov	edx,DWORD [edi]
	adc	ebp,0
	mov	DWORD [12+eax],ebx
	mov	eax,DWORD [16+esi]
	; saved r[3]
	; ################## Calculate word 4
	xor	ebx,ebx
	; mul a[4]*b[0]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [12+esi]
	adc	ebp,edx
	mov	edx,DWORD [4+edi]
	adc	ebx,0
	; mul a[3]*b[1]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [8+esi]
	adc	ebp,edx
	mov	edx,DWORD [8+edi]
	adc	ebx,0
	; mul a[2]*b[2]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [4+esi]
	adc	ebp,edx
	mov	edx,DWORD [12+edi]
	adc	ebx,0
	; mul a[1]*b[3]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [esi]
	adc	ebp,edx
	mov	edx,DWORD [16+edi]
	adc	ebx,0
	; mul a[0]*b[4]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esp]
	adc	ebp,edx
	mov	edx,DWORD [edi]
	adc	ebx,0
	mov	DWORD [16+eax],ecx
	mov	eax,DWORD [20+esi]
	; saved r[4]
	; ################## Calculate word 5
	xor	ecx,ecx
	; mul a[5]*b[0]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [16+esi]
	adc	ebx,edx
	mov	edx,DWORD [4+edi]
	adc	ecx,0
	; mul a[4]*b[1]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [12+esi]
	adc	ebx,edx
	mov	edx,DWORD [8+edi]
	adc	ecx,0
	; mul a[3]*b[2]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [8+esi]
	adc	ebx,edx
	mov	edx,DWORD [12+edi]
	adc	ecx,0
	; mul a[2]*b[3]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [4+esi]
	adc	ebx,edx
	mov	edx,DWORD [16+edi]
	adc	ecx,0
	; mul a[1]*b[4]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [esi]
	adc	ebx,edx
	mov	edx,DWORD [20+edi]
	adc	ecx,0
	; mul a[0]*b[5]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esp]
	adc	ebx,edx
	mov	edx,DWORD [edi]
	adc	ecx,0
	mov	DWORD [20+eax],ebp
	mov	eax,DWORD [24+esi]
	; saved r[5]
	; ################## Calculate word 6
	xor	ebp,ebp
	; mul a[6]*b[0]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esi]
	adc	ecx,edx
	mov	edx,DWORD [4+edi]
	adc	ebp,0
	; mul a[5]*b[1]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [16+esi]
	adc	ecx,edx
	mov	edx,DWORD [8+edi]
	adc	ebp,0
	; mul a[4]*b[2]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [12+esi]
	adc	ecx,edx
	mov	edx,DWORD [12+edi]
	adc	ebp,0
	; mul a[3]*b[3]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [8+esi]
	adc	ecx,edx
	mov	edx,DWORD [16+edi]
	adc	ebp,0
	; mul a[2]*b[4]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [4+esi]
	adc	ecx,edx
	mov	edx,DWORD [20+edi]
	adc	ebp,0
	; mul a[1]*b[5]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [esi]
	adc	ecx,edx
	mov	edx,DWORD [24+edi]
	adc	ebp,0
	; mul a[0]*b[6]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]
	adc	ecx,edx
	mov	edx,DWORD [edi]
	adc	ebp,0
	mov	DWORD [24+eax],ebx
	mov	eax,DWORD [28+esi]
	; saved r[6]
	; ################## Calculate word 7
	xor	ebx,ebx
	; mul a[7]*b[0]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [24+esi]
	adc	ebp,edx
	mov	edx,DWORD [4+edi]
	adc	ebx,0
	; mul a[6]*b[1]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esi]
	adc	ebp,edx
	mov	edx,DWORD [8+edi]
	adc	ebx,0
	; mul a[5]*b[2]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [16+esi]
	adc	ebp,edx
	mov	edx,DWORD [12+edi]
	adc	ebx,0
	; mul a[4]*b[3]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [12+esi]
	adc	ebp,edx
	mov	edx,DWORD [16+edi]
	adc	ebx,0
	; mul a[3]*b[4]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [8+esi]
	adc	ebp,edx
	mov	edx,DWORD [20+edi]
	adc	ebx,0
	; mul a[2]*b[5]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [4+esi]
	adc	ebp,edx
	mov	edx,DWORD [24+edi]
	adc	ebx,0
	; mul a[1]*b[6]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [esi]
	adc	ebp,edx
	mov	edx,DWORD [28+edi]
	adc	ebx,0
	; mul a[0]*b[7]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esp]
	adc	ebp,edx
	mov	edx,DWORD [4+edi]
	adc	ebx,0
	mov	DWORD [28+eax],ecx
	mov	eax,DWORD [28+esi]
	; saved r[7]
	; ################## Calculate word 8
	xor	ecx,ecx
	; mul a[7]*b[1]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [24+esi]
	adc	ebx,edx
	mov	edx,DWORD [8+edi]
	adc	ecx,0
	; mul a[6]*b[2]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esi]
	adc	ebx,edx
	mov	edx,DWORD [12+edi]
	adc	ecx,0
	; mul a[5]*b[3]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [16+esi]
	adc	ebx,edx
	mov	edx,DWORD [16+edi]
	adc	ecx,0
	; mul a[4]*b[4]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [12+esi]
	adc	ebx,edx
	mov	edx,DWORD [20+edi]
	adc	ecx,0
	; mul a[3]*b[5]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [8+esi]
	adc	ebx,edx
	mov	edx,DWORD [24+edi]
	adc	ecx,0
	; mul a[2]*b[6]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [4+esi]
	adc	ebx,edx
	mov	edx,DWORD [28+edi]
	adc	ecx,0
	; mul a[1]*b[7]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esp]
	adc	ebx,edx
	mov	edx,DWORD [8+edi]
	adc	ecx,0
	mov	DWORD [32+eax],ebp
	mov	eax,DWORD [28+esi]
	; saved r[8]
	; ################## Calculate word 9
	xor	ebp,ebp
	; mul a[7]*b[2]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [24+esi]
	adc	ecx,edx
	mov	edx,DWORD [12+edi]
	adc	ebp,0
	; mul a[6]*b[3]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esi]
	adc	ecx,edx
	mov	edx,DWORD [16+edi]
	adc	ebp,0
	; mul a[5]*b[4]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [16+esi]
	adc	ecx,edx
	mov	edx,DWORD [20+edi]
	adc	ebp,0
	; mul a[4]*b[5]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [12+esi]
	adc	ecx,edx
	mov	edx,DWORD [24+edi]
	adc	ebp,0
	; mul a[3]*b[6]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [8+esi]
	adc	ecx,edx
	mov	edx,DWORD [28+edi]
	adc	ebp,0
	; mul a[2]*b[7]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]
	adc	ecx,edx
	mov	edx,DWORD [12+edi]
	adc	ebp,0
	mov	DWORD [36+eax],ebx
	mov	eax,DWORD [28+esi]
	; saved r[9]
	; ################## Calculate word 10
	xor	ebx,ebx
	; mul a[7]*b[3]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [24+esi]
	adc	ebp,edx
	mov	edx,DWORD [16+edi]
	adc	ebx,0
	; mul a[6]*b[4]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esi]
	adc	ebp,edx
	mov	edx,DWORD [20+edi]
	adc	ebx,0
	; mul a[5]*b[5]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [16+esi]
	adc	ebp,edx
	mov	edx,DWORD [24+edi]
	adc	ebx,0
	; mul a[4]*b[6]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [12+esi]
	adc	ebp,edx
	mov	edx,DWORD [28+edi]
	adc	ebx,0
	; mul a[3]*b[7]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esp]
	adc	ebp,edx
	mov	edx,DWORD [16+edi]
	adc	ebx,0
	mov	DWORD [40+eax],ecx
	mov	eax,DWORD [28+esi]
	; saved r[10]
	; ################## Calculate word 11
	xor	ecx,ecx
	; mul a[7]*b[4]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [24+esi]
	adc	ebx,edx
	mov	edx,DWORD [20+edi]
	adc	ecx,0
	; mul a[6]*b[5]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esi]
	adc	ebx,edx
	mov	edx,DWORD [24+edi]
	adc	ecx,0
	; mul a[5]*b[6]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [16+esi]
	adc	ebx,edx
	mov	edx,DWORD [28+edi]
	adc	ecx,0
	; mul a[4]*b[7]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esp]
	adc	ebx,edx
	mov	edx,DWORD [20+edi]
	adc	ecx,0
	mov	DWORD [44+eax],ebp
	mov	eax,DWORD [28+esi]
	; saved r[11]
	; ################## Calculate word 12
	xor	ebp,ebp
	; mul a[7]*b[5]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [24+esi]
	adc	ecx,edx
	mov	edx,DWORD [24+edi]
	adc	ebp,0
	; mul a[6]*b[6]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esi]
	adc	ecx,edx
	mov	edx,DWORD [28+edi]
	adc	ebp,0
	; mul a[5]*b[7]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]
	adc	ecx,edx
	mov	edx,DWORD [24+edi]
	adc	ebp,0
	mov	DWORD [48+eax],ebx
	mov	eax,DWORD [28+esi]
	; saved r[12]
	; ################## Calculate word 13
	xor	ebx,ebx
	; mul a[7]*b[6]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [24+esi]
	adc	ebp,edx
	mov	edx,DWORD [28+edi]
	adc	ebx,0
	; mul a[6]*b[7]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esp]
	adc	ebp,edx
	mov	edx,DWORD [28+edi]
	adc	ebx,0
	mov	DWORD [52+eax],ecx
	mov	eax,DWORD [28+esi]
	; saved r[13]
	; ################## Calculate word 14
	xor	ecx,ecx
	; mul a[7]*b[7]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esp]
	adc	ebx,edx
	adc	ecx,0			; this carry goes to ecx, which is not stored afterwards
	mov	DWORD [56+eax],ebp
	; saved r[14]
	; save r[15]
	mov	DWORD [60+eax],ebx
	; restore the saved registers in reverse push order
	pop	ebx
	pop	ebp
	pop	edi
	pop	esi
	ret

;-----------------------------------------------------------------------
; _bn_mul_comba4: r[0..7] = a[0..3] * b[0..3]  (32-bit words)
; In (stack, at entry): [esp+4] = r, [esp+8] = a, [esp+12] = b
; Regs:  esi = a, edi = b, eax = r while storing; ebx/ecx/ebp are the
;        rotating three-word accumulator, one "Calculate word k"
;        section per result word (products a[i]*b[j] with i+j == k).
; Saves: esi, edi, ebp, ebx.  Overwrites: eax, ecx, edx, flags.
; Note:  as in _bn_mul_comba8, the mov loads between add/adc pairs rely
;        on mov leaving the carry flag untouched.
;-----------------------------------------------------------------------
global	_bn_mul_comba4
align	16
_bn_mul_comba4:
L$_bn_mul_comba4_begin:
	push	esi
	mov	esi,DWORD [12+esp]	; esi = a (one register pushed so far)
	push	edi
	mov	edi,DWORD [20+esp]	; edi = b (two registers pushed so far)
	push	ebp
	push	ebx
	xor	ebx,ebx
	mov	eax,DWORD [esi]
	xor	ecx,ecx
	mov	edx,DWORD [edi]
	; ################## Calculate word 0
	xor	ebp,ebp
	; mul a[0]*b[0]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]	; eax = r (four registers pushed so far)
	adc	ecx,edx
	mov	edx,DWORD [edi]
	adc	ebp,0
	mov	DWORD [eax],ebx
	mov	eax,DWORD [4+esi]
	; saved r[0]
	; ################## Calculate word 1
	xor	ebx,ebx
	; mul a[1]*b[0]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [esi]
	adc	ebp,edx
	mov	edx,DWORD [4+edi]
	adc	ebx,0
	; mul a[0]*b[1]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esp]
	adc	ebp,edx
	mov	edx,DWORD [edi]
	adc	ebx,0
	mov	DWORD [4+eax],ecx
	mov	eax,DWORD [8+esi]
	; saved r[1]
	; ################## Calculate word 2
	xor	ecx,ecx
	; mul a[2]*b[0]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [4+esi]
	adc	ebx,edx
	mov	edx,DWORD [4+edi]
	adc	ecx,0
	; mul a[1]*b[1]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [esi]
	adc	ebx,edx
	mov	edx,DWORD [8+edi]
	adc	ecx,0
	; mul a[0]*b[2]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esp]
	adc	ebx,edx
	mov	edx,DWORD [edi]
	adc	ecx,0
	mov	DWORD [8+eax],ebp
	mov	eax,DWORD [12+esi]
	; saved r[2]
	; ################## Calculate word 3
	xor	ebp,ebp
	; mul a[3]*b[0]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [8+esi]
	adc	ecx,edx
	mov	edx,DWORD [4+edi]
	adc	ebp,0
	; mul a[2]*b[1]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [4+esi]
	adc	ecx,edx
	mov	edx,DWORD [8+edi]
	adc	ebp,0
	; mul a[1]*b[2]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [esi]
	adc	ecx,edx
	mov	edx,DWORD [12+edi]
	adc	ebp,0
	; mul a[0]*b[3]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]
	adc	ecx,edx
	mov	edx,DWORD [4+edi]
	adc	ebp,0
	mov	DWORD [12+eax],ebx
	mov	eax,DWORD [12+esi]
	; saved r[3]
	; ################## Calculate word 4
	xor	ebx,ebx
	; mul a[3]*b[1]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [8+esi]
	adc	ebp,edx
	mov	edx,DWORD [8+edi]
	adc	ebx,0
	; mul a[2]*b[2]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [4+esi]
	adc	ebp,edx
	mov	edx,DWORD [12+edi]
	adc	ebx,0
	; mul a[1]*b[3]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esp]
	adc	ebp,edx
	mov	edx,DWORD [8+edi]
	adc	ebx,0
	mov	DWORD [16+eax],ecx
	mov	eax,DWORD [12+esi]
	; saved r[4]
	; ################## Calculate word 5
	xor	ecx,ecx
	; mul a[3]*b[2]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [8+esi]
	adc	ebx,edx
	mov	edx,DWORD [12+edi]
	adc	ecx,0
	; mul a[2]*b[3]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esp]
	adc	ebx,edx
	mov	edx,DWORD [12+edi]
	adc	ecx,0
	mov	DWORD [20+eax],ebp
	mov	eax,DWORD [12+esi]
	; saved r[5]
	; ################## Calculate word 6
	xor	ebp,ebp
	; mul a[3]*b[3]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]
	adc	ecx,edx
	adc	ebp,0			; this carry goes to ebp, which is not stored afterwards
	mov	DWORD [24+eax],ebx
	; saved r[6]
	; save r[7]
	mov	DWORD [28+eax],ecx
	; restore the saved registers in reverse push order
	pop	ebx
	pop	ebp
	pop	edi
	pop	esi
	ret

;-----------------------------------------------------------------------
; _bn_sqr_comba8: r[0..15] = a[0..7] squared  (32-bit words)
; In (stack, at entry): [esp+4] = r, [esp+8] = a
; Regs:  edi = r, esi = a; ebx/ecx/ebp are the rotating three-word
;        accumulator, one "Calculate word k" section per result word.
;        Products a[i]*a[j] with i != j are computed once and doubled
;        (add eax,eax / adc edx,edx / adc <top word>,0) before being
;        added; squares a[i]*a[i] use "mul eax" and are added as-is.
; Saves: esi, edi, ebp, ebx.  Overwrites: eax, ecx, edx, flags.
; Note:  mov loads placed between add/adc instructions rely on mov
;        leaving the carry flag untouched; keep the instruction order.
;-----------------------------------------------------------------------
global	_bn_sqr_comba8
align	16
_bn_sqr_comba8:
L$_bn_sqr_comba8_begin:
	push	esi
	push	edi
	push	ebp
	push	ebx
	mov	edi,DWORD [20+esp]	; edi = r (four registers pushed)
	mov	esi,DWORD [24+esp]	; esi = a
	xor	ebx,ebx
	xor	ecx,ecx
	mov	eax,DWORD [esi]
	; ############### Calculate word 0
	xor	ebp,ebp
	; sqr a[0]*a[0]
	mul	eax			; edx:eax = eax * eax
	add	ebx,eax
	adc	ecx,edx
	mov	edx,DWORD [esi]
	adc	ebp,0
	mov	DWORD [edi],ebx
	mov	eax,DWORD [4+esi]
	; saved r[0]
	; ############### Calculate word 1
	xor	ebx,ebx
	; sqr a[1]*a[0]
	mul	edx
	add	eax,eax			; double the off-diagonal product (low word)
	adc	edx,edx			; ... high word, taking the bit shifted out of eax
	adc	ebx,0			; ... bit shifted out of edx goes to the top accumulator word
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [8+esi]
	adc	ebx,0
	mov	DWORD [4+edi],ecx
	mov	edx,DWORD [esi]
	; saved r[1]
	; ############### Calculate word 2
	xor	ecx,ecx
	; sqr a[2]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [4+esi]
	adc	ecx,0
	; sqr a[1]*a[1]
	mul	eax
	add	ebp,eax
	adc	ebx,edx
	mov	edx,DWORD [esi]
	adc	ecx,0
	mov	DWORD [8+edi],ebp
	mov	eax,DWORD [12+esi]
	; saved r[2]
	; ############### Calculate word 3
	xor	ebp,ebp
	; sqr a[3]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [8+esi]
	adc	ebp,0
	mov	edx,DWORD [4+esi]
	; sqr a[2]*a[1]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [16+esi]
	adc	ebp,0
	mov	DWORD [12+edi],ebx
	mov	edx,DWORD [esi]
	; saved r[3]
	; ############### Calculate word 4
	xor	ebx,ebx
	; sqr a[4]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [12+esi]
	adc	ebx,0
	mov	edx,DWORD [4+esi]
	; sqr a[3]*a[1]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [8+esi]
	adc	ebx,0
	; sqr a[2]*a[2]
	mul	eax
	add	ecx,eax
	adc	ebp,edx
	mov	edx,DWORD [esi]
	adc	ebx,0
	mov	DWORD [16+edi],ecx
	mov	eax,DWORD [20+esi]
	; saved r[4]
	; ############### Calculate word 5
	xor	ecx,ecx
	; sqr a[5]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [16+esi]
	adc	ecx,0
	mov	edx,DWORD [4+esi]
	; sqr a[4]*a[1]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [12+esi]
	adc	ecx,0
	mov	edx,DWORD [8+esi]
	; sqr a[3]*a[2]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [24+esi]
	adc	ecx,0
	mov	DWORD [20+edi],ebp
	mov	edx,DWORD [esi]
	; saved r[5]
	; ############### Calculate word 6
	xor	ebp,ebp
	; sqr a[6]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [20+esi]
	adc	ebp,0
	mov	edx,DWORD [4+esi]
	; sqr a[5]*a[1]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [16+esi]
	adc	ebp,0
	mov	edx,DWORD [8+esi]
	; sqr a[4]*a[2]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [12+esi]
	adc	ebp,0
	; sqr a[3]*a[3]
	mul	eax
	add	ebx,eax
	adc	ecx,edx
	mov	edx,DWORD [esi]
	adc	ebp,0
	mov	DWORD [24+edi],ebx
	mov	eax,DWORD [28+esi]
	; saved r[6]
	; ############### Calculate word 7
	xor	ebx,ebx
	; sqr a[7]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [24+esi]
	adc	ebx,0
	mov	edx,DWORD [4+esi]
	; sqr a[6]*a[1]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [20+esi]
	adc	ebx,0
	mov	edx,DWORD [8+esi]
	; sqr a[5]*a[2]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [16+esi]
	adc	ebx,0
	mov	edx,DWORD [12+esi]
	; sqr a[4]*a[3]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [28+esi]
	adc	ebx,0
	mov	DWORD [28+edi],ecx
	mov	edx,DWORD [4+esi]
	; saved r[7]
	; ############### Calculate word 8
	xor	ecx,ecx
	; sqr a[7]*a[1]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [24+esi]
	adc	ecx,0
	mov	edx,DWORD [8+esi]
	; sqr a[6]*a[2]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [20+esi]
	adc	ecx,0
	mov	edx,DWORD [12+esi]
	; sqr a[5]*a[3]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [16+esi]
	adc	ecx,0
	; sqr a[4]*a[4]
	mul	eax
	add	ebp,eax
	adc	ebx,edx
	mov	edx,DWORD [8+esi]
	adc	ecx,0
	mov	DWORD [32+edi],ebp
	mov	eax,DWORD [28+esi]
	; saved r[8]
	; ############### Calculate word 9
	xor	ebp,ebp
	; sqr a[7]*a[2]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [24+esi]
	adc	ebp,0
	mov	edx,DWORD [12+esi]
	; sqr a[6]*a[3]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [20+esi]
	adc	ebp,0
	mov	edx,DWORD [16+esi]
	; sqr a[5]*a[4]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [28+esi]
	adc	ebp,0
	mov	DWORD [36+edi],ebx
	mov	edx,DWORD [12+esi]
	; saved r[9]
	; ############### Calculate word 10
	xor	ebx,ebx
	; sqr a[7]*a[3]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [24+esi]
	adc	ebx,0
	mov	edx,DWORD [16+esi]
	; sqr a[6]*a[4]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [20+esi]
	adc	ebx,0
	; sqr a[5]*a[5]
	mul	eax
	add	ecx,eax
	adc	ebp,edx
	mov	edx,DWORD [16+esi]
	adc	ebx,0
	mov	DWORD [40+edi],ecx
	mov	eax,DWORD [28+esi]
	; saved r[10]
	; ############### Calculate word 11
	xor	ecx,ecx
	; sqr a[7]*a[4]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [24+esi]
	adc	ecx,0
	mov	edx,DWORD [20+esi]
	; sqr a[6]*a[5]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [28+esi]
	adc	ecx,0
	mov	DWORD [44+edi],ebp
	mov	edx,DWORD [20+esi]
	; saved r[11]
	; ############### Calculate word 12
	xor	ebp,ebp
	; sqr a[7]*a[5]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [24+esi]
	adc	ebp,0
	; sqr a[6]*a[6]
	mul	eax
	add	ebx,eax
	adc	ecx,edx
	mov	edx,DWORD [24+esi]
	adc	ebp,0
	mov	DWORD [48+edi],ebx
	mov	eax,DWORD [28+esi]
	; saved r[12]
	; ############### Calculate word 13
	xor	ebx,ebx
	; sqr a[7]*a[6]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [28+esi]
	adc	ebx,0
	mov	DWORD [52+edi],ecx
	; saved r[13]
	; ############### Calculate word 14
	xor	ecx,ecx
	; sqr a[7]*a[7]
	mul	eax
	add	ebp,eax
	adc	ebx,edx
	adc	ecx,0			; this carry goes to ecx, which is not stored afterwards
	mov	DWORD [56+edi],ebp
	; saved r[14]
	mov	DWORD [60+edi],ebx	; r[15]
	; restore the saved registers in reverse push order
	pop	ebx
	pop	ebp
	pop	edi
	pop	esi
	ret

;-----------------------------------------------------------------------
; _bn_sqr_comba4: r[0..7] = a[0..3] squared  (32-bit words)
; In (stack, at entry): [esp+4] = r, [esp+8] = a
; Regs:  edi = r, esi = a; ebx/ecx/ebp are the rotating three-word
;        accumulator. Products a[i]*a[j] with i != j are doubled before
;        being added; squares a[i]*a[i] use "mul eax" and are added
;        as-is.
; Saves: esi, edi, ebp, ebx.  Overwrites: eax, ecx, edx, flags.
; Note:  mov loads placed between add/adc instructions rely on mov
;        leaving the carry flag untouched; keep the instruction order.
;-----------------------------------------------------------------------
global	_bn_sqr_comba4
align	16
_bn_sqr_comba4:
L$_bn_sqr_comba4_begin:
	push	esi
	push	edi
	push	ebp
	push	ebx
	mov	edi,DWORD [20+esp]	; edi = r (four registers pushed)
	mov	esi,DWORD [24+esp]	; esi = a
	xor	ebx,ebx
	xor	ecx,ecx
	mov	eax,DWORD [esi]
	; ############### Calculate word 0
	xor	ebp,ebp
	; sqr a[0]*a[0]
	mul	eax
	add	ebx,eax
	adc	ecx,edx
	mov	edx,DWORD [esi]
	adc	ebp,0
	mov	DWORD [edi],ebx
	mov	eax,DWORD [4+esi]
	; saved r[0]
	; ############### Calculate word 1
	xor	ebx,ebx
	; sqr a[1]*a[0]
	mul	edx
	add	eax,eax			; double the off-diagonal product (low word)
	adc	edx,edx			; ... high word, taking the bit shifted out of eax
	adc	ebx,0			; ... bit shifted out of edx goes to the top accumulator word
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [8+esi]
	adc	ebx,0
	mov	DWORD [4+edi],ecx
	mov	edx,DWORD [esi]
	; saved r[1]
	; ############### Calculate word 2
	xor	ecx,ecx
	; sqr a[2]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [4+esi]
	adc	ecx,0
	; sqr a[1]*a[1]
	mul	eax
	add	ebp,eax
	adc	ebx,edx
	mov	edx,DWORD [esi]
	adc	ecx,0
	mov	DWORD [8+edi],ebp
	mov	eax,DWORD [12+esi]
	; saved r[2]
	; ############### Calculate word 3
	xor	ebp,ebp
	; sqr a[3]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [8+esi]
	adc	ebp,0
	mov	edx,DWORD [4+esi]
	; sqr a[2]*a[1]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [12+esi]
	adc	ebp,0
	mov	DWORD [12+edi],ebx
	mov	edx,DWORD [4+esi]
	; saved r[3]
	; ############### Calculate word 4
	xor	ebx,ebx
	; sqr a[3]*a[1]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [8+esi]
	adc	ebx,0
	; sqr a[2]*a[2]
	mul	eax
	add	ecx,eax
	adc	ebp,edx
	mov	edx,DWORD [8+esi]
	adc	ebx,0
	mov	DWORD [16+edi],ecx
	mov	eax,DWORD [12+esi]
	; saved r[4]
	; ############### Calculate word 5
	xor	ecx,ecx
	; sqr a[3]*a[2]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [12+esi]
	adc	ecx,0
	mov	DWORD [20+edi],ebp
	; saved r[5]
	; ############### Calculate word 6
	xor	ebp,ebp
	; sqr a[3]*a[3]
	mul	eax
	add	ebx,eax
	adc	ecx,edx
	adc	ebp,0			; this carry goes to ebp, which is not stored afterwards
	mov	DWORD [24+edi],ebx
	; saved r[6]
	mov	DWORD [28+edi],ecx	; r[7]
	; restore the saved registers in reverse push order
	pop	ebx
	pop	ebp
	pop	edi
	pop	esi
	ret