// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
//
// SIKE P434 low-level field/multiprecision arithmetic for AArch64,
// Apple/Mach-O flavor (note __TEXT,__const, @PAGE/@PAGEOFF relocations and
// leading-underscore symbol names). Field elements are 7 little-endian
// 64-bit limbs (434 bits); double-width values are 14 limbs.
// All routines are straight-line (branch-free) constant-time code built on
// the carried add/sub chains (adds/adcs/adc, subs/sbcs/sbc); the flag state
// threads through each chain, so instruction order is load-bearing.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.section __TEXT,__const

# p434 x 2
# Only 6 quads are stored for this 7-limb constant: limbs 1 and 2 of 2*p434
# are both 0xFFFFFFFFFFFFFFFF, so the code below reloads the same register
# (x12) for both limbs instead of storing the value twice.
Lp434x2:
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
.quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
.quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688

# p434 + 1
# Low limbs of p434+1 that are zero are omitted; the reduction code only
# multiplies by these 4 non-trivial limbs.
Lp434p1:
.quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
.quad 0x6CFC5FD681C52056, 0x0002341F27177344

.text
//-----------------------------------------------------------------------
// _sike_mpmul
// In:   x0 = a (7 limbs), x1 = b (7 limbs)
// Out:  [x2] = a * b (14 limbs)
// One level of Karatsuba: computes AL*BL, AH*BH and (AH+AL)*(BH+BL)
// (each via a second Karatsuba level), then combines. Register pressure
// forces temporaries to be parked in the output buffer at [x2] mid-way
// (see the "Temporarily store" comments); those slots are overwritten
// with final limbs before return. Clobbers x0, x1, x3-x17; callee-saved
// x19-x28 are preserved via the stack frame.
//-----------------------------------------------------------------------
.globl _sike_mpmul
.private_extern _sike_mpmul
.align 4
_sike_mpmul:
        stp     x29, x30, [sp,#-96]!
        add     x29, sp, #0
        stp     x19, x20, [sp,#16]
        stp     x21, x22, [sp,#32]
        stp     x23, x24, [sp,#48]
        stp     x25, x26, [sp,#64]
        stp     x27, x28, [sp,#80]

        ldp     x3, x4, [x0]
        ldp     x5, x6, [x0,#16]
        ldp     x7, x8, [x0,#32]
        ldr     x9, [x0,#48]
        ldp     x10, x11, [x1,#0]
        ldp     x12, x13, [x1,#16]
        ldp     x14, x15, [x1,#32]
        ldr     x16, [x1,#48]

        // x3-x7 <- AH + AL, x7 <- carry
        adds    x3, x3, x7
        adcs    x4, x4, x8
        adcs    x5, x5, x9
        adcs    x6, x6, xzr
        adc     x7, xzr, xzr

        // x10-x13 <- BH + BL, x8 <- carry
        adds    x10, x10, x14
        adcs    x11, x11, x15
        adcs    x12, x12, x16
        adcs    x13, x13, xzr
        adc     x8, xzr, xzr

        // x9 <- combined carry
        and     x9, x7, x8
        // x7-x8 <- mask (0 or all-ones, from the 0/1 carry; branch-free
        // handling of the carry-out limbs of AH+AL and BH+BL)
        sub     x7, xzr, x7
        sub     x8, xzr, x8

        // x15-x19 <- masked (BH + BL)
        and     x14, x10, x7
        and     x15, x11, x7
        and     x16, x12, x7
        and     x17, x13, x7

        // x20-x23 <- masked (AH + AL)
        and     x20, x3, x8
        and     x21, x4, x8
        and     x22, x5, x8
        and     x23, x6, x8

        // x15-x19, x7 <- masked (AH+AL) + masked (BH+BL), step 1
        adds    x14, x14, x20
        adcs    x15, x15, x21
        adcs    x16, x16, x22
        adcs    x17, x17, x23
        adc     x7, x9, xzr

        // x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
        // (x3,x4 parked in the output buffer; reloaded below)
        stp     x3, x4, [x2,#0]
        // A0-A1 <- AH + AL, T0 <- mask
        adds    x3, x3, x5
        adcs    x4, x4, x6
        adc     x25, xzr, xzr

        // C6, T1 <- BH + BL, C7 <- mask
        adds    x23, x10, x12
        adcs    x26, x11, x13
        adc     x24, xzr, xzr

        // C0-C1 <- masked (BH + BL)
        sub     x19, xzr, x25
        sub     x20, xzr, x24
        and     x8, x23, x19
        and     x9, x26, x19

        // C4-C5 <- masked (AH + AL), T0 <- combined carry
        and     x21, x3, x20
        and     x22, x4, x20
        mul     x19, x3, x23
        mul     x20, x3, x26
        and     x25, x25, x24

        // C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
        adds    x8, x21, x8
        umulh   x21, x3, x26
        adcs    x9, x22, x9
        umulh   x22, x3, x23
        adc     x25, x25, xzr

        // C2-C5 <- (AH+AL) x (BH+BL), low part
        mul     x3, x4, x23
        umulh   x23, x4, x23
        adds    x20, x20, x22
        adc     x21, x21, xzr

        mul     x24, x4, x26
        umulh   x26, x4, x26
        adds    x20, x20, x3
        adcs    x21, x21, x23
        adc     x22, xzr, xzr

        adds    x21, x21, x24
        adc     x22, x22, x26

        // Reload the x3,x4 parked at [x2] above
        ldp     x3, x4, [x2,#0]

        // C2-C5, T0 <- (AH+AL) x (BH+BL), final part
        adds    x21, x8, x21
        umulh   x24, x3, x10
        umulh   x26, x3, x11
        adcs    x22, x9, x22
        mul     x8, x3, x10
        mul     x9, x3, x11
        adc     x25, x25, xzr

        // C0-C1, T1, C7 <- AL x BL
        mul     x3, x4, x10
        umulh   x10, x4, x10
        adds    x9, x9, x24
        adc     x26, x26, xzr

        mul     x23, x4, x11
        umulh   x11, x4, x11
        adds    x9, x9, x3
        adcs    x26, x26, x10
        adc     x24, xzr, xzr

        adds    x26, x26, x23
        adc     x24, x24, x11


        // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
        mul     x3, x5, x12
        umulh   x10, x5, x12
        subs    x19, x19, x8
        sbcs    x20, x20, x9
        sbcs    x21, x21, x26
        mul     x4, x5, x13
        umulh   x23, x5, x13
        sbcs    x22, x22, x24
        sbc     x25, x25, xzr

        // A0, A1, C6, B0 <- AH x BH
        mul     x5, x6, x12
        umulh   x12, x6, x12
        adds    x4, x4, x10
        adc     x23, x23, xzr

        mul     x11, x6, x13
        umulh   x13, x6, x13
        adds    x4, x4, x5
        adcs    x23, x23, x12
        adc     x10, xzr, xzr

        adds    x23, x23, x11
        adc     x10, x10, x13


        // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
        subs    x19, x19, x3
        sbcs    x20, x20, x4
        sbcs    x21, x21, x23
        sbcs    x22, x22, x10
        sbc     x25, x25, xzr

        adds    x19, x19, x26
        adcs    x20, x20, x24
        adcs    x21, x21, x3
        adcs    x22, x22, x4
        adcs    x23, x25, x23
        adc     x24, x10, xzr


        // x15-x19, x7 <- (AH+AL) x (BH+BL), final step
        adds    x14, x14, x21
        adcs    x15, x15, x22
        adcs    x16, x16, x23
        adcs    x17, x17, x24
        adc     x7, x7, xzr

        // Load AL
        ldp     x3, x4, [x0]
        ldp     x5, x6, [x0,#16]
        // Load BL
        ldp     x10, x11, [x1,#0]
        ldp     x12, x13, [x1,#16]

        // Temporarily store x8 in x2
        stp     x8, x9, [x2,#0]
        // x21-x28 <- AL x BL
        // A0-A1 <- AH + AL, T0 <- mask
        adds    x3, x3, x5
        adcs    x4, x4, x6
        adc     x8, xzr, xzr

        // C6, T1 <- BH + BL, C7 <- mask
        adds    x27, x10, x12
        adcs    x9, x11, x13
        adc     x28, xzr, xzr

        // C0-C1 <- masked (BH + BL)
        sub     x23, xzr, x8
        sub     x24, xzr, x28
        and     x21, x27, x23
        and     x22, x9, x23

        // C4-C5 <- masked (AH + AL), T0 <- combined carry
        and     x25, x3, x24
        and     x26, x4, x24
        mul     x23, x3, x27
        mul     x24, x3, x9
        and     x8, x8, x28

        // C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
        adds    x21, x25, x21
        umulh   x25, x3, x9
        adcs    x22, x26, x22
        umulh   x26, x3, x27
        adc     x8, x8, xzr

        // C2-C5 <- (AH+AL) x (BH+BL), low part
        mul     x3, x4, x27
        umulh   x27, x4, x27
        adds    x24, x24, x26
        adc     x25, x25, xzr

        mul     x28, x4, x9
        umulh   x9, x4, x9
        adds    x24, x24, x3
        adcs    x25, x25, x27
        adc     x26, xzr, xzr

        adds    x25, x25, x28
        adc     x26, x26, x9

        ldp     x3, x4, [x0,#0]

        // C2-C5, T0 <- (AH+AL) x (BH+BL), final part
        adds    x25, x21, x25
        umulh   x28, x3, x10
        umulh   x9, x3, x11
        adcs    x26, x22, x26
        mul     x21, x3, x10
        mul     x22, x3, x11
        adc     x8, x8, xzr

        // C0-C1, T1, C7 <- AL x BL
        mul     x3, x4, x10
        umulh   x10, x4, x10
        adds    x22, x22, x28
        adc     x9, x9, xzr

        mul     x27, x4, x11
        umulh   x11, x4, x11
        adds    x22, x22, x3
        adcs    x9, x9, x10
        adc     x28, xzr, xzr

        adds    x9, x9, x27
        adc     x28, x28, x11


        // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
        mul     x3, x5, x12
        umulh   x10, x5, x12
        subs    x23, x23, x21
        sbcs    x24, x24, x22
        sbcs    x25, x25, x9
        mul     x4, x5, x13
        umulh   x27, x5, x13
        sbcs    x26, x26, x28
        sbc     x8, x8, xzr

        // A0, A1, C6, B0 <- AH x BH
        mul     x5, x6, x12
        umulh   x12, x6, x12
        adds    x4, x4, x10
        adc     x27, x27, xzr

        mul     x11, x6, x13
        umulh   x13, x6, x13
        adds    x4, x4, x5
        adcs    x27, x27, x12
        adc     x10, xzr, xzr

        adds    x27, x27, x11
        adc     x10, x10, x13


        // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
        subs    x23, x23, x3
        sbcs    x24, x24, x4
        sbcs    x25, x25, x27
        sbcs    x26, x26, x10
        sbc     x8, x8, xzr

        adds    x23, x23, x9
        adcs    x24, x24, x28
        adcs    x25, x25, x3
        adcs    x26, x26, x4
        adcs    x27, x8, x27
        adc     x28, x10, xzr

        // Restore x8
        ldp     x8, x9, [x2,#0]

        // x8-x10,x20,x15-x17,x19 <- masked (AH+AL) x (BH+BL) - ALxBL
        subs    x8, x8, x21
        sbcs    x9, x9, x22
        sbcs    x19, x19, x23
        sbcs    x20, x20, x24
        sbcs    x14, x14, x25
        sbcs    x15, x15, x26
        sbcs    x16, x16, x27
        sbcs    x17, x17, x28
        sbc     x7, x7, xzr

        // Store ALxBL, low
        stp     x21, x22, [x2]
        stp     x23, x24, [x2,#16]

        // Load AH
        ldp     x3, x4, [x0,#32]
        ldr     x5, [x0,#48]
        // Load BH
        ldp     x10, x11, [x1,#32]
        ldr     x12, [x1,#48]

        adds    x8, x8, x25
        adcs    x9, x9, x26
        adcs    x19, x19, x27
        adcs    x20, x20, x28
        // x1 is free now; save the carry of this chain in it
        adc     x1, xzr, xzr

        add     x0, x0, #32
        // Temporarily store x8,x9 in x2
        stp     x8,x9, [x2,#32]
        // x21-x28 <- AH x BH (3x3-limb schoolbook product)

        // A0 * B0
        mul     x21, x3, x10    // C0
        umulh   x24, x3, x10

        // A0 * B1
        mul     x22, x3, x11
        umulh   x23, x3, x11

        // A1 * B0
        mul     x8, x4, x10
        umulh   x9, x4, x10

        adds    x22, x22, x24
        adc     x23, x23, xzr

        // A0 * B2
        mul     x27, x3, x12
        umulh   x28, x3, x12
        adds    x22, x22, x8    // C1
        adcs    x23, x23, x9
        adc     x24, xzr, xzr

        // A2 * B0
        mul     x8, x5, x10
        umulh   x25, x5, x10
        adds    x23, x23, x27
        adcs    x24, x24, x25
        adc     x25, xzr, xzr

        // A1 * B1
        mul     x27, x4, x11
        umulh   x9, x4, x11
        adds    x23, x23, x8
        adcs    x24, x24, x28
        adc     x25, x25, xzr

        // A1 * B2
        mul     x8, x4, x12
        umulh   x28, x4, x12
        adds    x23, x23, x27   // C2
        adcs    x24, x24, x9
        adc     x25, x25, xzr

        // A2 * B1
        mul     x27, x5, x11
        umulh   x9, x5, x11
        adds    x24, x24, x8
        adcs    x25, x25, x28
        adc     x26, xzr, xzr

        // A2 * B2
        mul     x8, x5, x12
        umulh   x28, x5, x12
        adds    x24, x24, x27   // C3
        adcs    x25, x25, x9
        adc     x26, x26, xzr

        adds    x25, x25, x8    // C4
        adc     x26, x26, x28   // C5

        // Restore x8,x9
        ldp     x8,x9, [x2,#32]

        // Prepare to re-materialize the carry saved in x1:
        // after neg, x1 = -carry; "adds x1, x1, #1" below sets the C flag
        // back to that saved carry (0-1+1 overflows iff carry was 1).
        neg     x1, x1

        // x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
        subs    x8, x8, x21
        sbcs    x9, x9, x22
        sbcs    x19, x19, x23
        sbcs    x20, x20, x24
        sbcs    x14, x14, x25
        sbcs    x15, x15, x26
        sbcs    x16, x16, xzr
        sbcs    x17, x17, xzr
        sbc     x7, x7, xzr

        // Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
        stp     x8, x9, [x2,#32]
        stp     x19, x20, [x2,#48]

        // Restore saved carry into C (see neg above), then fold AHxBH
        // into the high half of the result
        adds    x1, x1, #1
        adcs    x14, x14, x21
        adcs    x15, x15, x22
        adcs    x16, x16, x23
        adcs    x17, x17, x24
        adcs    x25, x7, x25
        adc     x26, x26, xzr

        stp     x14, x15, [x2,#64]
        stp     x16, x17, [x2,#80]
        stp     x25, x26, [x2,#96]

        ldp     x19, x20, [x29,#16]
        ldp     x21, x22, [x29,#32]
        ldp     x23, x24, [x29,#48]
        ldp     x25, x26, [x29,#64]
        ldp     x27, x28, [x29,#80]
        ldp     x29, x30, [sp],#96
        ret
//-----------------------------------------------------------------------
// _sike_fprdc
// In:   x0 = a (14 limbs, double-width value)
// Out:  [x1] = reduced 7-limb result
// Reduces a 14-limb value using the 4 non-trivial limbs of p434+1
// (Lp434p1); interleaves two-limb multiplies of a[i] by p434+1 with
// accumulation into the running high limbs.
// NOTE(review): shape matches the SIKE reference Montgomery reduction
// (rdc_mont) — confirm against the generating Perl source.
//-----------------------------------------------------------------------
.globl _sike_fprdc
.private_extern _sike_fprdc
.align 4
_sike_fprdc:
        stp     x29, x30, [sp, #-96]!
        add     x29, sp, xzr
        stp     x19, x20, [sp,#16]
        stp     x21, x22, [sp,#32]
        stp     x23, x24, [sp,#48]
        stp     x25, x26, [sp,#64]
        stp     x27, x28, [sp,#80]

        ldp     x2, x3, [x0,#0]         // a[0-1]

        // Load the prime constant (x23-x26 <- the 4 limbs of p434+1)
        adrp    x26, Lp434p1@PAGE
        add     x26, x26, Lp434p1@PAGEOFF
        ldp     x23, x24, [x26, #0x0]
        ldp     x25, x26, [x26,#0x10]

        // a[0-1] * p434+1
        mul     x4, x2, x23             // C0
        umulh   x7, x2, x23

        mul     x5, x2, x24
        umulh   x6, x2, x24

        mul     x10, x3, x23
        umulh   x11, x3, x23
        adds    x5, x5, x7
        adc     x6, x6, xzr

        mul     x27, x2, x25
        umulh   x28, x2, x25
        adds    x5, x5, x10             // C1
        adcs    x6, x6, x11
        adc     x7, xzr, xzr

        mul     x10, x3, x24
        umulh   x11, x3, x24
        adds    x6, x6, x27
        adcs    x7, x7, x28
        adc     x8, xzr, xzr

        mul     x27, x2, x26
        umulh   x28, x2, x26
        adds    x6, x6, x10             // C2
        adcs    x7, x7, x11
        adc     x8, x8, xzr

        mul     x10, x3, x25
        umulh   x11, x3, x25
        adds    x7, x7, x27
        adcs    x8, x8, x28
        adc     x9, xzr, xzr

        mul     x27, x3, x26
        umulh   x28, x3, x26
        adds    x7, x7, x10             // C3
        adcs    x8, x8, x11
        adc     x9, x9, xzr
        adds    x8, x8, x27             // C4
        adc     x9, x9, x28             // C5


        // Load a[3..13] and fold in the product above
        ldp     x10, x11, [x0, #0x18]
        ldp     x12, x13, [x0, #0x28]
        ldp     x14, x15, [x0, #0x38]
        ldp     x16, x17, [x0, #0x48]
        ldp     x19, x20, [x0, #0x58]
        ldr     x21, [x0, #0x68]

        adds    x10, x10, x4
        adcs    x11, x11, x5
        adcs    x12, x12, x6
        adcs    x13, x13, x7
        adcs    x14, x14, x8
        adcs    x15, x15, x9
        adcs    x22, x16, xzr
        adcs    x17, x17, xzr
        adcs    x19, x19, xzr
        adcs    x20, x20, xzr
        adc     x21, x21, xzr

        ldr     x2, [x0,#0x10]          // a[2]
        // a[2-3] * p434+1
        mul     x4, x2, x23             // C0
        umulh   x7, x2, x23

        mul     x5, x2, x24
        umulh   x6, x2, x24

        mul     x0, x10, x23
        umulh   x3, x10, x23
        adds    x5, x5, x7
        adc     x6, x6, xzr

        mul     x27, x2, x25
        umulh   x28, x2, x25
        adds    x5, x5, x0              // C1
        adcs    x6, x6, x3
        adc     x7, xzr, xzr

        mul     x0, x10, x24
        umulh   x3, x10, x24
        adds    x6, x6, x27
        adcs    x7, x7, x28
        adc     x8, xzr, xzr

        mul     x27, x2, x26
        umulh   x28, x2, x26
        adds    x6, x6, x0              // C2
        adcs    x7, x7, x3
        adc     x8, x8, xzr

        mul     x0, x10, x25
        umulh   x3, x10, x25
        adds    x7, x7, x27
        adcs    x8, x8, x28
        adc     x9, xzr, xzr

        mul     x27, x10, x26
        umulh   x28, x10, x26
        adds    x7, x7, x0              // C3
        adcs    x8, x8, x3
        adc     x9, x9, xzr
        adds    x8, x8, x27             // C4
        adc     x9, x9, x28             // C5


        adds    x12, x12, x4
        adcs    x13, x13, x5
        adcs    x14, x14, x6
        adcs    x15, x15, x7
        adcs    x16, x22, x8
        adcs    x17, x17, x9
        adcs    x22, x19, xzr
        adcs    x20, x20, xzr
        adc     x21, x21, xzr

        // a[4-5] * p434+1
        mul     x4, x11, x23            // C0
        umulh   x7, x11, x23

        mul     x5, x11, x24
        umulh   x6, x11, x24

        mul     x10, x12, x23
        umulh   x3, x12, x23
        adds    x5, x5, x7
        adc     x6, x6, xzr

        mul     x27, x11, x25
        umulh   x28, x11, x25
        adds    x5, x5, x10             // C1
        adcs    x6, x6, x3
        adc     x7, xzr, xzr

        mul     x10, x12, x24
        umulh   x3, x12, x24
        adds    x6, x6, x27
        adcs    x7, x7, x28
        adc     x8, xzr, xzr

        mul     x27, x11, x26
        umulh   x28, x11, x26
        adds    x6, x6, x10             // C2
        adcs    x7, x7, x3
        adc     x8, x8, xzr

        mul     x10, x12, x25
        umulh   x3, x12, x25
        adds    x7, x7, x27
        adcs    x8, x8, x28
        adc     x9, xzr, xzr

        mul     x27, x12, x26
        umulh   x28, x12, x26
        adds    x7, x7, x10             // C3
        adcs    x8, x8, x3
        adc     x9, x9, xzr
        adds    x8, x8, x27             // C4
        adc     x9, x9, x28             // C5


        adds    x14, x14, x4
        adcs    x15, x15, x5
        adcs    x16, x16, x6
        adcs    x17, x17, x7
        adcs    x19, x22, x8
        adcs    x20, x20, x9
        adc     x22, x21, xzr

        stp     x14, x15, [x1, #0x0]    // C0, C1

        // Final limb: a[6] (in x13) * p434+1
        mul     x4, x13, x23            // C0
        umulh   x10, x13, x23

        mul     x5, x13, x24
        umulh   x27, x13, x24
        adds    x5, x5, x10             // C1
        adc     x10, xzr, xzr

        mul     x6, x13, x25
        umulh   x28, x13, x25
        adds    x27, x10, x27
        adcs    x6, x6, x27             // C2
        adc     x10, xzr, xzr

        mul     x7, x13, x26
        umulh   x8, x13, x26
        adds    x28, x10, x28
        adcs    x7, x7, x28             // C3
        adc     x8, x8, xzr             // C4

        adds    x16, x16, x4
        adcs    x17, x17, x5
        adcs    x19, x19, x6
        adcs    x20, x20, x7
        adc     x21, x22, x8

        str     x16, [x1, #0x10]
        stp     x17, x19, [x1, #0x18]
        stp     x20, x21, [x1, #0x28]

        ldp     x19, x20, [x29,#16]
        ldp     x21, x22, [x29,#32]
        ldp     x23, x24, [x29,#48]
        ldp     x25, x26, [x29,#64]
        ldp     x27, x28, [x29,#80]
        ldp     x29, x30, [sp],#96
        ret
//-----------------------------------------------------------------------
// _sike_fpadd
// In:   x0 = a (7 limbs), x1 = b (7 limbs)
// Out:  [x2] = (a + b) reduced modulo 2*p434 (conditional, branch-free
//       correction: subtract 2*p434, then add it back under a mask if
//       the subtraction borrowed).
// Note: 2*p434 limbs 1 and 2 are both all-ones, so x12 is deliberately
// used for BOTH limbs (the Lp434x2 table stores only 6 quads).
//-----------------------------------------------------------------------
.globl _sike_fpadd
.private_extern _sike_fpadd
.align 4
_sike_fpadd:
        stp     x29,x30, [sp,#-16]!
        add     x29, sp, #0

        ldp     x3, x4, [x0,#0]
        ldp     x5, x6, [x0,#16]
        ldp     x7, x8, [x0,#32]
        ldr     x9, [x0,#48]
        ldp     x11, x12, [x1,#0]
        ldp     x13, x14, [x1,#16]
        ldp     x15, x16, [x1,#32]
        ldr     x17, [x1,#48]

        // Add a + b
        adds    x3, x3, x11
        adcs    x4, x4, x12
        adcs    x5, x5, x13
        adcs    x6, x6, x14
        adcs    x7, x7, x15
        adcs    x8, x8, x16
        adc     x9, x9, x17

        // Subtract 2xp434
        adrp    x17, Lp434x2@PAGE
        add     x17, x17, Lp434x2@PAGEOFF
        ldp     x11, x12, [x17, #0]
        ldp     x13, x14, [x17, #16]
        ldp     x15, x16, [x17, #32]
        subs    x3, x3, x11
        sbcs    x4, x4, x12
        sbcs    x5, x5, x12             // x12 reused: limbs 1 and 2 of 2*p434 are equal
        sbcs    x6, x6, x13
        sbcs    x7, x7, x14
        sbcs    x8, x8, x15
        sbcs    x9, x9, x16
        sbc     x0, xzr, xzr            // x0 can be reused now; all-ones iff borrow

        // Add 2xp434 anded with the mask in x0
        and     x11, x11, x0
        and     x12, x12, x0
        and     x13, x13, x0
        and     x14, x14, x0
        and     x15, x15, x0
        and     x16, x16, x0

        adds    x3, x3, x11
        adcs    x4, x4, x12
        adcs    x5, x5, x12             // x12 reused, as above
        adcs    x6, x6, x13
        adcs    x7, x7, x14
        adcs    x8, x8, x15
        adc     x9, x9, x16

        stp     x3, x4, [x2,#0]
        stp     x5, x6, [x2,#16]
        stp     x7, x8, [x2,#32]
        str     x9, [x2,#48]

        ldp     x29, x30, [sp],#16
        ret
//-----------------------------------------------------------------------
// _sike_fpsub
// In:   x0 = a (7 limbs), x1 = b (7 limbs)
// Out:  [x2] = (a - b), with 2*p434 added back under a borrow mask so
//       the result stays non-negative (branch-free).
// Same x12 reuse as _sike_fpadd (equal middle limbs of 2*p434).
//-----------------------------------------------------------------------
.globl _sike_fpsub
.private_extern _sike_fpsub
.align 4
_sike_fpsub:
        stp     x29, x30, [sp,#-16]!
        add     x29, sp, #0

        ldp     x3, x4, [x0,#0]
        ldp     x5, x6, [x0,#16]
        ldp     x7, x8, [x0,#32]
        ldr     x9, [x0,#48]
        ldp     x11, x12, [x1,#0]
        ldp     x13, x14, [x1,#16]
        ldp     x15, x16, [x1,#32]
        ldr     x17, [x1,#48]

        // Subtract a - b
        subs    x3, x3, x11
        sbcs    x4, x4, x12
        sbcs    x5, x5, x13
        sbcs    x6, x6, x14
        sbcs    x7, x7, x15
        sbcs    x8, x8, x16
        sbcs    x9, x9, x17
        sbc     x0, xzr, xzr            // all-ones iff borrow

        // Add 2xp434 anded with the mask in x0
        adrp    x17, Lp434x2@PAGE
        add     x17, x17, Lp434x2@PAGEOFF

        // First half
        ldp     x11, x12, [x17, #0]
        ldp     x13, x14, [x17, #16]
        ldp     x15, x16, [x17, #32]

        // Add 2xp434 anded with the mask in x0
        and     x11, x11, x0
        and     x12, x12, x0
        and     x13, x13, x0
        and     x14, x14, x0
        and     x15, x15, x0
        and     x16, x16, x0

        adds    x3, x3, x11
        adcs    x4, x4, x12
        adcs    x5, x5, x12             // x12 reused: limbs 1 and 2 of 2*p434 are equal
        adcs    x6, x6, x13
        adcs    x7, x7, x14
        adcs    x8, x8, x15
        adc     x9, x9, x16

        stp     x3, x4, [x2,#0]
        stp     x5, x6, [x2,#16]
        stp     x7, x8, [x2,#32]
        str     x9, [x2,#48]

        ldp     x29, x30, [sp],#16
        ret
//-----------------------------------------------------------------------
// _sike_mpadd_asm
// In:   x0 = a (7 limbs), x1 = b (7 limbs)
// Out:  [x2] = a + b (7 limbs; no modular correction — the final carry
//       is folded into the top limb and any carry-out is dropped).
//-----------------------------------------------------------------------
.globl _sike_mpadd_asm
.private_extern _sike_mpadd_asm
.align 4
_sike_mpadd_asm:
        stp     x29, x30, [sp,#-16]!
        add     x29, sp, #0

        ldp     x3, x4, [x0,#0]
        ldp     x5, x6, [x0,#16]
        ldp     x7, x8, [x0,#32]
        ldr     x9, [x0,#48]
        ldp     x11, x12, [x1,#0]
        ldp     x13, x14, [x1,#16]
        ldp     x15, x16, [x1,#32]
        ldr     x17, [x1,#48]

        adds    x3, x3, x11
        adcs    x4, x4, x12
        adcs    x5, x5, x13
        adcs    x6, x6, x14
        adcs    x7, x7, x15
        adcs    x8, x8, x16
        adc     x9, x9, x17

        stp     x3, x4, [x2,#0]
        stp     x5, x6, [x2,#16]
        stp     x7, x8, [x2,#32]
        str     x9, [x2,#48]

        ldp     x29, x30, [sp],#16
        ret
//-----------------------------------------------------------------------
// _sike_mpsubx2_asm
// In:   x0 = a (14 limbs), x1 = b (14 limbs)
// Out:  [x2] = a - b (14 limbs); x0 is set to 0 or all-ones (borrow
//       mask from the final sbc) — presumably the caller consumes this
//       as the return value (x0 under AAPCS64); confirm at call sites.
//-----------------------------------------------------------------------
.globl _sike_mpsubx2_asm
.private_extern _sike_mpsubx2_asm
.align 4
_sike_mpsubx2_asm:
        stp     x29, x30, [sp,#-16]!
        add     x29, sp, #0

        ldp     x3, x4, [x0,#0]
        ldp     x5, x6, [x0,#16]
        ldp     x11, x12, [x1,#0]
        ldp     x13, x14, [x1,#16]
        subs    x3, x3, x11
        sbcs    x4, x4, x12
        sbcs    x5, x5, x13
        sbcs    x6, x6, x14
        ldp     x7, x8, [x0,#32]
        ldp     x9, x10, [x0,#48]
        ldp     x11, x12, [x1,#32]
        ldp     x13, x14, [x1,#48]
        sbcs    x7, x7, x11
        sbcs    x8, x8, x12
        sbcs    x9, x9, x13
        sbcs    x10, x10, x14

        // Note: the loads below do not disturb the C flag, so the
        // borrow chain continues across the stores/loads.
        stp     x3, x4, [x2,#0]
        stp     x5, x6, [x2,#16]
        stp     x7, x8, [x2,#32]
        stp     x9, x10, [x2,#48]

        ldp     x3, x4, [x0,#64]
        ldp     x5, x6, [x0,#80]
        ldp     x11, x12, [x1,#64]
        ldp     x13, x14, [x1,#80]
        sbcs    x3, x3, x11
        sbcs    x4, x4, x12
        sbcs    x5, x5, x13
        sbcs    x6, x6, x14
        ldp     x7, x8, [x0,#96]
        ldp     x11, x12, [x1,#96]
        sbcs    x7, x7, x11
        sbcs    x8, x8, x12
        sbc     x0, xzr, xzr            // borrow mask: 0 or all-ones

        stp     x3, x4, [x2,#64]
        stp     x5, x6, [x2,#80]
        stp     x7, x8, [x2,#96]

        ldp     x29, x30, [sp],#16
        ret
//-----------------------------------------------------------------------
// _sike_mpdblsubx2_asm
// In:   x0 = a (14 limbs), x1 = b (14 limbs), x2 = c (14 limbs, in/out)
// Out:  [x2] = c - a - b (14 limbs)
// Processed in three chunks of 6/6/2 limbs. x9 accumulates the C flag
// (no-borrow indicator) of the two subtraction chains in each chunk;
// "2 - x9" is then the number of borrows to propagate into the next
// chunk, subtracted up-front via "subs x3, x3, x9".
//-----------------------------------------------------------------------
.globl _sike_mpdblsubx2_asm
.private_extern _sike_mpdblsubx2_asm
.align 4
_sike_mpdblsubx2_asm:
        stp     x29, x30, [sp, #-16]!
        add     x29, sp, #0

        ldp     x3, x4, [x2, #0]
        ldp     x5, x6, [x2,#16]
        ldp     x7, x8, [x2,#32]

        ldp     x11, x12, [x0, #0]
        ldp     x13, x14, [x0,#16]
        ldp     x15, x16, [x0,#32]

        subs    x3, x3, x11
        sbcs    x4, x4, x12
        sbcs    x5, x5, x13
        sbcs    x6, x6, x14
        sbcs    x7, x7, x15
        sbcs    x8, x8, x16

        // x9 stores carry
        adc     x9, xzr, xzr

        ldp     x11, x12, [x1, #0]
        ldp     x13, x14, [x1,#16]
        ldp     x15, x16, [x1,#32]
        subs    x3, x3, x11
        sbcs    x4, x4, x12
        sbcs    x5, x5, x13
        sbcs    x6, x6, x14
        sbcs    x7, x7, x15
        sbcs    x8, x8, x16
        adc     x9, x9, xzr

        stp     x3, x4, [x2, #0]
        stp     x5, x6, [x2,#16]
        stp     x7, x8, [x2,#32]

        ldp     x3, x4, [x2,#48]
        ldp     x5, x6, [x2,#64]
        ldp     x7, x8, [x2,#80]

        ldp     x11, x12, [x0,#48]
        ldp     x13, x14, [x0,#64]
        ldp     x15, x16, [x0,#80]

        // x9 = 2 - x9 (number of borrows out of the previous chunk)
        neg     x9, x9
        add     x9, x9, #2

        subs    x3, x3, x9
        sbcs    x3, x3, x11
        sbcs    x4, x4, x12
        sbcs    x5, x5, x13
        sbcs    x6, x6, x14
        sbcs    x7, x7, x15
        sbcs    x8, x8, x16
        adc     x9, xzr, xzr

        ldp     x11, x12, [x1,#48]
        ldp     x13, x14, [x1,#64]
        ldp     x15, x16, [x1,#80]
        subs    x3, x3, x11
        sbcs    x4, x4, x12
        sbcs    x5, x5, x13
        sbcs    x6, x6, x14
        sbcs    x7, x7, x15
        sbcs    x8, x8, x16
        adc     x9, x9, xzr

        stp     x3, x4, [x2,#48]
        stp     x5, x6, [x2,#64]
        stp     x7, x8, [x2,#80]

        ldp     x3, x4, [x2,#96]
        ldp     x11, x12, [x0,#96]
        ldp     x13, x14, [x1,#96]

        // x9 = 2 - x9 (borrows to propagate into the last two limbs)
        neg     x9, x9
        add     x9, x9, #2

        subs    x3, x3, x9
        sbcs    x3, x3, x11
        sbcs    x4, x4, x12
        subs    x3, x3, x13
        sbc     x4, x4, x14
        stp     x3, x4, [x2,#96]

        ldp     x29, x30, [sp],#16
        ret
#endif  // !OPENSSL_NO_ASM