/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/******************************************************************************/
//                     ALGORITHM DESCRIPTION
//                     ---------------------
//
// Description:
//  Let K = 64 (table size).
//
//  Four sub-domains:
//    1. |x| < 1/(2*K)
//         expm1(x) ~ P(x)
//    2. 1/(2*K) <= |x| <= 56*log(2)
//         e^x - 1 = 2^(x/log(2)) - 1 = 2^n * T[j] * (1 + P(y)) - 1
//    3. 56*log(2) < x < MAX_LOG
//         e^x - 1 ~ e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
//    4. x < -56*log(2)
//         e^x - 1 = -1 + e^x ~ -1
//    where
//       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
//       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
//       values of 2^(j/K) are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]).
//
//       P(y) is a minimax polynomial approximation of exp(y)-1
//       on the small interval [-log(2)/K..log(2)/K] (the coefficients were
//       calculated with Maple V).
//
//    In case 3, to avoid problems with arithmetic overflow and underflow,
//    the value of 2^n is safely computed as 2^n1 * 2^n2, where
//    n1 in [-BIAS/2..BIAS/2] and BIAS is the value of the exponent bias.
//
//  Special cases:
//    expm1(NaN) is NaN
//    expm1(+INF) is +INF
//    expm1(-INF) is -1
//    expm1(x) is x for subnormals
//    for finite argument, only expm1(0)=0 is exact.
//    For IEEE double
//      if x > 709.782712893383973096 then expm1(x) overflow
//
/******************************************************************************/

#include <private/bionic_asm.h>

//-----------------------------------------------------------------------------
// static_func: position-independent lookup of the constant table.
//
// Out:    %eax = run-time address of static_const_table
// Uses:   %eax only; the call/popl pair leaves the stack balanced.
//
// The call to the very next instruction pushes the address of ..L2, which is
// popped into %eax; adding the _GLOBAL_OFFSET_TABLE_ displacement and then the
// @GOTOFF offset of the table turns it into the table address.
//-----------------------------------------------------------------------------
# -- Begin  static_func
        .text
        .align __bionic_asm_align
        .type static_func, @function
static_func:
..B1.1:
        call      ..L2
..L2:
        popl      %eax                                  // eax = address of ..L2
        lea       _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax
        lea       static_const_table@GOTOFF(%eax), %eax // eax = &static_const_table
        ret
        .size   static_func,.-static_func
# -- End  static_func

//-----------------------------------------------------------------------------
// double expm1(double x)      (also exported as expm1l, see ALIAS_SYMBOL)
//
// In:     x at 8(%ebp), i.e. 128(%esp) once the prologue has run
//         (120-byte frame + saved %ebp + return address).
// Out:    result on the x87 stack in %st(0); every exit path ends in fldl.
// Saved:  %ebx is spilled to 64(%esp) and reloaded in the epilogue.
// Uses:   %eax, %ecx, %edx, %xmm0-%xmm7 and the x87 stack.  The large
//         positive path rewrites the x87 control word and restores it.
// Frame:  (%esp)             result slot used by the PACKET_8 exit
//         8(%esp), 16(%esp)  scratch for SSE <-> x87 transfers
//         24(%esp)           saved x87 control word
//         28(%esp)           modified x87 control word
//         48(%esp)           result slot used by the PACKET_4 exit
//         64(%esp)           saved %ebx
//
// %ebx holds the address of static_const_table for the whole routine; all
// numeric displacements off %ebx are byte offsets into that table.
//-----------------------------------------------------------------------------
# -- Begin  expm1
ENTRY(expm1)
# parameter 1: 8 + %ebp
..B2.1:
..B2.2:
        pushl     %ebp
        movl      %esp, %ebp
        subl      $120, %esp
        movl      %ebx, 64(%esp)
        call      static_func
        movl      %eax, %ebx                    // ebx = table base
        movsd     128(%esp), %xmm0              // xmm0 = x
        unpcklpd  %xmm0, %xmm0                  // x in both lanes
        movapd    64(%ebx), %xmm1               // 64/log(2)
        movapd    48(%ebx), %xmm6               // 1.5*2^52 rounding shifter
        movapd    80(%ebx), %xmm2               // log(2)/64, high part
        movapd    96(%ebx), %xmm3               // log(2)/64, low part

        // Range dispatch on the top 16 bits of |x| (sign masked off).
        // edx = (16527 - top) | (top - 16304) has its sign bit set exactly
        // when top is outside [16304, 16527] = [0x3FB0, 0x408F].
        // NOTE(review): 0x3FB0 corresponds to |x| >= 2^-4, which is not the
        // 1/(2*K) bound quoted in the description above -- confirm.
        pextrw    $3, %xmm0, %eax
        andl      $32767, %eax
        movl      $16527, %edx
        subl      %eax, %edx
        subl      $16304, %eax
        orl       %eax, %edx
        cmpl      $-2147483648, %edx
        jae       .L_2TAG_PACKET_0.0.2          // out of range: small/huge/special

        // Argument reduction: m = round(x * 64/log(2)), y = x - m*log(2)/64.
        mulpd     %xmm0, %xmm1
        addpd     %xmm6, %xmm1                  // adding the shifter rounds to integer
        movapd    %xmm1, %xmm7                  // low bits of xmm7 hold m
        subpd     %xmm6, %xmm1                  // xmm1 = m as a double
        mulpd     %xmm1, %xmm2
        movapd    112(%ebx), %xmm4
        mulpd     %xmm1, %xmm3
        movapd    128(%ebx), %xmm5
        subpd     %xmm2, %xmm0                  // x - m*hi
        movd      %xmm7, %eax                   // eax = m
        movl      %eax, %ecx
        andl      $63, %ecx
        shll      $4, %ecx                      // ecx = (m & 63) * 16: table row offset
        sarl      $6, %eax                      // eax = n = m >> 6
        movl      %eax, %edx
        subpd     %xmm3, %xmm0                  // xmm0 = y (both lanes)
        movapd    160(%ebx,%ecx), %xmm2         // xmm2 = 16-byte table row for j

        // Polynomial in y using the coefficient rows at 112, 128 and 144
        // (approximately 1/120, 1/6, 1/24, 1/2 and 1/720), evaluated two
        // lanes at a time; interleaved with building 2^n in xmm7.
        movsd     144(%ebx), %xmm3
        mulpd     %xmm0, %xmm4
        movapd    %xmm0, %xmm1                  // keep y
        mulpd     %xmm0, %xmm0                  // y^2
        mulsd     %xmm0, %xmm3
        addpd     %xmm4, %xmm5
        mulsd     %xmm0, %xmm0                  // low lane: y^4
        movapd    %xmm2, %xmm4                  // low lane: low double of the row
        unpckhpd  %xmm2, %xmm2                  // xmm2 = high double of the row
        movdqa    16(%ebx), %xmm6
        pand      %xmm6, %xmm7                  // clear the low 6 bits (j) of m
        movdqa    32(%ebx), %xmm6
        paddq     %xmm6, %xmm7                  // add 1023 << 6 (exponent bias)
        psllq     $46, %xmm7                    // 6 + 46 = 52: xmm7 = 2^n
        mulsd     %xmm0, %xmm3
        mulpd     %xmm5, %xmm0
        addl      $894, %edx
        cmpl      $1916, %edx
        ja        .L_2TAG_PACKET_1.0.2          // n outside [-894, 1022]

        // Common case.  xmm2 becomes 2^n * (high table part); 1 + y is split
        // into a head (low 38 bits cleared) and a tail so the products can be
        // accumulated without losing the subtraction of 1.0.
        addsd     %xmm3, %xmm0
        xorpd     %xmm3, %xmm3
        movl      $16368, %eax
        pinsrw    $3, %eax, %xmm3               // xmm3 = 1.0 (top word 0x3FF0)
        orpd      %xmm7, %xmm2                  // merge exponent of 2^n into the row value
        mulsd     %xmm4, %xmm7
        movapd    %xmm3, %xmm6
        addsd     %xmm1, %xmm3                  // 1 + y
        pextrw    $3, %xmm2, %edx               // top 16 bits of the scaled table value
        pshufd    $238, %xmm0, %xmm5            // bring the high lane down
        psrlq     $38, %xmm3
        psllq     $38, %xmm3                    // head of 1 + y
        movapd    %xmm2, %xmm4
        subsd     %xmm3, %xmm6
        addsd     %xmm5, %xmm0
        addsd     %xmm6, %xmm1
        addsd     %xmm7, %xmm4
        mulsd     %xmm3, %xmm7
        mulsd     %xmm2, %xmm3
        xorpd     %xmm5, %xmm5
        movl      $16368, %eax
        pinsrw    $3, %eax, %xmm5               // xmm5 = 1.0, the term to subtract
        addsd     %xmm1, %xmm0

        // Choose where the "- 1.0" is folded in.  The branch is taken when
        // the top 16 bits in edx are outside [16256, 17184] = [0x3F80, 0x4320].
        // The three variants differ only in summation order, presumably to
        // limit cancellation -- TODO confirm.
        movl      $17184, %ecx
        subl      %edx, %ecx
        subl      $16256, %edx
        orl       %edx, %ecx
        jl        .L_2TAG_PACKET_2.0.2
        mulsd     %xmm4, %xmm0
        subsd     %xmm5, %xmm3
        addsd     %xmm7, %xmm0
        addsd     %xmm3, %xmm0
.L_2TAG_PACKET_3.0.2:
        jmp       .L_2TAG_PACKET_4.0.2
.L_2TAG_PACKET_2.0.2:
        cmpl      $0, %edx
        jl        .L_2TAG_PACKET_5.0.2          // top word below 0x3F80
        mulsd     %xmm4, %xmm0
        subsd     %xmm5, %xmm7
        addsd     %xmm7, %xmm0
        addsd     %xmm3, %xmm0
        jmp       .L_2TAG_PACKET_3.0.2
.L_2TAG_PACKET_5.0.2:
        mulsd     %xmm4, %xmm0
        addsd     %xmm7, %xmm0
        addsd     %xmm3, %xmm0
        subsd     %xmm5, %xmm0
        jmp       .L_2TAG_PACKET_3.0.2

        // n is outside [-894, 1022].  Negative x returns -1.0.  For positive x
        // the scale 2^n is applied as 2^n1 * 2^n2 on the x87 unit.
.L_2TAG_PACKET_1.0.2:
        movl      132(%esp), %ecx               // high dword of x (carries the sign)
        addsd     %xmm0, %xmm1
        unpckhpd  %xmm0, %xmm0
        addsd     %xmm1, %xmm0                  // xmm0 = polynomial value
        cmpl      $0, %ecx
        jl        .L_2TAG_PACKET_6.0.2          // x < 0: result is -1.0
        fstcw     24(%esp)                      // save the x87 control word
        movzwl    24(%esp), %edx
        orl       $768, %edx                    // set both precision-control bits (8-9)
        movw      %dx, 28(%esp)
        fldcw     28(%esp)
        movl      %eax, %edx
        sarl      $1, %eax                      // eax = n1 = n >> 1
        subl      %eax, %edx                    // edx = n2 = n - n1
        movdqa    (%ebx), %xmm6
        pandn     %xmm2, %xmm6                  // strip sign/exponent bits from the row value
        addl      $1023, %eax
        movd      %eax, %xmm3
        psllq     $52, %xmm3                    // xmm3 = 2^n1
        orpd      %xmm3, %xmm6
        mulsd     %xmm3, %xmm4
        movsd     %xmm0, 8(%esp)
        fldl      8(%esp)
        movsd     %xmm6, 16(%esp)
        fldl      16(%esp)
        movsd     %xmm4, 16(%esp)
        fldl      16(%esp)
        addl      $1023, %edx
        movd      %edx, %xmm4
        psllq     $52, %xmm4                    // xmm4 = 2^n2
        faddp     %st, %st(1)
        fmul      %st, %st(1)
        faddp     %st, %st(1)
        movsd     %xmm4, 8(%esp)
        fldl      8(%esp)
        fmulp     %st, %st(1)                   // apply the second scale factor
        fstpl     8(%esp)                       // round to double
        movsd     8(%esp), %xmm0
        fldcw     24(%esp)                      // restore the caller control word
        pextrw    $3, %xmm0, %ecx
        andl      $32752, %ecx
        cmpl      $32752, %ecx                  // exponent field all ones?
        jae       .L_2TAG_PACKET_7.0.2          // yes: result overflowed
        jmp       .L_2TAG_PACKET_4.0.2

        // Exception exit.  %edx is loaded with 41 or 42 but is not read again
        // in this file (presumably a leftover error code -- TODO confirm).
        // The value in xmm0 is returned through (%esp); xmm0 is then reloaded
        // with x, which is likewise not used afterwards in this file.
.L_2TAG_PACKET_7.0.2:
        movl      $41, %edx
.L_2TAG_PACKET_8.0.2:
        movsd     %xmm0, (%esp)
        movsd     128(%esp), %xmm0
        fldl      (%esp)
        jmp       .L_2TAG_PACKET_9.0.2

        // High dword of x >= 0x40900000 as a signed value: large positive x.
.L_2TAG_PACKET_10.0.2:
        cmpl      $2146435072, %eax
        jae       .L_2TAG_PACKET_11.0.2         // exponent all ones: +INF or NaN
        movsd     1272(%ebx), %xmm0             // largest finite double
        mulsd     %xmm0, %xmm0                  // squaring it overflows
        movl      $41, %edx
        jmp       .L_2TAG_PACKET_8.0.2

        // Exponent field all ones: separate +INF, -INF and NaN.
.L_2TAG_PACKET_11.0.2:
        movl      132(%esp), %eax
        movl      128(%esp), %edx
        movl      %eax, %ecx
        andl      $2147483647, %eax
        cmpl      $2146435072, %eax
        ja        .L_2TAG_PACKET_12.0.2         // fraction bits in the high dword: NaN
        cmpl      $0, %edx
        jne       .L_2TAG_PACKET_12.0.2         // fraction bits in the low dword: NaN
        cmpl      $0, %ecx
        jl        .L_2TAG_PACKET_13.0.2         // -INF
        movsd     1256(%ebx), %xmm0             // +INF
        jmp       .L_2TAG_PACKET_4.0.2
.L_2TAG_PACKET_13.0.2:
        jmp       .L_2TAG_PACKET_6.0.2          // expm1(-INF) = -1.0
.L_2TAG_PACKET_12.0.2:
        movsd     128(%esp), %xmm0
        addsd     %xmm0, %xmm0                  // NaN in, NaN out
        jmp       .L_2TAG_PACKET_4.0.2

        // Small |x| (top word below 0x3FB0).  eax is restored to the top word;
        // values below 15504 = 0x3C90 go to the tiny-argument code.  Otherwise
        // expm1(x) is evaluated directly as a polynomial in x with the
        // coefficient rows at 1184..1232; x is split with the mask at 1288
        // (low 26 bits cleared) into head and tail for the final products.
.L_2TAG_PACKET_14.0.2:
        addl      $16304, %eax
        cmpl      $15504, %eax
        jb        .L_2TAG_PACKET_15.0.2
        movapd    1184(%ebx), %xmm2
        pshufd    $68, %xmm0, %xmm1             // xmm1 = x in both lanes
        movapd    1200(%ebx), %xmm3
        movapd    1216(%ebx), %xmm4
        movsd     1232(%ebx), %xmm5             // about 1/6
        mulsd     %xmm1, %xmm1
        xorpd     %xmm6, %xmm6
        movl      $16352, %eax
        pinsrw    $3, %eax, %xmm6               // xmm6 = 0.5 (top word 0x3FE0)
        mulpd     %xmm0, %xmm2
        xorpd     %xmm7, %xmm7
        movl      $16368, %edx
        pinsrw    $3, %edx, %xmm7               // xmm7 = 1.0 (top word 0x3FF0)
        addpd     %xmm3, %xmm2
        mulsd     %xmm1, %xmm5
        pshufd    $228, %xmm1, %xmm3
        mulpd     %xmm1, %xmm1
        mulsd     %xmm0, %xmm6                  // x/2
        mulpd     %xmm0, %xmm2
        addpd     %xmm4, %xmm2
        movapd    %xmm7, %xmm4
        addsd     %xmm6, %xmm7                  // 1 + x/2
        mulpd     %xmm3, %xmm1
        psrlq     $27, %xmm7
        psllq     $27, %xmm7                    // head of 1 + x/2 (low 27 bits cleared)
        movsd     1288(%ebx), %xmm3
        subsd     %xmm7, %xmm4
        mulpd     %xmm1, %xmm2
        addsd     %xmm4, %xmm6
        pshufd    $238, %xmm2, %xmm1
        addsd     %xmm2, %xmm6
        andpd     %xmm0, %xmm3                  // head of x
        movapd    %xmm0, %xmm4
        addsd     %xmm6, %xmm1
        subsd     %xmm3, %xmm0                  // tail of x
        addsd     %xmm5, %xmm1
        mulsd     %xmm7, %xmm3
        mulsd     %xmm7, %xmm0
        mulsd     %xmm1, %xmm4
        addsd     %xmm4, %xmm0
        addsd     %xmm3, %xmm0
        jmp       .L_2TAG_PACKET_4.0.2

        // Tiny |x|: the result is x itself.  A top word >= 16 means x is a
        // normal number and is returned directly.  Otherwise x is zero or
        // subnormal; a non-zero subnormal additionally squares the smallest
        // normal double (which underflows) before leaving through PACKET_8.
.L_2TAG_PACKET_15.0.2:
        cmpl      $16, %eax
        jae       .L_2TAG_PACKET_3.0.2
        movapd    %xmm0, %xmm2
        movd      %xmm0, %eax                   // bits 0..31 of x
        psrlq     $31, %xmm2
        movd      %xmm2, %ecx                   // bits 31..62 of x (sign excluded)
        orl       %ecx, %eax
        je        .L_2TAG_PACKET_3.0.2          // x is +0 or -0
        movl      $16, %edx
        xorpd     %xmm1, %xmm1
        pinsrw    $3, %edx, %xmm1               // top word 0x0010: smallest normal
        mulsd     %xmm1, %xmm1
        movl      $42, %edx
        jmp       .L_2TAG_PACKET_8.0.2

        // Out-of-range dispatch.  eax still holds (top word - 16304).
.L_2TAG_PACKET_0.0.2:
        cmpl      $0, %eax
        jl        .L_2TAG_PACKET_14.0.2         // small |x|
        movl      132(%esp), %eax               // high dword of x
        cmpl      $1083179008, %eax             // 0x40900000, signed compare
        jge       .L_2TAG_PACKET_10.0.2         // large positive, +INF or NaN
        cmpl      $-1048576, %eax               // 0xFFF00000, unsigned compare
        jae       .L_2TAG_PACKET_11.0.2         // -INF or NaN with the sign bit set
        // Otherwise x is large and negative: fall into the -1.0 result.
.L_2TAG_PACKET_6.0.2:
        xorpd     %xmm0, %xmm0
        movl      $49136, %eax
        pinsrw    $3, %eax, %xmm0               // xmm0 = -1.0 (top word 0xBFF0)
        // Falls through to the common exit.
.L_2TAG_PACKET_4.0.2:
        movsd     %xmm0, 48(%esp)
        fldl      48(%esp)                      // return value in st(0)
.L_2TAG_PACKET_9.0.2:
        movl      64(%esp), %ebx
        movl      %ebp, %esp
        popl      %ebp
        ret
..B2.3:
END(expm1)
# -- End  expm1

# Start file scope ASM
ALIAS_SYMBOL(expm1l, expm1);
# End file scope ASM

//-----------------------------------------------------------------------------
// static_const_table: 1296 bytes, 16-byte aligned (read with movapd/movdqa).
// Each .long pair is one little-endian double: low dword, then high dword.
//-----------------------------------------------------------------------------
        .section .rodata, "a"
        .align 16
static_const_table:
        // offset    0: mask 0xFFF0000000000000 (sign and exponent bits), x2
        .long   0, 4293918720, 0, 4293918720
        // offset   16: mask 0x00000000FFFFFFC0 (drops the low 6 bits of m), x2
        .long   4294967232, 0, 4294967232, 0
        // offset   32: 0xFFC0 = 1023 << 6 (exponent bias before the shift), x2
        .long   65472, 0, 65472, 0
        // offset   48: 0x4338000000000000 = 1.5*2^52 rounding shifter, x2
        .long   0, 1127743488, 0, 1127743488
        // offset   64: 0x40571547652B82FE = 64/log(2), x2
        .long   1697350398, 1079448903, 1697350398, 1079448903
        // offset   80: log(2)/64, high part (trailing bits zero), x2
        .long   4277796864, 1065758274, 4277796864, 1065758274
        // offset   96: log(2)/64, low part, x2
        .long   3164486458, 1025308570, 3164486458, 1025308570
        // offset  112: polynomial coefficients, about 1/120 and 1/6
        .long   1963358694, 1065423121, 1431655765, 1069897045
        // offset  128: polynomial coefficients, about 1/24 and exactly 0.5
        .long   1431655765, 1067799893, 0, 1071644672
        // offset  144: polynomial coefficient, about 1/720, x2
        .long   381774871, 1062650220, 381774871, 1062650220
        // offset  160: 64 rows of 16 bytes, addressed as 160 + (m & 63)*16.
        // High double of each row: fraction bits with a zero exponent field
        // (high dword below 2^20), so that 2^n can be OR-ed in; presumably
        // the leading part of 2^(j/64).  Low double: a small correction
        // term, presumably the T_lo[j] of the description -- TODO confirm.
        .long   0, 0, 0, 0
        .long   1000070955, 1042145304, 1040187392, 11418
        .long   988267849, 1039500660, 3539992576, 22960
        .long   36755401, 1042114290, 402653184, 34629
        .long   3634769483, 1042178627, 1820327936, 46424
        .long   2155991225, 1041560680, 847249408, 58348
        .long   2766913307, 1039293264, 3489660928, 70401
        .long   3651174602, 1040488175, 2927624192, 82586
        .long   3073892131, 1042240606, 1006632960, 94904
        .long   1328391742, 1042019037, 3942645760, 107355
        .long   2650893825, 1041903210, 822083584, 119943
        .long   2397289153, 1041802037, 2281701376, 132667
        .long   430997175, 1042110606, 1845493760, 145530
        .long   1230936525, 1041801015, 1702887424, 158533
        .long   740675935, 1040178913, 4110417920, 171677
        .long   3489810261, 1041825986, 2793406464, 184965
        .long   2532600530, 1040767882, 167772160, 198398
        .long   3542557060, 1041827263, 2986344448, 211976
        .long   1401563777, 1041061093, 922746880, 225703
        .long   3129406026, 1041852413, 880803840, 239579
        .long   900993572, 1039283234, 1275068416, 253606
        .long   2115029358, 1042140042, 562036736, 267786
        .long   1086643152, 1041785419, 1610612736, 282120
        .long   82864366, 1041256244, 3045064704, 296610
        .long   2392968152, 1040913683, 3573547008, 311258
        .long   2905856183, 1040002214, 1988100096, 326066
        .long   3742008261, 1040011137, 1451229184, 341035
        .long   863393794, 1040880621, 914358272, 356167
        .long   1446136837, 1041372426, 3707764736, 371463
        .long   927855201, 1040617636, 360710144, 386927
        .long   1492679939, 1041050306, 2952790016, 402558
        .long   608827001, 1041582217, 2181038080, 418360
        .long   606260204, 1042271987, 1711276032, 434334
        .long   3163044019, 1041843851, 1006632960, 450482
        .long   4148747325, 1041962972, 3900702720, 466805
        .long   802924201, 1041275378, 1442840576, 483307
        .long   3052749833, 1041940577, 1937768448, 499988
        .long   2216116399, 1041486744, 914358272, 516851
        .long   2729697836, 1041445764, 2566914048, 533897
        .long   540608356, 1041310907, 2600468480, 551129
        .long   2916344493, 1040535661, 1107296256, 568549
        .long   731391814, 1039497014, 2566914048, 586158
        .long   1024722704, 1041461625, 2961178624, 603959
        .long   3806831748, 1041732499, 2675965952, 621954
        .long   238953304, 1040316488, 2189426688, 640145
        .long   749123235, 1041725785, 2063597568, 658534
        .long   1168187977, 1041175214, 2986344448, 677123
        .long   3506096399, 1042186095, 1426063360, 695915
        .long   1470221620, 1041675499, 2566914048, 714911
        .long   3182425146, 1041483134, 3087007744, 734114
        .long   3131698208, 1042208657, 4068474880, 753526
        .long   2300504125, 1041428596, 2415919104, 773150
        .long   2290297931, 1037388400, 3716153344, 792987
        .long   3532148223, 1041626194, 771751936, 813041
        .long   1161884404, 1042015258, 3699376128, 833312
        .long   876383176, 1037968878, 1241513984, 853805
        .long   3379986796, 1042213153, 3699376128, 874520
        .long   1545797737, 1041681569, 58720256, 895462
        .long   2925146801, 1042212567, 855638016, 916631
        .long   1316627971, 1038516204, 3883925504, 938030
        .long   3267869137, 1040337004, 2726297600, 959663
        .long   3720868999, 1041782409, 3992977408, 981531
        .long   433316142, 1041994064, 1526726656, 1003638
        .long   781232103, 1040093400, 2172649472, 1025985
        // offset 1184, 1200, 1216: coefficient pairs for the small-|x| polynomial
        .long   2773927732, 1053236707, 381774871, 1062650220
        .long   379653899, 1056571845, 286331153, 1065423121
        .long   436314138, 1059717536, 1431655765, 1067799893
        // offset 1232: about 1/6 (read with movsd)
        .long   1431655765, 1069897045
        // offset 1240: 0.5
        .long   0, 1071644672
        // offset 1248: 1.0
        .long   0, 1072693248
        // offset 1256: +INF (0x7FF0000000000000)
        .long   0, 2146435072
        // offset 1264: 0.0
        .long   0, 0
        // offset 1272: largest finite double (0x7FEFFFFFFFFFFFFF)
        .long   4294967295, 2146435071
        // offset 1280: smallest normal double (0x0010000000000000)
        .long   0, 1048576
        // offset 1288: mask 0xFFFFFFFFFC000000 (clears the low 26 bits)
        .long   4227858432, 4294967295
        .type static_const_table,@object
        .size static_const_table,1296
        .data
        .section .note.GNU-stack, ""
# End