/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/******************************************************************************/
//                     ALGORITHM DESCRIPTION
//                     ---------------------
//
// Description:
//  Let K = 64 (table size).
//
//       e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
//
//  where
//       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
//       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
//
//  The values of 2^(j/K) are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
//
//  P(y) is a minimax polynomial approximation of exp(y)-1
//  on small interval [-log(2)/K..log(2)/K] (coefficients were calculated
//  by Maple V).
//
//  To avoid problems with arithmetic overflow and underflow,
//  the value of 2^n is safely computed as 2^n1 * 2^n2,
//  where n1 in [-BIAS/2..BIAS/2] and BIAS is the value of the exponent bias.
//
// Special cases:
//  exp(NaN) = NaN
//  exp(+INF) = +INF
//  exp(-INF) = 0
//  exp(x) = 1 for subnormals
//  for finite argument, only exp(0)=1 is exact
//  For IEEE double
//    if x >  709.782712893383973096 then exp(x) overflow
//    if x < -745.133219101941108420 then exp(x) underflow
//
/******************************************************************************/

#include <private/bionic_asm.h>
# -- Begin  exp
//------------------------------------------------------------------------------
// double exp(double x)
//
// Syntax: GAS, AT&T operand order (source, destination).
// In:     %xmm0 = x
// Out:    %xmm0 = result
// Stack:  24 bytes reserved on entry and released at ..B1.5.
//           8(%rsp)  copy of x, re-read by the special-case paths
//          16(%rsp)  result spill slot used on the .L_2TAG_PACKET_6.0.2 path
//           0(%rsp)  receives the constants 14 or 15 on some paths; it is
//                    never read back in this routine
//                    (NOTE(review): looks like a leftover error-code slot)
// Writes: %eax, %ecx, %edx, %r8, %xmm0-%xmm7, flags. No calls are made.
// NOTE(review): register usage matches the System V AMD64 convention
//               (argument and result in %xmm0) - confirm the target ABI.
//------------------------------------------------------------------------------
ENTRY(exp)
# parameter 1: %xmm0
..B1.1:
..___tag_value_exp.1:
        subq      $24, %rsp
..___tag_value_exp.3:
        movsd     %xmm0, 8(%rsp)                // keep x for the special-case paths
..B1.2:
        unpcklpd  %xmm0, %xmm0                  // x in both lanes
        movapd    cv(%rip), %xmm1               // multiplier for x (see cv)
        movapd    Shifter(%rip), %xmm6
        movapd    16+cv(%rip), %xmm2            // first reduction constant
        movapd    32+cv(%rip), %xmm3            // second reduction constant
        // Range check on the top 16 bits of x with the sign bit removed:
        // the branch is taken when that word is below 15504 (0x3C90) or above
        // 16527 (0x408F), because either subtraction then sets bit 31.
        pextrw    $3, %xmm0, %eax
        andl      $32767, %eax
        movl      $16527, %edx
        subl      %eax, %edx
        subl      $15504, %eax
        orl       %eax, %edx
        cmpl      $-2147483648, %edx
        jae       .L_2TAG_PACKET_0.0.2
        // Main path. Adding and then subtracting Shifter leaves a rounded
        // value in xmm1 while xmm7 keeps the sum, whose low 32 bits are read
        // below as the integer m.
        mulpd     %xmm0, %xmm1
        addpd     %xmm6, %xmm1
        movapd    %xmm1, %xmm7
        subpd     %xmm6, %xmm1
        mulpd     %xmm1, %xmm2
        movapd    64+cv(%rip), %xmm4            // polynomial coefficients
        mulpd     %xmm1, %xmm3
        movapd    80+cv(%rip), %xmm5            // polynomial coefficients
        subpd     %xmm2, %xmm0                  // first reduction step
        movd      %xmm7, %eax                   // eax = m
        movl      %eax, %ecx
        andl      $63, %ecx                     // table index = m & 63
        shll      $4, %ecx                      // 16 bytes per table entry
        sarl      $6, %eax                      // eax = m >> 6 (arithmetic)
        movl      %eax, %edx
        // Build the exponent bits: clear the low 6 bits of m, add 65472
        // (1023 * 64) and shift left by 46, which places (m >> 6) + 1023 at
        // bit 52.
        movdqa    mmask(%rip), %xmm6
        pand      %xmm6, %xmm7
        movdqa    bias(%rip), %xmm6
        paddq     %xmm6, %xmm7
        psllq     $46, %xmm7
        subpd     %xmm3, %xmm0                  // second reduction step: xmm0 = y
        lea       Tbl_addr(%rip), %r8
        movapd    (%rcx,%r8), %xmm2             // table entry: two qwords
        // Polynomial evaluation in two lanes; statement order is interleaved
        // by the original compiler and is kept as is.
        mulpd     %xmm0, %xmm4
        movapd    %xmm0, %xmm6
        movapd    %xmm0, %xmm1
        mulpd     %xmm6, %xmm6                  // y^2
        mulpd     %xmm6, %xmm0                  // y^3
        addpd     %xmm4, %xmm5
        mulsd     %xmm6, %xmm0                  // low lane only: y^5
        mulpd     48+cv(%rip), %xmm6
        addsd     %xmm2, %xmm1                  // y + low qword of table entry
        unpckhpd  %xmm2, %xmm2                  // high qword of table entry
        mulpd     %xmm5, %xmm0
        addsd     %xmm0, %xmm1
        orpd      %xmm7, %xmm2                  // merge exponent bits into it
        unpckhpd  %xmm0, %xmm0
        addsd     %xmm1, %xmm0
        addsd     %xmm6, %xmm0
        // Unsigned compare: taken when (m >> 6) is outside [-894, 1022].
        addl      $894, %edx
        cmpl      $1916, %edx
        ja        .L_2TAG_PACKET_1.0.2
        mulsd     %xmm2, %xmm0                  // result = xmm2 * xmm0 + xmm2
        addsd     %xmm2, %xmm0
        jmp       ..B1.5
.L_2TAG_PACKET_1.0.2:
        // Scaling path for exponents near the overflow/underflow limits.
        // NOTE(review): this appears to apply the power of two in two steps
        // (as the header describes) - confirm before changing the order.
        xorpd     %xmm3, %xmm3
        movapd    ALLONES(%rip), %xmm4
        movl      $-1022, %edx
        subl      %eax, %edx                    // edx = -1022 - (m >> 6)
        movd      %edx, %xmm5
        psllq     %xmm5, %xmm4                  // all-ones mask shifted by edx
        movl      %eax, %ecx                    // ecx = m >> 6
        sarl      $1, %eax                      // half of it
        pinsrw    $3, %eax, %xmm3
        movapd    ebias(%rip), %xmm6
        psllq     $4, %xmm3                     // move it to bit 52
        psubd     %xmm3, %xmm2                  // take it out of the exponent
        mulsd     %xmm2, %xmm0
        cmpl      $52, %edx
        jg        .L_2TAG_PACKET_2.0.2
        andpd     %xmm2, %xmm4                  // masked part of xmm2
        paddd     %xmm6, %xmm3                  // add the ebias pattern
        subsd     %xmm4, %xmm2                  // remainder of xmm2
        addsd     %xmm2, %xmm0
        cmpl      $1023, %ecx
        jge       .L_2TAG_PACKET_3.0.2
        pextrw    $3, %xmm0, %ecx
        andl      $32768, %ecx                  // sign bit of the partial sum
        orl       %ecx, %edx
        cmpl      $0, %edx
        je        .L_2TAG_PACKET_4.0.2
        movapd    %xmm0, %xmm6
        addsd     %xmm4, %xmm0
        mulsd     %xmm3, %xmm0
        pextrw    $3, %xmm0, %ecx
        andl      $32752, %ecx                  // exponent field of the result
        cmpl      $0, %ecx
        je        .L_2TAG_PACKET_5.0.2          // exponent field is zero
        jmp       ..B1.5
.L_2TAG_PACKET_5.0.2:
        // Result exponent field was zero: the value is rebuilt with integer
        // operations on the two scaled parts.
        mulsd     %xmm3, %xmm6
        mulsd     %xmm3, %xmm4
        movq      %xmm6, %xmm0
        pxor      %xmm4, %xmm6
        psrad     $31, %xmm6
        pshufd    $85, %xmm6, %xmm6
        psllq     $1, %xmm0
        psrlq     $1, %xmm0                     // clear bit 63
        pxor      %xmm6, %xmm0
        psrlq     $63, %xmm6
        paddq     %xmm6, %xmm0
        paddq     %xmm4, %xmm0
        movl      $15, (%rsp)                   // never read in this routine
        jmp       .L_2TAG_PACKET_6.0.2
.L_2TAG_PACKET_4.0.2:
        addsd     %xmm4, %xmm0
        mulsd     %xmm3, %xmm0
        jmp       ..B1.5
.L_2TAG_PACKET_3.0.2:
        // (m >> 6) >= 1023: finish the scaling, then test for an all-ones
        // exponent field in the result.
        addsd     %xmm4, %xmm0
        mulsd     %xmm3, %xmm0
        pextrw    $3, %xmm0, %ecx
        andl      $32752, %ecx
        cmpl      $32752, %ecx
        jnb       .L_2TAG_PACKET_7.0.2
        jmp       ..B1.5
.L_2TAG_PACKET_2.0.2:
        // -1022 - (m >> 6) > 52.
        paddd     %xmm6, %xmm3
        addpd     %xmm2, %xmm0
        mulsd     %xmm3, %xmm0
        movl      $15, (%rsp)                   // never read in this routine
        jmp       .L_2TAG_PACKET_6.0.2
.L_2TAG_PACKET_8.0.2:
        // Reached from .L_2TAG_PACKET_0.0.2 with eax = high dword of x, sign
        // cleared, and eax >= 1083179008 (0x40900000).
        cmpl      $2146435072, %eax             // 0x7FF00000: exponent all ones
        jae       .L_2TAG_PACKET_9.0.2
        movl      12(%rsp), %eax                // high dword of x with its sign
        cmpl      $-2147483648, %eax
        jae       .L_2TAG_PACKET_10.0.2         // sign bit set
        movsd     XMAX(%rip), %xmm0
        mulsd     %xmm0, %xmm0                  // XMAX * XMAX
.L_2TAG_PACKET_7.0.2:
        movl      $14, (%rsp)                   // never read in this routine
        jmp       .L_2TAG_PACKET_6.0.2
.L_2TAG_PACKET_10.0.2:
        movsd     XMIN(%rip), %xmm0
        mulsd     %xmm0, %xmm0                  // XMIN * XMIN
        movl      $15, (%rsp)                   // never read in this routine
        jmp       .L_2TAG_PACKET_6.0.2
.L_2TAG_PACKET_9.0.2:
        // Exponent field of x is all ones: NaN or infinity.
        movl      8(%rsp), %edx                 // low dword of x
        cmpl      $2146435072, %eax
        ja        .L_2TAG_PACKET_11.0.2         // high mantissa bits set
        cmpl      $0, %edx
        jne       .L_2TAG_PACKET_11.0.2         // low mantissa bits set
        movl      12(%rsp), %eax
        cmpl      $2146435072, %eax
        jne       .L_2TAG_PACKET_12.0.2         // sign bit set
        movsd     INF(%rip), %xmm0
        jmp       ..B1.5
.L_2TAG_PACKET_12.0.2:
        movsd     ZERO(%rip), %xmm0
        jmp       ..B1.5
.L_2TAG_PACKET_11.0.2:
        movsd     8(%rsp), %xmm0
        addsd     %xmm0, %xmm0                  // return x + x
        jmp       ..B1.5
.L_2TAG_PACKET_0.0.2:
        // Argument failed the range check at the top of the routine.
        movl      12(%rsp), %eax
        andl      $2147483647, %eax             // high dword of x, sign cleared
        cmpl      $1083179008, %eax             // 0x40900000
        jae       .L_2TAG_PACKET_8.0.2
        movsd     8(%rsp), %xmm0
        addsd     ONE_val(%rip), %xmm0          // return x + ONE_val
        jmp       ..B1.5
.L_2TAG_PACKET_6.0.2:
        movq      %xmm0, 16(%rsp)
..B1.3:
        movq      16(%rsp), %xmm0
.L_2TAG_PACKET_13.0.2:
..B1.5:
        addq      $24, %rsp
..___tag_value_exp.4:
        ret
..___tag_value_exp.5:
END(exp)
# -- End  exp
        .section .rodata, "a"
        .align 16
        .align 16
// cv: packed constants, addressed as cv, 16+cv, ... 80+cv by the code above.
// Each pair of .long values is one double, low dword first.
cv:
        // cv+0: multiplied with x (0x40571547652B82FE in both lanes)
        .long   1697350398, 1079448903
        .long   1697350398, 1079448903
        // cv+16: first reduction constant (both lanes)
        .long   4277796864, 1065758274
        .long   4277796864, 1065758274
        // cv+32: second reduction constant (both lanes)
        .long   3164486458, 1025308570
        .long   3164486458, 1025308570
        // cv+48: multiplied with y^2 (0x3FDFFFFFFFFFFFFE in both lanes)
        .long   4294967294, 1071644671
        .long   4294967294, 1071644671
        // cv+64: polynomial coefficients multiplied with y
        .long   3811088480, 1062650204
        .long   1432067621, 1067799893
        // cv+80: polynomial coefficients added to the products above
        .long   3230715663, 1065423125
        .long   1431604129, 1069897045
        .type   cv,@object
        .size   cv,96
        .align 16
// Shifter: 0x4338000000000000 in both lanes; added to and then subtracted
// from the scaled argument.
Shifter:
        .long   0, 1127743488
        .long   0, 1127743488
        .type   Shifter,@object
        .size   Shifter,16
        .align 16
// mmask: 0xFFFFFFC0 per qword; clears the low 6 bits of m.
mmask:
        .long   4294967232, 0
        .long   4294967232, 0
        .type   mmask,@object
        .size   mmask,16
        .align 16
// bias: 65472 (1023 * 64) per qword; added before the shift by 46.
bias:
        .long   65472, 0
        .long   65472, 0
        .type   bias,@object
        .size   bias,16
        .align 16
// Tbl_addr: 64 entries of 16 bytes, indexed by (m & 63) * 16.
// Per entry: the low qword (first two values) is added to y; the high qword
// (last two values) has a zero exponent field and is OR-ed with the exponent
// bits built from m.
Tbl_addr:
        .long   0, 0, 0, 0
        .long   235107661, 1018002367, 1048019040, 11418
        .long   896005651, 1015861842, 3541402996, 22960
        .long   1642514529, 1012987726, 410360776, 34629
        .long   1568897900, 1016568486, 1828292879, 46424
        .long   1882168529, 1010744893, 852742562, 58348
        .long   509852888, 1017336174, 3490863952, 70401
        .long   653277307, 1017431380, 2930322911, 82586
        .long   1649557430, 1017729363, 1014845818, 94904
        .long   1058231231, 1015777676, 3949972341, 107355
        .long   1044000607, 1016786167, 828946858, 119943
        .long   1151779725, 1015705409, 2288159958, 132667
        .long   3819481236, 1016499965, 1853186616, 145530
        .long   2552227826, 1015039787, 1709341917, 158533
        .long   1829350193, 1015216097, 4112506593, 171677
        .long   1913391795, 1015756674, 2799960843, 184965
        .long   1303423926, 1015238005, 171030293, 198398
        .long   1574172746, 1016061241, 2992903935, 211976
        .long   3424156969, 1017196428, 926591434, 225703
        .long   1938513547, 1017631273, 887463926, 239579
        .long   2804567149, 1015390024, 1276261410, 253606
        .long   631083525, 1017690182, 569847337, 267786
        .long   1623370770, 1011049453, 1617004845, 282120
        .long   3667985273, 1013894369, 3049340112, 296610
        .long   3145379760, 1014403278, 3577096743, 311258
        .long   2603100681, 1017152460, 1990012070, 326066
        .long   3249202951, 1017448880, 1453150081, 341035
        .long   419288974, 1016280325, 917841882, 356167
        .long   3793507337, 1016095713, 3712504873, 371463
        .long   728023093, 1016345318, 363667784, 386927
        .long   2582678538, 1017123460, 2956612996, 402558
        .long   7592966, 1016721543, 2186617380, 418360
        .long   228611441, 1016696141, 1719614412, 434334
        .long   2261665670, 1017457593, 1013258798, 450482
        .long   544148907, 1017323666, 3907805043, 466805
        .long   2383914918, 1017143586, 1447192520, 483307
        .long   1176412038, 1017267372, 1944781190, 499988
        .long   2882956373, 1013312481, 919555682, 516851
        .long   3154077648, 1016528543, 2571947538, 533897
        .long   348651999, 1016405780, 2604962540, 551129
        .long   3253791412, 1015920431, 1110089947, 568549
        .long   1509121860, 1014756995, 2568320822, 586158
        .long   2617649212, 1017340090, 2966275556, 603959
        .long   553214634, 1016457425, 2682146383, 621954
        .long   730975783, 1014083580, 2191782032, 640145
        .long   1486499517, 1016818996, 2069751140, 658534
        .long   2595788928, 1016407932, 2990417244, 677123
        .long   1853053619, 1015310724, 1434058175, 695915
        .long   2462790535, 1015814775, 2572866477, 714911
        .long   3693944214, 1017259110, 3092190714, 734114
        .long   2979333550, 1017188654, 4076559942, 753526
        .long   174054861, 1014300631, 2420883922, 773150
        .long   816778419, 1014197934, 3716502172, 792987
        .long   3507050924, 1015341199, 777507147, 813041
        .long   1821514088, 1013410604, 3706687593, 833312
        .long   920623539, 1016295433, 1242007931, 853805
        .long   2789017511, 1014276997, 3707479175, 874520
        .long   3586233004, 1015962192, 64696965, 895462
        .long   474650514, 1016642419, 863738718, 916631
        .long   1614448851, 1014281732, 3884662774, 938030
        .long   2450082086, 1016164135, 2728693977, 959663
        .long   1101668360, 1015989180, 3999357479, 981531
        .long   835814894, 1015702697, 1533953344, 1003638
        .long   1301400989, 1014466875, 2174652632, 1025985
        .type   Tbl_addr,@object
        .size   Tbl_addr,1024
        .align 16
// ALLONES: 128 set bits; shifted left to form a mask on the scaling path.
ALLONES:
        .long   4294967295, 4294967295
        .long   4294967295, 4294967295
        .type   ALLONES,@object
        .size   ALLONES,16
        .align 16
// ebias: 0x3FF0000000000000 (bit pattern of 1.0) in both lanes.
ebias:
        .long   0, 1072693248
        .long   0, 1072693248
        .type   ebias,@object
        .size   ebias,16
        .align 4
// XMAX: 0x7FEFFFFFFFFFFFFF, squared on the large positive argument path.
XMAX:
        .long   4294967295, 2146435071
        .type   XMAX,@object
        .size   XMAX,8
        .align 4
// XMIN: 0x0010000000000000, squared on the large negative argument path.
XMIN:
        .long   0, 1048576
        .type   XMIN,@object
        .size   XMIN,8
        .align 4
// INF: 0x7FF0000000000000, returned for x = +INF.
INF:
        .long   0, 2146435072
        .type   INF,@object
        .size   INF,8
        .align 4
// ZERO: returned for x = -INF.
ZERO:
        .long   0, 0
        .type   ZERO,@object
        .size   ZERO,8
        .align 4
// ONE_val: 0x3FF0000000000000 (1.0), added to x on the small-argument path.
ONE_val:
        .long   0, 1072693248
        .type   ONE_val,@object
        .size   ONE_val,8
        .data
        .section .note.GNU-stack, ""
// -- Begin DWARF2 SEGMENT .eh_frame
// Hand-encoded unwind data; the label differences tie it to the stack
// adjustments at ..___tag_value_exp.3 and ..___tag_value_exp.4 above.
        .section .eh_frame,"a",@progbits
.eh_frame_seg:
        .align 1
        .4byte 0x00000014
        .8byte 0x00527a0100000000
        .8byte 0x08070c1b01107801
        .4byte 0x00000190
        .4byte 0x0000001c
        .4byte 0x0000001c
        .4byte ..___tag_value_exp.1-.
        .4byte ..___tag_value_exp.5-..___tag_value_exp.1
        .2byte 0x0400
        .4byte ..___tag_value_exp.3-..___tag_value_exp.1
        .2byte 0x200e
        .byte 0x04
        .4byte ..___tag_value_exp.4-..___tag_value_exp.3
        .2byte 0x080e
        .byte 0x00
# End