• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2Copyright (c) 2014, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8    * Redistributions of source code must retain the above copyright notice,
9    * this list of conditions and the following disclaimer.
10
11    * Redistributions in binary form must reproduce the above copyright notice,
12    * this list of conditions and the following disclaimer in the documentation
13    * and/or other materials provided with the distribution.
14
15    * Neither the name of Intel Corporation nor the names of its contributors
16    * may be used to endorse or promote products derived from this software
17    * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31/******************************************************************************/
32//                     ALGORITHM DESCRIPTION
33//                     ---------------------
34//
35// Description:
36//  Let K = 64 (table size).
37//        x    x/log(2)     n
38//       e  = 2          = 2 * T[j] * (1 + P(y))
39//  where
40//       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
41//       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
42//                  j/K
43//       values of 2   are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
44//
45//       P(y) is a minimax polynomial approximation of exp(y)-1
46//       on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated with Maple V).
47//
48//  To avoid problems with arithmetic overflow and underflow,
49//            n                        n1  n2
50//  value of 2  is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2]
51//  where BIAS is a value of exponent bias.
52//
53// Special cases:
54//  exp(NaN) = NaN
55//  exp(+INF) = +INF
56//  exp(-INF) = 0
57//  exp(x) = 1 for subnormals
58//  for finite argument, only exp(0)=1 is exact
59//  For IEEE double
60//    if x >  709.782712893383973096 then exp(x) overflow
61//    if x < -745.133219101941108420 then exp(x) underflow
62//
63/******************************************************************************/
64
65#include <private/bionic_asm.h>
66# -- Begin  exp
# ---------------------------------------------------------------------------
# double exp(double x)
# In:      %xmm0 (low lane) = x
# Out:     %xmm0 (low lane) = result
# Writes:  %eax, %ecx, %edx, %r8, %xmm0-%xmm7 and the flags
# Stack:   24 bytes; 8(%rsp) holds a copy of x, 16(%rsp) is a result spill
#          slot, (%rsp) receives 14 or 15 on the range-exception paths. That
#          code is only stored, never read back, inside this function
#          (presumably a leftover error code - TODO confirm).
# Notation below follows the algorithm description at the top of the file:
# m = n*64 + j, y = reduced argument, T[j] = table entry.
# ---------------------------------------------------------------------------
67ENTRY(exp)
68# parameter 1: %xmm0
69..B1.1:
70..___tag_value_exp.1:
        # Reserve the 24-byte frame and keep a copy of x for the special paths.
71        subq      $24, %rsp
72..___tag_value_exp.3:
73        movsd     %xmm0, 8(%rsp)
74..B1.2:
        # Duplicate x into both lanes and preload the reduction constants.
75        unpcklpd  %xmm0, %xmm0
76        movapd    cv(%rip), %xmm1
77        movapd    Shifter(%rip), %xmm6
78        movapd    16+cv(%rip), %xmm2
79        movapd    32+cv(%rip), %xmm3
        # Range check on the top 16 bits of x with the sign bit cleared.
        # The OR below has its sign bit set when that field is above 16527
        # (0x408f) or below 15504 (0x3c90), i.e. |x| >= 1024 or |x| < 2**-54.
        # Those inputs leave the main path.
80        pextrw    $3, %xmm0, %eax
81        andl      $32767, %eax
82        movl      $16527, %edx
83        subl      %eax, %edx
84        subl      $15504, %eax
85        orl       %eax, %edx
86        cmpl      $-2147483648, %edx
87        jae       .L_2TAG_PACKET_0.0.2
        # Argument reduction. Adding and then subtracting Shifter rounds
        # x*cv[0] to an integer m; xmm7 keeps the shifted sum, whose low
        # 32 bits are read back as the integer m further down.
88        mulpd     %xmm0, %xmm1
89        addpd     %xmm6, %xmm1
90        movapd    %xmm1, %xmm7
91        subpd     %xmm6, %xmm1
        # y = x - m*cv[16] - m*cv[32] (two-step subtraction; the second
        # subtraction is a few lines below). Coefficient rows cv+64 and cv+80
        # are loaded in between.
92        mulpd     %xmm1, %xmm2
93        movapd    64+cv(%rip), %xmm4
94        mulpd     %xmm1, %xmm3
95        movapd    80+cv(%rip), %xmm5
96        subpd     %xmm2, %xmm0
        # eax = m. ecx = (m & 63) * 16 is the byte offset of the 16-byte
        # table entry; eax = edx = m >> 6 (arithmetic shift) is n.
97        movd      %xmm7, %eax
98        movl      %eax, %ecx
99        andl      $63, %ecx
100        shll      $4, %ecx
101        sarl      $6, %eax
102        movl      %eax, %edx
        # Build the exponent field for 2**n in xmm7: clear the low 6 bits of
        # m, add 1023*64, then shift left by 46 so that n+1023 lands at bit 52.
103        movdqa    mmask(%rip), %xmm6
104        pand      %xmm6, %xmm7
105        movdqa    bias(%rip), %xmm6
106        paddq     %xmm6, %xmm7
107        psllq     $46, %xmm7
108        subpd     %xmm3, %xmm0
        # xmm2 = table entry for j = m & 63 (two doubles).
109        lea       Tbl_addr(%rip), %r8
110        movapd    (%rcx,%r8), %xmm2
        # Polynomial in y, evaluated in two lanes:
        #   xmm6 = y**2, xmm0 = (y**5 low lane, y**3 high lane),
        #   xmm5 = cv[80] + y*cv[64] per lane.
        # The low double of the table entry is added to y, and the high
        # double is broadcast and ORed with the exponent bits from xmm7.
111        mulpd     %xmm0, %xmm4
112        movapd    %xmm0, %xmm6
113        movapd    %xmm0, %xmm1
114        mulpd     %xmm6, %xmm6
115        mulpd     %xmm6, %xmm0
116        addpd     %xmm4, %xmm5
117        mulsd     %xmm6, %xmm0
118        mulpd     48+cv(%rip), %xmm6
119        addsd     %xmm2, %xmm1
120        unpckhpd  %xmm2, %xmm2
121        mulpd     %xmm5, %xmm0
122        addsd     %xmm0, %xmm1
123        orpd      %xmm7, %xmm2
124        unpckhpd  %xmm0, %xmm0
125        addsd     %xmm1, %xmm0
126        addsd     %xmm6, %xmm0
        # Fast path only when -894 <= n <= 1022 (unsigned compare of n+894
        # against 1916); anything else goes to the careful scaling code.
127        addl      $894, %edx
128        cmpl      $1916, %edx
129        ja        .L_2TAG_PACKET_1.0.2
        # result = xmm2*xmm0 + xmm2, with xmm2 = scaled table value and
        # xmm0 = polynomial sum.
130        mulsd     %xmm2, %xmm0
131        addsd     %xmm2, %xmm0
132        jmp       ..B1.5
        # --- n outside [-894, 1022]: result may overflow or be subnormal ---
        # xmm4 = all-ones shifted left by (-1022 - n) bits (a mask).
        # xmm3 = (n >> 1) placed at the exponent position (word 3, then
        # shifted left by 4); it is subtracted from the bits of xmm2 so that
        # half of the scaling is applied later through xmm3 + ebias.
133.L_2TAG_PACKET_1.0.2:
134        xorpd     %xmm3, %xmm3
135        movapd    ALLONES(%rip), %xmm4
136        movl      $-1022, %edx
137        subl      %eax, %edx
138        movd      %edx, %xmm5
139        psllq     %xmm5, %xmm4
140        movl      %eax, %ecx
141        sarl      $1, %eax
142        pinsrw    $3, %eax, %xmm3
143        movapd    ebias(%rip), %xmm6
144        psllq     $4, %xmm3
145        psubd     %xmm3, %xmm2
146        mulsd     %xmm2, %xmm0
        # -1022 - n > 52, i.e. n < -1074: take the deep-underflow path.
147        cmpl      $52, %edx
148        jg        .L_2TAG_PACKET_2.0.2
        # Split xmm2 into a masked part (xmm4) and the remainder, and turn
        # xmm3 into the bit pattern of the remaining power-of-two factor.
149        andpd     %xmm2, %xmm4
150        paddd     %xmm6, %xmm3
151        subsd     %xmm4, %xmm2
152        addsd     %xmm2, %xmm0
        # n >= 1023: overflow-side handling.
153        cmpl      $1023, %ecx
154        jge       .L_2TAG_PACKET_3.0.2
        # Underflow side: branch when both the sign bit of xmm0 and
        # edx = -1022 - n are zero.
155        pextrw    $3, %xmm0, %ecx
156        andl      $32768, %ecx
157        orl       %ecx, %edx
158        cmpl      $0, %edx
159        je        .L_2TAG_PACKET_4.0.2
        # Keep the partial sum in xmm6, finish the scaled result, then test
        # its exponent field (mask 32752 = 0x7ff0); zero means the result is
        # subnormal or zero.
160        movapd    %xmm0, %xmm6
161        addsd     %xmm4, %xmm0
162        mulsd     %xmm3, %xmm0
163        pextrw    $3, %xmm0, %ecx
164        andl      $32752, %ecx
165        cmpl      $0, %ecx
166        je        .L_2TAG_PACKET_5.0.2
167        jmp       ..B1.5
        # Result exponent field is zero: scale both pieces separately and
        # combine their bit patterns with integer arithmetic. xmm6 becomes an
        # all-ones mask when the signs of the two scaled pieces differ; the
        # magnitude bits of the low piece are then twos-complement negated
        # before being added to the bits of the high piece.
        # NOTE(review): presumably this yields a correctly rounded subnormal -
        # TODO confirm.
168.L_2TAG_PACKET_5.0.2:
169        mulsd     %xmm3, %xmm6
170        mulsd     %xmm3, %xmm4
171        movq      %xmm6, %xmm0
172        pxor      %xmm4, %xmm6
173        psrad     $31, %xmm6
174        pshufd    $85, %xmm6, %xmm6
175        psllq     $1, %xmm0
176        psrlq     $1, %xmm0
177        pxor      %xmm6, %xmm0
178        psrlq     $63, %xmm6
179        paddq     %xmm6, %xmm0
180        paddq     %xmm4, %xmm0
181        movl      $15, (%rsp)
182        jmp       .L_2TAG_PACKET_6.0.2
        # n == -1022 with a non-negative partial sum: plain combine and scale.
183.L_2TAG_PACKET_4.0.2:
184        addsd     %xmm4, %xmm0
185        mulsd     %xmm3, %xmm0
186        jmp       ..B1.5
        # n >= 1023: combine and scale, then treat an all-ones exponent field
        # (0x7ff0) in the result as overflow.
187.L_2TAG_PACKET_3.0.2:
188        addsd     %xmm4, %xmm0
189        mulsd     %xmm3, %xmm0
190        pextrw    $3, %xmm0, %ecx
191        andl      $32752, %ecx
192        cmpl      $32752, %ecx
193        jnb       .L_2TAG_PACKET_7.0.2
194        jmp       ..B1.5
        # n < -1074: no splitting, just combine, scale and record code 15.
195.L_2TAG_PACKET_2.0.2:
196        paddd     %xmm6, %xmm3
197        addpd     %xmm2, %xmm0
198        mulsd     %xmm3, %xmm0
199        movl      $15, (%rsp)
200        jmp       .L_2TAG_PACKET_6.0.2
        # --- |x| >= 1024 (eax = high dword of x with the sign cleared) ---
        # High dword >= 0x7ff00000 means Inf or NaN. Otherwise the sign of x
        # selects overflow (XMAX*XMAX) or underflow (XMIN*XMIN).
201.L_2TAG_PACKET_8.0.2:
202        cmpl      $2146435072, %eax
203        jae       .L_2TAG_PACKET_9.0.2
204        movl      12(%rsp), %eax
205        cmpl      $-2147483648, %eax
206        jae       .L_2TAG_PACKET_10.0.2
207        movsd     XMAX(%rip), %xmm0
208        mulsd     %xmm0, %xmm0
        # Overflow exit: record code 14.
209.L_2TAG_PACKET_7.0.2:
210        movl      $14, (%rsp)
211        jmp       .L_2TAG_PACKET_6.0.2
        # Large negative x: XMIN*XMIN underflows; record code 15.
212.L_2TAG_PACKET_10.0.2:
213        movsd     XMIN(%rip), %xmm0
214        mulsd     %xmm0, %xmm0
215        movl      $15, (%rsp)
216        jmp       .L_2TAG_PACKET_6.0.2
        # Inf or NaN. NaN when the high dword is above 0x7ff00000 or the low
        # dword is non-zero. Otherwise +Inf returns INF and -Inf returns ZERO.
217.L_2TAG_PACKET_9.0.2:
218        movl      8(%rsp), %edx
219        cmpl      $2146435072, %eax
220        ja        .L_2TAG_PACKET_11.0.2
221        cmpl      $0, %edx
222        jne       .L_2TAG_PACKET_11.0.2
223        movl      12(%rsp), %eax
224        cmpl      $2146435072, %eax
225        jne       .L_2TAG_PACKET_12.0.2
226        movsd     INF(%rip), %xmm0
227        jmp       ..B1.5
228.L_2TAG_PACKET_12.0.2:
229        movsd     ZERO(%rip), %xmm0
230        jmp       ..B1.5
        # NaN input: return x + x.
231.L_2TAG_PACKET_11.0.2:
232        movsd     8(%rsp), %xmm0
233        addsd     %xmm0, %xmm0
234        jmp       ..B1.5
        # --- failed the initial range check ---
        # High dword of |x| >= 0x40900000 (|x| >= 1024) goes to the large
        # argument code; otherwise x is tiny and the result is 1.0 + x.
235.L_2TAG_PACKET_0.0.2:
236        movl      12(%rsp), %eax
237        andl      $2147483647, %eax
238        cmpl      $1083179008, %eax
239        jae       .L_2TAG_PACKET_8.0.2
240        movsd     8(%rsp), %xmm0
241        addsd     ONE_val(%rip), %xmm0
242        jmp       ..B1.5
        # Exception exits: the result is stored to 16(%rsp) and immediately
        # reloaded before the common epilogue.
243.L_2TAG_PACKET_6.0.2:
244        movq      %xmm0, 16(%rsp)
245..B1.3:
246        movq      16(%rsp), %xmm0
247.L_2TAG_PACKET_13.0.2:
        # Common epilogue: release the 24-byte frame and return %xmm0.
248..B1.5:
249        addq      $24, %rsp
250..___tag_value_exp.4:
251        ret
252..___tag_value_exp.5:
253END(exp)
254# -- End  exp
# cv: 96 bytes = six 16-byte rows, each row two doubles stored as
# little-endian .long pairs (low dword first). Loaded with movapd, hence the
# 16-byte alignment. Values below are read off the bit patterns:
#   cv+0   both lanes 0x40571547652b82fe, multiplier applied to x before
#          rounding (64/log(2) by bit pattern)
#   cv+16  both lanes, high dword 0x3f862e42: leading part of log(2)/64
#   cv+32  both lanes, high dword 0x3d1cf79a: trailing part of log(2)/64
#   cv+48  both lanes 0x3fdffffffffffffe: coefficient of y**2 (just below 0.5)
#   cv+64  lanes (0x3f56c15c..., 0x3fa55555...): approx 1/720 and 1/24
#   cv+80  lanes (0x3f811115..., 0x3fc55555...): approx 1/120 and 1/6
# NOTE(review): the 1/720..1/6 reading is approximate; the exact values are
# presumably the minimax coefficients from the file header - TODO confirm.
255	.section .rodata, "a"
256	.align 16
257	.align 16
258cv:
259	.long	1697350398
260	.long	1079448903
261	.long	1697350398
262	.long	1079448903
263	.long	4277796864
264	.long	1065758274
265	.long	4277796864
266	.long	1065758274
267	.long	3164486458
268	.long	1025308570
269	.long	3164486458
270	.long	1025308570
271	.long	4294967294
272	.long	1071644671
273	.long	4294967294
274	.long	1071644671
275	.long	3811088480
276	.long	1062650204
277	.long	1432067621
278	.long	1067799893
279	.long	3230715663
280	.long	1065423125
281	.long	1431604129
282	.long	1069897045
283	.type	cv,@object
284	.size	cv,96
# Shifter: both lanes 0x4338000000000000 (1.5 * 2**52). exp adds and then
# subtracts it to round x*cv[0] to an integer, and reads that integer from
# the low 32 bits of the sum.
285	.align 16
286Shifter:
287	.long	0
288	.long	1127743488
289	.long	0
290	.long	1127743488
291	.type	Shifter,@object
292	.size	Shifter,16
# mmask: per 64-bit lane 0x00000000ffffffc0. ANDed with the shifted sum to
# clear the low 6 bits (the table index) and the upper 32 bits.
293	.align 16
294mmask:
295	.long	4294967232
296	.long	0
297	.long	4294967232
298	.long	0
299	.type	mmask,@object
300	.size	mmask,16
# bias: per 64-bit lane 65472 = 1023*64. Added to the masked integer before
# the left shift by 46, so that n + 1023 ends up in the exponent field.
301	.align 16
302bias:
303	.long	65472
304	.long	0
305	.long	65472
306	.long	0
307	.type	bias,@object
308	.size	bias,16
# Tbl_addr: 1024 bytes = 64 entries of 16 bytes (four .long values each).
# exp indexes it with byte offset (m & 63) * 16 and loads a whole entry with
# movapd. The low 8 bytes of an entry are added to the reduced argument as a
# double; the high 8 bytes are broadcast and ORed with the exponent bits.
# Entry 0 is all zeros.
# NOTE(review): presumably the low double is T_lo[j] and the high 8 bytes are
# the mantissa bits of T_hi[j] = 2**(j/64) with a zero exponent field, as in
# the algorithm description - TODO confirm against the generator.
309	.align 16
310Tbl_addr:
311	.long	0
312	.long	0
313	.long	0
314	.long	0
315	.long	235107661
316	.long	1018002367
317	.long	1048019040
318	.long	11418
319	.long	896005651
320	.long	1015861842
321	.long	3541402996
322	.long	22960
323	.long	1642514529
324	.long	1012987726
325	.long	410360776
326	.long	34629
327	.long	1568897900
328	.long	1016568486
329	.long	1828292879
330	.long	46424
331	.long	1882168529
332	.long	1010744893
333	.long	852742562
334	.long	58348
335	.long	509852888
336	.long	1017336174
337	.long	3490863952
338	.long	70401
339	.long	653277307
340	.long	1017431380
341	.long	2930322911
342	.long	82586
343	.long	1649557430
344	.long	1017729363
345	.long	1014845818
346	.long	94904
347	.long	1058231231
348	.long	1015777676
349	.long	3949972341
350	.long	107355
351	.long	1044000607
352	.long	1016786167
353	.long	828946858
354	.long	119943
355	.long	1151779725
356	.long	1015705409
357	.long	2288159958
358	.long	132667
359	.long	3819481236
360	.long	1016499965
361	.long	1853186616
362	.long	145530
363	.long	2552227826
364	.long	1015039787
365	.long	1709341917
366	.long	158533
367	.long	1829350193
368	.long	1015216097
369	.long	4112506593
370	.long	171677
371	.long	1913391795
372	.long	1015756674
373	.long	2799960843
374	.long	184965
375	.long	1303423926
376	.long	1015238005
377	.long	171030293
378	.long	198398
379	.long	1574172746
380	.long	1016061241
381	.long	2992903935
382	.long	211976
383	.long	3424156969
384	.long	1017196428
385	.long	926591434
386	.long	225703
387	.long	1938513547
388	.long	1017631273
389	.long	887463926
390	.long	239579
391	.long	2804567149
392	.long	1015390024
393	.long	1276261410
394	.long	253606
395	.long	631083525
396	.long	1017690182
397	.long	569847337
398	.long	267786
399	.long	1623370770
400	.long	1011049453
401	.long	1617004845
402	.long	282120
403	.long	3667985273
404	.long	1013894369
405	.long	3049340112
406	.long	296610
407	.long	3145379760
408	.long	1014403278
409	.long	3577096743
410	.long	311258
411	.long	2603100681
412	.long	1017152460
413	.long	1990012070
414	.long	326066
415	.long	3249202951
416	.long	1017448880
417	.long	1453150081
418	.long	341035
419	.long	419288974
420	.long	1016280325
421	.long	917841882
422	.long	356167
423	.long	3793507337
424	.long	1016095713
425	.long	3712504873
426	.long	371463
427	.long	728023093
428	.long	1016345318
429	.long	363667784
430	.long	386927
431	.long	2582678538
432	.long	1017123460
433	.long	2956612996
434	.long	402558
435	.long	7592966
436	.long	1016721543
437	.long	2186617380
438	.long	418360
439	.long	228611441
440	.long	1016696141
441	.long	1719614412
442	.long	434334
443	.long	2261665670
444	.long	1017457593
445	.long	1013258798
446	.long	450482
447	.long	544148907
448	.long	1017323666
449	.long	3907805043
450	.long	466805
451	.long	2383914918
452	.long	1017143586
453	.long	1447192520
454	.long	483307
455	.long	1176412038
456	.long	1017267372
457	.long	1944781190
458	.long	499988
459	.long	2882956373
460	.long	1013312481
461	.long	919555682
462	.long	516851
463	.long	3154077648
464	.long	1016528543
465	.long	2571947538
466	.long	533897
467	.long	348651999
468	.long	1016405780
469	.long	2604962540
470	.long	551129
471	.long	3253791412
472	.long	1015920431
473	.long	1110089947
474	.long	568549
475	.long	1509121860
476	.long	1014756995
477	.long	2568320822
478	.long	586158
479	.long	2617649212
480	.long	1017340090
481	.long	2966275556
482	.long	603959
483	.long	553214634
484	.long	1016457425
485	.long	2682146383
486	.long	621954
487	.long	730975783
488	.long	1014083580
489	.long	2191782032
490	.long	640145
491	.long	1486499517
492	.long	1016818996
493	.long	2069751140
494	.long	658534
495	.long	2595788928
496	.long	1016407932
497	.long	2990417244
498	.long	677123
499	.long	1853053619
500	.long	1015310724
501	.long	1434058175
502	.long	695915
503	.long	2462790535
504	.long	1015814775
505	.long	2572866477
506	.long	714911
507	.long	3693944214
508	.long	1017259110
509	.long	3092190714
510	.long	734114
511	.long	2979333550
512	.long	1017188654
513	.long	4076559942
514	.long	753526
515	.long	174054861
516	.long	1014300631
517	.long	2420883922
518	.long	773150
519	.long	816778419
520	.long	1014197934
521	.long	3716502172
522	.long	792987
523	.long	3507050924
524	.long	1015341199
525	.long	777507147
526	.long	813041
527	.long	1821514088
528	.long	1013410604
529	.long	3706687593
530	.long	833312
531	.long	920623539
532	.long	1016295433
533	.long	1242007931
534	.long	853805
535	.long	2789017511
536	.long	1014276997
537	.long	3707479175
538	.long	874520
539	.long	3586233004
540	.long	1015962192
541	.long	64696965
542	.long	895462
543	.long	474650514
544	.long	1016642419
545	.long	863738718
546	.long	916631
547	.long	1614448851
548	.long	1014281732
549	.long	3884662774
550	.long	938030
551	.long	2450082086
552	.long	1016164135
553	.long	2728693977
554	.long	959663
555	.long	1101668360
556	.long	1015989180
557	.long	3999357479
558	.long	981531
559	.long	835814894
560	.long	1015702697
561	.long	1533953344
562	.long	1003638
563	.long	1301400989
564	.long	1014466875
565	.long	2174652632
566	.long	1025985
567	.type	Tbl_addr,@object
568	.size	Tbl_addr,1024
# ALLONES: 128 bits of ones. exp shifts it left by (-1022 - n) bits to build
# the mask that splits the scaled table value on the out-of-range path.
569	.align 16
570ALLONES:
571	.long	4294967295
572	.long	4294967295
573	.long	4294967295
574	.long	4294967295
575	.type	ALLONES,@object
576	.size	ALLONES,16
# ebias: both lanes 0x3ff0000000000000 (the bit pattern of 1.0). Added with
# paddd to an exponent offset to form a power-of-two scale factor.
577	.align 16
578ebias:
579	.long	0
580	.long	1072693248
581	.long	0
582	.long	1072693248
583	.type	ebias,@object
584	.size	ebias,16
# XMAX: 0x7fefffffffffffff, the largest finite double. exp squares it on the
# large positive argument path.
585	.align 4
586XMAX:
587	.long	4294967295
588	.long	2146435071
589	.type	XMAX,@object
590	.size	XMAX,8
# XMIN: 0x0010000000000000, the smallest positive normal double (2**-1022).
# exp squares it on the large negative argument path.
591	.align 4
592XMIN:
593	.long	0
594	.long	1048576
595	.type	XMIN,@object
596	.size	XMIN,8
# INF: 0x7ff0000000000000 (+infinity), returned for x = +Inf.
597	.align 4
598INF:
599	.long	0
600	.long	2146435072
601	.type	INF,@object
602	.size	INF,8
# ZERO: +0.0, returned for x = -Inf.
603	.align 4
604ZERO:
605	.long	0
606	.long	0
607	.type	ZERO,@object
608	.size	ZERO,8
# ONE_val: 0x3ff0000000000000 (1.0). Tiny arguments return ONE_val + x.
609	.align 4
610ONE_val:
611	.long	0
612	.long	1072693248
613	.type	ONE_val,@object
614	.size	ONE_val,8
# Trailer: an empty .note.GNU-stack section (conventionally marks the object
# as not needing an executable stack) followed by hand-encoded unwind data.
615	.data
616	.section .note.GNU-stack, ""
617// -- Begin DWARF2 SEGMENT .eh_frame
618	.section .eh_frame,"a",@progbits
619.eh_frame_seg:
620	.align 1
# First record, length 0x14.
# NOTE(review): decoded by hand as a CIE with augmentation zR, code alignment
# 1, data alignment -8, return-address column 16, CFA = r7 + 8 - verify
# against the DWARF CFI encoding before editing.
621	.4byte 0x00000014
622	.8byte 0x00527a0100000000
623	.8byte 0x08070c1b01107801
624	.4byte 0x00000190
# Second record, length 0x1c, covering ..___tag_value_exp.1 up to
# ..___tag_value_exp.5. The two advance/offset pairs are placed at the
# labels after the subq (tag 3) and after the addq (tag 4).
# NOTE(review): bytes 0x0e 0x20 and 0x0e 0x08 read as def_cfa_offset 32 and 8,
# which matches the 24-byte frame - verify.
625	.4byte 0x0000001c
626	.4byte 0x0000001c
627	.4byte ..___tag_value_exp.1-.
628	.4byte ..___tag_value_exp.5-..___tag_value_exp.1
629	.2byte 0x0400
630	.4byte ..___tag_value_exp.3-..___tag_value_exp.1
631	.2byte 0x200e
632	.byte 0x04
633	.4byte ..___tag_value_exp.4-..___tag_value_exp.3
634	.2byte 0x080e
635	.byte 0x00
636# End
637